[ { "id": "--qiQPsCV94", "title": "Unsupervised Manifold Linearizing and Clustering", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Clustering data lying close to a union of low-dimensional manifolds, with each manifold as a cluster, is a fundamental problem in machine learning. When the manifolds are assumed to be linear subspaces, many methods succeed using low-rank and sparse priors, which have been studied extensively over the past two decades. Unfortunately, most real-world datasets can not be well approximated by linear subspaces. On the other hand, several works have proposed to identify the manifolds by learning a feature map such that the data transformed by the map lie in a union of linear subspaces, even though the original data are from non-linear manifolds. However, most works either assume knowledge of the membership of samples to clusters, or are shown to learn trivial representations. In this paper, we propose to simultaneously perform clustering and learn a union-of-subspace representation via Maximal Coding Rate Reduction. Experiments on synthetic and realistic datasets show that the proposed method achieves clustering accuracy comparable with state-of-the-art alternatives, while being more scalable and learning geometrically meaningful representations.", "keywords": "Clustering;Manifold Embedding;Manifold Clustering", "primary_area": "", "supplementary_material": "", "author": "Tianjiao Ding;Shengbang Tong;Kwan Ho Ryan Chan;Xili Dai;Yi Ma;Benjamin David Haeffele", "authorids": "~Tianjiao_Ding1;~Shengbang_Tong1;~Kwan_Ho_Ryan_Chan1;~Xili_Dai2;~Yi_Ma4;~Benjamin_David_Haeffele1", "gender": "M;M;M;M;M;", "homepage": "https://tianjiaoding.com/;https://tsb0601.github.io/petertongsb/;https://ryanchankh.github.io/;https://delay-xili.github.io/;http://people.eecs.berkeley.edu/~yima/;", "dblp": "230/1227;306/1406;267/5496;170/8561;;", "google_scholar": "L3wy9QMAAAAJ;https://scholar.google.com/citations?hl=en;DBXWBqcAAAAJ;CtRMD1UAAAAJ;https://scholar.google.com.hk/citations?user=XqLiBQMAAAAJ;", "orcid": ";;;;;", "linkedin": ";;ryanchankh/;xili-daley-dai-b87030179/;;", "or_profile": "~Tianjiao_Ding1;~Shengbang_Tong1;~Kwan_Ho_Ryan_Chan1;~Xili_Dai2;~Yi_Ma4;~Benjamin_David_Haeffele1", "aff": "Johns Hopkins University;University of California, Berkeley;University of Pennsylvania ;Hong Kong University of Science and Technology (Guangzhou);University of California, Berkeley;", "aff_domain": "jhu.edu;berkeley.edu;seas.upenn.edu;hkust.edu;berkeley.edu;", "position": "PhD student;Undergrad student;PhD student;PhD student;Full Professor;", "bibtex": "@misc{\nding2023unsupervised,\ntitle={Unsupervised Manifold Linearizing and Clustering},\nauthor={Tianjiao Ding and Shengbang Tong and Kwan Ho Ryan Chan and Xili Dai and Yi Ma and Benjamin David Haeffele},\nyear={2023},\nurl={https://openreview.net/forum?id=--qiQPsCV94}\n}", "github": "", "project": "", "reviewers": "jNCY;qY5S;z44Z", "site": "https://openreview.net/forum?id=--qiQPsCV94", "pdf_size": 27187941, "recommendation": "3;5;5", "confidence": "3;3;3", "correctness": "3;3;3", "technical_novelty": "1;2;2", "empirical_novelty": "3;2;2", "wc_summary_paper": "23;193;84", "wc_strength_and_weaknesses": "5;315;267", "wc_clarity_quality_novelty_and_reproducibility": "267;113;37", "wc_summary_review": "13;214;33", "wc_review": "308;835;421", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], 
"confidence_avg": [ 3.0, 0.0 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 100.0, 70.3183238328863 ], "wc_strength_and_weaknesses_avg": [ 195.66666666666666, 136.23835322290446 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 139.0, 95.68002229654144 ], "wc_summary_review_avg": [ 86.66666666666667, 90.4077184512227 ], "wc_review_avg": [ 521.3333333333334, 226.54261311187253 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18192772613037788708&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff_unique_index": "0;1;2;3;1", "aff_unique_norm": "Johns Hopkins University;University of California, Berkeley;University of Pennsylvania;Hong Kong University of Science and Technology", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.jhu.edu;https://www.berkeley.edu;https://www.upenn.edu;https://www.ust.hk", "aff_unique_abbr": "JHU;UC Berkeley;UPenn;HKUST", "aff_campus_unique_index": "1;2;1", "aff_campus_unique": ";Berkeley;Hong Kong SAR", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "United States;China" }, { "id": "-0jbdOhFn4g", "title": "Is the Deep Model Representation Sparse and Symbolic with Causal Patterns?", "track": "main", "status": "Withdraw", "tldr": "This paper shows that the inference logic of a deep model can usually be represented as a sparse causal graph, and the faithfulness of such a symbolic representation is theoretically guaranteed.", "abstract": "This paper aims to show that the inference logic of a deep model can be faithfully approximated as a sparse, symbolic causal graph. Such a causal graph potentially bridges the gap between connectionism and symbolism. To this end, the faithfulness of the causal graph is theoretically guaranteed, because we show that the causal graph can well mimic the model's output on an exponential number of different masked samples. Besides, such a causal graph can be further simplified and re-written as an And-Or graph (AOG), which explains the logical relationship between interactive concepts encoded by the deep model, without losing much explanation accuracy. 
The code will be released when the paper is accepted.", "keywords": "Representation Learning;Deep Learning Theory;Explainable AI", "primary_area": "", "supplementary_material": "/attachment/60c77e2944d076578cb1098386f40617035c2724.zip", "author": "Jie Ren;Mingjie Li;Qirui Chen;Huiqi Deng;Quanshi Zhang", "authorids": "~Jie_Ren1;~Mingjie_Li3;~Qirui_Chen1;~Huiqi_Deng1;~Quanshi_Zhang1", "gender": "F;M;;F;M", "homepage": "https://jie-ren.github.io/;http://lmjjjjjj.github.io;;;http://qszhang.com", "dblp": "r/JieRen-18;48/10103;;229/1317;http://dblp.uni-trier.de/pers/hd/z/Zhang:Quanshi", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;7dXDygoAAAAJ;;QEjqzXgAAAAJ;iFFhHK0AAAAJ", "orcid": "0000-0001-9918-3000;;;;", "linkedin": ";;;;", "or_profile": "~Jie_Ren1;~Mingjie_Li3;~Qirui_Chen1;~Huiqi_Deng1;~Quanshi_Zhang1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;;Shanghai jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;;edu.cn;sjtu.edu.cn", "position": "PhD student;MS student;;Postdoc;Associate Professor", "bibtex": "@misc{\nren2023is,\ntitle={Is the Deep Model Representation Sparse and Symbolic with Causal Patterns?},\nauthor={Jie Ren and Mingjie Li and Qirui Chen and Huiqi Deng and Quanshi Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=-0jbdOhFn4g}\n}", "github": "", "project": "", "reviewers": "CNFq;qJwb;1xP1;KVrC", "site": "https://openreview.net/forum?id=-0jbdOhFn4g", "pdf_size": 4821678, "recommendation": "3;5;5;5", "confidence": "3;2;3;3", "correctness": "3;3;3;2", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "61;190;49;69", "wc_strength_and_weaknesses": "387;226;172;190", "wc_clarity_quality_novelty_and_reproducibility": "24;107;43;20", "wc_summary_review": "9;22;28;25", "wc_review": "481;545;292;304", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 92.25, 56.88310381826927 ], "wc_strength_and_weaknesses_avg": [ 243.75, 84.95991701973348 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 48.5, 34.87477598494362 ], "wc_summary_review_avg": [ 21.0, 7.245688373094719 ], "wc_review_avg": [ 405.5, 109.93748223422256 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": -0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:0zu9K1ZYtS4J:scholar.google.com/&scioq=Is+the+Deep+Model+Representation+Sparse+and+Symbolic+with+Causal+Patterns%3F&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "-0tPmzgXS5", "title": "Probing into Overfitting for Video Recognition", "track": "main", "status": "Reject", "tldr": "We propose a data augmentation tailored for action 
recognition which shows consistent improvement over various models and datasets.", "abstract": "Video recognition methods based on 2D networks have thrived in recent years, leveraging advanced image classification techniques. However, overfitting is an even more severe problem in 2D video recognition models as 1) the scale of video datasets is relatively small compared to image recognition datasets like ImageNet; 2) the current pipeline treats background and semantic frames equally during optimization, which aggravates overfitting. Based on these challenges, we design a video-specific data augmentation approach, named Ghost Motion (GM), to alleviate overfitting. Specifically, GM shifts channels along the temporal dimension to enable semantic motion information to diffuse into other frames which may be irrelevant originally, leading to improvement in frame-wise accuracy. In addition, for challenging video samples with significant temporal dependency (e.g., Something-Something), we further scale the logits during training to prevent overconfident predictions on background frames. Comprehensive empirical validation on various popular datasets shows that the proposed method can improve the generalization of existing methods and is compatible with other competing data augmentation approaches.", "keywords": "Action Recognition;Data Augmentation;Overfitting", "primary_area": "", "supplementary_material": "", "author": "Yitian Zhang;Yue Bai;Huan Wang;Yizhou Wang;Yun Fu", "authorids": "~Yitian_Zhang1;~Yue_Bai1;~Huan_Wang3;~Yizhou_Wang3;~Yun_Fu1", "gender": ";M;M;M;M", "homepage": ";https://yueb17.github.io/;https://huanwang.tech/;https://wyzjack.github.io/;http://www1.ece.neu.edu/~yunfu/", "dblp": ";119/0848;70/6155-14;71/3387-6;00/5815-1", "google_scholar": ";https://scholar.google.com/citations?hl=en;0-On0y4AAAAJ;H4kqV1MAAAAJ;https://scholar.google.com.tw/citations?user=h-JEcQ8AAAAJ", "orcid": ";;0000-0001-6951-901X;0000-0003-1601-9649;0000-0002-5098-2853", "linkedin": ";;huanwang-zju/;yizhou-wang-786603155/;furaymond/", "or_profile": "~Yitian_Zhang1;~Yue_Bai1;~Huan_Wang3;~Yizhou_Wang3;~Yun_Fu1", "aff": ";Northeastern University;Northeastern University;Northeastern University;Northeastern University", "aff_domain": ";neu.edu;neu.edu;northeastern.edu;northeastern.edu", "position": ";PhD student;PhD student;MS student;Full Professor", "bibtex": "@misc{\nzhang2023probing,\ntitle={Probing into Overfitting for Video Recognition},\nauthor={Yitian Zhang and Yue Bai and Huan Wang and Yizhou Wang and Yun Fu},\nyear={2023},\nurl={https://openreview.net/forum?id=-0tPmzgXS5}\n}", "github": "", "project": "", "reviewers": "6srd;Xyj5;Xnqd", "site": "https://openreview.net/forum?id=-0tPmzgXS5", "pdf_size": 1094275, "recommendation": "5;6;6", "confidence": "4;4;4", "correctness": "3;3;2", "technical_novelty": "2;3;3", "empirical_novelty": "3;3;2", "wc_summary_paper": "82;63;59", "wc_strength_and_weaknesses": "343;205;480", "wc_clarity_quality_novelty_and_reproducibility": "51;95;171", "wc_summary_review": "57;55;64", "wc_review": "533;418;774", "wc_reply_reviewers": "521;27;338", "wc_reply_authors": "3577;991;2537", "reply_reviewers": "2;1;1", "reply_authors": "8;2;4", "recommendation_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 68.0, 10.03327796219494 ],
"wc_strength_and_weaknesses_avg": [ 342.6666666666667, 112.26852730049602 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 105.66666666666667, 49.56701412117628 ], "wc_summary_review_avg": [ 58.666666666666664, 3.8586123009300755 ], "wc_review_avg": [ 575.0, 148.33970023788868 ], "wc_reply_reviewers_avg": [ 295.3333333333333, 203.91882916711955 ], "wc_reply_authors_avg": [ 2368.3333333333335, 1062.4453972897725 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 4.666666666666667, 2.494438257849294 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": -0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:2YqUO7arjXsJ:scholar.google.com/&scioq=Probing+into+Overfitting+for+Video+Recognition&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Northeastern University", "aff_unique_dep": "", "aff_unique_url": "https://www.northeastern.edu", "aff_unique_abbr": "NEU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "-1k-zfgHFWQ", "title": "Improving Molecular Pretraining with Complementary Featurizations", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Molecular pretraining, which learns molecular representations over massive unlabeled data, has become a prominent paradigm to solve a variety of tasks in computational chemistry and drug discovery. Recently, prosperous progress has been made in molecular pretraining with different molecular featurizations, including 1D SMILES strings, 2D graphs, and 3D geometries. However, the role of molecular featurizations with their corresponding neural architectures in molecular pretraining remains largely unexamined. In this paper, through two case studies\u2014chirality classification and aromatic ring counting\u2014we first demonstrate that different featurization techniques convey chemical information differently. In light of this observation, we propose a simple and effective MOlecular pretraining framework with COmplementary featurizations (MOCO). 
MOCO comprehensively leverages multiple featurizations that complement each other and outperforms existing state-of-the-art models that solely rely on one or two featurizations on a wide range of molecular property prediction tasks.", "keywords": "molecular pretraining;featurizations;contrastive learning", "primary_area": "", "supplementary_material": "/attachment/4b332520d3d8b116f389241fd6f1f44e9ab1ec93.zip", "author": "Yanqiao Zhu;Dingshuo Chen;Yuanqi Du;Yingze Wang;Qiang Liu;Shu Wu", "authorids": "~Yanqiao_Zhu1;~Dingshuo_Chen1;~Yuanqi_Du1;~Yingze_Wang2;~Qiang_Liu8;~Shu_Wu1", "gender": "M;M;M;M;M;M", "homepage": "https://sxkdz.github.io;;https://yuanqidu.github.io/;http://www.pku.edu.cn;https://john-qiangliu.tech/;http://www.shuwu.name", "dblp": "67/8383-1;289/7535;266/2837;;61/3234-6;06/3577", "google_scholar": "NBbJT3AAAAAJ;jvrhEfIAAAAJ;fAc_zZMAAAAJ;;https://scholar.google.co.jp/citations?user=D-lKLcMAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0003-2205-5304;;;;0000-0002-9233-3827;0000-0003-2164-3577", "linkedin": ";;;;;", "or_profile": "~Yanqiao_Zhu1;~Dingshuo_Chen1;~Yuanqi_Du1;~Yingze_Wang2;~Qiang_Liu8;~Shu_Wu1", "aff": "University of California, Los Angeles;Institute of Automation, Chinese Academy of Sciences;Cornell University;;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences", "aff_domain": "ucla.edu;ia.ac.cn;cornell.edu;;nlpr.ia.ac.cn;ia.ac.cn", "position": "PhD student;PhD student;PhD student;;Associate Professor;Associate Professor", "bibtex": "@misc{\nzhu2023improving,\ntitle={Improving Molecular Pretraining with Complementary Featurizations},\nauthor={Yanqiao Zhu and Dingshuo Chen and Yuanqi Du and Yingze Wang and Qiang Liu and Shu Wu},\nyear={2023},\nurl={https://openreview.net/forum?id=-1k-zfgHFWQ}\n}", "github": "", "project": "", "reviewers": "iiJx;SCSw;gws2;c1Xv", "site": "https://openreview.net/forum?id=-1k-zfgHFWQ", "pdf_size": 1007855, "recommendation": "3;3;6;6", "confidence": "4;3;4;4", "correctness": "3;3;3;3", "technical_novelty": "2;1;2;2", "empirical_novelty": "2;2;3;2", "wc_summary_paper": "55;55;23;83", "wc_strength_and_weaknesses": "213;209;219;346", "wc_clarity_quality_novelty_and_reproducibility": "27;31;47;71", "wc_summary_review": "33;22;28;78", "wc_review": "328;317;317;578", "wc_reply_reviewers": "147;88;0;0", "wc_reply_authors": "701;604;447;1056", "reply_reviewers": "1;1;0;0", "reply_authors": "2;2;2;3", "recommendation_avg": [ 4.5, 1.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 54.0, 21.236760581595302 ], "wc_strength_and_weaknesses_avg": [ 246.75, 57.41243332240848 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 44.0, 17.291616465790582 ], "wc_summary_review_avg": [ 40.25, 22.1401784093986 ], "wc_review_avg": [ 385.0, 111.51905666745931 ], "wc_reply_reviewers_avg": [ 58.75, 62.34330356983018 ], "wc_reply_authors_avg": [ 702.0, 223.57660879439067 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.5773502691896258, "corr_recommendation_correctness": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14530046873805816970&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2;1;1", "aff_unique_norm":
"University of California, Los Angeles;Chinese Academy of Sciences;Cornell University", "aff_unique_dep": ";Institute of Automation;", "aff_unique_url": "https://www.ucla.edu;http://www.ia.cas.cn;https://www.cornell.edu", "aff_unique_abbr": "UCLA;CAS;Cornell", "aff_campus_unique_index": "0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;1;0;1;1", "aff_country_unique": "United States;China" }, { "id": "-1vpxBUtP0B", "title": "TransformMix: Learning Transformation and Mixing Strategies for Sample-mixing Data Augmentation", "track": "main", "status": "Withdraw", "tldr": "We propose an automated approach, TransformMix, to learn better transformation and mixing augmentation strategies from data", "abstract": "Data augmentation improves the generalization power of deep learning models by synthesizing more training samples. Sample-mixing is a popular data augmentation approach that creates additional training samples by combining existing images. Recent sample-mixing methods, like Mixup and Cutmix, adopt simple mixing operations to blend multiple input images. Although such a heuristic approach shows certain performance gains in some computer vision tasks, it mixes the images blindly and does not adapt to different datasets automatically. A mixing strategy that is effective for a particular dataset does not often generalize well to other datasets. If not properly configured, the methods may create misleading mixed images, which jeopardize the effectiveness of sample-mixing augmentations. In this work, we propose an automated approach, TransformMix, to learn better transformation and mixing augmentation strategies from data. In particular, TransformMix applies learned transformations and mixing masks to create compelling mixed images that contain correct and important information for the target tasks. We demonstrate the effectiveness of TransformMix in multiple datasets under the direct and transfer settings. 
Experimental results show that our method achieves better top-1 and top-5 accuracy as well as efficiency when compared with strong sample-mixing baselines.", "keywords": "Data Augmentation;Automated Data Augmentation;Sample-mixing;Computer Vision", "primary_area": "", "supplementary_material": "/attachment/7d0712a80bcc6c7d8fadc4c60ab4a1e29cef778c.zip", "author": "Tsz-Him Cheung;Dit-Yan Yeung", "authorids": "~Tsz-Him_Cheung1;~Dit-Yan_Yeung2", "gender": "M;M", "homepage": ";https://cse.hkust.edu.hk/faculty/dyyeung/", "dblp": "295/5321;41/5668", "google_scholar": ";nEsOOx8AAAAJ", "orcid": "0000-0002-3600-2927;0000-0003-3716-8125", "linkedin": ";", "or_profile": "~Tsz-Him_Cheung1;~Dit-Yan_Yeung2", "aff": "Hong Kong University of Science and Technology;Hong Kong University of Science and Technology", "aff_domain": "ust.hk;ust.hk", "position": "PhD student;Chair Professor", "bibtex": "@misc{\ncheung2023transformmix,\ntitle={TransformMix: Learning Transformation and Mixing Strategies for Sample-mixing Data Augmentation},\nauthor={Tsz-Him Cheung and Dit-Yan Yeung},\nyear={2023},\nurl={https://openreview.net/forum?id=-1vpxBUtP0B}\n}", "github": "", "project": "", "reviewers": "cZc9;WFPK;yb6r;a18R", "site": "https://openreview.net/forum?id=-1vpxBUtP0B", "pdf_size": 2587840, "recommendation": "3;5;5;5", "confidence": "4;4;4;4", "correctness": "3;3;3;3", "technical_novelty": "2;3;2;2", "empirical_novelty": "0;2;1;2", "wc_summary_paper": "61;34;64;12", "wc_strength_and_weaknesses": "393;186;248;59", "wc_clarity_quality_novelty_and_reproducibility": "68;61;25;12", "wc_summary_review": "66;55;38;2", "wc_review": "588;336;375;85", "wc_reply_reviewers": "0;0;405;0", "wc_reply_authors": "1230;290;665;711", "reply_reviewers": "0;0;1;0", "reply_authors": "2;1;1;1", "recommendation_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 42.75, 21.25294097295713 ], "wc_strength_and_weaknesses_avg": [ 221.5, 120.1883937824281 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 41.5, 23.58495283014151 ], "wc_summary_review_avg": [ 40.25, 24.23195204683271 ], "wc_review_avg": [ 346.0, 178.62390657467998 ], "wc_reply_reviewers_avg": [ 101.25, 175.37014426634883 ], "wc_reply_authors_avg": [ 724.0, 334.6796976214721 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:bGWGDWI8qL8J:scholar.google.com/&scioq=TransformMix:+Learning+Transformation+and+Mixing+Strategies+for+Sample-mixing+Data+Augmentation&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Hong Kong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.ust.hk", "aff_unique_abbr": "HKUST", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "-1x2-lp1eZf", "title": "Rethinking Deep Spiking Neural Networks: A Multi-Layer Perceptron Approach", "track": "main", "status": "Reject", "tldr": "A multi-layer perceptron approach for deep spiking neural network, achieving state-of-the-art results on ImageNet.", "abstract": "By adopting deep convolution 
architectures, spiking neural networks (SNNs) have recently achieved performance competitive with their artificial counterparts in image classification, while incurring much lower computation cost due to event-driven and sparse activation. However, the multiplication-free inference (MFI) principle makes SNNs incompatible with attention or transformer mechanisms, which have shown significant performance gains on high resolution vision tasks. Inspired by recent works on multi-layer perceptrons (MLPs), we explore an efficient spiking MLP design using batch normalization instead of layer normalization in both the token and the channel block to be compatible with MFI. We further strengthen the network\u2019s local feature learning ability with a spiking patch encoding layer, which significantly improves the network performance. Based on these building blocks, we explore an optimal skip connection configuration and develop an efficient multi-stage spiking MLP network combining global receptive field and local feature extraction, achieving full spike-based computation. Without pre-training or other advanced SNN training techniques, the spiking MLP network achieves 66.39% top-1 accuracy on the ImageNet-1K dataset, surpassing the state-of-the-art directly trained spiking ResNet-34 by 2.67% under similar model capacity, with shorter simulation steps and much less computation cost. Another larger variant of the network achieves 68.84% top-1 accuracy, rivaling the spiking VGG-16 network with 4 times smaller model capacity. Our work demonstrates the effectiveness of an alternative deep SNN architecture combining both global and local learning abilities. Finally, and more interestingly, we show a close resemblance of the trained receptive field of our network to cells in the cortex.
Code will be publicly available.", "keywords": "spiking neural network;multi-layer perceptron;image classification", "primary_area": "", "supplementary_material": "", "author": "Luziwei Leng;Boyan Li;Ran Cheng;Shuaijie Shen;Kaixuan Zhang;Jianguo Zhang;Jianxing Liao", "authorids": "~Luziwei_Leng1;~Boyan_Li2;~Ran_Cheng1;~Shuaijie_Shen1;~Kaixuan_Zhang3;~Jianguo_Zhang2;liaojianxing@huawei.com", "gender": ";M;M;M;M;M;", "homepage": ";https://github.com/BugMaker-Boyan;https://chengran.tech/;https://github.com/shenshuaijie;https://github.com/zkx-sust;https://scholar.google.com/citations?hl=en&user=ypSmZtIAAAAJ&view_op=list_works;", "dblp": ";;;;;90/6415-1;", "google_scholar": ";https://scholar.google.cz/citations?user=RZ2oElwAAAAJ;bjeIdlcAAAAJ;;;https://scholar.google.com/citations?hl=en;", "orcid": ";0009-0009-8391-4687;0000-0001-9410-8263;0009-0006-7563-2178;;;", "linkedin": ";;;;;;", "or_profile": "~Luziwei_Leng1;~Boyan_Li2;~Ran_Cheng1;~Shuaijie_Shen1;~Kaixuan_Zhang3;~Jianguo_Zhang2;liaojianxing@huawei.com", "aff": ";South University of Science and Technology of China;Southern Unviersity of Science and Technology;Southern University of Science and Technology;Southern University of Science and Technology;Southern University for Science and Technology;", "aff_domain": ";sustc.edu.cn;sustech.edu.cn;sustech.edu.cn;sustc.edu.cn;sustech.edu;", "position": ";Undergrad student;Asociate Professor;MS student;MS student;Full Professor;", "bibtex": "@misc{\nleng2023rethinking,\ntitle={Rethinking Deep Spiking Neural Networks: A Multi-Layer Perceptron Approach},\nauthor={Luziwei Leng and Boyan Li and Ran Cheng and Shuaijie Shen and Kaixuan Zhang and Jianguo Zhang and Jianxing Liao},\nyear={2023},\nurl={https://openreview.net/forum?id=-1x2-lp1eZf}\n}", "github": "", "project": "", "reviewers": "saxA;2Yab;86ui;BQAF;8C6v", "site": "https://openreview.net/forum?id=-1x2-lp1eZf", "pdf_size": 781148, "recommendation": "3;3;3;3;6", "confidence": "4;4;4;5;4", "correctness": "3;3;2;3;4", "technical_novelty": "2;3;2;2;3", "empirical_novelty": "2;3;2;2;3", "wc_summary_paper": "13;55;62;30;51", "wc_strength_and_weaknesses": "279;75;530;489;46", "wc_clarity_quality_novelty_and_reproducibility": "16;77;19;70;47", "wc_summary_review": "16;72;22;25;10", "wc_review": "324;279;633;614;154", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;0;0", "reply_authors": "0;0;0;0;0", "recommendation_avg": [ 3.6, 1.2 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "correctness_avg": [ 3.0, 0.6324555320336759 ], "technical_novelty_avg": [ 2.4, 0.4898979485566356 ], "empirical_novelty_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_paper_avg": [ 42.2, 18.082035283673132 ], "wc_strength_and_weaknesses_avg": [ 283.8, 201.44021445580324 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 45.8, 25.166644591601795 ], "wc_summary_review_avg": [ 29.0, 22.108821768696767 ], "wc_review_avg": [ 400.8, 190.2707544527009 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.25000000000000006, "corr_recommendation_correctness": 0.7905694150420948, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:qd6SeeJCiJMJ:scholar.google.com/&scioq=Rethinking+Deep+Spiking+Neural+Networks:+A+Multi-Layer+Perceptron+Approach&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;1;1;2", 
"aff_unique_norm": "South University of Science and Technology of China;Southern University of Science and Technology;Southern University for Science and Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.sustech.edu.cn;https://www.sustech.edu.cn;https://www.sustech.edu.cn", "aff_unique_abbr": "SUSTech;SUSTech;SUSTech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "BEVDistill: Cross-Modal BEV Distillation for Multi-View 3D Object Detection", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12222", "id": "-2zfgNS917", "poster": "", "openreview": "https://openreview.net/forum?id=-2zfgNS917", "slides": "https://iclr.cc/virtual/2023/poster/12222", "video": "https://iclr.cc/virtual/2023/poster/12222", "author_site": "Zehui Chen, Zhenyu Li, Shiquan Zhang, Liangji Fang, Qinhong Jiang, Feng Zhao", "tldr": "We leverage LiDAR-based knowledge into multi-view 3d detectors with cross-modal BEV distillation.", "abstract": "3D object detection from multiple image views is a fundamental and challenging task for visual scene understanding. Owing to its low cost and high efficiency, multi-view 3D object detection has demonstrated promising application prospects. However, accurately detecting objects through perspective views is extremely difficult due to the lack of depth information. Current approaches tend to adopt heavy backbones for image encoders, making them inapplicable for real-world deployment. Different from the images, LiDAR points are superior in providing spatial cues, resulting in highly precise localization. In this paper, we explore the incorporation of LiDAR-based detectors for multi-view 3D object detection. Instead of directly training a depth prediction network, we unify the image and LiDAR features in the Bird-Eye-View (BEV) space and adaptively transfer knowledge across non-homogenous representations in a teacher-student paradigm. To this end, we propose BEVDistill, a cross-modal BEV knowledge distillation (KD) framework for multi-view 3D object detection. \nExtensive experiments demonstrate that the proposed method outperforms current KD approaches on a highly-competitive baseline, BEVFormer, without introducing any extra cost in the inference phase. Notably, our best model achieves 59.4 NDS on the nuScenes test leaderboard, achieving new state-of-the-arts in comparison with various image-based detectors. 
Code will be available at https://github.com/zehuichen123/BEVDistill.", "keywords": "object detection;3d detection;BEV perception", "primary_area": "", "supplementary_material": "/attachment/6984b6929e9f6a09ca297f6c6540e52ce11c4c97.zip", "author": "Zehui Chen;Zhenyu Li;Shiquan Zhang;Liangji Fang;Qinhong Jiang;Feng Zhao", "authorids": "~Zehui_Chen1;~Zhenyu_Li3;~Shiquan_Zhang2;~Liangji_Fang1;~Qinhong_Jiang1;~Feng_Zhao6", "gender": "M;M;M;M;M;M", "homepage": "https://lovesnowbest.site;https://zhyever.github.io/;;;https://jiangqinhong.top;https://bivlab123.github.io/", "dblp": ";;;;;181/2734-4", "google_scholar": "NfSsLncAAAAJ;https://scholar.google.com/citations?view_op=list_works;v7wggU8AAAAJ;https://scholar.google.com.hk/citations?hl=zh-CN;;https://scholar.google.co.uk/citations?hl=en", "orcid": "0000-0002-1843-4478;;;;;0000-0001-6767-8105", "linkedin": ";;;;;", "or_profile": "~Zehui_Chen1;~Zhenyu_Li3;~Shiquan_Zhang2;~Liangji_Fang1;~Qinhong_Jiang1;~Feng_Zhao6", "aff": "University of Science and Technology of China;Harbin Institute of Technology;senseauto;;Zhejiang University, Tsinghua University;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;hit.edu.cn;senseauto.com;;zju.edu.cn;ustc.edu.cn", "position": "PhD student;MS student;Researcher;;MS student;Full Professor", "bibtex": "@inproceedings{\nchen2023bevdistill,\ntitle={{BEVD}istill: Cross-Modal {BEV} Distillation for Multi-View 3D Object Detection},\nauthor={Zehui Chen and Zhenyu Li and Shiquan Zhang and Liangji Fang and Qinhong Jiang and Feng Zhao},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=-2zfgNS917}\n}", "github": "", "project": "", "reviewers": "diMS;BsUP;2Rwe;zDXj", "pdf_size": 8240758, "recommendation": "6;6;6;8", "confidence": "3;4;4;3", "correctness": "4;3;3;4", "technical_novelty": "2;2;3;2", "empirical_novelty": "2;0;3;3", "wc_summary_paper": "42;179;81;122", "wc_strength_and_weaknesses": "250;183;122;308", "wc_clarity_quality_novelty_and_reproducibility": "15;38;21;122", "wc_summary_review": "28;66;96;102", "wc_review": "335;466;320;654", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "477;235;290;358", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 1.224744871391589 ], "wc_summary_paper_avg": [ 106.0, 50.75923561284193 ], "wc_strength_and_weaknesses_avg": [ 215.75, 69.9012696594275 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 49.0, 42.98255460067491 ], "wc_summary_review_avg": [ 73.0, 29.34280150224242 ], "wc_review_avg": [ 443.75, 134.01562408913372 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 340.0, 90.30227018187306 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 84, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6242373118546274657&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=-2zfgNS917", "email": "ustc.edu.cn;hit.edu.cn;senseauto.com;;zju.edu.cn;ustc.edu.cn", "author_num": 6, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "University of Science and Technology of China;Harbin Institute of 
Technology;senseauto;Zhejiang University", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.ustc.edu.cn;http://www.hit.edu.cn/;;http://www.zju.edu.cn", "aff_unique_abbr": "USTC;HIT;;ZJU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Harbin", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China;" }, { "id": "-3br92QL76O", "title": "Neural Integral Equations", "track": "main", "status": "Withdraw", "tldr": "Neural Integral Equations are a novel method that allows to model non-local dynamics with complex spatio-temporal relations through neural networks", "abstract": "Integral equations (IEs) are functional equations defined through integral operators, where the unknown function is integrated over a possibly multidimensional space. Important applications of IEs have been found throughout theoretical and applied sciences, including in physics, chemistry, biology, and engineering; often in the form of inverse problems. IEs are especially useful since differential equations, e.g. ordinary differential equations (ODEs), and partial differential equations (PDEs) can be formulated in an integral version which is often more convenient to solve. Moreover, unlike ODEs and PDEs, IEs can model inherently non-local dynamical systems, such as ones with long distance spatio-temporal relations. While efficient algorithms exist for solving given IEs, no method exists that can learn an integral equation and its associated dynamics from data alone. In this article, we introduce Neural Integral Equations (NIE), a method that learns an unknown integral operator from data through a solver. We also introduce an attentional version of NIE, called Attentional Neural Integral Equations (ANIE), where the integral is replaced by self-attention, which improves scalability and provides interpretability. We show that learning dynamics via integral equations is faster than doing so via other continuous methods, such as Neural ODEs. 
Finally, we show that ANIE outperforms other methods on several benchmark tasks in ODE, PDE, and IE systems of synthetic and real-world data.", "keywords": "integral equations;dynamical systems;non-local equations;self-attention;brain dynamics", "primary_area": "", "supplementary_material": "/attachment/4cc4e3a51c9394c0fb23b4574a8a6110080f0043.zip", "author": "Emanuele Zappala;Antonio Henrique de Oliveira Fonseca;Josue Ortega Caro;Andrew Henry Moberly;Michael James Higley;Jessica Cardin;David van Dijk", "authorids": "~Emanuele_Zappala1;~Antonio_Henrique_de_Oliveira_Fonseca1;~Josue_Ortega_Caro1;~Andrew_Henry_Moberly1;~Michael_James_Higley1;~Jessica_Cardin1;~David_van_Dijk1", "gender": "M;M;;M;;M;M", "homepage": "https://eazappala.com/;https://ahof1704.github.io/;;http://higleylab.org;https://cardinlab.org;http://www.vandijklab.org;https://josueortc.github.io/", "dblp": ";;;;;136/9930;", "google_scholar": "J4OOzEwAAAAJ;;fMFNFOoAAAAJ;;BV5qSLAAAAAJ;fjjZr6UAAAAJ;SctA3g8AAAAJ", "orcid": ";;;;;;", "linkedin": ";;;;;;", "or_profile": "~Emanuele_Zappala1;~Antonio_Henrique_de_Oliveira_Fonseca1;~Andrew_Henry_Moberly1;~Michael_James_Higley1;~Jessica_Cardin1;~David_van_Dijk1;~Josue_A_Ortega1", "aff": "Idaho State University;Yale University;Yale University;Yale University;Yale University;Yale University;Yale University", "aff_domain": "isu.edu;yale.edu;yale.edu;yale.edu;yale.edu;yale.edu;yale.edu", "position": "Assistant Professor;PhD student;Postdoc;Associate Professor;Associate Professor;Assistant Professor;Postdoc", "bibtex": "@misc{\nzappala2023neural,\ntitle={Neural Integral Equations},\nauthor={Emanuele Zappala and Antonio Henrique de Oliveira Fonseca and Josue Ortega Caro and Andrew Henry Moberly and Michael James Higley and Jessica Cardin and David van Dijk},\nyear={2023},\nurl={https://openreview.net/forum?id=-3br92QL76O}\n}", "github": "", "project": "", "reviewers": "Jt6d;4o9i;RirC", "site": "https://openreview.net/forum?id=-3br92QL76O", "pdf_size": 2732610, "recommendation": "3;3;5", "confidence": "4;5;4", "correctness": "3;2;3", "technical_novelty": "2;3;2", "empirical_novelty": "2;3;2", "wc_summary_paper": "16;28;62", "wc_strength_and_weaknesses": "123;1079;410", "wc_clarity_quality_novelty_and_reproducibility": "114;14;111", "wc_summary_review": "12;73;43", "wc_review": "265;1194;626", "wc_reply_reviewers": "211;1221;101", "wc_reply_authors": "773;3480;809", "reply_reviewers": "1;15;1", "reply_authors": "2;17;1", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 35.333333333333336, 19.48218559493661 ], "wc_strength_and_weaknesses_avg": [ 537.3333333333334, 400.53658454073275 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 79.66666666666667, 46.44949467492144 ], "wc_summary_review_avg": [ 42.666666666666664, 24.9042611258038 ], "wc_review_avg": [ 695.0, 382.38810999646245 ], "wc_reply_reviewers_avg": [ 511.0, 504.05026204404123 ], "wc_reply_authors_avg": [ 1687.3333333333333, 1267.6919534685424 ], "reply_reviewers_avg": [ 5.666666666666667, 6.599663291074443 ], "reply_authors_avg": [ 6.666666666666667, 7.318166133366716 ], "replies_avg": [ 41, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.4999999999999999, "corr_recommendation_correctness": 0.5, 
"gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "aff_unique_index": "0;1;1;1;1;1;1", "aff_unique_norm": "Idaho State University;Yale University", "aff_unique_dep": ";", "aff_unique_url": "https://www.isu.edu;https://www.yale.edu", "aff_unique_abbr": "ISU;Yale", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "-4DiyBMgv9m", "title": "Identifying Phase Transition Thresholds of Permuted Linear Regression via Message Passing", "track": "main", "status": "Reject", "tldr": "", "abstract": "This paper considers the permuted linear regression, i.e., ${\\mathbf{Y}} = {\\mathbf{\\Pi}}^{\\natural}{\\mathbf{X}}{\\mathbf{B}}^{\\natural} + {\\mathbf{W}}$, where ${\\mathbf{Y}} \\in \\mathbb{R}^{n\\times m}, {\\mathbf{\\Pi}}^{\\natural}\\in\\mathbb{R}^{n\\times n}, {\\mathbf{X}} \\in \\mathbb{R}^{n\\times p}, {\\mathbf{B}}^{\\natural}\\in \\mathbb{R}^{p\\times m}$, and ${\\mathbf{W}}\\in \\mathbb{R}^{n\\times m}$ represent the observations, missing (or incomplete) information about ordering, sensing matrix, signal of interests, and additive sensing noise, respectively. As is shown in the previous work, there exists phase transition phenomena in terms of the \\emph{signal-to-noise ratio} ($\\mathsf{snr}$), number of permuted rows, etc. While all existing works only concern the convergence rates without specifying the associate constants in front of them, we give a precise identification of the phase transition thresholds via the message passing algorithm. Depending on whether the signal ${\\mathbf{B}}^{\\natural}$ is known or not, we separately identify the corresponding critical points around the phase transition regimes. Moreover, we provide numerical experiments and show the empirical phase transition points are well aligned with theoretical predictions.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/3ab6ea4cc5937634944587be88911ea3abfbf8b0.zip", "author": "Hang Zhang;Ping Li", "authorids": "~Hang_Zhang8;~Ping_Li3", "gender": "M;M", "homepage": ";http://www.stat.rutgers.edu/home/pingli/", "dblp": ";62/5860-1", "google_scholar": ";", "orcid": "0000-0003-2774-1792;", "linkedin": ";", "or_profile": "~Hang_Zhang8;~Ping_Li3", "aff": "Baidu;LinkedIn", "aff_domain": "baidu.com;linkedin.com", "position": "Researcher;Engineer", "bibtex": "@misc{\nzhang2023identifying,\ntitle={Identifying Phase Transition Thresholds of Permuted Linear Regression via Message Passing},\nauthor={Hang Zhang and Ping Li},\nyear={2023},\nurl={https://openreview.net/forum?id=-4DiyBMgv9m}\n}", "github": "", "project": "", "reviewers": "eTFJ;vjWT;tsTr;PjJn;K1xv", "site": "https://openreview.net/forum?id=-4DiyBMgv9m", "pdf_size": 785727, "recommendation": "1;3;3;6;6", "confidence": "4;3;3;3;2", "correctness": "1;2;3;3;4", "technical_novelty": "3;2;3;3;3", "empirical_novelty": "0;2;3;0;0", "wc_summary_paper": "67;85;119;132;132", "wc_strength_and_weaknesses": "178;466;120;168;49", "wc_clarity_quality_novelty_and_reproducibility": "780;76;151;504;52", "wc_summary_review": "89;21;2099;45;57", "wc_review": "1114;648;2489;849;290", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "418;265;316;40;92", "reply_reviewers": "0;0;0;0;0", "reply_authors": "1;1;1;1;1", "recommendation_avg": [ 3.8, 1.9390719429665315 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "correctness_avg": [ 2.6, 1.019803902718557 ], "technical_novelty_avg": [ 2.8, 0.39999999999999997 ], "empirical_novelty_avg": [ 1.0, 
1.2649110640673518 ], "wc_summary_paper_avg": [ 107.0, 26.374229846575616 ], "wc_strength_and_weaknesses_avg": [ 196.2, 142.4028089610595 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 312.6, 284.64124788933873 ], "wc_summary_review_avg": [ 462.2, 818.6932026101108 ], "wc_review_avg": [ 1078.0, 755.0499321236974 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 226.2, 140.73862298601617 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.8154100913168026, "corr_recommendation_correctness": 0.8697968986174398, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:RN1pAWykaxYJ:scholar.google.com/&scioq=Identifying+Phase+Transition+Thresholds+of+Permuted+Linear+Regression+via+Message+Passing&hl=en&as_sdt=0,47", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Baidu;LinkedIn Corporation", "aff_unique_dep": "Baidu, Inc.;", "aff_unique_url": "https://www.baidu.com;https://www.linkedin.com", "aff_unique_abbr": "Baidu;LinkedIn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "China;United States" }, { "id": "-4HJSA3Y2vg", "title": "MolBART: Generative Masked Language Models for Molecular Representations", "track": "main", "status": "Withdraw", "tldr": "We develop self-supervised representations of molecules using generative masked language models that set state-of-the-art for many chemical property and reaction prediction tasks and implicitly learn features and substructures important in chemistry", "abstract": "We discover a robust self-supervised strategy tailored towards molecular representations for generative masked language models through a series of tailored, in-depth ablations. Using this pre-training strategy, we train MolBART, a BART-like model with an order of magnitude more compute than previous self-supervised molecular representations. In-depth evaluations show that MolBART consistently outperforms other self-supervised representations across classification, regression, and generation tasks setting a new state-of-the-art on 10 tasks. We then quantitatively show that when applied to the molecular domain, the BART objective learns representations that implicitly encode our downstream tasks of interest. For example, by selecting seven neurons from a frozen MolBART, we can obtain a model having performance within two percentage points of the full fine-tuned model on task Clintox. 
Lastly, we show that standard attribution interpretability methods, when applied to MolBART, highlight certain substructures that chemists use to explain specific properties of molecules.", "keywords": "representation learning;machine learning for chemistry;self-supervised learning;molecular representations", "primary_area": "", "supplementary_material": "", "author": "Gayane Chilingaryan;Hovhannes Tamoyan;Ani Tevosyan;Nelly Babayan;Lusine Khondkaryan;Karen Hambardzumyan;Zaven Navoyan;Hrant Khachatrian;Armen Aghajanyan", "authorids": "~Gayane_Chilingaryan1;~Hovhannes_Tamoyan1;ani.tevosyan@yerevann.com;nbabayan@toxometris.ai;lkhondkaryan@toxometris.ai;~Karen_Hambardzumyan1;znavoyan@toxometris.ai;~Hrant_Khachatrian1;~Armen_Aghajanyan1", "gender": "F;;;;;M;;;", "homepage": ";;;;;https://mahnerak.com;;;", "dblp": ";;;;;215/3332;;20/10360;", "google_scholar": ";;;;;V3JjNJ0AAAAJ;;CxOrE_kAAAAJ;", "orcid": ";;;;;0000-0001-8764-9598;;0000-0002-1544-5649;", "linkedin": "gayane-chilingar/;;;;;mahnerak/;;;", "or_profile": "~Gayane_Chilingaryan1;~Hovhannes_Tamoyan1;ani.tevosyan@yerevann.com;nbabayan@toxometris.ai;lkhondkaryan@toxometris.ai;~Karen_Hambardzumyan1;znavoyan@toxometris.ai;~Hrant_Khachatrian1;~Armen_Aghajanyan1", "aff": ";;;;;YerevaNN;;YerevaNN;", "aff_domain": ";;;;;yerevann.com;;yerevann.com;", "position": ";;;;;Researcher;;Researcher;", "bibtex": "@misc{\nchilingaryan2023molbart,\ntitle={Mol{BART}: Generative Masked Language Models for Molecular Representations},\nauthor={Gayane Chilingaryan and Hovhannes Tamoyan and Ani Tevosyan and Nelly Babayan and Lusine Khondkaryan and Karen Hambardzumyan and Zaven Navoyan and Hrant Khachatrian and Armen Aghajanyan},\nyear={2023},\nurl={https://openreview.net/forum?id=-4HJSA3Y2vg}\n}", "github": "", "project": "", "reviewers": "koXV;A9F3;Gu6S;srsc", "site": "https://openreview.net/forum?id=-4HJSA3Y2vg", "pdf_size": 9067475, "recommendation": "3;3;5;5", "confidence": "4;5;2;4", "correctness": "2;2;3;3", "technical_novelty": "2;2;3;2", "empirical_novelty": "3;2;3;2", "wc_summary_paper": "143;56;28;103", "wc_strength_and_weaknesses": "554;195;463;32", "wc_clarity_quality_novelty_and_reproducibility": "54;2;16;55", "wc_summary_review": "44;36;5;36", "wc_review": "795;289;512;226", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 82.5, 44.02556075736003 ], "wc_strength_and_weaknesses_avg": [ 311.0, 208.236644229588 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 31.75, 23.284920012746447 ], "wc_summary_review_avg": [ 30.25, 14.939461168328663 ], "wc_review_avg": [ 455.5, 222.96019824174897 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 9, 0 ], "corr_recommendation_confidence": -0.6882472016116854, "corr_recommendation_correctness": 1.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5094067226858674311&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "YerevaNN", "aff_unique_dep": "", "aff_unique_url": "", "aff_unique_abbr": "", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", 
"aff_country_unique": "Armenia" }, { "id": "-4Maz7s3YXz", "title": "Towards Understanding Robust Memorization in Adversarial Training", "track": "main", "status": "Reject", "tldr": "We provide a theoretical understanding of adversarial training by proposing a novel implicit bias called robust memorization.", "abstract": "Adversarial training is a standard method to train neural networks to be robust to adversarial perturbation. However, in contrast with benign overfitting in the standard deep learning setting, which means that over-parameterized neural networks surprisingly generalize well for unseen data, while adversarial training method is able to achieve low robust training error, there still exists a significant robust generalization gap, which promotes us exploring what mechanism leads to robust overfitting during learning process. In this paper, we propose an implicit bias called $\\textit{robust memorization}$ in adversarial training under the realistic data assumption. By function approximation theory, we prove that ReLU nets with efficient size have the ability to achieve robust memorization, while robust generalization requires exponentially large models. Then, we demonstrate robust memorization in adversarial training from both empirical and theoretical perspectives. In particular, we empirically investigate the dynamics of loss landscape over input, and we also provide theoretical analysis of robust memorization on data with linear separable assumption. Finally, we prove novel generalization bounds based on robust memorization, which further explains why deep neural networks have both high clean test accuracy and robust overfitting at the same time.", "keywords": "adversarial robustness;adversarial training;robust generalization gap;robust overfitting;deep learning theory", "primary_area": "", "supplementary_material": "/attachment/e58c0ad28aca874055100c0dc68d4dd135566237.zip", "author": "Binghui Li;Yuanzhi Li", "authorids": "~Binghui_Li1;~Yuanzhi_Li1", "gender": "Not Specified;M", "homepage": "https://libinghui0000.github.io/;", "dblp": "244/9096.html;73/3628", "google_scholar": "U6BRIM4AAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Binghui_Li1;~Yuanzhi_Li1", "aff": "Peking University;Carnegie Mellon University", "aff_domain": "pku.edu.cn;andrew.cmu.edu", "position": "Undergrad student;Assistant Professor", "bibtex": "@misc{\nli2023towards,\ntitle={Towards Understanding Robust Memorization in Adversarial Training},\nauthor={Binghui Li and Yuanzhi Li},\nyear={2023},\nurl={https://openreview.net/forum?id=-4Maz7s3YXz}\n}", "github": "", "project": "", "reviewers": "z1zg;W3fx;59uS", "site": "https://openreview.net/forum?id=-4Maz7s3YXz", "pdf_size": 273191, "recommendation": "3;3;5", "confidence": "4;4;4", "correctness": "3;4;3", "technical_novelty": "3;2;3", "empirical_novelty": "0;0;2", "wc_summary_paper": "21;46;62", "wc_strength_and_weaknesses": "449;182;88", "wc_clarity_quality_novelty_and_reproducibility": "83;37;14", "wc_summary_review": "25;50;205", "wc_review": "578;315;369", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "36;39;526", "reply_reviewers": "0;0;0", "reply_authors": "2;2;2", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 0.6666666666666666, 0.9428090415820634 ], "wc_summary_paper_avg": [ 43.0, 16.87206764645835 ], 
"wc_strength_and_weaknesses_avg": [ 239.66666666666666, 152.91464576321292 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 44.666666666666664, 28.686039965266886 ], "wc_summary_review_avg": [ 93.33333333333333, 79.61713941664124 ], "wc_review_avg": [ 420.6666666666667, 113.41467669084496 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 200.33333333333334, 230.28436527234948 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": -0.5, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:m4yjcHInKNIJ:scholar.google.com/&scioq=Towards+Understanding+Robust+Memorization+in+Adversarial+Training&hl=en&as_sdt=0,5", "gs_version_total": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Peking University;Carnegie Mellon University", "aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;https://www.cmu.edu", "aff_unique_abbr": "Peking U;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "China;United States" }, { "id": "-4Z25gkP7Oi", "title": "Counterfactual Contrastive Learning for Robust Text Classification", "track": "main", "status": "Reject", "tldr": "", "abstract": "Text classification has recently been promoted by large pre-trained language models (PLMs) which aim to identify target classes with knowledge transferred from sets of reading comprehension tasks. However, derivative models of PLMs still suffer from sensitive performance on different datasets, the reasons are multiple such as cross-domain and label imbalance problems, from which most models may learn the spurious correlation between texts and labels. Existing research requires people to manually add counterfactual samples to the dataset or automatically match so-called counterfactual pairs that are already in the dataset for augmentation. In this paper, we propose a novel LDA-based counterfactual contrastive learning framework and three data augmentation methods, to capture the causal information in texts, which can promote the robustness of text classification. To confirm the effectiveness of our proposed model and methods, we design and conduct several couples of experiments. Experimental results demonstrate that our model works well on five popular text classification datasets on distinct tasks, we find that training with proposed data augmentation outperforms other augmentation methods on many superior models by 1\\% or above. 
Plus, robustness tests on different datasets also show a competitive performance, which proves the effectiveness of our model and data.", "keywords": "Contrastive Learning;Representation Learning;Structural Causal Model", "primary_area": "", "supplementary_material": "", "author": "Xiaosong Yuan;Renchu Guan;Wanli Zuo;Yijia Zhang", "authorids": "~Xiaosong_Yuan1;guanrenchu@jlu.edu.cn;zuowl@jlu.edu.cn;zhangyj_gfkj@163.com", "gender": "M;;;", "homepage": ";;;", "dblp": "25/2886;;;", "google_scholar": "-Fg_EuEAAAAJ;;;", "orcid": "0000-0001-5748-5174;;;", "linkedin": ";;;", "or_profile": "~Xiaosong_Yuan1;guanrenchu@jlu.edu.cn;zuowl@jlu.edu.cn;zhangyj_gfkj@163.com", "aff": "Jilin University;;;", "aff_domain": "jlu.edu.cn;;;", "position": "PhD student;;;", "bibtex": "@misc{\nyuan2023counterfactual,\ntitle={Counterfactual Contrastive Learning for Robust Text Classification},\nauthor={Xiaosong Yuan and Renchu Guan and Wanli Zuo and Yijia Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=-4Z25gkP7Oi}\n}", "github": "", "project": "", "reviewers": "BjCX;Dfgo;PD4A;WvsR", "site": "https://openreview.net/forum?id=-4Z25gkP7Oi", "pdf_size": 275981, "recommendation": "3;3;5;5", "confidence": "4;5;5;3", "correctness": "2;3;2;3", "technical_novelty": "2;1;3;3", "empirical_novelty": "2;1;3;3", "wc_summary_paper": "76;18;169;82", "wc_strength_and_weaknesses": "489;248;558;195", "wc_clarity_quality_novelty_and_reproducibility": "152;25;61;26", "wc_summary_review": "80;20;51;52", "wc_review": "797;311;839;355", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.25, 0.82915619758885 ], "empirical_novelty_avg": [ 2.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 86.25, 53.91834103531005 ], "wc_strength_and_weaknesses_avg": [ 372.5, 154.1014276377737 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 66.0, 51.725235620536324 ], "wc_summary_review_avg": [ 50.75, 21.22940178149163 ], "wc_review_avg": [ 575.5, 243.4517405975977 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.30151134457776363, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:48B1KdW13L8J:scholar.google.com/&scioq=Counterfactual+Contrastive+Learning+for+Robust+Text+Classification&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Jilin University", "aff_unique_dep": "", "aff_unique_url": "http://www.jlu.edu.cn", "aff_unique_abbr": "JLU", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "id": "-59_mb1lOf4", "title": "Communication-Efficient and Drift-Robust Federated Learning via Elastic Net", "track": "main", "status": "Reject", "tldr": "", "abstract": "Federated learning (FL) is a distributed method to train a global model over a set of local clients while keeping data localized, which reduces risks of privacy and security. FL framework faces important challenges including expensive communication cost and client drift problem. Leveraging the elastic net, we propose a communication-efficient and drift-robust FL framework to improve the communication efficiency and resolve the client drift problem. 
We repurpose two types of the elastic net regularizers (i.e., $\\ell_1$ and $\\ell_2$ penalties on the local model updates): (1) the $\\ell_1$-norm regularizer sparsifies the local updates to enhance the communication efficiency and (2) the $\\ell_2$-norm regularizer attempts to resolve the client drift problem by limiting the impact of drifting local updates due to data heterogeneity. Our framework is general; hence, it can be integrated with prior FL techniques, e.g., FedAvg, FedProx, SCAFFOLD, and FedDyn. We show that our framework effectively resolves the communication cost problem and the client drift problem simultaneously.", "keywords": "Federated learning;Data heterogeneity;Optimization", "primary_area": "", "supplementary_material": "", "author": "Seonhyeong Kim;jiheon woo;Daewon Seo;Yongjune Kim", "authorids": "~Seonhyeong_Kim1;~jiheon_woo1;~Daewon_Seo1;~Yongjune_Kim1", "gender": ";M;;", "homepage": ";;https://dae-won-seo.github.io/;https://iil.postech.ac.kr", "dblp": ";;;124/3256", "google_scholar": ";62pWEW0AAAAJ;;WPKrXEoAAAAJ", "orcid": ";;;0000-0003-0120-3750", "linkedin": "http://linkedin.com/in/seonhyeong-kim-81b956213;;;", "or_profile": "~Seonhyeong_Kim1;~jiheon_woo1;~Daewon_Seo1;~Yongjune_Kim1", "aff": "Daegu Gyeongbuk Institute of Science and Technology;Daegu Gyeongbuk Institute of Science and Technology;;POSTECH", "aff_domain": "dgist.ac.kr;dgist.ac.kr;;postech.ac.kr", "position": "MS student;MS student;;Assistant Professor", "bibtex": "@misc{\nkim2023communicationefficient,\ntitle={Communication-Efficient and Drift-Robust Federated Learning via Elastic Net},\nauthor={Seonhyeong Kim and jiheon woo and Daewon Seo and Yongjune Kim},\nyear={2023},\nurl={https://openreview.net/forum?id=-59_mb1lOf4}\n}", "github": "", "project": "", "reviewers": "jTwe;Rp9d;odbQ;9ArF", "site": "https://openreview.net/forum?id=-59_mb1lOf4", "pdf_size": 1271325, "recommendation": "3;3;3;3", "confidence": "4;3;4;4", "correctness": "4;2;2;2", "technical_novelty": "2;1;3;1", "empirical_novelty": "2;1;2;2", "wc_summary_paper": "64;208;112;45", "wc_strength_and_weaknesses": "60;113;438;240", "wc_clarity_quality_novelty_and_reproducibility": "21;10;11;3", "wc_summary_review": "59;34;89;32", "wc_review": "204;365;650;320", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.5, 0.8660254037844386 ], "technical_novelty_avg": [ 1.75, 0.82915619758885 ], "empirical_novelty_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 107.25, 63.08476440472771 ], "wc_strength_and_weaknesses_avg": [ 212.75, 145.5702150166716 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 11.25, 6.417748826496718 ], "wc_summary_review_avg": [ 53.5, 23.092206477510977 ], "wc_review_avg": [ 384.75, 164.02038745229203 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:gcYaCeaahB8J:scholar.google.com/&scioq=Communication-Efficient+and+Drift-Robust+Federated+Learning+via+Elastic+Net&hl=en&as_sdt=0,5", "gs_version_total": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Daegu Gyeongbuk Institute of Science and Technology;Pohang University of 
Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.dgist.ac.kr;https://www.postech.ac.kr", "aff_unique_abbr": "DGIST;POSTECH", "aff_campus_unique_index": "0;0;1", "aff_campus_unique": "Daegu;Pohang", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "NTK-SAP: Improving neural network pruning by aligning training dynamics", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12107", "id": "-5EWhW_4qWP", "poster": "/media/PosterPDFs/ICLR%202023/12107.png?t=1680833898.8708394", "openreview": "https://openreview.net/forum?id=-5EWhW_4qWP", "slides": "https://iclr.cc/virtual/2023/poster/12107", "video": "https://iclr.cc/virtual/2023/poster/12107", "author_site": "Yite Wang, Dawei Li, Ruoyu Sun", "tldr": "We introduce a pruning-at-initialization method by aligning the eigenspectrum of NTK to that of the dense network.", "abstract": "Pruning neural networks before training has received increasing interest due to its potential to reduce training time and memory. One popular method is to prune the connections based on a certain metric, but it is not entirely clear what metric is the best choice. Recent advances in neural tangent kernel (NTK) theory suggest that the training dynamics of large enough neural networks is closely related to the spectrum of the NTK. Motivated by this finding, we propose to prune the connections that have the least influence on the spectrum of the NTK. This method can help maintain the NTK spectrum, which may help align the training dynamics to that of its dense counterpart. However, one possible issue is that the fixed-weight-NTK corresponding to a given initial point can be very different from the NTK corresponding to later iterates during the training phase. We further propose to sample multiple realizations of random weights to estimate the NTK spectrum. Note that our approach is weight-agnostic, which is different from most existing methods that are weight-dependent. In addition, we use random inputs to compute the fixed-weight-NTK, making our method data-agnostic as well. We name our foresight pruning algorithm Neural Tangent Kernel Spectrum-Aware Pruning (NTK-SAP). 
Empirically, our method achieves better performance than all baselines on multiple datasets.", "keywords": "empirical deep learning;pruning at initialization;neural network pruning", "primary_area": "", "supplementary_material": "", "author": "Yite Wang;Dawei Li;Ruoyu Sun", "authorids": "~Yite_Wang1;~Dawei_Li3;~Ruoyu_Sun1", "gender": "M;M;", "homepage": "https://yitewang.github.io/;;https://ruoyus.github.io/", "dblp": "317/0407;;30/9879-1", "google_scholar": "wEGgxUIAAAAJ;;PsfzbCMAAAAJ", "orcid": ";0000-0003-0374-3101;", "linkedin": "yite-wang-261057140/;;", "or_profile": "~Yite_Wang1;~Dawei_Li3;~Ruoyu_Sun1", "aff": "University of Illinois, Urbana Champaign;University of Chicago;The Chinese University of Hong Kong", "aff_domain": "illinois.edu;uchicago.edu;cuhk.edu.cn", "position": "PhD student;Postdoc;Associate Professor", "bibtex": "@inproceedings{\nwang2023ntksap,\ntitle={{NTK}-{SAP}: Improving neural network pruning by aligning training dynamics},\nauthor={Yite Wang and Dawei Li and Ruoyu Sun},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=-5EWhW_4qWP}\n}", "github": "", "project": "", "reviewers": "8ZBV;w2XF;B3ZS;2M9f", "pdf_size": 1058764, "recommendation": "6;6;6;6", "confidence": "3;4;3;3", "correctness": "3;2;3;3", "technical_novelty": "3;4;3;3", "empirical_novelty": "0;3;2;3", "wc_summary_paper": "139;222;73;136", "wc_strength_and_weaknesses": "262;224;86;115", "wc_clarity_quality_novelty_and_reproducibility": "21;66;33;49", "wc_summary_review": "67;31;36;90", "wc_review": "489;543;228;390", "wc_reply_reviewers": "0;174;0;0", "wc_reply_authors": "1498;1962;507;884", "reply_reviewers": "0;1;0;0", "reply_authors": "3;3;1;2", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 1.224744871391589 ], "wc_summary_paper_avg": [ 142.5, 52.92683629313205 ], "wc_strength_and_weaknesses_avg": [ 171.75, 73.2269588334788 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 42.25, 16.931848688197046 ], "wc_summary_review_avg": [ 56.0, 23.98958107179031 ], "wc_review_avg": [ 412.5, 119.82174260124913 ], "wc_reply_reviewers_avg": [ 43.5, 75.34421012924616 ], "wc_reply_authors_avg": [ 1212.75, 558.7715879498527 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 34, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8796652764319586007&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=-5EWhW_4qWP", "email": "illinois.edu;uchicago.edu;cuhk.edu.cn", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Illinois Urbana-Champaign;University of Chicago;Chinese University of Hong Kong", "aff_unique_dep": ";;", "aff_unique_url": "https://illinois.edu;https://www.uchicago.edu;https://www.cuhk.edu.hk", "aff_unique_abbr": "UIUC;UChicago;CUHK", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Urbana-Champaign;;Hong Kong SAR", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;China" }, { "id": "-5fSvp1ofdd", "title": "Memory of Unimaginable Outcomes in Experience Replay", "track": "main", "status": "Reject", "tldr": "This paper proposes techniques 
to add only the most relevant experiences in the replay buffer, using model uncertainty as the selection criterion.", "abstract": "Model-based reinforcement learning (MBRL) applies a single-shot dynamics model to imagined actions to select those with the best expected outcome. The dynamics model is an unfaithful representation of the environment physics, and its capacity to predict the outcome of a future action varies as it is trained iteratively. An experience replay buffer collects the outcomes of all actions executed in the environment and is used to iteratively train the dynamics model. With growing experience, it is expected that the model becomes more accurate at predicting the outcome and expected reward of imagined actions. However, training times and memory requirements drastically increase with the growing collection of experiences. \nIndeed, it would be preferable to retain only those experiences that could not be anticipated by the model while interacting with the environment. \nWe argue that doing so results in a lean replay buffer with diverse experiences that correspond directly to the model's predictive weaknesses at a given point in time. \nWe propose strategies for: i) determining reliable predictions of the dynamics model with respect to the imagined actions, ii) retaining only the unimaginable experiences in the replay buffer, and iii) training further only when sufficient novel experience has been acquired. \nWe show that these contributions lead to lower training times, drastic reduction of the replay buffer size, fewer updates to the dynamics model, and reduction of catastrophic forgetting, all of which enable the effective implementation of continual-learning agents using MBRL.", "keywords": "Transfer Multitask and Meta-learning;Robotics;Model-Based Reinforcement Learning;Batch/Offline RL;Deep RL;Continuous Action RL", "primary_area": "", "supplementary_material": "/attachment/0dd7ba567100e262a001b55b2bb0ab7c580ad889.zip", "author": "Adrian Remonda;Cole Corbitt Terrell;Eduardo E. Veas", "authorids": "~Adrian_Remonda1;~Cole_Corbitt_Terrell1;~Eduardo_E._Veas1", "gender": "M;M;M", "homepage": ";;", "dblp": "290/8669;;46/2797", "google_scholar": "5yRHUUcAAAAJ;https://scholar.google.com/citations?hl=en;-78yV4YAAAAJ", "orcid": ";;", "linkedin": "adrian-remonda-46678735/;cole-terrell-509307154/;", "or_profile": "~Adrian_Remonda1;~Cole_Corbitt_Terrell1;~Eduardo_E._Veas1", "aff": "Technische Universit\u00e4t Graz;Technische Universit\u00e4t Graz;Technische Universit\u00e4t Graz", "aff_domain": "tugraz.at;tugraz.at;tugraz.at", "position": "PhD student;PhD student;Full Professor", "bibtex": "@misc{\nremonda2023memory,\ntitle={Memory of Unimaginable Outcomes in Experience Replay},\nauthor={Adrian Remonda and Cole Corbitt Terrell and Eduardo E. 
Veas},\nyear={2023},\nurl={https://openreview.net/forum?id=-5fSvp1ofdd}\n}", "github": "", "project": "", "reviewers": "qNFh;3gsD;SwfR;LPd9", "site": "https://openreview.net/forum?id=-5fSvp1ofdd", "pdf_size": 3787649, "recommendation": "3;3;3;3", "confidence": "2;4;3;3", "correctness": "3;3;2;2", "technical_novelty": "2;2;2;2", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "113;171;159;45", "wc_strength_and_weaknesses": "436;1145;389;735", "wc_clarity_quality_novelty_and_reproducibility": "51;96;4;2", "wc_summary_review": "20;44;32;40", "wc_review": "620;1456;584;822", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 122.0, 49.44694126030446 ], "wc_strength_and_weaknesses_avg": [ 676.25, 301.41779559276193 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 38.25, 38.68058298423125 ], "wc_summary_review_avg": [ 34.0, 9.16515138991168 ], "wc_review_avg": [ 870.5, 349.9982142811589 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:eOkKTFRVgOoJ:scholar.google.com/&scioq=Memory+of+Unimaginable+Outcomes+in+Experience+Replay&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Technische Universit\u00e4t Graz", "aff_unique_dep": "", "aff_unique_url": "https://www.tugraz.at", "aff_unique_abbr": "TU Graz", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Austria" }, { "id": "-94tJCOo7OM", "title": "MCTransformer: Combining Transformers And Monte-Carlo Tree Search For Offline Reinforcement Learning", "track": "main", "status": "Reject", "tldr": "A novel approach for sequential decision making using reinforcement learning by combining MCTS and transformers.", "abstract": "Recent studies explored the framing of reinforcement learning as a sequence modeling problem, and then using Transformers to generate effective solutions. In this study, we introduce MCTransformer, a framework that combines Monte-Carlo Tree Search (MCTS) with Transformers. Our approach uses an actor-critic setup, where the MCTS component is responsible for navigating previously-explored states, aided by input from the Transformer. The Transformer controls the exploration and evaluation of new states, enabling an effective and efficient evaluation of various strategies. In addition to the development of highly effective strategies, our setup enables the use of more efficient sampling compared to existing MCTS-based solutions. MCTransformer is therefore able to perform a small number of evaluations for each newly-explored node, and to do so without degrading its performance. 
Our evaluation, conducted on the challenging and well-known problem of SameGame, shows that our approach outperforms both Transformer-based and MCTS-based solutions.", "keywords": "Transformer;Monte Carlo Tree Search;Offline Reinforcement Learning;SameGame", "primary_area": "", "supplementary_material": "", "author": "Gur Yaari;Lior Rokach;Rami Puzis;Gilad Katz", "authorids": "~Gur_Yaari1;~Lior_Rokach2;~Rami_Puzis1;~Gilad_Katz1", "gender": "M;M;M;M", "homepage": ";https://www.ise.bgu.ac.il/faculty/liorr/index.htm;https://faramirp.wixsite.com/puzis;", "dblp": ";r/LiorRokach;13/3098;54/10321", "google_scholar": ";makfxEUAAAAJ;https://scholar.google.com.tw/citations?user=SfJ_pOYAAAAJ;FTCVfdMAAAAJ", "orcid": ";;0000-0002-7229-3899;0000-0001-9478-7550", "linkedin": "https://linkedin.com/in/yaarigur;;rami-puzis-9396124/;", "or_profile": "~Gur_Yaari1;~Lior_Rokach2;~Rami_Puzis1;~Gilad_Katz1", "aff": "Ben-Gurion University of the Negev;Ben Gurion University of the Negev, Technion;Ben Gurion University of the Negev;Ben-Gurion University of the Negev", "aff_domain": "bgu.ac.il;bgu.ac.il;bgu.ac.il;bgu.ac.il", "position": "MS student;Full Professor;Associate Professor;Assistant Professor", "bibtex": "@misc{\nyaari2023mctransformer,\ntitle={{MCT}ransformer: Combining Transformers And Monte-Carlo Tree Search For Offline Reinforcement Learning},\nauthor={Gur Yaari and Lior Rokach and Rami Puzis and Gilad Katz},\nyear={2023},\nurl={https://openreview.net/forum?id=-94tJCOo7OM}\n}", "github": "", "project": "", "reviewers": "cK37;scYv;XF4h", "site": "https://openreview.net/forum?id=-94tJCOo7OM", "pdf_size": 518649, "recommendation": "1;3;3", "confidence": "5;4;4", "correctness": "3;3;2", "technical_novelty": "1;2;3", "empirical_novelty": "1;2;2", "wc_summary_paper": "30;71;82", "wc_strength_and_weaknesses": "259;502;212", "wc_clarity_quality_novelty_and_reproducibility": "37;80;157", "wc_summary_review": "36;13;92", "wc_review": "362;666;543", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 2.3333333333333335, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.0, 0.816496580927726 ], "empirical_novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "wc_summary_paper_avg": [ 61.0, 22.37558192911788 ], "wc_strength_and_weaknesses_avg": [ 324.3333333333333, 127.08614750982457 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 91.33333333333333, 49.64093293061909 ], "wc_summary_review_avg": [ 47.0, 33.1762967593833 ], "wc_review_avg": [ 523.6666666666666, 124.85814172714392 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": -0.5, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13399719858192280210&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Ben-Gurion University of the Negev;Ben Gurion University of the Negev", "aff_unique_dep": ";", "aff_unique_url": "https://www.bgu.ac.il;https://www.bgu.ac.il", "aff_unique_abbr": "BGU;BGU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Israel" }, { "title": "Martingale Posterior 
Neural Processes", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11594", "id": "-9PVqZ-IR_", "poster": "", "openreview": "https://openreview.net/forum?id=-9PVqZ-IR_", "slides": "https://iclr.cc/virtual/2023/poster/11594", "video": "https://iclr.cc/virtual/2023/poster/11594", "author_site": "Hyungi Lee, Eunggu Yun, Giung Nam, Edwin Fong, Juho Lee", "tldr": "Martingale Posterior Distribution, Neural Processes", "abstract": "A Neural Process (NP) estimates a stochastic process implicitly defined with neural networks given a stream of data, rather than pre-specifying priors already known, such as Gaussian processes. An ideal NP would learn everything from data without any inductive biases, but in practice, we often restrict the class of stochastic processes for the ease of estimation. One such restriction is the use of a finite-dimensional latent variable accounting for the uncertainty in the functions drawn from NPs. Some recent works show that this can be improved with more \u201cdata-driven\u201d source of uncertainty such as bootstrapping. In this work, we take a different approach based on the martingale posterior, a recently developed alternative to Bayesian inference. For the martingale posterior, instead of specifying prior-likelihood pairs, a predictive distribution for future data is specified. Under specific conditions on the predictive distribution, it can be shown that the uncertainty in the generated future data actually corresponds to the uncertainty of the implicitly defined Bayesian posteriors. Based on this result, instead of assuming any form of the latent variables, we equip a NP with a predictive distribution implicitly defined with neural networks and use the corresponding martingale posteriors as the source of uncertainty. 
The resulting model, which we name as Martingale Posterior Neural Process (MPNP), is demonstrated to outperform baselines on various tasks.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/46f2ab3ae1c88cca2222c0937d1c9d4a1f3f9876.zip", "author": "Hyungi Lee;Eunggu Yun;Giung Nam;Edwin Fong;Juho Lee", "authorids": "~Hyungi_Lee1;~Eunggu_Yun1;~Giung_Nam1;~Edwin_Fong1;~Juho_Lee2", "gender": "M;;M;M;M", "homepage": ";https://cs-giung.github.io/;https://edfong.github.io/;https://juho.lee.github.io;https://yuneg11.github.io", "dblp": "221/7959;304/9008;236/6357;55/3410-1;", "google_scholar": ";https://scholar.google.co.kr/citations?user=HO-fMd8AAAAJ;eT4pY6QAAAAJ;Py4URJUAAAAJ;r7-847MAAAAJ", "orcid": ";;;;0000-0002-4648-1415", "linkedin": "hyungi-lee-a8b161149/;;;;yuneg/", "or_profile": "~Hyungi_Lee1;~Giung_Nam1;~Edwin_Fong1;~Juho_Lee2;~EungGu_Yun1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Novo Nordisk;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;novonordisk.com;kaist.ac.kr;kaist.ac.kr", "position": "PhD student;PhD student;Data scientist;Assistant Professor;MS student", "bibtex": "@inproceedings{\nlee2023martingale,\ntitle={Martingale Posterior Neural Processes},\nauthor={Hyungi Lee and Eunggu Yun and Giung Nam and Edwin Fong and Juho Lee},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=-9PVqZ-IR_}\n}", "github": "", "project": "", "reviewers": "apWK;ejFi;eSf2", "pdf_size": 1131596, "recommendation": "8;8;10", "confidence": "4;3;3", "correctness": "4;4;4", "technical_novelty": "4;4;3", "empirical_novelty": "3;4;3", "wc_summary_paper": "75;78;58", "wc_strength_and_weaknesses": "279;123;23", "wc_clarity_quality_novelty_and_reproducibility": "56;67;844", "wc_summary_review": "19;43;106", "wc_review": "429;311;1031", "wc_reply_reviewers": "9;0;0", "wc_reply_authors": "689;362;1169", "reply_reviewers": "1;0;0", "reply_authors": "1;1;2", "recommendation_avg": [ 8.666666666666666, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 4.0, 0.0 ], "technical_novelty_avg": [ 3.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 70.33333333333333, 8.806563209081938 ], "wc_strength_and_weaknesses_avg": [ 141.66666666666666, 105.3417718138863 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 322.3333333333333, 368.901371944077 ], "wc_summary_review_avg": [ 56.0, 36.68787265568828 ], "wc_review_avg": [ 590.3333333333334, 315.3002096767812 ], "wc_reply_reviewers_avg": [ 3.0, 4.242640687119285 ], "wc_reply_authors_avg": [ 740.0, 331.4241994785535 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.4999999999999999, "corr_recommendation_correctness": 0.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11321318451493523157&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=-9PVqZ-IR_", "email": "kaist.ac.kr;kaist.ac.kr;novonordisk.com;kaist.ac.kr;kaist.ac.kr", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Korea Advanced Institute of Science 
and Technology;Novo Nordisk", "aff_unique_dep": ";", "aff_unique_url": "https://www.kaist.ac.kr;https://www.novonordisk.com", "aff_unique_abbr": "KAIST;NN", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "South Korea;Denmark" }, { "id": "-AEYAk13n_a", "title": "DeepReShape: Redesigning Neural Networks for Private Inference", "track": "main", "status": "Withdraw", "tldr": "Redesigning the neural network by distributing the network's ReLU in their order of criticality for higher ReLU-efficiency, and enabling FLOPs-ReLU-Accuracy balance for fast Private Inference. ", "abstract": "The increased demand for privacy and security has given rise to private inference (PI), where inferences are made on encrypted data using cryptographic techniques. A challenge with deploying PI is computational and storage overheads, which makes them impractical. Unlike plaintext inference, PI's overheads stem from non-linear operations,i.e., ReLU. Despite the inverted neural operator overheads, all the previous ReLU-optimizations for PI still leverage classic networks optimized for plaintext. This paper investigates what PI-optimized network architectures should look like, and through thorough experimentation, we find that wider networks are more ReLU efficient and that how ReLUs are allocated between layers has a significant impact. The insights are compiled into a set of design principles (DeepReShape) and used to synthesize specific architectures (HybReNet) for efficient PI. We further develop a novel channel-wise ReLU dropping mechanism, ReLU-reuse, and achieve upto 3\\% accuracy boost. Compared to the state-of-the-art (SNL on CIFAR-100), we achieve a 2.35\\% accuracy gain at 180K ReLUs. For ResNet50 on TinyImageNet our method saves 4.2$\\times$ ReLUs at iso-accuracy. 
", "keywords": "Private Inference;Neural network design;ReLU efficiency", "primary_area": "", "supplementary_material": "", "author": "Nandan Kumar Jha;Brandon Reagen", "authorids": "~Nandan_Kumar_Jha1;~Brandon_Reagen1", "gender": "M;M", "homepage": "https://www.nankj.com;https://brandonreagen.com/", "dblp": "241/0656.html;135/8203", "google_scholar": "NX7zp18AAAAJ;cO2uYoAAAAAJ", "orcid": "0000-0001-6334-1740;", "linkedin": "nandan-kumar-jha-7a076839/;", "or_profile": "~Nandan_Kumar_Jha1;~Brandon_Reagen1", "aff": "New York University;New York University", "aff_domain": "nyu.edu;nyu.edu", "position": "PhD student;Professor", "bibtex": "@misc{\njha2023deepreshape,\ntitle={DeepReShape: Redesigning Neural Networks for Private Inference},\nauthor={Nandan Kumar Jha and Brandon Reagen},\nyear={2023},\nurl={https://openreview.net/forum?id=-AEYAk13n_a}\n}", "github": "", "project": "", "reviewers": "xGSo;ejhS;its4;9x3t", "site": "https://openreview.net/forum?id=-AEYAk13n_a", "pdf_size": 1121518, "recommendation": "1;3;5;6", "confidence": "5;2;4;2", "correctness": "2;2;3;3", "technical_novelty": "1;3;2;3", "empirical_novelty": "1;2;3;0", "wc_summary_paper": "47;134;101;71", "wc_strength_and_weaknesses": "703;271;221;168", "wc_clarity_quality_novelty_and_reproducibility": "78;50;5;32", "wc_summary_review": "195;29;37;47", "wc_review": "1023;484;364;318", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.75, 1.920286436967152 ], "confidence_avg": [ 3.25, 1.299038105676658 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.25, 0.82915619758885 ], "empirical_novelty_avg": [ 1.5, 1.118033988749895 ], "wc_summary_paper_avg": [ 88.25, 32.61422235773835 ], "wc_strength_and_weaknesses_avg": [ 340.75, 212.29269299719198 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 41.25, 26.58359456506964 ], "wc_summary_review_avg": [ 77.0, 68.42514157822401 ], "wc_review_avg": [ 547.25, 281.28044279686424 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.5762619548021687, "corr_recommendation_correctness": 0.911322376865767, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:b9RUAOiyvCEJ:scholar.google.com/&scioq=DeepReShape:+Redesigning+Neural+Networks+for+Private+Inference&hl=en&as_sdt=0,21", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "New York University", "aff_unique_dep": "", "aff_unique_url": "https://www.nyu.edu", "aff_unique_abbr": "NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "-AdWUM183OU", "title": "The Dynamic of Consensus in Deep Networks and the Identification of Noisy Labels", "track": "main", "status": "Reject", "tldr": "We propose a new way to detect label noise through the lens of model disagreement, and describe a method that improves the SOTA in supervised learning with noisy labels. ", "abstract": "Deep neural networks have incredible capacity and expressibility, and can seemingly memorize any training set. This introduces a problem when training in the presence of noisy labels, as the noisy examples cannot be distinguished from clean examples by the end of training. 
Recent research has dealt with this challenge by utilizing the fact that deep networks seem to memorize clean examples much earlier than noisy examples. Here we report a new empirical result: for each example, when looking at the time it has been memorized by each model in an ensemble of networks, the diversity seen in noisy examples is much larger than in clean examples. We use this observation to develop a new method for noisy-label filtration. The method is based on a statistic of the data, which captures the differences in ensemble learning dynamics between clean and noisy data. We test our method on three tasks: (i) noise amount estimation; (ii) noise filtration; (iii) supervised classification. We show that our method improves over existing baselines in all three tasks using a variety of datasets, noise models, and noise levels. Aside from its improved performance, our method has two other advantages. (i) Simplicity, which implies that no additional hyperparameters are introduced. (ii) Our method is modular: it does not work in an end-to-end fashion, and can therefore be used to clean a dataset for any other future usage.", "keywords": "Noisy Labels;Training Dynamics;Label Noise", "primary_area": "", "supplementary_material": "", "author": "Daniel Shwartz;Uri Stern;Daphna Weinshall", "authorids": "~Daniel_Shwartz1;~Uri_Stern1;~Daphna_Weinshall1", "gender": "M;;F", "homepage": ";;http://www.cs.huji.ac.il/~daphna", "dblp": ";;93/1568", "google_scholar": ";;https://scholar.google.co.il/citations?hl=en", "orcid": ";;", "linkedin": "daniel-shwartz/;uri-stern-756b5023a/;", "or_profile": "~Daniel_Shwartz1;~Uri_Stern1;~Daphna_Weinshall1", "aff": ";, Hebrew University of Jerusalem;Hebrew University of Jerusalem", "aff_domain": ";mail.huji.ac.il;huji.ac.il", "position": ";MS student;Full Professor", "bibtex": "@misc{\nshwartz2023the,\ntitle={The Dynamic of Consensus in Deep Networks and the Identification of Noisy Labels},\nauthor={Daniel Shwartz and Uri Stern and Daphna Weinshall},\nyear={2023},\nurl={https://openreview.net/forum?id=-AdWUM183OU}\n}", "github": "", "project": "", "reviewers": "skxN;thYY;gseX;d5tP;ft5z", "site": "https://openreview.net/forum?id=-AdWUM183OU", "pdf_size": 5631296, "recommendation": "3;5;5;5;6", "confidence": "5;3;4;3;4", "correctness": "2;2;4;3;2", "technical_novelty": "2;3;2;3;2", "empirical_novelty": "2;3;2;2;3", "wc_summary_paper": "87;61;163;69;76", "wc_strength_and_weaknesses": "202;199;363;228;202", "wc_clarity_quality_novelty_and_reproducibility": "306;26;120;12;505", "wc_summary_review": "30;30;37;46;92", "wc_review": "625;316;683;355;875", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;0;0", "reply_authors": "0;0;0;0;0", "recommendation_avg": [ 4.8, 0.9797958971132712 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "correctness_avg": [ 2.6, 0.8 ], "technical_novelty_avg": [ 2.4, 0.4898979485566356 ], "empirical_novelty_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_paper_avg": [ 91.2, 36.902032464350796 ], "wc_strength_and_weaknesses_avg": [ 238.8, 62.983807442865825 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 193.8, 187.66395498336914 ], "wc_summary_review_avg": [ 47.0, 23.25510696599781 ], "wc_review_avg": [ 570.8, 209.55037580496008 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.6000991981489792, 
"corr_recommendation_correctness": 0.15309310892394865, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14479417195551019064&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0", "aff_unique_norm": "Hebrew University of Jerusalem", "aff_unique_dep": "", "aff_unique_url": "https://www.huji.ac.il", "aff_unique_abbr": "HUJI", "aff_campus_unique_index": "1", "aff_campus_unique": ";Jerusalem", "aff_country_unique_index": "0;0", "aff_country_unique": "Israel" }, { "title": "GLM-130B: An Open Bilingual Pre-trained Model", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11329", "id": "-Aw0rrrPUF", "poster": "", "openreview": "https://openreview.net/forum?id=-Aw0rrrPUF", "slides": "https://iclr.cc/virtual/2023/poster/11329", "video": "https://iclr.cc/virtual/2023/poster/11329", "author_site": "Aohan Zeng, Xiao Liu, Zhengxiao Du, Zihan Wang, Hanyu Lai, Ming Ding, Zhuoyi Yang, Yifan Xu, Wendi Zheng, Xiao Xia, Weng Lam Tam, Zixuan Ma, Yufei Xue, Jidong Zhai, Wenguang Chen, Zhiyuan Liu, Peng Zhang, Yuxiao Dong, Jie Tang", "tldr": "", "abstract": "We introduce GLM-130B, a bilingual (English and Chinese) pre-trained language model with 130 billion parameters. It is an attempt to open-source a 100B-scale model as good as GPT-3 (davinci) and unveil how models of such a scale can be successfully pre-trained. Over the course of this effort, we face numerous unexpected technical and engineering challenges, particularly on loss spikes and divergence. In this paper, we introduce the pre-training process of GLM-130B including its design choices, training strategies for both efficiency and stability, and engineering efforts. The resultant GLM-130B model offers significant outperformance over GPT-3 175B on a wide range of popular English benchmarks while the performance advantage is not observed in OPT-175B and BLOOM-176B. It also consistently and significantly outperforms ERNIE TITAN 3.0 260B\u2014the largest Chinese language model\u2014across related benchmarks. Finally, we leverage a unique scaling property of GLM-130B to reach INT4 quantization with almost no performance loss, making it the first among 100B-scale models and more importantly, allowing its effective inference on 4\u00d7RTX 3090 (24G) or 8\u00d7RTX 2080 Ti (11G) GPUs, the most ever affordable GPUs required for using 100B-scale models. 
The GLM-130B model weights are publicly accessible and its code, training logs, related toolkit, and lessons learned are open-sourced at https://github.com/THUDM/GLM-130B/.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Aohan Zeng;Xiao Liu;Zhengxiao Du;Zihan Wang;Hanyu Lai;Ming Ding;Zhuoyi Yang;Yifan Xu;Wendi Zheng;Xiao Xia;Weng Lam Tam;Zixuan Ma;Yufei Xue;Jidong Zhai;Wenguang Chen;Zhiyuan Liu;Peng Zhang;Yuxiao Dong;Jie Tang", "authorids": "~Aohan_Zeng1;~Xiao_Liu15;~Zhengxiao_Du1;zhwang19@mails.tsinghua.edu.cn;laihy19@mails.tsinghua.edu.cn;~Ming_Ding1;~Zhuoyi_Yang1;~Yifan_Xu7;~Wendi_Zheng1;~Xiao_Xia1;rainatam9784@gmail.com;mzx22@mails.tsinghua.edu.cn;yufei.xue@aminer.cn;~Jidong_Zhai1;~Wenguang_Chen1;~Zhiyuan_Liu1;peng.zhang@aminer.cn;~Yuxiao_Dong1;~Jie_Tang1", "gender": "M;M;M;;;M;M;M;M;;;;;M;M;M;;M;", "homepage": ";https://github.com/xiao9905;https://duzx16.github.io;;;;;https://github.com/xuyifan-0731;http://info.tsinghua.edu.cn;;;;;http://pacman.cs.tsinghua.edu.cn/~zjd/;https://pacman.cs.tsinghua.edu.cn/~cwg/;http://nlp.csai.tsinghua.edu.cn/~lzy;;https://keg.cs.tsinghua.edu.cn/yuxiao/;", "dblp": "286/8519.html;82/1364-36;234/0081;;;48/3462-4;230/8320;;;;;;;;;53/3245-1;;17/9267;", "google_scholar": "STftvjoAAAAJ;VKI8EhUAAAAJ;A8x07E0AAAAJ;;;Va50YzkAAAAJ;;fPvbfBUAAAAJ;;YcotX9cAAAAJ;;;;;;dT0v5u0AAAAJ;;https://scholar.google.com.hk/citations?hl=en;", "orcid": ";0000-0002-9226-4569;;;;;;;;;;;;;0000-0002-4281-1018;0000-0002-7709-2543;;0000-0002-6092-2002;", "linkedin": ";;;;;;zhuoyi-yang-4a1051210/;;;;;;;;;;;;", "or_profile": "~Aohan_Zeng1;~Xiao_Liu15;~Zhengxiao_Du1;zhwang19@mails.tsinghua.edu.cn;laihy19@mails.tsinghua.edu.cn;~Ming_Ding1;~Zhuoyi_Yang1;~Yifan_Xu7;~Wendi_Zheng1;~Xiao_Xia1;rainatam9784@gmail.com;mzx22@mails.tsinghua.edu.cn;yufei.xue@aminer.cn;~Jidong_Zhai1;~Wenguang_Chen1;~Zhiyuan_Liu1;peng.zhang@aminer.cn;~Yuxiao_Dong1;~Jie_Tang1", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;;;Tsinghua University;Tsinghua University;Beijing University of Posts and Telecommunications;Tsinghua University;Tsinghua University;;;;Tsinghua University;Tsinghua University;Tsinghua University;;Tsinghua University;", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;;;tsinghua.edu.cn;tsinghua.edu.cn;bupt.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;;;;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;;tsinghua.edu.cn;", "position": "PhD student;PhD student;PhD student;;;PhD student;PhD student;Undergrad student;Undergrad student;Undergrad student;;;;Associate Professor;Full Professor;Associate Professor;;Associate Professor;", "bibtex": "@inproceedings{\nzeng2023glmb,\ntitle={{GLM}-130B: An Open Bilingual Pre-trained Model},\nauthor={Aohan Zeng and Xiao Liu and Zhengxiao Du and Zihan Wang and Hanyu Lai and Ming Ding and Zhuoyi Yang and Yifan Xu and Wendi Zheng and Xiao Xia and Weng Lam Tam and Zixuan Ma and Yufei Xue and Jidong Zhai and Wenguang Chen and Zhiyuan Liu and Peng Zhang and Yuxiao Dong and Jie Tang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=-Aw0rrrPUF}\n}", "github": "", "project": "", "reviewers": "asnf;XkYq;tHby;tevQ", "pdf_size": 4449412, "recommendation": "8;8;8;8", "confidence": "4;4;5;4", "correctness": "3;3;3;3", "technical_novelty": "3;3;3;2", "empirical_novelty": "4;3;3;3", "wc_summary_paper": "59;127;346;55", "wc_strength_and_weaknesses": "303;120;229;309", "wc_clarity_quality_novelty_and_reproducibility": "78;108;99;775", 
"wc_summary_review": "53;97;27;41", "wc_review": "493;452;701;1180", "wc_reply_reviewers": "0;0;0;217", "wc_reply_authors": "698;981;939;1441", "reply_reviewers": "0;0;0;1", "reply_authors": "2;3;3;3", "recommendation_avg": [ 8.0, 0.0 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 146.75, 118.54192296398773 ], "wc_strength_and_weaknesses_avg": [ 240.25, 76.24098307341006 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 265.0, 294.64979212617817 ], "wc_summary_review_avg": [ 54.5, 26.205915362757317 ], "wc_review_avg": [ 706.5, 289.2166143222066 ], "wc_reply_reviewers_avg": [ 54.25, 93.96375631061159 ], "wc_reply_authors_avg": [ 1014.75, 268.7455813590244 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 19, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 715, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3470531221442223221&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=-Aw0rrrPUF", "email": "tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;;;tsinghua.edu.cn;tsinghua.edu.cn;bupt.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;;;;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;;tsinghua.edu.cn;", "author_num": 19, "aff_unique_index": "0;0;0;0;0;1;0;0;0;0;0;0", "aff_unique_norm": "Tsinghua University;Beijing University of Posts and Telecommunications", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;http://www.bupt.edu.cn/", "aff_unique_abbr": "THU;BUPT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Beijing", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Why adversarial training can hurt robust accuracy", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11173", "id": "-CA8yFkPc7O", "poster": "", "openreview": "https://openreview.net/forum?id=-CA8yFkPc7O", "slides": "https://iclr.cc/virtual/2023/poster/11173", "video": "https://iclr.cc/virtual/2023/poster/11173", "author_site": "Jacob Clarysse, Julia H\u00f6rrmann, Fanny Yang", "tldr": "Adversarial training can hurt robust generalization for perceptible perturbations when the sample size is small", "abstract": "Machine learning classifiers with high test accuracy often perform poorly under adversarial attacks. It is commonly believed that adversarial training alleviates this issue. In this paper, we demonstrate that, surprisingly, the opposite can be true for a natural class of perceptible perturbations --- even though adversarial training helps when enough data is available, it may in fact hurt robust generalization in the small sample size regime. We first prove this phenomenon for a high-dimensional linear classification setting with noiseless observations. Using intuitive insights from the proof, we could surprisingly find perturbations on standard image datasets for which this behavior persists. Specifically, it occurs for perceptible attacks that effectively reduce class information such as object occlusions or corruptions. 
", "keywords": "Adversarial training;Learning Theory;Robust generalisation", "primary_area": "", "supplementary_material": "/attachment/d5425a0d625331f955ca6ecaddc5c71402d6efaf.zip", "author": "Jacob Clarysse;Julia H\u00f6rrmann;Fanny Yang", "authorids": "~Jacob_Clarysse1;julia.hoerrmann@stat.math.ethz.ch;~Fanny_Yang1", "gender": "M;;", "homepage": ";;http://www.fanny-yang.de", "dblp": ";;126/4852", "google_scholar": ";;BfDKicQAAAAJ", "orcid": ";;", "linkedin": "jacob-clarysse-a03185b1/;;", "or_profile": "~Jacob_Clarysse1;julia.hoerrmann@stat.math.ethz.ch;~Fanny_Yang1", "aff": ";;Swiss Federal Institute of Technology", "aff_domain": ";;ethz.ch", "position": ";;Professor", "bibtex": "@inproceedings{\nclarysse2023why,\ntitle={Why adversarial training can hurt robust accuracy},\nauthor={Jacob Clarysse and Julia H{\\\"o}rrmann and Fanny Yang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=-CA8yFkPc7O}\n}", "github": "", "project": "", "reviewers": "7vDF;bvDH;udiw;SVdL", "pdf_size": 19852321, "recommendation": "6;6;8;8", "confidence": "5;4;4;4", "correctness": "3;3;4;4", "technical_novelty": "4;3;4;3", "empirical_novelty": "4;2;4;4", "wc_summary_paper": "160;58;113;63", "wc_strength_and_weaknesses": "143;193;225;88", "wc_clarity_quality_novelty_and_reproducibility": "927;60;10;11", "wc_summary_review": "376;44;16;27", "wc_review": "1606;355;364;189", "wc_reply_reviewers": "1305;25;15;0", "wc_reply_authors": "4513;744;517;98", "reply_reviewers": "7;1;1;0", "reply_authors": "10;2;1;1", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.5, 0.5 ], "empirical_novelty_avg": [ 3.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 98.5, 41.512046444375635 ], "wc_strength_and_weaknesses_avg": [ 162.25, 51.88147549944971 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 252.0, 390.2351854971563 ], "wc_summary_review_avg": [ 115.75, 150.58614644116503 ], "wc_review_avg": [ 628.5, 568.6451002162948 ], "wc_reply_reviewers_avg": [ 336.25, 559.3788407689372 ], "wc_reply_authors_avg": [ 1468.0, 1773.238703615506 ], "reply_reviewers_avg": [ 2.25, 2.7726341266023544 ], "reply_authors_avg": [ 3.5, 3.774917217635375 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 1.0, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2194870789955064604&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=-CA8yFkPc7O", "email": ";;ethz.ch", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "Swiss Federal Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.ethz.ch", "aff_unique_abbr": "ETH Zurich", "aff_country_unique_index": "0", "aff_country_unique": "Switzerland" }, { "id": "-CIOGGhkEfy", "title": "Augmentation Backdoors", "track": "main", "status": "Reject", "tldr": "We present three backdoor attacks that can be covertly inserted into data augmentation functions.", "abstract": "Data augmentation is used extensively to improve model generalisation. However, reliance on external libraries to implement augmentation methods introduces a vulnerability into the machine learning pipeline. It is well known that backdoors can be inserted into machine learning models through serving a modified dataset to train on. 
Augmentation therefore presents a perfect opportunity to perform this modification without requiring an initially backdoored dataset. In this paper we present three backdoor attacks that can be covertly inserted into data augmentation. Our attacks each insert a backdoor using a different type of computer vision augmentation transform, covering simple image transforms, GAN-based augmentation, and composition-based augmentation. By inserting the backdoor using these augmentation transforms, we make our backdoors difficult to detect, while still supporting arbitrary backdoor functionality. We evaluate our attacks on a range of computer vision benchmarks and demonstrate that an attacker is able to introduce backdoors through just a malicious augmentation routine.", "keywords": "training time attacks;backdoors;augmentation", "primary_area": "", "supplementary_material": "/attachment/fc338a41c24c4757ef012cc87d050d1a11c8da29.zip", "author": "Joseph Rance;Yiren Zhao;Ilia Shumailov;Robert D. Mullins", "authorids": "~Joseph_Rance1;~Yiren_Zhao2;~Ilia_Shumailov1;~Robert_D._Mullins1", "gender": "M;M;Unspecified;M", "homepage": ";https://aaronzhao.me;https://www.cl.cam.ac.uk/~is410/;https://www.csat.cam.ac.uk/~rdm34", "dblp": "330/3288;https://dblp.uni-trier.de/pers/hd/z/Zhao:Yiren;213/8587;31/789", "google_scholar": ";lOOmgEgAAAAJ;https://scholar.google.co.uk/citations?hl=en;zjXO2HMAAAAJ", "orcid": ";;;", "linkedin": "josephrance/;yiren-aaron-zhao-baa8b5116/;ilia-shumailov/;", "or_profile": "~Joseph_Rance1;~Yiren_Zhao2;~I_Shumailov1;~Robert_Mullins1", "aff": "University of Cambridge;Imperial College London;University of Oxford;University of Cambridge", "aff_domain": "cam.ac.uk;ic.ac.uk;ox.ac.uk;cam.ac.uk", "position": "Undergrad student;Assistant Professor;Fellowship;Associate Professor", "bibtex": "@misc{\nrance2023augmentation,\ntitle={Augmentation Backdoors},\nauthor={Joseph Rance and Yiren Zhao and Ilia Shumailov and Robert D. 
Mullins},\nyear={2023},\nurl={https://openreview.net/forum?id=-CIOGGhkEfy}\n}", "github": "", "project": "", "reviewers": "jPb2;hnQ2;Z2bo", "site": "https://openreview.net/forum?id=-CIOGGhkEfy", "pdf_size": 1160207, "recommendation": "5;5;5", "confidence": "4;2;4", "correctness": "3;2;3", "technical_novelty": "2;2;2", "empirical_novelty": "2;2;2", "wc_summary_paper": "85;38;30", "wc_strength_and_weaknesses": "245;167;270", "wc_clarity_quality_novelty_and_reproducibility": "206;52;19", "wc_summary_review": "71;64;19", "wc_review": "607;321;338", "wc_reply_reviewers": "0;11;118", "wc_reply_authors": "237;185;1027", "reply_reviewers": "0;1;1", "reply_authors": "1;1;2", "recommendation_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 51.0, 24.26245384677046 ], "wc_strength_and_weaknesses_avg": [ 227.33333333333334, 43.86595744107522 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 92.33333333333333, 81.49573949327385 ], "wc_summary_review_avg": [ 51.333333333333336, 23.041026211713937 ], "wc_review_avg": [ 422.0, 130.99872772919082 ], "wc_reply_reviewers_avg": [ 43.0, 53.22280213091628 ], "wc_reply_authors_avg": [ 483.0, 385.25143304946533 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16709679102859831978&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Cambridge;Imperial College London;University of Oxford", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cam.ac.uk;https://www.imperial.ac.uk;https://www.ox.ac.uk", "aff_unique_abbr": "Cambridge;ICL;Oxford", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Maximizing Communication Efficiency for Large-scale Training via 0/1 Adam", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11999", "id": "-CefY2EOupj", "poster": "/media/PosterPDFs/ICLR%202023/11999.png?t=1682799195.8569367", "openreview": "https://openreview.net/forum?id=-CefY2EOupj", "slides": "https://iclr.cc/virtual/2023/poster/11999", "video": "https://iclr.cc/virtual/2023/poster/11999", "author_site": "Yucheng Lu, Conglong Li, Minjia Zhang, Christopher De Sa, Yuxiong He", "tldr": "", "abstract": "1-bit gradient compression and local steps are two representative techniques that enable drastic communication reduction in distributed SGD. Their benefits, however, remain an open question on Adam-based large model pre-training (e.g. BERT and GPT). In this paper, we demonstrate the non-linearity in Adam causes slow convergence even when 1-bit compression or local steps are individually applied. To alleviate this limitation, we propose \\textbf{0/1 Adam} that linearizes each Adam step via approximating its optimizer states using their stale estimates and linear correlation. \\textbf{0/1 Adam} performs an Adam-like step to preserve the adaptivity, while its linearity allows utilizing 1-bit compression and local steps simultaneously for wall-clock time speed up. 
We provide convergence guarantee for \\textbf{0/1 Adam} on smooth non-convex objectives. On various large-scale benchmarks such as BERT-Base, BERT-Large, GPT-2 pre-training and ImageNet, we demonstrate on up to 128 GPUs that \\textbf{0/1 Adam} is able to reduce up to 87\\% of data volume, 54\\% of communication rounds, and achieve up to 2$\\times$ higher training throughput and end-to-end training time reduction compared to the state-of-the-art baseline 1-bit Adam; while enjoying the same statistical convergence speed and end task model accuracy on GLUE dataset and ImageNet validation set. ", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Yucheng Lu;Conglong Li;Minjia Zhang;Christopher De Sa;Yuxiong He", "authorids": "~Yucheng_Lu1;~Conglong_Li1;~Minjia_Zhang1;~Christopher_De_Sa2;~Yuxiong_He1", "gender": "M;;M;;M", "homepage": "https://www.yucheng-lu.me/;;https://minjiazhang.github.io/;;http://cs.cornell.edu/~cdesa", "dblp": ";158/7995;58/9033;https://dblp.org/pers/hd/h/He:Yuxiong;154/6336", "google_scholar": "FsBgPhQAAAAJ;;https://scholar.google.com/citations?hl=en;SB3_eb0AAAAJ;", "orcid": ";;0000-0002-8165-166X;;", "linkedin": ";;minjia-zhang-05857226/;;", "or_profile": "~Yucheng_Lu1;~Conglong_Li1;~Minjia_Zhang1;~Yuxiong_He1;~Christopher_De_Sa1", "aff": "Cornell University;Microsoft;Microsoft ;Microsoft;Cornell University", "aff_domain": "cornell.edu;microsoft.com;microsoft.com;microsoft.com;cornell.edu", "position": "PhD student;Researcher;Principle Researcher;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nlu2023maximizing,\ntitle={Maximizing Communication Efficiency for Large-scale Training via 0/1 Adam},\nauthor={Yucheng Lu and Conglong Li and Minjia Zhang and Christopher De Sa and Yuxiong He},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=-CefY2EOupj}\n}", "github": "", "project": "", "reviewers": "LNoo;SjhX;en2k", "pdf_size": 1683347, "recommendation": "5;6;6", "confidence": "2;4;3", "correctness": "3;3;3", "technical_novelty": "2;3;3", "empirical_novelty": "2;3;3", "wc_summary_paper": "81;67;61", "wc_strength_and_weaknesses": "458;100;123", "wc_clarity_quality_novelty_and_reproducibility": "387;53;36", "wc_summary_review": "108;44;42", "wc_review": "1034;264;262", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1751;127;386", "reply_reviewers": "0;0;0", "reply_authors": "3;1;1", "recommendation_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 69.66666666666667, 8.379870059984357 ], "wc_strength_and_weaknesses_avg": [ 227.0, 163.61132805116725 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 158.66666666666666, 161.60514293246433 ], "wc_summary_review_avg": [ 64.66666666666667, 30.652170486860395 ], "wc_review_avg": [ 520.0, 363.4538026581462 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 754.6666666666666, 712.4045355149153 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.8660254037844385, "corr_recommendation_correctness": 0.0, "gs_citation": 23, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=10835111776928428136&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=-CefY2EOupj", "email": "cornell.edu;microsoft.com;microsoft.com;microsoft.com;cornell.edu", "author_num": 5, "aff_unique_index": "0;1;1;1;0", "aff_unique_norm": "Cornell University;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.cornell.edu;https://www.microsoft.com", "aff_unique_abbr": "Cornell;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "An Exact Poly-Time Membership-Queries Algorithm for Extracting a Three-Layer ReLU Network", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11830", "id": "-CoNloheTs", "poster": "/media/PosterPDFs/ICLR%202023/11830.png?t=1681998316.826431", "openreview": "https://openreview.net/forum?id=-CoNloheTs", "slides": "https://iclr.cc/virtual/2023/poster/11830", "video": "https://iclr.cc/virtual/2023/poster/11830", "author_site": "Amit Daniely, Elad Granot", "tldr": "A first polynomial-time algorithm to extract the parameters and architecture of two- and three-layer neural networks using membership-queries", "abstract": "We consider the natural problem of learning a ReLU network from queries, which was recently remotivated by model extraction attacks. In this work, we present a polynomial-time algorithm that can learn a depth-two ReLU network from queries under mild general position assumptions. We also present a polynomial-time algorithm that, under mild general position assumptions, can learn a rich class of depth-three ReLU networks from queries. For instance, it can learn most networks where the number of first layer neurons is smaller than the dimension and the number of second layer neurons.\n\nThese two results substantially improve state-of-the-art: Until our work, polynomial-time algorithms were only shown to learn from queries depth-two networks under the assumption that either the underlying distribution is Gaussian (Chen et al. (2021)) or that the weights matrix rows are linearly independent (Milli et al. (2019)). 
For depth three or more, there were no known poly-time results.", "keywords": "Learning With Queries;ReLU Networks;Model Extraction", "primary_area": "", "supplementary_material": "", "author": "Amit Daniely;Elad Granot", "authorids": "~Amit_Daniely2;~Elad_Granot1", "gender": "M;M", "homepage": "https://www.cs.huji.ac.il/~amitd/;", "dblp": "19/7805;250/9489", "google_scholar": "https://scholar.google.com.tw/citations?user=jUtYwE0AAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Amit_Daniely2;~Elad_Granot1", "aff": "Google;", "aff_domain": "google.com;", "position": "Researcher;", "bibtex": "@inproceedings{\ndaniely2023an,\ntitle={An Exact Poly-Time Membership-Queries Algorithm for Extracting a Three-Layer Re{LU} Network},\nauthor={Amit Daniely and Elad Granot},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=-CoNloheTs}\n}", "github": "", "project": "", "reviewers": "3xMF;qJaS;vJNj", "pdf_size": 460104, "recommendation": "5;6;6", "confidence": "2;3;4", "correctness": "4;3;4", "technical_novelty": "3;3;3", "empirical_novelty": "0;0;0", "wc_summary_paper": "52;58;119", "wc_strength_and_weaknesses": "239;298;159", "wc_clarity_quality_novelty_and_reproducibility": "1;21;52", "wc_summary_review": "36;71;80", "wc_review": "328;448;410", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 0, 0 ], "wc_summary_paper_avg": [ 76.33333333333333, 30.26916289265731 ], "wc_strength_and_weaknesses_avg": [ 232.0, 56.96197562116913 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 24.666666666666668, 20.98147330914162 ], "wc_summary_review_avg": [ 62.333333333333336, 18.979521127315678 ], "wc_review_avg": [ 395.3333333333333, 50.075498555237125 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.8660254037844385, "corr_recommendation_correctness": -0.4999999999999999, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=19470415273654521&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=-CoNloheTs", "email": "google.com;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "TaskPrompter: Spatial-Channel Multi-Task Prompting for Dense Scene Understanding", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12218", "id": "-CwPopPJda", "poster": "/media/PosterPDFs/ICLR%202023/12218.png?t=1681994273.146713", "openreview": "https://openreview.net/forum?id=-CwPopPJda", "slides": "https://iclr.cc/virtual/2023/poster/12218", "video": "https://iclr.cc/virtual/2023/poster/12218", "author_site": "Hanrong Ye, Dan Xu", "tldr": "We propose a novel multi-task prompting framework to concurrently learn task-specific and task-generic representations as well as cross-task interaction along 
spatial and channel dimensions based on transformer for multiple dense predictions tasks.", "abstract": "Learning effective representations simultaneously from multiple tasks in a unified network framework is a fundamental paradigm for multi-task dense visual scene understanding. This requires joint modeling (i) task-generic and (ii) task-specific representations, and (iii) cross-task representation interactions. Existing works typically model these three perspectives with separately designed structures, using shared network modules for task-generic learning, different modules for task-specific learning, and establishing connections among these components for cross-task interactions. It is barely explored in the literature to model these three perspectives in each network layer in an end-to-end manner, which can not only minimize the effort of carefully designing empirical structures for the three multi-task representation learning objectives, but also greatly improve the representation learning capability of the multi-task network since all the model capacity will be used to optimize the three objectives together. In this paper, we propose TaskPrompter, a novel spatial-channel multi-task prompting transformer framework to achieve this target. Specifically, we design a set of spatial-channel task prompts and learn their spatial- and channel interactions with the shared image tokens in each transformer layer with attention mechanism, as aggregating spatial and channel information is critical for dense prediction tasks. Each task prompt learns task-specific representation for one task, while all the prompts can jointly contribute to the learning of the shared image token representations, and the interactions between different task prompts model the cross-task relationship. To decode dense predictions for multiple tasks with the learned spatial-channel task prompts from transformer, we accordingly design a dense task prompt decoding mechanism, which queries the shared image tokens using task prompts to obtain spatial- and channel-wise task-specific representations. Extensive experiments on two challenging multi-task dense scene understanding benchmarks (i.e. NYUD-V2 and PASCAL-Context) show the superiority of the proposed framework and TaskPrompter establishes significant state-of-the-art performances on multi-task dense predictions. 
Codes and models are made publicly available at https://github.com/prismformore/Multi-Task-Transformer.", "keywords": "Multi-task Learning;Scene Understanding;Computer Vision", "primary_area": "", "supplementary_material": "", "author": "Hanrong Ye;Dan Xu", "authorids": "~Hanrong_Ye1;~Dan_Xu4", "gender": ";M", "homepage": ";https://www.danxurgb.net", "dblp": ";16/3823-2.html", "google_scholar": ";OuSPv-AAAAAJ", "orcid": ";0000-0003-0136-9603", "linkedin": ";", "or_profile": "~Hanrong_Ye1;~Dan_Xu4", "aff": ";VGG, University of Oxford", "aff_domain": ";ox.ac.uk", "position": ";Postdoc", "bibtex": "@inproceedings{\nye2023taskprompter,\ntitle={TaskPrompter: Spatial-Channel Multi-Task Prompting for Dense Scene Understanding},\nauthor={Hanrong Ye and Dan Xu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=-CwPopPJda}\n}", "github": "", "project": "", "reviewers": "GABz;NUwM;y2MQ;Xzsy;CiXq", "pdf_size": 8525394, "recommendation": "6;6;6;8;8", "confidence": "4;4;3;3;3", "correctness": "3;3;3;4;3", "technical_novelty": "3;3;3;3;3", "empirical_novelty": "2;2;3;3;0", "wc_summary_paper": "81;91;77;96;43", "wc_strength_and_weaknesses": "315;224;176;83;125", "wc_clarity_quality_novelty_and_reproducibility": "31;17;13;51;31", "wc_summary_review": "61;104;18;39;22", "wc_review": "488;436;284;269;221", "wc_reply_reviewers": "50;25;0;0;23", "wc_reply_authors": "1189;1207;710;38;723", "reply_reviewers": "1;1;0;0;1", "reply_authors": "3;2;2;2;2", "recommendation_avg": [ 6.8, 0.9797958971132712 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "correctness_avg": [ 3.2, 0.39999999999999997 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 1.0954451150103321 ], "wc_summary_paper_avg": [ 77.6, 18.586016248782308 ], "wc_strength_and_weaknesses_avg": [ 184.6, 80.62902703121253 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 28.6, 13.35065541462291 ], "wc_summary_review_avg": [ 48.8, 31.49222126176558 ], "wc_review_avg": [ 339.6, 103.39941972757873 ], "wc_reply_reviewers_avg": [ 19.6, 18.618270596379247 ], "wc_reply_authors_avg": [ 773.4, 426.17020074144085 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.6666666666666667, "corr_recommendation_correctness": 0.6123724356957946, "gs_citation": 60, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4898560903239057877&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=-CwPopPJda", "email": ";ox.ac.uk", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "University of Oxford", "aff_unique_dep": "VGG", "aff_unique_url": "https://www.ox.ac.uk", "aff_unique_abbr": "Oxford", "aff_campus_unique_index": "0", "aff_campus_unique": "Oxford", "aff_country_unique_index": "0", "aff_country_unique": "United Kingdom" }, { "id": "-D5TVtzt3fP", "title": "Sparse Tokens for Dense Prediction - The Medical Image Segmentation Case", "track": "main", "status": "Withdraw", "tldr": "We show how to perform dense prediction efficiently with a sparse token ViT while maintaining performance.", "abstract": "Can we use sparse tokens for dense prediction, e.g., segmentation? Although token sparsification has been applied to Vision Transformers (ViT) for acceleration on classification tasks, it is still unknown how to perform segmentation from sparse tokens. 
To this end, we reformulate segmentation as a sparse encoding -> token completion -> dense decoding (SCD) pipeline.\nWe first show empirically that naively applying existing approaches from classification token pruning and masked image modeling (MIM) \nleads to failure and training inefficiency. This is caused by inappropriate sampling algorithms and the low quality of the restored dense features. In this paper, we propose Soft-topK Token Pruning (STP) and Multi-layer Token Assembly (MTA) to address the above problems.\nParticularly, in the sparse encoding stage, STP predicts token-wise importance scores with a lightweight sub-network and samples topK-scored tokens. The intractable gradients of topK are approximated through a continuous perturbed score distribution.\nIn the token completion stage, MTA restores a full token sequence by assembling both sparse output tokens and pruned intermediate tokens from multiple layers. Compared to MIM which fills the pruned positions with mask tokens, MTA produces more informative representations allowing more accurate segmentation. The last dense decoding stage is compatible with decoders of existing segmentation frameworks, e.g., UNETR. Experiments show SCD pipelines equipped with our STP and MTA are much faster than baselines without token sparsification in both training (up to 120% higher throughput) and inference (up to 60.6% higher throughput) while maintaining segmentation quality.", "keywords": "token pruning;vision transformer;dense prediction;medical image segmentation", "primary_area": "", "supplementary_material": "", "author": "Lei Zhou;Huidong Liu;Joseph Bae;Junjun He;Dimitris Samaras;Prateek Prasanna", "authorids": "~Lei_Zhou9;~Huidong_Liu1;~Joseph_Bae1;~Junjun_He2;~Dimitris_Samaras3;~Prateek_Prasanna3", "gender": "M;M;;M;M;M", "homepage": ";https://harryliew.github.io/;https://joseph-bae.github.io/;https://junjun2016.github.io/;https://www.cs.stonybrook.edu/~samaras/;https://you.stonybrook.edu/imaginelab/", "dblp": ";174/9885;270/2954;128/7027;s/DimitrisSamaras;133/6611", "google_scholar": "AdsoCBgAAAAJ;https://scholar.google.com/citations?hl=en;;Z4LgebkAAAAJ;https://scholar.google.com/citations?hl=en;uyA1Q18AAAAJ", "orcid": ";;;;0000-0002-1373-0294;", "linkedin": "lei-zhou-800b41143/;;;;;", "or_profile": "~Lei_Zhou9;~Huidong_Liu1;~Joseph_Bae1;~Junjun_He2;~Dimitris_Samaras3;~Prateek_Prasanna3", "aff": "State University of New York, Stony Brook;Amazon;State University of New York at Stony Brook;Shanghai AI Laboratory;Stony Brook University;State University of New York, Stony Brook", "aff_domain": "stonybrook.edu;amazon.com;stonybrookmedicine.edu;pjlab.org.cn;cs.stonybrook.edu;stonybrook.edu", "position": "PhD student;Researcher;PhD student;Researcher;Full Professor;Assistant Professor", "bibtex": "@misc{\nzhou2023sparse,\ntitle={Sparse Tokens for Dense Prediction - The Medical Image Segmentation Case},\nauthor={Lei Zhou and Huidong Liu and Joseph Bae and Junjun He and Dimitris Samaras and Prateek Prasanna},\nyear={2023},\nurl={https://openreview.net/forum?id=-D5TVtzt3fP}\n}", "github": "", "project": "", "reviewers": "krjD;1ABv;qcVm;fc7U", "site": "https://openreview.net/forum?id=-D5TVtzt3fP", "pdf_size": 2131590, "recommendation": "5;5;5;6", "confidence": "5;4;4;4", "correctness": "4;3;3;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "0;2;2;3", "wc_summary_paper": "121;114;101;51", "wc_strength_and_weaknesses": "309;308;245;68", "wc_clarity_quality_novelty_and_reproducibility": "22;49;17;23", "wc_summary_review": "17;79;6;27", 
"wc_review": "469;550;369;169", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "517;918;661;53", "reply_reviewers": "0;0;0;0", "reply_authors": "1;2;1;1", "recommendation_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.75, 1.0897247358851685 ], "wc_summary_paper_avg": [ 96.75, 27.371289702898547 ], "wc_strength_and_weaknesses_avg": [ 232.5, 98.44922549212868 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 27.75, 12.47747971346778 ], "wc_summary_review_avg": [ 32.25, 27.99441908666797 ], "wc_review_avg": [ 389.25, 142.40852327020318 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 537.25, 314.3217897314789 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": -0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:-cVfaK0DD-AJ:scholar.google.com/&scioq=Sparse+Tokens+for+Dense+Prediction+-+The+Medical+Image+Segmentation+Case&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;4;0", "aff_unique_norm": "State University of New York;Amazon;State University of New York at Stony Brook;Shanghai AI Laboratory;Stony Brook University", "aff_unique_dep": ";Amazon.com, Inc.;;;", "aff_unique_url": "https://www.stonybrook.edu;https://www.amazon.com;https://www.stonybrook.edu;https://www.shanghai-ai-lab.com;https://www.stonybrook.edu", "aff_unique_abbr": "SUNY Stony Brook;Amazon;SUNY Stony Brook;SAIL;SBU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stony Brook;", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "United States;China" }, { "title": "Generalization Bounds for Federated Learning: Fast Rates, Unparticipating Clients and Unbounded Losses", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11528", "id": "-EHqoysUYLx", "poster": "/media/PosterPDFs/ICLR%202023/11528.png?t=1681631132.3870661", "openreview": "https://openreview.net/forum?id=-EHqoysUYLx", "slides": "https://iclr.cc/virtual/2023/poster/11528", "video": "https://iclr.cc/virtual/2023/poster/11528", "author_site": "Xiaolin Hu, Shaojie Li, Yong Liu", "tldr": "", "abstract": "In {federated learning}, the underlying data distributions may be different across clients. This paper provides a theoretical analysis of generalization error of {federated learning}, which captures both heterogeneity and relatedness of the distributions. In particular, we assume that the heterogeneous distributions are sampled from a meta-distribution. In this two-level distribution framework, we characterize the generalization error not only for clients participating in the training but also for unparticipating clients. We first show that the generalization error for unparticipating clients can be bounded by participating generalization error and participating gap caused by clients' sampling. We further establish fast learning bounds of order $\\mathcal{O}(\\frac{1}{mn} + \\frac{1}{m})$ for unparticipating clients, where $m$ is the number of clients and $n$ is the sample size at each client. To our knowledge, the obtained fast bounds are state-of-the-art in the two-level distribution framework. 
Moreover, previous theoretical results mostly require the loss function to be bounded. We derive convergence bounds of order $\\mathcal{O}(\\frac{1}{\\sqrt{mn}} + \\frac{1}{\\sqrt{m}})$ without boundedness assumptions, covering unbounded losses such as sub-exponential and sub-Weibull losses. ", "keywords": "Federated learning;Generalization error;Risk bound;Unbounded losses;Learning theory", "primary_area": "", "supplementary_material": "", "author": "Xiaolin Hu;Shaojie Li;Yong Liu", "authorids": "~Xiaolin_Hu6;~Shaojie_Li2;~Yong_Liu7", "gender": "M;M;M", "homepage": "https://www.xiaolinhu.art/;;https://iie-liuyong.github.io", "dblp": "60/6028-3.html;;29/4867-18", "google_scholar": "6CSzbVEAAAAJ;;vVhmzbAAAAAJ", "orcid": "0009-0002-5493-5779;;0000-0002-6739-621X", "linkedin": ";;", "or_profile": "~Xiaolin_Hu6;~Shaojie_Li2;~Yong_Liu7", "aff": "Renmin University of China;Renmin University of China;Renmin University of China", "aff_domain": "ruc.edu.cn;ruc.edu.cn;ruc.edu.cn", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nhu2023generalization,\ntitle={Generalization Bounds for Federated Learning: Fast Rates, Unparticipating Clients and Unbounded Losses},\nauthor={Xiaolin Hu and Shaojie Li and Yong Liu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=-EHqoysUYLx}\n}", "github": "", "project": "", "reviewers": "8Tvp;QQ79;zSzo;yZwY;zYTD;KoJF;Uj7L", "pdf_size": 632775, "recommendation": "3;5;6;6;6;6;8", "confidence": "4;4;3;2;2;3;2", "correctness": "2;3;4;3;4;3;4", "technical_novelty": "2;2;3;3;4;3;3", "empirical_novelty": "0;0;0;3;0;0;0", "wc_summary_paper": "42;102;60;88;70;78;78", "wc_strength_and_weaknesses": "677;490;364;107;256;304;209", "wc_clarity_quality_novelty_and_reproducibility": "59;9;84;204;5;566;6", "wc_summary_review": "44;39;21;71;59;83;55", "wc_review": "822;640;529;470;390;1031;348", "wc_reply_reviewers": "163;0;113;0;0;580;0", "wc_reply_authors": "2034;504;633;797;527;2870;566", "reply_reviewers": "1;0;1;0;0;1;0", "reply_authors": "6;3;2;2;1;6;1", "recommendation_avg": [ 5.714285714285714, 1.3850513878332367 ], "confidence_avg": [ 2.857142857142857, 0.8329931278350429 ], "correctness_avg": [ 3.2857142857142856, 0.6998542122237652 ], "technical_novelty_avg": [ 2.857142857142857, 0.6388765649999398 ], "empirical_novelty_avg": [ 0.42857142857142855, 1.049781318335648 ], "wc_summary_paper_avg": [ 74.0, 17.92045917779054 ], "wc_strength_and_weaknesses_avg": [ 343.85714285714283, 175.80137214760182 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 133.28571428571428, 188.33686710308228 ], "wc_summary_review_avg": [ 53.142857142857146, 19.141791015088064 ], "wc_review_avg": [ 604.2857142857143, 228.67988498317547 ], "wc_reply_reviewers_avg": [ 122.28571428571429, 196.7817603958502 ], "wc_reply_authors_avg": [ 1133.0, 868.1862867907029 ], "reply_reviewers_avg": [ 0.42857142857142855, 0.49487165930539345 ], "reply_authors_avg": [ 3.0, 2.0 ], "replies_avg": [ 37, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.7783040514449477, "corr_recommendation_correctness": 0.8210981230398559, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18365105649354405994&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=-EHqoysUYLx", "email": "ruc.edu.cn;ruc.edu.cn;ruc.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Renmin University of China", "aff_unique_dep": "", "aff_unique_url":
"http://www.ruc.edu.cn", "aff_unique_abbr": "RUC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Unsupervised Learning for Combinatorial Optimization Needs Meta Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10837", "id": "-ENYHCE8zBp", "poster": "/media/PosterPDFs/ICLR%202023/10837.png?t=1682608595.1472986", "openreview": "https://openreview.net/forum?id=-ENYHCE8zBp", "slides": "https://iclr.cc/virtual/2023/poster/10837", "video": "https://iclr.cc/virtual/2023/poster/10837", "author_site": "Haoyu Wang, Pan Li", "tldr": "", "abstract": "A general framework of unsupervised learning for combinatorial optimization (CO) is to train a neural network whose output gives a problem solution by directly optimizing the CO objective. Albeit with some advantages over traditional solvers, current frameworks optimize an averaged performance over the distribution of historical problem instances, which misaligns with the actual goal of CO that looks for a good solution to every future encountered instance. With this observation, we propose a new objective of unsupervised learning for CO where the goal of learning is to search for good initialization for future problem instances rather than give direct solutions. We propose a meta-learning-based training pipeline for this new objective. Our method achieves good performance. We observe that even the initial solution given by our model before fine-tuning can significantly outperform the baselines under various evaluation settings including evaluation across multiple datasets, and the case with big shifts in the problem scale. The reason we conjecture is that meta-learning-based training lets the model be loosely tied to each local optimum for a training instance while being more adaptive to the changes of optimization landscapes across instances.", "keywords": "combinatorial optimization;unsupervised learning;meta learning;graph neural networks", "primary_area": "", "supplementary_material": "/attachment/40b87371b03f0baa49a9200008204d484851ec15.zip", "author": "Haoyu Peter Wang;Pan Li", "authorids": "~Haoyu_Peter_Wang1;~Pan_Li2", "gender": "M;", "homepage": ";", "dblp": ";https://dblp.org/pers/hd/l/Li_0005:Pan", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;IroP0EwAAAAJ", "orcid": ";", "linkedin": ";pan-li-b951105a/", "or_profile": "~Haoyu_Peter_Wang1;~Pan_Li2", "aff": "Purdue University;Purdue University", "aff_domain": "purdue.edu;purdue.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nwang2023unsupervised,\ntitle={Unsupervised Learning for Combinatorial Optimization Needs Meta Learning},\nauthor={Haoyu Peter Wang and Pan Li},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=-ENYHCE8zBp}\n}", "github": "", "project": "", "reviewers": "uyeg;5Wy8;sUXy;oMcq", "pdf_size": 647821, "recommendation": "6;6;8;8", "confidence": "4;2;3;4", "correctness": "4;4;3;2", "technical_novelty": "2;3;3;3", "empirical_novelty": "3;3;3;2", "wc_summary_paper": "65;59;41;59", "wc_strength_and_weaknesses": "103;64;391;126", "wc_clarity_quality_novelty_and_reproducibility": "396;26;19;170", "wc_summary_review": "41;12;34;6", "wc_review": "605;161;485;361", "wc_reply_reviewers": "24;0;15;93", "wc_reply_authors": "1450;242;939;1146", "reply_reviewers": "1;0;1;2", "reply_authors": "4;1;3;3", "recommendation_avg": [ 
7.0, 1.0 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 56.0, 9.0 ], "wc_strength_and_weaknesses_avg": [ 171.0, 128.9360306508619 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 152.75, 152.82567683475182 ], "wc_summary_review_avg": [ 23.25, 14.618053906043718 ], "wc_review_avg": [ 403.0, 164.2071862008481 ], "wc_reply_reviewers_avg": [ 33.0, 35.68613176011096 ], "wc_reply_authors_avg": [ 944.25, 444.3165397551615 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.75, 1.0897247358851685 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.30151134457776363, "corr_recommendation_correctness": -0.9045340337332909, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15033459598062454455&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=-ENYHCE8zBp", "email": "purdue.edu;purdue.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Purdue University", "aff_unique_dep": "", "aff_unique_url": "https://www.purdue.edu", "aff_unique_abbr": "Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Learning Kernelized Contextual Bandits in a Distributed and Asynchronous Environment", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11356", "id": "-G1kjTFsSs", "poster": "", "openreview": "https://openreview.net/forum?id=-G1kjTFsSs", "slides": "https://iclr.cc/virtual/2023/poster/11356", "video": "https://iclr.cc/virtual/2023/poster/11356", "author_site": "Chuanhao Li, Huazheng Wang, Mengdi Wang, Hongning Wang", "tldr": "We propose and analyze a communication efficient asynchronous Kernel UCB algorithm with Nystrom approximation.", "abstract": "Despite the recent advances in communication-efficient distributed bandit learning, most existing solutions are restricted to parametric models, e.g., linear bandits and generalized linear bandits (GLB). In comparison, kernel bandits, which search for non-parametric functions in a reproducing kernel Hilbert space (RKHS), offer higher modeling capacity. But the only existing work in distributed kernel bandits adopts a synchronous communication protocol, which greatly limits its practical use (e.g., every synchronization step requires all clients to participate and wait for data exchange).\nIn this paper, in order to improve the robustness against delays and unavailability of clients that are common in practice, we propose the first asynchronous solution based on approximated kernel regression for distributed kernel bandit learning. A set of effective treatments are developed to ensure approximation quality and communication efficiency. 
Rigorous theoretical analysis about the regret and communication cost is provided; and extensive empirical evaluations demonstrate the effectiveness of our solution.", "keywords": "contextual bandit;kernelized method;asynchronous distributed learning;communication efficiency", "primary_area": "", "supplementary_material": "", "author": "Chuanhao Li;Huazheng Wang;Mengdi Wang;Hongning Wang", "authorids": "~Chuanhao_Li1;~Huazheng_Wang1;~Mengdi_Wang1;~Hongning_Wang1", "gender": ";;F;M", "homepage": "https://cyrilli.github.io/;https://huazhengwang.github.io/;http://mwang.princeton.edu;http://www.cs.virginia.edu/~hw5x/", "dblp": "195/9947;163/2233;;05/6545", "google_scholar": "w2ShljkAAAAJ;w3PrbKwAAAAJ;;qkdvKNoAAAAJ", "orcid": ";;;0000-0002-6524-9195", "linkedin": ";;;", "or_profile": "~Chuanhao_Li1;~Huazheng_Wang1;~Mengdi_Wang1;~Hongning_Wang1", "aff": "University of Virginia;Oregon State University;Princeton University;University of Virginia", "aff_domain": "virginia.edu;oregonstate.edu;princeton.edu;virginia.edu", "position": "PhD student;Assistant Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nli2023learning,\ntitle={Learning Kernelized Contextual Bandits in a Distributed and Asynchronous Environment},\nauthor={Chuanhao Li and Huazheng Wang and Mengdi Wang and Hongning Wang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=-G1kjTFsSs}\n}", "github": "", "project": "", "reviewers": "iavx;6Kat;yaEL;fUPj", "pdf_size": 769535, "recommendation": "6;6;6;8", "confidence": "3;3;3;2", "correctness": "3;4;4;3", "technical_novelty": "3;2;3;3", "empirical_novelty": "3;3;4;3", "wc_summary_paper": "47;42;53;50", "wc_strength_and_weaknesses": "82;77;92;113", "wc_clarity_quality_novelty_and_reproducibility": "10;40;74;8", "wc_summary_review": "63;33;38;59", "wc_review": "202;192;257;230", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "418;681;476;207", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 48.0, 4.06201920231798 ], "wc_strength_and_weaknesses_avg": [ 91.0, 13.80217374184226 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 33.0, 26.851443164195103 ], "wc_summary_review_avg": [ 48.25, 12.94942083646987 ], "wc_review_avg": [ 220.25, 25.380849079571785 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 445.5, 168.84090144274876 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": -0.5773502691896257, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8825461530471129443&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=-G1kjTFsSs", "email": "virginia.edu;oregonstate.edu;princeton.edu;virginia.edu", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Virginia;Oregon State University;Princeton University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.virginia.edu;https://oregonstate.edu;https://www.princeton.edu", "aff_unique_abbr": "UVA;OSU;Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": 
"0;0;0;0", "aff_country_unique": "United States" }, { "id": "-H7FPruqEX", "title": "CASA: Bridging the Gap between Policy Improvement and Policy Evaluation with Conflict Averse Policy Iteration", "track": "main", "status": "Reject", "tldr": "This paper proposes a method to eliminate gradient conflicts between policy improvement and policy evaluation.", "abstract": "We study the problem of model-free reinforcement learning, which is often solved following the principle of Generalized Policy Iteration (GPI). While GPI is typically an interplay between policy evaluation and policy improvement, most conventional model-free methods with function approximation assume the independence of GPI steps, despite of the inherent connections between them. In this paper, we present a method that attempts to eliminate the inconsistency between policy evaluation step and policy improvement step, leading to a conflict averse GPI solution with gradient-based functional approximation. Our method is capital to balancing exploitation and exploration between policy-based and value-based methods and is applicable to existed policy-based and value-based methods. We conduct extensive experiments to study theoretical properties of our method and demonstrate the effectiveness of our method on Atari 200M benchmark.", "keywords": "reinforcement learning;policy iteration", "primary_area": "", "supplementary_material": "", "author": "Changnan Xiao;Haosen Shi;Jiajun Fan;Shihong Deng;Haiyan Yin", "authorids": "~Changnan_Xiao1;~Haosen_Shi1;~Jiajun_Fan1;~Shihong_Deng1;~Haiyan_Yin1", "gender": "M;;;M;", "homepage": "https://github.com/ChangnXX;;;;", "dblp": ";;;;", "google_scholar": ";;;9ShInFAAAAAJ;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Changnan_Xiao1;~Haosen_Shi1;~Jiajun_Fan1;~Shihong_Deng1;~Haiyan_Yin1", "aff": "Bytedance;;;Bytedance;", "aff_domain": "bytedance.com;;;bytedance.com;", "position": "Researcher;;;Researcher;", "bibtex": "@misc{\nxiao2023casa,\ntitle={{CASA}: Bridging the Gap between Policy Improvement and Policy Evaluation with Conflict Averse Policy Iteration},\nauthor={Changnan Xiao and Haosen Shi and Jiajun Fan and Shihong Deng and Haiyan Yin},\nyear={2023},\nurl={https://openreview.net/forum?id=-H7FPruqEX}\n}", "github": "", "project": "", "reviewers": "aLPu;QNNj;hwhf;o19T", "site": "https://openreview.net/forum?id=-H7FPruqEX", "pdf_size": 7884850, "recommendation": "3;3;5;6", "confidence": "3;3;4;4", "correctness": "2;3;2;4", "technical_novelty": "3;1;3;4", "empirical_novelty": "2;1;3;3", "wc_summary_paper": "35;47;77;79", "wc_strength_and_weaknesses": "119;165;110;132", "wc_clarity_quality_novelty_and_reproducibility": "950;47;166;80", "wc_summary_review": "42;44;249;21", "wc_review": "1146;303;602;312", "wc_reply_reviewers": "0;50;0;0", "wc_reply_authors": "1037;723;735;215", "reply_reviewers": "0;1;0;0", "reply_authors": "3;2;2;1", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.75, 0.82915619758885 ], "technical_novelty_avg": [ 2.75, 1.0897247358851685 ], "empirical_novelty_avg": [ 2.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 59.5, 18.993419913222578 ], "wc_strength_and_weaknesses_avg": [ 131.5, 20.862646045025066 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 310.75, 371.6190085288964 ], "wc_summary_review_avg": [ 89.0, 92.81433079002402 ], "wc_review_avg": [ 590.75, 342.3925926476798 ], "wc_reply_reviewers_avg": [ 12.5, 21.650635094610966 ], "wc_reply_authors_avg": [ 677.5, 295.17918287033723 ], 
"reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.9622504486493761, "corr_recommendation_correctness": 0.5222329678670935, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:CD6WtiSlYeAJ:scholar.google.com/&scioq=CASA:+Bridging+the+Gap+between+Policy+Improvement+and+Policy+Evaluation+with+Conflict+Averse+Policy+Iteration&hl=en&as_sdt=0,33", "gs_version_total": 4, "aff_unique_index": "0;0", "aff_unique_norm": "ByteDance", "aff_unique_dep": "", "aff_unique_url": "https://www.bytedance.com", "aff_unique_abbr": "Bytedance", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Learning Domain-Agnostic Representation for Disease Diagnosis", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12219", "id": "-HHJZlRpGb", "poster": "/media/PosterPDFs/ICLR%202023/12219.png?t=1680801137.3082223", "openreview": "https://openreview.net/forum?id=-HHJZlRpGb", "slides": "https://iclr.cc/virtual/2023/poster/12219", "video": "https://iclr.cc/virtual/2023/poster/12219", "author_site": "Churan Wang, Jing Li, Xinwei Sun, Fandong Zhang, Yizhou Yu, Yizhou Wang", "tldr": "We propose a disentanglement model in medical imaging diagnosis, in order to achieve robustness to multi centers.", "abstract": "In clinical environments, image-based diagnosis is desired to achieve robustness on multi-center samples. Toward this goal, a natural way is to capture only clinically disease-related features. However, such disease-related features are often entangled with center-effect, disabling robust transferring to unseen centers/domains. To disentangle disease-related features, we first leverage structural causal modeling to explicitly model disease-related and center-effects that are provable to be disentangled from each other. Guided by this, we propose a novel Domain Agnostic Representation Model (DarMo) based on variational Auto-Encoder. To facilitate disentanglement, we design domain-agnostic and domain-aware encoders to respectively capture disease-related features and varied center-effects by incorporating a domain-aware batch normalization layer. Besides, we constrain the disease-related features to well predict the disease label as well as clinical attributes, by leveraging Graph Convolutional Network (GCN) into our decoder. 
The effectiveness and utility of our method are demonstrated by the superior performance over others on both public datasets and inhouse datasets.", "keywords": "multi centers disease diagnosis;mammogram classification", "primary_area": "", "supplementary_material": "/attachment/2f2c6eda9e5519fca563c117e250d1597d13de47.zip", "author": "Churan Wang;Jing Li;Xinwei Sun;Fandong Zhang;Yizhou Yu;Yizhou Wang", "authorids": "~Churan_Wang1;~Jing__Li1;~Xinwei_Sun1;~Fandong_Zhang1;~Yizhou_Yu1;~Yizhou_Wang1", "gender": "F;M;M;M;M;F", "homepage": "https://github.com/churan08;https://sunxinwei0625.github.io/sunxw.github.io/;;;https://cfcs.pku.edu.cn/wangyizhou/;", "dblp": "248/5699;145/6592-1;195/8230;90/6896.html;71/3387-1;", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;;e38fTZQAAAAJ;831z_VcAAAAJ;VVISORcAAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Churan_Wang1;~Xinwei_Sun1;~Fandong_Zhang1;~Yizhou_Yu1;~Yizhou_Wang1;~li_jing2", "aff": "Peking University;Fudan University;;The University of Hong Kong;Peking University;Peking University,", "aff_domain": "pku.edu.cn;fudan.edu.cn;;hku.hk;pku.edu.cn;pku.edu.cn", "position": "Postdoc;Assistant Professor;;Full Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nwang2023learning,\ntitle={Learning Domain-Agnostic Representation for Disease Diagnosis},\nauthor={Churan Wang and Jing Li and Xinwei Sun and Fandong Zhang and Yizhou Yu and Yizhou Wang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=-HHJZlRpGb}\n}", "github": "", "project": "", "reviewers": "RBtC;eNnv;kDRU", "pdf_size": 3115112, "recommendation": "6;6;8", "confidence": "3;3;3", "correctness": "3;3;3", "technical_novelty": "2;3;3", "empirical_novelty": "3;3;4", "wc_summary_paper": "71;47;70", "wc_strength_and_weaknesses": "427;104;204", "wc_clarity_quality_novelty_and_reproducibility": "45;41;115", "wc_summary_review": "32;11;19", "wc_review": "575;203;408", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.0 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 62.666666666666664, 11.08552609887726 ], "wc_strength_and_weaknesses_avg": [ 245.0, 135.01357956393375 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 67.0, 33.980386499665755 ], "wc_summary_review_avg": [ 20.666666666666668, 8.65383665716478 ], "wc_review_avg": [ 395.3333333333333, 152.13225240632644 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9693706755641863847&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=-HHJZlRpGb", "email": "pku.edu.cn;fudan.edu.cn;;hku.hk;pku.edu.cn;pku.edu.cn", "author_num": 6, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "Peking University;Fudan University;University of Hong Kong", "aff_unique_dep": ";;", "aff_unique_url": "http://www.pku.edu.cn;https://www.fudan.edu.cn;https://www.hku.hk", "aff_unique_abbr": "Peking 
U;Fudan;HKU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "-I2nYWac2Id", "title": "Uplift Modelling based on Graph Neural Network Combined with Causal Knowledge", "track": "main", "status": "Withdraw", "tldr": "Improve uplift modeling performance through causal knowledge representation and structural neighborhood learning", "abstract": "Uplift modeling is a crucial method to estimate marketing effect modeling, which is widely used to evaluate the effect of treatment on outcomes. On the one hand, we can find the treatment with the best effect through uplift modeling. On the other hand, we can find customers who tend to make corresponding positive decisions in a given treatment. The past uplift modeling methods are mostly based on the difference-in-difference(DID) framework, combined with the machine learning model as the learner to make an estimation, ignoring the relationship and confidential information among features in uplift modeling. We propose a graph neural network-based framework combining causal knowledge as an estimator of uplift value. Firstly, we proposed a causal representation method based on conditional average treatment effect(CATE) estimation and adjacency matrix structure learning. Secondly, we proposed an uplift modeling framework based on graph convolution networks to combine the causal knowledge, which has better scalability. Our experimental results show that our method can estimate the uplift value with minor errors in the general simulation data, and its performance has also been verified in the actual industry marketing data.", "keywords": "uplift modeling;Graph Neural Network;Knowledge Representation;Structure Learning", "primary_area": "", "supplementary_material": "", "author": "Haowen Wang;Xinyan Ye;Yangze Zhou;Zhiyi Zhang;Longhan Zhang;Jing Jiang", "authorids": "~Haowen_Wang1;xy2119@ic.ac.uk;~Yangze_Zhou2;emma0302@pku.edu.cn;longhanz@zhejianglab.com;~Jing_Jiang7", "gender": "M;;M;;;M", "homepage": ";;;;;", "dblp": ";;;;;", "google_scholar": "X-HgtJIAAAAJ;;ZxFQboUAAAAJ;;;dplQ73EAAAAJ", "orcid": "0000-0003-4323-7166;;;;;", "linkedin": ";;;;;", "or_profile": "~Haowen_Wang1;xy2119@ic.ac.uk;~Yangze_Zhou2;emma0302@pku.edu.cn;longhanz@zhejianglab.com;~Jing_Jiang7", "aff": "Alipay;;Zhejiang University;;;", "aff_domain": "alipay.com;;zju.edu.cn;;;", "position": "Researcher;;Undergrad student;;;", "bibtex": "@misc{\nwang2023uplift,\ntitle={Uplift Modelling based on Graph Neural Network Combined with Causal Knowledge},\nauthor={Haowen Wang and Xinyan Ye and Yangze Zhou and Zhiyi Zhang and Longhan Zhang and Jing Jiang},\nyear={2023},\nurl={https://openreview.net/forum?id=-I2nYWac2Id}\n}", "github": "", "project": "", "reviewers": "Lte9;X1aK;9Ueh", "site": "https://openreview.net/forum?id=-I2nYWac2Id", "pdf_size": 783502, "recommendation": "3;3;3", "confidence": "4;3;3", "correctness": "2;2;2", "technical_novelty": "2;2;2", "empirical_novelty": "2;1;2", "wc_summary_paper": "36;47;48", "wc_strength_and_weaknesses": "111;225;354", "wc_clarity_quality_novelty_and_reproducibility": "33;118;37", "wc_summary_review": "21;39;12", "wc_review": "201;429;451", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 2.0, 0.0 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 
1.6666666666666667, 0.4714045207910317 ], "wc_summary_paper_avg": [ 43.666666666666664, 5.436502143433364 ], "wc_strength_and_weaknesses_avg": [ 230.0, 99.26731586982696 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 62.666666666666664, 39.16063783387032 ], "wc_summary_review_avg": [ 24.0, 11.224972160321824 ], "wc_review_avg": [ 360.3333333333333, 113.02310481588364 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1710186026829226929&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1", "aff_unique_norm": "Alipay;Zhejiang University", "aff_unique_dep": ";", "aff_unique_url": "https://www.alipay.com;https://www.zju.edu.cn", "aff_unique_abbr": "Alipay;ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Task-Aware Information Routing from Common Representation Space in Lifelong Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10699", "id": "-M0TNnyWFT5", "poster": "/media/PosterPDFs/ICLR%202023/10699.png?t=1680886446.1349952", "openreview": "https://openreview.net/forum?id=-M0TNnyWFT5", "slides": "https://iclr.cc/virtual/2023/poster/10699", "video": "https://iclr.cc/virtual/2023/poster/10699", "author_site": "Prashant Bhat, Bahram Yoosefizonooz, Elahe Arani", "tldr": "A continual learning method that entails task-attention modules to capture task-specific information from the common representation space", "abstract": "Intelligent systems deployed in the real world suffer from catastrophic forgetting when exposed to a sequence of tasks. Humans, on the other hand, acquire, consolidate, and transfer knowledge between tasks that rarely interfere with the consolidated knowledge. Accompanied by self-regulated neurogenesis, continual learning in the brain is governed by the rich set of neurophysiological processes that harbor different types of knowledge which are then integrated by the conscious processing. Thus, inspired by Global Workspace Theory of conscious information access in the brain, we propose TAMiL, a continual learning method that entails task-attention modules to capture task-specific information from the common representation space. We employ simple, undercomplete autoencoders to create a communication bottleneck between the common representation space and the global workspace, allowing only the task-relevant information to the global workspace, thereby greatly reducing task interference. Experimental results show that our method outperforms state-of-the-art rehearsal-based and dynamic sparse approaches and bridges the gap between fixed capacity and parameter isolation approaches while being scalable. 
We also show that our method effectively mitigates catastrophic forgetting while being well-calibrated with reduced task-recency bias.", "keywords": "Continual learning;Lifelong learning;Representation learning;Global workspace theory;Task-specific attention", "primary_area": "", "supplementary_material": "", "author": "Prashant Shivaram Bhat;Bahram Zonooz;Elahe Arani", "authorids": "~Prashant_Shivaram_Bhat1;~Bahram_Zonooz1;~Elahe_Arani1", "gender": "M;M;F", "homepage": ";https://sites.google.com/view/bahramzonooz;https://sites.google.com/view/elahe-arani", "dblp": "340/2336;250/9573;", "google_scholar": "https://scholar.google.com/citations?hl=en;;e_I_v6cAAAAJ", "orcid": ";;0000-0002-0952-7007", "linkedin": "prashant-s-bhat/;;elahe-arani-630870b2/", "or_profile": "~Prashant_Shivaram_Bhat1;~Bahram_Zonooz1;~Elahe_Arani1", "aff": "NavInfo Europe B.V;Eindhoven University of Technology;Advanced Research Lab, NavInfo Europe", "aff_domain": "navinfo.eu;tue.nl;navinfo.eu", "position": "Researcher;Assistant Professor;Sr. AI Manager & Sr. Research Scientist", "bibtex": "@inproceedings{\nbhat2023taskaware,\ntitle={Task-Aware Information Routing from Common Representation Space in Lifelong Learning},\nauthor={Prashant Shivaram Bhat and Bahram Zonooz and Elahe Arani},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=-M0TNnyWFT5}\n}", "github": "", "project": "", "reviewers": "HdW5;yHwh;JQ6f", "pdf_size": 820706, "recommendation": "6;6;8", "confidence": "5;4;2", "correctness": "3;4;3", "technical_novelty": "2;3;3", "empirical_novelty": "3;3;3", "wc_summary_paper": "83;57;88", "wc_strength_and_weaknesses": "129;125;178", "wc_clarity_quality_novelty_and_reproducibility": "1227;58;87", "wc_summary_review": "142;191;43", "wc_review": "1581;431;396", "wc_reply_reviewers": "659;0;84", "wc_reply_authors": "2498;1572;618", "reply_reviewers": "1;0;1", "reply_authors": "4;3;2", "recommendation_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 1.247219128924647 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 76.0, 13.589211407093005 ], "wc_strength_and_weaknesses_avg": [ 144.0, 24.097026095903757 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 457.3333333333333, 544.3652776909596 ], "wc_summary_review_avg": [ 125.33333333333333, 61.55936610748649 ], "wc_review_avg": [ 802.6666666666666, 550.5502298206364 ], "wc_reply_reviewers_avg": [ 247.66666666666666, 292.87122691646505 ], "wc_reply_authors_avg": [ 1562.6666666666667, 767.535160251452 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 3.0, 0.816496580927726 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.9449111825230679, "corr_recommendation_correctness": -0.49999999999999983, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12634402650661673343&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=-M0TNnyWFT5", "email": "navinfo.eu;tue.nl;navinfo.eu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "NavInfo Europe;Eindhoven University of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.navinfo.com;https://www.tue.nl", "aff_unique_abbr": "NavInfo;TU/e", "aff_campus_unique_index": "", 
"aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Netherlands;Unknown" }, { "id": "-MQWXqNyoa", "title": "Intrinsic Computational Complexity of Equivariant Neural Networks", "track": "main", "status": "Withdraw", "tldr": "This paper theoretically studies the requried computational complexity for equivariant neural networks to achieve a desired expressivity.", "abstract": "Equivariant neural networks have shown significant advantages in learning on data with intrinsic symmetries represented by groups. A major concern is on the high computational costs in the cases of large-scale groups, especially in the inference stage. This paper studies the required computational complexity of equivariant neural networks in inference for achieving a desired expressivity. We theoretically compare three classes of ReLU networks: (1) two-layer group-averaging networks (TGNs); (2) two-layer layer-wise equivariant networks (TENs); and (3) two-layer networks without any equivariant constraints (TNs), with a new notion {\\it intrinsic computational complexity} for better characterizing computational costs. We prove that (1) TGNs/TENs have equal and full expressivities to represent any invariant function that can be learned by a TN, where the TGNs and TENs have equal intrinsic computational complexities; (2) a TGN/TEN requires at most double the intrinsic computational complexity of a TN; and (3) a TEN can achieve the inference speed coincident with its intrinsic computational complexity, while TGNs are strictly slower, which justifies the computational advantages of layer-wise equivariant architectures over group averaging. Our theory rules out the existence of equivariant networks with group-scale-independent computational costs, summarized in a new no-free-lunch theorem: when more equivariance is desired, more computation is required.", "keywords": "Equivariant Neural Networks;Learning Theory", "primary_area": "", "supplementary_material": "", "author": "Tian Qin;Fengxiang He;Dacheng Tao", "authorids": "~Tian_Qin2;~Fengxiang_He1;~Dacheng_Tao1", "gender": "M;;", "homepage": "https://tianqin0.github.io/;https://fengxianghe.github.io/;", "dblp": ";225/4682;", "google_scholar": "58X8nLsAAAAJ;QSx-Yu0AAAAJ;", "orcid": ";;", "linkedin": ";fengxiang-he-35b173122;", "or_profile": "~Tian_Qin2;~Fengxiang_He1;~Dacheng_Tao1", "aff": "University of Science and Technology of China;JD.com, Inc.;", "aff_domain": "ustc.edu.cn;jd.com;", "position": "MS student;Algorithm Scientist;", "bibtex": "@misc{\nqin2023intrinsic,\ntitle={Intrinsic Computational Complexity of Equivariant Neural Networks},\nauthor={Tian Qin and Fengxiang He and Dacheng Tao},\nyear={2023},\nurl={https://openreview.net/forum?id=-MQWXqNyoa}\n}", "github": "", "project": "", "reviewers": "VZSR;egcu;3gDM", "site": "https://openreview.net/forum?id=-MQWXqNyoa", "pdf_size": 263648, "recommendation": "3;5;5", "confidence": "4;2;4", "correctness": "2;2;3", "technical_novelty": "2;3;2", "empirical_novelty": "2;0;0", "wc_summary_paper": "98;28;70", "wc_strength_and_weaknesses": "864;42;270", "wc_clarity_quality_novelty_and_reproducibility": "191;193;32", "wc_summary_review": "2;11;17", "wc_review": "1155;274;389", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "correctness_avg": [ 2.3333333333333335, 0.4714045207910317 ], 
"technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 0.6666666666666666, 0.9428090415820634 ], "wc_summary_paper_avg": [ 65.33333333333333, 28.767265347188555 ], "wc_strength_and_weaknesses_avg": [ 392.0, 346.4909811236073 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 138.66666666666666, 75.42914261447994 ], "wc_summary_review_avg": [ 10.0, 6.164414002968976 ], "wc_review_avg": [ 606.0, 391.0302631084539 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.5, "corr_recommendation_correctness": 0.49999999999999983, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:k9Yv16enIrEJ:scholar.google.com/&scioq=Intrinsic+Computational+Complexity+of+Equivariant+Neural+Networks&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "University of Science and Technology of China;JD.com", "aff_unique_dep": ";", "aff_unique_url": "http://www.ustc.edu.cn;https://www.jd.com", "aff_unique_abbr": "USTC;JD.com", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "-NAi1oQJbA3", "title": "Adapting Pre-trained Language Models for Quantum Natural Language Processing", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "The emerging classical-quantum transfer learning paradigm has brought a decent performance to quantum computational models in many tasks, such as computer vision, by enabling a combination of quantum models and classical pre-trained neural networks. However, using quantum computing with pre-trained models has yet been explored in natural language processing (NLP). Due to the high linearity constraints of the underlying quantum computing infrastructures, existing Quantum NLP models are limited in performance on real tasks. We fill this gap by pre-training a sentence state with complex-valued BERT-like architecture, and adapting it to the classical-quantum transfer learning scheme for sentence classification. 
In quantum simulation experiments, the pre-trained representation can bring 50% to 60% increases to the capacity of end-to-end quantum models.", "keywords": "Quantum Computing;Complex-valued Neural Network;Pre-trained Language Model", "primary_area": "", "supplementary_material": "", "author": "Qiuchi Li;Benyou Wang;Yudong Zhu;Qun Liu;Christina Lioma", "authorids": "~Qiuchi_Li1;~Benyou_Wang2;~Yudong_Zhu1;~Qun_Liu1;~Christina_Lioma1", "gender": "M;M;M;M;F", "homepage": ";https://wabyking.github.io/old.html;;http://liuquncn.github.io/;", "dblp": "166/3079;169/1793;40/1170.html;75/4402-1;http://dblp.uni-trier.de/pers/hd/l/Lioma:Christina", "google_scholar": ";Jk4vJU8AAAAJ;;2HhiGzcAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0002-1501-9914;;0000-0002-7000-1792;", "linkedin": ";;;qunliu/;", "or_profile": "~Qiuchi_Li1;~Benyou_Wang2;~Yudong_Zhu1;~Qun_Liu1;~Christina_Lioma1", "aff": "Copenhagen University;The Chinese University of Hong Kong, Shenzhen;;Huawei Noah's Ark Lab;University of Copenhagen", "aff_domain": "ku.dk;cuhk.edu.cn;;huawei.com;ku.dk", "position": "Assistant Professor;Assistant Professor;;Chief Scientist of Speech and Language Computing;Professor", "bibtex": "@misc{\nli2023adapting,\ntitle={Adapting Pre-trained Language Models for Quantum Natural Language Processing},\nauthor={Qiuchi Li and Benyou Wang and Yudong Zhu and Qun Liu and Christina Lioma},\nyear={2023},\nurl={https://openreview.net/forum?id=-NAi1oQJbA3}\n}", "github": "", "project": "", "reviewers": "oCYT;zk4i;qVGk", "site": "https://openreview.net/forum?id=-NAi1oQJbA3", "pdf_size": 533298, "recommendation": "5;5;5", "confidence": "3;4;5", "correctness": "3;3;3", "technical_novelty": "2;3;1", "empirical_novelty": "2;2;3", "wc_summary_paper": "83;96;134", "wc_strength_and_weaknesses": "275;166;61", "wc_clarity_quality_novelty_and_reproducibility": "13;79;30", "wc_summary_review": "45;21;17", "wc_review": "416;362;242", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 5.0, 0.0 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.0, 0.816496580927726 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 104.33333333333333, 21.638443156156644 ], "wc_strength_and_weaknesses_avg": [ 167.33333333333334, 87.3702212172749 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 40.666666666666664, 27.980151695244412 ], "wc_summary_review_avg": [ 27.666666666666668, 12.364824660660938 ], "wc_review_avg": [ 340.0, 72.71863585079137 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2174956356430861253&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Copenhagen;Chinese University of Hong Kong;Huawei", "aff_unique_dep": ";;Noah's Ark Lab", "aff_unique_url": "https://www.ku.dk;https://www.cuhk.edu.cn;https://www.huawei.com", "aff_unique_abbr": "UCPH;CUHK;Huawei", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "Denmark;China" }, { "id": "-Ov808Vm7dw", "title": "Less is More: Task-aware
Layer-wise Distillation for Language Model Compression", "track": "main", "status": "Withdraw", "tldr": "We propose a task-aware layer-wise knowledge distillation method for language model compression.", "abstract": "Layer-wise distillation is a powerful tool to compress large models (i.e. teacher models) into small ones (i.e., student models). The student distills knowledge from the teacher by mimicking the hidden representations of the teacher at every intermediate layer. However, layer-wise distillation is difficult. Since the student has a smaller model capacity than the teacher, it is often under-fitted. Furthermore, the hidden representations of the teacher contain redundant information that the student does not necessarily need for the target task's learning. To address these challenges, we propose a novel Task-aware layEr-wise Distillation (TED). TED designs task-aware filters to align the hidden representations of the student and the teacher at each layer. The filters select the knowledge that is useful for the target task from the hidden representations. As such, TED reduces the knowledge gap between the two models and helps the student to fit better on the target task. We evaluate TED in two scenarios: continual pre-training and fine-tuning. TED demonstrates significant and consistent improvements over existing distillation methods in both scenarios. We will make our code publicly available.", "keywords": "Knowledge Distillation;Pre-trained Language Models;Model Compression", "primary_area": "", "supplementary_material": "/attachment/c2c86ddf0de0c63e72705c40d31020258b4d88ab.zip", "author": "Chen Liang;Simiao Zuo;Qingru Zhang;Pengcheng He;Weizhu Chen;Tuo Zhao", "authorids": "~Chen_Liang3;~Simiao_Zuo1;~Qingru_Zhang2;~Pengcheng_He2;~Weizhu_Chen1;~Tuo_Zhao1", "gender": "F;;M;M;M;M", "homepage": "https://cliang1453.github.io/;;https://qingruzhang.github.io/;;https://www.microsoft.com/en-us/research/people/wzchen/;http://www2.isye.gatech.edu/~tzhao80", "dblp": "35/3221-6;232/2089;228/6749;116/8665;79/2536;", "google_scholar": "https://scholar.google.com/citations?hl=en;J8TSTXMAAAAJ;7YM-faYAAAAJ;https://scholar.google.com/citations?hl=en;LG_E-4EAAAAJ;EJXN6tYAAAAJ", "orcid": ";;;;;", "linkedin": ";;qingru-zhang-4b789a187;;;", "or_profile": "~Chen_Liang3;~Simiao_Zuo1;~Qingru_Zhang2;~Pengcheng_He2;~Weizhu_Chen1;~Tuo_Zhao1", "aff": "Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology;Microsoft;Microsoft GenAI;Georgia Institute of Technology", "aff_domain": "gatech.edu;gatech.edu;gatech.edu;microsoft.com;microsoft.com;gatech.edu", "position": "PhD student;PhD student;PhD student;Principal Researcher;Vice President;Associate Professor", "bibtex": "@misc{\nliang2023less,\ntitle={Less is More: Task-aware Layer-wise Distillation for Language Model Compression},\nauthor={Chen Liang and Simiao Zuo and Qingru Zhang and Pengcheng He and Weizhu Chen and Tuo Zhao},\nyear={2023},\nurl={https://openreview.net/forum?id=-Ov808Vm7dw}\n}", "github": "", "project": "", "reviewers": "dGYQ;UUqJ;hurr", "site": "https://openreview.net/forum?id=-Ov808Vm7dw", "pdf_size": 596238, "recommendation": "3;5;5", "confidence": "4;3;4", "correctness": "3;3;3", "technical_novelty": "3;2;3", "empirical_novelty": "0;2;2", "wc_summary_paper": "97;57;90", "wc_strength_and_weaknesses": "227;247;131", "wc_clarity_quality_novelty_and_reproducibility": "6;11;36", "wc_summary_review": "35;35;171", "wc_review": "365;350;428", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", 
"reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.3333333333333333, 0.9428090415820634 ], "wc_summary_paper_avg": [ 81.33333333333333, 17.441967269268172 ], "wc_strength_and_weaknesses_avg": [ 201.66666666666666, 50.63156678946007 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 17.666666666666668, 13.123346456686352 ], "wc_summary_review_avg": [ 80.33333333333333, 64.1110148275803 ], "wc_review_avg": [ 381.0, 33.793490497431605 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.49999999999999983, "corr_recommendation_correctness": 0.0, "gs_citation": 91, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10161302936206132409&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0;0;1;1;0", "aff_unique_norm": "Georgia Institute of Technology;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.gatech.edu;https://www.microsoft.com", "aff_unique_abbr": "Georgia Tech;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "-Ozk9LVtqbV", "title": "Cali-NCE: Boosting Cross-modal Video Representation Learning with Calibrated Alignment", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "With the large-scale video-text datasets being collected, learning general visual-textual representation has gained increasing attention. While recent methods are designed with the assumption that the alt-text description naturally conveys the meaning and context of the video in semantics (i.e. well aligned with each other), it is unlikely to be satisfied for the Internet data, which potentially harms the quality of the learned visual-textual representation. To address this challenge, we first revisit three mainstream approaches: correspondence modeling, contrastive learning and predictive coding, demonstrating that a simple co-training strategy with these methods leads to a clear improvement in performance. To further explore the complementary nature of different training strategies, we propose a simple yet effective joint training framework that factorizes the total objective into conditional ones, termed as Cali-NCE. Specifically, the correspondence between video and text descriptions is firstly estimated with a correspondence score, which is later used to calibrate the sample weightings during contrastive training. Through extensive experiments, we show that the proposed approach achieves state-of-the-art performance on multiple downstream tasks: text-to-video retrieval, video action recognition, and video retrieval. Code and models will be made publicly available. 
", "keywords": "visual-textual representation learning", "primary_area": "", "supplementary_material": "", "author": "Nanxuan Zhao;Jianbo Jiao;Weidi Xie;Dahua Lin", "authorids": "~Nanxuan_Zhao1;~Jianbo_Jiao2;~Weidi_Xie1;~Dahua_Lin1", "gender": "F;;M;M", "homepage": "http://nxzhao.com;https://jianbojiao.com/;http://dahua.site;https://weidixie.github.io", "dblp": "224/0709;150/6622;53/6088;199/1718", "google_scholar": ";HkEiMMwAAAAJ;GMzzRRUAAAAJ;https://scholar.google.co.uk/citations?user=Vtrqj4gAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Nanxuan_Zhao1;~Jianbo_Jiao2;~Dahua_Lin1;~Weidi_Xie3", "aff": "Adobe Research;University of Birmingham;The Chinese University of Hong Kong;Shanghai Jiaotong University", "aff_domain": "adobe.com;bham.ac.uk;cuhk.edu.hk;sjtu.edu.cn", "position": "Researcher;Assistant Professor;Associate Professor;Associate Professor", "bibtex": "@misc{\nzhao2023calince,\ntitle={Cali-{NCE}: Boosting Cross-modal Video Representation Learning with Calibrated Alignment},\nauthor={Nanxuan Zhao and Jianbo Jiao and Weidi Xie and Dahua Lin},\nyear={2023},\nurl={https://openreview.net/forum?id=-Ozk9LVtqbV}\n}", "github": "", "project": "", "reviewers": "eUUZ;nPDy;t1gW;Jb8i", "site": "https://openreview.net/forum?id=-Ozk9LVtqbV", "pdf_size": 3525416, "recommendation": "3;3;3;5", "confidence": "4;4;4;5", "correctness": "2;2;2;4", "technical_novelty": "2;2;2;2", "empirical_novelty": "2;0;2;2", "wc_summary_paper": "82;34;113;104", "wc_strength_and_weaknesses": "320;64;576;126", "wc_clarity_quality_novelty_and_reproducibility": "50;1;48;43", "wc_summary_review": "30;29;5;58", "wc_review": "482;128;742;331", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 2.5, 0.8660254037844386 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 1.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 83.25, 30.589009464185008 ], "wc_strength_and_weaknesses_avg": [ 271.5, 199.56139406207805 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 35.5, 20.081085628023203 ], "wc_summary_review_avg": [ 30.5, 18.76832437912346 ], "wc_review_avg": [ 420.75, 224.00376670940156 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 1.0, "corr_recommendation_correctness": 1.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=378253428188699466&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Adobe;University of Birmingham;Chinese University of Hong Kong;Shanghai Jiao Tong University", "aff_unique_dep": "Adobe Research;;;", "aff_unique_url": "https://research.adobe.com;https://www.birmingham.ac.uk;https://www.cuhk.edu.hk;https://www.sjtu.edu.cn", "aff_unique_abbr": "Adobe;Birmingham;CUHK;SJTU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;2;2", "aff_country_unique": "United States;United Kingdom;China" }, { "title": "Formal Mathematics Statement Curriculum Learning", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11923", "id": "-P7G-8dmSh4", "poster": "", "openreview": "https://openreview.net/forum?id=-P7G-8dmSh4", "slides": 
"https://iclr.cc/virtual/2023/poster/11923", "video": "https://iclr.cc/virtual/2023/poster/11923", "author_site": "Stanislas Polu, Jesse Han, Kunhao Zheng, Mantas Baksys, Igor Babuschkin, Ilya Sutskever", "tldr": "", "abstract": "We explore the use of expert iteration in the context of language modeling applied to formal mathematics. We show that at same compute budget, expert iteration, by which we mean proof search interleaved with learning, dramatically outperforms proof search only. We also observe that when applied to a collection of formal statements of sufficiently varied difficulty, expert iteration is capable of finding and solving a curriculum of increasingly difficult problems, without the need for associated ground-truth proofs. Finally, by applying this expert iteration to a manually curated set of problem statements, we surpass previous state-of-the-art on the miniF2F benchmark, automatically solving multiple challenging problems drawn from high school olympiads.", "keywords": "neural theorem proving;formal mathematics;language modeling;expert iteration", "primary_area": "", "supplementary_material": "", "author": "Stanislas Polu;Jesse Michael Han;Kunhao Zheng;Mantas Baksys;Igor Babuschkin;Ilya Sutskever", "authorids": "spolu@protonmail.com;~Jesse_Michael_Han1;~Kunhao_Zheng1;~Mantas_Baksys1;~Igor_Babuschkin1;~Ilya_Sutskever1", "gender": ";M;M;Not Specified;M;", "homepage": ";https://jesse-michael-han.github.io;https://dyekuu.github.io/;;https://www.babushk.in;", "dblp": ";;301/7847;;198/1445;60/5276", "google_scholar": ";;zDy4jSYAAAAJ;;_N2COeAAAAAJ;", "orcid": ";;0000-0003-1548-1890;0000-0001-9532-1007;;", "linkedin": ";;kunhao-zheng-x18/;;;", "or_profile": "spolu@protonmail.com;~Jesse_Michael_Han1;~Kunhao_Zheng1;~Mantas_Baksys1;~Igor_Babuschkin1;~Ilya_Sutskever1", "aff": ";University of Pittsburgh;Sea AI Lab;University of Cambridge;Google DeepMind;", "aff_domain": ";pitt.edu;sea.com;cam.ac.uk;deepmind.com;", "position": ";PhD student;Research Intern;Undergrad student;Senior Research Engineer;", "bibtex": "@inproceedings{\npolu2023formal,\ntitle={Formal Mathematics Statement Curriculum Learning},\nauthor={Stanislas Polu and Jesse Michael Han and Kunhao Zheng and Mantas Baksys and Igor Babuschkin and Ilya Sutskever},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=-P7G-8dmSh4}\n}", "github": "", "project": "", "reviewers": "GFD4;kECs;pG4L", "pdf_size": 915794, "recommendation": "5;8;8", "confidence": "4;4;4", "correctness": "2;4;4", "technical_novelty": "2;3;3", "empirical_novelty": "0;3;3", "wc_summary_paper": "66;292;224", "wc_strength_and_weaknesses": "342;94;140", "wc_clarity_quality_novelty_and_reproducibility": "38;25;157", "wc_summary_review": "30;15;26", "wc_review": "476;426;547", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1183;181;373", "reply_reviewers": "0;0;0", "reply_authors": "2;1;1", "recommendation_avg": [ 7.0, 1.4142135623730951 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.3333333333333335, 0.9428090415820634 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 1.4142135623730951 ], "wc_summary_paper_avg": [ 194.0, 94.6713613859369 ], "wc_strength_and_weaknesses_avg": [ 192.0, 107.71567512050726 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 73.33333333333333, 59.39884024307396 ], "wc_summary_review_avg": [ 23.666666666666668, 6.342099196813483 ], "wc_review_avg": [ 483.0, 49.64540932117154 
], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 579.0, 434.225747739583 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 1.0, "gs_citation": 177, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15715057210388047579&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=-P7G-8dmSh4", "email": ";pitt.edu;sea.com;cam.ac.uk;deepmind.com;", "author_num": 6, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "University of Pittsburgh;Sea AI Lab;University of Cambridge;Google", "aff_unique_dep": ";;;Google DeepMind", "aff_unique_url": "https://www.pitt.edu;;https://www.cam.ac.uk;https://deepmind.com", "aff_unique_abbr": "Pitt;;Cambridge;DeepMind", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;2;2", "aff_country_unique": "United States;;United Kingdom" }, { "id": "-PL1Gk4jt7", "title": "Key Design Choices for Double-transfer in Source-free Unsupervised Domain Adaptation", "track": "main", "status": "Reject", "tldr": "We systematically analyze the impact of the main design choices in Source-free Unsupervised Domain Adaptation through a large-scale empirical study.", "abstract": "Fine-tuning and Domain Adaptation emerged as effective strategies for efficiently transferring deep learning models to new target tasks. However, target domain labels are not accessible in many real-world scenarios. This led to the development of Unsupervised Domain Adaptation (UDA) methods, which only employ unlabeled target samples. Furthermore, efficiency and privacy requirements may also prevent the use of source domain data during the adaptation stage. This particularly challenging setting, known as Source-free Unsupervised Domain Adaptation (SF-UDA), is still understudied. In this paper, we systematically analyze the impact of the main design choices in SF-UDA through a large-scale empirical study on 500 models and 74 domain pairs. We identify the normalization approach, pre-training strategy, and backbone architecture as the most critical factors. Based on our observations, we propose recipes to best tackle SF-UDA scenarios. Moreover, we show that SF-UDA performs competitively also beyond standard benchmarks and backbone architectures, performing on par with UDA at a fraction of the data and computational cost. 
Experimental data and code will be released upon acceptance.", "keywords": "Transfer Learning;Unsupervised Domain Adaptation", "primary_area": "", "supplementary_material": "", "author": "Andrea Maracani;Raffaello Camoriano;Elisa Maiettini;Davide Talon;Lorenzo Rosasco;Lorenzo Natale", "authorids": "~Andrea_Maracani1;~Raffaello_Camoriano1;~Elisa_Maiettini1;~Davide_Talon1;~Lorenzo_Rosasco1;~Lorenzo_Natale1", "gender": "M;M;F;M;;M", "homepage": ";https://www.iit.it/it/people-details/-/people/raffaello-camoriano;;https://davidetalon.github.io/;;https://lornat75.github.io", "dblp": "266/1466;166/1687;;277/6653;;17/4667", "google_scholar": "IfwA_5gAAAAJ;vBBJ2wkAAAAJ;ZhaHBUMAAAAJ;IiMwp7EAAAAJ;;https://scholar.google.ca/citations?user=gERbHcEAAAAJ", "orcid": "0000-0002-6217-8731;0000-0002-8890-2732;;;;0000-0002-8777-5233", "linkedin": "andreamaracani/;raffaellocamoriano;;davidetalon;;", "or_profile": "~Andrea_Maracani1;~Raffaello_Camoriano1;~Elisa_Maiettini1;~Davide_Talon1;~Lorenzo_Rosasco1;~Lorenzo_Natale1", "aff": "Amazon;Politecnico di Torino;;University of Genoa;;Istituto Italiano di Tecnologia", "aff_domain": "amazon.com;polito.it;;unige.it;;iit.it", "position": "Intern;Postdoc;;PhD student;;Principal Researcher", "bibtex": "@misc{\nmaracani2023key,\ntitle={Key Design Choices for Double-transfer in Source-free Unsupervised Domain Adaptation},\nauthor={Andrea Maracani and Raffaello Camoriano and Elisa Maiettini and Davide Talon and Lorenzo Rosasco and Lorenzo Natale},\nyear={2023},\nurl={https://openreview.net/forum?id=-PL1Gk4jt7}\n}", "github": "", "project": "", "reviewers": "cjNK;1ukJ;GrZy;R8P1", "site": "https://openreview.net/forum?id=-PL1Gk4jt7", "pdf_size": 5798611, "recommendation": "5;5;5;6", "confidence": "4;5;4;5", "correctness": "3;3;4;4", "technical_novelty": "3;1;2;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "67;30;119;36", "wc_strength_and_weaknesses": "144;184;302;275", "wc_clarity_quality_novelty_and_reproducibility": "185;17;108;24", "wc_summary_review": "39;184;18;34", "wc_review": "435;415;547;369", "wc_reply_reviewers": "130;0;0;59", "wc_reply_authors": "2432;502;2116;957", "reply_reviewers": "1;0;0;1", "reply_authors": "6;1;4;2", "recommendation_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.5, 0.5 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.25, 0.82915619758885 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 63.0, 35.249113464029136 ], "wc_strength_and_weaknesses_avg": [ 226.25, 64.54601072103527 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 83.5, 68.67495904622004 ], "wc_summary_review_avg": [ 68.75, 66.99020450782338 ], "wc_review_avg": [ 441.5, 65.44272304847958 ], "wc_reply_reviewers_avg": [ 47.25, 53.50408862881415 ], "wc_reply_authors_avg": [ 1501.75, 796.6995591187433 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 3.25, 1.920286436967152 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17679519261820742493&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Amazon;Politecnico di Torino;University of Genoa;Istituto Italiano di Tecnologia", "aff_unique_dep": "Amazon.com, Inc.;;;", "aff_unique_url": "https://www.amazon.com;https://www.polito.it;https://www.unige.it;https://www.iit.it", "aff_unique_abbr": 
"Amazon;Polito;UniGe;IIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "United States;Italy" }, { "title": "Risk-Aware Reinforcement Learning with Coherent Risk Measures and Non-linear Function Approximation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11899", "id": "-RwZOVybbj", "poster": "/media/PosterPDFs/ICLR%202023/11899.png?t=1682607067.4093435", "openreview": "https://openreview.net/forum?id=-RwZOVybbj", "slides": "https://iclr.cc/virtual/2023/poster/11899", "video": "https://iclr.cc/virtual/2023/poster/11899", "author_site": "Thanh Steve Lam, Arun Verma, Bryan Kian Hsiang Low, Patrick Jaillet", "tldr": "We propose a unified framework to analyze the regret of risk-aware RL policy that uses a coherent risk measure in conjunction with non-linear function approximation.", "abstract": "We study the risk-aware reinforcement learning (RL) problem in the episodic finite-horizon Markov decision process with unknown transition and reward functions. In contrast to the risk-neutral RL problem, we consider minimizing the risk of having low rewards, which arise due to the intrinsic randomness of the MDPs and imperfect knowledge of the model. Our work provides a unified framework to analyze the regret of risk-aware RL policy with coherent risk measures in conjunction with non-linear function approximation, which gives the first sub-linear regret bounds in the setting. Finally, we validate our theoretical results via empirical experiments on synthetic and real-world data.", "keywords": "Risk-Aware Reinforcement Learning;Coherent Risk Measures;Non-linear Function Approximation", "primary_area": "", "supplementary_material": "/attachment/94a13b2773ae6b07ff069a72400fcc8b3d7e37f0.zip", "author": "Thanh Lam;Arun Verma;Bryan Kian Hsiang Low;Patrick Jaillet", "authorids": "~Thanh_Lam1;~Arun_Verma1;~Bryan_Kian_Hsiang_Low1;~Patrick_Jaillet1", "gender": "M;M;M;M", "homepage": ";https://arunv3rma.github.io/;http://www.comp.nus.edu.sg/~lowkh;http://web.mit.edu/jaillet/www/", "dblp": "280/1674;28/3688;97/4877;https://dblp.uni-trier.de/pers/hd/j/Jaillet:Patrick", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.co.in/citations?user=tBcixlUAAAAJ;https://scholar.google.com.tw/citations?user=2P-Q09UAAAAJ;ND0FM6EAAAAJ", "orcid": ";;;0000-0002-8585-6566", "linkedin": ";;;patrick-jaillet-1260445/", "or_profile": "~Thanh_Lam1;~Arun_Verma1;~Bryan_Kian_Hsiang_Low1;~Patrick_Jaillet1", "aff": "National University of Singapore;National University of Singapore;National University of Singapore;Massachusetts Institute of Technology", "aff_domain": "nus.edu.sg;nus.edu.sg;nus.edu.sg;mit.edu", "position": "PhD student;Postdoc;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nlam2023riskaware,\ntitle={Risk-Aware Reinforcement Learning with Coherent Risk Measures and Non-linear Function Approximation},\nauthor={Thanh Lam and Arun Verma and Bryan Kian Hsiang Low and Patrick Jaillet},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=-RwZOVybbj}\n}", "github": "", "project": "", "reviewers": "osfS;aLWD;yVV1", "pdf_size": 1952252, "recommendation": "6;6;8", "confidence": "3;3;3", "correctness": "4;4;4", "technical_novelty": "3;3;3", "empirical_novelty": "2;0;3", "wc_summary_paper": "19;104;117", "wc_strength_and_weaknesses": "132;254;217", "wc_clarity_quality_novelty_and_reproducibility": 
"79;308;63", "wc_summary_review": "91;25;65", "wc_review": "321;691;462", "wc_reply_reviewers": "0;23;62", "wc_reply_authors": "1419;2159;619", "reply_reviewers": "0;1;1", "reply_authors": "4;6;2", "recommendation_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.0 ], "correctness_avg": [ 4.0, 0.0 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 1.6666666666666667, 1.247219128924647 ], "wc_summary_paper_avg": [ 80.0, 43.45879274285777 ], "wc_strength_and_weaknesses_avg": [ 201.0, 51.07510809255979 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 150.0, 111.91365719458312 ], "wc_summary_review_avg": [ 60.333333333333336, 27.145697428669774 ], "wc_review_avg": [ 491.3333333333333, 152.46930474324623 ], "wc_reply_reviewers_avg": [ 28.333333333333332, 25.590796956892316 ], "wc_reply_authors_avg": [ 1399.0, 628.8614049746309 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 4.0, 1.632993161855452 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1600675746037206771&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=-RwZOVybbj", "email": "nus.edu.sg;nus.edu.sg;nus.edu.sg;mit.edu", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "National University of Singapore;Massachusetts Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.nus.edu.sg;https://web.mit.edu", "aff_unique_abbr": "NUS;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "Singapore;United States" }, { "id": "-SBZ8c356Oc", "title": "Improving Adversarial Robustness by Putting More Regularizations on Less Robust Samples", "track": "main", "status": "Reject", "tldr": "", "abstract": "\nAdversarial training, which is to enhance robustness against adversarial attacks, has received much attention because it is easy to generate human-imperceptible perturbations of data to deceive a given deep neural network. In this paper, we propose a new adversarial training algorithm that is theoretically well motivated and empirically superior to other existing algorithms. A novel feature of the proposed algorithm is to apply more regularization to data vulnerable to adversarial attacks than other existing regularization algorithms do. Theoretically, we show that our algorithm can be understood as an algorithm of minimizing a newly derived upper bound of the robust risk. 
Numerical experiments illustrate that our proposed algorithm improves the generalization (accuracy on examples) and robustness (accuracy on adversarial attacks) simultaneously to achieve the state-of-the-art performance.", "keywords": "Adversarial Training;Adversarial Attack;Robust Learning", "primary_area": "", "supplementary_material": "/attachment/d39784e3e94f9e0d713da691859819f635b618e5.zip", "author": "Dongyoon Yang;Insung Kong;Yongdai Kim", "authorids": "~Dongyoon_Yang2;~Insung_Kong1;~Yongdai_Kim1", "gender": "M;M;M", "homepage": "https://sites.google.com/view/dyoony;https://sites.google.com/view/insungkong/home;", "dblp": ";;93/734", "google_scholar": ";NYdp2FQAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Dongyoon_Yang2;~Insung_Kong1;~Yongdai_Kim1", "aff": "Seoul National University;Seoul National University;Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr;snu.ac.kr", "position": "Researcher;PhD student;Full Professor", "bibtex": "@misc{\nyang2023improving,\ntitle={Improving Adversarial Robustness by Putting More Regularizations on Less Robust Samples},\nauthor={Dongyoon Yang and Insung Kong and Yongdai Kim},\nyear={2023},\nurl={https://openreview.net/forum?id=-SBZ8c356Oc}\n}", "github": "", "project": "", "reviewers": "m348;cZx4;E21G;hmqr", "site": "https://openreview.net/forum?id=-SBZ8c356Oc", "pdf_size": 368162, "recommendation": "5;6;6;8", "confidence": "4;5;4;4", "correctness": "2;3;4;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "32;67;149;36", "wc_strength_and_weaknesses": "388;218;140;158", "wc_clarity_quality_novelty_and_reproducibility": "4;48;58;13", "wc_summary_review": "46;54;35;36", "wc_review": "470;387;382;243", "wc_reply_reviewers": "1187;318;0;29", "wc_reply_authors": "1855;1301;388;365", "reply_reviewers": "7;2;0;1", "reply_authors": "7;3;1;1", "recommendation_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 71.0, 47.026588224109986 ], "wc_strength_and_weaknesses_avg": [ 226.0, 97.88769074812215 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 30.75, 22.75274708689041 ], "wc_summary_review_avg": [ 42.75, 7.790218225441442 ], "wc_review_avg": [ 370.5, 81.48772913758242 ], "wc_reply_reviewers_avg": [ 383.5, 480.27205831695017 ], "wc_reply_authors_avg": [ 977.25, 631.9265681232274 ], "reply_reviewers_avg": [ 2.5, 2.692582403567252 ], "reply_authors_avg": [ 3.0, 2.449489742783178 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.13245323570650439, "corr_recommendation_correctness": 0.3244428422615251, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3210535082734519034&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0;0", "aff_unique_norm": "Seoul National University", "aff_unique_dep": "", "aff_unique_url": "https://www.snu.ac.kr", "aff_unique_abbr": "SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "id": "-SKvXtXPCaJ", "title": "Learning Control by Iterative Inversion", "track": "main", "status": "Reject", "tldr": "Inverting a dynamical system to give the actions which yield desired behavior, represented as an embedding of a trajectory.", "abstract": "We formulate learning for control as an inverse 
problem - inverting a dynamical system to give the actions which yield desired behavior. The key challenge in this formulation is a distribution shift in the inputs to the function to be inverted - the learning agent can only observe the forward mapping (its actions' consequences) on trajectories that it can execute, yet must learn the inverse mapping for inputs-outputs that correspond to a different, desired behavior. We propose a general recipe for inverse problems with a distribution shift that we term $\\textit{iterative inversion}$ - learn the inverse mapping under the current input distribution (policy), then use it on the desired output samples to obtain a new input distribution, and repeat.\nAs we show, iterative inversion can converge to the desired inverse mapping, but under rather strict conditions on the mapping itself.\nWe next apply iterative inversion to learn control. Our input is a set of demonstrations of desired behavior, given as video embeddings of trajectories (without actions), and our method iteratively learns to imitate trajectories generated by the current policy, perturbed by random exploration noise. We find that by constantly adding the demonstrated trajectory embeddings as input to the policy when generating trajectories to imitate, a la iterative inversion, we effectively steer the learning towards the desired trajectory distribution. To the best of our knowledge, this is the first exploration of learning control from the viewpoint of inverse problems, and the main advantage of our approach is simplicity - it does not require rewards, and only employs supervised learning, which can be easily scaled to use state-of-the-art trajectory embedding techniques and policy representations. Indeed, with a VQ-VAE embedding, and a transformer-based policy, we demonstrate non-trivial continuous control on several tasks. Further, we report an improved performance on imitating diverse behaviors compared to reward-based methods.
", "keywords": "RL;IRL", "primary_area": "", "supplementary_material": "", "author": "Gal Leibovich;Guy Jacob;Or Avner;Gal Novik;Aviv Tamar", "authorids": "~Gal_Leibovich1;~Guy_Jacob1;~Or_Avner1;~Gal_Novik1;~Aviv_Tamar2", "gender": "M;M;;;M", "homepage": ";;;;https://avivt.github.io/avivt/", "dblp": ";;264/1999;;49/10622", "google_scholar": ";;;;https://scholar.google.co.il/citations?user=kppa2vgAAAAJ", "orcid": ";;;;", "linkedin": "gal-leibovich-04b22522/;;oravner/;gal-novik-9871641/;", "or_profile": "~Gal_Leibovich1;~Guy_Jacob1;~Or_Avner1;~Gal_Novik1;~Aviv_Tamar2", "aff": ";Intel;Huawei Technologies Ltd.;Intel Corporation;Technion, Technion", "aff_domain": ";intel.com;huawei.com;intel.com;technion.ac.il", "position": ";Researcher;Researcher;Research Engineer;Assistant Professor", "bibtex": "@misc{\nleibovich2023learning,\ntitle={Learning Control by Iterative Inversion},\nauthor={Gal Leibovich and Guy Jacob and Or Avner and Gal Novik and Aviv Tamar},\nyear={2023},\nurl={https://openreview.net/forum?id=-SKvXtXPCaJ}\n}", "github": "", "project": "", "reviewers": "9eeh;VhZe;2R3c", "site": "https://openreview.net/forum?id=-SKvXtXPCaJ", "pdf_size": 3307935, "recommendation": "5;6;6", "confidence": "3;3;4", "correctness": "3;4;3", "technical_novelty": "2;3;2", "empirical_novelty": "2;3;2", "wc_summary_paper": "174;115;154", "wc_strength_and_weaknesses": "209;185;396", "wc_clarity_quality_novelty_and_reproducibility": "145;25;7", "wc_summary_review": "31;23;51", "wc_review": "559;348;608", "wc_reply_reviewers": "0;0;13", "wc_reply_authors": "1037;152;1069", "reply_reviewers": "0;0;1", "reply_authors": "3;1;3", "recommendation_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 147.66666666666666, 24.499433100017278 ], "wc_strength_and_weaknesses_avg": [ 263.3333333333333, 94.3197870132361 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 59.0, 61.25357132445422 ], "wc_summary_review_avg": [ 35.0, 11.775681155103795 ], "wc_review_avg": [ 505.0, 112.80366424308505 ], "wc_reply_reviewers_avg": [ 4.333333333333333, 6.128258770283413 ], "wc_reply_authors_avg": [ 752.6666666666666, 424.93633510078763 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.9428090415820634 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.4999999999999999, "corr_recommendation_correctness": 0.4999999999999999, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9480286826914959418&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 8, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Intel;Huawei;Technion - Israel Institute of Technology", "aff_unique_dep": "Intel Corporation;Huawei Technologies;", "aff_unique_url": "https://www.intel.com;https://www.huawei.com;https://www.technion.ac.il/en/", "aff_unique_abbr": "Intel;Huawei;Technion", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;2", "aff_country_unique": "United States;China;Israel" }, { "id": "-UsbRlXzMG", "title": "How (Un)Fair is Text Summarization?", "track": "main", "status": "Reject", "tldr": "We show that machine learning based summarizers exhibit bias toward different groups and are very 
sensitive to document structure.", "abstract": "Creating a good summary requires carefully choosing details from the original text to accurately represent it in a limited space. If a summary contains biased information about a group, it risks passing this bias off to readers as fact. These risks increase if we consider not just one biased summary, but rather a biased summarization algorithm. Despite this, little work has measured whether these summarizers demonstrate biased performance. Rather, most work in summarization focuses on improving performance, ignoring questions of bias. In this paper we demonstrate that automatic summarizers both amplify and introduce bias towards information about under-represented groups. Additionally, we show that summarizers are highly sensitive to document structure, making the summaries they generate unstable under changes that are semantically meaningless to humans, which poses a further fairness risk. Given these results, and the large scale potential for harm presented by biased summarization, we recommend that bias analysis be performed and reported on summarizers to ensure that new automatic summarization methods do not introduce bias to the summaries they generate.", "keywords": "Natural language processing;Summarization;Fairness", "primary_area": "", "supplementary_material": "/attachment/1dbd198fa7bd843a52af2c5de91afef6fd363904.zip", "author": "Hannah Brown;Reza Shokri", "authorids": "~Hannah_Brown1;~Reza_Shokri1", "gender": "Non-Binary;", "homepage": ";", "dblp": "261/4912;", "google_scholar": "t4wu34QAAAAJ;", "orcid": "0000-0001-6350-874X;", "linkedin": ";", "or_profile": "~Hannah_Brown1;~Reza_Shokri1", "aff": "National University of Singapore;", "aff_domain": "nus.edu;", "position": "PhD student;", "bibtex": "@misc{\nbrown2023how,\ntitle={How (Un)Fair is Text Summarization?},\nauthor={Hannah Brown and Reza Shokri},\nyear={2023},\nurl={https://openreview.net/forum?id=-UsbRlXzMG}\n}", "github": "", "project": "", "reviewers": "45gx;tjG5;KeGL;4JiR", "site": "https://openreview.net/forum?id=-UsbRlXzMG", "pdf_size": 298832, "recommendation": "3;3;5;5", "confidence": "3;3;4;4", "correctness": "3;2;3;2", "technical_novelty": "1;2;4;2", "empirical_novelty": "4;2;3;2", "wc_summary_paper": "51;207;116;111", "wc_strength_and_weaknesses": "393;362;55;603", "wc_clarity_quality_novelty_and_reproducibility": "57;53;274;90", "wc_summary_review": "87;58;41;90", "wc_review": "588;680;486;894", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "753;641;533;499", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.25, 1.0897247358851685 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 121.25, 55.72420928106562 ], "wc_strength_and_weaknesses_avg": [ 353.25, 195.56632506645923 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 118.5, 90.9189199231931 ], "wc_summary_review_avg": [ 69.0, 20.43281674170255 ], "wc_review_avg": [ 662.0, 150.4991694329241 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 606.5, 99.51256202108355 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 1.0, "corr_recommendation_correctness": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2714892913761231928&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 0, 
"aff_unique_index": "0", "aff_unique_norm": "National University of Singapore", "aff_unique_dep": "", "aff_unique_url": "https://www.nus.edu.sg", "aff_unique_abbr": "NUS", "aff_country_unique_index": "0", "aff_country_unique": "Singapore" }, { "id": "-WiOF7FTt-n", "title": "Rethinking Positive Sampling for Contrastive Learning with Kernel", "track": "main", "status": "Reject", "tldr": "Improving positive sampling in contrastive learning using kernel", "abstract": " Data augmentation is a crucial component in unsupervised contrastive learning (CL). It determines how positive samples are defined and, ultimately, the quality of the representation. Even if efforts have been made to find efficient augmentations for ImageNet, CL underperforms compared to supervised methods and it is still an open problem in other applications, such as medical imaging, or in datasets with easy-to-learn but irrelevant imaging features. In this work, we propose a new way to define positive samples using kernel theory along with a novel loss called \\textit{decoupled uniformity}. We propose to integrate prior information, learnt from generative models viewed as feature extractor, or given as auxiliary attributes, into contrastive learning, to make it less dependent on data augmentation. We draw a connection between contrastive learning and the conditional mean embedding theory to derive tight bounds on the downstream classification loss. In an unsupervised setting, we empirically demonstrate that CL benefits from generative models, such as VAE and GAN, to less rely on data augmentations. We validate our framework on vision and medical datasets including CIFAR10, CIFAR100, STL10, ImageNet100, CheXpert and a brain MRI dataset. In the weakly supervised setting, we demonstrate that our formulation provides state-of-the-art results.", "keywords": "contrastive learning;kernel theory;representation learning;deep learning", "primary_area": "", "supplementary_material": "", "author": "Benoit Dufumier;Carlo Alberto Barbano;Robin Louiset;Edouard Duchesnay;Pietro Gori", "authorids": "~Benoit_Dufumier1;~Carlo_Alberto_Barbano1;robin.louiset@cea.fr;~Edouard_Duchesnay1;~Pietro_Gori1", "gender": ";M;;M;", "homepage": "http://benoitdufumier.ml;;;https://duchesnay.github.io/;https://perso.telecom-paristech.fr/pgori/index.html", "dblp": "294/4585;262/6495;;;134/9724", "google_scholar": ";sq0-Os4AAAAJ;;https://scholar.google.fr/citations?user=mG6V3q4AAAAJ;https://scholar.google.fr/citations?user=id9wCjsAAAAJ", "orcid": "0000-0002-8253-2363;0000-0001-9512-0440;;0000-0002-4073-3490;", "linkedin": ";;;edouard-duchesnay-27b47b8;", "or_profile": "~Benoit_Dufumier1;~Carlo_Alberto_Barbano1;robin.louiset@cea.fr;~Edouard_Duchesnay1;~Pietro_Gori1", "aff": "EPFL - EPF Lausanne;T\u00e9l\u00e9com Paris;;CEA;Telecom Paris", "aff_domain": "epfl.ch;telecom-paris.fr;;cea.fr;telecom-paris.fr", "position": "Postdoc;PhD student;;Full Professor;Associate Professor", "bibtex": "@misc{\ndufumier2023rethinking,\ntitle={Rethinking Positive Sampling for Contrastive Learning with Kernel},\nauthor={Benoit Dufumier and Carlo Alberto Barbano and Robin Louiset and Edouard Duchesnay and Pietro Gori},\nyear={2023},\nurl={https://openreview.net/forum?id=-WiOF7FTt-n}\n}", "github": "", "project": "", "reviewers": "YGqm;6Wk9;khWQ;rmip", "site": "https://openreview.net/forum?id=-WiOF7FTt-n", "pdf_size": 609332, "recommendation": "5;5;5;6", "confidence": "4;4;4;4", "correctness": "4;3;3;4", "technical_novelty": "2;2;3;4", "empirical_novelty": "2;3;3;4", "wc_summary_paper": 
"73;116;83;107", "wc_strength_and_weaknesses": "328;385;319;150", "wc_clarity_quality_novelty_and_reproducibility": "69;246;208;86", "wc_summary_review": "86;41;71;26", "wc_review": "556;788;681;369", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 94.75, 17.41228014936585 ], "wc_strength_and_weaknesses_avg": [ 295.5, 87.73397289533855 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 152.25, 76.18521838257078 ], "wc_summary_review_avg": [ 56.0, 23.717082451262844 ], "wc_review_avg": [ 598.5, 155.8789594525188 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15623970906745890414&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "EPFL;T\u00e9l\u00e9com Paris;Commissariat \u00e0 l'\u00c9nergie Atomique et aux \u00c9nergies Alternatives;Telecom Paris", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.epfl.ch;https://www.telecom-paris.fr;https://www cea fr;https://www.telecom-paris.fr", "aff_unique_abbr": "EPFL;T\u00e9l\u00e9com Paris;CEA;Telecom Paris", "aff_campus_unique_index": "0", "aff_campus_unique": "Lausanne;", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "Switzerland;France" }, { "id": "-XC_lMynIT", "title": "Signal to Sequence Attention-Based Multiple Instance Network for Segmentation Free Inference of RNA Modifications", "track": "main", "status": "Reject", "tldr": "", "abstract": "Direct RNA sequencing technology works by allowing long RNA molecules to pass through tiny pores, generating electrical current, called squiggle, that are interpreted as a series of RNA nucleotides through the use of Deep Learning algorithms. The platform has also facilitated computational detection of RNA modifications via machine learning and statistical approaches as they cause detectable shift in the current generated as the modified nucleotides pass through the pores. Nevertheless, since modifications only occur in a handful of positions along the molecules, existing techniques require segmentation of the long squiggle in order to filter off irrelevant signals and this step produces large computational and storage overhead. Inspired by the recent work in vector similarity search, we introduce a segmentation-free approach by utilizing scaled-dot product attention to perform implicit segmentation and feature extraction of raw signals that correspond to sites of interest. We further demonstrate the feasibility of our approach by achieving significant speedup while maintaining competitive performance in m6A detection against existing state-of-the-art methods.", "keywords": "Multiple Instance Learning;Deep Learning;RNA Modification;Computational Biology", "primary_area": "", "supplementary_material": "/attachment/6c5f4ce32043a747aae79fbe471e73cfc73598d7.zip", "author": "Christopher Hendra;Alexandre H. 
Thiery;Jonathan Goeke", "authorids": "~Christopher_Hendra1;~Alexandre_H._Thiery1;~Jonathan_Goeke1", "gender": ";M;M", "homepage": ";https://github.com/GoekeLab;http://www.normalesup.org/~athiery/", "dblp": ";;203/7143", "google_scholar": "YTJXA54AAAAJ;;https://scholar.google.com.sg/citations?user=szBOsCgAAAAJ", "orcid": ";;", "linkedin": ";;alexandre-thiery-2981686/", "or_profile": "~Christopher_Hendra1;~Jonathan_Goeke1;~Alexandre_Hoang_THIERY1", "aff": ";Genome Institute of Singapore;National University of Singapore", "aff_domain": ";astar.edu.sg;nus.edu.sg", "position": ";Principal Researcher;Associate Professor", "bibtex": "@misc{\nhendra2023signal,\ntitle={Signal to Sequence Attention-Based Multiple Instance Network for Segmentation Free Inference of {RNA} Modifications},\nauthor={Christopher Hendra and Alexandre H. Thiery and Jonathan Goeke},\nyear={2023},\nurl={https://openreview.net/forum?id=-XC_lMynIT}\n}", "github": "", "project": "", "reviewers": "FDNP;MYMy;rbVq;NJ1g", "site": "https://openreview.net/forum?id=-XC_lMynIT", "pdf_size": 2896724, "recommendation": "3;5;6;6", "confidence": "2;1;4;3", "correctness": "3;3;3;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "3;2;3;3", "wc_summary_paper": "96;34;127;120", "wc_strength_and_weaknesses": "151;227;146;448", "wc_clarity_quality_novelty_and_reproducibility": "410;65;59;40", "wc_summary_review": "46;60;68;40", "wc_review": "703;386;400;648", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 2.5, 1.118033988749895 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 94.25, 36.635877224382114 ], "wc_strength_and_weaknesses_avg": [ 243.0, 122.63156200587188 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 143.5, 154.14035811558244 ], "wc_summary_review_avg": [ 53.5, 11.07925990308017 ], "wc_review_avg": [ 534.25, 142.66810260180796 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.5477225575051661, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:xu5EmKe79KUJ:scholar.google.com/&scioq=Signal+to+Sequence+Attention-Based+Multiple+Instance+Network+for+Segmentation+Free+Inference+of+RNA+Modifications&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Genome Institute of Singapore;National University of Singapore", "aff_unique_dep": ";", "aff_unique_url": "https://www.genome-institute-of-singapore.org;https://www.nus.edu.sg", "aff_unique_abbr": "GIS;NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Singapore" }, { "title": "Policy Expansion for Bridging Offline-to-Online Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11562", "id": "-Y34L45JR6z", "poster": "", "openreview": "https://openreview.net/forum?id=-Y34L45JR6z", "slides": "https://iclr.cc/virtual/2023/poster/11562", "video": "https://iclr.cc/virtual/2023/poster/11562", "author_site": "Haichao Zhang, Wei Xu, Haonan Yu", "tldr": "Bridging offline-to-online RL with Policy Expansion", "abstract": "Pre-training with 
offline data and online fine-tuning using reinforcement learning is a promising strategy for learning control policies by leveraging the best of both worlds in terms of sample efficiency and performance. One natural approach is to initialize the policy for online learning with the one trained offline.\nIn this work, we introduce a policy expansion scheme for this task. After learning the offline policy, we use it as one candidate policy in a policy set, and further learn another policy that will be responsible for further learning as an expansion to the policy set. The two policies will be composed in an adaptive manner for interacting with the environment. With this approach, the policy previously learned offline is fully retained during online learning, thus mitigating potential issues such as destroying the useful behaviors of the offline policy in the initial stage of online learning, while allowing the offline policy to participate in the exploration naturally in an adaptive manner. Moreover, new useful behaviors can potentially be captured by the newly added policy through learning. \nExperiments are conducted on a number of tasks, and the results demonstrate the effectiveness of the proposed approach.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Haichao Zhang;Wei Xu;Haonan Yu", "authorids": "~Haichao_Zhang4;~Wei_Xu13;~Haonan_Yu5", "gender": "M;;M", "homepage": ";;https://sites.google.com/site/hczhang1/", "dblp": ";;", "google_scholar": "Gxz1fqwAAAAJ;Army5cEAAAAJ;_OsT-RgAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Wei_Xu13;~Haonan_Yu5;~Haichao_Zhang2", "aff": "Horizon Robotics;Horizon Robotics;Horizon Robotics", "aff_domain": "horizon.auto;horizon.auto;horizon.ai", "position": "Researcher;Research Scientist;Research Scientist", "bibtex": "@inproceedings{\nzhang2023policy,\ntitle={Policy Expansion for Bridging Offline-to-Online Reinforcement Learning},\nauthor={Haichao Zhang and Wei Xu and Haonan Yu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=-Y34L45JR6z}\n}", "github": "", "project": "", "reviewers": "EsUG;2rLA;U35N;NB3z", "pdf_size": 3663158, "recommendation": "5;6;6;8", "confidence": "3;4;3;4", "correctness": "3;3;4;4", "technical_novelty": "2;3;2;3", "empirical_novelty": "2;3;3;4", "wc_summary_paper": "60;121;94;76", "wc_strength_and_weaknesses": "658;218;273;259", "wc_clarity_quality_novelty_and_reproducibility": "42;35;36;18", "wc_summary_review": "78;14;69;35", "wc_review": "838;388;472;388", "wc_reply_reviewers": "78;0;0;47", "wc_reply_authors": "2252;590;1371;605", "reply_reviewers": "1;0;0;1", "reply_authors": "4;1;3;3", "recommendation_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 87.75, 22.65364209128413 ], "wc_strength_and_weaknesses_avg": [ 352.0, 177.82153975264077 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 32.75, 8.926785535678562 ], "wc_summary_review_avg": [ 49.0, 25.79728667902886 ], "wc_review_avg": [ 521.5, 185.92135434102238 ], "wc_reply_reviewers_avg": [ 31.25, 33.11627243516396 ], "wc_reply_authors_avg": [ 1204.5, 682.2735888190309 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.75, 1.0897247358851685 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.6882472016116854,
"corr_recommendation_correctness": 0.6882472016116854, "gs_citation": 75, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3431029221792713047&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=-Y34L45JR6z", "email": "horizon.auto;horizon.auto;horizon.ai", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Horizon Robotics", "aff_unique_dep": "", "aff_unique_url": "https://www.horizon-robotics.com/", "aff_unique_abbr": "Horizon Robotics", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Compositionality with Variation Reliably Emerges in Neural Networks", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10921", "id": "-Yzz6vlX7V-", "poster": "", "openreview": "https://openreview.net/forum?id=-Yzz6vlX7V-", "slides": "https://iclr.cc/virtual/2023/poster/10921", "video": "https://iclr.cc/virtual/2023/poster/10921", "author_site": "Henry Conklin, Kenny Smith", "tldr": "Compositional systems reliably emerge between neural networks- just with natural language like variation.", "abstract": "Human languages enable robust generalization, letting us leverage our prior experience to communicate about novel meanings. This is partly due to language being compositional, where the meaning of a whole expression is a function of its parts. Natural languages also exhibit extensive variation, encoding meaning predictably enough to enable generalization without limiting speakers to one and only one way of expressing something. Previous work looking at the languages that emerge between neural networks in a communicative task has shown languages that enable robust communication and generalization reliably emerge. Despite this those languages score poorly on existing measures of compositionality leading to claims that a language's degree of compositionality has little bearing on how well it can generalise. We argue that the languages that emerge between networks are in fact straightforwardly compositional, but with a degree of natural language-like variation that can obscure their compositionality from existing measures. We introduce 4 measures of linguistic variation and show that early in training measures of variation correlate with generalization performance, but that this effect goes away over time as the languages that emerge become regular enough to generalize robustly. Like natural languages, emergent languages appear able to support a high degree of variation while retaining the generalizability we expect from compositionality. 
In an effort to decrease the variability of emergent languages we show how reducing a model's capacity results in greater regularity, in line with claims about factors shaping the emergence of regularity in human language.", "keywords": "compositionality;emergence;generalization;regularity", "primary_area": "", "supplementary_material": "", "author": "Henry Conklin;Kenny Smith", "authorids": "~Henry_Conklin1;~Kenny_Smith1", "gender": "M;M", "homepage": "https://hconklin.com/;http://www.ling.ed.ac.uk/~kenny", "dblp": ";58/6224", "google_scholar": ";", "orcid": ";0000-0002-4530-6914", "linkedin": ";", "or_profile": "~Henry_Conklin1;~Kenny_Smith1", "aff": "University of Edinburgh, University of Edinburgh;University of Edinburgh", "aff_domain": "ed.ac.uk;ed.ac.uk", "position": "PhD student;Professor", "bibtex": "@inproceedings{\nconklin2023compositionality,\ntitle={Compositionality with Variation Reliably Emerges in Neural Networks},\nauthor={Henry Conklin and Kenny Smith},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=-Yzz6vlX7V-}\n}", "github": "", "project": "", "reviewers": "UTH9;uoNs;uQjZ;hjdA", "pdf_size": 2914193, "recommendation": "5;5;5;5", "confidence": "2;4;4;4", "correctness": "3;2;3;3", "technical_novelty": "2;2;4;3", "empirical_novelty": "3;3;4;3", "wc_summary_paper": "69;56;200;71", "wc_strength_and_weaknesses": "247;92;120;907", "wc_clarity_quality_novelty_and_reproducibility": "180;47;52;58", "wc_summary_review": "37;52;61;30", "wc_review": "533;247;433;1066", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 99.0, 58.59607495387383 ], "wc_strength_and_weaknesses_avg": [ 341.5, 331.6749161453124 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 84.25, 55.41829571540431 ], "wc_summary_review_avg": [ 45.0, 12.186057606953941 ], "wc_review_avg": [ 569.75, 304.3364708673609 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17179446392451033631&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 0, "pdf": "https://openreview.net/pdf?id=-Yzz6vlX7V-", "email": "ed.ac.uk;ed.ac.uk", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Edinburgh", "aff_unique_dep": "", "aff_unique_url": "https://www.ed.ac.uk", "aff_unique_abbr": "Edinburgh", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "id": "-aEuKX6zQKmr", "title": "EmbedDistill: A geometric knowledge distillation for information retrieval", "track": "main", "status": "Reject", "tldr": "We propose a novel distillation approach to train dual encoder information retrieval models that goes beyond score-matching and aims to explicitly align embedding spaces of teacher and student models.", "abstract": "Large neural models (such as Transformers) achieve state-of-the-art performance for information 
retrieval. In this paper, we aim to improve distillation methods that pave the way for the deployment of such models in practice. The proposed distillation approach supports both retrieval and re-ranking stages and crucially leverages the relative geometry among queries and documents learned by the large teacher model. It goes beyond existing distillation methods in the information retrieval literature, which simply rely on the teacher's scalar scores over the training data, on two fronts: providing stronger signals about local geometry via embedding matching and attaining better coverage of data manifold globally via query generation. Embedding matching provides a stronger signal to align the representations of the teacher and student models. At the same time, query generation explores the data manifold to reduce the discrepancies between the student and teacher where the training data is sparse. Our distillation approach is theoretically justified and applies to both dual encoder (DE) and cross-encoder (CE) models. Furthermore, for distilling a CE model to a DE model via embedding matching, we propose a novel dual pooling-based scorer for the CE model that facilitates a more distillation-friendly embedding geometry, especially for DE student models.", "keywords": "Knowledge distillation;dual encoder;cross encoder;information retrieval;query generation;embedding matching;retrieval;re-ranking", "primary_area": "", "supplementary_material": "", "author": "Seungyeon Kim;Ankit Singh Rawat;Manzil Zaheer;Sadeep Jayasumana;Veeranjaneyulu Sadhanala;Wittawat Jitkrittum;Aditya Krishna Menon;Rob Fergus;Sanjiv Kumar", "authorids": "~Seungyeon_Kim1;~Ankit_Singh_Rawat1;~Manzil_Zaheer1;~Sadeep_Jayasumana1;~Veeranjaneyulu_Sadhanala1;~Wittawat_Jitkrittum1;~Aditya_Krishna_Menon1;~Rob_Fergus1;~Sanjiv_Kumar1", "gender": ";M;M;;M;M;;M;", "homepage": "https://www.seungyeon.ai;https://ankitsrawat.github.io/home/;https://www.aclweb.org/anthology/people/m/manzil-zaheer/;;https://veeranjaneyulus.github.io/;http://wittawat.com;;http://cs.nyu.edu/fergus/;http://www.sanjivk.com/", "dblp": "74/7997-1.html;https://dblp.org/pers/hd/r/Rawat:Ankit_Singh;40/10701;;81/7249;95/3398.html;;77/3763;", "google_scholar": "zbcN_QIAAAAJ;http://scholar.google.com/citations?user=U0_ab4cAAAAJ;A33FhJMAAAAJ;;FuIExf4AAAAJ;https://scholar.google.co.uk/citations?hl=en;;https://scholar.google.com.tw/citations?user=GgQ9GEkAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;;;0000-0002-9400-9262;;;", "linkedin": ";;;;;wittawat-jitkrittum/;;;", "or_profile": "~Seungyeon_Kim1;~Ankit_Singh_Rawat1;~Manzil_Zaheer1;~Sadeep_Jayasumana1;~Veeranjaneyulu_Sadhanala1;~Wittawat_Jitkrittum1;~Aditya_Krishna_Menon1;~Rob_Fergus1;~Sanjiv_Kumar1", "aff": "Google;Google;Google DeepMind;;Google;Google Research;;Google;Google", "aff_domain": "google.com;google.com;deepmind.com;;google.com;google.com;;google.com;google.com", "position": "Researcher;Research Scientist;Researcher;;Researcher;Research Scientist;;Research scientist;Research Scientist", "bibtex": "@misc{\nkim2023embeddistill,\ntitle={EmbedDistill: A geometric knowledge distillation for information retrieval},\nauthor={Seungyeon Kim and Ankit Singh Rawat and Manzil Zaheer and Sadeep Jayasumana and Veeranjaneyulu Sadhanala and Wittawat Jitkrittum and Aditya Krishna Menon and Rob Fergus and Sanjiv Kumar},\nyear={2023},\nurl={https://openreview.net/forum?id=-aEuKX6zQKmr}\n}", "github": "", "project": "", "reviewers": "mS5Z;VV1L;CgJL;zx15", "site": "https://openreview.net/forum?id=-aEuKX6zQKmr", 
"pdf_size": 697191, "recommendation": "3;5;5;6", "confidence": "4;3;4;3", "correctness": "2;3;4;3", "technical_novelty": "2;3;3;2", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "29;52;36;86", "wc_strength_and_weaknesses": "152;133;68;331", "wc_clarity_quality_novelty_and_reproducibility": "26;23;23;179", "wc_summary_review": "21;29;43;79", "wc_review": "228;237;170;675", "wc_reply_reviewers": "125;0;0;116", "wc_reply_authors": "1250;1097;539;1515", "reply_reviewers": "2;0;0;1", "reply_authors": "4;2;2;3", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 50.75, 21.992896580487074 ], "wc_strength_and_weaknesses_avg": [ 171.0, 97.48589641584059 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 62.75, 67.12814238454689 ], "wc_summary_review_avg": [ 43.0, 22.22611077089287 ], "wc_review_avg": [ 327.5, 202.27024002556578 ], "wc_reply_reviewers_avg": [ 60.25, 60.33396638710238 ], "wc_reply_authors_avg": [ 1100.25, 356.8804947037593 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 9, 0 ], "corr_recommendation_confidence": -0.6882472016116854, "corr_recommendation_correctness": 0.6488856845230502, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2626310148737686729&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;1;0;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "-azium0cV9", "title": "SWARM Parallelism: Training Large Models Can Be Surprisingly Communication-Efficient", "track": "main", "status": "Reject", "tldr": "We propose a model-parallel training algorithm designed for poorly connected, heterogeneous unreliable devices (i.e. preemptible instances or volunteer devices).", "abstract": "Many deep learning applications benefit from using large models with billions of parameters. Training these models is notoriously expensive due to the need for specialized HPC clusters. In this work, we consider alternative setups for training large models: using cheap ``preemptible'' instances or pooling existing resources from multiple regions. We analyze the performance of existing model-parallel algorithms in these conditions and find configurations where training larger models becomes less communication-intensive. Based on these findings, we propose SWARM Parallelism (Stochastically Wired Adaptively Rebalanced Model Parallelism), a model-parallel training algorithm designed for poorly connected, heterogeneous and unreliable devices. SWARM creates temporary randomized pipelines between nodes that are rebalanced in case of failure. We empirically validate our findings and compare SWARM Parallelism with existing large-scale training approaches. 
Finally, we combine our insights with compression strategies to train a large Transformer language model with 1B shared parameters ($\\approx$13B before sharing) on preemptible T4 GPUs with less than 200 Mb/s network.", "keywords": "distributed training;model-parallel training;model parallelism;fault-tolerant training;communication efficiency;volunteer computing", "primary_area": "", "supplementary_material": "", "author": "Max Ryabinin;Tim Dettmers;Michael Diskin;Alexander Borzunov", "authorids": "~Max_Ryabinin1;~Tim_Dettmers2;~Michael_Diskin1;~Alexander_Borzunov1", "gender": "Not Specified;M;M;M", "homepage": "https://mryab.github.io/;https://timdettmers.com/;;https://github.com/borzunov", "dblp": "276/0192;172/1045;295/8914.html;295/8854", "google_scholar": "930PERsAAAAJ;lHI3w5kAAAAJ;LRKQhcYAAAAJ;https://scholar.google.ru/citations?user=HdwzsCMAAAAJ", "orcid": ";;0000-0001-8902-513X;", "linkedin": ";;https://www.linkedin.com/m/in/yhn112/;", "or_profile": "~Max_Ryabinin1;~Tim_Dettmers2;~Michael_Diskin1;~Alexander_Borzunov1", "aff": "Yandex;University of Washington;Brask AI;HSE University", "aff_domain": "yandex-team.ru;cs.washington.edu;brask.ai;hse.ru", "position": "Research Scientist;PhD student;Researcher;Instructor", "bibtex": "@misc{\nryabinin2023swarm,\ntitle={{SWARM} Parallelism: Training Large Models Can Be Surprisingly Communication-Efficient},\nauthor={Max Ryabinin and Tim Dettmers and Michael Diskin and Alexander Borzunov},\nyear={2023},\nurl={https://openreview.net/forum?id=-azium0cV9}\n}", "github": "", "project": "", "reviewers": "A2NV;uZBm;r1H3;rTpG;8Ywq;fehi", "site": "https://openreview.net/forum?id=-azium0cV9", "pdf_size": 767375, "recommendation": "3;3;5;6;8;8", "confidence": "4;5;2;4;4;3", "correctness": "2;2;4;3;3;3", "technical_novelty": "2;2;2;3;1;2", "empirical_novelty": "2;2;2;3;3;3", "wc_summary_paper": "77;34;38;101;45;57", "wc_strength_and_weaknesses": "249;198;386;189;216;73", "wc_clarity_quality_novelty_and_reproducibility": "26;578;56;95;25;86", "wc_summary_review": "83;41;71;13;15;15", "wc_review": "435;851;551;398;301;231", "wc_reply_reviewers": "224;0;152;34;0;16", "wc_reply_authors": "1611;592;619;456;389;258", "reply_reviewers": "2;0;1;1;0;1", "reply_authors": "4;1;2;2;1;1", "recommendation_avg": [ 5.5, 2.0615528128088303 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "correctness_avg": [ 2.8333333333333335, 0.6871842709362768 ], "technical_novelty_avg": [ 2.0, 0.5773502691896257 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 58.666666666666664, 23.640830968662858 ], "wc_strength_and_weaknesses_avg": [ 218.5, 92.60444553763784 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 144.33333333333334, 195.76743572128865 ], "wc_summary_review_avg": [ 39.666666666666664, 28.25282680055612 ], "wc_review_avg": [ 461.1666666666667, 201.38885823754555 ], "wc_reply_reviewers_avg": [ 71.0, 86.06780272939857 ], "wc_reply_authors_avg": [ 654.1666666666666, 444.8150989143941 ], "reply_reviewers_avg": [ 0.8333333333333334, 0.6871842709362768 ], "reply_authors_avg": [ 1.8333333333333333, 1.0671873729054748 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.34299717028501764, "corr_recommendation_correctness": 0.5294117647058822, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18445653334642104295&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Yandex;University of Washington;Brask AI;Higher 
School of Economics", "aff_unique_dep": ";;;", "aff_unique_url": "https://yandex.com;https://www.washington.edu;;https://hse.ru", "aff_unique_abbr": "Yandex;UW;;HSE", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;0", "aff_country_unique": "Russian Federation;United States;Unknown" }, { "title": "Modeling Sequential Sentence Relation to Improve Cross-lingual Dense Retrieval", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11876", "id": "-bVsNeR56KS", "poster": "", "openreview": "https://openreview.net/forum?id=-bVsNeR56KS", "slides": "https://iclr.cc/virtual/2023/poster/11876", "video": "https://iclr.cc/virtual/2023/poster/11876", "author_site": "Shunyu Zhang, Yaobo Liang, MING GONG, Daxin Jiang, Nan Duan", "tldr": "", "abstract": "Recently multi-lingual pre-trained language models (PLM) such as mBERT and XLM-R have achieved impressive strides in cross-lingual dense retrieval. Despite its successes, they are general-purpose PLM while the multilingual PLM tailored for cross-lingual retrieval is still unexplored. Motivated by an observation that the sentences in parallel documents are approximately in the same order, which is universal across languages, we propose to model this sequential sentence relation to facilitate cross-lingual representation learning. Specifically, we propose a multilingual PLM called masked sentence model (MSM), which consists of a sentence encoder to generate the sentence representations, and a document encoder applied to a sequence of sentence vectors from a document. The document encoder is shared for all languages to model the universal sequential sentence relation across languages. To train the model, we propose a masked sentence prediction task, which masks and predicts the sentence vector via a hierarchical contrastive loss with sampled negatives. Comprehensive experiments on four cross-lingual retrieval tasks show MSM significantly outperforms existing advanced pre-training models, demonstrating the effectiveness and stronger cross-lingual retrieval capabilities of our approach. 
", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Shunyu Zhang;Yaobo Liang;MING GONG;Daxin Jiang;Nan Duan", "authorids": "~Shunyu_Zhang1;~Yaobo_Liang1;~MING_GONG2;~Daxin_Jiang2;~Nan_Duan1", "gender": "M;M;;M;M", "homepage": ";https://www.microsoft.com/en-us/research/people/yalia/;;https://www.microsoft.com/en-us/research/people/djiang/;https://nanduan.github.io/", "dblp": "288/1696;245/8600.html;;77/5094;", "google_scholar": ";z92gIuEAAAAJ;;N-wAHCoAAAAJ;Qaa6OxIAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Shunyu_Zhang1;~Yaobo_Liang1;~MING_GONG2;~Daxin_Jiang2;~Nan_Duan1", "aff": "Kuaishou;Microsoft;;Microsoft;Microsoft Research Asia", "aff_domain": "kuaishou.com;microsoft.com;;microsoft.com;microsoft.com", "position": "Researcher;Researcher;;Researcher/Scientist;Principal Researcher", "bibtex": "@inproceedings{\nzhang2023modeling,\ntitle={Modeling Sequential Sentence Relation to Improve Cross-lingual Dense Retrieval},\nauthor={Shunyu Zhang and Yaobo Liang and MING GONG and Daxin Jiang and Nan Duan},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=-bVsNeR56KS}\n}", "github": "", "project": "", "reviewers": "XpJe;Hf6d;TsJC;VGZA", "pdf_size": 357584, "recommendation": "3;6;6;8", "confidence": "5;4;4;3", "correctness": "2;3;3;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;3;2;3", "wc_summary_paper": "104;139;75;228", "wc_strength_and_weaknesses": "314;130;76;89", "wc_clarity_quality_novelty_and_reproducibility": "86;66;1;10", "wc_summary_review": "60;23;25;9", "wc_review": "564;358;177;336", "wc_reply_reviewers": "0;0;0;31", "wc_reply_authors": "1318;259;227;294", "reply_reviewers": "0;0;0;1", "reply_authors": "2;1;1;1", "recommendation_avg": [ 5.75, 1.7853571071357126 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 136.5, 57.48260606479146 ], "wc_strength_and_weaknesses_avg": [ 152.25, 95.48920096010858 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 40.75, 36.09276243237694 ], "wc_summary_review_avg": [ 29.25, 18.793283374652763 ], "wc_review_avg": [ 358.75, 137.54885495706608 ], "wc_reply_reviewers_avg": [ 7.75, 13.423393758658799 ], "wc_reply_authors_avg": [ 524.5, 458.7398500239542 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.9901475429766743, "corr_recommendation_correctness": 0.8892972917998875, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12773187740667521895&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=-bVsNeR56KS", "email": "kuaishou.com;microsoft.com;;microsoft.com;microsoft.com", "author_num": 5, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Kuaishou Technology;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.kuaishou.com;https://www.microsoft.com", "aff_unique_abbr": "Kuaishou;Microsoft", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "China;United States" }, { "title": "Recitation-Augmented Language Models", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10962", "id": "-cqvvvb-NkI", 
"poster": "", "openreview": "https://openreview.net/forum?id=-cqvvvb-NkI", "slides": "https://iclr.cc/virtual/2023/poster/10962", "video": "https://iclr.cc/virtual/2023/poster/10962", "author_site": "Zhiqing Sun, Xuezhi Wang, Yi Tay, Yiming Yang, Denny Zhou", "tldr": "We propose a novel recitation-augmented generation framework to improve language models\u2019 performance in the closed-book question-answering setting.", "abstract": "We propose a new paradigm to help Large Language Models (LLMs) generate more accurate factual knowledge without retrieving from an external corpus, called RECITation-augmented gEneration (RECITE). Different from retrieval-augmented language models that retrieve relevant documents before generating the outputs, given an input, RECITE first recites one or several relevant passages from LLMs\u2019 own memory via sampling, and then produces the final answers. We show that RECITE is a powerful paradigm for knowledge-intensive NLP tasks. Specifically, we show that by utilizing recitation as the intermediate step, a recite-and-answer scheme can achieve new state-of-the-art performance in various closed-book question answering (CBQA) tasks. In experiments, we verify the effectiveness of RECITE on three pre-trained models (In-house LM, UL2, and OPT) and three CBQA tasks (Natural Questions, TriviaQA, and HotpotQA). Our code is available at \"https://github.com/Edward-Sun/RECITE\".", "keywords": "Large Language Models;In-context Learning;Memorization;Closed-book Question Answering;CBQA", "primary_area": "", "supplementary_material": "", "author": "Zhiqing Sun;Xuezhi Wang;Yi Tay;Yiming Yang;Denny Zhou", "authorids": "~Zhiqing_Sun1;~Xuezhi_Wang3;~Yi_Tay1;~Yiming_Yang1;~Denny_Zhou1", "gender": "M;;M;F;", "homepage": "https://www.cs.cmu.edu/~zhiqings/;https://research.google/people/105995/;http://yitay.net;http://www.cs.cmu.edu/~yiming/;https://dennyzhou.github.io/", "dblp": "211/7692;70/4090-2;;25/1666;178/3277", "google_scholar": "https://scholar.google.com/citations?hl=en;ScLUQ-YAAAAJ;VBclY_cAAAAJ;MlZq4XwAAAAJ;UwLsYw8AAAAJ", "orcid": ";;;0000-0001-8322-607X;", "linkedin": "zhiqing-sun-5781b3100/;;;yiming-yang-24100924/;", "or_profile": "~Zhiqing_Sun1;~Xuezhi_Wang3;~Yi_Tay1;~Yiming_Yang1;~Dengyong_Zhou2", "aff": "Carnegie Mellon University;Google DeepMind;Google;School of Computer Science, Carnegie Mellon University;Google DeepMind", "aff_domain": "cs.cmu.edu;google.com;google.com;cs.cmu.edu;google.com", "position": "PhD student;Research Scientist;Research Scientist;Full Professor;Research Scientist", "bibtex": "@inproceedings{\nsun2023recitationaugmented,\ntitle={Recitation-Augmented Language Models},\nauthor={Zhiqing Sun and Xuezhi Wang and Yi Tay and Yiming Yang and Denny Zhou},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=-cqvvvb-NkI}\n}", "github": "", "project": "", "reviewers": "m3Mw;HxiC;rRR1;9YM6", "pdf_size": 887619, "recommendation": "5;6;6;6", "confidence": "5;4;4;3", "correctness": "3;3;4;3", "technical_novelty": "3;3;2;3", "empirical_novelty": "0;3;3;0", "wc_summary_paper": "92;143;306;64", "wc_strength_and_weaknesses": "184;627;48;614", "wc_clarity_quality_novelty_and_reproducibility": "22;86;4;31", "wc_summary_review": "40;137;383;36", "wc_review": "338;993;741;745", "wc_reply_reviewers": "934;59;0;0", "wc_reply_authors": "1665;762;915;711", "reply_reviewers": "8;1;0;0", "reply_authors": "9;1;2;1", "recommendation_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 
4.0, 0.7071067811865476 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.5, 1.5 ], "wc_summary_paper_avg": [ 151.25, 93.72666376224004 ], "wc_strength_and_weaknesses_avg": [ 368.25, 256.8329953491179 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 35.75, 30.597181242722343 ], "wc_summary_review_avg": [ 149.0, 141.02304776170453 ], "wc_review_avg": [ 704.25, 234.80137882900092 ], "wc_reply_reviewers_avg": [ 248.25, 396.64995588049675 ], "wc_reply_authors_avg": [ 1013.25, 383.7032544818978 ], "reply_reviewers_avg": [ 2.25, 3.344772040064913 ], "reply_authors_avg": [ 3.25, 3.344772040064913 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.816496580927726, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 131, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5899952811092272331&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=-cqvvvb-NkI", "email": "cs.cmu.edu;google.com;google.com;cs.cmu.edu;google.com", "author_num": 5, "aff_unique_index": "0;1;1;0;1", "aff_unique_norm": "Carnegie Mellon University;Google", "aff_unique_dep": ";Google DeepMind", "aff_unique_url": "https://www.cmu.edu;https://deepmind.com", "aff_unique_abbr": "CMU;DeepMind", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Mountain View;Pittsburgh", "aff_country_unique_index": "0;1;0;0;1", "aff_country_unique": "United States;United Kingdom" }, { "id": "-gTqRt6RpqV", "title": "Gated Class-Attention with Cascaded Feature Drift Compensation for Exemplar-free Continual Learning of Vision Transformers", "track": "main", "status": "Withdraw", "tldr": "We propose a gated-class attention mechanism with feature drift compensation that achieves improved plasticity and stability for exemplar-free continual learning of visual transformers", "abstract": "Vision transformers (ViTs) have achieved remarkable successes across a broad range of computer vision applications. As a consequence there has been increasing interest in extending continual learning theory and techniques to ViT architectures. In this paper, we propose a new method for exemplar-free class incremental training of ViTs. The main challenge of exemplar-free continual learning is maintaining plasticity of the learner without causing catastrophic forgetting of previously learned tasks. This is often achieved via exemplar replay which can help recalibrate previous task classifiers to the feature drift which occurs when learning new tasks. Exemplar replay, however, comes at the cost of retaining samples from previous tasks which for some applications may not be possible. To address the problem of continual ViT training, we first propose gated class-attention to minimize the drift in the final ViT transformer block. This mask-based gating is applied to class-attention mechanism of the last transformer block and strongly regulates the weights crucial for previous tasks. Secondly, we propose a new method of feature drift compensation that accommodates feature drift in the backbone when learning new tasks. The combination of gated class-attention and cascaded feature drift compensation allows for plasticity towards new tasks while limiting forgetting of previous ones. 
Extensive experiments performed on CIFAR-100 and Tiny-ImageNet demonstrate that our method outperforms existing exemplar-free state-of-the-art methods without the need to store any representative exemplars of past tasks.", "keywords": "Exemplar-Free Continual Learning;Vision Transformer;Class-incremental learning", "primary_area": "", "supplementary_material": "", "author": "Marco Cotogni;Fei Yang;Claudio Cusano;Andrew D. Bagdanov;Joost van de Weijer", "authorids": "~Marco_Cotogni1;~Fei_Yang4;~Claudio_Cusano1;~Andrew_D._Bagdanov2;~Joost_van_de_Weijer5", "gender": "M;M;M;M;M", "homepage": ";;;http://www.micc.unifi.it/bagdanov;http://lamp.cvc.uab.es/", "dblp": "153/8957;19/2504-4;53/6816;64/3935;67/3379", "google_scholar": "8PUz5lAAAAAJ;S1gksNwAAAAJ;https://scholar.google.it/citations?user=lhZpU_8AAAAJ;_Fk4YUcAAAAJ;https://scholar.google.es/citations?user=Gsw2iUEAAAAJ", "orcid": "0000-0001-7950-7370;;0000-0001-9365-8167;;0000-0002-9656-9706", "linkedin": ";;claudio-cusano-39774b4a;;", "or_profile": "~Marco_Cotogni1;~Fei_Yang4;~Claudio_Cusano1;~Andrew_D._Bagdanov2;~Joost_van_de_Weijer1", "aff": "Universit\u00e0 di Pavia;Computer Vision Center, Universitat Aut\u00f3noma de Barcelona;University of Pavia;Universit\u00e0 degli Studi di Firenze;Computer Vision Center, Universitat Aut\u00f3noma de Barcelona", "aff_domain": "unipv.it;cvc.uab.es;unipv.it;unifi.it;cvc.uab.es", "position": "PhD student;Postdoc;Associate Professor;Associate Professor;Researcher", "bibtex": "@misc{\ncotogni2023gated,\ntitle={Gated Class-Attention with Cascaded Feature Drift Compensation for Exemplar-free Continual Learning of Vision Transformers},\nauthor={Marco Cotogni and Fei Yang and Claudio Cusano and Andrew D. Bagdanov and Joost van de Weijer},\nyear={2023},\nurl={https://openreview.net/forum?id=-gTqRt6RpqV}\n}", "github": "", "project": "", "reviewers": "orHe;UznR;RTCv;wA6T", "site": "https://openreview.net/forum?id=-gTqRt6RpqV", "pdf_size": 5466245, "recommendation": "3;5;6;6", "confidence": "3;4;4;4", "correctness": "2;3;3;4", "technical_novelty": "2;3;2;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "20;78;119;59", "wc_strength_and_weaknesses": "258;287;312;146", "wc_clarity_quality_novelty_and_reproducibility": "70;22;67;5", "wc_summary_review": "40;57;40;4", "wc_review": "388;444;538;214", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 69.0, 35.644073841243234 ], "wc_strength_and_weaknesses_avg": [ 250.75, 63.42466003062216 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 41.0, 28.16913204200655 ], "wc_summary_review_avg": [ 35.25, 19.330998422223306 ], "wc_review_avg": [ 396.0, 117.957619508025 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.9428090415820632, "corr_recommendation_correctness": 0.8660254037844386, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7173607734760931128&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;0;2;1", "aff_unique_norm": "University of Pavia;Universitat Aut\u00f3noma de 
Barcelona;University of Florence", "aff_unique_dep": ";Computer Vision Center;", "aff_unique_url": "https://www.unipv.eu;https://www.uab.cat;https://www.unifi.it", "aff_unique_abbr": "Unipv;UAB;UNIFI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;1", "aff_country_unique": "Italy;Spain" }, { "id": "-hMNEMgT8Wd", "title": "RG: OUT-OF-DISTRIBUTION DETECTION WITH REACTIVATE GRADNORM", "track": "main", "status": "Reject", "tldr": "The information of joint feature space and output space improves the performance of OOD detection.", "abstract": "Detecting out-of-distribution (OOD) data is critical to building reliable machine learning systems in the open world. Previous works mainly perform OOD detection in feature space or output space. Recently, researchers have achieved promising results using gradient information, which combines the information in both feature and output space for OOD detection. However, existing works still suffer from the problem of overconfidence. To address this problem, we propose a novel method called ``Reactivate Gradnorm (RG)'', which exploits the norm of the clipped feature vector and the energy in the output space for OOD detection. To verify the effectiveness of our method, we conduct experiments on four benchmark datasets. Experimental results demonstrate that our RG outperforms existing state-of-the-art approaches by 2.06\\% in average AUROC. Meanwhile, RG is easy to implement and does not require additional OOD data or fine-tuning process. We can realize OOD detection in only one forward pass of any pretrained model.", "keywords": "OOD detection;Uncertainty Learning", "primary_area": "", "supplementary_material": "/attachment/8136f7e4d75454a218455ba49e7b0998e171e7ef.zip", "author": "Mingyu Xu;Kexin Wang;Zheng Lian;Licai Sun;Bin Liu;Jianhua Tao", "authorids": "~Mingyu_Xu1;wangkexin2021@ia.ac.cn;~Zheng_Lian3;~Licai_Sun1;~Bin_Liu13;jhtao@nlpr.ia.ac.cn", "gender": ";;M;;M;", "homepage": ";;https://zeroqiaoba.github.io/Homepage/;;https://people.ucas.ac.cn/~bin.liu;", "dblp": ";;;241/3466;35/837-41;", "google_scholar": ";;S34nWz0AAAAJ;7qo_cTcAAAAJ;;", "orcid": ";;0000-0001-9477-0599;;;", "linkedin": ";;;;;", "or_profile": "~Mingyu_Xu1;wangkexin2021@ia.ac.cn;~Zheng_Lian3;~Licai_Sun1;~Bin_Liu13;jhtao@nlpr.ia.ac.cn", "aff": ";;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Institute of automation, Chinese academy of science;", "aff_domain": ";;ia.ac.cn;ia.ac.cn;nlpr.ia.ac.cn;", "position": ";;Assistant Professor;PhD student;Associate Professor;", "bibtex": "@misc{\nxu2023rg,\ntitle={{RG}: {OUT}-{OF}-{DISTRIBUTION} {DETECTION} {WITH} {REACTIVATE} {GRADNORM}},\nauthor={Mingyu Xu and Kexin Wang and Zheng Lian and Licai Sun and Bin Liu and Jianhua Tao},\nyear={2023},\nurl={https://openreview.net/forum?id=-hMNEMgT8Wd}\n}", "github": "", "project": "", "reviewers": "TTD6;diYs;b6vP;7gKP", "site": "https://openreview.net/forum?id=-hMNEMgT8Wd", "pdf_size": 223157, "recommendation": "3;3;5;5", "confidence": "5;3;3;3", "correctness": "1;3;3;2", "technical_novelty": "3;2;2;2", "empirical_novelty": "3;2;2;2", "wc_summary_paper": "88;93;174;85", "wc_strength_and_weaknesses": "218;70;61;452", "wc_clarity_quality_novelty_and_reproducibility": "26;34;92;24", "wc_summary_review": "21;216;35;26", "wc_review": "353;413;362;587", "wc_reply_reviewers": "0;0;0;586", "wc_reply_authors": "0;962;0;972", "reply_reviewers": "0;0;0;1", "reply_authors": "0;2;0;2", "recommendation_avg": [ 4.0, 1.0 
], "confidence_avg": [ 3.5, 0.8660254037844386 ], "correctness_avg": [ 2.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 110.0, 37.06076092041285 ], "wc_strength_and_weaknesses_avg": [ 200.25, 158.15241857145278 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 44.0, 27.964262908219126 ], "wc_summary_review_avg": [ 74.5, 81.84894623634442 ], "wc_review_avg": [ 428.75, 94.18698158450562 ], "wc_reply_reviewers_avg": [ 146.5, 253.74544330884052 ], "wc_reply_authors_avg": [ 483.5, 483.5129264042483 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 1.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.30151134457776363, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Rj5fw9SwJZIJ:scholar.google.com/&scioq=RG:+OUT-OF-DISTRIBUTION+DETECTION+WITH+REACTIVATE+GRADNORM&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Chinese Academy of Sciences", "aff_unique_dep": "Institute of Automation", "aff_unique_url": "http://www.ia.cas.cn", "aff_unique_abbr": "CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "-hWhz9xfrB9", "title": "Lovasz Theta Contrastive Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "We establish a connection between the Lovasz theta function of a graph and the widely used InfoNCE loss. We show that under certain conditions, the minima of the InfoNCE loss are related to minimizing the Lovasz theta function on the empty similarity graph between the samples. Building on this connection, we generalize contrastive learning on weighted similarity graphs between samples. Our Lovasz theta contrastive loss uses a weighted graph that can be learned to take into account similarities between our data. 
We evaluate our method on image classification tasks, demonstrating an improvement of $1 \\%$ in the supervised case and up to $4 \\%$ in the unsupervised case.", "keywords": "Lovasz theta;Contrastive learning;Similarity graph;Graph Theory", "primary_area": "", "supplementary_material": "/attachment/d8d35177279c06a35f750a6693da0427b03813fc.zip", "author": "Georgios Smyrnis;Matt Jordan;Ananya Uppal;Giannis Daras;Alex Dimakis", "authorids": "~Georgios_Smyrnis1;~Matt_Jordan1;~Ananya_Uppal1;~Giannis_Daras1;~Alex_Dimakis1", "gender": "M;M;F;M;M", "homepage": ";https://www.cs.utexas.edu/~mjordan/;https://ananyauppal.github.io/;https://giannisdaras.github.io/;https://people.eecs.berkeley.edu/~alexdimakis/", "dblp": "255/9114;236/5728;220/5296;254/2703;19/5000.html", "google_scholar": ";Zj7R8p0AAAAJ;kCdRr1gAAAAJ;LaScvbQAAAAJ;JSFmVQEAAAAJ", "orcid": ";;;;", "linkedin": ";;;;alex-dimakis-b1b20320/", "or_profile": "~Georgios_Smyrnis1;~Matt_Jordan1;~Ananya_Uppal1;~Giannis_Daras1;~Alex_Dimakis1", "aff": "University of Texas, Austin;University of Texas, Austin;University of Texas at Austin;University of Texas, Austin;University of Texas at Austin", "aff_domain": "utexas.edu;utexas.edu;utexas.edu;utexas.edu;utexas.edu", "position": "PhD student;PhD student;Postdoc;PhD student;Full Professor", "bibtex": "@misc{\nsmyrnis2023lovasz,\ntitle={Lovasz Theta Contrastive Learning},\nauthor={Georgios Smyrnis and Matt Jordan and Ananya Uppal and Giannis Daras and Alex Dimakis},\nyear={2023},\nurl={https://openreview.net/forum?id=-hWhz9xfrB9}\n}", "github": "", "project": "", "reviewers": "HNxb;fWo7;4DeW;NuU3", "site": "https://openreview.net/forum?id=-hWhz9xfrB9", "pdf_size": 591435, "recommendation": "3;5;6;6", "confidence": "4;4;4;4", "correctness": "2;3;4;3", "technical_novelty": "2;3;4;4", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "78;226;309;73", "wc_strength_and_weaknesses": "212;273;144;335", "wc_clarity_quality_novelty_and_reproducibility": "89;30;91;127", "wc_summary_review": "52;52;52;130", "wc_review": "431;581;596;665", "wc_reply_reviewers": "208;0;0;0", "wc_reply_authors": "1223;564;284;801", "reply_reviewers": "1;0;0;0", "reply_authors": "2;1;1;2", "recommendation_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 3.25, 0.82915619758885 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 171.5, 100.40044820617088 ], "wc_strength_and_weaknesses_avg": [ 241.0, 70.90486584149215 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 84.25, 34.78056210011563 ], "wc_summary_review_avg": [ 71.5, 33.77499074759311 ], "wc_review_avg": [ 568.25, 85.33866357050596 ], "wc_reply_reviewers_avg": [ 52.0, 90.06664199358161 ], "wc_reply_authors_avg": [ 718.0, 344.23320583581125 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.8660254037844386, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:CsgNVP24hYEJ:scholar.google.com/&scioq=Lovasz+Theta+Contrastive+Learning&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0;0;0;0", 
"aff_country_unique": "United States" }, { "title": "Transferable Unlearnable Examples", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10879", "id": "-htnolWDLvP", "poster": "/media/PosterPDFs/ICLR%202023/10879.png?t=1682865720.6079168", "openreview": "https://openreview.net/forum?id=-htnolWDLvP", "slides": "https://iclr.cc/virtual/2023/poster/10879", "video": "https://iclr.cc/virtual/2023/poster/10879", "author_site": "Jie Ren, Han Xu, Yuxuan Wan, Xingjun Ma, Lichao Sun, Jiliang Tang", "tldr": "", "abstract": "With more people publishing their personal data online, unauthorized data usage has become a serious concern. The unlearnable examples strategies have been introduced to prevent third parties from training on the data without permission. They add perturbations to the users\u2019 data before publishing, so as to make the models trained on the perturbed published dataset invalidated. These perturbations have been generated for a specific training setting and a target dataset. However, their unlearnable effects significantly decrease when used in other training settings or datasets. To tackle this issue, we propose a novel unlearnable strategy based on Class-wise Separability Discriminant (CSD), which boosts the transferability of the unlearnable perturbations by enhancing the linear separability. Extensive experiments demonstrate the transferability of the unlearnable examples crafted by our proposed method across training settings and datasets.", "keywords": "Unlearnable Examples;Data Protection", "primary_area": "", "supplementary_material": "", "author": "Jie Ren;Han Xu;Yuxuan Wan;Xingjun Ma;Lichao Sun;Jiliang Tang", "authorids": "~Jie_Ren6;~Han_Xu1;~Yuxuan_Wan1;~Xingjun_Ma1;~Lichao_Sun1;~Jiliang_Tang1", "gender": "M;M;;M;M;M", "homepage": "https://renjie3.github.io/;https://cse.msu.edu/~xuhan1/;https://wanyu42.github.io/;http://xingjunma.com/;https://lichao-sun.github.io/;https://www.cse.msu.edu/~tangjili/", "dblp": "181/2887-19.html;32/34-2;;195/8270;121/0780-1.html;64/10812", "google_scholar": ";mX2rL3IAAAAJ;jTwbiScAAAAJ;https://scholar.google.com.au/citations?user=XQViiyYAAAAJ;WhGUE7AAAAAJ;WtzKMWAAAAAJ", "orcid": ";0000-0002-4016-6748;;;;0000-0001-7125-3898", "linkedin": ";;;xingjun-ma-173532129/;lichao-sun-b273a290/;", "or_profile": "~Jie_Ren6;~Han_Xu1;~Yuxuan_Wan1;~Xingjun_Ma1;~Lichao_Sun1;~Jiliang_Tang1", "aff": "Baidu;Michigan State University;Michigan State University;Fudan University;Lehigh University;Michigan State University", "aff_domain": "baidu.com;msu.edu;msu.edu;fudan.edu.cn;lehigh.edu;msu.edu", "position": "Intern;PhD student;PhD student;Associate Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nren2023transferable,\ntitle={Transferable Unlearnable Examples},\nauthor={Jie Ren and Han Xu and Yuxuan Wan and Xingjun Ma and Lichao Sun and Jiliang Tang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=-htnolWDLvP}\n}", "github": "", "project": "", "reviewers": "sZJu;5F6K;fZkw;esp8", "pdf_size": 4531420, "recommendation": "6;6;6;8", "confidence": "4;4;3;4", "correctness": "3;3;3;3", "technical_novelty": "3;2;3;3", "empirical_novelty": "4;2;3;3", "wc_summary_paper": "45;70;114;34", "wc_strength_and_weaknesses": "41;184;53;150", "wc_clarity_quality_novelty_and_reproducibility": "14;50;261;46", "wc_summary_review": "28;51;35;37", "wc_review": "128;355;463;267", "wc_reply_reviewers": "0;0;496;94", "wc_reply_authors": 
"118;1151;2692;2373", "reply_reviewers": "0;0;2;2", "reply_authors": "1;3;7;6", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 65.75, 30.760160922856045 ], "wc_strength_and_weaknesses_avg": [ 107.0, 61.33922073192649 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 92.75, 98.1360662549707 ], "wc_summary_review_avg": [ 37.75, 8.347903928532 ], "wc_review_avg": [ 303.25, 122.70365723970903 ], "wc_reply_reviewers_avg": [ 147.5, 204.8334689448968 ], "wc_reply_authors_avg": [ 1583.5, 1023.0871175027081 ], "reply_reviewers_avg": [ 1.0, 1.0 ], "reply_authors_avg": [ 4.25, 2.384848003542364 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 0.0, "gs_citation": 47, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5666554405158877405&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=-htnolWDLvP", "email": "baidu.com;msu.edu;msu.edu;fudan.edu.cn;lehigh.edu;msu.edu", "author_num": 6, "aff_unique_index": "0;1;1;2;3;1", "aff_unique_norm": "Baidu;Michigan State University;Fudan University;Lehigh University", "aff_unique_dep": "Baidu, Inc.;;;", "aff_unique_url": "https://www.baidu.com;https://www.msu.edu;https://www.fudan.edu.cn;https://www.lehigh.edu", "aff_unique_abbr": "Baidu;MSU;Fudan;Lehigh", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;1;1", "aff_country_unique": "China;United States" }, { "id": "-i73LPWa3bD", "title": "Semi-supervised learning of partial differential operators and dynamical flows", "track": "main", "status": "Reject", "tldr": "", "abstract": "The evolution of dynamical systems is generically governed by nonlinear partial differential equations (PDEs), whose solution, in a simulation framework, requires vast amounts of computational resources. In this work, we present a novel method that combines a hyper-network solver with a Fourier Neural Operator architecture. Our method treats time and space separately and as a result, it successfully propagates initial conditions in continuous time steps by employing the general composition properties of the partial differential operators. Following previous works, supervision is provided at a specific time point. We test our method on various time evolution PDEs, including nonlinear fluid flows in one, two, or three spatial dimensions. 
The results show that the new method improves the learning accuracy at the time of the supervision point, and can interpolate the solutions to any intermediate time.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/6aeed47513a2b7e24527df0475122a2abe829b24.zip", "author": "Michael Rotman;Amit Dekel;Ran Ilan Ber;Lior Wolf;Yaron Oz", "authorids": "~Michael_Rotman1;~Amit_Dekel1;~Ran_Ilan_Ber1;~Lior_Wolf1;~Yaron_Oz1", "gender": ";M;M;M;", "homepage": "https://rotmanmichael.com;;;http://www.cs.tau.ac.il/~wolf;", "dblp": "217/3007;259/2006;;83/4103;", "google_scholar": "tzlpNi8AAAAJ;mY12KaoAAAAJ;jkVZ56EAAAAJ;UbFrXTsAAAAJ;", "orcid": ";;;0000-0001-5578-8892;", "linkedin": ";;;;", "or_profile": "~Michael_Rotman1;~Amit_Dekel1;~Ran_Ilan_Ber1;~Lior_Wolf1;~Yaron_Oz1", "aff": "Amazon;Univrses;K Health;Tel Aviv University;Tel Aviv University, Technion", "aff_domain": "amazon.com;univrses.com;khealth.com;tau.ac.il;tau.ac.il", "position": "Researcher;Researcher;Researcher;Full Professor;Full Professor", "bibtex": "@misc{\nrotman2023semisupervised,\ntitle={Semi-supervised learning of partial differential operators and dynamical flows},\nauthor={Michael Rotman and Amit Dekel and Ran Ilan Ber and Lior Wolf and Yaron Oz},\nyear={2023},\nurl={https://openreview.net/forum?id=-i73LPWa3bD}\n}", "github": "", "project": "", "reviewers": "ykCW;cCio;rv2Z;2qHC;NVyD", "site": "https://openreview.net/forum?id=-i73LPWa3bD", "pdf_size": 513743, "recommendation": "3;3;5;5;5", "confidence": "3;4;4;4;3", "correctness": "3;3;2;4;3", "technical_novelty": "3;2;3;3;3", "empirical_novelty": "3;2;0;2;3", "wc_summary_paper": "27;10;114;53;79", "wc_strength_and_weaknesses": "98;222;175;98;349", "wc_clarity_quality_novelty_and_reproducibility": "12;113;95;17;147", "wc_summary_review": "35;20;47;31;67", "wc_review": "172;365;431;199;642", "wc_reply_reviewers": "136;0;0;0;48", "wc_reply_authors": "252;577;525;279;780", "reply_reviewers": "1;0;0;0;1", "reply_authors": "1;1;1;1;1", "recommendation_avg": [ 4.2, 0.9797958971132712 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "correctness_avg": [ 3.0, 0.6324555320336759 ], "technical_novelty_avg": [ 2.8, 0.39999999999999997 ], "empirical_novelty_avg": [ 2.0, 1.0954451150103321 ], "wc_summary_paper_avg": [ 56.6, 37.032958293930555 ], "wc_strength_and_weaknesses_avg": [ 188.4, 93.21502024888478 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 76.8, 53.56267357031387 ], "wc_summary_review_avg": [ 40.0, 16.024980499208105 ], "wc_review_avg": [ 361.8, 170.78571368823563 ], "wc_reply_reviewers_avg": [ 36.8, 52.969425143189916 ], "wc_reply_authors_avg": [ 482.6, 196.8680776560791 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.16666666666666663, "corr_recommendation_correctness": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7638457215727254674&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff_unique_index": "0;1;2;3;3", "aff_unique_norm": "Amazon;Univrses;K Health;Tel Aviv University", "aff_unique_dep": "Amazon.com, Inc.;;;", "aff_unique_url": "https://www.amazon.com;;;https://www.tau.ac.il", "aff_unique_abbr": "Amazon;;;TAU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;2;2", "aff_country_unique": "United States;;Israel" }, { "title": "Monocular Scene Reconstruction with 3D SDF Transformers", "status": "Poster", "track": "main", "site": 
"https://iclr.cc/virtual/2023/poster/11859", "id": "-iADdfa4GKH", "poster": "/media/PosterPDFs/ICLR%202023/11859.png?t=1681186774.3506641", "openreview": "https://openreview.net/forum?id=-iADdfa4GKH", "slides": "https://iclr.cc/virtual/2023/poster/11859", "video": "https://iclr.cc/virtual/2023/poster/11859", "author_site": "Weihao Yuan, Xiaodong Gu, Heng Li, Zilong Dong, Siyu Zhu", "tldr": "", "abstract": "Monocular scene reconstruction from posed images is challenging due to the complexity of a large environment. Recent volumetric methods learn to directly predict the TSDF volume and have demonstrated promising results in this task. However, most methods focus on how to extract and fuse the 2D features to a 3D feature volume, but none of them improve the way how the 3D volume is aggregated. In this work, we propose an SDF transformer network, which replaces the role of 3D CNN for better 3D feature aggregation. To reduce the explosive computation complexity of the 3D multi-head attention, we propose a sparse window attention module, where the attention is only calculated between the non-empty voxels within a local window. Then a top-down-bottom-up 3D attention network is built for 3D feature aggregation, where a dilate-attention structure is proposed to prevent geometry degeneration, and two global modules are employed to equip with global receptive fields. The experiments on multiple datasets show that this 3D transformer network generates a more accurate and complete reconstruction, which outperforms previous methods by a large margin. Remarkably, the mesh accuracy is improved by 41.8%, and the mesh completeness is improved by 25.3% on the ScanNet dataset. The code of our method will be made public.", "keywords": "3D Reconstruction;Monocular Scene Reconstruction;3D Transformer;TSDF volume", "primary_area": "", "supplementary_material": "/attachment/e192dc4bb552b86ffc3cffa2bcebcc169dc6956a.zip", "author": "Weihao Yuan;Xiaodong Gu;Heng Li;Zilong Dong;Siyu Zhu", "authorids": "~Weihao_Yuan1;~Xiaodong_Gu3;~Heng_Li6;~Zilong_Dong2;~Siyu_Zhu1", "gender": "M;M;M;M;M", "homepage": "https://www.weihao-yuan.com;;http://hengli.me;https://sites.google.com/site/zhusiyucs;https://baike.baidu.com/item/%E8%91%A3%E5%AD%90%E9%BE%99/62931048", "dblp": "217/2047-1;71/4467-4;02/3672-9;81/8842-1;81/1423", "google_scholar": "m3tqxRQAAAAJ;aJPO514AAAAJ;tjbbehcAAAAJ;vNCnDiMAAAAJ;GHOQKCwAAAAJ", "orcid": ";0000-0003-2623-7973;0000-0001-5143-5061;;0000-0002-6833-9102", "linkedin": ";;;;", "or_profile": "~Weihao_Yuan1;~Xiaodong_Gu3;~Heng_Li6;~Siyu_Zhu1;~Zlong_Dong1", "aff": "Alibaba Group;Alibaba Group;Simon Fraser University;Alibaba Group;Alibaba Group", "aff_domain": "alibaba-inc.com;alibaba-inc.com;sfu.ca;alibaba-inc.com;alibaba-inc.com", "position": "Researcher;Researcher;PhD student;Director;Researcher", "bibtex": "@inproceedings{\nyuan2023monocular,\ntitle={Monocular Scene Reconstruction with 3D {SDF} Transformers},\nauthor={Weihao Yuan and Xiaodong Gu and Heng Li and Zilong Dong and Siyu Zhu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=-iADdfa4GKH}\n}", "github": "", "project": "", "reviewers": "mQ2G;N9G6;9DEs;Zo2i", "pdf_size": 45870486, "recommendation": "5;6;6;8", "confidence": "4;4;3;4", "correctness": "4;1;4;4", "technical_novelty": "3;2;2;3", "empirical_novelty": "0;3;2;3", "wc_summary_paper": "88;92;77;39", "wc_strength_and_weaknesses": "564;231;91;182", "wc_clarity_quality_novelty_and_reproducibility": 
"74;79;40;47", "wc_summary_review": "66;33;48;12", "wc_review": "792;435;256;280", "wc_reply_reviewers": "0;62;0;0", "wc_reply_authors": "631;783;736;376", "reply_reviewers": "0;1;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 1.299038105676658 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.0, 1.224744871391589 ], "wc_summary_paper_avg": [ 74.0, 20.940391591371924 ], "wc_strength_and_weaknesses_avg": [ 267.0, 178.6798813520985 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 60.0, 16.777961735562517 ], "wc_summary_review_avg": [ 39.75, 19.828956099603428 ], "wc_review_avg": [ 440.75, 214.11606081749215 ], "wc_reply_reviewers_avg": [ 15.5, 26.846787517317598 ], "wc_reply_authors_avg": [ 631.5, 157.44284677304333 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.13245323570650439, "corr_recommendation_correctness": 0.13245323570650439, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6557272293183909694&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 0, "pdf": "https://openreview.net/pdf?id=-iADdfa4GKH", "email": "alibaba-inc.com;alibaba-inc.com;sfu.ca;alibaba-inc.com;alibaba-inc.com", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Alibaba Group;Simon Fraser University", "aff_unique_dep": ";", "aff_unique_url": "https://www.alibaba.com;https://www.sfu.ca", "aff_unique_abbr": "Alibaba;SFU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "China;Canada" }, { "id": "-itAMjwvDJC", "title": "Efficient neural representation in the cognitive neuroscience domain: Manifold Capacity in One-vs-rest Recognition Limit", "track": "main", "status": "Reject", "tldr": "Our Sparse Replica Manifold Analysis enables a separability and geometric analysis of neural data by extending the scope of the theory to a realistic number of neurons and tasks more relevant to cognitive neuroscience.", "abstract": "The structure in neural representations as manifolds has become a popular approach to study information encoding in neural populations. One particular interest is the connection between object recognition capability and the separability of neural representations for different objects, often called \"object manifolds.\" In learning theory, separability has been studied under the notion of storage capacity, which refers to the number of patterns encoded in a feature dimension. Chung et al (2018) extended the notion of capacity from discrete points to manifolds, where manifold capacity refers to the maximum number of object manifolds that can be linearly separated with high probability given random assignment of labels. Despite the use of manifold capacity in analyzing artificial neural networks (ANNs), its application to neuroscience has been limited. Due to the limited number of \"features\", such as neurons, available in neural experiments, manifold capacity cannot be verified empirically, unlike in ANNs. Additionally, the usage of random label assignment, while common in learning theory, is of limited relevance to the definition of object recognition tasks in cognitive science. To overcome these limits, we present the Sparse Replica Manifold analysis to study object recognition. 
Sparse manifold capacity measures how many object manifolds can be separated under one versus the rest classification, a form of task widely used both in cognitive neuroscience experiments and in machine learning applications. We demonstrate that the application of sparse manifold capacity allows analysis of a wider class of neural data - in particular, neural data that has a limited number of neurons with empirical measurements. Furthermore, sparse manifold capacity requires fewer computations to evaluate underlying geometries and enables a connection to a measure of dimension, the participation ratio. We analyze the relationship between capacity and dimension, and demonstrate that both manifold intrinsic dimension and the ambient space dimension play a role in capacity. ", "keywords": "computational neuroscience;statistical physics of learning;representation geometry;perceptual manifolds;object recognition", "primary_area": "", "supplementary_material": "", "author": "Nga Yu Lo;SueYeon Chung", "authorids": "~Nga_Yu_Lo1;~SueYeon_Chung1", "gender": "F;F", "homepage": "https://ngayulo.github.io/;https://sites.google.com/site/sueyeonchung/", "dblp": ";173/5418", "google_scholar": ";h7yVv0QAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Nga_Yu_Lo1;~SueYeon_Chung1", "aff": "Flatiron Institute;Flatiron Institute / Simons Foundation", "aff_domain": "flatironinstitute.org;simonsfoundation.org", "position": "Research analyst;Principal Investigator", "bibtex": "@misc{\nlo2023efficient,\ntitle={Efficient neural representation in the cognitive neuroscience domain: Manifold Capacity in One-vs-rest Recognition Limit},\nauthor={Nga Yu Lo and SueYeon Chung},\nyear={2023},\nurl={https://openreview.net/forum?id=-itAMjwvDJC}\n}", "github": "", "project": "", "reviewers": "EQzE;pKPQ;4A5K;yhnc;TxMd", "site": "https://openreview.net/forum?id=-itAMjwvDJC", "pdf_size": 2868832, "recommendation": "3;3;6;6;8", "confidence": "3;4;3;2;4", "correctness": "3;3;4;3;3", "technical_novelty": "2;2;3;3;4", "empirical_novelty": "2;0;3;3;4", "wc_summary_paper": "52;56;119;44;129", "wc_strength_and_weaknesses": "258;257;76;64;284", "wc_clarity_quality_novelty_and_reproducibility": "26;13;187;44;47", "wc_summary_review": "118;18;77;18;67", "wc_review": "454;344;459;170;527", "wc_reply_reviewers": "0;0;0;0;43", "wc_reply_authors": "1280;1223;1286;353;1012", "reply_reviewers": "0;0;0;0;1", "reply_authors": "2;2;2;1;2", "recommendation_avg": [ 5.2, 1.9390719429665317 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "correctness_avg": [ 3.2, 0.39999999999999997 ], "technical_novelty_avg": [ 2.8, 0.7483314773547882 ], "empirical_novelty_avg": [ 2.4, 1.3564659966250538 ], "wc_summary_paper_avg": [ 80.0, 36.2712006969717 ], "wc_strength_and_weaknesses_avg": [ 187.8, 96.74378533011824 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 63.4, 63.02570904004174 ], "wc_summary_review_avg": [ 59.6, 38.024202818731126 ], "wc_review_avg": [ 390.8, 125.01103951251666 ], "wc_reply_reviewers_avg": [ 8.6, 17.2 ], "wc_reply_authors_avg": [ 1030.8, 353.25197805532525 ], "reply_reviewers_avg": [ 0.2, 0.4 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.027565892320998583, "corr_recommendation_correctness": 0.20628424925175864, "gs_citation": 0, "gs_cited_by_link":
"https://scholar.google.com/scholar?q=related:72Bpnanm9XoJ:scholar.google.com/&scioq=Efficient+neural+representation+in+the+cognitive+neuroscience+domain:+Manifold+Capacity+in+One-vs-rest+Recognition+Limit&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Flatiron Institute", "aff_unique_dep": "", "aff_unique_url": "https://flatironinstitute.org", "aff_unique_abbr": "Flatiron", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Approximate Nearest Neighbor Search through Modern Error-Correcting Codes", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11856", "id": "-jP_rDkyfpI", "poster": "", "openreview": "https://openreview.net/forum?id=-jP_rDkyfpI", "slides": "https://iclr.cc/virtual/2023/poster/11856", "video": "https://iclr.cc/virtual/2023/poster/11856", "author_site": "Noam Touitou, Nissim Halabi", "tldr": "Using modern error-correcting codes, we present an improved method of using locality-sensitive hash functions for approximate nearest-neighbor search..", "abstract": "A locality-sensitive hash (or LSH) is a function that can efficiently map dataset points into a latent space while preserving pairwise distances. Such LSH functions have been used in approximate nearest-neighbor search (ANNS) in the following classic way, which we call classic hash clustering (CHC): first, the dataset points are hashed into a low-dimensional binary space using the LSH function; then, the points are clustered by these hash values. Upon receiving a query, its nearest neighbors are sought within its hash-cluster and nearby hash-clusters (i.e., multi-probe). However, CHC mandates a low-dimensional latent space for the LSH function, which distorts distances from the (high-dimensional) original real space; this results in inferior recall. This is often mitigated through using multiple hash tables at additional storage and memory costs.\n\nIn this paper, we introduce a better way of using LSH functions for ANNS. Our method, called the Polar Code Nearest-Neighbor (PCNN) algorithm, uses modern error-correcting codes (specifically polar codes) to maintain a manageable number of clusters inside a high-dimensional latent space. Allowing the LSH function to embed into this high-dimensional latent space results in higher recall, as the embedding faithfully captures distances in the original space. The crux of PCNN is using polar codes for probing: we present a multi-probe scheme for PCNN which uses efficient list-decoding methods for polar codes, with time complexity independent of the dataset size. 
Fixing the choice of LSH, experiment results demonstrate significant performance gains of PCNN over CHC; in particular, PCNN with a single table outperforms CHC with multiple tables, obviating the need for large memory and storage.", "keywords": "Similarity Search;Nearest-Neighbor Search;Polar Codes;Locality-Sensitive Hashing;LSH", "primary_area": "", "supplementary_material": "", "author": "Noam Touitou;Nissim Halabi", "authorids": "~Noam_Touitou1;~Nissim_Halabi1", "gender": "M;M", "homepage": "https://noamtouitou.com;", "dblp": "211/8092;54/1891.html", "google_scholar": "-N6FOX0AAAAJ;", "orcid": "0000-0002-5720-4114;", "linkedin": "noam-touitou/;", "or_profile": "~Noam_Touitou1;~Nissim_Halabi1", "aff": "Amazon;Amazon", "aff_domain": "amazon.com;amazon.com", "position": "Researcher;Researcher", "bibtex": "@inproceedings{\ntouitou2023approximate,\ntitle={Approximate Nearest Neighbor Search through Modern Error-Correcting Codes},\nauthor={Noam Touitou and Nissim Halabi},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=-jP_rDkyfpI}\n}", "github": "", "project": "", "reviewers": "96rD;8eNv;sMa7;FYov", "pdf_size": 1466680, "recommendation": "3;6;6;8", "confidence": "2;3;4;4", "correctness": "4;3;3;4", "technical_novelty": "2;3;4;4", "empirical_novelty": "2;2;3;4", "wc_summary_paper": "24;80;59;116", "wc_strength_and_weaknesses": "18;320;87;189", "wc_clarity_quality_novelty_and_reproducibility": "22;71;12;252", "wc_summary_review": "10;42;132;52", "wc_review": "74;513;290;609", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "16;411;396;328", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 5.75, 1.7853571071357126 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.25, 0.82915619758885 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 69.75, 33.36446462930284 ], "wc_strength_and_weaknesses_avg": [ 153.5, 113.75961497825139 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 89.25, 96.57995392419691 ], "wc_summary_review_avg": [ 59.0, 44.91102314577124 ], "wc_review_avg": [ 371.5, 207.10927067613366 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 287.75, 159.9818349063418 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.8866206949335731, "corr_recommendation_correctness": -0.14002800840280097, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:MywsFTRMv7oJ:scholar.google.com/&scioq=Approximate+Nearest+Neighbor+Search+through+Modern+Error-Correcting+Codes&hl=en&as_sdt=0,5", "gs_version_total": 2, "pdf": "https://openreview.net/pdf?id=-jP_rDkyfpI", "email": "amazon.com;amazon.com", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Amazon", "aff_unique_dep": "Amazon.com, Inc.", "aff_unique_url": "https://www.amazon.com", "aff_unique_abbr": "Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Breaking Correlation Shift via Conditional Invariant Regularizer", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12093", "id": "-jTaz3CMk72", "poster": "/media/PosterPDFs/ICLR%202023/12093.png?t=1681113266.768077", "openreview": "https://openreview.net/forum?id=-jTaz3CMk72", "slides": 
"https://iclr.cc/virtual/2023/poster/12093", "video": "https://iclr.cc/virtual/2023/poster/12093", "author_site": "Mingyang Yi, Ruoyu Wang, Jiacheng Sun, Zhenguo Li, Zhi-Ming Ma", "tldr": "This paper proposes an algorithm to make the model to generalize on data with spurious correlation, the method can be implemented without information on spurious feature. ", "abstract": "Recently, generalization on out-of-distribution (OOD) data with correlation shift has attracted great attentions. The correlation shift is caused by the spurious attributes that correlate to the class label, as the correlation between them may vary in training and test data. For such a problem, we show that given the class label, the models that are conditionally independent of spurious attributes are OOD generalizable. Based on this, a metric Conditional Spurious Variation (CSV) which controls the OOD generalization error, is proposed to measure such conditional independence. To improve the OOD generalization, we regularize the training process with the proposed CSV. Under mild assumptions, our training objective can be formulated as a nonconvex-concave mini-max problem. An algorithm with a provable convergence rate is proposed to solve the problem. Extensive empirical results verify our algorithm's efficacy in improving OOD generalization. ", "keywords": "OOD Generalization;Spurious Correlation;Optimization", "primary_area": "", "supplementary_material": "/attachment/1590b37d6ab906e9ec91d1151715b4155224e296.zip", "author": "Mingyang Yi;Ruoyu Wang;Jiacheng Sun;Zhenguo Li;Zhi-Ming Ma", "authorids": "~Mingyang_Yi1;~Ruoyu_Wang2;~Jiacheng_Sun1;~Zhenguo_Li1;~Zhi-Ming_Ma1", "gender": "M;M;M;M;", "homepage": "http://mingyangyi.github.io;;;http://www.ee.columbia.edu/~zgli/;http://homepage.amss.ac.cn/research/homePage/8eb59241e2e74d828fb84eec0efadba5/myHomePage.html", "dblp": ";278/6323;165/5350;23/6479;", "google_scholar": "RlOZiPUAAAAJ;1mO8fMgAAAAJ;;XboZC1AAAAAJ;", "orcid": ";0000-0002-4561-2954;;;", "linkedin": ";;https://www.linkedin.cn/incareer/in/jiacheng-sun-ab622b131;;", "or_profile": "~Mingyang_Yi1;~Ruoyu_Wang2;~Jiacheng_Sun1;~Zhenguo_Li1;~Zhi-Ming_Ma1", "aff": "Huawei Noah's ark Lab;Harvard University;Huawei Noah's Ark Lab;Huawei Noah's Ark Lab;Academy of Mathematics and Systems Science, Chinese Academy of Sciences, Chinese Academy of Sciences", "aff_domain": "huawei.com;harvard.edu;huawei.com;huawei.com;amss.ac.cn", "position": "Researcher;Postdoc;Senior Researcher;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nyi2023breaking,\ntitle={Breaking Correlation Shift via Conditional Invariant Regularizer},\nauthor={Mingyang Yi and Ruoyu Wang and Jiacheng Sun and Zhenguo Li and Zhi-Ming Ma},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=-jTaz3CMk72}\n}", "github": "", "project": "", "reviewers": "fPjQ;M8jr;Lqom;bzT6", "pdf_size": 947812, "recommendation": "6;6;8;8", "confidence": "4;4;3;4", "correctness": "3;4;3;3", "technical_novelty": "3;3;4;3", "empirical_novelty": "3;3;0;3", "wc_summary_paper": "65;78;111;108", "wc_strength_and_weaknesses": "98;320;1104;881", "wc_clarity_quality_novelty_and_reproducibility": "69;26;59;56", "wc_summary_review": "36;44;53;48", "wc_review": "268;468;1327;1093", "wc_reply_reviewers": "92;0;224;0", "wc_reply_authors": "825;998;2248;1167", "reply_reviewers": "1;0;1;0", "reply_authors": "2;2;4;3", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], 
"correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 1.299038105676658 ], "wc_summary_paper_avg": [ 90.5, 19.576771950451892 ], "wc_strength_and_weaknesses_avg": [ 600.75, 407.2403313769401 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 52.5, 16.03901493234544 ], "wc_summary_review_avg": [ 45.25, 6.219927652312364 ], "wc_review_avg": [ 789.0, 434.8396256092584 ], "wc_reply_reviewers_avg": [ 79.0, 91.7551088495894 ], "wc_reply_authors_avg": [ 1309.5, 555.1713699390486 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": -0.5773502691896257, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5786946013078665037&as_sdt=8000005&sciodt=0,19&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=-jTaz3CMk72", "email": "huawei.com;harvard.edu;huawei.com;huawei.com;amss.ac.cn", "author_num": 5, "aff_unique_index": "0;1;0;0;2", "aff_unique_norm": "Huawei;Harvard University;Chinese Academy of Sciences", "aff_unique_dep": "Noah's ark Lab;;Academy of Mathematics and Systems Science", "aff_unique_url": "https://www.huawei.com;https://www.harvard.edu;http://www.cas.cn", "aff_unique_abbr": "Huawei;Harvard;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "China;United States" }, { "title": "Localized Randomized Smoothing for Collective Robustness Certification", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11175", "id": "-k7Lvk0GpBl", "poster": "/media/PosterPDFs/ICLR%202023/11175.png?t=1682682019.87089", "openreview": "https://openreview.net/forum?id=-k7Lvk0GpBl", "slides": "https://iclr.cc/virtual/2023/poster/11175", "video": "https://iclr.cc/virtual/2023/poster/11175", "author_site": "Jan Schuchardt, Tom Wollschl\u00e4ger, Aleksandar Bojchevski, Stephan G\u00fcnnemann", "tldr": "We propose a novel collective robustness certificate based on randomized smoothing that uses different anisotropic smoothign distribution for the different outputs of a multi-output model.", "abstract": "Models for image segmentation, node classification and many other tasks map a single input to multiple labels. By perturbing this single shared input (e.g. the image) an adversary can manipulate several predictions (e.g. misclassify several pixels). Collective robustness certification is the task of provably bounding the number of robust predictions under this threat model. The only dedicated method that goes beyond certifying each output independently is limited to strictly local models, where each prediction is associated with a small receptive field. We propose a more general collective robustness certificate for all types of models. We further show that this approach is beneficial for the larger class of softly local models, where each output is dependent on the entire input but assigns different levels of importance to different input regions (e.g. based on their proximity in the image). The certificate is based on our novel localized randomized smoothing approach, where the random perturbation strength for different input regions is proportional to their importance for the outputs. 
Localized smoothing Pareto-dominates existing certificates on both image segmentation and node classification tasks, simultaneously offering higher accuracy and stronger certificates.", "keywords": "Robustness;Certification;Verification;Trustworthiness;Graph neural networks", "primary_area": "", "supplementary_material": "", "author": "Jan Schuchardt;Tom Wollschl\u00e4ger;Aleksandar Bojchevski;Stephan G\u00fcnnemann", "authorids": "~Jan_Schuchardt1;~Tom_Wollschl\u00e4ger1;~Aleksandar_Bojchevski1;~Stephan_G\u00fcnnemann1", "gender": ";M;M;M", "homepage": "https://www.cs.cit.tum.de/daml/team/jan-schuchardt/;https://www.linkedin.com/in/wollschlaeger/;https://abojchevski.github.io/;http://www.daml.in.tum.de", "dblp": "241/5487;332/0829;203/8114;43/3011", "google_scholar": "O-cixlwAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.de/citations?user=F1APiN4AAAAJ;", "orcid": ";;;", "linkedin": ";wollschlaeger/;;", "or_profile": "~Jan_Schuchardt1;~Tom_Wollschl\u00e4ger1;~Aleksandar_Bojchevski1;~Stephan_G\u00fcnnemann1", "aff": "Department of Informatics, Technical University Munich;Technische Universit\u00e4t M\u00fcnchen;CISPA Helmholtz Center for Information Security;Technical University Munich", "aff_domain": "in.tum.de;tum.de;cispa.de;tum.de", "position": "PhD student;PhD student;Principal Researcher;Professor", "bibtex": "@inproceedings{\nschuchardt2023localized,\ntitle={Localized Randomized Smoothing for Collective Robustness Certification},\nauthor={Jan Schuchardt and Tom Wollschl{\\\"a}ger and Aleksandar Bojchevski and Stephan G{\\\"u}nnemann},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=-k7Lvk0GpBl}\n}", "github": "", "project": "", "reviewers": "gYEN;7Fc5;jKFf", "pdf_size": 1775013, "recommendation": "6;8;8", "confidence": "4;4;4", "correctness": "3;4;4", "technical_novelty": "3;3;3", "empirical_novelty": "3;0;3", "wc_summary_paper": "51;85;177", "wc_strength_and_weaknesses": "210;87;55", "wc_clarity_quality_novelty_and_reproducibility": "35;43;190", "wc_summary_review": "51;76;23", "wc_review": "347;291;445", "wc_reply_reviewers": "93;0;30", "wc_reply_authors": "1279;35;490", "reply_reviewers": "1;0;1", "reply_authors": "2;1;1", "recommendation_avg": [ 7.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 1.4142135623730951 ], "wc_summary_paper_avg": [ 104.33333333333333, 53.224889749898864 ], "wc_strength_and_weaknesses_avg": [ 117.33333333333333, 66.81483534531999 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 89.33333333333333, 71.25696847014723 ], "wc_summary_review_avg": [ 50.0, 21.64871050817269 ], "wc_review_avg": [ 361.0, 63.64484791926733 ], "wc_reply_reviewers_avg": [ 41.0, 38.7556447501522 ], "wc_reply_authors_avg": [ 601.3333333333334, 513.9262809219064 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.9999999999999998, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1992399688580716259&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=-k7Lvk0GpBl", "email": "in.tum.de;tum.de;cispa.de;tum.de", "author_num": 4, 
"aff_unique_index": "0;1;2;3", "aff_unique_norm": "Technical University Munich;Technische Universit\u00e4t M\u00fcnchen;CISPA Helmholtz Center for Information Security;Technical University of Munich", "aff_unique_dep": "Department of Informatics;;;", "aff_unique_url": "https://www.tum.de;https://www.tum.de;https://www.cispa.de/;https://www.tum.de", "aff_unique_abbr": "TUM;TUM;CISPA;TUM", "aff_campus_unique_index": "0", "aff_campus_unique": "Munich;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "id": "-kAWfaLkPT3", "title": "Multi-Environment Pretraining Enables Transfer to Action Limited Datasets", "track": "main", "status": "Reject", "tldr": "", "abstract": "Using massive datasets to train large-scale models has emerged as a dominant approach for broad generalization in natural language and vision applications. In reinforcement learning, however, a key challenge is that available data of sequential decision making is often not annotated with actions - for example, videos of game-play are much more available than sequences of frames paired with the logged game controls. We propose to circumvent this challenge by combining large but sparsely-annotated datasets from a \\emph{target} environment of interest with fully-annotated datasets from various other \\emph{source} environments. Our method, Action Limited PreTraining (ALPT), leverages the generalization capabilities of inverse dynamics modelling (IDM) to label missing action data in the target environment. We show that utilizing even one additional environment dataset of labelled data during IDM pretraining gives rise to substantial improvements in generating action labels for unannotated sequences. We evaluate our method on benchmark game-playing environments and show that we can significantly improve game performance and generalization capability compared to other approaches, even when using annotated datasets equivalent to only $12$ minutes of gameplay. 
", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/30a92cd9cf402da56807c9544050971e08307bca.zip", "author": "David Venuto;Sherry Yang;Pieter Abbeel;Doina Precup;Igor Mordatch;Ofir Nachum", "authorids": "~David_Venuto1;~Sherry_Yang1;~Pieter_Abbeel2;~Doina_Precup1;~Igor_Mordatch4;~Ofir_Nachum1", "gender": "F;M;F;M;M;M", "homepage": "https://sherryy.github.io;https://people.eecs.berkeley.edu/~pabbeel/;http://cs.mcgill.ca/~dprecup/;https://scholar.google.com/citations?user=C-ZlBWMAAAAJ&hl=en;;", "dblp": ";;p/DoinaPrecup;;;21/17", "google_scholar": "7c1B_fIAAAAJ;https://scholar.google.com.tw/citations?user=vtwH6GkAAAAJ;https://scholar.google.com.tw/citations?user=j54VcVEAAAAJ;C-ZlBWMAAAAJ;https://scholar.google.ca/citations?user=32rbUtYAAAAJ;", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Sherry_Yang1;~Pieter_Abbeel2;~Doina_Precup1;~Ofir_Nachum1;~David_Anthony_Venuto1;~Igor_Mordatch1", "aff": "University of California, Berkeley;Covariant;McGill University;OpenAI;Mila;OpenAI", "aff_domain": "berkeley.edu;covariant.ai;mcgill.ca;openai.com;mila.quebec;openai.com", "position": "Student;Founder;Associate Professor;Researcher;PhD student;Research Scientist", "bibtex": "@misc{\nvenuto2023multienvironment,\ntitle={Multi-Environment Pretraining Enables Transfer to Action Limited Datasets},\nauthor={David Venuto and Sherry Yang and Pieter Abbeel and Doina Precup and Igor Mordatch and Ofir Nachum},\nyear={2023},\nurl={https://openreview.net/forum?id=-kAWfaLkPT3}\n}", "github": "", "project": "", "reviewers": "qasR;hAS5;aonF;PQGj;NLz9", "site": "https://openreview.net/forum?id=-kAWfaLkPT3", "pdf_size": 1387939, "recommendation": "3;3;5;6;8", "confidence": "4;4;2;3;4", "correctness": "2;3;4;3;3", "technical_novelty": "2;2;3;3;3", "empirical_novelty": "3;2;0;3;3", "wc_summary_paper": "112;61;109;175;78", "wc_strength_and_weaknesses": "482;238;228;547;196", "wc_clarity_quality_novelty_and_reproducibility": "21;225;22;218;27", "wc_summary_review": "45;298;17;41;33", "wc_review": "660;822;376;981;334", "wc_reply_reviewers": "0;41;0;49;0", "wc_reply_authors": "958;664;245;1064;466", "reply_reviewers": "0;1;0;1;0", "reply_authors": "4;2;2;3;2", "recommendation_avg": [ 5.0, 1.8973665961010275 ], "confidence_avg": [ 3.4, 0.8 ], "correctness_avg": [ 3.0, 0.6324555320336759 ], "technical_novelty_avg": [ 2.6, 0.4898979485566356 ], "empirical_novelty_avg": [ 2.2, 1.16619037896906 ], "wc_summary_paper_avg": [ 107.0, 39.01281840626232 ], "wc_strength_and_weaknesses_avg": [ 338.2, 146.06902477938297 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 102.6, 97.12795684044836 ], "wc_summary_review_avg": [ 86.8, 106.03471129776324 ], "wc_review_avg": [ 634.6, 250.19640285183957 ], "wc_reply_reviewers_avg": [ 18.0, 22.190087877248256 ], "wc_reply_authors_avg": [ 679.4, 303.3200290122629 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 2.6, 0.8 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.13176156917368248, "corr_recommendation_correctness": 0.33333333333333337, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6048693456765016189&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff_unique_index": "0;1;2;3;4;3", "aff_unique_norm": "University of California, Berkeley;Covariant;McGill University;OpenAI;Mila", "aff_unique_dep": ";;;;Quebec Artificial Intelligence Institute", "aff_unique_url": 
"https://www.berkeley.edu;;https://www.mcgill.ca;https://openai.com;https://mila.quebec", "aff_unique_abbr": "UC Berkeley;;McGill;OpenAI;Mila", "aff_campus_unique_index": "0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;2;0;2;0", "aff_country_unique": "United States;;Canada" }, { "id": "-kzQHkTvyMg", "title": "On the Expressive Equivalence Between Graph Convolution and Attention Models", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Graph neural networks (GNNs) have achieved remarkable successes in various graph tasks, and recent years have witnessed a flourishing growth in research regarding GNNs' expressive power. The number of linear regions generated from GNNs is a recently considered metric that quantifies GNNs' capacity. The estimate of the number of linear regions has been previously developed for deep and convolution neural networks (DNN and CNN). In this paper, we compare the expressive power of the classic graph convolution network (GCN) and attention based models in terms of their capability to generate linear regions. We show that the prediction advantage of attention models can be matched or even surpassed by enhancing GCN with refined graph Ricci curvature resulting the so-called high rank graph convolution network (HRGCN). Thus, the two models are equivalent to each other in terms of expressive power. Experimental results show that the proposed HRGCN model outperforms the state-of-the-art results in various classification and prediction tasks.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/038a4ec36e7597f088fecfbec1bacfc0fca5e1a4.zip", "author": "Dai Shi;zhiqi shao;Andi Han;Junbin Gao;Yi Guo", "authorids": "~Dai_Shi1;zsha2911@uni.sydney.edu.au;~Andi_Han1;~Junbin_Gao1;~Yi_Guo3", "gender": "M;;M;;", "homepage": "https://github.com/EEthanShi;;https://github.com/andyjm3;https://www.sydney.edu.au/business/about/our-people/academic-staff/junbin-gao.html;", "dblp": "96/8513;;268/7976.html;30/3983;24/3508-1", "google_scholar": ";;AKHQHs0AAAAJ;https://scholar.google.com.au/citations?user=3-KJN8IAAAAJ;", "orcid": "0000-0002-6600-4325;;0000-0003-4655-655X;0000-0001-9803-0256;", "linkedin": ";;;;", "or_profile": "~Dai_Shi1;zsha2911@uni.sydney.edu.au;~Andi_Han1;~Junbin_Gao1;~Yi_Guo3", "aff": "University of Sydney;;University of Sydney;University of Sydney;Western Sydney University", "aff_domain": "sydney.edu.au;;sydney.edu.au;sydney.edu.au;wsu.edu.au", "position": "Researcher;;PhD student;Full Professor;Associate Professor", "bibtex": "@misc{\nshi2023on,\ntitle={On the Expressive Equivalence Between Graph Convolution and Attention Models},\nauthor={Dai Shi and zhiqi shao and Andi Han and Junbin Gao and Yi Guo},\nyear={2023},\nurl={https://openreview.net/forum?id=-kzQHkTvyMg}\n}", "github": "", "project": "", "reviewers": "v1us;4BTp;9ZhC;vaQh", "site": "https://openreview.net/forum?id=-kzQHkTvyMg", "pdf_size": 430822, "recommendation": "1;3;8;8", "confidence": "2;4;3;2", "correctness": "2;3;4;4", "technical_novelty": "2;2;4;4", "empirical_novelty": "0;2;3;4", "wc_summary_paper": "90;86;57;108", "wc_strength_and_weaknesses": "76;421;107;145", "wc_clarity_quality_novelty_and_reproducibility": "1142;68;14;59", "wc_summary_review": "61;33;38;67", "wc_review": "1369;608;216;379", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1740;1272;189;342", "reply_reviewers": "0;0;0;0", "reply_authors": "3;2;1;1", "recommendation_avg": [ 5.0, 3.082207001484488 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "correctness_avg": [ 
3.25, 0.82915619758885 ], "technical_novelty_avg": [ 3.0, 1.0 ], "empirical_novelty_avg": [ 2.25, 1.479019945774904 ], "wc_summary_paper_avg": [ 85.25, 18.2944663764757 ], "wc_strength_and_weaknesses_avg": [ 187.25, 137.15023696661993 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 320.75, 474.590020438694 ], "wc_summary_review_avg": [ 49.75, 14.515078366994786 ], "wc_review_avg": [ 643.0, 441.68031425455223 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 885.75, 644.2159478777284 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.09782319760890369, "corr_recommendation_correctness": 0.9782319760890369, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:-kfQp8PKhuYJ:scholar.google.com/&scioq=On+the+Expressive+Equivalence+Between+Graph+Convolution+and+Attention+Models&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University of Sydney;Western Sydney University", "aff_unique_dep": ";", "aff_unique_url": "https://www.sydney.edu.au;https://www.westernsydney.edu.au", "aff_unique_abbr": "USYD;WSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Australia" }, { "title": "Sequential Gradient Coding For Straggler Mitigation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11099", "id": "-lGvSmht7a", "poster": "", "openreview": "https://openreview.net/forum?id=-lGvSmht7a", "slides": "https://iclr.cc/virtual/2023/poster/11099", "video": "https://iclr.cc/virtual/2023/poster/11099", "author_site": "Nikhil Krishnan Muralee Krishnan, MohammadReza Ebrahimi, Ashish Khisti", "tldr": "We propose to improve gradient coding by exploiting the temporal dimension while training deep learning models in distributed cloud systems.", "abstract": "In distributed computing, slower nodes (stragglers) usually become a bottleneck. Gradient Coding (GC), introduced by Tandon et al., is an efficient technique that uses principles of error-correcting codes to distribute gradient computation in the presence of stragglers. In this paper, we consider the distributed computation of a sequence of gradients $\\{g(1),g(2),\\ldots,g(J)\\}$, where processing of each gradient $g(t)$ starts in round-$t$ and finishes by round-$(t+T)$. Here $T\\geq 0$ denotes a delay parameter. For the GC scheme, coding is only across computing nodes and this results in a solution where $T=0$. On the other hand, having $T>0$ allows for designing schemes which exploit the temporal dimension as well. In this work, we propose two schemes that demonstrate improved performance compared to GC. Our first scheme combines GC with selective repetition of previously unfinished tasks and achieves improved straggler mitigation. In our second scheme, which constitutes our main contribution, we apply GC to a subset of the tasks and repetition for the remainder of the tasks. We then multiplex these two classes of tasks across workers and rounds in an adaptive manner, based on past straggler patterns. Using theoretical analysis, we demonstrate that our second scheme achieves significant reduction in the computational load. In our experiments, we study a practical setting of concurrently training multiple neural networks over an AWS Lambda cluster involving 256 worker nodes, where our framework naturally applies. 
We demonstrate that the latter scheme can yield a 16\\% improvement in runtime over the baseline GC scheme, in the presence of naturally occurring, non-simulated stragglers.\n", "keywords": "gradient coding;straggler mitigation;distributed computation;coded computing", "primary_area": "", "supplementary_material": "/attachment/d9b44118174bd6ab84743a5255e5695f03914243.zip", "author": "Nikhil Krishnan Muralee Krishnan;MohammadReza Ebrahimi;Ashish J Khisti", "authorids": "~Nikhil_Krishnan_Muralee_Krishnan1;~MohammadReza_Ebrahimi1;~Ashish_J_Khisti1", "gender": "M;M;M", "homepage": ";;https://www.comm.utoronto.ca/~akhisti/", "dblp": ";204/4421;84/5679.html", "google_scholar": "ZQJxEtEAAAAJ;mkSGwPYAAAAJ;https://scholar.google.ca/citations?user=jiGeAg4AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Nikhil_Krishnan_Muralee_Krishnan1;~MohammadReza_Ebrahimi1;~Ashish_J_Khisti1", "aff": "Indian Institute of Technology, Palakkad;Qualcomm Inc, QualComm;Toronto University", "aff_domain": "iitpkd.ac.in;qti.qualcomm.com;utoronto.ca", "position": "DST-INSPIRE Faculty Fellow;Intern;Professor", "bibtex": "@inproceedings{\nkrishnan2023sequential,\ntitle={Sequential Gradient Coding For Straggler Mitigation},\nauthor={Nikhil Krishnan Muralee Krishnan and MohammadReza Ebrahimi and Ashish J Khisti},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=-lGvSmht7a}\n}", "github": "", "project": "", "reviewers": "dEGh;9BzY;82EB;euSx", "pdf_size": 4916810, "recommendation": "6;6;6;8", "confidence": "3;4;4;4", "correctness": "4;4;4;4", "technical_novelty": "3;3;2;2", "empirical_novelty": "3;2;0;3", "wc_summary_paper": "45;176;76;109", "wc_strength_and_weaknesses": "201;306;342;65", "wc_clarity_quality_novelty_and_reproducibility": "18;157;112;19", "wc_summary_review": "63;73;68;59", "wc_review": "327;712;598;252", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1131;1280;1821;79", "reply_reviewers": "0;0;0;0", "reply_authors": "2;2;3;1", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 4.0, 0.0 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.0, 1.224744871391589 ], "wc_summary_paper_avg": [ 101.5, 48.602983447521 ], "wc_strength_and_weaknesses_avg": [ 228.5, 107.67659912905867 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 76.5, 60.143578210811505 ], "wc_summary_review_avg": [ 65.75, 5.261891294962297 ], "wc_review_avg": [ 472.25, 189.01107771768298 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1077.75, 631.201780336526 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11964907132500961678&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "pdf": "https://openreview.net/pdf?id=-lGvSmht7a", "email": "iitpkd.ac.in;qti.qualcomm.com;utoronto.ca", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Indian Institute of Technology;Qualcomm Incorporated;University of Toronto", "aff_unique_dep": ";;", "aff_unique_url": "https://www.iitpkd.ac.in;https://www.qualcomm.com;https://www.utoronto.ca", "aff_unique_abbr": "IIT Palakkad;Qualcomm;U of T", "aff_campus_unique_index": "0", "aff_campus_unique": "Palakkad;", "aff_country_unique_index": 
"0;1;2", "aff_country_unique": "India;United States;Canada" }, { "id": "-ltZ1uw8ZE7", "title": "Variational Imbalanced Regression", "track": "main", "status": "Reject", "tldr": "We propose a probabilistic deep learning model, dubbed variational imbalanced regression (VIR), which not only performs well in imbalanced regression but naturally produces reasonable uncertainty estimation as a byproduct.", "abstract": "Existing regression models tend to fall short in both accuracy and uncertainty estimation when the label distribution is imbalanced. In this paper, we propose a probabilistic deep learning model, dubbed variational imbalanced regression (VIR), which not only performs well in imbalanced regression but naturally produces reasonable uncertainty estimation as a byproduct. Different from typical variational autoencoders assuming I.I.D. representation (a data point's representation is not directly affected by other data points), our VIR borrows data with similar regression labels to compute the latent representation's variational distribution; furthermore, different from deterministic regression models producing point estimates, VIR predicts the entire normal-inverse-gamma distributions and modulates the associated conjugate distributions to impose probabilistic reweighting on the imbalanced data, thereby providing better uncertainty estimation. Experiments in several real-world datasets show that our VIR can outperform state-of-the-art imbalanced regression models in terms of both accuracy and uncertainty estimation. ", "keywords": "probabilistic methods;variational inference;imbalanced regression;uncertainty estimation", "primary_area": "", "supplementary_material": "", "author": "Ziyan Wang;Hao Wang", "authorids": "~Ziyan_Wang4;~Hao_Wang3", "gender": "M;M", "homepage": ";http://www.wanghao.in", "dblp": ";w/HaoWang-14", "google_scholar": "BP_1to8AAAAJ;NrOA9QoAAAAJ", "orcid": "0000-0001-5624-5275;", "linkedin": ";", "or_profile": "~Ziyan_Wang4;~Hao_Wang4", "aff": "Georgia Institute of Technology;Rutgers University", "aff_domain": "gatech.edu;cs.rutgers.edu", "position": "PhD student;Assistant Professor", "bibtex": "@misc{\nwang2023variational,\ntitle={Variational Imbalanced Regression},\nauthor={Ziyan Wang and Hao Wang},\nyear={2023},\nurl={https://openreview.net/forum?id=-ltZ1uw8ZE7}\n}", "github": "", "project": "", "reviewers": "xTgV;5JWG;ddev;bYud;EUDh", "site": "https://openreview.net/forum?id=-ltZ1uw8ZE7", "pdf_size": 483612, "recommendation": "3;5;6;6;6", "confidence": "4;4;3;2;3", "correctness": "2;4;2;3;3", "technical_novelty": "2;3;3;2;3", "empirical_novelty": "2;2;3;2;3", "wc_summary_paper": "43;69;82;62;51", "wc_strength_and_weaknesses": "709;325;142;136;183", "wc_clarity_quality_novelty_and_reproducibility": "75;339;132;17;15", "wc_summary_review": "41;65;26;55;34", "wc_review": "868;798;382;270;283", "wc_reply_reviewers": "672;284;0;0;0", "wc_reply_authors": "1278;2276;1508;532;892", "reply_reviewers": "1;1;0;0;0", "reply_authors": "2;4;3;1;2", "recommendation_avg": [ 5.2, 1.16619037896906 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "correctness_avg": [ 2.8, 0.7483314773547882 ], "technical_novelty_avg": [ 2.6, 0.4898979485566356 ], "empirical_novelty_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_paper_avg": [ 61.4, 13.632314550361578 ], "wc_strength_and_weaknesses_avg": [ 299.0, 216.078689370331 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 115.6, 119.72234544979479 ], "wc_summary_review_avg": [ 44.2, 14.105318146004365 ], "wc_review_avg": [ 520.2, 
259.26850946460894 ], "wc_reply_reviewers_avg": [ 191.2, 264.36822804565605 ], "wc_reply_authors_avg": [ 1297.2, 592.042701162678 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 2.4, 1.019803902718557 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.7333587976225691, "corr_recommendation_correctness": 0.2750095491084634, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:eI2plSCVYvMJ:scholar.google.com/&scioq=Variational+Imbalanced+Regression&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Georgia Institute of Technology;Rutgers University", "aff_unique_dep": ";", "aff_unique_url": "https://www.gatech.edu;https://www.rutgers.edu", "aff_unique_abbr": "Georgia Tech;Rutgers", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Neural Image-based Avatars: Generalizable Radiance Fields for Human Avatar Modeling", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10830", "id": "-ng-FXFlzgK", "poster": "/media/PosterPDFs/ICLR%202023/10830.png?t=1680899509.7776244", "openreview": "https://openreview.net/forum?id=-ng-FXFlzgK", "slides": "https://iclr.cc/virtual/2023/poster/10830", "video": "https://iclr.cc/virtual/2023/poster/10830", "author_site": "Youngjoong Kwon, Dahun Kim, Duygu Ceylan, Henry Fuchs", "tldr": "", "abstract": "We present a method that enables synthesizing novel views and novel poses of arbitrary human performers from sparse multi-view images. A key ingredient of our method is a hybrid appearance blending module that combines the advantages of the implicit body NeRF representation and image-based rendering. Existing generalizable human NeRF methods that are conditioned on the body model have shown robustness against the geometric variation of arbitrary human performers. Yet they often exhibit blurry results when generalized onto unseen identities. Meanwhile, image-based rendering shows high-quality results when sufficient observations are available, whereas it suffers artifacts in sparse-view settings. We propose Neural Image-based Avatars (NIA) that exploits the best of those two methods: to maintain robustness under new articulations and self-occlusions while directly leveraging the available (sparse) source view colors to preserve appearance details of new subject identities. Our hybrid design outperforms recent methods on both in-domain identity generalization as well as challenging cross-dataset generalization settings. 
Also, in terms of the pose generalization, our method outperforms even the per-subject optimized animatable NeRF methods.", "keywords": "Generalizable human radiance fields;Human performance capture;Human NeRF;Neural radiance fields", "primary_area": "", "supplementary_material": "/attachment/9fbf06fafeb8f72ad28fc41cd94dc1b8943f0556.zip", "author": "YoungJoong Kwon;Dahun Kim;Duygu Ceylan;Henry Fuchs", "authorids": "~YoungJoong_Kwon1;~Dahun_Kim1;~Duygu_Ceylan1;~Henry_Fuchs1", "gender": ";;;M", "homepage": ";;http://www.duygu-ceylan.com/;http://www.cs.unc.edu/~fuchs/", "dblp": ";205/2487;118/2739;f/HenryFuchs", "google_scholar": ";mHpN1xoAAAAJ;56Kj2QoAAAAJ;https://scholar.google.com.tw/citations?user=guhwcP8AAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~YoungJoong_Kwon1;~Dahun_Kim1;~Duygu_Ceylan1;~Henry_Fuchs1", "aff": ";Google;Adobe Systems;University of North Carolina, Chapel Hill", "aff_domain": ";google.com;adobe.com;cs.unc.edu", "position": ";Research Scientist;Research Scientist;Full Professor", "bibtex": "@inproceedings{\nkwon2023neural,\ntitle={Neural Image-based Avatars: Generalizable Radiance Fields for Human Avatar Modeling},\nauthor={YoungJoong Kwon and Dahun Kim and Duygu Ceylan and Henry Fuchs},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=-ng-FXFlzgK}\n}", "github": "", "project": "", "reviewers": "Pdgp;28Gy;Ke1f;4ShL", "pdf_size": 8948798, "recommendation": "3;6;8;8", "confidence": "4;4;3;4", "correctness": "3;3;4;4", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;3;2;4", "wc_summary_paper": "49;125;41;42", "wc_strength_and_weaknesses": "448;139;99;92", "wc_clarity_quality_novelty_and_reproducibility": "26;75;84;19", "wc_summary_review": "81;58;42;46", "wc_review": "604;397;266;199", "wc_reply_reviewers": "140;0;0;59", "wc_reply_authors": "2705;522;226;478", "reply_reviewers": "1;0;0;1", "reply_authors": "6;1;1;2", "recommendation_avg": [ 6.25, 2.0463381929681126 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 64.25, 35.20919624189112 ], "wc_strength_and_weaknesses_avg": [ 194.5, 147.45253473575826 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 51.0, 28.78367592924851 ], "wc_summary_review_avg": [ 56.75, 15.188400179084036 ], "wc_review_avg": [ 366.5, 154.50970843283602 ], "wc_reply_reviewers_avg": [ 49.75, 57.40372374680932 ], "wc_reply_authors_avg": [ 982.75, 1000.7345739505556 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.5, 2.0615528128088303 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.49374193110101877, "corr_recommendation_correctness": 0.8551861104941366, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4519821255598635634&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=-ng-FXFlzgK", "email": ";google.com;adobe.com;cs.unc.edu", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Google;Adobe;University of North Carolina", "aff_unique_dep": "Google;Adobe Systems Incorporated;", "aff_unique_url": "https://www.google.com;https://www.adobe.com;https://www.unc.edu", "aff_unique_abbr": "Google;Adobe;UNC", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Mountain View;;Chapel Hill", "aff_country_unique_index": "0;0;0", 
"aff_country_unique": "United States" }, { "title": "On the Data-Efficiency with Contrastive Image Transformation in Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11525", "id": "-nm-rHXi5ga", "poster": "/media/PosterPDFs/ICLR%202023/11525.png?t=1682802845.8452652", "openreview": "https://openreview.net/forum?id=-nm-rHXi5ga", "slides": "https://iclr.cc/virtual/2023/poster/11525", "video": "https://iclr.cc/virtual/2023/poster/11525", "author_site": "Sicong Liu, Xi Zhang, Yushuo Li, Yifan Zhang, Jian Cheng", "tldr": "CoIT is a learnable image transformation for sample-efficiency improvement.", "abstract": "Data-efficiency has always been an essential issue in pixel-based reinforcement learning (RL). As the agent not only learns decision-making but also meaningful representations from images. The line of reinforcement learning with data augmentation shows significant improvements in sample-efficiency. However, it is challenging to guarantee the optimality invariant transformation, that is, the augmented data are readily recognized as a completely different state by the agent. In the end, we propose a contrastive invariant transformation (CoIT), a simple yet promising learnable data augmentation combined with standard model-free algorithms to improve sample-efficiency. Concretely, the differentiable CoIT leverages original samples with augmented samples and hastens the state encoder for a contrastive invariant embedding. We evaluate our approach on DeepMind Control Suite and Atari100K. Empirical results verify advances using CoIT, enabling it to outperform the new state-of-the-art on various tasks. Source code is available at https://github.com/mooricAnna/CoIT.", "keywords": "Reinforcement Learning;Data Augmentation;Self-Supervised Learning;Representation Learning", "primary_area": "", "supplementary_material": "/attachment/5e358364f001b55005f70356dec2cbcc84e5c73a.zip", "author": "Sicong Liu;Xi Sheryl Zhang;Yushuo Li;Yifan Zhang;Jian Cheng", "authorids": "~Sicong_Liu2;~Xi_Sheryl_Zhang1;~Yushuo_Li1;~Yifan_Zhang2;~Jian_Cheng7", "gender": "M;F;F;M;M", "homepage": "https://mooricanna.github.io/sicongliu1014.github.io/;;;;https://people.ucas.ac.cn/~chengjian?language=en", "dblp": ";;275/5182.html;57/4707-1.html;14/6145-1", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;2rI0LRoAAAAJ;Jx0Hd6UAAAAJ;6EmRro4AAAAJ;ZGCIUJ8AAAAJ", "orcid": ";;;;0000-0003-1289-2758", "linkedin": ";xi-sheryl-zhang-82765049/;;;", "or_profile": "~Sicong_Liu2;~Xi_Sheryl_Zhang1;~Yushuo_Li1;~Yifan_Zhang2;~Jian_Cheng7", "aff": "Nanjing University of Science and Technology;Institute of automation, Chinese academy of science, University of Chinese Academy of Sciences;;Institute of automation, Chinese academy of science;Institute of Automation, Chinese Academy of Sciences", "aff_domain": "njust.edu.cn;ia.ac.cn;;nlpr.ia.ac.cn;ia.ac.cn", "position": "MS student;Associate Professor;;Full Professor;Full Professor", "bibtex": "@inproceedings{\nliu2023on,\ntitle={On the Data-Efficiency with Contrastive Image Transformation in Reinforcement Learning},\nauthor={Sicong Liu and Xi Sheryl Zhang and Yushuo Li and Yifan Zhang and Jian Cheng},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=-nm-rHXi5ga}\n}", "github": "", "project": "", "reviewers": "RdJD;ao8s;yPYT;7fsn", "pdf_size": 2483105, "recommendation": "6;6;6;8", "confidence": "4;3;3;4", "correctness": "3;3;3;4", 
"technical_novelty": "2;2;2;3", "empirical_novelty": "2;3;2;4", "wc_summary_paper": "47;55;42;81", "wc_strength_and_weaknesses": "164;282;341;50", "wc_clarity_quality_novelty_and_reproducibility": "35;39;63;44", "wc_summary_review": "32;54;69;9", "wc_review": "278;430;515;184", "wc_reply_reviewers": "0;16;185;0", "wc_reply_authors": "795;680;645;147", "reply_reviewers": "0;1;1;0", "reply_authors": "2;1;1;1", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 56.25, 15.022899187573616 ], "wc_strength_and_weaknesses_avg": [ 209.25, 111.86906408833498 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 45.25, 10.732543966832841 ], "wc_summary_review_avg": [ 41.0, 22.68259244442751 ], "wc_review_avg": [ 351.75, 128.79513771878192 ], "wc_reply_reviewers_avg": [ 50.25, 78.07168180588913 ], "wc_reply_authors_avg": [ 566.75, 248.61453597889243 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 1.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10711972495855390175&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=-nm-rHXi5ga", "email": "njust.edu.cn;ia.ac.cn;;nlpr.ia.ac.cn;ia.ac.cn", "author_num": 5, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Nanjing University of Science and Technology;Chinese Academy of Sciences", "aff_unique_dep": ";Institute of Automation", "aff_unique_url": "http://www.nust.edu.cn/;http://www.ucas.ac.cn", "aff_unique_abbr": "NUST;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "-p5ZEVGtojQ", "title": "Continuous Depth Recurrent Neural Differential Equations", "track": "main", "status": "Reject", "tldr": "Proposing novel RNN models based on differential equations that continuously transform hidden states in both temporal and depth dimensions.", "abstract": "Recurrent neural networks (RNNs) have brought a lot of advancements in sequence labeling tasks and sequence data. However, their effectiveness is limited when the observations in the sequence are irregularly sampled, where the observations arrive at irregular time intervals. To address this, continuous time variants of the RNNs were introduced based on neural ordinary differential equations (NODE). They learn a better representation of the data using the continuous transformation of hidden states over time, taking into account the time interval between the observations. However, they are still limited in their capability as they use the discrete transformations and discrete number of layers (depth) over an input in the sequence to produce the output observation. We intend to address this limitation by proposing RNNs based on differential equations which model continuous transformations over depth and time to predict an output for a given input in the sequence. Specifically, we propose continuous depth recurrent neural differential equations (CDR-NDE) which generalizes RNN models by continuously evolving the hidden states in both the temporal and depth dimensions. 
CDR-NDE considers two separate differential equations over each of these dimensions and models the evolution in the temporal and depth directions alternatively. We also propose the CDR-NDE-heat model based on partial differential equations which treats the computation of hidden states as solving a heat equation over time. We demonstrate the effectiveness of the proposed models by comparing against the state-of-the-art RNN models on real world sequence modeling problems and data sets.", "keywords": "neural ordinary differential equations;recurrent neural networks;sequence data", "primary_area": "", "supplementary_material": "", "author": "Srinivas Anumasa;Geetakrishnasai Gunapati;P. K. Srijith", "authorids": "~Srinivas_Anumasa1;~Geetakrishnasai_Gunapati1;~P._K._Srijith1", "gender": "M;M;M", "homepage": "https://sites.google.com/view/brainiith/people?authuser=0#h.p_TmRjBsUKAuEP;;https://sites.google.com/site/pksrijith/home", "dblp": "256/7962;;120/8712", "google_scholar": "OjAbXBAAAAAJ;;https://scholar.google.com.tw/citations?user=C1YpEWsAAAAJ", "orcid": ";;", "linkedin": ";www.linkedin.com/in/geetakrishnasai-gunapati-889882a3;", "or_profile": "~Srinivas_Anumasa1;~Geetakrishnasai_Gunapati1;~Srijith_P_K1", "aff": "Mohamed bin Zayed University of Artificial Intelligence;;Indian Institute of Technology Hyderabad", "aff_domain": "mbzuai.ac.ae;;iith.ac.in", "position": "Postdoc;;Associate Professor", "bibtex": "@misc{\nanumasa2023continuous,\ntitle={Continuous Depth Recurrent Neural Differential Equations},\nauthor={Srinivas Anumasa and Geetakrishnasai Gunapati and P. K. Srijith},\nyear={2023},\nurl={https://openreview.net/forum?id=-p5ZEVGtojQ}\n}", "github": "", "project": "", "reviewers": "FygP;itQc;Ngo4", "site": "https://openreview.net/forum?id=-p5ZEVGtojQ", "pdf_size": 548639, "recommendation": "3;3;3", "confidence": "3;5;4", "correctness": "3;3;2", "technical_novelty": "2;2;2", "empirical_novelty": "2;2;0", "wc_summary_paper": "69;54;126", "wc_strength_and_weaknesses": "336;232;297", "wc_clarity_quality_novelty_and_reproducibility": "119;112;49", "wc_summary_review": "45;15;25", "wc_review": "569;413;497", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1091;39;1069", "reply_reviewers": "0;0;0", "reply_authors": "2;1;2", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 1.3333333333333333, 0.9428090415820634 ], "wc_summary_paper_avg": [ 83.0, 31.016124838541646 ], "wc_strength_and_weaknesses_avg": [ 288.3333333333333, 42.897811391983886 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 93.33333333333333, 31.47838764754143 ], "wc_summary_review_avg": [ 28.333333333333332, 12.47219128924647 ], "wc_review_avg": [ 493.0, 63.74950980203691 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 733.0, 490.8142893872046 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6022076350382050927&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1", "aff_unique_norm": "Mohamed bin Zayed University of Artificial Intelligence;Indian Institute of Technology Hyderabad", "aff_unique_dep": ";", "aff_unique_url": 
"https://mbzuai.ac.ae;https://www.iith.ac.in", "aff_unique_abbr": "MBZUAI;IIT Hyderabad", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hyderabad", "aff_country_unique_index": "0;1", "aff_country_unique": "United Arab Emirates;India" }, { "id": "-pAV454n6mS", "title": "GRAPHSENSOR: A Graph Attention Network for Time-Series Sensor Data", "track": "main", "status": "Reject", "tldr": "", "abstract": "Our work focuses on the exploration of the internal relationships of signals in an individual sensor. In particular, we address the problem of not being able to evaluate such inter-sensor relationships due to missing rich and explicit feature representation. To solve this problem, we propose GRAPHSENSOR, a graph attention network, with a shared-weight convolution feature encoder to generate the signal segments and learn the internal relationships between them. Furthermore, we enrich the representation of the features by utilizing a multi-head approach when creating the internal relationship graph. Compared with traditional multi-head approaches, we propose a more efficient convolution-based multi-head mechanism, which only requires 56% of model parameters compared with the best multi-head baseline as demonstrated in the experiments. Moreover, GRAPHSENSOR is capable of achieving the state-of-the-art performance in the electroencephalography dataset and improving the accuracy by 13.8% compared to the best baseline in an inertial measurement unit (IMU) dataset.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "jianchao lu;Yuzhe Tian;Quan Z. Sheng;Xi Zheng", "authorids": "~jianchao_lu1;~Yuzhe_Tian1;~Quan_Z._Sheng1;~Xi_Zheng1", "gender": "M;M;M;M", "homepage": ";;http://web.science.mq.edu.au/~qsheng/;https://itseg.org", "dblp": ";;s/QuanZSheng;", "google_scholar": "https://scholar.google.com.au/citations?user=C_VhpDYAAAAJ;;https://scholar.google.com/citations?hl=en;SNfRGJoAAAAJ", "orcid": "0000-0003-0788-1448; 0000-0002-5742-7414;0000-0002-3326-4147;", "linkedin": ";;;", "or_profile": "~jianchao_lu1;~Yuzhe_Tian1;~Quan_Z._Sheng1;~Xi_Zheng1", "aff": "Macquarie University;;Macquarie University;Macquarie University", "aff_domain": "mq.edu.au;;mq.edu.au;mq.edu.au", "position": "PhD student;;Full Professor;Associate Professor", "bibtex": "@misc{\nlu2023graphsensor,\ntitle={{GRAPHSENSOR}: A Graph Attention Network for Time-Series Sensor Data},\nauthor={jianchao lu and Yuzhe Tian and Quan Z. 
Sheng and Xi Zheng},\nyear={2023},\nurl={https://openreview.net/forum?id=-pAV454n6mS}\n}", "github": "", "project": "", "reviewers": "jWJx;vaNK;Vp7E", "site": "https://openreview.net/forum?id=-pAV454n6mS", "pdf_size": 2196699, "recommendation": "3;5;6", "confidence": "3;4;4", "correctness": "2;3;4", "technical_novelty": "2;2;3", "empirical_novelty": "1;3;3", "wc_summary_paper": "66;35;52", "wc_strength_and_weaknesses": "56;163;67", "wc_clarity_quality_novelty_and_reproducibility": "26;32;18", "wc_summary_review": "69;60;51", "wc_review": "217;290;188", "wc_reply_reviewers": "0;166;0", "wc_reply_authors": "823;468;106", "reply_reviewers": "0;1;0", "reply_authors": "2;1;1", "recommendation_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.9428090415820634 ], "wc_summary_paper_avg": [ 51.0, 12.675435561221029 ], "wc_strength_and_weaknesses_avg": [ 95.33333333333333, 48.05783552715993 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 25.333333333333332, 5.734883511361751 ], "wc_summary_review_avg": [ 60.0, 7.3484692283495345 ], "wc_review_avg": [ 231.66666666666666, 42.913349386357105 ], "wc_reply_reviewers_avg": [ 55.333333333333336, 78.25315045131126 ], "wc_reply_authors_avg": [ 465.6666666666667, 292.71867419456214 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.9449111825230683, "corr_recommendation_correctness": 0.9819805060619659, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:zp-T1nPW5hcJ:scholar.google.com/&scioq=GRAPHSENSOR:+A+Graph+Attention+Network+for+Time-Series+Sensor+Data&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Macquarie University", "aff_unique_dep": "", "aff_unique_url": "https://www.mq.edu.au", "aff_unique_abbr": "MQ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Australia" }, { "title": "SeaFormer: Squeeze-enhanced Axial Transformer for Mobile Semantic Segmentation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11994", "id": "-qg8MQNrxZw", "poster": "/media/PosterPDFs/ICLR%202023/11994.png?t=1681271855.870793", "openreview": "https://openreview.net/forum?id=-qg8MQNrxZw", "slides": "https://iclr.cc/virtual/2023/poster/11994", "video": "https://iclr.cc/virtual/2023/poster/11994", "author_site": "Qiang Wan, Zilong Huang, Jiachen Lu, Gang Yu, Li Zhang", "tldr": "", "abstract": "Since the introduction of Vision Transformers, the landscape of many computer vision tasks (e.g., semantic segmentation), which had been overwhelmingly dominated by CNNs, has recently been significantly revolutionized. However, the computational cost and memory requirements render these methods unsuitable for mobile devices, especially for the high-resolution per-pixel semantic segmentation task. In this paper, we introduce a new method, the squeeze-enhanced Axial Transformer (SeaFormer), for mobile semantic segmentation. Specifically, we design a generic attention block characterized by the formulation of squeeze Axial and spatial enhancement.
It can be further used to create a family of backbone architectures with superior cost-effectiveness. Coupled with a light segmentation head, we demonstrate state-of-the-art results on the ADE20K, Pascal Context and COCO-stuff datasets. Critically, we beat both mobile-friendly rivals and Transformer-based counterparts with better performance and lower latency without bells and whistles. Beyond semantic segmentation, we further apply the proposed SeaFormer architecture to the image classification problem, demonstrating its potential to serve as a versatile mobile-friendly backbone.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/cc036c33a5923402f4597ff8a6ad68a946e06f0b.zip", "author": "Qiang Wan;Zilong Huang;Jiachen Lu;Gang YU;Li Zhang", "authorids": "~Qiang_Wan1;~Zilong_Huang1;~Jiachen_Lu2;~Gang_YU2;~Li_Zhang5", "gender": ";M;;M;M", "homepage": ";http://speedinghzl.github.io/;;https://skicyyu.org/;http://www.robots.ox.ac.uk/~lz/", "dblp": ";185/9199;;;89/5992-40", "google_scholar": ";GW9vw8UAAAAJ;;https://scholar.google.com.sg/citations?user=BJdigYsAAAAJ;-wOTCE8AAAAJ", "orcid": ";;;0000-0001-5570-2710;", "linkedin": ";;;;", "or_profile": "~Qiang_Wan1;~Zilong_Huang1;~Jiachen_Lu2;~Gang_YU2;~Li_Zhang5", "aff": ";Tencent GY Lab;;Tencent;Fudan University", "aff_domain": ";tencent.com;;tencent.com;fudan.edu.cn", "position": ";Researcher;;Research Scientist;Associate Professor", "bibtex": "@inproceedings{\nwan2023seaformer,\ntitle={SeaFormer: Squeeze-enhanced Axial Transformer for Mobile Semantic Segmentation},\nauthor={Qiang Wan and Zilong Huang and Jiachen Lu and Gang YU and Li Zhang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=-qg8MQNrxZw}\n}", "github": "", "project": "", "reviewers": "wqtJ;xU22;6wEd;sRfJ", "pdf_size": 1859432, "recommendation": "3;5;8;8", "confidence": "4;4;2;3", "correctness": "3;3;4;3", "technical_novelty": "2;2;3;2", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "63;133;73;59", "wc_strength_and_weaknesses": "284;342;188;174", "wc_clarity_quality_novelty_and_reproducibility": "57;112;17;34", "wc_summary_review": "45;68;68;57", "wc_review": "449;655;346;324", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1304;714;158;399", "reply_reviewers": "0;0;0;0", "reply_authors": "3;1;1;1", "recommendation_avg": [ 6.0, 2.1213203435596424 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 82.0, 29.88310559496787 ], "wc_strength_and_weaknesses_avg": [ 247.0, 69.28924880528002 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 55.0, 35.83992187491485 ], "wc_summary_review_avg": [ 59.5, 9.5 ], "wc_review_avg": [ 443.5, 130.90931975990097 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 643.75, 429.1621925333125 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.8528028654224419, "corr_recommendation_correctness": 0.5443310539518174, "gs_citation": 189, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4727679837063235236&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=-qg8MQNrxZw", "email": ";tencent.com;;tencent.com;fudan.edu.cn", "author_num": 5, "aff_unique_index": "0;0;1", 
"aff_unique_norm": "Tencent;Fudan University", "aff_unique_dep": "GY Lab;", "aff_unique_url": "https://www.tencent.com;https://www.fudan.edu.cn", "aff_unique_abbr": "Tencent;Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "-qjmJkacGv", "title": "Tackling Imbalanced Class in Federated Learning via Class Distribution Estimation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Federated Learning (FL) has become an upsurging machine learning method due to its applicability in large-scale distributed system and its privacy-preserving property. However, in real-world applications, the presence of class imbalance issue, especially the mismatch between local and global class distribution, greatly degrades the performance of FL. Moreover, due to the privacy constrain, the class distribution information of clients can not be accessed directly. To tackle class imbalance issue under FL setting, a novel algorithm, FedRE, is proposed in this paper. We propose a new class distribution estimation method for the FedRE algorithm, which requires no extra client data information and thus has no privacy concern. Both experimental results and theoretical analysis are provided to support the validity of our distribution estimation method. The proposed algorithm is verified with several experiment, including different datasets with the presence of class imbalance and local-global distribution mismatch. The experimental results show that FedRE is effective and it outperforms other related methods in terms of both overall and minority class classification accuracy.", "keywords": "Federated Learning;class imbalance;class distribution estimation", "primary_area": "", "supplementary_material": "/attachment/5374162b0154cf1ceafd1ce14d776f32bf1cb76e.zip", "author": "You-Ru Lu;Xiaoqian Wang;Dengfeng Sun", "authorids": "~You-Ru_Lu1;~Xiaoqian_Wang1;~Dengfeng_Sun1", "gender": "M;F;M", "homepage": "https://web.ics.purdue.edu/~dsun/people.html;https://engineering.purdue.edu/~joywang/;https://web.ics.purdue.edu/~dsun/", "dblp": ";151/3215-1;", "google_scholar": ";I3tc214AAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~You-Ru_Lu1;~Xiaoqian_Wang1;~Dengfeng_Sun1", "aff": "Purdue University;Purdue University;Purdue University", "aff_domain": "purdue.edu;purdue.edu;purdue.edu", "position": "PhD student;Assistant Professor;Full Professor", "bibtex": "@misc{\nlu2023tackling,\ntitle={Tackling Imbalanced Class in Federated Learning via Class Distribution Estimation},\nauthor={You-Ru Lu and Xiaoqian Wang and Dengfeng Sun},\nyear={2023},\nurl={https://openreview.net/forum?id=-qjmJkacGv}\n}", "github": "", "project": "", "reviewers": "8crs;oRX1;wyG4", "site": "https://openreview.net/forum?id=-qjmJkacGv", "pdf_size": 453178, "recommendation": "3;3;5", "confidence": "4;4;4", "correctness": "2;3;3", "technical_novelty": "3;2;2", "empirical_novelty": "2;2;2", "wc_summary_paper": "15;100;66", "wc_strength_and_weaknesses": "157;610;113", "wc_clarity_quality_novelty_and_reproducibility": "38;59;6", "wc_summary_review": "39;29;47", "wc_review": "249;798;232", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 0.0 
], "wc_summary_paper_avg": [ 60.333333333333336, 34.93167935015754 ], "wc_strength_and_weaknesses_avg": [ 293.3333333333333, 224.63649649056484 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 34.333333333333336, 21.791945504908206 ], "wc_summary_review_avg": [ 38.333333333333336, 7.363574011458175 ], "wc_review_avg": [ 426.3333333333333, 262.89964287199444 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13648964131109872889&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Purdue University", "aff_unique_dep": "", "aff_unique_url": "https://www.purdue.edu", "aff_unique_abbr": "Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "-rHOeHtdWP", "title": "Wide Attention is the Way Forward for Transformers", "track": "main", "status": "Reject", "tldr": "Widening the attention layer in a Transformer and only using a single layer is surprisingly effective, with a number of advantages.", "abstract": "The Transformer is an extremely powerful and prominent deep learning architecture. In this work, we challenge the commonly held belief in deep learning that going deeper is better, and show an alternative design approach that is building wider attention Transformers. We demonstrate that wide single layer Transformer models can compete with or outperform deeper ones in a variety of Natural Language Processing (NLP) tasks when both are trained from scratch. The impact of changing the model aspect ratio on Transformers is then studied systematically. This ratio balances the number of layers and the number of attention heads per layer while keeping the total number of attention heads and all other hyperparameters constant. On average, across 4 NLP tasks and 10 attention types, single layer wide models perform 0.3% better than their deep counterparts. We show an in-depth evaluation and demonstrate how wide models require a far smaller memory footprint and can run faster on commodity hardware, in addition, these wider models are also more interpretable. For example, a single layer Transformer on the IMDb byte level text classification has 3.1x faster inference latency on a CPU than its equally accurate deeper counterpart, and is half the size. Our results suggest that the critical direction for building better Transformers for NLP is their width, and that their depth is less relevant.", "keywords": "transformer;attention;wide;deep;accuracy;latency;interpretability;xformer;size", "primary_area": "", "supplementary_material": "/attachment/bc97d253e13c55030484dd41c625446f878a7761.zip", "author": "Jason Ross Brown;Yiren Zhao;Ilia Shumailov;Robert D. 
Mullins", "authorids": "~Jason_Ross_Brown1;~Yiren_Zhao2;~Ilia_Shumailov1;~Robert_D._Mullins1", "gender": ";M;;", "homepage": ";https://aaronzhao.me;;", "dblp": ";https://dblp.uni-trier.de/pers/hd/z/Zhao:Yiren;;", "google_scholar": ";lOOmgEgAAAAJ;;", "orcid": ";;;", "linkedin": ";yiren-aaron-zhao-baa8b5116/;;", "or_profile": "~Jason_Ross_Brown1;~Yiren_Zhao2;~Ilia_Shumailov1;~Robert_D._Mullins1", "aff": ";Imperial College London;;", "aff_domain": ";ic.ac.uk;;", "position": ";Assistant Professor;;", "bibtex": "@misc{\nbrown2023wide,\ntitle={Wide Attention is the Way Forward for Transformers},\nauthor={Jason Ross Brown and Yiren Zhao and Ilia Shumailov and Robert D. Mullins},\nyear={2023},\nurl={https://openreview.net/forum?id=-rHOeHtdWP}\n}", "github": "", "project": "", "reviewers": "xaMf;MwhQ;Kk5h;RFvg", "site": "https://openreview.net/forum?id=-rHOeHtdWP", "pdf_size": 320364, "recommendation": "3;3;5;5", "confidence": "4;4;4;4", "correctness": "1;3;3;3", "technical_novelty": "1;2;2;1", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "71;88;49;142", "wc_strength_and_weaknesses": "258;275;68;150", "wc_clarity_quality_novelty_and_reproducibility": "37;10;39;41", "wc_summary_review": "14;46;27;49", "wc_review": "380;419;183;382", "wc_reply_reviewers": "15;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.5, 0.8660254037844386 ], "technical_novelty_avg": [ 1.5, 0.5 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 87.5, 34.3693177121688 ], "wc_strength_and_weaknesses_avg": [ 187.75, 84.1319647934125 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 31.75, 12.636751956100111 ], "wc_summary_review_avg": [ 34.0, 14.300349646075091 ], "wc_review_avg": [ 341.0, 92.53377761660873 ], "wc_reply_reviewers_avg": [ 3.75, 6.49519052838329 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8984518346401341561&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "aff_unique_index": "0", "aff_unique_norm": "Imperial College London", "aff_unique_dep": "", "aff_unique_url": "https://www.imperial.ac.uk", "aff_unique_abbr": "ICL", "aff_country_unique_index": "0", "aff_country_unique": "United Kingdom" }, { "id": "-syx4GzWdTM", "title": "SpENCNN: Orchestrating Encoding and Sparsity for Fast Homomorphically Encrypted Neural Network Inference", "track": "main", "status": "Reject", "tldr": "", "abstract": "Homomorphic Encryption (HE) is a promising technology for protecting user's data privacy for Machine Learning as a Service (MLaaS) on public clouds. However, the computation overheads associated with the HE operations, which can be orders of magnitude slower than their counterparts for plaintexts, can lead to extremely high latency in neural network inference, seriously hindering its application in practice. While extensive neural network optimization techniques have been proposed, such as sparsification and pruning for plaintext domain, they cannot address this problem effectively. 
In this paper, we propose an HE-based CNN inference framework, named SpENCNN, that can effectively exploit the single-instruction-multiple-data (SIMD) feature of the HE scheme to improve the CNN inference latency. In particular, we first develop an HE-group convolution technique that can partition channels among different groups based on the data size and ciphertext size, and then encode them into the same ciphertext in an interleaved manner, so as to dramatically reduce the bottleneck operations in HE convolution. We further develop a sub-block weight pruning technique that can further reduce the costly HE operations for CNN convolutions. Our experimental results show that the SpENCNN-optimized CNN models can achieve overall speedups of 8.37x, 12.11x, and 19.26x for LeNet, VGG-5, and HEFNet, respectively, with negligible accuracy loss.", "keywords": "Cryptographic inference;model sparsity;data encoding", "primary_area": "", "supplementary_material": "", "author": "Ran Ran;Xinwei Luo;Wei Wang;Tao Liu;Gang Quan;Wujie Wen", "authorids": "~Ran_Ran2;~Xinwei_Luo1;~Wei_Wang70;~Tao_Liu3;~Gang_Quan1;~Wujie_Wen2", "gender": "M;M;M;;M;M", "homepage": ";;;;;https://www.lehigh.edu/~wuw219/", "dblp": ";;;;53/5678.html;70/11466.html", "google_scholar": "zjgo17YAAAAJ;;;;xP-U9_YAAAAJ;QKQrD1wAAAAJ", "orcid": ";;;;;", "linkedin": "ranran0523/;xinwei-luo-170987223;wei-wang-9b012026/;;;", "or_profile": "~Ran_Ran2;~Xinwei_Luo1;~Wei_Wang70;~Tao_Liu3;~Gang_Quan1;~Wujie_Wen2", "aff": "Lehigh University;Lehigh University;;;Florida International University;North Carolina State University", "aff_domain": "lehigh.edu;lehigh.edu;;;fiu.edu;ncsu.edu", "position": "PhD student;PhD student;;;Full Professor;Associate Professor", "bibtex": "@misc{\nran2023spencnn,\ntitle={Sp{ENCNN}: Orchestrating Encoding and Sparsity for Fast Homomorphically Encrypted Neural Network Inference},\nauthor={Ran Ran and Xinwei Luo and Wei Wang and Tao Liu and Gang Quan and Wujie Wen},\nyear={2023},\nurl={https://openreview.net/forum?id=-syx4GzWdTM}\n}", "github": "", "project": "", "reviewers": "WT91;CCNN;AXmR", "site": "https://openreview.net/forum?id=-syx4GzWdTM", "pdf_size": 800683, "recommendation": "5;5;5", "confidence": "3;3;5", "correctness": "4;3;3", "technical_novelty": "2;3;3", "empirical_novelty": "4;4;3", "wc_summary_paper": "66;38;85", "wc_strength_and_weaknesses": "143;16;261", "wc_clarity_quality_novelty_and_reproducibility": "47;130;141", "wc_summary_review": "52;6;141", "wc_review": "308;190;628", "wc_reply_reviewers": "0;0;193", "wc_reply_authors": "730;74;1405", "reply_reviewers": "0;0;1", "reply_authors": "1;1;3", "recommendation_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 63.0, 19.30457631409368 ], "wc_strength_and_weaknesses_avg": [ 140.0, 100.04332394851076 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 106.0, 41.960298696108765 ], "wc_summary_review_avg": [ 66.33333333333333, 56.037685732212594 ], "wc_review_avg": [ 375.3333333333333, 185.04293796005533 ], "wc_reply_reviewers_avg": [ 64.33333333333333, 90.98107251266912 ], "wc_reply_authors_avg": [ 736.3333333333334, 543.396928793513 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 10, 0 ], "authors#_avg": 
[ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18019730477984425585&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Lehigh University;Florida International University;North Carolina State University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.lehigh.edu;https://www.fiu.edu;https://www.ncsu.edu", "aff_unique_abbr": "Lehigh;FIU;NCSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Temporal Coherent Test Time Optimization for Robust Video Classification", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11570", "id": "-t4D61w4zvQ", "poster": "", "openreview": "https://openreview.net/forum?id=-t4D61w4zvQ", "slides": "https://iclr.cc/virtual/2023/poster/11570", "video": "https://iclr.cc/virtual/2023/poster/11570", "author_site": "Chenyu Yi, SIYUAN YANG, Yufei Wang, Haoliang Li, Yap-peng Tan, Alex Kot", "tldr": "", "abstract": "Deep neural networks are likely to fail when the test data is corrupted in real-world deployment (e.g., blur, weather, etc.). Test-time optimization is an effective way that adapts models to generalize to corrupted data during testing, which has been shown in the image domain. However, the techniques for improving video classification corruption robustness remain few. In this work, we propose a Temporal Coherent Test-time Optimization framework (TeCo) to utilize spatio-temporal information in test-time optimization for robust video classification. To exploit information in video with self-supervised learning, TeCo minimizes the entropy of the prediction based on the global content from video clips. Meanwhile, it also feeds local content to regularize the temporal coherence at the feature level. TeCo retains the generalization ability of various video classification models and achieves significant improvements in corruption robustness across Mini Kinetics-C and Mini SSV2-C. Furthermore, TeCo sets a new baseline in video classification corruption robustness via test-time optimization. 
", "keywords": "Video Classification;Robustness;Test Time Optimization", "primary_area": "", "supplementary_material": "/attachment/44d7453a968519a4a2bf104d48280688760e4f99.zip", "author": "Chenyu Yi;SIYUAN YANG;Yufei Wang;Haoliang Li;Yap-peng Tan;Alex Kot", "authorids": "~Chenyu_Yi1;~SIYUAN_YANG1;~Yufei_Wang5;~Haoliang_Li2;~Yap-peng_Tan1;~Alex_Kot1", "gender": ";M;M;;M;", "homepage": ";;https://github.com/wyf0912/;;https://personal.ntu.edu.sg/eyptan/;https://www.ntu.edu.sg/home/eackot/", "dblp": ";201/7699-1.html;;;93/4472.html;", "google_scholar": ";lzLsF2MAAAAJ;jLd1l_sAAAAJ;;https://scholar.google.com.sg/citations?user=t9EqYQIAAAAJ;", "orcid": "0000-0001-5002-6549;0000-0003-4681-0431;;;0000-0002-0645-9109;", "linkedin": ";;;;;", "or_profile": "~Chenyu_Yi1;~SIYUAN_YANG1;~Yufei_Wang5;~Haoliang_Li2;~Yap-peng_Tan1;~Alex_Kot1", "aff": "Nanyang Technological University;Nanyang Technological University;Nanyang Technological University;;Nanyang Technological University;Nanyang Technological University", "aff_domain": "ntu.edu.sg;ntu.edu.sg;ntu.edu.sg;;ntu.edu.sg;ntu.edu.sg", "position": "PhD student;PhD student;PhD student;;Full Professor;Full Professor", "bibtex": "@inproceedings{\nyi2023temporal,\ntitle={Temporal Coherent Test Time Optimization for Robust Video Classification},\nauthor={Chenyu Yi and SIYUAN YANG and Yufei Wang and Haoliang Li and Yap-peng Tan and Alex Kot},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=-t4D61w4zvQ}\n}", "github": "", "project": "", "reviewers": "bEhU;rshF;naa6", "pdf_size": 1619274, "recommendation": "6;6;6", "confidence": "4;4;2", "correctness": "3;3;4", "technical_novelty": "3;2;3", "empirical_novelty": "3;2;3", "wc_summary_paper": "72;59;73", "wc_strength_and_weaknesses": "112;338;113", "wc_clarity_quality_novelty_and_reproducibility": "13;46;291", "wc_summary_review": "28;146;99", "wc_review": "225;589;576", "wc_reply_reviewers": "32;11;50", "wc_reply_authors": "203;1324;685", "reply_reviewers": "1;1;1", "reply_authors": "2;3;2", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 68.0, 6.377042156569663 ], "wc_strength_and_weaknesses_avg": [ 187.66666666666666, 106.30250336761699 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 116.66666666666667, 124.0062722428005 ], "wc_summary_review_avg": [ 91.0, 48.5042953424402 ], "wc_review_avg": [ 463.3333333333333, 168.61066263897888 ], "wc_reply_reviewers_avg": [ 31.0, 15.937377450509228 ], "wc_reply_authors_avg": [ 737.3333333333334, 459.14001737547943 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=712239658202817336&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=-t4D61w4zvQ", "email": "ntu.edu.sg;ntu.edu.sg;ntu.edu.sg;;ntu.edu.sg;ntu.edu.sg", "author_num": 6, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Nanyang Technological University", "aff_unique_dep": "", "aff_unique_url": "https://www.ntu.edu.sg", "aff_unique_abbr": "NTU", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Singapore" }, { "id": "-tYCaP0phY_", "title": "FlexRound: Learnable Rounding by Element-wise Division for Post-Training Quantization", "track": "main", "status": "Reject", "tldr": "", "abstract": "Post-training Quantization (PTQ) has been gaining popularity for the deployment of deep neural networks on resource-limited devices since unlike quantization-aware training, neither a full training dataset nor end-to-end training is required at all. As PTQ schemes based on reconstructing each layer or block output turn out to be effective to enhance quantized model performance, recent works have developed algorithms to devise and learn a new weight-rounding scheme so as to better reconstruct each layer or block output. We notice that, however, such new rounding schemes are established on element-wise addition. In this work, we propose a simple yet effective new rounding mechanism for PTQ, coined FlexRound, via element-wise division to learn not only a common quantization grid size but also a different scale for each pre-trained weight. Thanks to the reciprocal rule of derivatives induced by element-wise division, FlexRound is inherently able to exploit the importance of a pre-trained weight when updating its corresponding scale, and thus, flexibly quantize a pre-trained weight depending on its own importance. We empirically validate the efficacy of FlexRound on a wide range of models and tasks. To the best of our knowledge, our work is the first to carry out comprehensive experiments on image classification, natural language understanding, and natural language generation in the per-tensor uniform PTQ setting. Our code will be open-sourced soon.", "keywords": "Efficient Inference;Quantization;Post-Training Quantization", "primary_area": "", "supplementary_material": "/attachment/c11e81420bd8ffabd59decec683a18158ed5ccc0.zip", "author": "Jung Hyun Lee;Jeonghoon Kim;Se Jung Kwon;Dongsoo Lee", "authorids": "~Jung_Hyun_Lee1;~Jeonghoon_Kim1;~Se_Jung_Kwon1;~Dongsoo_Lee1", "gender": "M;M;M;M", "homepage": ";;;", "dblp": "132/2899;;119/5676;11/9680", "google_scholar": ";https://scholar.google.com/citations?hl=ko;https://scholar.google.co.kr/citations?user=8eTxKOkAAAAJ;ALiieEkAAAAJ", "orcid": ";0000-0002-6068-6476;;", "linkedin": ";jeonghoon-kim-804892175/;se-jung-kwon-305503175/;", "or_profile": "~Jung_Hyun_Lee1;~Jeonghoon_Kim1;~Se_Jung_Kwon1;~Dongsoo_Lee1", "aff": "NAVER CLOVA;NAVER;NAVER Cloud;NAVER CLOVA", "aff_domain": "navercorp.com;navercorp.com;navercorp.com;navercorp.com", "position": "Researcher;Researcher;AI Researcher;Executive Officer", "bibtex": "@misc{\nlee2023flexround,\ntitle={FlexRound: Learnable Rounding by Element-wise Division for Post-Training Quantization},\nauthor={Jung Hyun Lee and Jeonghoon Kim and Se Jung Kwon and Dongsoo Lee},\nyear={2023},\nurl={https://openreview.net/forum?id=-tYCaP0phY_}\n}", "github": "", "project": "", "reviewers": "y3vc;vGda;mQzD;oCmz", "site": "https://openreview.net/forum?id=-tYCaP0phY_", "pdf_size": 705378, "recommendation": "5;5;5;6", "confidence": "3;3;4;4", "correctness": "3;3;3;3", "technical_novelty": "3;2;3;2", "empirical_novelty": "3;2;3;3", "wc_summary_paper": "143;39;66;74", "wc_strength_and_weaknesses": "129;62;377;585", "wc_clarity_quality_novelty_and_reproducibility": "256;92;3;78", "wc_summary_review": "91;26;97;63", "wc_review": "619;219;543;800", "wc_reply_reviewers": "0;113;0;159", "wc_reply_authors": "594;563;584;1354", 
"reply_reviewers": "0;1;0;1", "reply_authors": "1;2;1;3", "recommendation_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 80.5, 38.34383914007569 ], "wc_strength_and_weaknesses_avg": [ 288.25, 207.65762085702514 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 107.25, 92.3075701120986 ], "wc_summary_review_avg": [ 69.25, 28.07467720206236 ], "wc_review_avg": [ 545.25, 210.22651474064827 ], "wc_reply_reviewers_avg": [ 68.0, 69.9178088901533 ], "wc_reply_authors_avg": [ 773.75, 335.19425338152803 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:FfcFToOW764J:scholar.google.com/&scioq=FlexRound:+Learnable+Rounding+by+Element-wise+Division+for+Post-Training+Quantization&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "NAVER Corporation", "aff_unique_dep": "CLOVA", "aff_unique_url": "https://www.naver.com", "aff_unique_abbr": "NAVER", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Towards Lightweight, Model-Agnostic and Diversity-Aware Active Anomaly Detection", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11672", "id": "-vKlt84fHs", "poster": "/media/PosterPDFs/ICLR%202023/11672.png?t=1681115423.3385198", "openreview": "https://openreview.net/forum?id=-vKlt84fHs", "slides": "https://iclr.cc/virtual/2023/poster/11672", "video": "https://iclr.cc/virtual/2023/poster/11672", "author_site": "Xu Zhang, Yuan Zhao, Ziang Cui, Liqun Li, Shilin He, Qingwei Lin, Yingnong Dang, Saravan Rajmohan, Dongmei Zhang", "tldr": "", "abstract": "Active Anomaly Discovery (AAD) is flourishing in the anomaly detection research area, which aims to incorporate analysts\u2019 feedback into unsupervised anomaly detectors. However, existing AAD approaches usually prioritize the samples with the highest anomaly scores for user labeling, which hinders the exploration of anomalies that were initially ranked lower. Besides, most existing AAD approaches are specially tailored for a certain unsupervised detector, making it difficult to extend to other detection models. To tackle these problems, we propose a lightweight, model-agnostic and diversity-aware AAD method, named LMADA. In LMADA, we design a diversity-aware sample selector powered by Determinantal Point Process (DPP). It considers the diversity of samples in addition to their anomaly scores for feedback querying. Furthermore, we propose a model-agnostic tuner. It approximates diverse unsupervised detectors with a unified proxy model, based on which the feedback information is incorporated by a lightweight non-linear representation adjuster. Through extensive experiments on 8 public datasets, LMADA achieved 74% F1-Score improvement on average, outperforming other comparative AAD approaches. 
Moreover, LMADA also achieves significant performance boosts under any unsupervised detector.", "keywords": "Active Anomaly Discovery;Diversity Sampling;Deep Learning", "primary_area": "", "supplementary_material": "", "author": "Xu Zhang;Yuan Zhao;Ziang Cui;Liqun Li;Shilin He;Qingwei Lin;Yingnong Dang;Saravan Rajmohan;Dongmei Zhang", "authorids": "~Xu_Zhang14;zhaoyuan@stu.pku.edu.cn;ziang_cui@seu.edu.cn;liqun.li@microsoft.com;~Shilin_He1;~Qingwei_Lin1;dang.yingnong@microsoft.com;saravanakumar.rajmohan@outlook.com;~Dongmei_Zhang2", "gender": "M;;;;M;M;;;", "homepage": ";;;;https://shilinhe.github.io/;https://www.microsoft.com/en-us/research/people/qlin/;;;https://www.microsoft.com/en-us/research/people/dongmeiz/", "dblp": ";;;;;120/0743;;;87/461-1", "google_scholar": "bqXdMMMAAAAJ;;;;;https://scholar.google.co.jp/citations?hl=zh-CN;;;jLlBBl4AAAAJ", "orcid": ";;;;;0000-0003-2559-2383;;;0000-0002-9230-2799", "linkedin": ";;;;;;;;dongmei-zhang-38a86317/", "or_profile": "~Xu_Zhang14;zhaoyuan@stu.pku.edu.cn;ziang_cui@seu.edu.cn;liqun.li@microsoft.com;~Shilin_He1;~Qingwei_Lin1;dang.yingnong@microsoft.com;saravanakumar.rajmohan@outlook.com;~Dongmei_Zhang2", "aff": ";;;;Microsoft Research;Microsoft Research;;;Microsoft", "aff_domain": ";;;;microsoft.com;microsoft.com;;;microsoft.com", "position": ";;;;Researcher;Sr. Principal Researcher;;;Assistant Managing Director, Microsoft Research Asia", "bibtex": "@inproceedings{\nzhang2023towards,\ntitle={Towards Lightweight, Model-Agnostic and Diversity-Aware Active Anomaly Detection},\nauthor={Xu Zhang and Yuan Zhao and Ziang Cui and Liqun Li and Shilin He and Qingwei Lin and Yingnong Dang and Saravan Rajmohan and Dongmei Zhang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=-vKlt84fHs}\n}", "github": "", "project": "", "reviewers": "SZBd;NYZD;NuRo;6kFK", "pdf_size": 791625, "recommendation": "5;5;6;8", "confidence": "4;3;3;3", "correctness": "3;2;2;4", "technical_novelty": "2;2;3;3", "empirical_novelty": "3;2;3;3", "wc_summary_paper": "50;63;119;65", "wc_strength_and_weaknesses": "301;431;334;145", "wc_clarity_quality_novelty_and_reproducibility": "43;60;77;51", "wc_summary_review": "30;48;45;43", "wc_review": "424;602;575;304", "wc_reply_reviewers": "0;122;158;0", "wc_reply_authors": "487;825;513;73", "reply_reviewers": "0;1;2;0", "reply_authors": "1;2;2;1", "recommendation_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.82915619758885 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 74.25, 26.47050245084139 ], "wc_strength_and_weaknesses_avg": [ 302.75, 102.85031599368084 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 57.75, 12.636751956100111 ], "wc_summary_review_avg": [ 41.5, 6.87386354243376 ], "wc_review_avg": [ 476.25, 120.37934831190938 ], "wc_reply_reviewers_avg": [ 70.0, 71.14773362518304 ], "wc_reply_authors_avg": [ 474.5, 267.2503507949054 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 9, 0 ], "corr_recommendation_confidence": -0.4714045207910316, "corr_recommendation_correctness": 0.7385489458759963, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8213107763470182424&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 0, "pdf": "https://openreview.net/pdf?id=-vKlt84fHs", "email": 
";;;;microsoft.com;microsoft.com;;;microsoft.com", "author_num": 9, "aff_unique_index": "0;0;0", "aff_unique_norm": "Microsoft", "aff_unique_dep": "Microsoft Research", "aff_unique_url": "https://www.microsoft.com/en-us/research", "aff_unique_abbr": "MSR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "-wDaB590pkt", "title": "Coarse-to-fine Knowledge Graph Domain Adaptation based on Distantly-supervised Iterative Training", "track": "main", "status": "Reject", "tldr": "", "abstract": "Modern supervised learning neural network models require a large amount of manually labeled data, which makes the construction of domain-specific knowledge graphs time-consuming and labor-intensive. In parallel, although there has been much research on named entity recognition and relation extraction based on distantly supervised learning, constructing a domain-specific knowledge graph from large collections of textual data without manual annotations is still an urgent problem to be solved. In response, we propose an integrated framework for adapting and re-learning knowledge graphs from one coarse domain (biomedical) to a finer-define domain (oncology). In this framework, we apply distant-supervision on cross-domain knowledge graph adaptation. Consequently, no manual data annotation is required to train the model. We introduce a novel iterative training strategy to facilitate the discovery of domain-specific named entities and triples. Experimental results indicate that the proposed framework can perform domain adaptation and construction of knowledge graph efficiently.", "keywords": "Knowledge Graph Domain Adaptation;Knowledge Graph Construction;Named Entity Recognition;Relationship Extraction", "primary_area": "", "supplementary_material": "/attachment/3d15da653a15805c9a3da871487d9d68c1f79350.zip", "author": "Hongmin Cai;Wenxiong Liao;Zhengliang Liu;Yuzhong Chen;Tianming Liu;Xiang Li", "authorids": "~Hongmin_Cai1;~Wenxiong_Liao1;~Zhengliang_Liu1;~Yuzhong_Chen1;~Tianming_Liu4;~Xiang_Li14", "gender": "M;M;M;M;M;M", "homepage": "http://www2.scut.edu.cn/bioinformatics/;;;;https://xiangli-shaun.github.io/;https://cobweb.cs.uga.edu/~tliu/", "dblp": "50/3384;;;32/3775.html;;96/5013-1.html", "google_scholar": "https://scholar.google.com/citations?hl=en;gFZZfEgAAAAJ;p8tAM0AAAAAJ;;MjkwwiQAAAAJ;92RPXm0AAAAJ", "orcid": "0000-0002-2747-7234;;0000-0001-7061-6714;;0000-0002-9851-6376;", "linkedin": ";;;;xiang-shaun-li-11b2b99/;", "or_profile": "~Hongmin_Cai1;~Wenxiong_Liao1;~Zhengliang_Liu1;~Yuzhong_Chen1;~Xiang_Li14;~Tianming_Liu3", "aff": "South China University of Technology;South China University of Technology;University of Georgia;;Massachusetts General Hospital, Harvard University;University of Georgia", "aff_domain": "scut.edu.cn;scut.edu.cn;uga.edu;;mgh.harvard.edu;uga.edu", "position": "Full Professor;PhD student;PhD student;;Instructor;Professor", "bibtex": "@misc{\ncai2023coarsetofine,\ntitle={Coarse-to-fine Knowledge Graph Domain Adaptation based on Distantly-supervised Iterative Training},\nauthor={Hongmin Cai and Wenxiong Liao and Zhengliang Liu and Yuzhong Chen and Tianming Liu and Xiang Li},\nyear={2023},\nurl={https://openreview.net/forum?id=-wDaB590pkt}\n}", "github": "", "project": "", "reviewers": "8yCf;KkJe;hrcf;cH5Y", "site": "https://openreview.net/forum?id=-wDaB590pkt", "pdf_size": 690739, "recommendation": "1;3;3;3", "confidence": "3;5;3;4", "correctness": "2;3;3;2", "technical_novelty": "1;1;1;1", 
"empirical_novelty": "1;1;1;1", "wc_summary_paper": "62;116;90;34", "wc_strength_and_weaknesses": "168;130;40;96", "wc_clarity_quality_novelty_and_reproducibility": "22;2;25;21", "wc_summary_review": "36;2;16;52", "wc_review": "288;250;171;203", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 2.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 1.0, 0.0 ], "empirical_novelty_avg": [ 1.0, 0.0 ], "wc_summary_paper_avg": [ 75.5, 30.63902739970706 ], "wc_strength_and_weaknesses_avg": [ 108.5, 47.039876700518676 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 17.5, 9.069178573608527 ], "wc_summary_review_avg": [ 26.5, 19.04599695474091 ], "wc_review_avg": [ 228.0, 44.603811496328426 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.5222329678670935, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7074277190037951901&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;1;2;1", "aff_unique_norm": "South China University of Technology;University of Georgia;Harvard University", "aff_unique_dep": ";;Massachusetts General Hospital", "aff_unique_url": "https://www.scut.edu.cn;https://www.uga.edu;https://www.harvard.edu", "aff_unique_abbr": "SCUT;UGA;Harvard", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1;1", "aff_country_unique": "China;United States" }, { "id": "-x5WuMO4APy", "title": "FastDiff 2: Dually Incorporating GANs into Diffusion Models for High-Quality Speech Synthesis", "track": "main", "status": "Withdraw", "tldr": "We propose FastDiff 2, a conditional diffusion model to trade off diversity for quality and speed by incorporating GANs into diffusion models.", "abstract": "FastDiff, as a class of denoising probabilistic models, has recently achieved impressive performances in speech synthesis. It utilizes a noise predictor to learn a tight inference schedule for skipping denoising steps. Despite the successful speedup of FastDiff, there is still room for improvements, e.g., further optimizing the speed-quality trade-off and accelerating DDPMs training procedures. After analyzing GANs and diffusion models in conditional speech synthesis, we find that: GANs produce samples but do not cover the whole distribution, and the coverage degree does not distinctly impact audio quality. Inspired by these observations, we propose to trade off diversity for quality and speed by incorporating GANs into diffusion models, introducing two GAN-empowered modeling perspectives: (1) FastDiff 2 (Diff-GAN), whose denoising distribution is parametrized by conditional GANs; and (2) FastDiff 2 (GAN-Diff), in which the denoising model is treated as a generator in GAN for adversarial training. Unlike the acceleration methods based on skipping the denoising steps, FastDiff 2 provides a principled way to speed up both the training and inference processes. Experimental results demonstrate that both variants of FastDiff 2 enjoy an efficient 4-step sampling process as in FastDiff yet demonstrate a superior sample quality. 
Audio samples are available at https://FastDiff2.github.io/.", "keywords": "Speech synthesis;Neural vocoder;Diffusion probabilistic model;Generative adversarial network", "primary_area": "", "supplementary_material": "", "author": "Rongjie Huang;Yi Ren;Jinglin Liu;Luping Liu;Zhou Zhao", "authorids": "~Rongjie_Huang1;~Yi_Ren2;~Jinglin_Liu1;~Luping_Liu2;~Zhou_Zhao2", "gender": "M;M;M;;M", "homepage": ";https://rayeren.github.io/;;;https://dblp.uni-trier.de/pid/75/7785.html?", "dblp": "212/8936-1;75/6568-6;;;75/7785", "google_scholar": "iRHBUsgAAAAJ;4FA6C0AAAAAJ;Ri8x0jEAAAAJ;;https://scholar.google.com.hk/citations?user=IIoFY90AAAAJ", "orcid": ";;;;0000-0001-6121-0384", "linkedin": ";;;;", "or_profile": "~Rongjie_Huang1;~Yi_Ren2;~Jinglin_Liu1;~Luping_Liu2;~Zhou_Zhao2", "aff": "Zhejiang University;ByteDance;Zhejiang University;;Zhejiang University", "aff_domain": "zju.edu.cn;bytedance.com;zju.edu.cn;;zju.edu.cn", "position": "MS student;Researcher;MS student;;Associate Professor", "bibtex": "@misc{\nhuang2023fastdiff,\ntitle={FastDiff 2: Dually Incorporating {GAN}s into Diffusion Models for High-Quality Speech Synthesis},\nauthor={Rongjie Huang and Yi Ren and Jinglin Liu and Luping Liu and Zhou Zhao},\nyear={2023},\nurl={https://openreview.net/forum?id=-x5WuMO4APy}\n}", "github": "", "project": "", "reviewers": "VrnD;qgcu;NBcb;8mmY", "site": "https://openreview.net/forum?id=-x5WuMO4APy", "pdf_size": 787153, "recommendation": "3;3;5;6", "confidence": "5;2;4;3", "correctness": "2;2;3;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "1;2;2;3", "wc_summary_paper": "17;49;78;104", "wc_strength_and_weaknesses": "30;12;393;110", "wc_clarity_quality_novelty_and_reproducibility": "675;29;89;26", "wc_summary_review": "48;18;50;34", "wc_review": "770;108;610;274", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 62.0, 32.45766473423496 ], "wc_strength_and_weaknesses_avg": [ 136.25, 152.75531905632616 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 204.75, 272.65947168583745 ], "wc_summary_review_avg": [ 37.5, 12.835497652993435 ], "wc_review_avg": [ 440.5, 262.47809432407877 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.08606629658238704, "corr_recommendation_correctness": 0.9622504486493761, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:1uMLnvxFJGsJ:scholar.google.com/&scioq=FastDiff+2:+Dually+Incorporating+GANs+into+Diffusion+Models+for+High-Quality+Speech+Synthesis&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Zhejiang University;ByteDance", "aff_unique_dep": ";", "aff_unique_url": "https://www.zju.edu.cn;https://www.bytedance.com", "aff_unique_abbr": "ZJU;ByteDance", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "-yqNb_CxRr", "title": "REST: REtrieve & Self-Train for generative action recognition", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "This work is on training a generative 
action/video recognition model whose output is a free-form action-specific caption describing the video (rather than an action class label). A generative approach has practical advantages like producing more fine-grained and human-readable output, and being naturally open-world. To this end, we propose to adapt a pre-trained generative Vision & Language (V&L) Foundation Model for video/action recognition. While recently there have been a few attempts to adapt V&L models trained with contrastive learning (e.g. CLIP) for video/action, to the best of our knowledge, we propose the very first method that sets out to accomplish this goal for a generative model. We first show that direct fine-tuning of a generative model to produce action classes suffers from severe overfitting. To alleviate this, we introduce REST, a training framework consisting of two key components: (a) an unsupervised method for adapting the generative model to action/video by means of pseudo-caption generation and Self-training, i.e. without using any action-specific labels; and (b) a Retrieval approach based on CLIP for discovering a diverse set of pseudo-captions for each video to train the model. Importantly, we show that both components are necessary to obtain high accuracy. We evaluate REST on the problem of zero-shot action recognition, where we show that our approach is very competitive when compared to contrastive learning-based methods. Code will be made available.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Adrian Bulat;Enrique Sanchez;Brais Martinez;Georgios Tzimiropoulos", "authorids": "~Adrian_Bulat1;~Enrique_Sanchez1;~Brais_Martinez3;~Georgios_Tzimiropoulos1", "gender": ";M;M;M", "homepage": "https://www.adrianbulat.com;http://www.braismartinez.org/;https://ytzimiro.github.io/;https://esanchezlozano.github.io", "dblp": "185/6878;14/111;03/3273;119/1475", "google_scholar": "https://scholar.google.co.uk/citations?user=5sKcsg0AAAAJ;https://scholar.google.co.uk/citations?user=-62MApgAAAAJ;https://scholar.google.co.uk/citations?user=D4JkWxf-8fwC;https://scholar.google.co.uk/citations?user=VLIQpIYAAAAJ", "orcid": "0000-0002-3185-4979;;;0000-0003-0196-922X", "linkedin": ";;;enrique-sanchez-lozano/", "or_profile": "~Adrian_Bulat1;~Brais_Martinez3;~Georgios_Tzimiropoulos1;~Enrique_S\u00e1nchez-Lozano1", "aff": "Samsung AI Center Cambridge;Samsung;Queen Mary University London;Samsung AI Center", "aff_domain": "samsung.com;samsung.com;qmul.ac.uk;samsung.com", "position": "Research Scientist;Samsung AI Center;Associate Professor;Researcher", "bibtex": "@misc{\nbulat2023rest,\ntitle={{REST}: {RE}trieve \\& Self-Train for generative action recognition},\nauthor={Adrian Bulat and Enrique Sanchez and Brais Martinez and Georgios Tzimiropoulos},\nyear={2023},\nurl={https://openreview.net/forum?id=-yqNb_CxRr}\n}", "github": "", "project": "", "reviewers": "VoCv;wo3E;UrMu;xKKd", "site": "https://openreview.net/forum?id=-yqNb_CxRr", "pdf_size": 7037933, "recommendation": "3;5;5;5", "confidence": "4;4;4;3", "correctness": "3;4;3;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;0;2;3", "wc_summary_paper": "81;46;90;59", "wc_strength_and_weaknesses": "974;297;165;66", "wc_clarity_quality_novelty_and_reproducibility": "128;36;32;30", "wc_summary_review": "196;45;64;14", "wc_review": "1379;424;351;169", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "23;363;466;195", "reply_reviewers": "0;0;0;0", "reply_authors": "1;2;2;2", "recommendation_avg": [ 4.5, 0.8660254037844386 ], 
"confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 1.75, 1.0897247358851685 ], "wc_summary_paper_avg": [ 69.0, 17.421251390184345 ], "wc_strength_and_weaknesses_avg": [ 375.5, 355.1284978708411 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 56.5, 41.33702940463913 ], "wc_summary_review_avg": [ 79.75, 69.44917206129962 ], "wc_review_avg": [ 580.75, 470.132095798617 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 261.75, 168.39444022888642 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16431095515888118873&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Samsung;Queen Mary University of London", "aff_unique_dep": "AI Center;", "aff_unique_url": "https://www.samsung.com/global/innovation/ai-research/;https://www.qmul.ac.uk", "aff_unique_abbr": "SAC;QMUL", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Cambridge;;London", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "United Kingdom;South Korea" }, { "id": "-z7O7fk_Cs", "title": "Invertible normalizing flow neural networks by JKO scheme", "track": "main", "status": "Reject", "tldr": "We propose JKO-Flow to train normalizing flow neural ODE model block-wise with time reparametrization, and experimentally show JKO-Flow reaches competitive performance while greatly reduce computation", "abstract": "Normalizing flow is a class of deep generative models for efficient sampling and density estimation. In practice, the flow often appears as a chain of invertible neural network blocks. To facilitate training, past works have regularized flow trajectories and designed special network architectures. The current paper develops a neural ODE flow network inspired by the Jordan-Kinderleherer-Otto (JKO) scheme, which allows an efficient \\textit{block-wise} training procedure: as the JKO scheme unfolds the dynamic of gradient flow, the proposed model naturally stacks residual network blocks one-by-one and reduces the memory load as well as the difficulty of training deep networks. We also develop an adaptive time-reparametrization of the flow network with a progressive refinement of the trajectory in probability space, which improves the optimization efficiency and model accuracy in practice. \nOn high-dimensional generative tasks for tabular data, JKO-Flow can process larger data batches and perform competitively as or better than continuous and discrete flow models, using 10X less number of iterations (e.g., batches) and significantly less time per iteration. 
", "keywords": "Normalizing flow;invertible neural networks;JKO scheme", "primary_area": "", "supplementary_material": "/attachment/e771e0430b6e15756d82da5c103457c92a39e54b.zip", "author": "Chen Xu;Xiuyuan Cheng;Yao Xie", "authorids": "~Chen_Xu12;~Xiuyuan_Cheng1;~Yao_Xie2", "gender": "M;;F", "homepage": "https://hamrel-cxu.github.io/;;http://www2.isye.gatech.edu/~yxie77", "dblp": ";79/9747;13/4242-2", "google_scholar": "https://scholar.google.com/citations?hl=en;I2gwdssAAAAJ;qvYp8ZQAAAAJ", "orcid": ";;", "linkedin": "chen-xu-92013714a/;;yaoxie/", "or_profile": "~Chen_Xu12;~Xiuyuan_Cheng1;~Yao_Xie2", "aff": "Georgia Institute of Technology;Duke University;Georgia Institute of Technology", "aff_domain": "gatech.edu;duke.edu;gatech.edu", "position": "PhD student;Associate Professor;Associate Professor", "bibtex": "@misc{\nxu2023invertible,\ntitle={Invertible normalizing flow neural networks by {JKO} scheme},\nauthor={Chen Xu and Xiuyuan Cheng and Yao Xie},\nyear={2023},\nurl={https://openreview.net/forum?id=-z7O7fk_Cs}\n}", "github": "", "project": "", "reviewers": "tb5k;y2DU;ffZu;T8Fn", "site": "https://openreview.net/forum?id=-z7O7fk_Cs", "pdf_size": 5930575, "recommendation": "3;5;5;6", "confidence": "5;4;4;3", "correctness": "4;3;3;3", "technical_novelty": "2;3;2;3", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "15;80;103;113", "wc_strength_and_weaknesses": "260;532;565;142", "wc_clarity_quality_novelty_and_reproducibility": "11;85;30;463", "wc_summary_review": "2;42;29;47", "wc_review": "288;739;727;765", "wc_reply_reviewers": "0;38;93;231", "wc_reply_authors": "111;609;353;569", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 77.75, 38.15347297429161 ], "wc_strength_and_weaknesses_avg": [ 374.75, 179.06894621904715 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 147.25, 184.31274372652587 ], "wc_summary_review_avg": [ 30.0, 17.449928366615147 ], "wc_review_avg": [ 629.75, 197.78697505144265 ], "wc_reply_reviewers_avg": [ 90.5, 87.59708899272852 ], "wc_reply_authors_avg": [ 410.5, 198.45087553346798 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.9733285267845754, "corr_recommendation_correctness": -0.9271726499455306, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7381887434881140274&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;0", "aff_unique_norm": "Georgia Institute of Technology;Duke University", "aff_unique_dep": ";", "aff_unique_url": "https://www.gatech.edu;https://www.duke.edu", "aff_unique_abbr": "Georgia Tech;Duke", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "-z911HH4RFv", "title": "Adversarial Learned Fair Representations using Dampening and Stacking", "track": "main", "status": "Reject", "tldr": "", "abstract": "As more decisions in our daily life become automated, the need to have machine learning algorithms that make fair decisions increases. In fair representation learning we are tasked with finding a suitable representation of the data in which a sensitive variable is censored. 
Recent work aims to learn fair representations through adversarial learning. This paper builds upon this work by introducing a novel algorithm that uses dampening and stacking to learn adversarial fair representations. Results show that our algorithm improves upon earlier work in both censoring and reconstruction.", "keywords": "Machine Learning;Deep Learning;Fairness;Adversarial Learning;Fair Representation Learning", "primary_area": "", "supplementary_material": "/attachment/97316d1d8d9dcba6e4d884e4f877d29ffab14744.zip", "author": "Max Knobbout", "authorids": "~Max_Knobbout1", "gender": "M", "homepage": "https://knobbout.com/", "dblp": "63/10481.html", "google_scholar": "https://scholar.google.nl/citations?user=GJnM-Y0AAAAJ", "orcid": "0009-0006-0918-0441", "linkedin": "max-knobbout-670354101/", "or_profile": "~Max_Knobbout1", "aff": "", "aff_domain": "", "position": "", "bibtex": "@misc{\nknobbout2023adversarial,\ntitle={Adversarial Learned Fair Representations using Dampening and Stacking},\nauthor={Max Knobbout},\nyear={2023},\nurl={https://openreview.net/forum?id=-z911HH4RFv}\n}", "github": "", "project": "", "reviewers": "kwp8;4DEf;Hobp", "site": "https://openreview.net/forum?id=-z911HH4RFv", "pdf_size": 310277, "recommendation": "3;3;5", "confidence": "4;3;2", "correctness": "2;3;3", "technical_novelty": "2;2;3", "empirical_novelty": "1;1;2", "wc_summary_paper": "168;153;156", "wc_strength_and_weaknesses": "293;368;88", "wc_clarity_quality_novelty_and_reproducibility": "23;35;72", "wc_summary_review": "22;28;50", "wc_review": "506;584;366", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.3333333333333333, 0.4714045207910317 ], "wc_summary_paper_avg": [ 159.0, 6.48074069840786 ], "wc_strength_and_weaknesses_avg": [ 249.66666666666666, 118.34506984051156 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 43.333333333333336, 20.8539897594894 ], "wc_summary_review_avg": [ 33.333333333333336, 12.036980056845191 ], "wc_review_avg": [ 485.3333333333333, 90.18992306362293 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 1, 0 ], "corr_recommendation_confidence": -0.8660254037844387, "corr_recommendation_correctness": 0.5, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3931609547328945163&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3 }, { "title": "Linear Convergence of Natural Policy Gradient Methods with Log-Linear Policies", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12146", "id": "-z9hdsyUwVQ", "poster": "", "openreview": "https://openreview.net/forum?id=-z9hdsyUwVQ", "slides": "https://iclr.cc/virtual/2023/poster/12146", "video": "https://iclr.cc/virtual/2023/poster/12146", "author_site": "Rui Yuan, Simon Du, Robert M. 
Gower, Alessandro Lazaric, Lin Xiao", "tldr": "We show linear convergence of natural policy gradient methods with log-linear policies without any regularization.", "abstract": "We consider infinite-horizon discounted Markov decision processes and study the convergence rates of the natural policy gradient (NPG) and the Q-NPG methods with the log-linear policy class. Using the compatible function approximation framework, both methods with log-linear policies can be written as approximate versions of the policy mirror descent (PMD) method. We show that both methods attain linear convergence rates and $\\tilde{\\mathcal{O}}(1/\\epsilon^2)$ sample complexities using a simple, non-adaptive geometrically increasing step size, without resorting to entropy or other strongly convex regularization. Lastly, as a byproduct, we obtain sublinear convergence rates for both methods with arbitrary constant step size.", "keywords": "Discounted Markov decision process;natural policy gradient;policy mirror descent;log-linear policy;sample complexity", "primary_area": "", "supplementary_material": "", "author": "Rui Yuan;Simon Shaolei Du;Robert M. Gower;Alessandro Lazaric;Lin Xiao", "authorids": "~Rui_Yuan1;~Simon_Shaolei_Du1;~Robert_M._Gower1;~Alessandro_Lazaric2;~Lin_Xiao1", "gender": "M;M;M;M;", "homepage": "https://rui-yuan91.github.io/;http://simonshaoleidu.com;https://gowerrobert.github.io/;;", "dblp": ";176/5602;143/0056;36/321;", "google_scholar": "4QZgrj0AAAAJ;OttawxUAAAAJ;okKw87MAAAAJ;6JZ3R6wAAAAJ;vK0-CDcAAAAJ", "orcid": "0000-0002-1768-9639;;;;0000-0002-9759-3898", "linkedin": "rui-yuan-phd-55135537/;;;;", "or_profile": "~Rui_Yuan1;~Simon_Shaolei_Du1;~Robert_M._Gower1;~Alessandro_Lazaric2;~Lin_Xiao1", "aff": "T\u00e9l\u00e9com Paris;Meta Facebook;Flatiron Institute;Meta Facebook;Meta Facebook", "aff_domain": "telecom-paristech.fr;fb.com;simonsfoundation.org;fb.com;meta.com", "position": "PhD student;Visiting Professor;Researcher;Research Scientist;Research Scientist", "bibtex": "@inproceedings{\nyuan2023linear,\ntitle={Linear Convergence of Natural Policy Gradient Methods with Log-Linear Policies},\nauthor={Rui Yuan and Simon Shaolei Du and Robert M. 
Gower and Alessandro Lazaric and Lin Xiao},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=-z9hdsyUwVQ}\n}", "github": "", "project": "", "reviewers": "sgtm;NuUP;Cu4x;yaHy", "pdf_size": 611132, "recommendation": "5;6;6;8", "confidence": "3;4;4;4", "correctness": "4;4;4;4", "technical_novelty": "2;3;2;3", "empirical_novelty": "0;0;0;0", "wc_summary_paper": "57;80;122;71", "wc_strength_and_weaknesses": "98;135;175;320", "wc_clarity_quality_novelty_and_reproducibility": "17;22;17;259", "wc_summary_review": "68;110;66;122", "wc_review": "240;347;380;772", "wc_reply_reviewers": "0;50;0;0", "wc_reply_authors": "29;123;57;1299", "reply_reviewers": "0;1;0;0", "reply_authors": "1;1;1;3", "recommendation_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 4.0, 0.0 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 0, 0 ], "wc_summary_paper_avg": [ 82.5, 24.23324163210527 ], "wc_strength_and_weaknesses_avg": [ 182.0, 84.19916864197651 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 78.75, 104.08740317636904 ], "wc_summary_review_avg": [ 91.5, 24.8746859276655 ], "wc_review_avg": [ 434.75, 201.47130689008796 ], "wc_reply_reviewers_avg": [ 12.5, 21.650635094610966 ], "wc_reply_authors_avg": [ 377.0, 533.4097861869428 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.6622661785325219, "corr_recommendation_correctness": 0.0, "gs_citation": 51, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14548058201592938918&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "pdf": "https://openreview.net/pdf?id=-z9hdsyUwVQ", "email": "telecom-paristech.fr;fb.com;simonsfoundation.org;fb.com;meta.com", "author_num": 5, "aff_unique_index": "0;1;2;1;1", "aff_unique_norm": "T\u00e9l\u00e9com Paris;Meta;Flatiron Institute", "aff_unique_dep": ";Meta Platforms, Inc.;", "aff_unique_url": "https://www.telecom-paris.fr;https://meta.com;https://flatironinstitute.org", "aff_unique_abbr": "T\u00e9l\u00e9com Paris;Meta;Flatiron", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1", "aff_country_unique": "France;United States" }, { "id": "-zw8zmeIt7M", "title": "Efficiently Meta-Learning for Robust Deep Networks without Prior Unbiased Set", "track": "main", "status": "Withdraw", "tldr": "We present an efficient meta-learning approach, which eliminates the dependence on additional unbiased data and reduces the optimization complexity of recent meta-learning-based methods", "abstract": "Learning with noisy labels is a practically challenging problem in robust deep learning. Recent efforts to improve the robustness are made by meta-learning the sample weights or transition matrix from a prior unbiased set. Thus, previous meta-learning based approaches generally assume the existence of such a prior unbiased set. Unfortunately, this assumption unrealistically simplifies the task of learning with noisy labels in real-world scenarios; even worse, the updating iterations in previous meta-learning algorithms typically demand prohibitive computational cost. This paper proposes an efficient meta-learning approach for robust deep learning to address these challenges.
Specifically, without relying on a prior unbiased validation set, our method dynamically estimates unbiased samples in the training data and leverages meta-learning to refine the deep networks. Furthermore, to significantly reduce the optimization cost of the updating iterations, we carefully design the inner-loop adaptation and the outer-loop optimization of the meta-learning paradigm. Experimental results demonstrate that our approach reduces training time by about a factor of 6 while achieving comparable or even better generalization performance. In particular, we improve accuracy on the CIFAR100 benchmark with 40% instance-dependent noise by more than 13% in absolute terms.", "keywords": "Robust deep learning;Noisy Label;Meta-learning;KD", "primary_area": "", "supplementary_material": "/attachment/36108a000809a08c746de9261b2433f1051d5a56.zip", "author": "Xing XiMing;Tiantian Zhang;Haiyang Zhang;Qian Yu;Yanwei Fu", "authorids": "~Xing_XiMing1;~Tiantian_Zhang1;~Haiyang_Zhang2;~Qian_Yu4;~Yanwei_Fu2", "gender": "M;F;;F;M", "homepage": "https://ximinng.github.io/;https://github.com/July-zh;http://www.bupt.edu.cn;https://yuqian1023.github.io/;http://yanweifu.github.io", "dblp": "350/0927;;;;63/9065", "google_scholar": "https://scholar.google.com.tw/citations?user=tFpaF7AAAAAJ;;;mmm90qgAAAAJ;https://scholar.google.co.uk/citations?user=Vg54TcsAAAAJ", "orcid": ";;0000-0002-5040-2468;0000-0002-0538-7940;0000-0002-6595-6893", "linkedin": ";;;;", "or_profile": "~Xing_XiMing1;~Tiantian_Zhang1;~Haiyang_Zhang2;~Qian_Yu4;~Yanwei_Fu2", "aff": "Beihang University;Beijing University of Post and Telecommunication;Beijing University of Posts and Telecommunications;Beihang University;Fudan University,", "aff_domain": "buaa.edu.cn;bupt.edu.cn;bupt.edu.cn;buaa.edu.cn;fudan.edu.cn", "position": "PhD student;MS student;Associate Professor;Associate Professor;Professor", "bibtex": "@misc{\nximing2023efficiently,\ntitle={Efficiently Meta-Learning for Robust Deep Networks without Prior Unbiased Set},\nauthor={Xing XiMing and Tiantian Zhang and Haiyang Zhang and Qian Yu and Yanwei Fu},\nyear={2023},\nurl={https://openreview.net/forum?id=-zw8zmeIt7M}\n}", "github": "", "project": "", "reviewers": "tZqU;Uy7S;LU8E;ZZYi", "site": "https://openreview.net/forum?id=-zw8zmeIt7M", "pdf_size": 7382691, "recommendation": "3;5;5;8", "confidence": "3;3;3;5", "correctness": "2;3;3;4", "technical_novelty": "2;3;2;3", "empirical_novelty": "2;3;2;3", "wc_summary_paper": "51;28;48;61", "wc_strength_and_weaknesses": "101;295;290;272", "wc_clarity_quality_novelty_and_reproducibility": "78;27;5;23", "wc_summary_review": "29;38;6;34", "wc_review": "259;388;349;390", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "131;406;462;167", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 5.25, 1.7853571071357126 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 47.0, 11.979148550710939 ], "wc_strength_and_weaknesses_avg": [ 239.5, 80.41921412199947 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 33.25, 27.13277538328875 ], "wc_summary_review_avg": [ 26.75, 12.397076268217438 ], "wc_review_avg": [ 346.5, 53.09661006128357 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 291.5, 144.43077926813245 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 5, 0 ],
"corr_recommendation_confidence": 0.8892972917998875, "corr_recommendation_correctness": 0.9901475429766743, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:c4mDmNjZQNkJ:scholar.google.com/&scioq=Efficiently+Meta-Learning+for+Robust+Deep+Networks+without+Prior+Unbiased+Set&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;1;0;2", "aff_unique_norm": "Beihang University;Beijing University of Posts and Telecommunications;Fudan University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.buaa.edu.cn/;http://www.bupt.edu.cn/;https://www.fudan.edu.cn", "aff_unique_abbr": "BUAA;BUPT;Fudan", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Beijing", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "00kPgkoahtO", "title": "IAE: Implicit Autoencoder for Point Cloud Self-supervised Representation Learning", "track": "main", "status": "Withdraw", "tldr": "We propose a simple yet effective non-symmetric autoencoder for point cloud self-supervised learning which leverages implicit function.", "abstract": "Autoencoding has been a popular topic across many fields and recently emerged in the 3D domain. However, many 3D representations (e.g., point clouds) are discrete samples of the underlying continuous 3D surface which makes them different from other data modalities. This process inevitably introduces sampling variations on the underlying 3D shapes. In learning 3D representation, a desirable goal is to disregard such sampling variations while focusing on capturing transferable knowledge of the underlying 3D shape. This aim poses a grand challenge to existing representation learning paradigms. For example, the standard autoencoding paradigm forces the encoder to capture such sampling variations as the decoder has to reconstruct the original point cloud. In this paper, we introduce the Implicit Autoencoder (IAE). This simple yet effective method addresses this challenge by replacing the point cloud decoder with an implicit decoder. The implicit decoder can output a continuous representation that is shared among different point cloud samplings of the same model. Reconstructing under the implicit representation can prioritize that the encoder discards sampling variations, introducing appropriate inductive bias to learn more generalizable feature representations. We validate this claim experimentally and show a theoretical analysis under a simple linear autoencoder. Moreover, our implicit decoder offers excellent flexibility in designing suitable implicit representations for different tasks. We demonstrate the usefulness of IAE across various self-supervised learning tasks for both 3D objects and 3D scenes. Experimental results show that IAE consistently outperforms the state-of-the-art in each task. 
", "keywords": "point cloud;self-supervised learning;representation learning;autoencoder;implicit function", "primary_area": "", "supplementary_material": "/attachment/97fa30e4c54e7673162a4469c0da40d536d01c8f.zip", "author": "Siming Yan;Zhenpei Yang;Haoxiang Li;Li Guan;Hao Kang;Gang Hua;Qixing Huang", "authorids": "~Siming_Yan1;~Zhenpei_Yang1;~Haoxiang_Li1;li.guan@gmail.com;~Hao_Kang2;~Gang_Hua3;~Qixing_Huang1", "gender": "M;M;M;;M;M;M", "homepage": ";https://www.cs.utexas.edu/~yzp12/;https://resume.haoxiang.org;;https://www.linkedin.com/in/haokang2017;http://www.ganghua.org;https://www.cs.utexas.edu/~huangqx/", "dblp": "156/8709;218/7413;;;;75/5209.html;82/241", "google_scholar": "znWC2vAAAAAJ;7AiInscAAAAJ;Fu6aoXAAAAAJ;;VeTCSyEAAAAJ;7SgUlggAAAAJ;https://scholar.google.com.tw/citations?user=pamL_rIAAAAJ", "orcid": ";0000-0003-2717-5639;;;;0000-0001-9522-6157;", "linkedin": ";;haoxiangli/;;;ganghua/;", "or_profile": "~Siming_Yan1;~Zhenpei_Yang1;~Haoxiang_Li1;li.guan@gmail.com;~Hao_Kang2;~Gang_Hua3;~Qixing_Huang1", "aff": "The University of Texas at Austin;Waymo LLC;Wormpex AI Research;;Wormpex AI Research;Wormpex AI Research;University of Texas at Austin", "aff_domain": "cs.utexas.edu;waymo.com;wormpexai.com;;wormpex.com;bianlifeng.com;utexas.edu", "position": "PhD student;Researcher;Principal Researcher;;Researcher;Chief Scientist and Managing Director;Associate Professor", "bibtex": "@misc{\nyan2023iae,\ntitle={{IAE}: Implicit Autoencoder for Point Cloud Self-supervised Representation Learning},\nauthor={Siming Yan and Zhenpei Yang and Haoxiang Li and Li Guan and Hao Kang and Gang Hua and Qixing Huang},\nyear={2023},\nurl={https://openreview.net/forum?id=00kPgkoahtO}\n}", "github": "", "project": "", "reviewers": "zrve;cZLp;4bPT;tdX9", "site": "https://openreview.net/forum?id=00kPgkoahtO", "pdf_size": 11453049, "recommendation": "3;3;5;6", "confidence": "4;4;4;5", "correctness": "3;2;3;4", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;2;0;3", "wc_summary_paper": "93;42;59;79", "wc_strength_and_weaknesses": "384;28;352;266", "wc_clarity_quality_novelty_and_reproducibility": "31;42;22;15", "wc_summary_review": "26;233;46;27", "wc_review": "534;345;479;387", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 1.75, 1.0897247358851685 ], "wc_summary_paper_avg": [ 68.25, 19.382659776202026 ], "wc_strength_and_weaknesses_avg": [ 257.5, 139.3511750937178 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 27.5, 10.111874208078342 ], "wc_summary_review_avg": [ 83.0, 86.96838506031948 ], "wc_review_avg": [ 436.25, 74.38875923148605 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.7777777777777777, "corr_recommendation_correctness": 0.816496580927726, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6770733038858377473&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1;2;2;2;0", "aff_unique_norm": "University of Texas at Austin;Waymo;Wormpex AI Research", "aff_unique_dep": ";;AI Research", "aff_unique_url": "https://www.utexas.edu;https://www.waymo.com;", "aff_unique_abbr": 
"UT Austin;Waymo;Wormpex AI", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Austin;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Exploring Low-Rank Property in Multiple Instance Learning for Whole Slide Image Classification", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12090", "id": "01KmhBsEPFO", "poster": "/media/PosterPDFs/ICLR%202023/12090.png?t=1681055960.1862047", "openreview": "https://openreview.net/forum?id=01KmhBsEPFO", "slides": "https://iclr.cc/virtual/2023/poster/12090", "video": "https://iclr.cc/virtual/2023/poster/12090", "author_site": "Jinxi Xiang, Jun Zhang", "tldr": "draft", "abstract": "The classification of gigapixel-sized whole slide images (WSIs) with slide-level labels can be formulated as a multiple-instance-learning (MIL) problem. State-of-the-art models often consist of two decoupled parts: local feature embedding with a pre-trained model followed by a global feature aggregation network for classification. We leverage the properties of the apparent similarity in high-resolution WSIs, which essentially exhibit \\textit{low-rank} structures in the data manifold, to develop a novel MIL with a boost in both feature embedding and feature aggregation. We extend the contrastive learning with a pathology-specific Low-Rank Constraint (LRC) for feature embedding to pull together samples (i.e., patches) belonging to the same pathological tissue in the low-rank subspace and simultaneously push apart those from different latent subspaces. At the feature aggregation stage, we introduce an iterative low-rank attention MIL (ILRA-MIL) model to aggregate features with low-rank learnable latent vectors to model global interactions among all instances. We highlight the importance of instance correlation modeling but refrain from directly using the transformer encoder considering the $O(n^2)$ complexity. ILRA-MIL with LRC pre-trained features achieves strong empirical results across various benchmarks, including (i) 96.49\\% AUC on the CAMELYON16 for binary metastasis classification, (ii) 97.63\\% AUC on the TCGA-NSCLC for lung cancer subtyping, and (iii) 0.6562 kappa on the large-scale PANDA dataset for prostate cancer classification. 
The code is available at https://github.com/jinxixiang/low_rank_wsi.", "keywords": "computational pathology;multiple instance learning;low-rank constraint;self-attention", "primary_area": "", "supplementary_material": "", "author": "Jinxi Xiang;Jun Zhang", "authorids": "~Jinxi_Xiang1;~Jun_Zhang17", "gender": "M;M", "homepage": "https://jinxixiang.netlify.app/;https://junzhang.org", "dblp": "227/4249;29/4190-18.html", "google_scholar": "Zn-0LioAAAAJ;", "orcid": ";0000-0001-5579-7094", "linkedin": ";", "or_profile": "~Jinxi_Xiang1;~Jun_Zhang17", "aff": "Tencent AI Lab;Tencent AI Lab", "aff_domain": "tencent.com;tencent.com", "position": "Researcher;Principal Researcher", "bibtex": "@inproceedings{\nxiang2023exploring,\ntitle={Exploring Low-Rank Property in Multiple Instance Learning for Whole Slide Image Classification},\nauthor={Jinxi Xiang and Jun Zhang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=01KmhBsEPFO}\n}", "github": "", "project": "", "reviewers": "NvEE;oayv;LxYk;j84C", "pdf_size": 20098431, "recommendation": "5;5;6;8", "confidence": "3;3;4;4", "correctness": "3;4;2;4", "technical_novelty": "3;3;3;3", "empirical_novelty": "3;1;0;3", "wc_summary_paper": "93;136;63;111", "wc_strength_and_weaknesses": "66;41;175;120", "wc_clarity_quality_novelty_and_reproducibility": "179;14;45;95", "wc_summary_review": "27;45;11;13", "wc_review": "365;236;294;339", "wc_reply_reviewers": "0;0;0;24", "wc_reply_authors": "662;1114;663;419", "reply_reviewers": "0;0;0;1", "reply_authors": "3;3;2;2", "recommendation_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 1.75, 1.299038105676658 ], "wc_summary_paper_avg": [ 100.75, 26.61179249881526 ], "wc_strength_and_weaknesses_avg": [ 100.5, 51.62605931116571 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 83.25, 62.37938361349846 ], "wc_summary_review_avg": [ 24.0, 13.601470508735444 ], "wc_review_avg": [ 308.5, 48.96171974103851 ], "wc_reply_reviewers_avg": [ 6.0, 10.392304845413264 ], "wc_reply_authors_avg": [ 714.5, 251.16180043947764 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.8164965809277259, "corr_recommendation_correctness": 0.24618298195866545, "gs_citation": 43, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18242350847823287160&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "pdf": "https://openreview.net/pdf?id=01KmhBsEPFO", "email": "tencent.com;tencent.com", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Tencent", "aff_unique_dep": "Tencent AI Lab", "aff_unique_url": "https://ai.tencent.com", "aff_unique_abbr": "Tencent AI Lab", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "01LMSeReNvY", "title": "PromptBoosting: Black-Box Text Classification with Ten Forward Passes", "track": "main", "status": "Reject", "tldr": "", "abstract": "We describe PromptBoosting, a query-efficient procedure for building a text classifier from a neural language model (LM) without access to the LM\u2019s parameters, gradients, or hidden representations. 
This form of \u201cblack-box\u201d classifier training has become increasingly important as the cost of training and inference in large-scale LMs grows. But existing black-box LM classifier learning approaches are themselves computationally inefficient, typically specializing LMs to the target task by searching in a large space of (discrete or continuous) prompts using zeroth-order optimization methods. Instead of directly optimizing in prompt space, PromptBoosting obtains a small pool of prompts via a gradient-free approach and then constructs a large pool of weak learners by pairing these prompts with different elements of the LM\u2019s output distribution. These weak learners are then ensembled using the AdaBoost algorithm. The entire learning process requires only a small number of forward passes and no backward pass. Experiments show that PromptBoosting achieves state-of-the-art performance in multiple black-box few-shot classification tasks, and matches or outperforms full fine-tuning in both few-shot and standard learning paradigms, while training 10x faster than existing black-box methods.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/d854f8f80397be432a670505bb0041324f16b8fc.zip", "author": "Bairu Hou;Joe O'Connor;Jacob Andreas;Shiyu Chang;Yang Zhang", "authorids": "~Bairu_Hou2;~Joe_O'Connor1;~Jacob_Andreas1;~Shiyu_Chang2;~Yang_Zhang3", "gender": ";M;M;Unspecified;M", "homepage": "https://hbr690188270.github.io/;https://joeoc21.github.io;http://web.mit.edu/jda/www;http://people.csail.mit.edu/chang87/;", "dblp": "274/7151;;97/8154;28/9988;06/6785-1", "google_scholar": "FO7taJgAAAAJ;;dnZ8udEAAAAJ;r21asW4AAAAJ;_-5PSgQAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Bairu_Hou2;~Joe_O'Connor1;~Jacob_Andreas1;~Shiyu_Chang2;~Yang_Zhang3", "aff": "University of California, Santa Barbara;;Microsoft;University of California, Santa Barbara;International Business Machines", "aff_domain": "ucsb.edu;;microsoft.com;ucsb.edu;ibm.com", "position": "PhD student;;Researcher;Assistant Professor;Research Staff Employee", "bibtex": "@misc{\nhou2023promptboosting,\ntitle={PromptBoosting: Black-Box Text Classification with Ten Forward Passes},\nauthor={Bairu Hou and Joe O'Connor and Jacob Andreas and Shiyu Chang and Yang Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=01LMSeReNvY}\n}", "github": "", "project": "", "reviewers": "cHte;QZLQ;jDoU;QXXp", "site": "https://openreview.net/forum?id=01LMSeReNvY", "pdf_size": 700916, "recommendation": "6;6;6;6", "confidence": "2;3;4;4", "correctness": "3;3;3;3", "technical_novelty": "2;3;3;2", "empirical_novelty": "2;3;3;2", "wc_summary_paper": "37;136;77;91", "wc_strength_and_weaknesses": "81;99;274;191", "wc_clarity_quality_novelty_and_reproducibility": "210;14;29;40", "wc_summary_review": "71;41;54;37", "wc_review": "399;290;434;359", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 85.25, 35.3721288587498 ], "wc_strength_and_weaknesses_avg": [ 161.25, 77.31873964311627 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 73.25, 79.49017234853626 ], "wc_summary_review_avg": [ 50.75, 13.273563952458284 ], "wc_review_avg": [ 370.5, 53.518688324733816 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 
], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 58, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11992129337656043139&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "University of California, Santa Barbara;Microsoft;International Business Machines Corporation", "aff_unique_dep": ";Microsoft Corporation;", "aff_unique_url": "https://www.ucsb.edu;https://www.microsoft.com;https://www.ibm.com", "aff_unique_abbr": "UCSB;Microsoft;IBM", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Santa Barbara;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "020JErJMvVZ", "title": "Learning Continuous Grasping Function with a Dexterous Hand from Human Demonstrations", "track": "main", "status": "Withdraw", "tldr": "We propose Continuous Grasping Function (CGF) to generate grasping motion for manipulation with a dexterous hand.", "abstract": "We propose to learn to generate grasping motion for manipulation with a dexterous hand using implicit functions. With continuous time inputs, the model can generate a continuous and smooth grasping plan. We name the proposed model Continuous Grasping Function (CGF). CGF is learned via generative modeling with a Conditional Variational Autoencoder using 3D human demonstrations. We will first convert the large-scale human-object interaction trajectories to robot demonstrations via motion retargeting, and then use these demonstrations to train CGF. During inference, we perform sampling with CGF to generate different grasping plans in the simulator and select the successful ones to transfer to the real robot. By training on diverse human data, our CGF allows generalization to manipulate multiple objects. Compared to previous planning algorithms, CGF is more efficient and achieves significant improvement on success rate when transferred to grasping with the real Allegro Hand. 
Our anonymous project page is available at https://continuous-grasping.github.io/.", "keywords": "Dexterous Grasping;Implicit Function;Generative Model;Sim2Real", "primary_area": "", "supplementary_material": "/attachment/1c1e8de031ff48379d07ef64d16675973f365fca.zip", "author": "Jianglong Ye;Jiashun Wang;Binghao Huang;Yuzhe Qin;Xiaolong Wang", "authorids": "~Jianglong_Ye1;~Jiashun_Wang1;~Binghao_Huang1;~Yuzhe_Qin1;~Xiaolong_Wang3", "gender": "M;M;;M;M", "homepage": "https://jianglongye.com;https://jiashunwang.github.io/;https://binghao-huang.github.io/;https://yzqin.github.io/;https://xiaolonw.github.io/", "dblp": "307/5025;260/6495;;241/9337;91/952-4", "google_scholar": "nkEGpKsAAAAJ;gdO9Gb0AAAAJ;nqoOetAAAAAJ;3KF3AIMAAAAJ;Y8O9N_0AAAAJ", "orcid": "0000-0003-1347-9199;;;0000-0002-9321-9305;", "linkedin": ";;;;", "or_profile": "~Jianglong_Ye1;~Jiashun_Wang1;~Binghao_Huang1;~Yuzhe_Qin1;~Xiaolong_Wang3", "aff": "University of California, San Diego;Boston Dynamics AI Institute;University of California, San Diego;University of California, San Diego;University of California, San Diego", "aff_domain": "ucsd.edu;theaiinstitute.com;ucsd.edu;ucsd.edu;ucsd.edu", "position": "PhD student;Intern;MS student;PhD student;Assistant Professor", "bibtex": "@misc{\nye2023learning,\ntitle={Learning Continuous Grasping Function with a Dexterous Hand from Human Demonstrations},\nauthor={Jianglong Ye and Jiashun Wang and Binghao Huang and Yuzhe Qin and Xiaolong Wang},\nyear={2023},\nurl={https://openreview.net/forum?id=020JErJMvVZ}\n}", "github": "", "project": "", "reviewers": "yYBe;pp26;3SdR;YY2P", "site": "https://openreview.net/forum?id=020JErJMvVZ", "pdf_size": 14463567, "recommendation": "3;5;5;8", "confidence": "3;4;5;3", "correctness": "3;3;3;4", "technical_novelty": "2;2;2;3", "empirical_novelty": "3;2;2;3", "wc_summary_paper": "55;92;165;77", "wc_strength_and_weaknesses": "96;317;55;113", "wc_clarity_quality_novelty_and_reproducibility": "9;21;54;20", "wc_summary_review": "3;36;248;35", "wc_review": "163;466;522;245", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.25, 1.7853571071357126 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 97.25, 41.26969226926705 ], "wc_strength_and_weaknesses_avg": [ 145.25, 101.37646423110247 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 26.0, 16.837458240482736 ], "wc_summary_review_avg": [ 80.5, 97.61275531404695 ], "wc_review_avg": [ 349.0, 149.18947684069408 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.1266600992762247, "corr_recommendation_correctness": 0.8892972917998875, "gs_citation": 46, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7917046790892856772&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "University of California, San Diego;Boston Dynamics AI Institute", "aff_unique_dep": ";AI Institute", "aff_unique_url": "https://www.ucsd.edu;https://www.bostondynamics.com/", "aff_unique_abbr": "UCSD;BD AI", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "San Diego;", "aff_country_unique_index": "0;0;0;0;0", 
"aff_country_unique": "United States" }, { "id": "02Bt_4tx6r", "title": "Joint rotational invariance and adversarial training of a dual-stream Transformer yields state of the art Brain-Score for Area V4", "track": "main", "status": "Reject", "tldr": "We provide evidence that a specific Vision Transformer under a joint rotationally-invariant and adversarial optimization procedure can reach state of the art Brain-Score for Area V4", "abstract": "Modern high-scoring models of vision in the brain score competition do not stem from Vision Transformers. However, in this paper, we provide evidence against the unexpected trend of Vision Transformers (ViT) being not perceptually aligned with human visual representations by showing how a dual-stream Transformer, a CrossViT $~\\textit{a la}$ Chen et. al. (2021), under a joint rotationally-invariant and adversarial optimization procedure yields 2nd place in the aggregate Brain-Score 2022 competition (Schrimpf et al., 2020b) averaged across all visual categories, and at the time of the competition held 1st place for the highest explainable variance of area V4. In addition, our current Transformer-based model also achieves greater explainable variance for areas V4, IT, and Behaviour than a biologically-inspired CNN (ResNet50) that integrates a frontal V1-like computation module (Dapello et al., 2020). To assess the contribution of the optimization scheme with respect to the CrossViT architecture, we perform several additional experiments on differently optimized CrossViT's regarding adversarial robustness, common corruption benchmarks, mid-ventral stimuli interpretation, and feature inversion. Against our initial expectations, our family of results provides tentative support for an $\\textit{``All roads lead to Rome''}$ argument enforced via a joint optimization rule even for non biologically-motivated models of vision such as Vision Transformers.", "keywords": "Vision Transformer;Brain-Score competition;adversarial training;rotation invariance.", "primary_area": "", "supplementary_material": "/attachment/fe8c37ca3f8945fa8dbfe5002302a43bff983b5a.zip", "author": "William Berrios;Arturo Deza", "authorids": "~William_Berrios1;~Arturo_Deza1", "gender": "M;M", "homepage": ";http://arturodeza.wikidot.com/", "dblp": "315/9168;160/8606", "google_scholar": "EQhSvuAAAAAJ;KZLsTmQAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~William_Berrios1;~Arturo_Deza1", "aff": "Massachusetts Institute of Technology;UTEC - Universidad de Ingenier\u00eda y Tecnolog\u00eda", "aff_domain": "mit.edu;utec.edu.pe", "position": "Researcher;Assistant Professor", "bibtex": "@misc{\nberrios2023joint,\ntitle={Joint rotational invariance and adversarial training of a dual-stream Transformer yields state of the art Brain-Score for Area V4},\nauthor={William Berrios and Arturo Deza},\nyear={2023},\nurl={https://openreview.net/forum?id=02Bt_4tx6r}\n}", "github": "", "project": "", "reviewers": "84qC;ywsh;kkk2;2i2E", "site": "https://openreview.net/forum?id=02Bt_4tx6r", "pdf_size": 20321413, "recommendation": "3;5;6;8", "confidence": "4;5;5;4", "correctness": "3;4;4;3", "technical_novelty": "2;2;2;1", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "97;104;121;111", "wc_strength_and_weaknesses": "397;88;429;66", "wc_clarity_quality_novelty_and_reproducibility": "65;61;102;235", "wc_summary_review": "68;280;365;35", "wc_review": "627;533;1017;447", "wc_reply_reviewers": "0;229;0;0", "wc_reply_authors": "1583;1236;616;540", "reply_reviewers": "0;1;0;0", "reply_authors": "3;2;1;1", 
"recommendation_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 4.5, 0.5 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 108.25, 8.870597499605086 ], "wc_strength_and_weaknesses_avg": [ 245.0, 168.56007831037573 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 115.75, 70.68017756061455 ], "wc_summary_review_avg": [ 187.0, 139.28208786487946 ], "wc_review_avg": [ 656.0, 217.92888748396803 ], "wc_reply_reviewers_avg": [ 57.25, 99.15990873331822 ], "wc_reply_authors_avg": [ 993.75, 434.3054080943501 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5977331304258901736&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Massachusetts Institute of Technology;Universidad de Ingenier\u00eda y Tecnolog\u00eda", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.utec.edu.pe", "aff_unique_abbr": "MIT;UTEC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;Peru" }, { "id": "03sXXjL1um3", "title": "Linear convergence for natural policy gradient with log-linear policy parametrization", "track": "main", "status": "Reject", "tldr": "", "abstract": "We analyze the convergence rate of the \\emph{unregularized} natural policy gradient algorithm with log-linear policy parametrizations in infinite-horizon discounted Markov decision processes. In the deterministic case, when the Q-value is known and can be approximated by a linear combination of a known feature function up to a bias error, we show that a geometrically-increasing step size yields a linear convergence rate towards an optimal policy. We then consider the sample-based case, when the best representation of the Q-value function among linear combinations of a known feature function is known up to an estimation error. In this setting, we show that the algorithm enjoys the same linear guarantees as in the deterministic case up to an error term that depends on the estimation error, the bias error, and the condition number of the feature covariance matrix. 
Our results build upon the general framework of policy mirror descent and extend previous findings for the softmax tabular parametrization to the log-linear policy class.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Carlo Alfano;Patrick Rebeschini", "authorids": "~Carlo_Alfano1;~Patrick_Rebeschini1", "gender": "M;M", "homepage": ";http://www.stats.ox.ac.uk/~rebeschi/", "dblp": "302/4418;164/7439", "google_scholar": ";", "orcid": ";0000-0001-7772-4160", "linkedin": "carlo-alfano-7a4378171/;patrick-rebeschini/", "or_profile": "~Carlo_Alfano1;~Patrick_Rebeschini1", "aff": "University of Oxford;University of Oxford", "aff_domain": "ox.ac.uk;oxford.ac.uk", "position": "PhD student;Associate Professor", "bibtex": "@misc{\nalfano2023linear,\ntitle={Linear convergence for natural policy gradient with log-linear policy parametrization},\nauthor={Carlo Alfano and Patrick Rebeschini},\nyear={2023},\nurl={https://openreview.net/forum?id=03sXXjL1um3}\n}", "github": "", "project": "", "reviewers": "XCzT;VEtG;5t57;SUKm;vAhQ", "site": "https://openreview.net/forum?id=03sXXjL1um3", "pdf_size": 281535, "recommendation": "3;5;5;5;6", "confidence": "5;4;4;4;4", "correctness": "4;3;4;3;4", "technical_novelty": "2;2;2;3;2", "empirical_novelty": "0;0;0;0;2", "wc_summary_paper": "65;129;50;47;71", "wc_strength_and_weaknesses": "124;450;133;185;170", "wc_clarity_quality_novelty_and_reproducibility": "44;30;41;4;34", "wc_summary_review": "77;49;223;85;155", "wc_review": "310;658;447;321;430", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "69;238;33;204;120", "reply_reviewers": "0;0;0;0;0", "reply_authors": "1;1;1;1;1", "recommendation_avg": [ 4.8, 0.9797958971132712 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "correctness_avg": [ 3.6, 0.4898979485566356 ], "technical_novelty_avg": [ 2.2, 0.39999999999999997 ], "empirical_novelty_avg": [ 0.4, 0.8 ], "wc_summary_paper_avg": [ 72.4, 29.68905522242161 ], "wc_strength_and_weaknesses_avg": [ 212.4, 120.93072397037899 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 30.6, 14.192955999368138 ], "wc_summary_review_avg": [ 117.8, 63.12653958518557 ], "wc_review_avg": [ 433.2, 125.3018754847668 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 132.8, 77.8855570693309 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.9185586535436918, "corr_recommendation_correctness": -0.16666666666666663, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7940608220559942066&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0", "aff_unique_norm": "University of Oxford", "aff_unique_dep": "", "aff_unique_url": "https://www.ox.ac.uk", "aff_unique_abbr": "Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "title": "The hidden uniform cluster prior in self-supervised learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11302", "id": "04K3PMtMckp", "poster": "/media/PosterPDFs/ICLR%202023/11302.png?t=1682540230.4963794", "openreview": "https://openreview.net/forum?id=04K3PMtMckp", "slides": "https://iclr.cc/virtual/2023/poster/11302", "video": "https://iclr.cc/virtual/2023/poster/11302", "author_site": "Mahmoud Assran, Randall Balestriero, Quentin Duval, Florian Bordes, Ishan Misra, Piotr Bojanowski, Pascal Vincent, Michael 
Rabbat, Nicolas Ballas", "tldr": "Many common self-supervised learning frameworks provably impose a hidden uniform prior, which is detrimental when pretraining with real-world class-imbalanced data.", "abstract": "A successful paradigm in representation learning is to perform self-supervised pretraining using tasks based on mini-batch statistics; (e.g., SimCLR, VICReg, SwAV, MSN). We show that in the formulation of all these methods is an overlooked prior to learn features that enable uniform clustering of the data. While this prior has led to remarkably semantic representations when pretraining on class-balanced data, such as ImageNet, we demonstrate that it can hamper performance when pretraining on class-imbalanced data. By moving away from conventional uniformity priors and instead preferring power-law distributed feature clusters, we show that one can improve the quality of the learned representations on real-world class-imbalanced datasets. To demonstrate this, we develop an extension of the Masked Siamese Networks (MSN) method to support the use of arbitrary features priors.", "keywords": "self-supervised learning;unsupervised learning;representation learning;transfer learning", "primary_area": "", "supplementary_material": "", "author": "Mido Assran;Randall Balestriero;Quentin Duval;Florian Bordes;Ishan Misra;Piotr Bojanowski;Pascal Vincent;Michael Rabbat;Nicolas Ballas", "authorids": "~Mido_Assran1;~Randall_Balestriero1;~Quentin_Duval1;~Florian_Bordes1;~Ishan_Misra2;~Piotr_Bojanowski1;~Pascal_Vincent1;~Michael_Rabbat1;~Nicolas_Ballas1", "gender": "M;M;M;M;M;M;;M;M", "homepage": "https://randallbalestriero.github.io/;https://quentinduval.github.io/;;;http://www.iro.umontreal.ca/~vincentp;;;http://imisra.github.io/;http://www.midoassran.ca/", "dblp": "175/5364;33/7355;194/9862;142/2542;43/861;47/1744;120/9066;12/10954;216/2717", "google_scholar": "S1x_xqcAAAAJ;XTaVGqYAAAAJ;OADfWhUAAAAJ;https://scholar.google.fr/citations?user=lJ_oh2EAAAAJ;WBCKQMsAAAAJ;https://scholar.google.ch/citations?user=cMPKe9UAAAAJ;euUV4iUAAAAJ;WvufSLAAAAAJ;gcQTTvkAAAAJ", "orcid": ";;;;;;;;0000-0001-9159-8447", "linkedin": "randallbalestriero/;quentin-duval-53ba6576/;florianbordes;piotr-bojanowski-9a94402a;;;;ishan-misra-7a140215;", "or_profile": "~Randall_Balestriero1;~Quentin_Duval1;~Florian_Bordes1;~Piotr_Bojanowski1;~Pascal_Vincent1;~Michael_Rabbat1;~Nicolas_Ballas1;~Ishan_Misra1;~Mahmoud_Assran1", "aff": "Meta Facebook;Meta Facebook;University of Montreal;Meta;Facebook A.I. 
Research;Mila;Meta;Meta Facebook;McGill University", "aff_domain": "facebook.com;meta.com;umontreal.ca;meta.com;fb.com;mila.quebec;meta.com;fb.com;mcgill.ca", "position": "Postdoc;Researcher;PhD student;Researcher;Research Scientist;Associate Member;Researcher;Research Scientist;PhD student", "bibtex": "@inproceedings{\nassran2023the,\ntitle={The hidden uniform cluster prior in self-supervised learning},\nauthor={Mido Assran and Randall Balestriero and Quentin Duval and Florian Bordes and Ishan Misra and Piotr Bojanowski and Pascal Vincent and Michael Rabbat and Nicolas Ballas},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=04K3PMtMckp}\n}", "github": "", "project": "", "reviewers": "vagN;sHjw;1J6n;JLgU", "pdf_size": 3956336, "recommendation": "6;6;6;6", "confidence": "4;4;4;2", "correctness": "4;3;4;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "4;3;3;3", "wc_summary_paper": "92;194;67;76", "wc_strength_and_weaknesses": "100;208;114;165", "wc_clarity_quality_novelty_and_reproducibility": "20;25;36;19", "wc_summary_review": "68;131;23;38", "wc_review": "280;558;240;298", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "388;326;314;344", "reply_reviewers": "0;0;0;0", "reply_authors": "2;2;2;2", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 107.25, 50.87914602270757 ], "wc_strength_and_weaknesses_avg": [ 146.75, 42.84492385335747 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 25.0, 6.745368781616021 ], "wc_summary_review_avg": [ 65.0, 41.40652122552678 ], "wc_review_avg": [ 344.0, 125.32358118087753 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 343.0, 28.089143810376278 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 9, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 60, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5612170609686775497&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=04K3PMtMckp", "email": "facebook.com;meta.com;umontreal.ca;meta.com;fb.com;mila.quebec;meta.com;fb.com;mcgill.ca", "author_num": 9, "aff_unique_index": "0;0;1;0;0;2;0;0;3", "aff_unique_norm": "Meta;University of Montreal;Mila;McGill University", "aff_unique_dep": "Meta Platforms, Inc.;;Quebec Artificial Intelligence Institute;", "aff_unique_url": "https://meta.com;https://www.umontreal.ca;https://mila.quebec;https://www.mcgill.ca", "aff_unique_abbr": "Meta;UM;Mila;McGill", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;1;0;0;1", "aff_country_unique": "United States;Canada" }, { "id": "04OL67rm6ok", "title": "QUIC-FL: Quick Unbiased Compression for Federated Learning", "track": "main", "status": "Reject", "tldr": "A distributed mean estimation compression scheme with accuracy on-par with the state of the art while asymptotically improving the decoding time.", "abstract": "Distributed Mean Estimation (DME) is a fundamental building block in communication efficient federated learning. In DME, clients communicate their lossily compressed gradients to the parameter server, which estimates the average and updates the model.
\nState of the art DME techniques apply either unbiased quantization methods, resulting in large estimation errors, or biased quantization methods, where unbiasing the result requires that the server decodes each gradient individually, which markedly slows the aggregation time.\nIn this paper, we propose QUIC-FL, a DME algorithm that achieves the best of all worlds. QUIC-FL is unbiased, offers fast aggregation time, and is competitive with the most accurate (slow aggregation) DME techniques. To achieve this, we formalize the problem in a novel way that allows us to use standard solvers to design near-optimal unbiased quantization schemes.", "keywords": "Distributed;Mean Estimation;Federated Learning;Quantization;Unbiased;Communication Efficient;Bandwidth Reduction;Compression", "primary_area": "", "supplementary_material": "/attachment/a21e3433424978c7e5373e49f3f825f07e450e9a.zip", "author": "Ran Ben-Basat;Shay Vargaftik;Amit Portnoy;Gil Einziger;Yaniv Ben-Itzhak;Michael Mitzenmacher", "authorids": "~Ran_Ben-Basat1;~Shay_Vargaftik2;~Amit_Portnoy1;~Gil_Einziger1;~Yaniv_Ben-Itzhak1;~Michael_Mitzenmacher1", "gender": ";;;M;M;M", "homepage": "https://bbasat.com;;;;;", "dblp": "140/7690;;159/8806;139/7090;75/7855;74/838", "google_scholar": "6G61qDwAAAAJ;;https://scholar.google.co.il/citations?user=2umZKagAAAAJ;;https://scholar.google.co.il/citations?user=6YWAONwAAAAJ;e8aRmAsAAAAJ", "orcid": ";;0000-0001-6491-5814;;;", "linkedin": ";;amit-portnoy-75060766;;yaniv-ben-itzhak-5889307/;", "or_profile": "~Ran_Ben-Basat1;~Shay_Vargaftik2;~Amit_Portnoy1;~Gil_Einziger1;~Yaniv_Ben-Itzhak1;~Michael_Mitzenmacher1", "aff": "University College London;;Ben Gurion University of the Negev;;VMware;Harvard University", "aff_domain": "ucl.ac.uk;;bgu.ac.il;;vmware.com;harvard.edu", "position": "Assistant Professor;;PhD student;;Researcher;Full Professor", "bibtex": "@misc{\nben-basat2023quicfl,\ntitle={{QUIC}-{FL}: Quick Unbiased Compression for Federated Learning},\nauthor={Ran Ben-Basat and Shay Vargaftik and Amit Portnoy and Gil Einziger and Yaniv Ben-Itzhak and Michael Mitzenmacher},\nyear={2023},\nurl={https://openreview.net/forum?id=04OL67rm6ok}\n}", "github": "", "project": "", "reviewers": "V1XF;GcFN;Whkw", "site": "https://openreview.net/forum?id=04OL67rm6ok", "pdf_size": 1201123, "recommendation": "3;3;3", "confidence": "4;3;2", "correctness": "2;2;3", "technical_novelty": "2;2;2", "empirical_novelty": "2;2;3", "wc_summary_paper": "74;90;24", "wc_strength_and_weaknesses": "441;198;207", "wc_clarity_quality_novelty_and_reproducibility": "32;37;24", "wc_summary_review": "19;44;37", "wc_review": "566;369;292", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "708;474;303", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "correctness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 62.666666666666664, 28.110891523077353 ], "wc_strength_and_weaknesses_avg": [ 282.0, 112.48999955551605 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 31.0, 5.354126134736337 ], "wc_summary_review_avg": [ 33.333333333333336, 10.530379332620875 ], "wc_review_avg": [ 409.0, 115.3805298421994 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 495.0, 166.00602398708307 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 6, 0 ],
"corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5146084313704041414&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "University College London;Ben Gurion University of the Negev;VMware, Inc.;Harvard University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.ucl.ac.uk;https://www.bgu.ac.il;https://www.vmware.com;https://www.harvard.edu", "aff_unique_abbr": "UCL;BGU;VMware;Harvard", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;2", "aff_country_unique": "United Kingdom;Israel;United States" }, { "id": "05ff9BRSMzE", "title": "Gandalf : Data Augmentation is all you need for Extreme Classification", "track": "main", "status": "Reject", "tldr": "", "abstract": "Extreme Multi-label Text Classification (XMC) involves learning a classifier that can assign an input with a subset of most relevant labels from millions of label choices. Recent works in this domain have increasingly focused on the problem setting with short-text input data, and labels endowed with short textual descriptions called label features. Short-text XMC with label features has found numerous applications in areas such as prediction of related searches, title-based product recommendation, bid-phrase suggestion, amongst others. In this paper, we propose Gandalf, a graph induced data augmentation based on label features, such that the generated data-points can supplement the training distribution. By exploiting the characteristics of the short-text XMC problem, it leverages the label features to construct valid training instances, and uses the label graph for generating the corresponding soft-label targets, hence effectively capturing the label-label correlations. While most recent advances (such as SiameseXML and ECLARE) in XMC have been algorithmic, mainly aimed towards developing novel deep-learning architectures, our data-centric augmentation approach is orthogonal to these methodologies. We demonstrate the generality and effectiveness of Gandalf by showing up to 30% relative improvements for 5 state-of-the-art algorithms across 4 benchmark datasets consisting of up to 1.3 million labels. 
", "keywords": "Extreme Classification;Data Augmentation;Search and Recommendation", "primary_area": "", "supplementary_material": "", "author": "Siddhant Kharbanda;Devaansh Gupta;Erik Schultheis;Atmadeep Banerjee;Vikas Verma;Rohit Babbar", "authorids": "~Siddhant_Kharbanda1;~Devaansh_Gupta1;~Erik_Schultheis1;~Atmadeep_Banerjee1;~Vikas_Verma1;~Rohit_Babbar1", "gender": "M;M;;M;M;", "homepage": ";https://devaansh100.github.io;https://www.aalto.fi/en/people/erik-schultheis;https://atom-101.github.io;;", "dblp": "302/0835;351/9786;268/7969;;57/6603;", "google_scholar": "4lVrfloAAAAJ;lSBqiz4AAAAJ;MGxmO7EAAAAJ;lMSaCAkAAAAJ;;", "orcid": "0009-0000-6847-5836;;0000-0003-1685-8397;;;", "linkedin": "siddhant-kharbanda-32782b18a/;devaanshgupta/;;;;", "or_profile": "~Siddhant_Kharbanda1;~Devaansh_Gupta1;~Erik_Schultheis1;~Atmadeep_Banerjee1;~Vikas_Verma1;~Rohit_Babbar1", "aff": "Aalto University;BITS Pilani, Birla Institute of Technology and Science;Aalto University;;Aalto University;", "aff_domain": "aalto.fi;pilani.bits-pilani.ac.in;aalto.fi;;aalto.fi;", "position": "Research Assistant;Undergrad student;PhD student;;Postdoc;", "bibtex": "@misc{\nkharbanda2023gandalf,\ntitle={Gandalf : Data Augmentation is all you need for Extreme Classification},\nauthor={Siddhant Kharbanda and Devaansh Gupta and Erik Schultheis and Atmadeep Banerjee and Vikas Verma and Rohit Babbar},\nyear={2023},\nurl={https://openreview.net/forum?id=05ff9BRSMzE}\n}", "github": "", "project": "", "reviewers": "k6DK;g2YZ;qcbk;xmDr", "site": "https://openreview.net/forum?id=05ff9BRSMzE", "pdf_size": 2602275, "recommendation": "3;3;3;6", "confidence": "5;4;3;4", "correctness": "2;3;3;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;0;2;3", "wc_summary_paper": "77;53;82;56", "wc_strength_and_weaknesses": "166;144;296;32", "wc_clarity_quality_novelty_and_reproducibility": "53;44;58;31", "wc_summary_review": "19;38;176;153", "wc_review": "315;279;612;272", "wc_reply_reviewers": "0;0;149;0", "wc_reply_authors": "674;550;347;468", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 3.75, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.75, 1.0897247358851685 ], "wc_summary_paper_avg": [ 67.0, 12.668859459319927 ], "wc_strength_and_weaknesses_avg": [ 159.5, 93.76966460428447 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 46.5, 10.259142264341596 ], "wc_summary_review_avg": [ 96.5, 68.8131528125256 ], "wc_review_avg": [ 369.5, 140.95477998280157 ], "wc_reply_reviewers_avg": [ 37.25, 64.51889258194068 ], "wc_reply_authors_avg": [ 509.75, 119.19390714294083 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ygPPK3DLNpAJ:scholar.google.com/&scioq=Gandalf+:+Data+Augmentation+is+all+you+need+for+Extreme+Classification&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Aalto University;Birla Institute of Technology and Science", "aff_unique_dep": ";", "aff_unique_url": "https://www.aalto.fi;https://www.bits-pilani.ac.in", "aff_unique_abbr": "Aalto;BITS Pilani", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pilani", 
"aff_country_unique_index": "0;1;0;0", "aff_country_unique": "Finland;India" }, { "id": "05rBhFU3mLX", "title": "Accelerated Riemannian Optimization: Handling Constraints to Bound Geometric Penalties", "track": "main", "status": "Reject", "tldr": "We propose accelerated first-order methods for Riemannian optimization in Hadamard manifolds by using a proximal method that we design. We can work without undesirable assumptions previous accelerated works made", "abstract": " We propose a globally-accelerated, first-order method for the optimization of smooth and (strongly or not) geodesically-convex functions in Hadamard manifolds. Our algorithm enjoys the same convergence rates as Nesterov's accelerated gradient descent, up to a multiplicative geometric penalty and log factors. \n Crucially, we can enforce our method to stay within a compact set we define. Prior fully accelerated works resort to assuming that the iterates of their algorithms stay in some pre-specified compact set, except for two previous methods, whose applicability is limited to local optimization and to spaces of constant curvature, respectively. Achieving global and general Riemannian acceleration without iterates assumptively staying in the feasible set was asked as an open question in (Kim & Yang, 2022), which we solve for Hadamard manifolds.\n In our solution, we show that we can use a linearly convergent algorithm for constrained strongly g-convex smooth problems to implement a Riemannian inexact proximal point operator that we use as a subroutine, which is of independent interest.", "keywords": "Riemannian optimization;geodesic convexity;first-order accelerated methods", "primary_area": "", "supplementary_material": "/attachment/03bf5ad890d8f9925e8ff0e992ecca0aadb6e6e7.zip", "author": "David Mart\u00ednez-Rubio;Sebastian Pokutta", "authorids": "~David_Mart\u00ednez-Rubio2;~Sebastian_Pokutta1", "gender": "M;", "homepage": "http://www.pokutta.com;", "dblp": "75/7718;198/1019", "google_scholar": ";https://scholar.google.co.uk/citations?user=dMwpf-4AAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Sebastian_Pokutta1;~David_Mart\u00ednez1", "aff": "TU Berlin;Zuse Institute Berlin", "aff_domain": "tu-berlin.de;zib.de", "position": "Full Professor;Postdoc", "bibtex": "@misc{\nmart{\\'\\i}nez-rubio2023accelerated,\ntitle={Accelerated Riemannian Optimization: Handling Constraints to Bound Geometric Penalties},\nauthor={David Mart{\\'\\i}nez-Rubio and Sebastian Pokutta},\nyear={2023},\nurl={https://openreview.net/forum?id=05rBhFU3mLX}\n}", "github": "", "project": "", "reviewers": "E5FH;S9aC;qdaa", "site": "https://openreview.net/forum?id=05rBhFU3mLX", "pdf_size": 561208, "recommendation": "3;5;6", "confidence": "4;1;3", "correctness": "3;3;4", "technical_novelty": "3;3;3", "empirical_novelty": "2;2;3", "wc_summary_paper": "35;70;56", "wc_strength_and_weaknesses": "69;194;29", "wc_clarity_quality_novelty_and_reproducibility": "56;51;5", "wc_summary_review": "16;62;93", "wc_review": "176;377;183", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "559;502;380", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 2.6666666666666665, 1.247219128924647 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 53.666666666666664, 14.383632673594278 ], "wc_strength_and_weaknesses_avg": [ 
97.33333333333333, 70.27722880769338 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 37.333333333333336, 22.954060400915758 ], "wc_summary_review_avg": [ 57.0, 31.63331577098213 ], "wc_review_avg": [ 245.33333333333334, 93.14624105256327 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 480.3333333333333, 74.66517855659951 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.49999999999999994, "corr_recommendation_correctness": 0.7559289460184545, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:L2AOlpr86sAJ:scholar.google.com/&scioq=Accelerated+Riemannian+Optimization:+Handling+Constraints+to+Bound+Geometric+Penalties&hl=en&as_sdt=0,5", "gs_version_total": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Technische Universit\u00e4t Berlin;Zuse Institute Berlin", "aff_unique_dep": ";", "aff_unique_url": "https://www.tu-berlin.de;https://www.zib.de", "aff_unique_abbr": "TU Berlin;ZIB", "aff_campus_unique_index": "0", "aff_campus_unique": "Berlin;", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "title": "Scaling Up Probabilistic Circuits by Latent Variable Distillation", "status": "Top-5%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10744", "id": "067CGykiZTS", "poster": "", "openreview": "https://openreview.net/forum?id=067CGykiZTS", "slides": "https://iclr.cc/virtual/2023/poster/10744", "video": "https://iclr.cc/virtual/2023/poster/10744", "author_site": "Anji Liu, Honghua Zhang, Guy Van den Broeck", "tldr": "", "abstract": "Probabilistic Circuits (PCs) are a unified framework for tractable probabilistic models that support efficient computation of various probabilistic queries (e.g., marginal probabilities). One key challenge is to scale PCs to model large and high-dimensional real-world datasets: we observe that as the number of parameters in PCs increases, their performance immediately plateaus. This phenomenon suggests that the existing optimizers fail to exploit the full expressive power of large PCs. We propose to overcome this bottleneck by latent variable distillation: we leverage the less tractable but more expressive deep generative models to provide extra supervision over the latent variables of PCs. Specifically, we extract information from Transformer-based generative models to assign values to latent variables of PCs, providing guidance to PC optimizers. Experiments on both image and language modeling benchmarks (e.g., ImageNet and WikiText-2) show that latent variable distillation substantially boosts the performance of large PCs compared to their counterparts without latent variable distillation. In particular, on the image modeling benchmarks, PCs achieve competitive performance against some of the widely-used deep generative models, including variational autoencoders and flow-based models, opening up new avenues for tractable generative modeling. 
Our code can be found at https://github.com/UCLA-StarAI/LVD.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Anji Liu;Honghua Zhang;Guy Van den Broeck", "authorids": "~Anji_Liu1;~Honghua_Zhang1;~Guy_Van_den_Broeck1", "gender": "M;M;M", "homepage": "https://liuanji.github.io/;http://web.cs.ucla.edu/~hzhang19/;http://web.cs.ucla.edu/~guyvdb/", "dblp": "227/8622;65/6130;96/7521.html", "google_scholar": "k_4zYecAAAAJ;2qxBYJUAAAAJ;d0KQ9z0AAAAJ", "orcid": ";;0000-0003-3434-2503", "linkedin": "anji-liu-7610b7190/;;guyvdb", "or_profile": "~Anji_Liu1;~Honghua_Zhang1;~Guy_Van_den_Broek1", "aff": "University of California, Los Angeles;University of California, Los Angeles;University of California, Los Angeles", "aff_domain": "ucla.edu;cs.ucla.edu;ucla.edu", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nliu2023scaling,\ntitle={Scaling Up Probabilistic Circuits by Latent Variable Distillation},\nauthor={Anji Liu and Honghua Zhang and Guy Van den Broeck},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=067CGykiZTS}\n}", "github": "", "project": "", "reviewers": "7gAx;tcWu;o5Cj", "pdf_size": 2034282, "recommendation": "8;8;8", "confidence": "4;5;4", "correctness": "4;4;3", "technical_novelty": "4;3;4", "empirical_novelty": "4;3;4", "wc_summary_paper": "172;93;214", "wc_strength_and_weaknesses": "396;63;57", "wc_clarity_quality_novelty_and_reproducibility": "103;34;25", "wc_summary_review": "48;94;255", "wc_review": "719;284;551", "wc_reply_reviewers": "69;0;0", "wc_reply_authors": "1363;340;723", "reply_reviewers": "1;0;0", "reply_authors": "2;1;1", "recommendation_avg": [ 8.0, 0.0 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 159.66666666666666, 50.16195991209098 ], "wc_strength_and_weaknesses_avg": [ 172.0, 158.41085821369697 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 54.0, 34.84250278036869 ], "wc_summary_review_avg": [ 132.33333333333334, 88.74808292139173 ], "wc_review_avg": [ 518.0, 179.11448852619378 ], "wc_reply_reviewers_avg": [ 23.0, 32.526911934581186 ], "wc_reply_authors_avg": [ 808.6666666666666, 422.0081621116929 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12625901458522188542&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=067CGykiZTS", "email": "ucla.edu;cs.ucla.edu;ucla.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, Los Angeles", "aff_unique_dep": "", "aff_unique_url": "https://www.ucla.edu", "aff_unique_abbr": "UCLA", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "DiffMimic: Efficient Motion Mimicking with Differentiable Physics", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12134", "id": "06mk-epSwZ", "poster": 
"/media/PosterPDFs/ICLR%202023/12134.png?t=1681196883.8413472", "openreview": "https://openreview.net/forum?id=06mk-epSwZ", "slides": "https://iclr.cc/virtual/2023/poster/12134", "video": "https://iclr.cc/virtual/2023/poster/12134", "author_site": "Jiawei Ren, Cunjun Yu, Siwei Chen, Xiao Ma, Liang Pan, Ziwei Liu", "tldr": "Mimic agile skills for physics-based character with differentiable physics simulators.", "abstract": "Motion mimicking is a foundational task in physics-based character animation. However, most existing motion mimicking methods are built upon reinforcement learning (RL) and suffer from heavy reward engineering, high variance, and slow convergence with hard explorations. Specifically, they usually take tens of hours or even days of training to mimic a simple motion sequence, resulting in poor scalability. In this work, we leverage differentiable physics simulators (DPS) and propose an efficient motion mimicking method dubbed $\\textbf{DiffMimic}$. Our key insight is that DPS casts a complex policy learning task to a much simpler state matching problem. In particular, DPS learns a stable policy by analytical gradients with ground-truth physical priors hence leading to significantly faster and stabler convergence than RL-based methods. Moreover, to escape from local optima, we utilize an \\textit{Demonstration Replay} mechanism to enable stable gradient backpropagation in a long horizon. Extensive experiments on standard benchmarks show that DiffMimic has a better sample efficiency and time efficiency than existing methods (e.g., DeepMimic). Notably, DiffMimic allows a physically simulated character to learn back-flip after 10 minutes of training and be able to cycle it after 3 hours of training, while DeepMimic requires about a day of training to cycle back-flip. More importantly, we hope DiffMimic can benefit more differentiable animation systems with techniques like differentiable clothes simulation in future research. Our code is available at https://github.com/diffmimic/diffmimic. 
Qualitative results can be viewed at https://diffmimic-demo-main-g7h0i8.streamlitapp.com.", "keywords": "Physics-based Animation", "primary_area": "", "supplementary_material": "", "author": "Jiawei Ren;Cunjun Yu;Siwei Chen;Xiao Ma;Liang Pan;Ziwei Liu", "authorids": "~Jiawei_Ren1;~Cunjun_Yu1;~Siwei_Chen3;~Xiao_Ma2;~Liang_Pan2;~Ziwei_Liu1", "gender": "Unspecified;Unspecified;M;M;M;M", "homepage": "https://jiawei-ren.github.io/;;https://yusufma03.github.io/;https://scholar.google.com/citations?user=lSDISOcAAAAJ&hl=en;https://liuziwei7.github.io/;", "dblp": "122/3626-1;232/3014;35/573-6;90/343;05/6300-2;88/10339", "google_scholar": "https://scholar.google.com.sg/citations?user=YUKPVCoAAAAJ;4xwyGM8AAAAJ;hR4G6hoAAAAJ;lSDISOcAAAAJ;https://scholar.google.com.hk/citations?user=lc45xlcAAAAJ;", "orcid": "0000-0003-1950-5976;;;;;0000-0001-8384-8944", "linkedin": ";;;;;", "or_profile": "~Jiawei_Ren1;~Cunjun_Yu1;~Xiao_Ma2;~Liang_Pan2;~Ziwei_Liu1;~SIWEI_CHEN2", "aff": "Nanyang Technological University;National University of Singapore;SEA AI Lab;Nanyang Technological University;Nanyang Technological University;National University of Singapore", "aff_domain": "ntu.edu.sg;u.nus.edu;sea.com;ntu.eud.sg;ntu.edu.sg;nus.edu.sg", "position": "PhD student;PhD student;Research Scientist;Postdoc;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nren2023diffmimic,\ntitle={DiffMimic: Efficient Motion Mimicking with Differentiable Physics},\nauthor={Jiawei Ren and Cunjun Yu and Siwei Chen and Xiao Ma and Liang Pan and Ziwei Liu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=06mk-epSwZ}\n}", "github": "", "project": "", "reviewers": "8KfC;JNig;yUu6;a2aF;s4rT", "pdf_size": 3091332, "recommendation": "5;6;6;8;8", "confidence": "4;4;4;3;4", "correctness": "3;3;4;3;3", "technical_novelty": "2;2;1;3;2", "empirical_novelty": "2;3;2;3;3", "wc_summary_paper": "102;82;73;87;75", "wc_strength_and_weaknesses": "290;151;96;178;164", "wc_clarity_quality_novelty_and_reproducibility": "78;55;101;149;40", "wc_summary_review": "111;43;56;57;319", "wc_review": "581;331;326;471;598", "wc_reply_reviewers": "70;54;46;15;120", "wc_reply_authors": "1086;621;649;571;957", "reply_reviewers": "1;1;1;1;1", "reply_authors": "3;2;2;2;3", "recommendation_avg": [ 6.6, 1.2 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "correctness_avg": [ 3.2, 0.39999999999999997 ], "technical_novelty_avg": [ 2.0, 0.6324555320336759 ], "empirical_novelty_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_paper_avg": [ 83.8, 10.380751417888785 ], "wc_strength_and_weaknesses_avg": [ 175.8, 63.51188865086599 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 84.6, 38.27584094438684 ], "wc_summary_review_avg": [ 117.2, 103.57296944666597 ], "wc_review_avg": [ 461.4, 116.95571811587496 ], "wc_reply_reviewers_avg": [ 61.0, 34.50217384455652 ], "wc_reply_authors_avg": [ 776.8, 205.4442990204401 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.4, 0.4898979485566356 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.5833333333333334, "corr_recommendation_correctness": -0.24999999999999994, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4712877937180788158&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=06mk-epSwZ", "email": "ntu.edu.sg;u.nus.edu;sea.com;ntu.eud.sg;ntu.edu.sg;nus.edu.sg", "author_num": 6, "aff_unique_index": 
"0;1;2;0;0;1", "aff_unique_norm": "Nanyang Technological University;National University of Singapore;Sea AI Lab", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ntu.edu.sg;https://www.nus.edu.sg;", "aff_unique_abbr": "NTU;NUS;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "Singapore;Unknown" }, { "id": "074e7Rojdj", "title": "Unsupervised Performance Predictor for Architecture Search", "track": "main", "status": "Reject", "tldr": "We propose a performance predictor which can utilize existing fully-trained architectures, thus reducing the high cost of annotating architectures in the background of NAS.", "abstract": "Performance predictors can directly predict the performance value of given neural architectures without training, thus broadly being studied to alleviate the prohibitive cost of Neural Architecture Search (NAS). However, existing performance predictors still require training a large number of architectures from scratch to get their performance labels as the training dataset, which is still computationally expensive. To solve this issue, we develop an unsupervised performance predictor called USPP, which can avoid costly dataset construction by using existing fully-trained architectures. Specifically, a progressive domain-invariant feature extraction method is proposed to assist in extracting domain-invariant features due to the great transferability challenge caused by the rich domain-specific features. Furthermore, a learnable representation (denoted as operation embedding) is designed to replace the fixed encoding of the operations to transfer more knowledge about operations to the target search space. In experiments, we train the predictor by the labeled architectures in NAS-Bench-101 and predict the architectures in the DARTS search space. 
Compared with other state-of-the-art NAS methods, the proposed USPP only costs $0.02$ GPU days but finds the architecture with $97.86\\%$ on CIFAR-10 and $96.50\\%$ top-1 accuracy on ImageNet.", "keywords": "Neural Architecture Search;AutoML;Performance Predictor", "primary_area": "", "supplementary_material": "/attachment/254a1d2a6ab3e9aaea10277de3960ac139cbf1d3.zip", "author": "Xiangning Xie;Yanan Sun;Yuqiao Liu", "authorids": "~Xiangning_Xie1;~Yanan_Sun4;~Yuqiao_Liu1", "gender": "F;;", "homepage": ";;https://scholar.google.com/citations?user=-_XP9K0AAAAJ&hl", "dblp": ";;", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Xiangning_Xie1;~Yanan_Sun4;~Yuqiao_Liu1", "aff": "Sichuan University;;Sichuan University", "aff_domain": "scu.edu.cn;;scu.edu.cn", "position": "PhD student;;MS student", "bibtex": "@misc{\nxie2023unsupervised,\ntitle={Unsupervised Performance Predictor for Architecture Search},\nauthor={Xiangning Xie and Yanan Sun and Yuqiao Liu},\nyear={2023},\nurl={https://openreview.net/forum?id=074e7Rojdj}\n}", "github": "", "project": "", "reviewers": "4ZdJ;YaG5;8UES", "site": "https://openreview.net/forum?id=074e7Rojdj", "pdf_size": 341650, "recommendation": "5;5;5", "confidence": "4;4;4", "correctness": "4;3;3", "technical_novelty": "2;3;3", "empirical_novelty": "2;3;3", "wc_summary_paper": "178;73;24", "wc_strength_and_weaknesses": "227;224;192", "wc_clarity_quality_novelty_and_reproducibility": "58;8;42", "wc_summary_review": "45;10;53", "wc_review": "508;315;311", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1019;899;927", "reply_reviewers": "0;0;0", "reply_authors": "2;2;2", "recommendation_avg": [ 5.0, 0.0 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 91.66666666666667, 64.24086619036895 ], "wc_strength_and_weaknesses_avg": [ 214.33333333333334, 15.839472494022298 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 36.0, 20.848661028149188 ], "wc_summary_review_avg": [ 36.0, 18.672618098881223 ], "wc_review_avg": [ 378.0, 91.93838516455826 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 948.3333333333334, 51.259687431309565 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:jEA5pRFGq_oJ:scholar.google.com/&scioq=Unsupervised+Performance+Predictor+for+Architecture+Search&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Sichuan University", "aff_unique_dep": "", "aff_unique_url": "https://www.scu.edu.cn", "aff_unique_abbr": "SCU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Delving into Semantic Scale Imbalance", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12237", "id": "07tc5kKRIo", "poster": "/media/PosterPDFs/ICLR%202023/12237.png?t=1680955125.2809381", "openreview": "https://openreview.net/forum?id=07tc5kKRIo", "slides": "https://iclr.cc/virtual/2023/poster/12237", "video": "https://iclr.cc/virtual/2023/poster/12237", "author_site": "Yanbiao Ma, Licheng 
Jiao, Fang Liu, Yuxin Li, Shuyuan Yang, Xu Liu", "tldr": "Our proposed semantic scale, like the number of samples, is a natural measure of class imbalance and does not depend on the model\u2019s predictions.", "abstract": "Model bias triggered by long-tailed data has been widely studied. However, measure based on the number of samples cannot explicate three phenomena simultaneously: (1) Given enough data, the classification performance gain is marginal with additional samples. (2) Classification performance decays precipitously as the number of training samples decreases when there is insufficient data. (3) Model trained on sample-balanced datasets still has different biases for different classes. In this work, we define and quantify the semantic scale of classes, which is equivalent to the feature diversity of classes. It is exciting to find experimentally that there is a marginal effect of semantic scale, which perfectly describes the first two phenomena. Further, the quantitative measurement of semantic scale imbalance is proposed, which can accurately reflect model bias on multiple datasets, even on sample-balanced data, revealing a novel perspective for the study of class imbalance. Due to the prevalence of semantic scale imbalance, we propose semantic-scale-balanced learning, including a general loss improvement scheme and a dynamic re-weighting training framework that overcomes the challenge of calculating semantic scales in real-time during iterations. Comprehensive experiments show that dynamic semantic-scale-balanced learning consistently enables the model to perform superiorly on large-scale long-tailed and non-long-tailed datasets, which is a good starting point for mitigating the prevalent but unnoticed model bias. ", "keywords": "Imbalanced Learning;Model bias;Long-tailed distribution", "primary_area": "", "supplementary_material": "/attachment/9e67658798e34fc6c40b97f7eca64eb50b37edb3.zip", "author": "Yanbiao Ma;Licheng Jiao;Fang Liu;Yuxin Li;Shuyuan Yang;Xu Liu", "authorids": "~Yanbiao_Ma1;~Licheng_Jiao4;~Fang_Liu5;~Yuxin_Li4;~Shuyuan_Yang1;~Xu_Liu5", "gender": "M;M;F;F;F;M", "homepage": "https://blog.csdn.net/weixin_38817895?spm=1001.2101.3001.5343;https://web.xidian.edu.cn/lchjiao/students.html;;https://blog.csdn.net/weixin_44316581?type=blog;https://web.xidian.edu.cn/syyang/;https://faculty.xidian.edu.cn/liuxu", "dblp": "260/7705;;67/5807-1;;81/2383.html;93/3167-6", "google_scholar": ";;https://scholar.google.com.sg/citations?user=qrQkfxYAAAAJ;;;_09bkMgAAAAJ", "orcid": ";;;;;0000-0002-8780-5455", "linkedin": ";;;;;", "or_profile": "~Yanbiao_Ma1;~Licheng_Jiao4;~Fang_Liu5;~Yuxin_Li4;~Shuyuan_Yang1;~Xu_Liu5", "aff": "Xidian University;Xi'an University of Electronic Science and Technology;Xidian University;Xidian University;Xidian University;Xidian University", "aff_domain": "xidian.edu;xidian.edu.cn;xidian.edu.cn;xidian.edu;xidian.edu;xidian.edu", "position": "PhD student;Full Professor;Full Professor;MS student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nma2023delving,\ntitle={Delving into Semantic Scale Imbalance},\nauthor={Yanbiao Ma and Licheng Jiao and Fang Liu and Yuxin Li and Shuyuan Yang and Xu Liu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=07tc5kKRIo}\n}", "github": "", "project": "", "reviewers": "bNtG;DeDZ;kLsB;MKbH", "pdf_size": 21915145, "recommendation": "6;6;6;8", "confidence": "4;4;4;5", "correctness": "3;3;3;3", "technical_novelty": "3;3;3;4", 
"empirical_novelty": "3;3;2;4", "wc_summary_paper": "60;53;106;104", "wc_strength_and_weaknesses": "393;469;357;656", "wc_clarity_quality_novelty_and_reproducibility": "37;43;27;21", "wc_summary_review": "89;37;31;12", "wc_review": "579;602;521;793", "wc_reply_reviewers": "269;0;0;42", "wc_reply_authors": "864;710;1148;543", "reply_reviewers": "2;0;0;1", "reply_authors": "3;1;2;2", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 80.75, 24.38621536852326 ], "wc_strength_and_weaknesses_avg": [ 468.75, 115.42178087345559 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 32.0, 8.54400374531753 ], "wc_summary_review_avg": [ 42.25, 28.525208149985513 ], "wc_review_avg": [ 623.75, 102.07687054372308 ], "wc_reply_reviewers_avg": [ 77.75, 111.74161042333334 ], "wc_reply_authors_avg": [ 816.25, 222.6503705364085 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 1.0, "corr_recommendation_correctness": 0.0, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11794289626281258550&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=07tc5kKRIo", "email": "xidian.edu;xidian.edu.cn;xidian.edu.cn;xidian.edu;xidian.edu;xidian.edu", "author_num": 6, "aff_unique_index": "0;1;0;0;0;0", "aff_unique_norm": "Xidian University;Xi'an University of Electronic Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "http://www.xidian.edu.cn/;http://www.xidian.edu.cn/", "aff_unique_abbr": "Xidian;Xidian University", "aff_campus_unique_index": "1", "aff_campus_unique": ";Xi'an", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Fantastic Rewards and How to Tame Them: A Case Study on Reward Learning for Task-oriented Dialogue Systems", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10877", "id": "086pmarAris", "poster": "", "openreview": "https://openreview.net/forum?id=086pmarAris", "slides": "https://iclr.cc/virtual/2023/poster/10877", "video": "https://iclr.cc/virtual/2023/poster/10877", "author_site": "Yihao Feng, Shentao Yang, Shujian Zhang, Jianguo Zhang, Caiming Xiong, Mingyuan Zhou, Huan Wang", "tldr": "we propose techniques for learning and utilizing reward functions that can be used for training task-oriented dialogue agents", "abstract": "When learning task-oriented dialogue (ToD) agents, reinforcement learning (RL) techniques can naturally be utilized to train dialogue strategies to achieve user-specific goals. Prior works mainly focus on adopting advanced RL techniques to train the ToD agents, while the design of the reward function is not well studied. This paper aims at answering the question of how to efficiently learn and leverage a reward function for training end-to-end (E2E) ToD agents. Specifically, we introduce two generalized objectives for reward-function learning, inspired by the classical learning-to-rank literature. Further, we utilize the learned reward function to guide the training of the E2E ToD agent. With the proposed techniques, we achieve competitive results on the E2E response-generation task on the Multiwoz 2.0 dataset. 
Source code and checkpoints are publicly released at https://github.com/Shentao-YANG/Fantastic_Reward_ICLR2023.", "keywords": "task-oriented dialogue;reinforcement learning;reward learning", "primary_area": "", "supplementary_material": "", "author": "Yihao Feng;Shentao Yang;Shujian Zhang;Jianguo Zhang;Caiming Xiong;Mingyuan Zhou;Huan Wang", "authorids": "~Yihao_Feng1;~Shentao_Yang1;~Shujian_Zhang1;~Jianguo_Zhang3;~Caiming_Xiong1;~Mingyuan_Zhou1;~Huan_Wang1", "gender": "M;M;;M;M;M;M", "homepage": ";;https://www.utexas.edu/;https://jianguoz.github.io/;http://cmxiong.com/;http://mingyuanzhou.github.io;http://www.cs.yale.edu/homes/wang-huan/", "dblp": "204/3696;;84/3190.html;;80/7282;;70/6155-16.html", "google_scholar": "uqnNle0AAAAJ;https://scholar.google.com/citations?hl=en;7RmLVQkAAAAJ;mAAVFEsAAAAJ;vaSdahkAAAAJ;LXwCIisAAAAJ;7NpTttkAAAAJ", "orcid": ";0009-0009-8058-3149;;;;;", "linkedin": ";shentaoyang/;;jianguo-zhang-3b267712a;caiming-xiong-150a1417;;huanwangyale/", "or_profile": "~Yihao_Feng1;~Shentao_Yang1;~Shujian_Zhang1;~Jianguo_Zhang3;~Caiming_Xiong1;~Mingyuan_Zhou1;~Huan_Wang1", "aff": "Salesforce AI Research;University of Texas at Austin;University of Texas, Austin;SalesForce AI Research;Salesforce Research;Google;Salesforce.com", "aff_domain": "salesforce.com;utexas.edu;utexas.edu;salesforce.com;salesforce.com;google.com;salesforce.com", "position": "Researcher;PhD student;PhD student;Researcher;Research Scientist;Researcher;Researcher", "bibtex": "@inproceedings{\nfeng2023fantastic,\ntitle={Fantastic Rewards and How to Tame Them: A Case Study on Reward Learning for Task-oriented Dialogue Systems},\nauthor={Yihao Feng and Shentao Yang and Shujian Zhang and Jianguo Zhang and Caiming Xiong and Mingyuan Zhou and Huan Wang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=086pmarAris}\n}", "github": "", "project": "", "reviewers": "NRwE;tRUe;7ZSu;kmgW", "pdf_size": 920006, "recommendation": "6;6;6;8", "confidence": "4;4;3;3", "correctness": "3;3;3;3", "technical_novelty": "3;2;3;3", "empirical_novelty": "3;3;2;3", "wc_summary_paper": "120;77;142;43", "wc_strength_and_weaknesses": "63;145;421;168", "wc_clarity_quality_novelty_and_reproducibility": "245;49;162;66", "wc_summary_review": "142;47;81;41", "wc_review": "570;318;806;318", "wc_reply_reviewers": "0;17;0;0", "wc_reply_authors": "1478;919;2090;469", "reply_reviewers": "0;1;0;0", "reply_authors": "4;3;4;2", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 95.5, 38.27858409084641 ], "wc_strength_and_weaknesses_avg": [ 199.25, 133.84389227753354 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 130.5, 78.90659034580065 ], "wc_summary_review_avg": [ 77.75, 40.10844674130376 ], "wc_review_avg": [ 503.0, 202.94580557380337 ], "wc_reply_reviewers_avg": [ 4.25, 7.361215932167728 ], "wc_reply_authors_avg": [ 1239.0, 607.5816817515156 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.25, 0.82915619758885 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.0, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15269036734656288714&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "pdf": 
"https://openreview.net/pdf?id=086pmarAris", "email": "salesforce.com;utexas.edu;utexas.edu;salesforce.com;salesforce.com;google.com;salesforce.com", "author_num": 7, "aff_unique_index": "0;1;1;0;0;2;0", "aff_unique_norm": "Salesforce;University of Texas at Austin;Google", "aff_unique_dep": "Salesforce AI Research;;Google", "aff_unique_url": "https://www.salesforce.com;https://www.utexas.edu;https://www.google.com", "aff_unique_abbr": "Salesforce AI;UT Austin;Google", "aff_campus_unique_index": "1;1;2", "aff_campus_unique": ";Austin;Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "09I1M8YRJBR", "title": "Neural Diffusion Processes", "track": "main", "status": "Reject", "tldr": "Diffusion models for stochastic processes", "abstract": "Gaussian processes provide an elegant framework for specifying prior and posterior distributions over functions. They are, however, also computationally expensive, and limited by the expressivity of their covariance function. We propose Neural Diffusion Processes (NDPs), a novel approach based upon diffusion models, that learns to sample from distributions over functions. Using a novel attention block we are able to incorporate properties of stochastic processes, such as exchangeability, directly into the NDP's architecture. We empirically show that NDPs are able to capture functional distributions that are close to the true Bayesian posterior. This enables a variety of downstream tasks, including hyperparameter marginalisation, non-Gaussian posteriors and global optimisation.", "keywords": "diffusion models;gaussian processes;neural processes;stochastic processes", "primary_area": "", "supplementary_material": "/attachment/01e867ffb8d752e6dee18fe5760ed691f4d5e452.zip", "author": "Vincent Dutordoir;Alan Saul;Zoubin Ghahramani;Fergus Simpson", "authorids": "~Vincent_Dutordoir1;~Alan_Saul1;~Zoubin_Ghahramani1;~Fergus_Simpson1", "gender": "M;M;M;M", "homepage": ";;http://mlg.eng.cam.ac.uk/zoubin/;", "dblp": "212/5487;;g/ZoubinGhahramani;277/6392.html", "google_scholar": ";;https://scholar.google.co.uk/citations?user=0uTu7fYAAAAJ;https://scholar.google.co.uk/citations?user=c5nCaXEAAAAJ", "orcid": ";;;", "linkedin": ";https://linkedin.com/in/alan-saul;;fergus-simpson/", "or_profile": "~Vincent_Dutordoir1;~Alan_Saul1;~Zoubin_Ghahramani1;~Fergus_Simpson1", "aff": "University of Cambridge;Secondmind.ai;University of Cambridge;Secondmind Ltd", "aff_domain": "cam.ac.uk;secondmind.ai;cam.ac.uk;secondmind.ai", "position": "PhD student;Researcher;Full Professor;Senior Researcher", "bibtex": "@misc{\ndutordoir2023neural,\ntitle={Neural Diffusion Processes},\nauthor={Vincent Dutordoir and Alan Saul and Zoubin Ghahramani and Fergus Simpson},\nyear={2023},\nurl={https://openreview.net/forum?id=09I1M8YRJBR}\n}", "github": "", "project": "", "reviewers": "6nma;YxyB;sTEa;d5b3", "site": "https://openreview.net/forum?id=09I1M8YRJBR", "pdf_size": 1269804, "recommendation": "3;6;6;8", "confidence": "4;2;4;3", "correctness": "2;3;4;4", "technical_novelty": "2;3;4;4", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "22;87;53;86", "wc_strength_and_weaknesses": "396;111;144;292", "wc_clarity_quality_novelty_and_reproducibility": "18;168;104;45", "wc_summary_review": "22;82;25;70", "wc_review": "458;448;326;493", "wc_reply_reviewers": "680;0;0;0", "wc_reply_authors": "1129;757;305;375", "reply_reviewers": "1;0;0;0", "reply_authors": "2;1;1;1", "recommendation_avg": [ 5.75, 1.7853571071357126 ], "confidence_avg": [ 3.25, 
0.82915619758885 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 3.25, 0.82915619758885 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 62.0, 26.842131062939096 ], "wc_strength_and_weaknesses_avg": [ 235.75, 114.91817741332308 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 83.75, 57.73376395143487 ], "wc_summary_review_avg": [ 49.75, 26.61179249881526 ], "wc_review_avg": [ 431.25, 63.02132575565195 ], "wc_reply_reviewers_avg": [ 170.0, 294.44863728670913 ], "wc_reply_authors_avg": [ 641.5, 329.8677765408437 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.46442036401282394, "corr_recommendation_correctness": 0.8866206949335731, "gs_citation": 48, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7293234488544676773&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "University of Cambridge;Secondmind", "aff_unique_dep": ";", "aff_unique_url": "https://www.cam.ac.uk;https://www.secondmind.ai", "aff_unique_abbr": "Cambridge;", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Corrupted Image Modeling for Self-Supervised Visual Pre-Training", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11664", "id": "09hVcSDkea", "poster": "", "openreview": "https://openreview.net/forum?id=09hVcSDkea", "slides": "https://iclr.cc/virtual/2023/poster/11664", "video": "https://iclr.cc/virtual/2023/poster/11664", "author_site": "Yuxin Fang, Li Dong, Hangbo Bao, Xinggang Wang, Furu Wei", "tldr": "", "abstract": "We introduce Corrupted Image Modeling (CIM) for self-supervised visual pre-training. CIM uses an auxiliary generator with a small trainable BEiT to corrupt the input image instead of using artificial [MASK] tokens, where some patches are randomly selected and replaced with plausible alternatives sampled from the BEiT output distribution. Given this corrupted image, an enhancer network learns to either recover all the original image pixels, or predict whether each visual token is replaced by a generator sample or not. The generator and the enhancer are simultaneously trained and synergistically updated. After pre-training, the enhancer can be used as a high-capacity visual encoder for downstream tasks. CIM is a general and flexible visual pre-training framework that is suitable for various network architectures. For the first time, CIM demonstrates that both ViT and CNN can learn rich visual representations using a unified, non-Siamese framework. 
Experimental results show that our approach achieves compelling results in vision benchmarks, such as ImageNet classification and ADE20K semantic segmentation.", "keywords": "Self-supervised Learning;Representation Learning;Vision Transformer", "primary_area": "", "supplementary_material": "/attachment/223016db6b0eaaed98424b1bd4deb2ca6bce43dc.zip", "author": "Yuxin Fang;Li Dong;Hangbo Bao;Xinggang Wang;Furu Wei", "authorids": "~Yuxin_Fang1;~Li_Dong1;~Hangbo_Bao1;~Xinggang_Wang1;~Furu_Wei1", "gender": ";M;M;M;M", "homepage": ";http://dong.li;https://scholar.google.com/citations?user=lXCZGqYAAAAJ&hl=en;https://xwcv.github.io/index.htm;https://www.microsoft.com/en-us/research/people/fuwei/", "dblp": ";85/5090-4;199/2036;95/3056;72/5870", "google_scholar": ";wEfQgPgAAAAJ;lXCZGqYAAAAJ;qNCTLV0AAAAJ;G-V1VpwAAAAJ", "orcid": ";;;0000-0001-6732-7823;", "linkedin": ";;;;", "or_profile": "~Yuxin_Fang1;~Li_Dong1;~Hangbo_Bao1;~Xinggang_Wang1;~Furu_Wei1", "aff": ";Microsoft Research;Harbin Institute of Technology;Huazhong University of Science and Technology;Microsoft Research", "aff_domain": ";microsoft.com;hit.edu.cn;hust.edu.cn;microsoft.com", "position": ";Principal Researcher;PhD student;Full Professor;Distinguished Scientist", "bibtex": "@inproceedings{\nfang2023corrupted,\ntitle={Corrupted Image Modeling for Self-Supervised Visual Pre-Training},\nauthor={Yuxin Fang and Li Dong and Hangbo Bao and Xinggang Wang and Furu Wei},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=09hVcSDkea}\n}", "github": "", "project": "", "reviewers": "JSbL;E2vC;wVPU;UGiU;1bnH;hKbV", "pdf_size": 2145057, "recommendation": "5;5;6;6;8;8", "confidence": "4;4;4;3;4;3", "correctness": "4;3;3;3;4;3", "technical_novelty": "2;2;3;3;4;3", "empirical_novelty": "2;2;3;2;3;0", "wc_summary_paper": "47;95;61;62;100;92", "wc_strength_and_weaknesses": "286;145;243;123;225;216", "wc_clarity_quality_novelty_and_reproducibility": "24;51;61;24;11;117", "wc_summary_review": "56;17;27;36;42;34", "wc_review": "413;308;392;245;378;459", "wc_reply_reviewers": "202;39;31;12;137;31", "wc_reply_authors": "892;410;682;384;330;496", "reply_reviewers": "1;1;1;1;1;1", "reply_authors": "2;2;1;1;2;1", "recommendation_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.8333333333333335, 0.6871842709362768 ], "empirical_novelty_avg": [ 2.0, 1.0 ], "wc_summary_paper_avg": [ 76.16666666666667, 20.227181931472533 ], "wc_strength_and_weaknesses_avg": [ 206.33333333333334, 56.0347114643137 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 48.0, 35.26093211095437 ], "wc_summary_review_avg": [ 35.333333333333336, 12.106013198223252 ], "wc_review_avg": [ 365.8333333333333, 70.33590042708552 ], "wc_reply_reviewers_avg": [ 75.33333333333333, 69.6531087285812 ], "wc_reply_authors_avg": [ 532.3333333333334, 196.2087890204944 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.37796447300922736, "corr_recommendation_correctness": 0.09449111825230684, "gs_citation": 93, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9198665512103914196&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=09hVcSDkea", "email": ";microsoft.com;hit.edu.cn;hust.edu.cn;microsoft.com", 
"author_num": 5, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Microsoft;Harbin Institute of Technology;Huazhong University of Science and Technology", "aff_unique_dep": "Microsoft Research;;", "aff_unique_url": "https://www.microsoft.com/en-us/research;http://www.hit.edu.cn/;http://www.hust.edu.cn", "aff_unique_abbr": "MSR;HIT;HUST", "aff_campus_unique_index": "1", "aff_campus_unique": ";Harbin", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "United States;China" }, { "id": "0CbYJNJtM-X", "title": "It Takes Two: Masked Appearance-Motion Modeling for Self-Supervised Video Transformer Pre-Training", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Self-supervised video transformer pre-training has recently benefited from the mask-and-predict pipeline. They have demonstrated outstanding effectiveness on downstream video tasks and superior data efficiency on small datasets. However, temporal relation is not fully exploited by these methods. In this work, we explicitly investigate motion cues in videos as extra prediction target and propose our Masked Appearance-Motion Modeling (MAM\u00b2) framework. Specifically, we design an encoder-regressor-decoder pipeline for this task. The regressor separates feature encoding and pretext tasks completion, such that the feature extraction process is completed adequately by the encoder. In order to guide the encoder to fully excavate spatial-temporal features, two separate decoders are used for two pretext tasks of disentangled appearance and motion prediction. We explore various motion prediction targets and figure out RGB-difference is simple yet effective. As for appearance prediction, VQGAN codes are leveraged as prediction target. With our pre-training pipeline, convergence can be remarkably speed up, e.g., we only require 2x fewer epochs than state-of-the-art VideoMAE (400 v.s. 800) to achieve the competitive performance. Extensive experimental results prove that our method learns generalized video representations. Notably, our MAM\u00b2 with ViT-B achieves 82.3% on Kinects-400, 71.3% on Something-Something V2, 91.5% on UCF101, and 62.5% on HMDB51. ", "keywords": "Video Understanding;Masked Visual Modeling;Self-supervised Learning", "primary_area": "", "supplementary_material": "", "author": "YuXin Song;Min Yang;Wenhao Wu;Dongliang He;Fu Li;Jingdong Wang", "authorids": "~YuXin_Song1;~Min_Yang7;~Wenhao_Wu2;~Dongliang_He1;~Fu_Li1;~Jingdong_Wang1", "gender": "M;M;M;M;M;M", "homepage": "https://github.com/byrsongyuxin;https://github.com/feymanpriv;https://whwu95.github.io/;;;https://jingdongwang2017.github.io/", "dblp": ";;;167/0539;;49/3441", "google_scholar": ";l_e7BDEAAAAJ;Kn5d1ckAAAAJ;ui6DYGoAAAAJ;;z5SPCmgAAAAJ", "orcid": ";;0000-0002-8511-743X;;;0000-0002-4888-4445", "linkedin": ";;wenhao-w-usyd/;;%E7%94%AB-%E6%9D%8E-8a0b8293/;", "or_profile": "~YuXin_Song1;~Min_Yang7;~Wenhao_Wu2;~Dongliang_He1;~Fu_Li1;~Jingdong_Wang1", "aff": "Baidu;Bytedance;Baidu;ByteDance Inc. 
;;Baidu", "aff_domain": "baidu.com;bytedance.com;baidu.com;bytedance.com;;baidu.com", "position": "Researcher;Researcher;Senior R&D Engineer;ByteDance;;Chief Scientist for Computer Vision", "bibtex": "@misc{\nsong2023it,\ntitle={It Takes Two: Masked Appearance-Motion Modeling for Self-Supervised Video Transformer Pre-Training},\nauthor={YuXin Song and Min Yang and Wenhao Wu and Dongliang He and Fu Li and Jingdong Wang},\nyear={2023},\nurl={https://openreview.net/forum?id=0CbYJNJtM-X}\n}", "github": "", "project": "", "reviewers": "fspH;Q3Ws;wB4f;NvVg", "site": "https://openreview.net/forum?id=0CbYJNJtM-X", "pdf_size": 761882, "recommendation": "3;5;5;5", "confidence": "5;4;5;4", "correctness": "3;3;2;3", "technical_novelty": "2;2;3;2", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "44;71;78;82", "wc_strength_and_weaknesses": "167;208;364;214", "wc_clarity_quality_novelty_and_reproducibility": "13;24;45;44", "wc_summary_review": "20;38;74;43", "wc_review": "244;341;561;383", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 4.5, 0.5 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 68.75, 14.821858857781638 ], "wc_strength_and_weaknesses_avg": [ 238.25, 74.821036480391 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 31.5, 13.573871960498227 ], "wc_summary_review_avg": [ 43.75, 19.447043477094404 ], "wc_review_avg": [ 382.25, 114.85507172084304 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": -0.3333333333333333, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8386270864847883550&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;0;1;0", "aff_unique_norm": "Baidu;ByteDance", "aff_unique_dep": "Baidu, Inc.;", "aff_unique_url": "https://www.baidu.com;https://www.bytedance.com", "aff_unique_abbr": "Baidu;Bytedance", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "0DIkhwclYX3", "title": "Efficient debiasing with contrastive weight pruning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Neural networks are often biased to spuriously correlated features that provide misleading statistical evidence that does not generalize. This raises a fundamental question: \"Does an optimal unbiased functional subnetwork exist in a severely biased network? If so, how to extract such subnetwork?\" While few studies have revealed the existence of such optimal subnetworks with the guidance of ground-truth unbiased samples, the way to discover the optimal subnetworks with biased training dataset is still unexplored in practice. \nTo address this, here we first present our theoretical insight that alerts potential limitations of existing algorithms in exploring unbiased subnetworks in the presence of strong spurious correlations. We then further elucidate the importance of bias-conflicting samples on structure learning. 
Motivated by these observations, we propose a Debiased Contrastive Weight Pruning (DCWP) algorithm, which probes unbiased subnetworks without expensive group annotations. Experimental results demonstrate that our approach significantly outperforms state-of-the-art debiasing methods despite its considerable reduction in the number of parameters.", "keywords": "Debiasing;spurious correlation;pruning", "primary_area": "", "supplementary_material": "/attachment/9d8c2016e10f3e6a897b7b3a3181e1c1fac8fc65.zip", "author": "Geon Yeong Park;Sang Wan Lee;Jong Chul Ye", "authorids": "~Geon_Yeong_Park1;~Sang_Wan_Lee1;~Jong_Chul_Ye1", "gender": "M;M;M", "homepage": "https://geonyeong-park.github.io/;https://aibrain.kaist.ac.kr/sang-wan-lee;https://bispl.weebly.com/", "dblp": "289/5924;77/6650;15/5613", "google_scholar": "HGF4a14AAAAJ;0rMoHW4AAAAJ;HNMjoNEAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Geon_Yeong_Park1;~Sang_Wan_Lee1;~Jong_Chul_Ye1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "position": "PhD student;Associate Professor;Full Professor", "bibtex": "@misc{\npark2023efficient,\ntitle={Efficient debiasing with contrastive weight pruning},\nauthor={Geon Yeong Park and Sang Wan Lee and Jong Chul Ye},\nyear={2023},\nurl={https://openreview.net/forum?id=0DIkhwclYX3}\n}", "github": "", "project": "", "reviewers": "ciCZ;rhqp;jvQr", "site": "https://openreview.net/forum?id=0DIkhwclYX3", "pdf_size": 1258203, "recommendation": "5;5;5", "confidence": "4;4;3", "correctness": "3;3;3", "technical_novelty": "2;3;2", "empirical_novelty": "2;2;2", "wc_summary_paper": "65;37;57", "wc_strength_and_weaknesses": "195;218;245", "wc_clarity_quality_novelty_and_reproducibility": "14;19;18", "wc_summary_review": "45;11;49", "wc_review": "319;285;369", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 53.0, 11.775681155103795 ], "wc_strength_and_weaknesses_avg": [ 219.33333333333334, 20.434176165325468 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 17.0, 2.160246899469287 ], "wc_summary_review_avg": [ 35.0, 17.048949136725895 ], "wc_review_avg": [ 324.3333333333333, 34.49959742116163 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=760531141133985890&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "id": "0DTpO6lLIN", "title": "On the Complexity of Bayesian Generalization", "track": "main", "status": "Withdraw", "tldr": "Correlating the shift between rule- and similarity-based 
generalization with the subjective complexity of the natural visual world.", "abstract": "We consider concept generalization at a large scale in a diverse and natural visual spectrum. Established computational modes (\\ie, rule-based or similarity-based) are primarily studied in isolation and focus on confined and abstract problem spaces. In this work, we study the two modes when the problem space scales up and the *complexity* of concepts becomes diverse. Specifically, at the **representational level**, we seek to answer how the complexity varies when a visual concept is mapped to the representation space. Prior psychology literature has shown that two types of complexities (*i.e.*, subjective complexity and visual complexity) (Griffiths and Tenenbaum, 2003) form an inverted-U relation (Donderi, 2006; Sun and Firestone, 2021). Leveraging *Representativeness of Attribute* (RoA), we computationally confirm the following observation: Models use attributes with high RoA to describe visual concepts, and the description length follows an inverted-U relation as visual complexity increases. Meanwhile, at the **computational level**, we aim to answer how the complexity of representation affects the shift between rule- and similarity-based generalization. We hypothesize that category-conditioned visual modeling estimates the co-occurrence frequency between visual and categorical attributes, thus having the potential to serve as the prior for the natural visual world. Experimental results show that representations with relatively high subjective complexity outperform those with relatively low subjective complexity in rule-based generalization, while the trend is opposite in similarity-based generalization.", "keywords": "Bayesian Generalization;Rational Analysis", "primary_area": "", "supplementary_material": "/attachment/e52691a97d9872d199e662eea026a67a21ba3ad4.zip", "author": "Yu-Zhe Shi;Manjie Xu;Song-Chun Zhu;John E. Hopcroft;Kun He;Joshua B. 
Tenenbaum;Ying Nian Wu;Wenjuan Han;Yixin Zhu", "authorids": "~Yu-Zhe_Shi1;~Manjie_Xu1;~Song-Chun_Zhu1;~John_E._Hopcroft1;~Kun_He1;~Joshua_B._Tenenbaum1;~Ying_Nian_Wu1;~Wenjuan_Han1;~Yixin_Zhu1", "gender": "M;M;M;M;F;;F;M;M", "homepage": "https://yuzheshi.github.io/;https://mjtsu.github.io;https://zhusongchun.net/;http://www.cs.cornell.edu/jeh/;http://faculty.hust.edu.cn/hekun/zh_CN/more/1411001/jsjjgd/index.htm;;https://scholar.google.com/citations?user=rfVLLfAAAAAJ;https://yzhu.io/;http://www.stat.ucla.edu/~ywu/", "dblp": "334/2089;322/5851;10/10313;h/JohnEHopcroft;59/1028-1;t/JoshuaBTenenbaum;188/9071;91/1103-1.html;18/568.html", "google_scholar": ";j-WwUGEAAAAJ;https://scholar.google.com.tw/citations?user=Al8dyb4AAAAJ;4Z6vo5QAAAAJ;YTQnGJsAAAAJ;;rfVLLfAAAAAJ;qG9l6JEAAAAJ;7k_1QFIAAAAJ", "orcid": "0000-0003-2066-005X;;;0000-0001-8681-6075;0000-0001-7627-4604;;0000-0002-2327-0842;0000-0001-7024-1545;", "linkedin": ";;;;;;;;", "or_profile": "~Yu-Zhe_Shi1;~Manjie_Xu1;~Song-Chun_Zhu1;~John_E._Hopcroft1;~Kun_He1;~Joshua_B._Tenenbaum1;~Wenjuan_Han1;~Yixin_Zhu1;~Yingnian_Wu1", "aff": "PersLab Research;Tencent AI Lab;Peking University;Department of Computer Science, Cornell University;Huazhong University of Sceince and Technology;Massachusetts Institute of Technology;Beijing Jiaotong University;Peking University;UCLA", "aff_domain": "perslab.co;tencent.com;pku.edu.cn;cs.cornell.edu;hust.edu.cn;mit.edu;bjtu.edu.cn;pku.edu.cn;stat.ucla.edu", "position": "Researcher;Intern;Full Professor;Full Professor;Full Professor;Professor;Associate Professor;Assistant Professor;Full Professor", "bibtex": "@misc{\nshi2023on,\ntitle={On the Complexity of Bayesian Generalization},\nauthor={Yu-Zhe Shi and Manjie Xu and Song-Chun Zhu and John E. Hopcroft and Kun He and Joshua B. 
Tenenbaum and Ying Nian Wu and Wenjuan Han and Yixin Zhu},\nyear={2023},\nurl={https://openreview.net/forum?id=0DTpO6lLIN}\n}", "github": "", "project": "", "reviewers": "cJCK;esQf;qBTu;P7Vi", "site": "https://openreview.net/forum?id=0DTpO6lLIN", "pdf_size": 7976117, "recommendation": "3;3;3;5", "confidence": "2;3;3;1", "correctness": "2;2;2;3", "technical_novelty": "4;2;3;2", "empirical_novelty": "4;0;3;2", "wc_summary_paper": "40;226;182;115", "wc_strength_and_weaknesses": "414;648;636;52", "wc_clarity_quality_novelty_and_reproducibility": "123;64;41;14", "wc_summary_review": "46;102;53;30", "wc_review": "623;1040;912;211", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 2.25, 0.82915619758885 ], "correctness_avg": [ 2.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 2.25, 1.479019945774904 ], "wc_summary_paper_avg": [ 140.75, 70.32558211632521 ], "wc_strength_and_weaknesses_avg": [ 437.5, 241.28561913218118 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 60.5, 40.190172928217166 ], "wc_summary_review_avg": [ 57.75, 26.873546472321067 ], "wc_review_avg": [ 696.5, 318.41207577602955 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 9, 0 ], "corr_recommendation_confidence": -0.8703882797784891, "corr_recommendation_correctness": 1.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10153808080258812487&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 12, "aff_unique_index": "0;1;2;3;4;5;6;2;7", "aff_unique_norm": "PersLab Research;Tencent;Peking University;Cornell University;Huazhong University of Science and Technology;Massachusetts Institute of Technology;Beijing Jiao Tong University;University of California, Los Angeles", "aff_unique_dep": ";Tencent AI Lab;;Department of Computer Science;;;;", "aff_unique_url": ";https://ai.tencent.com;http://www.pku.edu.cn;https://www.cornell.edu;http://www.hust.edu.cn;https://web.mit.edu;http://www.njtu.edu.cn/en;https://www.ucla.edu", "aff_unique_abbr": ";Tencent AI Lab;Peking U;Cornell;HUST;MIT;BJTU;UCLA", "aff_campus_unique_index": "1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "1;1;2;1;2;1;1;2", "aff_country_unique": ";China;United States" }, { "id": "0DwzMsUNIr", "title": "From Points to Functions: Infinite-dimensional Representations in Diffusion Models", "track": "main", "status": "Reject", "tldr": "We perform an analysis on the trajectory-based representation obtained from Diffusion Based Representation Learning to measure how different points of the trajectory encode semantically different information.", "abstract": "Diffusion-based generative models learn to iteratively transfer unstructured noise to a complex target distribution as opposed to Generative Adversarial Networks (GANs) or the decoder of Variational Autoencoders (VAEs) which produce samples from the target distribution in a single step. Thus, in diffusion models every sample is naturally connected to a random trajectory which is a solution to a learned stochastic differential equation (SDE). Generative models are only concerned with the final state of this trajectory that delivers samples from the desired distribution. 
\\cite{abstreiter2021diffusion} showed that these stochastic trajectories can be seen as continuous filters that wash out information along the way. Consequently, it is reasonable to ask if there is an intermediate time step at which the preserved information is optimal for a given downstream task. In this work, we show that a combination of information content from different time steps gives a strictly better representation for the downstream task. We introduce attention- and recurrence-based modules that ``learn to mix'' the information content of various time steps such that the resultant representation leads to superior performance in downstream tasks.\n", "keywords": "representation learning;diffusion models;score-based learning", "primary_area": "", "supplementary_material": "/attachment/1bb55b7711bbc7936a9c712915b545b5a299455e.zip", "author": "Sarthak Mittal;Guillaume Lajoie;Stefan Bauer;Arash Mehrjou", "authorids": "~Sarthak_Mittal1;~Guillaume_Lajoie1;~Stefan_Bauer1;~Arash_Mehrjou1", "gender": "M;M;;M", "homepage": "https://sarthmit.github.io/;https://dms.umontreal.ca/~lajoie/;https://cifar.ca/bios/stefan-bauer/;https://distantvantagepoint.com", "dblp": "228/8275;31/10384;;174/1295", "google_scholar": "FGGgTrcAAAAJ;;O-oICE8AAAAJ;pnypNygAAAAJ", "orcid": ";;;0000-0002-3832-7784", "linkedin": ";;;arash-mehrjou/", "or_profile": "~Sarthak_Mittal1;~Guillaume_Lajoie1;~Stefan_Bauer1;~Arash_Mehrjou1", "aff": "University of Montreal;Mila - Quebec Artificial Intelligence Institute;KTH Royal Institute of Technology;GlaxoSmithKline", "aff_domain": "umontreal.ca;mila.quebec;kth.se;gsk.ai", "position": "PhD student;Associate Professor;Assistant Professor;Researcher", "bibtex": "@misc{\nmittal2023from,\ntitle={From Points to Functions: Infinite-dimensional Representations in Diffusion Models},\nauthor={Sarthak Mittal and Guillaume Lajoie and Stefan Bauer and Arash Mehrjou},\nyear={2023},\nurl={https://openreview.net/forum?id=0DwzMsUNIr}\n}", "github": "", "project": "", "reviewers": "ZUwW;tLzz;WYmT;PmAJ", "site": "https://openreview.net/forum?id=0DwzMsUNIr", "pdf_size": 3225151, "recommendation": "3;3;5;5", "confidence": "4;3;3;4", "correctness": "3;3;4;3", "technical_novelty": "2;2;2;2", "empirical_novelty": "2;3;2;2", "wc_summary_paper": "135;109;95;58", "wc_strength_and_weaknesses": "153;317;239;272", "wc_clarity_quality_novelty_and_reproducibility": "37;30;63;164", "wc_summary_review": "18;59;101;65", "wc_review": "343;515;498;559", "wc_reply_reviewers": "117;0;0;0", "wc_reply_authors": "484;836;463;881", "reply_reviewers": "1;0;0;0", "reply_authors": "2;2;1;2", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 99.25, 27.806249297594956 ], "wc_strength_and_weaknesses_avg": [ 245.25, 60.02655662288151 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 73.5, 53.67727638395972 ], "wc_summary_review_avg": [ 60.75, 29.448047473474364 ], "wc_review_avg": [ 478.75, 81.47507287508247 ], "wc_reply_reviewers_avg": [ 29.25, 50.66248612138966 ], "wc_reply_authors_avg": [ 666.0, 193.29899120274789 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 7, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=15419772278250526967&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "University of Montreal;Quebec Artificial Intelligence Institute;KTH Royal Institute of Technology;GlaxoSmithKline", "aff_unique_dep": ";Artificial Intelligence;;", "aff_unique_url": "https://wwwumontreal.ca;https://mila.quebec;https://www.kth.se;https://www.gsk.com", "aff_unique_abbr": "UM;Mila;KTH;GSK", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;2", "aff_country_unique": "Canada;Sweden;United Kingdom" }, { "id": "0Hfv9xPBSPQ", "title": "Do We Really Need Labels for Backdoor Defense?", "track": "main", "status": "Reject", "tldr": "", "abstract": "Since training a model from scratch always requires massive computational resources recently, it has become popular to download pre-trained backbones from third-party platforms and deploy them in various downstream tasks. While providing some convenience, it also introduces potential security risks like backdoor attacks, which lead to target misclassification for any input image with a specifically defined trigger (i.e., backdoored examples). Current backdoor defense methods always rely on clean labeled data, which indicates that safely deploying the pre-trained model in downstream tasks still demands these costly or hard-to-obtain labels. In this paper, we focus on how to purify a backdoored backbone with only unlabeled data. To evoke the backdoor patterns without labels, we propose to leverage the unsupervised contrastive loss to search for backdoors in the feature space. Surprisingly, we find that we can mimic backdoored examples with adversarial examples crafted by contrastive loss, and erase them with adversarial finetuning. Thus, we name our method as Contrastive Backdoor Defense (CBD). Against several backdoored backbones from both supervised and self-supervised learning, extensive experiments demonstrate our unsupervised method achieves comparable or even better defense compared to these supervised backdoor defense methods. 
Thus, our method allows practitioners to safely deploy pre-trained backbones on downstream tasks without extra labeling costs.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Zidi Xiong;Dongxian Wu;Yifei Wang;Yisen Wang", "authorids": "~Zidi_Xiong2;~Dongxian_Wu1;~Yifei_Wang1;~Yisen_Wang1", "gender": "M;M;M;M", "homepage": "https://polaris-73.github.io/;;https://yifeiwang77.com;https://yisenwang.github.io/", "dblp": "314/6808;259/1755;00/555-1;172/1346-1", "google_scholar": "XL6QafwAAAAJ;ZQzqQqwAAAAJ;-CLy6YsAAAAJ;uMWPDboAAAAJ", "orcid": ";;;", "linkedin": "https://www.linkedin.com/public-profile/settings;;;", "or_profile": "~Zidi_Xiong2;~Dongxian_Wu1;~Yifei_Wang1;~Yisen_Wang1", "aff": "Department of Computer Science, University of Illinois at Urbana-Champaign;The University of Tokyo;Peking University;Peking University", "aff_domain": "cs.illinois.edu;u-tokyo.ac.jp;pku.edu.cn;pku.edu.cn", "position": "Undergrad student;Postdoc;PhD student;Assistant Professor", "bibtex": "@misc{\nxiong2023do,\ntitle={Do We Really Need Labels for Backdoor Defense?},\nauthor={Zidi Xiong and Dongxian Wu and Yifei Wang and Yisen Wang},\nyear={2023},\nurl={https://openreview.net/forum?id=0Hfv9xPBSPQ}\n}", "github": "", "project": "", "reviewers": "4Caq;WhiY;eqcZ", "site": "https://openreview.net/forum?id=0Hfv9xPBSPQ", "pdf_size": 694966, "recommendation": "3;5;5", "confidence": "4;4;3", "correctness": "2;3;3", "technical_novelty": "3;3;3", "empirical_novelty": "2;3;3", "wc_summary_paper": "99;106;126", "wc_strength_and_weaknesses": "243;220;247", "wc_clarity_quality_novelty_and_reproducibility": "118;19;155", "wc_summary_review": "33;22;97", "wc_review": "493;367;625", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "924;1012;1053", "reply_reviewers": "0;0;0", "reply_authors": "2;2;2", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 110.33333333333333, 11.440668201153676 ], "wc_strength_and_weaknesses_avg": [ 236.66666666666666, 11.897712198383164 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 97.33333333333333, 57.41273571449302 ], "wc_summary_review_avg": [ 50.666666666666664, 33.06895153396242 ], "wc_review_avg": [ 495.0, 105.33755265810954 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 996.3333333333334, 53.81656085464978 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.49999999999999983, "corr_recommendation_correctness": 0.9999999999999998, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:u5kV9m4t66cJ:scholar.google.com/&scioq=Do+We+Really+Need+Labels+for+Backdoor+Defense%3F&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "University of Illinois Urbana-Champaign;University of Tokyo;Peking University", "aff_unique_dep": "Department of Computer Science;;", "aff_unique_url": "https://illinois.edu;https://www.u-tokyo.ac.jp;http://www.pku.edu.cn", "aff_unique_abbr": "UIUC;UTokyo;Peking U", "aff_campus_unique_index": "0", "aff_campus_unique": "Urbana-Champaign;", "aff_country_unique_index": "0;1;2;2", "aff_country_unique": "United States;Japan;China" }, { "title": "Targeted Hyperparameter 
Optimization with Lexicographic Preferences Over Multiple Objectives", "status": "Top-5%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11310", "id": "0Ij9_q567Ma", "poster": "", "openreview": "https://openreview.net/forum?id=0Ij9_q567Ma", "slides": "https://iclr.cc/virtual/2023/poster/11310", "video": "https://iclr.cc/virtual/2023/poster/11310", "author_site": "Shaokun Zhang, Feiran Jia, Chi Wang, Qingyun Wu", "tldr": "Hyperparameter tuning under lexicographic preference", "abstract": "Motivated by various practical applications, we propose a novel and general formulation of targeted multi-objective hyperparameter optimization. Our formulation allows a clear specification of an automatable optimization goal using lexicographic preference over multiple objectives. We then propose a randomized directed search method named LexiFlow to solve this problem. We demonstrate the strong empirical performance of the proposed algorithm in multiple hyperparameter optimization tasks.", "keywords": "Automatic Machine learning;Hyperparameter tuning;Lexicographic preference", "primary_area": "", "supplementary_material": "/attachment/f6ab1cbadf61d2b6bfb1cd40de5ad4a7812898a0.zip", "author": "Shaokun Zhang;Feiran Jia;Chi Wang;Qingyun Wu", "authorids": "~Shaokun_Zhang2;~Feiran_Jia1;~Chi_Wang3;~Qingyun_Wu2", "gender": ";F;M;", "homepage": ";https://feiran.io;http://chiwang.cc;", "dblp": ";277/9284;09/404-1;", "google_scholar": ";haBpKDQAAAAJ;https://scholar.google.com/citations?hl=en;", "orcid": ";;;", "linkedin": ";jiafeiran/;chi-wang-autogen/;", "or_profile": "~Shaokun_Zhang2;~Feiran_Jia1;~Chi_Wang3;~Qingyun_Wu2", "aff": ";Pennsylvania State University;Microsoft Research;", "aff_domain": ";psu.edu;microsoft.com;", "position": ";PhD student;Principal Researcher;", "bibtex": "@inproceedings{\nzhang2023targeted,\ntitle={Targeted Hyperparameter Optimization with Lexicographic Preferences Over Multiple Objectives},\nauthor={Shaokun Zhang and Feiran Jia and Chi Wang and Qingyun Wu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=0Ij9_q567Ma}\n}", "github": "", "project": "", "reviewers": "NjRw;Fdr2;GSAk", "pdf_size": 1698367, "recommendation": "8;8;8", "confidence": "4;4;4", "correctness": "3;3;4", "technical_novelty": "3;4;4", "empirical_novelty": "3;4;4", "wc_summary_paper": "167;136;68", "wc_strength_and_weaknesses": "402;341;79", "wc_clarity_quality_novelty_and_reproducibility": "81;84;88", "wc_summary_review": "56;33;31", "wc_review": "706;594;266", "wc_reply_reviewers": "20;41;0", "wc_reply_authors": "502;878;257", "reply_reviewers": "1;1;0", "reply_authors": "1;2;1", "recommendation_avg": [ 8.0, 0.0 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 3.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 123.66666666666667, 41.34677200889515 ], "wc_strength_and_weaknesses_avg": [ 274.0, 140.11661809602268 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 84.33333333333333, 2.8674417556808756 ], "wc_summary_review_avg": [ 40.0, 11.343133018115703 ], "wc_review_avg": [ 522.0, 186.70475801828582 ], "wc_reply_reviewers_avg": [ 20.333333333333332, 16.73983937265296 ], "wc_reply_authors_avg": [ 545.6666666666666, 255.39555377666406 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 
0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12905519458032890836&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=0Ij9_q567Ma", "email": ";psu.edu;microsoft.com;", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Pennsylvania State University;Microsoft", "aff_unique_dep": ";Microsoft Research", "aff_unique_url": "https://www.psu.edu;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "PSU;MSR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "0JD3EN75NJE", "title": "Understanding and Mitigating Robust Overfitting through the Lens of Feature Dynamics", "track": "main", "status": "Reject", "tldr": "", "abstract": "Adversarial Training (AT) has become arguably the state-of-the-art algorithm for extracting robust features. However, researchers recently notice that AT suffers from severe robust overfitting problems, particularly after the learning rate (LR) decay, while the existing static view of feature robustness fails to explain this phenomenon. In this paper, we propose a new dynamic feature robustness framework which takes the dynamic interplay between the model trainer and the attacker into consideration. By tracing temporal and dataset-specific feature robustness, we develop a new understanding of robust overfitting from the dynamics of non-robust features, and empirically verify it on real-world datasets. Built upon this understanding, we explore three techniques to restore the balance between the model trainer and the attacker, and show that they could effectively alleviate robust overfitting and attain state-of-the-art robustness on benchmark datasets. 
Notably, different from previous studies, our interpretation highlights the necessity of considering the min-max nature of AT for robust overfitting.\n", "keywords": "Adversarial Training;Robust Overfitting;Generalization;Robustness;Adversarial Attack", "primary_area": "", "supplementary_material": "/attachment/44f9891c38032b8af6f1d0218305b211f76b1ebf.zip", "author": "Yifei Wang;Liangchen Li;Yisen Wang;Jiansheng Yang;Zhouchen Lin", "authorids": "~Yifei_Wang1;~Liangchen_Li1;~Yisen_Wang1;yjs@math.pku.edu.cn;~Zhouchen_Lin1", "gender": "M;;M;;M", "homepage": "https://yifeiwang77.com;;https://yisenwang.github.io/;;https://zhouchenlin.github.io", "dblp": "00/555-1;;172/1346-1;;l/ZhouchenLin", "google_scholar": "-CLy6YsAAAAJ;;uMWPDboAAAAJ;;https://scholar.google.com.tw/citations?user=TanjFwoAAAAJ", "orcid": ";;;;0000-0003-1493-7569", "linkedin": ";;;;", "or_profile": "~Yifei_Wang1;~Liangchen_Li1;~Yisen_Wang1;yjs@math.pku.edu.cn;~Zhouchen_Lin1", "aff": "Peking University;;Peking University;;Peking University", "aff_domain": "pku.edu.cn;;pku.edu.cn;;pku.edu.cn", "position": "PhD student;;Assistant Professor;;Professor", "bibtex": "@misc{\nwang2023understanding,\ntitle={Understanding and Mitigating Robust Overfitting through the Lens of Feature Dynamics},\nauthor={Yifei Wang and Liangchen Li and Yisen Wang and Jiansheng Yang and Zhouchen Lin},\nyear={2023},\nurl={https://openreview.net/forum?id=0JD3EN75NJE}\n}", "github": "", "project": "", "reviewers": "p7yt;zGeX;vLyN;kLGk;yb7Z", "site": "https://openreview.net/forum?id=0JD3EN75NJE", "pdf_size": 8451150, "recommendation": "3;5;6;6;8", "confidence": "2;3;3;4;4", "correctness": "1;2;3;3;3", "technical_novelty": "2;2;3;2;3", "empirical_novelty": "2;2;2;3;3", "wc_summary_paper": "78;399;112;77;87", "wc_strength_and_weaknesses": "371;259;97;262;429", "wc_clarity_quality_novelty_and_reproducibility": "23;182;143;11;6", "wc_summary_review": "58;322;40;7;20", "wc_review": "530;1162;392;357;542", "wc_reply_reviewers": "133;0;25;88;36", "wc_reply_authors": "2745;5024;593;2019;1349", "reply_reviewers": "1;0;1;1;1", "reply_authors": "5;9;1;4;4", "recommendation_avg": [ 5.6, 1.624807680927192 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "correctness_avg": [ 2.4, 0.8 ], "technical_novelty_avg": [ 2.4, 0.4898979485566356 ], "empirical_novelty_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_paper_avg": [ 150.6, 124.84005767380918 ], "wc_strength_and_weaknesses_avg": [ 283.6, 113.72880022228318 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 73.0, 74.31554346164738 ], "wc_summary_review_avg": [ 89.4, 117.58843480546885 ], "wc_review_avg": [ 596.6, 292.01479414577614 ], "wc_reply_reviewers_avg": [ 56.4, 47.85227267330152 ], "wc_reply_authors_avg": [ 2346.0, 1516.8936679939038 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 4.6, 2.5768197453450252 ], "replies_avg": [ 36, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.8882347881956882, "corr_recommendation_correctness": 0.8924133096001623, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:4s0MmSYE7HwJ:scholar.google.com/&scioq=Understanding+and+Mitigating+Robust+Overfitting+through+the+Lens+of+Feature+Dynamics&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", 
"aff_country_unique": "China" }, { "id": "0KfAZAyClG1", "title": "Why pseudo-label based algorithm is effective? --from the perspective of pseudo-labeled data", "track": "main", "status": "Withdraw", "tldr": "Theoretical analysis of the superiority of pseudo-label based semi-supervised learning algorithm --from the perspective of pseudo-labeled data", "abstract": "Recently, pseudo label based semi-supervised learning has achieved great success in many fields. The core idea of the pseudo label based semi-supervised learning algorithm is to use the model trained on the labeled data to generate pseudo labels on the unlabeled data, and then train a model to fit the previously generated pseudo labels. We give a theory analysis for why pseudo label based semi-supervised learning is effective in this paper. We mainly compare the generalization error of the model trained under two settings: (1) There are $N$ labeled data. (2) There are $N$ unlabeled data and a suitable initial model. Our analysis shows that, firstly, when the amount of unlabeled data tends to infinity, the pseudo label based semi-supervised learning algorithm can obtain model which have the same generalization error upper bound as model obtained by normally training in the condition of the amount of labeled data tends to infinity. More importantly, we prove that when the amount of unlabeled data is large enough, the generalization error upper bound of the model obtained by pseudo label based semi-supervised learning algorithm can converge to the optimal upper bound with linear convergence rate. We also give the lower bound on sampling complexity to achieve linear convergence rate. Our analysis contributes to understanding the empirical successes of pseudo label-based semi-supervised learning.", "keywords": "pseudo-label based algorithm;semi-supervised learning", "primary_area": "", "supplementary_material": "", "author": "Zeping Min;Cheng Tai", "authorids": "~Zeping_Min1;~Cheng_Tai1", "gender": ";", "homepage": ";", "dblp": ";139/1280", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Zeping_Min1;~Cheng_Tai1", "aff": ";", "aff_domain": ";", "position": ";", "bibtex": "@misc{\nmin2023why,\ntitle={Why pseudo-label based algorithm is effective? 
--from the perspective of pseudo-labeled data},\nauthor={Zeping Min and Cheng Tai},\nyear={2023},\nurl={https://openreview.net/forum?id=0KfAZAyClG1}\n}", "github": "", "project": "", "reviewers": "i6LM;dkxs;qh5j;DuKf", "site": "https://openreview.net/forum?id=0KfAZAyClG1", "pdf_size": 341141, "recommendation": "3;3;5;6", "confidence": "4;3;2;3", "correctness": "2;3;3;4", "technical_novelty": "3;3;2;3", "empirical_novelty": "0;2;0;0", "wc_summary_paper": "77;81;54;154", "wc_strength_and_weaknesses": "388;75;203;177", "wc_clarity_quality_novelty_and_reproducibility": "57;119;7;63", "wc_summary_review": "65;34;20;35", "wc_review": "587;309;284;429", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 0.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 91.5, 37.526657191921586 ], "wc_strength_and_weaknesses_avg": [ 210.75, 112.96542612675792 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 61.5, 39.68311983702894 ], "wc_summary_review_avg": [ 38.5, 16.408839081421938 ], "wc_review_avg": [ 402.25, 119.92367364286336 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.5443310539518174, "corr_recommendation_correctness": 0.816496580927726, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=730778771867297112&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0 }, { "id": "0L8tuglXJaW", "title": "HOYER REGULARIZER IS ALL YOU NEED FOR EXTREMELY SPARSE SPIKING NEURAL NETWORKS", "track": "main", "status": "Reject", "tldr": "", "abstract": "Spiking Neural Networks (SNN) have emerged as an attractive spatio-temporal computing paradigm for a wide range of low-power vision tasks. However, state-of-the-art (SOTA) SNN models either incur multiple time steps which hinder their deployment in real-time use cases or increase the training complexity significantly. To mitigate this concern, we present a training framework (from scratch) for one-time-step SNNs that uses a novel variant of the recently proposed Hoyer regularizer. We estimate the threshold of each SNN layer as the Hoyer extremum of a clipped version of its activation map, where the clipping threshold is trained using gradient descent with our Hoyer regularizer. This approach not only downscales the value of the trainable threshold, thereby emitting a large number of spikes for weight update with a limited number of iterations (due to only one time step), but also shifts the pre-activation values away from the threshold, thereby mitigating the effect of noise that can degrade the SNN accuracy. Our approach outperforms existing spiking, binary, and adder neural networks in terms of the accuracy-FLOPs trade-off for complex image recognition tasks. 
Downstream experiments on object detection also demonstrate the efficacy of our approach.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/df6fea91b0d6c7d0fe5fad2848b6cf6ae4928c5f.zip", "author": "Gourav Datta;Zeyu Liu;Peter Anthony Beerel", "authorids": "~Gourav_Datta1;~Zeyu_Liu2;~Peter_Anthony_Beerel1", "gender": "M;M;M", "homepage": "https://godatta.github.io;;http://sites.usc.edu/eessc.html", "dblp": "250/9607.html;116/0645-3;29/6330", "google_scholar": "hxSN-fcAAAAJ;Gk5kyEEAAAAJ;JSdH7PsAAAAJ", "orcid": ";;", "linkedin": "gourav-datta-959571a3/;zeyu-liu-364982220/;peter-beerel-b9902a1/", "or_profile": "~Gourav_Datta1;~Zeyu_Liu2;~Peter_Anthony_Beerel1", "aff": "University of Southern California;University of Southern California;University of Southern California", "aff_domain": "usc.edu;usc.edu;usc.edu", "position": "PhD student;MS student;Full Professor", "bibtex": "@misc{\ndatta2023hoyer,\ntitle={{HOYER} {REGULARIZER} {IS} {ALL} {YOU} {NEED} {FOR} {EXTREMELY} {SPARSE} {SPIKING} {NEURAL} {NETWORKS}},\nauthor={Gourav Datta and Zeyu Liu and Peter Anthony Beerel},\nyear={2023},\nurl={https://openreview.net/forum?id=0L8tuglXJaW}\n}", "github": "", "project": "", "reviewers": "YzPx;ujG1;pFTU;hEby;qbCE", "site": "https://openreview.net/forum?id=0L8tuglXJaW", "pdf_size": 3090842, "recommendation": "5;5;5;5;8", "confidence": "5;3;4;4;4", "correctness": "2;3;3;3;3", "technical_novelty": "2;3;2;2;3", "empirical_novelty": "2;3;3;2;3", "wc_summary_paper": "143;119;59;16;36", "wc_strength_and_weaknesses": "237;85;781;502;149", "wc_clarity_quality_novelty_and_reproducibility": "48;22;56;61;60", "wc_summary_review": "92;21;43;22;9", "wc_review": "520;247;939;601;254", "wc_reply_reviewers": "206;85;187;150;13", "wc_reply_authors": "2525;504;1705;888;528", "reply_reviewers": "1;1;1;1;1", "reply_authors": "6;1;3;2;1", "recommendation_avg": [ 5.6, 1.2 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "correctness_avg": [ 2.8, 0.39999999999999997 ], "technical_novelty_avg": [ 2.4, 0.4898979485566356 ], "empirical_novelty_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_paper_avg": [ 74.6, 48.615223952996445 ], "wc_strength_and_weaknesses_avg": [ 350.8, 257.7816129982897 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 49.4, 14.44437606821423 ], "wc_summary_review_avg": [ 37.4, 29.41156235224508 ], "wc_review_avg": [ 512.2, 255.7744318730862 ], "wc_reply_reviewers_avg": [ 128.2, 70.8841308051386 ], "wc_reply_authors_avg": [ 1230.0, 779.6786517534003 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.6, 1.8547236990991407 ], "replies_avg": [ 34, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.25000000000000006, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:OnyALMSLDvcJ:scholar.google.com/&scioq=HOYER+REGULARIZER+IS+ALL+YOU+NEED+FOR+EXTREMELY+SPARSE+SPIKING+NEURAL+NETWORKS&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Southern California", "aff_unique_dep": "", "aff_unique_url": "https://www.usc.edu", "aff_unique_abbr": "USC", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "0LJRS7B3r4_", "title": "Advantage Constrained Proximal Policy Optimization in Multi-Agent Reinforcement Learning", "track": "main", "status": "Reject", "tldr": "A multi-agent policy gradient reinforcement learning based on local 
advantage constraints.", "abstract": "We explore the value-based method and policy gradient combination in multi-agent reinforcement learning (MARL). In value-based MARL, the {\\itshape{Individual-Global-Max}} (IGM) principle plays an important role, which maintains the consistency between joint and local action values. At the same time, IGM is difficult to guarantee in multi-agent policy gradient methods due to stochastic exploration and conflicting gradient directions. In this paper, we propose a novel multi-agent policy gradient algorithm called {\\itshape{Advantage Constrained Proximal Policy Optimization}} (ACPPO). Based on the {\\itshape{multi-agent advantage decomposition lemma}}, ACPPO introduces an advantage network for each agent to estimate the current local state-action advantage. The coefficient of each agent constrains the joint-action advantage according to the consistency of the estimated joint-action advantage and local advantage. Unlike previous policy gradient-based MARL algorithms, ACPPO does not need an extra sampled baseline to reduce variance. We evaluate the proposed method on continuous matrix games and Multi-Agent MuJoCo tasks. Results show that ACPPO outperforms baselines such as MAPPO, MADDPG, and HAPPO.", "keywords": "Multi agent;reinforcement learning;neural network;deep learning;trust region.", "primary_area": "", "supplementary_material": "", "author": "Weifan Li", "authorids": "~Weifan_Li1", "gender": "F", "homepage": "", "dblp": "", "google_scholar": "https://scholar.google.com/citations?view_op=list_works", "orcid": "", "linkedin": "", "or_profile": "~Weifan_Li1", "aff": "", "aff_domain": "", "position": "", "bibtex": "@misc{\nli2023advantage,\ntitle={Advantage Constrained Proximal Policy Optimization in Multi-Agent Reinforcement Learning},\nauthor={Weifan Li},\nyear={2023},\nurl={https://openreview.net/forum?id=0LJRS7B3r4_}\n}", "github": "", "project": "", "reviewers": "gE8o;HtQd;ghg2;U46S", "site": "https://openreview.net/forum?id=0LJRS7B3r4_", "pdf_size": 1987427, "recommendation": "3;3;3;3", "confidence": "4;4;4;4", "correctness": "2;3;2;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;2;2;0", "wc_summary_paper": "68;105;55;16", "wc_strength_and_weaknesses": "256;316;81;183", "wc_clarity_quality_novelty_and_reproducibility": "49;13;247;49", "wc_summary_review": "33;33;15;9", "wc_review": "406;467;398;257", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 61.0, 31.804087787578503 ], "wc_strength_and_weaknesses_avg": [ 209.0, 87.63275643274038 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 89.5, 92.11270270706424 ], "wc_summary_review_avg": [ 22.5, 10.712142642814275 ], "wc_review_avg": [ 382.0, 76.94478539836211 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 1, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:TD0MqC5rCSgJ:scholar.google.com/&scioq=Advantage+Constrained+Proximal+Policy+Optimization+in+Multi-Agent+Reinforcement+Learning&hl=en&as_sdt=0,14", "gs_version_total": 3 }, { "id": 
"0MqQ88Z2Kta", "title": "Evaluating and Inducing Personality in Pre-trained Language Models", "track": "main", "status": "Reject", "tldr": "We propose the Machine Personality Inventory (MPI) dataset for evaluating the machine personality and devise a Chain Prompting method to induce the language model with a specific personality, capable of producing diversified behaviors.", "abstract": "Originated as a philosophical quest, personality discerns how individuals differ from each other in terms of thinking, feeling, and behaving. Toward building social machines that work with humans on a daily basis, we are motivated to ask: (1) Do existing Large Language Models (LLMs) possess personalities, akin to their human counterparts? (2) If so, how can we evaluate them? (3) Further, given this evaluation framework, how can we induce a certain personality in a fully controllable fashion? To tackle these three questions, we propose the Machine Personality Inventory (MPI) dataset for evaluating the machine personality; MPI follows standardized personality tests, built upon the Big Five Personality Factors (Big Five) theory and personality assessment inventories. By evaluating models with MPI, we provide the first piece of evidence showing the existence of personality in LLMs. We further devise a Chain Prompting method to induce LLMs with a specific personality in a controllable manner, capable of producing diversified behaviors. We hope to shed light on future studies by adopting personality as the essential guide for various downstream tasks, building more human-like and in situ dialogue agents.", "keywords": "machine personality;pre-trained language model;personality trait theory;psychometric inventory;prompt", "primary_area": "", "supplementary_material": "/attachment/826a80339b77152c8ac56d89f02fec02aca0b545.zip", "author": "Guangyuan Jiang;Manjie Xu;Song-Chun Zhu;Wenjuan Han;Chi Zhang;Yixin Zhu", "authorids": "~Guangyuan_Jiang1;~Manjie_Xu1;~Song-Chun_Zhu1;~Wenjuan_Han1;~Chi_Zhang12;~Yixin_Zhu1", "gender": "M;M;M;F;;M", "homepage": "https://jiang.gy/;https://mjtsu.github.io;https://zhusongchun.net/;https://scholar.google.com/citations?user=rfVLLfAAAAAJ;;https://yzhu.io/", "dblp": "322/5214;322/5851;10/10313;188/9071;;91/1103-1.html", "google_scholar": "3L79mEAAAAAJ;j-WwUGEAAAAJ;https://scholar.google.com.tw/citations?user=Al8dyb4AAAAJ;rfVLLfAAAAAJ;;qG9l6JEAAAAJ", "orcid": ";;;0000-0002-2327-0842;;0000-0001-7024-1545", "linkedin": ";;;;;", "or_profile": "~Guangyuan_Jiang1;~Manjie_Xu1;~Song-Chun_Zhu1;~Wenjuan_Han1;~Chi_Zhang12;~Yixin_Zhu1", "aff": "Peking University;Tencent AI Lab;Peking University;Beijing Jiaotong University;;Peking University", "aff_domain": "pku.edu.cn;tencent.com;pku.edu.cn;bjtu.edu.cn;;pku.edu.cn", "position": "Undergrad student;Intern;Full Professor;Associate Professor;;Assistant Professor", "bibtex": "@misc{\njiang2023evaluating,\ntitle={Evaluating and Inducing Personality in Pre-trained Language Models},\nauthor={Guangyuan Jiang and Manjie Xu and Song-Chun Zhu and Wenjuan Han and Chi Zhang and Yixin Zhu},\nyear={2023},\nurl={https://openreview.net/forum?id=0MqQ88Z2Kta}\n}", "github": "", "project": "", "reviewers": "v3tK;YTcu;w6LV;43Q3", "site": "https://openreview.net/forum?id=0MqQ88Z2Kta", "pdf_size": 4178213, "recommendation": "5;6;6;6", "confidence": "4;4;3;3", "correctness": "3;3;3;3", "technical_novelty": "3;3;3;2", "empirical_novelty": "4;3;3;2", "wc_summary_paper": "100;128;107;154", "wc_strength_and_weaknesses": "226;318;295;137", 
"wc_clarity_quality_novelty_and_reproducibility": "13;167;89;18", "wc_summary_review": "36;67;47;45", "wc_review": "375;680;538;354", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "387;461;786;715", "reply_reviewers": "0;0;0;0", "reply_authors": "2;2;2;2", "recommendation_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 122.25, 21.0282548015759 ], "wc_strength_and_weaknesses_avg": [ 244.0, 70.44501401802685 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 71.75, 62.67126534545158 ], "wc_summary_review_avg": [ 48.75, 11.321991874224253 ], "wc_review_avg": [ 486.75, 132.36573385888056 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 587.25, 167.227950713988 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.0, "gs_citation": 143, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12841151047820386225&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "Peking University;Tencent;Beijing Jiao Tong University", "aff_unique_dep": ";Tencent AI Lab;", "aff_unique_url": "http://www.pku.edu.cn;https://ai.tencent.com;http://www.njtu.edu.cn/en", "aff_unique_abbr": "Peking U;Tencent AI Lab;BJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "0N66Gl63vq", "title": "FS-DETR: Few-Shot DEtection TRansformer with prompting and without re-training", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "This paper is on Few-Shot Object Detection (FSOD), where given a few templates (examples) depicting a novel class (not seen during training), the goal is to detect all of its occurrences within a set of images. From a practical perspective, an FSOD system must fulfil the following desiderata: (a) it must be used as is, without requiring any fine-tuning at test time, (b) it must be able to process an arbitrary number of novel objects concurrently while supporting an arbitrary number of examples from each class and (c) it must achieve accuracy comparable to a closed system. While there are (relatively) few systems that support (a), to our knowledge, there is no system supporting (b) and (c). In this work, we make the following contributions: We introduce, for the first time, a simple, yet powerful, few-shot detection transformer (FS-DETR) that can address both desiderata (a) and (b). Our system builds upon the DETR framework, extending it based on two key ideas: (1) feed the provided visual templates of the novel classes as visual prompts during test time, and (2) ``stamp'' these prompts with pseudo-class embeddings, which are then predicted at the output of the decoder. Importantly, we show that our system is not only more flexible than existing methods, but also, making a step towards satisfying desideratum (c), it is more accurate, matching and outperforming the current state-of-the-art on the most well-established benchmarks (PASCAL VOC & MSCOCO) for FSOD. 
Code will be made available.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Adrian Bulat;Ricardo Guerrero;Brais Martinez;Georgios Tzimiropoulos", "authorids": "~Adrian_Bulat1;~Ricardo_Guerrero1;~Brais_Martinez3;~Georgios_Tzimiropoulos1", "gender": ";M;M;M", "homepage": "https://www.adrianbulat.com;;http://www.braismartinez.org/;https://ytzimiro.github.io/", "dblp": "185/6878;76/10169;14/111;03/3273", "google_scholar": "https://scholar.google.co.uk/citations?user=5sKcsg0AAAAJ;https://scholar.google.co.uk/citations?user=bn2TAy8AAAAJ;https://scholar.google.co.uk/citations?user=-62MApgAAAAJ;https://scholar.google.co.uk/citations?user=D4JkWxf-8fwC", "orcid": "0000-0002-3185-4979;;;", "linkedin": ";;;", "or_profile": "~Adrian_Bulat1;~Ricardo_Guerrero1;~Brais_Martinez3;~Georgios_Tzimiropoulos1", "aff": "Samsung AI Center Cambridge;Samsung AI Center;Samsung;Queen Mary University London", "aff_domain": "samsung.com;samsung.com;samsung.com;qmul.ac.uk", "position": "Research Scientist;Senior Researcher;Samsung AI Center;Associate Professor", "bibtex": "@misc{\nbulat2023fsdetr,\ntitle={{FS}-{DETR}: Few-Shot {DE}tection {TR}ansformer with prompting and without re-training},\nauthor={Adrian Bulat and Ricardo Guerrero and Brais Martinez and Georgios Tzimiropoulos},\nyear={2023},\nurl={https://openreview.net/forum?id=0N66Gl63vq}\n}", "github": "", "project": "", "reviewers": "F3H1;RE9a;DR2c;UGfe;BuJR", "site": "https://openreview.net/forum?id=0N66Gl63vq", "pdf_size": 5988389, "recommendation": "3;5;5;5;6", "confidence": "5;4;4;5;3", "correctness": "3;3;2;3;4", "technical_novelty": "2;2;2;3;3", "empirical_novelty": "2;2;3;3;3", "wc_summary_paper": "85;89;81;81;27", "wc_strength_and_weaknesses": "409;480;318;196;238", "wc_clarity_quality_novelty_and_reproducibility": "20;48;11;27;27", "wc_summary_review": "22;35;21;22;25", "wc_review": "536;652;431;326;317", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "1650;752;560;353;599", "reply_reviewers": "0;0;0;0;0", "reply_authors": "4;2;2;2;2", "recommendation_avg": [ 4.8, 0.9797958971132712 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "correctness_avg": [ 3.0, 0.6324555320336759 ], "technical_novelty_avg": [ 2.4, 0.4898979485566356 ], "empirical_novelty_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_paper_avg": [ 72.6, 22.992172581119863 ], "wc_strength_and_weaknesses_avg": [ 328.2, 105.1558842861397 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 26.6, 12.208193969625482 ], "wc_summary_review_avg": [ 25.0, 5.176871642217914 ], "wc_review_avg": [ 452.4, 127.74756357754929 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 782.8, 451.9147707256314 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.4, 0.8000000000000002 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.7637626158259733, "corr_recommendation_correctness": 0.3227486121839514, "gs_citation": 42, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4463657962532215023&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Samsung;Queen Mary University of London", "aff_unique_dep": "AI Center;", "aff_unique_url": "https://www.samsung.com/global/innovation/ai-research/;https://www.qmul.ac.uk", "aff_unique_abbr": "SAC;QMUL", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Cambridge;;London", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "United Kingdom;South Korea" }, { "id": "0OlEBibFa_g", "title": 
"Detecting Out-of-Distribution Data with Semi-supervised Graph \u201cFeature\" Networks", "track": "main", "status": "Reject", "tldr": "", "abstract": "Anomalous and out-of-distribution (OOD) data present a significant challenge to the robustness of decisions taken by deep neural networks, with myriad real-world consequences. State-of-the-art OOD detection techniques use embeddings learned by large pre-trained transformers. We demonstrate that graph structures and topological properties can be leveraged to detect both far-OOD and near-OOD data reliably, simply by characterising each data point (image) as a network of related features (visual concepts). Furthermore, we facilitate human-in-the-loop machine learning by expressing this data to comprise high-level domain-specific concepts. We obtained \\textit{97.95\\% AUROC} on far-OOD and \\textit{98.79\\% AUROC} on near-OOD detection tasks based on the LSUN dataset (comparable to the performance of state-of-the-art techniques).", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Debargha Ganguly;Debayan Gupta", "authorids": "~Debargha_Ganguly1;~Debayan_Gupta1", "gender": "M;M", "homepage": "https://debargha.com;https://www.ashoka.edu.in/profile/debayan-gupta/", "dblp": "367/0168;121/4322.html", "google_scholar": "mzCrbIIAAAAJ;Z16kmr8AAAAJ", "orcid": "0000-0001-5662-1592;0000-0002-4457-1556", "linkedin": "debargha-ganguly/;debayang", "or_profile": "~Debargha_Ganguly1;~Debayan_Gupta1", "aff": "Case Western Reserve University;Ashoka University", "aff_domain": "case.edu;ashoka.edu.in", "position": "PhD student;Assistant Professor", "bibtex": "@misc{\nganguly2023detecting,\ntitle={Detecting Out-of-Distribution Data with Semi-supervised Graph {\\textquotedblleft}Feature'' Networks},\nauthor={Debargha Ganguly and Debayan Gupta},\nyear={2023},\nurl={https://openreview.net/forum?id=0OlEBibFa_g}\n}", "github": "", "project": "", "reviewers": "ox4H;qAw3;sju8;cXz2", "site": "https://openreview.net/forum?id=0OlEBibFa_g", "pdf_size": 685644, "recommendation": "1;3;3;5", "confidence": "5;5;3;3", "correctness": "1;3;2;3", "technical_novelty": "1;3;2;3", "empirical_novelty": "1;2;2;2", "wc_summary_paper": "65;41;63;56", "wc_strength_and_weaknesses": "158;149;157;288", "wc_clarity_quality_novelty_and_reproducibility": "42;231;29;127", "wc_summary_review": "21;69;17;86", "wc_review": "286;490;266;557", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.0, 1.4142135623730951 ], "confidence_avg": [ 4.0, 1.0 ], "correctness_avg": [ 2.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.25, 0.82915619758885 ], "empirical_novelty_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 56.25, 9.41740410091868 ], "wc_strength_and_weaknesses_avg": [ 188.0, 57.84029737129642 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 107.25, 80.75386987630995 ], "wc_summary_review_avg": [ 48.25, 29.894606536965828 ], "wc_review_avg": [ 399.75, 126.19503754110143 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.7071067811865476, "corr_recommendation_correctness": 0.8528028654224418, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:mLx9o_uDssEJ:scholar.google.com/&scioq=Detecting+Out-of-Distribution+Data+with+Semi-supervised+Graph+%E2%80%9CFeature%22+Networks&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Case Western Reserve University;Ashoka University", "aff_unique_dep": ";", "aff_unique_url": "https://www.case.edu;https://www.ashoka.edu.in", "aff_unique_abbr": "CWRU;Ashoka", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;India" }, { "id": "0PH-P_FIqGD", "title": "Compact Bilinear Pooling via General Bilinear Projection", "track": "main", "status": "Reject", "tldr": "We proposed a general bilinear projection based on complete matrix bases, and then we design a compact bilinear pooling algorithm by using the proposed general bilinear pooling.", "abstract": "Most factorized bilinear pooling (FBiP) employs Hadamard product-based bilinear projection to learn appropriate projecting directions to reduce the dimension of bilinear features. However, in this paper, we reveal that the Hadamard product-based bilinear projection makes FBiP miss a lot of possible projecting directions, which will significantly harm the performance of outputted compact bilinear features, including compactness and effectiveness. To address this issue, we propose a general matrix-based bilinear projection based on the rank-$k$ matrix base decomposition, where the Hadamard-based bilinear projection is a special case of our proposed one. Using the proposed bilinear projection, we design a novel low-rank factorized bilinear pooling (named RK-FBP), which does not miss any projecting directions. Thus, our RK-FBP can generate better compact bilinear features. To leverage high-order information in local features, we nest several RK-FBP modules together to formulate a multi-linear pooling that outputs compact multi-linear features. At last, we conduct experiments on several fine-grained image tasks to evaluate our models. 
The experiments show that our models achieve new state-of-the-art classification accuracy with the lowest feature dimension.", "keywords": "Bilinear Pooling;Bilinear Projection;fine-grained recognition", "primary_area": "", "supplementary_material": "/attachment/e2cc47adf5c65bc62829d447377b2ba5ed7f8f3e.zip", "author": "Kun Song;Junwei Han;Feiping Nie;Gong Cheng;Bin Gu;Fakhri Karray", "authorids": "~Kun_Song2;~Junwei_Han1;~Feiping_Nie2;~Gong_Cheng2;~Bin_Gu1;fakhri.karray@mbzuai.ac.ae", "gender": ";;M;M;M;", "homepage": ";;https://dblp.org/pid/80/5755.html;https://gcheng-nwpu.github.io/;https://mbzuai.ac.ae/study/faculty/bin-gu/;", "dblp": ";;;69/1215-3.html;29/1758-1;", "google_scholar": ";;;dw1n0vIAAAAJ;Vo8OgCgAAAAJ;", "orcid": ";;;0000-0001-5030-0683;0000-0001-6049-1815;", "linkedin": ";;;;;", "or_profile": "~Kun_Song2;~Junwei_Han1;~Feiping_Nie2;~Gong_Cheng2;~Bin_Gu1;fakhri.karray@mbzuai.ac.ae", "aff": ";;Northwest Polytechnical University Xi'an;Northwestern Polytechnical University;Mohamed bin Zayed University of Artificial Intelligence;", "aff_domain": ";;nwpu.edu.cn;nwpu.edu.cn;mbzuai.ac.ae;", "position": ";;Full Professor;Full Professor;Assistant Professor;", "bibtex": "@misc{\nsong2023compact,\ntitle={Compact Bilinear Pooling via General Bilinear Projection},\nauthor={Kun Song and Junwei Han and Feiping Nie and Gong Cheng and Bin Gu and Fakhri Karray},\nyear={2023},\nurl={https://openreview.net/forum?id=0PH-P_FIqGD}\n}", "github": "", "project": "", "reviewers": "xdH1;MKth;yyVh", "site": "https://openreview.net/forum?id=0PH-P_FIqGD", "pdf_size": 1258514, "recommendation": "3;6;6", "confidence": "4;5;4", "correctness": "2;3;3", "technical_novelty": "2;3;3", "empirical_novelty": "2;2;3", "wc_summary_paper": "115;80;61", "wc_strength_and_weaknesses": "227;299;95", "wc_clarity_quality_novelty_and_reproducibility": "73;38;317", "wc_summary_review": "62;42;32", "wc_review": "477;459;505", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "665;471;0", "reply_reviewers": "0;0;0", "reply_authors": "1;1;0", "recommendation_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 85.33333333333333, 22.365648262955002 ], "wc_strength_and_weaknesses_avg": [ 207.0, 84.4748483277715 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 142.66666666666666, 124.09763181552749 ], "wc_summary_review_avg": [ 45.333333333333336, 12.472191289246473 ], "wc_review_avg": [ 480.3333333333333, 18.926759422104517 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 378.6666666666667, 279.22551618519554 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0.6666666666666666, 0.4714045207910317 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.5, "corr_recommendation_correctness": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:EyOBoilBpVsJ:scholar.google.com/&scioq=Compact+Bilinear+Pooling+via+General+Bilinear+Projection&hl=en&as_sdt=0,47", "gs_version_total": 0, "aff_unique_index": "0;1;2", "aff_unique_norm": "Northwest Polytechnical University;Northwestern Polytechnical University;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": ";;", "aff_unique_url": "http://www.nwpu.edu.cn;https://www.nwpu.edu.cn;https://mbzuai.ac.ae", 
"aff_unique_abbr": "NWPU;NWPU;MBZUAI", "aff_campus_unique_index": "0", "aff_campus_unique": "Xi'an;", "aff_country_unique_index": "0;0;1", "aff_country_unique": "China;United Arab Emirates" }, { "title": "Deep Learning meets Nonparametric Regression: Are Weight-Decayed DNNs Locally Adaptive?", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11989", "id": "0Q9H_Pgx132", "poster": "/media/PosterPDFs/ICLR%202023/11989.png?t=1681940348.4388313", "openreview": "https://openreview.net/forum?id=0Q9H_Pgx132", "slides": "https://iclr.cc/virtual/2023/poster/11989", "video": "https://iclr.cc/virtual/2023/poster/11989", "author_site": "Kaiqi Zhang, Yu-Xiang Wang", "tldr": "Parallel NN with only weight decay achieves an estimation error close to the minimax rates for both the Besov and BV classes.", "abstract": "We study the theory of neural network (NN) from the lens of classical nonparametric regression problems with a focus on NN\u2019s ability to adaptively estimate functions with heterogeneous smoothness \u2014 a property of functions in Besov or Bounded Variation (BV) classes. Existing work on this problem requires tuning the NN architecture based on the function spaces and sample sizes. We consider a \u201cParallel NN\u201d variant of deep ReLU networks and show that the standard weight decay is equivalent to promoting the \u2113p -sparsity (0 < p < 1) of the coefficient vector of an end-to-end learned function bases, i.e., a dictionary. Using this equivalence, we further establish that by tuning only the weight decay, such Parallel NN achieves an estimation error arbitrarily close to the minimax rates for both the Besov and BV classes. Notably, it gets exponentially closer to minimax optimal as the NN gets deeper. Our research sheds new lights on why depth matters and how NNs are more powerful than kernel methods", "keywords": "Neural network;nonparametric regression;minimax optimal", "primary_area": "", "supplementary_material": "/attachment/065fd27e08926b6b4d13caf97e3bef14e26e173e.zip", "author": "Kaiqi Zhang;Yu-Xiang Wang", "authorids": "~Kaiqi_Zhang2;~Yu-Xiang_Wang1", "gender": "M;", "homepage": ";http://www.cs.ucsb.edu/~yuxiangw/publications.html", "dblp": ";62/1637-3.html", "google_scholar": "XA7np8gAAAAJ;HGNZ1fkAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Kaiqi_Zhang2;~Yu-Xiang_Wang1", "aff": "UC Santa Barbara;UC Santa Barbara", "aff_domain": "ucsb.edu;ucsb.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nzhang2023deep,\ntitle={Deep Learning meets Nonparametric Regression: Are Weight-Decayed {DNN}s Locally Adaptive?},\nauthor={Kaiqi Zhang and Yu-Xiang Wang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=0Q9H_Pgx132}\n}", "github": "", "project": "", "reviewers": "ikZk;wKTh;YNWp;UB7m", "pdf_size": 1375459, "recommendation": "5;6;6;6", "confidence": "4;4;2;3", "correctness": "2;4;3;3", "technical_novelty": "3;3;3;3", "empirical_novelty": "0;3;3;2", "wc_summary_paper": "57;49;87;252", "wc_strength_and_weaknesses": "354;187;546;41", "wc_clarity_quality_novelty_and_reproducibility": "89;131;11;16", "wc_summary_review": "44;41;52;16", "wc_review": "544;408;696;325", "wc_reply_reviewers": "435;0;0;0", "wc_reply_authors": "1129;387;533;242", "reply_reviewers": "2;0;0;0", "reply_authors": "3;1;1;1", "recommendation_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 3.0, 
0.7071067811865476 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 1.224744871391589 ], "wc_summary_paper_avg": [ 111.25, 82.48749905288679 ], "wc_strength_and_weaknesses_avg": [ 282.0, 188.40514854960838 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 61.75, 50.514230668198834 ], "wc_summary_review_avg": [ 38.25, 13.460590625971804 ], "wc_review_avg": [ 493.25, 140.76465287848367 ], "wc_reply_reviewers_avg": [ 108.75, 188.3605253231154 ], "wc_reply_authors_avg": [ 572.75, 337.22868724353805 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.5222329678670935, "corr_recommendation_correctness": 0.816496580927726, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16360783716504106893&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=0Q9H_Pgx132", "email": "ucsb.edu;ucsb.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of California, Santa Barbara", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsb.edu", "aff_unique_abbr": "UCSB", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Santa Barbara", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "0UzYWLzPBjA", "title": "An Intrinsic Dimension Perspective of Transformers for Sequential Modeling", "track": "main", "status": "Withdraw", "tldr": "The analysis of transformers applied to sequential modeling from the perspective of intrinsic dimension.", "abstract": "Transformers have gained great popularity for sequential modeling, especially in fields such as natural language processing (NLP). Recently, numerous architectures based on the Transformer framework have been proposed, leading to great achievements in applications. However, the working principles behind them still remain mysterious. In this work, we numerically investigate the geometrical properties of data representation learned by Transformers, via a mathematical concept called intrinsic dimension (ID), which can be viewed as the minimal number of parameters required for modeling. A series of experiments, mainly focusing on text classification tasks, backs up the following empirical claims on relationships among embedding dimension, depth, the respective ID per layer, and task performance. First, we surprisingly observe that a higher ID (of terminal features extracted by Transformers) typically implies a lower classification error rate. This is contrary to the behavior of CNNs (or other models) on image classification tasks. In addition, it is shown that the ID per layer tends to decrease as the depth increases, and this reduction usually appears more significant for deeper architectures. Moreover, we give numerical evidence on geometrical structures of data representation learned by Transformers, where only nonlinear dimension reduction can be achieved. Finally, we explore the effect of sequence lengths on the ID and task performance, which guarantees the validity of data reduction in training. 
We hope that these findings can play a guiding role in hyper-parameter selection and dimension/data reduction for Transformers on text classification and other mainstream NLP tasks.", "keywords": "intrinsic dimension;transformer;text classification;NLP", "primary_area": "", "supplementary_material": "", "author": "Zeping Min;Qian Ge;Zhong Li", "authorids": "~Zeping_Min1;~Qian_Ge4;~Zhong_Li2", "gender": ";M;M", "homepage": ";;https://www.microsoft.com/en-us/research/people/lzhong/", "dblp": ";153/5844;", "google_scholar": ";;https://scholar.google.com/citations?view_op=list_works", "orcid": ";;", "linkedin": ";;", "or_profile": "~Zeping_Min1;~Qian_Ge4;~Zhong_Li2", "aff": ";Peking University;Microsoft Research Asia", "aff_domain": ";pku.edu.cn;microsoft.com", "position": ";MS student;Researcher", "bibtex": "@misc{\nmin2023an,\ntitle={An Intrinsic Dimension Perspective of Transformers for Sequential Modeling},\nauthor={Zeping Min and Qian Ge and Zhong Li},\nyear={2023},\nurl={https://openreview.net/forum?id=0UzYWLzPBjA}\n}", "github": "", "project": "", "reviewers": "KrRN;oq7L;NxYz;NkqR", "site": "https://openreview.net/forum?id=0UzYWLzPBjA", "pdf_size": 1073243, "recommendation": "1;3;3;5", "confidence": "4;4;4;3", "correctness": "3;2;2;3", "technical_novelty": "1;2;2;2", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "64;77;80;37", "wc_strength_and_weaknesses": "209;154;79;150", "wc_clarity_quality_novelty_and_reproducibility": "32;67;54;54", "wc_summary_review": "58;21;44;25", "wc_review": "363;319;257;266", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.0, 1.4142135623730951 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 64.5, 16.977926846349646 ], "wc_strength_and_weaknesses_avg": [ 148.0, 46.15733961137708 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 51.75, 12.577261228105266 ], "wc_summary_review_avg": [ 37.0, 14.916433890176299 ], "wc_review_avg": [ 301.25, 42.80405938693198 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.816496580927726, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:hSxBdp0VlzoJ:scholar.google.com/&scioq=An+Intrinsic+Dimension+Perspective+of+Transformers+for+Sequential+Modeling&hl=en&as_sdt=0,33", "gs_version_total": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Peking University;Microsoft", "aff_unique_dep": ";Research", "aff_unique_url": "http://www.pku.edu.cn;https://www.microsoft.com/en-us/research/group/asia", "aff_unique_abbr": "Peking U;MSR Asia", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "0VhwJYrZew", "title": "FINE: Future-Aware Inference for Streaming Speech Translation", "track": "main", "status": "Reject", "tldr": "Future-aware inference for streaming speech translation", "abstract": "A popular approach to streaming speech translation is to employ a single offline model together with a \\textit{wait-$k$} policy to support different latency requirements. 
It is a simpler alternative to training multiple online models with different latency constraints. However, there is an apparent mismatch in using a model trained with complete utterances on partial streaming speech during online inference. We demonstrate that there is a significant difference between the speech representations extracted at the end of a streaming input and their counterparts at the same positions when the complete utterance is available. Building on our observation that this problem can be alleviated by introducing a few frames of future speech signals, we propose \\textbf{F}uture-aware \\textbf{in}ferenc\\textbf{e} (FINE) for streaming speech translation with two different methods to make the model aware of the future. The first method FINE-Mask incorporates future context through a trainable masked speech model. The second method FINE-Wait simply waits for more actual future audio frames at the cost of extra latency. Experiments on the MuST-C EnDe, EnEs and EnFr benchmarks show that both methods are effective and can achieve better trade-offs between translation quality and latency than strong baselines, and a hybrid approach combining the two can achieve further improvement. Extensive analyses suggest that our methods can effectively alleviate the aforementioned mismatch problem between offline training and online inference.", "keywords": "Streaming Speech Translation;Future-Aware Inference", "primary_area": "", "supplementary_material": "/attachment/d60d1b5447e706745ca7327b652a2a6aba84dd5e.zip", "author": "Biao Fu;Kai Fan;Minpeng Liao;Zhongqiang Huang;Boxing Chen;Xiaodong Shi;Yidong Chen", "authorids": "~Biao_Fu1;~Kai_Fan1;~Minpeng_Liao1;~Zhongqiang_Huang1;~Boxing_Chen1;~Xiaodong_Shi2;~Yidong_Chen2", "gender": "M;M;;M;M;M;M", "homepage": ";https://scholar.google.com/citations?user=SQqkcdgAAAAJ&hl=zh;;;https://sites.google.com/site/chenboxing/Home;;http://nlp.xmu.edu.cn/teachers/ydchen/index_en.html", "dblp": "144/8117;20/3825-2.html;;10/3565;12/1081;73/5055;11/1492", "google_scholar": ";SQqkcdgAAAAJ;;;LiINs3gAAAAJ;;", "orcid": ";0000-0002-8256-0807;;;0000-0002-3170-4858;;", "linkedin": ";;;;;;", "or_profile": "~Biao_Fu1;~Kai_Fan1;~Minpeng_Liao1;~Zhongqiang_Huang1;~Boxing_Chen1;~Xiaodong_Shi2;~Yidong_Chen2", "aff": "Xiamen University;Alibaba Group;;Alibaba Group;Huawei Technologies Ltd.;Xiamen University, Tsinghua University;Xiamen University", "aff_domain": "xmu.edu.cn;alibaba-inc.com;;alibaba-inc.com;huawei.com;xmu.edu.cn;xmu.edu.cn", "position": "MS student;Researcher;;Senior Staff Engineer;Principal Researcher;Full Professor;Associate Professor", "bibtex": "@misc{\nfu2023fine,\ntitle={{FINE}: Future-Aware Inference for Streaming Speech Translation},\nauthor={Biao Fu and Kai Fan and Minpeng Liao and Zhongqiang Huang and Boxing Chen and Xiaodong Shi and Yidong Chen},\nyear={2023},\nurl={https://openreview.net/forum?id=0VhwJYrZew}\n}", "github": "", "project": "", "reviewers": "2wmu;xgfh;JSnA;uAxq;3NNH", "site": "https://openreview.net/forum?id=0VhwJYrZew", "pdf_size": 590643, "recommendation": "5;5;6;6;8", "confidence": "3;4;4;4;3", "correctness": "3;3;4;4;4", "technical_novelty": "2;2;3;3;4", "empirical_novelty": "3;3;3;3;4", "wc_summary_paper": "46;96;49;162;49", "wc_strength_and_weaknesses": "75;179;129;163;173", "wc_clarity_quality_novelty_and_reproducibility": "14;281;50;31;26", "wc_summary_review": "212;104;39;29;17", "wc_review": "347;660;267;385;265", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "500;772;612;623;547", "reply_reviewers": 
"0;0;0;0;0", "reply_authors": "1;2;1;1;1", "recommendation_avg": [ 6.0, 1.0954451150103321 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "correctness_avg": [ 3.6, 0.4898979485566356 ], "technical_novelty_avg": [ 2.8, 0.7483314773547882 ], "empirical_novelty_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_paper_avg": [ 80.4, 44.84908025812793 ], "wc_strength_and_weaknesses_avg": [ 143.8, 38.50402576354841 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 80.4, 100.96850994245682 ], "wc_summary_review_avg": [ 80.2, 72.45798782743005 ], "wc_review_avg": [ 384.8, 145.1845721831352 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 610.8, 92.17678666562423 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.372677996249965, "corr_recommendation_correctness": 0.74535599249993, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:_Q458LGLjvEJ:scholar.google.com/&scioq=FINE:+Future-Aware+Inference+for+Streaming+Speech+Translation&hl=en&as_sdt=0,21", "gs_version_total": 0, "aff_unique_index": "0;1;1;2;0;0", "aff_unique_norm": "Xiamen University;Alibaba Group;Huawei", "aff_unique_dep": ";;Huawei Technologies", "aff_unique_url": "https://www.xmu.edu.cn;https://www.alibaba.com;https://www.huawei.com", "aff_unique_abbr": "XMU;Alibaba;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Capturing the Motion of Every Joint: 3D Human Pose and Shape Estimation with Independent Tokens", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11251", "id": "0Vv4H4Ch0la", "poster": "/media/PosterPDFs/ICLR%202023/11251.png?t=1680761102.998848", "openreview": "https://openreview.net/forum?id=0Vv4H4Ch0la", "slides": "https://iclr.cc/virtual/2023/poster/11251", "video": "https://iclr.cc/virtual/2023/poster/11251", "author_site": "Sen Yang, Wen Heng, Gang Liu, GUOZHONG LUO, Wankou Yang, Gang Yu", "tldr": "We present a novel, effective and robust model with designed independent tokens to estimate 3D human pose and shape from monocular videos", "abstract": "In this paper we present a novel method to estimate 3D human pose and shape from monocular videos. This task requires directly recovering pixel-alignment 3D human pose and body shape from monocular images or videos, which is challenging due to its inherent ambiguity. To improve precision, existing methods highly rely on the initialized mean pose and shape as prior estimates and parameter regression with an iterative error feedback manner. In addition, video-based approaches model the overall change over the image-level features to temporally enhance the single-frame feature, but fail to capture the rotational motion at the joint level, and cannot guarantee local temporal consistency. To address these issues, we propose a novel Transformer-based model with a design of independent tokens. First, we introduce three types of tokens independent of the image feature: \\textit{joint rotation tokens, shape token, and camera token}. \nBy progressively interacting with image features through Transformer layers, these tokens learn to encode the prior knowledge of human 3D joint rotations, body shape, and position information from large-scale data, and are updated to estimate SMPL parameters conditioned on a given image. 
Second, benefiting from the proposed token-based representation, we further use a temporal model to focus on capturing the rotational temporal information of each joint, which is empirically conducive to preventing large jitters in local parts. Despite being conceptually simple, the proposed method attains superior performance on the 3DPW and Human3.6M datasets. Using ResNet-50 and Transformer architectures, it obtains 42.0 mm error on the PA-MPJPE metric of the challenging 3DPW, outperforming state-of-the-art counterparts by a large margin. Code will be publicly available\\footnote{\\url{https://github.com/yangsenius/INT_HMR_Model}}.", "keywords": "3D human pose and shape estimation;3D human reconstruction;transformer;independent tokens;temporal modeling;joint rotational motion", "primary_area": "", "supplementary_material": "/attachment/1d15422a02dd78bc59ea3be5022ed237cd8f84fc.zip", "author": "Sen Yang;Wen Heng;Gang Liu;GUOZHONG LUO;Wankou Yang;Gang YU", "authorids": "~Sen_Yang3;~Wen_Heng2;~Gang_Liu1;~GUOZHONG_LUO2;~Wankou_Yang1;~Gang_YU2", "gender": "M;M;;M;M;M", "homepage": ";;;https://github.com/guozhongluo;https://automation.seu.edu.cn/ywk/list.htm;https://skicyyu.org/", "dblp": ";;;;99/3602;", "google_scholar": "z5O3DLcAAAAJ;;ZyzfB9sAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.sg/citations?user=BJdigYsAAAAJ", "orcid": ";0000-0002-8780-6693;;;;0000-0001-5570-2710", "linkedin": ";;;;;", "or_profile": "~Sen_Yang3;~Wen_Heng2;~Gang_Liu1;~GUOZHONG_LUO2;~Wankou_Yang1;~Gang_YU2", "aff": "Southeast University;;;Tencent GY Lab;Southeast University;Tencent", "aff_domain": "seu.edu.cn;;;tencent.com;seu.edu.cn;tencent.com", "position": "PhD student;;;Instructor;Full Professor;Research Scientist", "bibtex": "@inproceedings{\nyang2023capturing,\ntitle={Capturing the Motion of Every Joint: 3D Human Pose and Shape Estimation with Independent Tokens},\nauthor={Sen Yang and Wen Heng and Gang Liu and GUOZHONG LUO and Wankou Yang and Gang YU},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=0Vv4H4Ch0la}\n}", "github": "", "project": "", "reviewers": "pLqz;w5go;EHtd", "pdf_size": 8365938, "recommendation": "6;6;8", "confidence": "4;3;4", "correctness": "3;4;4", "technical_novelty": "3;2;3", "empirical_novelty": "3;2;3", "wc_summary_paper": "72;60;101", "wc_strength_and_weaknesses": "209;63;182", "wc_clarity_quality_novelty_and_reproducibility": "7;60;98", "wc_summary_review": "53;55;28", "wc_review": "341;238;409", "wc_reply_reviewers": "458;0;0", "wc_reply_authors": "1631;564;587", "reply_reviewers": "2;0;0", "reply_authors": "4;1;1", "recommendation_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 77.66666666666667, 17.21110752456745 ], "wc_strength_and_weaknesses_avg": [ 151.33333333333334, 63.42624763367993 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 55.0, 37.31844941401862 ], "wc_summary_review_avg": [ 45.333333333333336, 12.283683848458853 ], "wc_review_avg": [ 329.3333333333333, 70.29619872763787 ], "wc_reply_reviewers_avg": [ 152.66666666666666, 215.90327052229253 ], "wc_reply_authors_avg": [ 927.3333333333334, 497.65606150790086 ], "reply_reviewers_avg": [ 0.6666666666666666, 
0.9428090415820634 ], "reply_authors_avg": [ 2.0, 1.4142135623730951 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.49999999999999983, "corr_recommendation_correctness": 0.49999999999999983, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11283881215145831135&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=0Vv4H4Ch0la", "email": "seu.edu.cn;;;tencent.com;seu.edu.cn;tencent.com", "author_num": 6, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "Southeast University;Tencent", "aff_unique_dep": ";GY Lab", "aff_unique_url": "https://www.seu.edu.cn/;https://www.tencent.com", "aff_unique_abbr": "SEU;Tencent", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "0W1TQ_hoMFN", "title": "Policy Architectures for Compositional Generalization in Control", "track": "main", "status": "Reject", "tldr": "", "abstract": "Several tasks in control, robotics, and planning can be specified through desired goal configurations for entities in the environment. Learning goal-conditioned policies is a natural paradigm to solve such tasks. However, learning and generalizing on complex tasks can be challenging due to variations in the number of entities or the composition of goals. To address this challenge, we introduce the Entity-Factored Markov Decision Process (EFMDP), a formal framework for modeling the entity-based compositional structure in control tasks. Geometrical properties of the EFMDP framework provide theoretical motivation for policy architecture design, particularly Deep Sets and popular relational mechanisms such as graphs and self-attention. These structured policy architectures are flexible and can be trained end-to-end with standard reinforcement and imitation learning algorithms. We study and compare the learning and generalization properties of these architectures on a suite of simulated robot manipulation tasks, finding that they achieve significantly higher success rates with less data compared to standard multilayer perceptrons. Structured policies also enable broader and more compositional generalization, producing policies that extrapolate to different numbers of entities than seen in training, and stitch together (i.e. compose) learned skills in novel ways. 
Video results can be found at https://sites.google.com/view/comp-gen-anon.", "keywords": "Reinforcement Learning;Imitation Learning;Compositionality", "primary_area": "", "supplementary_material": "", "author": "Allan Zhou;Vikash Kumar;Chelsea Finn;Aravind Rajeswaran", "authorids": "~Allan_Zhou1;~Vikash_Kumar2;~Chelsea_Finn1;~Aravind_Rajeswaran1", "gender": ";M;F;M", "homepage": "http://bland.website;http://vikashplus.github.io/;https://ai.stanford.edu/~cbfinn/;http://aravindr93.github.io/", "dblp": "195/6907;82/7475;131/1783;164/5778", "google_scholar": ";nu3W--sAAAAJ;vfPE6hgAAAAJ;_EJrRVAAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Allan_Zhou1;~Vikash_Kumar2;~Chelsea_Finn1;~Aravind_Rajeswaran1", "aff": "Google Deepmind;Meta Facebook;Google;Meta Facebook", "aff_domain": "google.com;facebook.com;google.com;meta.com", "position": "Intern;Researcher;Research Scientist;Research Scientist", "bibtex": "@misc{\nzhou2023policy,\ntitle={Policy Architectures for Compositional Generalization in Control},\nauthor={Allan Zhou and Vikash Kumar and Chelsea Finn and Aravind Rajeswaran},\nyear={2023},\nurl={https://openreview.net/forum?id=0W1TQ_hoMFN}\n}", "github": "", "project": "", "reviewers": "KcU5;Et3S;HR7M;dhRF", "site": "https://openreview.net/forum?id=0W1TQ_hoMFN", "pdf_size": 1772392, "recommendation": "3;5;6;8", "confidence": "3;3;3;3", "correctness": "2;4;3;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "66;68;63;86", "wc_strength_and_weaknesses": "451;207;183;172", "wc_clarity_quality_novelty_and_reproducibility": "22;16;26;78", "wc_summary_review": "200;37;213;52", "wc_review": "739;328;485;388", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "727;235;148;259", "reply_reviewers": "0;0;0;0", "reply_authors": "2;1;1;1", "recommendation_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 3.0, 0.0 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 70.75, 8.98262211161084 ], "wc_strength_and_weaknesses_avg": [ 253.25, 114.87030730349771 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 35.5, 24.794152536434876 ], "wc_summary_review_avg": [ 125.5, 81.30344396149526 ], "wc_review_avg": [ 485.0, 156.9824830992299 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 342.25, 225.94177900512335 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.39223227027636803, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6905041105503073130&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "DeepMind;Meta;Google", "aff_unique_dep": "DeepMind;Meta Platforms, Inc.;Google", "aff_unique_url": "https://deepmind.com;https://meta.com;https://www.google.com", "aff_unique_abbr": "DeepMind;Meta;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Deep Reinforcement Learning for Cost-Effective Medical Diagnosis", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11964", "id": "0WVNuEnqVu", "poster": "/media/PosterPDFs/ICLR%202023/11964.png?t=1680881050.7846823", "openreview": "https://openreview.net/forum?id=0WVNuEnqVu", "slides": 
"https://iclr.cc/virtual/2023/poster/11964", "video": "https://iclr.cc/virtual/2023/poster/11964", "author_site": "Zheng Yu, Yikuan Li, Joseph Kim, Kaixuan Huang, Yuan Luo, Mengdi Wang", "tldr": "Our proposed RL-based approach is able to reduce up to 85% testing cost while having the state-of-art diagnosis accuracy in three real-world medical diagnostics tasks.", "abstract": "Dynamic diagnosis is desirable when medical tests are costly or time-consuming. In this work, we use reinforcement learning (RL) to find a dynamic policy that selects lab test panels sequentially based on previous observations, ensuring accurate testing at a low cost. Clinical diagnostic data are often highly imbalanced; therefore, we aim to maximize the F1 score instead of the error rate. However, optimizing the non-concave $F_1$ score is not a classic RL problem, thus invalidating standard RL methods. To remedy this issue, we develop a reward shaping approach, leveraging properties of the $F_1$ score and duality of policy optimization, to provably find the set of all Pareto-optimal policies for budget-constrained $F_1$ score maximization. To handle the combinatorially complex state space, we propose a Semi-Model-based Deep Diagnosis Policy Optimization (SM-DDPO) framework that is compatible with end-to-end training and online learning. SM-DDPO is tested on diverse clinical tasks: ferritin abnormality detection, sepsis mortality prediction, and acute kidney injury diagnosis. Experiments with real-world data validate that SM-DDPO trains efficiently and identify all Pareto-front solutions. Across all tasks, SM-DDPO is able to achieve state-of-the-art diagnosis accuracy (in some cases higher than conventional methods) with up to $85\\%$ reduction in testing cost. Core codes are available at https://github.com/Zheng321/Deep-Reinforcement-Learning-for-Cost-Effective-Medical-Diagnosis.", "keywords": "medical diagnostics;Pareto front;reinforcement learning;non-Markovian reward;semi-model-based policy optimization", "primary_area": "", "supplementary_material": "/attachment/1c3ec24113d98c528ea31725284d6d94294a392c.zip", "author": "Zheng Yu;Yikuan Li;Joseph Chahn Kim;Kaixuan Huang;Yuan Luo;Mengdi Wang", "authorids": "~Zheng_Yu1;~Yikuan_Li1;~Joseph_Chahn_Kim1;~Kaixuan_Huang1;~Yuan_Luo3;~Mengdi_Wang1", "gender": "M;M;M;M;M;F", "homepage": "https://sites.google.com/view/zhengyu/;;https://www.hadtomakeaurl.com/;https://hackyhuang.github.io/;https://www.feinberg.northwestern.edu/faculty-profiles/az/profile.html?xid=33821;http://mwang.princeton.edu", "dblp": "28/4466;;;;90/6959-4;", "google_scholar": ";1RvQidMAAAAJ;;EfxwV6oAAAAJ;txsHQx4AAAAJ;", "orcid": "; 0000-0001-7546-9979;;;;", "linkedin": ";;;;yuan-luo-16797137/;", "or_profile": "~Zheng_Yu1;~Yikuan_Li1;~Joseph_Chahn_Kim1;~Kaixuan_Huang1;~Yuan_Luo3;~Mengdi_Wang1", "aff": "Alibaba Group;Northwestern University;Princeton University;Princeton University;Northwestern University;Princeton University", "aff_domain": "alibaba-inc.com;northwestern.edu;princeton.edu;princeton.edu;northwestern.edu;princeton.edu", "position": "Researcher;PhD student;PhD student;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nyu2023deep,\ntitle={Deep Reinforcement Learning for Cost-Effective Medical Diagnosis},\nauthor={Zheng Yu and Yikuan Li and Joseph Chahn Kim and Kaixuan Huang and Yuan Luo and Mengdi Wang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=0WVNuEnqVu}\n}", "github": "", "project": 
"", "reviewers": "i4nR;Yh4E;Ws9d", "pdf_size": 1991483, "recommendation": "6;6;8", "confidence": "3;3;3", "correctness": "3;3;3", "technical_novelty": "3;3;3", "empirical_novelty": "3;3;2", "wc_summary_paper": "60;78;156", "wc_strength_and_weaknesses": "134;107;400", "wc_clarity_quality_novelty_and_reproducibility": "113;22;105", "wc_summary_review": "107;41;21", "wc_review": "414;248;682", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.0 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 98.0, 41.66533331199932 ], "wc_strength_and_weaknesses_avg": [ 213.66666666666666, 132.21783372735902 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 80.0, 41.14203041497426 ], "wc_summary_review_avg": [ 56.333333333333336, 36.745370078721784 ], "wc_review_avg": [ 448.0, 178.80343024300922 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2326330240216545592&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=0WVNuEnqVu", "email": "alibaba-inc.com;northwestern.edu;princeton.edu;princeton.edu;northwestern.edu;princeton.edu", "author_num": 6, "aff_unique_index": "0;1;2;2;1;2", "aff_unique_norm": "Alibaba Group;Northwestern University;Princeton University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.alibaba.com;https://www.northwestern.edu;https://www.princeton.edu", "aff_unique_abbr": "Alibaba;NU;Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1;1", "aff_country_unique": "China;United States" }, { "id": "0Wu7vlNZ8f", "title": "Graph Attention Retrospective", "track": "main", "status": "Desk Reject", "tldr": "We characterize the power of graph attention mechanism for distinguishing inter-class from intra-class edges over contextual stochastic block models.", "abstract": "Graph-based learning is a rapidly growing sub-field of machine learning with applications in social networks, citation networks, and bioinformatics. One of the most popular type of models is graph attention networks. These models were introduced to allow a node to aggregate information from the features of neighbor nodes in a non-uniform way, in contrast to simple graph convolution which does not distinguish the neighbors of a node. In this paper, we study theoretically this expected behaviour of graph attention networks. We prove multiple results on the performance of the graph attention mechanism for the problem of node classification for a contextual stochastic block model. Here the features of the nodes are obtained from a mixture of Gaussians and the edges from a stochastic block model where the features and the edges are coupled in a natural way. First, we show that in an \"easy\" regime, where the distance between the means of the Gaussians is large enough, graph attention is able to distinguish inter-class from intra-class edges, and thus it maintains the weights of important edges and significantly reduces the weights of unimportant edges. 
As a corollary, we show that this implies perfect node classification. However, a classical argument shows that in the \"easy\" regime, the graph is not needed at all to classify the data with high probability. In the \"hard\" regime, we show that every attention mechanism fails to distinguish intra-class from inter-class edges. We evaluate our theoretical results on synthetic and real-world data.", "keywords": "Graph attention;graph neural networks", "primary_area": "", "supplementary_material": "", "author": "Kimon Fountoulakis;Amit Levi;Shenghao Yang;Aseem Baranwal;Aukosh Jagannath", "authorids": "~Kimon_Fountoulakis1;~Amit_Levi1;~Shenghao_Yang1;~Aseem_Baranwal1;~Aukosh_Jagannath1", "gender": "M;M;M;M;", "homepage": "https://opallab.ca;https://sites.google.com/view/amit-levi/home;https://cs.uwaterloo.ca/~s286yang/;https://aseemrb.me;", "dblp": "149/5799;161/4014.html;41/4482-2;285/5304;", "google_scholar": "https://scholar.google.ca/citations?user=K-SafJUAAAAJ;https://scholar.google.ca/citations?user=kb4ubhcAAAAJ;ocLDM-AAAAAJ;DPt626YAAAAJ;", "orcid": ";;;0000-0001-5318-6054;", "linkedin": ";;;aseemrb/;", "or_profile": "~Kimon_Fountoulakis1;~Amit_Levi1;~Shenghao_Yang1;~Aseem_Baranwal1;~Aukosh_Jagannath1", "aff": "University of Waterloo;Huawei Noah\u2019s Ark Lab;University of Waterloo;University of Waterloo;", "aff_domain": "uwaterloo.ca;huawei.com;uwaterloo.ca;uwaterloo.ca;", "position": "Assistant Professor;Researcher;PhD student;PhD student;", "bibtex": "@misc{\nfountoulakis2023graph,\ntitle={Graph Attention Retrospective},\nauthor={Kimon Fountoulakis and Amit Levi and Shenghao Yang and Aseem Baranwal and Aukosh Jagannath},\nyear={2023},\nurl={https://openreview.net/forum?id=0Wu7vlNZ8f}\n}", "github": "", "project": "", "reviewers": "", "site": "https://openreview.net/forum?id=0Wu7vlNZ8f", "pdf_size": 3409174, "recommendation": "", "confidence": "", "correctness": "", "technical_novelty": "", "empirical_novelty": "", "wc_summary_paper": "", "wc_strength_and_weaknesses": "", "wc_clarity_quality_novelty_and_reproducibility": "", "wc_summary_review": "", "wc_review": "", "wc_reply_reviewers": "", "wc_reply_authors": "", "reply_reviewers": "", "reply_authors": "", "recommendation_avg": [ 0, 0 ], "confidence_avg": [ 0, 0 ], "correctness_avg": [ 0, 0 ], "technical_novelty_avg": [ 0, 0 ], "empirical_novelty_avg": [ 0, 0 ], "wc_summary_paper_avg": [ 0, 0 ], "wc_strength_and_weaknesses_avg": [ 0, 0 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 0, 0 ], "wc_summary_review_avg": [ 0, 0 ], "wc_review_avg": [ 0, 0 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 1, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0, "corr_recommendation_correctness": 0, "gs_citation": 50, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16034513926440034157&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Waterloo;Huawei", "aff_unique_dep": ";Noah\u2019s Ark Lab", "aff_unique_url": "https://uwaterloo.ca;https://www.huawei.com", "aff_unique_abbr": "UW;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "Canada;China" }, { "id": "0YXmOFLb1wQ", "title": "MotifExplainer: a Motif-based Graph Neural Network Explainer", "track": "main", "status": "Reject", "tldr": "We propose a motif-based explainer that can provide better 
human-understandable explanations than methods based on nodes, edges, and regular subgraphs.", "abstract": "We consider the explanation problem of Graph Neural Networks (GNNs). Most existing GNN explanation methods identify the most important edges or nodes but fail to consider substructures, which are more important for graph data. One method considering subgraphs tries to search all possible subgraphs and identifies the most significant ones. However, the subgraphs identified may not be recurrent or statistically important for interpretation. This work proposes a novel method, named MotifExplainer, to explain GNNs by identifying important motifs, which are recurrent and statistically significant patterns in graphs. Our proposed motif-based methods can provide better human-understandable explanations than methods based on nodes, edges, and regular subgraphs. Given an instance graph and a pre-trained GNN model, our method first extracts motifs in the graph using domain-specific motif extraction rules. Then, a motif embedding is encoded by feeding motifs into the pre-trained GNN. Finally, we employ an attention-based method to identify the most influential motifs as explanations for the prediction results. The empirical studies on both synthetic and real-world datasets demonstrate the effectiveness of our method.", "keywords": "Graph Neural Networks;Explainer;Motif", "primary_area": "", "supplementary_material": "/attachment/6bc009b5954372e8bae9b5c102083e34dbc377e0.zip", "author": "Zhaoning Yu;Hongyang Gao", "authorids": "~Zhaoning_Yu2;~Hongyang_Gao1", "gender": "M;M", "homepage": "https://faculty.sites.iastate.edu/hygao/;https://zhaoningyu1996.github.io/", "dblp": "200/7985;313/1914", "google_scholar": "jGmq0aEAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-9020-9080;0000-0001-6813-779X", "linkedin": "hongyang-gao-74924690/;zhaoning-yu-112773168/", "or_profile": "~Hongyang_Gao1;~ZHAONING_YU1", "aff": "Iowa State University;Iowa State University", "aff_domain": "iastate.edu;iastate.edu", "position": "Assistant Professor;PhD student", "bibtex": "@misc{\nyu2023motifexplainer,\ntitle={MotifExplainer: a Motif-based Graph Neural Network Explainer},\nauthor={Zhaoning Yu and Hongyang Gao},\nyear={2023},\nurl={https://openreview.net/forum?id=0YXmOFLb1wQ}\n}", "github": "", "project": "", "reviewers": "EDg3;M4gL;U3mX;ayU3;b3v8", "site": "https://openreview.net/forum?id=0YXmOFLb1wQ", "pdf_size": 3357845, "recommendation": "3;5;5;6;6", "confidence": "5;3;3;4;4", "correctness": "2;3;3;3;3", "technical_novelty": "3;2;2;4;2", "empirical_novelty": "3;2;3;3;3", "wc_summary_paper": "114;82;56;104;33", "wc_strength_and_weaknesses": "394;221;260;203;58", "wc_clarity_quality_novelty_and_reproducibility": "69;35;58;17;39", "wc_summary_review": "54;30;61;44;255", "wc_review": "631;368;435;368;385", "wc_reply_reviewers": "0;0;0;62;213", "wc_reply_authors": "854;310;487;256;470", "reply_reviewers": "0;0;0;2;1", "reply_authors": "2;1;1;2;2", "recommendation_avg": [ 5.0, 1.0954451150103321 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "correctness_avg": [ 2.8, 0.39999999999999997 ], "technical_novelty_avg": [ 2.6, 0.8 ], "empirical_novelty_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_paper_avg": [ 77.8, 29.989331436362498 ], "wc_strength_and_weaknesses_avg": [ 227.2, 107.8432195365105 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 43.6, 18.194504664870657 ], "wc_summary_review_avg": [ 88.8, 83.7505820875294 ], "wc_review_avg": [ 437.4, 99.86510902212044 ], 
"wc_reply_reviewers_avg": [ 55.0, 82.5687592252663 ], "wc_reply_authors_avg": [ 475.4, 209.28220182327976 ], "reply_reviewers_avg": [ 0.6, 0.8 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.48795003647426666, "corr_recommendation_correctness": 0.9128709291752771, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14705695650780903594&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0", "aff_unique_norm": "Iowa State University", "aff_unique_dep": "", "aff_unique_url": "https://www.iastate.edu", "aff_unique_abbr": "ISU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "0YYQ_KKsIZ", "title": "BrGANs: Stabilizing GANs' Training Process with Brownian Motion Control", "track": "main", "status": "Reject", "tldr": "We propose a higher order Brownian Motion Controller (BMC) for BrGANs to stablize GANs' training process ", "abstract": "The training process of generative adversarial networks (GANs) is unstable and does not converge globally. In this paper, we propose a universal higher order noise based control called Brownian Motion Control (BMC) that is invariant to GANs frameworks so that the training process of GANs is exponential stable almost surely. Specifically, starting with the prototypical case of Dirac-GANs, we design a BMC and propose Dirac-BrGANs that retrieve exactly the same but reachable optimal equilibrium regardless of GANs' framework. The optimal equilibrium of our Dirac-BrGANs' training system is globally unique and always exists. Furthermore, the training process of Dirac-BrGANs achieve exponentially stability almost surely for any arbitrary initial value. Then we extend our BMC to normal GANs' settings and propose BrGANs. We provide numerical experiments showing that our BrGANs effectively stabilizes GANs's training process and obtains state-of-the art performance compared to other stabilizing methods. 
", "keywords": "GAN;stability;control theory;Brownian motion", "primary_area": "", "supplementary_material": "/attachment/2075818b29fdb8de15c743dd4ac987e5c63223ee.zip", "author": "Tianjiao Luo;Ziyu Zhu;Gabriele Oliaro;Jun Zhu;Zhidong Deng", "authorids": "~Tianjiao_Luo1;~Ziyu_Zhu1;~Gabriele_Oliaro1;~Jun_Zhu2;~Zhidong_Deng1", "gender": "F;M;M;M;M", "homepage": "https://tianjiaoluo.github.io/;https://scholar.google.com/citations?user=Zhh8nbQAAAAJ&hl=en;https://www.gabrieleoliaro.com;http://ml.cs.tsinghua.edu.cn/~jun;https://thurid.lib.tsinghua.edu.cn/scholar/651107", "dblp": "240/3012;;304/2364;50/2644-1;", "google_scholar": "8GETNEsAAAAJ;Zhh8nbQAAAAJ;6-evBPAAAAAJ;axsP38wAAAAJ;qfewonIAAAAJ", "orcid": ";0000-0003-1556-0791;0000-0001-5406-0736;;", "linkedin": "tianjiao-luo-99a60bba/;;gabrieleoliaro/;;", "or_profile": "~Tianjiao_Luo1;~Ziyu_Zhu1;~Gabriele_Oliaro1;~Jun_Zhu2;~Zhidong_Deng1", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "mails.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;mail.tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;PhD student;MS student;Professor;Full Professor", "bibtex": "@misc{\nluo2023brgans,\ntitle={Br{GAN}s: Stabilizing {GAN}s' Training Process with Brownian Motion Control},\nauthor={Tianjiao Luo and Ziyu Zhu and Gabriele Oliaro and Jun Zhu and Zhidong Deng},\nyear={2023},\nurl={https://openreview.net/forum?id=0YYQ_KKsIZ}\n}", "github": "", "project": "", "reviewers": "y1pm;sHwP;Vy2E;YEms", "site": "https://openreview.net/forum?id=0YYQ_KKsIZ", "pdf_size": 4289172, "recommendation": "3;3;5;5", "confidence": "3;4;3;4", "correctness": "3;3;3;3", "technical_novelty": "3;2;3;3", "empirical_novelty": "2;2;3;2", "wc_summary_paper": "39;60;40;62", "wc_strength_and_weaknesses": "155;230;170;251", "wc_clarity_quality_novelty_and_reproducibility": "235;12;9;64", "wc_summary_review": "61;39;28;23", "wc_review": "490;341;247;400", "wc_reply_reviewers": "0;0;95;0", "wc_reply_authors": "252;71;99;281", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 50.25, 10.779030568655049 ], "wc_strength_and_weaknesses_avg": [ 201.5, 40.05308976845607 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 80.0, 92.12220145003049 ], "wc_summary_review_avg": [ 37.75, 14.618053906043718 ], "wc_review_avg": [ 369.5, 88.4152136229959 ], "wc_reply_reviewers_avg": [ 23.75, 41.13620667976084 ], "wc_reply_authors_avg": [ 175.75, 91.86232905821625 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:4anxhTVmXzgJ:scholar.google.com/&scioq=BrGANs:+Stabilizing+GANs%27+Training+Process+with+Brownian+Motion+Control&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "A law of adversarial risk, interpolation, and label noise", 
"status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11239", "id": "0_TxFpAsEI", "poster": "/media/PosterPDFs/ICLR%202023/11239.png?t=1682895390.341371", "openreview": "https://openreview.net/forum?id=0_TxFpAsEI", "slides": "https://iclr.cc/virtual/2023/poster/11239", "video": "https://iclr.cc/virtual/2023/poster/11239", "author_site": "Daniel Paleka, Amartya Sanyal", "tldr": "Laws for how interpolating label noise increases adversarial risk, with stronger guarantees in presence of inductive bias and distributional assumptions.", "abstract": "In supervised learning, it has been shown that label noise in the data can be interpolated without penalties on test accuracy. We show that interpolating label noise induces adversarial vulnerability, and prove the first theorem showing the relationship between label noise and adversarial risk for any data distribution. Our results are almost tight if we do not make any assumptions on the inductive bias of the learning algorithm. We then investigate how different components of this problem affect this result including properties of the distribution. We also discuss non-uniform label noise distributions; and prove a new theorem showing uniform label noise induces nearly as large an adversarial risk as the worst poisoning with the same noise rate. Then, we provide theoretical and empirical evidence that uniform label noise is more harmful than typical real-world label noise. Finally, we show how inductive biases amplify the effect of label noise and argue the need for future work in this direction.", "keywords": "label noise;adversarial robustness;lower bound;robust machine learning", "primary_area": "", "supplementary_material": "", "author": "Daniel Paleka;Amartya Sanyal", "authorids": "~Daniel_Paleka1;~Amartya_Sanyal1", "gender": ";M", "homepage": "https://danielpaleka.com/;https://amartya18x.github.io", "dblp": "324/2779;203/8807", "google_scholar": ";", "orcid": ";0000-0002-4190-0449", "linkedin": ";", "or_profile": "~Daniel_Paleka1;~Amartya_Sanyal1", "aff": "Department of Computer Science, ETHZ - ETH Zurich;Swiss Federal Institute of Technology", "aff_domain": "inf.ethz.ch;ethz.ch", "position": "PhD student;Postdoc", "bibtex": "@inproceedings{\npaleka2023a,\ntitle={A law of adversarial risk, interpolation, and label noise},\nauthor={Daniel Paleka and Amartya Sanyal},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=0_TxFpAsEI}\n}", "github": "", "project": "", "reviewers": "AQu9;59Qr;HmKE;eERJ;sRSH;BXAC;UsFF;BqSH", "pdf_size": 3544468, "recommendation": "6;6;6;6;6;6;8;8", "confidence": "4;3;3;4;3;3;3;4", "correctness": "3;4;3;4;4;4;3;4", "technical_novelty": "2;3;2;2;3;3;4;4", "empirical_novelty": "3;3;3;0;2;0;4;0", "wc_summary_paper": "144;117;91;202;185;54;53;218", "wc_strength_and_weaknesses": "177;332;188;634;175;83;177;249", "wc_clarity_quality_novelty_and_reproducibility": "106;10;248;4;101;16;47;123", "wc_summary_review": "88;34;41;80;38;60;41;31", "wc_review": "515;493;568;920;499;213;318;621", "wc_reply_reviewers": "186;16;0;310;0;0;0;0", "wc_reply_authors": "783;641;817;1672;468;505;158;575", "reply_reviewers": "1;1;0;2;0;0;0;0", "reply_authors": "1;1;2;3;1;1;1;1", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.375, 0.4841229182759271 ], "correctness_avg": [ 3.625, 0.4841229182759271 ], "technical_novelty_avg": [ 2.875, 0.7806247497997998 ], "empirical_novelty_avg": [ 1.875, 1.5360257159305635 ], 
"wc_summary_paper_avg": [ 133.0, 60.69596362197407 ], "wc_strength_and_weaknesses_avg": [ 251.875, 158.96889436301683 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 81.875, 76.66719882061689 ], "wc_summary_review_avg": [ 51.625, 20.45077932500373 ], "wc_review_avg": [ 518.375, 196.4471541534771 ], "wc_reply_reviewers_avg": [ 64.0, 110.78357278947091 ], "wc_reply_authors_avg": [ 702.375, 413.8290521157257 ], "reply_reviewers_avg": [ 0.5, 0.7071067811865476 ], "reply_authors_avg": [ 1.375, 0.6959705453537527 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.14907119849998599, "corr_recommendation_correctness": -0.14907119849998599, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15509377812307193630&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=0_TxFpAsEI", "email": "inf.ethz.ch;ethz.ch", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "ETH Zurich;Swiss Federal Institute of Technology", "aff_unique_dep": "Department of Computer Science;", "aff_unique_url": "https://www.ethz.ch;https://www.ethz.ch", "aff_unique_abbr": "ETHZ;ETH Zurich", "aff_campus_unique_index": "0", "aff_campus_unique": "Zurich;", "aff_country_unique_index": "0;0", "aff_country_unique": "Switzerland" }, { "id": "0a5r6iNmacV", "title": "On Storage Neural Network Augmented Approximate Nearest Neighbor Search", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Large-scale approximate nearest neighbor search (ANN) has been gaining attention along with the latest machine learning researches employing ANNs. If the data is too large to fit in memory, it is necessary to search for the most similar vectors to a given query vector from the data stored in storage devices, not from that in memory. The storage device such as NAND flash memory has larger capacity than the memory device such as DRAM, but they also have larger latency to read data. Therefore, ANN methods for storage require completely different approaches from conventional in-memory ANN methods. Since the approximation that the time required for search is determined only by the amount of data fetched from storage holds under reasonable assumptions, our goal is to minimize it while maximizing recall. For partitioning-based ANNs, vectors are partitioned into clusters in the index building phase. In the search phase, some of the clusters are chosen, the vectors in the chosen clusters are fetched from storage, and the nearest vector is retrieved from the fetched vectors. Thus, the key point is to accurately select the clusters containing the ground truth nearest neighbor vectors. We accomplish this by proposing a method to predict the correct clusters by means of a neural network that is gradually refined by alternating supervised learning and duplicated cluster assignment. 
Compared to state-of-the-art SPANN and an exhaustive method using k-means clustering and linear search, the proposed method achieves 90% recall on SIFT1M with 80% and 58% less data fetched from storage, respectively.", "keywords": "approximate nearest neighbor search;neural network", "primary_area": "", "supplementary_material": "", "author": "Taiga Ikeda;Daisuke Miyashita;Jun Deguchi", "authorids": "~Taiga_Ikeda1;~Daisuke_Miyashita2;~Jun_Deguchi1", "gender": ";M;", "homepage": ";;", "dblp": "261/6063.html;;40/6985.html", "google_scholar": ";;", "orcid": ";0000-0003-2108-3397;", "linkedin": ";;", "or_profile": "~Taiga_Ikeda1;~Daisuke_Miyashita2;~Jun_Deguchi1", "aff": "Kioxia Corporation;Kioxia Corporation;Kioxia Corporation", "aff_domain": "kioxia.com;kioxia.com;kioxia.com", "position": "Researcher;Researcher;Researcher", "bibtex": "@misc{\nikeda2023on,\ntitle={On Storage Neural Network Augmented Approximate Nearest Neighbor Search},\nauthor={Taiga Ikeda and Daisuke Miyashita and Jun Deguchi},\nyear={2023},\nurl={https://openreview.net/forum?id=0a5r6iNmacV}\n}", "github": "", "project": "", "reviewers": "m33f;cyNf;UKqC;xAfd", "site": "https://openreview.net/forum?id=0a5r6iNmacV", "pdf_size": 4102192, "recommendation": "3;3;5;5", "confidence": "3;4;5;4", "correctness": "3;2;3;4", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;1;2;2", "wc_summary_paper": "105;103;260;61", "wc_strength_and_weaknesses": "360;187;145;361", "wc_clarity_quality_novelty_and_reproducibility": "22;32;347;18", "wc_summary_review": "22;31;44;23", "wc_review": "509;353;796;463", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 132.25, 75.82009957788237 ], "wc_strength_and_weaknesses_avg": [ 263.25, 98.37777950330045 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 104.75, 139.95601987767444 ], "wc_summary_review_avg": [ 30.0, 8.803408430829505 ], "wc_review_avg": [ 530.25, 163.5655449659249 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.7071067811865475, "corr_recommendation_correctness": 0.7071067811865475, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:1vlnEPWqTCAJ:scholar.google.com/&scioq=On+Storage+Neural+Network+Augmented+Approximate+Nearest+Neighbor+Search&hl=en&as_sdt=0,33", "gs_version_total": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Kioxia Corporation", "aff_unique_dep": "", "aff_unique_url": "https://www.kioxia.com", "aff_unique_abbr": "Kioxia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Japan" }, { "id": "0aAd19ZQp11", "title": "Efficient Bayesian Optimization with Deep Kernel Learning and Transformer Pre-trained on Muliple Heterogeneous Datasets", "track": "main", "status": "Reject", "tldr": "", "abstract": "Bayesian optimization (BO) is widely adopted in black-box optimization problems and it relies on a surrogate model to approximate the black-box response function. 
With the increasing number of black-box optimization tasks solved and even more to solve, the ability to learn from multiple prior tasks to jointly pre-train a surrogate model has long been awaited as a way to further boost optimization efficiency. In this paper, we propose a simple approach to pre-train a surrogate, which is a Gaussian process (GP) with a kernel defined on deep features learned from a Transformer-based encoder, using datasets from prior tasks with possibly heterogeneous input spaces. In addition, we provide a simple yet effective mix-up initialization strategy for input tokens corresponding to unseen input variables, thereby accelerating convergence on new tasks. Experiments on both synthetic and real benchmark problems demonstrate the effectiveness of our proposed pre-training and transfer BO strategy over existing methods.", "keywords": "Pre-training;Bayesian optimization;Transformer;Transfer learning", "primary_area": "", "supplementary_material": "", "author": "Wenlong Lyu;Shoubo Hu;Jie Chuai;Zhitang Chen", "authorids": "~Wenlong_Lyu1;~Shoubo_Hu1;chuaijie@huawei.com;~Zhitang_Chen1", "gender": "M;M;;M", "homepage": ";https://amber0309.github.io/about/;;", "dblp": "219/4148;218/9202;;06/10875", "google_scholar": ";;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Wenlong_Lyu1;~Shoubo_Hu1;chuaijie@huawei.com;~Zhitang_Chen1", "aff": ";Huawei Technologies Ltd.;;Huawei Technologies Ltd.", "aff_domain": ";huawei.com;;huawei.com", "position": ";Researcher;;Researcher", "bibtex": "@misc{\nlyu2023efficient,\ntitle={Efficient Bayesian Optimization with Deep Kernel Learning and Transformer Pre-trained on Muliple Heterogeneous Datasets},\nauthor={Wenlong Lyu and Shoubo Hu and Jie Chuai and Zhitang Chen},\nyear={2023},\nurl={https://openreview.net/forum?id=0aAd19ZQp11}\n}", "github": "", "project": "", "reviewers": "DKdC;cAwj;xRh6", "site": "https://openreview.net/forum?id=0aAd19ZQp11", "pdf_size": 1464340, "recommendation": "3;5;6", "confidence": "4;5;3", "correctness": "3;2;3", "technical_novelty": "2;2;2", "empirical_novelty": "2;2;3", "wc_summary_paper": "64;38;90", "wc_strength_and_weaknesses": "406;13;330", "wc_clarity_quality_novelty_and_reproducibility": "232;434;26", "wc_summary_review": "18;58;39", "wc_review": "720;543;485", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 64.0, 21.228911104120876 ], "wc_strength_and_weaknesses_avg": [ 249.66666666666666, 170.20053531708479 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 230.66666666666666, 166.5679707773643 ], "wc_summary_review_avg": [ 38.333333333333336, 16.33673433979046 ], "wc_review_avg": [ 582.6666666666666, 99.95443406317145 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.3273268353539886, "corr_recommendation_correctness": -0.18898223650461363, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9321082378642242203&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Huawei", "aff_unique_dep": 
"Huawei Technologies", "aff_unique_url": "https://www.huawei.com", "aff_unique_abbr": "Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "0bLE93R9d0O", "title": "Transformer Module Networks for Systematic Generalization in Visual Question Answering", "track": "main", "status": "Reject", "tldr": "Investigating whether and how modularity brings benefits to Transformer-based models", "abstract": "Transformers achieve great performance on Visual Question Answering (VQA). However, their systematic generalization capabilities, i.e., handling novel combinations of known concepts, is unclear. We reveal that Neural Module Networks (NMNs), i.e., question-specific compositions of modules that tackle a sub-task, achieve better or similar systematic generalization performance than the conventional Transformers, even though NMNs' modules are CNN-based. In order to address this shortcoming of Transformers with respect to NMNs, in this paper we investigate whether and how modularity can bring benefits to Transformers. Namely, we introduce Transformer Module Network (TMN), a novel NMN based on compositions of Transformer modules. TMNs achieve state-of-the-art systematic generalization performance in three VQA datasets, improving more than 30% over standard Transformers for novel compositions of sub-tasks. We show that not only the module composition but also the module specialization for each sub-task are the key of such performance gain.", "keywords": "Systematic generalization;Neural Module Network;Transformer", "primary_area": "", "supplementary_material": "/attachment/5a9550fe5cfa9a4584924e4076633a538cdf1a2a.zip", "author": "Moyuru Yamada;Vanessa D'Amario;Kentaro Takemoto;Xavier Boix;Tomotake Sasaki", "authorids": "~Moyuru_Yamada1;~Vanessa_D'Amario1;~Kentaro_Takemoto1;~Xavier_Boix1;~Tomotake_Sasaki1", "gender": "M;F;M;;", "homepage": ";;;;", "dblp": "01/11186;214/7683;15/6304;;", "google_scholar": ";vFwd8YoAAAAJ;;;", "orcid": ";;;;", "linkedin": "moyuru-yamada-688704b3;;;;", "or_profile": "~Moyuru_Yamada1;~Vanessa_D'Amario1;~Kentaro_Takemoto1;~Xavier_Boix1;~Tomotake_Sasaki1", "aff": "Fujitsu Limited;Nova Southeastern University;Fujitsu Limited;;", "aff_domain": "fujitsu.com;nova.edu;fujitsu.com;;", "position": "Senior Researcher;Assistant Professor;Researcher;;", "bibtex": "@misc{\nyamada2023transformer,\ntitle={Transformer Module Networks for Systematic Generalization in Visual Question Answering},\nauthor={Moyuru Yamada and Vanessa D'Amario and Kentaro Takemoto and Xavier Boix and Tomotake Sasaki},\nyear={2023},\nurl={https://openreview.net/forum?id=0bLE93R9d0O}\n}", "github": "", "project": "", "reviewers": "zXs6;pauq;o4YA;W6Kf", "site": "https://openreview.net/forum?id=0bLE93R9d0O", "pdf_size": 771561, "recommendation": "5;5;5;5", "confidence": "2;4;5;3", "correctness": "3;3;2;4", "technical_novelty": "2;2;2;4", "empirical_novelty": "3;3;2;4", "wc_summary_paper": "49;31;37;76", "wc_strength_and_weaknesses": "195;218;197;80", "wc_clarity_quality_novelty_and_reproducibility": "27;25;39;15", "wc_summary_review": "21;24;89;19", "wc_review": "292;298;362;190", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.5, 0.8660254037844386 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], 
"wc_summary_paper_avg": [ 48.25, 17.282577932704367 ], "wc_strength_and_weaknesses_avg": [ 172.5, 54.15948670362377 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 26.5, 8.52936105461599 ], "wc_summary_review_avg": [ 38.25, 29.354514133264068 ], "wc_review_avg": [ 285.5, 61.58530668917709 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4301653976409538067&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 13, "aff_unique_index": "0;1;0", "aff_unique_norm": "Fujitsu Limited;Nova Southeastern University", "aff_unique_dep": ";", "aff_unique_url": "https://www.fujitsu.com;https://www.nova.edu", "aff_unique_abbr": "Fujitsu;NSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Japan;United States" }, { "id": "0c2SbGJ3Lt", "title": "Textless Phrase Structure Induction from Visually-Grounded Speech", "track": "main", "status": "Withdraw", "tldr": "The first study on grammar induction from audio-visual inputs, without relying on intermediate text or ASR. ", "abstract": "We study phrase structure induction from visually-grounded speech without intermediate text or text pre-trained models. The core idea is to first segment the speech waveform into sequences of word segments, then induce phrase structure based on the inferred segment-level continuous representations. To this end, we present the Audio-Visual Neural Syntax Learner (AV-NSL) that learns non-trivial phrase structure by listening to audio and looking at images, without ever reading text. Experiments on SpokenCOCO, the spoken version of MSCOCO with paired images and spoken captions, show that AV-NSL infers meaningful phrase structures similar to those learned from naturally-supervised text parsing, quantitatively and qualitatively. The findings in this paper extend prior work in unsupervised language acquisition from speech and grounded grammar induction, and manifest one possibility of bridging the gap between the two fields.", "keywords": "unsupervised speech processing;grammar induction;speech representation learning;visually-grounded representation learning", "primary_area": "", "supplementary_material": "/attachment/8cba121288ddae42262fd95e49a12f4449e7232a.zip", "author": "Cheng-I Lai;Freda Shi;Puyuan Peng;Yoon Kim;Kevin Gimpel;Shiyu Chang;Yung-Sung Chuang;Saurabhchand Bhati;David Daniel Cox;David Harwath;Yang Zhang;Karen Livescu;James R. 
Glass", "authorids": "~Cheng-I_Lai1;~Freda_Shi1;~Puyuan_Peng1;~Yoon_Kim1;~Kevin_Gimpel1;~Shiyu_Chang2;~Yung-Sung_Chuang1;~Saurabhchand_Bhati1;~David_Daniel_Cox1;~David_Harwath1;~Yang_Zhang3;~Karen_Livescu1;~James_R._Glass1", "gender": "M;F;M;;M;Unspecified;M;;;M;M;;", "homepage": "http://people.csail.mit.edu/clai24/;http://ttic.uchicago.edu/~freda;https://jasonppy.github.io/;https://people.csail.mit.edu/yoonkim/;http://ttic.uchicago.edu/~kgimpel/index.html;http://people.csail.mit.edu/chang87/;https://people.csail.mit.edu/yungsung/;;;https://www.cs.utexas.edu/~harwath/index.html;;;", "dblp": "226/2039.html;194/2512;280/3431;;47/1252;28/9988;64/3095;;48/7659;;06/6785-1;;", "google_scholar": "mV4mRm0AAAAJ;jkDd-3QAAAAJ;https://scholar.google.com/citations?hl=en;n_ts4eYAAAAJ;http://scholar.google.com/citations?user=kDHs7DYAAAAJ;r21asW4AAAAJ;3ar1DOwAAAAJ;;;C0kDOzcAAAAJ;_-5PSgQAAAAJ;;", "orcid": ";0009-0009-5697-449X;;;;;0000-0002-1723-5063;;;;;;", "linkedin": ";;;;;;yschuang;;;;;;", "or_profile": "~Cheng-I_Lai1;~Freda_Shi1;~Puyuan_Peng1;~Yoon_Kim1;~Kevin_Gimpel1;~Shiyu_Chang2;~Yung-Sung_Chuang1;~Saurabhchand_Bhati1;~David_Daniel_Cox1;~David_Harwath1;~Yang_Zhang3;~Karen_Livescu1;~James_R._Glass1", "aff": "Massachusetts Institute of Technology;Toyota Technological Institute at Chicago;University of Texas at Austin;Massachusetts Institute of Technology;Toyota Technological Institute at Chicago;University of California, Santa Barbara;Massachusetts Institute of Technology;;International Business Machines;University of Texas, Austin;International Business Machines;;", "aff_domain": "mit.edu;ttic.edu;utexas.edu;mit.edu;ttic.edu;ucsb.edu;mit.edu;;ibm.com;utexas.edu;ibm.com;;", "position": "PhD student;PhD student;PhD student;Assistant Professor;Assistant Professor;Assistant Professor;PhD student;;IBM Director, MIT-IBM Watson AI Lab;Assistant Professor;Research Staff Employee;;", "bibtex": "@misc{\nlai2023textless,\ntitle={Textless Phrase Structure Induction from Visually-Grounded Speech},\nauthor={Cheng-I Lai and Freda Shi and Puyuan Peng and Yoon Kim and Kevin Gimpel and Shiyu Chang and Yung-Sung Chuang and Saurabhchand Bhati and David Daniel Cox and David Harwath and Yang Zhang and Karen Livescu and James R. 
Glass},\nyear={2023},\nurl={https://openreview.net/forum?id=0c2SbGJ3Lt}\n}", "github": "", "project": "", "reviewers": "PMHU;8ru5;z2zs", "site": "https://openreview.net/forum?id=0c2SbGJ3Lt", "pdf_size": 2922734, "recommendation": "3;5;5", "confidence": "4;3;3", "correctness": "3;2;3", "technical_novelty": "2;2;3", "empirical_novelty": "2;3;3", "wc_summary_paper": "439;71;66", "wc_strength_and_weaknesses": "2;384;131", "wc_clarity_quality_novelty_and_reproducibility": "2;138;15", "wc_summary_review": "2;189;19", "wc_review": "445;782;231", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "424;138;176", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 192.0, 174.6673027978238 ], "wc_strength_and_weaknesses_avg": [ 172.33333333333334, 158.66596638500926 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 51.666666666666664, 61.27714817849219 ], "wc_summary_review_avg": [ 70.0, 84.43143174592426 ], "wc_review_avg": [ 486.0, 226.80534973114428 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 246.0, 126.81745410891462 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 13, 0 ], "corr_recommendation_confidence": -0.9999999999999998, "corr_recommendation_correctness": -0.49999999999999983, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3564194995693226564&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;2;0;1;3;0;4;2;4", "aff_unique_norm": "Massachusetts Institute of Technology;Toyota Technological Institute at Chicago;University of Texas at Austin;University of California, Santa Barbara;International Business Machines Corporation", "aff_unique_dep": ";;;;", "aff_unique_url": "https://web.mit.edu;https://www.tti-chicago.org;https://www.utexas.edu;https://www.ucsb.edu;https://www.ibm.com", "aff_unique_abbr": "MIT;TTI Chicago;UT Austin;UCSB;IBM", "aff_campus_unique_index": "1;2;1;3;2", "aff_campus_unique": ";Chicago;Austin;Santa Barbara", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "0cm8HroIxJV", "title": "Explaining Representation Bottlenecks of Convolutional Decoder Networks", "track": "main", "status": "Reject", "tldr": "In this paper, we prove representation bottlenecks of a cascaded convolutional decoder network, considering the capacity of representing different frequency components of an input sample.", "abstract": "In this paper, we prove representation bottlenecks of a cascaded convolutional decoder network, considering the capacity of representing different frequency components of an input sample. We conduct the discrete Fourier transform on each channel of the feature map in an intermediate layer of the decoder network. Then, we introduce the rule of the forward propagation of such intermediate-layer spectrum maps, which is equivalent to the forward propagation of feature maps through a convolutional layer. Based on this, we find that each frequency component in the spectrum map is forward propagated independently of other frequency components. Furthermore, we prove two bottlenecks in representing feature spectra. 
First, we prove that the convolution operation, the zero-padding operation, and a set of other settings all make a convolutional decoder network more likely to weaken high-frequency components. Second, we prove that the upsampling operation generates a feature spectrum, in which strong signals repeatedly appear at certain frequencies. We will release all code when this paper is accepted.", "keywords": "Fourier transform;Deep Learning Theory;Representation Learning", "primary_area": "", "supplementary_material": "", "author": "Ling Tang;Wen Shen;Zhanpeng Zhou;YueFeng Chen;Quanshi Zhang", "authorids": "~Ling_Tang1;~Wen_Shen3;~Zhanpeng_Zhou1;~YueFeng_Chen1;~Quanshi_Zhang1", "gender": "M;F;M;M;M", "homepage": "https://tling2000.github.io/;https://ada-shen.github.io/;https://zzp1012.github.io/;;http://qszhang.com", "dblp": ";55/8186-2;;52/8180;http://dblp.uni-trier.de/pers/hd/z/Zhang:Quanshi", "google_scholar": "61qA1g4AAAAJ;;idxXY3UAAAAJ;Kf-IpFsAAAAJ;iFFhHK0AAAAJ", "orcid": "0009-0009-0070-8942;0000-0002-4210-5447;;;", "linkedin": ";;;;", "or_profile": "~Ling_Tang1;~Wen_Shen3;~Zhanpeng_Zhou1;~YueFeng_Chen1;~Quanshi_Zhang1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Alibaba Group;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;alibaba-inc.com;sjtu.edu.cn", "position": "Undergrad student;Postdoc;PhD student;Staff Algorithm Engineer;Associate Professor", "bibtex": "@misc{\ntang2023explaining,\ntitle={Explaining Representation Bottlenecks of Convolutional Decoder Networks},\nauthor={Ling Tang and Wen Shen and Zhanpeng Zhou and YueFeng Chen and Quanshi Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=0cm8HroIxJV}\n}", "github": "", "project": "", "reviewers": "fHYG;V1PA;wmw7;oTym", "site": "https://openreview.net/forum?id=0cm8HroIxJV", "pdf_size": 15981409, "recommendation": "3;3;3;3", "confidence": "4;3;4;4", "correctness": "2;2;3;3", "technical_novelty": "1;2;2;2", "empirical_novelty": "1;2;1;2", "wc_summary_paper": "57;52;87;23", "wc_strength_and_weaknesses": "22;524;721;245", "wc_clarity_quality_novelty_and_reproducibility": "41;27;30;49", "wc_summary_review": "177;13;131;44", "wc_review": "297;616;969;361", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.5, 0.5 ], "wc_summary_paper_avg": [ 54.75, 22.69774217846348 ], "wc_strength_and_weaknesses_avg": [ 378.0, 266.1719369129661 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 36.75, 8.78564169540279 ], "wc_summary_review_avg": [ 91.25, 65.7433456708738 ], "wc_review_avg": [ 560.75, 264.18968091127255 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ONpTdkq5lFIJ:scholar.google.com/&scioq=Explaining+Representation+Bottlenecks+of+Convolutional+Decoder+Networks&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Shanghai Jiao Tong University;Alibaba Group", "aff_unique_dep": ";", "aff_unique_url":
"https://www.sjtu.edu.cn;https://www.alibaba.com", "aff_unique_abbr": "SJTU;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "MeshDiffusion: Score-based Generative 3D Mesh Modeling", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11403", "id": "0cpM2ApF9p6", "poster": "/media/PosterPDFs/ICLR%202023/11403.png?t=1682714870.2982545", "openreview": "https://openreview.net/forum?id=0cpM2ApF9p6", "slides": "https://iclr.cc/virtual/2023/poster/11403", "video": "https://iclr.cc/virtual/2023/poster/11403", "author_site": "Zhen Liu, Yao Feng, Michael J Black, Derek Nowrouzezahrai, Liam Paull, Weiyang Liu", "tldr": "Diffusion model on 3D meshes of arbitrary topology by direct parametrizing meshes with tetrahedral grids", "abstract": "We consider the task of generating realistic 3D shapes, which is useful for a variety of applications such as automatic scene generation and physical simulation. Compared to other 3D representations like voxels and point clouds, meshes are more desirable in practice, because (1) they enable easy and arbitrary manipulation of shapes for relighting and simulation, and (2) they can fully leverage the power of modern graphics pipelines which are mostly optimized for meshes. Previous scalable methods for generating meshes typically rely on sub-optimal post-processing, and they tend to produce overly-smooth or noisy surfaces without fine-grained geometric details. To overcome these shortcomings, we take advantage of the graph structure of meshes and use a simple yet very effective generative modeling method to generate 3D meshes. Specifically, we represent meshes with deformable tetrahedral grids, and then train a diffusion model on this direct parameterization. We demonstrate the effectiveness of our model on multiple generative tasks.", "keywords": "generative model;diffusion model;3D mesh;shape generation", "primary_area": "", "supplementary_material": "", "author": "Zhen Liu;Yao Feng;Michael J. Black;Derek Nowrouzezahrai;Liam Paull;Weiyang Liu", "authorids": "~Zhen_Liu6;~Yao_Feng3;~Michael_J._Black1;~Derek_Nowrouzezahrai1;~Liam_Paull1;~Weiyang_Liu1", "gender": "M;F;;Not Specified;;M", "homepage": ";https://ps.is.tuebingen.mpg.de/person/yfeng;;https://www.cim.mcgill.ca/~derek/;;http://wyliu.com/", "dblp": "77/35-19;05/9861;;30/4225;;137/1532", "google_scholar": "I1IiJCAAAAAJ;wNQQhSIAAAAJ;;https://scholar.google.ca/citations?user=nCZ2PMcAAAAJ;;DMjROf0AAAAJ", "orcid": ";0000-0002-9481-9783;;;;", "linkedin": ";;;;;", "or_profile": "~Zhen_Liu6;~Yao_Feng3;~Michael_J._Black1;~Derek_Nowrouzezahrai1;~Liam_Paull1;~Weiyang_Liu1", "aff": "University of Montreal;ETHZ - ETH Zurich;;McGill University;;University of Cambridge", "aff_domain": "umontreal.ca;ethz.ch;;mcgill.ca;;cam.ac.uk", "position": "PhD student;PhD student;;Full Professor;;Researcher", "bibtex": "@inproceedings{\nliu2023meshdiffusion,\ntitle={MeshDiffusion: Score-based Generative 3D Mesh Modeling},\nauthor={Zhen Liu and Yao Feng and Michael J. 
Black and Derek Nowrouzezahrai and Liam Paull and Weiyang Liu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=0cpM2ApF9p6}\n}", "github": "", "project": "", "reviewers": "Mo7X;NSt1;jFa3", "pdf_size": 20135858, "recommendation": "6;6;6", "confidence": "4;5;3", "correctness": "4;3;4", "technical_novelty": "3;2;2", "empirical_novelty": "3;2;2", "wc_summary_paper": "47;45;63", "wc_strength_and_weaknesses": "130;240;201", "wc_clarity_quality_novelty_and_reproducibility": "92;22;110", "wc_summary_review": "58;63;54", "wc_review": "327;370;428", "wc_reply_reviewers": "12;59;0", "wc_reply_authors": "595;1073;87", "reply_reviewers": "1;1;0", "reply_authors": "3;4;2", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 51.666666666666664, 8.055363982396381 ], "wc_strength_and_weaknesses_avg": [ 190.33333333333334, 45.536310297997964 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 74.66666666666667, 37.959042254631356 ], "wc_summary_review_avg": [ 58.333333333333336, 3.6817870057290873 ], "wc_review_avg": [ 375.0, 41.38437708443449 ], "wc_reply_reviewers_avg": [ 23.666666666666668, 25.460208605237746 ], "wc_reply_authors_avg": [ 585.0, 402.59491634478775 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 3.0, 0.816496580927726 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 175, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8274698217301729309&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "pdf": "https://openreview.net/pdf?id=0cpM2ApF9p6", "email": "umontreal.ca;ethz.ch;;mcgill.ca;;cam.ac.uk", "author_num": 6, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "University of Montreal;ETH Zurich;McGill University;University of Cambridge", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.umontreal.ca;https://www.ethz.ch;https://www.mcgill.ca;https://www.cam.ac.uk", "aff_unique_abbr": "UM;ETHZ;McGill;Cambridge", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;1;0;2", "aff_country_unique": "Canada;Switzerland;United Kingdom" }, { "id": "0eSq84hbXhe", "title": "The Graph Learning Attention Mechanism: Learnable Sparsification Without Heuristics", "track": "main", "status": "Reject", "tldr": "We introduce a drop-in, differentiable graph structure learning layer for use with GNNs.", "abstract": "Graph Neural Networks (GNNs) are local aggregators that derive their expressive power from their sensitivity to network structure. However, this sensitivity comes at a cost: noisy edges degrade performance. In response, many GNNs include edge-weighting mechanisms that scale the contribution of each edge in the aggregation step. However, to account for neighborhoods of varying size, node-embedding mechanisms must normalize these edge-weights across each neighborhood. As such, the impact of noisy edges cannot be eliminated without removing those edges altogether.
Motivated by this issue, we introduce the Graph Learning Attention Mechanism (GLAM): a drop-in, differentiable structure learning layer for GNNs that separates the distinct tasks of structure learning and node embedding. In contrast to existing graph learning approaches, GLAM does not require the addition of exogenous structural regularizers or edge-selection heuristics to learn optimal graph structures. In experiments on citation and co-purchase datasets, we demonstrate that our approach can match state of the art semi-supervised node classification accuracies while inducing an order of magnitude greater sparsity than existing graph learning methods.", "keywords": "graph structure learning;graph attention networks", "primary_area": "", "supplementary_material": "", "author": "Mattson Thieme;Yada Zhu;Han Liu", "authorids": "~Mattson_Thieme1;~Yada_Zhu1;~Han_Liu4", "gender": "M;;", "homepage": "https://mattsonthieme.github.io;https://researcher.watson.ibm.com/researcher/view.php?person=us-yzhu;", "dblp": ";56/8808;", "google_scholar": "SAjppGoAAAAJ;AJb408gAAAAJ;", "orcid": ";0000-0002-3338-6371;", "linkedin": "mattsonthieme/;yadazhu/;", "or_profile": "~Mattson_Thieme1;~Yada_Zhu1;~Han_Liu4", "aff": "Northwestern University;IBM Research;Northwestern University", "aff_domain": "northwestern.edu;us.ibm.com;u.northwestern.edu", "position": "PhD student;Principal Research Scientist;Associate Professor", "bibtex": "@misc{\nthieme2023the,\ntitle={The Graph Learning Attention Mechanism: Learnable Sparsification Without Heuristics},\nauthor={Mattson Thieme and Yada Zhu and Han Liu},\nyear={2023},\nurl={https://openreview.net/forum?id=0eSq84hbXhe}\n}", "github": "", "project": "", "reviewers": "jLy4;7cxi;aSBH;BqHK", "site": "https://openreview.net/forum?id=0eSq84hbXhe", "pdf_size": 472620, "recommendation": "3;3;5;5", "confidence": "4;3;3;4", "correctness": "2;2;3;3", "technical_novelty": "2;2;3;2", "empirical_novelty": "2;3;3;2", "wc_summary_paper": "49;93;82;53", "wc_strength_and_weaknesses": "59;395;139;78", "wc_clarity_quality_novelty_and_reproducibility": "88;119;72;33", "wc_summary_review": "175;102;34;12", "wc_review": "371;709;327;176", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "464;561;222;332", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 69.25, 18.713297411199342 ], "wc_strength_and_weaknesses_avg": [ 167.75, 134.4904736403289 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 78.0, 30.99193443462347 ], "wc_summary_review_avg": [ 80.75, 63.73136982679723 ], "wc_review_avg": [ 395.75, 194.778560165127 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 394.75, 128.66113438020045 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:oduVi3gN7TcJ:scholar.google.com/&scioq=The+Graph+Learning+Attention+Mechanism:+Learnable+Sparsification+Without+Heuristics&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;0", "aff_unique_norm": "Northwestern University;IBM", "aff_unique_dep": ";IBM Research", "aff_unique_url": "https://www.northwestern.edu;https://www.ibm.com/research", "aff_unique_abbr": 
"NU;IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "HiCLIP: Contrastive Language-Image Pretraining with Hierarchy-aware Attention", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10805", "id": "0eTTKOOOQkV", "poster": "", "openreview": "https://openreview.net/forum?id=0eTTKOOOQkV", "slides": "https://iclr.cc/virtual/2023/poster/10805", "video": "https://iclr.cc/virtual/2023/poster/10805", "author_site": "Shijie Geng, Jianbo Yuan, Yu Tian, Yuxiao Chen, Yongfeng Zhang", "tldr": "", "abstract": "The success of large-scale contrastive vision-language pretraining (CLIP) has benefited both visual recognition and multimodal content understanding. The concise design brings CLIP the advantage in inference efficiency against other vision-language models with heavier cross-attention fusion layers, making it a popular choice for a wide spectrum of downstream tasks. However, CLIP does not explicitly capture the hierarchical nature of high-level and fine-grained semantics conveyed in images and texts, which is arguably critical to vision-language understanding and reasoning. To this end, we equip both the visual and language branches in CLIP with hierarchy-aware attentions, namely Hierarchy-aware CLIP (HiCLIP), to progressively discover semantic hierarchies layer-by-layer from both images and texts in an unsupervised manner. As a result, such hierarchical aggregation significantly improves the cross-modal alignment. To demonstrate the advantages of HiCLIP, we conduct qualitative analysis on its unsupervised hierarchy induction during inference, as well as extensive quantitative experiments on both visual recognition and vision-language downstream tasks.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Shijie Geng;Jianbo Yuan;Yu Tian;Yuxiao Chen;Yongfeng Zhang", "authorids": "~Shijie_Geng1;~Jianbo_Yuan1;~Yu_Tian4;~Yuxiao_Chen5;~Yongfeng_Zhang1", "gender": "M;M;;M;", "homepage": ";;;http://yuxiaochen.com/;", "dblp": "171/3642;134/6790;;158/4934-2;", "google_scholar": "wujqvGYAAAAJ;https://scholar.google.com/citations?hl=en;;6PW8cyQAAAAJ;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Shijie_Geng1;~Jianbo_Yuan1;~Yu_Tian4;~Yuxiao_Chen5;~Yongfeng_Zhang1", "aff": "ByteDance Inc.;Bytedance;;Rutgers University, New Brunswick;", "aff_domain": "bytedance.com;bytedance.com;;rutgers.edu;", "position": "Researcher;Researcher;;PhD student;", "bibtex": "@inproceedings{\ngeng2023hiclip,\ntitle={Hi{CLIP}: Contrastive Language-Image Pretraining with Hierarchy-aware Attention},\nauthor={Shijie Geng and Jianbo Yuan and Yu Tian and Yuxiao Chen and Yongfeng Zhang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=0eTTKOOOQkV}\n}", "github": "", "project": "", "reviewers": "79pR;jjXQ;WpY2;o933", "pdf_size": 12786532, "recommendation": "6;6;8;8", "confidence": "4;3;3;4", "correctness": "4;3;3;4", "technical_novelty": "3;3;3;3", "empirical_novelty": "3;4;4;4", "wc_summary_paper": "66;45;204;61", "wc_strength_and_weaknesses": "233;224;1054;96", "wc_clarity_quality_novelty_and_reproducibility": "70;76;281;68", "wc_summary_review": "50;125;263;22", "wc_review": "419;470;1802;247", "wc_reply_reviewers": "0;0;344;64", "wc_reply_authors": "1149;957;2403;486", "reply_reviewers": "0;0;2;1", "reply_authors": "3;2;6;2", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.5, 
0.5 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 94.0, 63.98046576885792 ], "wc_strength_and_weaknesses_avg": [ 401.75, 380.45523718303576 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 123.75, 90.83604736006515 ], "wc_summary_review_avg": [ 115.0, 93.37826299519605 ], "wc_review_avg": [ 734.5, 621.834584113814 ], "wc_reply_reviewers_avg": [ 102.0, 142.1407752898513 ], "wc_reply_authors_avg": [ 1248.75, 708.7222216778588 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 3.25, 1.6393596310755 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 51, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7129552052888130889&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=0eTTKOOOQkV", "email": "bytedance.com;bytedance.com;;rutgers.edu;", "author_num": 5, "aff_unique_index": "0;0;1", "aff_unique_norm": "ByteDance;Rutgers University", "aff_unique_dep": ";", "aff_unique_url": "https://www.bytedance.com;https://www.rutgers.edu", "aff_unique_abbr": "ByteDance;Rutgers", "aff_campus_unique_index": "1", "aff_campus_unique": ";New Brunswick", "aff_country_unique_index": "0;0;1", "aff_country_unique": "China;United States" }, { "title": "Improving Out-of-distribution Generalization with Indirection Representations", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11603", "id": "0f-0I6RFAch", "poster": "", "openreview": "https://openreview.net/forum?id=0f-0I6RFAch", "slides": "https://iclr.cc/virtual/2023/poster/11603", "video": "https://iclr.cc/virtual/2023/poster/11603", "author_site": "Kha Pham, Hung Le, Man Ngo, Truyen Tran", "tldr": "", "abstract": "We propose a generic module named Indirection Layer (InLay), which leverages indirection and data internal relationships to effectively construct symbolic indirect representations to improve out-of-distribution generalization capabilities of various neural architectures. InLay receives data input in the form of a sequence of objects, treats it as a complete weighted graph whose vertices are the objects and edge weights are scalars representing relationships between vertices. The input is first mapped via indirection to a symbolic graph with data-independent and trainable vertices. This symbolic graph is then propagated, resulting in new vertex features whose indirection will be used for prediction steps afterward. Theoretically, we show that the distances between indirection representations are bounded by the distances between corresponding graphs, implying that unseen samples with very different surface statistics can still be close in the representation space to the seen samples if they share similar internal relationships. We demonstrate that InLay is consistently effective in improving out-of-distribution generalization throughout a comprehensive suite of experiments, including IQ problems, distorted image classification, and few-shot domain adaptation NLP classification. 
We also conduct ablation studies to verify different design choices of InLay.", "keywords": "out-of-distribution generalization;indirection;representation", "primary_area": "", "supplementary_material": "/attachment/70a958bf720c19e7290ff6272c38d43a7278da1e.zip", "author": "Kha Pham;Hung Le;Man Ngo;Truyen Tran", "authorids": "~Kha_Pham1;~Hung_Le1;~Man_Ngo1;~Truyen_Tran1", "gender": "M;M;M;M", "homepage": ";http://truyentran.github.io;https://thaihungle.github.io/;", "dblp": "326/7309;55/2269;45/466-2;", "google_scholar": "O02f60AAAAAJ;https://scholar.google.com.au/citations?user=zvspVLwAAAAJ;https://scholar.google.com.au/citations?user=q2HbxngAAAAJ;tshz60MAAAAJ", "orcid": "0000-0002-2281-7332;0000-0001-6531-8907;0000-0002-3126-184X;", "linkedin": ";truyen-tran;;", "or_profile": "~Kha_Pham1;~Truyen_Tran1;~Hung_Thai_Le1;~Man_Minh_Ngo1", "aff": "Deakin University;Deakin University, Australia;Deakin University;Ho Chi Minh city University of Science, Vietnam National University", "aff_domain": "deakin.edu.au;deakin.edu.au;deakin.edu.au;hcmus.edu.vn", "position": "PhD student;Associate Professor;Lecturer;Lecturer", "bibtex": "@inproceedings{\npham2023improving,\ntitle={Improving Out-of-distribution Generalization with Indirection Representations},\nauthor={Kha Pham and Hung Le and Man Ngo and Truyen Tran},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=0f-0I6RFAch}\n}", "github": "", "project": "", "reviewers": "ttym;JfzH;EM2y;r9MA", "pdf_size": 2263930, "recommendation": "5;6;6;8", "confidence": "4;4;4;3", "correctness": "1;3;3;4", "technical_novelty": "3;3;2;2", "empirical_novelty": "3;3;2;3", "wc_summary_paper": "210;82;250;224", "wc_strength_and_weaknesses": "1053;107;237;90", "wc_clarity_quality_novelty_and_reproducibility": "61;97;64;15", "wc_summary_review": "53;57;36;77", "wc_review": "1377;343;587;406", "wc_reply_reviewers": "850;0;0;0", "wc_reply_authors": "1750;330;555;286", "reply_reviewers": "2;0;0;0", "reply_authors": "4;1;1;1", "recommendation_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.75, 1.0897247358851685 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 191.5, 64.82862022286145 ], "wc_strength_and_weaknesses_avg": [ 371.75, 397.4087159336091 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 59.25, 29.192250684042847 ], "wc_summary_review_avg": [ 55.75, 14.58380951603524 ], "wc_review_avg": [ 678.25, 413.2465214614637 ], "wc_reply_reviewers_avg": [ 212.5, 368.0607966083864 ], "wc_reply_authors_avg": [ 730.25, 597.5283989066963 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.9271726499455306, "corr_recommendation_correctness": 0.894736842105263, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5766942108046922503&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=0f-0I6RFAch", "email": "deakin.edu.au;deakin.edu.au;deakin.edu.au;hcmus.edu.vn", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Deakin University;Ho Chi Minh City University of Science", "aff_unique_dep": ";", "aff_unique_url": "https://www.deakin.edu.au;", "aff_unique_abbr": "Deakin;", "aff_campus_unique_index": "", "aff_campus_unique": "", 
"aff_country_unique_index": "0;0;0;1", "aff_country_unique": "Australia;Vietnam" }, { "title": "What learning algorithm is in-context learning? Investigations with linear models", "status": "Top-5%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10852", "id": "0g0X4H8yN4I", "poster": "/media/PosterPDFs/ICLR%202023/10852.png?t=1683020051.347224", "openreview": "https://openreview.net/forum?id=0g0X4H8yN4I", "slides": "https://iclr.cc/virtual/2023/poster/10852", "video": "https://iclr.cc/virtual/2023/poster/10852", "author_site": "Ekin Aky\u00fcrek, Dale Schuurmans, Jacob Andreas, Tengyu Ma, Denny Zhou", "tldr": "We prove that the transformers can implement learning algorithms for linear models based e.g gradient descent, then observe they closely match the predictors of known algorithms, transitioning between different predictors as transformer depth vary.", "abstract": "Neural sequence models, especially transformers, exhibit a remarkable capacity for in-context learning. They can construct new predictors from sequences of labeled examples $(x, f(x))$ presented in the input without further parameter updates. We investigate the hypothesis that transformer-based in-context learners implement standard learning algorithms implicitly, by encoding context-specific parametric models in their hidden representations, and updating these implicit models as new examples appear in the context. Using linear regression as a model problem, we offer three sources of evidence for this hypothesis. First, we prove by construction that transformers can implement learning algorithms for linear models based on gradient descent and closed-form computation of regression parameters. Second, we show that trained in-context learners closely match the predictors computed by gradient descent, ridge regression, and exact least-squares regression, transitioning between different predictors as transformer depth and dataset noise vary. Third, we present preliminary evidence that in-context learners share algorithmic features with these predictors: learners' late layers encode weight vectors and moment matrices. These results suggest that in-context learning is understandable in algorithmic terms, and that (at least in the linear case) learners may work by rediscovering standard estimation algorithms.", "keywords": "in-context learning;transformers;sequence models;deep learning;meta learning", "primary_area": "", "supplementary_material": "", "author": "Ekin Aky\u00fcrek;Dale Schuurmans;Jacob Andreas;Tengyu Ma;Denny Zhou", "authorids": "~Ekin_Aky\u00fcrek1;~Dale_Schuurmans1;~Jacob_Andreas1;~Tengyu_Ma1;~Denny_Zhou1", "gender": ";;M;M;", "homepage": ";;http://web.mit.edu/jda/www;http://ai.stanford.edu/~tengyuma/;", "dblp": ";;97/8154;54/9061;", "google_scholar": ";;dnZ8udEAAAAJ;i38QlUwAAAAJ;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Ekin_Aky\u00fcrek1;~Dale_Schuurmans1;~Jacob_Andreas1;~Tengyu_Ma1;~Denny_Zhou1", "aff": ";;Microsoft;Facebook AI Research;", "aff_domain": ";;microsoft.com;fb.com;", "position": ";;Researcher;Visiting Scientist;", "bibtex": "@inproceedings{\naky{\\\"u}rek2023what,\ntitle={\u200b\u200bWhat learning algorithm is in-context learning? 
Investigations with linear models},\nauthor={Ekin Aky{\\\"u}rek and Dale Schuurmans and Jacob Andreas and Tengyu Ma and Denny Zhou},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=0g0X4H8yN4I}\n}", "github": "", "project": "", "reviewers": "NUsG;5hCi;p6X8", "pdf_size": 1156704, "recommendation": "8;8;8", "confidence": "4;4;4", "correctness": "3;1;3", "technical_novelty": "4;3;4", "empirical_novelty": "4;0;2", "wc_summary_paper": "287;105;209", "wc_strength_and_weaknesses": "811;113;162", "wc_clarity_quality_novelty_and_reproducibility": "376;55;171", "wc_summary_review": "47;61;55", "wc_review": "1521;334;597", "wc_reply_reviewers": "116;0;0", "wc_reply_authors": "486;207;385", "reply_reviewers": "1;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 8.0, 0.0 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.3333333333333335, 0.9428090415820634 ], "technical_novelty_avg": [ 3.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 1.632993161855452 ], "wc_summary_paper_avg": [ 200.33333333333334, 74.55348564770276 ], "wc_strength_and_weaknesses_avg": [ 362.0, 318.12052223436746 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 200.66666666666666, 132.71607120298412 ], "wc_summary_review_avg": [ 54.333333333333336, 5.734883511361751 ], "wc_review_avg": [ 817.3333333333334, 509.0201916972995 ], "wc_reply_reviewers_avg": [ 38.666666666666664, 54.68292441175968 ], "wc_reply_authors_avg": [ 359.3333333333333, 115.33815018843023 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 517, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10716583995991425174&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=0g0X4H8yN4I", "email": ";;microsoft.com;fb.com;", "author_num": 5, "aff_unique_index": "0;1", "aff_unique_norm": "Microsoft;Meta", "aff_unique_dep": "Microsoft Corporation;Facebook AI Research", "aff_unique_url": "https://www.microsoft.com;https://research.facebook.com", "aff_unique_abbr": "Microsoft;FAIR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "0g1JdUJF7Fr", "title": "An Optimal Transport Perspective on Unpaired Image Super-Resolution", "track": "main", "status": "Reject", "tldr": "", "abstract": "Real-world image super-resolution (SR) tasks often do not have paired\ndatasets, which limits the application of supervised techniques. As a result, the tasks are usually approached by unpaired techniques based on Generative Adversarial Networks (GANs), which yield complex training losses with several regularization terms, e.g., content or identity losses. We theoretically investigate optimization problems which arise in such models and find two surprising observations. First, the learned SR map is always an optimal transport (OT) map. Second, we theoretically prove and empirically show that the learned map is biased, i.e., it does not actually transform the distribution of low-resolution images to high-resolution ones. Inspired by these findings, we propose an algorithm for unpaired SR which learns an unbiased OT map for the perceptual transport cost. 
Unlike the existing GAN-based alternatives, our algorithm has a simple optimization objective that reduces the need for complex hyperparameter selection and additional regularization. At the same time, it provides nearly state-of-the-art performance on the large-scale unpaired AIM19 dataset.", "keywords": "optimal transport;unpaired image super-resolution", "primary_area": "", "supplementary_material": "/attachment/e85ef91e938e9be64b0ca8942ad7ff539ab56680.zip", "author": "Milena Gazdieva;Litu Rout;Alexander Korotin;Andrey Kravchenko;Alexander Filippov;Evgeny Burnaev", "authorids": "~Milena_Gazdieva1;~Litu_Rout1;~Alexander_Korotin2;~Andrey_Kravchenko3;~Alexander_Filippov1;~Evgeny_Burnaev1", "gender": "F;M;M;M;M;M", "homepage": ";https://liturout.github.io/;https://www.chch.ox.ac.uk/staff/dr-andrey-kravchenko;;http://faculty.skoltech.ru/people/evgenyburnaev;https://akorotin.netlify.app", "dblp": "309/6585;206/6445;;;144/7845;209/9906", "google_scholar": "h52_Zx8AAAAJ;https://scholar.google.co.in/citations?hl=en;;;https://scholar.google.ru/citations?user=pCRdcOwAAAAJ;https://scholar.google.ru/citations?user=1rIIvjAAAAAJ", "orcid": "0000-0003-0047-1577;;;;0000-0001-8424-0690;0000-0003-4286-925X", "linkedin": ";litu-rout-sac-isro/;;alexander-filippov-3022a381/;;", "or_profile": "~Milena_Gazdieva1;~Litu_Rout1;~Andrey_Kravchenko3;~Alexander_Filippov1;~Evgeny_Burnaev1;~Alexander_Andreevich_Korotin1", "aff": "Skolkovo Institute of Science and Technology;University of Texas at Austin;;Huawei Noah's Ark Lab;Skolkovo Institute of Science and Technology;Skolkovo Institute of Science and Technology", "aff_domain": "skoltech.ru;utexas.edu;;noahlab.com.hk;skoltech.ru;skoltech.ru", "position": "PhD student;PhD student;;Lab Director;Full Professor;Head of Research Group", "bibtex": "@misc{\ngazdieva2023an,\ntitle={An Optimal Transport Perspective on Unpaired Image Super-Resolution},\nauthor={Milena Gazdieva and Litu Rout and Alexander Korotin and Andrey Kravchenko and Alexander Filippov and Evgeny Burnaev},\nyear={2023},\nurl={https://openreview.net/forum?id=0g1JdUJF7Fr}\n}", "github": "", "project": "", "reviewers": "xhtM;GsaW;mWTP;nWwQ", "site": "https://openreview.net/forum?id=0g1JdUJF7Fr", "pdf_size": 24664649, "recommendation": "3;5;6;8", "confidence": "4;4;5;3", "correctness": "3;3;3;4", "technical_novelty": "2;3;3;3", "empirical_novelty": "1;3;2;3", "wc_summary_paper": "56;215;82;154", "wc_strength_and_weaknesses": "460;462;244;100", "wc_clarity_quality_novelty_and_reproducibility": "192;6;65;61", "wc_summary_review": "11;126;58;168", "wc_review": "719;809;449;483", "wc_reply_reviewers": "803;0;0;0", "wc_reply_authors": "1581;428;529;64", "reply_reviewers": "1;0;0;0", "reply_authors": "3;2;2;1", "recommendation_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 126.75, 62.327261290706495 ], "wc_strength_and_weaknesses_avg": [ 316.5, 153.20819168699825 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 81.0, 68.19457456425694 ], "wc_summary_review_avg": [ 90.75, 60.50361559444196 ], "wc_review_avg": [ 615.0, 152.83324245726124 ], "wc_reply_reviewers_avg": [ 200.75, 347.70919961945214 ], "wc_reply_authors_avg": [ 650.5, 564.3759828341387 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], 
"authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.39223227027636803, "corr_recommendation_correctness": 0.8006407690254357, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15000693346106100309&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "Skolkovo Institute of Science and Technology;University of Texas at Austin;Huawei", "aff_unique_dep": ";;Noah's Ark Lab", "aff_unique_url": "https://www.skoltech.ru;https://www.utexas.edu;https://www.huawei.com", "aff_unique_abbr": "Skoltech;UT Austin;Huawei", "aff_campus_unique_index": "1", "aff_campus_unique": ";Austin", "aff_country_unique_index": "0;1;2;0;0", "aff_country_unique": "Russian Federation;United States;China" }, { "id": "0h-YwriPUI", "title": "Memory-Augmented Variational Adaptation for Online Few-Shot Segmentation", "track": "main", "status": "Withdraw", "tldr": "We propose a memory-augmented variational adaptation mechanism, which learns to adapt the model to every new sample that arrives sequentially.", "abstract": "We investigate online few-show segmentation, which learns to make dense predictions for novel classes while observing samples sequentially. The main challenge in such an online scenario is the sample diversity in the sequence, resulting in models that do not generalize well to future samples. To this end, we propose a memory-augmented variational adaptation mechanism, which learns to adapt the model to every new sample that arrives sequentially. Specifically, we first introduce a prototype memory, which retains category knowledge from previous samples to facilitate the model adaptation to future samples. The adaptation to each new sample is then formulated as a variational Bayesian inference problem, which strives to generate sample-specific model parameters by conditioning the sample and the prototype memory. Furthermore, we propose memory-augmented segmentation to learn sample-specific feature representation for better adaptation to the segmentation of each sample. With extensive experiments, we show that a simple extension of existing few-shot segmentation methods tends to converge to over-smoothed, averaged masks of lesser performance. By contrast, the proposed method achieves considerably better online few-shot segmentation performance.", "keywords": "Online few-shot segmentation;Variation inference;Memory-augmented.", "primary_area": "", "supplementary_material": "", "author": "Jie Liu;Yingjun Du;Zehao Xiao;Cees G. M. 
Snoek;Jan-jakob Sonke;Efstratios Gavves", "authorids": "~Jie_Liu21;~Yingjun_Du1;~Zehao_Xiao1;~Cees_G._M._Snoek1;~Jan-jakob_Sonke1;~Efstratios_Gavves1", "gender": "M;M;M;;M;M", "homepage": "https://jliu4ai.github.io/;https://yingjundu.github.io/;https://zzzx1224.github.io/;;https://www.egavves.com;http://www.ceessnoek.info", "dblp": ";263/6794;225/5426;20/4093;03/8693;s/CeesSnoek", "google_scholar": "OUtjZyAAAAAJ;oAeW6rAAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=nl;https://scholar.google.nl/citations?user=QqfCvsgAAAAJ;https://scholar.google.nl/citations?user=0uKdbscAAAAJ", "orcid": ";;;0000-0001-5155-5274;;0000-0001-9092-1556", "linkedin": ";%E8%8B%B1%E5%86%9B-%E6%9D%9C-a938a0174/;;;;cgmsnoek/", "or_profile": "~Jie_Liu21;~Yingjun_Du1;~Zehao_Xiao1;~Jan-jakob_Sonke1;~Efstratios_Gavves1;~Cees_Snoek1", "aff": "University of Amsterdam;University of Amsterdam;University of Amsterdam;University of Amsterdam;University of Amsterdam;University of Amsterdam", "aff_domain": "uva.nl;uva.nl;uva.nl;uva.nl;uva.nl;uva.nl", "position": "PhD student;PhD student;PhD student;Full Professor;Associate Professor;Full Professor", "bibtex": "@misc{\nliu2023memoryaugmented,\ntitle={Memory-Augmented Variational Adaptation for Online Few-Shot Segmentation},\nauthor={Jie Liu and Yingjun Du and Zehao Xiao and Cees G. M. Snoek and Jan-jakob Sonke and Efstratios Gavves},\nyear={2023},\nurl={https://openreview.net/forum?id=0h-YwriPUI}\n}", "github": "", "project": "", "reviewers": "efJU;LscA;fxaX;x8DV", "site": "https://openreview.net/forum?id=0h-YwriPUI", "pdf_size": 19571760, "recommendation": "3;3;5;5", "confidence": "4;3;3;4", "correctness": "3;2;3;3", "technical_novelty": "2;1;3;2", "empirical_novelty": "2;0;2;2", "wc_summary_paper": "60;85;78;86", "wc_strength_and_weaknesses": "301;245;145;487", "wc_clarity_quality_novelty_and_reproducibility": "18;24;20;134", "wc_summary_review": "60;14;31;69", "wc_review": "439;368;274;776", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 1.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 77.25, 10.425329730996522 ], "wc_strength_and_weaknesses_avg": [ 294.5, 124.39754820735013 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 49.0, 49.122296363260546 ], "wc_summary_review_avg": [ 43.5, 22.073740054644116 ], "wc_review_avg": [ 464.25, 189.26486071112092 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12585905251564624134&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "University of Amsterdam", "aff_unique_dep": "", "aff_unique_url": "https://www.uva.nl", "aff_unique_abbr": "UvA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "Netherlands" }, { "id": "0h4_YLDhf4K", "title": "SeqSHAP: Subsequence Level Shapley Value Explanations for Sequential Predictions", "track": "main", "status": "Reject", "tldr": "", 
"abstract": "With the increasing demands of interpretability in real-world applications, various methods for explainable artificial intelligence (XAI) have been proposed. However, most of them overlook the interpretability in sequential scenarios, which have a wide range of applications, e.g., online transactions and sequential recommendations. In this paper, we propose a Shapley value based explainer named SeqSHAP to explain the model predictions in sequential scenarios. Compared to existing methods, SeqSHAP provides more intuitive explanations at a subsequence level, which explicitly models the effect of contextual information among the related elements in a sequence. We propose to calculate subsequence-level feature attributions instead of element-wise attributions to utilize the information embedded in sequence structure, and provide a distribution-based segmentation method to obtain reasonable subsequences. Extensive experiments on two online transaction datasets from a real-world e-commerce platform show that the proposed method could provide valid and reliable explanations for sequential predictions.", "keywords": "XAI;Explainability;SHAP;Sequential Predictions", "primary_area": "", "supplementary_material": "", "author": "Guanyu Jiang;Fuzhen Zhuang;Bowen Song;Jiani Li;Ying Sun;Yongchun Zhu;Tianyi Zhang;Weiqiang Wang;deqing wang", "authorids": "~Guanyu_Jiang2;~Fuzhen_Zhuang1;~Bowen_Song2;~Jiani_Li3;~Ying_Sun4;~Yongchun_Zhu1;~Tianyi_Zhang5;~Weiqiang_Wang4;~deqing_wang2", "gender": "M;M;M;;F;M;M;M;M", "homepage": ";https://fuzhenzhuang.github.io/index.html;;;https://sunyinggilly.github.io;https://easezyc.github.io/;;https://www.linkedin.com/in/weiqiang-wang-489b925/;https://ktl.buaa.edu.cn/", "dblp": ";48/5638;;;10/5415-6.html;56/11341;;;", "google_scholar": "E48YubYAAAAJ;https://scholar.google.com/citations?hl=en;MvmgQFwAAAAJ;;;https://scholar.google.com.hk/citations?user=iKUIgeQAAAAJ;;;NrYqxY4AAAAJ", "orcid": ";0000-0001-9170-7009;;;0000-0002-4763-6060;;;0000-0002-6159-619X;0000-0001-6441-4390", "linkedin": ";;;https://www.linkedin.cn/incareer/in/\u4f73\u9713-\u674e-09a872a7;;;tianyi-zhang-178a491a/;weiqiang-wang-489b925/;", "or_profile": "~Guanyu_Jiang2;~Fuzhen_Zhuang1;~Bowen_Song2;~Jiani_Li3;~Ying_Sun4;~Yongchun_Zhu1;~Tianyi_Zhang5;~Weiqiang_Wang4;~deqing_wang2", "aff": "Beihang University;Institute of Computing Technology, Chinese Academy of Sciences;Ant Group;Alipay;Hong Kong University of Science and Technology (Guangzhou);, Chinese Academy of Sciences;Alipay;Ant Group;", "aff_domain": "buaa.edu.cn;ict.ac.cn;antgroup.com;alipay.com;hkust-gz.edu.cn;ict.ac.cn;alipay.com;antgroup.com;", "position": "MS student;Associate Professor;Researcher;Researcher;Assistant Professor;PhD student;Principal Researcher;Researcher;", "bibtex": "@misc{\njiang2023seqshap,\ntitle={Seq{SHAP}: Subsequence Level Shapley Value Explanations for Sequential Predictions},\nauthor={Guanyu Jiang and Fuzhen Zhuang and Bowen Song and Jiani Li and Ying Sun and Yongchun Zhu and Tianyi Zhang and Weiqiang Wang and deqing wang},\nyear={2023},\nurl={https://openreview.net/forum?id=0h4_YLDhf4K}\n}", "github": "", "project": "", "reviewers": "EXsb;Sgob;nQqx;z5Gv", "site": "https://openreview.net/forum?id=0h4_YLDhf4K", "pdf_size": 351990, "recommendation": "5;5;5;6", "confidence": "4;3;3;4", "correctness": "2;3;3;3", "technical_novelty": "2;3;2;3", "empirical_novelty": "2;3;3;2", "wc_summary_paper": "30;117;50;148", "wc_strength_and_weaknesses": "322;200;160;665", "wc_clarity_quality_novelty_and_reproducibility": 
"20;115;27;99", "wc_summary_review": "20;229;47;36", "wc_review": "392;661;284;948", "wc_reply_reviewers": "0;0;0;78", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;1", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 86.25, 48.05400607649689 ], "wc_strength_and_weaknesses_avg": [ 336.75, 198.68741152876294 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 65.25, 42.204117097742966 ], "wc_summary_review_avg": [ 83.0, 84.83808107212232 ], "wc_review_avg": [ 571.25, 257.2152551852242 ], "wc_reply_reviewers_avg": [ 19.5, 33.77499074759311 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 9, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16191195700820181674&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2;3;4;1;3;2", "aff_unique_norm": "Beihang University;Chinese Academy of Sciences;Ant Group;Alipay;Hong Kong University of Science and Technology", "aff_unique_dep": ";Institute of Computing Technology;;;", "aff_unique_url": "http://www.buaa.edu.cn/;http://www.ict.ac.cn;https://www.antgroup.com;https://www.alipay.com;https://www.ust.hk", "aff_unique_abbr": "BUAA;CAS;Ant Group;Alipay;HKUST", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "A Convergent Single-Loop Algorithm for Relaxation of Gromov-Wasserstein in Graph Data", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12260", "id": "0jxPyVWmiiF", "poster": "", "openreview": "https://openreview.net/forum?id=0jxPyVWmiiF", "slides": "https://iclr.cc/virtual/2023/poster/12260", "video": "https://iclr.cc/virtual/2023/poster/12260", "author_site": "Jiajin Li, Jianheng Tang, Lemin Kong, Huikang Liu, Jia Li, Anthony So, Jose Blanchet", "tldr": "We propose the first provable single-loop algorithm for computing the Gromov-Wasserstein (GW) distance.", "abstract": "In this work, we present the Bregman Alternating Projected Gradient (BAPG) method, a single-loop algorithm that offers an approximate solution to the Gromov-Wasserstein (GW) distance. \nWe introduce a novel relaxation technique that balances accuracy and computational efficiency, albeit with some compromises in the feasibility of the coupling map. Our analysis is based on the observation that the GW problem satisfies the Luo-Tseng error bound condition, which relates to estimating the distance of a point to the critical point set of the GW problem based on the optimality residual.\nThis observation allows us to provide an approximation bound for the distance between the fixed-point set of BAPG and the critical point set of GW. 
Moreover, under a mild technical assumption, we can show that BAPG converges to its fixed point set.\nThe effectiveness of BAPG has been validated through comprehensive numerical experiments in graph alignment and partition tasks, where it outperforms existing methods in terms of both solution quality and wall-clock time.", "keywords": "Gromov-Wasserstein;Graph Learning;Optimization", "primary_area": "", "supplementary_material": "/attachment/3eb5dd5b21bb2fe644a375f5643f6507e78d7310.zip", "author": "Jiajin Li;Jianheng Tang;Lemin Kong;Huikang Liu;Jia Li;Anthony Man-Cho So;Jose Blanchet", "authorids": "~Jiajin_Li2;~Jianheng_Tang1;~Lemin_Kong1;~Huikang_Liu2;~Jia_Li4;~Anthony_Man-Cho_So1;~Jose_Blanchet1", "gender": "F;M;;M;M;M;M", "homepage": "https://gerrili1996.github.io/;https://squareroot3.github.io/;;https://huikang2019.github.io;https://sites.google.com/view/lijia;http://www1.se.cuhk.edu.hk/~manchoso/;https://web.stanford.edu/~jblanche/", "dblp": ";;320/8260;62/8489;23/6950-9;82/3202;75/5093.html", "google_scholar": ";w4kWvXEAAAAJ;;https://scholar.google.com.hk/citations?hl=zh-TW;1gSbcYoAAAAJ;https://scholar.google.com.hk/citations?user=whi3UisAAAAJ;https://scholar.google.co.in/citations?user=O24CcQQAAAAJ", "orcid": ";0000-0001-9341-7312;;;0000-0002-6362-4385;0000-0003-2588-7851;", "linkedin": ";;lemin-kong/;;;;jose-blanchet", "or_profile": "~Jiajin_Li2;~Jianheng_Tang1;~Lemin_Kong1;~Huikang_Liu2;~Jia_Li4;~Anthony_Man-Cho_So1;~Jose_Blanchet1", "aff": "Stanford University;Hong Kong University of Science and Technology;Chinese University of Hong Kong, The Chinese University of Hong Kong;Shanghai University of Finance and Economics;Hong Kong University of Science and Technology (Guangzhou);The Chinese University of Hong Kong;Stanford University", "aff_domain": "stanford.edu;ust.hk;se.cuhk.edu.hk;sufe.edu;ust.hk;cuhk.edu.hk;stanford.edu", "position": "Postdoc;PhD student;PhD student;Assistant Professor;Assistant Professor;Full Professor;Professor", "bibtex": "@inproceedings{\nli2023a,\ntitle={A Convergent Single-Loop Algorithm for Relaxation of Gromov-Wasserstein in Graph Data },\nauthor={Jiajin Li and Jianheng Tang and Lemin Kong and Huikang Liu and Jia Li and Anthony Man-Cho So and Jose Blanchet},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=0jxPyVWmiiF}\n}", "github": "", "project": "", "reviewers": "Jipi;huho;h6Kq;sorA", "pdf_size": 434421, "recommendation": "8;8;8;8", "confidence": "2;2;4;3", "correctness": "4;4;4;3", "technical_novelty": "3;4;3;3", "empirical_novelty": "0;4;3;0", "wc_summary_paper": "192;106;280;135", "wc_strength_and_weaknesses": "142;82;391;423", "wc_clarity_quality_novelty_and_reproducibility": "23;83;112;256", "wc_summary_review": "71;59;28;88", "wc_review": "428;330;811;902", "wc_reply_reviewers": "0;10;14;165", "wc_reply_authors": "302;191;529;2713", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;6", "recommendation_avg": [ 8.0, 0.0 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.75, 1.7853571071357126 ], "wc_summary_paper_avg": [ 178.25, 66.39418272710343 ], "wc_strength_and_weaknesses_avg": [ 259.5, 149.44647871395298 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 118.5, 85.62855832022399 ], "wc_summary_review_avg": [ 61.5, 21.914607000811127 ], "wc_review_avg": [ 617.75, 243.3869090563418 ], "wc_reply_reviewers_avg": [ 
47.25, 68.1739503036167 ], "wc_reply_authors_avg": [ 933.75, 1034.4489777171225 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 2.165063509461097 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16189442419389467985&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=0jxPyVWmiiF", "email": "stanford.edu;ust.hk;se.cuhk.edu.hk;sufe.edu;ust.hk;cuhk.edu.hk;stanford.edu", "author_num": 7, "aff_unique_index": "0;1;2;3;1;2;0", "aff_unique_norm": "Stanford University;Hong Kong University of Science and Technology;Chinese University of Hong Kong;Shanghai University of Finance and Economics", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.stanford.edu;https://www.ust.hk;https://www.cuhk.edu.hk;http://www.sufe.edu.cn", "aff_unique_abbr": "Stanford;HKUST;CUHK;SUFE", "aff_campus_unique_index": "0;1;1;1;1;0", "aff_campus_unique": "Stanford;Hong Kong SAR;", "aff_country_unique_index": "0;1;1;1;1;1;0", "aff_country_unique": "United States;China" }, { "id": "0nI0G46i6kT", "title": "Real Data Distributions Prefer Simplicity and So Do Our Models: Why Machine Learning and Model Selection Are Possible", "track": "main", "status": "Withdraw", "tldr": "We demonstrate that neural networks, trained or randomly initialized, prefer the low-complexity data we observe in practice, and we explain how model selection can be automated.", "abstract": "No free lunch theorems for supervised learning state that no learner can solve all problems or that all learners achieve exactly the same accuracy on average over a uniform distribution on learning problems. Accordingly, these theorems are often referenced in support of the notion that individual problems require specially tailored inductive biases. While all but exponentially few uniformly sampled datasets have high complexity, we argue that neural network models share the same preference for low-complexity data that we observe on real-world problems. Notably, we show that architectures designed for a particular domain, such as computer vision, are compressors for labeling functions on a variety of seemingly unrelated domains. From our experiments, we see that pre-trained and even randomly initialized language models prefer to generate low-complexity sequences and can therefore be used for inference. In principle, the use of expert knowledge and bias for simplicity of human practitioners could be folded into the learning algorithm, automating design and selection of models. We explain how typical areas requiring human intervention such as picking the appropriately sized model when labeled data is sparse or plentiful can be automated into a single learning algorithm. 
These observations help justify the trend in deep learning of unifying seemingly disparate problems with an increasingly small set of machine learning models.", "keywords": "No Free Lunch;PAC-Bayes;Simplicity Bias;Model Selection;Meta-Learning", "primary_area": "", "supplementary_material": "", "author": "Micah Goldblum;Marc Anton Finzi;Keefer Rowan;Andrew Gordon Wilson", "authorids": "~Micah_Goldblum1;~Marc_Anton_Finzi1;~Keefer_Rowan1;~Andrew_Gordon_Wilson1", "gender": ";M;M;Not Specified", "homepage": ";https://mfinzi.github.io;https://cims.nyu.edu/~kjr9750/;https://cims.nyu.edu/~andrewgw", "dblp": "241/7231;222/3062;344/5763;65/10453", "google_scholar": "pGDKzuUAAAAJ;ysMAhlwAAAAJ;;https://scholar.google.com.tw/citations?user=twWX2LIAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Micah_Goldblum1;~Marc_Anton_Finzi1;~Keefer_Rowan1;~Andrew_Gordon_Wilson1", "aff": "New York University;New York University;NYU, New York University;New York University", "aff_domain": "nyu.edu;nyu.edu;cims.nyu.edu;nyu.edu", "position": "Postdoc;PhD student;PhD student;Associate Professor", "bibtex": "@misc{\ngoldblum2023real,\ntitle={Real Data Distributions Prefer Simplicity and So Do Our Models: Why Machine Learning and Model Selection Are Possible},\nauthor={Micah Goldblum and Marc Anton Finzi and Keefer Rowan and Andrew Gordon Wilson},\nyear={2023},\nurl={https://openreview.net/forum?id=0nI0G46i6kT}\n}", "github": "", "project": "", "reviewers": "Kcnb;2AeB;Y5WA;7B9H", "site": "https://openreview.net/forum?id=0nI0G46i6kT", "pdf_size": 471115, "recommendation": "3;3;3;3", "confidence": "3;3;3;4", "correctness": "1;2;3;2", "technical_novelty": "2;2;2;1", "empirical_novelty": "1;2;2;1", "wc_summary_paper": "51;99;144;33", "wc_strength_and_weaknesses": "401;182;311;261", "wc_clarity_quality_novelty_and_reproducibility": "45;33;22;18", "wc_summary_review": "90;9;66;25", "wc_review": "587;323;543;337", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "586;127;521;263", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 2.0, 0.7071067811865476 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.5, 0.5 ], "wc_summary_paper_avg": [ 81.75, 43.28611209152423 ], "wc_strength_and_weaknesses_avg": [ 288.75, 79.46815399894476 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 29.5, 10.5 ], "wc_summary_review_avg": [ 47.5, 32.159757461772 ], "wc_review_avg": [ 447.5, 118.62862217862939 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 374.25, 187.00451197765256 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:MBFkl-SUmWkJ:scholar.google.com/&scioq=Real+Data+Distributions+Prefer+Simplicity+and+So+Do+Our+Models:+Why+Machine+Learning+and+Model+Selection+Are+Possible&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "New York University", "aff_unique_dep": "", "aff_unique_url": "https://www.nyu.edu", "aff_unique_abbr": "NYU", "aff_campus_unique_index": "1", "aff_campus_unique": ";New York", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "0nroZT5gHsS", "title": "Generalization Properties of Retrieval-based Models", "track": "main", 
"status": "Reject", "tldr": "We present a novel theoretical analysis to study the generalization bounds for retrieval-based classification models.", "abstract": "Many modern high-performing machine learning models such as GPT-3 primarily rely on scaling up models, e.g., transformer networks. Simultaneously, a parallel line of work aims to improve the model performance by augmenting an input instance with other (labeled) instances during inference. Examples of such augmentations include task-specific prompts and similar examples retrieved from the training data by a nonparametric component. Remarkably, retrieval-based methods have enjoyed success on a wide range of problems, ranging from standard natural language processing and vision tasks to protein folding, as demonstrated by many recent efforts, including WebGPT and AlphaFold. Despite growing literature showcasing the promise of these models, the theoretical underpinning for such models remains underexplored. In this paper, we present a formal treatment of retrieval-based models to characterize their generalization ability. In particular, we focus on two classes of retrieval-based classification approaches: First, we analyze a local learning framework that employs an explicit local empirical risk minimization based on retrieved examples for each input instance. Interestingly, we show that breaking down the underlying learning task into local sub-tasks enables the model to employ a low complexity parametric component to ensure good overall accuracy. The second class of retrieval-based approaches we explore learns a global model using kernel methods to directly map an input instance and retrieved examples to a prediction, without explicitly solving a local learning task.", "keywords": "Generalization bounds;retrieval-based models;local empirical risk minimization;semiparametric models;nonparametric models;kernel methods", "primary_area": "", "supplementary_material": "", "author": "Soumya Basu;Ankit Singh Rawat;Manzil Zaheer", "authorids": "~Soumya_Basu2;~Ankit_Singh_Rawat1;~Manzil_Zaheer1", "gender": "M;M;M", "homepage": "https://basusoumya.github.io/;https://ankitsrawat.github.io/home/;https://www.aclweb.org/anthology/people/m/manzil-zaheer/", "dblp": "153/0318-1;https://dblp.org/pers/hd/r/Rawat:Ankit_Singh;40/10701", "google_scholar": "VNQp_doAAAAJ;http://scholar.google.com/citations?user=U0_ab4cAAAAJ;A33FhJMAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Soumya_Basu2;~Ankit_Singh_Rawat1;~Manzil_Zaheer1", "aff": "Google;Google;Google DeepMind", "aff_domain": "google.com;google.com;deepmind.com", "position": "SWE;Research Scientist;Researcher", "bibtex": "@misc{\nbasu2023generalization,\ntitle={Generalization Properties of Retrieval-based Models},\nauthor={Soumya Basu and Ankit Singh Rawat and Manzil Zaheer},\nyear={2023},\nurl={https://openreview.net/forum?id=0nroZT5gHsS}\n}", "github": "", "project": "", "reviewers": "uQx9;5HmN;94k1;bKCw", "site": "https://openreview.net/forum?id=0nroZT5gHsS", "pdf_size": 603693, "recommendation": "3;5;6;6", "confidence": "4;3;3;1", "correctness": "4;2;3;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "0;2;0;2", "wc_summary_paper": "250;73;242;209", "wc_strength_and_weaknesses": "266;329;196;2", "wc_clarity_quality_novelty_and_reproducibility": "75;102;75;2", "wc_summary_review": "78;66;71;2", "wc_review": "669;570;584;215", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "906;1142;292;40", "reply_reviewers": "0;0;0;0", "reply_authors": "2;2;1;1", "recommendation_avg": [ 5.0, 
1.224744871391589 ], "confidence_avg": [ 2.75, 1.0897247358851685 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.0, 1.0 ], "wc_summary_paper_avg": [ 193.5, 71.24780698379425 ], "wc_strength_and_weaknesses_avg": [ 198.25, 122.68328125706452 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 63.5, 37.178622890042604 ], "wc_summary_review_avg": [ 54.25, 30.466169762541533 ], "wc_review_avg": [ 509.5, 174.19888059341827 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 595.0, 446.0280260252712 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.7492686492653551, "corr_recommendation_correctness": -0.5773502691896257, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10564565796411018547&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;United Kingdom" }, { "id": "0oDzoRjrbj", "title": "Weak Supervision Variational Auto-Encoder", "track": "main", "status": "Withdraw", "tldr": "A VAE model with specifically designed components to perform weak supervision. Compared to existing weak supervision methods, it is considerably more robust to labelling function design.", "abstract": "Recent advances in weak supervision (WS) techniques make it possible to mitigate the enormous labelling cost of human data annotation for deep learning by automating it using simple rule-based labelling functions (LFs). However, LFs need to be carefully designed, often requiring expert domain knowledge to be sufficiently accurate, cover enough data and be independent of each other for existing WS methods to be viable. In addition, weak supervision methods often rely on small amounts of validation data with true labels to fine-tune and select models. \nTo tackle these issues, we propose the Weak Supervision Variational Auto-Encoder (WS-VAE), a novel framework that combines unsupervised representation learning and weak labelling to reduce the dependence of WS on expert and manual engineering of LFs. The proposed technique learns from inputs and weak labels jointly and captures the distribution of the input signals with an artificial latent space, leading to considerably improved robustness to LF quality.
Our extensive empirical evaluation shows that our WS-VAE performs competitively with existing WS methods on a standard WS benchmark while being substantially more robust to LF engineering.", "keywords": "Variational Auto-Encoders;Weak Supervision;Weak Labelling", "primary_area": "", "supplementary_material": "", "author": "Francesco Tonolini;Nikolaos Aletras;Yunlong Jiao;Gabriella Kazai", "authorids": "~Francesco_Tonolini1;~Nikolaos_Aletras1;~Yunlong_Jiao1;gkazai@amazon.com", "gender": "M;;M;", "homepage": ";;https://yunlongjiao.github.io/;", "dblp": ";118/9116;164/7317;", "google_scholar": "4urrvVQAAAAJ;https://scholar.google.co.uk/citations?user=uxRWFhoAAAAJ;https://scholar.google.co.uk/citations?user=NgTM33MAAAAJ;", "orcid": ";;0000-0002-0776-0550;", "linkedin": ";;yunlong-jiao/;", "or_profile": "~Francesco_Tonolini1;~Nikolaos_Aletras1;~Yunlong_Jiao1;gkazai@amazon.com", "aff": "Amazon;Amazon;Amazon;", "aff_domain": "amazon.com;amazon.com;amazon.com;", "position": "Researcher;Researcher;Machine Learning Scientist;", "bibtex": "@misc{\ntonolini2023weak,\ntitle={Weak Supervision Variational Auto-Encoder},\nauthor={Francesco Tonolini and Nikolaos Aletras and Yunlong Jiao and Gabriella Kazai},\nyear={2023},\nurl={https://openreview.net/forum?id=0oDzoRjrbj}\n}", "github": "", "project": "", "reviewers": "TAK1;SJ2d;zBxr", "site": "https://openreview.net/forum?id=0oDzoRjrbj", "pdf_size": 557591, "recommendation": "3;3;5", "confidence": "4;4;5", "correctness": "2;3;3", "technical_novelty": "2;3;3", "empirical_novelty": "2;3;3", "wc_summary_paper": "103;73;30", "wc_strength_and_weaknesses": "169;467;60", "wc_clarity_quality_novelty_and_reproducibility": "23;65;123", "wc_summary_review": "37;36;25", "wc_review": "332;641;238", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 68.66666666666667, 29.95923155816176 ], "wc_strength_and_weaknesses_avg": [ 232.0, 172.0251919535818 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 70.33333333333333, 40.99864496405812 ], "wc_summary_review_avg": [ 32.666666666666664, 5.436502143433364 ], "wc_review_avg": [ 403.6666666666667, 172.15174185067727 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 1.0, "corr_recommendation_correctness": 0.5, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:_6qz0UOOB14J:scholar.google.com/&scioq=Weak+Supervision+Variational+Auto-Encoder&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Amazon", "aff_unique_dep": "Amazon.com, Inc.", "aff_unique_url": "https://www.amazon.com", "aff_unique_abbr": "Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "0o_PPAJstY", "title": "A Multi-objective Perspective towards Improving Meta-Generalization", "track": "main", "status": "Withdraw", "tldr": "We propose to improve meta-generalization from a multi-objective point of view.
", "abstract": "To improve meta-generalization, i.e., accommodating out-of-domain meta-testing tasks beyond meta-training ones, is of significance to extending the success of meta-learning beyond standard benchmarks. Previous heterogeneous meta-learning algorithms have shown that tailoring the global meta-knowledge by the learned clusters during meta-training promotes better meta-generalization to novel meta-testing tasks. Inspired by this, we propose a novel multi-objective perspective to sharpen the compositionality of the meta-trained clusters, through which we have empirically validated that the meta-generalization further improves. Grounded on the hierarchically structured meta-learning framework, we formulate a hypervolume loss to evaluate the degree of conflict between multiple cluster-conditioned parameters in the two-dimensional loss space over two randomly chosen tasks belonging to two clusters and two mixed tasks imitating out-of-domain tasks. Experimental results on more than 16 few-shot image classification datasets show not only improved performance on out-of-domain meta-testing datasets but also better clusters in visualization. ", "keywords": "meta learning;multi-objective optimization", "primary_area": "", "supplementary_material": "/attachment/26d3ca64c362bce25b5fc96f621f0671e1684b48.zip", "author": "Weiduo Liao;Ying Wei;Qirui Sun;Qingfu Zhang;Hisao Ishibuchi", "authorids": "~Weiduo_Liao1;~Ying_Wei1;~Qirui_Sun1;~Qingfu_Zhang1;~Hisao_Ishibuchi1", "gender": "M;F;M;M;M", "homepage": ";https://wei-ying.net/;;https://www.cs.cityu.edu.hk/~qzhan7/index.html;", "dblp": ";14/4899-1;;98/1240.html;i/HisaoIshibuchi", "google_scholar": "tUNrOg8AAAAJ;5UpFdKsAAAAJ;https://scholar.google.ca/citations?hl=en;https://scholar.google.co.uk/citations?user=nhL9PHwAAAAJ;vx9EZN4AAAAJ", "orcid": ";;;;0000-0001-9186-6472", "linkedin": ";;qirui-sun-b55907228/;;", "or_profile": "~Weiduo_Liao1;~Ying_Wei1;~Qirui_Sun1;~Qingfu_Zhang1;~Hisao_Ishibuchi1", "aff": "City University of Hong Kong;City University of Hong Kong;City University of Hong Kong;City University of Hong Kong;Southern University of Science and Technology", "aff_domain": "cityu.edu.hk;cityu.edu.hk;cityu.edu.hk;cityu.edu.hk;sustech.edu.cn", "position": "PhD student;Assistant Professor;PhD student;Full Professor;Full Professor", "bibtex": "@misc{\nliao2023a,\ntitle={A Multi-objective Perspective towards Improving Meta-Generalization},\nauthor={Weiduo Liao and Ying Wei and Qirui Sun and Qingfu Zhang and Hisao Ishibuchi},\nyear={2023},\nurl={https://openreview.net/forum?id=0o_PPAJstY}\n}", "github": "", "project": "", "reviewers": "LKne;ExzU;3C5m", "site": "https://openreview.net/forum?id=0o_PPAJstY", "pdf_size": 3651170, "recommendation": "3;3;3", "confidence": "4;4;4", "correctness": "3;3;2", "technical_novelty": "2;3;2", "empirical_novelty": "2;2;2", "wc_summary_paper": "37;126;21", "wc_strength_and_weaknesses": "126;148;41", "wc_clarity_quality_novelty_and_reproducibility": "30;1027;31", "wc_summary_review": "37;69;62", "wc_review": "230;1370;155", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 61.333333333333336, 46.19042709864843 ], "wc_strength_and_weaknesses_avg": [ 105.0, 46.13747572924495 ], 
"wc_clarity_quality_novelty_and_reproducibility_avg": [ 362.6666666666667, 469.75478236581637 ], "wc_summary_review_avg": [ 56.0, 13.73559851869101 ], "wc_review_avg": [ 585.0, 555.9226564909907 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:9CHSpJ5qyrAJ:scholar.google.com/&scioq=A+Multi-objective+Perspective+towards+Improving+Meta-Generalization&hl=en&as_sdt=0,7", "gs_version_total": 4, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "City University of Hong Kong;Southern University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.cityu.edu.hk;https://www.sustech.edu.cn", "aff_unique_abbr": "CityU;SUSTech", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "ISAAC Newton: Input-based Approximate Curvature for Newton's Method", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11813", "id": "0paCJSFW7j", "poster": "/media/PosterPDFs/ICLR%202023/11813.png?t=1682762990.9360697", "openreview": "https://openreview.net/forum?id=0paCJSFW7j", "slides": "https://iclr.cc/virtual/2023/poster/11813", "video": "https://iclr.cc/virtual/2023/poster/11813", "author_site": "Felix Petersen, Tobias Sutter, Christian Borgelt, Dongsung Huh, Hilde Kuehne, Yuekai Sun, Oliver Deussen", "tldr": "", "abstract": "We present ISAAC (Input-baSed ApproximAte Curvature), a novel method that conditions the gradient using selected second-order information and has an asymptotically vanishing computational overhead, assuming a batch size smaller than the number of neurons. We show that it is possible to compute a good conditioner based on only the input to a respective layer without a substantial computational overhead. 
The proposed method allows effective training even in small-batch stochastic regimes, which makes it competitive with first-order as well as second-order methods.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Felix Petersen;Tobias Sutter;Christian Borgelt;Dongsung Huh;Hilde Kuehne;Yuekai Sun;Oliver Deussen", "authorids": "~Felix_Petersen1;~Tobias_Sutter1;~Christian_Borgelt1;~Dongsung_Huh1;~Hilde_Kuehne5;~Yuekai_Sun1;~Oliver_Deussen1", "gender": "Not Specified;M;M;;F;;M", "homepage": "http://www.petersen.ai/;https://sites.google.com/view/suttert/home;https://www.borgelt.net/;;https://hildekuehne.github.io;https://yuekai.github.io/;https://graphics.uni-konstanz.de", "dblp": "230/3983;01/10961;b/ChristianBorgelt.html;147/6326;45/4963;;48/2158", "google_scholar": "v8Kat6YAAAAJ;https://scholar.google.ch/citations?user=11gxHJIAAAAJ;https://scholar.google.de/citations?user=T50Bxb8AAAAJ;;pxhCcH0AAAAJ;6T1XtW8AAAAJ;https://scholar.google.de/scholar?hl=en", "orcid": ";0000-0003-1226-6845;;;0000-0003-1079-4441;;0000-0001-5803-2185", "linkedin": ";;christian-borgelt-a2429071/;;hilde-kuehne-8b9aa661;;", "or_profile": "~Felix_Petersen1;~Tobias_Sutter1;~Christian_Borgelt1;~Dongsung_Huh1;~Hilde_Kuehne5;~Yuekai_Sun1;~Oliver_Deussen1", "aff": "Stanford University;Universit\u00e4t Konstanz;Paris-Lodron-University of Salzburg;International Business Machines;Goethe University Frankfurt;University of Michigan - Ann Arbor;University of Konstanz", "aff_domain": "stanford.edu;uni-konstanz.de;sbg.ac.at;ibm.com;uni-frankfurt.de;umich.edu;uni-konstanz.de", "position": "Postdoc;Assistant Professor;Full Professor;Principal Researcher;Assistant Professor;Assistant \u2192 Associate Professor of Statistics;Full Professor", "bibtex": "@inproceedings{\npetersen2023isaac,\ntitle={{ISAAC} Newton: Input-based Approximate Curvature for Newton's Method},\nauthor={Felix Petersen and Tobias Sutter and Christian Borgelt and Dongsung Huh and Hilde Kuehne and Yuekai Sun and Oliver Deussen},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=0paCJSFW7j}\n}", "github": "", "project": "", "reviewers": "2oTp;xNi3;U2Re", "pdf_size": 793307, "recommendation": "5;6;8", "confidence": "3;2;5", "correctness": "4;3;3", "technical_novelty": "2;2;4", "empirical_novelty": "2;2;3", "wc_summary_paper": "21;62;127", "wc_strength_and_weaknesses": "36;19;62", "wc_clarity_quality_novelty_and_reproducibility": "18;6;401", "wc_summary_review": "16;64;29", "wc_review": "91;151;619", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.3333333333333335, 1.247219128924647 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.9428090415820634 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 70.0, 43.642486944108335 ], "wc_strength_and_weaknesses_avg": [ 39.0, 17.682382946499793 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 141.66666666666666, 183.44178610362715 ], "wc_summary_review_avg": [ 36.333333333333336, 20.270394394014364 ], "wc_review_avg": [ 287.0, 236.03389587091087 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 7, 0 ],
"corr_recommendation_confidence": 0.7857142857142859, "corr_recommendation_correctness": -0.7559289460184545, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1742067218575233274&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "pdf": "https://openreview.net/pdf?id=0paCJSFW7j", "email": "stanford.edu;uni-konstanz.de;sbg.ac.at;ibm.com;uni-frankfurt.de;umich.edu;uni-konstanz.de", "author_num": 7, "aff_unique_index": "0;1;2;3;4;5;6", "aff_unique_norm": "Stanford University;Universit\u00e4t Konstanz;Paris-Lodron-University of Salzburg;International Business Machines Corporation;Goethe University Frankfurt;University of Michigan;University of Konstanz", "aff_unique_dep": ";;;;;;", "aff_unique_url": "https://www.stanford.edu;https://www.uni-konstanz.de;https://www.uni-salzburg.at;https://www.ibm.com;https://www.uni-frankfurt.de;https://www.umich.edu;https://www.uni-konstanz.de", "aff_unique_abbr": "Stanford;Uni Konstanz;PLUS;IBM;GU Frankfurt;UM;Uni Konstanz", "aff_campus_unique_index": "0;2;3", "aff_campus_unique": "Stanford;;Frankfurt;Ann Arbor", "aff_country_unique_index": "0;1;2;0;1;0;1", "aff_country_unique": "United States;Germany;Austria" }, { "title": "Specformer: Spectral Graph Neural Networks Meet Transformers", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10782", "id": "0pdSt3oyJa1", "poster": "/media/PosterPDFs/ICLR%202023/10782.png?t=1682928493.999109", "openreview": "https://openreview.net/forum?id=0pdSt3oyJa1", "slides": "https://iclr.cc/virtual/2023/poster/10782", "video": "https://iclr.cc/virtual/2023/poster/10782", "author_site": "Deyu Bo, Chuan Shi, Lele Wang, Renjie Liao", "tldr": "We propose a novel set-to-set spectral graph filter by using a spectral domain Transformer.", "abstract": "Spectral graph neural networks (GNNs) learn graph representations via spectral-domain graph convolutions. However, most existing spectral graph filters are scalar-to-scalar functions, i.e., mapping a single eigenvalue to a single filtered value, thus ignoring the global pattern of the spectrum. Furthermore, these filters are often constructed based on some fixed-order polynomials, which have limited expressiveness and flexibility. To tackle these issues, we introduce Specformer, which effectively encodes the set of all eigenvalues and performs self-attention in the spectral domain, leading to a learnable set-to-set spectral filter. We also design a decoder with learnable bases to enable non-local graph convolution. Importantly, Specformer is equivariant to permutation. By stacking multiple Specformer layers, one can build a powerful spectral GNN. On synthetic datasets, we show that our Specformer can better recover ground-truth spectral filters than other spectral GNNs. Extensive experiments of both node-level and graph-level tasks on real-world graph datasets show that our Specformer outperforms state-of-the-art GNNs and learns meaningful spectrum patterns. 
Code and data are available at https://github.com/bdy9527/Specformer.", "keywords": "Spectral Graph Neural Networks;Transformer", "primary_area": "", "supplementary_material": "/attachment/332dcb39715783fb5cc9c801d9ef9c9404db4220.zip", "author": "Deyu Bo;Chuan Shi;Lele Wang;Renjie Liao", "authorids": "~Deyu_Bo1;~Chuan_Shi1;~Lele_Wang1;~Renjie_Liao1", "gender": "M;M;F;M", "homepage": "https://bdy9527.github.io/;http://www.shichuan.org/;https://sites.google.com/site/wanglele1986/;https://lrjconan.github.io/", "dblp": "258/0824;64/3041-1;11/7909-1;08/8180", "google_scholar": "m4rsQCAAAAAJ;tUq_v90AAAAJ;ySwF8ioAAAAJ;2wrS35MAAAAJ", "orcid": "0000-0003-2063-8223;0000-0002-3734-0266;;", "linkedin": ";;;", "or_profile": "~Deyu_Bo1;~Chuan_Shi1;~Lele_Wang1;~Renjie_Liao1", "aff": "Beijing University of Post and Telecommunication;Beijing University of Post and Telecommunication;University of British Columbia;Department of Electrical and Computer Engineering, The University of British Columbia", "aff_domain": "bupt.edu.cn;bupt.edu.cn;ubc.ca;ece.ubc.ca", "position": "PhD student;Full Professor;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nbo2023specformer,\ntitle={Specformer: Spectral Graph Neural Networks Meet Transformers},\nauthor={Deyu Bo and Chuan Shi and Lele Wang and Renjie Liao},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=0pdSt3oyJa1}\n}", "github": "", "project": "", "reviewers": "T8vi;Ge61;ur3z;JCjn", "pdf_size": 1119140, "recommendation": "5;5;6;6", "confidence": "4;4;4;5", "correctness": "2;3;3;4", "technical_novelty": "2;3;3;4", "empirical_novelty": "2;2;2;4", "wc_summary_paper": "25;42;54;70", "wc_strength_and_weaknesses": "216;199;103;189", "wc_clarity_quality_novelty_and_reproducibility": "216;28;301;32", "wc_summary_review": "78;68;69;32", "wc_review": "535;337;527;323", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "633;360;734;244", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 47.75, 16.467771555374455 ], "wc_strength_and_weaknesses_avg": [ 176.75, 43.65990723764768 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 144.25, 118.14477347728929 ], "wc_summary_review_avg": [ 61.75, 17.612140698961042 ], "wc_review_avg": [ 430.5, 100.66156168071306 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 492.75, 198.34991177210037 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.7071067811865475, "gs_citation": 109, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11296222061664966611&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "pdf": "https://openreview.net/pdf?id=0pdSt3oyJa1", "email": "bupt.edu.cn;bupt.edu.cn;ubc.ca;ece.ubc.ca", "author_num": 4, "aff_unique_index": "0;0;1;1", "aff_unique_norm": "Beijing University of Posts and Telecommunications;University of British Columbia", "aff_unique_dep": ";", "aff_unique_url": "http://www.bupt.edu.cn/;https://www.ubc.ca", "aff_unique_abbr": "BUPT;UBC", "aff_campus_unique_index": "0;0;2", "aff_campus_unique": "Beijing;;Vancouver", 
"aff_country_unique_index": "0;0;1;1", "aff_country_unique": "China;Canada" }, { "title": "Calibrating Sequence likelihood Improves Conditional Language Generation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11774", "id": "0qSOodKmJaN", "poster": "", "openreview": "https://openreview.net/forum?id=0qSOodKmJaN", "slides": "https://iclr.cc/virtual/2023/poster/11774", "video": "https://iclr.cc/virtual/2023/poster/11774", "author_site": "Yao Zhao, Misha Khalman, Rishabh Joshi, Shashi Narayan, Mohammad Saleh, Peter Liu", "tldr": "A proposed sequence likelihood calibration stage improves fine-tuned conditional language models, leading to new state-of-the-art results in abstractive summarization, question generation, abstractive question answering and data-to-text.", "abstract": "Conditional language models are predominantly trained with maximum likelihood estimation (MLE), giving probability mass to sparsely observed target sequences. While MLE trained models assign high probability to plausible sequences given the context, the model probabilities often do not accurately rank-order generated sequences by quality. This has been empirically observed in beam search decoding as output quality degrading with large beam sizes, and decoding strategies benefiting from heuristics such as length normalization and repetition-blocking. In this work, we introduce sequence likelihood calibration (SLiC) where the likelihood of model generated sequences are calibrated to better align with reference sequences in the model\u2019s latent space. With SLiC, decoding heuristics become unnecessary and decoding candidates\u2019 quality significantly improves regardless of the decoding method. Furthermore, SLiC shows no sign of diminishing returns with model scale, and presents alternative ways to improve quality with limited training and inference budgets. 
With SLiC, we exceed or match SOTA results on a wide range of generation tasks spanning abstractive summarization, question generation, abstractive question answering and data-to-text generation, even with modest-sized models.", "keywords": "Natural Language Processing;conditional language models;sequence-to-sequence;text generation", "primary_area": "", "supplementary_material": "", "author": "Yao Zhao;Mikhail Khalman;Rishabh Joshi;Shashi Narayan;Mohammad Saleh;Peter J Liu", "authorids": "~Yao_Zhao5;~Mikhail_Khalman1;~Rishabh_Joshi1;~Shashi_Narayan1;~Mohammad_Saleh1;~Peter_J_Liu1", "gender": ";M;M;M;;", "homepage": ";;http://rishabhjoshi.github.io;https://sites.google.com/corp/view/shashinarayan/;;http://www.peterjliu.com", "dblp": ";;228/5645;74/8458;;190/7667", "google_scholar": "p7L3HrMAAAAJ;;https://scholar.google.co.in/citations?user=vu2pNVAAAAAJ;prEcE9IAAAAJ;MmX7K38AAAAJ;", "orcid": ";;;;;", "linkedin": ";khalman/;joshi-rishabh/;;mohammad-saleh-18a56b155;p3t3rliu", "or_profile": "~Yao_Zhao5;~Mikhail_Khalman1;~Rishabh_Joshi1;~Shashi_Narayan1;~Mohammad_Saleh1;~Peter_J_Liu1", "aff": "Google;Google;Google;Google;;Google Brain", "aff_domain": "google.com;google.com;google.com;google.com;;google.com", "position": "Researcher;Researcher;Researcher;Research Scientist;;Research Scientist", "bibtex": "@inproceedings{\nzhao2023calibrating,\ntitle={Calibrating Sequence likelihood Improves Conditional Language Generation},\nauthor={Yao Zhao and Mikhail Khalman and Rishabh Joshi and Shashi Narayan and Mohammad Saleh and Peter J Liu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=0qSOodKmJaN}\n}", "github": "", "project": "", "reviewers": "o62J;YBaL;JpRY", "pdf_size": 515984, "recommendation": "6;6;8", "confidence": "4;4;2", "correctness": "3;3;3", "technical_novelty": "3;3;2", "empirical_novelty": "3;3;3", "wc_summary_paper": "108;225;73", "wc_strength_and_weaknesses": "566;222;394", "wc_clarity_quality_novelty_and_reproducibility": "1;209;50", "wc_summary_review": "67;18;60", "wc_review": "742;674;577", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "364;688;359", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 135.33333333333334, 64.99401681864433 ], "wc_strength_and_weaknesses_avg": [ 394.0, 140.4374119195689 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 86.66666666666667, 88.7856344736517 ], "wc_summary_review_avg": [ 48.333333333333336, 21.638443156156644 ], "wc_review_avg": [ 664.3333333333334, 67.70688459988557 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 470.3333333333333, 153.92711117784143 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": 0.0, "gs_citation": 140, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4090400391430053556&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=0qSOodKmJaN", "email": "google.com;google.com;google.com;google.com;;google.com", "author_num": 6, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", 
"aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Rethinking the Effect of Data Augmentation in Adversarial Contrastive Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12097", "id": "0qmwFNJyxCL", "poster": "/media/PosterPDFs/ICLR%202023/12097.png?t=1682257599.4938314", "openreview": "https://openreview.net/forum?id=0qmwFNJyxCL", "slides": "https://iclr.cc/virtual/2023/poster/12097", "video": "https://iclr.cc/virtual/2023/poster/12097", "author_site": "Rundong Luo, Yifei Wang, Yisen Wang", "tldr": "We revisit adversarial contrastive training through the lens of data augmentation, and propose an effective adversarial contrastive framework that outperforms vanilla supervised adversarial robustness.", "abstract": "Recent works have shown that self-supervised learning can achieve remarkable robustness when integrated with adversarial training (AT). However, the robustness gap between supervised AT (sup-AT) and self-supervised AT (self-AT) remains significant. Motivated by this observation, we revisit existing self-AT methods and discover an inherent dilemma that affects self-AT robustness: either strong or weak data augmentations are harmful to self-AT, and a medium strength is insufficient to bridge the gap. To resolve this dilemma, we propose a simple remedy named DYNACL (Dynamic Adversarial Contrastive Learning). In particular, we propose an augmentation schedule that gradually anneals from a strong augmentation to a weak one to benefit from both extreme cases. Besides, we adopt a fast post-processing stage for adapting it to downstream tasks. Through extensive experiments, we show that DYNACL can improve state-of-the-art self-AT robustness by 8.84% under Auto-Attack on the CIFAR-10 dataset, and can even outperform vanilla supervised adversarial training for the first time. 
Our code is available at \\url{https://github.com/PKU-ML/DYNACL}.", "keywords": "adversarial training;contrastive learning;adversarial contrastive learning", "primary_area": "", "supplementary_material": "/attachment/1e2ea2293968ddcc81642c2d5770c383b5d57d3f.zip", "author": "Rundong Luo;Yifei Wang;Yisen Wang", "authorids": "~Rundong_Luo1;~Yifei_Wang1;~Yisen_Wang1", "gender": "M;M;M", "homepage": "https://red-fairy.github.io/;https://yifeiwang77.com;https://yisenwang.github.io/", "dblp": "328/0914;00/555-1;172/1346-1", "google_scholar": "dc8tL2sAAAAJ;-CLy6YsAAAAJ;uMWPDboAAAAJ", "orcid": "0009-0005-3219-0376;;", "linkedin": ";;", "or_profile": "~Rundong_Luo1;~Yifei_Wang1;~Yisen_Wang1", "aff": "Stanford University;Peking University;Peking University", "aff_domain": "stanford.edu;pku.edu.cn;pku.edu.cn", "position": "Intern;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nluo2023rethinking,\ntitle={Rethinking the Effect of Data Augmentation in Adversarial Contrastive Learning},\nauthor={Rundong Luo and Yifei Wang and Yisen Wang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=0qmwFNJyxCL}\n}", "github": "", "project": "", "reviewers": "Kni5;UQhW;umnn", "pdf_size": 1923571, "recommendation": "6;6;8", "confidence": "4;4;5", "correctness": "3;3;4", "technical_novelty": "2;3;2", "empirical_novelty": "3;3;0", "wc_summary_paper": "81;71;51", "wc_strength_and_weaknesses": "138;93;327", "wc_clarity_quality_novelty_and_reproducibility": "45;66;52", "wc_summary_review": "41;24;18", "wc_review": "305;254;448", "wc_reply_reviewers": "0;10;238", "wc_reply_authors": "356;440;2512", "reply_reviewers": "0;1;1", "reply_authors": "1;1;4", "recommendation_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 1.4142135623730951 ], "wc_summary_paper_avg": [ 67.66666666666667, 12.472191289246473 ], "wc_strength_and_weaknesses_avg": [ 186.0, 101.3804714922948 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 54.333333333333336, 8.73053390247253 ], "wc_summary_review_avg": [ 27.666666666666668, 9.741092797468305 ], "wc_review_avg": [ 335.6666666666667, 82.11509537770074 ], "wc_reply_reviewers_avg": [ 82.66666666666667, 109.9130969852496 ], "wc_reply_authors_avg": [ 1102.6666666666667, 997.1390185035497 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 1.4142135623730951 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.9999999999999997, "corr_recommendation_correctness": 0.9999999999999998, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7552996629882222774&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=0qmwFNJyxCL", "email": "stanford.edu;pku.edu.cn;pku.edu.cn", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Stanford University;Peking University", "aff_unique_dep": ";", "aff_unique_url": "https://www.stanford.edu;http://www.pku.edu.cn", "aff_unique_abbr": "Stanford;Peking U", "aff_campus_unique_index": "0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United States;China" }, { "id": "0qnryNf6XwR", "title": "When are smooth-ReLUs ReLU-like?", "track": "main", 
"status": "Reject", "tldr": "We parametrize relaxations of ReLU and devise initialization schemes that retain ReLU-like properties while being differentiable, verified experimentally and confirmed during training.", "abstract": "ReLU is one of the most popular activations in deep learning, especially thanks to its stabilizing effect on training. However, because it is non-differentiable at the origin, it complicates the use of analysis methods that examine derivatives, such as the Neural Tangent Kernel (NTK). Many smooth relaxations try to retain the practical benefits of ReLU while increasing network regularity. Although their success has ranged widely, some notable architectures (e.g., the BERT family) do utilize them. We present a theoretical characterization of smooth-ReLUs within fully-connected feed-forward neural networks. In addition to the well-known SWISH and GeLU, we introduce GumbelLU, AlgebraicLU, and GudermanLU, as new relaxations. All these activations can be characterized by a positive temperature parameter which we can lower to continuously improve the approximation. By studying the interplay of initialization schemes with temperature, we confirm that when these relaxations converge uniformly to ReLU, the statistical properties of the corresponding neural networks at initialization also converge to those of ReLU networks. Moreover, we derive temperature-dependent critical initialization schemes with which networks based on these activations exhibit stable ReLU-like behavior at any temperature. Finally, we empirically study both classes of networks on MNIST and CIFAR-10 in the full-batch training regime. We show that, while all networks exhibit very similar train loss trajectories at criticality, smooth-ReLU networks feature differentiable NTKs throughout training, whereas ReLU networks exhibit stochastic NTK fluctuations. 
Our results clarify how smooth-ReLU relaxations reproduce the practical benefits of ReLU in everywhere-smooth neural networks.", "keywords": "ReLU;SWISH;GeLU;Critical Initialization;Fully Connected Neural Networks;Deep Networks", "primary_area": "", "supplementary_material": "/attachment/5220aeebfc261bf7d1475308696229468ce29e37.zip", "author": "Ermal Rrapaj;Luca Celotti;Qiyao Wei;Martin Magill", "authorids": "~Ermal_Rrapaj1;~Luca_Celotti1;~Qiyao_Wei1;~Martin_Magill1", "gender": "M;M;M;", "homepage": ";https://lucehe.github.io/;https://qiyaowei.github.io;https://martinmagill.netlify.com/", "dblp": "308/2432;;327/3121;223/5780", "google_scholar": "i2p1e4YAAAAJ;;;https://scholar.google.ca/citations?user=6WJQZrkAAAAJ", "orcid": "0000-0002-3222-7010;;;", "linkedin": "ermalrrapaj;;qiyaowei;", "or_profile": "~Ermal_Rrapaj1;~Luca_Celotti1;~Qiyao_Wei1;~Martin_Magill1", "aff": "iTHEMS, RIKEN;Universit\u00e9 de Sherbrooke;University of Cambridge;", "aff_domain": "ithems.riken.jp;usherbrooke.ca;cam.ac.uk;", "position": "Postdoc;PhD student;PhD student;", "bibtex": "@misc{\nrrapaj2023when,\ntitle={When are smooth-Re{LU}s Re{LU}-like?},\nauthor={Ermal Rrapaj and Luca Celotti and Qiyao Wei and Martin Magill},\nyear={2023},\nurl={https://openreview.net/forum?id=0qnryNf6XwR}\n}", "github": "", "project": "", "reviewers": "Tjrr;AkRp;S8pa", "site": "https://openreview.net/forum?id=0qnryNf6XwR", "pdf_size": 635184, "recommendation": "5;5;5", "confidence": "2;2;3", "correctness": "3;3;4", "technical_novelty": "2;3;3", "empirical_novelty": "2;2;2", "wc_summary_paper": "98;213;39", "wc_strength_and_weaknesses": "67;553;319", "wc_clarity_quality_novelty_and_reproducibility": "9;71;35", "wc_summary_review": "6;73;55", "wc_review": "180;910;448", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "154;544;742", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 5.0, 0.0 ], "confidence_avg": [ 2.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 116.66666666666667, 72.25110533564329 ], "wc_strength_and_weaknesses_avg": [ 313.0, 198.45402490249472 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 38.333333333333336, 25.42090128658349 ], "wc_summary_review_avg": [ 44.666666666666664, 28.311756490114764 ], "wc_review_avg": [ 512.6666666666666, 301.5087984712147 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 480.0, 244.27852955182124 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:l4MWvVgYM8AJ:scholar.google.com/&scioq=When+are+smooth-ReLUs+ReLU-like%3F&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;2", "aff_unique_norm": "RIKEN;Universit\u00e9 de Sherbrooke;University of Cambridge", "aff_unique_dep": "iTHEMS;;", "aff_unique_url": "https://www.riken.jp;https://www.usherbrooke.ca;https://www.cam.ac.uk", "aff_unique_abbr": "RIKEN;UdeS;Cambridge", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;1;2", "aff_country_unique": "Japan;Canada;United Kingdom" }, { "id": "0sjwFxqLHw3", "title": "Spurious Local Minima Provably Exist for Deep Convolutional Neural Networks", "track": "main", 
"status": "Reject", "tldr": "We prove that a general class of spurious local minima exist in the loss landscape of deep convolutional neural networks with squared loss or cross-entropy loss.", "abstract": "In this paper, we prove that a general family of spurious local minima exist in the loss landscape of deep convolutional neural networks with squared loss or cross-entropy loss. For this purpose, we develop some new techniques to solve the challenges introduced by convolutional layers. We solve a combinatorial problem which considers the limited receptive fields of hidden neurons, and possible distinct activation status for different samples and different locations in feature maps, to show that a differentiation of data samples is always possible somewhere in feature maps. Training loss is then decreased by perturbation of network parameters that can affect different samples in different ways. Despite filters and biases are tied in each feature map, we give a construction in which this perturbation only affects the output of a single ReLU neuron and keeps the outputs at other locations unchanged. Finally, we give an example of nontrivial spurious local minimum in which different activation patterns of samples are explicitly constructed. Experimental results verify our theoretical findings. ", "keywords": "theoretical issues in deep learning", "primary_area": "", "supplementary_material": "", "author": "Bo Liu;Keyi Fu;Tongtong Yuan", "authorids": "~Bo_Liu21;~Keyi_Fu1;~Tongtong_Yuan1", "gender": "M;F;F", "homepage": ";;", "dblp": "58/2670-11;;187/9122", "google_scholar": ";;https://scholar.google.com.hk/citations?user=jAc5SHAAAAAJ", "orcid": "0000-0002-3482-6930;0000-0002-8667-449X;", "linkedin": ";;", "or_profile": "~Bo_Liu21;~Keyi_Fu1;~Tongtong_Yuan1", "aff": "Beijing University of Technology;College of Computer Science, Faculty of Information Technology, Beijing University of Technology, Beijing, China;Beijing University of Technology", "aff_domain": "bjut.edu.cn;bjut.edu.cn;bjut.edu.cn", "position": "Associate Professor;MS student;Lecturer", "bibtex": "@misc{\nliu2023spurious,\ntitle={Spurious Local Minima Provably Exist for Deep Convolutional Neural Networks},\nauthor={Bo Liu and Keyi Fu and Tongtong Yuan},\nyear={2023},\nurl={https://openreview.net/forum?id=0sjwFxqLHw3}\n}", "github": "", "project": "", "reviewers": "wvat;JdM7;h5wB;L56V", "site": "https://openreview.net/forum?id=0sjwFxqLHw3", "pdf_size": 3753582, "recommendation": "3;3;5;5", "confidence": "4;4;3;3", "correctness": "4;2;3;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "0;3;0;0", "wc_summary_paper": "118;108;135;96", "wc_strength_and_weaknesses": "359;347;609;590", "wc_clarity_quality_novelty_and_reproducibility": "92;3;147;491", "wc_summary_review": "53;36;70;58", "wc_review": "622;494;961;1235", "wc_reply_reviewers": "0;118;176;167", "wc_reply_authors": "1006;1186;2370;3083", "reply_reviewers": "0;1;1;1", "reply_authors": "2;2;4;5", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 0.75, 1.299038105676658 ], "wc_summary_paper_avg": [ 114.25, 14.289419162443238 ], "wc_strength_and_weaknesses_avg": [ 476.25, 123.50581970093555 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 183.25, 184.9599618836466 ], "wc_summary_review_avg": [ 54.25, 12.214233500306108 ], "wc_review_avg": [ 828.0, 290.400585398859 ], "wc_reply_reviewers_avg": [ 115.25, 70.104832215761 
], "wc_reply_authors_avg": [ 1911.25, 855.7036212965328 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.25, 1.299038105676658 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:TO9paLoBVHkJ:scholar.google.com/&scioq=Spurious+Local+Minima+Provably+Exist+for+Deep+Convolutional+Neural+Networks&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Beijing University of Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.bjut.edu.cn", "aff_unique_abbr": "BJUT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Beijing", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "0tiMn18oNd", "title": "Group-Equivariant Transformers Without Positional Encoding", "track": "main", "status": "Withdraw", "tldr": "We propose an effective group-equivariant transformer without positional encoding, replacing point-wise MLPs with group-equivariant convolutions to act as both a group mixer and an implicit positional encoding.", "abstract": "Self-attention is a permutation-equivariant operator in its basic form and can further extend to achieve equivariance for a specific symmetry group by incorporating group-invariant positional encoding. In this work, we propose an effective group-equivariant transformer without positional encoding. Instead of injecting group-invariant position encoding to the transformer, we replace point-wise MLPs with group-equivariant convolutions that act as both a group mixer and an implicit positional encoding. This allows to reduce the group of self-attention to translation only while preserving group equivariance, resulting in less computation and memory. Our strategy not only retains dynamic long-range interactions of transformers but also incorporates the static effective kernel learning of convolution, resulting in a significant accuracy gain. We also find that adopting a group-equivariant convolution stem and a translation-equivariant pooling further improves the performance. 
The proposed method sets a new state of the art in standard benchmarks, outperforming the existing group-equivariant transformers by a large margin.", "keywords": "equivariant;invariant;group-equivariant;self-attention;transformer;group-equivariant convolution;group-equivariant self-attention", "primary_area": "", "supplementary_material": "", "author": "Byungjin Kim;Minsu Cho", "authorids": "~Byungjin_Kim1;~Minsu_Cho1", "gender": "M;M", "homepage": "https://github.com/kbjpc123/;http://cvlab.postech.ac.kr/~mcho/", "dblp": ";", "google_scholar": ";5TyoF5QAAAAJ", "orcid": ";", "linkedin": ";minsu-cho-062b3750/", "or_profile": "~Byungjin_Kim1;~Minsu_Cho1", "aff": "POSTECH;POSTECH", "aff_domain": "postech.ac.kr;postech.ac.kr", "position": "MS student;Associate Professor", "bibtex": "@misc{\nkim2023groupequivariant,\ntitle={Group-Equivariant Transformers Without Positional Encoding},\nauthor={Byungjin Kim and Minsu Cho},\nyear={2023},\nurl={https://openreview.net/forum?id=0tiMn18oNd}\n}", "github": "", "project": "", "reviewers": "q8r4;dwmb;nsqJ;mcMF", "site": "https://openreview.net/forum?id=0tiMn18oNd", "pdf_size": 0, "recommendation": "3;3;3;6", "confidence": "4;4;4;3", "correctness": "3;2;3;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "69;79;112;117", "wc_strength_and_weaknesses": "327;439;324;90", "wc_clarity_quality_novelty_and_reproducibility": "119;154;79;20", "wc_summary_review": "65;53;50;37", "wc_review": "580;725;565;264", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "135;153;169;64", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 3.75, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 94.25, 20.632195714465293 ], "wc_strength_and_weaknesses_avg": [ 295.0, 127.10822160662937 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 93.0, 49.80461825975579 ], "wc_summary_review_avg": [ 51.25, 9.959292143521045 ], "wc_review_avg": [ 533.5, 167.67304494163633 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 130.25, 40.095978601351035 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:UE-KYt5qm70J:scholar.google.com/&scioq=Group-Equivariant+Transformers+Without+Positional+Encoding&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Pohang University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.postech.ac.kr", "aff_unique_abbr": "POSTECH", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Pohang", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "id": "0uHNy9jmR7z", "title": "CWATR: Generating Richer Captions with Object Attributes", "track": "main", "status": "Reject", "tldr": "We propose a method to generate richer and more grounded image captions by integrating attributes of the objects in the scene to the generated caption.", "abstract": "Image captioning is a popular yet challenging task which is at the intersection of Computer Vision and Natural Language Processing. 
Recently, transformer-based unified Vision and Language models have further advanced the state of the art in image captioning. However, there are still fundamental problems in these models. Even though the captions generated by these models are grammatically correct and describe the input image fairly well, they might overlook important details in the image. In this paper, we demonstrate these problems in a state-of-the-art baseline image captioning method and analyze the reasons behind them. We propose a novel approach, named CWATR (Captioning With ATtRibutes), to integrate object attributes into the generated captions in order to obtain richer and more detailed captions. Our analyses demonstrate that the proposed approach generates richer and more visually grounded captions by successfully integrating attributes of the objects in the scene into the generated captions.", "keywords": "image captioning;vision and language pretraining;object attributes;machine learning;deep learning;computer vision", "primary_area": "", "supplementary_material": "", "author": "Enes Muvahhid \u015eahin;G\u00f6zde BOZDA\u011eI AKAR", "authorids": "~Enes_Muvahhid_\u015eahin1;~G\u00f6zde_BOZDA\u011eI_AKAR1", "gender": "M;F", "homepage": ";https://eee.metu.edu.tr/personel/gozde-bozdagi-akar", "dblp": ";", "google_scholar": ";https://scholar.google.co.uk/citations?user=HSmub9wAAAAJ", "orcid": ";0000-0002-4227-5606", "linkedin": "enesmuvahhidsahin/;gozde-akar-8a551133/?originalSubdomain=tr", "or_profile": "~Enes_Muvahhid_\u015eahin1;~G\u00f6zde_BOZDA\u011eI_AKAR1", "aff": ";Cornell University", "aff_domain": ";cornell.edu", "position": ";Researcher", "bibtex": "@misc{\n{\\c{s}}ahin2023cwatr,\ntitle={{CWATR}: Generating Richer Captions with Object Attributes},\nauthor={Enes Muvahhid {\\c{S}}ahin and G{\\\"o}zde BOZDA{\\u{G}}I AKAR},\nyear={2023},\nurl={https://openreview.net/forum?id=0uHNy9jmR7z}\n}", "github": "", "project": "", "reviewers": "4nF4;E9nV;hWrE", "site": "https://openreview.net/forum?id=0uHNy9jmR7z", "pdf_size": 4530438, "recommendation": "3;3;3", "confidence": "3;4;3", "correctness": "2;3;2", "technical_novelty": "2;2;3", "empirical_novelty": "0;2;1", "wc_summary_paper": "68;63;138", "wc_strength_and_weaknesses": "216;191;24", "wc_clarity_quality_novelty_and_reproducibility": "19;24;12", "wc_summary_review": "35;19;37", "wc_review": "338;297;211", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 89.66666666666667, 34.23773097362356 ], "wc_strength_and_weaknesses_avg": [ 143.66666666666666, 85.23040667638647 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 18.333333333333332, 4.9216076867444665 ], "wc_summary_review_avg": [ 30.333333333333332, 8.055363982396383 ], "wc_review_avg": [ 282.0, 52.92132525425517 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link":
"https://scholar.google.com/scholar?q=related:ZcC86lXBcrQJ:scholar.google.com/&scioq=CWATR:+Generating+Richer+Captions+with+Object+Attributes&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Cornell University", "aff_unique_dep": "", "aff_unique_url": "https://www.cornell.edu", "aff_unique_abbr": "Cornell", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Predictive Inference with Feature Conformal Prediction", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11668", "id": "0uRm1YmFTu", "poster": "/media/PosterPDFs/ICLR%202023/11668.png?t=1682169922.6665988", "openreview": "https://openreview.net/forum?id=0uRm1YmFTu", "slides": "https://iclr.cc/virtual/2023/poster/11668", "video": "https://iclr.cc/virtual/2023/poster/11668", "author_site": "Jiaye Teng, Chuan Wen, Dinghuai Zhang, Yoshua Bengio, Yang Gao, Yang Yuan", "tldr": "Conformal inference in feature space. ", "abstract": "Conformal prediction is a distribution-free technique for establishing valid prediction intervals. Although conventionally people conduct conformal prediction in the output space, this is not the only possibility. In this paper, we propose feature conformal prediction, which extends the scope of conformal prediction to semantic feature spaces by leveraging the inductive bias of deep representation learning. From a theoretical perspective, we demonstrate that feature conformal prediction provably outperforms regular conformal prediction under mild assumptions. Our approach could be combined with not only vanilla conformal prediction, but also other adaptive conformal prediction methods. Apart from experiments on existing predictive inference benchmarks, we also demonstrate the state-of-the-art performance of the proposed methods on \\textit{large-scale} tasks such as ImageNet classification and Cityscapes image segmentation.", "keywords": "conformal prediction;uncertainty", "primary_area": "", "supplementary_material": "", "author": "Jiaye Teng;Chuan Wen;Dinghuai Zhang;Yoshua Bengio;Yang Gao;Yang Yuan", "authorids": "~Jiaye_Teng2;~Chuan_Wen1;~Dinghuai_Zhang1;~Yoshua_Bengio1;~Yang_Gao1;~Yang_Yuan4", "gender": "M;M;;M;M;M", "homepage": "http://www.tengjiaye.com;https://alvinwen428.github.io/;;http://yoshuabengio.org;http://yang-gao.weebly.com;http://people.iiis.tsinghua.edu.cn/~yuanyang/index.html", "dblp": "266/8187;239/8286;;56/953;89/4402-29;", "google_scholar": "NGqfK2wAAAAJ;G5M9nYwAAAAJ;;kukA0LcAAAAJ;https://scholar.google.com/citations?hl=en;", "orcid": "0000-0002-4385-5792;;;;;", "linkedin": ";;;yoshuabengio/?originalSubdomain=ca;yang-gao-45245348/;", "or_profile": "~Jiaye_Teng2;~Chuan_Wen1;~Dinghuai_Zhang1;~Yoshua_Bengio1;~Yang_Gao1;~Yang_Yuan4", "aff": "Tsinghua University;University of California, Berkeley;;University of Montreal;Tsinghua University;Tsinghua University", "aff_domain": "iiis.tsinghua.edu.cn;berkeley.edu;;umontreal.ca;tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;Intern;;Full Professor;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nteng2023predictive,\ntitle={Predictive Inference with Feature Conformal Prediction},\nauthor={Jiaye Teng and Chuan Wen and Dinghuai Zhang and Yoshua Bengio and Yang Gao and Yang Yuan},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=0uRm1YmFTu}\n}", "github": "", "project": "", "reviewers": "9LYb;2teK;5bhg;MB3r", "pdf_size": 1221128, 
"recommendation": "5;6;6;6", "confidence": "3;4;4;3", "correctness": "4;3;3;4", "technical_novelty": "3;4;3;2", "empirical_novelty": "3;2;3;3", "wc_summary_paper": "88;85;58;19", "wc_strength_and_weaknesses": "411;538;638;210", "wc_clarity_quality_novelty_and_reproducibility": "8;52;74;42", "wc_summary_review": "58;105;40;12", "wc_review": "565;780;810;283", "wc_reply_reviewers": "0;0;93;0", "wc_reply_authors": "1179;580;890;401", "reply_reviewers": "0;0;1;0", "reply_authors": "4;2;3;1", "recommendation_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 62.5, 27.69927796892908 ], "wc_strength_and_weaknesses_avg": [ 449.25, 159.8489521391992 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 44.0, 23.790754506740637 ], "wc_summary_review_avg": [ 53.75, 33.825840713868445 ], "wc_review_avg": [ 609.5, 210.8631072520748 ], "wc_reply_reviewers_avg": [ 23.25, 40.2701812759764 ], "wc_reply_authors_avg": [ 762.5, 297.3705600761447 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": -0.5773502691896257, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9705593086465732622&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=0uRm1YmFTu", "email": "iiis.tsinghua.edu.cn;berkeley.edu;;umontreal.ca;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 6, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "Tsinghua University;University of California, Berkeley;University of Montreal", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.berkeley.edu;https://wwwumontreal.ca", "aff_unique_abbr": "THU;UC Berkeley;UM", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;1;2;0;0", "aff_country_unique": "China;United States;Canada" }, { "title": "Priors, Hierarchy, and Information Asymmetry for Skill Transfer in Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12006", "id": "0v4VkCSkHNm", "poster": "/media/PosterPDFs/ICLR%202023/12006.png?t=1682262456.9688299", "openreview": "https://openreview.net/forum?id=0v4VkCSkHNm", "slides": "https://iclr.cc/virtual/2023/poster/12006", "video": "https://iclr.cc/virtual/2023/poster/12006", "author_site": "Sasha Salter, Kristian Hartikainen, Walter Goodwin, Ingmar Posner", "tldr": "We introduce 'Attentive Priors for Expressive and Transferable Skills' (APES), a hierarchical KL-regularized skill transfer method that automates the choice of information asymmetry thereby maximising transfer benefits.", "abstract": "The ability to discover behaviours from past experience and transfer them to new tasks is a hallmark of intelligent agents acting sample-efficiently in the real world. Equipping embodied reinforcement learners with the same ability may be crucial for their successful deployment in robotics. While hierarchical and KL-regularized reinforcement learning individually hold promise here, arguably a hybrid approach could combine their respective benefits. Key to these fields is the use of information asymmetry across architectural modules to bias which skills are learnt. 
While asymmetry choice has a large influence on transferability, existing methods base their choice primarily on intuition in a domain-independent, potentially sub-optimal, manner. In this paper, we theoretically and empirically show the crucial expressivity-transferability trade-off of skills across sequential tasks, controlled by information asymmetry. Given this insight, we introduce Attentive Priors for Expressive and Transferable Skills (APES), a hierarchical KL-regularized method, heavily benefiting from both priors and hierarchy. Unlike existing approaches, APES automates the choice of asymmetry by learning it in a data-driven, domain-dependent, way based on our expressivity-transferability theorems. Experiments over complex transfer domains of varying levels of extrapolation and sparsity, such as robot block stacking, demonstrate the criticality of the correct asymmetric choice, with APES drastically outperforming previous methods.", "keywords": "Skills;Transfer Learning;Reinforcement Learning", "primary_area": "", "supplementary_material": "", "author": "Sasha Salter;Kristian Hartikainen;Walter Goodwin;Ingmar Posner", "authorids": "~Sasha_Salter1;~Kristian_Hartikainen1;~Walter_Goodwin1;~Ingmar_Posner1", "gender": "M;M;M;", "homepage": ";https://hartikainen.github.io;;", "dblp": "217/1564;218/5178;;59/542", "google_scholar": ";eVYhlDQAAAAJ;;dPk-iwsAAAAJ", "orcid": ";;;0000-0001-6270-700X", "linkedin": ";khartikainen/;walter-goodwin-291194115/;ingmar-posner-20b49a", "or_profile": "~Sasha_Salter1;~Kristian_Hartikainen1;~Walter_Goodwin1;~Ingmar_Posner1", "aff": "Meta;University of Oxford;;University of Oxford", "aff_domain": "meta.com;ox.ac.uk;;ox.ac.uk", "position": "Researcher;PhD student;;Full Professor", "bibtex": "@inproceedings{\nsalter2023priors,\ntitle={Priors, Hierarchy, and Information Asymmetry for Skill Transfer in Reinforcement Learning},\nauthor={Sasha Salter and Kristian Hartikainen and Walter Goodwin and Ingmar Posner},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=0v4VkCSkHNm}\n}", "github": "", "project": "", "reviewers": "pBgA;RbDM;Dj5g;ia3Q", "pdf_size": 5163892, "recommendation": "5;6;6;8", "confidence": "4;3;3;4", "correctness": "2;3;3;3", "technical_novelty": "3;3;3;3", "empirical_novelty": "3;2;3;3", "wc_summary_paper": "108;118;140;76", "wc_strength_and_weaknesses": "121;220;156;234", "wc_clarity_quality_novelty_and_reproducibility": "294;69;277;21", "wc_summary_review": "77;36;31;44", "wc_review": "600;443;604;375", "wc_reply_reviewers": "322;69;72;19", "wc_reply_authors": "3038;556;830;655", "reply_reviewers": "1;1;1;1", "reply_authors": "5;2;3;1", "recommendation_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 110.5, 23.038012067016545 ], "wc_strength_and_weaknesses_avg": [ 182.75, 46.21349911010851 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 165.25, 121.59024426326316 ], "wc_summary_review_avg": [ 47.0, 17.930421077041107 ], "wc_review_avg": [ 505.5, 99.45979087048192 ], "wc_reply_reviewers_avg": [ 120.5, 118.22542027838176 ], "wc_reply_authors_avg": [ 1269.75, 1025.6028410159558 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.75, 1.479019945774904 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 
0.2294157338705618, "corr_recommendation_correctness": 0.6622661785325219, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9889514503886316391&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=0v4VkCSkHNm", "email": "meta.com;ox.ac.uk;;ox.ac.uk", "author_num": 4, "aff_unique_index": "0;1;1", "aff_unique_norm": "Meta;University of Oxford", "aff_unique_dep": "Meta Platforms, Inc.;", "aff_unique_url": "https://meta.com;https://www.ox.ac.uk", "aff_unique_abbr": "Meta;Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United States;United Kingdom" }, { "id": "0vG8GbuPOH3", "title": "Semantic Prior for Weakly Supervised Class-Incremental Segmentation", "track": "main", "status": "Reject", "tldr": "Leveraging semantic similarities between old and new classes to improve weakly supervised class-incremental semantic segmentation", "abstract": "Class-incremental semantic image segmentation assumes multiple model updates, each enriching the model to segment new categories. This is typically carried out by providing pixel-level manual annotations for all new objects, limiting the adoption of such methods. Approaches which solely require image-level labels offer an attractive alternative, yet, such annotations lack crucial information about the location and boundary of new objects. In this paper we argue that, since classes represent not just indices but semantic entities, the conceptual relationships between them can provide valuable information that should be leveraged. We propose a weakly supervised approach that leverages such semantic relations in order to transfer some cues from the previously learned classes into the new ones, complementing the supervisory signal from image-level labels. We validate our approach on a number of continual learning tasks, and show how even a simple pairwise interaction between classes can significantly improve the segmentation mask quality of both old and new classes. 
We show these conclusions still hold for longer and, hence, more realistic sequences of tasks and for a challenging few-shot scenario.", "keywords": "class-incremental learning;weakly supervised semantic segmentation", "primary_area": "", "supplementary_material": "/attachment/fb1057fdc5443a305320ed8ef01ee3c15b47a26d.zip", "author": "Subhankar Roy;Riccardo Volpi;Gabriela Csurka;Diane Larlus", "authorids": "~Subhankar_Roy1;~Riccardo_Volpi1;~Gabriela_Csurka2;~Diane_Larlus1", "gender": "M;M;F;F", "homepage": "https://roysubhankar.github.io/;https://ricvolpi.github.io;https://europe.naverlabs.com/people_user/gabriela-csurka-khedari;https://dlarlus.github.io/", "dblp": ";194/2478;c/GabrielaCsurka;48/4033", "google_scholar": "YfzgrDYAAAAJ;YkeS_SoAAAAJ;https://scholar.google.fr/citations?user=PXm1lPAAAAAJ;https://scholar.google.fr/citations?user=nI2oJqkAAAAJ", "orcid": "0009-0008-2395-8111;;;", "linkedin": ";;gabriela-csurka-0387bb2a/;", "or_profile": "~Subhankar_Roy1;~Riccardo_Volpi1;~Gabriela_Csurka2;~Diane_Larlus1", "aff": "Fondazione Bruno Kessler;Naver Labs Europe;Naver Labs Europe;NAVER LABS Europe", "aff_domain": "fbk.eu;naverlabs.com;naverlabs.com;naverlabs.com", "position": "Researcher;Researcher;Principal Researcher;Principal Researcher", "bibtex": "@misc{\nroy2023semantic,\ntitle={Semantic Prior for Weakly Supervised Class-Incremental Segmentation},\nauthor={Subhankar Roy and Riccardo Volpi and Gabriela Csurka and Diane Larlus},\nyear={2023},\nurl={https://openreview.net/forum?id=0vG8GbuPOH3}\n}", "github": "", "project": "", "reviewers": "AMCk;PPnr;RkMT;paY4", "site": "https://openreview.net/forum?id=0vG8GbuPOH3", "pdf_size": 5815395, "recommendation": "3;3;5;6", "confidence": "4;4;3;4", "correctness": "2;3;3;3", "technical_novelty": "2;2;2;2", "empirical_novelty": "2;0;3;0", "wc_summary_paper": "74;124;81;46", "wc_strength_and_weaknesses": "200;293;392;75", "wc_clarity_quality_novelty_and_reproducibility": "35;10;80;29", "wc_summary_review": "58;28;95;33", "wc_review": "367;455;648;183", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1042;1580;1726;755", "reply_reviewers": "0;0;0;0", "reply_authors": "3;4;4;2", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 1.25, 1.299038105676658 ], "wc_summary_paper_avg": [ 81.25, 27.94078560098123 ], "wc_strength_and_weaknesses_avg": [ 240.0, 116.98076765007144 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 38.5, 25.675864152935535 ], "wc_summary_review_avg": [ 53.5, 26.51886121235224 ], "wc_review_avg": [ 413.25, 167.33555360412802 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1275.75, 394.0535337996603 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 3.25, 0.82915619758885 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": 0.5555555555555555, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:CwjT72i8zJUJ:scholar.google.com/&scioq=Semantic+Prior+for+Weakly+Supervised+Class-Incremental+Segmentation&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Fondazione Bruno Kessler;NAVER LABS", "aff_unique_dep": ";", "aff_unique_url": "https://www.fbk.eu;https://labs.naver.com", "aff_unique_abbr": "FBK;NLE", "aff_campus_unique_index": "", "aff_campus_unique": "", 
"aff_country_unique_index": "0;1;1;2", "aff_country_unique": "Italy;Unknown;France" }, { "title": "DDM$^2$: Self-Supervised Diffusion MRI Denoising with Generative Diffusion Models", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12099", "id": "0vqjc50HfcC", "poster": "/media/PosterPDFs/ICLR%202023/12099.png?t=1681340042.8689122", "openreview": "https://openreview.net/forum?id=0vqjc50HfcC", "slides": "https://iclr.cc/virtual/2023/poster/12099", "video": "https://iclr.cc/virtual/2023/poster/12099", "author_site": "Tiange Xiang, Mahmut Yurt, Ali Syed, Kawin Setsompop, Akshay Chaudhari", "tldr": "", "abstract": "Magnetic resonance imaging (MRI) is a common and life-saving medical imaging technique. However, acquiring high signal-to-noise ratio MRI scans requires long scan times, resulting in increased costs and patient discomfort, and decreased throughput. Thus, there is great interest in denoising MRI scans, especially for the subtype of diffusion MRI scans that are severely SNR-limited. While most prior MRI denoising methods are supervised in nature, acquiring supervised training datasets for the multitude of anatomies, MRI scanners, and scan parameters proves impractical. Here, we propose Denoising Diffusion Models for Denoising Diffusion MRI (DDM^2), a self-supervised denoising method for MRI denoising using diffusion denoising generative models. Our three-stage framework integrates statistic-based denoising theory into diffusion models and performs denoising through conditional generation. During inference, we represent input noisy measurements as a sample from an intermediate posterior distribution within the diffusion Markov chain. We conduct experiments on 4 real-world in-vivo diffusion MRI datasets and show that our DDM^2 demonstrates superior denoising performances ascertained with clinically-relevant visual qualitative and quantitative metrics.", "keywords": "Unsupervised MRI Denoising;Diffusion Models", "primary_area": "", "supplementary_material": "/attachment/ef5ef639efb2345c727f49768ee35f5f7908e51b.zip", "author": "Tiange Xiang;Mahmut Yurt;Ali B Syed;Kawin Setsompop;Akshay Chaudhari", "authorids": "~Tiange_Xiang1;~Mahmut_Yurt1;~Ali_B_Syed1;kawins@stanford.edu;~Akshay_Chaudhari1", "gender": "M;;M;;", "homepage": "https://tiangexiang.github.io/;;https://profiles.stanford.edu/ali-syed;;", "dblp": "245/7663;215/3714;;;225/4729", "google_scholar": ";https://scholar.google.com.tr/citations?user=oAXHlRMAAAAJ;;;08Y4NhMAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Tiange_Xiang1;~Mahmut_Yurt1;~Ali_B_Syed1;kawins@stanford.edu;~Akshay_Chaudhari1", "aff": "Stanford University;Stanford University;Stanford University;;Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu;;stanford.edu", "position": "PhD student;PhD student;Assistant Professor;;Assistant Professor", "bibtex": "@inproceedings{\nxiang2023ddm,\ntitle={{DDM}\\${\\textasciicircum}2\\$: Self-Supervised Diffusion {MRI} Denoising with Generative Diffusion Models},\nauthor={Tiange Xiang and Mahmut Yurt and Ali B Syed and Kawin Setsompop and Akshay Chaudhari},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=0vqjc50HfcC}\n}", "github": "", "project": "", "reviewers": "RsQ4;Bjyp;Wzbi;M8jP", "pdf_size": 3845860, "recommendation": "1;6;6;8", "confidence": "5;3;4;4", "correctness": "1;3;4;4", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;0;3;4", "wc_summary_paper": 
"55;55;88;112", "wc_strength_and_weaknesses": "343;169;534;424", "wc_clarity_quality_novelty_and_reproducibility": "39;25;34;226", "wc_summary_review": "28;50;40;206", "wc_review": "465;299;696;968", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "716;603;1018;545", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;2;1", "recommendation_avg": [ 5.25, 2.5860201081971503 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.0, 1.224744871391589 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 1.479019945774904 ], "wc_summary_paper_avg": [ 77.5, 24.046829312822098 ], "wc_strength_and_weaknesses_avg": [ 367.5, 133.15122981031757 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 81.0, 83.86596449096618 ], "wc_summary_review_avg": [ 81.0, 72.58787777583802 ], "wc_review_avg": [ 607.0, 251.6296882325295 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 720.5, 182.43697541891007 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.6835859270246631, "corr_recommendation_correctness": 0.9472044455566301, "gs_citation": 71, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1058328347805773124&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=0vqjc50HfcC", "email": "stanford.edu;stanford.edu;stanford.edu;;stanford.edu", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "0xHVGIiYK2n", "title": "Multi-Agent Sequential Decision-Making via Communication", "track": "main", "status": "Reject", "tldr": "A novel communication scheme for multi-agent cooperation", "abstract": " Communication helps agents to obtain information about others so that better coordinated behavior can be learned. Some existing work communicates predicted future trajectory with others, hoping to get clues about what others would do for better coordination. However, circular dependencies sometimes can occur when agents are treated synchronously so it is hard to coordinate decision-making. In this paper, we propose a novel communication scheme, Sequential Communication (SeqComm). SeqComm treats agents asynchronously (the upper-level agents make decisions before the lower-level ones) and has two communication phases. In negotiation phase, agents determine the priority of decision-making by communicating hidden states of observations and comparing the value of intention, which is obtained by modeling the environment dynamics. In launching phase, the upper-level agents take the lead in making decisions and communicate their actions with the lower-level agents. Theoretically, we prove the policies learned by SeqComm are guaranteed to improve monotonically and converge. 
Empirically, we show that SeqComm outperforms existing methods in various multi-agent cooperative tasks.\n", "keywords": "multi-agent communication;multi-agent reinforcement learning", "primary_area": "", "supplementary_material": "", "author": "Ziluo Ding;Kefan Su;Weixin Hong;Liwen Zhu;Tiejun Huang;Zongqing Lu", "authorids": "~Ziluo_Ding1;~Kefan_Su1;~Weixin_Hong1;~Liwen_Zhu1;~Tiejun_Huang1;~Zongqing_Lu2", "gender": "M;;M;F;M;", "homepage": ";;;http://www.liwenzhu-pku.cn/;https://idm.pku.edu.cn/~tjhuang/;", "dblp": "267/2359;;;;h/TiejunHuang;", "google_scholar": ";;;;https://scholar.google.com.tw/citations?user=knvEK4AAAAAJ;", "orcid": ";;;;0000-0002-4234-6099;", "linkedin": "ziluo/;;Hong-weixin;;;", "or_profile": "~Ziluo_Ding1;~Kefan_Su1;~Weixin_Hong1;~Liwen_Zhu1;~Tiejun_Huang1;~Zongqing_Lu2", "aff": "Peking University;;Peking University, Tsinghua University;WeChat AI;Peking University;", "aff_domain": "pku.edu.cn;;pku.edu.cn;tencent.com;pku.edu.cn;", "position": "PhD student;;Undergrad student;Researcher;Full Professor;", "bibtex": "@misc{\nding2023multiagent,\ntitle={Multi-Agent Sequential Decision-Making via Communication},\nauthor={Ziluo Ding and Kefan Su and Weixin Hong and Liwen Zhu and Tiejun Huang and Zongqing Lu},\nyear={2023},\nurl={https://openreview.net/forum?id=0xHVGIiYK2n}\n}", "github": "", "project": "", "reviewers": "44Uf;emu3;7QwV;TanN", "site": "https://openreview.net/forum?id=0xHVGIiYK2n", "pdf_size": 3916651, "recommendation": "3;5;6;6", "confidence": "4;4;2;2", "correctness": "2;3;4;3", "technical_novelty": "3;3;2;2", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "46;154;127;79", "wc_strength_and_weaknesses": "328;266;85;37", "wc_clarity_quality_novelty_and_reproducibility": "89;47;21;25", "wc_summary_review": "26;45;35;29", "wc_review": "489;512;268;170", "wc_reply_reviewers": "0;1340;89;0", "wc_reply_authors": "763;1726;202;73", "reply_reviewers": "0;2;1;0", "reply_authors": "1;3;1;1", "recommendation_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.0, 1.0 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 101.5, 41.812079594299064 ], "wc_strength_and_weaknesses_avg": [ 179.0, 121.21262310502154 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 45.5, 26.995369973386175 ], "wc_summary_review_avg": [ 33.75, 7.258615570478987 ], "wc_review_avg": [ 359.75, 145.1798453642929 ], "wc_reply_reviewers_avg": [ 357.25, 568.553152748272 ], "wc_reply_authors_avg": [ 691.0, 651.4318843900719 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.8164965809277259, "corr_recommendation_correctness": 0.8660254037844386, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17033068032142088345&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Peking University;WeChat", "aff_unique_dep": ";WeChat AI", "aff_unique_url": "http://www.pku.edu.cn;https://www.wechat.com", "aff_unique_abbr": "Peking U;WeChat AI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "View Synthesis with Sculpted Neural Points", "status": "Top-5%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11951", "id": "0ypGZvm0er0", "poster": 
"/media/PosterPDFs/ICLR%202023/11951.png?t=1681704422.1957314", "openreview": "https://openreview.net/forum?id=0ypGZvm0er0", "slides": "https://iclr.cc/virtual/2023/poster/11951", "video": "https://iclr.cc/virtual/2023/poster/11951", "author_site": "Yiming Zuo, Jia Deng", "tldr": "", "abstract": "We address the task of view synthesis, generating novel views of a scene given a set of images as input. In many recent works such as NeRF (Mildenhall et al., 2020), the scene geometry is parameterized using neural implicit representations (i.e., MLPs). Implicit neural representations have achieved impressive visual quality but have drawbacks in computational efficiency. In this work, we propose a new approach that performs view synthesis using point clouds. It is the first point-based method that achieves better visual quality than NeRF while being 100\u00d7 faster in rendering speed. Our approach builds on existing works on differentiable point-based rendering but introduces a novel technique we call \u201cSculpted Neural Points (SNP)\u201d, which significantly improves the robustness to errors and holes in the reconstructed point cloud. We further propose to use view-dependent point features based on spherical harmonics to capture non-Lambertian surfaces, and new designs in the point-based rendering pipeline that further boost the performance. Finally, we show that our system supports fine-grained scene editing. Code is available at https://github.com/princeton-vl/SNP.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/48cb4d26c510c2629c1b3743c42bc78bc250c882.zip", "author": "Yiming Zuo;Jia Deng", "authorids": "~Yiming_Zuo2;~Jia_Deng1", "gender": "M;M", "homepage": "https://zuoym15.github.io/;", "dblp": "146/3853-1;07/6526-1.html", "google_scholar": ";U3Eub-EAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Yiming_Zuo2;~Jia_Deng1", "aff": "Princeton University;Princeton University", "aff_domain": "princeton.edu;princeton.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nzuo2023view,\ntitle={View Synthesis with Sculpted Neural Points},\nauthor={Yiming Zuo and Jia Deng},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=0ypGZvm0er0}\n}", "github": "", "project": "", "reviewers": "kuEi;bjQn;UGSW", "pdf_size": 9359472, "recommendation": "6;8;8", "confidence": "5;4;3", "correctness": "3;4;4", "technical_novelty": "3;3;2", "empirical_novelty": "3;3;3", "wc_summary_paper": "72;108;61", "wc_strength_and_weaknesses": "204;291;211", "wc_clarity_quality_novelty_and_reproducibility": "23;63;76", "wc_summary_review": "21;34;77", "wc_review": "320;496;425", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "700;263;414", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 7.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 80.33333333333333, 20.07209228976613 ], "wc_strength_and_weaknesses_avg": [ 235.33333333333334, 39.46587837050578 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 54.0, 22.55363976538303 ], "wc_summary_review_avg": [ 44.0, 23.930454794396756 ], "wc_review_avg": [ 413.6666666666667, 72.29722601102264 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 459.0, 
181.219940036042 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.8660254037844385, "corr_recommendation_correctness": 0.9999999999999998, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14588219090420948890&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=0ypGZvm0er0", "email": "princeton.edu;princeton.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Princeton University", "aff_unique_dep": "", "aff_unique_url": "https://www.princeton.edu", "aff_unique_abbr": "Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "0z_cXcu1N6o", "title": "Transformer needs NMDA receptor nonlinearity for long-term memory", "track": "main", "status": "Reject", "tldr": "", "abstract": "The NMDA receptor (NMDAR) in the hippocampus is essential for learning and memory. We find an interesting resemblance between deep models' nonlinear activation function and the NMDAR's nonlinear dynamics. In light of a recent study that compared the transformer architecture to the formation of hippocampal memory, this paper presents new findings that NMDAR-like nonlinearity may be essential for consolidating short-term working memory into long-term reference memory. We design a navigation task assessing these two memory functions and show that manipulating the activation function (i.e., mimicking the Mg$^{2+}$-gating of NMDAR) disrupts long-term memory formation. Our experimental data suggest that the concept of place cells and reference memory may reside in the feed-forward network layer of transformers and that nonlinearity plays a key role in these processes. Our findings propose that the transformer architecture and hippocampal spatial representation resemble each other by sharing the overlapping concept of NMDAR-like nonlinearity.", "keywords": "NMDAR;hippocampus;transformer;memory", "primary_area": "", "supplementary_material": "/attachment/aa8489826a749d111e381e606a868e533636c264.zip", "author": "Dong-Kyum Kim;Jea Kwon;Meeyoung Cha;C. Justin Lee", "authorids": "~Dong-Kyum_Kim1;jeakwon@ibs.re.kr;~Meeyoung_Cha2;cjl@ibs.re.kr", "gender": ";;F;", "homepage": ";;https://www.mpi-sp.org/cha;", "dblp": ";;57/4924;", "google_scholar": ";;iFlnVCoAAAAJ;", "orcid": ";;0000-0003-4085-9648;", "linkedin": ";;meeyoungcha/;", "or_profile": "~Dong-Kyum_Kim1;jeakwon@ibs.re.kr;~Meeyoung_Cha2;cjl@ibs.re.kr", "aff": ";;Korea Advanced Institute of Science & Technology;", "aff_domain": ";;kaist.ac.kr;", "position": ";;Full Professor;", "bibtex": "@misc{\nkim2023transformer,\ntitle={Transformer needs {NMDA} receptor nonlinearity for long-term memory},\nauthor={Dong-Kyum Kim and Jea Kwon and Meeyoung Cha and C.
Justin Lee},\nyear={2023},\nurl={https://openreview.net/forum?id=0z_cXcu1N6o}\n}", "github": "", "project": "", "reviewers": "7x8r;oq3h;91tN;o25e", "site": "https://openreview.net/forum?id=0z_cXcu1N6o", "pdf_size": 8716229, "recommendation": "3;3;5;6", "confidence": "3;4;3;3", "correctness": "1;3;3;2", "technical_novelty": "2;2;3;4", "empirical_novelty": "3;1;3;3", "wc_summary_paper": "132;56;76;77", "wc_strength_and_weaknesses": "216;437;128;455", "wc_clarity_quality_novelty_and_reproducibility": "185;25;51;371", "wc_summary_review": "22;43;24;100", "wc_review": "555;561;279;1003", "wc_reply_reviewers": "0;0;0;486", "wc_reply_authors": "1462;2439;1331;2734", "reply_reviewers": "0;0;0;4", "reply_authors": "3;5;3;6", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 2.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 85.25, 28.261059781968545 ], "wc_strength_and_weaknesses_avg": [ 309.0, 140.63249980001066 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 158.0, 137.1459077041674 ], "wc_summary_review_avg": [ 47.25, 31.538666744172936 ], "wc_review_avg": [ 599.5, 259.3236394932016 ], "wc_reply_reviewers_avg": [ 121.5, 210.4441731196186 ], "wc_reply_authors_avg": [ 1991.5, 605.845070954613 ], "reply_reviewers_avg": [ 1.0, 1.7320508075688772 ], "reply_authors_avg": [ 4.25, 1.299038105676658 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.5555555555555555, "corr_recommendation_correctness": 0.17407765595569782, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:eWrANS71S2cJ:scholar.google.com/&scioq=Transformer+needs+NMDA+receptor+nonlinearity+for+long-term+memory&hl=en&as_sdt=0,5", "gs_version_total": 2, "aff_unique_index": "0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_country_unique_index": "0", "aff_country_unique": "South Korea" }, { "id": "1-B8dz847_", "title": "Pairwise Confidence Difference on Unlabeled Data is Sufficient for Binary Classification", "track": "main", "status": "Reject", "tldr": "The difference of confidence labels on unlabeled data pairs, as a novel type of weak supervision, is sufficient to train binary classifiers with theoretical guarantees.", "abstract": "Learning with confidence labels is an emerging weakly supervised learning paradigm, where training data are equipped with confidence labels instead of exact labels. Positive-confidence (Pconf) classification is a typical learning problem in this context, where we are given only positive data equipped with confidence. However, pointwise confidence may not be accessible in real-world scenarios. In this paper, we dive into a novel weakly supervised learning problem called confidence-difference (ConfDiff) classification. Instead of pointwise confidence, we are given only unlabeled data pairs equipped with confidence difference specifying the difference in the probabilities of being positive. An unbiased risk estimator is derived to tackle the problem, and we show that the estimation error bound achieves the optimal convergence rate. 
Extensive experiments on benchmark data sets validate the effectiveness of our proposed approaches in leveraging the supervision information of the confidence difference.", "keywords": "Weakly supervised learning;binary classification;unbiased risk estimator", "primary_area": "", "supplementary_material": "/attachment/b25ff14c9afacd59ee08343355f6262b2ed119b1.zip", "author": "Wei Wang;Lei Feng;Gang Niu;Min-Ling Zhang;Masashi Sugiyama", "authorids": "~Wei_Wang68;~Lei_Feng1;~Gang_Niu1;~Min-Ling_Zhang2;~Masashi_Sugiyama1", "gender": "M;M;M;M;M", "homepage": "https://wwangwitsel.github.io/;https://lfeng1995.github.io/;https://niug1984.github.io;http://palm.seu.edu.cn/zhangml/;http://www.ms.k.u-tokyo.ac.jp/sugi/", "dblp": "35/7092-373.html;76/847-6;26/3367-1;84/271.html;35/1228", "google_scholar": "a38jZkwAAAAJ;https://scholar.google.com.sg/citations?user=KomQOFkAAAAJ;https://scholar.google.co.jp/citations?user=HOkcy00AAAAJ;uFHCIM0AAAAJ;https://scholar.google.co.jp/citations?user=GkYIrlIAAAAJ", "orcid": "0000-0002-8860-0494;0000-0003-2839-5799;;0000-0003-1880-5918;0000-0001-6658-6743", "linkedin": ";;;;", "or_profile": "~Wei_Wang68;~Lei_Feng1;~Gang_Niu1;~Min-Ling_Zhang2;~Masashi_Sugiyama1", "aff": "The University of Tokyo;Nanyang Technological University;RIKEN;Southeast University;The University of Tokyo", "aff_domain": "u-tokyo.ac.jp;ntu.edu.sg;riken.jp;seu.edu.cn;u-tokyo.ac.jp", "position": "PhD student;Visiting Professor;Research Scientist (tenured);Full Professor;Full Professor", "bibtex": "@misc{\nwang2023pairwise,\ntitle={Pairwise Confidence Difference on Unlabeled Data is Sufficient for Binary Classification},\nauthor={Wei Wang and Lei Feng and Gang Niu and Min-Ling Zhang and Masashi Sugiyama},\nyear={2023},\nurl={https://openreview.net/forum?id=1-B8dz847_}\n}", "github": "", "project": "", "reviewers": "bMCm;dSMd;sacA", "site": "https://openreview.net/forum?id=1-B8dz847_", "pdf_size": 2834738, "recommendation": "3;6;6", "confidence": "3;4;3", "correctness": "2;4;4", "technical_novelty": "2;3;3", "empirical_novelty": "2;3;3", "wc_summary_paper": "88;77;87", "wc_strength_and_weaknesses": "94;172;320", "wc_clarity_quality_novelty_and_reproducibility": "50;20;7", "wc_summary_review": "206;21;54", "wc_review": "438;290;468", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "676;561;689", "reply_reviewers": "0;0;0", "reply_authors": "1;1;2", "recommendation_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.9428090415820634 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 84.0, 4.96655480858378 ], "wc_strength_and_weaknesses_avg": [ 195.33333333333334, 93.7277380975818 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 25.666666666666668, 18.00617178142601 ], "wc_summary_review_avg": [ 93.66666666666667, 80.56605295587522 ], "wc_review_avg": [ 398.6666666666667, 77.80888266915431 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 642.0, 57.52101065407897 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.5, "corr_recommendation_correctness": 1.0, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:gZr2jmC1n3cJ:scholar.google.com/&scioq=Pairwise+Confidence+Difference+on+Unlabeled+Data+is+Sufficient+for+Binary+Classification&hl=en&as_sdt=0,47", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "University of Tokyo;Nanyang Technological University;RIKEN;Southeast University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.u-tokyo.ac.jp;https://www.ntu.edu.sg;https://www.riken.jp;https://www.seu.edu.cn/", "aff_unique_abbr": "UTokyo;NTU;RIKEN;SEU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;2;0", "aff_country_unique": "Japan;Singapore;China" }, { "title": "Discrete Contrastive Diffusion for Cross-Modal Music and Image Generation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11915", "id": "1-MBdJssZ-S", "poster": "/media/PosterPDFs/ICLR%202023/11915.png?t=1680730512.4274323", "openreview": "https://openreview.net/forum?id=1-MBdJssZ-S", "slides": "https://iclr.cc/virtual/2023/poster/11915", "video": "https://iclr.cc/virtual/2023/poster/11915", "author_site": "Ye Zhu, Yu Wu, Kyle Olszewski, Jian Ren, Sergey Tulyakov, Yan Yan", "tldr": "We present a conditional contrastive diffusion approach for better input-output correspondence via maximized mutual information, applicable for music and image generations.", "abstract": "Diffusion probabilistic models (DPMs) have become a popular approach to conditional generation, due to their promising results and support for cross-modal synthesis. A key desideratum in conditional synthesis is to achieve high correspondence between the conditioning input and generated output. Most existing methods learn such relationships implicitly, by incorporating the prior into the variational lower bound. In this work, we take a different route---we explicitly enhance input-output connections by maximizing their mutual information. To this end, we introduce a Conditional Discrete Contrastive Diffusion (CDCD) loss and design two contrastive diffusion mechanisms to effectively incorporate it into the denoising process, combining the diffusion training and contrastive learning for the first time by connecting it with the conventional variational objectives. We demonstrate the efficacy of our approach in evaluations with diverse multimodal conditional synthesis tasks: dance-to-music generation, text-to-image synthesis, as well as class-conditioned image synthesis. On each, we enhance the input-output correspondence and achieve higher or competitive general synthesis quality. 
Furthermore, the proposed approach improves the convergence of diffusion models, reducing the number of required diffusion steps by more than 35% on two benchmarks, significantly increasing the inference speed.", "keywords": "Contrastive Diffusion;Conditioned Generations;Music Generation;Image Synthesis", "primary_area": "", "supplementary_material": "/attachment/0f1db91b179efdced0835913e2d06d7131e49369.zip", "author": "Ye Zhu;Yu Wu;Kyle Olszewski;Jian Ren;Sergey Tulyakov;Yan Yan", "authorids": "~Ye_Zhu3;~Yu_Wu3;~Kyle_Olszewski1;~Jian_Ren2;~Sergey_Tulyakov1;~Yan_Yan6", "gender": "F;M;M;M;M;M", "homepage": "https://l-yezhu.github.io/;https://yu-wu.net;https://kyleolsz.github.io/;https://alanspike.github.io/;http://www.stulyakov.com/;", "dblp": ";22/0-11;165/9717;59/2180-5;40/6115;13/3953-2", "google_scholar": "uk5WuyIAAAAJ;23SZHUwAAAAJ;FWDVqjgAAAAJ;https://scholar.google.co.jp/citations?user=vDALiU4AAAAJ;mgzXR0sAAAAJ;", "orcid": ";;0000-0001-8775-6879;;;", "linkedin": ";;kyle-olszewski-2623ab1b;;sergeytulyakov/;", "or_profile": "~Ye_Zhu3;~Yu_Wu3;~Kyle_Olszewski1;~Jian_Ren2;~Sergey_Tulyakov1;~Yan_Yan6", "aff": "Illinois Institute of Technology;Wuhan University;Snap Inc.;Snap Inc.;;", "aff_domain": "iit.edu;whu.edu.cn;snap.com;snapchat.com;;", "position": "PhD student;Full Professor;Researcher;Research Scientist;;", "bibtex": "@inproceedings{\nzhu2023discrete,\ntitle={Discrete Contrastive Diffusion for Cross-Modal Music and Image Generation},\nauthor={Ye Zhu and Yu Wu and Kyle Olszewski and Jian Ren and Sergey Tulyakov and Yan Yan},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=1-MBdJssZ-S}\n}", "github": "", "project": "", "reviewers": "6GAW;MzmQ;uFmy;3LEn", "pdf_size": 2807502, "recommendation": "6;6;6;8", "confidence": "3;4;2;4", "correctness": "3;3;3;3", "technical_novelty": "3;3;3;3", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "53;73;146;87", "wc_strength_and_weaknesses": "85;184;489;156", "wc_clarity_quality_novelty_and_reproducibility": "60;31;233;1", "wc_summary_review": "48;91;77;19", "wc_review": "246;379;945;263", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 89.75, 34.65093793824346 ], "wc_strength_and_weaknesses_avg": [ 228.5, 154.66819323959274 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 81.25, 90.06213133165349 ], "wc_summary_review_avg": [ 58.75, 27.698149757700424 ], "wc_review_avg": [ 458.25, 285.64783825542946 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.5222329678670935, "corr_recommendation_correctness": 0.0, "gs_citation": 90, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2152301810034158196&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=1-MBdJssZ-S", "email": "iit.edu;whu.edu.cn;snap.com;snapchat.com;;", "author_num": 6, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "Illinois Institute of Technology;Wuhan University;Snap Inc.", "aff_unique_dep": ";;", "aff_unique_url": 
"https://www.iit.edu;http://www.whu.edu.cn/;https://www.snapinc.com", "aff_unique_abbr": "IIT;WHU;Snap", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;China" }, { "id": "10E_ZGfTBt", "title": "Improving Adversarial Robustness via Frequency Regularization", "track": "main", "status": "Reject", "tldr": "We show that AT-CNNs extract robust features from the low-frequency region to gain robustness and explain why the white-box attack is hard to defend from a spectral perspective, then propose a frequency regularization to improve the robustness.", "abstract": "Deep neural networks (DNNs) are incredibly vulnerable to crafted, human-imperceptible adversarial perturbations. While adversarial training (AT) has proven to be an effective defense approach, the properties of AT for robustness improvement remain an open issue. In this paper, we investigate AT from a spectral perspective, providing new insights into the design of effective defenses. Our analyses show that AT induces the deep model to focus more on the low-frequency region, which retains the shape-biased representations, to gain robustness. Further, we find that the spectrum of a white-box attack is primarily distributed in regions the model focuses on, and the perturbation attacks the spectral bands where the model is vulnerable. To train a model tolerant to frequency-varying perturbation, we propose a frequency regularization (FR) such that the spectral output inferred by an attacked input stays as close as possible to its natural input counterpart. Experiments demonstrate that FR and its weight averaging (WA) extension could significantly improve the robust accuracy by 1.14% ~ 4.57%, across multiple datasets (SVHN, CIFAR-10, CIFAR-100, and Tiny ImageNet), and various attacks (PGD, C&W, and Autoattack), without any extra data.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/567ee35ae259248056f9081683f82df5eede7ccb.zip", "author": "Binxiao Huang;Chaofan Tao;Rui Lin;Ngai Wong", "authorids": "~Binxiao_Huang1;~Chaofan_Tao1;~Rui_Lin3;~Ngai_Wong1", "gender": "M;M;F;M", "homepage": "https://harr7y.github.io/;;https://rlin27.github.io/;https://www.eee.hku.hk/~nwong/", "dblp": "317/0063;239/5831;https://dblp.org/rec/journals/corr/abs-2203-13556;88/3656", "google_scholar": "kJ_qMjoAAAAJ;gjmfLroAAAAJ;gx0RITkAAAAJ;PM_uMYIAAAAJ", "orcid": "0000-0001-5316-703X;;;0000-0002-3026-0108", "linkedin": ";;;", "or_profile": "~Binxiao_Huang1;~Chaofan_Tao1;~Rui_Lin3;~Ngai_Wong1", "aff": "University of Hong Kong;The University of Hong Kong;Huawei Technologies Ltd.;The University of Hong Kong", "aff_domain": "hku.hk;hku.hk;huawei.com;hku.hk", "position": "PhD student;PhD Student;Researcher;Associate Professor", "bibtex": "@misc{\nhuang2023improving,\ntitle={Improving Adversarial Robustness via Frequency Regularization},\nauthor={Binxiao Huang and Chaofan Tao and Rui Lin and Ngai Wong},\nyear={2023},\nurl={https://openreview.net/forum?id=10E_ZGfTBt}\n}", "github": "", "project": "", "reviewers": "j5uT;5QQs;DY1G;q8SJ", "site": "https://openreview.net/forum?id=10E_ZGfTBt", "pdf_size": 6739979, "recommendation": "3;5;5;5", "confidence": "4;4;3;4", "correctness": "2;2;3;2", "technical_novelty": "2;2;2;2", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "41;71;28;61", "wc_strength_and_weaknesses": "372;169;188;44", "wc_clarity_quality_novelty_and_reproducibility": "12;59;37;16", "wc_summary_review": "49;193;30;5", "wc_review": "474;492;283;126", 
"wc_reply_reviewers": "0;0;121;0", "wc_reply_authors": "1118;635;1735;618", "reply_reviewers": "0;0;1;0", "reply_authors": "2;1;3;1", "recommendation_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 50.25, 16.78354849249705 ], "wc_strength_and_weaknesses_avg": [ 193.25, 117.09264494407836 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 31.0, 18.748333259252675 ], "wc_summary_review_avg": [ 69.25, 73.13130314714759 ], "wc_review_avg": [ 343.75, 150.04061950018735 ], "wc_reply_reviewers_avg": [ 30.25, 52.39453692895854 ], "wc_reply_authors_avg": [ 1026.5, 455.65584600661055 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:K9Gi80HXGR4J:scholar.google.com/&scioq=Improving+Adversarial+Robustness+via+Frequency+Regularization&hl=en&as_sdt=0,47", "gs_version_total": 0, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Hong Kong;Huawei", "aff_unique_dep": ";Huawei Technologies", "aff_unique_url": "https://www.hku.hk;https://www.huawei.com", "aff_unique_abbr": "HKU;Huawei", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Patch-Level Contrasting without Patch Correspondence for Accurate and Dense Contrastive Representation Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12240", "id": "10R_bcjFwJ", "poster": "/media/PosterPDFs/ICLR%202023/12240.png?t=1680958681.9248545", "openreview": "https://openreview.net/forum?id=10R_bcjFwJ", "slides": "https://iclr.cc/virtual/2023/poster/12240", "video": "https://iclr.cc/virtual/2023/poster/12240", "author_site": "Shaofeng Zhang, Feng Zhu, Rui Zhao, Junchi Yan", "tldr": "We propose a new self-supervised leanring method to learn both spatial-sensitive and global-discriminative information", "abstract": "We propose ADCLR: \\underline{A}ccurate and \\underline{D}ense \\underline{C}ontrastive \\underline{R}epresentation \\underline{L}earning, a novel self-supervised learning framework for learning accurate and dense vision representation. To extract spatial-sensitive information, ADCLR introduces query patches for contrasting in addition with global contrasting. Compared with previous dense contrasting methods, ADCLR mainly enjoys three merits: i) achieving both global-discriminative and spatial-sensitive representation, ii) model-efficient (no extra parameters in addition to the global contrasting baseline), and iii) correspondence-free and thus simpler to implement. Our approach achieves new state-of-the-art performance for contrastive methods. On classification tasks, for ViT-S, ADCLR achieves 78.1\\% top-1 accuracy on ImageNet with linear probing, outperforming our baseline (DINO) without our devised techniques as plug-in, by 1.1\\%. For ViT-B, ADCLR achieves 79.8\\%, 84.0\\% accuracy on ImageNet by linear probing and finetune, outperforming DINO by 0.6\\%, 0.4\\% accuracy. 
For dense tasks, on MS-COCO, ADCLR achieves significant improvements of 44.3\\% AP on object detection, 39.7\\% AP on instance segmentation, outperforming previous SOTA method SelfPatch by 2.2\\% and 1.2\\%, respectively. On ADE20K, ADCLR outperforms SelfPatch by 1.0\\% mIoU, 1.2\\% mAcc on the segmentation task.", "keywords": "self supervised learning;contrastive learning", "primary_area": "", "supplementary_material": "", "author": "Shaofeng Zhang;Feng Zhu;Rui Zhao;Junchi Yan", "authorids": "~Shaofeng_Zhang1;~Feng_Zhu1;~Rui_Zhao6;~Junchi_Yan2", "gender": "M;M;M;M", "homepage": "https://sherrylone.github.io;http://home.ustc.edu.cn/~zhufengx/;http://zhaorui.xyz/;http://thinklab.sjtu.edu.cn/", "dblp": "132/2540;71/2791-6;26/2578-1;60/7949.html", "google_scholar": "VoVVJIgAAAAJ;oO53gjEAAAAJ;1c9oQNMAAAAJ;ga230VoAAAAJ", "orcid": ";;;0000-0001-9639-7679", "linkedin": ";;;", "or_profile": "~Shaofeng_Zhang1;~Feng_Zhu1;~Rui_Zhao6;~Junchi_Yan1", "aff": "Shanghai Jiaotong University;SenseTime Group LTD;SenseTime Research;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sensetime.com;sensetime.com;sjtu.edu.cn", "position": "PhD student;Researcher;Researcher;Associate Professor", "bibtex": "@inproceedings{\nzhang2023patchlevel,\ntitle={Patch-Level Contrasting without Patch Correspondence for Accurate and Dense Contrastive Representation Learning},\nauthor={Shaofeng Zhang and Feng Zhu and Rui Zhao and Junchi Yan},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=10R_bcjFwJ}\n}", "github": "", "project": "", "reviewers": "7W9J;D4eS;wYqQ;kB6d", "pdf_size": 479427, "recommendation": "6;6;6;8", "confidence": "2;3;3;3", "correctness": "3;3;3;4", "technical_novelty": "3;3;3;3", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "27;65;46;68", "wc_strength_and_weaknesses": "127;103;123;76", "wc_clarity_quality_novelty_and_reproducibility": "5;30;10;24", "wc_summary_review": "17;50;44;18", "wc_review": "176;248;223;186", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "63;161;267;35", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 51.5, 16.469669092000604 ], "wc_strength_and_weaknesses_avg": [ 107.25, 20.20365066021485 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 17.25, 10.133484099755622 ], "wc_summary_review_avg": [ 32.25, 14.905955185763842 ], "wc_review_avg": [ 208.25, 28.86498744153546 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 131.5, 91.15234500549067 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 1.0, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13479789491265261609&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=10R_bcjFwJ", "email": "sjtu.edu.cn;sensetime.com;sensetime.com;sjtu.edu.cn", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Shanghai Jiao Tong University;SenseTime Group;SenseTime", "aff_unique_dep": ";;SenseTime Research", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.sensetime.com;https://www.sensetime.com", "aff_unique_abbr": 
"SJTU;SenseTime;SenseTime", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "10tgIzcC2vY", "title": "Upcycled-FL: Improving Accuracy and Privacy with Less Computation in Federated Learning", "track": "main", "status": "Withdraw", "tldr": "We propose a federated learning framework that improves accuracy-privacy tradeoff with less computation.", "abstract": "Federated learning (FL) is a distributed learning paradigm that allows multiple decentralized edge devices to collaboratively learn toward a common objective without sharing local data. Although local data is not exposed directly, privacy concerns nonetheless exist as sensitive information can be inferred from intermediate computations. As the same data is repeatedly used over an iterative process, information leakage accumulates substantially over time, making it difficult to balance the trade-off between privacy and accuracy. In this paper we introduce Upcycled-FL, a novel federated learning framework, where first-order approximation is applied at every even iteration. Under such a scheme, half of the steps incur no privacy loss and require much less computation. Theoretically, we establish the convergence rate performance of Upcycled-FL and provide privacy analysis based on objective and output perturbations. Experiments on real-world data show that Upcycled-FL consistently outperforms existing methods over heterogeneous data, and significantly improves privacy-accuracy trade-off, while reducing 48% of the training time on average.", "keywords": "Federated Learning;Differential Privacy", "primary_area": "", "supplementary_material": "/attachment/bbbe306893ad003a348b87736100d5f48420779c.zip", "author": "Tongxin Yin;Xueru Zhang;Mohammad Mahdi Khalili;Mingyan Liu", "authorids": "~Tongxin_Yin1;~Xueru_Zhang2;~Mohammad_Mahdi_Khalili3;~Mingyan_Liu1", "gender": "F;F;M;F", "homepage": "https://www.linkedin.com/in/tongxinyin/;https://xueruzhang.github.io/;https://Khalilimahdi.github.io;https://liu.engin.umich.edu", "dblp": "305/3911;;159/2163.html;97/5725", "google_scholar": "_02Q5nEAAAAJ;PNBO_a4AAAAJ;hSgnKecAAAAJ;WiIM-MgAAAAJ", "orcid": "0000-0002-6166-3890;;0000-0002-4223-3254;0000-0003-3295-9200", "linkedin": "tongxinyin/;;mohammad-mahdi-khalili-aa4241127;", "or_profile": "~Tongxin_Yin1;~Xueru_Zhang2;~Mohammad_Mahdi_Khalili3;~Mingyan_Liu1", "aff": "University of Michigan - Ann Arbor;Ohio State University;Yahoo! 
Research;University of Michigan - Ann Arbor", "aff_domain": "umich.edu;osu.edu;yahooinc.com;umich.edu", "position": "PhD student;Assistant Professor;Research Scientist;Full Professor", "bibtex": "@misc{\nyin2023upcycledfl,\ntitle={Upcycled-{FL}: Improving Accuracy and Privacy with Less Computation in Federated Learning},\nauthor={Tongxin Yin and Xueru Zhang and Mohammad Mahdi Khalili and Mingyan Liu},\nyear={2023},\nurl={https://openreview.net/forum?id=10tgIzcC2vY}\n}", "github": "", "project": "", "reviewers": "FLJt;xGd4;N8S2", "site": "https://openreview.net/forum?id=10tgIzcC2vY", "pdf_size": 2002845, "recommendation": "3;5;5", "confidence": "4;4;4", "correctness": "2;3;3", "technical_novelty": "2;3;2", "empirical_novelty": "1;2;2", "wc_summary_paper": "150;83;110", "wc_strength_and_weaknesses": "373;319;344", "wc_clarity_quality_novelty_and_reproducibility": "54;61;35", "wc_summary_review": "20;20;41", "wc_review": "597;483;530", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "wc_summary_paper_avg": [ 114.33333333333333, 27.52372713779069 ], "wc_strength_and_weaknesses_avg": [ 345.3333333333333, 22.065558884580486 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 50.0, 10.98483803552272 ], "wc_summary_review_avg": [ 27.0, 9.899494936611665 ], "wc_review_avg": [ 536.6666666666666, 46.77843757782235 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.9999999999999998, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:9jeTa2RIgsEJ:scholar.google.com/&scioq=Upcycled-FL:+Improving+Accuracy+and+Privacy+with+Less+Computation+in+Federated+Learning&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Michigan;Ohio State University;Yahoo!", "aff_unique_dep": ";;Yahoo! Research", "aff_unique_url": "https://www.umich.edu;https://www.osu.edu;https://research.yahoo.com", "aff_unique_abbr": "UM;OSU;Yahoo!", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Ann Arbor;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Reward Design with Language Models", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10964", "id": "10uNUgI5Kl", "poster": "/media/PosterPDFs/ICLR%202023/10964.png?t=1682480831.5414803", "openreview": "https://openreview.net/forum?id=10uNUgI5Kl", "slides": "https://iclr.cc/virtual/2023/poster/10964", "video": "https://iclr.cc/virtual/2023/poster/10964", "author_site": "Minae Kwon, Sang Michael Xie, Kalesha Bullard, Dorsa Sadigh", "tldr": "We make reward design easier by using large language models models (like GPT-3) as a proxy for a user's reward function given that a user provides a few examples (or a description) of the desired behavior.", "abstract": "Reward design in reinforcement learning (RL) is challenging since specifying human notions of desired behavior may be difficult via reward functions or require many expert demonstrations. 
Can we instead cheaply design rewards using a natural language interface? This paper explores how to simplify reward design by using a large language model (LLM) such as GPT-3 as a proxy reward function, where the user provides a textual prompt containing a few examples (few-shot) or a description (zero-shot) of desired behavior. Our approach leverages this proxy reward function in an RL framework. Specifically, users specify a prompt once at the beginning of training. During training, the LLM evaluates an RL agent's behavior against the desired behavior described by the prompt and outputs a corresponding reward signal. The RL agent then uses this reward to update its behavior. We evaluate whether our approach can train agents aligned with user objectives in the Ultimatum Game, matrix games, and the DealOrNoDeal negotiation task. In all three tasks, we show that RL agents trained with our framework are well-aligned with the user's objectives and outperform RL agents trained with reward functions learned via supervised learning. ", "keywords": "reward design;foundation models;gpt3;reward specification;reinforcement learning;human-ai interaction", "primary_area": "", "supplementary_material": "/attachment/473e7d8a231ee29e2d80742375520aa6ebaea36b.zip", "author": "Minae Kwon;Sang Michael Xie;Kalesha Bullard;Dorsa Sadigh", "authorids": "~Minae_Kwon1;~Sang_Michael_Xie1;~Kalesha_Bullard1;~Dorsa_Sadigh1", "gender": "F;;F;F", "homepage": ";https://cs.stanford.edu/~eix/;http://www.kaleshabullard.com;https://dorsa.fyi/", "dblp": ";220/3987;153/7408;117/3174", "google_scholar": ";EBNa5IEAAAAJ;QehMdGIAAAAJ;ZaJEZpYAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Minae_Kwon1;~Sang_Michael_Xie1;~Kalesha_Bullard1;~Dorsa_Sadigh1", "aff": "Stanford University;Stanford University;Google DeepMind;Stanford University", "aff_domain": "stanford.edu;stanford.edu;deepmind.com;stanford.edu", "position": "PhD student;PhD student;Research Scientist;Assistant Professor", "bibtex": "@inproceedings{\nkwon2023reward,\ntitle={Reward Design with Language Models},\nauthor={Minae Kwon and Sang Michael Xie and Kalesha Bullard and Dorsa Sadigh},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=10uNUgI5Kl}\n}", "github": "", "project": "", "reviewers": "UKq1;1rSQ;49Yr;7igZ", "pdf_size": 1839270, "recommendation": "5;5;8;8", "confidence": "3;3;5;3", "correctness": "3;3;3;1", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;3;3;4", "wc_summary_paper": "50;86;40;141", "wc_strength_and_weaknesses": "210;218;383;560", "wc_clarity_quality_novelty_and_reproducibility": "31;113;32;347", "wc_summary_review": "221;52;45;228", "wc_review": "512;469;500;1276", "wc_reply_reviewers": "0;0;52;0", "wc_reply_authors": "1367;1012;429;1094", "reply_reviewers": "0;0;1;0", "reply_authors": "3;2;2;3", "recommendation_avg": [ 6.5, 1.5 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "correctness_avg": [ 2.5, 0.8660254037844386 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 79.25, 39.543488718118944 ], "wc_strength_and_weaknesses_avg": [ 342.75, 143.18061146677647 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 130.75, 129.20985837001757 ], "wc_summary_review_avg": [ 136.5, 88.06957476904269 ], "wc_review_avg": [ 689.25, 339.12341042753155 ], "wc_reply_reviewers_avg": [ 13.0, 22.516660498395403 ], "wc_reply_authors_avg": [ 975.5, 341.8000146284374 ],
"reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.5773502691896258, "corr_recommendation_correctness": -0.5773502691896258, "gs_citation": 277, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14992709418129263154&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=10uNUgI5Kl", "email": "stanford.edu;stanford.edu;deepmind.com;stanford.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Stanford University;Google", "aff_unique_dep": ";Google DeepMind", "aff_unique_url": "https://www.stanford.edu;https://deepmind.com", "aff_unique_abbr": "Stanford;DeepMind", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "13rQhx37o3u", "title": "DeepTime: Deep Time-index Meta-learning for Non-stationary Time-series Forecasting", "track": "main", "status": "Reject", "tldr": "We propose a deep time-index model which leverages a meta-learning formulation to tackle non-stationary time-series forecasting.", "abstract": "Advances in I.T. infrastructure has led to the collection of longer sequences of time-series. Such sequences are typically non-stationary, exhibiting distribution shifts over time -- a challenging scenario for the forecasting task, due to the problems of covariate shift, and conditional distribution shift. In this paper, we show that deep time-index models possess strong synergies with a meta-learning formulation of forecasting, displaying significant advantages over existing neural forecasting methods in tackling the problems arising from non-stationarity. These advantages include having a stronger smoothness prior, avoiding the problem of covariate shift, and having better sample efficiency. To this end, we propose DeepTime, a deep time-index model trained via meta-learning. Extensive experiments on real-world datasets in the long sequence time-series forecasting setting demonstrate that our approach achieves competitive results with state-of-the-art methods, and is highly efficient. 
Code is attached as supplementary material, and will be publicly released.", "keywords": "time-series;forecasting;deep learning;implicit neural representation;meta-learning;time-index;non-stationary", "primary_area": "", "supplementary_material": "/attachment/95afd6041b005131b92074dd27ad3dc76eda9888.zip", "author": "Gerald Woo;Chenghao Liu;Doyen Sahoo;Akshat Kumar;Steven Hoi", "authorids": "~Gerald_Woo1;~Chenghao_Liu1;~Doyen_Sahoo1;~Akshat_Kumar2;~Steven_Hoi2", "gender": "M;M;M;M;M", "homepage": ";;https://www.linkedin.com/in/doyensahoo/?originalSubdomain=sg;http://www.smu.edu.sg/faculty/profile/102291/Akshat-KUMAR;http://stevenhoi.com", "dblp": "246/5297;;151/3155;73/193;", "google_scholar": ";https://scholar.google.com/citations?hl=en;https://scholar.google.com.sg/citations?hl=en;https://scholar.google.com.tw/citations?user=zsYC3R0AAAAJ;JoLjflYAAAAJ", "orcid": ";;;;", "linkedin": "gerald-woo/;chenghao-liu-40a62a56/;doyensahoo/?originalSubdomain=sg;;", "or_profile": "~Gerald_Woo1;~Chenghao_Liu1;~Doyen_Sahoo1;~Akshat_Kumar2;~Steven_Hoi2", "aff": "Singapore Management University;Salesforce AI Research;SalesForce.com;Singapore Management University;Singapore Management University", "aff_domain": "smu.edu.sg;salesforce.com;salesforce.com;smu.edu.sg;smu.edu.sg", "position": "PhD student;Researcher;Researcher;Associate Professor;Associate Professor", "bibtex": "@misc{\nwoo2023deeptime,\ntitle={DeepTime: Deep Time-index Meta-learning for Non-stationary Time-series Forecasting},\nauthor={Gerald Woo and Chenghao Liu and Doyen Sahoo and Akshat Kumar and Steven Hoi},\nyear={2023},\nurl={https://openreview.net/forum?id=13rQhx37o3u}\n}", "github": "", "project": "", "reviewers": "comQ;NCFZ;i4vc;3pqc", "site": "https://openreview.net/forum?id=13rQhx37o3u", "pdf_size": 802719, "recommendation": "3;3;5;6", "confidence": "4;2;4;4", "correctness": "2;3;3;3", "technical_novelty": "3;1;2;3", "empirical_novelty": "2;1;2;3", "wc_summary_paper": "37;14;76;39", "wc_strength_and_weaknesses": "166;258;892;315", "wc_clarity_quality_novelty_and_reproducibility": "71;58;65;57", "wc_summary_review": "26;53;160;26", "wc_review": "300;383;1193;437", "wc_reply_reviewers": "0;0;1085;18", "wc_reply_authors": "1594;2052;6060;995", "reply_reviewers": "0;0;3;1", "reply_authors": "5;6;13;3", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.82915619758885 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 41.5, 22.20923231451281 ], "wc_strength_and_weaknesses_avg": [ 407.75, 284.59126392073244 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 62.75, 5.673402858955108 ], "wc_summary_review_avg": [ 66.25, 55.23755515951082 ], "wc_review_avg": [ 578.25, 358.2648287231109 ], "wc_reply_reviewers_avg": [ 275.75, 467.2784903031596 ], "wc_reply_authors_avg": [ 2675.25, 1989.8061934520156 ], "reply_reviewers_avg": [ 1.0, 1.224744871391589 ], "reply_authors_avg": [ 6.75, 3.766629793329841 ], "replies_avg": [ 38, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.5555555555555555, "corr_recommendation_correctness": 0.5555555555555555, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13883895672513596547&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;1;0;0", "aff_unique_norm": "Singapore Management University;Salesforce", "aff_unique_dep": ";Salesforce AI Research", "aff_unique_url": 
"https://www.smu.edu.sg;https://www.salesforce.com", "aff_unique_abbr": "SMU;Salesforce AI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;0", "aff_country_unique": "Singapore;United States" }, { "title": "Efficient Deep Reinforcement Learning Requires Regulating Overfitting", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10790", "id": "14-kr46GvP-", "poster": "/media/PosterPDFs/ICLR%202023/10790.png?t=1682038444.233435", "openreview": "https://openreview.net/forum?id=14-kr46GvP-", "slides": "https://iclr.cc/virtual/2023/poster/10790", "video": "https://iclr.cc/virtual/2023/poster/10790", "author_site": "Qiyang Li, Aviral Kumar, Ilya Kostrikov, Sergey Levine", "tldr": "", "abstract": "Deep reinforcement learning algorithms that learn policies by trial-and-error must learn from limited amounts of data collected by actively interacting with the environment. While many prior works have shown that proper regularization techniques are crucial for enabling data-efficient RL, a general understanding of the bottlenecks in data-efficient RL has remained unclear. Consequently, it has been difficult to devise a universal technique that works well across all domains. In this paper, we attempt to understand the primary bottleneck in sample-efficient deep RL by examining several potential hypotheses such as non-stationarity, excessive action distribution shift, and overfitting. We perform thorough empirical analysis on state-based DeepMind control suite (DMC) tasks in a controlled and systematic way to show that high temporal-difference (TD) error on the validation set of transitions is the main culprit that severely affects the performance of deep RL algorithms, and prior methods that lead to good performance do in fact, control the validation TD error to be low. This observation gives us a robust principle for making deep RL efficient: we can hill-climb on the validation TD error by utilizing any form of regularization techniques from supervised learning. 
We show that a simple online model selection method that targets the validation TD error is effective across state-based DMC and Gym tasks.", "keywords": "Reinforcement Learning;Sample Efficient RL;Statistical Overfitting", "primary_area": "", "supplementary_material": "", "author": "Qiyang Li;Aviral Kumar;Ilya Kostrikov;Sergey Levine", "authorids": "~Qiyang_Li1;~Aviral_Kumar2;~Ilya_Kostrikov1;~Sergey_Levine1", "gender": "M;M;M;M", "homepage": "https://colinqiyangli.github.io/;https://aviralkumar2907.github.io/;;https://people.eecs.berkeley.edu/~svlevine/", "dblp": ";202/7961;https://dblp.org/pers/k/Kostrikov:Ilya.html;80/7594", "google_scholar": "qlwwdfEAAAAJ;;PTS2AOgAAAAJ;8R35rCwAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Qiyang_Li1;~Aviral_Kumar2;~Ilya_Kostrikov1;~Sergey_Levine1", "aff": "University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;Google", "aff_domain": "berkeley.edu;berkeley.edu;berkeley.edu;google.com", "position": "PhD student;PhD student;Postdoc;Research Scientist", "bibtex": "@inproceedings{\nli2023efficient,\ntitle={Efficient Deep Reinforcement Learning Requires Regulating Overfitting},\nauthor={Qiyang Li and Aviral Kumar and Ilya Kostrikov and Sergey Levine},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=14-kr46GvP-}\n}", "github": "", "project": "", "reviewers": "yYZT;zRMn;rmvQ", "pdf_size": 2383431, "recommendation": "6;8;8", "confidence": "5;3;4", "correctness": "3;2;3", "technical_novelty": "3;3;3", "empirical_novelty": "4;3;4", "wc_summary_paper": "230;54;62", "wc_strength_and_weaknesses": "155;753;59", "wc_clarity_quality_novelty_and_reproducibility": "32;7;184", "wc_summary_review": "92;80;31", "wc_review": "509;894;336", "wc_reply_reviewers": "127;593;0", "wc_reply_authors": "631;516;324", "reply_reviewers": "1;2;0", "reply_authors": "1;2;1", "recommendation_avg": [ 7.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 115.33333333333333, 81.147328291749 ], "wc_strength_and_weaknesses_avg": [ 322.3333333333333, 307.03890452007687 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 74.33333333333333, 78.21480394116932 ], "wc_summary_review_avg": [ 67.66666666666667, 26.386023236217735 ], "wc_review_avg": [ 579.6666666666666, 233.21854319262198 ], "wc_reply_reviewers_avg": [ 240.0, 254.9365934240643 ], "wc_reply_authors_avg": [ 490.3333333333333, 126.6394707646694 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.8660254037844385, "corr_recommendation_correctness": -0.49999999999999983, "gs_citation": 38, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10056245592117662485&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=14-kr46GvP-", "email": "berkeley.edu;berkeley.edu;berkeley.edu;google.com", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University of California, Berkeley;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.berkeley.edu;https://www.google.com", "aff_unique_abbr": "UC Berkeley;Google", "aff_campus_unique_index": 
"0;0;0;1", "aff_campus_unique": "Berkeley;Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "15fiz99C8B", "title": "Look Back When Surprised: Stabilizing Reverse Experience Replay for Neural Approximation", "track": "main", "status": "Reject", "tldr": "We propose a new experience replay which outperforms previous SOTA on most environments ", "abstract": "Experience replay-based sampling techniques are essential to several reinforcement learning (RL) algorithms since they aid in convergence by breaking spurious correlations. The most popular techniques, such as uniform experience replay(UER) and prioritized experience replay (PER), seem to suffer from sub-optimal convergence and significant bias error, respectively. To alleviate this, we introduce a new experience replay method for reinforcement learning, called IntrospectiveExperience Replay (IER). IER picks batches corresponding to data points consecutively before the \u2018surprising\u2019 points. Our proposed approach is based on the theoretically rigorous reverse experience replay (RER), which can be shown to remove bias in the linear approximation setting but can be sub-optimal with neural approximation. We show empirically that IER is stable with neural function approximation and has a superior performance compared to the state-of-the-art techniques like uniform experience replay (UER), prioritized experience replay(PER), and hindsight experience replay (HER) on the majority of tasks.", "keywords": "Experience Replay;Reinforcement Learning", "primary_area": "", "supplementary_material": "/attachment/94cb727ce816b0588e4b87dfea4039a9768f3fb3.zip", "author": "Ramnath Kumar;Dheeraj Mysore Nagaraj", "authorids": "~Ramnath_Kumar1;~Dheeraj_Mysore_Nagaraj1", "gender": "M;M", "homepage": "https://ramnathkumar181.github.io/;https://dheerajmn.mit.edu", "dblp": ";215/5097", "google_scholar": "csZjvdEAAAAJ;0g80b7sAAAAJ", "orcid": ";", "linkedin": ";dheeraj-m-nagaraj-01739792/", "or_profile": "~Ramnath_Kumar1;~Dheeraj_Mysore_Nagaraj1", "aff": "Google;Google", "aff_domain": "google.com;google.com", "position": "Pre-Doctoral Researcher;Research Scientist", "bibtex": "@misc{\nkumar2023look,\ntitle={Look Back When Surprised: Stabilizing Reverse Experience Replay for Neural Approximation},\nauthor={Ramnath Kumar and Dheeraj Mysore Nagaraj},\nyear={2023},\nurl={https://openreview.net/forum?id=15fiz99C8B}\n}", "github": "", "project": "", "reviewers": "FAX4;HzYs;nF51;j7Bo", "site": "https://openreview.net/forum?id=15fiz99C8B", "pdf_size": 8978753, "recommendation": "1;6;6;8", "confidence": "5;3;4;3", "correctness": "1;3;3;3", "technical_novelty": "1;2;3;3", "empirical_novelty": "1;2;0;3", "wc_summary_paper": "51;129;107;157", "wc_strength_and_weaknesses": "1061;702;259;245", "wc_clarity_quality_novelty_and_reproducibility": "67;10;13;29", "wc_summary_review": "60;46;52;35", "wc_review": "1239;887;431;466", "wc_reply_reviewers": "0;351;18;0", "wc_reply_authors": "2462;4346;1166;570", "reply_reviewers": "0;1;1;0", "reply_authors": "4;8;3;1", "recommendation_avg": [ 5.25, 2.5860201081971503 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 2.5, 0.8660254037844386 ], "technical_novelty_avg": [ 2.25, 0.82915619758885 ], "empirical_novelty_avg": [ 1.5, 1.118033988749895 ], "wc_summary_paper_avg": [ 111.0, 38.91015291668744 ], "wc_strength_and_weaknesses_avg": [ 566.75, 339.41447744608655 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 29.75, 22.68672519338126 ], 
"wc_summary_review_avg": [ 48.25, 9.12071817347735 ], "wc_review_avg": [ 755.75, 331.72833388180754 ], "wc_reply_reviewers_avg": [ 92.25, 149.57000869158227 ], "wc_reply_authors_avg": [ 2136.0, 1447.725112029214 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 4.0, 2.5495097567963922 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.903594257860088, "corr_recommendation_correctness": 0.9488474727161108, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13160092192046829804&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "15hYIH0TUi", "title": "Neural Collaborative Filtering Bandits via Meta Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Contextual multi-armed bandits provide powerful tools to solve the exploitation-exploration dilemma in decision making, with direct applications in the personalized recommendation. In fact, collaborative effects among users carry the significant potential to improve the recommendation. In this paper, we introduce and study the problem by exploring `Neural Collaborative Filtering Bandits', where the rewards can be non-linear functions and groups are formed dynamically given different specific contents. To solve this problem, we propose a meta-learning based bandit algorithm, Meta-Ban (\\textbf{meta-ban}dits), where a meta-learner is designed to represent and rapidly adapt to dynamic groups, along with an informative UCB-based exploration strategy. Furthermore, we analyze that Meta-Ban can achieve the regret bound of $\\mathcal{O}(\\sqrt{nT\\log T})$, which is sharper over state-of-the-art related works. 
In the end, we conduct extensive experiments showing that Meta-Ban outperforms six strong baselines.", "keywords": "Neural Contextual Bandit;Meta Learning", "primary_area": "", "supplementary_material": "/attachment/aa1dc95d1d511c82054a9b3a178c972461f6b044.zip", "author": "Yikun Ban;Yunzhe Qi;Tianxin Wei;Jingrui He", "authorids": "~Yikun_Ban1;~Yunzhe_Qi1;~Tianxin_Wei1;~Jingrui_He1", "gender": ";M;;F", "homepage": ";https://www.linkedin.com/in/yunzhe-qi-a1409b161/;https://weitianxin.github.io/;https://www.hejingrui.org", "dblp": ";259/3914;277/5800;34/2685", "google_scholar": ";Gt17_A0AAAAJ;_LU2-kMAAAAJ;hXpZynkAAAAJ", "orcid": ";0000-0001-5828-7436;0000-0003-4450-2005;0000-0002-6429-6272", "linkedin": ";yunzhe-qi-a1409b161/;tianxin-wei-7063a2180/;", "or_profile": "~Yikun_Ban1;~Yunzhe_Qi1;~Tianxin_Wei1;~Jingrui_He1", "aff": ";University of Illinois Urbana-Champaign;University of Illinois, Urbana-Champaign;University of Illinois, Urbana Champaign", "aff_domain": ";illinois.edu;uiuc.edu;illinois.edu", "position": ";PhD student;PhD student;Associate Professor", "bibtex": "@misc{\nban2023neural,\ntitle={Neural Collaborative Filtering Bandits via Meta Learning},\nauthor={Yikun Ban and Yunzhe Qi and Tianxin Wei and Jingrui He},\nyear={2023},\nurl={https://openreview.net/forum?id=15hYIH0TUi}\n}", "github": "", "project": "", "reviewers": "Uhmg;GgGh;ygez;2vmf", "site": "https://openreview.net/forum?id=15hYIH0TUi", "pdf_size": 12994700, "recommendation": "3;5;5;8", "confidence": "4;4;3;4", "correctness": "2;4;3;3", "technical_novelty": "3;3;3;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "231;80;68;234", "wc_strength_and_weaknesses": "902;389;257;183", "wc_clarity_quality_novelty_and_reproducibility": "4;28;210;33", "wc_summary_review": "41;61;317;27", "wc_review": "1178;558;852;477", "wc_reply_reviewers": "0;167;0;0", "wc_reply_authors": "1861;1386;480;229", "reply_reviewers": "0;1;0;0", "reply_authors": "4;2;1;1", "recommendation_avg": [ 5.25, 1.7853571071357126 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 153.25, 79.37057074256175 ], "wc_strength_and_weaknesses_avg": [ 432.75, 280.7902909646272 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 68.75, 82.2841874238301 ], "wc_summary_review_avg": [ 111.5, 119.25917155506322 ], "wc_review_avg": [ 766.25, 275.6468528751961 ], "wc_reply_reviewers_avg": [ 41.75, 72.31312121600062 ], "wc_reply_authors_avg": [ 989.0, 662.3205417318717 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.08084520834544431, "corr_recommendation_correctness": 0.39605901719066966, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11828990201781680837&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Illinois Urbana-Champaign;University of Illinois", "aff_unique_dep": ";", "aff_unique_url": "https://illinois.edu;https://illinois.edu", "aff_unique_abbr": "UIUC;UIUC", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Urbana-Champaign", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "15lSKp0wBnm", "title": "3D-IntPhys: Learning 3D Visual Intuitive Physics for Fluids, Rigid Bodies, and Granular Materials", "track": "main", 
"status": "Reject", "tldr": "An intuitive physics model with explicit 3D and compositional structures learned from multi-view videos. The learned model can handle complicated objects (e.g., fluid, rigid objects, granular materials) and perform extrapolated generalization.", "abstract": "Given a visual scene, humans have strong intuitions about how a scene can evolve over time under given actions. The intuition, often termed visual intuitive physics, is a critical ability that allows us to make effective plans to manipulate the scene to achieve desired outcomes without relying on extensive trial and error. In this paper, we present a framework capable of learning 3D-grounded visual intuitive physics models purely from unlabeled images. Our method is composed of a conditional Neural Radiance Field (NeRF)-style visual frontend and a 3D point-based dynamics prediction backend, in which we impose strong relational and structural inductive bias to capture the structure of the underlying environment. Unlike existing intuitive point-based dynamics works that rely on the supervision of dense point trajectory from simulators, we relax the requirements and only assume access to multi-view RGB images and (imperfect) instance masks. This enables the proposed model to handle scenarios where accurate point estimation and tracking are hard or impossible. We evaluate the models on three challenging scenarios involving fluid, granular materials, and rigid objects, where standard detection and tracking methods are not applicable. We show our model can make long-horizon future predictions by learning from raw images and significantly outperforms models that do not employ an explicit 3D representation space. We also show that, once trained, our model can achieve strong generalization in complex scenarios under extrapolate settings.", "keywords": "Visual Intuitive Physics;Neural Implicit Representations;Graph Neural Networks;Learning-Based Dynamics Modeling;Particle-Based Dynamics", "primary_area": "", "supplementary_material": "/attachment/3b2aca3d5d5d934935e6d564b64d2d718be8bcba.zip", "author": "Haotian Xue;Antonio Torralba;Daniel LK Yamins;Joshua B. Tenenbaum;Yunzhu Li;Hsiao-Yu Tung", "authorids": "~Haotian_Xue1;~Antonio_Torralba1;~Daniel_LK_Yamins1;~Joshua_B._Tenenbaum1;~Yunzhu_Li1;~Hsiao-Yu_Tung1", "gender": "M;M;;M;M;F", "homepage": "http://web.mit.edu/torralba/www//;https://Neuroailab.stanford.edu;;https://yunzhuli.github.io/;https://xavihart.github.io;", "dblp": "t/AntonioBTorralba;;t/JoshuaBTenenbaum;182/1831;;199/1661", "google_scholar": "https://scholar.google.com.tw/citations?user=8cxDHS4AAAAJ;;;WlA92lcAAAAJ;https://scholar.google.com/citations?hl=en;", "orcid": ";;;;;", "linkedin": ";;;;haotian-xue-gatech/;", "or_profile": "~Antonio_Torralba1;~Daniel_LK_Yamins1;~Joshua_B._Tenenbaum1;~Yunzhu_Li1;~Xue_Haotian1;~Hsiao-Yu_Fish_Tung1", "aff": "Massachusetts Institute of Technology;Stanford University;Massachusetts Institute of Technology;Stanford University;Georgia Institute of Technology;", "aff_domain": "mit.edu;stanford.edu;mit.edu;stanford.edu;gatech.edu;", "position": "Full Professor;Assistant Professor;Professor;Postdoc;PhD student;", "bibtex": "@misc{\nxue2023dintphys,\ntitle={3D-IntPhys: Learning 3D Visual Intuitive Physics for Fluids, Rigid Bodies, and Granular Materials},\nauthor={Haotian Xue and Antonio Torralba and Daniel LK Yamins and Joshua B. 
Tenenbaum and Yunzhu Li and Hsiao-Yu Tung},\nyear={2023},\nurl={https://openreview.net/forum?id=15lSKp0wBnm}\n}", "github": "", "project": "", "reviewers": "1Jtf;E7oV;z9jL;TwJK", "site": "https://openreview.net/forum?id=15lSKp0wBnm", "pdf_size": 8220633, "recommendation": "3;3;5;6", "confidence": "3;5;4;3", "correctness": "3;2;3;3", "technical_novelty": "1;3;3;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "85;47;89;74", "wc_strength_and_weaknesses": "203;764;165;95", "wc_clarity_quality_novelty_and_reproducibility": "22;330;32;39", "wc_summary_review": "56;125;53;33", "wc_review": "366;1266;339;241", "wc_reply_reviewers": "105;124;86;0", "wc_reply_authors": "327;600;107;113", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.8660254037844386 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 73.75, 16.391689967785506 ], "wc_strength_and_weaknesses_avg": [ 306.75, 266.82051551558027 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 105.75, 129.61167964346424 ], "wc_summary_review_avg": [ 66.75, 34.77337343428158 ], "wc_review_avg": [ 553.0, 414.269839597333 ], "wc_reply_reviewers_avg": [ 78.75, 47.409782745758285 ], "wc_reply_authors_avg": [ 286.75, 201.39808216564526 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.4061811972299616, "corr_recommendation_correctness": 0.5555555555555555, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:zrP8NywqzHEJ:scholar.google.com/&scioq=3D-IntPhys:+Learning+3D+Visual+Intuitive+Physics+for+Fluids,+Rigid+Bodies,+and+Granular+Materials&hl=en&as_sdt=0,47", "gs_version_total": 0, "aff_unique_index": "0;1;0;1;2", "aff_unique_norm": "Massachusetts Institute of Technology;Stanford University;Georgia Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://web.mit.edu;https://www.stanford.edu;https://www.gatech.edu", "aff_unique_abbr": "MIT;Stanford;Georgia Tech", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "16BDzjpOwe", "title": "Learning Debiased Representations via Conditional Attribute Interpolation", "track": "main", "status": "Withdraw", "tldr": "This paper proposes a novel method to learn debiased representation via conditional attribute interpolation.", "abstract": "An image is usually associated with more than one attribute, e.g., annotated based on both \"shape\" and \"color\". If most samples have attributes spuriously correlated with the target label, a Deep Neural Network (DNN) is prone to neglect those samples with attributes intrinsically consistent with the targets and leads to representations with large intra-class covariance. To improve the generalization ability of such a biased model, we propose a $\\chi^2$-model to fill in the intra-class blanks and learn debiased representations. First, we use a $\\chi$-shape pattern to match the training dynamics of a DNN and find Intermediate Attribute Samples (IASs) --- samples near decision boundaries when discerning various attributes, which indicate how attribute values change from one extreme to another. 
Then we rectify the decision boundary with a $\\chi$-branch metric learning objective. Conditional interpolation among IASs eliminates the negative effect of peripheral attributes and facilitates making intra-class samples compact. Experiments show that $\\chi^2$-model learns debiased representation effectively and achieves remarkable improvements on various datasets.", "keywords": "debiased representation;conditional attribute interpolation;image classification", "primary_area": "", "supplementary_material": "", "author": "Yi-Kai Zhang;Qi-Wei Wang;De-Chuan Zhan;Han-Jia Ye", "authorids": "~Yi-Kai_Zhang2;~Qi-Wei_Wang1;~De-Chuan_Zhan1;~Han-Jia_Ye1", "gender": "M;;M;M", "homepage": "http://www.lamda.nju.edu.cn/zhangyk;http://www.lamda.nju.edu.cn/wangqiwei/;http://www.lamda.nju.edu.cn/zhandc/;http://www.lamda.nju.edu.cn/yehj", "dblp": "330/8964;195/9944;74/498;165/3014", "google_scholar": ";PQkB2EsAAAAJ;mYJf4TcAAAAJ;mgOYhtoAAAAJ", "orcid": ";;0000-0002-3533-2078;", "linkedin": ";;;", "or_profile": "~Yi-Kai_Zhang2;~Qi-Wei_Wang1;~De-Chuan_Zhan1;~Han-Jia_Ye1", "aff": "Nanjing University;Nanjing University;Nanjing University;Nanjing University", "aff_domain": "nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn", "position": "MS student;MS student;Full Professor;Associate Professor", "bibtex": "@misc{\nzhang2023learning,\ntitle={Learning Debiased Representations via Conditional Attribute Interpolation},\nauthor={Yi-Kai Zhang and Qi-Wei Wang and De-Chuan Zhan and Han-Jia Ye},\nyear={2023},\nurl={https://openreview.net/forum?id=16BDzjpOwe}\n}", "github": "", "project": "", "reviewers": "MpVa;kg71;iYVQ", "site": "https://openreview.net/forum?id=16BDzjpOwe", "pdf_size": 17254576, "recommendation": "3;3;6", "confidence": "4;1;3", "correctness": "2;1;3", "technical_novelty": "2;2;3", "empirical_novelty": "2;2;3", "wc_summary_paper": "86;147;81", "wc_strength_and_weaknesses": "315;167;63", "wc_clarity_quality_novelty_and_reproducibility": "121;132;10", "wc_summary_review": "56;82;9", "wc_review": "578;528;163", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.0, 1.4142135623730951 ], "confidence_avg": [ 2.6666666666666665, 1.247219128924647 ], "correctness_avg": [ 2.0, 0.816496580927726 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 104.66666666666667, 30.00370347510824 ], "wc_strength_and_weaknesses_avg": [ 181.66666666666666, 103.39997850848691 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 87.66666666666667, 55.10192575783738 ], "wc_summary_review_avg": [ 49.0, 30.21037349432586 ], "wc_review_avg": [ 423.0, 184.9774761063267 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.18898223650461363, "corr_recommendation_correctness": 0.8660254037844387, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4486950223834093801&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Nanjing University", "aff_unique_dep": "", "aff_unique_url": "https://www.nju.edu.cn", "aff_unique_abbr": "Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "17RDXeF-skZ", 
"title": "Doing Fast Adaptation Fast: Conditionally Independent Deep Ensembles for Distribution Shifts", "track": "main", "status": "Reject", "tldr": "", "abstract": "Classifiers in a diverse ensemble capture distinct predictive signals, which is valuable for datasets containing multiple strongly predictive signals. Performing fast adaptation at test time allows us to generalize to distributions where certain signals are no longer predictive, or to avoid relying on sensitive or protected attributes. However, ensemble learning is often expensive, even more so when we need to enforce diversity constraints between the high-dimensional representations of the classifiers. Instead, we propose an efficient and fast method for learning ensemble diversity. We minimize conditional mutual information of the output distributions between classifiers, a quantity which can be cheaply and exactly computed from empirical data. The resulting ensemble contains individually strong predictors that are only dependent because they predict the label. We demonstrate the efficacy of our method on shortcut learning tasks. Performing fast adaptation on our ensemble selects shortcut-invariant models that generalize well to test distributions where the shortcuts are uncorrelated with the label.\n", "keywords": "deep ensemble;diverse ensemble;shortcut learning;spurious correlations;conditional mutual information", "primary_area": "", "supplementary_material": "", "author": "Wanqian Yang;Aahlad Manas Puli;Andrew Gordon Wilson;Rajesh Ranganath", "authorids": "~Wanqian_Yang1;~Aahlad_Manas_Puli1;~Andrew_Gordon_Wilson1;~Rajesh_Ranganath2", "gender": "M;M;Not Specified;", "homepage": "https://wanqianyang.me/;http://aahladmanas.github.io;https://cims.nyu.edu/~andrewgw;", "dblp": "241/5900;228/9272;65/10453;97/7057", "google_scholar": "eAJISV8AAAAJ;xWmCmBQAAAAJ;https://scholar.google.com.tw/citations?user=twWX2LIAAAAJ;", "orcid": "0000-0002-6372-3607;;;", "linkedin": "wanqianyang/;;;", "or_profile": "~Wanqian_Yang1;~Aahlad_Manas_Puli1;~Andrew_Gordon_Wilson1;~Rajesh_Ranganath2", "aff": "New York University;New York University;New York University;New York University", "aff_domain": "nyu.edu;nyu.edu;nyu.edu;nyu.edu", "position": "PhD student;PhD student;Associate Professor;Assistant Professor", "bibtex": "@misc{\nyang2023doing,\ntitle={Doing Fast Adaptation Fast: Conditionally Independent Deep Ensembles for Distribution Shifts},\nauthor={Wanqian Yang and Aahlad Manas Puli and Andrew Gordon Wilson and Rajesh Ranganath},\nyear={2023},\nurl={https://openreview.net/forum?id=17RDXeF-skZ}\n}", "github": "", "project": "", "reviewers": "aogU;iHMc;q3rX", "site": "https://openreview.net/forum?id=17RDXeF-skZ", "pdf_size": 257844, "recommendation": "5;6;6", "confidence": "3;3;1", "correctness": "2;3;4", "technical_novelty": "3;2;3", "empirical_novelty": "1;2;3", "wc_summary_paper": "135;41;71", "wc_strength_and_weaknesses": "179;253;66", "wc_clarity_quality_novelty_and_reproducibility": "18;13;7", "wc_summary_review": "20;6;18", "wc_review": "352;313;162", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1111;1313;151", "reply_reviewers": "0;0;0", "reply_authors": "2;2;1", "recommendation_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 2.3333333333333335, 0.9428090415820634 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 82.33333333333333, 39.20317447463775 ], 
"wc_strength_and_weaknesses_avg": [ 166.0, 76.89386624865905 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 12.666666666666666, 4.4969125210773475 ], "wc_summary_review_avg": [ 14.666666666666666, 6.182412330330469 ], "wc_review_avg": [ 275.6666666666667, 81.9362896130619 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 858.3333333333334, 506.9130979654096 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.4999999999999999, "corr_recommendation_correctness": 0.8660254037844385, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:9oKECbSfcZoJ:scholar.google.com/&scioq=Doing+Fast+Adaptation+Fast:+Conditionally+Independent+Deep+Ensembles+for+Distribution+Shifts&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "New York University", "aff_unique_dep": "", "aff_unique_url": "https://www.nyu.edu", "aff_unique_abbr": "NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "17mPeO4rqGj", "title": "Tackling Diverse Tasks via Cross-Modal Transfer Learning", "track": "main", "status": "Reject", "tldr": "We study how to effectively transfer pretrained models to problems outside the pretraining modalities.", "abstract": "Fine-tuning large-scale pretrained models has led to remarkable progress in well-studied modalities such as vision and NLP. However, similar gains have not been observed in many other tasks due to an assumed lack of relevant pretrained models for these diverse modalities. In this work, we revisit this assumption by studying the cross-modal transfer ability of large-scale pretrained models. We introduce ORCA, a general cross-modal fine-tuning workflow that enables fast and automatic exploitation of existing pretrained models for diverse tasks. ORCA achieves task-specific adaptation by learning feature embeddings that minimize an optimal transport distance metric to map the data distribution in the end-task modality to the pretraining modality. We test ORCA on 13 tasks with varying modalities and input-output types. ORCA performs the best on 10 of them and is in the top three on the others. We further quantify the importance of embedding distance for downstream performance, highlight ORCA\u2019s utility for data-limited tasks, and demonstrate its compatibility with same-modality transfer.", "keywords": "Cross-modal transfer learning;pretrained models;fine-tuning", "primary_area": "", "supplementary_material": "/attachment/8bad086a073047e575d6ceec149548571160e302.zip", "author": "Junhong Shen;Liam Li;Lucio M. 
Dery;Corey Staten;Mikhail Khodak;Graham Neubig;Ameet Talwalkar", "authorids": "~Junhong_Shen1;~Liam_Li1;~Lucio_M._Dery1;~Corey_Staten1;~Mikhail_Khodak1;~Graham_Neubig1;~Ameet_Talwalkar1", "gender": "F;;M;;;M;M", "homepage": "https://sjunhongshen.github.io;;https://ldery.github.io/;;;http://phontron.com;http://www.cs.cmu.edu/~atalwalk/", "dblp": "256/9575;23/2305;211/7773;;;03/8155;56/5528", "google_scholar": "M561o6QAAAAJ;xPSkgtIAAAAJ;ggFzw0MAAAAJ;;;wlosgkoAAAAJ;https://scholar.google.com.tw/citations?user=TW7U1W0AAAAJ", "orcid": "0009-0002-3156-4899;;;;;;", "linkedin": ";;;corey-staten-a79b39175/;;;", "or_profile": "~Junhong_Shen1;~Liam_Li1;~Lucio_M._Dery1;~Corey_Staten1;~Mikhail_Khodak1;~Graham_Neubig1;~Ameet_Talwalkar1", "aff": "Carnegie Mellon University;;Carnegie Mellon University;;;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cmu.edu;;cmu.edu;;;cmu.edu;cmu.edu", "position": "PhD student;;PhD student;;;Associate Professor;Associate Professor", "bibtex": "@misc{\nshen2023tackling,\ntitle={Tackling Diverse Tasks via Cross-Modal Transfer Learning},\nauthor={Junhong Shen and Liam Li and Lucio M. Dery and Corey Staten and Mikhail Khodak and Graham Neubig and Ameet Talwalkar},\nyear={2023},\nurl={https://openreview.net/forum?id=17mPeO4rqGj}\n}", "github": "", "project": "", "reviewers": "tx7E;J4V2;WPBn;AGVC;uNsA", "site": "https://openreview.net/forum?id=17mPeO4rqGj", "pdf_size": 1227480, "recommendation": "5;5;6;8;8", "confidence": "4;3;4;3;4", "correctness": "2;3;4;3;3", "technical_novelty": "2;3;2;3;4", "empirical_novelty": "2;3;2;3;3", "wc_summary_paper": "41;56;51;98;68", "wc_strength_and_weaknesses": "124;203;117;228;229", "wc_clarity_quality_novelty_and_reproducibility": "16;47;37;76;17", "wc_summary_review": "27;3;26;48;35", "wc_review": "208;309;231;450;349", "wc_reply_reviewers": "0;41;71;25;0", "wc_reply_authors": "768;867;630;310;786", "reply_reviewers": "0;1;1;1;0", "reply_authors": "2;3;1;1;2", "recommendation_avg": [ 6.4, 1.3564659966250536 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "correctness_avg": [ 3.0, 0.6324555320336759 ], "technical_novelty_avg": [ 2.8, 0.7483314773547882 ], "empirical_novelty_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_paper_avg": [ 62.8, 19.630588376307013 ], "wc_strength_and_weaknesses_avg": [ 180.2, 49.67655382572346 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 38.6, 22.132329294495868 ], "wc_summary_review_avg": [ 27.8, 14.688771221582831 ], "wc_review_avg": [ 309.4, 86.90822745862442 ], "wc_reply_reviewers_avg": [ 27.4, 26.807461647832305 ], "wc_reply_authors_avg": [ 672.2, 196.49162832039434 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.06019292654288467, "corr_recommendation_correctness": 0.23312620206007845, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:mH8K4EgO5VIJ:scholar.google.com/&scioq=Tackling+Diverse+Tasks+via+Cross-Modal+Transfer+Learning&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Online Bias Correction for Task-Free Continual Learning", "status": "Poster", "track": "main", "site": 
"https://iclr.cc/virtual/2023/poster/10703", "id": "18XzeuYZh_", "poster": "/media/PosterPDFs/ICLR%202023/10703.png?t=1682884828.0887065", "openreview": "https://openreview.net/forum?id=18XzeuYZh_", "slides": "https://iclr.cc/virtual/2023/poster/10703", "video": "https://iclr.cc/virtual/2023/poster/10703", "author_site": "Aristotelis Chrysakis, Marie-Francine Moens", "tldr": "", "abstract": "Task-free continual learning is the machine-learning setting where a model is trained online with data generated by a nonstationary stream. Conventional wisdom suggests that, in this setting, models are trained using an approach called experience replay, where the risk is computed both with respect to current stream observations and to a small subset of past observations. In this work, we explain both theoretically and empirically how experience replay biases the outputs of the model towards recent stream observations. Moreover, we propose a simple approach to mitigate this bias online, by changing how the output layer of the model is optimized. We show that our approach improves significantly the learning performance of experience-replay approaches over different datasets. Our findings suggest that, when performing experience replay, the output layer of the model should be optimized separately from the preceding layers.", "keywords": "Task-Free Continual Learning", "primary_area": "", "supplementary_material": "", "author": "Aristotelis Chrysakis;Marie-Francine Moens", "authorids": "~Aristotelis_Chrysakis1;~Marie-Francine_Moens1", "gender": ";F", "homepage": ";https://people.cs.kuleuven.be/~sien.moens/", "dblp": "280/1679;m/MarieFrancineMoens", "google_scholar": ";https://scholar.google.com.tw/citations?user=O9hYMUUAAAAJ", "orcid": "0000-0002-8652-3050;0000-0002-3732-9323", "linkedin": ";marie-francine-moens-8175a56/?originalSubdomain=be", "or_profile": "~Aristotelis_Chrysakis1;~Marie-Francine_Moens1", "aff": "KU Leuven;KU Leuven, KU Leuven", "aff_domain": "kuleuven.be;cs.kuleuven.be", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nchrysakis2023online,\ntitle={Online Bias Correction for Task-Free Continual Learning},\nauthor={Aristotelis Chrysakis and Marie-Francine Moens},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=18XzeuYZh_}\n}", "github": "", "project": "", "reviewers": "cbbB;yCcF;zZ8e;S1Ps", "pdf_size": 323493, "recommendation": "6;6;6;8", "confidence": "3;4;4;4", "correctness": "3;3;3;4", "technical_novelty": "3;2;3;4", "empirical_novelty": "3;1;2;4", "wc_summary_paper": "83;76;40;96", "wc_strength_and_weaknesses": "239;53;161;310", "wc_clarity_quality_novelty_and_reproducibility": "59;614;11;41", "wc_summary_review": "77;54;19;68", "wc_review": "458;797;231;515", "wc_reply_reviewers": "670;648;0;22", "wc_reply_authors": "1978;2090;368;371", "reply_reviewers": "3;3;0;1", "reply_authors": "4;5;1;1", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.5, 1.118033988749895 ], "wc_summary_paper_avg": [ 73.75, 20.765054779605084 ], "wc_strength_and_weaknesses_avg": [ 190.75, 95.40538506813962 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 181.25, 250.43599481703902 ], "wc_summary_review_avg": [ 54.5, 22.073740054644116 ], "wc_review_avg": [ 500.25, 201.59287561816265 ], "wc_reply_reviewers_avg": [ 
335.0, 324.18667461818967 ], "wc_reply_authors_avg": [ 1201.75, 833.192167209942 ], "reply_reviewers_avg": [ 1.75, 1.299038105676658 ], "reply_authors_avg": [ 2.75, 1.7853571071357126 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 1.0, "gs_citation": 34, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18233925644400208921&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=18XzeuYZh_", "email": "kuleuven.be;cs.kuleuven.be", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Katholieke Universiteit Leuven;KU Leuven", "aff_unique_dep": ";", "aff_unique_url": "https://www.kuleuven.be;https://www.kuleuven.be", "aff_unique_abbr": "KU Leuven;KU Leuven", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Belgium" }, { "id": "1BEoYnjZVV", "title": "Geometry Problem Solving based on Counterfactual Evolutionary Reasoning", "track": "main", "status": "Reject", "tldr": "A new method using counterfactual evolutionary reasoning for geometry problem solving", "abstract": "As a representative topic in natural language processing and automated theorem proving, geometry problem solving requires abstract problem understanding and symbolic reasoning. A major challenge here is to find a feasible reasoning sequence that is consistent with given axioms and the theorems already proved. Most recent methods have exploited neural network-based techniques to automatically discover eligible solving steps. Such methods, however, are greatly affected by the expert solutions used for training. To improve the accuracy, this paper proposes a new method called counterfactual evolutionary reasoning, which uses a generative adversarial network to generate initial reasoning sequences and then introduces counterfactual reasoning to explore potential solutions. By directly exploring theorem candidates rather than relying on neural network selection, the new method can substantially extend the search space to find a more appropriate reasoning step.
Through comparative experiments on the recently proposed geometry3k, the largest geometry problem solving dataset, our method generally achieves higher accuracy than most previous methods, bringing an overall improvement of about 4.4% compared with the transformer models.", "keywords": "Counterfactual Reasoning;Geometry Problem Solving;Symbolic Reasoning", "primary_area": "", "supplementary_material": "/attachment/27b8f7a09d91a10e3f06185f5f49139794bc0372.zip", "author": "SONG Bing;Xiong Gang;Fenghua Zhu;Yisheng Lv;Peijun Ye", "authorids": "~SONG_Bing1;~Xiong_Gang1;~Fenghua_Zhu1;~Yisheng_Lv1;~Peijun_Ye1", "gender": "M;M;M;M;M", "homepage": "https://ieeexplore.ieee.org/author/37088971251;https://teacher.ucas.ac.cn/~gxiong;http://www.ia.cas.cn/sourcedb_ia_cas/cn/iaexpert/201308/t20130807_3910174.html;https://people.ucas.edu.cn/~lvyisheng;https://teacher.ucas.ac.cn/~0070153", "dblp": ";96/372-1;;;", "google_scholar": ";;;RRKqjKAAAAAJ;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~SONG_Bing1;~Xiong_Gang1;~Fenghua_Zhu1;~Yisheng_Lv1;~Peijun_Ye1", "aff": ";Institute of Automation, Chinese Academy of Science;;University of Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences", "aff_domain": ";ia.ac.cn;;ucas.ac.cn;ia.ac.cn", "position": ";Full Professor;;Full Professor;Associate Professor", "bibtex": "@misc{\nbing2023geometry,\ntitle={Geometry Problem Solving based on Counterfactual Evolutionary Reasoning},\nauthor={SONG Bing and Xiong Gang and Fenghua Zhu and Yisheng Lv and Peijun Ye},\nyear={2023},\nurl={https://openreview.net/forum?id=1BEoYnjZVV}\n}", "github": "", "project": "", "reviewers": "Zd2j;4Jop;zfWo", "site": "https://openreview.net/forum?id=1BEoYnjZVV", "pdf_size": 1298049, "recommendation": "1;3;3", "confidence": "3;4;3", "correctness": "2;2;3", "technical_novelty": "2;1;3", "empirical_novelty": "2;1;3", "wc_summary_paper": "51;93;56", "wc_strength_and_weaknesses": "368;386;267", "wc_clarity_quality_novelty_and_reproducibility": "26;19;68", "wc_summary_review": "33;38;39", "wc_review": "478;536;430", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 2.3333333333333335, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.0, 0.816496580927726 ], "empirical_novelty_avg": [ 2.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 66.66666666666667, 18.732028424302822 ], "wc_strength_and_weaknesses_avg": [ 340.3333333333333, 52.37259673616431 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 37.666666666666664, 21.63844315615664 ], "wc_summary_review_avg": [ 36.666666666666664, 2.6246692913372702 ], "wc_review_avg": [ 481.3333333333333, 43.33846123505335 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.5, "corr_recommendation_correctness": 0.5, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10796344284099366643&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Chinese Academy of Sciences;University of Chinese Academy of Sciences", "aff_unique_dep": "Institute of Automation;", "aff_unique_url": "http://www.ia.cas.cn;http://www.ucas.ac.cn", "aff_unique_abbr": "CAS;UCAS",
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "A Simple Approach for Visual Room Rearrangement: 3D Mapping and Semantic Search", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10930", "id": "1C6nCCaRe6p", "poster": "/media/PosterPDFs/ICLR%202023/10930.png?t=1682722879.4370043", "openreview": "https://openreview.net/forum?id=1C6nCCaRe6p", "slides": "https://iclr.cc/virtual/2023/poster/10930", "video": "https://iclr.cc/virtual/2023/poster/10930", "author_site": "Brandon Trabucco, Gunnar Sigurdsson, Robinson Piramuthu, Gaurav Sukhatme, Ruslan Salakhutdinov", "tldr": "A System For Exploring A Scene, Mapping Objects, and Rearranging Objects To A Visual Goal", "abstract": "Physically rearranging objects is an important capability for embodied agents. Visual room rearrangement evaluates an agent's ability to rearrange objects in a room to a desired goal based solely on visual input. We propose a simple yet effective method for this problem: (1) search for and map which objects need to be rearranged, and (2) rearrange each object until the task is complete. Our approach consists of an off-the-shelf semantic segmentation model, voxel-based semantic map, and semantic search policy to efficiently find objects that need to be rearranged. Our method was the winning submission to the AI2-THOR Rearrangement Challenge in the 2022 Embodied AI Workshop at CVPR 2022, and improves on current state-of-the-art end-to-end reinforcement learning-based methods that learn visual room rearrangement policies from 0.53% correct rearrangement to 16.56%, using only 2.7% as many samples from the environment.", "keywords": "Embodied AI;Deep Learning;Object Rearrangement", "primary_area": "", "supplementary_material": "/attachment/edc1711e77a2a0d0163a1fad24e208ba735a0f0e.zip", "author": "Brandon Trabucco;Gunnar A Sigurdsson;Robinson Piramuthu;Gaurav S. Sukhatme;Ruslan Salakhutdinov", "authorids": "~Brandon_Trabucco1;~Gunnar_A_Sigurdsson1;~Robinson_Piramuthu1;~Gaurav_S._Sukhatme1;~Ruslan_Salakhutdinov1", "gender": "M;M;M;M;M", "homepage": "http://btrabucco.com;https://scholar.google.com/citations?user=2CkqEGcAAAAJ&hl=en;http://www-robotics.usc.edu/~gaurav/;http://www.gunnar.xyz;https://www.cs.cmu.edu/~rsalakhu/", "dblp": ";29/1333;s/GauravSSukhatme;179/2347;", "google_scholar": "aLquhd4AAAAJ;https://scholar.google.cl/citations?user=2CkqEGcAAAAJ;https://scholar.google.com.tw/citations?user=lRUi-A8AAAAJ;clTKG0QAAAAJ;", "orcid": ";0000-0002-1767-8382;0000-0003-2408-474X;0000-0001-8967-7322;", "linkedin": ";rpiramuthu/;gaurav-sukhatme-9b6420b/;https://linkedin.com/in/gasigurdsson/;", "or_profile": "~Brandon_Trabucco1;~Robinson_Piramuthu1;~Gaurav_S._Sukhatme1;~Gunnar_Atli_Sigurdsson1;~Russ_Salakhutdinov1", "aff": "Carnegie Mellon University;Amazon Inc;University of Southern California;Amazon;School of Computer Science, Carnegie Mellon University", "aff_domain": "mld.cs.cmu.edu;amazon.com;usc.edu;amazon.com;cs.cmu.edu", "position": "PhD student;Principal Scientist;Full Professor;Researcher;Full Professor", "bibtex": "@inproceedings{\ntrabucco2023a,\ntitle={A Simple Approach for Visual Room Rearrangement: 3D Mapping and Semantic Search},\nauthor={Brandon Trabucco and Gunnar A Sigurdsson and Robinson Piramuthu and Gaurav S. 
Sukhatme and Ruslan Salakhutdinov},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=1C6nCCaRe6p}\n}", "github": "", "project": "", "reviewers": "Vjug;jZAh;wg2G", "pdf_size": 1688240, "recommendation": "6;6;8", "confidence": "4;4;4", "correctness": "3;3;4", "technical_novelty": "3;3;2", "empirical_novelty": "3;3;2", "wc_summary_paper": "118;58;104", "wc_strength_and_weaknesses": "308;165;144", "wc_clarity_quality_novelty_and_reproducibility": "386;15;40", "wc_summary_review": "69;39;101", "wc_review": "881;277;389", "wc_reply_reviewers": "0;59;383", "wc_reply_authors": "1876;509;1144", "reply_reviewers": "0;1;3", "reply_authors": "5;2;3", "recommendation_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 93.33333333333333, 25.62984371565478 ], "wc_strength_and_weaknesses_avg": [ 205.66666666666666, 72.86669716376306 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 147.0, 169.30642830875226 ], "wc_summary_review_avg": [ 69.66666666666667, 25.315783394730033 ], "wc_review_avg": [ 515.6666666666666, 262.344980681714 ], "wc_reply_reviewers_avg": [ 147.33333333333334, 168.3732625910526 ], "wc_reply_authors_avg": [ 1176.3333333333333, 558.5435425182973 ], "reply_reviewers_avg": [ 1.3333333333333333, 1.247219128924647 ], "reply_authors_avg": [ 3.3333333333333335, 1.247219128924647 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.9999999999999998, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3457295438850508574&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "pdf": "https://openreview.net/pdf?id=1C6nCCaRe6p", "email": "mld.cs.cmu.edu;amazon.com;usc.edu;amazon.com;cs.cmu.edu", "author_num": 5, "aff_unique_index": "0;1;2;1;0", "aff_unique_norm": "Carnegie Mellon University;Amazon;University of Southern California", "aff_unique_dep": ";Amazon;", "aff_unique_url": "https://www.cmu.edu;https://www.amazon.com;https://www.usc.edu", "aff_unique_abbr": "CMU;Amazon;USC", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Los Angeles;Pittsburgh", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "1CHhsUY32a", "title": "Controllable Image Generation via Collage Representations", "track": "main", "status": "Reject", "tldr": "We present Mixing and Match scenes (M&Ms), a novel approach to controllable image generation by conditioning on representations of image collages containing objects and backgrounds from several reference images, resulting in high quality generations.", "abstract": "Recent advances in conditional generative image models have enabled impressive results. On the one hand, text-based conditional models have achieved remarkable generation quality, by leveraging large-scale datasets of image-text pairs. To enable fine-grained controllability, however, text-based models require long prompts, whose details may be ignored by the model. On the other hand, layout-based conditional models have also witnessed significant advances. These models rely on bounding boxes or segmentation maps for precise spatial conditioning in combination with coarse semantic labels. 
The semantic labels, however, cannot be used to express detailed appearance characteristics. In this paper, we approach fine-grained scene controllability through image collages, which allow a rich visual description of the desired scene as well as the appearance and location of the objects therein, without the need for class or attribute labels. We introduce \"mixing and matching scenes\" (M&Ms), an approach that consists of an adversarially trained generative image model which is conditioned on appearance features and spatial positions of the different elements in a collage, and integrates these into a coherent image. We train our model on the OpenImages (OI) dataset and evaluate it on collages derived from OI and MS-COCO datasets. Our experiments on the OI dataset show that M&Ms outperforms baselines in terms of fine-grained scene controllability while being very competitive in terms of image quality and sample diversity. On the MS-COCO dataset, we highlight the generalization ability of our model by outperforming DALL-E in terms of the zero-shot FID metric, despite using two orders of magnitude fewer parameters and data. Collage-based generative models have the potential to advance content creation in an efficient and effective way, as they are intuitive to use and yield high-quality generations.", "keywords": "image generation;controllable image generation;conditional image generation;instance-conditioned generation;image collage;out-of-distribution generation;unseen layout generation;scene generation", "primary_area": "", "supplementary_material": "/attachment/d6a0b8619b78d5f0fe1043416db733c5b0c991b7.zip", "author": "Arantxa Casanova;Marlene Careil;Adriana Romero-Soriano;Christopher Pal;Jakob Verbeek;Michal Drozdzal", "authorids": "~Arantxa_Casanova1;~Marlene_Careil1;~Adriana_Romero-Soriano1;~Christopher_Pal1;~Jakob_Verbeek1;~Michal_Drozdzal1", "gender": "F;;;Not Specified;M;F", "homepage": ";;https://scholar.google.ca/citations?user=1ScWJOoAAAAJ&hl=en&oi=ao;http://lear.inrialpes.fr/~verbeek;;https://sites.google.com/site/adriromsor/home", "dblp": "193/6415.html;;45/1217;v/JakobJVerbeek;24/9794;54/10771", "google_scholar": "iFhSTbAAAAAJ;;https://scholar.google.ca/citations?user=1ScWJOoAAAAJ;oZGA-rAAAAAJ;https://scholar.google.ca/citations?user=XK_ktwQAAAAJ;https://scholar.google.ca/citations?user=Sm15FXIAAAAJ", "orcid": ";;;0000-0003-1419-1816;;", "linkedin": ";marl%C3%A8ne-careil-901804155/?originalSubdomain=fr;;jakob-verbeek-3b11aa14a/;;https://ca.linkedin.com/in/adriana-romero-a6415123", "or_profile": "~Arantxa_Casanova1;~Marlene_Careil1;~Christopher_Pal1;~Jakob_Verbeek1;~Michal_Drozdzal1;~Adriana_Romero1", "aff": "Polytechnique Montreal;T\u00e9l\u00e9com ParisTech;Polytechnique Montreal;Meta;Meta;Meta", "aff_domain": "polymtl.ca;telecom-paristech.fr;polymtl.ca;meta.com;fb.com;meta.com", "position": "PhD student;PhD student;Full Professor;Research Scientist;Research Scientist;Research Scientist", "bibtex": "@misc{\ncasanova2023controllable,\ntitle={Controllable Image Generation via Collage Representations},\nauthor={Arantxa Casanova and Marlene Careil and Adriana Romero-Soriano and Christopher Pal and Jakob Verbeek and Michal Drozdzal},\nyear={2023},\nurl={https://openreview.net/forum?id=1CHhsUY32a}\n}", "github": "", "project": "", "reviewers": "LMNa;ovRC;NK57;abbj", "site": "https://openreview.net/forum?id=1CHhsUY32a", "pdf_size": 29994879, "recommendation": "5;5;6;6", "confidence": "5;2;4;4", "correctness": "3;4;4;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;3;3;3",
"wc_summary_paper": "59;33;77;181", "wc_strength_and_weaknesses": "307;75;421;203", "wc_clarity_quality_novelty_and_reproducibility": "45;16;5;2", "wc_summary_review": "41;20;21;37", "wc_review": "452;144;524;423", "wc_reply_reviewers": "186;0;230;26", "wc_reply_authors": "1444;1409;1856;528", "reply_reviewers": "1;0;1;1", "reply_authors": "3;4;3;2", "recommendation_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 87.5, 56.20275793944635 ], "wc_strength_and_weaknesses_avg": [ 251.5, 127.78399743316845 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 17.0, 16.98528775146303 ], "wc_summary_review_avg": [ 29.75, 9.364160400164021 ], "wc_review_avg": [ 385.75, 144.33706211503684 ], "wc_reply_reviewers_avg": [ 110.5, 99.16022388034428 ], "wc_reply_authors_avg": [ 1309.25, 484.0957420800146 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.22941573387056177, "corr_recommendation_correctness": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2642321816814794175&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;0;2;2;2", "aff_unique_norm": "Polytechnique Montreal;T\u00e9l\u00e9com ParisTech;Meta", "aff_unique_dep": ";;Meta Platforms, Inc.", "aff_unique_url": "https://www.polymtl.ca;https://www.telecom-paristech.fr;https://meta.com", "aff_unique_abbr": "PolyMTL;TP;Meta", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Montreal;", "aff_country_unique_index": "0;1;0;2;2;2", "aff_country_unique": "Canada;France;United States" }, { "title": "STREET: A MULTI-TASK STRUCTURED REASONING AND EXPLANATION BENCHMARK", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12169", "id": "1C_kSW1-k0", "poster": "", "openreview": "https://openreview.net/forum?id=1C_kSW1-k0", "slides": "https://iclr.cc/virtual/2023/poster/12169", "video": "https://iclr.cc/virtual/2023/poster/12169", "author_site": "Danilo Neves Ribeiro, Shen Wang, Xiaofei Ma, Henghui Zhu, Rui Dong, Deguang Kong, Juliette Burger, Anjelica Ramos, zhiheng huang, William Wang, George Karypis, Bing Xiang, Dan Roth", "tldr": "We introduce STREET, a unified multi-task and multi-domain natural language reasoning and explanation benchmark.", "abstract": "We introduce STREET, a unified multi-task and multi-domain natural language reasoning and explanation benchmark. Unlike most existing question-answering (QA) datasets, we expect models to not only answer questions, but also produce step-by-step structured explanations describing how premises in the question are used to produce intermediate conclusions that can prove the correctness of a certain answer. We perform extensive evaluation with popular language models such as few-shot prompting GPT-3 and fine-tuned T5. We find that these models still lag behind human performance when producing such structured reasoning steps. 
We believe this work will provide a way for the community to better train and test systems on multi-step reasoning and explanations in natural language.", "keywords": "natural language understanding;question answering;structured explanations;soft reasoning;dataset", "primary_area": "", "supplementary_material": "", "author": "Danilo Neves Ribeiro;Shen Wang;Xiaofei Ma;Henghui Zhu;Rui Dong;Deguang Kong;Juliette Burger;Anjelica Ramos;zhiheng huang;William Yang Wang;George Karypis;Bing Xiang;Dan Roth", "authorids": "~Danilo_Neves_Ribeiro1;~Shen_Wang2;~Xiaofei_Ma1;~Henghui_Zhu1;~Rui_Dong1;~Deguang_Kong1;~Juliette_Burger1;~Anjelica_Ramos1;~zhiheng_huang4;~William_Yang_Wang2;~George_Karypis1;~Bing_Xiang2;~Dan_Roth3", "gender": "M;M;M;M;F;M;F;F;M;M;;M;M", "homepage": "https://dnr2.github.io/academic_website/;;https://www.amazon.science/author/xiaofei-ma;;;;;https://anjelicaramos.com;;;;https://www.cis.upenn.edu/~danroth/;https://www.cs.ucsb.edu/~william/", "dblp": "315/3193;;;150/4170;127/0871;53/1448;;;;;;r/DanRoth;08/9282", "google_scholar": "cSkrMRsAAAAJ;G7twX6YAAAAJ;Pc2SfvMAAAAJ;s7jVUzQAAAAJ;ePtqyakAAAAJ;wCkI3_AAAAAJ;;;uW8JaBsAAAAJ;ElqwScwAAAAJ;A6yjdJAAAAAJ;E-bpPWgAAAAJ;gf8Ms_8AAAAJ", "orcid": ";;;;;;;;;;;;", "linkedin": "danilodnr2/;shen-wang-97309138/;xiaofei-ma-b3627928;;;;juliette-burger-74a343146/;;;;;dan-roth-8667361/;", "or_profile": "~Danilo_Neves_Ribeiro1;~Shen_Wang2;~Xiaofei_Ma1;~Henghui_Zhu1;~Rui_Dong1;~Deguang_Kong1;~Juliette_Burger1;~Anjelica_Ramos1;~zhiheng_huang4;~George_Karypis1;~Bing_Xiang2;~Dan_Roth3;~William_Wang1", "aff": "Meta Facebook;Amazon;Amazon Web Services;Amazon;Amazon;Google;Amazon;Amazon;Amazon;University of Minnesota, Minneapolis;Goldman Sachs;Amazon;UC Santa Barbara", "aff_domain": "meta.com;amazon.com;amazon.com;amazon.com;amazon.com;google.com;amazon.com;amazon.com;amazon.com;umn.edu;gs.com;amazon.com;ucsb.edu", "position": "Intern;Researcher;Applied Science Manager;Researcher;Researcher;Researcher;ML Data Operation Manager;Researcher;Principal Researcher;Full Professor;Managing Director;VP and Distinguished Scientist;Full Professor", "bibtex": "@inproceedings{\nribeiro2023street,\ntitle={{STREET}: A {MULTI}-{TASK} {STRUCTURED} {REASONING} {AND} {EXPLANATION} {BENCHMARK}},\nauthor={Danilo Neves Ribeiro and Shen Wang and Xiaofei Ma and Henghui Zhu and Rui Dong and Deguang Kong and Juliette Burger and Anjelica Ramos and zhiheng huang and William Yang Wang and George Karypis and Bing Xiang and Dan Roth},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=1C_kSW1-k0}\n}", "github": "", "project": "", "reviewers": "xxyE;4X1J;KKP2;3BMb", "pdf_size": 559125, "recommendation": "5;6;8;8", "confidence": "3;4;4;3", "correctness": "3;3;4;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "111;30;80;99", "wc_strength_and_weaknesses": "426;296;368;296", "wc_clarity_quality_novelty_and_reproducibility": "21;10;195;28", "wc_summary_review": "49;86;54;14", "wc_review": "607;422;697;437", "wc_reply_reviewers": "0;105;0;0", "wc_reply_authors": "386;270;422;128", "reply_reviewers": "0;1;0;0", "reply_authors": "1;2;1;1", "recommendation_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 80.0, 30.911163032147464 ], "wc_strength_and_weaknesses_avg": [ 346.5, 
54.50458696293368 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 63.5, 76.19219120093607 ], "wc_summary_review_avg": [ 50.75, 25.52817071393875 ], "wc_review_avg": [ 540.75, 115.83258393042952 ], "wc_reply_reviewers_avg": [ 26.25, 45.46633369868303 ], "wc_reply_authors_avg": [ 301.5, 114.84228315389764 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 13, 0 ], "corr_recommendation_confidence": 0.19245008972987526, "corr_recommendation_correctness": 0.5555555555555555, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4404088795561950975&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "pdf": "https://openreview.net/pdf?id=1C_kSW1-k0", "email": "meta.com;amazon.com;amazon.com;amazon.com;amazon.com;google.com;amazon.com;amazon.com;amazon.com;umn.edu;gs.com;amazon.com;ucsb.edu", "author_num": 13, "aff_unique_index": "0;1;1;1;1;2;1;1;1;3;4;1;5", "aff_unique_norm": "Meta;Amazon;Google;University of Minnesota;Goldman Sachs;University of California, Santa Barbara", "aff_unique_dep": "Meta Platforms, Inc.;Amazon.com, Inc.;Google;;;", "aff_unique_url": "https://meta.com;https://www.amazon.com;https://www.google.com;https://www.minnesota.edu;https://www.goldmansachs.com;https://www.ucsb.edu", "aff_unique_abbr": "Meta;Amazon;Google;UMN;GS;UCSB", "aff_campus_unique_index": "1;2;3", "aff_campus_unique": ";Mountain View;Minneapolis;Santa Barbara", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "1DOS0kifqeP", "title": "Eigen Memory Trees", "track": "main", "status": "Reject", "tldr": "We create an episodic memory model for online learning and evaluate it for solving contextual bandit problems. ", "abstract": "This work introduces the Eigen Memory Tree (EMT), a novel online memory model for sequential learning scenarios. EMTs store data at the leaves of a binary tree, and route new samples through the structure using the principal components of previous experiences, facilitating efficient (logarithmic) access to relevant memories. We demonstrate that EMT outperforms existing online memory approaches, and provide a hybridized EMT-parametric algorithm that enjoys drastically improved performance over purely parametric methods with nearly no downsides. Our findings are validated using 206 datasets from the OpenML repository in both bounded and infinite memory budget situations. ", "keywords": "Episodic Memory;Contextual Bandits;Sequential Learning", "primary_area": "", "supplementary_material": "/attachment/d30246ca2cbb71d0e4ec0419f0726dd3698c32cf.zip", "author": "Mark Rucker;Jordan T.
Ash;John Langford;Paul Mineiro;Ida Momennejad", "authorids": "~Mark_Rucker1;~Jordan_T._Ash1;~John_Langford1;~Paul_Mineiro1;~Ida_Momennejad1", "gender": ";;M;;F", "homepage": "https://markrucker.net;http://www.jordantash.com;http://hunch.net/~jl;;https://www.momen-nejad.org", "dblp": "205/8066;176/5225;77/4488;35/5613;", "google_scholar": "SOXp7LEAAAAJ;bmRNH-UAAAAJ;LFiqVpwAAAAJ;;https://scholar.google.de/citations?user=OFdUAJwAAAAJ", "orcid": ";;;;0000-0003-0830-3973", "linkedin": ";;;;ida-momennejad-8661a710/", "or_profile": "~Mark_Rucker1;~Jordan_T._Ash1;~John_Langford1;~Paul_Mineiro1;~Ida_Momennejad1", "aff": "University of Virginia, Charlottesville;Microsoft Research;Microsoft;;Microsoft Research", "aff_domain": "virginia.edu;research.microsoft.com;microsoft.com;;research.microsoft.com", "position": "PhD student;Postdoc;Researcher;;Principal Researcher", "bibtex": "@misc{\nrucker2023eigen,\ntitle={Eigen Memory Trees},\nauthor={Mark Rucker and Jordan T. Ash and John Langford and Paul Mineiro and Ida Momennejad},\nyear={2023},\nurl={https://openreview.net/forum?id=1DOS0kifqeP}\n}", "github": "", "project": "", "reviewers": "UauN;ibPD;RGJi", "site": "https://openreview.net/forum?id=1DOS0kifqeP", "pdf_size": 1560477, "recommendation": "3;3;6", "confidence": "3;2;2", "correctness": "4;3;3", "technical_novelty": "1;2;2", "empirical_novelty": "2;2;3", "wc_summary_paper": "118;16;73", "wc_strength_and_weaknesses": "50;71;128", "wc_clarity_quality_novelty_and_reproducibility": "44;33;8", "wc_summary_review": "56;11;4", "wc_review": "268;131;213", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "239;173;151", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 4.0, 1.4142135623730951 ], "confidence_avg": [ 2.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 69.0, 41.737273509418415 ], "wc_strength_and_weaknesses_avg": [ 83.0, 32.95451410656816 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 28.333333333333332, 15.062831370260005 ], "wc_summary_review_avg": [ 23.666666666666668, 23.041026211713937 ], "wc_review_avg": [ 204.0, 56.290911048469155 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 187.66666666666666, 37.39280976634709 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.5, "corr_recommendation_correctness": -0.5, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:UiBseiDaNb0J:scholar.google.com/&scioq=Eigen+Memory+Trees&hl=en&as_sdt=0,5", "gs_version_total": 4, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "University of Virginia;Microsoft", "aff_unique_dep": ";Microsoft Research", "aff_unique_url": "https://www.virginia.edu;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "UVA;MSR", "aff_campus_unique_index": "0", "aff_campus_unique": "Charlottesville;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "1EVPT82ttr", "title": "Learning Unified Representations for Multi-Resolution Face Recognition", "track": "main", "status": "Reject", "tldr": "We propose Branch-to-Trunk Network to learn discriminative embeddings for multi-resolution face recognition while preserving representation compatibility.", "abstract": "In this work, we 
propose Branch-to-Trunk network (BTNet), a novel representation learning method for multi-resolution face recognition. It consists of a trunk network (TNet), namely a unified encoder, and multiple branch networks (BNets), namely resolution adapters. As per the input, a resolution-specific BNet is used and the outputs are implanted as feature maps in the feature pyramid of TNet, at a layer with the same resolution. The discriminability of tiny faces is significantly improved, as the interpolation error introduced by rescaling, especially up-sampling, is mitigated on the inputs. With branch distillation and backward-compatible training, BTNet transfers discriminative high-resolution information to multiple branches while guaranteeing representation compatibility. Our experiments demonstrate strong performance on face recognition benchmarks, both for multi-resolution face verification and face identification, with much less computation and parameter storage. We establish a new state-of-the-art on the challenging QMUL-SurvFace 1:N face identification task.", "keywords": "multi-resolution face recognition;deep representation learning", "primary_area": "", "supplementary_material": "/attachment/a57a5b3c00fb1602854136edba4379dcd126bc09.zip", "author": "Hulingxiao He;Wu Yuan;Yidian Huang;Shilong Zhao;Wen Yuan;HanQing Li", "authorids": "~Hulingxiao_He1;~Wu_Yuan1;~Yidian_Huang1;~Shilong_Zhao1;~Wen_Yuan1;~HanQing_Li2", "gender": "M;M;;M;M;M", "homepage": "https://github.com/hlxhe;https://cs.bit.edu.cn/szdw/jsml/fjs/yw/index.htm;https://github.com/Huang-Yidian;;http://www.igsnrr.cas.cn/sourcedb_igsnrr_cas/zw/dsjs/sssds/dtxydlxxxt/201307/t20130702_3890188.html;https://github.com/constlhq", "dblp": ";;359/0964;;;", "google_scholar": ";;;;;", "orcid": ";;0009-0006-8267-6590;;;", "linkedin": ";;;https://www.linkedin.cn/incareer/in/%E4%B8%96%E9%BE%99-%E8%B5%B5-bb031a23a;;", "or_profile": "~Hulingxiao_He1;~Wu_Yuan1;~Yidian_Huang1;~Shilong_Zhao1;~Wen_Yuan1;~HanQing_Li2", "aff": "Beijing Institute of Technology;Beijing Institute of Technology;Beijing Institute of Technology;Beijing Institute of Technology;Institute of Geographic Sciences and Natural Resources Research, Chinese Academy of Sciences;", "aff_domain": "bit.edu.cn;bit.edu.cn;bit.edu.cn;bit.edu.cn;igsnrr.ac.cn;", "position": "Undergrad student;Lecturer;MS student;Undergrad student;Associate Professor;", "bibtex": "@misc{\nhe2023learning,\ntitle={Learning Unified Representations for Multi-Resolution Face Recognition},\nauthor={Hulingxiao He and Wu Yuan and Yidian Huang and Shilong Zhao and Wen Yuan and HanQing Li},\nyear={2023},\nurl={https://openreview.net/forum?id=1EVPT82ttr}\n}", "github": "", "project": "", "reviewers": "JeWQ;FLmy;Lmwt;yRHk", "site": "https://openreview.net/forum?id=1EVPT82ttr", "pdf_size": 10915076, "recommendation": "3;5;5;5", "confidence": "4;4;4;4", "correctness": "2;4;3;3", "technical_novelty": "2;3;2;2", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "93;48;54;58", "wc_strength_and_weaknesses": "121;169;279;101", "wc_clarity_quality_novelty_and_reproducibility": "25;41;18;14", "wc_summary_review": "49;12;4;65", "wc_review": "288;270;355;238", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [
63.25, 17.541023345289748 ], "wc_strength_and_weaknesses_avg": [ 167.5, 68.95469527160569 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 24.5, 10.307764064044152 ], "wc_summary_review_avg": [ 32.5, 25.30316185775999 ], "wc_review_avg": [ 287.75, 42.75730931665368 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:l2Kv4lRAkTMJ:scholar.google.com/&scioq=Learning+Unified+Representations+for+Multi-Resolution+Face+Recognition&hl=en&as_sdt=0,5", "gs_version_total": 4, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Beijing Institute of Technology;Chinese Academy of Sciences", "aff_unique_dep": ";Institute of Geographic Sciences and Natural Resources Research", "aff_unique_url": "http://www.bit.edu.cn/;http://www.igsnrr.cas.cn", "aff_unique_abbr": "BIT;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "1FVv8PS8LYW", "title": "Dr-Fairness: Dynamic Data Ratio Adjustment for Fair Training on Real and Generated Data", "track": "main", "status": "Reject", "tldr": "We propose a novel sampling approach called Dr-Fairness that adaptively adjusts data ratios among groups and between real and generated data, which improves group fairness while minimizing accuracy degradation.", "abstract": "Fair visual recognition has become critical for preventing demographic disparity. A major cause of model unfairness is the imbalanced representation of different groups in training data. Recently, several works aim to alleviate this issue using generated data. However, these approaches often use generated data to obtain similar amounts of data across groups, which is not optimal for achieving high fairness due to different learning difficulties and generated data qualities across groups. To address this issue, we propose a novel adaptive sampling approach that leverages both real and generated data for fairness. We design a bilevel optimization that finds the optimal data sampling ratios among groups and between real and generated data while training a model. The ratios are dynamically adjusted considering both the model's accuracy as well as its fairness. To efficiently solve our non-convex bilevel optimization, we propose a simple approximation to the solution given by the implicit function theorem. Extensive experiments show that our framework achieves state-of-the-art fairness and accuracy on the CelebA and ImageNet People Subtree datasets. 
We also observe that our method adaptively relies less on the generated data when it has poor quality.", "keywords": "trustworthy AI;fairness;visual recognition;generated data", "primary_area": "", "supplementary_material": "", "author": "Yuji Roh;Weili Nie;De-An Huang;Steven Euijong Whang;Arash Vahdat;Anima Anandkumar", "authorids": "~Yuji_Roh1;~Weili_Nie1;~De-An_Huang1;~Steven_Euijong_Whang1;~Arash_Vahdat3;~Anima_Anandkumar1", "gender": "F;M;M;M;M;F", "homepage": ";https://weilinie.github.io/;http://ai.stanford.edu/~dahuang/;http://www.stevenwhang.com;http://latentspace.cc/;http://tensorlab.cms.caltech.edu/users/anima/", "dblp": "230/3981;147/4786;119/0335;w/StevenEuijongWhang;92/8108;", "google_scholar": ";zW7BH7oAAAAJ;HEY3UzgAAAAJ;w6hts30AAAAJ;https://scholar.google.ca/citations?user=p9-nlRIAAAAJ;bEcLezcAAAAJ", "orcid": ";;;0000-0001-6419-931X;;", "linkedin": ";;;steven-euijong-whang-1612b5a/;;anima-anandkumar-35171b1/", "or_profile": "~Yuji_Roh1;~Weili_Nie1;~De-An_Huang1;~Steven_Euijong_Whang1;~Arash_Vahdat3;~anima_anandkumar1", "aff": "Korea Advanced Institute of Science & Technology;NVIDIA;NVIDIA;Korea Advanced Institute of Science & Technology;NVIDIA;California Institute of Technology", "aff_domain": "kaist.ac.kr;nvidia.com;nvidia.com;kaist.ac.kr;nvidia.com;caltech.edu", "position": "PhD student;Research Scientist;Research Scientist;Associate Professor;Research Scientist;Full Professor", "bibtex": "@misc{\nroh2023drfairness,\ntitle={Dr-Fairness: Dynamic Data Ratio Adjustment for Fair Training on Real and Generated Data},\nauthor={Yuji Roh and Weili Nie and De-An Huang and Steven Euijong Whang and Arash Vahdat and Anima Anandkumar},\nyear={2023},\nurl={https://openreview.net/forum?id=1FVv8PS8LYW}\n}", "github": "", "project": "", "reviewers": "SsCr;geXT;ZWfo;niP4", "site": "https://openreview.net/forum?id=1FVv8PS8LYW", "pdf_size": 1749114, "recommendation": "5;5;5;5", "confidence": "4;5;4;2", "correctness": "3;3;3;3", "technical_novelty": "2;3;1;2", "empirical_novelty": "2;3;3;2", "wc_summary_paper": "27;71;54;60", "wc_strength_and_weaknesses": "187;262;524;177", "wc_clarity_quality_novelty_and_reproducibility": "40;20;15;41", "wc_summary_review": "37;35;98;21", "wc_review": "291;388;691;299", "wc_reply_reviewers": "0;79;113;0", "wc_reply_authors": "1062;880;1911;796", "reply_reviewers": "0;1;1;0", "reply_authors": "3;4;4;3", "recommendation_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 53.0, 16.20185174601965 ], "wc_strength_and_weaknesses_avg": [ 287.5, 140.43948874871342 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 29.0, 11.640446726822816 ], "wc_summary_review_avg": [ 47.75, 29.65952629426168 ], "wc_review_avg": [ 417.25, 162.57056160326198 ], "wc_reply_reviewers_avg": [ 48.0, 49.48232007495202 ], "wc_reply_authors_avg": [ 1162.25, 442.8545895663722 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 3.5, 0.5 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=777624293537908811&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1;1;0;1;2", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;NVIDIA;California Institute of Technology", "aff_unique_dep": ";NVIDIA Corporation;", 
"aff_unique_url": "https://www.kaist.ac.kr;https://www.nvidia.com;https://www.caltech.edu", "aff_unique_abbr": "KAIST;NVIDIA;Caltech", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pasadena", "aff_country_unique_index": "0;1;1;0;1;1", "aff_country_unique": "South Korea;United States" }, { "id": "1FsLDqHivn4", "title": "Music-to-Text Synaesthesia: Generating Descriptive Text from Music Recordings", "track": "main", "status": "Reject", "tldr": "", "abstract": "In this paper, we consider a novel research problem, music-to-text synaesthesia. Different from the classical music tagging problem that classifies a music recording into pre-defined categories, the music-to-text synaesthesia aims to generate descriptive texts from music recordings for further understanding. Although this is a new and interesting application to the machine learning community, to our best knowledge, the existing music-related datasets do not contain the semantic description on music recordings and cannot serve the music-to-text synaesthesia task. In light of this, we collect a new dataset that contains 1,955 aligned pairs of classical music recordings and text descriptions. Based on this, we build a computational model to generate sentences that can describe the content of the music recording. To tackle the highly non-discriminative classical music, we design a group topology-preservation loss in our computational model, which considers more samples as a group reference and preserves the relative topology among different samples. Extensive experimental results qualitatively and quantitatively demonstrate the effectiveness of our proposed model over five heuristics or pre-trained competitive methods and their variants on our collected dataset.", "keywords": "Multi-modal Learning;Music Description;Text Generation", "primary_area": "", "supplementary_material": "", "author": "Zhihuan Kuang;Shi Zong;Jianbing Zhang;Jiajun Chen;Hongfu Liu", "authorids": "~Zhihuan_Kuang1;~Shi_Zong1;~Jianbing_Zhang1;~Jiajun_Chen1;~Hongfu_Liu2", "gender": "M;;M;M;M", "homepage": "https://kuangkzh.github.io/;;https://cs.nju.edu.cn/zhangjb/;https://cs.nju.edu.cn/chenjiajun/index_en.htm;http://hongfuliu.com/", "dblp": ";;11/6084;;32/9075-1", "google_scholar": ";;;https://scholar.google.com.tw/citations?user=WIF7VaoAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Zhihuan_Kuang1;~Shi_Zong1;~Jianbing_Zhang1;~Jiajun_Chen1;~Hongfu_Liu2", "aff": "Nanjing University;;Nanjing University;Nanjing University;Brandeis University", "aff_domain": "nju.edu.cn;;nju.edu.cn;nju.edu.cn;brandeis.edu", "position": "MS student;;Associate Professor;Full Professor;Assistant Professor", "bibtex": "@misc{\nkuang2023musictotext,\ntitle={Music-to-Text Synaesthesia: Generating Descriptive Text from Music Recordings},\nauthor={Zhihuan Kuang and Shi Zong and Jianbing Zhang and Jiajun Chen and Hongfu Liu},\nyear={2023},\nurl={https://openreview.net/forum?id=1FsLDqHivn4}\n}", "github": "", "project": "", "reviewers": "DjXC;TFVv;fsng;ahJ7", "site": "https://openreview.net/forum?id=1FsLDqHivn4", "pdf_size": 1454431, "recommendation": "3;3;5;5", "confidence": "3;3;3;3", "correctness": "2;2;3;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "1;3;3;2", "wc_summary_paper": "99;44;25;84", "wc_strength_and_weaknesses": "568;359;51;595", "wc_clarity_quality_novelty_and_reproducibility": "31;101;60;227", "wc_summary_review": "141;29;235;128", "wc_review": "839;533;371;1034", "wc_reply_reviewers": "465;0;0;0", 
"wc_reply_authors": "1261;560;601;1522", "reply_reviewers": "1;0;0;0", "reply_authors": "3;1;1;2", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.0, 0.0 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 63.0, 29.75735203273302 ], "wc_strength_and_weaknesses_avg": [ 393.25, 217.6859837012939 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 104.75, 74.83440051206397 ], "wc_summary_review_avg": [ 133.25, 72.98758456066346 ], "wc_review_avg": [ 694.25, 258.29960801363984 ], "wc_reply_reviewers_avg": [ 116.25, 201.350906379882 ], "wc_reply_authors_avg": [ 986.0, 416.11957416108174 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 1.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6716796178185708954&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Nanjing University;Brandeis University", "aff_unique_dep": ";", "aff_unique_url": "https://www.nju.edu.cn;https://www.brandeis.edu", "aff_unique_abbr": "Nanjing U;Brandeis", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "China;United States" }, { "id": "1FsdIfRngtw", "title": "Rethinking the Value of Prompt Learning for Vision-Language Models", "track": "main", "status": "Reject", "tldr": "", "abstract": "Large-scale visual-language pre-training like CLIP has demonstrated great success in open-set visual concept learning that enables zero-shot transfer to downstream tasks through prompting. To automate prompt engineering, prompt learning is proposed to automatically learn the optimal task-relevant prompts. In this paper, we make some surprising observations that contradict common beliefs about prompts. We observe that even random prompts can achieve pretty good performance for zero-shot recognition. We also find that prompt learning gives comparable or worse performance than directly fine-tuning of the linear classifier. Moreover, prompt learning is no more than parameter-efficient learning, and is a trade-off between optimality and generalization. Our results highlight the need for the rethinking of existing prompt learning, more careful baseline evaluations in future research on prompt learning methods in vision-language models. 
", "keywords": "Prompt Tuning;Visual-Language Pre-training", "primary_area": "", "supplementary_material": "", "author": "Peisong Wang;Weihan Chen;Weixiang Xu;Qinghao Hu;Jian Cheng", "authorids": "~Peisong_Wang1;~Weihan_Chen1;~Weixiang_Xu2;~Qinghao_Hu2;~Jian_Cheng7", "gender": "M;M;M;M;M", "homepage": ";;;https://people.ucas.ac.cn/~chengjian?language=en;", "dblp": "187/5474;250/2516;;14/6145-1;14/5824", "google_scholar": "UYFZpk4AAAAJ;;fjuWXroAAAAJ;ZGCIUJ8AAAAJ;", "orcid": ";;0000-0003-0422-5509;0000-0003-1289-2758;", "linkedin": ";;;;", "or_profile": "~Peisong_Wang1;~Weihan_Chen1;~Qinghao_Hu2;~Jian_Cheng7;~Weixiang_Xu1", "aff": "Institute of Automation of\uff0cChinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences", "aff_domain": "ia.ac.cn;ia.ac.cn;ia.ac.cn;ia.ac.cn;ia.ac.cn", "position": "Associate Professor;PhD student;Associate Professor;Full Professor;PhD student", "bibtex": "@misc{\nwang2023rethinking,\ntitle={Rethinking the Value of Prompt Learning for Vision-Language Models},\nauthor={Peisong Wang and Weihan Chen and Weixiang Xu and Qinghao Hu and Jian Cheng},\nyear={2023},\nurl={https://openreview.net/forum?id=1FsdIfRngtw}\n}", "github": "", "project": "", "reviewers": "2dEp;Tvm8;pGd5;4WBY", "site": "https://openreview.net/forum?id=1FsdIfRngtw", "pdf_size": 568652, "recommendation": "3;3;3;5", "confidence": "4;5;5;3", "correctness": "4;2;2;3", "technical_novelty": "2;2;2;2", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "352;109;189;89", "wc_strength_and_weaknesses": "167;792;170;38", "wc_clarity_quality_novelty_and_reproducibility": "42;30;42;56", "wc_summary_review": "39;48;60;22", "wc_review": "600;979;461;205", "wc_reply_reviewers": "0;89;0;0", "wc_reply_authors": "302;657;645;298", "reply_reviewers": "0;1;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "correctness_avg": [ 2.75, 0.82915619758885 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 184.75, 103.5576530247765 ], "wc_strength_and_weaknesses_avg": [ 291.75, 293.6940372224128 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 42.5, 9.205976319760984 ], "wc_summary_review_avg": [ 42.25, 13.863170633011771 ], "wc_review_avg": [ 561.25, 279.7234124988468 ], "wc_reply_reviewers_avg": [ 22.25, 38.53813046840752 ], "wc_reply_authors_avg": [ 475.5, 175.55697081004786 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.8703882797784891, "corr_recommendation_correctness": 0.17407765595569782, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:iLnFjJOpjQoJ:scholar.google.com/&scioq=Rethinking+the+Value+of+Prompt+Learning+for+Vision-Language+Models&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Chinese Academy of Sciences", "aff_unique_dep": "Institute of Automation", "aff_unique_url": "http://www.ia.cas.cn", "aff_unique_abbr": "CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "MCAL: Minimum Cost Human-Machine Active Labeling", "status": "Poster", "track": "main", "site": 
"https://iclr.cc/virtual/2023/poster/12195", "id": "1FxRPKrH8bw", "poster": "", "openreview": "https://openreview.net/forum?id=1FxRPKrH8bw", "slides": "https://iclr.cc/virtual/2023/poster/12195", "video": "https://iclr.cc/virtual/2023/poster/12195", "author_site": "Hang Qiu, Krishna Chintalapudi, Ramesh Govindan", "tldr": "A framework to address the prohibitive data labeling cost challenge using hybrid human-machine labeling.", "abstract": "Today, ground-truth generation uses data sets annotated by cloud-based annotation services. These services rely on human annotation, which can be prohibitively expensive. In this paper, we consider the problem of hybrid human-machine labeling, which trains a classifier to accurately auto-label part of the data set. However, training the classifier can be expensive too. We propose an iterative approach that minimizes total overall cost by, at each step, jointly determining which samples to label using humans and which to label using the trained classifier. We validate our approach on well known public data sets such as Fashion-MNIST, CIFAR-10, CIFAR-100, and ImageNet. In some cases, our approach has 6\u00d7 lower overall cost relative to human labeling the entire data set, and is always cheaper than the cheapest competing strategy.", "keywords": "Active Labeling;Groundtruth Annotation;Dataset Labeling", "primary_area": "", "supplementary_material": "", "author": "Hang Qiu;Krishna Chintalapudi;Ramesh Govindan", "authorids": "~Hang_Qiu1;krchinta@microsoft.com;~Ramesh_Govindan1", "gender": ";;M", "homepage": "https://hangqiu.github.io/;;https://govindan.usc.edu", "dblp": "20/1303;;", "google_scholar": "9i_MgykAAAAJ;;", "orcid": "0000-0003-1206-9032;;", "linkedin": ";;", "or_profile": "~Hang_Qiu1;krchinta@microsoft.com;~Ramesh_Govindan1", "aff": "Waymo LLC;;University of Southern California", "aff_domain": "waymo.com;;usc.edu", "position": "Researcher;;Researcher", "bibtex": "@inproceedings{\nqiu2023mcal,\ntitle={{MCAL}: Minimum Cost Human-Machine Active Labeling},\nauthor={Hang Qiu and Krishna Chintalapudi and Ramesh Govindan},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=1FxRPKrH8bw}\n}", "github": "", "project": "", "reviewers": "hXWK;9gqM;Dg4t", "pdf_size": 800622, "recommendation": "5;6;8", "confidence": "3;4;3", "correctness": "3;3;3", "technical_novelty": "2;3;2", "empirical_novelty": "2;3;3", "wc_summary_paper": "119;49;91", "wc_strength_and_weaknesses": "685;290;218", "wc_clarity_quality_novelty_and_reproducibility": "11;62;44", "wc_summary_review": "19;186;59", "wc_review": "834;587;412", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "721;432;501", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 86.33333333333333, 28.767265347188555 ], "wc_strength_and_weaknesses_avg": [ 397.6666666666667, 205.29057996464968 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 39.0, 21.118712081942874 ], "wc_summary_review_avg": [ 88.0, 71.19456908126256 ], "wc_review_avg": [ 611.0, 173.11460558447016 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 551.3333333333334, 123.23509601119677 ], "reply_reviewers_avg": [ 0, 0 ], 
"reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.18898223650461363, "corr_recommendation_correctness": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17121763102609470971&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=1FxRPKrH8bw", "email": "waymo.com;;usc.edu", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Waymo;University of Southern California", "aff_unique_dep": ";", "aff_unique_url": "https://www.waymo.com;https://www.usc.edu", "aff_unique_abbr": "Waymo;USC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "1Imd7_uamo", "title": "How you start matters for generalization", "track": "main", "status": "Withdraw", "tldr": "We promote a shift of focus towards initialization rather than neural architecture or (stochastic) gradient descent to explain this implicit regularization", "abstract": "Characterizing the remarkable generalization properties of over-parameterized neural networks remains an open problem. A growing body of recent literature shows that the bias of stochastic gradient descent (SGD) and architecture choice implicitly leads to better generalization. In this paper, we show on the contrary that, independently of architecture, SGD can itself be the cause of poor generalization if one does not ensure good initialization. Specifically, we prove that any differentiably parameterized model, trained under gradient flow, obeys a weak spectral bias law which states that sufficiently high frequencies train arbitrarily slowly. This implies that very high frequencies present at initialization will remain after training, and hamper generalization. Further, we empirically test the developed theoretical insights using practical, deep networks. 
Finally, we contrast our framework with that supplied by the \\emph{flat-minima} conjecture and show that Fourier analysis grants a more reliable framework for understanding the generalization of neural networks.", "keywords": "spectral bias;generalization", "primary_area": "", "supplementary_material": "/attachment/3402f1e6b1ba9f477bb861d85e7701c9426f5d26.zip", "author": "Sameera Ramasinghe;Lachlan Ewen MacDonald;Moshiur R Farazi;Hemanth Saratchandran;Simon Lucey", "authorids": "~Sameera_Ramasinghe1;~Lachlan_Ewen_MacDonald1;~Moshiur_R_Farazi1;~Hemanth_Saratchandran1;~Simon_Lucey2", "gender": "M;;M;;M", "homepage": ";https://researchers.adelaide.edu.au/profile/lachlan.macdonald;http://www.moshiurfarazi.com/;;https://www.adelaide.edu.au/directory/simon.lucey", "dblp": "181/4514;306/7691;165/8023;;01/3542", "google_scholar": "https://scholar.google.com.au/citations?user=-j0m9aMAAAAJ;r953DlQAAAAJ;https://scholar.google.ca/citations?user=qoytZAMAAAAJ;;vmAe35UAAAAJ", "orcid": ";;;;", "linkedin": ";;moshiur-farazi;;", "or_profile": "~Sameera_Ramasinghe1;~Lachlan_Ewen_MacDonald1;~Moshiur_R_Farazi1;~Hemanth_Saratchandran1;~Simon_Lucey2", "aff": "Amazon;University of Adelaide;CSIRO;;University of Adelaide", "aff_domain": "amazon.com;adelaide.edu.au;csiro.au;;adelaide.edu.au", "position": "Researcher;Postdoc;Reseach Scientist;;Full Professor", "bibtex": "@misc{\nramasinghe2023how,\ntitle={How you start matters for generalization},\nauthor={Sameera Ramasinghe and Lachlan Ewen MacDonald and Moshiur R Farazi and Hemanth Saratchandran and Simon Lucey},\nyear={2023},\nurl={https://openreview.net/forum?id=1Imd7_uamo}\n}", "github": "", "project": "", "reviewers": "wraX;TChc;uoEM", "site": "https://openreview.net/forum?id=1Imd7_uamo", "pdf_size": 3580048, "recommendation": "3;3;6", "confidence": "3;3;2", "correctness": "3;2;4", "technical_novelty": "2;2;3", "empirical_novelty": "2;2;3", "wc_summary_paper": "56;71;130", "wc_strength_and_weaknesses": "240;258;117", "wc_clarity_quality_novelty_and_reproducibility": "49;89;83", "wc_summary_review": "27;17;15", "wc_review": "372;435;345", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.0, 1.4142135623730951 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 85.66666666666667, 31.94091767971331 ], "wc_strength_and_weaknesses_avg": [ 205.0, 62.65780079128216 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 73.66666666666667, 17.613126418163876 ], "wc_summary_review_avg": [ 19.666666666666668, 5.2493385826745405 ], "wc_review_avg": [ 384.0, 37.70941526992961 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": 0.8660254037844387, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2297655539788762053&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Amazon;University of Adelaide;Commonwealth Scientific and Industrial Research Organisation", "aff_unique_dep": "Amazon.com, Inc.;;", "aff_unique_url": 
"https://www.amazon.com;https://www.adelaide.edu.au;https://www.csiro.au", "aff_unique_abbr": "Amazon;Adelaide;CSIRO", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "United States;Australia" }, { "title": "Differentiable Mathematical Programming for Object-Centric Representation Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11838", "id": "1J-ZTr7aypY", "poster": "", "openreview": "https://openreview.net/forum?id=1J-ZTr7aypY", "slides": "https://iclr.cc/virtual/2023/poster/11838", "video": "https://iclr.cc/virtual/2023/poster/11838", "author_site": "Adeel Pervez, Phillip Lippe, Efstratios Gavves", "tldr": "", "abstract": "We propose topology-aware feature partitioning into $k$ disjoint partitions for given scene features as a method for object-centric representation learning. To this end, we propose to use minimum $s$-$t$ graph cuts as a partitioning method which is represented as a linear program. The method is topologically aware since it explicitly encodes neighborhood relationships in the image graph. To solve the graph cuts our solution relies on an efficient, scalable, and differentiable quadratic programming approximation. Optimizations specific to cut problems allow us to solve the quadratic programs and compute their gradients significantly more efficiently compared with the general quadratic programming approach. Our results show that our approach is scalable and outperforms existing methods on object discovery tasks with textured scenes and objects.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Adeel Pervez;Phillip Lippe;Efstratios Gavves", "authorids": "~Adeel_Pervez1;~Phillip_Lippe1;~Efstratios_Gavves1", "gender": ";M;M", "homepage": ";https://phlippe.github.io;https://www.egavves.com", "dblp": "225/4821;267/9431;03/8693", "google_scholar": ";69hFZp4AAAAJ;https://scholar.google.nl/citations?user=QqfCvsgAAAAJ", "orcid": ";0000-0002-3639-6938;", "linkedin": ";phillip-lippe/;", "or_profile": "~Adeel_Pervez1;~Phillip_Lippe1;~Efstratios_Gavves1", "aff": "University of Amsterdam;Google DeepMind;University of Amsterdam", "aff_domain": "uva.nl;google.com;uva.nl", "position": "PhD student;Intern;Associate Professor", "bibtex": "@inproceedings{\npervez2023differentiable,\ntitle={Differentiable Mathematical Programming for Object-Centric Representation Learning},\nauthor={Adeel Pervez and Phillip Lippe and Efstratios Gavves},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=1J-ZTr7aypY}\n}", "github": "", "project": "", "reviewers": "wmJ4;9nSj;eQdL;miwq", "pdf_size": 17872377, "recommendation": "5;5;8;8", "confidence": "3;3;5;1", "correctness": "3;3;3;4", "technical_novelty": "2;2;3;4", "empirical_novelty": "2;2;3;0", "wc_summary_paper": "42;65;116;28", "wc_strength_and_weaknesses": "156;235;157;70", "wc_clarity_quality_novelty_and_reproducibility": "56;15;41;77", "wc_summary_review": "21;34;57;62", "wc_review": "275;349;371;237", "wc_reply_reviewers": "0;0;16;0", "wc_reply_authors": "1075;1049;472;276", "reply_reviewers": "0;0;1;0", "reply_authors": "2;2;2;1", "recommendation_avg": [ 6.5, 1.5 ], "confidence_avg": [ 3.0, 1.4142135623730951 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 1.75, 1.0897247358851685 ], "wc_summary_paper_avg": [ 62.75, 33.46173187388842 ], 
"wc_strength_and_weaknesses_avg": [ 154.5, 58.37165407969865 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 47.25, 22.587330519563395 ], "wc_summary_review_avg": [ 43.5, 16.740669042783207 ], "wc_review_avg": [ 308.0, 54.26785420486054 ], "wc_reply_reviewers_avg": [ 4.0, 6.928203230275509 ], "wc_reply_authors_avg": [ 718.0, 351.03062544456145 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5773502691896258, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9861389134795961288&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=1J-ZTr7aypY", "email": "uva.nl;google.com;uva.nl", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Amsterdam;Google", "aff_unique_dep": ";Google DeepMind", "aff_unique_url": "https://www.uva.nl;https://deepmind.com", "aff_unique_abbr": "UvA;DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Netherlands;United Kingdom" }, { "id": "1KaSx3GrBBm", "title": "Moving Beyond Handcrafted Architectures in Self-Supervised Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "The current literature on self-supervised learning (SSL) focuses on developing learning objectives to train neural networks more effectively on unlabeled data. The typical development process involves taking well-established architectures, e.g., ResNet demonstrated on ImageNet, and using them to evaluate newly developed objectives on downstream scenarios. While convenient, this does not take into account the role of architectures which has been shown to be crucial in the supervised learning literature. In this work, we establish extensive evidence showing that architecture plays a significant role in SSL. We conduct a large-scale study with over 100 variants of ResNet and MobileNet architectures and evaluate them across 11 downstream scenarios in the SSL setting. We show that there is no one network that performs consistently well across the scenarios. Based on this, we propose to learn not only network weights but also architecture topologies in the SSL regime. We show that ``self-supervised architectures'' significantly outperform popular handcrafted architectures (ResNet-50 and MobileNetV2) on major image classification benchmarks (ImageNet-1K, iNat2021, and more). 
Our results suggest that it is time to consider moving beyond handcrafted architectures in SSL and start thinking about incorporating architecture search into self-supervised learning objectives.", "keywords": "NAS;Self-supervised learning", "primary_area": "", "supplementary_material": "", "author": "Sharath Girish;Debadeepta Dey;Neel Joshi;Vibhav Vineet;Shital Shah;Caio Cesar Teodoro Mendes;Abhinav Shrivastava;Yale Song", "authorids": "~Sharath_Girish1;~Debadeepta_Dey1;~Neel_Joshi1;~Vibhav_Vineet5;~Shital_Shah1;~Caio_Cesar_Teodoro_Mendes1;~Abhinav_Shrivastava2;~Yale_Song1", "gender": ";M;;;M;M;M;M", "homepage": "https://sharath-girish.github.io/;http://www.debadeepta.com;;;http://shital.com;;http://abhinavsh.info;https://people.csail.mit.edu/yalesong", "dblp": "232/3030;76/10090;;;188/5763;20/11498;65/10572;31/9606.html", "google_scholar": "KRB9iksAAAAJ;uIBzJWIAAAAJ;;;1PEHzesAAAAJ;;mIF9BowAAAAJ;dNHNpxoAAAAJ", "orcid": "0000-0003-4364-0262;;;;;;0000-0001-8928-8554;", "linkedin": ";;;;http://www.linkedin.com/in/shitals;;;", "or_profile": "~Sharath_Girish1;~Debadeepta_Dey1;~Neel_Joshi1;~Vibhav_Vineet5;~Shital_Shah1;~Caio_Cesar_Teodoro_Mendes1;~Abhinav_Shrivastava2;~Yale_Song1", "aff": "University of Maryland, College Park;Microsoft Research;;;Microsoft Research;Microsoft;Department of Computer Science, University of Maryland, College Park;FAIR, Meta", "aff_domain": "umd.edu;microsoft.com;;;research.microsoft.com;microsoft.com;cs.umd.edu;meta.com", "position": "PhD student;Principal Researcher;;;Principal Research Engineer;Researcher;Assistant Professor;Research Scientist", "bibtex": "@misc{\ngirish2023moving,\ntitle={Moving Beyond Handcrafted Architectures in Self-Supervised Learning},\nauthor={Sharath Girish and Debadeepta Dey and Neel Joshi and Vibhav Vineet and Shital Shah and Caio Cesar Teodoro Mendes and Abhinav Shrivastava and Yale Song},\nyear={2023},\nurl={https://openreview.net/forum?id=1KaSx3GrBBm}\n}", "github": "", "project": "", "reviewers": "JRjb;ZDfX;5FVM;6dkS", "site": "https://openreview.net/forum?id=1KaSx3GrBBm", "pdf_size": 5450143, "recommendation": "3;3;6;6", "confidence": "5;3;4;4", "correctness": "2;3;4;3", "technical_novelty": "2;1;3;2", "empirical_novelty": "2;1;3;2", "wc_summary_paper": "43;39;66;110", "wc_strength_and_weaknesses": "436;224;179;122", "wc_clarity_quality_novelty_and_reproducibility": "19;87;8;234", "wc_summary_review": "51;50;14;101", "wc_review": "549;400;267;567", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "822;627;662;479", "reply_reviewers": "0;0;0;0", "reply_authors": "2;1;1;1", "recommendation_avg": [ 4.5, 1.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 64.5, 28.217902119044926 ], "wc_strength_and_weaknesses_avg": [ 240.25, 118.65575207296105 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 87.0, 90.10271915985666 ], "wc_summary_review_avg": [ 54.0, 30.95965116082544 ], "wc_review_avg": [ 445.75, 121.86749976921656 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 647.5, 121.93543373441537 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.7071067811865476, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:y3Ya5dA6gDsJ:scholar.google.com/&scioq=Moving+Beyond+Handcrafted+Architectures+in+Self-Supervised+Learning&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;1;1;2;3", "aff_unique_norm": "University of Maryland;Microsoft;University of Maryland, College Park;Meta", "aff_unique_dep": ";Microsoft Research;Department of Computer Science;Facebook AI Research (FAIR)", "aff_unique_url": "https://www/umd.edu;https://www.microsoft.com/en-us/research;https://www/umd.edu;https://meta.com", "aff_unique_abbr": "UMD;MSR;UMD;Meta", "aff_campus_unique_index": "0;0", "aff_campus_unique": "College Park;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "1KtU2ya2zh5", "title": "META-STORM: Generalized Fully-Adaptive Variance Reduced SGD for Unbounded Functions", "track": "main", "status": "Reject", "tldr": "We propose new fully adaptive variance reduced algorithms removing bounded function values and bounded gradients assumptions and improving upon previous work both in the theoretical convergence rate and empirical performance.", "abstract": "We study the application of variance reduction (VR) techniques to general non-convex stochastic optimization problems. In this setting, the recent work STORM (Cutkosky & Orabona, 2019) overcomes the drawback of having to compute gradients of \u201cmega-batches\u201d that earlier VR methods rely on. There, STORM utilizes recursive momentum to achieve the VR effect and is then later made fully adaptive in STORM+ (Levy et al., 2021), where full-adaptivity removes the requirement for obtaining certain problem-specific parameters such as the smoothness of the objective and bounds on the variance and norm of the stochastic gradients in order to set the step size. However, STORM+ crucially relies on the assumption that the function values are bounded, excluding a large class of useful functions. In this work, we propose META-STORM, a generalized framework of STORM+ that removes this bounded function values assumption while still attaining the optimal convergence rate for non-convex optimization. META-STORM not only maintains full-adaptivity, removing the need to obtain problem specific parameters, but also improves the convergence rate\u2019s dependency on the problem parameters. Furthermore, META-STORM can utilize a large range of parameter settings that subsumes previous methods allowing for more flexibility in a wider range of settings. Finally, we demonstrate the effectiveness of META-STORM through experiments across common deep learning tasks. 
Our algorithm improves upon the previous work STORM+ and is competitive with widely used algorithms after the addition of per-coordinate update and exponential moving average heuristics.", "keywords": "Nonconvex Optimization;Stochastic Optimization;Adaptive Algorithms;Variance Reduction", "primary_area": "", "supplementary_material": "", "author": "Zijian Liu;Ta Duy Nguyen;Thien Hang Nguyen;Alina Ene;Huy Nguyen", "authorids": "~Zijian_Liu1;~Ta_Duy_Nguyen1;~Thien_Hang_Nguyen1;~Alina_Ene1;~Huy_Nguyen1", "gender": ";;;;M", "homepage": ";https://nguyentaduy.github.io/;;;https://www.khoury.northeastern.edu/~hlnguyen/", "dblp": ";;;;62/3796", "google_scholar": ";;;;https://scholar.google.com.tw/citations?user=MDCu0WEAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Zijian_Liu1;~Ta_Duy_Nguyen1;~Thien_Hang_Nguyen1;~Alina_Ene1;~Huy_Nguyen1", "aff": ";Boston University;;;Northeastern University", "aff_domain": ";bu.edu;;;northeastern.edu", "position": ";PhD student;;;Associate Professor", "bibtex": "@misc{\nliu2023metastorm,\ntitle={{META}-{STORM}: Generalized Fully-Adaptive Variance Reduced {SGD} for Unbounded Functions},\nauthor={Zijian Liu and Ta Duy Nguyen and Thien Hang Nguyen and Alina Ene and Huy Nguyen},\nyear={2023},\nurl={https://openreview.net/forum?id=1KtU2ya2zh5}\n}", "github": "", "project": "", "reviewers": "HMzj;KkGZ;caqd;XfXs", "site": "https://openreview.net/forum?id=1KtU2ya2zh5", "pdf_size": 928876, "recommendation": "5;5;6;6", "confidence": "3;3;4;3", "correctness": "3;3;3;3", "technical_novelty": "3;3;3;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "83;38;33;52", "wc_strength_and_weaknesses": "272;113;212;109", "wc_clarity_quality_novelty_and_reproducibility": "57;51;14;21", "wc_summary_review": "60;28;33;18", "wc_review": "472;230;292;200", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 51.5, 19.474342094150447 ], "wc_strength_and_weaknesses_avg": [ 176.5, 68.86399639869879 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 35.75, 18.538810641462412 ], "wc_summary_review_avg": [ 34.75, 15.54630181104175 ], "wc_review_avg": [ 298.5, 105.52132485900658 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9907796259689268709&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Boston University;Northeastern University", "aff_unique_dep": ";", "aff_unique_url": "https://www.bu.edu;https://www.northeastern.edu", "aff_unique_abbr": "BU;NEU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "1Lr5QxntGcM", "title": "NeuralEQ: Neural-Network-Based Equalizer for High-Speed Wireline Communication", "track": "main", "status": "Reject", "tldr": "", "abstract": "Rapid growth of ML applications demands high-performance computing systems to perform massive data processing.
In such systems, I/O bandwidth must be scaled up to prevent any performance degradation due to the limited data transfer rates. To meet this demand, wireline communication has recently started adopting PAM4 signaling and DSP-based equalizers. However, multi-level signaling and conventional equalizing techniques degrade the bit-error-rate (BER) performance significantly. To mitigate this problem, this paper proposes a novel neural network architecture that mimics the forward-backward algorithm estimating the posterior probabilities in Hidden Markov Models. The proposed neural network outperforms existing equalizers such as feed-forward equalizers and decision-feedback equalizers, while reducing the complexity of the forward-backward algorithm.", "keywords": "Forward-backward algorithm;Equalizer;Neural network;BER", "primary_area": "", "supplementary_material": "/attachment/443dbdffeeca74532be0f651ff825ec205e3c0a0.zip", "author": "Hanseok Kim;Jae Hyung Ju;Hyun Seok Choi;Hyeri Roh;Woo-Seok Choi", "authorids": "~Hanseok_Kim1;hpotato@snu.ac.kr;tonyc1541@gmail.com;~Hyeri_Roh1;~Woo-Seok_Choi2", "gender": "M;;;;M", "homepage": ";;;;https://sites.google.com/view/wschoi", "dblp": ";;;;", "google_scholar": ";;;_xVOAwgAAAAJ;", "orcid": ";;;;", "linkedin": "hanseok-kim-39625337/;;;;", "or_profile": "~Hanseok_Kim1;hpotato@snu.ac.kr;tonyc1541@gmail.com;~Hyeri_Roh1;~Woo-Seok_Choi2", "aff": "Seoul National University;;;Seoul National University;Seoul National University", "aff_domain": "snu.ac.kr;;;snu.ac.kr;snu.ac.kr", "position": "PhD student;;;PhD student;Assistant Professor", "bibtex": "@misc{\nkim2023neuraleq,\ntitle={Neural{EQ}: Neural-Network-Based Equalizer for High-Speed Wireline Communication},\nauthor={Hanseok Kim and Jae Hyung Ju and Hyun Seok Choi and Hyeri Roh and Woo-Seok Choi},\nyear={2023},\nurl={https://openreview.net/forum?id=1Lr5QxntGcM}\n}", "github": "", "project": "", "reviewers": "X5rc;Qiy5;HhyM", "site": "https://openreview.net/forum?id=1Lr5QxntGcM", "pdf_size": 1825215, "recommendation": "3;6;6", "confidence": "4;3;3", "correctness": "2;3;3", "technical_novelty": "2;3;3", "empirical_novelty": "2;2;2", "wc_summary_paper": "23;86;23", "wc_strength_and_weaknesses": "204;343;69", "wc_clarity_quality_novelty_and_reproducibility": "26;6;28", "wc_summary_review": "36;12;69", "wc_review": "289;447;189", "wc_reply_reviewers": "0;22;0", "wc_reply_authors": "659;1074;245", "reply_reviewers": "0;1;0", "reply_authors": "1;2;1", "recommendation_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 44.0, 29.698484809834994 ], "wc_strength_and_weaknesses_avg": [ 205.33333333333334, 111.86400473590938 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 20.0, 9.93310961716756 ], "wc_summary_review_avg": [ 39.0, 23.366642891095847 ], "wc_review_avg": [ 308.3333333333333, 106.21152898291639 ], "wc_reply_reviewers_avg": [ 7.333333333333333, 10.370899457402697 ], "wc_reply_authors_avg": [ 659.3333333333334, 338.43791487098815 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": 1.0, "gs_citation": 0, "gs_cited_by_link":
"https://scholar.google.com/scholar?q=related:McMFGLS5wHgJ:scholar.google.com/&scioq=NeuralEQ:+Neural-Network-Based+Equalizer+for+High-Speed+Wireline+Communication&hl=en&as_sdt=0,33", "gs_version_total": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Seoul National University", "aff_unique_dep": "", "aff_unique_url": "https://www.snu.ac.kr", "aff_unique_abbr": "SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "DaxBench: Benchmarking Deformable Object Manipulation with Differentiable Physics", "status": "Top-5%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12149", "id": "1NAzMofMnWl", "poster": "", "openreview": "https://openreview.net/forum?id=1NAzMofMnWl", "slides": "https://iclr.cc/virtual/2023/poster/12149", "video": "https://iclr.cc/virtual/2023/poster/12149", "author_site": "Siwei Chen, Yiqing Xu, Cunjun Yu, Linfeng Li, Xiao Ma, Zhongwen Xu, David Hsu", "tldr": "", "abstract": "Deformable object manipulation (DOM) is a long-standing challenge in robotics and has attracted significant interest recently. This paper presents DaXBench, a differentiable simulation framework for DOM. While existing work often focuses on a specific type of deformable objects, DaXBench supports fluid, rope, cloth ...; it provides a general-purpose benchmark to evaluate widely different DOM methods, including planning, imitation learning, and reinforcement learning. DaXBench combines recent advances in deformable object simulation with JAX, a high-performance computational framework. All DOM tasks in DaXBench are wrapped with the OpenAI Gym API for easy integration with DOM algorithms. We hope that DaXBench provides to the research community a comprehensive, standardized benchmark and a valuable tool to support the development and evaluation of new DOM methods. 
The code and video are available online.", "keywords": "deformable object manipulation;differentiable physics;benchmark", "primary_area": "", "supplementary_material": "/attachment/b0144952beb552a91c4702cb44ce0530e1818bc1.zip", "author": "Siwei Chen;Yiqing Xu;Cunjun Yu;Linfeng Li;Xiao Ma;Zhongwen Xu;David Hsu", "authorids": "~Siwei_Chen3;~Yiqing_Xu1;~Cunjun_Yu1;~Linfeng_Li2;~Xiao_Ma2;~Zhongwen_Xu1;~David_Hsu1", "gender": "F;Unspecified;M;M;M;M;M", "homepage": "https://eeching.github.io/;;;https://yusufma03.github.io/;https://zhongwen.one/;http://www.comp.nus.edu.sg/~dyhsu/;", "dblp": "27/870;232/3014;;35/573-6;130/5077;29/331;88/10339", "google_scholar": "bJm1-QQAAAAJ;4xwyGM8AAAAJ;;hR4G6hoAAAAJ;https://scholar.google.co.uk/citations?user=T4xuHn8AAAAJ;S9LHLKEAAAAJ;", "orcid": ";;0000-0001-7536-4894;;;0000-0002-2309-4535;0000-0001-8384-8944", "linkedin": "yiqing-xu-2746a9166/;;;;;david-hsu-a86200a1/;", "or_profile": "~Yiqing_Xu1;~Cunjun_Yu1;~Linfeng_Li2;~Xiao_Ma2;~Zhongwen_Xu1;~David_Hsu1;~SIWEI_CHEN2", "aff": "National University of Singapore;National University of Singapore;National University of Singapore;SEA AI Lab;Sea AI Lab;National University of Singapore;National University of Singapore", "aff_domain": "u.nus.edu;u.nus.edu;u.nus.edu;sea.com;sea.com;nus.edu.sg;nus.edu.sg", "position": "PhD student;PhD student;PhD student;Research Scientist;Principal Researcher;Professor;PhD student", "bibtex": "@inproceedings{\nchen2023daxbench,\ntitle={DaxBench: Benchmarking Deformable Object Manipulation with Differentiable Physics},\nauthor={Siwei Chen and Yiqing Xu and Cunjun Yu and Linfeng Li and Xiao Ma and Zhongwen Xu and David Hsu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=1NAzMofMnWl}\n}", "github": "", "project": "", "reviewers": "5D3R;4Kpq;yxby", "pdf_size": 1820504, "recommendation": "8;8;8", "confidence": "4;3;3", "correctness": "3;3;3", "technical_novelty": "3;3;3", "empirical_novelty": "3;3;3", "wc_summary_paper": "24;122;63", "wc_strength_and_weaknesses": "150;260;203", "wc_clarity_quality_novelty_and_reproducibility": "4;182;21", "wc_summary_review": "24;57;41", "wc_review": "202;621;328", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "681;960;906", "reply_reviewers": "0;0;0", "reply_authors": "1;2;2", "recommendation_avg": [ 8.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 69.66666666666667, 40.28509512076258 ], "wc_strength_and_weaknesses_avg": [ 204.33333333333334, 44.917207788948275 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 69.0, 80.20390680426152 ], "wc_summary_review_avg": [ 40.666666666666664, 13.474255287605157 ], "wc_review_avg": [ 383.6666666666667, 175.52650955213448 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 849.0, 120.82218339361361 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3691992468631333614&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=1NAzMofMnWl", "email": "u.nus.edu;u.nus.edu;u.nus.edu;sea.com;sea.com;nus.edu.sg;nus.edu.sg", "author_num": 7, "aff_unique_index": 
"0;0;0;1;1;0;0", "aff_unique_norm": "National University of Singapore;Sea AI Lab", "aff_unique_dep": ";", "aff_unique_url": "https://www.nus.edu.sg;", "aff_unique_abbr": "NUS;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "Singapore;Unknown;" }, { "id": "1P8eOmWgdk", "title": "Model-free Reinforcement Learning that Transfers Using Random Reward Features", "track": "main", "status": "Reject", "tldr": "We develop model-free reinforcement learning algorithms that transfer across tasks using random features to approximate reward functions.", "abstract": "Favorable reinforcement learning (RL) algorithms should not only be able to synthesize controller for complex tasks, but also transfer across various such tasks. Classical model-free RL algorithms like Q-learning can be made stable, and has the potential to solve complicated tasks individually. However, rewards are key supervision signals in model-free approaches, making it challenging in general to transfer across multiple tasks with different reward functions. On the other hand, model-based RL algorithms, naturally transfers to various reward functions if the transition dynamics are learned well. Unfortunately, model-learning usually suffers from high dimensional observations and/or long horizons due to the challenges of compounding error. In this work, we propose a new way to transfer behaviors across problems with different reward functions that enjoy the best of both worlds. Specifically, we develop a model-free approach that implicitly learns the model without constructing the transition dynamics. This is achieved by using random features to generate reward functions in training, and incorporating model predictive control with open-loop policies in online planning. We show that the approach enables fast adaptation to problems with completely new reward functions, while scaling to high dimensional observations and long horizons. Moreover, our method can easily be trained on large offline datasets, and be quickly deployed on new tasks with good performance, making it more widely applicable than typical model-free and model-based RL methods. 
We evaluate the superior performance of our algorithm in a variety of RL and robotics domains.", "keywords": "Reinforcement Learning;Model-free;Transfer;Random Features", "primary_area": "", "supplementary_material": "", "author": "Boyuan Chen;Chuning Zhu;Pulkit Agrawal;Kaiqing Zhang;Abhishek Gupta", "authorids": "~Boyuan_Chen2;~Chuning_Zhu1;~Pulkit_Agrawal1;~Kaiqing_Zhang3;~Abhishek_Gupta1", "gender": "M;M;M;M;M", "homepage": "https://boyuan.space/;https://homes.cs.washington.edu/~zchuning/;https://people.eecs.berkeley.edu/~pulkitag/;https://homes.cs.washington.edu/~abhgupta/;https://kzhang66.github.io/", "dblp": "193/7174-3.html;295/9468;149/2672;18/6404-4;", "google_scholar": "rEL4-fgAAAAJ;;UpZmJI0AAAAJ;1wLVDP4AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0009-0009-1960-9135;;;;", "linkedin": "boyuan99/;chuning-zhu-39b086167/;;;", "or_profile": "~Boyuan_Chen2;~Chuning_Zhu1;~Pulkit_Agrawal1;~Abhishek_Gupta1;~kaiqing_zhang1", "aff": "Massachusetts Institute of Technology;University of Washington;Massachusetts Institute of Technology;University of Washington;University of Maryland, College Park", "aff_domain": "mit.edu;cs.washington.edu;mit.edu;uw.edu;umd.edu", "position": "PhD student;PhD student;Assistant Professor;Assistant Professor;Assistant Professor", "bibtex": "@misc{\nchen2023modelfree,\ntitle={Model-free Reinforcement Learning that Transfers Using Random Reward Features},\nauthor={Boyuan Chen and Chuning Zhu and Pulkit Agrawal and Kaiqing Zhang and Abhishek Gupta},\nyear={2023},\nurl={https://openreview.net/forum?id=1P8eOmWgdk}\n}", "github": "", "project": "", "reviewers": "xLea;R3oi;puvd;5Avm", "site": "https://openreview.net/forum?id=1P8eOmWgdk", "pdf_size": 2770477, "recommendation": "3;5;5;8", "confidence": "3;2;4;3", "correctness": "2;3;3;4", "technical_novelty": "3;3;3;3", "empirical_novelty": "3;3;2;2", "wc_summary_paper": "167;35;67;92", "wc_strength_and_weaknesses": "929;15;307;112", "wc_clarity_quality_novelty_and_reproducibility": "56;19;18;32", "wc_summary_review": "104;176;17;36", "wc_review": "1256;245;409;272", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.25, 1.7853571071357126 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 90.25, 48.69997433264211 ], "wc_strength_and_weaknesses_avg": [ 340.75, 355.53366577583057 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 31.25, 15.31951369985353 ], "wc_summary_review_avg": [ 83.25, 62.5594717049305 ], "wc_review_avg": [ 545.5, 414.8930585102624 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.9901475429766743, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:I-ulSeybaPIJ:scholar.google.com/&scioq=Model-free+Reinforcement+Learning+that+Transfers+Using+Random+Reward+Features&hl=en&as_sdt=0,48", "gs_version_total": 0, "aff_unique_index": "0;1;0;1;2", "aff_unique_norm": "Massachusetts Institute of Technology;University of Washington;University of Maryland", "aff_unique_dep": ";;", "aff_unique_url": "https://web.mit.edu;https://www.washington.edu;https://www.umd.edu", "aff_unique_abbr": "MIT;UW;UMD",
"aff_campus_unique_index": "1", "aff_campus_unique": ";College Park", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Self-Consistency Improves Chain of Thought Reasoning in Language Models", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11718", "id": "1PL1NIMMrw", "poster": "/media/PosterPDFs/ICLR%202023/11718.png?t=1681168070.2573102", "openreview": "https://openreview.net/forum?id=1PL1NIMMrw", "slides": "https://iclr.cc/virtual/2023/poster/11718", "video": "https://iclr.cc/virtual/2023/poster/11718", "author_site": "Xuezhi Wang, Jason Wei, Dale Schuurmans, Quoc V Le, Ed H. Chi, SHARAN NARANG, Aakanksha Chowdhery, Denny Zhou", "tldr": "We propose a new decoding strategy, self-consistency, that greatly improves chain-of-thought prompting", "abstract": "Chain-of-thought prompting combined with pretrained large language models has achieved encouraging results on complex reasoning tasks. In this paper, we propose a new decoding strategy, self-consistency, to replace the naive greedy decoding used in chain-of-thought prompting. It first samples a diverse set of reasoning paths instead of only taking the greedy one, and then selects the most consistent answer by marginalizing out all possible reasoning paths. Self-consistency leverages the intuition that a complex reasoning problem typically admits multiple different ways of thinking leading to its unique correct answer. Our extensive empirical evaluation shows that self-consistency boosts the performance of chain-of-thought prompting with a striking margin on a range of popular arithmetic and commonsense reasoning benchmarks, including GSM8K (+17.9%), SVAMP (+11.0%), AQuA (+12.2%), StrategyQA (+6.4%) and ARC-challenge (+3.9%).", "keywords": "Language models;natural language processing;reasoning", "primary_area": "", "supplementary_material": "/attachment/a5ebb76d51eb4d2ac19033b9d7e9234ec4f4217b.zip", "author": "Xuezhi Wang;Jason Wei;Dale Schuurmans;Quoc V Le;Ed H. Chi;Sharan Narang;Aakanksha Chowdhery;Denny Zhou", "authorids": "~Xuezhi_Wang3;~Jason_Wei1;~Dale_Schuurmans1;~Quoc_V_Le1;~Ed_H._Chi1;~Sharan_Narang1;~Aakanksha_Chowdhery1;~Denny_Zhou1", "gender": ";M;;M;;M;;", "homepage": "https://research.google/people/105995/;https://jasonwei20.github.io;;;;;http://www.achowdhery.com;", "dblp": "70/4090-2;02/11220.html;;29/6166;;;;", "google_scholar": "ScLUQ-YAAAAJ;;;;;CWOixywAAAAJ;7KDSCpQAAAAJ;", "orcid": ";;;;;;;", "linkedin": ";;;;;;;", "or_profile": "~Xuezhi_Wang3;~Jason_Wei1;~Dale_Schuurmans1;~Quoc_V_Le1;~Ed_H._Chi1;~Sharan_Narang1;~Aakanksha_Chowdhery1;~Denny_Zhou1", "aff": "Google DeepMind;OpenAI;;Google;;Meta;Google;", "aff_domain": "google.com;openai.com;;google.com;;meta.com;google.com;", "position": "Research Scientist;Researcher;;Scientist;;Researcher;Researcher;", "bibtex": "@inproceedings{\nwang2023selfconsistency,\ntitle={Self-Consistency Improves Chain of Thought Reasoning in Language Models},\nauthor={Xuezhi Wang and Jason Wei and Dale Schuurmans and Quoc V Le and Ed H. 
Chi and Sharan Narang and Aakanksha Chowdhery and Denny Zhou},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=1PL1NIMMrw}\n}", "github": "", "project": "", "reviewers": "Tj1M;Z6Ub;s5hD;avbc", "pdf_size": 10334089, "recommendation": "5;6;6;10", "confidence": "4;4;3;5", "correctness": "3;3;4;4", "technical_novelty": "2;3;2;3", "empirical_novelty": "3;4;4;4", "wc_summary_paper": "71;69;33;83", "wc_strength_and_weaknesses": "231;148;248;155", "wc_clarity_quality_novelty_and_reproducibility": "6;262;77;4", "wc_summary_review": "43;51;33;29", "wc_review": "351;530;391;271", "wc_reply_reviewers": "39;0;0;0", "wc_reply_authors": "818;561;421;141", "reply_reviewers": "1;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 6.75, 1.920286436967152 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 64.0, 18.681541692269406 ], "wc_strength_and_weaknesses_avg": [ 195.5, 44.477522413012174 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 87.25, 105.08895041820524 ], "wc_summary_review_avg": [ 39.0, 8.602325267042627 ], "wc_review_avg": [ 385.75, 93.82263852610413 ], "wc_reply_reviewers_avg": [ 9.75, 16.887495373796554 ], "wc_reply_authors_avg": [ 485.25, 244.48760193514926 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": 0.7364596943186587, "corr_recommendation_correctness": 0.6509445549041193, "gs_citation": 1586, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14169927924580294112&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=1PL1NIMMrw", "email": "google.com;openai.com;;google.com;;meta.com;google.com;", "author_num": 8, "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "Google;OpenAI;Meta", "aff_unique_dep": "Google DeepMind;;Meta Platforms, Inc.", "aff_unique_url": "https://deepmind.com;https://openai.com;https://meta.com", "aff_unique_abbr": "DeepMind;OpenAI;Meta", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;1;1;1", "aff_country_unique": "United Kingdom;United States" }, { "id": "1PTeB4MWCfU", "title": "Do Summarization Models Synthesize?", "track": "main", "status": "Reject", "tldr": "We measure if multidocument summarization models can effectively synthesize contrasting inputs, and explore methods to change synthesis performance.", "abstract": "Multi-document summarization entails producing concise synopses of collections of inputs. For some applications, the synopsis should accurately \\emph{synthesize} inputs with respect to a key property or aspect. For example, a synopsis of film reviews all written about a particular movie should reflect the average critic consensus. As a more consequential example, consider narrative summaries that accompany biomedical \\emph{systematic reviews} of clinical trial results. These narratives should fairly summarize the potentially conflicting results from individual trials.\n\nIn this paper we ask: To what extent do modern multi-document summarization models implicitly perform this type of synthesis? 
To assess this we perform a suite of experiments that probe the degree to which conditional generation models trained for summarization using standard methods yield outputs that appropriately synthesize inputs. We find that existing models do partially perform synthesis, but do so imperfectly. In particular, they are over-sensitive to changes in input ordering and under-sensitive to changes in input compositions (e.g., the ratio of positive to negative movie reviews). We propose a simple, general method for improving model synthesis capabilities by generating an explicitly diverse set of candidate outputs, and then selecting from these the string best aligned with the expected aggregate measure for the inputs, or \\emph{abstaining} when the model produces no good candidate. This approach improves model synthesis performance. Our hope is that by highlighting the need for synthesis (in some summarization settings), this work motivates further research into multi-document summarization methods and learning objectives that explicitly account for the need to synthesize. ", "keywords": "Summarization;Factuality;Sentiment;Systematic Reviews;Evidence Synthesis", "primary_area": "", "supplementary_material": "", "author": "Jay DeYoung;Iain James Marshall;Byron C Wallace", "authorids": "~Jay_DeYoung1;~Iain_James_Marshall1;~Byron_C_Wallace1", "gender": "M;Not Specified;M", "homepage": ";;http://www.byronwallace.com/", "dblp": "136/8673;117/4523;00/8247", "google_scholar": "f8aP6RMAAAAJ;4kdySIYAAAAJ;KTzRHmwAAAAJ", "orcid": "0000-0002-1315-7213;;", "linkedin": "jay-deyoung-a86b2425/;;", "or_profile": "~Jay_DeYoung1;~Iain_James_Marshall1;~Byron_C_Wallace1", "aff": "Northeastern University;King's College London, University of London;Northeastern University", "aff_domain": "neu.edu;kcl.ac.uk;northeastern.edu", "position": "PhD student;Clinical Senior Lecturer;Associate Professor", "bibtex": "@misc{\ndeyoung2023do,\ntitle={Do Summarization Models Synthesize?},\nauthor={Jay DeYoung and Iain James Marshall and Byron C Wallace},\nyear={2023},\nurl={https://openreview.net/forum?id=1PTeB4MWCfU}\n}", "github": "", "project": "", "reviewers": "YA3t;18KJ;Z9mN;ACAC", "site": "https://openreview.net/forum?id=1PTeB4MWCfU", "pdf_size": 1069629, "recommendation": "3;5;5;6", "confidence": "4;3;2;4", "correctness": "2;3;3;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "156;96;45;118", "wc_strength_and_weaknesses": "124;72;336;154", "wc_clarity_quality_novelty_and_reproducibility": "30;197;17;37", "wc_summary_review": "75;60;24;33", "wc_review": "385;425;422;342", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1228;600;1744;744", "reply_reviewers": "0;0;0;0", "reply_authors": "3;1;4;1", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 103.75, 40.139600147485275 ], "wc_strength_and_weaknesses_avg": [ 171.5, 99.40196175126525 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 70.25, 73.5301808783305 ], "wc_summary_review_avg": [ 48.0, 20.457272545478784 ], "wc_review_avg": [ 393.5, 33.64892271678248 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1079.0, 448.91313190861325 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.25, 1.299038105676658 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], 
"corr_recommendation_confidence": -0.20751433915982243, "corr_recommendation_correctness": 0.9271726499455306, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:pRPk8QIkBGIJ:scholar.google.com/&scioq=Do+Summarization+Models+Synthesize%3F&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;0", "aff_unique_norm": "Northeastern University;King's College London", "aff_unique_dep": ";", "aff_unique_url": "https://www.northeastern.edu;https://www.kcl.ac.uk", "aff_unique_abbr": "NEU;KCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "1QQnYd02etI", "title": "Unified Vision and Language Prompt Learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Prompt tuning, a parameter- and data-efficient transfer learning paradigm that tunes only a small number of parameters in a model's input space, has become a trend in the vision community since the emergence of large vision-language models like CLIP. We present a systematic study on two representative prompt tuning methods, namely text prompt tuning and visual prompt tuning. A major finding is that none of the unimodal prompt tuning methods performs consistently well: text prompt tuning fails on data with high intra-class visual variances while visual prompt tuning cannot handle low inter-class variances. To combine the best from both worlds, we propose a simple approach called Unified Prompt Tuning (UPT), which essentially learns a tiny neural network to jointly optimize prompts across different modalities. Extensive experiments on over 11 vision datasets show that UPT achieves a better trade-off than the unimodal counterparts on few-shot learning benchmarks, as well as on domain generalization benchmarks. 
Code and models will be released to facilitate future research.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Yuhang Zang;Wei Li;Kaiyang Zhou;Chen Huang;Chen Change Loy", "authorids": "~Yuhang_Zang1;~Wei_Li51;~Kaiyang_Zhou1;~Chen_Huang6;~Chen_Change_Loy2", "gender": "M;M;M;M;M", "homepage": "https://yuhangzang.github.io;https://weivision.github.io/;https://kaiyangzhou.github.io/;;https://www.mmlab-ntu.com/person/ccloy/index.html", "dblp": "230/4433;;203/3155;05/8125-1;01/5855", "google_scholar": "hW23VKIAAAAJ;41KAd6AAAAAJ;https://scholar.google.co.uk/citations?user=gRIejugAAAAJ;QZ-JKOUAAAAJ;https://scholar.google.co.uk/citations?user=559LF80AAAAJ", "orcid": "0000-0003-1110-5062;;;;0000-0001-5345-1591", "linkedin": "yuhang-zang/;;;;", "or_profile": "~Yuhang_Zang1;~Wei_Li51;~Kaiyang_Zhou1;~Chen_Huang6;~Chen_Change_Loy2", "aff": "Nanyang Technological University;Nanyang Technological University;Hong Kong Baptist University;Apple;Nanyang Technological University", "aff_domain": "ntu.edu.sg;ntu.edu.sg;hkbu.edu.hk;apple.com;ntu.edu.sg", "position": "PhD student;Postdoc;Assistant Professor;Research Scientist;Full Professor", "bibtex": "@misc{\nzang2023unified,\ntitle={Unified Vision and Language Prompt Learning},\nauthor={Yuhang Zang and Wei Li and Kaiyang Zhou and Chen Huang and Chen Change Loy},\nyear={2023},\nurl={https://openreview.net/forum?id=1QQnYd02etI}\n}", "github": "", "project": "", "reviewers": "ww1n;RJDm;d8Y6", "site": "https://openreview.net/forum?id=1QQnYd02etI", "pdf_size": 1696554, "recommendation": "3;5;5", "confidence": "4;3;4", "correctness": "3;3;4", "technical_novelty": "3;3;2", "empirical_novelty": "2;0;2", "wc_summary_paper": "74;42;61", "wc_strength_and_weaknesses": "249;144;220", "wc_clarity_quality_novelty_and_reproducibility": "30;8;78", "wc_summary_review": "59;8;44", "wc_review": "412;202;403", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "797;422;617", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.3333333333333333, 0.9428090415820634 ], "wc_summary_paper_avg": [ 59.0, 13.140268896284683 ], "wc_strength_and_weaknesses_avg": [ 204.33333333333334, 44.274396915398206 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 38.666666666666664, 29.227080289043965 ], "wc_summary_review_avg": [ 37.0, 21.400934559032695 ], "wc_review_avg": [ 339.0, 96.94328238717729 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 612.0, 153.13392831113555 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.49999999999999983, "corr_recommendation_correctness": 0.49999999999999983, "gs_citation": 184, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11140835428736735371&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Nanyang Technological University;Hong Kong Baptist University;Apple", "aff_unique_dep": ";;Apple Inc.", "aff_unique_url": "https://www.ntu.edu.sg;https://www.hkbu.edu.hk;https://www.apple.com", "aff_unique_abbr": "NTU;HKBU;Apple", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;1;2;0", 
"aff_country_unique": "Singapore;China;United States" }, { "title": "ChiroDiff: Modelling chirographic data with Diffusion Models", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12201", "id": "1ROAstc9jv", "poster": "/media/PosterPDFs/ICLR%202023/12201.png?t=1680899814.0378954", "openreview": "https://openreview.net/forum?id=1ROAstc9jv", "slides": "https://iclr.cc/virtual/2023/poster/12201", "video": "https://iclr.cc/virtual/2023/poster/12201", "author_site": "Ayan Das, Yongxin Yang, Timothy Hospedales, Tao Xiang, Yi-Zhe Song", "tldr": "Learning diffusion model for continuous-time chirographic data (e.g. handwriting, sketch etc.)", "abstract": "Generative modelling over continuous-time geometric constructs, a.k.a $chirographic\\ data$ such as handwriting, sketches, drawings etc., have been accomplished through autoregressive distributions. Such strictly-ordered discrete factorization however falls short of capturing key properties of chirographic data -- it fails to build holistic understanding of the temporal concept due to one-way visibility (causality). Consequently, temporal data has been modelled as discrete token sequences of fixed sampling rate instead of capturing the true underlying concept. In this paper, we introduce a powerful model-class namely Denoising\\ Diffusion\\ Probabilistic\\ Models or DDPMs for chirographic data that specifically addresses these flaws. Our model named \"ChiroDiff\", being non-autoregressive, learns to capture holistic concepts and therefore remains resilient to higher temporal sampling rate up to a good extent. Moreover, we show that many important downstream utilities (e.g. conditional sampling, creative mixing) can be flexibly implemented using ChiroDiff. We further show some unique use-cases like stochastic vectorization, de-noising/healing, abstraction are also possible with this model-class. 
We perform quantitative and qualitative evaluation of our framework on relevant datasets and find it to be better than or on par with competing approaches.", "keywords": "chirographic data;continuous-time;diffusion model;generative model", "primary_area": "", "supplementary_material": "", "author": "Ayan Das;Yongxin Yang;Timothy Hospedales;Tao Xiang;Yi-Zhe Song", "authorids": "~Ayan_Das1;~Yongxin_Yang1;~Timothy_Hospedales1;~Tao_Xiang1;~Yi-Zhe_Song2", "gender": "M;M;M;M;M", "homepage": "https://ayandas.me/;http://homepages.inf.ed.ac.uk/thospeda/;https://www.surrey.ac.uk/people/tao-xiang;http://personal.ee.surrey.ac.uk/Personal/Y.Song/;", "dblp": "269/9613;32/3545;22/4460-2.html;98/1684;150/4258", "google_scholar": "x-WI_EgAAAAJ;https://scholar.google.fr/citations?user=nHhtvqkAAAAJ;MeS5d4gAAAAJ;https://scholar.google.co.uk/citations?user=irZFP_AAAAAJ;https://scholar.google.co.uk/citations?user=F7PtrL8AAAAJ", "orcid": "0000-0002-7764-1346;0000-0003-4867-7486;0000-0002-2530-1059;;", "linkedin": "ayan-das-a49928a7/;timothyhospedales/;;;", "or_profile": "~Ayan_Das1;~Timothy_Hospedales1;~Tao_Xiang1;~Yi-Zhe_Song2;~Yongxin_Yang3", "aff": "University of Surrey;Samsung AI Research Centre;University of Surrey;University of Surrey;Queen Mary University of London", "aff_domain": "surrey.ac.uk;samsung.com;surrey.ac.uk;surrey.ac.uk;qmul.ac.uk", "position": "PhD student;Principal Researcher;Full Professor;Professor;Assistant Professor", "bibtex": "@inproceedings{\ndas2023chirodiff,\ntitle={ChiroDiff: Modelling chirographic data with Diffusion Models},\nauthor={Ayan Das and Yongxin Yang and Timothy Hospedales and Tao Xiang and Yi-Zhe Song},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=1ROAstc9jv}\n}", "github": "", "project": "", "reviewers": "VahX;DeoY;N1HR", "pdf_size": 1017886, "recommendation": "6;6;6", "confidence": "4;4;4", "correctness": "3;4;3", "technical_novelty": "3;3;3", "empirical_novelty": "3;3;4", "wc_summary_paper": "118;23;89", "wc_strength_and_weaknesses": "380;62;236", "wc_clarity_quality_novelty_and_reproducibility": "37;15;45", "wc_summary_review": "95;29;43", "wc_review": "630;129;413", "wc_reply_reviewers": "112;14;21", "wc_reply_authors": "739;269;147", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 76.66666666666667, 39.75200903378624 ], "wc_strength_and_weaknesses_avg": [ 226.0, 130.01538370516005 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 32.333333333333336, 12.684198393626966 ], "wc_summary_review_avg": [ 55.666666666666664, 28.394052585395805 ], "wc_review_avg": [ 390.6666666666667, 205.14114382270782 ], "wc_reply_reviewers_avg": [ 49.0, 44.63929509598765 ], "wc_reply_authors_avg": [ 385.0, 255.22277850275563 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17812551051505765975&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=1ROAstc9jv", "email": "surrey.ac.uk;samsung.com;surrey.ac.uk;surrey.ac.uk;qmul.ac.uk", "author_num": 5,
"aff_unique_index": "0;1;0;0;2", "aff_unique_norm": "University of Surrey;Samsung;Queen Mary University of London", "aff_unique_dep": ";AI Research;", "aff_unique_url": "https://www.surrey.ac.uk;https://www.samsung.com/global/researchers/samsung-ai-research-centre/;https://www.qmul.ac.uk", "aff_unique_abbr": "Surrey;SARC;QMUL", "aff_campus_unique_index": "1", "aff_campus_unique": ";London", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "United Kingdom;South Korea" }, { "id": "1T853KDY3t", "title": "Unleashing Vanilla Vision Transformer with Masked Image Modeling for Object Detection", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "We present an approach to efficiently and effectively adapt a masked image modeling (MIM) pre-trained vanilla Vision Transformer (ViT) for object detection, which is based on our two novel observations: (i) A MIM pre-trained vanilla ViT encoder can work surprisingly well in the challenging object-level recognition scenario even with randomly sampled partial observations, e.g., only 25% ~ 50% of the input embeddings. (ii) In order to construct multi-scale representations for object detection from single-scale ViT, a randomly initialized compact convolutional stem supplants the pre-trained patchify stem, and its intermediate features can naturally serve as the higher resolution inputs of a feature pyramid network without further upsampling or other manipulations. While the pre-trained ViT is only regarded as the third-stage of our detector's backbone instead of the whole feature extractor. This naturally results in a ConvNet-ViT hybrid architecture. The proposed detector, named MIMDet, enables a MIM pre-trained vanilla ViT to outperform leading hierarchical architectures such as Swin Transformer, MViTv2 and ConvNeXt on COCO object detection & instance segmentation, and achieves better results compared with the previous best adapted vanilla ViT detector using a more modest fine-tuning recipe while converging 2.8x faster.", "keywords": "Vision Transformer;Object Detection;Instance Segmentation;Representation Learning", "primary_area": "", "supplementary_material": "/attachment/0bc27030d5e3f918a2583e8e18a238dbf5d67c64.zip", "author": "Yuxin Fang;Shusheng Yang;Shijie Wang;Yixiao Ge;Ying Shan;Xinggang Wang", "authorids": "~Yuxin_Fang1;~Shusheng_Yang1;~Shijie_Wang1;~Yixiao_Ge2;~Ying_Shan2;~Xinggang_Wang1", "gender": ";M;M;F;M;M", "homepage": ";https://shushengyang.com;https://github.com/simonJJJ;https://geyixiao.com/;;https://xwcv.github.io/index.htm", "dblp": ";290/1972;;228/6649;68/5910;95/3056", "google_scholar": ";v6dmW5cntoMC;DuAqyTwAAAAJ;TtU74NAAAAAJ;4oXBp9UAAAAJ;qNCTLV0AAAAJ", "orcid": ";;;;0000-0001-7673-8325;0000-0001-6732-7823", "linkedin": ";shushengyang/;;;YingShanProfile/;", "or_profile": "~Yuxin_Fang1;~Shusheng_Yang1;~Shijie_Wang1;~Yixiao_Ge2;~Ying_Shan2;~Xinggang_Wang1", "aff": ";;Huazhong University of Science and Technology;Tencent;Tencent PCG ARC Lab;Huazhong University of Science and Technology", "aff_domain": ";;hust.edu;tencent.com;arc.tencent.com;hust.edu.cn", "position": ";;MS student;Researcher;Director;Full Professor", "bibtex": "@misc{\nfang2023unleashing,\ntitle={Unleashing Vanilla Vision Transformer with Masked Image Modeling for Object Detection},\nauthor={Yuxin Fang and Shusheng Yang and Shijie Wang and Yixiao Ge and Ying Shan and Xinggang Wang},\nyear={2023},\nurl={https://openreview.net/forum?id=1T853KDY3t}\n}", "github": "", "project": "", "reviewers": "7UPw;dbqN;Vms8", "site": 
"https://openreview.net/forum?id=1T853KDY3t", "pdf_size": 1291151, "recommendation": "3;3;6", "confidence": "4;4;4", "correctness": "2;3;3", "technical_novelty": "2;2;3", "empirical_novelty": "2;2;3", "wc_summary_paper": "73;57;71", "wc_strength_and_weaknesses": "567;324;191", "wc_clarity_quality_novelty_and_reproducibility": "110;76;20", "wc_summary_review": "58;103;43", "wc_review": "808;560;325", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.0, 1.4142135623730951 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 67.0, 7.118052168020874 ], "wc_strength_and_weaknesses_avg": [ 360.6666666666667, 155.67558860941844 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 68.66666666666667, 37.10645346686865 ], "wc_summary_review_avg": [ 68.0, 25.495097567963924 ], "wc_review_avg": [ 564.3333333333334, 197.20773029698628 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5, "gs_citation": 70, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5950996925168865328&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Huazhong University of Science and Technology;Tencent", "aff_unique_dep": ";Tencent Holdings Limited", "aff_unique_url": "http://www.hust.edu.cn;https://www.tencent.com", "aff_unique_abbr": "HUST;Tencent", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "1TxMUE7cF6_", "title": "Modeling Temporal Data as Continuous Functions with Process Diffusion", "track": "main", "status": "Reject", "tldr": "We modify the diffusion framework to model continuous functions and apply the learned generative model on different time series tasks.", "abstract": "Temporal data like time series are often observed at irregular intervals which is a challenging setting for the existing machine learning methods. To tackle this problem, we view such data as samples from some underlying continuous function. We then define a diffusion-based generative model that adds noise from a predefined stochastic process while preserving the continuity of the resulting underlying function. A neural network is trained to reverse this process which allows us to sample new realizations from the learned distribution. We define suitable stochastic processes as noise sources and introduce novel denoising and score-matching models on processes. Further, we show how to apply this approach to the multivariate probabilistic forecasting and imputation tasks. 
Through our extensive experiments, we demonstrate that our method outperforms previous models on synthetic and real-world datasets.", "keywords": "time series;stochastic process;diffusion;probabilistic forecasting;score-based matching", "primary_area": "", "supplementary_material": "", "author": "Marin Bilo\u0161;Kashif Rasul;Anderson Schneider;Yuriy Nevmyvaka;Stephan G\u00fcnnemann", "authorids": "~Marin_Bilo\u01611;~Kashif_Rasul1;~Anderson_Schneider1;~Yuriy_Nevmyvaka1;~Stephan_G\u00fcnnemann1", "gender": ";;;;M", "homepage": ";;;;http://www.daml.in.tum.de", "dblp": ";80/5769;;92/1859;43/3011", "google_scholar": ";cfIrwmAAAAAJ;;https://scholar.google.com/citations?hl=en;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Marin_Bilo\u01611;~Kashif_Rasul1;~Anderson_Schneider1;~Yuriy_Nevmyvaka1;~Stephan_G\u00fcnnemann1", "aff": ";Zalando SE;;Morgan Stanley;Technical University Munich", "aff_domain": ";zalando.de;;morganstanley.com;tum.de", "position": ";Researcher;;Principal Researcher;Professor", "bibtex": "@misc{\nbilo{\\v{s}}2023modeling,\ntitle={Modeling Temporal Data as Continuous Functions with Process Diffusion},\nauthor={Marin Bilo{\\v{s}} and Kashif Rasul and Anderson Schneider and Yuriy Nevmyvaka and Stephan G{\\\"u}nnemann},\nyear={2023},\nurl={https://openreview.net/forum?id=1TxMUE7cF6_}\n}", "github": "", "project": "", "reviewers": "c2e3;JDSC;ienA;8xdq", "site": "https://openreview.net/forum?id=1TxMUE7cF6_", "pdf_size": 5085882, "recommendation": "5;6;6;6", "confidence": "4;2;4;3", "correctness": "4;4;4;3", "technical_novelty": "2;3;4;3", "empirical_novelty": "3;3;2;3", "wc_summary_paper": "36;32;40;132", "wc_strength_and_weaknesses": "166;78;76;283", "wc_clarity_quality_novelty_and_reproducibility": "133;29;310;22", "wc_summary_review": "33;28;36;22", "wc_review": "368;167;462;459", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "735;395;610;799", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 60.0, 41.66533331199932 ], "wc_strength_and_weaknesses_avg": [ 150.75, 84.56173780144303 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 123.5, 116.30240754171858 ], "wc_summary_review_avg": [ 29.75, 5.3091901453988255 ], "wc_review_avg": [ 364.0, 119.84782017208323 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 634.75, 154.20826015489573 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.5222329678670935, "corr_recommendation_correctness": -0.3333333333333333, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3943474662145357510&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1;2", "aff_unique_norm": "Zalando SE;Morgan Stanley;Technical University of Munich", "aff_unique_dep": ";;", "aff_unique_url": "https://www.zalando.de;https://www.morganstanley.com;https://www.tum.de", "aff_unique_abbr": "Zalando;Morgan Stanley;TUM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Germany;United States" }, { "id": "1UBSvnGHFxK", "title": "Wasserstein Gradient Flows for Optimizing GMM-based Policies", "track": "main", "status": "Reject", 
"tldr": "Policy structure-aware Optimization via Wasserstein Gradient Flows for Robot Motion Adaptation", "abstract": "Robots often rely on a repertoire of previously-learned motion policies for performing tasks of diverse complexities. \nWhen facing unseen task conditions or when new task requirements arise, robots must adapt their motion policies accordingly.\nIn this context, policy optimization is the de facto paradigm to adapt robot policies as a function of task-specific objectives. \nMost commonly-used motion policies carry particular structures that are often overlooked in policy optimization algorithms. \nWe instead propose to leverage the structure of probabilistic policies by casting the policy optimization as an optimal transport problem.\nSpecifically, we focus on robot motion policies that build on Gaussian mixture models (GMMs) and formulate the policy optimization as a Wassertein gradient flow over the GMMs space.\nThis naturally allows us to constrain the policy updates via the $L^2$-Wasserstein distance between GMMs to enhance the stability of the policy optimization process.\nFurthermore, we leverage the geometry of the Bures-Wasserstein manifold to optimize the Gaussian distributions of the GMM policy via Riemannian optimization.\nWe evaluate our approach over a set of common robotic settings: Reaching motions, collision-avoidance behaviors and multi-goal tasks. \nOur results show that our method outperforms common policy optimization baselines in terms of task success rate and low-variance solutions. ", "keywords": "Optimal transport;policy optimization;gaussian mixture models;robot motion adaptation.", "primary_area": "", "supplementary_material": "/attachment/d7cbc4ac4d2b471dc9eee88e23c20169ecaf0efa.zip", "author": "Hanna Ziesche;Leonel Rozo", "authorids": "~Hanna_Ziesche1;~Leonel_Rozo1", "gender": "F;M", "homepage": ";https://leonelrozo.weebly.com/", "dblp": "284/0793;10/9515", "google_scholar": ";https://scholar.google.it/citations?user=vLWgi-YAAAAJ", "orcid": "0000-0003-2042-3660;0000-0001-5970-9135", "linkedin": ";leonelrozo/", "or_profile": "~Hanna_Carolin_Maria_Ziesche1;~Leonel_Dario_Rozo1", "aff": "Robert Bosch GmbH, Bosch;Robert Bosch GmbH, Bosch", "aff_domain": "de.bosch.com;de.bosch.com", "position": "Research Scientist;Principal Researcher", "bibtex": "@misc{\nziesche2023wasserstein,\ntitle={Wasserstein Gradient Flows for Optimizing {GMM}-based Policies},\nauthor={Hanna Ziesche and Leonel Rozo},\nyear={2023},\nurl={https://openreview.net/forum?id=1UBSvnGHFxK}\n}", "github": "", "project": "", "reviewers": "1kyr;zFF5;8XBw", "site": "https://openreview.net/forum?id=1UBSvnGHFxK", "pdf_size": 9902132, "recommendation": "3;5;5", "confidence": "4;3;4", "correctness": "3;3;3", "technical_novelty": "3;3;2", "empirical_novelty": "3;3;4", "wc_summary_paper": "77;94;57", "wc_strength_and_weaknesses": "102;231;449", "wc_clarity_quality_novelty_and_reproducibility": "30;6;62", "wc_summary_review": "80;45;43", "wc_review": "289;376;611", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 76.0, 15.121728296285006 ], "wc_strength_and_weaknesses_avg": [ 260.6666666666667, 143.2069209997276 ], 
"wc_clarity_quality_novelty_and_reproducibility_avg": [ 32.666666666666664, 22.939534045447004 ], "wc_summary_review_avg": [ 56.0, 16.990193249832878 ], "wc_review_avg": [ 425.3333333333333, 136.00571883400917 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.49999999999999983, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:a3Fg-ZcejLIJ:scholar.google.com/&scioq=Wasserstein+Gradient+Flows+for+Optimizing+GMM-based+Policies&hl=en&as_sdt=0,10", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Robert Bosch GmbH", "aff_unique_dep": "", "aff_unique_url": "https://www.bosch.com", "aff_unique_abbr": "Bosch", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "title": "Understanding Neural Coding on Latent Manifolds by Sharing Features and Dividing Ensembles", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11197", "id": "1UCaQYUdE_o", "poster": "/media/PosterPDFs/ICLR%202023/11197.png?t=1681212088.7760751", "openreview": "https://openreview.net/forum?id=1UCaQYUdE_o", "slides": "https://iclr.cc/virtual/2023/poster/11197", "video": "https://iclr.cc/virtual/2023/poster/11197", "author_site": "Martin Bjerke, Lukas Schott, Kristopher Jensen, Claudia Battistin, David Klindt, Benjamin Dunn", "tldr": "We propose neural latent variable models with feature sharing and ensemble detection.", "abstract": "Systems neuroscience relies on two complementary views of neural data, characterized by single neuron tuning curves and analysis of population activity. These two perspectives combine elegantly in neural latent variable models that constrain the relationship between latent variables and neural activity, modeled by simple tuning curve functions. This has recently been demonstrated using Gaussian processes, with applications to realistic and topologically relevant latent manifolds. Those and previous models, however, missed crucial shared coding properties of neural populations. We propose $\\textit{feature sharing}$ across neural tuning curves which significantly improves performance and helps optimization. We also propose a solution to the $\\textit{ensemble detection}$ problem, where different groups of neurons, i.e., ensembles, can be modulated by different latent manifolds. Achieved through a soft clustering of neurons during training, this allows for the separation of mixed neural populations in an unsupervised manner. These innovations lead to more interpretable models of neural population activity that train well and perform better even on mixtures of complex latent manifolds. Finally, we apply our method on a recently published grid cell dataset, and recover distinct ensembles, infer toroidal latents and predict neural tuning curves in a single integrated modeling framework.", "keywords": "neuroscience;neural activity;tuning curves;neural ensemble detection;grid cells;latent variable models", "primary_area": "", "supplementary_material": "/attachment/4efa4819e7e3002e1b82a9e7969810ef7c120db8.zip", "author": "Martin Bjerke;Lukas Schott;Kristopher T Jensen;Claudia Battistin;David A. 
Klindt;Benjamin Adric Dunn", "authorids": "~Martin_Bjerke1;~Lukas_Schott2;~Kristopher_T_Jensen1;~Claudia_Battistin1;~David_A._Klindt1;~Benjamin_Adric_Dunn1", "gender": "M;;;F;;M", "homepage": "https://www.ntnu.no/ansatte/martin.bjerke;;https://krisjensen.github.io/;;;https://www.ntnu.edu/employees/benjamin.dunn", "dblp": ";;267/5296;;;", "google_scholar": ";;https://scholar.google.com/citations?hl=en;;;hAjFjXkAAAAJ", "orcid": ";;;0000-0001-7808-5129;;0000-0002-3287-4744", "linkedin": ";;;;;", "or_profile": "~Martin_Bjerke1;~Lukas_Schott2;~Kristopher_T_Jensen1;~Claudia_Battistin1;~David_A._Klindt1;~Benjamin_Adric_Dunn1", "aff": "Norwegian University of Science and Technology;;University of Cambridge;;;Norwegian Institute of Technology", "aff_domain": "ntnu.no;;cam.ac.uk;;;ntnu.no", "position": "PhD student;;PhD student;;;Associate Professor", "bibtex": "@inproceedings{\nbjerke2023understanding,\ntitle={Understanding Neural Coding on Latent Manifolds by Sharing Features and Dividing Ensembles},\nauthor={Martin Bjerke and Lukas Schott and Kristopher T Jensen and Claudia Battistin and David A. Klindt and Benjamin Adric Dunn},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=1UCaQYUdE_o}\n}", "github": "", "project": "", "reviewers": "qWBb;xqoS;3QYX", "pdf_size": 23181827, "recommendation": "6;6;6", "confidence": "5;3;3", "correctness": "4;3;4", "technical_novelty": "3;3;3", "empirical_novelty": "3;2;3", "wc_summary_paper": "57;121;90", "wc_strength_and_weaknesses": "129;214;395", "wc_clarity_quality_novelty_and_reproducibility": "174;376;31", "wc_summary_review": "102;71;149", "wc_review": "462;782;665", "wc_reply_reviewers": "11;84;0", "wc_reply_authors": "1287;1507;1359", "reply_reviewers": "1;1;0", "reply_authors": "2;3;2", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 89.33333333333333, 26.132142830026183 ], "wc_strength_and_weaknesses_avg": [ 246.0, 110.92640202704975 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 193.66666666666666, 141.53052281712553 ], "wc_summary_review_avg": [ 107.33333333333333, 32.065904356843305 ], "wc_review_avg": [ 636.3333333333334, 132.20270630949867 ], "wc_reply_reviewers_avg": [ 31.666666666666668, 37.27674282385138 ], "wc_reply_authors_avg": [ 1384.3333333333333, 91.58359872573011 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17828299340876593576&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=1UCaQYUdE_o", "email": "ntnu.no;;cam.ac.uk;;;ntnu.no", "author_num": 6, "aff_unique_index": "0;1;2", "aff_unique_norm": "Norwegian University of Science and Technology;University of Cambridge;Norwegian Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ntnu.no;https://www.cam.ac.uk;https://www.ntnu.no", "aff_unique_abbr": "NTNU;Cambridge;NTNU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;1;0", 
"aff_country_unique": "Norway;United Kingdom" }, { "title": "RGI: robust GAN-inversion for mask-free image inpainting and unsupervised pixel-wise anomaly detection", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11709", "id": "1UbNwQC89a", "poster": "/media/PosterPDFs/ICLR%202023/11709.png?t=1680810513.1051857", "openreview": "https://openreview.net/forum?id=1UbNwQC89a", "slides": "https://iclr.cc/virtual/2023/poster/11709", "video": "https://iclr.cc/virtual/2023/poster/11709", "author_site": "Shancong Mou, Xiaoyi Gu, Meng Cao, Haoping Bai, Ping Huang, Jiulong Shan, Jianjun Shi", "tldr": "", "abstract": "Generative adversarial networks (GANs), trained on a large-scale image dataset, can be a good approximator of the natural image manifold. GAN-inversion, using a pre-trained generator as a deep generative prior, is a promising tool for image restoration under corruptions. However, the performance of GAN-inversion can be limited by a lack of robustness to unknown gross corruptions, i.e., the restored image might easily deviate from the ground truth. In this paper, we propose a Robust GAN-inversion (RGI) method with a provable robustness guarantee to achieve image restoration under unknown \\textit{gross} corruptions, where a small fraction of pixels are completely corrupted. Under mild assumptions, we show that the restored image and the identified corrupted region mask converge asymptotically to the ground truth. Moreover, we extend RGI to Relaxed-RGI (R-RGI) for generator fine-tuning to mitigate the gap between the GAN learned manifold and the true image manifold while avoiding trivial overfitting to the corrupted input image, which further improves the image restoration and corrupted region mask identification performance. The proposed RGI/R-RGI method unifies two important applications with state-of-the-art (SOTA) performance: (i) mask-free semantic inpainting, where the corruptions are unknown missing regions, the restored background can be used to restore the missing content. 
(ii) unsupervised pixel-wise anomaly detection, where the corruptions are unknown anomalous regions and the retrieved mask can be used as the anomalous region\u2019s segmentation mask.", "keywords": "Robust GAN-inversion;Mask-free Semantic Inpainting;Unsupervised Pixel-wise Anomaly Detection", "primary_area": "", "supplementary_material": "", "author": "Shancong Mou;Xiaoyi Gu;Meng Cao;Haoping Bai;Ping Huang;Jiulong Shan;Jianjun Shi", "authorids": "~Shancong_Mou1;xiaoyigu@gatech.edu;~Meng_Cao2;~Haoping_Bai1;~Ping_Huang1;~Jiulong_Shan2;~Jianjun_Shi1", "gender": "M;;M;;M;;M", "homepage": "https://sites.google.com/view/shancongmou;;https://www.linkedin.com/in/caomeng/;;;;https://sites.gatech.edu/jianjun-shi/", "dblp": "271/4435;;;;;;", "google_scholar": "y2CYLbMAAAAJ;;;;;;NhMlhaoAAAAJ", "orcid": ";;;;;;0000-0002-3774-9176", "linkedin": ";;caomeng/;;ping-huang-82845138/;;jianjun-jan-shi-5217b411/", "or_profile": "~Shancong_Mou1;xiaoyigu@gatech.edu;~Meng_Cao2;~Haoping_Bai1;~Ping_Huang1;~Jiulong_Shan2;~Jianjun_Shi1", "aff": "Georgia Institute of Technology;;Apple;;Apple;;Georgia Institute of Technology", "aff_domain": "gatech.edu;;apple.com;;apple.com;;gatech.edu", "position": "PhD student;;Researcher;;Researcher;;Full Professor", "bibtex": "@inproceedings{\nmou2023rgi,\ntitle={{RGI}: robust {GAN}-inversion for mask-free image inpainting and unsupervised pixel-wise anomaly detection},\nauthor={Shancong Mou and Xiaoyi Gu and Meng Cao and Haoping Bai and Ping Huang and Jiulong Shan and Jianjun Shi},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=1UbNwQC89a}\n}", "github": "", "project": "", "reviewers": "XsAp;pBCc;sD9p;fpcW;TJ5g", "pdf_size": 2201872, "recommendation": "5;6;6;6;6", "confidence": "3;3;4;3;3", "correctness": "3;3;4;4;3", "technical_novelty": "2;3;3;3;3", "empirical_novelty": "2;3;3;2;2", "wc_summary_paper": "40;52;69;84;51", "wc_strength_and_weaknesses": "254;79;141;228;370", "wc_clarity_quality_novelty_and_reproducibility": "61;18;20;5;101", "wc_summary_review": "42;29;44;155;69", "wc_review": "397;178;274;472;591", "wc_reply_reviewers": "583;0;0;351;0", "wc_reply_authors": "1888;1157;1300;2246;1578", "reply_reviewers": "3;0;0;1;0", "reply_authors": "3;2;3;4;3", "recommendation_avg": [ 5.8, 0.39999999999999997 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "correctness_avg": [ 3.4, 0.4898979485566356 ], "technical_novelty_avg": [ 2.8, 0.39999999999999997 ], "empirical_novelty_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_paper_avg": [ 59.2, 15.484185480676729 ], "wc_strength_and_weaknesses_avg": [ 214.4, 99.68470293881605 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 41.0, 35.400564967243106 ], "wc_summary_review_avg": [ 67.8, 45.48142477979335 ], "wc_review_avg": [ 382.4, 145.1104406994893 ], "wc_reply_reviewers_avg": [ 186.8, 240.25769498602952 ], "wc_reply_authors_avg": [ 1633.8, 395.2924992964071 ], "reply_reviewers_avg": [ 0.8, 1.1661903789690602 ], "reply_authors_avg": [ 3.0, 0.6324555320336759 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.25000000000000006, "corr_recommendation_correctness": 0.408248290463863, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=374681849546967174&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=1UbNwQC89a", "email": "gatech.edu;;apple.com;;apple.com;;gatech.edu", "author_num": 7, "aff_unique_index": "0;1;1;0",
"aff_unique_norm": "Georgia Institute of Technology;Apple", "aff_unique_dep": ";Apple Inc.", "aff_unique_url": "https://www.gatech.edu;https://www.apple.com", "aff_unique_abbr": "Georgia Tech;Apple", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "1VQnc0wnIQ", "title": "Understanding Graph Contrastive Learning From A Statistical Perspective", "track": "main", "status": "Withdraw", "tldr": "From a statistical perspective, we propose two principles to guide graph contrastive learning.", "abstract": "Although recent advances have prompted the prosperity in graph contrastive learning, the researches on universal principles for model design and desirable properties of latent representations are still inadequate. From a statistical perspective, this paper proposes two principles for guidance and constructs a general graph self-supervised framework. Reformulating data augmentation as a mixture process, the first one, termed consistency principle, lays stress on exploring and mapping cross-view common information to consistent and essence-revealing representations. For the purpose of instantiation, four statistical indicators are employed to estimate and maximize the correlation between representations from various views, whose accordant variation trend during training implies the extraction of common content. With awareness of the insufficiency of a solo consistency principle, suffering from degenerated and coupled solutions, a decorrelation principle is put forward to encourage diverse and informative representations. Accordingly, two specific strategies, performing in representation space and eigen spectral space, respectively, are propounded to decouple various representation channels. Under two principles, various combinations of concrete implementations derive a family of methods. Provably, after decomposition and analysis for the commonly used \\textit{InfoNCE} loss, we clarify that the approaches based on mutual information maximization implicitly fulfill the two principles and are covered within our framework. The comparison experiments with current state-of-the-arts demonstrate the effectiveness and sufficiency of two principles for high-quality graph representations. 
Furthermore, visual studies reveal how certain principles affect learned representations.", "keywords": "graph contrastive learning;unsupervised;general principles", "primary_area": "", "supplementary_material": "", "author": "Jinyong Wen", "authorids": "~Jinyong_Wen1", "gender": "M", "homepage": "https://wenjinyong.github.io/", "dblp": "337/4329", "google_scholar": "", "orcid": "0000-0002-6661-7770", "linkedin": "", "or_profile": "~Jinyong_Wen1", "aff": "University of Chinese Academy of Sciences", "aff_domain": "ucas.ac.cn", "position": "PhD student", "bibtex": "@misc{\nwen2023understanding,\ntitle={Understanding Graph Contrastive Learning From A Statistical Perspective},\nauthor={Jinyong Wen},\nyear={2023},\nurl={https://openreview.net/forum?id=1VQnc0wnIQ}\n}", "github": "", "project": "", "reviewers": "21sj;Qhp1;wJsQ;kAkh", "site": "https://openreview.net/forum?id=1VQnc0wnIQ", "pdf_size": 24555839, "recommendation": "5;5;5;6", "confidence": "3;4;4;3", "correctness": "3;4;3;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;3;2;3", "wc_summary_paper": "51;50;69;136", "wc_strength_and_weaknesses": "171;218;251;115", "wc_clarity_quality_novelty_and_reproducibility": "39;80;26;23", "wc_summary_review": "23;33;21;38", "wc_review": "284;381;367;312", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "629;995;737;230", "reply_reviewers": "0;0;0;0", "reply_authors": "1;2;1;1", "recommendation_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 76.5, 35.174564673923115 ], "wc_strength_and_weaknesses_avg": [ 188.75, 51.197534120307004 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 42.0, 22.74862633215465 ], "wc_summary_review_avg": [ 28.75, 7.013380069552769 ], "wc_review_avg": [ 336.0, 39.5790348543266 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 647.75, 275.4154815909955 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 1, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": -0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:n-zHR0ufguwJ:scholar.google.com/&scioq=Understanding+Graph+Contrastive+Learning+From+A+Statistical+Perspective&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "University of Chinese Academy of Sciences", "aff_unique_dep": "", "aff_unique_url": "http://www.ucas.ac.cn", "aff_unique_abbr": "UCAS", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "id": "1VuBdlNBuR", "title": "Active Learning with Partial Labels", "track": "main", "status": "Withdraw", "tldr": "we propose a new problem setting named active learning with partial labels, where the oracle provides partial labels to the selected samples.", "abstract": "In this paper, we for the first time study a new problem setting called active learning with partial labels (ALPL), where an oracle provides the query samples with a set of candidate labels that contains the true label. Such a setting relaxes the oracle from the demanding labeling process. To address ALPL, we firstly propose a firm and intuitive baseline by directly adapting a state-of-the-art method for learning with partial labels to train the predictor, which can be seamlessly incorporated into existing AL frameworks. 
Inspired by human inference in cognitive science, we propose to improve the baseline by exploiting and exploring counter examples (CEs) to relieve the overfitting caused by a few training samples in ALPL. Specifically, we propose to construct CEs by reversing the partial labels for each instance, learning from which we propose a simple but effective WorseNet. By leveraging the distribution gap between WorseNet and the predictor, both the predictor itself and the sample selection process can be improved. Experimental results on five real-world datasets and four benchmark datasets show that our proposed methods achieve comprehensive improvements over ten representative AL frameworks, highlighting the superiority and effectiveness of CEs and WorseNet. ", "keywords": "weakly supervised learning;active learning;partial label learning", "primary_area": "", "supplementary_material": "", "author": "Fei Zhang;Junjie Ye;Lei Feng;Zhongwen Rao;Jieming Zhu;Marcus Kalander;Chen Gong;Jianye HAO;Bo Han", "authorids": "~Fei_Zhang3;~Junjie_Ye1;~Lei_Feng1;~Zhongwen_Rao1;~Jieming_Zhu2;~Marcus_Kalander1;~Chen_Gong5;~Jianye_HAO1;~Bo_Han1", "gender": "M;;M;M;M;M;M;M;M", "homepage": ";;https://lfeng1995.github.io/;;https://jiemingzhu.github.io/;https://shaido987.github.io/;http://www.escience.cn/people/chengong/index.html;http://www.icdai.org/jianye.html;https://bhanml.github.io/", "dblp": ";19/8588.html;76/847-6;338/6279;10/2717;256/9291;21/8587-2;21/7664.html;241/0472-3", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;https://scholar.google.com.sg/citations?user=KomQOFkAAAAJ;;oNKerP8AAAAJ;DtBioHoAAAAJ;https://scholar.google.com.hk/citations?user=guttoBwAAAAJ;;nTNjqHwAAAAJ", "orcid": ";;0000-0003-2839-5799;;0000-0002-5666-8320;;;0000-0002-0422-8235;", "linkedin": "ferenas97/;;;;;;;;", "or_profile": "~Fei_Zhang3;~Junjie_Ye1;~Lei_Feng1;~Zhongwen_Rao1;~Jieming_Zhu2;~Marcus_Kalander1;~Chen_Gong5;~Jianye_HAO1;~bo_han2", "aff": "Shanghai AI Lab;Huawei Technologies Ltd.;Nanyang Technological University;Huawei Technologies Ltd.;Huawei Noah's Ark Lab;Huawei Technologies Ltd.;Nanjing University of Science and Technology;Tianjin University;RIKEN", "aff_domain": "pjlab.org.cn;huawei.com;ntu.edu.sg;huawei.com;huawei.com;huawei.com;njust.edu.cn;tju.edu.cn;riken.jp", "position": "Researcher;Principal Researcher;Visiting Professor;Researcher;Researcher;Researcher;Full Professor;Associate Professor;Adjunct Scientist", "bibtex": "@misc{\nzhang2023active,\ntitle={Active Learning with Partial Labels},\nauthor={Fei Zhang and Junjie Ye and Lei Feng and Zhongwen Rao and Jieming Zhu and Marcus Kalander and Chen Gong and Jianye HAO and Bo Han},\nyear={2023},\nurl={https://openreview.net/forum?id=1VuBdlNBuR}\n}", "github": "", "project": "", "reviewers": "S15R;CXcH;6eQs;25Lo", "site": "https://openreview.net/forum?id=1VuBdlNBuR", "pdf_size": 2229071, "recommendation": "3;5;5;8", "confidence": "4;5;3;3", "correctness": "3;3;3;3", "technical_novelty": "3;3;2;4", "empirical_novelty": "3;3;2;4", "wc_summary_paper": "38;62;109;49", "wc_strength_and_weaknesses": "173;256;333;215", "wc_clarity_quality_novelty_and_reproducibility": "109;64;140;148", "wc_summary_review": "23;29;11;23", "wc_review": "343;411;593;435", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.25, 1.7853571071357126 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], 
"empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 64.5, 27.060118255469618 ], "wc_strength_and_weaknesses_avg": [ 244.25, 59.04817948082735 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 115.25, 32.98010764081888 ], "wc_summary_review_avg": [ 21.5, 6.5383484153110105 ], "wc_review_avg": [ 445.5, 91.60103711203274 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 9, 0 ], "corr_recommendation_confidence": -0.46442036401282394, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:IqYznuULWzYJ:scholar.google.com/&scioq=Active+Learning+with+Partial+Labels&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2;1;1;1;3;4;5", "aff_unique_norm": "Shanghai AI Lab;Huawei;Nanyang Technological University;Nanjing University of Science and Technology;Tianjin University;RIKEN", "aff_unique_dep": ";Huawei Technologies;;;;", "aff_unique_url": "https://www.shanghaiailab.com;https://www.huawei.com;https://www.ntu.edu.sg;http://www.nust.edu.cn/;http://www.tju.edu.cn;https://www.riken.jp", "aff_unique_abbr": "SAIL;Huawei;NTU;NUST;TJU;RIKEN", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;0;0;0;2", "aff_country_unique": "China;Singapore;Japan" }, { "id": "1Wo0vqaZ8WJ", "title": "Let Offline RL Flow: Training Conservative Agents in the Latent Space of Normalizing Flow", "track": "main", "status": "Reject", "tldr": "Latent-Variable Policy Optimization for Offline RL based on Normalizing Flows (outperforms both PLAS and LAPO)", "abstract": "Offline reinforcement learning aims to train a policy on a pre-recorded and fixed dataset without any additional environment interactions. There are two major challenges in this setting: (1) extrapolation error caused by approximating the value of state-action pairs not well-covered by the training data and (2) distributional shift between behavior and inference policies. One way to tackle these problems is to induce conservatism - i.e., keeping the learned policies closer to the behavioral ones. To achieve this, we build upon recent works on learning policies in latent action spaces and use a special form of normalizing flow for constructing a generative model, which we use as a conservative action encoder. This normalizing flow action encoder is pre-trained in a supervised manner on the offline dataset, and then an additional policy model - controller in the latent space - is trained via reinforcement learning. This approach avoids querying actions outside of the training dataset and therefore does not require additional regularization for out-of-dataset actions. 
We evaluate our method on various locomotion and navigation tasks, demonstrating that our approach outperforms recently proposed algorithms with generative action models on a large portion of datasets.", "keywords": "Offline Reinforcement Learning;Normalizing Flows", "primary_area": "", "supplementary_material": "/attachment/6da2907885fc3920ea535eca50b45dca1941aeb2.zip", "author": "Dmitry Akimov;Vladislav Kurenkov;Alexander Nikulin;Denis Tarasov;Sergey Kolesnikov", "authorids": "~Dmitry_Akimov2;~Vladislav_Kurenkov1;~Alexander_Nikulin1;~Denis_Tarasov1;~Sergey_Kolesnikov1", "gender": ";M;M;;M", "homepage": ";https://vkurenkov.me;https://howuhh.github.io/;https://dt6a.github.io/;https://scitator.com", "dblp": ";251/9126;314/6349;255/7697;191/1945", "google_scholar": "l7lXoM4AAAAJ;w09vtVsAAAAJ;yACvnqUAAAAJ;LQcCkD8AAAAJ;iukbpVEAAAAJ", "orcid": ";0000-0003-4078-1086;;0000-0001-9744-5265;", "linkedin": ";;;tarasovdeal/;scitator/", "or_profile": "~Dmitry_Akimov2;~Vladislav_Kurenkov1;~Alexander_Nikulin1;~Denis_Tarasov1;~Sergey_Kolesnikov1", "aff": "Tinkoff;Tinkoff;Higher School of Economics, Higher School of Economics;Jacobs University Bremen;Tinkoff", "aff_domain": "tinkoff.ai;tinkoff.ai;edu.hse.ru;jacobs-university.de;tinkoff.ru", "position": "Researcher;Researcher;MS student;Undergrad student;Principal Researcher", "bibtex": "@misc{\nakimov2023let,\ntitle={Let Offline {RL} Flow: Training Conservative Agents in the Latent Space of Normalizing Flow},\nauthor={Dmitry Akimov and Vladislav Kurenkov and Alexander Nikulin and Denis Tarasov and Sergey Kolesnikov},\nyear={2023},\nurl={https://openreview.net/forum?id=1Wo0vqaZ8WJ}\n}", "github": "", "project": "", "reviewers": "DZCL;gFEy;17pi;iyr1", "site": "https://openreview.net/forum?id=1Wo0vqaZ8WJ", "pdf_size": 13992184, "recommendation": "5;5;6;6", "confidence": "4;3;4;5", "correctness": "3;2;4;3", "technical_novelty": "3;3;2;2", "empirical_novelty": "2;0;3;2", "wc_summary_paper": "90;50;125;173", "wc_strength_and_weaknesses": "42;165;284;245", "wc_clarity_quality_novelty_and_reproducibility": "29;19;1;518", "wc_summary_review": "513;22;59;73", "wc_review": "674;256;469;1009", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "85;178;124;515", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 1.75, 1.0897247358851685 ], "wc_summary_paper_avg": [ 109.5, 45.2575960475145 ], "wc_strength_and_weaknesses_avg": [ 184.0, 92.5283740265655 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 141.75, 217.45962268890287 ], "wc_summary_review_avg": [ 166.75, 200.77397117156397 ], "wc_review_avg": [ 602.0, 277.59592936496745 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 225.5, 170.3738536278381 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.7071067811865475, "corr_recommendation_correctness": 0.7071067811865475, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8549049973729068251&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Tinkoff Bank;Higher School of Economics;Jacobs University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tinkoff.ru;https://www.hse.ru;https://www.jacobs-university.de", "aff_unique_abbr": 
"Tinkoff;HSE;JUB", "aff_campus_unique_index": "1", "aff_campus_unique": ";Bremen", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "Russian Federation;Germany" }, { "id": "1YE_zTFICdr", "title": "Deep Attention Pooling Graph Neural Network for Text Classification", "track": "main", "status": "Desk Reject", "tldr": "A fresh model based on GNN with dual adjacency matrix, and attention pooling for text classification.", "abstract": "Graph Neural Networks (GNN) is a classical method that has been applied to document classification as a compelling message-passing framework inside and between documents. Consider the graph-based models are transductive when representing the documents as nodes in one graph(inter-documents), and require high memory and time efficiency to employ the GNN to each document after aligning the documents to the longest one(intra-documents). This paper proposes a novel method named Deep Attention Pooling Graph Neural Networks (DAPG) to use the structure of each document for inductive document classification. The attention pooling layer (APL) in DAPG adaptively selects nodes to form smaller graphs based on their scalar attention values to alleviate resource consumption. Additionally, regarding the structural variation, a fresh dual adjacency matrix for individual graphs based on the word co-occurrence and the word distance has been built to conquer the sparsity and keep stability after pooling. Experiments conducted on five standard text classification datasets show that our method is competitive with the state-of-the-art. Ablation studies reveal further insights into the impact of the different components on performance.", "keywords": "GNN;Attention;Pooling;Adjacency matrix;Text Classification", "primary_area": "", "supplementary_material": "", "author": "jiejie fan;Xiaojuan Ban;Manman Yuan", "authorids": "~jiejie_fan1;~Xiaojuan_Ban1;~Manman_Yuan1", "gender": "M;F;M", "homepage": ";http://scce.ustb.edu.cn/shiziduiwu/jiaoshixinxi/2018-04-10/37.html;", "dblp": ";;", "google_scholar": ";;", "orcid": ";;0000-0003-4817-9372", "linkedin": "https://www.linkedin.cn/incareer/in/%E6%8D%B7%E6%9D%B0-%E6%A8%8A-73777010b;;", "or_profile": "~jiejie_fan1;~Xiaojuan_Ban1;~Manman_Yuan1", "aff": ";;Inner Mongolia University", "aff_domain": ";;imu.edu.cn", "position": ";;Full Professor", "bibtex": "@misc{\nfan2023deep,\ntitle={Deep Attention Pooling Graph Neural Network for Text Classification},\nauthor={jiejie fan and Xiaojuan Ban and Manman Yuan},\nyear={2023},\nurl={https://openreview.net/forum?id=1YE_zTFICdr}\n}", "github": "", "project": "", "reviewers": "", "site": "https://openreview.net/forum?id=1YE_zTFICdr", "pdf_size": 666656, "recommendation": "", "confidence": "", "correctness": "", "technical_novelty": "", "empirical_novelty": "", "wc_summary_paper": "", "wc_strength_and_weaknesses": "", "wc_clarity_quality_novelty_and_reproducibility": "", "wc_summary_review": "", "wc_review": "", "wc_reply_reviewers": "", "wc_reply_authors": "", "reply_reviewers": "", "reply_authors": "", "recommendation_avg": [ 0, 0 ], "confidence_avg": [ 0, 0 ], "correctness_avg": [ 0, 0 ], "technical_novelty_avg": [ 0, 0 ], "empirical_novelty_avg": [ 0, 0 ], "wc_summary_paper_avg": [ 0, 0 ], "wc_strength_and_weaknesses_avg": [ 0, 0 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 0, 0 ], "wc_summary_review_avg": [ 0, 0 ], "wc_review_avg": [ 0, 0 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], 
"replies_avg": [ 1, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0, "corr_recommendation_correctness": 0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:3zdUvMl2sNsJ:scholar.google.com/&scioq=Deep+Attention+Pooling+Graph+Neural+Network+for+Text+Classification&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Inner Mongolia University", "aff_unique_dep": "", "aff_unique_url": "http://www.imu.edu.cn/", "aff_unique_abbr": "IMU", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "title": "Learning Fair Graph Representations via Automated Data Augmentations", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11107", "id": "1_OGWcP1s9w", "poster": "", "openreview": "https://openreview.net/forum?id=1_OGWcP1s9w", "slides": "https://iclr.cc/virtual/2023/poster/11107", "video": "https://iclr.cc/virtual/2023/poster/11107", "author_site": "Hongyi Ling, Zhimeng Jiang, Youzhi Luo, Shuiwang Ji, Na Zou", "tldr": "We propose an automated graph data augmentation method to learn fair graph representations.", "abstract": "We consider fair graph representation learning via data augmentations. While this direction has been explored previously, existing methods invariably rely on certain assumptions on the properties of fair graph data in order to design fixed strategies on data augmentations. Nevertheless, the exact properties of fair graph data may vary significantly in different scenarios. Hence, heuristically designed augmentations may not always generate fair graph data in different application scenarios. In this work, we propose a method, known as Graphair, to learn fair representations based on automated graph data augmentations. Such fairness-aware augmentations are themselves learned from data. Our Graphair is designed to automatically discover fairness-aware augmentations from input graphs in order to circumvent sensitive information while preserving other useful information. Experimental results demonstrate that our Graphair consistently outperforms many baselines on multiple node classification datasets in terms of fairness-accuracy trade-off performance. 
In addition, results indicate that Graphair can automatically learn to generate fair graph data without prior knowledge on fairness-relevant graph properties.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Hongyi Ling;Zhimeng Jiang;Youzhi Luo;Shuiwang Ji;Na Zou", "authorids": "~Hongyi_Ling1;~Zhimeng_Jiang1;~Youzhi_Luo1;~Shuiwang_Ji1;~Na_Zou2", "gender": ";M;M;M;F", "homepage": ";http://www.zhimengjiang.com/;https://lyzustc.github.io/;http://people.tamu.edu/~sji;https://nzou1.github.io/", "dblp": "259/0934;217/3235;280/0590;84/6405;152/0090-1.html", "google_scholar": "ei8O1BEAAAAJ;5Es3Yk4AAAAJ;3lqQFIoAAAAJ;BZGj6sAAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0001-6933-3952;0000-0002-3763-0239;0000-0002-4205-4563;0000-0003-1984-795X", "linkedin": ";;youzhi-luo-139981172/;shuiwang-ji-9a040715/;na-zou-a1721535/", "or_profile": "~Hongyi_Ling1;~Zhimeng_Jiang1;~Youzhi_Luo1;~Shuiwang_Ji1;~Na_Zou2", "aff": "Texas A&M University - College Station;Texas A&M University;Texas A&M University;Texas A&M University;Texas A&M University - College Station", "aff_domain": "tamu.edu;tamu.edu;tamu.edu;tamu.edu;tamu.edu", "position": "PhD student;PhD student;PhD student;Professor;Assistant Professor", "bibtex": "@inproceedings{\nling2023learning,\ntitle={Learning Fair Graph Representations via Automated Data Augmentations},\nauthor={Hongyi Ling and Zhimeng Jiang and Youzhi Luo and Shuiwang Ji and Na Zou},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=1_OGWcP1s9w}\n}", "github": "", "project": "", "reviewers": "fjjw;QdJb;oCtR;fgyY", "pdf_size": 1625684, "recommendation": "6;8;8;8", "confidence": "4;4;3;4", "correctness": "3;3;4;4", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;3;3;0", "wc_summary_paper": "72;47;35;73", "wc_strength_and_weaknesses": "144;142;110;165", "wc_clarity_quality_novelty_and_reproducibility": "27;22;32;104", "wc_summary_review": "9;57;23;58", "wc_review": "252;268;200;400", "wc_reply_reviewers": "13;9;8;12", "wc_reply_authors": "473;301;243;475", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "recommendation_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 1.224744871391589 ], "wc_summary_paper_avg": [ 56.75, 16.315253599009733 ], "wc_strength_and_weaknesses_avg": [ 140.25, 19.651653874419832 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 46.25, 33.52890543993347 ], "wc_summary_review_avg": [ 36.75, 21.33512362279628 ], "wc_review_avg": [ 280.0, 73.70210309075311 ], "wc_reply_reviewers_avg": [ 10.5, 2.0615528128088303 ], "wc_reply_authors_avg": [ 373.0, 103.06308747558458 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 59, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16332112528418321758&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 2, "pdf": "https://openreview.net/pdf?id=1_OGWcP1s9w", "email": "tamu.edu;tamu.edu;tamu.edu;tamu.edu;tamu.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Texas A&M University", "aff_unique_dep": "", "aff_unique_url": "https://www.tamu.edu", "aff_unique_abbr": "TAMU", 
"aff_campus_unique_index": "0;0", "aff_campus_unique": "College Station;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "1_ZHr9Ha_ZJ", "title": "Multi-Reward Fusion: Learning from Other Policies by Distilling", "track": "main", "status": "Withdraw", "tldr": "Multi-Reward Fusion: Learn from other policies by distilling ", "abstract": "Designing rewards is crucial for applying reinforcement learning in practice. However, it is difficult to design a shaping reward which can accelerate agents' learning process without biasing the original task's optimization objective. Moreover, the low-dimensional representation of the reward and value function (i.e. scalar value) may also be an obstruction during the learning process. This paper contributes towards tackling these challenges, by proposing a new method, called Multi-Reward Fusion (MRF). MRF take as input a list of human designed rewards, which contains the information from multiple perspectives about the task, and learns separate policies for each component of the reward list. We formulate the problem of learning the target policy as a distillation task, propose a novel method which can selectively distills knowledge from the auxiliary policies, and theoretically show the feasibility of this method. We conduct extensive experiments and show that the MRF method performs better than state-of-the-art reward shaping methods.", "keywords": "Energy-Based;Policy Distilling;Reinforcement Learning;Auto Reward Shaping", "primary_area": "", "supplementary_material": "/attachment/9b580c17dd02654f50f5f496fa3c45477e8377b8.zip", "author": "Yiwen Zhu;Yujing Hu;Wenya Wei;Yuan Wang;Zhou Fang", "authorids": "~Yiwen_Zhu2;~Yujing_Hu2;wwy_vivian@qq.com;~Yuan_Wang7;zfang@zju.edu.cn", "gender": ";;;M;", "homepage": ";;;;", "dblp": ";https://dblp.uni-trier.de/pid/160/1923.html;;;", "google_scholar": ";IR5WY-wAAAAJ;;;", "orcid": ";;;0000-0002-9922-069X;", "linkedin": ";;;;", "or_profile": "~Yiwen_Zhu2;~Yujing_Hu2;wwy_vivian@qq.com;~Yuan_Wang7;zfang@zju.edu.cn", "aff": ";NetEase, Inc.;;Hainan University;", "aff_domain": ";corp.netease.com;;hainu.edu.cn;", "position": ";Researcher;;MS student;", "bibtex": "@misc{\nzhu2023multireward,\ntitle={Multi-Reward Fusion: Learning from Other Policies by Distilling },\nauthor={Yiwen Zhu and Yujing Hu and Wenya Wei and Yuan Wang and Zhou Fang},\nyear={2023},\nurl={https://openreview.net/forum?id=1_ZHr9Ha_ZJ}\n}", "github": "", "project": "", "reviewers": "pvC2;9zKP;mofe", "site": "https://openreview.net/forum?id=1_ZHr9Ha_ZJ", "pdf_size": 1131096, "recommendation": "1;3;3", "confidence": "4;2;3", "correctness": "2;2;2", "technical_novelty": "1;2;3", "empirical_novelty": "1;2;2", "wc_summary_paper": "21;125;75", "wc_strength_and_weaknesses": "169;265;490", "wc_clarity_quality_novelty_and_reproducibility": "34;217;39", "wc_summary_review": "3;84;42", "wc_review": "227;691;646", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 2.3333333333333335, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "correctness_avg": [ 2.0, 0.0 ], "technical_novelty_avg": [ 2.0, 0.816496580927726 ], "empirical_novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "wc_summary_paper_avg": [ 73.66666666666667, 42.4682888230213 ], "wc_strength_and_weaknesses_avg": [ 308.0, 134.52880732393342 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 96.66666666666667, 85.11299678793024 ], 
"wc_summary_review_avg": [ 43.0, 33.075670817082454 ], "wc_review_avg": [ 521.3333333333334, 208.93433311822056 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.8660254037844387, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:pftMeT083ksJ:scholar.google.com/&scioq=Multi-Reward+Fusion:+Learning+from+Other+Policies+by+Distilling&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "NetEase, Inc.;Hainan University", "aff_unique_dep": ";", "aff_unique_url": "https://www.163.com;http://www.hainanu.edu.cn", "aff_unique_abbr": "NetEase;HNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Unsupervised Semantic Segmentation with Self-supervised Object-centric Representations", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11276", "id": "1_jFneF07YC", "poster": "/media/PosterPDFs/ICLR%202023/11276.png?t=1682844976.5395331", "openreview": "https://openreview.net/forum?id=1_jFneF07YC", "slides": "https://iclr.cc/virtual/2023/poster/11276", "video": "https://iclr.cc/virtual/2023/poster/11276", "author_site": "Andrii Zadaianchuk, Matth\u00e4us Kleindessner, Yi Zhu, Francesco Locatello, Thomas Brox", "tldr": "Strong and simple baseline for unsupervised segmentation methods obtained by leveraging and combining object-centric priors.", "abstract": "In this paper, we show that recent advances in self-supervised representation learning enable unsupervised object discovery and semantic segmentation with a performance that matches the state of the field on supervised semantic segmentation 10 years ago. We propose a methodology based on unsupervised saliency masks and self-supervised feature clustering to kickstart object discovery followed by training a semantic segmentation network on pseudo-labels to bootstrap the system on images with multiple objects. We show that while being conceptually simple our proposed baseline is surprisingly strong. 
We present results on PASCAL VOC that go far beyond the current state of the art (50.0 mIoU), and we report for the first time results on MS COCO for the whole set of 81 classes: our method discovers 34 categories with more than 20% IoU, while obtaining an average IoU of 19.6 for all 81 categories.", "keywords": "unsupervised semantic segmentation;object segmentation;object-centric learning", "primary_area": "", "supplementary_material": "", "author": "Andrii Zadaianchuk;Matthaeus Kleindessner;Yi Zhu;Francesco Locatello;Thomas Brox", "authorids": "~Andrii_Zadaianchuk1;matkle@amazon.de;~Yi_Zhu1;~Francesco_Locatello1;~Thomas_Brox1", "gender": "M;;M;M;M", "homepage": "https://zadaianchuk.github.io/;;https://bryanyzhu.github.io/;https://twitter.com/FrancescoLocat8;https://lmb.informatik.uni-freiburg.de/people/brox/index.en.html", "dblp": "274/9441;;;195/6074;97/4586", "google_scholar": ";;IXw4UiwAAAAJ;;https://scholar.google.com/citations?hl=de", "orcid": ";;0000-0002-6482-6712;;0000-0002-6282-8861", "linkedin": ";;yi-zhu-546a437a/;;", "or_profile": "~Andrii_Zadaianchuk1;matkle@amazon.de;~Yi_Zhu1;~Francesco_Locatello1;~Thomas_Brox1", "aff": "Max-Planck-Institute for Intelligent Systems, Max-Planck Institute;;Amazon;Amazon;University of Freiburg", "aff_domain": "is.mpg.de;;amazon.com;amazon.com;uni-freiburg.de", "position": "PhD student;;Applied Scientist;Senior Applied Scientist;Full Professor", "bibtex": "@inproceedings{\nzadaianchuk2023unsupervised,\ntitle={Unsupervised Semantic Segmentation with Self-supervised Object-centric Representations},\nauthor={Andrii Zadaianchuk and Matthaeus Kleindessner and Yi Zhu and Francesco Locatello and Thomas Brox},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=1_jFneF07YC}\n}", "github": "", "project": "", "reviewers": "6jsH;7EBx;hDe9;m7o9", "pdf_size": 8306180, "recommendation": "6;6;8;8", "confidence": "5;4;5;4", "correctness": "3;4;4;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;3;4;3", "wc_summary_paper": "59;96;51;78", "wc_strength_and_weaknesses": "578;211;224;203", "wc_clarity_quality_novelty_and_reproducibility": "44;75;48;147", "wc_summary_review": "78;55;31;48", "wc_review": "759;437;354;476", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "943;346;452;346", "reply_reviewers": "0;0;0;0", "reply_authors": "2;1;1;1", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 4.5, 0.5 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 71.0, 17.449928366615147 ], "wc_strength_and_weaknesses_avg": [ 304.0, 158.3713989330144 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 78.5, 41.30677910464576 ], "wc_summary_review_avg": [ 53.0, 16.867127793433 ], "wc_review_avg": [ 506.5, 152.29330254479348 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 521.75, 247.02871796615065 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 54, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=85687609319380561&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "pdf": "https://openreview.net/pdf?id=1_jFneF07YC", "email": "is.mpg.de;;amazon.com;amazon.com;uni-freiburg.de", "author_num": 5, "aff_unique_index": "0;1;1;2", 
"aff_unique_norm": "Max-Planck-Institute for Intelligent Systems;Amazon;University of Freiburg", "aff_unique_dep": "Intelligent Systems;Amazon.com, Inc.;", "aff_unique_url": "https://www.mpi-is.mpg.de;https://www.amazon.com;https://www.uni-freiburg.de", "aff_unique_abbr": "MPI-IS;Amazon;UoF", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "Germany;United States" }, { "title": "Exponential Generalization Bounds with Near-Optimal Rates for $L_q$-Stable Algorithms", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11546", "id": "1_jtWjhSSkr", "poster": "/media/PosterPDFs/ICLR%202023/11546.png?t=1682857934.4425292", "openreview": "https://openreview.net/forum?id=1_jtWjhSSkr", "slides": "https://iclr.cc/virtual/2023/poster/11546", "video": "https://iclr.cc/virtual/2023/poster/11546", "author_site": "Xiaotong Yuan, Ping Li", "tldr": "We presented a set of sharper and near-optimal exponential generalization bounds for $L_q$-stable learning algorithms", "abstract": "The \\emph{stability} of learning algorithms to changes in the training sample has been actively studied as a powerful proxy for reasoning about generalization. Recently, exponential generalization and excess risk bounds with near-optimal rates have been obtained under the stringent and distribution-free notion of uniform stability~\\citep{bousquet2020sharper,klochkov2021stability}. In the meanwhile, under the notion of $L_q$-stability, which is weaker and distribution dependent, exponential generalization bounds are also available yet so far only with sub-optimal rates. Therefore, a fundamental question we would like to address in this paper is whether it is possible to derive near-optimal exponential generalization bounds for $L_q$-stable learning algorithms. As the core contribution of the present work, we give an affirmative answer to this question by developing strict analogues of the near-optimal generalization and risk bounds of uniformly stable algorithms for $L_q$-stable algorithms. 
Further, we demonstrate the power of our improved $L_q$-stability and generalization theory by applying it to derive strong sparse excess risk bounds, under mild conditions, for computationally tractable sparsity estimation algorithms such as Iterative Hard Thresholding (IHT).", "keywords": "$L_q$-stability;Uniform stability;Moments inequality;Exponential generalization bound;Excess risk;Sparsity", "primary_area": "", "supplementary_material": "", "author": "Xiaotong Yuan;Ping Li", "authorids": "~Xiaotong_Yuan1;~Ping_Li3", "gender": "M;M", "homepage": "https://sites.google.com/site/xtyuan1980/;http://www.stat.rutgers.edu/home/pingli/", "dblp": "64/5926;62/5860-1", "google_scholar": "yzU6g24AAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Xiaotong_Yuan1;~Ping_Li3", "aff": "Nanjing University;LinkedIn", "aff_domain": "nju.edu.cn;linkedin.com", "position": "Full Professor;Engineer", "bibtex": "@inproceedings{\nyuan2023exponential,\ntitle={Exponential Generalization Bounds with Near-Optimal Rates for \\$L\\_q\\$-Stable Algorithms},\nauthor={Xiaotong Yuan and Ping Li},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=1_jtWjhSSkr}\n}", "github": "", "project": "", "reviewers": "6gau;LaZo;Ryqw;dfYo;Hu4j", "pdf_size": 427464, "recommendation": "6;8;8;8;8", "confidence": "4;3;3;3;3", "correctness": "3;4;4;4;3", "technical_novelty": "3;3;4;4;3", "empirical_novelty": "0;0;4;4;0", "wc_summary_paper": "82;108;103;33;114", "wc_strength_and_weaknesses": "346;240;27;42;807", "wc_clarity_quality_novelty_and_reproducibility": "30;53;29;20;80", "wc_summary_review": "27;28;21;44;633", "wc_review": "485;429;180;139;1634", "wc_reply_reviewers": "0;0;0;0;76", "wc_reply_authors": "612;350;84;11;843", "reply_reviewers": "0;0;0;0;1", "reply_authors": "1;1;1;1;2", "recommendation_avg": [ 7.6, 0.7999999999999999 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "correctness_avg": [ 3.6, 0.4898979485566356 ], "technical_novelty_avg": [ 3.4, 0.4898979485566356 ], "empirical_novelty_avg": [ 1.6, 1.9595917942265426 ], "wc_summary_paper_avg": [ 88.0, 29.536418198556166 ], "wc_strength_and_weaknesses_avg": [ 292.4, 284.10181273620907 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 42.4, 21.731083728153088 ], "wc_summary_review_avg": [ 150.6, 241.32020222103245 ], "wc_review_avg": [ 573.4, 547.1755111479313 ], "wc_reply_reviewers_avg": [ 15.2, 30.400000000000002 ], "wc_reply_authors_avg": [ 380.0, 313.9649661984598 ], "reply_reviewers_avg": [ 0.2, 0.4 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": 0.6123724356957948, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11490307693448301330&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "pdf": "https://openreview.net/pdf?id=1_jtWjhSSkr", "email": "nju.edu.cn;linkedin.com", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Nanjing University;LinkedIn Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://www.nju.edu.cn;https://www.linkedin.com", "aff_unique_abbr": "Nanjing U;LinkedIn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "China;United States" }, { "id": "1bLT3dGNS0", "title": "Relational Curriculum Learning for Graph Neural Networks", "track": "main", "status": "Reject", "tldr": "We propose a novel curriculum learning strategy 
to improve the generalization performance of graph neural network models by gradually involving edges from well-expected to less-expected in training.", "abstract": "Graph neural networks have achieved great success in representing structured data and its downstream tasks such as node classification. The key idea is to recursively propagate and aggregate information along the edges of a given graph topology. However, edges in real-world graphs often have varying degrees of difficulty, and some edges may even be noisy to the downstream tasks. Therefore, existing graph neural network models may lead to suboptimal learned representations because they usually consider every edge in a given graph topology equally. On the other hand, curriculum learning, which mimics the human learning principle of learning data samples in a meaningful order, has been shown to be effective in improving the generalization ability of representation learners by gradually proceeding from easy to more difficult samples during training. Unfortunately, most existing curriculum learning strategies are designed for i.i.d data samples and cannot be trivially generalized to handle structured data with dependencies. In order to address these issues, in this paper we propose a novel curriculum learning method for structured data to leverage the various underlying difficulties of data dependencies to improve the quality of learned representations on structured data. Specifically, we design a learning strategy that gradually incorporates edges in a given graph topology into training according to their difficulty from easy to hard, where the degree of difficulty is measured by a self-supervised learning paradigm. We demonstrate the strength of our proposed method in improving the generalization ability of learned representations through extensive experiments on nine synthetic datasets and seven real-world datasets with different commonly used graph neural network models as backbone models.", "keywords": "Graph neural networks;Curriculum learning", "primary_area": "", "supplementary_material": "/attachment/904fab3431361201f4bf11267dc06c38b90939d5.zip", "author": "Zheng Zhang;Junxiang Wang;Liang Zhao", "authorids": "~Zheng_Zhang10;~Junxiang_Wang1;~Liang_Zhao6", "gender": "M;M;M", "homepage": ";https://xianggebenben.github.io/Junxiang_Wang/;https://cs.emory.edu/~lzhao41/", "dblp": "181/2621-18;53/8843;63/5422-2", "google_scholar": "fRdZRHsAAAAJ;;qnvyqtwAAAAJ", "orcid": ";0000-0002-6635-4296;0000-0002-2648-9989", "linkedin": ";;", "or_profile": "~Zheng_Zhang10;~Junxiang_Wang1;~Liang_Zhao6", "aff": "Emory University;NEC Labs America;Emory University", "aff_domain": "emory.edu;nec.com;emory.edu", "position": "PhD student;Researcher;Associate Professor", "bibtex": "@misc{\nzhang2023relational,\ntitle={Relational Curriculum Learning for Graph Neural Networks},\nauthor={Zheng Zhang and Junxiang Wang and Liang Zhao},\nyear={2023},\nurl={https://openreview.net/forum?id=1bLT3dGNS0}\n}", "github": "", "project": "", "reviewers": "yJ3m;fwhF;f5Ws", "site": "https://openreview.net/forum?id=1bLT3dGNS0", "pdf_size": 534716, "recommendation": "5;6;6", "confidence": "3;4;4", "correctness": "2;3;3", "technical_novelty": "3;3;3", "empirical_novelty": "2;2;3", "wc_summary_paper": "59;56;60", "wc_strength_and_weaknesses": "216;366;94", "wc_clarity_quality_novelty_and_reproducibility": "30;4;37", "wc_summary_review": "27;4;28", "wc_review": "332;430;219", "wc_reply_reviewers": "0;35;0", "wc_reply_authors": "2178;1243;788", "reply_reviewers": "0;1;0", 
"reply_authors": "5;2;2", "recommendation_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 58.333333333333336, 1.699673171197595 ], "wc_strength_and_weaknesses_avg": [ 225.33333333333334, 111.23948140036532 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 23.666666666666668, 14.197026292697903 ], "wc_summary_review_avg": [ 19.666666666666668, 11.08552609887726 ], "wc_review_avg": [ 327.0, 86.21291473246144 ], "wc_reply_reviewers_avg": [ 11.666666666666666, 16.49915822768611 ], "wc_reply_authors_avg": [ 1403.0, 578.6334475872153 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 3.0, 1.4142135623730951 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.9999999999999997, "corr_recommendation_correctness": 0.9999999999999997, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13924603878430840683&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;0", "aff_unique_norm": "Emory University;NEC Labs America", "aff_unique_dep": ";", "aff_unique_url": "https://www.emory.edu;https://www.nec-labs.com", "aff_unique_abbr": "Emory;NEC LA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "1ehuYMrigt", "title": "Learning from Asymmetrically-corrupted Data in Regression for Sensor Magnitude", "track": "main", "status": "Reject", "tldr": "This paper addresses a regression problem for sensor magnitude in which a low value of labels can also mean incomplete observation. We derive an unbiased learning algorithm with a regression learned from data without incomplete observations.", "abstract": "This paper addresses a regression problem in which output label values represent the results of sensing the magnitude of a phenomenon. A low value of such labels can either mean that the actual magnitude of the phenomenon has been low or that the sensor has made an incomplete observation. This leads to a bias toward lower values in labels and its resultant learning because labels for incomplete observations are recorded as lower than those for typical observations, even if both have monitored similar phenomena. Moreover, because an incomplete observation does not provide any tags indicating incompleteness, we cannot eliminate or impute them. To address this issue, we propose a learning algorithm that explicitly models the incomplete observations to be corrupted with an asymmetric noise that always has a negative value. We show that our algorithm is unbiased with a regression learned from the uncorrupted data that does not involve incomplete observations. 
We demonstrate the advantages of our algorithm through numerical experiments.", "keywords": "regression;sensor data analytics;healthcare", "primary_area": "", "supplementary_material": "", "author": "Takayuki Katsuki;Takayuki Osogami", "authorids": "~Takayuki_Katsuki2;~Takayuki_Osogami1", "gender": ";M", "homepage": "https://research.ibm.com/people/takayuki-katsuki;https://sites.google.com/site/takayukiosogami/", "dblp": "01/10264;95/5631", "google_scholar": "bZZ0I4UAAAAJ;wtOZ8wwAAAAJ", "orcid": "0000-0002-3670-1138;", "linkedin": ";takayuki-osogami-1151853/?ppe=1", "or_profile": "~Takayuki_Katsuki2;~Takayuki_Osogami1", "aff": "International Business Machines;International Business Machines", "aff_domain": "ibm.com;ibm.com", "position": "Research staff member;Principal Researcher", "bibtex": "@misc{\nkatsuki2023learning,\ntitle={Learning from Asymmetrically-corrupted Data in Regression for Sensor Magnitude},\nauthor={Takayuki Katsuki and Takayuki Osogami},\nyear={2023},\nurl={https://openreview.net/forum?id=1ehuYMrigt}\n}", "github": "", "project": "", "reviewers": "QdLV;Sduq;mxCJ;gU2i", "site": "https://openreview.net/forum?id=1ehuYMrigt", "pdf_size": 4662294, "recommendation": "1;5;6;6", "confidence": "3;3;4;3", "correctness": "2;3;3;4", "technical_novelty": "1;2;3;3", "empirical_novelty": "0;2;4;3", "wc_summary_paper": "44;85;68;74", "wc_strength_and_weaknesses": "396;163;148;192", "wc_clarity_quality_novelty_and_reproducibility": "37;42;30;31", "wc_summary_review": "25;34;53;46", "wc_review": "502;324;299;343", "wc_reply_reviewers": "0;24;11;0", "wc_reply_authors": "709;539;204;545", "reply_reviewers": "0;1;1;0", "reply_authors": "1;1;1;2", "recommendation_avg": [ 4.5, 2.0615528128088303 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.25, 0.82915619758885 ], "empirical_novelty_avg": [ 2.25, 1.479019945774904 ], "wc_summary_paper_avg": [ 67.75, 15.006248698458919 ], "wc_strength_and_weaknesses_avg": [ 224.75, 100.12835512480967 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 35.0, 4.847679857416329 ], "wc_summary_review_avg": [ 39.5, 10.781929326423912 ], "wc_review_avg": [ 367.0, 79.48899294870957 ], "wc_reply_reviewers_avg": [ 8.75, 9.883698700385398 ], "wc_reply_authors_avg": [ 499.25, 183.603342834492 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.42008402520840293, "corr_recommendation_correctness": 0.8574929257125441, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:z1bliU2C8MMJ:scholar.google.com/&scioq=Learning+from+Asymmetrically-corrupted+Data+in+Regression+for+Sensor+Magnitude&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "International Business Machines Corporation", "aff_unique_dep": "", "aff_unique_url": "https://www.ibm.com", "aff_unique_abbr": "IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Masked Image Modeling with Denoising Contrast", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11879", "id": "1fZd4owfJP6", "poster": "", "openreview": "https://openreview.net/forum?id=1fZd4owfJP6", "slides": "https://iclr.cc/virtual/2023/poster/11879", "video": "https://iclr.cc/virtual/2023/poster/11879", "author_site": "Kun Yi, Yixiao Ge, Xiaotong Li, 
Shusheng Yang, Dian Li, Jianping Wu, Ying Shan, Xiaohu Qie", "tldr": "We first treat masked patch prediction as denoising contrastive learning in self-supervised image pre-training, achieving state-of-the-art results.", "abstract": "Since the development of self-supervised visual representation learning from contrastive learning to masked image modeling (MIM), there is no significant difference in essence, that is, how to design proper pretext tasks for vision dictionary look-up. MIM recently dominates this line of research with state-of-the-art performance on vision Transformers (ViTs), where the core is to enhance the patch-level visual context capturing of the network via denoising auto-encoding mechanism. Rather than tailoring image tokenizers with extra training stages as in previous works, we unleash the great potential of contrastive learning on de- noising auto-encoding and introduce a pure MIM method, ConMIM, to produce simple intra-image inter-patch contrastive constraints as the sole learning objectives for masked patch prediction. We further strengthen the denoising mechanism with asymmetric designs, including image perturbations and model progress rates, to improve the network pre-training. ConMIM-pretrained models with various scales achieve competitive results on downstream image classification, semantic segmentation, object detection, and instance segmentation tasks, e.g., on ImageNet-1K classification, we achieve 83.9% top-1 accuracy with ViT-Small and 85.3% with ViT-Base without extra data for pre-training. Code will be available at https://github.com/TencentARC/ConMIM.\n", "keywords": "masked image modeling;self-supervised learning;image pre-training", "primary_area": "", "supplementary_material": "", "author": "Kun Yi;Yixiao Ge;Xiaotong Li;Shusheng Yang;Dian Li;Jianping Wu;Ying Shan;Xiaohu Qie", "authorids": "~Kun_Yi1;~Yixiao_Ge2;~Xiaotong_Li2;~Shusheng_Yang1;~Dian_Li1;~Jianping_Wu1;~Ying_Shan2;~Xiaohu_Qie1", "gender": "M;F;M;M;M;M;M;", "homepage": "http://www.lamda.nju.edu.cn/yik/;https://geyixiao.com/;https://github.com/lixiaotong97;https://shushengyang.com;;https://www.tsinghua.edu.cn/publish/csen/4623/2010/20101224194435414856631/20101224194435414856631_.html;;", "dblp": "202/8470;228/6649;;290/1972;68/4844;;68/5910;62/1827", "google_scholar": "6xtzo4AAAAAJ;TtU74NAAAAAJ;cpCE_T4AAAAJ;v6dmW5cntoMC;https://scholar.google.com.hk/citations?user=rF7HU94AAAAJ;https://scholar.google.com.tw/citations?user=Y-nqSYgAAAAJ;4oXBp9UAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;;;;0000-0001-7673-8325;", "linkedin": ";;;shushengyang/;;;YingShanProfile/;", "or_profile": "~Kun_Yi1;~Yixiao_Ge2;~Xiaotong_Li2;~Shusheng_Yang1;~Dian_Li1;~Jianping_Wu1;~Ying_Shan2;~Xiaohu_Qie1", "aff": "Tencent ARC Lab;Tencent;Peking University;;Tencent PCG AI;;Tencent PCG ARC Lab;Tencent", "aff_domain": "tencent.com;tencent.com;pku.edu.cn;;tencent.com;;arc.tencent.com;tencent.com", "position": "Researcher;Researcher;PhD student;;Principal Researcher;;Director;VP", "bibtex": "@inproceedings{\nyi2023masked,\ntitle={Masked Image Modeling with Denoising Contrast},\nauthor={Kun Yi and Yixiao Ge and Xiaotong Li and Shusheng Yang and Dian Li and Jianping Wu and Ying Shan and Xiaohu Qie},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=1fZd4owfJP6}\n}", "github": "", "project": "", "reviewers": "LV22;oXdu;1DfA", "pdf_size": 1689955, "recommendation": "5;6;8", "confidence": "4;4;3", "correctness": "3;3;4", 
"technical_novelty": "2;2;3", "empirical_novelty": "2;2;3", "wc_summary_paper": "64;82;58", "wc_strength_and_weaknesses": "364;151;280", "wc_clarity_quality_novelty_and_reproducibility": "39;55;81", "wc_summary_review": "45;75;59", "wc_review": "512;363;478", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "957;762;623", "reply_reviewers": "0;0;0", "reply_authors": "3;2;1", "recommendation_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 68.0, 10.198039027185569 ], "wc_strength_and_weaknesses_avg": [ 265.0, 87.60136985230311 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 58.333333333333336, 17.30767331432956 ], "wc_summary_review_avg": [ 59.666666666666664, 12.256517540566822 ], "wc_review_avg": [ 451.0, 63.75473838599502 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 780.6666666666666, 136.99229499825486 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": -0.944911182523068, "corr_recommendation_correctness": 0.944911182523068, "gs_citation": 55, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17044496310888240815&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=1fZd4owfJP6", "email": "tencent.com;tencent.com;pku.edu.cn;;tencent.com;;arc.tencent.com;tencent.com", "author_num": 8, "aff_unique_index": "0;0;1;0;0;0", "aff_unique_norm": "Tencent;Peking University", "aff_unique_dep": "ARC Lab;", "aff_unique_url": "https://www.tencent.com;http://www.pku.edu.cn", "aff_unique_abbr": "Tencent ARC Lab;Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "gDDIM: Generalized denoising diffusion implicit models", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10905", "id": "1hKE9qjvz-", "poster": "", "openreview": "https://openreview.net/forum?id=1hKE9qjvz-", "slides": "https://iclr.cc/virtual/2023/poster/10905", "video": "https://iclr.cc/virtual/2023/poster/10905", "author_site": "Qinsheng Zhang, Molei Tao, Yongxin Chen", "tldr": "a small but delicate modification in parameterization to accelerate general diffusion models", "abstract": "Our goal is to extend the denoising diffusion implicit model (DDIM) to general diffusion models~(DMs) besides isotropic diffusions. Instead of constructing a non-Markov noising process as in the original DDIM, we examine the mechanism of DDIM from a numerical perspective. We discover that the DDIM can be obtained by using some specific approximations of the score when solving the corresponding stochastic differential equation. We present an interpretation of the accelerating effects of DDIM that also explains the advantages of a deterministic sampling scheme over the stochastic one for fast sampling. Building on this insight, we extend DDIM to general DMs, coined generalized DDIM (gDDIM), with a small but delicate modification in parameterizing the score network. We validate gDDIM in two non-isotropic DMs: Blurring diffusion model (BDM) and Critically-damped Langevin diffusion model (CLD). We observe more than 20 times acceleration in BDM. 
In CLD, a diffusion model that augments the diffusion process with velocity, our algorithm achieves an FID score of 2.26 on CIFAR10 with only 50 score function evaluations~(NFEs), and an FID score of 2.86 with only 27 NFEs.", "keywords": "Fast sampling;diffusion model", "primary_area": "", "supplementary_material": "/attachment/37f034deb16520ec8ca12fdd83464d87fdd6bf3d.zip", "author": "Qinsheng Zhang;Molei Tao;Yongxin Chen", "authorids": "~Qinsheng_Zhang1;~Molei_Tao1;~Yongxin_Chen1", "gender": "M;;M", "homepage": "https://qsh-zh.github.io/;http://people.math.gatech.edu/~mtao8/;https://yongxin.ae.gatech.edu/", "dblp": ";56/9263;", "google_scholar": ";;X8BYiV4AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Qinsheng_Zhang1;~Molei_Tao1;~Yongxin_Chen1", "aff": "Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology", "aff_domain": "gatech.edu;gatech.edu;gatech.edu", "position": "PhD student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhang2023gddim,\ntitle={g{DDIM}: Generalized denoising diffusion implicit models},\nauthor={Qinsheng Zhang and Molei Tao and Yongxin Chen},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=1hKE9qjvz-}\n}", "github": "", "project": "", "reviewers": "4f5G;85mY;vCLj;PjHX", "pdf_size": 11447884, "recommendation": "6;8;8;8", "confidence": "3;3;4;3", "correctness": "3;3;4;4", "technical_novelty": "3;4;4;1", "empirical_novelty": "2;3;4;3", "wc_summary_paper": "120;23;50;74", "wc_strength_and_weaknesses": "326;99;136;76", "wc_clarity_quality_novelty_and_reproducibility": "29;184;31;29", "wc_summary_review": "50;45;32;40", "wc_review": "525;351;249;219", "wc_reply_reviewers": "196;0;15;0", "wc_reply_authors": "924;893;291;163", "reply_reviewers": "1;0;1;0", "reply_authors": "2;2;1;1", "recommendation_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.0, 1.224744871391589 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 66.75, 35.64670391494843 ], "wc_strength_and_weaknesses_avg": [ 159.25, 98.62397021008636 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 68.25, 66.83328137986344 ], "wc_summary_review_avg": [ 41.75, 6.6473679001541655 ], "wc_review_avg": [ 336.0, 119.5867885679685 ], "wc_reply_reviewers_avg": [ 52.75, 82.93182441017441 ], "wc_reply_authors_avg": [ 567.75, 343.9166868588961 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 129, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10430020682680804243&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=1hKE9qjvz-", "email": "gatech.edu;gatech.edu;gatech.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Georgia Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.gatech.edu", "aff_unique_abbr": "Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "1jDN-RfQfrb", "title": "Unveiling Transformers with LEGO: A Synthetic Reasoning Task", "track": "main", "status": "Reject", "tldr": "We propose a synthetic task
for logical reasoning on which we study transformer models' intriguing behaviors regarding generalization and the role of pre-training; we gain insights leading to large-scale practical improvements.", "abstract": "We propose a synthetic reasoning task, LEGO (Learning Equality and Group Operations), that encapsulates the problem of following a chain of reasoning, and we study how Transformer architectures learn this task. We pay special attention to data effects such as pretraining (on seemingly unrelated NLP tasks) and dataset composition (e.g., differing chain length at training and test time), as well as architectural variants such as weight-tied layers or adding convolutional components. We study how the trained models eventually succeed at the task, and in particular, we are able to understand (to some extent) some of the attention heads as well as how the information flows in the network. Based on these observations, we propose a hypothesis that pretraining helps on LEGO tasks due to certain structured attention patterns, and we experimentally verify this hypothesis. We also observe that in some data regimes the trained transformer finds ``shortcut'' solutions to follow the chain of reasoning, which impedes the model's robustness, and moreover we propose ways to prevent it. Motivated by our findings on structured attention patterns, we propose to replace certain attention heads with hardcoded patterns. This architectural change significantly reduces FLOPs and maintains or even improves the model's performance at large-scale pretraining.", "keywords": "transformers;logical reasoning;role of pretraining;attention pattern", "primary_area": "", "supplementary_material": "/attachment/deac87cacfa1f143fbac8d5e37aa3c050a8c6370.zip", "author": "Yi Zhang;Arturs Backurs;Sebastien Bubeck;Ronen Eldan;Suriya Gunasekar;Tal Wagner", "authorids": "~Yi_Zhang1;~Arturs_Backurs1;~Sebastien_Bubeck1;~Ronen_Eldan1;~Suriya_Gunasekar1;~Tal_Wagner1", "gender": "M;;;M;;M", "homepage": "https://yi-zhang.me;http://www.mit.edu/~backurs/;http://sbubeck.com/;;http://sgunasekar.github.io;http://www.mit.edu/~talw/", "dblp": "64/6544-74;74/10669;35/4292;85/9583;;https://dblp.org/pers/hd/w/Wagner:Tal", "google_scholar": "lc6CVqEAAAAJ;UNHdIKoAAAAJ;V2Y1L4sAAAAJ;;EkREu_QAAAAJ;gV4dPToAAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;tal-wagner-22645857/", "or_profile": "~Yi_Zhang1;~Arturs_Backurs1;~Sebastien_Bubeck1;~Ronen_Eldan1;~Suriya_Gunasekar1;~Tal_Wagner1", "aff": "Microsoft;Microsoft;Microsoft;Microsoft Research;Microsoft;Amazon", "aff_domain": "microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com;amazon.com", "position": "Postdoc;Researcher;Researcher;Principal Researcher;Senior Researcher;Researcher", "bibtex": "@misc{\nzhang2023unveiling,\ntitle={Unveiling Transformers with {LEGO}: A Synthetic Reasoning Task},\nauthor={Yi Zhang and Arturs Backurs and Sebastien Bubeck and Ronen Eldan and Suriya Gunasekar and Tal Wagner},\nyear={2023},\nurl={https://openreview.net/forum?id=1jDN-RfQfrb}\n}", "github": "", "project": "", "reviewers": "3VMG;s6py;drii;7SQ8", "site": "https://openreview.net/forum?id=1jDN-RfQfrb", "pdf_size": 1216225, "recommendation": "3;6;6;8", "confidence": "4;4;3;3", "correctness": "1;4;4;4", "technical_novelty": "1;3;2;3", "empirical_novelty": "1;3;2;3", "wc_summary_paper": "82;68;97;130", "wc_strength_and_weaknesses": "176;45;333;110", "wc_clarity_quality_novelty_and_reproducibility": "3;53;28;1", "wc_summary_review": "48;39;47;106", "wc_review": "309;205;505;347",
"wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "478;116;202;86", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 5.75, 1.7853571071357126 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.25, 1.299038105676658 ], "technical_novelty_avg": [ 2.25, 0.82915619758885 ], "empirical_novelty_avg": [ 2.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 94.25, 23.047505287991584 ], "wc_strength_and_weaknesses_avg": [ 166.0, 106.96494752955287 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 21.25, 21.194043974664204 ], "wc_summary_review_avg": [ 60.0, 26.78619047195775 ], "wc_review_avg": [ 341.5, 107.76247027606597 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 220.5, 154.64394588861214 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.7001400420140049, "corr_recommendation_correctness": 0.8892972917998875, "gs_citation": 79, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15909295041795604063&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0;0;0;1", "aff_unique_norm": "Microsoft;Amazon", "aff_unique_dep": "Microsoft Corporation;Amazon.com, Inc.", "aff_unique_url": "https://www.microsoft.com;https://www.amazon.com", "aff_unique_abbr": "Microsoft;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "1kTxYvMRR8N", "title": "AdaptFSP: Adaptive Fictitious Self Play", "track": "main", "status": "Withdraw", "tldr": "Use deep rl to modify FSP for better performance in continuous control games", "abstract": "Fictitious Self-Play (FSP) is an iterative algorithm capable of learning approximate Nash equilibria in many types of two-player zero-sum games. In FSP, at each iteration, a best response is learned to the opponent's meta strategy. However, FSP can be slow to converge in continuous control games in which two embodied agents compete against one another. We propose Adaptive FSP (AdaptFSP), a deep reinforcement learning (RL) algorithm inspired by FSP. The main idea is that instead of training a best response only against the meta strategy, we additionally train against an adaptive deep RL agent that can adapt to the best response. 
In four test domains, two tabular cases--random normal-form matrix games, Leduc poker--and two continuous control tasks--Thou Shall Not Pass and a soccer environment--we show that AdaptFSP achieves lower exploitability more quickly than vanilla FSP.", "keywords": "Deep reinforcement learning;game theory;exploitability", "primary_area": "", "supplementary_material": "", "author": "Maxwell Goldstein;Noam Brown", "authorids": "~Maxwell_Goldstein1;~Noam_Brown2", "gender": ";", "homepage": "https://wp.nyu.edu/cilvr/;http://www.cs.cmu.edu/~noamb", "dblp": ";https://dblp.uni-trier.de/pers/hd/b/Brown:Noam", "google_scholar": ";RLDbLcUAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Maxwell_Goldstein1;~Noam_Brown2", "aff": "New York University;Meta Facebook", "aff_domain": "nyu.edu;facebook.com", "position": "PhD student;Research Scientist", "bibtex": "@misc{\ngoldstein2023adaptfsp,\ntitle={Adapt{FSP}: Adaptive Fictitious Self Play},\nauthor={Maxwell Goldstein and Noam Brown},\nyear={2023},\nurl={https://openreview.net/forum?id=1kTxYvMRR8N}\n}", "github": "", "project": "", "reviewers": "kD46;3rvw;8kmB;HJtD", "site": "https://openreview.net/forum?id=1kTxYvMRR8N", "pdf_size": 2854769, "recommendation": "3;3;3;3", "confidence": "3;3;3;3", "correctness": "2;3;4;3", "technical_novelty": "2;2;3;2", "empirical_novelty": "2;2;2;1", "wc_summary_paper": "104;54;104;25", "wc_strength_and_weaknesses": "87;199;271;180", "wc_clarity_quality_novelty_and_reproducibility": "28;79;51;9", "wc_summary_review": "8;51;19;11", "wc_review": "227;383;445;225", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 3.0, 0.0 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 71.75, 33.840619084171614 ], "wc_strength_and_weaknesses_avg": [ 184.25, 65.61011736005355 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 41.75, 26.14741861063918 ], "wc_summary_review_avg": [ 22.25, 17.07886120325357 ], "wc_review_avg": [ 320.0, 96.52460826131335 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:VslxlVaoU7kJ:scholar.google.com/&scioq=AdaptFSP:+Adaptive+Fictitious+Self+Play&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "New York University;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://www.nyu.edu;https://meta.com", "aff_unique_abbr": "NYU;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "STaSy: Score-based Tabular data Synthesis", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11517", "id": "1mNssCWt_v", "poster": "", "openreview": "https://openreview.net/forum?id=1mNssCWt_v", "slides": "https://iclr.cc/virtual/2023/poster/11517", "video": "https://iclr.cc/virtual/2023/poster/11517", "author_site": "Jayoung Kim, Chaejeong Lee, Noseong Park", "tldr": "We design a score-based generative model for tabular data and apply two training strategies, including the 
self-paced learning and the proposed fine-tuning method, to stabilize the denoising score matching training.", "abstract": "Tabular data synthesis is a long-standing research topic in machine learning. Many different methods have been proposed over the past decades, ranging from statistical methods to deep generative methods. However, they have not always been successful due to the complicated nature of real-world tabular data. In this paper, we present a new model named $\textbf{S}$core-based $\textbf{Ta}$bular data $\textbf{Sy}$nthesis ($\texttt{STaSy}$) and its training strategy based on the paradigm of score-based generative modeling. Despite the fact that score-based generative models have resolved many issues in generative models, there still exists room for improvement in tabular data synthesis. Our proposed training strategy includes a self-paced learning technique and a fine-tuning strategy, which further increases the sampling quality and diversity by stabilizing the denoising score matching training. Furthermore, we also conduct rigorous experimental studies in terms of the generative task trilemma: sampling quality, diversity, and time. In our experiments with 15 benchmark tabular datasets and 7 baselines, our method outperforms existing methods in terms of task-dependent evaluations and diversity.\n", "keywords": "Score-based generative model;Tabular data;Self-paced learning", "primary_area": "", "supplementary_material": "/attachment/842fa5c040f64f1e8d59e24d0d67919abc7fc0af.zip", "author": "Jayoung Kim;Chaejeong Lee;Noseong Park", "authorids": "~Jayoung_Kim1;~Chaejeong_Lee1;~Noseong_Park1", "gender": "F;;", "homepage": ";;", "dblp": "26/9969-2;;", "google_scholar": "3qbSHGwAAAAJ;T1UHgUEAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Jayoung_Kim1;~Chaejeong_Lee1;~Noseong_Park1", "aff": "Yonsei University;Yonsei University;", "aff_domain": "yonsei.ac.kr;yonsei.ac.kr;", "position": "MS student;MS student;", "bibtex": "@inproceedings{\nkim2023stasy,\ntitle={{ST}aSy: Score-based Tabular data Synthesis},\nauthor={Jayoung Kim and Chaejeong Lee and Noseong Park},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=1mNssCWt_v}\n}", "github": "", "project": "", "reviewers": "xhm1;A3vU;AiyZ;dEhg", "pdf_size": 20284947, "recommendation": "5;8;8;8", "confidence": "3;2;3;4", "correctness": "2;4;3;4", "technical_novelty": "3;4;3;3", "empirical_novelty": "3;4;3;4", "wc_summary_paper": "243;105;59;78", "wc_strength_and_weaknesses": "182;370;67;202", "wc_clarity_quality_novelty_and_reproducibility": "31;227;140;34", "wc_summary_review": "54;31;27;12", "wc_review": "510;733;293;326", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 7.25, 1.299038105676658 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.5, 0.5 ], "wc_summary_paper_avg": [ 121.25, 72.1677732786595 ], "wc_strength_and_weaknesses_avg": [ 205.25, 108.17433845418238 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 108.0, 81.53220222709552 ], "wc_summary_review_avg": [ 31.0, 15.049916943292411 ], "wc_review_avg": [ 465.5, 175.18062107436427 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ],
"authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.8703882797784892, "gs_citation": 79, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13611080388706373891&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=1mNssCWt_v", "email": "yonsei.ac.kr;yonsei.ac.kr;", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Yonsei University", "aff_unique_dep": "", "aff_unique_url": "https://www.yonsei.ac.kr", "aff_unique_abbr": "Yonsei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "id": "1mU6ADbjk-c", "title": "Neural Frailty Machine: Beyond proportional hazard assumption in neural survival regressions", "track": "main", "status": "Reject", "tldr": "A flexible framework of neural survival regression with provable statistical guarantees", "abstract": "We present neural frailty machine (NFM), a powerful and flexible neural modeling framework for survival regressions. The NFM framework utilizes the classical idea of multiplicative frailty in survival analysis to capture unobserved heterogeneity among individuals, at the same time being able to leverage the strong approximation power of neural architectures for handling nonlinear covariate dependence. Two concrete models are derived under the framework that extends neural proportional hazard models and nonparametric hazard regression models. Both models allow efficient training under the likelihood objective. Theoretically, for both proposed models, we establish statistical guarantees of neural function approximation with respect to nonparametric components via characterizing their rate of convergence. Empirically, we provide synthetic experiments that verify our theoretical statements. We also conduct experimental evaluations over 6 benchmark datasets of different scales, showing that the proposed NFM models outperform state-of-the-art survival models in terms of predictive performance. 
", "keywords": "survival analysis;sieve method;theory", "primary_area": "", "supplementary_material": "/attachment/a078cb6580e3ed46531d8c88ce8b04b5da1cc405.zip", "author": "Jiawei Qiao;Ruofan Wu;Mingzhe Wu;Wen Yu;Ming Zheng;Tengfei LIU;Tianyi Zhang;Weiqiang Wang", "authorids": "~Jiawei_Qiao1;~Ruofan_Wu1;~Mingzhe_Wu1;~Wen_Yu1;mingzheng@fudan.edu.cn;~Tengfei_LIU2;~Tianyi_Zhang5;~Weiqiang_Wang4", "gender": "M;M;M;M;;;M;M", "homepage": ";https://rorschach1989.github.io/;;https://www.fdsm.fudan.edu.cn/AboutUs/preview.html?uid=012077;;;;https://www.linkedin.com/in/weiqiang-wang-489b925/", "dblp": "342/9195;;;;;;;", "google_scholar": ";;;;;;;", "orcid": ";;;;;;;0000-0002-6159-619X", "linkedin": "https://www.linkedin.cn/incareer/in/ACoAAC5mkMQBMjdfr90k5lIqIbS7BwXWIuR0brw;;mingzhe-wu-4904a6148/;;;;tianyi-zhang-178a491a/;weiqiang-wang-489b925/", "or_profile": "~Jiawei_Qiao1;~Ruofan_Wu1;~Mingzhe_Wu1;~Wen_Yu1;mingzheng@fudan.edu.cn;~Tengfei_LIU2;~Tianyi_Zhang5;~Weiqiang_Wang4", "aff": "Fudan University;Ant Group;;Fudan University;;;Alipay;Ant Group", "aff_domain": "fudan.edu.cn;antgroup.com;;fdu.edu;;;alipay.com;antgroup.com", "position": "PhD student;Researcher;;Full Professor;;;Principal Researcher;Researcher", "bibtex": "@misc{\nqiao2023neural,\ntitle={Neural Frailty Machine: Beyond proportional hazard assumption in neural survival regressions},\nauthor={Jiawei Qiao and Ruofan Wu and Mingzhe Wu and Wen Yu and Ming Zheng and Tengfei LIU and Tianyi Zhang and Weiqiang Wang},\nyear={2023},\nurl={https://openreview.net/forum?id=1mU6ADbjk-c}\n}", "github": "", "project": "", "reviewers": "oRcF;yX4d;nTQp;xG5j", "site": "https://openreview.net/forum?id=1mU6ADbjk-c", "pdf_size": 655270, "recommendation": "5;5;6;6", "confidence": "4;4;4;3", "correctness": "3;3;3;4", "technical_novelty": "3;2;3;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "53;65;39;56", "wc_strength_and_weaknesses": "137;338;159;112", "wc_clarity_quality_novelty_and_reproducibility": "129;2;25;19", "wc_summary_review": "43;29;59;28", "wc_review": "362;434;282;215", "wc_reply_reviewers": "648;0;0;0", "wc_reply_authors": "571;683;262;377", "reply_reviewers": "2;0;0;0", "reply_authors": "2;1;1;1", "recommendation_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 53.25, 9.33742469849155 ], "wc_strength_and_weaknesses_avg": [ 186.5, 89.03510543600204 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 43.75, 49.936835101956554 ], "wc_summary_review_avg": [ 39.75, 12.597122687344122 ], "wc_review_avg": [ 323.25, 82.44202508429788 ], "wc_reply_reviewers_avg": [ 162.0, 280.59223082615813 ], "wc_reply_authors_avg": [ 473.25, 163.89077917930587 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=420681692207483073&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;0;2;1", "aff_unique_norm": "Fudan University;Ant Group;Alipay", "aff_unique_dep": ";;", "aff_unique_url": "https://www.fudan.edu.cn;https://www.antgroup.com;https://www.alipay.com", "aff_unique_abbr": "Fudan;Ant Group;Alipay", "aff_campus_unique_index": 
"", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "1maXoEyeqx", "title": "Assessing Model Out-of-distribution Generalization with Softmax Prediction Probability Baselines and A Correlation Method", "track": "main", "status": "Reject", "tldr": "", "abstract": "This paper studies the use of Softmax prediction to assess model generalization under distribution shift. Specifically, given an out-of distribution (OOD) test set and a pool of classifiers, we aim to develop a Softmax prediction-based measure which has a monotonic relationship with OOD generalization performance. We first show existing uncertainty measures (e.g., entropy and maximum Softmax prediction) are fairly useful of predicting generalization in some OOD scenarios. We then move ahead with proposing a new measure, Softmax Correlation (SoftmaxCorr). To obtain the SoftmaxCorr score for a classifier, we compute the class-class correlation matrix from all the Softmax vectors in a test set, and then its cosine similarity with an identity matrix. We show that the class-class correlation matrix reveals significant knowledge about the confusion matrix: its high similarity with the identity matrix means predictions have low confusion (uncertainty) and evenly cover all classes, and vice versa. Across three setups including ImageNet, CIFAR-10, and WILDS, we show that SoftmaxCorr is well predictive of model accuracy on both in-distribution and OOD datasets.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Weijie Tu;Weijian Deng;Tom Gedeon;Liang Zheng", "authorids": "~Weijie_Tu1;~Weijian_Deng1;~Tom_Gedeon1;~Liang_Zheng4", "gender": "M;M;M;M", "homepage": ";http://weijiandeng.xyz;https://cs.anu.edu.au/people/Tom.Gedeon/;http://zheng-lab.cecs.anu.edu.au/", "dblp": "344/1001;198/1517;g/TamasDGedeon.html;61/7360-1", "google_scholar": ";https://scholar.google.com.hk/citations?user=lReHnAEAAAAJ;https://scholar.google.com.tw/citations?user=lPTjWIkAAAAJ;https://scholar.google.com.au/citations?user=vNHqr3oAAAAJ", "orcid": ";;0000-0001-8356-4909;", "linkedin": "weijie-tu;;tom-gedeon;liang-zheng-76341311a/", "or_profile": "~Weijie_Tu1;~Weijian_Deng1;~Tom_Gedeon1;~Liang_Zheng4", "aff": "Australian National University;Australian National University;Curtin University of Technology;Australian National University", "aff_domain": "anu.edu.au;anu.edu.au;curtin.edu.au;anu.edu.au", "position": "PhD student;PhD student;Full Professor;Senior Lecturer", "bibtex": "@misc{\ntu2023assessing,\ntitle={Assessing Model Out-of-distribution Generalization with Softmax Prediction Probability Baselines and A Correlation Method},\nauthor={Weijie Tu and Weijian Deng and Tom Gedeon and Liang Zheng},\nyear={2023},\nurl={https://openreview.net/forum?id=1maXoEyeqx}\n}", "github": "", "project": "", "reviewers": "hesX;8Aud;Ku6q", "site": "https://openreview.net/forum?id=1maXoEyeqx", "pdf_size": 3261733, "recommendation": "5;5;6", "confidence": "4;4;4", "correctness": "3;4;4", "technical_novelty": "3;2;3", "empirical_novelty": "3;2;3", "wc_summary_paper": "40;97;61", "wc_strength_and_weaknesses": "305;726;86", "wc_clarity_quality_novelty_and_reproducibility": "7;29;138", "wc_summary_review": "336;107;63", "wc_review": "688;959;348", "wc_reply_reviewers": "0;315;0", "wc_reply_authors": "2803;3468;817", "reply_reviewers": "0;1;0", "reply_authors": "6;7;3", "recommendation_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.6666666666666665, 
0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 66.0, 23.53720459187964 ], "wc_strength_and_weaknesses_avg": [ 372.3333333333333, 265.58154219665863 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 58.0, 57.27710420985567 ], "wc_summary_review_avg": [ 168.66666666666666, 119.67827241771536 ], "wc_review_avg": [ 665.0, 249.96933145221368 ], "wc_reply_reviewers_avg": [ 105.0, 148.49242404917499 ], "wc_reply_authors_avg": [ 2362.6666666666665, 1126.164681069731 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 5.333333333333333, 1.699673171197595 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.4999999999999999, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9264745442364223744&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Australian National University;Curtin University", "aff_unique_dep": ";", "aff_unique_url": "https://www.anu.edu.au;https://www.curtin.edu.au", "aff_unique_abbr": "ANU;Curtin", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Australia" }, { "id": "1mjOVFZ3C-", "title": "Global-Scale Species Mapping From Crowdsourced Data", "track": "main", "status": "Reject", "tldr": "A new model for jointly estimating the spatial range of thousands of different species from sparse partially observed data. ", "abstract": "Estimating the geographical range of a species from in situ observational data is a challenging and important geospatial prediction problem. Given a set of locations indicating where a species has been observed, the goal is to learn a model that can predict how likely it is for the species to be present at any other location. While this is a well-studied problem, traditional approaches are unable to take advantage of more recently available large-scale datasets that cover many locations and species. We propose a new approach that jointly estimates the geographical ranges of tens of thousands of different species simultaneously. We develop a series of benchmark evaluation tasks that measure different aspects of the species range and spatial representation learning problems. We show that our approach scales both in terms of amount of training data and species, where adding more data enables the models to learn better spatial representations that generalize to other species. 
Despite being only trained on weakly supervised crowdsourced data, our models can approach the predictions of current expert-developed gold standard models.", "keywords": "species distribution modeling;coordinate networks;deep learning", "primary_area": "", "supplementary_material": "", "author": "Elijah Cole;Grant Van Horn;Alexander Shepard;Patrick Leary;Scott Loarie;Pietro Perona;Oisin Mac Aodha", "authorids": "~Elijah_Cole1;~Grant_Van_Horn1;~Alexander_Shepard1;~Patrick_Leary1;~Scott_Loarie2;~Pietro_Perona1;~Oisin_Mac_Aodha2", "gender": "M;M;M;;M;Not Specified;M", "homepage": "https://elijahcole.me/;https://gvh.codes/;;;;https://www.vision.caltech.edu;https://homepages.inf.ed.ac.uk/omacaod/", "dblp": "195/2520;144/8033;04/11130;;;p/PietroPerona.html;90/8653", "google_scholar": "-atuVWQAAAAJ;PxYY_nsAAAAJ;;;;j29kMCwAAAAJ;IfZBjkUAAAAJ", "orcid": "0000-0001-6623-0966;0000-0003-2953-9651;;0000-0001-5172-8577;;0000-0002-7583-5809;0000-0002-5787-5073", "linkedin": "elicole/;;;;scott-loarie-872b665/;https://www.linkedin.com/company/perona/;oisin-mac-aodha-406273273/", "or_profile": "~Elijah_Cole1;~Grant_Van_Horn1;~Alexander_Shepard1;~Patrick_Leary1;~Scott_Loarie2;~Pietro_Perona1;~Oisin_Mac_Aodha2", "aff": "California Institute of Technology;Cornell University;California Academy of Sciences;California Academy of Sciences;;California Institute of Technology;University of Edinburgh, University of Edinburgh", "aff_domain": "caltech.edu;cornell.edu;calacademy.org;calacademy.org;;caltech.edu;ed.ac.uk", "position": "PhD student;Researcher;Software Developer;Employee;;Full Professor;Assistant Professor", "bibtex": "@misc{\ncole2023globalscale,\ntitle={Global-Scale Species Mapping From Crowdsourced Data},\nauthor={Elijah Cole and Grant Van Horn and Alexander Shepard and Patrick Leary and Scott Loarie and Pietro Perona and Oisin Mac Aodha},\nyear={2023},\nurl={https://openreview.net/forum?id=1mjOVFZ3C-}\n}", "github": "", "project": "", "reviewers": "eJKM;f3dW;apN4;Jszv", "site": "https://openreview.net/forum?id=1mjOVFZ3C-", "pdf_size": 8000051, "recommendation": "3;3;3;6", "confidence": "4;4;4;3", "correctness": "3;2;3;2", "technical_novelty": "2;2;2;2", "empirical_novelty": "2;3;2;3", "wc_summary_paper": "57;92;51;44", "wc_strength_and_weaknesses": "257;305;140;133", "wc_clarity_quality_novelty_and_reproducibility": "36;69;129;608", "wc_summary_review": "44;61;54;136", "wc_review": "394;527;374;921", "wc_reply_reviewers": "0;0;0;401", "wc_reply_authors": "359;427;176;385", "reply_reviewers": "0;0;0;1", "reply_authors": "3;3;3;2", "recommendation_avg": [ 3.75, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 61.0, 18.479718612576328 ], "wc_strength_and_weaknesses_avg": [ 208.75, 74.25757537113638 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 210.5, 231.90569203881134 ], "wc_summary_review_avg": [ 73.75, 36.44430682562093 ], "wc_review_avg": [ 554.0, 219.89656659438774 ], "wc_reply_reviewers_avg": [ 100.25, 173.63809345877993 ], "wc_reply_authors_avg": [ 336.75, 95.92803292051808 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:lBS6ly0SbX4J:scholar.google.com/&scioq=Global-Scale+Species+Mapping+From+Crowdsourced+Data&hl=en&as_sdt=0,47", "gs_version_total": 0, "aff_unique_index": "0;1;2;2;0;3", "aff_unique_norm": "California Institute of Technology;Cornell University;California Academy of Sciences;University of Edinburgh", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.caltech.edu;https://www.cornell.edu;https://www.calacademy.org;https://www.ed.ac.uk", "aff_unique_abbr": "Caltech;Cornell;;Edinburgh", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Pasadena;", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "United States;United Kingdom" }, { "id": "1n1c7cHl3Zc", "title": "The Plug and Play of Language Models for Text-to-image Generation", "track": "main", "status": "Reject", "tldr": "This paper introduces a new method to efficiently plug new language models to exiting text-to-image generation models as enhancement in scalability.", "abstract": "Text-to-image (T2I) models enable controllable image generation through user-provided captions. A text encoder is typically used to map captions to a latent space, and it has been shown to be critical for model's performance. However, replacing or upgrading the text encoder in a T2I model is challenging due to the tight bond between the current encoder and the image decoder. It requires training the model from scratch, which can be prohibitively expensive. To address this problem, we introduce a more efficient approach to align a pre-trained language model with the latent space of an existing T2I model. We propose a Model Translation Network (MTN) and a new training objective to align the representation spaces of the two text encoders using only a corpus of unlabeled text. We empirically find that MTN can be trained efficiently and can boost the performance of existing T2I models by upgrading their text encoder. Moreover, we find that MTN can align multilingual language models such as XLM-Roberta, thus allowing existing T2I models to generate high-quality images from captions beyond English. 
", "keywords": "Text-to-Image Generation;Language Models;Efficiency", "primary_area": "", "supplementary_material": "/attachment/5920eacecbb6ccdd67a035ec79c20118b10ff8ea.zip", "author": "Can Qin;Ning Yu;Chen Xing;Shu Zhang;Stefano Ermon;Yun Fu;Caiming Xiong;Ran Xu", "authorids": "~Can_Qin1;~Ning_Yu2;~Chen_Xing2;~Shu_Zhang1;~Stefano_Ermon1;~Yun_Fu1;~Caiming_Xiong1;~Ran_Xu1", "gender": "M;;F;M;M;M;M;M", "homepage": "http://canqin.tech;;;;http://cs.stanford.edu/~ermon/;http://www1.ece.neu.edu/~yunfu/;http://cmxiong.com/;", "dblp": "214/2488;;;30/2700-7;47/8135;00/5815-1;80/7282;", "google_scholar": "QCik-YcAAAAJ;;tAUdLM0AAAAJ;k9zsuBIAAAAJ;;https://scholar.google.com.tw/citations?user=h-JEcQ8AAAAJ;vaSdahkAAAAJ;sgBB2sUAAAAJ", "orcid": ";;;;;0000-0002-5098-2853;;", "linkedin": ";;chen-xing-83082074/;shu-zhang-5b34b320;;furaymond/;caiming-xiong-150a1417;", "or_profile": "~Can_Qin1;~Ning_Yu2;~Chen_Xing2;~Shu_Zhang1;~Stefano_Ermon1;~Yun_Fu1;~Caiming_Xiong1;~Ran_Xu1", "aff": "Northeastern University;;SalesForce.com;Salesforce Research;Stanford University;Northeastern University;Salesforce Research;SalesForce.com", "aff_domain": "neu.edu;;salesforce.com;salesforce.com;stanford.edu;northeastern.edu;salesforce.com;salesforce.com", "position": "PhD student;;Researcher;Researcher;Associate Professor;Full Professor;Research Scientist;senior manager", "bibtex": "@misc{\nqin2023the,\ntitle={The Plug and Play of Language Models for Text-to-image Generation},\nauthor={Can Qin and Ning Yu and Chen Xing and Shu Zhang and Stefano Ermon and Yun Fu and Caiming Xiong and Ran Xu},\nyear={2023},\nurl={https://openreview.net/forum?id=1n1c7cHl3Zc}\n}", "github": "", "project": "", "reviewers": "1TQj;2Z7M;5yb4;jTu3", "site": "https://openreview.net/forum?id=1n1c7cHl3Zc", "pdf_size": 9742320, "recommendation": "6;6;6;6", "confidence": "3;3;4;2", "correctness": "3;3;3;3", "technical_novelty": "3;3;3;3", "empirical_novelty": "4;2;3;0", "wc_summary_paper": "88;75;69;38", "wc_strength_and_weaknesses": "508;237;368;76", "wc_clarity_quality_novelty_and_reproducibility": "46;75;17;9", "wc_summary_review": "134;33;64;6", "wc_review": "776;420;518;129", "wc_reply_reviewers": "77;75;162;0", "wc_reply_authors": "823;1470;1752;412", "reply_reviewers": "1;1;2;0", "reply_authors": "2;3;5;2", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.25, 1.479019945774904 ], "wc_summary_paper_avg": [ 67.5, 18.364367672206956 ], "wc_strength_and_weaknesses_avg": [ 297.25, 159.68934685820466 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 36.75, 26.022826518270456 ], "wc_summary_review_avg": [ 59.25, 47.7879430400598 ], "wc_review_avg": [ 460.75, 231.50526451897375 ], "wc_reply_reviewers_avg": [ 78.5, 57.334544560849174 ], "wc_reply_authors_avg": [ 1114.25, 527.0827140971329 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.0, 1.224744871391589 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:6kXl3zrTsfAJ:scholar.google.com/&scioq=The+Plug+and+Play+of+Language+Models+for+Text-to-image+Generation&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;1;2;0;1;1", "aff_unique_norm": "Northeastern University;Salesforce;Stanford University", "aff_unique_dep": ";;", "aff_unique_url": 
"https://www.northeastern.edu;https://www.salesforce.com;https://www.stanford.edu", "aff_unique_abbr": "NEU;Salesforce;Stanford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "1nZelVKqpks", "title": "Disentangled (Un)Controllable Features", "track": "main", "status": "Withdraw", "tldr": "Separation of Controllable and Uncontrollable Features in Latent Space", "abstract": "In the context of MDPs with high-dimensional states, reinforcement learning can achieve better results when using a compressed, low-dimensional representation of the original input space. A variety of learning objectives have therefore been used to learn useful representations. However, these representations usually lack interpretability of the different features. We propose a representation learning algorithm that is able to disentangle latent features into a controllable and an uncontrollable part. The resulting representations are easily interpretable and can be used for learning and planning efficiently by leveraging the specific properties of the two parts. To highlight the benefits of the approach, the disentangling properties of the algorithm are illustrated in three different environments.", "keywords": "Representation Learning;Reinforcement Learning", "primary_area": "", "supplementary_material": "", "author": "Jacob Eeuwe Kooi;Mark Hoogendoorn;Vincent Francois-Lavet", "authorids": "~Jacob_Eeuwe_Kooi1;~Mark_Hoogendoorn2;~Vincent_Francois-Lavet1", "gender": "M;M;", "homepage": ";http://www.cs.vu.nl/~mhoogen;http://vincent.francois-l.be", "dblp": "287/9059;19/1103.html;127/3326", "google_scholar": "GMcfK1MAAAAJ;3s4lqHkAAAAJ;", "orcid": "0000-0002-3694-0745;;", "linkedin": "jacob-kooi-4b776a5a/?originalSubdomain=nl;;", "or_profile": "~Jacob_Eeuwe_Kooi1;~Mark_Hoogendoorn2;~Vincent_Francois-Lavet1", "aff": "Vrije Universiteit Amsterdam;VU University Amsterdam;VU Amsterdam", "aff_domain": "vu.nl;vu.nl;vu.nl", "position": "PhD student;Full Professor;Assistant Professor", "bibtex": "@misc{\nkooi2023disentangled,\ntitle={Disentangled (Un)Controllable Features},\nauthor={Jacob Eeuwe Kooi and Mark Hoogendoorn and Vincent Francois-Lavet},\nyear={2023},\nurl={https://openreview.net/forum?id=1nZelVKqpks}\n}", "github": "", "project": "", "reviewers": "S58A;7kJZ;mzPL;QPR4", "site": "https://openreview.net/forum?id=1nZelVKqpks", "pdf_size": 696126, "recommendation": "3;3;3;3", "confidence": "3;3;4;4", "correctness": "4;3;2;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "55;94;114;111", "wc_strength_and_weaknesses": "225;145;116;504", "wc_clarity_quality_novelty_and_reproducibility": "39;86;113;171", "wc_summary_review": "42;46;64;76", "wc_review": "361;371;407;862", "wc_reply_reviewers": "62;206;0;61", "wc_reply_authors": "541;515;391;515", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 93.5, 23.5 ], "wc_strength_and_weaknesses_avg": [ 247.5, 153.3761715521678 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 102.25, 47.714646598293065 ], "wc_summary_review_avg": [ 57.0, 13.74772708486752 ], "wc_review_avg": [ 500.25, 209.5559292885792 ], "wc_reply_reviewers_avg": [ 82.25, 75.73102072466737 ], "wc_reply_authors_avg": [ 
490.5, 58.41874699101308 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4938243735141180204&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;0", "aff_unique_norm": "Vrije Universiteit Amsterdam;VU University Amsterdam", "aff_unique_dep": ";", "aff_unique_url": "https://www.vu.nl;https://www.vu.nl", "aff_unique_abbr": "VU Amsterdam;VU", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Amsterdam", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Netherlands" }, { "id": "1ndyt02WPo", "title": "Better handling unlabeled entity problem using PU-learning and negative sampling", "track": "main", "status": "Reject", "tldr": "", "abstract": "The NER task is largely developed based on well-annotated data.\nHowever, in many scenarios, the entities may not be fully annotated, leading to performance degradation.\nA common approach for this problem is to distinguish unlabeled entities from negative instances using labeled data.\nHowever, the vast differences between entities make such empirical approaches difficult to realize.\nOur solution is to treat unlabeled entities based on both empirical inference and random sampling.\nTo this end, we propose a simple yet effective two-step method that consists of a novel Positive-Unlabeled (PU-learning) algorithm and negative sampling, in which PU-learning distinguishes part of the unlabeled entities from negative instances based on the confidence threshold.\nIn general, the proposed method can mitigate the impact of unlabeled entities at the outset and can be easily applied to any character-level NER model.\nWe verify the effectiveness of our method on several NER models and datasets, showing a strong ability to deal with unlabeled entities.\nFinally, in real-world situations, we establish new state-of-the-art results on many benchmark NER datasets.", "keywords": "NER;unlabeled entity problem;PU-learning;negative sampling;self-supervision", "primary_area": "", "supplementary_material": "", "author": "Shunqin Zhang;Xuan Zhang;Wenduo He;Sanguo Zhang", "authorids": "~Shunqin_Zhang1;zhangx@cernet.edu.cn;hewd@cernet.edu.cn;sgzhang@ucas.ac.cn", "gender": "M;;;", "homepage": "https://kns.cnki.net/kcms/detail/knetsearch.aspx?sfield=au&skey=%E5%BC%A0%E9%A1%BA%E9%92%A6&code=000020623867&v=c8GxYIyX9F-bqPRZgavWOmToVoqWRGSEEf9EzdmWlT0cR1-fJQ7gi5Ib4u3kKSP9;;;", "dblp": ";;;", "google_scholar": ";;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Shunqin_Zhang1;zhangx@cernet.edu.cn;hewd@cernet.edu.cn;sgzhang@ucas.ac.cn", "aff": "University of Chinese Academy of Sciences;;;", "aff_domain": "ucas.edu.cn;;;", "position": "PhD student;;;", "bibtex": "@misc{\nzhang2023better,\ntitle={Better handling unlabeled entity problem using {PU}-learning and negative sampling},\nauthor={Shunqin Zhang and Xuan Zhang and Wenduo He and Sanguo Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=1ndyt02WPo}\n}", "github": "", "project": "", "reviewers": "6ird;GbhS;NTwJ;DG9f", "site": "https://openreview.net/forum?id=1ndyt02WPo", "pdf_size": 587892, "recommendation": "3;3;3;3", "confidence": "3;4;4;3", "correctness": "3;3;2;3", "technical_novelty": "2;2;1;3", "empirical_novelty": "2;3;1;3", "wc_summary_paper": "66;78;86;59", "wc_strength_and_weaknesses": "52;267;69;133", 
"wc_clarity_quality_novelty_and_reproducibility": "219;54;12;89", "wc_summary_review": "75;35;68;49", "wc_review": "412;434;235;330", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 72.25, 10.449282272003183 ], "wc_strength_and_weaknesses_avg": [ 130.25, 84.53216843308824 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 93.5, 77.4160836002442 ], "wc_summary_review_avg": [ 56.75, 15.75396775418815 ], "wc_review_avg": [ 352.75, 78.2539935083188 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:rqkq5tRKhiIJ:scholar.google.com/&scioq=Better+handling+unlabeled+entity+problem+using+PU-learning+and+negative+sampling&hl=en&as_sdt=0,7", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "University of Chinese Academy of Sciences", "aff_unique_dep": "", "aff_unique_url": "http://www.ucas.ac.cn", "aff_unique_abbr": "UCAS", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "id": "1o5SGx71kAO", "title": "FV-MgNet: Fully Connected V-cycle MgNet for Interpretable Time Series Forecasting", "track": "main", "status": "Withdraw", "tldr": "By investigating iterative methods for a constrained linear model, we propose a new class of fully connected V-cycle MgNet for long-term time series forecasting.", "abstract": "By investigating iterative methods for a constrained linear model, we propose a new class of fully connected V-cycle MgNet for long-term time series forecasting, one of the most difficult tasks in forecasting problems. MgNet is a CNN model that was proposed for image classification based on the multigrid (MG) methods for solving discretized partial differential equations (PDEs). We replace the convolutional operations with fully connected operations in the existing MgNet and then apply it to the forecasting problems. Motivated by the V-cycle structure in MG, we further propose the FV-MgNet, a V-cycle version of fully connected MgNet, to extract features hierarchically. By evaluating the performance of FV-MgNet on popular data sets and comparing it with state-of-the-art models, we show that the FV-MgNet achieves better results with less memory usage and faster inference speed. 
In addition, we also conduct ablation experiments to demonstrate that the structure of FV-MgNet is the best choice among the many variants.", "keywords": "long time series forecasting;multigrid;MgNet;V-cycle;fully connected layer", "primary_area": "", "supplementary_material": "", "author": "Jianqing Zhu;Juncai He;Lian Zhang;Jinchao Xu", "authorids": "~Jianqing_Zhu2;~Juncai_He1;~Lian_Zhang1;jinchao.xu@kaust.edu.sa", "gender": ";M;M;", "homepage": ";https://juncaihe.github.io;;", "dblp": ";223/4286;;", "google_scholar": ";CG5GBW0AAAAJ;;", "orcid": ";;;", "linkedin": ";;lian-zhang-47027a187/;", "or_profile": "~Jianqing_Zhu2;~Juncai_He1;~Lian_Zhang1;jinchao.xu@kaust.edu.sa", "aff": ";King Abdullah University of Science and Technology;In-Chao Institute Ltd;", "aff_domain": ";kaust.edu.sa;in-chao.com;", "position": ";Researcher;Researcher;", "bibtex": "@misc{\nzhu2023fvmgnet,\ntitle={{FV}-MgNet: Fully Connected V-cycle MgNet for Interpretable Time Series Forecasting},\nauthor={Jianqing Zhu and Juncai He and Lian Zhang and Jinchao Xu},\nyear={2023},\nurl={https://openreview.net/forum?id=1o5SGx71kAO}\n}", "github": "", "project": "", "reviewers": "yZZL;pVpk;Byt2", "site": "https://openreview.net/forum?id=1o5SGx71kAO", "pdf_size": 423313, "recommendation": "3;3;5", "confidence": "4;3;4", "correctness": "3;2;3", "technical_novelty": "2;2;3", "empirical_novelty": "2;3;3", "wc_summary_paper": "24;63;188", "wc_strength_and_weaknesses": "153;333;378", "wc_clarity_quality_novelty_and_reproducibility": "19;298;83", "wc_summary_review": "39;75;48", "wc_review": "235;769;697", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 91.66666666666667, 69.95395310885266 ], "wc_strength_and_weaknesses_avg": [ 288.0, 97.21111047611791 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 133.33333333333334, 119.33240223100438 ], "wc_summary_review_avg": [ 54.0, 15.297058540778355 ], "wc_review_avg": [ 567.0, 236.5924766344019 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.5, "corr_recommendation_correctness": 0.5, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7967748636770285652&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff_unique_index": "0;1", "aff_unique_norm": "King Abdullah University of Science and Technology;In-Chao Institute", "aff_unique_dep": ";", "aff_unique_url": "https://www.kast.kau.edu.sa;", "aff_unique_abbr": "KAUST;ICI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0", "aff_country_unique": "Saudi Arabia;" }, { "id": "1pGmKJvneD7", "title": "LVQ-VAE:End-to-end Hyperprior-based Variational Image Compression with Lattice Vector Quantization", "track": "main", "status": "Reject", "tldr": "", "abstract": "Image compression technology has become an increasingly important research topic.
In recent years, learning-based methods have been extensively studied, and variational autoencoder (VAE)-based methods using a hyperprior-based context-adaptive entropy model have been reported to be comparable to the latest video coding standard H.266/VVC in terms of RD performance.\n We think there is room for improvement in the quantization process of latent features by adopting vector quantization (VQ). Many VAE-based methods use scalar quantization for latent features and do not exploit correlation between the features. Although there are methods that incorporate VQ into learning-based methods, to the best of our knowledge, there are no studies that utilize a hyperprior-based VAE with VQ, because incorporating VQ into a hyperprior-based VAE makes it difficult to estimate the likelihood.\n In this paper, we propose a new VAE-based image compression method that uses a VQ-based latent representation with a hyperprior-based context-adaptive entropy model to improve coding efficiency. The proposed method resolves the codebook size bloat problem faced by conventional VQ-based methods by adopting Lattice VQ as the basis quantization method, and achieves end-to-end optimization with the hyperprior-based context-adaptive entropy model by approximating the likelihood calculation of latent feature vectors with high accuracy using Monte Carlo integration. Furthermore, in likelihood estimation, we model each latent feature vector with a multivariate normal distribution including covariance matrix parameters, which improves the likelihood estimation accuracy and RD performance.\n Experimental results show that the proposed method achieves state-of-the-art RD performance, exceeding existing learning-based methods and the latest video coding standard H.266/VVC by 18.0%.", "keywords": "Image Compression;Variational Autoencoder;Vector Quantization;Lattice", "primary_area": "", "supplementary_material": "", "author": "Shinobu Kudo;Yukihiro Bandoh;Seishi Takamura;Masaki Kitahara", "authorids": "~Shinobu_Kudo1;yukihiro.bandou.pe@hco.ntt.co.jp;takamura@ieee.org;masaki.kitahara.ve@hco.ntt.co.jp", "gender": ";;;", "homepage": ";;;", "dblp": ";;;", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Shinobu_Kudo1;yukihiro.bandou.pe@hco.ntt.co.jp;takamura@ieee.org;masaki.kitahara.ve@hco.ntt.co.jp", "aff": "NTT;;;", "aff_domain": "ntt.co.jp;;;", "position": "Researcher;;;", "bibtex": "@misc{\nkudo2023lvqvaeendtoend,\ntitle={{LVQ}-{VAE}:End-to-end Hyperprior-based Variational Image Compression with Lattice Vector Quantization},\nauthor={Shinobu Kudo and Yukihiro Bandoh and Seishi Takamura and Masaki Kitahara},\nyear={2023},\nurl={https://openreview.net/forum?id=1pGmKJvneD7}\n}", "github": "", "project": "", "reviewers": "9ZNU;fFaA;nBnd;SVp9", "site": "https://openreview.net/forum?id=1pGmKJvneD7", "pdf_size": 4240591, "recommendation": "3;3;5;5", "confidence": "5;4;4;3", "correctness": "3;3;4;4", "technical_novelty": "3;3;3;2", "empirical_novelty": "3;2;3;3", "wc_summary_paper": "51;71;38;209", "wc_strength_and_weaknesses": "180;304;80;508", "wc_clarity_quality_novelty_and_reproducibility": "24;19;62;75", "wc_summary_review": "81;60;54;99", "wc_review": "336;454;234;891", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "513;880;363;937", "reply_reviewers": "0;0;0;0", "reply_authors": "1;2;1;2", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.75,
0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 92.25, 68.42285802273975 ], "wc_strength_and_weaknesses_avg": [ 268.0, 159.6746692496966 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 45.0, 24.01041440708594 ], "wc_summary_review_avg": [ 73.5, 17.811513130556875 ], "wc_review_avg": [ 478.75, 250.42102048350495 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 673.25, 241.99418897981826 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.7071067811865475, "corr_recommendation_correctness": 1.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14223065479188099596&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "NTT Corporation", "aff_unique_dep": "", "aff_unique_url": "https://www.ntt.co.jp", "aff_unique_abbr": "NTT", "aff_country_unique_index": "0", "aff_country_unique": "Japan" }, { "id": "1sN_4ROgel", "title": "A Scalable and Exact Gaussian Process Sampler via Kernel Packets", "track": "main", "status": "Reject", "tldr": "", "abstract": "In view of the widespread use of Gaussian processes (GPs) in machine learning models, generating random sample paths of GPs is crucial for many machine learning applications. Sampling from a GP essentially requires generating high-dimensional Gaussian random vectors, which is computationally challenging if a direct method, such as the one based on Cholesky decomposition, is implemented. We develop a scalable algorithm to sample random realizations of the prior and the posterior of GP models with Mat\u00e9rn correlation functions. Unlike existing scalable sampling algorithms, the proposed approach draws samples from the theoretical distributions exactly. The algorithm exploits a novel structure called kernel packets (KP), which gives an exact sparse representation of the dense covariance matrices. The proposed method is applicable to one-dimensional GPs, and to multi-dimensional GPs under some conditions, such as separable kernels with full grid designs. 
Via a series of experiments and comparisons with other recent works, we demonstrate the efficiency and accuracy of the proposed method.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Haoyuan Chen;Rui Tuo", "authorids": "~Haoyuan_Chen1;~Rui_Tuo1", "gender": ";M", "homepage": ";https://sites.google.com/site/ruituo2017/home?authuser=0", "dblp": ";184/0554", "google_scholar": "maRSH-AAAAAJ;J_D0pSUAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Haoyuan_Chen1;~Rui_Tuo1", "aff": "Texas A&M University - College Station;Texas A&M University - College Station", "aff_domain": "tamu.edu;tamu.edu", "position": "PhD student;Assistant Professor", "bibtex": "@misc{\nchen2023a,\ntitle={A Scalable and Exact Gaussian Process Sampler via Kernel Packets},\nauthor={Haoyuan Chen and Rui Tuo},\nyear={2023},\nurl={https://openreview.net/forum?id=1sN_4ROgel}\n}", "github": "", "project": "", "reviewers": "4pJF;xLQn;pohu;fxzb", "site": "https://openreview.net/forum?id=1sN_4ROgel", "pdf_size": 5037731, "recommendation": "3;3;3;6", "confidence": "4;4;5;4", "correctness": "4;4;3;4", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;2;2;0", "wc_summary_paper": "126;53;69;65", "wc_strength_and_weaknesses": "226;347;226;115", "wc_clarity_quality_novelty_and_reproducibility": "561;86;138;22", "wc_summary_review": "160;43;54;87", "wc_review": "1073;529;487;289", "wc_reply_reviewers": "332;15;0;0", "wc_reply_authors": "405;118;223;132", "reply_reviewers": "1;1;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 3.75, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 78.25, 28.19020219863632 ], "wc_strength_and_weaknesses_avg": [ 228.5, 82.06247619953957 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 201.75, 211.44310700517053 ], "wc_summary_review_avg": [ 86.0, 45.689167206242665 ], "wc_review_avg": [ 594.5, 290.74860274814733 ], "wc_reply_reviewers_avg": [ 86.75, 141.72751144361493 ], "wc_reply_authors_avg": [ 219.5, 114.43447906990271 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9788781359621345449&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Texas A&M University", "aff_unique_dep": "", "aff_unique_url": "https://www.tamu.edu", "aff_unique_abbr": "TAMU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "College Station", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Multi-task Self-supervised Graph Neural Networks Enable Stronger Task Generalization", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11928", "id": "1tHAZRqftM", "poster": "/media/PosterPDFs/ICLR%202023/11928.png?t=1680884350.4198675", "openreview": "https://openreview.net/forum?id=1tHAZRqftM", "slides": "https://iclr.cc/virtual/2023/poster/11928", "video": "https://iclr.cc/virtual/2023/poster/11928", "author_site": "Mingxuan Ju, Tong Zhao, Qianlong Wen, Wenhao Yu, Neil Shah, Yanfang Ye, Chuxu Zhang", "tldr": "We present ParetoGNN, a novel multi-task self-supervised learning framework for graph 
neural networks that enhances task generalization across various downstream tasks and datasets.", "abstract": "Self-supervised learning (SSL) for graph neural networks (GNNs) has attracted increasing attention from the graph machine learning community in recent years, owing to its capability to learn performant node embeddings without costly label information. One weakness of conventional SSL frameworks for GNNs is that they learn through a single philosophy, such as mutual information maximization or generative reconstruction. When applied to various downstream tasks, these frameworks rarely perform equally well for every task, because one philosophy may not span the extensive knowledge required for all tasks. To enhance generalization across tasks, and as an important first step toward exploring fundamental graph models, we introduce PARETOGNN, a multi-task SSL framework for node representation learning over graphs. Specifically, PARETOGNN is self-supervised by manifold pretext tasks observing multiple philosophies. To reconcile different philosophies, we explore a multiple-gradient descent algorithm, such that PARETOGNN actively learns from every pretext task while minimizing potential conflicts. We conduct comprehensive experiments over four downstream tasks (i.e., node classification, node clustering, link prediction, and partition prediction), and our proposal achieves the best overall performance across tasks on 11 widely adopted benchmark datasets. Besides, we observe that learning from multiple philosophies enhances not only task generalization but also single-task performance, demonstrating that PARETOGNN achieves better task generalization via the disjoint yet complementary knowledge learned from different philosophies. 
Our code is publicly available at https://github.com/jumxglhf/ParetoGNN.", "keywords": "Graph Neural Network;Self-supervised Learning", "primary_area": "", "supplementary_material": "/attachment/7af994fbc9df683665cd26a27869f3748371ebca.zip", "author": "Mingxuan Ju;Tong Zhao;Qianlong Wen;Wenhao Yu;Neil Shah;Yanfang Ye;Chuxu Zhang", "authorids": "~Mingxuan_Ju1;~Tong_Zhao3;~Qianlong_Wen1;~Wenhao_Yu2;~Neil_Shah2;~Yanfang_Ye1;~Chuxu_Zhang2", "gender": "M;M;M;M;M;;", "homepage": "https://jumxglhf.github.io;https://tzhao.io/;https://hoytwen.github.io/;https://wyu97.github.io/;http://nshah.net;http://yes-lab.org/;", "dblp": "234/2715;94/6503-3;301/6224;159/8117-2.html;71/7771;;", "google_scholar": "qNoO67AAAAAJ;05cRc-MAAAAJ;cc-uK9gAAAAJ;z4qSdX8AAAAJ;Qut69OgAAAAJ;egjr888AAAAJ;", "orcid": "0009-0008-9054-3856;0000-0001-7660-1732;0000-0003-3812-8395;0000-0002-4075-5980;0000-0003-3261-8430;;", "linkedin": ";;qianlong-wen-87550a1a7/;;;;", "or_profile": "~Mingxuan_Ju1;~Tong_Zhao3;~Qianlong_Wen1;~Wenhao_Yu2;~Neil_Shah2;~Yanfang_Ye1;~Chuxu_Zhang2", "aff": "University of Notre Dame;Snap Inc.;University of Notre Dame;University of Notre Dame;Snap Inc.;University of Notre Dame;", "aff_domain": "nd.edu;snap.com;nd.edu;nd.edu;snap.com;nd.edu;", "position": "PhD student;Researcher;PhD student;PhD student;Research Scientist;Associate Professor;", "bibtex": "@inproceedings{\nju2023multitask,\ntitle={Multi-task Self-supervised Graph Neural Networks Enable Stronger Task Generalization},\nauthor={Mingxuan Ju and Tong Zhao and Qianlong Wen and Wenhao Yu and Neil Shah and Yanfang Ye and Chuxu Zhang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=1tHAZRqftM}\n}", "github": "", "project": "", "reviewers": "15ww;WfQN;wz1H", "pdf_size": 761251, "recommendation": "6;6;6", "confidence": "3;4;2", "correctness": "4;4;3", "technical_novelty": "3;3;2", "empirical_novelty": "3;4;2", "wc_summary_paper": "40;63;105", "wc_strength_and_weaknesses": "266;176;161", "wc_clarity_quality_novelty_and_reproducibility": "21;13;8", "wc_summary_review": "40;2;27", "wc_review": "367;254;301", "wc_reply_reviewers": "12;0;0", "wc_reply_authors": "789;715;1256", "reply_reviewers": "1;0;0", "reply_authors": "3;1;2", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 69.33333333333333, 26.911377189252548 ], "wc_strength_and_weaknesses_avg": [ 201.0, 46.36809247747852 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 14.0, 5.354126134736337 ], "wc_summary_review_avg": [ 23.0, 15.769168230019828 ], "wc_review_avg": [ 307.3333333333333, 46.348918242200895 ], "wc_reply_reviewers_avg": [ 4.0, 5.656854249492381 ], "wc_reply_authors_avg": [ 920.0, 239.50086986620042 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 39, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15663663401797294798&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "pdf": "https://openreview.net/pdf?id=1tHAZRqftM", "email": "nd.edu;snap.com;nd.edu;nd.edu;snap.com;nd.edu;", "author_num": 7, "aff_unique_index": "0;1;0;0;1;0", 
"aff_unique_norm": "University of Notre Dame;Snap Inc.", "aff_unique_dep": ";", "aff_unique_url": "https://www.nd.edu;https://www.snapinc.com", "aff_unique_abbr": "Notre Dame;Snap", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "1tXzHPdOJGZ", "title": "On the Universal Approximation Property of Deep Fully Convolutional Neural Networks", "track": "main", "status": "Reject", "tldr": "", "abstract": " We study the approximation of shift-invariant or equivariant functions by deep fully convolutional networks from the dynamical systems perspective. We prove that deep residual fully convolutional networks and their continuous-layer counterpart can achieve universal approximation of these symmetric functions at constant channel width. Moreover, we show that the same can be achieved by non-residual variants with at least 2 channels in each layer and convolutional kernel size of at least 2. In addition, we show that these requirements are necessary, in the sense that networks with fewer channels or smaller kernels fail to be universal approximators.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Ting Lin;Zuowei Shen;Qianxiao Li", "authorids": "~Ting_Lin2;~Zuowei_Shen1;~Qianxiao_Li1", "gender": ";M;M", "homepage": ";https://blog.nus.edu.sg/matzuows/;https://blog.nus.edu.sg/qianxiaoli/", "dblp": ";;172/0930.html", "google_scholar": ";985QGhAAAAAJ;https://scholar.google.com.sg/citations?user=zLgReYoAAAAJ", "orcid": "0000-0003-2369-2559;;0000-0002-3903-3737", "linkedin": ";;", "or_profile": "~Ting_Lin2;~Zuowei_Shen1;~Qianxiao_Li1", "aff": "School of mathematical Science, Peking University, Peking University;National University of Singapore;National University of Singapore", "aff_domain": "math.pku.edu.cn;nus.edu;nus.edu.sg", "position": "PhD student;Full Professor;Assistant Professor", "bibtex": "@misc{\nlin2023on,\ntitle={On the Universal Approximation Property of Deep Fully Convolutional Neural Networks},\nauthor={Ting Lin and Zuowei Shen and Qianxiao Li},\nyear={2023},\nurl={https://openreview.net/forum?id=1tXzHPdOJGZ}\n}", "github": "", "project": "", "reviewers": "iJgd;RUpT;gqo7", "site": "https://openreview.net/forum?id=1tXzHPdOJGZ", "pdf_size": 512800, "recommendation": "5;5;6", "confidence": "4;2;4", "correctness": "4;3;4", "technical_novelty": "3;3;3", "empirical_novelty": "0;0;0", "wc_summary_paper": "57;121;101", "wc_strength_and_weaknesses": "161;159;84", "wc_clarity_quality_novelty_and_reproducibility": "1;29;121", "wc_summary_review": "8;66;51", "wc_review": "227;375;357", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1088;883;1463", "reply_reviewers": "0;0;0", "reply_authors": "2;2;2", "recommendation_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 0, 0 ], "wc_summary_paper_avg": [ 93.0, 26.733250207684563 ], "wc_strength_and_weaknesses_avg": [ 134.66666666666666, 35.83604640891936 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 50.333333333333336, 51.25968743130956 ], "wc_summary_review_avg": [ 41.666666666666664, 24.580932086115496 ], "wc_review_avg": [ 319.6666666666667, 65.93599590174972 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1144.6666666666667, 240.15041582771042 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 
2.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.4999999999999999, "corr_recommendation_correctness": 0.4999999999999999, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7750865921618027710&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Peking University;National University of Singapore", "aff_unique_dep": "School of Mathematical Sciences;", "aff_unique_url": "http://www.pku.edu.cn;https://www.nus.edu.sg", "aff_unique_abbr": "PKU;NUS", "aff_campus_unique_index": "0", "aff_campus_unique": "Peking;", "aff_country_unique_index": "0;1;1", "aff_country_unique": "China;Singapore" }, { "id": "1tfGKiwnJRJ", "title": "Risk-aware Bayesian RL for Cautious Exploration", "track": "main", "status": "Reject", "tldr": "", "abstract": "This paper addresses the problem of maintaining safety during training in Reinforcement Learning (RL), such that the safety constraint violations are bounded at any point during learning. Whilst enforcing safety during training might limit the agent's exploration, we propose a new architecture that handles the trade-off between efficient progress in exploration and safety maintenance. \nAs the agent's exploration progresses, we update Dirichlet-Categorical models of the transition probabilities of the Markov decision process that describes the agent's behavior within the environment by means of Bayesian inference. We then propose a way to approximate moments of the agent's belief about the risk associated with the agent's behavior originating from local action selection. We demonstrate that this approach can be easily coupled with RL, we provide rigorous theoretical guarantees, and we present experimental results to showcase the performance of the overall architecture.", "keywords": "Reinforcement learning;Bayesian inference;Safe learning;Risk;Safety Specification", "primary_area": "", "supplementary_material": "", "author": "Rohan Mitta;Hosein Hasanbeig;Daniel Kroening;Alessandro Abate", "authorids": "mitta.rohan@gmail.com;~Hosein_Hasanbeig1;~Daniel_Kroening1;~Alessandro_Abate1", "gender": ";;;M", "homepage": ";;https://www.kroening.com;https://www.cs.ox.ac.uk/people/alessandro.abate/", "dblp": ";;k/DanielKroening;19/3904", "google_scholar": ";;https://scholar.google.co.uk/citations?user=DHddutUAAAAJ;https://scholar.google.co.uk/citations?hl=en", "orcid": ";;0000-0002-6681-5283;0000-0002-5627-9093", "linkedin": ";;kroening/?originalSubdomain=uk;", "or_profile": "mitta.rohan@gmail.com;~Hosein_Hasanbeig1;~Daniel_Kroening1;~Alessandro_Abate1", "aff": ";;Amazon;University of Oxford", "aff_domain": ";;amazon.com;ox.ac.uk", "position": ";;Senior Principal Scientist;Full Professor", "bibtex": "@misc{\nmitta2023riskaware,\ntitle={Risk-aware Bayesian {RL} for Cautious Exploration},\nauthor={Rohan Mitta and Hosein Hasanbeig and Daniel Kroening and Alessandro Abate},\nyear={2023},\nurl={https://openreview.net/forum?id=1tfGKiwnJRJ}\n}", "github": "", "project": "", "reviewers": "BCMN;Fqdj;G8rL;qaTz;HHjA", "site": "https://openreview.net/forum?id=1tfGKiwnJRJ", "pdf_size": 1473240, "recommendation": "3;3;3;5;10", "confidence": "4;5;3;3;3", "correctness": "2;2;3;3;4", "technical_novelty": "3;2;2;2;4", "empirical_novelty": "2;2;0;0;4", "wc_summary_paper": "84;71;55;61;155", "wc_strength_and_weaknesses": "372;115;195;144;280", "wc_clarity_quality_novelty_and_reproducibility": "136;220;60;29;46", "wc_summary_review": "25;17;22;22;48", "wc_review": 
"617;423;332;256;529", "wc_reply_reviewers": "0;0;75;0;0", "wc_reply_authors": "130;287;504;175;56", "reply_reviewers": "0;0;1;0;0", "reply_authors": "1;1;1;1;1", "recommendation_avg": [ 4.8, 2.7129319932501077 ], "confidence_avg": [ 3.6, 0.8 ], "correctness_avg": [ 2.8, 0.7483314773547882 ], "technical_novelty_avg": [ 2.6, 0.8 ], "empirical_novelty_avg": [ 1.6, 1.4966629547095767 ], "wc_summary_paper_avg": [ 85.2, 36.25686141959891 ], "wc_strength_and_weaknesses_avg": [ 221.2, 93.93912922738852 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 98.2, 71.04477461432332 ], "wc_summary_review_avg": [ 26.8, 10.906878563548785 ], "wc_review_avg": [ 431.4, 130.14545708552413 ], "wc_reply_reviewers_avg": [ 15.0, 30.0 ], "wc_reply_authors_avg": [ 230.4, 155.96486783888224 ], "reply_reviewers_avg": [ 0.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.4976166020227778, "corr_recommendation_correctness": 0.8669214468630108, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Iv5eJeAyYEAJ:scholar.google.com/&scioq=Risk-aware+Bayesian+RL+for+Cautious+Exploration&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Amazon;University of Oxford", "aff_unique_dep": "Amazon.com, Inc.;", "aff_unique_url": "https://www.amazon.com;https://www.ox.ac.uk", "aff_unique_abbr": "Amazon;Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;United Kingdom" }, { "id": "1uPo_IrEp8", "title": "Online Reinforcement Learning via Posterior Sampling of Policy", "track": "main", "status": "Reject", "tldr": "", "abstract": "We propose a Reward-Weighted Posterior Sampling of Policy (RWPSP) algorithm to tackle the classic trade-off problem between exploration and exploitation under finite Markov decision processes (MDPs). The Thompson sampling method so far has only considered posterior sampling over transition probabilities, which is hard to gain the globally sub-optimal rewards. RWPSP runs posterior sampling over stationary policy distributions instead of transition probabilities, and meanwhile keeps transition probabilities updated. Particularly, we leverage both relevant count functions and reward-weighting to online update the policy posterior, aiming to balance between local and long-term policy distributions for a globally near-optimal game value. Theoretically, we establish a bound of $\\tilde{\\mathcal{O}}(\\Gamma\\sqrt{T}/S^{2})$\\footnote{The symbol $\\tilde{\\mathcal{O}}$ hides logarithmic factors.} on the total regret in time horizon $T$ with $\\Gamma/S^{2} < D\\sqrt{SA}$ satisfied in general, where $S$ and $A$ represents the sizes of state and action spaces, respectively, $D$ the diameter. This matches the best regret bound thus far for MDPs. 
Experimental results corroborate our theoretical results and show the advantage of our algorithm over the state of the art in terms of efficiency.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Shuqing Shi;zhiqiang xu;Fan Zhang;Zhiyou Yang;Yali Du;Hong Qu", "authorids": "~Shuqing_Shi1;~zhiqiang_xu1;~Fan_Zhang13;~Zhiyou_Yang1;~Yali_Du1;~Hong_Qu1", "gender": "M;M;M;M;;M", "homepage": ";https://scholar.google.com/citations?user=0R20iBMAAAAJ&hl=en;;;;https://www.scse.uestc.edu.cn/info/1081/11251.htm", "dblp": "314/5372;72/51-3.html;;314/5832;;", "google_scholar": "https://scholar.google.com.au/citations?hl=en;;icg-mqgAAAAJ;;;", "orcid": ";0000-0002-5693-8933;;0000-0001-8955-5835;;", "linkedin": ";;;;;", "or_profile": "~Shuqing_Shi1;~zhiqiang_xu1;~Fan_Zhang13;~Zhiyou_Yang1;~Yali_Du1;~Hong_Qu1", "aff": "University of Electronic Science and Technology of China;Mohamed bin Zayed University of Artificial Intelligence;University of Electronic Science and Technology of China;University of Electronic Science and Technology of China;;University of Electronic Science and Technology of China", "aff_domain": "uestc.edu.cn;mbzuai.ac.ae;uestc.edu.cn;uestc.edu.cn;;uestc.edu.cn", "position": "MS student;Assistant Professor;PhD student;PhD student;;Full Professor", "bibtex": "@misc{\nshi2023online,\ntitle={Online Reinforcement Learning via Posterior Sampling of Policy},\nauthor={Shuqing Shi and zhiqiang xu and Fan Zhang and Zhiyou Yang and Yali Du and Hong Qu},\nyear={2023},\nurl={https://openreview.net/forum?id=1uPo_IrEp8}\n}", "github": "", "project": "", "reviewers": "craj;RWGK;5tze;j9Gs", "site": "https://openreview.net/forum?id=1uPo_IrEp8", "pdf_size": 443937, "recommendation": "1;1;3;3", "confidence": "5;4;1;3", "correctness": "2;2;2;2", "technical_novelty": "2;1;3;3", "empirical_novelty": "2;0;0;2", "wc_summary_paper": "31;29;93;132", "wc_strength_and_weaknesses": "198;426;3;442", "wc_clarity_quality_novelty_and_reproducibility": "23;10;601;137", "wc_summary_review": "17;49;2;111", "wc_review": "269;514;699;822", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "249;515;1378;595", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;2;1", "recommendation_avg": [ 2.0, 1.0 ], "confidence_avg": [ 3.25, 1.479019945774904 ], "correctness_avg": [ 2.0, 0.0 ], "technical_novelty_avg": [ 2.25, 0.82915619758885 ], "empirical_novelty_avg": [ 1.0, 1.0 ], "wc_summary_paper_avg": [ 71.25, 43.49928160326329 ], "wc_strength_and_weaknesses_avg": [ 267.25, 180.52891042711136 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 192.75, 240.82605236975505 ], "wc_summary_review_avg": [ 44.75, 41.8471922594575 ], "wc_review_avg": [ 576.0, 208.40945276066535 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 684.25, 420.51835572302906 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.8451542547285166, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:acyh3cYGZEwJ:scholar.google.com/&scioq=Online+Reinforcement+Learning+via+Posterior+Sampling+of+Policy&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "University of Electronic Science and Technology of China;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "https://www.uestc.edu.cn;https://mbzuai.ac.ae", "aff_unique_abbr": 
"UESTC;MBZUAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "China;United Arab Emirates" }, { "id": "1usJZBGNrZ", "title": "Offline Reinforcement Learning with Closed-Form Policy Improvement Operators", "track": "main", "status": "Reject", "tldr": "We proposed a closed-form policy improvement operator and modeled the behavior policies as a Gaussian Mixture.", "abstract": "Behavior constrained policy optimization has been demonstrated to be a successful paradigm for tackling Offline Reinforcement Learning. By exploiting historical transitions, a policy is trained to maximize a learned value function while constrained by the behavior policy to avoid a significant distributional shift. In this paper, we propose our closed-form policy improvement operators. We make a novel observation that the behavior constraint naturally motivates the use of first-order Taylor approximation, leading to a linear approximation of the policy objective. Additionally, as practical datasets are usually collected by heterogeneous policies, we model the behavior policies as a Gaussian Mixture and overcome the induced optimization difficulties by leveraging the LogSumExp's lower bound and Jensen's Inequality, giving rise to a closed-form policy improvement operator. We instantiate an offline RL algorithm with our novel policy improvement operator and empirically demonstrate its effectiveness over state-of-the-art algorithms on the standard D4RL benchmark.", "keywords": "Offline Reinforcement Learning algorithms;Deep Reinforcement Learning", "primary_area": "", "supplementary_material": "/attachment/cfda72278dd503158f6eca2c1ff62f7d899d6bc9.zip", "author": "Jiachen Li;Edwin Zhang;Ming Yin;Qinxun Bai;Yu-Xiang Wang;William Yang Wang", "authorids": "~Jiachen_Li6;~Edwin_Zhang2;~Ming_Yin4;~Qinxun_Bai4;~Yu-Xiang_Wang1;~William_Yang_Wang2", "gender": "M;;M;M;;M", "homepage": "https://sites.google.com/view/jiachenli/;https://eddie.win;https://mingyin0312.github.io;;http://www.cs.ucsb.edu/~yuxiangw/publications.html;https://www.cs.ucsb.edu/~william/", "dblp": ";;89/453.html;;62/1637-3.html;08/9282", "google_scholar": "https://scholar.google.com/citations?hl=en;;ncBRYIUAAAAJ;p1tu16UAAAAJ;HGNZ1fkAAAAJ;gf8Ms_8AAAAJ", "orcid": ";;0000-0001-6458-0751;;;", "linkedin": ";;;;;", "or_profile": "~Jiachen_Li6;~Edwin_Zhang2;~Ming_Yin4;~Qinxun_Bai4;~Yu-Xiang_Wang1;~William_Wang1", "aff": "Amazon;Harvard University;UC, Santa Barbara;Horizon Robotics Inc.;UC Santa Barbara;UC Santa Barbara", "aff_domain": "amazon.com;harvard.edu;ucsb.edu;horizon.ai;ucsb.edu;ucsb.edu", "position": "Intern;PhD student;PhD student;Senior Research Scientist;Assistant Professor;Full Professor", "bibtex": "@misc{\nli2023offline,\ntitle={Offline Reinforcement Learning with Closed-Form Policy Improvement Operators},\nauthor={Jiachen Li and Edwin Zhang and Ming Yin and Qinxun Bai and Yu-Xiang Wang and William Yang Wang},\nyear={2023},\nurl={https://openreview.net/forum?id=1usJZBGNrZ}\n}", "github": "", "project": "", "reviewers": "m3D5;ypTb;CVbg", "site": "https://openreview.net/forum?id=1usJZBGNrZ", "pdf_size": 1672344, "recommendation": "5;6;6", "confidence": "4;2;3", "correctness": "3;3;3", "technical_novelty": "2;4;3", "empirical_novelty": "2;3;3", "wc_summary_paper": "52;112;33", "wc_strength_and_weaknesses": "170;208;116", "wc_clarity_quality_novelty_and_reproducibility": "86;136;12", "wc_summary_review": "701;21;37", "wc_review": "1009;477;198", "wc_reply_reviewers": "0;0;0", 
"wc_reply_authors": "1277;846;325", "reply_reviewers": "0;0;0", "reply_authors": "3;3;3", "recommendation_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 3.0, 0.816496580927726 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 65.66666666666667, 33.66831679124389 ], "wc_strength_and_weaknesses_avg": [ 164.66666666666666, 37.7477004450455 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 78.0, 50.93787065304817 ], "wc_summary_review_avg": [ 253.0, 316.8511743179543 ], "wc_review_avg": [ 561.3333333333334, 336.4167389150282 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 816.0, 389.23086551128836 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 3.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.8660254037844385, "corr_recommendation_correctness": 0.0, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7996976253603315756&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff_unique_index": "0;1;2;3;2;2", "aff_unique_norm": "Amazon;Harvard University;University of California, Santa Barbara;Horizon Robotics", "aff_unique_dep": "Amazon.com, Inc.;;;", "aff_unique_url": "https://www.amazon.com;https://www.harvard.edu;https://www.ucsb.edu;https://www.horizon-robotics.com/", "aff_unique_abbr": "Amazon;Harvard;UCSB;Horizon Robotics", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Santa Barbara", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "United States;China" }, { "title": "Constraining Representations Yields Models That Know What They Don't Know", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11420", "id": "1w_Amtk67X", "poster": "", "openreview": "https://openreview.net/forum?id=1w_Amtk67X", "slides": "https://iclr.cc/virtual/2023/poster/11420", "video": "https://iclr.cc/virtual/2023/poster/11420", "author_site": "Joao Monteiro, Pau Rodriguez Lopez, Pierre-Andr\u00e9 No\u00ebl, Issam Laradji, David Vazquez", "tldr": "We introduce a model class able to provide confidence scores indicating how likely it is that it is making an erroneous prediction.", "abstract": "A well-known failure mode of neural networks is that they may confidently return erroneous predictions. Such unsafe behaviour is particularly frequent when the use case slightly differs from the training context, and/or in the presence of an adversary. This work presents a novel direction to address these issues in a broad, general manner: imposing class-aware constraints on a model's internal activation patterns. Specifically, we assign to each class a unique, fixed, randomly-generated binary vector - hereafter called class code - and train the model so that its cross-depths activation patterns predict the appropriate class code according to the input sample's class. The resulting predictors are dubbed total activation classifiers (TAC), and TACs may either be trained from scratch, or used with negligible cost as a thin add-on on top of a frozen, pre-trained neural network. The distance between a TAC's activation pattern and the closest valid code acts as an additional confidence score, besides the default unTAC'ed prediction head's. 
In the add-on case, the original neural network's inference head is completely unaffected (so its accuracy remains the same), but we now have the option to use TAC's own confidence and prediction when determining which course of action to take in a hypothetical production workflow. In particular, we show that TAC strictly improves the value derived from models allowed to reject/defer. We provide further empirical evidence that TAC works well on multiple types of architectures and data modalities, and that it is at least as good as state-of-the-art alternative confidence scores derived from existing models.", "keywords": "Rejecting classifiers;Selective classification;Uncertainty estimation;Robust classification;Out-of-distribution detection", "primary_area": "", "supplementary_material": "", "author": "Joao Monteiro;Pau Rodriguez;Pierre-Andre Noel;Issam H. Laradji;David Vazquez", "authorids": "~Joao_Monteiro1;~Pau_Rodriguez2;~Pierre-Andre_Noel1;~Issam_H._Laradji1;~David_Vazquez1", "gender": "M;M;M;M;", "homepage": ";;https://issamlaradji.github.io/;http://www.david-vazquez.com;https://prlz77.github.io", "dblp": "215/5354-2;47/9226.html;142/0043;94/8653;190/7735", "google_scholar": "https://scholar.google.ca/citations?hl=en;https://scholar.google.com/citations?hl=en;https://scholar.google.ca/citations?user=8vRS7F0AAAAJ;1jHvtfsAAAAJ;https://scholar.google.es/citations?user=IwBx73wAAAAJ", "orcid": ";0000-0001-6979-1873;;0000-0002-2845-8158;0000-0002-1689-8084", "linkedin": "joao-monteiro-47180256/;panoel/;issam-laradji-67ba1a99/;https://www.linkedin.com/company/david-vazquez/;", "or_profile": "~Joao_Monteiro1;~Pierre-Andre_Noel1;~Issam_H._Laradji1;~David_Vazquez1;~Pau_Rodriguez_Lopez1", "aff": "ServiceNow Research;ServiceNow;ServiceNow;ServiceNow research;Apple", "aff_domain": "servicenow.com;servicenow.com;servicenow.com;servicenow.com;apple.com", "position": "Researcher;Researcher;Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\nmonteiro2023constraining,\ntitle={Constraining Representations Yields Models That Know What They Don't Know},\nauthor={Joao Monteiro and Pau Rodriguez and Pierre-Andre Noel and Issam H. 
Laradji and David Vazquez},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=1w_Amtk67X}\n}", "github": "", "project": "", "reviewers": "eB98;sw7u;rL3k", "pdf_size": 617660, "recommendation": "6;6;8", "confidence": "4;4;4", "correctness": "2;2;3", "technical_novelty": "2;2;2", "empirical_novelty": "3;3;3", "wc_summary_paper": "254;282;58", "wc_strength_and_weaknesses": "158;169;388", "wc_clarity_quality_novelty_and_reproducibility": "56;164;41", "wc_summary_review": "50;77;53", "wc_review": "518;692;540", "wc_reply_reviewers": "0;0;13", "wc_reply_authors": "663;996;1437", "reply_reviewers": "0;0;1", "reply_authors": "1;3;3", "recommendation_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 198.0, 99.65273035229224 ], "wc_strength_and_weaknesses_avg": [ 238.33333333333334, 105.92555037488465 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 87.0, 54.7905101272109 ], "wc_summary_review_avg": [ 60.0, 12.083045973594572 ], "wc_review_avg": [ 583.3333333333334, 77.36206362868619 ], "wc_reply_reviewers_avg": [ 4.333333333333333, 6.128258770283413 ], "wc_reply_authors_avg": [ 1032.0, 317.00788633723295 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.9428090415820634 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.9999999999999998, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15773939179790826581&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "pdf": "https://openreview.net/pdf?id=1w_Amtk67X", "email": "servicenow.com;servicenow.com;servicenow.com;servicenow.com;apple.com", "author_num": 5, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "ServiceNow;Apple", "aff_unique_dep": "Research;Apple Inc.", "aff_unique_url": "https://www.servicenow.com;https://www.apple.com", "aff_unique_abbr": "ServiceNow;Apple", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "1yaLQb4mIl", "title": "Can Fair Federated Learning reduce the need for personalization?", "track": "main", "status": "Reject", "tldr": "This work evaluates Q-Fair Federated Learning as an alternative to personalization; we find that it does not satisfactorily improve local federated model performance, and propose an approach based on Knowledge Distillation that offers favourable results.", "abstract": "Federated Learning (FL) allows edge devices to collaboratively train machine learning models without sharing local data. Since the data distribution varies across client partitions, the performance of the federated model on local data also varies. To solve this, fair FL approaches attempt to reduce the accuracy disparity between local partitions by emphasizing clients with larger losses during training; while local adaptation personalizes the federated model by re-training on local data to provide a device participation incentive in cases where a federated model underperforms relative to one trained locally---i.e., their accuracy difference is less than zero. 
This paper evaluates Q-Fair Federated Learning (Q-FFL) in this relative domain and determines whether it provides a better starting point for personalization or supplants it. Contrary to expectation, Q-FFL does not significantly reduce the number of underperforming clients in a language task while doubling them in an image recognition task. Furthermore, fairness levels which maintain average accuracy provide no benefit to relative accuracy in federated or adapted models. We postulate that Q-FFL is unsuitable for our goal since clients with highly accurate local models require the federated model to have a disproportionate local partition accuracy to receive a benefit. Instead, we propose using knowledge distillation during FL training to create models with a higher local accuracy floor without forfeiting the ceiling. Our preliminary evaluation shows a 50% reduction in underperforming clients in the language task with no increase in underperforming clients for the image task. Thus, we argue that this simple change represents a more promising avenue for reducing the need for personalization than fairness.", "keywords": "Federated Learning;Fair Federated Learning;FL;Fair FL;Local Adaptation;Personalization;Machine Learning;ML;Deep Learning;DL;Distributed Machine Learning", "primary_area": "", "supplementary_material": "", "author": "Alex Iacob;Pedro Porto Buarque de Gusmao;Nicholas Donald Lane", "authorids": "~Alex_Iacob1;pp524@cam.ac.uk;~Nicholas_Donald_Lane1", "gender": ";;", "homepage": ";;", "dblp": ";;", "google_scholar": ";;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Alex_Iacob1;pp524@cam.ac.uk;~Nicholas_Donald_Lane1", "aff": ";;", "aff_domain": ";;", "position": ";;", "bibtex": "@misc{\niacob2023can,\ntitle={Can Fair Federated Learning reduce the need for personalization?},\nauthor={Alex Iacob and Pedro Porto Buarque de Gusmao and Nicholas Donald Lane},\nyear={2023},\nurl={https://openreview.net/forum?id=1yaLQb4mIl}\n}", "github": "", "project": "", "reviewers": "Wm3o;AD53;j5Ef;C1c4", "site": "https://openreview.net/forum?id=1yaLQb4mIl", "pdf_size": 3608215, "recommendation": "3;3;3;5", "confidence": "3;3;4;3", "correctness": "2;3;2;3", "technical_novelty": "2;2;2;2", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "41;46;53;122", "wc_strength_and_weaknesses": "169;127;91;211", "wc_clarity_quality_novelty_and_reproducibility": "20;8;10;92", "wc_summary_review": "16;33;43;114", "wc_review": "246;214;197;539", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "332;255;299;343", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 65.5, 32.89756829919196 ], "wc_strength_and_weaknesses_avg": [ 149.5, 44.97499305169485 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 32.5, 34.65183977799736 ], "wc_summary_review_avg": [ 51.5, 37.35304539123952 ], "wc_review_avg": [ 299.0, 139.676411752307 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 307.25, 34.23722389446901 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=4806580583666385613&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5 }, { "id": "1yclzf1DWsf", "title": "Open-Set 3D Detection via Image-level Class and Debiased Cross-modal Contrastive Learning", "track": "main", "status": "Reject", "tldr": "We propose an open-set 3D detection method that detects unseen categories without corresponding 3D labels", "abstract": "Current point-cloud detection methods have difficulty detecting the open-set objects in the real world, due to their limited generalization capability. Moreover, it is extremely laborious and expensive to collect and fully annotate a point-cloud detection dataset with numerous classes of objects, leading to the limited classes of existing point-cloud datasets and hindering the model to learn general representations to achieve open-set point-cloud detection. Instead of seeking a point-cloud dataset with full labels, we resort to ImageNet1K to broaden the vocabulary of the point-cloud detector. We propose OS-3DETIC, an Open-Set 3D DETector using Image-level Class supervision. Specifically, we take advantage of two modalities, the image modality for recognition and the point-cloud modality for localization, to generate pseudo labels for unseen classes. Then we propose a novel debiased cross-modal cross-task contrastive learning method to transfer the knowledge from image modality to point-cloud modality during training. Without hurting the latency during inference, OS-3DETIC makes the well-known point-cloud detector capable of achieving open-set detection. Extensive experiments demonstrate that the proposed OS-3DETIC achieves at least 10.77 % mAP improvement (absolute value) and 9.56 % mAP improvement (absolute value) by a wide range of baselines on the SUN-RGBD dataset and ScanNet dataset, respectively. 
In addition, we conduct extensive experiments to shed light on why the proposed OS-3DETIC works.", "keywords": "open vocabulary;3d detection;contrastive learning", "primary_area": "", "supplementary_material": "/attachment/e6c137cf1f5c9bb8577dc06eb96114fce9217125.zip", "author": "Yuheng Lu;Chenfeng Xu;Xiaobao Wei;Xiaodong Xie;Masayoshi Tomizuka;Kurt Keutzer;Shanghang Zhang", "authorids": "~Yuheng_Lu3;~Chenfeng_Xu1;~Xiaobao_Wei1;~Xiaodong_Xie1;~Masayoshi_Tomizuka1;~Kurt_Keutzer1;~Shanghang_Zhang4", "gender": "M;M;M;M;M;M;F", "homepage": ";;;http://idm.pku.edu.cn/en/info/1009/1010.htm;https://me.berkeley.edu/people/masayoshi-tomizuka/;https://people.eecs.berkeley.edu/~keutzer/;https://www.shanghangzhang.com/", "dblp": "155/3107;65/1881;319/5692;;10/4434;k/KurtKeutzer.html;95/11531", "google_scholar": "VC3pSuQAAAAJ;RpqvaTUAAAAJ;MGAz5PkAAAAJ;;;ID9QePIAAAAJ;voqw10cAAAAJ", "orcid": ";0000-0002-4941-6985;;;;0000-0003-3868-8501;", "linkedin": ";;;;;kurtkeutzer/;", "or_profile": "~Yuheng_Lu3;~Chenfeng_Xu1;~Xiaobao_Wei1;~Xiaodong_Xie1;~Masayoshi_Tomizuka1;~Kurt_Keutzer1;~Shanghang_Zhang1", "aff": ";University of California, Berkeley;Beihang University;Peking University;University of California, Berkeley;University of California, Berkeley;Peking University", "aff_domain": ";berkeley.edu;buaa.edu.cn;pku.edu.cn;berkeley.edu;berkeley.edu;pku.edu.cn", "position": ";PhD student;Undergrad student;Full Professor;Full Professor;Full Professor;Assistant Professor", "bibtex": "@misc{\nlu2023openset,\ntitle={Open-Set 3D Detection via Image-level Class and Debiased Cross-modal Contrastive Learning},\nauthor={Yuheng Lu and Chenfeng Xu and Xiaobao Wei and Xiaodong Xie and Masayoshi Tomizuka and Kurt Keutzer and Shanghang Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=1yclzf1DWsf}\n}", "github": "", "project": "", "reviewers": "2qoz;6Lrw;fcKh;rTqP", "site": "https://openreview.net/forum?id=1yclzf1DWsf", "pdf_size": 12369592, "recommendation": "6;6;6;6", "confidence": "4;5;3;5", "correctness": "3;4;3;3", "technical_novelty": "3;2;2;3", "empirical_novelty": "3;2;3;0", "wc_summary_paper": "99;73;197;98", "wc_strength_and_weaknesses": "306;131;283;218", "wc_clarity_quality_novelty_and_reproducibility": "49;29;157;26", "wc_summary_review": "29;33;43;34", "wc_review": "483;266;680;376", "wc_reply_reviewers": "0;9;21;0", "wc_reply_authors": "877;975;1550;1517", "reply_reviewers": "0;1;1;0", "reply_authors": "4;4;4;3", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.0, 1.224744871391589 ], "wc_summary_paper_avg": [ 116.75, 47.488814472462884 ], "wc_strength_and_weaknesses_avg": [ 234.5, 67.913547985656 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 65.25, 53.70463201624232 ], "wc_summary_review_avg": [ 34.75, 5.11737237261468 ], "wc_review_avg": [ 451.25, 152.73731534893494 ], "wc_reply_reviewers_avg": [ 7.5, 8.616843969807043 ], "wc_reply_authors_avg": [ 1229.75, 305.9422943955281 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 3.75, 0.4330127018922193 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Egl1TMUyB58J:scholar.google.com/&scioq=Open-Set+3D+Detection+via+Image-level+Class+and+Debiased+Cross-modal+Contrastive+Learning&hl=en&as_sdt=0,44", "gs_version_total": 0, 
"aff_unique_index": "0;1;2;0;0;2", "aff_unique_norm": "University of California, Berkeley;Beihang University;Peking University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.berkeley.edu;http://www.buaa.edu.cn/;http://www.pku.edu.cn", "aff_unique_abbr": "UC Berkeley;BUAA;Peking U", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;1;1;0;0;1", "aff_country_unique": "United States;China" }, { "id": "1z9VTrxCgf", "title": "Semantic Image Manipulation with Background-guided Internal Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Image manipulation has attracted a lot of interest due to its wide range of applications. Prior work modifies images either from low-level manipulation, such as image inpainting or through manual edits via paintbrushes and scribbles, or from high-level manipulation, employing deep generative networks to output an image conditioned on high-level semantic input. In this study, we propose Semantic Image Manipulation with Background-guided Internal Learning (SIMBIL), which combines high-level and low-level manipulation. Specifically, users can edit an image at the semantic level by applying changes on the scene graph. Then our model manipulates the image at the pixel level according to the modified scene graph. There are two major advantages of our approach. First, high-level manipulation requires less manual effort from the user compared to manipulating raw image pixels. Second, our low-level internal learning approach is scalable to images of various sizes without reliance on external visual datasets for training. We outperform the state-of-the-art in a quantitative and qualitative evaluation on CLEVR and Visual Genome datasets. Experiments show around 8 points improvement of SSIM (RoI) on CLEVR and around 25% improvement of user evaluation accuracy on Visual Genome, demonstrating the effectiveness of our approach.", "keywords": "semantic image manipulation;internal learning;scene-graph driven image editing", "primary_area": "", "supplementary_material": "", "author": "Zhongping Zhang;Huiwen He;Bryan A. Plummer;Zhenyu Liao;Huayan Wang", "authorids": "~Zhongping_Zhang1;huiwenhe@bu.edu;~Bryan_A._Plummer1;zyliao@amazon.com;wanghy514@gmail.com", "gender": "M;;;;", "homepage": "http://cs-people.bu.edu/zpzhang/;;;;", "dblp": "132/6203;;;;", "google_scholar": "6C20vTwAAAAJ;;;;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Zhongping_Zhang1;huiwenhe@bu.edu;~Bryan_A._Plummer1;zyliao@amazon.com;wanghy514@gmail.com", "aff": "Boston University;;;;", "aff_domain": "bu.edu;;;;", "position": "PhD student;;;;", "bibtex": "@misc{\nzhang2023semantic,\ntitle={Semantic Image Manipulation with Background-guided Internal Learning},\nauthor={Zhongping Zhang and Huiwen He and Bryan A. 
Plummer and Zhenyu Liao and Huayan Wang},\nyear={2023},\nurl={https://openreview.net/forum?id=1z9VTrxCgf}\n}", "github": "", "project": "", "reviewers": "8S2u;mRHW;Pno2;BfVZ", "site": "https://openreview.net/forum?id=1z9VTrxCgf", "pdf_size": 11328299, "recommendation": "3;5;5;6", "confidence": "5;4;4;4", "correctness": "3;4;2;3", "technical_novelty": "1;2;2;3", "empirical_novelty": "1;2;2;3", "wc_summary_paper": "44;33;57;62", "wc_strength_and_weaknesses": "251;244;270;508", "wc_clarity_quality_novelty_and_reproducibility": "44;20;8;51", "wc_summary_review": "20;40;3;60", "wc_review": "359;337;338;681", "wc_reply_reviewers": "0;45;44;0", "wc_reply_authors": "1066;1348;1009;1146", "reply_reviewers": "0;1;1;0", "reply_authors": "2;3;3;2", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 49.0, 11.335784048754634 ], "wc_strength_and_weaknesses_avg": [ 318.25, 109.96448290243536 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 30.75, 17.455300054711177 ], "wc_summary_review_avg": [ 30.75, 21.370248009791556 ], "wc_review_avg": [ 428.75, 145.90129368857563 ], "wc_reply_reviewers_avg": [ 22.25, 22.252808811473667 ], "wc_reply_authors_avg": [ 1142.25, 128.37128767758 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.9271726499455306, "corr_recommendation_correctness": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16197469781054360879&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Boston University", "aff_unique_dep": "", "aff_unique_url": "https://www.bu.edu", "aff_unique_abbr": "BU", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "1zaoVA_z8Q", "title": "SemSup-XC: Semantic Supervision for Extreme Classification", "track": "main", "status": "Withdraw", "tldr": "We propose a new model for extreme classification over very large label spaces and achieve SOTA results on three popular benchmarks.", "abstract": "Extreme classification (XC) considers the scenario of predicting over a very large number of classes (thousands to millions), with real-world applications including serving search engine results, e-commerce product tagging, and news article classification. The zero-shot version of this task involves the addition of new categories at test time, requiring models to generalize to novel classes without\nadditional training data (e.g. one may add a new class \u201cfidget spinner\u201d for ecommerce product tagging). In this paper, we develop SEMSUP-XC, a model that achieves state-of-the-art zero-shot (ZS) and few-shot (FS) performance on three extreme classification benchmarks spanning the domains of law, e-commerce, and Wikipedia. SEMSUP-XC builds upon the recently proposed framework of semantic supervision that uses semantic label descriptions to represent and generalize to classes (e.g., \u201cfidget spinner\u201d described as \u201cA popular spinning toy intended as a stress reliever\u201d). Specifically, we use a combination of contrastive learning, a hybrid lexico-semantic similarity module and automated description collection to train SEMSUP-XC efficiently over extremely large class spaces. 
SEMSUP-XC\nsignificantly outperforms baselines and state-of-the-art models on all three datasets, by up to 6-10 precision@1 points on zero-shot classification and >10 precision points on few-shot classification, with similar gains for recall@10 (3 for zero-shot and 2 for few-shot). Our ablation studies and qualitative analyses demonstrate the relative importance of our various improvements and show that SEMSUP-XC\u2019s\nautomated pipeline offers a consistently efficient method for extreme classification.", "keywords": "Extreme classification;zero-shot inference;few-shot learning", "primary_area": "", "supplementary_material": "/attachment/d772bb23b2fc5c4e3f56284c5c72d3e5409553fc.zip", "author": "Pranjal Aggarwal;Ameet Deshpande;Karthik R Narasimhan", "authorids": "~Pranjal_Aggarwal1;~Ameet_Deshpande1;~Karthik_R_Narasimhan1", "gender": "M;M;M", "homepage": "https://github.com/Pranjal2041/;http://www.karthiknarasimhan.com;https://ameet-1997.github.io", "dblp": "163/0764;147/0322;220/4337", "google_scholar": "https://scholar.google.com/citations?hl=en;euc0GX4AAAAJ;332L1coAAAAJ", "orcid": "0000-0002-2962-1535;;", "linkedin": ";;", "or_profile": "~Pranjal_Aggarwal1;~Karthik_R_Narasimhan1;~Ameet_S_Deshpande1", "aff": "Indian Institute of Technology, Delhi;Princeton University;Princeton University", "aff_domain": "iitd.ac.in;princeton.edu;princeton.edu", "position": "Undergrad student;Assistant Professor;PhD student", "bibtex": "@misc{\naggarwal2023semsupxc,\ntitle={SemSup-{XC}: Semantic Supervision for Extreme Classification},\nauthor={Pranjal Aggarwal and Ameet Deshpande and Karthik R Narasimhan},\nyear={2023},\nurl={https://openreview.net/forum?id=1zaoVA_z8Q}\n}", "github": "", "project": "", "reviewers": "9AdX;xoaf;1k36;nXRD", "site": "https://openreview.net/forum?id=1zaoVA_z8Q", "pdf_size": 738844, "recommendation": "5;5;5;5", "confidence": "4;4;3;4", "correctness": "3;3;3;3", "technical_novelty": "2;2;2;2", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "59;92;77;188", "wc_strength_and_weaknesses": "173;149;301;326", "wc_clarity_quality_novelty_and_reproducibility": "49;65;32;82", "wc_summary_review": "70;81;32;144", "wc_review": "351;387;442;740", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "542;691;794;968", "reply_reviewers": "0;0;0;0", "reply_authors": "2;2;2;3", "recommendation_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 104.0, 49.88486744494767 ], "wc_strength_and_weaknesses_avg": [ 237.25, 77.22815225032902 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 57.0, 18.560711193270585 ], "wc_summary_review_avg": [ 81.75, 40.2763888649417 ], "wc_review_avg": [ 480.0, 153.56920264167553 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 748.75, 155.07961664899742 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:2eYJ9O1ZInoJ:scholar.google.com/&scioq=SemSup-XC:+Semantic+Supervision+for+Extreme+Classification&hl=en&as_sdt=0,31", "gs_version_total": 0, "aff_unique_index": "0;1;1", "aff_unique_norm": "Indian Institute of Technology Delhi;Princeton University", "aff_unique_dep": ";", "aff_unique_url": 
"https://www.iitdelhi.ac.in;https://www.princeton.edu", "aff_unique_abbr": "IIT Delhi;Princeton", "aff_campus_unique_index": "0", "aff_campus_unique": "Delhi;", "aff_country_unique_index": "0;1;1", "aff_country_unique": "India;United States" }, { "title": "Self-Supervised Category-Level Articulated Object Pose Estimation with Part-Level SE(3) Equivariance", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12050", "id": "20GtJ6hIaPA", "poster": "/media/PosterPDFs/ICLR%202023/12050.png?t=1682186916.792952", "openreview": "https://openreview.net/forum?id=20GtJ6hIaPA", "slides": "https://iclr.cc/virtual/2023/poster/12050", "video": "https://iclr.cc/virtual/2023/poster/12050", "author_site": "Xueyi Liu, Ji Zhang, Ruizhen Hu, Haibin Huang, He Wang, Li Yi", "tldr": "", "abstract": "Category-level articulated object pose estimation aims to estimate a hierarchy of articulation-aware object poses of an unseen articulated object from a known category. To reduce the heavy annotations needed for supervised learning methods, we present a novel self-supervised strategy that solves this problem without any human labels. Our key idea is to factorize canonical shapes and articulated object poses from input articulated shapes through part-level equivariant shape analysis. Specifically, we first introduce the concept of part-level SE(3) equivariance and devise a network to learn features of such property. Then, through a carefully designed fine-grained pose-shape disentanglement strategy, we expect that canonical spaces to support pose estimation could be induced automatically. Thus, we could further predict articulated object poses as per-part rigid transformations describing how parts transform from their canonical part spaces to the camera space. 
Extensive experiments demonstrate the effectiveness of our method on both complete and partial point clouds from synthetic and real articulated object datasets.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/ef58b1aafcb2394afa1b5ee45b3512491e0f043a.zip", "author": "Xueyi Liu;Ji Zhang;Ruizhen Hu;Haibin Huang;He Wang;Li Yi", "authorids": "~Xueyi_Liu1;~Ji_Zhang6;~Ruizhen_Hu1;~Haibin_Huang1;~He_Wang5;~Li_Yi2", "gender": "F;M;F;M;M;M", "homepage": "https://meowuu7.github.io;;https://csse.szu.edu.cn/staff/ruizhenhu/;https://brotherhuang.github.io/;https://hughw19.github.io;https://ericyi.github.io/", "dblp": "47/10627;;14/11270;;01/6368-10;26/4239-1", "google_scholar": "bspSfNEAAAAJ;;https://scholar.google.ca/citations?user=MloRITsAAAAJ;YDl1M80AAAAJ;roCAWkoAAAAJ;UyZL660AAAAJ", "orcid": ";;;;;", "linkedin": ";jizhang123;;;;", "or_profile": "~Xueyi_Liu1;~Ji_Zhang6;~Ruizhen_Hu1;~Haibin_Huang1;~He_Wang5;~Li_Yi2", "aff": "Tsinghua University;Fudan University;Shenzhen University;Kuaishou Technology;Peking University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;fudan.edu.cn;szu.edu.cn;kuaishou.com;pku.edu.cn;tsinghua.edu.cn", "position": "PhD student;Undergrad student;Associate Professor;Sr.Research Scientist;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nliu2023selfsupervised,\ntitle={Self-Supervised Category-Level Articulated Object Pose Estimation with Part-Level {SE}(3) Equivariance},\nauthor={Xueyi Liu and Ji Zhang and Ruizhen Hu and Haibin Huang and He Wang and Li Yi},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=20GtJ6hIaPA}\n}", "github": "", "project": "", "reviewers": "QLf2;gB2X;sxak;GUmv", "pdf_size": 37365338, "recommendation": "6;6;6;10", "confidence": "2;2;3;3", "correctness": "4;4;4;4", "technical_novelty": "3;3;2;4", "empirical_novelty": "2;2;3;4", "wc_summary_paper": "93;89;67;140", "wc_strength_and_weaknesses": "74;126;194;191", "wc_clarity_quality_novelty_and_reproducibility": "38;32;86;61", "wc_summary_review": "26;20;22;39", "wc_review": "231;267;369;431", "wc_reply_reviewers": "0;0;0;22", "wc_reply_authors": "182;959;1114;972", "reply_reviewers": "0;0;0;1", "reply_authors": "2;3;3;3", "recommendation_avg": [ 7.0, 1.7320508075688772 ], "confidence_avg": [ 2.5, 0.5 ], "correctness_avg": [ 4.0, 0.0 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 97.25, 26.592997198510737 ], "wc_strength_and_weaknesses_avg": [ 146.25, 49.781397127842844 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 54.25, 21.288200957337846 ], "wc_summary_review_avg": [ 26.75, 7.39509972887452 ], "wc_review_avg": [ 324.5, 79.64138371474971 ], "wc_reply_reviewers_avg": [ 5.5, 9.526279441628825 ], "wc_reply_authors_avg": [ 806.75, 365.7877629172414 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.0, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17133608603914851123&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=20GtJ6hIaPA", "email": "tsinghua.edu.cn;fudan.edu.cn;szu.edu.cn;kuaishou.com;pku.edu.cn;tsinghua.edu.cn", "author_num": 6, "aff_unique_index": "0;1;2;3;4;0", "aff_unique_norm": "Tsinghua 
University;Fudan University;Shenzhen University;Kuaishou Technology;Peking University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.fudan.edu.cn;https://www.szu.edu.cn;https://www.kuaishou.com;http://www.pku.edu.cn", "aff_unique_abbr": "THU;Fudan;SZU;Kuaishou;Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Performance Bounds for Model and Policy Transfer in Hidden-parameter MDPs", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11041", "id": "20gBzEzgtiI", "poster": "", "openreview": "https://openreview.net/forum?id=20gBzEzgtiI", "slides": "https://iclr.cc/virtual/2023/poster/11041", "video": "https://iclr.cc/virtual/2023/poster/11041", "author_site": "Haotian Fu, Jiayu Yao, Omer Gottesman, Finale Doshi-Velez, George D Konidaris", "tldr": "", "abstract": "In the Hidden-Parameter MDP (HiP-MDP) framework, a family of reinforcement learning tasks is generated by varying hidden parameters specifying the dynamics and reward function for each individual task. HiP-MDP is a natural model for families of tasks in which meta- and lifelong-reinforcement learning approaches can succeed. Given a learned context encoder that infers the hidden parameters from previous experience, most existing algorithms fall into two categories: $\\textit{model transfer}$ and $\\textit{policy transfer}$, depending on which function the hidden parameters are used to parameterize. We characterize the robustness of model and policy transfer algorithms with respect to hidden parameter estimation error. We first show that the value function of HiP-MDPs is Lipschitz continuous under certain conditions. We then derive regret bounds for both settings through the lens of Lipschitz continuity. 
Finally, we empirically corroborate our theoretical analysis by experimentally varying the hyper-parameters governing the Lipschitz constants of two continuous control problems; the resulting performance is consistent with our predictions.", "keywords": "Reinforcement learning;Meta learning;Transfer learning;Theory", "primary_area": "", "supplementary_material": "", "author": "Haotian Fu;Jiayu Yao;Omer Gottesman;Finale Doshi-Velez;George Konidaris", "authorids": "~Haotian_Fu3;~Jiayu_Yao1;~Omer_Gottesman1;~Finale_Doshi-Velez1;~George_Konidaris1", "gender": "M;F;M;F;M", "homepage": "https://haotianfu.me/;;https://omergott.github.io/;https://finale.seas.harvard.edu/;http://cs.brown.edu/people/gdk/", "dblp": "237/9681;40/7704;;64/7056;56/6762", "google_scholar": "btaP96wAAAAJ;WNoNASEAAAAJ;glNJx5zYUbsC;https://scholar.google.com/citations?hl=en;9UERvVEAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Haotian_Fu3;~Jiayu_Yao1;~Omer_Gottesman1;~Finale_Doshi-Velez1;~George_Konidaris1", "aff": "Brown University;Harvard University;Amazon;Harvard University;Brown University", "aff_domain": "brown.edu;harvard.edu;amazon.com;harvard.edu;brown.edu", "position": "PhD student;PhD student;Researcher;Professor;Assistant Professor", "bibtex": "@inproceedings{\nfu2023performance,\ntitle={Performance Bounds for Model and Policy Transfer in Hidden-parameter {MDP}s},\nauthor={Haotian Fu and Jiayu Yao and Omer Gottesman and Finale Doshi-Velez and George Konidaris},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=20gBzEzgtiI}\n}", "github": "", "project": "", "reviewers": "zeh4;qyA5;g5Zd", "pdf_size": 1303572, "recommendation": "5;6;8", "confidence": "3;2;3", "correctness": "4;3;4", "technical_novelty": "2;3;3", "empirical_novelty": "0;2;3", "wc_summary_paper": "75;85;98", "wc_strength_and_weaknesses": "270;228;50", "wc_clarity_quality_novelty_and_reproducibility": "29;29;41", "wc_summary_review": "29;24;20", "wc_review": "403;366;209", "wc_reply_reviewers": "104;0;0", "wc_reply_authors": "1459;558;43", "reply_reviewers": "1;0;0", "reply_authors": "4;1;1", "recommendation_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.6666666666666667, 1.247219128924647 ], "wc_summary_paper_avg": [ 86.0, 9.41629792788369 ], "wc_strength_and_weaknesses_avg": [ 182.66666666666666, 95.3636315490461 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 33.0, 5.656854249492381 ], "wc_summary_review_avg": [ 24.333333333333332, 3.6817870057290873 ], "wc_review_avg": [ 326.0, 84.09914783555578 ], "wc_reply_reviewers_avg": [ 34.666666666666664, 49.026070162267295 ], "wc_reply_authors_avg": [ 686.6666666666666, 585.195314023921 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 1.4142135623730951 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.18898223650461363, "corr_recommendation_correctness": 0.18898223650461363, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3653266121375650719&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "pdf": "https://openreview.net/pdf?id=20gBzEzgtiI", "email": "brown.edu;harvard.edu;amazon.com;harvard.edu;brown.edu", "author_num": 5, "aff_unique_index": "0;1;2;1;0", 
"aff_unique_norm": "Brown University;Harvard University;Amazon", "aff_unique_dep": ";;Amazon.com, Inc.", "aff_unique_url": "https://www.brown.edu;https://www.harvard.edu;https://www.amazon.com", "aff_unique_abbr": "Brown;Harvard;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "20tAZh6Ut3", "title": "Improving Accuracy and Explainability of Online Handwriting Recognition", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Handwriting recognition technology allows recognizing a written text from a given data. The recognition task can target letters, symbols, or words, and the input data can be a digital image or recorded by various sensors. A wide range of applications from signature verification to electronic document processing can be realized by implementing efficient and accurate handwriting recognition algorithms. Over the years, there has been an increasing interest in experimenting with different types of technology to collect handwriting data, create datasets, and develop algorithms to recognize characters and symbols. More recently, the OnHW-chars dataset has been published that contains multivariate time series data of the English alphabet collected using a ballpoint pen fitted with sensors. The authors of OnHW-chars also provided some baseline results through their machine learning (ML) and deep learning (DL) classifiers.\n\nIn this paper, we develop handwriting recognition models on the OnHW-chars dataset and improve the accuracy of previous models. More specifically, our ML models provide $11.3\\%$-$23.56\\%$ improvements over the previous ML models, and our optimized DL models with ensemble learning provide $3.08\\%$-$7.01\\%$ improvements over the previous DL models. In addition to our accuracy improvements over the spectrum, we aim to provide some level of explainability for our models to provide more logic behind chosen methods and why the models make sense for the data type in the dataset. 
Our source code, data, and models will be made publicly available for verifiability and reproducibility of our results.", "keywords": "Machine Learning;Deep Learning;Explainability;Computer vision;Ensemble Learning", "primary_area": "", "supplementary_material": "", "author": "Jonathan Gold;Hilda Azimi;Steven Chang;Koray Karabina", "authorids": "~Jonathan_Gold1;hilda.azimi@nrc-cnrc.gc.ca;h42chang@uwaterloo.ca;koray.karabina@nrc-cnrc.gc.ca", "gender": "M;;;", "homepage": ";;;", "dblp": ";;;", "google_scholar": ";;;", "orcid": ";;;", "linkedin": "jonathan-gold-1b340a13a/;;;", "or_profile": "~Jonathan_Gold1;hilda.azimi@nrc-cnrc.gc.ca;h42chang@uwaterloo.ca;koray.karabina@nrc-cnrc.gc.ca", "aff": "University of Waterloo;;;", "aff_domain": "uwaterloo.ca;;;", "position": "MS student;;;", "bibtex": "@misc{\ngold2023improving,\ntitle={Improving Accuracy and Explainability of Online Handwriting Recognition},\nauthor={Jonathan Gold and Hilda Azimi and Steven Chang and Koray Karabina},\nyear={2023},\nurl={https://openreview.net/forum?id=20tAZh6Ut3}\n}", "github": "", "project": "", "reviewers": "4ZHS;q6Em;2jWp;dFiu", "site": "https://openreview.net/forum?id=20tAZh6Ut3", "pdf_size": 363375, "recommendation": "1;1;3;3", "confidence": "5;5;4;3", "correctness": "3;3;3;3", "technical_novelty": "1;1;1;1", "empirical_novelty": "1;1;1;2", "wc_summary_paper": "83;73;30;44", "wc_strength_and_weaknesses": "396;306;22;59", "wc_clarity_quality_novelty_and_reproducibility": "70;195;15;187", "wc_summary_review": "113;182;22;21", "wc_review": "662;756;89;311", "wc_reply_reviewers": "21;0;0;0", "wc_reply_authors": "767;729;17;558", "reply_reviewers": "1;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 2.0, 1.0 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 1.0, 0.0 ], "empirical_novelty_avg": [ 1.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 57.5, 21.383404780343096 ], "wc_strength_and_weaknesses_avg": [ 195.75, 159.0163120563422 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 116.75, 76.80616837207803 ], "wc_summary_review_avg": [ 84.5, 67.55923326977594 ], "wc_review_avg": [ 454.5, 268.39383375927247 ], "wc_reply_reviewers_avg": [ 5.25, 9.093266739736606 ], "wc_reply_authors_avg": [ 517.75, 299.6342562191446 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.9045340337332909, "corr_recommendation_correctness": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3360705895102177436&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0", "aff_unique_norm": "University of Waterloo", "aff_unique_dep": "", "aff_unique_url": "https://uwaterloo.ca", "aff_unique_abbr": "UW", "aff_country_unique_index": "0", "aff_country_unique": "Canada" }, { "id": "22Hsbl8twlY", "title": "Beyond the injective assumption in causal representation learning", "track": "main", "status": "Withdraw", "tldr": "A hierarchy of generative functions for causal representation learning that relaxes the injective assumption.", "abstract": "Causal representation learning aims to take some entangled observation, $x$, and recover the latent causal variables $z$ from which the observation was generated via a generative function $g(\\cdot): \\mathcal{Z}\\rightarrow \\mathcal{X}$.
While this problem is impossible in its full generality, there has been considerable recent progress in showing a variety of conditions in which the latents are identifiable. All of these approaches share the assumption that $g(\\cdot)$ is injective: i.e., for any two observations $x_1$ and $x_2$, if $x_1 = x_2$ then the corresponding latent variables, $z_1$ and $z_2$, are equal. This assumption is restrictive, but dropping it entirely would allow pathological examples that we could never hope to identify, so in order to make progress beyond injectivity, we need to make explicit the important classes of non-injective functions. In this paper we present a formal hierarchy over generative functions that includes injective functions and two non-trivial classes of non-injective functions---occlusion and observable effects---that we argue are important for causal representation learning to consider. We demonstrate that the injective assumption is not necessary by proving the first identifiability results in settings with occluded variables. ", "keywords": "Representation learning;identifiability;ica;causal representation learning", "primary_area": "", "supplementary_material": "", "author": "Jason Hartford;Kartik Ahuja;Yoshua Bengio;Dhanya Sridhar", "authorids": "~Jason_Hartford1;~Kartik_Ahuja1;~Yoshua_Bengio1;~Dhanya_Sridhar2", "gender": "M;;M;", "homepage": "https://jhartford.github.io;;http://yoshuabengio.org;", "dblp": "191/6716;;56/953;", "google_scholar": "https://scholar.google.ca/citations?user=eBNK7SsAAAAJ;;kukA0LcAAAAJ;", "orcid": ";;;", "linkedin": "jasonhartford1/;;yoshuabengio/?originalSubdomain=ca;", "or_profile": "~Jason_Hartford1;~Kartik_Ahuja1;~Yoshua_Bengio1;~Dhanya_Sridhar2", "aff": "Montreal Institute for Learning Algorithms, University of Montreal, University of Montreal;;University of Montreal;", "aff_domain": "mila.umontreal.ca;;umontreal.ca;", "position": "Postdoc;;Full Professor;", "bibtex": "@misc{\nhartford2023beyond,\ntitle={Beyond the injective assumption in causal representation learning},\nauthor={Jason Hartford and Kartik Ahuja and Yoshua Bengio and Dhanya Sridhar},\nyear={2023},\nurl={https://openreview.net/forum?id=22Hsbl8twlY}\n}", "github": "", "project": "", "reviewers": "tDpD;r2Ya;fZEd;kGTx", "site": "https://openreview.net/forum?id=22Hsbl8twlY", "pdf_size": 914137, "recommendation": "3;3;6;6", "confidence": "4;3;4;3", "correctness": "2;3;4;3", "technical_novelty": "2;2;3;2", "empirical_novelty": "0;0;0;2", "wc_summary_paper": "132;53;90;50", "wc_strength_and_weaknesses": "425;131;66;235", "wc_clarity_quality_novelty_and_reproducibility": "87;39;91;19", "wc_summary_review": "45;21;31;23", "wc_review": "689;244;278;327", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.5, 1.5 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 0.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 81.25, 33.26691299174001 ], "wc_strength_and_weaknesses_avg": [ 214.25, 135.78912879903163 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 59.0, 30.854497241083024 ], "wc_summary_review_avg": [ 30.0, 9.433981132056603 ], "wc_review_avg": [ 384.5, 178.26174575606512 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ],
"corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.7071067811865476, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Fls74eE7xdwJ:scholar.google.com/&scioq=Beyond+the+injective+assumption+in+causal+representation+learning&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "University of Montreal", "aff_unique_dep": "Montreal Institute for Learning Algorithms", "aff_unique_url": "https://www.umontreal.ca", "aff_unique_abbr": "UM", "aff_campus_unique_index": "0", "aff_campus_unique": "Montreal;", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { "id": "22h1XSEiN0", "title": "Deep Probabilistic Time Series Forecasting over Long Horizons", "track": "main", "status": "Reject", "tldr": "We demonstrate that with simple adaptations high performing deterministic models can be made into state of the art probabilistic forecasters.", "abstract": "Recent advances in neural network architectures for time series have led to significant improvements on deterministic forecasting metrics like mean squared error. We show that for many common benchmark datasets with deterministic evaluation metrics, intrinsic stochasticity is so significant that simply predicting summary statistics of the inputs outperforms many state-of-the-art methods, despite these simple forecasters capturing essentially no information from the noisy signals in the dataset. We demonstrate that using a probabilistic framework and moving away from deterministic evaluation acts as a simple fix for this apparent misalignment between good performance and poor understanding. With simple and scalable approaches for uncertainty representation we can adapt state-of-the-art architectures for point prediction to be excellent probabilistic forecasters, outperforming complex probabilistic methods constructed from deep generative models (DGMs) on popular benchmarks. 
Finally, we demonstrate that our simple adaptations to point predictors yield reliable probabilistic forecasts on many problems of practical significance, namely large and highly stochastic datasets of climatological and economic data.", "keywords": "time series;neural networks;probabilistic forecasting", "primary_area": "", "supplementary_material": "", "author": "Gregory Benton;Nate Gruver;Wesley Maddox;Andrew Gordon Wilson", "authorids": "~Gregory_Benton1;~Nate_Gruver1;~Wesley_Maddox1;~Andrew_Gordon_Wilson1", "gender": ";M;;Not Specified", "homepage": "https://g-benton.github.io/;https://ngruver.github.io/;https://wjmaddox.github.io;https://cims.nyu.edu/~andrewgw", "dblp": ";223/5568;;65/10453", "google_scholar": ";R5QNdhcAAAAJ;;https://scholar.google.com.tw/citations?user=twWX2LIAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Gregory_Benton1;~Nate_Gruver1;~Wesley_Maddox1;~Andrew_Gordon_Wilson1", "aff": ";New York University;Jump Trading;New York University", "aff_domain": ";nyu.edu;jumptrading.com;nyu.edu", "position": ";PhD student;Researcher;Associate Professor", "bibtex": "@misc{\nbenton2023deep,\ntitle={Deep Probabilistic Time Series Forecasting over Long Horizons},\nauthor={Gregory Benton and Nate Gruver and Wesley Maddox and Andrew Gordon Wilson},\nyear={2023},\nurl={https://openreview.net/forum?id=22h1XSEiN0}\n}", "github": "", "project": "", "reviewers": "RuZ4;Cp5J;5QvS", "site": "https://openreview.net/forum?id=22h1XSEiN0", "pdf_size": 1491275, "recommendation": "3;3;5", "confidence": "4;4;3", "correctness": "2;2;4", "technical_novelty": "2;1;4", "empirical_novelty": "2;2;4", "wc_summary_paper": "72;104;80", "wc_strength_and_weaknesses": "316;188;57", "wc_clarity_quality_novelty_and_reproducibility": "60;27;100", "wc_summary_review": "239;93;12", "wc_review": "687;412;249", "wc_reply_reviewers": "298;0;32", "wc_reply_authors": "570;214;222", "reply_reviewers": "1;0;1", "reply_authors": "1;1;2", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.9428090415820634 ], "technical_novelty_avg": [ 2.3333333333333335, 1.247219128924647 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.9428090415820634 ], "wc_summary_paper_avg": [ 85.33333333333333, 13.59738536958076 ], "wc_strength_and_weaknesses_avg": [ 187.0, 105.73867157604481 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 62.333333333333336, 29.847761874031505 ], "wc_summary_review_avg": [ 114.66666666666667, 93.93023415753252 ], "wc_review_avg": [ 449.3333333333333, 180.7509028715732 ], "wc_reply_reviewers_avg": [ 110.0, 133.57644502930398 ], "wc_reply_authors_avg": [ 335.3333333333333, 165.96652942352227 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": 1.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15344712766367303301&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;0", "aff_unique_norm": "New York University;Jump Trading", "aff_unique_dep": ";", "aff_unique_url": "https://www.nyu.edu;https://www.jumptrading.com", "aff_unique_abbr": "NYU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "22z1JIM6mwI", "title": 
"CAPE: Channel-Attention-Based PDE Parameter Embeddings for SciML", "track": "main", "status": "Reject", "tldr": "a new parameter embedding module based on channel-attention for scientific machine learning", "abstract": "Scientific Machine Learning (SciML) designs machine learning methods that predict physical systems governed by partial differential equations (PDE). These ML-based surrogate models substitute inefficient and often non-differentiable numerical simulation algorithms and find multiple applications such as weather forecasting, molecular dynamics, and medical applications.\nWhile a number of ML-based methods for approximating the solutions of PDEs have been proposed in recent years, they typically do not consider the parameters of the PDEs, making it difficult for the ML surrogate models to generalize to PDE parameters not seen during training. \n\nWe propose a new channel-attention-based parameter embedding (CAPE) component for scientific machine learning models and a simple and effective curriculum learning strategy. The CAPE module can be combined with any kind of ML surrogate model, which can adapt to changing PDE parameters without harming the original model's ability to find approximate solutions to PDEs. The curriculum learning strategy provides a seamless transition between teacher-forcing and fully auto-regressive training. \nWe compare CAPE in conjunction with the curriculum learning strategy using a PDE benchmark and obtain consistent and significant improvements over the base models. The experiments also show several advantages of CAPE, such as its increased ability to generalize to unseen PDE parameters without substantially increasing inference time and parameter count.\nAn implementation of the method and experiments are available at \\url{https://anonymous.4open.science/r/CAPE-ML4Sci-145B}.", "keywords": "machine learning;partial differential equation;attention;generalization", "primary_area": "", "supplementary_material": "/attachment/3091edc091e547f1b742849b34e491eb1e14cc27.zip", "author": "Makoto Takamoto;Francesco Alesiani;Mathias Niepert", "authorids": "~Makoto_Takamoto1;~Francesco_Alesiani1;~Mathias_Niepert1", "gender": "M;;M", "homepage": "https://www.neclab.eu/;https://falesiani.github.io/;http://www.matlog.net", "dblp": ";122/8256;n/MathiasNiepert", "google_scholar": ";0puEQdgAAAAJ;https://scholar.google.de/citations?user=p5vLzq0AAAAJ", "orcid": ";0000-0003-4413-7247;", "linkedin": ";francesco-alesiani-2b48b74;", "or_profile": "~Makoto_Takamoto1;~Francesco_Alesiani1;~Mathias_Niepert1", "aff": "NEC;NEC;NEC", "aff_domain": "neclab.eu;neclab.eu;neclab.eu", "position": "Researcher;Senior Researcher;Research Scientist", "bibtex": "@misc{\ntakamoto2023cape,\ntitle={{CAPE}: Channel-Attention-Based {PDE} Parameter Embeddings for Sci{ML}},\nauthor={Makoto Takamoto and Francesco Alesiani and Mathias Niepert},\nyear={2023},\nurl={https://openreview.net/forum?id=22z1JIM6mwI}\n}", "github": "", "project": "", "reviewers": "adWD;PGvV;65qq;WSSM", "site": "https://openreview.net/forum?id=22z1JIM6mwI", "pdf_size": 925533, "recommendation": "3;6;6;6", "confidence": "4;3;3;4", "correctness": "3;2;4;4", "technical_novelty": "2;2;2;2", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "61;69;45;52", "wc_strength_and_weaknesses": "249;387;161;102", "wc_clarity_quality_novelty_and_reproducibility": "2;8;12;72", "wc_summary_review": "2;50;5;48", "wc_review": "314;514;223;274", "wc_reply_reviewers": "0;417;13;20", "wc_reply_authors": "538;1321;416;351", "reply_reviewers": 
"0;3;1;1", "reply_authors": "2;4;2;2", "recommendation_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 56.75, 9.065732182234372 ], "wc_strength_and_weaknesses_avg": [ 224.75, 107.29020225537838 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 23.5, 28.226760352544886 ], "wc_summary_review_avg": [ 26.25, 22.78568629644497 ], "wc_review_avg": [ 331.25, 110.32990301817544 ], "wc_reply_reviewers_avg": [ 112.5, 175.94956663771583 ], "wc_reply_authors_avg": [ 656.5, 389.4781765388146 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.17407765595569782, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=383018666570624609&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "NEC Corporation", "aff_unique_dep": "", "aff_unique_url": "https://www.nec.com", "aff_unique_abbr": "NEC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Japan" }, { "id": "23jfQBSUh4x", "title": "Online Continual Learning for Progressive Distribution Shift (OCL-PDS): A Practitioner's Perspective", "track": "main", "status": "Reject", "tldr": "We introduce the novel OCL-PDS problem for studying gradual distribution shift with time, and release 4 new benchmarks and 12 algorithms/baselines implementation for this new problem.", "abstract": "We introduce the novel OCL-PDS problem - Online Continual Learning for Progressive Distribution Shift. PDS refers to the subtle, gradual, and continuous distribution shift that widely exists in modern deep learning applications. It is widely observed in industry that PDS can cause significant performance drop. While previous work in continual learning and domain adaptation addresses this problem to some extent, our investigations from the practitioner's perspective reveal flawed assumptions that limit their applicability on daily challenges faced in real-world scenarios, and this work aims to close the gap between academic research and industry. For this new problem, we build 4 new benchmarks from the Wilds dataset, and implement 12 algorithms and baselines including both supervised and semi-supervised methods, which we test extensively on the new benchmarks. 
We hope that this work can provide practitioners with tools to better handle realistic PDS, and help scientists design better OCL algorithms.", "keywords": "Distribution shift;Continual learning;Domain Adaptation;Semi-supervised learning;Model drift;Online learning;Benchmark", "primary_area": "", "supplementary_material": "/attachment/884f6497ab6d92667b3ab743998d9dd964d925b0.zip", "author": "Runtian Zhai;Stefan Schroedl;Aram Galstyan;Anoop Kumar;Greg Ver Steeg;Pradeep Natarajan", "authorids": "~Runtian_Zhai1;schroedl@amazon.com;~Aram_Galstyan1;~Anoop_Kumar1;~Greg_Ver_Steeg1;~Pradeep_Natarajan1", "gender": "M;;M;M;M;M", "homepage": "http://www.runtianzhai.com;;http://www.isi.edu/~galstyan;;https://profiles.ucr.edu/app/home/profile/gregoryv;", "dblp": "242/8411;;16/3411;44/6841;82/9058;95/5978.html", "google_scholar": "EXd0ES8AAAAJ;;rJTwW0MAAAAJ;NTqD9TAAAAAJ;goLucoIAAAAJ;E1IdmqwAAAAJ", "orcid": "0000-0003-3332-3466;;;0009-0007-9124-7541;0000-0002-0793-141X;", "linkedin": ";;aram-galstyan-4a01373/;anoop-kumar-293191/;;", "or_profile": "~Runtian_Zhai1;schroedl@amazon.com;~Aram_Galstyan1;~Anoop_Kumar1;~Greg_Ver_Steeg1;~Pradeep_Natarajan1", "aff": "Carnegie Mellon University;;Amazon Alexa;Amazon;USC/ISI;Amazon", "aff_domain": "cmu.edu;;amazon.com;amazon.com;isi.edu;amazon.com", "position": "PhD student;;Scholar;Computer Scientist;Associate Professor;Principal Researcher", "bibtex": "@misc{\nzhai2023online,\ntitle={Online Continual Learning for Progressive Distribution Shift ({OCL}-{PDS}): A Practitioner's Perspective},\nauthor={Runtian Zhai and Stefan Schroedl and Aram Galstyan and Anoop Kumar and Greg Ver Steeg and Pradeep Natarajan},\nyear={2023},\nurl={https://openreview.net/forum?id=23jfQBSUh4x}\n}", "github": "", "project": "", "reviewers": "6758;UPWo;FxrK;FcFA", "site": "https://openreview.net/forum?id=23jfQBSUh4x", "pdf_size": 1004046, "recommendation": "3;5;6;10", "confidence": "4;5;4;4", "correctness": "3;3;3;3", "technical_novelty": "2;2;3;4", "empirical_novelty": "2;2;3;4", "wc_summary_paper": "43;104;39;73", "wc_strength_and_weaknesses": "129;714;123;317", "wc_clarity_quality_novelty_and_reproducibility": "75;96;19;8", "wc_summary_review": "90;59;240;19", "wc_review": "337;973;421;417", "wc_reply_reviewers": "0;0;72;0", "wc_reply_authors": "939;1563;1328;678", "reply_reviewers": "0;0;1;0", "reply_authors": "2;2;3;1", "recommendation_avg": [ 6.0, 2.5495097567963922 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 64.75, 26.1951808545007 ], "wc_strength_and_weaknesses_avg": [ 320.75, 240.0691306686472 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 49.5, 36.96281915655244 ], "wc_summary_review_avg": [ 102.0, 83.55537086268004 ], "wc_review_avg": [ 537.0, 253.94487590813878 ], "wc_reply_reviewers_avg": [ 18.0, 31.176914536239792 ], "wc_reply_authors_avg": [ 1127.0, 341.84865072133897 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.22645540682891915, "corr_recommendation_correctness": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15889221078335757206&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;1;2;1", "aff_unique_norm": "Carnegie Mellon University;Amazon;University of Southern California", 
"aff_unique_dep": ";Amazon Alexa;", "aff_unique_url": "https://www.cmu.edu;https://www.amazon.com/alexa;https://isi.usc.edu", "aff_unique_abbr": "CMU;Amazon Alexa;USC", "aff_campus_unique_index": "1", "aff_campus_unique": ";ISI", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "241s3NHjxc", "title": "Extending graph transformers with quantum computed aggregation", "track": "main", "status": "Reject", "tldr": "A new Graph Neural Network architecture where the aggregation weights are computed with a quantum computer.", "abstract": "Recently, efforts have been made in the community to design new Graph Neural Networks (GNN), as limitations of Message Passing Neural Networks became more apparent. This led to the appearance of Graph Transformers using global graph features such as Laplacian Eigenmaps. In our paper, we introduce a GNN architecture where the aggregation weights are computed using the long-range correlations of a quantum system. These correlations are generated by translating the graph topology into the interactions of a set of qubits in a quantum computer. The recent development of quantum processing units enables the computation of a new family of global graph features that would be otherwise out of reach for classical hardware. We give some theoretical insights about the potential benefits of this approach, and benchmark our algorithm on standard datasets. Although not being adapted to all datasets, our model performs similarly to standard GNN architectures, and paves a promising future for quantum enhanced GNNs.", "keywords": "graph neural networks;graph representation learning;quantum computing;graph transformers", "primary_area": "", "supplementary_material": "/attachment/6d010f80318824d156499f407e446bd451190163.zip", "author": "Slimane Thabet;Romain Fouilland;Loic Henriet", "authorids": "~Slimane_Thabet1;~Romain_Fouilland1;~Loic_Henriet1", "gender": "M;M;", "homepage": ";https://fouilland.fr;", "dblp": ";;", "google_scholar": "eGUJjGkAAAAJ;;https://scholar.google.com/scholar?hl=en", "orcid": ";;", "linkedin": ";romainfouilland/;", "or_profile": "~Slimane_Thabet1;~Romain_Fouilland1;~Loic_Henriet1", "aff": "Pasqal;;", "aff_domain": "pasqal.com;;", "position": "Researcher;;", "bibtex": "@misc{\nthabet2023extending,\ntitle={Extending graph transformers with quantum computed aggregation},\nauthor={Slimane Thabet and Romain Fouilland and Loic Henriet},\nyear={2023},\nurl={https://openreview.net/forum?id=241s3NHjxc}\n}", "github": "", "project": "", "reviewers": "fcGg;w3FW;FV73", "site": "https://openreview.net/forum?id=241s3NHjxc", "pdf_size": 2074999, "recommendation": "3;3;5", "confidence": "4;4;2", "correctness": "2;3;3", "technical_novelty": "3;2;2", "empirical_novelty": "1;1;1", "wc_summary_paper": "121;58;83", "wc_strength_and_weaknesses": "275;223;193", "wc_clarity_quality_novelty_and_reproducibility": "61;34;69", "wc_summary_review": "55;31;35", "wc_review": "512;346;380", "wc_reply_reviewers": "16;0;0", "wc_reply_authors": "64;213;279", "reply_reviewers": "1;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.0, 0.0 ], "wc_summary_paper_avg": [ 87.33333333333333, 25.901522906749882 ], "wc_strength_and_weaknesses_avg": [ 230.33333333333334, 33.87558937576667 ], 
"wc_clarity_quality_novelty_and_reproducibility_avg": [ 54.666666666666664, 14.974051630144135 ], "wc_summary_review_avg": [ 40.333333333333336, 10.498677165349081 ], "wc_review_avg": [ 412.6666666666667, 71.59764117778059 ], "wc_reply_reviewers_avg": [ 5.333333333333333, 7.542472332656507 ], "wc_reply_authors_avg": [ 185.33333333333334, 89.9271309944273 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": 0.5, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7785656638304229247&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0", "aff_unique_norm": "Pasqal", "aff_unique_dep": "", "aff_unique_url": "https://www.pasqal.com", "aff_unique_abbr": "", "aff_country_unique_index": "0", "aff_country_unique": "France" }, { "id": "24quGic59-", "title": "Take One Gram of Neural Features, Get Enhanced Group Robustness", "track": "main", "status": "Reject", "tldr": "We improve group robustness without group annotations by introducing GramClust, a two-stage method which (1) partition a dataset into groups based on features Gram matrices and (2) apply a robust optimization based on these pseudo-groups. ", "abstract": "Predictive performance of machine learning models trained with empirical risk minimization (ERM) can degrade considerably under distribution shifts. In particular, the presence of spurious correlations in training datasets leads ERM-trained models to display high loss when evaluated on minority groups not presenting such correlations in test sets. Extensive attempts have been made to develop methods improving worst-group robustness. However, they require group information for each training input or at least, a validation set with group labels to tune their hyperparameters, which may be expensive to get or unknown a priori. In this paper, we address the challenge of improving group robustness without group annotations during training. To this end, we propose to partition automatically the training dataset into groups based on Gram matrices of features extracted from an identification model and to apply robust optimization based on these pseudo-groups. 
In the realistic context where no group labels are available, our experiments show that our approach not only improves group robustness over ERM but also outperforms all recent baselines.", "keywords": "group robustness;distribution shift;spurious correlations;Gram matrices", "primary_area": "", "supplementary_material": "/attachment/2c30a29f49b4741573998c7150ed060453804358.zip", "author": "Simon Roburin;Charles Corbi\u00e8re;Gilles Puy;Nicolas THOME;Mathieu Aubry;Renaud Marlet;Patrick Perez", "authorids": "~Simon_Roburin1;~Charles_Corbi\u00e8re1;~Gilles_Puy2;~Nicolas_THOME2;~Mathieu_Aubry3;~Renaud_Marlet1;~Patrick_Perez1", "gender": ";M;;;;M;", "homepage": ";https://chcorbi.github.io;;;http://imagine.enpc.fr/~aubrym/;http://imagine.enpc.fr/~marletr/;", "dblp": ";https://dblp.uni-trier.de/pers/c/Corbi=egrave=re:Charles.html;;;57/10067;61/5462;", "google_scholar": ";https://scholar.google.fr/citations?user=UcnFUZ8AAAAJ;;;https://scholar.google.fr/citations?user=0MiPsosAAAAJ;2rclwh4AAAAJ;", "orcid": ";0000-0001-8024-7553;;;0000-0002-3804-0193;0000-0003-1612-1758;", "linkedin": ";https://linkedin.com/in/charles-corbi\u00e8re-6167015b;;;;renaud-marlet-9914ab/;", "or_profile": "~Simon_Roburin1;~Charles_Corbi\u00e8re1;~Gilles_Puy2;~Nicolas_THOME2;~Mathieu_Aubry3;~Renaud_Marlet1;~Patrick_Perez1", "aff": ";EPFL - EPF Lausanne;;;ENPC;Ecole des Ponts ParisTech;", "aff_domain": ";epfl.ch;;;enpc.fr;enpc.fr;", "position": ";Postdoc;;;Principal Researcher;Researcher;", "bibtex": "@misc{\nroburin2023take,\ntitle={Take One Gram of Neural Features, Get Enhanced Group Robustness},\nauthor={Simon Roburin and Charles Corbi{\\`e}re and Gilles Puy and Nicolas THOME and Mathieu Aubry and Renaud Marlet and Patrick Perez},\nyear={2023},\nurl={https://openreview.net/forum?id=24quGic59-}\n}", "github": "", "project": "", "reviewers": "Nwqa;nkU5;4Q7E;9Mcs", "site": "https://openreview.net/forum?id=24quGic59-", "pdf_size": 3344805, "recommendation": "3;5;6;6", "confidence": "3;3;3;4", "correctness": "2;2;4;4", "technical_novelty": "1;2;2;3", "empirical_novelty": "1;2;3;3", "wc_summary_paper": "37;91;135;184", "wc_strength_and_weaknesses": "585;52;317;265", "wc_clarity_quality_novelty_and_reproducibility": "38;168;28;30", "wc_summary_review": "60;18;17;35", "wc_review": "720;329;497;514", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1433;412;184;346", "reply_reviewers": "0;0;0;0", "reply_authors": "2;1;1;1", "recommendation_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.0, 1.0 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 111.75, 54.264974891729196 ], "wc_strength_and_weaknesses_avg": [ 304.75, 189.83726583576788 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 66.0, 59.00847396772772 ], "wc_summary_review_avg": [ 32.5, 17.414074767267998 ], "wc_review_avg": [ 515.0, 138.69571009948362 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 593.75, 491.5914843648128 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.4714045207910316, "corr_recommendation_correctness": 0.8164965809277259, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=566778632605364345&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "EPFL;\u00c9cole Nationale des Ponts et 
Chauss\u00e9es;Ecole des Ponts ParisTech", "aff_unique_dep": ";;", "aff_unique_url": "https://www.epfl.ch;https://www.enpc.fr;https://www.ponts.org", "aff_unique_abbr": "EPFL;ENPC;ENPC", "aff_campus_unique_index": "0", "aff_campus_unique": "Lausanne;", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Switzerland;France" }, { "id": "253DOGs6EF", "title": "Mesh-free Eulerian Physics-Informed Neural Networks", "track": "main", "status": "Reject", "tldr": "", "abstract": "Physics-informed Neural Networks (PINNs) have recently emerged as a principled way to include prior physical knowledge in the form of partial differential equations (PDEs) into neural networks. Although PINNs are generally viewed as mesh-free, current approaches still rely on collocation points within a bounded region, even in settings with spatially sparse signals. Furthermore, if the boundaries are not known, the selection of such a region is difficult and often results in a large proportion of collocation points being selected in areas of low relevance. To resolve this severe drawback of current methods, we present a mesh-free and adaptive approach termed particle-density PINN (pdPINN), which is inspired by the microscopic viewpoint of fluid dynamics. The method is based on the Eulerian formulation and, different from classical mesh-free methods, does not require the introduction of Lagrangian updates. We propose to sample directly from the distribution over the particle positions, eliminating the need to introduce boundaries while adaptively focusing on the most relevant regions. This is achieved by interpreting a non-negative physical quantity (such as the density or temperature) as an unnormalized probability distribution from which we sample with dynamic Monte Carlo methods. The proposed method leads to higher sample efficiency and improved performance of PINNs.
These advantages are demonstrated on various experiments based on the continuity equations, Fokker-Planck equations, and the heat equation.", "keywords": "Physics-informed Neural Network;PINN;SIREN;fluid dynamics;implicit neural representations;PDEs", "primary_area": "", "supplementary_material": "/attachment/510847c553a8bc8b4a209e7e7b48eb9d9449ec9a.zip", "author": "Fabricio Arend Torres;Marcello Massimo Negri;Monika Nagy-Huber;Maxim Samarin;Volker Roth", "authorids": "~Fabricio_Arend_Torres1;~Marcello_Massimo_Negri1;~Monika_Nagy-Huber1;~Maxim_Samarin1;~Volker_Roth1", "gender": "M;M;;M;M", "homepage": ";;https://bmda.dmi.unibas.ch/people/monika.nagy/;;", "dblp": ";;;;23/1185-1", "google_scholar": "https://scholar.google.com/citations?authuser=1;;;https://scholar.google.com/citations?hl=de;https://scholar.google.ch/citations?user=v1qj03cAAAAJ", "orcid": ";;;0000-0002-9242-1827;0000-0003-0991-0273", "linkedin": ";marcello-negri-b7b025176/;;samarinm17/;", "or_profile": "~Fabricio_Arend_Torres1;~Marcello_Massimo_Negri1;~Monika_Nagy-Huber1;~Maxim_Samarin1;~Volker_Roth1", "aff": "University of Basel;University of Basel;University of Basel;Swiss Data Science Center / ETH Zurich;University of Basel", "aff_domain": "unibas.ch;unibas.ch;unibas.ch;ethz.ch;unibas.ch", "position": "PhD student;PhD student;PhD student;Researcher;Full Professor", "bibtex": "@misc{\ntorres2023meshfree,\ntitle={Mesh-free Eulerian Physics-Informed Neural Networks},\nauthor={Fabricio Arend Torres and Marcello Massimo Negri and Monika Nagy-Huber and Maxim Samarin and Volker Roth},\nyear={2023},\nurl={https://openreview.net/forum?id=253DOGs6EF}\n}", "github": "", "project": "", "reviewers": "J4Gd;cZb3;N5cW;ukjY;ZR9E;jfSc", "site": "https://openreview.net/forum?id=253DOGs6EF", "pdf_size": 3851210, "recommendation": "3;3;5;6;6;6", "confidence": "4;2;4;3;4;3", "correctness": "2;2;3;3;4;3", "technical_novelty": "2;2;2;4;2;2", "empirical_novelty": "1;1;2;2;2;2", "wc_summary_paper": "68;35;70;56;59;151", "wc_strength_and_weaknesses": "220;24;130;73;180;139", "wc_clarity_quality_novelty_and_reproducibility": "130;12;209;16;74;44", "wc_summary_review": "57;17;83;27;45;17", "wc_review": "475;88;492;172;358;351", "wc_reply_reviewers": "126;0;0;0;0;0", "wc_reply_authors": "1184;296;709;207;313;379", "reply_reviewers": "1;0;0;0;0;0", "reply_authors": "2;1;1;1;1;1", "recommendation_avg": [ 4.833333333333333, 1.3437096247164249 ], "confidence_avg": [ 3.3333333333333335, 0.7453559924999298 ], "correctness_avg": [ 2.8333333333333335, 0.6871842709362768 ], "technical_novelty_avg": [ 2.3333333333333335, 0.7453559924999298 ], "empirical_novelty_avg": [ 1.6666666666666667, 0.47140452079103173 ], "wc_summary_paper_avg": [ 73.16666666666667, 36.621563168287736 ], "wc_strength_and_weaknesses_avg": [ 127.66666666666667, 64.72162612982534 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 80.83333333333333, 69.74815808002069 ], "wc_summary_review_avg": [ 41.0, 23.748684174075834 ], "wc_review_avg": [ 322.6666666666667, 148.15494891797874 ], "wc_reply_reviewers_avg": [ 21.0, 46.95742752749558 ], "wc_reply_authors_avg": [ 514.6666666666666, 338.4576126423448 ], "reply_reviewers_avg": [ 0.16666666666666666, 0.372677996249965 ], "reply_authors_avg": [ 1.1666666666666667, 0.3726779962499649 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.2218800784900917, "corr_recommendation_correctness": 0.8724024145144709, "gs_citation": 4, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=5244757252713665307&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "University of Basel;ETH Zurich", "aff_unique_dep": ";Swiss Data Science Center", "aff_unique_url": "https://www.unibas.ch;https://www.ethz.ch", "aff_unique_abbr": "UniBas;ETHZ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Switzerland" }, { "id": "25VgHaPz0l4", "title": "Selection Collider Bias in Large Language Models", "track": "main", "status": "Reject", "tldr": "Using causal inference methods, we explain and demonstrate how sample selection bias causes spurious correlations during training, and how those spurious correlations can be used to classify prediction tasks as underspecified during inference.", "abstract": "In this paper we motivate the causal mechanisms behind sample selection induced collider bias (selection collider bias) that can cause Large Language Mod- els (LLMs) to learn unconditional dependence between entities that are unconditionally independent in the real world. We show that selection collider bias can become amplified in underspecified learning tasks, and although difficult to overcome, we describe a method to exploit the resulting spurious correlations for determination of when a model may be uncertain about its prediction. We demonstrate an uncertainty metric that matches human uncertainty in tasks with gender pronoun underspecification on an extended version of the Winogender Schemas evaluation set, and we provide online demos where users can evaluate spurious correlations and apply our uncertainty metric to their own texts and models. Finally, we generalize our approach to address a wider range of prediction tasks.", "keywords": "large language models;causal inference;selection bias", "primary_area": "", "supplementary_material": "", "author": "Emily McMilin", "authorids": "~Emily_McMilin1", "gender": "F", "homepage": "https://github.com/2dot71mily", "dblp": "132/8495.html", "google_scholar": "https://scholar.google.com/citations?hl=en", "orcid": "", "linkedin": "emilymcmilin/", "or_profile": "~Emily_McMilin1", "aff": "Meta", "aff_domain": "meta.com", "position": "Software Engineer", "bibtex": "@misc{\nmcmilin2023selection,\ntitle={Selection Collider Bias in Large Language Models},\nauthor={Emily McMilin},\nyear={2023},\nurl={https://openreview.net/forum?id=25VgHaPz0l4}\n}", "github": "", "project": "", "reviewers": "qdVm;ukSe;LK4n", "site": "https://openreview.net/forum?id=25VgHaPz0l4", "pdf_size": 8568763, "recommendation": "3;5;5", "confidence": "3;3;3", "correctness": "2;3;3", "technical_novelty": "2;2;3", "empirical_novelty": "2;2;3", "wc_summary_paper": "172;124;54", "wc_strength_and_weaknesses": "663;544;31", "wc_clarity_quality_novelty_and_reproducibility": "72;245;195", "wc_summary_review": "57;106;34", "wc_review": "964;1019;314", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.0 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 116.66666666666667, 48.45157949495099 ], "wc_strength_and_weaknesses_avg": [ 412.6666666666667, 274.2168647054533 ], 
"wc_clarity_quality_novelty_and_reproducibility_avg": [ 170.66666666666666, 72.69265590293301 ], "wc_summary_review_avg": [ 65.66666666666667, 30.02591473303612 ], "wc_review_avg": [ 765.6666666666666, 320.1648880741852 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 1, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.9999999999999998, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:P9fo-xzdF2QJ:scholar.google.com/&scioq=Selection+Collider+Bias+in+Large+Language+Models&hl=en&as_sdt=0,5", "gs_version_total": 4, "aff_unique_index": "0", "aff_unique_norm": "Meta", "aff_unique_dep": "Meta Platforms, Inc.", "aff_unique_url": "https://meta.com", "aff_unique_abbr": "Meta", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "26aAV_wjoc", "title": "VoLTA: Vision-Language Transformer with Weakly-Supervised Local-Feature Alignment", "track": "main", "status": "Reject", "tldr": "We introduce VoLTA, Vision-Language Transformer with weakly-supervised local-feature Alignment, a VLP paradigm trained with graph optimal transport (GOT) based image-text matching.", "abstract": "Vision-language pre-training (VLP) has recently proven highly effective for various uni- and multi-modal downstream applications. However, most existing end-to-end VLP methods use high-resolution image-text-box data to perform well on fine-grained region-level tasks, such as object detection, segmentation, and referring expression comprehension. Unfortunately, such high-resolution images with accurate bounding box annotations are expensive to collect and use for supervision at scale. In this work, we propose VoLTA (Vision-Language Transformer with weakly-supervised local-feature Alignment), a new VLP paradigm that only utilizes image-caption data but achieves fine-grained region-level image understanding, eliminating the use of expensive box annotations. VoLTA adopts graph optimal transport-based weakly-supervised alignment on local image patches and text tokens to germinate an explicit, self-normalized, and interpretable low-level matching criterion. In addition, VoLTA pushes multi-modal fusion deep into the uni-modal backbones during pre-training and removes fusion-specific transformer layers, further reducing memory requirements. 
Extensive experiments on a wide range of vision- and vision-language downstream tasks demonstrate the effectiveness of VoLTA on fine-grained applications without compromising the coarse-grained downstream performance, often outperforming methods using significantly more caption and box annotations.", "keywords": "self-supervision;vision-language pre-training;transformer;patch-word alignment", "primary_area": "", "supplementary_material": "/attachment/bb68c6cde778b37c17495704dac0652fcdbe2a8a.zip", "author": "Shraman Pramanick;Li Jing;Sayan Nag;Jiachen Zhu;Hardik J Shah;Yann LeCun;Rama Chellappa", "authorids": "~Shraman_Pramanick1;~Li_Jing1;~Sayan_Nag1;~Jiachen_Zhu1;~Hardik_J_Shah1;~Yann_LeCun1;~Rama_Chellappa1", "gender": "M;M;M;M;M;M;", "homepage": "https://shramanpramanick.github.io/;http://jingli.io/;https://sayannag.github.io/;https://cs.nyu.edu/~jz3224/;;http://yann.lecun.com;", "dblp": "289/0043;59/6222;198/1398;250/0741-2;;l/YannLeCun;", "google_scholar": "20SubC8AAAAJ;VhxDLwcAAAAJ;K8w4dj4AAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;WLN3QrAAAAAJ;", "orcid": ";;;;;;", "linkedin": "shramanpramanick/;li-jing-568b3765/;sayan-nag-176046124/;;hardik-shah-75ab5429/;;", "or_profile": "~Shraman_Pramanick1;~Li_Jing1;~Sayan_Nag1;~Jiachen_Zhu1;~Hardik_J_Shah1;~Yann_LeCun1;~Rama_Chellappa1", "aff": "Meta AI;OpenAI;University of Toronto;New York University;Meta Inc;New York University;", "aff_domain": "meta.com;openai.com;utoronto.ca;nyu.edu;meta.com;nyu.edu;", "position": "Intern;Researcher;PhD student;PhD student;Research Scientist;Full Professor;", "bibtex": "@misc{\npramanick2023volta,\ntitle={Vo{LTA}: Vision-Language Transformer with Weakly-Supervised Local-Feature Alignment},\nauthor={Shraman Pramanick and Li Jing and Sayan Nag and Jiachen Zhu and Hardik J Shah and Yann LeCun and Rama Chellappa},\nyear={2023},\nurl={https://openreview.net/forum?id=26aAV_wjoc}\n}", "github": "", "project": "", "reviewers": "PoWP;mig1;RqXJ", "site": "https://openreview.net/forum?id=26aAV_wjoc", "pdf_size": 24604596, "recommendation": "3;5;6", "confidence": "5;3;4", "correctness": "3;2;4", "technical_novelty": "1;2;4", "empirical_novelty": "2;2;3", "wc_summary_paper": "65;53;142", "wc_strength_and_weaknesses": "267;129;382", "wc_clarity_quality_novelty_and_reproducibility": "32;18;65", "wc_summary_review": "37;24;121", "wc_review": "401;224;710", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1638;641;1009", "reply_reviewers": "0;0;0", "reply_authors": "3;2;3", "recommendation_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 2.3333333333333335, 1.247219128924647 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 86.66666666666667, 39.43207943906698 ], "wc_strength_and_weaknesses_avg": [ 259.3333333333333, 103.42898798477898 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 38.333333333333336, 19.70335560817553 ], "wc_summary_review_avg": [ 60.666666666666664, 42.99095512107427 ], "wc_review_avg": [ 445.0, 200.8332641770282 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1096.0, 411.6462882945341 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.6546536707079772, "corr_recommendation_correctness": 0.3273268353539886, "gs_citation": 20, 
"gs_cited_by_link": "https://scholar.google.com/scholar?cites=158162138722193477&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;2;3;0;3", "aff_unique_norm": "Meta;OpenAI;University of Toronto;New York University", "aff_unique_dep": "Meta AI;;;", "aff_unique_url": "https://meta.com;https://openai.com;https://www.utoronto.ca;https://www.nyu.edu", "aff_unique_abbr": "Meta;OpenAI;U of T;NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "United States;Canada" }, { "title": "Data Continuity Matters: Improving Sequence Modeling with Lipschitz Regularizer", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11331", "id": "27uBgHuoSQ", "poster": "/media/PosterPDFs/ICLR%202023/11331.png?t=1683035587.4656491", "openreview": "https://openreview.net/forum?id=27uBgHuoSQ", "slides": "https://iclr.cc/virtual/2023/poster/11331", "video": "https://iclr.cc/virtual/2023/poster/11331", "author_site": "Eric Qu, Xufang Luo, Dongsheng Li", "tldr": "", "abstract": "Sequence modeling is a core problem in machine learning, and various neural networks have been designed to process different types of sequence data. However, few attempts have been made to understand the inherent data property of sequence data, neglecting the critical factor that may significantly affect the performance of sequence modeling. In this paper, we theoretically and empirically analyze a generic property of sequence data, i.e., continuity, and connect this property with the performance of deep models. First, we empirically observe that different kinds of models for sequence modeling prefer data with different continuity. Then, we theoretically analyze the continuity preference of different models in both time and frequency domains. To further utilize continuity to improve sequence modeling, we propose a simple yet effective Lipschitz Regularizer, that can flexibly adjust data continuity according to model preferences, and bring very little extra computational cost. 
Extensive experiments on various tasks demonstrate that altering data continuity via Lipschitz Regularizer can largely improve the performance of many deep models for sequence modeling.", "keywords": "deep learning;data continuity;sequence modeling", "primary_area": "", "supplementary_material": "/attachment/f4a12c7ae003c3a5a267a55c9c76b2662a2e5bd2.zip", "author": "Eric Qu;Xufang Luo;Dongsheng Li", "authorids": "~Eric_Qu1;~Xufang_Luo1;~Dongsheng_Li2", "gender": "M;F;M", "homepage": "https://people.eecs.berkeley.edu/~ericqu/;;http://recmind.cn", "dblp": "312/6521;218/7350;254/0830-2.html", "google_scholar": "-qOBJlcAAAAJ;;VNg5rA8AAAAJ", "orcid": ";;0000-0003-3103-8442", "linkedin": ";;", "or_profile": "~Eric_Qu1;~Xufang_Luo1;~Dongsheng_Li2", "aff": "Duke University;Microsoft Research;Microsoft Research Asia", "aff_domain": "duke.edu;microsoft.com;microsoft.com", "position": "Undergrad student;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nqu2023data,\ntitle={Data Continuity Matters: Improving Sequence Modeling with Lipschitz Regularizer},\nauthor={Eric Qu and Xufang Luo and Dongsheng Li},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=27uBgHuoSQ}\n}", "github": "", "project": "", "reviewers": "EzfA;cBYY;L7Qs;Tz2y", "pdf_size": 1247097, "recommendation": "6;6;8;8", "confidence": "2;2;3;3", "correctness": "4;4;4;4", "technical_novelty": "3;3;3;4", "empirical_novelty": "3;3;3;4", "wc_summary_paper": "29;16;59;81", "wc_strength_and_weaknesses": "143;86;110;217", "wc_clarity_quality_novelty_and_reproducibility": "14;32;33;25", "wc_summary_review": "33;12;47;33", "wc_review": "219;146;249;356", "wc_reply_reviewers": "0;0;11;0", "wc_reply_authors": "512;461;250;343", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 2.5, 0.5 ], "correctness_avg": [ 4.0, 0.0 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 46.25, 25.410381736605217 ], "wc_strength_and_weaknesses_avg": [ 139.0, 49.371044145328746 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 26.0, 7.582875444051551 ], "wc_summary_review_avg": [ 31.25, 12.497499749949988 ], "wc_review_avg": [ 242.5, 75.4801298356064 ], "wc_reply_reviewers_avg": [ 2.75, 4.763139720814412 ], "wc_reply_authors_avg": [ 391.5, 102.13349107907749 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 1.0, "corr_recommendation_correctness": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18256516143614440850&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "pdf": "https://openreview.net/pdf?id=27uBgHuoSQ", "email": "duke.edu;microsoft.com;microsoft.com", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Duke University;Microsoft", "aff_unique_dep": ";Microsoft Research", "aff_unique_url": "https://www.duke.edu;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "Duke;MSR", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;China" }, { "id": "293zPCqNqe", "title": "PointDP: Diffusion-driven Purification against 3D Adversarial Point Clouds", "track": "main", "status": "Reject", "tldr": "We propose PointDP, a diffusion-driven purification 
strategy to defend against adversarial point clouds. PointDP consistently achieves the strongest robustness under various attacks.", "abstract": "The 3D point cloud is a critical data representation in many real-world applications, such as autonomous driving, robotics, and medical imaging. Although the success of deep learning further accelerates the adoption of 3D point clouds in the physical world, deep learning is notoriously vulnerable to adversarial attacks. Various defense solutions have been proposed to build robust models against adversarial attacks. In this work, we identify that the state-of-the-art empirical defense, adversarial training, has a major limitation in 3D point cloud models due to gradient obfuscation, resulting in significant degradation of robustness against strong attacks. To bridge the gap, we propose PointDP, a purification strategy that leverages diffusion models to defend against 3D adversarial attacks. Since PointDP does not rely on predefined adversarial examples for training, it can defend against diverse threats. We extensively evaluate PointDP on six representative 3D point cloud architectures and leverage sixteen strong and adaptive attacks to demonstrate its lower-bound robustness. Our evaluation shows that PointDP achieves significantly better (i.e., 12.6\\%-40.3\\%) adversarial robustness than state-of-the-art methods under strong attacks bounded by different $\\ell_p$ norms. ", "keywords": "Adversarial Robustness;Point Cloud Classification;Diffusion Model", "primary_area": "", "supplementary_material": "/attachment/1fcd635aa271afa1b79e5e74a9a607cb31c9e5cd.zip", "author": "Jiachen Sun;Jiongxiao Wang;Weili Nie;Zhiding Yu;Zhuoqing Mao;Chaowei Xiao", "authorids": "~Jiachen_Sun1;~Jiongxiao_Wang1;~Weili_Nie1;~Zhiding_Yu1;~Zhuoqing_Mao1;~Chaowei_Xiao2", "gender": "M;;M;;F;", "homepage": "https://web.eecs.umich.edu/~jiachens/;https://jayfeather1024.github.io/jxwang.github.io/;https://weilinie.github.io/;;https://web.eecs.umich.edu/~zmao/;", "dblp": ";322/5991;147/4786;;;", "google_scholar": "Knnv3p4AAAAJ;sIGapHMAAAAJ;zW7BH7oAAAAJ;;Ba_Ci9UAAAAJ;", "orcid": ";;;;;", "linkedin": "jiachensun23/;;;;;", "or_profile": "~Jiachen_Sun1;~Jiongxiao_Wang1;~Weili_Nie1;~Zhiding_Yu1;~Zhuoqing_Mao1;~Chaowei_Xiao2", "aff": "University of Michigan;Arizona State University;NVIDIA;;University of Michigan;", "aff_domain": "umich.edu;asu.edu;nvidia.com;;umich.edu;", "position": "PhD student;PhD student;Research Scientist;;Professor;", "bibtex": "@misc{\nsun2023pointdp,\ntitle={Point{DP}: Diffusion-driven Purification against 3D Adversarial Point Clouds},\nauthor={Jiachen Sun and Jiongxiao Wang and Weili Nie and Zhiding Yu and Zhuoqing Mao and Chaowei Xiao},\nyear={2023},\nurl={https://openreview.net/forum?id=293zPCqNqe}\n}", "github": "", "project": "", "reviewers": "TFZv;TVXV;PNfK;gbYk", "site": "https://openreview.net/forum?id=293zPCqNqe", "pdf_size": 1148635, "recommendation": "5;5;6;8", "confidence": "4;3;2;3", "correctness": "3;3;4;4", "technical_novelty": "3;2;3;2", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "41;34;186;183", "wc_strength_and_weaknesses": "161;60;56;391", "wc_clarity_quality_novelty_and_reproducibility": "24;46;38;45", "wc_summary_review": "45;35;35;50", "wc_review": "271;175;315;669", "wc_reply_reviewers": "72;166;0;34", "wc_reply_authors": "3435;1730;416;990", "reply_reviewers": "1;2;0;1", "reply_authors": "12;6;2;4", "recommendation_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 3.5, 0.5 ],
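The PointDP abstract above describes a diffuse-then-denoise purification loop. A toy sketch of that pattern follows; `denoise_step` is a hypothetical stand-in for a trained point-cloud diffusion model, and nothing here is the paper's actual code:

```python
# Illustrative sketch of diffusion-driven purification (not the PointDP code).
import numpy as np

def purify(points, noise_level=0.1, steps=10, denoise_step=None):
    """Diffuse an (N, 3) point cloud forward, then iteratively denoise it.

    Adversarial perturbations are drowned out by the injected noise and
    (ideally) removed again by the reverse process.
    """
    x = points + noise_level * np.random.randn(*points.shape)  # forward noising
    for _ in range(steps):
        if denoise_step is not None:
            x = denoise_step(x)   # would be one reverse-diffusion step
        else:                     # placeholder: shrink toward the centroid
            x = 0.9 * x + 0.1 * x.mean(axis=0, keepdims=True)
    return x

clean = purify(np.random.randn(1024, 3))
print(clean.shape)  # (1024, 3)
```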
"technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 111.0, 73.54930319180461 ], "wc_strength_and_weaknesses_avg": [ 167.0, 135.99816175228253 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 38.25, 8.78564169540279 ], "wc_summary_review_avg": [ 41.25, 6.49519052838329 ], "wc_review_avg": [ 357.5, 186.83348201005086 ], "wc_reply_reviewers_avg": [ 68.0, 62.048368229954285 ], "wc_reply_authors_avg": [ 1642.75, 1134.7654768717632 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 6.0, 3.7416573867739413 ], "replies_avg": [ 33, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.28867513459481287, "corr_recommendation_correctness": 0.8164965809277259, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:csXGM00zZckJ:scholar.google.com/&scioq=PointDP:+Diffusion-driven+Purification+against+3D+Adversarial+Point+Clouds&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Michigan;Arizona State University;NVIDIA", "aff_unique_dep": ";;NVIDIA Corporation", "aff_unique_url": "https://www.umich.edu;https://www.asu.edu;https://www.nvidia.com", "aff_unique_abbr": "UM;ASU;NVIDIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "The Best of Both Worlds: Accurate Global and Personalized Models through Federated Learning with Data-Free Hyper-Knowledge Distillation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10802", "id": "29V3AWjVAFi", "poster": "/media/PosterPDFs/ICLR%202023/10802.png?t=1680756442.9588368", "openreview": "https://openreview.net/forum?id=29V3AWjVAFi", "slides": "https://iclr.cc/virtual/2023/poster/10802", "video": "https://iclr.cc/virtual/2023/poster/10802", "author_site": "Huancheng Chen, Chaining Wang, Haris Vikalo", "tldr": "", "abstract": "Heterogeneity of data distributed across clients limits the performance of global models trained through federated learning, especially in the settings with highly imbalanced class distributions of local datasets. In recent years, personalized federated learning (pFL) has emerged as a potential solution to the challenges presented by heterogeneous data. However, existing pFL methods typically enhance performance of local models at the expense of the global model's accuracy. We propose FedHKD (Federated Hyper-Knowledge Distillation), a novel FL algorithm in which clients rely on knowledge distillation (KD) to train local models. In particular, each client extracts and sends to the server the means of local data representations and the corresponding soft predictions -- information that we refer to as ``hyper-knowledge\". The server aggregates this information and broadcasts it to the clients in support of local training. Notably, unlike other KD-based pFL methods, FedHKD does not rely on a public dataset nor it deploys a generative model at the server. 
We analyze convergence of FedHKD and conduct extensive experiments on visual datasets in a variety of scenarios, demonstrating that FedHKD provides significant improvement in both personalized and global model performance compared to state-of-the-art FL methods designed for heterogeneous data settings.", "keywords": "Federated Learning;Representation Learning;Knowledge Distillation", "primary_area": "", "supplementary_material": "/attachment/7f74a7df88db99a90dffd225d8052c133fb5e10e.zip", "author": "Huancheng Chen;Chianing Wang;Haris Vikalo", "authorids": "~Huancheng_Chen1;~Chianing_Wang1;~Haris_Vikalo1", "gender": "M;;", "homepage": "https://citychan.github.io/;https://scholar.google.com/citations?user=FHdgiYQAAAAJ&hl=en;", "dblp": "302/4540;;", "google_scholar": "https://scholar.google.com.tw/citations?hl=zh-TW;FHdgiYQAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Huancheng_Chen1;~Chianing_Wang1;~Haris_Vikalo1", "aff": "University of Texas, Austin;Toyota Motor North America;", "aff_domain": "utexas.edu;toyota.com;", "position": "PhD student;Principal Researcher;", "bibtex": "@inproceedings{\nchen2023the,\ntitle={The Best of Both Worlds: Accurate Global and Personalized Models through Federated Learning with Data-Free Hyper-Knowledge Distillation},\nauthor={Huancheng Chen and Chianing Wang and Haris Vikalo},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=29V3AWjVAFi}\n}", "github": "", "project": "", "reviewers": "y1Yi;GssC;yjPN;HZm8", "pdf_size": 807823, "recommendation": "6;6;6;8", "confidence": "3;4;3;3", "correctness": "3;3;3;4", "technical_novelty": "3;3;2;3", "empirical_novelty": "2;3;2;0", "wc_summary_paper": "116;147;140;42", "wc_strength_and_weaknesses": "342;128;172;81", "wc_clarity_quality_novelty_and_reproducibility": "58;13;482;34", "wc_summary_review": "40;20;182;36", "wc_review": "556;308;976;193", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.75, 1.0897247358851685 ], "wc_summary_paper_avg": [ 111.25, 41.601532423698046 ], "wc_strength_and_weaknesses_avg": [ 180.75, 98.50222078714773 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 146.75, 194.2104206781912 ], "wc_summary_review_avg": [ 69.5, 65.38157232737677 ], "wc_review_avg": [ 508.25, 300.2302241613925 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": 1.0, "gs_citation": 51, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5995978773247769997&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=29V3AWjVAFi", "email": "utexas.edu;toyota.com;", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "University of Texas at Austin;Toyota Motor Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://www.utexas.edu;https://www.toyota.com", "aff_unique_abbr": "UT Austin;Toyota", "aff_campus_unique_index": "0", "aff_campus_unique": "Austin;", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;Japan"
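The FedHKD entry above has the server aggregate per-class representation means and soft predictions from clients. A count-weighted averaging sketch of that server-side step (the function name and the exact statistics carried are assumptions for illustration, not the paper's code):

```python
# Sketch of server-side "hyper-knowledge" aggregation (illustrative only).
import numpy as np

def aggregate_hyper_knowledge(client_stats, num_classes, dim):
    """client_stats: list of (counts[K], rep_means[K, d], soft_preds[K, K]).

    Returns count-weighted global per-class representation means and soft
    predictions, which the server would broadcast back to the clients.
    """
    counts = np.zeros(num_classes)
    means = np.zeros((num_classes, dim))
    soft = np.zeros((num_classes, num_classes))
    for n, mu, q in client_stats:
        counts += n
        means += n[:, None] * mu
        soft += n[:, None] * q
    denom = np.maximum(counts, 1.0)[:, None]  # guard against empty classes
    return means / denom, soft / denom

# toy usage: two clients, 3 classes, 4-dim representations
stats = [(np.array([5.0, 0.0, 2.0]), np.random.randn(3, 4), np.ones((3, 3)) / 3)
         for _ in range(2)]
g_means, g_soft = aggregate_hyper_knowledge(stats, 3, 4)
print(g_means.shape, g_soft.shape)  # (3, 4) (3, 3)
```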
}, { "id": "2BruD7pa7E", "title": "Global View For GCN: Why Go Deep When You Can Be Shallow?", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Existing graph convolutional network (GCN) methods attempt to expand the receptive field of its convolution by either stacking up more convolutional layers or accumulating multi-hop adjacency matrices. Either approach increases computation complexity while providing a limited view of the network topology. We propose to extend k-hop adjacency matrices into one generalized exponential matrix to provide GCNs with a global overview of the network topology. This technique allows the GCNs to learn global topology without going deep and with much fewer parameters than most state-of-the-art GCNs, challenging the common assumption that deep GCNs are empirically better for learning global features. We show a significant improvement in performance in semi-supervised learning when this technique is used for common GCNs while maintaining much shallower network architectures ($\\leq4$ layers) than the existing ones.", "keywords": "GCN;GNN;Clustering;Semi-supervised Learning", "primary_area": "", "supplementary_material": "", "author": "Hong Chul Nam;Ye Sle Cha;Chanwoo Park", "authorids": "~Hong_Chul_Nam1;yesle.cha@alsemy.com;chanwoo.park@alsemy.com", "gender": "M;;", "homepage": "https://www.alsemy.com;;", "dblp": ";;", "google_scholar": ";;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Hong_Chul_Nam1;yesle.cha@alsemy.com;chanwoo.park@alsemy.com", "aff": "ETHZ - ETH Zurich;;", "aff_domain": "ethz.ch;;", "position": "Undergrad student;;", "bibtex": "@misc{\nnam2023global,\ntitle={Global View For {GCN}: Why Go Deep When You Can Be Shallow?},\nauthor={Hong Chul Nam and Ye Sle Cha and Chanwoo Park},\nyear={2023},\nurl={https://openreview.net/forum?id=2BruD7pa7E}\n}", "github": "", "project": "", "reviewers": "NJqG;KZxk;sjGK;NSAk", "site": "https://openreview.net/forum?id=2BruD7pa7E", "pdf_size": 418747, "recommendation": "1;1;3;5", "confidence": "4;4;5;5", "correctness": "2;3;2;3", "technical_novelty": "1;1;1;3", "empirical_novelty": "1;1;0;3", "wc_summary_paper": "32;46;48;133", "wc_strength_and_weaknesses": "60;201;299;388", "wc_clarity_quality_novelty_and_reproducibility": "7;6;84;15", "wc_summary_review": "16;12;34;22", "wc_review": "115;265;465;558", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 2.5, 1.6583123951777 ], "confidence_avg": [ 4.5, 0.5 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 1.5, 0.8660254037844386 ], "empirical_novelty_avg": [ 1.25, 1.0897247358851685 ], "wc_summary_paper_avg": [ 64.75, 39.88342387508876 ], "wc_strength_and_weaknesses_avg": [ 237.0, 121.72715391398913 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 28.0, 32.5192250830182 ], "wc_summary_review_avg": [ 21.0, 8.306623862918075 ], "wc_review_avg": [ 350.75, 172.4360388665896 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.9045340337332909, "corr_recommendation_correctness": 0.30151134457776363, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14749522488713422677&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "ETH Zurich", "aff_unique_dep": "", "aff_unique_url": 
"https://www.ethz.ch", "aff_unique_abbr": "ETHZ", "aff_country_unique_index": "0", "aff_country_unique": "Switzerland" }, { "id": "2EFQ_QlcPs8", "title": "Multi-Vector Retrieval as Sparse Alignment", "track": "main", "status": "Reject", "tldr": "We propose a novel multi-vector retrieval model with pairwise alignment and unary salience.", "abstract": "Multi-vector retrieval models improve over single-vector dual encoders on many information retrieval tasks. In this paper, we cast the multi-vector retrieval problem as sparse alignment between query and document tokens. We propose ALIGNER, a novel multi-vector retrieval model that learns sparsified pairwise alignments between query and document tokens (e.g. `dog' vs. `puppy') and per-token unary saliences reflecting their relative importance for retrieval. We show that controlling the sparsity of pairwise token alignments often brings significant performance gains. While most factoid questions focusing on a specific part of a document require a smaller number of alignments, others requiring a broader understanding of a document favor a larger number of alignments. Unary saliences, on the other hand, decide whether a token ever needs to be aligned with others for retrieval (e.g. `kind' from `what kind of currency is used in new zealand'). With sparsified unary saliences, we are able to prune a large number of query and document token vectors and improve the efficiency of multi-vector retrieval. We learn the sparse unary saliences with entropy-regularized linear programming, which outperforms other methods to achieve sparsity. In a zero-shot setting, ALIGNER scores 51.1 nDCG@10, achieving a new retriever-only state-of-the-art on 13 tasks in the BEIR benchmark. In addition, adapting pairwise alignments with a few examples (<= 8) further improves the performance up to 15.7 points nDCG@10 for argument retrieval tasks. The unary saliences of ALIGNER helps us to keep only 20% of the document token representations with minimal performance loss. 
We further show that our model often produces interpretable alignments and significantly improves its performance when initialized from larger language models.", "keywords": "natural language processing;document retrieval;information retrieval", "primary_area": "", "supplementary_material": "", "author": "Yujie Qian;Jinhyuk Lee;Karthik Duddu;Zhuyun Dai;Tao Lei;Siddhartha Brahma;Iftekhar Naim;Vincent Y Zhao", "authorids": "~Yujie_Qian1;~Jinhyuk_Lee2;karthikduddu@google.com;~Zhuyun_Dai1;~Tao_Lei1;~Siddhartha_Brahma1;~Iftekhar_Naim1;~Vincent_Y_Zhao1", "gender": "M;M;;;M;M;M;M", "homepage": "https://people.csail.mit.edu/yujieq/;https://jhyuklee.github.io;;;;;;https://foo.bar", "dblp": "187/3108;https://dblp.uni-trier.de/pers/hd/l/Lee:Jinhyuk;;148/4531;;;11/8759;301/7889", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.co.kr/citations?user=YWm_zVcAAAAJ;;9bbHwJIAAAAJ;g2uay50AAAAJ;OZj382cAAAAJ;E8-dfNcAAAAJ;", "orcid": ";0000-0003-4972-239X;;;;;;", "linkedin": ";jinhyuk-lee-73b27489/;;;;sidbrahma;;", "or_profile": "~Yujie_Qian1;~Jinhyuk_Lee2;karthikduddu@google.com;~Zhuyun_Dai1;~Tao_Lei1;~Siddhartha_Brahma1;~Iftekhar_Naim1;~Vincent_Y_Zhao1", "aff": "Massachusetts Institute of Technology;Google;;Google;Google;Research, Google;Google;Google", "aff_domain": "mit.edu;google.com;;google.com;google.com;research.google.com;google.com;google.com", "position": "PhD student;Research Scientist;;Researcher;Research scientist;Researcher;Researcher;Researcher", "bibtex": "@misc{\nqian2023multivector,\ntitle={Multi-Vector Retrieval as Sparse Alignment},\nauthor={Yujie Qian and Jinhyuk Lee and Karthik Duddu and Zhuyun Dai and Tao Lei and Siddhartha Brahma and Iftekhar Naim and Vincent Y Zhao},\nyear={2023},\nurl={https://openreview.net/forum?id=2EFQ_QlcPs8}\n}", "github": "", "project": "", "reviewers": "5rDb;4BA2;sjAr;waGp", "site": "https://openreview.net/forum?id=2EFQ_QlcPs8", "pdf_size": 1460966, "recommendation": "6;6;6;6", "confidence": "4;5;5;4", "correctness": "4;4;3;3", "technical_novelty": "3;3;2;3", "empirical_novelty": "3;4;2;3", "wc_summary_paper": "108;150;39;83", "wc_strength_and_weaknesses": "150;244;133;103", "wc_clarity_quality_novelty_and_reproducibility": "23;51;216;16", "wc_summary_review": "42;44;66;51", "wc_review": "323;489;454;253", "wc_reply_reviewers": "24;20;59;0", "wc_reply_authors": "818;680;1669;545", "reply_reviewers": "1;1;1;0", "reply_authors": "4;3;5;2", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.5, 0.5 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 95.0, 40.23058537978288 ], "wc_strength_and_weaknesses_avg": [ 157.5, 52.69962049199216 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 76.5, 81.59810046808688 ], "wc_summary_review_avg": [ 50.75, 9.41740410091868 ], "wc_review_avg": [ 379.75, 95.83155795456943 ], "wc_reply_reviewers_avg": [ 25.75, 21.241174637952582 ], "wc_reply_authors_avg": [ 928.0, 438.56983480399106 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.5, 1.118033988749895 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16736428659023412763&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;1;1;1;1;1", "aff_unique_norm": "Massachusetts Institute of 
Technology;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://web.mit.edu;https://www.google.com", "aff_unique_abbr": "MIT;Google", "aff_campus_unique_index": "1;1;1;1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "2EO8eQ2vySB", "title": "Masked inverse folding with sequence transfer for protein representation learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Self-supervised pretraining on protein sequences has led to state-of-the art performance on protein function and fitness prediction. \nHowever, sequence-only methods ignore the rich information contained in experimental and predicted protein structures.\nMeanwhile, inverse folding methods reconstruct a protein's amino-acid sequence given its structure, but do not take advantage of sequences that do not have known structures.\nIn this study, we train a masked inverse folding protein language model parameterized as a structured graph neural network. \nWe then show that using the outputs from a pretrained sequence-only protein masked language model as input to the inverse folding model further improves pretraining perplexity. \nWe evaluate both of these models on downstream protein engineering tasks and analyze the effect of using information from experimental or predicted structures on performance. ", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/e0c3ee333e3b3bfe8b25c74426fc9da608796c06.zip", "author": "Kevin K Yang;Hugh Yeh;Niccol\u00f2 Zanichelli", "authorids": "~Kevin_K_Yang1;~Hugh_Yeh1;niccolo.zanichelli@gmail.com", "gender": ";M;", "homepage": ";;", "dblp": "216/0400;;", "google_scholar": "mq-Vzk8AAAAJ;;", "orcid": ";;", "linkedin": ";hugh-yeh-18881510b/;", "or_profile": "~Kevin_K_Yang1;~Hugh_Yeh1;niccolo.zanichelli@gmail.com", "aff": ";University of Chicago;", "aff_domain": ";uchicago.edu;", "position": ";PhD student;", "bibtex": "@misc{\nyang2023masked,\ntitle={Masked inverse folding with sequence transfer for protein representation learning},\nauthor={Kevin K Yang and Hugh Yeh and Niccol{\\`o} Zanichelli},\nyear={2023},\nurl={https://openreview.net/forum?id=2EO8eQ2vySB}\n}", "github": "", "project": "", "reviewers": "u96a;ABwW;vWP7;mxA7", "site": "https://openreview.net/forum?id=2EO8eQ2vySB", "pdf_size": 1143424, "recommendation": "5;5;5;6", "confidence": "3;4;5;4", "correctness": "3;4;4;3", "technical_novelty": "2;2;3;2", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "77;77;34;98", "wc_strength_and_weaknesses": "485;133;287;211", "wc_clarity_quality_novelty_and_reproducibility": "84;60;83;70", "wc_summary_review": "20;319;72;50", "wc_review": "666;589;476;429", "wc_reply_reviewers": "45;0;146;104", "wc_reply_authors": "535;674;377;302", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "recommendation_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 71.5, 23.286262044390035 ], "wc_strength_and_weaknesses_avg": [ 279.0, 130.80519867344722 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 74.25, 9.908960591303208 ], "wc_summary_review_avg": [ 115.25, 119.07429403527867 ], "wc_review_avg": [ 540.0, 93.13162728096187 ], "wc_reply_reviewers_avg": [ 73.75, 55.67932740254681 ], "wc_reply_authors_avg": [ 472.0, 143.78629976461596 ], "reply_reviewers_avg": [ 
0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": -0.5773502691896257, "gs_citation": 88, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16332887134596955400&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff_unique_index": "0", "aff_unique_norm": "University of Chicago", "aff_unique_dep": "", "aff_unique_url": "https://www.uchicago.edu", "aff_unique_abbr": "UChicago", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Effectively Modeling Time Series with Simple Discrete State Spaces", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10874", "id": "2EpjkjzdCAa", "poster": "", "openreview": "https://openreview.net/forum?id=2EpjkjzdCAa", "slides": "https://iclr.cc/virtual/2023/poster/10874", "video": "https://iclr.cc/virtual/2023/poster/10874", "author_site": "Michael Zhang, Khaled Saab, Michael Poli, Tri Dao, Karan Goel, Christopher Re", "tldr": "We propose SpaceTime, a deep state space time series model that achieves state-of-the-art results on forecasting and classification benchmarks, by improving expressiveness, forecasting flexibility, and training efficiency over prior approaches. ", "abstract": "Time series modeling is a well-established problem, which often requires that methods (1) expressively represent complicated dependencies, (2) forecast long horizons, and (3) efficiently train over long sequences. State-space models (SSMs) are classical models for time series, and prior works combine SSMs with deep learning layers for efficient sequence modeling. However, we find fundamental limitations with these prior approaches, proving their SSM representations cannot express autoregressive time series processes. We thus introduce SpaceTime, a new state-space time series architecture that improves all three criteria. For expressivity, we propose a new SSM parameterization based on the companion matrix---a canonical representation for discrete-time processes---which enables SpaceTime's SSM layers to learn desirable autoregressive processes. For long horizon forecasting, we introduce a \"closed-loop\" variation of the companion SSM, which enables SpaceTime to predict many future time-steps by generating its own layer-wise inputs. For efficient training and inference, we introduce an algorithm that reduces the memory and compute of a forward pass with the companion matrix. With sequence length $\\ell$ and state-space size $d$, we go from $\\tilde{O}(d \\ell)$ na\u00efvely to $\\tilde{O}(d + \\ell)$. In experiments, our contributions lead to state-of-the-art results on extensive and diverse benchmarks, with best or second-best AUROC on 6 / 7 ECG and speech time series classification, and best MSE on 14 / 16 Informer forecasting tasks. 
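The SpaceTime abstract above rests on the companion matrix being a canonical representation of discrete-time autoregressive processes. A small numpy sketch of that correspondence (illustrative only, not the paper's SSM layer):

```python
import numpy as np

def companion(a):
    """Companion matrix of the AR(p) recursion x_t = sum_i a[i] * x_{t-1-i}."""
    p = len(a)
    A = np.zeros((p, p))
    A[0, :] = a                  # first row carries the AR coefficients
    A[1:, :-1] = np.eye(p - 1)   # shift register for the remaining state
    return A

def forecast(a, history, steps):
    """Roll the state [x_{t-1}, ..., x_{t-p}] forward `steps` times."""
    A, state = companion(a), np.array(history, dtype=float)
    out = []
    for _ in range(steps):
        state = A @ state
        out.append(state[0])
    return np.array(out)

print(forecast([1.5, -0.7], [1.0, 0.8], steps=5))  # damped-oscillation AR(2)
```

The closed-loop variant in the abstract amounts to feeding these self-generated states back in as layer inputs, which is why long horizons come cheaply under this parameterization.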
Furthermore, we find SpaceTime (1) fits AR($p$) processes that prior deep SSMs fail on, (2) forecasts notably more accurately on longer horizons than prior state-of-the-art, and (3) speeds up training on real-world ETTh1 data by 73% and 80% relative wall-clock time over Transformers and LSTMs.", "keywords": "time series;forecasting;state-space models;time series classification", "primary_area": "", "supplementary_material": "/attachment/ddfb5e93d72d58dcd01837292f1a68ffa9f42b5e.zip", "author": "Michael Zhang;Khaled Kamal Saab;Michael Poli;Tri Dao;Karan Goel;Christopher Re", "authorids": "~Michael_Zhang4;~Khaled_Kamal_Saab1;~Michael_Poli1;~Tri_Dao1;~Karan_Goel1;~Christopher_Re1", "gender": "M;;M;;M;", "homepage": "https://michaelzhang.xyz/;https://web.stanford.edu/~ksaab/;;https://tridao.me/;http://krandiash.github.io;", "dblp": ";176/4061;;206/7018;175/1290;", "google_scholar": "DG_asaIAAAAJ;W77CiNUAAAAJ;RgIBwboAAAAJ;NQRw0bQAAAAJ;;", "orcid": ";0000-0003-1427-0469;;;;", "linkedin": ";khaled-saab-181034122/;;;;", "or_profile": "~Michael_Zhang4;~Khaled_Kamal_Saab1;~Michael_Poli1;~Tri_Dao1;~Karan_Goel1;~Christopher_Re1", "aff": "Apple;Stanford University;Stanford University;Stanford University;Stanford University;", "aff_domain": "apple.com;stanford.edu;stanford.edu;stanford.edu;stanford.edu;", "position": "Intern;PhD student;PhD student;PhD student;PhD student;", "bibtex": "@inproceedings{\nzhang2023effectively,\ntitle={Effectively Modeling Time Series with Simple Discrete State Spaces},\nauthor={Michael Zhang and Khaled Kamal Saab and Michael Poli and Tri Dao and Karan Goel and Christopher Re},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=2EpjkjzdCAa}\n}", "github": "", "project": "", "reviewers": "BK6G;hBe1;YRVv;61Ty", "pdf_size": 9609814, "recommendation": "3;5;6;8", "confidence": "3;3;2;3", "correctness": "3;3;3;4", "technical_novelty": "2;3;3;4", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "30;33;58;147", "wc_strength_and_weaknesses": "215;55;347;319", "wc_clarity_quality_novelty_and_reproducibility": "54;33;26;55", "wc_summary_review": "116;352;100;62", "wc_review": "415;473;531;583", "wc_reply_reviewers": "0;338;279;0", "wc_reply_authors": "1648;3239;3114;477", "reply_reviewers": "0;1;2;0", "reply_authors": "3;6;5;1", "recommendation_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 67.0, 47.44997365647319 ], "wc_strength_and_weaknesses_avg": [ 234.0, 114.45086281894078 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 42.0, 12.747548783981962 ], "wc_summary_review_avg": [ 157.5, 113.99451741202293 ], "wc_review_avg": [ 500.5, 62.854991846312416 ], "wc_reply_reviewers_avg": [ 154.25, 155.65406355119677 ], "wc_reply_authors_avg": [ 2119.5, 1136.0489646137617 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 3.75, 1.920286436967152 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.16012815380508713, "corr_recommendation_correctness": 0.8006407690254357, "gs_citation": 57, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6087967633035202831&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=2EpjkjzdCAa", "email": 
"apple.com;stanford.edu;stanford.edu;stanford.edu;stanford.edu;", "author_num": 6, "aff_unique_index": "0;1;1;1;1", "aff_unique_norm": "Apple;Stanford University", "aff_unique_dep": "Apple Inc.;", "aff_unique_url": "https://www.apple.com;https://www.stanford.edu", "aff_unique_abbr": "Apple;Stanford", "aff_campus_unique_index": "1;1;1;1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "2Fb-h04mt5I", "title": "Robustify Transformers with Robust Kernel Density Estimation", "track": "main", "status": "Reject", "tldr": "We propose a robust transformer that can that can alleviate the effect from contaminated data while improve the clean data performance.", "abstract": "Recent advances in Transformer architecture have empowered its empirical success in various tasks across different domains. However, existing works mainly focus on improving the standard accuracy and computational cost, without considering the robustness of contaminated samples. Existing work (Nguyen et al, 2022, FourierFormer) has shown that the self-attention mechanism, which is the center of the Transformer architecture, can be viewed as a non-parametric estimator based on the well-known kernel density estimation (KDE). This motivates us to leverage the robust kernel density estimation (RKDE) in the self-attention mechanism, to alleviate the issue of the contamination of data by down-weighting the weight of bad samples in the estimation process. The modified self-attention mechanism can be incorporated into different Transformer variants. Empirical results on language modeling and image classification tasks demonstrate the effectiveness of this approach.", "keywords": "Transformers;Kernel Density Estimation;Robustness", "primary_area": "", "supplementary_material": "", "author": "Xing Han;Tongzheng Ren;Tan Minh Nguyen;Khai Nguyen;Joydeep Ghosh;Nhat Ho", "authorids": "~Xing_Han1;~Tongzheng_Ren1;~Tan_Minh_Nguyen1;~Khai_Nguyen1;~Joydeep_Ghosh1;~Nhat_Ho1", "gender": "M;M;M;M;M;M", "homepage": "https://aaronhan223.github.io/;https://www.cs.utexas.edu/~tzren/;https://tanmnguyen89.github.io/;https://khainb.com;http://ideal.ece.utexas.edu/ghosh/;https://nhatptnk8912.github.io/", "dblp": "05/2143;211/8004;255/4725;120/4308;51/2272;203/4479", "google_scholar": "Vejou24AAAAJ;VgNDYeYAAAAJ;OizOh88AAAAJ;im5fNaQAAAAJ;;https://scholar.google.ca/citations?user=Xs7cKMwAAAAJ", "orcid": "0000-0003-0857-5506;;;;;", "linkedin": "xing-han-628653b6/;;;;;nhat-pham-minh-ho-267b8164/", "or_profile": "~Xing_Han1;~Tongzheng_Ren1;~Tan_Minh_Nguyen1;~Khai_Nguyen1;~Joydeep_Ghosh1;~Nhat_Ho1", "aff": "University of Texas at Austin;Google;University of California, Los Angeles;University of Texas, Austin;University of Texas, Austin;University of Texas, Austin", "aff_domain": "utexas.edu;google.com;ucla.edu;utexas.edu;utexas.edu;utexas.edu", "position": "PhD student;Intern;Postdoc;PhD student;Full Professor;Assistant Professor", "bibtex": "@misc{\nhan2023robustify,\ntitle={Robustify Transformers with Robust Kernel Density Estimation},\nauthor={Xing Han and Tongzheng Ren and Tan Minh Nguyen and Khai Nguyen and Joydeep Ghosh and Nhat Ho},\nyear={2023},\nurl={https://openreview.net/forum?id=2Fb-h04mt5I}\n}", "github": "", "project": "", "reviewers": "cLTJ;tAEV;NtCQ;N1gC;3QyS", "site": "https://openreview.net/forum?id=2Fb-h04mt5I", "pdf_size": 604060, "recommendation": "3;3;5;5;5", "confidence": "4;2;4;4;3", "correctness": "3;3;4;3;4", "technical_novelty": "1;2;3;2;3", "empirical_novelty": 
"1;2;2;2;2", "wc_summary_paper": "48;40;96;58;57", "wc_strength_and_weaknesses": "47;209;126;243;69", "wc_clarity_quality_novelty_and_reproducibility": "12;45;22;17;9", "wc_summary_review": "13;115;20;17;59", "wc_review": "120;409;264;335;194", "wc_reply_reviewers": "0;0;32;45;0", "wc_reply_authors": "98;96;82;99;111", "reply_reviewers": "0;0;1;1;0", "reply_authors": "1;1;1;1;1", "recommendation_avg": [ 4.2, 0.9797958971132712 ], "confidence_avg": [ 3.4, 0.8 ], "correctness_avg": [ 3.4, 0.4898979485566356 ], "technical_novelty_avg": [ 2.2, 0.7483314773547882 ], "empirical_novelty_avg": [ 1.8, 0.4000000000000001 ], "wc_summary_paper_avg": [ 59.8, 19.249935064825547 ], "wc_strength_and_weaknesses_avg": [ 138.8, 76.48372375871875 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 21.0, 12.790621564255586 ], "wc_summary_review_avg": [ 44.8, 38.80412349222696 ], "wc_review_avg": [ 264.4, 101.68697065012805 ], "wc_reply_reviewers_avg": [ 15.4, 19.303885619221848 ], "wc_reply_authors_avg": [ 97.2, 9.239047569960876 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.4082482904638631, "corr_recommendation_correctness": 0.6666666666666667, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13125294337630594273&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1;2;0;0;0", "aff_unique_norm": "University of Texas at Austin;Google;University of California, Los Angeles", "aff_unique_dep": ";Google;", "aff_unique_url": "https://www.utexas.edu;https://www.google.com;https://www.ucla.edu", "aff_unique_abbr": "UT Austin;Google;UCLA", "aff_campus_unique_index": "0;1;2;0;0;0", "aff_campus_unique": "Austin;Mountain View;Los Angeles", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "2G-vUJ7XcSB", "title": "On the Power of Pre-training for Generalization in RL: Provable Benefits and Hardness", "track": "main", "status": "Reject", "tldr": "A theoretical research on how much pre-training in reinforcement learning can help improve performance in target environment.", "abstract": "Generalization in Reinforcement Learning (RL) aims to train an agent during training that generalizes to the target environment. In this work, we first point out that RL generalization is fundamentally different from the generalization in supervised learning, and fine-tuning on the target environment is necessary for good test performance. Therefore, we seek to answer the following question: how much can we expect pre-training over training environments to be helpful for efficient and effective fine-tuning? On one hand, we give a surprising result showing that asymptotically, the improvement from pre-training is at most a constant factor. On the other hand, we show that pre-training can be indeed helpful in the non-asymptotic regime by designing a policy collection-elimination (PCE) algorithm and proving a distribution-dependent regret bound that is independent of the state-action space. 
We hope our theoretical results can provide insight towards understanding pre-training and generalization in RL.", "keywords": "Reinforcement Learning;Generalization;Learning Theory", "primary_area": "", "supplementary_material": "", "author": "Haotian Ye;Xiaoyu Chen;Liwei Wang;Simon Shaolei Du", "authorids": "~Haotian_Ye1;~Xiaoyu_Chen2;~Liwei_Wang1;~Simon_Shaolei_Du1", "gender": "M;M;M;M", "homepage": "https://haotianye.com;;http://www.liweiwang-pku.com/;http://simonshaoleidu.com", "dblp": "284/0539;30/4497;;176/5602", "google_scholar": "VU4chlsAAAAJ;sioumZAAAAAJ;VZHxoh8AAAAJ;OttawxUAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Haotian_Ye1;~Xiaoyu_Chen2;~Liwei_Wang1;~Simon_Shaolei_Du1", "aff": "Peking University;Peking University;Peking University;Meta Facebook", "aff_domain": "pku.edu.cn;pku.edu.cn;pku.edu.cn;fb.com", "position": "Undergrad student;PhD student;Full Professor;Visiting Professor", "bibtex": "@misc{\nye2023on,\ntitle={On the Power of Pre-training for Generalization in {RL}: Provable Benefits and Hardness},\nauthor={Haotian Ye and Xiaoyu Chen and Liwei Wang and Simon Shaolei Du},\nyear={2023},\nurl={https://openreview.net/forum?id=2G-vUJ7XcSB}\n}", "github": "", "project": "", "reviewers": "s13Q;tCeD;SxSR", "site": "https://openreview.net/forum?id=2G-vUJ7XcSB", "pdf_size": 464544, "recommendation": "5;5;8", "confidence": "4;3;3", "correctness": "3;4;3", "technical_novelty": "3;2;3", "empirical_novelty": "0;0;0", "wc_summary_paper": "34;111;157", "wc_strength_and_weaknesses": "253;249;542", "wc_clarity_quality_novelty_and_reproducibility": "1;36;178", "wc_summary_review": "12;98;185", "wc_review": "300;494;1062", "wc_reply_reviewers": "58;67;209", "wc_reply_authors": "675;388;1276", "reply_reviewers": "1;1;1", "reply_authors": "3;2;4", "recommendation_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 0, 0 ], "wc_summary_paper_avg": [ 100.66666666666667, 50.743363003341514 ], "wc_strength_and_weaknesses_avg": [ 348.0, 137.18843488671584 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 71.66666666666667, 76.53466897789234 ], "wc_summary_review_avg": [ 98.33333333333333, 70.62734755193786 ], "wc_review_avg": [ 618.6666666666666, 323.33402061782624 ], "wc_reply_reviewers_avg": [ 111.33333333333333, 69.15843324489711 ], "wc_reply_authors_avg": [ 779.6666666666666, 370.0021020961307 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.0, 0.816496580927726 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.5, "corr_recommendation_correctness": -0.5, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17477892907714990111&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Peking University;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "http://www.pku.edu.cn;https://meta.com", "aff_unique_abbr": "Peking U;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "China;United States" }, { "id": "2L-nspTvNVC", "title": "Deep Class Conditional Gaussians for Continual Learning", "track": "main", "status": "Reject", "tldr": "We present an empirical Bayesian method to solve the problem in continual learning of how to use simple metric-based 
probabilistic models when the embedding function must be learnt online.", "abstract": "The current state of the art for continual learning with frozen, pre-trained embedding networks is simple probabilistic models defined over the embedding space, for example class conditional Gaussians. However, as of yet, in the task-incremental online setting, it has been an open question how to extend these methods to when the embedding function has to be learned from scratch. In this paper, we propose an empirical Bayesian framework that works by storing a fixed number of examples in memory which are used to calculate the posterior of the probabilistic model and a conditional marginal likelihood term used to fit the embedding function. The learning of the embedding function can be interpreted as using a variant of experience replay, which is a high-performing method for continual learning. As part of our framework, we decide which examples to store by selecting the subset that minimises the KL divergence between the true posterior and the posterior induced by the subset, which is shown to be necessary to achieve good performance. We demonstrate the performance of our method on a range of task-incremental online settings, including those with overlapping tasks which thus far have been under-explored. Our method outperforms all other methods, including several other replay-based methods, evidencing the potential of our approach.", "keywords": "Continual Learning;Lifelong Learning;Bayesian;Empirical Bayes;Probabilistic Machine Learning", "primary_area": "", "supplementary_material": "/attachment/57ded7a4fd38caf21eca92b92016836fb68d700b.zip", "author": "Thomas L Lee;Amos Storkey", "authorids": "~Thomas_L_Lee1;~Amos_Storkey1", "gender": ";Not Specified", "homepage": "https://tlee43.github.io/;http://homepages.inf.ed.ac.uk/amos/", "dblp": ";", "google_scholar": "pRcPv_cAAAAJ;", "orcid": ";", "linkedin": "thomas-lee-aa27a9176/;", "or_profile": "~Thomas_L_Lee1;~Amos_Storkey1", "aff": "University of Edinburgh, University of Edinburgh;University of Edinburgh", "aff_domain": "sms.ed.ac.uk;ed.ac.uk", "position": "PhD student;Full Professor", "bibtex": "@misc{\nlee2023deep,\ntitle={Deep Class Conditional Gaussians for Continual Learning},\nauthor={Thomas L Lee and Amos Storkey},\nyear={2023},\nurl={https://openreview.net/forum?id=2L-nspTvNVC}\n}", "github": "", "project": "", "reviewers": "juyJ;rHsD;U88A;e2L5", "site": "https://openreview.net/forum?id=2L-nspTvNVC", "pdf_size": 639330, "recommendation": "5;5;5;6", "confidence": "4;4;5;3", "correctness": "3;2;3;4", "technical_novelty": "2;3;2;3", "empirical_novelty": "2;2;2;0", "wc_summary_paper": "107;139;96;27", "wc_strength_and_weaknesses": "400;489;326;119", "wc_clarity_quality_novelty_and_reproducibility": "18;107;30;7", "wc_summary_review": "45;50;42;14", "wc_review": "570;785;494;167", "wc_reply_reviewers": "46;204;269;57", "wc_reply_authors": "1243;2156;1671;321", "reply_reviewers": "1;1;2;2", "reply_authors": "2;3;5;3", "recommendation_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 1.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 92.25, 40.84957160118084 ], "wc_strength_and_weaknesses_avg": [ 333.5, 136.6281449775265 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 40.5, 39.24601890638081 ], "wc_summary_review_avg": [ 37.75, 14.00669482783144 ], "wc_review_avg": [ 504.0, 221.9155244682084 ],
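A minimal sketch of the class-conditional Gaussian head that the entry above builds on, fit over embedding-space features. The shared diagonal covariance and all function names here are simplifying assumptions for illustration, not the paper's implementation:

```python
import numpy as np

def fit_class_gaussians(Z, y, num_classes):
    """Per-class means and a shared diagonal variance over embeddings Z (N, d)."""
    means = np.stack([Z[y == c].mean(axis=0) for c in range(num_classes)])
    var = Z.var(axis=0) + 1e-6   # shared diagonal covariance, regularized
    return means, var

def predict(Z, means, var):
    """Assign each embedding to the class with the highest Gaussian likelihood."""
    d2 = (((Z[:, None, :] - means[None]) ** 2) / var).sum(axis=-1)
    return d2.argmin(axis=1)     # smallest Mahalanobis-style distance wins

Z = np.random.randn(100, 8)
y = np.arange(100) % 3           # toy labels covering 3 classes
means, var = fit_class_gaussians(Z, y, 3)
print(predict(Z[:5], means, var))
```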
"wc_reply_reviewers_avg": [ 144.0, 95.39129939360298 ], "wc_reply_authors_avg": [ 1347.75, 675.0827264121043 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 3.25, 1.0897247358851685 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.816496580927726, "corr_recommendation_correctness": 0.816496580927726, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12925153870485740695&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Edinburgh", "aff_unique_dep": "", "aff_unique_url": "https://www.ed.ac.uk", "aff_unique_abbr": "Edinburgh", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "title": "Does Learning from Decentralized Non-IID Unlabeled Data Benefit from Self Supervision?", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11453", "id": "2L9gzS80tA4", "poster": "", "openreview": "https://openreview.net/forum?id=2L9gzS80tA4", "slides": "https://iclr.cc/virtual/2023/poster/11453", "video": "https://iclr.cc/virtual/2023/poster/11453", "author_site": "Lirui Wang, Kaiqing Zhang, Yunzhu Li, Yonglong Tian, Russ Tedrake", "tldr": "We study decentralized learning with non-IID unlabeled data, and try to understand the robustness and communication efficiency of decentralized self-supervised learning, through extensive experiments and theoretical analysis.", "abstract": "The success of machine learning relies heavily on massive amounts of data, which are usually generated and stored across a range of diverse and distributed data sources. Decentralized learning has thus been advocated and widely deployed to make efficient use of distributed datasets, with an extensive focus on supervised learning (SL) problems. Unfortunately, the majority of real-world data are unlabeled and can be highly heterogeneous across sources. In this work, we carefully study decentralized learning with unlabeled data through the lens of self-supervised learning (SSL), specifically contrastive visual representation learning. We study the effectiveness of a range of contrastive learning algorithms under a decentralized learning setting, on relatively large-scale datasets including ImageNet-100, MS-COCO, and a new real-world robotic warehouse dataset. Our experiments show that the decentralized SSL (Dec-SSL) approach is robust to the heterogeneity of decentralized datasets, and learns useful representation for object classification, detection, and segmentation tasks, even when combined with the simple and standard decentralized learning algorithm of Federated Averaging (FedAvg). This robustness makes it possible to significantly reduce communication and to reduce the participation ratio of data sources with only minimal drops in performance. Interestingly, using the same amount of data, the representation learned by Dec-SSL can not only perform on par with that learned by centralized SSL which requires communication and excessive data storage costs, but also sometimes outperform representations extracted from decentralized SL which requires extra knowledge about the data labels. 
Finally, we provide theoretical insights into understanding why data heterogeneity is less of a concern for Dec-SSL objectives, and introduce feature alignment and clustering techniques to develop a new Dec-SSL algorithm that further improves the performance, in the face of highly non-IID data. Our study presents positive evidence to embrace unlabeled data in decentralized learning, and we hope to provide new insights into whether and why decentralized SSL is effective and/or even advantageous.", "keywords": "Decentralized Learning;Heterogeneous and Unlabeled Data;Federated Learning;Self-Supervised Learning;Representation Learning", "primary_area": "", "supplementary_material": "/attachment/7e548f9cfdb2220d68487e86dee72e58a4bbbd24.zip", "author": "Lirui Wang;Kaiqing Zhang;Yunzhu Li;Yonglong Tian;Russ Tedrake", "authorids": "~Lirui_Wang1;~Kaiqing_Zhang3;~Yunzhu_Li1;~Yonglong_Tian1;~Russ_Tedrake1", "gender": "M;M;;M;M", "homepage": "https://liruiw.github.io/;https://yunzhuli.github.io/;http://people.csail.mit.edu/yonglong/;http://people.csail.mit.edu/russt;https://kzhang66.github.io/", "dblp": "221/9612;182/1831;151/6328;73/1296;", "google_scholar": "EM9YhH0AAAAJ;WlA92lcAAAAJ;https://scholar.google.com.hk/citations?user=OsP7JHAAAAAJ;nxNkEiYAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Lirui_Wang1;~Yunzhu_Li1;~Yonglong_Tian1;~Russ_Tedrake1;~kaiqing_zhang1", "aff": "Massachusetts Institute of Technology;Stanford University;Google;Massachusetts Institute of Technology;University of Maryland, College Park", "aff_domain": "mit.edu;stanford.edu;google.com;mit.edu;umd.edu", "position": "PhD student;Postdoc;Researcher;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nwang2023does,\ntitle={Does Learning from Decentralized Non-{IID} Unlabeled Data Benefit from Self Supervision?},\nauthor={Lirui Wang and Kaiqing Zhang and Yunzhu Li and Yonglong Tian and Russ Tedrake},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=2L9gzS80tA4}\n}", "github": "", "project": "", "reviewers": "bG2C;84PM;AXwb;fhez", "pdf_size": 5355452, "recommendation": "5;6;6;8", "confidence": "3;4;3;3", "correctness": "3;3;3;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;3;2;4", "wc_summary_paper": "39;101;97;122", "wc_strength_and_weaknesses": "141;587;489;360", "wc_clarity_quality_novelty_and_reproducibility": "34;15;109;80", "wc_summary_review": "24;100;88;62", "wc_review": "238;803;783;624", "wc_reply_reviewers": "0;0;0;109", "wc_reply_authors": "1270;1574;1559;615", "reply_reviewers": "0;0;0;1", "reply_authors": "4;4;5;3", "recommendation_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 89.75, 30.80077109424373 ], "wc_strength_and_weaknesses_avg": [ 394.25, 166.91221495145285 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 59.5, 37.08436328157732 ], "wc_summary_review_avg": [ 68.5, 29.13331426391443 ], "wc_review_avg": [ 612.0, 226.79395935518212 ], "wc_reply_reviewers_avg": [ 27.25, 47.198384506251905 ], "wc_reply_authors_avg": [ 1254.5, 388.5875062325087 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 4.0, 0.7071067811865476 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 
-0.13245323570650439, "corr_recommendation_correctness": 0.0, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13098349893707766603&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=2L9gzS80tA4", "email": "mit.edu;stanford.edu;google.com;mit.edu;umd.edu", "author_num": 5, "aff_unique_index": "0;1;2;0;3", "aff_unique_norm": "Massachusetts Institute of Technology;Stanford University;Google;University of Maryland", "aff_unique_dep": ";;Google;", "aff_unique_url": "https://web.mit.edu;https://www.stanford.edu;https://www.google.com;https://www.umd.edu", "aff_unique_abbr": "MIT;Stanford;Google;UMD", "aff_campus_unique_index": "1;2;3", "aff_campus_unique": ";Stanford;Mountain View;College Park", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "2NQ8wlmU9a_", "title": "Hybrid-Regressive Neural Machine Translation", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "\nIn this work, we empirically confirm that non-autoregressive translation with an iterative refinement mechanism (IR-NAT) suffers from poor acceleration robustness because it is more sensitive to decoding batch size and computing device setting than autoregressive translation (AT). Inspired by this, we investigate how to better combine the strengths of the autoregressive and non-autoregressive translation paradigms. To this end, we demonstrate through synthetic experiments that prompting with a small number of AT's predictions can promote one-shot non-autoregressive translation to achieve performance equivalent to IR-NAT. Following this line, we propose a new two-stage translation prototype called hybrid-regressive translation (HRT). Specifically, HRT first generates discontinuous sequences via autoregression (e.g., make a prediction every $k$ tokens, $k>1$) and then fills in all previously skipped tokens at once in a non-autoregressive manner. We also propose a bag of techniques to effectively and efficiently train HRT without adding any model parameters. HRT achieves the state-of-the-art BLEU score of 28.49 on the WMT En-De task and is at least 1.5x faster than AT, regardless of batch size and device. In addition, another bonus of HRT is that it successfully inherits the good characteristics of AT in the deep-encoder-shallow-decoder architecture.
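The two-stage HRT procedure just described reduces to a simple control flow: sparse autoregression for anchors, then one parallel fill. A toy sketch follows; `ar_step` and `nar_fill` are hypothetical stand-ins for the trained model, not anything from the paper:

```python
# Toy sketch of hybrid-regressive decoding (illustrative only).
def hybrid_decode(length, k, ar_step, nar_fill):
    anchors = {}
    prev = None
    for pos in range(0, length, k):        # stage 1: predict every k-th token
        prev = ar_step(pos, prev)          # autoregressive, conditioned on prev
        anchors[pos] = prev
    gaps = [p for p in range(length) if p not in anchors]
    filled = nar_fill(anchors, gaps)       # stage 2: one non-autoregressive pass
    anchors.update(filled)
    return [anchors[p] for p in range(length)]

# dummy usage with placeholder "models"
out = hybrid_decode(8, 2,
                    ar_step=lambda p, prev: f"t{p}",
                    nar_fill=lambda a, gaps: {g: f"t{g}" for g in gaps})
print(out)  # ['t0', 't1', ..., 't7']
```

Because stage 1 runs only length/k autoregressive steps and stage 2 is a single pass, the speedup holds regardless of batch size, which is the acceleration-robustness point the abstract makes against IR-NAT.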
Concretely, compared to the vanilla HRT with a 6-layer encoder and 6-layer decoder, the inference speed of HRT with a 12-layer encoder and 1-layer decoder is further doubled on both GPU and CPU without BLEU loss.", "keywords": "autoregressive translation;non-autoregressive translation;inference acceleration", "primary_area": "", "supplementary_material": "", "author": "Qiang Wang;Xinhui Hu;Ming Chen", "authorids": "~Qiang_Wang8;huxinhui@myhexin.com;chm@zju.edu.cn", "gender": "M;;", "homepage": "https://wangqiangneu.github.io/;;", "dblp": ";;", "google_scholar": "gDCxDEsAAAAJ;;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Qiang_Wang8;huxinhui@myhexin.com;chm@zju.edu.cn", "aff": "Hithink RoyalFlush AI Research Institute;;", "aff_domain": "myhexin.com;;", "position": "Researcher;;", "bibtex": "@misc{\nwang2023hybridregressive,\ntitle={Hybrid-Regressive Neural Machine Translation},\nauthor={Qiang Wang and Xinhui Hu and Ming Chen},\nyear={2023},\nurl={https://openreview.net/forum?id=2NQ8wlmU9a_}\n}", "github": "", "project": "", "reviewers": "e4XY;PeQ7;RnHw;r7mp", "site": "https://openreview.net/forum?id=2NQ8wlmU9a_", "pdf_size": 335643, "recommendation": "3;5;5;6", "confidence": "5;4;4;4", "correctness": "3;3;3;4", "technical_novelty": "2;3;3;2", "empirical_novelty": "2;3;2;3", "wc_summary_paper": "97;150;88;136", "wc_strength_and_weaknesses": "370;169;298;244", "wc_clarity_quality_novelty_and_reproducibility": "72;40;18;44", "wc_summary_review": "49;44;56;35", "wc_review": "588;403;460;459", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "945;352;764;450", "reply_reviewers": "0;0;0;0", "reply_authors": "2;1;1;1", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 117.75, 25.926579026165406 ], "wc_strength_and_weaknesses_avg": [ 270.25, 73.58795757459232 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 43.5, 19.20286436967152 ], "wc_summary_review_avg": [ 46.0, 7.648529270389178 ], "wc_review_avg": [ 477.5, 67.83988502348747 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 627.75, 238.14110837904488 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.9271726499455306, "corr_recommendation_correctness": 0.6622661785325219, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17631314983308142979&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0", "aff_unique_norm": "Hithink RoyalFlush AI Research Institute", "aff_unique_dep": "AI Research Institute", "aff_unique_url": "", "aff_unique_abbr": "", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "id": "2OETPKmG4S0", "title": "On the Lower Bound of Minimizing Polyak-\u0141ojasiewicz functions", "track": "main", "status": "Reject", "tldr": "We show that any first-order algorithm requires at least $\\tilde{\\Omega}\\left((L/\\mu)^{1-\\alpha} \\right)$ gradient costs to find an $\\epsilon$-approximate optimal solution for a general $L$-smooth, $\\mu$-PL function for any $\\alpha>0$.", "abstract": "The Polyak-\u0141ojasiewicz (PL) condition [Polyak, 1963] is weaker than strong convexity but suffices to ensure global convergence of the Gradient Descent algorithm.
In this paper, we study the lower bound for algorithms using first-order oracles to find an approximate optimal solution. We show that any first-order algorithm requires at least $\\Omega\\left((L/\\mu)^{1-\\alpha} \\right)$ gradient costs to find an $\\epsilon$-approximate optimal solution for a general $L$-smooth function that has a $\\mu$-PL constant, for any $\\alpha>0$. This result demonstrates the near optimality of the Gradient Descent algorithm for minimizing smooth PL functions, in the sense that there exists a ``hard'' PL function such that no first-order algorithm can be faster by a polynomial order. In contrast, it is well-known that the momentum technique, e.g. [Nesterov, 2003, chap. 2], can provably accelerate Gradient Descent to $O\\left(\\sqrt{L/\\hat{\\mu}}\\log\\frac{1}{\\epsilon}\\right)$ gradient costs for functions that are $L$-smooth and $\\hat{\\mu}$-strongly convex. Therefore, our result distinguishes the hardness of minimizing a smooth PL function from that of a smooth strongly convex function, as the complexity of the former cannot be improved by any polynomial order in general. ", "keywords": "Polyak-\u0141ojasiewicz Condition;First-order Algorithms;Lower Bound;Complexity", "primary_area": "", "supplementary_material": "/attachment/9301d0453e80b6f7f7935f87b001e02eb10b9c12.zip", "author": "Pengyun Yue;Cong Fang;Zhouchen Lin", "authorids": "~Pengyun_Yue1;~Cong_Fang1;~Zhouchen_Lin1", "gender": ";M;M", "homepage": ";https://congfang-ml.github.io/;https://zhouchenlin.github.io", "dblp": "354/3974;140/6568;l/ZhouchenLin", "google_scholar": ";N2M9RPoAAAAJ;https://scholar.google.com.tw/citations?user=TanjFwoAAAAJ", "orcid": "0000-0001-7134-7106;;0000-0003-1493-7569", "linkedin": ";;", "or_profile": "~Pengyun_Yue1;~Cong_Fang1;~Zhouchen_Lin1", "aff": "Peking University;Peking University;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;pku.edu.cn", "position": "PhD student;Assistant Professor;Professor", "bibtex": "@misc{\nyue2023on,\ntitle={On the Lower Bound of Minimizing Polyak-{\\L}ojasiewicz functions},\nauthor={Pengyun Yue and Cong Fang and Zhouchen Lin},\nyear={2023},\nurl={https://openreview.net/forum?id=2OETPKmG4S0}\n}", "github": "", "project": "", "reviewers": "a1uz;dAiB;sXZK", "site": "https://openreview.net/forum?id=2OETPKmG4S0", "pdf_size": 415461, "recommendation": "3;5;8", "confidence": "4;4;4", "correctness": "3;4;3", "technical_novelty": "4;3;2", "empirical_novelty": "0;0;0", "wc_summary_paper": "70;49;30", "wc_strength_and_weaknesses": "1460;46;598", "wc_clarity_quality_novelty_and_reproducibility": "81;43;34", "wc_summary_review": "145;217;33", "wc_review": "1756;355;695", "wc_reply_reviewers": "76;0;94", "wc_reply_authors": "786;165;693", "reply_reviewers": "1;0;1", "reply_authors": "1;1;1", "recommendation_avg": [ 5.333333333333333, 2.0548046676563256 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.816496580927726 ], "empirical_novelty_avg": [ 0, 0 ], "wc_summary_paper_avg": [ 49.666666666666664, 16.33673433979046 ], "wc_strength_and_weaknesses_avg": [ 701.3333333333334, 581.86901924364 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 52.666666666666664, 20.368821489936252 ], "wc_summary_review_avg": [ 131.66666666666666, 75.70703768841808 ], "wc_review_avg": [ 935.3333333333334, 596.6687150802827 ], "wc_reply_reviewers_avg": [ 56.666666666666664, 40.737642979872504 ], "wc_reply_authors_avg": [ 548.0, 273.4702908909851 ], "reply_reviewers_avg": [ 0.6666666666666666,
0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": -0.1147078669352809, "gs_citation": 41, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14279576987513792768&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "2PI2EKASh_Z", "title": "Revisiting Information-Based Clustering with Pseudo-Posterior Models", "track": "main", "status": "Reject", "tldr": "", "abstract": "Maximization of mutual information (MI) between the network's input and output motivates standard losses for unsupervised discriminative clustering enforcing \"decisiveness\" and \"fairness\". In the context of common softmax models, we clarify several general properties of such discriminative losses that were previously not well understood: the relation to K-means, or lack thereof, and \"margin-maximization\". In particular, we show that \"decisiveness\" without the extra regularization term can lead to poor classification margins. Also, the non-convexity of information-based losses motivates us to focus on self-supervised approaches, introducing effective higher-order optimization algorithms with auxiliary variables. Addressing limitations of existing formulations, we propose a new self-supervised loss with soft auxiliary variables, or \"pseudo-confidence\" estimates. In particular, we introduce \"strong\" fairness and motivate the \"reverse\" cross-entropy as a robust loss for network training from noisy pseudo-confidence estimates. The latter is efficiently computed using variational inference - we derive a new EM algorithm with closed-form solutions for E and M steps.
Empirically, our algorithm improves the performance of earlier methods for information-based clustering.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Zhongwen Zhang;Yuri Boykov", "authorids": "~Zhongwen_Zhang1;~Yuri_Boykov1", "gender": "M;M", "homepage": ";https://cs.uwaterloo.ca/~yboykov/", "dblp": "02/10655;b/YuriBoykov", "google_scholar": ";h6_PdYsAAAAJ", "orcid": ";0000-0001-6374-1736", "linkedin": ";", "or_profile": "~Zhongwen_Zhang1;~Yuri_Boykov1", "aff": "University of Waterloo;University of Waterloo", "aff_domain": "uwaterloo.ca;uwaterloo.ca", "position": "PhD student;Professor", "bibtex": "@misc{\nzhang2023revisiting,\ntitle={Revisiting Information-Based Clustering with Pseudo-Posterior Models},\nauthor={Zhongwen Zhang and Yuri Boykov},\nyear={2023},\nurl={https://openreview.net/forum?id=2PI2EKASh_Z}\n}", "github": "", "project": "", "reviewers": "oWJS;3fzk;7XGP", "site": "https://openreview.net/forum?id=2PI2EKASh_Z", "pdf_size": 701430, "recommendation": "1;3;6", "confidence": "5;4;2", "correctness": "1;3;4", "technical_novelty": "1;2;2", "empirical_novelty": "1;2;3", "wc_summary_paper": "13;401;62", "wc_strength_and_weaknesses": "194;84;132", "wc_clarity_quality_novelty_and_reproducibility": "38;77;24", "wc_summary_review": "30;469;33", "wc_review": "275;1031;251", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.3333333333333335, 2.0548046676563256 ], "confidence_avg": [ 3.6666666666666665, 1.247219128924647 ], "correctness_avg": [ 2.6666666666666665, 1.247219128924647 ], "technical_novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 158.66666666666666, 172.51924208298877 ], "wc_strength_and_weaknesses_avg": [ 136.66666666666666, 45.02838610871542 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 46.333333333333336, 22.425184255405547 ], "wc_summary_review_avg": [ 177.33333333333334, 206.2431143631763 ], "wc_review_avg": [ 519.0, 362.17123022128635 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.9971764649527378, "corr_recommendation_correctness": 0.953820966476532, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:_JbnA9-u3LUJ:scholar.google.com/&scioq=Revisiting+Information-Based+Clustering+with+Pseudo-Posterior+Models&hl=en&as_sdt=0,6", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "University of Waterloo", "aff_unique_dep": "", "aff_unique_url": "https://uwaterloo.ca", "aff_unique_abbr": "UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { "title": "MocoSFL: enabling cross-client collaborative self-supervised learning", "status": "Top-5%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12142", "id": "2QGJXyMNoPz", "poster": "", "openreview": "https://openreview.net/forum?id=2QGJXyMNoPz", "slides": "https://iclr.cc/virtual/2023/poster/12142", "video": "https://iclr.cc/virtual/2023/poster/12142", "author_site": "Jingtao Li, Lingjuan Lyu, Daisuke Iso, Chaitali Chakrabarti, Michael Spranger", "tldr": "Existing collaborative SSL schemes are not suitable for cross-client applications because of their expensive computation and local data 
requirements. To address these issues, we propose MocoSFL based on Split Federated Learning and MoCo.", "abstract": "Existing collaborative self-supervised learning (SSL) schemes are not suitable for cross-client applications because of their expensive computation and large local data requirements. To address these issues, we propose MocoSFL, a collaborative SSL framework based on Split Federated Learning (SFL) and Momentum Contrast (MoCo). In MocoSFL, the large backbone model is split into a small client-side model and a large server-side model, and only the small client-side model is processed locally on the client's devices. MocoSFL has three key components: (i) vector concatenation, which enables the use of a small batch size and reduces computation and memory requirements by orders of magnitude; (ii) feature sharing, which helps achieve high accuracy regardless of the quality and volume of local data; (iii) frequent synchronization, which helps achieve better non-IID performance because of smaller local model divergence. For a 1,000-client case with non-IID data (each client only has data from 2 random classes of CIFAR-10), MocoSFL can achieve over 84% accuracy with a ResNet-18 model. Next, we present the TAResSFL module, which significantly improves resistance to privacy threats and reduces communication overhead, with a small sacrifice in accuracy, for a MocoSFL system. On a Raspberry Pi 4B device, the MocoSFL-based scheme requires less than 1MB of memory and less than 40MB of communication, and consumes less than 5W of power. The code is available at https://github.com/SonyAI/MocoSFL.", "keywords": "Self-supervised Learning;Collaborative Learning;Split Federated Learning;Momentum Contrast", "primary_area": "", "supplementary_material": "/attachment/ebcf468f684d290bcc742c1331dad3c7f5b890e2.zip", "author": "Jingtao Li;Lingjuan Lyu;Daisuke Iso;Chaitali Chakrabarti;Michael Spranger", "authorids": "~Jingtao_Li1;~Lingjuan_Lyu1;daisuke.iso@sony.com;~Chaitali_Chakrabarti1;~Michael_Spranger2", "gender": "M;F;;F;", "homepage": "https://zlijingtao.github.io;https://sites.google.com/view/lingjuan-lyu;;https://www.public.asu.edu/~chaitali/;", "dblp": ";178/9876;;45/2824;", "google_scholar": "JIBdJbAAAAAJ;;;u5DHuKcAAAAJ;", "orcid": "0000-0003-4250-869X;;;;", "linkedin": ";;;;", "or_profile": "~Jingtao_Li1;~Lingjuan_Lyu1;daisuke.iso@sony.com;~Chaitali_Chakrabarti1;~Michael_Spranger2", "aff": "Arizona State University;Sony;;;", "aff_domain": "asu.edu;sony.com;;;", "position": "PhD student;scientist;;;", "bibtex": "@inproceedings{\nli2023mocosfl,\ntitle={Moco{SFL}: enabling cross-client collaborative self-supervised learning},\nauthor={Jingtao Li and Lingjuan Lyu and Daisuke Iso and Chaitali Chakrabarti and Michael Spranger},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=2QGJXyMNoPz}\n}", "github": "", "project": "", "reviewers": "bsEM;zHbH;Ddhx;Sd6w", "pdf_size": 2387828, "recommendation": "6;8;8;8", "confidence": "4;4;5;4", "correctness": "3;4;4;4", "technical_novelty": "2;4;3;3", "empirical_novelty": "2;4;4;3", "wc_summary_paper": "87;87;140;137", "wc_strength_and_weaknesses": "441;167;290;233", "wc_clarity_quality_novelty_and_reproducibility": "15;110;40;187", "wc_summary_review": "88;110;65;91", "wc_review": "631;474;535;648", "wc_reply_reviewers": "151;0;0;0", "wc_reply_authors": "1651;157;628;394", "reply_reviewers": "1;0;0;0", "reply_authors": "4;1;1;1", "recommendation_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [
4.25, 0.4330127018922193 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 3.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 112.75, 25.77183540223707 ], "wc_strength_and_weaknesses_avg": [ 282.75, 101.20369311443136 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 88.0, 66.92906692910039 ], "wc_summary_review_avg": [ 88.5, 15.976545308670458 ], "wc_review_avg": [ 572.0, 71.11610225539643 ], "wc_reply_reviewers_avg": [ 37.75, 65.38491798572512 ], "wc_reply_authors_avg": [ 707.5, 569.6150015580699 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 1.0, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=432069636533465994&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=2QGJXyMNoPz", "email": "asu.edu;sony.com;;;", "author_num": 5, "aff_unique_index": "0;1", "aff_unique_norm": "Arizona State University;Sony Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://www.asu.edu;https://www.sony.com", "aff_unique_abbr": "ASU;Sony", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;Japan" }, { "title": "Bitrate-Constrained DRO: Beyond Worst Case Robustness To Unknown Group Shifts", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10750", "id": "2QzNuaRHn4Z", "poster": "", "openreview": "https://openreview.net/forum?id=2QzNuaRHn4Z", "slides": "https://iclr.cc/virtual/2023/poster/10750", "video": "https://iclr.cc/virtual/2023/poster/10750", "author_site": "Amrith Setlur, Don Kurian Dennis, Benjamin Eysenbach, Aditi Raghunathan, Chelsea Finn, Virginia Smith, Sergey Levine", "tldr": "Robustness to group shifts without training group annotations can be achieved with a constrained form of DRO.", "abstract": "Training machine learning models robust to distribution shifts is critical for real-world applications. Some robust training algorithms (e.g., Group DRO) specialize to group shifts and require group information on all training points. Other methods (e.g., CVaR DRO) that do not need group annotations can be overly conservative, since they naively upweight high-loss points, which may form a contrived set that does not correspond to any meaningful group in the real world (e.g., when the high-loss points are randomly mislabeled training points). In this work, we address limitations in prior approaches by assuming a more nuanced form of group shift: conditioned on the label, we assume that the true group function (indicator over group) is simple. For example, we may expect that group shifts occur along low-bitrate features (e.g., image background, lighting). Thus, we aim to learn a model that maintains high accuracy on simple group functions realized by these low-bitrate features, and that need not spend valuable model capacity achieving high accuracy on contrived groups of examples. Based on this, we consider the two-player game formulation of DRO where the adversary's capacity is bitrate-constrained.
Our resulting practical algorithm, Bitrate-Constrained DRO (BR-DRO), does not require group information on training samples yet matches the performance of Group DRO on datasets that have training group annotations and that of CVaR DRO on long-tailed distributions. Our theoretical analysis reveals that in some settings the BR-DRO objective can provably yield statistically efficient and less conservative solutions than unconstrained CVaR DRO.", "keywords": "Robustness;Distribution shift;Group Shift", "primary_area": "", "supplementary_material": "/attachment/3423bbb7076cec0da302895a5971accfe855895c.zip", "author": "Amrith Setlur;Don Dennis;Benjamin Eysenbach;Aditi Raghunathan;Chelsea Finn;Virginia Smith;Sergey Levine", "authorids": "~Amrith_Setlur1;~Don_Dennis2;~Benjamin_Eysenbach1;~Aditi_Raghunathan1;~Chelsea_Finn1;~Virginia_Smith1;~Sergey_Levine1", "gender": "M;;M;F;F;F;M", "homepage": "http://ars22.github.io;https://dkdennis.xyz;https://ben-eysenbach.github.io/;https://www.cs.cmu.edu/~aditirag/;https://ai.stanford.edu/~cbfinn/;;https://people.eecs.berkeley.edu/~svlevine/", "dblp": "https://dblp.uni-trier.de/pers/hd/s/Setlur:Amrith;227/4804;192/1863;166/1409;131/1783;120/0921;80/7594", "google_scholar": "https://scholar.google.ru/citations?user=i7V1kJgAAAAJ;https://scholar.google.co.in/citations?user=GaPs1q0AAAAJ;DRnOvU8AAAAJ;Ch9iRwQAAAAJ;vfPE6hgAAAAJ;;8R35rCwAAAAJ", "orcid": "0000-0002-7061-3094;;0009-0000-7136-6307;;;;", "linkedin": ";;benjamin-eysenbach-a7235775/;;;;", "or_profile": "~Amrith_Setlur1;~Don_Dennis2;~Benjamin_Eysenbach1;~Aditi_Raghunathan1;~Chelsea_Finn1;~Virginia_Smith1;~Sergey_Levine1", "aff": "Carnegie Mellon University;Machine Learning Department, School of Computer Science;Carnegie Mellon University;Carnegie Mellon University;Google;Carnegie Mellon University;Google", "aff_domain": "cmu.edu;mld.cs.cmu.edu;cmu.edu;cmu.edu;google.com;cmu.edu;google.com", "position": "PhD student;PhD student;PhD student;Assistant Professor;Research Scientist;Associate Professor;Research Scientist", "bibtex": "@inproceedings{\nsetlur2023bitrateconstrained,\ntitle={Bitrate-Constrained {DRO}: Beyond Worst Case Robustness To Unknown Group Shifts},\nauthor={Amrith Setlur and Don Dennis and Benjamin Eysenbach and Aditi Raghunathan and Chelsea Finn and Virginia Smith and Sergey Levine},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=2QzNuaRHn4Z}\n}", "github": "", "project": "", "reviewers": "Pjrc;WsDK;BZ25;EndR", "pdf_size": 4079326, "recommendation": "5;6;6;8", "confidence": "3;3;3;3", "correctness": "3;4;3;4", "technical_novelty": "3;2;3;3", "empirical_novelty": "2;3;2;3", "wc_summary_paper": "122;103;91;185", "wc_strength_and_weaknesses": "332;161;164;56", "wc_clarity_quality_novelty_and_reproducibility": "49;5;35;63", "wc_summary_review": "29;55;60;66", "wc_review": "532;324;350;370", "wc_reply_reviewers": "0;0;25;0", "wc_reply_authors": "1520;439;773;62", "reply_reviewers": "0;0;1;0", "reply_authors": "3;2;2;1", "recommendation_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.0, 0.0 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 125.25, 36.22412869897632 ], "wc_strength_and_weaknesses_avg": [ 178.25, 98.84931714483413 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 38.0, 21.470910553583888 ], "wc_summary_review_avg": [ 52.5, 14.115594213493104 ], "wc_review_avg": [ 394.0,
81.32650244538985 ], "wc_reply_reviewers_avg": [ 6.25, 10.825317547305483 ], "wc_reply_authors_avg": [ 698.5, 536.862412541612 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.6882472016116854, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6474768810933984424&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=2QzNuaRHn4Z", "email": "cmu.edu;mld.cs.cmu.edu;cmu.edu;cmu.edu;google.com;cmu.edu;google.com", "author_num": 7, "aff_unique_index": "0;0;0;0;1;0;1", "aff_unique_norm": "Carnegie Mellon University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.cmu.edu;https://www.google.com", "aff_unique_abbr": "CMU;Google", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "2RjnzZqax1J", "title": "Persistence-based Contrastive Learning with Graph Neural Recurrent Networks for Time-series Forecasting", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "In recent years, combinations of graph convolution and recurrent architectures have emerged as a powerful new alternative for multivariate spatio-temporal forecasting, with applications ranging from biosurveillance to traffic monitoring. However, such methods often suffer from vulnerability to noise and limited generalization abilities, especially when semantics and structural properties of time series evolve over time. To address these limitations, we propose a simple yet flexible and highly effective framework, i.e., Persistence-based Contrastive Learning with Graph Neural Recurrent Networks (PCL-GCRN). The key idea behind PCL-GCRN is the notion of topological invariance that we introduce to contrastive graph learning for multivariate spatio-temporal processes. PCL-GCRN allows us to simultaneously focus on several of the most important data shape characteristics at different granularities, which play a key role in learning performance. As a result, PCL-GCRN leads to richer data augmentation, improved performance, and enhanced robustness.
Our extensive experiments on a broad range of real-world datasets, from spatio-temporal forecasting of traffic to monkeypox surveillance, suggest that PCL-GCRN yields competitive results both in terms of prediction accuracy and robustness, outperforming 19 competing approaches.", "keywords": "Spatio-temporal forecasting;graph neural network;topological data analysis;contrastive learning", "primary_area": "", "supplementary_material": "/attachment/f46b1ed9ed60c6143f8d8972293bb4a7a57c06d8.zip", "author": "Yuzhou Chen;Yulia Gel", "authorids": "~Yuzhou_Chen1;~Yulia_Gel1", "gender": ";", "homepage": ";", "dblp": ";", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": ";", "aff": ";", "aff_domain": ";", "position": ";", "bibtex": "@misc{\nchen2023persistencebased,\ntitle={Persistence-based Contrastive Learning with Graph Neural Recurrent Networks for Time-series Forecasting},\nauthor={Yuzhou Chen and Yulia Gel},\nyear={2023},\nurl={https://openreview.net/forum?id=2RjnzZqax1J}\n}", "github": "", "project": "", "reviewers": "22Hg;V2UY;eVzC", "site": "https://openreview.net/forum?id=2RjnzZqax1J", "pdf_size": 1879189, "recommendation": "3;3;5", "confidence": "3;3;4", "correctness": "3;2;3", "technical_novelty": "2;2;3", "empirical_novelty": "2;2;3", "wc_summary_paper": "33;55;62", "wc_strength_and_weaknesses": "132;65;297", "wc_clarity_quality_novelty_and_reproducibility": "54;34;21", "wc_summary_review": "111;226;81", "wc_review": "330;380;461", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 50.0, 12.355835328567093 ], "wc_strength_and_weaknesses_avg": [ 164.66666666666666, 97.48960058499688 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 36.333333333333336, 13.572848714334887 ], "wc_summary_review_avg": [ 139.33333333333334, 62.49444419750891 ], "wc_review_avg": [ 390.3333333333333, 53.977361509762176 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 1.0, "corr_recommendation_correctness": 0.5, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:qv6HhOnBB1cJ:scholar.google.com/&scioq=Persistence-based+Contrastive+Learning+with+Graph+Neural+Recurrent+Networks+for+Time-series+Forecasting&hl=en&as_sdt=0,5", "gs_version_total": 0 }, { "title": "Exploring Active 3D Object Detection from a Generalization Perspective", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12071", "id": "2RwXVje1rAh", "poster": "/media/PosterPDFs/ICLR%202023/12071.png?t=1682662760.3766987", "openreview": "https://openreview.net/forum?id=2RwXVje1rAh", "slides": "https://iclr.cc/virtual/2023/poster/12071", "video": "https://iclr.cc/virtual/2023/poster/12071", "author_site": "Yadan Luo, Zhuoxiao Chen, Zijian Wang, Xin Yu, Zi Huang, Mahsa Baktashmotlagh", "tldr": "", "abstract": "To alleviate the high annotation cost in LiDAR-based 3D object detection, active learning is a promising solution that learns to select only a small portion of unlabeled data to 
annotate, without compromising model performance. Our empirical study, however, suggests that mainstream uncertainty-based and diversity-based active learning policies are not effective when applied in the 3D detection task, as they fail to balance the trade-off between point cloud informativeness and box-level annotation costs. To overcome this limitation, we jointly investigate three novel criteria in our framework CRB for point cloud acquisition - label conciseness, feature representativeness, and geometric balance, which hierarchically filters out the point clouds of redundant 3D bounding box labels, latent features and geometric characteristics (e.g., point cloud density) from the unlabeled sample pool and greedily selects informative ones with fewer objects to annotate. Our theoretical analysis demonstrates that the proposed criteria align the marginal distributions of the selected subset and the prior distributions of the unseen test set, and minimize the upper bound of the generalization error. To validate the effectiveness and applicability of CRB, we conduct extensive experiments on the two benchmark 3D object detection datasets of KITTI and Waymo and examine both a one-stage (i.e., SECOND) and a two-stage 3D detector (i.e., PV-RCNN). Experiments show that the proposed approach outperforms existing active learning strategies and achieves fully supervised performance requiring $1\\%$ and $8\\%$ annotations of bounding boxes and point clouds, respectively. ", "keywords": "Active Learning;3D Object Detection;Lidar Point Clouds", "primary_area": "", "supplementary_material": "/attachment/f51cb80f98f84e37de4a788ea173a0b9dd425ebf.zip", "author": "Yadan Luo;Zhuoxiao Chen;Zijian Wang;Xin Yu;Zi Huang;Mahsa Baktashmotlagh", "authorids": "~Yadan_Luo1;~Zhuoxiao_Chen1;~Zijian_Wang2;~Xin_Yu1;~Zi_Huang1;~Mahsa_Baktashmotlagh1", "gender": "F;M;M;M;F;F", "homepage": "https://sites.google.com/view/yadanluo/home;https://zhuoxiao-chen.github.io/;;https://sites.google.com/view/xinyus-homepage/Home;https://staff.itee.uq.edu.au/huang/;", "dblp": "182/2414;301/7822;03/4540-9;54/1184-2;70/6862;119/1507", "google_scholar": "3IfL11AAAAAJ;t3cg17IAAAAJ;OfTXHvsAAAAJ;oxdtuSEAAAAJ;https://scholar.google.com.au/citations?user=iAWMsgEAAAAJ;https://scholar.google.com.au/citations?user=3kaiBBYAAAAJ", "orcid": "0000-0001-6272-2971;;;0000-0002-0269-5649;;", "linkedin": ";;;;;", "or_profile": "~Yadan_Luo1;~Zhuoxiao_Chen1;~Zijian_Wang2;~Xin_Yu1;~Zi_Huang1;~Mahsa_Baktashmotlagh1", "aff": "The University of Queensland;The University of Queensland;University of Queensland;University of Queensland;University of Queensland;The University of Queensland", "aff_domain": "uq.edu.au;uq.edu.au;uq.edu.au;uq.edu.au;uq.edu.au;uq.edu.au", "position": "Assistant Professor;PhD student;PhD student;Senior Lecturer;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nluo2023exploring,\ntitle={Exploring Active 3D Object Detection from a Generalization Perspective},\nauthor={Yadan Luo and Zhuoxiao Chen and Zijian Wang and Xin Yu and Zi Huang and Mahsa Baktashmotlagh},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=2RwXVje1rAh}\n}", "github": "", "project": "", "reviewers": "8qfL;cyaq;BMpC;S6aF", "pdf_size": 2294430, "recommendation": "6;6;8;8", "confidence": "4;3;4;3", "correctness": "3;4;3;3", "technical_novelty": "3;3;3;3", "empirical_novelty": "3;3;3;4", "wc_summary_paper": "78;81;150;90", "wc_strength_and_weaknesses": "336;158;101;364",
"wc_clarity_quality_novelty_and_reproducibility": "37;23;28;53", "wc_summary_review": "65;46;28;21", "wc_review": "516;308;307;528", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "2289;1380;833;1504", "reply_reviewers": "0;0;0;0", "reply_authors": "4;3;2;3", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 99.75, 29.345996319770776 ], "wc_strength_and_weaknesses_avg": [ 239.75, 112.51305479809888 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 35.25, 11.409973707244026 ], "wc_summary_review_avg": [ 40.0, 17.073371078963874 ], "wc_review_avg": [ 414.75, 107.33446557373824 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1501.5, 520.0521608454291 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": -0.5773502691896257, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11230440706592302242&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=2RwXVje1rAh", "email": "uq.edu.au;uq.edu.au;uq.edu.au;uq.edu.au;uq.edu.au;uq.edu.au", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "University of Queensland", "aff_unique_dep": "", "aff_unique_url": "https://www.uq.edu.au", "aff_unique_abbr": "UQ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "Australia" }, { "title": "Predictor-corrector algorithms for stochastic optimization under gradual distribution shift", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11784", "id": "2SV2dlfBuE3", "poster": "/media/PosterPDFs/ICLR%202023/11784.png?t=1683149494.0679996", "openreview": "https://openreview.net/forum?id=2SV2dlfBuE3", "slides": "https://iclr.cc/virtual/2023/poster/11784", "video": "https://iclr.cc/virtual/2023/poster/11784", "author_site": "Subha Maity, Debarghya Mukherjee, Moulinath Banerjee, Yuekai Sun", "tldr": "", "abstract": "Time-varying stochastic optimization problems frequently arise in machine learning practice (e.g., gradual domain shift, object tracking, strategic classification). Often, the underlying process that drives the distribution shift is continuous in nature. We exploit this underlying continuity by developing predictor-corrector algorithms for time-varying stochastic optimization that anticipates changes in the underlying data generating process through a predictor-corrector term in the update rule. The key challenge is the estimation of the predictor-corrector term; a naive approach based on sample-average approximation may lead to non-convergence. We develop a general moving-average based method to estimate the predictor-corrector term and provide error bounds for the iterates, both in presence of pure and noisy access to the queries from the relevant derivatives of the loss function. 
Furthermore, we show (theoretically and empirically in several examples) that our method outperforms non-predictor corrector methods that do not anticipate changes in the data generating process.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/ff22fe85af4f97d80537fdaaa2cc47e562946553.zip", "author": "Subha Maity;Debarghya Mukherjee;Moulinath Banerjee;Yuekai Sun", "authorids": "~Subha_Maity1;~Debarghya_Mukherjee1;~Moulinath_Banerjee1;~Yuekai_Sun1", "gender": "M;M;M;", "homepage": "https://lsa.umich.edu/stats/people/phd-students/smaity.html;https://debarghya-mukherjee.github.io;https://lsa.umich.edu/stats/people/faculty/moulib.html;https://yuekai.github.io/", "dblp": "278/2922;;;", "google_scholar": "eD9vCGMAAAAJ;https://scholar.google.com/citations?hl=en;;6T1XtW8AAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Subha_Maity1;~Debarghya_Mukherjee1;~Moulinath_Banerjee1;~Yuekai_Sun1", "aff": ";Princeton University;University of Michigan - Ann Arbor;University of Michigan - Ann Arbor", "aff_domain": ";princeton.edu;umich.edu;umich.edu", "position": ";Postdoc;Full Professor;Assistant \u2192 Associate Professor of Statistics", "bibtex": "@inproceedings{\nmaity2023predictorcorrector,\ntitle={Predictor-corrector algorithms for stochastic optimization under gradual distribution shift},\nauthor={Subha Maity and Debarghya Mukherjee and Moulinath Banerjee and Yuekai Sun},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=2SV2dlfBuE3}\n}", "github": "", "project": "", "reviewers": "iKWJ;YXuw;SvER;UaJ8", "pdf_size": 542051, "recommendation": "5;5;6;6", "confidence": "2;2;4;2", "correctness": "3;3;4;3", "technical_novelty": "2;3;4;3", "empirical_novelty": "0;3;4;3", "wc_summary_paper": "62;56;138;83", "wc_strength_and_weaknesses": "149;132;187;139", "wc_clarity_quality_novelty_and_reproducibility": "26;172;2;27", "wc_summary_review": "17;35;65;37", "wc_review": "254;395;392;286", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "676;354;375;83", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 5.5, 0.5 ], "confidence_avg": [ 2.5, 0.8660254037844386 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.5, 1.5 ], "wc_summary_paper_avg": [ 84.75, 32.33709170596515 ], "wc_strength_and_weaknesses_avg": [ 151.75, 21.22940178149163 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 56.75, 67.28809330037521 ], "wc_summary_review_avg": [ 38.5, 17.168284713389397 ], "wc_review_avg": [ 331.75, 62.7868417743718 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 372.0, 209.92260478566857 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4993659276895485199&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=2SV2dlfBuE3", "email": ";princeton.edu;umich.edu;umich.edu", "author_num": 4, "aff_unique_index": "0;1;1", "aff_unique_norm": "Princeton University;University of Michigan", "aff_unique_dep": ";", "aff_unique_url": "https://www.princeton.edu;https://www.umich.edu", "aff_unique_abbr": "Princeton;UM", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Ann 
Arbor", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "2SXIFDczAJG", "title": "Domain Transfer with Large Dynamics Shift in Offline Reinforcement Learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "The domain transfer problem with large dynamics shift commonly exists when using offline reinforcement learning (RL) in real-world applications, where the source dataset collected from one domain needs to be reused to accelerate training the target domain agent with offline RL. The large dynamics shift issue arises when there are unpredictable changes in the target domain\u2019s environment. Existing works typically assume that each state-action pair in the target domain should be at least covered in the source domain, which is often unrealistic and limited to small dynamics shift transfers. To tackle the large dynamics shift problem, we propose to use the source domain data not only for offline policy training but also for safe and efficient data collection in the target domain, thus relaxing the above requirement. Specifically, the source data will play two roles, one is to serve as augmentation data by compensating for the difference in dynamics with modified reward. Another is to form prior knowledge for the behaviour policy to collect a small amount of new data in the target domain safely and efficiently. The target domain policy is trained using offline RL with the source data and modest amounts of newly collected target data. We justify our method in gridworld and autonomous driving environments. Results show that our method requires fewer target domain data and collecting the data in a safer manner compared with prior methods.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Hongming Zhang;Jun Jin;Martin M\u00fcller;Jun Luo", "authorids": "~Hongming_Zhang3;~Jun_Jin1;~Martin_M\u00fcller2;~Jun_Luo1", "gender": "M;;;M", "homepage": "https://github.com/initial-h;;;https://webdocs.cs.ualberta.ca/~mmueller/", "dblp": ";78/8436.html;42/2501;https://dblp.org/pers/hd/m/M=uuml=ller_0003:Martin", "google_scholar": "https://scholar.google.ca/citations?user=mwbsY3AAAAAJ;a6grwUcAAAAJ;;J60BcHkAAAAJ", "orcid": ";0000-0003-4413-8565;;0000-0002-5639-5318", "linkedin": ";;;", "or_profile": "~Hongming_Zhang3;~Jun_Jin1;~Jun_Luo1;~Martin_Mueller1", "aff": "University of Alberta;Huawei Technologies Ltd. 
Canada;Huawei Technologies Ltd.;University of Alberta", "aff_domain": "ualberta.ca;huawei.com;huawei.com;ualberta.ca", "position": "PhD student;Researcher;Researcher;Full Professor", "bibtex": "@misc{\nzhang2023domain,\ntitle={Domain Transfer with Large Dynamics Shift in Offline Reinforcement Learning},\nauthor={Hongming Zhang and Jun Jin and Martin M{\\\"u}ller and Jun Luo},\nyear={2023},\nurl={https://openreview.net/forum?id=2SXIFDczAJG}\n}", "github": "", "project": "", "reviewers": "mQjW;3hcK;xwuy", "site": "https://openreview.net/forum?id=2SXIFDczAJG", "pdf_size": 343334, "recommendation": "3;3;3", "confidence": "4;3;4", "correctness": "3;3;2", "technical_novelty": "1;3;2", "empirical_novelty": "2;2;1", "wc_summary_paper": "71;129;53", "wc_strength_and_weaknesses": "133;101;267", "wc_clarity_quality_novelty_and_reproducibility": "44;373;109", "wc_summary_review": "10;23;26", "wc_review": "258;626;455", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.0, 0.816496580927726 ], "empirical_novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "wc_summary_paper_avg": [ 84.33333333333333, 32.42769735204082 ], "wc_strength_and_weaknesses_avg": [ 167.0, 71.90734779330042 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 175.33333333333334, 142.26813495024888 ], "wc_summary_review_avg": [ 19.666666666666668, 6.944222218666553 ], "wc_review_avg": [ 446.3333333333333, 150.3603080012216 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:CRvpDW_thJUJ:scholar.google.com/&scioq=Domain+Transfer+with+Large+Dynamics+Shift+in+Offline+Reinforcement+Learning&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "University of Alberta;Huawei", "aff_unique_dep": ";Huawei Technologies", "aff_unique_url": "https://www.ualberta.ca;https://www.huawei.com/ca-en/", "aff_unique_abbr": "UAlberta;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Canada;China" }, { "id": "2T80ygeeWE0", "title": "Graph schemas as abstractions for transfer learning, inference, and planning", "track": "main", "status": "Reject", "tldr": "We propose schemas in a higher order graph structures as a model for abstractions that can be used for rapid transfer learning, inference, and planning.", "abstract": "We propose schemas as a model for abstractions that can be used for rapid transfer learning, inference, and planning. Common structured representations of concepts and behaviors---schemas---have been proposed as a powerful way to encode abstractions. Latent graph learning is emerging as a new computational model of the hippocampus to explain map learning and transitive inference. We build on this work to show that learned latent graphs in these models have a slot structure---schemas---that allow for quick knowledge transfer across environments. 
In a new environment, an agent can rapidly learn new bindings from the sensory stream to multiple latent schemas and select the best-fitting one to guide behavior. To evaluate these graph schemas, we use two challenging, previously published tasks: the memory \\& planning game and one-shot StreetLearn, which are designed to test rapid task solving in novel environments. Graph schemas can be learned in far fewer episodes than previous baselines, and can model and plan in a few steps in novel variations of these tasks. We further demonstrate learning, matching, and reusing graph schemas in navigation tasks in more challenging environments with aliased observations and size variations, and show how different schemas can be composed to model larger environments.", "keywords": "Schema learning;abstractions;higher order graphs;perceptual aliasing;aliased graphs;planning;spatial navigation;cognitive science", "primary_area": "", "supplementary_material": "", "author": "J Swaroop Guntupalli;Rajkumar Vasudeva Raju;Shrinu Kushagra;Danny Sawyer;Ishan Deshpande;Guangyao Zhou;Miguel Lazaro-Gredilla;Dileep George", "authorids": "~J_Swaroop_Guntupalli1;~Rajkumar_Vasudeva_Raju2;~Shrinu_Kushagra1;dannysawyer@deepmind.com;ishansd@deepmind.com;~Guangyao_Zhou1;~Miguel_Lazaro-Gredilla1;~Dileep_George1", "gender": "M;;M;;;M;M;", "homepage": ";;https://cs.uwaterloo.ca/~skushagr/;;;https://stanniszhou.github.io;;", "dblp": "136/5291;;129/9107;;;;77/4660;", "google_scholar": "LtpDr8MAAAAJ;;https://scholar.google.ca/citations?user=8RYloKYAAAAJ;;;RW94MCIAAAAJ;SFjDQk8AAAAJ;", "orcid": "0000-0002-0677-5590;;;;;;;", "linkedin": ";;;;;;miguel-lazaro-g/;", "or_profile": "~J_Swaroop_Guntupalli1;~Rajkumar_Vasudeva_Raju2;~Shrinu_Kushagra1;dannysawyer@deepmind.com;ishansd@deepmind.com;~Guangyao_Zhou1;~Miguel_Lazaro-Gredilla1;~Dileep_George1", "aff": "Google DeepMind;;Google;;;Google DeepMind;Google Deepmind;Vicarious AI", "aff_domain": "deepmind.com;;google.com;;;google.com;google.com;vicarious.com", "position": "Researcher;;Researcher;;;Research Scientist;Research Scientist;Co-founder", "bibtex": "@misc{\nguntupalli2023graph,\ntitle={Graph schemas as abstractions for transfer learning, inference, and planning},\nauthor={J Swaroop Guntupalli and Rajkumar Vasudeva Raju and Shrinu Kushagra and Danny Sawyer and Ishan Deshpande and Guangyao Zhou and Miguel Lazaro-Gredilla and Dileep George},\nyear={2023},\nurl={https://openreview.net/forum?id=2T80ygeeWE0}\n}", "github": "", "project": "", "reviewers": "RKUK;rzAg;XbR6;Zycs", "site": "https://openreview.net/forum?id=2T80ygeeWE0", "pdf_size": 11338940, "recommendation": "3;3;5;6", "confidence": "4;3;3;2", "correctness": "3;3;4;4", "technical_novelty": "2;2;2;2", "empirical_novelty": "2;1;2;3", "wc_summary_paper": "50;56;87;43", "wc_strength_and_weaknesses": "427;124;383;69", "wc_clarity_quality_novelty_and_reproducibility": "32;204;33;32", "wc_summary_review": "30;88;347;34", "wc_review": "539;472;850;178", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "812;662;1138;64", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;2;1", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 59.0, 16.80773631397161 ], "wc_strength_and_weaknesses_avg": [ 250.75, 156.24719997491155 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 75.25, 74.33496821819459 ], "wc_summary_review_avg": [
124.75, 130.34449547257452 ], "wc_review_avg": [ 509.75, 238.80365889156724 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 669.0, 389.3854131833908 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": -0.816496580927726, "corr_recommendation_correctness": 0.9622504486493761, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5630870254637561534&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0;0;1;2", "aff_unique_norm": "Google;DeepMind;Vicarious AI", "aff_unique_dep": "Google DeepMind;DeepMind;", "aff_unique_url": "https://deepmind.com;https://deepmind.com;https://www.vicarious.com", "aff_unique_abbr": "DeepMind;DeepMind;Vicarious AI", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;0;0;1", "aff_country_unique": "United Kingdom;United States" }, { "id": "2U_AM7TcRQK", "title": "Deep Reinforcement Learning for Cryptocurrency Trading: Practical Approach to Address Backtest Overfitting", "track": "main", "status": "Withdraw", "tldr": "A practical approach to address backtest overfitting for cryptocurrency trading using deep reinforcement learning.", "abstract": "Designing profitable and reliable trading strategies is challenging in the highly volatile cryptocurrency market. Existing works applied deep reinforcement learning methods and optimistically reported increased profits in backtesting, which may suffer from the \\textit{false positive} issue due to overfitting. In this paper, we propose a practical approach to address backtest overfitting for cryptocurrency trading using deep reinforcement learning. First, we formulate the detection of backtest overfitting as a hypothesis test. Then, we train the DRL agents, estimate the probability of overfitting, and reject the overfitted agents, increasing the chance of good trading performance. 
Finally, on 10 cryptocurrencies over a testing period from 05/01/2022 to 06/27/2022 (during which the crypto market \\textbf{crashed two times}), we show that the less overfitted deep reinforcement learning agents achieve a higher return than more overfitted agents, an equal-weight strategy, and the S\\&P DBM Index (market benchmark), offering confidence in possible deployment to a real market.", "keywords": "Computing methodologies;Markov decision processes;Neural networks;Reinforcement learning", "primary_area": "", "supplementary_material": "", "author": "Berend Jelmer Dirk Gort;Xiao-Yang Liu;Xinghang Sun;Jiechao Gao;Shuaiyu Chen;Christina Dan Wang", "authorids": "~Berend_Jelmer_Dirk_Gort1;~Xiao-Yang_Liu1;xs2421@columbia.edu;~Jiechao_Gao1;chen4144@purdue.edu;christina.wang@nyu.edu", "gender": "M;M;;;;", "homepage": ";http://www.tensorlet.org/publications/;;;;", "dblp": ";125/9849;;;;", "google_scholar": ";https://scholar.google.com/citations?hl=en;;;;", "orcid": ";;;;;", "linkedin": "berendgort/;;;;;", "or_profile": "~Berend_Jelmer_Dirk_Gort1;~Xiao-Yang_Liu1;xs2421@columbia.edu;~Jiechao_Gao1;chen4144@purdue.edu;christina.wang@nyu.edu", "aff": ";Columbia University;;;;", "aff_domain": ";columbia.edu;;;;", "position": ";PhD student;;;;", "bibtex": "@misc{\ngort2023deep,\ntitle={Deep Reinforcement Learning for Cryptocurrency Trading: Practical Approach to Address Backtest Overfitting},\nauthor={Berend Jelmer Dirk Gort and Xiao-Yang Liu and Xinghang Sun and Jiechao Gao and Shuaiyu Chen and Christina Dan Wang},\nyear={2023},\nurl={https://openreview.net/forum?id=2U_AM7TcRQK}\n}", "github": "", "project": "", "reviewers": "9MH3;xFVi;MxX4", "site": "https://openreview.net/forum?id=2U_AM7TcRQK", "pdf_size": 1184951, "recommendation": "3;3;3", "confidence": "4;4;3", "correctness": "2;3;3", "technical_novelty": "3;2;1", "empirical_novelty": "0;2;1", "wc_summary_paper": "94;32;55", "wc_strength_and_weaknesses": "366;68;110", "wc_clarity_quality_novelty_and_reproducibility": "213;26;21", "wc_summary_review": "59;17;31", "wc_review": "732;143;217", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.0, 0.816496580927726 ], "empirical_novelty_avg": [ 1.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 60.333333333333336, 25.590796956892312 ], "wc_strength_and_weaknesses_avg": [ 181.33333333333334, 131.69999578165857 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 86.66666666666667, 89.35447511021606 ], "wc_summary_review_avg": [ 35.666666666666664, 17.46106780494506 ], "wc_review_avg": [ 364.0, 261.9631017274507 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5768151287109225164&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0", "aff_unique_norm": "Columbia University", "aff_unique_dep": "", "aff_unique_url": "https://www.columbia.edu", "aff_unique_abbr": "Columbia", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "2VWa8qj2vd0", "title": "Linear Video Transformer with Feature
Fixation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Vision Transformers have achieved impressive performance in video classification, while suffering from the quadratic complexity caused by the Softmax attention mechanism. Some studies alleviate the computational costs by reducing the number of tokens attended in attention calculation, but the complexity is still quadratic. Another promising way is to replace Softmax attention with linear attention, which owns linear complexity but presents a clear performance drop. We find that such a drop in linear attention results from the lack of attention concentration to critical features. Therefore, we propose a feature fixation module to reweight feature importance of the query and key prior to computing linear attention. Specifically, we regard the query, key, and value as latent representations of the input token, and learn the feature fixation ratio by aggregating Query-Key-Value information. This is beneficial for measuring the feature importance comprehensively. Furthermore, we improve the feature fixation by neighborhood association, which leverages additional guidance from spatial and temporal neighboring tokens. Our proposed method significantly improves the linear attention baseline, and achieves state-of-the-art performance among linear video Transformers on three popular video classification benchmarks. Our performance is even comparable to some quadratic Transformers with fewer parameters and higher efficiency.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/37531e28df3773e0034454737a73091c01ed57c6.zip", "author": "Kaiyue Lu;ZeXiang Liu;Jianyuan Wang;Weixuan Sun;Zhen Qin;Dong Li;Xuyang Shen;Hui Deng;Xiaodong Han;Yuchao Dai;Yiran Zhong", "authorids": "~Kaiyue_Lu1;~ZeXiang_Liu1;~Jianyuan_Wang2;~Weixuan_Sun1;~Zhen_Qin6;~Dong_Li11;~Xuyang_Shen1;~Hui_Deng2;~Xiaodong_Han3;~Yuchao_Dai1;~Yiran_Zhong1", "gender": "M;;M;M;;M;M;M;;M;M", "homepage": ";;https://jytime.github.io/;https://weixuansun.github.io/weixuansun-github.io/;https://github.com/Doraemonzzz;;;https://www.researchgate.net/profile/Hui-Deng-24;;http://npu-cvr.cn/;", "dblp": "190/8790;;;186/6724;;;274/2342;88/2704;;65/7804;158/9624", "google_scholar": ";;2wk2RdgAAAAJ;vIS56AoAAAAJ;https://scholar.google.com.sg/citations?user=IcBRtycAAAAJ;bxmsqZIAAAAJ;k6Q1mcoAAAAJ;;;https://scholar.google.com.tw/citations?user=fddAbqsAAAAJ;https://scholar.google.com.sg/citations?user=E9NVOBUAAAAJ", "orcid": ";;;;;;0000-0002-1968-7055;0009-0009-5985-3976;;0000-0002-4432-7406;", "linkedin": ";;jianyuan-wang-026860148/;;;;;;;;", "or_profile": "~Kaiyue_Lu1;~ZeXiang_Liu1;~Jianyuan_Wang2;~Weixuan_Sun1;~Zhen_Qin6;~Dong_Li11;~Xuyang_Shen1;~Hui_Deng2;~Xiaodong_Han3;~Yuchao_Dai1;~Yiran_Zhong1", "aff": ";;University of Oxford, University of Oxford;Australian National University;Sensetime;Shanghai AI Lab;Sensetime Research;Northwest Polytechnical University Xi'an;;Northwestern Polytechnical University;Shanghai AI Lab", "aff_domain": ";;robots.ox.ac.uk;anu.edu.au;sensetime.com;org.cn;sensetime.com;nwpu.edu.cn;;nwpu.edu.cn;pjlab.org.cn", "position": ";;PhD student;PhD student;Researcher;Researcher;Researcher;PhD student;;Professor;PI", "bibtex": "@misc{\nlu2023linear,\ntitle={Linear Video Transformer with Feature Fixation},\nauthor={Kaiyue Lu and ZeXiang Liu and Jianyuan Wang and Weixuan Sun and Zhen Qin and Dong Li and Xuyang Shen and Hui Deng and Xiaodong Han and Yuchao Dai and Yiran Zhong},\nyear={2023},\nurl={https://openreview.net/forum?id=2VWa8qj2vd0}\n}", 
"github": "", "project": "", "reviewers": "uvWv;xSk5;nema;8R83", "site": "https://openreview.net/forum?id=2VWa8qj2vd0", "pdf_size": 467788, "recommendation": "3;3;5;6", "confidence": "4;4;1;4", "correctness": "3;4;3;4", "technical_novelty": "2;2;2;2", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "52;74;74;39", "wc_strength_and_weaknesses": "197;243;209;106", "wc_clarity_quality_novelty_and_reproducibility": "65;71;2;36", "wc_summary_review": "28;28;2;38", "wc_review": "342;416;287;219", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.25, 1.299038105676658 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 59.75, 14.972892172189045 ], "wc_strength_and_weaknesses_avg": [ 188.75, 50.66742049877811 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 43.5, 27.37243138634199 ], "wc_summary_review_avg": [ 24.0, 13.341664064126334 ], "wc_review_avg": [ 316.0, 72.32910893962402 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 11, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": 0.19245008972987526, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10297810130634920586&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2;3;2;4;5;3", "aff_unique_norm": "University of Oxford;Australian National University;SenseTime;Shanghai AI Lab;Northwest Polytechnical University;Northwestern Polytechnical University", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://www.ox.ac.uk;https://www.anu.edu.au;https://www.sensetime.com;https://www.shanghaiailab.com;http://www.nwpu.edu.cn;https://www.nwpu.edu.cn", "aff_unique_abbr": "Oxford;ANU;SenseTime;SAIL;NWPU;NWPU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Xi'an", "aff_country_unique_index": "0;1;2;2;2;2;2;2", "aff_country_unique": "United Kingdom;Australia;China" }, { "id": "2W6ExpOzWGV", "title": "AQuaMaM: An Autoregressive, Quaternion Manifold Model for Rapidly Estimating Complex SO(3) Distributions", "track": "main", "status": "Reject", "tldr": "", "abstract": "Accurately modeling complex, multimodal distributions is necessary for optimal decision-making, but doing so for rotations in three-dimensions, i.e., the SO(3) group, is challenging due to the curvature of the rotation manifold. The recently described implicit-PDF (IPDF) is a simple, elegant, and effective approach for learning arbitrary distributions on SO(3) up to a given precision. However, inference with IPDF requires $N$ forward passes through the network's final multilayer perceptron\u2014where $N$ places an upper bound on the likelihood that can be calculated by the model\u2014which is prohibitively slow for those without the computational resources necessary to parallelize the queries. In this paper, I introduce AQuaMaM, a neural network capable of both learning complex distributions on the rotation manifold and calculating exact likelihoods for query rotations in a single forward pass. Specifically, AQuaMaM autoregressively models the projected components of unit quaternions as a mixture of uniform distributions that partition their geometrically-restricted domain of values. 
On an \"infinite\" toy dataset with ambiguous viewpoints, AQuaMaM rapidly converges to a sampling distribution closely matching the true data distribution. In contrast, the sampling distribution for IPDF dramatically diverges from the true data distribution, despite IPDF approaching its theoretical minimum evaluation loss during training. On a constructed dataset of 500,000 renders of a die in different rotations, an AQuaMaM model trained from scratch reaches a log-likelihood 14% higher than an IPDF model using a pretrained ResNet-50. Further, compared to IPDF, AQuaMaM uses 24% fewer parameters, has a prediction throughput 52$\\times$ faster on a single GPU, and converges in a similar amount of time during training.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/2282887aea8b6e261ff69278e6e4e41625949773.zip", "author": "Michael A. Alcorn", "authorids": "~Michael_A._Alcorn1", "gender": "M", "homepage": "https://sites.google.com/view/michaelaalcorn", "dblp": "230/8040", "google_scholar": "ulrFgHQAAAAJ", "orcid": "", "linkedin": "michaelaalcorn/", "or_profile": "~Michael_A._Alcorn1", "aff": "Bear Flag Robotics", "aff_domain": "bearflagrobotics.com", "position": "Researcher", "bibtex": "@misc{\nalcorn2023aquamam,\ntitle={{AQ}uaMaM: An Autoregressive, Quaternion Manifold Model for Rapidly Estimating Complex {SO}(3) Distributions},\nauthor={Michael A. Alcorn},\nyear={2023},\nurl={https://openreview.net/forum?id=2W6ExpOzWGV}\n}", "github": "", "project": "", "reviewers": "oE5t;WNVz;KBHk", "site": "https://openreview.net/forum?id=2W6ExpOzWGV", "pdf_size": 1637325, "recommendation": "3;5;6", "confidence": "3;4;4", "correctness": "3;3;3", "technical_novelty": "2;2;3", "empirical_novelty": "2;3;3", "wc_summary_paper": "38;56;101", "wc_strength_and_weaknesses": "141;136;180", "wc_clarity_quality_novelty_and_reproducibility": "108;292;691", "wc_summary_review": "10;57;69", "wc_review": "297;541;1041", "wc_reply_reviewers": "0;525;157", "wc_reply_authors": "467;2466;1423", "reply_reviewers": "0;4;2", "reply_authors": "1;5;2", "recommendation_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 65.0, 26.49528259898354 ], "wc_strength_and_weaknesses_avg": [ 152.33333333333334, 19.669491322575904 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 363.6666666666667, 243.3438353898085 ], "wc_summary_review_avg": [ 45.333333333333336, 25.460208605237746 ], "wc_review_avg": [ 626.3333333333334, 309.6722281524271 ], "wc_reply_reviewers_avg": [ 227.33333333333334, 220.0247460830006 ], "wc_reply_authors_avg": [ 1452.0, 816.3459234091064 ], "reply_reviewers_avg": [ 2.0, 1.632993161855452 ], "reply_authors_avg": [ 2.6666666666666665, 1.699673171197595 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 1, 0 ], "corr_recommendation_confidence": 0.9449111825230683, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3954272074408874993&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0", "aff_unique_norm": "Bear Flag Robotics", "aff_unique_dep": "", "aff_unique_url": "", "aff_unique_abbr": "", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Hebbian and Gradient-based Plasticity Enables Robust Memory and 
Rapid Learning in RNNs", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11522", "id": "2WklawyeI08", "poster": "", "openreview": "https://openreview.net/forum?id=2WklawyeI08", "slides": "https://iclr.cc/virtual/2023/poster/11522", "video": "https://iclr.cc/virtual/2023/poster/11522", "author_site": "Yu Duan, Zhongfan Jia, Qian Li, Yi Zhong, Kaisheng Ma", "tldr": "", "abstract": "Rapidly learning from ongoing experiences and remembering past events with a flexible memory system are two core capacities of biological intelligence. While the underlying neural mechanisms are not fully understood, various evidence supports that synaptic plasticity plays a critical role in memory formation and fast learning. Inspired by these results, we equip Recurrent Neural Networks (RNNs) with plasticity rules to enable them to adapt their parameters according to ongoing experiences. In addition to the traditional local Hebbian plasticity, we propose a global, gradient-based plasticity rule, which allows the model to evolve towards its self-determined target. Our models show promising results on sequential and associative memory tasks, illustrating their ability to robustly form and retain memories. In the meantime, these models can cope with many challenging few-shot learning problems. Comparing different plasticity rules under the same framework shows that Hebbian plasticity is well-suited for several memory and associative learning tasks; however, it is outperformed by gradient-based plasticity on few-shot regression tasks which require the model to infer the underlying mapping.", "keywords": "synaptic plasticity;meta-learning;Hebbian learning;few-shot learning;recurrent neural networks", "primary_area": "", "supplementary_material": "", "author": "Yu Duan;Zhongfan Jia;Qian Li;Yi Zhong;Kaisheng Ma", "authorids": "~Yu_Duan2;~Zhongfan_Jia1;~Qian_Li3;~Yi_Zhong1;~Kaisheng_Ma1", "gender": "M;;M;M;M", "homepage": "https://yuvenduan.github.io/;;;http://life.tsinghua.edu.cn/publish/smkx/11230/2018/20180205194642525261278/20180205194642525261278_.html;http://group.iiis.tsinghua.edu.cn/~maks/index.html", "dblp": ";;69/5902-6;;133/4053.html", "google_scholar": "8KW8hX8AAAAJ;;;;VtDpVoEAAAAJ", "orcid": ";;;;0000-0001-9226-3366", "linkedin": ";;;;", "or_profile": "~Yu_Duan2;~Zhongfan_Jia1;~Qian_Li3;~Yi_Zhong1;~Kaisheng_Ma1", "aff": "Tsinghua University;IIIS, Tsinghua University;Tsinghua University;Tsinghua University;", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;", "position": "Undergrad student;PhD student;Assistant Professor;Full Professor;", "bibtex": "@inproceedings{\nduan2023hebbian,\ntitle={Hebbian and Gradient-based Plasticity Enables Robust Memory and Rapid Learning in {RNN}s},\nauthor={Yu Duan and Zhongfan Jia and Qian Li and Yi Zhong and Kaisheng Ma},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=2WklawyeI08}\n}", "github": "", "project": "", "reviewers": "vDcd;fkhw;5CwK;x3Sh", "pdf_size": 804063, "recommendation": "6;6;6;6", "confidence": "3;4;3;4", "correctness": "3;3;2;4", "technical_novelty": "3;4;3;2", "empirical_novelty": "3;3;2;2", "wc_summary_paper": "58;78;49;84", "wc_strength_and_weaknesses": "373;92;393;367", "wc_clarity_quality_novelty_and_reproducibility": "77;126;34;48", "wc_summary_review": "73;26;27;62", "wc_review": "581;322;503;561", "wc_reply_reviewers": "47;65;26;138", "wc_reply_authors": "702;625;625;133", "reply_reviewers": 
"1;1;1;1", "reply_authors": "2;1;1;2", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 67.25, 14.271912976192084 ], "wc_strength_and_weaknesses_avg": [ 306.25, 124.07130006572834 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 71.25, 35.20919624189112 ], "wc_summary_review_avg": [ 47.0, 20.868636754709208 ], "wc_review_avg": [ 491.75, 102.1062559297911 ], "wc_reply_reviewers_avg": [ 69.0, 42.16040796766559 ], "wc_reply_authors_avg": [ 521.25, 226.34970178906798 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4413736736097360991&as_sdt=8005&sciodt=0,7&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=2WklawyeI08", "email": "tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "2WmBMrCZSx", "title": "FedTiny: Pruned Federated Learning Towards Specialized Tiny Models", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Neural network pruning has been a well-established compression technique to enable deep learning models on resource-constrained devices. The pruned model is usually specialized to meet specific hardware platforms and training tasks (defined as deployment scenarios). However, existing pruning approaches rely heavily on training data to trade off model size, efficiency, and accuracy, which becomes ineffective for federated learning (FL) over distributed and confidential datasets. Moreover, the memory- and compute-intensive pruning process of most existing approaches cannot be handled by most FL devices with resource limitations. \nIn this paper, we develop FedTiny, a novel distributed pruning framework for FL, to obtain specialized tiny models for memory- and computing-constrained participating devices with confidential local data. To alleviate biased pruning due to unseen heterogeneous data over devices, FedTiny introduces an adaptive batch normalization (BN) selection module to adaptively obtain an initially pruned model to fit deployment scenarios. Besides, to further improve the initial pruning, FedTiny develops a lightweight progressive pruning module for local finer pruning under tight memory and computational budgets, where the pruning policy for each layer is gradually determined rather than evaluating the overall deep model structure. 
Extensive experimental results demonstrate the effectiveness of FedTiny, which outperforms state-of-the-art baseline approaches, especially when compressing deep models to extremely sparse tiny models.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Hong Huang;Lan Zhang;Chaoyue Sun;Ruogu Fang;Xiaoyong Yuan;Dapeng Wu", "authorids": "~Hong_Huang4;~Lan_Zhang4;~Chaoyue_Sun1;~Ruogu_Fang1;~Xiaoyong_Yuan1;dpwu@ieee.org", "gender": "M;;M;F;M;", "homepage": "https://little0o0.github.io/;;;https://www.cis.fiu.edu/faculty-staff/fang-ruogu/;https://sites.google.com/view/xiaoyong-yuan;", "dblp": ";;;80/8845;150/3870;", "google_scholar": "_E4FBygAAAAJ;;;LVb46zEAAAAJ;wl_qADcAAAAJ;", "orcid": ";;my-orcid?orcid=0000-0003-0913-5668;0000-0003-3980-3532;0000-0003-0782-4187;", "linkedin": ";;;ruogufang;xiaoyongyuan/;", "or_profile": "~Hong_Huang4;~Lan_Zhang4;~Chaoyue_Sun1;~Ruogu_Fang1;~Xiaoyong_Yuan1;dpwu@ieee.org", "aff": "University of Florida;;University of Florida;University of Florida;Michigan Technological University;", "aff_domain": "ufl.edu;;ufl.edu;ufl.edu;mtu.edu;", "position": "MS student;;PhD student;Associate Professor;Assistant Professor;", "bibtex": "@misc{\nhuang2023fedtiny,\ntitle={FedTiny: Pruned Federated Learning Towards Specialized Tiny Models},\nauthor={Hong Huang and Lan Zhang and Chaoyue Sun and Ruogu Fang and Xiaoyong Yuan and Dapeng Wu},\nyear={2023},\nurl={https://openreview.net/forum?id=2WmBMrCZSx}\n}", "github": "", "project": "", "reviewers": "AyCo;mNhR;X987;r2Vc", "site": "https://openreview.net/forum?id=2WmBMrCZSx", "pdf_size": 792448, "recommendation": "5;5;5;6", "confidence": "3;4;3;3", "correctness": "4;3;3;4", "technical_novelty": "2;2;2;2", "empirical_novelty": "0;2;2;2", "wc_summary_paper": "64;87;66;67", "wc_strength_and_weaknesses": "321;363;90;490", "wc_clarity_quality_novelty_and_reproducibility": "16;116;29;139", "wc_summary_review": "47;6;21;117", "wc_review": "448;572;206;813", "wc_reply_reviewers": "0;0;0;84", "wc_reply_authors": "870;737;161;1097", "reply_reviewers": "0;0;0;1", "reply_authors": "3;3;3;3", "recommendation_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 1.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 71.0, 9.300537618869138 ], "wc_strength_and_weaknesses_avg": [ 316.0, 144.5562174380611 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 75.0, 53.32447843157962 ], "wc_summary_review_avg": [ 47.75, 42.587410111440214 ], "wc_review_avg": [ 509.75, 219.03923735258027 ], "wc_reply_reviewers_avg": [ 21.0, 36.373066958946424 ], "wc_reply_authors_avg": [ 716.25, 345.44997828918736 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.0, 0.0 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5377922587955184549&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University of Florida;Michigan Technological University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ufl.edu;https://www.mtu.edu", "aff_unique_abbr": "UF;MTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "ViewCo: Discovering Text-Supervised 
Segmentation Masks via Multi-View Semantic Consistency", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11596", "id": "2XLRBjY46O6", "poster": "/media/PosterPDFs/ICLR%202023/11596.png?t=1680754640.1469536", "openreview": "https://openreview.net/forum?id=2XLRBjY46O6", "slides": "https://iclr.cc/virtual/2023/poster/11596", "video": "https://iclr.cc/virtual/2023/poster/11596", "author_site": "Pengzhen Ren, Changlin Li, Hang Xu, Yi Zhu, Guangrun Wang, Jianzhuang Liu, Xiaojun Chang, Xiaodan Liang", "tldr": "Discovering text-supervised segmentation masks via multi-view semantic consistency", "abstract": "Recently, great success has been made in learning visual representations from text supervision, facilitating the emergence of text-supervised semantic segmentation. However, existing works focus on pixel grouping and cross-modal semantic alignment, while ignoring the correspondence among multiple augmented views of the same image. To overcome such limitation, we propose multi-View Consistent learning (ViewCo) for text-supervised semantic segmentation. Specifically, we first propose text-to-views consistency modeling to learn correspondence for multiple views of the same input image. Additionally, we propose cross-view segmentation consistency modeling to address the ambiguity issue of text supervision by contrasting the segment features of Siamese visual encoders. The text-to-views consistency benefits dense assignment of the visual features by encouraging different crops to align with the same text, while the cross-view segmentation consistency modeling provides additional self-supervision, overcoming the limitation of ambiguous text supervision for segmentation masks. Trained with large-scale image-text data, our model can directly segment objects of arbitrary categories in a zero-shot manner. 
Extensive experiments show that ViewCo outperforms state-of-the-art methods on average by up to 2.9%, 1.6%, and 2.4% mIoU on PASCAL VOC2012, PASCAL Context, and COCO, respectively.", "keywords": "Zero-shot semantic segmentation;Vision-Language Pretraining;Visual Self-Supervision;Consistent Semantics", "primary_area": "", "supplementary_material": "/attachment/f903d42ca97ab3eea0fc6792edf8bfe51447b318.zip", "author": "Pengzhen Ren;Changlin Li;Hang Xu;Yi Zhu;Guangrun Wang;Jianzhuang Liu;Xiaojun Chang;Xiaodan Liang", "authorids": "~Pengzhen_Ren2;~Changlin_Li2;~Hang_Xu1;~Yi_Zhu3;~Guangrun_Wang1;~Jianzhuang_Liu3;~Xiaojun_Chang1;~Xiaodan_Liang2", "gender": ";M;M;F;M;M;M;F", "homepage": ";;;https://yeezhu.github.io;https://wanggrun.github.io;;https://www.xiaojun.ai;https://www.sysu-hcp.net/", "dblp": "222/7912;;;;165/1374.html;l/JianzhuangLiu;116/8412;", "google_scholar": ";https://scholar.google.com/citations?hl=en;https://scholar.google.com.hk/citations?user=J_8TX6sAAAAJ;https://scholar.google.com/citations?view_op=list_works;nuHIZx0AAAAJ;sKauaAwAAAAJ;https://scholar.google.co.uk/citations?user=8suupocAAAAJ;voxznZAAAAAJ", "orcid": ";;0000-0003-3645-8972;0000-0002-5087-895X;;;;", "linkedin": ";;;;;;;", "or_profile": "~Pengzhen_Ren2;~Changlin_Li2;~Hang_Xu1;~Yi_Zhu3;~Guangrun_Wang1;~Jianzhuang_Liu3;~Xiaojun_Chang1;~Xiaodan_Liang2", "aff": "SUN YAT-SEN UNIVERSITY;University of Technology Sydney;Huawei Noah\u2018s Ark Lab;Huawei Technologies Ltd.;University of Oxford;Huawei Technologies Ltd.;University of Technology Sydney;SUN YAT-SEN UNIVERSITY", "aff_domain": "sysu.edu.cn;uts.edu.au;huawei.com;huawei.com;ox.ac.uk;huawei.com;uts.edu.au;sysu.edu.cn", "position": "Postdoc;PhD student;Researcher;Researcher;Researcher;Principal Researcher;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nren2023viewco,\ntitle={ViewCo: Discovering Text-Supervised Segmentation Masks via Multi-View Semantic Consistency},\nauthor={Pengzhen Ren and Changlin Li and Hang Xu and Yi Zhu and Guangrun Wang and Jianzhuang Liu and Xiaojun Chang and Xiaodan Liang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=2XLRBjY46O6}\n}", "github": "", "project": "", "reviewers": "cqK1;Dedj;kMrx;wv76", "pdf_size": 1759690, "recommendation": "3;3;8;8", "confidence": "4;4;3;4", "correctness": "2;2;4;3", "technical_novelty": "2;2;4;3", "empirical_novelty": "2;2;4;3", "wc_summary_paper": "106;73;71;38", "wc_strength_and_weaknesses": "151;249;65;49", "wc_clarity_quality_novelty_and_reproducibility": "20;28;27;26", "wc_summary_review": "56;34;20;16", "wc_review": "333;384;183;129", "wc_reply_reviewers": "775;187;0;0", "wc_reply_authors": "1184;882;91;91", "reply_reviewers": "3;1;0;0", "reply_authors": "4;2;1;1", "recommendation_avg": [ 5.5, 2.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.82915619758885 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 72.0, 24.052026941611388 ], "wc_strength_and_weaknesses_avg": [ 128.5, 79.6539390112002 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 25.25, 3.112474899497183 ], "wc_summary_review_avg": [ 31.5, 15.644487847162015 ], "wc_review_avg": [ 257.25, 104.60013145307228 ], "wc_reply_reviewers_avg": [ 240.5, 317.89660268710014 ], "wc_reply_authors_avg": [ 562.0, 482.95082565412395 ], "reply_reviewers_avg": [ 1.0, 1.224744871391589 ], "reply_authors_avg": [ 2.0, 1.224744871391589 
], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.9045340337332909, "gs_citation": 54, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10884218682386412092&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=2XLRBjY46O6", "email": "sysu.edu.cn;uts.edu.au;huawei.com;huawei.com;ox.ac.uk;huawei.com;uts.edu.au;sysu.edu.cn", "author_num": 8, "aff_unique_index": "0;1;2;2;3;2;1;0", "aff_unique_norm": "Sun Yat-sen University;University of Technology Sydney;Huawei;University of Oxford", "aff_unique_dep": ";;Noah's Ark Lab;", "aff_unique_url": "http://www.sysu.edu.cn;https://www.uts.edu.au;https://www.huawei.com;https://www.ox.ac.uk", "aff_unique_abbr": "SYSU;UTS;Huawei;Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;2;0;1;0", "aff_country_unique": "China;Australia;United Kingdom" }, { "title": "Edgeformers: Graph-Empowered Transformers for Representation Learning on Textual-Edge Networks", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11312", "id": "2YQrqe4RNv", "poster": "/media/PosterPDFs/ICLR%202023/11312.png?t=1681483939.3982873", "openreview": "https://openreview.net/forum?id=2YQrqe4RNv", "slides": "https://iclr.cc/virtual/2023/poster/11312", "video": "https://iclr.cc/virtual/2023/poster/11312", "author_site": "Bowen Jin, Yu Zhang, Yu Meng, Jiawei Han", "tldr": "", "abstract": "Edges in many real-world social/information networks are associated with rich text information (e.g., user-user communications or user-product reviews). However, mainstream network representation learning models focus on propagating and aggregating node attributes, lacking specific designs to utilize text semantics on edges. While there exist edge-aware graph neural networks, they directly initialize edge attributes as a feature vector, which cannot fully capture the contextualized text semantics of edges. In this paper, we propose Edgeformers, a framework built upon graph-enhanced Transformers, to perform edge and node representation learning by modeling texts on edges in a contextualized way. Specifically, in edge representation learning, we inject network information into each Transformer layer when encoding edge texts; in node representation learning, we aggregate edge representations through an attention mechanism within each node\u2019s ego-graph. 
On five public datasets from three different domains, Edgeformers consistently outperform state-of-the-art baselines in edge classification and link prediction, demonstrating the efficacy in learning edge and node representations, respectively.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Bowen Jin;Yu Zhang;Yu Meng;Jiawei Han", "authorids": "~Bowen_Jin1;~Yu_Zhang26;~Yu_Meng1;~Jiawei_Han1", "gender": "M;M;M;M", "homepage": "https://peterjin.me/;https://yuzhimanhua.github.io/;https://yumeng5.github.io/;http://hanj.cs.illinois.edu/", "dblp": "235/8066;50/671-44;30/4233-1;h/JiaweiHan.html", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;N0PrmgIAAAAJ;S2-yZKcAAAAJ;https://scholar.google.com.tw/citations?user=Kv9AbjMAAAAJ", "orcid": "0000-0003-1295-2829;0000-0003-0540-6758;0000-0003-2554-2888;0000-0002-3629-2696", "linkedin": "bowen-peter-jin/;;;", "or_profile": "~Bowen_Jin1;~Yu_Zhang26;~Yu_Meng1;~Jiawei_Han1", "aff": "University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;University of Illinois at Urbana-Champaign (UIUC)", "aff_domain": "illinois.edu;illinois.edu;illinois.edu;illinois.edu", "position": "PhD student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\njin2023edgeformers,\ntitle={Edgeformers: Graph-Empowered Transformers for Representation Learning on Textual-Edge Networks},\nauthor={Bowen Jin and Yu Zhang and Yu Meng and Jiawei Han},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=2YQrqe4RNv}\n}", "github": "", "project": "", "reviewers": "Sciz;x3wZ;CnjZ", "pdf_size": 482866, "recommendation": "6;6;8", "confidence": "4;3;3", "correctness": "3;3;4", "technical_novelty": "3;2;2", "empirical_novelty": "3;2;3", "wc_summary_paper": "63;101;129", "wc_strength_and_weaknesses": "132;229;290", "wc_clarity_quality_novelty_and_reproducibility": "11;74;106", "wc_summary_review": "14;65;54", "wc_review": "220;469;579", "wc_reply_reviewers": "70;404;0", "wc_reply_authors": "505;896;488", "reply_reviewers": "1;1;0", "reply_authors": "2;3;1", "recommendation_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 97.66666666666667, 27.047283700134393 ], "wc_strength_and_weaknesses_avg": [ 217.0, 65.05894762956643 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 63.666666666666664, 39.46587837050577 ], "wc_summary_review_avg": [ 44.333333333333336, 21.9139732185248 ], "wc_review_avg": [ 422.6666666666667, 150.17841241521884 ], "wc_reply_reviewers_avg": [ 158.0, 176.28008017546017 ], "wc_reply_authors_avg": [ 629.6666666666666, 188.45394368091343 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.49999999999999983, "corr_recommendation_correctness": 0.9999999999999998, "gs_citation": 40, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8152249607062026423&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "pdf": "https://openreview.net/pdf?id=2YQrqe4RNv", "email": "illinois.edu;illinois.edu;illinois.edu;illinois.edu", 
"author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Illinois Urbana-Champaign", "aff_unique_dep": "", "aff_unique_url": "https://illinois.edu", "aff_unique_abbr": "UIUC", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Urbana-Champaign", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "2_B-eiVbgBs", "title": "A Differentiable Loss Function for Learning Heuristics in A*", "track": "main", "status": "Reject", "tldr": "A novel loss function", "abstract": "Optimization of heuristic functions for the A* algorithm, realized by deep neural networks, is usually done by minimizing square root loss of estimate of the cost to goal values. This paper argues that this does not necessarily lead to a faster search of A* algorithm since its execution relies on relative values instead of absolute ones. As a mitigation, we propose a L* loss, which upper-bounds the number of excessively expanded states inside the A* search. The L* loss, when used in the optimization of state-of-the-art deep neural networks for automated planning in maze domains like Sokoban and maze with teleports, significantly improves the fraction of solved problems, the quality of founded plans, and reduces the number of expanded states to approximately 50%", "keywords": "Differentiable Loss Function;a star;heuristic search", "primary_area": "", "supplementary_material": "/attachment/376fd322f94648da548fb9f2990042884824ba92.zip", "author": "Leah Chrestien;Tom\u00e1\u0161 Pevn\u00fd;Antonin Komenda;Stefan Edelkamp", "authorids": "~Leah_Chrestien1;~Tom\u00e1\u0161_Pevn\u00fd1;~Antonin_Komenda1;~Stefan_Edelkamp1", "gender": "F;M;M;M", "homepage": ";https://cs.felk.cvut.cz/en/people/pevnytom;http://agents.fel.cvut.cz/~komenda/;https://www.aic.fel.cvut.cz/members/stefan-edelkamp", "dblp": ";20/1317;https://dblp.uni-trier.de/pid/96/5384;98/3919", "google_scholar": "TW855V8AAAAJ;MnXqDssAAAAJ;https://scholar.google.cz/citations?user=dzh73HkAAAAJ;https://scholar.google.de/citations?user=TKVCz1MAAAAJ", "orcid": ";0000-0002-5768-9713;0000-0002-6947-308X;0000-0001-8435-5025", "linkedin": "leah-chrestien/;;akomenda/;", "or_profile": "~Leah_Chrestien1;~Tom\u00e1\u0161_Pevn\u00fd1;~Antonin_Komenda1;~Stefan_Edelkamp2", "aff": "Czech Technical Univeresity in Prague, Czech Technical University of Prague;Czech Technical University in Prague;Czech Technical University in Prague;AIC FEL CTU Prague", "aff_domain": "fel.cvut.cz;cvut.cz;cvut.cz;aic.fel.cvut.cz", "position": "PhD student;Associate Professor;Associate Professor;Full Professor", "bibtex": "@misc{\nchrestien2023a,\ntitle={A Differentiable Loss Function for Learning Heuristics in A*},\nauthor={Leah Chrestien and Tom{\\'a}{\\v{s}} Pevn{\\'y} and Antonin Komenda and Stefan Edelkamp},\nyear={2023},\nurl={https://openreview.net/forum?id=2_B-eiVbgBs}\n}", "github": "", "project": "", "reviewers": "SoyE;8chY;ytHz;oNqW", "site": "https://openreview.net/forum?id=2_B-eiVbgBs", "pdf_size": 341657, "recommendation": "5;5;8;8", "confidence": "4;4;3;5", "correctness": "3;3;3;2", "technical_novelty": "3;2;4;3", "empirical_novelty": "2;2;4;2", "wc_summary_paper": "104;23;164;65", "wc_strength_and_weaknesses": "386;106;467;597", "wc_clarity_quality_novelty_and_reproducibility": "96;35;49;91", "wc_summary_review": "60;80;64;93", "wc_review": "646;244;744;846", "wc_reply_reviewers": "26;0;0;387", "wc_reply_authors": "1049;463;0;1676", "reply_reviewers": "1;0;0;4", "reply_authors": "2;1;0;6", "recommendation_avg": [ 6.5, 1.5 ], 
"confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 89.0, 51.91820489963034 ], "wc_strength_and_weaknesses_avg": [ 389.0, 179.89302376690432 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 67.75, 26.280934153869037 ], "wc_summary_review_avg": [ 74.25, 13.160072188251856 ], "wc_review_avg": [ 620.0, 228.3111911405133 ], "wc_reply_reviewers_avg": [ 103.25, 164.1666455160731 ], "wc_reply_authors_avg": [ 797.0, 629.0687561785278 ], "reply_reviewers_avg": [ 1.25, 1.6393596310755 ], "reply_authors_avg": [ 2.25, 2.277608394786075 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": -0.5773502691896258, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6454538498412427890&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Czech Technical University in Prague;Czech Technical University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ctu.cz;https://www.ctu.cz", "aff_unique_abbr": "CTU;CTU", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Prague", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Czech Republic" }, { "id": "2_BsVZ6R-ef", "title": "Analytical Composition of Differential Privacy via the Edgeworth Accountant", "track": "main", "status": "Reject", "tldr": "We developed an efficient analytical tool via the Edgeworth expansion with finite-sample bounds to to keep track of DP guarantees with a large number of compositions.", "abstract": "Many modern machine learning algorithms are composed of simple private algorithms; thus, an increasingly important problem is to efficiently compute the overall privacy loss under composition. In this study, we introduce the Edgeworth Accountant, an analytical approach to composing differential privacy guarantees of private algorithms. The Edgeworth Accountant starts by losslessly tracking the privacy loss under composition using the $f$-differential privacy framework, which allows us to express the privacy guarantees using privacy-loss log-likelihood ratios (PLLRs). As the name suggests, this accountant next uses the Edgeworth expansion to the upper and lower bounds the probability distribution of the sum of the PLLRs. Moreover, by relying on a technique for approximating complex distributions using simple ones, we demonstrate that the Edgeworth Accountant can be applied to the composition of any noise-addition mechanism. Owing to certain appealing features of the Edgeworth expansion, the $(\\epsilon, \\delta)$-differential privacy bounds offered by this accountant are non-asymptotic, with essentially no extra computational cost, as opposed to the prior approaches, wherein the running times increase with the number of compositions. 
Finally, we demonstrate that our upper and lower $(\\epsilon, \\delta)$-differential privacy bounds are tight in federated analytics and certain regimes of training private deep learning models.", "keywords": "Differential Privacy;f-Differential Privacy;Edgeworth Expansion;PLLR;Edgeworth Accountant", "primary_area": "", "supplementary_material": "/attachment/34f000a7184c5e3f09e3356d763cf32cfd991ba2.zip", "author": "Hua Wang;Sheng Gao;Huanyu Zhang;Milan Shen;Weijie J Su", "authorids": "~Hua_Wang7;~Sheng_Gao2;~Huanyu_Zhang2;~Milan_Shen1;~Weijie_J_Su1", "gender": "M;M;M;F;M", "homepage": "https://statistics.wharton.upenn.edu/profile/wanghua/;https://sggao.github.io/;https://huanyuzhang.github.io;;http://stat.wharton.upenn.edu/~suw/", "dblp": ";;163/7342;;228/9127", "google_scholar": ";cZrdt4EAAAAJ;;;Uhf4nBkAAAAJ", "orcid": ";;;;", "linkedin": ";sheng-gao-8001aa146/;;milan-shen-860b5062/;", "or_profile": "~Hua_Wang7;~Sheng_Gao2;~Huanyu_Zhang2;~Milan_Shen1;~Weijie_J_Su1", "aff": "The Wharton School, University of Pennsylvania;The Wharton School, University of Pennsylvania;Meta;Research, Facebook;University of Pennsylvania", "aff_domain": "wharton.upenn.edu;wharton.upenn.edu;fb.com;research.facebook.com;upenn.edu", "position": "PhD student;PhD student;Researcher;Research Scientist;Associate Professor", "bibtex": "@misc{\nwang2023analytical,\ntitle={Analytical Composition of Differential Privacy via the Edgeworth Accountant},\nauthor={Hua Wang and Sheng Gao and Huanyu Zhang and Milan Shen and Weijie J Su},\nyear={2023},\nurl={https://openreview.net/forum?id=2_BsVZ6R-ef}\n}", "github": "", "project": "", "reviewers": "kz4w;Ri6D;iZYg;ESEo", "site": "https://openreview.net/forum?id=2_BsVZ6R-ef", "pdf_size": 394791, "recommendation": "3;5;6;6", "confidence": "3;5;3;2", "correctness": "3;4;4;3", "technical_novelty": "3;2;4;3", "empirical_novelty": "2;1;4;3", "wc_summary_paper": "118;44;400;61", "wc_strength_and_weaknesses": "315;565;300;95", "wc_clarity_quality_novelty_and_reproducibility": "187;78;112;17", "wc_summary_review": "18;81;67;52", "wc_review": "638;768;879;225", "wc_reply_reviewers": "606;0;0;0", "wc_reply_authors": "1616;1542;213;89", "reply_reviewers": "2;0;0;0", "reply_authors": "3;2;1;1", "recommendation_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.25, 1.0897247358851685 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.5, 1.118033988749895 ], "wc_summary_paper_avg": [ 155.75, 143.656491325662 ], "wc_strength_and_weaknesses_avg": [ 318.75, 166.6348928045984 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 98.5, 61.39421796879573 ], "wc_summary_review_avg": [ 54.5, 23.43608329051593 ], "wc_review_avg": [ 627.5, 247.54242060705474 ], "wc_reply_reviewers_avg": [ 151.5, 262.4056973466849 ], "wc_reply_authors_avg": [ 865.0, 715.8229529709145 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.18731716231633877, "corr_recommendation_correctness": 0.40824829046386296, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3714246034054728713&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;1;1;0", "aff_unique_norm": "University of Pennsylvania;Meta", "aff_unique_dep": "The Wharton School;Meta Platforms, Inc.", "aff_unique_url": "https://www.wharton.upenn.edu;https://meta.com", "aff_unique_abbr": "UPenn 
Wharton;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "2_I3JQ70U2", "title": "Asynchronous Message Passing: A new Framework for Learning in Graphs", "track": "main", "status": "Reject", "tldr": "A new framework for neural networks in graphs: messages are handled one at a time giving beneits in expressiveness and longrange propagation.", "abstract": "This paper studies asynchronous message passing (AMP), a new framework for applying neural networks to graphs. Existing graph neural networks (GNNs) use the message passing framework which is based on the synchronous distributed computing model. In traditional GNNs, nodes aggregate their neighbors in each round, which causes problems such as oversmoothing and expressiveness limitations. On the other hand, our AMP framework is based on the \\textit{asynchronous} model, where nodes react to messages of their neighbors individually. We prove (i) AMP is at least as powerful as the message passing framework, (ii) AMP is more powerful than the $1-$WL test for graph isomorphism, an important benchmark for message passing GNNs, and (iii) conceptually, AMP can even separate any pair of graphs and compute graph isomorphism. We experimentally validate the findings on AMP's expressiveness, and show that AMP might be better suited to propagate messages over large distances in graphs. We also demonstrate that AMP performs well on several graph classification benchmarks.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/890732f9673d7c3ea1f40fbc93101915e855c5c6.zip", "author": "Lukas Faber;Roger Wattenhofer", "authorids": "~Lukas_Faber1;~Roger_Wattenhofer1", "gender": ";Not Specified", "homepage": ";https://disco.ethz.ch/members/wroger", "dblp": ";w/RogerWattenhofer", "google_scholar": ";https://scholar.google.ch/citations?user=EG3VPm4AAAAJ", "orcid": ";", "linkedin": ";roger-wattenhofer-4466731/", "or_profile": "~Lukas_Faber1;~Roger_Wattenhofer1", "aff": ";Swiss Federal Institute of Technology", "aff_domain": ";ethz.ch", "position": ";Full Professor", "bibtex": "@misc{\nfaber2023asynchronous,\ntitle={Asynchronous Message Passing: A new Framework for Learning in Graphs},\nauthor={Lukas Faber and Roger Wattenhofer},\nyear={2023},\nurl={https://openreview.net/forum?id=2_I3JQ70U2}\n}", "github": "", "project": "", "reviewers": "oodJ;6FHJ;BeiX;qHED", "site": "https://openreview.net/forum?id=2_I3JQ70U2", "pdf_size": 826612, "recommendation": "5;5;6;6", "confidence": "3;4;3;3", "correctness": "3;2;3;4", "technical_novelty": "2;3;3;3", "empirical_novelty": "3;2;3;2", "wc_summary_paper": "37;67;40;54", "wc_strength_and_weaknesses": "297;272;609;153", "wc_clarity_quality_novelty_and_reproducibility": "244;269;60;30", "wc_summary_review": "67;36;115;25", "wc_review": "645;644;824;262", "wc_reply_reviewers": "108;739;138;52", "wc_reply_authors": "967;2241;1011;280", "reply_reviewers": "1;2;1;1", "reply_authors": "2;4;2;1", "recommendation_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 49.5, 11.968709203585824 ], "wc_strength_and_weaknesses_avg": [ 332.75, 168.5176177733355 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 150.75, 106.64749176609828 ], "wc_summary_review_avg": [ 60.75, 34.90254288730264 ], "wc_review_avg": [ 593.75, 
205.07605296572294 ], "wc_reply_reviewers_avg": [ 259.25, 278.69820146531265 ], "wc_reply_authors_avg": [ 1124.75, 706.6542206058066 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.7071067811865475, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:dV440pQyLcgJ:scholar.google.com/&scioq=Asynchronous+Message+Passing:+A+new+Framework+for+Learning+in+Graphs&hl=en&as_sdt=0,10", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Swiss Federal Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.ethz.ch", "aff_unique_abbr": "ETH Zurich", "aff_country_unique_index": "0", "aff_country_unique": "Switzerland" }, { "id": "2a3aR6geXxy", "title": "Explanation Uncertainty with Decision Boundary Awareness", "track": "main", "status": "Reject", "tldr": "We introduce a method that generates uncertainty estimates for feature attribution explanations.", "abstract": "Post-hoc explanation methods have become increasingly depended upon for understanding black-box classifiers in high-stakes applications, precipitating a need for reliable explanations. While numerous explanation methods have been proposed, recent works have shown that many existing methods can be inconsistent or unstable. In addition, high-performing classifiers are often highly nonlinear and can exhibit complex behavior around the decision boundary, leading to brittle or misleading local explanations. Therefore, there is an impending need to quantify the uncertainty of such explanation methods in order to understand when explanations are trustworthy. We introduce a novel uncertainty quantification method parameterized by a Gaussian Process model, which combines the uncertainty approximation of existing methods with a novel geodesic-based similarity which captures the complexity of the target black-box decision boundary. The proposed framework is highly flexible\u2014it can be used with any black-box classifier and feature attribution method to amortize uncertainty estimates for explanations. We show theoretically that our proposed geodesic-based kernel similarity increases with the complexity of the decision boundary. 
Empirical results on multiple tabular and image datasets show that our decision boundary-aware uncertainty estimate improves understanding of explanations as compared to existing methods", "keywords": "Explainability;Interpretability;XAI;Feature Importance;Explanation Uncertainty;Reliability", "primary_area": "", "supplementary_material": "/attachment/23cf154ecb15dc4720fad652903449434d640afc.zip", "author": "Davin Hill;Aria Masoomi;Sandesh Ghimire;Max Torop;Jennifer Dy", "authorids": "~Davin_Hill1;~Aria_Masoomi1;~Sandesh_Ghimire2;~Max_Torop1;~Jennifer_Dy1", "gender": ";M;;M;", "homepage": ";;;https://maxtorop.github.io/;https://mllabneu.github.io/", "dblp": ";242/9324;;305/7085;24/6000", "google_scholar": ";KXcX8coAAAAJ;;NjhrmBEAAAAJ;6h7b0fAAAAAJ", "orcid": ";;;;", "linkedin": ";aria-masoomi-779a02232;;max-torop-048ab4a9/;", "or_profile": "~Davin_Hill1;~Aria_Masoomi1;~Sandesh_Ghimire2;~Max_Torop1;~Jennifer_Dy1", "aff": ";Northeastern University;;Northeastern University;Northeastern University", "aff_domain": ";northeastern.edu;;northeastern.edu;northeastern.edu", "position": ";PhD student;;PhD student;Full Professor", "bibtex": "@misc{\nhill2023explanation,\ntitle={Explanation Uncertainty with Decision Boundary Awareness},\nauthor={Davin Hill and Aria Masoomi and Sandesh Ghimire and Max Torop and Jennifer Dy},\nyear={2023},\nurl={https://openreview.net/forum?id=2a3aR6geXxy}\n}", "github": "", "project": "", "reviewers": "dnzT;6BBB;R7Fs", "site": "https://openreview.net/forum?id=2a3aR6geXxy", "pdf_size": 16258091, "recommendation": "3;5;5", "confidence": "3;3;3", "correctness": "3;3;4", "technical_novelty": "3;3;3", "empirical_novelty": "1;2;2", "wc_summary_paper": "74;79;118", "wc_strength_and_weaknesses": "477;186;348", "wc_clarity_quality_novelty_and_reproducibility": "144;20;45", "wc_summary_review": "41;4;118", "wc_review": "736;289;629", "wc_reply_reviewers": "137;0;0", "wc_reply_authors": "1464;309;613", "reply_reviewers": "1;0;0", "reply_authors": "3;1;1", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.0 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "wc_summary_paper_avg": [ 90.33333333333333, 19.669491322575904 ], "wc_strength_and_weaknesses_avg": [ 337.0, 119.0546093185812 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 69.66666666666667, 53.543336549834926 ], "wc_summary_review_avg": [ 54.333333333333336, 47.48567035315906 ], "wc_review_avg": [ 551.3333333333334, 190.57165464873194 ], "wc_reply_reviewers_avg": [ 45.666666666666664, 64.58241934837135 ], "wc_reply_authors_avg": [ 795.3333333333334, 488.83557790142714 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.49999999999999983, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13810219030887286185&as_sdt=5,39&sciodt=0,39&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Northeastern University", "aff_unique_dep": "", "aff_unique_url": "https://www.northeastern.edu", "aff_unique_abbr": "NEU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "2a5Ru3JtNe0", "title": "From ChebNet to 
ChebGibbsNet", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Recent advancements in Spectral Graph Convolutional Networks (SpecGCNs) have led to state-of-the-art performance in various graph representation learning tasks. To exploit the potential of SpecGCNs, we analyze corresponding graph filters via polynomial interpolation, the cornerstone of graph signal processing. Different polynomial bases, such as Bernstein, Chebyshev, and monomial basis, have various convergence rates that will affect the error in polynomial interpolation. Although adopting Chebyshev basis for interpolation can minimize maximum error, the performance of ChebNet is still weaker than GPR-GNN and BernNet. We point out it is caused by the Gibbs phenomenon, which occurs when the corresponding graph frequency response function approximates the target function. It reduces the approximation ability of a truncated polynomial interpolation. In order to mitigate the Gibbs phenomenon, we propose to add the Gibbs damping factor with each term of Chebyshev polynomials on ChebNet. As a result, our lightweight approach leads to a significant performance boost. Afterwards, we reorganize ChebNet via decoupling feature propagation and transformation. We name this variant as ChebGibbsNet. Our experiments indicate that ChebGibbsNet is superior to other advanced SpecGCNs, such as GPR-GNN and BernNet, in both homogeneous graphs and heterogeneous graphs.", "keywords": "Spectral Graph Convolutional Networks;Gibbs phenomenon;Gibbs damping factors;ChebNet", "primary_area": "", "supplementary_material": "", "author": "Jie Zhang;Bo Hui;Po-wei Harn;Min-Te Sun;Wei-Shinn Ku", "authorids": "~Jie_Zhang22;~Bo_Hui1;~Po-wei_Harn1;~Min-Te_Sun1;~Wei-Shinn_Ku1", "gender": "M;;M;M;M", "homepage": ";https://bohui.herokuapp.com/;;https://wasn.csie.ncu.edu.tw/advisor;http://www.eng.auburn.edu/~weishinn/", "dblp": ";260/4200-1.html;192/2182.html;70/2846;21/1694", "google_scholar": ";cdwA-5IAAAAJ;npzWdwEAAAAJ;3hXth30AAAAJ;https://scholar.google.com.tw/citations?user=ZQ87sO4AAAAJ", "orcid": "0000-0001-9137-2501;0009-0008-9054-4437;;0000-0002-8911-3831;0000-0001-8636-4689", "linkedin": "jie-zhang-66b9b8a4/;;;;", "or_profile": "~Jie_Zhang22;~Bo_Hui1;~Po-wei_Harn1;~Min-Te_Sun1;~Wei-Shinn_Ku1", "aff": "National Central University;Auburn University;Auburn University;National Central University;Auburn University", "aff_domain": "ncu.edu.tw;auburn.edu;auburn.edu;ncu.edu.tw;auburn.edu", "position": "PhD student;PhD student;PhD student;Full Professor;Full Professor", "bibtex": "@misc{\nzhang2023from,\ntitle={From ChebNet to ChebGibbsNet},\nauthor={Jie Zhang and Bo Hui and Po-wei Harn and Min-Te Sun and Wei-Shinn Ku},\nyear={2023},\nurl={https://openreview.net/forum?id=2a5Ru3JtNe0}\n}", "github": "", "project": "", "reviewers": "g7GR;Wusn;EhZn;sSrL", "site": "https://openreview.net/forum?id=2a5Ru3JtNe0", "pdf_size": 460874, "recommendation": "3;3;3;5", "confidence": "5;4;4;4", "correctness": "2;3;3;3", "technical_novelty": "2;1;3;2", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "50;59;16;55", "wc_strength_and_weaknesses": "254;98;423;208", "wc_clarity_quality_novelty_and_reproducibility": "29;39;8;95", "wc_summary_review": "36;29;23;25", "wc_review": "369;225;470;383", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], 
"technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 45.0, 17.04406054905931 ], "wc_strength_and_weaknesses_avg": [ 245.75, 116.98370613038382 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 42.75, 32.17432982985038 ], "wc_summary_review_avg": [ 28.25, 4.968651728587948 ], "wc_review_avg": [ 361.75, 87.92432826015789 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9610874883240143290&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;1;0;1", "aff_unique_norm": "National Central University;Auburn University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ncu.edu.tw;https://www.auburn.edu", "aff_unique_abbr": "NCU;Auburn", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Taiwan;", "aff_country_unique_index": "0;1;1;0;1", "aff_country_unique": "China;United States" }, { "id": "2aRlyrY-LsJ", "title": "Revisiting Domain Randomization Via Relaxed State-Adversarial Policy Optimization", "track": "main", "status": "Reject", "tldr": "", "abstract": "Domain randomization (DR) is widely used in reinforcement learning (RL) to bridge the gap between simulation and reality through maximizing its average returns under the perturbation of environmental parameters. Although effective, the methods have two limitations: (1) Even the most complex simulators cannot capture all details in reality due to finite domain parameters and simplified physical models. (2) Previous methods often assume that the distribution of domain parameters is a specific family of probability functions, such as a normal or a uniform distribution, which may not be correct. To enable robust RL via DR without the aforementioned limitations, we rethink DR from the perspective of adversarial state perturbation, without the need for re-configuring the simulator or relying on prior knowledge about the environment. We point out that perturbing agents to the worst states during training is naive and could make the agents over-conservative. Hence, we present a Relaxed State-Adversarial Algorithm to tackle the over-conservatism issue by simultaneously maximizing the average-case and worst-case performance of policies. We compared our method to the state-of-the-art methods for evaluation. 
Experimental results and theoretical proofs verified the effectiveness of our method.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/db000d3c2d25eff0244ed7fb327e2e8e8294d345.zip", "author": "Yun-Hsuan Lien;Ping-Chun Hsieh;Yu-Shuen Wang", "authorids": "~Yun-Hsuan_Lien1;~Ping-Chun_Hsieh1;~Yu-Shuen_Wang1", "gender": "F;M;M", "homepage": ";https://pinghsieh.github.io/;https://people.cs.nycu.edu.tw/~yushuen/", "dblp": ";163/7352;08/742", "google_scholar": ";ix38JgoAAAAJ;AKeIOxIAAAAJ", "orcid": ";;0000-0003-2550-2990", "linkedin": ";;", "or_profile": "~Yun-Hsuan_Lien1;~Ping-Chun_Hsieh1;~Yu-Shuen_Wang1", "aff": "National Yang Ming Chiao Tung University;National Yang Ming Chiao Tung University;National Yang Ming Chiao Tung University", "aff_domain": "nycu.edu.tw;nycu.edu.tw;cs.nycu.edu.tw", "position": "PhD student;Assistant Professor;Associate Professor", "bibtex": "@misc{\nlien2023revisiting,\ntitle={Revisiting Domain Randomization Via Relaxed State-Adversarial Policy Optimization},\nauthor={Yun-Hsuan Lien and Ping-Chun Hsieh and Yu-Shuen Wang},\nyear={2023},\nurl={https://openreview.net/forum?id=2aRlyrY-LsJ}\n}", "github": "", "project": "", "reviewers": "E4mE;H7Ak;1qFW;PgB5", "site": "https://openreview.net/forum?id=2aRlyrY-LsJ", "pdf_size": 1136658, "recommendation": "5;5;6;6", "confidence": "4;4;4;3", "correctness": "2;2;3;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;3;3;4", "wc_summary_paper": "36;59;94;71", "wc_strength_and_weaknesses": "521;275;240;335", "wc_clarity_quality_novelty_and_reproducibility": "98;36;36;17", "wc_summary_review": "23;50;67;58", "wc_review": "678;420;437;481", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 65.0, 20.940391591371924 ], "wc_strength_and_weaknesses_avg": [ 342.75, 108.37521626276 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 46.75, 30.589009464185008 ], "wc_summary_review_avg": [ 49.5, 16.439282222773596 ], "wc_review_avg": [ 504.0, 102.89557813628339 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:dc1SN4hPDYwJ:scholar.google.com/&scioq=Revisiting+Domain+Randomization+Via+Relaxed+State-Adversarial+Policy+Optimization&hl=en&as_sdt=0,33", "gs_version_total": 6, "aff_unique_index": "0;0;0", "aff_unique_norm": "National Yang Ming Chiao Tung University", "aff_unique_dep": "", "aff_unique_url": "https://www.nycu.edu.tw", "aff_unique_abbr": "NYCU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Taiwan", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "2aSj08z30A1", "title": "ReaKE: Contrastive Molecular Representation Learning with Chemical Synthetic Knowledge Graph", "track": "main", "status": "Reject", "tldr": "", "abstract": "Molecular representation learning has demonstrated great promise in bridging machine learning and chemical science and in supporting novel chemical discoveries. 
State-of-the-art methods mostly employ graph neural networks (GNNs) with self-supervised learning (SSL) and extra chemical reaction knowledge to empower the learned embeddings. However, prior works ignore three major issues in modeling reaction data: abnormal energy flow, ambiguous embeddings, and sparse embedding spaces. To address these problems, we propose ReaKE, a chemical synthetic knowledge graph-driven pre-training framework for molecular representation learning. We first construct a large-scale chemical synthetic knowledge graph comprising reactants, products and reaction rules. We then propose triplet-level and graph-level contrastive learning strategies to jointly optimize the knowledge graph and molecular embeddings. Representations learned by ReaKE can capture intermolecular relationships reflected in the semantic knowledge graph and molecular structures. By comparing with other state-of-the-art methods, we show that ReaKE can achieve competitive performance on the reaction prediction pretext task and that the learned representations transfer well to various downstream tasks, including reaction classification, yield prediction, and molecule property prediction. Further visualization shows that the learned representations can capture the fine-grained differences both between reactions and between molecules.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/f92dfcdbefc4f815ed776e52d79419faa1714654.zip", "author": "Yi Wang;Shuangjia Zheng;Jiahua Rao;Yunan Luo;Yuedong Yang", "authorids": "~Yi_Wang30;~Shuangjia_Zheng2;~Jiahua_Rao1;~Yunan_Luo1;~Yuedong_Yang1", "gender": ";;;M;M", "homepage": ";https://scholar.google.com/citations?user=zeTuIZ4AAAAJ&hl=en;https://faculty.cc.gatech.edu/~yunan/;http://biomed.nscc-gz.cn;https://zhenglab.sjtu.edu.cn/", "dblp": ";244/2508;225/8950;98/2972;235/3743.html", "google_scholar": "https://scholar.google.com.hk/citations?hl=zh-CN;zeTuIZ4AAAAJ;N8RBFoAAAAAJ;AfjwTKoAAAAJ;_7z2_9kAAAAJ", "orcid": ";0000-0002-6840-8198;0000-0001-7728-6412;0000-0002-6782-2813;0000-0001-9747-4285", "linkedin": ";;;;", "or_profile": "~Yi_Wang30;~Jiahua_Rao1;~Yunan_Luo1;~Yuedong_Yang1;~SHUANGJIA_ZHENG1", "aff": "SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY;Georgia Institute of Technology;SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY", "aff_domain": "sysu.edu.cn;sysu.edu.cn;gatech.edu;sysu.edu.cn;sysu.edu.cn", "position": "MS student;PhD student;Assistant Professor;Full Professor;PhD student", "bibtex": "@misc{\nwang2023reake,\ntitle={Rea{KE}: Contrastive Molecular Representation Learning with Chemical Synthetic Knowledge Graph},\nauthor={Yi Wang and Shuangjia Zheng and Jiahua Rao and Yunan Luo and Yuedong Yang},\nyear={2023},\nurl={https://openreview.net/forum?id=2aSj08z30A1}\n}", "github": "", "project": "", "reviewers": "EYxB;ToSM;oZTX;5dFJ", "site": "https://openreview.net/forum?id=2aSj08z30A1", "pdf_size": 1792847, "recommendation": "5;5;5;6", "confidence": "3;4;4;2", "correctness": "3;3;3;4", "technical_novelty": "3;2;2;2", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "35;47;85;116", "wc_strength_and_weaknesses": "340;280;101;39", "wc_clarity_quality_novelty_and_reproducibility": "28;95;383;40", "wc_summary_review": "31;71;311;214", "wc_review": "434;493;880;409", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "332;700;747;760", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 3.25,
0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 70.75, 31.98730216820418 ], "wc_strength_and_weaknesses_avg": [ 190.0, 123.8163963294038 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 136.5, 144.5415165272594 ], "wc_summary_review_avg": [ 156.75, 112.06778082928206 ], "wc_review_avg": [ 554.0, 190.6711829301953 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 634.75, 176.21205265247892 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.8703882797784891, "corr_recommendation_correctness": 1.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14021226188093107263&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Sun Yat-sen University;Georgia Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "http://www.sysu.edu.cn;https://www.gatech.edu", "aff_unique_abbr": "SYSU;Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "China;United States" }, { "title": "LogicDP: Creating Labels for Graph Data via Inductive Logic Programming", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10944", "id": "2b2s9vd7wYv", "poster": "/media/PosterPDFs/ICLR%202023/10944.png?t=1681058077.4095426", "openreview": "https://openreview.net/forum?id=2b2s9vd7wYv", "slides": "https://iclr.cc/virtual/2023/poster/10944", "video": "https://iclr.cc/virtual/2023/poster/10944", "author_site": "Yuan Yang, Faramarz Fekri, James Kerce, Ali Payani", "tldr": "A data programming framework for generating training labels for graph data", "abstract": "Graph data, such as scene graphs and knowledge graphs, see wide use in AI systems. In real-world and large applications graph data are usually incomplete, motivating graph reasoning models for missing-fact or missing-relationship inference. While these models can achieve state-of-the-art performance, they require a large amount of training data.\n\nRecent years have witnessed the rising interest in label creation with data programming (DP) methods, which aim to generate training labels from heuristic labeling functions. However, existing methods typically focus on unstructured data and are not optimized for graphs. In this work, we propose LogicDP, a data programming framework for graph data. Unlike existing DP methods, (1) LogicDP utilizes the inductive logic programming (ILP) technique and automatically discovers the labeling functions from the graph data; (2) LogicDP employs a budget-aware framework to iteratively refine the functions by querying an oracle, which significantly reduces the human efforts in function creations. 
Experiments show that LogicDP achieves better data efficiency in both scene graph and knowledge graph reasoning tasks.", "keywords": "Data Programming;Graph Reasoning;Inductive Logic Programming", "primary_area": "", "supplementary_material": "", "author": "Yuan Yang;Faramarz Fekri;James Clayton Kerce;Ali Payani", "authorids": "~Yuan_Yang1;~Faramarz_Fekri1;~James_Clayton_Kerce1;~Ali_Payani1", "gender": "M;M;;M", "homepage": "https://gblackout.github.io/;http://Fekri.ece.gatech.edu;http://kerce.net;", "dblp": ";77/2313;305/7702;184/3921", "google_scholar": "Lt4tmL8AAAAJ;https://scholar.google.com/citations?hl=en;FPtjf0gAAAAJ;9rHwD8wAAAAJ", "orcid": ";;;0000-0003-4054-2958", "linkedin": ";;claytonkerce/;ali-payani-59267515", "or_profile": "~Yuan_Yang1;~Faramarz_Fekri1;~James_Clayton_Kerce1;~Ali_Payani1", "aff": "Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology;Cisco", "aff_domain": "gatech.edu;gatech.edu;gatech.edu;cisco.com", "position": "PhD student;Full Professor;Principal Researcher;Researcher", "bibtex": "@inproceedings{\nyang2023logicdp,\ntitle={Logic{DP}: Creating Labels for Graph Data via Inductive Logic Programming},\nauthor={Yuan Yang and Faramarz Fekri and James Clayton Kerce and Ali Payani},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=2b2s9vd7wYv}\n}", "github": "", "project": "", "reviewers": "nzQK;1tSH;FEgy;M5YG", "pdf_size": 1075347, "recommendation": "3;5;6;8", "confidence": "4;4;3;3", "correctness": "2;4;3;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "85;117;84;26", "wc_strength_and_weaknesses": "192;152;199;44", "wc_clarity_quality_novelty_and_reproducibility": "74;21;423;36", "wc_summary_review": "68;49;18;67", "wc_review": "419;339;724;173", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1077;440;1062;75", "reply_reviewers": "0;0;0;0", "reply_authors": "2;1;2;1", "recommendation_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 78.0, 32.8252951243397 ], "wc_strength_and_weaknesses_avg": [ 146.75, 61.973280532823175 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 138.5, 165.38817974692145 ], "wc_summary_review_avg": [ 50.5, 20.22992832414391 ], "wc_review_avg": [ 413.75, 199.89419076101237 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 663.5, 426.0484127420263 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.8320502943378437, "corr_recommendation_correctness": 0.39223227027636803, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:hOeQfHKiNS4J:scholar.google.com/&scioq=LogicDP:+Creating+Labels+for+Graph+Data+via+Inductive+Logic+Programming&hl=en&as_sdt=0,44", "gs_version_total": 0, "pdf": "https://openreview.net/pdf?id=2b2s9vd7wYv", "email": "gatech.edu;gatech.edu;gatech.edu;cisco.com", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Georgia Institute of Technology;Cisco Systems", "aff_unique_dep": ";", "aff_unique_url": "https://www.gatech.edu;https://www.cisco.com", "aff_unique_abbr": "Georgia Tech;Cisco", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", 
"aff_country_unique": "United States" }, { "id": "2bJ6Cqrd-a", "title": "Few-shot Lifelong Reinforcement Learning with Generalization Guarantees: An Empirical PAC-Bayes Approach", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "We propose a new empirical PAC-Bayes approach to develop lifelong reinforcement learning algorithms with theoretical guarantees. The main idea is to extend the PAC-Bayes theory in supervised learning to the reinforcement learning regime. More specifically, we train a distribution of policies, and gradually improve the distribution parameters via optimizing the generalization error bound using trajectories from each task. As the agent sees more tasks, it elicits better prior distributions of policies, resulting in tighter generalization bounds and improved future learning. To demonstrate the superior performance of our method compared to recent state-of-the-art methods, we test the proposed algorithms on various OpenAI's Gym and Mujuco environments and show that they adapt to new tasks more efficiently by continuously distilling knowledge from past tasks.", "keywords": "Few-shot Learning;Lifelong Meta RL;Multi-Task RL;PAC-Bayes Bound;Generalization Error Bound", "primary_area": "", "supplementary_material": "/attachment/fe6f1e9fd5ceddfb76c4eb15e0e76b17bc90057b.zip", "author": "Zhi Zhang;Han Liu", "authorids": "~Zhi_Zhang1;~Han_Liu4", "gender": ";", "homepage": ";", "dblp": ";", "google_scholar": "O__axAoAAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Zhi_Zhang1;~Han_Liu4", "aff": "University of California, Los Angeles;Northwestern University", "aff_domain": "ucla.edu;u.northwestern.edu", "position": "PhD student;Associate Professor", "bibtex": "@misc{\nzhang2023fewshot,\ntitle={Few-shot Lifelong Reinforcement Learning with Generalization Guarantees: An Empirical {PAC}-Bayes Approach},\nauthor={Zhi Zhang and Han Liu},\nyear={2023},\nurl={https://openreview.net/forum?id=2bJ6Cqrd-a}\n}", "github": "", "project": "", "reviewers": "sWqw;3bhb;7Fy7", "site": "https://openreview.net/forum?id=2bJ6Cqrd-a", "pdf_size": 1246695, "recommendation": "3;3;5", "confidence": "4;3;2", "correctness": "2;3;2", "technical_novelty": "2;2;3", "empirical_novelty": "2;2;2", "wc_summary_paper": "170;82;56", "wc_strength_and_weaknesses": "219;347;160", "wc_clarity_quality_novelty_and_reproducibility": "109;109;108", "wc_summary_review": "102;59;25", "wc_review": "600;597;349", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "correctness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 102.66666666666667, 48.780688346471244 ], "wc_strength_and_weaknesses_avg": [ 242.0, 78.05553578489271 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 108.66666666666667, 0.4714045207910317 ], "wc_summary_review_avg": [ 62.0, 31.506613062445584 ], "wc_review_avg": [ 515.3333333333334, 117.62180447896934 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.8660254037844387, "corr_recommendation_correctness": -0.5, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:QtrG23WNPWkJ:scholar.google.com/&scioq=Few-shot+Lifelong+Reinforcement+Learning+with+Generalization+Guarantees:+An+Empirical+PAC-Bayes+Approach&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "University of California, Los Angeles;Northwestern University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucla.edu;https://www.northwestern.edu", "aff_unique_abbr": "UCLA;NU", "aff_campus_unique_index": "0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "2bhXOpq53RP", "title": "A Robust Stacking Framework for Training Deep Graph Models with Multifaceted Node Features", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Graph Neural Networks (GNNs) with numerical node features and graph structure as inputs have demonstrated superior performance on various supervised learning tasks with graph data. However the numerical node features utilized by GNNs are commonly extracted from raw data which is of text or tabular (numeric/categorical) type in most real-world applications. \nThe best models for such data types in most standard supervised learning settings with IID (non-graph) data are not simple neural network layers and thus are not easily incorporated into a GNN. Here we propose a robust stacking framework that fuses graph-aware propagation with arbitrary models intended for IID data, which are ensembled and stacked in multiple layers. Our layer-wise framework leverages bagging and stacking strategies to enjoy strong generalization, in a manner which effectively mitigates label leakage and overfitting. Across a variety of graph datasets with tabular/text node features, our method achieves comparable or superior performance relative to both tabular/text and graph neural network models, as well as existing state-of-the-art hybrid strategies that combine the two. ", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/52011cc7b01598db86eb6051e1cf638fdce0fa4c.zip", "author": "Jiuhai Chen;Jonas Mueller;Vassilis N. Ioannidis;Tom Goldstein;David Wipf", "authorids": "~Jiuhai_Chen1;~Jonas_Mueller1;~Vassilis_N._Ioannidis1;~Tom_Goldstein1;~David_Wipf1", "gender": "M;M;;M;M", "homepage": "https://www.linkedin.com/in/jiuhai-chen-6a486715a/;;https://scholar.google.com/citations?hl=en&user=mjmiI4sAAAAJ&view_op=list_works&authuser=1;https://www.cs.umd.edu/~tomg/;http://www.davidwipf.com/", "dblp": ";178/3250;;25/8184;81/6421", "google_scholar": ";HeVcLzAAAAAJ;;KmSuVtgAAAAJ;YJx1WSgAAAAJ", "orcid": ";;0000-0002-8367-0733;;", "linkedin": ";;;;", "or_profile": "~Jiuhai_Chen1;~Jonas_Mueller1;~Vassilis_N._Ioannidis1;~Tom_Goldstein1;~David_Wipf1", "aff": "University of Maryland, College Park;Cleanlab;Amazon Web Services;University of Maryland, College Park;Amazon AI Research Lab", "aff_domain": "umd.edu;cleanlab.ai;amazon.com;umd.edu;amazon.com", "position": "PhD student;Researcher;Applied Scientist II;Full Professor;Principal Research Scientist", "bibtex": "@misc{\nchen2023a,\ntitle={A Robust Stacking Framework for Training Deep Graph Models with Multifaceted Node Features},\nauthor={Jiuhai Chen and Jonas Mueller and Vassilis N. 
Ioannidis and Tom Goldstein and David Wipf},\nyear={2023},\nurl={https://openreview.net/forum?id=2bhXOpq53RP}\n}", "github": "", "project": "", "reviewers": "YZeR;Ufrm;Vp3V", "site": "https://openreview.net/forum?id=2bhXOpq53RP", "pdf_size": 451065, "recommendation": "3;3;5", "confidence": "3;3;3", "correctness": "2;2;4", "technical_novelty": "3;2;2", "empirical_novelty": "2;2;2", "wc_summary_paper": "60;52;77", "wc_strength_and_weaknesses": "110;209;49", "wc_clarity_quality_novelty_and_reproducibility": "437;13;241", "wc_summary_review": "68;18;7", "wc_review": "675;292;374", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.0 ], "correctness_avg": [ 2.6666666666666665, 0.9428090415820634 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 63.0, 10.424330514074594 ], "wc_strength_and_weaknesses_avg": [ 122.66666666666667, 65.93094030035435 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 230.33333333333334, 173.2615235866162 ], "wc_summary_review_avg": [ 31.0, 26.54555832275273 ], "wc_review_avg": [ 447.0, 164.65924409721632 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 1.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8641455436724383704&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2;0;2", "aff_unique_norm": "University of Maryland;Cleanlab;Amazon", "aff_unique_dep": ";;Amazon Web Services", "aff_unique_url": "https://www.umd.edu;https://www.cleanlab.ai;https://aws.amazon.com", "aff_unique_abbr": "UMD;;AWS", "aff_campus_unique_index": "0;0", "aff_campus_unique": "College Park;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "2iKvo44-Bya", "title": "System Identification as a Reinforcement Learning Problem", "track": "main", "status": "Reject", "tldr": "System Identification as a Reinforcement Learning Problem", "abstract": "System identification, also known as learning forward models, transfer functions, system dynamics, etc., has a long tradition both in science and engineering in different fields. Particularly, it is a recurring theme in Reinforcement Learning research, where forward models approximate the state transition function of a Markov Decision Process by learning a mapping function from current state and action to the next state. This problem is commonly defined directly as a Supervised Learning problem. This common approach faces several difficulties due to the inherent complexities of the dynamics to learn, for example, delayed effects, high non-linearity, non-stationarity, partial observability and, more importantly, error accumulation when using bootstrapped predictions (predictions based on past predictions) over large time horizons. Here we explore the use of Reinforcement Learning for this problem.
We elaborate on why and how this problem fits naturally and soundly as a Reinforcement Learning problem, and present experimental results that demonstrate RL is a promising technique to solve this kind of problem.", "keywords": "System Identification;Reinforcement Learning;Offline Reinforcement Learning;Forward Models", "primary_area": "", "supplementary_material": "", "author": "Jose Antonio Martin H.;Oscar Fern\u00e1ndez Vicente;Sergio Perez;Anas Belfadil;Cristina Ibanez-Llano;Freddy Perozo;Jose Javier Valle;Javier Arechalde Pelaz", "authorids": "~Jose_Antonio_Martin_H.1;~Oscar_Fern\u00e1ndez_Vicente1;~Sergio_Perez1;~Anas_Belfadil1;~Cristina_Ibanez-Llano1;f.perozo@repsol.com;jjvallea@repsol.com;~Javier_Arechalde_Pelaz1", "gender": "M;M;;M;F;;;M", "homepage": "https://jamh-web.appspot.com/;https://www.linkedin.com/in/oscarfv;;;;;;https://www.linkedin.com/in/javier-arechalde", "dblp": "m/JoseAntonioMartinH;;;;;;;", "google_scholar": "0YHcAXsAAAAJ;;BVGuD7IAAAAJ;;;;;", "orcid": "0000-0002-0874-2194;;;;;;;", "linkedin": "https://es.linkedin.com/in/jamartinh;oscarfv;spmorillo;anas-belfadil/;cristina-ib%C3%A1%C3%B1ez-llano-a0242b25/;;;javier-arechalde", "or_profile": "~Jose_Antonio_Martin_H.1;~Oscar_Fern\u00e1ndez_Vicente1;~Sergio_Perez1;~Anas_Belfadil1;~Cristina_Ibanez-Llano1;f.perozo@repsol.com;jjvallea@repsol.com;~Javier_Arechalde_Pelaz1", "aff": "Repsol Technology Lab;Universidad Carlos III de Madrid;;Universidad Polit\u00e9cnica de Cataluna;;;;Repsol", "aff_domain": "repsol.com;uc3m.es;;upc.edu;;;;repsol.com", "position": "Principal Researcher;PhD student;;PhD student;;;;Researcher", "bibtex": "@misc{\nh.2023system,\ntitle={System Identification as a Reinforcement Learning Problem},\nauthor={Jose Antonio Martin H. and Oscar Fern{\\'a}ndez Vicente and Sergio Perez and Anas Belfadil and Cristina Ibanez-Llano and Freddy Perozo and Jose Javier Valle and Javier Arechalde Pelaz},\nyear={2023},\nurl={https://openreview.net/forum?id=2iKvo44-Bya}\n}", "github": "", "project": "", "reviewers": "C9nJ;xo1S;NoVW;wuwo", "site": "https://openreview.net/forum?id=2iKvo44-Bya", "pdf_size": 8763383, "recommendation": "1;3;5;6", "confidence": "4;5;4;4", "correctness": "2;3;3;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "87;43;240;40", "wc_strength_and_weaknesses": "222;274;248;166", "wc_clarity_quality_novelty_and_reproducibility": "388;69;107;21", "wc_summary_review": "19;50;39;40", "wc_review": "716;436;634;267", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "808;509;643;0", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;0", "recommendation_avg": [ 3.75, 1.920286436967152 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 102.5, 81.53680150704956 ], "wc_strength_and_weaknesses_avg": [ 227.5, 39.98437194704951 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 146.25, 142.86247757896402 ], "wc_summary_review_avg": [ 37.0, 11.247221879201993 ], "wc_review_avg": [ 513.25, 174.85333139520105 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 490.0, 302.07366651199504 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0.75, 0.4330127018922193 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": -0.22549380840084865, "corr_recommendation_correctness": 0.8268106308031117, "gs_citation": 1, "gs_cited_by_link":
"https://scholar.google.com/scholar?cites=12651083148928387049&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Repsol;Universidad Carlos III de Madrid;Universitat Polit\u00e8cnica de Catalunya", "aff_unique_dep": "Technology Lab;;", "aff_unique_url": "https://www.repsol.com;https://www.uc3m.es;https://www.upc.edu", "aff_unique_abbr": "Repsol;UC3M;UPC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Spain" }, { "id": "2iu9NhxX23", "title": "Conceptual SCAN: Learning With and About Rules", "track": "main", "status": "Reject", "tldr": "", "abstract": "The ability to learn from a mix of rules and examples and to reflect on the learned abstractions is an important aspect of human intelligence. At the same time, there is a lack of benchmarks that systematically test for this ability, which makes it hard to evaluate the degree to which it is present in state-of-the-art ML architectures. We introduce a method to systematically construct such benchmarks by using an example structure that allows us to explicitly provide and ask about rules that are relevant for the given task. We present a simple dataset that is constructed according to this method, and we use it to analyze the performance of a variety of T5-based machine learning models. We identify four challenge areas in this setup: maintaining consistency between learned rules and their application, scaling to larger rule sets, compositional generalization, and dealing with limited training data.", "keywords": "reasoning;compositional generalization;rule learning;semantic parsing;consistency", "primary_area": "", "supplementary_material": "/attachment/69682f7bbf8d9ada56058c74027d3206b61c4eb3.zip", "author": "Nathan Scales;Nathanael Sch\u00e4rli;Abubakr Babiker;Yu-Han Liu;Mostafa Dehghani;Olivier Bousquet", "authorids": "~Nathan_Scales1;~Nathanael_Sch\u00e4rli1;~Abubakr_Babiker1;~Yu-Han_Liu1;~Mostafa_Dehghani1;~Olivier_Bousquet2", "gender": "M;M;;M;;M", "homepage": ";;;http://mostafadehghani.com/;;", "dblp": ";;160/2029;125/4062;;", "google_scholar": "64RoFnUAAAAJ;iy8RPhgAAAAJ;;https://scholar.google.nl/citations?user=MiHOX3QAAAAJ;;zIop5SgAAAAJ", "orcid": ";;;;;", "linkedin": "https://ch.linkedin.com/in/nathanael-sch%C3%A4rli-118a3984;https://gh.linkedin.com/in/abubakr-hassan;;;;", "or_profile": "~Nathanael_Sch\u00e4rli1;~Abubakr_Babiker1;~Yu-Han_Liu1;~Mostafa_Dehghani1;~Olivier_Bousquet2;~Nathan_K._S._Scales1", "aff": "Research, Google;;Google;Google DeepMind;Google;Google", "aff_domain": "research.google.com;;google.com;google.com;google.com;google.com", "position": "Researcher;;Researcher;Research Scientist;Software Engineer;Software Engineer", "bibtex": "@misc{\nscales2023conceptual,\ntitle={Conceptual {SCAN}: Learning With and About Rules},\nauthor={Nathan Scales and Nathanael Sch{\\\"a}rli and Abubakr Babiker and Yu-Han Liu and Mostafa Dehghani and Olivier Bousquet},\nyear={2023},\nurl={https://openreview.net/forum?id=2iu9NhxX23}\n}", "github": "", "project": "", "reviewers": "44yb;N6X6;qoQG;VbTf", "site": "https://openreview.net/forum?id=2iu9NhxX23", "pdf_size": 1953122, "recommendation": "3;5;6;6", "confidence": "4;4;4;4", "correctness": "3;3;3;3", "technical_novelty": "2;3;2;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "29;30;113;49", "wc_strength_and_weaknesses": "9;213;153;550", "wc_clarity_quality_novelty_and_reproducibility": "40;27;899;158", "wc_summary_review": "127;34;87;60", "wc_review": 
"205;304;1252;817", "wc_reply_reviewers": "0;0;281;0", "wc_reply_authors": "1385;567;2018;1957", "reply_reviewers": "0;0;1;0", "reply_authors": "2;1;3;3", "recommendation_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 55.25, 34.28100786149672 ], "wc_strength_and_weaknesses_avg": [ 231.25, 198.40158139490723 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 281.0, 360.4337664536995 ], "wc_summary_review_avg": [ 77.0, 34.416565778706044 ], "wc_review_avg": [ 644.5, 420.6878296314263 ], "wc_reply_reviewers_avg": [ 70.25, 121.67656923171363 ], "wc_reply_authors_avg": [ 1481.75, 582.9997319896468 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Y4W3fcm9gssJ:scholar.google.com/&scioq=Conceptual+SCAN:+Learning+With+and+About+Rules&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google Research", "aff_unique_url": "https://research.google", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "2jcvy1htS_r", "title": "A Hierarchical Bayesian Approach to Federated Learning", "track": "main", "status": "Reject", "tldr": "We propose a novel hierarchical Bayesian approach to Federated learning (FL) where the block-coordinate descent solution to the variational inference leads to a viable algorithm for FL with proved convergence and generalisation guarantee.", "abstract": "We propose a novel hierarchical Bayesian approach to Federated learning (FL), where our models reasonably describe the generative process of clients' local data via hierarchical Bayesian modeling: constituting random variables of local models for clients that are governed by a higher-level global variate. Interestingly, the variational inference in our Bayesian model leads to an optimization problem whose block-coordinate descent solution becomes a distributed algorithm that is separable over clients and allows them not to reveal their own private data at all, thus fully compatible with FL. We also highlight that our block-coordinate algorithm has particular forms that subsume the well-known FL algorithms including Fed-Avg and Fed-Prox as special cases. That is, we not only justify the previous Fed-Avg and Fed-Prox algorithms whose learning protocols look intuitive but theoretically less underpinned, but also generalise them even further via principled Bayesian approaches. 
Beyond introducing novel modeling and derivations, we also offer convergence analysis showing that our block-coordinate FL algorithm converges to a (local) optimum of the objective at the rate of $O(1/\\sqrt{t})$, the same rate as regular (centralised) SGD, as well as the generalisation error analysis where we prove that the test error of our model on unseen data is guaranteed to vanish as we increase the training data size, and is thus asymptotically optimal.", "keywords": "Federated Learning;Bayesian Methods;Probabilistic Models", "primary_area": "", "supplementary_material": "/attachment/4564d1c26d5ae0768cfdceb1a2986c2926d53938.zip", "author": "Minyoung Kim;Timothy Hospedales", "authorids": "~Minyoung_Kim2;~Timothy_Hospedales1", "gender": "M;M", "homepage": "https://sites.google.com/site/mikim21/;http://homepages.inf.ed.ac.uk/thospeda/", "dblp": ";32/3545", "google_scholar": ";https://scholar.google.fr/citations?user=nHhtvqkAAAAJ", "orcid": ";0000-0003-4867-7486", "linkedin": ";timothyhospedales/", "or_profile": "~Minyoung_Kim2;~Timothy_Hospedales1", "aff": "Samsung AI Center, Cambridge, UK;Samsung AI Research Centre", "aff_domain": "samsung.com;samsung.com", "position": "Senior Researcher;Principal Researcher", "bibtex": "@misc{\nkim2023a,\ntitle={A Hierarchical Bayesian Approach to Federated Learning},\nauthor={Minyoung Kim and Timothy Hospedales},\nyear={2023},\nurl={https://openreview.net/forum?id=2jcvy1htS_r}\n}", "github": "", "project": "", "reviewers": "FDT8;7vmX;SHVY;UL1w", "site": "https://openreview.net/forum?id=2jcvy1htS_r", "pdf_size": 747720, "recommendation": "5;5;6;6", "confidence": "3;3;3;3", "correctness": "3;3;4;4", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;3;2;3", "wc_summary_paper": "240;265;58;101", "wc_strength_and_weaknesses": "117;479;104;235", "wc_clarity_quality_novelty_and_reproducibility": "154;61;10;25", "wc_summary_review": "25;33;54;48", "wc_review": "536;838;226;409", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1013;1361;243;169", "reply_reviewers": "0;0;0;0", "reply_authors": "2;2;1;1", "recommendation_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.0, 0.0 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 166.0, 88.26947377208046 ], "wc_strength_and_weaknesses_avg": [ 233.75, 150.51141983251637 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 62.5, 55.98437281956457 ], "wc_summary_review_avg": [ 40.0, 11.554220008291344 ], "wc_review_avg": [ 502.25, 222.97799779350427 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 696.5, 506.3721457584333 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 1.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11891154169260934318&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Samsung", "aff_unique_dep": "AI Center", "aff_unique_url": "https://www.samsung.com/global/research-innovation/ai-research/", "aff_unique_abbr": "SAC", "aff_campus_unique_index": "0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;1", "aff_country_unique": "United Kingdom;South Korea" }, { "id": "2lbtqs4enl", "title": "Optimising 2D Pose Representation: Improving Accuracy, Stability and Generalisability in Unsupervised 2D-3D Human Pose Estimation", "track":
"main", "status": "Reject", "tldr": "Investigating how the representation of a 2D pose can effect the 3D ordinate predictions during the unsupervised adversarial 2D-3D lifting cycle.", "abstract": "This paper addresses the problem of 2D pose representation during unsupervised 2D to 3D pose lifting to improve the accuracy, stability and generalisability of 3D human pose estimation (HPE) models. All unsupervised 2D-3D HPE approaches provide the entire 2D kinematic skeleton to a model during training. We argue that this is sub-optimal and disruptive as long-range correlations are induced between independent 2D key points and predicted 3D ordinates during training. To this end, we conduct the following study. With a maximum architecture capacity of 6 residual blocks, we evaluate the performance of 5 models which each represent a 2D pose differently during the adversarial unsupervised 2D-3D HPE process. Additionally, we show the correlations between 2D key points which are learned during the training process, highlighting the unintuitive correlations induced when an entire 2D pose is provided to a lifting model. Our results show that the most optimal representation of a 2D pose is that of two independent segments, the torso and legs, with no shared features between each lifting network. This approach decreased the average error by 20% on the Human3.6M dataset when compared to a model with a near identical parameter count trained on the entire 2D kinematic skeleton. Furthermore, due to the complex nature of adversarial learning, we show how this representation can also improve convergence during training allowing for an optimum result to be obtained more often.", "keywords": "Unsupervised Learning;3D Human Pose Estimation;Data Representation;Adversarial Learning", "primary_area": "", "supplementary_material": "/attachment/426607471e01371043de6d87af3d8f39b12377bc.zip", "author": "Peter Timothy David Hardy;Srinandan Dasmahapatra;Hansung Kim", "authorids": "~Peter_Timothy_David_Hardy1;~Srinandan_Dasmahapatra1;~Hansung_Kim1", "gender": "M;;M", "homepage": ";;", "dblp": ";64/5025;44/4871", "google_scholar": "https://scholar.google.co.uk/citations?user=z154GbsAAAAJ;https://scholar.google.co.uk/citations?user=4FVYygkAAAAJ;https://scholar.google.co.uk/citations?user=frG8WZAAAAAJ", "orcid": "my-orcid?orcid=0000-0002-7682-2110;;0000-0003-4907-0491", "linkedin": "peter-hardy-0041ab121/;;hansung-kim-03168619/", "or_profile": "~Peter_Timothy_David_Hardy1;~Srinandan_Dasmahapatra1;~Hansung_Kim1", "aff": "University of Southampton;University of Southampton;University of Southampton", "aff_domain": "soton.ac.uk;soton.ac.uk;soton.ac.uk", "position": "PhD student;Associate Professor;Associate Professor", "bibtex": "@misc{\nhardy2023optimising,\ntitle={Optimising 2D Pose Representation: Improving Accuracy, Stability and Generalisability inUnsupervised 2D-3D Human Pose Estimation},\nauthor={Peter Timothy David Hardy and Srinandan Dasmahapatra and Hansung Kim},\nyear={2023},\nurl={https://openreview.net/forum?id=2lbtqs4enl}\n}", "github": "", "project": "", "reviewers": "QAQe;rPD4;cAex;p3BW;2qG2", "site": "https://openreview.net/forum?id=2lbtqs4enl", "pdf_size": 647178, "recommendation": "3;5;5;5;8", "confidence": "4;4;4;5;1", "correctness": "4;3;3;4;4", "technical_novelty": "2;3;2;3;4", "empirical_novelty": "2;2;3;0;0", "wc_summary_paper": "122;85;71;57;1", "wc_strength_and_weaknesses": "120;105;414;157;1", "wc_clarity_quality_novelty_and_reproducibility": "14;8;53;1;1", "wc_summary_review": "20;46;54;53;1", 
"wc_review": "276;244;592;268;4", "wc_reply_reviewers": "0;0;0;138;0", "wc_reply_authors": "0;385;814;324;0", "reply_reviewers": "0;0;0;1;0", "reply_authors": "0;1;1;1;0", "recommendation_avg": [ 5.2, 1.6 ], "confidence_avg": [ 3.6, 1.3564659966250536 ], "correctness_avg": [ 3.6, 0.4898979485566356 ], "technical_novelty_avg": [ 2.8, 0.7483314773547882 ], "empirical_novelty_avg": [ 1.4, 1.2 ], "wc_summary_paper_avg": [ 67.2, 39.54946270178648 ], "wc_strength_and_weaknesses_avg": [ 159.4, 137.42576177704092 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 15.4, 19.417517864031954 ], "wc_summary_review_avg": [ 34.8, 20.913153755471697 ], "wc_review_avg": [ 276.8, 187.04266892877678 ], "wc_reply_reviewers_avg": [ 27.6, 55.2 ], "wc_reply_authors_avg": [ 304.6, 300.62973904788595 ], "reply_reviewers_avg": [ 0.2, 0.4 ], "reply_authors_avg": [ 0.6, 0.48989794855663565 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.7925005143325722, "corr_recommendation_correctness": 0.10206207261596574, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:8196xeSZRJsJ:scholar.google.com/&scioq=Optimising+2D+Pose+Representation:+Improving+Accuracy,+Stability+and+Generalisability+inUnsupervised+2D-3D+Human+Pose+Estimation&hl=en&as_sdt=0,10", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Southampton", "aff_unique_dep": "", "aff_unique_url": "https://www.southampton.ac.uk", "aff_unique_abbr": "Southampton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "2lrx543-MbS", "title": "Improving Generalization of Motor-Imagery Brainwave Decoding via Dynamic Convolutions", "track": "main", "status": "Withdraw", "tldr": "Tackling inter-subject variability using dynamic convolutions and causal reasoning", "abstract": "Deep Convolutional Neural Networks (CNNs) have recently demonstrated impressive results in electroencephalogram (EEG) decoding for several Brain-Computer Interfaces (BCI) paradigms, including Motor-Imagery (MI). However, neurophysiological processes underpinning EEG signals vary across subjects causing covariate shifts in data distributions and hence hindering the generalization of deep models across subjects. In this paper, we aim to address the challenge of inter-subject variability in MI. To this end, we employ causal reasoning to characterize all possible distribution shifts in the MI task and propose a dynamic convolution framework to account for shifts caused by the inter-subject variability. 
Using publicly available MI datasets, we demonstrate improved generalization performance across subjects in various MI tasks for four well-established deep architectures.", "keywords": "Brain-Computer Interfaces;Dynamic Convolution;Causality", "primary_area": "", "supplementary_material": "", "author": "Konstantinos Barmpas;Yannis Panagakis;Stylianos Bakas;Dimitrios Adamos;Nikolaos Laskaris;Stefanos Zafeiriou", "authorids": "~Konstantinos_Barmpas1;~Yannis_Panagakis1;~Stylianos_Bakas1;~Dimitrios_Adamos1;laskaris@csd.auth.gr;~Stefanos_Zafeiriou1", "gender": "M;;;M;;M", "homepage": "https://www.barmpas.com;;;https://www.imperial.ac.uk/people/d.adamos;;http://www.imperial.ac.uk/people/s.zafeiriou/", "dblp": "313/3029;;;01/4925;;25/1885.html", "google_scholar": "JkRlsiQAAAAJ;;K36GuxEAAAAJ;Y4GWFzIAAAAJ;;QKOH5iYAAAAJ", "orcid": "0000-0001-6724-3689;;;;;", "linkedin": "konstantinos-barmpas/;;;dimitriosadamos/;;", "or_profile": "~Konstantinos_Barmpas1;~Yannis_Panagakis1;~Stylianos_Bakas1;~Dimitrios_Adamos1;laskaris@csd.auth.gr;~Stefanos_Zafeiriou1", "aff": "Imperial College London, Imperial College London;;Aristotle University of Thessaloniki;Cogitat Ltd;;Imperial College London", "aff_domain": "imperial.ac.uk;;auth.gr;cogitat.io;;ic.ac.uk", "position": "PhD student;;PhD student;Cofounder and CTO;;Full Professor", "bibtex": "@misc{\nbarmpas2023improving,\ntitle={Improving Generalization of Motor-Imagery Brainwave Decoding via Dynamic Convolutions},\nauthor={Konstantinos Barmpas and Yannis Panagakis and Stylianos Bakas and Dimitrios Adamos and Nikolaos Laskaris and Stefanos Zafeiriou},\nyear={2023},\nurl={https://openreview.net/forum?id=2lrx543-MbS}\n}", "github": "", "project": "", "reviewers": "diqh;MRAe;ramo", "site": "https://openreview.net/forum?id=2lrx543-MbS", "pdf_size": 737243, "recommendation": "1;5;5", "confidence": "5;4;3", "correctness": "2;3;3", "technical_novelty": "1;3;3", "empirical_novelty": "1;3;3", "wc_summary_paper": "114;206;155", "wc_strength_and_weaknesses": "270;115;112", "wc_clarity_quality_novelty_and_reproducibility": "33;576;6", "wc_summary_review": "49;73;28", "wc_review": "466;970;301", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.6666666666666665, 1.8856180831641267 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.9428090415820634 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.9428090415820634 ], "wc_summary_paper_avg": [ 158.33333333333334, 37.63272807307786 ], "wc_strength_and_weaknesses_avg": [ 165.66666666666666, 73.78497287539125 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 205.0, 262.56808640807816 ], "wc_summary_review_avg": [ 50.0, 18.384776310850235 ], "wc_review_avg": [ 579.0, 284.56633673011993 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.8660254037844385, "corr_recommendation_correctness": 0.9999999999999997, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:wahleHNMGfEJ:scholar.google.com/&scioq=Improving+Generalization+of+Motor-Imagery+Brainwave+Decoding+via+Dynamic+Convolutions&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Imperial College London;Aristotle University of 
Thessaloniki;Cogitat Ltd", "aff_unique_dep": ";;", "aff_unique_url": "https://www.imperial.ac.uk;https://www.auth.gr;", "aff_unique_abbr": "ICL;AUTH;", "aff_campus_unique_index": "1", "aff_campus_unique": ";Thessaloniki", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United Kingdom;Greece" }, { "title": "Lower Bounds on the Depth of Integral ReLU Neural Networks via Lattice Polytopes", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11246", "id": "2mvALOAWaxY", "poster": "", "openreview": "https://openreview.net/forum?id=2mvALOAWaxY", "slides": "https://iclr.cc/virtual/2023/poster/11246", "video": "https://iclr.cc/virtual/2023/poster/11246", "author_site": "Christian Haase, Christoph Hertrich, Georg Loho", "tldr": "We derive lower bounds on the depth of integral ReLU neural networks using volume arguments for lattice polytopes arising from connections to tropical geometry.", "abstract": "We prove that the set of functions representable by ReLU neural networks with integer weights strictly increases with the network depth while allowing arbitrary width. More precisely, we show that $\\lceil\\log_2(n)\\rceil$ hidden layers are indeed necessary to compute the maximum of $n$ numbers, matching known upper bounds. Our results are based on the known duality between neural networks and Newton polytopes via tropical geometry. The integrality assumption implies that these Newton polytopes are lattice polytopes. Then, our depth lower bounds follow from a parity argument on the normalized volume of faces of such polytopes.", "keywords": "Rectified Linear Unit;Neural Network Expressivity;Neural Network Depth;Lattice Polytope;Normalized Volume", "primary_area": "", "supplementary_material": "", "author": "Christian Alexander Haase;Christoph Hertrich;Georg Loho", "authorids": "~Christian_Alexander_Haase1;~Christoph_Hertrich1;~Georg_Loho1", "gender": ";;", "homepage": ";https://christophhertrich.gitlab.io;https://lohomath.github.io/", "dblp": ";234/8939;178/6497", "google_scholar": ";bbMbGU4AAAAJ;", "orcid": "0000-0003-4078-0913;0000-0001-5646-8567;0000-0001-6500-385X", "linkedin": ";;", "or_profile": "~Christian_Alexander_Haase1;~Christoph_Hertrich1;~Georg_Loho1", "aff": "Freie Universit\u00e4t Berlin;London School of Economics and Political Science;University of Twente", "aff_domain": "fu-berlin.de;lse.ac.uk;utwente.nl", "position": "Associate Professor;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nhaase2023lower,\ntitle={Lower Bounds on the Depth of Integral Re{LU} Neural Networks via Lattice Polytopes},\nauthor={Christian Alexander Haase and Christoph Hertrich and Georg Loho},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=2mvALOAWaxY}\n}", "github": "", "project": "", "reviewers": "iGLm;TovE;U42b;dCJW", "pdf_size": 288202, "recommendation": "6;6;8;8", "confidence": "4;3;2;4", "correctness": "3;4;4;3", "technical_novelty": "3;3;4;3", "empirical_novelty": "0;0;0;0", "wc_summary_paper": "55;90;133;87", "wc_strength_and_weaknesses": "257;230;127;161", "wc_clarity_quality_novelty_and_reproducibility": "40;25;29;124", "wc_summary_review": "30;38;19;53", "wc_review": "382;383;308;425", "wc_reply_reviewers": "0;67;49;28", "wc_reply_authors": "675;433;267;357", "reply_reviewers": "0;1;1;1", "reply_authors": "2;1;2;1", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.25, 
0.4330127018922193 ], "empirical_novelty_avg": [ 0, 0 ], "wc_summary_paper_avg": [ 91.25, 27.73422975314079 ], "wc_strength_and_weaknesses_avg": [ 193.75, 52.06426317542581 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 54.5, 40.5 ], "wc_summary_review_avg": [ 35.0, 12.389511693363866 ], "wc_review_avg": [ 374.5, 42.13371571556442 ], "wc_reply_reviewers_avg": [ 36.0, 24.9499498997493 ], "wc_reply_authors_avg": [ 433.0, 151.57176518072222 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.30151134457776363, "corr_recommendation_correctness": 0.0, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14036954026812140199&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "pdf": "https://openreview.net/pdf?id=2mvALOAWaxY", "email": "fu-berlin.de;lse.ac.uk;utwente.nl", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Freie Universit\u00e4t Berlin;London School of Economics and Political Science;University of Twente", "aff_unique_dep": ";;", "aff_unique_url": "https://www.fu-berlin.de;https://www.lse.ac.uk;https://www.utwente.nl", "aff_unique_abbr": "FU Berlin;LSE;UT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2", "aff_country_unique": "Germany;United Kingdom;Netherlands" }, { "title": "Versatile Neural Processes for Learning Implicit Neural Representations", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11651", "id": "2nLeOOfAjK", "poster": "/media/PosterPDFs/ICLR%202023/11651.png?t=1682549046.4924412", "openreview": "https://openreview.net/forum?id=2nLeOOfAjK", "slides": "https://iclr.cc/virtual/2023/poster/11651", "video": "https://iclr.cc/virtual/2023/poster/11651", "author_site": "Zongyu Guo, Cuiling Lan, Zhizheng Zhang, Yan Lu, Zhibo Chen", "tldr": "We propose a new neural process framework for efficient learning of the implicit neural representations w.r.t. various signals, including complex 3D scenes.", "abstract": "Representing a signal as a continuous function parameterized by neural network (a.k.a. Implicit Neural Representations, INRs) has attracted increasing attention in recent years. Neural Processes (NPs), which model the distributions over functions conditioned on partial observations (context set), provide a practical solution for fast inference of continuous functions. However, existing NP architectures suffer from inferior modeling capability for complex signals. In this paper, we propose an efficient NP framework dubbed Versatile Neural Processes (VNP), which largely increases the capability of approximating functions. Specifically, we introduce a bottleneck encoder that produces fewer and informative context tokens, relieving the high computational cost while providing high modeling capability. At the decoder side, we hierarchically learn multiple global latent variables that jointly model the global structure and the uncertainty of a function, enabling our model to capture the distribution of complex signals. We demonstrate the effectiveness of the proposed VNP on a variety of tasks involving 1D, 2D and 3D signals. Particularly, our method shows promise in learning accurate INRs w.r.t. 
a 3D scene without further finetuning.", "keywords": "Implicit Neural Representations;Neural Processes;Variational Inference", "primary_area": "", "supplementary_material": "/attachment/2b408e1c18ec2f0d929034afc4998793c4b9bfa1.zip", "author": "Zongyu Guo;Cuiling Lan;Zhizheng Zhang;Yan Lu;Zhibo Chen", "authorids": "~Zongyu_Guo1;~Cuiling_Lan1;~Zhizheng_Zhang1;~Yan_Lu7;~Zhibo_Chen1", "gender": "M;F;M;M;M", "homepage": ";https://www.microsoft.com/en-us/research/people/culan/;;https://www.microsoft.com/en-us/research/people/yanlu/;https://faculty.ustc.edu.cn/chenzhibo", "dblp": "247/4138;95/8115;67/4758;15/4830-1;54/6561.html", "google_scholar": "paus9RMAAAAJ;XZugqiwAAAAJ;X7M0I8kAAAAJ;djk5l-4AAAAJ;1ayDJfsAAAAJ", "orcid": ";0000-0001-9145-9957;;0000-0001-5383-6424;", "linkedin": ";;;;", "or_profile": "~Zongyu_Guo1;~Cuiling_Lan1;~Zhizheng_Zhang1;~Yan_Lu7;~Zhibo_Chen1", "aff": "University of Science and Technology of China;Microsoft;Microsoft Research;Microsoft Research Asia;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;microsoft.com;microsoft.com;microsoft.com;ustc.edu.cn", "position": "PhD student;Principal Researcher;Senior Researcher;Partner Research Manager;Full Professor", "bibtex": "@inproceedings{\nguo2023versatile,\ntitle={Versatile Neural Processes for Learning Implicit Neural Representations},\nauthor={Zongyu Guo and Cuiling Lan and Zhizheng Zhang and Yan Lu and Zhibo Chen},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=2nLeOOfAjK}\n}", "github": "", "project": "", "reviewers": "HSYx;L3M4;vtKS;vL4F", "pdf_size": 1172662, "recommendation": "6;6;8;8", "confidence": "3;3;3;3", "correctness": "3;3;3;4", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "80;114;157;81", "wc_strength_and_weaknesses": "293;264;210;154", "wc_clarity_quality_novelty_and_reproducibility": "34;471;64;36", "wc_summary_review": "76;34;57;22", "wc_review": "483;883;488;293", "wc_reply_reviewers": "27;28;0;15", "wc_reply_authors": "903;952;241;349", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;1", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.0, 0.0 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 108.0, 31.424512724941337 ], "wc_strength_and_weaknesses_avg": [ 230.25, 53.152492886034985 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 151.25, 184.98834422741342 ], "wc_summary_review_avg": [ 47.25, 20.825165065372232 ], "wc_review_avg": [ 536.75, 214.80732645792136 ], "wc_reply_reviewers_avg": [ 17.5, 11.324751652906125 ], "wc_reply_authors_avg": [ 611.25, 319.01753478453185 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2083912454284129338&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=2nLeOOfAjK", "email": "ustc.edu.cn;microsoft.com;microsoft.com;microsoft.com;ustc.edu.cn", "author_num": 5, "aff_unique_index": "0;1;1;1;0", "aff_unique_norm": "University of Science and Technology of China;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": 
"http://www.ustc.edu.cn;https://www.microsoft.com", "aff_unique_abbr": "USTC;Microsoft", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;1;1;0;0", "aff_country_unique": "China;United States" }, { "title": "KnowDA: All-in-One Knowledge Mixture Model for Data Augmentation in Low-Resource NLP", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11624", "id": "2nocgE1m0A", "poster": "", "openreview": "https://openreview.net/forum?id=2nocgE1m0A", "slides": "https://iclr.cc/virtual/2023/poster/11624", "video": "https://iclr.cc/virtual/2023/poster/11624", "author_site": "Yufei Wang, Jiayi Zheng, Can Xu, Xiubo Geng, Tao Shen, Chongyang Tao, Daxin Jiang", "tldr": "We propose a Knowledge Mixture Data Augmentation Model (KnowDA) that is trained with diverse NLP task knowledge. KnowDA could generate additional synthetic data to improve model performance in various low-resource NLP tasks.", "abstract": "This paper focuses on data augmentation for low-resource NLP tasks where the training set is limited. The existing solutions either leverage task-independent heuristic rules (e.g., Synonym Replacement) or fine-tune general-purpose pre-trained language models (e.g., GPT2) using the limited training instances to produce new synthetic data. Consequently, they have trivial task-specific knowledge and are limited to yielding low-quality synthetic data. To combat this issue, we propose Knowledge Mixture Data Augmentation Model (KnowDA), a Seq2Seq language model pretrained on a mixture of diverse NLP tasks under a novel framework of Knowledge Mixture Training (KoMT). The goal of KoMT is to condense diverse NLP task-specific knowledge into the single KnowDA model\n(i.e., all-in-one). The resulting KnowDA could utilize these knowledge to quickly grasp the inherent synthesis law of the target task through limited training instances. Specifically, KoMT reformulates input examples from various heterogeneous NLP tasks into a unified text-to-text format and employs denoising training objectives in different granularity to learn to reconstruct partial or complete samples. To the best of our knowledge, we are the first to attempt to apply 100+ NLP multi-task training for data augmentation. 
Extensive experiments show that i) the synthetic data produced by KnowDA successfully improves the performance of the strong pre-trained language\nmodels (i.e., Bert, ALBert and Deberta) by a large margin on the low-resource NLP benchmarks FewGLUE, CoNLL\u201903 and WikiAnn; ii) KnowDA successfully transfers the task knowledge to NLP tasks whose types are seen and unseen in KoMT.", "keywords": "Data Augmentation;Low-Resource NLP", "primary_area": "", "supplementary_material": "/attachment/206863e9c0ffabe16536994c0ab44f2f0fd5ab5f.zip", "author": "Yufei Wang;Jiayi Zheng;Can Xu;Xiubo Geng;Tao Shen;Chongyang Tao;Daxin Jiang", "authorids": "~Yufei_Wang7;~Jiayi_Zheng1;~Can_Xu2;~Xiubo_Geng2;~Tao_Shen1;~Chongyang_Tao1;~Daxin_Jiang2", "gender": "M;M;M;F;M;M;M", "homepage": "https://garyyufei.github.io/;;;https://xiubo0211.github.io/;;;https://www.microsoft.com/en-us/research/people/djiang/", "dblp": "61/5568-3;243/2336;;19/189;95/4097-1;;77/5094", "google_scholar": "gFoSqqkAAAAJ;;5aiE_NcAAAAJ;XxeX3FgAAAAJ;https://scholar.google.com.au/citations?user=SegyX9AAAAAJ;x_cOKuwAAAAJ;N-wAHCoAAAAJ", "orcid": ";;0000-0002-1949-5715;;;;", "linkedin": "garyyufei/;;;;;;", "or_profile": "~Yufei_Wang7;~Jiayi_Zheng1;~Can_Xu2;~Xiubo_Geng2;~Tao_Shen1;~Chongyang_Tao1;~Daxin_Jiang2", "aff": "Huawei Technologies Ltd.;Peking University;Microsoft;Microsoft;University of Technology Sydney;Microsoft;Microsoft", "aff_domain": "huawei.com;pku.edu.cn;microsoft.com;microsoft.com;uts.edu.au;microsoft.com;microsoft.com", "position": "Researcher;MS student;Researcher;Researcher;Postdoc;Researcher;Researcher/Scientist", "bibtex": "@inproceedings{\nwang2023knowda,\ntitle={Know{DA}: All-in-One Knowledge Mixture Model for Data Augmentation in Low-Resource {NLP}},\nauthor={Yufei Wang and Jiayi Zheng and Can Xu and Xiubo Geng and Tao Shen and Chongyang Tao and Daxin Jiang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=2nocgE1m0A}\n}", "github": "", "project": "", "reviewers": "CnYA;U7D8;Fev8", "pdf_size": 1320811, "recommendation": "6;6;8", "confidence": "4;4;3", "correctness": "3;3;4", "technical_novelty": "3;3;3", "empirical_novelty": "3;3;3", "wc_summary_paper": "248;130;202", "wc_strength_and_weaknesses": "200;329;246", "wc_clarity_quality_novelty_and_reproducibility": "21;21;346", "wc_summary_review": "38;59;90", "wc_review": "507;539;884", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 193.33333333333334, 48.56153027059817 ], "wc_strength_and_weaknesses_avg": [ 258.3333333333333, 53.38122849425213 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 129.33333333333334, 153.20646925708527 ], "wc_summary_review_avg": [ 62.333333333333336, 21.359359124801056 ], "wc_review_avg": [ 643.3333333333334, 170.67773401615364 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.9999999999999998, "corr_recommendation_correctness": 0.9999999999999998, "gs_citation": 3, "gs_cited_by_link":
"https://scholar.google.com/scholar?cites=7936158195659195059&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=2nocgE1m0A", "email": "huawei.com;pku.edu.cn;microsoft.com;microsoft.com;uts.edu.au;microsoft.com;microsoft.com", "author_num": 7, "aff_unique_index": "0;1;2;2;3;2;2", "aff_unique_norm": "Huawei;Peking University;Microsoft;University of Technology Sydney", "aff_unique_dep": "Huawei Technologies;;Microsoft Corporation;", "aff_unique_url": "https://www.huawei.com;http://www.pku.edu.cn;https://www.microsoft.com;https://www.uts.edu.au", "aff_unique_abbr": "Huawei;Peking U;Microsoft;UTS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1;2;1;1", "aff_country_unique": "China;United States;Australia" }, { "id": "2outcw5N9wH", "title": "Safer Reinforcement Learning with Counterexample-guided Offline Training", "track": "main", "status": "Reject", "tldr": "", "abstract": "Safe reinforcement learning (RL) aims at addressing the limitation of reinforcement learning in safety-critical scenarios, where failures during learning may incur high costs. Several methods exist to incorporate external knowledge or to use proximal sensor data to limit the exploration of unsafe states. However, dealing with (partially) unknown environments and dynamics, where an agent must discover safety threats during exploration, remains challenging. In this paper, we propose a method to abstract hybrid continuous-discrete systems into compact surrogate models representing the safety-relevant knowledge acquired by the agent at any time during exploration. We exploit probabilistic counterexamples generation to synthesise minimal, partial simulation environments from the surrogate model where the agent can train offline to produce heuristic strategies to minimise the risk of visiting unsafe states during subsequent online exploration. We demonstrate our method's effectiveness in increasing the agent's exploration safety on a selection of OpenAI Gym benchmarks. 
", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Xiaotong Ji;Antonio Filieri", "authorids": "~Xiaotong_Ji1;~Antonio_Filieri1", "gender": ";M", "homepage": ";https://antonio.filieri.name", "dblp": "258/8352;", "google_scholar": ";https://scholar.google.com.tw/citations?user=MdbPRwMAAAAJ", "orcid": "0000-0002-2821-8058;0000-0001-9646-646X", "linkedin": ";", "or_profile": "~Xiaotong_Ji1;~Antonio_Filieri1", "aff": "Imperial College London;Amazon AWS", "aff_domain": "ic.ac.uk;amazon.com", "position": "PhD student;Researcher", "bibtex": "@misc{\nji2023safer,\ntitle={Safer Reinforcement Learning with Counterexample-guided Offline Training},\nauthor={Xiaotong Ji and Antonio Filieri},\nyear={2023},\nurl={https://openreview.net/forum?id=2outcw5N9wH}\n}", "github": "", "project": "", "reviewers": "4QqX;xUpK;QNmG;noqT", "site": "https://openreview.net/forum?id=2outcw5N9wH", "pdf_size": 2046092, "recommendation": "3;3;3;3", "confidence": "3;4;3;2", "correctness": "4;2;2;3", "technical_novelty": "3;3;2;3", "empirical_novelty": "2;2;1;2", "wc_summary_paper": "26;125;71;73", "wc_strength_and_weaknesses": "563;250;357;83", "wc_clarity_quality_novelty_and_reproducibility": "24;145;213;31", "wc_summary_review": "34;68;32;147", "wc_review": "647;588;673;334", "wc_reply_reviewers": "0;0;0;110", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;1", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 2.75, 0.82915619758885 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 73.75, 35.0526389876711 ], "wc_strength_and_weaknesses_avg": [ 313.25, 174.14415723761735 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 103.25, 79.51218460085221 ], "wc_summary_review_avg": [ 70.25, 46.56380031741396 ], "wc_review_avg": [ 560.5, 134.34749718547047 ], "wc_reply_reviewers_avg": [ 27.5, 47.63139720814412 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:P9JWQC4IXM8J:scholar.google.com/&scioq=Safer+Reinforcement+Learning+with+Counterexample-guided+Offline+Training&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Imperial College London;Amazon", "aff_unique_dep": ";Amazon Web Services", "aff_unique_url": "https://www.imperial.ac.uk;https://aws.amazon.com", "aff_unique_abbr": "ICL;AWS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United Kingdom;United States" }, { "id": "2ppuWD3dkie", "title": "SimST: A GNN-Free Spatio-Temporal Learning Framework for Traffic Forecasting", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Traffic forecasting is a crucial and challenging problem in smart city efforts. Spatio-Temporal Graph Neural Networks (STGNNs) have demonstrated great promise and become the de facto solution in this field. While successful, they require the message passing scheme of GNNs to construct spatial dependencies between nodes, and thus inevitably inherit the notorious inefficiency of GNNs. 
Given these facts, in this paper, we propose a simple yet effective GNN-free spatio-temporal learning framework, entitled SimST. Specifically, our framework replaces GNNs with two feasible and efficient spatial context injectors, which provide proximity and position information, respectively. SimST is also compatible with various temporal encoding backbones and involves a tailored training strategy. We conduct extensive experiments on five popular traffic benchmarks to assess the capability of SimST in terms of effectiveness and efficiency. Experimental results show that such a simple baseline performs surprisingly well. Using much fewer parameters, SimST not only achieves comparable or better performance than more sophisticated state-of-the-art STGNNs, but also obtains substantial throughput improvements.", "keywords": "Traffic Forecasting;Spatio-Temporal Graph Neural Networks", "primary_area": "", "supplementary_material": "/attachment/22dda8c6a2c82d39b27c06b6a4b8780376b3045f.zip", "author": "Xu Liu;Yuxuan Liang;Chao Huang;Hengchang Hu;Yushi Cao;Bryan Hooi;Roger Zimmermann", "authorids": "~Xu_Liu9;~Yuxuan_Liang1;~Chao_Huang7;~Hengchang_Hu1;~Yushi_Cao1;~Bryan_Hooi1;~Roger_Zimmermann1", "gender": ";M;M;M;;;M", "homepage": ";https://yuxuanliang.com;;https://holdenhu.github.io/;;http://bhooi.github.io;https://www.comp.nus.edu.sg/cs/bio/rogerz/", "dblp": "93/3167-14;183/0977;;305/9820;274/2297;169/9975;79/1490", "google_scholar": "JTzLTycAAAAJ;n9cODgcAAAAJ;Zkv9FqwAAAAJ;;y8SqtE4AAAAJ;;https://scholar.google.com.tw/citations?user=IDREwXEAAAAJ", "orcid": "0000-0003-2708-0584;0000-0003-2817-7337;;;;0000-0002-5645-1754;0000-0002-7410-2590", "linkedin": "liuxu-187825160/;yoshall/;;;;;roger-zimmermann-76b56b6/", "or_profile": "~Xu_Liu9;~Yuxuan_Liang1;~Chao_Huang7;~Hengchang_Hu1;~Yushi_Cao1;~Bryan_Hooi1;~Roger_Zimmermann1", "aff": "National University of Singapore;The Hong Kong University of Science and Technology (Guangzhou);University of Hong Kong;National University of Singapore;Nanyang Technological University;National University of Singapore;National University of Singapore", "aff_domain": "nus.edu.sg;hkust-gz.edu.cn;hku.hk;u.nus.edu;ntu.edu.sg;nus.edu.sg;nus.edu.sg", "position": "PhD student;Assistant Professor;Assistant Professor;PhD student;PhD student;Assistant Professor;Full Professor", "bibtex": "@misc{\nliu2023simst,\ntitle={Sim{ST}: A {GNN}-Free Spatio-Temporal Learning Framework for Traffic Forecasting},\nauthor={Xu Liu and Yuxuan Liang and Chao Huang and Hengchang Hu and Yushi Cao and Bryan Hooi and Roger Zimmermann},\nyear={2023},\nurl={https://openreview.net/forum?id=2ppuWD3dkie}\n}", "github": "", "project": "", "reviewers": "k8Ws;4rep;1y45;FBm5", "site": "https://openreview.net/forum?id=2ppuWD3dkie", "pdf_size": 577078, "recommendation": "3;5;5;6", "confidence": "4;4;5;4", "correctness": "3;3;3;4", "technical_novelty": "1;2;3;3", "empirical_novelty": "1;2;3;3", "wc_summary_paper": "47;50;82;125", "wc_strength_and_weaknesses": "77;139;159;71", "wc_clarity_quality_novelty_and_reproducibility": "23;191;32;86", "wc_summary_review": "59;37;22;111", "wc_review": "206;417;295;393", "wc_reply_reviewers": "0;110;0;0", "wc_reply_authors": "560;738;485;199", "reply_reviewers": "0;1;0;0", "reply_authors": "1;2;2;1", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.82915619758885 ], "empirical_novelty_avg": [ 2.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 
76.0, 31.44041984452498 ], "wc_strength_and_weaknesses_avg": [ 111.5, 38.21975928757271 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 83.0, 66.84683986547158 ], "wc_summary_review_avg": [ 57.25, 33.70738049745189 ], "wc_review_avg": [ 327.75, 83.84323168867002 ], "wc_reply_reviewers_avg": [ 27.5, 47.63139720814412 ], "wc_reply_authors_avg": [ 495.5, 194.28651522944148 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.13245323570650439, "corr_recommendation_correctness": 0.6622661785325219, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10210331623672355915&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;2;0;3;0;0", "aff_unique_norm": "National University of Singapore;Hong Kong University of Science and Technology;University of Hong Kong;Nanyang Technological University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.nus.edu.sg;https://www.ust.hk;https://www.hku.hk;https://www.ntu.edu.sg", "aff_unique_abbr": "NUS;HKUST;HKU;NTU", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Guangzhou;Hong Kong SAR", "aff_country_unique_index": "0;1;1;0;0;0;0", "aff_country_unique": "Singapore;China" }, { "id": "2qM88ymKO6r", "title": "DeNF: Unsupervised Scene-Decompositional Normalizing Flows", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Unsupervised object-centric scene decomposition models can learn compositional and hierarchical representations of multi-object scene data that allow the abstraction of the data into object entities and spaces. However, previous approaches, either based on VAE or GAN frameworks, have no guarantee of preserving particular aspects of the image in scene reconstruction. In this work, we propose the first such probabilistic model, called DeNF. Based on recent advances in normalizing flows, we represent the scene as a mixture of bidirectional flows that map a set of structured prior distributions into the scene data distribution. The bijective mapping of DeNF yields efficient sampling and density evaluation at training time. Furthermore, it improves the fidelity of the scene's visual contents in the reconstruction process. 
In our experiments on real and synthetic image data for unsupervised scene decomposition, DeNF achieves competitive results.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Farhad Ghazvinian Zanjani;Hanno Ackermann;Daniel Dijkman;Fatih Porikli", "authorids": "~Farhad_Ghazvinian_Zanjani1;~Hanno_Ackermann1;~Daniel_Dijkman1;~Fatih_Porikli2", "gender": ";;;M", "homepage": ";http://www.tnt.uni-hannover.de/staff/ackerman/;;https://www.porikli.com", "dblp": ";10/1322;;p/FatihMuratPorikli", "google_scholar": ";UK3TEWUAAAAJ;;https://scholar.google.com.tw/citations?user=VpB8NZ8AAAAJ", "orcid": ";;;0000-0002-1520-4466", "linkedin": ";;;fatih-porikli-a95643/", "or_profile": "~Farhad_Ghazvinian_Zanjani1;~Hanno_Ackermann1;~Daniel_Dijkman1;~Fatih_Porikli2", "aff": ";Qualcomm AI Research;;QualComm", "aff_domain": ";qti.qualcomm.com;;qualcomm.com", "position": ";Scientific Researcher;;Senior Director", "bibtex": "@misc{\nzanjani2023denf,\ntitle={De{NF}: Unsupervised Scene-Decompositional Normalizing Flows},\nauthor={Farhad Ghazvinian Zanjani and Hanno Ackermann and Daniel Dijkman and Fatih Porikli},\nyear={2023},\nurl={https://openreview.net/forum?id=2qM88ymKO6r}\n}", "github": "", "project": "", "reviewers": "pktk;1mhe;V7hn;LKGJ;pABo", "site": "https://openreview.net/forum?id=2qM88ymKO6r", "pdf_size": 1205128, "recommendation": "3;3;3;5;5", "confidence": "3;4;4;4;4", "correctness": "2;2;2;3;3", "technical_novelty": "3;2;3;2;4", "empirical_novelty": "1;1;2;2;1", "wc_summary_paper": "110;89;236;50;98", "wc_strength_and_weaknesses": "201;186;712;273;212", "wc_clarity_quality_novelty_and_reproducibility": "8;105;42;43;128", "wc_summary_review": "13;60;133;63;64", "wc_review": "332;440;1123;429;502", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;0;0", "reply_authors": "0;0;0;0;0", "recommendation_avg": [ 3.8, 0.9797958971132712 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "correctness_avg": [ 2.4, 0.4898979485566356 ], "technical_novelty_avg": [ 2.8, 0.7483314773547882 ], "empirical_novelty_avg": [ 1.4, 0.4898979485566356 ], "wc_summary_paper_avg": [ 116.6, 62.99714279235209 ], "wc_strength_and_weaknesses_avg": [ 316.8, 199.80130129706362 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 65.2, 44.34140277438232 ], "wc_summary_review_avg": [ 66.6, 38.328057607971736 ], "wc_review_avg": [ 565.2, 284.16291102112535 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.40824829046386296, "corr_recommendation_correctness": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:R9f2FOUGqbcJ:scholar.google.com/&scioq=DeNF:+Unsupervised+Scene-Decompositional+Normalizing+Flows&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Qualcomm;Qualcomm Incorporated", "aff_unique_dep": "Qualcomm AI Research;", "aff_unique_url": "https://www.qualcomm.com/research;https://www.qualcomm.com", "aff_unique_abbr": "QAI;Qualcomm", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "ROCO: A General Framework for Evaluating Robustness of Combinatorial Optimization Solvers on Graphs", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11370", "id": "2r6YMqz4Mml", "poster": 
"/media/PosterPDFs/ICLR%202023/11370.png?t=1682318371.5965288", "openreview": "https://openreview.net/forum?id=2r6YMqz4Mml", "slides": "https://iclr.cc/virtual/2023/poster/11370", "video": "https://iclr.cc/virtual/2023/poster/11370", "author_site": "Han Lu, Zenan Li, Runzhong Wang, Qibing Ren, Xijun Li, Mingxuan Yuan, Jia Zeng, Xiaokang Yang, Junchi Yan", "tldr": "", "abstract": "Solving combinatorial optimization (CO) on graphs has been attracting increasing interests from the machine learning community whereby data-driven approaches were recently devised to go beyond traditional manually-designated algorithms. In this paper, we study the robustness of a combinatorial solver as a blackbox regardless it is classic or learning-based though the latter can often be more interesting to the ML community. Specifically, we develop a practically feasible robustness metric for general CO solvers. A no-worse optimal cost guarantee is developed as such the optimal solutions are not required to achieve for solvers, and we tackle the non-differentiable challenge in input instance disturbance by resorting to black-box adversarial attack methods. Extensive experiments are conducted on 14 unique combinations of solvers and CO problems, and we demonstrate that the performance of state-of-the-art solvers like Gurobi can degenerate by over 20% under the given time limit bound on the hard instances discovered by our robustness metric, raising concerns about the robustness of combinatorial optimization solvers.", "keywords": "Combinatorial Optimization;Robustness;Graph Neural Networks;Reinforcement Learning", "primary_area": "", "supplementary_material": "", "author": "Han Lu;Zenan Li;Runzhong Wang;Qibing Ren;Xijun Li;Mingxuan Yuan;Jia Zeng;Xiaokang Yang;Junchi Yan", "authorids": "~Han_Lu2;~Zenan_Li4;~Runzhong_Wang1;~Qibing_Ren1;~Xijun_Li1;~Mingxuan_Yuan1;~Jia_Zeng1;~Xiaokang_Yang1;~Junchi_Yan2", "gender": "M;M;M;;M;M;M;M;", "homepage": ";https://github.com/Emiyalzn;http://runzhong.wang;;https://xijunlee.github.io/;;;https://icne.sjtu.edu.cn/info/1064/1078.htm;", "dblp": ";;239/4351;;203/0784;74/2356;31/435;06/3071-1.html;", "google_scholar": "HESzE0UAAAAJ;;uoM0g3cAAAAJ;;QXU_QbMAAAAJ;https://scholar.google.com/citations?hl=en;;yDEavdMAAAAJ;", "orcid": ";;0000-0002-9566-738X;;0000-0002-9013-1180;0000-0002-2236-8784;;0000-0003-4029-3322;", "linkedin": ";;;;;;;;", "or_profile": "~Han_Lu2;~Zenan_Li4;~Runzhong_Wang1;~Qibing_Ren1;~Xijun_Li1;~Mingxuan_Yuan1;~Jia_Zeng1;~Xiaokang_Yang1;~Junchi_Yan2", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;;Huawei Technologies Ltd.;Huawei Technologies Ltd.;;Shanghai Jiaotong University;", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;;huawei.com;huawei.com;;sjtu.edu.cn;", "position": "PhD student;Undergrad student;PhD student;;Researcher;Researcher;;Full Professor;", "bibtex": "@inproceedings{\nlu2023roco,\ntitle={{ROCO}: A General Framework for Evaluating Robustness of Combinatorial Optimization Solvers on Graphs},\nauthor={Han Lu and Zenan Li and Runzhong Wang and Qibing Ren and Xijun Li and Mingxuan Yuan and Jia Zeng and Xiaokang Yang and Junchi Yan},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=2r6YMqz4Mml}\n}", "github": "", "project": "", "reviewers": "oJSV;baVX;2fgV;Agfm", "pdf_size": 1063109, "recommendation": "5;6;6;8", "confidence": "4;4;3;3", "correctness": "3;3;3;3", "technical_novelty": "3;2;3;3", "empirical_novelty": 
"2;3;3;3", "wc_summary_paper": "260;65;147;83", "wc_strength_and_weaknesses": "242;177;132;93", "wc_clarity_quality_novelty_and_reproducibility": "147;38;21;40", "wc_summary_review": "108;49;21;18", "wc_review": "757;329;321;234", "wc_reply_reviewers": "0;25;0;0", "wc_reply_authors": "2845;1636;1266;540", "reply_reviewers": "0;1;0;0", "reply_authors": "5;3;2;1", "recommendation_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 138.75, 76.34911590843734 ], "wc_strength_and_weaknesses_avg": [ 161.0, 55.41209254305417 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 61.5, 49.912423303221814 ], "wc_summary_review_avg": [ 49.0, 36.145539143855636 ], "wc_review_avg": [ 410.25, 203.63370914463056 ], "wc_reply_reviewers_avg": [ 6.25, 10.825317547305483 ], "wc_reply_authors_avg": [ 1571.75, 834.158970160964 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 1.479019945774904 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 9, 0 ], "corr_recommendation_confidence": -0.6882472016116854, "corr_recommendation_correctness": 0.0, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7085134185668175827&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 0, "pdf": "https://openreview.net/pdf?id=2r6YMqz4Mml", "email": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;;huawei.com;huawei.com;;sjtu.edu.cn;", "author_num": 9, "aff_unique_index": "0;0;0;1;1;0", "aff_unique_norm": "Shanghai Jiao Tong University;Huawei", "aff_unique_dep": ";Huawei Technologies", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.huawei.com", "aff_unique_abbr": "SJTU;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "2rzFscFzJ0B", "title": "Corruption-free Single-view Self-supervised Learning on Graphs", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Self-supervised learning (SSL) for graphs is an essential problem since graph data are ubiquitous and data labeling is costly. We argue that existing SSL approaches for graphs have two limitations. First, they rely on corruption techniques such as node attribute perturbation and edge dropping to generate graph views for contrastive learning. These unnatural corruption techniques require extensive tuning efforts and provide marginal improvements. Second, the current approaches require the computation of multiple graph views, which is memory and computationally inefficient. These shortcomings of graph SSL call for a corruption-free single-view learning approach, but the strawman approach of using neighboring nodes as positive examples suffers two problems: it ignores the strength of connections between nodes implied by the graph structure on a macro level, and cannot deal with the high noise in real-world graphs. We propose CURSIVE, a corruption-free single-view graph SSL approach that overcomes these problems by leveraging graph diffusion to measure connection strength and denoise. With extensive experiments, we show that CURSIVE achieves up to $4.55\\%$ absolute improvement in ROC-AUC on graph SSL tasks over state-of-the-art approaches while being more memory efficient. 
Moreover, CURSIVE even outperforms supervised training on node classification tasks of ogbn-proteins dataset.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Tianyi Zhang;Zhenwei DAI;Zhaozhuo Xu;Anshumali Shrivastava", "authorids": "~Tianyi_Zhang6;~Zhenwei_DAI1;~Zhaozhuo_Xu2;~Anshumali_Shrivastava1", "gender": "M;M;;M", "homepage": "https://github.com/tonyzhang617;https://daizhenwei.github.io/;https://ottovonxu.github.io/;https://www.cs.rice.edu/~as143/", "dblp": "17/322-11.html;;195/4352;63/9828", "google_scholar": "ekRl428AAAAJ;f73pQXsAAAAJ;7tDlVAsAAAAJ;https://scholar.google.com.tw/citations?user=SGT23RAAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Tianyi_Zhang6;~Zhenwei_DAI1;~Zhaozhuo_Xu2;~Anshumali_Shrivastava1", "aff": "Amazon;Amazon;Rice University;ThirdAI Corp.", "aff_domain": "amazon.com;amazon.com;rice.edu;thirdai.com", "position": "Intern;Researcher;PhD student;CEO", "bibtex": "@misc{\nzhang2023corruptionfree,\ntitle={Corruption-free Single-view Self-supervised Learning on Graphs},\nauthor={Tianyi Zhang and Zhenwei DAI and Zhaozhuo Xu and Anshumali Shrivastava},\nyear={2023},\nurl={https://openreview.net/forum?id=2rzFscFzJ0B}\n}", "github": "", "project": "", "reviewers": "F3yC;CLaf;xDZy;tUGN", "site": "https://openreview.net/forum?id=2rzFscFzJ0B", "pdf_size": 6743519, "recommendation": "3;3;3;5", "confidence": "4;5;4;4", "correctness": "3;4;2;4", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "50;55;54;29", "wc_strength_and_weaknesses": "161;30;435;77", "wc_clarity_quality_novelty_and_reproducibility": "29;17;8;109", "wc_summary_review": "18;40;73;42", "wc_review": "258;142;570;257", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 47.0, 10.559356040971437 ], "wc_strength_and_weaknesses_avg": [ 175.75, 156.8620014535069 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 40.75, 40.10221315588455 ], "wc_summary_review_avg": [ 43.25, 19.587942719948924 ], "wc_review_avg": [ 306.75, 159.13418080349678 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": 0.5222329678670935, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:VuiWxtFtK8QJ:scholar.google.com/&scioq=Corruption-free+Single-view+Self-supervised+Learning+on+Graphs&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Amazon;Rice University;ThirdAI Corp.", "aff_unique_dep": "Amazon.com, Inc.;;", "aff_unique_url": "https://www.amazon.com;https://www.rice.edu;", "aff_unique_abbr": "Amazon;Rice;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "2sAVJZGwQRx", "title": "EFFECTIVE FREQUENCY-BASED BACKDOOR ATTACKS WITH LOW POISONING RATIOS", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Backdoor attack has been considered a serious threat to deep learning. 
Although several seminal backdoor attack methods have been proposed, they often required at least a certain poisoning ratio (\\eg, 1\\% or more) to achieve high attack success rate (ASR). \nHowever, the attack with a large poisoning ratio may be difficult to evade human inspection or backdoor defenses, \\ie, low stealthiness. \nTo tackle the dilemma between high ASR and low stealthiness, we aim to enhance ASR under low poisoning ratio, \\ie, pursuing high ASR and high stealthiness simultaneously. To achieve this goal, we propose a novel frequency-based backdoor attack, where the trigger is generated based on important frequencies that contribute positively to the model prediction with respect to the target class. \nExtensive experiments on four benchmark datasets (CIFAR-10, CIFAR-100, GTSRB, Tiny ImageNet) verify the effectiveness and stealthiness of the proposed method under extremely low poisoning ratios. Specifically, with only 0.01\\% poisoning ratio, our attack could achieve the ASR of 80.51%, 51.3%, 76.3%, and 87.2% on above four datasets, respectively, while the ASR values of most state-of-the-art (SOTA) attack methods are close to 0. Meanwhile, our method could well evade several SOTA backdoor defense methods, \\ie, the ASR values are not significantly affected under defense. ", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Danni Yuan;Mingda Zhang;Shaokui Wei;Shicai Yang;Baoyuan Wu", "authorids": "~Danni_Yuan2;~Mingda_Zhang2;~Shaokui_Wei1;~Shicai_Yang1;~Baoyuan_Wu1", "gender": "F;M;M;M;M", "homepage": "https://sds.cuhk.edu.cn/en/node/747;https://github.com/mdzhangst;https://shawkui.github.io/;;https://sites.google.com/site/baoyuanwu2015/", "dblp": ";;323/4243;126/6822;73/7781", "google_scholar": ";pmwwTcgAAAAJ;WHkEfnsAAAAJ;https://scholar.google.com/citations?hl=en;JNTG1KoAAAAJ", "orcid": ";;;;0000-0003-2183-5990", "linkedin": ";;;;", "or_profile": "~Danni_Yuan2;~Mingda_Zhang2;~Shaokui_Wei1;~Shicai_Yang1;~Baoyuan_Wu1", "aff": "The Chinese University of HongKong, Shenzhen;The Chinese University of Hong Kong, Shenzhen;The Chinese University of Hong Kong, Shenzhen;Hikvision Research Institute;The Chinese University of Hong Kong, Shenzhen", "aff_domain": "cuhk.edu.cn;cuhk.edu.cn;cuhk.edu.cn;hikvision.com;cuhk.edu.cn", "position": "PhD student;PhD student;PhD student;Research Engineer;Associate Professor", "bibtex": "@misc{\nyuan2023effective,\ntitle={{EFFECTIVE} {FREQUENCY}-{BASED} {BACKDOOR} {ATTACKS} {WITH} {LOW} {POISONING} {RATIOS}},\nauthor={Danni Yuan and Mingda Zhang and Shaokui Wei and Shicai Yang and Baoyuan Wu},\nyear={2023},\nurl={https://openreview.net/forum?id=2sAVJZGwQRx}\n}", "github": "", "project": "", "reviewers": "bz6X;jCMA;diiL", "site": "https://openreview.net/forum?id=2sAVJZGwQRx", "pdf_size": 1026809, "recommendation": "3;3;5", "confidence": "4;3;4", "correctness": "3;3;4", "technical_novelty": "2;2;2", "empirical_novelty": "2;3;2", "wc_summary_paper": "51;108;63", "wc_strength_and_weaknesses": "146;613;305", "wc_clarity_quality_novelty_and_reproducibility": "338;136;37", "wc_summary_review": "26;88;66", "wc_review": "561;945;471", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.3333333333333335, 
0.4714045207910317 ], "wc_summary_paper_avg": [ 74.0, 24.535688292770594 ], "wc_strength_and_weaknesses_avg": [ 354.6666666666667, 193.85962848297103 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 170.33333333333334, 125.25795605691303 ], "wc_summary_review_avg": [ 60.0, 25.664502073226878 ], "wc_review_avg": [ 659.0, 205.54318281081473 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.5, "corr_recommendation_correctness": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Iv4WI-JonhgJ:scholar.google.com/&scioq=EFFECTIVE+FREQUENCY-BASED+BACKDOOR+ATTACKS+WITH+LOW+POISONING+RATIOS&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Chinese University of Hong Kong;Hikvision Research Institute", "aff_unique_dep": ";", "aff_unique_url": "https://www.cuhk.edu.cn;https://www.hikvision.com/cn/", "aff_unique_abbr": "CUHK;Hikvision", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Shenzhen;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "2skHw9HVf3", "title": "TAPPFL: TASK-AGNOSTIC PRIVACY-PRESERVING REPRESENTATION LEARNING FOR FEDERATED LEARNING AGAINST ATTRIBUTE INFERENCE ATTACKS", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Federated learning (FL), a new collaborative learning paradigm, has been widely studied recently due to its property to collaboratively train data from different sources without needing to share the raw training data. Nevertheless, recent studies show that an adversary (e.g., an honest-but-curious server) can still be possible to infer private information about the training data, e.g., sensitive information such as income, race, and sexual orientation. To mitigate the attribute inference attacks, various existing privacy-preserving FL methods can be adopted/adapted. However, all these existing methods have key limitations: they need to know the FL task in advance, or have intolerable computational overheads or utility losses, or do not have provable privacy guarantees. We aim to address all these issues and design a task-agnostic privacy-preserving FL (short for TAPPFL) method against attribute inference attacks from the information-theoretic perspective. Specifically, we formally formulate TAPPFL via two mutual information goals, where one goal learns task-agnostic data representations that contain the least information about the private attribute in each device\u2019s data, and the other goal includes as much information as possible about the training data to maintain utility. However, it is intractable to compute exact mutual information in general. Then, we derive tractable variational mutual information bounds, and each bound can be parameterized via a neural network. Next, we alternatively train these parameterized neural networks to approximate the true mutual information and learn privacy-preserving representations for device data. We also derive theoretical privacy guarantees of our TAPPFL against worst-case attribute inference attacks. 
Extensive results on multiple datesets and applications validates the effectiveness of our TAPPFL to protect data privacy, maintain the FL utility, and be efficient as well.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Caridad Arroyo Arevalo;Sayedeh Leila Noorbakhsh;Yun Dong;Yuan Hong;Binghui Wang", "authorids": "carroyoarevalo@hawk.iit.edu;snoorbakhsh@hawk.iit.edu;yund@iastate.edu;~Yuan_Hong1;~Binghui_Wang2", "gender": ";;;M;M", "homepage": ";;;https://yhongcs.github.io/;https://wangbinghui.net", "dblp": ";;;79/5433-1;123/7149", "google_scholar": ";;;KJuZW2wAAAAJ;SoOztcEAAAAJ", "orcid": ";;;;0000-0001-5616-060X", "linkedin": ";;;;", "or_profile": "carroyoarevalo@hawk.iit.edu;snoorbakhsh@hawk.iit.edu;yund@iastate.edu;~Yuan_Hong1;~Binghui_Wang2", "aff": ";;;University of Connecticut;Illinois Institute of Technology", "aff_domain": ";;;uconn.edu;iit.edu", "position": ";;;Associate Professor;Assistant Professor", "bibtex": "@misc{\narevalo2023tappfl,\ntitle={{TAPPFL}: {TASK}-{AGNOSTIC} {PRIVACY}-{PRESERVING} {REPRESENTATION} {LEARNING} {FOR} {FEDERATED} {LEARNING} {AGAINST} {ATTRIBUTE} {INFERENCE} {ATTACKS}},\nauthor={Caridad Arroyo Arevalo and Sayedeh Leila Noorbakhsh and Yun Dong and Yuan Hong and Binghui Wang},\nyear={2023},\nurl={https://openreview.net/forum?id=2skHw9HVf3}\n}", "github": "", "project": "", "reviewers": "1SVy;1Z8m;A2Fx", "site": "https://openreview.net/forum?id=2skHw9HVf3", "pdf_size": 4705412, "recommendation": "3;3;5", "confidence": "3;4;4", "correctness": "2;2;3", "technical_novelty": "2;2;2", "empirical_novelty": "2;2;2", "wc_summary_paper": "51;283;95", "wc_strength_and_weaknesses": "213;283;301", "wc_clarity_quality_novelty_and_reproducibility": "56;31;98", "wc_summary_review": "48;106;55", "wc_review": "368;703;549", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 143.0, 100.6114638928719 ], "wc_strength_and_weaknesses_avg": [ 265.6666666666667, 37.959042254631356 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 61.666666666666664, 27.644569488820444 ], "wc_summary_review_avg": [ 69.66666666666667, 25.84999462712173 ], "wc_review_avg": [ 540.0, 136.91116341141313 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.5, "corr_recommendation_correctness": 1.0, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14876375558587848747&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1", "aff_unique_norm": "University of Connecticut;Illinois Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.uconn.edu;https://www.iit.edu", "aff_unique_abbr": "UConn;IIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "2t7L0lcDqAr", "title": "PathFusion: Path-consistent Lidar-Camera Deep Feature Fusion", "track": "main", "status": "Reject", "tldr": "", "abstract": "Fusing camera with LiDAR is a promising technique to improve the accuracy of 3D 
detection due to its complementary physical properties.\nWhile most existing methods focus on fusing camera features directly with raw LiDAR point clouds or shallow 3D features, it is observed that direct deep 3D feature fusion achieves inferior accuracy due to feature mis-alignment. The mis-alignment that originates from the feature aggregation across large receptive fields becomes increasingly severe for deep network stages. In this paper, we propose PathFusion to enable path-consistent LiDAR-camera deep feature fusion. PathFusion introduces a path consistency loss between shallow and deep features, which encourages the 2D backbone and its fusion path to transform 2D features in a way that is semantically aligned with the transform of the 3D backbone. We apply PathFusion to the prior-art fusion baseline, Focals Conv, and observe more than 1.2% mAP improvements on the nuScenes test split consistently with and without testing-time augmentations. Moreover, PathFusion also improves KITTI AP 3D (R11) by more than 0.6% on moderate level.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Lemeng Wu;Dilin Wang;Meng Li;Yunyang Xiong;Raghuraman Krishnamoorthi;qiang liu;Vikas Chandra", "authorids": "~Lemeng_Wu1;~Dilin_Wang1;~Meng_Li1;~Yunyang_Xiong2;~Raghuraman_Krishnamoorthi1;~qiang_liu4;~Vikas_Chandra2", "gender": "M;M;M;M;M;M;M", "homepage": "https://sites.google.com/utexas.edu/wlm/home?authuser=1;https://mengli.me;;;https://v-chandra.github.io/;;https://www.cs.utexas.edu/~lqiang/", "dblp": "232/3021;70/1726-4;140/7645;;57/5163;142/7035;61/3234-1", "google_scholar": "https://scholar.google.ca/citations?user=PCDSl2sAAAAJ;lvdRkEkAAAAJ;k5FaRwcAAAAJ;F1mr9C0AAAAJ;p-h_BvcAAAAJ;dmTy9EIAAAAJ;https://scholar.google.com.tw/citations?user=2qDh4WUAAAAJ", "orcid": ";;;;;;", "linkedin": ";;;raghuraman-krishnamoorthi-b8670a5/;vchandra/;;", "or_profile": "~Lemeng_Wu1;~Meng_Li1;~Yunyang_Xiong2;~Raghuraman_Krishnamoorthi1;~Vikas_Chandra2;~Dilin_Wang2;~Qiang_Liu1", "aff": "University of Texas, Austin;Peking University;Meta Facebook;Meta Facebook;Meta;Meta;University of Texas, Austin", "aff_domain": "cs.utexas.edu;pku.edu.cn;fb.com;meta.com;meta.com;meta.com;utexas.edu", "position": "PhD student;Assistant Professor;Researcher;Researcher;Director, AI;Research Scientist;Assistant Professor", "bibtex": "@misc{\nwu2023pathfusion,\ntitle={PathFusion: Path-consistent Lidar-Camera Deep Feature Fusion},\nauthor={Lemeng Wu and Dilin Wang and Meng Li and Yunyang Xiong and Raghuraman Krishnamoorthi and qiang liu and Vikas Chandra},\nyear={2023},\nurl={https://openreview.net/forum?id=2t7L0lcDqAr}\n}", "github": "", "project": "", "reviewers": "gP9m;W3vh;3wCP", "site": "https://openreview.net/forum?id=2t7L0lcDqAr", "pdf_size": 3203709, "recommendation": "5;5;5", "confidence": "4;3;4", "correctness": "3;4;3", "technical_novelty": "3;2;2", "empirical_novelty": "2;2;2", "wc_summary_paper": "87;108;80", "wc_strength_and_weaknesses": "184;57;224", "wc_clarity_quality_novelty_and_reproducibility": "119;134;6", "wc_summary_review": "68;36;69", "wc_review": "458;335;379", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "873;912;672", "reply_reviewers": "0;0;0", "reply_authors": "2;2;1", "recommendation_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 91.66666666666667, 
11.897712198383164 ], "wc_strength_and_weaknesses_avg": [ 155.0, 71.19456908126256 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 86.33333333333333, 57.13337222869154 ], "wc_summary_review_avg": [ 57.666666666666664, 15.326085243430198 ], "wc_review_avg": [ 390.6666666666667, 50.88767587103537 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 819.0, 105.15702544290609 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1122231679755721615&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;2;2;2;2;0", "aff_unique_norm": "University of Texas at Austin;Peking University;Meta", "aff_unique_dep": ";;Meta Platforms, Inc.", "aff_unique_url": "https://www.utexas.edu;http://www.pku.edu.cn;https://meta.com", "aff_unique_abbr": "UT Austin;Peking U;Meta", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Austin;", "aff_country_unique_index": "0;1;0;0;0;0;0", "aff_country_unique": "United States;China" }, { "title": "Learning to Estimate Single-View Volumetric Flow Motions without 3D Supervision", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11842", "id": "2vmGv5wPDBZ", "poster": "/media/PosterPDFs/ICLR%202023/11842.png?t=1681817416.6503706", "openreview": "https://openreview.net/forum?id=2vmGv5wPDBZ", "slides": "https://iclr.cc/virtual/2023/poster/11842", "video": "https://iclr.cc/virtual/2023/poster/11842", "author_site": "Erik Franz, Barbara Solenthaler, Nils Thuerey", "tldr": "We train a network to estimate 3D motions and densities from single view videos of smoke without using 3D ground truth.", "abstract": "We address the challenging problem of jointly inferring the 3D flow and volumetric densities moving in a fluid from a monocular input video with a deep neural network. Despite the complexity of this task, we show that it is possible to train the corresponding networks without requiring any 3D ground truth for training. In the absence of ground truth data we can train our model with observations from real-world capture setups instead of relying on synthetic reconstructions. We make this unsupervised training approach possible by first generating an initial prototype volume which is then moved and transported over time without the need for volumetric supervision. Our approach relies purely on image-based losses, an adversarial discriminator network, and regularization. 
Our method can estimate long-term sequences in a stable manner, while achieving closely matching targets for inputs such as rising smoke plumes.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/42abf802117494ef1e3481261e2e01f2371daf90.zip", "author": "Aleksandra Franz;Barbara Solenthaler;Nils Thuerey", "authorids": "~Aleksandra_Franz1;~Barbara_Solenthaler1;~Nils_Thuerey1", "gender": ";F;M", "homepage": ";https://cgl.ethz.ch;https://ge.in.tum.de", "dblp": ";;42/478", "google_scholar": ";RFOOllwAAAAJ;https://scholar.google.com.tw/citations?user=GEehwv8AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Aleksandra_Franz1;~Barbara_Solenthaler1;~Nils_Thuerey1", "aff": ";ETHZ - ETH Zurich;Technical University Munich", "aff_domain": ";ethz.ch;tum.de", "position": ";Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nfranz2023learning,\ntitle={Learning to Estimate Single-View Volumetric Flow Motions without 3D Supervision},\nauthor={Aleksandra Franz and Barbara Solenthaler and Nils Thuerey},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=2vmGv5wPDBZ}\n}", "github": "", "project": "", "reviewers": "8YBe;UynW;TTZh", "pdf_size": 3817246, "recommendation": "6;6;8", "confidence": "4;3;4", "correctness": "3;3;3", "technical_novelty": "2;3;3", "empirical_novelty": "0;3;3", "wc_summary_paper": "77;127;42", "wc_strength_and_weaknesses": "286;197;224", "wc_clarity_quality_novelty_and_reproducibility": "53;127;95", "wc_summary_review": "98;64;34", "wc_review": "514;515;395", "wc_reply_reviewers": "57;24;18", "wc_reply_authors": "510;436;453", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "recommendation_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 1.4142135623730951 ], "wc_summary_paper_avg": [ 82.0, 34.88074922742725 ], "wc_strength_and_weaknesses_avg": [ 235.66666666666666, 37.2588542795162 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 91.66666666666667, 30.3021818063027 ], "wc_summary_review_avg": [ 65.33333333333333, 26.144895401503437 ], "wc_review_avg": [ 474.6666666666667, 56.334319517995034 ], "wc_reply_reviewers_avg": [ 33.0, 17.146428199482248 ], "wc_reply_authors_avg": [ 466.3333333333333, 31.647362537114457 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.49999999999999983, "corr_recommendation_correctness": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16303130592587700508&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=2vmGv5wPDBZ", "email": ";ethz.ch;tum.de", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "ETH Zurich;Technical University of Munich", "aff_unique_dep": ";", "aff_unique_url": "https://www.ethz.ch;https://www.tum.de", "aff_unique_abbr": "ETHZ;TUM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Switzerland;Germany" }, { "id": "2x8EKbGU51k", "title": "Fine-Tuning Offline Policies With Optimistic Action Selection", "track": "main", "status": "Reject", "tldr": "", "abstract": "Offline reinforcement learning algorithms can train performant policies for hard 
tasks using previously-collected datasets. However, the quality of the offline dataset often limits the levels of performance possible. We consider the problem of improving offline policies through online fine-tuning. Offline RL requires a pessimistic training objective to mitigate distributional shift between the trained policy and the offline behavior policy, which will make the trained policy averse to picking novel actions. In contrast, online RL requires exploration, or optimism. Thus, fine-tuning online policies with the offline training objective is not ideal. Additionally, loosening the fine-tuning objective to allow for more exploration can potentially destroy the behaviors learned in the offline phase because of the sudden and significant change in the optimization objective. To mitigate this challenge, we propose a method to facilitate exploration during online fine-tuning that maintains the same training objective throughout both offline and online phases, while encouraging exploration. We accomplish this by changing the action-selection method to be more optimistic with respect to the Q-function. By choosing to take actions in the environment with higher expected Q-values, our method is able to explore and improve behaviors more efficiently, obtaining 56% more returns on average than the alternative approaches on several locomotion, navigation, and manipulation tasks.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/48b0b16fbf76ef88867f131ae9a2be8ab8fa2c29.zip", "author": "Max Sobol Mark;Ali Ghadirzadeh;Xi Chen;Chelsea Finn", "authorids": "~Max_Sobol_Mark1;~Ali_Ghadirzadeh1;~Xi_Chen18;~Chelsea_Finn1", "gender": "M;M;F;F", "homepage": "https://github.com/MaxSobolMark/;;https://ai.stanford.edu/~cbfinn/;", "dblp": ";;131/1783;16/3283-51", "google_scholar": "https://scholar.google.com/citations?hl=en;bPX8_8AAAAAJ;vfPE6hgAAAAJ;O7X6si8AAAAJ", "orcid": ";;;", "linkedin": "max-sobol-mark/;;;", "or_profile": "~Max_Sobol_Mark1;~Ali_Ghadirzadeh1;~Chelsea_Finn1;~Xi_Chen16", "aff": "Computer Science Department, Stanford University;;Google;Tsinghua University", "aff_domain": "cs.stanford.edu;;google.com;tsinghua.edu.cn", "position": "MS student;;Research Scientist;Postdoc", "bibtex": "@misc{\nmark2023finetuning,\ntitle={Fine-Tuning Offline Policies With Optimistic Action Selection},\nauthor={Max Sobol Mark and Ali Ghadirzadeh and Xi Chen and Chelsea Finn},\nyear={2023},\nurl={https://openreview.net/forum?id=2x8EKbGU51k}\n}", "github": "", "project": "", "reviewers": "ggWX;NH1h;b1rp;jHot", "site": "https://openreview.net/forum?id=2x8EKbGU51k", "pdf_size": 786034, "recommendation": "3;3;3;5", "confidence": "4;4;4;3", "correctness": "2;3;2;3", "technical_novelty": "3;1;2;3", "empirical_novelty": "3;2;2;2", "wc_summary_paper": "85;66;103;141", "wc_strength_and_weaknesses": "386;391;664;183", "wc_clarity_quality_novelty_and_reproducibility": "63;48;181;2", "wc_summary_review": "54;54;91;35", "wc_review": "588;559;1039;361", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "130;103;367;42", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.25, 0.82915619758885 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 98.75, 27.680092124124155 ], "wc_strength_and_weaknesses_avg": [ 406.0, 170.9663709622451 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 73.5, 
66.00946901770988 ], "wc_summary_review_avg": [ 58.5, 20.303940504246953 ], "wc_review_avg": [ 636.75, 248.12534634736534 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 160.5, 123.41089903245985 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3634557272622520883&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1;2", "aff_unique_norm": "Stanford University;Google;Tsinghua University", "aff_unique_dep": "Computer Science Department;Google;", "aff_unique_url": "https://www.stanford.edu;https://www.google.com;https://www.tsinghua.edu.cn", "aff_unique_abbr": "Stanford;Google;THU", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Stanford;Mountain View;", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;China" }, { "id": "2xNKMFGPJU5", "title": "Unsupervised Learning of Causal Relationships from Unstructured Data", "track": "main", "status": "Reject", "tldr": "We propose a modification to the VAE that learns variables and causal relationships between them in an unsupervised way.", "abstract": "Endowing deep neural networks with the ability to reason about cause and effect would be an important step to make them more robust and interpretable. In this work we propose a variational framework that allows deep networks to learn latent variables and their causal relationships from unstructured data, with no supervision, or labeled interventions. Starting from an abstract Structural Equation Model (SEM), we show that maximizing its posterior probability yields a similar construction to a Variational Auto-Encoder (VAE), but with a structured prior coupled by non-linear equations. This prior represents an interpretable SEM with learnable parameters (such as a physical model or dependence structure), which can be fitted to data while simultaneously learning the latent variables. Unfortunately, computing KL-divergences with this non-linear prior is intractable. We show how linearizing arbitrary SEMs via back-propagation produces local non-isotropic Gaussian priors, for which the KL-divergences can be computed efficiently and differentiably. We propose two versions, one for IID data (such as images) which detects related causal variables within a sample, and one for non-IID data (such as video) which detects variables that are also related over time. Our proposal is complementary to causal discovery techniques, which assume given variables, and instead discovers both variables and their causal relationships. We experiment with recovering causal models from images, and learning temporal relations based on the Super Mario Bros videogame.", "keywords": "causality;deep learning;causal representation learning;unsupervised;VAE", "primary_area": "", "supplementary_material": "", "author": "Marian Longa;Joao F. 
Henriques", "authorids": "~Marian_Longa1;~Joao_F._Henriques1", "gender": ";M", "homepage": "http://marianlonga.com;http://www.robots.ox.ac.uk/~joao/", "dblp": ";31/8617.html", "google_scholar": ";aCQjyp0AAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Marian_Longa1;~Joao_F._Henriques1", "aff": "University of Oxford;University of Oxford", "aff_domain": "ox.ac.uk;ox.ac.uk", "position": "PhD student;Principal Researcher", "bibtex": "@misc{\nlonga2023unsupervised,\ntitle={Unsupervised Learning of Causal Relationships from Unstructured Data},\nauthor={Marian Longa and Joao F. Henriques},\nyear={2023},\nurl={https://openreview.net/forum?id=2xNKMFGPJU5}\n}", "github": "", "project": "", "reviewers": "mc4c;qqHC;WspD;8bM1", "site": "https://openreview.net/forum?id=2xNKMFGPJU5", "pdf_size": 1295433, "recommendation": "1;3;3;8", "confidence": "4;3;5;4", "correctness": "1;3;2;4", "technical_novelty": "2;2;1;4", "empirical_novelty": "2;2;2;4", "wc_summary_paper": "89;128;184;34", "wc_strength_and_weaknesses": "477;370;513;36", "wc_clarity_quality_novelty_and_reproducibility": "106;64;41;42", "wc_summary_review": "153;65;75;35", "wc_review": "825;627;813;147", "wc_reply_reviewers": "207;225;0;0", "wc_reply_authors": "358;764;684;196", "reply_reviewers": "2;2;0;0", "reply_authors": "1;2;1;1", "recommendation_avg": [ 3.75, 2.5860201081971503 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 2.5, 1.118033988749895 ], "technical_novelty_avg": [ 2.25, 1.0897247358851685 ], "empirical_novelty_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 108.75, 54.79678366473711 ], "wc_strength_and_weaknesses_avg": [ 349.0, 188.2086607996561 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 63.25, 26.337947907914163 ], "wc_summary_review_avg": [ 82.0, 43.55456348076513 ], "wc_review_avg": [ 603.0, 274.72531736263403 ], "wc_reply_reviewers_avg": [ 108.0, 108.18733752154178 ], "wc_reply_authors_avg": [ 500.5, 232.44945687181118 ], "reply_reviewers_avg": [ 1.0, 1.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.9079091724509455, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ZQniH3eZObAJ:scholar.google.com/&scioq=Unsupervised+Learning+of+Causal+Relationships+from+Unstructured+Data&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "University of Oxford", "aff_unique_dep": "", "aff_unique_url": "https://www.ox.ac.uk", "aff_unique_abbr": "Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "id": "2xQVAXKjLdH", "title": "Probing into the Fine-grained Manifestation in Multi-modal Image Synthesis", "track": "main", "status": "Reject", "tldr": "A new method for evaluating the semantic consistency and robustness of multi-modal image synthesis models", "abstract": "The ever-growing development of multi-modal image synthesis brings unprecedented realism to generation tasks. In practice, it is straightforward to judge the visual quality and reality of an image. However, it is labor-consuming to verify the correctness of semantic consistency in the auto-generation, which requires a comprehensive understanding and mapping of different modalities. The results of existing models are sorted and displayed largely relying on the global visual-text similarity. 
However, this coarse-grained approach does not capture the fine-grained semantic alignment between image regions and text spans. To address this issue, we first present a new method to evaluate the cross-modal consistency by inspecting the decomposed semantic concepts. We then introduce a new metric, called MIS-Score, which is designed to measure the fine-grained semantic alignment between a prompt and its generation quantitatively. Moreover, we have also developed an automated robustness testing technique with referential transforms to test and measure the robustness of multi-modal synthesis models. We have conducted comprehensive experiments to evaluate the performance of recent popular models for text-to-image generation. Our study demonstrates that the proposed metric MIS-Score represents better evaluation criteria than existing coarse-grained ones (e.g., CLIP) to understand the semantic consistency of the synthesized results. Our robustness testing method also proves the existence of biases embedded in the models, hence uncovering their limitations in real applications.", "keywords": "Multi-modal image synthesis;semantic consistency measurement;robustness testing", "primary_area": "", "supplementary_material": "", "author": "Qianyu Feng;Peike Li;Yulei Sui;Hongyu Zhang", "authorids": "~Qianyu_Feng1;~Peike_Li1;~Yulei_Sui1;~Hongyu_Zhang1", "gender": "F;;M;M", "homepage": ";;http://yuleisui.github.io;https://sites.google.com/site/hongyujohn", "dblp": "246/4698;251/5626;58/10567.html;29/2726-2", "google_scholar": "https://scholar.google.com.au/citations?user=d0EHVf0AAAAJ;dOzTcvwAAAAJ;https://scholar.google.com.au/citations?user=wGHqq1cAAAAJ;https://scholar.google.com.au/citations?user=zsUN6PkAAAAJ", "orcid": "0000-0003-1014-6081;;;0000-0002-3063-9425", "linkedin": ";peikeli/;;", "or_profile": "~Qianyu_Feng1;~Peike_Li1;~Yulei_Sui1;~Hongyu_Zhang1", "aff": "University of Newcastle;Futureverse AI;University of New South Wales;University of Newcastle, Australia", "aff_domain": "newcastle.edu.au;futureverse.com;unsw.edu.au;newcastle.edu.au", "position": "Postdoc;Principal Researcher;Associate Professor;Full Professor", "bibtex": "@misc{\nfeng2023probing,\ntitle={Probing into the Fine-grained Manifestation in Multi-modal Image Synthesis},\nauthor={Qianyu Feng and Peike Li and Yulei Sui and Hongyu Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=2xQVAXKjLdH}\n}", "github": "", "project": "", "reviewers": "jHfC;nMLm;1VeC", "site": "https://openreview.net/forum?id=2xQVAXKjLdH", "pdf_size": 5977211, "recommendation": "3;3;6", "confidence": "4;3;4", "correctness": "2;2;3", "technical_novelty": "2;2;3", "empirical_novelty": "0;1;3", "wc_summary_paper": "54;43;59", "wc_strength_and_weaknesses": "198;309;111", "wc_clarity_quality_novelty_and_reproducibility": "18;18;7", "wc_summary_review": "16;17;26", "wc_review": "286;387;203", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "312;467;231", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 4.0, 1.4142135623730951 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.3333333333333333, 1.247219128924647 ], "wc_summary_paper_avg": [ 52.0, 6.683312551921141 ], "wc_strength_and_weaknesses_avg": [ 206.0, 81.03085831953157 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 14.333333333333334, 5.185449728701348 ], "wc_summary_review_avg": [ 
19.666666666666668, 4.496912521077347 ], "wc_review_avg": [ 292.0, 75.23740204623407 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 336.6666666666667, 97.91265949247261 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.5, "corr_recommendation_correctness": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:AzbExrg1zj4J:scholar.google.com/&scioq=Probing+into+the+Fine-grained+Manifestation+in+Multi-modal+Image+Synthesis&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Newcastle;Futureverse AI;University of New South Wales", "aff_unique_dep": ";;", "aff_unique_url": "https://www.newcastle.edu.au;;https://www.unsw.edu.au", "aff_unique_abbr": "UON;Futureverse AI;UNSW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "Australia;United States" }, { "id": "32w-1DCZuVS", "title": "The Cost of Privacy in Fair Machine Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "A common task in fair machine learning is training ML models that preserve certain summary statistics across subpopulations defined by sensitive attributes. However, access to such sensitive attributes in training data is restricted and the learner must rely on noisy proxies for the sensitive attributes. In this paper, we study the effect of a privacy mechanism that obfuscates the sensitive attributes from the learner on the fairness of the resulting classifier. We show that the cost of privacy in fair ML is a decline in the generalizability of fairness constraints.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Songkai Xue;Yuekai Sun", "authorids": "~Songkai_Xue1;~Yuekai_Sun1", "gender": ";", "homepage": "http://www-personal.umich.edu/~sxue/;https://yuekai.github.io/", "dblp": "260/6635;", "google_scholar": "YZjCcnoAAAAJ;6T1XtW8AAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Songkai_Xue1;~Yuekai_Sun1", "aff": "University of Michigan;University of Michigan - Ann Arbor", "aff_domain": "umich.edu;umich.edu", "position": "PhD student;Assistant \u2192 Associate Professor of Statistics", "bibtex": "@misc{\nxue2023the,\ntitle={The Cost of Privacy in Fair Machine Learning},\nauthor={Songkai Xue and Yuekai Sun},\nyear={2023},\nurl={https://openreview.net/forum?id=32w-1DCZuVS}\n}", "github": "", "project": "", "reviewers": "YkN8;MtpY;gE91;iTev", "site": "https://openreview.net/forum?id=32w-1DCZuVS", "pdf_size": 423021, "recommendation": "3;5;5;5", "confidence": "4;4;3;4", "correctness": "4;4;4;3", "technical_novelty": "3;4;3;3", "empirical_novelty": "1;1;0;2", "wc_summary_paper": "76;86;26;99", "wc_strength_and_weaknesses": "344;380;161;1164", "wc_clarity_quality_novelty_and_reproducibility": "31;15;79;104", "wc_summary_review": "13;62;78;35", "wc_review": "464;543;344;1402", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "381;376;421;1604", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;3", "recommendation_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 71.75, 27.643941470058138 ], "wc_strength_and_weaknesses_avg": [ 512.25, 385.34165035718627 ], 
"wc_clarity_quality_novelty_and_reproducibility_avg": [ 57.25, 35.82160660830276 ], "wc_summary_review_avg": [ 47.0, 24.92990172463582 ], "wc_review_avg": [ 688.25, 418.1305866592397 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 695.5, 524.8125855960392 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": -0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:nMnXoS1RU5QJ:scholar.google.com/&scioq=The+Cost+of+Privacy+in+Fair+Machine+Learning&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "University of Michigan", "aff_unique_dep": "", "aff_unique_url": "https://www.umich.edu", "aff_unique_abbr": "UM", "aff_campus_unique_index": "1", "aff_campus_unique": ";Ann Arbor", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "33csNbhVnD", "title": "Homotopy-based training of NeuralODEs for accurate dynamics discovery", "track": "main", "status": "Reject", "tldr": "Building upon ideas from the chaos literature, we introduce a novel method of training neural ordinary differential equations with drastic improvements for long complex time series prediction.", "abstract": "Conceptually, Neural Ordinary Differential Equations (NeuralODEs) pose an attractive way to extract dynamical laws from time series data, as they are natural extensions of the traditional differential equation-based modeling paradigm of the physical sciences. In practice, NeuralODEs display long training times and suboptimal results, especially for longer duration data where they may fail to fit the data altogether. While methods have been proposed to stabilize NeuralODE training, many of these involve placing a strong constraint on the functional form the trained NeuralODE can take that the actual underlying governing equation does not guarantee satisfaction. In this work, we present a novel NeuralODE training algorithm that leverages tools from the chaos and mathematical optimization communities -- synchronization and homotopy optimization -- for a breakthrough in tackling the NeuralODE training obstacle. We demonstrate architectural changes are unnecessary for effective NeuralODE training. Compared to the conventional training methods, our algorithm achieves drastically lower loss values without any changes to the model architectures. 
Experiments on both simulated and real systems with complex temporal behaviors demonstrate NeuralODEs trained with our algorithm are able to accurately capture true long term behaviors and correctly extrapolate into the future.\n", "keywords": "neural ordinary differential equation;synchronization;homotopy;dynamical systems;physics", "primary_area": "", "supplementary_material": "", "author": "Joon-Hyuk Ko;Hankyul Koh;Nojun Park;Wonho Jhe", "authorids": "~Joon-Hyuk_Ko1;~Hankyul_Koh1;bnj11526@snu.ac.kr;~Wonho_Jhe1", "gender": "M;M;;M", "homepage": "https://www.kias.re.kr/kias/people/faculty/viewMember.do?memberId=11024&menuNo=403021;;;http://jhe.snu.ac.kr/", "dblp": "281/7112;330/4266;;226/8082", "google_scholar": "t7lTWNQAAAAJ;https://scholar.google.com/citations?view_op=list_works;;z_c9ABQAAAAJ", "orcid": "0000-0001-9283-3859;;;0000-0002-4716-5449", "linkedin": "joon-hyuk-ko-577843271/;hankyul-koh-015177277/;;wonho-jhe-861457b3/", "or_profile": "~Joon-Hyuk_Ko1;~Hankyul_Koh1;bnj11526@snu.ac.kr;~Wonho_Jhe1", "aff": "Seoul National University;Seoul National University;;Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr;;snu.ac.kr", "position": "PhD student;PhD student;;Full Professor", "bibtex": "@misc{\nko2023homotopybased,\ntitle={Homotopy-based training of Neural{ODE}s for accurate dynamics discovery},\nauthor={Joon-Hyuk Ko and Hankyul Koh and Nojun Park and Wonho Jhe},\nyear={2023},\nurl={https://openreview.net/forum?id=33csNbhVnD}\n}", "github": "", "project": "", "reviewers": "4PH8;3MeW;89pi;zjmg;nRZJ", "site": "https://openreview.net/forum?id=33csNbhVnD", "pdf_size": 649383, "recommendation": "3;3;5;5;6", "confidence": "3;4;3;5;3", "correctness": "3;2;3;3;4", "technical_novelty": "1;3;4;4;3", "empirical_novelty": "2;2;2;3;2", "wc_summary_paper": "52;54;40;81;89", "wc_strength_and_weaknesses": "123;331;157;450;187", "wc_clarity_quality_novelty_and_reproducibility": "326;124;21;95;94", "wc_summary_review": "258;60;13;111;21", "wc_review": "759;569;231;737;391", "wc_reply_reviewers": "31;0;0;0;0", "wc_reply_authors": "173;565;448;571;322", "reply_reviewers": "1;0;0;0;0", "reply_authors": "1;1;1;1;1", "recommendation_avg": [ 4.4, 1.2 ], "confidence_avg": [ 3.6, 0.8 ], "correctness_avg": [ 3.0, 0.6324555320336759 ], "technical_novelty_avg": [ 3.0, 1.0954451150103321 ], "empirical_novelty_avg": [ 2.2, 0.39999999999999997 ], "wc_summary_paper_avg": [ 63.2, 18.605375567292374 ], "wc_strength_and_weaknesses_avg": [ 249.6, 122.72505856588539 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 132.0, 102.79494150978442 ], "wc_summary_review_avg": [ 92.6, 89.6896872555591 ], "wc_review_avg": [ 537.4, 202.61253663088078 ], "wc_reply_reviewers_avg": [ 6.2, 12.400000000000002 ], "wc_reply_authors_avg": [ 415.8, 151.74636733707993 ], "reply_reviewers_avg": [ 0.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.0416666666666667, "corr_recommendation_correctness": 0.7905694150420948, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18035061862011802574&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;0", "aff_unique_norm": "Seoul National University", "aff_unique_dep": "", "aff_unique_url": "https://www.snu.ac.kr", "aff_unique_abbr": "SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "id": "33daZzvuzY6", "title": "DLP: Data-Driven 
Label-Poisoning Backdoor Attack", "track": "main", "status": "Reject", "tldr": "We introduce a new type of end-to-end clean-sample backdoor attack, allowing attackers to backdoor effectively, as measured by test performances, for an arbitrary backdoor sample size. ", "abstract": "Backdoor attacks, which aim to disrupt or paralyze classifiers on specific tasks, are becoming an emerging concern in several learning scenarios, e.g., Machine Learning as a Service (MLaaS). Various backdoor attacks have been introduced in the literature, including perturbation-based methods, which modify a subset of training data; and clean-sample methods, which relabel only a proportion of training samples. Indeed, clean-sample attacks can be particularly stealthy since they never require modifying the samples at the training and test stages. However, the state-of-the-art clean-sample attack of relabelling training data based on their semantic meanings could be ineffective and inefficient in test performances due to heuristic selections of semantic patterns. In this work, we introduce a new type of clean-sample backdoor attack, named as DLP backdoor attack, allowing attackers to backdoor effectively, as measured by test performances, for an arbitrary backdoor sample size. The critical component of DLP is a data-driven backdoor scoring mechanism embedded in a multi-task formulation, which enables attackers to simultaneously perform well on the normal learning tasks and the backdoor tasks. Systematic empirical evaluations show the superior performance of the proposed DLP to state-of-the-art clean-sample attacks.", "keywords": "Backdoor learning;End-to-end learning;Clean-sample attack", "primary_area": "", "supplementary_material": "/attachment/5c219983b08cfc1feb75d4d1756f29fddbed7808.zip", "author": "Xun Xian;Xuan Bi;Mingyi Hong;Jie Ding", "authorids": "~Xun_Xian1;~Xuan_Bi1;~Mingyi_Hong1;~Jie_Ding2", "gender": "M;;M;M", "homepage": "https://jeremyxianx.github.io/;;http://people.ece.umn.edu/~mhong/mingyi.html;http://jding.org", "dblp": "262/3278;;57/8053;94/1825-2", "google_scholar": "https://scholar.google.com/citations?hl=en;F3eRk9MAAAAJ;qRnP-p0AAAAJ;ZyqvoqcAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Xun_Xian1;~Xuan_Bi1;~Mingyi_Hong1;~Jie_Ding2", "aff": "University of Minnesota, Minneapolis;University of Minnesota - Twin Cities;University of Minnesota, Minneapolis;University of Minnesota, Minneapolis", "aff_domain": "umn.edu;umn.edu;umn.edu;umn.edu", "position": "PhD student;Assistant Professor;Associate Professor;Assistant Professor", "bibtex": "@misc{\nxian2023dlp,\ntitle={{DLP}: Data-Driven Label-Poisoning Backdoor Attack},\nauthor={Xun Xian and Xuan Bi and Mingyi Hong and Jie Ding},\nyear={2023},\nurl={https://openreview.net/forum?id=33daZzvuzY6}\n}", "github": "", "project": "", "reviewers": "e5gT;1LBS;9Uiq;xGur", "site": "https://openreview.net/forum?id=33daZzvuzY6", "pdf_size": 5403068, "recommendation": "3;3;5;5", "confidence": "4;4;4;4", "correctness": "3;2;3;3", "technical_novelty": "3;3;3;2", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "230;92;52;54", "wc_strength_and_weaknesses": "625;193;185;136", "wc_clarity_quality_novelty_and_reproducibility": "114;20;35;36", "wc_summary_review": "42;24;55;14", "wc_review": "1011;329;327;240", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.75, 0.4330127018922193 ],
"technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 107.0, 72.78049189171504 ], "wc_strength_and_weaknesses_avg": [ 284.75, 197.65168225947383 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 51.25, 36.77890020106637 ], "wc_summary_review_avg": [ 33.75, 15.848895860595462 ], "wc_review_avg": [ 476.75, 310.5353240776321 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:o-0RPl_KJ2UJ:scholar.google.com/&scioq=DLP:+Data-Driven+Label-Poisoning+Backdoor+Attack&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Minnesota", "aff_unique_dep": "", "aff_unique_url": "https://www.minnesota.edu", "aff_unique_abbr": "UMN", "aff_campus_unique_index": "0;1;0;0", "aff_campus_unique": "Minneapolis;Twin Cities", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "35PLkGkJOQ4", "title": "Energy Consumption-Aware Tabular Benchmarks for Neural Architecture Search", "track": "main", "status": "Reject", "tldr": "Energy consumption-aware tabular benchmarks for NAS can be used to access sub-space of architectures that are inherently efficient.", "abstract": "The demand for large-scale computational resources for Neural Architecture Search (NAS) has been lessened by tabular benchmarks for NAS. Evaluating NAS strategies is now possible on extensive search spaces and at a moderate computational cost. But so far, NAS has mainly focused on maximising performance on some hold-out validation/test set. However, energy consumption is a partially conflicting objective that should not be neglected. We hypothesise that constraining NAS to include the energy consumption of training the models could reveal a sub-space of undiscovered architectures that are more computationally efficient with a smaller carbon footprint. To support the hypothesis, an existing tabular benchmark for NAS is augmented with the energy consumption of each architecture. We then perform multi-objective optimisation that includes energy consumption as an additional objective. We demonstrate the usefulness of multi-objective NAS for uncovering the trade-off between performance and energy consumption as well as for finding more energy-efficient architectures. 
The updated tabular benchmark is open-sourced to encourage the further exploration of energy consumption-aware NAS.", "keywords": "NAS;tabular benchmarks;energy consumption;carbon footprint;deep learning;multi-objective optimisation;automl", "primary_area": "", "supplementary_material": "/attachment/99dfe3c3f7ba26fa866af3fddaa76c80d212f366.zip", "author": "Pedram Bakhtiarifard;Christian Igel;Raghavendra Selvan", "authorids": "~Pedram_Bakhtiarifard1;~Christian_Igel1;~Raghavendra_Selvan1", "gender": "M;M;M", "homepage": ";https://christian-igel.github.io/;https://raghavian.github.io/", "dblp": ";38/6146;183/9041", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.dk/citations?user=d-jF4zIAAAAJ;R9VBQ54AAAAJ", "orcid": ";0000-0003-2868-0856;", "linkedin": "pedrambakh/;christianigel/;", "or_profile": "~Pedram_Bakhtiarifard1;~Christian_Igel1;~Raghavendra_Selvan1", "aff": "Copenhagen University;University of Copenhagen;University of Copenhagen", "aff_domain": "ku.dk;ku.dk;ku.dk", "position": "MS student;Full Professor;Assistant Professor", "bibtex": "@misc{\nbakhtiarifard2023energy,\ntitle={Energy Consumption-Aware Tabular Benchmarks for Neural Architecture Search},\nauthor={Pedram Bakhtiarifard and Christian Igel and Raghavendra Selvan},\nyear={2023},\nurl={https://openreview.net/forum?id=35PLkGkJOQ4}\n}", "github": "", "project": "", "reviewers": "gjRN;bAGs;Bb6X;FG4d", "site": "https://openreview.net/forum?id=35PLkGkJOQ4", "pdf_size": 1103298, "recommendation": "3;5;5;5", "confidence": "5;3;4;5", "correctness": "4;3;3;3", "technical_novelty": "2;2;2;2", "empirical_novelty": "1;2;2;2", "wc_summary_paper": "36;76;59;25", "wc_strength_and_weaknesses": "245;116;167;184", "wc_clarity_quality_novelty_and_reproducibility": "71;106;52;84", "wc_summary_review": "38;25;24;60", "wc_review": "390;323;302;353", "wc_reply_reviewers": "0;47;31;0", "wc_reply_authors": "556;431;437;350", "reply_reviewers": "0;1;1;0", "reply_authors": "1;2;2;1", "recommendation_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 49.0, 19.836834424877374 ], "wc_strength_and_weaknesses_avg": [ 178.0, 46.07059799915777 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 78.25, 19.651653874419832 ], "wc_summary_review_avg": [ 36.75, 14.515078366994786 ], "wc_review_avg": [ 342.0, 33.1134413795969 ], "wc_reply_reviewers_avg": [ 19.5, 20.303940504246953 ], "wc_reply_authors_avg": [ 443.5, 73.47958900266114 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.5222329678670935, "corr_recommendation_correctness": -1.0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11381615047994652214&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Copenhagen", "aff_unique_dep": "", "aff_unique_url": "https://www.ku.dk", "aff_unique_abbr": "UCPH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Denmark" }, { "title": "ESCHER: Eschewing Importance Sampling in Games by Computing a History Value Function to Estimate Regret", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10948", "id": "35QyoZv8cKO", "poster": 
"/media/PosterPDFs/ICLR%202023/10948.png?t=1682702356.5652025", "openreview": "https://openreview.net/forum?id=35QyoZv8cKO", "slides": "https://iclr.cc/virtual/2023/poster/10948", "video": "https://iclr.cc/virtual/2023/poster/10948", "author_site": "Stephen McAleer, Gabriele Farina, Marc Lanctot, Tuomas Sandholm", "tldr": "We propose a principled deep CFR algorithm that can scale to large games by removing importance sampling", "abstract": "Recent techniques for approximating Nash equilibria in very large games leverage neural networks to learn approximately optimal policies (strategies). One promis- ing line of research uses neural networks to approximate counterfactual regret minimization (CFR) or its modern variants. DREAM, the only current CFR-based neural method that is model free and therefore scalable to very large games, trains a neural network on an estimated regret target that can have extremely high variance due to an importance sampling term inherited from Monte Carlo CFR (MCCFR). In this paper we propose an unbiased model-free method that does not require any importance sampling. Our method, ESCHER, is principled and is guaranteed to converge to an approximate Nash equilibrium with high probability. We show that the variance of the estimated regret of ESCHER is orders of magnitude lower than DREAM and other baselines. We then show that ESCHER outperforms the prior state of the art\u2014DREAM and neural fictitious self play (NFSP)\u2014on a number of games and the difference becomes dramatic as game size increases. In the very large game of dark chess, ESCHER is able to beat DREAM and NFSP in a head-to-head competition over 90% of the time.", "keywords": "game theory;two-player zero-sum;CFR;Reinforcement learning", "primary_area": "", "supplementary_material": "/attachment/911b5caa5b661b0dcdaabe7c87af462cc4eae1c9.zip", "author": "Stephen Marcus McAleer;Gabriele Farina;Marc Lanctot;Tuomas Sandholm", "authorids": "~Stephen_Marcus_McAleer1;~Gabriele_Farina1;~Marc_Lanctot1;~Tuomas_Sandholm1", "gender": "M;M;M;M", "homepage": "https://www.andrew.cmu.edu/user/smcaleer/;http://www.cs.cmu.edu/~gfarina/about/;http://mlanctot.info;http://www.cs.cmu.edu/~sandholm", "dblp": ";;64/10094.html;s/TuomasSandholm", "google_scholar": "iEFL4-YAAAAJ;sktDNcEAAAAJ;E_oZZj8AAAAJ;0DpK1EMAAAAJ", "orcid": ";;;", "linkedin": "stephen-mcaleer/;;;", "or_profile": "~Stephen_Marcus_McAleer1;~Gabriele_Farina1;~Marc_Lanctot1;~Tuomas_Sandholm1", "aff": "Carnegie Mellon University;FAIR, Meta AI;Google DeepMind;Carnegie Mellon University", "aff_domain": "cmu.edu;meta.com;deepmind.com;cmu.edu", "position": "Postdoc;Researcher;Research Scientist;Full Professor", "bibtex": "@inproceedings{\nmcaleer2023escher,\ntitle={{ESCHER}: Eschewing Importance Sampling in Games by Computing a History Value Function to Estimate Regret},\nauthor={Stephen Marcus McAleer and Gabriele Farina and Marc Lanctot and Tuomas Sandholm},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=35QyoZv8cKO}\n}", "github": "", "project": "", "reviewers": "FHV8;yTxH;VgYx", "pdf_size": 679738, "recommendation": "6;6;6", "confidence": "4;3;3", "correctness": "3;4;3", "technical_novelty": "3;3;3", "empirical_novelty": "3;3;3", "wc_summary_paper": "19;158;90", "wc_strength_and_weaknesses": "235;16;237", "wc_clarity_quality_novelty_and_reproducibility": "18;649;1", "wc_summary_review": "47;2;2", "wc_review": "319;825;330", "wc_reply_reviewers": "562;83;39", "wc_reply_authors": 
"1354;354;391", "reply_reviewers": "3;1;1", "reply_authors": "4;2;2", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 89.0, 56.75091776056724 ], "wc_strength_and_weaknesses_avg": [ 162.66666666666666, 103.71220864595557 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 222.66666666666666, 301.54306860251694 ], "wc_summary_review_avg": [ 17.0, 21.213203435596427 ], "wc_review_avg": [ 491.3333333333333, 235.98069600899325 ], "wc_reply_reviewers_avg": [ 228.0, 236.8557929767956 ], "wc_reply_authors_avg": [ 699.6666666666666, 462.93004031086843 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.9428090415820634 ], "reply_authors_avg": [ 2.6666666666666665, 0.9428090415820634 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5880894236757472962&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=35QyoZv8cKO", "email": "cmu.edu;meta.com;deepmind.com;cmu.edu", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Carnegie Mellon University;Meta;Google", "aff_unique_dep": ";Meta AI;Google DeepMind", "aff_unique_url": "https://www.cmu.edu;https://meta.ai;https://deepmind.com", "aff_unique_abbr": "CMU;Meta AI;DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "36g8Ept_CCj", "title": "Learning Mixture Models with Simultaneous Data Partitioning and Parameter Estimation", "track": "main", "status": "Reject", "tldr": "PRESTO learns a mixture models such that each model performs well on a data partition", "abstract": "We study a new framework of learning mixture models via data partitioning called PRESTO, wherein we optimize a joint objective function on the model parameters and the partitioning, with each model tailored to perform well on its specific partition. We connect PRESTO to a number of past works in data partitioning, mixture models, and clustering, and show that PRESTO generalizes several loss functions including the k-means and Bregman clustering objective, the Gaussian mixture model objective, mixtures of support vector machines, and mixtures of linear regression. We then propose a new joint discrete-continuous optimization algorithm which achieves a bounded approximation guarantee for any general loss function, thereby achieving guarantees for the afore-mentioned problems as well. 
We study PRESTO in the context of resource efficient deep learning, where we train smaller resource constrained models on each partition and show that it outperforms existing data partitioning and model pruning/knowledge distillation approaches, which in contrast to PRESTO, require large initial (teacher) models.", "keywords": "mixture models;resource constrained learning", "primary_area": "", "supplementary_material": "/attachment/b019984183aee652c7537bf3b42bda1977f2b4cf.zip", "author": "Parth Vipul Sangani;Arjun Shashank Kashettiwar;Durga S;Ganesh Ramakrishnan;Rishabh K Iyer;Abir De", "authorids": "~Parth_Vipul_Sangani1;~Arjun_Shashank_Kashettiwar1;~Durga_S1;~Ganesh_Ramakrishnan1;~Rishabh_K_Iyer2;~Abir_De1", "gender": "M;M;F;M;M;M", "homepage": ";;;https://www.cse.iitb.ac.in/~ganesh/;https://www.rishiyer.com;", "dblp": ";;;r/GaneshRamakrishnan;37/10544.html;118/7174", "google_scholar": ";;4JXFWTwAAAAJ;https://scholar.google.com/scholar?hl=hi;l_XxJ1kAAAAJ;https://scholar.google.co.in/citations?user=_9ZKKbIAAAAJ", "orcid": ";;;;;", "linkedin": "parth-sangani-b34b59165;arjun-kashettiwar-05748a173;;;rishabh-iyer-36893717/;", "or_profile": "~Parth_Vipul_Sangani1;~Arjun_Shashank_Kashettiwar1;~Durga_S1;~Ganesh_Ramakrishnan1;~Rishabh_K_Iyer2;~Abir_De1", "aff": ";;Indian Institute of Technology Bombay;Indian Institute of Technology Bombay, Indian Institute of Technology Bombay;Microsoft;Indian Institute of Technology Bombay,", "aff_domain": ";;iitb.ac.in;cse.iitb.ac.in;microsoft.com;iitb.ac.in", "position": ";;PhD student;Full Professor;Research Scientist;Assistant Professor", "bibtex": "@misc{\nsangani2023learning,\ntitle={Learning Mixture Models with Simultaneous Data Partitioning and Parameter Estimation},\nauthor={Parth Vipul Sangani and Arjun Shashank Kashettiwar and Durga S and Ganesh Ramakrishnan and Rishabh K Iyer and Abir De},\nyear={2023},\nurl={https://openreview.net/forum?id=36g8Ept_CCj}\n}", "github": "", "project": "", "reviewers": "jcju;f3vu;dXtq", "site": "https://openreview.net/forum?id=36g8Ept_CCj", "pdf_size": 634739, "recommendation": "5;5;6", "confidence": "4;4;4", "correctness": "2;3;4", "technical_novelty": "3;3;3", "empirical_novelty": "2;3;3", "wc_summary_paper": "75;45;128", "wc_strength_and_weaknesses": "258;107;154", "wc_clarity_quality_novelty_and_reproducibility": "36;53;445", "wc_summary_review": "33;273;21", "wc_review": "402;478;748", "wc_reply_reviewers": "134;198;115", "wc_reply_authors": "867;2106;1116", "reply_reviewers": "1;2;1", "reply_authors": "2;3;2", "recommendation_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 82.66666666666667, 34.315529364349835 ], "wc_strength_and_weaknesses_avg": [ 173.0, 63.09252464964979 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 178.0, 188.9250292223523 ], "wc_summary_review_avg": [ 109.0, 116.06894502837527 ], "wc_review_avg": [ 542.6666666666666, 148.47072289025726 ], "wc_reply_reviewers_avg": [ 149.0, 35.505868059613285 ], "wc_reply_authors_avg": [ 1363.0, 535.1242846292812 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.8660254037844385, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:d04U2kFiAeUJ:scholar.google.com/&scioq=Learning+Mixture+Models+with+Simultaneous+Data+Partitioning+and+Parameter+Estimation&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Indian Institute of Technology Bombay;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.iitb.ac.in;https://www.microsoft.com", "aff_unique_abbr": "IIT Bombay;Microsoft", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Bombay;", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "India;United States" }, { "id": "383GRAoNhzb", "title": "Examining the Difference Among Transformers and CNNs with Explanation Methods", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "We propose a methodology that systematically applies deep explanation algorithms on a dataset-wide basis, to compare different types of visual recognition backbones, such as convolutional networks (CNNs), global attention networks, and local attention networks. We examine both qualitative visualizations and quantitative statistics across the dataset, in order to generate intuitions that are not just anecdotal, but are supported by the statistics computed on the whole dataset. Specifically, we propose two methods. The first one, sub-explanation counting, systematically searches for minimally-sufficient explanations of all images and count the amount of sub-explanations for each network. The second one, called cross-testing, computes salient regions using one network and then evaluates the performance by only showing these regions as an image to other networks. Through a combination of qualitative insights and quantitative statistics, we illustrate that 1) there are significant differences between the salient features of CNNs and attention models; 2) the occlusion-robustness in local attention models and global attention models may come from different decision-making mechanisms.", "keywords": "Explanation;transformers;multiple explanations", "primary_area": "", "supplementary_material": "", "author": "Mingqi Jiang;Saeed Khorram;Li Fuxin", "authorids": "~Mingqi_Jiang1;~Saeed_Khorram1;~Li_Fuxin1", "gender": "M;;", "homepage": ";;", "dblp": ";;", "google_scholar": "https://scholar.google.com/citations?hl=en;;", "orcid": ";;", "linkedin": "mingqi-jiang-23a10423a/;;", "or_profile": "~Mingqi_Jiang1;~Saeed_Khorram1;~Li_Fuxin1", "aff": "Oregon State University;;", "aff_domain": "oregonstate.edu;;", "position": "MS student;;", "bibtex": "@misc{\njiang2023examining,\ntitle={Examining the Difference Among Transformers and {CNN}s with Explanation Methods},\nauthor={Mingqi Jiang and Saeed Khorram and Li Fuxin},\nyear={2023},\nurl={https://openreview.net/forum?id=383GRAoNhzb}\n}", "github": "", "project": "", "reviewers": "D7k6;kg3f;soHZ;ajTX", "site": "https://openreview.net/forum?id=383GRAoNhzb", "pdf_size": 21259255, "recommendation": "1;3;5;5", "confidence": "5;4;4;3", "correctness": "2;1;4;2", "technical_novelty": "1;2;4;2", "empirical_novelty": "1;2;3;3", "wc_summary_paper": "29;259;156;131", "wc_strength_and_weaknesses": "32;300;110;65", "wc_clarity_quality_novelty_and_reproducibility": "25;81;363;6", "wc_summary_review": "19;93;39;5", "wc_review": "105;733;668;207", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;391;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;1;0;0", "recommendation_avg": [ 3.5, 1.6583123951777 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 2.25, 
1.0897247358851685 ], "technical_novelty_avg": [ 2.25, 1.0897247358851685 ], "empirical_novelty_avg": [ 2.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 143.75, 81.79662279091967 ], "wc_strength_and_weaknesses_avg": [ 126.75, 103.78674048258766 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 118.75, 143.68781263558856 ], "wc_summary_review_avg": [ 39.0, 33.436506994600975 ], "wc_review_avg": [ 428.25, 275.5878943277444 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 97.75, 169.30796643985775 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0.25, 0.4330127018922193 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.8528028654224417, "corr_recommendation_correctness": 0.48420012470625223, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7564932876882455448&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Oregon State University", "aff_unique_dep": "", "aff_unique_url": "https://oregonstate.edu", "aff_unique_abbr": "OSU", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Federated Neural Bandits", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10831", "id": "38m4h8HcNRL", "poster": "", "openreview": "https://openreview.net/forum?id=38m4h8HcNRL", "slides": "https://iclr.cc/virtual/2023/poster/10831", "video": "https://iclr.cc/virtual/2023/poster/10831", "author_site": "Zhongxiang Dai, Yao Shu, Arun Verma, Flint Xiaofeng Fan, Bryan Kian Hsiang Low, Patrick Jaillet", "tldr": "We introduce federated neural-UCB, which uses a weighted combination of two UCBs that respectively, (a) accelerates exploration using observations from other agents and (b) improves reward prediction using a neural network with aggregated parameters.", "abstract": "Recent works on neural contextual bandits have achieved compelling performances due to their ability to leverage the strong representation power of neural networks (NNs) for reward prediction. Many applications of contextual bandits involve multiple agents who collaborate without sharing raw observations, thus giving rise to the setting of federated contextual bandits}. Existing works on federated contextual bandits rely on linear or kernelized bandits, which may fall short when modeling complex real-world reward functions. So, this paper introduces the federated neural-upper confidence bound (FN-UCB) algorithm. To better exploit the federated setting, FN-UCB adopts a weighted combination of two UCBs: $\\text{UCB}^{a}$ allows every agent to additionally use the observations from the other agents to accelerate exploration (without sharing raw observations), while $\\text{UCB}^{b}$ uses an NN with aggregated parameters for reward prediction in a similar way to federated averaging for supervised learning. Notably, the weight between the two UCBs required by our theoretical analysis is amenable to an interesting interpretation, which emphasizes $\\text{UCB}^{a}$ initially for accelerated exploration and relies more on $\\text{UCB}^{b}$ later after enough observations have been collected to train the NNs for accurate reward prediction (i.e., reliable exploitation). 
We prove sub-linear upper bounds on both the cumulative regret and the number of communication rounds of FN-UCB, and empirically demonstrate its competitive performance.", "keywords": "neural contextual bandits;federated bandits", "primary_area": "", "supplementary_material": "/attachment/26c2f63b553af2990599975e7a07a5406b4c9d3c.zip", "author": "Zhongxiang Dai;Yao Shu;Arun Verma;Flint Xiaofeng Fan;Bryan Kian Hsiang Low;Patrick Jaillet", "authorids": "~Zhongxiang_Dai1;~Yao_Shu1;~Arun_Verma1;~Flint_Xiaofeng_Fan1;~Bryan_Kian_Hsiang_Low1;~Patrick_Jaillet1", "gender": "M;M;M;M;M;M", "homepage": "https://daizhongxiang.github.io/;https://yao.notion.site;https://arunv3rma.github.io/;http://www.comp.nus.edu.sg/~lowkh;http://web.mit.edu/jaillet/www/;https://flint-xf-fan.github.io/", "dblp": "172/4968;44/1338;28/3688;97/4877;https://dblp.uni-trier.de/pers/hd/j/Jaillet:Patrick;304/8793", "google_scholar": "1v8xOIYAAAAJ;https://scholar.google.com.au/citations?hl=en;https://scholar.google.co.in/citations?user=tBcixlUAAAAJ;https://scholar.google.com.tw/citations?user=2P-Q09UAAAAJ;ND0FM6EAAAAJ;https://scholar.google.com.sg/citations?user=MfU2wj8AAAAJ", "orcid": ";;;;0000-0002-8585-6566;0000-0003-1658-4699", "linkedin": ";yao-shu-a5640514b;;;patrick-jaillet-1260445/;flintxffan/", "or_profile": "~Zhongxiang_Dai1;~Yao_Shu1;~Arun_Verma1;~Bryan_Kian_Hsiang_Low1;~Patrick_Jaillet1;~Xiaofeng_Fan1", "aff": "National University of Singapore;National University of Singapore;National University of Singapore;National University of Singapore;Massachusetts Institute of Technology;ETHZ - ETH Zurich", "aff_domain": "nus.edu.sg;nus.edu.sg;nus.edu.sg;nus.edu.sg;mit.edu;ethz.ch", "position": "Postdoc;Postdoc;Postdoc;Associate Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\ndai2023federated,\ntitle={Federated Neural Bandits},\nauthor={Zhongxiang Dai and Yao Shu and Arun Verma and Flint Xiaofeng Fan and Bryan Kian Hsiang Low and Patrick Jaillet},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=38m4h8HcNRL}\n}", "github": "", "project": "", "reviewers": "H38q;D1SJ;oNap;cGE9;f21Q", "pdf_size": 8440488, "recommendation": "6;6;6;6;8", "confidence": "3;3;3;4;3", "correctness": "4;3;4;3;4", "technical_novelty": "2;3;3;3;3", "empirical_novelty": "2;2;4;3;4", "wc_summary_paper": "84;61;48;38;55", "wc_strength_and_weaknesses": "242;216;222;162;103", "wc_clarity_quality_novelty_and_reproducibility": "23;16;49;44;32", "wc_summary_review": "82;19;29;44;41", "wc_review": "431;312;348;288;231", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;0;0", "reply_authors": "0;0;0;0;0", "recommendation_avg": [ 6.4, 0.7999999999999999 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "correctness_avg": [ 3.6, 0.4898979485566356 ], "technical_novelty_avg": [ 2.8, 0.39999999999999997 ], "empirical_novelty_avg": [ 3.0, 0.8944271909999159 ], "wc_summary_paper_avg": [ 57.2, 15.43243337908834 ], "wc_strength_and_weaknesses_avg": [ 189.0, 50.50148512667722 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 32.8, 12.383860464330176 ], "wc_summary_review_avg": [ 43.0, 21.438283513378586 ], "wc_review_avg": [ 322.0, 66.47405508918499 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.25000000000000006, 
"corr_recommendation_correctness": 0.408248290463863, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6981951943654532094&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=38m4h8HcNRL", "email": "nus.edu.sg;nus.edu.sg;nus.edu.sg;nus.edu.sg;mit.edu;ethz.ch", "author_num": 6, "aff_unique_index": "0;0;0;0;1;2", "aff_unique_norm": "National University of Singapore;Massachusetts Institute of Technology;ETH Zurich", "aff_unique_dep": ";;", "aff_unique_url": "https://www.nus.edu.sg;https://web.mit.edu;https://www.ethz.ch", "aff_unique_abbr": "NUS;MIT;ETHZ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;2", "aff_country_unique": "Singapore;United States;Switzerland" }, { "id": "39cMBLyo_ia", "title": "Push and Pull: Competing Feature-Prototype Interactions Improve Semi-supervised Semantic Segmentation", "track": "main", "status": "Reject", "tldr": "", "abstract": "This paper challenges semi-supervised segmentation with a rethink on the feature-prototype interaction in the classification head. Specifically, we view each weight vector in the classification head as the prototype of a semantic category. The basic practice in the softmax classifier is to pull a feature towards its positive prototype (i.e., the prototype of its class), as well as to push it away from its negative prototypes. In this paper, we focus on the interaction between the feature and its negative prototypes, which is always \u201cpushing\u201d to make them dissimilar. While the pushing-away interaction is necessary, this paper reveals a new mechanism that the contrary interaction of pulling close negative prototypes is also beneficial. We have two insights for this counter-intuitive interaction: 1) some pseudo negative prototypes might actually be positive so that the pulling interaction can help resisting the pseudo-label noises, and 2) some true negative prototypes might contain contextual information that is beneficial. Therefore, we integrate these two competing interactions into a Push-and-Pull Learning (PPL) method. On the one hand, PPL introduces the novel pulling-close interaction between features and negative prototypes with a feature-to-prototype attention. On the other hand, PPL reinforces the original pushing-away interaction with a multi-prototype contrastive learning. 
While PPL is very simple, experiments show that it substantially improves semi-supervised segmentation and sets a new state of the art.", "keywords": "Semi-supervised;Segmentation;Competing Interactions;Classifier Prototype", "primary_area": "", "supplementary_material": "", "author": "Yuhang Ding;Yifan Sun;Yi Yang", "authorids": "~Yuhang_Ding1;~Yifan_Sun2;~Yi_Yang22", "gender": "M;M;M", "homepage": ";https://yifansun-reid.github.io;https://person.zju.edu.cn/yiyang", "dblp": "244/9493;99/10261-3.html;33/4854-1.html", "google_scholar": "2zbnTq8AAAAJ;uUZEL7UAAAAJ;RMSuNFwAAAAJ", "orcid": ";0000-0003-3532-6521;", "linkedin": ";;", "or_profile": "~Yuhang_Ding1;~Yifan_Sun2;~Yi_Yang22", "aff": "University of Technology Sydney;Baidu;Zhejiang University", "aff_domain": "uts.edu.au;baidu.com;zju.edu.cn", "position": "PhD student;Senior Expert;Full Professor", "bibtex": "@misc{\nding2023push,\ntitle={Push and Pull: Competing Feature-Prototype Interactions Improve Semi-supervised Semantic Segmentation},\nauthor={Yuhang Ding and Yifan Sun and Yi Yang},\nyear={2023},\nurl={https://openreview.net/forum?id=39cMBLyo_ia}\n}", "github": "", "project": "", "reviewers": "ewoj;gCrj;xCik;CAt6", "site": "https://openreview.net/forum?id=39cMBLyo_ia", "pdf_size": 1915147, "recommendation": "5;5;5;6", "confidence": "4;3;5;4", "correctness": "4;4;3;3", "technical_novelty": "3;3;2;3", "empirical_novelty": "3;2;2;0", "wc_summary_paper": "101;97;119;110", "wc_strength_and_weaknesses": "270;175;378;325", "wc_clarity_quality_novelty_and_reproducibility": "124;12;19;10", "wc_summary_review": "90;34;52;32", "wc_review": "585;318;568;477", "wc_reply_reviewers": "63;0;0;30", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;0;1", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.75, 1.0897247358851685 ], "wc_summary_paper_avg": [ 106.75, 8.496322733983215 ], "wc_strength_and_weaknesses_avg": [ 287.0, 75.09660445053424 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 41.25, 47.89245765253648 ], "wc_summary_review_avg": [ 52.0, 23.280893453645632 ], "wc_review_avg": [ 487.0, 105.8607576016722 ], "wc_reply_reviewers_avg": [ 23.25, 26.013217794036937 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:l0ZrbZv_P-gJ:scholar.google.com/&scioq=Push+and+Pull:+Competing+Feature-Prototype+Interactions+Improve+Semi-supervised+Semantic+Segmentation&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Technology Sydney;Baidu;Zhejiang University", "aff_unique_dep": ";Baidu, Inc.;", "aff_unique_url": "https://www.uts.edu.au;https://www.baidu.com;https://www.zju.edu.cn", "aff_unique_abbr": "UTS;Baidu;ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Australia;China" }, { "title": "Don\u2019t forget the nullspace! 
Nullspace occupancy as a mechanism for out of distribution failure", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11035", "id": "39z0zPZ0AvB", "poster": "", "openreview": "https://openreview.net/forum?id=39z0zPZ0AvB", "slides": "https://iclr.cc/virtual/2023/poster/11035", "video": "https://iclr.cc/virtual/2023/poster/11035", "author_site": "Daksh Idnani, Vivek Madan, Naman Goyal, David J Schwab, Shanmukha Ramakrishna Vedantam", "tldr": "", "abstract": "Out of distribution (OoD) generalization has received considerable interest in recent years. In this work, we identify a particular failure mode of OoD generalization for discriminative classifiers that is based on test data (from a new domain) lying in the nullspace of features learnt from source data. We demonstrate the existence of this failure mode across multiple networks trained across RotatedMNIST, PACS, TerraIncognita, DomainNet and ImageNet-R datasets. We then study different choices for characterizing the feature space and show that projecting intermediate representations onto the span of directions that obtain maximum training accuracy provides consistent improvements in OoD performance. Finally, we show that such nullspace behavior also provides an insight into neural networks trained on poisoned data. We hope our work galvanizes interest in the relationship between the nullspace occupancy failure mode and generalization.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Daksh Idnani;Vivek Madan;Naman Goyal;David J. Schwab;Shanmukha Ramakrishna Vedantam", "authorids": "~Daksh_Idnani1;~Vivek_Madan2;~Naman_Goyal1;~David_J._Schwab1;~Shanmukha_Ramakrishna_Vedantam1", "gender": "M;M;M;M;M", "homepage": ";;;;http://vrama91.github.io", "dblp": "276/0050;52/11466.html;183/1418;153/1725;154/6748.html", "google_scholar": ";;CRbM_P4AAAAJ;xRtvC50AAAAJ;v1CRzeAAAAAJ", "orcid": ";;;;", "linkedin": "dakshidnani/;;ngoyal2707/;;", "or_profile": "~Daksh_Idnani1;~Vivek_Madan2;~Naman_Goyal1;~David_J._Schwab1;~Shanmukha_Ramakrishna_Vedantam1", "aff": ";Amazon;;CUNY Graduate Center;Meta Facebook", "aff_domain": ";amazon.com;;cuny.edu;fb.com", "position": ";Scientist;;Assistant Professor;Research Scientist", "bibtex": "@inproceedings{\nidnani2023dont,\ntitle={Don{\\textquoteright}t forget the nullspace! Nullspace occupancy as a mechanism for out of distribution failure},\nauthor={Daksh Idnani and Vivek Madan and Naman Goyal and David J. 
Schwab and Shanmukha Ramakrishna Vedantam},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=39z0zPZ0AvB}\n}", "github": "", "project": "", "reviewers": "k2wc;tTTa;L6zm;mtmw", "pdf_size": 973650, "recommendation": "5;5;5;8", "confidence": "4;4;3;3", "correctness": "3;3;3;4", "technical_novelty": "3;3;3;4", "empirical_novelty": "3;3;3;4", "wc_summary_paper": "109;64;51;236", "wc_strength_and_weaknesses": "457;107;186;390", "wc_clarity_quality_novelty_and_reproducibility": "80;11;22;64", "wc_summary_review": "50;28;39;50", "wc_review": "696;210;298;740", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "898;317;682;705", "reply_reviewers": "0;0;0;0", "reply_authors": "2;1;1;1", "recommendation_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 115.0, 73.09924760214705 ], "wc_strength_and_weaknesses_avg": [ 285.0, 143.26025268719863 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 44.25, 28.586491565073178 ], "wc_summary_review_avg": [ 41.75, 9.12071817347735 ], "wc_review_avg": [ 486.0, 234.59326503546515 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 650.5, 210.0244033439924 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 1.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17319253489734839790&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "pdf": "https://openreview.net/pdf?id=39z0zPZ0AvB", "email": ";amazon.com;;cuny.edu;fb.com", "author_num": 5, "aff_unique_index": "0;1;2", "aff_unique_norm": "Amazon;City University of New York;Meta", "aff_unique_dep": "Amazon.com, Inc.;;Meta Platforms, Inc.", "aff_unique_url": "https://www.amazon.com;https://www.gc.cuny.edu;https://meta.com", "aff_unique_abbr": "Amazon;CUNY GC;Meta", "aff_campus_unique_index": "1", "aff_campus_unique": ";Graduate Center", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "3BOwNcqM_Wq", "title": "Learning to reason with relational abstractions", "track": "main", "status": "Reject", "tldr": "Sequences with abstract relations can help models solve mathematical reasoning tasks with a significantly higher accuracy compared to those that are trained with human-generated sequences and other baselines.", "abstract": "Large language models have recently shown promising progress in mathematical reasoning when fine-tuned with human-generated sequences walking through a sequence of solution steps. However, the solution sequences are not formally structured and the resulting model-generated sequences may not reflect the kind of systematic reasoning we might expect an expert human to produce.\nIn this paper, we study how to build stronger reasoning capability in language models using the idea of relational abstractions. We introduce new types of sequences that more explicitly provide an abstract characterization of the transitions through intermediate solution steps to the goal state. 
We found that models that are supplied with such sequences as prompts can solve tasks with a significantly higher accuracy, and models that are trained to produce such sequences solve problems better than those that are trained with previously used human-generated sequences and other baselines. Our work thus takes several steps toward elucidating and improving how language models perform on tasks requiring multi-step mathematical reasoning.", "keywords": "mathematical reasoning;language models;relational abstraction", "primary_area": "", "supplementary_material": "", "author": "Andrew Joohun Nam;Mengye Ren;Chelsea Finn;James Lloyd McClelland", "authorids": "~Andrew_Joohun_Nam1;~Mengye_Ren1;~Chelsea_Finn1;~James_Lloyd_McClelland1", "gender": "M;;F;M", "homepage": ";http://www.cs.toronto.edu/~mren;https://ai.stanford.edu/~cbfinn/;https://web.stanford.edu/~jlmcc/", "dblp": ";163/1952;131/1783;49/5831", "google_scholar": ";XcQ9WqMAAAAJ;vfPE6hgAAAAJ;ht_psVIAAAAJ", "orcid": "0000-0001-9860-4221;;;0000-0002-8217-405X", "linkedin": ";;;", "or_profile": "~Andrew_Joohun_Nam1;~Mengye_Ren1;~Chelsea_Finn1;~James_McClelland1", "aff": "Stanford University;New York University;Google;Stanford University", "aff_domain": "stanford.edu;nyu.edu;google.com;stanford.edu", "position": "PhD student;Assistant Professor;Research Scientist;Full Professor", "bibtex": "@misc{\nnam2023learning,\ntitle={Learning to reason with relational abstractions},\nauthor={Andrew Joohun Nam and Mengye Ren and Chelsea Finn and James Lloyd McClelland},\nyear={2023},\nurl={https://openreview.net/forum?id=3BOwNcqM_Wq}\n}", "github": "", "project": "", "reviewers": "XGS4;YJ77;wEws;cUHL", "site": "https://openreview.net/forum?id=3BOwNcqM_Wq", "pdf_size": 503664, "recommendation": "3;3;6;6", "confidence": "2;3;4;4", "correctness": "3;3;4;3", "technical_novelty": "2;1;3;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "110;102;148;35", "wc_strength_and_weaknesses": "107;117;231;112", "wc_clarity_quality_novelty_and_reproducibility": "14;14;159;14", "wc_summary_review": "9;47;53;9", "wc_review": "240;280;591;170", "wc_reply_reviewers": "0;0;230;0", "wc_reply_authors": "432;908;1412;411", "reply_reviewers": "0;0;1;0", "reply_authors": "1;2;3;1", "recommendation_avg": [ 4.5, 1.5 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.82915619758885 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 98.75, 40.702426217610174 ], "wc_strength_and_weaknesses_avg": [ 141.75, 51.64966117991482 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 50.25, 62.7868417743718 ], "wc_summary_review_avg": [ 29.5, 20.60946384552495 ], "wc_review_avg": [ 320.25, 161.19921680951182 ], "wc_reply_reviewers_avg": [ 57.5, 99.59292143521044 ], "wc_reply_authors_avg": [ 790.75, 410.064248014869 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.9045340337332909, "corr_recommendation_correctness": 0.5773502691896258, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17825368019297475491&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Stanford University;New York University;Google", "aff_unique_dep": ";;Google", "aff_unique_url": "https://www.stanford.edu;https://www.nyu.edu;https://www.google.com", "aff_unique_abbr": 
"Stanford;NYU;Google", "aff_campus_unique_index": "0;2;0", "aff_campus_unique": "Stanford;;Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Hyperbolic Self-paced Learning for Self-supervised Skeleton-based Action Representations", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11632", "id": "3Bh6sRPKS3J", "poster": "", "openreview": "https://openreview.net/forum?id=3Bh6sRPKS3J", "slides": "https://iclr.cc/virtual/2023/poster/11632", "video": "https://iclr.cc/virtual/2023/poster/11632", "author_site": "Luca Franco, Paolo Mandica, Bharti Munjal, Fabio Galasso", "tldr": "", "abstract": "Self-paced learning has been beneficial for tasks where some initial knowledge is available, such as weakly supervised learning and domain adaptation, to select and order the training sample sequence, from easy to complex. However its applicability remains unexplored in unsupervised learning, whereby the knowledge of the task matures during training.\nWe propose a novel HYperbolic Self-Paced model (HYSP) for learning skeletonbased action representations. HYSP adopts self-supervision: it uses data augmentations to generate two views of the same sample, and it learns by matching one (named online) to the other (the target). We propose to use hyperbolic uncertainty to determine the algorithmic learning pace, under the assumption that less uncertain samples should be more strongly driving the training, with a larger weight and pace. Hyperbolic uncertainty is a by-product of the adopted hyperbolic neural networks, it matures during training and it comes with no extra cost, compared to the established Euclidean SSL framework counterparts.\nWhen tested on three established skeleton-based action recognition datasets, HYSP outperforms the state-of-the-art on PKU-MMD I, as well as on 2 out of 3 downstream tasks on NTU-60 and NTU-120. 
Additionally, HYSP only uses positive pairs and bypasses therefore the complex and computationally-demanding mining procedures required for the negatives in contrastive techniques.\nCode is available at https://github.com/paolomandica/HYSP.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Luca Franco;Paolo Mandica;Bharti Munjal;Fabio Galasso", "authorids": "~Luca_Franco1;~Paolo_Mandica1;~Bharti_Munjal2;~Fabio_Galasso1", "gender": "M;M;F;M", "homepage": "https://fraluca.github.io/;https://paolomandica.github.io/;http://campar.in.tum.de/Main/BhartiMunjal;https://fgalasso.bitbucket.io/", "dblp": "304/2582;342/3996;;48/3897", "google_scholar": "https://scholar.google.com/citations?hl=it;https://scholar.google.com/citations?hl=en;https://scholar.google.de/citations?user=APfNTAcAAAAJ;https://scholar.google.de/citations?user=2gSuGBEAAAAJ", "orcid": "0000-0003-0107-6755;0000-0002-4493-2497;;0000-0003-1875-7813", "linkedin": "luca-franco-968819196/;paolo-mandica/;;fabio-galasso-61141b32/", "or_profile": "~Luca_Franco1;~Paolo_Mandica1;~Bharti_Munjal2;~Fabio_Galasso1", "aff": "Panasonic;Panasonic;Department of Informatics, Technical University Munich;University of Roma \"La Sapienza\"", "aff_domain": "us.panasonic.com;us.panasonic.com;in.tum.de;uniroma1.it", "position": "Intern;Intern;PhD student;Associate Professor", "bibtex": "@inproceedings{\nfranco2023hyperbolic,\ntitle={Hyperbolic Self-paced Learning for Self-supervised Skeleton-based Action Representations},\nauthor={Luca Franco and Paolo Mandica and Bharti Munjal and Fabio Galasso},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=3Bh6sRPKS3J}\n}", "github": "", "project": "", "reviewers": "GGZs;V8KY;R1Eg;Dsnu", "pdf_size": 1031579, "recommendation": "6;6;6;8", "confidence": "3;5;4;3", "correctness": "3;3;3;4", "technical_novelty": "3;3;3;3", "empirical_novelty": "3;2;3;3", "wc_summary_paper": "63;32;64;109", "wc_strength_and_weaknesses": "314;181;411;217", "wc_clarity_quality_novelty_and_reproducibility": "41;32;40;58", "wc_summary_review": "59;31;34;26", "wc_review": "477;276;549;410", "wc_reply_reviewers": "0;0;103;17", "wc_reply_authors": "618;563;1131;309", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;3;1", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 67.0, 27.44995446262161 ], "wc_strength_and_weaknesses_avg": [ 280.75, 89.5610825079733 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 42.75, 9.470348462437906 ], "wc_summary_review_avg": [ 37.5, 12.737739202856996 ], "wc_review_avg": [ 428.0, 100.58578428386389 ], "wc_reply_reviewers_avg": [ 30.0, 42.714166268347086 ], "wc_reply_authors_avg": [ 655.25, 298.38094359392323 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.5222329678670935, "corr_recommendation_correctness": 1.0, "gs_citation": 44, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9839479658288128047&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=3Bh6sRPKS3J", "email": "us.panasonic.com;us.panasonic.com;in.tum.de;uniroma1.it", "author_num": 4, "aff_unique_index": "0;0;1;2", "aff_unique_norm": 
"Panasonic Corporation;Technical University Munich;University of Rome La Sapienza", "aff_unique_dep": ";Department of Informatics;", "aff_unique_url": "https://www.panasonic.com;https://www.tum.de;https://www.uniroma1.it", "aff_unique_abbr": "Panasonic;TUM;La Sapienza", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Munich;Rome", "aff_country_unique_index": "0;0;1;2", "aff_country_unique": "Japan;Germany;Italy" }, { "id": "3C9Eqd0hCrr", "title": "Beyond Deep Learning: An Evolutionary Feature Engineering Approach to Tabular Data Classification", "track": "main", "status": "Reject", "tldr": "", "abstract": "In recent years, deep learning has achieved impressive performance in the computer vision and natural language processing domains. In the tabular data classification scenario, with the emergence of the transformer architecture, a number of algorithms have been reported to yield better results than conventional tree-based models. Most of these methods attribute the success of deep learning methods to the expressive feature construction capability of neural networks. Nonetheless, in real practice, manually designed high-order features with traditional machine learning methods are still widely used because neural-network-based features can be easy to over-fitting. In this paper, we propose an evolution-based feature engineering algorithm to imitate the manual feature construction process through trial and improvement. Importantly, the evolutionary method provides an opportunity to optimize cross-validation loss, where gradient methods fail to do so. On a large-scale classification benchmark of 119 datasets, the experimental results demonstrate that the proposed method outperforms existing fine-tuned state-of-the-art tree-based and deep-learning-based classification algorithms.", "keywords": "Automated Feature Construction;Automated Machine Learning;Genetic Programming;Evolutionary Algorithm", "primary_area": "", "supplementary_material": "/attachment/ec1323306899819c37eba94cb66501ffc617685e.zip", "author": "Hengzhe Zhang;Qi Chen;Aimin Zhou;bing xue;Yan Wang;mengjie zhang", "authorids": "~Hengzhe_Zhang1;qi.chen@vuw.ac.nz;~Aimin_Zhou1;bing.xue@vuw.ac.nz;~Yan_Wang16;mengjie.zhang@vuw.ac.nz", "gender": "M;;;;M;", "homepage": "https://github.com/hengzhe-zhang;;;;http://web.science.mq.edu.au/~yanwang/;", "dblp": ";;;;59/2227-2;", "google_scholar": ";;;;https://scholar.google.com/citations?hl=en;", "orcid": ";;;;0000-0002-5344-1884;", "linkedin": ";;;;yan-wang-967884/;", "or_profile": "~Hengzhe_Zhang1;qi.chen@vuw.ac.nz;~Aimin_Zhou1;bing.xue@vuw.ac.nz;~Yan_Wang16;mengjie.zhang@vuw.ac.nz", "aff": "Victoria University of Wellington;;;;Macquarie University;", "aff_domain": "vuw.ac.nz;;;;mq.edu.au;", "position": "PhD student;;;;Full Professor;", "bibtex": "@misc{\nzhang2023beyond,\ntitle={Beyond Deep Learning: An Evolutionary Feature Engineering Approach to Tabular Data Classification},\nauthor={Hengzhe Zhang and Qi Chen and Aimin Zhou and bing xue and Yan Wang and mengjie zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=3C9Eqd0hCrr}\n}", "github": "", "project": "", "reviewers": "bBog;LFDt;8pAi", "site": "https://openreview.net/forum?id=3C9Eqd0hCrr", "pdf_size": 346846, "recommendation": "3;6;6", "confidence": "3;3;4", "correctness": "3;3;3", "technical_novelty": "2;3;3", "empirical_novelty": "2;3;3", "wc_summary_paper": "31;174;47", "wc_strength_and_weaknesses": "170;116;99", "wc_clarity_quality_novelty_and_reproducibility": "11;118;11", "wc_summary_review": "21;94;26", 
"wc_review": "233;502;183", "wc_reply_reviewers": "0;14;19", "wc_reply_authors": "889;460;865", "reply_reviewers": "0;1;1", "reply_authors": "3;2;2", "recommendation_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 84.0, 63.97395303298575 ], "wc_strength_and_weaknesses_avg": [ 128.33333333333334, 30.26916289265731 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 46.666666666666664, 50.440283724640395 ], "wc_summary_review_avg": [ 47.0, 33.29664647778612 ], "wc_review_avg": [ 306.0, 140.08806753848333 ], "wc_reply_reviewers_avg": [ 11.0, 8.04155872120988 ], "wc_reply_authors_avg": [ 738.0, 196.8197144597055 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.5, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9339611576111447287&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Victoria University of Wellington;Macquarie University", "aff_unique_dep": ";", "aff_unique_url": "https://www.victoria.ac.nz;https://www.mq.edu.au", "aff_unique_abbr": "VUW;MQ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "New Zealand;Australia" }, { "title": "Spatial Attention Kinetic Networks with E(n)-Equivariance", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11959", "id": "3DIpIf3wQMC", "poster": "/media/PosterPDFs/ICLR%202023/11959.png?t=1682648650.4400866", "openreview": "https://openreview.net/forum?id=3DIpIf3wQMC", "slides": "https://iclr.cc/virtual/2023/poster/11959", "video": "https://iclr.cc/virtual/2023/poster/11959", "author_site": "Yuanqing Wang, John Chodera", "tldr": "Equivariant functional form termed spatial attention uses neurally parametrized linear combinations of edge vectors to equivariantly yet describe node environments ", "abstract": "Neural networks that are equivariant to rotations, translations, reflections, and permutations on $n$-dimensional geometric space have shown promise in physical modeling for tasks such as accurately but inexpensively modeling complex potential energy surfaces to guiding the sampling of complex dynamical systems or forecasting their time evolution.\nCurrent state-of-the-art methods employ spherical harmonics to encode higher-order interactions among particles, which are computationally expensive.\nIn this paper, we propose a simple alternative functional form that uses neurally parametrized linear combinations of edge vectors to achieve equivariance while still universally approximating node environments.\nIncorporating this insight, we design \\emph{spatial attention kinetic networks} with E(n)-equivariance, or SAKE, which are competitive in many-body system modeling tasks while being significantly faster.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Yuanqing Wang;John Chodera", "authorids": "~Yuanqing_Wang1;~John_Chodera1", "gender": "M;M", "homepage": "https://wangyq.net;http://choderalab.org", "dblp": "83/7566;", "google_scholar": "Njp5EY4AAAAJ;nnEg7_8AAAAJ", "orcid": 
";0000-0003-0542-119X", "linkedin": "yuanqing-wang/;", "or_profile": "~Yuanqing_Wang1;~John_Chodera1", "aff": "Weill Cornell Medicine, Cornell University;Memorial Sloan Kettering Cancer Centre", "aff_domain": "med.cornell.edu;mskcc.org", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nwang2023spatial,\ntitle={Spatial Attention Kinetic Networks with E(n)-Equivariance},\nauthor={Yuanqing Wang and John Chodera},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=3DIpIf3wQMC}\n}", "github": "", "project": "", "reviewers": "g5jj;ULzV;YAqW;u1rJ", "pdf_size": 384633, "recommendation": "6;6;6;8", "confidence": "5;4;4;4", "correctness": "3;3;4;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "3;2;3;3", "wc_summary_paper": "46;41;49;528", "wc_strength_and_weaknesses": "273;416;571;366", "wc_clarity_quality_novelty_and_reproducibility": "64;56;154;52", "wc_summary_review": "51;34;74;4", "wc_review": "434;547;848;950", "wc_reply_reviewers": "76;17;95;90", "wc_reply_authors": "565;605;426;130", "reply_reviewers": "1;1;2;2", "reply_authors": "5;5;4;3", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 166.0, 209.0203339390692 ], "wc_strength_and_weaknesses_avg": [ 406.5, 107.95022000903936 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 81.5, 42.080280417316615 ], "wc_summary_review_avg": [ 40.75, 25.52817071393875 ], "wc_review_avg": [ 694.75, 211.22189162111013 ], "wc_reply_reviewers_avg": [ 69.5, 31.100643080167973 ], "wc_reply_authors_avg": [ 431.5, 186.3176051799722 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 4.25, 0.82915619758885 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": -0.3333333333333333, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17751034224296524499&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=3DIpIf3wQMC", "email": "med.cornell.edu;mskcc.org", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Cornell University;Memorial Sloan Kettering Cancer Center", "aff_unique_dep": ";", "aff_unique_url": "https://www.weill.cornell.edu;https://www.mskcc.org", "aff_unique_abbr": "Cornell;MSKCC", "aff_campus_unique_index": "0", "aff_campus_unique": "Weill Cornell Medicine;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "HiViT: A Simpler and More Efficient Design of Hierarchical Vision Transformer", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11895", "id": "3F6I-0-57SC", "poster": "", "openreview": "https://openreview.net/forum?id=3F6I-0-57SC", "slides": "https://iclr.cc/virtual/2023/poster/11895", "video": "https://iclr.cc/virtual/2023/poster/11895", "author_site": "Xiaosong Zhang, Yunjie Tian, Lingxi Xie, Wei Huang, Qi Dai, Qixiang Ye, Qi Tian", "tldr": "A novel hierarchical vision transformer that is stronger and faster when applied to masked image modeling", "abstract": "There has been a debate on the choice of plain vs. 
hierarchical vision transformers, where researchers often believe that the former (e.g., ViT) has a simpler design but the latter (e.g., Swin) enjoys higher recognition accuracy. Recently, the emergence of masked image modeling (MIM), a self-supervised visual pre-training method, raised a new challenge to vision transformers in terms of flexibility, i.e., part of image patches or tokens are to be discarded, which seems to claim the advantages of plain vision transformers. In this paper, we delve deep into the comparison between ViT and Swin, revealing that (i) the performance gain of Swin is mainly brought by a deepened backbone and relative positional encoding, (ii) the hierarchical design of Swin can be simplified into hierarchical patch embedding (proposed in this work), and (iii) other designs such as shifted-window attentions can be removed. By removing the unnecessary operations, we come up with a new architecture named HiViT (short for hierarchical ViT), which is simpler and more efficient than Swin yet further improves its performance on fully-supervised and self-supervised visual representation learning. In particular, after being pre-trained using a masked autoencoder (MAE) on ImageNet-1K, HiViT-B reports an 84.6% accuracy on ImageNet-1K classification, a 53.3% box AP on COCO detection, and a 52.8% mIoU on ADE20K segmentation, significantly surpassing the baseline. Code is available at https://github.com/zhangxiaosong18/hivit.", "keywords": "Hierarchical vision transformers;self-supervised learning;masked image modeling", "primary_area": "", "supplementary_material": "/attachment/1f35accd3be082244c68fb391909ec922537ad4a.zip", "author": "Xiaosong Zhang;Yunjie Tian;Lingxi Xie;Wei Huang;Qi Dai;Qixiang Ye;Qi Tian", "authorids": "~Xiaosong_Zhang2;~Yunjie_Tian1;~Lingxi_Xie1;~Wei_Huang11;~Qi_Dai2;~Qixiang_Ye1;~Qi_Tian3", "gender": "M;M;M;F;M;M;M", "homepage": "https://zhangxiaosong18.github.io/XiaosongZhang.htm;https://sunsmarterjie.github.io/;http://lingxixie.com/;https://github.com/Vickeyhw;http://people.ucas.ac.cn/~qxye?language=en;https://www.qitian1987.com/index.html;", "dblp": "26/3075-4;270/0554;123/2869;;06/4335;78/1467-1.html;35/5587-1.html", "google_scholar": "98exn6wAAAAJ;https://scholar.google.com.hk/citations?user=DuetWVcAAAAJ;EEMm7hwAAAAJ;;https://scholar.google.com.hk/citations?user=tjEfgsEAAAAJ;https://scholar.google.com/citations?hl=en;NSJY12IAAAAJ", "orcid": ";0000-0002-5103-3748;;0000-0001-8899-0069;;0000-0002-7252-5047;", "linkedin": ";;;;;;", "or_profile": "~Xiaosong_Zhang2;~Yunjie_Tian1;~Lingxi_Xie1;~Wei_Huang11;~Qixiang_Ye1;~Qi_Tian3;~Qi_Dai4", "aff": "University of Chinese Academy of Sciences;University of Chinese Academy of Sciences;Huawei Technologies Ltd.;University of Chinese Academy of Sciences;University of Chinese Academy of Sciences;Huawei Technologies Ltd.;Microsoft Research Asia", "aff_domain": "ucas.ac.cn;ucas.ac.cn;huawei.com;ucas.edu.cn;ucas.ac.cn;huawei.com;microsoft.com", "position": "PhD student;PhD student;Researcher;PhD student;Full Professor;Principal Researcher;Researcher", "bibtex": "@inproceedings{\nzhang2023hivit,\ntitle={HiViT: A Simpler and More Efficient Design of Hierarchical Vision Transformer},\nauthor={Xiaosong Zhang and Yunjie Tian and Lingxi Xie and Wei Huang and Qi Dai and Qixiang Ye and Qi Tian},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=3F6I-0-57SC}\n}", "github": "", "project": "", "reviewers": "mVGH;5NXC;vrWv", "pdf_size": 578900, "recommendation":
"5;6;8", "confidence": "5;4;5", "correctness": "3;4;3", "technical_novelty": "3;2;3", "empirical_novelty": "2;3;4", "wc_summary_paper": "74;74;55", "wc_strength_and_weaknesses": "540;193;247", "wc_clarity_quality_novelty_and_reproducibility": "25;53;49", "wc_summary_review": "116;39;67", "wc_review": "755;359;418", "wc_reply_reviewers": "0;46;26", "wc_reply_authors": "1373;403;634", "reply_reviewers": "0;1;1", "reply_authors": "3;1;1", "recommendation_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 67.66666666666667, 8.956685895029603 ], "wc_strength_and_weaknesses_avg": [ 326.6666666666667, 152.45181388083108 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 42.333333333333336, 12.364824660660938 ], "wc_summary_review_avg": [ 74.0, 31.822423959633664 ], "wc_review_avg": [ 510.6666666666667, 174.44069351947542 ], "wc_reply_reviewers_avg": [ 24.0, 18.83259585576738 ], "wc_reply_authors_avg": [ 803.3333333333334, 413.7070890806146 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.18898223650461357, "corr_recommendation_correctness": -0.18898223650461363, "gs_citation": 74, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12123782355259278004&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "pdf": "https://openreview.net/pdf?id=3F6I-0-57SC", "email": "ucas.ac.cn;ucas.ac.cn;huawei.com;ucas.edu.cn;ucas.ac.cn;huawei.com;microsoft.com", "author_num": 7, "aff_unique_index": "0;0;1;0;0;1;2", "aff_unique_norm": "University of Chinese Academy of Sciences;Huawei;Microsoft", "aff_unique_dep": ";Huawei Technologies;Research", "aff_unique_url": "http://www.ucas.ac.cn;https://www.huawei.com;https://www.microsoft.com/en-us/research/group/asia", "aff_unique_abbr": "UCAS;Huawei;MSR Asia", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "3FdmckXo3cN", "title": "BPFL: Towards Efficient Byzantine-Robust and Provably Privacy-Preserving Federated Learning", "track": "main", "status": "Withdraw", "tldr": "We propose to address both Byzantine (security) attacks and data reconstruction (privacy) attacks against federated learning.", "abstract": "Federated learning (FL) is an emerging distributed learning paradigm without sharing participating clients' private data. However, existing works show that FL is vulnerable to both Byzantine (security) attacks and data reconstruction (privacy) attacks. Existing FL defenses only address one of the two attacks, and also face the efficiency issue. We propose BPFL, an efficient Byzantine-robust and provably privacy-preserving FL method that addresses all the issues. Specifically, we draw on the state-of-the-art Byzantine-robust FL method and use similarity metrics to measure the robustness of each participating client in FL. The validity of clients are formulated as circuit constraints on similarity metrics and verified via a zero-knowledge proof. Moreover, the client models are masked by a shared random vector, which is generated based on homomorphic encryption. 
In doing so, the server receives the masked client models rather than the true ones, which are proven to be private. BPFL is also efficient due to the usage of non-interactive zero-knowledge proof. Experimental results on various datasets show that our BPFL is efficient, Byzantine-robust, and privacy-preserving.", "keywords": "federated learning;Byzantine-robust;privacy-preserving", "primary_area": "", "supplementary_material": "/attachment/cf0c8efc9fe8e33279ac40178b3635466ae8b1ef.zip", "author": "Chenfei Nie;Binghui Wang;Yuede Ji;Qiang Li", "authorids": "~Chenfei_Nie1;~Binghui_Wang2;~Yuede_Ji1;li_qiang@jlu.edu.cn", "gender": "M;M;M;", "homepage": ";https://wangbinghui.net;https://yuede.github.io/;", "dblp": ";123/7149;138/4316;", "google_scholar": ";SoOztcEAAAAJ;1-GjVYgAAAAJ;", "orcid": "0000-0003-4334-5597;0000-0001-5616-060X;0000-0002-2419-6592;", "linkedin": ";;;", "or_profile": "~Chenfei_Nie1;~Binghui_Wang2;~Yuede_Ji1;li_qiang@jlu.edu.cn", "aff": "Jilin University;Illinois Institute of Technology;University of North Texas;", "aff_domain": "jlu.edu.cn;iit.edu;unt.edu;", "position": "MS student;Assistant Professor;Assistant Professor;", "bibtex": "@misc{\nnie2023bpfl,\ntitle={{BPFL}: Towards Efficient Byzantine-Robust and Provably Privacy-Preserving Federated Learning},\nauthor={Chenfei Nie and Binghui Wang and Yuede Ji and Qiang Li},\nyear={2023},\nurl={https://openreview.net/forum?id=3FdmckXo3cN}\n}", "github": "", "project": "", "reviewers": "Ltoy;tYSG;D7Nm;rLH4", "site": "https://openreview.net/forum?id=3FdmckXo3cN", "pdf_size": 3330045, "recommendation": "1;1;5;6", "confidence": "4;4;4;4", "correctness": "2;1;3;4", "technical_novelty": "1;2;2;2", "empirical_novelty": "2;2;3;2", "wc_summary_paper": "31;105;15;69", "wc_strength_and_weaknesses": "27;326;103;102", "wc_clarity_quality_novelty_and_reproducibility": "121;61;214;15", "wc_summary_review": "11;148;31;47", "wc_review": "190;640;363;233", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.25, 2.277608394786075 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.5, 1.118033988749895 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 55.0, 34.899856733230294 ], "wc_strength_and_weaknesses_avg": [ 139.5, 112.00111606586785 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 102.75, 74.41899959015842 ], "wc_summary_review_avg": [ 59.25, 52.803290617157565 ], "wc_review_avg": [ 356.5, 175.63385209007973 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.9326733179802504, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15768384673723646557&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;2", "aff_unique_norm": "Jilin University;Illinois Institute of Technology;University of North Texas", "aff_unique_dep": ";;", "aff_unique_url": "http://www.jlu.edu.cn;https://www.iit.edu;https://www.unt.edu", "aff_unique_abbr": "JLU;IIT;UNT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "China;United States" }, { "id": "3GDft6lexE", "title": "Cooperate or Compete: A New Perspective on Training of Generative 
Networks", "track": "main", "status": "Desk Reject", "tldr": "Generative networks can perform better and learn faster if training is modeled as an infinitely repeated simultaneous game", "abstract": "GANs have two competing modules: the generator module is trained to generate new examples, and the discriminator module is trained to discriminate real examples from generated examples. The training procedure of GAN is modeled as a finitely repeated simultaneous game. Each module tries to increase its performance at every repetition of the base game (at every batch of training data) in a non-cooperative manner. We observed that each module can perform better and learn faster if training is modeled as an infinitely repeated simultaneous game. At every repetition of the base game (at every batch of training data) the stronger module (whose performance is increased or remains the same compared to the previous batch of training data) cooperates with the weaker module (whose performance is decreased compared to the previous batch of training data) and only the weaker module is allowed to increase its performance. ", "keywords": "Generative Adversarial Networks;Nash Equilibrium;Correlated Equilibrium;Repeated Games", "primary_area": "", "supplementary_material": "", "author": "Sobhan Babu", "authorids": "~Sobhan_Babu1", "gender": "M", "homepage": "https://www.iith.ac.in/~sobhan/", "dblp": "233/3456.html", "google_scholar": "https://scholar.google.co.in/citations?user=UFMtsfkAAAAJ", "orcid": "", "linkedin": "", "or_profile": "~Sobhan_Babu1", "aff": "Indian Institute of Technology Hyderabad, Dhirubhai Ambani Institute Of Information and Communication Technology", "aff_domain": "iith.ac.in", "position": "Associate Professor", "bibtex": "@misc{\nbabu2023cooperate,\ntitle={Cooperate or Compete: A New Perspective on Training of Generative Networks},\nauthor={Sobhan Babu},\nyear={2023},\nurl={https://openreview.net/forum?id=3GDft6lexE}\n}", "github": "", "project": "", "reviewers": "", "site": "https://openreview.net/forum?id=3GDft6lexE", "pdf_size": 3140546, "recommendation": "", "confidence": "", "correctness": "", "technical_novelty": "", "empirical_novelty": "", "wc_summary_paper": "", "wc_strength_and_weaknesses": "", "wc_clarity_quality_novelty_and_reproducibility": "", "wc_summary_review": "", "wc_review": "", "wc_reply_reviewers": "", "wc_reply_authors": "", "reply_reviewers": "", "reply_authors": "", "recommendation_avg": [ 0, 0 ], "confidence_avg": [ 0, 0 ], "correctness_avg": [ 0, 0 ], "technical_novelty_avg": [ 0, 0 ], "empirical_novelty_avg": [ 0, 0 ], "wc_summary_paper_avg": [ 0, 0 ], "wc_strength_and_weaknesses_avg": [ 0, 0 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 0, 0 ], "wc_summary_review_avg": [ 0, 0 ], "wc_review_avg": [ 0, 0 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 1, 0 ], "authors#_avg": [ 1, 0 ], "corr_recommendation_confidence": 0, "corr_recommendation_correctness": 0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:iDPSlt2FgzIJ:scholar.google.com/&scioq=Cooperate+or+Compete:+A+New+Perspective+on+Training+of+Generative+Networks&hl=en&as_sdt=0,5", "gs_version_total": 3, "aff_unique_index": "0", "aff_unique_norm": "Indian Institute of Technology Hyderabad", "aff_unique_dep": "", "aff_unique_url": "https://www.iith.ac.in", "aff_unique_abbr": "IIT Hyderabad", "aff_campus_unique_index": "0", "aff_campus_unique": "Hyderabad", 
"aff_country_unique_index": "0", "aff_country_unique": "India" }, { "id": "3HX__RcSFZj", "title": "A Semantic Hierarchical Graph Neural Network for Text Classification", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "The key to the text classification task is language representation and important information extraction, and there are many related studies. In recent years, the research on graph neural network (GNN) in text classification has gradually emerged and shown its advantages, but the existing models mainly focus on directly inputting words as graph nodes into the GNN models ignoring the different levels of semantic structure information in the samples. To address the issue, we propose a new hierarchical graph neural network (HieGNN) which extracts corresponding information from word-level, sentence-level (sen-level) and document-level (doc-level) respectively. The doc-level focuses on processing samples from a global perspective, while sen-level and word-level focus on processing samples from the sentences and words themselves. The model is tested on five datasets, and compared with the pure GNN-based model and the hybrid GNN and BERT model, it achieves better classification results on two datasets and similar results on three datasets, which demonstrate that our model is able to obtain more useful information for classification from samples.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Shuai Hua;Xinxin Li;Yunpeng Jing;Qunfeng Liu", "authorids": "~Shuai_Hua2;lixin_2@163.com;jing.yunpeng@qq.com;liuqf@dgut.edu.cn", "gender": "M;;;", "homepage": ";;;", "dblp": ";;;", "google_scholar": ";;;", "orcid": "0000-0002-5361-313X;;;", "linkedin": ";;;", "or_profile": "~Shuai_Hua2;lixin_2@163.com;jing.yunpeng@qq.com;liuqf@dgut.edu.cn", "aff": "Dongguan University of Technology;;;", "aff_domain": "dgut.edu.cn;;;", "position": "MS student;;;", "bibtex": "@misc{\nhua2023a,\ntitle={A Semantic Hierarchical Graph Neural Network for Text Classification},\nauthor={Shuai Hua and Xinxin Li and Yunpeng Jing and Qunfeng Liu},\nyear={2023},\nurl={https://openreview.net/forum?id=3HX__RcSFZj}\n}", "github": "", "project": "", "reviewers": "9ans;6J1H;bXnL;mawE", "site": "https://openreview.net/forum?id=3HX__RcSFZj", "pdf_size": 327988, "recommendation": "3;3;3;8", "confidence": "5;4;5;3", "correctness": "2;2;2;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "1;2;2;3", "wc_summary_paper": "117;46;53;82", "wc_strength_and_weaknesses": "193;64;329;32", "wc_clarity_quality_novelty_and_reproducibility": "73;32;28;25", "wc_summary_review": "34;27;69;46", "wc_review": "417;169;479;185", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.25, 2.165063509461097 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "correctness_avg": [ 2.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 74.5, 28.00446392988089 ], "wc_strength_and_weaknesses_avg": [ 154.5, 117.39782791857778 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 39.5, 19.5 ], "wc_summary_review_avg": [ 44.0, 15.953056133543816 ], "wc_review_avg": [ 312.5, 137.3781278078865 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 
-0.8703882797784891, "corr_recommendation_correctness": 1.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14245028589352115311&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0", "aff_unique_norm": "Dongguan University of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.dgut.edu.cn", "aff_unique_abbr": "DGUT", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "id": "3HnIBTjlXTS", "title": "Visual Prompt Tuning For Test-time Domain Adaptation", "track": "main", "status": "Reject", "tldr": "Vision Transformer can generalize better during testing by tuning a set of visual prompts with only a little unlabeled target domain data.", "abstract": "Models should have the ability to adapt to unseen data during test-time to avoid performance drops caused by inevitable distribution shifts in real-world deployment scenarios. In this work, we tackle the practical yet challenging test-time adaptation (TTA) problem, where a model adapts to the target domain without accessing the source data. We propose a simple recipe called data-efficient prompt tuning (DePT) with two key ingredients. First, DePT plugs visual prompts into the vision Transformer and only tunes these source-initialized prompts during adaptation. We find such parameter-efficient finetuning can efficiently adapt the model representation to the target domain without overfitting to the noise in the learning objective. Second, DePT bootstraps the source representation to the target domain by memory bank-based online pseudo labeling. A hierarchical self-supervised regularization specially designed for prompts is jointly optimized to alleviate error accumulation during self-training. With much fewer tunable parameters, DePT demonstrates not only state-of-the-art performance on major adaptation benchmarks, but also superior data efficiency, i.e., adaptation with only 1\% or 10\% data without much performance degradation compared to 100\% data. In addition, DePT is versatile and can be extended to online or multi-source TTA settings.", "keywords": "deep learning;test-time domain adaptation;unsupervised learning;visual prompt tuning;vision transformer;self-supervision", "primary_area": "", "supplementary_material": "", "author": "Yunhe Gao;Xingjian Shi;Yi Zhu;Hao Wang;Zhiqiang Tang;Xiong Zhou;Mu Li;Dimitris N.
Metaxas", "authorids": "~Yunhe_Gao2;~Xingjian_Shi1;~Yi_Zhu1;~Hao_Wang3;~Zhiqiang_Tang1;~Xiong_Zhou2;~Mu_Li4;~Dimitris_N._Metaxas1", "gender": "M;M;M;M;;;M;M", "homepage": "https://www.cs.rutgers.edu/people/graduate-students/details/yunhe-gao;https://sxjscience.github.io/;https://bryanyzhu.github.io/;https://sites.google.com/site/zhiqiangtanghomepage/home;;https://github.com/mli;https://www.cs.rutgers.edu/~dnm/;http://www.wanghao.in", "dblp": "237/4741;145/9987;;71/10098-1;06/4105;;m/DNMetaxas;w/HaoWang-14", "google_scholar": "TOsFPu4AAAAJ;https://scholar.google.com.hk/citations?user=P4G6H7oAAAAJ;IXw4UiwAAAAJ;https://scholar.google.com/citations?view_op=list_works;MqZPM6AAAAAJ;;https://scholar.google.com.tw/citations?user=a7VNhCIAAAAJ;NrOA9QoAAAAJ", "orcid": ";;0000-0002-6482-6712;;;;;", "linkedin": ";;yi-zhu-546a437a/;;;;dimitris-metaxas-1bb74914/;", "or_profile": "~Yunhe_Gao2;~Xingjian_Shi1;~Yi_Zhu1;~Zhiqiang_Tang1;~Xiong_Zhou2;~Mu_Li4;~Dimitris_Metaxas1;~Hao_Wang4", "aff": "Rutgers University;Amazon Web Services;Amazon;AWS;Amazon;Amazon;Rutgers University;Rutgers University", "aff_domain": "rutgers.edu;amazon.com;amazon.com;amazon.com;amazon.com;amazon.com;cs.rutgers.edu;cs.rutgers.edu", "position": "PhD student;Applied Scientist;Applied Scientist;Applied Scientist;Applied Scientist;Researcher;Full Professor;Assistant Professor", "bibtex": "@misc{\ngao2023visual,\ntitle={Visual Prompt Tuning For Test-time Domain Adaptation},\nauthor={Yunhe Gao and Xingjian Shi and Yi Zhu and Hao Wang and Zhiqiang Tang and Xiong Zhou and Mu Li and Dimitris N. Metaxas},\nyear={2023},\nurl={https://openreview.net/forum?id=3HnIBTjlXTS}\n}", "github": "", "project": "", "reviewers": "n2AM;H4uU;Za8v;zNVW", "site": "https://openreview.net/forum?id=3HnIBTjlXTS", "pdf_size": 3159882, "recommendation": "5;5;5;6", "confidence": "4;4;4;4", "correctness": "3;4;3;3", "technical_novelty": "2;2;3;2", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "56;45;99;304", "wc_strength_and_weaknesses": "331;171;158;497", "wc_clarity_quality_novelty_and_reproducibility": "50;161;46;529", "wc_summary_review": "58;46;37;465", "wc_review": "495;423;340;1795", "wc_reply_reviewers": "0;0;111;192", "wc_reply_authors": "1529;1042;1030;1427", "reply_reviewers": "0;0;1;1", "reply_authors": "3;3;3;3", "recommendation_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 126.0, 104.7306067966762 ], "wc_strength_and_weaknesses_avg": [ 289.25, 137.94269643587515 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 196.5, 197.4392311573361 ], "wc_summary_review_avg": [ 151.5, 181.15256001503263 ], "wc_review_avg": [ 763.25, 598.2007919586868 ], "wc_reply_reviewers_avg": [ 75.75, 80.98263702794569 ], "wc_reply_authors_avg": [ 1257.0, 223.96316661451274 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 3.0, 0.0 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": -0.3333333333333333, "gs_citation": 91, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13014566741154429642&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;1;1;1;1;0;0", "aff_unique_norm": "Rutgers University;Amazon", "aff_unique_dep": ";Amazon Web Services", "aff_unique_url": "https://www.rutgers.edu;https://aws.amazon.com", "aff_unique_abbr": 
"Rutgers;AWS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "3IFO8Jii0vI", "title": "Algorithmic Determination of the Combinatorial Structure of the Linear Regions of ReLU Neural Networks", "track": "main", "status": "Reject", "tldr": "", "abstract": "We algorithmically determine the regions and facets of all dimensions of the canonical polyhedral complex, the universal object into which a ReLU network decomposes its input space. We show that the locations of the vertices of the canonical polyhedral complex along with their signs with respect to layer maps determine the full facet structure across all dimensions. We present an algorithm which calculates this full combinatorial structure, making use of our theorems that the dual complex to the canonical polyhedral complex is cubical and it possesses a multiplication compatible with its facet structure. The resulting algorithm is numerically stable, polynomial time in the number of intermediate neurons, and obtains accurate information across all dimensions. This permits us to obtain, for example, the true topology of the decision boundaries of networks with low-dimensional inputs. We run empirics on such networks at initialization, finding that width alone does not increase observed topology, but width in the presence of depth does. ", "keywords": "ReLU networks;algebraic topology;linear regions;computational geometry", "primary_area": "", "supplementary_material": "/attachment/97dfddb28a14097066f892a695ffec0511879274.zip", "author": "Marissa Masden", "authorids": "~Marissa_Masden1", "gender": "F", "homepage": "", "dblp": "", "google_scholar": "", "orcid": "0000-0002-0522-1297", "linkedin": "", "or_profile": "~Marissa_Masden1", "aff": "University of Oregon", "aff_domain": "uoregon.edu", "position": "PhD student", "bibtex": "@misc{\nmasden2023algorithmic,\ntitle={Algorithmic Determination of the Combinatorial Structure of the Linear Regions of Re{LU} Neural Networks},\nauthor={Marissa Masden},\nyear={2023},\nurl={https://openreview.net/forum?id=3IFO8Jii0vI}\n}", "github": "", "project": "", "reviewers": "3fLq;Pp96;6J8n", "site": "https://openreview.net/forum?id=3IFO8Jii0vI", "pdf_size": 1312629, "recommendation": "5;6;6", "confidence": "4;3;4", "correctness": "2;4;4", "technical_novelty": "3;4;3", "empirical_novelty": "2;3;3", "wc_summary_paper": "87;150;223", "wc_strength_and_weaknesses": "552;264;189", "wc_clarity_quality_novelty_and_reproducibility": "105;99;125", "wc_summary_review": "90;92;76", "wc_review": "834;605;613", "wc_reply_reviewers": "0;4;55", "wc_reply_authors": "1026;761;210", "reply_reviewers": "0;1;1", "reply_authors": "2;1;2", "recommendation_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.9428090415820634 ], "technical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 153.33333333333334, 55.57177541002467 ], "wc_strength_and_weaknesses_avg": [ 335.0, 156.46724896923317 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 109.66666666666667, 11.115554667022044 ], "wc_summary_review_avg": [ 86.0, 7.118052168020874 ], "wc_review_avg": [ 684.0, 106.116288413545 ], "wc_reply_reviewers_avg": [ 19.666666666666668, 25.037749277618563 ], "wc_reply_authors_avg": [ 665.6666666666666, 339.8826594903338 ], 
"reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 1, 0 ], "corr_recommendation_confidence": -0.4999999999999999, "corr_recommendation_correctness": 1.0, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7183300852891519461&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0", "aff_unique_norm": "University of Oregon", "aff_unique_dep": "", "aff_unique_url": "https://www.uoregon.edu", "aff_unique_abbr": "UO", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "3IXDfzaJ2LF", "title": "Flatter, Faster: Scaling Momentum for Optimal Speedup of SGD", "track": "main", "status": "Reject", "tldr": "We find the implicit bias induced by noise in SGD with momentum; this leads us to identify a scaling limit of the momentum hyperparameter in the learning rate that maximally accelerates training, without depleting generalization.", "abstract": "Commonly used optimization algorithms often show a trade-off between good generalization and fast training times. For instance, stochastic gradient descent (SGD) tends to have good generalization; however, adaptive gradient methods have superior training times. Momentum can help accelerate training with SGD, but so far there has been no principled way to select the momentum hyperparameter. Here we study implicit bias arising from the interplay between SGD with label noise and momentum in the training of overparameterized neural networks. We find that scaling the momentum hyperparameter $1-\\beta$ with the learning rate to the power of $2/3$ maximally accelerates training, without sacrificing generalization. To analytically derive this result we develop an architecture-independent framework, where the main assumption is the existence of a degenerate manifold of global minimizers, as is natural in overparameterized models. Training dynamics display the emergence of two characteristic timescales that are well-separated for generic values of the hyperparameters. The maximum acceleration of training is reached when these two timescales meet, which in turn determines the scaling limit we propose. 
We perform experiments, including matrix sensing and ResNet on CIFAR10, which provide evidence for the robustness of these results.", "keywords": "SGD;momentum;acceleration;generalization;scaling limit;deep learning;implicit bias;implicit regularization", "primary_area": "", "supplementary_material": "", "author": "Aditya Cowsik;Tankut Can;Paolo Glorioso", "authorids": "~Aditya_Cowsik1;~Tankut_Can1;~Paolo_Glorioso1", "gender": "M;M;", "homepage": ";https://sites.google.com/view/tankut-can;", "dblp": ";;", "google_scholar": "23og9KYAAAAJ;H5MicWUAAAAJ;4y1ZjNcAAAAJ", "orcid": ";0000-0002-0999-2355;", "linkedin": ";;", "or_profile": "~Aditya_Cowsik1;~Tankut_Can1;~Paolo_Glorioso1", "aff": "Stanford University;Institute for Advanced Study, Princeton;Stanford University", "aff_domain": "stanford.edu;ias.edu;stanford.edu", "position": "PhD student;Researcher;Postdoc", "bibtex": "@misc{\ncowsik2023flatter,\ntitle={Flatter, Faster: Scaling Momentum for Optimal Speedup of {SGD}},\nauthor={Aditya Cowsik and Tankut Can and Paolo Glorioso},\nyear={2023},\nurl={https://openreview.net/forum?id=3IXDfzaJ2LF}\n}", "github": "", "project": "", "reviewers": "e9QR;F44e;oJpk", "site": "https://openreview.net/forum?id=3IXDfzaJ2LF", "pdf_size": 976588, "recommendation": "3;6;6", "confidence": "4;3;3", "correctness": "2;4;4", "technical_novelty": "2;3;3", "empirical_novelty": "0;1;2", "wc_summary_paper": "46;104;71", "wc_strength_and_weaknesses": "631;95;134", "wc_clarity_quality_novelty_and_reproducibility": "17;22;9", "wc_summary_review": "83;162;34", "wc_review": "777;383;248", "wc_reply_reviewers": "539;0;0", "wc_reply_authors": "2726;726;594", "reply_reviewers": "2;0;0", "reply_authors": "6;2;2", "recommendation_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.9428090415820634 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 73.66666666666667, 23.753362335093158 ], "wc_strength_and_weaknesses_avg": [ 286.6666666666667, 244.00045537298126 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 16.0, 5.354126134736337 ], "wc_summary_review_avg": [ 93.0, 52.73202695389839 ], "wc_review_avg": [ 469.3333333333333, 224.4256867849331 ], "wc_reply_reviewers_avg": [ 179.66666666666666, 254.0870367063661 ], "wc_reply_authors_avg": [ 1348.6666666666667, 975.4114801229047 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 3.3333333333333335, 1.8856180831641267 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": 1.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2191395481680509560&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;0", "aff_unique_norm": "Stanford University;Institute for Advanced Study", "aff_unique_dep": ";", "aff_unique_url": "https://www.stanford.edu;https://ias.edu", "aff_unique_abbr": "Stanford;IAS", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Stanford;Princeton", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "3KHzMQUOH4x", "title": "DEEAPR: Controllable Depth Enhancement via Adaptive Parametric Feature Rotation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Understanding depth of an image provides viewers with a better interpretation of the 3D structures 
within an image. Photographers utilize numerous factors that can affect depth perception to aesthetically improve a scene. Unfortunately, controlling depth perception after the image has been captured is a difficult process as it requires accurate and explicit depth information. Also, defining a quantitative metric of a subjective quality (i.e., depth perception) is difficult, which makes supervised learning a great challenge. To this end, we propose DEpth Enhancement via Adaptive Parametric feature Rotation (DEEAPR), which modulates the perceptual depth of an input scene using a single control parameter without the need for explicit depth information. We first embed content-independent depth perception of a scene by visual representation learning. Then, we train the controllable depth enhancer network with a novel modulator, parametric feature rotation block (PFRB), that allows for continuous modulation of a representative feature. We demonstrate the effectiveness of our proposed approach by verifying each component through an ablation study and comparison to other controllable methods.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/e1f8f96c93b2e3a7da4eb2fd8eb7638cfa81e4ed.zip", "author": "Hanul Shin;Youngchan Song;Soo Min Kang", "authorids": "~Hanul_Shin2;~Youngchan_Song1;~Soo_Min_Kang1", "gender": ";M;", "homepage": ";;", "dblp": ";;", "google_scholar": ";;", "orcid": ";0000-0001-9467-5348;", "linkedin": ";;soo-min-kang-731a748a", "or_profile": "~Hanul_Shin2;~Youngchan_Song1;~Soo_Min_Kang1", "aff": ";Samsung;Samsung", "aff_domain": ";samsung.com;samsung.com", "position": ";Researcher;Staff Engineer", "bibtex": "@misc{\nshin2023deeapr,\ntitle={{DEEAPR}: Controllable Depth Enhancement via Adaptive Parametric Feature Rotation},\nauthor={Hanul Shin and Youngchan Song and Soo Min Kang},\nyear={2023},\nurl={https://openreview.net/forum?id=3KHzMQUOH4x}\n}", "github": "", "project": "", "reviewers": "4wXg;MFV5;mEsF;3SWe", "site": "https://openreview.net/forum?id=3KHzMQUOH4x", "pdf_size": 3384381, "recommendation": "1;3;3;3", "confidence": "4;3;4;5", "correctness": "2;3;2;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "1;2;2;2", "wc_summary_paper": "23;56;77;67", "wc_strength_and_weaknesses": "111;183;325;212", "wc_clarity_quality_novelty_and_reproducibility": "7;11;45;76", "wc_summary_review": "39;33;36;188", "wc_review": "180;283;483;543", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 2.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 55.75, 20.314711418083203 ], "wc_strength_and_weaknesses_avg": [ 207.75, 77.03692296554945 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 34.75, 28.021197333447407 ], "wc_summary_review_avg": [ 74.0, 65.85210702779372 ], "wc_review_avg": [ 372.25, 146.9240875418323 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link":
"https://scholar.google.com/scholar?q=related:hunMAFtmknwJ:scholar.google.com/&scioq=DEEAPR:+Controllable+Depth+Enhancement+via+Adaptive+Parametric+Feature+Rotation&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Samsung", "aff_unique_dep": "Samsung", "aff_unique_url": "https://www.samsung.com", "aff_unique_abbr": "Samsung", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "title": "Distributionally Robust Post-hoc Classifiers under Prior Shifts", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11787", "id": "3KUfbI9_DQE", "poster": "", "openreview": "https://openreview.net/forum?id=3KUfbI9_DQE", "slides": "https://iclr.cc/virtual/2023/poster/11787", "video": "https://iclr.cc/virtual/2023/poster/11787", "author_site": "Jiaheng Wei, Harikrishna Narasimhan, Ehsan Amid, Wen-Sheng Chu, Yang Liu, Abhishek Kumar", "tldr": "We propose a method for scaling the model predictions at test-time for improved distribution robustness to prior shifts. ", "abstract": "The generalization ability of machine learning models degrades significantly when the test distribution shifts away from the training distribution. We investigate the problem of training models that are robust to shifts caused by changes in the distribution of class-priors or group-priors. The presence of skewed training priors can often lead to the models overfitting to spurious features. Unlike existing methods, which optimize for either the worst or the average performance over classes or groups, our work is motivated by the need for finer control over the robustness properties of the model. We present an extremely lightweight post-hoc approach that performs scaling adjustments to predictions from a pre-trained model, with the goal of minimizing a distributionally robust loss around a chosen target distribution. These adjustments are computed by solving a constrained optimization problem on a validation set and applied to the model during test time. Our constrained optimization objective is inspired from a natural notion of robustness to controlled distribution shifts. Our method comes with provable guarantees and empirically makes a strong case for distributional robust post-hoc classifiers. 
An empirical implementation is available at https://github.com/weijiaheng/Drops.\n", "keywords": "Distributional robustness;post-hoc scaling;group robustness;class imbalance;spurious correlations", "primary_area": "", "supplementary_material": "/attachment/9651a20c4b10906126657e675e81c015b8e75025.zip", "author": "Jiaheng Wei;Harikrishna Narasimhan;Ehsan Amid;Wen-Sheng Chu;Yang Liu;Abhishek Kumar", "authorids": "~Jiaheng_Wei1;~Harikrishna_Narasimhan1;~Ehsan_Amid1;~Wen-Sheng_Chu1;~Yang_Liu3;~Abhishek_Kumar1", "gender": "M;M;M;;M;", "homepage": "https://sites.google.com/ucsc.edu/jiahengwei;https://hari-research.github.io/;https://sites.google.com/corp/view/eamid/;;http://www.yliuu.com;http://inductivebias.ml", "dblp": "270/8936;56/7573;142/5754;35/8617;51/3710-18;67/6188-1", "google_scholar": "https://scholar.google.com/citations?hl=en;7X_oT4YAAAAJ;https://scholar.google.fi/citations?user=F6omR3gAAAAJ;R-OrlSgAAAAJ;jKrIVCIAAAAJ;6vghMS0AAAAJ", "orcid": ";;;;0000-0001-8420-6011;", "linkedin": "jiahengwei/;;ehsan-amid-63aba754;;;", "or_profile": "~Jiaheng_Wei1;~Harikrishna_Narasimhan1;~Ehsan_Amid1;~Wen-Sheng_Chu1;~Yang_Liu3;~Abhishek_Kumar1", "aff": "ByteDance Inc.;Google;Google DeepMind;Google Research;University of California, Santa Cruz;Google DeepMind", "aff_domain": "bytedance.com;google.com;google.com;google.com;ucsc.edu;google.com", "position": "Intern;Research Scientist;Research Scientist;Research Scientist;Assistant Professor;Research Scientist", "bibtex": "@inproceedings{\nwei2023distributionally,\ntitle={Distributionally Robust Post-hoc Classifiers under Prior Shifts},\nauthor={Jiaheng Wei and Harikrishna Narasimhan and Ehsan Amid and Wen-Sheng Chu and Yang Liu and Abhishek Kumar},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=3KUfbI9_DQE}\n}", "github": "", "project": "", "reviewers": "xUWa;P4Hw;b7xp", "pdf_size": 448007, "recommendation": "3;6;6", "confidence": "3;3;4", "correctness": "3;3;3", "technical_novelty": "2;3;3", "empirical_novelty": "2;0;3", "wc_summary_paper": "65;73;89", "wc_strength_and_weaknesses": "171;322;149", "wc_clarity_quality_novelty_and_reproducibility": "35;50;10", "wc_summary_review": "14;102;38", "wc_review": "285;547;286", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "411;1642;1904", "reply_reviewers": "0;0;0", "reply_authors": "1;3;4", "recommendation_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.6666666666666667, 1.247219128924647 ], "wc_summary_paper_avg": [ 75.66666666666667, 9.977753031397176 ], "wc_strength_and_weaknesses_avg": [ 214.0, 76.89386624865905 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 31.666666666666668, 16.49915822768611 ], "wc_summary_review_avg": [ 51.333333333333336, 37.14236873915765 ], "wc_review_avg": [ 372.6666666666667, 123.27295819530829 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1319.0, 650.9014262287852 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.6666666666666665, 1.247219128924647 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.5, "corr_recommendation_correctness": 0.0, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15438170053364474857&as_sdt=5,24&sciodt=0,24&hl=en", "gs_version_total": 6, "pdf": 
"https://openreview.net/pdf?id=3KUfbI9_DQE", "email": "bytedance.com;google.com;google.com;google.com;ucsc.edu;google.com", "author_num": 6, "aff_unique_index": "0;1;1;1;2;1", "aff_unique_norm": "ByteDance;Google;University of California, Santa Cruz", "aff_unique_dep": ";Google;", "aff_unique_url": "https://www.bytedance.com;https://www.google.com;https://www.ucsc.edu", "aff_unique_abbr": "ByteDance;Google;UCSC", "aff_campus_unique_index": "1;1;2", "aff_campus_unique": ";Mountain View;Santa Cruz", "aff_country_unique_index": "0;1;2;1;1;2", "aff_country_unique": "China;United States;United Kingdom" }, { "title": "Conditional Positional Encodings for Vision Transformers", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12151", "id": "3KWnuT-R1bh", "poster": "/media/PosterPDFs/ICLR%202023/12151.png?t=1680965577.618862", "openreview": "https://openreview.net/forum?id=3KWnuT-R1bh", "slides": "https://iclr.cc/virtual/2023/poster/12151", "video": "https://iclr.cc/virtual/2023/poster/12151", "author_site": "Xiangxiang Chu, Zhi Tian, Bo Zhang, Xinlong Wang, Chunhua Shen", "tldr": "A conditional positional encoding scheme for vision transformers", "abstract": "We propose a conditional positional encoding (CPE) scheme for vision Transformers. Unlike previous fixed or learnable positional encodings that are predefined and independent of input tokens, CPE is dynamically generated and conditioned on the local neighborhood of the input tokens. As a result, CPE can easily generalize to the input sequences that are longer than what the model has ever seen during the training. Besides, CPE can keep the desired translation equivalence in vision tasks, resulting in improved performance. We implement CPE with a simple Position Encoding Generator (PEG) to get seamlessly incorporated into the current Transformer framework. Built on PEG, we present Conditional Position encoding Vision Transformer (CPVT). 
We demonstrate that CPVT has attention maps visually similar to those with learned positional encodings, and delivers superior results.", "keywords": "Vision Transformer", "primary_area": "", "supplementary_material": "", "author": "Xiangxiang Chu;Zhi Tian;Bo Zhang;Xinlong Wang;Chunhua Shen", "authorids": "~Xiangxiang_Chu1;~Zhi_Tian2;~Bo_Zhang7;~Xinlong_Wang2;~Chunhua_Shen2", "gender": "M;M;M;M;", "homepage": "https://cxxgtxy.github.io/;;;;", "dblp": "207/8002;;36/2259-46;;", "google_scholar": "jn21pUsAAAAJ;xSF3BBoAAAAJ;uUNQnu0AAAAJ;DPz0DjYAAAAJ;", "orcid": "0000-0003-2548-0605;;0000-0003-0564-617X;;", "linkedin": ";;bo-zhang-20a86588/;;", "or_profile": "~Xiangxiang_Chu1;~Zhi_Tian2;~Bo_Zhang7;~Xinlong_Wang2;~Chunhua_Shen2", "aff": "MeiTuan;Meituan Inc.;Meituan Inc.;Beijing Academy of Artificial Intelligence;", "aff_domain": "meituan.com;meituan.com;meituan.com;baai.ac.cn;", "position": "Senior Engineer;Researcher;Senior Software Engineer;Researcher;", "bibtex": "@inproceedings{\nchu2023conditional,\ntitle={Conditional Positional Encodings for Vision Transformers},\nauthor={Xiangxiang Chu and Zhi Tian and Bo Zhang and Xinlong Wang and Chunhua Shen},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=3KWnuT-R1bh}\n}", "github": "", "project": "", "reviewers": "oX4R;eRhn;tvtM;Md5K", "pdf_size": 1584152, "recommendation": "5;6;8;8", "confidence": "4;4;4;4", "correctness": "4;3;4;4", "technical_novelty": "2;3;3;4", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "88;103;116;47", "wc_strength_and_weaknesses": "300;647;241;304", "wc_clarity_quality_novelty_and_reproducibility": "102;713;50;61", "wc_summary_review": "19;114;51;251", "wc_review": "509;1577;458;663", "wc_reply_reviewers": "0;1233;78;0", "wc_reply_authors": "694;3038;565;242", "reply_reviewers": "0;4;1;0", "reply_authors": "3;10;1;1", "recommendation_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 88.5, 25.927784324928346 ], "wc_strength_and_weaknesses_avg": [ 373.0, 160.1483687085198 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 231.5, 278.66871012009943 ], "wc_summary_review_avg": [ 108.75, 88.95609872290937 ], "wc_review_avg": [ 801.75, 453.9082368717272 ], "wc_reply_reviewers_avg": [ 327.75, 523.6154958554989 ], "wc_reply_authors_avg": [ 1134.75, 1111.1074149244077 ], "reply_reviewers_avg": [ 1.25, 1.6393596310755 ], "reply_authors_avg": [ 3.75, 3.6996621467371855 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 802, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17870066505440679476&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=3KWnuT-R1bh", "email": "meituan.com;meituan.com;meituan.com;baai.ac.cn;", "author_num": 5, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Meituan;Meituan Inc.;Beijing Academy of Artificial Intelligence", "aff_unique_dep": ";;", "aff_unique_url": "https://www.meituan.com;https://www.meituan.com;https://www.baaic.cn", "aff_unique_abbr": "MeiTuan;Meituan;BAAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "3LUxNRrhK1", "title":
"Robust Graph Representation Learning via Predictive Coding", "track": "main", "status": "Reject", "tldr": "For the first time, we use predictive coding in deep geometric learning and demonstrate that we can enhance the robustness of learning representation through energy minimization.", "abstract": "Graph neural networks have recently shown outstanding results in diverse types of tasks in machine learning, providing interdisciplinary state-of-the-art performance on structured data. However, they have been proved to be vulnerable to imperceptible adversarial attacks and shown to be unfit for out-of-distribution generalisation.\nHere, we address this problem by introducing a novel message-passing scheme based on the theory of predictive coding, an energy-based alternative to back-propagation that has its roots in neuroscience.\nAs both graph convolution and predictive coding can be seen as low-pass filtering mechanisms, we postulate that predictive coding adds a second efficient filter to the messaging passing process which enhances the robustness of the learned representation. Through an extensive set of experiments, we show that the proposed model attains comparable performance to its graph convolution network counterpart, delivering strictly better performance on inductive tasks. Most importantly, we show that the energy minimization enhances the robustness of the produced presentation and can be leveraged to further calibrate our models and provide representations that are more robust against advanced graph adversarial attacks.\n", "keywords": "Predictive coding;deep geometric learning;deep learning;machine learning;bio-inspired learning;neuroscience", "primary_area": "", "supplementary_material": "/attachment/61f897c78dbecd8bbb2a040b58153a7344e787d8.zip", "author": "Billy Byiringiro;Tommaso Salvatori;Thomas Lukasiewicz", "authorids": "~Billy_Byiringiro1;~Tommaso_Salvatori1;~Thomas_Lukasiewicz2", "gender": "M;M;", "homepage": ";https://www.cs.ox.ac.uk/people/tommaso.salvatori/;https://www.cs.ox.ac.uk/people/thomas.lukasiewicz/", "dblp": ";270/2016;l/ThomasLukasiewicz", "google_scholar": ";https://scholar.google.com/citations?hl=en;arjucpEAAAAJ", "orcid": ";;", "linkedin": "billy-byiringiro/;;", "or_profile": "~Billy_Byiringiro1;~Tommaso_Salvatori1;~Thomas_Lukasiewicz2", "aff": ";VERSES;Department of Computer Science, University of Oxford", "aff_domain": ";verses.ai;cs.ox.ac.uk", "position": ";Researcher;Full Professor", "bibtex": "@misc{\nbyiringiro2023robust,\ntitle={Robust Graph Representation Learning via Predictive Coding},\nauthor={Billy Byiringiro and Tommaso Salvatori and Thomas Lukasiewicz},\nyear={2023},\nurl={https://openreview.net/forum?id=3LUxNRrhK1}\n}", "github": "", "project": "", "reviewers": "3kAN;b3aa;opfV;evdf", "site": "https://openreview.net/forum?id=3LUxNRrhK1", "pdf_size": 3215691, "recommendation": "3;3;3;5", "confidence": "3;4;4;3", "correctness": "2;2;3;4", "technical_novelty": "2;2;1;2", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "79;38;70;47", "wc_strength_and_weaknesses": "235;153;442;315", "wc_clarity_quality_novelty_and_reproducibility": "4;137;72;157", "wc_summary_review": "76;20;72;138", "wc_review": "394;348;656;657", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "121;383;305;238", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.75, 0.82915619758885 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], 
"empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 58.5, 16.62077013859466 ], "wc_strength_and_weaknesses_avg": [ 286.25, 106.61466831538708 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 92.5, 59.98541489395568 ], "wc_summary_review_avg": [ 76.5, 41.81805830021284 ], "wc_review_avg": [ 513.75, 143.67389289637836 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 261.75, 96.10768699745094 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.8703882797784891, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14371434629798646959&as_sdt=800005&sciodt=0,15&hl=en", "gs_version_total": 4, "aff_unique_index": "1", "aff_unique_norm": ";University of Oxford", "aff_unique_dep": ";Department of Computer Science", "aff_unique_url": ";https://www.ox.ac.uk", "aff_unique_abbr": ";Oxford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Oxford", "aff_country_unique_index": "1", "aff_country_unique": ";United Kingdom" }, { "id": "3M1JnCdz-5F", "title": "Learning to Generate Pseudo Anomalies", "track": "main", "status": "Withdraw", "tldr": "We propose learning mechanism to generate pseudo anomalies for one-class classification in anomaly detection.", "abstract": "Due to rare occurrence of anomalous events, anomaly detection is often seen as one-class classification (OCC) problem. In this setting, an autoencoder (AE) is typically trained to reconstruct using only normal data in order to learn normalcy representations. It is expected that, at test time, the AE can well reconstruct normal data while poorly reconstructing anomalous data. However, anomalous data is often well reconstructed as well. This phenomenon can be attributed to the fact that when training AE with only normal data, the boundary between normal and abnormal data is unknown, consequently resulting in a boundary that includes the abnormal data as well. To alleviate this problem, we utilize pseudo anomalies to limit the reconstruction capability of an AE. Without imposing strong inductive bias, pseudo anomalies are generated by adding noise to the normal data. Moreover, to improve the quality of pseudo anomalies, we propose a learning mechanism to generate noise by exploiting the aforementioned weakness of AE, i.e., reconstructing anomalies too well. 
Evaluations on Ped2, Avenue, ShanghaiTech, and CIFAR-10 datasets demonstrate the effectiveness of our approach in improving the discriminative capability of AEs for anomaly detection.", "keywords": "anomaly detection;generative model;pseudo anomaly;autoencoder", "primary_area": "", "supplementary_material": "", "author": "Marcella Astrid;Muhammad Zaigham Zaheer;Seung-Ik Lee", "authorids": "~Marcella_Astrid1;~Muhammad_Zaigham_Zaheer1;~Seung-Ik_Lee1", "gender": ";M;M", "homepage": ";http://zaighamzaheer.com;https://sites.google.com/view/cvml-ust", "dblp": ";260/4206;30/1902", "google_scholar": ";nFxWrXEAAAAJ;", "orcid": ";;0000-0003-2986-7540", "linkedin": ";;", "or_profile": "~Marcella_Astrid1;~Muhammad_Zaigham_Zaheer1;~Seung-Ik_Lee1", "aff": ";Mohamed bin Zayed University of Artificial Intelligence;Electronics and Telecommunications Research Institute", "aff_domain": ";mbzuai.ac.ae;etri.re.kr", "position": ";Researcher;Principal researcher", "bibtex": "@misc{\nastrid2023learning,\ntitle={Learning to Generate Pseudo Anomalies},\nauthor={Marcella Astrid and Muhammad Zaigham Zaheer and Seung-Ik Lee},\nyear={2023},\nurl={https://openreview.net/forum?id=3M1JnCdz-5F}\n}", "github": "", "project": "", "reviewers": "pMSo;84Ky;tNeH", "site": "https://openreview.net/forum?id=3M1JnCdz-5F", "pdf_size": 1296567, "recommendation": "3;3;5", "confidence": "5;4;5", "correctness": "2;3;3", "technical_novelty": "1;4;2", "empirical_novelty": "0;2;1", "wc_summary_paper": "84;91;64", "wc_strength_and_weaknesses": "329;297;222", "wc_clarity_quality_novelty_and_reproducibility": "18;242;11", "wc_summary_review": "26;37;23", "wc_review": "457;667;320", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 1.247219128924647 ], "empirical_novelty_avg": [ 1.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 79.66666666666667, 11.440668201153676 ], "wc_strength_and_weaknesses_avg": [ 282.6666666666667, 44.84293577464447 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 90.33333333333333, 107.2825967040114 ], "wc_summary_review_avg": [ 28.666666666666668, 6.018490028422596 ], "wc_review_avg": [ 481.3333333333333, 142.70326633340326 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.4999999999999999, "corr_recommendation_correctness": 0.5, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:47mTjkrp_TUJ:scholar.google.com/&scioq=Learning+to+Generate+Pseudo+Anomalies&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Mohamed bin Zayed University of Artificial Intelligence;Electronics and Telecommunications Research Institute", "aff_unique_dep": ";", "aff_unique_url": "https://mbzuai.ac.ae;http://www.etri.re.kr", "aff_unique_abbr": "MBZUAI;ETRI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United Arab Emirates;South Korea" }, { "title": "Near-optimal Policy Identification in Active Reinforcement Learning", "status": "Top-5%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11757", "id": "3OR2tbtnYC-", 
"poster": "/media/PosterPDFs/ICLR%202023/11757.png?t=1681971606.0121577", "openreview": "https://openreview.net/forum?id=3OR2tbtnYC-", "slides": "https://iclr.cc/virtual/2023/poster/11757", "video": "https://iclr.cc/virtual/2023/poster/11757", "author_site": "Xiang Li, Viraj Mehta, Johannes Kirschner, Ian Char, Willie Neiswanger, Jeff Schneider, Andreas Krause, Ilija Bogunovic", "tldr": "We propose a novel kernelized LSVI algorithm for active reinforcement learning which provably identifies a near-optimal policy uniformly over the entire state space.", "abstract": "Many real-world reinforcement learning tasks require control of complex dynamical systems that involve both costly data acquisition processes and large state spaces. In cases where the expensive transition dynamics can be readily evaluated at specified states (e.g., via a simulator), agents can operate in what is often referred to as planning with a \\emph{generative model}. We propose the AE-LSVI algorithm for best policy identification, a novel variant of the kernelized least-squares value iteration (LSVI) algorithm that combines optimism with pessimism for active exploration (AE). AE-LSVI provably identifies a near-optimal policy \\emph{uniformly} over an entire state space and achieves polynomial sample complexity guarantees that are independent of the number of states. When specialized to the recently introduced offline contextual Bayesian optimization setting, our algorithm achieves improved sample complexity bounds. Experimentally, we demonstrate that AE-LSVI outperforms other RL algorithms in a variety of environments when robustness to the initial state is required. ", "keywords": "reinforcement learning;contextual bayesian optimization;kernelized least-squares value iteration", "primary_area": "", "supplementary_material": "/attachment/7c1bb7a69faa78900abb2e05ba29ad0dfe0ebc1b.zip", "author": "Xiang Li;Viraj Mehta;Johannes Kirschner;Ian Char;Willie Neiswanger;Jeff Schneider;Andreas Krause;Ilija Bogunovic", "authorids": "~Xiang_Li42;~Viraj_Mehta1;~Johannes_Kirschner1;~Ian_Char1;~Willie_Neiswanger2;~Jeff_Schneider1;~Andreas_Krause1;~Ilija_Bogunovic2", "gender": "M;M;;M;M;;M;M", "homepage": ";http://virajm.com;;http://ianchar.com;https://willieneis.github.io/;https://www.cs.cmu.edu/~schneide;https://las.inf.ethz.ch/krausea;http://ilijabogunovic.com/", "dblp": ";https://dblp.org/pers/m/Mehta:Viraj.html;223/0106;157/7519;120/7593.html;38/247;87/1831-1.html;142/2725", "google_scholar": ";4pHjHBkAAAAJ;https://scholar.google.ch/citations?user=IgO2ThIAAAAJ;3SDKldkAAAAJ;QwKHApEAAAAJ;3bSbb20AAAAJ;https://scholar.google.ch/citations?user=eDHv58AAAAAJ;xMvt3NEAAAAJ", "orcid": ";0000-0002-2021-9718;0000-0002-7228-8280;;;0000-0002-5080-9073;0000-0001-7260-9673;", "linkedin": "xiang-li1;virajrmehta/;;;;jeff-schneider-1593b322/;krausea/;", "or_profile": "~Xiang_Li42;~Viraj_Mehta1;~Johannes_Kirschner1;~Ian_Char1;~Willie_Neiswanger2;~Jeff_Schneider1;~Andreas_Krause1;~Ilija_Bogunovic1", "aff": "ETHZ - ETH Zurich;Carnegie Mellon University;University of Alberta;Carnegie Mellon University;Stanford University;Carnegie Mellon University;ETH Zurich;Swiss Federal Institute of Technology", "aff_domain": "ethz.ch;cmu.edu;ualberta.ca;cmu.edu;stanford.edu;cs.cmu.edu;ethz.ch;ethz.ch", "position": "MS student;PhD student;Postdoc;PhD student;Postdoc;Researcher;Full Professor;Postdoc", "bibtex": "@inproceedings{\nli2023nearoptimal,\ntitle={Near-optimal Policy Identification in Active Reinforcement Learning},\nauthor={Xiang Li and Viraj Mehta and Johannes 
Kirschner and Ian Char and Willie Neiswanger and Jeff Schneider and Andreas Krause and Ilija Bogunovic},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=3OR2tbtnYC-}\n}", "github": "", "project": "", "reviewers": "Zsrd;maWK;v5Ka", "pdf_size": 464677, "recommendation": "8;8;8", "confidence": "3;4;3", "correctness": "3;4;3", "technical_novelty": "3;3;3", "empirical_novelty": "3;3;3", "wc_summary_paper": "59;48;113", "wc_strength_and_weaknesses": "285;63;545", "wc_clarity_quality_novelty_and_reproducibility": "38;24;188", "wc_summary_review": "43;15;33", "wc_review": "425;150;879", "wc_reply_reviewers": "19;10;66", "wc_reply_authors": "947;57;886", "reply_reviewers": "1;1;1", "reply_authors": "3;1;3", "recommendation_avg": [ 8.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 73.33333333333333, 28.40578970718626 ], "wc_strength_and_weaknesses_avg": [ 297.6666666666667, 196.9794123478108 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 83.33333333333333, 74.23087108624162 ], "wc_summary_review_avg": [ 30.333333333333332, 11.585431464655178 ], "wc_review_avg": [ 484.6666666666667, 300.5886816823873 ], "wc_reply_reviewers_avg": [ 31.666666666666668, 24.553795814270526 ], "wc_reply_authors_avg": [ 630.0, 405.93677668655084 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.3333333333333335, 0.9428090415820634 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8158057972627031206&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=3OR2tbtnYC-", "email": "ethz.ch;cmu.edu;ualberta.ca;cmu.edu;stanford.edu;cs.cmu.edu;ethz.ch;ethz.ch", "author_num": 8, "aff_unique_index": "0;1;2;1;3;1;0;4", "aff_unique_norm": "ETH Zurich;Carnegie Mellon University;University of Alberta;Stanford University;Swiss Federal Institute of Technology", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.ethz.ch;https://www.cmu.edu;https://www.ualberta.ca;https://www.stanford.edu;https://www.ethz.ch", "aff_unique_abbr": "ETHZ;CMU;UAlberta;Stanford;ETH Zurich", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;1;2;1;1;1;0;0", "aff_country_unique": "Switzerland;United States;Canada" }, { "title": "Generative Modeling Helps Weak Supervision (and Vice Versa)", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11760", "id": "3OaBBATwsvP", "poster": "", "openreview": "https://openreview.net/forum?id=3OaBBATwsvP", "slides": "https://iclr.cc/virtual/2023/poster/11760", "video": "https://iclr.cc/virtual/2023/poster/11760", "author_site": "Benedikt Boecking, Nicholas Roberts, Willie Neiswanger, Stefano Ermon, Frederic Sala, Artur Dubrawski", "tldr": "", "abstract": "Many promising applications of supervised machine learning face hurdles in the acquisition of labeled data in sufficient quantity and quality, creating an expensive bottleneck. To overcome such limitations, techniques that do not depend on ground truth labels have been studied, including weak supervision and generative modeling. 
While these techniques would seem to be usable in concert, improving one another, how to build an interface between them is not well understood. In this work, we propose a model fusing programmatic weak supervision and generative adversarial networks and provide theoretical justification motivating this fusion. The proposed approach captures discrete latent variables in the data alongside the label estimate derived from weak supervision. Alignment of the two allows for better modeling of sample-dependent accuracies of the weak supervision sources, improving the estimate of unobserved labels. It is the first approach to enable data augmentation through weakly supervised synthetic images and pseudolabels. Additionally, its learned latent variables can be inspected qualitatively. The model outperforms baseline weak supervision label models on a number of multiclass image classification datasets, improves the quality of generated images, and further improves end-model performance through data augmentation with synthetic samples.", "keywords": "generative model;weak supervision", "primary_area": "", "supplementary_material": "", "author": "Benedikt Boecking;Nicholas Roberts;Willie Neiswanger;Stefano Ermon;Frederic Sala;Artur Dubrawski", "authorids": "~Benedikt_Boecking1;~Nicholas_Roberts2;~Willie_Neiswanger2;~Stefano_Ermon1;~Frederic_Sala1;~Artur_Dubrawski2", "gender": "M;M;M;M;M;M", "homepage": "http://www.cs.cmu.edu/~boecking/;https://willieneis.github.io/;http://cs.stanford.edu/~ermon/;https://pages.cs.wisc.edu/~fredsala/;https://www.autonlab.org;https://nick11roberts.science/", "dblp": "146/0168;120/7593.html;47/8135;133/3602;76/48;", "google_scholar": "wNtfa1wAAAAJ;QwKHApEAAAAJ;;9KhIkNkAAAAJ;O3gezzcAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;;0000-0002-2372-0831;0000-0002-0625-9182", "linkedin": ";;;;artur-dubrawski-33a2a87/;nick11roberts/", "or_profile": "~Benedikt_Boecking1;~Willie_Neiswanger2;~Stefano_Ermon1;~Frederic_Sala1;~Artur_Dubrawski2;~Nicholas_Carl_Roberts1", "aff": "Carnegie Mellon University;Stanford University;Stanford University;University of Wisconsin, Madison;Carnegie Mellon University;Microsoft", "aff_domain": "cmu.edu;stanford.edu;stanford.edu;wisc.edu;cmu.edu;microsoft.com", "position": "PhD student;Postdoc;Associate Professor;Assistant Professor;Research Professor;Intern", "bibtex": "@inproceedings{\nboecking2023generative,\ntitle={Generative Modeling Helps Weak Supervision (and Vice Versa)},\nauthor={Benedikt Boecking and Nicholas Roberts and Willie Neiswanger and Stefano Ermon and Frederic Sala and Artur Dubrawski},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=3OaBBATwsvP}\n}", "github": "", "project": "", "reviewers": "vpDG;ERZw;kuyc", "pdf_size": 11570027, "recommendation": "6;6;8", "confidence": "3;4;4", "correctness": "3;4;4", "technical_novelty": "3;3;4", "empirical_novelty": "3;3;4", "wc_summary_paper": "264;164;81", "wc_strength_and_weaknesses": "217;71;140", "wc_clarity_quality_novelty_and_reproducibility": "84;136;46", "wc_summary_review": "32;33;24", "wc_review": "597;404;291", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "305;635;149", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ],
"empirical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 169.66666666666666, 74.8168133213087 ], "wc_strength_and_weaknesses_avg": [ 142.66666666666666, 59.63406930792349 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 88.66666666666667, 36.890227552685126 ], "wc_summary_review_avg": [ 29.666666666666668, 4.0276819911981905 ], "wc_review_avg": [ 430.6666666666667, 126.33905000258454 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 363.0, 202.6030601940652 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.49999999999999983, "corr_recommendation_correctness": 0.49999999999999983, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7062427508789753897&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=3OaBBATwsvP", "email": "cmu.edu;stanford.edu;stanford.edu;wisc.edu;cmu.edu;microsoft.com", "author_num": 6, "aff_unique_index": "0;1;1;2;0;3", "aff_unique_norm": "Carnegie Mellon University;Stanford University;University of Wisconsin;Microsoft", "aff_unique_dep": ";;;Microsoft Corporation", "aff_unique_url": "https://www.cmu.edu;https://www.stanford.edu;https://www.wisc.edu;https://www.microsoft.com", "aff_unique_abbr": "CMU;Stanford;UW;Microsoft", "aff_campus_unique_index": "1;1;2", "aff_campus_unique": ";Stanford;Madison", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Selection-Inference: Exploiting Large Language Models for Interpretable Logical Reasoning", "status": "Top-5%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11166", "id": "3Pf3Wg6o-A4", "poster": "", "openreview": "https://openreview.net/forum?id=3Pf3Wg6o-A4", "slides": "https://iclr.cc/virtual/2023/poster/11166", "video": "https://iclr.cc/virtual/2023/poster/11166", "author_site": "Antonia Creswell, Murray Shanahan, Irina Higgins", "tldr": "Using language models to produce a human interpretable chain of logical reasoning to answer questions.", "abstract": "Large language models (LLMs) have been shown to be capable of impressive few-shot generalisation to new tasks. However, they still tend to perform poorly on multi-step logical reasoning problems. Here we carry out a comprehensive evaluation of LLMs on 46 tasks that probe different aspects of logical reasoning. We show that language models tend to perform fairly well at single step inference or entailment tasks, but struggle to chain together multiple reasoning steps to solve more complex problems. In light of this, we propose a Selection-Inference (SI) framework that exploits pre-trained LLMs as general processing modules, and alternates between selection and inference to generate a series of interpretable, casual reasoning steps leading to the final answer. We show that a 7B parameter LLM used within the SI framework in a 5-shot generalisation setting, with no fine-tuning, yields a performance improvement of over 100% compared to an equivalent vanilla baseline on a suite of 10 logical reasoning tasks. The same model in the same setting even outperforms a significantly larger 280B parameter baseline on the same suite of tasks. 
Moreover, answers produced by the SI framework are accompanied by a causal natural-language-based reasoning trace, which has important implications for the safety and trustworthiness of the system.", "keywords": "System 2;Logical reasoning;Language Models;Large Language Models;Reasoning;Neuro-symbolic;Neural Symbolic;Interpretability", "primary_area": "", "supplementary_material": "/attachment/cb841040a356427e79fb1947b003ee8a3d412822.zip", "author": "Antonia Creswell;Murray Shanahan;Irina Higgins", "authorids": "~Antonia_Creswell2;~Murray_Shanahan1;~Irina_Higgins1", "gender": "F;M;F", "homepage": ";https://www.doc.ic.ac.uk/~mpsha/;https://scholar.google.com/citations?user=YWVuCKUAAAAJ&hl=en", "dblp": "183/6675;11/5268;155/7461", "google_scholar": ";https://scholar.google.co.uk/citations?user=00bnGpAAAAAJ;YWVuCKUAAAAJ", "orcid": ";0000-0001-5984-2964;0000-0002-1890-2091", "linkedin": ";;https://uk.linkedin.com/in/irina-higgins-74455235", "or_profile": "~Antonia_Creswell2;~Murray_Shanahan1;~Irina_Higgins1", "aff": "Google DeepMind;Imperial College London;Google DeepMind", "aff_domain": "google.com;;google.com", "position": "Researcher;Full Professor;Staff Research Scientist", "bibtex": "@inproceedings{\ncreswell2023selectioninference,\ntitle={Selection-Inference: Exploiting Large Language Models for Interpretable Logical Reasoning},\nauthor={Antonia Creswell and Murray Shanahan and Irina Higgins},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=3Pf3Wg6o-A4}\n}", "github": "", "project": "", "reviewers": "TQ9M;BSuH;g5nq;wTCG;QvM7", "pdf_size": 1577611, "recommendation": "6;8;8;8;8", "confidence": "3;4;5;4;4", "correctness": "3;3;4;4;3", "technical_novelty": "3;3;4;3;2", "empirical_novelty": "3;4;3;3;3", "wc_summary_paper": "98;90;111;234;72", "wc_strength_and_weaknesses": "321;297;116;340;240", "wc_clarity_quality_novelty_and_reproducibility": "9;359;70;362;5", "wc_summary_review": "58;26;37;45;20", "wc_review": "486;772;334;981;337", "wc_reply_reviewers": "0;20;22;34;0", "wc_reply_authors": "266;741;415;978;119", "reply_reviewers": "0;1;1;1;0", "reply_authors": "1;1;1;2;1", "recommendation_avg": [ 7.6, 0.7999999999999999 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "correctness_avg": [ 3.4, 0.4898979485566356 ], "technical_novelty_avg": [ 3.0, 0.6324555320336759 ], "empirical_novelty_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_paper_avg": [ 121.0, 57.89645930452051 ], "wc_strength_and_weaknesses_avg": [ 262.8, 80.73016784325424 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 161.0, 164.51504490471382 ], "wc_summary_review_avg": [ 37.2, 13.526270735128733 ], "wc_review_avg": [ 582.0, 255.36092105097052 ], "wc_reply_reviewers_avg": [ 15.2, 13.302631318652711 ], "wc_reply_authors_avg": [ 503.8, 314.2084658312058 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.790569415042095, "corr_recommendation_correctness": 0.408248290463863, "gs_citation": 384, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3152121723255432053&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=3Pf3Wg6o-A4", "email": "google.com;;google.com", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Google;Imperial College London", "aff_unique_dep": "Google DeepMind;", "aff_unique_url": 
"https://deepmind.com;https://www.imperial.ac.uk", "aff_unique_abbr": "DeepMind;ICL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "3QdSdm6Oqat", "title": "HEAT: Hardware-Efficient Automatic Tensor Decomposition for Transformer Compression", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Transformers have attained superior performance in natural language processing and computer vision tasks. Their self-attention and feedforward layers are overparameterized, limiting inference speed and energy efficiency. Tensor decomposition is a promising technique to reduce parameter redundancy by leveraging tensor algebraic properties to express the parameters in an efficiently factorized form. Prior efforts used manual or heuristic settings without hardware-aware customization, resulting in poor hardware efficiencies and large performance degradations. \nIn this work, we propose a hardware-aware tensor decomposition framework, dubbed HEAT, that enables efficient exploration of the exponential space of possible tensor decompositions and automates the choice of tensorization shape and decomposition rank. We jointly investigate tensor contraction path optimizations and a fused Einsum mapping strategy to bridge the gap between theoretical benefits and real hardware efficiency improvement. Our two-stage knowledge distillation flow resolves the trainability bottleneck and thus significantly boosts the final accuracy of factorized Transformers. Overall, we experimentally show that our hardware-aware factorized BERT variants reduce the energy-delay product by 5.7x with less than 1.1% accuracy loss and achieve a better efficiency-accuracy Pareto frontier than hand-tuned and heuristic baselines.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/2c8ce9ead306e25085dd569a296e774980b169a1.zip", "author": "Jiaqi Gu;Ben Keller;Jean Kossaifi;Anima Anandkumar;Brucek Khailany;David Z. Pan", "authorids": "~Jiaqi_Gu3;~Ben_Keller1;~Jean_Kossaifi1;~Anima_Anandkumar1;bkhailany@nvidia.com;~David_Z._Pan1", "gender": "M;;M;;;M", "homepage": "https://scopex-asu.github.io;https://research.nvidia.com/person/ben-keller;http://jeankossaifi.com/;;;http://users.ece.utexas.edu/~dpan/", "dblp": ";;155/6766;;;p/DavidZhigangPan.html", "google_scholar": "FeIV12MAAAAJ;;https://scholar.google.co.uk/citations?user=hJS2TXwAAAAJ;;;3aLlroEAAAAJ", "orcid": ";;;;;0000-0002-5705-2501", "linkedin": ";;;;;davidzpan/", "or_profile": "~Jiaqi_Gu3;~Ben_Keller1;~Jean_Kossaifi1;~Anima_Anandkumar1;bkhailany@nvidia.com;~David_Z._Pan1", "aff": "University of Texas, Austin;;NVIDIA AI;;;University of Texas, Austin", "aff_domain": "utexas.edu;;nvidia.com;;;utexas.edu", "position": "PhD student;;Researcher;;;Professor", "bibtex": "@misc{\ngu2023heat,\ntitle={{HEAT}: Hardware-Efficient Automatic Tensor Decomposition for Transformer Compression},\nauthor={Jiaqi Gu and Ben Keller and Jean Kossaifi and Anima Anandkumar and Brucek Khailany and David Z. 
Pan},\nyear={2023},\nurl={https://openreview.net/forum?id=3QdSdm6Oqat}\n}", "github": "", "project": "", "reviewers": "", "site": "https://openreview.net/forum?id=3QdSdm6Oqat", "pdf_size": 10956417, "recommendation": "", "confidence": "", "correctness": "", "technical_novelty": "", "empirical_novelty": "", "wc_summary_paper": "", "wc_strength_and_weaknesses": "", "wc_clarity_quality_novelty_and_reproducibility": "", "wc_summary_review": "", "wc_review": "", "wc_reply_reviewers": "", "wc_reply_authors": "", "reply_reviewers": "", "reply_authors": "", "recommendation_avg": [ 0, 0 ], "confidence_avg": [ 0, 0 ], "correctness_avg": [ 0, 0 ], "technical_novelty_avg": [ 0, 0 ], "empirical_novelty_avg": [ 0, 0 ], "wc_summary_paper_avg": [ 0, 0 ], "wc_strength_and_weaknesses_avg": [ 0, 0 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 0, 0 ], "wc_summary_review_avg": [ 0, 0 ], "wc_review_avg": [ 0, 0 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 1, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0, "corr_recommendation_correctness": 0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14138139316889380215&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Texas at Austin;NVIDIA", "aff_unique_dep": ";NVIDIA AI", "aff_unique_url": "https://www.utexas.edu;https://www.nvidia.com/en-us/research/", "aff_unique_abbr": "UT Austin;NVIDIA", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Austin;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Single-shot General Hyper-parameter Optimization for Federated Learning", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12042", "id": "3RhuF8foyPW", "poster": "", "openreview": "https://openreview.net/forum?id=3RhuF8foyPW", "slides": "https://iclr.cc/virtual/2023/poster/12042", "video": "https://iclr.cc/virtual/2023/poster/12042", "author_site": "Yi Zhou, Parikshit Ram, Theodoros Salonidis, Nathalie Baracaldo, Horst Samulowitz, Heiko Ludwig", "tldr": "We propose a single-shot hyperparameter optimization scheme for Federated Learning systems with theoretical performance guarantees and strong empirical performance against baselines.", "abstract": "We address the problem of hyper-parameter optimization (HPO) for federated learning (FL-HPO). We introduce Federated Loss SuRface Aggregation (FLoRA), a general FL-HPO solution framework that can address use cases with tabular data and any Machine Learning (ML) model, including gradient boosting training algorithms, SVMs, and neural networks, among others, thereby further expanding the scope of FL-HPO. FLoRA enables single-shot FL-HPO: identifying a single set of good hyper-parameters that are subsequently used in a single FL training. Thus, it enables FL-HPO solutions with minimal additional communication overhead compared to FL training without HPO. Utilizing standard smoothness assumptions, we theoretically characterize the optimality gap of FLoRA for both convex and non-convex loss functions, which explicitly accounts for the heterogeneous nature of the parties' local data distributions, a dominant characteristic of FL systems.
Our empirical evaluation of FLoRA for multiple FL algorithms on seven OpenML datasets demonstrates significant model accuracy improvements over the baselines, and robustness to an increasing number of parties involved in FL-HPO training.", "keywords": "Federated Learning;Hyperparameter Optimization;Optimality Gap Analysis", "primary_area": "", "supplementary_material": "/attachment/e75b7dd85264fb7266bd4c321a2ba2763a681f59.zip", "author": "Yi Zhou;Parikshit Ram;Theodoros Salonidis;Nathalie Baracaldo;Horst Samulowitz;Heiko Ludwig", "authorids": "~Yi_Zhou13;~Parikshit_Ram1;~Theodoros_Salonidis1;~Nathalie_Baracaldo1;~Horst_Samulowitz1;~Heiko_Ludwig1", "gender": "F;M;;;;M", "homepage": ";https://rithram.github.io/;https://researcher.watson.ibm.com/researcher/view.php?person=us-tsaloni;https://researcher.watson.ibm.com/researcher/view.php?person=us-baracald;;", "dblp": "01/1901-15;99/8314;74/5471.html;87/10087;13/2167;l/HeikoLudwig", "google_scholar": "WQlCxy4AAAAJ;JaXmmnkAAAAJ;qr7HVuYAAAAJ;3ACndBYAAAAJ;km7EsqsAAAAJ;E71s8kwAAAAJ", "orcid": ";0000-0002-9456-029X;;;;", "linkedin": ";parikshit-ram-4861325/;;;horsts/;", "or_profile": "~Yi_Zhou13;~Parikshit_Ram1;~Theodoros_Salonidis1;~Nathalie_Baracaldo1;~Horst_Samulowitz1;~Heiko_Ludwig1", "aff": "International Business Machines;International Business Machines;;IBM, International Business Machines;International Business Machines;International Business Machines", "aff_domain": "ibm.com;ibm.com;;us.ibm.com;ibm.com;ibm.com", "position": "Researcher;Principal Researcher;;Researcher;Principal Researcher;Principal Researcher", "bibtex": "@inproceedings{\nzhou2023singleshot,\ntitle={Single-shot General Hyper-parameter Optimization for Federated Learning},\nauthor={Yi Zhou and Parikshit Ram and Theodoros Salonidis and Nathalie Baracaldo and Horst Samulowitz and Heiko Ludwig},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=3RhuF8foyPW}\n}", "github": "", "project": "", "reviewers": "mUdQ;4yTc;rh1d;wq54", "pdf_size": 850171, "recommendation": "6;6;6;8", "confidence": "2;4;3;3", "correctness": "3;3;4;4", "technical_novelty": "2;2;3;3", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "164;86;134;83", "wc_strength_and_weaknesses": "292;769;182;117", "wc_clarity_quality_novelty_and_reproducibility": "12;71;34;26", "wc_summary_review": "55;39;56;25", "wc_review": "523;965;406;251", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "701;2786;511;411", "reply_reviewers": "0;0;0;0", "reply_authors": "1;5;1;1", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 116.75, 33.96597562267276 ], "wc_strength_and_weaknesses_avg": [ 340.0, 255.4593901190559 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 35.75, 21.821720830401986 ], "wc_summary_review_avg": [ 43.75, 12.754901018824098 ], "wc_review_avg": [ 536.25, 265.67590688656736 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1102.25, 977.6782126548592 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.0, 1.7320508075688772 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6784811202844926903&as_sdt=2005&sciodt=0,5&hl=en",
"gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=3RhuF8foyPW", "email": "ibm.com;ibm.com;;us.ibm.com;ibm.com;ibm.com", "author_num": 6, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "International Business Machines Corporation;International Business Machines", "aff_unique_dep": ";", "aff_unique_url": "https://www.ibm.com;https://www.ibm.com", "aff_unique_abbr": "IBM;IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "3S62EPkO7k-", "title": "DSP: Dynamic Semantic Prototype for Generative Zero-Shot Learning", "track": "main", "status": "Reject", "tldr": "Dynamic Semantic Prototype should be Considered in Generative Zero-Shot Learning", "abstract": "Generative models (e.g., generative adversarial network (GAN)) have advanced zero-shot learning (ZSL). Studies on the generative ZSL methods typically produce visual features of unseen classes to mitigate the issue of lacking unseen samples based on the predefined class semantic prototypes. As these empirically designed prototypes are not able to faithfully represent the actual semantic prototypes of visual features (i.e., visual prototypes), existing methods limit their ability to synthesize visual features that accurately represent real features and prototypes. We formulate this phenomenon as a visual-semantic domain shift problem. It prevents the generative models from further improving the ZSL performance. In this paper, we propose a dynamic semantic prototype learning (DSP) method to align the empirical and actual semantic prototypes for synthesizing accurate visual features. The alignment is conducted by jointly refining semantic prototypes and visual features so that the generator synthesizes visual features which are close to the real ones. We utilize a visual$\\rightarrow$semantic mapping network (V2SM) to map both the synthesized and real features into the class semantic space. The V2SM benefits the generator to synthesize visual representations with rich semantics. The real/synthesized visual features supervise our visual-oriented semantic prototype evolving network (VOPE) where the predefined class semantic prototypes are iteratively evolved to become dynamic semantic prototypes. Such prototypes are then fed back to the generative network as conditional supervision. Finally, we enhance visual features by fusing the evolved semantic prototypes into their corresponding visual features. 
Our extensive experiments on three benchmark datasets show that our DSP improves existing generative ZSL methods, \\textit{e.g.}, raising the average harmonic mean over four baselines (CLSWGAN, f-VAEGAN, TF-VAEGAN, and FREE) by 8.5\\%, 8.0\\%, and 9.7\\% on CUB, SUN, and AWA2, respectively.", "keywords": "Zero-Shot Learning;Generative Model;Knowledge Transfer", "primary_area": "", "supplementary_material": "", "author": "Shiming Chen;Hou Wen Jin;Ziming Hong;Yibing Song;Tongliang Liu;Xinge You;Kun Zhang", "authorids": "~Shiming_Chen1;~Hou_Wen_Jin1;~Ziming_Hong1;~Yibing_Song1;~Tongliang_Liu1;~Xinge_You1;~Kun_Zhang1", "gender": "M;M;M;;M;M;M", "homepage": "https://shiming-chen.github.io/;;https://sites.google.com/view/ziminghong/;https://ybsong00.github.io/;https://tongliang-liu.github.io/;http://bmal.hust.edu.cn/info/1005/1091.htm;http://www.andrew.cmu.edu/user/kunz1/", "dblp": "63/3682-2;260/2868;304/3159;77/2117;150/6667;16/1184;96/3115-1", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=zh-CN;j6gyDlIAAAAJ;oRhJHmIAAAAJ;https://scholar.google.com.au/citations?user=EiLdZ_YAAAAJ;v7bRZX8AAAAJ;RGoypN4AAAAJ", "orcid": ";;0000-0001-8574-6108;;;;", "linkedin": ";;;;;;", "or_profile": "~Shiming_Chen1;~Hou_Wen_Jin1;~Ziming_Hong1;~Yibing_Song1;~Tongliang_Liu1;~Xinge_You1;~Kun_Zhang1", "aff": "Mohamed bin Zayed University of Artificial Intelligence;Huazhong University of Science and Technology;;Tencent AI Lab;University of Sydney;Huazhong University of Science and Technology;Carnegie Mellon University", "aff_domain": "mbzuai.ac.ae;hust.edu.cn;;tencent.com;sydney.edu.au;hust.edu.cn;cmu.edu", "position": "Postdoc;MS student;;Senior Researcher;Lecturer;Full Professor;Associate Professor", "bibtex": "@misc{\nchen2023dsp,\ntitle={{DSP}: Dynamic Semantic Prototype for Generative Zero-Shot Learning},\nauthor={Shiming Chen and Hou Wen Jin and Ziming Hong and Yibing Song and Tongliang Liu and Xinge You and Kun Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=3S62EPkO7k-}\n}", "github": "", "project": "", "reviewers": "qGfG;J6h5;gkTe;LoBd", "site": "https://openreview.net/forum?id=3S62EPkO7k-", "pdf_size": 2524280, "recommendation": "3;5;5;5", "confidence": "4;5;4;4", "correctness": "1;2;3;2", "technical_novelty": "1;2;2;3", "empirical_novelty": "0;2;2;3", "wc_summary_paper": "89;52;91;67", "wc_strength_and_weaknesses": "212;222;91;359", "wc_clarity_quality_novelty_and_reproducibility": "55;31;217;10", "wc_summary_review": "64;22;42;63", "wc_review": "420;327;441;499", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 2.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 1.75, 1.0897247358851685 ], "wc_summary_paper_avg": [ 74.75, 16.161296358893985 ], "wc_strength_and_weaknesses_avg": [ 221.0, 94.9025816297955 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 78.25, 81.67427685630281 ], "wc_summary_review_avg": [ 47.75, 17.268106439329124 ], "wc_review_avg": [ 421.75, 61.88446897243281 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 0.816496580927726,
"gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:g3-K0JdAZyQJ:scholar.google.com/&scioq=DSP:+Dynamic+Semantic+Prototype+for+Generative+Zero-Shot+Learning&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;1;4", "aff_unique_norm": "Mohamed bin Zayed University of Artificial Intelligence;Huazhong University of Science and Technology;Tencent;University of Sydney;Carnegie Mellon University", "aff_unique_dep": ";;Tencent AI Lab;;", "aff_unique_url": "https://mbzuai.ac.ae;http://www.hust.edu.cn;https://ai.tencent.com;https://www.sydney.edu.au;https://www.cmu.edu", "aff_unique_abbr": "MBZUAI;HUST;Tencent AI Lab;USYD;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;2;1;3", "aff_country_unique": "United Arab Emirates;China;Australia;United States" }, { "id": "3TduOwfFNoy", "title": "Contextualized Generative Retrieval", "track": "main", "status": "Reject", "tldr": "By utilizing contextualized token embeddings in generative retrieval, it can utilize both the parametric space of the model and the non-parametric space of contextualized embeddings.", "abstract": "The text retrieval task is mainly performed in two ways: the bi-encoder approach and the generative approach. The bi-encoder approach maps the document and query embeddings to common vector space and performs a nearest neighbor search. It stably shows high performance and efficiency across different domains but has an embedding space bottleneck as it interacts in L2 or inner product space. The generative retrieval model retrieves by generating a target sequence and overcomes the embedding space bottleneck by interacting in the parametric space. However, it fails to retrieve the information it has not seen during the training process as it depends solely on the information encoded in its own model parameters. To leverage the advantages of both approaches, we propose Contextualized Generative Retrieval model, which uses contextualized embeddings (output embeddings of a language model encoder) as vocab embeddings at the decoding step of generative retrieval. The model uses information encoded in both the non-parametric space of contextualized token embeddings and the parametric space of the generative retrieval model. 
Our approach of generative retrieval with contextualized vocab embeddings shows higher performance than generative retrieval with only vanilla vocab embeddings in the document retrieval task, an average of 6% higher performance in KILT (NQ, TQA) and 2X higher in NQ-320k, suggesting the benefits of using contextualized embedding in generative retrieval models.", "keywords": "NLP;Information Retrieval", "primary_area": "", "supplementary_material": "", "author": "Hyunji Lee;JaeYoung Kim;Hoyeon Chang;Hanseok Oh;Sohee Yang;vladimir karpukhin;Yi Lu;Minjoon Seo", "authorids": "~Hyunji_Lee1;~JaeYoung_Kim5;~Hoyeon_Chang1;~Hanseok_Oh1;~Sohee_Yang1;~vladimir_karpukhin1;~Yi_Lu6;~Minjoon_Seo1", "gender": "F;M;M;M;F;M;M;M", "homepage": "https://amy-hyunji.github.io/;;https://duemoo.github.io/about/;https://hanseokoh.github.io/;https://soheeyang.github.io;;;https://seominjoon.github.io", "dblp": ";;;304/2544;236/5847;236/4633;;149/1367", "google_scholar": "LQ-52vsAAAAJ;7NXeVkQAAAAJ;https://scholar.google.com/citations?hl=ko;;jh547hEAAAAJ;-VLmu2MAAAAJ;https://scholar.google.com/citations?hl=en;zYze5fIAAAAJ", "orcid": ";;;;;;;", "linkedin": "hyunji-lee-9b53b511a/;jaeyoung-kim-439691232/;;hanseok-oh-80180b180;;;yi-lu-b14636a3/;minjoon-seo/", "or_profile": "~Hyunji_Lee1;~JaeYoung_Kim5;~Hoyeon_Chang1;~Hanseok_Oh1;~Sohee_Yang1;~vladimir_karpukhin1;~Yi_Lu6;~Minjoon_Seo1", "aff": "Korea Advanced Institute of Science & Technology;KaKao Corp;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Research, Facebook;Forethought;Twelve Labs", "aff_domain": "kaist.ac.kr;kakaocorp.com;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;research.facebook.com;forethought.ai;twelvelabs.io", "position": "MS student;AI Engineer;MS student;MS student;MS student;Researcher;Principal Researcher;Chief Scientist", "bibtex": "@misc{\nlee2023contextualized,\ntitle={Contextualized Generative Retrieval},\nauthor={Hyunji Lee and JaeYoung Kim and Hoyeon Chang and Hanseok Oh and Sohee Yang and vladimir karpukhin and Yi Lu and Minjoon Seo},\nyear={2023},\nurl={https://openreview.net/forum?id=3TduOwfFNoy}\n}", "github": "", "project": "", "reviewers": "rcjJ;UYMW;ZLXX;DJS8", "site": "https://openreview.net/forum?id=3TduOwfFNoy", "pdf_size": 395867, "recommendation": "3;5;6;6", "confidence": "4;4;4;4", "correctness": "3;4;4;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "43;69;89;298", "wc_strength_and_weaknesses": "82;63;51;206", "wc_clarity_quality_novelty_and_reproducibility": "27;24;4;73", "wc_summary_review": "29;204;15;81", "wc_review": "181;360;159;658", "wc_reply_reviewers": "0;42;0;334", "wc_reply_authors": "936;920;467;2447", "reply_reviewers": "0;1;0;2", "reply_authors": "2;3;1;5", "recommendation_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 124.75, 101.34686724314669 ], "wc_strength_and_weaknesses_avg": [ 100.5, 61.90516941257814 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 32.0, 25.268557536986556 ], "wc_summary_review_avg": [ 82.25, 74.4693728991993 ], "wc_review_avg": [ 339.5, 199.72793995833433 ], "wc_reply_reviewers_avg": [ 94.0, 139.62091533864114 ], "wc_reply_authors_avg": [ 1192.5, 748.3597062910322 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.75, 1.479019945774904 ], "replies_avg": [ 
21, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.40824829046386296, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15381694683981532529&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;0;0;0;2;3;4", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Kakao Corporation;Meta;Forethought;Twelve Labs", "aff_unique_dep": ";;Research;;", "aff_unique_url": "https://www.kaist.ac.kr;https://www.kakao.com;https://www.facebook.com;https://www.forethought.com;https://twelvelabs.com", "aff_unique_abbr": "KAIST;KaKao;FB;;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;1;1;1", "aff_country_unique": "South Korea;United States" }, { "id": "3TfSOxiRiFH", "title": "On a Built-in Conflict between Deep Learning and Systematic Generalization", "track": "main", "status": "Reject", "tldr": "", "abstract": "Out-of-distribution or systematic generalization is a desirable property that most deep learning algorithms lack. In this paper, we hypothesize that internal function sharing is one of the reasons to weaken systematic generalization in deep learning for classification tasks. Under equivalent prediction, a model partitions an input space into multiple parts separated by boundaries. The function sharing prefers to reuse boundaries, leading to fewer parts for new outputs, which conflicts with systematic generalization. We show such phenomena in standard deep learning models, such as fully connected, convolutional, residual networks, LSTMs, and (Vision) Transformers. We hope this study provides novel insights and forms a basis for new research directions to improve systematic generalization.", "keywords": "out-of-distribution generalization;systematic generalization;compositional generalization", "primary_area": "", "supplementary_material": "/attachment/e55d173b07ef5c5a17e611ee37ca86162d54eb9b.zip", "author": "Yuanpeng Li", "authorids": "~Yuanpeng_Li2", "gender": "M", "homepage": "", "dblp": "", "google_scholar": "", "orcid": "", "linkedin": "", "or_profile": "~Yuanpeng_Li2", "aff": "", "aff_domain": "", "position": "", "bibtex": "@misc{\nli2023on,\ntitle={On a Built-in Conflict between Deep Learning and Systematic Generalization},\nauthor={Yuanpeng Li},\nyear={2023},\nurl={https://openreview.net/forum?id=3TfSOxiRiFH}\n}", "github": "", "project": "", "reviewers": "w8Ad;4tfA;hir1;e3dk", "site": "https://openreview.net/forum?id=3TfSOxiRiFH", "pdf_size": 1142934, "recommendation": "3;5;5;5", "confidence": "4;4;5;3", "correctness": "3;3;2;2", "technical_novelty": "2;3;2;2", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "87;41;75;86", "wc_strength_and_weaknesses": "230;306;618;253", "wc_clarity_quality_novelty_and_reproducibility": "69;36;85;202", "wc_summary_review": "45;28;126;139", "wc_review": "431;411;904;680", "wc_reply_reviewers": "0;61;0;0", "wc_reply_authors": "112;37;132;152", "reply_reviewers": "0;1;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 72.25, 18.64638034579366 ], "wc_strength_and_weaknesses_avg": [ 351.75, 156.17037971395217 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 98.0, 62.58993529314438 ], "wc_summary_review_avg": [ 84.5, 48.59269492423733 ], 
"wc_review_avg": [ 606.5, 201.82232284858878 ], "wc_reply_reviewers_avg": [ 15.25, 26.413774815425377 ], "wc_reply_authors_avg": [ 108.25, 43.49928160326329 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 1, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:agJYDGCqgT8J:scholar.google.com/&scioq=On+a+Built-in+Conflict+between+Deep+Learning+and+Systematic+Generalization&hl=en&as_sdt=0,44", "gs_version_total": 3 }, { "id": "3UDAU2unja", "title": "Perceptual Grouping in Vision-Language Models", "track": "main", "status": "Withdraw", "tldr": "We describe a minimal set of changes to vision-language models to endow these models with perceptual grouping and localization information.", "abstract": "Recent advances in zero-shot image recognition suggest that vision-language models learn generic visual representations with a high degree of semantic information that may be arbitrarily probed with natural language phrases. Understanding an image, however, is not just about understanding {\\it what} content resides within an image, but importantly, {\\it where} that content resides. In this work we examine how well vision-language models are able to understand where objects reside within an image and group together visually related parts of the imagery. We demonstrate how contemporary vision and language representation learning models based on contrastive losses and large web-based data capture limited object localization information. We propose a minimal set of modifications that results in models that uniquely learn both semantic and spatial information. We measure this performance in terms of zero-shot image recognition, unsupervised bottom-up and top-down semantic segmentations, as well as robustness analyses. 
We find that the resulting model achieves state-of-the-art results in terms of unsupervised segmentation, and demonstrate that the learned representations are uniquely robust to spurious correlations in datasets designed to probe the causal behavior of vision models.", "keywords": "vision-language models;multimodal learning;perceptual grouping;image segmentation", "primary_area": "", "supplementary_material": "/attachment/1002f60ad95f594c8785e7ccfe293823863fee53.zip", "author": "Kanchana Ranasinghe;Brandon McKinzie;Sachin Ravi;Yinfei Yang;Alexander T Toshev;Jonathon Shlens", "authorids": "~Kanchana_Ranasinghe1;~Brandon_McKinzie1;~Sachin_Ravi1;~Yinfei_Yang1;~Alexander_T_Toshev1;~Jonathon_Shlens1", "gender": ";M;M;;;", "homepage": ";http://mckinziebrandon.me;https://sachinravi14.github.io/;;;", "dblp": ";;;117/4082;;", "google_scholar": ";;cr53lHIAAAAJ;kvDbu90AAAAJ;;", "orcid": ";;;;;", "linkedin": ";brandon-mckinzie-452a03112;;;;", "or_profile": "~Kanchana_Ranasinghe1;~Brandon_McKinzie1;~Sachin_Ravi1;~Yinfei_Yang1;~Alexander_T_Toshev1;~Jonathon_Shlens1", "aff": ";;Apple;Apple;;", "aff_domain": ";;apple.com;apple.com;;", "position": ";;ML Researcher;Researcher;;", "bibtex": "@misc{\nranasinghe2023perceptual,\ntitle={Perceptual Grouping in Vision-Language Models},\nauthor={Kanchana Ranasinghe and Brandon McKinzie and Sachin Ravi and Yinfei Yang and Alexander T Toshev and Jonathon Shlens},\nyear={2023},\nurl={https://openreview.net/forum?id=3UDAU2unja}\n}", "github": "", "project": "", "reviewers": "P7aP;N1PR;cMdU", "site": "https://openreview.net/forum?id=3UDAU2unja", "pdf_size": 1659760, "recommendation": "3;3;5", "confidence": "4;4;3", "correctness": "3;2;3", "technical_novelty": "1;2;2", "empirical_novelty": "2;2;3", "wc_summary_paper": "55;37;68", "wc_strength_and_weaknesses": "107;98;430", "wc_clarity_quality_novelty_and_reproducibility": "47;11;53", "wc_summary_review": "58;32;44", "wc_review": "267;178;595", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 53.333333333333336, 12.710450643291745 ], "wc_strength_and_weaknesses_avg": [ 211.66666666666666, 154.4286962394691 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 37.0, 18.547236990991408 ], "wc_summary_review_avg": [ 44.666666666666664, 10.624918300339484 ], "wc_review_avg": [ 346.6666666666667, 179.31784319718128 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": 0.5, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17881187136615185654&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Apple", "aff_unique_dep": "Apple Inc.", "aff_unique_url": "https://www.apple.com", "aff_unique_abbr": "Apple", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Differentially Private $L_2$-Heavy Hitters in the Sliding Window Model", "status": 
"Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11484", "id": "3UHoYrglYkG", "poster": "", "openreview": "https://openreview.net/forum?id=3UHoYrglYkG", "slides": "https://iclr.cc/virtual/2023/poster/11484", "video": "https://iclr.cc/virtual/2023/poster/11484", "author_site": "Jeremiah Blocki, Seunghoon Lee, Tamalika Mukherjee, Samson Zhou", "tldr": "", "abstract": "The data management of large companies often prioritize more recent data, as a source of higher accuracy prediction than outdated data. For example, the Facebook data policy retains user search histories for $6$ months while the Google data retention policy states that browser information may be stored for up to $9$ months. These policies are captured by the sliding window model, in which only the most recent $W$ statistics form the underlying dataset. In this paper, we consider the problem of privately releasing the $L_2$-heavy hitters in the sliding window model, which include $L_p$-heavy hitters for $p\\le 2$ and in some sense are the strongest possible guarantees that can be achieved using polylogarithmic space, but cannot be handled by existing techniques due to the sub-additivity of the $L_2$ norm. Moreover, existing non-private sliding window algorithms use the smooth histogram framework, which has high sensitivity. To overcome these barriers, we introduce the first differentially private algorithm for $L_2$-heavy hitters in the sliding window model by initiating a number of $L_2$-heavy hitter algorithms across the stream with significantly lower threshold. Similarly, we augment the algorithms with an approximate frequency tracking algorithm with significantly higher accuracy. We then use smooth sensitivity and statistical distance arguments to show that we can add noise proportional to an estimation of the $L_2$ norm. 
To the best of our knowledge, our techniques are the first to privately release statistics that are related to a sub-additive function in the sliding window model, and may be of independent interest to future differentially private algorithmic design in the sliding window model.", "keywords": "differential privacy;heavy hitters;streaming algorithms;sliding window model", "primary_area": "", "supplementary_material": "/attachment/6c52be204a7750587c5a87de660b9d496c52f4e5.zip", "author": "Jeremiah Blocki;Seunghoon Lee;Tamalika Mukherjee;Samson Zhou", "authorids": "~Jeremiah_Blocki2;~Seunghoon_Lee1;~Tamalika_Mukherjee1;~Samson_Zhou1", "gender": "M;M;F;", "homepage": "https://www.cs.purdue.edu/homes/jblocki/;https://lee2856.github.io/;https://loriochi.github.io/;https://samsonzhou.github.io/", "dblp": "30/8037;;185/5691;179/2683", "google_scholar": ";fLWCOIgAAAAJ;https://scholar.google.com/citations?hl=en;NpjsgocAAAAJ", "orcid": ";;;", "linkedin": ";;tamalika-mukherjee/;", "or_profile": "~Jeremiah_Blocki2;~Seunghoon_Lee1;~Tamalika_Mukherjee1;~Samson_Zhou1", "aff": "Purdue University;Purdue University;Purdue University;University of California, Berkeley", "aff_domain": "cs.purdue.edu;purdue.edu;cs.purdue.edu;berkeley.edu", "position": "Assistant Professor;PhD student;PhD student;Postdoc", "bibtex": "@inproceedings{\nblocki2023differentially,\ntitle={Differentially Private \\$L\\_2\\$-Heavy Hitters in the Sliding Window Model},\nauthor={Jeremiah Blocki and Seunghoon Lee and Tamalika Mukherjee and Samson Zhou},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=3UHoYrglYkG}\n}", "github": "", "project": "", "reviewers": "jYg8;EhqA;eT6L;1XJv", "pdf_size": 312386, "recommendation": "5;5;8;8", "confidence": "2;3;3;3", "correctness": "3;4;3;3", "technical_novelty": "2;3;4;3", "empirical_novelty": "0;0;0;0", "wc_summary_paper": "36;48;160;123", "wc_strength_and_weaknesses": "120;278;66;121", "wc_clarity_quality_novelty_and_reproducibility": "5;1;114;218", "wc_summary_review": "44;63;39;22", "wc_review": "205;390;379;484", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "593;755;330;352", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 6.5, 1.5 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 0, 0 ], "wc_summary_paper_avg": [ 91.75, 51.61576793965193 ], "wc_strength_and_weaknesses_avg": [ 146.25, 79.25394311956977 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 84.5, 89.42175350550893 ], "wc_summary_review_avg": [ 42.0, 14.611639196202457 ], "wc_review_avg": [ 364.5, 100.72363178519726 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 507.5, 176.24769502038885 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.5773502691896258, "corr_recommendation_correctness": -0.5773502691896258, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9352845246599612586&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=3UHoYrglYkG", "email": "cs.purdue.edu;purdue.edu;cs.purdue.edu;berkeley.edu", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Purdue University;University of California, Berkeley", "aff_unique_dep": ";", "aff_unique_url": 
"https://www.purdue.edu;https://www.berkeley.edu", "aff_unique_abbr": "Purdue;UC Berkeley", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Seeing Differently, Acting Similarly: Heterogeneously Observable Imitation Learning", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10769", "id": "3ULaIHxn9u7", "poster": "", "openreview": "https://openreview.net/forum?id=3ULaIHxn9u7", "slides": "https://iclr.cc/virtual/2023/poster/10769", "video": "https://iclr.cc/virtual/2023/poster/10769", "author_site": "Xinqiang Cai, Yao-Xiang Ding, Zixuan Chen, Yuan Jiang, Masashi Sugiyama, Zhi-Hua Zhou", "tldr": "", "abstract": "In many real-world imitation learning tasks, the demonstrator and the learner have to act under different observation spaces. This situation brings significant obstacles to existing imitation learning approaches, since most of them learn policies under homogeneous observation spaces. On the other hand, previous studies under different observation spaces have strong assumptions that these two observation spaces coexist during the entire learning process. However, in reality, the observation coexistence will be limited due to the high cost of acquiring expert observations. In this work, we study this challenging problem with limited observation coexistence under heterogeneous observations: Heterogeneously Observable Imitation Learning (HOIL). We identify two underlying issues in HOIL: the dynamics mismatch and the support mismatch, and further propose the Importance Weighting with REjection (IWRE) algorithm based on importance weighting and learning with rejection to solve HOIL problems. Experimental results show that IWRE can solve various HOIL tasks, including the challenging tasks of transforming the vision-based demonstrations to random access memory (RAM)-based policies in the Atari domain, even with limited visual observations.", "keywords": "Imitation Learning;Heterogeneous Observation;Importance Weighting;Learning with Rejection", "primary_area": "", "supplementary_material": "", "author": "Xin-Qiang Cai;Yao-Xiang Ding;Zixuan Chen;Yuan Jiang;Masashi Sugiyama;Zhi-Hua Zhou", "authorids": "~Xin-Qiang_Cai1;~Yao-Xiang_Ding2;~Zixuan_Chen4;~Yuan_Jiang1;~Masashi_Sugiyama1;~Zhi-Hua_Zhou2", "gender": "M;M;M;F;M;M", "homepage": "https://caixq1996.github.io/;https://yaoxiangding.github.io/;http://www.lamda.nju.edu.cn/chenzx/;http://lamda.nju.edu.cn/jiangy;http://www.ms.k.u-tokyo.ac.jp/sugi/;https://cs.nju.edu.cn/zhouzh/", "dblp": "248/8034.html;186/8301-1;;;35/1228;z/ZhiHuaZhou", "google_scholar": "rtMUMooAAAAJ;POTjhnUAAAAJ;woPoDW0AAAAJ;;https://scholar.google.co.jp/citations?user=GkYIrlIAAAAJ;https://scholar.google.com.tw/citations?user=rSVIHasAAAAJ", "orcid": ";0000-0001-8580-1103;;;0000-0001-6658-6743;0000-0003-0746-1494", "linkedin": ";;;;;", "or_profile": "~Xin-Qiang_Cai1;~Yao-Xiang_Ding2;~Zixuan_Chen4;~Yuan_Jiang1;~Masashi_Sugiyama1;~Zhi-hua_Zhou1", "aff": "The University of Tokyo;Zhejiang University;Nanjing University;Nanjing University;The University of Tokyo;Nanjing University", "aff_domain": "u-tokyo.ac.jp;zju.edu.cn;nju.edu.cn;nju.edu.cn;u-tokyo.ac.jp;nju.edu.cn", "position": "PhD student;Assistant Professor;MS student;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\ncai2023seeing,\ntitle={Seeing Differently, Acting Similarly: Heterogeneously Observable Imitation Learning},\nauthor={Xin-Qiang Cai and Yao-Xiang 
Ding and Zixuan Chen and Yuan Jiang and Masashi Sugiyama and Zhi-Hua Zhou},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=3ULaIHxn9u7}\n}", "github": "", "project": "", "reviewers": "j2UZ;ntXD;w9cE;N1Ya", "pdf_size": 3992256, "recommendation": "3;6;8;10", "confidence": "4;2;4;5", "correctness": "3;4;4;4", "technical_novelty": "3;3;3;4", "empirical_novelty": "1;3;3;3", "wc_summary_paper": "150;41;350;257", "wc_strength_and_weaknesses": "263;167;136;76", "wc_clarity_quality_novelty_and_reproducibility": "97;14;45;48", "wc_summary_review": "35;66;43;96", "wc_review": "545;288;574;477", "wc_reply_reviewers": "807;0;0;0", "wc_reply_authors": "1671;686;146;82", "reply_reviewers": "3;0;0;0", "reply_authors": "7;2;1;1", "recommendation_avg": [ 6.75, 2.5860201081971503 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 199.5, 115.68167529907232 ], "wc_strength_and_weaknesses_avg": [ 160.5, 67.6184146516317 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 51.0, 29.706901555025897 ], "wc_summary_review_avg": [ 60.0, 23.695991222145572 ], "wc_review_avg": [ 471.0, 111.36651202224122 ], "wc_reply_reviewers_avg": [ 201.75, 349.441250427021 ], "wc_reply_authors_avg": [ 646.25, 636.459101199755 ], "reply_reviewers_avg": [ 0.75, 1.299038105676658 ], "reply_authors_avg": [ 2.75, 2.48746859276655 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.4213906660783363, "corr_recommendation_correctness": 0.8372183582789214, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1102839393877905224&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=3ULaIHxn9u7", "email": "u-tokyo.ac.jp;zju.edu.cn;nju.edu.cn;nju.edu.cn;u-tokyo.ac.jp;nju.edu.cn", "author_num": 6, "aff_unique_index": "0;1;2;2;0;2", "aff_unique_norm": "University of Tokyo;Zhejiang University;Nanjing University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.u-tokyo.ac.jp;https://www.zju.edu.cn;https://www.nju.edu.cn", "aff_unique_abbr": "UTokyo;ZJU;Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;0;1", "aff_country_unique": "Japan;China" }, { "title": "Variational Latent Branching Model for Off-Policy Evaluation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11436", "id": "3VFQfAG3vwi", "poster": "/media/PosterPDFs/ICLR%202023/11436.png?t=1683224206.4059849", "openreview": "https://openreview.net/forum?id=3VFQfAG3vwi", "slides": "https://iclr.cc/virtual/2023/poster/11436", "video": "https://iclr.cc/virtual/2023/poster/11436", "author_site": "Qitong Gao, Ge Gao, Min Chi, Miroslav Pajic", "tldr": "", "abstract": "Model-based methods have recently shown great potential for off-policy evaluation (OPE); offline trajectories induced by behavioral policies are fitted to transitions of Markov decision processes (MDPs), which are used to rollout simulated trajectories and estimate the performance of policies. Model-based OPE methods face two key challenges. First, as offline trajectories are usually fixed, they tend to cover limited state and action space. Second, the performance of model-based methods can be sensitive to the initialization of their parameters. 
In this work, we propose the variational latent branching model (VLBM) to learn the transition function of MDPs by formulating the environmental dynamics as a compact latent space, from which the next states and rewards are then sampled. Specifically, VLBM leverages and extends the variational inference framework with the recurrent state alignment (RSA), which is designed to capture as much of the information underlying the limited training data as possible, by smoothing out the information flow between the variational (encoding) and generative (decoding) parts of VLBM. Moreover, we introduce a branching architecture to improve the model\u2019s robustness against randomly initialized model weights. The effectiveness of the VLBM is evaluated on the deep OPE (DOPE) benchmark, in which the training trajectories are designed to result in varied coverage of the state-action space. We show that the VLBM outperforms existing state-of-the-art OPE methods in general.", "keywords": "Model-Based Off-policy Evaluation;Reinforcement Learning;Variational Inference", "primary_area": "", "supplementary_material": "/attachment/4ccbace0f5345e12aa304f6d0bd3e9f86018709c.zip", "author": "Qitong Gao;Ge Gao;Min Chi;Miroslav Pajic", "authorids": "~Qitong_Gao1;~Ge_Gao4;~Min_Chi1;~Miroslav_Pajic2", "gender": "M;;;M", "homepage": "http://qitonggao.com;https://gegao.tech/;;http://people.duke.edu/~mp275/", "dblp": "238/5422;;;74/7446.html", "google_scholar": "Flv4SrsAAAAJ;d_WL-9cAAAAJ;;Fbn21-8AAAAJ", "orcid": ";0000-0002-3474-8637;;", "linkedin": "qitong-gao;;;", "or_profile": "~Qitong_Gao1;~Ge_Gao4;~Min_Chi1;~Miroslav_Pajic2", "aff": "Duke University;North Carolina State University;;Duke University", "aff_domain": "duke.edu;ncsu.edu;;duke.edu", "position": "PhD student;PhD student;;Associate Professor", "bibtex": "@inproceedings{\ngao2023variational,\ntitle={Variational Latent Branching Model for Off-Policy Evaluation},\nauthor={Qitong Gao and Ge Gao and Min Chi and Miroslav Pajic},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=3VFQfAG3vwi}\n}", "github": "", "project": "", "reviewers": "grrh;QLEv;k3gq;MDLU", "pdf_size": 7192493, "recommendation": "5;6;6;6", "confidence": "4;4;3;3", "correctness": "3;3;3;4", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;0;2;3", "wc_summary_paper": "53;66;180;101", "wc_strength_and_weaknesses": "163;202;130;159", "wc_clarity_quality_novelty_and_reproducibility": "107;1;74;131", "wc_summary_review": "465;55;59;28", "wc_review": "788;324;443;419", "wc_reply_reviewers": "0;372;74;145", "wc_reply_authors": "1076;2034;856;794", "reply_reviewers": "0;4;1;2", "reply_authors": "3;5;3;3", "recommendation_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 1.75, 1.0897247358851685 ], "wc_summary_paper_avg": [ 100.0, 49.411537114321796 ], "wc_strength_and_weaknesses_avg": [ 163.5, 25.617376914898998 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 78.25, 48.976397376695644 ], "wc_summary_review_avg": [ 151.75, 181.2475861908235 ], "wc_review_avg": [ 493.5, 175.75622321841124 ], "wc_reply_reviewers_avg": [ 147.75, 139.25224414708725 ], "wc_reply_authors_avg": [ 1190.0, 498.4235146940802 ], "reply_reviewers_avg": [ 1.75, 1.479019945774904 ], "reply_authors_avg": [ 3.5, 0.8660254037844386 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 
-0.5773502691896257, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5478163305402458313&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=3VFQfAG3vwi", "email": "duke.edu;ncsu.edu;;duke.edu", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Duke University;North Carolina State University", "aff_unique_dep": ";", "aff_unique_url": "https://www.duke.edu;https://www.ncsu.edu", "aff_unique_abbr": "Duke;NCSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Gradient Boosting Performs Gaussian Process Inference", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11849", "id": "3VKiaagxw1S", "poster": "", "openreview": "https://openreview.net/forum?id=3VKiaagxw1S", "slides": "https://iclr.cc/virtual/2023/poster/11849", "video": "https://iclr.cc/virtual/2023/poster/11849", "author_site": "Aleksei Ustimenko, Artem Beliakov, Liudmila Prokhorenkova", "tldr": "We prove that gradient boosting converges to a Gaussian process' posterior mean and can be transformed into a sampler from the posterior, which leads to improved knowledge uncertainty estimates.", "abstract": "This paper shows that gradient boosting based on symmetric decision trees can be equivalently reformulated as a kernel method that converges to the solution of a certain Kernel Ridge Regression problem. Thus, we obtain the convergence to a Gaussian Process' posterior mean, which, in turn, allows us to easily transform gradient boosting into a sampler from the posterior to provide better knowledge uncertainty estimates through Monte-Carlo estimation of the posterior variance. We show that the proposed sampler allows for better knowledge uncertainty estimates leading to improved out-of-domain detection.", "keywords": "gradient boosting;gaussian process;knowledge uncertainty;kernel gradient boosting", "primary_area": "", "supplementary_material": "", "author": "Aleksei Ustimenko;Artem Beliakov;Liudmila Prokhorenkova", "authorids": "~Aleksei_Ustimenko1;~Artem_Beliakov1;~Liudmila_Prokhorenkova1", "gender": "M;M;F", "homepage": ";;", "dblp": "242/3873;;45/11468", "google_scholar": "OES5pK4AAAAJ;;https://scholar.google.ru/citations?user=6JyZlSEAAAAJ", "orcid": ";;", "linkedin": ";artem-beliakov-6a155123a;", "or_profile": "~Aleksei_Ustimenko1;~Artem_Beliakov1;~Liudmila_Prokhorenkova1", "aff": "ShareChat;St. 
Petersburg State University;Yandex", "aff_domain": "sharechat.co;spbu.ru;yandex-team.ru", "position": "Researcher;MS student;Researcher", "bibtex": "@inproceedings{\nustimenko2023gradient,\ntitle={Gradient Boosting Performs Gaussian Process Inference},\nauthor={Aleksei Ustimenko and Artem Beliakov and Liudmila Prokhorenkova},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=3VKiaagxw1S}\n}", "github": "", "project": "", "reviewers": "1ARD;HjFB;iZU9", "pdf_size": 816027, "recommendation": "6;6;6", "confidence": "3;3;3", "correctness": "4;4;4", "technical_novelty": "2;3;3", "empirical_novelty": "1;3;3", "wc_summary_paper": "90;137;49", "wc_strength_and_weaknesses": "76;195;137", "wc_clarity_quality_novelty_and_reproducibility": "28;221;54", "wc_summary_review": "94;50;26", "wc_review": "288;603;266", "wc_reply_reviewers": "75;0;21", "wc_reply_authors": "446;1198;323", "reply_reviewers": "1;0;1", "reply_authors": "2;2;1", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.0, 0.0 ], "correctness_avg": [ 4.0, 0.0 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.9428090415820634 ], "wc_summary_paper_avg": [ 92.0, 35.95367389665021 ], "wc_strength_and_weaknesses_avg": [ 136.0, 48.586692279539534 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 101.0, 85.514131385793 ], "wc_summary_review_avg": [ 56.666666666666664, 28.158282775923837 ], "wc_review_avg": [ 385.6666666666667, 153.94010379220728 ], "wc_reply_reviewers_avg": [ 32.0, 31.591137997862628 ], "wc_reply_authors_avg": [ 655.6666666666666, 386.7611953418055 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10564034494988166553&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=3VKiaagxw1S", "email": "sharechat.co;spbu.ru;yandex-team.ru", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "ShareChat;St. Petersburg State University;Yandex", "aff_unique_dep": ";;", "aff_unique_url": "https://www.sharechat.com;https://www.spbu.ru;https://yandex.com", "aff_unique_abbr": ";SPbU;Yandex", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "India;Russian Federation" }, { "title": "StableDR: Stabilized Doubly Robust Learning for Recommendation on Data Missing Not at Random", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11206", "id": "3VO1y5N7K1H", "poster": "", "openreview": "https://openreview.net/forum?id=3VO1y5N7K1H", "slides": "https://iclr.cc/virtual/2023/poster/11206", "video": "https://iclr.cc/virtual/2023/poster/11206", "author_site": "Haoxuan Li, Chunyuan Zheng, Peng Wu", "tldr": "This paper proposes a theoretically guaranteed stabilized doubly robust learning approach that overcomes the shortcomings due to the presence of extremely small propensities in debiased recommendations.", "abstract": "In recommender systems, users always choose the favorite items to rate, which leads to data missing not at random and poses a great challenge for unbiased evaluation and learning of prediction models. 
Currently, doubly robust (DR) methods have been widely studied and demonstrate superior performance. However, in this paper, we show that DR methods are unstable and have unbounded bias, variance, and generalization bounds in the presence of extremely small propensities. Moreover, the heavier reliance of DR on extrapolation leads to suboptimal performance. To address the above limitations while retaining double robustness, we propose a stabilized doubly robust (StableDR) learning approach with a weaker reliance on extrapolation. Theoretical analysis shows that StableDR simultaneously has bounded bias, variance, and generalization error under inaccurate imputed errors and arbitrarily small propensities. In addition, we propose a novel learning approach for StableDR that updates the imputation, propensity, and prediction models cyclically, achieving more stable and accurate predictions. Extensive experiments show that our approaches significantly outperform the existing methods.", "keywords": "Recommender System;Bias;Debias;Doubly Robust", "primary_area": "", "supplementary_material": "/attachment/cfc352d7f504a5c7fc4e052b31f2e024034d9e51.zip", "author": "Haoxuan Li;Chunyuan Zheng;Peng Wu", "authorids": "~Haoxuan_Li6;~Chunyuan_Zheng1;~Peng_Wu5", "gender": "M;M;M", "homepage": "https://haoxuanli-pku.github.io/;;https://pengwu.site/", "dblp": "145/4965-1.html;;15/6146-12", "google_scholar": "gtDqiucAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?view_op=list_works", "orcid": "0000-0003-3620-3769;0000-0002-0306-7310;0000-0001-7154-8880", "linkedin": ";;", "or_profile": "~Haoxuan_Li6;~Chunyuan_Zheng1;~Peng_Wu5", "aff": "Peking University;Department of Computer Science, University of Illinois at Urbana-Champaign;Beijing Technology and Business University", "aff_domain": "pku.edu.cn;cs.illinois.edu;btbu.edu.cn", "position": "PhD student;MS student;Associate Professor", "bibtex": "@inproceedings{\nli2023stabledr,\ntitle={Stable{DR}: Stabilized Doubly Robust Learning for Recommendation on Data Missing Not at Random},\nauthor={Haoxuan Li and Chunyuan Zheng and Peng Wu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=3VO1y5N7K1H}\n}", "github": "", "project": "", "reviewers": "f6Th;HkXH;si5q", "pdf_size": 505140, "recommendation": "5;8;8", "confidence": "4;4;3", "correctness": "3;3;4", "technical_novelty": "2;3;3", "empirical_novelty": "2;3;3", "wc_summary_paper": "163;142;115", "wc_strength_and_weaknesses": "283;198;153", "wc_clarity_quality_novelty_and_reproducibility": "23;42;43", "wc_summary_review": "19;119;32", "wc_review": "488;501;343", "wc_reply_reviewers": "0;245;32", "wc_reply_authors": "2782;1607;1470", "reply_reviewers": "0;2;1", "reply_authors": "6;4;3", "recommendation_avg": [ 7.0, 1.4142135623730951 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 140.0, 19.6468827043885 ], "wc_strength_and_weaknesses_avg": [ 211.33333333333334, 53.903205429320764 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 36.0, 9.201449161228174 ], "wc_summary_review_avg": [ 56.666666666666664, 44.39469437769438 ], "wc_review_avg": [ 444.0, 71.6147098483731 ], "wc_reply_reviewers_avg": [ 92.33333333333333, 108.73923956981778 ], 
"wc_reply_authors_avg": [ 1953.0, 588.8536886754355 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 4.333333333333333, 1.247219128924647 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.5, "corr_recommendation_correctness": 0.5, "gs_citation": 67, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=766123580581011590&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=3VO1y5N7K1H", "email": "pku.edu.cn;cs.illinois.edu;btbu.edu.cn", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Peking University;University of Illinois Urbana-Champaign;Beijing Technology and Business University", "aff_unique_dep": ";Department of Computer Science;", "aff_unique_url": "http://www.pku.edu.cn;https://illinois.edu;http://www.btbu.edu.cn", "aff_unique_abbr": "Peking U;UIUC;BTBU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "0;1;0", "aff_country_unique": "China;United States" }, { "id": "3WYtm7UzsR", "title": "Towards scalable and non-IID robust Hierarchical Federated Learning via Label-driven Knowledge Aggregator", "track": "main", "status": "Reject", "tldr": "We propose a Hierarchical FL framework to divide and conquer non-IID group-by-group", "abstract": "In real-world applications, Federated Learning (FL) meets two challenges: (1) scalability, especially when applied to massive IoT networks, and (2) how to be robust against an environment with heterogeneous data. Realizing the first problem, we aim to design a novel FL framework named Full-stack FL (F2L). More specifically, F2L utilizes a hierarchical network architecture, making extending the FL network accessible without reconstructing the whole network system. Moreover, leveraging the advantages of hierarchical network design, we propose a new label-driven knowledge distillation (LKD) technique at the global server to address the second problem. As opposed to current knowledge distillation techniques, LKD is capable of training a student model, which consists of good knowledge from all teachers' models. Therefore, our proposed algorithm can effectively extract the knowledge of the regions' data distribution (i.e., the regional aggregated models) to reduce the divergence between clients' models when operating under the FL system with non-independent identically distributed data. 
Extensive experiment results reveal that: (i) our F2L method can significantly improve the overall FL efficiency in all global distillations, and (ii) F2L rapidly achieves convergence as global distillation stages occur instead of increasing on each communication cycle.", "keywords": "Federated Learning;Knowledge Distillation;non-IID", "primary_area": "", "supplementary_material": "/attachment/9763752658251bffed9d12c8b6d321edb2660a55.zip", "author": "Duong Minh Nguyen;Viet Quoc Pham;Hoang Thai Dinh;Diep Nguyen;Long Tran-Thanh;Won-Joo Hwang", "authorids": "~Duong_Minh_Nguyen1;vietpq90@gmail.com;dinhthaihoang.au@gmail.com;diep.nguyen@uts.edu.au;~Long_Tran-Thanh1;wjhwang@pusan.ac.kr", "gender": "M;;;;;", "homepage": "https://www.github.com/skydvn;;;;https://warwick.ac.uk/fac/sci/dcs/people/long_tran-thanh/;", "dblp": "157/4392;;;;46/8333;", "google_scholar": "3ea0RLkAAAAJ;;;;https://scholar.google.co.uk/citations?user=YBQai3gAAAAJ;", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Duong_Minh_Nguyen1;vietpq90@gmail.com;dinhthaihoang.au@gmail.com;diep.nguyen@uts.edu.au;~Long_Tran-Thanh1;wjhwang@pusan.ac.kr", "aff": "Pusan National University;;;;;", "aff_domain": "pusan.ac.kr;;;;;", "position": "PhD student;;;;;", "bibtex": "@misc{\nnguyen2023towards,\ntitle={Towards scalable and non-{IID} robust Hierarchical Federated Learning via Label-driven Knowledge Aggregator},\nauthor={Duong Minh Nguyen and Viet Quoc Pham and Hoang Thai Dinh and Diep Nguyen and Long Tran-Thanh and Won-Joo Hwang},\nyear={2023},\nurl={https://openreview.net/forum?id=3WYtm7UzsR}\n}", "github": "", "project": "", "reviewers": "jW2m;3LcY;yujU", "site": "https://openreview.net/forum?id=3WYtm7UzsR", "pdf_size": 446147, "recommendation": "3;3;3", "confidence": "4;4;4", "correctness": "3;4;2", "technical_novelty": "2;3;2", "empirical_novelty": "2;3;2", "wc_summary_paper": "58;100;18", "wc_strength_and_weaknesses": "167;434;292", "wc_clarity_quality_novelty_and_reproducibility": "5;83;50", "wc_summary_review": "3;55;65", "wc_review": "233;672;425", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 58.666666666666664, 33.47967874530592 ], "wc_strength_and_weaknesses_avg": [ 297.6666666666667, 109.075916478183 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 46.0, 31.96873472629156 ], "wc_summary_review_avg": [ 41.0, 27.17842281418601 ], "wc_review_avg": [ 443.3333333333333, 179.68923791430086 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8678509986185504913&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Pusan National University", "aff_unique_dep": "", "aff_unique_url": "https://www.pnu.ac.kr", "aff_unique_abbr": "PNU", "aff_country_unique_index": "0", "aff_country_unique": "South Korea" }, { "title": "No Reason for No Supervision: Improved Generalization in Supervised Models", "status": "Top-25%", "track": "main", "site": 
"https://iclr.cc/virtual/2023/poster/12145", "id": "3Y5Uhf5KgGK", "poster": "/media/PosterPDFs/ICLR%202023/12145.png?t=1682412941.7485278", "openreview": "https://openreview.net/forum?id=3Y5Uhf5KgGK", "slides": "https://iclr.cc/virtual/2023/poster/12145", "video": "https://iclr.cc/virtual/2023/poster/12145", "author_site": "Mert Bulent Sariyildiz, Yannis Kalantidis, Karteek Alahari, Diane Larlus", "tldr": "", "abstract": "We consider the problem of training a deep neural network on a given classification task, e.g., ImageNet-1K (IN1K), so that it excels at both the training task as well as at other (future) transfer tasks. These two seemingly contradictory properties impose a trade-off between improving the model\u2019s generalization and maintaining its performance on the original task. Models trained with self-supervised learning tend to generalize better than their supervised counterparts for transfer learning; yet, they still lag behind supervised models on IN1K. In this paper, we propose a supervised learning setup that leverages the best of both worlds. We extensively analyze supervised training using multi-scale crops for data augmentation and an expendable projector head, and reveal that the design of the projector allows us to control the trade-off between performance on the training task and transferability. We further replace the last layer of class weights with class prototypes computed on the fly using a memory bank and derive two models: t-ReX that achieves a new state of the art for transfer learning and outperforms top methods such as DINO and PAWS on IN1K, and t-ReX* that matches the highly optimized RSB-A1 model on IN1K while performing better on transfer tasks.\nCode and pretrained models: https://europe.naverlabs.com/t-rex", "keywords": "supervised learning;transfer learning;representation learning", "primary_area": "", "supplementary_material": "", "author": "Mert B\u00fclent Sar\u0131y\u0131ld\u0131z;Yannis Kalantidis;Karteek Alahari;Diane Larlus", "authorids": "~Mert_B\u00fclent_Sar\u0131y\u0131ld\u0131z1;~Yannis_Kalantidis2;~Karteek_Alahari1;~Diane_Larlus1", "gender": "M;M;M;F", "homepage": "https://mbsariyildiz.github.io;https://www.skamalas.com/;http://thoth.inrialpes.fr/people/alahari;https://dlarlus.github.io/", "dblp": "247/9362;33/8693;a/KarteekAlahari;48/4033", "google_scholar": "9vpQ9tIAAAAJ;QJZQgN8AAAAJ;https://scholar.google.fr/citations?user=qcyG7rwAAAAJ;https://scholar.google.fr/citations?user=nI2oJqkAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Mert_B\u00fclent_Sar\u0131y\u0131ld\u0131z1;~Yannis_Kalantidis2;~Karteek_Alahari1;~Diane_Larlus1", "aff": "INRIA;Naver Labs Europe;Inria;NAVER LABS Europe", "aff_domain": "inria.fr;naverlabs.com;inria.fr;naverlabs.com", "position": "PhD student;Research Scientist;Tenured researcher (eq. Asso. 
prof.);Principal Researcher", "bibtex": "@inproceedings{\nsar{\\i}y{\\i}ld{\\i}z2023no,\ntitle={No Reason for No Supervision: Improved Generalization in Supervised Models},\nauthor={Mert B{\\\"u}lent Sar{\\i}y{\\i}ld{\\i}z and Yannis Kalantidis and Karteek Alahari and Diane Larlus},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=3Y5Uhf5KgGK}\n}", "github": "", "project": "", "reviewers": "yEpr;o6eH;p7Yg;63Ba", "pdf_size": 5453756, "recommendation": "5;6;8;8", "confidence": "4;4;4;3", "correctness": "3;4;4;4", "technical_novelty": "1;2;2;2", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "78;89;62;124", "wc_strength_and_weaknesses": "308;129;147;246", "wc_clarity_quality_novelty_and_reproducibility": "67;41;25;121", "wc_summary_review": "70;30;33;33", "wc_review": "523;289;267;524", "wc_reply_reviewers": "108;26;27;41", "wc_reply_authors": "782;364;280;741", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "recommendation_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 88.25, 22.76373211931646 ], "wc_strength_and_weaknesses_avg": [ 207.5, 73.15223851667152 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 63.5, 36.42457961322272 ], "wc_summary_review_avg": [ 41.5, 16.5 ], "wc_review_avg": [ 400.75, 122.99669711012568 ], "wc_reply_reviewers_avg": [ 50.5, 33.7231374578345 ], "wc_reply_authors_avg": [ 541.75, 222.22103298292896 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.5555555555555555, "corr_recommendation_correctness": 0.7777777777777777, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7175920448881401435&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "pdf": "https://openreview.net/pdf?id=3Y5Uhf5KgGK", "email": "inria.fr;naverlabs.com;inria.fr;naverlabs.com", "author_num": 4, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "INRIA;NAVER LABS", "aff_unique_dep": ";", "aff_unique_url": "https://www.inria.fr;https://labs.naver.com", "aff_unique_abbr": "INRIA;NLE", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "France;Unknown" }, { "title": "Unicom: Universal and Compact Representation Learning for Image Retrieval", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11789", "id": "3YFDsSRSxB-", "poster": "/media/PosterPDFs/ICLR%202023/11789.png?t=1681065578.438124", "openreview": "https://openreview.net/forum?id=3YFDsSRSxB-", "slides": "https://iclr.cc/virtual/2023/poster/11789", "video": "https://iclr.cc/virtual/2023/poster/11789", "author_site": "xiang an, Jiankang Deng, Kaicheng Yang, Jaiwei Li, Ziyong Feng, Jia Guo, Jing Yang, Tongliang Liu", "tldr": "", "abstract": "Modern image retrieval methods typically rely on fine-tuning pre-trained encoders to extract image-level descriptors.\nHowever, the most widely used models are pre-trained on ImageNet-1K with limited classes. The pre-trained feature representation is therefore not universal enough to generalize well to the diverse open-world classes. 
\nIn this paper, we first cluster the large-scale LAION dataset into one million pseudo classes based on the joint textual and visual features extracted by the CLIP model. Due to the confusion of label granularity, the automatically clustered dataset inevitably contains heavy inter-class conflict. To alleviate such conflict, we randomly select partial inter-class prototypes to construct the margin-based softmax loss. To further enhance the low-dimensional feature representation, we randomly select partial feature dimensions when calculating the similarities between embeddings and class-wise prototypes. The dual random partial selections are with respect to the class dimension and the feature dimension of the prototype matrix, making the classification conflict-robust and the feature embedding compact. Our method significantly outperforms state-of-the-art unsupervised and supervised image retrieval approaches on multiple benchmarks. The code and pre-trained models are released to facilitate future research \\url{https://github.com/deepglint/unicom}. ", "keywords": "Cluster Discrimination;Image Retrieval", "primary_area": "", "supplementary_material": "/attachment/d480be33f9c8d6b38427f8997ba48eebaeae38dd.zip", "author": "Xiang An;Jiankang Deng;Kaicheng Yang;Jaiwei Li;Ziyong Feng;Jia Guo;Jing Yang;Tongliang Liu", "authorids": "~Xiang_An1;~Jiankang_Deng1;~Kaicheng_Yang1;~Jaiwei_Li1;~Ziyong_Feng1;~Jia_Guo1;~Jing_Yang7;~Tongliang_Liu1", "gender": "M;M;M;M;M;;F;M", "homepage": ";https://jiankangdeng.github.io/;https://kaicheng-yang0828.github.io/;https://github.com/KirtoXX;https://github.com/fengziyong;https://insightface.ai;https://jingyang2017.github.io/;https://tongliang-liu.github.io/", "dblp": "132/0889;156/7808;118/4505-2.html;;120/4362.html;;62/5839-38.html;150/6667", "google_scholar": "1ckaPgwAAAAJ;Z_UoQFsAAAAJ;AQMkoXIAAAAJ;;;H_-hMLUAAAAJ;https://scholar.google.co.uk/citations?user=a0HJYXcAAAAJ;https://scholar.google.com.au/citations?user=EiLdZ_YAAAAJ", "orcid": "0009-0008-4652-8296;0000-0002-3709-6216;0009-0008-6073-9014;;0009-0007-8689-8366;;0000-0002-8794-4842;", "linkedin": ";jiankang-deng-b45b21b4/?originalSubdomain=uk;;;;;;", "or_profile": "~Xiang_An1;~Jiankang_Deng1;~Kaicheng_Yang1;~Jaiwei_Li1;~Ziyong_Feng1;~Jia_Guo1;~Jing_Yang7;~Tongliang_Liu1", "aff": "deepglint;;DeepGlint;;DeepGlint;InsightFace.AI;University of Cambridge;University of Sydney", "aff_domain": "deepglint.com;;deepglint.com;;deepglint.com;insightface.ai;cam.ac.uk;sydney.edu.au", "position": "Researcher;;Researcher;;PhD student;Researcher;Postdoc;Lecturer", "bibtex": "@inproceedings{\nan2023unicom,\ntitle={Unicom: Universal and Compact Representation Learning for Image Retrieval},\nauthor={Xiang An and Jiankang Deng and Kaicheng Yang and Jaiwei Li and Ziyong Feng and Jia Guo and Jing Yang and Tongliang Liu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=3YFDsSRSxB-}\n}", "github": "", "project": "", "reviewers": "FUwi;1Pq2;htKd;HyPg", "pdf_size": 3241722, "recommendation": "5;5;6;8", "confidence": "4;4;4;4", "correctness": "4;3;4;3", "technical_novelty": "2;2;3;2", "empirical_novelty": "3;2;3;3", "wc_summary_paper": "71;27;88;70", "wc_strength_and_weaknesses": "54;20;209;462", "wc_clarity_quality_novelty_and_reproducibility": "54;54;55;107", "wc_summary_review": "40;58;54;42", "wc_review": "219;159;406;681", "wc_reply_reviewers": "0;0;0;60", "wc_reply_authors": "525;656;811;1222", "reply_reviewers": "0;0;0;1", "reply_authors": 
"1;1;1;2", "recommendation_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 64.0, 22.52776065213762 ], "wc_strength_and_weaknesses_avg": [ 186.25, 174.41670648191933 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 67.5, 22.808989455914087 ], "wc_summary_review_avg": [ 48.5, 7.664854858377946 ], "wc_review_avg": [ 366.25, 203.27490622307516 ], "wc_reply_reviewers_avg": [ 15.0, 25.98076211353316 ], "wc_reply_authors_avg": [ 803.5, 261.971849632742 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": -0.40824829046386296, "gs_citation": 43, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=975883267321958668&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=3YFDsSRSxB-", "email": "deepglint.com;;deepglint.com;;deepglint.com;insightface.ai;cam.ac.uk;sydney.edu.au", "author_num": 8, "aff_unique_index": "0;0;0;1;2;3", "aff_unique_norm": "DeepGlint;InsightFace.AI;University of Cambridge;University of Sydney", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.deepglint.com;https://www.insightface.ai;https://www.cam.ac.uk;https://www.sydney.edu.au", "aff_unique_abbr": "DeepGlint;InsightFace.AI;Cambridge;USYD", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;0;0;0;1;2", "aff_country_unique": "China;United Kingdom;Australia" }, { "title": "FoSR: First-order spectral rewiring for addressing oversquashing in GNNs", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11758", "id": "3YjQfCLdrzz", "poster": "/media/PosterPDFs/ICLR%202023/11758.png?t=1685908929.5937953", "openreview": "https://openreview.net/forum?id=3YjQfCLdrzz", "slides": "https://iclr.cc/virtual/2023/poster/11758", "video": "https://iclr.cc/virtual/2023/poster/11758", "author_site": "Kedar Karhadkar, Pradeep Banerjee, Guido Montufar", "tldr": "We propose a graph rewiring algorithm that prevents oversquashing in GNNs via spectral expansion while retaining the original graph via a relational structure that prevents oversmoothing.", "abstract": "Graph neural networks (GNNs) are able to leverage the structure of graph data by passing messages along the edges of the graph. While this allows GNNs to learn features depending on the graph structure, for certain graph topologies it leads to inefficient information propagation and a problem known as oversquashing. This has recently been linked with the curvature and spectral gap of the graph. On the other hand, adding edges to the message-passing graph can lead to increasingly similar node representations and a problem known as oversmoothing. We propose a computationally efficient algorithm that prevents oversquashing by systematically adding edges to the graph based on spectral expansion. We combine this with a relational architecture, which lets the GNN preserve the original graph structure and provably prevents oversmoothing. 
We find experimentally that our algorithm outperforms existing graph rewiring methods in several graph classification tasks.", "keywords": "oversquashing;oversmoothing;graph rewiring;graph neural networks;GNN;relational GNN;spectral expansion", "primary_area": "", "supplementary_material": "", "author": "Kedar Karhadkar;Pradeep Kr. Banerjee;Guido Montufar", "authorids": "~Kedar_Karhadkar1;~Pradeep_Kr._Banerjee2;~Guido_Montufar1", "gender": "M;M;M", "homepage": "https://www.math.ucla.edu/~kedar/;https://e5150pro.github.io/;http://www.math.ucla.edu/~montufar/", "dblp": "278/8407;;", "google_scholar": "WVdm3mQAAAAJ;https://scholar.google.de/citations?user=cnSjMBwAAAAJ;https://scholar.google.de/citations?user=pDIuuVwAAAAJ", "orcid": ";;0000-0002-0131-2669", "linkedin": ";;", "or_profile": "~Kedar_Karhadkar1;~Pradeep_Kr._Banerjee2;~Guido_Montufar1", "aff": "Max Planck Institute for Mathematics in the Sciences, Max-Planck Institute;Max Planck Institute for Mathematics in the Sciences, Max-Planck Institute;UCLA ", "aff_domain": "mis.mpg.de;mis.mpg.de;math.ucla.edu", "position": "Intern;Postdoc;Associate Professor", "bibtex": "@inproceedings{\nkarhadkar2023fosr,\ntitle={Fo{SR}: First-order spectral rewiring for addressing oversquashing in {GNN}s},\nauthor={Kedar Karhadkar and Pradeep Kr. Banerjee and Guido Montufar},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=3YjQfCLdrzz}\n}", "github": "", "project": "", "reviewers": "T9j4;2pg5;j5dr;42Gr", "pdf_size": 1690473, "recommendation": "6;8;8;8", "confidence": "4;3;3;4", "correctness": "3;4;3;3", "technical_novelty": "3;3;3;3", "empirical_novelty": "3;3;2;3", "wc_summary_paper": "71;149;77;160", "wc_strength_and_weaknesses": "695;117;338;729", "wc_clarity_quality_novelty_and_reproducibility": "75;697;55;100", "wc_summary_review": "152;94;34;91", "wc_review": "993;1057;504;1080", "wc_reply_reviewers": "336;143;45;143", "wc_reply_authors": "1713;652;1108;1342", "reply_reviewers": "1;1;1;1", "reply_authors": "3;1;2;2", "recommendation_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 114.25, 40.49305496007926 ], "wc_strength_and_weaknesses_avg": [ 469.75, 254.82285513666156 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 231.75, 269.0849076035295 ], "wc_summary_review_avg": [ 92.75, 41.73352968537409 ], "wc_review_avg": [ 908.5, 235.70373353003978 ], "wc_reply_reviewers_avg": [ 166.75, 105.58971304061774 ], "wc_reply_authors_avg": [ 1203.75, 384.72222121941434 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 89, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17400176532005303489&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "pdf": "https://openreview.net/pdf?id=3YjQfCLdrzz", "email": "mis.mpg.de;mis.mpg.de;math.ucla.edu", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Max Planck Institute for Mathematics in the Sciences;University of California, Los Angeles", "aff_unique_dep": "Mathematics in the Sciences;", "aff_unique_url": "https://www.mis.mpg.de;https://www.ucla.edu", "aff_unique_abbr": "MPI MIS;UCLA", 
"aff_campus_unique_index": "1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Germany;United States" }, { "id": "3Z-xKxKc-R", "title": "An Evolutionary Approach to Dynamic Introduction of Tasks in Large-scale Multitask Learning Systems", "track": "main", "status": "Reject", "tldr": "", "abstract": "Multitask learning assumes that models capable of learning from multiple tasks can achieve better quality and efficiency via knowledge transfer, a key feature of human learning. Though, state of the art ML models rely on high customization for each task and leverage size and data scale rather than scaling the number of tasks. Also, continual learning, that adds the temporal aspect to multitask, is often focused to the study of common pitfalls such as catastrophic forgetting instead of being studied at a large scale as a critical component to build the next generation artificial intelligence.\nWe propose an evolutionary method capable of generating large scale multitask models that support the dynamic addition of new tasks. The generated multitask models are sparsely activated and integrates a task-based routing that guarantees bounded compute cost and fewer added parameters per task as the model expands. The proposed method relies on a knowledge compartmentalization technique to achieve immunity against catastrophic forgetting and other common pitfalls such as gradient interference and negative transfer. We demonstrate empirically that the proposed method can jointly solve and achieve competitive results on 69 public image classification tasks, for example improving the state of the art on a competitive benchmark such as cifar10 by achieving a 15% relative error reduction compared to the best model trained on public data.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/5c290e7f5ef64182cbc0395e23ecc9c30d08c7c7.zip", "author": "Andrea Gesmundo;Jeff Dean", "authorids": "~Andrea_Gesmundo1;~Jeff_Dean1", "gender": ";M", "homepage": ";https://research.google/people/jeff/", "dblp": ";d/JeffreyDean", "google_scholar": "TrbdFnYAAAAJ;NMS69lQAAAAJ", "orcid": ";", "linkedin": "andrea-gesmundo-a703b96b/;", "or_profile": "~Andrea_Gesmundo1;~Jeff_Dean1", "aff": "Google;Google", "aff_domain": "google.com;google.com", "position": "SWE in Research;Researcher", "bibtex": "@misc{\ngesmundo2023an,\ntitle={An Evolutionary Approach to Dynamic Introduction of Tasks in Large-scale Multitask Learning Systems},\nauthor={Andrea Gesmundo and Jeff Dean},\nyear={2023},\nurl={https://openreview.net/forum?id=3Z-xKxKc-R}\n}", "github": "", "project": "", "reviewers": "uLXC;W895;uZHy;3FfT", "site": "https://openreview.net/forum?id=3Z-xKxKc-R", "pdf_size": 1212140, "recommendation": "1;5;6;6", "confidence": "3;4;4;4", "correctness": "2;2;3;4", "technical_novelty": "1;2;2;4", "empirical_novelty": "1;2;3;4", "wc_summary_paper": "19;42;69;62", "wc_strength_and_weaknesses": "137;149;635;431", "wc_clarity_quality_novelty_and_reproducibility": "17;48;21;26", "wc_summary_review": "26;10;24;7", "wc_review": "199;249;749;526", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.5, 2.0615528128088303 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.82915619758885 ], "technical_novelty_avg": [ 2.25, 1.0897247358851685 ], "empirical_novelty_avg": [ 2.5, 1.118033988749895 ], "wc_summary_paper_avg": [ 48.0, 19.45507645834372 ], 
"wc_strength_and_weaknesses_avg": [ 338.0, 207.95432190748045 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 28.0, 11.979148550710939 ], "wc_summary_review_avg": [ 16.75, 8.347903928532 ], "wc_review_avg": [ 430.75, 221.97789867462032 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.9801960588196067, "corr_recommendation_correctness": 0.6581451817144176, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13720678451144289229&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "3ZGJVocZ2XQ", "title": "Building compact representations for image-language learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "We propose a method to learn compact vision and language representations, which adaptively and iteratively fuses the multi-modal features. It greatly lowers the FLOPs of the model by effectively combining and reducing the number of tokens used for both text and images. This allows the model to scale without a large increase in FLOPs or memory and leads to a data efficient training. In addition, we propose adaptive pre-training data sampling which further improves the data efficiency. We achieve competitive performance compared to much larger models, and do so with significantly less data and FLOPs. With only 40M training examples and with 39 GFLOPs our model of 350M parameters outperforms all methods that have used less than 1B examples for pre-training. 
Code will be released.\n", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "AJ Piergiovanni;Anelia Angelova", "authorids": "~AJ_Piergiovanni1;~Anelia_Angelova1", "gender": ";", "homepage": "http://homes.sice.indiana.edu/ajpiergi/;https://research.google/people/aneliaangelova/", "dblp": "175/9876;46/3065", "google_scholar": "https://scholar.google.com/citations?hl=en;nkmDOPgAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~AJ_Piergiovanni1;~Anelia_Angelova1", "aff": "Google;California Institute of Technology", "aff_domain": "google.com;caltech.edu", "position": "Research Scientist;PhD student", "bibtex": "@misc{\npiergiovanni2023building,\ntitle={Building compact representations for image-language learning},\nauthor={AJ Piergiovanni and Anelia Angelova},\nyear={2023},\nurl={https://openreview.net/forum?id=3ZGJVocZ2XQ}\n}", "github": "", "project": "", "reviewers": "agT4;6t2a;TE2Z;jk3m", "site": "https://openreview.net/forum?id=3ZGJVocZ2XQ", "pdf_size": 348182, "recommendation": "3;3;5;8", "confidence": "3;4;4;3", "correctness": "2;3;4;3", "technical_novelty": "2;2;3;4", "empirical_novelty": "2;2;2;4", "wc_summary_paper": "63;57;125;78", "wc_strength_and_weaknesses": "73;240;211;299", "wc_clarity_quality_novelty_and_reproducibility": "9;38;294;38", "wc_summary_review": "16;29;60;67", "wc_review": "161;364;690;482", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "311;221;163;198", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.75, 2.0463381929681126 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 80.75, 26.668098919870534 ], "wc_strength_and_weaknesses_avg": [ 205.75, 82.94388163089548 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 94.75, 115.64466049065992 ], "wc_summary_review_avg": [ 43.0, 21.15419580130618 ], "wc_review_avg": [ 424.25, 191.63034076053822 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 223.25, 54.71003107292117 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.3665083330689157, "corr_recommendation_correctness": 0.34554737023254406, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:F9VIV2pf84oJ:scholar.google.com/&scioq=Building+compact+representations+for+image-language+learning&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Google;California Institute of Technology", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://www.caltech.edu", "aff_unique_abbr": "Google;Caltech", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Mountain View;Pasadena", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "3ZHX6_Mydd7", "title": "Invariant Aggregator for Defending against Federated Backdoor Attacks", "track": "main", "status": "Reject", "tldr": "This paper shows how to defend against federated backdoor attacks by focusing on the invariant directions in the model optimization trajectory. ", "abstract": "Federated learning is gaining popularity as it enables training of high-utility models across several clients without directly sharing their private data. 
As a downside, the federated setting makes the model vulnerable to various adversarial attacks in the presence of malicious clients. Specifically, an adversary can perform backdoor attacks to control model predictions via poisoning the training dataset with a trigger. In this work, we propose a mitigation for backdoor attacks in a federated learning setup. Our solution forces the model optimization trajectory to focus on the invariant directions that are generally useful for utility and avoid selecting directions that favor a few, possibly malicious, clients. Concretely, we consider the sign consistency of the pseudo-gradient (the client update) as an estimation of the invariance. Following this, our approach performs dimension-wise filtering to remove pseudo-gradient elements with low sign consistency. Then, a robust mean estimator eliminates outliers among the remaining dimensions. Our theoretical analysis further shows the necessity of the defense combination and illustrates how our proposed solution defends the federated learning model. Empirical results on three datasets with different modalities and varying numbers of clients show that our approach mitigates backdoor attacks with a negligible cost on the model utility.\n", "keywords": "Federated learning;robustness;backdoor attack;invariant learning", "primary_area": "", "supplementary_material": "/attachment/071f5c62cbc3449dd0769bdb151cf006ec442413.zip", "author": "Xiaoyang Wang;Dimitrios Dimitriadis;Oluwasanmi O Koyejo;Shruti Tople", "authorids": "~Xiaoyang_Wang6;~Dimitrios_Dimitriadis1;~Oluwasanmi_O_Koyejo1;~Shruti_Tople2", "gender": "M;;M;", "homepage": "https://xiaoyang-wang.github.io/;;https://cs.stanford.edu/~sanmi/;", "dblp": ";05/3143;14/8885;", "google_scholar": ";AQSvco0AAAAJ;EaaOeJwAAAAJ;", "orcid": ";0000-0001-8483-0105;0000-0002-4023-419X;", "linkedin": ";https://www.linkedin.com/dimitrios.dimitriadis;sanmi-koyejo-984754/;", "or_profile": "~Xiaoyang_Wang6;~Dimitrios_Dimitriadis1;~Oluwasanmi_O_Koyejo1;~Shruti_Tople2", "aff": "FedML;Amazon;Google;", "aff_domain": "fedml.ai;amazon.com;google.com;", "position": "Intern;Principal Applied Scientist;Research Scientist;", "bibtex": "@misc{\nwang2023invariant,\ntitle={Invariant Aggregator for Defending against Federated Backdoor Attacks},\nauthor={Xiaoyang Wang and Dimitrios Dimitriadis and Oluwasanmi O Koyejo and Shruti Tople},\nyear={2023},\nurl={https://openreview.net/forum?id=3ZHX6_Mydd7}\n}", "github": "", "project": "", "reviewers": "f8s8;uH18;9BAU;Pmty", "site": "https://openreview.net/forum?id=3ZHX6_Mydd7", "pdf_size": 307097, "recommendation": "3;3;5;5", "confidence": "4;4;5;4", "correctness": "2;3;3;2", "technical_novelty": "2;1;3;2", "empirical_novelty": "2;1;2;2", "wc_summary_paper": "101;31;89;27", "wc_strength_and_weaknesses": "470;92;35;244", "wc_clarity_quality_novelty_and_reproducibility": "67;6;33;40", "wc_summary_review": "13;10;171;41", "wc_review": "651;139;328;352", "wc_reply_reviewers": "0;0;0;41", "wc_reply_authors": "1233;597;1056;984", "reply_reviewers": "0;0;0;1", "reply_authors": "2;1;2;2", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 62.0, 33.301651610693426 ], "wc_strength_and_weaknesses_avg": [ 210.25, 168.3038546795646 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 36.5, 21.70829334609241 ], "wc_summary_review_avg": [ 58.75,
65.9256209678756 ], "wc_review_avg": [ 367.5, 183.2927985492065 ], "wc_reply_reviewers_avg": [ 10.25, 17.75352077758099 ], "wc_reply_authors_avg": [ 967.5, 232.30637098452553 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16256624720413343166&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "FedML;Amazon;Google", "aff_unique_dep": ";Amazon.com, Inc.;Google", "aff_unique_url": "https://www.fedml.ai;https://www.amazon.com;https://www.google.com", "aff_unique_abbr": "FedML;Amazon;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Approximate Vanishing Ideal Computations at Scale", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11979", "id": "3ZPESALKXO", "poster": "/media/PosterPDFs/ICLR%202023/11979.png?t=1680783022.825687", "openreview": "https://openreview.net/forum?id=3ZPESALKXO", "slides": "https://iclr.cc/virtual/2023/poster/11979", "video": "https://iclr.cc/virtual/2023/poster/11979", "author_site": "Elias Wirth, Hiroshi Kera, Sebastian Pokutta", "tldr": "We study approximate vanishing ideal algorithms at scale.", "abstract": "The vanishing ideal of a set of points $X = \\{\\mathbf{x}_1, \\ldots, \\mathbf{x}_m\\}\\subseteq \\mathbb{R}^n$ is the set of polynomials that evaluate to $0$ over all points $\\mathbf{x} \\in X$ and admits an efficient representation by a finite subset of generators. In practice, to accommodate noise in the data, algorithms that construct generators of the approximate vanishing ideal are widely studied but their computational complexities remain expensive. In this paper, we scale up the oracle approximate vanishing ideal algorithm (OAVI), the only generator-constructing algorithm with known learning guarantees. We prove that the computational complexity of OAVI is not superlinear, as previously claimed, but linear in the number of samples $m$. In addition, we propose two modifications that accelerate OAVI's training time: Our analysis reveals that replacing the pairwise conditional gradients algorithm, one of the solvers used in OAVI, with the faster blended pairwise conditional gradients algorithm leads to an exponential speed-up in the number of features $n$. 
Finally, using a new inverse Hessian boosting approach, intermediate convex optimization problems can be solved almost instantly, improving OAVI's training time by multiple orders of magnitude in a variety of numerical experiments.", "keywords": "approximate vanishing ideal;convex optimization;conditional gradients algorithms;Hessian matrix", "primary_area": "", "supplementary_material": "", "author": "Elias Samuel Wirth;Hiroshi Kera;Sebastian Pokutta", "authorids": "~Elias_Samuel_Wirth1;kera.hiroshi@gmail.com;~Sebastian_Pokutta1", "gender": "M;;M", "homepage": "https://elwirth.github.io/;;http://www.pokutta.com", "dblp": "293/8082.html;;75/7718", "google_scholar": "Sz2JU6oAAAAJ;;", "orcid": "0009-0008-8957-8736;;", "linkedin": ";;", "or_profile": "~Elias_Samuel_Wirth1;kera.hiroshi@gmail.com;~Sebastian_Pokutta1", "aff": "TU Berlin;;TU Berlin", "aff_domain": "tu-berlin.de;;tu-berlin.de", "position": "PhD student;;Full Professor", "bibtex": "@inproceedings{\nwirth2023approximate,\ntitle={Approximate Vanishing Ideal Computations at Scale},\nauthor={Elias Samuel Wirth and Hiroshi Kera and Sebastian Pokutta},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=3ZPESALKXO}\n}", "github": "", "project": "", "reviewers": "qggK;thxT;298L", "pdf_size": 2322056, "recommendation": "6;8;8", "confidence": "3;3;3", "correctness": "4;4;4", "technical_novelty": "3;3;3", "empirical_novelty": "3;3;0", "wc_summary_paper": "60;47;93", "wc_strength_and_weaknesses": "75;327;184", "wc_clarity_quality_novelty_and_reproducibility": "25;48;68", "wc_summary_review": "140;55;144", "wc_review": "300;477;489", "wc_reply_reviewers": "0;160;0", "wc_reply_authors": "322;559;872", "reply_reviewers": "0;2;0", "reply_authors": "1;2;2", "recommendation_avg": [ 7.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.0 ], "correctness_avg": [ 4.0, 0.0 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 1.4142135623730951 ], "wc_summary_paper_avg": [ 66.66666666666667, 19.362047641943477 ], "wc_strength_and_weaknesses_avg": [ 195.33333333333334, 103.19022348179222 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 47.0, 17.568911937472585 ], "wc_summary_review_avg": [ 113.0, 41.04469108991645 ], "wc_review_avg": [ 422.0, 86.4060183089118 ], "wc_reply_reviewers_avg": [ 53.333333333333336, 75.42472332656506 ], "wc_reply_authors_avg": [ 584.3333333333334, 225.24998458502847 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15820688190889032810&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "pdf": "https://openreview.net/pdf?id=3ZPESALKXO", "email": "tu-berlin.de;;tu-berlin.de", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Technische Universit\u00e4t Berlin", "aff_unique_dep": "", "aff_unique_url": "https://www.tu-berlin.de", "aff_unique_abbr": "TU Berlin", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Berlin", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "id": "3_NvTLGjDKy", "title": "Unified neural representation model for physical and conceptual spaces", "track": "main", "status": "Reject", "tldr": "A single model explains how grid-like and concept-specific 
representations emerge and function in the entorhinal cortex.", "abstract": "The spatial processing system of the brain uses grid-like neural representations (grid cells) for supporting vector-based navigation. Experiments also suggest that neural representations for concepts (concept cells) exist in the human brain, and conceptual inference relies on navigation in conceptual spaces. We propose a unified model called ``disentangled successor information (DSI)'' that explains neural representations for both physical and conceptual spaces. DSI generates grid-like representations in a 2-dimensional space that highly resemble those observed in the brain. Moreover, the same model creates concept-specific representations from linguistic inputs, corresponding to concept cells. Mathematically, DSI vectors approximate value functions for navigation and word vectors obtained by word embedding methods, thus enabling both spatial navigation and conceptual inference based on vector-based calculation. Our results suggest that a single principle can explain computation of physical and conceptual spaces in the human brain.", "keywords": "Neuroscience;Grid cell;Concept cell;Spatial navigation;Reinforcement learning;Word embedding", "primary_area": "", "supplementary_material": "", "author": "Tatsuya Haga;Yohei Oseki;Tomoki Fukai", "authorids": "~Tatsuya_Haga1;~Yohei_Oseki1;~Tomoki_Fukai1", "gender": "M;M;M", "homepage": "https://sites.google.com/view/tatsuyahaga-compneuro;https://researchmap.jp/oseki/?lang=english;https://groups.oist.jp/ncbc", "dblp": ";249/6882;", "google_scholar": "https://scholar.google.co.jp/citations?user=LU-LnVAAAAAJ;GshpLs8AAAAJ;https://scholar.google.co.jp/citations?user=iO7jHc4AAAAJ", "orcid": "0000-0003-3145-709X;0000-0002-1189-1588;my-orcid?orcid=0000-0001-6977-5638", "linkedin": ";;", "or_profile": "~Tatsuya_Haga1;~Yohei_Oseki1;~Tomoki_Fukai1", "aff": "Okinawa Institute of Science and Technology (OIST);University of Tokyo;Okinawa Institute of Science and Technology (OIST)", "aff_domain": "oist.jp;g.ecc.u-tokyo.ac.jp;oist.jp", "position": "Postdoc;Assistant Professor;Full Professor", "bibtex": "@misc{\nhaga2023unified,\ntitle={Unified neural representation model for physical and conceptual spaces},\nauthor={Tatsuya Haga and Yohei Oseki and Tomoki Fukai},\nyear={2023},\nurl={https://openreview.net/forum?id=3_NvTLGjDKy}\n}", "github": "", "project": "", "reviewers": "33UN;Eei5;K3nY;pkWT", "site": "https://openreview.net/forum?id=3_NvTLGjDKy", "pdf_size": 3099635, "recommendation": "3;3;6;8", "confidence": "3;4;4;4", "correctness": "2;2;3;4", "technical_novelty": "3;3;3;4", "empirical_novelty": "2;2;3;4", "wc_summary_paper": "207;50;51;152", "wc_strength_and_weaknesses": "734;324;171;231", "wc_clarity_quality_novelty_and_reproducibility": "32;9;78;21", "wc_summary_review": "55;126;14;40", "wc_review": "1028;509;314;444", "wc_reply_reviewers": "0;0;0;22", "wc_reply_authors": "1967;1131;619;423", "reply_reviewers": "0;0;0;1", "reply_authors": "3;2;1;2", "recommendation_avg": [ 5.0, 2.1213203435596424 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.82915619758885 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 115.0, 67.36839021380874 ], "wc_strength_and_weaknesses_avg": [ 365.0, 219.90566159151064 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 35.0, 26.124700955226263 ], "wc_summary_review_avg": [ 58.75, 41.5052707496289 ], "wc_review_avg": [ 573.75, 
271.4962016308884 ], "wc_reply_reviewers_avg": [ 5.5, 9.526279441628825 ], "wc_reply_authors_avg": [ 1035.0, 596.9589600634201 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.5443310539518174, "corr_recommendation_correctness": 0.994936676326182, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:FI5r_ruEFIUJ:scholar.google.com/&scioq=Unified+neural+representation+model+for+physical+and+conceptual+spaces&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;0", "aff_unique_norm": "Okinawa Institute of Science and Technology;University of Tokyo", "aff_unique_dep": ";", "aff_unique_url": "https://www.oist.jp;https://www.u-tokyo.ac.jp", "aff_unique_abbr": "OIST;UTokyo", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Japan" }, { "title": "Test-Time Robust Personalization for Federated Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12065", "id": "3aBuJEza5sq", "poster": "", "openreview": "https://openreview.net/forum?id=3aBuJEza5sq", "slides": "https://iclr.cc/virtual/2023/poster/12065", "video": "https://iclr.cc/virtual/2023/poster/12065", "author_site": "Liangze Jiang, Tao Lin", "tldr": "We identify the pitfalls of existing personalized federated learning methods during deployment and propose a novel test-time personalization solution.", "abstract": "Federated Learning (FL) is a machine learning paradigm where many clients collaboratively learn a shared global model with decentralized training data. Personalization on FL models additionally adapts the global model to different clients, achieving promising results on consistent local training & test distributions. However, for real-world personalized FL applications, it is crucial to go one step further: robustifying FL models under the evolving local test set during deployment, where various types of distribution shifts can arise. In this work, we identify the pitfalls of existing works under test-time distribution shifts and propose Federated Test-time Head Ensemble plus tuning (FedTHE+), which personalizes FL models with robustness to various test-time distribution shifts. We illustrate the advancement of FedTHE+ (and its degraded computationally efficient variant FedTHE) over strong competitors, for training various neural architectures (CNN, ResNet, and Transformer) on CIFAR10 and ImageNet and evaluating on diverse test distributions. Along with this, we build a benchmark for assessing the performance and robustness of personalized FL methods during deployment. 
Code: \\url{https://github.com/LINs-lab/FedTHE}.\n", "keywords": "Federated Learning;Personalized Federated Learning;Test-time Robustness", "primary_area": "", "supplementary_material": "/attachment/27dbdf24d46e452631c48c49cb76ee790c7580a5.zip", "author": "Liangze Jiang;Tao Lin", "authorids": "~Liangze_Jiang1;~Tao_Lin1", "gender": "M;M", "homepage": "https://liangzejiang.github.io/;https://lins-lab.github.io/", "dblp": ";64/4492-4.html", "google_scholar": "zU76rDEAAAAJ;QE9pa_cAAAAJ", "orcid": ";0000-0002-3246-6935", "linkedin": "liangze-jiang-3b60571bb/;", "or_profile": "~Liangze_Jiang1;~Tao_Lin1", "aff": "Swiss Federal Institute of Technology Lausanne;Westlake University", "aff_domain": "epfl.ch;westlake.edu", "position": "MS student;Assistant Professor", "bibtex": "@inproceedings{\njiang2023testtime,\ntitle={Test-Time Robust Personalization for Federated Learning},\nauthor={Liangze Jiang and Tao Lin},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=3aBuJEza5sq}\n}", "github": "", "project": "", "reviewers": "wWXU;bhMe;129X;JEHY", "pdf_size": 3882268, "recommendation": "5;6;8;8", "confidence": "3;5;4;4", "correctness": "3;3;3;3", "technical_novelty": "3;3;3;4", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "149;63;145;50", "wc_strength_and_weaknesses": "136;144;50;40", "wc_clarity_quality_novelty_and_reproducibility": "37;2;274;147", "wc_summary_review": "20;2;64;12", "wc_review": "342;211;533;249", "wc_reply_reviewers": "104;0;0;188", "wc_reply_authors": "1260;1049;289;2132", "reply_reviewers": "1;0;0;2", "reply_authors": "4;3;1;5", "recommendation_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 101.75, 45.50480743833557 ], "wc_strength_and_weaknesses_avg": [ 92.5, 47.71530152896448 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 115.0, 106.25205880358271 ], "wc_summary_review_avg": [ 24.5, 23.680160472429236 ], "wc_review_avg": [ 333.75, 124.51782000982831 ], "wc_reply_reviewers_avg": [ 73.0, 78.8098978555359 ], "wc_reply_authors_avg": [ 1182.5, 656.4527781950504 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 3.25, 1.479019945774904 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.2721655269759087, "corr_recommendation_correctness": 0.0, "gs_citation": 57, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8267318836942039958&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=3aBuJEza5sq", "email": "epfl.ch;westlake.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Swiss Federal Institute of Technology Lausanne;Westlake University", "aff_unique_dep": ";", "aff_unique_url": "https://www.epfl.ch;https://www.westlake.edu.cn", "aff_unique_abbr": "EPFL;WU", "aff_campus_unique_index": "0", "aff_campus_unique": "Lausanne;", "aff_country_unique_index": "0;1", "aff_country_unique": "Switzerland;China" }, { "title": "How Much Data Are Augmentations Worth? 
An Investigation into Scaling Laws, Invariance, and Implicit Regularization", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10891", "id": "3aQs3MCSexD", "poster": "/media/PosterPDFs/ICLR%202023/10891.png?t=1682446335.8456194", "openreview": "https://openreview.net/forum?id=3aQs3MCSexD", "slides": "https://iclr.cc/virtual/2023/poster/10891", "video": "https://iclr.cc/virtual/2023/poster/10891", "author_site": "Jonas Geiping, Micah Goldblum, Gowthami Somepalli, Ravid Shwartz-Ziv, Tom Goldstein, Andrew Wilson", "tldr": "We uncover mechanisms by which data augmentations regularize training and inform the relationship between augmentations and extra data, invariance, stochasticity, and flatness.", "abstract": "Despite the clear performance benefits of data augmentations, little is known about why they are so effective. In this paper, we disentangle several key mechanisms through which data augmentations operate. Establishing an exchange rate between augmented and additional real data, we find that in out-of-distribution testing scenarios, augmentations which yield samples that are diverse, but inconsistent with the data distribution can be even more valuable than additional training data. Moreover, we find that data augmentations which encourage invariances can be more valuable than invariance alone, especially on small and medium sized training sets. Following this observation, we show that augmentations induce additional stochasticity during training, effectively flattening the loss landscape.", "keywords": "Data Augmentations;Stochasticity;Flatness;Neural Networks;Invariance", "primary_area": "", "supplementary_material": "/attachment/fd23b7cf09bdcc462e04ecaf2bed3ef13873ea2b.zip", "author": "Jonas Geiping;Micah Goldblum;Gowthami Somepalli;Ravid Shwartz-Ziv;Tom Goldstein;Andrew Gordon Wilson", "authorids": "~Jonas_Geiping1;~Micah_Goldblum1;~Gowthami_Somepalli1;~Ravid_Shwartz-Ziv2;~Tom_Goldstein1;~Andrew_Gordon_Wilson1", "gender": "M;;F;M;Not Specified;M", "homepage": "https://jonasgeiping.github.io/;;https://somepago.github.io/;https://www.cs.umd.edu/~tomg/;https://cims.nyu.edu/~andrewgw;https://www.ravid-shwartz-ziv.com/", "dblp": "190/7229;241/7231;286/5012;25/8184;65/10453;", "google_scholar": "https://scholar.google.de/citations?user=206vNCEAAAAJ;pGDKzuUAAAAJ;T2ezBDsAAAAJ;KmSuVtgAAAAJ;https://scholar.google.com.tw/citations?user=twWX2LIAAAAJ;https://scholar.google.co.il/citations?user=SqsLFwMAAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Jonas_Geiping1;~Micah_Goldblum1;~Gowthami_Somepalli1;~Tom_Goldstein1;~Andrew_Gordon_Wilson1;~ravid_ziv1", "aff": "University of Maryland, College Park;New York University;University of Maryland, College Park;University of Maryland, College Park;New York University;New York University", "aff_domain": "umd.edu;nyu.edu;umd.edu;umd.edu;nyu.edu;nyu.edu", "position": "Postdoc;Postdoc;PhD student;Full Professor;Associate Professor;Postdoc", "bibtex": "@inproceedings{\ngeiping2023how,\ntitle={How Much Data Are Augmentations Worth? 
An Investigation into Scaling Laws, Invariance, and Implicit Regularization},\nauthor={Jonas Geiping and Micah Goldblum and Gowthami Somepalli and Ravid Shwartz-Ziv and Tom Goldstein and Andrew Gordon Wilson},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=3aQs3MCSexD}\n}", "github": "", "project": "", "reviewers": "iXJA;G4Yp;WExn;VSLP", "pdf_size": 1373891, "recommendation": "5;8;8;8", "confidence": "3;4;3;4", "correctness": "3;3;4;4", "technical_novelty": "2;3;2;2", "empirical_novelty": "3;3;3;4", "wc_summary_paper": "63;108;57;116", "wc_strength_and_weaknesses": "215;1157;171;280", "wc_clarity_quality_novelty_and_reproducibility": "78;67;64;75", "wc_summary_review": "106;62;32;88", "wc_review": "462;1394;324;559", "wc_reply_reviewers": "0;469;0;0", "wc_reply_authors": "960;2309;135;329", "reply_reviewers": "0;2;0;0", "reply_authors": "2;4;1;1", "recommendation_avg": [ 7.25, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 86.0, 26.239283526803852 ], "wc_strength_and_weaknesses_avg": [ 455.75, 406.7194211000994 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 71.0, 5.70087712549569 ], "wc_summary_review_avg": [ 72.0, 27.892651361962706 ], "wc_review_avg": [ 684.75, 417.9134928427174 ], "wc_reply_reviewers_avg": [ 117.25, 203.08295718745086 ], "wc_reply_authors_avg": [ 933.25, 850.8414584985854 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 42, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18372057636101327455&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=3aQs3MCSexD", "email": "umd.edu;nyu.edu;umd.edu;umd.edu;nyu.edu;nyu.edu", "author_num": 6, "aff_unique_index": "0;1;0;0;1;1", "aff_unique_norm": "University of Maryland;New York University", "aff_unique_dep": ";", "aff_unique_url": "https://www.umd.edu;https://www.nyu.edu", "aff_unique_abbr": "UMD;NYU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "College Park;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Behavior Proximal Policy Optimization", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10714", "id": "3c13LptpIph", "poster": "/media/PosterPDFs/ICLR%202023/10714.png?t=1682604742.3698013", "openreview": "https://openreview.net/forum?id=3c13LptpIph", "slides": "https://iclr.cc/virtual/2023/poster/10714", "video": "https://iclr.cc/virtual/2023/poster/10714", "author_site": "Zifeng Zhuang, Kun LEI, Jinxin Liu, Donglin Wang, Yilang Guo", "tldr": "We propose Behavior Proximal Policy Optimization (BPPO), which builds on the on-policy method PPO and effectively solves offline RL without introducing any extra constraint or regularization.", "abstract": "Offline reinforcement learning (RL) is a challenging setting where existing off-policy actor-critic methods perform poorly due to overestimation of out-of-distribution state-action pairs. Thus, various additional augmentations are proposed to keep the learned policy close to the offline dataset (or the behavior policy).
In this work, starting from the analysis of offline monotonic policy improvement, we reach a surprising conclusion that online on-policy algorithms are naturally able to solve offline RL. Specifically, the inherent conservatism of these on-policy algorithms is exactly what the offline RL method needs to overcome the overestimation. Based on this, we propose Behavior Proximal Policy Optimization (BPPO), which solves offline RL without any extra constraint or regularization introduced compared to PPO. Extensive experiments on the D4RL benchmark empirically show this extremely succinct method outperforms state-of-the-art offline RL algorithms. Our implementation is available at https://github.com/Dragon-Zhuang/BPPO.", "keywords": "Offline Reinforcement Learning;Monotonic Policy Improvement", "primary_area": "", "supplementary_material": "", "author": "Zifeng Zhuang;Kun LEI;Jinxin Liu;Donglin Wang;Yilang Guo", "authorids": "~Zifeng_Zhuang1;~Kun_LEI1;~Jinxin_Liu1;~Donglin_Wang1;~Yilang_Guo1", "gender": "M;M;;M;M", "homepage": ";https://lei-kun.github.io/;;https://milab.westlake.edu.cn/;https://scholar.google.com.hk/citations?user=r9vbaj4AAAA&user=r9vbaj4AAAAJ", "dblp": "276/5034;292/4342;;;340/8399", "google_scholar": ";https://scholar.google.com.hk/citations?user=GfUvUacAAAAJ;;https://scholar.google.ca/citations?user=-fo6wdwAAAAJ;https://scholar.google.com.hk/citations?user=r9vbaj4AAAA", "orcid": ";;;0000-0002-8188-3735;", "linkedin": ";;;;", "or_profile": "~Zifeng_Zhuang1;~Kun_LEI1;~Jinxin_Liu1;~Donglin_Wang1;~Yilang_Guo1", "aff": "Zhejiang University;;;Westlake University;Beijing Jiaotong University", "aff_domain": "zju.edu.cn;;;westlake.edu.cn;bjtu.edu.cn", "position": "PhD student;;;Associate Professor;Undergrad student", "bibtex": "@inproceedings{\nzhuang2023behavior,\ntitle={Behavior Proximal Policy Optimization },\nauthor={Zifeng Zhuang and Kun LEI and Jinxin Liu and Donglin Wang and Yilang Guo},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=3c13LptpIph}\n}", "github": "", "project": "", "reviewers": "qeAt;ksVK;7X1S;QTZM;t4J4", "pdf_size": 1222216, "recommendation": "3;5;5;6;6", "confidence": "3;3;4;3;3", "correctness": "2;3;3;3;3", "technical_novelty": "3;2;3;2;3", "empirical_novelty": "3;2;3;1;3", "wc_summary_paper": "179;92;90;77;45", "wc_strength_and_weaknesses": "510;164;594;345;8", "wc_clarity_quality_novelty_and_reproducibility": "60;8;175;60;8", "wc_summary_review": "51;6;64;144;106", "wc_review": "800;270;923;626;167", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "1294;717;1602;1580;410", "reply_reviewers": "0;0;0;0;0", "reply_authors": "3;2;3;3;2", "recommendation_avg": [ 5.0, 1.0954451150103321 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "correctness_avg": [ 2.8, 0.39999999999999997 ], "technical_novelty_avg": [ 2.6, 0.4898979485566356 ], "empirical_novelty_avg": [ 2.4, 0.8 ], "wc_summary_paper_avg": [ 96.6, 44.49988764030759 ], "wc_strength_and_weaknesses_avg": [ 324.2, 216.05221591087653 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 62.2, 61.0062291901409 ], "wc_summary_review_avg": [ 74.2, 47.27959390688545 ], "wc_review_avg": [ 557.2, 294.0186388649536 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1120.6, 477.6415392320898 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.6, 0.4898979485566356 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 
0.9128709291752771, "gs_citation": 48, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16473765994661068931&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=3c13LptpIph", "email": "zju.edu.cn;;;westlake.edu.cn;bjtu.edu.cn", "author_num": 5, "aff_unique_index": "0;1;2", "aff_unique_norm": "Zhejiang University;Westlake University;Beijing Jiao Tong University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.zju.edu.cn;https://www.westlake.edu.cn;http://www.njtu.edu.cn/en", "aff_unique_abbr": "ZJU;WU;BJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "3dH2aqKGzZe", "title": "S$^6$-DAMON: Bridging Self-Supervised Speech Models and Real-time Speech Recognition", "track": "main", "status": "Reject", "tldr": "We propose a data-model co-compression framework dubbed S$^6$-DAMON for bridging self-supervised speech models with real-time speech recognition.", "abstract": "There has been an growing demand for deep neural network (DNN) powered automatic speech recognition (ASR) on mobile platforms for real-time speech recognition. However, ubiquitous on-device ASR systems are still hindered by two bottlenecks: (1) the lack of large-scale transcribed speech data especially for low-resource spoken languages and (2) the large gap between DNNs' prohibitive complexity and mobiles' limited resources. In parallel, speech models pretrained via self-supervised learning (SSL) have emerged to reduce the reliance on the availability of transcribed speech data, which however further enlarges the efficiency gap because they often adopt large transformers to ensure expressive speech representations. Thus, it is highly desired to trim down the complexity of speech SSL models to enable real-time on-device ASR. This is particularly challenging since only structured sparsity can favor hardware efficiency in commercial devices, under which the speech representation learned by SSL could easily be demolished. To this end, we develop a framework dubbed S$^6$-DAMON to pursue structured sparsity in speech SSL models via data-model co-compression. On the data side, leveraging both the duration of each phoneme and the pauses between the words/phonemes of human utterances, we propose a salient audio token detector, dubbed SALAD, to remove input audio tokens that are redundant; On the model side, we identify that the failure of the SOTA ASR pruning method under structured sparsity is caused by the sparsity discrepancy between finetuning/deployment and their limited learnability of sparsity distributions, and then tackle it via a new ASR pruning pipeline dubbed SAFARI, which adopts a three-step pipeline - sparsify, finetune, and adjust sparsity. Extensive experiments validate that S$^6$-DAMON can enable real-time ASR with limited transcribed speech data requirements while maintaining decent recognition performance. 
All source code will be released upon acceptance.", "keywords": "automated speech recognition;self-supervised learning;model compression", "primary_area": "", "supplementary_material": "", "author": "Yonggan Fu;Zhifan Ye;Shunyao Zhang;Jiayi Yuan;Zhongzhi Yu;Yingyan Lin", "authorids": "~Yonggan_Fu1;~Zhifan_Ye1;~Shunyao_Zhang1;~Jiayi_Yuan1;~Zhongzhi_Yu1;~Yingyan_Lin1", "gender": "M;M;M;;M;F", "homepage": "https://www.yongganfu.com/;https://github.com/LemonAndRabbit;;https://jy-yuan.github.io/;;https://eiclab.scs.gatech.edu/", "dblp": "244/8166;168/9226.html;;251/4029-1.html;198/8338;120/6981", "google_scholar": "https://scholar.google.com/citations?hl=en;zlPfnWEAAAAJ;https://scholar.google.com/citations?hl=zh-CN;XMrlrV8AAAAJ;KjvcaBQAAAAJ;dio8IesAAAAJ", "orcid": ";0000-0003-0755-8843;;;;", "linkedin": "yonggan-fu-b211831b0;zhifan-ye/;https://www.linkedin.com/public-profile/settings?trk=d_flagship3_profile_self_view_public_profile;;zhongzhi-yu/;yingyan-celine-lin-a281211a/", "or_profile": "~Yonggan_Fu1;~Zhifan_Ye1;~Shunyao_Zhang1;~Jiayi_Yuan1;~Zhongzhi_Yu1;~Yingyan_Lin1", "aff": "Rice University;Rice University;Rice University;Rice University;Georgia Institute of Technology;Georgia Institute of Technology", "aff_domain": "rice.edu;rice.edu;rice.edu;rice.edu;gatech.edu;gatech.edu", "position": "PhD student;PhD student;PhD student;PhD student;PhD student;Associate Professor", "bibtex": "@misc{\nfu2023sdamon,\ntitle={S\\${\\textasciicircum}6\\$-{DAMON}: Bridging Self-Supervised Speech Models and Real-time Speech Recognition},\nauthor={Yonggan Fu and Zhifan Ye and Shunyao Zhang and Jiayi Yuan and Zhongzhi Yu and Yingyan Lin},\nyear={2023},\nurl={https://openreview.net/forum?id=3dH2aqKGzZe}\n}", "github": "", "project": "", "reviewers": "V9mU;JZjt;jv1R", "site": "https://openreview.net/forum?id=3dH2aqKGzZe", "pdf_size": 585919, "recommendation": "5;5;5", "confidence": "4;4;4", "correctness": "3;3;4", "technical_novelty": "3;2;3", "empirical_novelty": "2;2;2", "wc_summary_paper": "182;70;121", "wc_strength_and_weaknesses": "123;131;64", "wc_clarity_quality_novelty_and_reproducibility": "240;16;12", "wc_summary_review": "65;34;32", "wc_review": "610;251;229", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "782;604;1241", "reply_reviewers": "0;0;0", "reply_authors": "2;1;2", "recommendation_avg": [ 5.0, 0.0 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 124.33333333333333, 45.784519460427035 ], "wc_strength_and_weaknesses_avg": [ 106.0, 29.87752778706208 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 89.33333333333333, 106.54993612803759 ], "wc_summary_review_avg": [ 43.666666666666664, 15.107025591499548 ], "wc_review_avg": [ 363.3333333333333, 174.6507626347188 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 875.6666666666666, 268.35589967222427 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:uWxyfs-nB74J:scholar.google.com/&scioq=S%24%5E6%24-DAMON:+Bridging+Self-Supervised+Speech+Models+and+Real-time+Speech+Recognition&hl=en&as_sdt=0,23", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;1;1", "aff_unique_norm": "Rice
University;Georgia Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.rice.edu;https://www.gatech.edu", "aff_unique_abbr": "Rice;Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Energy-Based Test Sample Adaptation for Domain Generalization", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11140", "id": "3dnrKbeVatv", "poster": "/media/PosterPDFs/ICLR%202023/11140.png?t=1680781765.7213178", "openreview": "https://openreview.net/forum?id=3dnrKbeVatv", "slides": "https://iclr.cc/virtual/2023/poster/11140", "video": "https://iclr.cc/virtual/2023/poster/11140", "author_site": "Zehao Xiao, Xiantong Zhen, Shengcai Liao, Cees G Snoek", "tldr": "We propose a discriminative energy-based model to adapt target samples to the source domain distributions for domain generalization.", "abstract": "In this paper, we propose energy-based sample adaptation at test time for domain generalization. Where previous works adapt their models to target domains, we adapt the unseen target samples to source-trained models. To this end, we design a discriminative energy-based model, which is trained on source domains to jointly model the conditional distribution for classification and data distribution for sample adaptation. The model is optimized to simultaneously learn a classifier and an energy function. To adapt target samples to source distributions, we iteratively update the samples by energy minimization with stochastic gradient Langevin dynamics. Moreover, to preserve the categorical information in the sample during adaptation, we introduce a categorical latent variable into the energy-based model. The latent variable is learned from the original sample before adaptation by variational inference and fixed as a condition to guide the sample update. Experiments on six benchmarks for classification of images and microblog threads demonstrate the effectiveness of our proposal.", "keywords": "domain generalization;energy-based model;test-time sample adaptation;variational inference", "primary_area": "", "supplementary_material": "/attachment/6cdb7074aec8df6c7dd5a12632d8691cc548b132.zip", "author": "Zehao Xiao;Xiantong Zhen;Shengcai Liao;Cees G. M. Snoek", "authorids": "~Zehao_Xiao1;~Xiantong_Zhen1;~Shengcai_Liao2;~Cees_G._M._Snoek1", "gender": "M;M;M;M", "homepage": "https://zzzx1224.github.io/;;https://shengcailiao.github.io/;http://www.ceessnoek.info", "dblp": "225/5426;78/10651;16/8313;s/CeesSnoek", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.ca/citations?user=DnBb3e0AAAAJ;CnqsHlAAAAAJ;https://scholar.google.nl/citations?user=0uKdbscAAAAJ", "orcid": ";;;0000-0001-9092-1556", "linkedin": ";;;cgmsnoek/", "or_profile": "~Zehao_Xiao1;~Xiantong_Zhen1;~Shengcai_Liao2;~Cees_Snoek1", "aff": "University of Amsterdam;United Imaging Healthcare, Co., Ltd.;Inception Institute of Artificial Intelligence;University of Amsterdam", "aff_domain": "uva.nl;cri-united-imaging.com;inceptioniai.org;uva.nl", "position": "PhD student;Principal Researcher;Lead Scientist;Full Professor", "bibtex": "@inproceedings{\nxiao2023energybased,\ntitle={Energy-Based Test Sample Adaptation for Domain Generalization},\nauthor={Zehao Xiao and Xiantong Zhen and Shengcai Liao and Cees G. M. 
Snoek},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=3dnrKbeVatv}\n}", "github": "", "project": "", "reviewers": "Y7Jd;yzZP;E7hz;aESp", "pdf_size": 7032342, "recommendation": "6;6;6;8", "confidence": "4;5;3;4", "correctness": "2;3;4;3", "technical_novelty": "3;3;3;3", "empirical_novelty": "3;2;0;3", "wc_summary_paper": "93;84;45;64", "wc_strength_and_weaknesses": "309;124;179;229", "wc_clarity_quality_novelty_and_reproducibility": "211;105;49;33", "wc_summary_review": "123;410;13;53", "wc_review": "736;723;286;379", "wc_reply_reviewers": "136;439;11;0", "wc_reply_authors": "1305;2061;581;271", "reply_reviewers": "2;3;1;0", "reply_authors": "4;6;1;1", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 1.224744871391589 ], "wc_summary_paper_avg": [ 71.5, 18.553975315279473 ], "wc_strength_and_weaknesses_avg": [ 210.25, 68.04180700128414 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 99.5, 69.70473441596346 ], "wc_summary_review_avg": [ 149.75, 155.327677829806 ], "wc_review_avg": [ 531.0, 201.25729800432083 ], "wc_reply_reviewers_avg": [ 146.5, 177.12213300431992 ], "wc_reply_authors_avg": [ 1054.5, 691.7129101007151 ], "reply_reviewers_avg": [ 1.5, 1.118033988749895 ], "reply_authors_avg": [ 3.0, 2.1213203435596424 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3027943185987486652&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=3dnrKbeVatv", "email": "uva.nl;cri-united-imaging.com;inceptioniai.org;uva.nl", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Amsterdam;United Imaging Healthcare;Inception Institute of Artificial Intelligence", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uva.nl;https://www.united-imaging.com;https://www.inceptioniai.org", "aff_unique_abbr": "UvA;;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;0", "aff_country_unique": "Netherlands;China;United Arab Emirates" }, { "id": "3e5nHhhRK93", "title": "Universal embodied intelligence: learning from crowd, recognizing the world, and reinforced with experience", "track": "main", "status": "Reject", "tldr": "", "abstract": "The interactive artificial intelligence in the motion control field is an interesting topic, especially when universal knowledge adaptive to multiple tasks and universal environments is wanted. Although there are increasing efforts on Reinforcement learning (RL) studies with the assistance of transformers, it might be subject to the limitation of the offline training pipeline, in which exploration and generalization are prohibited. Motivated by cognitive and behavioral psychology, such an agent should have the ability to learn from others, recognize the world, and practice itself based on its own experience. In this study, we propose the framework of Online Decision MetaMorphFormer (ODM), which attempts to achieve the above learning modes, with a unified model architecture to both highlight its own body perception and produce action and observation predictions.
ODM can be applied to any agent with a multi-joint body, located in different environments, and trained on different types of tasks. Large-scale pretrained datasets are used to warm up ODM while the targeted environment continues to reinforce the universal policy. Substantial interactive experiments, as well as few-shot and zero-shot tests in unseen environments and on never-experienced tasks, verify ODM's performance and generalization ability. Our study sheds some light on research into general artificial intelligence in the embodied and cognitive fields. ", "keywords": "reinforcement learning;transformer;morphology;pretrain;finetune;generalization", "primary_area": "", "supplementary_material": "/attachment/9a2631885c0997532bbf9dd8f97d304fef568a59.zip", "author": "Luo Ji;Longfei Ma;Chang Zhou;Fei Wu;Hongxia Yang", "authorids": "~Luo_Ji1;~Longfei_Ma1;~Chang_Zhou2;~Fei_Wu1;~Hongxia_Yang2", "gender": "M;;M;M;F", "homepage": "https://www.researchgate.net/profile/Luo-Ji-6;https://github.com/ma-longfei;;https://person.zju.edu.cn/wufei;https://www4.comp.polyu.edu.hk/~hongxyang/", "dblp": "137/5083;;;84/3254-1;", "google_scholar": "https://scholar.google.com/citations?hl=en;;QeSoG3sAAAAJ;XJLn4MYAAAAJ;iJlC5mMAAAAJ", "orcid": "0000-0002-2484-5345;;;;", "linkedin": "luo-ji-14420933/;;;;", "or_profile": "~Luo_Ji1;~Longfei_Ma1;~Chang_Zhou2;~Fei_Wu1;~Hongxia_Yang2", "aff": "Alibaba Group;Zhejiang University;Alibaba Group;Zhejiang University;ByteDance Inc.", "aff_domain": "alibaba-inc.com;cs.zju.edu.cn;alibaba-inc.com;zju.edu.cn;bytedance.com", "position": "Researcher;PhD student;Researcher;Full Professor;Principal Researcher", "bibtex": "@misc{\nji2023universal,\ntitle={Universal embodied intelligence: learning from crowd, recognizing the world, and reinforced with experience},\nauthor={Luo Ji and Longfei Ma and Chang Zhou and Fei Wu and Hongxia Yang},\nyear={2023},\nurl={https://openreview.net/forum?id=3e5nHhhRK93}\n}", "github": "", "project": "", "reviewers": "Vvnb;t6eV;o4Ch;UikL", "site": "https://openreview.net/forum?id=3e5nHhhRK93", "pdf_size": 1642482, "recommendation": "1;3;6;6", "confidence": "4;3;4;3", "correctness": "2;3;3;3", "technical_novelty": "2;2;3;2", "empirical_novelty": "1;2;3;3", "wc_summary_paper": "85;13;86;149", "wc_strength_and_weaknesses": "103;88;65;241", "wc_clarity_quality_novelty_and_reproducibility": "138;9;39;140", "wc_summary_review": "32;20;50;94", "wc_review": "358;130;240;624", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "939;396;36;512", "reply_reviewers": "0;0;0;0", "reply_authors": "2;1;1;1", "recommendation_avg": [ 4.0, 2.1213203435596424 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 83.25, 48.137173784924265 ], "wc_strength_and_weaknesses_avg": [ 124.25, 68.75090908489865 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 81.5, 58.47435335255962 ], "wc_summary_review_avg": [ 49.0, 28.089143810376278 ], "wc_review_avg": [ 338.0, 183.7552720332127 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 470.75, 322.31768102293114 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.23570226039551587, "corr_recommendation_correctness": 0.8164965809277261, "gs_citation": 0, "gs_cited_by_link":
"https://scholar.google.com/scholar?q=related:yClWdjpyjlgJ:scholar.google.com/&scioq=Universal+embodied+intelligence:+learning+from+crowd,+recognizing+the+world,+and+reinforced+with+experience&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;0;1;2", "aff_unique_norm": "Alibaba Group;Zhejiang University;ByteDance", "aff_unique_dep": ";;", "aff_unique_url": "https://www.alibaba.com;https://www.zju.edu.cn;https://www.bytedance.com", "aff_unique_abbr": "Alibaba;ZJU;ByteDance", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "3eQEil044E", "title": "Momentum Tracking: Momentum Acceleration for Decentralized Deep Learning on Heterogeneous Data", "track": "main", "status": "Reject", "tldr": "In this work, we propose Momentum Tracking, which is the method with momentum acceleration whose convergence rate is proved to be independent of the data-heterogeneity.", "abstract": "SGD with momentum acceleration is one of the key components for improving the performance of neural networks. For decentralized learning, a straightforward approach using momentum acceleration is Distributed SGD (DSGD) with momentum acceleration (DSGDm). However, DSGDm performs worse than DSGD when the data distributions are statistically heterogeneous. Recently, several studies have addressed this issue and proposed methods with momentum acceleration that are more robust to data heterogeneity than DSGDm, although their convergence rates remain dependent on data heterogeneity and decrease when the data distributions are heterogeneous. In this study, we propose Momentum Tracking, which is a method with momentum acceleration whose convergence rate is proven to be independent of data heterogeneity. More specifically, we analyze the convergence rate of Momentum Tracking in the standard deep learning setting, where the objective function is non-convex and the stochastic gradient is used. Then, we identify that it is independent of data heterogeneity for any momentum coefficient $\\beta \\in [0, 1)$. 
Through image classification tasks, we demonstrate that Momentum Tracking is more robust to data heterogeneity than the existing decentralized learning methods with momentum acceleration and can consistently outperform these existing methods when the data distributions are heterogeneous.", "keywords": "Decentralized Optimization;Non-Convex Stochastic Optimization;Momentum Acceleration", "primary_area": "", "supplementary_material": "/attachment/f357cc42168e576a446a960510872673fa0b2ca9.zip", "author": "Yuki Takezawa;Han Bao;Kenta Niwa;Ryoma Sato;Makoto Yamada", "authorids": "~Yuki_Takezawa1;~Han_Bao2;~Kenta_Niwa1;~Ryoma_Sato1;~Makoto_Yamada3", "gender": "M;M;M;M;M", "homepage": "https://yukitakezawa.github.io/;https://hermite.jp/;http://www.kecl.ntt.co.jp/icl/ls/members/niwa/index.html;https://joisino.net/en/;https://groups.oist.jp/mlds", "dblp": "284/1294;120/1444-2;64/1008.html;227/2014;56/4937", "google_scholar": "eaKQb8IAAAAJ;MqMzjeMAAAAJ;Btla06EAAAAJ;https://scholar.google.co.jp/citations?user=S4kMic4AAAAJ;1cKNu1gAAAAJ", "orcid": "0000-0002-8532-2775;0000-0002-4473-2604;0000-0002-6911-0238;;", "linkedin": ";;;;", "or_profile": "~Yuki_Takezawa1;~Han_Bao2;~Kenta_Niwa1;~Ryoma_Sato1;~Makoto_Yamada3", "aff": "Kyoto University;Kyoto University, Kyoto University;NTT Corporation;Kyoto University;Kyoto University", "aff_domain": "kyoto-u.ac.jp;i.kyoto-u.ac.jp;ntt.co.jp;kyoto-u.ac.jp;kyoto-u.ac.jp", "position": "MS student;Assistant Professor;Researcher;PhD student;Associate Professor", "bibtex": "@misc{\ntakezawa2023momentum,\ntitle={Momentum Tracking: Momentum Acceleration for Decentralized Deep Learning on Heterogeneous Data},\nauthor={Yuki Takezawa and Han Bao and Kenta Niwa and Ryoma Sato and Makoto Yamada},\nyear={2023},\nurl={https://openreview.net/forum?id=3eQEil044E}\n}", "github": "", "project": "", "reviewers": "hspD;AZPB;uTVi;i18n", "site": "https://openreview.net/forum?id=3eQEil044E", "pdf_size": 433971, "recommendation": "5;5;5;6", "confidence": "2;4;5;4", "correctness": "3;3;3;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;2;3;2", "wc_summary_paper": "43;52;55;48", "wc_strength_and_weaknesses": "440;270;153;284", "wc_clarity_quality_novelty_and_reproducibility": "72;6;43;5", "wc_summary_review": "11;40;27;88", "wc_review": "566;368;278;425", "wc_reply_reviewers": "19;157;219;236", "wc_reply_authors": "844;1442;2236;1402", "reply_reviewers": "1;1;3;1", "reply_authors": "2;2;7;3", "recommendation_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 49.5, 4.5 ], "wc_strength_and_weaknesses_avg": [ 286.75, 102.05727558581995 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 31.5, 27.95084971874737 ], "wc_summary_review_avg": [ 41.5, 28.74456470360962 ], "wc_review_avg": [ 409.25, 104.57861875163584 ], "wc_reply_reviewers_avg": [ 157.75, 85.33280436033964 ], "wc_reply_authors_avg": [ 1481.0, 495.8719592798125 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 3.5, 2.0615528128088303 ], "replies_avg": [ 34, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.13245323570650439, "corr_recommendation_correctness": 0.0, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4579194864389920398&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Kyoto 
University;NTT Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://www.kyoto-u.ac.jp;https://www.ntt.co.jp", "aff_unique_abbr": "Kyoto U;NTT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Kyoto", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Japan" }, { "id": "3gZop22KWP", "title": "UNDERSTANDING PURE CLIP GUIDANCE FOR VOXEL GRID NERF MODELS", "track": "main", "status": "Reject", "tldr": "We explore various mechanics that prevent adversarial generations from using CLIP as guidance for training a voxel grid NeRF model without any datasets.", "abstract": "We explore the task of text to 3D object generation using CLIP. Specifically, we use CLIP for guidance without access to any datasets, a setting we refer to as pure CLIP guidance. While prior work has adopted this setting, there is no systematic study of mechanics for preventing adversarial generations within CLIP. We illustrate how different image-based augmentations prevent the adversarial generation problem, and how the generated results are impacted. We test different CLIP model architectures and show that ensembling different models for guidance can prevent adversarial generations within bigger models and generate sharper results. Furthermore, we implement an implicit voxel grid model to show how neural networks provide an additional layer of regularization, resulting in better geometrical structure and coherency of generated objects. Compared to prior work, we achieve more coherent results with higher memory efficiency and faster training speeds.", "keywords": "Text to 3D Generation;CLIP;NeRF;Adversarial Examples;Augmentation", "primary_area": "", "supplementary_material": "", "author": "Han-Hung Lee;Angel X Chang", "authorids": "~Han-Hung_Lee1;~Angel_X_Chang1", "gender": "M;F", "homepage": "https://hanhung.github.io/;https://angelxuanchang.github.io", "dblp": "294/0092;46/10489", "google_scholar": "32ebx0UAAAAJ;8gfs8XIAAAAJ", "orcid": ";0009-0003-5055-6437", "linkedin": ";", "or_profile": "~Han-Hung_Lee1;~Angel_X_Chang1", "aff": "Simon Fraser University;Simon Fraser University", "aff_domain": "sfu.ca;sfu.ca", "position": "PhD student;Assistant Professor", "bibtex": "@misc{\nlee2023understanding,\ntitle={{UNDERSTANDING} {PURE} {CLIP} {GUIDANCE} {FOR} {VOXEL} {GRID} {NERF} {MODELS}},\nauthor={Han-Hung Lee and Angel X Chang},\nyear={2023},\nurl={https://openreview.net/forum?id=3gZop22KWP}\n}", "github": "", "project": "", "reviewers": "3Rq7;hRKS;XGe4", "site": "https://openreview.net/forum?id=3gZop22KWP", "pdf_size": 3449413, "recommendation": "5;5;6", "confidence": "4;4;4", "correctness": "4;3;4", "technical_novelty": "2;2;2", "empirical_novelty": "2;2;3", "wc_summary_paper": "65;61;88", "wc_strength_and_weaknesses": "146;147;253", "wc_clarity_quality_novelty_and_reproducibility": "49;10;61", "wc_summary_review": "78;20;85", "wc_review": "338;238;487", "wc_reply_reviewers": "0;47;0", "wc_reply_authors": "404;674;211", "reply_reviewers": "0;1;0", "reply_authors": "1;1;1", "recommendation_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 71.33333333333333, 11.897712198383164 ], "wc_strength_and_weaknesses_avg": [ 182.0, 50.20624131187941 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 40.0, 21.77154105707724 ], "wc_summary_review_avg": [ 61.0, 29.13188402192118 ], 
"wc_review_avg": [ 354.3333333333333, 102.30781440774155 ], "wc_reply_reviewers_avg": [ 15.666666666666666, 22.15601247717849 ], "wc_reply_authors_avg": [ 429.6666666666667, 189.88827124273777 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.4999999999999999, "gs_citation": 59, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14471989285777972660&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Simon Fraser University", "aff_unique_dep": "", "aff_unique_url": "https://www.sfu.ca", "aff_unique_abbr": "SFU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { "id": "3i9EgUss-Vs", "title": "Graph Convolutional Normalizing Flows for Semi-Supervised Classification and Clustering", "track": "main", "status": "Reject", "tldr": "A normalizing flow architecture based on graphs is developed for semi-supervised learning, producing high-quality classification and clustering.", "abstract": "Graph neural networks (GNNs) are \\emph{discriminative models} that directly model the class posterior $p(y|\\mathbf{x})$ for semi-supervised classification of graph data. While being effective for prediction, as a representation learning approach, the node representations extracted from a GNN often miss useful information for effective clustering, because that is not necessary for a good classification. In this work, we replace a GNN layer by a combination of graph convolutions and normalizing flows under a Gaussian mixture representation space, which allows us to build a \\emph{generative model} that models both the class conditional likelihood $p(\\mathbf{x}|y)$ and the class prior $p(y)$. The resulting neural network, GC-Flow, enjoys two benefits: it not only maintains the predictive power because of the retention of graph convolutions, but also produces high-quality clusters in the representation space, due to the structuring of the representation as a mixture of Gaussians. We demonstrate these benefits on a variety of benchmark data sets. 
Moreover, we show that additional parameterization, such as that on the adjacency matrix used for graph convolutions, yields additional improvement in classification and clustering.\n", "keywords": "graph convolutional network;normalizing flow;generative model", "primary_area": "", "supplementary_material": "", "author": "Tianchun Wang;Farzaneh Mirzazadeh;Xiang Zhang;Jie Chen", "authorids": "~Tianchun_Wang1;~Farzaneh_Mirzazadeh1;~Xiang_Zhang4;~Jie_Chen1", "gender": "M;F;;", "homepage": ";https://researcher.watson.ibm.com/researcher/view.php?person=ibm-Farzaneh;https://jiechenjiechen.github.io;https://faculty.ist.psu.edu/xzz89/", "dblp": "153/5231.html;149/1268;92/6289-7;91/4353-1", "google_scholar": "8su8b60AAAAJ;https://scholar.google.com/citations?hl=en;Z-lkme8AAAAJ;", "orcid": ";;;0000-0003-0940-6595", "linkedin": ";farzaneh-mirzazadeh/;;", "or_profile": "~Tianchun_Wang1;~Farzaneh_Mirzazadeh1;~Jie_Chen1;~Xiang_Zhang24", "aff": "Pennsylvania State University;International Business Machines;International Business Machines;Pennsylvania State University", "aff_domain": "psu.edu;ibm.com;ibm.com;psu.edu", "position": "PhD student;Researcher;Research Staff Member;Associate Professor", "bibtex": "@misc{\nwang2023graph,\ntitle={Graph Convolutional Normalizing Flows for Semi-Supervised Classification and Clustering},\nauthor={Tianchun Wang and Farzaneh Mirzazadeh and Xiang Zhang and Jie Chen},\nyear={2023},\nurl={https://openreview.net/forum?id=3i9EgUss-Vs}\n}", "github": "", "project": "", "reviewers": "kEff;bfhQ;cUhi;qGKA", "site": "https://openreview.net/forum?id=3i9EgUss-Vs", "pdf_size": 1479928, "recommendation": "5;5;5;8", "confidence": "4;3;3;4", "correctness": "4;3;3;4", "technical_novelty": "2;2;3;4", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "43;134;53;76", "wc_strength_and_weaknesses": "250;338;113;298", "wc_clarity_quality_novelty_and_reproducibility": "72;6;7;59", "wc_summary_review": "91;57;15;46", "wc_review": "456;535;188;479", "wc_reply_reviewers": "260;0;0;122", "wc_reply_authors": "922;504;586;340", "reply_reviewers": "1;0;0;1", "reply_authors": "2;1;1;1", "recommendation_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 76.5, 35.28809997718778 ], "wc_strength_and_weaknesses_avg": [ 249.75, 84.87748523607424 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 36.0, 29.857997253667232 ], "wc_summary_review_avg": [ 52.25, 27.160403163428924 ], "wc_review_avg": [ 414.5, 133.88894651912085 ], "wc_reply_reviewers_avg": [ 95.5, 107.24154978365428 ], "wc_reply_authors_avg": [ 588.0, 212.20273325289662 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1879157520138779628&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Pennsylvania State University;International Business Machines Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://www.psu.edu;https://www.ibm.com", "aff_unique_abbr": "PSU;IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "3i_7H3phuy3", "title": 
"Incompatibility between Deterministic Policy and Generative Adversarial Imitation Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Deterministic policies are widely applied in generative adversarial imitation learning (GAIL). When adopting these policies, some GAIL variants modify the reward function to avoid training instability. However, the mechanism behind this instability is still largely unknown. In this paper, we capture the instability through the underlying exploding gradients theoretically in the updating process. Our novelties lie in the following aspects: 1) By employing multivariate Gaussian policy with small covariance to approximate deterministic policy, we establish and prove the probabilistic lower bound for the exploding gradients, which can describe the degree of instability universally, while the stochastic policy will never suffer from such pathology subsequently. 2) We also prove that the modified reward function of adversarial inverse reinforcement learning (AIRL) can relieve exploding gradients, but at the expense of ``non-confidence''. Experiments and a toy demo support our analysis.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Wanying Wang;Yirui Zhou;Chaomin Shen;Yangchun Zhang;Jian Tang;Zhiyuan Xu;Yaxin Peng", "authorids": "~Wanying_Wang1;~Yirui_Zhou1;~Chaomin_Shen1;~Yangchun_Zhang1;~Jian_Tang5;~Zhiyuan_Xu1;~Yaxin_Peng1", "gender": "F;M;M;;M;M;F", "homepage": "https://www.mdmlab-shu.com/author/wanying-wang/;;;;https://ecs.syr.edu/faculty/tang;https://xuzhiyuan1528.github.io/;https://www.mdmlab-shu.com/author/yaxin-peng/", "dblp": "153/8907;326/5673;32/3402-1;https://dblp.uni-trier.de/pid/324/8512;181/2667-8;;20/7643.html", "google_scholar": ";https://scholar.google.com.hk/citations?user=UY7IUBIAAAAJ;;https://scholar.google.com.hk/citations?user=CjY56LgAAAAJ;;jKHMVnYAAAAJ;https://scholar.google.com.hk/citations?user=4cRt3XoAAAAJ", "orcid": "0000-0002-0452-7593;0000-0002-6591-0852;;0000-0002-0540-9070;;0000-0003-2879-3244;0000-0002-2983-555X", "linkedin": ";;;;;zhiyuan-xu-19a66191;", "or_profile": "~Wanying_Wang1;~Yirui_Zhou1;~Chaomin_Shen1;~Yangchun_Zhang1;~Jian_Tang5;~Zhiyuan_Xu1;~Yaxin_Peng1", "aff": "Shanghai University;Shanghai University;East China Normal University;Shanghai University;Midea Group;Midea;Shanghai University", "aff_domain": "shu.edu.cn;shu.edu.cn;ecnu.edu.cn;shu.edu.cn;midea.com;midea.com;shu.edu.cn", "position": "MS student;PhD student;Associate Professor;Lecturer;Researcher;Researcher;Full Professor", "bibtex": "@misc{\nwang2023incompatibility,\ntitle={Incompatibility between Deterministic Policy and Generative Adversarial Imitation Learning},\nauthor={Wanying Wang and Yirui Zhou and Chaomin Shen and Yangchun Zhang and Jian Tang and Zhiyuan Xu and Yaxin Peng},\nyear={2023},\nurl={https://openreview.net/forum?id=3i_7H3phuy3}\n}", "github": "", "project": "", "reviewers": "rV27;F1Ws;EU3W;Qudi;59jo", "site": "https://openreview.net/forum?id=3i_7H3phuy3", "pdf_size": 4820376, "recommendation": "3;3;3;5;6", "confidence": "3;4;3;3;4", "correctness": "2;2;1;3;3", "technical_novelty": "3;2;2;4;3", "empirical_novelty": "2;2;1;3;3", "wc_summary_paper": "25;54;88;209;49", "wc_strength_and_weaknesses": "178;271;467;609;85", "wc_clarity_quality_novelty_and_reproducibility": "18;18;24;309;40", "wc_summary_review": "39;28;29;53;17", "wc_review": "260;371;608;1180;191", "wc_reply_reviewers": "0;216;0;0;0", "wc_reply_authors": "498;1925;1384;1324;182", "reply_reviewers": "0;1;0;0;0", "reply_authors": 
"1;4;3;2;1", "recommendation_avg": [ 4.0, 1.2649110640673518 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "correctness_avg": [ 2.2, 0.7483314773547882 ], "technical_novelty_avg": [ 2.8, 0.7483314773547882 ], "empirical_novelty_avg": [ 2.2, 0.7483314773547882 ], "wc_summary_paper_avg": [ 85.0, 65.17975145702843 ], "wc_strength_and_weaknesses_avg": [ 322.0, 191.2589867169645 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 81.8, 113.88485412907195 ], "wc_summary_review_avg": [ 33.2, 12.106196760337244 ], "wc_review_avg": [ 522.0, 358.1524814935672 ], "wc_reply_reviewers_avg": [ 43.2, 86.4 ], "wc_reply_authors_avg": [ 1062.6, 633.9702201207876 ], "reply_reviewers_avg": [ 0.2, 0.4000000000000001 ], "reply_authors_avg": [ 2.2, 1.16619037896906 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.3227486121839514, "corr_recommendation_correctness": 0.8451542547285165, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:q7yWT1oAdWMJ:scholar.google.com/&scioq=Incompatibility+between+Deterministic+Policy+and+Generative+Adversarial+Imitation+Learning&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;1;0;2;2;0", "aff_unique_norm": "Shanghai University;East China Normal University;Midea Group", "aff_unique_dep": ";;", "aff_unique_url": "https://www.shu.edu.cn;http://www.ecnu.edu.cn;https://www.mideaglobal.com", "aff_unique_abbr": "SHU;ECNU;Midea", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "3i_Bzt7Hmcm", "title": "DP-InstaHide: Data Augmentations Provably Enhance Guarantees Against Dataset Manipulations", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Data poisoning and backdoor attacks manipulate training data to induce security breaches in a victim model. These attacks can be provably deflected using differentially private (DP) training methods, although this comes with a sharp decrease in model performance. The InstaHide method has recently been proposed as an alternative to DP training that leverages supposed privacy properties of the mixup augmentation, although without rigorous guarantees. 
In this paper, we rigorously show that $k$-way mixup provably yields at least $k$ times stronger DP guarantees than a naive DP mechanism, and we observe that this enhanced privacy guarantee is a strong foundation for building defenses against poisoning.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/4cf29a4169b8dfd3c2e3516a6da938f61eee5799.zip", "author": "Eitan Borgnia;Jonas Geiping;Valeriia Cherepanova;Liam H Fowl;Arjun Gupta;Amin Ghiasi;Furong Huang;Micah Goldblum;Tom Goldstein", "authorids": "~Eitan_Borgnia1;~Jonas_Geiping1;~Valeriia_Cherepanova1;~Liam_H_Fowl1;~Arjun_Gupta2;~Amin_Ghiasi1;~Furong_Huang1;~Micah_Goldblum1;~Tom_Goldstein1", "gender": "M;M;F;;M;M;F;;M", "homepage": "https://eitanborgnia.com;https://jonasgeiping.github.io/;https://www.vcherepanova.com/;;https://github.com/Arjung27;http://cs.umd.edu/~amin;https://furong-huang.com;;https://www.cs.umd.edu/~tomg/", "dblp": ";190/7229;;241/6940;;239/8313;72/8513;241/7231;25/8184", "google_scholar": ";https://scholar.google.de/citations?user=206vNCEAAAAJ;PySUqqUAAAAJ;IXv3ToAAAAAJ;5pcsbisAAAAJ;tNQWOxUAAAAJ;13yyuCcAAAAJ;pGDKzuUAAAAJ;KmSuVtgAAAAJ", "orcid": ";;;;;;;;", "linkedin": ";;;;arjung27/;;;;", "or_profile": "~Eitan_Borgnia1;~Jonas_Geiping1;~Valeriia_Cherepanova1;~Liam_H_Fowl1;~Arjun_Gupta2;~Amin_Ghiasi1;~Furong_Huang1;~Micah_Goldblum1;~Tom_Goldstein1", "aff": "University of Chicago;University of Maryland, College Park;University of Maryland, College Park;Google;Zipline International Inc;Apple;University of Maryland;New York University;University of Maryland, College Park", "aff_domain": "uchicago.edu;umd.edu;umd.edu;google.com;flyzipline.com;apple.com;cs.umd.edu;nyu.edu;umd.edu", "position": "PhD student;Postdoc;PhD student;Google;Professional;Researcher;Assistant Professor;Postdoc;Full Professor", "bibtex": "@misc{\nborgnia2023dpinstahide,\ntitle={{DP}-InstaHide: Data Augmentations Provably Enhance Guarantees Against Dataset Manipulations},\nauthor={Eitan Borgnia and Jonas Geiping and Valeriia Cherepanova and Liam H Fowl and Arjun Gupta and Amin Ghiasi and Furong Huang and Micah Goldblum and Tom Goldstein},\nyear={2023},\nurl={https://openreview.net/forum?id=3i_Bzt7Hmcm}\n}", "github": "", "project": "", "reviewers": "868m;mhJU;JPUy;84vE", "site": "https://openreview.net/forum?id=3i_Bzt7Hmcm", "pdf_size": 470391, "recommendation": "3;3;3;5", "confidence": "3;3;4;4", "correctness": "3;3;3;3", "technical_novelty": "2;3;1;2", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "26;55;68;220", "wc_strength_and_weaknesses": "198;112;90;117", "wc_clarity_quality_novelty_and_reproducibility": "28;15;171;97", "wc_summary_review": "37;112;28;82", "wc_review": "289;294;357;516", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 92.25, 75.3072871639923 ], "wc_strength_and_weaknesses_avg": [ 129.25, 40.97178907492325 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 77.75, 62.206812327911486 ], "wc_summary_review_avg": [ 64.75, 34.098203765007916 ], "wc_review_avg": [ 364.0, 91.75783345306274 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 9, 0 ], 
"corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:39vGf1qA2TUJ:scholar.google.com/&scioq=DP-InstaHide:+Data+Augmentations+Provably+Enhance+Guarantees+Against+Dataset+Manipulations&hl=en&as_sdt=0,48", "gs_version_total": 2, "aff_unique_index": "0;1;1;2;3;4;1;5;1", "aff_unique_norm": "University of Chicago;University of Maryland;Google;Zipline International Inc;Apple;New York University", "aff_unique_dep": ";;Google;;Apple Inc.;", "aff_unique_url": "https://www.uchicago.edu;https://www/umd.edu;https://www.google.com;https://www.zipline.com;https://www.apple.com;https://www.nyu.edu", "aff_unique_abbr": "UChicago;UMD;Google;;Apple;NYU", "aff_campus_unique_index": "1;1;2;1", "aff_campus_unique": ";College Park;Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Analog Bits: Generating Discrete Data using Diffusion Models with Self-Conditioning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11907", "id": "3itjR9QxFw", "poster": "", "openreview": "https://openreview.net/forum?id=3itjR9QxFw", "slides": "https://iclr.cc/virtual/2023/poster/11907", "video": "https://iclr.cc/virtual/2023/poster/11907", "author_site": "Ting Chen, Ruixiang ZHANG, Geoffrey E Hinton", "tldr": "Generating discrete/categorical data with (continuous) diffusion models; also presents a technique that improves diffusion models in general.", "abstract": "We present Bit Diffusion: a simple and generic approach for generating discrete data with continuous state and continuous time diffusion models. The main idea behind our approach is to first represent the discrete data as binary bits, and then train a continuous diffusion model to model these bits as real numbers which we call analog bits. To generate samples, the model first generates the analog bits, which are then thresholded to obtain the bits that represent the discrete variables. We further propose two simple techniques, namely Self-Conditioning and Asymmetric Time Intervals, which lead to a significant improvement in sample quality. Despite its simplicity, the proposed approach can achieve strong performance in both discrete image generation and image captioning tasks. For discrete image generation, we significantly improve previous state-of-the-art on both CIFAR-10 (which has 3K discrete 8-bit tokens) and ImageNet-64x64 (which has 12K discrete 8-bit tokens), outperforming the best autoregressive model in both sample quality (measured by FID) and efficiency. 
For image captioning on MS-COCO dataset, our approach achieves competitive results compared to autoregressive models.", "keywords": "Diffusion Models;Discrete Data", "primary_area": "", "supplementary_material": "", "author": "Ting Chen;Ruixiang ZHANG;Geoffrey Hinton", "authorids": "~Ting_Chen1;~Ruixiang_ZHANG1;~Geoffrey_Hinton1", "gender": "M;M;M", "homepage": ";http://ruixiangz.me/;https://www.cs.toronto.edu/~hinton/bio.html", "dblp": "19/1766;20/9860;10/3248", "google_scholar": "KoXUMbsAAAAJ;https://scholar.google.ca/citations?user=VQYdApgAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Ting_Chen1;~Ruixiang_ZHANG1;~Geoffrey_Hinton1", "aff": "Google;Mila, UdeM;University of Toronto", "aff_domain": "google.com;mila.qubec;utoronto.ca", "position": "Research Scientist;PhD student;Full Professor", "bibtex": "@inproceedings{\nchen2023analog,\ntitle={Analog Bits: Generating Discrete Data using Diffusion Models with Self-Conditioning},\nauthor={Ting Chen and Ruixiang ZHANG and Geoffrey Hinton},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=3itjR9QxFw}\n}", "github": "", "project": "", "reviewers": "YYSB;ECCX;j3eh;h3v1", "pdf_size": 2577911, "recommendation": "6;6;8;8", "confidence": "4;4;4;4", "correctness": "3;4;3;4", "technical_novelty": "2;4;3;3", "empirical_novelty": "2;4;3;3", "wc_summary_paper": "58;66;77;50", "wc_strength_and_weaknesses": "234;230;120;127", "wc_clarity_quality_novelty_and_reproducibility": "11;12;50;18", "wc_summary_review": "36;97;29;39", "wc_review": "339;405;276;234", "wc_reply_reviewers": "44;50;0;0", "wc_reply_authors": "780;705;500;266", "reply_reviewers": "2;2;0;0", "reply_authors": "3;2;1;1", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 62.75, 9.98436277385793 ], "wc_strength_and_weaknesses_avg": [ 177.75, 54.32483317967944 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 22.75, 15.958931668504631 ], "wc_summary_review_avg": [ 50.25, 27.23394022171599 ], "wc_review_avg": [ 313.5, 64.70896383036897 ], "wc_reply_reviewers_avg": [ 23.5, 23.595550427993835 ], "wc_reply_authors_avg": [ 562.75, 199.64390173506428 ], "reply_reviewers_avg": [ 1.0, 1.0 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 305, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13397791710550724105&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 8, "pdf": "https://openreview.net/pdf?id=3itjR9QxFw", "email": "google.com;mila.qubec;utoronto.ca", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Google;Universit\u00e9 de Montr\u00e9al;University of Toronto", "aff_unique_dep": "Google;Mila;", "aff_unique_url": "https://www.google.com;https://www.udemontreal.ca;https://www.utoronto.ca", "aff_unique_abbr": "Google;UdeM;U of T", "aff_campus_unique_index": "0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United States;Canada" }, { "id": "3jBXX9Xb1iz", "title": "Multi-Label Knowledge Distillation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Existing knowledge distillation methods typically work by enforcing the consistency of output logits or intermediate feature 
maps between the teacher network and student network. Unfortunately, these methods can hardly be extended to the multi-label learning scenario. Because each instance is associated with multiple semantic labels, neither the prediction logits nor the feature maps obtained from the whole example can accurately transfer knowledge for each label. In this paper, we propose a novel multi-label knowledge distillation method. On one hand, it exploits the informative semantic knowledge from the logits by label decoupling with the one-versus-all reduction strategy; on the other hand, it enhances the distinctiveness of the learned feature representations by leveraging the structural information of label-wise embeddings. Experimental results on multiple benchmark datasets validate that the proposed method can avoid knowledge counteraction among labels, and achieve superior performance against diverse comparing methods.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/42b9d078ef368794e781293c21293a2cb3855e43.zip", "author": "Peng-Hui Yang;Ming-Kun Xie;Chen-Chen Zong;Lei Feng;Gang Niu;Masashi Sugiyama;Sheng-Jun Huang", "authorids": "~Peng-Hui_Yang1;~Ming-Kun_Xie1;~Chen-Chen_Zong1;~Lei_Feng1;~Gang_Niu1;~Masashi_Sugiyama1;~Sheng-Jun_Huang1", "gender": "M;M;M;M;M;;M", "homepage": "http://www.phyang.top;http://www.xiemk.pro/;https://lfeng1995.github.io/;https://niug1984.github.io;http://www.ms.k.u-tokyo.ac.jp/sugi/;http://parnec.nuaa.edu.cn/huangsj;https://chenchenzong.github.io/", "dblp": "245/5714-1.html;215/4362;76/847-6;26/3367-1;35/1228;01/3367.html;299/5077", "google_scholar": "OuxqnTAAAAAJ;https://scholar.google.co.jp/citations?hl=zh-CN;https://scholar.google.com.sg/citations?user=KomQOFkAAAAJ;https://scholar.google.co.jp/citations?user=HOkcy00AAAAJ;https://scholar.google.co.jp/citations?user=GkYIrlIAAAAJ;https://scholar.google.com/citations?hl=zh-CN;h7FiyU8AAAAJ", "orcid": "0009-0003-3626-5094;;0000-0003-2839-5799;;0000-0001-6658-6743;0000-0002-7673-5367;0000-0003-3588-1461", "linkedin": ";;;;;;", "or_profile": "~Peng-Hui_Yang1;~Ming-Kun_Xie1;~Lei_Feng1;~Gang_Niu1;~Masashi_Sugiyama1;~Sheng-Jun_Huang1;~Chenchen_Zong1", "aff": "Nanjing University of Aeronautics and Astronautics;Nanjing University of Aeronautics and Astronautics;Nanyang Technological University;RIKEN;The University of Tokyo;Nanjing University of Aeronautics and Astronautics;Nanjing University of Aeronautics and Astronautics", "aff_domain": "nuaa.edu.cn;nuaa.edu.cn;ntu.edu.sg;riken.jp;u-tokyo.ac.jp;nuaa.edu.cn;nuaa.edu.cn", "position": "Undergrad student;PhD student;Visiting Professor;Research Scientist (tenured);Full Professor;Full Professor;PhD student", "bibtex": "@misc{\nyang2023multilabel,\ntitle={Multi-Label Knowledge Distillation},\nauthor={Peng-Hui Yang and Ming-Kun Xie and Chen-Chen Zong and Lei Feng and Gang Niu and Masashi Sugiyama and Sheng-Jun Huang},\nyear={2023},\nurl={https://openreview.net/forum?id=3jBXX9Xb1iz}\n}", "github": "", "project": "", "reviewers": "SHPH;c6wM;NCc1;fqJx;gMao", "site": "https://openreview.net/forum?id=3jBXX9Xb1iz", "pdf_size": 1642188, "recommendation": "3;3;3;5;5", "confidence": "4;5;3;4;3", "correctness": "3;3;3;4;4", "technical_novelty": "2;2;2;4;3", "empirical_novelty": "2;2;2;4;3", "wc_summary_paper": "47;34;63;48;110", "wc_strength_and_weaknesses": "124;217;284;322;98", "wc_clarity_quality_novelty_and_reproducibility": "29;7;51;15;22", "wc_summary_review": "38;30;68;61;18", "wc_review": "238;288;466;446;248", "wc_reply_reviewers": "0;0;421;0;95", 
"wc_reply_authors": "656;596;861;877;176", "reply_reviewers": "0;0;1;0;1", "reply_authors": "1;1;2;2;1", "recommendation_avg": [ 3.8, 0.9797958971132712 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "correctness_avg": [ 3.4, 0.4898979485566356 ], "technical_novelty_avg": [ 2.6, 0.8 ], "empirical_novelty_avg": [ 2.6, 0.8 ], "wc_summary_paper_avg": [ 60.4, 26.446927987953533 ], "wc_strength_and_weaknesses_avg": [ 209.0, 87.18256706475212 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 24.8, 14.99866660740214 ], "wc_summary_review_avg": [ 43.0, 18.804254837669053 ], "wc_review_avg": [ 337.2, 98.63549057007828 ], "wc_reply_reviewers_avg": [ 103.2, 163.10413851279188 ], "wc_reply_authors_avg": [ 633.2, 253.876662968458 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.3273268353539886, "corr_recommendation_correctness": 1.0, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3837833932871249177&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff_unique_index": "0;0;1;2;3;0;0", "aff_unique_norm": "Nanjing University of Aeronautics and Astronautics;Nanyang Technological University;RIKEN;University of Tokyo", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.nuaa.edu.cn;https://www.ntu.edu.sg;https://www.riken.jp;https://www.u-tokyo.ac.jp", "aff_unique_abbr": "NUAA;NTU;RIKEN;UTokyo", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;2;2;0;0", "aff_country_unique": "China;Singapore;Japan" }, { "title": "Benchmarking Offline Reinforcement Learning on Real-Robot Hardware", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11771", "id": "3k5CUGDLNdd", "poster": "/media/PosterPDFs/ICLR%202023/11771.png?t=1682598724.4803753", "openreview": "https://openreview.net/forum?id=3k5CUGDLNdd", "slides": "https://iclr.cc/virtual/2023/poster/11771", "video": "https://iclr.cc/virtual/2023/poster/11771", "author_site": "Nico G\u00fcrtler, Sebastian Blaes, Pavel Kolev, Felix Widmaier, Manuel Wuthrich, Stefan Bauer, Bernhard Schoelkopf, Georg Martius", "tldr": "We propose new robotics datasets for dexterous manipulation and benchmark offline RL algorithms on them.", "abstract": "Learning policies from previously recorded data is a promising direction for real-world robotics tasks, as online learning is often infeasible. Dexterous manipulation in particular remains an open problem in its general form. The combination of offline reinforcement learning with large diverse datasets, however, has the potential to lead to a breakthrough in this challenging domain analogously to the rapid progress made in supervised learning in recent years. To coordinate the efforts of the research community toward tackling this problem, we propose a benchmark including: i) a large collection of data for offline learning from a dexterous manipulation platform on two tasks, obtained with capable RL agents trained in simulation; ii) the option to execute learned policies on a real-world robotic system and a simulation for efficient debugging. 
We evaluate prominent open-sourced offline reinforcement learning algorithms on the datasets and provide a reproducible experimental setup for offline reinforcement learning on real systems.", "keywords": "offline reinforcement learning;robotic manipulation;dexterous manipulation;TriFinger platform", "primary_area": "", "supplementary_material": "", "author": "Nico G\u00fcrtler;Sebastian Blaes;Pavel Kolev;Felix Widmaier;Manuel Wuthrich;Stefan Bauer;Bernhard Sch\u00f6lkopf;Georg Martius", "authorids": "~Nico_G\u00fcrtler1;~Sebastian_Blaes1;~Pavel_Kolev1;~Felix_Widmaier1;~Manuel_Wuthrich1;~Stefan_Bauer1;~Bernhard_Sch\u00f6lkopf1;~Georg_Martius1", "gender": "M;M;M;;M;;;M", "homepage": "https://www.is.mpg.de/person/nguertler;https://sblaes.com;http://pavelkolev.github.io/;https://is.tuebingen.mpg.de/person/felixwidmaier;;https://cifar.ca/bios/stefan-bauer/;;https://uni-tuebingen.de/de/264672", "dblp": "223/4063;163/8117;153/5818.html;;https://dblp.uni-trier.de/pers/hd/w/W=uuml=thrich:Manuel;;;47/2706", "google_scholar": "kuzmML4AAAAJ;https://scholar.google.de/citations?user=ftV9OHMAAAAJ;https://scholar.google.de/citations?user=m1j0aaoAAAAJ;;;O-oICE8AAAAJ;;https://scholar.google.de/citations?user=b-JF-UIAAAAJ", "orcid": ";;;;;;;", "linkedin": ";sebastian-blaes/;pavel-kolev-72495b1a/;;;;;", "or_profile": "~Nico_G\u00fcrtler1;~Sebastian_Blaes1;~Pavel_Kolev1;~Felix_Widmaier1;~Manuel_Wuthrich1;~Stefan_Bauer1;~Bernhard_Sch\u00f6lkopf1;~Georg_Martius1", "aff": "Max Planck Institute for Intelligent Systems, Max-Planck Institute;Max Planck Institute for Intelligent Systems, Max Planck Institute for Intelligent Systems;;, Max Planck Institute for Intelligent Systems;Max Planck Institute for Intelligent Systems;KTH Royal Institute of Technology;;Max Planck Institute for Intelligent Systems", "aff_domain": "tuebingen.mpg.de;is.tue.mpg.de;;is.tuebingen.mpg.de;mpg.tuebingen.de;kth.se;;tuebingen.mpg.de", "position": "PhD student;Postdoc;;Research Engineer;Postdoc;Assistant Professor;;Assistant Professor", "bibtex": "@inproceedings{\ng{\\\"u}rtler2023benchmarking,\ntitle={Benchmarking Offline Reinforcement Learning on Real-Robot Hardware},\nauthor={Nico G{\\\"u}rtler and Sebastian Blaes and Pavel Kolev and Felix Widmaier and Manuel Wuthrich and Stefan Bauer and Bernhard Sch{\\\"o}lkopf and Georg Martius},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=3k5CUGDLNdd}\n}", "github": "", "project": "", "reviewers": "RMbA;Yamp;KUtS;zztz", "pdf_size": 7122786, "recommendation": "6;6;8;8", "confidence": "5;4;4;2", "correctness": "4;4;4;4", "technical_novelty": "2;3;3;1", "empirical_novelty": "2;3;4;4", "wc_summary_paper": "53;106;72;99", "wc_strength_and_weaknesses": "123;162;98;61", "wc_clarity_quality_novelty_and_reproducibility": "88;8;49;41", "wc_summary_review": "152;71;27;59", "wc_review": "416;347;246;260", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "285;377;66;97", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "correctness_avg": [ 4.0, 0.0 ], "technical_novelty_avg": [ 2.25, 0.82915619758885 ], "empirical_novelty_avg": [ 3.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 82.5, 21.242645786248 ], "wc_strength_and_weaknesses_avg": [ 111.0, 36.78994427829431 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 46.5, 28.464890654980568 ], "wc_summary_review_avg": [ 77.25, 46.05635135353212 ], "wc_review_avg": [ 
317.25, 68.90346507977665 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 206.25, 129.38580872723253 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": -0.6882472016116854, "corr_recommendation_correctness": 0.0, "gs_citation": 38, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2343096472995878496&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=3k5CUGDLNdd", "email": "tuebingen.mpg.de;is.tue.mpg.de;;is.tuebingen.mpg.de;mpg.tuebingen.de;kth.se;;tuebingen.mpg.de", "author_num": 8, "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "Max Planck Institute for Intelligent Systems;KTH Royal Institute of Technology", "aff_unique_dep": "Intelligent Systems;", "aff_unique_url": "https://www.mpi-is.mpg.de;https://www.kth.se", "aff_unique_abbr": "MPI-IS;KTH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;0", "aff_country_unique": "Germany;Sweden" }, { "id": "3l36EPLnPzA", "title": "Efficient parametric approximations of neural net function space distance", "track": "main", "status": "Reject", "tldr": "We propose an efficient parametric approximation of neural network function space distance that is memory-efficient and can be successfully applied to continual learning and influence function estimation tasks.", "abstract": "It is often useful to compactly summarize important properties of a training dataset so that they can be used later without storing and/or iterating over the entire dataset. We consider a specific case of this: approximating the function space distance (FSD) over the training set, i.e. the average distance between the outputs of two neural networks. We propose an efficient approximation to FSD for ReLU neural networks based on approximating the architecture as a linear network with stochastic gating. Despite requiring only one parameter per unit of the network, our approach outcompetes other parametric approximations with larger memory requirements. Applied to continual learning, our parametric approximation is competitive with state-of-the-art nonparametric approximations which require storing many training examples. 
Furthermore, we show its efficacy in influence function estimation, allowing influence functions to be accurately estimated without iterating over the full dataset.", "keywords": "Function space distance;memory-efficiency;continual learning;influence function estimation", "primary_area": "", "supplementary_material": "/attachment/69a96e2a71adbe3fdda7b946c69e4414183801c4.zip", "author": "Nikita Dhawan;Sicong Huang;Juhan Bae;Roger Baker Grosse", "authorids": "~Nikita_Dhawan1;~Sicong_Huang1;~Juhan_Bae2;~Roger_Baker_Grosse1", "gender": ";M;M;M", "homepage": "https://www.cs.toronto.edu/~nikita/;http://www.cs.toronto.edu/~huang/;http://www.juhanbae.com/;http://www.cs.toronto.edu/~rgrosse/", "dblp": "255/4863;213/8048.html;158/9492;26/7058", "google_scholar": "4D_8pTEAAAAJ;https://scholar.google.ca/citations?hl=en;https://scholar.google.ca/citations?user=9RFr4usAAAAJ;xgQd1qgAAAAJ", "orcid": ";0009-0006-8791-0243;;", "linkedin": "nikita-dhawan-7a4a29149/;sicong-sheldon-huang-7a4292106/;;", "or_profile": "~Nikita_Dhawan1;~Sicong_Huang1;~Juhan_Bae2;~Roger_Baker_Grosse1", "aff": "Google Research;University of Toronto;University of Toronto;Vector Institute", "aff_domain": "google.com;cs.toronto.edu;cs.toronto.edu;vectorinstitute.ai", "position": "Intern;PhD student;PhD student;Faculty Member", "bibtex": "@misc{\ndhawan2023efficient,\ntitle={Efficient parametric approximations of neural net function space distance},\nauthor={Nikita Dhawan and Sicong Huang and Juhan Bae and Roger Baker Grosse},\nyear={2023},\nurl={https://openreview.net/forum?id=3l36EPLnPzA}\n}", "github": "", "project": "", "reviewers": "qY8f;i4TZ;QzNA;cRKz", "site": "https://openreview.net/forum?id=3l36EPLnPzA", "pdf_size": 581338, "recommendation": "5;5;6;8", "confidence": "3;4;3;2", "correctness": "2;3;3;4", "technical_novelty": "2;2;2;4", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "157;76;216;44", "wc_strength_and_weaknesses": "197;520;177;177", "wc_clarity_quality_novelty_and_reproducibility": "47;109;66;33", "wc_summary_review": "80;122;37;14", "wc_review": "481;827;496;268", "wc_reply_reviewers": "241;136;0;0", "wc_reply_authors": "792;759;953;500", "reply_reviewers": "1;1;0;0", "reply_authors": "2;1;2;1", "recommendation_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.5, 0.8660254037844386 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 123.25, 67.55507012800742 ], "wc_strength_and_weaknesses_avg": [ 267.75, 145.8653060189434 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 63.75, 28.630185119904482 ], "wc_summary_review_avg": [ 63.25, 41.372545244400904 ], "wc_review_avg": [ 518.0, 199.89622307587504 ], "wc_reply_reviewers_avg": [ 94.25, 101.29751971297225 ], "wc_reply_authors_avg": [ 751.0, 162.44229744743208 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.8660254037844386, "corr_recommendation_correctness": 0.8660254037844386, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:MNRWzT5ZI5EJ:scholar.google.com/&scioq=Efficient+parametric+approximations+of+neural+net+function+space+distance&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Google;University of Toronto;Vector Institute", "aff_unique_dep": "Google Research;;", "aff_unique_url": 
"https://research.google;https://www.utoronto.ca;https://vectorinstitute.ai/", "aff_unique_abbr": "Google Research;U of T;Vector Institute", "aff_campus_unique_index": "0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "United States;Canada" }, { "id": "3l9mLzLa0BA", "title": "Signs in the Lottery: Structural Similarities Between Winning Tickets", "track": "main", "status": "Reject", "tldr": "Winning tickets show structural similarities when taking signs of connections into account.", "abstract": "Winning tickets are sparse subnetworks of a deep network that can be trained in isolation to the same performance as the full network. Winning tickets have been found in many different contexts, however their structural characteristics are not well understood. We propose that the signs of the connections in winning tickets play a crucial role. We back this claim by introducing a sign-based structural comparison\nmetric that allows to distinguish winning tickets from other sparse networks. We further analyze typical (signed) patterns in convolutional kernels of winning tickets and find structures that resemble patterns found in trained networks.", "keywords": "lottery ticket hypothesis;sparse networks;structural similarity;deep learning", "primary_area": "", "supplementary_material": "", "author": "Isabel Holler;Mats Leon Richter;Ulf Krumnack", "authorids": "~Isabel_Holler1;~Mats_Leon_Richter1;~Ulf_Krumnack1", "gender": ";M;", "homepage": ";;", "dblp": ";245/2691.html;15/778", "google_scholar": ";xtlV5SAAAAAJ;", "orcid": ";;0000-0003-1976-8186", "linkedin": ";https://de.linkedin.com/in/mats-richter-879609154;", "or_profile": "~Isabel_Holler1;~Mats_Leon_Richter1;~Ulf_Krumnack1", "aff": ";Montreal Institute for Learning Algorithms, University of Montreal, Universit\u00e9 de Montr\u00e9al;Institute of Cognitive Science, Osnabr\u00fcck University, Universit\u00e4t Osnabr\u00fcck", "aff_domain": ";mila.umontreal.ca;ikw.uni-osnabrueck.de", "position": ";Postdoc;Postdoc", "bibtex": "@misc{\nholler2023signs,\ntitle={Signs in the Lottery: Structural Similarities Between Winning Tickets},\nauthor={Isabel Holler and Mats Leon Richter and Ulf Krumnack},\nyear={2023},\nurl={https://openreview.net/forum?id=3l9mLzLa0BA}\n}", "github": "", "project": "", "reviewers": "ghvk;nFr4;MFkC;UJuy", "site": "https://openreview.net/forum?id=3l9mLzLa0BA", "pdf_size": 401249, "recommendation": "1;3;3;5", "confidence": "4;4;3;4", "correctness": "2;3;3;2", "technical_novelty": "2;2;2;3", "empirical_novelty": "1;2;3;3", "wc_summary_paper": "74;83;23;69", "wc_strength_and_weaknesses": "371;266;97;471", "wc_clarity_quality_novelty_and_reproducibility": "108;31;23;113", "wc_summary_review": "73;16;39;35", "wc_review": "626;396;182;688", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.0, 1.4142135623730951 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 62.25, 23.209642392764263 ], "wc_strength_and_weaknesses_avg": [ 301.25, 138.42032907055236 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 68.75, 41.883021619744675 ], "wc_summary_review_avg": [ 40.75, 20.54720175595694 ], "wc_review_avg": [ 473.0, 200.15244190366502 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], 
"reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:VRfn-cBm-xwJ:scholar.google.com/&scioq=Signs+in+the+Lottery:+Structural+Similarities+Between+Winning+Tickets&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "University of Montreal;Osnabr\u00fcck University", "aff_unique_dep": "Montreal Institute for Learning Algorithms;Institute of Cognitive Science", "aff_unique_url": "https://www.mila.quebec;https://www.uni-osnabrueck.de", "aff_unique_abbr": "MILA;Uni Osnabr\u00fcck", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Montreal;Osnabr\u00fcck", "aff_country_unique_index": "0;1", "aff_country_unique": "Canada;Germany" }, { "id": "3lH6Pc0Qeg2", "title": "Reconciling Adversarial Robustness with Accuracy via Randomized Weights", "track": "main", "status": "Withdraw", "tldr": "We study the trade-off between clean accuracy and robustness through randomized weights, design a novel adversarial training method based on Tylor series of randomized weights to improve both clean accuracy and robustness.", "abstract": "Recent years have seen a rapid growth of research on building more robust deep neural networks against adversarial examples. Among them, adversarial training has been shown to be one of the most effective approaches. To balance the robustness of adversarial examples and the accuracy of clean examples, a series of works design enhanced adversarial training methods to strike a trade-off between them with \\emph{deterministic} model parameters (i.e., weights). Noting that clean and adversarial examples are highly entangled with the network weights, we propose to study such a trade-off from another perspective, by \\emph{treating weights as random variables} in order to harvest the insights yielded from statistical learning theory. Inspired by recent advances of information-theoretic generalization error bound, we found that adversarial training over the randomized weight space can potentially narrow the generalization bound of both clean and adversarial data, and improve both adversarial robustness and clean accuracy simultaneously. Building upon such insights, we propose a novel adversarial training method via Taylor expansion in the hypothesis space of the randomized weights. 
With PGD, CW, and Auto Attacks, an extensive set of experiments demonstrate that our method further enhances adversarial training, boosting both robustness and clean accuracy.", "keywords": "adversarial robustness;adversarial training;randomized weights", "primary_area": "", "supplementary_material": "/attachment/428fcc50684be2d0dbc1157748db7c60457cbee9.zip", "author": "Gaojie Jin;Xinping Yi;Dengyu Wu;Ronghui Mu;Xiaowei Huang", "authorids": "~Gaojie_Jin1;~Xinping_Yi1;~Dengyu_Wu1;~Ronghui_Mu1;~Xiaowei_Huang1", "gender": "M;M;;;M", "homepage": "https://alexkael.github.io/;https://sites.google.com/site/xinpingyi00/;;;https://cgi.csc.liv.ac.uk/~xiaowei/", "dblp": "276/5476;95/10043.html;;;60/5414-1.html", "google_scholar": "n_cu7jwAAAAJ;wAcbI5kAAAAJ;;;https://scholar.google.co.uk/citations?user=X4fLCCIAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Gaojie_Jin1;~Xinping_Yi1;~Dengyu_Wu1;~Ronghui_Mu1;~Xiaowei_Huang1", "aff": "University of Liverpool;University of Liverpool;;;University of Liverpool", "aff_domain": "liverpool.ac.uk;liverpool.ac.uk;;;liverpool.ac.uk", "position": "PhD student;Assistant Professor;;;Full Professor", "bibtex": "@misc{\njin2023reconciling,\ntitle={Reconciling Adversarial Robustness with Accuracy via Randomized Weights},\nauthor={Gaojie Jin and Xinping Yi and Dengyu Wu and Ronghui Mu and Xiaowei Huang},\nyear={2023},\nurl={https://openreview.net/forum?id=3lH6Pc0Qeg2}\n}", "github": "", "project": "", "reviewers": "JVVe;sgLr;dYx7;FZov", "site": "https://openreview.net/forum?id=3lH6Pc0Qeg2", "pdf_size": 1046967, "recommendation": "1;3;5;5", "confidence": "4;4;3;3", "correctness": "1;2;3;3", "technical_novelty": "2;1;2;4", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "149;37;69;64", "wc_strength_and_weaknesses": "251;375;387;312", "wc_clarity_quality_novelty_and_reproducibility": "41;27;35;19", "wc_summary_review": "34;58;19;95", "wc_review": "475;497;510;490", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.5, 1.6583123951777 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.25, 1.0897247358851685 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 79.75, 41.79339062579154 ], "wc_strength_and_weaknesses_avg": [ 331.25, 54.38922227794768 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 30.5, 8.2915619758885 ], "wc_summary_review_avg": [ 51.5, 28.709754439911183 ], "wc_review_avg": [ 493.0, 12.62933094031509 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.9045340337332909, "corr_recommendation_correctness": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:asGV3LbBQN8J:scholar.google.com/&scioq=Reconciling+Adversarial+Robustness+with+Accuracy+via+Randomized+Weights&hl=en&as_sdt=0,23", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Liverpool", "aff_unique_dep": "", "aff_unique_url": "https://www.liverpool.ac.uk", "aff_unique_abbr": "Liv Uni", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "3lJ3pMuAwDT", "title": "AGREE: A Simple Aggregator of Detectors\u2019 Decisions", "track": "main", "status": "Withdraw", 
"tldr": "We propose a simple yet effective method to aggregate the decisions based on the soft-probability outputs of multiple trained detectors, possibly provided by a third party.", "abstract": "A simple yet effective method to aggregate the decisions based on the soft-probability outputs of multiple trained detectors, possibly provided by a third party, is introduced. We formally derive a mathematically sound theoretical framework, which is straightforward as it does not require further training of the given detectors, and modular, allowing existing (and future) detectors to be merged into a single one. As an application, we evaluate our framework by tackling the recently proposed problem of simultaneous adversarial examples detection, i.e. when the attacks at the evaluation time can be simultaneously crafted according to a variety of algorithms and objective loss functions. While each single detector tends to underperform or fail in the aforementioned attack scenario,\nour framework successfully aggregates the knowledge of the available detectors to guarantee a more reliable decision.\nWe validate our AGgregatoR of dEtectors' dEcisions (Agree) on popular datasets (e.g., CIFAR10 and SVHN) and we show that it consistently outperforms the state-of-the-art when simultaneous adversarial attacks are present at evaluation time.", "keywords": "AI Safety;Algorithms Evaluation;Deep Learning;Adversarial Examples", "primary_area": "", "supplementary_material": "/attachment/2dec8e30ba72d38ea6afc9d682887ad91f870be9.zip", "author": "Federica Granese;Marco Romanelli;Marine Picot;Francisco Messina;Pablo Piantanida", "authorids": "~Federica_Granese1;~Marco_Romanelli1;~Marine_Picot2;~Francisco_Messina1;~Pablo_Piantanida2", "gender": "F;;;M;M", "homepage": "https://fgranese.github.io/;;;;https://www.pablo-piantanida.org", "dblp": "251/6090;;;;44/1416", "google_scholar": "https://scholar.google.ca/citations?hl=it;;;pJ4zRlgAAAAJ;https://scholar.google.fr/citations?user=QyBEFv0AAAAJ", "orcid": "0000-0002-0084-521X;;;;", "linkedin": "federica-granese-201b311a0/;;;;pablo-piantanida-60a51bb5/?locale=en_US", "or_profile": "~Federica_Granese1;~Marco_Romanelli1;~Marine_Picot2;~Francisco_Messina1;~Pablo_Piantanida2", "aff": "\u00c9cole Polytechnique;;;;Mila - Quebec AI Institute ", "aff_domain": "polytechnique.edu;;;;mila.quebec", "position": "PhD student;;;;Full Professor", "bibtex": "@misc{\ngranese2023agree,\ntitle={{AGREE}: A Simple Aggregator of Detectors{\\textquoteright} Decisions},\nauthor={Federica Granese and Marco Romanelli and Marine Picot and Francisco Messina and Pablo Piantanida},\nyear={2023},\nurl={https://openreview.net/forum?id=3lJ3pMuAwDT}\n}", "github": "", "project": "", "reviewers": "Dk1X;Mpns;Zm4M;nZhm", "site": "https://openreview.net/forum?id=3lJ3pMuAwDT", "pdf_size": 1086743, "recommendation": "3;3;3;5", "confidence": "4;4;3;3", "correctness": "2;3;2;4", "technical_novelty": "1;2;2;2", "empirical_novelty": "1;3;2;2", "wc_summary_paper": "64;145;26;78", "wc_strength_and_weaknesses": "478;916;67;126", "wc_clarity_quality_novelty_and_reproducibility": "22;67;7;31", "wc_summary_review": "34;79;25;28", "wc_review": "598;1207;125;263", "wc_reply_reviewers": "0;0;0;140", "wc_reply_authors": "0;0;145;580", "reply_reviewers": "0;0;0;1", "reply_authors": "0;0;1;2", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.75, 0.82915619758885 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 
], "wc_summary_paper_avg": [ 78.25, 42.97891925118639 ], "wc_strength_and_weaknesses_avg": [ 396.75, 338.47553456638485 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 31.75, 22.083647796503186 ], "wc_summary_review_avg": [ 41.5, 21.891779278989635 ], "wc_review_avg": [ 548.25, 417.41308975641863 ], "wc_reply_reviewers_avg": [ 35.0, 60.6217782649107 ], "wc_reply_authors_avg": [ 181.25, 237.70714650594752 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 0.75, 0.82915619758885 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.8703882797784891, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:1Ls3vquNAqkJ:scholar.google.com/&scioq=AGREE:+A+Simple+Aggregator+of+Detectors%E2%80%99+Decisions&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Ecole Polytechnique;Quebec AI Institute", "aff_unique_dep": ";AI Institute", "aff_unique_url": "https://www.polytechnique.edu;https://mila.quebec", "aff_unique_abbr": "X;Mila", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "France;Canada" }, { "id": "3leZITnUE9r", "title": "An Empirical Study of Metrics to Measure Representational Harms in Pre-Trained Language Models", "track": "main", "status": "Reject", "tldr": "Measuring implicit hate in pretrained language models", "abstract": "Large-scale Pre-Trained Language Models (PTLMs) capture knowledge from massive human-written data which contains latent societal biases and toxic contents. In this paper, we leverage the primary task of PTLMs, i.e. language modeling, and propose a new metric to quantify manifested implicit representational harms in PTLMs towards 13 marginalized demographics. Using this metric, we conducted an empirical analysis of 24 widely used PTLMs. Our analysis provides insights into the correlation between the proposed metric in this work and other related fairness metrics. We observe that our metric correlates with the majority of gender-specific fairness metrics in the literature. Through extensive experiments, we explore the connections between PTLMs architectures and representational harms across two dimensions: depth and width of the networks. 
We found that prioritizing depth over width mitigates representational harms in some PTLMs.", "keywords": "Natural Language Processing;Fairness;Safety", "primary_area": "", "supplementary_material": "", "author": "Saghar Hosseini;Ahmed Hassan Awadallah;Hamid Palangi", "authorids": "~Saghar_Hosseini1;~Ahmed_Hassan_Awadallah1;~Hamid_Palangi1", "gender": "F;M;M", "homepage": "https://saghar-hosseini.com/;https://www.microsoft.com/en-us/research/people/hassanam/publications/;https://www.hamidpalangi.com/", "dblp": "125/5437;147/9148;01/963", "google_scholar": "XhTT61UAAAAJ;sNGk-9MAAAAJ;https://scholar.google.ca/citations?user=B1lAghgAAAAJ", "orcid": ";;", "linkedin": "sagharh/;ahmed-hassan-awadallah-a355a27/;", "or_profile": "~Saghar_Hosseini1;~Ahmed_Hassan_Awadallah1;~Hamid_Palangi1", "aff": "Microsoft;Microsoft Research;Google", "aff_domain": "microsoft.com;microsoft.com;google.com", "position": "Senior Researcher;Principal Researcher;Staff Research Scientist", "bibtex": "@misc{\nhosseini2023an,\ntitle={An Empirical Study of Metrics to Measure Representational Harms in Pre-Trained Language Models},\nauthor={Saghar Hosseini and Ahmed Hassan Awadallah and Hamid Palangi},\nyear={2023},\nurl={https://openreview.net/forum?id=3leZITnUE9r}\n}", "github": "", "project": "", "reviewers": "K8hw;hcJz;82Na;UeDp", "site": "https://openreview.net/forum?id=3leZITnUE9r", "pdf_size": 453389, "recommendation": "3;5;5;6", "confidence": "4;4;4;3", "correctness": "3;3;3;4", "technical_novelty": "2;2;2;2", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "65;83;58;43", "wc_strength_and_weaknesses": "96;307;81;83", "wc_clarity_quality_novelty_and_reproducibility": "54;125;185;10", "wc_summary_review": "47;55;36;24", "wc_review": "262;570;360;160", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "477;789;744;278", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 62.25, 14.376630342329875 ], "wc_strength_and_weaknesses_avg": [ 141.75, 95.5807904340616 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 93.5, 66.8898348032046 ], "wc_summary_review_avg": [ 40.5, 11.672617529928752 ], "wc_review_avg": [ 338.0, 151.4661678395542 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 572.0, 207.44517347964498 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.6622661785325219, "corr_recommendation_correctness": 0.6622661785325219, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15367485438481362239&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0;1", "aff_unique_norm": "Microsoft;Google", "aff_unique_dep": "Microsoft Corporation;Google", "aff_unique_url": "https://www.microsoft.com;https://www.google.com", "aff_unique_abbr": "Microsoft;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "DiffEdit: Diffusion-based semantic image editing with mask guidance", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11648", "id": "3lge0p5o-M-", "poster": "/media/PosterPDFs/ICLR%202023/11648.png?t=1681561335.3666303", "openreview":
"https://openreview.net/forum?id=3lge0p5o-M-", "slides": "https://iclr.cc/virtual/2023/poster/11648", "video": "https://iclr.cc/virtual/2023/poster/11648", "author_site": "Guillaume Couairon, Jakob Verbeek, Holger Schwenk, MATTHIEU CORD", "tldr": "", "abstract": "Image generation has recently seen tremendous advances, with diffusion models allowing to synthesize convincing images for a large variety of text prompts. In this article, we propose DiffEdit, a method to take advantage of text-conditioned diffusion models for the task of semantic image editing, where the goal is to edit an image based on a text query. Semantic image editing is an extension of image generation, with the additional constraint that the generated image should be as similar as possible to a given input image. \nCurrent editing methods based on diffusion models usually require to provide a mask, making the task much easier by treating it as a conditional inpainting task. In contrast, our main contribution is able to automatically generate a mask highlighting regions of the input image that need to be edited, by contrasting predictions of a diffusion model conditioned on different text prompts. Moreover, we rely on latent inference to preserve content in those regions of interest and show excellent synergies with mask-based diffusion. \nDiffEdit achieves state-of-the-art editing performance on ImageNet. In addition, we evaluate semantic image editing in more challenging settings, using images from the COCO dataset as well as text-based generated images.", "keywords": "computer vision;image editing;diffusion models", "primary_area": "", "supplementary_material": "", "author": "Guillaume Couairon;Jakob Verbeek;Holger Schwenk;Matthieu Cord", "authorids": "~Guillaume_Couairon1;~Jakob_Verbeek1;~Holger_Schwenk1;~Matthieu_Cord1", "gender": ";Not Specified;M;M", "homepage": ";http://lear.inrialpes.fr/~verbeek;https://ai.meta.com/people/271799079300984/holger-schwenk/;https://cord.isir.upmc.fr/", "dblp": ";v/JakobJVerbeek;92/6322.html;68/3117", "google_scholar": ";oZGA-rAAAAAJ;https://scholar.google.fr/citations?user=Ysjk8kkAAAAJ;SpAotDcAAAAJ", "orcid": ";0000-0003-1419-1816;;", "linkedin": ";jakob-verbeek-3b11aa14a/;;", "or_profile": "~Guillaume_Couairon1;~Jakob_Verbeek1;~Holger_Schwenk1;~Matthieu_Cord1", "aff": ";Meta;Meta Facebook;Sorbonne Universit\u00e9", "aff_domain": ";meta.com;fb.com;isir.upmc.fr", "position": ";Research Scientist;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\ncouairon2023diffedit,\ntitle={DiffEdit: Diffusion-based semantic image editing with mask guidance},\nauthor={Guillaume Couairon and Jakob Verbeek and Holger Schwenk and Matthieu Cord},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=3lge0p5o-M-}\n}", "github": "", "project": "", "reviewers": "zQQb;hxgu;ZwKd;WyT5", "pdf_size": 20372644, "recommendation": "5;8;8;10", "confidence": "4;4;5;4", "correctness": "4;4;4;4", "technical_novelty": "1;3;2;4", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "70;73;142;70", "wc_strength_and_weaknesses": "189;167;192;161", "wc_clarity_quality_novelty_and_reproducibility": "20;81;83;47", "wc_summary_review": "27;60;67;16", "wc_review": "306;381;484;294", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "390;105;138;23", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 7.75, 1.7853571071357126 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 4.0, 0.0 
], "technical_novelty_avg": [ 2.5, 1.118033988749895 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 88.75, 30.768287245149022 ], "wc_strength_and_weaknesses_avg": [ 177.25, 13.460590625971804 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 57.75, 26.070817018267764 ], "wc_summary_review_avg": [ 42.5, 21.5 ], "wc_review_avg": [ 366.25, 75.71781494470109 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 164.0, 137.03466714667496 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.08084520834544431, "corr_recommendation_correctness": 0.0, "gs_citation": 508, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2689039245260908418&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "pdf": "https://openreview.net/pdf?id=3lge0p5o-M-", "email": ";meta.com;fb.com;isir.upmc.fr", "author_num": 4, "aff_unique_index": "0;0;1", "aff_unique_norm": "Meta;Sorbonne Universit\u00e9", "aff_unique_dep": "Meta Platforms, Inc.;", "aff_unique_url": "https://meta.com;https://www.sorbonne-universite.fr", "aff_unique_abbr": "Meta;Sorbonne U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;France" }, { "id": "3lr-ESFLUO", "title": "Toward Discovering Options that Achieve Faster Planning", "track": "main", "status": "Withdraw", "tldr": "We propose a new objective for option discovery that emphasizes the computational advantage of using options in planning.", "abstract": "We propose a new objective for option discovery that emphasizes the computational advantage of using options in planning. In a sequential machine, the speed of planning is proportional to the number of elementary operations used to achieve a good policy. For episodic tasks, the number of elementary operations depends on the number of options composed by the policy in an episode and the number of options being considered at each decision point. To reduce the amount of computation in planning, for a given set of episodic tasks and a given number of options, our objective prefers options with which it is possible to achieve a high return by composing few options, and also prefers a smaller set of options to choose from at each decision point. We develop an algorithm that optimizes the proposed objective. In a variant of the classic four-room domain, we show that 1) a higher objective value is typically associated with fewer number of elementary planning operations used by the option-value iteration algorithm to obtain a near-optimal value function, 2) our algorithm achieves an objective value that matches it achieved by two human-designed options 3) the amount of computation used by option-value iteration with options discovered by our algorithm matches those with the human-designed options, 4) the options produced by our algorithm also make intuitive sense--they seem to move to and terminate at the entrance of each room.", "keywords": "Option Discovery;Temporal Abstraction;Planning;Reinforcement Learning", "primary_area": "", "supplementary_material": "/attachment/5068392e98e7e588268b1da424ce3007128456aa.zip", "author": "Yi Wan;Richard S. 
Sutton", "authorids": "~Yi_Wan1;~Richard_S._Sutton1", "gender": "M;M", "homepage": "https://sites.google.com/view/yi-wan/;http://richsutton.com", "dblp": ";48/6070", "google_scholar": "zMVstroAAAAJ;https://scholar.google.ca/citations?user=6m4wv6gAAAAJ", "orcid": ";0000-0002-3679-3415", "linkedin": ";richard-sutton-0653545/", "or_profile": "~Yi_Wan1;~Richard_S_Sutton1", "aff": "University of Alberta;Google DeepMind", "aff_domain": "ualberta.ca;deepmind.com", "position": "PhD student;Research Scientist", "bibtex": "@misc{\nwan2023toward,\ntitle={Toward Discovering Options that Achieve Faster Planning},\nauthor={Yi Wan and Richard S. Sutton},\nyear={2023},\nurl={https://openreview.net/forum?id=3lr-ESFLUO}\n}", "github": "", "project": "", "reviewers": "iSDF;3Dk2;bgW3;wJtH", "site": "https://openreview.net/forum?id=3lr-ESFLUO", "pdf_size": 1171227, "recommendation": "3;3;5;6", "confidence": "3;4;3;4", "correctness": "3;3;4;3", "technical_novelty": "3;3;2;3", "empirical_novelty": "2;3;2;3", "wc_summary_paper": "54;74;110;127", "wc_strength_and_weaknesses": "142;223;223;181", "wc_clarity_quality_novelty_and_reproducibility": "112;103;27;180", "wc_summary_review": "39;50;26;31", "wc_review": "347;450;386;519", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 91.25, 28.78693279944913 ], "wc_strength_and_weaknesses_avg": [ 192.25, 33.699962907991456 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 105.5, 54.22407214512757 ], "wc_summary_review_avg": [ 36.5, 9.069178573608527 ], "wc_review_avg": [ 425.5, 65.31653695657785 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.19245008972987526, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2864707952530250271&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1", "aff_unique_norm": "University of Alberta;Google", "aff_unique_dep": ";Google DeepMind", "aff_unique_url": "https://www.ualberta.ca;https://deepmind.com", "aff_unique_abbr": "UAlberta;DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Canada;United Kingdom" }, { "id": "3ly9cG9Ql9h", "title": "What does a platypus look like? Generating customized prompts for zero-shot image classification", "track": "main", "status": "Reject", "tldr": "Using GPT-3 to generate better CLIP prompts", "abstract": "Open vocabulary models are a promising new paradigm for image classification. Unlike traditional classification models, open vocabulary models classify among any arbitrary set of categories specified with natural language during inference. This natural language, called \"prompts\", typically consists of a set of hand-written templates (e.g., \"a photo of a {}\") which are completed with each of the category names. This work introduces a simple method to generate higher accuracy prompts, without relying on any explicit knowledge of the task domain and with far fewer hand-constructed sentences.
To achieve this, we combine open vocabulary models with large language models (LLMs) to create Customized Prompts via Language models (CuPL, pronounced \"couple\"). In particular, we leverage the knowledge contained in LLMs in order to generate many descriptive sentences that are customized for each object category. We find that this straightforward and general approach improves accuracy on a range of zero-shot image classification benchmarks, including over one percentage point gain on ImageNet. Finally, this simple baseline requires no additional training and remains completely zero-shot.", "keywords": "zero-shot;image classification;prompts;open vocabulary models", "primary_area": "", "supplementary_material": "/attachment/9db19b1bf5b303aca4b7a6c7766a614b22017fce.zip", "author": "Sarah M Pratt;Rosanne Liu;Ali Farhadi", "authorids": "~Sarah_M_Pratt1;~Rosanne_Liu1;~Ali_Farhadi3", "gender": "F;F;M", "homepage": ";https://rosanneliu.com/;https://homes.cs.washington.edu/~ali/", "dblp": ";218/6453;37/5826", "google_scholar": ";_GzrRGwAAAAJ;jeOFRDsAAAAJ", "orcid": ";;", "linkedin": "sarahpratt;;", "or_profile": "~Sarah_M_Pratt1;~Rosanne_Liu1;~Ali_Farhadi3", "aff": "University of Washington;ML Collective;University of Washington", "aff_domain": "uw.edu;mlcollective.org;cs.uw.edu", "position": "PhD student;Researcher;Full Professor", "bibtex": "@misc{\npratt2023what,\ntitle={What does a platypus look like? Generating customized prompts for zero-shot image classification},\nauthor={Sarah M Pratt and Rosanne Liu and Ali Farhadi},\nyear={2023},\nurl={https://openreview.net/forum?id=3ly9cG9Ql9h}\n}", "github": "", "project": "", "reviewers": "DGAE;ECmW;UY2k;LiMR", "site": "https://openreview.net/forum?id=3ly9cG9Ql9h", "pdf_size": 3920580, "recommendation": "3;3;6;8", "confidence": "4;5;4;4", "correctness": "4;3;4;4", "technical_novelty": "2;1;3;2", "empirical_novelty": "2;2;2;4", "wc_summary_paper": "38;69;90;45", "wc_strength_and_weaknesses": "324;355;219;443", "wc_clarity_quality_novelty_and_reproducibility": "37;51;115;48", "wc_summary_review": "31;41;41;127", "wc_review": "430;516;465;663", "wc_reply_reviewers": "0;216;0;0", "wc_reply_authors": "434;962;622;536", "reply_reviewers": "0;1;0;0", "reply_authors": "1;2;1;1", "recommendation_avg": [ 5.0, 2.1213203435596424 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 60.5, 20.54872258803452 ], "wc_strength_and_weaknesses_avg": [ 335.25, 80.06364655697367 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 62.75, 30.6135182558294 ], "wc_summary_review_avg": [ 60.0, 38.897300677553446 ], "wc_review_avg": [ 518.5, 88.85521931771932 ], "wc_reply_reviewers_avg": [ 54.0, 93.53074360871938 ], "wc_reply_authors_avg": [ 638.5, 198.27443102931855 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.5443310539518174, "corr_recommendation_correctness": 0.5443310539518174, "gs_citation": 286, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3104852545770202356&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Washington;ML Collective", "aff_unique_dep": ";", "aff_unique_url": "https://www.washington.edu;", "aff_unique_abbr": "UW;", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States;" }, { "title": "DINO: DETR with Improved DeNoising Anchor Boxes for End-to-End Object Detection", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11884", "id": "3mRwyG5one", "poster": "/media/PosterPDFs/ICLR%202023/11884.png?t=1682497405.6270707", "openreview": "https://openreview.net/forum?id=3mRwyG5one", "slides": "https://iclr.cc/virtual/2023/poster/11884", "video": "https://iclr.cc/virtual/2023/poster/11884", "author_site": "Hao Zhang, Feng Li, Shilong Liu, Lei Zhang, Hang Su, Jun Zhu, Lionel Ni, Heung-Yeung Shum", "tldr": "We present a state-of-the-art end-to-end object detector, the first DETR-like model on top of the COCO detection leader board.", "abstract": "We present DINO (DETR with Improved deNoising anchOr boxes), a strong end-to-end object detector. DINO improves over previous DETR-like models in performance and efficiency by using a contrastive way for denoising training, a look forward twice scheme for box prediction, and a mixed query selection method for anchor initialization. DINO achieves 49.4AP in 12 epochs and 51.3AP in 24 epochs on COCO with a ResNet-50 backbone and multi-scale features, yielding a significant improvement of +6.0AP and +2.7AP, respectively, compared to DN-DETR, the previous best DETR-like model. DINO scales well in both model size and data size. Without bells and whistles, after pre-training on the Objects365 dataset with a SwinL backbone, DINO obtains the best results on both COCO val2017 (63.2AP) and test-dev (63.3AP) with model size under 1 billion parameters. Compared to other models on the leaderboard, DINO significantly reduces its model size and pre-training data size while achieving better results. 
The code will be available.", "keywords": "Object Detection;Detection Transformer;End-to-End Detector", "primary_area": "", "supplementary_material": "", "author": "Hao Zhang;Feng Li;Shilong Liu;Lei Zhang;Hang Su;Jun Zhu;Lionel Ni;Heung-Yeung Shum", "authorids": "~Hao_Zhang39;~Feng_Li9;~Shilong_Liu1;~Lei_Zhang23;~Hang_Su3;~Jun_Zhu2;~Lionel_Ni1;~Heung-Yeung_Shum1", "gender": "M;M;M;M;M;M;M;M", "homepage": "https://haozhang534.github.io/;https://fengli-ust.github.io/;https://www.lsl.zone;http://ml.cs.tsinghua.edu.cn/~jun;http://repository.ust.hk/ir/AuthorProfile/ni-lionel;https://www.microsoft.com/en-us/research/people/hshum/;;https://www.leizhang.org/", "dblp": "55/2270-97;92/2954-40.html;;50/2644-1;n/LionelMNi;;26/5371-6;z/LeiZhang", "google_scholar": "B8hPxMQAAAAJ;https://scholar.google.com/citations?hl=zh-CN;nkSVY3MAAAAJ;axsP38wAAAAJ;https://scholar.google.com.tw/citations?user=OzMYwDIAAAAJ;;dxN1_X0AAAAJ;fIlGZToAAAAJ", "orcid": ";;;;;;;", "linkedin": "hao-zhang-3b09b8196/;;;;;;;", "or_profile": "~Hao_Zhang39;~Feng_Li9;~Shilong_Liu1;~Jun_Zhu2;~Lionel_Ni1;~Heung-Yeung_Shum1;~Hang_Su2;~Lei_Zhang1", "aff": "Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Microsoft Research, Redmond;Tsinghua University;Hong Kong University of Science and Technology;;Tsinghua University;International Digital Economy Academy", "aff_domain": "ust.hk;ust.hk;microsoft.com;mail.tsinghua.edu.cn;ust.hk;;tsinghua.edu.cn;idea.edu.cn", "position": "PhD student;PhD student;Research Intern;Professor;Full Professor;;Associate Professor;Chief Scientist", "bibtex": "@inproceedings{\nzhang2023dino,\ntitle={{DINO}: {DETR} with Improved DeNoising Anchor Boxes for End-to-End Object Detection},\nauthor={Hao Zhang and Feng Li and Shilong Liu and Lei Zhang and Hang Su and Jun Zhu and Lionel Ni and Heung-Yeung Shum},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=3mRwyG5one}\n}", "github": "", "project": "", "reviewers": "7vU5;nKUz;tCi7;dnJ7;eqZC", "pdf_size": 1894199, "recommendation": "5;6;8;8;8", "confidence": "4;3;4;5;4", "correctness": "3;4;4;4;3", "technical_novelty": "2;4;3;2;3", "empirical_novelty": "2;4;3;0;3", "wc_summary_paper": "226;49;55;72;104", "wc_strength_and_weaknesses": "187;94;122;98;357", "wc_clarity_quality_novelty_and_reproducibility": "82;24;174;35;51", "wc_summary_review": "46;20;22;13;28", "wc_review": "541;187;373;218;540", "wc_reply_reviewers": "0;0;0;16;0", "wc_reply_authors": "513;334;789;336;955", "reply_reviewers": "0;0;0;1;0", "reply_authors": "2;2;1;3;2", "recommendation_avg": [ 7.0, 1.2649110640673518 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "correctness_avg": [ 3.6, 0.4898979485566356 ], "technical_novelty_avg": [ 2.8, 0.7483314773547882 ], "empirical_novelty_avg": [ 2.4, 1.3564659966250536 ], "wc_summary_paper_avg": [ 101.2, 65.26070793364104 ], "wc_strength_and_weaknesses_avg": [ 171.6, 98.49791875973827 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 73.2, 54.057006946370976 ], "wc_summary_review_avg": [ 25.8, 11.178550889985697 ], "wc_review_avg": [ 371.8, 151.47725901929965 ], "wc_reply_reviewers_avg": [ 3.2, 6.4 ], "wc_reply_authors_avg": [ 585.4, 248.47583383500293 ], "reply_reviewers_avg": [ 0.2, 0.4 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": 0.49999999999999994, "corr_recommendation_correctness": 0.3227486121839514, "gs_citation": 1881, 
"gs_cited_by_link": "https://scholar.google.com/scholar?cites=7039269942427062691&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=3mRwyG5one", "email": "ust.hk;ust.hk;microsoft.com;mail.tsinghua.edu.cn;ust.hk;;tsinghua.edu.cn;idea.edu.cn", "author_num": 8, "aff_unique_index": "0;0;1;2;0;2;3", "aff_unique_norm": "Hong Kong University of Science and Technology;Microsoft;Tsinghua University;International Digital Economy Academy", "aff_unique_dep": ";Microsoft Research;;", "aff_unique_url": "https://www.ust.hk;https://www.microsoft.com/en-us/research;https://www.tsinghua.edu.cn;", "aff_unique_abbr": "HKUST;MSR;THU;", "aff_campus_unique_index": "0;0;1;0", "aff_campus_unique": "Hong Kong SAR;Redmond;", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "China;United States;" }, { "id": "3m_awcLrg8E", "title": "Token Turing Machines", "track": "main", "status": "Withdraw", "tldr": "Token Turing Machines (TTM) is a sequential, autoregressive transformer model with memory for real-world sequential decision making, modernizing Neural Turing Machines.", "abstract": "We propose Token Turing Machines (TTM), a sequential, autoregressive Transformer model with memory for real-world sequential decision making. Our model is inspired by the seminal Neural Turing Machine, and has an external memory consisting of a set of tokens which summarise the previous history. This memory is efficiently addressed, read and written using a Transformer as the processing unit/controller at each step. The model's memory module ensures that a new observation will only be processed with the contents of the memory (and not the entire history), meaning that it can efficiently process long sequences with a bounded computational cost at each step. 
We show that TTM outperforms alternatives, such as Transformer models designed for long sequences and recurrent neural networks, on two real-world sequential decision making tasks: online temporal activity localization from videos and vision-based robot action policy learning.", "keywords": "memory;Neural Turing Machine;robot learning;sequence", "primary_area": "", "supplementary_material": "", "author": "Michael S Ryoo;Keerthana Gopalakrishnan;Kumara Kahatapitiya;Ted Xiao;Kanishka Rao;Austin Stone;Yao Lu;Julian Ibarz;Anurag Arnab", "authorids": "~Michael_S_Ryoo1;~Keerthana_Gopalakrishnan1;~Kumara_Kahatapitiya1;~Ted_Xiao1;~Kanishka_Rao1;~Austin_Stone1;~Yao_Lu13;~Julian_Ibarz1;~Anurag_Arnab1", "gender": "M;F;M;M;;;;;", "homepage": "http://michaelryoo.com/;https://keerthanapg.com;https://www3.cs.stonybrook.edu/~kkahatapitiy/;https://www.tedxiao.me;https://research.google/people/KanishkaRao/;;;;", "dblp": "r/MichaelSRyoo;;227/5409;198/0598;;202/1823;26/5662-6;;", "google_scholar": "vcw0TJIAAAAJ;;https://scholar.google.com/citations?hl=en;;;IU4ZllQAAAAJ;OI7zFmwAAAAJ;;", "orcid": ";;;;;;;;", "linkedin": ";;;;;austin-charles-stone-1ba33b138/;;;", "or_profile": "~Michael_S_Ryoo1;~Keerthana_Gopalakrishnan1;~Kumara_Kahatapitiya1;~Ted_Xiao1;~Kanishka_Rao1;~Austin_Stone1;~Yao_Lu13;~Julian_Ibarz1;~Anurag_Arnab1", "aff": "Google DeepMind;Research, Google;Google DeepMind;;;Google;Google;;", "aff_domain": "google.com;research.google.com;google.com;;;google.com;google.com;;", "position": "Research Scientist;Researcher;Student Researcher;;;Research Engineer;Researcher;;", "bibtex": "@misc{\nryoo2023token,\ntitle={Token Turing Machines},\nauthor={Michael S Ryoo and Keerthana Gopalakrishnan and Kumara Kahatapitiya and Ted Xiao and Kanishka Rao and Austin Stone and Yao Lu and Julian Ibarz and Anurag Arnab},\nyear={2023},\nurl={https://openreview.net/forum?id=3m_awcLrg8E}\n}", "github": "", "project": "", "reviewers": "tu2F;6R2c;QnKS;wbGg", "site": "https://openreview.net/forum?id=3m_awcLrg8E", "pdf_size": 24248085, "recommendation": "3;3;5;6", "confidence": "4;4;4;4", "correctness": "2;2;3;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "1;2;2;3", "wc_summary_paper": "35;89;81;85", "wc_strength_and_weaknesses": "257;526;61;500", "wc_clarity_quality_novelty_and_reproducibility": "88;130;142;48", "wc_summary_review": "36;48;49;43", "wc_review": "416;793;333;676", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 72.5, 21.834605560898048 ], "wc_strength_and_weaknesses_avg": [ 336.0, 190.30370464076626 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 102.0, 37.067505985701274 ], "wc_summary_review_avg": [ 44.0, 5.1478150704935 ], "wc_review_avg": [ 554.5, 187.00868963767434 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 9, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.9622504486493761, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18255469387018596277&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Google",
"aff_unique_dep": "Google DeepMind", "aff_unique_url": "https://deepmind.com", "aff_unique_abbr": "DeepMind", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;0;1;1", "aff_country_unique": "United Kingdom;United States" }, { "id": "3mji6eUxzY", "title": "Cortically motivated recurrence enables task extrapolation", "track": "main", "status": "Reject", "tldr": "Biologically inspired recurrent network solves (easy and) hard instances of a task with (less and) more iterations.", "abstract": "Feedforward deep neural networks have become the standard class of models in the field of computer vision. Yet, they possess a striking difference relative to their biological counterparts which predominantly perform \u201crecurrent\u201d computations. Why do biological neurons evolve to employ recurrence pervasively? In this paper, we show that a recurrent network is able to flexibly adapt its computational budget during inference and generalize within-task across difficulties. Simultaneously in this study, we contribute a recurrent module we call LocRNN that is designed based on a prior computational model of local recurrent intracortical connections in primates to support such dynamic task extrapolation. LocRNN learns highly accurate solutions to the challenging visual reasoning problems of Mazes and PathFinder that we use here. More importantly, it is able to flexibly use less or more recurrent iterations during inference to zero-shot generalize to less- and more difficult instantiations of each task without requiring extra training data, a potential functional advantage of recurrence that biological visual systems capitalize on. Feedforward networks on the other hand with their fixed computational graphs only partially exhibit this trend, potentially owing to image-level similarities across difficulties. We also posit an intriguing tradeoff between recurrent networks\u2019 representational capacity and their stability in the recurrent state space. Our work encourages further study of the role of recurrence in deep learning models \u2013 especially from the context of out-of-distribution generalization & task extrapolation \u2013 and their properties of task performance and stability.", "keywords": "cognitive science;recurrent neural networks;task extrapolation;out of distribution generalization;visual routines;path integration", "primary_area": "", "supplementary_material": "", "author": "Vijay Veerabadran;Yuan Tang;Ritik Raina;Virginia R. de Sa", "authorids": "~Vijay_Veerabadran1;yutang@ucsd.edu;~Ritik_Raina1;~Virginia_R._de_Sa2", "gender": "M;;M;", "homepage": "https://vijayvee.github.io;;https://rainarit.github.io;", "dblp": "220/4325;;;", "google_scholar": "https://scholar.google.co.in/citations?user=I6b38LoAAAAJ;;4lUt1VsAAAAJ;", "orcid": ";;;", "linkedin": "vijayvee/;;;", "or_profile": "~Vijay_Veerabadran1;yutang@ucsd.edu;~Ritik_Raina1;~Virginia_R._de_Sa2", "aff": "University of California, San Diego;;University of California, San Diego;", "aff_domain": "ucsd.edu;;ucsd.edu;", "position": "PhD student;;Researcher;", "bibtex": "@misc{\nveerabadran2023cortically,\ntitle={Cortically motivated recurrence enables task extrapolation},\nauthor={Vijay Veerabadran and Yuan Tang and Ritik Raina and Virginia R. 
de Sa},\nyear={2023},\nurl={https://openreview.net/forum?id=3mji6eUxzY}\n}", "github": "", "project": "", "reviewers": "j482;3CSc;vnpB;i8Ub", "site": "https://openreview.net/forum?id=3mji6eUxzY", "pdf_size": 2347688, "recommendation": "3;6;6;6", "confidence": "4;4;3;3", "correctness": "3;3;4;3", "technical_novelty": "2;4;3;3", "empirical_novelty": "3;4;2;3", "wc_summary_paper": "219;47;79;194", "wc_strength_and_weaknesses": "602;296;287;194", "wc_clarity_quality_novelty_and_reproducibility": "33;75;166;143", "wc_summary_review": "84;43;48;89", "wc_review": "938;461;580;620", "wc_reply_reviewers": "380;23;21;0", "wc_reply_authors": "1319;822;480;607", "reply_reviewers": "1;1;1;0", "reply_authors": "3;2;1;1", "recommendation_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 134.75, 73.17231375322227 ], "wc_strength_and_weaknesses_avg": [ 344.75, 153.79755362163598 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 104.25, 53.0253477122027 ], "wc_summary_review_avg": [ 66.0, 20.65187642806338 ], "wc_review_avg": [ 649.75, 176.39781036055976 ], "wc_reply_reviewers_avg": [ 106.0, 158.4503076677354 ], "wc_reply_authors_avg": [ 807.0, 319.88200949725194 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:7P_xXGLJzGAJ:scholar.google.com/&scioq=Cortically+motivated+recurrence+enables+task+extrapolation&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "University of California, San Diego", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsd.edu", "aff_unique_abbr": "UCSD", "aff_campus_unique_index": "0;0", "aff_campus_unique": "San Diego", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Interneurons accelerate learning dynamics in recurrent neural networks for statistical adaptation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11104", "id": "3mlITJRYYbs", "poster": "/media/PosterPDFs/ICLR%202023/11104.png?t=1681939287.9499688", "openreview": "https://openreview.net/forum?id=3mlITJRYYbs", "slides": "https://iclr.cc/virtual/2023/poster/11104", "video": "https://iclr.cc/virtual/2023/poster/11104", "author_site": "David Lipshutz, Cengiz Pehlevan, Dmitri Chklovskii", "tldr": "We show that adding interneurons to a recurrent neural network for statistical whitening accelerates the learning dynamics", "abstract": "Early sensory systems in the brain rapidly adapt to fluctuating input statistics, which requires recurrent communication between neurons. Mechanistically, such recurrent communication is often indirect and mediated by local interneurons. In this work, we explore the computational benefits of mediating recurrent communication via interneurons compared with direct recurrent connections. To this end, we consider two mathematically tractable recurrent neural networks that statistically whiten their inputs --- one with direct recurrent connections and the other with interneurons that mediate recurrent communication. 
By analyzing the corresponding continuous synaptic dynamics and numerically simulating the networks, we show that the network with interneurons is more robust to initialization than the network with direct recurrent connections in the sense that the convergence time for the synaptic dynamics in the network with interneurons (resp. direct recurrent connections) scales logarithmically (resp. linearly) with the spectrum of their initialization. Our results suggest that interneurons are computationally useful for rapid adaptation to changing input statistics. Interestingly, the network with interneurons is an overparameterized solution of the whitening objective for the network with direct recurrent connections, so our results can be viewed as a recurrent neural network analogue of the implicit acceleration phenomenon observed in overparameterized feedforward linear networks.", "keywords": "Interneurons;recurrent neural networks;gradient flows;implicit acceleration;statistical whitening", "primary_area": "", "supplementary_material": "/attachment/504b3487de22416a3e4d99a6537ca0b7a187f2c7.zip", "author": "David Lipshutz;Cengiz Pehlevan;Dmitri Chklovskii", "authorids": "~David_Lipshutz1;~Cengiz_Pehlevan2;~Dmitri_Chklovskii1", "gender": "M;;", "homepage": "https://lipshutzlab.com;https://pehlevan.seas.harvard.edu/;", "dblp": "173/4650;145/3480;06/2796", "google_scholar": "XeWdtXcAAAAJ;veDLTPEAAAAJ;7Bgb5TUAAAAJ", "orcid": "0000-0001-9347-8326;0000-0001-9767-6063;", "linkedin": ";;", "or_profile": "~David_Lipshutz1;~Cengiz_Pehlevan2;~Dmitri_Chklovskii1", "aff": "Flatiron Institute;School of Engineering and Applied Sciences, Harvard University;Simons Foundation", "aff_domain": "flatironinstitute.org;seas.harvard.edu;simonsfoundation.org", "position": "Associate Research Scientist;Assistant Professor;Group Leader", "bibtex": "@inproceedings{\nlipshutz2023interneurons,\ntitle={Interneurons accelerate learning dynamics in recurrent neural networks for statistical adaptation},\nauthor={David Lipshutz and Cengiz Pehlevan and Dmitri Chklovskii},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=3mlITJRYYbs}\n}", "github": "", "project": "", "reviewers": "JKUS;Meay;qLnU;hapb", "pdf_size": 7033292, "recommendation": "5;6;8;8", "confidence": "3;2;4;3", "correctness": "3;4;3;4", "technical_novelty": "2;2;3;3", "empirical_novelty": "0;2;0;3", "wc_summary_paper": "157;55;30;45", "wc_strength_and_weaknesses": "423;98;182;47", "wc_clarity_quality_novelty_and_reproducibility": "202;121;35;98", "wc_summary_review": "102;70;18;24", "wc_review": "884;344;265;214", "wc_reply_reviewers": "0;0;0;24", "wc_reply_authors": "553;353;295;115", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;1", "recommendation_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 1.25, 1.299038105676658 ], "wc_summary_paper_avg": [ 71.75, 50.016872153304426 ], "wc_strength_and_weaknesses_avg": [ 187.5, 144.2575821230898 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 114.0, 59.77039400907442 ], "wc_summary_review_avg": [ 53.5, 34.478254016118626 ], "wc_review_avg": [ 426.75, 268.0255351640959 ], "wc_reply_reviewers_avg": [ 6.0, 10.392304845413264 ], "wc_reply_authors_avg": [ 329.0, 156.28819533157326 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], 
"authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.5443310539518174, "corr_recommendation_correctness": 0.19245008972987526, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10430071131364534927&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=3mlITJRYYbs", "email": "flatironinstitute.org;seas.harvard.edu;simonsfoundation.org", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Flatiron Institute;Harvard University;Simons Foundation", "aff_unique_dep": ";School of Engineering and Applied Sciences;", "aff_unique_url": "https://flatironinstitute.org;https://www.harvard.edu;https://www.simonsfoundation.org", "aff_unique_abbr": "Flatiron;Harvard;Simons Foundation", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Stochastic Differentially Private and Fair Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12184", "id": "3nM5uhPlfv6", "poster": "", "openreview": "https://openreview.net/forum?id=3nM5uhPlfv6", "slides": "https://iclr.cc/virtual/2023/poster/12184", "video": "https://iclr.cc/virtual/2023/poster/12184", "author_site": "Andrew Lowy, Devansh Gupta, Meisam Razaviyayn", "tldr": "The first efficient differentially private fair learning algorithm that is guaranteed to converge, even when stochastic minibatches of data are used in each iteration of training. ", "abstract": "Machine learning models are increasingly used in high-stakes decision-making systems. In such applications, a major concern is that these models sometimes discriminate against certain demographic groups such as individuals with certain race, gender, or age. Another major concern in these applications is the violation of the privacy of users. While fair learning algorithms have been developed to mitigate discrimination issues, these algorithms can still leak sensitive information, such as individuals\u2019 health or financial records. Utilizing the notion of differential privacy (DP), prior works aimed at developing learning algorithms that are both private and fair. However, existing algorithms for DP fair learning are either not guaranteed to converge or require full batch of data in each iteration of the algorithm to converge. In this paper, we provide the first stochastic differentially private algorithm for fair learning that is guaranteed to converge. Here, the term \u201cstochastic\" refers to the fact that our proposed algorithm converges even when minibatches of data are used at each iteration (i.e. stochastic optimization). Our framework is flexible enough to permit different fairness notions, including demographic parity and equalized odds. In addition, our algorithm can be applied to non-binary classification tasks with multiple (non-binary) sensitive attributes. As a byproduct of our convergence analysis, we provide the first utility guarantee for a DP algorithm for solving nonconvex-strongly concave min-max problems. 
Our numerical experiments show that the proposed algorithm consistently offers significant performance gains over the state-of-the-art baselines, and can be applied to larger scale problems with non-binary target/sensitive attributes.", "keywords": "algorithmic fairness;differential privacy;private fair learning;stochastic optimization", "primary_area": "", "supplementary_material": "/attachment/4d4ff10a46f8d91c0b19b3aed014f32f890cc2c3.zip", "author": "Andrew Lowy;Devansh Gupta;Meisam Razaviyayn", "authorids": "~Andrew_Lowy1;~Devansh_Gupta1;~Meisam_Razaviyayn1", "gender": ";M;M", "homepage": "https://sites.google.com/view/andrewlowy;;https://sites.usc.edu/razaviyayn/", "dblp": "285/5314;;43/8577", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.co.in/citations?view_op=list_works;https://scholar.google.com/citations?hl=en", "orcid": ";;", "linkedin": ";devansh-g-784842106/;", "or_profile": "~Andrew_Lowy1;~Devansh_Gupta1;~Meisam_Razaviyayn1", "aff": "University of Southern California;Indraprastha Institute of Information Technology, Delhi;Google", "aff_domain": "usc.edu;iiitd.ac.in;google.com", "position": "PhD student;Undergrad student;Researcher", "bibtex": "@inproceedings{\nlowy2023stochastic,\ntitle={Stochastic Differentially Private and Fair Learning},\nauthor={Andrew Lowy and Devansh Gupta and Meisam Razaviyayn},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=3nM5uhPlfv6}\n}", "github": "", "project": "", "reviewers": "8e2X;oW3z;yWF7;bxYf", "pdf_size": 2207841, "recommendation": "3;5;6;8", "confidence": "4;4;3;3", "correctness": "3;3;3;3", "technical_novelty": "2;3;2;4", "empirical_novelty": "3;2;3;2", "wc_summary_paper": "26;95;161;16", "wc_strength_and_weaknesses": "426;397;476;239", "wc_clarity_quality_novelty_and_reproducibility": "36;22;28;71", "wc_summary_review": "9;39;184;55", "wc_review": "497;553;849;381", "wc_reply_reviewers": "558;0;0;21", "wc_reply_authors": "2700;815;1294;657", "reply_reviewers": "1;0;0;1", "reply_authors": "5;2;2;2", "recommendation_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 74.5, 58.47435335255962 ], "wc_strength_and_weaknesses_avg": [ 384.5, 88.62984824538515 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 39.25, 18.9917745353087 ], "wc_summary_review_avg": [ 71.75, 66.87815413122584 ], "wc_review_avg": [ 570.0, 172.61228229763952 ], "wc_reply_reviewers_avg": [ 144.75, 238.7439789816698 ], "wc_reply_authors_avg": [ 1366.5, 804.8324359765826 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.8320502943378437, "corr_recommendation_correctness": 0.0, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4550500883208591338&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=3nM5uhPlfv6", "email": "usc.edu;iiitd.ac.in;google.com", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Southern California;Indraprastha Institute of Information Technology;Google", "aff_unique_dep": ";;Google", "aff_unique_url": "https://www.usc.edu;http://www.iiitd.ac.in;https://www.google.com", "aff_unique_abbr": "USC;IIIT-D;Google", 
"aff_campus_unique_index": "0;1;2", "aff_campus_unique": "Los Angeles;Delhi;Mountain View", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;India" }, { "id": "3nfMmcditWu", "title": "Breaking the Curse of Dimensionality for Parametric Elliptic PDEs", "track": "main", "status": "Reject", "tldr": "", "abstract": "Motivated by recent empirical success, we examine how neural network-based ansatz classes can break the curse of dimensionality for high-dimensional, non-linear elliptic partial differential equations (PDEs) with variational structure. The high-dimensionality of the PDEs can either be induced through a high-dimensional physical domain or a high-dimensional parameter space. The latter include parametric right-hand sides, parametric domains, and material constants. Our main result shows that any scheme, that computes neural network based $W^{1,p}$-approximations, leverages the extraordinary approximation capabilities of neural networks and, thus, is able to beat the curse of dimensionality if the ground truth solution is smooth or possesses Barron regularity. Popular examples of $W^{1,p}$-convergent schemes include, e.g., the Deep Ritz Method and physics-informed neural networks. We present numerical experiments supporting our theoretical findings.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/af278d256a40e41157fbc4468f01be8d83229647.zip", "author": "Marius Zeinhofer;Alex Kaltenbach", "authorids": "~Marius_Zeinhofer1;~Alex_Kaltenbach1", "gender": "M;M", "homepage": "https://math.ethz.ch/sam/the-institute/people.ethz_search.html?u=mzeinhofer;https://alexkaltenbach.github.io", "dblp": "255/5011;", "google_scholar": ";hEYc25gAAAAJ", "orcid": ";0000-0001-6478-7963", "linkedin": ";alex-kaltenbach-6646911bb/", "or_profile": "~Marius_Zeinhofer1;~Alex_Kaltenbach1", "aff": "Simula Research Laboratory;Albert-Ludwigs-Universit\u00e4t Freiburg", "aff_domain": "simula.no;uni-freiburg.de", "position": "Postdoc;Postdoc", "bibtex": "@misc{\nzeinhofer2023breaking,\ntitle={Breaking the Curse of Dimensionality for Parametric Elliptic {PDE}s},\nauthor={Marius Zeinhofer and Alex Kaltenbach},\nyear={2023},\nurl={https://openreview.net/forum?id=3nfMmcditWu}\n}", "github": "", "project": "", "reviewers": "jdZY;bRj3;iQb8", "site": "https://openreview.net/forum?id=3nfMmcditWu", "pdf_size": 465469, "recommendation": "1;3;10", "confidence": "5;3;1", "correctness": "3;4;4", "technical_novelty": "2;1;4", "empirical_novelty": "1;0;0", "wc_summary_paper": "84;26;149", "wc_strength_and_weaknesses": "103;232;151", "wc_clarity_quality_novelty_and_reproducibility": "22;5;35", "wc_summary_review": "22;40;40", "wc_review": "231;303;375", "wc_reply_reviewers": "0;202;0", "wc_reply_authors": "93;1183;26", "reply_reviewers": "0;2;0", "reply_authors": "1;3;1", "recommendation_avg": [ 4.666666666666667, 3.8586123009300755 ], "confidence_avg": [ 3.0, 1.632993161855452 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 1.247219128924647 ], "empirical_novelty_avg": [ 0.3333333333333333, 0.4714045207910317 ], "wc_summary_paper_avg": [ 86.33333333333333, 50.24163833139025 ], "wc_strength_and_weaknesses_avg": [ 162.0, 53.23532661682466 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 20.666666666666668, 12.283683848458853 ], "wc_summary_review_avg": [ 34.0, 8.48528137423857 ], "wc_review_avg": [ 303.0, 58.787753826796276 ], "wc_reply_reviewers_avg": [ 67.33333333333333, 95.2237131997884 ], 
"wc_reply_authors_avg": [ 434.0, 530.3288288096987 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.9522165814091076, "corr_recommendation_correctness": 0.6719319439596787, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:LjWD5jB_RfcJ:scholar.google.com/&scioq=Breaking+the+Curse+of+Dimensionality+for+Parametric+Elliptic+PDEs&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Simula Research Laboratory;Albert-Ludwigs-Universit\u00e4t Freiburg", "aff_unique_dep": ";", "aff_unique_url": "https://www.simula.no;https://www.uni-freiburg.de", "aff_unique_abbr": "Simula;Albert-Ludwigs-Universit\u00e4t", "aff_campus_unique_index": "1", "aff_campus_unique": ";Freiburg", "aff_country_unique_index": "0;1", "aff_country_unique": "Norway;Germany" }, { "title": "Meta Learning to Bridge Vision and Language Models for Multimodal Few-Shot Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11091", "id": "3oWo92cQyxL", "poster": "/media/PosterPDFs/ICLR%202023/11091.png?t=1682429501.9694076", "openreview": "https://openreview.net/forum?id=3oWo92cQyxL", "slides": "https://iclr.cc/virtual/2023/poster/11091", "video": "https://iclr.cc/virtual/2023/poster/11091", "author_site": "Ivona Najdenkoska, Xiantong Zhen, Marcel Worring", "tldr": "We introduce a novel multimodal few-shot meta-learner, by learning how to bridge large-scale frozen vision and language models.", "abstract": "Multimodal few-shot learning is challenging due to the large domain gap between vision and language modalities. Existing methods are trying to communicate visual concepts as prompts to frozen language models, but rely on hand-engineered task induction to reduce the hypothesis space. To make the whole process learnable, we introduce a multimodal meta-learning approach. Specifically, our approach decomposes the training of the model into a set of related multimodal few-shot tasks. We define a meta-mapper network, acting as a meta-learner, to efficiently bridge frozen large-scale vision and language models and leverage their already learned capacity. By updating the learnable parameters only of the meta-mapper, it learns to accrue shared meta-knowledge among these tasks. Thus, it can rapidly adapt to newly presented samples with only a few gradient updates. Importantly, it induces the task in a completely data-driven manner, with no need for a hand-engineered task induction. We evaluate our approach on recently proposed multimodal few-shot benchmarks, measuring how rapidly the model can bind novel visual concepts to words and answer visual questions by observing only a limited set of labeled examples. 
The experimental results show that our meta-learning approach outperforms the baseline across multiple datasets and various training settings while being computationally more efficient.", "keywords": "multimodal;few-shot learning;meta-learning;transformers;vision and language models", "primary_area": "", "supplementary_material": "/attachment/ef3f0dd3567a0e71d7a15ad6086061ca57fff4d6.zip", "author": "Ivona Najdenkoska;Xiantong Zhen;Marcel Worring", "authorids": "~Ivona_Najdenkoska1;~Xiantong_Zhen1;~Marcel_Worring1", "gender": "F;M;M", "homepage": "https://ivonajdenkoska.github.io/;;https://staff.fnwi.uva.nl/m.worring/", "dblp": "297/4696;78/10651;35/4613", "google_scholar": "2rFidrcAAAAJ;https://scholar.google.ca/citations?user=DnBb3e0AAAAJ;pdu8f3sAAAAJ", "orcid": "0000-0001-6852-0609;;", "linkedin": "ivona-najdenkoska/;;", "or_profile": "~Ivona_Najdenkoska1;~Xiantong_Zhen1;~Marcel_Worring2", "aff": "University of Amsterdam;United Imaging Healthcare, Co., Ltd.;University of Amsterdam", "aff_domain": "uva.nl;cri-united-imaging.com;uva.nl", "position": "PhD student;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nnajdenkoska2023meta,\ntitle={Meta Learning to Bridge Vision and Language Models for Multimodal Few-Shot Learning},\nauthor={Ivona Najdenkoska and Xiantong Zhen and Marcel Worring},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=3oWo92cQyxL}\n}", "github": "", "project": "", "reviewers": "zhwM;idxn;VtRH;c2Z3", "pdf_size": 16602570, "recommendation": "3;6;6;8", "confidence": "4;4;4;5", "correctness": "3;1;3;4", "technical_novelty": "2;3;3;4", "empirical_novelty": "3;3;2;4", "wc_summary_paper": "75;97;76;104", "wc_strength_and_weaknesses": "106;106;140;129", "wc_clarity_quality_novelty_and_reproducibility": "18;33;64;77", "wc_summary_review": "36;23;35;24", "wc_review": "235;259;315;334", "wc_reply_reviewers": "290;0;0;0", "wc_reply_authors": "1569;284;512;420", "reply_reviewers": "1;0;0;0", "reply_authors": "3;1;1;1", "recommendation_avg": [ 5.75, 1.7853571071357126 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 2.75, 1.0897247358851685 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 88.0, 12.747548783981962 ], "wc_strength_and_weaknesses_avg": [ 120.25, 14.771171246722448 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 48.0, 23.569047498785352 ], "wc_summary_review_avg": [ 29.5, 6.020797289396148 ], "wc_review_avg": [ 285.75, 40.23291562887283 ], "wc_reply_reviewers_avg": [ 72.5, 125.5736835487436 ], "wc_reply_authors_avg": [ 696.25, 510.36867801619644 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.7276068751089989, "corr_recommendation_correctness": 0.22487239817113241, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4525157245767554035&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=3oWo92cQyxL", "email": "uva.nl;cri-united-imaging.com;uva.nl", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Amsterdam;United Imaging Healthcare", "aff_unique_dep": ";", "aff_unique_url": "https://www.uva.nl;https://www.united-imaging.com", "aff_unique_abbr": "UvA;", "aff_campus_unique_index": "", "aff_campus_unique": "", 
"aff_country_unique_index": "0;1;0", "aff_country_unique": "Netherlands;China" }, { "id": "3owqfawaLv", "title": "Shot Retrieval and Assembly with Text Script for Video Montage Generation", "track": "main", "status": "Withdraw", "tldr": "We propose a novel transformer-based model for video montage generation by retrieving and assembling shots with arbitrary text scripts.", "abstract": "With the development of video sharing websites, numerous users desire to create their own attractive video montages. However, it is difficult for inexperienced users to create a well-edited video montage due to the lack of professional expertise. In the meantime, it is time-consuming even for experts to create video montages of high quality, which requires effectively selecting shots from abundant candidates and assembling them together. Instead of manual creation, a number of automatic methods have been proposed for video montage generation. However, these methods typically take a single sentence as input for text-to-shot retrieval, and ignore the semantic cross-sentence coherence given complicated text script of multiple sentences. To overcome this drawback, we propose a novel model for video montage generation by retrieving and assembling shots with arbitrary text scripts. To this end, a sequence consistency transformer is devised for cross-sentence coherence modeling. More importantly, with this transformer, two novel sequence-level tasks are defined for sentence-shot alignment in sequence-level: Cross-Modal Sequence Matching (CMSM) task, and Chaotic Sequence Recovering (CSR) task. To facilitate the research on video montage generation, we construct a new, highly-varied dataset which collects thousands of video-script pairs in documentary. Extensive experiments on the constructed dataset demonstrate the superior performance of the proposed model. 
The dataset and generated video demos are available at https://github.com/RATVDemo/RATV", "keywords": "Video montage generation;text-to-shot retrieval;transformer;dataset construction", "primary_area": "", "supplementary_material": "/attachment/a8fa766177620eb06ad8f8568470062b00880a49.zip", "author": "Guoxing Yang;Haoyu Lu;Zelong Sun;Shiqi Zhao;Haoran Wu;Zhiwu Lu", "authorids": "~Guoxing_Yang3;~Haoyu_Lu1;~Zelong_Sun1;~Shiqi_Zhao3;~Haoran_Wu6;~Zhiwu_Lu1", "gender": ";M;M;M;M;M", "homepage": "https://haoyulu1998.github.io/;;;https://gsai.ruc.edu.cn/luzhiwu;https://github.com/GuoxingY;", "dblp": "240/2720;;;53/5234;271/9521;130/1330", "google_scholar": "https://scholar.google.com.hk/citations?view_op=list_works;;;OUXS8doAAAAJ;;https://scholar.google.com/citations?view_op=list_works", "orcid": ";0009-0006-2508-7108;;;;", "linkedin": "%E6%B5%A9%E5%AE%87-%E5%8D%A2-4b42b7198/;;https://www.linkedin.cn/incareer/in/%E6%B5%A9%E7%84%B6-%E5%90%B4-b807a0164;;;", "or_profile": "~Haoyu_Lu1;~Shiqi_Zhao3;~Haoran_Wu6;~Zhiwu_Lu1;~GuoXing_Yang2;~\u6cfd\u9f99_\u5b591", "aff": "Renmin University of China;China Unicom Research Institute;China Unicom Research Institute ;Renmin University of China;Renmin University of China;Renmin University of China", "aff_domain": "ruc.edu.cn;chinaunicom.cn;chinaunicom.cn;ruc.edu.cn;ruc.edu.cn;ruc.edu.cn", "position": "PhD student;Researcher;Researcher;Full Professor;PhD student;PhD student", "bibtex": "@misc{\nyang2023shot,\ntitle={Shot Retrieval and Assembly with Text Script for Video Montage Generation},\nauthor={Guoxing Yang and Haoyu Lu and Zelong Sun and Shiqi Zhao and Haoran Wu and Zhiwu Lu},\nyear={2023},\nurl={https://openreview.net/forum?id=3owqfawaLv}\n}", "github": "", "project": "", "reviewers": "GsLm;kWC4;7B6W;s74o", "site": "https://openreview.net/forum?id=3owqfawaLv", "pdf_size": 14564556, "recommendation": "3;5;6;6", "confidence": "3;4;4;3", "correctness": "2;3;3;3", "technical_novelty": "2;2;4;3", "empirical_novelty": "2;2;4;4", "wc_summary_paper": "84;92;139;115", "wc_strength_and_weaknesses": "107;471;219;106", "wc_clarity_quality_novelty_and_reproducibility": "49;81;26;18", "wc_summary_review": "89;53;56;46", "wc_review": "329;697;440;285", "wc_reply_reviewers": "711;108;20;24", "wc_reply_authors": "1914;2123;263;254", "reply_reviewers": "2;1;1;1", "reply_authors": "5;5;1;1", "recommendation_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 3.0, 1.0 ], "wc_summary_paper_avg": [ 107.5, 21.453437952924933 ], "wc_strength_and_weaknesses_avg": [ 225.75, 148.85794402718318 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 43.5, 24.45914961727002 ], "wc_summary_review_avg": [ 61.0, 16.56804152578089 ], "wc_review_avg": [ 437.75, 159.98027222129608 ], "wc_reply_reviewers_avg": [ 215.75, 288.08364670699376 ], "wc_reply_authors_avg": [ 1138.5, 883.1026271051401 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.0, 2.0 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.40824829046386296, "corr_recommendation_correctness": 0.9428090415820632, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3117289344701100327&as_sdt=805&sciodt=0,3&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;1;0;0;0", "aff_unique_norm": "Renmin University of China;China Unicom Research Institute", "aff_unique_dep": ";", "aff_unique_url": 
"http://www.ruc.edu.cn;https://www.chinaunicom.com.cn/en-US/ResearchInstitute", "aff_unique_abbr": "RUC;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Random Laplacian Features for Learning with Hyperbolic Space", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10880", "id": "3pfNb4pZBNp", "poster": "", "openreview": "https://openreview.net/forum?id=3pfNb4pZBNp", "slides": "https://iclr.cc/virtual/2023/poster/10880", "video": "https://iclr.cc/virtual/2023/poster/10880", "author_site": "Tao Yu, Christopher De Sa", "tldr": "", "abstract": "Due to its geometric properties, hyperbolic space can support high-fidelity embeddings of tree- and graph-structured data, upon which various hyperbolic networks have been developed. Existing hyperbolic networks encode geometric priors not only for the input, but also at every layer of the network. This approach involves repeatedly mapping to and from hyperbolic space, which makes these networks complicated to implement, computationally expensive to scale, and numerically unstable to train. In this paper, we propose a simpler approach: learn a hyperbolic embedding of the input, then map once from it to Euclidean space using a mapping that encodes geometric priors by respecting the isometries of hyperbolic space, and finish with a standard Euclidean network. The key insight is to use a random feature mapping via the eigenfunctions of the Laplace operator, which we show can approximate any isometry-invariant kernel on hyperbolic space. Our method can be used together with any graph neural networks: using even a linear graph model yields significant improvements in both efficiency and performance over other hyperbolic baselines in both transductive and inductive tasks. 
", "keywords": "hyperbolic space;random features;kernel approximation", "primary_area": "", "supplementary_material": "/attachment/b022052047d56a0a5cb8fdf7d41d8a9606c4ccb5.zip", "author": "Tao Yu;Christopher De Sa", "authorids": "~Tao_Yu1;~Christopher_De_Sa2", "gender": "M;M", "homepage": "https://ydtydr.github.io/;http://cs.cornell.edu/~cdesa", "dblp": ";154/6336", "google_scholar": "lbi95bUAAAAJ;", "orcid": ";", "linkedin": "tao-yu-220720182/;", "or_profile": "~Tao_Yu1;~Christopher_De_Sa1", "aff": "Cornell University;Cornell University", "aff_domain": "cornell.edu;cornell.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nyu2023random,\ntitle={Random Laplacian Features for Learning with Hyperbolic Space},\nauthor={Tao Yu and Christopher De Sa},\nbooktitle={International Conference on Learning Representations},\nyear={2023},\nurl={https://openreview.net/forum?id=3pfNb4pZBNp}\n}", "github": "", "project": "", "reviewers": "iAHJ;BVuW;Ue38", "pdf_size": 2349272, "recommendation": "5;6;8", "confidence": "5;3;3", "correctness": "3;3;3", "technical_novelty": "2;3;4", "empirical_novelty": "2;3;2", "wc_summary_paper": "43;72;134", "wc_strength_and_weaknesses": "36;288;235", "wc_clarity_quality_novelty_and_reproducibility": "8;54;224", "wc_summary_review": "55;52;39", "wc_review": "142;466;632", "wc_reply_reviewers": "835;155;23", "wc_reply_authors": "2317;462;504", "reply_reviewers": "2;1;1", "reply_authors": "4;1;1", "recommendation_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 3.0, 0.816496580927726 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 83.0, 37.95611501018863 ], "wc_strength_and_weaknesses_avg": [ 186.33333333333334, 108.48143722417315 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 95.33333333333333, 92.89898934984289 ], "wc_summary_review_avg": [ 48.666666666666664, 6.944222218666553 ], "wc_review_avg": [ 413.3333333333333, 203.4786366072752 ], "wc_reply_reviewers_avg": [ 337.6666666666667, 355.7727114636847 ], "wc_reply_authors_avg": [ 1094.3333333333333, 864.7259039076422 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 1.4142135623730951 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.7559289460184546, "corr_recommendation_correctness": 0.0, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9706466824230275049&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=3pfNb4pZBNp", "email": "cornell.edu;cornell.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Cornell University", "aff_unique_dep": "", "aff_unique_url": "https://www.cornell.edu", "aff_unique_abbr": "Cornell", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "3qvEPE6q4L", "title": "FedMAE: Federated Self-Supervised Learning with One-Block Masked Auto-Encoder", "track": "main", "status": "Withdraw", "tldr": "A novel federated self-supervised learning framework with a cascade design", "abstract": "Latest federated learning (FL) methods started to focus on how to use unlabeled data in clients for training due to users' privacy concerns, high labeling costs, or lack of expertise. 
However, current Federated Semi-Supervised/Self-Supervised Learning (FSSL) approaches fail to learn from large-scale images because of the limited computing resources of local clients. In this paper, we introduce a new framework FedMAE, which stands for Federated Masked AutoEncoder, to address the problem of how to utilize unlabeled large-scale images for FL. Specifically, FedMAE can pre-train a one-block Masked AutoEncoder (MAE) using large images on lightweight client devices, and then cascade multiple pre-trained one-block MAEs on the server to build a multi-block ViT backbone for downstream tasks. Theoretical analysis and experimental results on image reconstruction and classification show that our FedMAE achieves superior performance compared to state-of-the-art FSSL methods.", "keywords": "Federated Learning;Self-Supervised Learning;Masked AutoEncoder", "primary_area": "", "supplementary_material": "/attachment/024675a03b5fcc09c7677312ca302c7fe9c5b5d5.zip", "author": "NAN YANG;Xuanyu Chen;Charles Liu;Dong Yuan;Wei Bao;Lizhen Cui", "authorids": "~NAN_YANG6;xuanyu.chen@sydney.edu.au;zhenzhong.liu@sydney.edu.au;~Dong_Yuan1;~Wei_Bao1;~Lizhen_Cui1", "gender": "F;;;M;;M", "homepage": ";;;https://www.sydney.edu.au/engineering/about/our-people/academic-staff/dong-yuan.html;https://www.sydney.edu.au/engineering/about/our-people/academic-staff/wei-bao.html;https://faculty.sdu.edu.cn/cuilizhen/zh_CN/index.htm", "dblp": ";;;;;", "google_scholar": "01zmhNAAAAAJ;;;https://scholar.google.com.au/citations?user=UU0veX4AAAAJ;;", "orcid": ";;;0000-0003-1130-0888;;", "linkedin": "nan-yang-732baa157/;;;;;", "or_profile": "~NAN_YANG6;xuanyu.chen@sydney.edu.au;zhenzhong.liu@sydney.edu.au;~Dong_Yuan1;~Wei_Bao1;~Lizhen_Cui1", "aff": "University of Sydney;;;;University of Sydney;Shandong University", "aff_domain": "usyd.edu.au;;;;sydney.edu.au;sdu.edu.cn", "position": "Researcher;;;;Lecturer;Full Professor", "bibtex": "@misc{\nyang2023fedmae,\ntitle={Fed{MAE}: Federated Self-Supervised Learning with One-Block Masked Auto-Encoder},\nauthor={NAN YANG and Xuanyu Chen and Charles Liu and Dong Yuan and Wei Bao and Lizhen Cui},\nyear={2023},\nurl={https://openreview.net/forum?id=3qvEPE6q4L}\n}", "github": "", "project": "", "reviewers": "GfZh;cGok;Nqyb;Sw1t", "site": "https://openreview.net/forum?id=3qvEPE6q4L", "pdf_size": 2063700, "recommendation": "3;3;3;6", "confidence": "4;4;4;4", "correctness": "3;2;3;3", "technical_novelty": "2;2;2;2", "empirical_novelty": "2;3;2;2", "wc_summary_paper": "96;151;41;46", "wc_strength_and_weaknesses": "296;294;195;48", "wc_clarity_quality_novelty_and_reproducibility": "43;643;17;17", "wc_summary_review": "60;84;222;109", "wc_review": "495;1172;475;220", "wc_reply_reviewers": "0;35;0;0", "wc_reply_authors": "169;378;123;198", "reply_reviewers": "0;1;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 3.75, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 83.5, 44.51123453691214 ], "wc_strength_and_weaknesses_avg": [ 208.25, 101.1295579936944 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 180.0, 267.52383071420013 ], "wc_summary_review_avg": [ 118.75, 62.07807584002584 ], "wc_review_avg": [ 590.5, 352.8005810652811 ], "wc_reply_reviewers_avg": [ 8.75, 15.155444566227676 ], "wc_reply_authors_avg": [ 217.0, 96.72383367092105 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ],
"reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1805284000599411106&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Sydney;Shandong University", "aff_unique_dep": ";", "aff_unique_url": "https://www.sydney.edu.au;http://www.sdu.edu.cn", "aff_unique_abbr": "USYD;SDU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Australia;China" }, { "id": "3rGLfR0dqp", "title": "Predicting Out-of-Domain Generalization with Local Manifold Smoothness", "track": "main", "status": "Withdraw", "tldr": "Local manifold smoothness is a novel complexity measure that can be used to predict generalization even on out-of-domain test sets without labels.", "abstract": "Understanding how machine learning models generalize to new environments is a critical part of their safe deployment. Recent work has proposed a variety of complexity measures that directly predict or theoretically bound the generalization capacity of a model. However, these methods rely on a strong set of assumptions that in practice are not always satisfied. Motivated by the limited settings in which existing measures can be applied, we propose a novel complexity measure based on the local manifold smoothness of a classifier. We define local manifold smoothness as a classifier's output sensitivity to perturbations in the manifold neighborhood around a given test point. Intuitively, a classifier that is less sensitive to these perturbations should generalize better. To estimate smoothness we sample points using data augmentation and measure the fraction of these points classified into the majority class. Our method only requires selecting a data augmentation method and makes no other assumptions about the model or data distributions, meaning it can be applied even in out-of-domain (OOD) settings where existing methods cannot. 
In experiments on robustness benchmarks in image classification, sentiment analysis, and natural language inference, we demonstrate a strong and robust correlation between our manifold smoothness measure and actual OOD generalization on over 4,000 models evaluated on over 100 train/test domain pairs.", "keywords": "complexity measure;out of domain generalization;smoothness", "primary_area": "", "supplementary_material": "", "author": "Nathan Hoyen Ng;Neha Hulkund;Kyunghyun Cho;Marzyeh Ghassemi", "authorids": "~Nathan_Hoyen_Ng1;~Neha_Hulkund1;~Kyunghyun_Cho1;~Marzyeh_Ghassemi2", "gender": "M;F;M;F", "homepage": ";https://hulkund.github.io;http://kyunghyuncho.me;https://www.healthyml.org/", "dblp": "195/5521;297/5263;41/9736;145/6563", "google_scholar": "psuwztYAAAAJ;;https://scholar.google.fi/citations?user=0RAmmIAAAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Nathan_Hoyen_Ng1;~Neha_Hulkund1;~Kyunghyun_Cho1;~Marzyeh_Ghassemi2", "aff": "University of Toronto;Massachusetts Institute of Technology;New York University;Massachusetts Institute of Technology", "aff_domain": "utoronto.ca;mit.edu;nyu.edu;mit.edu", "position": "PhD student;MS student;Associate Professor;Assistant Professor", "bibtex": "@misc{\nng2023predicting,\ntitle={Predicting Out-of-Domain Generalization with Local Manifold Smoothness},\nauthor={Nathan Hoyen Ng and Neha Hulkund and Kyunghyun Cho and Marzyeh Ghassemi},\nyear={2023},\nurl={https://openreview.net/forum?id=3rGLfR0dqp}\n}", "github": "", "project": "", "reviewers": "NHLb;4Tiq;sbeG;uuY2", "site": "https://openreview.net/forum?id=3rGLfR0dqp", "pdf_size": 628781, "recommendation": "3;3;3;8", "confidence": "3;4;4;3", "correctness": "3;4;2;4", "technical_novelty": "2;2;2;3", "empirical_novelty": "1;3;3;3", "wc_summary_paper": "64;60;46;66", "wc_strength_and_weaknesses": "172;255;313;381", "wc_clarity_quality_novelty_and_reproducibility": "59;24;6;27", "wc_summary_review": "42;51;24;52", "wc_review": "337;390;389;526", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.25, 2.165063509461097 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 59.0, 7.810249675906654 ], "wc_strength_and_weaknesses_avg": [ 280.25, 76.77686826121524 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 29.0, 19.091883092036785 ], "wc_summary_review_avg": [ 42.25, 11.233320969330485 ], "wc_review_avg": [ 410.5, 70.0446286306095 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.5222329678670935, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9544885060205111599&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "University of Toronto;Massachusetts Institute of Technology;New York University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.utoronto.ca;https://web.mit.edu;https://www.nyu.edu", "aff_unique_abbr": "U of T;MIT;NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "Canada;United States" }, { "id": "3tYvDb4dwab", "title": 
"Understanding Self-Supervised Pretraining with Part-Aware Representation Learning", "track": "main", "status": "Reject", "tldr": "We study the capability of learning part-aware representations of self-supervised pretraining methods, including contrastive learning and masked image modeling.", "abstract": "In this paper, we are interested in understanding self-supervised pretraining through studying the capability that self-supervised representation pretraining methods learn part-aware representations. The study is mainly motivated by that random views, used in contrastive learning, and random masked (visible) patches, used in masked image modeling, are often about object parts.\n\nWe explain that masked image modeling is a part-to-part task: the masked patches of the object are hallucinated from the visible patches, and that contrastive learning is a part-to-whole task: the projection layer hallucinates the whole object representation from the object part representation learned from the encoder. The explanation suggests that the self-supervised pretrained encoder is required to understand the object part. We empirically compare the off-the-shelf encoders pretrained with several representative methods on object-level recognition and part-level recognition. The results show that the fully-supervised model outperforms self-supervised models for object-level recognition, and most self-supervised contrastive learning and masked image modeling methods outperform the fully-supervised method for part-level recognition. It is observed that the combination of contrastive learning and masked image modeling further improves the performance.", "keywords": "Part-aware representation;Self-supervised learning;Masked image modeling;Contrastive learning", "primary_area": "", "supplementary_material": "", "author": "Jiyang Qi;Jie Zhu;Mingyu Ding;Xiaokang Chen;Ping Luo;Leye Wang;Xinggang Wang;Wenyu Liu;Jingdong Wang", "authorids": "~Jiyang_Qi1;~Jie_Zhu3;~Mingyu_Ding1;~Xiaokang_Chen1;~Ping_Luo2;~Leye_Wang1;~Xinggang_Wang1;~Wenyu_Liu3;~Jingdong_Wang1", "gender": "M;M;M;M;M;M;M;M;", "homepage": ";https://dingmyu.github.io/;https://charlescxk.github.io/;https://wangleye.github.io/;https://xwcv.github.io/index.htm;http://eic.hust.edu.cn/professor/liuwenyu/;https://jingdongwang2017.github.io/;https://scholar.google.com/citations?hl=zh-CN&user=ZL506kEAAAAJ;http://luoping.me/", "dblp": ";188/5243;163/6632;07/8764;95/3056;42/4110-1.html;49/3441;;54/4989-2.html", "google_scholar": "v8pL6_gAAAAJ;w4yTWwoAAAAJ;https://scholar.google.com.hk/citations?view_op=list_works;;qNCTLV0AAAAJ;D7jDk7gAAAAJ;z5SPCmgAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.hk/citations?hl=en", "orcid": ";0000-0001-6556-8359;;;0000-0001-6732-7823;0000-0002-4582-7488;0000-0002-4888-4445;;0000-0002-6685-7950", "linkedin": ";dingmyu/;;;;;;;", "or_profile": "~Jiyang_Qi1;~Mingyu_Ding1;~Xiaokang_Chen1;~Leye_Wang1;~Xinggang_Wang1;~Wenyu_Liu3;~Jingdong_Wang1;~zhu_Jie1;~Luo_Ping2", "aff": "Huazhong University of Science and Technology;University of California, Berkeley;Peking University;Peking University;Huazhong University of Science and Technology;Huazhong University of Science and Technology;Baidu;Peking University;The University of Hong Kong", "aff_domain": "hust.edu.cn;berkeley.edu;pku.edu.cn;pku.edu.cn;hust.edu.cn;hust.edu.cn;baidu.com;pku.edu.cn;hku.hk", "position": "MS student;Postdoc;PhD student;Assistant Professor;Full Professor;Full Professor;Chief Scientist for Computer Vision;PhD student;Assistant Professor", 
"bibtex": "@misc{\nqi2023understanding,\ntitle={Understanding Self-Supervised Pretraining with Part-Aware Representation Learning},\nauthor={Jiyang Qi and Jie Zhu and Mingyu Ding and Xiaokang Chen and Ping Luo and Leye Wang and Xinggang Wang and Wenyu Liu and Jingdong Wang},\nyear={2023},\nurl={https://openreview.net/forum?id=3tYvDb4dwab}\n}", "github": "", "project": "", "reviewers": "nE5Q;AQH3;SGtU", "site": "https://openreview.net/forum?id=3tYvDb4dwab", "pdf_size": 7287188, "recommendation": "5;6;6", "confidence": "3;3;3", "correctness": "3;3;2", "technical_novelty": "2;3;1", "empirical_novelty": "2;3;2", "wc_summary_paper": "47;39;93", "wc_strength_and_weaknesses": "127;183;570", "wc_clarity_quality_novelty_and_reproducibility": "3;81;51", "wc_summary_review": "49;89;135", "wc_review": "226;392;849", "wc_reply_reviewers": "229;34;461", "wc_reply_authors": "1971;1357;3084", "reply_reviewers": "1;1;2", "reply_authors": "4;2;5", "recommendation_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.0 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.0, 0.816496580927726 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 59.666666666666664, 23.79542439676633 ], "wc_strength_and_weaknesses_avg": [ 293.3333333333333, 196.96418174096752 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 45.0, 32.12475680841802 ], "wc_summary_review_avg": [ 91.0, 35.13782387494517 ], "wc_review_avg": [ 489.0, 263.4248786023573 ], "wc_reply_reviewers_avg": [ 241.33333333333334, 174.540030429189 ], "wc_reply_authors_avg": [ 2137.3333333333335, 714.787769590076 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 3.6666666666666665, 1.247219128924647 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 9, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": -0.4999999999999999, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16281433324161522121&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;2;2;0;0;3;2;4", "aff_unique_norm": "Huazhong University of Science and Technology;University of California, Berkeley;Peking University;Baidu;University of Hong Kong", "aff_unique_dep": ";;;Baidu, Inc.;", "aff_unique_url": "http://www.hust.edu.cn;https://www.berkeley.edu;http://www.pku.edu.cn;https://www.baidu.com;https://www.hku.hk", "aff_unique_abbr": "HUST;UC Berkeley;Peking U;Baidu;HKU", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Berkeley;Hong Kong SAR", "aff_country_unique_index": "0;1;0;0;0;0;0;0;0", "aff_country_unique": "China;United States" }, { "id": "3uDXZZLBAwd", "title": "Deep Reinforcement Learning based Insight Selection Policy", "track": "main", "status": "Reject", "tldr": "", "abstract": "We live in the era of ubiquitous sensing and computing. More and more data is being collected and processed from devices, sensors and systems. This opens up opportunities to discover patterns from these data that could help in gaining better understanding into the source that produces them. This is useful in a wide range of domains, especially in the area of personal health, in which such knowledge could help in allowing users to comprehend their behaviour and indirectly improve their lifestyle. Insight generators are systems that identify such patterns and verbalise them in a readable text format, referred to as insights. 
The selection of insights is done using a scoring algorithm which aims at optimizing this process based on multiple objectives, e.g., factual correctness, usefulness and interestingness of insights. In this paper, we propose a novel Reinforcement Learning (RL) framework for insight selection where the scoring model is trained by user feedback on interestingness and their lifestyle quality estimates. With the use of highly reusable and simple principles of automatic user simulation based on real data, we demonstrate in this preliminary study that the RL solution may improve the selection of insights towards multiple pre-defined objectives.", "keywords": "recommender;insight;reinforcement learning;behavior change support system;health coaching;lifestyle simulator;Gaussian mixture modeling", "primary_area": "", "supplementary_material": "", "author": "Libio Goncalves Braz;Allmin Pradhap Singh Susaiyah;Milan Petkovic;Aki H\u00e4rm\u00e4", "authorids": "~Libio_Goncalves_Braz1;~Allmin_Pradhap_Singh_Susaiyah1;m.petkovic@tue.nl;aki.harma@philips.com", "gender": "M;Not Specified;;", "homepage": ";https://allmins.wordpress.com/;;", "dblp": ";;;", "google_scholar": "KGMVtaIAAAAJ;https://scholar.google.co.uk/citations?user=_qSEHSEAAAAJ;;", "orcid": ";;;", "linkedin": "libio-gon%C3%A7alves-braz-23bb81169/;;;", "or_profile": "~Libio_Goncalves_Braz1;~Allmin_Pradhap_Singh_Susaiyah1;m.petkovic@tue.nl;aki.harma@philips.com", "aff": "Utrecht University;Eindhoven University of Technology;;", "aff_domain": "uu.nl;tue.nl;;", "position": " Scientific Programmer;PhD student;;", "bibtex": "@misc{\nbraz2023deep,\ntitle={Deep Reinforcement Learning based Insight Selection Policy},\nauthor={Libio Goncalves Braz and Allmin Pradhap Singh Susaiyah and Milan Petkovic and Aki H{\\\"a}rm{\\\"a}},\nyear={2023},\nurl={https://openreview.net/forum?id=3uDXZZLBAwd}\n}", "github": "", "project": "", "reviewers": "XoQA;a8Lx;p6Dn", "site": "https://openreview.net/forum?id=3uDXZZLBAwd", "pdf_size": 1525537, "recommendation": "3;5;5", "confidence": "4;4;4", "correctness": "2;3;3", "technical_novelty": "1;2;3", "empirical_novelty": "1;3;2", "wc_summary_paper": "157;112;68", "wc_strength_and_weaknesses": "159;184;234", "wc_clarity_quality_novelty_and_reproducibility": "16;113;20", "wc_summary_review": "70;100;45", "wc_review": "402;509;367", "wc_reply_reviewers": "0;106;0", "wc_reply_authors": "410;879;481", "reply_reviewers": "0;1;0", "reply_authors": "1;2;1", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.0, 0.816496580927726 ], "empirical_novelty_avg": [ 2.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 112.33333333333333, 36.33486235314815 ], "wc_strength_and_weaknesses_avg": [ 192.33333333333334, 31.18047822311618 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 49.666666666666664, 44.81319250201019 ], "wc_summary_review_avg": [ 71.66666666666667, 22.484562605386735 ], "wc_review_avg": [ 426.0, 60.404194114868105 ], "wc_reply_reviewers_avg": [ 35.333333333333336, 49.968879203849355 ], "wc_reply_authors_avg": [ 590.0, 206.3992894044615 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.9999999999999998, "gs_citation": 1, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=3352082380828637680&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Utrecht University;Eindhoven University of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.uu.nl;https://www.tue.nl", "aff_unique_abbr": "UU;TU/e", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Netherlands" }, { "id": "3urtgEaXCA9", "title": "A Weight Variation-Aware Training Method for Hardware Neuromorphic Chips", "track": "main", "status": "Reject", "tldr": "", "abstract": "Hardware neuromorphic chips that mimic the biological nervous systems have recently attracted significant attention due to their ultra-low power and parallel computation. However, the inherent variability of nano-scale synaptic devices causes a weight perturbation and performance drop of neural networks. This paper proposes a training method to find weight with robustness to intrinsic device variability. A stochastic weight characteristic incurred by device inherent variability is considered during training. We investigate the impact of weight variation on both Spiking Neural Network (SNN) and standard Artificial Neural Network (ANN) with different architectures including fully connected, convolutional neural network (CNN), VGG, and ResNet on MNIST, CIFAR-10, and CIFAR-100. Experimental results show that a weight variation-aware training method (WVAT) can dramatically minimize the performance drop on weight variability by exploring a flat loss landscape. When there are weight perturbations, WVAT yields 85.21% accuracy of VGG-5 on CIFAR-10, reducing accuracy degradation by more than 1/10 compared with SGD. Finally, WVAT is easy to implement on various architectures with little computational overhead.", "keywords": "edge computing systems;neuro-inspired computing;hardware implementation;synaptic device;hardware-oriented neural network", "primary_area": "", "supplementary_material": "", "author": "Min-Hye Oh", "authorids": "~Min-Hye_Oh1", "gender": "F", "homepage": "", "dblp": "", "google_scholar": "https://scholar.google.co.kr/citations?user=Mj90gSEAAAAJ", "orcid": "", "linkedin": "", "or_profile": "~Min-Hye_Oh1", "aff": "", "aff_domain": "", "position": "", "bibtex": "@misc{\noh2023a,\ntitle={A Weight Variation-Aware Training Method for Hardware Neuromorphic Chips},\nauthor={Min-Hye Oh},\nyear={2023},\nurl={https://openreview.net/forum?id=3urtgEaXCA9}\n}", "github": "", "project": "", "reviewers": "giaL;XxLX;oSwx;RFsq", "site": "https://openreview.net/forum?id=3urtgEaXCA9", "pdf_size": 1770919, "recommendation": "3;5;5;6", "confidence": "3;3;3;3", "correctness": "3;3;3;4", "technical_novelty": "2;2;2;3", "empirical_novelty": "3;2;2;3", "wc_summary_paper": "66;93;11;79", "wc_strength_and_weaknesses": "35;428;117;176", "wc_clarity_quality_novelty_and_reproducibility": "68;46;9;195", "wc_summary_review": "211;79;41;88", "wc_review": "380;646;178;538", "wc_reply_reviewers": "0;0;0;44", "wc_reply_authors": "140;918;304;294", "reply_reviewers": "0;0;0;1", "reply_authors": "1;2;1;1", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.0, 0.0 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 62.25, 31.09159854365806 ], "wc_strength_and_weaknesses_avg": [ 189.0, 146.7906672782708 ], "wc_clarity_quality_novelty_and_reproducibility_avg": 
[ 79.5, 69.93747207327414 ], "wc_summary_review_avg": [ 104.75, 63.82936236560726 ], "wc_review_avg": [ 435.5, 176.21222999553692 ], "wc_reply_reviewers_avg": [ 11.0, 19.05255888325765 ], "wc_reply_authors_avg": [ 414.0, 298.15767640629343 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 1, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.6622661785325219, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:JGmLy6Dgf0cJ:scholar.google.com/&scioq=A+Weight+Variation-Aware+Training+Method+for+Hardware+Neuromorphic+Chips&hl=en&as_sdt=0,5", "gs_version_total": 2 }, { "id": "3v2DIO9oVl", "title": "Generalization error bounds for Neural Networks with ReLU activation", "track": "main", "status": "Reject", "tldr": "We show that the generalization error of Neural Networks with ReLU activations approaches zero with probability 1 as we increase the number of training points", "abstract": "We show rigorous bounds on the generalization error for Neural Networks with ReLU activation under the condition that the network size doesn't grow with the training set size. In order to prove these bounds, we weaken the notion of uniform stability of a learning algorithm in a probabilistic way by positing the notion of almost sure (a.s.) support stability and proving that if an algorithm has low enough a.s. support stability, its generalization error tends to 0 as the training set size increases. Further, we show that for Stochastic Gradient Descent to be almost surely support stable we only need the loss function to be locally Lipschitz and locally smooth with probability 1, thereby showing low generalization error with weaker conditions than have been used in the literature. We then show that Neural Networks with ReLU activation and a doubly differentiable loss function possess these properties, thereby proving low generalization error. The caveat is that the size of NN must not grow with the size of the training set. Finally, we present experimental evidence to validate our theoretical results.", "keywords": "relu stability;sgd stability;non smooth neural network stability", "primary_area": "", "supplementary_material": "", "author": "Harsh Pandey;Amitabha Bagchi;Srikanta J. Bedathur;Arindam Bhattacharya", "authorids": "~Harsh_Pandey1;~Amitabha_Bagchi3;~Srikanta_J._Bedathur1;~Arindam_Bhattacharya1", "gender": "M;M;;", "homepage": ";http://www.cse.iitd.ac.in/~bagchi;https://www.cse.iitd.ac.in/~srikanta/;", "dblp": "135/8401;77/5034;b/SrikantaJBedathur;", "google_scholar": "https://scholar.google.com/citations?hl=en;;ngfF2oAAAAAJ;", "orcid": ";;0000-0002-3949-2175;", "linkedin": "harshpan/;;;", "or_profile": "~Harsh_Pandey1;~Amitabha_Bagchi3;~Srikanta_J._Bedathur1;~Arindam_Bhattacharya1", "aff": "Indian Institute of Technology Delhi;Indian Institute of Technology, Delhi;Indian Institute of Technology Delhi;", "aff_domain": "iitd.ac.in;iitd.ac.in;iitd.ac.in;", "position": "MS student;Full Professor;Associate Professor;", "bibtex": "@misc{\npandey2023generalization,\ntitle={Generalization error bounds for Neural Networks with Re{LU} activation},\nauthor={Harsh Pandey and Amitabha Bagchi and Srikanta J.
Bedathur and Arindam Bhattacharya},\nyear={2023},\nurl={https://openreview.net/forum?id=3v2DIO9oVl}\n}", "github": "", "project": "", "reviewers": "omdi;gUyv;UXnu;8Tfr", "site": "https://openreview.net/forum?id=3v2DIO9oVl", "pdf_size": 367906, "recommendation": "5;5;5;6", "confidence": "4;4;4;3", "correctness": "4;4;4;4", "technical_novelty": "2;3;2;2", "empirical_novelty": "2;0;2;2", "wc_summary_paper": "104;65;88;144", "wc_strength_and_weaknesses": "122;233;327;228", "wc_clarity_quality_novelty_and_reproducibility": "344;20;44;12", "wc_summary_review": "57;31;39;69", "wc_review": "627;349;498;453", "wc_reply_reviewers": "0;0;0;27", "wc_reply_authors": "572;435;594;400", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;1", "recommendation_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 4.0, 0.0 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 100.25, 28.81297450802329 ], "wc_strength_and_weaknesses_avg": [ 227.5, 72.56204241888454 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 105.0, 138.4882666510055 ], "wc_summary_review_avg": [ 49.0, 14.89966442575134 ], "wc_review_avg": [ 481.75, 99.76315702703077 ], "wc_reply_reviewers_avg": [ 6.75, 11.691342951089922 ], "wc_reply_authors_avg": [ 500.25, 84.03087230298159 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:I606n04R1V8J:scholar.google.com/&scioq=Generalization+error+bounds+for+Neural+Networks+with+ReLU+activation&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Indian Institute of Technology Delhi", "aff_unique_dep": "", "aff_unique_url": "https://www.iitd.ac.in", "aff_unique_abbr": "IIT Delhi", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Delhi", "aff_country_unique_index": "0;0;0", "aff_country_unique": "India" }, { "id": "3vOtC1t1kF", "title": "Efficient Personalized Federated Learning via Sparse Model-Adaptation", "track": "main", "status": "Reject", "tldr": "We propose an efficient personalized FL method with theoretical analysis, which adaptively learns sparse local models, and achieves SOTA accuracy and efficiency simultaneously.", "abstract": "Federated Learning (FL) aims to train machine learning models for multiple clients without sharing their own private data. Due to the heterogeneity of clients' local data distribution, recent studies explore the personalized FL that learns and deploys distinct local models with the help of auxiliary global models. However, the clients can be heterogeneous in terms of not only local data distribution, but also their computation and communication resources. The capacity and efficiency of personalized models are restricted by the lowest-resource clients, leading to sub-optimal performance and limited practicality of personalized FL. To overcome these challenges, we propose a novel approach named pFedGate for efficient personalized FL by adaptively and efficiently learning sparse local models. With a lightweight trainable gating layer, pFedGate enables clients to reach their full potential in model capacity by generating different sparse models accounting for both the heterogeneous data distributions and resource constraints. 
Meanwhile, the computation and communication efficiency are both improved thanks to the adaptability between the model sparsity and clients' resources. Further, we theoretically show that the proposed pFedGate has superior complexity with guaranteed convergence and generalization error. Extensive experiments show that pFedGate achieves superior global accuracy, individual accuracy and efficiency simultaneously over state-of-the-art methods, by up to 4.53\\% accuracy improvement and 12x smaller model size. We also demonstrate that pFedGate performs better than competitors in the novel clients participation and partial clients participation scenarios, and can learn meaningful sparse local models adapted to different data distributions.", "keywords": "Efficient Federated Learning;Personalization;Sparse Model-Adaptation", "primary_area": "", "supplementary_material": "", "author": "Daoyuan Chen;Liuyi Yao;Dawei Gao;Bolin Ding;Yaliang Li", "authorids": "~Daoyuan_Chen1;~Liuyi_Yao1;~Dawei_Gao1;~Bolin_Ding3;~Yaliang_Li1", "gender": "M;F;M;M;M", "homepage": "https://yxdyc.github.io/;;https://davdgao.github.io/;https://bolinding.github.io/;https://sites.google.com/site/yaliangli/", "dblp": "217/4891;219/1767;;46/3522.html;https://dblp.org/pers/hd/l/Li:Yaliang", "google_scholar": "https://scholar.google.com.hk/citations?user=1GdfinUAAAAJ;0c5is-gAAAAJ;NNEeYaUAAAAJ;AjYkTi8AAAAJ;CCPBcdYAAAAJ", "orcid": "0000-0002-8015-2121;;0009-0007-3882-5189;;0000-0002-4204-6096", "linkedin": ";;;bolin-ding-50a0119/;", "or_profile": "~Daoyuan_Chen1;~Liuyi_Yao1;~Dawei_Gao1;~Bolin_Ding3;~Yaliang_Li1", "aff": "Alibaba Group;Alibaba Group;Alibaba Group;Alibaba Group;Alibaba Group", "aff_domain": "alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com", "position": "Staff;Staff Engineer;Researcher;Senior Director;Staff Engineer", "bibtex": "@misc{\nchen2023efficient,\ntitle={Efficient Personalized Federated Learning via Sparse Model-Adaptation},\nauthor={Daoyuan Chen and Liuyi Yao and Dawei Gao and Bolin Ding and Yaliang Li},\nyear={2023},\nurl={https://openreview.net/forum?id=3vOtC1t1kF}\n}", "github": "", "project": "", "reviewers": "PyZd;sfrQ;wTvc;gH8r;eJXn", "site": "https://openreview.net/forum?id=3vOtC1t1kF", "pdf_size": 3128174, "recommendation": "3;5;6;6;6", "confidence": "4;4;4;5;3", "correctness": "2;3;3;1;3", "technical_novelty": "2;3;3;2;2", "empirical_novelty": "2;4;3;0;3", "wc_summary_paper": "87;87;205;61;123", "wc_strength_and_weaknesses": "248;272;177;201;410", "wc_clarity_quality_novelty_and_reproducibility": "26;31;513;22;19", "wc_summary_review": "48;30;93;63;154", "wc_review": "409;420;988;347;706", "wc_reply_reviewers": "131;0;0;7;141", "wc_reply_authors": "1380;1248;1243;555;2914", "reply_reviewers": "1;0;0;1;2", "reply_authors": "3;3;2;1;6", "recommendation_avg": [ 5.2, 1.16619037896906 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "correctness_avg": [ 2.4, 0.8 ], "technical_novelty_avg": [ 2.4, 0.4898979485566356 ], "empirical_novelty_avg": [ 2.4, 1.3564659966250536 ], "wc_summary_paper_avg": [ 112.6, 50.23783434822803 ], "wc_strength_and_weaknesses_avg": [ 261.6, 81.41891672087021 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 122.2, 195.44144903269623 ], "wc_summary_review_avg": [ 77.6, 43.42625933694958 ], "wc_review_avg": [ 574.0, 241.36694056974744 ], "wc_reply_reviewers_avg": [ 55.8, 65.60914570393368 ], "wc_reply_authors_avg": [ 1468.0, 778.6210888487417 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 3.0, 
1.6733200530681511 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.12862393885688164, "gs_citation": 58, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9716153031719410340&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Alibaba Group", "aff_unique_dep": "", "aff_unique_url": "https://www.alibaba.com", "aff_unique_abbr": "Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "3vzguDiEOr", "title": "Membership Leakage in Pre-trained Language Models", "track": "main", "status": "Reject", "tldr": "This paper evaluates membership leakage of pre-trained language models", "abstract": "Pre-trained language models are becoming a dominating component in NLP domain and have achieved state-of-the-art in various downstream tasks. Recent research has shown that language models are vulnerable to privacy leakage of their training data, such as text extraction and membership leakage. However, existing works against NLP applications mainly focus on the privacy leakage of text generation and downstream classification, and the privacy leakage of pre-trained language models is largely unexplored. In this paper, we take the first step toward systematically auditing the privacy risks of pre-trained language models through the lens of membership leakage. In particular, we focus on membership leakage of pre-training data in the exposure of downstream models adapted from pre-trained language models. We conduct extensive experiments on a variety of pre-trained model architectures and different types of downstream tasks. Our empirical evaluations demonstrate that membership leakage of pre-trained language models exists even when only the downstream model output is exposed, thereby posing a more severe risk than previously thought. We further conduct sophisticated ablation studies to analyze the relationship between membership leakage of pre-trained models and the characteristic of downstream tasks, which can guide developers or researchers to be vigilant about the vulnerability of pre-trained language models. 
Lastly, we explore possible defenses against membership leakage of PLMs and propose two promising defenses based on empirical evaluations.", "keywords": "membership leakage;pre-trained language models;natural language processing", "primary_area": "", "supplementary_material": "", "author": "Yuan Xin;Zheng Li;Ning Yu;Michael Backes;Yang Zhang", "authorids": "~Yuan_Xin2;~Zheng_Li17;~Ning_Yu2;~Michael_Backes1;~Yang_Zhang15", "gender": "F;M;;;M", "homepage": "https://applexy.github.io;https://zhenglisec.github.io/;;;https://yangzhangalmo.github.io/", "dblp": ";10/1143-23;;;06/6785-16", "google_scholar": ";xEAaaGsAAAAJ;;;Xeb2888AAAAJ", "orcid": ";0000-0002-4466-7523;;;0000-0003-3612-7348", "linkedin": ";;;;", "or_profile": "~Yuan_Xin2;~Zheng_Li17;~Ning_Yu2;~Michael_Backes1;~Yang_Zhang15", "aff": "CISPA, saarland university, saarland informatics campus;CISPA Helmholtz Center for Information Security;;;CISPA Helmholtz Center for Information Security", "aff_domain": "cispa.saarland;cispa.de;;;cispa.de", "position": "PhD student;PhD student;;;Assistant Professor", "bibtex": "@misc{\nxin2023membership,\ntitle={Membership Leakage in Pre-trained Language Models},\nauthor={Yuan Xin and Zheng Li and Ning Yu and Michael Backes and Yang Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=3vzguDiEOr}\n}", "github": "", "project": "", "reviewers": "EhJ8;h5pk;qzMP", "site": "https://openreview.net/forum?id=3vzguDiEOr", "pdf_size": 485849, "recommendation": "1;3;5", "confidence": "5;3;4", "correctness": "1;1;2", "technical_novelty": "1;2;2", "empirical_novelty": "1;1;3", "wc_summary_paper": "77;50;124", "wc_strength_and_weaknesses": "289;244;737", "wc_clarity_quality_novelty_and_reproducibility": "155;30;60", "wc_summary_review": "158;36;44", "wc_review": "679;360;965", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;379", "reply_reviewers": "0;0;0", "reply_authors": "0;0;1", "recommendation_avg": [ 3.0, 1.632993161855452 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "correctness_avg": [ 1.3333333333333333, 0.4714045207910317 ], "technical_novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.6666666666666667, 0.9428090415820634 ], "wc_summary_paper_avg": [ 83.66666666666667, 30.57595278791634 ], "wc_strength_and_weaknesses_avg": [ 423.3333333333333, 222.55536140225627 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 81.66666666666667, 53.28122454381927 ], "wc_summary_review_avg": [ 79.33333333333333, 55.7215298505783 ], "wc_review_avg": [ 668.0, 247.11265986724894 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 126.33333333333333, 178.66231337980102 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.5, "corr_recommendation_correctness": 0.8660254037844387, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16617312678791717557&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;1", "aff_unique_norm": "Saarland University;CISPA Helmholtz Center for Information Security", "aff_unique_dep": "CISPA;", "aff_unique_url": "https://www.uni-saarland.de;https://www.cispa.de/", "aff_unique_abbr": "Saarland U;CISPA", "aff_campus_unique_index": "0", "aff_campus_unique": "Saarland Informatics Campus;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "id": "3wCqIZivcJx", "title": "Quark: A Gradient-Free Quantum Learning 
Framework for Classification Tasks", "track": "main", "status": "Reject", "tldr": "A new quantum learning framework for classification task", "abstract": "As more practical and scalable quantum computers emerge, much attention has been focused on realizing quantum supremacy in machine learning. Existing quantum ML methods either (1) embed a classical model into a target Hamiltonian to enable quantum optimization or (2) represent a quantum model using variational quantum circuits and apply classical gradient-based optimization. The former method leverages the power of quantum optimization but only supports simple ML models, while the latter provides flexibility in model design but relies on gradient calculation, resulting in barren plateau (i.e., gradient vanishing) and frequent classical-quantum interactions. To address the limitations of existing quantum ML methods, we introduce Quark, a gradient-free quantum learning framework that optimizes quantum ML models using quantum optimization. Quark does not rely on gradient computation and therefore avoids barren plateau and frequent classical-quantum interactions. In addition, Quark can support more general ML models than prior quantum ML methods and achieves a dataset-size-independent optimization complexity. Theoretically, we prove that Quark can outperform classical gradient-based methods by reducing model query complexity for highly non-convex problems; empirically, evaluations on the Edge Detection and Tiny-MNIST tasks show that Quark can support complex ML models and significantly reduce the number of measurements needed for discovering near-optimal weights for these tasks.", "keywords": "Quantum Computing;Deep Learning;Quantum Machine Learning", "primary_area": "", "supplementary_material": "/attachment/704c0d6aa85eec14ac9882486672ca11b6f63654.zip", "author": "Zhihao Zhang;Zhuoming Chen;Heyang Huang;Zhihao Jia", "authorids": "~Zhihao_Zhang2;~Zhuoming_Chen1;~Heyang_Huang1;~Zhihao_Jia1", "gender": ";M;M;M", "homepage": ";;;https://www.cs.cmu.edu/~zhihaoj2/", "dblp": "91/5464;226/5729;;", "google_scholar": "https://scholar.google.com/citations?hl=en;4Bb5KRYAAAAJ;;", "orcid": ";;;", "linkedin": ";zhuoming-chen-325075234/;heyang-huang-0176bb155/;", "or_profile": "~Zhihao_Zhang2;~Zhuoming_Chen1;~Heyang_Huang1;~Zhihao_Jia1", "aff": "Carnegie Mellon University;Tsinghua University;;Carnegie Mellon University", "aff_domain": "andrew.cmu.edu;tsinghua.edu.cn;;cmu.edu", "position": "PhD student;Undergrad student;;Assistant Professor", "bibtex": "@misc{\nzhang2023quark,\ntitle={Quark: A Gradient-Free Quantum Learning Framework for Classification Tasks},\nauthor={Zhihao Zhang and Zhuoming Chen and Heyang Huang and Zhihao Jia},\nyear={2023},\nurl={https://openreview.net/forum?id=3wCqIZivcJx}\n}", "github": "", "project": "", "reviewers": "YQCZ;5Kqc;Wci6;o76V", "site": "https://openreview.net/forum?id=3wCqIZivcJx", "pdf_size": 7465556, "recommendation": "1;3;3;6", "confidence": "4;4;5;2", "correctness": "2;3;3;4", "technical_novelty": "1;3;2;3", "empirical_novelty": "2;2;1;3", "wc_summary_paper": "25;75;42;118", "wc_strength_and_weaknesses": "190;668;45;187", "wc_clarity_quality_novelty_and_reproducibility": "290;13;9;31", "wc_summary_review": "37;105;13;78", "wc_review": "542;861;109;414", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "364;677;261;166", "reply_reviewers": "0;0;0;0", "reply_authors": "2;2;1;1", "recommendation_avg": [ 3.25, 1.7853571071357126 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "correctness_avg": [ 3.0, 
0.7071067811865476 ], "technical_novelty_avg": [ 2.25, 0.82915619758885 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 65.0, 35.48943504763072 ], "wc_strength_and_weaknesses_avg": [ 272.5, 235.73979299218874 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 85.75, 118.21458243380975 ], "wc_summary_review_avg": [ 58.25, 35.61863978312479 ], "wc_review_avg": [ 481.5, 269.71883508572404 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 367.0, 192.18870934578857 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.7388664511337208, "corr_recommendation_correctness": 0.9901475429766743, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13093190883888668632&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;0", "aff_unique_norm": "Carnegie Mellon University;Tsinghua University", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://www.tsinghua.edu.cn", "aff_unique_abbr": "CMU;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;China" }, { "id": "3yEIFSMwKBC", "title": "AutoMoE: Neural Architecture Search for Efficient Sparsely Activated Transformers", "track": "main", "status": "Reject", "tldr": "AutoMoE: a flexible Neural Architecture Search framework to design efficient sparse models under latency constraints.", "abstract": "Neural architecture search (NAS) has demonstrated promising results in identifying efficient Transformer architectures which outperform manually designed ones for natural language tasks like neural machine translation (NMT). Existing NAS methods operate on a space of dense architectures, where all of the sub-architecture weights are activated for every input. Motivated by the recent advances in sparsely activated models like the Mixture-of-Experts (MoE) model, we introduce sparse architectures with conditional computation into the NAS search space. Given this expressive search space, which subsumes prior densely activated architectures, we develop a new framework AutoMoE to search for efficient sparsely activated sub-Transformers. AutoMoE sparse models obtain (i) 3x FLOPs reduction over manually designed dense Transformers and (ii) 23% FLOPs reduction over state-of-the-art NAS-generated dense sub-Transformers with parity in BLEU score on benchmark datasets for NMT. AutoMoE consists of three training phases: (a) Heterogeneous search space design with dense and sparsely activated Transformer modules (e.g., how many experts? where to place them? what should be their sizes?); (b) SuperNet training that jointly trains several subnetworks sampled from the large search space by weight-sharing; (c) Evolutionary search for the architecture with the optimal trade-off between task performance and computational constraints like FLOPs and latency.", "keywords": "Mixture-of-expert models;Neural architecture search;Efficiency", "primary_area": "", "supplementary_material": "/attachment/4cce02cf984aaba3a5451b54718e3f65079d1a24.zip", "author": "Ganesh Jawahar;Subhabrata Mukherjee;Xiaodong Liu;Young Jin Kim;Muhammad Abdul-Mageed;Laks V. S.
Lakshmanan;Ahmed Hassan Awadallah;Sebastien Bubeck;Jianfeng Gao", "authorids": "~Ganesh_Jawahar1;~Subhabrata_Mukherjee2;~Xiaodong_Liu1;~Young_Jin_Kim1;~Muhammad_Abdul-Mageed2;~Laks_V._S._Lakshmanan1;~Ahmed_Hassan_Awadallah1;~Sebastien_Bubeck1;~Jianfeng_Gao1", "gender": "M;;;M;;;M;;M", "homepage": "https://ganeshjawahar.github.io/;https://subhomukherjee.com/;;https://www.microsoft.com/en-us/research/people/youki/;;https://www.cs.ubc.ca/~laks;https://www.microsoft.com/en-us/research/people/hassanam/publications/;http://sbubeck.com/;https://www.microsoft.com/en-us/research/people/jfgao/", "dblp": "203/9710;37/11030.html;65/622;00/8110-1.html;;l/LVSLakshmanan;147/9148;35/4292;92/5339", "google_scholar": "https://scholar.google.co.in/citations?user=X7SMP1EAAAAJ;T4iBN5cAAAAJ;NIewcxMAAAAJ;;;https://scholar.google.ca/citations?user=_RCsaOsAAAAJ;sNGk-9MAAAAJ;V2Y1L4sAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;;;0000-0002-9775-4241;;;", "linkedin": "https://in.linkedin.com/in/ganesh-jawahar-ab928435;subho87;;ykim362/;;laksvslakshmanan/;ahmed-hassan-awadallah-a355a27/;;", "or_profile": "~Ganesh_Jawahar1;~Subhabrata_Mukherjee2;~Xiaodong_Liu1;~Young_Jin_Kim1;~Muhammad_Abdul-Mageed2;~Laks_V._S._Lakshmanan1;~Ahmed_Hassan_Awadallah1;~Sebastien_Bubeck1;~Jianfeng_Gao1", "aff": "University of British Columbia;Microsoft;Microsoft Research;Microsoft;;University of British Columbia;Microsoft Research;Microsoft;Microsoft Research", "aff_domain": "ubc.ca;microsoft.com;microsoft.com;microsoft.com;;ubc.ca;microsoft.com;microsoft.com;microsoft.com", "position": "PhD student;Principal Researcher;Researcher;Principal Researcher;;Professor;Principal Researcher;Researcher;Principal Researcher", "bibtex": "@misc{\njawahar2023automoe,\ntitle={AutoMoE: Neural Architecture Search for Efficient Sparsely Activated Transformers},\nauthor={Ganesh Jawahar and Subhabrata Mukherjee and Xiaodong Liu and Young Jin Kim and Muhammad Abdul-Mageed and Laks V. S. 
Lakshmanan and Ahmed Hassan Awadallah and Sebastien Bubeck and Jianfeng Gao},\nyear={2023},\nurl={https://openreview.net/forum?id=3yEIFSMwKBC}\n}", "github": "", "project": "", "reviewers": "vutH;CMSP;Yxc3", "site": "https://openreview.net/forum?id=3yEIFSMwKBC", "pdf_size": 682897, "recommendation": "3;5;5", "confidence": "5;4;4", "correctness": "3;3;3", "technical_novelty": "2;2;2", "empirical_novelty": "2;2;2", "wc_summary_paper": "58;46;70", "wc_strength_and_weaknesses": "239;92;229", "wc_clarity_quality_novelty_and_reproducibility": "12;10;10", "wc_summary_review": "47;31;27", "wc_review": "356;179;336", "wc_reply_reviewers": "0;0;169", "wc_reply_authors": "779;598;551", "reply_reviewers": "0;0;1", "reply_authors": "1;1;1", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 58.0, 9.797958971132712 ], "wc_strength_and_weaknesses_avg": [ 186.66666666666666, 67.06381703687582 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 10.666666666666666, 0.9428090415820634 ], "wc_summary_review_avg": [ 35.0, 8.640987597877148 ], "wc_review_avg": [ 290.3333333333333, 79.14683962245253 ], "wc_reply_reviewers_avg": [ 56.333333333333336, 79.66736401368435 ], "wc_reply_authors_avg": [ 642.6666666666666, 98.29321215402189 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 9, 0 ], "corr_recommendation_confidence": -0.9999999999999997, "corr_recommendation_correctness": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17865414431413444487&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;1;1;0;1;1;1", "aff_unique_norm": "University of British Columbia;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.ubc.ca;https://www.microsoft.com", "aff_unique_abbr": "UBC;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;0;1;1;1", "aff_country_unique": "Canada;United States" }, { "title": "Adaptive Robust Evidential Optimization For Open Set Detection from Imbalanced Data", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11736", "id": "3yJ-hcJBqe", "poster": "/media/PosterPDFs/ICLR%202023/11736.png?t=1682859684.5095131", "openreview": "https://openreview.net/forum?id=3yJ-hcJBqe", "slides": "https://iclr.cc/virtual/2023/poster/11736", "video": "https://iclr.cc/virtual/2023/poster/11736", "author_site": "Hitesh Sapkota, Qi Yu", "tldr": "We propose adaptive robust uncertainty mass quantification for effective open set detection from imbalanced data. ", "abstract": "Open set detection (OSD) aims at identifying data samples of an unknown class ($i.e.$, open set) from those of known classes ($i.e.$, closed set) based on a model trained from closed set samples. However, a closed set may involve a highly imbalanced class distribution. Accurately differentiating open set samples and those from a minority class in the closed set poses a fundamental challenge as the model may be equally uncertain when recognizing samples from the minority class. 
In this paper, we propose Adaptive Robust Evidential Optimization (AREO) that offers a principled way to quantify sample uncertainty through evidential learning while optimally balancing the model training over all classes in the closed set through adaptive distributionally robust optimization (DRO). To avoid having the model primarily focus on the most difficult samples, as would happen under standard DRO, adaptive DRO training is performed, which is governed by a novel multi-scheduler learning mechanism to ensure an optimal model training behavior that gives sufficient attention to the difficult samples and the minority class while remaining capable of learning common patterns from the majority classes. Our experimental results on multiple real-world datasets demonstrate that the proposed model outputs uncertainty scores that can clearly separate samples from closed and open sets, and the detection results outperform the competitive baselines. ", "keywords": "Open Set Detection;Imbalanced Data", "primary_area": "", "supplementary_material": "", "author": "Hitesh Sapkota;Qi Yu", "authorids": "~Hitesh_Sapkota1;~Qi_Yu1", "gender": "M;M", "homepage": "https://hiteshsapkota.github.io/;https://www.rit.edu/mining/", "dblp": "251/4284;58/6957-1", "google_scholar": "0FKsBXYAAAAJ;L3gWdfEAAAAJ", "orcid": ";0000-0002-0426-5407", "linkedin": "hitesh-sapkota-2226051ba/;", "or_profile": "~Hitesh_Sapkota1;~Qi_Yu1", "aff": "Rochester Institute of Technology;Rochester Institute of Technology", "aff_domain": "rit.edu;rit.edu", "position": "PhD student;Professor", "bibtex": "@inproceedings{\nsapkota2023adaptive,\ntitle={Adaptive Robust Evidential Optimization For Open Set Detection from Imbalanced Data},\nauthor={Hitesh Sapkota and Qi Yu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=3yJ-hcJBqe}\n}", "github": "", "project": "", "reviewers": "c1ud;gFy7;FxMN;HU7D", "pdf_size": 689100, "recommendation": "6;6;6;6", "confidence": "5;4;4;2", "correctness": "2;3;4;3", "technical_novelty": "3;3;3;2", "empirical_novelty": "3;3;3;2", "wc_summary_paper": "62;148;130;223", "wc_strength_and_weaknesses": "103;225;338;51", "wc_clarity_quality_novelty_and_reproducibility": "256;178;47;45", "wc_summary_review": "131;283;64;32", "wc_review": "552;834;579;351", "wc_reply_reviewers": "0;0;0;285", "wc_reply_authors": "203;764;614;1377", "reply_reviewers": "0;0;0;4", "reply_authors": "1;1;1;5", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 140.75, 57.30346848141044 ], "wc_strength_and_weaknesses_avg": [ 179.25, 111.30672711026949 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 131.5, 89.84013579686976 ], "wc_summary_review_avg": [ 127.5, 96.62427231291318 ], "wc_review_avg": [ 579.0, 171.5648565411926 ], "wc_reply_reviewers_avg": [ 71.25, 123.40862003928251 ], "wc_reply_authors_avg": [ 739.5, 421.4822060301004 ], "reply_reviewers_avg": [ 1.0, 1.7320508075688772 ], "reply_authors_avg": [ 2.0, 1.7320508075688772 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15053056017543254422&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "pdf":
"https://openreview.net/pdf?id=3yJ-hcJBqe", "email": "rit.edu;rit.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Rochester Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.rit.edu", "aff_unique_abbr": "RIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "3z1Ws6GEYV4", "title": "Multi-Objective GFlowNets", "track": "main", "status": "Reject", "tldr": "We generate diverse Pareto-optimal candidates for high-dimensional multi-objective optimization problems with GFlowNets. ", "abstract": "In many applications of machine learning, like drug discovery and material design, the goal is to generate candidates that simultaneously maximize a set of objectives. As these objectives are often conflicting, there is no single candidate that simultaneously maximizes all objectives, but rather a set of Pareto-optimal candidates where one objective cannot be improved without worsening another. Moreover, these objectives, when considered in practice are often under-specified, making diversity of candidates a key consideration. The existing multi-objective optimization methods focus predominantly on covering the Pareto front, failing the capture diversity in the space of candidates. Motivated by the success of GFlowNets for generation of diverse candidates in a single objective setting, in this paper we consider Multi-Objective GFlowNets (MOGFNs). MOGFNs consist of a Conditional GFlowNet which models a family of single-objective sub-problems derived by decomposing the multi-objective optimization problem. Our work is the first to empirically demonstrate conditional GFlowNets. Through a series of experiments on synthetic tasks and real-world domains, we empirically demonstrate that MOGFNs outperform existing methods in terms of Hypervolume, R2-distance and candidate diversity. We also demonstrate the effectiveness of MOGFNs over existing methods in active learning settings. 
Finally, we supplement our empirical results with a careful analysis of each component of MOGFNs.", "keywords": "generative flow networks;multi-objective optimization;drug discovery;material design", "primary_area": "", "supplementary_material": "/attachment/655a590585a2cae02dc4feeb911f1102e7baa79a.zip", "author": "Moksh Jain;Sharath Chandra Raparthy;Alex Hern\u00e1ndez-Garc\u00eda;Jarrid Rector-Brooks;Yoshua Bengio;Santiago Miret;Emmanuel Bengio", "authorids": "~Moksh_Jain1;~Sharath_Chandra_Raparthy3;~Alex_Hern\u00e1ndez-Garc\u00eda1;~Jarrid_Rector-Brooks2;~Yoshua_Bengio1;~Santiago_Miret1;~Emmanuel_Bengio1", "gender": "M;M;;M;M;M;M", "homepage": "https://mj10.github.io;https://sharathraparthy.github.io/;https://alexhernandezgarcia.github.io;;http://yoshuabengio.org;https://www.intel.ai/bio/santiago-miret/;http://folinoid.com", "dblp": "249/9368;302/4190;213/8573;230/4010;56/953;241/5030;137/8040", "google_scholar": "TD07G_wAAAAJ;https://scholar.google.ca/citations?user=S1R0_UMAAAAJ;f8vQCOAAAAAJ;gxRPZh4AAAAJ;kukA0LcAAAAJ;HLQ_te4AAAAJ;https://scholar.google.ca/citations?user=yVtSOt8AAAAJ", "orcid": ";;;;;0000-0002-5121-3853;", "linkedin": ";;;;yoshuabengio/?originalSubdomain=ca;santiago-miret/;", "or_profile": "~Moksh_Jain1;~Sharath_Chandra_Raparthy3;~Alex_Hern\u00e1ndez-Garc\u00eda1;~Jarrid_Rector-Brooks2;~Yoshua_Bengio1;~Santiago_Miret1;~Emmanuel_Bengio1", "aff": "Universit\u00e9 de Montr\u00e9al;Meta Facebook;Universit\u00e9 de Montr\u00e9al;Montreal Institute for Learning Algorithms, University of Montreal, University of Montreal;University of Montreal;Intel;Valence Labs powered by recursion", "aff_domain": "umontreal.ca;fb.com;umontreal.ca;mila.umontreal.ca;umontreal.ca;intel.com;valencelabs.com", "position": "MS student;Researcher;Postdoc;PhD student;Full Professor;Researcher;Researcher", "bibtex": "@misc{\njain2023multiobjective,\ntitle={Multi-Objective {GF}lowNets},\nauthor={Moksh Jain and Sharath Chandra Raparthy and Alex Hern{\\'a}ndez-Garc{\\'\\i}a and Jarrid Rector-Brooks and Yoshua Bengio and Santiago Miret and Emmanuel Bengio},\nyear={2023},\nurl={https://openreview.net/forum?id=3z1Ws6GEYV4}\n}", "github": "", "project": "", "reviewers": "Qeid;MZeU;sp4N", "site": "https://openreview.net/forum?id=3z1Ws6GEYV4", "pdf_size": 1057952, "recommendation": "3;3;8", "confidence": "3;4;2", "correctness": "2;3;3", "technical_novelty": "2;2;3", "empirical_novelty": "2;2;3", "wc_summary_paper": "31;33;63", "wc_strength_and_weaknesses": "134;121;202", "wc_clarity_quality_novelty_and_reproducibility": "22;56;55", "wc_summary_review": "24;30;31", "wc_review": "211;240;351", "wc_reply_reviewers": "0;0;66", "wc_reply_authors": "821;1017;717", "reply_reviewers": "0;0;1", "reply_authors": "3;4;3", "recommendation_avg": [ 4.666666666666667, 2.357022603955158 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 42.333333333333336, 14.636332266733433 ], "wc_strength_and_weaknesses_avg": [ 152.33333333333334, 35.51838334659329 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 44.333333333333336, 15.797327481430381 ], "wc_summary_review_avg": [ 28.333333333333332, 3.0912061651652345 ], "wc_review_avg": [ 267.3333333333333, 60.33425413661979 ], "wc_reply_reviewers_avg": [ 22.0, 31.11269837220809 ], "wc_reply_authors_avg": [ 851.6666666666666, 124.37934805353429 ], 
"reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 3.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.8660254037844387, "corr_recommendation_correctness": 0.5000000000000001, "gs_citation": 84, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2501387690786223303&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;0;2;2;3;4", "aff_unique_norm": "Universit\u00e9 de Montr\u00e9al;Meta;University of Montreal;Intel;Valence Labs", "aff_unique_dep": ";Meta Platforms, Inc.;Montreal Institute for Learning Algorithms;Intel Corporation;", "aff_unique_url": "https://www.umontreal.ca;https://meta.com;https://www.umontreal.ca;https://www.intel.com;", "aff_unique_abbr": "UdeM;Meta;UM;Intel;", "aff_campus_unique_index": "1", "aff_campus_unique": ";Montreal", "aff_country_unique_index": "0;1;0;0;0;1", "aff_country_unique": "Canada;United States;" }, { "title": "What shapes the loss landscape of self supervised learning?", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12144", "id": "3zSn48RUO8M", "poster": "", "openreview": "https://openreview.net/forum?id=3zSn48RUO8M", "slides": "https://iclr.cc/virtual/2023/poster/12144", "video": "https://iclr.cc/virtual/2023/poster/12144", "author_site": "Liu Ziyin, Ekdeep Singh Lubana, Masahito Ueda, Hidenori Tanaka", "tldr": "We analytically solve the loss landscape of self-supervised learning and identify the causes of complete and dimensional collapse", "abstract": "Prevention of complete and dimensional collapse of representations has recently become a design principle for self-supervised learning (SSL). However, questions remain in our theoretical understanding: When do those collapses occur? What are the mechanisms and causes? We answer these questions by deriving and thoroughly analyzing an analytically tractable theory of SSL loss landscapes. In this theory, we identify the causes of the dimensional collapse and study the effect of normalization and bias. 
Finally, we leverage the interpretability afforded by the analytical theory to understand how dimensional collapse can be beneficial and what affects the robustness of SSL against data imbalance.", "keywords": "loss landscape;self-supervised learning;collapse", "primary_area": "", "supplementary_material": "/attachment/685839045185e54cf5ed5ea89ae3c5635bb8f5cb.zip", "author": "Liu Ziyin;Ekdeep Singh Lubana;Masahito Ueda;Hidenori Tanaka", "authorids": "~Liu_Ziyin1;~Ekdeep_Singh_Lubana1;~Masahito_Ueda1;~Hidenori_Tanaka1", "gender": ";M;M;", "homepage": "https://www.mit.edu/~ziyinl/;https://ekdeepslubana.github.io/;http://cat.phys.s.u-tokyo.ac.jp/index-e.html;https://sites.google.com/view/htanaka/home", "dblp": ";228/2683;;", "google_scholar": "NpN9oRMAAAAJ;https://scholar.google.co.in/citations?user=OP7S3vsAAAAJ;https://scholar.google.co.jp/citations?user=Xpjx9CwAAAAJ;f_pWOGIAAAAJ", "orcid": ";;0000-0002-5367-1436;", "linkedin": ";;;", "or_profile": "~Liu_Ziyin1;~Ekdeep_Singh_Lubana1;~Masahito_Ueda1;~Hidenori_Tanaka1", "aff": "The University of Tokyo;University of Michigan;The University of Tokyo;Physics & Informatics Lab, NTT Research, Inc.", "aff_domain": "u-tokyo.ac.jp;umich.edu;u-tokyo.ac.jp;ntt-research.com", "position": "PhD student;PhD student;Full Professor;Senior Research Scientist", "bibtex": "@inproceedings{\nziyin2023what,\ntitle={What shapes the loss landscape of self supervised learning?},\nauthor={Liu Ziyin and Ekdeep Singh Lubana and Masahito Ueda and Hidenori Tanaka},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=3zSn48RUO8M}\n}", "github": "", "project": "", "reviewers": "Q5CS;m5m1;ffca", "pdf_size": 4558447, "recommendation": "6;6;6", "confidence": "4;3;4", "correctness": "3;4;3", "technical_novelty": "3;3;3", "empirical_novelty": "3;2;3", "wc_summary_paper": "11;179;118", "wc_strength_and_weaknesses": "177;179;117", "wc_clarity_quality_novelty_and_reproducibility": "50;29;68", "wc_summary_review": "21;18;82", "wc_review": "259;405;385", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "419;631;493", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 102.66666666666667, 69.43742186714276 ], "wc_strength_and_weaknesses_avg": [ 157.66666666666666, 28.767265347188555 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 49.0, 15.937377450509228 ], "wc_summary_review_avg": [ 40.333333333333336, 29.48822740612863 ], "wc_review_avg": [ 349.6666666666667, 64.62885492478486 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 514.3333333333334, 87.85341326449543 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3414242301022038555&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=3zSn48RUO8M", "email": "u-tokyo.ac.jp;umich.edu;u-tokyo.ac.jp;ntt-research.com", "author_num": 4, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "University of Tokyo;University of Michigan;NTT Research, Inc.", "aff_unique_dep": ";;Physics 
& Informatics Lab", "aff_unique_url": "https://www.u-tokyo.ac.jp;https://www.umich.edu;https://www.ntt-research.com", "aff_unique_abbr": "UTokyo;UM;NTT Research", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "Japan;United States" }, { "id": "4-aEhZnvNnk", "title": "Neural Embeddings for Text", "track": "main", "status": "Reject", "tldr": "We propose a new kind of embedding for natural language text that deeply represents semantic meaning.", "abstract": "We propose a new kind of embedding for natural language text that deeply represents semantic meaning. Standard text embeddings use the vector output of a pretrained language model. In our method, we let a language model learn from the text and then literally pick its brain, taking the actual weights of the model's neurons to generate a vector. We call this representation of the text a neural embedding. With analysis of its behavior on several datasets, we confirm the ability of this representation to reflect semantics of the text. We also compare neural embeddings with GPT sentence (SGPT) embeddings. We observe that neural embeddings achieve comparable performance with a far smaller model, and that the embeddings respond to semantics differently.", "keywords": "text embedding;semantic embedding;neural embedding;neural text representation", "primary_area": "", "supplementary_material": "/attachment/cb94f48425e0c522ae5806774e0b289e4b447c75.zip", "author": "Oleg Vasilyev;John Bohannon", "authorids": "~Oleg_Vasilyev1;~John_Bohannon1", "gender": "M;", "homepage": ";https://johnbohannon.org", "dblp": ";", "google_scholar": "OcWfPcoAAAAJ;", "orcid": "0000-0002-4735-6106;", "linkedin": "olegvasilyev/;", "or_profile": "~Oleg_Vasilyev1;~John_Bohannon1", "aff": "Primer Technologies;", "aff_domain": "primer.ai;", "position": "Staff Research Scientist;", "bibtex": "@misc{\nvasilyev2023neural,\ntitle={Neural Embeddings for Text},\nauthor={Oleg Vasilyev and John Bohannon},\nyear={2023},\nurl={https://openreview.net/forum?id=4-aEhZnvNnk}\n}", "github": "", "project": "", "reviewers": "JM7i;gPmr;SjTe;fZyy", "site": "https://openreview.net/forum?id=4-aEhZnvNnk", "pdf_size": 438710, "recommendation": "3;3;3;5", "confidence": "5;4;5;4", "correctness": "2;2;3;3", "technical_novelty": "2;4;1;2", "empirical_novelty": "2;4;1;2", "wc_summary_paper": "104;97;68;79", "wc_strength_and_weaknesses": "160;234;164;98", "wc_clarity_quality_novelty_and_reproducibility": "34;280;45;101", "wc_summary_review": "28;64;57;95", "wc_review": "326;675;334;373", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 4.5, 0.5 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.25, 1.0897247358851685 ], "empirical_novelty_avg": [ 2.25, 1.0897247358851685 ], "wc_summary_paper_avg": [ 87.0, 14.265342617687105 ], "wc_strength_and_weaknesses_avg": [ 164.0, 48.14561246884289 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 115.0, 98.59259607090179 ], "wc_summary_review_avg": [ 61.0, 23.822258499143192 ], "wc_review_avg": [ 427.0, 144.28270859669914 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 1, 
"gs_cited_by_link": "https://scholar.google.com/scholar?cites=12370952214480241338&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "aff_unique_index": "0", "aff_unique_norm": "Primer Technologies", "aff_unique_dep": "", "aff_unique_url": "https://www.primer.ai", "aff_unique_abbr": "", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Offline Q-learning on Diverse Multi-Task Data Both Scales And Generalizes", "status": "Top-5%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10845", "id": "4-k7kUavAj", "poster": "", "openreview": "https://openreview.net/forum?id=4-k7kUavAj", "slides": "https://iclr.cc/virtual/2023/poster/10845", "video": "https://iclr.cc/virtual/2023/poster/10845", "author_site": "Aviral Kumar, Rishabh Agarwal, Xinyang Geng, George Tucker, Sergey Levine", "tldr": "", "abstract": "The potential of offline reinforcement learning (RL) is that high-capacity models trained on large, heterogeneous datasets can lead to agents that generalize broadly, analogously to similar advances in vision and NLP. However, recent works argue that offline RL methods encounter unique challenges to scaling up model capacity. Drawing on the learnings from these works, we re-examine previous design choices and find that with appropriate choices: ResNets, cross-entropy based distributional backups, and feature normalization, offline Q-learning algorithms exhibit strong performance that scales with model capacity. Using multi-task Atari as a testbed for scaling and generalization, we train a single policy on 40 games with near-human performance using up-to 80 million parameter networks, finding that model performance scales favorably with capacity. In contrast to prior work, we extrapolate beyond dataset performance even when trained entirely on a large (400M transitions) but highly suboptimal dataset (51% human-level performance). Compared to return-conditioned supervised approaches, offline Q-learning scales similarly with model capacity and has better performance, especially when the dataset is suboptimal. 
Finally, we show that offline Q-learning with a diverse dataset is sufficient to learn powerful representations that facilitate rapid transfer to novel games and fast online learning on new variations of a training game, improving over existing state-of-the-art representation learning approaches.", "keywords": "offline RL;multi-task Atari;large models", "primary_area": "", "supplementary_material": "/attachment/99d459af38db89b1441b838ab9ef01f3bb8bea5c.zip", "author": "Aviral Kumar;Rishabh Agarwal;Xinyang Geng;George Tucker;Sergey Levine", "authorids": "~Aviral_Kumar2;~Rishabh_Agarwal2;~Xinyang_Geng1;~George_Tucker1;~Sergey_Levine1", "gender": "M;M;M;M;M", "homepage": "https://aviralkumar2907.github.io/;https://agarwl.github.io;http://young-geng.xyz/;https://sites.google.com/view/gjt;https://people.eecs.berkeley.edu/~svlevine/", "dblp": "202/7961;;186/8221;135/5748;80/7594", "google_scholar": ";https://scholar.google.ca/citations?user=aH8AJu4AAAAJ;vYougn0AAAAJ;-gJkPHIAAAAJ;8R35rCwAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Aviral_Kumar2;~Rishabh_Agarwal2;~Xinyang_Geng1;~George_Tucker1;~Sergey_Levine1", "aff": "University of California, Berkeley;Google DeepMind;University of California, Berkeley;Google Brain;Google", "aff_domain": "berkeley.edu;google.com;berkeley.edu;google.com;google.com", "position": "PhD student;Research Scientist;PhD student;Research Scientist;Research Scientist", "bibtex": "@inproceedings{\nkumar2023offline,\ntitle={Offline Q-learning on Diverse Multi-Task Data Both Scales And Generalizes},\nauthor={Aviral Kumar and Rishabh Agarwal and Xinyang Geng and George Tucker and Sergey Levine},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=4-k7kUavAj}\n}", "github": "", "project": "", "reviewers": "oQ8T;k6mY;YtKE;PcZp", "pdf_size": 7075793, "recommendation": "6;8;8;10", "confidence": "3;5;5;3", "correctness": "3;4;4;4", "technical_novelty": "3;3;3;4", "empirical_novelty": "3;3;3;4", "wc_summary_paper": "228;82;23;103", "wc_strength_and_weaknesses": "290;190;287;116", "wc_clarity_quality_novelty_and_reproducibility": "2;56;21;73", "wc_summary_review": "50;76;105;44", "wc_review": "570;404;436;336", "wc_reply_reviewers": "19;64;56;0", "wc_reply_authors": "381;468;777;177", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;1", "recommendation_avg": [ 8.0, 1.4142135623730951 ], "confidence_avg": [ 4.0, 1.0 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 109.0, 74.70274426016758 ], "wc_strength_and_weaknesses_avg": [ 220.75, 72.63392802265344 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 38.0, 27.99107000455681 ], "wc_summary_review_avg": [ 68.75, 24.138920854089562 ], "wc_review_avg": [ 436.5, 85.11609718496261 ], "wc_reply_reviewers_avg": [ 34.75, 26.280934153869037 ], "wc_reply_authors_avg": [ 450.75, 215.95181754271022 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.816496580927726, "gs_citation": 61, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3880526403192103650&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=4-k7kUavAj", "email": 
"berkeley.edu;google.com;berkeley.edu;google.com;google.com", "author_num": 5, "aff_unique_index": "0;1;0;1;1", "aff_unique_norm": "University of California, Berkeley;Google", "aff_unique_dep": ";Google DeepMind", "aff_unique_url": "https://www.berkeley.edu;https://deepmind.com", "aff_unique_abbr": "UC Berkeley;DeepMind", "aff_campus_unique_index": "0;0;2;2", "aff_campus_unique": "Berkeley;;Mountain View", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "4-oNRO0Fqy", "title": "Model ChangeLists: Characterizing Changes in ML Prediction APIs", "track": "main", "status": "Reject", "tldr": "In this work, we study MLaaS API updates. We introduce, Mocha, a new framework for describing model updates. Then we use Mocha to demonstrate how subtle, but significant, shifts are commonly introduced by updates.", "abstract": "Updates to Machine Learning as a Service (MLaaS) APIs may affect downstream systems that depend on their predictions. However, performance changes introduced by these updates are poorly documented by providers and seldom studied in the literature. As a result, users are left wondering: do model updates introduce subtle performance changes that could adversely affect my system? Ideally, users would have access to a detailed ChangeList specifying the slices of data where model performance has improved and degraded since the update. But, producing a ChangeList is challenging because it requires (1) discovering slices in the absence of detailed annotations or metadata, (2) accurately attributing coherent concepts to the discovered slices, and (3) communicating them to the user in a digestable manner. We introduce Mocha, an interactive framework for building, verifying and releasing ChangeLists that addresses these challenges. Using it, we perform a large-scale analysis of three real-world MLaaS API updates. We produce a ChangeList for each, identifying over 100 coherent data slices on which the model\u2019s performance changed significantly. Notably, we find 63 instances where an update improves performance globally, but hurts performance on a coherent slice \u2013 a phenomenon not previously documented at scale in the literature. 
These findings underscore the importance of producing a detailed ChangeList when the model behind an API is updated.", "keywords": "model evaluation;model comparison;machine learning as a service;robustness", "primary_area": "", "supplementary_material": "", "author": "Sabri Eyuboglu;Karan Goel;Arjun D Desai;Lingjiao Chen;Mathew Monfort;Matei Zaharia;Christopher Re;James Zou", "authorids": "~Sabri_Eyuboglu1;~Karan_Goel1;~Arjun_D_Desai1;~Lingjiao_Chen1;~Mathew_Monfort1;~Matei_Zaharia1;~Christopher_Re1;~James_Zou1", "gender": ";M;;;M;M;;", "homepage": "http://www.sabrieyuboglu.com/;http://krandiash.github.io;;;http://people.csail.mit.edu/mmonfort/;https://cs.stanford.edu/~matei/;;", "dblp": "298/7563;175/1290;;131/6638.html;160/9991;36/2133;;", "google_scholar": ";;;;02FGPmwAAAAJ;I1EvjZsAAAAJ;;23ZXZvEAAAAJ", "orcid": ";;;;0000-0001-6373-5520;0000-0002-7547-7204;;", "linkedin": ";;;;mathew-monfort-56953748;mateizaharia/;;", "or_profile": "~Sabri_Eyuboglu1;~Karan_Goel1;~Arjun_D_Desai1;~Lingjiao_Chen1;~Mathew_Monfort1;~Matei_Zaharia1;~Christopher_Re1;~James_Zou1", "aff": "Stanford University;Stanford University;;Stanford University;Amazon;Stanford University;;Stanford University", "aff_domain": "stanford.edu;stanford.edu;;stanford.edu;amazon.com;stanford.edu;;stanford.edu", "position": "PhD student;PhD student;;PhD student;Applied Scientist;Associate Professor;;Assistant Professor", "bibtex": "@misc{\neyuboglu2023model,\ntitle={Model ChangeLists: Characterizing Changes in {ML} Prediction {API}s},\nauthor={Sabri Eyuboglu and Karan Goel and Arjun D Desai and Lingjiao Chen and Mathew Monfort and Matei Zaharia and Christopher Re and James Zou},\nyear={2023},\nurl={https://openreview.net/forum?id=4-oNRO0Fqy}\n}", "github": "", "project": "", "reviewers": "Q1WT;C4kp;NHrB;LdES", "site": "https://openreview.net/forum?id=4-oNRO0Fqy", "pdf_size": 4454226, "recommendation": "3;3;5;6", "confidence": "4;3;3;4", "correctness": "3;3;4;4", "technical_novelty": "1;2;2;4", "empirical_novelty": "1;2;3;3", "wc_summary_paper": "137;99;77;90", "wc_strength_and_weaknesses": "137;218;15;422", "wc_clarity_quality_novelty_and_reproducibility": "43;11;24;57", "wc_summary_review": "14;88;66;18", "wc_review": "331;416;182;587", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;133", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;1", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.25, 1.0897247358851685 ], "empirical_novelty_avg": [ 2.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 100.75, 22.342504335906483 ], "wc_strength_and_weaknesses_avg": [ 198.0, 148.14351150151666 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 33.75, 17.597940220378067 ], "wc_summary_review_avg": [ 46.5, 31.5079355083763 ], "wc_review_avg": [ 379.0, 146.41208966475412 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 33.25, 57.59068935166517 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0.25, 0.4330127018922193 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": 0.19245008972987526, "corr_recommendation_correctness": 0.9622504486493761, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:hD7CX7qJJwEJ:scholar.google.com/&scioq=Model+ChangeLists:+Characterizing+Changes+in+ML+Prediction+APIs&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "Stanford University;Amazon", "aff_unique_dep": 
";Amazon.com, Inc.", "aff_unique_url": "https://www.stanford.edu;https://www.amazon.com", "aff_unique_abbr": "Stanford;Amazon", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "40Mw2GJnlZ", "title": "Fourier PINNs: From Strong Boundary Conditions to Adaptive Fourier Bases", "track": "main", "status": "Reject", "tldr": "", "abstract": "Interest in Physics-Informed Neural Networks (PINNs) is rising as a mesh-free alternative to traditional numerical solvers for partial differential equations (PDEs). While successful, PINNs often struggle to learn high-frequency and multi-scale target solutions\u2014which, according to prior analysis, might arise from competition during optimization between the weakly enforced boundary loss and residual loss terms. By creatively modifying the neural network architecture, some simple boundary conditions (BCs) can be satisfied exactly without jointly optimizing an additional loss term, thus avoiding the aforementioned competition altogether. Motivated by this analysis, we first study a strong BC version of PINNs for Dirichlet BCs and observe a consistent improvement compared to the standard PINNs. We conducted a Fourier analysis and found that strong BC PINNs can better learn the amplitudes of high-frequency components of the target solutions. While BC PINNs provide a promising improvement, constructing these unique architectures is an intricate process made difficult (if not impossible) by certain BCs and domain geometries. Enlightened by our analysis, we propose Fourier PINNs\u2014a simple, general, yet powerful method that augments PINNs with pre-specified, dense Fourier bases. Our proposed architecture likewise better learns high-frequency components but places no restrictions on the particular BCs. We developed an adaptive learning and basis selection algorithm based on alternating NN basis optimization, Fourier and NN basis coefficient estimations, and coefficient truncation. This schema can flexibly identify the significant frequencies while weakening the nominal to better capture the target solution's power spectrum. We show the advantage of our approach in learning high-frequency and multi-scale solutions in a set of systematic experiments. 
", "keywords": "Physics Informed Machine Learning;Fourier Analysis;Scientific Machine Learning;Partial Differential Equations", "primary_area": "", "supplementary_material": "", "author": "Madison Cooley;Da Long;Robert Kirby;Shandian Zhe", "authorids": "~Madison_Cooley1;~Da_Long1;~Robert_Kirby1;~Shandian_Zhe1", "gender": ";M;;", "homepage": ";https://long-da.github.io/;;", "dblp": ";;;", "google_scholar": ";https://scholar.google.com/citations?hl=en;;", "orcid": ";;;", "linkedin": ";da-long-utah/;;", "or_profile": "~Madison_Cooley1;~Da_Long1;~Robert_Kirby1;~Shandian_Zhe1", "aff": ";The University of Utah;;", "aff_domain": ";umail.utah.edu;;", "position": ";PhD student;;", "bibtex": "@misc{\ncooley2023fourier,\ntitle={Fourier {PINN}s: From Strong Boundary Conditions to Adaptive Fourier Bases},\nauthor={Madison Cooley and Da Long and Robert Kirby and Shandian Zhe},\nyear={2023},\nurl={https://openreview.net/forum?id=40Mw2GJnlZ}\n}", "github": "", "project": "", "reviewers": "gWnw;9ZeR;nwfE", "site": "https://openreview.net/forum?id=40Mw2GJnlZ", "pdf_size": 469083, "recommendation": "3;3;5", "confidence": "3;4;3", "correctness": "3;3;3", "technical_novelty": "2;2;3", "empirical_novelty": "2;2;3", "wc_summary_paper": "63;78;54", "wc_strength_and_weaknesses": "211;236;118", "wc_clarity_quality_novelty_and_reproducibility": "43;9;13", "wc_summary_review": "23;58;36", "wc_review": "340;381;221", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 65.0, 9.899494936611665 ], "wc_strength_and_weaknesses_avg": [ 188.33333333333334, 50.769632218045025 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 21.666666666666668, 15.173075568988056 ], "wc_summary_review_avg": [ 39.0, 14.445299120013633 ], "wc_review_avg": [ 314.0, 67.85769423334885 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.5, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7456275304903810373&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0", "aff_unique_norm": "University of Utah", "aff_unique_dep": "", "aff_unique_url": "https://www.utah.edu", "aff_unique_abbr": "Utah", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "40RNVzSoCqD", "title": "FedDM: Iterative Distribution Matching for Communication-Efficient Federated Learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Federated learning (FL) has recently attracted increasing attention from academia and industry, with the ultimate goal of achieving collaborative training under privacy and communication constraints. Existing iterative model averaging based FL algorithms require a large number of communication rounds to obtain a well-performed model due to extremely unbalanced and non-i.i.d data partitioning among different clients. 
Thus, we propose FedDM to build the global training objective from multiple local surrogate functions, which enables the server to gain a more global view of the loss landscape. In detail, we construct synthetic sets of data on each client to locally match the loss landscape from original data through distribution matching. FedDM reduces communication rounds and improves model quality by transmitting more informative and smaller synthesized data compared with unwieldy model weights. We conduct extensive experiments on three image classification datasets, and results show that our method can outperform other FL counterparts in terms of efficiency and model performance. Moreover, we demonstrate that FedDM can be adapted to preserve differential privacy with the Gaussian mechanism and to train a better model under the same privacy budget.", "keywords": "data distillation;federated learning", "primary_area": "", "supplementary_material": "/attachment/66bd95d1d98cb455d900104c6e4846be2478c4f8.zip", "author": "Yuanhao Xiong;Ruochen Wang;Minhao Cheng;Felix Yu;Cho-Jui Hsieh", "authorids": "~Yuanhao_Xiong1;~Ruochen_Wang2;~Minhao_Cheng1;~Felix_Yu1;~Cho-Jui_Hsieh1", "gender": "M;M;M;M;M", "homepage": "https://xyh97.github.io/;https://ruocwang.github.io/;https://cmhcbb.github.io/;http://felixyu.org;http://web.cs.ucla.edu/~chohsieh/index.html", "dblp": "232/1248;33/120;174/1717;23/10574;14/2770", "google_scholar": "DVKxiMkAAAAJ;8fXrlRAAAAAJ;_LkC1yoAAAAJ;lYvF6cUAAAAJ;Wy89g4IAAAAJ", "orcid": ";;0000-0003-3965-4215;;", "linkedin": ";ruochen-wang-1699b1113/;;;", "or_profile": "~Yuanhao_Xiong1;~Ruochen_Wang2;~Minhao_Cheng1;~Felix_Yu1;~Cho-Jui_Hsieh1", "aff": "University of California, Los Angeles;University of California, Los Angeles;Hong Kong University of Science and Technology;Google;Amazon", "aff_domain": "cs.ucla.edu;ucla.edu;ust.hk;google.com;amazon.com", "position": "PhD student;PhD student;Assistant Professor;Research Scientist;visiting scholar", "bibtex": "@misc{\nxiong2023feddm,\ntitle={Fed{DM}: Iterative Distribution Matching for Communication-Efficient Federated Learning},\nauthor={Yuanhao Xiong and Ruochen Wang and Minhao Cheng and Felix Yu and Cho-Jui Hsieh},\nyear={2023},\nurl={https://openreview.net/forum?id=40RNVzSoCqD}\n}", "github": "", "project": "", "reviewers": "nvFf;mFcP;aK1y", "site": "https://openreview.net/forum?id=40RNVzSoCqD", "pdf_size": 1512023, "recommendation": "3;5;5", "confidence": "4;4;3", "correctness": "2;3;2", "technical_novelty": "3;3;4", "empirical_novelty": "3;3;3", "wc_summary_paper": "53;75;102", "wc_strength_and_weaknesses": "284;179;27", "wc_clarity_quality_novelty_and_reproducibility": "201;16;20", "wc_summary_review": "25;36;298", "wc_review": "563;306;447", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "92;92;221", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 76.66666666666667, 20.038851153585515 ], "wc_strength_and_weaknesses_avg": [ 163.33333333333334, 105.50302786597591 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 79.0, 86.28248180637055 ], "wc_summary_review_avg": [ 119.66666666666667, 126.18064651742577 ], "wc_review_avg": [ 438.6666666666667, 105.08515065835367 ], "wc_reply_reviewers_avg": [ 0, 0 ], 
"wc_reply_authors_avg": [ 135.0, 60.81118318204309 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.49999999999999983, "corr_recommendation_correctness": 0.49999999999999983, "gs_citation": 122, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6221166668601548172&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 9, "aff_unique_index": "0;0;1;2;3", "aff_unique_norm": "University of California, Los Angeles;Hong Kong University of Science and Technology;Google;Amazon", "aff_unique_dep": ";;Google;Amazon.com, Inc.", "aff_unique_url": "https://www.ucla.edu;https://www.ust.hk;https://www.google.com;https://www.amazon.com", "aff_unique_abbr": "UCLA;HKUST;Google;Amazon", "aff_campus_unique_index": "0;0;1;2", "aff_campus_unique": "Los Angeles;Hong Kong SAR;Mountain View;", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "United States;China" }, { "id": "424tG_RaE-", "title": "Physics-empowered Molecular Representation Learning", "track": "main", "status": "Reject", "tldr": "We propose a Transformer-based molecular energy prediction model equipped with physical insights and self-supervised masked atomic modeling.", "abstract": "Estimating the energetic properties of molecular systems is a critical task in material design. With the trade-off between accuracy and computational cost, various methods have been used to predict the energy of materials, including recent neural-net-based models. However, most existing neural-net models are context-free (physics-ignoring) black-box models, limiting their applications to predict energy only within the distribution of the training set and thus preventing from being applied to the real practice of molecular design. Inspired by the physical mechanism of the interatomic potential, we propose a physics-driven energy prediction model using a Transformer. 
Our model is trained not only on the energy regression in the training set, but also with conditions inspired by physical insights and self-supervision based on Masked Atomic Modeling, making it adaptable to the optimization of molecular structure beyond the range observed during training, taking a step towards realizable molecular structure optimization.", "keywords": "Physics;Transformer;Molecular representation learning;ML potential", "primary_area": "", "supplementary_material": "/attachment/489b5135d86966acee43488de47aa419c76dfb97.zip", "author": "Seunghoon Yi;Youngwoo Cho;Jinhwan Sul;Seung Woo Ko;Soo Kyung Kim;Jaegul Choo;Hongkee Yoon;Joonseok Lee", "authorids": "~Seunghoon_Yi1;~Youngwoo_Cho1;~Jinhwan_Sul1;~Seung_Woo_Ko1;~Soo_Kyung_Kim1;~Jaegul_Choo1;~Hongkee_Yoon1;~Joonseok_Lee1", "gender": "M;M;M;M;F;M;M;M", "homepage": "https://github.com/Seunghoon-Yi;;;;https://sites.google.com/view/soo-kyung-kim/home?authuser=0;https://sites.google.com/site/jaegulchoo/;;http://www.joonseok.net", "dblp": "354/4204.html;276/6715;;;;07/2074;;77/1319.html", "google_scholar": "qIeCCG8AAAAJ;Ys4ejKUAAAAJ;;;ftHUysoAAAAJ;GHJYsLEAAAAJ;dE6DtRkAAAAJ;https://scholar.google.co.kr/citations?user=M-MfqpMAAAAJ", "orcid": ";0000-0001-6082-9468;;;;;;", "linkedin": ";youngwoo-cho;jinhwan-sul-347799250/;seung-woo-ko-1b9bb8137;soo-kyung-kim-a9758217/;;;joonseoklee", "or_profile": "~Seunghoon_Yi1;~Youngwoo_Cho1;~Jinhwan_Sul1;~Seung_Woo_Ko1;~Soo_Kyung_Kim1;~Jaegul_Choo1;~Hongkee_Yoon1;~Joonseok_Lee1", "aff": "Seoul National University;Korea Advanced Institute of Science & Technology;Georgia Institute of Technology;Seoul National University;Palo Alto Research Center;Korea Advanced Institute of Science & Technology;;Google Research", "aff_domain": "snu.ac.kr;kaist.ac.kr;gatech.edu;snu.ac.kr;parc.com;kaist.ac.kr;;google.com", "position": "MS student;PhD student;PhD student;MS student;Researcher;Associate Professor;;Research Scientist", "bibtex": "@misc{\nyi2023physicsempowered,\ntitle={Physics-empowered Molecular Representation Learning},\nauthor={Seunghoon Yi and Youngwoo Cho and Jinhwan Sul and Seung Woo Ko and Soo Kyung Kim and Jaegul Choo and Hongkee Yoon and Joonseok Lee},\nyear={2023},\nurl={https://openreview.net/forum?id=424tG_RaE-}\n}", "github": "", "project": "", "reviewers": "x1A4;bSWh;wZ2H;jtDm", "site": "https://openreview.net/forum?id=424tG_RaE-", "pdf_size": 1612970, "recommendation": "3;5;5;6", "confidence": "4;3;3;3", "correctness": "3;3;4;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "101;50;88;101", "wc_strength_and_weaknesses": "268;126;189;65", "wc_clarity_quality_novelty_and_reproducibility": "24;11;51;19", "wc_summary_review": "66;26;56;62", "wc_review": "459;213;384;247", "wc_reply_reviewers": "0;0;0;41", "wc_reply_authors": "1279;1061;897;532", "reply_reviewers": "0;0;0;1", "reply_authors": "3;2;2;1", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 85.0, 20.89258241577618 ], "wc_strength_and_weaknesses_avg": [ 162.0, 75.28280016046162 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 26.25, 15.022899187573616 ], "wc_summary_review_avg": [ 52.5, 15.708278072405008 ], "wc_review_avg": [ 325.75, 100.07840676189845 ], "wc_reply_reviewers_avg": [ 10.25, 17.75352077758099 ], "wc_reply_authors_avg": [ 942.25, 
272.8803538182989 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": -0.9271726499455306, "corr_recommendation_correctness": 0.13245323570650439, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:TRezvo73n58J:scholar.google.com/&scioq=Physics-empowered+Molecular+Representation+Learning&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2;0;3;1;4", "aff_unique_norm": "Seoul National University;Korea Advanced Institute of Science and Technology;Georgia Institute of Technology;Palo Alto Research Center;Google", "aff_unique_dep": ";;;;Google Research", "aff_unique_url": "https://www.snu.ac.kr;https://www.kaist.ac.kr;https://www.gatech.edu;https://www.parc.com;https://research.google", "aff_unique_abbr": "SNU;KAIST;Georgia Tech;PARC;Google Research", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Palo Alto;Mountain View", "aff_country_unique_index": "0;0;1;0;1;0;1", "aff_country_unique": "South Korea;United States" }, { "id": "42Xu5gudPL", "title": "Impact of the Last Fully Connected Layer on Out-of-distribution Detection", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Out-of-distribution (OOD) detection, a task that aims to detect OOD data during deployment, has received much research attention recently, due to its importance for the safe deployment of deep models. In this task, a major problem is how to handle overconfidence on OOD data. While this problem has been explored from several perspectives in previous works, such as the measure of OOD uncertainty and the activation function, the connection between the last fully connected (FC) layer and this overconfidence problem is still underexplored. In this paper, we find that the weights of the last FC layer of the model trained on in-distribution (ID) data can be an important source of the overconfidence problem, and we propose a simple yet effective OOD detection method that assigns small values to the weights of the last FC layer instead of using the original weights trained on ID data. We analyze in Sec. 5 how our proposed method can make OOD data and ID data more separable, and thus alleviate the overconfidence problem. Moreover, our proposed method can be flexibly applied to various off-the-shelf OOD detection methods. 
We show the effectiveness of our proposed method through extensive experiments on the ImageNet dataset, the CIFAR-10 dataset, and the CIFAR-100 dataset.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Jinhong Lin;Haoxuan Qu;Jun Liu", "authorids": "~Jinhong_Lin1;~Haoxuan_Qu1;~Jun_Liu8", "gender": "Non-Binary;M;M", "homepage": "https://jonneslin.github.io/;;", "dblp": "309/6999;302/3883;95/3736-36", "google_scholar": "https://scholar.google.com/citations?hl=en-CN;https://scholar.google.com.sg/citations?user=fR83-ycAAAAJ;Q5Ild8UAAAAJ", "orcid": ";0000-0001-5054-3394;", "linkedin": ";;", "or_profile": "~Jinhong_Lin1;~Haoxuan_Qu1;~Jun_Liu8", "aff": "University of Wisconsin - Madison;Singapore University of Technology and Design;Singapore University of Technology and Design", "aff_domain": "wisc.edu;sutd.edu.sg;sutd.edu.sg", "position": "Undergrad student;PhD student;Assistant Professor", "bibtex": "@misc{\nlin2023impact,\ntitle={Impact of the Last Fully Connected Layer on Out-of-distribution Detection},\nauthor={Jinhong Lin and Haoxuan Qu and Jun Liu},\nyear={2023},\nurl={https://openreview.net/forum?id=42Xu5gudPL}\n}", "github": "", "project": "", "reviewers": "AdpZ;AiKz;xCiv;WwP7", "site": "https://openreview.net/forum?id=42Xu5gudPL", "pdf_size": 1870611, "recommendation": "3;3;3;6", "confidence": "4;4;4;4", "correctness": "3;2;2;3", "technical_novelty": "3;1;2;3", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "93;97;29;77", "wc_strength_and_weaknesses": "325;198;95;126", "wc_clarity_quality_novelty_and_reproducibility": "117;160;327;85", "wc_summary_review": "17;58;39;42", "wc_review": "552;513;490;330", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.75, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.25, 0.82915619758885 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 74.0, 27.03701166919155 ], "wc_strength_and_weaknesses_avg": [ 186.0, 88.52400804301622 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 172.25, 93.22385692514551 ], "wc_summary_review_avg": [ 39.0, 14.611639196202457 ], "wc_review_avg": [ 471.25, 84.50850548909264 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Fu-rRaZIIaoJ:scholar.google.com/&scioq=Impact+of+the+Last+Fully+Connected+Layer+on+Out-of-distribution+Detection&hl=en&as_sdt=0,23", "gs_version_total": 0, "aff_unique_index": "0;1;1", "aff_unique_norm": "University of Wisconsin-Madison;Singapore University of Technology and Design", "aff_unique_dep": ";", "aff_unique_url": "https://www.wisc.edu;https://www.sutd.edu.sg", "aff_unique_abbr": "UW-Madison;SUTD", "aff_campus_unique_index": "0", "aff_campus_unique": "Madison;", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United States;Singapore" }, { "title": "Offline Reinforcement Learning via High-Fidelity Generative Behavior Modeling", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11212", "id": "42zs3qa2kpy", "poster": "", "openreview": "https://openreview.net/forum?id=42zs3qa2kpy", "slides": 
"https://iclr.cc/virtual/2023/poster/11212", "video": "https://iclr.cc/virtual/2023/poster/11212", "author_site": "Huayu Chen, Cheng Lu, Chengyang Ying, Hang Su, Jun Zhu", "tldr": "", "abstract": "In offline reinforcement learning, weighted regression is a common method to ensure the learned policy stays close to the behavior policy and to prevent selecting out-of-sample actions. In this work, we show that due to the limited distributional expressivity of policy models, previous methods might still select unseen actions during training, which deviates from their initial motivation. To address this problem, we adopt a generative approach by decoupling the learned policy into two parts: an expressive generative behavior model and an action evaluation model. The key insight is that such decoupling avoids learning an explicitly parameterized policy model with a closed-form expression. Directly learning the behavior policy allows us to leverage existing advances in generative modeling, such as diffusion-based methods, to model diverse behaviors. As for action evaluation, we combine our method with an in-sample planning technique to further avoid selecting out-of-sample actions and increase computational efficiency. Experimental results on D4RL datasets show that our proposed method achieves competitive or superior performance compared with state-of-the-art offline RL methods, especially in complex tasks such as AntMaze. We also empirically demonstrate that our method can successfully learn from a heterogeneous dataset containing multiple distinctive but similarly successful strategies, whereas previous unimodal policies fail.", "keywords": "offline reinforcement learning;generative models;diffusion models;behavior modeling", "primary_area": "", "supplementary_material": "/attachment/ea32e0fee0b32bc3ab045eb3780b67439ab20526.zip", "author": "Huayu Chen;Cheng Lu;Chengyang Ying;Hang Su;Jun Zhu", "authorids": "~Huayu_Chen1;~Cheng_Lu5;~Chengyang_Ying1;~Hang_Su3;~Jun_Zhu2", "gender": "M;M;M;M;M", "homepage": "https://chendrag.github.io/;https://luchengthu.github.io/;https://yingchengyang.github.io/;http://ml.cs.tsinghua.edu.cn/~jun;", "dblp": "259/3113;91/1482-11;296/2065;50/2644-1;26/5371-6", "google_scholar": "0FBCHc4AAAAJ;vPE9VRoAAAAJ;vM6KE18AAAAJ;axsP38wAAAAJ;dxN1_X0AAAAJ", "orcid": ";;;;", "linkedin": ";;%E9%93%96%E9%98%B3-%E5%BA%94-9b682a203/;;", "or_profile": "~Huayu_Chen1;~Cheng_Lu5;~Chengyang_Ying1;~Jun_Zhu2;~Hang_Su2", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;mail.tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;PhD student;PhD student;Professor;Associate Professor", "bibtex": "@inproceedings{\nchen2023offline,\ntitle={Offline Reinforcement Learning via High-Fidelity Generative Behavior Modeling},\nauthor={Huayu Chen and Cheng Lu and Chengyang Ying and Hang Su and Jun Zhu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=42zs3qa2kpy}\n}", "github": "", "project": "", "reviewers": "dfM5;eCnE;3dPg", "pdf_size": 1187015, "recommendation": "6;6;6", "confidence": "4;4;4", "correctness": "3;3;4", "technical_novelty": "2;3;4", "empirical_novelty": "2;3;4", "wc_summary_paper": "36;58;66", "wc_strength_and_weaknesses": "156;113;188", "wc_clarity_quality_novelty_and_reproducibility": "43;29;149", "wc_summary_review": "26;262;59", "wc_review": "261;462;462", "wc_reply_reviewers": 
"14;32;6", "wc_reply_authors": "1882;2224;1171", "reply_reviewers": "1;1;1", "reply_authors": "6;6;5", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.816496580927726 ], "empirical_novelty_avg": [ 3.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 53.333333333333336, 12.684198393626966 ], "wc_strength_and_weaknesses_avg": [ 152.33333333333334, 30.728199137310703 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 73.66666666666667, 53.57445494097184 ], "wc_summary_review_avg": [ 115.66666666666667, 104.34664451827007 ], "wc_review_avg": [ 395.0, 94.75230867899737 ], "wc_reply_reviewers_avg": [ 17.333333333333332, 10.873004286866728 ], "wc_reply_authors_avg": [ 1759.0, 438.59548561288227 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 5.666666666666667, 0.4714045207910317 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 107, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8334389364184456486&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=42zs3qa2kpy", "email": "tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;mail.tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "43nOUI4VHUw", "title": "Deep Watermarks for Attributing Generative Models", "track": "main", "status": "Reject", "tldr": "", "abstract": "Generative models have enabled the creation of contents that are indistinguishable from those taken from the Nature. Open-source development of such models raised concerns about the risks in their misuse for malicious purposes. One potential risk mitigation strategy is to attribute generative models via watermarking. Current watermarking methods exhibit significant tradeoff between robust attribution accuracy and generation quality, and also lack principles for designing watermarks to improve this tradeoff. This paper investigates the use of latent semantic dimensions as watermarks, from where we can analyze the effects of design variables, including the choice of watermarking dimensions, watermarking strength, and the capacity of watermarks, on the accuracy-quality tradeoff. Compared with previous SOTA, our method requires minimum computation and is more applicable to large-scale models. 
We use StyleGAN2 and the latent diffusion model to demonstrate the efficacy of our method.", "keywords": "Model Attribution;Watermarking;Generative Models", "primary_area": "", "supplementary_material": "/attachment/bfcec1c182d726d82c1b4af2c0d06e7d0b67217d.zip", "author": "Guangyu Nie;Changhoon Kim;Yezhou Yang;Yi Ren", "authorids": "~Guangyu_Nie1;~Changhoon_Kim1;~Yezhou_Yang1;~Yi_Ren3", "gender": "M;M;M;M", "homepage": ";https://www.changhoonkim.com/;https://yezhouyang.engineering.asu.edu;http://designinformaticslab.github.io/", "dblp": "192/2782;;78/7455;", "google_scholar": ";z_04VyYAAAAJ;k2suuZgAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;", "linkedin": "jeremynie/;;;", "or_profile": "~Guangyu_Nie1;~Changhoon_Kim1;~Yezhou_Yang1;~Yi_Ren3", "aff": "Arizona State University;Arizona State University;Arizona State University;Arizona State University", "aff_domain": "asu.edu;asu.edu;asu.edu;asu.edu", "position": "PhD student;PhD student;Associate Professor;Associate Professor", "bibtex": "@misc{\nnie2023deep,\ntitle={Deep Watermarks for Attributing Generative Models},\nauthor={Guangyu Nie and Changhoon Kim and Yezhou Yang and Yi Ren},\nyear={2023},\nurl={https://openreview.net/forum?id=43nOUI4VHUw}\n}", "github": "", "project": "", "reviewers": "tdgU;h68J;X6TA;Dm6b", "site": "https://openreview.net/forum?id=43nOUI4VHUw", "pdf_size": 4293361, "recommendation": "3;5;6;6", "confidence": "3;3;3;3", "correctness": "3;3;4;4", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "48;53;72;111", "wc_strength_and_weaknesses": "323;33;152;315", "wc_clarity_quality_novelty_and_reproducibility": "30;10;24;229", "wc_summary_review": "37;161;28;80", "wc_review": "438;257;276;735", "wc_reply_reviewers": "0;0;0;31", "wc_reply_authors": "877;1055;572;588", "reply_reviewers": "0;0;0;1", "reply_authors": "2;2;1;1", "recommendation_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.0, 0.0 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 71.0, 24.76893215300167 ], "wc_strength_and_weaknesses_avg": [ 205.75, 120.84571775615386 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 73.25, 90.21467452693048 ], "wc_summary_review_avg": [ 76.5, 52.59515186782903 ], "wc_review_avg": [ 426.5, 191.497389016143 ], "wc_reply_reviewers_avg": [ 7.75, 13.423393758658799 ], "wc_reply_authors_avg": [ 773.0, 203.08003348433837 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.8164965809277259, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:-mGKU6rSbhYJ:scholar.google.com/&scioq=Deep+Watermarks+for+Attributing+Generative+Models&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Arizona State University", "aff_unique_dep": "", "aff_unique_url": "https://www.asu.edu", "aff_unique_abbr": "ASU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "44DHnx0Ya_j", "title": "Coordinated Strategy Identification Multi-Agent Reinforcement Learning", "track": "main", "status": "Withdraw", "tldr": "We present a framework that expedites and stabilizes learning of hierarchical multi-agent reinforcement learning via episodic memory, while achieving coordinated 
behaviors among agents with a novel theoretical regularization.", "abstract": "An agent's strategy can be considered a subset of the action space, specialized for certain goals. This paper introduces a coordinated Strategy Identification Multi-Agent reinforcement learning (MARL) framework with episodic memory, called SIMA. SIMA derives a new temporal difference (TD) target to increase the sample efficiency. This efficiency is achieved by keeping the best returns and the corresponding best joint strategies for given states. This TD target, with an additive strategy mixer, automatically switches between episodic control and conventional Q-learning according to the existence of similar memories. In addition, each agent needs to behave similarly according to its strategy trajectory for coordinated behaviors among agents and coherent evaluation of a group's joint strategies. To this end, SIMA introduces a theoretical regularization for action policies to maximize the mutual information between an agent\u2019s trajectory and its specified strategy. We demonstrate its significant performance improvement on the StarCraft Multi-Agent Challenge benchmark. ", "keywords": "Multi-Agent Reinforcement Learning;Coordinated Strategy;Hierarchical Multi-Agent learning;Episodic Memory", "primary_area": "", "supplementary_material": "", "author": "Hyungho Na;Il-chul Moon", "authorids": "~Hyungho_Na1;~Il-chul_Moon1", "gender": ";", "homepage": ";", "dblp": ";", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": ";", "aff": ";", "aff_domain": ";", "position": ";", "bibtex": "@misc{\nna2023coordinated,\ntitle={Coordinated Strategy Identification Multi-Agent Reinforcement Learning},\nauthor={Hyungho Na and Il-chul Moon},\nyear={2023},\nurl={https://openreview.net/forum?id=44DHnx0Ya_j}\n}", "github": "", "project": "", "reviewers": "6m7f;n26R;cypY", "site": "https://openreview.net/forum?id=44DHnx0Ya_j", "pdf_size": 1602786, "recommendation": "3;3;3", "confidence": "2;4;3", "correctness": "3;2;2", "technical_novelty": "2;2;3", "empirical_novelty": "2;2;2", "wc_summary_paper": "111;70;114", "wc_strength_and_weaknesses": "222;153;384", "wc_clarity_quality_novelty_and_reproducibility": "146;30;75", "wc_summary_review": "54;61;62", "wc_review": "533;314;635", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "correctness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 98.33333333333333, 20.07209228976613 ], "wc_strength_and_weaknesses_avg": [ 253.0, 96.81941953967706 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 83.66666666666667, 47.75167245471327 ], "wc_summary_review_avg": [ 59.0, 3.559026084010437 ], "wc_review_avg": [ 494.0, 133.91788528796295 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:lm0uZTEI_e8J:scholar.google.com/&scioq=Coordinated+Strategy+Identification+Multi-Agent+Reinforcement+Learning&hl=en&as_sdt=0,44", "gs_version_total": 0 }, { "id": "44GCcwJ5X2", "title": "Representing Multi-view Time-series Graph 
Structures for Multivariate Long-term Time-series Forecasting", "track": "main", "status": "Reject", "tldr": "An efficient, highly accurate, lightweight model for multivariate long-term time series forecasting.", "abstract": "Multivariate long-term time-series forecasting is a very challenging task in real-world application areas, such as electricity consumption and influenza-like illness forecasting. At present, researchers are focusing on designing robust and effective models, and have achieved good results. However, there are several issues with existing models that need to be overcome to ensure they provide optimal performance. First, the lack of a relationship structure between multivariate variables needs to be addressed. Second, most models only have a weak ability to capture local dynamic changes across the entire long-term time-series. Third, current models suffer from high computational complexity and unsatisfactory accuracy. To address these issues, we propose a novel method called Multi-view Time-series Graph Structure Representation (MTGSR) for multivariate long-term time-series forecasting tasks. MTGSR uses graph convolutional networks (GCNs) to construct topological relationships in the multivariate long-term time-series from three different perspectives: time, dimension, and crossing segments. Variation trends in the different dimensions of the multivariate long-term time-series are extracted through a difference operation so as to construct a topological map that reflects the correlations between the different dimensions. Then, to capture the dynamically changing characteristics of the fluctuation correlations between adjacent local sequences, MTGSR constructs a cross graph by calculating the correlation coefficients between adjacent local sequences. Extensive experiments on five different datasets show that MTGSR reduces errors by 20.41% over the state-of-the-art while maintaining linear complexity. Additionally, memory use is decreased by 66.52% and running time is reduced by 78.09%. 
", "keywords": "time series forecasting;deep learning;representational learning", "primary_area": "", "supplementary_material": "", "author": "Wzh Rslh;Jin Fan;Huifeng Wu;Danfeng Sun", "authorids": "~Wzh_Rslh1;fanjin@hdu.edu.cn;whf@hdu.edu.cn;danfeng.sun@ifak.eu", "gender": "M;;;", "homepage": ";;;", "dblp": ";;;", "google_scholar": "LvP_YfYAAAAJ;;;", "orcid": "0000-0003-4267-5359;;;", "linkedin": ";;;", "or_profile": "~Wzh_Rslh1;fanjin@hdu.edu.cn;whf@hdu.edu.cn;danfeng.sun@ifak.eu", "aff": "Hangzhou Dianzi University;;;", "aff_domain": "hdu.edu.cn;;;", "position": "MS student;;;", "bibtex": "@misc{\nrslh2023representing,\ntitle={Representing Multi-view Time-series Graph Structures for Multivariate Long-term Time-series Forecasting},\nauthor={Wzh Rslh and Jin Fan and Huifeng Wu and Danfeng Sun},\nyear={2023},\nurl={https://openreview.net/forum?id=44GCcwJ5X2}\n}", "github": "", "project": "", "reviewers": "9BLg;sneL;PLaJ;fjaW", "site": "https://openreview.net/forum?id=44GCcwJ5X2", "pdf_size": 2104173, "recommendation": "1;1;3;5", "confidence": "4;5;4;3", "correctness": "3;1;2;3", "technical_novelty": "2;1;2;2", "empirical_novelty": "0;1;2;2", "wc_summary_paper": "45;12;61;68", "wc_strength_and_weaknesses": "264;111;395;109", "wc_clarity_quality_novelty_and_reproducibility": "22;45;4;31", "wc_summary_review": "23;116;53;56", "wc_review": "354;284;513;264", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 2.5, 1.6583123951777 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 2.25, 0.82915619758885 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 46.5, 21.592822881689184 ], "wc_strength_and_weaknesses_avg": [ 219.75, 119.1246720876914 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 25.5, 14.874474780643517 ], "wc_summary_review_avg": [ 62.0, 33.741665637605976 ], "wc_review_avg": [ 353.75, 97.82733513696466 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.8528028654224417, "corr_recommendation_correctness": 0.4545454545454545, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11629653695503727053&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0", "aff_unique_norm": "Hangzhou Dianzi University", "aff_unique_dep": "", "aff_unique_url": "http://www.hdu.edu.cn/", "aff_unique_abbr": "HGHDU", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "id": "45FFlw8N47", "title": "AUGMENTING ZERO-SHOT DENSE RETRIEVERS WITH PLUG-IN MIXTURE-OF-MEMORIES", "track": "main", "status": "Reject", "tldr": "We explore the potential of augmenting language models with mixture-of-memory and plugging in new corpora during inference, which leads to their enhanced generalization ability on the zero-shot dense retrieval task.", "abstract": "In this paper we improve the zero-shot generalization ability of language models via Mixture-Of-Memory Augmentation (MoMA), a mechanism that retrieves augmentation documents from multiple information corpora (\u201cexternal memories\u201d), with the option to \u201cplug in\u201d new memory at inference time. 
We develop a joint learning mechanism that trains the augmentation component with latent labels derived from the end retrieval task, paired with hard negatives from the memory mixture. We instantiate the model in a zero-shot dense retrieval setting by augmenting a strong T5-based retriever with MoMA. Our model, MoMA-DR, obtains strong zero-shot retrieval accuracy on the eighteen tasks included in the standard BEIR benchmark. It outperforms other dense retrieval models of similar scales and achieves comparable accuracy with systems that seek generalization from increased scales in encoder models or vector indices. Our analysis illustrates the necessity of augmenting with mixture-of-memory for robust generalization, the benefits of joint learning, and how MoMA-DR utilizes the plug-in memory at inference time without changing its parameters. We plan to open source our code.", "keywords": "Retrieval Augmented Language Model;Zero-shot Dense Retrieval;Mixture of Memory", "primary_area": "", "supplementary_material": "/attachment/556b0a7fd55ce0f2f701bbd5b6e59f5a86a13cc7.zip", "author": "Suyu Ge;Chenyan Xiong;Corby Louis Rosset;Arnold Overwijk;Jiawei Han;Paul N. Bennett", "authorids": "~Suyu_Ge1;~Chenyan_Xiong1;corbyrosset@microsoft.com;~Arnold_Overwijk1;~Jiawei_Han1;~Paul_N._Bennett1", "gender": ";M;;M;M;", "homepage": ";https://www.cs.cmu.edu/~cx/;;;http://hanj.cs.illinois.edu/;https://www.microsoft.com/en-us/research/people/pauben/publications/", "dblp": ";18/10886;;16/7404;h/JiaweiHan.html;33/6188", "google_scholar": ";E9BaEBYAAAAJ;;zKiMGDgAAAAJ;https://scholar.google.com.tw/citations?user=Kv9AbjMAAAAJ;AIncPrIAAAAJ", "orcid": ";;;;0000-0002-3629-2696;0009-0006-7852-9651", "linkedin": ";;;;;paulnbennett/", "or_profile": "~Suyu_Ge1;~Chenyan_Xiong1;corbyrosset@microsoft.com;~Arnold_Overwijk1;~Jiawei_Han1;~Paul_N._Bennett1", "aff": ";Microsoft Research;;Meta;University of Illinois at Urbana-Champaign (UIUC);Microsoft", "aff_domain": ";research.microsoft.com;;meta.com;illinois.edu;microsoft.com", "position": ";Principal Researcher;;Engineering Manager;Full Professor;Researcher", "bibtex": "@misc{\nge2023augmenting,\ntitle={{AUGMENTING} {ZERO}-{SHOT} {DENSE} {RETRIEVERS} {WITH} {PLUG}-{IN} {MIXTURE}-{OF}-{MEMORIES}},\nauthor={Suyu Ge and Chenyan Xiong and Corby Louis Rosset and Arnold Overwijk and Jiawei Han and Paul N. 
Bennett},\nyear={2023},\nurl={https://openreview.net/forum?id=45FFlw8N47}\n}", "github": "", "project": "", "reviewers": "VaeQ;DtfB;U4cd;KU9C", "site": "https://openreview.net/forum?id=45FFlw8N47", "pdf_size": 929977, "recommendation": "5;5;5;6", "confidence": "4;3;3;2", "correctness": "3;3;2;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;1;2;3", "wc_summary_paper": "98;67;21;76", "wc_strength_and_weaknesses": "205;254;121;9", "wc_clarity_quality_novelty_and_reproducibility": "134;61;13;50", "wc_summary_review": "60;32;19;145", "wc_review": "497;414;174;280", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1023;928;155;497", "reply_reviewers": "0;0;0;0", "reply_authors": "2;2;1;1", "recommendation_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 65.5, 28.0579756931964 ], "wc_strength_and_weaknesses_avg": [ 147.25, 92.914947667208 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 64.5, 43.88906469725688 ], "wc_summary_review_avg": [ 64.0, 49.05609034564414 ], "wc_review_avg": [ 341.25, 123.76868545799458 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 650.75, 348.1539709668698 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.816496580927726, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13247171523430239100&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Microsoft;Meta;University of Illinois Urbana-Champaign", "aff_unique_dep": "Microsoft Research;Meta Platforms, Inc.;", "aff_unique_url": "https://www.microsoft.com/en-us/research;https://meta.com;https://illinois.edu", "aff_unique_abbr": "MSR;Meta;UIUC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "45TeQUJw9tn", "title": "Exploring Chemical Space with Score-based Out-of-distribution Generation", "track": "main", "status": "Reject", "tldr": "We propose a score-based molecular generative framework that aims to generate out-of-distribution molecules beyond the known molecular space and find novel chemical optima of desired properties.", "abstract": "A well-known limitation of existing works on molecule generation is that the generated molecules highly resemble those in the training set. To generate truly novel molecules with completely different structures that may have even better properties than known molecules for de novo drug discovery, more powerful exploration in the chemical space is necessary. To this end, we propose Molecular Out-Of-distribution Diffusion (MOOD), a novel score-based diffusion scheme that incorporates out-of-distribution (OOD) control in the generative stochastic differential equation (SDE) with simple control of a hyperparameter, and thus requires no additional computational costs, unlike existing methods (e.g., RL-based methods). However, some novel molecules may be chemically implausible, or may not meet the basic requirements of real-world drugs. 
Thus, MOOD performs conditional generation by utilizing the gradients from a property prediction network that guides the reverse-time diffusion process to high-scoring regions according to multiple target properties such as protein-ligand interactions, drug-likeness, and synthesizability. This allows MOOD to search for novel and meaningful molecules rather than generating unseen yet trivial ones. We experimentally validate that MOOD is able to explore the chemical space beyond the training distribution, generating molecules that outscore ones found with existing methods, and even the top 0.01% of the original training pool.", "keywords": "molecule generation;score-based generative modeling", "primary_area": "", "supplementary_material": "/attachment/2951e3b55b600a33997be74b44554ab40e3266e0.zip", "author": "Seul Lee;Jaehyeong Jo;Sung Ju Hwang", "authorids": "~Seul_Lee1;~Jaehyeong_Jo1;~Sung_Ju_Hwang1", "gender": "Not Specified;M;", "homepage": "https://seullee05.github.io;https://github.com/harryjo97;", "dblp": "159/0357;296/2037;", "google_scholar": "Ek0N9YYAAAAJ;https://scholar.google.com/citations?hl=ko;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Seul_Lee1;~Jaehyeong_Jo1;~Sung_Ju_Hwang1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;", "aff_domain": "kaist.ac.kr;kaist.ac.kr;", "position": "PhD student;MS student;", "bibtex": "@misc{\nlee2023exploring,\ntitle={Exploring Chemical Space with Score-based Out-of-distribution Generation},\nauthor={Seul Lee and Jaehyeong Jo and Sung Ju Hwang},\nyear={2023},\nurl={https://openreview.net/forum?id=45TeQUJw9tn}\n}", "github": "", "project": "", "reviewers": "ZWEt;gMda;3YM5;xSWJ", "site": "https://openreview.net/forum?id=45TeQUJw9tn", "pdf_size": 3968149, "recommendation": "1;5;5;8", "confidence": "4;3;3;5", "correctness": "1;2;2;4", "technical_novelty": "3;2;4;3", "empirical_novelty": "2;2;4;4", "wc_summary_paper": "80;83;252;94", "wc_strength_and_weaknesses": "1049;213;148;596", "wc_clarity_quality_novelty_and_reproducibility": "224;56;63;68", "wc_summary_review": "110;44;140;178", "wc_review": "1463;396;603;936", "wc_reply_reviewers": "1574;185;175;160", "wc_reply_authors": "3275;1355;813;1279", "reply_reviewers": "4;1;1;1", "reply_authors": "7;3;2;2", "recommendation_avg": [ 4.75, 2.48746859276655 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 2.25, 1.0897247358851685 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 3.0, 1.0 ], "wc_summary_paper_avg": [ 127.25, 72.21279318791096 ], "wc_strength_and_weaknesses_avg": [ 501.5, 359.47218251208255 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 102.75, 70.13335511723362 ], "wc_summary_review_avg": [ 118.0, 49.05099387372289 ], "wc_review_avg": [ 849.5, 403.20249255181943 ], "wc_reply_reviewers_avg": [ 523.5, 606.5717187604447 ], "wc_reply_authors_avg": [ 1680.5, 943.6814875793633 ], "reply_reviewers_avg": [ 1.75, 1.299038105676658 ], "reply_authors_avg": [ 3.5, 2.0615528128088303 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 0.9453431006169687, "gs_citation": 76, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4770756405792375766&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff_unique_index": "0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", 
"aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "title": "DCI-ES: An Extended Disentanglement Framework with Connections to Identifiability", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11402", "id": "462z-gLgSht", "poster": "/media/PosterPDFs/ICLR%202023/11402.png?t=1681738449.552845", "openreview": "https://openreview.net/forum?id=462z-gLgSht", "slides": "https://iclr.cc/virtual/2023/poster/11402", "video": "https://iclr.cc/virtual/2023/poster/11402", "author_site": "Cian Eastwood, Andrei L Nicolicioiu, Julius von K\u00fcgelgen, Armin Kekic, Frederik Tr\u00e4uble, Andrea Dittadi, Bernhard Schoelkopf", "tldr": "We extend the DCI framework for evaluating disentangled representations and connect it to identifiability.", "abstract": "In representation learning, a common approach is to seek representations which disentangle the underlying factors of variation. Eastwood & Williams (2018) proposed three metrics for quantifying the quality of such disentangled representations: disentanglement (D), completeness (C) and informativeness (I). In this work, we first connect this DCI framework to two common notions of linear and nonlinear identifiability, thereby establishing a formal link between disentanglement and the closely-related field of independent component analysis. We then propose an extended DCI-ES framework with two new measures of representation quality\u2014explicitness (E) and size (S)\u2014and point out how D and C can be computed for black-box predictors. Our main idea is that the functional capacity required to use a representation is an important but thus-far neglected aspect of representation quality, which we quantify using explicitness or ease-of-use (E). 
We illustrate the relevance of our extensions on the MPI3D and Cars3D datasets.", "keywords": "disentanglement;identifiability;representation learning", "primary_area": "", "supplementary_material": "", "author": "Cian Eastwood;Andrei Liviu Nicolicioiu;Julius Von K\u00fcgelgen;Armin Keki\u0107;Frederik Tr\u00e4uble;Andrea Dittadi;Bernhard Sch\u00f6lkopf", "authorids": "~Cian_Eastwood1;~Andrei_Liviu_Nicolicioiu1;~Julius_Von_K\u00fcgelgen1;~Armin_Keki\u01071;~Frederik_Tr\u00e4uble1;~Andrea_Dittadi1;~Bernhard_Sch\u00f6lkopf1", "gender": "M;;M;;M;M;", "homepage": "https://cianeastwood.github.io/;https://andreinicolicioiu.github.io/;https://sites.google.com/view/julius-von-kuegelgen/home;https://arminkekic.com/;https://ei.is.tuebingen.mpg.de/person/ftraeuble;https://addtt.github.io;", "dblp": "238/2792;;223/5666;330/4165;;;", "google_scholar": "https://scholar.google.com/citations?hl=en;BVUKrDQAAAAJ;6EOl3hAAAAAJ;b7GNNQ8AAAAJ;https://scholar.google.de/citations?user=oc2OOyMAAAAJ;PrvuuaAAAAAJ;", "orcid": ";;0000-0001-6469-4118;0000-0002-1940-2523;;;", "linkedin": ";;julius-von-k%C3%BCgelgen/;arminkekic/;;;", "or_profile": "~Cian_Eastwood1;~Andrei_Liviu_Nicolicioiu1;~Julius_Von_K\u00fcgelgen1;~Armin_Keki\u01071;~Frederik_Tr\u00e4uble1;~Andrea_Dittadi1;~Bernhard_Sch\u00f6lkopf1", "aff": "University of Edinburgh;Montreal Institute for Learning Algorithms, University of Montreal, Universit\u00e9 de Montr\u00e9al;, Max Planck Institute for Intelligent Systems;Max Planck Institute for Intelligent Systems, Max-Planck Institute;Max Planck Institute for Intelligent Systems;KTH Royal Institute of Technology;", "aff_domain": "ed.ac.uk;mila.umontreal.ca;is.tuebingen.mpg.de;tue.mpg.de;is.tuebingen.mpg.de;kth.se;", "position": "PhD student;PhD student;PhD student;PhD student;PhD student;Postdoc;", "bibtex": "@inproceedings{\neastwood2023dcies,\ntitle={{DCI}-{ES}: An Extended Disentanglement Framework with Connections to Identifiability},\nauthor={Cian Eastwood and Andrei Liviu Nicolicioiu and Julius Von K{\\\"u}gelgen and Armin Keki{\\'c} and Frederik Tr{\\\"a}uble and Andrea Dittadi and Bernhard Sch{\\\"o}lkopf},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=462z-gLgSht}\n}", "github": "", "project": "", "reviewers": "xBj7;wQrr;WGrh;R81A", "pdf_size": 948727, "recommendation": "6;6;6;8", "confidence": "4;4;3;4", "correctness": "3;3;3;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;3;2;2", "wc_summary_paper": "33;121;49;87", "wc_strength_and_weaknesses": "467;322;198;476", "wc_clarity_quality_novelty_and_reproducibility": "5;419;2;35", "wc_summary_review": "93;90;117;113", "wc_review": "598;952;366;711", "wc_reply_reviewers": "797;135;44;42", "wc_reply_authors": "2214;1368;1313;1087", "reply_reviewers": "1;1;1;1", "reply_authors": "4;3;4;4", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 72.5, 34.18698582794336 ], "wc_strength_and_weaknesses_avg": [ 365.75, 114.52155910569851 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 115.25, 175.8442137233978 ], "wc_summary_review_avg": [ 103.25, 11.882234638316145 ], "wc_review_avg": [ 656.75, 211.01110752754226 ], "wc_reply_reviewers_avg": [ 254.5, 315.4572078745388 ], "wc_reply_authors_avg": [ 1495.5, 427.982768344708 ], "reply_reviewers_avg": [ 1.0, 0.0 ], 
"reply_authors_avg": [ 3.75, 0.4330127018922193 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 0.0, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16242067451985264742&as_sdt=5,24&sciodt=0,24&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=462z-gLgSht", "email": "ed.ac.uk;mila.umontreal.ca;is.tuebingen.mpg.de;tue.mpg.de;is.tuebingen.mpg.de;kth.se;", "author_num": 7, "aff_unique_index": "0;1;2;2;2;3", "aff_unique_norm": "University of Edinburgh;University of Montreal;Max Planck Institute for Intelligent Systems;KTH Royal Institute of Technology", "aff_unique_dep": ";Montreal Institute for Learning Algorithms;;", "aff_unique_url": "https://www.ed.ac.uk;https://www.mila.quebec;https://www.mpi-is.mpg.de;https://www.kth.se", "aff_unique_abbr": "Edinburgh;MILA;MPI-IS;KTH", "aff_campus_unique_index": "1", "aff_campus_unique": ";Montreal", "aff_country_unique_index": "0;1;2;2;2;3", "aff_country_unique": "United Kingdom;Canada;Germany;Sweden" }, { "id": "470wZ5Qk4ur", "title": "Results for Perfect Classification for Graph Attention on the Contextual Stochastic Block Model", "track": "main", "status": "Desk Reject", "tldr": "", "abstract": "We study the ability of one layer Graph Attention Networks (GAT) to achieve perfect node classification for a simple synthetic data model called the contextual stochastic block model (CSBM). We determine a \\textit{positive} CSBM parameter regime such that GAT achieves perfect classification and a \\textit{negative} CSBM parameter regime such that GAT fails to achieve perfect classification. For the positive result we use a generalized attention mechanism of the original~\\citep{Velickovic2018GraphAN}. For the negative result we consider a fixed attention mechanism which is determined using the labels of the nodes. We pose two questions. \\textit{Is the condition of GAT for achieving perfect classification better than that of a simple community detection method, i.e., thresholding the second principal eigenvector of the adjacency matrix~\\citep{Abbe2018}?} The answer to this question is negative, and it depends on the parameter regime of the CSBM distribution. This happens because the graph information is coupled with the feature information using the operation of matrix multiplication. However, such matrix multiplication operation can be detrimental for perfect node classification. 
The second question is, \\textit{is the condition of GAT for achieving perfect classification better than that of simple graph convolution (GCN)~\\citep{kipf:gcn}?} We show that GAT is better than GCN if the attention mechanism of GAT is a Lipschitz function, while it is not better if it is not a Lipschitz function.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Kimon Fountoulakis;Amit Levi", "authorids": "~Kimon_Fountoulakis1;~Amit_Levi1", "gender": "M;M", "homepage": "https://opallab.ca;https://sites.google.com/view/amit-levi/home", "dblp": "149/5799;161/4014.html", "google_scholar": "https://scholar.google.ca/citations?user=K-SafJUAAAAJ;https://scholar.google.ca/citations?user=kb4ubhcAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Kimon_Fountoulakis1;~Amit_Levi1", "aff": "University of Waterloo;Huawei Noah\u2019s Ark Lab", "aff_domain": "uwaterloo.ca;huawei.com", "position": "Assistant Professor;Researcher", "bibtex": "@misc{\nfountoulakis2023results,\ntitle={Results for Perfect Classification for Graph Attention on the Contextual Stochastic Block Model},\nauthor={Kimon Fountoulakis and Amit Levi},\nyear={2023},\nurl={https://openreview.net/forum?id=470wZ5Qk4ur}\n}", "github": "", "project": "", "reviewers": "", "site": "https://openreview.net/forum?id=470wZ5Qk4ur", "pdf_size": 303943, "recommendation": "", "confidence": "", "correctness": "", "technical_novelty": "", "empirical_novelty": "", "wc_summary_paper": "", "wc_strength_and_weaknesses": "", "wc_clarity_quality_novelty_and_reproducibility": "", "wc_summary_review": "", "wc_review": "", "wc_reply_reviewers": "", "wc_reply_authors": "", "reply_reviewers": "", "reply_authors": "", "recommendation_avg": [ 0, 0 ], "confidence_avg": [ 0, 0 ], "correctness_avg": [ 0, 0 ], "technical_novelty_avg": [ 0, 0 ], "empirical_novelty_avg": [ 0, 0 ], "wc_summary_paper_avg": [ 0, 0 ], "wc_strength_and_weaknesses_avg": [ 0, 0 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 0, 0 ], "wc_summary_review_avg": [ 0, 0 ], "wc_review_avg": [ 0, 0 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 1, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0, "corr_recommendation_correctness": 0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:v3pus33h6fkJ:scholar.google.com/&scioq=Results+for+Perfect+Classification+for+Graph+Attention+on+the+Contextual+Stochastic+Block+Model&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "University of Waterloo;Huawei", "aff_unique_dep": ";Noah\u2019s Ark Lab", "aff_unique_url": "https://uwaterloo.ca;https://www.huawei.com", "aff_unique_abbr": "UW;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Canada;China" }, { "title": "Learning Input-agnostic Manipulation Directions in StyleGAN with Text Guidance", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11219", "id": "47B_ctC4pJ", "poster": "", "openreview": "https://openreview.net/forum?id=47B_ctC4pJ", "slides": "https://iclr.cc/virtual/2023/poster/11219", "video": "https://iclr.cc/virtual/2023/poster/11219", "author_site": "Yoonjeon Kim, Hyunsu Kim, Junho Kim, Yunjey Choi, Eunho Yang", "tldr": "", "abstract": "With the advantages of fast inference and human-friendly flexible manipulation, image-agnostic style manipulation via text 
guidance enables new applications that were not previously available. The state-of-the-art text-guided image-agnostic manipulation method embeds the representation of each channel of StyleGAN independently in the Contrastive Language-Image Pre-training (CLIP) space, and provides it in the form of a Dictionary to quickly find the channel-wise manipulation direction at inference time. However, in this paper we argue that this dictionary, which is constructed by controlling each channel individually, is too limited to accommodate the versatility of text guidance, since the collective and interactive relations among multiple channels are not considered. Indeed, we show that it fails to discover a large portion of manipulation directions that can be found by existing methods, which manually manipulate the latent space without text. To alleviate this issue, we propose a novel method that learns a Dictionary, whose entries each correspond to the representation of a single channel, by taking into account the manipulation effect coming from the interaction with multiple other channels. We demonstrate that our strategy resolves the inability of previous methods to find diverse known directions from unsupervised methods and unknown directions from random text while maintaining the real-time inference speed and disentanglement ability.", "keywords": "image manipulation;deep learning;generative adversarial network", "primary_area": "", "supplementary_material": "", "author": "Yoonjeon Kim;Hyunsu Kim;Junho Kim;Yunjey Choi;Eunho Yang", "authorids": "~Yoonjeon_Kim1;~Hyunsu_Kim1;~Junho_Kim3;~Yunjey_Choi3;~Eunho_Yang1", "gender": ";M;M;M;M", "homepage": ";https://github.com/blandocs;http://bit.ly/jhkim_resume;https://sites.google.com/site/hleehome2/;https://yunjey.github.io/", "dblp": "279/2921;239/8447;;96/2621;210/0980", "google_scholar": ";VY5PodkAAAAJ;WtjDugkAAAAJ;;v_4lOaAAAAAJ", "orcid": ";;0000-0003-3712-8510;;", "linkedin": "yoonjeon-kim-9898061b0/;blandocs/;taki0112/;;", "or_profile": "~Yoonjeon_Kim1;~Hyunsu_Kim1;~Junho_Kim3;~Eunho_Yang1;~yunjey_choi1", "aff": "Korea Advanced Institute of Science & Technology;NAVER;NAVER;Korea Advanced Institute of Science & Technology;NAVER", "aff_domain": "kaist.ac.kr;navercorp.com;navercorp.com;kaist.ac.kr;navercorp.com", "position": "MS student;Researcher;Research Scientist;Associate Professor;Research Scientist", "bibtex": "@inproceedings{\nkim2023learning,\ntitle={Learning Input-agnostic Manipulation Directions in Style{GAN} with Text Guidance},\nauthor={Yoonjeon Kim and Hyunsu Kim and Junho Kim and Yunjey Choi and Eunho Yang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=47B_ctC4pJ}\n}", "github": "", "project": "", "reviewers": "jy1p;bYKo;RdgP;v5mm", "pdf_size": 43332084, "recommendation": "6;6;6;6", "confidence": "2;4;4;3", "correctness": "3;2;3;2", "technical_novelty": "3;2;3;2", "empirical_novelty": "2;2;3;2", "wc_summary_paper": "46;56;77;55", "wc_strength_and_weaknesses": "176;335;238;217", "wc_clarity_quality_novelty_and_reproducibility": "29;49;41;60", "wc_summary_review": "63;62;13;61", "wc_review": "314;502;369;393", "wc_reply_reviewers": "44;73;0;19", "wc_reply_authors": "710;1451;422;719", "reply_reviewers": "1;1;0;1", "reply_authors": "1;4;1;2", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], 
"wc_summary_paper_avg": [ 58.5, 11.368817000902073 ], "wc_strength_and_weaknesses_avg": [ 241.5, 58.40590723548432 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 44.75, 11.321991874224253 ], "wc_summary_review_avg": [ 49.75, 21.22940178149163 ], "wc_review_avg": [ 394.5, 68.35385870600138 ], "wc_reply_reviewers_avg": [ 34.0, 27.39525506360545 ], "wc_reply_authors_avg": [ 825.5, 380.3764582620749 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10144662880557780576&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=47B_ctC4pJ", "email": "kaist.ac.kr;navercorp.com;navercorp.com;kaist.ac.kr;navercorp.com", "author_num": 5, "aff_unique_index": "0;1;1;0;1", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;NAVER Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://www.kaist.ac.kr;https://www.naver.com", "aff_unique_abbr": "KAIST;NAVER", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "47C06k5D2cn", "title": "Blessing from Experts: Super Reinforcement Learning in Confounded Environments", "track": "main", "status": "Reject", "tldr": "", "abstract": "We introduce super reinforcement learning in the batch setting, which takes the observed action as input for enhanced policy learning. In the presence of unmeasured confounders, the recommendations from human experts recorded in the observed data allow us to recover certain unobserved information. Including this information in the policy search, the proposed super reinforcement learning will yield a super policy that is guaranteed to outperform both the standard optimal policy and the behavior one (e.g., the expert\u2019s recommendation). Furthermore, to address the issue of unmeasured confounding in finding super-policies, a number of non-parametric identification results are established. 
Finally, we develop two super-policy learning algorithms and derive their corresponding finite-sample regret guarantees.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Jiayi Wang;Zhengling Qi;Chengchun Shi", "authorids": "~Jiayi_Wang7;~Zhengling_Qi1;~Chengchun_Shi1", "gender": "F;;M", "homepage": "https://jiayiwang1017.github.io/;https://sites.google.com/view/statsqizl/home?authuser=0;https://callmespring.github.io/", "dblp": ";173/0201;", "google_scholar": ";;dDGy3N0AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Jiayi_Wang7;~Zhengling_Qi1;~Chengchun_Shi1", "aff": "University of Texas at Dallas;George Washington University;London School of Economics and Political Science, University of London", "aff_domain": "utdallas.edu;gwu.edu;lse.ac.uk", "position": "Assistant Professor;Assistant Professor;Assistant Professor", "bibtex": "@misc{\nwang2023blessing,\ntitle={Blessing from Experts: Super Reinforcement Learning in Confounded Environments},\nauthor={Jiayi Wang and Zhengling Qi and Chengchun Shi},\nyear={2023},\nurl={https://openreview.net/forum?id=47C06k5D2cn}\n}", "github": "", "project": "", "reviewers": "ye4h;FcTZ;SFSz", "site": "https://openreview.net/forum?id=47C06k5D2cn", "pdf_size": 789960, "recommendation": "3;5;6", "confidence": "4;3;2", "correctness": "3;4;3", "technical_novelty": "3;3;3", "empirical_novelty": "0;0;2", "wc_summary_paper": "200;473;65", "wc_strength_and_weaknesses": "451;93;70", "wc_clarity_quality_novelty_and_reproducibility": "90;2;333", "wc_summary_review": "46;37;54", "wc_review": "787;605;522", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "2032;1119;1169", "reply_reviewers": "0;0;0", "reply_authors": "5;3;3", "recommendation_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 0.6666666666666666, 0.9428090415820634 ], "wc_summary_paper_avg": [ 246.0, 169.7115199389835 ], "wc_strength_and_weaknesses_avg": [ 204.66666666666666, 174.43687173938378 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 141.66666666666666, 139.98174484156456 ], "wc_summary_review_avg": [ 45.666666666666664, 6.944222218666553 ], "wc_review_avg": [ 638.0, 110.67369455596332 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1440.0, 419.10460110414755 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 3.6666666666666665, 0.9428090415820634 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.9819805060619659, "corr_recommendation_correctness": 0.18898223650461363, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17229274720950509273&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Texas at Dallas;George Washington University;London School of Economics and Political Science", "aff_unique_dep": ";;", "aff_unique_url": "https://www.utdallas.edu;https://www.gwu.edu;https://www.lse.ac.uk", "aff_unique_abbr": "UT Dallas;GWU;LSE", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Dallas;;London", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;United Kingdom" }, { "id": "47DzlkyH3dM", "title": "Variational Learning ISTA", "track": "main", "status": "Reject", "tldr": "", "abstract": "Compressed sensing combines the power of convex optimization techniques with a sparsity inducing prior 
on the signal space to solve an underdetermined system of equations. For many problems, the sparsifying dictionary is not directly given, nor can its existence be assumed. Moreover, the sensing matrix can change across different scenarios. Addressing these issues requires solving a sparse representation learning problem, namely dictionary learning, taking into account the epistemic uncertainty on the learned dictionaries and, finally, jointly learning sparse representations and reconstructions under varying sensing matrix conditions.\nWe propose a variant of LISTA that incorporates the sensing matrix into the architecture. In particular, we propose to learn a distribution over dictionaries via a variational approach, dubbed VLISTA, which approximates a posterior distribution over the dictionaries as part of an unfolded LISTA-based recovery network. Such a variational posterior distribution is updated after each iteration, and thereby adapts the dictionary according to the optimization dynamics. As a result, VLISTA provides a probabilistic way to jointly learn the dictionary distribution and the reconstruction algorithm with varying sensing matrices. We provide theoretical and experimental support for our architecture and show that it learns calibrated uncertainties.", "keywords": "compressed sensing;LISTA;variational models;inverse problems", "primary_area": "", "supplementary_material": "", "author": "Fabio Valerio Massoli;Christos Louizos;Arash Behboodi", "authorids": "~Fabio_Valerio_Massoli1;~Christos_Louizos1;~Arash_Behboodi1", "gender": "M;;M", "homepage": ";;https://arashbehboodi.github.io/", "dblp": "244/4976;;97/7718", "google_scholar": "https://scholar.google.it/citations?user=b5kxczMAAAAJ;;", "orcid": ";;", "linkedin": "fvmassoli17/;;", "or_profile": "~Fabio_Valerio_Massoli1;~Christos_Louizos1;~Arash_Behboodi1", "aff": "Qualcomm Inc, QualComm;;QualComm", "aff_domain": "qti.qualcomm.com;;qualcomm.com", "position": "Researcher;;Machine Learning Researcher", "bibtex": "@misc{\nmassoli2023variational,\ntitle={Variational Learning {ISTA}},\nauthor={Fabio Valerio Massoli and Christos Louizos and Arash Behboodi},\nyear={2023},\nurl={https://openreview.net/forum?id=47DzlkyH3dM}\n}", "github": "", "project": "", "reviewers": "uswp;2dyB;3LdD", "site": "https://openreview.net/forum?id=47DzlkyH3dM", "pdf_size": 733000, "recommendation": "6;6;6", "confidence": "3;4;4", "correctness": "4;3;3", "technical_novelty": "3;3;3", "empirical_novelty": "2;2;2", "wc_summary_paper": "48;48;57", "wc_strength_and_weaknesses": "285;72;288", "wc_clarity_quality_novelty_and_reproducibility": "316;21;34", "wc_summary_review": "56;30;163", "wc_review": "705;171;542", "wc_reply_reviewers": "87;0;0", "wc_reply_authors": "1000;528;919", "reply_reviewers": "1;0;0", "reply_authors": "2;2;2", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 51.0, 4.242640687119285 ], "wc_strength_and_weaknesses_avg": [ 215.0, 101.1236866416568 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 123.66666666666667, 136.10371862011053 ], "wc_summary_review_avg": [ 83.0, 57.55577005537035 ], "wc_review_avg": [ 472.6666666666667, 223.44922366290638 ], "wc_reply_reviewers_avg": [ 29.0, 41.012193308819754 ], "wc_reply_authors_avg": [ 815.6666666666666, 206.081429429135 ], 
"reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:FNoW9EWdGnwJ:scholar.google.com/&scioq=Variational+Learning+ISTA&hl=en&as_sdt=0,47", "gs_version_total": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Qualcomm Incorporated", "aff_unique_dep": "", "aff_unique_url": "https://www.qualcomm.com", "aff_unique_abbr": "Qualcomm", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Online Low Rank Matrix Completion", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11229", "id": "47KG_AvNqeZ", "poster": "/media/PosterPDFs/ICLR%202023/11229.png?t=1680946070.4581635", "openreview": "https://openreview.net/forum?id=47KG_AvNqeZ", "slides": "https://iclr.cc/virtual/2023/poster/11229", "video": "https://iclr.cc/virtual/2023/poster/11229", "author_site": "Soumyabrata Pal, Prateek Jain", "tldr": "A novel algorithm for solving online low-rank matrix completion problem with optimal regret for rank-one case.", "abstract": " We study the problem of online low-rank matrix completion with $\\mathsf{M}$ users, $\\mathsf{N}$ items and $\\mathsf{T}$ rounds. In each round, the algorithm recommends one item per user, for which it gets a (noisy) reward sampled from a low-rank user-item preference matrix. The goal is to design a method with sub-linear regret (in $\\mathsf{T}$) and nearly optimal dependence on $\\mathsf{M}$ and $\\mathsf{N}$. The problem can be easily mapped to the standard multi-armed bandit problem where each item is an independent arm, but that leads to poor regret as the correlation between arms and users is not exploited. On the other hand, exploiting the low-rank structure of reward matrix is challenging due to non-convexity of the low-rank manifold. We first demonstrate that the low-rank structure can be exploited using a simple explore-then-commit (ETC) approach that ensures a regret of $O(\\mathsf{polylog} (\\mathsf{M}+\\mathsf{N}) \\mathsf{T}^{2/3})$. That is, roughly only $\\mathsf{polylog} (\\mathsf{M}+\\mathsf{N})$ item recommendations are required per user to get a non-trivial solution. We then improve our result for the rank-$1$ setting which in itself is quite challenging and encapsulates some of the key issues. Here, we propose OCTAL (Online Collaborative filTering using iterAtive user cLustering) that guarantees nearly optimal regret of $O(\\mathsf{polylog} (\\mathsf{M}+\\mathsf{N}) \\mathsf{T}^{1/2})$. OCTAL is based on a novel technique of clustering users that allows iterative elimination of items and leads to a nearly optimal minimax rate. 
", "keywords": "Matrix Completion;Online Learning;Recommendation System", "primary_area": "", "supplementary_material": "/attachment/37757cbaa4eba54c33e364c2198851de563f9c4f.zip", "author": "Soumyabrata Pal;Prateek Jain", "authorids": "~Soumyabrata_Pal1;~Prateek_Jain1", "gender": "M;M", "homepage": "https://soumyabratap.github.io/;http://prateekjain.org", "dblp": "206/6371;https://dblp.uni-trier.de/pers/j/Jain_0002:Prateek.html", "google_scholar": "J4UxoTEAAAAJ;qYhRbJoAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Soumyabrata_Pal1;~Prateek_Jain1", "aff": "Google;Google", "aff_domain": "google.com;google.com", "position": "Postdoc;Researcher", "bibtex": "@inproceedings{\npal2023online,\ntitle={Online Low Rank Matrix Completion},\nauthor={Soumyabrata Pal and Prateek Jain},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=47KG_AvNqeZ}\n}", "github": "", "project": "", "reviewers": "qsQL;qZSG;cVaQ", "pdf_size": 509695, "recommendation": "6;8;8", "confidence": "4;4;4", "correctness": "3;1;4", "technical_novelty": "2;4;4", "empirical_novelty": "0;3;4", "wc_summary_paper": "55;68;181", "wc_strength_and_weaknesses": "615;74;276", "wc_clarity_quality_novelty_and_reproducibility": "31;8;964", "wc_summary_review": "47;38;631", "wc_review": "748;188;2052", "wc_reply_reviewers": "335;0;1341", "wc_reply_authors": "1602;74;1768", "reply_reviewers": "2;0;7", "reply_authors": "4;1;6", "recommendation_avg": [ 7.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.6666666666666665, 1.247219128924647 ], "technical_novelty_avg": [ 3.3333333333333335, 0.9428090415820634 ], "empirical_novelty_avg": [ 2.3333333333333335, 1.699673171197595 ], "wc_summary_paper_avg": [ 101.33333333333333, 56.58229012293118 ], "wc_strength_and_weaknesses_avg": [ 321.6666666666667, 223.21041393467485 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 334.3333333333333, 445.340568803497 ], "wc_summary_review_avg": [ 238.66666666666666, 277.44589062053564 ], "wc_review_avg": [ 996.0, 780.9191165970178 ], "wc_reply_reviewers_avg": [ 558.6666666666666, 569.848127915578 ], "wc_reply_authors_avg": [ 1148.0, 762.4504355475617 ], "reply_reviewers_avg": [ 3.0, 2.943920288775949 ], "reply_authors_avg": [ 3.6666666666666665, 2.0548046676563256 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": -0.18898223650461365, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9270810389960565051&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "pdf": "https://openreview.net/pdf?id=47KG_AvNqeZ", "email": "google.com;google.com", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "48EwqCCosOO", "title": "Grafting Vision Transformers", "track": "main", "status": "Reject", "tldr": "We present a simple and efficient add-on component (termed GrafT) that considers global dependencies and multi-scale information. GrafT can be easily adopted in both homogeneous (ViT) and pyramid (Swin) Transformers.", "abstract": "Vision Transformers (ViTs) have recently become the state-of-the-art across many computer vision tasks. 
In contrast to convolutional networks (CNNs), ViTs enable global information sharing even within shallow layers of a network, i.e., among high-resolution features. However, this perk was later overlooked with the success of pyramid architectures such as Swin Transformer, which show better performance-complexity trade-offs. In this paper, we present a simple and efficient add-on component (termed GrafT) that considers global dependencies and multi-scale information throughout the network, in both high- and low-resolution features alike. GrafT can be easily adopted in both homogeneous and pyramid Transformers while showing consistent gains. It has the flexibility of branching out at arbitrary depths, widening a network with multiple scales. This grafting operation enables us to share most of the parameters and computations of the backbone, adding only minimal complexity, but with a higher yield. In fact, the process of progressively compounding multi-scale receptive fields in GrafT enables communication between local regions. We show the benefits of the proposed method on multiple benchmarks, including image classification (ImageNet-1K), semantic segmentation (ADE20K), object detection and instance segmentation (COCO2017). Our code and models will be made available.", "keywords": "Vision Transformers;Multi-scale;Multi-branch;Grafting;Classification;Semantic segmentation;Object detection", "primary_area": "", "supplementary_material": "", "author": "Jongwoo Park;Kumara Kahatapitiya;Donghyun Kim;Shivchander Sudalairaj;Quanfu Fan;Michael S Ryoo", "authorids": "~Jongwoo_Park1;~Kumara_Kahatapitiya1;~Donghyun_Kim2;~Shivchander_Sudalairaj1;~Quanfu_Fan1;~Michael_S_Ryoo1", "gender": "M;M;M;M;M;M", "homepage": "https://github.com/jongwoopark7978;https://www3.cs.stonybrook.edu/~kkahatapitiy/;https://cs-people.bu.edu/donhk;;;http://michaelryoo.com/", "dblp": "43/3065-3;227/5409;;314/2623;66/3950;r/MichaelSRyoo", "google_scholar": "qIn5k_sAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.co.kr/citations?user=UsqNPH4AAAAJ;O71amfMAAAAJ;kCxHiwUAAAAJ;vcw0TJIAAAAJ", "orcid": ";;;;;", "linkedin": "jongwpark/;;;shivchanders/;;", "or_profile": "~Jongwoo_Park1;~Kumara_Kahatapitiya1;~Donghyun_Kim2;~Shivchander_Sudalairaj1;~Quanfu_Fan1;~Michael_S_Ryoo1", "aff": "Samsung;Google DeepMind;MIT-IBM Watson AI Lab;MIT-IBM Watson AI Lab;MIT-IBM Watson AI Lab;Google DeepMind", "aff_domain": "samsung.com;google.com;ibm.com;ibm.com;us.ibm.com;google.com", "position": "Intern;Student Researcher;Researcher;Researcher;Researcher;Research Scientist", "bibtex": "@misc{\npark2023grafting,\ntitle={Grafting Vision Transformers},\nauthor={Jongwoo Park and Kumara Kahatapitiya and Donghyun Kim and Shivchander Sudalairaj and Quanfu Fan and Michael S Ryoo},\nyear={2023},\nurl={https://openreview.net/forum?id=48EwqCCosOO}\n}", "github": "", "project": "", "reviewers": "EcB8;wB1L;br4Z", "site": "https://openreview.net/forum?id=48EwqCCosOO", "pdf_size": 519671, "recommendation": "3;6;6", "confidence": "5;5;3", "correctness": "3;3;4", "technical_novelty": "3;3;2", "empirical_novelty": "0;0;2", "wc_summary_paper": "30;42;40", "wc_strength_and_weaknesses": "113;143;152", "wc_clarity_quality_novelty_and_reproducibility": "12;27;15", "wc_summary_review": "15;48;26", "wc_review": "170;260;233", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "965;866;837", "reply_reviewers": "0;0;0", "reply_authors": "2;2;2", "recommendation_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 4.333333333333333, 0.9428090415820634 ], 
"correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 0.6666666666666666, 0.9428090415820634 ], "wc_summary_paper_avg": [ 37.333333333333336, 5.2493385826745405 ], "wc_strength_and_weaknesses_avg": [ 136.0, 16.673332000533065 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 18.0, 6.48074069840786 ], "wc_summary_review_avg": [ 29.666666666666668, 13.719410418171117 ], "wc_review_avg": [ 221.0, 37.70941526992961 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 889.3333333333334, 54.798621231641306 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.5, "corr_recommendation_correctness": 0.5, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2274134687039106504&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff_unique_index": "0;1;2;2;2;1", "aff_unique_norm": "Samsung;Google;Massachusetts Institute of Technology", "aff_unique_dep": "Samsung;Google DeepMind;IBM Watson AI Lab", "aff_unique_url": "https://www.samsung.com;https://deepmind.com;https://www.mitibmwatsonailab.org", "aff_unique_abbr": "Samsung;DeepMind;MIT-IBM AI Lab", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;2;2;1", "aff_country_unique": "South Korea;United Kingdom;United States" }, { "id": "49N06mWPFUm", "title": "Provably Efficient Reinforcement Learning for Online Adaptive Influence Maximization", "track": "main", "status": "Reject", "tldr": "We propose a model-based optimistic RL approach to solve the content-dependent online adaptive influence maximization problem.", "abstract": "Online influence maximization aims to maximize the influence spread of content in a social network with an unknown network model by selecting a few seed nodes. Recent studies followed a non-adaptive setting, where the seed nodes are selected before the start of the diffusion process and network parameters are updated when the diffusion stops. We consider an adaptive version of the content-dependent online influence maximization problem where the seed nodes are sequentially activated based on real-time feedback. In this paper, we formulate the problem as an infinite-horizon discounted MDP under a linear diffusion process and present a model-based reinforcement learning solution. Our algorithm maintains a network model estimate and selects seed users adaptively, exploring the social network while improving the optimal policy optimistically. We establish an $\widetilde O(\sqrt{T})$ regret bound for our algorithm. Empirical evaluations on synthetic and real-world networks demonstrate the efficiency of our algorithm. 
", "keywords": "influence maximization;reinforcement learning", "primary_area": "", "supplementary_material": "", "author": "Kaixuan Huang;Yu Wu;Xuezhou Zhang;Shenyinying Tu;Qingyun Wu;Mengdi Wang;Huazheng Wang", "authorids": "~Kaixuan_Huang1;~Yu_Wu6;~Xuezhou_Zhang2;~Shenyinying_Tu1;~Qingyun_Wu2;~Mengdi_Wang1;~Huazheng_Wang1", "gender": "M;F;F;F;;F;M", "homepage": "https://hackyhuang.github.io/;https://ece.princeton.edu/people/yu-wu;;http://mwang.princeton.edu;https://huazhengwang.github.io/;https://qingyun-wu.github.io/;https://zhangxz1123.github.io/", "dblp": ";;;;163/2233;183/0579;213/7993", "google_scholar": "EfxwV6oAAAAJ;;6CGPiYYAAAAJ;;w3PrbKwAAAAJ;Y54J21sAAAAJ;tR-p-r8AAAAJ", "orcid": ";;;;;;", "linkedin": ";;shenyinying-ruby-tu-02b79042/;;;;", "or_profile": "~Kaixuan_Huang1;~Yu_Wu6;~Shenyinying_Tu1;~Mengdi_Wang1;~Huazheng_Wang1;~Qingyun_Wu1;~Xuezhou_Zhang1", "aff": "Princeton University;Princeton University;LinkedIn;Princeton University;Oregon State University;Pennsylvania State University;Princeton University", "aff_domain": "princeton.edu;princeton.edu;linkedin.com;princeton.edu;oregonstate.edu;psu.edu;princeton.edu", "position": "PhD student;PhD student;Researcher;Full Professor;Assistant Professor;Assistant Professor;Postdoc", "bibtex": "@misc{\nhuang2023provably,\ntitle={Provably Efficient Reinforcement Learning for Online Adaptive Influence Maximization},\nauthor={Kaixuan Huang and Yu Wu and Xuezhou Zhang and Shenyinying Tu and Qingyun Wu and Mengdi Wang and Huazheng Wang},\nyear={2023},\nurl={https://openreview.net/forum?id=49N06mWPFUm}\n}", "github": "", "project": "", "reviewers": "vMK7;Asps;o5dv", "site": "https://openreview.net/forum?id=49N06mWPFUm", "pdf_size": 832162, "recommendation": "3;5;6", "confidence": "4;3;4", "correctness": "3;3;3", "technical_novelty": "3;2;4", "empirical_novelty": "2;2;0", "wc_summary_paper": "85;62;68", "wc_strength_and_weaknesses": "112;83;233", "wc_clarity_quality_novelty_and_reproducibility": "182;82;24", "wc_summary_review": "63;180;55", "wc_review": "442;407;380", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "978;451;618", "reply_reviewers": "0;0;0", "reply_authors": "2;1;1", "recommendation_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 3.0, 0.816496580927726 ], "empirical_novelty_avg": [ 1.3333333333333333, 0.9428090415820634 ], "wc_summary_paper_avg": [ 71.66666666666667, 9.741092797468305 ], "wc_strength_and_weaknesses_avg": [ 142.66666666666666, 64.9632374672185 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 96.0, 65.25846049874811 ], "wc_summary_review_avg": [ 99.33333333333333, 57.13337222869154 ], "wc_review_avg": [ 409.6666666666667, 25.381533094401966 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 682.3333333333334, 219.90351419555702 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.18898223650461363, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7009934944586901246&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;1;0;2;3;0", "aff_unique_norm": "Princeton University;LinkedIn Corporation;Oregon State University;Pennsylvania State University", "aff_unique_dep": ";;;", "aff_unique_url": 
"https://www.princeton.edu;https://www.linkedin.com;https://oregonstate.edu;https://www.psu.edu", "aff_unique_abbr": "Princeton;LinkedIn;OSU;PSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Towards convergence to Nash equilibria in two-team zero-sum games", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11004", "id": "4BPFwvKOvo5", "poster": "", "openreview": "https://openreview.net/forum?id=4BPFwvKOvo5", "slides": "https://iclr.cc/virtual/2023/poster/11004", "video": "https://iclr.cc/virtual/2023/poster/11004", "author_site": "Foivos Kalogiannis, Ioannis Panageas, Emmanouil-Vasileios Vlatakis-Gkaragkounis", "tldr": "Common no-regret algorithms fail to converge to a Nash equilibrium in two-team zero-sum games but a novel approach does converge locally.", "abstract": "Contemporary applications of machine learning raise important and overlooked theoretical questions regarding optimization in two-team games. Formally, two-team zero-sum games are defined as multi-player games where players are split into two competing sets of agents, each experiencing a utility identical to that of their teammates and opposite to that of the opposing team. We focus on the solution concept of Nash equilibria and prove $\\textrm{CLS}$-hardness of computing them in this class of games. To further examine the capabilities of online learning algorithms in games with full-information feedback, we propose a benchmark of a simple ---yet nontrivial--- family of such games. These games do not enjoy the properties used to prove convergence for relevant algorithms. In particular, we use a dynamical systems perspective to demonstrate that gradient descent-ascent, its optimistic variant, optimistic multiplicative weights update, and extra gradient fail to converge (even locally) to a Nash equilibrium. On a brighter note, we propose a first-order method that leverages control theory techniques and under some conditions enjoys last-iterate local convergence to a Nash equilibrium. 
We also believe our proposed method is of independent interest for general min-max optimization.", "keywords": "no-regret-learning;no-regret;optimization;learning-in-games;nash-equilibrium;game-theory;min-max-optimization;min-max", "primary_area": "", "supplementary_material": "/attachment/bbdd175a35694b9c0dfa958b27d6702ded484cde.zip", "author": "Fivos Kalogiannis;Ioannis Panageas;Emmanouil-Vasileios Vlatakis-Gkaragkounis", "authorids": "~Fivos_Kalogiannis1;~Ioannis_Panageas1;~Emmanouil-Vasileios_Vlatakis-Gkaragkounis1", "gender": "M;M;M", "homepage": "https://fivoskal.github.io/;https://panageas.github.io;http://www.cs.columbia.edu/~emvlatakis/", "dblp": "305/7347;139/3829;251/8372", "google_scholar": "FVEj9MIAAAAJ;5NiFWuwAAAAJ;MKutDKcAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Fivos_Kalogiannis1;~Ioannis_Panageas1;~Emmanouil-Vasileios_Vlatakis-Gkaragkounis1", "aff": "University of California, Irvine;Donald Bren School of Information and Computer Sciences, University of California, Irvine;University of California, Berkeley", "aff_domain": "uci.edu;ics.uci.edu;berkeley.edu", "position": "PhD student;Assistant Professor;Postdoc", "bibtex": "@inproceedings{\nkalogiannis2023towards,\ntitle={Towards convergence to Nash equilibria in two-team zero-sum games},\nauthor={Fivos Kalogiannis and Ioannis Panageas and Emmanouil-Vasileios Vlatakis-Gkaragkounis},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=4BPFwvKOvo5}\n}", "github": "", "project": "", "reviewers": "1M7d;m964;fCR8", "pdf_size": 2755847, "recommendation": "3;6;6", "confidence": "4;4;2", "correctness": "3;3;4", "technical_novelty": "2;4;3", "empirical_novelty": "2;3;0", "wc_summary_paper": "119;29;122", "wc_strength_and_weaknesses": "505;351;80", "wc_clarity_quality_novelty_and_reproducibility": "2;14;69", "wc_summary_review": "68;34;15", "wc_review": "694;428;286", "wc_reply_reviewers": "0;522;0", "wc_reply_authors": "1474;1190;84", "reply_reviewers": "0;2;0", "reply_authors": "2;4;1", "recommendation_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.816496580927726 ], "empirical_novelty_avg": [ 1.6666666666666667, 1.247219128924647 ], "wc_summary_paper_avg": [ 90.0, 43.15089802078283 ], "wc_strength_and_weaknesses_avg": [ 312.0, 175.68342741040394 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 28.333333333333332, 29.169999809545576 ], "wc_summary_review_avg": [ 39.0, 21.924111536540465 ], "wc_review_avg": [ 469.3333333333333, 169.11008906100847 ], "wc_reply_reviewers_avg": [ 174.0, 246.07315985291854 ], "wc_reply_authors_avg": [ 916.0, 599.6287740482995 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 2.3333333333333335, 1.247219128924647 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.5000000000000001, "corr_recommendation_correctness": 0.5, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2358404128665876291&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=4BPFwvKOvo5", "email": "uci.edu;ics.uci.edu;berkeley.edu", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of California, Irvine;University of California, Berkeley", "aff_unique_dep": ";", "aff_unique_url": 
"https://www.uci.edu;https://www.berkeley.edu", "aff_unique_abbr": "UCI;UC Berkeley", "aff_campus_unique_index": "0;0;1", "aff_campus_unique": "Irvine;Berkeley", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "The Curious Case of Benign Memorization", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11180", "id": "4C8ChYvMYBn", "poster": "", "openreview": "https://openreview.net/forum?id=4C8ChYvMYBn", "slides": "https://iclr.cc/virtual/2023/poster/11180", "video": "https://iclr.cc/virtual/2023/poster/11180", "author_site": "Sotiris Anagnostidis, Gregor Bachmann, Lorenzo Noci, Thomas Hofmann", "tldr": "", "abstract": "Despite the empirical advances of deep learning across a variety of learning tasks, our theoretical understanding of its success is still very restricted. One of the key challenges is the overparametrized nature of modern models, enabling complete overfitting of the data even if the labels are randomized, i.e. networks can completely \\textit{memorize} all given patterns. While such a memorization capacity seems worrisome, in this work we show that under training protocols that include \\textit{data augmentation}, neural networks learn to memorize entirely random labels in a benign way, i.e. they learn embeddings that lead to highly non-trivial performance under nearest neighbour probing. We demonstrate that deep models have the surprising ability to separate noise from signal by distributing the task of memorization and feature learning to different layers. As a result, only the very last layers are used for memorization, while preceding layers encode performant features which remain largely unaffected by the label noise. We explore the intricate role of the augmentations used for training and identify a memorization-generalization trade-off in terms of their diversity, marking a clear distinction to all previous works. Finally, we give a first explanation for the emergence of benign memorization by showing that \\textit{malign} memorization under data augmentation is infeasible due to the insufficient capacity of the model for the increased sample size. As a consequence, the network is forced to leverage the correlated nature of the augmentations and as a result learns meaningful features. 
To complete the picture, a better theory of feature learning in deep neural networks is required to fully understand the origins of this phenomenon.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/1278465a3d2892f9a7cc35bbd74b301ee00d467e.zip", "author": "Sotiris Anagnostidis;Gregor Bachmann;Lorenzo Noci;Thomas Hofmann", "authorids": "~Sotiris_Anagnostidis1;~Gregor_Bachmann1;~Lorenzo_Noci1;~Thomas_Hofmann1", "gender": "M;M;M;M", "homepage": ";http://www.da.inf.ethz.ch/people/GregorBachmann;;http://www.da.inf.ethz.ch/", "dblp": "286/1763;;268/6839;h/ThHofmann", "google_scholar": "qjzTKWUAAAAJ;bbGqqloAAAAJ;;T3hAyLkAAAAJ", "orcid": ";;;", "linkedin": "sotiris-anagnostidis-b064a5129/;;lorenzo-noci-97aa59130;thomas-hofmann-1ab2402/", "or_profile": "~Sotiris_Anagnostidis1;~Gregor_Bachmann1;~Lorenzo_Noci1;~Thomas_Hofmann1", "aff": "ETH Zurich;Swiss Federal Institute of Technology;ETHZ - ETH Zurich;Swiss Federal Institute of Technology", "aff_domain": "inf.ethz.ch;ethz.ch;ethz.ch;ethz.ch", "position": "PhD student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nanagnostidis2023the,\ntitle={The Curious Case of Benign Memorization},\nauthor={Sotiris Anagnostidis and Gregor Bachmann and Lorenzo Noci and Thomas Hofmann},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=4C8ChYvMYBn}\n}", "github": "", "project": "", "reviewers": "rsqh;ZaD4;dZQx;CAZe", "pdf_size": 10993732, "recommendation": "5;6;6;8", "confidence": "4;4;4;4", "correctness": "3;3;3;3", "technical_novelty": "2;3;2;3", "empirical_novelty": "2;3;3;4", "wc_summary_paper": "115;36;92;118", "wc_strength_and_weaknesses": "685;191;269;776", "wc_clarity_quality_novelty_and_reproducibility": "2;36;111;492", "wc_summary_review": "55;16;52;88", "wc_review": "857;279;524;1474", "wc_reply_reviewers": "817;0;22;299", "wc_reply_authors": "1679;258;343;655", "reply_reviewers": "2;0;1;1", "reply_authors": "4;1;1;2", "recommendation_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 90.25, 32.89661836724255 ], "wc_strength_and_weaknesses_avg": [ 480.25, 253.81230762120265 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 160.25, 195.553541261722 ], "wc_summary_review_avg": [ 52.75, 25.488968201949643 ], "wc_review_avg": [ 783.5, 448.3450122394583 ], "wc_reply_reviewers_avg": [ 284.5, 329.2464882120992 ], "wc_reply_authors_avg": [ 733.75, 565.403119464334 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=776397485693409935&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=4C8ChYvMYBn", "email": "inf.ethz.ch;ethz.ch;ethz.ch;ethz.ch", "author_num": 4, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "ETH Zurich;Swiss Federal Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.ethz.ch;https://www.ethz.ch", "aff_unique_abbr": "ETHZ;ETH Zurich", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Switzerland" }, { "id": "4CQ9os3s4h3", "title": "Random Matrix 
Analysis to Balance between Supervised and Unsupervised Learning under the Low Density Separation Assumption", "track": "main", "status": "Reject", "tldr": "We introduce a semi-supervised learning algorithm based on the low density assumption and propose a theoretical analysis of the latter.", "abstract": "We propose a theoretical framework to analyze semi-supervised classification under the low density separation assumption in a high-dimensional regime. In particular, we introduce QLDS, a linear classification model, where the low density separation assumption is implemented via quadratic margin maximization.\nThe algorithm has an explicit solution with rich theoretical properties, and we show that particular cases of our algorithm are the least-square support vector machine in the supervised case, the spectral clustering in the fully unsupervised regime, and a class of semi-supervised graph-based approaches. As such, QLDS establishes a smooth bridge between these supervised and unsupervised learning methods. Using recent advances in the random matrix theory, we formally derive a theoretical evaluation of the classification error in the asymptotic regime.\nAs an application, we derive a hyperparameter selection policy that finds the best balance between the supervised and the unsupervised terms of our learning criterion.\nFinally, we provide extensive illustrations of our framework, as well as an experimental study on several benchmarks to demonstrate that QLDS, while being computationally more efficient, improves over cross-validation for hyperparameter selection, indicating a high promise of the usage of random matrix theory for semi-supervised model selection.", "keywords": "Random Matrix Theory;semi-supervised learning;high dimensional statistics", "primary_area": "", "supplementary_material": "/attachment/9a893cb04c2f6a17a797e3b0c814645878bfca64.zip", "author": "Vasilii Feofanov;Malik Tiomoko;Aladin Virmaux", "authorids": "~Vasilii_Feofanov1;malik.tiomoko@huawei.com;~Aladin_Virmaux1", "gender": "M;;", "homepage": ";;https://avirmaux.github.io", "dblp": "245/3361;;192/8303", "google_scholar": "https://scholar.google.ru/citations?user=UIteS6oAAAAJ;;5FxvLvwAAAAJ", "orcid": "0000-0002-5777-4205;;", "linkedin": ";;", "or_profile": "~Vasilii_Feofanov1;malik.tiomoko@huawei.com;~Aladin_Virmaux1", "aff": "Huawei Noah's Ark Lab;;Huawei Technologies Ltd.", "aff_domain": "huawei.com;;huawei.com", "position": "Researcher;;Researcher", "bibtex": "@misc{\nfeofanov2023random,\ntitle={Random Matrix Analysis to Balance between Supervised and Unsupervised Learning under the Low Density Separation Assumption},\nauthor={Vasilii Feofanov and Malik Tiomoko and Aladin Virmaux},\nyear={2023},\nurl={https://openreview.net/forum?id=4CQ9os3s4h3}\n}", "github": "", "project": "", "reviewers": "LHdi;xxab;Cq3e", "site": "https://openreview.net/forum?id=4CQ9os3s4h3", "pdf_size": 698604, "recommendation": "3;6;8", "confidence": "3;4;3", "correctness": "2;3;4", "technical_novelty": "2;3;3", "empirical_novelty": "3;3;3", "wc_summary_paper": "77;271;44", "wc_strength_and_weaknesses": "66;901;19", "wc_clarity_quality_novelty_and_reproducibility": "223;50;26", "wc_summary_review": "57;66;15", "wc_review": "423;1288;104", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "887;499;228", "reply_reviewers": "0;0;0", "reply_authors": "2;2;2", "recommendation_avg": [ 5.666666666666667, 2.0548046676563256 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.816496580927726 ], 
"technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 130.66666666666666, 100.14101168962806 ], "wc_strength_and_weaknesses_avg": [ 328.6666666666667, 405.1553885719513 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 99.66666666666667, 87.75850702670115 ], "wc_summary_review_avg": [ 46.0, 22.22611077089287 ], "wc_review_avg": [ 605.0, 500.2046247953598 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 538.0, 270.4453117853343 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.1147078669352809, "corr_recommendation_correctness": 0.9933992677987828, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1875372136113138309&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "aff_unique_index": "0;0", "aff_unique_norm": "Huawei", "aff_unique_dep": "Noah's Ark Lab", "aff_unique_url": "https://www.huawei.com", "aff_unique_abbr": "Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "4CVu_buZwt", "title": "Learn Appropriate Precise Distributions for Binary Neural Networks", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Binary Neural Networks (BNNs) have shown great promise for real-world embedded devices. However, BNNs always suffer from obtaining unsatisfactory accuracy performance on a large dataset such as ImageNet, which could hinder their further widespread applications in practice. Nevertheless, enhancing BNN's performance is extremely challenging owing to its limited capacity. Several distillation approaches in which the knowledge of a real-valued teacher model is distilled to a binary student network have been proposed to boost one BNN's accuracy. However, directly employing previous distillation solutions yields inferior results due to an unsuitable match between the representational capacity of the adopted real-valued teacher model and target binary student network. In this work, we reexamine the design of knowledge distillation framework specially for BNNs and test the limits of what a pure BNN can achieve. We firstly define one group which consists of multi real-valued networks owning particular properties, and then introduce a distribution-specific loss to enforce the binary network to mimic the distribution of one real-valued network fetched from this group in a certain order. In addition, we propose one distance-aware combinational model to provide one binary network with more comprehensive guidance, and present related suitable training strategies. The BNN in this built knowledge distillation framework can be facilitated to learn appropriate precise distributions, dubbed APD-BNN. As a result, APD-BNN can reach its performance limit while incurring no additional computational cost. Compared with the state-of-the-art BNNs, APD-BNN can obtain up to 1.4$\\%$ higher accuracy on the ImageNet dataset with using the same architecture. Specifically, APD-BNN is capable of gaining 72.0$\\%$ top-1 accuracy on ImageNet with only 87M OPs. Thus, it achieves the same accuracy of existing official real-valued MobileNetV2 at 71$\\%$ fewer OPs, demonstrating the huge potential to apply BNNs in practice. 
Our code and models will be available.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Qingliang Liu;Jinmei Lai", "authorids": "~Qingliang_Liu2;jmlai@fudan.edu.cn", "gender": "M;", "homepage": ";", "dblp": ";", "google_scholar": ";", "orcid": ";", "linkedin": "htttp://lql.linkedin;", "or_profile": "~Qingliang_Liu2;jmlai@fudan.edu.cn", "aff": "Fudan University;", "aff_domain": "fudan.edu.cn;", "position": "PhD student;", "bibtex": "@misc{\nliu2023learn,\ntitle={Learn Appropriate Precise Distributions for Binary Neural Networks},\nauthor={Qingliang Liu and Jinmei Lai},\nyear={2023},\nurl={https://openreview.net/forum?id=4CVu_buZwt}\n}", "github": "", "project": "", "reviewers": "g85S;BNiX;nZrs;kPx2", "site": "https://openreview.net/forum?id=4CVu_buZwt", "pdf_size": 271447, "recommendation": "3;3;3;3", "confidence": "5;2;5;5", "correctness": "2;3;2;2", "technical_novelty": "2;2;2;2", "empirical_novelty": "2;2;0;2", "wc_summary_paper": "59;54;52;125", "wc_strength_and_weaknesses": "594;143;474;653", "wc_clarity_quality_novelty_and_reproducibility": "75;11;30;21", "wc_summary_review": "60;29;167;33", "wc_review": "788;237;723;832", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 4.25, 1.299038105676658 ], "correctness_avg": [ 2.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 1.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 72.5, 30.41792234851026 ], "wc_strength_and_weaknesses_avg": [ 466.0, 197.32333871085802 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 34.25, 24.468091466234142 ], "wc_summary_review_avg": [ 72.25, 55.988280023590654 ], "wc_review_avg": [ 645.0, 238.72892577146993 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:VzRm3P_5pUYJ:scholar.google.com/&scioq=Learn+Appropriate+Precise+Distributions+for+Binary+Neural+Networks&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Fudan University", "aff_unique_dep": "", "aff_unique_url": "https://www.fudan.edu.cn", "aff_unique_abbr": "Fudan", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "title": "Learning Math Reasoning from Self-Sampled Correct and Partially-Correct Solutions", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11735", "id": "4D4TSJE6-K", "poster": "", "openreview": "https://openreview.net/forum?id=4D4TSJE6-K", "slides": "https://iclr.cc/virtual/2023/poster/11735", "video": "https://iclr.cc/virtual/2023/poster/11735", "author_site": "Ansong Ni, Jeevana Priya Inala, Chenglong Wang, Alex Polozov, Christopher Meek, Dragomir Radev, Jianfeng Gao", "tldr": "We propose to let pretrained language models sample additional solutions for each problem and learn from the self-sampled solutions that are correct or partially-correct.", "abstract": "Pretrained language models have shown superior performance on many natural language processing tasks, yet they still struggle at multi-step formal reasoning tasks like grade school math problems. 
One key challenge of finetuning them to solve such math reasoning problems is that many existing datasets only contain one reference solution for each problem, despite the fact that there are often alternative solutions resembling different reasoning paths to the final answer. This way, the finetuned models are biased towards the limited reference solutions, which limits their generalization to unseen examples. To mitigate this issue, we propose to let the model perform sampling during training and learn from both self-sampled fully-correct solutions, which yield the correct answer upon execution, and partially-correct solutions, whose intermediate state matches an intermediate state of a known correct solution. We show that our use of self-sampled correct and partially-correct solutions can benefit learning and help guide the sampling process, leading to more efficient exploration of the solution space. Additionally, we explore various training objectives to support learning from multiple solutions per example and find they greatly affect the performance. Experiments on two math reasoning datasets show the effectiveness of our method compared to learning from a single reference solution with MLE, where we improve PASS@100 from 35.5% to 44.5% for GSM8K, and 27.6% to 36.2% PASS@80 for MathQA. Such improvements are also consistent across different model sizes.", "keywords": "mathematical reasoning;multi-target learning;self-sampling;large language models", "primary_area": "", "supplementary_material": "", "author": "Ansong Ni;Jeevana Priya Inala;Chenglong Wang;Alex Polozov;Christopher Meek;Dragomir Radev;Jianfeng Gao", "authorids": "~Ansong_Ni1;~Jeevana_Priya_Inala1;~Chenglong_Wang1;~Alex_Polozov1;~Christopher_Meek1;~Dragomir_Radev2;~Jianfeng_Gao1", "gender": "M;;M;M;M;;", "homepage": "https://niansong1996.github.io/;http://jinala.github.io/;https://chenglongwang.org/;https://www.microsoft.com/en-us/research/people/jfgao/;https://www.alexpolozov.com/;https://cm1x.github.io/;http://www.cs.yale.edu/~radev", "dblp": "202/1480;166/1342;;92/5339;151/3318;m/ChristopherMeek;r/DragomirRRadev", "google_scholar": "4IA1clAAAAAJ;;;https://scholar.google.com/citations?hl=en;-SuHe48AAAAJ;ajIYB6wAAAAJ;vIqWvgwAAAAJ", "orcid": ";;;;;;0000-0002-0213-7487", "linkedin": ";;;;apskim;;dragomir-radev/", "or_profile": "~Ansong_Ni1;~Jeevana_Priya_Inala1;~Chenglong_Wang1;~Jianfeng_Gao1;~Oleksandr_Polozov1;~Chris_Meek1;~Dragomir_Radkov_Radev1", "aff": "Yale University;Microsoft;Microsoft;Microsoft Research;Google;University of Washington;Yale University", "aff_domain": "yale.edu;microsoft.com;microsoft.com;microsoft.com;google.com;uw.edu;yale.edu", "position": "PhD student;Researcher;Researcher;Principal Researcher;Research Scientist;Affiliate Professor;Full Professor", "bibtex": "@inproceedings{\nni2023learning,\ntitle={Learning Math Reasoning from Self-Sampled Correct and Partially-Correct Solutions},\nauthor={Ansong Ni and Jeevana Priya Inala and Chenglong Wang and Alex Polozov and Christopher Meek and Dragomir Radev and Jianfeng Gao},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=4D4TSJE6-K}\n}", "github": "", "project": "", "reviewers": "2ima;bPyF;dnGk;bc3k", "pdf_size": 4294437, "recommendation": "5;6;6;6", "confidence": "4;4;4;4", "correctness": "3;4;4;3", "technical_novelty": "3;3;3;3", "empirical_novelty": "3;3;3;2", "wc_summary_paper": "136;254;161;76", "wc_strength_and_weaknesses": "328;236;402;287", 
"wc_clarity_quality_novelty_and_reproducibility": "126;80;9;78", "wc_summary_review": "49;69;43;38", "wc_review": "639;639;615;479", "wc_reply_reviewers": "223;0;72;0", "wc_reply_authors": "831;712;789;531", "reply_reviewers": "1;0;1;0", "reply_authors": "3;2;3;2", "recommendation_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 156.75, 64.0834416990848 ], "wc_strength_and_weaknesses_avg": [ 313.25, 60.72633283839886 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 73.25, 41.76945654422619 ], "wc_summary_review_avg": [ 49.75, 11.776565713313877 ], "wc_review_avg": [ 593.0, 66.5432190384565 ], "wc_reply_reviewers_avg": [ 73.75, 91.04497514964788 ], "wc_reply_authors_avg": [ 715.75, 114.88554086568074 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16740974549599802554&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=4D4TSJE6-K", "email": "yale.edu;microsoft.com;microsoft.com;microsoft.com;google.com;uw.edu;yale.edu", "author_num": 7, "aff_unique_index": "0;1;1;1;2;3;0", "aff_unique_norm": "Yale University;Microsoft;Google;University of Washington", "aff_unique_dep": ";Microsoft Corporation;Google;", "aff_unique_url": "https://www.yale.edu;https://www.microsoft.com;https://www.google.com;https://www.washington.edu", "aff_unique_abbr": "Yale;Microsoft;Google;UW", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "4DL3cyuVHrV", "title": "Divide and conquer policy for efficient GAN training", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Recent advances in Generative Adversarial Networks (GANs) have achieved impressive results for the purpose of generating high quality synthetic imagery. While capable of synthesizing high-fidelity images, these models often generate unsatisfactory images which fall outside of the data manifold. A considerable research effort has investigated the data manifold, either by simply discarding images having a lower probability according to the discriminator output, or by filtering real images which are within the sparse regions of the data manifold. While effective, these methods fail to get access to either the fake distribution or the real distribution. In this paper, we propose a divide and conquer policy for GAN training. We first introduce a new local data-manifold detector (LDMD), which estimates whether the generated images are inside or outside of the data manifold. With the proposed LDMD, we further introduce a noise replay mode if it is outside the manifold, and a fake sample reuse mode if it is inside the manifold. 
Extensive experimental results on a number of GAN variants (e.g., SAGAN, SNGAN, BigGAN and StyleGAN) demonstrate qualitatively and quantitatively that our method improves the GAN\u2019s performance, resulting in more realistic images than previous methods as confirmed by a significant drop in the FID.", "keywords": "GANs;image generation", "primary_area": "", "supplementary_material": "/attachment/bb3ec29a42fa8777ee3bd8e511c9191b78f2d180.zip", "author": "Senmao Li;Yaxing Wang;Joost van de Weijer;Fahad Khan;Jian Yang", "authorids": "~Senmao_Li2;~Yaxing_Wang3;~Joost_van_de_Weijer5;~Fahad_Khan1;csjyang@nankai.edu.cn", "gender": ";M;;M;", "homepage": "https://sen-mao.github.io/;https://yaxingwang.netlify.app/author/yaxing-wang/;;https://sites.google.com/view/fahadkhans/home;", "dblp": "344/2376;;;05/8618;", "google_scholar": "F96SDKwAAAAJ;https://scholar.google.es/citations?user=6CsB8k0AAAAJ;;zvaeYnUAAAAJ;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Senmao_Li2;~Yaxing_Wang3;~Joost_van_de_Weijer5;~Fahad_Khan1;csjyang@nankai.edu.cn", "aff": "Nankai University;Nankai University;;Link\u00f6ping University;", "aff_domain": "nankai.edu.cn;nku.nankai.edu.cn;;liu.se;", "position": "PhD student;Associate Professor;;Associate Professor;", "bibtex": "@misc{\nli2023divide,\ntitle={Divide and conquer policy for efficient {GAN} training},\nauthor={Senmao Li and Yaxing Wang and Joost van de Weijer and Fahad Khan and Jian Yang},\nyear={2023},\nurl={https://openreview.net/forum?id=4DL3cyuVHrV}\n}", "github": "", "project": "", "reviewers": "huvr;BcUd;3jZe;vYwX", "site": "https://openreview.net/forum?id=4DL3cyuVHrV", "pdf_size": 2476134, "recommendation": "1;3;3;5", "confidence": "5;4;4;4", "correctness": "2;2;2;3", "technical_novelty": "2;3;2;3", "empirical_novelty": "1;2;2;3", "wc_summary_paper": "63;124;77;41", "wc_strength_and_weaknesses": "406;389;489;162", "wc_clarity_quality_novelty_and_reproducibility": "17;29;264;1", "wc_summary_review": "3;72;23;23", "wc_review": "489;614;853;227", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.0, 1.4142135623730951 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 2.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 76.25, 30.408674749156695 ], "wc_strength_and_weaknesses_avg": [ 361.5, 121.23633943665571 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 77.75, 107.98929345078614 ], "wc_summary_review_avg": [ 30.25, 25.449705302812447 ], "wc_review_avg": [ 545.75, 225.7668875189628 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.816496580927726, "corr_recommendation_correctness": 0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:y8zCSm5CiBcJ:scholar.google.com/&scioq=Divide+and+conquer+policy+for+efficient+GAN+training&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;1", "aff_unique_norm": "Nankai University;Link\u00f6ping University", "aff_unique_dep": ";", "aff_unique_url": "http://www.nankai.edu.cn;https://www.liu.se", "aff_unique_abbr": "NKU;LiU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "China;Sweden" }, {
"title": "Short-Term Memory Convolutions", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11192", "id": "4DU_HCijfJp", "poster": "/media/PosterPDFs/ICLR%202023/11192.png?t=1682086848.379501", "openreview": "https://openreview.net/forum?id=4DU_HCijfJp", "slides": "https://iclr.cc/virtual/2023/poster/11192", "video": "https://iclr.cc/virtual/2023/poster/11192", "author_site": "Grzegorz Stefa\u0144ski, Krzysztof Arendt, Pawe\u0142 Daniluk, Bart\u0142omiej Jasik, Artur Szumaczuk", "tldr": "", "abstract": "The real-time processing of time series signals is a critical issue for many real-life applications. The idea of real-time processing is especially important in audio domain as the human perception of sound is sensitive to any kind of disturbance in perceived signals, especially the lag between auditory and visual modalities. The rise of deep learning (DL) models complicated the landscape of signal processing. Although they often have superior quality compared to standard DSP methods, this advantage is diminished by higher latency. In this work we propose novel method for minimization of inference time latency and memory consumption, called Short-Term Memory Convolution (STMC) and its transposed counterpart. The main advantage of STMC is the low latency comparable to long short-term memory (LSTM) networks. Furthermore, the training of STMC-based models is faster and more stable as the method is based solely on convolutional neural networks (CNNs). In this study we demonstrate an application of this solution to a U-Net model for a speech separation task and GhostNet model in acoustic scene classification (ASC) task. In case of speech separation we achieved a 5-fold reduction in inference time and a 2-fold reduction in latency without affecting the output quality. 
The inference time for the ASC task was up to 4 times faster while preserving the original accuracy.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Grzegorz Stefa\u0144ski;Krzysztof Arendt;Pawe\u0142 Daniluk;Bart\u0142omiej Jasik;Artur Szumaczuk", "authorids": "~Grzegorz_Stefa\u0144ski1;~Krzysztof_Arendt1;~Pawe\u0142_Daniluk1;~Bart\u0142omiej_Jasik1;~Artur_Szumaczuk1", "gender": "M;M;;M;M", "homepage": "https://github.com/GrzegorzStefanski;https://krzysztofarendt.github.io/;;;", "dblp": "339/8733.html;;57/7064;;293/5618", "google_scholar": "UdGFCFsAAAAJ;ZDX0THoAAAAJ;https://scholar.google.pl/citations?user=5PIIXdwAAAAJ;;", "orcid": "0000-0002-0858-0180;0000-0001-7095-4676;;0000-0001-8550-0335;0000-0002-0329-5022", "linkedin": "g-stefanski/;krzysztofarendt/;;;", "or_profile": "~Grzegorz_Stefa\u0144ski1;~Krzysztof_Arendt1;~Pawe\u0142_Daniluk1;~Bart\u0142omiej_Jasik1;~Artur_Szumaczuk1", "aff": "Samsung;;Samsung;;Samsung", "aff_domain": "samsung.com;;samsung.com;;samsung.com", "position": "Researcher;;Principal Researcher;;Data Scientist", "bibtex": "@inproceedings{\nstefa{\\'n}ski2023shortterm,\ntitle={Short-Term Memory Convolutions},\nauthor={Grzegorz Stefa{\\'n}ski and Krzysztof Arendt and Pawe{\\l} Daniluk and Bart{\\l}omiej Jasik and Artur Szumaczuk},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=4DU_HCijfJp}\n}", "github": "", "project": "", "reviewers": "TkYW;EphW;xgZF", "pdf_size": 2190429, "recommendation": "5;6;6", "confidence": "3;2;3", "correctness": "4;3;4", "technical_novelty": "3;2;2", "empirical_novelty": "2;2;3", "wc_summary_paper": "78;102;55", "wc_strength_and_weaknesses": "87;124;205", "wc_clarity_quality_novelty_and_reproducibility": "321;322;9", "wc_summary_review": "54;96;44", "wc_review": "540;644;313", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1878;1709;1072", "reply_reviewers": "0;0;0", "reply_authors": "3;3;2", "recommendation_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 78.33333333333333, 19.189117286165672 ], "wc_strength_and_weaknesses_avg": [ 138.66666666666666, 49.27699485786671 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 217.33333333333334, 147.31447843153623 ], "wc_summary_review_avg": [ 64.66666666666667, 22.528993664954402 ], "wc_review_avg": [ 499.0, 138.2051615051575 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1553.0, 347.0456262030494 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.4999999999999999, "corr_recommendation_correctness": -0.4999999999999999, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=944746402951706919&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=4DU_HCijfJp", "email": "samsung.com;;samsung.com;;samsung.com", "author_num": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "Samsung", "aff_unique_dep": "Samsung", "aff_unique_url": "https://www.samsung.com", "aff_unique_abbr": "Samsung", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index":
"0;0;0", "aff_country_unique": "South Korea" }, { "title": "Few-Shot Domain Adaptation For End-to-End Communication", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10701", "id": "4F1gvduDeL", "poster": "", "openreview": "https://openreview.net/forum?id=4F1gvduDeL", "slides": "https://iclr.cc/virtual/2023/poster/10701", "video": "https://iclr.cc/virtual/2023/poster/10701", "author_site": "Jayaram Raghuram, Yijing Zeng, Dolores Garcia, Rafael Ruiz, Somesh Jha, Joerg Widmer, Suman Banerjee", "tldr": "We propose a sample-efficient domain adaptation method for the autoencoder based end-to-end communication problem", "abstract": "The problem of end-to-end learning of a communication system using an autoencoder -- consisting of an encoder, channel, and decoder modeled using neural networks -- has recently been shown to be an effective approach. A challenge faced in the practical adoption of this learning approach is that under changing channel conditions (e.g. a wireless link), it requires frequent retraining of the autoencoder in order to maintain a low decoding error rate. Since retraining is both time consuming and requires a large number of samples, it becomes impractical when the channel distribution is changing quickly. We propose to address this problem using a fast and sample-efficient (few-shot) domain adaptation method that does not change the encoder and decoder networks. Different from conventional training-time unsupervised or semi-supervised domain adaptation, here we have a trained autoencoder from a source distribution that we want to adapt (at test time) to a target distribution using only a small labeled dataset, and no unlabeled data. We focus on a generative channel model based on the Gaussian mixture density network (MDN), and propose a regularized, parameter-efficient adaptation of the MDN using a set of affine transformations. The learned affine transformations are then used to design an optimal transformation at the decoder input to compensate for the distribution shift, and effectively present to the decoder inputs close to the source distribution. 
Experiments on many simulated distribution changes common to the wireless setting, and a real mmWave FPGA testbed demonstrate the effectiveness of our method at adaptation using very few target domain samples~\\footnote{Code for our work: \\url{https://github.com/jayaram-r/domain-adaptation-autoencoder}}.", "keywords": "domain adaptation;end-to-end communication;autoencoders;Gaussian mixtures;mixture density networks;few-shot;wireless channel", "primary_area": "", "supplementary_material": "/attachment/86158f485ce842a56e030f8d01bcf535bbab4d9f.zip", "author": "Jayaram Raghuram;Yijing Zeng;Dolores Garcia;Rafael Ruiz;Somesh Jha;Joerg Widmer;Suman Banerjee", "authorids": "~Jayaram_Raghuram1;~Yijing_Zeng1;~Dolores_Garcia1;~Rafael_Ruiz1;~Somesh_Jha1;~Joerg_Widmer1;~Suman_Banerjee3", "gender": "M;M;;;M;M;M", "homepage": ";;;;;https://www.joergwidmer.org/;http://pages.cs.wisc.edu/~suman", "dblp": "117/7273;;;;j/SomeshJha;;", "google_scholar": "xvjzWWEAAAAJ;GmNBWSkAAAAJ;https://scholar.google.com/citations?hl=en;;BaI7l8QAAAAJ;LT1_KV4AAAAJ;cLb-v7gAAAAJ", "orcid": "0000-0002-9473-3357;;;0000-0002-9421-3415;;0000-0001-6667-8779;", "linkedin": "jayaram-raghuram-32b66410/;;;;;;", "or_profile": "~Jayaram_Raghuram1;~Yijing_Zeng1;~Dolores_Garcia1;~Rafael_Ruiz1;~Somesh_Jha1;~Joerg_Widmer1;~Suman_Banerjee3", "aff": "University of Wisconsin - Madison;Meta;CERN;;Department of Computer Science, University of Wisconsin, Madison;IMDEA Networks;UW-Madison", "aff_domain": "cs.wisc.edu;meta.com;cern.ch;;cs.wisc.edu;imdea.org;cs.wisc.edu", "position": "Researcher;Researcher;Postdoc;;Full Professor;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nraghuram2023fewshot,\ntitle={Few-Shot Domain Adaptation For End-to-End Communication},\nauthor={Jayaram Raghuram and Yijing Zeng and Dolores Garcia and Rafael Ruiz and Somesh Jha and Joerg Widmer and Suman Banerjee},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=4F1gvduDeL}\n}", "github": "", "project": "", "reviewers": "91WK;jGKn;fua1", "pdf_size": 11181882, "recommendation": "6;8;8", "confidence": "4;3;3", "correctness": "3;4;3", "technical_novelty": "3;3;4", "empirical_novelty": "3;3;4", "wc_summary_paper": "51;82;117", "wc_strength_and_weaknesses": "193;197;494", "wc_clarity_quality_novelty_and_reproducibility": "36;16;159", "wc_summary_review": "45;35;78", "wc_review": "325;330;848", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "747;554;1450", "reply_reviewers": "0;0;0", "reply_authors": "1;1;2", "recommendation_avg": [ 7.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 83.33333333333333, 26.96087700518826 ], "wc_strength_and_weaknesses_avg": [ 294.6666666666667, 140.95941102159713 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 70.33333333333333, 63.22622521988447 ], "wc_summary_review_avg": [ 52.666666666666664, 18.372685039360892 ], "wc_review_avg": [ 501.0, 245.37454364026163 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 917.0, 385.0359290594407 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.9999999999999998, 
"corr_recommendation_correctness": 0.49999999999999983, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5388704749747210853&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "pdf": "https://openreview.net/pdf?id=4F1gvduDeL", "email": "cs.wisc.edu;meta.com;cern.ch;;cs.wisc.edu;imdea.org;cs.wisc.edu", "author_num": 7, "aff_unique_index": "0;1;2;0;3;0", "aff_unique_norm": "University of Wisconsin-Madison;Meta;European Organization for Nuclear Research;IMDEA Networks Institute", "aff_unique_dep": ";Meta Platforms, Inc.;;", "aff_unique_url": "https://www.wisc.edu;https://meta.com;https://home.cern;https://www.imdea.org/", "aff_unique_abbr": "UW-Madison;Meta;CERN;IMDEA", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Madison;", "aff_country_unique_index": "0;0;1;0;2;0", "aff_country_unique": "United States;Switzerland;Spain" }, { "title": "A Mixture-of-Expert Approach to RL-based Dialogue Management", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10925", "id": "4FBUihxz5nm", "poster": "", "openreview": "https://openreview.net/forum?id=4FBUihxz5nm", "slides": "https://iclr.cc/virtual/2023/poster/10925", "video": "https://iclr.cc/virtual/2023/poster/10925", "author_site": "Yinlam Chow, Azamat Tulepbergenov, Ofir Nachum, Dhawal Gupta, Moonkyung Ryu, Mohammad Ghavamzadeh, Craig Boutilier", "tldr": "A mixture-of-expert based dialogue manager that is amenable to sequential decision making techniques", "abstract": "Despite recent advancements in language models (LMs), their application to dialogue management (DM) problems and ability to carry on rich conversations remain a challenge. We use reinforcement learning (RL) to develop a dialogue agent that avoids being short-sighted (outputting generic utterances) and maximizes overall user satisfaction. Most existing RL approaches to DM train the agent at the word-level, and thus, have to deal with a combinatorially complex action space even for a medium-size vocabulary. As a result, they struggle to produce a successful and engaging dialogue even if they are warm-started with a pre-trained LM. To address this issue, we develop a RL-based DM using a novel mixture of expert language model (MoE-LM) that consists of (i) a LM capable of learning diverse semantics for conversation histories, (ii) a number of specialized LMs (or experts) capable of generating utterances corresponding to a particular attribute or personality, and (iii) a RL-based DM that performs dialogue planning with the utterances generated by the experts. Our MoE approach provides greater flexibility to generate sensible utterances with different intents and allows RL to focus on conversational-level DM. We compare it with SOTA baselines on open-domain dialogues and demonstrate its effectiveness both in terms of the diversity and sensibility of the generated utterances and the overall DM performance. 
", "keywords": "Reinforcement learning", "primary_area": "", "supplementary_material": "/attachment/3cce63d1214340498b46201c4f78579099eb6d26.zip", "author": "Yinlam Chow;Azamat Tulepbergenov;Ofir Nachum;Dhawal Gupta;Moonkyung Ryu;Mohammad Ghavamzadeh;Craig Boutilier", "authorids": "~Yinlam_Chow1;~Azamat_Tulepbergenov1;~Ofir_Nachum1;~Dhawal_Gupta1;~Moonkyung_Ryu1;~Mohammad_Ghavamzadeh2;~Craig_Boutilier2", "gender": "M;M;M;M;M;M;M", "homepage": ";https://atulep.github.io;https://scholar.google.com/citations?user=C-ZlBWMAAAAJ&hl=en;https://dhawgupta.github.io/;;https://research.google/people/craigboutilier/;https://mohammadghavamzadeh.github.io/", "dblp": "146/7869;;;231/0618;;10/3411;88/6389", "google_scholar": ";https://scholar.google.com/citations?hl=en;C-ZlBWMAAAAJ;n1Lsp_8AAAAJ;EEBuCJ8AAAAJ;cXkm3rsAAAAJ;https://scholar.google.ca/citations?user=LHIPpCsAAAAJ", "orcid": ";;;;;;", "linkedin": ";;;dhawgupta/;;;", "or_profile": "~Yinlam_Chow1;~Azamat_Tulepbergenov1;~Ofir_Nachum1;~Dhawal_Gupta1;~Moonkyung_Ryu1;~Craig_Boutilier2;~Mohammad_Ghavamzadeh1", "aff": "Google Research;Google;OpenAI;Department of Computer Science, University of Massachusetts at Amherst;Google Research;Google;Google Research", "aff_domain": "google.com;google.com;openai.com;cs.umass.edu;google.com;google.com;google.com", "position": "Research Scientist;Research Software Engineer;Researcher;PhD student;Software Engineer;Principal Researcher;Senior Staff Research Scientist", "bibtex": "@inproceedings{\nchow2023a,\ntitle={A Mixture-of-Expert Approach to {RL}-based Dialogue Management},\nauthor={Yinlam Chow and Azamat Tulepbergenov and Ofir Nachum and Dhawal Gupta and Moonkyung Ryu and Mohammad Ghavamzadeh and Craig Boutilier},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=4FBUihxz5nm}\n}", "github": "", "project": "", "reviewers": "ySba;FimV;8p8z;Esbb;nxAX", "pdf_size": 2655257, "recommendation": "3;6;6;8;8", "confidence": "3;4;3;4;3", "correctness": "1;3;3;4;4", "technical_novelty": "4;3;3;4;3", "empirical_novelty": "0;3;3;3;0", "wc_summary_paper": "92;54;99;172;127", "wc_strength_and_weaknesses": "237;52;313;83;222", "wc_clarity_quality_novelty_and_reproducibility": "145;40;74;74;81", "wc_summary_review": "113;74;44;49;63", "wc_review": "587;220;530;378;493", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;0;0", "reply_authors": "0;0;0;0;0", "recommendation_avg": [ 6.2, 1.8330302779823362 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "correctness_avg": [ 3.0, 1.0954451150103321 ], "technical_novelty_avg": [ 3.4, 0.4898979485566356 ], "empirical_novelty_avg": [ 1.8, 1.4696938456699067 ], "wc_summary_paper_avg": [ 108.8, 39.260157921231034 ], "wc_strength_and_weaknesses_avg": [ 181.4, 98.47354974814303 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 82.8, 34.23098012035296 ], "wc_summary_review_avg": [ 68.6, 24.581293700698506 ], "wc_review_avg": [ 441.6, 130.19155118516716 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.3563483225498992, "corr_recommendation_correctness": 0.9960238411119947, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9691239996294860343&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 9, "pdf": "https://openreview.net/pdf?id=4FBUihxz5nm", "email": 
"google.com;google.com;openai.com;cs.umass.edu;google.com;google.com;google.com", "author_num": 7, "aff_unique_index": "0;0;1;2;0;0;0", "aff_unique_norm": "Google;OpenAI;University of Massachusetts Amherst", "aff_unique_dep": "Google Research;;Department of Computer Science", "aff_unique_url": "https://research.google;https://openai.com;https://www.umass.edu", "aff_unique_abbr": "Google Research;OpenAI;UMass Amherst", "aff_campus_unique_index": "0;0;2;0;0;0", "aff_campus_unique": "Mountain View;;Amherst", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "4Ff0zhHYxwl", "title": "Model-Agnostic Meta-Attack: Towards Reliable Evaluation of Adversarial Robustness", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "The vulnerability of deep neural networks to adversarial examples has motivated an increasing number of defense strategies for promoting model robustness. However, the progress is usually hampered by insufficient robustness evaluations. As the de facto standard to evaluate adversarial robustness, adversarial attacks typically solve an optimization problem of crafting adversarial examples with an iterative process. But the existing attacks are usually limited by using hand-designed optimization algorithms, leading to less accurate robustness evaluations. In this paper, we propose a Model-Agnostic Meta-Attack (MAMA) approach to discover stronger attack algorithms automatically. Our method learns the optimizer in adversarial attacks parameterized by a recurrent neural network, which is trained over a class of data samples and defense models to produce effective update directions during adversarial example generation. Furthermore, we develop a model-agnostic training algorithm to improve the generalization ability of the learned optimizer when attacking unseen defenses. Our approach can be flexibly incorporated with various attacks and consistently improves their performance. Extensive experiments demonstrate the effectiveness and efficiency of the learned attacks by MAMA, e.g., MAMA achieves x2 speedup over the state-of-the-art AutoAttack while obtaining lower robust test accuracy on all adopted defense models. Therefore, MAMA leads to a more reliable and efficient evaluation of adversarial robustness. 
", "keywords": "Adversarial attacks;robust evaluation", "primary_area": "", "supplementary_material": "/attachment/2c2598e1bd917572592bfe510cc6f2c9d1b4e422.zip", "author": "Xiao Yang;Yinpeng Dong;Wenzhao Xiang;Tianyu Pang;Hang Su;Jun Zhu", "authorids": "~Xiao_Yang4;~Yinpeng_Dong2;~Wenzhao_Xiang1;~Tianyu_Pang1;~Hang_Su3;~Jun_Zhu2", "gender": "M;M;M;M;M;M", "homepage": "https://ml.cs.tsinghua.edu.cn/~xiaoyang/;https://dongyp13.github.io;https://wenzhao-xiang.github.io/Blog/;https://p2333.github.io/;http://ml.cs.tsinghua.edu.cn/~jun;", "dblp": "57/33851;183/0980;;202/2550;50/2644-1;26/5371-6", "google_scholar": "bwkwp0MAAAAJ;6_4ad84AAAAJ;;wYDbtFsAAAAJ;axsP38wAAAAJ;dxN1_X0AAAAJ", "orcid": "0000-0001-9502-9962;;;0000-0003-0639-6176;;", "linkedin": ";;;%E5%A4%A9%E5%AE%87-%E5%BA%9E-b3999017a/;;", "or_profile": "~Xiao_Yang4;~Yinpeng_Dong2;~Wenzhao_Xiang1;~Tianyu_Pang1;~Jun_Zhu2;~Hang_Su2", "aff": "Tsinghua University;Tsinghua University;University of Chinese Academy of Sciences;Sea AI Lab;Tsinghua University;Tsinghua University", "aff_domain": "mail.tsinghua.edu.cn;tsinghua.edu.cn;ucas.ac.cn;sea.com;mail.tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;Postdoc;PhD student;Research Scientist;Professor;Associate Professor", "bibtex": "@misc{\nyang2023modelagnostic,\ntitle={Model-Agnostic Meta-Attack: Towards Reliable Evaluation of Adversarial Robustness},\nauthor={Xiao Yang and Yinpeng Dong and Wenzhao Xiang and Tianyu Pang and Hang Su and Jun Zhu},\nyear={2023},\nurl={https://openreview.net/forum?id=4Ff0zhHYxwl}\n}", "github": "", "project": "", "reviewers": "AV7P;MFMw;FNDw", "site": "https://openreview.net/forum?id=4Ff0zhHYxwl", "pdf_size": 593959, "recommendation": "3;5;6", "confidence": "4;4;4", "correctness": "3;3;4", "technical_novelty": "3;3;3", "empirical_novelty": "0;2;2", "wc_summary_paper": "92;33;87", "wc_strength_and_weaknesses": "515;233;82", "wc_clarity_quality_novelty_and_reproducibility": "170;70;125", "wc_summary_review": "75;18;24", "wc_review": "852;354;318", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 1.3333333333333333, 0.9428090415820634 ], "wc_summary_paper_avg": [ 70.66666666666667, 26.71246067953223 ], "wc_strength_and_weaknesses_avg": [ 276.6666666666667, 179.447918783015 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 121.66666666666667, 40.89281382128433 ], "wc_summary_review_avg": [ 39.0, 25.573423705088842 ], "wc_review_avg": [ 508.0, 243.68832553078943 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.7559289460184545, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18375669268194129285&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;1;2;0;0", "aff_unique_norm": "Tsinghua University;University of Chinese Academy of Sciences;Sea AI Lab", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tsinghua.edu.cn;http://www.ucas.ac.cn;", "aff_unique_abbr": "THU;UCAS;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": 
"China;" }, { "id": "4Fi-5Jiyy5w", "title": "Applying Second Order Optimization to Deep Transformers with Parameter-Efficient Tuning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Despite the theoretical superiority in convergence issues, second-order optimizers are generally not among the top choices for training large-scale neural networks due to their high computational and memory cost. Nevertheless, introduced in recent progress of parameter-efficient tuning is a new paradigm that large-scale pre-trained models (PTMs) can be adapted to specific tasks by optimizing a tiny proportion of parameters, which might hopefully change the game. We associate this new paradigm with the computational tractability of second-order optimizers and succeed in applying them to large PTMs that are from hundreds of millions to billions in scale. Beyond verifying their tractability, we further investigate the stability-influencing factors in the optimization process and propose accordingly a Newton-step-clipping approach in which we clip the update tensors rather than the gradients. This approach stabilizes the convergence by gating the magnitude of Newton steps along the optimization trajectories through the rugged landscapes of deep transformers. \nWe conduct extensive experiments across different downstream tasks, demonstrating that, when equipped with our Newton-step-clipping strategy, second-order optimizers, especially Kronecker-factored curvature approximation (K-FAC), can attain comparable and even superior results and faster convergence to those state-of-the-art bars implemented with AdamW. Furthermore, we scale the model up to 3 billion parameters and validate the tractability and effectiveness of our method. This work is not only the first successful application of second-order optimization on such large-scale models but also sheds light on the possibility of further optimization-wise analysis on large-scale models in the future.", "keywords": "Pre-trained Models;NLP;Model Adaptation", "primary_area": "", "supplementary_material": "/attachment/8031a4016677dacaa1a8c11ef822f3f4c1a08e9b.zip", "author": "Ning Ding;Qiaosen Wang;Yulin Chen;Pengjun Xie;Zhiyuan Liu;Hai-Tao Zheng;Maosong Sun", "authorids": "~Ning_Ding5;~Qiaosen_Wang1;~Yulin_Chen1;~Pengjun_Xie2;~Zhiyuan_Liu1;~Hai-Tao_Zheng2;~Maosong_Sun1", "gender": "M;M;F;M;M;M;M", "homepage": "https://www.stingning.cn/;;;;http://nlp.csai.tsinghua.edu.cn/~lzy;https://www.sigs.tsinghua.edu.cn/fg3/105069.jhtml;https://www.cs.tsinghua.edu.cn/csen/info/1312/4394.htm", "dblp": ";;;212/1755.html;53/3245-1;20/134-2;95/3291-1", "google_scholar": "uZXQuYAAAAAJ;https://scholar.google.com/citations?view_op=new_profile;tAiXl18AAAAJ;;dT0v5u0AAAAJ;https://scholar.google.com.hk/citations?user=7VPeORoAAAAJ;https://scholar.google.com.tw/citations?user=zIgT0HMAAAAJ", "orcid": ";;;;0000-0002-7709-2543;0000-0001-5128-5649;", "linkedin": ";;;;;;", "or_profile": "~Ning_Ding5;~Qiaosen_Wang1;~Yulin_Chen1;~Pengjun_Xie2;~Zhiyuan_Liu1;~Hai-Tao_Zheng2;~Maosong_Sun1", "aff": "Tsinghua University;University of Chicago;Tsinghua University;Alibaba Group;Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;uchicago.edu;tsinghua.edu.cn;alibaba-inc.com;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;PhD student;MS student;Researcher;Associate Professor;Associate Professor;Full Professor", "bibtex": "@misc{\nding2023applying,\ntitle={Applying Second Order Optimization to Deep Transformers with 
Parameter-Efficient Tuning},\nauthor={Ning Ding and Qiaosen Wang and Yulin Chen and Pengjun Xie and Zhiyuan Liu and Hai-Tao Zheng and Maosong Sun},\nyear={2023},\nurl={https://openreview.net/forum?id=4Fi-5Jiyy5w}\n}", "github": "", "project": "", "reviewers": "fdsh;xAvz;Cqu9;tTAh", "site": "https://openreview.net/forum?id=4Fi-5Jiyy5w", "pdf_size": 2871040, "recommendation": "3;3;5;5", "confidence": "4;4;3;4", "correctness": "2;4;3;2", "technical_novelty": "1;2;2;2", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "91;129;25;82", "wc_strength_and_weaknesses": "504;162;139;155", "wc_clarity_quality_novelty_and_reproducibility": "35;61;149;768", "wc_summary_review": "66;28;40;34", "wc_review": "696;380;353;1039", "wc_reply_reviewers": "348;0;0;177", "wc_reply_authors": "1511;603;746;1750", "reply_reviewers": "1;0;0;1", "reply_authors": "3;1;1;4", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.82915619758885 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 81.75, 37.21138938550938 ], "wc_strength_and_weaknesses_avg": [ 240.0, 152.6482885590271 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 253.25, 300.17859267442776 ], "wc_summary_review_avg": [ 42.0, 14.491376746189438 ], "wc_review_avg": [ 617.0, 278.4735175918888 ], "wc_reply_reviewers_avg": [ 131.25, 144.50497396283632 ], "wc_reply_authors_avg": [ 1152.5, 488.03713998014535 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.25, 1.299038105676658 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": -0.30151134457776363, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15847707043676726221&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;0;2;0;0;0", "aff_unique_norm": "Tsinghua University;University of Chicago;Alibaba Group", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.uchicago.edu;https://www.alibaba.com", "aff_unique_abbr": "THU;UChicago;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;0;0", "aff_country_unique": "China;United States" }, { "id": "4GtbV7o7-6l", "title": "SC2EGSet: StarCraft II Esport Replay and Game-state Dataset", "track": "main", "status": "Withdraw", "tldr": "Infrastructure, and a dataset crucial for research in a new and developing field of esports.", "abstract": "As a relatively new form of sport, esports offers unparalleled data availability. Despite the vast amounts of data that are generated by game engines, it can be challenging to extract them and verify their integrity for the purposes of practical and scientific use.\n\nOur work aims to open esports to a broader scientific community by supplying raw and pre-processed files from StarCraft II esports tournaments. These files can be used in statistical and machine learning modeling tasks and related to various laboratory-based measurements (e.g., behavioral tests, brain imaging). We have gathered publicly available game-engine generated \"replays\" of tournament matches and performed data extraction and cleanup using a low-level application programming interface (API) parser library.\n\nAdditionally, we open-sourced and published all the custom tools that were developed in the process of creating our dataset. 
These tools include PyTorch and PyTorch Lightning API abstractions to load and model the data.\n\nOur dataset contains replays from major and premier StarCraft II tournaments since 2016. To prepare the dataset, we processed 55 tournament \"replaypacks\" that contained 17930 files with game-state information. Based on an initial investigation of available StarCraft II datasets, we observed that our dataset was the largest publicly available source of StarCraft II esports data upon its publication.\n\nAnalysis of the extracted data holds promise for further Artificial Intelligence (AI), Machine Learning (ML), psychological, Human-Computer Interaction (HCI), and sports-related studies in a variety of supervised and self-supervised tasks.", "keywords": "StarCraft II;esports;machine learning;dataset", "primary_area": "", "supplementary_material": "/attachment/57491ee6e7d8d8f6f2edc64e0b7d41d17bfb8a37.zip", "author": "Andrzej Bia\u0142ecki;Natalia Jakubowska;Pawe\u0142 Dobrowolski;Piotr Bia\u0142ecki;Leszek Krupi\u0144ski;Andrzej Szczap;Robert Bia\u0142ecki;Jan Gajewski", "authorids": "~Andrzej_Bia\u0142ecki1;~Natalia_Jakubowska1;~Pawe\u0142_Dobrowolski1;~Piotr_Bia\u0142ecki1;~Leszek_Krupi\u0144ski1;~Andrzej_Szczap1;~Robert_Bia\u0142ecki1;~Jan_Gajewski1", "gender": "M;F;;M;M;M;;", "homepage": "https://github.com/Kaszanas;;;;https://leafnode.pl/;;https://www.awf.edu.pl/pracownik/wyszukiwarka-pracownikow/wydzial-wychowania-fizycznego/bialecki-robert;https://www.awf.edu.pl/", "dblp": ";;;;;;;", "google_scholar": "lts1VPkAAAAJ;HI5reeYAAAAJ;;;;;;https://scholar.google.pl/citations?user=0BzM8yUAAAAJ", "orcid": "0000-0003-3668-4638;0000-0002-6032-4387;0000-0002-1640-0565;;;;;0000-0002-2146-6198", "linkedin": "andrzej-bialecki/;;;piotr-bia%C5%82ecki-b7561b61/;;andrzej-szczap-109377205/;;", "or_profile": "~Andrzej_Bia\u0142ecki1;~Natalia_Jakubowska1;~Pawe\u0142_Dobrowolski1;~Piotr_Bia\u0142ecki1;~Leszek_Krupi\u0144ski1;~Andrzej_Szczap1;~Robert_Bia\u0142ecki1;~Jan_Gajewski1", "aff": "Warsaw University of Technology;Warsaw School of Social Psychology;Institute of Psychology, Polish Academy of Sciences;;;Adam Mickiewicz University of Poznan;Physical Education Academy \"Jozef Pilsudski\" in Warsaw;Physical Education Academy \"Jozef Pilsudski\" in Warsaw", "aff_domain": "pw.edu.pl;swps.edu.pl;psych.pan.pl;;;amu.edu.pl;awf.edu.pl;awf.edu.pl", "position": "PhD student;PhD student;Assistant Professor;;;Undergrad student;Lecturer;Associate Professor", "bibtex": "@misc{\nbia{\\l}ecki2023scegset,\ntitle={{SC}2{EGS}et: StarCraft {II} Esport Replay and Game-state Dataset},\nauthor={Andrzej Bia{\\l}ecki and Natalia Jakubowska and Pawe{\\l} Dobrowolski and Piotr Bia{\\l}ecki and Leszek Krupi{\\'n}ski and Andrzej Szczap and Robert Bia{\\l}ecki and Jan Gajewski},\nyear={2023},\nurl={https://openreview.net/forum?id=4GtbV7o7-6l}\n}", "github": "", "project": "", "reviewers": "2855;RMqu;4wGe", "site": "https://openreview.net/forum?id=4GtbV7o7-6l", "pdf_size": 1604551, "recommendation": "1;3;3", "confidence": "4;4;4", "correctness": "2;2;3", "technical_novelty": "2;1;1", "empirical_novelty": "2;2;1", "wc_summary_paper": "17;73;21", "wc_strength_and_weaknesses": "71;450;52", "wc_clarity_quality_novelty_and_reproducibility": "15;115;2", "wc_summary_review": "9;51;2", "wc_review": "112;689;77", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 2.3333333333333335, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [
2.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 1.3333333333333333, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "wc_summary_paper_avg": [ 37.0, 25.508168626278653 ], "wc_strength_and_weaknesses_avg": [ 191.0, 183.30484627163207 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 44.0, 50.48432099837203 ], "wc_summary_review_avg": [ 20.666666666666668, 21.638443156156644 ], "wc_review_avg": [ 292.6666666666667, 280.6140093121194 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7377218485598849613&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 14, "aff_unique_index": "0;1;2;3;4;4", "aff_unique_norm": "Warsaw University of Technology;Warsaw School of Social Psychology;Polish Academy of Sciences;Adam Mickiewicz University;Physical Education Academy \"Jozef Pilsudski\"", "aff_unique_dep": ";Social Psychology;Institute of Psychology;;Physical Education", "aff_unique_url": "https://www.pw.edu.pl;https://www.wssps.pl;https://www.pan.pl;https://www.amu.edu.pl;", "aff_unique_abbr": "WUT;;PAS;AMU;", "aff_campus_unique_index": "1;2;2", "aff_campus_unique": ";Poznan;Warsaw", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "Poland" }, { "id": "4I3vW2sInc", "title": "Efficient Evaluation of Adversarial Robustness for Deep Hashing based Retrieval", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Deep hashing has been extensively applied to massive image retrieval due to its efficiency and effectiveness. Recently, several adversarial attacks have been presented to reveal the vulnerability of deep hashing models against adversarial examples. However, existing attack methods suffer from degraded performance or inefficiency because they underutilize the semantic relations between original samples or spend a lot of time learning from these samples. In this paper, we propose a novel Pharos-guided Attack, dubbed \\textbf{PgA}, to evaluate the adversarial robustness of deep hashing networks efficiently. Specifically, we design \\textit{pharos code} to represent the semantics of the benign image, which preserves the similarity with semantically related samples and dissimilarity with irrelevant examples. It is proven that we can quickly calculate the pharos code via a simple mathematical formula rather than time-consuming iterative procedures. Thus, PgA can directly conduct a reliable and efficient attack on deep hashing-based retrieval by maximizing the similarity between the hash code of the adversarial example and the pharos code.
Extensive experiments on the benchmark datasets verify that the proposed algorithm outperforms prior state-of-the-art methods in both attack strength and speed.", "keywords": "Adversarial Attack;Adversarial Training;Deep Hashing;Similarity Retrieval", "primary_area": "", "supplementary_material": "", "author": "Xunguang Wang;Xinyue Xu;Jiawang Bai;Xiaomeng Li", "authorids": "~Xunguang_Wang1;~Xinyue_Xu1;~Jiawang_Bai2;~Xiaomeng_Li1", "gender": "M;F;M;F", "homepage": "https://sites.google.com/view/xunguangwang/;;;https://xmengli.github.io/", "dblp": "265/5513;;237/9675;02/9850-1", "google_scholar": "KNdj9HMAAAAJ;;https://scholar.google.com.hk/citations?user=sRksETcAAAAJ;uVTzPpoAAAAJ", "orcid": "0000-0002-5330-2286;0000-0002-2037-5462;;", "linkedin": ";;;", "or_profile": "~Xunguang_Wang1;~Xinyue_Xu1;~Jiawang_Bai2;~Xiaomeng_Li1", "aff": "Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Tsinghua University;Hong Kong University of Science and Technology", "aff_domain": "ust.hk;ust.hk;tsinghua.edu.cn;ust.hk", "position": "PhD student;PhD student;PhD student;Assistant Professor", "bibtex": "@misc{\nwang2023efficient,\ntitle={Efficient Evaluation of Adversarial Robustness for Deep Hashing based Retrieval},\nauthor={Xunguang Wang and Xinyue Xu and Jiawang Bai and Xiaomeng Li},\nyear={2023},\nurl={https://openreview.net/forum?id=4I3vW2sInc}\n}", "github": "", "project": "", "reviewers": "sVm7;vA2p;Mz6c", "site": "https://openreview.net/forum?id=4I3vW2sInc", "pdf_size": 4492254, "recommendation": "3;5;5", "confidence": "3;5;2", "correctness": "3;3;2", "technical_novelty": "2;3;3", "empirical_novelty": "3;2;2", "wc_summary_paper": "42;88;29", "wc_strength_and_weaknesses": "156;130;103", "wc_clarity_quality_novelty_and_reproducibility": "20;57;67", "wc_summary_review": "17;52;73", "wc_review": "235;327;272", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 1.247219128924647 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 53.0, 25.311394008759507 ], "wc_strength_and_weaknesses_avg": [ 129.66666666666666, 21.63844315615664 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 48.0, 20.215505600075073 ], "wc_summary_review_avg": [ 47.333333333333336, 23.098821518760552 ], "wc_review_avg": [ 278.0, 37.797707161502096 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.18898223650461365, "corr_recommendation_correctness": -0.49999999999999983, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Ftwde4EJ3WoJ:scholar.google.com/&scioq=Efficient+Evaluation+of+Adversarial+Robustness+for+Deep+Hashing+based+Retrieval&hl=en&as_sdt=0,48", "gs_version_total": 0, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Hong Kong University of Science and Technology;Tsinghua University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ust.hk;https://www.tsinghua.edu.cn", "aff_unique_abbr": "HKUST;THU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0",
"aff_country_unique": "China" }, { "id": "4IaQ99pSbg5", "title": "ProtoVAE: Using Prototypical Networks for Unsupervised Disentanglement", "track": "main", "status": "Reject", "tldr": "Unsupervised Disentangled representation learning using Isometric inductive biases", "abstract": "Generative modeling and self-supervised learning have in recent years made great strides towards learning from data in a completely \\emph{unsupervised} way. There is still, however, an open area of investigation into guiding the neural network to learn useful or good representations. The problem of unsupervised \\textit{Disentanglement} is of particular importance as it offers to learn interpretable representations, with disjoint subsets of the representation encoding different, meaningful factors of variation. Recent work has theoretically grounded the factors of variation, via the lens of group theory, as disentangled actions of the symmetry subgroups which transform only the correspond subspaces of the disentangled representation. We use this mathematical formalism instead to impose constraints on the representations learned by a unsupervised generative neural network, such that transformations of the representation correspond to the actions of a unique symmetry subgroup. To this end, we introduce a novel model, ProtoVAE, that leverages a deep metric learning Prototypical network trained via self-supervision to constrain the latent space of a Variational Autoencoder to decompose into independent subspaces. Further, we actively change or \\textit{intervene} in the latent space during training to enforce each dimension of the representation to uniquely and consistently transform the data corresponding to some symmetry subgroup. We demonstrate and evaluate our proposed model on the benchmark DSprites and 3DShapes datasets and compare with other state of the art disentanglement methods via qualitative traversals in the latent space, as well as quantitative disentanglement metrics. 
We further qualitatively demonstrate the effectiveness of our model on the real-world datasets CelebA which consistently encodes the different factors.", "keywords": "Unsupervised Learning;Disentangled Representations", "primary_area": "", "supplementary_material": "", "author": "Vaishnavi S Patil;Matthew Evanusa;Joseph JaJa", "authorids": "~Vaishnavi_S_Patil1;~Matthew_Evanusa1;~Joseph_JaJa1", "gender": "F;M;M", "homepage": "https://www.cs.umd.edu/people/vspatil;;http://users.umiacs.umd.edu/~josephj/", "dblp": ";168/7705;j/JosephJaJa", "google_scholar": ";;https://scholar.google.com/citations?hl=en", "orcid": ";;", "linkedin": ";;", "or_profile": "~Vaishnavi_S_Patil1;~Matthew_Evanusa1;~Joseph_JaJa1", "aff": "University of Maryland, College Park;;University of Maryland, College Park", "aff_domain": "umd.edu;;umd.edu", "position": "PhD student;;Full Professor", "bibtex": "@misc{\npatil2023protovae,\ntitle={Proto{VAE}: Using Prototypical Networks for Unsupervised Disentanglement},\nauthor={Vaishnavi S Patil and Matthew Evanusa and Joseph JaJa},\nyear={2023},\nurl={https://openreview.net/forum?id=4IaQ99pSbg5}\n}", "github": "", "project": "", "reviewers": "L6Gs;69qY;Fvdi;jevi", "site": "https://openreview.net/forum?id=4IaQ99pSbg5", "pdf_size": 8207488, "recommendation": "3;3;3;3", "confidence": "4;4;4;3", "correctness": "1;3;3;2", "technical_novelty": "1;3;3;2", "empirical_novelty": "2;1;3;2", "wc_summary_paper": "27;69;117;148", "wc_strength_and_weaknesses": "145;508;89;182", "wc_clarity_quality_novelty_and_reproducibility": "83;39;205;92", "wc_summary_review": "54;33;75;75", "wc_review": "309;649;486;497", "wc_reply_reviewers": "0;28;0;0", "wc_reply_authors": "500;719;255;305", "reply_reviewers": "0;1;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.25, 0.82915619758885 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 90.25, 46.10517866791105 ], "wc_strength_and_weaknesses_avg": [ 231.0, 163.31717607159388 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 104.75, 61.25510182833753 ], "wc_summary_review_avg": [ 59.25, 17.41228014936585 ], "wc_review_avg": [ 485.25, 120.4333321800904 ], "wc_reply_reviewers_avg": [ 7.0, 12.12435565298214 ], "wc_reply_authors_avg": [ 444.75, 182.89392417464282 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:OoP2zj0HQUIJ:scholar.google.com/&scioq=ProtoVAE:+Using+Prototypical+Networks+for+Unsupervised+Disentanglement&hl=en&as_sdt=0,31", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "University of Maryland", "aff_unique_dep": "", "aff_unique_url": "https://www/umd.edu", "aff_unique_abbr": "UMD", "aff_campus_unique_index": "0;0", "aff_campus_unique": "College Park", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "4JRX93ADS2r", "title": "Extreme Masking for Learning Instance and Distributed Visual Representations", "track": "main", "status": "Withdraw", "tldr": "A method that uses extremely large masking as a novel augmentation for learning siamese networks.", "abstract": "The paper presents a scalable approach for learning distributed representations over individual 
tokens and a holistic instance representation simultaneously. We use self-attention blocks to represent distributed tokens, followed by cross-attention blocks to aggregate the holistic instance. The core of the approach is the use of extremely large token masking (75\\%-90\\%) as the data augmentation for supervision. Our model, named ExtreMA, follows the plain BYOL approach where the instance representation from the unmasked subset is trained to predict that from the intact input. Learning requires the model to capture informative variations in an instance, instead of encouraging invariances. \nThe paper makes three contributions: 1) Random masking is a strong and computationally efficient data augmentation for learning generalizable attention representations. 2) With multiple sampling per instance, extreme masking greatly speeds up learning and hungers for more data. 3) Distributed representations can be learned from the instance supervision alone, unlike per-token supervisions in masked modeling.", "keywords": "visual representation learning;self-supervised learning;masked modeling", "primary_area": "", "supplementary_material": "/attachment/f5e6d074ff8aeb83413065333338246600432827.zip", "author": "Zhirong Wu;Zihang Lai;Xiao Sun;Stephen Lin", "authorids": "~Zhirong_Wu4;~Zihang_Lai1;~Xiao_Sun2;~Stephen_Lin1", "gender": ";M;;M", "homepage": ";https://jimmysuen.github.io/;https://www.microsoft.com/en-us/research/people/stevelin/;https://www.microsoft.com/en-us/research/people/wuzhiron/", "dblp": "227/2343;151/8845;55/4755-1.html;147/5025", "google_scholar": "31eXgMYAAAAJ;wYIe0tYAAAAJ;c3PYmxUAAAAJ;lH4zgcIAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Zihang_Lai1;~Xiao_Sun2;~Stephen_Lin1;~Zhirong_Wu1", "aff": "University of Oxford;Shanghai Artificial Intelligence Laboratory;Microsoft Research;Microsoft Research", "aff_domain": "ox.ac.uk;pjlab.org.cn;microsoft.com;microsoft.com", "position": "PhD student;Principal Researcher;Researcher;Researcher", "bibtex": "@misc{\nwu2023extreme,\ntitle={Extreme Masking for Learning Instance and Distributed Visual Representations},\nauthor={Zhirong Wu and Zihang Lai and Xiao Sun and Stephen Lin},\nyear={2023},\nurl={https://openreview.net/forum?id=4JRX93ADS2r}\n}", "github": "", "project": "", "reviewers": "wgtJ;6Gi7;1mB8;etds", "site": "https://openreview.net/forum?id=4JRX93ADS2r", "pdf_size": 3450645, "recommendation": "1;3;5;8", "confidence": "5;4;4;4", "correctness": "2;2;4;4", "technical_novelty": "1;1;3;3", "empirical_novelty": "0;2;3;4", "wc_summary_paper": "67;33;41;80", "wc_strength_and_weaknesses": "257;518;186;114", "wc_clarity_quality_novelty_and_reproducibility": "32;48;80;64", "wc_summary_review": "24;10;27;41", "wc_review": "380;609;334;299", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.25, 2.5860201081971503 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 3.0, 1.0 ], "technical_novelty_avg": [ 2.0, 1.0 ], "empirical_novelty_avg": [ 2.25, 1.479019945774904 ], "wc_summary_paper_avg": [ 55.25, 19.031224343168255 ], "wc_strength_and_weaknesses_avg": [ 268.75, 152.52766142572304 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 56.0, 17.88854381999832 ], "wc_summary_review_avg": [ 25.5, 11.01135777277262 ], "wc_review_avg": [ 405.5, 120.95143653549552 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": 
[ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.7255892438417318, "corr_recommendation_correctness": 0.8700628401410974, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2975702416015261645&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "University of Oxford;Shanghai Artificial Intelligence Laboratory;Microsoft", "aff_unique_dep": ";;Microsoft Research", "aff_unique_url": "https://www.ox.ac.uk;http://www.shailab.org/;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "Oxford;Shanghai AI Lab;MSR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;2", "aff_country_unique": "United Kingdom;China;United States" }, { "id": "4JVdg72e7f", "title": "CLIP2Point: Transfer CLIP to Point Cloud Classification with Image-Depth Pre-training", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Pre-training across 3D vision and language remains under development because of limited training data. Recent works attempt to transfer vision-language pre-training models to 3D vision. PointCLIP converts point cloud data to multi-view depth maps, adopting CLIP for shape classification. However, its performance is restricted by the domain gap between rendered depth maps and images, as well as the diversity of depth distributions. To address this issue, we propose CLIP2Point, an image-depth pre-training method by contrastive learning to transfer CLIP to the 3D domain, and adapt it to point cloud classification. We introduce a new depth rendering setting that forms a better visual effect, and then render 52,460 pairs of images and depth maps from ShapeNet for pre-training. The pre-training scheme of CLIP2Point combines cross-modality learning to enforce the depth features for capturing expressive visual and textual features and intra-modality learning to enhance the invariance of depth aggregation. Additionally, we propose a novel Dual-Path Adapter (DPA) module, i.e., a dual-path structure with simplified adapters for few-shot learning. The dual-path structure allows the joint use of CLIP and CLIP2Point, and the simplified adapter can well fit few-shot tasks without post-search. Experimental results show that CLIP2Point is effective in transferring CLIP knowledge to 3D vision. Our CLIP2Point outperforms PointCLIP and other self-supervised 3D networks, achieving state-of-the-art results on zero-shot and few-shot classification.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Tianyu Huang;Bowen Dong;Yunhan Yang;Xiaoshui Huang;Rynson W. H. 
Lau;Wanli Ouyang;Wangmeng Zuo", "authorids": "~Tianyu_Huang2;~Bowen_Dong1;~Yunhan_Yang1;~Xiaoshui_Huang1;~Rynson_W._H._Lau1;~Wanli_Ouyang1;~Wangmeng_Zuo3", "gender": ";M;M;Not Specified;;;M", "homepage": ";;https://yhyang-myron.github.io/;https://xiaoshuihuang.github.io/;;;", "dblp": ";;220/2431;167/9599;;;93/2671", "google_scholar": ";t0WhKEYAAAAJ;https://scholar.google.com/citations?view_op=list_works;https://scholar.google.ca/citations?user=rp7mYNsAAAAJ;;;rUOpCEYAAAAJ", "orcid": ";0000-0001-7379-1286;;;;;0000-0002-3330-783X", "linkedin": ";;;;;;", "or_profile": "~Tianyu_Huang2;~Bowen_Dong1;~Yunhan_Yang1;~Xiaoshui_Huang1;~Rynson_W._H._Lau1;~Wanli_Ouyang1;~Wangmeng_Zuo3", "aff": ";Harbin Institute of Technology;Harbin Institute of Technology;Shanghai AI Laboratory;;;Harbin Institute of Technology", "aff_domain": ";hit.edu.cn;hit.edu.cn;pjlab.org.cn;;;hit.edu.cn", "position": ";PhD student;Undergrad student;Research Fellow;;;Full Professor", "bibtex": "@misc{\nhuang2023clippoint,\ntitle={{CLIP}2Point: Transfer {CLIP} to Point Cloud Classification with Image-Depth Pre-training},\nauthor={Tianyu Huang and Bowen Dong and Yunhan Yang and Xiaoshui Huang and Rynson W. H. Lau and Wanli Ouyang and Wangmeng Zuo},\nyear={2023},\nurl={https://openreview.net/forum?id=4JVdg72e7f}\n}", "github": "", "project": "", "reviewers": "eMRp;LNqF;nqzW;1hPo", "site": "https://openreview.net/forum?id=4JVdg72e7f", "pdf_size": 2854982, "recommendation": "3;3;5;5", "confidence": "4;4;3;4", "correctness": "4;3;3;3", "technical_novelty": "2;2;3;2", "empirical_novelty": "0;2;2;2", "wc_summary_paper": "103;85;101;77", "wc_strength_and_weaknesses": "208;189;110;338", "wc_clarity_quality_novelty_and_reproducibility": "22;109;13;81", "wc_summary_review": "32;42;7;44", "wc_review": "365;425;231;540", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 91.5, 10.897247358851684 ], "wc_strength_and_weaknesses_avg": [ 211.25, 81.88826228465224 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 56.25, 40.12091100660602 ], "wc_summary_review_avg": [ 31.25, 14.720309100015529 ], "wc_review_avg": [ 390.25, 111.38979980231584 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": -0.5773502691896257, "gs_citation": 166, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11271067648820224142&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Harbin Institute of Technology;Shanghai AI Laboratory", "aff_unique_dep": ";", "aff_unique_url": "http://www.hit.edu.cn/;https://www.shanghai-ai-lab.com", "aff_unique_abbr": "HIT;SAIL", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Harbin;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "4JoV9g5R1M", "title": "Emergent Communication with Attention", "track": "main", "status": "Reject", "tldr": "We study emergent language from attention agents with the referential game showing that their language is more compositional and 
interpretable.", "abstract": "To develop computational agents that can better communicate with others using their own emergent language, we endow the agents with an ability to focus their attention on particular concepts in the environment. Humans often understand a thing or scene as a composite of concepts and those concepts are further mapped onto words. We implement this intuition as attention mechanisms in Speaker and Listener agents in a referential game and show attention leads to more compositional and interpretable emergent language. We also demonstrate how attention helps us understand the learned communication protocol by investigating the attention weights associated with each message symbol and the alignment of attention weights between Speaker and Listener agents. Overall, our results suggest that attention is a promising mechanism for developing more human-like emergent language.", "keywords": "emergent communication;attention mechanism;compositionality;interpretability", "primary_area": "", "supplementary_material": "", "author": "Ryokan Ri;Ryo Ueda;Jason Naradowsky", "authorids": "~Ryokan_Ri1;~Ryo_Ueda1;~Jason_Naradowsky2", "gender": "M;M;M", "homepage": "https://ryou0634.github.io/;https://sites.google.com/view/ryo-ueda/;http://narad.github.io", "dblp": "254/9596;191/3366;47/7442", "google_scholar": "https://scholar.google.co.jp/citations?user=z9is5FAAAAAJ;https://scholar.google.co.jp/citations?user=4HULQlwAAAAJ;w4d5WRcAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Ryokan_Ri1;~Ryo_Ueda1;~Jason_Naradowsky2", "aff": "The University of Tokyo, Tokyo Institute of Technology;The University of Tokyo;The University of Tokyo", "aff_domain": "u-tokyo.ac.jp;u-tokyo.ac.jp;u-tokyo.ac.jp", "position": "PhD student;MS student;Researcher", "bibtex": "@misc{\nri2023emergent,\ntitle={Emergent Communication with Attention},\nauthor={Ryokan Ri and Ryo Ueda and Jason Naradowsky},\nyear={2023},\nurl={https://openreview.net/forum?id=4JoV9g5R1M}\n}", "github": "", "project": "", "reviewers": "gZp6;kyNy;4G68;4TD5", "site": "https://openreview.net/forum?id=4JoV9g5R1M", "pdf_size": 1021281, "recommendation": "3;5;5;5", "confidence": "5;3;4;2", "correctness": "3;3;3;4", "technical_novelty": "2;2;2;2", "empirical_novelty": "2;3;3;2", "wc_summary_paper": "41;164;80;96", "wc_strength_and_weaknesses": "245;268;375;279", "wc_clarity_quality_novelty_and_reproducibility": "196;167;38;24", "wc_summary_review": "92;108;89;15", "wc_review": "574;707;582;414", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "443;270;778;763", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 95.25, 44.448706392874925 ], "wc_strength_and_weaknesses_avg": [ 291.75, 49.605317255310446 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 106.25, 76.10642219944386 ], "wc_summary_review_avg": [ 76.0, 35.9513560244951 ], "wc_review_avg": [ 569.25, 103.99849758530168 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 563.5, 215.91259805763997 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.7745966692414834, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 6, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=5342810340863241975&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Tokyo", "aff_unique_dep": "", "aff_unique_url": "https://www.u-tokyo.ac.jp", "aff_unique_abbr": "UTokyo", "aff_campus_unique_index": "0", "aff_campus_unique": "Tokyo;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Japan" }, { "id": "4Jq0XWCZQel", "title": "Neural Image Compression with a Diffusion-based Decoder", "track": "main", "status": "Reject", "tldr": "Diffusion-based neural image codec allowing smooth and competitive rate-distortion-perception traversal at test time.", "abstract": "Diffusion probabilistic models have recently achieved remarkable success in generating high quality image and video data. In this work, we build on this class of generative models and introduce a method for lossy compression of high resolution images. The resulting codec, which we call \\emph{DIffuson-based Residual Augmentation Codec (DIRAC)}, is the first neural codec to allow smooth traversal of the rate-distortion-perception tradeoff at test time, while obtaining competitive performance with GAN-based methods in perceptual quality. Furthermore, while sampling from diffusion probabilistic models is notoriously expensive, we show that in the compression setting the number of steps can be drastically reduced.", "keywords": "neural;image;lossy;compression;diffusion;gan;perceptual", "primary_area": "", "supplementary_material": "", "author": "Noor Fathima Khanum Mohamed Ghouse;Jens Petersen;Auke J. Wiggers;Tianlin Xu;Guillaume Sautiere", "authorids": "~Noor_Fathima_Khanum_Mohamed_Ghouse1;~Jens_Petersen2;~Auke_J._Wiggers1;~Tianlin_Xu1;~Guillaume_Sautiere1", "gender": "F;M;M;F;M", "homepage": ";https://jens.pe;https://aukejw.github.io/;https://github.com/tianlinxu312;", "dblp": "226/1856;;182/2485;;", "google_scholar": "M9BUCaUAAAAJ;https://scholar.google.de/citations?hl=en;https://scholar.google.nl/citations?user=rrwwB4cAAAAJ;KPrpfPsAAAAJ;Mou2yEEAAAAJ", "orcid": ";;;;", "linkedin": "noor-fathima;;;;guillaumesautiere/", "or_profile": "~Noor_Fathima_Khanum_Mohamed_Ghouse1;~Jens_Petersen2;~Auke_J._Wiggers1;~Tianlin_Xu1;~Guillaume_Sautiere1", "aff": "Qualcomm Inc, QualComm;Qualcomm AI Research;QualComm;Illumina AI Lab;Qualcomm Inc, QualComm", "aff_domain": "qti.qualcomm.com;qualcomm.com;qualcomm.com;illumina.com;qti.qualcomm.com", "position": "Researcher;Researcher;Researcher;Researcher;Researcher", "bibtex": "@misc{\nghouse2023neural,\ntitle={Neural Image Compression with a Diffusion-based Decoder},\nauthor={Noor Fathima Khanum Mohamed Ghouse and Jens Petersen and Auke J. 
Wiggers and Tianlin Xu and Guillaume Sautiere},\nyear={2023},\nurl={https://openreview.net/forum?id=4Jq0XWCZQel}\n}", "github": "", "project": "", "reviewers": "G974;Yvxp;gzCy", "site": "https://openreview.net/forum?id=4Jq0XWCZQel", "pdf_size": 8744391, "recommendation": "3;3;6", "confidence": "5;3;3", "correctness": "4;3;4", "technical_novelty": "2;1;2", "empirical_novelty": "2;1;3", "wc_summary_paper": "58;67;183", "wc_strength_and_weaknesses": "168;213;270", "wc_clarity_quality_novelty_and_reproducibility": "23;128;177", "wc_summary_review": "19;55;66", "wc_review": "268;463;696", "wc_reply_reviewers": "0;328;0", "wc_reply_authors": "378;562;555", "reply_reviewers": "0;1;0", "reply_authors": "1;2;1", "recommendation_avg": [ 4.0, 1.4142135623730951 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 102.66666666666667, 56.92294987280809 ], "wc_strength_and_weaknesses_avg": [ 217.0, 41.737273509418415 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 109.33333333333333, 64.24086619036895 ], "wc_summary_review_avg": [ 46.666666666666664, 20.07209228976613 ], "wc_review_avg": [ 475.6666666666667, 174.95967789433337 ], "wc_reply_reviewers_avg": [ 109.33333333333333, 154.6206828194584 ], "wc_reply_authors_avg": [ 498.3333333333333, 85.13649171901683 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.5000000000000001, "corr_recommendation_correctness": 0.5, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6024459467569198037&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "Qualcomm Incorporated;Qualcomm;Illumina", "aff_unique_dep": ";Qualcomm AI Research;AI Lab", "aff_unique_url": "https://www.qualcomm.com;https://www.qualcomm.com/research;https://www.illumina.com", "aff_unique_abbr": "Qualcomm;QAI;Illumina", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "4K2SRejNGEI", "title": "Boosting Drug-Target Affinity Prediction from Nearest Neighbors", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Precisely predicting Drug-Target binding Affinity (DTA) is essential for drug discovery. \nRecently, deep learning methods have been popular with DTA prediction. However, the prediction accuracy is still far from satisfaction. \nIn this work, inspired by the recent success of retrieval methods, we propose $k$NN-DTA, a non-parametric embedding-based retrieval method adopted on a pre-trained DTA prediction model, which can extend the power of the neural DTA model with no or negligible cost. \nCompared to traditional chemical similarity retrieval, our embedding-based retrieval shows extremely high efficiency.\nDifferent from existing methods, we introduce two neighbor aggregation ways from both embedding space and label space that are integrated in a unified framework. \nSpecifically, we propose a \\emph{label aggregation} with \\emph{pair-wise retrieval} and a \\emph{representation aggregation} with \\emph{point-wise retrieval} of the nearest neighbors. 
\nThis method executes in the inference phase and can efficiently boost the DTA prediction performance with no training cost.\nIn addition, we propose an extension, Ada-$k$NN-DTA, an instance-wise and adaptive aggregation with lightweight learning.\nResults on four benchmark datasets show that $k$NN-DTA brings significant improvements, outperforming previous state-of-the-art (SOTA) results, e.g, on BindingDB IC$_{50}$ and $K_i$ testbeds, $k$NN-DTA obtains new records of RMSE scores $\\bf{0.687}$ and $\\bf{0.748}$ with both $\\bf{4}$ point improvement. \nThe extended Ada-$k$NN-DTA can further improve the performance, e.g., another $\\bf{1}$ point gain on BindingDB. These results strongly prove the effectiveness and efficiency of our method.\nResults on other settings and comprehensive studies/analyses also show the great potential of our $k$NN-DTA approach.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Qizhi Pei;Lijun Wu;Jinhua Zhu;Zhenyu He;Yingce Xia;Shufang Xie;Tao Qin;Rui Yan;Tie-Yan Liu", "authorids": "~Qizhi_Pei1;~Lijun_Wu1;~Jinhua_Zhu1;~Zhenyu_He3;~Yingce_Xia1;~Shufang_Xie1;~Tao_Qin1;~Rui_Yan2;~Tie-Yan_Liu1", "gender": ";M;M;M;M;M;M;M;M", "homepage": "https://qizhipei.github.io/;https://apeterswu.github.io/;https://github.com/teslacool;https://zhenyuhe00.github.io/;https://www.microsoft.com/en-us/research/people/yinxia/;;https://www.microsoft.com/en-us/research/people/taoqin/;https://gsai.ruc.edu.cn/english/ruiyan;http://member.acm.org/~tieyanliu", "dblp": "322/9716;68/1284-3;18/1965-1;355/4626;http://dblp.uni-trier.de/pers/hd/x/Xia:Yingce;https://dblp.uni-trier.de/pid/163/2704-3;14/6841;19/2405-1;l/TieYanLiu", "google_scholar": "sf3xGU8AAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.hk/citations?user=FvGy0LQAAAAJ;https://scholar.google.co.jp/citations?user=bKwkUO4AAAAJ;GS5wRxYAAAAJ;;Bl4SRU0AAAAJ;eLw6g-UAAAAJ;Nh832fgAAAAJ", "orcid": "0000-0002-7242-422X;0000-0002-3530-590X;0000-0003-2157-9077;;;;;0000-0002-3356-6823;0000-0002-0476-8020", "linkedin": "%E5%90%AF%E6%99%BA-%E8%A3%B4-680192218/en?trk=people-guest_people_search-card;lijun-wu-59340478/;;;;;;;", "or_profile": "~Qizhi_Pei1;~Lijun_Wu1;~Jinhua_Zhu1;~Zhenyu_He3;~Yingce_Xia1;~Shufang_Xie1;~Tao_Qin1;~Rui_Yan2;~Tie-Yan_Liu1", "aff": "Microsoft;Microsoft Research;University of Science and Technology of China;University of Electronic Science and Technology of China;Microsoft;Renmin University of China;;Renmin University of China;Microsoft", "aff_domain": "microsoft.com;microsoft.com;ustc.edu.cn;uestc.edu.cn;microsoft.com;ruc.edu.cn;;ruc.edu.cn;microsoft.com", "position": "Intern;Researcher;PhD student;Undergrad student;Researcher;PhD student;;Associate Professor;Distinguished Scientist", "bibtex": "@misc{\npei2023boosting,\ntitle={Boosting Drug-Target Affinity Prediction from Nearest Neighbors},\nauthor={Qizhi Pei and Lijun Wu and Jinhua Zhu and Zhenyu He and Yingce Xia and Shufang Xie and Tao Qin and Rui Yan and Tie-Yan Liu},\nyear={2023},\nurl={https://openreview.net/forum?id=4K2SRejNGEI}\n}", "github": "", "project": "", "reviewers": "GJwX;gADj;qna3", "site": "https://openreview.net/forum?id=4K2SRejNGEI", "pdf_size": 1554318, "recommendation": "3;3;5", "confidence": "4;4;4", "correctness": "2;2;3", "technical_novelty": "2;2;2", "empirical_novelty": "2;2;2", "wc_summary_paper": "31;111;70", "wc_strength_and_weaknesses": "200;109;255", "wc_clarity_quality_novelty_and_reproducibility": "48;548;35", "wc_summary_review": "23;30;39", "wc_review": "302;798;399", 
"wc_reply_reviewers": "0;0;0", "wc_reply_authors": "659;1832;1721", "reply_reviewers": "0;0;0", "reply_authors": "2;4;4", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 70.66666666666667, 32.6632651290236 ], "wc_strength_and_weaknesses_avg": [ 188.0, 60.20520464766037 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 210.33333333333334, 238.8253662313858 ], "wc_summary_review_avg": [ 30.666666666666668, 6.548960901462833 ], "wc_review_avg": [ 499.6666666666667, 214.6381968698851 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1404.0, 528.7400117259899 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 3.3333333333333335, 0.9428090415820634 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 9, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:gUj-6y2xBdMJ:scholar.google.com/&scioq=Boosting+Drug-Target+Affinity+Prediction+from+Nearest+Neighbors&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;1;2;0;3;3;0", "aff_unique_norm": "Microsoft;University of Science and Technology of China;University of Electronic Science and Technology of China;Renmin University of China", "aff_unique_dep": "Microsoft Corporation;;;", "aff_unique_url": "https://www.microsoft.com;http://www.ustc.edu.cn;https://www.uestc.edu.cn;http://www.ruc.edu.cn", "aff_unique_abbr": "Microsoft;USTC;UESTC;RUC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1;0;1;1;0", "aff_country_unique": "United States;China" }, { "title": "Robust Fair Clustering: A Novel Fairness Attack and Defense Framework", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11957", "id": "4LMIZY7gt7h", "poster": "/media/PosterPDFs/ICLR%202023/11957.png?t=1681494006.954899", "openreview": "https://openreview.net/forum?id=4LMIZY7gt7h", "slides": "https://iclr.cc/virtual/2023/poster/11957", "video": "https://iclr.cc/virtual/2023/poster/11957", "author_site": "Anshuman Chhabra, Peizhao Li, Prasant Mohapatra, Hongfu Liu", "tldr": "We propose a highly effective & novel fairness attack against state-of-the-art fair clustering models, & for self-completeness, we propose a defense framework based on consensus clustering & graph representation learning that is robust to our attack.", "abstract": "Clustering algorithms are widely used in many societal resource allocation applications, such as loan approvals and candidate recruitment, among others, and hence, biased or unfair model outputs can adversely impact individuals that rely on these applications. To this end, many $\\textit{fair}$ clustering approaches have been recently proposed to counteract this issue. Due to the potential for significant harm, it is essential to ensure that fair clustering algorithms provide consistently fair outputs even under adversarial influence. However, fair clustering algorithms have not been studied from an adversarial attack perspective. In contrast to previous research, we seek to bridge this gap and conduct a robustness analysis against fair clustering by proposing a novel $\\textit{black-box fairness attack}$. 
Through comprehensive experiments, we find that state-of-the-art models are highly susceptible to our attack as it can reduce their fairness performance significantly. Finally, we propose Consensus Fair Clustering (CFC), the first $\\textit{robust fair clustering}$ approach that transforms consensus clustering into a fair graph partitioning problem, and iteratively learns to generate fair cluster outputs. Experimentally, we observe that CFC is highly robust to the proposed attack and is thus a truly robust fair clustering alternative.", "keywords": "Data Clustering;Fairness Attack;Fairness Defense;Consensus Clustering", "primary_area": "", "supplementary_material": "", "author": "Anshuman Chhabra;Peizhao Li;Prasant Mohapatra;Hongfu Liu", "authorids": "~Anshuman_Chhabra1;~Peizhao_Li1;~Prasant_Mohapatra1;~Hongfu_Liu2", "gender": "M;M;M;M", "homepage": "https://anshumanc.com;https://peizhaoli.com;https://faculty.engineering.ucdavis.edu/mohapatra/;http://hongfuliu.com/", "dblp": "199/8940;232/1771;m/Prasant_Mohapatra2.html;32/9075-1", "google_scholar": "https://scholar.google.co.in/citations?user=1U7Zy7sAAAAJ;h8UyqB4AAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": "0000-0001-9376-2896;;0000-0002-2768-5308;", "linkedin": "anshuman-chhabra-860a8411a/;peizhao-li-099037182/;;", "or_profile": "~Anshuman_Chhabra1;~Peizhao_Li1;~Prasant_Mohapatra1;~Hongfu_Liu2", "aff": "University of California, Davis;Brandeis University;University of California, Davis;Brandeis University", "aff_domain": "ucdavis.edu;brandeis.edu;ucdavis.edu;brandeis.edu", "position": "PhD student;PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nchhabra2023robust,\ntitle={Robust Fair Clustering: A Novel Fairness Attack and Defense Framework},\nauthor={Anshuman Chhabra and Peizhao Li and Prasant Mohapatra and Hongfu Liu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=4LMIZY7gt7h}\n}", "github": "", "project": "", "reviewers": "EUnH;UuYT;HmWf;XY3W", "pdf_size": 3277486, "recommendation": "6;6;8;8", "confidence": "1;4;4;2", "correctness": "3;4;4;3", "technical_novelty": "3;3;4;2", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "45;85;101;29", "wc_strength_and_weaknesses": "65;70;205;152", "wc_clarity_quality_novelty_and_reproducibility": "14;68;271;47", "wc_summary_review": "35;72;63;44", "wc_review": "159;295;640;272", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "463;600;1761;728", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;3;1", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 2.75, 1.299038105676658 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 65.0, 29.120439557122072 ], "wc_strength_and_weaknesses_avg": [ 123.0, 58.6046073274107 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 100.0, 100.58578428386389 ], "wc_summary_review_avg": [ 53.5, 14.705441169852742 ], "wc_review_avg": [ 341.5, 179.861752465609 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 888.0, 512.6641200630291 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.19245008972987523, "corr_recommendation_correctness": 0.0, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11061965317912660967&as_sdt=8005&sciodt=0,7&hl=en", 
"gs_version_total": 7, "pdf": "https://openreview.net/pdf?id=4LMIZY7gt7h", "email": "ucdavis.edu;brandeis.edu;ucdavis.edu;brandeis.edu", "author_num": 4, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "University of California, Davis;Brandeis University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucdavis.edu;https://www.brandeis.edu", "aff_unique_abbr": "UC Davis;Brandeis", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Davis;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Equivariant Shape-Conditioned Generation of 3D Molecules for Ligand-Based Drug Design", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11314", "id": "4MbGnp4iPQ", "poster": "/media/PosterPDFs/ICLR%202023/11314.png?t=1682108634.6946483", "openreview": "https://openreview.net/forum?id=4MbGnp4iPQ", "slides": "https://iclr.cc/virtual/2023/poster/11314", "video": "https://iclr.cc/virtual/2023/poster/11314", "author_site": "Keir Adams, Connor Coley", "tldr": "We develop a shape-conditioned 3D generative model for ligand-based drug design", "abstract": "Shape-based virtual screening is widely used in ligand-based drug design to search chemical libraries for molecules with similar 3D shapes yet novel 2D graph structures compared to known ligands. 3D deep generative models can potentially automate this exploration of shape-conditioned 3D chemical space; however, no existing models can reliably generate geometrically realistic drug-like molecules in conformations with a specific shape. We introduce a new multimodal 3D generative model that enables shape-conditioned 3D molecular design by equivariantly encoding molecular shape and variationally encoding chemical identity. We ensure local geometric and chemical validity of generated molecules by using autoregressive fragment-based generation with heuristic bonding geometries, allowing the model to prioritize the scoring of rotatable bonds to best align the growing conformation to the target shape. We evaluate our 3D generative model in tasks relevant to drug design including shape-conditioned generation of chemically diverse molecular structures and shape-constrained molecular property optimization, demonstrating its utility over virtual screening of enumerated libraries.", "keywords": "molecules;equivariance;generation", "primary_area": "", "supplementary_material": "/attachment/722ee67e5fda2af8492e6f1706d0dc1d7ef90f66.zip", "author": "Keir Adams;Connor W. Coley", "authorids": "~Keir_Adams1;~Connor_W._Coley1", "gender": "M;M", "homepage": ";https://coley.mit.edu", "dblp": ";206/6284", "google_scholar": "eh75v58AAAAJ;l015S80AAAAJ", "orcid": "0000-0001-9035-7959;0000-0002-8271-8723", "linkedin": "keir-adams-584675167/;", "or_profile": "~Keir_Adams1;~Connor_Coley1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nadams2023equivariant,\ntitle={Equivariant Shape-Conditioned Generation of 3D Molecules for Ligand-Based Drug Design},\nauthor={Keir Adams and Connor W. 
Coley},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=4MbGnp4iPQ}\n}", "github": "", "project": "", "reviewers": "TAEy;UfVq;vKuJ;bX75", "pdf_size": 52313324, "recommendation": "6;6;6;6", "confidence": "3;4;4;5", "correctness": "3;3;3;3", "technical_novelty": "4;2;3;3", "empirical_novelty": "2;2;3;2", "wc_summary_paper": "45;74;92;37", "wc_strength_and_weaknesses": "119;293;718;228", "wc_clarity_quality_novelty_and_reproducibility": "137;67;30;22", "wc_summary_review": "228;39;14;40", "wc_review": "529;473;854;327", "wc_reply_reviewers": "0;19;241;38", "wc_reply_authors": "1851;1333;3450;1467", "reply_reviewers": "0;1;1;1", "reply_authors": "3;2;6;2", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 62.0, 22.124646889837585 ], "wc_strength_and_weaknesses_avg": [ 339.5, 227.1987015807969 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 64.0, 45.436769251345325 ], "wc_summary_review_avg": [ 80.25, 85.93711363549511 ], "wc_review_avg": [ 545.75, 192.64134421250284 ], "wc_reply_reviewers_avg": [ 74.5, 97.06312379065491 ], "wc_reply_authors_avg": [ 2025.25, 844.264287708535 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.25, 1.6393596310755 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1321623885969293517&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=4MbGnp4iPQ", "email": "mit.edu;mit.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "4NLyCJQR3ZR", "title": "Optimal Neural Network Approximation of Wasserstein Gradient Direction via Convex Optimization", "track": "main", "status": "Reject", "tldr": "Wasserstein gradient descent meets neural networks and convex optimization", "abstract": "The computation of the Wasserstein gradient direction is essential for posterior sampling problems and scientific computing. The approximation of the Wasserstein gradient with finite samples requires solving a variational problem. We study the variational problem in the family of two-layer networks with squared-ReLU activations, for which we derive a semi-definite programming (SDP) relaxation. This SDP can be viewed as an approximation of the Wasserstein gradient in a broader function family including two-layer networks. By solving the convex SDP, we obtain the optimal approximation of the Wasserstein gradient direction in this class of functions.
Numerical experiments including PDE-constrained Bayesian inference and parameter estimation in COVID-19 modeling demonstrate the effectiveness of the proposed method.", "keywords": "Bayesian inference;convex optimization;semi-definite programming", "primary_area": "", "supplementary_material": "", "author": "Yifei Wang;Peng Chen;Mert Pilanci;Wuchen Li", "authorids": "~Yifei_Wang2;~Peng_Chen1;~Mert_Pilanci3;~Wuchen_Li1", "gender": "M;;M;M", "homepage": "http://web.stanford.edu/~wangyf18/;;https://stanford.edu/~pilanci/;https://people.math.sc.edu/wuchen/index.html", "dblp": ";;45/8056;138/1749", "google_scholar": ";;aSAS-aAAAAAJ;rlAIMRMAAAAJ", "orcid": ";;;", "linkedin": ";;mert-pilanci-ba615743/;", "or_profile": "~Yifei_Wang2;~Peng_Chen1;~Mert_Pilanci3;~Wuchen_Li1", "aff": "Stanford University;;Stanford University;University of South Carolina", "aff_domain": "stanford.edu;;stanford.edu;sc.edu", "position": "PhD student;;Assistant Professor;Assistant Professor", "bibtex": "@misc{\nwang2023optimal,\ntitle={Optimal Neural Network Approximation of Wasserstein Gradient Direction via Convex Optimization},\nauthor={Yifei Wang and Peng Chen and Mert Pilanci and Wuchen Li},\nyear={2023},\nurl={https://openreview.net/forum?id=4NLyCJQR3ZR}\n}", "github": "", "project": "", "reviewers": "mWPZ;efky;4MPo", "site": "https://openreview.net/forum?id=4NLyCJQR3ZR", "pdf_size": 593672, "recommendation": "3;5;5", "confidence": "4;3;2", "correctness": "3;3;4", "technical_novelty": "3;2;2", "empirical_novelty": "1;3;2", "wc_summary_paper": "48;140;30", "wc_strength_and_weaknesses": "130;346;129", "wc_clarity_quality_novelty_and_reproducibility": "2;198;29", "wc_summary_review": "24;767;19", "wc_review": "204;1451;207", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 72.66666666666667, 48.175604707039106 ], "wc_strength_and_weaknesses_avg": [ 201.66666666666666, 102.05989526852466 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 76.33333333333333, 86.73458876877719 ], "wc_summary_review_avg": [ 270.0, 351.4379983249772 ], "wc_review_avg": [ 620.6666666666666, 587.1356080346534 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.8660254037844385, "corr_recommendation_correctness": 0.49999999999999983, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11532592907739585177&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 9, "aff_unique_index": "0;0;1", "aff_unique_norm": "Stanford University;University of South Carolina", "aff_unique_dep": ";", "aff_unique_url": "https://www.stanford.edu;https://www.sc.edu", "aff_unique_abbr": "Stanford;USC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "4NT3umNU3D0", "title": "Backdoor or Feature? 
A New Perspective on Data Poisoning", "track": "main", "status": "Reject", "tldr": "A new theoretical foundation of data poisoning, with a theory inspired defense algorithm", "abstract": "In a backdoor attack, an adversary adds maliciously constructed (\"backdoor\") examples into a training set to make the resulting model\nvulnerable to manipulation. Defending against such attacks---that is, finding and removing the backdoor examples---typically involves viewing these examples as outliers and using techniques from robust statistics to detect and remove them.\n\nIn this work, we present a new perspective on backdoor attacks. We argue that without structural information on the training data distribution, backdoor attacks are indistinguishable from naturally-occuring features in the data (and thus impossible to ``detect'' in a general sense). To circumvent this impossibility, we assume that a backdoor attack corresponds to the strongest feature in the training data. Under this assumption---which we make formal---we develop a new framework for detecting backdoor attacks. Our framework naturally gives rise to a corresponding algorithm whose efficacy we show both theoretically and experimentally.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Alaa Khaddaj;Guillaume Leclerc;Aleksandar Makelov;Kristian Georgiev;Andrew Ilyas;Hadi Salman;Aleksander Madry", "authorids": "~Alaa_Khaddaj1;~Guillaume_Leclerc1;~Aleksandar_Makelov1;~Kristian_Georgiev1;~Andrew_Ilyas1;~Hadi_Salman1;~Aleksander_Madry1", "gender": ";M;M;M;M;M;M", "homepage": ";;http://andrewilyas.com;https://hadisalman.com/;https://people.csail.mit.edu/madry/;https://amakelov.github.io/;https://kristian-georgiev.github.io/", "dblp": ";183/9387;156/5465;192/3204;67/2454;202/2447;304/2868", "google_scholar": "BA1kFjMAAAAJ;;Dtw3YBoAAAAJ;Kr8JjF0AAAAJ;SupjsEUAAAAJ;haO4sKoAAAAJ;t8RKSJsAAAAJ", "orcid": ";;;;;;0000-0003-4802-1962", "linkedin": "alaa-khaddaj;;;;;;", "or_profile": "~Alaa_Khaddaj1;~Guillaume_Leclerc1;~Andrew_Ilyas1;~Hadi_Salman1;~Aleksander_Madry1;~Aleksandar_Aleksandrov_Makelov1;~Kristian_Georgiev_Georgiev1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Independent;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;mit.edu;mit.edu;mit.edu;amakelov.github.io;mit.edu", "position": "PhD student;PhD student;PhD student;PhD Student;Professor;Researcher;PhD student", "bibtex": "@misc{\nkhaddaj2023backdoor,\ntitle={Backdoor or Feature? 
A New Perspective on Data Poisoning},\nauthor={Alaa Khaddaj and Guillaume Leclerc and Aleksandar Makelov and Kristian Georgiev and Andrew Ilyas and Hadi Salman and Aleksander Madry},\nyear={2023},\nurl={https://openreview.net/forum?id=4NT3umNU3D0}\n}", "github": "", "project": "", "reviewers": "9GLS;kC2s;RpQm;nR1F", "site": "https://openreview.net/forum?id=4NT3umNU3D0", "pdf_size": 2136571, "recommendation": "3;3;5;5", "confidence": "4;4;4;4", "correctness": "2;3;3;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "55;97;365;26", "wc_strength_and_weaknesses": "31;129;265;301", "wc_clarity_quality_novelty_and_reproducibility": "509;546;212;20", "wc_summary_review": "26;71;46;13", "wc_review": "621;843;888;360", "wc_reply_reviewers": "0;0;0;101", "wc_reply_authors": "752;837;442;245", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 135.75, 134.7430424919966 ], "wc_strength_and_weaknesses_avg": [ 181.5, 108.00347216640769 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 321.75, 217.0534208438098 ], "wc_summary_review_avg": [ 39.0, 21.89748844045819 ], "wc_review_avg": [ 678.0, 209.58172630265264 ], "wc_reply_reviewers_avg": [ 25.25, 43.73428289111415 ], "wc_reply_authors_avg": [ 569.0, 237.91700233484787 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2392066606766740190&as_sdt=800005&sciodt=0,15&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0;1;0", "aff_unique_norm": "Massachusetts Institute of Technology;Independent", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;", "aff_unique_abbr": "MIT;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States;" }, { "id": "4NWwhku4AEI", "title": "Robust Learning with Decoupled Meta Label Purifier", "track": "main", "status": "Withdraw", "tldr": "A method that decouples the label purification process into label-free representation learning and a simple meta label purifier.", "abstract": "Training deep neural networks (DNN) with noisy labels is challenging since DNN can easily memorize inaccurate labels, leading to poor generalization ability. Recently, the meta-learning based label correction strategy is widely adopted to tackle this problem via identifying and correcting potential noisy labels with the help of a small set of clean validation data. Although training with purified labels can effectively improve performance, solving the meta-learning problem inevitably involves a nested loop of bi-level optimization between model weights and hyper-parameters (i.e., label distribution). As compromise, previous methods resort to a coupled learning process with alternating update. In this paper, we empirically find such simultaneous optimization over both model weights and label distribution can not achieve an optimal routine, consequently limiting the representation ability of backbone and accuracy of corrected labels. 
From this observation, a novel multi-stage label purifier named DMLP is proposed. DMLP decouples the label correction process into label-free representation learning and a simple meta label purifier. In this way, DMLP can focus on extracting discriminative features and correcting labels in two distinct stages. DMLP is a plug-and-play label purifier; the purified labels can be directly reused in naive end-to-end network retraining or other robust learning methods, where state-of-the-art results are obtained on several synthetic and real-world noisy datasets, especially under high noise levels.", "keywords": "Learning with Noisy labels;Decoupled Optimization;Meta Learning", "primary_area": "", "supplementary_material": "/attachment/8f017213c609d67d293ec3d846c18fb265b00c2b.zip", "author": "Yuanpeng Tu;Boshen Zhang;Yuxi Li;Liang Liu;Jian Li;Yabiao Wang;Chengjie Wang;Cai Rong Zhao", "authorids": "~Yuanpeng_Tu1;~Boshen_Zhang1;~Yuxi_Li2;~Liang_Liu5;~Jian_Li12;~Yabiao_Wang1;~Chengjie_Wang1;~Cai_Rong_Zhao1", "gender": "M;M;M;M;M;;M;M", "homepage": "https://github.com/helloTongji;https://openreview.net/profile?id=~Boshen_Zhang1;https://github.com/lyxok1;;https://swordlidev.github.io/;;;https://vill.tongji.edu.cn/", "dblp": ";145/3980;;10/6178-7.html;33/5448-62;;;81/8614", "google_scholar": "https://scholar.google.com.sg/citations?user=a70oH2wAAAAJ;GOnKOMcAAAAJ;-24oYQoAAAAJ;Kkg3IPMAAAAJ;ACb5C40AAAAJ;;fqte5H4AAAAJ;z-XzWZcAAAAJ", "orcid": ";0000-0001-9204-5676;;;0000-0002-0242-6481;;0000-0003-4216-8090;0000-0001-6745-9674", "linkedin": ";;;;;;;", "or_profile": "~Yuanpeng_Tu1;~Boshen_Zhang1;~Yuxi_Li2;~Liang_Liu5;~Jian_Li12;~Yabiao_Wang1;~Chengjie_Wang1;~Cai_Rong_Zhao1", "aff": "Tongji University;Tencent;Tencent Youtu Lab;Tencent Youtu Lab;Tencent Youtu;;Tencent YouTu Lab;Tongji University", "aff_domain": "tongji.edu.cn;tencent.com;tencent.com;tencent.com;tencent.com;;tencent.com;tongji.edu.cn", "position": "MS student;Researcher;Researcher;Researcher;Researcher;;Researcher;Full Professor", "bibtex": "@misc{\ntu2023robust,\ntitle={Robust Learning with Decoupled Meta Label Purifier},\nauthor={Yuanpeng Tu and Boshen Zhang and Yuxi Li and Liang Liu and Jian Li and Yabiao Wang and Chengjie Wang and Cai Rong Zhao},\nyear={2023},\nurl={https://openreview.net/forum?id=4NWwhku4AEI}\n}", "github": "", "project": "", "reviewers": "USBq;TiU1;Zo6h;b363", "site": "https://openreview.net/forum?id=4NWwhku4AEI", "pdf_size": 11244884, "recommendation": "3;5;6;8", "confidence": "5;4;3;2", "correctness": "2;2;3;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;3;2;3", "wc_summary_paper": "41;68;73;105", "wc_strength_and_weaknesses": "236;476;157;81", "wc_clarity_quality_novelty_and_reproducibility": "51;13;95;45", "wc_summary_review": "56;112;13;18", "wc_review": "384;669;338;249", "wc_reply_reviewers": "145;0;0;0", "wc_reply_authors": "1059;667;373;27", "reply_reviewers": "1;0;0;0", "reply_authors": "2;1;1;1", "recommendation_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 71.75, 22.730761095924613 ], "wc_strength_and_weaknesses_avg": [ 237.5, 148.2034075181809 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 51.0, 29.223278392404914 ], "wc_summary_review_avg": [ 49.75, 39.60034722069997 ], "wc_review_avg": [ 410.0, 157.21164079036896 ], "wc_reply_reviewers_avg": [ 36.25, 62.7868417743718 ], "wc_reply_authors_avg": [ 
531.5, 379.55862524780014 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": -0.9922778767136676, "corr_recommendation_correctness": 0.8320502943378437, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:QgE8C7kiO3cJ:scholar.google.com/&scioq=Robust+Learning+with+Decoupled+Meta+Label+Purifier&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;1;1;1;1;1;0", "aff_unique_norm": "Tongji University;Tencent", "aff_unique_dep": ";Tencent Holdings Limited", "aff_unique_url": "https://www.tongji.edu.cn;https://www.tencent.com", "aff_unique_abbr": "Tongji;Tencent", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "4O4eoAVEdIs", "title": "Why do Models with Conditional Computation Learn Suboptimal Solutions?", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Sparsely-activated neural networks with conditional computation learn to route their inputs through different subnetworks, providing a strong structural prior and reducing computational costs. Despite their possible benefits, models with learned routing often underperform their parameter-matched densely-activated counterparts as well as models that use non-learned heuristic routing strategies. In this paper, we hypothesize that these shortcomings stem from the gradient estimation techniques used to train sparsely-activated models with non-differentiable discrete routing decisions. To test this hypothesis, we evaluate the performance of sparsely-activated models trained with various gradient estimation techniques in three settings where a high-quality heuristic routing strategy can be designed. Our experiments reveal that learned routing reaches substantially different (and worse) solutions than heuristic routing in various settings. As a first step towards remedying this gap, we demonstrate that supervising the routing decision on a small fraction of the examples is sufficient to help the model to learn better routing strategies. 
Our results shed light on the difficulties of learning effective routing and set the stage for future work on conditional computation mechanisms and training techniques.", "keywords": "neural networks;conditional computation;gradient estimation", "primary_area": "", "supplementary_material": "/attachment/9645a01392851793fc912aa9e8ce981e7c8abc88.zip", "author": "Muqeeth Mohammed;Haokun Liu;Colin Raffel", "authorids": "~Muqeeth_Mohammed1;~Haokun_Liu1;~Colin_Raffel1", "gender": ";;", "homepage": "https://haokunliu.github.io/;http://colinraffel.com;https://muqeeth.github.io", "dblp": "169/0460;149/0082;320/4437", "google_scholar": "T3dz_MQAAAAJ;I66ZBYwAAAAJ;dsAzIX4AAAAJ", "orcid": ";;", "linkedin": ";;muqeeth-mohammed/", "or_profile": "~Haokun_Liu1;~Colin_Raffel1;~Mohammed_Muqeeth1", "aff": "Department of Computer Science, University of North Carolina, Chapel Hill;University of North Carolina, Chapel Hill;University of North Carolina at Chapel Hill", "aff_domain": "cs.unc.edu;unc.edu;cs.unc.edu", "position": "PhD student;Assistant Professor;MS student", "bibtex": "@misc{\nmohammed2023why,\ntitle={Why do Models with Conditional Computation Learn Suboptimal Solutions?},\nauthor={Muqeeth Mohammed and Haokun Liu and Colin Raffel},\nyear={2023},\nurl={https://openreview.net/forum?id=4O4eoAVEdIs}\n}", "github": "", "project": "", "reviewers": "RCbM;Sn59;a3VV;YEGR", "site": "https://openreview.net/forum?id=4O4eoAVEdIs", "pdf_size": 556706, "recommendation": "3;3;3;5", "confidence": "2;4;3;4", "correctness": "2;3;1;3", "technical_novelty": "2;2;3;2", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "113;97;107;92", "wc_strength_and_weaknesses": "204;235;163;211", "wc_clarity_quality_novelty_and_reproducibility": "13;67;45;68", "wc_summary_review": "18;167;130;83", "wc_review": "348;566;445;454", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 2.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 102.25, 8.227241335952167 ], "wc_strength_and_weaknesses_avg": [ 203.25, 25.926579026165406 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 48.25, 22.331312097590683 ], "wc_summary_review_avg": [ 99.5, 55.679888649313945 ], "wc_review_avg": [ 453.25, 77.23138934397076 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.5222329678670935, "corr_recommendation_correctness": 0.5222329678670935, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9598364795799594124&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of North Carolina", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.unc.edu", "aff_unique_abbr": "UNC", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Chapel Hill", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "4OS-U1a5kB-", "title": "Safe Reinforcement Learning with Contrastive Risk Prediction", "track": "main", "status": "Reject", "tldr": "We propose a contrastive risk prediction method to train safe RL agents with risk preventive trajectory exploration and reward 
shaping.", "abstract": "As safety violations can lead to severe consequences in real-world applications, the increasing deployment of Reinforcement Learning (RL) in safety-critical domains such as robotics has propelled the study of safe exploration for reinforcement learning (safe RL). In this work, we propose a risk preventive training method for safe RL, which learns a statistical contrastive classifier to predict the probability of a state-action pair leading to unsafe states. Based on the predicted risk probabilities, we can collect risk preventive trajectories and reshape the reward function with risk penalties to induce safe RL policies. We conduct experiments in robotic simulation environments. The results show the proposed approach has comparable performance with the state-of-the-art model-based methods and outperforms conventional model-free safe RL approaches.\n", "keywords": "safe reinforcement learning;contrastive risk prediction", "primary_area": "", "supplementary_material": "", "author": "Hanping Zhang;Yuhong Guo", "authorids": "~Hanping_Zhang1;~Yuhong_Guo1", "gender": "M;", "homepage": "https://jajajag.github.io/;", "dblp": "230/3460;", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Hanping_Zhang1;~Yuhong_Guo1", "aff": "Carleton University;", "aff_domain": "carleton.ca;", "position": "PhD student;", "bibtex": "@misc{\nzhang2023safe,\ntitle={Safe Reinforcement Learning with Contrastive Risk Prediction},\nauthor={Hanping Zhang and Yuhong Guo},\nyear={2023},\nurl={https://openreview.net/forum?id=4OS-U1a5kB-}\n}", "github": "", "project": "", "reviewers": "aGao;jp1h;mSBr", "site": "https://openreview.net/forum?id=4OS-U1a5kB-", "pdf_size": 1156695, "recommendation": "3;5;6", "confidence": "4;4;4", "correctness": "2;3;3", "technical_novelty": "2;3;3", "empirical_novelty": "2;1;2", "wc_summary_paper": "68;91;32", "wc_strength_and_weaknesses": "506;530;451", "wc_clarity_quality_novelty_and_reproducibility": "176;108;39", "wc_summary_review": "52;92;37", "wc_review": "802;821;559", "wc_reply_reviewers": "0;107;209", "wc_reply_authors": "782;809;919", "reply_reviewers": "0;1;2", "reply_authors": "2;2;2", "recommendation_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "wc_summary_paper_avg": [ 63.666666666666664, 24.280765135299085 ], "wc_strength_and_weaknesses_avg": [ 495.6666666666667, 33.06895153396242 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 107.66666666666667, 55.93051244376563 ], "wc_summary_review_avg": [ 60.333333333333336, 23.213980461973534 ], "wc_review_avg": [ 727.3333333333334, 119.28211191214811 ], "wc_reply_reviewers_avg": [ 105.33333333333333, 85.33203124006573 ], "wc_reply_authors_avg": [ 836.6666666666666, 59.25275427259807 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.9449111825230683, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14595248330630601870&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0", "aff_unique_norm": "Carleton University", "aff_unique_dep": "", "aff_unique_url": "https://carleton.ca", "aff_unique_abbr": "Carleton", "aff_country_unique_index": "0", 
"aff_country_unique": "Canada" }, { "id": "4Ox_yJWZP56", "title": "Exploring Methods for Parsing Movie Scripts - Feature Extraction for Further Social Injustice Analysis", "track": "main", "status": "Withdraw", "tldr": "An exploration of methods to parse movie scripts ", "abstract": "When it comes to analysing movie scripts for things like bias and given the variation of movie script formatting due to inconsistencies by the authors, it is important that we create methods that can help extract all the relevant features required for any further analysis. In this paper, we discuss multiple parsing techniques that can be used to extract features and understand the structure of movie scripts in an automated fashion. We compare and contrast the accuracy and time of a rule based and a variety of machine learning approaches including; Deep Neural Networks, Decision Tress and BERT for sequence classification model. ", "keywords": "Movie Parsing;Script Parsing;Parsers;IMSDB;Deep Neural Networks;Discussion Tree;BERT Parser", "primary_area": "", "supplementary_material": "", "author": "Gwilym Newton;Lamogha Chiazor", "authorids": "~Gwilym_Newton1;~Lamogha_Chiazor1", "gender": "M;F", "homepage": ";", "dblp": ";", "google_scholar": ";", "orcid": ";", "linkedin": "https://uk.linkedin.com/in/gwilym-newton-91774659;lamogha/", "or_profile": "~Gwilym_Newton1;~Lamogha_Chiazor1", "aff": "International Business Machines;International Business Machines", "aff_domain": "ibm.com;ibm.com", "position": "Researcher;Researcher", "bibtex": "@misc{\nnewton2023exploring,\ntitle={Exploring Methods for Parsing Movie Scripts - Feature Extraction for Further Social Injustice Analysis},\nauthor={Gwilym Newton and Lamogha Chiazor},\nyear={2023},\nurl={https://openreview.net/forum?id=4Ox_yJWZP56}\n}", "github": "", "project": "", "reviewers": "4XRJ;U4Vu;UeJ2", "site": "https://openreview.net/forum?id=4Ox_yJWZP56", "pdf_size": 1723793, "recommendation": "3;3;5", "confidence": "5;5;4", "correctness": "3;2;3", "technical_novelty": "1;1;2", "empirical_novelty": "1;1;3", "wc_summary_paper": "66;47;53", "wc_strength_and_weaknesses": "118;192;8", "wc_clarity_quality_novelty_and_reproducibility": "30;16;184", "wc_summary_review": "19;21;86", "wc_review": "233;276;331", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 1.3333333333333333, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.6666666666666667, 0.9428090415820634 ], "wc_summary_paper_avg": [ 55.333333333333336, 7.93025150224688 ], "wc_strength_and_weaknesses_avg": [ 106.0, 75.59541432300419 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 76.66666666666667, 76.11103000806708 ], "wc_summary_review_avg": [ 42.0, 31.123410267299864 ], "wc_review_avg": [ 280.0, 40.10818702792071 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": 0.5, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:PGoXdXGFL9MJ:scholar.google.com/&scioq=Exploring+Methods+for+Parsing+Movie+Scripts+-+Feature+Extraction+for+Further+Social+Injustice+Analysis&hl=en&as_sdt=0,44", 
"gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "International Business Machines Corporation", "aff_unique_dep": "", "aff_unique_url": "https://www.ibm.com", "aff_unique_abbr": "IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Generative Modelling with Inverse Heat Dissipation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11661", "id": "4PJUBT9f2Ol", "poster": "", "openreview": "https://openreview.net/forum?id=4PJUBT9f2Ol", "slides": "https://iclr.cc/virtual/2023/poster/11661", "video": "https://iclr.cc/virtual/2023/poster/11661", "author_site": "Severi Rissanen, Markus Heinonen, Arno Solin", "tldr": "We propose a generative model that iteratively reverses the heat equation, increasing the effective resolution of the image", "abstract": "While diffusion models have shown great success in image generation, their noise-inverting generative process does not explicitly consider the structure of images, such as their inherent multi-scale nature. Inspired by diffusion models and the empirical success of coarse-to-fine modelling, we propose a new diffusion-like model that generates images through stochastically reversing the heat equation, a PDE that locally erases fine-scale information when run over the 2D plane of the image. We interpret the solution of the forward heat equation with constant additive noise as a variational approximation in the diffusion latent variable model. Our new model shows emergent qualitative properties not seen in standard diffusion models, such as disentanglement of overall colour and shape in images. Spectral analysis on natural images highlights connections to diffusion models and reveals an implicit coarse-to-fine inductive bias in them.", "keywords": "diffusion model;partial differential equation;inductive bias", "primary_area": "", "supplementary_material": "/attachment/bbe91026859118e5a32c1678d49be15fa58740c0.zip", "author": "Severi Rissanen;Markus Heinonen;Arno Solin", "authorids": "~Severi_Rissanen1;~Markus_Heinonen1;~Arno_Solin1", "gender": "M;M;", "homepage": ";https://users.aalto.fi/~heinom10/;http://arno.solin.fi", "dblp": ";22/7709;98/11225", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;hFtfHZoAAAAJ;U_fJCnAAAAAJ", "orcid": ";;0000-0002-0958-7886", "linkedin": ";;asolin/", "or_profile": "~Severi_Rissanen1;~Markus_Heinonen1;~Arno_Solin1", "aff": "Aalto University;Aalto University;Aalto University", "aff_domain": "aalto.fi;aalto.fi;aalto.fi", "position": "PhD student;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nrissanen2023generative,\ntitle={Generative Modelling with Inverse Heat Dissipation},\nauthor={Severi Rissanen and Markus Heinonen and Arno Solin},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=4PJUBT9f2Ol}\n}", "github": "", "project": "", "reviewers": "ZA3r;D1k3;dZbG;kPGb", "pdf_size": 0, "recommendation": "5;6;6;8", "confidence": "3;4;4;4", "correctness": "3;4;3;3", "technical_novelty": "2;3;3;4", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "60;12;63;88", "wc_strength_and_weaknesses": "146;199;257;168", "wc_clarity_quality_novelty_and_reproducibility": "50;40;52;1", "wc_summary_review": "12;23;58;104", "wc_review": "268;274;430;361", "wc_reply_reviewers": "0;54;36;48", "wc_reply_authors": "478;495;688;140", "reply_reviewers": "0;1;1;1", "reply_authors": 
"1;2;1;1", "recommendation_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 55.75, 27.49886361288408 ], "wc_strength_and_weaknesses_avg": [ 192.5, 41.728287767412645 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 35.75, 20.571521577170707 ], "wc_summary_review_avg": [ 49.25, 35.88436288970448 ], "wc_review_avg": [ 333.25, 66.8931050258545 ], "wc_reply_reviewers_avg": [ 34.5, 20.946360065653412 ], "wc_reply_authors_avg": [ 450.25, 197.20088108322437 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.6622661785325219, "corr_recommendation_correctness": -0.13245323570650439, "gs_citation": 106, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15069610173074438195&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=4PJUBT9f2Ol", "email": "aalto.fi;aalto.fi;aalto.fi", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Aalto University", "aff_unique_dep": "", "aff_unique_url": "https://www.aalto.fi", "aff_unique_abbr": "Aalto", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Finland" }, { "id": "4QIgPD5BLnv", "title": "Diffusing Graph Attention", "track": "main", "status": "Reject", "tldr": "", "abstract": "The dominant paradigm for machine learning on graphs uses Message Passing Graph Neural Networks~(MP-GNNs), in which node representations are updated by aggregating information in their local neighborhood. Recently, there have been increasingly more attempts to adapt the Transformer architecture to graphs in an effort to solve some known limitations of MP-GNN. A challenging aspect of designing Graph Transformers is integrating the arbitrary graph structure into the architecture. We propose \\emph{Graph Diffuser}~(GD) to address this challenge. GD learns to extract structural and positional relationships between distant nodes in the graph, which it then uses to direct the Transformer's attention and node representation. We demonstrate that existing GNNs and Graph Transformers struggle to capture long-range interactions and how Graph Diffuser does so while admitting intuitive visualizations. 
Experiments on eight benchmarks show Graph Diffuser to be a highly competitive model, outperforming the state-of-the-art in a diverse set of domains.", "keywords": "Graph Transformer;graph neural networks;transformers;long-range context", "primary_area": "", "supplementary_material": "", "author": "Daniel Glickman;Eran Yahav", "authorids": "~Daniel_Glickman1;~Eran_Yahav1", "gender": ";M", "homepage": ";http://www.cs.technion.ac.il/~yahave/", "dblp": ";54/5133", "google_scholar": ";https://scholar.google.com.tw/citations?user=grAfX0MAAAAJ", "orcid": ";", "linkedin": "https://linkedin.com/in/daniel-glickman-7134a712b;", "or_profile": "~Daniel_Glickman1;~Eran_Yahav1", "aff": "Tel Aviv University;Technion, Technion", "aff_domain": "tau.ac.il;technion.ac.il", "position": "MS student;Associate Professor", "bibtex": "@misc{\nglickman2023diffusing,\ntitle={Diffusing Graph Attention},\nauthor={Daniel Glickman and Eran Yahav},\nyear={2023},\nurl={https://openreview.net/forum?id=4QIgPD5BLnv}\n}", "github": "", "project": "", "reviewers": "6Q8a;6mkR;1vG8;NF2T", "site": "https://openreview.net/forum?id=4QIgPD5BLnv", "pdf_size": 297067, "recommendation": "3;3;3;3", "confidence": "3;4;3;4", "correctness": "2;4;3;2", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;3;2;2", "wc_summary_paper": "101;68;47;40", "wc_strength_and_weaknesses": "355;146;227;172", "wc_clarity_quality_novelty_and_reproducibility": "123;20;28;95", "wc_summary_review": "50;45;54;36", "wc_review": "629;279;356;343", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.75, 0.82915619758885 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 64.0, 23.717082451262844 ], "wc_strength_and_weaknesses_avg": [ 225.0, 80.55122593728788 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 66.5, 43.729280808172454 ], "wc_summary_review_avg": [ 46.25, 6.722164829874376 ], "wc_review_avg": [ 401.75, 134.40121837245374 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17711513746430999779&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Tel Aviv University;Technion - Israel Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.tau.ac.il;https://www.technion.ac.il/en/", "aff_unique_abbr": "TAU;Technion", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Israel" }, { "id": "4QTrtR181T", "title": "An alternative approach to train neural networks using monotone variational inequality", "track": "main", "status": "Withdraw", "tldr": "We investigate training neural networks with monotone variation inequality, yielding performance guarantees and competitive/better performance than widely-used stochastic gradient descent methods, especially during initial training phases.", "abstract": "The current paper investigates an alternative approach to neural network training, which is a non-convex optimization problem, through the lens of another convex problem \u2014 to solve 
a monotone variational inequality (MVI), inspired by a recent work of Juditsky and Nemirovski (2019). MVI solutions can be found by computationally efficient procedures, with performance guarantees in the form of $\\ell_2$ and $\\ell_{\\infty}$ bounds on model recovery and prediction accuracy under the theoretical setting of training a single-layer linear neural network. We study the use of MVI for training multi-layer neural networks by proposing a practical and completely general algorithm called \\textit{stochastic variational inequality} (\\texttt{SVI}). We demonstrate its applicability in training fully-connected neural networks, graph neural networks (GNN), and convolutional networks (CNN) (\\texttt{SVI} is completely general for training other network architectures). We show that \\texttt{SVI} achieves competitive or better performance compared to widely-used stochastic gradient descent methods on both synthetic and real network data prediction tasks across various performance metrics, with especially improved efficiency in the early stage of training.", "keywords": "monotone variational inequality;graph neural networks;neural network training", "primary_area": "", "supplementary_material": "/attachment/3a8787603534549f30d7ef2a8f7bc139e77a14d3.zip", "author": "Chen Xu;Xiuyuan Cheng;Yao Xie", "authorids": "~Chen_Xu12;~Xiuyuan_Cheng1;~Yao_Xie2", "gender": "M;;F", "homepage": "https://hamrel-cxu.github.io/;;http://www2.isye.gatech.edu/~yxie77", "dblp": ";79/9747;13/4242-2", "google_scholar": "https://scholar.google.com/citations?hl=en;I2gwdssAAAAJ;qvYp8ZQAAAAJ", "orcid": ";;", "linkedin": "chen-xu-92013714a/;;yaoxie/", "or_profile": "~Chen_Xu12;~Xiuyuan_Cheng1;~Yao_Xie2", "aff": "Georgia Institute of Technology;Duke University;Georgia Institute of Technology", "aff_domain": "gatech.edu;duke.edu;gatech.edu", "position": "PhD student;Associate Professor;Associate Professor", "bibtex": "@misc{\nxu2023an,\ntitle={An alternative approach to train neural networks using monotone variational inequality},\nauthor={Chen Xu and Xiuyuan Cheng and Yao Xie},\nyear={2023},\nurl={https://openreview.net/forum?id=4QTrtR181T}\n}", "github": "", "project": "", "reviewers": "PUVE;9g7F;9kww;Hgig;vGXo", "site": "https://openreview.net/forum?id=4QTrtR181T", "pdf_size": 2704162, "recommendation": "3;5;5;6;6", "confidence": "3;4;2;4;3", "correctness": "4;3;2;4;3", "technical_novelty": "2;2;2;2;2", "empirical_novelty": "0;2;2;3;2", "wc_summary_paper": "33;74;75;192;98", "wc_strength_and_weaknesses": "79;187;313;323;203", "wc_clarity_quality_novelty_and_reproducibility": "32;99;13;35;83", "wc_summary_review": "30;55;34;71;67", "wc_review": "174;415;435;621;451", "wc_reply_reviewers": "153;120;86;124;31", "wc_reply_authors": "673;459;774;496;29", "reply_reviewers": "2;1;1;1;1", "reply_authors": "4;3;3;1;1", "recommendation_avg": [ 5.0, 1.0954451150103321 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "correctness_avg": [ 3.2, 0.7483314773547882 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 1.8, 0.9797958971132712 ], "wc_summary_paper_avg": [ 94.4, 53.10593187206115 ], "wc_strength_and_weaknesses_avg": [ 221.0, 90.0133323458253 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 52.4, 32.800000000000004 ], "wc_summary_review_avg": [ 51.4, 16.74037036627326 ], "wc_review_avg": [ 419.2, 142.91591933721028 ], "wc_reply_reviewers_avg": [ 102.8, 41.72001917545101 ], "wc_reply_authors_avg": [ 486.2, 255.96515387841367 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 2.4, 
1.2000000000000002 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.24397501823713333, "corr_recommendation_correctness": -0.24397501823713333, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8024873361545459158&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 8, "aff_unique_index": "0;1;0", "aff_unique_norm": "Georgia Institute of Technology;Duke University", "aff_unique_dep": ";", "aff_unique_url": "https://www.gatech.edu;https://www.duke.edu", "aff_unique_abbr": "Georgia Tech;Duke", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "4RwkbKZhGV", "title": "A Time-Consistency Curriculum for Learning from Instance-Dependent Noisy Labels", "track": "main", "status": "Reject", "tldr": "", "abstract": "Many machine learning algorithms are known to be fragile to simple instance-independent noisy labels. However, noisy labels in real-world data are more devastating since they are produced by more complicated mechanisms in an instance-dependent manner. In this paper, we target this practical challenge of \\textit{Instance-Dependent Noisy Labels} by jointly training \n(1) a model that reverse-engineers the noise-generating mechanism, which produces an \\textit{instance-dependent mapping} between the clean label posterior and the observed noisy label; and (2) a robust classifier that produces clean label posteriors. Compared to previous methods, the former model is novel and enables end-to-end learning of the latter directly from noisy labels. An extensive empirical study indicates that the time-consistency of data is critical to the success of training both models and motivates us to develop a curriculum that selects training data based on the dynamics of the two models' outputs over the course of training. We show that the curriculum-selected data provide both clean labels and high-quality input-output pairs for training the two models. Therefore, it leads to promising and robust classification performance even in notably challenging settings of instance-dependent noisy labels where many SoTA methods could easily fail. 
Extensive experimental comparisons and ablation studies further demonstrate the advantages and significance of the time-consistency curriculum in learning from instance-dependent noisy labels on multiple benchmark datasets.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Songhua Wu;Tianyi Zhou;Yuxuan Du;Jun Yu;Bo Han;Tongliang Liu", "authorids": "~Songhua_Wu1;~Tianyi_Zhou1;~Yuxuan_Du2;~Jun_Yu3;~Bo_Han1;~Tongliang_Liu1", "gender": "M;M;M;M;M;M", "homepage": "https://scifancier.github.io/;https://tianyizhou.github.io/;https://github.com/yuxuan-du/Yuxuan-Du.github.io;https://faculty.ustc.edu.cn/yujun_AI/en/index.htm;https://tongliang-liu.github.io/;https://bhanml.github.io/", "dblp": ";88/8205-1;;50/5754-1.html;150/6667;241/0472-3", "google_scholar": "https://scholar.google.com/citations?hl=en;OKvgizMAAAAJ;https://scholar.google.com.au/citations?user=50sFkzIAAAAJ;efZyqyQAAAAJ;https://scholar.google.com.au/citations?user=EiLdZ_YAAAAJ;nTNjqHwAAAAJ", "orcid": ";0000-0001-5348-0632;0000-0002-1193-9756;0000-0002-3197-8103;;", "linkedin": ";tianyizhou;;;;", "or_profile": "~Songhua_Wu1;~Tianyi_Zhou1;~Yuxuan_Du2;~Jun_Yu3;~Tongliang_Liu1;~bo_han2", "aff": "University of Sydney;University of Maryland, College Park;JD.com;University of Science and Technology of China;University of Sydney;RIKEN", "aff_domain": "sydney.edu.au;umd.edu;jd.com;ustc.edu.cn;sydney.edu.au;riken.jp", "position": "PhD student;Assistant Professor;Researcher;Associate Professor;Lecturer;Adjunct Scientist", "bibtex": "@misc{\nwu2023a,\ntitle={A Time-Consistency Curriculum for Learning from Instance-Dependent Noisy Labels},\nauthor={Songhua Wu and Tianyi Zhou and Yuxuan Du and Jun Yu and Bo Han and Tongliang Liu},\nyear={2023},\nurl={https://openreview.net/forum?id=4RwkbKZhGV}\n}", "github": "", "project": "", "reviewers": "vCae;FeeE;zfb4;GBT4", "site": "https://openreview.net/forum?id=4RwkbKZhGV", "pdf_size": 3802134, "recommendation": "3;3;5;6", "confidence": "4;4;4;4", "correctness": "3;2;3;3", "technical_novelty": "2;2;3;2", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "68;78;26;175", "wc_strength_and_weaknesses": "451;588;435;299", "wc_clarity_quality_novelty_and_reproducibility": "34;20;34;157", "wc_summary_review": "15;36;33;89", "wc_review": "568;722;528;720", "wc_reply_reviewers": "151;0;0;19", "wc_reply_authors": "1068;640;681;179", "reply_reviewers": "1;0;0;1", "reply_authors": "3;2;2;1", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 86.75, 54.55902766728894 ], "wc_strength_and_weaknesses_avg": [ 443.25, 102.33370656826615 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 61.25, 55.57596153014359 ], "wc_summary_review_avg": [ 43.25, 27.60774347895894 ], "wc_review_avg": [ 634.5, 87.65129776563494 ], "wc_reply_reviewers_avg": [ 42.5, 63.120915709454025 ], "wc_reply_authors_avg": [ 642.0, 315.18645275455606 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5555555555555555, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16590423098805309782&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;2;3;0;4", "aff_unique_norm": "University of 
Sydney;University of Maryland;JD.com;University of Science and Technology of China;RIKEN", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.sydney.edu.au;https://www.umd.edu;https://www.jd.com;http://www.ustc.edu.cn;https://www.riken.jp", "aff_unique_abbr": "USYD;UMD;JD;USTC;RIKEN", "aff_campus_unique_index": "1", "aff_campus_unique": ";College Park", "aff_country_unique_index": "0;1;2;2;0;3", "aff_country_unique": "Australia;United States;China;Japan" }, { "id": "4Sp2v2DQcxX", "title": "Skill Machines: Temporal Logic Composition in Reinforcement Learning", "track": "main", "status": "Reject", "tldr": "A framework where an agent first learns a set of base skills in a reward-free setting, and then combines them with the learned skill machine to produce composite behaviours specified by any regular language, such as linear temporal logics.", "abstract": "A major challenge in reinforcement learning is specifying tasks in a manner that is both interpretable and verifiable. One common approach is to specify tasks through reward machines---finite state machines that encode the task to be solved. We introduce skill machines, a representation that can be learned directly from these reward machines that encode the solution to such tasks. We propose a framework where an agent first learns a set of base skills in a reward-free setting, and then combines these skills with the learned skill machine to produce composite behaviours specified by any regular language, such as linear temporal logics. This provides the agent with the ability to map from complex logical task specifications to near-optimal behaviours zero-shot. We demonstrate our approach in both a tabular environment and a high-dimensional video game environment, where an agent is faced with several of these complex, long-horizon tasks. Our results indicate that the agent is capable of satisfying extremely complex task specifications, producing near-optimal performance with no further learning. 
Finally, we demonstrate that the performance of skill machines can be improved with regular off-policy reinforcement learning algorithms when optimal behaviours are desired.", "keywords": "Reinforcement Learning;Lifelong learning;Multi task learning;Transfer learning;Logical composition;Deep Reinforcement Learning", "primary_area": "", "supplementary_material": "", "author": "Geraud Nangue Tasse;Devon Jarvis;Steven James;Benjamin Rosman", "authorids": "~Geraud_Nangue_Tasse1;~Devon_Jarvis1;~Steven_James1;~Benjamin_Rosman1", "gender": "M;M;M;M", "homepage": "https://geraudnt.github.io/;https://jarvisdevon.github.io/;;http://www.raillab.org", "dblp": "256/0971;320/3650;195/8202;45/4591", "google_scholar": "CAfsMIsAAAAJ;https://scholar.google.co.za/citations?user=MJjN5nEAAAAJ;;https://scholar.google.co.za/citations?user=pWJ0SocAAAAJ", "orcid": "0000-0002-6152-8429;0000-0003-2362-7538;;", "linkedin": "geraud-nangue-tasse-264281a5/;devon-jarvis-6b059a139;;", "or_profile": "~Geraud_Nangue_Tasse1;~Devon_Jarvis1;~Steven_James1;~Benjamin_Rosman1", "aff": "University of the Witwatersrand;University College London, University of London;University of the Witwatersrand;University of the Witwatersrand", "aff_domain": "wits.ac.za;ucl.ac.uk;wits.ac.za;wits.ac.za", "position": "PhD student;Researcher;Lecturer;Full Professor", "bibtex": "@misc{\ntasse2023skill,\ntitle={Skill Machines: Temporal Logic Composition in Reinforcement Learning},\nauthor={Geraud Nangue Tasse and Devon Jarvis and Steven James and Benjamin Rosman},\nyear={2023},\nurl={https://openreview.net/forum?id=4Sp2v2DQcxX}\n}", "github": "", "project": "", "reviewers": "38dZ;MuPq;ronJ;UCg6", "site": "https://openreview.net/forum?id=4Sp2v2DQcxX", "pdf_size": 3771515, "recommendation": "5;6;6;6", "confidence": "3;4;3;4", "correctness": "3;3;3;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "121;70;91;121", "wc_strength_and_weaknesses": "115;237;163;1089", "wc_clarity_quality_novelty_and_reproducibility": "1219;91;10;79", "wc_summary_review": "138;71;24;87", "wc_review": "1593;469;288;1376", "wc_reply_reviewers": "403;430;0;69", "wc_reply_authors": "2175;1261;524;827", "reply_reviewers": "1;2;0;1", "reply_authors": "4;3;1;1", "recommendation_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 100.75, 21.568205766822608 ], "wc_strength_and_weaknesses_avg": [ 401.0, 399.5872870850623 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 349.75, 502.812775792342 ], "wc_summary_review_avg": [ 80.0, 40.712405971644564 ], "wc_review_avg": [ 931.5, 561.9521776806279 ], "wc_reply_reviewers_avg": [ 225.5, 192.7880961055428 ], "wc_reply_authors_avg": [ 1196.75, 622.5770534640673 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 1.299038105676658 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.0, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6720884225006772755&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of the Witwatersrand;University College London", "aff_unique_dep": ";", "aff_unique_url": "https://www.wits.ac.za;https://www.ucl.ac.uk", "aff_unique_abbr": "Wits;UCL", "aff_campus_unique_index": "", 
"aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "South Africa;United Kingdom" }, { "id": "4Tx2-AH-jG_", "title": "MixMIM: Mixed and Masked Image Modeling for Efficient Visual Representation Learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "In this study, we propose Mixed and Masked Image Modeling (MixMIM), a simple but efficient MIM method that is applicable to various hierarchical Vision Transformers. Existing MIM methods replace a random subset of input tokens with a special $\\mathrm{[MASK]}$ symbol and aim at reconstructing original image tokens from the corrupted image. However, we find that using the $\\mathrm{[MASK]}$ symbol greatly slows down the training and causes training-finetuning inconsistency, due to the large masking ratio (e.g., 60% in SimMIM). In contrast, we replace the masked tokens of one image with visible tokens of another image, i.e., creating a mixed image. We then conduct dual reconstruction to reconstruct the original two images from the mixed input, which significantly improves efficiency.\nWhile MixMIM can be applied to various architectures, this paper explores a simpler but stronger hierarchical Transformer, and scales with MixMIM-B, -L, and -H. Empirical results demonstrate that MixMIM can learn high-quality visual representations efficiently. Notably, MixMIM-B with 88M parameters achieves 85.1% top-1 accuracy on ImageNet-1K by pretraining for 600 epochs.\nBesides, its transferring performances on the other 6 datasets show MixMIM has better FLOPs / performance tradeoff than previous MIM methods.", "keywords": "self-supervised learning;masked image modeling", "primary_area": "", "supplementary_material": "", "author": "Jihao Liu;Xin Huang;Jinliang Zheng;Yu Liu;Hongsheng Li", "authorids": "~Jihao_Liu3;~Xin_Huang2;~Jinliang_Zheng1;~Yu_Liu2;~Hongsheng_Li3", "gender": "M;M;M;M;M", "homepage": "https://wasedamagina.github.io;https://2toinf.github.io/;http://liuyu.us;http://www.ee.cuhk.edu.hk/~hsli;https://jihaonew.github.io/", "dblp": ";156/3720.html;97/2274-15;27/7402-1;167/0509", "google_scholar": "VGJ1rRAAAAAJ;3j5AHFsAAAAJ;;BN2Ze-QAAAAJ;PP1HyToAAAAJ", "orcid": ";0009-0000-0605-2969;;;", "linkedin": ";;;;", "or_profile": "~Xin_Huang2;~Jinliang_Zheng1;~Yu_Liu2;~Hongsheng_Li3;~Jihao_Liu4", "aff": "Waseda University;Beijing University of Posts and Telecommunications;SenseTime;The Chinese University of Hong Kong;The Chinese University of Hong Kong", "aff_domain": "waseda.jp;bupt.edu.cn;sensetime.com;cuhk.edu.hk;cuhk.edu.hk", "position": "PhD student;Undergrad student;Principal Researcher;Associate Professor;PhD student", "bibtex": "@misc{\nliu2023mixmim,\ntitle={Mix{MIM}: Mixed and Masked Image Modeling for Efficient Visual Representation Learning},\nauthor={Jihao Liu and Xin Huang and Jinliang Zheng and Yu Liu and Hongsheng Li},\nyear={2023},\nurl={https://openreview.net/forum?id=4Tx2-AH-jG_}\n}", "github": "", "project": "", "reviewers": "WqWi;6Nsv;MCMZ;qgk5", "site": "https://openreview.net/forum?id=4Tx2-AH-jG_", "pdf_size": 888736, "recommendation": "3;3;5;5", "confidence": "4;5;5;4", "correctness": "1;2;3;2", "technical_novelty": "1;2;2;2", "empirical_novelty": "1;2;2;2", "wc_summary_paper": "60;49;58;129", "wc_strength_and_weaknesses": "137;267;476;238", "wc_clarity_quality_novelty_and_reproducibility": "12;19;47;41", "wc_summary_review": "33;53;41;40", "wc_review": "242;388;622;448", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": 
"0;0;0;0", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.5, 0.5 ], "correctness_avg": [ 2.0, 0.7071067811865476 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 74.0, 32.02342892321183 ], "wc_strength_and_weaknesses_avg": [ 279.5, 123.28523837021203 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 29.75, 14.618053906043718 ], "wc_summary_review_avg": [ 41.75, 7.189401922274203 ], "wc_review_avg": [ 425.0, 136.194713553794 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.7071067811865475, "gs_citation": 61, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=292139761076510968&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;3", "aff_unique_norm": "Waseda University;Beijing University of Posts and Telecommunications;SenseTime;Chinese University of Hong Kong", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.waseda.jp/top;http://www.bupt.edu.cn/;https://www.sensetime.com;https://www.cuhk.edu.hk", "aff_unique_abbr": "Waseda;BUPT;SenseTime;CUHK", "aff_campus_unique_index": "1;2;2", "aff_campus_unique": ";Beijing;Hong Kong SAR", "aff_country_unique_index": "0;1;1;1;1", "aff_country_unique": "Japan;China" }, { "title": "TypeT5: Seq2seq Type Inference using Static Analysis", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10941", "id": "4TyNEhI2GdN", "poster": "", "openreview": "https://openreview.net/forum?id=4TyNEhI2GdN", "slides": "https://iclr.cc/virtual/2023/poster/10941", "video": "https://iclr.cc/virtual/2023/poster/10941", "author_site": "Jiayi Wei, Greg Durrett, Isil Dillig", "tldr": "Combining the strengths of CodeT5 and static analysis to predict Python type annotations.", "abstract": "There has been growing interest in automatically predicting missing type annotations in programs written in Python and JavaScript. While prior methods have achieved impressive accuracy when predicting the most common types, they often perform poorly on rare or complex types. In this paper, we present a new type inference method that treats type prediction as a code infilling task by leveraging CodeT5, a state-of-the-art seq2seq pre-trained language model for code. Our method uses static analysis to construct dynamic contexts for each code element whose type signature is to be predicted by the model. We also propose an iterative decoding scheme that incorporates previous type predictions in the model's input context, allowing information exchange between related code elements. 
Our evaluation shows that the proposed approach, TypeT5, not only achieves a higher overall accuracy (particularly on rare and complex types) but also produces more coherent results with fewer type errors---while enabling easy user intervention.", "keywords": "Type inference;Code completion;Static analysis;Transformers;Pre-training", "primary_area": "", "supplementary_material": "/attachment/a1886133498e2c6d5a23d846f341f04db4cd1ccd.zip", "author": "Jiayi Wei;Greg Durrett;Isil Dillig", "authorids": "~Jiayi_Wei2;~Greg_Durrett1;~Isil_Dillig1", "gender": "M;M;F", "homepage": "https://mrvplusone.github.io;http://www.cs.utexas.edu/~gdurrett/;https://www.cs.utexas.edu/~isil/", "dblp": ";69/7968;", "google_scholar": "fTJ8pY8AAAAJ;https://scholar.google.com.tw/citations?user=EpQ_sDEAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Jiayi_Wei2;~Greg_Durrett1;~Isil_Dillig1", "aff": "University of Texas, Austin;University of Texas, Austin;University of Texas, Austin", "aff_domain": "cs.utexas.edu;utexas.edu;utexas.edu", "position": "PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nwei2023typet,\ntitle={TypeT5: Seq2seq Type Inference using Static Analysis},\nauthor={Jiayi Wei and Greg Durrett and Isil Dillig},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=4TyNEhI2GdN}\n}", "github": "", "project": "", "reviewers": "JQ8L;2PXr;SGAP;7ekP;8rav", "pdf_size": 4324209, "recommendation": "6;6;6;6;8", "confidence": "5;4;4;4;5", "correctness": "3;4;4;3;3", "technical_novelty": "3;3;3;3;3", "empirical_novelty": "0;3;0;4;3", "wc_summary_paper": "74;93;136;204;307", "wc_strength_and_weaknesses": "563;196;82;307;301", "wc_clarity_quality_novelty_and_reproducibility": "74;50;734;221;60", "wc_summary_review": "87;43;171;110;33", "wc_review": "798;382;1123;842;701", "wc_reply_reviewers": "95;0;0;0;70", "wc_reply_authors": "539;292;1728;1110;455", "reply_reviewers": "1;0;0;0;1", "reply_authors": "1;1;4;2;1", "recommendation_avg": [ 6.4, 0.7999999999999999 ], "confidence_avg": [ 4.4, 0.4898979485566356 ], "correctness_avg": [ 3.4, 0.4898979485566356 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 1.6733200530681511 ], "wc_summary_paper_avg": [ 162.8, 84.81367814214875 ], "wc_strength_and_weaknesses_avg": [ 289.8, 159.39811793117258 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 227.8, 260.6564021849454 ], "wc_summary_review_avg": [ 88.8, 49.84134829636935 ], "wc_review_avg": [ 769.2, 239.11453322623447 ], "wc_reply_reviewers_avg": [ 33.0, 41.182520563948 ], "wc_reply_authors_avg": [ 824.8, 529.0404143352378 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.8, 1.1661903789690602 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.6123724356957946, "corr_recommendation_correctness": -0.408248290463863, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16038766445731354858&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=4TyNEhI2GdN", "email": "cs.utexas.edu;utexas.edu;utexas.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { 
"id": "4UbhxQIjeSH", "title": "Text-Conditioned Graph Generation Using Discrete Graph Variational Autoencoders", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Inspired by recent progress in text-conditioned image generation, we propose a model for the novel problem of text-conditioned graph generation. In this paper we introduce the Vector Quantized Text To Graph generator (VQ-T2G), a discrete graph variational autoencoder and autoregressive transformer for generating general graphs conditioned on text. We curate two multimodal datasets of graphs paired with text, a real-world dataset of 8000 subgraphs from the Wikipedia link network and a dataset of over 5000 synthetic graphs. Experimental results on these datasets demonstrate that VQ-T2G synthesises novel graphs with structure aligned with the text conditioning. Additional experiments in the unconditioned graph generation setting show VQ-T2G is competitive with existing unconditioned graph generation methods across various graph metrics. Code will be released on github following paper acceptance.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/c5a5546a3eaecb048dd350352631a7a58a06d664.zip", "author": "Michael Longland;David Liebowitz;Kristen Moore;Salil S. Kanhere", "authorids": "~Michael_Longland1;~David_Liebowitz1;~Kristen_Moore1;~Salil_S._Kanhere1", "gender": ";;F;M", "homepage": ";;https://people.csiro.au/m/k/kristen-moore;https://salilkanhere.net/", "dblp": ";89/289;167/5919.html;42/840", "google_scholar": "DD4mpsAAAAAJ;;uI20HykAAAAJ;sgqmaPMAAAAJ", "orcid": ";;0000-0002-9962-5080;0000-0002-1835-3475", "linkedin": ";;;salilkanhere/", "or_profile": "~Michael_Longland1;~David_Liebowitz1;~Kristen_Moore1;~Salil_S._Kanhere1", "aff": "University of New South Wales;;CSIRO's Data61;University of New South Wales", "aff_domain": "unsw.edu.au;;data61.csiro.au;unsw.edu.au", "position": "MS student;;Researcher;Full Professor", "bibtex": "@misc{\nlongland2023textconditioned,\ntitle={Text-Conditioned Graph Generation Using Discrete Graph Variational Autoencoders},\nauthor={Michael Longland and David Liebowitz and Kristen Moore and Salil S. 
Kanhere},\nyear={2023},\nurl={https://openreview.net/forum?id=4UbhxQIjeSH}\n}", "github": "", "project": "", "reviewers": "CRUn;cztS;2xXR;ESEe", "site": "https://openreview.net/forum?id=4UbhxQIjeSH", "pdf_size": 595977, "recommendation": "3;3;3;5", "confidence": "4;4;3;4", "correctness": "4;3;3;4", "technical_novelty": "1;2;2;3", "empirical_novelty": "2;3;2;3", "wc_summary_paper": "103;32;59;89", "wc_strength_and_weaknesses": "280;192;215;364", "wc_clarity_quality_novelty_and_reproducibility": "41;43;23;75", "wc_summary_review": "43;65;14;102", "wc_review": "467;332;311;630", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 70.75, 27.444261695297982 ], "wc_strength_and_weaknesses_avg": [ 262.75, 66.77340413667706 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 45.5, 18.728320800328042 ], "wc_summary_review_avg": [ 56.0, 32.132538026119256 ], "wc_review_avg": [ 435.0, 127.50882322412046 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9795650079314237589&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of New South Wales;CSIRO", "aff_unique_dep": ";Data61", "aff_unique_url": "https://www.unsw.edu.au;https://www.csiro.au", "aff_unique_abbr": "UNSW;CSIRO", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Australia" }, { "id": "4Udi4sd8qz9", "title": "Sparse Misinformation Detector", "track": "main", "status": "Withdraw", "tldr": "We present an efficient sparse misinformation detector based on a special sparsity pattern (CircuSparsity), with very encouraging performance.", "abstract": "We present Sparse Misinformation Detector (SMD), a new efficient misinformation detection network with regular fine-grained sparsity. We propose two technical components to enable SMD. First, CircuSparsity, a new hardware-friendly sparsity pattern, is introduced for improved training and testing efficiency. Second, through dedicated empirical analyses, we discover that document-level misinformation detection is pretty insensitive to a compact model size, which inspires us to make early exit for the document-level misinformation classifier. With these two techniques, we successfully achieve efficient misinformation detection on both document and event levels with one single model. Empirically, our approach significantly outperforms the original dense misinformation detection network while enjoying 50% to 75% sparsity. Extensive experiments and analyses demonstrate the merits of our method compared to other top-performing counterpart approaches. 
To the best of our knowledge, this is the first attempt at efficient misinformation detection from the network sparse training perspective.", "keywords": "Misinformation detection;fake news detection;sparse training;network pruning", "primary_area": "", "supplementary_material": "", "author": "Huan Wang;Ting Han;Tao Zhang;Hanning Zhou;Yun Fu", "authorids": "~Huan_Wang3;~Ting_Han1;~Tao_Zhang21;~Hanning_Zhou1;~Yun_Fu1", "gender": "M;Non-Binary;M;M;M", "homepage": "https://huanwang.tech/;https://tingh.github.io/;http://www.ia.cas.cn/sourcedb_ia_cas/cn/iaexpert/200908/t20090804_2310451.html;;http://www1.ece.neu.edu/~yunfu/", "dblp": "70/6155-14;22/5110;;26/253;00/5815-1", "google_scholar": "0-On0y4AAAAJ;;;aP1YGX0AAAAJ;https://scholar.google.com.tw/citations?user=h-JEcQ8AAAAJ", "orcid": "0000-0001-6951-901X;;;;0000-0002-5098-2853", "linkedin": "huanwang-zju/;;;hanzhou;furaymond/", "or_profile": "~Huan_Wang3;~Ting_Han1;~Tao_Zhang21;~Hanning_Zhou1;~Yun_Fu1", "aff": "Northeastern University;AIST, National Institute of Advanced Industrial Science and Technology;;;Northeastern University", "aff_domain": "neu.edu;aist.go.jp;;;northeastern.edu", "position": "PhD student;Researcher;;;Full Professor", "bibtex": "@misc{\nwang2023sparse,\ntitle={Sparse Misinformation Detector},\nauthor={Huan Wang and Ting Han and Tao Zhang and Hanning Zhou and Yun Fu},\nyear={2023},\nurl={https://openreview.net/forum?id=4Udi4sd8qz9}\n}", "github": "", "project": "", "reviewers": "so62;HokR;WT9e", "site": "https://openreview.net/forum?id=4Udi4sd8qz9", "pdf_size": 353959, "recommendation": "5;5;5", "confidence": "3;5;4", "correctness": "3;3;3", "technical_novelty": "3;2;2", "empirical_novelty": "3;2;2", "wc_summary_paper": "51;20;310", "wc_strength_and_weaknesses": "71;265;484", "wc_clarity_quality_novelty_and_reproducibility": "65;10;70", "wc_summary_review": "242;15;79", "wc_review": "429;310;943", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 5.0, 0.0 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 127.0, 130.0179474790564 ], "wc_strength_and_weaknesses_avg": [ 273.3333333333333, 168.70948073208243 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 48.333333333333336, 27.182510717166817 ], "wc_summary_review_avg": [ 112.0, 95.56498661469412 ], "wc_review_avg": [ 560.6666666666666, 274.6808248778126 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:JiMTmbnS_KYJ:scholar.google.com/&scioq=Sparse+Misinformation+Detector&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;0", "aff_unique_norm": "Northeastern University;National Institute of Advanced Industrial Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.northeastern.edu;https://www.aist.go.jp", "aff_unique_abbr": "NEU;AIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;Japan" }, { "title": "Joint Edge-Model Sparse Learning is Provably Efficient for Graph Neural
Networks", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11995", "id": "4UldFtZ_CVF", "poster": "", "openreview": "https://openreview.net/forum?id=4UldFtZ_CVF", "slides": "https://iclr.cc/virtual/2023/poster/11995", "video": "https://iclr.cc/virtual/2023/poster/11995", "author_site": "shuai ZHANG, Meng Wang, Pin-Yu Chen, Sijia Liu, Songtao Lu, Miao Liu", "tldr": "Encouraged by the empirical success of sparse learners in accelerating GNN training, this paper characterizes the impact of graph sampling and neuron pruning on the sample complexity and convergence rate for a desirable test accuracy quantitatively.", "abstract": "Due to the significant computational challenge of training large-scale graph neural networks (GNNs), various sparse learning techniques have been exploited to reduce memory and storage costs. Examples include graph sparsification that samples a subgraph to reduce the amount of data aggregation and model sparsification that prunes the neural network to reduce the number of trainable weights. Despite the empirical successes in reducing the training cost while maintaining the test accuracy, the theoretical generalization analysis of sparse learning for GNNs remains elusive. To the best of our knowledge, this paper provides the first theoretical characterization of joint edge-model sparse learning from the perspective of sample complexity and convergence rate in achieving zero generalization error. It proves analytically that both sampling important nodes and pruning neurons with lowest-magnitude can reduce the sample complexity and improve convergence without compromising the test accuracy. Although the analysis is centered on two-layer GNNs with structural constraints on data, the insights are applicable to more general setups and justified by both synthetic and practical citation datasets.", "keywords": "Learning theory;Graph neural networks;Generalization analysis;Graph sparisification", "primary_area": "", "supplementary_material": "/attachment/83c24d1ed7d00da3448a0230c912f147b3b2dc12.zip", "author": "Shuai Zhang;Meng Wang;Pin-Yu Chen;Sijia Liu;Songtao Lu;Miao Liu", "authorids": "~Shuai_Zhang6;~Meng_Wang4;~Pin-Yu_Chen1;~Sijia_Liu1;~Songtao_Lu1;~Miao_Liu1", "gender": "M;F;M;M;M;M", "homepage": "https://inchs708.github.io/shuaizhang.github.io/index.html;https://www.ecse.rpi.edu/~wang/index.html;http://www.pinyuchen.com;https://lsjxjtu.github.io/;https://songtaogithub.github.io/;https://sites.google.com/view/miaoliuhome", "dblp": "71/208-15;93/6765-3;39/8969;128/6972-1;05/2887;", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;;jxwlCUUAAAAJ;C7dO_UgAAAAJ;LRsjX7kAAAAJ;7QHvAEYAAAAJ", "orcid": "0000-0001-8280-6988;;0000-0003-1039-8369;;;", "linkedin": ";;pin-yu-chen-940062a2;;;miao-liu-3273a32b", "or_profile": "~Shuai_Zhang6;~Meng_Wang4;~Pin-Yu_Chen1;~Sijia_Liu1;~Songtao_Lu1;~Miao_Liu1", "aff": "Rensselaer Polytechnic Institute;Rensselaer Polytechnic Institute;International Business Machines;Michigan State University;IBM Thomas J. 
Watson Research Center;International Business Machines", "aff_domain": "rpi.edu;rpi.edu;ibm.com;msu.edu;ibm.com;ibm.com", "position": "Postdoc;Associate Professor;Principal Researcher;Assistant Professor;Researcher;Research Staff Member", "bibtex": "@inproceedings{\nzhang2023joint,\ntitle={Joint Edge-Model Sparse Learning is Provably Efficient for Graph Neural Networks},\nauthor={Shuai Zhang and Meng Wang and Pin-Yu Chen and Sijia Liu and Songtao Lu and Miao Liu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=4UldFtZ_CVF}\n}", "github": "", "project": "", "reviewers": "LzNm;swLg;X7Xt", "pdf_size": 1869900, "recommendation": "6;6;6", "confidence": "3;3;3", "correctness": "3;3;3", "technical_novelty": "3;2;3", "empirical_novelty": "3;2;3", "wc_summary_paper": "98;78;207", "wc_strength_and_weaknesses": "44;237;404", "wc_clarity_quality_novelty_and_reproducibility": "806;31;105", "wc_summary_review": "340;96;78", "wc_review": "1288;442;794", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.0, 0.0 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 127.66666666666667, 56.68823119092318 ], "wc_strength_and_weaknesses_avg": [ 228.33333333333334, 147.09709567342094 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 314.0, 349.2057655117777 ], "wc_summary_review_avg": [ 171.33333333333334, 119.4915152729357 ], "wc_review_avg": [ 841.3333333333334, 346.995997415276 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5361136090908688022&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "pdf": "https://openreview.net/pdf?id=4UldFtZ_CVF", "email": "rpi.edu;rpi.edu;ibm.com;msu.edu;ibm.com;ibm.com", "author_num": 6, "aff_unique_index": "0;0;1;2;3;1", "aff_unique_norm": "Rensselaer Polytechnic Institute;International Business Machines Corporation;Michigan State University;IBM", "aff_unique_dep": ";;;Research", "aff_unique_url": "https://www.rpi.edu;https://www.ibm.com;https://www.msu.edu;https://www.ibm.com/research", "aff_unique_abbr": "RPI;IBM;MSU;IBM", "aff_campus_unique_index": "1", "aff_campus_unique": ";Yorktown Heights", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "4VFNnqSinf", "title": "PREDICTION OF TOURISM FLOW WITH SPARSE DATA INCORPORATING TOURIST GEOLOCATIONS", "track": "main", "status": "Reject", "tldr": "We apply state-of-the-art deep learning models such as GNNs, RNNs and Transformers to the problem of Tourism flow predictions", "abstract": "Modern tourism in the 21st century is facing numerous challenges. One of these challenges is the rapidly growing number of tourists in space-limited regions such as historical city centers, museums, or geographical bottlenecks like narrow valleys.
In this context, a proper and accurate prediction of tourism volume and tourism flow within a certain area is important and critical for visitor management tasks such as sustainable treatment of the environment and prevention of overcrowding. Static flow control methods like conventional low-level controllers or limiting access to overcrowded venues could not solve the problem yet. In this paper, we empirically evaluate the performance of state-of-the-art deep-learning methods such as RNNs, GNNs, and Transformers as well as the classic statistical ARIMA method. Granular limited data supplied by a tourism region is extended by exogenous data such as geolocation trajectories of individual tourists, weather and holidays. In the field of visitor flow prediction with sparse data, we are thereby capable of increasing the accuracy of our predictions, incorporating modern input feature handling as well as mapping geolocation data on top of discrete POI data.", "keywords": "GNN;RNN;Transformer;Tourism;Tourism flow prediction", "primary_area": "", "supplementary_material": "/attachment/f69aa6f01c1fac93653ea17402ff311ff98cb8ec.zip", "author": "Julian Lemmel;Zahra Babaiee;Marvin Kleinlehner;Ivan Majic;Philipp Neubauer;Johannes Scholz;Radu Grosu;Sophie Neubauer", "authorids": "~Julian_Lemmel1;~Zahra_Babaiee1;marvin@datenvorsprung.at;majic@tugraz.at;philipp@datenvorsprung.at;johannes.scholz@tugraz.at;~Radu_Grosu1;sophie@datenvorsprung.at", "gender": "M;F;;;;;M;", "homepage": ";https://informatics.tuwien.ac.at/people/zahra-babaiee;;;;;https://ti.tuwien.ac.at/cps/people/grosu;", "dblp": "323/4655;;;;;;94/5421;", "google_scholar": "BMRdvcYAAAAJ;;;;;;1g_muAgAAAAJ;", "orcid": "0000-0002-3517-2860;;;;;;0000-0001-5715-2142;", "linkedin": "julian-lemmel-446115223/;zahra-babaiee-5b4ba314b;;;;;;", "or_profile": "~Julian_Lemmel1;~Zahra_Babaiee1;marvin@datenvorsprung.at;majic@tugraz.at;philipp@datenvorsprung.at;johannes.scholz@tugraz.at;~Radu_Grosu1;sophie@datenvorsprung.at", "aff": "DatenVorsprung GmbH;TU Wien Vienna University of Technology;;;;;TU Wien Vienna University of Technology;", "aff_domain": "datenvorsprung.at;tuwien.ac.at;;;;;tuwien.ac.at;", "position": "Researcher;PhD student;;;;;Full Professor;", "bibtex": "@misc{\nlemmel2023prediction,\ntitle={{PREDICTION} {OF} {TOURISM} {FLOW} {WITH} {SPARSE} {DATA} {INCORPORATING} {TOURIST} {GEOLOCATIONS}},\nauthor={Julian Lemmel and Zahra Babaiee and Marvin Kleinlehner and Ivan Majic and Philipp Neubauer and Johannes Scholz and Radu Grosu and Sophie Neubauer},\nyear={2023},\nurl={https://openreview.net/forum?id=4VFNnqSinf}\n}", "github": "", "project": "", "reviewers": "XQLY;xjA7;mG9t;X9rk", "site": "https://openreview.net/forum?id=4VFNnqSinf", "pdf_size": 618623, "recommendation": "3;3;3;3", "confidence": "3;4;4;4", "correctness": "2;3;3;3", "technical_novelty": "2;1;1;1", "empirical_novelty": "3;2;2;2", "wc_summary_paper": "64;54;119;60", "wc_strength_and_weaknesses": "326;132;224;92", "wc_clarity_quality_novelty_and_reproducibility": "311;47;154;76", "wc_summary_review": "62;43;35;52", "wc_review": "763;276;532;280", "wc_reply_reviewers": "13;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 1.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 74.25, 26.080404521402652 ],
"wc_strength_and_weaknesses_avg": [ 193.5, 90.2371874561702 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 147.0, 102.45242798489453 ], "wc_summary_review_avg": [ 48.0, 10.074720839804943 ], "wc_review_avg": [ 462.75, 202.00170172550528 ], "wc_reply_reviewers_avg": [ 3.25, 5.629165124598851 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:dm-BLS08B5sJ:scholar.google.com/&scioq=PREDICTION+OF+TOURISM+FLOW+WITH+SPARSE+DATA+INCORPORATING+TOURIST+GEOLOCATIONS&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;1", "aff_unique_norm": "DatenVorsprung GmbH;Vienna University of Technology", "aff_unique_dep": ";", "aff_unique_url": ";https://www.tuwien.ac.at", "aff_unique_abbr": ";TU Wien", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Vienna", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Germany;Austria" }, { "id": "4Vwx-VwS5b3", "title": "Implicit Neural Spatial Representations for Time-dependent PDEs", "track": "main", "status": "Reject", "tldr": "We replace traditional PDE solvers' spatial representations (e.g., grid, mesh, and point cloud) with a neural spatial representation.", "abstract": "Numerically solving partial differential equations (PDEs) often entails spatial and temporal discretizations. Traditional methods (e.g., finite\ndifference, finite element, smoothed-particle hydrodynamics) frequently adopt explicit spatial discretizations, such as grids, meshes, and point clouds, where each degree-of-freedom corresponds to a location in space. While these explicit spatial correspondences are intuitive to model and understand, these representations are not necessarily optimal for accuracy, memory-usage, or adaptivity. In this work, we explore implicit neural representation as an alternative spatial discretization, where spatial information is implicitly stored in the neural network weights. With implicit neural spatial representation, PDE-constrained time-stepping translates into updating neural network weights, which naturally integrates with commonly adopted optimization time integrators. We validate our approach on a variety of classic PDEs with examples involving large elastic deformations, turbulent fluids, and multiscale phenomena. 
While slower to compute than traditional representations, our approach exhibits higher accuracy, lower memory consumption, and dynamically adaptive allocation of degrees of freedom without complex remeshing.", "keywords": "PDE;implicit neural representation;neural field;numerical methods", "primary_area": "", "supplementary_material": "/attachment/f03e6328f4cd7c0e32b6e1917418d4437af66021.zip", "author": "Honglin Chen;Rundi Wu;Eitan Grinspun;Changxi Zheng;Peter Yichen Chen", "authorids": "~Honglin_Chen3;~Rundi_Wu1;~Eitan_Grinspun3;~Changxi_Zheng1;~Peter_Yichen_Chen1", "gender": "F;M;;M;M", "homepage": "https://www.cs.columbia.edu/~honglinchen/;https://www.cs.columbia.edu/~rundi/;http://www.dgp.toronto.edu/~eitan;http://www.cs.columbia.edu/~cxz;https://peterchencyc.com", "dblp": ";241/5506;;92/5285;230/7889", "google_scholar": ";ulf_Pt0AAAAJ;-HyEryoAAAAJ;-0rEuLgAAAAJ;9TX3RmEAAAAJ", "orcid": ";;;;", "linkedin": "honglin-chen-3800a914a/;;;;", "or_profile": "~Honglin_Chen3;~Rundi_Wu1;~Eitan_Grinspun3;~Changxi_Zheng1;~Peter_Yichen_Chen1", "aff": "Columbia University;Columbia University;University of Toronto;Columbia University;MIT", "aff_domain": "columbia.edu;columbia.edu;toronto.edu;cs.columbia.edu;csail.mit.edu", "position": "PhD student;PhD student;Full Professor;Associate Professor;Postdoc", "bibtex": "@misc{\nchen2023implicit,\ntitle={Implicit Neural Spatial Representations for Time-dependent {PDE}s},\nauthor={Honglin Chen and Rundi Wu and Eitan Grinspun and Changxi Zheng and Peter Yichen Chen},\nyear={2023},\nurl={https://openreview.net/forum?id=4Vwx-VwS5b3}\n}", "github": "", "project": "", "reviewers": "8oNn;DwBW;oNuL;Tkaf;uDb6;7x1Y", "site": "https://openreview.net/forum?id=4Vwx-VwS5b3", "pdf_size": 8014722, "recommendation": "5;5;5;6;6;8", "confidence": "4;3;4;4;4;3", "correctness": "3;3;3;4;3;3", "technical_novelty": "2;2;2;3;3;4", "empirical_novelty": "2;0;2;3;2;4", "wc_summary_paper": "75;99;30;114;46;66", "wc_strength_and_weaknesses": "257;370;301;306;476;393", "wc_clarity_quality_novelty_and_reproducibility": "51;110;173;52;32;131", "wc_summary_review": "45;56;66;233;22;34", "wc_review": "428;635;570;705;576;624", "wc_reply_reviewers": "201;102;166;25;0;28", "wc_reply_authors": "1252;1021;1167;104;381;279", "reply_reviewers": "1;1;2;1;0;1", "reply_authors": "2;3;4;1;1;1", "recommendation_avg": [ 5.833333333333333, 1.0671873729054748 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.1666666666666665, 0.3726779962499649 ], "technical_novelty_avg": [ 2.6666666666666665, 0.7453559924999298 ], "empirical_novelty_avg": [ 2.1666666666666665, 1.2133516482134197 ], "wc_summary_paper_avg": [ 71.66666666666667, 28.802006102970598 ], "wc_strength_and_weaknesses_avg": [ 350.5, 72.00636545935829 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 91.5, 50.47359045415071 ], "wc_summary_review_avg": [ 76.0, 71.63565220382004 ], "wc_review_avg": [ 589.6666666666666, 84.89732360655167 ], "wc_reply_reviewers_avg": [ 87.0, 75.67033764957046 ], "wc_reply_authors_avg": [ 700.6666666666666, 458.2665405877045 ], "reply_reviewers_avg": [ 1.0, 0.5773502691896257 ], "reply_authors_avg": [ 2.0, 1.1547005383792515 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.44172610429938625, "corr_recommendation_correctness": 0.06984302957695786, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17730298820450536976&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff_unique_index": "0;0;1;0;2", 
"aff_unique_norm": "Columbia University;University of Toronto;Massachusetts Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.columbia.edu;https://www.utoronto.ca;https://web.mit.edu", "aff_unique_abbr": "Columbia;U of T;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "United States;Canada" }, { "title": "Dirichlet-based Uncertainty Calibration for Active Domain Adaptation", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12004", "id": "4WM4cy42B81", "poster": "/media/PosterPDFs/ICLR%202023/12004.png?t=1681224214.150878", "openreview": "https://openreview.net/forum?id=4WM4cy42B81", "slides": "https://iclr.cc/virtual/2023/poster/12004", "video": "https://iclr.cc/virtual/2023/poster/12004", "author_site": "Mixue Xie, Shuang Li, Rui Zhang, Chi Liu", "tldr": "", "abstract": "Active domain adaptation (DA) aims to maximally boost the model adaptation on a new target domain by actively selecting limited target data to annotate, whereas traditional active learning methods may be less effective since they do not consider the domain shift issue. Despite active DA methods address this by further proposing targetness to measure the representativeness of target domain characteristics, their predictive uncertainty is usually based on the prediction of deterministic models, which can easily be miscalibrated on data with distribution shift. Considering this, we propose a Dirichlet-based Uncertainty Calibration (DUC) approach for active DA, which simultaneously achieves the mitigation of miscalibration and the selection of informative target samples. Specifically, we place a Dirichlet prior on the prediction and interpret the prediction as a distribution on the probability simplex, rather than a point estimate like deterministic models. This manner enables us to consider all possible predictions, mitigating the miscalibration of unilateral prediction. Then a two-round selection strategy based on different uncertainty origins is designed to select target samples that are both representative of target domain and conducive to discriminability. 
Extensive experiments on cross-domain image classification and semantic segmentation validate the superiority of DUC.", "keywords": "domain adaptation;active learning;uncertainty;Dirichlet", "primary_area": "", "supplementary_material": "/attachment/3ad1fa04da718b44a4e6b2f80fdd578220f9854c.zip", "author": "Mixue Xie;Shuang Li;Rui Zhang;Chi Harold Liu", "authorids": "~Mixue_Xie2;~Shuang_Li6;~Rui_Zhang20;~Chi_Harold_Liu1", "gender": ";M;M;M", "homepage": ";https://shuangli.xyz;https://github.com/larry6799;", "dblp": "289/0077;43/6294-8;;45/4723.html", "google_scholar": ";VXCiAc4AAAAJ;;3IgFTEkAAAAJ", "orcid": ";0000-0001-6807-9905;;", "linkedin": ";;;", "or_profile": "~Mixue_Xie2;~Shuang_Li6;~Rui_Zhang20;~Chi_Harold_Liu1", "aff": "Beijing Institute of Technology;Beijing Institute of Technology;Beijing Institute of Technology;Beijing Institute of Technology", "aff_domain": "bit.edu.cn;bit.edu.cn;bit.edu.cn;bit.edu.cn", "position": "MS student;Associate Professor;MS student;Full Professor", "bibtex": "@inproceedings{\nxie2023dirichletbased,\ntitle={Dirichlet-based Uncertainty Calibration for Active Domain Adaptation},\nauthor={Mixue Xie and Shuang Li and Rui Zhang and Chi Harold Liu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=4WM4cy42B81}\n}", "github": "", "project": "", "reviewers": "CH6i;arzy;Rp4n", "pdf_size": 7331321, "recommendation": "5;6;8", "confidence": "4;4;4", "correctness": "4;4;4", "technical_novelty": "2;3;4", "empirical_novelty": "2;3;3", "wc_summary_paper": "97;109;124", "wc_strength_and_weaknesses": "286;107;125", "wc_clarity_quality_novelty_and_reproducibility": "47;37;213", "wc_summary_review": "26;33;115", "wc_review": "456;286;577", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 4.0, 0.0 ], "technical_novelty_avg": [ 3.0, 0.816496580927726 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 110.0, 11.045361017187261 ], "wc_strength_and_weaknesses_avg": [ 172.66666666666666, 80.47497885816573 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 99.0, 80.7134850360624 ], "wc_summary_review_avg": [ 58.0, 40.40627014049511 ], "wc_review_avg": [ 439.6666666666667, 119.36033214133114 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 43, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14105309971589037028&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=4WM4cy42B81", "email": "bit.edu.cn;bit.edu.cn;bit.edu.cn;bit.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Beijing Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.bit.edu.cn/", "aff_unique_abbr": "BIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "4WjVKtMUOP", "title": "Bounded Attacks and Robustness in Image Transform Domains", "track": "main", "status": "Reject", "tldr": "A novel set of attacks operating in the well-known DCT and DWT domains that do not abolish the usual
$L^\\infty$ threat model, leading to adversarial examples with higher visual similarity and better adversarial learning transferability.", "abstract": "Classical image transformations such as the discrete cosine transform (DCT) and the discrete wavelet transforms (DWTs) provide semantically meaningful representations of images. In this paper we propose a general method for adversarial attacks in such transform domains that, in contrast to prior work, obey the $L^\\infty$ constraint in the pixel domain. The key idea is to replace the standard attack based on projections with the barrier method. Experiments with DCT and DWTs produce adversarial examples that are significantly more similar to the original than with prior attacks. Further, through adversarial training we show that robustness against our attacks transfers to robustness against a broad class of common image perturbations.", "keywords": "Adversarial example;white-box attack;neural networks;discrete linear transforms;DCT;JPEG;wavelet", "primary_area": "", "supplementary_material": "", "author": "Mohamed-Hicham LEGHETTAS;Markus P\u00fcschel", "authorids": "~Mohamed-Hicham_LEGHETTAS1;~Markus_P\u00fcschel1", "gender": "M;M", "homepage": "https://acl.inf.ethz.ch/people/hichaml/;https://acl.inf.ethz.ch/", "dblp": ";37/6355", "google_scholar": ";az9ZryAAAAAJ", "orcid": ";0000-0001-8834-8551", "linkedin": ";", "or_profile": "~Mohamed-Hicham_LEGHETTAS1;~Markus_P\u00fcschel1", "aff": "Department of Computer Science, ETHZ - ETH Zurich;Department of Computer Science, ETHZ - ETH Zurich", "aff_domain": "inf.ethz.ch;inf.ethz.ch", "position": "PhD student;Full Professor", "bibtex": "@misc{\nleghettas2023bounded,\ntitle={Bounded Attacks and Robustness in Image Transform Domains},\nauthor={Mohamed-Hicham LEGHETTAS and Markus P{\\\"u}schel},\nyear={2023},\nurl={https://openreview.net/forum?id=4WjVKtMUOP}\n}", "github": "", "project": "", "reviewers": "GeiG;RQTp;2yuU;LwcG", "site": "https://openreview.net/forum?id=4WjVKtMUOP", "pdf_size": 10975624, "recommendation": "3;3;3;5", "confidence": "3;3;3;4", "correctness": "3;2;3;3", "technical_novelty": "2;2;2;2", "empirical_novelty": "2;2;1;0", "wc_summary_paper": "185;27;25;141", "wc_strength_and_weaknesses": "396;270;173;310", "wc_clarity_quality_novelty_and_reproducibility": "42;10;12;75", "wc_summary_review": "75;8;9;93", "wc_review": "698;315;219;619", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 1.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 94.5, 70.24777576550022 ], "wc_strength_and_weaknesses_avg": [ 287.25, 80.14791014118833 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 34.75, 26.47050245084139 ], "wc_summary_review_avg": [ 46.25, 38.284298348017295 ], "wc_review_avg": [ 462.75, 200.6244937688317 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 1.0, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:cDvrLvHVjr8J:scholar.google.com/&scioq=Bounded+Attacks+and+Robustness+in+Image+Transform+Domains&hl=en&as_sdt=0,33", "gs_version_total": 0,
"aff_unique_index": "0;0", "aff_unique_norm": "ETH Zurich", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.ethz.ch", "aff_unique_abbr": "ETHZ", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Zurich", "aff_country_unique_index": "0;0", "aff_country_unique": "Switzerland" }, { "id": "4WoJDxyCxq", "title": "To be private and robust: Differentially Private Optimizers Can Learn Adversarially Robust Models", "track": "main", "status": "Withdraw", "tldr": "We show that DP models can be adversarially robust with rigorous proof on linear models and empirical evidence on deep networks.", "abstract": "Machine learning models have shone in a variety of domains and attracted increasing attention from both the security and the privacy communities. One important yet worrying question is: will training models under the differential privacy (DP) constraint unfavorably impact on the adversarial robustness? While previous works have postulated that privacy comes at the cost of worse robustness, we give the first theoretical analysis to show that DP models can indeed be robust and accurate, even sometimes more robust than their naturally-trained non-private counterparts. We observe three key factors that influence the privacy-robustness-accuracy tradeoff: (1) hyperparamters for DP optimizers are critical; (2) pre-training on public data significantly mitigates the accuracy and robustness drop; (3) choice of DP optimizers makes a difference. With these factors set properly, we achieve 90\\% natural accuracy, 72\\% robust accuracy ($+9\\%$ than the non-private model) under $l_2(0.5)$ attack, and 69\\% robust accuracy ($+16\\%$ than the non-private model) with pre-trained SimCLRv2 model under $l_\\infty(4/255)$ attack on CIFAR10 with $\\epsilon=2$. In fact, we show both theoretically and empirically that DP models are Pareto optimal in terms of accuracy and robustness. Additionally, the robustness of DP models is consistently observed on MNIST, Fashion MNIST and CelebA, with ResNet and Vision Transformer. 
We believe our encouraging results are a significant step towards training models that are private as well as robust, including deep neural networks.", "keywords": "deep learning;differential privacy;adversarial robustness;Pareto optimality", "primary_area": "", "supplementary_material": "/attachment/ccfe4ca9133240acb4274b717bf0ce4744b15f14.zip", "author": "Zhiqi Bu;Yuan Zhang", "authorids": "~Zhiqi_Bu1;~Yuan_Zhang19", "gender": "M;", "homepage": "https://sites.google.com/view/zhiqi-bu;", "dblp": "245/2573;", "google_scholar": "MEvTLxIAAAAJ;", "orcid": ";", "linkedin": ";ewan-yuan-zhang/", "or_profile": "~Zhiqi_Bu1;~Yuan_Zhang19", "aff": "Amazon;", "aff_domain": "amazon.com;", "position": "Researcher;", "bibtex": "@misc{\nbu2023to,\ntitle={To be private and robust: Differentially Private Optimizers Can Learn Adversarially Robust Models},\nauthor={Zhiqi Bu and Yuan Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=4WoJDxyCxq}\n}", "github": "", "project": "", "reviewers": "mRen;q1hC;FPNK;bxsQ;AApB", "site": "https://openreview.net/forum?id=4WoJDxyCxq", "pdf_size": 938133, "recommendation": "1;3;3;3;5", "confidence": "5;4;4;4;3", "correctness": "2;1;3;2;2", "technical_novelty": "1;3;4;1;3", "empirical_novelty": "1;4;3;4;3", "wc_summary_paper": "31;85;29;69;57", "wc_strength_and_weaknesses": "310;267;165;54;285", "wc_clarity_quality_novelty_and_reproducibility": "33;59;44;47;15", "wc_summary_review": "36;25;28;520;29", "wc_review": "410;436;266;690;386", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "348;165;102;265;375", "reply_reviewers": "0;0;0;0;0", "reply_authors": "1;1;1;1;1", "recommendation_avg": [ 3.0, 1.2649110640673518 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "correctness_avg": [ 2.0, 0.6324555320336759 ], "technical_novelty_avg": [ 2.4, 1.2000000000000002 ], "empirical_novelty_avg": [ 3.0, 1.0954451150103321 ], "wc_summary_paper_avg": [ 54.2, 21.673947494630507 ], "wc_strength_and_weaknesses_avg": [ 216.2, 94.9134342440521 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 39.6, 14.82700239428051 ], "wc_summary_review_avg": [ 127.6, 196.2331266631605 ], "wc_review_avg": [ 437.6, 138.98143760948798 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 251.0, 104.47774882720243 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.9999999999999999, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:PgzoHRcB_gsJ:scholar.google.com/&scioq=To+be+private+and+robust:+Differentially+Private+Optimizers+Can+Learn+Adversarially+Robust+Models&hl=en&as_sdt=0,47", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Amazon", "aff_unique_dep": "Amazon.com, Inc.", "aff_unique_url": "https://www.amazon.com", "aff_unique_abbr": "Amazon", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "4XE614GBuGR", "title": "Domain-Adjusted Regression or: ERM May Already Learn Features Sufficient for Out-of-Distribution Generalization", "track": "main", "status": "Reject", "tldr": "We show that features learned via ERM may be \"good enough\" for generalization, and that the main difficulty is robust classification. 
We give a new model of dist shift and an alg which is minimax-optimal and meets/exceeds SOTA on several benchmarks.", "abstract": "A common explanation for the failure of deep networks to generalize out-of-distribution is that they fail to recover the \"correct\" features. We challenge this notion with a simple experiment which suggests that ERM already learns sufficient features and that the current bottleneck is not feature learning, but robust regression. We therefore argue that devising simpler methods for learning predictors on existing features is a promising direction for future research. Towards this end, we introduce Domain-Adjusted Regression (DARE), a convex objective for learning a linear predictor that is provably robust under a new model of distribution shift. Rather than learning one function, DARE performs a domain-specific adjustment to unify the domains in a canonical latent space and learns to predict in this space. Under a natural model, we prove that the DARE solution is the minimax-optimal predictor for a constrained set of test distributions. Further, we provide the first finite-environment convergence guarantee to the minimax risk, improving over existing analyses which only yield minimax predictors after an environment threshold. Evaluated on finetuned features, we find that DARE compares favorably to prior methods, consistently achieving equal or better performance.", "keywords": "domain generalization;domain generalization theory;out-of-distribution generalization;representation learning", "primary_area": "", "supplementary_material": "", "author": "Elan Rosenfeld;Pradeep Kumar Ravikumar;Andrej Risteski", "authorids": "~Elan_Rosenfeld1;~Pradeep_Kumar_Ravikumar1;~Andrej_Risteski2", "gender": "M;M;M", "homepage": ";http://www.cs.cmu.edu/~pradeepr/;", "dblp": "236/4508;94/3594;63/11143", "google_scholar": "f0j0K8QAAAAJ;https://scholar.google.com.tw/citations?user=Q4DTPw4AAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Elan_Rosenfeld1;~Pradeep_Kumar_Ravikumar1;~Andrej_Risteski2", "aff": "Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "andrew.cmu.edu;cmu.edu;cmu.edu", "position": "PhD student;Full Professor;Assistant Professor", "bibtex": "@misc{\nrosenfeld2023domainadjusted,\ntitle={Domain-Adjusted Regression or: {ERM} May Already Learn Features Sufficient for Out-of-Distribution Generalization},\nauthor={Elan Rosenfeld and Pradeep Kumar Ravikumar and Andrej Risteski},\nyear={2023},\nurl={https://openreview.net/forum?id=4XE614GBuGR}\n}", "github": "", "project": "", "reviewers": "4nvT;aW4c;o4xo;MFvo;NbM4", "site": "https://openreview.net/forum?id=4XE614GBuGR", "pdf_size": 725460, "recommendation": "5;6;6;6;6", "confidence": "4;2;3;3;3", "correctness": "4;3;4;3;3", "technical_novelty": "3;2;4;3;2", "empirical_novelty": "3;2;4;3;3", "wc_summary_paper": "212;93;77;103;92", "wc_strength_and_weaknesses": "1244;90;206;202;249", "wc_clarity_quality_novelty_and_reproducibility": "48;862;9;42;80", "wc_summary_review": "88;156;14;20;156", "wc_review": "1592;1201;306;367;577", "wc_reply_reviewers": "1091;285;30;0;241", "wc_reply_authors": "3773;2057;883;542;519", "reply_reviewers": "4;1;1;0;1", "reply_authors": "8;3;2;1;1", "recommendation_avg": [ 5.8, 0.39999999999999997 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "correctness_avg": [ 3.4, 0.4898979485566356 ], "technical_novelty_avg": [ 2.8, 0.7483314773547882 ], "empirical_novelty_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_paper_avg": [ 115.4, 
49.00857067901491 ], "wc_strength_and_weaknesses_avg": [ 398.2, 426.159782241356 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 208.2, 327.6756933310739 ], "wc_summary_review_avg": [ 86.8, 62.19453352184579 ], "wc_review_avg": [ 808.6, 503.77558495822325 ], "wc_reply_reviewers_avg": [ 329.4, 396.97989873543975 ], "wc_reply_authors_avg": [ 1554.8, 1242.7853233764872 ], "reply_reviewers_avg": [ 1.4, 1.3564659966250538 ], "reply_authors_avg": [ 3.0, 2.6076809620810595 ], "replies_avg": [ 51, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.790569415042095, "corr_recommendation_correctness": -0.6123724356957948, "gs_citation": 89, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18354400836987334001&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "4XMAzZasId", "title": "Model-agnostic Measure of Generalization Difficulty", "track": "main", "status": "Reject", "tldr": "We propose a model-agnostic measure of the generalization difficulty of a task.", "abstract": "The measure of a machine learning algorithm is the difficulty of the tasks it can perform, and sufficiently difficult tasks are critical drivers of strong machine learning models. However, quantifying the generalization difficulty of machine learning benchmarks has remained challenging. We propose what is to our knowledge the first model-agnostic measure of the inherent generalization difficulty of tasks. Our inductive bias complexity measure quantifies the total information required to generalize well on a task minus the information provided by the data. It does so by measuring the fractional volume occupied by hypotheses that generalize on a task given that they fit the training data. It scales exponentially with the intrinsic dimensionality of the space over which the model must generalize but only polynomially in resolution per dimension, showing that tasks which require generalizing over many dimensions are drastically more difficult than tasks involving more detail in fewer dimensions. Our measure can be applied to compute and compare supervised learning, reinforcement learning and meta-learning task difficulties against each other. We show that applied empirically, it formally quantifies intuitively expected trends, e.g. that in terms of required inductive bias, MNIST $<$ CIFAR10 $<$ Imagenet and fully observable Markov decision processes (MDPs) $<$ partially observable MDPs. Further, we show that classification of complex images $<$ few-shot meta-learning with simple images. 
Our measure provides a quantitative metric to guide the construction of more complex tasks requiring greater inductive bias, and thereby encourages the development of more sophisticated architectures and learning algorithms with more powerful generalization capabilities.", "keywords": "generalization;inductive bias;information theory;manifold;complexity", "primary_area": "", "supplementary_material": "", "author": "Akhilan Boopathy;Kevin Liu;Jaedong Hwang;Shu Ge;Asaad Mohammedsaleh;Ila R Fiete", "authorids": "~Akhilan_Boopathy1;~Kevin_Liu3;~Jaedong_Hwang1;~Shu_Ge1;asaadm@mit.edu;~Ila_R_Fiete1", "gender": "M;M;M;F;;F", "homepage": ";;https://jd730.github.io/;;;https://fietelab.mit.edu/", "dblp": "230/8358;;239/1982;;;", "google_scholar": ";;https://scholar.google.co.kr/citations?user=bITgqEUAAAAJ;;;uE-CihIAAAAJ", "orcid": ";;;;;0000-0003-4738-2539", "linkedin": ";kevin-liu888;;ge-shu/;;", "or_profile": "~Akhilan_Boopathy1;~Kevin_Liu3;~Jaedong_Hwang1;~Shu_Ge1;asaadm@mit.edu;~Ila_R_Fiete1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;mit.edu;mit.edu;;mit.edu", "position": "PhD student;Undergrad student;PhD student;Undergrad student;;Professor", "bibtex": "@misc{\nboopathy2023modelagnostic,\ntitle={Model-agnostic Measure of Generalization Difficulty},\nauthor={Akhilan Boopathy and Kevin Liu and Jaedong Hwang and Shu Ge and Asaad Mohammedsaleh and Ila R Fiete},\nyear={2023},\nurl={https://openreview.net/forum?id=4XMAzZasId}\n}", "github": "", "project": "", "reviewers": "Du5K;NEm3;FUck;eajX", "site": "https://openreview.net/forum?id=4XMAzZasId", "pdf_size": 822566, "recommendation": "3;3;3;8", "confidence": "4;3;3;3", "correctness": "3;2;2;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "0;3;2;2", "wc_summary_paper": "76;100;231;140", "wc_strength_and_weaknesses": "359;443;494;401", "wc_clarity_quality_novelty_and_reproducibility": "169;706;56;31", "wc_summary_review": "94;144;21;23", "wc_review": "698;1393;802;595", "wc_reply_reviewers": "177;0;0;0", "wc_reply_authors": "2045;544;844;865", "reply_reviewers": "1;0;0;0", "reply_authors": "4;1;2;2", "recommendation_avg": [ 4.25, 2.165063509461097 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.75, 1.0897247358851685 ], "wc_summary_paper_avg": [ 136.75, 59.022771029493356 ], "wc_strength_and_weaknesses_avg": [ 424.25, 50.03686141236279 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 240.5, 273.7393833557751 ], "wc_summary_review_avg": [ 70.5, 51.62605931116571 ], "wc_review_avg": [ 872.0, 309.57470826926414 ], "wc_reply_reviewers_avg": [ 44.25, 76.64324823492282 ], "wc_reply_authors_avg": [ 1074.5, 574.5261090672903 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3056888251994888924&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "4Xd_aAqNe7h", "title": "FaceMAE: Privacy-Preserving Face Recognition via Masked Autoencoders", "track": "main", "status": "Withdraw", "tldr": "A novel method for privacy-preserving face recognition.", "abstract": "Face recognition, as one of the most successful applications in artificial intelligence, has been widely used in security, administration, advertising, and healthcare. However, the privacy issues of public face datasets have attracted increasing attention in recent years. Previous works simply mask most areas of faces or synthesize samples using generative models to construct privacy-preserving face datasets, which overlooks the trade-off between privacy protection and data utility. In this paper, we propose a novel framework FaceMAE, where the face privacy and recognition performance are considered simultaneously. Firstly, randomly masked face images are used to train the reconstruction module in FaceMAE. We tailor the instance relation matching (IRM) module to minimize the distribution gap between real faces and FaceMAE reconstructed ones. During the deployment phase, we use trained FaceMAE to reconstruct images from masked faces of unseen identities without extra training. The risk of privacy leakage is measured based on face retrieval between reconstructed and original datasets. Experiments prove that the identities of reconstructed images are difficult to be retrieved. We also perform sufficient privacy-preserving face recognition on several public face datasets (i.e. CASIA-WebFace and WebFace260M). Compared to previous state of the arts, FaceMAE consistently \\textbf{reduces at least 50\\% error rate} on LFW, CFP-FP and AgeDB.", "keywords": "FaceMAE: Privacy-Preserving Face Recognition via Masked Autoencoders", "primary_area": "", "supplementary_material": "", "author": "Kai Wang;Bo Zhao;Xiangyu Peng;Jianyang Gu;Jiankang Deng;Zheng Zhu;Xinchao Wang;Yang You", "authorids": "~Kai_Wang8;~Bo_Zhao4;~Xiangyu_Peng2;~Jianyang_Gu1;~Jiankang_Deng1;~Zheng_Zhu1;~Xinchao_Wang1;~Yang_You1", "gender": "M;M;M;M;M;M;M;M", "homepage": "https://kaiwang960112.github.io/;;https://github.com/xyupeng;https://vimar-gu.github.io/;https://jiankangdeng.github.io/;http://www.zhengzhu.net/;https://www.comp.nus.edu.sg/~youy/;https://sites.google.com/site/sitexinchaowang/", "dblp": "78/2022-36;;120/1463;241/7332.html;156/7808;29/4319.html/;33/8167-1.html;", "google_scholar": "i2II0XIAAAAJ;R3_AR5EAAAAJ;https://scholar.google.co.za/citations?user=KRUTk7sAAAAJ;8ZXbT18AAAAJ;Z_UoQFsAAAAJ;https://scholar.google.com.hk/citations?user=NmwjI0AAAAAJ;jF4dPZwAAAAJ;https://scholar.google.com.tw/citations?user=w69Buq0AAAAJ", "orcid": "0000-0002-1154-5175;;;;0000-0002-3709-6216;;;", "linkedin": ";;xiangyu-peng-aa10b11a5/;;jiankang-deng-b45b21b4/?originalSubdomain=uk;;yang-you-0b92914b/;", "or_profile": "~Kai_Wang8;~Bo_Zhao4;~Xiangyu_Peng2;~Jianyang_Gu1;~Jiankang_Deng1;~Zheng_Zhu1;~Yang_You1;~Xinchao_WANG3", "aff": "National University of Singapore;BAAI;National University of Singapore;Zhejiang University;;PhiGent Robotics;National University of Singapore;National University of Singapore", "aff_domain": "u.nus.edu;baai.ac.cn;nus.edu;zju.edu.cn;;phigent.ai;nus.edu.sg;nus.edu", "position": "PhD student;Principal Researcher;PhD student;PhD student;;Researcher;Professor;Assistant Professor", "bibtex": "@misc{\nwang2023facemae,\ntitle={Face{MAE}: Privacy-Preserving Face 
Recognition via Masked Autoencoders},\nauthor={Kai Wang and Bo Zhao and Xiangyu Peng and Jianyang Gu and Jiankang Deng and Zheng Zhu and Xinchao Wang and Yang You},\nyear={2023},\nurl={https://openreview.net/forum?id=4Xd_aAqNe7h}\n}", "github": "", "project": "", "reviewers": "u1Dy;mdU4;66sV;go3T;KPnB", "site": "https://openreview.net/forum?id=4Xd_aAqNe7h", "pdf_size": 1113983, "recommendation": "3;5;5;6;6", "confidence": "4;4;4;4;4", "correctness": "2;3;3;3;3", "technical_novelty": "2;3;2;3;3", "empirical_novelty": "2;3;2;3;2", "wc_summary_paper": "199;61;66;168;79", "wc_strength_and_weaknesses": "140;222;195;130;280", "wc_clarity_quality_novelty_and_reproducibility": "60;25;39;186;12", "wc_summary_review": "10;83;7;166;36", "wc_review": "409;391;307;650;407", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;0;0", "reply_authors": "0;0;0;0;0", "recommendation_avg": [ 5.0, 1.0954451150103321 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.8, 0.39999999999999997 ], "technical_novelty_avg": [ 2.6, 0.4898979485566356 ], "empirical_novelty_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_paper_avg": [ 114.6, 57.40592303935196 ], "wc_strength_and_weaknesses_avg": [ 193.4, 55.120232220120414 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 64.4, 62.8477525453377 ], "wc_summary_review_avg": [ 60.4, 59.412456606338026 ], "wc_review_avg": [ 432.8, 114.87454026023347 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.9128709291752771, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5568096549648303859&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;0;2;3;0;0", "aff_unique_norm": "National University of Singapore;Beijing Academy of Artificial Intelligence;Zhejiang University;PhiGent Robotics", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.nus.edu.sg;https://www.baaic.cn;https://www.zju.edu.cn;", "aff_unique_abbr": "NUS;BAAI;ZJU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1;0;0", "aff_country_unique": "Singapore;China;" }, { "id": "4bCsX2K0KuR", "title": "FiD-Light: Efficient and Effective Retrieval-Augmented Text Generation", "track": "main", "status": "Reject", "tldr": "We increase the efficiency of FiD with FiD-Light including a source pointing workflow for effective retrieval augmented generation.", "abstract": "Retrieval-augmented generation models offer many benefits over standalone language models: besides a textual answer to a given query they provide provenance items retrieved from an updateable knowledge base. However, they are also more complex systems and need to handle long inputs. In this work, we introduce FiD-Light to strongly increase the efficiency of the state-of-the-art retrieval-augmented FiD model, while maintaining the same level of effectiveness. Our FiD-Light model constrains the information flow from the encoder (which encodes passages separately) to the decoder (using concatenated encoded representations). Furthermore, we adapt FiD-Light with re-ranking capabilities through textual source pointers, to improve the top-ranked provenance precision.
Our experiments on a diverse set of seven knowledge intensive tasks (KILT) show FiD-Light consistently improves the Pareto frontier between query latency and effectiveness. FiD-Light with source pointing sets substantial new state-of-the-art results on six KILT tasks for combined text generation and provenance retrieval evaluation, while maintaining reasonable efficiency.", "keywords": "retrieval augmented generation;KILT;Fusion-in-Decoder;efficiency", "primary_area": "", "supplementary_material": "", "author": "Sebastian Hofst\u00e4tter;Jiecao Chen;Karthik Raman;Hamed Zamani", "authorids": "~Sebastian_Hofst\u00e4tter1;~Jiecao_Chen1;~Karthik_Raman1;~Hamed_Zamani1", "gender": ";M;;M", "homepage": "https://sebastian-hofstaetter.github.io/;;;https://groups.cs.umass.edu/zamani/", "dblp": ";151/6467;01/7071-1;150/5324", "google_scholar": "XrpoFlYAAAAJ;wI1P9y8AAAAJ;x1zTxLoAAAAJ;d2uzDIAAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Sebastian_Hofst\u00e4tter1;~Jiecao_Chen1;~Karthik_Raman1;~Hamed_Zamani1", "aff": ";ByteDance Inc.;Google;Google", "aff_domain": ";bytedance.com;google.com;google.com", "position": ";Researcher;Research Scientist;Visiting Faculty Researcher", "bibtex": "@misc{\nhofst{\\\"a}tter2023fidlight,\ntitle={FiD-Light: Efficient and Effective Retrieval-Augmented Text Generation},\nauthor={Sebastian Hofst{\\\"a}tter and Jiecao Chen and Karthik Raman and Hamed Zamani},\nyear={2023},\nurl={https://openreview.net/forum?id=4bCsX2K0KuR}\n}", "github": "", "project": "", "reviewers": "aESm;erWw;aPe7", "site": "https://openreview.net/forum?id=4bCsX2K0KuR", "pdf_size": 537955, "recommendation": "5;6;6", "confidence": "4;4;4", "correctness": "3;3;3", "technical_novelty": "2;2;3", "empirical_novelty": "3;2;3", "wc_summary_paper": "48;206;46", "wc_strength_and_weaknesses": "69;243;31", "wc_clarity_quality_novelty_and_reproducibility": "168;70;12", "wc_summary_review": "52;108;173", "wc_review": "337;627;262", "wc_reply_reviewers": "35;0;0", "wc_reply_authors": "831;512;603", "reply_reviewers": "1;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 100.0, 74.95776588630872 ], "wc_strength_and_weaknesses_avg": [ 114.33333333333333, 92.2942155404239 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 83.33333333333333, 64.38081149604196 ], "wc_summary_review_avg": [ 111.0, 49.44357052910587 ], "wc_review_avg": [ 408.6666666666667, 157.39193823770717 ], "wc_reply_reviewers_avg": [ 11.666666666666666, 16.49915822768611 ], "wc_reply_authors_avg": [ 648.6666666666666, 134.17484447126773 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 75, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14812317885501155152&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;1", "aff_unique_norm": "ByteDance;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.bytedance.com;https://www.google.com", "aff_unique_abbr": "ByteDance;Google", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;1", "aff_country_unique": 
"China;United States" }, { "id": "4bH8SxYNcI", "title": "Towards Adversarially Robust Deepfake Detection: An Ensemble Approach", "track": "main", "status": "Reject", "tldr": "We present - Disjoint Deepfake Detection (D3), an ensemble based technique for deepfake detection and provide theoretical and empirical evidence for it's robustness.", "abstract": "Detecting deepfakes remains an open problem. Current detection methods fail against an adversary who adds imperceptible adversarial perturbations to the deepfake to evade detection. We propose Disjoint Deepfake Detection (D3), a deepfake detector designed to improve adversarial robustness beyond de facto solutions such as adversarial training. D3 uses an ensemble of models over disjoint subsets of the frequency spectrum to significantly improve robustness. Our key insight is to leverage a redundancy in the frequency domain and apply a saliency partitioning technique to disjointly distribute frequency components across multiple models. We formally prove that these disjoint ensembles lead to a reduction in the dimensionality of the input subspace where adversarial deepfakes lie. We then empirically validate the D3 method against white-box attacks and black-box attacks and find that D3 significantly outperforms existing state-of-the-art defenses applied to deepfake detection.", "keywords": "Deepfakes;Ensembles;Adversarial Subspace;Frequency;Defense", "primary_area": "", "supplementary_material": "/attachment/f9d773e9715443c8885ec5b96010f7e9db3b6446.zip", "author": "Ashish Hooda;Neal Mangaokar;Ryan Feng;Kassem Fawaz;Somesh Jha;Atul Prakash", "authorids": "~Ashish_Hooda1;~Neal_Mangaokar1;~Ryan_Feng1;~Kassem_Fawaz1;~Somesh_Jha1;~Atul_Prakash1", "gender": ";;;;M;", "homepage": "https://pages.cs.wisc.edu/~hooda;https://nealmangaokar.com;http://www-personal.umich.edu/~rtfeng/;https://kassemfawaz.com;;https://www.eecs.umich.edu/~aprakash", "dblp": "279/6684;278/0806.html;193/6789;97/535.html;j/SomeshJha;p/AtulPrakash", "google_scholar": "wCzkVGgAAAAJ;k7GbiDIAAAAJ;TIJw4tQAAAAJ;8TINuv4AAAAJ;BaI7l8QAAAAJ;kIkHa2IAAAAJ", "orcid": ";0000-0002-0684-4971;0000-0002-4767-274X;0000-0002-4609-7691;;0000-0002-4907-3687", "linkedin": ";;;kmfawaz/;;atul-prakash-8729a44/", "or_profile": "~Ashish_Hooda1;~Neal_Mangaokar1;~Ryan_Feng1;~Kassem_Fawaz1;~Somesh_Jha1;~Atul_Prakash1", "aff": "Department of Computer Science, University of Wisconsin - Madison;University of Michigan - Ann Arbor;KLA;University of Wisconsin, Madison;Department of Computer Science, University of Wisconsin, Madison;University of Michigan", "aff_domain": "cs.wisc.edu;umich.edu;kla.com;wisc.edu;cs.wisc.edu;umich.edu", "position": "PhD student;PhD student;Intern;Assistant Professor;Full Professor;Professor", "bibtex": "@misc{\nhooda2023towards,\ntitle={Towards Adversarially Robust Deepfake Detection: An Ensemble Approach},\nauthor={Ashish Hooda and Neal Mangaokar and Ryan Feng and Kassem Fawaz and Somesh Jha and Atul Prakash},\nyear={2023},\nurl={https://openreview.net/forum?id=4bH8SxYNcI}\n}", "github": "", "project": "", "reviewers": "9j5U;TsrJ;h9Bn;k4jq", "site": "https://openreview.net/forum?id=4bH8SxYNcI", "pdf_size": 1276875, "recommendation": "5;5;8;8", "confidence": "4;4;3;3", "correctness": "2;2;2;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "90;60;79;47", "wc_strength_and_weaknesses": "658;222;108;85", "wc_clarity_quality_novelty_and_reproducibility": "107;65;118;8", "wc_summary_review": "23;48;37;11", "wc_review": "878;395;342;151", 
"wc_reply_reviewers": "99;307;75;0", "wc_reply_authors": "1915;3059;499;31", "reply_reviewers": "1;3;2;0", "reply_authors": "5;6;2;1", "recommendation_avg": [ 6.5, 1.5 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 69.0, 16.62828914831589 ], "wc_strength_and_weaknesses_avg": [ 268.25, 230.9246359745967 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 74.5, 43.188540146663904 ], "wc_summary_review_avg": [ 29.75, 13.988834833537782 ], "wc_review_avg": [ 441.5, 267.85490475255443 ], "wc_reply_reviewers_avg": [ 120.25, 113.83623105145391 ], "wc_reply_authors_avg": [ 1376.0, 1193.8555188966545 ], "reply_reviewers_avg": [ 1.5, 1.118033988749895 ], "reply_authors_avg": [ 3.5, 2.0615528128088303 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": 0.5773502691896258, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6384796330984351378&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1;2;3;0;1", "aff_unique_norm": "University of Wisconsin-Madison;University of Michigan;KLA Corporation;University of Wisconsin", "aff_unique_dep": "Department of Computer Science;;;", "aff_unique_url": "https://www.wisc.edu;https://www.umich.edu;https://www.kla.com;https://www.wisc.edu", "aff_unique_abbr": "UW-Madison;UM;KLA;UW", "aff_campus_unique_index": "0;1;0;0", "aff_campus_unique": "Madison;Ann Arbor;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Exploring perceptual straightness in learned visual representations", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10934", "id": "4cOfD2qL6T", "poster": "/media/PosterPDFs/ICLR%202023/10934.png?t=1681661979.9304872", "openreview": "https://openreview.net/forum?id=4cOfD2qL6T", "slides": "https://iclr.cc/virtual/2023/poster/10934", "video": "https://iclr.cc/virtual/2023/poster/10934", "author_site": "Anne Harrington, Vasha DuTell, Ayush Tewari, Mark Hamilton, Simon Stent, Ruth Rosenholtz, William Freeman", "tldr": "", "abstract": "Humans have been shown to use a ''straightened'' encoding to represent the natural visual world as it evolves in time (Henaff et al. 2019). In the context of discrete video sequences, ''straightened'' means that changes between frames follow a more linear path in representation space at progressively deeper levels of processing. While deep convolutional networks are often proposed as models of human visual processing, many do not straighten natural videos. In this paper, we explore the relationship between network architecture, differing types of robustness, biologically-inspired filtering mechanisms, and representational straightness in response to time-varying input; we identify strengths and limitations of straightness as a useful way of evaluating neural network representations. We find that (1) adversarial training leads to straighter representations in both CNN and transformer-based architectures but (2) this effect is task-dependent, not generalizing to tasks such as segmentation and frame-prediction, where straight representations are not favorable for predictions; and nor to other types of robustness. In addition, (3) straighter representations impart temporal stability to class predictions, even for out-of-distribution data. 
Finally, (4) biologically-inspired elements increase straightness in the early stages of a network, but do not guarantee increased straightness in downstream layers of CNNs. We show that straightness is an easily computed measure of representational robustness and stability, as well as a hallmark of human representations with benefits for computer vision models.", "keywords": "adversarial robustness;deep learning;representation learning;computer vision;neuroscience;human vision", "primary_area": "", "supplementary_material": "/attachment/bd50569969c39f29c207556de802d53fd0a672f7.zip", "author": "Anne Harrington;Vasha DuTell;Ayush Tewari;Mark Hamilton;Simon Stent;Ruth Rosenholtz;William T. Freeman", "authorids": "~Anne_Harrington1;~Vasha_DuTell2;~Ayush_Tewari2;~Mark_Hamilton1;~Simon_Stent1;~Ruth_Rosenholtz1;~William_T._Freeman1", "gender": "F;F;;M;M;F;M", "homepage": ";https://redwood.berkeley.edu/people/vasha-dutell/;https://ayushtewari.com;https://mhamilton.net;;http://persci.mit.edu/people/rosenholtz;https://billf.mit.edu/", "dblp": "29/6192;;198/1021;91/631;146/2461;;86/6650", "google_scholar": "7M9eSFMAAAAJ;tvQjbgYAAAAJ;pDnzpeoAAAAJ;kgZtMGsAAAAJ;f3aij5UAAAAJ;BfE3-m0AAAAJ;https://scholar.google.com.tw/citations?user=0zZnyMEAAAAJ", "orcid": "0009-0000-9441-2687;0000-0001-8625-1350;;;;;", "linkedin": "anne-harrington-4a7a04177;vashadutell/;;;;;", "or_profile": "~Anne_Harrington1;~Vasha_DuTell2;~Ayush_Tewari2;~Mark_Hamilton1;~Simon_Stent1;~Ruth_Rosenholtz1;~William_T._Freeman1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Woven Planet;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;mit.edu;mit.edu;woven-planet.global;mit.edu;mit.edu", "position": "MS student;Postdoc;Postdoc;PhD student;Research Manager;Principal Researcher;Professor", "bibtex": "@inproceedings{\nharrington2023exploring,\ntitle={Exploring perceptual straightness in learned visual representations},\nauthor={Anne Harrington and Vasha DuTell and Ayush Tewari and Mark Hamilton and Simon Stent and Ruth Rosenholtz and William T. 
Freeman},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=4cOfD2qL6T}\n}", "github": "", "project": "", "reviewers": "eEJj;zqwQ;EBgB", "pdf_size": 5567881, "recommendation": "6;6;6", "confidence": "4;2;4", "correctness": "3;3;3", "technical_novelty": "2;3;1", "empirical_novelty": "2;3;3", "wc_summary_paper": "102;114;105", "wc_strength_and_weaknesses": "483;137;84", "wc_clarity_quality_novelty_and_reproducibility": "90;15;175", "wc_summary_review": "185;67;22", "wc_review": "860;333;386", "wc_reply_reviewers": "322;0;0", "wc_reply_authors": "1157;767;938", "reply_reviewers": "1;0;0", "reply_authors": "4;3;3", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.0, 0.816496580927726 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 107.0, 5.0990195135927845 ], "wc_strength_and_weaknesses_avg": [ 234.66666666666666, 176.92622442387926 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 93.33333333333333, 65.36223850375859 ], "wc_summary_review_avg": [ 91.33333333333333, 68.7329776906415 ], "wc_review_avg": [ 526.3333333333334, 236.92802470136135 ], "wc_reply_reviewers_avg": [ 107.33333333333333, 151.7922556947122 ], "wc_reply_authors_avg": [ 954.0, 159.6182946908029 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 3.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9308034719069361457&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "pdf": "https://openreview.net/pdf?id=4cOfD2qL6T", "email": "mit.edu;mit.edu;mit.edu;mit.edu;woven-planet.global;mit.edu;mit.edu", "author_num": 7, "aff_unique_index": "0;0;0;0;1;0;0", "aff_unique_norm": "Massachusetts Institute of Technology;Woven Planet", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.woven-planet.com", "aff_unique_abbr": "MIT;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;0;0", "aff_country_unique": "United States;Japan" }, { "title": "3D Segmenter: 3D Transformer based Semantic Segmentation via 2D Panoramic Distillation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11824", "id": "4dZeBJ83oxk", "poster": "/media/PosterPDFs/ICLR%202023/11824.png?t=1682340156.729383", "openreview": "https://openreview.net/forum?id=4dZeBJ83oxk", "slides": "https://iclr.cc/virtual/2023/poster/11824", "video": "https://iclr.cc/virtual/2023/poster/11824", "author_site": "ZHENNAN WU, YANG LI, Yifei Huang, Lin Gu, Tatsuya Harada, Hiroyuki Sato", "tldr": "Distill knowledge from a strong 2D model to enhance 3D semantic segmentation", "abstract": "Recently, 2D semantic segmentation has witnessed a significant advancement thanks to the huge amount of 2D image data available. Therefore, in this work, we propose the first 2D-to-3D knowledge distillation strategy to enhance a 3D semantic segmentation model with knowledge embedded in the latent space of powerful 2D models. 
Specifically, unlike standard knowledge distillation, where teacher and student models take the same data as input, we use 2D panoramas properly aligned with corresponding 3D rooms to train the teacher network and use the learned knowledge from the 2D teacher to guide the 3D student. To facilitate our research, we create a large-scale, finely annotated 3D semantic segmentation benchmark, containing voxel-wise semantic labels and aligned panoramas of 5175 scenes. Based on this benchmark, we propose a 3D volumetric semantic segmentation network, which adapts Video Swin Transformer as its backbone and introduces a skip-connected linear decoder. Achieving state-of-the-art performance, our 3D Segmenter is computationally efficient and only requires $3.8\\%$ of the parameters compared to the prior art. Our code and data will be released upon acceptance.", "keywords": "3D semantic segmentation;knowledge distillation", "primary_area": "", "supplementary_material": "", "author": "ZHENNAN WU;YANG LI;Yifei Huang;Lin Gu;Tatsuya Harada;Hiroyuki Sato", "authorids": "~ZHENNAN_WU2;~YANG_LI49;~Yifei_Huang2;~Lin_Gu4;~Tatsuya_Harada1;~Hiroyuki_Sato2", "gender": "M;M;;M;M;M", "homepage": ";https://yang-l1.github.io/;;;https://www.mi.t.u-tokyo.ac.jp/harada/;https://www-sato.cnl.t.u-tokyo.ac.jp", "dblp": ";37/4190-193;;;14/5849;", "google_scholar": "rDXxDawAAAAJ;ECzmAC8AAAAJ;;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=ja;vG0c648AAAAJ", "orcid": ";;;;;0000-0002-2891-3835", "linkedin": ";;;;;", "or_profile": "~ZHENNAN_WU2;~YANG_LI49;~Yifei_Huang2;~Lin_Gu4;~Tatsuya_Harada1;~Hiroyuki_Sato2", "aff": "The University of Tokyo;Tencent XR Vision Lab;;RIKEN;The University of Tokyo;University of Tokyo", "aff_domain": "u-tokyo.ac.jp;tencent.com;;riken.jp;u-tokyo.ac.jp;u-tokyo.ac.jp", "position": "PhD student;Researcher;;Researcher;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nwu2023d,\ntitle={3D Segmenter: 3D Transformer based Semantic Segmentation via 2D Panoramic Distillation},\nauthor={ZHENNAN WU and YANG LI and Yifei Huang and Lin Gu and Tatsuya Harada and Hiroyuki Sato},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=4dZeBJ83oxk}\n}", "github": "", "project": "", "reviewers": "TA4Q;Vm5M;NDh9;Pu1V", "pdf_size": 4003632, "recommendation": "5;6;6;8", "confidence": "5;4;4;4", "correctness": "3;4;3;4", "technical_novelty": "2;2;2;4", "empirical_novelty": "2;0;0;4", "wc_summary_paper": "38;168;87;80", "wc_strength_and_weaknesses": "234;137;229;78", "wc_clarity_quality_novelty_and_reproducibility": "50;37;64;118", "wc_summary_review": "4;78;62;52", "wc_review": "326;420;442;328", "wc_reply_reviewers": "0;0;53;0", "wc_reply_authors": "416;257;315;501", "reply_reviewers": "0;0;1;0", "reply_authors": "2;1;1;1", "recommendation_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.5, 0.8660254037844386 ], "empirical_novelty_avg": [ 1.5, 1.6583123951777 ], "wc_summary_paper_avg": [ 93.25, 47.04984059484155 ], "wc_strength_and_weaknesses_avg": [ 169.5, 65.43890280253788 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 67.25, 30.81700017847292 ], "wc_summary_review_avg": [ 49.0, 27.586228448267445 ], "wc_review_avg": [ 379.0, 52.583267300539625 ], "wc_reply_reviewers_avg": [ 13.25, 22.949673200287624 ], "wc_reply_authors_avg": [ 372.25, 93.60922764343267 ], "reply_reviewers_avg": [ 0.25, 
0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.6622661785325219, "corr_recommendation_correctness": 0.6882472016116854, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14533165048366239532&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "pdf": "https://openreview.net/pdf?id=4dZeBJ83oxk", "email": "u-tokyo.ac.jp;tencent.com;;riken.jp;u-tokyo.ac.jp;u-tokyo.ac.jp", "author_num": 6, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "University of Tokyo;Tencent;RIKEN", "aff_unique_dep": ";XR Vision Lab;", "aff_unique_url": "https://www.u-tokyo.ac.jp;https://www.tencent.com;https://www.riken.jp", "aff_unique_abbr": "UTokyo;Tencent;RIKEN", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "Japan;China" }, { "id": "4daKS8wEze5", "title": "ResGrad: Residual Denoising Diffusion Probabilistic Models for Text to Speech", "track": "main", "status": "Reject", "tldr": "", "abstract": "Denoising Diffusion Probabilistic Models (DDPMs) are emerging in text-to-speech (TTS) synthesis because of their strong capability of generating high-fidelity samples. However, their iterative refinement process in high-dimensional data space results in slow inference speed, which restricts their application in real-time systems. Previous works have explored speeding up by minimizing the number of inference steps but at the cost of sample quality. In this work, to improve the inference speed for DDPM-based TTS model while achieving high sample quality, we propose ResGrad, a lightweight diffusion model which learns to refine the output spectrogram of an existing TTS model (e.g., FastSpeech 2) by predicting the residual between the model output and the corresponding ground-truth speech. ResGrad has several advantages: 1) Compare with other acceleration methods for DDPM which need to synthesize speech from scratch, ResGrad reduces the complexity of task by changing the generation target from ground-truth mel-spectrogram to the residual, resulting into a more lightweight model and thus a smaller real-time factor. 2) ResGrad is employed in the inference process of the existing TTS model in a plug-and-play way, without re-training this model. We verify ResGrad on the single-speaker dataset LJSpeech and two more challenging datasets with multiple speakers (LibriTTS) and high sampling rate (VCTK). Experimental results show that in comparison with other speed-up methods of DDPMs: 1) ResGrad achieves better sample quality with the same inference speed measured by real-time factor; 2) with similar speech quality, ResGrad synthesizes speech faster than baseline methods by more than 10 times. Audio samples are available at \\url{https://resgrad1.github.io/}. 
", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Zehua Chen;Yihan Wu;Yichong Leng;Jiawei Chen;Haohe Liu;Xu Tan;Yang Cui;Ke Wang;Lei He;sheng zhao;Jiang Bian;Danilo Mandic", "authorids": "~Zehua_Chen1;~Yihan_Wu2;~Yichong_Leng1;~Jiawei_Chen4;~Haohe_Liu1;~Xu_Tan1;~Yang_Cui1;~Ke_Wang12;~Lei_He6;~sheng_zhao1;~Jiang_Bian1;~Danilo_Mandic1", "gender": "M;F;M;M;M;M;M;M;M;M;;M", "homepage": ";https://wyh2000.github.io/;;https://github.com/Jiawch;https://tan-xu.github.io/;https://www.linkedin.com/in/yang62990360/;;;https://www.aaai.org/ojs/index.php/AAAI/article/view/4642;https://sites.google.com/view/jiangbian;http://www.commsp.ee.ic.ac.uk/~mandic;https://haoheliu.github.io/", "dblp": ";;242/8492;03/1390-8.html;96/10484-3;;;;;09/851-2.html;;272/5570", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;P2K_kOUAAAAJ;https://scholar.google.ae/citations?user=1jwteOQAAAAJ;zo2cd18AAAAJ;tob-U1oAAAAJ;n0eSvvkAAAAJ;https://scholar.google.com/citations?hl=zh-CN;EKl9yY8AAAAJ;689bIIwAAAAJ;pZBEnY8AAAAJ;https://scholar.google.co.uk/citations?user=hcxWZkcAAAAJ;g3O4lJMAAAAJ", "orcid": ";0009-0001-0312-782X;;;0000-0001-5631-0639;;;;;0000-0002-9472-600X;;0000-0003-1036-7888", "linkedin": "zehua-chen-7b1b89156/;;;;;;wangkenpu/;;;jbian/;;haohe-liu-4483a71a4/", "or_profile": "~Zehua_Chen1;~Yihan_Wu2;~Yichong_Leng1;~Jiawei_Chen4;~Xu_Tan1;~Yang_Cui1;~Ke_Wang12;~Lei_He6;~sheng_zhao1;~Jiang_Bian1;~Danilo_Mandic1;~Haohe_Liu2", "aff": "Imperial College London, Imperial College London;Renmin University of China;University of Science and Technology of China;South China University of Technology;Microsoft;;Microsoft;Microsoft;Microsoft;Microsoft;Imperial College London;University of Surrey", "aff_domain": "imperial.ac.uk;ruc.edu.cn;ustc.edu.cn;scut.edu.cn;microsoft.com;;microsoft.com;microsoft.com;microsoft.com;microsoft.com;imperial.ac.uk;surrey.ac.uk", "position": "PhD student;PhD student;PhD student;MS student;Principal Researcher;;Researcher;Principal Scientist Manager;Researcher;Partner Research Manager;Full Professor;PhD student", "bibtex": "@misc{\nchen2023resgrad,\ntitle={ResGrad: Residual Denoising Diffusion Probabilistic Models for Text to Speech},\nauthor={Zehua Chen and Yihan Wu and Yichong Leng and Jiawei Chen and Haohe Liu and Xu Tan and Yang Cui and Ke Wang and Lei He and sheng zhao and Jiang Bian and Danilo Mandic},\nyear={2023},\nurl={https://openreview.net/forum?id=4daKS8wEze5}\n}", "github": "", "project": "", "reviewers": "DApv;U3kA;Zr8k;TPsB", "site": "https://openreview.net/forum?id=4daKS8wEze5", "pdf_size": 1409441, "recommendation": "3;3;5;5", "confidence": "5;5;4;4", "correctness": "3;3;4;3", "technical_novelty": "2;2;2;2", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "87;26;62;70", "wc_strength_and_weaknesses": "371;331;418;21", "wc_clarity_quality_novelty_and_reproducibility": "34;74;68;10", "wc_summary_review": "81;209;37;91", "wc_review": "573;640;585;192", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.5, 0.5 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 61.25, 22.26404051379713 ], "wc_strength_and_weaknesses_avg": [ 285.25, 155.6412140147975 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 46.5, 26.014419078657127 ], "wc_summary_review_avg": [ 104.5, 63.661212680878144 ], "wc_review_avg": [ 
497.5, 178.18038612597067 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 12, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14743274435992834293&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2;3;4;4;4;4;4;0;5", "aff_unique_norm": "Imperial College London;Renmin University of China;University of Science and Technology of China;South China University of Technology;Microsoft;University of Surrey", "aff_unique_dep": ";;;;Microsoft Corporation;", "aff_unique_url": "https://www.imperial.ac.uk;http://www.ruc.edu.cn;http://www.ustc.edu.cn;https://www.scut.edu.cn;https://www.microsoft.com;https://www.surrey.ac.uk", "aff_unique_abbr": "ICL;RUC;USTC;SCUT;Microsoft;Surrey", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;2;2;2;2;2;0;0", "aff_country_unique": "United Kingdom;China;United States" }, { "id": "4dsIu9DOFNB", "title": "Generating Features with Increased Crop-Related Diversity for Few-shot Object Detection", "track": "main", "status": "Withdraw", "tldr": "We transform the latent space such that the latent norm represents a data property, allowing controllable feature generation. ", "abstract": "Two-stage object detectors generate object proposals and classify them to detect objects in images. These proposals often do not perfectly contain the objects but overlap with them in many possible ways, exhibiting great variability induced by different object scales, object positions (w.r.t. the boxes), object parts, and backgrounds. Training a robust classifier against this variability requires abundant training data, which is not available in few-shot settings. To mitigate this issue, we propose a novel variational autoencoder (VAE) based data generation model, which is capable of generating data with increased crop-related variability. The main idea is to transform the latent space such that latent codes with different norms represent different crop-related variations. This allows us to generate features with increased crop-related diversity via simply varying the latent norm. In particular, each latent code is rescaled such that its norm linearly correlates with the IoU score of the input crop w.r.t. the ground-truth box. Here the IoU score is a proxy that represents the crop-related variation. We train this VAE model on base classes conditioned on the semantic code of each class and then use the trained model to generate features for novel classes. 
Our experimental results show that our generated features consistently improve state-of-the-art few-shot object detection methods on PASCAL VOC and COCO datasets.", "keywords": "Few-shot Object Detection", "primary_area": "", "supplementary_material": "", "author": "Jingyi Xu;Hieu Le;Dimitris Samaras", "authorids": "~Jingyi_Xu2;~Hieu_Le2;~Dimitris_Samaras3", "gender": "F;M;M", "homepage": "https://jingyixu.net;https://hieulem.github.io/;https://www.cs.stonybrook.edu/~samaras/", "dblp": ";130/6199.html;s/DimitrisSamaras", "google_scholar": "PhaWF6kAAAAJ;Bj9g-EEAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0002-1373-0294", "linkedin": ";;", "or_profile": "~Jingyi_Xu2;~Hieu_Le2;~Dimitris_Samaras3", "aff": "State University of New York at Stony Brook;EPFL - EPF Lausanne;Stony Brook University", "aff_domain": "cs.stonybrook.edu;epfl.ch;cs.stonybrook.edu", "position": "PhD student;Postdoc;Full Professor", "bibtex": "@misc{\nxu2023generating,\ntitle={Generating Features with Increased Crop-Related Diversity for Few-shot Object Detection},\nauthor={Jingyi Xu and Hieu Le and Dimitris Samaras},\nyear={2023},\nurl={https://openreview.net/forum?id=4dsIu9DOFNB}\n}", "github": "", "project": "", "reviewers": "dSii;xcvb;u6Kq;J5Mb", "site": "https://openreview.net/forum?id=4dsIu9DOFNB", "pdf_size": 6552244, "recommendation": "3;5;6;6", "confidence": "5;5;5;4", "correctness": "3;4;3;4", "technical_novelty": "3;2;3;2", "empirical_novelty": "2;0;0;3", "wc_summary_paper": "80;98;115;35", "wc_strength_and_weaknesses": "359;174;189;154", "wc_clarity_quality_novelty_and_reproducibility": "60;24;33;13", "wc_summary_review": "44;7;13;35", "wc_review": "543;303;350;237", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 1.25, 1.299038105676658 ], "wc_summary_paper_avg": [ 82.0, 29.824486584013478 ], "wc_strength_and_weaknesses_avg": [ 219.0, 81.77713616898063 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 32.5, 17.38533865071371 ], "wc_summary_review_avg": [ 24.75, 15.237699957670777 ], "wc_review_avg": [ 358.25, 113.96792311874425 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.4714045207910316, "corr_recommendation_correctness": 0.40824829046386296, "gs_citation": 45, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15410918201104323601&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 9, "aff_unique_index": "0;1;2", "aff_unique_norm": "State University of New York at Stony Brook;EPFL;Stony Brook University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.stonybrook.edu;https://www.epfl.ch;https://www.stonybrook.edu", "aff_unique_abbr": "SUNY Stony Brook;EPFL;SBU", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Stony Brook;Lausanne;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;Switzerland" }, { "title": "SketchKnitter: Vectorized Sketch Generation with Diffusion Models", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11832", "id": "4eJ43EN2g6l", "poster": "", "openreview": "https://openreview.net/forum?id=4eJ43EN2g6l", "slides": 
"https://iclr.cc/virtual/2023/poster/11832", "video": "https://iclr.cc/virtual/2023/poster/11832", "author_site": "wang qiang, Hao Deng, Yonggang Qi, Da Li, Yi-Zhe Song", "tldr": "", "abstract": "We show vectorized sketch generation can be identified as a reversal of the stroke deformation process. This relationship was established by means of a diffusion model that learns data distributions over the stroke-point locations and pen states of real human sketches. Given randomly scattered stroke-points, sketch generation becomes a process of deformation-based denoising, where the generator rectifies positions of stroke points at each timestep to converge at a recognizable sketch. A key innovation was to embed recognizability into the reverse time diffusion process. It was observed that the estimated noise during the reversal process is strongly correlated with sketch classification accuracy. An auxiliary recurrent neural network (RNN) was consequently used to quantify recognizability during data sampling. It follows that, based on the recognizability scores, a sampling shortcut function can also be devised that renders better quality sketches with fewer sampling steps. Finally it is shown that the model can be easily extended to a conditional generation framework, where given incomplete and unfaithful sketches, it yields one that is more visually appealing and with higher recognizability.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Qiang Wang;Haoge Deng;Yonggang Qi;Da Li;Yi-Zhe Song", "authorids": "wanqqiang@bupt.edu.cn;denghaoge@bupt.edu.cn;~Yonggang_Qi2;~Da_Li3;~Yi-Zhe_Song2", "gender": ";;M;M;M", "homepage": ";;https://qugank.github.io/;https://dali-dl.github.io/;http://personal.ee.surrey.ac.uk/Personal/Y.Song/", "dblp": ";;139/7002;43/4804-1;98/1684", "google_scholar": ";;https://scholar.google.com.hk/citations?user=pQNpf7cAAAAJ;RPvaE3oAAAAJ;https://scholar.google.co.uk/citations?user=irZFP_AAAAAJ", "orcid": ";;;0000-0002-2101-2989;", "linkedin": ";;;;", "or_profile": "wanqqiang@bupt.edu.cn;denghaoge@bupt.edu.cn;~Yonggang_Qi2;~Da_Li3;~Yi-Zhe_Song2", "aff": ";;Beijing University of Posts and Telecommunications;University of Edinburgh;University of Surrey", "aff_domain": ";;bupt.edu.cn;ed.ac.uk;surrey.ac.uk", "position": ";;Associate Professor;Visiting Scholar;Professor", "bibtex": "@inproceedings{\nwang2023sketchknitter,\ntitle={SketchKnitter: Vectorized Sketch Generation with Diffusion Models},\nauthor={Qiang Wang and Haoge Deng and Yonggang Qi and Da Li and Yi-Zhe Song},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=4eJ43EN2g6l}\n}", "github": "", "project": "", "reviewers": "4TYn;zdyq;ocgD", "pdf_size": 4217718, "recommendation": "6;8;8", "confidence": "4;3;3", "correctness": "4;4;3", "technical_novelty": "2;3;3", "empirical_novelty": "2;3;3", "wc_summary_paper": "68;61;135", "wc_strength_and_weaknesses": "110;165;162", "wc_clarity_quality_novelty_and_reproducibility": "83;34;83", "wc_summary_review": "209;37;54", "wc_review": "470;297;434", "wc_reply_reviewers": "73;0;102", "wc_reply_authors": "359;164;503", "reply_reviewers": "1;0;1", "reply_authors": "1;1;1", "recommendation_avg": [ 7.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 
0.4714045207910317 ], "wc_summary_paper_avg": [ 88.0, 33.35665850571167 ], "wc_strength_and_weaknesses_avg": [ 145.66666666666666, 25.249862485874168 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 66.66666666666667, 23.098821518760552 ], "wc_summary_review_avg": [ 100.0, 77.38647599333275 ], "wc_review_avg": [ 400.3333333333333, 74.53112697986586 ], "wc_reply_reviewers_avg": [ 58.333333333333336, 42.913349386357105 ], "wc_reply_authors_avg": [ 342.0, 138.91724155050014 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.9999999999999998, "corr_recommendation_correctness": -0.49999999999999983, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8810146938935149748&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=4eJ43EN2g6l", "email": ";;bupt.edu.cn;ed.ac.uk;surrey.ac.uk", "author_num": 5, "aff_unique_index": "0;1;2", "aff_unique_norm": "Beijing University of Posts and Telecommunications;University of Edinburgh;University of Surrey", "aff_unique_dep": ";;", "aff_unique_url": "http://www.bupt.edu.cn/;https://www.ed.ac.uk;https://www.surrey.ac.uk", "aff_unique_abbr": "BUPT;Edinburgh;Surrey", "aff_campus_unique_index": "0", "aff_campus_unique": "Beijing;", "aff_country_unique_index": "0;1;1", "aff_country_unique": "China;United Kingdom" }, { "title": "ACMP: Allen-Cahn Message Passing with Attractive and Repulsive Forces for Graph Neural Networks", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11740", "id": "4fZc_79Lrqs", "poster": "/media/PosterPDFs/ICLR%202023/11740.png?t=1680938409.5063474", "openreview": "https://openreview.net/forum?id=4fZc_79Lrqs", "slides": "https://iclr.cc/virtual/2023/poster/11740", "video": "https://iclr.cc/virtual/2023/poster/11740", "author_site": "Yuelin Wang, Kai Yi, Xinliang Liu, Yuguang Wang, Shi Jin", "tldr": "", "abstract": "Neural message passing is a basic feature extraction unit for graph-structured data considering neighboring node features in network propagation from one layer to the next. We model such process by an interacting particle system with attractive and repulsive forces and the Allen-Cahn force arising in the modeling of phase transition. The dynamics of the system is a reaction-diffusion process which can separate particles without blowing up. This induces an Allen-Cahn message passing (ACMP) for graph neural networks where the numerical iteration for the particle system solution constitutes the message passing propagation. ACMP which has a simple implementation with a neural ODE solver can propel the network depth up to one hundred of layers with theoretically proven strictly positive lower bound of the Dirichlet energy. It thus provides a deep model of GNNs circumventing the common GNN problem of oversmoothing. GNNs with ACMP achieve state of the art performance for real-world node classification tasks on both homophilic and heterophilic datasets. 
Codes are available at https://github.com/ykiiiiii/ACMP", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Yuelin Wang;Kai Yi;Xinliang Liu;Yu Guang Wang;Shi Jin", "authorids": "~Yuelin_Wang2;~Kai_Yi2;~Xinliang_Liu1;~Yu_Guang_Wang1;~Shi_Jin1", "gender": ";M;M;M;", "homepage": "https://ins.sjtu.edu.cn/peoples/wangyuelin;;https://cemse.kaust.edu.sa/scml/people/person/xinliang-liu;https://yuguangwang.github.io/;https://ins.sjtu.edu.cn/people/shijin/#publications", "dblp": ";;67/10364;03/10023-1;", "google_scholar": ";A_YCRFwAAAAJ;9AsSTc4AAAAJ;cMSEByAAAAAJ;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Yuelin_Wang2;~Kai_Yi2;~Xinliang_Liu1;~Yu_Guang_Wang1;~Shi_Jin1", "aff": "Shanghai Jiaotong University;University of New South Wales;King Abdullah University of Science and Technology;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;unsw.edu.au;kaust.edu.sa;sjtu.edu.cn;sjtu.edu.cn", "position": "PhD student;PhD student;Postdoc;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nwang2023acmp,\ntitle={{ACMP}: Allen-Cahn Message Passing with Attractive and Repulsive Forces for Graph Neural Networks},\nauthor={Yuelin Wang and Kai Yi and Xinliang Liu and Yu Guang Wang and Shi Jin},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=4fZc_79Lrqs}\n}", "github": "", "project": "", "reviewers": "boSt;W9W7;A1JY", "pdf_size": 2683095, "recommendation": "6;6;6", "confidence": "4;4;4", "correctness": "3;4;3", "technical_novelty": "3;3;4", "empirical_novelty": "3;3;2", "wc_summary_paper": "100;123;35", "wc_strength_and_weaknesses": "133;275;401", "wc_clarity_quality_novelty_and_reproducibility": "70;36;77", "wc_summary_review": "59;55;64", "wc_review": "362;489;577", "wc_reply_reviewers": "0;0;120", "wc_reply_authors": "655;578;2152", "reply_reviewers": "0;0;1", "reply_authors": "1;1;5", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 86.0, 37.26481808176 ], "wc_strength_and_weaknesses_avg": [ 269.6666666666667, 109.47551730359118 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 61.0, 17.90716802475106 ], "wc_summary_review_avg": [ 59.333333333333336, 3.6817870057290873 ], "wc_review_avg": [ 476.0, 88.25342297422048 ], "wc_reply_reviewers_avg": [ 40.0, 56.568542494923804 ], "wc_reply_authors_avg": [ 1128.3333333333333, 724.5239049809805 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 1.8856180831641267 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 50, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16596192032470483283&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=4fZc_79Lrqs", "email": "sjtu.edu.cn;unsw.edu.au;kaust.edu.sa;sjtu.edu.cn;sjtu.edu.cn", "author_num": 5, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "Shanghai Jiao Tong University;University of New South Wales;King Abdullah University of Science and Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.unsw.edu.au;https://www.kast.kau.edu.sa", "aff_unique_abbr": 
"SJTU;UNSW;KAUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;0;0", "aff_country_unique": "China;Australia;Saudi Arabia" }, { "id": "4g7nCbpjNwd", "title": "NormSoftmax: Normalize the Input of Softmax to Accelerate and Stabilize Training", "track": "main", "status": "Reject", "tldr": "", "abstract": "Softmax is a basic function that normalizes a vector to a probability distribution and is widely used in machine learning, most notably in cross-entropy loss function and dot product attention operations. However, optimization of softmax-based models is sensitive to the input statistics change. We observe that the input of softmax changes significantly during the initial training stage, causing slow and unstable convergence when training the model from scratch. To remedy the optimization difficulty of softmax, we propose a simple yet effective substitution, named NormSoftmax, where the input vector is first normalized to unit variance and then fed to the standard softmax function. Similar to other existing normalization layers in machine learning models, NormSoftmax can stabilize and accelerate the training process, and also increase the robustness of the training procedure against hyperparameters. Experiments on Transformer-based models and convolutional neural networks validate that our proposed NormSoftmax is an effective plug-and-play module to stabilize and speed up the optimization of neural networks with cross-entropy loss or dot-product attention operations.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/e548a722e0b650d3db4cb3997255ae01ca6b4741.zip", "author": "Zixuan Jiang;Jiaqi Gu;David Z. Pan", "authorids": "~Zixuan_Jiang1;~Jiaqi_Gu3;~David_Z._Pan1", "gender": "M;M;M", "homepage": ";https://scopex-asu.github.io;http://users.ece.utexas.edu/~dpan/", "dblp": "258/6469;;p/DavidZhigangPan.html", "google_scholar": "8g6Q5PYAAAAJ;FeIV12MAAAAJ;3aLlroEAAAAJ", "orcid": ";;0000-0002-5705-2501", "linkedin": "utzixuanjiang/;;davidzpan/", "or_profile": "~Zixuan_Jiang1;~Jiaqi_Gu3;~David_Z._Pan1", "aff": "University of Texas, Austin;University of Texas, Austin;University of Texas, Austin", "aff_domain": "utexas.edu;utexas.edu;utexas.edu", "position": "PhD student;PhD student;Professor", "bibtex": "@misc{\njiang2023normsoftmax,\ntitle={NormSoftmax: Normalize the Input of Softmax to Accelerate and Stabilize Training},\nauthor={Zixuan Jiang and Jiaqi Gu and David Z. 
Pan},\nyear={2023},\nurl={https://openreview.net/forum?id=4g7nCbpjNwd}\n}", "github": "", "project": "", "reviewers": "7Jaw;syPR;Eoot;ax8S", "site": "https://openreview.net/forum?id=4g7nCbpjNwd", "pdf_size": 1115287, "recommendation": "5;5;5;6", "confidence": "3;4;3;4", "correctness": "2;3;3;4", "technical_novelty": "2;2;2;4", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "45;95;43;74", "wc_strength_and_weaknesses": "338;104;119;289", "wc_clarity_quality_novelty_and_reproducibility": "53;51;55;6", "wc_summary_review": "45;48;30;71", "wc_review": "481;298;247;440", "wc_reply_reviewers": "147;0;125;30", "wc_reply_authors": "455;345;475;230", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;2;2", "recommendation_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.5, 0.8660254037844386 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 64.25, 21.579793789561567 ], "wc_strength_and_weaknesses_avg": [ 212.5, 102.61213378543495 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 41.25, 20.40067400847335 ], "wc_summary_review_avg": [ 48.5, 14.67140075112121 ], "wc_review_avg": [ 366.5, 96.80521680157531 ], "wc_reply_reviewers_avg": [ 75.5, 61.91324575565393 ], "wc_reply_authors_avg": [ 376.25, 97.87587802926726 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:_X1J4PxfrHAJ:scholar.google.com/&scioq=NormSoftmax:+Normalize+the+Input+of+Softmax+to+Accelerate+and+Stabilize+Training&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "4gUIeq2lyM", "title": "Visual Reinforcement Learning with Self-Supervised 3D Representations", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "A prominent approach to visual Reinforcement Learning (RL) is to learn an internal state representation using self-supervised methods, which has the potential benefit of improved sample-efficiency and generalization through additional learning signal and inductive biases. However, while the real world is inherently 3D, prior efforts have largely been focused on leveraging 2D computer vision techniques as auxiliary self-supervision. In this work, we present a unified framework for self-supervised learning of 3D representations for motor control. Our proposed framework consists of two phases: a \\textit{pretraining} phase where a deep voxel-based 3D autoencoder is pretrained on a large object-centric dataset, and a \\textit{finetuning} phase where the representation is jointly finetuned together with RL on in-domain data. We empirically show that our method enjoys improved sample efficiency in simulated manipulation tasks compared to 2D representation learning methods. 
Additionally, our learned policies transfer zero-shot to a real robot setup with only approximate geometric correspondence, and successfully solve motor control tasks that involve grasping and lifting from \\textit{a single, uncalibrated RGB camera}. Videos are available at https://3d4rl.github.io/.", "keywords": "Reinforcement Learning;3D Representation Learning", "primary_area": "", "supplementary_material": "", "author": "Yanjie Ze;Nicklas Hansen;Yinbo Chen;Mohit Jain;Xiaolong Wang", "authorids": "~Yanjie_Ze1;~Nicklas_Hansen1;~Yinbo_Chen1;~Mohit_Jain2;~Xiaolong_Wang3", "gender": "M;Non-Binary;;M;M", "homepage": "http://yanjieze.com;https://nicklashansen.github.io;;https://natsu6767.github.io/;https://xiaolonw.github.io/", "dblp": "312/5407;258/0744.html;;;91/952-4", "google_scholar": "BO_b2O8AAAAJ;OFtDgzwAAAAJ;;;Y8O9N_0AAAAJ", "orcid": ";0000-0001-9897-4003;;;", "linkedin": "yanjie-ze-a71a0a247/;ncklas;;;", "or_profile": "~Yanjie_Ze1;~Nicklas_Hansen1;~Yinbo_Chen1;~Mohit_Jain2;~Xiaolong_Wang3", "aff": "Shanghai Jiaotong University;University of California, San Diego;;;University of California, San Diego", "aff_domain": "sjtu.edu.cn;ucsd.edu;;;ucsd.edu", "position": "Undergrad student;PhD student;;;Assistant Professor", "bibtex": "@misc{\nze2023visual,\ntitle={Visual Reinforcement Learning with Self-Supervised 3D Representations},\nauthor={Yanjie Ze and Nicklas Hansen and Yinbo Chen and Mohit Jain and Xiaolong Wang},\nyear={2023},\nurl={https://openreview.net/forum?id=4gUIeq2lyM}\n}", "github": "", "project": "", "reviewers": "ppof;NR8D;1Tnh;PxAD", "site": "https://openreview.net/forum?id=4gUIeq2lyM", "pdf_size": 16347100, "recommendation": "3;3;6;6", "confidence": "5;4;4;3", "correctness": "2;3;3;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "3;2;3;2", "wc_summary_paper": "76;44;46;87", "wc_strength_and_weaknesses": "1020;359;164;115", "wc_clarity_quality_novelty_and_reproducibility": "39;61;29;242", "wc_summary_review": "87;16;22;120", "wc_review": "1222;480;261;564", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.5, 1.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 63.25, 18.673175948402566 ], "wc_strength_and_weaknesses_avg": [ 414.5, 361.30354274487814 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 92.75, 86.94358803270083 ], "wc_summary_review_avg": [ 61.25, 43.882656027182314 ], "wc_review_avg": [ 631.75, 358.28366903893345 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.7071067811865476, "corr_recommendation_correctness": 0.5773502691896258, "gs_citation": 51, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6092696986541093287&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;1", "aff_unique_norm": "Shanghai Jiao Tong University;University of California, San Diego", "aff_unique_dep": ";", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.ucsd.edu", "aff_unique_abbr": "SJTU;UCSD", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";San Diego", "aff_country_unique_index": "0;1;1", "aff_country_unique": "China;United States" }, { "title": "On Representing Mixed-Integer Linear 
Programs by Graph Neural Networks", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10797", "id": "4gc3MGZra1d", "poster": "/media/PosterPDFs/ICLR%202023/10797.png?t=1682909196.3119216", "openreview": "https://openreview.net/forum?id=4gc3MGZra1d", "slides": "https://iclr.cc/virtual/2023/poster/10797", "video": "https://iclr.cc/virtual/2023/poster/10797", "author_site": "Ziang Chen, Jialin Liu, Xinshang Wang, Wotao Yin", "tldr": "", "abstract": "While mixed-integer linear programming (MILP) is NP-hard in general, practical MILP solving has seen a roughly 100-fold speedup in the past twenty years. Still, many classes of MILPs quickly become unsolvable as their sizes increase, motivating researchers to seek new acceleration techniques for MILPs. With deep learning, they have obtained strong empirical results, and many results were obtained by applying graph neural networks (GNNs) to making decisions in various stages of MILP solution processes. This work discovers a fundamental limitation: there exist feasible and infeasible MILPs that all GNNs will, however, treat equally, indicating that GNNs lack the power to express general MILPs. Then, we show that, by restricting the MILPs to unfoldable ones or by adding random features, there exist GNNs that can reliably predict MILP feasibility, optimal objective values, and optimal solutions up to a prescribed precision. We conducted small-scale numerical experiments to validate our theoretical findings.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Ziang Chen;Jialin Liu;Xinshang Wang;Wotao Yin", "authorids": "~Ziang_Chen1;~Jialin_Liu1;~Xinshang_Wang1;~Wotao_Yin1", "gender": "M;M;;M", "homepage": "https://sites.duke.edu/ziangchen/;https://liujl11git.github.io/;;http://wotaoyin.com", "dblp": ";;67/10364;76/2265", "google_scholar": "odvrFvIAAAAJ;QS6Lj5sAAAAJ;;kpQGGFUAAAAJ", "orcid": "0000-0002-8298-5223;;;0000-0001-6697-9731", "linkedin": ";;;", "or_profile": "~Ziang_Chen1;~Jialin_Liu1;~Xinshang_Wang1;~Wotao_Yin1", "aff": "Duke University;Alibaba Group US;;Alibaba Group US", "aff_domain": "duke.edu;alibaba-inc.com;;alibaba-inc.com", "position": "PhD student;Researcher;;Principal Researcher", "bibtex": "@inproceedings{\nchen2023on,\ntitle={On Representing Mixed-Integer Linear Programs by Graph Neural Networks},\nauthor={Ziang Chen and Jialin Liu and Xinshang Wang and Wotao Yin},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=4gc3MGZra1d}\n}", "github": "", "project": "", "reviewers": "ekKz;LV4F;wHAc;mtYU", "pdf_size": 481725, "recommendation": "1;6;6;8", "confidence": "2;4;5;5", "correctness": "3;4;4;4", "technical_novelty": "2;3;4;4", "empirical_novelty": "0;2;3;4", "wc_summary_paper": "109;84;15;117", "wc_strength_and_weaknesses": "108;348;4;189", "wc_clarity_quality_novelty_and_reproducibility": "108;136;4;9", "wc_summary_review": "42;21;772;19", "wc_review": "367;589;795;334", "wc_reply_reviewers": "0;0;789;0", "wc_reply_authors": "120;792;1835;765", "reply_reviewers": "0;0;5;0", "reply_authors": "1;2;7;1", "recommendation_avg": [ 5.25, 2.5860201081971503 ], "confidence_avg": [ 4.0, 1.224744871391589 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 3.25, 0.82915619758885 ], "empirical_novelty_avg": [ 2.25, 1.479019945774904 ], "wc_summary_paper_avg": [ 81.25, 40.139600147485275 ], "wc_strength_and_weaknesses_avg": [ 162.25, 125.70277443238872 ], 
"wc_clarity_quality_novelty_and_reproducibility_avg": [ 64.25, 58.61900289155386 ], "wc_summary_review_avg": [ 213.5, 322.5759600466222 ], "wc_review_avg": [ 521.25, 186.00050403157513 ], "wc_reply_reviewers_avg": [ 197.25, 341.647021792961 ], "wc_reply_authors_avg": [ 878.0, 614.5278675536204 ], "reply_reviewers_avg": [ 1.25, 2.165063509461097 ], "reply_authors_avg": [ 2.75, 2.48746859276655 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.9472044455566301, "corr_recommendation_correctness": 0.9488474727161108, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "pdf": "https://openreview.net/pdf?id=4gc3MGZra1d", "email": "duke.edu;alibaba-inc.com;;alibaba-inc.com", "author_num": 4, "aff_unique_index": "0;1;1", "aff_unique_norm": "Duke University;Alibaba Group", "aff_unique_dep": ";", "aff_unique_url": "https://www.duke.edu;https://www.alibaba.com", "aff_unique_abbr": "Duke;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "4gwZXPNhBt", "title": "Topological Data Analysis-Deep Learning Framework for Predicting Cancer Phenotypes", "track": "main", "status": "Desk Reject", "tldr": "The use of topological data analysis to predict cancer-type phenotypes. ", "abstract": "Classification of patient cancer phenotypes from gene expression profiles remains a challenge in the field of transcriptomics. Gene expression data typically suffers from extreme noise and performs poorly on deep learning models alone. We build on recent work by Mandal et al., by incorporating the concept of differential gene expression analysis to pre-select genes that are necessary but not sufficient for disease association in our topological data analysis approach. The outcome is a reduction in computational cost in the calculation of persistent homology. We also test multiple topological representations to optimise prediction. Deep learning with topological features performs better compared to its use on raw data. 
Thus, topological features offer a new perspective on the difficult-to-unravel non-linear connection between genotype and phenotype.", "keywords": "Topological data analysis;Deep learning;Gene expression;Cancer Phenotype prediction", "primary_area": "", "supplementary_material": "", "author": "Lebohang Mashatola;Ismail Yunus Akhalwaya;Stephanie Muller", "authorids": "~Lebohang_Mashatola1;ismaila@za.ibm.com;~Stephanie_Muller1", "gender": ";;", "homepage": ";;", "dblp": ";;", "google_scholar": ";;", "orcid": ";;0000-0003-3997-4446", "linkedin": ";;stephanie-muller-352b48a6/", "or_profile": "~Lebohang_Mashatola1;ismaila@za.ibm.com;~Stephanie_Muller1", "aff": ";;International Business Machines", "aff_domain": ";;ibm.com", "position": ";;Researcher", "bibtex": "@misc{\nmashatola2023topological,\ntitle={Topological Data Analysis-Deep Learning Framework for Predicting Cancer Phenotypes},\nauthor={Lebohang Mashatola and Ismail Yunus Akhalwaya and Stephanie Muller},\nyear={2023},\nurl={https://openreview.net/forum?id=4gwZXPNhBt}\n}", "github": "", "project": "", "reviewers": "mwgb;QVBt;7K87;PGWw", "site": "https://openreview.net/forum?id=4gwZXPNhBt", "pdf_size": 363761, "recommendation": "1;3;5;5", "confidence": "5;3;5;4", "correctness": "2;2;3;3", "technical_novelty": "1;1;2;2", "empirical_novelty": "1;1;2;0", "wc_summary_paper": "60;35;45;134", "wc_strength_and_weaknesses": "150;81;366;195", "wc_clarity_quality_novelty_and_reproducibility": "55;81;212;62", "wc_summary_review": "26;28;25;39", "wc_review": "291;225;648;430", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.5, 1.6583123951777 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 1.5, 0.5 ], "empirical_novelty_avg": [ 1.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 68.5, 38.84906691286162 ], "wc_strength_and_weaknesses_avg": [ 198.0, 105.1498930099313 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 102.5, 63.93160407810835 ], "wc_summary_review_avg": [ 29.5, 5.5901699437494745 ], "wc_review_avg": [ 398.5, 161.94211928957827 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.0909090909090909, "corr_recommendation_correctness": 0.9045340337332909, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17482031539170739747&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "International Business Machines Corporation", "aff_unique_dep": "", "aff_unique_url": "https://www.ibm.com", "aff_unique_abbr": "IBM", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "4hhtHQLGDQO", "title": "Automatic Data Augmentation via Invariance-Constrained Learning", "track": "main", "status": "Reject", "tldr": "Imposing invariance constraints on models enables learning data augmentation policies that can improve generalization.", "abstract": "Underlying data structures, such as symmetries or invariances to transformations, are often exploited to improve the solution of learning tasks. However, embedding these properties in models or learning algorithms can be challenging and computationally intensive. 
Data augmentation, on the other hand, induces these symmetries during training by applying multiple transformations to the input data. Despite its ubiquity, its effectiveness depends on the choices of which transformations to apply, when to do so, and how often. In fact, there is both empirical and theoretical evidence that the indiscriminate use of data augmentation can introduce biases that outweigh its benefits. This work tackles these issues by automatically adapting the data augmentation while solving the learning task. To do so, it formulates data augmentation as an invariance-constrained learning problem and leverages Markov chain Monte Carlo (MCMC) sampling to solve it. The result is a practical algorithm that not only does away with a priori searches for augmentation distributions, but also dynamically controls if and when data augmentation is applied. Our experiments illustrate the performance of this method, which achieves state-of-the-art results in automatic data augmentation benchmarks for CIFAR datasets. Furthermore, this approach can be used to gather insights on the actual symmetries underlying a learning task.", "keywords": "Automatic data augmentation;Invariance;Constrained Learning;Image classification", "primary_area": "", "supplementary_material": "/attachment/920da04d6fcdea56fa4d2921bb56064d50d44435.zip", "author": "Ignacio Hounie;Luiz F. O. Chamon;Alejandro Ribeiro", "authorids": "~Ignacio_Hounie1;~Luiz_F._O._Chamon1;~Alejandro_Ribeiro1", "gender": ";M;M", "homepage": ";https://www.luizchamon.com;https://alelab.seas.upenn.edu", "dblp": ";120/6982;32/15", "google_scholar": "V0h3OSYAAAAJ;https://scholar.google.ca/citations?user=FIm-l-sAAAAJ;7mrPM4kAAAAJ", "orcid": ";0000-0001-7731-6650;0000-0003-4230-9906", "linkedin": ";luiz-chamon-abb07a18;", "or_profile": "~Ignacio_Hounie1;~Luiz_F._O._Chamon1;~Alejandro_Ribeiro1", "aff": "University of Pennsylvania;Universit\u00e4t Stuttgart;University of Pennsylvania", "aff_domain": "upenn.edu;uni-stuttgart.de;upenn.edu", "position": "PhD student;Principal Researcher;Full Professor", "bibtex": "@misc{\nhounie2023automatic,\ntitle={Automatic Data Augmentation via Invariance-Constrained Learning},\nauthor={Ignacio Hounie and Luiz F. O.
Chamon and Alejandro Ribeiro},\nyear={2023},\nurl={https://openreview.net/forum?id=4hhtHQLGDQO}\n}", "github": "", "project": "", "reviewers": "159N;wrTV;8185;E3e9", "site": "https://openreview.net/forum?id=4hhtHQLGDQO", "pdf_size": 948599, "recommendation": "3;5;6;6", "confidence": "3;5;3;3", "correctness": "3;1;4;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "1;1;3;2", "wc_summary_paper": "47;56;61;45", "wc_strength_and_weaknesses": "73;507;224;449", "wc_clarity_quality_novelty_and_reproducibility": "86;32;29;67", "wc_summary_review": "41;52;57;31", "wc_review": "247;647;371;592", "wc_reply_reviewers": "0;342;9;244", "wc_reply_authors": "362;2382;429;1130", "reply_reviewers": "0;1;1;1", "reply_authors": "1;5;1;3", "recommendation_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "correctness_avg": [ 2.75, 1.0897247358851685 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 1.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 52.25, 6.53356717268599 ], "wc_strength_and_weaknesses_avg": [ 313.25, 174.39377139106776 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 53.5, 23.984369910422913 ], "wc_summary_review_avg": [ 45.25, 10.059199769365355 ], "wc_review_avg": [ 464.25, 162.48903809180482 ], "wc_reply_reviewers_avg": [ 148.75, 148.38695191963475 ], "wc_reply_authors_avg": [ 1075.75, 811.9354577181613 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 1.6583123951777 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.18731716231633877, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15461090531297469189&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Pennsylvania;University of Stuttgart", "aff_unique_dep": ";", "aff_unique_url": "https://www.upenn.edu;https://www.uni-stuttgart.de", "aff_unique_abbr": "UPenn;Uni Stuttgart", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;Germany" }, { "id": "4hsI9zyNSfw", "title": "Online Learning for Obstacle Avoidance", "track": "main", "status": "Reject", "tldr": "Regret bounds for online learning obstacle avoidance policies", "abstract": "We approach the fundamental problem of obstacle avoidance for robotic systems via the lens of online learning. In contrast to prior work that either assumes worst-case realization of uncertainty in the environment or a given stochastic model of uncertainty, we propose a method that is efficient to implement and provably grants instance-optimality to perturbations of trajectories generated from an open-loop planner in the sense of minimizing worst-case regret. The resulting policy thus adapts online to realizations of uncertainty and provably compares well with the best obstacle avoidance policy in hindsight from a rich class of policies. 
The method is validated in simulation on a dynamical system environment and compared to baseline open-loop planning and robust Hamilton-Jacobi reachability techniques.", "keywords": "obstacle avoidance;online optimization;regret minimization", "primary_area": "", "supplementary_material": "/attachment/59249a58457c32f52d6bf549eab3ef15e915c635.zip", "author": "David Snyder;Wenhan Xia;Daniel Suo;Anirudha Majumdar;Elad Hazan", "authorids": "~David_Snyder2;~Wenhan_Xia1;~Daniel_Suo1;~Anirudha_Majumdar1;~Elad_Hazan1", "gender": "M;F;M;M;M", "homepage": "https://irom-lab.princeton.edu/;https://wenhanlunaxia.github.io/;https://danielsuo.com;https://irom-lab.princeton.edu/majumdar/;https://www.ehazan.com", "dblp": ";;;116/6436;72/739", "google_scholar": ";;;ibu3FwsAAAAJ;LnhCGNMAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~David_Snyder2;~Wenhan_Xia1;~Daniel_Suo1;~Anirudha_Majumdar1;~Elad_Hazan1", "aff": "Princeton University;Princeton University;;Princeton University;Princeton University", "aff_domain": "princeton.edu;princeton.edu;;princeton.edu;princeton.edu", "position": "PhD student;PhD student;;Associate Professor;Full Professor", "bibtex": "@misc{\nsnyder2023online,\ntitle={Online Learning for Obstacle Avoidance},\nauthor={David Snyder and Wenhan Xia and Daniel Suo and Anirudha Majumdar and Elad Hazan},\nyear={2023},\nurl={https://openreview.net/forum?id=4hsI9zyNSfw}\n}", "github": "", "project": "", "reviewers": "YMGQ;txHg;vUNR;GFpG;Mcwo", "site": "https://openreview.net/forum?id=4hsI9zyNSfw", "pdf_size": 861428, "recommendation": "1;3;3;6;6", "confidence": "4;2;2;3;2", "correctness": "2;3;3;3;3", "technical_novelty": "2;2;3;2;3", "empirical_novelty": "1;2;3;2;3", "wc_summary_paper": "109;46;81;127;83", "wc_strength_and_weaknesses": "143;467;142;422;276", "wc_clarity_quality_novelty_and_reproducibility": "359;32;72;77;133", "wc_summary_review": "89;181;68;72;42", "wc_review": "700;726;363;698;534", "wc_reply_reviewers": "0;189;282;0;0", "wc_reply_authors": "616;817;256;725;602", "reply_reviewers": "0;1;1;0;0", "reply_authors": "2;3;2;3;2", "recommendation_avg": [ 3.8, 1.9390719429665315 ], "confidence_avg": [ 2.6, 0.8 ], "correctness_avg": [ 2.8, 0.39999999999999997 ], "technical_novelty_avg": [ 2.4, 0.4898979485566356 ], "empirical_novelty_avg": [ 2.2, 0.7483314773547882 ], "wc_summary_paper_avg": [ 89.2, 27.541967976163214 ], "wc_strength_and_weaknesses_avg": [ 290.0, 135.98676406180127 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 134.6, 116.72291977156843 ], "wc_summary_review_avg": [ 90.4, 47.73510238807496 ], "wc_review_avg": [ 604.2, 138.50400716224783 ], "wc_reply_reviewers_avg": [ 94.2, 119.06032084619964 ], "wc_reply_authors_avg": [ 603.2, 190.39894957693437 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 2.4, 0.4898979485566356 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.43835402965998715, "corr_recommendation_correctness": 0.7219948723811555, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10988661629297992940&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Princeton University", "aff_unique_dep": "", "aff_unique_url": "https://www.princeton.edu", "aff_unique_abbr": "Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "4inSu6mXdZk", "title": "Multiscale Neural Operator: Learning Fast 
and Grid-independent PDE Solvers", "track": "main", "status": "Reject", "tldr": "We are the first to embed grid-independent neural operators as a closure model or parametrization in physical simulations -- in doing so we created a fast and accurate surrogate of multiscale PDEs.", "abstract": "Numerical simulations in climate, chemistry, or astrophysics are computationally too expensive for uncertainty quantification or parameter-exploration at high-resolution. Reduced-order or surrogate models are multiple orders of magnitude faster, but traditional surrogates are inflexible or inaccurate and pure machine learning (ML)-based surrogates too data-hungry. We propose a hybrid, flexible surrogate model that exploits known physics for simulating large-scale dynamics and limits learning to the hard-to-model term, which is called parametrization or closure and captures the effect of fine- onto large-scale dynamics. Leveraging neural operators, we are the first to learn grid-independent, non-local, and flexible parametrizations. Our \\textit{multiscale neural operator} is motivated by a rich literature in multiscale modeling, has quasilinear runtime complexity, is more accurate or flexible than state-of-the-art parametrizations, and is demonstrated on the chaotic multiscale Lorenz96 equations.", "keywords": "physics-informed machine learning;pinns;scientific machine learning;neural ODEs;neural operators;machine learning;neural networks;Matryoshka;multiphysics;multiscale;parametrizations;closure;subgrid;superstructures;partial differential equations;PDEs;differential equations;numerical solvers;physics;hpc;surrogate;reduced order modeling;model reduction;uncertainty quantification;climate;fluid dynamics;physics;computational physics", "primary_area": "", "supplementary_material": "", "author": "Bj\u00f6rn L\u00fctjens;Catherine H. Crawford;Campbell D Watson;Christopher Hill;Dava Newman", "authorids": "~Bj\u00f6rn_L\u00fctjens1;catcraw@us.ibm.com;~Campbell_D_Watson1;~Christopher_Hill1;~Dava_Newman1", "gender": "M;;M;M;F", "homepage": "https://blutjens.github.io/;;https://research.ibm.com/people/campbell-watson;https://mitgcm.org;https://davanewman.com", "dblp": ";;;;", "google_scholar": "AayqHVcAAAAJ;;SAukGWAAAAAJ;;", "orcid": "0000-0002-1616-4830;;0000-0003-3029-9069;0000-0003-3417-9056;0000-0001-6190-348X", "linkedin": "bjorn-lutjens/;;campbell-watson-819101100/;;", "or_profile": "~Bj\u00f6rn_L\u00fctjens1;catcraw@us.ibm.com;~Campbell_D_Watson1;~Christopher_Hill1;~Dava_Newman1", "aff": "Massachusetts Institute of Technology;;International Business Machines;;Massachusetts Institute of Technology", "aff_domain": "mit.edu;;ibm.com;;mit.edu", "position": "PhD student;;Research Scientist;;Full Professor", "bibtex": "@misc{\nl{\\\"u}tjens2023multiscale,\ntitle={Multiscale Neural Operator: Learning Fast and Grid-independent {PDE} Solvers},\nauthor={Bj{\\\"o}rn L{\\\"u}tjens and Catherine H.
Crawford and Campbell D Watson and Christopher Hill and Dava Newman},\nyear={2023},\nurl={https://openreview.net/forum?id=4inSu6mXdZk}\n}", "github": "", "project": "", "reviewers": "HHZo;rbkc;PmjN;ALWF", "site": "https://openreview.net/forum?id=4inSu6mXdZk", "pdf_size": 2729990, "recommendation": "3;3;5;6", "confidence": "5;4;4;2", "correctness": "2;3;4;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "40;128;34;87", "wc_strength_and_weaknesses": "353;652;150;124", "wc_clarity_quality_novelty_and_reproducibility": "29;203;32;621", "wc_summary_review": "31;117;24;69", "wc_review": "453;1100;240;901", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 72.25, 38.17312536327095 ], "wc_strength_and_weaknesses_avg": [ 319.75, 211.3224727756137 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 221.25, 241.3031029638865 ], "wc_summary_review_avg": [ 60.25, 36.96873679205174 ], "wc_review_avg": [ 673.5, 342.8560193433973 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.8388704928078611, "corr_recommendation_correctness": 0.5443310539518174, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15932486493161146158&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Massachusetts Institute of Technology;International Business Machines Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.ibm.com", "aff_unique_abbr": "MIT;IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "4j7TG4gD_RM", "title": "Trusted Aggregation (TAG): Model Filtering Backdoor Defense In Federated Learning", "track": "main", "status": "Reject", "tldr": "TAG is a novel defense against Backdoor Attacks in Federated Learning", "abstract": "Federated Learning is a framework for training machine learning models from multiple local data sets without access to the data in aggregate. A shared model is jointly learned through an interactive process between server and clients that combines locally learned model gradients or weights. However, the lack of data transparency naturally raises concerns about model security. Recently, several state-of-the-art backdoor attacks have been proposed, which achieve high attack success rates while simultaneously being difficult to detect, leading to compromised federated learning models. In this paper, motivated by differences in the output layer distribution between models trained with and without the presence of backdoor attacks, we propose a defense method that can prevent backdoor attacks from influencing the model while maintaining the accuracy of the original classification task. TAG leverages a small validation data set to estimate the largest change that a benign user's local training can make to the output layer of the shared model, which can be used as a cutoff for returning user models. 
Experimental results on multiple data sets show that TAG defends against backdoor attacks even when 40\\% of the user submissions to update the shared model are malicious.", "keywords": "federated learning;backdoor attack;robust aggregation", "primary_area": "", "supplementary_material": "/attachment/5b7b0f49fe362952960301dcbd50c39fb6af82ec.zip", "author": "Joseph Lavond;Minhao Cheng;Yao Li", "authorids": "jlavond@email.unc.edu;~Minhao_Cheng1;~Yao_Li1", "gender": ";M;F", "homepage": ";https://cmhcbb.github.io/;https://liyao880.github.io/yaoli/", "dblp": ";174/1717;", "google_scholar": ";_LkC1yoAAAAJ;bQ6YhCwAAAAJ", "orcid": ";0000-0003-3965-4215;0000-0002-7195-5774", "linkedin": ";;yao-li-b189574a/", "or_profile": "jlavond@email.unc.edu;~Minhao_Cheng1;~Yao_Li1", "aff": ";Hong Kong University of Science and Technology;University of North Carolina, Chapel Hill", "aff_domain": ";ust.hk;unc.edu", "position": ";Assistant Professor;Assistant Professor", "bibtex": "@misc{\nlavond2023trusted,\ntitle={Trusted Aggregation ({TAG}): Model Filtering Backdoor Defense In Federated Learning},\nauthor={Joseph Lavond and Minhao Cheng and Yao Li},\nyear={2023},\nurl={https://openreview.net/forum?id=4j7TG4gD_RM}\n}", "github": "", "project": "", "reviewers": "JmoH;zmnT;qsGk;SvhM", "site": "https://openreview.net/forum?id=4j7TG4gD_RM", "pdf_size": 2803365, "recommendation": "3;3;5;5", "confidence": "4;4;3;4", "correctness": "2;3;2;3", "technical_novelty": "2;1;3;3", "empirical_novelty": "2;1;3;3", "wc_summary_paper": "59;63;36;85", "wc_strength_and_weaknesses": "63;17;316;301", "wc_clarity_quality_novelty_and_reproducibility": "337;149;22;90", "wc_summary_review": "33;13;43;198", "wc_review": "492;242;417;674", "wc_reply_reviewers": "0;0;0;79", "wc_reply_authors": "337;376;521;331", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.25, 0.82915619758885 ], "empirical_novelty_avg": [ 2.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 60.75, 17.383541066192468 ], "wc_strength_and_weaknesses_avg": [ 174.25, 135.33546283217862 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 149.5, 117.21028111902129 ], "wc_summary_review_avg": [ 71.75, 73.68641326594748 ], "wc_review_avg": [ 456.25, 155.0296342639045 ], "wc_reply_reviewers_avg": [ 19.75, 34.208003449485325 ], "wc_reply_authors_avg": [ 391.25, 76.87774385347166 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2205484184328353728&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Hong Kong University of Science and Technology;University of North Carolina", "aff_unique_dep": ";", "aff_unique_url": "https://www.ust.hk;https://www.unc.edu", "aff_unique_abbr": "HKUST;UNC", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Hong Kong SAR;Chapel Hill", "aff_country_unique_index": "0;1", "aff_country_unique": "China;United States" }, { "id": "4k95LUAcqi", "title": "Assessing Neural Network Robustness via Adversarial Pivotal Tuning of Real Images", "track": "main", "status": "Withdraw", "tldr": "Utilizing StyleGAN's full capacity to manipulate images semantically so as to fool image classifiers 
through a process called Adversarial Pivotal Tuning.", "abstract": "The ability to assess the robustness of image classifiers to a diverse set of manipulations is essential to their deployment in the real world. Recently, semantic manipulations of real images have been considered for this purpose, as they may not arise using standard adversarial settings. However, such semantic manipulations are often limited to style, color or attribute changes. While expressive, these manipulations do not consider the full capacity of a pretrained generator to affect adversarial image manipulations. In this work, we aim at leveraging the full capacity of a pretrained image generator to generate highly detailed, diverse and photorealistic image manipulations. Inspired by recent GAN-based image inversion methods, we propose a method called Adversarial Pivotal Tuning (APT). APT first finds a pivot latent space input to a pretrained generator that best reconstructs an input image. It then adjusts the weights of the generator to create small, but semantic, manipulations which fool a pretrained classifier. Crucially, APT changes both the input and the weights of the pretrained generator, while preserving its expressive latent editing capability, thus allowing the use of its full capacity in creating semantic adversarial manipulations. We demonstrate that APT generates a variety of semantic image manipulations, which preserve the input image class, but which fool a variety of pretrained classifiers. We further demonstrate that classifiers trained to be robust to other robustness benchmarks, are not robust to our generated manipulations and propose an approach to improve the robustness towards our generated manipulations.", "keywords": "Robustness;Adversarial Examples;StyleGAN;Generative Models", "primary_area": "", "supplementary_material": "", "author": "Peter Ebert Christensen;V\u00e9steinn Sn\u00e6bjarnarson;Andrea Dittadi;Serge Belongie;Sagie Benaim", "authorids": "~Peter_Ebert_Christensen1;~V\u00e9steinn_Sn\u00e6bjarnarson1;~Andrea_Dittadi1;~Serge_Belongie1;~Sagie_Benaim1", "gender": "M;M;M;M;M", "homepage": "https://captaine.github.io/;https://vesteinn.is;https://addtt.github.io;https://di.ku.dk/english/staff/?pure=en%2Fpersons%2Fserge-belongie(0ce65383-3761-4b17-948a-83b461e371e2)%2Fpublications.html;https://sagiebenaim.github.io/", "dblp": "256/5458;273/5233;;http://dblp.uni-trier.de/pers/hd/b/Belongie:Serge_J=;129/1316", "google_scholar": "6MejXVYAAAAJ;cp283P4AAAAJ;PrvuuaAAAAAJ;ORr4XJYAAAAJ;-zSM2I8AAAAJ", "orcid": ";0000-0001-9995-6181;;0000-0002-0388-5217;0000-0003-0002-3467", "linkedin": ";v%C3%A9steinn-sn%C3%A6bjarnarson-781b82a6/;;sergebelongie;sagie-benaim-aab47474/", "or_profile": "~Peter_Ebert_Christensen1;~V\u00e9steinn_Sn\u00e6bjarnarson1;~Andrea_Dittadi1;~Serge_Belongie1;~Sagie_Benaim1", "aff": "University of Copenhagen;University of Copenhagen;KTH Royal Institute of Technology;University of Copenhagen;University of Copenhagen", "aff_domain": "diku.dk;ku.dk;kth.se;ku.dk;di.ku", "position": "PhD student;PhD student;Postdoc;Full Professor;Postdoc", "bibtex": "@misc{\nchristensen2023assessing,\ntitle={Assessing Neural Network Robustness via Adversarial Pivotal Tuning of Real Images},\nauthor={Peter Ebert Christensen and V{\\'e}steinn Sn{\\ae}bjarnarson and Andrea Dittadi and Serge Belongie and Sagie Benaim},\nyear={2023},\nurl={https://openreview.net/forum?id=4k95LUAcqi}\n}", "github": "", "project": "", "reviewers": "69ng;5ugw;9cmV", "site": "https://openreview.net/forum?id=4k95LUAcqi", 
"pdf_size": 10882782, "recommendation": "5;5;5", "confidence": "4;3;3", "correctness": "3;3;3", "technical_novelty": "2;2;2", "empirical_novelty": "2;3;2", "wc_summary_paper": "140;101;115", "wc_strength_and_weaknesses": "241;265;159", "wc_clarity_quality_novelty_and_reproducibility": "90;99;8", "wc_summary_review": "67;109;45", "wc_review": "538;574;327", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "305;402;256", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 118.66666666666667, 16.131404843417148 ], "wc_strength_and_weaknesses_avg": [ 221.66666666666666, 45.38232646698002 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 65.66666666666667, 40.94169295745136 ], "wc_summary_review_avg": [ 73.66666666666667, 26.549743668986505 ], "wc_review_avg": [ 479.6666666666667, 108.9474899002063 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 321.0, 60.668498140852854 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:4DrUOihsPZUJ:scholar.google.com/&scioq=Assessing+Neural+Network+Robustness+via+Adversarial+Pivotal+Tuning+of+Real+Images&hl=en&as_sdt=0,10", "gs_version_total": 0, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "University of Copenhagen;KTH Royal Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.ku.dk;https://www.kth.se", "aff_unique_abbr": "UCPH;KTH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "Denmark;Sweden" }, { "id": "4lGL_ruf--t", "title": "OhMG: Zero-shot Open-vocabulary Human Motion Generation", "track": "main", "status": "Withdraw", "tldr": "We propose a zero-shot open-vocabulary human motion generation framework, with guidance from the large foundation model (i.e., CLIP)", "abstract": "Generating motion in line with text has attracted increasing attention nowadays. However, open-vocabulary human motion generation still remains touchless and undergoes the lack of diverse labeled data. The good news is that, recent studies of large foundation models (e.g., CLIP) have demonstrated superior performance on few/zero-shot image-text alignment, largely reducing the need for manually labeled data. In this paper, we take the advantage of CLIP for open-vocabulary 3D human motion generation in a zero-shot manner. Specifically, our model is composed of two stages, i.e., text2pose and pose2motion generations. For text2pose generation, to address the difficulty of optimization with direct supervision from CLIP, we propose to carve the versatile CLIP model into a slimmer but more specific model for aligning 3D poses and texts, via a novel pipeline distillation strategy. Optimizing with the distilled 3D pose-text model, we manage to concretize the text-pose knowledge of CLIP into a text2pose generator effectively and efficiently. As for pose2motion, drawing the inspiration of the advanced language model, we pretrain a transformer-based motion model, which makes up for the lack of motion dynamics of CLIP. 
After that, by formulating the condition poses as prompts, the motion generator can generate motions referring to the condition poses in a controllable and flexible manner.", "keywords": "foundation model;contrastive language-image pretraining;human motion generation;zero-shot;open-vocabulary", "primary_area": "", "supplementary_material": "", "author": "Junfan Lin;Jianlong Chang;Lingbo Liu;Guanbin Li;Liang Lin;Qi Tian;Chang Wen Chen", "authorids": "~Junfan_Lin1;~Jianlong_Chang2;~Lingbo_Liu1;~Guanbin_Li2;~Liang_Lin1;~Qi_Tian3;~Chang_Wen_Chen1", "gender": "M;M;M;M;M;M;M", "homepage": "https://github.com/junfanlin;https://jianlongchange.github.io/;http://lingboliu.com/;http://guanbinli.com;http://www.linliang.net;https://www.qitian1987.com/index.html;https://chenlab.comp.polyu.edu.hk/", "dblp": "260/6800;92/2332;20/5299;126/4457;;78/1467-1.html;29/4638", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;RDwnNsQAAAAJ;sh2DmQgAAAAJ;2A2Bx2UAAAAJ;https://scholar.google.com.hk/citations?user=Nav8m8gAAAAJ;https://scholar.google.com/citations?hl=en;w2HXPUUAAAAJ", "orcid": ";;0000-0001-8179-6685;0000-0002-2486-2890;;0000-0002-7252-5047;0000-0002-6720-234X", "linkedin": ";;;;;;chang-wen-chen-7b72095/", "or_profile": "~Junfan_Lin1;~Jianlong_Chang2;~Lingbo_Liu1;~Guanbin_Li2;~Liang_Lin1;~Qi_Tian3;~Chang_Wen_Chen1", "aff": "SUN YAT-SEN UNIVERSITY;Huawei Technologies Ltd.;Hong Kong Polytechnic University;SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY;Huawei Technologies Ltd.;Hong Kong Polytechnic University", "aff_domain": "sysu.edu.cn;huawei.com;polyu.edu.hk;sysu.edu.cn;sysu.edu.cn;huawei.com;polyu.edu.hk", "position": "PhD student;Principal Researcher;Researcher;Associate Professor;Full Professor;Principal Researcher;Full Professor", "bibtex": "@misc{\nlin2023ohmg,\ntitle={Oh{MG}: Zero-shot Open-vocabulary Human Motion Generation},\nauthor={Junfan Lin and Jianlong Chang and Lingbo Liu and Guanbin Li and Liang Lin and Qi Tian and Chang Wen Chen},\nyear={2023},\nurl={https://openreview.net/forum?id=4lGL_ruf--t}\n}", "github": "", "project": "", "reviewers": "", "site": "https://openreview.net/forum?id=4lGL_ruf--t", "pdf_size": 2177926, "recommendation": "", "confidence": "", "correctness": "", "technical_novelty": "", "empirical_novelty": "", "wc_summary_paper": "", "wc_strength_and_weaknesses": "", "wc_clarity_quality_novelty_and_reproducibility": "", "wc_summary_review": "", "wc_review": "", "wc_reply_reviewers": "", "wc_reply_authors": "", "reply_reviewers": "", "reply_authors": "", "recommendation_avg": [ 0, 0 ], "confidence_avg": [ 0, 0 ], "correctness_avg": [ 0, 0 ], "technical_novelty_avg": [ 0, 0 ], "empirical_novelty_avg": [ 0, 0 ], "wc_summary_paper_avg": [ 0, 0 ], "wc_strength_and_weaknesses_avg": [ 0, 0 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 0, 0 ], "wc_summary_review_avg": [ 0, 0 ], "wc_review_avg": [ 0, 0 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 1, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0, "corr_recommendation_correctness": 0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9367748915966533236&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;2;0;0;1;2", "aff_unique_norm": "Sun Yat-sen University;Huawei;Hong Kong Polytechnic University", "aff_unique_dep": ";Huawei Technologies;", "aff_unique_url": 
"http://www.sysu.edu.cn;https://www.huawei.com;https://www.polyu.edu.hk", "aff_unique_abbr": "SYSU;Huawei;PolyU", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "4lw-X9jRi1c", "title": "UniS-MMC: Learning Unimodality-supervised Multimodal Contrastive Representations", "track": "main", "status": "Withdraw", "tldr": "This paper proposes a novel multi-task-based multimodal contrastive method for multimodal representation learning (multimodal classification task).", "abstract": "Multimodal learning aims to imitate human beings to acquire complementary information from multiple modalities for final decisions. \nHowever, just like a human's final decision can be confused by specific erroneous information from the environment, current multimodal learning methods also suffer from uncertain unimodal prediction when learning multimodal representations. In this work, we propose to contrastively explore reliable representations and increase the agreement among the unimodal representations that alone make potentially correct predictions.\nSpecifically, we first capture task-related representations by directly sharing representations between unimodal and multimodal learning tasks. With the unimodal representations and predictions from the multitask-based framework, we then propose a novel multimodal contrastive learning method to align the representations towards the relatively more reliable modality under the weak supervision of the unimodal predictions.\nExperimental results on two image-text benchmarks UPMC-Food-101 and N24News, and two medical benchmarks ROSMAP and BRCA, show that our proposed Unimodality-supervised Multimodal Contrastive (UniS-MMC) learning method outperforms current state-of-the-art multimodal learning methods. 
The detailed ablation studies further demonstrate the advantage of our proposed method.", "keywords": "multimodal learning;contrastive learning;multi-task learning", "primary_area": "", "supplementary_material": "/attachment/2fde8bc0d646236841f097618bea43c983b7546b.zip", "author": "Heqing Zou;Meng Shen;Chen Chen;Yuchen Hu;Deepu Rajan;EngSiong Chng", "authorids": "~Heqing_Zou1;~Meng_Shen1;chen1436@e.ntu.edu.sg;yuchen005@e.ntu.edu.sg;~Deepu_Rajan1;~EngSiong_Chng1", "gender": "M;M;;;;M", "homepage": ";https://mengshen0709.github.io/;;;;https://personal.ntu.edu.sg/aseschng/intro1.html", "dblp": "277/6282;03/8774-2;;;;c/ChngEngSiong", "google_scholar": "https://scholar.google.com.sg/citations?hl=en;https://scholar.google.com/citations?hl=en;;;;https://scholar.google.com.tw/citations?user=FJodrCcAAAAJ", "orcid": ";0000-0003-2502-3500;;;;", "linkedin": ";meng-shen-0b0b5a24a/;;;;", "or_profile": "~Heqing_Zou1;~Meng_Shen1;chen1436@e.ntu.edu.sg;yuchen005@e.ntu.edu.sg;~Deepu_Rajan1;~EngSiong_Chng1", "aff": "Nanyang Technological University;Nanyang Technological University;;;;Nanyang Technological University", "aff_domain": "ntu.edu;ntu.edu.sg;;;;ntu.edu.sg", "position": "PhD student;PhD student;;;;Associate Professor", "bibtex": "@misc{\nzou2023unismmc,\ntitle={UniS-{MMC}: Learning Unimodality-supervised Multimodal Contrastive Representations},\nauthor={Heqing Zou and Meng Shen and Chen Chen and Yuchen Hu and Deepu Rajan and EngSiong Chng},\nyear={2023},\nurl={https://openreview.net/forum?id=4lw-X9jRi1c}\n}", "github": "", "project": "", "reviewers": "v6pE;6dsM;VdyB;REcN", "site": "https://openreview.net/forum?id=4lw-X9jRi1c", "pdf_size": 1057997, "recommendation": "3;3;5;5", "confidence": "4;3;5;3", "correctness": "3;3;3;4", "technical_novelty": "2;2;2;2", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "23;32;58;71", "wc_strength_and_weaknesses": "101;114;167;237", "wc_clarity_quality_novelty_and_reproducibility": "29;24;106;21", "wc_summary_review": "4;23;29;34", "wc_review": "157;193;360;363", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "273;280;226;313", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 46.0, 19.32614809008769 ], "wc_strength_and_weaknesses_avg": [ 154.75, 53.53678641831241 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 45.0, 35.33411948810951 ], "wc_summary_review_avg": [ 22.5, 11.368817000902073 ], "wc_review_avg": [ 268.25, 94.12060082681155 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 273.0, 31.056400306539068 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.30151134457776363, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ajeSb8eBjpoJ:scholar.google.com/&scioq=UniS-MMC:+Learning+Unimodality-supervised+Multimodal+Contrastive+Representations&hl=en&as_sdt=0,10", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Nanyang Technological University", "aff_unique_dep": "", "aff_unique_url": "https://www.ntu.edu.sg", "aff_unique_abbr": "NTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Singapore" }, { "id": 
"4mFTFqOovux", "title": "Node Number Awareness Representation for Graph Similarity Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "This work aims to address two important issues in the graph similarity computation, the first one is the Node Number Awareness Issue (N$^2$AI), and the second one is how to accelerate the inference speed of graph similarity computation in downstream tasks. We found that existing Graph Neural Network based graph similarity models have a large error in predicting the similarity scores of two graphs with similar number of nodes. Our analysis shows that this is because of the global pooling function in graph neural networks that maps graphs with similar number of nodes to similar embedding distributions, reducing the separability of their embeddings, which we refer to as the N$^2$AI. Our motivation is to enhance the difference between the two embeddings to improve their separability, thus we leverage our proposed Different Attention (DiffAtt) to construct Node Number Awareness Graph Similarity Model (N$^2$AGim). In addition, we propose the Graph Similarity Learning with Landmarks (GSL$^2$) to accelerate similarity computation. GSL$^2$ uses the trained N$^2$AGim to generate the individual embedding for each graph without any additional learning, and this individual embedding can effectively help GSL$^2$ to improve its inference speed. Experiments demonstrate that our N$^2$AGim outperforms the second best approach on Mean Square Error by 24.3\\%(1.170 vs 1.546), 43.1\\%(0.066 vs 0.116), and 44.3\\%(0.308 vs 0.553), on AIDS700nef, LINUX, and IMDBMulti datasets, respectively. Our GSL$^2$ is at most 47.7 and 1.36 times faster than N$^2$AGim and the second faster model. Our code is publicly available on https://github.com/iclr231312/N2AGim. 
", "keywords": "graph representation learning;graph similarity learning;graph matching", "primary_area": "", "supplementary_material": "", "author": "JaX Lyu;Liang Zhang;Yi Huang;Shifeng Chen;Guangming Zhu;Mohammed Bennamoun;Syed Afaq Ali Shah", "authorids": "~JaX_Lyu1;~Liang_Zhang1;~Yi_Huang7;~Shifeng_Chen1;~Guangming_Zhu2;~Mohammed_Bennamoun1;~Syed_Afaq_Ali_Shah2", "gender": ";M;M;M;M;M;M", "homepage": ";https://web.xidian.edu.cn/zhangliang/;;;https://web.xidian.edu.cn/gmzhu/index.html;https://research-repository.uwa.edu.au/en/persons/mohammed-bennamoun;https://www.ecu.edu.au/schools/science/staff/profiles/senior-lecturers/dr-syed-afaq-ali-shah", "dblp": ";;15/6040-10;84/4529;18/7761-1.html;00/3214.html;141/9937", "google_scholar": ";;;;;https://scholar.google.com.au/citations?user=ylX5MEAAAAAJ;https://scholar.google.com.au/citations?user=jO8lwTYAAAAJ", "orcid": ";;0000-0002-8443-6877;0000-0003-0677-7358;0000-0003-3214-4095;0000-0002-6603-3257;", "linkedin": ";;;;;mohammed-bennamoun-b3147174/;", "or_profile": "~JaX_Lyu1;~Liang_Zhang1;~Yi_Huang7;~Shifeng_Chen1;~Guangming_Zhu2;~Mohammed_Bennamoun1;~Syed_Afaq_Ali_Shah2", "aff": ";Xidian University;Shenzhen Institute of Advanced Technology, Chinese Academy of Sciences;Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences;Xidian University;University of Western Australia;Edith Cowan University", "aff_domain": ";xidian.edu.cn;siat.ac.cn;siat.ac.cn;xidian.edu.cn;uwa.edu.au;ecu.edu.au", "position": ";Associate Professor;PhD student;Associate Professor;Associate Professor;Full Professor;Associate Professor", "bibtex": "@misc{\nlyu2023node,\ntitle={Node Number Awareness Representation for Graph Similarity Learning},\nauthor={JaX Lyu and Liang Zhang and Yi Huang and Shifeng Chen and Guangming Zhu and Mohammed Bennamoun and Syed Afaq Ali Shah},\nyear={2023},\nurl={https://openreview.net/forum?id=4mFTFqOovux}\n}", "github": "", "project": "", "reviewers": "ngMc;2mHs;qS53;w3qo", "site": "https://openreview.net/forum?id=4mFTFqOovux", "pdf_size": 8503911, "recommendation": "3;3;6;6", "confidence": "3;4;4;2", "correctness": "3;3;3;3", "technical_novelty": "3;1;2;3", "empirical_novelty": "3;1;3;3", "wc_summary_paper": "61;105;92;80", "wc_strength_and_weaknesses": "117;126;300;128", "wc_clarity_quality_novelty_and_reproducibility": "89;24;57;35", "wc_summary_review": "29;16;82;20", "wc_review": "296;271;531;263", "wc_reply_reviewers": "0;18;20;20", "wc_reply_authors": "1737;1830;940;807", "reply_reviewers": "0;1;1;1", "reply_authors": "5;7;2;2", "recommendation_avg": [ 4.5, 1.5 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.25, 0.82915619758885 ], "empirical_novelty_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 84.5, 16.194134740701646 ], "wc_strength_and_weaknesses_avg": [ 167.75, 76.46690460584892 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 51.25, 24.823124299733102 ], "wc_summary_review_avg": [ 36.75, 26.54595072699413 ], "wc_review_avg": [ 340.25, 110.80021434997317 ], "wc_reply_reviewers_avg": [ 14.5, 8.411301920630361 ], "wc_reply_authors_avg": [ 1328.5, 458.603586989897 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 4.0, 2.1213203435596424 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.3015113445777637, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:IZj5t53quqcJ:scholar.google.com/&scioq=Node+Number+Awareness+Representation+for+Graph+Similarity+Learning&hl=en&as_sdt=0,23", "gs_version_total": 0, "aff_unique_index": "0;1;2;0;3;4", "aff_unique_norm": "Xidian University;Shenzhen Institute of Advanced Technology;Chinese Academy of Sciences;University of Western Australia;Edith Cowan University", "aff_unique_dep": ";;Shenzhen Institutes of Advanced Technology;;", "aff_unique_url": "http://www.xidian.edu.cn/;http://www.siat.cas.cn;http://www.siat.cas.cn;https://www.uwa.edu.au;https://www.ecu.edu.au", "aff_unique_abbr": "Xidian;SIAT;SIAT;UWA;ECU", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;0;0;0;1;1", "aff_country_unique": "China;Australia" }, { "id": "4nrZXPFN1c4", "title": "Energy Transformer", "track": "main", "status": "Reject", "tldr": "We propose a network, which describes the forward pass in a transformer as a gradient decent on an energy function. ", "abstract": "Transformers have become the de facto models of choice in machine learning, typically leading to impressive performance on many applications. At the same time, the architectural development in the transformer world is mostly driven by empirical findings, and the theoretical understanding of their architectural building blocks is rather limited. In contrast, Dense Associative Memory models or Modern Hopfield Networks have a well-established theoretical foundation, but have not yet demonstrated truly impressive practical results. We propose a transformer architecture that replaces the sequence of feedforward transformer blocks with a single large Associative Memory model. Our novel architecture, called Energy Transformer (or ET for short), has many of the familiar architectural primitives that are often used in the current generation of transformers. However, it is not identical to the existing architectures. The sequence of transformer layers in ET is purposely designed to minimize a specifically engineered energy function, which is responsible for representing the relationships between the tokens. As a consequence of this computational principle, the attention in ET is different from the conventional attention mechanism. 
In this work, we introduce the theoretical foundations of ET, explore its empirical capabilities using the image completion task, and obtain strong quantitative results on the graph anomaly detection task.", "keywords": "Transformers;Hopfield Networks;Graph Anomaly Detection", "primary_area": "", "supplementary_material": "/attachment/bbe1439136a560763839893d9dab6e8bdcb37b25.zip", "author": "Benjamin Hoover;Yuchen Liang;Bao Pham;Rameswar Panda;Hendrik Strobelt;Duen Horng Chau;Mohammed J Zaki;Dmitry Krotov", "authorids": "~Benjamin_Hoover1;~Yuchen_Liang2;~Bao_Pham1;~Rameswar_Panda1;~Hendrik_Strobelt1;~Duen_Horng_Chau1;~Mohammed_J_Zaki1;~Dmitry_Krotov2", "gender": "M;;M;M;M;M;;Not Specified", "homepage": "https://bhoov.com;;;https://rpand002.github.io/;http://hendrik.strobelt.com;http://www.cs.rpi.edu/~zaki;https://mitibmwatsonailab.mit.edu/people/dmitry-krotov/;https://faculty.cc.gatech.edu/~dchau", "dblp": "250/9412;31/8891;;126/0986;67/7527;z/MohammedJaveedZaki.html;182/2341;10/2670", "google_scholar": "n10P0tYAAAAJ;;;_ySuu6gAAAAJ;H4vEe_oAAAAJ;https://scholar.google.com/scholar?q=zaki,+mj;WeD9ll0AAAAJ;https://scholar.google.com.tw/citations?user=YON32W4AAAAJ", "orcid": "0000-0001-5218-3185;;0000-0001-8962-9961;;;0000-0003-4711-0234;;0000-0001-9824-3323", "linkedin": "benhoov/;yuchen-liang-42471430/;;;;mohammed-j-zaki/;krotovdmitry;polochau", "or_profile": "~Benjamin_Hoover1;~Yuchen_Liang2;~Bao_Pham1;~Rameswar_Panda1;~Hendrik_Strobelt1;~Mohammed_J_Zaki1;~Dmitry_Krotov2;~Duen_Chau1", "aff": "International Business Machines;Rensselaer Polytechnic Institute;Rensselaer Polytechnic Institute;MIT-IBM Watson AI Lab;International Business Machines;Rensselaer Polytechnic Institute;Massachusetts Institute of Technology;", "aff_domain": "research.ibm.com;rpi.edu;rpi.edu;ibm.com;ibm.com;rpi.edu;mit.edu;", "position": "AI Research Engineer;PhD student;PhD student;Research Scientist;Principal Researcher;Professor;Researcher;", "bibtex": "@misc{\nhoover2023energy,\ntitle={Energy Transformer},\nauthor={Benjamin Hoover and Yuchen Liang and Bao Pham and Rameswar Panda and Hendrik Strobelt and Duen Horng Chau and Mohammed J Zaki and Dmitry Krotov},\nyear={2023},\nurl={https://openreview.net/forum?id=4nrZXPFN1c4}\n}", "github": "", "project": "", "reviewers": "v3Fm;VTZR;zHgn;iuLg;UVr7", "site": "https://openreview.net/forum?id=4nrZXPFN1c4", "pdf_size": 19682945, "recommendation": "5;6;6;6;8", "confidence": "4;3;2;2;4", "correctness": "3;3;3;2;4", "technical_novelty": "3;2;3;3;3", "empirical_novelty": "2;2;3;2;3", "wc_summary_paper": "92;62;53;45;147", "wc_strength_and_weaknesses": "741;205;81;215;389", "wc_clarity_quality_novelty_and_reproducibility": "12;11;1;33;25", "wc_summary_review": "50;22;24;18;10", "wc_review": "895;300;159;311;571", "wc_reply_reviewers": "298;0;36;33;0", "wc_reply_authors": "1091;786;122;532;1079", "reply_reviewers": "1;0;1;1;0", "reply_authors": "3;2;1;1;2", "recommendation_avg": [ 6.2, 0.9797958971132712 ], "confidence_avg": [ 3.0, 0.8944271909999159 ], "correctness_avg": [ 3.0, 0.6324555320336759 ], "technical_novelty_avg": [ 2.8, 0.39999999999999997 ], "empirical_novelty_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_paper_avg": [ 79.8, 37.177412497375336 ], "wc_strength_and_weaknesses_avg": [ 326.2, 229.4257178260537 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 16.4, 11.271202242884296 ], "wc_summary_review_avg": [ 24.8, 13.481839637082173 ], "wc_review_avg": [ 447.2, 260.43379197024336 ], "wc_reply_reviewers_avg": [ 73.4, 113.35889907722287 ],
"wc_reply_authors_avg": [ 722.0, 364.3641036106603 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": 0.22821773229381923, "corr_recommendation_correctness": 0.6454972243679028, "gs_citation": 60, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3803905964826469995&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "aff_unique_index": "0;1;1;2;0;1;2", "aff_unique_norm": "International Business Machines Corporation;Rensselaer Polytechnic Institute;Massachusetts Institute of Technology", "aff_unique_dep": ";;IBM Watson AI Lab", "aff_unique_url": "https://www.ibm.com;https://www.rpi.edu;https://www.mitibmwatsonailab.org", "aff_unique_abbr": "IBM;RPI;MIT-IBM AI Lab", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Mid-Vision Feedback", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10754", "id": "4oLK1_k71Tz", "poster": "", "openreview": "https://openreview.net/forum?id=4oLK1_k71Tz", "slides": "https://iclr.cc/virtual/2023/poster/10754", "video": "https://iclr.cc/virtual/2023/poster/10754", "author_site": "Michael Maynord, Eadom Dessalene, Cornelia Fermuller, Yiannis Aloimonos", "tldr": "", "abstract": "Feedback plays a prominent role in biological vision, where perception is modulated based on agents' evolving expectations and world model. We introduce a novel mechanism which modulates perception based on high level categorical expectations: Mid-Vision Feedback (MVF). MVF associates high level contexts with linear transformations. When a context is \"expected\" its associated linear transformation is applied over feature vectors in a mid level of a network. The result is that mid-level network representations are biased towards conformance with high level expectations, improving overall accuracy and contextual consistency. Additionally, during training mid-level feature vectors are biased through introduction of a loss term which increases the distance between feature vectors associated with different contexts. MVF is agnostic as to the source of contextual expectations, and can serve as a mechanism for top down integration of symbolic systems with deep vision architectures. 
We show the superior performance of MVF to post-hoc filtering for incorporation of contextual knowledge, and show superior performance of configurations using predicted context (when no context is known a priori) over configurations with no context awareness.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Michael Maynord;Eadom T Dessalene;Cornelia Fermuller;Yiannis Aloimonos", "authorids": "~Michael_Maynord1;~Eadom_T_Dessalene1;~Cornelia_Fermuller3;~Yiannis_Aloimonos1", "gender": ";M;F;M", "homepage": ";https://www.cs.umd.edu/people/edessale;http://users.umiacs.umd.edu/users/fer/;http://www.prg.cs.umd.edu", "dblp": ";;f/CorneliaFermuller;a/YiannisAloimonos", "google_scholar": ";;0gEOJSEAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0003-2044-2386;", "linkedin": ";;cornelia-fermuller-594b855/;yiannis-aloimonos-6374865/", "or_profile": "~Michael_Maynord1;~Eadom_T_Dessalene1;~Cornelia_Fermuller3;~Yiannis_Aloimonos1", "aff": ";University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park", "aff_domain": ";umd.edu;umd.edu;umd.edu", "position": ";PhD student;Research Scientist;Full Professor", "bibtex": "@inproceedings{\nmaynord2023midvision,\ntitle={Mid-Vision Feedback},\nauthor={Michael Maynord and Eadom T Dessalene and Cornelia Fermuller and Yiannis Aloimonos},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=4oLK1_k71Tz}\n}", "github": "", "project": "", "reviewers": "zASa;ZiEu;ym4H", "pdf_size": 11691341, "recommendation": "5;6;8", "confidence": "5;4;4", "correctness": "3;3;3", "technical_novelty": "2;3;4", "empirical_novelty": "2;3;4", "wc_summary_paper": "44;320;71", "wc_strength_and_weaknesses": "89;246;319", "wc_clarity_quality_novelty_and_reproducibility": "67;1002;72", "wc_summary_review": "467;113;48", "wc_review": "667;1681;510", "wc_reply_reviewers": "91;0;0", "wc_reply_authors": "698;704;607", "reply_reviewers": "1;0;0", "reply_authors": "4;5;2", "recommendation_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 3.0, 0.816496580927726 ], "empirical_novelty_avg": [ 3.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 145.0, 124.23365083583433 ], "wc_strength_and_weaknesses_avg": [ 218.0, 95.96179795453327 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 380.3333333333333, 439.5894549336789 ], "wc_summary_review_avg": [ 209.33333333333334, 184.12012986694916 ], "wc_review_avg": [ 952.6666666666666, 518.9825516227775 ], "wc_reply_reviewers_avg": [ 30.333333333333332, 42.897811391983886 ], "wc_reply_authors_avg": [ 669.6666666666666, 44.37967502760194 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 3.6666666666666665, 1.247219128924647 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.7559289460184544, "corr_recommendation_correctness": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9080751403110573187&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "pdf": "https://openreview.net/pdf?id=4oLK1_k71Tz", "email": ";umd.edu;umd.edu;umd.edu", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Maryland", "aff_unique_dep": "", "aff_unique_url": "https://www/umd.edu", "aff_unique_abbr": "UMD", "aff_campus_unique_index": "0;0;0", 
"aff_campus_unique": "College Park", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "The Role of ImageNet Classes in Fr\u00e9chet Inception Distance", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12013", "id": "4oXTQ6m_ws8", "poster": "/media/PosterPDFs/ICLR%202023/12013.png?t=1682318543.590523", "openreview": "https://openreview.net/forum?id=4oXTQ6m_ws8", "slides": "https://iclr.cc/virtual/2023/poster/12013", "video": "https://iclr.cc/virtual/2023/poster/12013", "author_site": "Tuomas Kynk\u00e4\u00e4nniemi, Tero Karras, Miika Aittala, Timo Aila, Jaakko Lehtinen", "tldr": "We elucidate why using ImageNet pre-trained Inception features in FID can cause discrepancies with human judgement.", "abstract": "Fr\u00e9chet Inception Distance (FID) is the primary metric for ranking models in data-driven generative modeling. While remarkably successful, the metric is known to sometimes disagree with human judgement. We investigate a root cause of these discrepancies, and visualize what FID \"looks at\" in generated images. We show that the feature space that FID is (typically) computed in is so close to the ImageNet classifications that aligning the histograms of Top-$N$ classifications between sets of generated and real images can reduce FID substantially \u2014 without actually improving the quality of results. Thus, we conclude that FID is prone to intentional or accidental distortions. As a practical example of an accidental distortion, we discuss a case where an ImageNet pre-trained FastGAN achieves a FID comparable to StyleGAN2, while being worse in terms of human evaluation.", "keywords": "generative models;evaluation;Fr\u00e9chet Inception Distance", "primary_area": "", "supplementary_material": "", "author": "Tuomas Kynk\u00e4\u00e4nniemi;Tero Karras;Miika Aittala;Timo Aila;Jaakko Lehtinen", "authorids": "~Tuomas_Kynk\u00e4\u00e4nniemi1;~Tero_Karras1;~Miika_Aittala2;~Timo_Aila1;~Jaakko_Lehtinen1", "gender": ";M;M;M;M", "homepage": ";http://research.nvidia.com/person/tero-karras;https://people.csail.mit.edu/miika/;https://users.aalto.fi/~ailat1/;https://users.aalto.fi/~lehtinj7/", "dblp": "239/6466;32/7864;;95/2789;71/4075", "google_scholar": "https://scholar.google.fi/citations?user=7sATEtIAAAAJ;https://scholar.google.fi/citations?user=-50qJW8AAAAJ;-_EKVQ0AAAAJ;e7abmgkAAAAJ;https://scholar.google.fi/citations?user=Vpr6s3sAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Tuomas_Kynk\u00e4\u00e4nniemi1;~Tero_Karras1;~Miika_Aittala2;~Timo_Aila1;~Jaakko_Lehtinen1", "aff": "Aalto University;NVIDIA;NVIDIA;NVIDIA;NVIDIA", "aff_domain": "aalto.fi;nvidia.com;nvidia.com;nvidia.com;nvidia.com", "position": "PhD student;Distinguished Research Scientist;Senior Research Scientist;Distinguished Research Scientist;Distinguished Research Scientist", "bibtex": "@inproceedings{\nkynk{\\\"a}{\\\"a}nniemi2023the,\ntitle={The Role of ImageNet Classes in Fr\\'echet Inception Distance},\nauthor={Tuomas Kynk{\\\"a}{\\\"a}nniemi and Tero Karras and Miika Aittala and Timo Aila and Jaakko Lehtinen},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=4oXTQ6m_ws8}\n}", "github": "", "project": "", "reviewers": "ncn9;9T7x;iTAh;Xd21", "pdf_size": 9649976, "recommendation": "5;6;8;8", "confidence": "4;4;3;3", "correctness": "4;4;4;4", "technical_novelty": "2;2;4;4", "empirical_novelty": "3;2;4;4", "wc_summary_paper": "122;143;63;60", 
"wc_strength_and_weaknesses": "399;414;43;63", "wc_clarity_quality_novelty_and_reproducibility": "43;106;10;27", "wc_summary_review": "30;66;6;21", "wc_review": "594;729;122;171", "wc_reply_reviewers": "0;89;0;0", "wc_reply_authors": "625;464;5;219", "reply_reviewers": "0;1;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 4.0, 0.0 ], "technical_novelty_avg": [ 3.0, 1.0 ], "empirical_novelty_avg": [ 3.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 97.0, 36.28360511305347 ], "wc_strength_and_weaknesses_avg": [ 229.75, 176.9708662463966 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 46.5, 36.28015986734347 ], "wc_summary_review_avg": [ 30.75, 22.083647796503186 ], "wc_review_avg": [ 404.0, 262.45856815886197 ], "wc_reply_reviewers_avg": [ 22.25, 38.53813046840752 ], "wc_reply_authors_avg": [ 328.25, 236.06924301992413 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.9622504486493761, "corr_recommendation_correctness": 0.0, "gs_citation": 216, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=628976172933060621&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "pdf": "https://openreview.net/pdf?id=4oXTQ6m_ws8", "email": "aalto.fi;nvidia.com;nvidia.com;nvidia.com;nvidia.com", "author_num": 5, "aff_unique_index": "0;1;1;1;1", "aff_unique_norm": "Aalto University;NVIDIA", "aff_unique_dep": ";NVIDIA Corporation", "aff_unique_url": "https://www.aalto.fi;https://www.nvidia.com", "aff_unique_abbr": "Aalto;NVIDIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1", "aff_country_unique": "Finland;United States" }, { "title": "Transformer-Patcher: One Mistake Worth One Neuron", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11663", "id": "4oYUGeGBPm", "poster": "", "openreview": "https://openreview.net/forum?id=4oYUGeGBPm", "slides": "https://iclr.cc/virtual/2023/poster/11663", "video": "https://iclr.cc/virtual/2023/poster/11663", "author_site": "Zeyu Huang, Yikang Shen, Xiaofeng Zhang, Jie Zhou, Wenge Rong, Zhang Xiong", "tldr": "A Sequential Model Editor to correct model's output on the specific input.", "abstract": "Large Transformer-based Pretrained Language Models (PLMs) dominate almost all Natural Language Processing (NLP) tasks. Nevertheless, they still make mistakes from time to time. For a model deployed in an industrial environment, fixing these mistakes quickly and robustly is vital to improve user experiences. Previous works formalize such problems as Model Editing (ME) and mostly focus on fixing one mistake. However, the one-mistake-fixing scenario is not an accurate abstraction of the real-world challenge. In the deployment of AI services, there are ever-emerging mistakes, and the same mistake may recur if not corrected in time. Thus a preferable solution is to rectify the mistakes as soon as they appear nonstop. Therefore, we extend the existing ME into the Sequential Model Editing (SME) to help develop more practical editing methods. Our study shows that current ME methods either fail to make a sequence of edits or to remember previous edits. We then introduce Transformer-Patcher, a novel model editor that can shift the behavior of transformer-based models by simply adding and training a few neurons in the last Feed-Forward Network layer. 
Experimental results on both classification and generation tasks show that Transformer-Patcher can successively correct up to thousands of errors (Reliability) and generalize to their equivalent inputs (Generality) while retaining the model\u2019s accuracy on irrelevant inputs (Locality). Our method outperforms previous fine-tuning and HyperNetwork-based methods and achieves state-of-the-art performance for Sequential Model Editing (SME).", "keywords": "Sequential Model Editing", "primary_area": "", "supplementary_material": "/attachment/b71f6440d473a5222cb1ab141325795968e25723.zip", "author": "Zeyu Huang;Yikang Shen;Xiaofeng Zhang;Jie Zhou;Wenge Rong;Zhang Xiong", "authorids": "~Zeyu_Huang1;~Yikang_Shen1;~Xiaofeng_Zhang2;~Jie_Zhou8;~Wenge_Rong1;~Zhang_Xiong1", "gender": ";M;;M;M;M", "homepage": ";;;;;", "dblp": ";152/8226;;00/5012-16;18/5572.html;77/6921-1.html", "google_scholar": "https://scholar.google.com/citations?hl=en;qff5rRYAAAAJ;;https://scholar.google.com.hk/citations?user=OijxQCMAAAAJ;;", "orcid": ";;;0000-0002-5899-5165;;", "linkedin": ";;;;;", "or_profile": "~Zeyu_Huang1;~Yikang_Shen1;~Xiaofeng_Zhang2;~Jie_Zhou8;~Wenge_Rong1;~Zhang_Xiong1", "aff": ";International Business Machines;;WeChat AI, Tencent Inc.;Beihang University;Beihang University", "aff_domain": ";ibm.com;;tencent.com;buaa.edu.cn;buaa.edu.cn", "position": ";Researcher;;Principal Researcher;Full Professor;Full Professor", "bibtex": "@inproceedings{\nhuang2023transformerpatcher,\ntitle={Transformer-Patcher: One Mistake Worth One Neuron},\nauthor={Zeyu Huang and Yikang Shen and Xiaofeng Zhang and Jie Zhou and Wenge Rong and Zhang Xiong},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=4oYUGeGBPm}\n}", "github": "", "project": "", "reviewers": "5q14;n4JS;AwJg;YxK1", "pdf_size": 730534, "recommendation": "6;6;6;8", "confidence": "3;2;4;4", "correctness": "3;2;2;3", "technical_novelty": "3;2;3;3", "empirical_novelty": "2;2;3;4", "wc_summary_paper": "95;48;116;163", "wc_strength_and_weaknesses": "343;155;239;1187", "wc_clarity_quality_novelty_and_reproducibility": "42;22;150;192", "wc_summary_review": "44;32;220;114", "wc_review": "524;257;725;1656", "wc_reply_reviewers": "193;0;0;0", "wc_reply_authors": "1129;195;2500;991", "reply_reviewers": "1;0;0;0", "reply_authors": "5;1;5;2", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 105.5, 41.33098111586513 ], "wc_strength_and_weaknesses_avg": [ 481.0, 413.0133169765837 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 101.5, 71.41953514270448 ], "wc_summary_review_avg": [ 102.5, 74.7178024302107 ], "wc_review_avg": [ 790.5, 526.551279554043 ], "wc_reply_reviewers_avg": [ 48.25, 83.57145146519834 ], "wc_reply_authors_avg": [ 1203.75, 828.9587972269792 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.25, 1.7853571071357126 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.5222329678670935, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 164, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=53766956711776463&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=4oYUGeGBPm", "email": 
";ibm.com;;tencent.com;buaa.edu.cn;buaa.edu.cn", "author_num": 6, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "International Business Machines Corporation;Tencent;Beihang University", "aff_unique_dep": ";WeChat AI;", "aff_unique_url": "https://www.ibm.com;https://www.tencent.com;http://www.buaa.edu.cn/", "aff_unique_abbr": "IBM;Tencent;BUAA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "United States;China" }, { "id": "4ojYamKgnQc", "title": "MetaPhysiCa: Causality-aware Robustness to OOD Initial Conditions in Physics-informed Machine Learning", "track": "main", "status": "Reject", "tldr": "This work proposes combining causal structural discovery, invariant risk minimization, and meta-learning in order to make Physics-informed Machine Learning robust to OOD tasks.", "abstract": "A fundamental challenge in physics-informed machine learning (PIML) is the design of robust PIML methods for out-of-distribution (OOD) forecasting tasks, where the tasks require learning-to-learn from observations of the same (ODE) dynamical system with different unknown parameters, and demand accurate forecasts even under initial conditions outside the training support. In this work we propose a solution for such tasks, which we define as a meta-learning procedure for causal structural discovery (including invariant risk minimization). Using three different OOD tasks, we empirically observe that the proposed approach significantly outperforms existing state-of-the-art PIML and deep learning methods.", "keywords": "physics-informed machine learning;out-of-distribution;robustness;causality", "primary_area": "", "supplementary_material": "", "author": "S Chandra Mouli;Bruno Ribeiro", "authorids": "~S_Chandra_Mouli1;~Bruno_Ribeiro1", "gender": "M;M", "homepage": "https://www.cs.purdue.edu/homes/chandr/;https://www.cs.purdue.edu/homes/ribeirob/", "dblp": "167/6021;15/606", "google_scholar": "https://scholar.google.com/citations?hl=en;KIEleCsAAAAJ", "orcid": ";0000-0002-3527-6192", "linkedin": ";", "or_profile": "~S_Chandra_Mouli1;~Bruno_Ribeiro1", "aff": "Purdue University;Purdue University", "aff_domain": "purdue.edu;purdue.edu", "position": "PhD student;Assistant Professor", "bibtex": "@misc{\nmouli2023metaphysica,\ntitle={MetaPhysiCa: Causality-aware Robustness to {OOD} Initial Conditions in Physics-informed Machine Learning},\nauthor={S Chandra Mouli and Bruno Ribeiro},\nyear={2023},\nurl={https://openreview.net/forum?id=4ojYamKgnQc}\n}", "github": "", "project": "", "reviewers": "t2KB;hnK8;s2pT;rgk4;MowG", "site": "https://openreview.net/forum?id=4ojYamKgnQc", "pdf_size": 1193988, "recommendation": "5;6;6;6;8", "confidence": "4;4;3;4;2", "correctness": "4;3;3;2;4", "technical_novelty": "3;3;3;2;4", "empirical_novelty": "2;3;3;2;4", "wc_summary_paper": "49;83;108;54;19", "wc_strength_and_weaknesses": "319;334;109;701;66", "wc_clarity_quality_novelty_and_reproducibility": "150;54;54;40;14", "wc_summary_review": "117;120;72;158;31", "wc_review": "635;591;343;953;130", "wc_reply_reviewers": "55;40;0;276;11", "wc_reply_authors": "1890;729;516;3049;522", "reply_reviewers": "1;1;0;1;1", "reply_authors": "4;1;1;5;1", "recommendation_avg": [ 6.2, 0.9797958971132712 ], "confidence_avg": [ 3.4, 0.8 ], "correctness_avg": [ 3.2, 0.7483314773547882 ], "technical_novelty_avg": [ 3.0, 0.6324555320336759 ], "empirical_novelty_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_paper_avg": [ 62.6, 30.4538995860957 ], "wc_strength_and_weaknesses_avg": [ 305.8, 
225.11632548529215 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 62.4, 46.17185289762584 ], "wc_summary_review_avg": [ 99.6, 43.81141403789656 ], "wc_review_avg": [ 530.4, 278.7985652760788 ], "wc_reply_reviewers_avg": [ 76.4, 101.72236725519123 ], "wc_reply_authors_avg": [ 1341.2, 994.4440456858296 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 2.4, 1.7435595774162693 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.8675276172357089, "corr_recommendation_correctness": 0.21821789023599233, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:2mJuewxCzeQJ:scholar.google.com/&scioq=MetaPhysiCa:+Causality-aware+Robustness+to+OOD+Initial+Conditions+in+Physics-informed+Machine+Learning&hl=en&as_sdt=0,14", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Purdue University", "aff_unique_dep": "", "aff_unique_url": "https://www.purdue.edu", "aff_unique_abbr": "Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "4orJ47he7WV", "title": "Emergent collective intelligence from massive-agent cooperation and competition", "track": "main", "status": "Reject", "tldr": "", "abstract": "Inspired by organisms evolving through cooperation and competition between different populations on Earth, we study the emergence of artificial collective intelligence through massive-agent reinforcement learning. To this end, we propose a new massive-agent reinforcement learning environment, Lux, where dynamic and massive agents in two teams scramble for limited resources and fight off the darkness. In Lux, we build our agents through the standard reinforcement learning algorithm in curriculum learning phases and leverage centralized control via a pixel-to-pixel policy network. As agents co-evolve through self-play, we observe several stages of intelligence, from the acquisition of atomic skills to the development of group strategies. Since these learned group strategies arise from individual decisions without an explicit coordination mechanism, we claim that artificial collective intelligence emerges from massive-agent cooperation and competition.
We further analyze the emergence of various learned strategies through metrics and ablation studies, aiming to provide insights for reinforcement learning implementations in massive-agent environments.", "keywords": "Reinforcement Learning;Multi-agent System;Emergent Behavior", "primary_area": "", "supplementary_material": "/attachment/cd39fdc13ef48d8651919575da76173084c56ed0.zip", "author": "Hanmo Chen;Stone Tao;Jiaxin Chen;Weihan Shen;Xihui Li;Sikai Cheng;Xiaolong Zhu;Xiu Li", "authorids": "~Hanmo_Chen1;~Stone_Tao1;~Jiaxin_Chen1;~Weihan_Shen1;~Xihui_Li1;~Sikai_Cheng1;~Xiaolong_Zhu1;~Xiu_Li1", "gender": "M;M;F;Not Specified;M;M;Not Specified;F", "homepage": ";https://www.stoneztao.com;;https://github.com/WeihanShen;https://github.com/Galaxy-Li;;http://xiaolongzhu.org;https://thusigsiclab.github.io/thu.github.io/introduction.html", "dblp": ";;65/1392;;;;;13/1206-1", "google_scholar": "0wCbDdYAAAAJ;GAMO0EwAAAAJ;;;;;;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";;;;;;;0000-0003-0403-1923", "linkedin": ";;;;;sikai-c/;;", "or_profile": "~Hanmo_Chen1;~Stone_Tao1;~Jiaxin_Chen1;~Weihan_Shen1;~Xihui_Li1;~Sikai_Cheng1;~Xiaolong_Zhu1;~Xiu_Li1", "aff": "Shenzhen Internation Graduate School, Tsinghua University;University of California, San Diego;Parametrix.ai;Parametrix;Tsinghua University;Georgia Institute of Technology;Parametrix;Tsinghua University", "aff_domain": "mails.tsinghua.edu.cn;ucsd.edu;chaocanshu.ai;chaocanshu.ai;tsinghua.edu.cn;gatech.edu;chaocanshu.ai;tsinghua.edu.cn", "position": "MS student;Undergrad student;Researcher;Researcher;MS student;MS student;Researcher;Professor", "bibtex": "@misc{\nchen2023emergent,\ntitle={Emergent collective intelligence from massive-agent cooperation and competition},\nauthor={Hanmo Chen and Stone Tao and Jiaxin Chen and Weihan Shen and Xihui Li and Sikai Cheng and Xiaolong Zhu and Xiu Li},\nyear={2023},\nurl={https://openreview.net/forum?id=4orJ47he7WV}\n}", "github": "", "project": "", "reviewers": "Qnaj;evMn;ndgW;Sp1k", "site": "https://openreview.net/forum?id=4orJ47he7WV", "pdf_size": 21140450, "recommendation": "1;3;5;6", "confidence": "3;4;4;3", "correctness": "1;3;2;3", "technical_novelty": "1;2;2;3", "empirical_novelty": "0;2;3;3", "wc_summary_paper": "35;69;117;43", "wc_strength_and_weaknesses": "1;245;321;74", "wc_clarity_quality_novelty_and_reproducibility": "1;154;162;37", "wc_summary_review": "1;108;91;29", "wc_review": "38;576;691;183", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.75, 1.920286436967152 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.0, 1.224744871391589 ], "wc_summary_paper_avg": [ 66.0, 32.01562118716424 ], "wc_strength_and_weaknesses_avg": [ 160.25, 128.27972365108994 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 88.5, 70.71244586351118 ], "wc_summary_review_avg": [ 57.25, 43.808532273976034 ], "wc_review_avg": [ 372.0, 269.5616812531039 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": 0.13018891098082386, "corr_recommendation_correctness": 0.6673083711820306, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16672441335523555219&as_sdt=5,40&sciodt=0,40&hl=en", "gs_version_total": 
4, "aff_unique_index": "0;1;2;2;0;3;2;0", "aff_unique_norm": "Tsinghua University;University of California, San Diego;Parametrix;Georgia Institute of Technology", "aff_unique_dep": "Graduate School;;;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.ucsd.edu;https://www.parametrix.ai;https://www.gatech.edu", "aff_unique_abbr": "THU;UCSD;Parametrix;Georgia Tech", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Shenzhen;San Diego;", "aff_country_unique_index": "0;1;1;0;1;0", "aff_country_unique": "China;United States;" }, { "id": "4phxC1MmcfN", "title": "Fast exploration and learning of latent graphs with aliased observations", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "We consider the problem of quickly recovering the structure of a latent graph by navigating in it, when the agent can only perform stochastic actions and ---crucially--- different nodes may emit the same observation. This corresponds to learning the transition function of a partially observable Markov decision process (POMDP) in which observations are deterministic. This is highly relevant for partially observed reinforcement learning, where the agent needs to swiftly learn how to navigate new environments from sensory observations. The challenge involves solving two related problems: exploring the graph as fast as possible, and learning it from the obtained aliased observations, where the learning helps to explore faster. Our approach leverages a recently proposed model, the Clone Structured Cognitive Graph (CSCG), which can handle aliasing, and guide exploration. We provide empirical evidence that our model-based algorithm can recover graphs from a wide range of challenging topologies, and shows linear scaling with graph size even for severely aliased and loopy graph structures where model-free methods require an exponential number of steps.", "keywords": "graph learning;fast exploration;aliased environments;POMDPs", "primary_area": "", "supplementary_material": "", "author": "Sivaramakrishnan Swaminathan;Meet Dave;Miguel Lazaro-Gredilla;Dileep George", "authorids": "~Sivaramakrishnan_Swaminathan1;~Meet_Dave1;~Miguel_Lazaro-Gredilla1;~Dileep_George1", "gender": ";;M;", "homepage": "http://sivark.me;;;", "dblp": "342/7709.html;342/7305;77/4660;", "google_scholar": "oDVFD5oAAAAJ;https://scholar.google.com/citations?hl=en;SFjDQk8AAAAJ;", "orcid": ";;;", "linkedin": ";meetdave06;miguel-lazaro-g/;", "or_profile": "~Sivaramakrishnan_Swaminathan1;~Meet_Dave1;~Miguel_Lazaro-Gredilla1;~Dileep_George1", "aff": "Google DeepMind;Google DeepMind;Google Deepmind;Vicarious AI", "aff_domain": "deepmind.com;google.com;google.com;vicarious.com", "position": "Research Engineer;Research Engineer;Research Scientist;Co-founder", "bibtex": "@misc{\nswaminathan2023fast,\ntitle={Fast exploration and learning of latent graphs with aliased observations},\nauthor={Sivaramakrishnan Swaminathan and Meet Dave and Miguel Lazaro-Gredilla and Dileep George},\nyear={2023},\nurl={https://openreview.net/forum?id=4phxC1MmcfN}\n}", "github": "", "project": "", "reviewers": "", "site": "https://openreview.net/forum?id=4phxC1MmcfN", "pdf_size": 1739975, "recommendation": "", "confidence": "", "correctness": "", "technical_novelty": "", "empirical_novelty": "", "wc_summary_paper": "", "wc_strength_and_weaknesses": "", "wc_clarity_quality_novelty_and_reproducibility": "", "wc_summary_review": "", "wc_review": "", "wc_reply_reviewers": "", "wc_reply_authors": "", "reply_reviewers": "", "reply_authors": "", 
"recommendation_avg": [ 0, 0 ], "confidence_avg": [ 0, 0 ], "correctness_avg": [ 0, 0 ], "technical_novelty_avg": [ 0, 0 ], "empirical_novelty_avg": [ 0, 0 ], "wc_summary_paper_avg": [ 0, 0 ], "wc_strength_and_weaknesses_avg": [ 0, 0 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 0, 0 ], "wc_summary_review_avg": [ 0, 0 ], "wc_review_avg": [ 0, 0 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 1, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0, "corr_recommendation_correctness": 0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11060387588494297181&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Google;DeepMind;Vicarious AI", "aff_unique_dep": "Google DeepMind;DeepMind;", "aff_unique_url": "https://deepmind.com;https://deepmind.com;https://www.vicarious.com", "aff_unique_abbr": "DeepMind;DeepMind;Vicarious AI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Mind's Eye: Grounded Language Model Reasoning through Simulation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11291", "id": "4rXMRuoJlai", "poster": "", "openreview": "https://openreview.net/forum?id=4rXMRuoJlai", "slides": "https://iclr.cc/virtual/2023/poster/11291", "video": "https://iclr.cc/virtual/2023/poster/11291", "author_site": "Ruibo Liu, Jason Wei, Shixiang Gu, Te-Yen Wu, Soroush Vosoughi, Claire Cui, Denny Zhou, Andrew Dai", "tldr": "We present a new reasoning paradigm that grounds language model reasoning on simulation results from the advanced physics engine MuJoCo.", "abstract": "Successful and effective communication between humans and AI relies on a shared experience of the world. By training solely on written text, current language models (LMs) miss the grounded experience of humans in the real-world---their failure to relate language to the physical world causes knowledge to be misrepresented and obvious mistakes in their reasoning. We present Mind's Eye, a paradigm to ground language model reasoning in the physical world. Given a physical reasoning question, we use a computational physics engine (DeepMind's MuJoCo) to simulate the possible outcomes, and then use the simulation results as part of the input, which enables language models to perform reasoning. Experiments on 39 tasks in a physics alignment benchmark demonstrate that Mind's Eye can improve reasoning ability by a large margin (27.9% zero-shot, and 46.0% few-shot absolute accuracy improvement on average). Smaller language models armed with Mind's Eye can obtain similar performance to models that are 100x larger. Finally, we confirm the robustness of Mind's Eye through ablation studies.", "keywords": "reasoning;alignment;simulation;physics;grounding", "primary_area": "", "supplementary_material": "/attachment/9ffa77f471e27be35f281097b8d2de3710b76f05.zip", "author": "Ruibo Liu;Jason Wei;Shixiang Shane Gu;Te-Yen Wu;Soroush Vosoughi;Claire Cui;Denny Zhou;Andrew M. 
Dai", "authorids": "~Ruibo_Liu1;~Jason_Wei1;~Shixiang_Shane_Gu1;~Te-Yen_Wu1;~Soroush_Vosoughi1;claire@google.com;~Denny_Zhou1;~Andrew_M._Dai1", "gender": "M;M;;M;;;;", "homepage": "https://www.cs.dartmouth.edu/~rbliu/;https://jasonwei20.github.io;;http://teyenwu.com/;https://www.cs.dartmouth.edu/~soroush/;;;", "dblp": ";02/11220.html;;;01/1709;;;", "google_scholar": "5lgfeo4AAAAJ;;;;45DAXkwAAAAJ;;;", "orcid": ";;;;0000-0002-2564-8909;;;", "linkedin": ";;;;;;;", "or_profile": "~Ruibo_Liu1;~Jason_Wei1;~Shixiang_Shane_Gu1;~Te-Yen_Wu1;~Soroush_Vosoughi1;claire@google.com;~Denny_Zhou1;~Andrew_M._Dai1", "aff": "Google DeepMind;OpenAI;;Dartmouth College;Dartmouth College;;;", "aff_domain": "google.com;openai.com;;dartmouth.edu;dartmouth.edu;;;", "position": "Researcher;Researcher;;PhD student;Assistant Professor;;;", "bibtex": "@inproceedings{\nliu2023minds,\ntitle={Mind's Eye: Grounded Language Model Reasoning through Simulation},\nauthor={Ruibo Liu and Jason Wei and Shixiang Shane Gu and Te-Yen Wu and Soroush Vosoughi and Claire Cui and Denny Zhou and Andrew M. Dai},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=4rXMRuoJlai}\n}", "github": "", "project": "", "reviewers": "46te;pY5b;TaNC", "pdf_size": 1093777, "recommendation": "6;6;8", "confidence": "3;5;5", "correctness": "4;4;4", "technical_novelty": "2;3;4", "empirical_novelty": "3;3;4", "wc_summary_paper": "111;105;20", "wc_strength_and_weaknesses": "279;264;76", "wc_clarity_quality_novelty_and_reproducibility": "49;17;25", "wc_summary_review": "50;7;34", "wc_review": "489;393;155", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "630;757;70", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.9428090415820634 ], "correctness_avg": [ 4.0, 0.0 ], "technical_novelty_avg": [ 3.0, 0.816496580927726 ], "empirical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 78.66666666666667, 41.55585264302597 ], "wc_strength_and_weaknesses_avg": [ 206.33333333333334, 92.36281117900694 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 30.333333333333332, 13.59738536958076 ], "wc_summary_review_avg": [ 30.333333333333332, 17.745108872274887 ], "wc_review_avg": [ 345.6666666666667, 140.40259573415617 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 485.6666666666667, 298.45863290505696 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": 0.49999999999999983, "corr_recommendation_correctness": 0.0, "gs_citation": 84, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1515592209914067891&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=4rXMRuoJlai", "email": "google.com;openai.com;;dartmouth.edu;dartmouth.edu;;;", "author_num": 8, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "Google;OpenAI;Dartmouth College", "aff_unique_dep": "Google DeepMind;;", "aff_unique_url": "https://deepmind.com;https://openai.com;https://www.dartmouth.edu", "aff_unique_abbr": "DeepMind;OpenAI;Dartmouth", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Inequality phenomenon in $l_{\\infty}$-adversarial training, and its unrealized threats", "status": "Top-25%", 
"track": "main", "site": "https://iclr.cc/virtual/2023/poster/11267", "id": "4t9q35BxGr", "poster": "", "openreview": "https://openreview.net/forum?id=4t9q35BxGr", "slides": "https://iclr.cc/virtual/2023/poster/11267", "video": "https://iclr.cc/virtual/2023/poster/11267", "author_site": "Ranjie Duan, YueFeng Chen, Yao Zhu, Xiaojun Jia, Rong Zhang, Hui Xue'", "tldr": "We find an intriguing phenomena of $l_{\\infty}$ adversarial training, and this phenomena brings unrealized threats to adversarially trained model.", "abstract": "The appearance of adversarial examples raises attention from both academia and industry. Along with the attack-defense arms race, adversarial training is the most effective against adversarial examples.\nHowever, we find inequality phenomena occur during the $l_{\\infty}$-adversarial training, that few features dominate the prediction made by the adversarially trained model. We systematically evaluate such inequality phenomena by extensive experiments and find such phenomena become more obvious when performing adversarial training with increasing adversarial strength (evaluated by $\\epsilon$). We hypothesize such inequality phenomena make $l_{\\infty}$-adversarially trained model less reliable than the standard trained model when few ``important features\" are influenced. To validate our hypothesis, we proposed two simple attacks that either perturb or replace important features with noise or occlusion. Experiments show that $l_{\\infty}$-adversarially trained model can be easily attacked when the few important features are influenced. \nOur work shed light on the limitation of the practicality of $l_{\\infty}$-adversarial training.", "keywords": "Adversarial training;Adversarial robustness;Adversarial feature represenation", "primary_area": "", "supplementary_material": "", "author": "Ranjie Duan;YueFeng Chen;Yao Zhu;Xiaojun Jia;Rong Zhang;Hui Xue'", "authorids": "~Ranjie_Duan1;~YueFeng_Chen1;~Yao_Zhu2;~Xiaojun_Jia1;~Rong_Zhang2;~Hui_Xue'1", "gender": "Not Specified;M;M;M;M;M", "homepage": ";;;https://jiaxiaojunqaq.github.io/;;http://www.alibaba.com", "dblp": "261/3330.html;52/8180;;;13/5366-2;", "google_scholar": ";Kf-IpFsAAAAJ;Te8bmo0AAAAJ;https://scholar.google.com/citations?hl=zh-CN;;", "orcid": ";;0000-0003-0991-1970;0000-0002-2018-9344;;", "linkedin": ";;;;;", "or_profile": "~Ranjie_Duan1;~YueFeng_Chen1;~Yao_Zhu2;~Xiaojun_Jia1;~Rong_Zhang2;~Hui_Xue'1", "aff": ", Tsinghua University;Alibaba Group;Zhejiang University;Chinese Academy of Sciences;;Alibaba Group", "aff_domain": "cs.tsinghua.edu.cn;alibaba-inc.com;zju.edu.cn;ucas.ac.cn;;alibaba-inc.com", "position": "Postdoc;Staff Algorithm Engineer;PhD student;PhD student;;Principal Researcher", "bibtex": "@inproceedings{\nduan2023inequality,\ntitle={Inequality phenomenon in \\$l\\_\\{{\\textbackslash}infty\\}\\$-adversarial training, and its unrealized threats},\nauthor={Ranjie Duan and YueFeng Chen and Yao Zhu and Xiaojun Jia and Rong Zhang and Hui Xue'},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=4t9q35BxGr}\n}", "github": "", "project": "", "reviewers": "1FTk;Mxrb;AXew;yDNt", "pdf_size": 12955943, "recommendation": "8;8;8;8", "confidence": "4;4;4;4", "correctness": "4;4;3;4", "technical_novelty": "3;4;3;3", "empirical_novelty": "4;3;3;3", "wc_summary_paper": "47;41;80;72", "wc_strength_and_weaknesses": "372;176;340;108", "wc_clarity_quality_novelty_and_reproducibility": "88;28;38;26", "wc_summary_review": "100;63;37;31", 
"wc_review": "607;308;495;237", "wc_reply_reviewers": "102;0;167;0", "wc_reply_authors": "1572;204;1451;139", "reply_reviewers": "1;0;1;0", "reply_authors": "4;1;5;1", "recommendation_avg": [ 8.0, 0.0 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 60.0, 16.38596960817394 ], "wc_strength_and_weaknesses_avg": [ 249.0, 110.24971655292362 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 45.0, 25.238858928247925 ], "wc_summary_review_avg": [ 57.75, 27.19719654670312 ], "wc_review_avg": [ 411.75, 146.9308936200961 ], "wc_reply_reviewers_avg": [ 67.25, 71.06818908625715 ], "wc_reply_authors_avg": [ 841.5, 671.7575827633061 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.75, 1.7853571071357126 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:XdmVCPsG0LkJ:scholar.google.com/&scioq=Inequality+phenomenon+in+%24l_%7B%5Cinfty%7D%24-adversarial+training,+and+its+unrealized+threats&hl=en&as_sdt=0,40", "gs_version_total": 0, "pdf": "https://openreview.net/pdf?id=4t9q35BxGr", "email": "cs.tsinghua.edu.cn;alibaba-inc.com;zju.edu.cn;ucas.ac.cn;;alibaba-inc.com", "author_num": 6, "aff_unique_index": "0;1;2;3;1", "aff_unique_norm": "Tsinghua University;Alibaba Group;Zhejiang University;Chinese Academy of Sciences", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.alibaba.com;https://www.zju.edu.cn;https://www.cas.cn", "aff_unique_abbr": "THU;Alibaba;ZJU;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "4tsqGWfBb3Q", "title": "Revisiting Residual Networks for Adversarial Robustness", "track": "main", "status": "Withdraw", "tldr": "Designing robust convolutional neural networks against adversarial attack. ", "abstract": "Convolutional neural networks are known to be vulnerable to adversarial attacks. Solutions to improve their robustness have largely focused on developing more effective adversarial training methods, while limited efforts have been devoted to analyzing the role of architectural elements (such as topology, depth, and width) on adversarial robustness. This paper seeks to resolve this limitation and present a holistic study on the impact of architecture choice on adversarial robustness. We focus on residual networks and consider architecture design at the block level, i.e., topology, kernel size, activation, and normalization, as well as at the network scaling level, i.e., depth and width of each block in the network. We first derive insights on the block structure through systematic ablative experiments and design a novel residual block, dubbed RobustResBlock. It improves CW40 robust accuracy by \u223c3% over Wide residual networks (WRNs), the de facto architecture of choice for designing robust architectures. Then we derive insights on the impact of depth and width of the network and design a compound scaling rule, dubbed RobustScaling, to distribute depth and width at a given desired FLOP count. Finally, we combine RobustResBlock and RobustScaling and present a portfolio of adversarially robust residual networks, RobustResNets, spanning a wide spectrum of model capacities. 
Experimental validation on three datasets across four adversarial attacks demonstrates that RobustResNets consistently outperform both the standard WRNs (3 \u223c 4% improvement in robust accuracy while saving about half the parameters) and other robust architectures proposed by existing works.", "keywords": "Adversarial robustness;neural architecture design", "primary_area": "", "supplementary_material": "/attachment/5c06b2e1c58e280feaf7481a517a4c3670190ddb.zip", "author": "Shihua Huang;Zhichao Lu;Kalyanmoy Deb;Vishnu Boddeti", "authorids": "~Shihua_Huang1;~Zhichao_Lu1;~Kalyanmoy_Deb1;~Vishnu_Boddeti1", "gender": "M;M;M;M", "homepage": "http://www.shihuahuang.cn/;https://www.cs.cityu.edu.hk/~zhichalu/;https://www.egr.msu.edu/~kdeb/;https://hal.cse.msu.edu", "dblp": ";144/1417;https://dblp.org/pers/d/Deb:Kalyanmoy.html;55/6988", "google_scholar": "YVZLfBUAAAAJ;tIFWBcQAAAAJ;https://scholar.google.com/citations?hl=en;JKcrO9IAAAAJ", "orcid": ";0000-0002-4618-3573;0000-0001-7402-9939;", "linkedin": ";zhichao-lu-728037b4/;kalyanmoy-deb-91748136;", "or_profile": "~Shihua_Huang1;~Zhichao_Lu1;~Kalyanmoy_Deb1;~Vishnu_Boddeti1", "aff": "Michigan State University;SUN YAT-SEN UNIVERSITY;Michigan State University;Michigan State University", "aff_domain": "msu.edu;sysu.edu.cn;msu.edu;msu.edu", "position": "PhD student;Assistant Professor;Full Professor;Assistant Professor", "bibtex": "@misc{\nhuang2023revisiting,\ntitle={Revisiting Residual Networks for Adversarial Robustness},\nauthor={Shihua Huang and Zhichao Lu and Kalyanmoy Deb and Vishnu Boddeti},\nyear={2023},\nurl={https://openreview.net/forum?id=4tsqGWfBb3Q}\n}", "github": "", "project": "", "reviewers": "u5oP;mCXU;qiD6;yrZV;81Lk", "site": "https://openreview.net/forum?id=4tsqGWfBb3Q", "pdf_size": 11984067, "recommendation": "3;5;5;5;5", "confidence": "4;5;5;5;4", "correctness": "3;3;3;3;4", "technical_novelty": "2;2;2;2;1", "empirical_novelty": "2;2;3;2;3", "wc_summary_paper": "52;74;48;52;26", "wc_strength_and_weaknesses": "453;207;385;306;159", "wc_clarity_quality_novelty_and_reproducibility": "24;16;428;20;36", "wc_summary_review": "150;43;89;5;84", "wc_review": "679;340;950;383;305", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;0;0", "reply_authors": "0;0;0;0;0", "recommendation_avg": [ 4.6, 0.7999999999999999 ], "confidence_avg": [ 4.6, 0.4898979485566356 ], "correctness_avg": [ 3.2, 0.39999999999999997 ], "technical_novelty_avg": [ 1.8, 0.4 ], "empirical_novelty_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_paper_avg": [ 50.4, 15.252540771950095 ], "wc_strength_and_weaknesses_avg": [ 302.0, 108.79338215167317 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 104.8, 161.7385544636776 ], "wc_summary_review_avg": [ 74.2, 48.626741614054296 ], "wc_review_avg": [ 531.4, 247.760045205033 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.6123724356957948, "corr_recommendation_correctness": 0.2500000000000001, "gs_citation": 59, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=839024245800338381&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Michigan State University;Sun Yat-sen University", "aff_unique_dep": ";", "aff_unique_url": "https://www.msu.edu;http://www.sysu.edu.cn", "aff_unique_abbr": "MSU;SYSU", "aff_campus_unique_index": "",
"aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;China" }, { "title": "Using Both Demonstrations and Language Instructions to Efficiently Learn Robotic Tasks", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10957", "id": "4u42KCQxCn8", "poster": "/media/PosterPDFs/ICLR%202023/10957.png?t=1682108161.9038146", "openreview": "https://openreview.net/forum?id=4u42KCQxCn8", "slides": "https://iclr.cc/virtual/2023/poster/10957", "video": "https://iclr.cc/virtual/2023/poster/10957", "author_site": "Albert Yu, Raymond Mooney", "tldr": "Conditioning robotic manipulation policies on both demonstrations and language instructions improves sample efficiency and generalization to novel tasks.", "abstract": "Demonstrations and natural language instructions are two common ways to specify and teach robots novel tasks. However, for many complex tasks, a demonstration or language instruction alone contains ambiguities, preventing tasks from being specified clearly. In such cases, a combination of both a demonstration and an instruction more concisely and effectively conveys the task to the robot than either modality alone. To instantiate this problem setting, we train a single multi-task policy on a few hundred challenging robotic pick-and-place tasks and propose DeL-TaCo (Joint Demo-Language Task Conditioning), a method for conditioning a robotic policy on task embeddings comprised of two components: a visual demonstration and a language instruction. By allowing these two modalities to mutually disambiguate and clarify each other during novel task specification, DeL-TaCo (1) substantially decreases the teacher effort needed to specify a new task and (2) achieves better generalization performance on novel objects and instructions over previous task-conditioning methods. 
To our knowledge, this is the first work to show that simultaneously conditioning a multi-task robotic manipulation policy on both demonstration and language embeddings improves sample efficiency and generalization over conditioning on either modality alone.", "keywords": "natural language for robotics;instruction following;learning from demonstrations;multi-task learning;robotic manipulation", "primary_area": "", "supplementary_material": "", "author": "Albert Yu;Ray Mooney", "authorids": "~Albert_Yu1;~Ray_Mooney1", "gender": ";M", "homepage": ";https://www.cs.utexas.edu/~mooney/", "dblp": "61/5253.html;m/RaymondJMooney.html", "google_scholar": "ZzURcb4AAAAJ;p9RsPG4AAAAJ", "orcid": ";0000-0002-4504-0490", "linkedin": "alberty101/;", "or_profile": "~Albert_Yu1;~Ray_Mooney1", "aff": "University of Texas at Austin;University of Texas at Austin", "aff_domain": "utexas.edu;cs.utexas.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nyu2023using,\ntitle={Using Both Demonstrations and Language Instructions to Efficiently Learn Robotic Tasks},\nauthor={Albert Yu and Ray Mooney},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=4u42KCQxCn8}\n}", "github": "", "project": "", "reviewers": "5Uyd;Ch3U;p2aG;4Ugt", "pdf_size": 1683038, "recommendation": "6;6;6;8", "confidence": "5;4;3;2", "correctness": "4;3;4;3", "technical_novelty": "2;3;2;3", "empirical_novelty": "2;3;2;3", "wc_summary_paper": "88;67;78;44", "wc_strength_and_weaknesses": "205;489;440;114", "wc_clarity_quality_novelty_and_reproducibility": "183;207;55;15", "wc_summary_review": "76;57;38;48", "wc_review": "552;820;611;221", "wc_reply_reviewers": "0;353;12;0", "wc_reply_authors": "792;2132;1140;307", "reply_reviewers": "0;4;1;0", "reply_authors": "2;7;3;1", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 69.25, 16.361158271956175 ], "wc_strength_and_weaknesses_avg": [ 312.0, 156.81677206217452 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 115.0, 81.68231142664855 ], "wc_summary_review_avg": [ 54.75, 13.988834833537782 ], "wc_review_avg": [ 551.0, 214.97790584150735 ], "wc_reply_reviewers_avg": [ 91.25, 151.2008184501658 ], "wc_reply_authors_avg": [ 1092.75, 668.9780919432264 ], "reply_reviewers_avg": [ 1.25, 1.6393596310755 ], "reply_authors_avg": [ 3.25, 2.277608394786075 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.7745966692414834, "corr_recommendation_correctness": -0.5773502691896257, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=679048483369398188&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=4u42KCQxCn8", "email": "utexas.edu;cs.utexas.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "4vGwQqviud5", "title": "DPM-Solver++: Fast Solver for Guided Sampling of Diffusion Probabilistic Models", "track": "main", "status": "Reject", "tldr": "We propose a fast ODE solver for guided sampling of diffusion probabilistic 
models in around 15 to 20 steps.", "abstract": "Diffusion probabilistic models (DPMs) have achieved impressive success in high-resolution image synthesis, especially in recent large-scale text-to-image generation applications. An essential technique for improving the sample quality of DPMs is guided sampling, which usually needs a large guidance scale to obtain the best sample quality. The commonly-used fast sampler for guided sampling is DDIM, a first-order diffusion ODE solver that generally needs 100 to 250 steps for high-quality samples. Although recent works propose dedicated high-order solvers and achieve a further speedup for sampling without guidance, their effectiveness for guided sampling has not been well-tested before. In this work, we demonstrate that previous high-order fast samplers suffer from instability issues, and they even become slower than DDIM when the guidance scale grows large. To further speed up guided sampling, we propose DPM-Solver++, a high-order solver for the guided sampling of DPMs. DPM-Solver++ solves the diffusion ODE with the data prediction model and adopts thresholding methods to keep the solution consistent with the training data distribution. We further propose a multistep variant of DPM-Solver++ to address the instability issue by reducing the effective step size. Experiments show that DPM-Solver++ can generate high-quality samples within only 15 to 20 steps for guided sampling by pixel-space and latent-space DPMs.\n", "keywords": "diffusion probabilistic models;score-based generative models;fast sampling;guided sampling", "primary_area": "", "supplementary_material": "", "author": "Cheng Lu;Yuhao Zhou;Fan Bao;Jianfei Chen;Chongxuan Li;Jun Zhu", "authorids": "~Cheng_Lu5;~Yuhao_Zhou2;~Fan_Bao1;~Jianfei_Chen1;~Chongxuan_Li1;~Jun_Zhu2", "gender": "M;M;M;M;M;M", "homepage": "https://luchengthu.github.io/;https://yuhaoz.com;https://baofff.github.io/;http://ml.cs.tsinghua.edu.cn/~jianfei;http://ml.cs.tsinghua.edu.cn/~chongxuan;http://ml.cs.tsinghua.edu.cn/~jun", "dblp": "91/1482-11;;71/3877;48/6809-1;161/9965;50/2644-1", "google_scholar": "vPE9VRoAAAAJ;GKLRbxoAAAAJ;;di5RZ1MAAAAJ;UKMcQn4AAAAJ;axsP38wAAAAJ", "orcid": ";;;;0000-0002-0912-9076;", "linkedin": ";;;;;", "or_profile": "~Cheng_Lu5;~Yuhao_Zhou2;~Fan_Bao1;~Jianfei_Chen1;~Chongxuan_Li1;~Jun_Zhu2", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University;Renmin University of China;Tsinghua University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;ruc.edu.cn;mail.tsinghua.edu.cn", "position": "PhD student;PhD student;PhD student;Assistant Professor;Assistant Professor;Professor", "bibtex": "@misc{\nlu2023dpmsolver,\ntitle={{DPM}-Solver++: Fast Solver for Guided Sampling of Diffusion Probabilistic Models},\nauthor={Cheng Lu and Yuhao Zhou and Fan Bao and Jianfei Chen and Chongxuan Li and Jun Zhu},\nyear={2023},\nurl={https://openreview.net/forum?id=4vGwQqviud5}\n}", "github": "", "project": "", "reviewers": "s8Si;zAj6;a1rL;8HFn", "site": "https://openreview.net/forum?id=4vGwQqviud5", "pdf_size": 11282134, "recommendation": "5;5;5;6", "confidence": "3;4;4;4", "correctness": "4;1;4;3", "technical_novelty": "2;2;2;2", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "97;26;118;203", "wc_strength_and_weaknesses": "127;188;171;404", "wc_clarity_quality_novelty_and_reproducibility": "28;8;2;119", "wc_summary_review": "49;41;30;116", "wc_review": "301;263;321;842", "wc_reply_reviewers": "0;197;130;93", "wc_reply_authors": "655;655;531;803",
"reply_reviewers": "0;1;1;1", "reply_authors": "2;2;2;3", "recommendation_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.0, 1.224744871391589 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 111.0, 63.11497445139307 ], "wc_strength_and_weaknesses_avg": [ 222.5, 107.1272607696099 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 39.25, 47.03921236585494 ], "wc_summary_review_avg": [ 59.0, 33.59315406448165 ], "wc_review_avg": [ 431.75, 237.77234385016268 ], "wc_reply_reviewers_avg": [ 105.0, 71.16530053333577 ], "wc_reply_authors_avg": [ 661.0, 96.35351576356723 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 0.0, "gs_citation": 543, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9927433163087067144&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "Tsinghua University;Renmin University of China", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;http://www.ruc.edu.cn", "aff_unique_abbr": "THU;RUC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "4vYWYGd13cZ", "title": "Targeted Attacks on Timeseries Forecasting", "track": "main", "status": "Reject", "tldr": "", "abstract": "Real-world deep learning models built for Time Series Forecasting are used in several critical applications from medical devices to the security domain. Many previous works have shown how deep learning models are prone to adversarial attacks and studied their vulnerabilities. However, the vulnerabilities of time series models for forecasting due to adversarial inputs are not extensively studied. While attack on a forecasting model might be intended to deteriorate the performance of the model, it is more effective, if the attack is focused on a specific impact on the model's output. In this paper, we propose a novel formulation of Directional, Amplitudinal, and Temporal targeted adversarial attacks on time series forecasting models. These targeted attacks create a specific impact or hide a potential high-impact area on the forecasting output. We use the existing adversarial attack techniques from the computer vision domain and adapt them for time series. Additionally, we propose a modified version of the Auto Projected Gradient Descent attack for targeted attacks. We explore the impact of the proposed targeted attacks against untargeted attacks. We use KS-Tests to statistically prove the impact of the attack. Our experimental results demonstrate how targeted attacks on time series models are practical and are more powerful in terms of statistical similarity. It is, hence difficult to detect through statistical methods. 
We believe that this work opens a new paradigm in the time series forecasting domain and is an important consideration for developing better defenses.", "keywords": "timeseries;forecasting;targeted attacks;adversarial ml;ai security;apgd", "primary_area": "", "supplementary_material": "", "author": "Yuvaraj Govindarajulu;Avinash Amballa;Pavan Kulkarni;Manojkumar Parmar", "authorids": "~Yuvaraj_Govindarajulu1;~Avinash_Amballa2;~Pavan_Kulkarni1;~Manojkumar_Parmar1", "gender": "M;M;M;M", "homepage": "https://yuvaraj-rajulu.github.io/;https://amballaavinash.github.io/;;", "dblp": "338/8780;;;232/7365", "google_scholar": "Mjctn_sAAAAJ;;;Pf4mIdQAAAAJ", "orcid": "0000-0002-4247-4410;;;0000-0002-1183-4399", "linkedin": "yuvaraj-govindarajulu/;https://in.linkedin.com/in/avinashamballa;pavan-kulkarni-b12b4724/;", "or_profile": "~Yuvaraj_Govindarajulu1;~Avinash_Amballa2;~Pavan_Kulkarni1;~Manojkumar_Parmar1", "aff": "Bosch;Bosch;Bosch;Bosch", "aff_domain": "bosch.com;bosch.com;bosch.com;bosch.com", "position": "Researcher;Researcher;Researcher;Principal Researcher", "bibtex": "@misc{\ngovindarajulu2023targeted,\ntitle={Targeted Attacks on Timeseries Forecasting},\nauthor={Yuvaraj Govindarajulu and Avinash Amballa and Pavan Kulkarni and Manojkumar Parmar},\nyear={2023},\nurl={https://openreview.net/forum?id=4vYWYGd13cZ}\n}", "github": "", "project": "", "reviewers": "6F4s;zM1v;oRvB;X1J3", "site": "https://openreview.net/forum?id=4vYWYGd13cZ", "pdf_size": 551878, "recommendation": "3;3;5;5", "confidence": "5;5;3;3", "correctness": "2;2;4;4", "technical_novelty": "2;2;2;3", "empirical_novelty": "1;2;2;2", "wc_summary_paper": "93;22;33;67", "wc_strength_and_weaknesses": "36;75;35;300", "wc_clarity_quality_novelty_and_reproducibility": "111;133;48;60", "wc_summary_review": "1635;13;21;22", "wc_review": "1875;243;137;449", "wc_reply_reviewers": "0;0;0;100", "wc_reply_authors": "786;223;181;382", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;2", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.0, 1.0 ], "correctness_avg": [ 3.0, 1.0 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 53.75, 28.083580612165537 ], "wc_strength_and_weaknesses_avg": [ 111.5, 110.01931648578808 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 88.0, 35.13545218152173 ], "wc_summary_review_avg": [ 422.75, 699.9015555776398 ], "wc_review_avg": [ 676.0, 701.2738409494539 ], "wc_reply_reviewers_avg": [ 25.0, 43.30127018922193 ], "wc_reply_authors_avg": [ 393.0, 238.96338631681633 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": 1.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2554604979842187579&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Robert Bosch GmbH", "aff_unique_dep": "", "aff_unique_url": "https://www.bosch.com", "aff_unique_abbr": "Bosch", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "id": "4vfv4GDG6G", "title": "Agent Prioritization with Interpretable Relation for Trajectory Prediction", "track": "main", "status": "Reject", "tldr": "", "abstract": "In this paper, we present a novel multi-agent trajectory prediction model, which discovers 
interpretable relations among agents and prioritizes each agent's motion. Different from existing approaches, our interpretable design is inspired by the fundamental navigation and motion functions of agent movements, which represent 'where' and 'how' the agents move in the scenes. Specifically, it generates the relation matrix, where each element indicates the motion impact from one to another. In addition, in highly interactive scenarios, one agent may implicitly gain higher priority to move, while the motion of other agents may be impacted by the prioritized agents with higher priority (e.g., a vehicle stopping or reducing its speed due to crossing pedestrians). Based on this intuition, we design a novel motion prioritization module to learn the agent motion priorities based on the inferred relation matrix. Then, a decoder is proposed to sequentially predict and iteratively update the future trajectories of each agent based on their priority orders and the learned relation structures. We first demonstrate the effectiveness of our prediction model on the simulated Charged Particles dataset. Next, extensive evaluations are performed on commonly-used datasets for robot navigation, human-robot interactions, and autonomous agents: real-world NBA basketball and INTERACTION. Finally, we show that the proposed model outperforms other state-of-the-art relation-based methods, and is capable of inferring interpretable, meaningful relations among agents.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Manh Huynh;Hengbo Ma;Gita Alaghband;Chiho Choi", "authorids": "~Manh_Huynh1;~Hengbo_Ma1;~Gita_Alaghband1;~Chiho_Choi2", "gender": "M;;F;M", "homepage": "https://scholar.google.com/citations?user=ZyumSGEAAAAJ&hl=en;;http://cse.ucdenver.edu/~gita/;https://chihochoi.github.io/index.html", "dblp": ";;;176/1540", "google_scholar": "ZyumSGEAAAAJ;;;iSFDVj4AAAAJ", "orcid": ";;;0000-0002-0196-2039", "linkedin": ";;;chihochoi/", "or_profile": "~Manh_Huynh1;~Hengbo_Ma1;~Gita_Alaghband1;~Chiho_Choi2", "aff": ";;University of Colorado - Denver;Samsung", "aff_domain": ";;;samsung.com", "position": ";;Full Professor;Sr Staff Engineer", "bibtex": "@misc{\nhuynh2023agent,\ntitle={Agent Prioritization with Interpretable Relation for Trajectory Prediction},\nauthor={Manh Huynh and Hengbo Ma and Gita Alaghband and Chiho Choi},\nyear={2023},\nurl={https://openreview.net/forum?id=4vfv4GDG6G}\n}", "github": "", "project": "", "reviewers": "H2is;8XRE;fWdF", "site": "https://openreview.net/forum?id=4vfv4GDG6G", "pdf_size": 3608642, "recommendation": "3;5;5", "confidence": "5;3;4", "correctness": "3;3;3", "technical_novelty": "3;3;2", "empirical_novelty": "3;2;3", "wc_summary_paper": "37;77;139", "wc_strength_and_weaknesses": "310;368;83", "wc_clarity_quality_novelty_and_reproducibility": "20;112;280", "wc_summary_review": "22;93;39", "wc_review": "389;650;541", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 84.33333333333333, 41.96294661828324 ], "wc_strength_and_weaknesses_avg": [ 253.66666666666666, 122.98057660550394 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 137.33333333333334, 107.64550875701018 ],
"wc_summary_review_avg": [ 51.333333333333336, 30.26916289265731 ], "wc_review_avg": [ 526.6666666666666, 107.03374338133848 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.8660254037844385, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:n8LHIn6GyEYJ:scholar.google.com/&scioq=Agent+Prioritization+with+Interpretable+Relation+for+Trajectory+Prediction&hl=en&as_sdt=0,33", "gs_version_total": 3, "aff_unique_index": "0;1", "aff_unique_norm": "University of Colorado Denver;Samsung", "aff_unique_dep": ";Samsung", "aff_unique_url": "https://www.ucdenver.edu;https://www.samsung.com", "aff_unique_abbr": "UC Denver;Samsung", "aff_campus_unique_index": "0", "aff_campus_unique": "Denver;", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;South Korea" }, { "id": "4wXotzMJ7Wo", "title": "Towards Representative Subset Selection for Self-Supervised Speech Recognition", "track": "main", "status": "Withdraw", "tldr": "A new data subset selection method for self-supervised speech recognition that performs better than existing dataset pruning strategies.", "abstract": "Self-supervised speech recognition models require considerable labeled training data for learning high-fidelity representations for Automatic Speech Recognition (ASR) which is computationally demanding and time-consuming, thereby hindering the usage of these models in resource-constrained environments. We consider the task of identifying an optimal subset of data to train self-supervised speech models for ASR. We make a surprising observation that the dataset pruning strategies used in vision tasks for sampling the most informative examples do not perform better than random subset selection on the task of fine-tuning self-supervised ASR. We then present the COWERAGE algorithm for better subset selection in self-supervised ASR, which is based on our finding that ensuring the coverage of examples based on training Word Error Rate (WER) in the early training epochs leads to better generalization performance. Extensive experiments on the wav2vec 2.0 model and TIMIT, Librispeech, and LJSpeech datasets show the effectiveness of COWERAGE, with up to 17% absolute WER improvement over existing dataset pruning methods and random sampling. 
We also demonstrate that the coverage of training instances in terms of WER ensures inclusion of phonemically diverse examples which leads to better test accuracy in self-supervised speech recognition models.", "keywords": "subset selection;self-supervised speech recognition;active learning;data pruning", "primary_area": "", "supplementary_material": "/attachment/4994e724ea2e5fd6f41f5172da14a063125bd2da.zip", "author": "Abdul Hameed Azeemi;Ihsan Ayyub Qazi;Agha Ali Raza", "authorids": "~Abdul_Hameed_Azeemi1;~Ihsan_Ayyub_Qazi1;~Agha_Ali_Raza1", "gender": "M;;M", "homepage": "https://abdulhameed.me;http://web.lums.edu.pk/~ihsan;https://aghaaliraza.com/", "dblp": "286/8780;98/776.html;48/9280", "google_scholar": "eElezeAAAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": "0000-0003-0506-8365;0000-0002-2262-0353;0000-0003-0124-9783", "linkedin": "https://linkedin.com/in/ahazeemi;;agha-ali-raza", "or_profile": "~Abdul_Hameed_Azeemi1;~Ihsan_Ayyub_Qazi1;~Agha_Ali_Raza1", "aff": "Lahore University of Management Sciences;Lahore University of Management Sciences;Lahore University of Management Sciences", "aff_domain": "lums.edu.pk;lums.edu.pk;lums.edu.pk", "position": "PhD student;Associate Professor;Assistant Professor", "bibtex": "@misc{\nazeemi2023towards,\ntitle={Towards Representative Subset Selection for Self-Supervised Speech Recognition},\nauthor={Abdul Hameed Azeemi and Ihsan Ayyub Qazi and Agha Ali Raza},\nyear={2023},\nurl={https://openreview.net/forum?id=4wXotzMJ7Wo}\n}", "github": "", "project": "", "reviewers": "jPnE;xFQf;LH8X;ECxK", "site": "https://openreview.net/forum?id=4wXotzMJ7Wo", "pdf_size": 2721184, "recommendation": "3;3;3;5", "confidence": "4;4;4;4", "correctness": "2;3;3;3", "technical_novelty": "2;2;1;2", "empirical_novelty": "1;2;1;2", "wc_summary_paper": "199;178;106;53", "wc_strength_and_weaknesses": "311;466;273;93", "wc_clarity_quality_novelty_and_reproducibility": "53;536;34;21", "wc_summary_review": "62;162;56;171", "wc_review": "625;1342;469;338", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "757;452;523;321", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.5, 0.5 ], "wc_summary_paper_avg": [ 134.0, 58.10765870347901 ], "wc_strength_and_weaknesses_avg": [ 285.75, 132.70526553230658 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 161.0, 216.80521211446924 ], "wc_summary_review_avg": [ 112.75, 53.88587477252271 ], "wc_review_avg": [ 693.5, 387.9513500427599 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 513.25, 158.28830500071697 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7456456632922394172&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Lahore University of Management Sciences", "aff_unique_dep": "", "aff_unique_url": "https://lums.edu.pk", "aff_unique_abbr": "LUMS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Pakistan" }, { "title": "Dataset Pruning: Reducing Training Data by Examining Generalization Influence", "status": 
"Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12019", "id": "4wZiAXD29TQ", "poster": "", "openreview": "https://openreview.net/forum?id=4wZiAXD29TQ", "slides": "https://iclr.cc/virtual/2023/poster/12019", "video": "https://iclr.cc/virtual/2023/poster/12019", "author_site": "Shuo Yang, Zeke Xie, Hanyu Peng, Min Xu, Mingming Sun, Ping Li", "tldr": "", "abstract": "The great success of deep learning heavily relies on increasingly larger training data, which comes at a price of huge computational and infrastructural costs. This poses crucial questions that, do all training data contribute to model's performance? How much does each individual training sample or a sub-training-set affect the model's generalization, and how to construct the smallest subset from the entire training data as a proxy training set without significantly sacrificing the model's performance? To answer these, we propose dataset pruning, an optimization-based sample selection method that can (1) examine the influence of removing a particular set of training samples on model's generalization ability with theoretical guarantee, and (2) construct the smallest subset of training data that yields strictly constrained generalization gap. The empirically observed generalization gap of dataset pruning is substantially consistent with our theoretical expectations. Furthermore, the proposed method prunes 40% training examples on the CIFAR-10 dataset, halves the convergence time with only 1.3% test accuracy decrease, which is superior to previous score-based sample selection methods.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Shuo Yang;Zeke Xie;Hanyu Peng;Min Xu;Mingming Sun;Ping Li", "authorids": "~Shuo_Yang5;~Zeke_Xie1;~Hanyu_Peng1;~Min_Xu5;~Mingming_Sun1;~Ping_Li3", "gender": "M;M;;F;M;M", "homepage": "https://faculty.hitsz.edu.cn/yangshuo;https://sites.google.com/view/zeke-xie;;https://www.uts.edu.au/staff/min.xu;;http://www.stat.rutgers.edu/home/pingli/", "dblp": "78/1102-6;210/1039;;09/0-1.html;87/8665-1.html;62/5860-1", "google_scholar": "mVtxxCkAAAAJ;https://scholar.google.co.jp/citations?user=ysXmZCMAAAAJ;;https://scholar.google.com.au/citations?user=Ac6VCMkAAAAJ;;", "orcid": ";;;0000-0001-9581-8849;;", "linkedin": ";;;;;", "or_profile": "~Shuo_Yang5;~Zeke_Xie1;~Hanyu_Peng1;~Min_Xu5;~Mingming_Sun1;~Ping_Li3", "aff": "University of Technology Sydney, Australia;Baidu;;University of Technology Sydney;Baidu;LinkedIn", "aff_domain": "student.uts.edu.au;baidu.com;;uts.edu.au;baidu.com;linkedin.com", "position": "PhD student;Researcher;;Associate Professor;Principal Researcher;Engineer", "bibtex": "@inproceedings{\nyang2023dataset,\ntitle={Dataset Pruning: Reducing Training Data by Examining Generalization Influence},\nauthor={Shuo Yang and Zeke Xie and Hanyu Peng and Min Xu and Mingming Sun and Ping Li},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=4wZiAXD29TQ}\n}", "github": "", "project": "", "reviewers": "HgbD;W84Z;F3Cs;63py;gH14", "pdf_size": 1212489, "recommendation": "5;6;6;8;8", "confidence": "5;5;3;4;4", "correctness": "4;4;3;3;4", "technical_novelty": "2;3;2;3;3", "empirical_novelty": "2;3;3;3;3", "wc_summary_paper": "156;64;94;103;57", "wc_strength_and_weaknesses": "280;283;273;549;58", "wc_clarity_quality_novelty_and_reproducibility": "63;46;10;39;16", "wc_summary_review": "63;45;35;65;22", "wc_review": "562;438;412;756;153", "wc_reply_reviewers": "0;16;56;0;0", 
"wc_reply_authors": "474;330;948;719;279", "reply_reviewers": "0;1;1;0;0", "reply_authors": "2;1;3;1;1", "recommendation_avg": [ 6.6, 1.2 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "correctness_avg": [ 3.6, 0.4898979485566356 ], "technical_novelty_avg": [ 2.6, 0.4898979485566356 ], "empirical_novelty_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_paper_avg": [ 94.8, 35.18749777975126 ], "wc_strength_and_weaknesses_avg": [ 288.6, 155.7775336818503 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 34.8, 19.528440797974632 ], "wc_summary_review_avg": [ 46.0, 16.419500601419035 ], "wc_review_avg": [ 464.2, 197.46837721518855 ], "wc_reply_reviewers_avg": [ 14.4, 21.70345594600086 ], "wc_reply_authors_avg": [ 550.0, 250.83141748991494 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.3563483225498991, "corr_recommendation_correctness": -0.2721655269759087, "gs_citation": 135, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6841068022855587838&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=4wZiAXD29TQ", "email": "student.uts.edu.au;baidu.com;;uts.edu.au;baidu.com;linkedin.com", "author_num": 6, "aff_unique_index": "0;1;0;1;2", "aff_unique_norm": "University of Technology Sydney;Baidu;LinkedIn Corporation", "aff_unique_dep": ";Baidu, Inc.;", "aff_unique_url": "https://www.uts.edu.au;https://www.baidu.com;https://www.linkedin.com", "aff_unique_abbr": "UTS;Baidu;LinkedIn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1;2", "aff_country_unique": "Australia;China;United States" }, { "id": "4xzk3zGtz1h", "title": "Hybrid Federated Learning for Feature & Sample Heterogeneity: Algorithms and Implementation", "track": "main", "status": "Reject", "tldr": "In this paper, we proposed the first hybrid federated learning model and algorithm, which deals with partially overlapped features and samples in clients' datasets", "abstract": " Federated learning (FL) is a popular distributed machine learning paradigm dealing with distributed and private data sets. Based on the data partition pattern, FL is often categorized into horizontal, vertical, and hybrid settings. All three settings have many applications, but the hybrid FL remains relatively less explored, because it deals with the challenging situation where both the feature space and the data samples are heterogeneous.\n This work designs a novel mathematical model that effectively allows the clients to aggregate distributed data with heterogeneous, and possibly overlapping features and samples. Our main idea is to partition each client's model into a feature extractor part and a classifier part, where the former can be used to process the input data, while the latter is used to perform the learning from the extracted features. The heterogeneous feature aggregation is done through building a server model, which assimilates local classifiers and feature extractors through a carefully designed matching mechanism. A communication-efficient algorithm is then designed to train both the client and server models. Finally, we conducted numerical experiments on multiple image classification data sets to validate the performance of the proposed algorithm. 
To our knowledge, this is the first formulation and algorithm developed for hybrid FL.", "keywords": "Federated Learning;Model Ensemble;Model Design;Algorithm Design", "primary_area": "", "supplementary_material": "", "author": "Xinwei Zhang;Wotao Yin;Mingyi Hong;Tianyi Chen", "authorids": "~Xinwei_Zhang1;~Wotao_Yin1;~Mingyi_Hong1;~Tianyi_Chen5", "gender": "M;M;M;M", "homepage": "https://564612540.github.io/;http://wotaoyin.com;http://people.ece.umn.edu/~mhong/mingyi.html;https://chentianyi1991.github.io/", "dblp": "55/9870-1.html;76/2265;57/8053;", "google_scholar": "uq46meMAAAAJ;kpQGGFUAAAAJ;qRnP-p0AAAAJ;kFwvv38AAAAJ", "orcid": "0000-0001-7967-7150;0000-0001-6697-9731;;", "linkedin": ";;;", "or_profile": "~Xinwei_Zhang1;~Wotao_Yin1;~Mingyi_Hong1;~Tianyi_Chen5", "aff": "University of Minnesota - Twin Cities;Alibaba Group US;University of Minnesota, Minneapolis;Rensselaer Polytechnic Institute", "aff_domain": "umn.edu;alibaba-inc.com;umn.edu;rpi.edu", "position": "PhD student;Principal Researcher;Associate Professor;Assistant Professor", "bibtex": "@misc{\nzhang2023hybrid,\ntitle={Hybrid Federated Learning for Feature \\& Sample Heterogeneity: Algorithms and Implementation},\nauthor={Xinwei Zhang and Wotao Yin and Mingyi Hong and Tianyi Chen},\nyear={2023},\nurl={https://openreview.net/forum?id=4xzk3zGtz1h}\n}", "github": "", "project": "", "reviewers": "FpoP;Ss8B;AHQz;3J31", "site": "https://openreview.net/forum?id=4xzk3zGtz1h", "pdf_size": 4187118, "recommendation": "3;5;5;5", "confidence": "4;3;3;3", "correctness": "4;2;3;3", "technical_novelty": "2;3;2;3", "empirical_novelty": "2;3;2;2", "wc_summary_paper": "81;115;73;24", "wc_strength_and_weaknesses": "248;309;211;52", "wc_clarity_quality_novelty_and_reproducibility": "26;98;61;51", "wc_summary_review": "77;47;58;38", "wc_review": "432;569;403;165", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "828;695;839;484", "reply_reviewers": "0;0;0;0", "reply_authors": "2;1;2;1", "recommendation_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 73.25, 32.51441987795569 ], "wc_strength_and_weaknesses_avg": [ 205.0, 95.01315698365148 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 59.0, 25.874698065871222 ], "wc_summary_review_avg": [ 55.0, 14.543039572248986 ], "wc_review_avg": [ 392.25, 145.41212982416562 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 711.5, 143.05331174076326 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": -0.816496580927726, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12407751391553249245&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "University of Minnesota;Alibaba Group;Rensselaer Polytechnic Institute", "aff_unique_dep": ";;", "aff_unique_url": "https://www.minnesota.edu;https://www.alibaba.com;https://www.rpi.edu", "aff_unique_abbr": "UMN;Alibaba;RPI", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Twin Cities;;Minneapolis", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Weakly Supervised Knowledge Transfer with Probabilistic Logical Reasoning for Object Detection", "status": "Poster", "track": 
"main", "site": "https://iclr.cc/virtual/2023/poster/11578", "id": "4yqxDCbzS98", "poster": "/media/PosterPDFs/ICLR%202023/11578.png?t=1681385969.7574651", "openreview": "https://openreview.net/forum?id=4yqxDCbzS98", "slides": "https://iclr.cc/virtual/2023/poster/11578", "video": "https://iclr.cc/virtual/2023/poster/11578", "author_site": "Martijn Oldenhof, Adam Arany, Yves Moreau, Edward De Brouwer", "tldr": "In this work, we propose ProbKT, a framework based on probabilistic logical reasoning to train object detection models with weak supervision, by transferring knowledge from a source domain where instance-level annotations are available.", "abstract": "Training object detection models usually requires instance-level annotations, such as the positions and labels of all objects present in each image. Such supervision is unfortunately not always available and, more often, only image-level information is provided, also known as weak supervision. \nRecent works have addressed this limitation by leveraging knowledge from a richly annotated domain. However, the scope of weak supervision supported by these approaches has been very restrictive, preventing them to use all available information. In this work, we propose ProbKT, a framework based on probabilistic logical reasoning to train object detection models with arbitrary types of weak supervision. We empirically show on different datasets that using all available information is beneficial as our ProbKT leads to significant improvement on target domain and better generalisation compared to existing baselines. We also showcase the ability of our approach to handle complex logic statements as supervision signal.", "keywords": "weak supervision;knowledge transfer;object detection;probabilistic logical reasoning", "primary_area": "", "supplementary_material": "", "author": "Martijn Oldenhof;Adam Arany;Yves Moreau;Edward De Brouwer", "authorids": "~Martijn_Oldenhof1;~Adam_Arany1;~Yves_Moreau2;~Edward_De_Brouwer1", "gender": ";;M;M", "homepage": "https://www.esat.kuleuven.be/stadius/person.php?id=2314;;;https://edwarddebrouwer.xyz", "dblp": "259/3090;178/0111;;", "google_scholar": "BOvJXqIAAAAJ;QH9zWmAAAAAJ;zWftTEUAAAAJ;-Pm4XtAAAAAJ", "orcid": "0000-0003-4916-3014;0000-0002-4901-7650;;", "linkedin": ";;;edwarddebrouwer/", "or_profile": "~Martijn_Oldenhof1;~Adam_Arany1;~Yves_Moreau2;~Edward_De_Brouwer1", "aff": "KU Leuven;KU Leuven;University of Leuven;Yale University", "aff_domain": "kuleuven.be;kuleuven.be;kuleuven.be;yale.edu", "position": "PhD student;Researcher;Professor;Postdoc", "bibtex": "@inproceedings{\noldenhof2023weakly,\ntitle={Weakly Supervised Knowledge Transfer with Probabilistic Logical Reasoning for Object Detection},\nauthor={Martijn Oldenhof and Adam Arany and Yves Moreau and Edward De Brouwer},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=4yqxDCbzS98}\n}", "github": "", "project": "", "reviewers": "Mz4p;ijtT;tNrs;q7Gr", "pdf_size": 1483717, "recommendation": "6;6;8;8", "confidence": "4;3;3;3", "correctness": "4;3;3;4", "technical_novelty": "3;3;3;3", "empirical_novelty": "3;3;3;0", "wc_summary_paper": "103;89;71;95", "wc_strength_and_weaknesses": "145;205;115;87", "wc_clarity_quality_novelty_and_reproducibility": "34;74;126;15", "wc_summary_review": "37;47;16;18", "wc_review": "319;415;328;215", "wc_reply_reviewers": "0;22;32;0", "wc_reply_authors": "372;597;549;210", "reply_reviewers": "0;1;1;0", "reply_authors": "2;2;1;1", 
"recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.25, 1.299038105676658 ], "wc_summary_paper_avg": [ 89.5, 11.779218989389747 ], "wc_strength_and_weaknesses_avg": [ 138.0, 43.78355855797927 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 62.25, 42.522788008313846 ], "wc_summary_review_avg": [ 29.5, 13.009611831257688 ], "wc_review_avg": [ 319.25, 70.90971372104107 ], "wc_reply_reviewers_avg": [ 13.5, 13.955285736952863 ], "wc_reply_authors_avg": [ 432.0, 153.132295744562 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10881183977490104131&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=4yqxDCbzS98", "email": "kuleuven.be;kuleuven.be;kuleuven.be;yale.edu", "author_num": 4, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Katholieke Universiteit Leuven;University of Leuven;Yale University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.kuleuven.be;https://www.kuleuven.be;https://www.yale.edu", "aff_unique_abbr": "KU Leuven;KU Leuven;Yale", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "Belgium;United States" }, { "id": "4zGai1tFQE", "title": "Deep Dependency Networks for Action Classification in Video", "track": "main", "status": "Reject", "tldr": "A new approach that jointly learns a conditional dependency network and a deep neural network for activity classification in video", "abstract": "We propose a simple approach which combines the strengths of probabilistic graphical models and deep learning architectures for solving the multi-label action classification task in videos. At a high level, given a video clip, the goal in this task is to infer the set of activities, defined as verb-noun pairs, that are performed in the clip. First, we show that the performance of previous approaches that combine Markov Random Fields with neural networks can be modestly improved by leveraging more powerful methods such as iterative join graph propagation, $\\ell$-1 regularization based structure learning and integer linear programming. Then we propose a new modeling framework called deep dependency network which augments a dependency network, a model that is easy to train and learns more accurate dependencies but is limited to Gibbs sampling for inference, to the output layer of a neural network. We show that despite its simplicity, joint learning this new architecture yields significant improvements in performance over the baseline neural network. 
In particular, our experimental evaluation on three video datasets: Charades, Textually Annotated Cooking Scenes (TaCOS), and Wetlab shows that deep dependency networks are almost always superior to pure neural architectures that do not use dependency networks.", "keywords": "probabilistic graphical models;action classification;multi-label classification;combining probabilistic models with deep learning;end-to-end learning", "primary_area": "", "supplementary_material": "/attachment/4e4f3882d3be29f10cfa423dcee32525d930bdc7.zip", "author": "Shivvrat Arya;Yu Xiang;Vibhav Giridhar Gogate", "authorids": "~Shivvrat_Arya1;~Yu_Xiang3;~Vibhav_Giridhar_Gogate1", "gender": "M;M;M", "homepage": "https://shivvrat.github.io;http://www.hlt.utdallas.edu/~vgogate/;https://yuxng.github.io/", "dblp": "275/7819;14/4229;00/6716-1", "google_scholar": "eM1co-kAAAAJ;https://scholar.google.com.tw/citations?user=pm_dg3cAAAAJ;", "orcid": "0000-0002-9727-2533;;0000-0001-9431-5131", "linkedin": "shivvrat/;;", "or_profile": "~Shivvrat_Arya1;~Vibhav_Gogate1;~Yu_Xiang1", "aff": "The University of Texas at Dallas;University of Texas, Dallas;University of Texas, Dallas", "aff_domain": "cs.utdallas.edu;utdallas.edu;utdallas.edu", "position": "PhD student;Professor;Assistant Professor", "bibtex": "@misc{\narya2023deep,\ntitle={Deep Dependency Networks for Action Classification in Video},\nauthor={Shivvrat Arya and Yu Xiang and Vibhav Giridhar Gogate},\nyear={2023},\nurl={https://openreview.net/forum?id=4zGai1tFQE}\n}", "github": "", "project": "", "reviewers": "KzJi;apzy;rXD3", "site": "https://openreview.net/forum?id=4zGai1tFQE", "pdf_size": 325171, "recommendation": "3;5;6", "confidence": "4;4;3", "correctness": "3;3;3", "technical_novelty": "2;2;3", "empirical_novelty": "2;2;3", "wc_summary_paper": "51;64;79", "wc_strength_and_weaknesses": "243;104;49", "wc_clarity_quality_novelty_and_reproducibility": "55;76;12", "wc_summary_review": "25;37;15", "wc_review": "374;281;155", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "980;762;211", "reply_reviewers": "0;0;0", "reply_authors": "2;1;1", "recommendation_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 64.66666666666667, 11.440668201153676 ], "wc_strength_and_weaknesses_avg": [ 132.0, 81.63740972536223 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 47.666666666666664, 26.637484032009397 ], "wc_summary_review_avg": [ 25.666666666666668, 8.993825042154695 ], "wc_review_avg": [ 270.0, 89.74408058473828 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 651.0, 323.60572718458906 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.7559289460184545, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:L0eURkSaDHUJ:scholar.google.com/&scioq=Deep+Dependency+Networks+for+Action+Classification+in+Video&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Texas at Dallas", "aff_unique_dep": "", "aff_unique_url": "https://www.utdallas.edu", "aff_unique_abbr": "UT Dallas", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Dallas", 
"aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "5-3YJbVPp6m", "title": "Transfer Learning with Pre-trained Conditional Generative Models", "track": "main", "status": "Reject", "tldr": "We propose a novel transfer learning method using conditional generative models pre-trained on source dataset for an inductive transfer learning setting where NN architectures are not consistent.", "abstract": "Transfer learning is crucial in training deep neural networks on new target tasks. Current transfer learning methods always assume at least one of (i) source and target task label spaces overlap, (ii) source datasets are available, and (iii) target network architectures are consistent with source ones. However, holding these assumptions is difficult in practical settings because the target task rarely has the same labels as the source task, the source dataset access is restricted due to storage costs and privacy, and the target architecture is often specialized to each task. To transfer source knowledge without these assumptions, we propose a transfer learning method that uses deep generative models and is composed of the following two stages: pseudo pre-training (PP) and pseudo semi-supervised learning (P-SSL). PP trains a target architecture with an artificial dataset synthesized by using conditional source generative models. P-SSL applies SSL algorithms to labeled target data and unlabeled pseudo samples, which are generated by cascading the source classifier and generative models to condition them with target samples. Our experimental results indicate that our method can outperform the baselines of scratch training and knowledge distillation.", "keywords": "Deep Learning;Transfer Learning;Deep Generative Models;Semi-supervised Learning", "primary_area": "", "supplementary_material": "", "author": "Shin'ya Yamaguchi;Sekitoshi Kanai;Atsutoshi Kumagai;Daiki Chijiwa;Hisashi Kashima", "authorids": "~Shin'ya_Yamaguchi1;~Sekitoshi_Kanai1;~Atsutoshi_Kumagai2;~Daiki_Chijiwa1;~Hisashi_Kashima2", "gender": "M;M;M;M;M", "homepage": "https://yshinya6.github.io/;https://sekitoshi.github.io/;https://scholar.google.co.jp/citations?user=Q_d8GEIAAAAJ&hl=ja;;https://hkashima.github.io/index_e.html", "dblp": "https://dblp.uni-trier.de/pers/y/Yamaguchi:Shin=ya;209/4874;178/8630;295/8488;27/4448", "google_scholar": "_xJYVD0AAAAJ;qa2i5_IAAAAJ;https://scholar.google.co.jp/citations?user=Q_d8GEIAAAAJ;;bkTB0t8AAAAJ", "orcid": "0000-0001-9113-7405;0000-0003-4383-4454;0000-0002-2915-4615;;0000-0002-2770-0184", "linkedin": "shin-ya-yamaguchi-32183a154/;;;daiki-chijiwa-81491a1a7/;", "or_profile": "~Shin'ya_Yamaguchi1;~Sekitoshi_Kanai1;~Atsutoshi_Kumagai2;~Daiki_Chijiwa1;~Hisashi_Kashima2", "aff": "NTT;NTT;NTT;NTT;Kyoto University", "aff_domain": "ntt.co.jp;ntt.co.jp;ntt.co.jp;ntt.co.jp;kyoto-u.ac.jp", "position": "Researcher;Researcher;Researcher;Researcher;Full Professor", "bibtex": "@misc{\nyamaguchi2023transfer,\ntitle={Transfer Learning with Pre-trained Conditional Generative Models},\nauthor={Shin'ya Yamaguchi and Sekitoshi Kanai and Atsutoshi Kumagai and Daiki Chijiwa and Hisashi Kashima},\nyear={2023},\nurl={https://openreview.net/forum?id=5-3YJbVPp6m}\n}", "github": "", "project": "", "reviewers": "FAEQ;kCqh;MbsB;JCMy", "site": "https://openreview.net/forum?id=5-3YJbVPp6m", "pdf_size": 21937043, "recommendation": "1;5;6;8", "confidence": "4;3;4;4", "correctness": "1;3;3;4", "technical_novelty": "1;2;4;3", "empirical_novelty": "1;2;3;4", "wc_summary_paper": "75;173;27;126", 
"wc_strength_and_weaknesses": "368;180;377;104", "wc_clarity_quality_novelty_and_reproducibility": "8;81;38;72", "wc_summary_review": "23;43;15;28", "wc_review": "474;477;457;330", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "875;732;1502;552", "reply_reviewers": "0;0;0;0", "reply_authors": "2;2;3;2", "recommendation_avg": [ 5.0, 2.5495097567963922 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.75, 1.0897247358851685 ], "technical_novelty_avg": [ 2.5, 1.118033988749895 ], "empirical_novelty_avg": [ 2.5, 1.118033988749895 ], "wc_summary_paper_avg": [ 100.25, 54.67803489519352 ], "wc_strength_and_weaknesses_avg": [ 257.25, 118.38364540763222 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 49.75, 28.951468011138918 ], "wc_summary_review_avg": [ 27.25, 10.207227831296802 ], "wc_review_avg": [ 434.5, 60.81323869027204 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 915.25, 357.5705349997396 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.9898267954648646, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3292502770218622571&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "NTT Corporation;Kyoto University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ntt.co.jp;https://www.kyoto-u.ac.jp", "aff_unique_abbr": "NTT;Kyoto U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Japan" }, { "title": "Defending against Adversarial Audio via Diffusion Model", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11208", "id": "5-Df3tljit7", "poster": "/media/PosterPDFs/ICLR%202023/11208.png?t=1681907373.808215", "openreview": "https://openreview.net/forum?id=5-Df3tljit7", "slides": "https://iclr.cc/virtual/2023/poster/11208", "video": "https://iclr.cc/virtual/2023/poster/11208", "author_site": "Shutong Wu, Jiongxiao Wang, Wei Ping, Weili Nie, Chaowei Xiao", "tldr": "We propose a defense method based based on diffusion models for acoustic systems against diverse audio adversarial examples.", "abstract": "Deep learning models have been widely used in commercial acoustic systems in recent years. However, adversarial audio examples can cause abnormal behaviors for those acoustic systems, while being hard for humans to perceive. Various methods, such as transformation-based defenses and adversarial training, have been proposed to protect acoustic systems from adversarial attacks, but they are less effective against adaptive attacks. Furthermore, directly applying the methods from the image domain can lead to suboptimal results because of the unique properties of audio data. In this paper, we propose an adversarial purification-based defense pipeline, AudioPure, for acoustic systems via off-the-shelf diffusion models. Taking advantage of the strong generation ability of diffusion models, AudioPure first adds a small amount of noise to the adversarial audio and then runs the reverse sampling step to purify the noisy audio and recover clean audio. AudioPure is a plug-and-play method that can be directly applied to any pretrained classifier without any fine-tuning or re-training. We conduct extensive experiments on the speech command recognition task to evaluate the robustness of AudioPure. 
Our method is effective against diverse adversarial attacks (e.g. L2 or L\u221e-norm). It outperforms the existing methods under both strong adaptive white-box and black-box attacks bounded by L2 or L\u221e-norm (up to +20% in robust accuracy). Besides, we also evaluate the certified robustness for perturbations bounded by L2-norm via randomized smoothing. Our pipeline achieves a higher certified accuracy than baselines.", "keywords": "Adversarial attack and defense;AI security;speech recognition;diffusion models", "primary_area": "", "supplementary_material": "", "author": "Shutong Wu;Jiongxiao Wang;Wei Ping;Weili Nie;Chaowei Xiao", "authorids": "~Shutong_Wu1;~Jiongxiao_Wang1;~Wei_Ping1;~Weili_Nie1;~Chaowei_Xiao2", "gender": "M;;M;M;M", "homepage": "https://cychomatica.github.io/;https://jayfeather1024.github.io/jxwang.github.io/;https://wpingnet.github.io/;https://weilinie.github.io/;https://xiaocw11.github.io/", "dblp": "288/0663;322/5991;08/8399.html;147/4786;150/3317", "google_scholar": ";sIGapHMAAAAJ;6gKEYRgAAAAJ;zW7BH7oAAAAJ;Juoqtj8AAAAJ", "orcid": ";;;;0000-0002-7043-4926", "linkedin": ";;wei-ping/;;", "or_profile": "~Shutong_Wu1;~Jiongxiao_Wang1;~Wei_Ping1;~Weili_Nie1;~chaowei_xiao1", "aff": "Shanghai Jiaotong University;Arizona State University;NVIDIA;NVIDIA;Arizona State University", "aff_domain": "sjtu.edu.cn;asu.edu;nvidia.com;nvidia.com;asu.edu", "position": "MS student;PhD student;Principal Researcher;Research Scientist;Assistant Professor", "bibtex": "@inproceedings{\nwu2023defending,\ntitle={Defending against Adversarial Audio via Diffusion Model},\nauthor={Shutong Wu and Jiongxiao Wang and Wei Ping and Weili Nie and Chaowei Xiao},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=5-Df3tljit7}\n}", "github": "", "project": "", "reviewers": "phaA;NJPC;fScq;6825", "pdf_size": 1575588, "recommendation": "6;6;8;8", "confidence": "4;4;3;4", "correctness": "3;3;3;3", "technical_novelty": "3;2;3;3", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "77;58;41;122", "wc_strength_and_weaknesses": "115;81;82;148", "wc_clarity_quality_novelty_and_reproducibility": "38;25;26;44", "wc_summary_review": "25;74;33;58", "wc_review": "255;238;182;372", "wc_reply_reviewers": "0;67;29;0", "wc_reply_authors": "848;1108;420;1188", "reply_reviewers": "0;1;1;0", "reply_authors": "4;7;3;3", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 74.5, 30.236567265481707 ], "wc_strength_and_weaknesses_avg": [ 106.5, 27.59075932264279 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 33.25, 8.042853971072706 ], "wc_summary_review_avg": [ 47.5, 19.551214796017153 ], "wc_review_avg": [ 261.75, 69.14613148976593 ], "wc_reply_reviewers_avg": [ 24.0, 27.504545078950134 ], "wc_reply_authors_avg": [ 891.0, 299.57803657811763 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 4.25, 1.6393596310755 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.0, "gs_citation": 34, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9683917527299489547&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=5-Df3tljit7", "email": "sjtu.edu.cn;asu.edu;nvidia.com;nvidia.com;asu.edu", "author_num": 5, 
"aff_unique_index": "0;1;2;2;1", "aff_unique_norm": "Shanghai Jiao Tong University;Arizona State University;NVIDIA", "aff_unique_dep": ";;NVIDIA Corporation", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.asu.edu;https://www.nvidia.com", "aff_unique_abbr": "SJTU;ASU;NVIDIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1", "aff_country_unique": "China;United States" }, { "id": "5-X1XzdAWcC", "title": "Efficient Exploration using Model-Based Quality-Diversity with Gradients", "track": "main", "status": "Reject", "tldr": "", "abstract": "Exploration is a key challenge in Reinforcement Learning, especially in long-horizon, deceptive and sparse-reward environments. For such applications, population-based approaches have proven effective. Methods such as Quality-Diversity deals with this by encouraging novel solutions and producing a diversity of behaviours. However, these methods are driven by either undirected sampling (i.e. mutations) or use approximated gradients (i.e. Evolution Strategies) in the parameter space, which makes them highly sample-inefficient. In this paper, we propose a model-based Quality-Diversity approach, relying on gradients and learning in imagination. Our approach optimizes all members of a population simultaneously to maintain both performance and diversity efficiently by leveraging the effectiveness of QD algorithms as good data generators to train deep models. We demonstrate that it maintains the divergent search capabilities of population-based approaches while significantly improving their sample efficiency (5 times faster) and quality of solutions (2 times more performant).", "keywords": "Quality-Diversity;Exploration;Reinforcement Learning", "primary_area": "", "supplementary_material": "/attachment/aadf58338a3af10f1c3aea64f69777f8b928249f.zip", "author": "Bryan Lim;Manon Flageat;Antoine Cully", "authorids": "~Bryan_Lim2;~Manon_Flageat1;~Antoine_Cully1", "gender": "M;F;M", "homepage": ";;", "dblp": ";;https://dblp.org/pers/c/Cully:Antoine.html", "google_scholar": "OpxLH5cAAAAJ;;rZtJlPQAAAAJ", "orcid": ";0000-0002-4601-2176;", "linkedin": ";;", "or_profile": "~Bryan_Lim2;~Manon_Flageat1;~Antoine_Cully1", "aff": "Imperial College London;Imperial College London;Imperial College London", "aff_domain": "imperial.ac.uk;ic.ac.uk;imperial.ac.uk", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@misc{\nlim2023efficient,\ntitle={Efficient Exploration using Model-Based Quality-Diversity with Gradients},\nauthor={Bryan Lim and Manon Flageat and Antoine Cully},\nyear={2023},\nurl={https://openreview.net/forum?id=5-X1XzdAWcC}\n}", "github": "", "project": "", "reviewers": "TxSP;QkBZ;FZRr;tyZo", "site": "https://openreview.net/forum?id=5-X1XzdAWcC", "pdf_size": 1958612, "recommendation": "3;5;6;6", "confidence": "4;3;4;3", "correctness": "3;3;3;3", "technical_novelty": "2;3;2;2", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "196;85;95;126", "wc_strength_and_weaknesses": "673;135;95;480", "wc_clarity_quality_novelty_and_reproducibility": "329;1;11;114", "wc_summary_review": "71;2;53;71", "wc_review": "1269;223;254;791", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 125.5, 
43.41946568072896 ], "wc_strength_and_weaknesses_avg": [ 345.75, 241.04291630330064 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 113.75, 131.91166551901313 ], "wc_summary_review_avg": [ 49.25, 28.252212302756046 ], "wc_review_avg": [ 634.25, 430.46334048325184 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.40824829046386296, "corr_recommendation_correctness": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1663155836754182022&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;0", "aff_unique_norm": "Imperial College London", "aff_unique_dep": "", "aff_unique_url": "https://www.imperial.ac.uk", "aff_unique_abbr": "ICL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Characterizing the Influence of Graph Elements", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11701", "id": "51GXyzOKOp", "poster": "", "openreview": "https://openreview.net/forum?id=51GXyzOKOp", "slides": "https://iclr.cc/virtual/2023/poster/11701", "video": "https://iclr.cc/virtual/2023/poster/11701", "author_site": "Zizhang chen, Peizhao Li, Hongfu Liu, Pengyu Hong", "tldr": "Use influence functions to model the influence of elements in graphs, and understand the model behavior of graph convolution networks. ", "abstract": "The influence function, a method from robust statistics, measures the changes in model parameters, or in functions of model parameters, with respect to the removal or modification of training instances. It is an efficient and useful post-hoc method for studying the interpretability of machine learning models without the need for expensive model re-training. Recently, graph convolution networks (GCNs), which operate on graph data, have attracted a great deal of attention. However, there is no preceding research on the influence functions of GCNs to shed light on the effects of removing training nodes/edges from an input graph. Since the nodes/edges in a graph are interdependent in GCNs, it is challenging to derive influence functions for GCNs. To fill this gap, we started with the simple graph convolution (SGC) model that operates on an attributed graph, and formulated an influence function to approximate the changes in model parameters when a node or an edge is removed from an attributed graph. Moreover, we theoretically analyzed the error bound of the estimated influence of removing an edge. We experimentally validated the accuracy and effectiveness of our influence estimation function. In addition, we showed that the influence function of an SGC model could be used to estimate the impact of removing training nodes/edges on the test performance of the SGC without re-training the model. 
Finally, we demonstrated how to use influence functions to effectively guide the adversarial attacks on GCNs.", "keywords": "Interpretable Machine Learning;Influence functions;Graph Neural Networks", "primary_area": "", "supplementary_material": "/attachment/57ba4acff68cbaa2b74abacb443318cfdba8250f.zip", "author": "Zizhang Chen;Peizhao Li;Hongfu Liu;Pengyu Hong", "authorids": "~Zizhang_Chen1;~Peizhao_Li1;~Hongfu_Liu2;~Pengyu_Hong1", "gender": ";M;M;M", "homepage": ";https://peizhaoli.com;http://hongfuliu.com/;http://www.cs.brandeis.edu/~hong/", "dblp": ";232/1771;32/9075-1;89/4734", "google_scholar": ";h8UyqB4AAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=pvDa8pcAAAAJ", "orcid": ";;;0000-0002-3177-2754", "linkedin": "%E6%A2%93%E5%BD%B0-%E9%99%88-7043b3148;peizhao-li-099037182/;;", "or_profile": "~Zizhang_Chen1;~Peizhao_Li1;~Hongfu_Liu2;~Pengyu_Hong1", "aff": "Brandeis University;Brandeis University;Brandeis University;Brandeis University", "aff_domain": "brandeis.edu;brandeis.edu;brandeis.edu;brandeis.edu", "position": "PhD student;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nchen2023characterizing,\ntitle={Characterizing the Influence of Graph Elements},\nauthor={Zizhang Chen and Peizhao Li and Hongfu Liu and Pengyu Hong},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=51GXyzOKOp}\n}", "github": "", "project": "", "reviewers": "Jtg1;pmjX;Lyjj;9bmE", "pdf_size": 2061384, "recommendation": "6;6;6;8", "confidence": "3;3;4;4", "correctness": "4;4;3;4", "technical_novelty": "2;3;3;3", "empirical_novelty": "3;2;3;3", "wc_summary_paper": "63;123;93;80", "wc_strength_and_weaknesses": "25;240;214;192", "wc_clarity_quality_novelty_and_reproducibility": "14;178;65;109", "wc_summary_review": "128;71;47;17", "wc_review": "230;612;419;398", "wc_reply_reviewers": "40;221;0;0", "wc_reply_authors": "366;1268;596;244", "reply_reviewers": "1;2;0;0", "reply_authors": "2;3;1;1", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 89.75, 21.947380253688593 ], "wc_strength_and_weaknesses_avg": [ 167.75, 84.14979203777035 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 91.5, 60.201744160779924 ], "wc_summary_review_avg": [ 65.75, 40.71470864441989 ], "wc_review_avg": [ 414.75, 135.40564057675 ], "wc_reply_reviewers_avg": [ 65.25, 91.39303857515625 ], "wc_reply_authors_avg": [ 618.5, 395.71549123075783 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7519786356299317776&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=51GXyzOKOp", "email": "brandeis.edu;brandeis.edu;brandeis.edu;brandeis.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Brandeis University", "aff_unique_dep": "", "aff_unique_url": "https://www.brandeis.edu", "aff_unique_abbr": "Brandeis", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": 
"United States" }, { "title": "Discovering Generalizable Multi-agent Coordination Skills from Multi-task Offline Data", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11262", "id": "53FyUAdP7d", "poster": "/media/PosterPDFs/ICLR%202023/11262.png?t=1682660899.7876818", "openreview": "https://openreview.net/forum?id=53FyUAdP7d", "slides": "https://iclr.cc/virtual/2023/poster/11262", "video": "https://iclr.cc/virtual/2023/poster/11262", "author_site": "Fuxiang Zhang, Chengxing Jia, Yi-Chen Li, Lei Yuan, Yang Yu, Zongzhang Zhang", "tldr": "We propose a novel multi-agent reinforcement learning algorithm to discover coordination skills from multi-task offline data and realize multi-task generalization.", "abstract": "Cooperative multi-agent reinforcement learning (MARL) faces the challenge of adapting to multiple tasks with varying agents and targets. Previous multi-task MARL approaches require costly interactions to simultaneously learn or fine-tune policies in different tasks. However, the situation that an agent should generalize to multiple tasks with only offline data from limited tasks is more in line with the needs of real-world applications. Since offline multi-task data contains a variety of behaviors, an effective data-driven approach is to extract informative latent variables that can represent universal skills for realizing coordination across tasks. In this paper, we propose a novel Offline MARL algorithm to Discover coordInation Skills (ODIS) from multi-task data. ODIS first extracts task-invariant coordination skills from offline multi-task data and learns to delineate different agent behaviors with the discovered coordination skills. Then we train a coordination policy to choose optimal coordination skills with the centralized training and decentralized execution paradigm. We further demonstrate that the discovered coordination skills can assign effective coordinative behaviors, thus significantly enhancing generalization to unseen tasks. 
Empirical results in cooperative MARL benchmarks, including the StarCraft multi-agent challenge, show that ODIS obtains superior performance in a wide range of tasks only with offline data from limited sources.", "keywords": "multi-agent reinforcement learning;multi-task reinforcement learning;skill discovery;offline reinforcement learning", "primary_area": "", "supplementary_material": "", "author": "Fuxiang Zhang;Chengxing Jia;Yi-Chen Li;Lei Yuan;Yang Yu;Zongzhang Zhang", "authorids": "~Fuxiang_Zhang1;~Chengxing_Jia1;~Yi-Chen_Li1;~Lei_Yuan2;~Yang_Yu5;~Zongzhang_Zhang1", "gender": "M;M;M;M;M;M", "homepage": "http://www.lamda.nju.edu.cn/zhangfx/;http://www.lamda.nju.edu.cn/jiacx/;http://www.lamda.nju.edu.cn/liyc/;http://www.lamda.nju.edu.cn/yuanl/;http://www.lamda.nju.edu.cn/zhangzz;http://www.lamda.nju.edu.cn/yuy", "dblp": "12/3884;;143/7158-1;23/6750-1;90/8724;46/2181-1", "google_scholar": ";;https://scholar.google.com.hk/citations?user=OA3GmbQAAAAJ;https://scholar.google.com/citations?hl=zh-CN;sG7WEAgAAAAJ;PG2lDSwAAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Fuxiang_Zhang1;~Chengxing_Jia1;~Yi-Chen_Li1;~Lei_Yuan2;~Zongzhang_Zhang1;~Yang_Yu2", "aff": "Nanjing University;Nanjing University;Nanjing University;Nanjing University;Nanjing University;Nanjing University", "aff_domain": "nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn", "position": "MS student;PhD student;PhD student;PhD student;Associate Professor;Professor", "bibtex": "@inproceedings{\nzhang2023discovering,\ntitle={Discovering Generalizable Multi-agent Coordination Skills from Multi-task Offline Data},\nauthor={Fuxiang Zhang and Chengxing Jia and Yi-Chen Li and Lei Yuan and Yang Yu and Zongzhang Zhang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=53FyUAdP7d}\n}", "github": "", "project": "", "reviewers": "PuN1;e7iL;1rgQ;E6Nd", "pdf_size": 3798755, "recommendation": "5;6;8;8", "confidence": "2;3;4;3", "correctness": "3;3;4;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;3;3;4", "wc_summary_paper": "74;100;75;54", "wc_strength_and_weaknesses": "97;142;201;353", "wc_clarity_quality_novelty_and_reproducibility": "167;51;79;32", "wc_summary_review": "32;33;81;35", "wc_review": "370;326;436;474", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "885;694;413;1250", "reply_reviewers": "0;0;0;0", "reply_authors": "2;1;1;2", "recommendation_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 75.75, 16.315253599009733 ], "wc_strength_and_weaknesses_avg": [ 198.25, 96.65757859578316 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 82.25, 51.70771219073611 ], "wc_summary_review_avg": [ 45.25, 20.668514702319566 ], "wc_review_avg": [ 401.5, 57.31273854912187 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 810.5, 304.25688159842827 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.816496580927726, "corr_recommendation_correctness": 0.5555555555555555, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10631821989067535850&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=53FyUAdP7d", 
"email": "nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Nanjing University", "aff_unique_dep": "", "aff_unique_url": "https://www.nju.edu.cn", "aff_unique_abbr": "Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "53T6FlFulCV", "title": "SoundCount: Sound Counting from Raw Audio with Dyadic Decomposition Neural Network", "track": "main", "status": "Reject", "tldr": "A novel and general framework for sound crowd counting from sound raw waveform", "abstract": "In this paper, we study an underexplored, yet important and challenging problem: counting the number of distinct sound events in data characterized by a high degree of polyphonicity and spectral overlap. A key example is counting individual bird calls in bioacoustic data, from which biodiversity can be estimated. We do so by systematically proposing a novel end-to-end trainable neural network, designing new evaluation protocols, quantifying the difficulty of counting depending on sound polyphonicity, and creating a new dataset tailored for concurrent sound event counting. Unlike existing methods that all apply frequency-selective filters on the raw waveform in a one-stage manner, our neural network progressively decomposes the raw waveform dyadically in frequency domain. Taking inspiration from wavelet decomposition, intermediate waveforms convolved by a parent filter are successively processed by a pair of children filters that evenly split the parent filter's carried frequency response. An energy gain normalization module is introduced to normalize received sound events' loudness variance and spectrum overlap. The network is fully convolutional and parameter-frugal so it is light-weight and computationally efficient. We further design a set of polyphony-aware metrics to quantify sound counting difficulty level from different perspectives. To show the efficiency and generalization of our method (we call DyDecNet), we do experiments on both bioacoustic bird sound (both synthetic and real-world sound), telephone-ring sound and music sound data. Comprehensive experiment results show our method outperforms existing sound event detection (SED) methods significantly. 
The dyadic decomposition front-end network can be used by existing methods to improve their performance accordingly.", "keywords": "Sound Crowd Count;Dyadic Decomposition Network;Learnable Filters;Acoustic Crowd Counting", "primary_area": "", "supplementary_material": "/attachment/6b0946acc9eae8847fa8e4b8855c424aeef1a36e.zip", "author": "Yuhang He;Zhuangzhuang Dai;Niki Trigoni;Andrew Markham", "authorids": "~Yuhang_He3;~Zhuangzhuang_Dai1;~Niki_Trigoni1;~Andrew_Markham2", "gender": "M;M;F;M", "homepage": "https://yuhanghe01.github.io/;;https://www.cs.ox.ac.uk/people/niki.trigoni/;", "dblp": ";;t/NikiTrigoni;83/7169", "google_scholar": "H1p3ve8AAAAJ;;;https://scholar.google.co.uk/citations?user=g3JTO9EAAAAJ", "orcid": ";my-orcid?orcid=0000-0002-6098-115X;;", "linkedin": ";;;", "or_profile": "~Yuhang_He3;~Zhuangzhuang_Dai1;~Niki_Trigoni1;~Andrew_Markham2", "aff": "University of Oxford;;University of Oxford;University of Oxford", "aff_domain": "ox.ac.uk;;ox.ac.uk;ox.ac.uk", "position": "PhD student;;Full Professor;Associate Professor", "bibtex": "@misc{\nhe2023soundcount,\ntitle={SoundCount: Sound Counting from Raw Audio with Dyadic Decomposition Neural Network},\nauthor={Yuhang He and Zhuangzhuang Dai and Niki Trigoni and Andrew Markham},\nyear={2023},\nurl={https://openreview.net/forum?id=53T6FlFulCV}\n}", "github": "", "project": "", "reviewers": "3j8s;xW8d;6EcX;5ud3;6cJE", "site": "https://openreview.net/forum?id=53T6FlFulCV", "pdf_size": 3201380, "recommendation": "3;5;6;6;8", "confidence": "4;3;3;2;3", "correctness": "3;4;4;3;3", "technical_novelty": "3;3;3;3;3", "empirical_novelty": "4;3;3;3;3", "wc_summary_paper": "209;79;62;19;330", "wc_strength_and_weaknesses": "357;87;283;132;414", "wc_clarity_quality_novelty_and_reproducibility": "115;638;58;16;159", "wc_summary_review": "144;59;30;29;74", "wc_review": "825;863;433;196;977", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;0;0", "reply_authors": "0;0;0;0;0", "recommendation_avg": [ 5.6, 1.624807680927192 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "correctness_avg": [ 3.4, 0.4898979485566356 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_paper_avg": [ 139.8, 114.28630714131943 ], "wc_strength_and_weaknesses_avg": [ 254.6, 126.34967352549828 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 197.2, 225.71256057206918 ], "wc_summary_review_avg": [ 67.2, 42.08277557386157 ], "wc_review_avg": [ 658.8, 295.2086719593447 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.5838742081211422, "corr_recommendation_correctness": -0.05025189076296065, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14473564379204926345&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Oxford", "aff_unique_dep": "", "aff_unique_url": "https://www.ox.ac.uk", "aff_unique_abbr": "Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "53yQBJNQVJu", "title": "Worst-case Few-shot Evaluation: Are Neural Networks Robust Few-shot Learners?", "track": "main", "status": "Reject", "tldr": "", "abstract": "Neural networks have achieved remarkable performance on various few-shot tasks. 
However, recent studies reveal that existing few-shot models often exploit the spurious correlations between training and test sets, achieving high performance that is hard to generalize. Motivated by the principle that a robust few-shot learner should accurately classify data given any valid training set, we consider a worst-case few-shot evaluation that computes worst-case generalization errors by constructing a challenging few-shot set. Specifically, we search for the label-balanced subset of a full-size training set that results in the largest expected risks. Since the search space is enormous, we propose an efficient method, NMMD-attack, to optimize the target by maximizing the NMMD distance (maximum mean discrepancy based on the neural tangent kernel). Experiments show that NMMD-attack can successfully attack various architectures. The large gap between average performance and worst-case performance shows that neural networks still suffer from poor robustness. We call for more worst-case benchmarks for more robust few-shot evaluation.", "keywords": "Distributional Robustness;few-shot evaluation", "primary_area": "", "supplementary_material": "/attachment/65d854a7518dcb4f351043fe6efc931442c34782.zip", "author": "Yudong Wang;Ma Chang;Qingxiu Dong;Lingpeng Kong;Zhifang Sui;Jingjing Xu", "authorids": "~Yudong_Wang1;~Ma_Chang1;~Qingxiu_Dong1;~Lingpeng_Kong1;~Zhifang_Sui1;~Jingjing_Xu1", "gender": "M;F;F;M;F;F", "homepage": ";https://github.com/chang-github-00;https://dqxiu.github.io/;https://ikekonglp.github.io/;http://eecs.pku.edu.cn/EN/People/Faculty/Detail/?ID=6024;", "dblp": "18/776-5;;284/0673;144/7656;;25/624", "google_scholar": "https://scholar.google.com/citations?hl=en;8OOpuiIAAAAJ;ibcR7VkAAAAJ;f1hBi5wAAAAJ;;", "orcid": "0009-0005-0204-495X;;;;;", "linkedin": ";;qingxiu-dong-a3758a199/;;;", "or_profile": "~Yudong_Wang1;~Ma_Chang1;~Qingxiu_Dong1;~Lingpeng_Kong1;~Zhifang_Sui1;~Jingjing_Xu1", "aff": "University of Electronic Science and Technology of China;University of Hong Kong;Peking University;Department of Computer Science, The University of Hong Kong;Peking University;", "aff_domain": "uestc.edu.cn;hku.hk;pku.edu.cn;cs.hku.hk;pku.edu.cn;", "position": "Undergrad student;PhD student;PhD student;Assistant Professor;Full Professor;", "bibtex": "@misc{\nwang2023worstcase,\ntitle={Worst-case Few-shot Evaluation: Are Neural Networks Robust Few-shot Learners?},\nauthor={Yudong Wang and Ma Chang and Qingxiu Dong and Lingpeng Kong and Zhifang Sui and Jingjing Xu},\nyear={2023},\nurl={https://openreview.net/forum?id=53yQBJNQVJu}\n}", "github": "", "project": "", "reviewers": "Hiwt;di2U;X5Fg", "site": "https://openreview.net/forum?id=53yQBJNQVJu", "pdf_size": 2045248, "recommendation": "1;5;5", "confidence": "4;3;4", "correctness": "3;2;3", "technical_novelty": "2;2;3", "empirical_novelty": "1;3;3", "wc_summary_paper": "82;93;84", "wc_strength_and_weaknesses": "426;1268;389", "wc_clarity_quality_novelty_and_reproducibility": "63;84;32", "wc_summary_review": "79;146;50", "wc_review": "650;1591;555", "wc_reply_reviewers": "765;1122;0", "wc_reply_authors": "2110;1725;735", "reply_reviewers": "3;2;0", "reply_authors": "3;4;1", "recommendation_avg": [ 3.6666666666666665, 1.8856180831641267 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.9428090415820634 ], "wc_summary_paper_avg": [ 86.33333333333333, 4.784233364802441
], "wc_strength_and_weaknesses_avg": [ 694.3333333333334, 405.92473303420286 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 59.666666666666664, 21.359359124801056 ], "wc_summary_review_avg": [ 91.66666666666667, 40.202266381663385 ], "wc_review_avg": [ 932.0, 467.5945537179263 ], "wc_reply_reviewers_avg": [ 629.0, 468.04059652983096 ], "wc_reply_authors_avg": [ 1523.3333333333333, 579.1708632941483 ], "reply_reviewers_avg": [ 1.6666666666666667, 1.247219128924647 ], "reply_authors_avg": [ 2.6666666666666665, 1.247219128924647 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.4999999999999999, "corr_recommendation_correctness": -0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:0v9Lqrf1plMJ:scholar.google.com/&scioq=Worst-case+Few-shot+Evaluation:+Are+Neural+Networks+Robust+Few-shot+Learners%3F&hl=en&as_sdt=0,14", "gs_version_total": 0, "aff_unique_index": "0;1;2;1;2", "aff_unique_norm": "University of Electronic Science and Technology of China;University of Hong Kong;Peking University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uestc.edu.cn;https://www.hku.hk;http://www.pku.edu.cn", "aff_unique_abbr": "UESTC;HKU;Peking U", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "54F8woU8vhq", "title": "Context and History Aware Other-Shaping", "track": "main", "status": "Reject", "tldr": "A scalable shaping algorithm which can be used in complex environments.", "abstract": "Cooperation failures, in which self-interested agents converge to collectively worst-case outcomes, are a common failure mode of Multi-Agent Reinforcement Learning (MARL) methods. Methods such as Model-Free Opponent Shaping (MFOS) and The Good Shepherd address this issue by shaping their co-player\u2019s learning into mutual cooperation. However, these methods fail to capture important co-player learning dynamics or do not scale to co-players parameterised by deep neural networks. To address these issues, we propose Context and History Aware Other-Shaping (CHAOS). A CHAOS agent is a meta-learner parameterised by a recurrent neural network that learns to shape its co-player over multiple trials. CHAOS considers both the context (inter-episode information), and history (intra-episode information) to shape co-players successfully. CHAOS also successfully scales to shaping co-players parameterised by deep neural networks. In a set of experiments, we show that CHAOS achieves state-of-the-art shaping in matrix games. We provide extensive ablations, motivating the importance of both context and history. CHAOS also successfully shapes on a complex grid-worldbased game, demonstrating CHAOS\u2019s scalability empirically. Finally, we provide empirical evidence that, counterintuitively, the widely-used Coin Game environment does not require history to learn shaping because states are often indicative of past actions. 
This suggests that the Coin Game is, in contrast to common understanding, unsuitable for investigating shaping in high-dimensional, multi-step environments.", "keywords": "Shaping;Multi-Agent;Reinforcement Learning;Meta Reinforcement Learning", "primary_area": "", "supplementary_material": "/attachment/b9e72cfe641c57a9bd0eab69f62a916badb331e8.zip", "author": "Akbir Khan;Newton Kwan;Timon Willi;Chris Lu;Andrea Tacchetti;Jakob Nicolaus Foerster", "authorids": "~Akbir_Khan1;~Newton_Kwan1;~Timon_Willi1;~Chris_Lu1;~Andrea_Tacchetti1;~Jakob_Nicolaus_Foerster1", "gender": "M;M;;;M;M", "homepage": "https://akbir.dev;https://newtonkwan.com/;https://www.timonwilli.com;;http://web.mit.edu/~atacchet/www/;https://www.jakobfoerster.com", "dblp": ";;243/3437;77/9579;127/6624;176/5095", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;Dn-udzAAAAAJ;4WLoIRsAAAAJ;https://scholar.google.co.uk/citations?user=HKybSogAAAAJ;6z4lQzMAAAAJ", "orcid": ";0009-0009-0167-7287;0000-0003-4405-5700;;0000-0001-9311-9171;", "linkedin": ";newtonkwan/;;;andreatacchetti/;", "or_profile": "~Akbir_Khan1;~Newton_Kwan1;~Timon_Willi1;~Chris_Lu1;~Andrea_Tacchetti1;~Jakob_Nicolaus_Foerster1", "aff": ";GSK;University of Oxford, University of Oxford;University of Oxford;Google DeepMind;University of Oxford, University of Oxford", "aff_domain": ";gsk.com;eng.ox.ac.uk;ox.ac.uk;google.com;eng.ox.ac.uk", "position": ";Researcher;PhD student;PhD student;Research Scientist;Associate Professor", "bibtex": "@misc{\nkhan2023context,\ntitle={Context and History Aware Other-Shaping},\nauthor={Akbir Khan and Newton Kwan and Timon Willi and Chris Lu and Andrea Tacchetti and Jakob Nicolaus Foerster},\nyear={2023},\nurl={https://openreview.net/forum?id=54F8woU8vhq}\n}", "github": "", "project": "", "reviewers": "PvhL;Ae1h;Hqnc;zKsT", "site": "https://openreview.net/forum?id=54F8woU8vhq", "pdf_size": 5872559, "recommendation": "3;3;5;5", "confidence": "4;3;4;3", "correctness": "2;2;2;2", "technical_novelty": "2;2;2;2", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "22;73;100;46", "wc_strength_and_weaknesses": "72;201;487;157", "wc_clarity_quality_novelty_and_reproducibility": "6;62;62;37", "wc_summary_review": "1;71;39;52", "wc_review": "101;407;688;292", "wc_reply_reviewers": "0;167;70;0", "wc_reply_authors": "436;1027;1248;723", "reply_reviewers": "0;1;1;0", "reply_authors": "1;3;3;1", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.0, 0.0 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 60.25, 29.192250684042847 ], "wc_strength_and_weaknesses_avg": [ 229.25, 155.8691358159145 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 41.75, 23.025800746119558 ], "wc_summary_review_avg": [ 40.75, 25.616157010761782 ], "wc_review_avg": [ 372.0, 212.67463412452366 ], "wc_reply_reviewers_avg": [ 59.25, 68.45938576995853 ], "wc_reply_authors_avg": [ 858.5, 306.9890063178159 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 1.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=482495346818023750&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;1;2;1", "aff_unique_norm": "GlaxoSmithKline;University of Oxford;Google", "aff_unique_dep": ";;Google DeepMind", "aff_unique_url": 
"https://www.gsk.com;https://www.ox.ac.uk;https://deepmind.com", "aff_unique_abbr": "GSK;Oxford;DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "55Eet8WGJTv", "title": "Lightweight Uncertainty for Offline Reinforcement Learning via Bayesian Posterior", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Offline Reinforcement Learning (RL) aims to learn optimal policies from fixed datasets. Directly applying off-policy RL algorithms to offline datasets typically suffers from the distributional shift issue and fails to obtain a reliable value estimation for out-of-distribution (OOD) actions. To this end, several methods penalize the value function with uncertainty quantification and achieve tremendous success from both theoretical and empirical perspectives. However, such uncertainty-based methods typically require estimating the lower confidence bound (LCB) of the $Q$-function based on a large number of ensemble networks, which is computationally expensive. In this paper, we propose a lightweight uncertainty quantifier based on approximate Bayesian inference in the last layer of the $Q$-network, which estimates the Bayesian posterior with minimal parameters in addition to the ordinary $Q$-network. We then obtain the uncertainty quantification by the disagreement of the $Q$-posterior. Moreover, to avoid mode collapse in OOD samples and improve diversity in the $Q$-posterior, we introduce a repulsive force for OOD predictions in training. We show that our method recovers the provably efficient LCB-penalty under linear MDP assumptions. We further compare our method with other baselines on the D4RL benchmark. The experimental results show that our proposed method achieves state-of-the-art performance on most tasks with more lightweight uncertainty quantifiers.", "keywords": "Offline reinforcement learning;Uncertainty quantification;Bayesian neural networks", "primary_area": "", "supplementary_material": "", "author": "Xudong Yu;Chenjia Bai;Hongyi Guo;Lingxiao Wang;Changhong Wang;Zhen Wang;Zhaoran Wang", "authorids": "~Xudong_Yu2;~Chenjia_Bai2;~Hongyi_Guo1;~Lingxiao_Wang6;cwang@hit.edu.cn;~Zhen_Wang11;~Zhaoran_Wang1", "gender": ";M;M;M;;M;Not Specified", "homepage": ";https://baichenjia.github.io/;https://gohsyi.github.io/;;;http://iopen.nwpu.edu.cn/info/1015/1351.htm?ivk_sa=1024320u;https://zhaoranwang.github.io/", "dblp": ";247/1943;;140/1229;;;117/2756", "google_scholar": "https://scholar.google.com.hk/citations?hl=zh-CN;Rm_1y2kAAAAJ;https://scholar.google.com/citations?hl=en;;;https://scholar.google.co.uk/citations?hl=zh-CN;https://scholar.google.com.tw/citations?user=HSx0BgQAAAAJ", "orcid": ";;;;;;", "linkedin": ";;;;;;", "or_profile": "~Xudong_Yu2;~Chenjia_Bai2;~Hongyi_Guo1;~Lingxiao_Wang6;cwang@hit.edu.cn;~Zhen_Wang11;~Zhaoran_Wang1", "aff": "Harbin Institute of Technology;Shanghai AI Laboratory;Northwestern University, Northwestern University;Northwestern University;;Northwestern Polytechnical University;", "aff_domain": "hit.edu.cn;pjlab.org.cn;u.northwestern.edu;northwestern.edu;;nwpu.edu.cn;", "position": "PhD student;Researcher;PhD student;PhD student;;Full Professor;", "bibtex": "@misc{\nyu2023lightweight,\ntitle={Lightweight Uncertainty for Offline Reinforcement Learning via Bayesian Posterior},\nauthor={Xudong Yu and Chenjia Bai and Hongyi Guo and Lingxiao Wang and Changhong Wang and Zhen Wang and Zhaoran 
Wang},\nyear={2023},\nurl={https://openreview.net/forum?id=55Eet8WGJTv}\n}", "github": "", "project": "", "reviewers": "w9ZG;J9kh;AtQJ", "site": "https://openreview.net/forum?id=55Eet8WGJTv", "pdf_size": 2816907, "recommendation": "3;5;5", "confidence": "4;3;4", "correctness": "2;3;3", "technical_novelty": "3;2;2", "empirical_novelty": "2;2;2", "wc_summary_paper": "73;57;91", "wc_strength_and_weaknesses": "73;117;96", "wc_clarity_quality_novelty_and_reproducibility": "487;26;177", "wc_summary_review": "62;85;52", "wc_review": "695;285;416", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "2481;739;1468", "reply_reviewers": "0;0;0", "reply_authors": "6;2;4", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 73.66666666666667, 13.888444437333106 ], "wc_strength_and_weaknesses_avg": [ 95.33333333333333, 17.96910929592474 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 230.0, 191.89754210689273 ], "wc_summary_review_avg": [ 66.33333333333333, 13.816254517375137 ], "wc_review_avg": [ 465.3333333333333, 170.97823123297175 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1562.6666666666667, 714.3119455500906 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 4.0, 1.632993161855452 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.49999999999999983, "corr_recommendation_correctness": 0.9999999999999998, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:8WLEcPtuZs4J:scholar.google.com/&scioq=Lightweight+Uncertainty+for+Offline+Reinforcement+Learning+via+Bayesian+Posterior&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2;2;3", "aff_unique_norm": "Harbin Institute of Technology;Shanghai AI Laboratory;Northwestern University;Northwestern Polytechnical University", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.hit.edu.cn/;https://www.shanghai-ai-lab.com;https://www.northwestern.edu;https://www.nwpu.edu.cn", "aff_unique_abbr": "HIT;SAIL;NU;NWPU", "aff_campus_unique_index": "0", "aff_campus_unique": "Harbin;", "aff_country_unique_index": "0;0;1;1;0", "aff_country_unique": "China;United States" }, { "id": "58QUPAU0RJs", "title": "Neural Prompt Search", "track": "main", "status": "Withdraw", "tldr": "We propose to search, instead of hand-engineering, prompt modules for parameter-efficient transfer learning.", "abstract": "The size of vision models has grown exponentially over the last few years, especially after the emergence of Vision Transformer. This has motivated the development of parameter-efficient tuning methods, such as learning adapter layers or visual prompt tokens, which allow a tiny portion of model parameters to be trained, whereas the vast majority obtained from pre-training are frozen. However, designing a proper tuning method is non-trivial: one might need to try out a lengthy list of design choices, not to mention that each downstream dataset often requires custom designs. 
In this paper, we view the existing parameter-efficient tuning methods as \"prompt modules\" and propose Neural prOmpt seArcH (NOAH), a novel approach that learns, for large vision models, the optimal design of prompt modules through a neural architecture search algorithm, specifically for each downstream dataset. By conducting extensive experiments on over 20 vision datasets, we demonstrate that NOAH (i) is superior to individual prompt modules, (ii) has a good few-shot learning ability, and (iii) is domain-generalizable. The code and models will be released to facilitate future research.", "keywords": "transfer learning;computer vision;parameter-efficient tuning;prompt learning;neural architecture search", "primary_area": "", "supplementary_material": "", "author": "Yuanhan Zhang;Kaiyang Zhou;Ziwei Liu", "authorids": "~Yuanhan_Zhang1;~Kaiyang_Zhou1;~Ziwei_Liu1", "gender": "M;M;M", "homepage": "https://zhangyuanhan-ai.github.io/;https://kaiyangzhou.github.io/;https://liuziwei7.github.io/", "dblp": "10/2476;203/3155;05/6300-2", "google_scholar": "g6grFy0AAAAJ;https://scholar.google.co.uk/citations?user=gRIejugAAAAJ;https://scholar.google.com.hk/citations?user=lc45xlcAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Yuanhan_Zhang1;~Kaiyang_Zhou1;~Ziwei_Liu1", "aff": "Nanyang Technological University;Hong Kong Baptist University;Nanyang Technological University", "aff_domain": "ntu.edu.sg;hkbu.edu.hk;ntu.edu.sg", "position": "PhD student;Assistant Professor;Assistant Professor", "bibtex": "@misc{\nzhang2023neural,\ntitle={Neural Prompt Search},\nauthor={Yuanhan Zhang and Kaiyang Zhou and Ziwei Liu},\nyear={2023},\nurl={https://openreview.net/forum?id=58QUPAU0RJs}\n}", "github": "", "project": "", "reviewers": "GCwp;srxN;ptAg;qnLU", "site": "https://openreview.net/forum?id=58QUPAU0RJs", "pdf_size": 1574992, "recommendation": "5;5;5;5", "confidence": "4;4;4;4", "correctness": "3;3;3;3", "technical_novelty": "2;2;2;2", "empirical_novelty": "2;3;2;2", "wc_summary_paper": "78;105;59;68", "wc_strength_and_weaknesses": "271;358;231;131", "wc_clarity_quality_novelty_and_reproducibility": "44;76;19;73", "wc_summary_review": "13;50;29;54", "wc_review": "406;589;338;326", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.0, 0.0 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 77.5, 17.240939649566666 ], "wc_strength_and_weaknesses_avg": [ 247.75, 81.55787822154277 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 53.0, 23.27015255644019 ], "wc_summary_review_avg": [ 36.5, 16.560495161679196 ], "wc_review_avg": [ 414.75, 105.1270065206843 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 385, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1425524227227999258&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff_unique_index": "0;1;0", "aff_unique_norm": "Nanyang Technological University;Hong Kong Baptist University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ntu.edu.sg;https://www.hkbu.edu.hk", "aff_unique_abbr": "NTU;HKBU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", 
"aff_country_unique_index": "0;1;0", "aff_country_unique": "Singapore;China" }, { "title": "Avoiding spurious correlations via logit correction", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10843", "id": "5BaqCFVh5qL", "poster": "", "openreview": "https://openreview.net/forum?id=5BaqCFVh5qL", "slides": "https://iclr.cc/virtual/2023/poster/10843", "video": "https://iclr.cc/virtual/2023/poster/10843", "author_site": "Sheng Liu, Xu Zhang, Nitesh Sekhar, Yue Wu, Prateek Singhal, Carlos Fernandez-Granda", "tldr": " We propose the logit correction (LC) loss, a simple yet effective improvement on the softmax cross-entropy loss, to mitigate spurious correlations", "abstract": "Empirical studies suggest that machine learning models trained with empirical risk minimization (ERM) often rely on attributes that may be spuriously correlated with the class labels. Such models typically lead to poor performance during inference for data lacking such correlations. In this work, we explicitly consider a situation where potential spurious correlations are present in the majority of training data. In contrast with existing approaches, which use the ERM model outputs to detect the samples without spurious correlations and either heuristically upweight or upsample those samples, we propose the logit correction (LC) loss, a simple yet effective improvement on the softmax cross-entropy loss, to correct the sample logit. We demonstrate that minimizing the LC loss is equivalent to maximizing the group-balanced accuracy, so the proposed LC could mitigate the negative impacts of spurious correlations. Our extensive experimental results further reveal that the proposed LC loss outperforms state-of-the-art solutions on multiple popular benchmarks by a large margin, an average 5.5% absolute improvement, without access to spurious attribute labels. 
LC is also competitive with oracle methods that make use of the attribute labels.", "keywords": "spurious correlation;robust learning;empirical risk minimization", "primary_area": "", "supplementary_material": "/attachment/f134bfec58508a3aa91de1294acafef78dbef66c.zip", "author": "Sheng Liu;Xu Zhang;Nitesh Sekhar;Yue Wu;Prateek Singhal;Carlos Fernandez-Granda", "authorids": "~Sheng_Liu2;~Xu_Zhang1;~Nitesh_Sekhar1;~Yue_Wu15;~Prateek_Singhal1;~Carlos_Fernandez-Granda1", "gender": ";M;M;M;M;", "homepage": "https://shengliu66.github.io/;https://xu-zhang-1987.github.io;;https://sites.google.com/view/yue-rex-wu/home;;https://cims.nyu.edu/~cfgranda/", "dblp": ";98/5660-22;335/1885;41/5979-1.html;139/1013;77/11141", "google_scholar": "rzhzR-cAAAAJ;efE70pEAAAAJ;HzNqQNoAAAAJ;fONV3IgAAAAJ;https://scholar.google.co.in/citations?user=sqU8ApgAAAAJ;GX-PtukAAAAJ", "orcid": ";;;;;", "linkedin": ";xu-zhang-22008054;niteshsekhar;yue-rex-wu-56038913/;;", "or_profile": "~Sheng_Liu2;~Xu_Zhang1;~Nitesh_Sekhar1;~Yue_Wu15;~Prateek_Singhal1;~Carlos_Fernandez-Granda1", "aff": "New York University;Amazon;Amazon;Amazon;Amazon;New York University", "aff_domain": "nyu.edu;amazon.com;amazon.com;amazon.com;amazon.com;nyu.edu", "position": "PhD student;Applied Scientist;Researcher;Principal Researcher;Researcher;Associate Professor", "bibtex": "@inproceedings{\nliu2023avoiding,\ntitle={Avoiding spurious correlations via logit correction},\nauthor={Sheng Liu and Xu Zhang and Nitesh Sekhar and Yue Wu and Prateek Singhal and Carlos Fernandez-Granda},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=5BaqCFVh5qL}\n}", "github": "", "project": "", "reviewers": "tcpr;aF7f;H482;5Aoq", "pdf_size": 4116312, "recommendation": "6;6;6;6", "confidence": "3;3;3;3", "correctness": "4;3;3;2", "technical_novelty": "3;4;3;3", "empirical_novelty": "3;3;3;2", "wc_summary_paper": "122;172;83;51", "wc_strength_and_weaknesses": "319;598;340;205", "wc_clarity_quality_novelty_and_reproducibility": "41;123;65;53", "wc_summary_review": "29;61;49;29", "wc_review": "511;954;537;338", "wc_reply_reviewers": "52;193;93;0", "wc_reply_authors": "208;1058;810;415", "reply_reviewers": "1;1;1;0", "reply_authors": "2;3;2;2", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.0, 0.0 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 107.0, 45.17189391646093 ], "wc_strength_and_weaknesses_avg": [ 365.5, 143.7263023945165 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 70.5, 31.476181471074284 ], "wc_summary_review_avg": [ 42.0, 13.674794331177344 ], "wc_review_avg": [ 585.0, 226.35701888830397 ], "wc_reply_reviewers_avg": [ 84.5, 70.78311945654839 ], "wc_reply_authors_avg": [ 622.75, 331.54288938235425 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1099857187141546737&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "pdf": "https://openreview.net/pdf?id=5BaqCFVh5qL", "email": "nyu.edu;amazon.com;amazon.com;amazon.com;amazon.com;nyu.edu", "author_num": 6, "aff_unique_index": "0;1;1;1;1;0", "aff_unique_norm": "New York University;Amazon", "aff_unique_dep": 
";Amazon.com, Inc.", "aff_unique_url": "https://www.nyu.edu;https://www.amazon.com", "aff_unique_abbr": "NYU;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "5C5ZcWvtI7S", "title": "Machine Learning Force Fields with Data Cost Aware Training", "track": "main", "status": "Reject", "tldr": "We propose ASTEROID, a computational framework to reduce the data generation cost of training machine learning force fields.", "abstract": "Machine learning force fields (MLFF) have been proposed to accelerate molecular dynamics (MD) simulation, which finds widespread applications in chemistry and biomedical research. Even for the most data-efficient MLFF models, reaching chemical accuracy can require hundreds of frames of force and energy labels generated by expensive quantum mechanical algorithms, which may scale as $O(n^3)$ to $O(n^7)$, with $n$ being the number of basis functions used and typically proportional to the number of atoms.\nTo address this issue, we propose a multi-stage computational framework -- ASTEROID, which enjoys low training data generation cost without significantly sacrificing MLFFs' accuracy. Specifically, ASTEROID leverages a combination of both large cheap inaccurate data and small expensive accurate data. The motivation behind ASTEROID is that inaccurate data, though incurring large bias, can help capture the sophisticated structures of the underlying force field. Therefore, we first train a MLFF model on a large amount of inaccurate training data, employing a bias-aware loss function to prevent the model from overfitting the potential bias of the inaccurate training data. We then fine-tune the obtained model using a small amount of accurate training data, which preserves the knowledge learned from the inaccurate training data while significantly improving the model's accuracy. Moreover, we propose a variant of ASTEROID based on score matching for the setting where the inaccurate training data are unlabelled. 
Extensive experiments on MD simulation datasets show that ASTEROID can significantly reduce data generation costs while improving the accuracy of MLFFs.", "keywords": "Machine Learning Force Fields;Data-Cost Aware Training;AI for Science", "primary_area": "", "supplementary_material": "", "author": "Alexander Bukharin;Tianyi Liu;Shengjie Wang;Simiao Zuo;Weihao Gao;Wen Yan;Tuo Zhao", "authorids": "~Alexander_Bukharin1;~Tianyi_Liu2;~Shengjie_Wang1;~Simiao_Zuo1;~Weihao_Gao1;~Wen_Yan1;~Tuo_Zhao1", "gender": "M;M;M;;M;M;M", "homepage": "https://abukharin3.github.io;https://sites.google.com/view/tianyiliu/home;https://sheng-jie-wang.github.io/;;https://wgao9.github.io/;;http://www2.isye.gatech.edu/~tzhao80", "dblp": "294/6372;;;232/2089;https://dblp.uni-trier.de/pers/hd/g/Gao:Weihao;;", "google_scholar": ";;;J8TSTXMAAAAJ;E__5Lr0AAAAJ;mFiX664AAAAJ;EJXN6tYAAAAJ", "orcid": ";;0000-0002-9311-102X;;;0000-0002-9189-0840;", "linkedin": ";;;;weihao-gao-6517b3ab/;;", "or_profile": "~Alexander_Bukharin1;~Tianyi_Liu2;~Shengjie_Wang1;~Simiao_Zuo1;~Weihao_Gao1;~Wen_Yan1;~Tuo_Zhao1", "aff": "Georgia Institute of Technology;;ByteDance Inc.;Georgia Institute of Technology;;Bytedance Inc.;Georgia Institute of Technology", "aff_domain": "gatech.edu;;bytedance.com;gatech.edu;;bytedance.com;gatech.edu", "position": "PhD student;;Researcher;PhD student;;Researcher;Associate Professor", "bibtex": "@misc{\nbukharin2023machine,\ntitle={Machine Learning Force Fields with Data Cost Aware Training},\nauthor={Alexander Bukharin and Tianyi Liu and Shengjie Wang and Simiao Zuo and Weihao Gao and Wen Yan and Tuo Zhao},\nyear={2023},\nurl={https://openreview.net/forum?id=5C5ZcWvtI7S}\n}", "github": "", "project": "", "reviewers": "Y3t9;2odL;VP9R;K6wT", "site": "https://openreview.net/forum?id=5C5ZcWvtI7S", "pdf_size": 1487407, "recommendation": "3;3;6;6", "confidence": "4;3;2;4", "correctness": "3;3;3;3", "technical_novelty": "2;2;3;2", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "119;137;191;65", "wc_strength_and_weaknesses": "381;228;211;78", "wc_clarity_quality_novelty_and_reproducibility": "90;26;164;59", "wc_summary_review": "41;186;108;97", "wc_review": "631;577;674;299", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "506;530;785;336", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.5, 1.5 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 128.0, 45.0 ], "wc_strength_and_weaknesses_avg": [ 224.5, 107.41159155323973 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 84.75, 51.0459351956647 ], "wc_summary_review_avg": [ 108.0, 51.70589908318006 ], "wc_review_avg": [ 545.25, 146.2675203180802 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 539.25, 160.38605768582255 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.3015113445777637, "corr_recommendation_correctness": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8716781953397676677&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 8, "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "Georgia Institute of Technology;ByteDance;Bytedance Inc.", "aff_unique_dep": ";;", "aff_unique_url": "https://www.gatech.edu;https://www.bytedance.com;https://www.bytedance.com", "aff_unique_abbr": "Georgia 
Tech;ByteDance;Bytedance", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1;0", "aff_country_unique": "United States;China" }, { "id": "5DKHY-Ag62E", "title": "Distributionally Robust Model-Based Offline Reinforcement Learning with Near-Optimal Sample Complexity", "track": "main", "status": "Withdraw", "tldr": "This paper provides the first provably near-optimal robust offline RL algorithm that learns under model uncertainty and partial coverage.", "abstract": "This paper concerns the central issues of model robustness and sample efficiency in offline reinforcement learning (RL), which aims to learn to perform decision making from history data without active exploration. Due to uncertainties and variabilities of the environment, it is critical to learn a robust policy---with as few samples as possible---that performs well even when the deployed environment deviates from the nominal one used to collect the history dataset. We consider a distributionally robust formulation of offline RL, focusing on tabular robust Markov decision processes with an uncertainty set specified by the Kullback-Leibler divergence in both finite-horizon and infinite-horizon settings. To combat sample scarcity, a model-based algorithm that combines distributionally robust value iteration with the principle of pessimism in the face of uncertainty is proposed, by penalizing the robust value estimates with a carefully designed data-driven penalty term. Under a mild and tailored assumption of the history dataset that measures distribution shift without requiring full coverage of the state-action space, we establish the finite-sample complexity of the proposed algorithm, and further show it is almost unimprovable in light of a nearly-matching information-theoretic lower bound up to a polynomial factor of the (effective) horizon length. To the best of our knowledge, this provides the first provably near-optimal robust offline RL algorithm that learns under model uncertainty and partial coverage. 
", "keywords": "offline reinforcement learning;distributional robustness;model-based reinforcement learning", "primary_area": "", "supplementary_material": "/attachment/e72b3cbba56992932bb3b6302043db6396accb3c.zip", "author": "Laixi Shi;Yuejie Chi", "authorids": "~Laixi_Shi1;~Yuejie_Chi1", "gender": "F;", "homepage": "https://laixishi.github.io/;", "dblp": "211/7965;", "google_scholar": "V8RkRr8AAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Laixi_Shi1;~Yuejie_Chi1", "aff": "Carnegie Mellon University;", "aff_domain": "andrew.cmu.edu;", "position": "PhD student;", "bibtex": "@misc{\nshi2023distributionally,\ntitle={Distributionally Robust Model-Based Offline Reinforcement Learning with Near-Optimal Sample Complexity},\nauthor={Laixi Shi and Yuejie Chi},\nyear={2023},\nurl={https://openreview.net/forum?id=5DKHY-Ag62E}\n}", "github": "", "project": "", "reviewers": "8J56;U99n;ZKFf;WXHM;bArX", "site": "https://openreview.net/forum?id=5DKHY-Ag62E", "pdf_size": 620715, "recommendation": "3;3;5;6;6", "confidence": "3;4;4;3;2", "correctness": "4;4;4;3;4", "technical_novelty": "1;3;3;3;4", "empirical_novelty": "0;0;0;0;0", "wc_summary_paper": "43;43;77;75;46", "wc_strength_and_weaknesses": "207;172;106;56;92", "wc_clarity_quality_novelty_and_reproducibility": "2;45;149;22;81", "wc_summary_review": "9;101;73;218;63", "wc_review": "261;361;405;371;282", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "551;765;808;483;523", "reply_reviewers": "0;0;0;0;0", "reply_authors": "1;2;2;1;1", "recommendation_avg": [ 4.6, 1.3564659966250536 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "correctness_avg": [ 3.8, 0.39999999999999997 ], "technical_novelty_avg": [ 2.8, 0.9797958971132712 ], "empirical_novelty_avg": [ 0, 0 ], "wc_summary_paper_avg": [ 56.8, 15.727682601069999 ], "wc_strength_and_weaknesses_avg": [ 126.6, 55.011271572287804 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 59.8, 51.75867077118577 ], "wc_summary_review_avg": [ 92.8, 69.34666538486188 ], "wc_review_avg": [ 336.0, 55.04906902028408 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 626.0, 133.51254622693705 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.5122717640554154, "corr_recommendation_correctness": -0.5160468465421401, "gs_citation": 76, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12842212130852394407&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff_unique_index": "0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "5DkfiQPy9A", "title": "ACAT: Adversarial Counterfactual Attention for Classification and Detection in Medical Imaging", "track": "main", "status": "Reject", "tldr": "We propose a method to generate counterfactual images, which are adversarially obtained, and we derive saliency maps from them. These are employed in a framework that refines a classifier pipeline and helps learning better local features.", "abstract": "In some medical imaging tasks and other settings where only small parts of the image are informative for the classification task, traditional CNNs can sometimes struggle to generalise. Manually annotated Regions of Interest (ROI) are sometimes used to isolate the most informative parts of the image. 
However, these are expensive to collect and may vary significantly across annotators. To overcome these issues, we propose a method to generate saliency maps obtained from adversarially generated counterfactual images. With this method, we are able to isolate the area of interest in brain and lung CT scans without using any manual annotations. Our saliency maps, in the task of localising the lesion location out of 6 possible regions, obtain a score of $65.05 \\%$ on brain CT scans, improving on the score of $61.29 \\%$ obtained with the best competing method. We also employ the saliency maps in a framework that refines a classifier pipeline. In particular, the saliency maps are used to obtain soft spatial attention masks that modulate the image features at different scales. We refer to our method as \\emph{Adversarial Counterfactual Attention} (ACAT). ACAT increases the baseline classification accuracy of lesions in brain CT scans from $71.39 \\%$ to $72.55 \\%$ and of COVID-19 related findings in lung CT scans from $67.71 \\%$ to $70.84 \\%$ and exceeds the performance of competing methods.", "keywords": "Medical imaging;counterfactual examples;adversarial attacks;attention;saliency maps", "primary_area": "", "supplementary_material": "", "author": "Alessandro Fontanella;Antreas Antoniou;Wenwen Li;Joanna Wardlaw;Grant Mair;Emanuele Trucco;Amos Storkey", "authorids": "~Alessandro_Fontanella1;~Antreas_Antoniou3;~Wenwen_Li2;~Joanna_Wardlaw1;~Grant_Mair1;~Emanuele_Trucco1;~Amos_Storkey1", "gender": "M;F;;M;M;Not Specified;M", "homepage": ";;;https://www.ed.ac.uk/profile/dr-grant-mair;https://www.computing.dundee.ac.uk/about/staff/26;http://homepages.inf.ed.ac.uk/amos/;https://antreasantoniou.github.io/", "dblp": ";;;;http://dblp.uni-trier.de/pers/hd/t/Trucco:Emanuele;;", "google_scholar": "Bc81Y9YAAAAJ;https://scholar.google.co.uk/citations?user=sIHwzawAAAAJ;;;https://scholar.google.com.tw/citations?user=AoqaZGkAAAAJ;;", "orcid": ";my-orcid?orcid=0000-0003-3710-1967;;0000-0003-2189-443X;;;", "linkedin": ";wenwen-li-56a58164/?originalSubdomain=uk;;;;;", "or_profile": "~Alessandro_Fontanella1;~Wenwen_Li2;~Joanna_Wardlaw1;~Grant_Mair1;~Emanuele_Trucco1;~Amos_Storkey1;~Antreas_Antoniou2", "aff": "University of Edinburgh, University of Edinburgh;;;University of Edinburgh, University of Edinburgh;Dundee University;University of Edinburgh;", "aff_domain": "ed.ac.uk;;;ed.ac.uk; ;ed.ac.uk;", "position": "PhD student;;;Lecturer;Professor;Full Professor;", "bibtex": "@misc{\nfontanella2023acat,\ntitle={{ACAT}: Adversarial Counterfactual Attention for Classification and Detection in Medical Imaging},\nauthor={Alessandro Fontanella and Antreas Antoniou and Wenwen Li and Joanna Wardlaw and Grant Mair and Emanuele Trucco and Amos Storkey},\nyear={2023},\nurl={https://openreview.net/forum?id=5DkfiQPy9A}\n}", "github": "", "project": "", "reviewers": "3Yeb;svqR;McTW", "site": "https://openreview.net/forum?id=5DkfiQPy9A", "pdf_size": 8985424, "recommendation": "3;5;5", "confidence": "5;4;4", "correctness": "3;3;3", "technical_novelty": "2;2;2", "empirical_novelty": "2;3;2", "wc_summary_paper": "41;88;59", "wc_strength_and_weaknesses": "36;345;251", "wc_clarity_quality_novelty_and_reproducibility": "12;39;49", "wc_summary_review": "10;48;40", "wc_review": "99;520;399", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "84;429;223", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], 
"correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 62.666666666666664, 19.362047641943473 ], "wc_strength_and_weaknesses_avg": [ 210.66666666666666, 129.33247422395078 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 33.333333333333336, 15.627610892974722 ], "wc_summary_review_avg": [ 32.666666666666664, 16.35712552851373 ], "wc_review_avg": [ 339.3333333333333, 176.9752022804953 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 245.33333333333334, 141.72822192100236 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.9999999999999997, "corr_recommendation_correctness": 0.0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9627517812068713308&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Edinburgh;University of Dundee", "aff_unique_dep": ";", "aff_unique_url": "https://www.ed.ac.uk;https://www.dundee.ac.uk", "aff_unique_abbr": "Edinburgh;Dundee", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Explaining RL Decisions with Trajectories", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11231", "id": "5Egggz1q575", "poster": "/media/PosterPDFs/ICLR%202023/11231.png?t=1682773900.2705839", "openreview": "https://openreview.net/forum?id=5Egggz1q575", "slides": "https://iclr.cc/virtual/2023/poster/11231", "video": "https://iclr.cc/virtual/2023/poster/11231", "author_site": "Shripad Deshmukh, Arpan Dasgupta, Balaji Krishnamurthy, Nan Jiang, Chirag Agarwal, Georgios Theocharous, Jayakumar Subramanian", "tldr": "This work focuses on idea of explaining actions of offline RL agent by attributing the actions to trajectories encountered during the training.", "abstract": "Explanation is a key component for the adoption of reinforcement learning (RL) in many real-world decision-making problems. In the literature, the explanation is often provided by saliency attribution to the features of the RL agent's state. In this work, we propose a complementary approach to these explanations, particularly for offline RL, where we attribute the policy decisions of a trained RL agent to the trajectories encountered by it during training. To do so, we encode trajectories in offline training data individually as well as collectively (encoding a set of trajectories). We then attribute policy decisions to a set of trajectories in this encoded space by estimating the sensitivity of the decision with respect to that set. Further, we demonstrate the effectiveness of the proposed approach in terms of quality of attributions as well as practical scalability in diverse environments that involve both discrete and continuous state and action spaces such as grid-worlds, video games (Atari) and continuous control (MuJoCo). 
We also conduct a human study on a simple navigation task to observe how participants' understanding of the task compares with the data attributed for a trained RL policy.", "keywords": "Explainable RL;Explainable AI;Offline Reinforcement Learning;Trajectory Attribution;Decision-Aware AI", "primary_area": "", "supplementary_material": "", "author": "Shripad Vilasrao Deshmukh;Arpan Dasgupta;Balaji Krishnamurthy;Nan Jiang;Chirag Agarwal;Georgios Theocharous;Jayakumar Subramanian", "authorids": "~Shripad_Vilasrao_Deshmukh2;arpan.dasgupta@research.iiit.ac.in;~Balaji_Krishnamurthy1;~Nan_Jiang2;~Chirag_Agarwal1;~Georgios_Theocharous1;~Jayakumar_Subramanian1", "gender": "M;;M;M;M;M;M", "homepage": "https://shripaddeshmukh.github.io/;;;http://nanjiang.cs.illinois.edu;https://chirag-agarwall.github.io/;https://research.adobe.com/person/georgios-theocharous/;", "dblp": "255/5905;;79/1076;06/4489-8;173/8821;;202/5957", "google_scholar": "g90em_oAAAAJ;;n8iUBg8AAAAJ;nUlanA8AAAAJ;https://scholar.google.com/citations?hl=en;;LewRar8AAAAJ", "orcid": ";;0000-0002-0366-2427;;;;0000-0003-4621-2677", "linkedin": "shripad-deshmukh/;;balaji-krishnamurthy-4241695/;nan-jiang-28139937/;chirag-agarwal-0a6a43a1/;;", "or_profile": "~Shripad_Vilasrao_Deshmukh2;arpan.dasgupta@research.iiit.ac.in;~Balaji_Krishnamurthy1;~Nan_Jiang2;~Chirag_Agarwal1;~Georgios_Theocharous1;~Jayakumar_Subramanian1", "aff": "Adobe Systems;;Adobe Systems;University of Illinois, Urbana Champaign;Adobe Systems;Adobe;Adobe Systems", "aff_domain": "adobe.com;;adobe.com;illinois.edu;adobe.com;adobe.com;adobe.com", "position": "Researcher;;Principal Scientist;Assistant Professor;Researcher;Research Scientist;Senior Research Scientist", "bibtex": "@inproceedings{\ndeshmukh2023explaining,\ntitle={Explaining {RL} Decisions with Trajectories},\nauthor={Shripad Vilasrao Deshmukh and Arpan Dasgupta and Balaji Krishnamurthy and Nan Jiang and Chirag Agarwal and Georgios Theocharous and Jayakumar Subramanian},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=5Egggz1q575}\n}", "github": "", "project": "", "reviewers": "NGMa;UWVh;CPZK;XJVh", "pdf_size": 5192901, "recommendation": "5;5;6;6", "confidence": "4;4;4;3", "correctness": "3;3;3;4", "technical_novelty": "3;2;3;2", "empirical_novelty": "3;2;3;2", "wc_summary_paper": "116;102;59;82", "wc_strength_and_weaknesses": "764;87;340;21", "wc_clarity_quality_novelty_and_reproducibility": "31;175;54;19", "wc_summary_review": "54;143;2;167", "wc_review": "965;507;455;289", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 89.75, 21.47527648250425 ], "wc_strength_and_weaknesses_avg": [ 303.0, 291.5776054500757 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 69.75, 62.05390801553114 ], "wc_summary_review_avg": [ 91.5, 66.65020630125612 ], "wc_review_avg": [ 554.0, 250.57733337235433 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 11, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=17545966142697267061&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=5Egggz1q575", "email": "adobe.com;;adobe.com;illinois.edu;adobe.com;adobe.com;adobe.com", "author_num": 7, "aff_unique_index": "0;0;1;0;0;0", "aff_unique_norm": "Adobe;University of Illinois Urbana-Champaign", "aff_unique_dep": "Adobe Systems Incorporated;", "aff_unique_url": "https://www.adobe.com;https://illinois.edu", "aff_unique_abbr": "Adobe;UIUC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "5FqeE2SojJi", "title": "Proportional Amplitude Spectrum Training Augmentation for Synthetic-to-Real Domain Generalization", "track": "main", "status": "Reject", "tldr": "We propose Proportional Amplitude Spectrum Training Augmentation (PASTA), an augmentation strategy for Synthetic-to-Real Generalization", "abstract": "Synthetic data offers the promise of cheap and bountiful training data for settings where lots of labeled real-world data for some task is unavailable. However, models trained on synthetic data significantly underperform on real-world data. In this paper, we propose Proportional Amplitude Spectrum Training Augmentation (PASTA), a simple and effective augmentation strategy to improve out-of-the-box synthetic-to-real (syn-to-real) generalization performance. PASTA involves perturbing the amplitude spectrums of the synthetic images in the Fourier domain to generate augmented views. We design PASTA to perturb the amplitude spectrums in a structured manner such that high-frequency components are perturbed relatively more than the low-frequency ones. For the tasks of semantic segmentation (GTAV\u2192Real), object detection (Sim10K\u2192Real), and object recognition (VisDAC Syn\u2192Real), across a total of 5 syn-to-real shifts, we find that PASTA either outperforms or is consistently competitive with more complex state-of-the-art methods while being complementary to other generalization approaches.", "keywords": "Synthetic-to-Real Generalization;Fourier Space Augmentation;Single Domain Generalization", "primary_area": "", "supplementary_material": "", "author": "Prithvijit Chattopadhyay;Kartik Sarangmath;Vivek Vijaykumar;Judy Hoffman", "authorids": "~Prithvijit_Chattopadhyay1;~Kartik_Sarangmath1;~Vivek_Vijaykumar1;~Judy_Hoffman1", "gender": "M;M;M;F", "homepage": "https://prithv1.xyz/;;;https://www.cc.gatech.edu/~judy/", "dblp": "179/2452;282/7273;;45/10336", "google_scholar": "https://scholar.google.co.in/citations?user=rIK7AMkAAAAJ;;bYbusDQAAAAJ;mqpjAt4AAAAJ", "orcid": ";;;", "linkedin": ";kartiksarangmath/;vivek-vjk/;", "or_profile": "~Prithvijit_Chattopadhyay1;~Kartik_Sarangmath1;~Vivek_Vijaykumar1;~Judy_Hoffman1", "aff": "Georgia Institute of Technology;Georgia Institute of Technology;Databricks;Georgia Institute of Technology", "aff_domain": "gatech.edu;gatech.edu;databricks.com;gatech.edu", "position": "PhD;MS student;Intern;Assistant Professor", "bibtex": "@misc{\nchattopadhyay2023proportional,\ntitle={Proportional Amplitude Spectrum Training Augmentation for Synthetic-to-Real Domain Generalization},\nauthor={Prithvijit Chattopadhyay and Kartik Sarangmath and Vivek Vijaykumar and Judy Hoffman},\nyear={2023},\nurl={https://openreview.net/forum?id=5FqeE2SojJi}\n}", "github": "", "project": "", "reviewers": "tiZd;vBft;PoSx;nJc4", "site": "https://openreview.net/forum?id=5FqeE2SojJi", "pdf_size": 52225630, 
"recommendation": "5;5;6;8", "confidence": "4;4;3;3", "correctness": "2;3;3;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "63;123;44;84", "wc_strength_and_weaknesses": "296;92;185;168", "wc_clarity_quality_novelty_and_reproducibility": "17;30;24;16", "wc_summary_review": "15;52;36;93", "wc_review": "391;297;289;361", "wc_reply_reviewers": "554;0;0;0", "wc_reply_authors": "2978;517;565;687", "reply_reviewers": "2;0;0;0", "reply_authors": "5;1;1;1", "recommendation_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 78.5, 29.33001875212493 ], "wc_strength_and_weaknesses_avg": [ 185.25, 72.9019032673359 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 21.75, 5.673402858955108 ], "wc_summary_review_avg": [ 49.0, 28.591956910991595 ], "wc_review_avg": [ 334.5, 42.92726406376256 ], "wc_reply_reviewers_avg": [ 138.5, 239.8890368482895 ], "wc_reply_authors_avg": [ 1186.75, 1036.0338737222833 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.0, 1.7320508075688772 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.8164965809277259, "corr_recommendation_correctness": 0.4714045207910316, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8626740778912204680&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Georgia Institute of Technology;Databricks", "aff_unique_dep": ";", "aff_unique_url": "https://www.gatech.edu;https://databricks.com", "aff_unique_abbr": "Georgia Tech;Databricks", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "5G_SmGZlXQ", "title": "Toxicity in Multilingual Machine Translation at Scale", "track": "main", "status": "Reject", "tldr": "Quantification, analysis and mitigation recommendations of toxicity for 164 languages ", "abstract": "Machine Translation systems can produce different types of errors, some of which get characterized as critical or catastrophic due to the specific negative impact they can have on users. Automatic or human evaluation metrics do not necessarily differentiate between such critical errors and more innocuous ones. In this paper we focus on one type of critical error: added toxicity. We evaluate and analyze added toxicity when translating a large evaluation dataset (HOLISTICBIAS, over 472k sentences, covering 13 demographic axes) from English into 164 languages. The toxicity automatic evaluation shows that added toxicity across languages varies from 0% to 5%. The output languages with the most added toxicity tend to be low-resource ones, and the demographic axes with the most added toxicity include sexual orientation, gender and sex, and ability. We also perform human evaluation on a subset of 8 directions, confirming the prevalence of true\nadded toxicity.\n\nWe use a measurement of the amount of source contribution to the translation, where a low source contribution implies hallucination, to interpret what causes toxicity. We observe that the source contribution is somewhat correlated with toxicity but that 45.6% of added toxic words have a high source contribution, suggesting that much of the added toxicity may be due to mistranslations. 
Combining the signal of source contribution level with a measurement of translation robustness allows us to flag 22.3% of added toxicity, suggesting that added toxicity may be related to both hallucination and the stability of translations in different contexts. Given these findings, our recommendations to reduce added toxicity are to curate training data to avoid mistranslations, mitigate hallucination and check unstable translations.", "keywords": "Toxicity;Holistic Bias;Input Attributions;Multilingual Machine Translation;Scale", "primary_area": "", "supplementary_material": "", "author": "Marta R. Costa-juss\u00e0;Christophe Ropers;Eric Michael Smith;Daniel Licht;Carlos Escolano;Javier Ferrando", "authorids": "~Marta_R._Costa-juss\u00e01;chrisropers@fb.com;~Eric_Michael_Smith1;dlicht@fb.com;~Carlos_Escolano1;~Javier_Ferrando1", "gender": "F;;Non-Binary;;M;M", "homepage": "https://www.costa-jussa.com;;;;;https://javiferran.github.io/personal/", "dblp": "17/2183;;;;51/7736;267/5458", "google_scholar": "ESqQ7FoAAAAJ;;uOK8DfQAAAAJ;;https://scholar.google.es/citations?user=yja1284AAAAJ;ZNsw8ZUAAAAJ", "orcid": ";;;;;", "linkedin": ";;;;https://es.linkedin.com/in/carlos-escolano-ba26549a;javierferrandomonsonis/", "or_profile": "~Marta_R._Costa-juss\u00e01;chrisropers@fb.com;~Eric_Michael_Smith1;dlicht@fb.com;~Carlos_Escolano1;~Javier_Ferrando1", "aff": "Meta;;Meta AI;;Universidad Polit\u00e9cnica de Cataluna;Universidad Polit\u00e9cnica de Cataluna", "aff_domain": "fb.com;;meta.com;;upc.edu;upc.edu", "position": "Research Scientist;;Researcher;;Postdoc;PhD student", "bibtex": "@misc{\ncosta-juss{\\`a}2023toxicity,\ntitle={Toxicity in Multilingual Machine Translation at Scale},\nauthor={Marta R. Costa-juss{\\`a} and Christophe Ropers and Eric Michael Smith and Daniel Licht and Carlos Escolano and Javier Ferrando},\nyear={2023},\nurl={https://openreview.net/forum?id=5G_SmGZlXQ}\n}", "github": "", "project": "", "reviewers": "mzzx;APYv;PvYr;HKPu", "site": "https://openreview.net/forum?id=5G_SmGZlXQ", "pdf_size": 1741869, "recommendation": "3;3;5;8", "confidence": "3;4;3;4", "correctness": "4;3;3;3", "technical_novelty": "3;2;2;2", "empirical_novelty": "3;2;2;4", "wc_summary_paper": "68;70;82;204", "wc_strength_and_weaknesses": "248;763;44;497", "wc_clarity_quality_novelty_and_reproducibility": "6;87;134;158", "wc_summary_review": "6;27;145;94", "wc_review": "328;947;405;953", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "197;544;187;351", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.75, 2.0463381929681126 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 106.0, 56.83308895353129 ], "wc_strength_and_weaknesses_avg": [ 388.0, 269.4633555791956 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 96.25, 58.0274719421758 ], "wc_summary_review_avg": [ 68.0, 55.06813960903346 ], "wc_review_avg": [ 658.25, 293.0250629212457 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 319.75, 144.87473037075858 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.3665083330689157, "corr_recommendation_correctness": -0.49374193110101877, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16408923714085128860&as_sdt=5,33&sciodt=0,33&hl=en", 
"gs_version_total": 7, "aff_unique_index": "0;0;1;1", "aff_unique_norm": "Meta;Universitat Polit\u00e8cnica de Catalunya", "aff_unique_dep": "Meta Platforms, Inc.;", "aff_unique_url": "https://meta.com;https://www.upc.edu", "aff_unique_abbr": "Meta;UPC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1", "aff_country_unique": "United States;Spain" }, { "id": "5H1MT1RuWP4", "title": "TG-Gen: A Deep Generative Model Framework for Temporal Graphs", "track": "main", "status": "Withdraw", "tldr": "We propose, TG-Gen, a generic framework for generating synthetic temporal graph data. ", "abstract": "Graph Neural Networks (GNNs) have recently emerged as popular methods for learning representations of non-euclidean data often encountered in diverse areas ranging from chemistry and biology to social and financial networks. More recently, research has focused specifically on learning on temporal graphs, wherein the nodes and edges of a graph, and their respective features, may change over time. However, existing work in the temporal graph space has largely focused on discriminative models. In this work, we present TG-Gen, a generic generative framework for temporal graph data, which combines an encoder module that creates temporal embeddings of nodes from raw interaction data, with a decoder module that uses the learned temporal embeddings to create a deep probabilistic model of interaction data. We show that TG-Gen is able to generate robust and accurate synthetic data for temporal graphs for two traditional benchmark data and a novel dataset. Additionally, we demonstrate that TG-Gen is able to learn generalizable representations of temporal graphs and outperforms the previous state-of-the-art method in the discriminative regime, such as for dynamic link prediction. 
Finally, we perform comprehensive ablation studies which show the effects of specific modules and configurations of our model.", "keywords": "graph neural networks;generative models;temporal graphs", "primary_area": "", "supplementary_material": "/attachment/20fa4e198655b2e17b73b80702a534e484762ad6.zip", "author": "Ryien Hosseini;Filippo Simini", "authorids": "~Ryien_Hosseini1;~Filippo_Simini1", "gender": "M;", "homepage": "https://ryien.com;", "dblp": "324/8280;136/7111", "google_scholar": "https://scholar.google.com/citations?hl=en;7cf56rIAAAAJ", "orcid": ";", "linkedin": "ryienh/;", "or_profile": "~Ryien_Hosseini1;~Filippo_Simini1", "aff": "Argonne National Laboratory;Argonne National Laboratory", "aff_domain": "anl.gov;anl.gov", "position": "Intern;Researcher", "bibtex": "@misc{\nhosseini2023tggen,\ntitle={{TG}-Gen: A Deep Generative Model Framework for Temporal Graphs},\nauthor={Ryien Hosseini and Filippo Simini},\nyear={2023},\nurl={https://openreview.net/forum?id=5H1MT1RuWP4}\n}", "github": "", "project": "", "reviewers": "fcNQ;7W3b;ktQP;Sy6W", "site": "https://openreview.net/forum?id=5H1MT1RuWP4", "pdf_size": 1114186, "recommendation": "3;3;3;6", "confidence": "4;5;4;3", "correctness": "3;2;2;3", "technical_novelty": "1;2;2;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "58;14;51;39", "wc_strength_and_weaknesses": "112;98;91;128", "wc_clarity_quality_novelty_and_reproducibility": "87;34;51;114", "wc_summary_review": "47;71;396;26", "wc_review": "304;217;589;307", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.75, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 40.5, 16.740669042783207 ], "wc_strength_and_weaknesses_avg": [ 107.25, 14.16642156650719 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 71.5, 31.11671576500322 ], "wc_summary_review_avg": [ 135.0, 151.5272252765159 ], "wc_review_avg": [ 354.25, 140.27005204247982 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.816496580927726, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:gzsRXpHuSo8J:scholar.google.com/&scioq=TG-Gen:+A+Deep+Generative+Model+Framework+for+Temporal+Graphs&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Argonne National Laboratory", "aff_unique_dep": "", "aff_unique_url": "https://www.anl.gov", "aff_unique_abbr": "ANL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "5H9_FUPA9r8", "title": "CommsVAE: Learning the brain's macroscale communication dynamics using coupled sequential VAEs", "track": "main", "status": "Reject", "tldr": "We address three issues with common connectivity approaches by explicitly modeling the directionality of communication, finding communication at each timestep, and encouraging sparsity.", "abstract": "Communication within or between complex systems is commonplace in the natural sciences and fields such as graph neural networks. 
The brain is a perfect example of such a complex system, where communication between brain regions is constantly being orchestrated. To analyze communication, the brain is often split up into anatomical regions that each perform certain computations. These regions must interact and communicate with each other to perform tasks and support higher-level cognition. On a macroscale, these regions communicate through signal propagation along the cortex and along white matter tracts over longer distances. When and what types of signals are communicated over time is an unsolved problem and is often studied using either functional or structural data. In this paper, we propose a non-linear generative approach to communication from functional data. We address three issues with common connectivity approaches by explicitly modeling the directionality of communication, finding communication at each timestep, and encouraging sparsity. To evaluate our model, we simulate temporal data that has sparse communication between nodes embedded in it and show that our model can uncover the expected communication dynamics. Subsequently, we apply our model to temporal neural data from multiple tasks and show that our approach models communication that is more specific to each task. The specificity of our method means it can have an impact on the understanding of psychiatric disorders, which are believed to be related to highly specific communication between brain regions compared to controls. In sum, we propose a general model for dynamic communication learning on graphs, and show its applicability to a subfield of the natural sciences, with potential widespread scientific impact.", "keywords": "variational autoencoder;computational neuroscience;graphs;fMRI;sequential variational autoencoder;graph learning;communications", "primary_area": "", "supplementary_material": "", "author": "Eloy Geenjaar;Noah Lewis;Amrit Kashyap;Robyn Miller;Vince Calhoun", "authorids": "~Eloy_Geenjaar1;~Noah_Lewis1;~Amrit_Kashyap1;~Robyn_Miller1;~Vince_Calhoun1", "gender": "M;;;;", "homepage": "http://eloygeenjaar.nl;;https://neurologie.charite.de/en/metas/person_detail/person/address_detail/amrit_kashyap_phd-1/;;", "dblp": "289/0786;;;;48/3821.html", "google_scholar": "https://scholar.google.com/user=NMq1qHIAAAAJ;;;zPpJc94AAAAJ;WNOoGKIAAAAJ", "orcid": "0000-0001-5448-6358;0000-0002-5712-2434;;;", "linkedin": "eloy-geenjaar-23904bb0/;;;;", "or_profile": "~Eloy_Geenjaar1;~Noah_Lewis1;~Amrit_Kashyap1;~Robyn_Miller1;~Vince_Calhoun1", "aff": "Georgia Institute of Technology;Georgia Institute of Technology;;Georgia State University;Emory University", "aff_domain": "gatech.edu;gatech.edu;;gsu.edu;emory.edu", "position": "PhD student;PhD student;;Assistant Professor;Full Professor", "bibtex": "@misc{\ngeenjaar2023commsvae,\ntitle={Comms{VAE}: Learning the brain's macroscale communication dynamics using coupled sequential {VAE}s},\nauthor={Eloy Geenjaar and Noah Lewis and Amrit Kashyap and Robyn Miller and Vince Calhoun},\nyear={2023},\nurl={https://openreview.net/forum?id=5H9_FUPA9r8}\n}", "github": "", "project": "", "reviewers": "zo6y;kyae;sVVG;qncm", "site": "https://openreview.net/forum?id=5H9_FUPA9r8", "pdf_size": 10542507, "recommendation": "5;5;5;6", "confidence": "4;3;3;5", "correctness": "1;3;2;3", "technical_novelty": "1;3;4;2", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "91;229;16;64", "wc_strength_and_weaknesses": "6;480;167;219", "wc_clarity_quality_novelty_and_reproducibility": "6;66;14;144", "wc_summary_review": 
"225;123;30;33", "wc_review": "328;898;227;460", "wc_reply_reviewers": "88;0;0;0", "wc_reply_authors": "1122;1241;0;0", "reply_reviewers": "1;0;0;0", "reply_authors": "2;2;0;0", "recommendation_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 2.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.5, 1.118033988749895 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 100.0, 79.17385932237988 ], "wc_strength_and_weaknesses_avg": [ 218.0, 170.43327139968886 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 57.5, 54.99772722576816 ], "wc_summary_review_avg": [ 102.75, 79.86355551814607 ], "wc_review_avg": [ 478.25, 256.03942567503157 ], "wc_reply_reviewers_avg": [ 22.0, 38.1051177665153 ], "wc_reply_authors_avg": [ 590.75, 592.2463064469039 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 1.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.8703882797784891, "corr_recommendation_correctness": 0.5222329678670935, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:_wadMbUR_RcJ:scholar.google.com/&scioq=CommsVAE:+Learning+the+brain%27s+macroscale+communication+dynamics+using+coupled+sequential+VAEs&hl=en&as_sdt=0,5", "gs_version_total": 4, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Georgia Institute of Technology;Georgia State University;Emory University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.gatech.edu;https://www.gsu.edu;https://www.emory.edu", "aff_unique_abbr": "Georgia Tech;GSU;Emory", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Dual Diffusion Implicit Bridges for Image-to-Image Translation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11739", "id": "5HLoTvVGDe", "poster": "", "openreview": "https://openreview.net/forum?id=5HLoTvVGDe", "slides": "https://iclr.cc/virtual/2023/poster/11739", "video": "https://iclr.cc/virtual/2023/poster/11739", "author_site": "Xuan Su, Jiaming Song, Chenlin Meng, Stefano Ermon", "tldr": "", "abstract": "Common image-to-image translation methods rely on joint training over data from both source and target domains. The training process requires concurrent access to both datasets, which hinders data separation and privacy protection; and existing models cannot be easily adapted for translation of new domain pairs. We present Dual Diffusion Implicit Bridges (DDIBs), an image translation method based on diffusion models, that circumvents training on domain pairs. Image translation with DDIBs relies on two diffusion models trained independently on each domain, and is a two-step process: DDIBs first obtain latent encodings for source images with the source diffusion model, and then decode such encodings using the target model to construct target images. Both steps are defined via ordinary differential equations (ODEs), thus the process is cycle consistent only up to discretization errors of the ODE solvers. Theoretically, we interpret DDIBs as concatenation of source to latent, and latent to target Schrodinger Bridges, a form of entropy-regularized optimal transport, to explain the efficacy of the method. 
Experimentally, we apply DDIBs on synthetic and high-resolution image datasets, to demonstrate their utility in a wide variety of translation tasks and their inherent optimal transport properties.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/485cbc883fcd1652371aa7f82cdab0b359963652.zip", "author": "Xuan Su;Jiaming Song;Chenlin Meng;Stefano Ermon", "authorids": "~Xuan_Su1;~Jiaming_Song1;~Chenlin_Meng1;~Stefano_Ermon1", "gender": "M;M;F;M", "homepage": "https://github.com/suxuann;http://tsong.me;https://chenlin9.github.io/;http://cs.stanford.edu/~ermon/", "dblp": ";173/5104;227/2517;47/8135", "google_scholar": "aA9q8dYAAAAJ;;nEFU7wIAAAAJ;", "orcid": ";;;", "linkedin": "xuan-su-a7b294257/;jiamings/;;", "or_profile": "~Xuan_Su1;~Jiaming_Song1;~Chenlin_Meng1;~Stefano_Ermon1", "aff": "Stanford University;NVIDIA;Stanford University;Stanford University", "aff_domain": "stanford.edu;nvidia.com;stanford.edu;stanford.edu", "position": "MS student;Researcher;PhD student;Associate Professor", "bibtex": "@inproceedings{\nsu2023dual,\ntitle={Dual Diffusion Implicit Bridges for Image-to-Image Translation},\nauthor={Xuan Su and Jiaming Song and Chenlin Meng and Stefano Ermon},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=5HLoTvVGDe}\n}", "github": "", "project": "", "reviewers": "2Rpq;Ztwc;iEhQ;FZcm", "pdf_size": 47947287, "recommendation": "5;5;6;10", "confidence": "3;4;3;5", "correctness": "3;4;3;4", "technical_novelty": "3;2;3;4", "empirical_novelty": "3;2;3;4", "wc_summary_paper": "41;76;70;81", "wc_strength_and_weaknesses": "176;139;193;112", "wc_clarity_quality_novelty_and_reproducibility": "20;12;65;42", "wc_summary_review": "23;74;58;74", "wc_review": "260;301;386;309", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "225;369;523;32", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 6.5, 2.0615528128088303 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 67.0, 15.508062419270823 ], "wc_strength_and_weaknesses_avg": [ 155.0, 31.583223394707513 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 34.75, 20.632195714465293 ], "wc_summary_review_avg": [ 57.25, 20.825165065372232 ], "wc_review_avg": [ 314.0, 45.53570028010989 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 287.25, 181.16894739441415 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.8043996665398437, "corr_recommendation_correctness": 0.48507125007266594, "gs_citation": 227, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16410633619023162793&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=5HLoTvVGDe", "email": "stanford.edu;nvidia.com;stanford.edu;stanford.edu", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Stanford University;NVIDIA", "aff_unique_dep": ";NVIDIA Corporation", "aff_unique_url": "https://www.stanford.edu;https://www.nvidia.com", "aff_unique_abbr": "Stanford;NVIDIA", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Lossless Adaptation of Pretrained Vision Models For Robotic 
Manipulation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11065", "id": "5IND3TXJRb-", "poster": "", "openreview": "https://openreview.net/forum?id=5IND3TXJRb-", "slides": "https://iclr.cc/virtual/2023/poster/11065", "video": "https://iclr.cc/virtual/2023/poster/11065", "author_site": "Mohit Sharma, Claudio Fantacci, Yuxiang Zhou, Skanda Koppula, Nicolas Heess, Jonathan Scholz, Yusuf Aytar", "tldr": "", "abstract": "Recent works have shown that large models pretrained on common visual learning tasks can provide useful representations for a wide range of specialized perception problems, as well as a variety of robotic manipulation tasks. While prior work on robotic manipulation has predominantly used frozen pretrained features, we demonstrate that in robotics this approach can fail to reach optimal performance, and that fine-tuning of the full model can lead to significantly better results. Unfortunately, fine-tuning disrupts the pretrained visual representation, and causes representational drift towards the fine-tuned task thus leading to a loss of the versatility of the original model. We introduce a method for lossless adaptation to address this shortcoming of classical fine-tuning. We demonstrate that appropriate placement of our parameter efficient adapters can significantly reduce the performance gap between frozen pretrained representations and full end-to-end fine-tuning without changes to the original representation and thus preserving original capabilities of the pretrained model. We perform a comprehensive investigation across three major model architectures (ViTs, NFNets, and ResNets), supervised (ImageNet-1K classification) and self-supervised pretrained weights (CLIP, BYOL, Visual MAE) in three manipulation task domains and 35 individual tasks, and demonstrate that our claims are strongly validated in various settings. 
Please see real world videos at https://sites.google.com/view/robo-adapters", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/cae22340230a301e88d842a8037cda48f40ebe8d.zip", "author": "Mohit Sharma;Claudio Fantacci;Yuxiang Zhou;Skanda Koppula;Nicolas Heess;Jon Scholz;Yusuf Aytar", "authorids": "~Mohit_Sharma1;cfantacci@deepmind.com;~Yuxiang_Zhou2;~Skanda_Koppula1;~Nicolas_Heess1;~Jon_Scholz1;~Yusuf_Aytar1", "gender": "M;;M;;;;M", "homepage": "https://mohitsharma0690.github.io/;;https://yuxiang-zhou.github.io/;;;;", "dblp": ";;27/10149;;76/9181;;41/5577", "google_scholar": ";;https://scholar.google.co.uk/citations?user=3dYhzNQAAAAJ;;79k7bGEAAAAJ;;0ncQNL8AAAAJ", "orcid": ";;;;;;", "linkedin": ";;;;;;", "or_profile": "~Mohit_Sharma1;cfantacci@deepmind.com;~Yuxiang_Zhou2;~Skanda_Koppula1;~Nicolas_Heess1;~Jon_Scholz1;~Yusuf_Aytar1", "aff": "Carnegie Mellon University;;Google DeepMind;;Google DeepMind;;Google DeepMind", "aff_domain": "cmu.edu;;deepmind.com;;google.com;;google.com", "position": "PhD student;;Research Engineer;;Research Scientist;;Research Scientist", "bibtex": "@inproceedings{\nsharma2023lossless,\ntitle={Lossless Adaptation of Pretrained Vision Models For Robotic Manipulation},\nauthor={Mohit Sharma and Claudio Fantacci and Yuxiang Zhou and Skanda Koppula and Nicolas Heess and Jon Scholz and Yusuf Aytar},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=5IND3TXJRb-}\n}", "github": "", "project": "", "reviewers": "1MbS;a8EG;eGE9;TzMT", "pdf_size": 1888316, "recommendation": "5;6;6;8", "confidence": "3;4;4;4", "correctness": "3;3;3;4", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;3;2;3", "wc_summary_paper": "155;68;112;81", "wc_strength_and_weaknesses": "194;257;167;203", "wc_clarity_quality_novelty_and_reproducibility": "11;217;69;45", "wc_summary_review": "35;47;114;36", "wc_review": "395;589;462;365", "wc_reply_reviewers": "215;142;71;0", "wc_reply_authors": "645;1377;781;337", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;3;1", "recommendation_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 104.0, 33.50373113550191 ], "wc_strength_and_weaknesses_avg": [ 205.25, 32.683137854251385 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 85.5, 78.66860873308997 ], "wc_summary_review_avg": [ 58.0, 32.67261850540908 ], "wc_review_avg": [ 452.75, 86.14631448878124 ], "wc_reply_reviewers_avg": [ 107.0, 80.05310737254364 ], "wc_reply_authors_avg": [ 785.0, 377.751240898028 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.6622661785325219, "corr_recommendation_correctness": 0.9271726499455306, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11493009189343813254&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=5IND3TXJRb-", "email": "cmu.edu;;deepmind.com;;google.com;;google.com", "author_num": 7, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Carnegie Mellon University;Google", "aff_unique_dep": ";Google DeepMind", "aff_unique_url": "https://www.cmu.edu;https://deepmind.com", "aff_unique_abbr": "CMU;DeepMind", "aff_campus_unique_index": 
"", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "United States;United Kingdom" }, { "id": "5Jq1ASp33L", "title": "Understanding Incremental Learning of Gradient Descent: A Fine-grained analysis of Matrix Sensing", "track": "main", "status": "Reject", "tldr": "", "abstract": "The implicit bias of optimization algorithms such as gradient descent (GD) is believed to play an important role in generalization of modern machine learning methods such as deep learning. This paper provides a fine-grained analysis of the dynamics of GD for the matrix sensing problem, whose goal is to recover a low-rank ground-truth matrix from near-isotropic linear measurements. With small initialization, we that GD behaves similarly to the greedy low-rank learning heuristics~\\citep{li2020towards} and follows an incremental learning procedure~\\citep{gissin2019implicit}. That is, GD sequentially learns solutions with increasing ranks until it recovers the ground-truth matrix. Compared to existing works which only analyze the first learning phase for rank-1 solutions, our result is stronger because it characterizes the whole learning process. Moreover, our analysis of the incremental learning procedure applies to the\nunder-parameterized regime as well. As a key ingredient of our analysis, we observe that GD always follows an approximately low-rank trajectory and develops novel landscape properties for matrix sensing with low-rank parameterization. Finally, we conduct numerical experiments which confirm our theoretical findings.", "keywords": "deep learning theory;incremental learning;non-convex optimization", "primary_area": "", "supplementary_material": "", "author": "Jikai Jin;Zhiyuan Li;Kaifeng Lyu;Simon Shaolei Du;Jason D. Lee", "authorids": "~Jikai_Jin1;~Zhiyuan_Li2;~Kaifeng_Lyu2;~Simon_Shaolei_Du1;~Jason_D._Lee1", "gender": "M;M;M;M;M", "homepage": "https://www.jkjin.com/;https://zhiyuanli.ttic.edu;https://kaifeng.ac/;http://simonshaoleidu.com;https://jasondlee88.github.io/", "dblp": "276/0406;l/ZhiyuanLi;220/3283;176/5602;88/3262", "google_scholar": "xQqZt2AAAAAJ;https://scholar.google.com/citations?hl=en;843JJtgAAAAJ;OttawxUAAAAJ;GR_DsT0AAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Jikai_Jin1;~Zhiyuan_Li2;~Kaifeng_Lyu2;~Simon_Shaolei_Du1;~Jason_D._Lee1", "aff": "Peking University;Computer Science Department, Stanford University;Princeton University;Meta Facebook;Princeton University", "aff_domain": "pku.edu.cn;cs.stanford.edu;princeton.edu;fb.com;princeton.edu", "position": "Undergrad student;Postdoc;PhD student;Visiting Professor;Assistant Professor", "bibtex": "@misc{\njin2023understanding,\ntitle={Understanding Incremental Learning of Gradient Descent: A Fine-grained analysis of Matrix Sensing},\nauthor={Jikai Jin and Zhiyuan Li and Kaifeng Lyu and Simon Shaolei Du and Jason D. 
Lee},\nyear={2023},\nurl={https://openreview.net/forum?id=5Jq1ASp33L}\n}", "github": "", "project": "", "reviewers": "7KXn;rmE5;vVy7", "site": "https://openreview.net/forum?id=5Jq1ASp33L", "pdf_size": 1722759, "recommendation": "3;5;8", "confidence": "3;2;2", "correctness": "2;3;4", "technical_novelty": "2;3;3", "empirical_novelty": "2;2;4", "wc_summary_paper": "111;85;125", "wc_strength_and_weaknesses": "346;370;146", "wc_clarity_quality_novelty_and_reproducibility": "124;20;541", "wc_summary_review": "54;51;30", "wc_review": "635;526;842", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "341;630;321", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 5.333333333333333, 2.0548046676563256 ], "confidence_avg": [ 2.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.9428090415820634 ], "wc_summary_paper_avg": [ 107.0, 16.57307052620807 ], "wc_strength_and_weaknesses_avg": [ 287.3333333333333, 100.41690871340123 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 228.33333333333334, 225.12860522130208 ], "wc_summary_review_avg": [ 45.0, 10.677078252031311 ], "wc_review_avg": [ 667.6666666666666, 131.05808720648346 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 430.6666666666667, 141.1862442150635 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.8029550685469661, "corr_recommendation_correctness": 0.9933992677987828, "gs_citation": 41, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12518359837852368128&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff_unique_index": "0;1;2;3;2", "aff_unique_norm": "Peking University;Stanford University;Princeton University;Meta", "aff_unique_dep": ";Computer Science Department;;Meta Platforms, Inc.", "aff_unique_url": "http://www.pku.edu.cn;https://www.stanford.edu;https://www.princeton.edu;https://meta.com", "aff_unique_abbr": "Peking U;Stanford;Princeton;Meta", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;1;1;1;1", "aff_country_unique": "China;United States" }, { "title": "MAST: Masked Augmentation Subspace Training for Generalizable Self-Supervised Priors", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11244", "id": "5KUPKjHYD-l", "poster": "/media/PosterPDFs/ICLR%202023/11244.png?t=1682448089.316343", "openreview": "https://openreview.net/forum?id=5KUPKjHYD-l", "slides": "https://iclr.cc/virtual/2023/poster/11244", "video": "https://iclr.cc/virtual/2023/poster/11244", "author_site": "Chen Huang, Hanlin Goh, Jiatao Gu, Joshua Susskind", "tldr": "Disentangled and uncertainty-aware learning of augmentation invariances during SSL improves generalization on downstream tasks", "abstract": "Recent Self-Supervised Learning (SSL) methods are able to learn feature representations that are invariant to different data augmentations, which can then be transferred to downstream tasks of interest. However, different downstream tasks require different invariances for their best performance, so the optimal choice of augmentations for SSL depends on the target task. 
In this paper, we aim to learn self-supervised features that generalize well across a variety of downstream tasks (e.g., object classification, detection and instance segmentation) without knowing any task information beforehand. We do so by Masked Augmentation Subspace Training (or MAST) to encode in the single feature space the priors from different data augmentations in a factorized way. Specifically, we disentangle the feature space into separate subspaces, each induced by a learnable mask that selects relevant feature dimensions to model invariance to a specific augmentation. We show the success of MAST in jointly capturing generalizable priors from different augmentations, using both unique and shared features across the subspaces. We further show that MAST benefits from uncertainty modeling to reweight ambiguous samples from strong augmentations that may cause similarity mismatch in each subspace. Experiments demonstrate that MAST consistently improves generalization on various downstream tasks, while being task-agnostic and efficient during SSL. We also provide interesting insights about how different augmentations are related and how uncertainty reflects learning difficulty.", "keywords": "Self-Supervised Learning (SSL);Generalization;Computer vision", "primary_area": "", "supplementary_material": "", "author": "Chen Huang;Hanlin Goh;Jiatao Gu;Joshua M. Susskind", "authorids": "~Chen_Huang6;~Hanlin_Goh2;~Jiatao_Gu1;~Joshua_M._Susskind1", "gender": "M;M;M;M", "homepage": ";;http://jiataogu.me;http://www.apple.com", "dblp": "05/8125-1;96/4057;164/5848.html;132/7797", "google_scholar": "QZ-JKOUAAAAJ;;https://scholar.google.com.sg/citations?user=cB1mFBsAAAAJ;Sv2TGqsAAAAJ", "orcid": ";;;", "linkedin": ";;jiatao-gu-204b2672/;joshua-susskind-8ab2ab5/", "or_profile": "~Chen_Huang6;~Hanlin_Goh2;~Jiatao_Gu1;~Joshua_M._Susskind1", "aff": "Apple;Apple;Apple;Apple", "aff_domain": "apple.com;apple.com;apple.com;apple.com", "position": "Research Scientist;Research Scientist;Researcher;Researcher", "bibtex": "@inproceedings{\nhuang2023mast,\ntitle={{MAST}: Masked Augmentation Subspace Training for Generalizable Self-Supervised Priors},\nauthor={Chen Huang and Hanlin Goh and Jiatao Gu and Joshua M. 
Susskind},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=5KUPKjHYD-l}\n}", "github": "", "project": "", "reviewers": "7wWs;JyME;Psus;oMUf", "pdf_size": 7804310, "recommendation": "6;6;6;8", "confidence": "3;4;5;4", "correctness": "3;2;4;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "182;42;113;58", "wc_strength_and_weaknesses": "50;404;49;131", "wc_clarity_quality_novelty_and_reproducibility": "49;52;8;12", "wc_summary_review": "18;35;55;26", "wc_review": "299;533;225;227", "wc_reply_reviewers": "367;613;0;0", "wc_reply_authors": "1060;2047;123;391", "reply_reviewers": "4;3;0;0", "reply_authors": "5;5;1;1", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 98.75, 54.805907528294796 ], "wc_strength_and_weaknesses_avg": [ 158.5, 145.59275394057218 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 30.25, 20.327014045353536 ], "wc_summary_review_avg": [ 33.5, 13.793114224133722 ], "wc_review_avg": [ 321.0, 125.9761882261882 ], "wc_reply_reviewers_avg": [ 245.0, 259.979806908152 ], "wc_reply_authors_avg": [ 905.25, 742.2783760153599 ], "reply_reviewers_avg": [ 1.75, 1.7853571071357126 ], "reply_authors_avg": [ 3.0, 2.0 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12964288295345984398&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=5KUPKjHYD-l", "email": "apple.com;apple.com;apple.com;apple.com", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Apple", "aff_unique_dep": "Apple Inc.", "aff_unique_url": "https://www.apple.com", "aff_unique_abbr": "Apple", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "5L1ctJ223ML", "title": "Regularizing hard examples improves robustness", "track": "main", "status": "Withdraw", "tldr": "We study the negative effect of hard examples on generalization in adversarial training and propose a new method to mitigate the effect of hard examples.", "abstract": "Recent studies have validated that pruning hard-to-learn examples from training improves the generalization performance of neural networks (NNs). In this study, we investigate this intriguing phenomenon\u2014the negative effect of hard examples on generalization\u2014in adversarial training. Particularly, we theoretically demonstrate that the increase in the difficulty of hard examples in adversarial training is significantly greater than the increase in the difficulty of easy examples. Furthermore, we verify that hard examples are only fitted through memorization of the label in adversarial training and that the memorization of hard examples is attributed to the significant increase in the difficulty of hard examples. We find that the increased difficulty of hard examples brings about the functioning of hard examples as label corrupted data in adversarial training, thereby leading to the memorization of those hard examples and deterioration of the robustness performance. 
Based upon these observations, we propose a new approach, difficulty proportional label smoothing (DPLS), to mitigate the negative effect of hard examples, thereby improving the adversarial robustness of NNs. Notably, our experimental result indicates that our method can successfully leverage hard examples while circumventing the negative effect.", "keywords": "deep learning;adversarial robustness;adversarial examples", "primary_area": "", "supplementary_material": "", "author": "Hyungyu Lee;Saehyung Lee;Ho Bae;Sungroh Yoon", "authorids": "~Hyungyu_Lee1;~Saehyung_Lee1;~Ho_Bae1;~Sungroh_Yoon1", "gender": "M;M;;M", "homepage": ";https://www.spai.co.kr;http://ailab.snu.ac.kr;https://snu.ac.kr", "dblp": "260/0442;199/1782;99/1474;", "google_scholar": "nS24h74AAAAJ;https://scholar.google.com/citations?hl=en;Bphl_fIAAAAJ;", "orcid": ";0000-0002-5238-3547;0000-0002-2367-197X;", "linkedin": ";;;", "or_profile": "~Saehyung_Lee1;~Ho_Bae1;~Sungroh_Yoon1;~Hyungyu_Lee2", "aff": "Qualcomm Inc, QualComm;Ewha Womans University;Seoul National University;Seoul National University", "aff_domain": "qti.qualcomm.com;ewha.ac.kr;snu.ac.kr;snu.ac.kr", "position": "Intern;Assistant Professor;Full Professor;PhD student", "bibtex": "@misc{\nlee2023regularizing,\ntitle={Regularizing hard examples improves robustness},\nauthor={Hyungyu Lee and Saehyung Lee and Ho Bae and Sungroh Yoon},\nyear={2023},\nurl={https://openreview.net/forum?id=5L1ctJ223ML}\n}", "github": "", "project": "", "reviewers": "nQWa;HeMq;mCYK;68Wg", "site": "https://openreview.net/forum?id=5L1ctJ223ML", "pdf_size": 1238143, "recommendation": "3;3;5;6", "confidence": "5;4;4;4", "correctness": "2;3;3;3", "technical_novelty": "1;2;2;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "78;53;23;163", "wc_strength_and_weaknesses": "270;218;137;255", "wc_clarity_quality_novelty_and_reproducibility": "81;19;5;60", "wc_summary_review": "30;61;57;77", "wc_review": "459;351;222;555", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "739;1222;531;559", "reply_reviewers": "0;0;0;0", "reply_authors": "1;2;1;1", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 79.25, 52.12664865498261 ], "wc_strength_and_weaknesses_avg": [ 220.0, 51.52184002925361 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 41.25, 30.58083550199373 ], "wc_summary_review_avg": [ 56.25, 16.90229274388537 ], "wc_review_avg": [ 396.75, 124.04510268446715 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 762.75, 276.90104279326937 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.5555555555555555, "corr_recommendation_correctness": 0.5555555555555555, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:6xVA96l0-NEJ:scholar.google.com/&scioq=Regularizing+hard+examples+improves+robustness&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "Qualcomm Incorporated;Ewha Womans University;Seoul National University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.qualcomm.com;http://www.ewha.ac.kr;https://www.snu.ac.kr", "aff_unique_abbr": "Qualcomm;Ewha;SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", 
"aff_country_unique_index": "0;1;1;1", "aff_country_unique": "United States;South Korea" }, { "id": "5MR1OGvCtH", "title": "A Sample Based Method for Understanding The Decisions of Neural Networks Semantically", "track": "main", "status": "Reject", "tldr": "This paper introduces a semantic interpretability framework that is used to understand how CNN models and their robust counterparts manipulate image regions.", "abstract": "Interpretability in deep learning is one of the largest obstacles to its more widespread adoption in critical applications. A variety of methods have been introduced to understand and explain decisions made by Deep Models. A class of these methods highlights which features are most influential to model predictions. These methods have some key weaknesses. First, most of these methods are applicable only to the atomic elements that make up raw inputs to the model (e.g., pixels or words). Second, these methods generally don't distinguish between the importance of features individually versus due to interactions with other features. As a result, it is difficult to explore high level questions about how models use features. We tackle these issues by proposing Sample-Based Semantic Analysis (SBSA). We use Sobol sensitivity analysis as our sample-based method. Sobol-SBSA allows us to quantify the importance of semantic combinations of raw inputs and highlight the extent to which these features are important individually as opposed to due to interactions with other features. We demonstrate the ability of Sobol-SBSA to answer a richer class of questions about the behavior of Deep Learning models by exploring how CNN models from AlexNet to DenseNet use regions when classifying images. We present two key findings. 1) The architectural improvements from AlexNet to DenseNet manifested themselves in CNN models utilizing greater levels of region interactions for predictions. 2) Adversarially robust CNNs resist exploiting spurious correlations in ImageNet data by forcing these architectures to rely less on region-to-region interaction. 
Our proposed method is generalizable to a wide variety of network and input types and can help provide greater clarity about model decisions.", "keywords": "Machine Learning Interpretability;Bias;ImageNet;AlexNet;ResNet;VGG-16;Inception;CNNs;Bag of Words", "primary_area": "", "supplementary_material": "", "author": "Ohi Dibua;Jonathan Mbuya;Mackenzie Austin;Kushal Kafle", "authorids": "~Ohi_Dibua1;jmbuya@gmu.edu;mua23@pitt.edu;~Kushal_Kafle2", "gender": "M;;;M", "homepage": ";;;https://kushalkafle.com", "dblp": ";;;188/6388", "google_scholar": ";;;M_iwxCQAAAAJ", "orcid": ";;;0000-0002-0847-7861", "linkedin": "ohi-dibua-2bb72032;;;kushalkafle/", "or_profile": "~Ohi_Dibua1;jmbuya@gmu.edu;mua23@pitt.edu;~Kushal_Kafle2", "aff": "Adobe Systems;;;Adobe Systems", "aff_domain": "adobe.com;;;adobe.com", "position": "Research Engineer;;;Researcher", "bibtex": "@misc{\ndibua2023a,\ntitle={A Sample Based Method for Understanding The Decisions of Neural Networks Semantically},\nauthor={Ohi Dibua and Jonathan Mbuya and Mackenzie Austin and Kushal Kafle},\nyear={2023},\nurl={https://openreview.net/forum?id=5MR1OGvCtH}\n}", "github": "", "project": "", "reviewers": "yYjF;1XwS;phNs", "site": "https://openreview.net/forum?id=5MR1OGvCtH", "pdf_size": 1724167, "recommendation": "3;3;5", "confidence": "3;3;5", "correctness": "2;3;3", "technical_novelty": "3;2;2", "empirical_novelty": "2;2;2", "wc_summary_paper": "65;42;88", "wc_strength_and_weaknesses": "195;325;283", "wc_clarity_quality_novelty_and_reproducibility": "49;35;15", "wc_summary_review": "49;28;44", "wc_review": "358;430;430", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1833;1667;1234", "reply_reviewers": "0;0;0", "reply_authors": "5;4;4", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 65.0, 18.7794213613377 ], "wc_strength_and_weaknesses_avg": [ 267.6666666666667, 54.16846150872501 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 33.0, 13.9522996909709 ], "wc_summary_review_avg": [ 40.333333333333336, 8.9566858950296 ], "wc_review_avg": [ 406.0, 33.94112549695428 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1578.0, 252.5087457231267 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 4.333333333333333, 0.4714045207910317 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 1.0, "corr_recommendation_correctness": 0.5, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:6vcglhcLSUkJ:scholar.google.com/&scioq=A+Sample+Based+Method+for+Understanding+The+Decisions+of+Neural+Networks+Semantically&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Adobe", "aff_unique_dep": "Adobe Systems Incorporated", "aff_unique_url": "https://www.adobe.com", "aff_unique_abbr": "Adobe", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "5MUJsSRuylD", "title": "Causal Inference via Nonlinear Variable Decorrelation in Healthcare", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Causal inference and model interpretability research are gaining increasing attention, especially in the domains of healthcare and bioinformatics. 
Despite recent successes in this field, decorrelating features under nonlinear environments with human interpretable representations has not been adequately investigated. To address this issue, we introduce a novel method with a variable decorrelation regularizer to handle both linear and nonlinear confounding. Moreover, we employ association rules as new representations using association rule mining based on the original features to further approximate human decision patterns to increase model interpretability. Extensive experiments are conducted on four healthcare datasets (one synthetically generated and three real-world collections on different diseases). Quantitative results in comparison to baseline approaches on parameter estimation and causality computation indicate the model's superior performance. Furthermore, expert evaluation given by healthcare professionals validates the effectiveness and interpretability of the proposed model.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Junda Wang;Weijian Li;Han Wang;Hanjia Lyu;Caroline Thirukumaran;Addisu Mesfin;Jiebo Luo", "authorids": "~Junda_Wang1;~Weijian_Li1;hanakookqwq@gmail.com;~Hanjia_Lyu1;caroline_thirukumaran@urmc.rochester.edu;addisu_mesfin@urmc.rochester.edu;~Jiebo_Luo1", "gender": "M;M;;;;;", "homepage": "http://none;https://weijian-li.github.io/;;https://brucelyu17.github.io/;;;", "dblp": "239/3062;128/5278;;256/5541;;;", "google_scholar": "https://scholar.google.bg/citations?user=3ZhGCvkAAAAJ;dYZVGOQAAAAJ;;tPhwyYsAAAAJ;;;", "orcid": ";;;0000-0002-3876-0094;;;", "linkedin": ";;;;;;", "or_profile": "~Junda_Wang1;~Weijian_Li1;hanakookqwq@gmail.com;~Hanjia_Lyu1;caroline_thirukumaran@urmc.rochester.edu;addisu_mesfin@urmc.rochester.edu;~Jiebo_Luo1", "aff": "University of Massachusetts at Amherst;Amazon;;University of Rochester;;;", "aff_domain": "umass.edu;amazon.com;;rochester.edu;;;", "position": "PhD student;Researcher;;PhD student;;;", "bibtex": "@misc{\nwang2023causal,\ntitle={Causal Inference via Nonlinear Variable Decorrelation in Healthcare},\nauthor={Junda Wang and Weijian Li and Han Wang and Hanjia Lyu and Caroline Thirukumaran and Addisu Mesfin and Jiebo Luo},\nyear={2023},\nurl={https://openreview.net/forum?id=5MUJsSRuylD}\n}", "github": "", "project": "", "reviewers": "GQzM;CHFK;ownt;syzi", "site": "https://openreview.net/forum?id=5MUJsSRuylD", "pdf_size": 956988, "recommendation": "3;3;3;5", "confidence": "4;4;3;2", "correctness": "2;2;3;3", "technical_novelty": "2;2;3;2", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "72;59;58;49", "wc_strength_and_weaknesses": "249;300;143;117", "wc_clarity_quality_novelty_and_reproducibility": "29;69;33;171", "wc_summary_review": "45;80;10;60", "wc_review": "395;508;244;397", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "238;533;331;571", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 59.5, 8.200609733428363 ], "wc_strength_and_weaknesses_avg": [ 202.25, 75.03124349229459 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 75.5, 57.295287764352835 ], "wc_summary_review_avg": [ 48.75, 25.586861863073402 ], "wc_review_avg": [ 386.0, 93.87491677759294 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 418.25, 138.38600904715764 ], "reply_reviewers_avg": [ 0, 0
], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.8703882797784891, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:UEwttuOJffAJ:scholar.google.com/&scioq=Causal+Inference+via+Nonlinear+Variable+Decorrelation+in+Healthcare&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Massachusetts Amherst;Amazon;University of Rochester", "aff_unique_dep": ";Amazon.com, Inc.;", "aff_unique_url": "https://www.umass.edu;https://www.amazon.com;https://www.rochester.edu", "aff_unique_abbr": "UMass Amherst;Amazon;U of R", "aff_campus_unique_index": "0", "aff_campus_unique": "Amherst;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Long Range Language Modeling via Gated State Spaces", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11051", "id": "5MkYIYCbva", "poster": "/media/PosterPDFs/ICLR%202023/11051.png?t=1682788342.8958023", "openreview": "https://openreview.net/forum?id=5MkYIYCbva", "slides": "https://iclr.cc/virtual/2023/poster/11051", "video": "https://iclr.cc/virtual/2023/poster/11051", "author_site": "Harsh Mehta, Ankit Gupta, Ashok Cutkosky, Behnam Neyshabur", "tldr": "Explore and improve state space model family on long range language modeling tasks", "abstract": "State space models have shown to be effective at modeling long range dependencies, specially on sequence classification tasks. In this work we focus on autoregressive sequence modeling over English books, Github source code and ArXiv mathematics articles. Based on recent developments around the effectiveness of gated activation functions, we propose a new layer named \\textit{Gated State Space} (GSS) and show that it trains significantly faster than the diagonal version of S4 (i.e. DSS) on TPUs, is fairly competitive with several well-tuned Transformer-based baselines and exhibits zero-shot generalization to longer inputs while being straightforward to implement. 
Finally, we show that leveraging self-attention to model local dependencies improves the performance of GSS even further.", "keywords": "Long range language modeling;language modeling;state space models", "primary_area": "", "supplementary_material": "", "author": "Harsh Mehta;Ankit Gupta;Ashok Cutkosky;Behnam Neyshabur", "authorids": "~Harsh_Mehta1;~Ankit_Gupta3;~Ashok_Cutkosky1;~Behnam_Neyshabur1", "gender": "M;M;;M", "homepage": ";https://sites.google.com/view/ag1988/;http://www.cs.stanford.edu/~ashokc;https://www.neyshabur.net", "dblp": "122/1475;65/2886-1;191/6725;131/9898", "google_scholar": "murJPNoAAAAJ;fdH955UAAAAJ;h4AbGp0AAAAJ;e1ucbCYAAAAJ", "orcid": ";;;", "linkedin": ";ag1988;;", "or_profile": "~Harsh_Mehta1;~Ankit_Gupta3;~Ashok_Cutkosky1;~Behnam_Neyshabur1", "aff": "Google Research;International Business Machines;Boston University;Google", "aff_domain": "google.com;ibm.com;bu.edu;google.com", "position": "Software Engineer;Researcher;Assistant Professor;Research Scientist", "bibtex": "@inproceedings{\nmehta2023long,\ntitle={Long Range Language Modeling via Gated State Spaces},\nauthor={Harsh Mehta and Ankit Gupta and Ashok Cutkosky and Behnam Neyshabur},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=5MkYIYCbva}\n}", "github": "", "project": "", "reviewers": "GxKh;Rso2;Z4KM;LYuN", "pdf_size": 327509, "recommendation": "5;6;6;6", "confidence": "4;4;4;3", "correctness": "3;3;4;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;2;0;3", "wc_summary_paper": "156;75;84;47", "wc_strength_and_weaknesses": "327;63;186;69", "wc_clarity_quality_novelty_and_reproducibility": "17;282;12;32", "wc_summary_review": "155;16;55;21", "wc_review": "655;436;337;169", "wc_reply_reviewers": "0;177;0;0", "wc_reply_authors": "861;853;600;347", "reply_reviewers": "0;2;0;0", "reply_authors": "2;3;2;2", "recommendation_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 1.75, 1.0897247358851685 ], "wc_summary_paper_avg": [ 90.5, 40.202611855450385 ], "wc_strength_and_weaknesses_avg": [ 161.25, 107.52761273273019 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 85.75, 113.54376909368474 ], "wc_summary_review_avg": [ 61.75, 55.88995884772147 ], "wc_review_avg": [ 399.25, 175.81862102746683 ], "wc_reply_reviewers_avg": [ 44.25, 76.64324823492282 ], "wc_reply_authors_avg": [ 665.25, 211.60620855731054 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 267, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10964065881465030939&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=5MkYIYCbva", "email": "google.com;ibm.com;bu.edu;google.com", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Google;International Business Machines Corporation;Boston University", "aff_unique_dep": "Google Research;;", "aff_unique_url": "https://research.google;https://www.ibm.com;https://www.bu.edu", "aff_unique_abbr": "Google Research;IBM;BU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United 
States" }, { "title": "Embedding Fourier for Ultra-High-Definition Low-Light Image Enhancement", "status": "Top-5%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11576", "id": "5N0wtJZ89r9", "poster": "/media/PosterPDFs/ICLR%202023/11576.png?t=1680787132.15787", "openreview": "https://openreview.net/forum?id=5N0wtJZ89r9", "slides": "https://iclr.cc/virtual/2023/poster/11576", "video": "https://iclr.cc/virtual/2023/poster/11576", "author_site": "Chongyi Li, Chun-Le Guo, man zhou, Zhexin Liang, Shangchen Zhou, Ruicheng Feng, Chen Change Loy", "tldr": "In this paper, we propose a new solution for UHD LLIE based on the characteristics of the Fourier domain. We also propose the first real UHD LLIE dataset with diverse data.", "abstract": "Ultra-High-Definition (UHD) photo has gradually become the standard configuration in advanced imaging devices. The new standard unveils many issues in existing approaches for low-light image enhancement (LLIE), especially in dealing with the intricate issue of joint luminance enhancement and noise removal while remaining efficient. Unlike existing methods that address the problem in the spatial domain, we propose a new solution, UHDFour, that embeds Fourier transform into a cascaded network. Our approach is motivated by a few unique characteristics in the Fourier domain: 1) most luminance information concentrates on amplitudes while noise is closely related to phases, and 2) a high-resolution image and its low-resolution version share similar amplitude patterns. Through embedding Fourier into our network, the amplitude and phase of a low-light image are separately processed to avoid amplifying noise when enhancing luminance. Besides, UHDFour is scalable to UHD images by implementing amplitude and phase enhancement under the low-resolution regime and then adjusting the high-resolution scale with few computations. We also contribute the first real UHD LLIE dataset, UHD-LL, that contains 2,150 low-noise/normal-clear 4K image pairs with diverse darkness and noise levels captured in different scenarios. With this dataset, we systematically analyze the performance of existing LLIE methods for processing UHD images and demonstrate the advantage of our solution. We believe our new framework, coupled with the dataset, would push the frontier of LLIE towards UHD. 
The code and dataset are available at https://li-chongyi.github.io/UHDFour/.", "keywords": "low-light image enhancement;high-resolution image processing;Fourier transform;benchmark", "primary_area": "", "supplementary_material": "", "author": "Chongyi Li;Chun-Le Guo;man zhou;Zhexin Liang;Shangchen Zhou;Ruicheng Feng;Chen Change Loy", "authorids": "~Chongyi_Li1;~Chun-Le_Guo1;~man_zhou1;~Zhexin_Liang1;~Shangchen_Zhou1;~Ruicheng_Feng1;~Chen_Change_Loy2", "gender": ";;M;F;M;M;M", "homepage": ";;;https://zhexinliang.github.io/;https://shangchenzhou.com;https://jnjaby.github.io/;https://www.mmlab-ntu.com/person/ccloy/index.html", "dblp": ";;165/8237;275/3656;191/5298;134/1620;01/5855", "google_scholar": ";;;19Ovo9AAAAAJ;https://scholar.google.com.hk/citations?user=suaDwBQAAAAJ;https://scholar.google.com.sg/citations?user=nDrw-wwAAAAJ;https://scholar.google.co.uk/citations?user=559LF80AAAAJ", "orcid": ";;0000-0003-2872-605X;;0000-0001-8201-8877;;0000-0001-5345-1591", "linkedin": ";;;;;;", "or_profile": "~Chongyi_Li1;~Chun-Le_Guo1;~man_zhou1;~Zhexin_Liang1;~Shangchen_Zhou1;~Ruicheng_Feng1;~Chen_Change_Loy2", "aff": ";;University of Science and Technology of China;Nanyang Technological University;Nanyang Technological University;Nanyang Technological University;Nanyang Technological University", "aff_domain": ";;ustc.edu.cn;ntu.edu.sg;ntu.edu.sg;ntu.edu.sg;ntu.edu.sg", "position": ";;Postdoc;MS student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nli2023embedding,\ntitle={Embedding Fourier for Ultra-High-Definition Low-Light Image Enhancement},\nauthor={Chongyi Li and Chun-Le Guo and man zhou and Zhexin Liang and Shangchen Zhou and Ruicheng Feng and Chen Change Loy},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=5N0wtJZ89r9}\n}", "github": "", "project": "", "reviewers": "ohoL;aXaB;yq2M;Fqm7", "pdf_size": 40364430, "recommendation": "6;8;8;8", "confidence": "5;5;5;3", "correctness": "3;3;3;3", "technical_novelty": "3;3;3;3", "empirical_novelty": "3;3;4;3", "wc_summary_paper": "57;121;179;31", "wc_strength_and_weaknesses": "424;482;486;76", "wc_clarity_quality_novelty_and_reproducibility": "73;48;47;18", "wc_summary_review": "35;31;42;10", "wc_review": "589;682;754;135", "wc_reply_reviewers": "0;8;0;0", "wc_reply_authors": "708;1252;278;271", "reply_reviewers": "0;1;0;0", "reply_authors": "2;2;1;1", "recommendation_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 4.5, 0.8660254037844386 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 97.0, 57.56735185849702 ], "wc_strength_and_weaknesses_avg": [ 367.0, 169.7910480561328 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 46.5, 19.474342094150447 ], "wc_summary_review_avg": [ 29.5, 11.926860441876563 ], "wc_review_avg": [ 540.0, 241.03215553116559 ], "wc_reply_reviewers_avg": [ 2.0, 3.4641016151377544 ], "wc_reply_authors_avg": [ 627.25, 401.7843793628617 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": 0.0, "gs_citation": 147, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17547544909247114042&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=5N0wtJZ89r9", "email": 
";;ustc.edu.cn;ntu.edu.sg;ntu.edu.sg;ntu.edu.sg;ntu.edu.sg", "author_num": 7, "aff_unique_index": "0;1;1;1;1", "aff_unique_norm": "University of Science and Technology of China;Nanyang Technological University", "aff_unique_dep": ";", "aff_unique_url": "http://www.ustc.edu.cn;https://www.ntu.edu.sg", "aff_unique_abbr": "USTC;NTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1", "aff_country_unique": "China;Singapore" }, { "title": "Automatic Chain of Thought Prompting in Large Language Models", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11360", "id": "5NTt8GFjUHkr", "poster": "/media/PosterPDFs/ICLR%202023/11360.png?t=1680835874.7364259", "openreview": "https://openreview.net/forum?id=5NTt8GFjUHkr", "slides": "https://iclr.cc/virtual/2023/poster/11360", "video": "https://iclr.cc/virtual/2023/poster/11360", "author_site": "Zhuosheng Zhang, Aston Zhang, Mu Li, Alex Smola", "tldr": "We propose an automatic prompting method (Auto-CoT) to elicit chain-of-thought reasoning in large language models without needing manually-designed demonstrations.", "abstract": "Large Language Models (LLMs) can carry out complex reasoning tasks by generating intermediate reasoning steps. These steps are triggered by what is called chain-of-thought (CoT) prompting, which comes in two flavors: one leverages a simple prompt like \"Let\u2019s think step by step\" to facilitate step-by-step reasoning before answering a question (Zero-Shot-CoT). The other uses manual demonstrations, each composed of a question and a reasoning chain that leads to an answer (Manual-CoT). Unfortunately, the superior performance of the latter strategy crucially hinges on manually generating task-specific demonstrations. This makes it far less scalable and more dependent on the talent of the CoT engineer. We show that such manual efforts may be eliminated by leveraging LLMs to generate the reasoning chains on its own. Since these generated chains often come with mistakes we propose a number of mitigation strategies. Our proposed Auto-CoT method automaticaly samples diverse questions and we perform post-processing quality control to generate usable reasoning chains from Zero-Shot-CoT. On ten public benchmark reasoning tasks, Auto-CoT performs on par with Manual-CoT without the need for human intervention. 
Code is available at https://github.com/amazon-research/auto-cot.\n", "keywords": "Chain of Thought Prompting;Large Language Models;In-context Learning;Few-shot Learning;Arithmetic Reasoning;Commonsense Reasoning;Symbolic Reasoning.", "primary_area": "", "supplementary_material": "", "author": "Zhuosheng Zhang;Aston Zhang;Mu Li;Alex Smola", "authorids": "~Zhuosheng_Zhang1;~Aston_Zhang2;~Mu_Li4;~Alex_Smola1", "gender": "M;;;M", "homepage": "https://bcmi.sjtu.edu.cn/~zhangzs/;;https://github.com/mli;http://alex.smola.org", "dblp": "06/9708;;;s/AlexanderJSmola", "google_scholar": "https://scholar.google.co.jp/citations?user=63LTQhgAAAAJ;;;Tb0ZrYwAAAAJ", "orcid": "0000-0002-4183-3645;;;", "linkedin": ";;;smola", "or_profile": "~Zhuosheng_Zhang1;~Aston_Zhang2;~Mu_Li4;~Alex_Smola1", "aff": "Shanghai Jiaotong University;;Amazon;Boson AI", "aff_domain": "sjtu.edu.cn;;amazon.com;boson.ai", "position": "PhD student;;Researcher;CEO", "bibtex": "@inproceedings{\nzhang2023automatic,\ntitle={Automatic Chain of Thought Prompting in Large Language Models},\nauthor={Zhuosheng Zhang and Aston Zhang and Mu Li and Alex Smola},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=5NTt8GFjUHkr}\n}", "github": "", "project": "", "reviewers": "psXT;ajVM;G2Cc;F79B", "pdf_size": 1181464, "recommendation": "3;6;8;8", "confidence": "5;5;4;5", "correctness": "4;4;3;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;3;4;0", "wc_summary_paper": "120;89;222;61", "wc_strength_and_weaknesses": "376;101;1256;344", "wc_clarity_quality_novelty_and_reproducibility": "17;114;4;39", "wc_summary_review": "117;115;61;35", "wc_review": "630;419;1543;479", "wc_reply_reviewers": "0;0;32;0", "wc_reply_authors": "1137;529;656;539", "reply_reviewers": "0;0;1;0", "reply_authors": "4;3;1;1", "recommendation_avg": [ 6.25, 2.0463381929681126 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 1.479019945774904 ], "wc_summary_paper_avg": [ 123.0, 60.848171706305195 ], "wc_strength_and_weaknesses_avg": [ 519.25, 438.45374613521096 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 43.5, 42.58227330709341 ], "wc_summary_review_avg": [ 82.0, 35.22782990761707 ], "wc_review_avg": [ 767.75, 454.1450071287804 ], "wc_reply_reviewers_avg": [ 8.0, 13.856406460551018 ], "wc_reply_authors_avg": [ 715.25, 248.564252256836 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 1.299038105676658 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.49374193110101877, "corr_recommendation_correctness": -0.8551861104941366, "gs_citation": 1016, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10788066074928914543&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=5NTt8GFjUHkr", "email": "sjtu.edu.cn;;amazon.com;boson.ai", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Shanghai Jiao Tong University;Amazon;Boson AI", "aff_unique_dep": ";Amazon.com, Inc.;", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.amazon.com;https://www.boson.ai", "aff_unique_abbr": "SJTU;Amazon;Boson AI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "China;United States" }, { "title": "DFlow: Learning to Synthesize Better Optical Flow Datasets via a 
Differentiable Pipeline", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11953", "id": "5O2uzDusEN5", "poster": "", "openreview": "https://openreview.net/forum?id=5O2uzDusEN5", "slides": "https://iclr.cc/virtual/2023/poster/11953", "video": "https://iclr.cc/virtual/2023/poster/11953", "author_site": "BYUNGKI KWON, HYEONWOO NAM, Ji-Yun Kim, Tae-Hyun Oh", "tldr": "Differentiable and efficient optical flow data generation pipeline", "abstract": "Comprehensive studies of synthetic optical flow datasets have attempted to reveal what properties lead to accuracy improvement in learning-based optical flow estimation. However, manually identifying and verifying the properties that contribute to accurate optical flow estimation require large-scale trial-and-error experiments with iteratively generating whole synthetic datasets and training on them, which is impractical. To address this challenge, we propose a differentiable optical flow data generation pipeline and a loss function to drive the pipeline, called DFlow. DFlow efficiently synthesizes a dataset effective for a target domain without the need for cumbersome trial and error. This favorable property is achieved by proposing an efficient dataset comparison method that uses neural networks to approximately encode each dataset and compares the proxy networks instead of explicitly comparing datasets in a pairwise way. Our experiments show the competitive performance of our DFlow against the prior arts in pre-training. Furthermore, compared to competing datasets, DFlow achieves the best fine-tuning performance on the Sintel public benchmark with RAFT.", "keywords": "Synthetic data;Optical flow", "primary_area": "", "supplementary_material": "/attachment/2a1918cfd056437a25c0fd6eea7835a9199182a2.zip", "author": "Kwon Byung-Ki;Nam Hyeon-Woo;Ji-Yun Kim;Tae-Hyun Oh", "authorids": "~Kwon_Byung-Ki1;~Nam_Hyeon-Woo1;~Ji-Yun_Kim1;~Tae-Hyun_Oh3", "gender": "M;M;M;M", "homepage": "https://sites.google.com/view/kwon-byung--ki/%ED%99%88;https://ami.kaist.ac.kr;https://www.facebook.com/profile.php?id=100083974187149;https://sites.google.com/view/southhw/", "dblp": ";119/1450;;299/7655", "google_scholar": "rUmP7vgAAAAJ;dMCBjeIAAAAJ;;https://scholar.google.fi/citations?user=1jQ1FNUAAAAJ", "orcid": ";0000-0003-0468-1571;;", "linkedin": "byung-ki-kwon-a483961b1;tae-hyun-oh-at-mit/;;nam-hyeon-woo-8397b6246/", "or_profile": "~Kwon_Byung-Ki1;~Tae-Hyun_Oh3;~JIYUN_KIM1;~Nam_Hyeon_Woo1", "aff": "Pohang University of Science and Technology;POSTECH;KRAFTON;POSTECH", "aff_domain": "postech.edu;postech.ac.kr;krafton.com;postech.ac.kr", "position": "MS student;Assistant Professor;Researcher;PhD student", "bibtex": "@inproceedings{\nbyung-ki2023dflow,\ntitle={{DF}low: Learning to Synthesize Better Optical Flow Datasets via a Differentiable Pipeline},\nauthor={Kwon Byung-Ki and Nam Hyeon-Woo and Ji-Yun Kim and Tae-Hyun Oh},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=5O2uzDusEN5}\n}", "github": "", "project": "", "reviewers": "dkLC;uY3h;iPLs;tPfm", "pdf_size": 5447889, "recommendation": "5;5;6;8", "confidence": "3;5;3;4", "correctness": "3;3;3;3", "technical_novelty": "3;3;2;3", "empirical_novelty": "2;3;0;3", "wc_summary_paper": "57;64;78;72", "wc_strength_and_weaknesses": "176;129;33;78", "wc_clarity_quality_novelty_and_reproducibility": "153;54;24;217", "wc_summary_review": "21;53;25;105", "wc_review": "407;300;160;472", "wc_reply_reviewers": "0;0;0;390",
"wc_reply_authors": "1700;939;206;1780", "reply_reviewers": "0;0;0;3", "reply_authors": "3;2;1;5", "recommendation_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 1.224744871391589 ], "wc_summary_paper_avg": [ 67.75, 7.949056547792323 ], "wc_strength_and_weaknesses_avg": [ 104.0, 53.67960506561128 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 112.0, 77.15892689767011 ], "wc_summary_review_avg": [ 51.0, 33.52610922848042 ], "wc_review_avg": [ 334.75, 118.11302849389648 ], "wc_reply_reviewers_avg": [ 97.5, 168.87495373796554 ], "wc_reply_authors_avg": [ 1156.25, 639.3161874221549 ], "reply_reviewers_avg": [ 0.75, 1.299038105676658 ], "reply_authors_avg": [ 2.75, 1.479019945774904 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=236860155828370011&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "pdf": "https://openreview.net/pdf?id=5O2uzDusEN5", "email": "postech.edu;postech.ac.kr;krafton.com;postech.ac.kr", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Pohang University of Science and Technology;KRAFTON Inc.", "aff_unique_dep": ";", "aff_unique_url": "https://www.postech.ac.kr;https://www.krafton.com", "aff_unique_abbr": "POSTECH;KRAFTON", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Pohang;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "An Additive Instance-Wise Approach to Multi-class Model Interpretation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12059", "id": "5OygDd-4Eeh", "poster": "", "openreview": "https://openreview.net/forum?id=5OygDd-4Eeh", "slides": "https://iclr.cc/virtual/2023/poster/12059", "video": "https://iclr.cc/virtual/2023/poster/12059", "author_site": "Vy Vo, Van Nguyen, Trung Le, Quan Tran, Reza Haffari, Seyit Camtepe, Dinh Phung", "tldr": "", "abstract": "Interpretable machine learning offers insights into what factors drive a certain prediction of a black-box system. A large number of interpreting methods focus on identifying explanatory input features, which generally fall into two main categories: attribution and selection. A popular attribution-based approach is to exploit local neighborhoods for learning instance-specific explainers in an additive manner. The process is thus inefficient and susceptible to poorly-conditioned samples. Meanwhile, many selection-based methods directly optimize local feature distributions in an instance-wise training framework, thereby being capable of leveraging global information from other inputs. However, they can only interpret single-class predictions and many suffer from inconsistency across different settings, due to a strict reliance on a pre-defined number of features selected. This work exploits the strengths of both methods and proposes a framework for learning local explanations simultaneously for multiple target classes. Our model explainer significantly outperforms additive and instance-wise counterparts on faithfulness with more compact and comprehensible explanations. 
We also demonstrate the capacity to select stable and important features through extensive experiments on various data sets and black-box model architectures.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/5d607b060921e83f2217d9ceb73b682fcf7fb46f.zip", "author": "Vy Vo;Van Nguyen;Trung Le;Quan Hung Tran;Gholamreza Haffari;Seyit Camtepe;Dinh Phung", "authorids": "~Vy_Vo2;~Van_Nguyen2;~Trung_Le2;~Quan_Hung_Tran1;~Gholamreza_Haffari2;~Seyit_Camtepe1;~Dinh_Phung2", "gender": "F;M;M;M;M;M;M", "homepage": "https://isvy08.github.io/;;;;https://people.csiro.au/C/S/Seyit-Camtepe;https://research.monash.edu/en/persons/dinh-phung;https://rezahaffari.github.io/HomePage/HomePage.html", "dblp": "176/4660;;;151/8700;55/3548.html;71/5859;", "google_scholar": "3CpFpFkAAAAJ;KPpmKZ0AAAAJ;https://scholar.google.com/citations?hl=en;ehs5ImcAAAAJ;https://scholar.google.com.au/citations?user=CLpWx6IAAAAJ;https://scholar.google.com.au/citations?user=OtA9SwIAAAAJ;https://scholar.google.com.tw/citations?user=Perjx5EAAAAJ", "orcid": ";0000-0002-5838-3409;;;0000-0001-6353-8359;0000-0002-9977-8247;", "linkedin": ";;;;camtepe;https://linkedin.com/in/dinh-phung-6b537a6;gholamrezahaffari/?originalSubdomain=au", "or_profile": "~Vy_Vo2;~Van_Nguyen2;~Trung_Le2;~Quan_Hung_Tran1;~Seyit_Camtepe1;~Dinh_Phung1;~Gholamreza_Haffari1", "aff": "Monash University;Monash University;Monash University;Adobe Systems;CSIRO;Monash University;Monash University", "aff_domain": "monash.edu;monash.edu;monash.edu;adobe.com;data61.csiro.au;monash.edu;monash.edu", "position": "PhD student;Postdoc;Assistant Professor;Research Scientist;Principal Researcher;Full Professor;Full Professor", "bibtex": "@inproceedings{\nvo2023an,\ntitle={An Additive Instance-Wise Approach to Multi-class Model Interpretation},\nauthor={Vy Vo and Van Nguyen and Trung Le and Quan Hung Tran and Gholamreza Haffari and Seyit Camtepe and Dinh Phung},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=5OygDd-4Eeh}\n}", "github": "", "project": "", "reviewers": "qCbz;RWdr;Vmy2", "pdf_size": 916489, "recommendation": "3;6;8", "confidence": "4;4;3", "correctness": "3;3;3", "technical_novelty": "2;3;4", "empirical_novelty": "2;2;3", "wc_summary_paper": "106;58;91", "wc_strength_and_weaknesses": "1096;145;181", "wc_clarity_quality_novelty_and_reproducibility": "26;39;92", "wc_summary_review": "59;52;25", "wc_review": "1287;294;389", "wc_reply_reviewers": "0;83;0", "wc_reply_authors": "2668;1210;527", "reply_reviewers": "0;1;0", "reply_authors": "6;4;1", "recommendation_avg": [ 5.666666666666667, 2.0548046676563256 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 3.0, 0.816496580927726 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 85.0, 20.049937655763422 ], "wc_strength_and_weaknesses_avg": [ 474.0, 440.06590415527535 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 52.333333333333336, 28.546258754675524 ], "wc_summary_review_avg": [ 45.333333333333336, 14.65908895153068 ], "wc_review_avg": [ 656.6666666666666, 447.39716385133937 ], "wc_reply_reviewers_avg": [ 27.666666666666668, 39.12657522565563 ], "wc_reply_authors_avg": [ 1468.3333333333333, 892.9435716898477 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 3.6666666666666665, 2.0548046676563256 ], "replies_avg": [ 28, 0 ], "authors#_avg": 
[ 7, 0 ], "corr_recommendation_confidence": -0.8029550685469661, "corr_recommendation_correctness": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2498864479305901963&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "pdf": "https://openreview.net/pdf?id=5OygDd-4Eeh", "email": "monash.edu;monash.edu;monash.edu;adobe.com;data61.csiro.au;monash.edu;monash.edu", "author_num": 7, "aff_unique_index": "0;0;0;1;2;0;0", "aff_unique_norm": "Monash University;Adobe;Commonwealth Scientific and Industrial Research Organisation", "aff_unique_dep": ";Adobe Systems Incorporated;", "aff_unique_url": "https://www.monash.edu;https://www.adobe.com;https://www.csiro.au", "aff_unique_abbr": "Monash;Adobe;CSIRO", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0;0", "aff_country_unique": "Australia;United States" }, { "id": "5P96KWeULzE", "title": "$\\Delta$-PINNs: physics-informed neural networks on complex geometries", "track": "main", "status": "Reject", "tldr": "We encode the geometry using the Laplace-Beltrami eigenfunctions to solve partial differential equations with physics-informed neural networks on complex geometries.", "abstract": "Physics-informed neural networks (PINNs) have demonstrated promise in solving forward and inverse problems involving partial differential equations. Despite recent progress on expanding the class of problems that can be tackled by PINNs, most existing use cases involve simple geometric domains. To date, there is no clear way to inform PINNs about the topology of the domain where the problem is being solved. In this work, we propose a novel positional encoding mechanism for PINNs based on the eigenfunctions of the Laplace-Beltrami operator. This technique makes it possible to create an input space for the neural network that represents the geometry of a given object. We approximate the eigenfunctions as well as the operators involved in the partial differential equations with finite elements. We extensively test and compare the proposed methodology against traditional PINNs in complex shapes, such as a coil, a heat sink and a bunny, with different physics, such as the Eikonal equation and heat transfer. We also study the sensitivity of our method to the number of eigenfunctions used, as well as the discretization used for the eigenfunctions and the underlying operators. Our results show excellent agreement with the ground truth data in cases where traditional PINNs fail to produce a meaningful solution.
We envision this new technique will expand the effectiveness of PINNs to more realistic applications.", "keywords": "deep learning;Laplace-Beltrami;physics-informed neural networks;partial differential equations", "primary_area": "", "supplementary_material": "", "author": "Francisco Sahli Costabal;Simone Pezzuto;Paris Perdikaris", "authorids": "~Francisco_Sahli_Costabal1;simone.pezzuto@usi.ch;~Paris_Perdikaris1", "gender": "M;;M", "homepage": "https://fsahli.ing.puc.cl;;https://directory.seas.upenn.edu/paris-perdikaris/", "dblp": ";;180/9141", "google_scholar": "9wRFbcEAAAAJ;;h_zkt1oAAAAJ", "orcid": ";;0000-0002-2816-3229", "linkedin": ";;paris-perdikaris-093068102/", "or_profile": "~Francisco_Sahli_Costabal1;simone.pezzuto@usi.ch;~Paris_Perdikaris1", "aff": "Pontificia Universidad Catolica de Chile;;University of Pennsylvania", "aff_domain": "uc.cl;;upenn.edu", "position": "Assistant Professor;;Associate Professor", "bibtex": "@misc{\ncostabal2023deltapinns,\ntitle={\\${\\textbackslash}Delta\\$-{PINN}s: physics-informed neural networks on complex geometries},\nauthor={Francisco Sahli Costabal and Simone Pezzuto and Paris Perdikaris},\nyear={2023},\nurl={https://openreview.net/forum?id=5P96KWeULzE}\n}", "github": "", "project": "", "reviewers": "ebMA;vdFL;yokT", "site": "https://openreview.net/forum?id=5P96KWeULzE", "pdf_size": 11137379, "recommendation": "3;5;8", "confidence": "4;3;3", "correctness": "2;3;4", "technical_novelty": "2;3;3", "empirical_novelty": "1;3;4", "wc_summary_paper": "69;88;87", "wc_strength_and_weaknesses": "159;96;240", "wc_clarity_quality_novelty_and_reproducibility": "149;103;78", "wc_summary_review": "91;61;77", "wc_review": "468;348;482", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "553;677;97", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 5.333333333333333, 2.0548046676563256 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 1.247219128924647 ], "wc_summary_paper_avg": [ 81.33333333333333, 8.73053390247253 ], "wc_strength_and_weaknesses_avg": [ 165.0, 58.9406481131655 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 110.0, 29.40521495698793 ], "wc_summary_review_avg": [ 76.33333333333333, 12.256517540566824 ], "wc_review_avg": [ 432.6666666666667, 60.140576060500855 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 442.3333333333333, 249.3796748378308 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.8029550685469661, "corr_recommendation_correctness": 0.9933992677987828, "gs_citation": 46, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2544358258544024677&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1", "aff_unique_norm": "Pontificia Universidad Catolica de Chile;University of Pennsylvania", "aff_unique_dep": ";", "aff_unique_url": "https://www.puc.cl;https://www.upenn.edu", "aff_unique_abbr": "PUC;UPenn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Chile;United States" }, { "id": "5Qt6ZqXSDEZ", "title": "From Adaptive Query Release to Machine Unlearning", "track": "main", "status": "Reject", "tldr": "Efficient algorithms for exact machine unlearning for stochastic convex optimization", "abstract": 
"We formalize the problem of machine unlearning as design of efficient unlearning algorithms corresponding to learning algorithms which perform a selection of adaptive queries from structured query classes. We give efficient unlearning algorithms for linear and prefix-sum query classes. As applications, we show that unlearning in many problems, in particular, stochastic convex optimization (SCO), can be reduced to the above, yielding improved guarantees for the problem. In particular, for smooth Lipschitz losses and any $\\rho>0$, our results yield an unlearning algorithm with excess population risk of $\\tilde O\\big(\\frac{1}{\\sqrt{n}}+\\frac{\\sqrt{d}}{n\\rho}\\big)$ with unlearning query (gradient) complexity $\\tilde O(\\rho \\cdot \\text{Retraining Complexity})$, where $d$ is the model dimensionality and $n$ is the initial number of samples. For non-smooth Lipschitz losses, we give an unlearning algorithm with excess population risk $\\tilde O\\big(\\frac{1}{\\sqrt{n}}+\\big(\\frac{\\sqrt{d}}{n\\rho}\\big)^{1/2}\\big)$ with the same unlearning query (gradient) complexity. Furthermore, in the special case of Generalized Linear Models (GLMs), such as those in linear and logistic regression, we get dimension-independent rates of $\\tilde O\\big(\\frac{1}{\\sqrt{n}} +\\frac{1}{(n\\rho)^{2/3}}\\big)$ and $\\tilde O\\big(\\frac{1}{\\sqrt{n}} +\\frac{1}{(n\\rho)^{1/3}}\\big)$ for smooth Lipschitz and non-smooth Lipschitz losses respectively. Finally, we give generalizations of the above from one unlearning request to dynamic streams consisting of insertions and deletions.", "keywords": "machine unlerarning;stochastic convex optimization", "primary_area": "", "supplementary_material": "", "author": "Enayat Ullah;Raman Arora", "authorids": "~Enayat_Ullah1;~Raman_Arora1", "gender": ";M", "homepage": "https://enayatullah.github.io;http://www.cs.jhu.edu/~raman/Home.html", "dblp": "223/5999;", "google_scholar": ";Spe0xdkAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Enayat_Ullah1;~Raman_Arora1", "aff": "Johns Hopkins University;Johns Hopkins University", "aff_domain": "jhu.edu;jhu.edu", "position": "PhD student;Associate Professor", "bibtex": "@misc{\nullah2023from,\ntitle={From Adaptive Query Release to Machine Unlearning},\nauthor={Enayat Ullah and Raman Arora},\nyear={2023},\nurl={https://openreview.net/forum?id=5Qt6ZqXSDEZ}\n}", "github": "", "project": "", "reviewers": "VmwR;44f4;qNGF;NHVJ", "site": "https://openreview.net/forum?id=5Qt6ZqXSDEZ", "pdf_size": 491309, "recommendation": "5;6;6;6", "confidence": "2;3;3;2", "correctness": "3;4;3;4", "technical_novelty": "3;2;3;2", "empirical_novelty": "0;2;0;0", "wc_summary_paper": "62;104;54;360", "wc_strength_and_weaknesses": "104;56;477;460", "wc_clarity_quality_novelty_and_reproducibility": "217;13;25;35", "wc_summary_review": "64;47;34;104", "wc_review": "447;220;590;959", "wc_reply_reviewers": "0;0;207;99", "wc_reply_authors": "945;92;1548;1113", "reply_reviewers": "0;0;2;1", "reply_authors": "3;1;4;3", "recommendation_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 2.5, 0.5 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 0.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 145.0, 125.57467897629681 ], "wc_strength_and_weaknesses_avg": [ 274.25, 195.08251459318438 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 72.5, 83.78991586103903 ], "wc_summary_review_avg": [ 62.25, 26.34743820563965 ], "wc_review_avg": [ 554.0, 268.4799806317037 ], 
"wc_reply_reviewers_avg": [ 76.5, 85.5 ], "wc_reply_authors_avg": [ 924.5, 528.6210835749932 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.75, 1.0897247358851685 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10711886706404193536&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff_unique_index": "0;0", "aff_unique_norm": "Johns Hopkins University", "aff_unique_dep": "", "aff_unique_url": "https://www.jhu.edu", "aff_unique_abbr": "JHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "5R96mIU85IW", "title": "Effectively using public data in privacy preserving Machine learning", "track": "main", "status": "Reject", "tldr": "improving the effect of public data in DP-SGD and improving the accuracy significantly", "abstract": "A key challenge towards differentially private machine learning is balancing the trade-off between privacy and utility. \nA recent line of work has demonstrated that leveraging \\emph{public data samples} can enhance the utility of DP-trained models (for the same privacy guarantees). \nIn this work, we show that public data can be used to improve utility in DP models significantly more than shown in recent works. \nTowards this end, we introduce a modified DP-SGD algorithm that leverages public data during its training process. \nOur technique uses public data in two complementary ways: (1) it uses generative models trained on public data to produce synthetic data that is effectively embedded in multiple steps of the training pipeline; (2) it uses a new gradient clipping mechanism (required for achieving differential privacy) which changes the \\emph{origin} of gradient vectors using information inferred from available public and synthesized data. \nOur experimental results demonstrate the effectiveness of our approach in improving the state-of-the-art in differentially private machine learning across multiple datasets, network architectures, and application domains. 
\nNotably, we achieve a $75\\%$ accuracy on CIFAR10 when using only $2,000$ public images; this is \\emph{significantly higher} than the state-of-the-art which is $68\\%$ for DP-SGD with the privacy budget of $\\varepsilon=2,\\delta=10^{-5}$ (given the same number of public data points).", "keywords": "Privacy preserving machine learning;dp-sgd;public data in privacy", "primary_area": "", "supplementary_material": "", "author": "Milad Nasr;Saeed Mahloujifar;Xinyu Tang;Prateek Mittal;Amir Houmansadr", "authorids": "~Milad_Nasr2;~Saeed_Mahloujifar1;~Xinyu_Tang1;~Prateek_Mittal1;~Amir_Houmansadr1", "gender": ";M;;;M", "homepage": "https://people.cs.umass.edu/~milad/;https://www.cs.virginia.edu/~sm5fd/;;http://www.princeton.edu/~pmittal/;https://www.cs.umass.edu/~amir/", "dblp": ";208/0825;65/5518;;22/1797", "google_scholar": "k6-nvDAAAAAJ;kW-hl3YAAAAJ;uwcdL7gAAAAJ;https://scholar.google.com.tw/citations?user=xTKD8J4AAAAJ;https://scholar.google.com.tw/citations?user=cTTFHNwAAAAJ", "orcid": ";;;0000-0002-4057-0118;", "linkedin": ";;;;", "or_profile": "~Milad_Nasr2;~Saeed_Mahloujifar1;~Xinyu_Tang1;~Prateek_Mittal1;~Amir_Houmansadr1", "aff": "Google;Princeton University;Princeton University;Princeton University;University of Massachusetts, Amherst", "aff_domain": "google.com;princeton.edu;princeton.edu;princeton.edu;umass.edu", "position": "Researcher;Postdoc;PhD student;Full Professor;Associate Professor", "bibtex": "@misc{\nnasr2023effectively,\ntitle={Effectively using public data in privacy preserving Machine learning},\nauthor={Milad Nasr and Saeed Mahloujifar and Xinyu Tang and Prateek Mittal and Amir Houmansadr},\nyear={2023},\nurl={https://openreview.net/forum?id=5R96mIU85IW}\n}", "github": "", "project": "", "reviewers": "uKD4;Y2rR;d916;UAzg", "site": "https://openreview.net/forum?id=5R96mIU85IW", "pdf_size": 374995, "recommendation": "5;6;6;6", "confidence": "3;4;2;4", "correctness": "2;3;3;4", "technical_novelty": "3;3;3;4", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "122;60;60;43", "wc_strength_and_weaknesses": "23;208;66;135", "wc_clarity_quality_novelty_and_reproducibility": "417;28;9;10", "wc_summary_review": "49;17;12;38", "wc_review": "611;313;147;226", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "364;353;15;106", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 71.25, 30.11125204969066 ], "wc_strength_and_weaknesses_avg": [ 108.0, 70.21039809031139 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 116.0, 173.9468309570485 ], "wc_summary_review_avg": [ 29.0, 15.116216457830975 ], "wc_review_avg": [ 324.25, 175.65787058939318 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 209.5, 152.48360567615129 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.17407765595569782, "corr_recommendation_correctness": 0.816496580927726, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1770399441580219333&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 8, "aff_unique_index": "0;1;1;1;2", "aff_unique_norm": "Google;Princeton University;University of Massachusetts Amherst", "aff_unique_dep": "Google;;", "aff_unique_url": 
"https://www.google.com;https://www.princeton.edu;https://www.umass.edu", "aff_unique_abbr": "Google;Princeton;UMass Amherst", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Mountain View;;Amherst", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "5RSq86IM6mE", "title": "Shifts 2.0: Extending The Dataset of Real Distributional Shifts", "track": "main", "status": "Reject", "tldr": "We introduce two new datasets into the Shifts Benchmark for assessing robustness and uncertainty - Multiple Sclerosis Lesion Segmentation in MRI images and Cargo Vessel Power Consumption Prediction", "abstract": "Distributional shift, or the mismatch between training and deployment data, is a significant obstacle to the usage of machine learning in high-stakes industrial applications, such as autonomous driving and medicine. This creates a need to be able to assess how robustly ML models generalize as well as the quality of their uncertainty estimates. Standard ML datasets do not allow these properties to be assessed, as the training, validation and test data are often identically distributed. Recently, a range of dedicated benchmarks have appeared, featuring both distributionally matched and shifted data. The Shifts dataset stands out in terms of the diversity of tasks and data modalities it features. Unlike most benchmarks, which are dominated by 2D image data, Shifts contains tabular weather forecasting, machine translation, and vehicle motion prediction tasks. This enables models to be assessed on a diverse set of industrial-scale tasks and either universal or directly applicable task-specific conclusions to be reached. In this paper, we extend the Shifts Dataset with two datasets sourced from industrial, high-risk applications of high societal importance. Specifically, we consider the tasks of segmentation of white matter Multiple Sclerosis lesions in 3D magnetic resonance brain images and the estimation of power consumption in marine cargo vessels. Both tasks feature ubiquitous distributional shifts and strict safety requirements due to the high cost of errors. These new datasets will allow researchers to explore robust generalization and uncertainty estimation in new situations. 
This work provides a description of the dataset and baseline results for both tasks.", "keywords": "Distributional Shift;Uncertainty Estimation;Benchmark;MRI 3D segmentation;medical data;industrial tabular data", "primary_area": "", "supplementary_material": "/attachment/afc1ca7b39fb7dfa91990a70bd889a49aa0e8108.zip", "author": "Andrey Malinin;andreas athanasopoulos;Muhamed Barakovic;Meritxell Bach Cuadra;Mark Gales;Cristina Granziera;Mara Graziani;Nikolay Kartashev;Konstantinos Kyriakopoulos;Po-Jui Lu;Nataliia Molchanova;Antonis Nikitakis;Vatsal Raina;Francesco La Rosa;Eli Sivena;Vasileios Tsarsitalidis;Efi Tsompopoulou;Elena Volf", "authorids": "~Andrey_Malinin1;~andreas_athanasopoulos1;~Muhamed_Barakovic1;~Meritxell_Bach_Cuadra1;~Mark_Gales1;~Cristina_Granziera1;~Mara_Graziani1;~Nikolay_Kartashev1;~Konstantinos_Kyriakopoulos2;~Po-Jui_Lu1;~Nataliia_Molchanova1;~Antonis_Nikitakis1;~Vatsal_Raina1;~Francesco_La_Rosa1;e.sivena@deepsea.ai;v.tsarsitalidis@deepsea.ai;~Efi_Tsompopoulou1;~Elena_Volf1", "gender": "M;M;;F;M;F;F;Not Specified;M;;F;M;M;M;;;;F", "homepage": ";;;https://unil.ch/mial/home.html;http://mi.eng.cam.ac.uk/~mjfg/index.html;https://dbe.unibas.ch/en/research/imaging-modelling-diagnosis/translational-imaging-in-neurology-think-basel-group/;https://maragraziani.com/;;;;;;;;;;;https://www.hse.ru/org/persons/499313191", "dblp": "174/5705;351/9383.html;;;74/4419.html;;172/1073.html;306/1426;;;;;;42/6863;;;;", "google_scholar": ";https://scholar.google.co.uk/citations?user=NInCblkAAAAJ;q7Bc76kAAAAJ;https://scholar.google.ch/citations?user=UoZ4neoAAAAJ;https://scholar.google.co.uk/citations?hl=en;;https://scholar.google.ch/citations?user=aSOhI_8AAAAJ;;v3GAEXYAAAAJ;;;;Hom8UAYAAAAJ;OEE8Ze0AAAAJ;;;;", "orcid": ";0009-0000-6807-5939;0000-0001-8557-9223;0000-0003-2730-4285;;;0000-0003-3456-945X;;;;;;0000-0002-3422-6513;0000-0002-9224-4664;;;;", "linkedin": ";andreas-athanasopoulos-950636153/;muhamedb/;;;;mara-graziani-878980105/?originalSubdomain=ch;;;;nataliia-molchanova-699b201b8/;antonis-nikitakis-62544120/;;;;;;", "or_profile": "~Andrey_Malinin1;~andreas_athanasopoulos1;~Muhamed_Barakovic1;~Meritxell_Bach_Cuadra1;~Mark_Gales1;~Cristina_Granziera1;~Mara_Graziani1;~Nikolay_Kartashev1;~Konstantinos_Kyriakopoulos2;~Po-Jui_Lu1;~Nataliia_Molchanova1;~Antonis_Nikitakis1;~Vatsal_Raina1;~Francesco_La_Rosa1;e.sivena@deepsea.ai;v.tsarsitalidis@deepsea.ai;~Efi_Tsompopoulou1;~Elena_Volf1", "aff": "Yandex;Universit\u00e9 de Neuch\u00e2tel;;University of Lausanne;University of Cambridge;University of Basel;University of Applied Sciences Western Switzerland, Sierre (HES-SO Valais);Yandex;;;CHUV - University Hospital Lausanne;;University of Cambridge;Icahn School of Medicine at Mount Sinai;;;;Yandex", "aff_domain": "yandex.ru;unine.ch;;unil.ch;cam.ac.uk;unibas.ch;hevs.ch;yandex-team.ru;;;chuv.ch;;cam.ac.uk;mssm.edu;;;;yandex.ru", "position": "Principal Researcher;PhD student;;Assistant Professor;Full Professor;Assistant Professor;Postdoc;Intern;;;PhD student;;PhD student;Postdoc;;;;Researcher", "bibtex": "@misc{\nmalinin2023shifts,\ntitle={Shifts 2.0: Extending The Dataset of Real Distributional Shifts},\nauthor={Andrey Malinin and andreas athanasopoulos and Muhamed Barakovic and Meritxell Bach Cuadra and Mark Gales and Cristina Granziera and Mara Graziani and Nikolay Kartashev and Konstantinos Kyriakopoulos and Po-Jui Lu and Nataliia Molchanova and Antonis Nikitakis and Vatsal Raina and Francesco La Rosa and Eli Sivena and Vasileios Tsarsitalidis and Efi Tsompopoulou and Elena 
Volf},\nyear={2023},\nurl={https://openreview.net/forum?id=5RSq86IM6mE}\n}", "github": "", "project": "", "reviewers": "1V51;AFoe;cD8F", "site": "https://openreview.net/forum?id=5RSq86IM6mE", "pdf_size": 510924, "recommendation": "5;6;6", "confidence": "3;3;3", "correctness": "3;4;4", "technical_novelty": "2;2;3", "empirical_novelty": "2;3;3", "wc_summary_paper": "41;43;63", "wc_strength_and_weaknesses": "262;284;192", "wc_clarity_quality_novelty_and_reproducibility": "36;36;45", "wc_summary_review": "27;67;42", "wc_review": "366;430;342", "wc_reply_reviewers": "301;50;49", "wc_reply_authors": "1219;819;354", "reply_reviewers": "2;2;1", "reply_authors": "3;2;1", "recommendation_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.0 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 49.0, 9.93310961716756 ], "wc_strength_and_weaknesses_avg": [ 246.0, 39.22584182228174 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 39.0, 4.242640687119285 ], "wc_summary_review_avg": [ 45.333333333333336, 16.49915822768611 ], "wc_review_avg": [ 379.3333333333333, 37.142368739157654 ], "wc_reply_reviewers_avg": [ 133.33333333333334, 118.55893986630542 ], "wc_reply_authors_avg": [ 797.3333333333334, 353.46695586559275 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 18, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.9999999999999997, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16218934775968431774&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2;3;4;5;0;6;3;7;0", "aff_unique_norm": "Yandex;Universit\u00e9 de Neuch\u00e2tel;University of Lausanne;University of Cambridge;University of Basel;University of Applied Sciences Western Switzerland;University Hospital Lausanne;Icahn School of Medicine at Mount Sinai", "aff_unique_dep": ";;;;;;;School of Medicine", "aff_unique_url": "https://yandex.com;https://www.unine.ch;https://www.unil.ch;https://www.cam.ac.uk;https://www.unibas.ch;https://www.hes-so.ch;https://www.chuv.ch;https://icahn.mssm.edu", "aff_unique_abbr": "Yandex;UNINE;UNIL;Cambridge;UniBas;HES-SO;CHUV;ISMMS", "aff_campus_unique_index": "1;2;3;1;4", "aff_campus_unique": ";Cambridge;Sierre;Lausanne;New York", "aff_country_unique_index": "0;1;1;2;1;1;0;1;2;3;0", "aff_country_unique": "Russian Federation;Switzerland;United Kingdom;United States" }, { "id": "5RxmkAFVs_V", "title": "Progressive Image Synthesis from Semantics to Details with Denoising Diffusion GAN", "track": "main", "status": "Withdraw", "tldr": "We propose a novel progressive method for image synthesis from semantics to details with diffusion denoising GAN.", "abstract": "Image generation has been dominated by generative adversarial networks (GANs) due to their superior ability to generate realistic images. Recently, by decomposing the image generation process into a sequence of denoising steps, denoising diffusion probabilistic models (DDPMs) have shown remarkable sample quality and diversity in image generation. However, DDPMs typically face two main challenges (but GANs do not): the time-expensive sampling process and the semantically meaningless latent space.
Although these two challenges have started to draw attention in recent works on DDPMs, they are often addressed separately. In this paper, by interpreting the sampling process of DDPMs in a new way with a special noise scheduler, we propose a novel progressive training pipeline to address these two challenges simultaneously. Concretely, we choose to decompose the sampling process into two stages: first generating semantics and then progressively refining details. As a result, we are able to interpret the sampling process of DDPMs as a refinement process instead of a denoising process, when the DDPMs try to predict the real images at each time step. Motivated by this new interpretation, we present a novel training pipeline that progressively transforms the attention from semantics to sample quality during training. Extensive results on two benchmarks show that our proposed diffusion model achieves competitive results with as few as two sampling steps on unconditional image generation. Importantly, the latent space of our diffusion model is shown to be semantically meaningful, which can be exploited on various downstream tasks (e.g., attribute manipulation).", "keywords": "Image generation;GANs;diffusion model;progressive generation", "primary_area": "", "supplementary_material": "", "author": "Guoxing Yang;Haoyu Lu;Guang Zhou;Haoran Wu;Zhiwu Lu", "authorids": "~Guoxing_Yang3;~Haoyu_Lu1;~Guang_Zhou2;~Haoran_Wu6;~Zhiwu_Lu1", "gender": ";M;M;M;M", "homepage": "https://haoyulu1998.github.io/;https://www.linkedin.com/in/%E5%B9%BF-%E5%91%A8-b4b204251/;;https://gsai.ruc.edu.cn/luzhiwu;https://github.com/GuoxingY", "dblp": "240/2720;;;53/5234;271/9521", "google_scholar": "https://scholar.google.com.hk/citations?view_op=list_works;;;OUXS8doAAAAJ;", "orcid": ";;;;", "linkedin": "%E6%B5%A9%E5%AE%87-%E5%8D%A2-4b42b7198/;;https://www.linkedin.cn/incareer/in/%E6%B5%A9%E7%84%B6-%E5%90%B4-b807a0164;;", "or_profile": "~Haoyu_Lu1;~Guang_Zhou2;~Haoran_Wu6;~Zhiwu_Lu1;~GuoXing_Yang2", "aff": "Renmin University of China;;China Unicom Research Institute ;Renmin University of China;Renmin University of China", "aff_domain": "ruc.edu.cn;;chinaunicom.cn;ruc.edu.cn;ruc.edu.cn", "position": "PhD student;;Researcher;Full Professor;PhD student", "bibtex": "@misc{\nyang2023progressive,\ntitle={Progressive Image Synthesis from Semantics to Details with Denoising Diffusion {GAN}},\nauthor={Guoxing Yang and Haoyu Lu and Guang Zhou and Haoran Wu and Zhiwu Lu},\nyear={2023},\nurl={https://openreview.net/forum?id=5RxmkAFVs_V}\n}", "github": "", "project": "", "reviewers": "Zhdn;F8Xn;mcz1;gX4Y", "site": "https://openreview.net/forum?id=5RxmkAFVs_V", "pdf_size": 3318365, "recommendation": "3;3;5;5", "confidence": "4;5;3;5", "correctness": "3;3;3;4", "technical_novelty": "2;2;3;2", "empirical_novelty": "2;1;3;3", "wc_summary_paper": "84;61;64;83", "wc_strength_and_weaknesses": "357;275;27;222", "wc_clarity_quality_novelty_and_reproducibility": "19;13;67;34", "wc_summary_review": "117;3;37;39", "wc_review": "577;352;195;378", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 73.0, 10.559356040971437 ], "wc_strength_and_weaknesses_avg": [ 220.25, 121.49768516313387 ],
"wc_clarity_quality_novelty_and_reproducibility_avg": [ 33.25, 20.932928605429293 ], "wc_summary_review_avg": [ 49.0, 41.78516483155236 ], "wc_review_avg": [ 375.5, 135.77647071565823 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.30151134457776363, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:t0ZmV0wn5EQJ:scholar.google.com/&scioq=Progressive+Image+Synthesis+from+Semantics+to+Details+with+Denoising+Diffusion+GAN&hl=en&as_sdt=0,5", "gs_version_total": 5, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Renmin University of China;China Unicom Research Institute", "aff_unique_dep": ";", "aff_unique_url": "http://www.ruc.edu.cn;https://www.chinaunicom.com.cn/en-US/ResearchInstitute", "aff_unique_abbr": "RUC;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "5T80c_5NSbV", "title": "A sparse, fast, and stable representation for multiparameter topological data analysis", "track": "main", "status": "Reject", "tldr": "In this article, we provide a general framework for representing multiparameter persistent homology with stability guarantees.", "abstract": "Topological data analysis (TDA) is a new area of geometric data analysis that focuses on using invariants from algebraic topology to provide multiscale shape descriptors for point clouds. One of the most important shape descriptors is persistent homology, which studies the topological variations as a filtration parameter changes; a typical parameter is the feature scale.\n\nFor many data sets, it is useful to consider varying multiple filtration parameters at once, for example scale and density. While the theoretical properties of one-parameter persistent homology are well understood, less is known about the multiparameter case. Of particular interest is the problem of representing multiparameter persistent homology by elements of a vector space for integration with traditional machine learning.\n\nExisting approaches to this problem either ignore most of the multiparameter information to reduce to the one-parameter case or are heuristic and potentially unstable in the face of noise. In this article, we introduce a general representation framework for multiparameter persistent homology that encompasses previous approaches. We establish theoretical stability guarantees under this framework as well as efficient algorithms for practical computation, making this framework an applicable and versatile tool for TDA practitioners. We validate our stability results and algorithms with numerical experiments that demonstrate statistical convergence, prediction accuracy, and fast running times on several real data sets. 
", "keywords": "Topological Data Analysis;Algebraic Topology;Persistent Homology;Kernel Methods", "primary_area": "", "supplementary_material": "/attachment/deeac0b2fa989960a92907a3472afc41ea934cdf.zip", "author": "David Loiseaux;Mathieu Carri\u00e8re;Andrew Blumberg", "authorids": "david.loiseaux@inria.fr;~Mathieu_Carri\u00e8re1;~Andrew_Blumberg1", "gender": ";;", "homepage": ";https://mathieucarriere.github.io/website/;", "dblp": ";167/1015;", "google_scholar": ";;", "orcid": ";;", "linkedin": ";;", "or_profile": "david.loiseaux@inria.fr;~Mathieu_Carri\u00e8re1;~Andrew_Blumberg1", "aff": ";INRIA;", "aff_domain": ";inria.fr;", "position": ";Researcher;", "bibtex": "@misc{\nloiseaux2023a,\ntitle={A sparse, fast, and stable representation for multiparameter topological data analysis},\nauthor={David Loiseaux and Mathieu Carri{\\`e}re and Andrew Blumberg},\nyear={2023},\nurl={https://openreview.net/forum?id=5T80c_5NSbV}\n}", "github": "", "project": "", "reviewers": "Bro2;a971;su2T;WBCA", "site": "https://openreview.net/forum?id=5T80c_5NSbV", "pdf_size": 2309977, "recommendation": "5;5;6;6", "confidence": "3;4;2;1", "correctness": "3;3;4;3", "technical_novelty": "2;4;3;3", "empirical_novelty": "2;4;3;0", "wc_summary_paper": "45;58;37;43", "wc_strength_and_weaknesses": "362;436;113;67", "wc_clarity_quality_novelty_and_reproducibility": "269;31;146;38", "wc_summary_review": "153;43;63;15", "wc_review": "829;568;359;163", "wc_reply_reviewers": "679;0;4;0", "wc_reply_authors": "685;0;169;41", "reply_reviewers": "1;0;1;0", "reply_authors": "1;0;1;1", "recommendation_avg": [ 5.5, 0.5 ], "confidence_avg": [ 2.5, 1.118033988749895 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.25, 1.479019945774904 ], "wc_summary_paper_avg": [ 45.75, 7.660776723022281 ], "wc_strength_and_weaknesses_avg": [ 244.5, 157.5412644357027 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 121.0, 96.84781876738371 ], "wc_summary_review_avg": [ 68.5, 51.679299530856646 ], "wc_review_avg": [ 479.75, 247.32304280030198 ], "wc_reply_reviewers_avg": [ 170.75, 293.4428181094232 ], "wc_reply_authors_avg": [ 223.75, 273.50079981601516 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 0.75, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.8944271909999159, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:H9xNxhiyHH4J:scholar.google.com/&scioq=A+sparse,+fast,+and+stable+representation+for+multiparameter+topological+data+analysis&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "INRIA", "aff_unique_dep": "", "aff_unique_url": "https://www.inria.fr", "aff_unique_abbr": "INRIA", "aff_country_unique_index": "0", "aff_country_unique": "France" }, { "id": "5U3xzYJoThy", "title": "A New Path: Scaling Vision-and-Language Navigation with Synthetic Instructions and Imitation Learning", "track": "main", "status": "Withdraw", "tldr": "A new path to improving instruction following agents using pure imitation learning (no RL) and large scale in-domain data augmentation ", "abstract": "Recent studies in Vision-and-Language Navigation (VLN) train RL agents to execute natural-language navigation instructions in photorealistic environments, as a step towards intelligent agents or robots that can follow human instructions. 
However, given the scarcity of human instruction data and limited diversity in the training environments, these agents still struggle with complex language grounding and spatial language understanding. Pretraining on large text and image-text datasets from the web has been extensively explored but the improvements are limited. To address the scarcity of in-domain instruction data, we investigate large-scale augmentation with synthetic instructions. We take 500+ indoor environments captured in densely-sampled 360\u25e6 panoramas, construct navigation trajectories through these panoramas, and generate a visually-grounded instruction for each trajectory using Marky (Wang et al., 2022), a high-quality multilingual navigation instruction generator. To further increase the variability of the trajectories, we also synthesize image observations from novel viewpoints using an image-to-image GAN. The resulting dataset of 4.2M instruction-trajectory pairs is two orders of magnitude larger than existing human-annotated datasets, and contains a wider variety of environments and viewpoints. To efficiently leverage data at this scale, we train a transformer agent with imitation learning for over 700M steps of experience. On the challenging Room-across-Room dataset, our approach outperforms all existing RL agents, improving the state-of-the-art NDTW from 71.1 to 79.1 in seen environments, and from 64.6 to 66.8 in unseen test environments. Self-supervision with synthetic instructions in new environments can improve further to 68.6 (vs. human 79.5). Our work points to a new path to improving instruction-following agents, emphasizing large-scale imitation learning and the development of synthetic instruction generation capabilities \u2013 which are shown to flow through directly to improved instruction-following performance.", "keywords": "vision and language navigation", "primary_area": "", "supplementary_material": "", "author": "Aishwarya Kamath;Peter Anderson;Su Wang;Jing Yu Koh;Alexander Ku;Austin Waters;Yinfei Yang;Jason Michael Baldridge;Zarana Parekh", "authorids": "~Aishwarya_Kamath1;~Peter_Anderson1;~Su_Wang4;~Jing_Yu_Koh2;~Alexander_Ku1;~Austin_Waters1;~Yinfei_Yang1;~Jason_Michael_Baldridge1;~Zarana_Parekh1", "gender": "F;M;;;M;;;M;", "homepage": ";http://www.panderson.me/;https://jacobsuwang.github.io/;;https://alexyku.github.io/;;;https://research.google/people/jasonbaldridge/?&type=google;", "dblp": "220/2018;88/3792-1;37/5976-1;;215/4289.html;;117/4082;90/6617;", "google_scholar": "WaW2C0UAAAAJ;r5mA7Q8AAAAJ;bJZV7r4AAAAJ;;Lh_ZqdcAAAAJ;;kvDbu90AAAAJ;TP_JZm8AAAAJ;", "orcid": ";;;;;;;;", "linkedin": "aishkamath;panderson80;;;;;;jason-baldridge-9b26295/;", "or_profile": "~Aishwarya_Kamath1;~Peter_Anderson1;~Su_Wang4;~Jing_Yu_Koh2;~Alexander_Ku1;~Austin_Waters1;~Yinfei_Yang1;~Jason_Michael_Baldridge1;~Zarana_Parekh1", "aff": "New York University;Google;Google;;Google;;Apple;Google;", "aff_domain": "nyu.edu;google.com;google.com;;google.com;;apple.com;google.com;", "position": "PhD student;Research Scientist;Researcher;;Researcher;;Researcher;Research Scientist;", "bibtex": "@misc{\nkamath2023a,\ntitle={A New Path: Scaling Vision-and-Language Navigation with Synthetic Instructions and Imitation Learning},\nauthor={Aishwarya Kamath and Peter Anderson and Su Wang and Jing Yu Koh and Alexander Ku and Austin Waters and Yinfei Yang and Jason Michael Baldridge and Zarana Parekh},\nyear={2023},\nurl={https://openreview.net/forum?id=5U3xzYJoThy}\n}", "github": "", "project": "", "reviewers": 
"ao3J;9cD2;bXfr;Na5P", "site": "https://openreview.net/forum?id=5U3xzYJoThy", "pdf_size": 3055621, "recommendation": "5;5;5;8", "confidence": "3;4;4;5", "correctness": "3;4;2;3", "technical_novelty": "2;2;2;2", "empirical_novelty": "3;1;3;4", "wc_summary_paper": "99;106;54;155", "wc_strength_and_weaknesses": "262;215;75;277", "wc_clarity_quality_novelty_and_reproducibility": "121;65;65;132", "wc_summary_review": "83;54;206;71", "wc_review": "565;440;400;635", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "112;312;486;285", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_paper_avg": [ 103.5, 35.80851853958776 ], "wc_strength_and_weaknesses_avg": [ 207.25, 79.70688489710284 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 95.75, 30.994959267597046 ], "wc_summary_review_avg": [ 103.5, 60.068710657046736 ], "wc_review_avg": [ 510.0, 94.40603794249603 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 298.75, 132.5733287656307 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 9, 0 ], "corr_recommendation_confidence": 0.816496580927726, "corr_recommendation_correctness": 0.0, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12473981189594605407&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff_unique_index": "0;1;1;1;2;1", "aff_unique_norm": "New York University;Google;Apple", "aff_unique_dep": ";Google;Apple Inc.", "aff_unique_url": "https://www.nyu.edu;https://www.google.com;https://www.apple.com", "aff_unique_abbr": "NYU;Google;Apple", "aff_campus_unique_index": "1;1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "LMC: Fast Training of GNNs via Subgraph Sampling with Provable Convergence", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11812", "id": "5VBBA91N6n", "poster": "", "openreview": "https://openreview.net/forum?id=5VBBA91N6n", "slides": "https://iclr.cc/virtual/2023/poster/11812", "video": "https://iclr.cc/virtual/2023/poster/11812", "author_site": "Zhihao Shi, Xize Liang, Jie Wang", "tldr": "We propose a novel and efficient subgraph-wise sampling method with a convergence guarantee by Local Message Compensation (LMC).", "abstract": "The message passing-based graph neural networks (GNNs) have achieved great success in many real-world applications.\nHowever, training GNNs on large-scale graphs suffers from the well-known neighbor explosion problem, i.e., the exponentially increasing dependencies of nodes with the number of message passing layers. Subgraph-wise sampling methods---a promising class of mini-batch training techniques---discard messages outside the mini-batches in backward passes to avoid the neighbor explosion problem at the expense of gradient estimation accuracy. This poses significant challenges to their convergence analysis and convergence speeds, which seriously limits their reliable real-world applications. To address this challenge, we propose a novel subgraph-wise sampling method with a convergence guarantee, namely Local Message Compensation (LMC). 
To the best of our knowledge, LMC is the {\\it first} subgraph-wise sampling method with provable convergence. The key idea of LMC is to retrieve the discarded messages in backward passes based on a message passing formulation of backward passes. By efficient and effective compensation for the discarded messages in both forward and backward passes, LMC computes accurate mini-batch gradients and thus accelerates convergence. We further show that LMC converges to first-order stationary points of GNNs. Experiments on large-scale benchmark tasks demonstrate that LMC significantly outperforms state-of-the-art subgraph-wise sampling methods in terms of efficiency.", "keywords": "Graph Neural Networks;Scalable Training;Provable Convergence;Local Message Compensation", "primary_area": "", "supplementary_material": "", "author": "Zhihao Shi;Xize Liang;Jie Wang", "authorids": "~Zhihao_Shi3;~Xize_Liang1;~Jie_Wang1", "gender": "M;;M", "homepage": "https://miralab.ai/people/zhihao-shi/;;http://staff.ustc.edu.cn/~jwangx", "dblp": ";;29/5259-5", "google_scholar": "https://scholar.google.com.hk/citations?user=u2Ffj60AAAAJ;;OugG4dUAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Zhihao_Shi3;~Xize_Liang1;~Jie_Wang1", "aff": "University of Science and Technology of China;;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;;ustc.edu.cn", "position": "PhD student;;Full Professor", "bibtex": "@inproceedings{\nshi2023lmc,\ntitle={{LMC}: Fast Training of {GNN}s via Subgraph Sampling with Provable Convergence},\nauthor={Zhihao Shi and Xize Liang and Jie Wang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=5VBBA91N6n}\n}", "github": "", "project": "", "reviewers": "LGPo;7N9L;p4iX;DdvT", "pdf_size": 1416039, "recommendation": "6;8;8;8", "confidence": "4;3;3;3", "correctness": "3;4;4;3", "technical_novelty": "3;3;3;3", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "76;87;57;54", "wc_strength_and_weaknesses": "203;113;215;134", "wc_clarity_quality_novelty_and_reproducibility": "9;18;40;27", "wc_summary_review": "35;58;35;17", "wc_review": "323;276;347;232", "wc_reply_reviewers": "0;0;0;16", "wc_reply_authors": "1559;767;640;1046", "reply_reviewers": "0;0;0;1", "reply_authors": "4;2;2;4", "recommendation_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 68.5, 13.6106575888162 ], "wc_strength_and_weaknesses_avg": [ 166.25, 43.59687488800086 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 23.5, 11.4564392373896 ], "wc_summary_review_avg": [ 36.25, 14.549484526951462 ], "wc_review_avg": [ 294.5, 44.206899913927465 ], "wc_reply_reviewers_avg": [ 4.0, 6.928203230275509 ], "wc_reply_authors_avg": [ 1003.0, 353.0049574722712 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.0, 1.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 53, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2851469416963097876&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=5VBBA91N6n", "email": "ustc.edu.cn;;ustc.edu.cn", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "University of Science and Technology of China", "aff_unique_dep":
"", "aff_unique_url": "http://www.ustc.edu.cn", "aff_unique_abbr": "USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "5WOIluv9Xop", "title": "HOW SAMPLING AFFECTS TRAINING: AN EFFECTIVE SAMPLING THEORY STUDY FOR LONG-TAILED IMAGE CLASSIFICATION", "track": "main", "status": "Reject", "tldr": "", "abstract": "The long-tailed image classification problem has been very challenging for a longtime. Suffered from the unbalanced distribution of categories, many deep vision classification methods perform well in the head classes while poor in the tail ones. This paper proposes an effective sampling theory, attempting to provide a theoretical explanation for the decoupling representation and classifier for long-tailed image classification. To apply the above sampling theory in practice, a general jitter sampling strategy is proposed. Experiments show that variety of long-tailed distribution algorithms exhibit better performance based on the effective sampling theory. The code will be released soon later.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Gong Zhang;Yongqiang Gao;Haijing Liu", "authorids": "~Gong_Zhang2;~Yongqiang_Gao1;~Haijing_Liu3", "gender": "M;M;M", "homepage": ";;https://github.com/Liu-Haijing", "dblp": ";99/9999;", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Gong_Zhang2;~Yongqiang_Gao1;~Haijing_Liu3", "aff": ";;SUN YAT-SEN UNIVERSITY", "aff_domain": ";;sysu.edu.cn", "position": ";;Undergrad student", "bibtex": "@misc{\nzhang2023how,\ntitle={{HOW} {SAMPLING} {AFFECTS} {TRAINING}: {AN} {EFFECTIVE} {SAMPLING} {THEORY} {STUDY} {FOR} {LONG}-{TAILED} {IMAGE} {CLASSIFICATION}},\nauthor={Gong Zhang and Yongqiang Gao and Haijing Liu},\nyear={2023},\nurl={https://openreview.net/forum?id=5WOIluv9Xop}\n}", "github": "", "project": "", "reviewers": "y4qK;3mbq;xj1Z;vZeb", "site": "https://openreview.net/forum?id=5WOIluv9Xop", "pdf_size": 254727, "recommendation": "1;3;3;3", "confidence": "5;4;5;3", "correctness": "2;3;2;2", "technical_novelty": "2;1;2;2", "empirical_novelty": "2;0;2;2", "wc_summary_paper": "34;72;73;58", "wc_strength_and_weaknesses": "503;398;342;218", "wc_clarity_quality_novelty_and_reproducibility": "10;49;35;57", "wc_summary_review": "10;44;20;71", "wc_review": "557;563;470;404", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 2.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "correctness_avg": [ 2.25, 0.4330127018922193 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 59.25, 15.738090735537142 ], "wc_strength_and_weaknesses_avg": [ 365.25, 102.79925826580657 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 37.75, 17.851820635442202 ], "wc_summary_review_avg": [ 36.25, 23.562417108607512 ], "wc_review_avg": [ 498.5, 65.81223290544092 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.5222329678670935, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:_-RU_qdYVWYJ:scholar.google.com/&scioq=HOW+SAMPLING+AFFECTS+TRAINING:+AN+EFFECTIVE+SAMPLING+THEORY+STUDY+FOR+LONG-TAILED+IMAGE+CLASSIFICATION&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Sun Yat-sen University", "aff_unique_dep": "", "aff_unique_url": "http://www.sysu.edu.cn", "aff_unique_abbr": "SYSU", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "id": "5XrQ2mskPQz", "title": "Matrix factorization under the constraint of connectivity between observed and source data ~ Muscle synergy analysis based on connectivity between muscle and brain activities ~", "track": "main", "status": "Reject", "tldr": "", "abstract": "Matrix factorization is a popular method to investigate the hidden elements in observed data for tasks such as speech separation and muscle synergy analysis. The hidden elements may be closely related to the source phenomenon that cause the observed phenomenon. \n However, conventional methods do not always factorize the observed phenomenon elements with the connectivity between the observed and source phenomena because they only use the observed phenomenon. This paper proposes a matrix decomposition method that constrains the connectivity between observed and source data by using the representations from a decoding model from source data to observed data. We applied our method to the corticomuscular system, which is made up of corticospinal pathways between the primary motor cortex and muscles in the body and creates muscle synergies that enable efficient connections between the brain and muscles. In this context, muscle activities are the observed phenomenon and brain activities are the source. Many previous studies have analyzed muscle synergies using only observed muscle activity, but there may be unrevealed muscle synergies under the constraint of the connectivity between brain and muscle activity. We therefore simultaneously recorded the brain activity from multiple regions of an extensive cortical area and the activity of multiple muscles of a monkey's forelimb while it performed a reach and grasp task throughout the course of recovery from a partial spinal cord injury (SCI). Analysis from a dataset of the monkey before SCI showed that some of the muscle synergies calculated from the proposed method using brain and muscle activities, did not exhibit a high degree of similarity to synergies obtained from the conventional method. The proposed method results obtained from the monkey after SCI showed an adaptive change in the number of muscle synergies associated with the degree of functional recovery. Specifically, the numbers of muscle synergies obtained by the proposed method initially increased immediately after SCI and then gradually decreased, while those obtained by a conventional method maintained the same number before and after SCI. These results suggest that our method is able to capture the unrevealed connectivity in the corticomuscular system that contributes to functional recovery: in other words, that it can factorize the observed data under the constraint of the connectivity between the observed and source data. 
Our work thus demonstrates the importance of using not only observed data but also source data to reveal unknown hidden elements.", "keywords": "Matrix factorization;Muscle synergy", "primary_area": "", "supplementary_material": "", "author": "Takashi Isezaki;Michiaki Suzuki;Yukio Koike;Ryosuke Aoki;Yukio Nishimura", "authorids": "~Takashi_Isezaki1;~Michiaki_Suzuki1;~Yukio_Koike1;~Ryosuke_Aoki1;~Yukio_Nishimura1", "gender": "M;M;M;M;M", "homepage": ";;;;https://www.igakuken.or.jp/english/project/detail/neuroprosth.html", "dblp": ";;;;", "google_scholar": ";;6J_TZlwAAAAJ;;", "orcid": "0000-0002-4895-7333;0000-0003-0725-0515;;0000-0003-1064-7046;", "linkedin": ";;;;", "or_profile": "~Takashi_Isezaki1;~Michiaki_Suzuki1;~Yukio_Koike1;~Ryosuke_Aoki1;~Yukio_Nishimura1", "aff": "Nippon Telegraph and Telephone Corporation;Tokyo Metropolitan Institute of Medical Science;NTT Human Informatics Laboratories;;Tokyo Metropolitan Institute of Medical Science", "aff_domain": "ntt.co.jp;igakuken.or.jp;ntt.co.jp;;igakuken.or.jp", "position": "Researcher;Researcher;Researcher;;Principal Researcher", "bibtex": "@misc{\nisezaki2023matrix,\ntitle={Matrix factorization under the constraint of connectivity between observed and source data {\\textasciitilde} Muscle synergy analysis based on connectivity between muscle and brain activities {\\textasciitilde}},\nauthor={Takashi Isezaki and Michiaki Suzuki and Yukio Koike and Ryosuke Aoki and Yukio Nishimura},\nyear={2023},\nurl={https://openreview.net/forum?id=5XrQ2mskPQz}\n}", "github": "", "project": "", "reviewers": "VHFL;89Ck;jF4J", "site": "https://openreview.net/forum?id=5XrQ2mskPQz", "pdf_size": 675417, "recommendation": "3;3;5", "confidence": "3;4;5", "correctness": "2;3;3", "technical_novelty": "2;2;2", "empirical_novelty": "1;2;2", "wc_summary_paper": "186;68;191", "wc_strength_and_weaknesses": "433;252;186", "wc_clarity_quality_novelty_and_reproducibility": "65;106;58", "wc_summary_review": "89;22;81", "wc_review": "773;448;516", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "wc_summary_paper_avg": [ 148.33333333333334, 56.84090858606053 ], "wc_strength_and_weaknesses_avg": [ 290.3333333333333, 104.41689944108133 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 76.33333333333333, 21.171259344267224 ], "wc_summary_review_avg": [ 64.0, 29.87752778706208 ], "wc_review_avg": [ 579.0, 139.95951795668157 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.8660254037844387, "corr_recommendation_correctness": 0.5, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:gHcg3MIMc2QJ:scholar.google.com/&scioq=Matrix+factorization+under+the+constraint+of+connectivity+between+observed+and+source+data+~+Muscle+synergy+analysis+based+on+connectivity+between+muscle+and+brain+activities+~&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Nippon Telegraph and Telephone Corporation;Tokyo Metropolitan Institute of Medical Science;NTT Human Informatics Laboratories", 
"aff_unique_dep": ";;", "aff_unique_url": "https://www.ntt.co.jp;https://www.tokyo-metsci.org;https://www.ntt.co.jp", "aff_unique_abbr": "NTT;;NTT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Japan" }, { "id": "5YHaMHg2Bfa", "title": "SGD Through the Lens of Kolmogorov Complexity", "track": "main", "status": "Reject", "tldr": "", "abstract": "We initiate a thorough study of the dynamics of stochastic gradient descent (SGD) under minimal assumptions using the tools of entropy compression. Specifically, we characterize a quantity of interest which we refer to as the \\emph{accuracy discrepancy}. Roughly speaking, this measures the average discrepancy between the model accuracy on batches and large subsets of the entire dataset. We show that if this quantity is sufficiently large, then SGD finds a model which achieves perfect accuracy on the data in $O(1)$ epochs. On the contrary, if the model cannot perfectly fit the data, this quantity must remain below a \\emph{global} threshold, which only depends on the size of the dataset and batch.\n\nWe use the above framework to lower bound the amount of randomness required to allow (non stochastic) gradient descent to escape from local minimas using perturbations. We show that even if the model is \\emph{extremely overparameterized}, at least a linear (in the size of the dataset) number of random bits are required to guarantee that GD escapes local minimas in polynomial time.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/e5266e0d5614d6f432fd1f91dd63312c5c4f79c9.zip", "author": "Gregory Schwartzman", "authorids": "~Gregory_Schwartzman1", "gender": "", "homepage": "https://sites.google.com/view/gregoryschwartzman/", "dblp": "176/5322.html", "google_scholar": "", "orcid": "", "linkedin": "", "or_profile": "~Gregory_Schwartzman1", "aff": "Japan Advanced Institute of Science and Technology", "aff_domain": "jaist.ac.jp", "position": "Associate Professor", "bibtex": "@misc{\nschwartzman2023sgd,\ntitle={{SGD} Through the Lens of Kolmogorov Complexity},\nauthor={Gregory Schwartzman},\nyear={2023},\nurl={https://openreview.net/forum?id=5YHaMHg2Bfa}\n}", "github": "", "project": "", "reviewers": "w9AC;T4KT;5jcV;xLLJ;EpmW;GiuD;dQLw", "site": "https://openreview.net/forum?id=5YHaMHg2Bfa", "pdf_size": 731744, "recommendation": "3;5;5;6;6;6;8", "confidence": "4;3;3;3;3;2;4", "correctness": "4;3;3;3;3;4;4", "technical_novelty": "2;3;3;3;4;3;4", "empirical_novelty": "2;0;3;0;0;2;0", "wc_summary_paper": "124;314;67;48;273;92;62", "wc_strength_and_weaknesses": "669;787;418;137;140;128;197", "wc_clarity_quality_novelty_and_reproducibility": "40;255;61;52;19;173;88", "wc_summary_review": "34;380;26;30;19;26;3", "wc_review": "867;1736;572;267;451;419;350", "wc_reply_reviewers": "0;244;167;0;0;0;0", "wc_reply_authors": "830;1293;1046;342;194;191;655", "reply_reviewers": "0;1;1;0;0;0;0", "reply_authors": "1;3;2;1;1;1;1", "recommendation_avg": [ 5.571428571428571, 1.3997084244475302 ], "confidence_avg": [ 3.142857142857143, 0.6388765649999399 ], "correctness_avg": [ 3.4285714285714284, 0.4948716593053935 ], "technical_novelty_avg": [ 3.142857142857143, 0.6388765649999398 ], "empirical_novelty_avg": [ 1.0, 1.1952286093343936 ], "wc_summary_paper_avg": [ 140.0, 100.28673178157004 ], "wc_strength_and_weaknesses_avg": [ 353.7142857142857, 256.1861568055064 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 98.28571428571429, 78.7975965654397 ], 
"wc_summary_review_avg": [ 74.0, 125.26885372783497 ], "wc_review_avg": [ 666.0, 472.1422303864437 ], "wc_reply_reviewers_avg": [ 58.714285714285715, 95.08900234699561 ], "wc_reply_authors_avg": [ 650.1428571428571, 399.3156390524127 ], "reply_reviewers_avg": [ 0.2857142857142857, 0.45175395145262565 ], "reply_authors_avg": [ 1.4285714285714286, 0.7284313590846835 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 1, 0 ], "corr_recommendation_confidence": -0.09128709291752751, "corr_recommendation_correctness": 0.05892556509887903, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10073201653423374901&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 4, "aff_unique_index": "0", "aff_unique_norm": "Japan Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.jaist.ac.jp", "aff_unique_abbr": "JAIST", "aff_country_unique_index": "0", "aff_country_unique": "Japan" }, { "id": "5Z1rblK1Be5", "title": "A Risk-Averse Equilibrium for Multi-Agent Systems", "track": "main", "status": "Reject", "tldr": "We introduce a novel risk-averse solution concept that allows the learner to accommodate low probability actions by finding the strategy with minimum variance, given any level of expected utility. ", "abstract": "In multi-agent systems, intelligent agents are tasked with making decisions that lead to optimal outcomes when actions of the other agents are as expected, whilst also being prepared for their unexpected behaviour. In this work, we introduce a novel risk-averse solution concept that allows the learner to accommodate low probability actions by finding the strategy with minimum variance, given any level of expected utility. We first prove the existence of such a risk-averse equilibrium, and propose one fictitious-play type learning algorithm for smaller games that enjoys provable convergence guarantees in games classes including zero-sum and potential. Furthermore, we propose an approximation method for larger games based on iterative population-based training that generates a population of risk- averse agents. Empirically, our equilibrium is shown to be able to reduce the utility variance, specifically in the sense that other agents\u2019 low probability behaviour is better accounted for by our equilibrium in comparison to playing other solutions. 
Importantly, we show that our population of agents that approximate a risk-averse equilibrium is particularly effective against unseen opposing populations, especially in the case of guaranteeing a minimum level of performance, which is critical to safety-aware multi-agent systems.", "keywords": "game theory;safe game theory;risk averse game theory;safe equilibrium;population learning;game theory equilibrium", "primary_area": "", "supplementary_material": "", "author": "Oliver Slumbers;David Henry Mguni;Stephen Marcus McAleer;Jun Wang;Yaodong Yang", "authorids": "~Oliver_Slumbers1;~David_Henry_Mguni1;~Stephen_Marcus_McAleer1;~Jun_Wang2;~Yaodong_Yang1", "gender": ";M;M;M;M", "homepage": ";;https://www.andrew.cmu.edu/user/smcaleer/;http://www0.cs.ucl.ac.uk/staff/jun.wang/;https://www.yangyaodong.com", "dblp": "285/5044;217/2369;;w/JunWang12;170/1496-1", "google_scholar": "obYGSVIAAAAJ;K-_yzBsAAAAJ;iEFL4-YAAAAJ;https://scholar.google.co.uk/citations?user=wIE1tY4AAAAJ;https://scholar.google.co.uk/citations?user=6yL0xw8AAAAJ", "orcid": ";;;;0000-0001-8132-5613", "linkedin": ";;stephen-mcaleer/;;yaodong-yang", "or_profile": "~Oliver_Slumbers1;~David_Henry_Mguni1;~Stephen_Marcus_McAleer1;~Jun_Wang2;~Yaodong_Yang1", "aff": "University College London;Queen Mary University, London;Carnegie Mellon University;University College London;Peking University", "aff_domain": "ucl.ac.uk;qmul.ac.uk;cmu.edu;ucl.ac.uk;pku.edu.cn", "position": "PhD student;Lecturer;Postdoc;Professor;Assistant Professor", "bibtex": "@misc{\nslumbers2023a,\ntitle={A Risk-Averse Equilibrium for Multi-Agent Systems},\nauthor={Oliver Slumbers and David Henry Mguni and Stephen Marcus McAleer and Jun Wang and Yaodong Yang},\nyear={2023},\nurl={https://openreview.net/forum?id=5Z1rblK1Be5}\n}", "github": "", "project": "", "reviewers": "UpJk;WyXC;1b8K;PRwZ", "site": "https://openreview.net/forum?id=5Z1rblK1Be5", "pdf_size": 2496190, "recommendation": "3;3;5;6", "confidence": "2;4;3;3", "correctness": "2;1;4;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "3;2;2;3", "wc_summary_paper": "33;306;53;140", "wc_strength_and_weaknesses": "740;74;224;143", "wc_clarity_quality_novelty_and_reproducibility": "41;24;90;152", "wc_summary_review": "36;28;72;74", "wc_review": "850;432;439;509", "wc_reply_reviewers": "359;247;0;0", "wc_reply_authors": "653;312;146;404", "reply_reviewers": "1;1;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 2.5, 1.118033988749895 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 133.0, 107.67776000641915 ], "wc_strength_and_weaknesses_avg": [ 295.25, 262.20733685387216 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 76.75, 49.746231013012434 ], "wc_summary_review_avg": [ 52.5, 20.706279240848655 ], "wc_review_avg": [ 557.5, 171.5378966875833 ], "wc_reply_reviewers_avg": [ 151.5, 156.5894313164206 ], "wc_reply_authors_avg": [ 378.75, 183.35672199295013 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.7745966692414834, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:w8s66LSAmcUJ:scholar.google.com/&scioq=A+Risk-Averse+Equilibrium+for+Multi-Agent+Systems&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2;0;3", 
"aff_unique_norm": "University College London;Queen Mary University of London;Carnegie Mellon University;Peking University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.ucl.ac.uk;https://www.qmul.ac.uk;https://www.cmu.edu;http://www.pku.edu.cn", "aff_unique_abbr": "UCL;QMUL;CMU;Peking U", "aff_campus_unique_index": "1", "aff_campus_unique": ";London", "aff_country_unique_index": "0;0;1;0;2", "aff_country_unique": "United Kingdom;United States;China" }, { "id": "5ZLWi--i57", "title": "BQ-NCO: Bisimulation Quotienting for Generalizable Neural Combinatorial Optimization", "track": "main", "status": "Reject", "tldr": "A generic formulation of Combinatorial Optimization problems as MDP, and pre-processing steps to improve it, with experiments on routing problems", "abstract": "Despite the success of Neural Combinatorial Optimization methods for end-to-end heuristic learning, out-of-distribution generalization remains a challenge. \nIn this paper, we present a novel formulation of combinatorial optimization (CO) problems as Markov Decision Processes (MDPs) that effectively leverages symmetries of the CO problems to improve out-of-distribution robustness. \nStarting from the standard MDP formulation of constructive heuristics, we introduce a generic transformation based on bisimulation quotienting (BQ) in MDPs. \nThis transformation allows to reduce the state space by accounting for the intrinsic symmetries of the CO problem and facilitates the MDP solving.\nWe illustrate our approach on the Traveling Salesman and Capacitated Vehicle Routing Problems. We present a BQ reformulation of these problems and introduce a simple attention-based policy network that we train by imitation of (near) optimal solutions for small instances from a single distribution. 
\nWe obtain new state-of-the-art generalization results for instances with up to 1000 nodes from synthetic and realistic benchmarks that vary both in size and node distributions.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/66e8dbe447ed3ac2961f89ed9826025672465ea4.zip", "author": "Darko Drakulic;Sofia Michel;Florian Mai;Arnaud Sors;Jean-Marc Andreoli", "authorids": "~Darko_Drakulic1;~Sofia_Michel1;~Florian_Mai1;~Arnaud_Sors1;~Jean-Marc_Andreoli2", "gender": "M;;Non-Binary;M;M", "homepage": ";https://europe.naverlabs.com/people_user/sofia-michel/;;;https://europe.naverlabs.com/people_user/jean-marc-andreoli/", "dblp": "121/2070.html;139/2626;200/7899;217/2664;89/4299.html", "google_scholar": "B3-rbrcAAAAJ;;MfETM20AAAAJ;https://scholar.google.com/scholar?hl=fr;shjlrvEAAAAJ", "orcid": ";;;;", "linkedin": ";;;;https://fr.linkedin.com/public-profile/in/jean-marc-andreoli-ab80332?trk=people-guest_people_search-card&challengeId=AQHatWUwZW-a2gAAAXcKvbGBUWRIqIBbgOTe62Bfj7_NtUbKi3T53z487fBcrsbq3TFmiI6_Wr_s3zVpKXEDMSNYJvfWn3nRxQ&submissionId=4e430cf4-a4af-5a16-cb22-39677341584d", "or_profile": "~Darko_Drakulic1;~Sofia_Michel1;~Florian_Mai1;~Arnaud_Sors1;~Jean-Marc_Andreoli2", "aff": "Naver Labs Europe;Naver Labs Europe;Idiap Research Institute;Naver Labs Europe;Naver Labs Europe", "aff_domain": "naverlabs.com;naverlabs.com;idiap.ch;naverlabs.com;naverlabs.com", "position": "Researcher;Researcher;PhD student;Researcher;Researcher", "bibtex": "@misc{\ndrakulic2023bqnco,\ntitle={{BQ}-{NCO}: Bisimulation Quotienting for Generalizable Neural Combinatorial Optimization},\nauthor={Darko Drakulic and Sofia Michel and Florian Mai and Arnaud Sors and Jean-Marc Andreoli},\nyear={2023},\nurl={https://openreview.net/forum?id=5ZLWi--i57}\n}", "github": "", "project": "", "reviewers": "yAnE;Sx6V;kF44;B3kw", "site": "https://openreview.net/forum?id=5ZLWi--i57", "pdf_size": 1702166, "recommendation": "3;5;5;8", "confidence": "4;3;4;2", "correctness": "3;3;3;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "53;66;66;112", "wc_strength_and_weaknesses": "299;216;153;84", "wc_clarity_quality_novelty_and_reproducibility": "37;70;25;369", "wc_summary_review": "23;93;26;25", "wc_review": "412;445;270;590", "wc_reply_reviewers": "0;71;0;0", "wc_reply_authors": "783;932;694;1369", "reply_reviewers": "0;1;0;0", "reply_authors": "1;2;1;2", "recommendation_avg": [ 5.25, 1.7853571071357126 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 74.25, 22.431841208425134 ], "wc_strength_and_weaknesses_avg": [ 188.0, 79.28745171841507 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 125.25, 141.69046368757498 ], "wc_summary_review_avg": [ 41.75, 29.608909132218972 ], "wc_review_avg": [ 429.25, 113.73955996046406 ], "wc_reply_reviewers_avg": [ 17.75, 30.74390183434757 ], "wc_reply_authors_avg": [ 944.5, 259.41713513181816 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.8866206949335731, "corr_recommendation_correctness": 0.0, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1961595760386750658&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "NAVER LABS;Idiap Research Institute", "aff_unique_dep": ";", 
"aff_unique_url": "https://labs.naver.com;https://www.idiap.ch", "aff_unique_abbr": "NLE;Idiap", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "Unknown;Switzerland" }, { "id": "5ZSBBhiAapV", "title": "On Feature Diversity in Energy-based Models", "track": "main", "status": "Reject", "tldr": "We derive generalization bounds for energy-based models showing that reducing the redundancy of the feature can lead to better generalization", "abstract": "Energy-based learning is a powerful learning paradigm that encapsulates various discriminative and generative approaches. An energy-based model (EBM) is typically formed of inner-model(s) that learn a combination of the different features to generate an energy mapping for each input configuration. In this paper, we focus on the diversity of the produced feature set. We extend the probably approximately correct (PAC) theory of EBMs and analyze the effect of redundancy reduction on the performance of EBMs. We derive generalization bounds for various learning contexts, i.e., regression, classification, and implicit regression, with different energy functions and we show that indeed reducing redundancy of the feature set can consistently decrease the gap between the true and empirical expectation of the energy and boosts the performance of the model. ", "keywords": "energy-based models;redundancy reduction;feature diversity", "primary_area": "", "supplementary_material": "/attachment/e927ffd025ad8567ca4978e50ef45a16e63415ac.zip", "author": "Firas Laakom;Jenni Raitoharju;Alexandros Iosifidis;Moncef Gabbouj", "authorids": "~Firas_Laakom1;~Jenni_Raitoharju1;~Alexandros_Iosifidis3;~Moncef_Gabbouj1", "gender": "M;;;M", "homepage": ";;;https://www.tuni.fi/en/moncef-gabbouj", "dblp": "242/8179;;;08/6597", "google_scholar": "VPWIyx8AAAAJ;;;cHukfSUAAAAJ", "orcid": "0000-0001-7436-5692;;;0000-0002-9788-2323", "linkedin": ";;;moncef-gabbouj-2186282/?originalSubdomain=fi", "or_profile": "~Firas_Laakom1;~Jenni_Raitoharju1;~Alexandros_Iosifidis3;~Moncef_Gabbouj1", "aff": "Tampere University;;;Tampere University", "aff_domain": "tuni.fi;;;tuni.fi", "position": "PhD student;;;Full Professor", "bibtex": "@misc{\nlaakom2023on,\ntitle={On Feature Diversity in Energy-based Models},\nauthor={Firas Laakom and Jenni Raitoharju and Alexandros Iosifidis and Moncef Gabbouj},\nyear={2023},\nurl={https://openreview.net/forum?id=5ZSBBhiAapV}\n}", "github": "", "project": "", "reviewers": "VPpj;mRZM;AdvF;oTpp;FAvo", "site": "https://openreview.net/forum?id=5ZSBBhiAapV", "pdf_size": 846494, "recommendation": "1;5;5;5;5", "confidence": "4;4;2;3;3", "correctness": "3;3;3;2;2", "technical_novelty": "1;2;2;3;2", "empirical_novelty": "1;2;2;3;2", "wc_summary_paper": "51;99;61;174;40", "wc_strength_and_weaknesses": "211;448;257;408;668", "wc_clarity_quality_novelty_and_reproducibility": "52;4;14;108;20", "wc_summary_review": "63;35;24;164;187", "wc_review": "377;586;356;854;915", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "0;0;636;796;987", "reply_reviewers": "0;0;0;0;0", "reply_authors": "0;0;1;1;2", "recommendation_avg": [ 4.2, 1.6000000000000003 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "correctness_avg": [ 2.6, 0.4898979485566356 ], "technical_novelty_avg": [ 2.0, 0.6324555320336759 ], "empirical_novelty_avg": [ 2.0, 0.6324555320336759 ], "wc_summary_paper_avg": [ 85.0, 48.73191972413974 ], "wc_strength_and_weaknesses_avg": [ 398.4, 161.46157437607255 ], 
"wc_clarity_quality_novelty_and_reproducibility_avg": [ 39.6, 37.78677017158254 ], "wc_summary_review_avg": [ 94.6, 67.65973691938211 ], "wc_review_avg": [ 617.6, 233.08933909554935 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 483.8, 410.3580875284414 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0.8, 0.7483314773547883 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.5345224838248487, "corr_recommendation_correctness": -0.40824829046386296, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7411738941659034268&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0", "aff_unique_norm": "Tampere University", "aff_unique_dep": "", "aff_unique_url": "https://www.tuni.fi", "aff_unique_abbr": "Tuni", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Finland" }, { "id": "5ZarS9RX5I-", "title": "Enforcing zero-Hessian in meta-learning", "track": "main", "status": "Withdraw", "tldr": "This paper argues linearity in the inner loop is the key gradient-based meta learning, thereby suggests algorithms which exploits this prior.", "abstract": "Gradient-Based Meta Learning (GBML) enables us to get task-specific parameters with few-labeled datapoints in an inner loop. However, it has not yet been discussed how GBML can adapt to a new task within a few optimization steps with a huge learning rate in the inner loop. We find that the gradient does not change from the beginning to the end of the inner loop, meaning that it behaves like a linear model. In this paper, we argue that this characteristic is an essential key to understanding convergence in inner loops with huge learning rates. Also, we show that gradient-based meta learning can be interpreted as metric-based meta learning when we adopt our hypothesis that linearity in the inner loop is the key to operating GBML. To empirically prove and exploit our hypothesis, we propose a regularization-based algorithm called enforcing Linearity in the Inner Loop (LIL) which exploits our observation which can be applied to any baselines that has the form of GBML. LIL proves its potential by showing its boosted performance not only on top of general baselines in various architectures, but also on adverse or Hessian-free baselines. 
Qualitative experiments are also conducted to explain the performance of LIL.", "keywords": "meta learning;Gradient based meta learning;GBML;kernel gradient descent;metric-based learning;optimization-based meta-learning", "primary_area": "", "supplementary_material": "/attachment/c2341cd4459d13d573adbc0f2630f56ecdaf56e2.zip", "author": "JunHoo Lee;Jayeon Yoo;Nojun Kwak", "authorids": "~JunHoo_Lee1;~Jayeon_Yoo1;~Nojun_Kwak1", "gender": "M;F;M", "homepage": "https://junhoo-lee.com;;http://mipal.snu.ac.kr", "dblp": "376/0719;281/8521;49/2806", "google_scholar": "https://scholar.google.com/citations?hl=ko;JAeV59wAAAAJ;h_8-1M0AAAAJ", "orcid": ";;0000-0002-1792-0327", "linkedin": ";;", "or_profile": "~JunHoo_Lee1;~Jayeon_Yoo1;~Nojun_Kwak1", "aff": "Seoul National University;NAVER;Seoul National University", "aff_domain": "snu.ac.kr;navercorp.com;snu.ac.kr", "position": "PhD student;Intern;Full Professor", "bibtex": "@misc{\nlee2023enforcing,\ntitle={Enforcing zero-Hessian in meta-learning},\nauthor={JunHoo Lee and Jayeon Yoo and Nojun Kwak},\nyear={2023},\nurl={https://openreview.net/forum?id=5ZarS9RX5I-}\n}", "github": "", "project": "", "reviewers": "eHDH;Qg63;4diV;QYKq;Ee1X", "site": "https://openreview.net/forum?id=5ZarS9RX5I-", "pdf_size": 1487842, "recommendation": "1;3;5;5;6", "confidence": "5;5;3;5;4", "correctness": "1;2;3;3;3", "technical_novelty": "2;2;3;3;3", "empirical_novelty": "2;2;2;2;4", "wc_summary_paper": "65;65;50;114;65", "wc_strength_and_weaknesses": "442;247;141;527;488", "wc_clarity_quality_novelty_and_reproducibility": "97;41;93;77;64", "wc_summary_review": "77;10;42;86;23", "wc_review": "681;363;326;804;640", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;0;0", "reply_authors": "0;0;0;0;0", "recommendation_avg": [ 4.0, 1.7888543819998317 ], "confidence_avg": [ 4.4, 0.7999999999999999 ], "correctness_avg": [ 2.4, 0.8 ], "technical_novelty_avg": [ 2.6, 0.4898979485566356 ], "empirical_novelty_avg": [ 2.4, 0.8 ], "wc_summary_paper_avg": [ 71.8, 21.88515478583599 ], "wc_strength_and_weaknesses_avg": [ 369.0, 149.21260000415515 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 74.4, 20.431348462595416 ], "wc_summary_review_avg": [ 47.6, 29.62836478781777 ], "wc_review_avg": [ 562.8, 186.6026795091646 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.5590169943749475, "corr_recommendation_correctness": 0.9782797401561579, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:UZnsnNuwIU8J:scholar.google.com/&scioq=Enforcing+zero-Hessian+in+meta-learning&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;0", "aff_unique_norm": "Seoul National University;NAVER Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://www.snu.ac.kr;https://www.naver.com", "aff_unique_abbr": "SNU;NAVER", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "CLARE: Conservative Model-Based Reward Learning for Offline Inverse Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11961", "id": "5aT4ganOd98", "poster": "/media/PosterPDFs/ICLR%202023/11961.png?t=1680978819.5675998", "openreview": "https://openreview.net/forum?id=5aT4ganOd98", "slides": 
"https://iclr.cc/virtual/2023/poster/11961", "video": "https://iclr.cc/virtual/2023/poster/11961", "author_site": "Sheng Yue, Guanbo Wang, Wei Shao, Zhaofeng Zhang, Sen Lin, Ju Ren, Junshan Zhang", "tldr": "This paper introduces a principled algorithm to approach the reward extrapolation error in offline inverse reinforcement learning.", "abstract": "This work aims to tackle a major challenge in offline Inverse Reinforcement Learning (IRL), namely the reward extrapolation error, where the learned reward function may fail to explain the task correctly and misguide the agent in unseen environments due to the intrinsic covariate shift. Leveraging both expert data and lower-quality diverse data, we devise a principled algorithm (namely CLARE) that solves offline IRL efficiently via integrating \"conservatism\" into a learned reward function and utilizing an estimated dynamics model. Our theoretical analysis provides an upper bound on the return gap between the learned policy and the expert policy, based on which we characterize the impact of covariate shift by examining subtle two-tier tradeoffs between the exploitation (on both expert and diverse data) and exploration (on the estimated dynamics model). We show that CLARE can provably alleviate the reward extrapolation error by striking the right exploitation-exploration balance therein. Extensive experiments corroborate the significant performance gains of CLARE over existing state-of-the-art algorithms on MuJoCo continuous control tasks (especially with a small offline dataset), and the learned reward is highly instructive for further learning. ", "keywords": "offline inverse reinforcement learning;inverse reinforcement learning;offline reinforcement learning", "primary_area": "", "supplementary_material": "/attachment/858367de00fa1bb99a0bc2a0c07333755daeb79f.zip", "author": "Sheng Yue;Guanbo Wang;Wei Shao;Zhaofeng Zhang;Sen Lin;Ju Ren;Junshan Zhang", "authorids": "~Sheng_Yue1;~Guanbo_Wang3;~Wei_Shao4;~Zhaofeng_Zhang1;~Sen_Lin1;~Ju_Ren1;~Junshan_Zhang1", "gender": "M;;M;;;;M", "homepage": "https://shaunyue.github.io;;https://swsamleo.github.io/wei_shao.github.io//;;https://slin70.github.io/;;https://faculty.engineering.ucdavis.edu/jzhang/", "dblp": "236/3241;294/8693;24/803-6;;70/9499-1.html;;59/1232.html", "google_scholar": "n0Gjw_oAAAAJ;;https://scholar.google.com.au/citations?user=sdthjnoAAAAJ;https://scholar.google.com/citations?hl=en;94-TbUsAAAAJ;;UtAdFs8AAAAJ", "orcid": "0009-0001-3416-8181;0009-0004-7468-5249;0000-0002-9873-8331;;;;", "linkedin": ";;;;;;", "or_profile": "~Sheng_Yue1;~Guanbo_Wang3;~Wei_Shao4;~Zhaofeng_Zhang1;~Sen_Lin1;~Ju_Ren1;~Junshan_Zhang1", "aff": "Tsinghua University;Tsinghua University;University of California, Davis;Arizona State University;Ohio State University, Columbus;;University of California, Davis", "aff_domain": "tsinghua.edu.cn;cs.tsinghua.edu.cn;ucdavis.edu;asu.edu;osu.edu;;ucdavis.edu", "position": "Postdoc;PhD student;Postdoc;PhD student;Postdoc;;Full Professor", "bibtex": "@inproceedings{\nyue2023clare,\ntitle={{CLARE}: Conservative Model-Based Reward Learning for Offline Inverse Reinforcement Learning},\nauthor={Sheng Yue and Guanbo Wang and Wei Shao and Zhaofeng Zhang and Sen Lin and Ju Ren and Junshan Zhang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=5aT4ganOd98}\n}", "github": "", "project": "", "reviewers": "5Qn6;Gh9M;CxsR;yF3G", "pdf_size": 1155626, "recommendation": "6;6;8;8", "confidence": "2;3;4;2", 
"correctness": "4;2;3;3", "technical_novelty": "3;2;4;3", "empirical_novelty": "3;3;4;3", "wc_summary_paper": "52;81;105;109", "wc_strength_and_weaknesses": "124;524;57;239", "wc_clarity_quality_novelty_and_reproducibility": "37;14;250;40", "wc_summary_review": "2;29;64;17", "wc_review": "215;648;476;405", "wc_reply_reviewers": "0;70;32;35", "wc_reply_authors": "213;1657;334;180", "reply_reviewers": "0;1;1;1", "reply_authors": "1;6;2;2", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 86.75, 22.741756748325315 ], "wc_strength_and_weaknesses_avg": [ 236.0, 178.56231405310584 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 85.25, 95.64877155510153 ], "wc_summary_review_avg": [ 28.0, 22.880122377295102 ], "wc_review_avg": [ 436.0, 155.19826029952785 ], "wc_reply_reviewers_avg": [ 34.25, 24.78280654001883 ], "wc_reply_authors_avg": [ 596.0, 615.245885805017 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 1.920286436967152 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.30151134457776363, "corr_recommendation_correctness": 0.0, "gs_citation": 38, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1626666204223323971&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=5aT4ganOd98", "email": "tsinghua.edu.cn;cs.tsinghua.edu.cn;ucdavis.edu;asu.edu;osu.edu;;ucdavis.edu", "author_num": 7, "aff_unique_index": "0;0;1;2;3;1", "aff_unique_norm": "Tsinghua University;University of California, Davis;Arizona State University;Ohio State University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.ucdavis.edu;https://www.asu.edu;https://www.osu.edu", "aff_unique_abbr": "THU;UC Davis;ASU;OSU", "aff_campus_unique_index": "1;2;1", "aff_campus_unique": ";Davis;Columbus", "aff_country_unique_index": "0;0;1;1;1;1", "aff_country_unique": "China;United States" }, { "id": "5b9uVL3l1T4", "title": "Data Drift Correction via Time-varying Importance Weight Estimator", "track": "main", "status": "Reject", "tldr": "Data gradually evolves over time in the real-world applications. This paper proposes a simple yet effective way to detect gradual shifts in data.", "abstract": "Real-world deployment of machine learning models is challenging when data evolves over time. And data does evolve over time. While no model can work when data evolves in an arbitrary fashion, if there is some pattern to these changes, we might be able to design methods to address it. This paper addresses situations when data evolves gradually. We introduce a novel time-varying importance weight estimator that can detect gradual shifts in the distribution of data. Such an importance weight estimator allows the training method to selectively sample past data---not just similar data from the past like a standard importance weight estimator would but also data that evolved in a similar fashion in the past. Our time-varying importance weight is quite general. We demonstrate different ways of implementing it that exploit some known structure in the evolution of data. 
We demonstrate and evaluate this approach on a variety of problems ranging from supervised learning tasks (multiple image classification datasets) where the data undergoes a sequence of gradual shifts of our design to reinforcement learning tasks (robotic manipulation and continuous control) where data undergoes a shift organically as the policy or the task changes.", "keywords": "distribution shift;data drift over time;propensity scoring", "primary_area": "", "supplementary_material": "", "author": "Rasool Fakoor;Jonas Mueller;Zachary Chase Lipton;Pratik Chaudhari;Alex Smola", "authorids": "~Rasool_Fakoor1;~Jonas_Mueller1;~Zachary_Chase_Lipton1;~Pratik_Chaudhari1;~Alex_Smola1", "gender": "M;M;Unspecified;M;M", "homepage": "http://rasoolfa.github.io;;http://zacklipton.com;https://pratikac.github.io/;http://alex.smola.org", "dblp": "123/2447;178/3250;;;s/AlexanderJSmola", "google_scholar": "nVsOPtQAAAAJ;HeVcLzAAAAAJ;MN9Kfg8AAAAJ;c_z5hWEAAAAJ;Tb0ZrYwAAAAJ", "orcid": ";;;;", "linkedin": "rasool-fakoor-695b5845/;;;pratik-chaudhari-59508765;smola", "or_profile": "~Rasool_Fakoor1;~Jonas_Mueller1;~Zachary_Chase_Lipton1;~Pratik_Chaudhari1;~Alex_Smola1", "aff": "Amazon Web Services;Cleanlab;Carnegie Mellon University;School of Engineering and Applied Science, University of Pennsylvania;Boson AI", "aff_domain": "amazon.com;cleanlab.ai;cmu.edu;seas.upenn.edu;boson.ai", "position": "Researcher;Researcher;Assistant Professor;Assistant Professor;CEO", "bibtex": "@misc{\nfakoor2023data,\ntitle={Data Drift Correction via Time-varying Importance Weight Estimator},\nauthor={Rasool Fakoor and Jonas Mueller and Zachary Chase Lipton and Pratik Chaudhari and Alex Smola},\nyear={2023},\nurl={https://openreview.net/forum?id=5b9uVL3l1T4}\n}", "github": "", "project": "", "reviewers": "Ytji;TuzH;LTNU;UiAM;gsqz;FwNn", "site": "https://openreview.net/forum?id=5b9uVL3l1T4", "pdf_size": 4736188, "recommendation": "3;5;5;6;6;6", "confidence": "3;3;4;3;4;3", "correctness": "3;3;3;4;4;4", "technical_novelty": "2;3;3;3;2;4", "empirical_novelty": "3;2;2;3;2;3", "wc_summary_paper": "52;145;55;59;107;60", "wc_strength_and_weaknesses": "166;68;181;219;255;249", "wc_clarity_quality_novelty_and_reproducibility": "22;364;1;9;26;27", "wc_summary_review": "90;94;56;22;51;43", "wc_review": "330;671;293;309;439;379", "wc_reply_reviewers": "0;0;435;114;182;0", "wc_reply_authors": "1101;952;2279;669;844;462", "reply_reviewers": "0;0;2;1;1;0", "reply_authors": "2;2;3;2;2;1", "recommendation_avg": [ 5.166666666666667, 1.0671873729054746 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.8333333333333335, 0.6871842709362768 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 79.66666666666667, 34.649033977234566 ], "wc_strength_and_weaknesses_avg": [ 189.66666666666666, 63.360520480466036 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 74.83333333333333, 129.65777604482074 ], "wc_summary_review_avg": [ 59.333333333333336, 25.44056253745625 ], "wc_review_avg": [ 403.5, 129.07329958851548 ], "wc_reply_reviewers_avg": [ 121.83333333333333, 156.14354151086604 ], "wc_reply_authors_avg": [ 1051.1666666666667, 585.3743579017524 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.74535599249993 ], "reply_authors_avg": [ 2.0, 0.5773502691896257 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.22086305214969318, "corr_recommendation_correctness": 0.7808688094430303, "gs_citation": 1, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=7716982395890488990&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Amazon;Cleanlab;Carnegie Mellon University;University of Pennsylvania;Boson AI", "aff_unique_dep": "Amazon Web Services;;;School of Engineering and Applied Science;", "aff_unique_url": "https://aws.amazon.com;https://www.cleanlab.ai;https://www.cmu.edu;https://www.upenn.edu;https://www.boson.ai", "aff_unique_abbr": "AWS;;CMU;UPenn;Boson AI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "United States;China" }, { "id": "5c9imxdLlCW", "title": "Rewiring with Positional Encodings for GNNs", "track": "main", "status": "Reject", "tldr": "", "abstract": "Several recent works use positional encodings to extend the receptive fields of graph neural network (GNN) layers equipped with attention mechanisms. These techniques, however, extend receptive fields to the complete graph, at substantial computational cost and risking a change in the inductive biases of conventional GNNs, or require complex architecture adjustments. As a conservative alternative, we use positional encodings to expand receptive fields to $r$-hop neighborhoods. More specifically, our method augments the input graph with additional nodes/edges and uses positional encodings as node and/or edge features. We thus modify graphs before inputting them to a downstream GNN model, instead of modifying the model itself. This makes our method model-agnostic, i.e. compatible with any existing GNN architectures. We also provide examples of positional encodings that are lossless with a one-to-one map between the original and the modified graphs. We demonstrate that extending receptive fields via positional encodings and a virtual fully-connected node significantly improves GNN performance and alleviates over-squashing using small $r$. 
We obtain improvements on a variety of models and datasets, and reach state-of-the-art performance using traditional GNNs or graph Transformers.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Rickard Br\u00fcel Gabrielsson;Mikhail Yurochkin;Justin Solomon", "authorids": "~Rickard_Br\u00fcel_Gabrielsson1;~Mikhail_Yurochkin1;~Justin_Solomon1", "gender": "Not Specified;M;M", "homepage": "http://bruel.org/;https://moonfolk.github.io/;http://people.csail.mit.edu/jsolomon/", "dblp": "228/6813;191/6719;80/5094", "google_scholar": "y9Oh5XwAAAAJ;QjBF9sUAAAAJ;pImSVwoAAAAJ", "orcid": ";;0000-0002-7701-7586", "linkedin": ";mikhail-yurochkin-a45659114/;justin-solomon-8a587914/", "or_profile": "~Rickard_Br\u00fcel_Gabrielsson1;~Mikhail_Yurochkin1;~Justin_Solomon1", "aff": "Massachusetts Institute of Technology;IBM Research;Massachusetts Institute of Technology", "aff_domain": "mit.edu;ibm.com;mit.edu", "position": "PhD student;Researcher;Associate Professor", "bibtex": "@misc{\ngabrielsson2023rewiring,\ntitle={Rewiring with Positional Encodings for {GNN}s},\nauthor={Rickard Br{\\\"u}el Gabrielsson and Mikhail Yurochkin and Justin Solomon},\nyear={2023},\nurl={https://openreview.net/forum?id=5c9imxdLlCW}\n}", "github": "", "project": "", "reviewers": "CvhJ;qbsV;37EK;apU4", "site": "https://openreview.net/forum?id=5c9imxdLlCW", "pdf_size": 315810, "recommendation": "3;3;5;5", "confidence": "4;5;4;4", "correctness": "3;4;2;3", "technical_novelty": "2;3;2;2", "empirical_novelty": "2;3;2;2", "wc_summary_paper": "54;383;148;48", "wc_strength_and_weaknesses": "283;50;184;277", "wc_clarity_quality_novelty_and_reproducibility": "50;52;278;43", "wc_summary_review": "50;59;32;39", "wc_review": "437;544;642;407", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 158.25, 135.68414608936448 ], "wc_strength_and_weaknesses_avg": [ 198.5, 94.29342500938228 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 105.75, 99.5047109437538 ], "wc_summary_review_avg": [ 45.0, 10.319883720275147 ], "wc_review_avg": [ 507.5, 92.86145594378758 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": -0.7071067811865475, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18178702444239452152&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;0", "aff_unique_norm": "Massachusetts Institute of Technology;IBM", "aff_unique_dep": ";IBM Research", "aff_unique_url": "https://web.mit.edu;https://www.ibm.com/research", "aff_unique_abbr": "MIT;IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "$\\mathscr{N}$-WL: A New Hierarchy of Expressivity for Graph Neural Networks", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11218", "id": "5cAI0qXxyv", "poster": "/media/PosterPDFs/ICLR%202023/11218.png?t=1680858146.205472", "openreview": 
"https://openreview.net/forum?id=5cAI0qXxyv", "slides": "https://iclr.cc/virtual/2023/poster/11218", "video": "https://iclr.cc/virtual/2023/poster/11218", "author_site": "Qing Wang, Dillon Chen, Asiri Wijesinghe, Shouheng Li, Muhammad Farhan", "tldr": "", "abstract": "The expressive power of Graph Neural Networks (GNNs) is fundamental for understanding their capabilities and limitations, i.e., what graph properties can or cannot be learnt by a GNN. Since standard GNNs have been characterised to be upper-bounded by the Weisfeiler-Lehman (1-WL) algorithm, recent attempts concentrated on developing more expressive GNNs in terms of the $k$-WL hierarchy, a well-established framework for graph isormorphism tests. In this work we show that, contrary to the widely accepted view, the $k$-WL hierarchy is not well-suited for measuring expressive GNNs. This is due to limitations that are inherent to high-dimensional WL algorithms such as the lack of a natural interpretation and high computational costs, which makes it difficult to draw any firm conclusions about the expressive power of GNNs beyond 1-WL. Thus, we propose a novel hierarchy of graph isomorphism tests, namely Neighbourhood WL ($\\mathscr{N}$-WL), and also establish a new theorem on the equivalence of expressivity between induced connected subgraphs and induced subgraphs within this hierarchy. Further, we design a GNN model upon $\\mathscr{N}$-WL, Graph Neighbourhood Neural Network (G3N), and empirically verify its expressive power on synthetic and real-world benchmarks.", "keywords": "Graph neural network;Weisfeiler-Lehman algorithm;k-WL hierarchy;graph classification", "primary_area": "", "supplementary_material": "", "author": "Qing Wang;Dillon Ze Chen;Asiri Wijesinghe;Shouheng Li;Muhammad Farhan", "authorids": "~Qing_Wang14;~Dillon_Ze_Chen1;~Asiri_Wijesinghe1;~Shouheng_Li1;muhammad.farhan@anu.edu.au", "gender": "F;M;M;M;", "homepage": "https://graphlabanu.github.io/website/team/;https://dillonzchen.github.io/;https://cecs.anu.edu.au/people/asiri-wijesinghe;;", "dblp": "97/6505-2;350/4009;251/5617;301/6357;", "google_scholar": "GytuLAcAAAAJ;;dV4kyHYAAAAJ;;", "orcid": ";;0000-0003-4392-5348;;", "linkedin": ";;asiriwijesinghe/?originalSubdomain=au;sean-li-5bb07771/;", "or_profile": "~Qing_Wang14;~Dillon_Ze_Chen1;~Asiri_Wijesinghe1;~Shouheng_Li1;muhammad.farhan@anu.edu.au", "aff": "Australian National University;Australian National University;;CSIRO;", "aff_domain": "anu.edu.au;anu.edu.au;;csiro.au;", "position": "Associate Professor;Undergrad student;;Researcher;", "bibtex": "@inproceedings{\nwang2023mathscrnwl,\ntitle={\\${\\textbackslash}mathscr\\{N\\}\\$-{WL}: A New Hierarchy of Expressivity for Graph Neural Networks},\nauthor={Qing Wang and Dillon Ze Chen and Asiri Wijesinghe and Shouheng Li and Muhammad Farhan},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=5cAI0qXxyv}\n}", "github": "", "project": "", "reviewers": "rk5R;e9AE;YCJN;QGtL", "pdf_size": 1590296, "recommendation": "5;5;5;6", "confidence": "4;3;4;4", "correctness": "3;3;3;3", "technical_novelty": "3;2;2;3", "empirical_novelty": "0;2;2;3", "wc_summary_paper": "119;81;140;144", "wc_strength_and_weaknesses": "255;237;666;460", "wc_clarity_quality_novelty_and_reproducibility": "22;38;55;43", "wc_summary_review": "85;54;41;36", "wc_review": "481;410;902;683", "wc_reply_reviewers": "125;0;0;0", "wc_reply_authors": "637;530;1470;784", "reply_reviewers": "1;0;0;0", "reply_authors": "2;1;2;1", 
"recommendation_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 1.75, 1.0897247358851685 ], "wc_summary_paper_avg": [ 121.0, 24.969981978367546 ], "wc_strength_and_weaknesses_avg": [ 404.5, 174.5487038049839 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 39.5, 11.84271928232701 ], "wc_summary_review_avg": [ 54.0, 19.06567596493762 ], "wc_review_avg": [ 619.0, 191.64420158199414 ], "wc_reply_reviewers_avg": [ 31.25, 54.12658773652741 ], "wc_reply_authors_avg": [ 855.25, 366.2017033002441 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 0.0, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13468005437065360660&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=5cAI0qXxyv", "email": "anu.edu.au;anu.edu.au;;csiro.au;", "author_num": 5, "aff_unique_index": "0;0;1", "aff_unique_norm": "Australian National University;Commonwealth Scientific and Industrial Research Organisation", "aff_unique_dep": ";", "aff_unique_url": "https://www.anu.edu.au;https://www.csiro.au", "aff_unique_abbr": "ANU;CSIRO", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Australia" }, { "title": "$\\mathcal{O}$-GNN: incorporating ring priors into molecular modeling", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11110", "id": "5cFfz6yMVPU", "poster": "", "openreview": "https://openreview.net/forum?id=5cFfz6yMVPU", "slides": "https://iclr.cc/virtual/2023/poster/11110", "video": "https://iclr.cc/virtual/2023/poster/11110", "author_site": "Jinhua Zhu, Kehan Wu, Bohan Wang, Yingce Xia, Shufang Xie, Qi Meng, Lijun Wu, Tao Qin, Wengang Zhou, Houqiang Li, Tie-Yan Liu", "tldr": "", "abstract": "Cyclic compounds that contain at least one ring play an important role in drug design. Despite the recent success of molecular modeling with graph neural networks (GNNs), few models explicitly take rings in compounds into consideration, consequently limiting the expressiveness of the models. In this work, we design a new variant of GNN, ring-enhanced GNN ($\\mathcal{O}$-GNN), that explicitly models rings in addition to atoms and bonds in compounds. In $\\mathcal{O}$-GNN, each ring is represented by a latent vector, which contributes to and is iteratively updated by atom and bond representations. Theoretical analysis shows that $\\mathcal{O}$-GNN is able to distinguish two isomorphic subgraphs lying on different rings using only one layer while conventional graph convolutional neural networks require multiple layers to distinguish, demonstrating that $\\mathcal{O}$-GNN is more expressive. Through experiments, $\\mathcal{O}$-GNN shows good performance on $\\bf{11}$ public datasets. In particular, it achieves state-of-the-art validation result on the PCQM4Mv1 benchmark (outperforming the previous KDDCup champion solution) and the drug-drug interaction prediction task on DrugBank. 
Furthermore, $\\mathcal{O}$-GNN outperforms strong baselines (without modeling rings) on the molecular property prediction and retrosynthesis prediction tasks.", "keywords": "Graph Neural Network;Ring;Molecular Modeling", "primary_area": "", "supplementary_material": "", "author": "Jinhua Zhu;Kehan Wu;Bohan Wang;Yingce Xia;Shufang Xie;Qi Meng;Lijun Wu;Tao Qin;Wengang Zhou;Houqiang Li;Tie-Yan Liu", "authorids": "~Jinhua_Zhu1;wu_2018@mail.ustc.edu.cn;~Bohan_Wang1;~Yingce_Xia1;~Shufang_Xie1;~Qi_Meng1;~Lijun_Wu1;~Tao_Qin1;~Wengang_Zhou1;~Houqiang_Li1;~Tie-Yan_Liu1", "gender": "M;;M;M;M;F;M;M;M;M;M", "homepage": "https://github.com/teslacool;;https://bhwangfy.github.io/;https://www.microsoft.com/en-us/research/people/yinxia/;;;https://apeterswu.github.io/;https://www.microsoft.com/en-us/research/people/taoqin/;http://staff.ustc.edu.cn/~zhwg/index.html;https://staff.ustc.edu.cn/~lihq/;http://member.acm.org/~tieyanliu", "dblp": "18/1965-1;;202/1184;http://dblp.uni-trier.de/pers/hd/x/Xia:Yingce;https://dblp.uni-trier.de/pid/163/2704-3;;68/1284-3;14/6841;22/4544-1;59/7017.html;l/TieYanLiu", "google_scholar": "https://scholar.google.com.hk/citations?user=FvGy0LQAAAAJ;;LfkHCEUAAAAJ;GS5wRxYAAAAJ;;t-z3K34AAAAJ;https://scholar.google.com/citations?hl=en;Bl4SRU0AAAAJ;8s1JF8YAAAAJ;7sFMIKoAAAAJ;Nh832fgAAAAJ", "orcid": "0000-0003-2157-9077;;;;;;0000-0002-3530-590X;;0000-0003-1690-9836;0000-0003-2188-3028;0000-0002-0476-8020", "linkedin": ";;;;;;lijun-wu-59340478/;;;;", "or_profile": "~Jinhua_Zhu1;wu_2018@mail.ustc.edu.cn;~Bohan_Wang1;~Yingce_Xia1;~Shufang_Xie1;~Qi_Meng1;~Lijun_Wu1;~Tao_Qin1;~Wengang_Zhou1;~Houqiang_Li1;~Tie-Yan_Liu1", "aff": "University of Science and Technology of China;;Microsoft Research Asia, University of Science and Technology of China;Microsoft;Renmin University of China;Microsoft;Microsoft Research;;University of Science and Technology of China;University of Science and Technology of China;Microsoft", "aff_domain": "ustc.edu.cn;;ustc.edu.cn;microsoft.com;ruc.edu.cn;microsoft.com;microsoft.com;;ustc.edu.cn;ustc.edu.cn;microsoft.com", "position": "PhD student;;PhD student;Researcher;PhD student;associate researcher;Researcher;;Full Professor;Professor;Distinguished Scientist", "bibtex": "@inproceedings{\nzhu2023mathcalognn,\ntitle={\\${\\textbackslash}mathcal\\{O\\}\\$-{GNN}: incorporating ring priors into molecular modeling},\nauthor={Jinhua Zhu and Kehan Wu and Bohan Wang and Yingce Xia and Shufang Xie and Qi Meng and Lijun Wu and Tao Qin and Wengang Zhou and Houqiang Li and Tie-Yan Liu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=5cFfz6yMVPU}\n}", "github": "", "project": "", "reviewers": "Q5iY;4qh9;X4DK", "pdf_size": 1031469, "recommendation": "5;6;8", "confidence": "4;4;3", "correctness": "3;4;3", "technical_novelty": "2;3;3", "empirical_novelty": "2;2;0", "wc_summary_paper": "51;63;84", "wc_strength_and_weaknesses": "123;316;231", "wc_clarity_quality_novelty_and_reproducibility": "18;49;136", "wc_summary_review": "164;65;315", "wc_review": "356;493;766", "wc_reply_reviewers": "249;50;279", "wc_reply_authors": "1798;559;937", "reply_reviewers": "2;1;2", "reply_authors": "5;2;4", "recommendation_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.3333333333333333, 
0.9428090415820634 ], "wc_summary_paper_avg": [ 66.0, 13.638181696985855 ], "wc_strength_and_weaknesses_avg": [ 223.33333333333334, 78.97819670995 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 67.66666666666667, 49.94886273869395 ], "wc_summary_review_avg": [ 181.33333333333334, 102.7953738691041 ], "wc_review_avg": [ 538.3333333333334, 170.4236551134326 ], "wc_reply_reviewers_avg": [ 192.66666666666666, 101.62130135404136 ], "wc_reply_authors_avg": [ 1098.0, 518.4727572399537 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.4714045207910317 ], "reply_authors_avg": [ 3.6666666666666665, 1.247219128924647 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 11, 0 ], "corr_recommendation_confidence": -0.944911182523068, "corr_recommendation_correctness": -0.18898223650461363, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "pdf": "https://openreview.net/pdf?id=5cFfz6yMVPU", "email": "ustc.edu.cn;;ustc.edu.cn;microsoft.com;ruc.edu.cn;microsoft.com;microsoft.com;;ustc.edu.cn;ustc.edu.cn;microsoft.com", "author_num": 11, "aff_unique_index": "0;1;1;2;1;1;0;0;1", "aff_unique_norm": "University of Science and Technology of China;Microsoft;Renmin University of China", "aff_unique_dep": ";Research;", "aff_unique_url": "http://www.ustc.edu.cn;https://www.microsoft.com/en-us/research/group/microsoft-research-asia;http://www.ruc.edu.cn", "aff_unique_abbr": "USTC;MSRA;RUC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;0;1;0;1;1;0;0;1", "aff_country_unique": "China;United States" }, { "id": "5c_nxk-dX1J", "title": "GradientMix: A Simple yet Effective Regularization for Large Batch Training", "track": "main", "status": "Reject", "tldr": "", "abstract": "Stochastic gradient descent (SGD) is the core tool for training deep neural networks. As modern deep learning tasks become more complex and state-of-the-art architectures grow as well, network training with SGD takes a huge amount of time; for example, training ResNet on the ImageNet dataset or BERT pre-training can take days to dozens of days. To reduce the network training time, distributed learning using a large batch size for SGD has been one of the main active research areas in recent years, but this approach entails a significant degradation in generalization. To address this issue, in this paper, we propose a simple yet effective regularization technique, GradientMix, for large-scale distributed learning. GradientMix can enhance the generalization in large batch regimes by giving appropriate noise through a mixup of local gradients computed at multiple devices, which is contrary to the conventions that simply average local gradients. Furthermore, GradientMix is optimizer-agnostic, hence can be applied to any popular optimization algorithm as long as the overall loss is expressed as the sum of the subgroup losses. 
Our extensive experiments show its effectiveness on both small- and large-scale problems; in particular, we consistently achieve state-of-the-art performance for various optimizers when training ResNet-50 on the ImageNet dataset with a 32K batch size.", "keywords": "Large Batch Training;Deep Learning Optimization", "primary_area": "", "supplementary_material": "", "author": "Jihun Yun;Jung Hyun Lee;Eunho Yang", "authorids": "~Jihun_Yun2;~Jung_Hyun_Lee1;~Eunho_Yang1", "gender": "M;M;M", "homepage": "https://github.com/abcdxyzpqrst;;https://sites.google.com/site/hleehome2/", "dblp": "241/9676;132/2899;96/2621", "google_scholar": "ELv5qfEAAAAJ;;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Jihun_Yun2;~Jung_Hyun_Lee1;~Eunho_Yang1", "aff": "Korea Advanced Institute of Science & Technology;NAVER CLOVA;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;navercorp.com;kaist.ac.kr", "position": "PhD student;Researcher;Associate Professor", "bibtex": "@misc{\nyun2023gradientmix,\ntitle={GradientMix: A Simple yet Effective Regularization for Large Batch Training},\nauthor={Jihun Yun and Jung Hyun Lee and Eunho Yang},\nyear={2023},\nurl={https://openreview.net/forum?id=5c_nxk-dX1J}\n}", "github": "", "project": "", "reviewers": "LFbe;yVcz;SMM4;HmFS", "site": "https://openreview.net/forum?id=5c_nxk-dX1J", "pdf_size": 535803, "recommendation": "5;5;5;6", "confidence": "3;4;3;4", "correctness": "3;3;4;4", "technical_novelty": "3;3;2;3", "empirical_novelty": "3;1;2;3", "wc_summary_paper": "59;129;53;93", "wc_strength_and_weaknesses": "221;188;93;299", "wc_clarity_quality_novelty_and_reproducibility": "1;41;269;151", "wc_summary_review": "1;86;43;56", "wc_review": "282;444;458;599", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 83.5, 30.376800358168072 ], "wc_strength_and_weaknesses_avg": [ 200.25, 73.88293104635197 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 115.5, 104.26288889149389 ], "wc_summary_review_avg": [ 46.5, 30.549140740780256 ], "wc_review_avg": [ 445.75, 112.30844803486512 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:CYD-tNP3tpwJ:scholar.google.com/&scioq=GradientMix:+A+Simple+yet+Effective+Regularization+for+Large+Batch+Training&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;NAVER Corporation", "aff_unique_dep": ";CLOVA", "aff_unique_url": "https://www.kaist.ac.kr;https://www.naver.com", "aff_unique_abbr": "KAIST;NAVER", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "id": "5cio7DSIXLQ", "title": "Adaptive Sparse Softmax: An Effective and Efficient Softmax Variant for Text Classification", "track": "main", "status": "Reject", "tldr": "", "abstract": "Softmax with the cross entropy loss is the standard configuration for
current neural text classification models. The gold score for a target class is supposed to be 1, but it is never reachable under the softmax schema. Such a problem makes the training process continue forever and leads to overfitting. Moreover, the \u201ctarget-approach-1\u201d training goal forces the model to continuously learn all samples, leading to a waste of time in handling some samples which have already been classified correctly with high confidence, while the test goal simply requires the target class of each sample to hold the maximum score. To address these weaknesses, we propose the \textbf{A}daptive \textbf{S}parse softmax (AS-Softmax), which designs a reasonable and test-matching transformation on top of softmax. For more purposeful learning, we discard the classes with far smaller scores compared with the actual class during training. The model can then focus on learning to distinguish the target class from its strong opponents, which is also the key challenge at test time. In addition, since the training losses of easy samples will gradually drop to 0 in AS-Softmax, we develop an adaptive gradient accumulation strategy based on the masked sample ratio to speed up training. We verify the proposed AS-Softmax on a variety of multi-class, multi-label and token classification tasks with class sizes ranging from 5 to 5000+. The results show that AS-Softmax consistently outperforms softmax and its variants, and the loss of AS-Softmax is remarkably correlated with classification performance in validation. Furthermore, the adaptive gradient accumulation strategy can bring about a 1.2\u00d7 training speedup compared with the standard softmax while maintaining classification effectiveness.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/20dc49709dae9eddb2de11e6e9c88435620c9934.zip", "author": "Qi Lv;Lei Geng;Ziqiang Cao;Min Cao;Sujian Li;Wenjie Li;Guohong Fu", "authorids": "~Qi_Lv1;~Lei_Geng2;~Ziqiang_Cao2;~Min_Cao2;~Sujian_Li1;~Wenjie_Li1;~Guohong_Fu1", "gender": "M;M;F;F;F;M;F", "homepage": "https://github.com/Aopolin-Lv;;;https://pku-tangent.github.io/;https://web.comp.polyu.edu.hk/cswjli/;http://web.suda.edu.cn/ghfu/;https://github.com/ngwlh-gl?tab=repositories", "dblp": ";148/4447;;05/4288;33/3999-2.html;23/5204;", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;nhMWtZsAAAAJ;https://scholar.google.com.tw/citations?user=RvBDhSwAAAAJ;Rx5swD4AAAAJ;ueOZz5QAAAAJ;", "orcid": "0000-0002-8507-7167;0000-0002-1077-9033;;;0000-0002-7360-8864;0000-0001-6882-6181;", "linkedin": "qi-lv-%EF%BC%88%E5%90%95%E5%A5%87%EF%BC%89-075614311/;;;;;;", "or_profile": "~Qi_Lv1;~Ziqiang_Cao2;~Min_Cao2;~Sujian_Li1;~Wenjie_Li1;~Guohong_Fu1;~lei_Geng1", "aff": "Soochow University;Soochow University, China;Soochow University;Peking University;The Hong Kong Polytechnic University, The Hong Kong Polytechnic University;Soochow University, China,;Soochow University", "aff_domain": "suda.edu.cn;suda.edu.cn;suda.edu.cn;pku.edu.cn;comp.polyu.edu.hk;suda.edu.cn;suda.edu.cn", "position": "MS student;Associate Professor;Associate Professor;Associate Professor;Full Professor;Full Professor;MS student", "bibtex": "@misc{\nlv2023adaptive,\ntitle={Adaptive Sparse Softmax: An Effective and Efficient Softmax Variant for Text Classification},\nauthor={Qi Lv and Lei Geng and Ziqiang Cao and Min Cao and Sujian Li and Wenjie Li and Guohong Fu},\nyear={2023},\nurl={https://openreview.net/forum?id=5cio7DSIXLQ}\n}", "github": "", "project": "", "reviewers": "UMbA;bnPp;FqyY;EExx", "site":
"https://openreview.net/forum?id=5cio7DSIXLQ", "pdf_size": 3865632, "recommendation": "3;5;5;6", "confidence": "3;3;3;4", "correctness": "2;3;3;3", "technical_novelty": "2;2;2;2", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "65;101;72;74", "wc_strength_and_weaknesses": "327;471;110;41", "wc_clarity_quality_novelty_and_reproducibility": "5;158;59;711", "wc_summary_review": "41;33;24;61", "wc_review": "438;763;265;887", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 78.0, 13.693063937629153 ], "wc_strength_and_weaknesses_avg": [ 237.25, 171.31896421587425 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 233.25, 281.23333283947693 ], "wc_summary_review_avg": [ 39.75, 13.663363421939708 ], "wc_review_avg": [ 588.25, 248.42239733969237 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.6622661785325219, "corr_recommendation_correctness": 0.9271726499455306, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:7tcu8RYOYT4J:scholar.google.com/&scioq=Adaptive+Sparse+Softmax:+An+Effective+and+Efficient+Softmax+Variant+for+Text+Classification&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;1;2;0;0", "aff_unique_norm": "Soochow University;Peking University;Hong Kong Polytechnic University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.soochow.edu.cn;http://www.pku.edu.cn;https://www.polyu.edu.hk", "aff_unique_abbr": "Soochow U;Peking U;PolyU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "5d_yTyTj646", "title": "Escaping saddle points in zeroth-order optimization: two function evaluations suffice", "track": "main", "status": "Reject", "tldr": "We provide the first result showing that zeroth-order optimization with constant number of function evaluations per iteration can escape saddle points efficiently.", "abstract": "Zeroth-order methods are useful in solving black-box optimization and reinforcement learning problems in unknown environments. It uses function values to estimate the gradient. As optimization problems are often nonconvex, it is a natural question to understand how zeroth-order methods escape saddle points. In this paper, we consider zeroth-order methods, that at each iteration, may freely choose 2$m$ function evaluations where $m$ ranges from 1 to $d$, with $d$ denoting the problem dimension. 
We show that by adding an appropriate isotropic perturbation at each iteration, a zeroth-order algorithm based on $2m$ function evaluations per iteration can not only find $\\epsilon$-second order stationary points polynomially fast, but do so using only $\\tilde{O}(\\frac{d}{\\epsilon^{2.5}})$ function evaluations.", "keywords": "zeroth-order optimization;nonconvex optimization;escape saddle points", "primary_area": "", "supplementary_material": "/attachment/b6a831e76e500fa76f3ebafd6ffabebccd21c7cc.zip", "author": "Zhaolin Ren;Yujie Tang;Na Li", "authorids": "~Zhaolin_Ren1;~Yujie_Tang1;~Na_Li3", "gender": "M;;F", "homepage": ";https://tyj518.github.io/;https://nali.seas.harvard.edu/", "dblp": ";;", "google_scholar": ";g61gTKsAAAAJ;qdGelXoAAAAJ", "orcid": ";;", "linkedin": "zhaolin-ren-1b1b94108;;", "or_profile": "~Zhaolin_Ren1;~Yujie_Tang1;~Na_Li3", "aff": "Harvard University;Peking University;Harvard University", "aff_domain": "harvard.edu;pku.edu.cn;harvard.edu", "position": "PhD student;Assistant Professor;Full Professor", "bibtex": "@misc{\nren2023escaping,\ntitle={Escaping saddle points in zeroth-order optimization: two function evaluations suffice},\nauthor={Zhaolin Ren and Yujie Tang and Na Li},\nyear={2023},\nurl={https://openreview.net/forum?id=5d_yTyTj646}\n}", "github": "", "project": "", "reviewers": "Rytu;QY7d;nRGh;czK6;ZbmT", "site": "https://openreview.net/forum?id=5d_yTyTj646", "pdf_size": 3034964, "recommendation": "3;3;6;6;8", "confidence": "5;5;3;3;4", "correctness": "4;3;2;3;4", "technical_novelty": "2;3;3;3;4", "empirical_novelty": "0;3;0;0;3", "wc_summary_paper": "64;33;74;145;49", "wc_strength_and_weaknesses": "86;131;81;166;353", "wc_clarity_quality_novelty_and_reproducibility": "32;120;458;14;59", "wc_summary_review": "23;54;94;6;59", "wc_review": "205;338;707;331;520", "wc_reply_reviewers": "0;0;42;0;0", "wc_reply_authors": "559;3223;1214;333;708", "reply_reviewers": "0;0;3;0;0", "reply_authors": "2;7;4;1;1", "recommendation_avg": [ 5.2, 1.9390719429665317 ], "confidence_avg": [ 4.0, 0.8944271909999159 ], "correctness_avg": [ 3.2, 0.7483314773547882 ], "technical_novelty_avg": [ 3.0, 0.6324555320336759 ], "empirical_novelty_avg": [ 1.2, 1.4696938456699067 ], "wc_summary_paper_avg": [ 73.0, 38.57978745405423 ], "wc_strength_and_weaknesses_avg": [ 163.4, 99.78496880793219 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 136.6, 164.66159236446123 ], "wc_summary_review_avg": [ 47.2, 30.524744061171095 ], "wc_review_avg": [ 420.2, 175.06501649387292 ], "wc_reply_reviewers_avg": [ 8.4, 16.799999999999997 ], "wc_reply_authors_avg": [ 1207.4, 1048.5308960636305 ], "reply_reviewers_avg": [ 0.6, 1.2 ], "reply_authors_avg": [ 3.0, 2.280350850198276 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.6918984060216639, "corr_recommendation_correctness": -0.027565892320998583, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2955759228248540695&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;0", "aff_unique_norm": "Harvard University;Peking University", "aff_unique_dep": ";", "aff_unique_url": "https://www.harvard.edu;http://www.pku.edu.cn", "aff_unique_abbr": "Harvard;Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;China" }, { "id": "5eCi6tAPc7", "title": "Learning implicit hidden Markov models using neural likelihood-free inference", "track": "main", "status": "Reject", 
"tldr": "We propose a novel method, using an autoregressive-flow, for carrying out likelihood-free Bayesian inference of a hidden Markov model", "abstract": "Likelihood-free inference methods for implicit models based on neural conditional density estimation were shown to drastically reduce the simulation burden in comparison to classical methods such as ABC. However, when applied in the context of any latent variable model, such as a Hidden Markov model (HMM), these methods are designed to only estimate the parameters rather than the joint posterior distribution of both the parameters and the hidden states. Naive application of these methods to a HMM, ignoring the inference of this joint posterior distribution, will result in overestimation of uncertainty of the posterior predictive. We propose a postprocessing step that can rectify this problem. Our approach relies on learning directly the intractable posterior distribution of the hidden states, using an autoregressive-flow, by exploiting the Markov property. Upon evaluating our approach on some intractable HMMs, we found that the quality of the estimates retrieved using our postprocessing is comparable to what can be achieved using a computationally expensive particle-filtering which additionally requires a tractable data distribution.", "keywords": "likelihood-free;Bayesian inference;simulation based inference;ABC-SMC;HMM;simulator;implicit models", "primary_area": "", "supplementary_material": "/attachment/b118fa28a08c2fa0bc2246ded214c560840014ee.zip", "author": "Sanmitra Ghosh;Paul Birrell;Daniela De Angelis", "authorids": "~Sanmitra_Ghosh1;~Paul_Birrell1;~Daniela_De_Angelis1", "gender": "M;M;F", "homepage": "https://www.mrc-bsu.cam.ac.uk/people/in-alphabetical-order/a-to-g/sanmitra-ghosh/;;https://eur01.safelinks.protection.outlook.com/?url=http%3A%2F%2Fwww.mrc-bsu.cam.ac.uk%2Fpeople%2Fin-alphabetical-order%2Fa-to-g%2Fdaniela-de-angelis%2F&data=04%7C01%7CDuncan.Kerrod%40phe.gov.uk%7C4d482b2fb05947941efd08d951a7e0a0%7Cee4e14994a354b2ead475f3cf9de8666%7C0%7C0%7C637630603735851161%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=b1ic%2Bjy8fzq6uUzg9zgM5bkh%2FLn4yIeKMALyTuniObQ%3D&reserved=0", "dblp": ";;", "google_scholar": ";rUuZD_YAAAAJ;", "orcid": ";0000-0001-8131-4893;0000-0001-6619-6112", "linkedin": ";;", "or_profile": "~Sanmitra_Ghosh1;~Paul_Birrell1;~Daniela_De_Angelis1", "aff": "PhysicsX;UK Health Security Agency;University of Cambridge", "aff_domain": "physicsx.ai;ukhsa.gov.uk;cam.ac.uk", "position": "Researcher;Principal Researcher;Full Professor", "bibtex": "@misc{\nghosh2023learning,\ntitle={Learning implicit hidden Markov models using neural likelihood-free inference},\nauthor={Sanmitra Ghosh and Paul Birrell and Daniela De Angelis},\nyear={2023},\nurl={https://openreview.net/forum?id=5eCi6tAPc7}\n}", "github": "", "project": "", "reviewers": "pyEj;AdSN;eUPv;DLUp", "site": "https://openreview.net/forum?id=5eCi6tAPc7", "pdf_size": 1261233, "recommendation": "3;5;6;8", "confidence": "4;3;3;3", "correctness": "3;4;3;4", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;3;0;3", "wc_summary_paper": "72;70;238;137", "wc_strength_and_weaknesses": "235;53;126;62", "wc_clarity_quality_novelty_and_reproducibility": "599;110;738;11", "wc_summary_review": "75;15;101;103", "wc_review": "981;248;1203;313", "wc_reply_reviewers": "0;0;199;97", "wc_reply_authors": "816;297;887;64", "reply_reviewers": "0;0;2;1", "reply_authors": "1;1;2;1", "recommendation_avg": [ 5.5, 
1.8027756377319946 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.0, 1.224744871391589 ], "wc_summary_paper_avg": [ 129.25, 68.32779449096832 ], "wc_strength_and_weaknesses_avg": [ 119.0, 72.64640390273973 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 364.5, 309.92942745083116 ], "wc_summary_review_avg": [ 73.5, 35.53519382246282 ], "wc_review_avg": [ 686.25, 413.91024087354975 ], "wc_reply_reviewers_avg": [ 74.0, 82.31949951256992 ], "wc_reply_authors_avg": [ 516.0, 346.3762405246642 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.8006407690254357, "corr_recommendation_correctness": 0.5547001962252291, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:J_bKrc6FklUJ:scholar.google.com/&scioq=Learning+implicit+hidden+Markov+models+using+neural+likelihood-free+inference&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;2", "aff_unique_norm": "PhysicsX;UK Health Security Agency;University of Cambridge", "aff_unique_dep": ";;", "aff_unique_url": ";https://www.ukhsa.gov.uk;https://www.cam.ac.uk", "aff_unique_abbr": ";UKHSA;Cambridge", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "1;1", "aff_country_unique": ";United Kingdom" }, { "title": "D4AM: A General Denoising Framework for Downstream Acoustic Models", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11862", "id": "5fvXH49wk2", "poster": "/media/PosterPDFs/ICLR%202023/11862.png?t=1682801433.6650214", "openreview": "https://openreview.net/forum?id=5fvXH49wk2", "slides": "https://iclr.cc/virtual/2023/poster/11862", "video": "https://iclr.cc/virtual/2023/poster/11862", "author_site": "Chi-Chang Lee, Yu Tsao, Hsin-Min Wang, Chu-Song Chen", "tldr": "We propose a general denoising framework for various downstream acoustic models (D4AM) by adopting an effective joint training scheme with the regression (denoising) objective and the classification (ASR) objective.", "abstract": "The performance of acoustic models degrades notably in noisy environments. Speech enhancement (SE) can be used as a front-end strategy to aid automatic speech recognition (ASR) systems. However, existing training objectives of SE methods are not fully effective at integrating speech-text and noise-clean paired data for training toward unseen ASR systems. In this study, we propose a general denoising framework, D4AM, for various downstream acoustic models. Our framework fine-tunes the SE model with the backward gradient according to a specific acoustic model and the corresponding classification objective. In addition, our method aims to consider the regression objective as an auxiliary loss to make the SE model generalize to other unseen acoustic models. To jointly train an SE unit with regression and classification objectives, D4AM uses an adjustment scheme to directly estimate suitable weighting coefficients rather than undergoing a grid search process with additional training costs. The adjustment scheme consists of two parts: gradient calibration and regression objective weighting. The experimental results show that D4AM can consistently and effectively provide improvements to various unseen acoustic models and outperforms other combination setups. 
Specifically, when evaluated on the Google ASR API with real noisy data completely unseen during SE training, D4AM achieves a relative WER reduction of 24.65% compared with the direct feeding of noisy input. To our knowledge, this is the first work that deploys an effective combination scheme of regression (denoising) and classification (ASR) objectives to derive a general pre-processor applicable to various unseen ASR systems. Our code is available at https://github.com/ChangLee0903/D4AM.", "keywords": "audio processing;speech enhancement;robust automatic speech recognition;auxiliary task learning", "primary_area": "", "supplementary_material": "/attachment/bef30295776fe8cea88a72a482f35baa40975316.zip", "author": "Chi-Chang Lee;Yu Tsao;Hsin-Min Wang;Chu-Song Chen", "authorids": "~Chi-Chang_Lee1;~Yu_Tsao1;~Hsin-Min_Wang1;~Chu-Song_Chen2", "gender": "M;M;M;M", "homepage": "https://bio-asplab.citi.sinica.edu.tw/Lab-eng.html;https://www.citi.sinica.edu.tw/pages/yu.tsao/index_en.html;https://homepage.iis.sinica.edu.tw/pages/whm/;https://www.csie.ntu.edu.tw/en/member/Faculty/Chu-Song-Chen-94737564", "dblp": ";66/7146;28/5019;67/1007", "google_scholar": ";https://scholar.google.com.tw/citations?user=ZO5e5I4AAAAJ;trzbZ3AAAAAJ;WKk6fIQAAAAJ", "orcid": ";0000-0001-6956-0418;0000-0003-3599-5071;0000-0002-2959-2471", "linkedin": ";;hsin-min-wang-97871417/;", "or_profile": "~Chi-Chang_Lee1;~Yu_Tsao1;~Hsin-Min_Wang1;~Chu-Song_Chen2", "aff": "National Taiwan University;Academia Sinica;Academia Sinica;Department of Computer Science and Informational Engineering, National Taiwan University", "aff_domain": "ntu.edu.tw;sinica.edu.tw;sinica.edu.tw;csie.ntu.edu.tw", "position": "MS student;Full Professor;Researcher;Professor", "bibtex": "@inproceedings{\nlee2023dam,\ntitle={D4{AM}: A General Denoising Framework for Downstream Acoustic Models},\nauthor={Chi-Chang Lee and Yu Tsao and Hsin-Min Wang and Chu-Song Chen},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=5fvXH49wk2}\n}", "github": "", "project": "", "reviewers": "jXQS;DCcQ;mD2F", "pdf_size": 777779, "recommendation": "5;5;6", "confidence": "4;4;2", "correctness": "3;3;4", "technical_novelty": "2;2;3", "empirical_novelty": "2;2;3", "wc_summary_paper": "65;50;91", "wc_strength_and_weaknesses": "181;158;127", "wc_clarity_quality_novelty_and_reproducibility": "22;241;152", "wc_summary_review": "55;249;51", "wc_review": "323;698;421", "wc_reply_reviewers": "0;58;0", "wc_reply_authors": "1233;2531;658", "reply_reviewers": "0;1;0", "reply_authors": "3;4;1", "recommendation_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 68.66666666666667, 16.937794687883333 ], "wc_strength_and_weaknesses_avg": [ 155.33333333333334, 22.125902367034783 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 138.33333333333334, 89.92713099442732 ], "wc_summary_review_avg": [ 118.33333333333333, 92.40971569892181 ], "wc_review_avg": [ 480.6666666666667, 158.80036383949363 ], "wc_reply_reviewers_avg": [ 19.333333333333332, 27.34146220587984 ], "wc_reply_authors_avg": [ 1474.0, 783.4083652008488 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 
1.247219128924647 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": 0.9999999999999997, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15468552362901463868&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=5fvXH49wk2", "email": "ntu.edu.tw;sinica.edu.tw;sinica.edu.tw;csie.ntu.edu.tw", "author_num": 4, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "National Taiwan University;Academia Sinica", "aff_unique_dep": ";", "aff_unique_url": "https://www.ntu.edu.tw;https://www.sinica.edu.tw", "aff_unique_abbr": "NTU;Academia Sinica", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Taiwan", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "5g4FC-SHkaV", "title": "Find Your Friends: Personalized Federated Learning with the Right Collaborators", "track": "main", "status": "Reject", "tldr": "We propose a novel personalized decentralized federated learning framework for heterogeneous client data by collaborating with the right clients.", "abstract": "In the traditional federated learning setting, a central server coordinates a network of clients to train one global model. However, the global model may serve many clients poorly due to data heterogeneity. Moreover, there may not exist a trusted central party that can coordinate the clients to ensure that each of them can benefit from others. To address these concerns, we present a novel decentralized framework, FedeRiCo, where each client can learn as much or as little from other clients as is optimal for its local data distribution. Based on expectation-maximization, FedeRiCo estimates the utilities of other participants\u2019 models on each client\u2019s data so that everyone can select the right collaborators for learning. 
As a result, our algorithm outperforms other federated, personalized, and/or decentralized approaches on several benchmark datasets, being the only approach that consistently performs better than training with local data only.", "keywords": "Federated learning;Personalized federated learning;Decentralized federated learning", "primary_area": "", "supplementary_material": "/attachment/599e6eee206c274b2a383eb2ca7c33967d7720ea.zip", "author": "Yi Sui;Junfeng Wen;Yenson Lau;Brendan Leigh Ross;Jesse C Cresswell", "authorids": "~Yi_Sui1;~Junfeng_Wen1;~Yenson_Lau1;~Brendan_Leigh_Ross1;~Jesse_C_Cresswell1", "gender": "F;;M;M;", "homepage": "https://www.linkedin.com/in/yi-sui-90513699/;;;;https://jescresswell.github.io/", "dblp": ";153/5459;208/4307;295/0098;279/6764", "google_scholar": "fLo2o54AAAAJ;fXRXgPMAAAAJ;;https://scholar.google.ca/citations?user=TyY1aSYAAAAJ;https://scholar.google.ca/citations?hl=en", "orcid": "0009-0009-9207-7403;;;;0000-0002-9284-8804", "linkedin": ";;;brendan-ross;", "or_profile": "~Yi_Sui1;~Junfeng_Wen1;~Yenson_Lau1;~Brendan_Leigh_Ross1;~Jesse_C_Cresswell1", "aff": "Layer6 AI;Carleton University;;Layer 6 AI;Layer 6 AI", "aff_domain": "layer6.ai;carleton.ca;;layer6.ai;layer6.ai", "position": "Machine Learning Scientist;Assistant Professor;;Senior Machine Learning Scientist;Staff Machine Learning Scientist", "bibtex": "@misc{\nsui2023find,\ntitle={Find Your Friends: Personalized Federated Learning with the Right Collaborators},\nauthor={Yi Sui and Junfeng Wen and Yenson Lau and Brendan Leigh Ross and Jesse C Cresswell},\nyear={2023},\nurl={https://openreview.net/forum?id=5g4FC-SHkaV}\n}", "github": "", "project": "", "reviewers": "xqTG;r1RL;Knpq;Tffe", "site": "https://openreview.net/forum?id=5g4FC-SHkaV", "pdf_size": 10132040, "recommendation": "3;6;6;6", "confidence": "3;4;5;2", "correctness": "3;3;4;4", "technical_novelty": "2;2;3;4", "empirical_novelty": "2;3;0;3", "wc_summary_paper": "64;80;39;88", "wc_strength_and_weaknesses": "116;116;206;128", "wc_clarity_quality_novelty_and_reproducibility": "78;18;9;11", "wc_summary_review": "54;33;36;17", "wc_review": "312;247;290;244", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "550;455;728;364", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 2.0, 1.224744871391589 ], "wc_summary_paper_avg": [ 67.75, 18.713297411199342 ], "wc_strength_and_weaknesses_avg": [ 141.5, 37.559952076646745 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 29.0, 28.48683906648823 ], "wc_summary_review_avg": [ 35.0, 13.133925536563698 ], "wc_review_avg": [ 273.25, 28.838992700855556 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 524.25, 134.77087036893394 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.2581988897471611, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2654970507421365340&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "Layer6 AI;Carleton University;Layer 6 AI", "aff_unique_dep": ";;", "aff_unique_url": "https://layer6.ai;https://carleton.ca;https://layer6.ai", "aff_unique_abbr": "Layer6;Carleton;Layer 6 AI", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Canada" }, { "title": "Towards Better Selective Classification", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11355", "id": "5gDz_yTcst", "poster": "", "openreview": "https://openreview.net/forum?id=5gDz_yTcst", "slides": "https://iclr.cc/virtual/2023/poster/11355", "video": "https://iclr.cc/virtual/2023/poster/11355", "author_site": "Leo Feng, Mohamed Osama Ahmed, Hossein Hajimirsadeghi, Amir Abdi", "tldr": "", "abstract": "We tackle the problem of Selective Classification where the objective is to achieve the best performance on a predetermined ratio (coverage) of the dataset. Recent state-of-the-art selective methods come with architectural changes either via introducing a separate selection head or an extra abstention logit. In this paper, we challenge the aforementioned methods. The results suggest that the superior performance of state-of-the-art methods is owed to training a more generalizable classifier rather than their proposed selection mechanisms. We argue that the best performing selection mechanism should instead be rooted in the classifier itself. Our proposed selection strategy uses the classification scores and achieves better results by a significant margin, consistently, across all coverages and all datasets, without any added compute cost. Furthermore, inspired by semi-supervised learning, we propose an entropy-based regularizer that improves the performance of selective classification methods. Our proposed selection mechanism with the proposed entropy-based regularizer achieves new state-of-the-art results.", "keywords": "Selective Classification;Semi-Supervised Learning", "primary_area": "", "supplementary_material": "", "author": "Leo Feng;Mohamed Osama Ahmed;Hossein Hajimirsadeghi;Amir H. Abdi", "authorids": "~Leo_Feng1;~Mohamed_Osama_Ahmed2;~Hossein_Hajimirsadeghi1;~Amir_H._Abdi1", "gender": "M;M;M;M", "homepage": "https://leofeng-ca.github.io/;;;", "dblp": "255/9367;https://dblp.org/pers/hd/a/Ahmed:Mohamed_Osama;64/8131;200/9533", "google_scholar": "WsRunnEAAAAJ;https://scholar.google.ca/citations?user=jyVyVj4AAAAJ;;https://scholar.google.ca/citations?user=y7SFUVoAAAAJ", "orcid": ";0000-0001-6758-1178;;0000-0002-3169-4477", "linkedin": "leo-feng/;mohamed-osama-ahmed-91439a154/;;amir-abdi/", "or_profile": "~Leo_Feng1;~Mohamed_Osama_Ahmed2;~Hossein_Hajimirsadeghi1;~Amir_H_Abdi1", "aff": "Mila - Quebec Artificial Intelligence Institute;;Borealis AI;Borealis AI", "aff_domain": "mila.quebec;;borealisai.com;borealisai.com", "position": "PhD student;;Principal Researcher;Research Engineer", "bibtex": "@inproceedings{\nfeng2023towards,\ntitle={Towards Better Selective Classification},\nauthor={Leo Feng and Mohamed Osama Ahmed and Hossein Hajimirsadeghi and Amir H. 
Abdi},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=5gDz_yTcst}\n}", "github": "", "project": "", "reviewers": "6JTV;F9DQ;wKA4;hDSo", "pdf_size": 11215635, "recommendation": "5;5;6;8", "confidence": "3;3;4;4", "correctness": "2;2;3;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "66;98;33;89", "wc_strength_and_weaknesses": "56;163;215;136", "wc_clarity_quality_novelty_and_reproducibility": "320;56;142;24", "wc_summary_review": "30;171;35;75", "wc_review": "472;488;425;324", "wc_reply_reviewers": "0;0;118;0", "wc_reply_authors": "1133;951;1305;133", "reply_reviewers": "0;0;1;0", "reply_authors": "6;5;4;1", "recommendation_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 71.5, 25.104780421266383 ], "wc_strength_and_weaknesses_avg": [ 142.5, 57.447802394869726 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 135.5, 114.92932610957048 ], "wc_summary_review_avg": [ 77.75, 56.59229187795808 ], "wc_review_avg": [ 427.25, 63.950664578251256 ], "wc_reply_reviewers_avg": [ 29.5, 51.09549882328188 ], "wc_reply_authors_avg": [ 880.5, 449.35592796801956 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 4.0, 1.8708286933869707 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.8164965809277259, "corr_recommendation_correctness": 0.8164965809277259, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12830173518055441503&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=5gDz_yTcst", "email": "mila.quebec;;borealisai.com;borealisai.com", "author_num": 4, "aff_unique_index": "0;1;1", "aff_unique_norm": "Quebec Artificial Intelligence Institute;Borealis AI", "aff_unique_dep": "Artificial Intelligence;", "aff_unique_url": "https://mila.quebec;https://www.borealisai.com", "aff_unique_abbr": "Mila;Borealis AI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "id": "5gri-cs4RVq", "title": "Do We Need Neural Collapse? Learning Diverse Features for Fine-grained and Long-tail Classification", "track": "main", "status": "Reject", "tldr": "Neural collapse is not what you need: Deep features with within-class diversity improve the performance of fine-grained and long-tail learning", "abstract": "Feature extractors learned from supervised training of deep neural networks have demonstrated superior performance over handcrafted ones. Recently, it is shown that such learned features have a neural collapse property, where within-class features collapse to the class mean and different class means are maximally separated. This paper examines the neural collapse property in the context of fine-grained classification tasks, where a feature extractor pretrained from a classification task with coarse labels is used for generating features for a downstream classification task with fine-grained labels. We argue that the within-class feature collapse is an undesirable property for fine-grained classification. 
Hence, we introduce a geometric arrangement of features called the maximal-separating-cone, where within-class features lie in a cone of nontrivial radius instead of collapsing to the class mean, and cones of different classes are maximally separated. We present a technique based on classifier weight and training loss design to produce such an arrangement. Experimentally we demonstrate an improved fine-grained classification performance with a feature extractor pretrained by our method. Moreover, our technique also provides benefits for the classification on data with long-tail distribution over classes. Our work may motivate future efforts on the design of better geometric arrangements of deep features.", "keywords": "Neural Collapse;Diverse deep learning features;Finegrained transfer learning", "primary_area": "", "supplementary_material": "", "author": "Jiawei Ma;Chong You;Sashank J. Reddi;Sadeep Jayasumana;Himanshu Jain;Felix Yu;Shih-Fu Chang;Sanjiv Kumar", "authorids": "~Jiawei_Ma1;~Chong_You2;~Sashank_J._Reddi1;~Sadeep_Jayasumana1;~Himanshu_Jain3;~Felix_Yu1;~Shih-Fu_Chang3;~Sanjiv_Kumar1", "gender": "M;M;M;;M;M;M;", "homepage": "https://blogs.cuit.columbia.edu/jm4743/;https://sites.google.com/view/cyou;;;;http://felixyu.org;http://www.ee.columbia.edu/~sfchang/;http://www.sanjivk.com/", "dblp": "201/7741;164/7311;50/10452;;;23/10574;c/ShihFuChang;", "google_scholar": "kXbWREkAAAAJ;Mfrpm_IAAAAJ;70lgwYwAAAAJ;;JtrH9jQAAAAJ;lYvF6cUAAAAJ;OMVTRscAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-8625-5391;;;;;;;", "linkedin": "jiawei-ma-ee1128/?locale=en_US;;;;;;;", "or_profile": "~Jiawei_Ma1;~Chong_You2;~Sashank_J._Reddi1;~Sadeep_Jayasumana1;~Himanshu_Jain3;~Felix_Yu1;~Shih-Fu_Chang3;~Sanjiv_Kumar1", "aff": "Columbia University;Google;Google;;Google;Google;Columbia University;Google", "aff_domain": "columbia.edu;google.com;google.com;;google.com;google.com;ee.columbia.edu;google.com", "position": "PhD student;Research Scientist;Research Scientist;;Researcher;Research Scientist;Full Professor;Research Scientist", "bibtex": "@misc{\nma2023do,\ntitle={Do We Need Neural Collapse? Learning Diverse Features for Fine-grained and Long-tail Classification},\nauthor={Jiawei Ma and Chong You and Sashank J. 
Reddi and Sadeep Jayasumana and Himanshu Jain and Felix Yu and Shih-Fu Chang and Sanjiv Kumar},\nyear={2023},\nurl={https://openreview.net/forum?id=5gri-cs4RVq}\n}", "github": "", "project": "", "reviewers": "ur4C;tRGD;6L41", "site": "https://openreview.net/forum?id=5gri-cs4RVq", "pdf_size": 2716999, "recommendation": "5;5;8", "confidence": "3;3;4", "correctness": "2;2;3", "technical_novelty": "2;2;3", "empirical_novelty": "2;2;3", "wc_summary_paper": "64;60;53", "wc_strength_and_weaknesses": "311;381;60", "wc_clarity_quality_novelty_and_reproducibility": "40;218;16", "wc_summary_review": "46;21;23", "wc_review": "461;680;152", "wc_reply_reviewers": "356;239;0", "wc_reply_authors": "2002;2384;227", "reply_reviewers": "1;1;0", "reply_authors": "5;4;1", "recommendation_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 59.0, 4.546060565661952 ], "wc_strength_and_weaknesses_avg": [ 250.66666666666666, 137.81710908140383 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 91.33333333333333, 90.10117769608908 ], "wc_summary_review_avg": [ 30.0, 11.343133018115703 ], "wc_review_avg": [ 431.0, 216.59639886203095 ], "wc_reply_reviewers_avg": [ 198.33333333333334, 148.1538239653488 ], "wc_reply_authors_avg": [ 1537.6666666666667, 939.8107374478236 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 3.3333333333333335, 1.699673171197595 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": 1.0, "corr_recommendation_correctness": 1.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1074021067332525724&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;1;1;1;0;1", "aff_unique_norm": "Columbia University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.columbia.edu;https://www.google.com", "aff_unique_abbr": "Columbia;Google", "aff_campus_unique_index": "1;1;1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "5i-n9TYb-xa", "title": "Decoupled and Patch-based Contrastive Learning for Long-tailed Visual Recognition", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "The imbalance of the dataset leads to the trained model being biased towards head classes and under-representing the tail classes, making long-tailed recognition challenging.\nTo address these issues, this paper proposes decoupled and patch-based contrastive learning. Given an anchor image, supervised contrastive learning pulls two kinds of positives together in the embedding space: the same image with different data augmentation and other images from the same classes. The weights of the two kinds of positives can be influenced by the cardinality of different classes, leading to a biased feature space. The decoupled supervised contrastive loss decouples the two kinds of positives, removing the influence of the imbalanced dataset. To improve the discriminative ability of the learned model on the tail classes, patch-based self-distillation crops small patches from the global view of an image. 
These small patches can encode the shared visual patterns between different images, and thus can be used to transfer similarity relationship knowledge. Experiments on several long-tailed classification benchmarks demonstrate the superiority of our method. For instance, it achieves 57.7% top-1 accuracy on the ImageNet-LT dataset. Combined with the ensemble-based method, the performance can be further boosted to 59.7%. Our code will be released.", "keywords": "long-tailed;self distillation", "primary_area": "", "supplementary_material": "", "author": "shiyu xuan;Shiliang Zhang", "authorids": "~shiyu_xuan1;~Shiliang_Zhang3", "gender": "M;M", "homepage": ";https://www.pkuvmc.com", "dblp": "252/0070;52/6186", "google_scholar": "UyZgrZAAAAAJ;7phvKK4AAAAJ", "orcid": "0000-0001-9950-6025;0000-0001-9053-9314", "linkedin": ";", "or_profile": "~shiyu_xuan1;~Shiliang_Zhang3", "aff": "Peking University;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn", "position": "PhD student;Associate Professor", "bibtex": "@misc{\nxuan2023decoupled,\ntitle={Decoupled and Patch-based Contrastive Learning for Long-tailed Visual Recognition},\nauthor={shiyu xuan and Shiliang Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=5i-n9TYb-xa}\n}", "github": "", "project": "", "reviewers": "9bZc;vTmg;EvBf;Cydf;TYN9", "site": "https://openreview.net/forum?id=5i-n9TYb-xa", "pdf_size": 1250481, "recommendation": "3;5;5;6;6", "confidence": "5;4;4;4;3", "correctness": "3;3;2;3;3", "technical_novelty": "3;2;2;3;3", "empirical_novelty": "3;2;2;3;3", "wc_summary_paper": "39;56;75;122;22", "wc_strength_and_weaknesses": "54;285;183;164;139", "wc_clarity_quality_novelty_and_reproducibility": "339;31;112;33;8", "wc_summary_review": "27;27;55;23;11", "wc_review": "459;399;425;342;180", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;0;0", "reply_authors": "0;0;0;0;0", "recommendation_avg": [ 5.0, 1.0954451150103321 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "correctness_avg": [ 2.8, 0.39999999999999997 ], "technical_novelty_avg": [ 2.6, 0.4898979485566356 ], "empirical_novelty_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_paper_avg": [ 62.8, 34.440673628719864 ], "wc_strength_and_weaknesses_avg": [ 165.0, 74.46072790404348 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 104.6, 122.3709115762402 ], "wc_summary_review_avg": [ 28.6, 14.44437606821423 ], "wc_review_avg": [ 361.0, 98.25069974305526 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.8660254037844387, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Ucw49LgNnoAJ:scholar.google.com/&scioq=Decoupled+and+Patch-based+Contrastive+Learning+for+Long-tailed+Visual+Recognition&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "5iibKv7Wk8W", "title": "Mixture of Basis for Interpretable Continual Learning with Distribution Shifts", "track": "main", "status": "Withdraw", "tldr": "We develop a novel continual learning algorithm, Mixture of Basis models (MoB), that constructs a 
dynamic, task-dependent mixture of interpretable models that outperforms other continual learning algorithms on several diverse problem domains.", "abstract": "Continual learning in environments with shifting data distributions is a challenging problem with several real-world applications. In this paper, we consider settings in which the data distribution (i.e., task) shifts abruptly and the timing of these shifts is not known. Furthermore, we consider a $\textit{semi-supervised task-agnostic}$ setting in which the learning algorithm has access to both task-segmented and unsegmented data for offline training. We propose a novel approach called $\textit{Mixture of Basis}$ models $\textit{(MoB)}$ for addressing this problem setting. The core idea is to learn a small set of $\textit{basis models}$ and to construct a dynamic, task-dependent mixture of the models to predict for the current task. We also propose a new methodology to detect observations that are out-of-distribution with respect to the existing basis models and to instantiate new models as needed. We develop novel problem domains for regression tasks, evaluate MoB and other continual learning algorithms on these, and show that MoB attains lower prediction error in nearly every case while using fewer models than other multiple-model approaches. We analyze latent task representations learned by MoB alongside the tasks themselves, using both qualitative and quantitative measures, to show that the learned latent task representations can be interpretably linked to the structure of the task space.", "keywords": "continual learning;lifelong learning;distribution shift;interpretable learning;semi-supervised learning", "primary_area": "", "supplementary_material": "", "author": "Pranay Pasula;Mengda Xu;Sumitra Ganesh", "authorids": "~Pranay_Pasula1;~Mengda_Xu1;~Sumitra_Ganesh1", "gender": ";M;F", "homepage": "https://pranaypasula.com;https://mengdaxu.github.io/;", "dblp": "270/9321;;98/463.html", "google_scholar": "QMPZJQUAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en", "orcid": ";;", "linkedin": "pranaypasula/;mengda-xu-132b57135/;sumitra-ganesh-0379853", "or_profile": "~Pranay_Pasula1;~Mengda_Xu1;~Sumitra_Ganesh1", "aff": "JPMorgan AI Research;Columbia University;J.P. 
Morgan Chase", "aff_domain": "jpmorgan.com;columbia.edu;jpmorgan.com", "position": "Research Scientist;PhD student;Researcher", "bibtex": "@misc{\npasula2023mixture,\ntitle={Mixture of Basis for Interpretable Continual Learning with Distribution Shifts},\nauthor={Pranay Pasula and Mengda Xu and Sumitra Ganesh},\nyear={2023},\nurl={https://openreview.net/forum?id=5iibKv7Wk8W}\n}", "github": "", "project": "", "reviewers": "vASS;J28N;n9yS;z3kG", "site": "https://openreview.net/forum?id=5iibKv7Wk8W", "pdf_size": 10334800, "recommendation": "1;3;3;5", "confidence": "5;3;3;4", "correctness": "2;2;3;3", "technical_novelty": "1;2;2;3", "empirical_novelty": "1;1;2;2", "wc_summary_paper": "82;127;37;123", "wc_strength_and_weaknesses": "141;212;259;106", "wc_clarity_quality_novelty_and_reproducibility": "51;70;49;402", "wc_summary_review": "47;55;17;54", "wc_review": "321;464;362;685", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.0, 1.4142135623730951 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 1.5, 0.5 ], "wc_summary_paper_avg": [ 92.25, 36.43744639790226 ], "wc_strength_and_weaknesses_avg": [ 179.5, 59.70971445250764 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 143.0, 149.7581383431298 ], "wc_summary_review_avg": [ 43.25, 15.465687828221544 ], "wc_review_avg": [ 458.0, 141.02304776170453 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.4264014327112209, "corr_recommendation_correctness": 0.7071067811865476, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=464795728279475728&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;0", "aff_unique_norm": "JPMorgan Chase & Co.;Columbia University", "aff_unique_dep": "JPMorgan AI Research;", "aff_unique_url": "https://www.jpmorganchase.com;https://www.columbia.edu", "aff_unique_abbr": "JPM;Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "5iqzNK-Qeb", "title": "CBLab: Scalable Traffic Simulation with Enriched Data Supporting", "track": "main", "status": "Reject", "tldr": "We present CBLab, a toolkit for scalable traffic simulation with enriched input data supporting.", "abstract": "Traffic simulation provides interactive data for the optimization of traffic policies. However, existing traffic simulators are limited by their lack of scalability and shortage in input data, which prevents them from generating interactive data from traffic simulation in the scenarios of real large-scale city road networks. \n\nIn this paper, we present \\textbf{C}ity \\textbf{B}rain \\textbf{Lab}, a toolkit for scalable traffic simulation. CBLab is consist of three components: CBEngine, CBData, and CBScenario. CBEngine is a highly efficient simulator supporting large-scale traffic simulation. CBData includes a traffic dataset with road network data of 100 cities all around the world. We also develop a pipeline to conduct a one-click transformation from raw road networks to input data of our traffic simulation. 
Combining CBEngine and CBData allows researchers to run scalable traffic simulations in the road network of real large-scale cities. Based on that, CBScenario implements an interactive environment and several baseline methods for two scenarios of traffic policies respectively, with which traffic policies adaptable for large-scale urban traffic can be trained and tuned. To the best of our knowledge, CBLab is the first infrastructure supporting traffic policy optimization in large-scale urban scenarios. The code is available on GitHub:~\\url{https://github.com/CityBrainLab/CityBrainLab.git}.", "keywords": "Infrastructure;Traffic Policy;Traffic Simulation;Large-scale Dataset", "primary_area": "", "supplementary_material": "", "author": "Chumeng Liang;Zherui Huang;Yicheng Liu;Zhanyu Liu;Guanjie Zheng;Hanyuan Shi;Yuhao Du;FULIANG LI;Zhenhui Li", "authorids": "~Chumeng_Liang2;~Zherui_Huang1;~Yicheng_Liu3;~Zhanyu_Liu1;~Guanjie_Zheng1;~Hanyuan_Shi1;~Yuhao_Du3;~FULIANG_LI1;~Zhenhui_Li1", "gender": "M;;M;M;M;;M;;", "homepage": "https://zheruihuang.com/;https://github.com/liuyc1515;https://zhyliu00.github.io/;http://jhc.sjtu.edu.cn/~gjzheng/;;;https://www.researchgate.net/profile/Fuliang-Li;;https://caradryanl.github.io", "dblp": ";;02/10777;204/3356;228/5537;;;;330/5426.html", "google_scholar": ";;VAnWz0IAAAAJ;jJpqDQIAAAAJ;;;;;4S0PYJYAAAAJ", "orcid": ";;0000-0001-6207-5460;;;;;;", "linkedin": ";;;;;;;;", "or_profile": "~Zherui_Huang1;~Yicheng_Liu3;~Zhanyu_Liu1;~Guanjie_Zheng1;~Hanyuan_Shi1;~Yuhao_Du3;~FULIANG_LI1;~Zhenhui_Li1;~Caradryan_Liang1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;;;Hong Kong Polytechnic University;Pennsylvania State Univ University Park;University of Southern California", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;;;polyu.edu.hk;;usc.edu", "position": "Undergrad student;Undergrad student;PhD student;Assistant Professor;;;PhD student;;MS student", "bibtex": "@misc{\nliang2023cblab,\ntitle={{CBL}ab: Scalable Traffic Simulation with Enriched Data Supporting},\nauthor={Chumeng Liang and Zherui Huang and Yicheng Liu and Zhanyu Liu and Guanjie Zheng and Hanyuan Shi and Yuhao Du and FULIANG LI and Zhenhui Li},\nyear={2023},\nurl={https://openreview.net/forum?id=5iqzNK-Qeb}\n}", "github": "", "project": "", "reviewers": "VNgb;4dnh;9smp;JNCH", "site": "https://openreview.net/forum?id=5iqzNK-Qeb", "pdf_size": 3865609, "recommendation": "6;6;6;8", "confidence": "4;3;3;4", "correctness": "3;3;3;3", "technical_novelty": "3;2;2;3", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "93;29;94;92", "wc_strength_and_weaknesses": "183;113;648;67", "wc_clarity_quality_novelty_and_reproducibility": "190;16;104;25", "wc_summary_review": "130;22;370;37", "wc_review": "596;180;1216;221", "wc_reply_reviewers": "0;0;127;0", "wc_reply_authors": "515;453;2657;151", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;5;1", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 77.0, 27.721832551258224 ], "wc_strength_and_weaknesses_avg": [ 252.75, 231.90555728571923 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 83.75, 70.2508896740817 ], "wc_summary_review_avg": [ 139.75, 139.22351633255064 ], "wc_review_avg": [ 553.25, 415.56309689384113 ], "wc_reply_reviewers_avg": [ 31.75, 54.99261314031185 ], "wc_reply_authors_avg": [ 
944.0, 998.5414362959606 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 1.7320508075688772 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 9, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2648907501065642183&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;1;2;3", "aff_unique_norm": "Shanghai Jiao Tong University;Hong Kong Polytechnic University;Pennsylvania State University;University of Southern California", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.polyu.edu.hk;https://www.psu.edu;https://www.usc.edu", "aff_unique_abbr": "SJTU;PolyU;PSU;USC", "aff_campus_unique_index": "1;2;3", "aff_campus_unique": ";Hong Kong SAR;University Park;Los Angeles", "aff_country_unique_index": "0;0;0;0;0;1;1", "aff_country_unique": "China;United States" }, { "id": "5jBBG-zgrwl", "title": "Individual Fairness of Data Provider Regarding Privacy Risk and Gain", "track": "main", "status": "Reject", "tldr": "We propose a new definition of individual fairness (IF) from the perspective of privacy protection and experimentally evaluate privacy-preserving machine learning based on the proposed IF. ", "abstract": "Fairness and privacy risks are important concerns of machine learning (ML) when deploying ML to the real world. Recent studies have focused on group fairness and privacy protection, but no study focuses on individual fairness (IF) and privacy protection. In this paper, we propose a new definition of IF from the perspective of privacy protection and experimentally evaluate privacy-preserving ML based on the proposed IF. For the proposed definition, we assume that users provide their data to an ML service and consider the principle that all users should obtain gains corresponding to their privacy risks. As a user's gain, we calculate the accuracy improvement on the user's data when providing the data to the ML service. We conducted experiments on the image and tabular datasets using three neural networks (NNs) and two tree-based algorithms with differential privacy guarantee. The experimental results of NNs show that we cannot stably improve the proposed IF by changing the strength of privacy protection and applying defenses against membership inference attacks. 
The results of tree-based algorithms show that privacy risks were extremely small regardless of the strength of privacy protection, but they raise a new question about users' motivation for providing their data.", "keywords": "differential privacy;privacy-preserving machine learning;lower bound", "primary_area": "", "supplementary_material": "", "author": "Toshiki Shibahara;Takayuki Miura;Masanobu Kii;Atsunori Ichikawa", "authorids": "~Toshiki_Shibahara1;~Takayuki_Miura1;~Masanobu_Kii1;~Atsunori_Ichikawa1", "gender": "M;;M;M", "homepage": ";;;", "dblp": "168/9586;;;243/4463.html", "google_scholar": "udRFUnoAAAAJ;https://scholar.google.com/citations?hl=ja;https://scholar.google.co.jp/citations?hl=ja;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Toshiki_Shibahara1;~Takayuki_Miura1;~Masanobu_Kii1;~Atsunori_Ichikawa1", "aff": "NTT;NTT Social Informatics Laboratories;NTT, The University of Tokyo;NTT Corp.", "aff_domain": "ntt.co.jp;ntt.com;ntt.co.jp;ntt.co.jp", "position": "Researcher;Researcher;Researcher;Researcher", "bibtex": "@misc{\nshibahara2023individual,\ntitle={Individual Fairness of Data Provider Regarding Privacy Risk and Gain},\nauthor={Toshiki Shibahara and Takayuki Miura and Masanobu Kii and Atsunori Ichikawa},\nyear={2023},\nurl={https://openreview.net/forum?id=5jBBG-zgrwl}\n}", "github": "", "project": "", "reviewers": "fzWY;77T3;bzAv;MgQj", "site": "https://openreview.net/forum?id=5jBBG-zgrwl", "pdf_size": 1562144, "recommendation": "3;3;5;5", "confidence": "4;4;4;3", "correctness": "3;2;3;3", "technical_novelty": "2;2;2;2", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "42;230;90;167", "wc_strength_and_weaknesses": "181;139;565;594", "wc_clarity_quality_novelty_and_reproducibility": "98;76;284;24", "wc_summary_review": "30;136;114;117", "wc_review": "351;581;1053;902", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 132.25, 71.92487400058481 ], "wc_strength_and_weaknesses_avg": [ 369.75, 210.52479070171285 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 120.5, 98.14657406145157 ], "wc_summary_review_avg": [ 99.25, 40.861809798392436 ], "wc_review_avg": [ 721.75, 273.6250856555371 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:DKZueONIfuwJ:scholar.google.com/&scioq=Individual+Fairness+of+Data+Provider+Regarding+Privacy+Risk+and+Gain&hl=en&as_sdt=0,34", "gs_version_total": 0, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "NTT Corporation;NTT Data;University of Tokyo", "aff_unique_dep": ";Social Informatics Laboratories;", "aff_unique_url": "https://www.ntt.co.jp;https://www.nttdata.com;https://www.u-tokyo.ac.jp", "aff_unique_abbr": "NTT;NTT;UTokyo", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Japan" }, { "title": "Learning to Estimate Shapley Values with Vision Transformers", "status": "Top-25%", "track": "main", "site": 
"https://iclr.cc/virtual/2023/poster/11777", "id": "5ktFNz_pJLK", "poster": "/media/PosterPDFs/ICLR%202023/11777.png?t=1682805061.1995313", "openreview": "https://openreview.net/forum?id=5ktFNz_pJLK", "slides": "https://iclr.cc/virtual/2023/poster/11777", "video": "https://iclr.cc/virtual/2023/poster/11777", "author_site": "Ian Covert, Chanwoo Kim, Su-In Lee", "tldr": "A learning-based approach to efficiently calculate Shapley values for ViTs", "abstract": "Transformers have become a default architecture in computer vision, but understanding what drives their predictions remains a challenging problem. Current explanation approaches rely on attention values or input gradients, but these provide a limited view of a model\u2019s dependencies. Shapley values offer a theoretically sound alternative, but their computational cost makes them impractical for large, high-dimensional models. In this work, we aim to make Shapley values practical for vision transformers (ViTs). To do so, we first leverage an attention masking approach to evaluate ViTs with partial information, and we then develop a procedure to generate Shapley value explanations via a separate, learned explainer model. Our experiments compare Shapley values to many baseline methods (e.g., attention rollout, GradCAM, LRP), and we find that our approach provides more accurate explanations than existing methods for ViTs.", "keywords": "ViTs;Shapley values;amortization;explainability", "primary_area": "", "supplementary_material": "/attachment/cc200e79c97b312761ec59f9c0b11831c5ed8c95.zip", "author": "Ian Connick Covert;Chanwoo Kim;Su-In Lee", "authorids": "~Ian_Connick_Covert1;~Chanwoo_Kim3;~Su-In_Lee2", "gender": "M;;F", "homepage": "https://iancovert.com;https://chanwoo.kim;http://suinlee.cs.washington.edu/", "dblp": "262/3443;62/79-2;17/1784", "google_scholar": "Np8Ek3cAAAAJ;;", "orcid": ";;", "linkedin": "ian-covert/;;", "or_profile": "~Ian_Connick_Covert1;~Chanwoo_Kim3;~Su-In_Lee2", "aff": "University of Washington;Department of Computer Science, University of Washington;University of Washington", "aff_domain": "uw.edu;cs.washington.edu;uw.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\ncovert2023learning,\ntitle={Learning to Estimate Shapley Values with Vision Transformers},\nauthor={Ian Connick Covert and Chanwoo Kim and Su-In Lee},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=5ktFNz_pJLK}\n}", "github": "", "project": "", "reviewers": "trn3;qY28;eEy2;QpHf", "pdf_size": 26604097, "recommendation": "6;8;8;8", "confidence": "4;4;3;3", "correctness": "3;4;4;4", "technical_novelty": "3;3;3;2", "empirical_novelty": "2;4;0;2", "wc_summary_paper": "52;65;114;76", "wc_strength_and_weaknesses": "140;79;55;97", "wc_clarity_quality_novelty_and_reproducibility": "11;6;87;390", "wc_summary_review": "9;2;24;146", "wc_review": "212;152;280;709", "wc_reply_reviewers": "85;0;0;366", "wc_reply_authors": "1565;315;233;1558", "reply_reviewers": "1;0;0;2", "reply_authors": "4;1;1;4", "recommendation_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 1.4142135623730951 ], "wc_summary_paper_avg": [ 76.75, 23.12331074911203 ], "wc_strength_and_weaknesses_avg": [ 92.75, 31.083556746292725 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 123.5, 157.17585692465622 ], 
"wc_summary_review_avg": [ 45.25, 58.708495978009864 ], "wc_review_avg": [ 338.25, 218.79028200539437 ], "wc_reply_reviewers_avg": [ 112.75, 150.2753722337762 ], "wc_reply_authors_avg": [ 917.75, 644.4072373119346 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.5, 1.5 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 1.0, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9379341996847050714&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=5ktFNz_pJLK", "email": "uw.edu;cs.washington.edu;uw.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Washington", "aff_unique_dep": "", "aff_unique_url": "https://www.washington.edu", "aff_unique_abbr": "UW", "aff_campus_unique_index": "1", "aff_campus_unique": ";Seattle", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Recursive Time Series Data Augmentation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10784", "id": "5lgD4vU-l24s", "poster": "", "openreview": "https://openreview.net/forum?id=5lgD4vU-l24s", "slides": "https://iclr.cc/virtual/2023/poster/10784", "video": "https://iclr.cc/virtual/2023/poster/10784", "author_site": "Amine Aboussalah, Minjae Kwon, Raj Patel, Cheng Chi, Chi-Guhn Lee", "tldr": "", "abstract": "Time series observations can be seen as realizations of an underlying dynamical system governed by rules that we typically do not know. For time series learning tasks we create our model using available data. Training on available realizations, where data is limited, often induces severe over-fitting thereby preventing generalization. To address this issue, we introduce a general recursive framework for time series augmentation, which we call the Recursive Interpolation Method (RIM). New augmented time series are generated using a recursive interpolation function from the original time series for use in training. We perform theoretical analysis to characterize the proposed RIM and to guarantee its performance under certain conditions. We apply RIM to diverse synthetic and real-world time series cases to achieve strong performance over non-augmented data on a variety of learning tasks. 
Our method is also computationally more efficient and leads to better performance when compared to state-of-the-art time series data augmentation.\n", "keywords": "Time Series;Data augmentation;Representation Learning;Deep Learning;Reinforcement Learning", "primary_area": "", "supplementary_material": "", "author": "Amine Mohamed Aboussalah;Minjae Kwon;Raj G Patel;Cheng Chi;Chi-Guhn Lee", "authorids": "~Amine_Mohamed_Aboussalah1;~Minjae_Kwon1;~Raj_G_Patel1;~Cheng_Chi3;~Chi-Guhn_Lee1", "gender": "M;;;M;M", "homepage": "https://engineering.nyu.edu/faculty/amine-mohamed-aboussalah;http://github.com/kmj901122;;;http://cglee.mie.utoronto.ca", "dblp": ";;;;62/4690", "google_scholar": ";;;;https://scholar.google.ca/citations?user=ZpALG2AAAAAJ", "orcid": ";;;;0000-0002-0916-0241", "linkedin": ";;raj-patel-52298aa8;www.linkedin.com/in/cheng-chi-2a603823b;", "or_profile": "~Amine_Mohamed_Aboussalah1;~Minjae_Kwon1;~Raj_G_Patel1;~Cheng_Chi3;~Chi-Guhn_Lee1", "aff": "New York University;University of Virginia, Charlottesville;University of Toronto;University of Toronto;University of Toronto", "aff_domain": "nyu.edu;virginia.edu;utoronto.ca;utoronto.ca;mie.utoronto.ca", "position": "Assistant Professor;PhD student;MS student;MS student;Full Professor", "bibtex": "@inproceedings{\naboussalah2023recursive,\ntitle={Recursive Time Series Data Augmentation},\nauthor={Amine Mohamed Aboussalah and Minjae Kwon and Raj G Patel and Cheng Chi and Chi-Guhn Lee},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=5lgD4vU-l24s}\n}", "github": "", "project": "", "reviewers": "oRTw;aEoM;Ftkr;LCdk", "pdf_size": 1813160, "recommendation": "5;5;6;10", "confidence": "3;3;3;4", "correctness": "3;3;3;4", "technical_novelty": "3;3;3;4", "empirical_novelty": "3;2;3;4", "wc_summary_paper": "138;55;54;47", "wc_strength_and_weaknesses": "300;418;104;22", "wc_clarity_quality_novelty_and_reproducibility": "104;2;1;5", "wc_summary_review": "136;14;18;56", "wc_review": "678;489;177;130", "wc_reply_reviewers": "77;0;27;0", "wc_reply_authors": "900;1319;179;6", "reply_reviewers": "1;0;1;0", "reply_authors": "6;5;1;1", "recommendation_avg": [ 6.5, 2.0615528128088303 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 73.5, 37.36642878306676 ], "wc_strength_and_weaknesses_avg": [ 211.0, 156.4768353463221 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 28.0, 43.90330283703038 ], "wc_summary_review_avg": [ 56.0, 49.01020301937138 ], "wc_review_avg": [ 368.5, 225.75705969027857 ], "wc_reply_reviewers_avg": [ 26.0, 31.44041984452498 ], "wc_reply_authors_avg": [ 601.0, 533.1589819181517 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 3.25, 2.277608394786075 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.9801960588196067, "corr_recommendation_correctness": 0.9801960588196067, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11165725864158746808&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "pdf": "https://openreview.net/pdf?id=5lgD4vU-l24s", "email": "nyu.edu;virginia.edu;utoronto.ca;utoronto.ca;mie.utoronto.ca", "author_num": 5, "aff_unique_index": "0;1;2;2;2", "aff_unique_norm": "New York University;University of Virginia;University of Toronto", "aff_unique_dep": ";;",
"aff_unique_url": "https://www.nyu.edu;https://www.virginia.edu;https://www.utoronto.ca", "aff_unique_abbr": "NYU;UVA;U of T", "aff_campus_unique_index": "1", "aff_campus_unique": ";Charlottesville", "aff_country_unique_index": "0;0;1;1;1", "aff_country_unique": "United States;Canada" }, { "id": "5m_3whfo483", "title": "ETSformer: Exponential Smoothing Transformers for Time-series Forecasting", "track": "main", "status": "Reject", "tldr": "We propose an interpretable Transformer architecture which decomposes forecasts into level, growth, and seasonality components.", "abstract": "Transformers have recently been actively studied for time-series forecasting. While often showing promising results in various scenarios, traditional Transformers are not designed to fully exploit the characteristics of time-series data and thus suffer some fundamental limitations, e.g., they are generally not decomposable or interpretable, and are neither effective nor efficient for long-term forecasting. In this paper, we propose ETSformer, a novel time-series Transformer architecture, which exploits the principle of exponential smoothing methods in improving Transformers for time-series forecasting. Specifically, ETSformer leverages a novel level-growth-seasonality decomposed Transformer architecture which leads to more interpretable and disentangled decomposed forecasts. We further propose two novel attention mechanisms -- the exponential smoothing attention and frequency attention, which are specially designed to overcome the limitations of the vanilla attention mechanism for time-series data. Extensive experiments on various time-series benchmarks validate the efficacy and advantages of the proposed method. Code is attached in the supplementary material, and will be made publicly available. 
", "keywords": "time-series;forecasting;transformer;decomposition;season-trend;interpretable", "primary_area": "", "supplementary_material": "/attachment/d308dba1a0d58717ab828aa821a3c65d4956dccc.zip", "author": "Gerald Woo;Chenghao Liu;Doyen Sahoo;Akshat Kumar;Steven Hoi", "authorids": "~Gerald_Woo1;~Chenghao_Liu1;~Doyen_Sahoo1;~Akshat_Kumar2;~Steven_Hoi2", "gender": "M;M;M;M;M", "homepage": ";;https://www.linkedin.com/in/doyensahoo/?originalSubdomain=sg;http://www.smu.edu.sg/faculty/profile/102291/Akshat-KUMAR;http://stevenhoi.com", "dblp": "246/5297;;151/3155;73/193;", "google_scholar": ";https://scholar.google.com/citations?hl=en;https://scholar.google.com.sg/citations?hl=en;https://scholar.google.com.tw/citations?user=zsYC3R0AAAAJ;JoLjflYAAAAJ", "orcid": ";;;;", "linkedin": "gerald-woo/;chenghao-liu-40a62a56/;doyensahoo/?originalSubdomain=sg;;", "or_profile": "~Gerald_Woo1;~Chenghao_Liu1;~Doyen_Sahoo1;~Akshat_Kumar2;~Steven_Hoi2", "aff": "Singapore Management University;Salesforce AI Research;SalesForce.com;Singapore Management University;Singapore Management University", "aff_domain": "smu.edu.sg;salesforce.com;salesforce.com;smu.edu.sg;smu.edu.sg", "position": "PhD student;Researcher;Researcher;Associate Professor;Associate Professor", "bibtex": "@misc{\nwoo2023etsformer,\ntitle={{ETS}former: Exponential Smoothing Transformers for Time-series Forecasting},\nauthor={Gerald Woo and Chenghao Liu and Doyen Sahoo and Akshat Kumar and Steven Hoi},\nyear={2023},\nurl={https://openreview.net/forum?id=5m_3whfo483}\n}", "github": "", "project": "", "reviewers": "cG8G;bgQv;9rNK;7t86", "site": "https://openreview.net/forum?id=5m_3whfo483", "pdf_size": 1129622, "recommendation": "3;5;5;6", "confidence": "4;3;4;4", "correctness": "2;2;3;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "45;70;59;106", "wc_strength_and_weaknesses": "249;446;117;241", "wc_clarity_quality_novelty_and_reproducibility": "20;21;13;25", "wc_summary_review": "12;24;115;94", "wc_review": "326;561;304;466", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1233;1404;549;1085", "reply_reviewers": "0;0;0;0", "reply_authors": "5;5;4;5", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 70.0, 22.594247055390007 ], "wc_strength_and_weaknesses_avg": [ 263.25, 117.77600562083943 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 19.75, 4.322904116447646 ], "wc_summary_review_avg": [ 61.25, 44.08727140570167 ], "wc_review_avg": [ 414.25, 105.06753780307217 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1067.75, 320.0666922689707 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 4.75, 0.4330127018922193 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.13245323570650439, "corr_recommendation_correctness": 0.6882472016116854, "gs_citation": 318, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1694313711004546611&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;1;0;0", "aff_unique_norm": "Singapore Management University;Salesforce", "aff_unique_dep": ";Salesforce AI Research", "aff_unique_url": "https://www.smu.edu.sg;https://www.salesforce.com", "aff_unique_abbr": "SMU;Salesforce AI", "aff_campus_unique_index": "", "aff_campus_unique": "", 
"aff_country_unique_index": "0;1;1;0;0", "aff_country_unique": "Singapore;United States" }, { "title": "SP2 : A Second Order Stochastic Polyak Method", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11078", "id": "5mqFra2ZSuf", "poster": "/media/PosterPDFs/ICLR%202023/11078.png?t=1681061042.5275857", "openreview": "https://openreview.net/forum?id=5mqFra2ZSuf", "slides": "https://iclr.cc/virtual/2023/poster/11078", "video": "https://iclr.cc/virtual/2023/poster/11078", "author_site": "Shuang Li, William Swartworth, Martin Tak\u00e1\u010d, Deanna Needell, Robert M. Gower", "tldr": "", "abstract": "Recently the SP (Stochastic Polyak step size) method has emerged as a competitive adaptive method for setting the step sizes of SGD. SP can be interpreted as a method specialized to interpolated models, since it solves the interpolation equations. SP solves these equation by using local linearizations of the model. We take a step further and develop a method for solving the interpolation equations that uses the local second-order approximation of the model. Our resulting method SP2 uses Hessian-vector products to speed-up the convergence of SP. Furthermore, and rather uniquely among second-order methods, the design of SP2 in no way relies on positive definite Hessian matrices or convexity of the objective function. We show SP2 is competitive both in experiments and in theory. \nWe show SP2 is very competitive on matrix completion, non-convex test problems and logistic regression. We also provide a convergence theory on sums-of-quadratics.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/2b24e532457ff07e9d899d7f0abd6ce2e86d4282.zip", "author": "Shuang Li;William Joseph Swartworth;Martin Tak\u00e1\u010d;Deanna Needell;Robert M. Gower", "authorids": "~Shuang_Li9;~William_Joseph_Swartworth1;~Martin_Tak\u00e1\u010d1;~Deanna_Needell2;~Robert_M._Gower1", "gender": "F;;Not Specified;M;M", "homepage": "https://www.ece.iastate.edu/~lishuang/;https://www.math.ucla.edu/~wswartworth/;https://www.math.ucla.edu/~deanna/index.html;https://gowerrobert.github.io/;http://mtakac.com", "dblp": ";;03/2691;143/0056;42/3759-1.html", "google_scholar": "mzDw-lwAAAAJ;;;okKw87MAAAAJ;qKQD-2cAAAAJ", "orcid": ";;0000-0002-8058-8638;;0000-0001-7455-2025", "linkedin": ";;;;martintakac/", "or_profile": "~Shuang_Li9;~William_Joseph_Swartworth1;~Deanna_Needell2;~Robert_M._Gower1;~Martin_Takac3", "aff": "University of California, Los Angeles;University of California, Los Angeles;University of California, Los Angeles;Flatiron Institute;Mohamed bin Zayed University of Artificial Intelligence", "aff_domain": "ucla.edu;ucla.edu;ucla.edu;simonsfoundation.org;mbzuai.ac.ae", "position": "Postdoc;PhD student;Full Professor;Researcher;Associate Professor", "bibtex": "@inproceedings{\nli2023sp,\ntitle={{SP}2 : A Second Order Stochastic Polyak Method},\nauthor={Shuang Li and William Joseph Swartworth and Martin Tak{\\'a}{\\v{c}} and Deanna Needell and Robert M. 
Gower},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=5mqFra2ZSuf}\n}", "github": "", "project": "", "reviewers": "etGz;fkL2;DF7u", "pdf_size": 3275477, "recommendation": "5;6;6", "confidence": "4;5;4", "correctness": "3;4;3", "technical_novelty": "2;3;3", "empirical_novelty": "0;3;2", "wc_summary_paper": "126;105;71", "wc_strength_and_weaknesses": "23;640;152", "wc_clarity_quality_novelty_and_reproducibility": "22;19;22", "wc_summary_review": "147;85;29", "wc_review": "318;849;274", "wc_reply_reviewers": "48;539;64", "wc_reply_authors": "630;2426;621", "reply_reviewers": "1;7;1", "reply_authors": "7;18;8", "recommendation_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.6666666666666667, 1.247219128924647 ], "wc_summary_paper_avg": [ 100.66666666666667, 22.661764175711376 ], "wc_strength_and_weaknesses_avg": [ 271.6666666666667, 265.72207703204157 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 21.0, 1.4142135623730951 ], "wc_summary_review_avg": [ 87.0, 48.194052191807515 ], "wc_review_avg": [ 480.3333333333333, 261.3048453860399 ], "wc_reply_reviewers_avg": [ 217.0, 227.7820595803512 ], "wc_reply_authors_avg": [ 1225.6666666666667, 848.7717923891885 ], "reply_reviewers_avg": [ 3.0, 2.8284271247461903 ], "reply_authors_avg": [ 11.0, 4.96655480858378 ], "replies_avg": [ 51, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.4999999999999999, "corr_recommendation_correctness": 0.4999999999999999, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13205340829069169268&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=5mqFra2ZSuf", "email": "ucla.edu;ucla.edu;ucla.edu;simonsfoundation.org;mbzuai.ac.ae", "author_num": 5, "aff_unique_index": "0;0;0;1;2", "aff_unique_norm": "University of California, Los Angeles;Flatiron Institute;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ucla.edu;https://flatironinstitute.org;https://mbzuai.ac.ae", "aff_unique_abbr": "UCLA;Flatiron;MBZUAI", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "United States;United Arab Emirates" }, { "id": "5o8oFs5D9Z", "title": "SurCo: Learning Linear Surrogates for Combinatorial Nonlinear Optimization Problems", "track": "main", "status": "Reject", "tldr": "SurCo learns linear surrogate problems for nonlinear combinatorial optimization by training high-quality linear surrogates using end-to-end gradient descent with better performance in two industrial domains", "abstract": "Optimization problems with expensive nonlinear cost functions and combinatorial constraints appear in many real-world applications, but remain challenging to solve efficiently. Existing combinatorial solvers like Mixed Integer Linear Programming can be fast in practice but cannot readily optimize nonlinear cost functions, while general nonlinear optimizers like gradient descent often do not handle complex combinatorial structures, may require many queries of the cost function, and are prone to local optima. 
To bridge this gap, we propose SurCo that learns linear Surrogate costs which can be used by existing Combinatorial solvers to output good solutions to the original nonlinear combinatorial optimization problem, combining the flexibility of gradient-based methods with the structure of linear combinatorial optimization. We learn these linear surrogates end-to-end with the nonlinear loss by differentiating through the linear surrogate solver. Three variants of SurCo are proposed: SurCo-zero operates on individual nonlinear problems, SurCo-prior trains a linear surrogate predictor on distributions of problems, and SurCo-hybrid uses a model trained offline to warm start online solving for SurCo-zero. We analyze our method theoretically and empirically, showing smooth convergence and improved performance. Experiments show that compared to state-of-the-art approaches and expert-designed heuristics, SurCo obtains lower cost solutions with comparable or faster solve time for two real-world industry-level applications: embedding table sharding and inverse photonic design.", "keywords": "Differentiable Optimization;Machine Learning;Nonlinear Optimization;Combinatorial Optimization", "primary_area": "", "supplementary_material": "", "author": "Aaron M Ferber;Taoan Huang;Daochen Zha;Martin Schubert;Benoit Steiner;Bistra Dilkina;Yuandong Tian", "authorids": "~Aaron_M_Ferber1;~Taoan_Huang2;~Daochen_Zha1;mfschubert@fb.com;~Benoit_Steiner1;~Bistra_Dilkina2;~Yuandong_Tian1", "gender": "M;M;;;;F;M", "homepage": "https://aaron-ferber.github.io/;;http://dczha.com/;;https://bsteiner.info;;http://yuandong-tian.com", "dblp": "163/7788;241/7690;167/0903;;177/9377;30/5718;t/YuandongTian", "google_scholar": "TuVq07oAAAAJ;;jK0NgMcAAAAJ;;rT11mdcAAAAJ;1jjyaBYAAAAJ;0mgEF28AAAAJ", "orcid": ";;0000-0002-6677-7504;;;0000-0002-6784-473X;0000-0003-4202-4847", "linkedin": "aaron-ferber-64a73980/;;daochen-zha;;;;yuandongtian", "or_profile": "~Aaron_M_Ferber1;~Taoan_Huang2;~Daochen_Zha1;mfschubert@fb.com;~Benoit_Steiner1;~Bistra_Dilkina2;~Yuandong_Tian1", "aff": "University of Southern California;University of Southern California;Rice University;;Meta Facebook;University of Southern California;Meta AI (FAIR)", "aff_domain": "usc.edu;usc.edu;rice.edu;;fb.com;usc.edu;meta.com", "position": "PhD student;PhD student;PhD student;;Researcher;Associate Professor;Research Scientist", "bibtex": "@misc{\nferber2023surco,\ntitle={SurCo: Learning Linear Surrogates for Combinatorial Nonlinear Optimization Problems},\nauthor={Aaron M Ferber and Taoan Huang and Daochen Zha and Martin Schubert and Benoit Steiner and Bistra Dilkina and Yuandong Tian},\nyear={2023},\nurl={https://openreview.net/forum?id=5o8oFs5D9Z}\n}", "github": "", "project": "", "reviewers": "r1VR;n7J2;qvJ5", "site": "https://openreview.net/forum?id=5o8oFs5D9Z", "pdf_size": 647110, "recommendation": "5;6;8", "confidence": "4;3;3", "correctness": "2;3;3", "technical_novelty": "2;2;4", "empirical_novelty": "3;2;3", "wc_summary_paper": "83;99;142", "wc_strength_and_weaknesses": "404;370;382", "wc_clarity_quality_novelty_and_reproducibility": "497;48;19", "wc_summary_review": "86;100;20", "wc_review": "1070;617;563", "wc_reply_reviewers": "415;134;0", "wc_reply_authors": "631;171;117", "reply_reviewers": "1;1;0", "reply_authors": "1;1;1", "recommendation_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 
0.9428090415820634 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 108.0, 24.91318258807306 ], "wc_strength_and_weaknesses_avg": [ 385.3333333333333, 14.079141387961917 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 188.0, 218.81651369735937 ], "wc_summary_review_avg": [ 68.66666666666667, 34.883934538536344 ], "wc_review_avg": [ 750.0, 227.34555196880365 ], "wc_reply_reviewers_avg": [ 183.0, 172.9296581465038 ], "wc_reply_authors_avg": [ 306.3333333333333, 230.63005489793002 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.7559289460184545, "corr_recommendation_correctness": 0.7559289460184545, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2918719716302112676&as_sdt=2005&sciodt=0,5&hl=en&oe=ASCII", "gs_version_total": 7, "aff_unique_index": "0;0;1;2;0;2", "aff_unique_norm": "University of Southern California;Rice University;Meta", "aff_unique_dep": ";;Meta Platforms, Inc.", "aff_unique_url": "https://www.usc.edu;https://www.rice.edu;https://meta.com", "aff_unique_abbr": "USC;Rice;Meta", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "5ohslQBnxUw", "title": "On the Convergence of Gradient Flow on Multi-layer Linear Models", "track": "main", "status": "Reject", "tldr": "We study how initialization affect the convergence of gradient flow on multi-layer linear networks", "abstract": "In this paper, we analyze the convergence of gradient flow on a multi-layer linear model with a loss function of the form $f(W_1W_2\\cdots W_L)$. We show that when $f$ satisfies the gradient dominance property, proper weight initialization leads to exponential convergence of the gradient flow to a global minimum of the loss. Moreover, the convergence rate depends on two trajectory-specific quantities that are controlled by the weight initialization: the \\emph{imbalance matrices}, which measure the difference between the weights of adjacent layers, and the least singular value of the \\emph{weight product} $W=W_1W_2\\cdots W_L$. Our analysis provides improved rate bounds for several multi-layer network models studied in the literature, leading to novel characterizations of the effect of weight imbalance on the rate of convergence. 
Our results apply to most regression losses and extend to classification ones.", "keywords": "Multi-layer Linear Networks;Non-convex optimization;Gradient Flow;Training invariance", "primary_area": "", "supplementary_material": "", "author": "Hancheng Min;Rene Vidal;Enrique Mallada", "authorids": "~Hancheng_Min1;~Rene_Vidal1;~Enrique_Mallada1", "gender": "M;;M", "homepage": "https://hanchmin.github.io/;http://www.vision.jhu.edu;http://mallada.ece.jhu.edu", "dblp": "226/6324;v/ReneVidal;", "google_scholar": "XgQjPZIAAAAJ;https://scholar.google.com/citations?hl=en;ZvRFA04AAAAJ", "orcid": ";;0000-0003-1568-1833", "linkedin": ";rene-vidal-74844928/;emallada/", "or_profile": "~Hancheng_Min1;~Rene_Vidal1;~Enrique_Mallada1", "aff": "Johns Hopkins University;Amazon;Johns Hopkins University", "aff_domain": "jhu.edu;amazon.com;jhu.edu", "position": "PhD student;Principal Researcher;Associate Professor", "bibtex": "@misc{\nmin2023on,\ntitle={On the Convergence of Gradient Flow on Multi-layer Linear Models},\nauthor={Hancheng Min and Rene Vidal and Enrique Mallada},\nyear={2023},\nurl={https://openreview.net/forum?id=5ohslQBnxUw}\n}", "github": "", "project": "", "reviewers": "h9zp;QQB2;5qgv", "site": "https://openreview.net/forum?id=5ohslQBnxUw", "pdf_size": 471899, "recommendation": "3;6;6", "confidence": "4;3;4", "correctness": "2;4;4", "technical_novelty": "3;3;3", "empirical_novelty": "0;0;3", "wc_summary_paper": "152;71;49", "wc_strength_and_weaknesses": "587;260;35", "wc_clarity_quality_novelty_and_reproducibility": "18;44;22", "wc_summary_review": "26;85;72", "wc_review": "783;460;178", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1266;38;199", "reply_reviewers": "0;0;0", "reply_authors": "2;1;1", "recommendation_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.9428090415820634 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 1.0, 1.4142135623730951 ], "wc_summary_paper_avg": [ 90.66666666666667, 44.28945196720722 ], "wc_strength_and_weaknesses_avg": [ 294.0, 226.63186007267382 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 28.0, 11.430952132988164 ], "wc_summary_review_avg": [ 61.0, 25.311394008759507 ], "wc_review_avg": [ 473.6666666666667, 247.17919725485712 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 501.0, 544.9152839356469 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.5, "corr_recommendation_correctness": 1.0, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15041935046689134961&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;0", "aff_unique_norm": "Johns Hopkins University;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.jhu.edu;https://www.amazon.com", "aff_unique_abbr": "JHU;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "5p4wvBz9xIe", "title": "HeatDETR: Hardware-Efficient DETR with Device-Adaptive Thinning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Vision transformers (ViTs) have continuously achieved new milestones in computer vision.\nA natural usage of ViTs in detection is to replace the CNN-based backbone with a transformer-based backbone directly, but with the price of 
a considerable computation burden for their deployment on resource-limited edge devices.\nA more promising usage is the DETR family, which eliminates the need for many hand-designed components in object detection but still cannot reach real-time edge applications.\nIn this paper, we propose a novel hardware-efficient adaptive-thinning DETR (HeatDETR), achieving high-speed inference on multiple edge devices and, for the first time, even real-time inference.\nSpecifically, our work makes three main contributions:\n1) For decent detection performance, we introduce a backbone design principle based on the visual modeling process that progresses from locality to globality. Meanwhile, we propose a semantic-augmented module (SAM) in the backbone with the global modeling capabilities of self-attention to enhance low-level semantics. We also introduce an attention-based task-couple module (TCM) to reduce contradictions between classification and regression tasks.\n2) For on-device efficiency, we propose a scale-combined module (SCM), through which we transform the multi-level detection process into a single-level process, removing multi-branch inference for higher hardware speed while maintaining detection performance.\nWe then revisit the network architectures and operators used in ViT-based models and reparameterized CNNs, identify hardware-efficient designs, and introduce the basic HeatDETR structure.\n3) Based on our device-adaptive model-thinning strategy, a deployable end-to-end HeatDETR for target devices can be generated efficiently.\nExperiments on the MS COCO dataset show that HeatDETR outperforms current DETR-based methods by 0.3%~6.2% AP with a 5%~68% speedup on a single Tesla V100.\nEven real-time inference can be achieved on extremely memory-constrained devices, e.g., a dual-core Intel Core i7 CPU.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/65b3c853712320e4d27ae04c7f051cb12ae59e03.zip", "author": "Peiyan Dong;Xin Meng;PENG ZHANG;Hao Tang;Yanzhi Wang;Chih-Hsien Chou", "authorids": "~Peiyan_Dong1;~Xin_Meng1;~PENG_ZHANG27;~Hao_Tang6;~Yanzhi_Wang3;chih.hsien.chou@futurewei.com", "gender": "F;M;M;M;M;", "homepage": "https://peiyanflying.github.io/Peggy_Peiyan.github.io/;https://www.linkedin.com/in/%E9%91%AB-%E5%AD%9F-b45849175/;;https://ha0tang.github.io/;https://web.northeastern.edu/yanzhiwang/;", "dblp": "254/1329;;21/1048-70.html;07/5751-5;;", "google_scholar": "OGU3CVoAAAAJ;;https://scholar.google.com/citations?view_op=list_works;9zJkeEMAAAAJ;https://scholar.google.com/citations?hl=en;", "orcid": ";0000-0003-2228-0587;;0000-0002-2077-1246;;", "linkedin": ";%E9%91%AB-%E5%AD%9F-b45849175/;;hao-tang-887475138/;;", "or_profile": "~Peiyan_Dong1;~Xin_Meng1;~PENG_ZHANG27;~Hao_Tang6;~Yanzhi_Wang3;chih.hsien.chou@futurewei.com", "aff": "Northeastern University;NVIDIA;;ETH Zurich;Northeastern University;", "aff_domain": "northeastern.edu;nvidia.com;;vision.ee.ethz.ch;northeastern.edu;", "position": "PhD student;Researcher;;Postdoc;Associate Professor;", "bibtex": "@misc{\ndong2023heatdetr,\ntitle={Heat{DETR}: Hardware-Efficient {DETR} with Device-Adaptive Thinning},\nauthor={Peiyan Dong and Xin Meng and PENG ZHANG and Hao Tang and Yanzhi Wang and Chih-Hsien Chou},\nyear={2023},\nurl={https://openreview.net/forum?id=5p4wvBz9xIe}\n}", "github": "", "project": "", "reviewers": "RjAd;Mbgz;1XTf", "site": "https://openreview.net/forum?id=5p4wvBz9xIe", "pdf_size": 2101396, "recommendation": "3;3;5", "confidence": "4;3;3", "correctness": "3;3;3", "technical_novelty": "2;2;2", "empirical_novelty":
"2;2;2", "wc_summary_paper": "57;48;135", "wc_strength_and_weaknesses": "371;144;391", "wc_clarity_quality_novelty_and_reproducibility": "291;17;35", "wc_summary_review": "84;2;34", "wc_review": "803;211;595", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 80.0, 39.06404996924922 ], "wc_strength_and_weaknesses_avg": [ 302.0, 112.02083139606967 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 114.33333333333333, 125.13814588507996 ], "wc_summary_review_avg": [ 40.0, 33.7441352929167 ], "wc_review_avg": [ 536.3333333333334, 245.21736389488316 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.5, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:5KhEtaHF0D8J:scholar.google.com/&scioq=HeatDETR:+Hardware-Efficient+DETR+with+Device-Adaptive+Thinning&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Northeastern University;NVIDIA;ETH Zurich", "aff_unique_dep": ";NVIDIA Corporation;", "aff_unique_url": "https://www.northeastern.edu;https://www.nvidia.com;https://www.ethz.ch", "aff_unique_abbr": "NEU;NVIDIA;ETHZ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;Switzerland" }, { "id": "5pU6126YRp", "title": "TT-Rules: Extracting & Optimizing Exact Rules of a CNN-Based Model - Application to Fairness", "track": "main", "status": "Withdraw", "tldr": "In this work, we proposed an optimized new CNN-based framework for global and exact interpretability with application to healthcare and fairness tabular datasets.", "abstract": "Most Machine Learning (ML) models are ``black box'' models, but in critical domains such as healthcare, energy, finance, military, or justice, they need to be globally and exactly interpretable. Creating ML models convertible by design into rule-based models is an attractive solution: they produce all the rules (global nature of interpretability) that allow us to obtain exactly the output result (exact nature of interpretability). Today, these rule-based models are mainly decision trees, whose natural interpretability is outweighed by their poor performances and scalability. In this paper, we offer a new three-step framework, TT-rules, that extracts and optimizes exact rules from a recent family of Convolution Neural Networks (CNNs) called Truth Table nets (TTnets). First, we show how to extract rules $\\mathcal{R}$ in Disjunction Normal Form (DNF) from TTnets, which we adapt and enhance for tabular datasets. Secondly, we explain how the TT-rules framework permits the optimization of two key interpretability factors, namely the number of rules and their size, transforming the original set $\\mathcal{R}$ into an optimized $\\mathcal{R}_{opt}$. Our rule-based model is thus composed of $\\mathcal{R}_{opt}$ with a final binary linear regression and allows multi-label classification. 
In a third step, we improve the rules' visualization by converting them into Reduced Ordered Binary Decision Diagrams (ROBDD) and enriching them by computing interesting associated probabilities. To evaluate TT-rules' performances, we applied it to two tabular healthcare datasets and two fairness datasets. Our framework reaches competitive results compared to state-of-the-art rule-based models in terms of accuracy, complexity, and statistical parity, also giving exact and global interpretability. In addition, we show that practitioners can use their domain knowledge to diagnose individual fairness of a given TT-rules model by analyzing and further modifying the rules $\\mathcal{R}_{opt}$. As an example of the compactness of our framework's output, we draw all the rules in $\\mathcal{R}_{opt}$ for one model on the Adult dataset (only 15 conditions for an 84.6\\% accuracy).", "keywords": "global & exact interpretability;convolutional neural-networks;rule-based model for fairness", "primary_area": "", "supplementary_material": "", "author": "Adrien Benamira;Tristan Gu\u00e9rand;Thomas Peyrin", "authorids": "~Adrien_Benamira1;~Tristan_Gu\u00e9rand1;~Thomas_Peyrin1", "gender": "M;M;M", "homepage": ";;https://thomaspeyrin.github.io/web/", "dblp": ";339/6723;p/ThomasPeyrin", "google_scholar": "k3jLkWUAAAAJ;eHI8-D4AAAAJ;", "orcid": ";0009-0004-1480-9617;", "linkedin": ";tristanguerand/;", "or_profile": "~Adrien_Benamira1;~Tristan_Gu\u00e9rand1;~Thomas_Peyrin1", "aff": "Nanyang Technological University;Nanyang Technological University;Nanyang Technological University", "aff_domain": "ntu.edu.sg;ntu.edu.sg;ntu.edu.sg", "position": "PhD student;PhD student;Full Professor", "bibtex": "@misc{\nbenamira2023ttrules,\ntitle={{TT}-Rules: Extracting \\& Optimizing Exact Rules of a {CNN}-Based Model - Application to Fairness},\nauthor={Adrien Benamira and Tristan Gu{\\'e}rand and Thomas Peyrin},\nyear={2023},\nurl={https://openreview.net/forum?id=5pU6126YRp}\n}", "github": "", "project": "", "reviewers": "UXXs;xHxD;SpTW;c2Ly", "site": "https://openreview.net/forum?id=5pU6126YRp", "pdf_size": 1845198, "recommendation": "1;3;3;3", "confidence": "4;4;3;4", "correctness": "2;3;3;3", "technical_novelty": "1;2;2;2", "empirical_novelty": "1;3;2;0", "wc_summary_paper": "167;68;116;44", "wc_strength_and_weaknesses": "451;183;359;91", "wc_clarity_quality_novelty_and_reproducibility": "142;50;27;37", "wc_summary_review": "34;34;12;157", "wc_review": "794;335;514;329", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 2.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.5, 1.118033988749895 ], "wc_summary_paper_avg": [ 98.75, 47.16659305059037 ], "wc_strength_and_weaknesses_avg": [ 271.0, 141.6756859873987 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 64.0, 45.765707685995636 ], "wc_summary_review_avg": [ 59.25, 57.146194098994904 ], "wc_review_avg": [ 493.0, 189.01190438699885 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": 1.0, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:kCUuTY_oePEJ:scholar.google.com/&scioq=TT-Rules:+Extracting+%26+Optimizing+Exact+Rules+of+a+CNN-Based+Model+-+Application+to+Fairness&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Nanyang Technological University", "aff_unique_dep": "", "aff_unique_url": "https://www.ntu.edu.sg", "aff_unique_abbr": "NTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Singapore" }, { "id": "5rX7M4wa2R_", "title": "On Regularization for Explaining Graph Neural Networks: An Information Theory Perspective", "track": "main", "status": "Reject", "tldr": "We rethink the role of regularization in GNNs explainability from the perspective of information theory, and propose four intriguing propositions of regularization. ", "abstract": "This work studies the explainability of graph neural networks (GNNs), which is important for the credibility of GNNs in practical usage. Existing work mostly follows the two-phase paradigm to interpret a prediction: feature attribution and selection. However, another important component --- regularization, which is crucial to facilitate the above paradigm --- has been seldom studied. In this work, we explore the role of regularization in GNNs explainability from the perspective of information theory. Our main findings are: 1) regularization is essentially pursuing the balance between two phases, 2) its optimal coefficient is proportional to the sparsity of explanations, 3) existing methods imply an implicit regularization effect of stochastic mechanism, and 4) its contradictory effects on two phases are responsible for the out-of-distribution (OOD) issue in post-hoc explainability. Based on these findings, we propose two common optimization methods, which can bolster the performance of the current explanation methods via sparsity-adaptive and OOD-resistant regularization schemes. Extensive empirical studies validate our findings and proposed methods. 
Code is available at https://anonymous.4open.science/r/Rethink_Reg-07F0.\n", "keywords": "Explainability;Graph Neural Networks;Regularization", "primary_area": "", "supplementary_material": "", "author": "Junfeng Fang;Wei Liu;An Zhang;Xiang Wang;Xiangnan He;Kun Wang;Tat-Seng Chua", "authorids": "~Junfeng_Fang1;~Wei_Liu34;~An_Zhang2;~Xiang_Wang6;~Xiangnan_He1;~Kun_Wang15;~Tat-Seng_Chua2", "gender": "M;M;M;M;M;F;M", "homepage": "https://scholar.google.com/citations?user=beNNywsAAAAJ&hl=zh-CN;;https://github.com/xiangwang1223;http://staff.ustc.edu.cn/~hexn;http://home.ustc.edu.cn/~wk520529/#home;https://github.com/anzhang314;http://www.comp.nus.edu.sg/~chuats/", "dblp": "340/7929;;31/2864-10;59/1007;;78/5581-3;", "google_scholar": "beNNywsAAAAJ;9Hd32GUAAAAJ;https://scholar.google.com.sg/citations?user=HdhaQB0AAAAJ;https://scholar.google.com.sg/citations?user=X45Go24AAAAJ;UnyqjWQAAAAJ;https://scholar.google.com.sg/citations?user=BcX7GJcAAAAJ;https://scholar.google.com.tw/citations?user=Z9DWCBEAAAAJ", "orcid": ";;0000-0002-6148-6329;0000-0001-8472-7992;0000-0003-0602-169X;;0000-0001-6097-7807", "linkedin": ";;;;;;", "or_profile": "~Junfeng_Fang1;~Wei_Liu34;~Xiang_Wang6;~Xiangnan_He1;~Kun_Wang15;~AN_ZHANG1;~Tat-seng_Chua1", "aff": ";University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;National University of Singapore;National University of Singapore", "aff_domain": ";ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;nus.edu.sg;nus.edu.sg", "position": ";MS student;Full Professor;Professor;PhD student;Postdoc;Full Professor", "bibtex": "@misc{\nfang2023on,\ntitle={On Regularization for Explaining Graph Neural Networks: An Information Theory Perspective},\nauthor={Junfeng Fang and Wei Liu and An Zhang and Xiang Wang and Xiangnan He and Kun Wang and Tat-Seng Chua},\nyear={2023},\nurl={https://openreview.net/forum?id=5rX7M4wa2R_}\n}", "github": "", "project": "", "reviewers": "eWP9;SjeN;HoXU", "site": "https://openreview.net/forum?id=5rX7M4wa2R_", "pdf_size": 2734896, "recommendation": "1;6;6", "confidence": "3;2;4", "correctness": "2;3;4", "technical_novelty": "2;3;3", "empirical_novelty": "2;3;3", "wc_summary_paper": "63;38;84", "wc_strength_and_weaknesses": "214;138;406", "wc_clarity_quality_novelty_and_reproducibility": "60;54;42", "wc_summary_review": "30;15;67", "wc_review": "367;245;599", "wc_reply_reviewers": "918;90;125", "wc_reply_authors": "1311;657;1500", "reply_reviewers": "5;1;1", "reply_authors": "3;2;3", "recommendation_avg": [ 4.333333333333333, 2.357022603955158 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 61.666666666666664, 18.80307303489394 ], "wc_strength_and_weaknesses_avg": [ 252.66666666666666, 112.77509575354934 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 52.0, 7.483314773547883 ], "wc_summary_review_avg": [ 37.333333333333336, 21.853044537445015 ], "wc_review_avg": [ 403.6666666666667, 146.82718488829724 ], "wc_reply_reviewers_avg": [ 377.6666666666667, 382.3404532902871 ], "wc_reply_authors_avg": [ 1156.0, 361.1841635509508 ], "reply_reviewers_avg": [ 2.3333333333333335, 1.8856180831641267 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 7, 0 ], 
"corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.8660254037844387, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6674957004365897002&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0;0;1;1", "aff_unique_norm": "University of Science and Technology of China;National University of Singapore", "aff_unique_dep": ";", "aff_unique_url": "http://www.ustc.edu.cn;https://www.nus.edu.sg", "aff_unique_abbr": "USTC;NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;1", "aff_country_unique": "China;Singapore" }, { "id": "5s2v_0F7MG", "title": "OrthoReg: Improving Graph-regularized MLPs via Orthogonality Regularization", "track": "main", "status": "Reject", "tldr": "", "abstract": "Graph Neural Networks (GNNs) are currently dominating in modeling graph-structure data, while their high reliance on graph structure for inference significantly impedes them from widespread applications. By contrast, graph-Regularized MLPs (GR-MLPs) implicitly inject the graph structure information into model weights, while their performance can hardly match that of GNNs in most tasks. This motivates us to study the causes of the limited performance of GR-MLPs. In this paper, we demonstrate that node embeddings learned from conventional GR-MLPs suffer from dimensional collapse, a phenomenon in which the largest a few eigenvalues dominate the embedding space, and thus the expressive power is constrained. We further propose ORTHO-REG, a novel GR-MLP model, to mitigate the dimensional collapse issue. Through a soft regularization loss on the correlation matrix of node embeddings, ORTHO-REG explicitly encourages orthogonal node representations and thus can naturally avoid dimensionally collapsed representations. Experiments on traditional transductive semi-supervised classification tasks and inductive node classification for cold-start scenarios demonstrate its effectiveness and superiority.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/38007ab5f205a942fc0d5c18c9d9efd418168f1f.zip", "author": "Hengrui Zhang;Shen Wang;Soji Adeshina;Vassilis N. Ioannidis;Jiani Zhang;Xiao Qin;Christos Faloutsos;Da Zheng;George Karypis;Philip S. 
Yu", "authorids": "~Hengrui_Zhang1;~Shen_Wang2;~Soji_Adeshina1;~Vassilis_N._Ioannidis1;jennyzhang0215@gmail.com;qinxiao.work@gmail.com;~Christos_Faloutsos1;~Da_Zheng1;~George_Karypis1;~Philip_S._Yu1", "gender": "M;M;;;;;M;;M;M", "homepage": "https://hengruizhang98.github.io;;;https://scholar.google.com/citations?hl=en&user=mjmiI4sAAAAJ&view_op=list_works&authuser=1;;;https://www.cs.cmu.edu/~christos/;;;https://cs.uic.edu/profiles/philip-yu/", "dblp": ";;298/4855;;;;f/CFaloutsos;;;y/PhilipSYu", "google_scholar": "iwffiD0AAAAJ;G7twX6YAAAAJ;O2IS5isAAAAJ;;;;nd8lQQIAAAAJ;;ElqwScwAAAAJ;D0lL1r0AAAAJ", "orcid": "0009-0006-1330-0899;;;0000-0002-8367-0733;;;0000-0003-2996-9790;;;0000-0002-3491-5968", "linkedin": ";shen-wang-97309138/;sojiadeshina/;;;;christos-faloutsos-43a7aa2/;;;", "or_profile": "~Hengrui_Zhang1;~Shen_Wang2;~Soji_Adeshina1;~Vassilis_N._Ioannidis1;jennyzhang0215@gmail.com;qinxiao.work@gmail.com;~Christos_Faloutsos1;~Da_Zheng1;~George_Karypis1;~Philip_S._Yu1", "aff": "University of Illinois, Chicago;Amazon;Amazon;Amazon Web Services;;;Carnegie Mellon University;;University of Minnesota, Minneapolis;University of Illinois Chicago", "aff_domain": "uic.edu;amazon.com;amazon.com;amazon.com;;;cmu.edu;;umn.edu;uic.edu", "position": "PhD student;Researcher;Researcher;Applied Scientist II;;;Full Professor;;Full Professor;Full Professor", "bibtex": "@misc{\nzhang2023orthoreg,\ntitle={OrthoReg: Improving Graph-regularized {MLP}s via Orthogonality Regularization},\nauthor={Hengrui Zhang and Shen Wang and Soji Adeshina and Vassilis N. Ioannidis and Jiani Zhang and Xiao Qin and Christos Faloutsos and Da Zheng and George Karypis and Philip S. Yu},\nyear={2023},\nurl={https://openreview.net/forum?id=5s2v_0F7MG}\n}", "github": "", "project": "", "reviewers": "GzRn;JMnM;2EfD;Mv49", "site": "https://openreview.net/forum?id=5s2v_0F7MG", "pdf_size": 865243, "recommendation": "5;6;6;6", "confidence": "4;3;4;3", "correctness": "3;3;3;3", "technical_novelty": "2;3;2;3", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "58;81;71;38", "wc_strength_and_weaknesses": "209;158;309;372", "wc_clarity_quality_novelty_and_reproducibility": "46;292;5;6", "wc_summary_review": "42;117;40;37", "wc_review": "355;648;425;453", "wc_reply_reviewers": "0;69;0;0", "wc_reply_authors": "636;1242;1023;911", "reply_reviewers": "0;2;0;0", "reply_authors": "1;4;2;2", "recommendation_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 62.0, 16.077935190813527 ], "wc_strength_and_weaknesses_avg": [ 262.0, 83.56733811723333 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 87.25, 119.3636774735095 ], "wc_summary_review_avg": [ 59.0, 33.53356527421443 ], "wc_review_avg": [ 470.25, 108.6539805989638 ], "wc_reply_reviewers_avg": [ 17.25, 29.877876430563134 ], "wc_reply_authors_avg": [ 953.0, 218.33117047274766 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 10, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6774275014736808807&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;1;1;2;3;0", "aff_unique_norm": "University of Illinois at Chicago;Amazon;Carnegie Mellon University;University of Minnesota", 
"aff_unique_dep": ";Amazon.com, Inc.;;", "aff_unique_url": "https://www.uic.edu;https://www.amazon.com;https://www.cmu.edu;https://www.minnesota.edu", "aff_unique_abbr": "UIC;Amazon;CMU;UMN", "aff_campus_unique_index": "0;2;0", "aff_campus_unique": "Chicago;;Minneapolis", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "5s6NuOP9cW", "title": "Merging Models Pre-Trained on Different Features with Consensus Graph", "track": "main", "status": "Reject", "tldr": "Combining Pre-Trained Models with Different Feature Sets via Learning Consensus Graph", "abstract": "Learning global models effectively on private and decentralized datasets has become an increasingly important challenge of machine learning when applied in practice. Federated Learning (FL) has recently emerged as a solution paradigm to address this challenge. In particular, the FL clients agree to a common model parameterization in advance, which can then be updated collaboratively via synchronous aggregation of their local model updates. However, such strong requirement of modeling homogeneity and synchronicity across clients makes FL inapplicable to many practical learning scenarios that cannot afford such requirements. For example, in distributed sensing, a network of heterogeneous sensors sample from different data modalities of the same phenomenon. Each sensor thus requires its own specialized model. Local learning therefore needs to happen in isolation but inference still requires merging the local models for better performance. \n\nTo enable this, we investigate a feature fusion approach that extracts local feature representations from local models and incorporates them into a global representation to train a more holistic predictive model. We study two key aspects of this feature incorporation. First, we develop an alignment algorithm that draws accurate correspondence between feature components which are arbitrarily arranged across clients. Next, we propose learning a consensus graph that captures the high-order interactions between these feature components, which reveals how data with heterogeneous features can be stitched together coherently to train a better model. 
The proposed framework is demonstrated on four real-life data sets including monitoring and predicting power grids and traffic networks.", "keywords": "Graph Neural Network;Probabilistic Methods", "primary_area": "", "supplementary_material": "", "author": "Tengfei Ma;Trong Nghia Hoang;Jie Chen", "authorids": "~Tengfei_Ma1;~Trong_Nghia_Hoang1;~Jie_Chen1", "gender": "M;;M", "homepage": "https://sites.google.com/site/matf0123/;https://jiechenjiechen.github.io;https://htnghia87.github.io/", "dblp": "94/9023-1;92/6289-7;62/540", "google_scholar": "9OvNakkAAAAJ;Z-lkme8AAAAJ;E-kZZeQAAAAJ", "orcid": "0000-0002-1086-529X;;", "linkedin": ";;", "or_profile": "~Tengfei_Ma1;~Jie_Chen1;~Nghia_Hoang2", "aff": "International Business Machines;International Business Machines;Washington State University", "aff_domain": "ibm.com;ibm.com;eecs.wsu.edu", "position": "Researcher;Research Staff Member;Assistant Professor", "bibtex": "@misc{\nma2023merging,\ntitle={Merging Models Pre-Trained on Different Features with Consensus Graph},\nauthor={Tengfei Ma and Trong Nghia Hoang and Jie Chen},\nyear={2023},\nurl={https://openreview.net/forum?id=5s6NuOP9cW}\n}", "github": "", "project": "", "reviewers": "Szi9;iTHt;2ATz;6rxh", "site": "https://openreview.net/forum?id=5s6NuOP9cW", "pdf_size": 580991, "recommendation": "5;5;5;8", "confidence": "3;4;5;3", "correctness": "2;3;2;4", "technical_novelty": "3;2;2;3", "empirical_novelty": "3;2;2;4", "wc_summary_paper": "94;50;42;81", "wc_strength_and_weaknesses": "372;341;314;72", "wc_clarity_quality_novelty_and_reproducibility": "98;41;71;9", "wc_summary_review": "50;54;9;44", "wc_review": "614;486;436;206", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 2.75, 0.82915619758885 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 66.75, 21.44032415799724 ], "wc_strength_and_weaknesses_avg": [ 274.75, 118.84312138277082 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 54.75, 33.22931687531358 ], "wc_summary_review_avg": [ 39.25, 17.82379028153103 ], "wc_review_avg": [ 435.5, 147.54914435536384 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.5222329678670935, "corr_recommendation_correctness": 0.8703882797784892, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Rmp7vkRYRQQJ:scholar.google.com/&scioq=Merging+Models+Pre-Trained+on+Different+Features+with+Consensus+Graph&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;1", "aff_unique_norm": "International Business Machines Corporation;Washington State University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ibm.com;https://wsu.edu", "aff_unique_abbr": "IBM;WSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "How Sharpness-Aware Minimization Minimizes Sharpness?", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11242", "id": "5spDgWmpY6x", "poster": "", "openreview": "https://openreview.net/forum?id=5spDgWmpY6x", "slides": "https://iclr.cc/virtual/2023/poster/11242", "video": 
"https://iclr.cc/virtual/2023/poster/11242", "author_site": "Kaiyue Wen, Tengyu Ma, Zhiyuan Li", "tldr": "we prove the implicit bias of Sharpness-Aware Minimization (SAM) is minimizing the top eigenvalue of Hessian in the full-batch setting or minimizing the trace of Hessian when batch size is 1.", "abstract": "Sharpness-Aware Minimization (SAM) is a highly effective regularization technique for improving the generalization of deep neural networks for various settings. However, the underlying working of SAM remains elusive because of various intriguing approximations in the theoretical characterizations. SAM intends to penalize a notion of sharpness of the model but implements a computationally efficient variant; moreover, a third notion of sharpness was used for proving generalization guarantees. The subtle differences in these notions of sharpness can indeed lead to significantly different empirical results. This paper rigorously nails down the exact sharpness notion that SAM regularizes and clarifies the underlying mechanism. We also show that the two steps of approximations in the original motivation of SAM individually lead to inaccurate local conclusions, but their combination accidentally reveals the correct effect, when full-batch gradients are applied. Furthermore, we also prove that the stochastic version of SAM in fact regularizes the third notion of sharpness mentioned above, which is most likely to be the preferred notion for practical performance. The key mechanism behind this intriguing phenomenon is the alignment between the gradient and the top eigenvector of Hessian when SAM is applied.", "keywords": "implicit bias;implicit regularization;sharpness;sharpness aware minimization", "primary_area": "", "supplementary_material": "", "author": "Kaiyue Wen;Tengyu Ma;Zhiyuan Li", "authorids": "~Kaiyue_Wen1;~Tengyu_Ma1;~Zhiyuan_Li2", "gender": "M;M;M", "homepage": "https://whenwen.github.io/;http://ai.stanford.edu/~tengyuma/;https://zhiyuanli.ttic.edu", "dblp": "322/0395;54/9061;l/ZhiyuanLi", "google_scholar": ";i38QlUwAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-3128-868X;;", "linkedin": "kaiyue-wen-a3a336192/;;", "or_profile": "~Kaiyue_Wen1;~Tengyu_Ma1;~Zhiyuan_Li2", "aff": "Stanford University;Facebook AI Research;Computer Science Department, Stanford University", "aff_domain": "stanford.edu;fb.com;cs.stanford.edu", "position": "Intern;Visiting Scientist;Postdoc", "bibtex": "@inproceedings{\nwen2023how,\ntitle={How Sharpness-Aware Minimization Minimizes Sharpness?},\nauthor={Kaiyue Wen and Tengyu Ma and Zhiyuan Li},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=5spDgWmpY6x}\n}", "github": "", "project": "", "reviewers": "G4Gz;pjDp;ReRk", "pdf_size": 812525, "recommendation": "6;6;8", "confidence": "3;3;3", "correctness": "3;3;3", "technical_novelty": "2;3;3", "empirical_novelty": "0;1;3", "wc_summary_paper": "242;48;133", "wc_strength_and_weaknesses": "1059;140;251", "wc_clarity_quality_novelty_and_reproducibility": "321;6;7", "wc_summary_review": "58;12;25", "wc_review": "1680;206;416", "wc_reply_reviewers": "242;0;0", "wc_reply_authors": "1641;188;447", "reply_reviewers": "1;0;0", "reply_authors": "4;1;1", "recommendation_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.0 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.3333333333333333, 1.247219128924647 
], "wc_summary_paper_avg": [ 141.0, 79.40193112680993 ], "wc_strength_and_weaknesses_avg": [ 483.3333333333333, 409.57240575453267 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 111.33333333333333, 148.2572838757753 ], "wc_summary_review_avg": [ 31.666666666666668, 19.362047641943473 ], "wc_review_avg": [ 767.3333333333334, 651.0224437162072 ], "wc_reply_reviewers_avg": [ 80.66666666666667, 114.07989403142966 ], "wc_reply_authors_avg": [ 758.6666666666666, 632.800302008637 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 1.4142135623730951 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 64, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8766296623116329715&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "pdf": "https://openreview.net/pdf?id=5spDgWmpY6x", "email": "stanford.edu;fb.com;cs.stanford.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Stanford University;Meta", "aff_unique_dep": ";Facebook AI Research", "aff_unique_url": "https://www.stanford.edu;https://research.facebook.com", "aff_unique_abbr": "Stanford;FAIR", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "TextGrad: Advancing Robustness Evaluation in NLP by Gradient-Driven Optimization", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10897", "id": "5tKXUZil3X", "poster": "", "openreview": "https://openreview.net/forum?id=5tKXUZil3X", "slides": "https://iclr.cc/virtual/2023/poster/10897", "video": "https://iclr.cc/virtual/2023/poster/10897", "author_site": "Bairu Hou, Jinghan Jia, Yihua Zhang, Guanhua Zhang, Yang Zhang, Sijia Liu, Shiyu Chang", "tldr": "", "abstract": "Robustness evaluation against adversarial examples has become increasingly important to unveil the trustworthiness of the prevailing deep models in natural language processing (NLP). However, in contrast to the computer vision domain where the first-order projected gradient descent (PGD) is used as the benchmark approach to generate adversarial examples for robustness evaluation, there lacks a principled first-order gradient-based robustness evaluation framework in NLP. The emerging optimization challenges lie in 1) the discrete nature of textual inputs together with the strong coupling between the perturbation location and the actual content, and 2) the additional constraint that the perturbed text should be fluent and achieve a low perplexity under a language model. These challenges make the development of PGD-like NLP attacks difficult. To bridge the gap, we propose TextGrad, a new attack generator using gradient-driven optimization, supporting high-accuracy and high-quality assessment of adversarial robustness in NLP. Specifically, we address the aforementioned challenges in a unified optimization framework. And we develop an effective convex relaxation method to co-optimize the continuously-relaxed site selection and perturbation variables and leverage an effective sampling method to establish an accurate mapping from the continuous optimization variables to the discrete textual perturbations. Moreover, as a first-order attack generation method, TextGrad can be baked into adversarial training to further improve the robustness of NLP models. 
Extensive experiments are provided to demonstrate the effectiveness of TextGrad not only in attack generation for robustness evaluation but also in adversarial defense. From the attack perspective, we show that TextGrad achieves remarkable improvements in both the attack success rate and the perplexity score over five state-of-the-art baselines. From the defense perspective, TextGrad-enabled adversarial training yields the most robust NLP model against a wide spectrum of NLP attacks. ", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Bairu Hou;Jinghan Jia;Yihua Zhang;Guanhua Zhang;Yang Zhang;Sijia Liu;Shiyu Chang", "authorids": "~Bairu_Hou2;~Jinghan_Jia1;~Yihua_Zhang1;~Guanhua_Zhang1;~Yang_Zhang3;~Sijia_Liu1;~Shiyu_Chang2", "gender": ";M;M;;M;M;Unspecified", "homepage": "https://hbr690188270.github.io/;https://jinghanjia.netlify.app/;https://yihua-zhang.com;;;https://lsjxjtu.github.io/;http://people.csail.mit.edu/chang87/", "dblp": "274/7151;286/5392;;171/0962.html;06/6785-1;128/6972-1;28/9988", "google_scholar": "FO7taJgAAAAJ;bqP_zxYAAAAJ;https://scholar.google.com/citations?hl=zh-CN;_hrEN-sAAAAJ;_-5PSgQAAAAJ;C7dO_UgAAAAJ;r21asW4AAAAJ", "orcid": ";;;;;;", "linkedin": ";jinghan-jia-5194451ba/;zhangyihua/;;;;", "or_profile": "~Bairu_Hou2;~Jinghan_Jia1;~Yihua_Zhang1;~Guanhua_Zhang1;~Yang_Zhang3;~Sijia_Liu1;~Shiyu_Chang2", "aff": "University of California, Santa Barbara;Michigan State University;Michigan State University;Max Planck Institute for Intelligent Systems, Max-Planck Institute;International Business Machines;Michigan State University;University of California, Santa Barbara", "aff_domain": "ucsb.edu;msu.edu;msu.edu;tuebingen.mpg.de;ibm.com;msu.edu;ucsb.edu", "position": "PhD student;PhD student;PhD student;PhD student;Research Staff Employee;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nhou2023textgrad,\ntitle={TextGrad: Advancing Robustness Evaluation in {NLP} by Gradient-Driven Optimization},\nauthor={Bairu Hou and Jinghan Jia and Yihua Zhang and Guanhua Zhang and Yang Zhang and Sijia Liu and Shiyu Chang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=5tKXUZil3X}\n}", "github": "", "project": "", "reviewers": "7WNo;iGRm;eerx;J6Pr", "pdf_size": 759459, "recommendation": "5;6;6;8", "confidence": "3;3;4;3", "correctness": "3;3;3;3", "technical_novelty": "3;3;3;3", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "158;34;111;64", "wc_strength_and_weaknesses": "246;180;279;156", "wc_clarity_quality_novelty_and_reproducibility": "30;44;92;29", "wc_summary_review": "75;39;110;24", "wc_review": "509;297;592;273", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 91.75, 47.07640066954992 ], "wc_strength_and_weaknesses_avg": [ 215.25, 49.403314666123364 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 48.75, 25.66490794840301 ], "wc_summary_review_avg": [ 62.0, 33.3391661563393 ], "wc_review_avg": [ 417.75, 136.21926258793212 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 7, 0 ], 
"corr_recommendation_confidence": -0.13245323570650439, "corr_recommendation_correctness": 0.0, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4250032082974903823&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=5tKXUZil3X", "email": "ucsb.edu;msu.edu;msu.edu;tuebingen.mpg.de;ibm.com;msu.edu;ucsb.edu", "author_num": 7, "aff_unique_index": "0;1;1;2;3;1;0", "aff_unique_norm": "University of California, Santa Barbara;Michigan State University;Max Planck Institute for Intelligent Systems;International Business Machines Corporation", "aff_unique_dep": ";;Intelligent Systems;", "aff_unique_url": "https://www.ucsb.edu;https://www.msu.edu;https://www.mpi-is.mpg.de;https://www.ibm.com", "aff_unique_abbr": "UCSB;MSU;MPI-IS;IBM", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Santa Barbara;", "aff_country_unique_index": "0;0;0;1;0;0;0", "aff_country_unique": "United States;Germany" }, { "id": "5tKhUU5WBi8", "title": "Out-of-distribution Detection with Diffusion-based Neighborhood", "track": "main", "status": "Reject", "tldr": "We design a general strategy to combine a diffusion model and a Resnet to do OOD detection.", "abstract": "Out-of-distribution (OOD) detection is an important task to ensure the reliability and safety of deep learning and the discriminator models outperform others for now. However, the feature extraction of such models must compress the data and lose certain information, leaving room for bad cases and malicious attacks. However, despite effectively fitting the data distribution and producing high-quality samples, generative models lack suitable indicator scores to match with discriminator models in the OOD detection tasks. In this paper, we find that these two kinds of models can be combined to solve each other's problems. We introduce diffusion models (DMs), a kind of powerful generative model, into OOD detection and find that the denoising process of DMs also functions as a novel form of asymmetric interpolation. This property establishes a diffusion-based neighborhood for each input data. Then, we perform discriminator-based OOD detection based on the diffusion-based neighborhood instead of isolated data. In this combination, the discriminator models provide detection metrics for generation models and the diffusion-based neighborhood reduces the information loss of feature extraction. According to our experiments on CIFAR10 and CIFAR100, our new methods successfully outperform state-of-the-art methods. 
Our implementation is provided in the supplementary materials.", "keywords": "OOD detection;diffusion model", "primary_area": "", "supplementary_material": "/attachment/0e4a29f171afedf4ad2619e4cbc4524887f99bb0.zip", "author": "Luping Liu;Yi Ren;Xize Cheng;Zhou Zhao", "authorids": "~Luping_Liu2;~Yi_Ren2;~Xize_Cheng1;~Zhou_Zhao2", "gender": ";M;M;M", "homepage": ";https://rayeren.github.io/;https://exgc.github.io/;https://dblp.uni-trier.de/pid/75/7785.html?", "dblp": ";75/6568-6;334/2167;75/7785", "google_scholar": ";4FA6C0AAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.hk/citations?user=IIoFY90AAAAJ", "orcid": ";;0000-0001-9708-3225;0000-0001-6121-0384", "linkedin": ";;;", "or_profile": "~Luping_Liu2;~Yi_Ren2;~Xize_Cheng1;~Zhou_Zhao2", "aff": ";ByteDance;Zhejiang University;Zhejiang University", "aff_domain": ";bytedance.com;zju.edu.cn;zju.edu.cn", "position": ";Researcher;PhD student;Associate Professor", "bibtex": "@misc{\nliu2023outofdistribution,\ntitle={Out-of-distribution Detection with Diffusion-based Neighborhood},\nauthor={Luping Liu and Yi Ren and Xize Cheng and Zhou Zhao},\nyear={2023},\nurl={https://openreview.net/forum?id=5tKhUU5WBi8}\n}", "github": "", "project": "", "reviewers": "sRvP;ZBTr;ziL9;H7mg", "site": "https://openreview.net/forum?id=5tKhUU5WBi8", "pdf_size": 739555, "recommendation": "3;3;3;5", "confidence": "4;3;3;4", "correctness": "2;3;2;4", "technical_novelty": "3;3;2;1", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "58;87;39;63", "wc_strength_and_weaknesses": "334;272;161;202", "wc_clarity_quality_novelty_and_reproducibility": "35;46;203;59", "wc_summary_review": "48;45;157;46", "wc_review": "475;450;560;370", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "629;657;1066;423", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;2;1", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.75, 0.82915619758885 ], "technical_novelty_avg": [ 2.25, 0.82915619758885 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 61.75, 17.108112110925624 ], "wc_strength_and_weaknesses_avg": [ 242.25, 66.19053935420077 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 85.75, 68.22527024497595 ], "wc_summary_review_avg": [ 74.0, 47.93224384482746 ], "wc_review_avg": [ 463.75, 67.765680842149 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 693.75, 233.14091768713615 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.8703882797784891, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ZkOPdR88L7QJ:scholar.google.com/&scioq=Out-of-distribution+Detection+with+Diffusion-based+Neighborhood&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;1", "aff_unique_norm": "ByteDance;Zhejiang University", "aff_unique_dep": ";", "aff_unique_url": "https://www.bytedance.com;https://www.zju.edu.cn", "aff_unique_abbr": "ByteDance;ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "5uH745DalVx", "title": "CooPredict : Cooperative Differential Games For Time Series Prediction", "track": "main", "status": "Withdraw", "tldr": "We propose a novel framework for time series prediction as an application of cooperative differential games. 
", "abstract": "Modeling time series dynamics with neural differential equations has become a major line of research that opened new ways to handle various real-world scenarios (e.g., missing observations, irregular times). Despite the progress, most existing methods still face challenges in providing an explainable rationale on temporal association, which tells how past observations affect future states. To tackle this challenge, we introduce novel multi-agent based neural stochastic differential equations and analyze the time series prediction through the lens of cooperative differential game. Our framework provides an explainable method that can reveal the underlying temporal relevance of the data and fully utilizes this information to systemically solve the prediction problem. We develop the gradient descent based deep neural fictitious play to approximate the Nash equilibrium and theoretical results assure the convergence. Throughout the experiments on various datasets, we demonstrate the superiority of our framework over all the benchmarks in modeling time series prediction by capitalizing on the underlying temporal dynamics without any inductive bias. An ablation study shows that neural agents of the proposed framework learn intrinsic temporal relevance to predict accurate time series.", "keywords": "time series forecasting;time series prediction;neural stochastic differential equations;cooperative differential game", "primary_area": "", "supplementary_material": "", "author": "Sung Woo Park;Byoungwoo Park;Changhee Lee;Junseok Kwon", "authorids": "~Sung_Woo_Park2;~Byoungwoo_Park1;~Changhee_Lee1;~Junseok_Kwon5", "gender": "M;M;;M", "homepage": ";https://bw-park.github.io/;;https://sites.google.com/view/cau-cvml/", "dblp": "92/6585;354/2891;;04/425", "google_scholar": "B1xpjO8AAAAJ;https://scholar.google.com/citations?hl=ko;https://scholar.google.com/citations?hl=en;lwsaTnEAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Sung_Woo_Park2;~Byoungwoo_Park1;~Changhee_Lee1;~Junseok_Kwon5", "aff": "ChungAng University;;ChungAng University;Chung-Ang University", "aff_domain": "cau.ac.kr;;cau.ac.kr;cau.ac.kr", "position": "PhD student;;Assistant Professor;Full Professor", "bibtex": "@misc{\npark2023coopredict,\ntitle={CooPredict : Cooperative Differential Games For Time Series Prediction},\nauthor={Sung Woo Park and Byoungwoo Park and Changhee Lee and Junseok Kwon},\nyear={2023},\nurl={https://openreview.net/forum?id=5uH745DalVx}\n}", "github": "", "project": "", "reviewers": "jXp9;dLYs;cyPc", "site": "https://openreview.net/forum?id=5uH745DalVx", "pdf_size": 4410210, "recommendation": "5;5;8", "confidence": "4;3;3", "correctness": "3;4;3", "technical_novelty": "2;2;4", "empirical_novelty": "3;2;2", "wc_summary_paper": "60;147;68", "wc_strength_and_weaknesses": "105;278;66", "wc_clarity_quality_novelty_and_reproducibility": "82;55;40", "wc_summary_review": "131;38;291", "wc_review": "378;518;465", "wc_reply_reviewers": "38;0;0", "wc_reply_authors": "1136;834;907", "reply_reviewers": "1;0;0", "reply_authors": "4;4;4", "recommendation_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.9428090415820634 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 91.66666666666667, 39.262648351270904 ], "wc_strength_and_weaknesses_avg": [ 149.66666666666666, 92.13154846317423 ], 
"wc_clarity_quality_novelty_and_reproducibility_avg": [ 59.0, 17.378147196982766 ], "wc_summary_review_avg": [ 153.33333333333334, 104.48710712597777 ], "wc_review_avg": [ 453.6666666666667, 57.71385352659176 ], "wc_reply_reviewers_avg": [ 12.666666666666666, 17.913371790059205 ], "wc_reply_authors_avg": [ 959.0, 128.65716717954996 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 4.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.5, "corr_recommendation_correctness": -0.5, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:6NWyvqO4jkwJ:scholar.google.com/&scioq=CooPredict+:+Cooperative+Differential+Games+For+Time+Series+Prediction&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;1", "aff_unique_norm": "Chungang University;Chung-Ang University", "aff_unique_dep": ";", "aff_unique_url": "http://www.cau.ac.kr;http://www.cau.ac.kr", "aff_unique_abbr": "CAU;CAU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "id": "5udLUhg1E5", "title": "Only For You: Deep Neural Anti-Forwarding Watermark Preserves Image Privacy", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "In recent decades, messaging apps (e.g., Facebook Messager, Whatsapp, Wechat, Snapchat) have expanded exponentially, where a huge amount of private image sharing takes place daily. However, within these apps, the possible unauthorised or malicious image forwarding among users poses significant threats to personal image privacy. In specific situations, we hope to send private and confidential images (e.g., personal selfies) in an `only for you' manner. Given limited existing studies on this topic, for the first time, we propose the Deep Neural Anti-Forwarding Watermark (DeepRAFT) that enables media platforms to check and block any unauthorised forwarding of protected images through injecting non-fragile and invisible watermarks. To this end, we jointly train a DeepRAFT encoder and scanner, where the encoder embeds a confidentiality stamp into images as watermarks, and the scanner learns to detect them.\nTo ensure that the technique is robust and resistant to tampering, we involve a series of data augmentations (mounted on a stochastic concatenation process) and adversarial defenses (i.e., adversarial training and randomized smoothing) towards both common image corruptions (e.g., rotation, cropping, color jitters, defocus blur, perspective warping, pixel noise, JPEG compression) and adversarial attacks (i.e., under both black and white box settings). Experiments on Mirflickr and MetFaces datasets demonstrate that DeepRAFT can efficiently and robustly imbue and detect the anti-forwarding watermark in images. Moreover, the trained DeepRAFT encoder and scanner can be easily transferred in a zero-shot manner even with a significant domain shift. 
We release our code and models to inspire studies in this anti-forwarding area at \\url{link.available.upon.acceptance.}", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Xinghua Qu;Alvin Chan;Yew-Soon Ong;Pengfei Wei;Xiang Yin;Caishun Chen;Zhu Sun;Zejun MA", "authorids": "~Xinghua_Qu1;~Alvin_Chan1;~Yew-Soon_Ong1;~Pengfei_Wei3;~Xiang_Yin2;~Caishun_Chen1;~Zhu_Sun1;~Zejun_MA1", "gender": "M;M;M;M;F;M;M;M", "homepage": "https://xinghua-qu.github.io/;https://www.alvinchan.io/;;;https://sites.google.com/view/zhusun/home;;https://pengfei-wei.com/;http://www.ntu.edu.sg/home/asysong/", "dblp": "18/1099;163/6518.html;18/1022-6.html;184/2141;163/5129-1.html;;29/11273-1;64/4136", "google_scholar": "https://scholar.google.com.sg/citations?user=2PxlmU0AAAAJ;SP4eIUYAAAAJ;e6_J-lEAAAAJ;0vUnBs8AAAAJ;https://scholar.google.com.sg/citations?user=kJy0fd8AAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.sg/citations?user=a94WthkAAAAJ;https://scholar.google.com.tw/citations?user=h9oWOsEAAAAJ", "orcid": "0000-0001-8072-2019;;;0000-0003-1143-3138;0000-0002-3350-7022;;;0000-0002-4480-169X", "linkedin": "xinghua-qu/;;;cschen1205/;;zejun-ma-58614365/;;", "or_profile": "~Xinghua_Qu1;~Alvin_Chan1;~Xiang_Yin2;~Caishun_Chen1;~Zhu_Sun1;~Zejun_MA1;~pengfei_wei2;~Yew_Soon_Ong1", "aff": "Bytedance Seed;Massachusetts Institute of Technology;ByteDance Inc.;CFAR, IHPC, A*STAR;Institute of High Performance Computing, Singapore, A*STAR;ByteDance Inc.;AI LAB Bytedance;Nanyang Technological University", "aff_domain": "bytedance.com;mit.edu;bytedance.com;astar.edu.sg;ihpc.a-star.edu.sg;bytedance.com;bytedance.com;ntu.edu.sg", "position": "Research Scientist;Postdoc;Researcher;Researcher;Researcher;Principal Researcher;Researcher;Full Professor", "bibtex": "@misc{\nqu2023only,\ntitle={Only For You: Deep Neural Anti-Forwarding Watermark Preserves Image Privacy},\nauthor={Xinghua Qu and Alvin Chan and Yew-Soon Ong and Pengfei Wei and Xiang Yin and Caishun Chen and Zhu Sun and Zejun MA},\nyear={2023},\nurl={https://openreview.net/forum?id=5udLUhg1E5}\n}", "github": "", "project": "", "reviewers": "W2HW;pWnS;Fnkz;8x4V", "site": "https://openreview.net/forum?id=5udLUhg1E5", "pdf_size": 37561277, "recommendation": "3;5;5;6", "confidence": "4;4;4;4", "correctness": "2;3;3;4", "technical_novelty": "2;3;2;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "108;62;53;80", "wc_strength_and_weaknesses": "39;505;510;277", "wc_clarity_quality_novelty_and_reproducibility": "22;31;39;190", "wc_summary_review": "179;25;33;63", "wc_review": "348;623;635;610", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 75.75, 21.00446381129497 ], "wc_strength_and_weaknesses_avg": [ 332.75, 193.9618196965578 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 70.5, 69.25496372102147 ], "wc_summary_review_avg": [ 75.0, 61.69278726074872 ], "wc_review_avg": [ 554.0, 119.26231592586151 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.9733285267845754, "gs_citation": 
0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:6H--lz8eRx0J:scholar.google.com/&scioq=Only+For+You:+Deep+Neural+Anti-Forwarding+Watermark+Preserves+Image+Privacy&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;0;2;3;0;0;4", "aff_unique_norm": "ByteDance;Massachusetts Institute of Technology;A*STAR;Institute of High Performance Computing;Nanyang Technological University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.bytedance.com;https://web.mit.edu;https://www.a-star.edu.sg;https://www.ihpc.a-star.edu.sg;https://www.ntu.edu.sg", "aff_unique_abbr": "Bytedance;MIT;A*STAR;IHPC;NTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;2;2;0;0;2", "aff_country_unique": "China;United States;Singapore" }, { "title": "Augmentation Component Analysis: Modeling Similarity via the Augmentation Overlaps", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10818", "id": "5vM51iamNeL", "poster": "/media/PosterPDFs/ICLR%202023/10818.png?t=1680782869.7214541", "openreview": "https://openreview.net/forum?id=5vM51iamNeL", "slides": "https://iclr.cc/virtual/2023/poster/10818", "video": "https://iclr.cc/virtual/2023/poster/10818", "author_site": "Lu Han, Han-Jia Ye, De-Chuan Zhan", "tldr": "", "abstract": "Self-supervised learning aims to learn a embedding space where semantically similar samples are close. Contrastive learning methods pull views of samples together and push different samples away, which utilizes semantic invariance of augmentation but ignores the relationship between samples. To better exploit the power of augmentation, we observe that semantically similar samples are more likely to have similar augmented views. Therefore, we can take the augmented views as a special description of a sample. In this paper, we model such a description as the augmentation distribution, and we call it augmentation feature. The similarity in augmentation feature reflects how much the views of two samples overlap and is related to their semantical similarity. Without computational burdens to explicitly estimate values of the augmentation feature, we propose Augmentation Component Analysis (ACA) with a contrastive-like loss to learn principal components and an on-the-fly projection loss to embed data. ACA equals an efficient dimension reduction by PCA and extracts low-dimensional embeddings, theoretically preserving the similarity of augmentation distribution between samples. 
Empirical results show that our method can achieve competitive results against various traditional contrastive learning methods on different benchmarks.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/e31dbf6f492f677ac8e3689a4f8ecb0aeb5f3d0c.zip", "author": "Lu Han;Han-Jia Ye;De-Chuan Zhan", "authorids": "~Lu_Han2;~Han-Jia_Ye1;~De-Chuan_Zhan1", "gender": "M;M;M", "homepage": "http://www.lamda.nju.edu.cn/hanlu/;http://www.lamda.nju.edu.cn/yehj;http://www.lamda.nju.edu.cn/zhandc/", "dblp": ";165/3014;74/498", "google_scholar": "https://scholar.google.com.hk/citations?user=m-WYn7gAAAAJ;mgOYhtoAAAAJ;mYJf4TcAAAAJ", "orcid": ";;0000-0002-3533-2078", "linkedin": ";;", "or_profile": "~Lu_Han2;~Han-Jia_Ye1;~De-Chuan_Zhan1", "aff": "Nanjing University;Nanjing University;Nanjing University", "aff_domain": "nju.edu.cn;nju.edu.cn;nju.edu.cn", "position": "PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nhan2023augmentation,\ntitle={Augmentation Component Analysis: Modeling Similarity via the Augmentation Overlaps},\nauthor={Lu Han and Han-Jia Ye and De-Chuan Zhan},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=5vM51iamNeL}\n}", "github": "", "project": "", "reviewers": "1C2x;fbkG;LVcG;gr8r", "pdf_size": 1592319, "recommendation": "6;6;6;8", "confidence": "3;4;3;4", "correctness": "4;4;3;4", "technical_novelty": "3;2;3;3", "empirical_novelty": "3;2;3;2", "wc_summary_paper": "177;58;102;84", "wc_strength_and_weaknesses": "243;242;325;471", "wc_clarity_quality_novelty_and_reproducibility": "60;38;18;36", "wc_summary_review": "71;5;14;101", "wc_review": "551;343;459;692", "wc_reply_reviewers": "114;0;5;228", "wc_reply_authors": "514;804;673;767", "reply_reviewers": "1;0;1;2", "reply_authors": "2;2;2;3", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 105.25, 44.27965108263614 ], "wc_strength_and_weaknesses_avg": [ 320.25, 93.32570653362342 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 38.0, 14.89966442575134 ], "wc_summary_review_avg": [ 47.75, 39.820691857374854 ], "wc_review_avg": [ 511.25, 127.75831675472247 ], "wc_reply_reviewers_avg": [ 86.75, 93.41138849198207 ], "wc_reply_authors_avg": [ 689.5, 112.0145079889208 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15779702735781969863&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=5vM51iamNeL", "email": "nju.edu.cn;nju.edu.cn;nju.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Nanjing University", "aff_unique_dep": "", "aff_unique_url": "https://www.nju.edu.cn", "aff_unique_abbr": "Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "5wa-ueGGI33", "title": "Shortcut Learning Through the Lens of Early Training Dynamics", "track": "main", "status": "Reject", "tldr": "Potential shortcuts can be found by monitoring the easy features learned by the 
initial layers of a DNN early during the training using suitable instance difficulty metrics.", "abstract": "Deep Neural Networks (DNNs) are prone to learn shortcut patterns that damage the generalization of the DNN during deployment. Shortcut learning is concerning, particularly when the DNNs are applied to safety-critical domains. This paper aims to better understand shortcut learning through the lens of the learning dynamics of the internal neurons during the training process. More specifically, we make the following observations: (1) While previous works treat shortcuts as synonymous with spurious correlations, we emphasize that not all spurious correlations are shortcuts. We show that shortcuts are only those spurious features that are ``easier'' than the core features. (2) We build upon this premise and use instance difficulty methods (like Prediction Depth) to quantify ``easy'' and to identify this behavior during the training phase. (3) We empirically show that shortcut learning can be detected by observing the learning dynamics of the DNN's early layers, irrespective of the network architecture used. In other words, easy features learned by the initial layers of a DNN early during the training are potential shortcuts. We verify our claims on simulated and real medical imaging data and justify the empirical success of our hypothesis by showing the theoretical connections between Prediction Depth and information-theoretic concepts like $\\mathcal{V}$-usable information. Lastly, our experiments show the insufficiency of monitoring only accuracy plots during training (as is common in machine learning pipelines), and we highlight the need for monitoring early training dynamics using example difficulty metrics.", "keywords": "shortcut learning;spurious correlations;convolutional neural networks;deep learning;machine learning;computer vision;training dynamics", "primary_area": "", "supplementary_material": "", "author": "Nihal Murali;Aahlad Manas Puli;Ke Yu;Rajesh Ranganath;kayhan Batmanghelich", "authorids": "~Nihal_Murali1;~Aahlad_Manas_Puli1;~Ke_Yu4;~Rajesh_Ranganath2;~kayhan_Batmanghelich1", "gender": "M;M;M;;M", "homepage": "https://nihal-m.github.io/;http://aahladmanas.github.io;;;http://batman-lab.com", "dblp": "210/5777.html;228/9272;;97/7057;38/193", "google_scholar": "https://scholar.google.com/citations?hl=en;xWmCmBQAAAAJ;BwaMMgoAAAAJ;;PvHFAfIAAAAJ", "orcid": ";;0000-0001-9882-5729;;0000-0001-9893-9136", "linkedin": ";;yuke82/;;", "or_profile": "~Nihal_Murali1;~Aahlad_Manas_Puli1;~Ke_Yu4;~Rajesh_Ranganath2;~kayhan_Batmanghelich1", "aff": "University of Pittsburgh;New York University;University of Pittsburgh;New York University;University of Pittsburgh", "aff_domain": "pitt.edu;nyu.edu;pitt.edu;nyu.edu;pitt.edu", "position": "PhD student;PhD student;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@misc{\nmurali2023shortcut,\ntitle={Shortcut Learning Through the Lens of Early Training Dynamics},\nauthor={Nihal Murali and Aahlad Manas Puli and Ke Yu and Rajesh Ranganath and kayhan Batmanghelich},\nyear={2023},\nurl={https://openreview.net/forum?id=5wa-ueGGI33}\n}", "github": "", "project": "", "reviewers": "5RsT;eVSL;RFyP;3xDy", "site": "https://openreview.net/forum?id=5wa-ueGGI33", "pdf_size": 2118537, "recommendation": "3;6;6;6", "confidence": "4;4;3;2", "correctness": "2;3;3;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;3;3;4", "wc_summary_paper": "165;180;106;49", "wc_strength_and_weaknesses": "93;306;333;126", 
"wc_clarity_quality_novelty_and_reproducibility": "674;163;190;58", "wc_summary_review": "73;143;39;31", "wc_review": "1005;792;668;264", "wc_reply_reviewers": "848;275;370;0", "wc_reply_authors": "2618;530;2211;618", "reply_reviewers": "3;1;1;0", "reply_authors": "5;1;5;2", "recommendation_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 125.0, 51.870029882389694 ], "wc_strength_and_weaknesses_avg": [ 214.5, 106.07662324942287 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 271.25, 237.69873264281406 ], "wc_summary_review_avg": [ 71.5, 44.189930979805794 ], "wc_review_avg": [ 682.25, 269.883655488805 ], "wc_reply_reviewers_avg": [ 373.25, 305.92758538582297 ], "wc_reply_authors_avg": [ 1494.25, 931.9518160827844 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 3.25, 1.7853571071357126 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.5222329678670935, "corr_recommendation_correctness": 1.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9197137712601725336&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;0;1;0", "aff_unique_norm": "University of Pittsburgh;New York University", "aff_unique_dep": ";", "aff_unique_url": "https://www.pitt.edu;https://www.nyu.edu", "aff_unique_abbr": "Pitt;NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "5ycxwq2VFAX", "title": "DECODING LAYER SALIENCY IN TRANSFORMERS", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "In this paper, we introduce a strategy for identifying textual saliency in large-scale language models applied to classification tasks. In visual networks where saliency is more well-studied, saliency is naturally localized through the convolutional layers of the network; however, the same is not true in modern transformer-stack networks used to process natural language. We adapt gradient-based saliency methods for these networks, propose a method for evaluating the degree of semantic coherence of each layer, and demonstrate consistent improvement over numerous other methods for textual saliency on multiple benchmark classification datasets. 
Our approach requires no additional training or access to labelled data, and is highly computationally efficient compared with alternative methods.", "keywords": "saliency;explainability;transparency;transformers;NLP;feature attribution", "primary_area": "", "supplementary_material": "/attachment/318d3835bb93a6e1917abf2e066f42ea207ec076.zip", "author": "Elizabeth Mary Hou;Gregory Castanon", "authorids": "~Elizabeth_Mary_Hou1;gregory.castanon@str.us", "gender": ";", "homepage": "http://lizardintelligence.net/;", "dblp": "184/9143;", "google_scholar": "pHvwsLwAAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Elizabeth_Mary_Hou1;gregory.castanon@str.us", "aff": "STR;", "aff_domain": "str.us;", "position": "Researcher;", "bibtex": "@misc{\nhou2023decoding,\ntitle={{DECODING} {LAYER} {SALIENCY} {IN} {TRANSFORMERS}},\nauthor={Elizabeth Mary Hou and Gregory Castanon},\nyear={2023},\nurl={https://openreview.net/forum?id=5ycxwq2VFAX}\n}", "github": "", "project": "", "reviewers": "UMGB;Aqm6;CACK", "site": "https://openreview.net/forum?id=5ycxwq2VFAX", "pdf_size": 5154510, "recommendation": "3;5;6", "confidence": "4;2;4", "correctness": "3;4;3", "technical_novelty": "2;3;3", "empirical_novelty": "2;0;3", "wc_summary_paper": "73;32;118", "wc_strength_and_weaknesses": "298;171;170", "wc_clarity_quality_novelty_and_reproducibility": "16;20;26", "wc_summary_review": "92;28;52", "wc_review": "479;251;366", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1007;528;545", "reply_reviewers": "0;0;0", "reply_authors": "3;2;2", "recommendation_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.6666666666666667, 1.247219128924647 ], "wc_summary_paper_avg": [ 74.33333333333333, 35.122009560324926 ], "wc_strength_and_weaknesses_avg": [ 213.0, 60.10546286874985 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 20.666666666666668, 4.109609335312651 ], "wc_summary_review_avg": [ 57.333333333333336, 26.398653164297773 ], "wc_review_avg": [ 365.3333333333333, 93.08180392655818 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 693.3333333333334, 221.90438381328917 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.1889822365046137, "corr_recommendation_correctness": 0.18898223650461363, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:55tXFDVVWrcJ:scholar.google.com/&scioq=DECODING+LAYER+SALIENCY+IN+TRANSFORMERS&hl=en&as_sdt=0,5", "gs_version_total": 0 }, { "id": "5zaWBdMxcF1", "title": "Physically Plausible and Conservative Solutions to Navier-Stokes Equations Using Physics-Informed CNNs", "track": "main", "status": "Withdraw", "tldr": "Solving Navier-Stokes Equations Using PICNN", "abstract": "Physics-informed Neural Network (PINN) is an emerging approach for efficiently solving partial differential equations (PDEs) using neural networks. PICNN, a variant of PINN enhanced by convolutional neural networks (CNNs), has achieved better results on a series of PDEs since the parameter-sharing property of CNNs is effective for learning spatial dependencies. 
However, applying existing PICNN-based methods to solve Navier-Stokes equations can generate oscillating predictions, which are inconsistent with the laws of physics and the conservation properties. To address this issue, we propose a novel method that combines PICNN with the finite volume method to obtain physically plausible and conservative solutions to Navier-Stokes equations. We derive the second-order upwind difference scheme of Navier-Stokes equations using the finite volume method. Then we use the derived scheme to calculate the partial derivatives and construct the physics-informed loss function. The proposed method is assessed by experiments on steady-state Navier-Stokes equations under different scenarios, including convective heat transfer, lid-driven cavity flow, etc. The experimental results demonstrate that our method can effectively improve the plausibility and the accuracy of the predicted solutions from PICNN.", "keywords": "Finite volume method;Navier-Stokes equation;Partial differential equation;Physics-informed convolutional neural network", "primary_area": "", "supplementary_material": "", "author": "Jianfeng Li;Liangying Zhou;Jingwei Sun;Guangzhong Sun", "authorids": "~Jianfeng_Li5;~Liangying_Zhou1;~Jingwei_Sun3;~Guangzhong_Sun1", "gender": "M;F;;M", "homepage": ";;https://faculty.ustc.edu.cn/sunjingwei;", "dblp": ";;https://dblp.uni-trier.de/pid/66/7761-1;44/1372", "google_scholar": ";;;", "orcid": "0000-0001-7208-1573;0000-0002-7771-5683;;0000-0002-0794-7681", "linkedin": ";;;", "or_profile": "~Jianfeng_Li5;~Liangying_Zhou1;~Jingwei_Sun3;~Guangzhong_Sun1", "aff": "University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn", "position": "MS student;MS student;Researcher;Full Professor", "bibtex": "@misc{\nli2023physically,\ntitle={Physically Plausible and Conservative Solutions to Navier-Stokes Equations Using Physics-Informed {CNN}s},\nauthor={Jianfeng Li and Liangying Zhou and Jingwei Sun and Guangzhong Sun},\nyear={2023},\nurl={https://openreview.net/forum?id=5zaWBdMxcF1}\n}", "github": "", "project": "", "reviewers": "TDY5;RDRN;eH1Y", "site": "https://openreview.net/forum?id=5zaWBdMxcF1", "pdf_size": 879373, "recommendation": "3;3;6", "confidence": "3;2;2", "correctness": "3;2;3", "technical_novelty": "2;2;3", "empirical_novelty": "2;1;3", "wc_summary_paper": "200;51;59", "wc_strength_and_weaknesses": "136;65;110", "wc_clarity_quality_novelty_and_reproducibility": "54;35;60", "wc_summary_review": "65;58;26", "wc_review": "455;209;255", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.0, 1.4142135623730951 ], "confidence_avg": [ 2.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 103.33333333333333, 68.43163660828878 ], "wc_strength_and_weaknesses_avg": [ 103.66666666666667, 29.32954520994525 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 49.666666666666664, 10.656244908763853 ], "wc_summary_review_avg": [ 49.666666666666664, 16.97710877099579 ], "wc_review_avg": [ 306.3333333333333, 106.78743163666572 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], 
"reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.5, "corr_recommendation_correctness": 0.5, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16373773451393670741&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Science and Technology of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ustc.edu.cn", "aff_unique_abbr": "USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "653nhbKy6yE", "title": "Graph in Graph Neural Network", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Most existing Graph Neural Networks (GNNs) frequently suffer from two limitations: (i) they can only process graphs whose vertices are represented by vectors or single values; and (ii) they assume each input graph is independent from others during the propagation. In this paper, we propose \\textbf{the first GNN model (called Graph in Graph Neural Network (GIG)) that can process graphs whose vertices are also represented by graphs}. Considering that the relationship between different graphs may contain crucial task-related cues, we further propose a GIG graph relationship modelling (GRM) strategy that integrates multiple target graph samples as a global graph, each of whose vertex describes a target graph sample. We then applies the GIG model to jointly process the combined graph samples (i.e., the global graph), where additional task-specific relationship cues among graph samples can be extracted in an end-to-end manner. The experimental results show that the proposed GIG model and the GRM strategy generalize well on various graph analysis tasks, providing new state-of-the-art results on five out of seven benchmark graph datasets. Importantly, not only its vertex/edge updating functions are flexible to be customized from different existing GNNs but also it is robust to different settings. 
Our code is provided in the supplementary material for reproducibility purposes.", "keywords": "Graph Neural Network;Deep Learning;Sub-graph", "primary_area": "", "supplementary_material": "/attachment/761833e0506ad0c47e5c12e1bcec1990a53e6617.zip", "author": "Jiongshu Wang;Siyang Song;Liru Chen;Jing Yang;Jiankang Deng;Hatice Gunes", "authorids": "~Jiongshu_Wang2;~Siyang_Song1;abigail.lrchen@gmail.com;~Jing_Yang7;~Jiankang_Deng1;~Hatice_Gunes4", "gender": "M;M;;F;M;F", "homepage": ";https://www.cst.cam.ac.uk/people/ss2796;;https://jingyang2017.github.io/;https://jiankangdeng.github.io/;https://www.cl.cam.ac.uk/~hg410/", "dblp": ";220/3096.html;;62/5839-38.html;156/7808;38/1743.html", "google_scholar": ";ZKSL1IcAAAAJ;;https://scholar.google.co.uk/citations?user=a0HJYXcAAAAJ;Z_UoQFsAAAAJ;2WkE1wYAAAAJ", "orcid": "0000-0002-8168-7750;0000-0003-2339-5685;;0000-0002-8794-4842;0000-0002-3709-6216;0000-0003-2407-3012", "linkedin": ";siyang-song-7a814412b/;;;jiankang-deng-b45b21b4/?originalSubdomain=uk;hatice-gunes-05ab628/", "or_profile": "~Jiongshu_Wang2;~Siyang_Song1;abigail.lrchen@gmail.com;~Jing_Yang7;~Jiankang_Deng1;~Hatice_Gunes4", "aff": ";University of Cambridge;;University of Cambridge;;University of Cambridge", "aff_domain": ";cam.ac.uk;;cam.ac.uk;;cam.ac.uk", "position": ";Postdoc;;Postdoc;;Full Professor", "bibtex": "@misc{\nwang2023graph,\ntitle={Graph in Graph Neural Network},\nauthor={Jiongshu Wang and Siyang Song and Liru Chen and Jing Yang and Jiankang Deng and Hatice Gunes},\nyear={2023},\nurl={https://openreview.net/forum?id=653nhbKy6yE}\n}", "github": "", "project": "", "reviewers": "xY2R;YnR1;QhTm", "site": "https://openreview.net/forum?id=653nhbKy6yE", "pdf_size": 1293135, "recommendation": "3;5;5", "confidence": "4;4;4", "correctness": "2;3;4", "technical_novelty": "2;4;4", "empirical_novelty": "1;3;0", "wc_summary_paper": "173;70;92", "wc_strength_and_weaknesses": "58;285;304", "wc_clarity_quality_novelty_and_reproducibility": "48;5;121", "wc_summary_review": "23;11;167", "wc_review": "302;371;684", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 3.3333333333333335, 0.9428090415820634 ], "empirical_novelty_avg": [ 1.3333333333333333, 1.247219128924647 ], "wc_summary_paper_avg": [ 111.66666666666667, 44.28945196720722 ], "wc_strength_and_weaknesses_avg": [ 215.66666666666666, 111.7566801383951 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 58.0, 47.88179890800539 ], "wc_summary_review_avg": [ 67.0, 70.8801805866774 ], "wc_review_avg": [ 452.3333333333333, 166.21739446346228 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.8660254037844385, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Cambridge", "aff_unique_dep": "", "aff_unique_url": "https://www.cam.ac.uk", "aff_unique_abbr": "Cambridge", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Cambridge", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "65P6pfeT3eg", "title": "Protecting DNN from Evasion Attacks using Ensemble of High Focal 
Diversity", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Edge AI continues to attract emerging applications that deploy well-trained DNN models on heterogeneous edge clients for real-time object detection. Recent studies have shown that evasion attacks on DNN object detection models at the test time are on the rise. Such evasion attacks generate deceptive queries using maliciously manipulated or out-of-distribution data, aiming to mislead high-quality object detectors during edge inference. This paper introduces ODEN, a novel approach to object detection ensemble, which combines a detection inconsistency solver with focal diversity-optimized ensemble pruning to defend against evasion attacks. The focal diversity ranking techniques enable ODEN to compose an ensemble from a pool of base object detectors with high failure independence, which strengthens the generalization performance of the ODEN ensemble in the presence of irregular query data and evasion attacks. The ODEN inconsistency solver can detect and resolve three types of inconsistency by combining detection results from multiple DNN object detectors: the inconsistency of the object existence, the size and location inconsistency of the bounding boxes of detected objects, and the classification inconsistency of detected objects and their confidence. Extensive experiments on three benchmark vision datasets (OpenImages, COCO, and VOC) show that under no attack, ODEN can outperform existing ensemble methods by up to 9.33% of mAP. Compared to the low mAP of 2.64~18.07% under four evasion attacks, ODEN can maintain a high mAP of 58.97~86.00%, achieving up to an 82.44% increase in AI safety.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Ka-Ho Chow;Ling Liu;Myungjin Lee;Ramana Kompella", "authorids": "~Ka-Ho_Chow1;~Ling_Liu1;myungjle@cisco.com;rkompell@cisco.com", "gender": "M;;;", "homepage": "https://khchow.com;;;", "dblp": "51/8062.html;;;", "google_scholar": "2spsaBIAAAAJ;;;", "orcid": "0000-0001-5917-2577;;;", "linkedin": "khchow;;;", "or_profile": "~Ka-Ho_Chow1;~Ling_Liu1;myungjle@cisco.com;rkompell@cisco.com", "aff": "Georgia Institute of Technology;;;", "aff_domain": "gatech.edu;;;", "position": "PhD student;;;", "bibtex": "@misc{\nchow2023protecting,\ntitle={Protecting {DNN} from Evasion Attacks using Ensemble of High Focal Diversity},\nauthor={Ka-Ho Chow and Ling Liu and Myungjin Lee and Ramana Kompella},\nyear={2023},\nurl={https://openreview.net/forum?id=65P6pfeT3eg}\n}", "github": "", "project": "", "reviewers": "f97e;6kBU;pyAP", "site": "https://openreview.net/forum?id=65P6pfeT3eg", "pdf_size": 11878466, "recommendation": "3;3;5", "confidence": "3;2;4", "correctness": "3;3;3", "technical_novelty": "2;2;4", "empirical_novelty": "0;2;4", "wc_summary_paper": "75;87;52", "wc_strength_and_weaknesses": "518;253;443", "wc_clarity_quality_novelty_and_reproducibility": "53;215;34", "wc_summary_review": "65;47;48", "wc_review": "711;602;577", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.6666666666666665, 0.9428090415820634 ], "empirical_novelty_avg": [ 2.0, 1.632993161855452 ], "wc_summary_paper_avg": [ 71.33333333333333, 14.522013940527977 ], "wc_strength_and_weaknesses_avg": [ 404.6666666666667, 111.52976682881074 ], 
"wc_clarity_quality_novelty_and_reproducibility_avg": [ 100.66666666666667, 81.21713010333609 ], "wc_summary_review_avg": [ 53.333333333333336, 8.259674462242579 ], "wc_review_avg": [ 630.0, 58.1778881248423 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.8660254037844387, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14111240927057485854&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Georgia Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.gatech.edu", "aff_unique_abbr": "Georgia Tech", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "A Graph Neural Network Approach to Automated Model Building in Cryo-EM Maps", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12154", "id": "65XDF_nwI61", "poster": "", "openreview": "https://openreview.net/forum?id=65XDF_nwI61", "slides": "https://iclr.cc/virtual/2023/poster/12154", "video": "https://iclr.cc/virtual/2023/poster/12154", "author_site": "Kiarash Jamali, Dari Kimanius, Sjors Scheres", "tldr": "Using graph neural networks to automatically build atomic models in cryo-EM maps", "abstract": "Electron cryo-microscopy (cryo-EM) produces three-dimensional (3D) maps of the electrostatic potential of biological macromolecules, including proteins. At sufficient resolution, the cryo-EM maps, along with some knowledge about the imaged molecules, allow de novo atomic modelling. Typically, this is done through a laborious manual process. Recent advances in machine learning applications to protein structure prediction show potential for automating this process. Taking inspiration from these techniques, we have built ModelAngelo for automated model building of proteins in cryo-EM maps. ModelAngelo first uses a residual convolutional neural network (CNN) to initialize a graph representation with nodes assigned to individual amino acids of the proteins in the map and edges representing the protein chain. The graph is then refined with a graph neural network (GNN) that combines the cryo-EM data, the amino acid sequence data and prior knowledge about protein geometries. The GNN refines the geometry of the protein chain and classifies the amino acids for each of its nodes. The final graph is post-processed with a hidden Markov model (HMM) search to map each protein chain to entries in a user provided sequence file. 
Application to 28 test cases shows that ModelAngelo outperforms state-of-the-art methods and approximates manual building for cryo-EM maps with resolutions better than 3.5 Å.", "keywords": "cryo-em;model building;graph neural networks;attention networks;proteins", "primary_area": "", "supplementary_material": "/attachment/ceefb2f93d9581ba996c53fdeca1780efb08900a.zip", "author": "Kiarash Jamali;Dari Kimanius;Sjors HW Scheres", "authorids": "~Kiarash_Jamali1;~Dari_Kimanius1;~Sjors_HW_Scheres1", "gender": "M;M;M", "homepage": "https://jamaliki.github.io;;https://www2.mrc-lmb.cam.ac.uk/groups/scheres", "dblp": "230/4123;330/5398;", "google_scholar": ";noWvpR8AAAAJ;https://scholar.google.co.uk/citations?user=5VgYLcsAAAAJ", "orcid": ";0000-0002-2662-6373;0000-0002-0462-6540", "linkedin": "https://linkedin.com/in/kiarash-jamali-9b73a6171;;", "or_profile": "~Kiarash_Jamali1;~Dari_Kimanius1;~Sjors_HW_Scheres1", "aff": "University of Cambridge;MRC Laboratory of Molecular Biology;MRC Laboratory of Molecular Biology", "aff_domain": "cam.ac.uk;mrc-lmb.cam.ac.uk;mrc-lmb.cam.ac.uk", "position": "PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\njamali2023a,\ntitle={A Graph Neural Network Approach to Automated Model Building in Cryo-{EM} Maps},\nauthor={Kiarash Jamali and Dari Kimanius and Sjors HW Scheres},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=65XDF_nwI61}\n}", "github": "", "project": "", "reviewers": "RqeM;K3kR;7rzk;9Yaz;AJLC", "pdf_size": 14190818, "recommendation": "5;5;6;8;8", "confidence": "4;4;3;2;4", "correctness": "3;3;4;3;3", "technical_novelty": "2;4;4;3;3", "empirical_novelty": "2;4;0;3;4", "wc_summary_paper": "94;146;269;85;102", "wc_strength_and_weaknesses": "241;467;261;242;159", "wc_clarity_quality_novelty_and_reproducibility": "39;55;63;59;17", "wc_summary_review": "19;118;54;40;48", "wc_review": "393;786;647;426;326", "wc_reply_reviewers": "122;0;66;21;0", "wc_reply_authors": "780;830;538;594;344", "reply_reviewers": "2;0;1;1;0", "reply_authors": "2;2;2;2;2", "recommendation_avg": [ 6.4, 1.3564659966250536 ], "confidence_avg": [ 3.4, 0.8 ], "correctness_avg": [ 3.2, 0.39999999999999997 ], "technical_novelty_avg": [ 3.2, 0.7483314773547882 ], "empirical_novelty_avg": [ 2.6, 1.4966629547095764 ], "wc_summary_paper_avg": [ 139.2, 68.20381221016902 ], "wc_strength_and_weaknesses_avg": [ 274.0, 102.71903426337302 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 46.6, 16.8949696655543 ], "wc_summary_review_avg": [ 55.8, 33.27701909726892 ], "wc_review_avg": [ 515.6, 172.85207548652693 ], "wc_reply_reviewers_avg": [ 41.8, 46.78632278775497 ], "wc_reply_authors_avg": [ 617.2, 175.06387405744223 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.51604684654214, "corr_recommendation_correctness": -0.14744195615489716, "gs_citation": 40, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16160756826179229779&as_sdt=5,24&sciodt=0,24&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=65XDF_nwI61", "email": "cam.ac.uk;mrc-lmb.cam.ac.uk;mrc-lmb.cam.ac.uk", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "University of Cambridge;Medical Research Council Laboratory of Molecular Biology", "aff_unique_dep": ";Laboratory of Molecular Biology", "aff_unique_url": "https://www.cam.ac.uk;https://mrc-lmb.cam.ac.uk",
"aff_unique_abbr": "Cambridge;MRC LMB", "aff_campus_unique_index": "0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "663Cl-KetJ", "title": "Better with Less: Data-Active Pre-training of Graph Neural Networks", "track": "main", "status": "Reject", "tldr": "", "abstract": "Recently, pre-training on graph neural networks (GNNs) has become an active research area and is used to learn transferable knowledge for downstream tasks with unlabeled data. The success of graph pre-training models is often attributed to the massive amount of input data. In this paper, however, we identify the curse of big data phenomenon in graph pre-training: more training samples and graph datasets do not necessarily lead to better performance. Motivated by this observation, we propose a better-with-less framework for graph pre-training: few, but carefully chosen data are fed into a GNN model to enhance pre-training. This novel pre-training pipeline is called the data-active graph pre-training (APT) framework, and is composed of a graph selector and a pre-training model. The graph selector chooses the most representative and instructive data points based on the inherent properties of graphs as well as the predictive uncertainty. The proposed uncertainty, as feedback from the pre-training model, measures the confidence level of the model to the data. When fed with the chosen data, on the other hand, the pre-training model grasps an initial understanding of the new, unseen data, and at the same time attempts to remember the knowledge learnt from the previous data. Therefore, the integration and interaction between these two components form a unified framework, in which graph pre-training is performed in a progressive way. 
Experimental results show that the proposed APT framework is able to obtain an efficient pre-training model with fewer training data and better downstream performance.", "keywords": "Pre-training;Graph Neural Networks", "primary_area": "", "supplementary_material": "", "author": "Jiarong Xu;Renhong Huang;XIN JIANG;Yuxuan Cao;Carl Yang;Chunping Wang;Yang Yang", "authorids": "~Jiarong_Xu2;~Renhong_Huang1;~XIN_JIANG5;~Yuxuan_Cao1;~Carl_Yang1;~Chunping_Wang1;~Yang_Yang35", "gender": "F;M;M;F;M;F;M", "homepage": "https://galina0217.github.io/;https://github.com/renH2;https://jiangxjames.github.io/;https://scholar.google.com/citations?user=rwPrfJ0AAAAJ&hl=zh-CN;https://cs.emory.edu/~jyang71/;;http://yangy.org", "dblp": ";325/0914;;;305/0254;54/2715-1;", "google_scholar": ";;zs_h9Y4AAAAJ;;mOINlwcAAAAJ;Rmy5RogAAAAJ;", "orcid": "0000-0003-2973-1889;0000-0002-7808-9768;0000-0003-1231-8529;0009-0000-2867-8938;0000-0001-9145-4531;0000-0003-1854-8667;0000-0002-5058-4417", "linkedin": ";;;;;https://linkedin.com/in/chunping-wang-7b94a15/;", "or_profile": "~Jiarong_Xu2;~Renhong_Huang1;~XIN_JIANG5;~Yuxuan_Cao1;~Carl_Yang1;~Chunping_Wang1;~Yang_Yang35", "aff": "Fudan University;Zhejiang University;Lehigh University;Zhejiang University;Emory University;Finvolution Group;Zhejiang University", "aff_domain": "fudan.edu.cn;zju.edu.cn;lehigh.edu;zju.edu.cn;emory.edu;xinye.com;zju.edu.cn", "position": "Assistant Professor;MS student;Postdoc;MS student;Assistant Professor;Principal Scientist;Associate Professor", "bibtex": "@misc{\nxu2023better,\ntitle={Better with Less: Data-Active Pre-training of Graph Neural Networks},\nauthor={Jiarong Xu and Renhong Huang and XIN JIANG and Yuxuan Cao and Carl Yang and Chunping Wang and Yang Yang},\nyear={2023},\nurl={https://openreview.net/forum?id=663Cl-KetJ}\n}", "github": "", "project": "", "reviewers": "pEvr;ajow;z7Ev;xmXz", "site": "https://openreview.net/forum?id=663Cl-KetJ", "pdf_size": 6978022, "recommendation": "3;3;6;8", "confidence": "3;4;3;4", "correctness": "3;3;3;3", "technical_novelty": "2;3;2;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "64;77;55;173", "wc_strength_and_weaknesses": "347;838;216;279", "wc_clarity_quality_novelty_and_reproducibility": "16;6;3;79", "wc_summary_review": "31;6;3;68", "wc_review": "458;927;277;599", "wc_reply_reviewers": "0;0;23;17", "wc_reply_authors": "2775;3147;453;1376", "reply_reviewers": "0;0;1;1", "reply_authors": "6;6;1;2", "recommendation_avg": [ 5.0, 2.1213203435596424 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 92.25, 47.27248142418589 ], "wc_strength_and_weaknesses_avg": [ 420.0, 245.73868234366358 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 26.0, 30.97579700346708 ], "wc_summary_review_avg": [ 27.0, 26.04803255526221 ], "wc_review_avg": [ 565.25, 238.00879710632546 ], "wc_reply_reviewers_avg": [ 10.0, 10.222524150130436 ], "wc_reply_authors_avg": [ 1937.75, 1082.0488378534492 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 3.75, 2.277608394786075 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.23570226039551587, "corr_recommendation_correctness": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1665875990764394230&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;2;1;3;4;1", "aff_unique_norm": "Fudan University;Zhejiang University;Lehigh
University;Emory University;FinVolution Group", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.fudan.edu.cn;https://www.zju.edu.cn;https://www.lehigh.edu;https://www.emory.edu;https://www.finvolutiongroup.com", "aff_unique_abbr": "Fudan;ZJU;Lehigh;Emory;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;1;0;0", "aff_country_unique": "China;United States" }, { "id": "66kLbXgU_ae", "title": "EXACT: Compositional Augmentation for Image-level Weakly-Supervised Instance Segmentation", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "We propose EXACT: EXtract-AugContext-pasTe, a compositional image augmentation pipeline for weakly-supervised instance segmentation using only image-level supervision. The proposed method consists of three main components. The first component generates high-quality foreground object masks. To this end, an EM-like approach is proposed that iteratively refines an initial set of object mask proposals generated by a generic entity segmentation method. Next, in the second component, high-quality context-aware background images are generated using a text-to-image compositional synthesis method like DALL-E. Finally, the third component creates a large-scale pseudo-labeled instance segmentation training dataset by compositing the foreground object masks onto the original and generated background images. The proposed approach achieves state-of-the-art weakly-supervised instance segmentation results on both the PASCAL VOC 2012 and MS COCO datasets by using only image-level, weak label information. In particular, it outperforms the best baseline by +7.4 and +2.8 mAP-0.50 on PASCAL and COCO, respectively. Further, the method provides a new solution to the long-tail weakly-supervised instance segmentation problem (when many classes may have only a few training samples), by selectively augmenting under-represented classes.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Yunhao Ge;Jiashu Xu;Brian Nlong Zhao;Laurent Itti;Vibhav Vineet", "authorids": "~Yunhao_Ge1;~Jiashu_Xu1;~Brian_Nlong_Zhao1;~Laurent_Itti1;~Vibhav_Vineet5", "gender": "M;M;M;M;", "homepage": "https://gyhandy.github.io/;https://cnut1648.github.io/;;http://ilab.usc.edu;", "dblp": "204/1908;;205/7046.html;31/3256;", "google_scholar": "https://scholar.google.ca/citations?user=QhjGr4oAAAAJ;0uYehJsAAAAJ;IhqFMeUAAAAJ;xhUvqK8AAAAJ;", "orcid": ";;;0000-0002-0168-2977;", "linkedin": "yunhao-ge-720727135/;jiashu-xu;;;", "or_profile": "~Yunhao_Ge1;~Jiashu_Xu1;~Brian_Nlong_Zhao1;~Laurent_Itti1;~Vibhav_Vineet5", "aff": "University of Southern California;Harvard University;University of Southern California;University of Southern California;", "aff_domain": "usc.edu;harvard.edu;usc.edu;usc.edu;", "position": "PhD student;MS student;Undergrad student;Professor;", "bibtex": "@misc{\nge2023exact,\ntitle={{EXACT}: Compositional Augmentation for Image-level Weakly-Supervised Instance Segmentation},\nauthor={Yunhao Ge and Jiashu Xu and Brian Nlong Zhao and Laurent Itti and Vibhav Vineet},\nyear={2023},\nurl={https://openreview.net/forum?id=66kLbXgU_ae}\n}", "github": "", "project": "", "reviewers": "8xwa;dua6;pkWn", "site": "https://openreview.net/forum?id=66kLbXgU_ae", "pdf_size": 14837286, "recommendation": "3;3;3", "confidence": "5;4;4", "correctness": "2;3;3", "technical_novelty": "1;2;2", "empirical_novelty": "0;2;2", "wc_summary_paper": "66;116;169", "wc_strength_and_weaknesses": "123;218;254",
"wc_clarity_quality_novelty_and_reproducibility": "14;36;71", "wc_summary_review": "41;47;56", "wc_review": "244;417;550", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.3333333333333333, 0.9428090415820634 ], "wc_summary_paper_avg": [ 117.0, 42.05551886098502 ], "wc_strength_and_weaknesses_avg": [ 198.33333333333334, 55.25898619731957 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 40.333333333333336, 23.47102232304526 ], "wc_summary_review_avg": [ 48.0, 6.164414002968976 ], "wc_review_avg": [ 403.6666666666667, 125.27924364749688 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:EgTzoBqY0_sJ:scholar.google.com/&scioq=EXACT:+Compositional+Augmentation+for+Image-level+Weakly-Supervised+Instance+Segmentation&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Southern California;Harvard University", "aff_unique_dep": ";", "aff_unique_url": "https://www.usc.edu;https://www.harvard.edu", "aff_unique_abbr": "USC;Harvard", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Learning a Data-Driven Policy Network for Pre-Training Automated Feature Engineering", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10705", "id": "688hNNMigVX", "poster": "/media/PosterPDFs/ICLR%202023/10705.png?t=1681830698.701655", "openreview": "https://openreview.net/forum?id=688hNNMigVX", "slides": "https://iclr.cc/virtual/2023/poster/10705", "video": "https://iclr.cc/virtual/2023/poster/10705", "author_site": "Liyao Li, Haobo Wang, Liangyu Zha, Qingyi Huang, Sai Wu, Gang Chen, Junbo Zhao", "tldr": "We propose a data-driven automated feature engineering framework Fetch.", "abstract": "Feature engineering is widely acknowledged to be pivotal in tabular data analysis and prediction. Automated feature engineering (AutoFE) emerged to automate this process managed by experienced data scientists and engineers conventionally. In this area, most \u2014 if not all \u2014 prior work adopted an identical framework from the neural architecture search (NAS) method. While feasible, we posit that the NAS framework very much contradicts the way how human experts cope with the data since the inherent Markov decision process (MDP) setup differs. We point out that its data-unobserved setup consequentially results in an incapability to generalize across different datasets as well as also high computational cost. This paper proposes a novel AutoFE framework Feature Set Data-Driven Search (FETCH), a pipeline mainly for feature generation and selection. Notably, FETCH is built on a brand-new data-driven MDP setup using the tabular dataset as the state fed into the policy network. 
Further, we posit that the crucial merit of FETCH is its transferability, whereby the yielded policy network trained on a variety of datasets is indeed capable of enacting feature engineering on unseen data, without requiring additional exploration. To the best of our knowledge, this is a pioneering attempt to build a tabular data pre-training paradigm via AutoFE. Extensive experiments show that FETCH systematically surpasses the current state-of-the-art AutoFE methods and validates the transferability of AutoFE pre-training.", "keywords": "Automated Feature Engineering;Reinforcement Learning;Tabular Data;Data-Driven;Pre-Training", "primary_area": "", "supplementary_material": "/attachment/56034b2cc3d9067f2e11f522ad69d1914f47261d.zip", "author": "Liyao Li;Haobo Wang;Liangyu Zha;Qingyi Huang;Sai Wu;Gang Chen;Junbo Zhao", "authorids": "~Liyao_Li1;~Haobo_Wang1;~Liangyu_Zha1;~Qingyi_Huang1;~Sai_Wu2;~Gang_Chen6;~Junbo_Zhao1", "gender": "M;M;;M;M;M;M", "homepage": "https://leopold-dev.github.io/;https://hbzju.github.io/;;https://person.zju.edu.cn/0011057;;http://jakezhao.net/;https://weibo.com/u/5626217150", "dblp": "133/3737;;;30/1186.html;67/6383-1;191/6665;", "google_scholar": "HkwGSt4AAAAJ;DnN-rggAAAAJ;77h4exkAAAAJ;RMaqDKAAAAAJ;;8ipao8MAAAAJ;", "orcid": "0009-0005-5235-1982;0000-0001-8586-3048;;;0000-0002-7483-0045;;", "linkedin": ";;;;;;", "or_profile": "~Liyao_Li1;~Haobo_Wang1;~Qingyi_Huang1;~Sai_Wu2;~Gang_Chen6;~Junbo_Zhao1;~Hellon_Leo1", "aff": "Zhejiang University;Zhejiang University;;Zhejiang University;College of Computer Science and Technology, Zhejiang University;Zhejiang University;", "aff_domain": "zju.edu.cn;zju.edu.cn;;zju.edu.cn;cs.zju.edu.cn;zju.edu.cn;", "position": "PhD student;PhD student;;Full Professor;Full Professor;Assistant Professor;", "bibtex": "@inproceedings{\nli2023learning,\ntitle={Learning a Data-Driven Policy Network for Pre-Training Automated Feature Engineering},\nauthor={Liyao Li and Haobo Wang and Liangyu Zha and Qingyi Huang and Sai Wu and Gang Chen and Junbo Zhao},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=688hNNMigVX}\n}", "github": "", "project": "", "reviewers": "B9Km;Y4Ri;vD6x", "pdf_size": 2805448, "recommendation": "8;8;8", "confidence": "3;5;4", "correctness": "4;4;3", "technical_novelty": "4;3;3", "empirical_novelty": "4;4;3", "wc_summary_paper": "119;49;187", "wc_strength_and_weaknesses": "236;46;182", "wc_clarity_quality_novelty_and_reproducibility": "33;37;155", "wc_summary_review": "17;156;125", "wc_review": "405;288;649", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "456;502;372", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 8.0, 0.0 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 118.33333333333333, 56.3402362634576 ], "wc_strength_and_weaknesses_avg": [ 154.66666666666666, 79.9388655299925 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 75.0, 56.592107812544555 ], "wc_summary_review_avg": [ 99.33333333333333, 59.578146627396485 ], "wc_review_avg": [ 447.3333333333333, 150.3869084136722 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 443.3333333333333, 53.82275437850509 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ],
"authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16572206393407030807&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "pdf": "https://openreview.net/pdf?id=688hNNMigVX", "email": "zju.edu.cn;zju.edu.cn;;zju.edu.cn;cs.zju.edu.cn;zju.edu.cn;", "author_num": 7, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Zhejiang University", "aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", "aff_unique_abbr": "ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "692oJ-QFuMC", "title": "Teacher Intervention: Improving Convergence of Quantization Aware Training for Ultra-Low Precision Transformers", "track": "main", "status": "Withdraw", "tldr": "Efficient and accurate two-step Quantization Aware Training method of Finetuned Transformers", "abstract": "Pre-trained Transformer models such as BERT have shown great success in a wide range of applications, but with the cost of substantial increases in model complexity. Quantization-aware training (QAT) is a promising way to lower the implementation cost and energy consumption. However, aggressive quantization below 2-bit causes considerable accuracy degradation, especially when the downstream dataset is not abundant.\nThis work proposes a proactive knowledge distillation method called Teacher Intervention (TI) for fast converging QAT of ultra-low precision pre-trained Transformers. TI intervenes layer-wise signal propagation with the intact signal from the teacher to remove the interference of propagated quantization errors, smoothing loss surface and expediting the convergence. We further propose a gradual intervention mechanism to stabilize the tuning of the feed-forward network and recover the self-attention map in steps. The proposed scheme enables fast convergence of QAT and improves the model accuracy regardless of the diverse characteristics of downstream fine-tuning tasks. We demonstrate that TI consistently achieves superior accuracy with lower fine-tuning budget. 
", "keywords": "Deep Learning;Quantization;QAT;Self-Attention;Transformer;BERT", "primary_area": "", "supplementary_material": "", "author": "Minsoo Kim;Kyuhong Shim;Seongmin Park;Wonyong Sung;Jungwook Choi", "authorids": "~Minsoo_Kim2;~Kyuhong_Shim1;~Seongmin_Park1;~Wonyong_Sung1;~Jungwook_Choi1", "gender": "M;M;;;M", "homepage": "https://marsjacobs.github.io;https://sites.google.com/view/khshim;https://jchoi-hyu.github.io;;", "dblp": ";209/4981;97/4757-3;22/1975;97/4140", "google_scholar": "https://scholar.google.co.kr/citations?hl=ko;https://scholar.google.co.kr/citations?user=msFkCLEAAAAJ;;https://scholar.google.co.kr/citations?user=1IfNFz4AAAAJ;YPT98zwAAAAJ", "orcid": ";0000-0002-0123-3100;;0000-0001-8801-210X;", "linkedin": "minsoo-kim-37268a1b0/;;;;jungwook-choi-5854996b/", "or_profile": "~Minsoo_Kim2;~Kyuhong_Shim1;~Seongmin_Park1;~Wonyong_Sung1;~Jungwook_Choi1", "aff": "Hanyang University;Qualcomm Inc, Qualcomm;Hanyang University;Seoul National University;Hanyang University", "aff_domain": "hanyang.ac.kr;qti.qualcomm.com;hanyang.ac.kr;snu.ac.kr;hanyang.ac.kr", "position": "PhD student;Researcher;PhD student;Emeritus;Assistant Professor", "bibtex": "@misc{\nkim2023teacher,\ntitle={Teacher Intervention: Improving Convergence of Quantization Aware Training for Ultra-Low Precision Transformers},\nauthor={Minsoo Kim and Kyuhong Shim and Seongmin Park and Wonyong Sung and Jungwook Choi},\nyear={2023},\nurl={https://openreview.net/forum?id=692oJ-QFuMC}\n}", "github": "", "project": "", "reviewers": "", "site": "https://openreview.net/forum?id=692oJ-QFuMC", "pdf_size": 914956, "recommendation": "", "confidence": "", "correctness": "", "technical_novelty": "", "empirical_novelty": "", "wc_summary_paper": "", "wc_strength_and_weaknesses": "", "wc_clarity_quality_novelty_and_reproducibility": "", "wc_summary_review": "", "wc_review": "", "wc_reply_reviewers": "", "wc_reply_authors": "", "reply_reviewers": "", "reply_authors": "", "recommendation_avg": [ 0, 0 ], "confidence_avg": [ 0, 0 ], "correctness_avg": [ 0, 0 ], "technical_novelty_avg": [ 0, 0 ], "empirical_novelty_avg": [ 0, 0 ], "wc_summary_paper_avg": [ 0, 0 ], "wc_strength_and_weaknesses_avg": [ 0, 0 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 0, 0 ], "wc_summary_review_avg": [ 0, 0 ], "wc_review_avg": [ 0, 0 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 1, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0, "corr_recommendation_correctness": 0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6976969944035637955&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "Hanyang University;Qualcomm Incorporated;Seoul National University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.hanyang.ac.kr;https://www.qualcomm.com;https://www.snu.ac.kr", "aff_unique_abbr": "HYU;Qualcomm;SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "South Korea;United States" }, { "id": "69MODRAL5u8", "title": "A Theory of Equivalence-Preserving Program Embeddings", "track": "main", "status": "Reject", "tldr": "We develop a theory of program embeddings that preserve semantic equivalence and show when they are tractable to compute", "abstract": "Program embeddings are used to solve tasks such as \\textit{code clone detection} and \\textit{semantic labeling}. 
Solutions to these \\textit{semantic tasks} should be invariant to semantics-preserving program transformations. When a program embedding function satisfies this invariance, we call it an \\textit{equivalence-preserving program embedding function}. We say a programming language can be \\textit{tractably embedded} when we can construct an equivalence-preserving program embedding function that executes in polynomial time in program/input length and produces program embeddings that are proportional to the input length. Determining whether a programming language can be tractably embedded is the \\textit{equivalence-preserving program embedding problem}. We formalize this problem and theoretically characterize when programming languages can be tractably embedded. To validate our theoretical results, we use the BERT-Tiny model to learn an equivalence-preserving program embedding function for a programming language that can be tractably embedded and show the model fails to construct an equivalence-preserving program embedding function for a similar language that is intractable to embed.\n", "keywords": "Programming Languages;Program Embeddings;Code;Big Code", "primary_area": "", "supplementary_material": "", "author": "Logan Weber;Jesse Michel;Alex Renda;Saman Amarasinghe;Michael Carbin", "authorids": "~Logan_Weber1;~Jesse_Michel1;~Alex_Renda2;~Saman_Amarasinghe1;~Michael_Carbin1", "gender": ";;M;M;M", "homepage": "https://weberlo.github.io;http://web.mit.edu/jmmichel/www/;https://alexrenda.com;http://people.csail.mit.edu/saman/;http://people.csail.mit.edu/mcarbin/", "dblp": ";;206/6568;a/SPAmarasinghe;07/3119", "google_scholar": ";;4BCuJ2AAAAAJ;https://scholar.google.com.tw/citations?user=cF6i_goAAAAJ;mtejbKYAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Logan_Weber1;~Jesse_Michel1;~Alex_Renda2;~Saman_Amarasinghe1;~Michael_Carbin1", "aff": "Massachusetts Institute of Technology;MIT;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;csail.mit.edu;mit.edu;mit.edu;mit.edu", "position": "PhD student;PhD student;PhD student;Full Professor;Associate Professor", "bibtex": "@misc{\nweber2023a,\ntitle={A Theory of Equivalence-Preserving Program Embeddings},\nauthor={Logan Weber and Jesse Michel and Alex Renda and Saman Amarasinghe and Michael Carbin},\nyear={2023},\nurl={https://openreview.net/forum?id=69MODRAL5u8}\n}", "github": "", "project": "", "reviewers": "Pnio;3JjL;c7mJ;M5Qy", "site": "https://openreview.net/forum?id=69MODRAL5u8", "pdf_size": 1950167, "recommendation": "3;3;5;5", "confidence": "4;3;5;4", "correctness": "3;3;4;4", "technical_novelty": "1;2;3;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "71;50;34;62", "wc_strength_and_weaknesses": "208;83;202;302", "wc_clarity_quality_novelty_and_reproducibility": "147;21;31;31", "wc_summary_review": "32;40;53;112", "wc_review": "458;194;320;507", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "491;0;159;490", "reply_reviewers": "0;0;0;0", "reply_authors": "1;0;1;1", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.25, 0.82915619758885 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 54.25, 13.863170633011771 ], "wc_strength_and_weaknesses_avg": [ 198.75, 77.70899240113721 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 57.5, 51.83386923624359 ], "wc_summary_review_avg": [ 59.25, 31.363792819109108 ], 
"wc_review_avg": [ 369.75, 122.46300461772118 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 285.0, 213.05046350571502 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0.75, 0.4330127018922193 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.7071067811865475, "corr_recommendation_correctness": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:zsVmTVcYOocJ:scholar.google.com/&scioq=A+Theory+of+Equivalence-Preserving+Program+Embeddings&hl=en&as_sdt=0,5", "gs_version_total": 3, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Neuroevolution is a Competitive Alternative to Reinforcement Learning for Skill Discovery", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10722", "id": "6BHlZgyPOZY", "poster": "", "openreview": "https://openreview.net/forum?id=6BHlZgyPOZY", "slides": "https://iclr.cc/virtual/2023/poster/10722", "video": "https://iclr.cc/virtual/2023/poster/10722", "author_site": "Felix Chalumeau, Raphael Boige, Bryan Lim, Valentin Mac\u00e9, Maxime Allard, Arthur Flajolet, Antoine Cully, Thomas PIERROT", "tldr": "", "abstract": "Deep Reinforcement Learning (RL) has emerged as a powerful paradigm for training neural policies to solve complex control tasks. However, these policies tend to be overfit to the exact specifications of the task and environment they were trained on, and thus do not perform well when conditions deviate slightly or when composed hierarchically to solve even more complex tasks. Recent work has shown that training a mixture of policies, as opposed to a single one, that are driven to explore different regions of the state-action space can address this shortcoming by generating a diverse set of behaviors, referred to as skills, that can be collectively used to great effect in adaptation tasks or for hierarchical planning. This is typically realized by including a diversity term - often derived from information theory - in the objective function optimized by RL. However these approaches often require careful hyperparameter tuning to be effective. In this work, we demonstrate that less widely-used neuroevolution methods, specifically Quality Diversity (QD), are a competitive alternative to information-theory-augmented RL for skill discovery. Through an extensive empirical evaluation comparing eight state-of-the-art algorithms (four flagship algorithms from each line of work) on the basis of (i) metrics directly evaluating the skills' diversity, (ii) the skills' performance on adaptation tasks, and (iii) the skills' performance when used as primitives for hierarchical planning; QD methods are found to provide equal, and sometimes improved, performance whilst being less sensitive to hyperparameters and more scalable. 
As no single method is found to provide near-optimal performance across all environments, there is a rich scope for further research which we support by proposing future directions and providing optimized open-source implementations.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Felix Chalumeau;Raphael Boige;Bryan Lim;Valentin Mac\u00e9;Maxime Allard;Arthur Flajolet;Antoine Cully;Thomas PIERROT", "authorids": "~Felix_Chalumeau1;~Raphael_Boige1;~Bryan_Lim2;~Valentin_Mac\u00e91;~Maxime_Allard1;~Arthur_Flajolet2;~Antoine_Cully1;~Thomas_PIERROT1", "gender": "M;M;M;M;;;M;M", "homepage": ";https://github.com/Egiob;;;https://maximeallard.lu/;;;", "dblp": "286/1636;;;;;;https://dblp.org/pers/c/Cully:Antoine.html;228/7739", "google_scholar": "YAC6ZzIAAAAJ;;OpxLH5cAAAAJ;bzIEjccAAAAJ;LJILpisAAAAJ;;rZtJlPQAAAAJ;https://scholar.google.fr/citations?user=0zBiyNUAAAAJ", "orcid": ";;;;0000-0001-5433-9593;;;0000-0002-5227-6194", "linkedin": "f%C3%A9lix-chalumeau-083457172/;;;valentinmace/;;;;thomas-pierrot-120a43128/", "or_profile": "~Felix_Chalumeau1;~Raphael_Boige1;~Bryan_Lim2;~Valentin_Mac\u00e91;~Maxime_Allard1;~Arthur_Flajolet2;~Antoine_Cully1;~Thomas_PIERROT1", "aff": "InstaDeep;InstaDeep;Imperial College London;;Imperial College London, Imperial College London;;Imperial College London;Universit\u00e9 Pierre et Marie Curie - Paris 6, Computer Science Lab - Pierre and Marie Curie University, Paris, France", "aff_domain": "instadeep.com;instadeep.com;imperial.ac.uk;;imperial.ac.uk;;imperial.ac.uk;isir.upmc.fr", "position": "Researcher;Researcher;PhD student;;PhD student;;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nchalumeau2023neuroevolution,\ntitle={Neuroevolution is a Competitive Alternative to Reinforcement Learning for Skill Discovery},\nauthor={Felix Chalumeau and Raphael Boige and Bryan Lim and Valentin Mac{\\'e} and Maxime Allard and Arthur Flajolet and Antoine Cully and Thomas PIERROT},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=6BHlZgyPOZY}\n}", "github": "", "project": "", "reviewers": "UYjV;yJjm;rzaa;hXTi", "pdf_size": 8571726, "recommendation": "5;6;8;8", "confidence": "3;3;3;4", "correctness": "3;4;4;4", "technical_novelty": "2;2;2;3", "empirical_novelty": "1;4;3;3", "wc_summary_paper": "87;69;64;71", "wc_strength_and_weaknesses": "452;173;106;365", "wc_clarity_quality_novelty_and_reproducibility": "112;69;18;98", "wc_summary_review": "104;29;47;43", "wc_review": "755;340;235;577", "wc_reply_reviewers": "0;0;18;37", "wc_reply_authors": "1976;1169;410;788", "reply_reviewers": "0;0;1;1", "reply_authors": "3;2;1;1", "recommendation_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_paper_avg": [ 72.75, 8.613216588476108 ], "wc_strength_and_weaknesses_avg": [ 274.0, 139.99107114384117 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 74.25, 35.9887135085432 ], "wc_summary_review_avg": [ 55.75, 28.647643882176418 ], "wc_review_avg": [ 476.75, 202.86494891922558 ], "wc_reply_reviewers_avg": [ 13.75, 15.303185942802891 ], "wc_reply_authors_avg": [ 1085.75, 579.8208236170894 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": 
0.5555555555555555, "corr_recommendation_correctness": 0.7777777777777777, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2436473677846335570&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=6BHlZgyPOZY", "email": "instadeep.com;instadeep.com;imperial.ac.uk;;imperial.ac.uk;;imperial.ac.uk;isir.upmc.fr", "author_num": 8, "aff_unique_index": "0;0;1;1;1;2", "aff_unique_norm": "InstaDeep;Imperial College London;Universit\u00e9 Pierre et Marie Curie - Paris 6", "aff_unique_dep": ";;Computer Science Lab", "aff_unique_url": "https://www.instadeep.com;https://www.imperial.ac.uk;https://www.upmc.fr", "aff_unique_abbr": "InstaDeep;ICL;UPMC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Paris", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "United Kingdom;France" }, { "id": "6BLZcpw1sh", "title": "Image Classification by Throwing Quantum Kitchen Sinks at Tensor Networks", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Several variational quantum circuit approaches to machine learning have been proposed in recent years, with one promising class of variational algorithms involving tensor networks operating on states resulting from local feature maps. In contrast, a random feature approach known as quantum kitchen sinks provides comparable performance, but leverages non-local feature maps. Here we combine these two approaches by proposing a new circuit ansatz where a tree tensor network coherently processes the non-local feature maps of quantum kitchen sinks, and we run numerical experiments to empirically evaluate the performance of image classification with the new ansatz. From the perspective of classification performance, we find that simply combining quantum kitchen sinks with tensor networks yields no qualitative improvements. 
However, the addition of feature optimization greatly boosts performance, leading to state-of-the-art quantum circuits for image classification, requiring only shallow circuits and a small number of qubits -- both well within reach of near-term quantum devices.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Nathan Kodama;Alex Bocharov;Marcus Silva", "authorids": "~Nathan_Kodama1;~Alex_Bocharov1;~Marcus_Silva1", "gender": "M;M;M", "homepage": ";;https://marcusps.github.io/", "dblp": ";40/11190;", "google_scholar": ";;qsKgI6AAAAAJ", "orcid": ";;0000-0002-6641-8712", "linkedin": "nathan-x-kodama;alexei-bocharov-4342572/;", "or_profile": "~Nathan_Kodama1;~Alex_Bocharov1;~Marcus_Silva1", "aff": "Case Western Reserve University;;Microsoft", "aff_domain": "case.edu;;microsoft.com", "position": "PhD student;;Principal Researcher", "bibtex": "@misc{\nkodama2023image,\ntitle={Image Classification by Throwing Quantum Kitchen Sinks at Tensor Networks},\nauthor={Nathan Kodama and Alex Bocharov and Marcus Silva},\nyear={2023},\nurl={https://openreview.net/forum?id=6BLZcpw1sh}\n}", "github": "", "project": "", "reviewers": "kzUp;hxnH;UuL1;H8ve", "site": "https://openreview.net/forum?id=6BLZcpw1sh", "pdf_size": 2409041, "recommendation": "3;3;5;5", "confidence": "5;4;4;3", "correctness": "3;2;3;3", "technical_novelty": "2;2;3;2", "empirical_novelty": "1;2;2;2", "wc_summary_paper": "30;56;48;94", "wc_strength_and_weaknesses": "58;110;138;144", "wc_clarity_quality_novelty_and_reproducibility": "13;7;116;49", "wc_summary_review": "30;14;95;54", "wc_review": "131;187;397;341", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 57.0, 23.345235059857504 ], "wc_strength_and_weaknesses_avg": [ 112.5, 33.98161267509239 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 46.25, 43.355362990061565 ], "wc_summary_review_avg": [ 48.25, 30.515364982251153 ], "wc_review_avg": [ 264.0, 108.66922287382016 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.7071067811865475, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1490989148032697983&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Case Western Reserve University;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.case.edu;https://www.microsoft.com", "aff_unique_abbr": "CWRU;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "6BO4lP8K1N1", "title": "Cutting Long Gradient Flows: Decoupling End-to-End Backpropagation Based on Supervised Contrastive Learning", "track": "main", "status": "Reject", "tldr": "We cut long gradient flows into multiple shorter ones and maintain comparable test accuracy.", "abstract": "End-to-end backpropagation (BP) is the foundation of current deep learning technology. 
Unfortunately, as a network becomes deeper, BP becomes inefficient for various reasons. This paper proposes a new methodology for decoupling BP to transform a long gradient flow into multiple short ones in order to address the optimization issues caused by long gradient flows. We report thorough experiments conducted to illustrate the effectiveness of our model compared with BP and associated learning (AL), a state-of-the-art methodology for backpropagation decoupling. We will release the source code for the experiments after acceptance.\n", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/2400a5d5b62029f128ec4d9d16acfe6ecbd0a462.zip", "author": "Cheng-Kai Wang;Hung-Hsuan Chen", "authorids": "ckwang9805@gmail.com;~Hung-Hsuan_Chen1", "gender": ";", "homepage": ";", "dblp": ";13/1892", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "ckwang9805@gmail.com;~Hung-Hsuan_Chen1", "aff": ";National Central University, Taiwan", "aff_domain": ";ncu.edu.tw", "position": ";Associa", "bibtex": "@misc{\nwang2023cutting,\ntitle={Cutting Long Gradient Flows: Decoupling End-to-End Backpropagation Based on Supervised Contrastive Learning},\nauthor={Cheng-Kai Wang and Hung-Hsuan Chen},\nyear={2023},\nurl={https://openreview.net/forum?id=6BO4lP8K1N1}\n}", "github": "", "project": "", "reviewers": "XPBU;UdqE;Xd2U;tuDH", "site": "https://openreview.net/forum?id=6BO4lP8K1N1", "pdf_size": 399384, "recommendation": "3;5;6;6", "confidence": "4;4;4;3", "correctness": "2;4;3;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;3;3;2", "wc_summary_paper": "68;88;162;128", "wc_strength_and_weaknesses": "227;228;271;204", "wc_clarity_quality_novelty_and_reproducibility": "72;120;57;81", "wc_summary_review": "40;64;80;97", "wc_review": "407;500;570;510", "wc_reply_reviewers": "184;135;63;27", "wc_reply_authors": "970;1089;359;157", "reply_reviewers": "1;1;1;1", "reply_authors": "3;4;3;1", "recommendation_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 111.5, 36.2870500316573 ], "wc_strength_and_weaknesses_avg": [ 232.5, 24.212600025606502 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 82.5, 23.286262044390035 ], "wc_summary_review_avg": [ 70.25, 21.00446381129497 ], "wc_review_avg": [ 496.75, 58.3239873465455 ], "wc_reply_reviewers_avg": [ 102.25, 61.15298439160594 ], "wc_reply_authors_avg": [ 643.75, 394.55505002470824 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.75, 1.0897247358851685 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.4714045207910316, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:jEAgwsHs8HkJ:scholar.google.com/&scioq=Cutting+Long+Gradient+Flows:+Decoupling+End-to-End+Backpropagation+Based+on+Supervised+Contrastive+Learning&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "National Central University", "aff_unique_dep": "", "aff_unique_url": "https://www.ncu.edu.tw", "aff_unique_abbr": "NCU", "aff_campus_unique_index": "0", "aff_campus_unique": "Taiwan", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "id": "6BZJ_zn7ez7", "title": "PatchBlender: A Motion Prior for Video Transformers", "track": "main", "status": "Reject", "tldr": "We introduce 
PatchBlender, a learnable blending function that operates over patch embeddings across the temporal dimension of the latent space.", "abstract": "Transformers have become one of the dominant architectures in the field of computer vision. However, several challenges remain when applying such architectures to video data. Most notably, these models struggle to model the temporal patterns of video data effectively. Directly targeting this issue, we introduce PatchBlender, a learnable blending function that operates over patch embeddings across the temporal dimension of the latent space. We show that our method is successful at enabling vision transformers to encode the temporal component of video data. On Something-Something v2 and MOVi-A, we show that our method improves the performance of a ViT-B. PatchBlender has the advantage of being compatible with almost any Transformer architecture and since it is learnable, the model can adaptively turn on or off the prior. It is also extremely lightweight compute-wise, requiring only 0.005% of the GFLOPs of a ViT-B.", "keywords": "transformer;vit;vision;video;prior;temporal;pattern;dimension;latent;time;motion;attention;smoothing;blending;smooth;blend;patch;patchblender;inductive bias;kinetics;kinetics400;ssv2;something-something;something something;kubric;movia;movi-a", "primary_area": "", "supplementary_material": "", "author": "Gabriele Prato;Yale Song;Janarthanan Rajendran;R Devon Hjelm;Neel Joshi;Sarath Chandar", "authorids": "~Gabriele_Prato1;~Yale_Song1;~Janarthanan_Rajendran2;~R_Devon_Hjelm1;~Neel_Joshi1;~Sarath_Chandar1", "gender": ";M;;M;;M", "homepage": ";https://people.csail.mit.edu/yalesong;;;;http://sarathchandar.in/", "dblp": ";31/9606.html;;195/5928;;45/8542", "google_scholar": ";dNHNpxoAAAAJ;;https://scholar.google.ca/citations?user=68c5HfwAAAAJ;;https://scholar.google.co.in/citations?user=yxWtZLAAAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Gabriele_Prato1;~Yale_Song1;~Janarthanan_Rajendran2;~R_Devon_Hjelm1;~Neel_Joshi1;~Sarath_Chandar1", "aff": ";FAIR, Meta;;Apple;;\u00c9cole Polytechnique de Montr\u00e9al", "aff_domain": ";meta.com;;apple.com;;polymtl.ca", "position": ";Research Scientist;;Researcher;;Assistant Professor", "bibtex": "@misc{\nprato2023patchblender,\ntitle={PatchBlender: A Motion Prior for Video Transformers},\nauthor={Gabriele Prato and Yale Song and Janarthanan Rajendran and R Devon Hjelm and Neel Joshi and Sarath Chandar},\nyear={2023},\nurl={https://openreview.net/forum?id=6BZJ_zn7ez7}\n}", "github": "", "project": "", "reviewers": "4JAh;4WoX;rJNz", "site": "https://openreview.net/forum?id=6BZJ_zn7ez7", "pdf_size": 7274863, "recommendation": "3;3;5", "confidence": "4;5;4", "correctness": "3;3;3", "technical_novelty": "2;2;2", "empirical_novelty": "2;2;2", "wc_summary_paper": "44;46;72", "wc_strength_and_weaknesses": "116;245;247", "wc_clarity_quality_novelty_and_reproducibility": "44;45;41", "wc_summary_review": "27;97;49", "wc_review": "231;433;409", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "128;336;284", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 54.0, 12.754084313139327 ], "wc_strength_and_weaknesses_avg": [ 202.66666666666666, 61.28802674439945 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 43.333333333333336,
1.699673171197595 ], "wc_summary_review_avg": [ 57.666666666666664, 29.227080289043965 ], "wc_review_avg": [ 357.6666666666667, 90.1011776960891 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 249.33333333333334, 88.38300490227493 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.4999999999999999, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:bSOGZw6-8lcJ:scholar.google.com/&scioq=PatchBlender:+A+Motion+Prior+for+Video+Transformers&hl=en&as_sdt=0,33", "gs_version_total": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Meta;Apple;\u00c9cole Polytechnique de Montr\u00e9al", "aff_unique_dep": "Facebook AI Research (FAIR);Apple Inc.;", "aff_unique_url": "https://meta.com;https://www.apple.com;https://www.polymtl.ca", "aff_unique_abbr": "Meta;Apple;Polytechnique Montr\u00e9al", "aff_campus_unique_index": "1", "aff_campus_unique": ";Montr\u00e9al", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;Canada" }, { "id": "6BdJ5G5wEdp", "title": "MSQ-BioBERT: Ambiguity Resolution to Enhance BioBERT Medical Question-Answering", "track": "main", "status": "Withdraw", "tldr": "A way to improve BioBERT Question-answering using multiple synonymous questions.", "abstract": "Bidirectional Encoder Representations from Transformers (BERT) and its biomedical variation (BioBERT) achieve impressive results on the SQuAD or medical question-answering (QA) datasets, and so they are widely used for a variety of passage-based QA tasks. However, their performances rapidly deteriorate when encountering passage and context ambiguities. This issue is prevalent and unavoidable in many fields, notably the medical field. To address this issue, we introduce a novel approach called the Multiple Synonymous Questions BioBERT (MSQ-BioBERT), which integrates question augmentation, rather than the typical single question used by traditional BioBERT, to elevate performance. Experiments with both an ambiguous medical dataset and open biomedical datasets demonstrate the significant performance gains of the MSQ-BioBERT approach, showcasing a new method for addressing ambiguity in QA tasks. ", "keywords": "Question answering;Question augmentation;BioBERT;Matrix approximation", "primary_area": "", "supplementary_material": "", "author": "Muzhe Guo;Muhao Guo;Edward T. Dougherty;Fang Jin", "authorids": "~Muzhe_Guo1;~Muhao_Guo1;edougherty@rwu.edu;fangjin@gwu.edu", "gender": "M;M;;", "homepage": "https://blogs.gwu.edu/muzheguo/;;;", "dblp": "311/8456;345/6430;;", "google_scholar": ";wIOmifAAAAAJ;;", "orcid": "0000-0001-6701-3111;0000-0002-9890-8214;;", "linkedin": "muzhe-guo-aa43a9166/;muhaoguo/;;", "or_profile": "~Muzhe_Guo1;~Muhao_Guo1;edougherty@rwu.edu;fangjin@gwu.edu", "aff": "George Washington University;Arizona State University;;", "aff_domain": "gwu.edu;asu.edu;;", "position": "PhD student;PhD student;;", "bibtex": "@misc{\nguo2023msqbiobert,\ntitle={{MSQ}-Bio{BERT}: Ambiguity Resolution to Enhance Bio{BERT} Medical Question-Answering},\nauthor={Muzhe Guo and Muhao Guo and Edward T. 
Dougherty and Fang Jin},\nyear={2023},\nurl={https://openreview.net/forum?id=6BdJ5G5wEdp}\n}", "github": "", "project": "", "reviewers": "BDKD;veEq;87VH", "site": "https://openreview.net/forum?id=6BdJ5G5wEdp", "pdf_size": 2118356, "recommendation": "3;5;5", "confidence": "5;4;4", "correctness": "2;3;3", "technical_novelty": "2;2;2", "empirical_novelty": "2;2;2", "wc_summary_paper": "42;82;133", "wc_strength_and_weaknesses": "357;223;170", "wc_clarity_quality_novelty_and_reproducibility": "97;43;22", "wc_summary_review": "87;32;34", "wc_review": "583;380;359", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 85.66666666666667, 37.2409571424915 ], "wc_strength_and_weaknesses_avg": [ 250.0, 78.69349824900827 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 54.0, 31.591137997862628 ], "wc_summary_review_avg": [ 51.0, 25.468935326524086 ], "wc_review_avg": [ 440.6666666666667, 101.00935050226235 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.9999999999999997, "corr_recommendation_correctness": 0.9999999999999998, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15692311037579596490&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1", "aff_unique_norm": "George Washington University;Arizona State University", "aff_unique_dep": ";", "aff_unique_url": "https://www.gwu.edu;https://www.asu.edu", "aff_unique_abbr": "GWU;ASU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "6FAWzRMRk7A", "title": "Correcting Three Existing Beliefs on Mutual Information in Contrastive Learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Contrastive learning has played a pivotal role in the recent success of unsupervised representation learning. It has been commonly explained with instance discrimination and a mutual information loss, and some of the fundamental explanations are based on mutual information analysis. In this work, we develop new methods that enable rigorous analysis of mutual information in contrastive learning. Using the methods, we investigate three existing beliefs and show that they are incorrect. Based on the investigation results, we address two issues in the discussion section. 
In particular, we question if contrastive learning is indeed an unsupervised representation learning method because the current framework of contrastive learning relies on validation performance for tuning the augmentation design.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Kyungeun Lee;Jaeill Kim;Suhyun Kang;Wonjong Rhee", "authorids": "~Kyungeun_Lee1;~Jaeill_Kim1;~Suhyun_Kang1;~Wonjong_Rhee1", "gender": "F;M;M;", "homepage": "https://sites.google.com/view/cvkyungeunlee/;https://sites.google.com/view/jaeillkim;;http://drl.snu.ac.kr", "dblp": "230/3844;311/1999;;37/711", "google_scholar": "ASy-_MEAAAAJ;kVJRl3wAAAAJ;YSqgT04AAAAJ;https://scholar.google.co.kr/citations?user=htFuYWsAAAAJ", "orcid": "0000-0002-1674-7147;;;0000-0002-2590-8774", "linkedin": ";;suhyun-kang-58b170259/;wonjong/", "or_profile": "~Kyungeun_Lee1;~Jaeill_Kim1;~Suhyun_Kang1;~Wonjong_Rhee1", "aff": "Seoul National University;Seoul National University;Seoul National University;Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr", "position": "PhD student;PhD student;PhD student;Associate Professor", "bibtex": "@misc{\nlee2023correcting,\ntitle={Correcting Three Existing Beliefs on Mutual Information in Contrastive Learning},\nauthor={Kyungeun Lee and Jaeill Kim and Suhyun Kang and Wonjong Rhee},\nyear={2023},\nurl={https://openreview.net/forum?id=6FAWzRMRk7A}\n}", "github": "", "project": "", "reviewers": "qUxp;nSSh;jSDs;zEXJ;YYx6", "site": "https://openreview.net/forum?id=6FAWzRMRk7A", "pdf_size": 18544062, "recommendation": "3;3;3;5;6", "confidence": "3;4;3;4;4", "correctness": "2;2;2;2;2", "technical_novelty": "2;3;2;2;3", "empirical_novelty": "3;0;2;2;4", "wc_summary_paper": "159;105;78;119;110", "wc_strength_and_weaknesses": "985;300;356;209;2359", "wc_clarity_quality_novelty_and_reproducibility": "186;117;52;12;178", "wc_summary_review": "39;23;26;15;147", "wc_review": "1369;545;512;355;2794", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;0;0", "reply_authors": "0;0;0;0;0", "recommendation_avg": [ 4.0, 1.2649110640673518 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "correctness_avg": [ 2.0, 0.0 ], "technical_novelty_avg": [ 2.4, 0.4898979485566356 ], "empirical_novelty_avg": [ 2.2, 1.32664991614216 ], "wc_summary_paper_avg": [ 114.2, 26.24042682579687 ], "wc_strength_and_weaknesses_avg": [ 841.8, 806.5217666002574 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 109.0, 68.42806441804414 ], "wc_summary_review_avg": [ 50.0, 49.1121166312347 ], "wc_review_avg": [ 1115.0, 911.0066959139214 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.6454972243679028, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:TVbFh3Nvg7gJ:scholar.google.com/&scioq=Correcting+Three+Existing+Beliefs+on+Mutual+Information+in+Contrastive+Learning&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Seoul National University", "aff_unique_dep": "", "aff_unique_url": "https://www.snu.ac.kr", "aff_unique_abbr": "SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "6FEULL9vSUt", "title": "Learning to Predict Parameter for Unseen Data", "track": 
"main", "status": "Reject", "tldr": "", "abstract": "Typical deep learning models depend heavily on large amounts of training data and resort to an iterative optimization algorithm (e.g., SGD or Adam) for learning network parameters, which makes the training process very time- and resource-intensive. In this paper, we propose a new training paradigm and formulate network parameter training into a prediction task: given a network architecture, we observe there exists correlations between datasets and their corresponding optimal network parameters, and explore if we can learn a hyper-mapping between them to capture the relations, such that we can directly predict the parameters of the network for a new dataset never seen during the training phase. To do this, we put forward a new hypernetwork with the purpose of building a mapping between datasets and their corresponding network parameters, and then predict parameters for unseen data with only a single forward propagation of the hypernetwork. At its heart, our model benefits from a series of GRU sharing weights to capture the dependencies of parameters among different layers in the network. Extensive experimental studies are performed and experimental results validate our proposed method achieves surprisingly good efficacy. For instance, it takes 119 GPU seconds to train ResNet-18 using Adam from scratch and obtain a top-1 accuracy of 74.56%, while our method costs only 0.5 GPU seconds to predict the network parameters of ResNet-18 achieving comparable performance (73.33%), more than 200 times faster than the traditional training paradigm.", "keywords": "Parameter Prediction;Training paradigm", "primary_area": "", "supplementary_material": "", "author": "Shiye Wang;Kaituo Feng;Changsheng Li;Ye Yuan;Guoren Wang", "authorids": "~Shiye_Wang2;~Kaituo_Feng1;~Changsheng_Li4;~Ye_Yuan15;~Guoren_Wang2", "gender": ";M;M;;M", "homepage": "https://wangshiye1.github.io/;https://github.com/tulerfeng;;;https://guorenwang.github.io/", "dblp": "240/2541;322/6044;;;", "google_scholar": ";m1iCh00AAAAJ;FfJnUioAAAAJ;;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";;0000-0001-9789-7632;;", "linkedin": ";;;;", "or_profile": "~Shiye_Wang2;~Kaituo_Feng1;~Changsheng_Li4;~Ye_Yuan15;~Guoren_Wang2", "aff": "Beijing Institute of Technology;Beijing Institute of Technology;Beijing Institute of Technology;;Beijing Institute of Technology", "aff_domain": "bit.edu.cn;bit.edu.cn;bit.edu.cn;;bit.edu.cn", "position": "PhD student;MS student;Full Professor;;Full Professor", "bibtex": "@misc{\nwang2023learning,\ntitle={Learning to Predict Parameter for Unseen Data},\nauthor={Shiye Wang and Kaituo Feng and Changsheng Li and Ye Yuan and Guoren Wang},\nyear={2023},\nurl={https://openreview.net/forum?id=6FEULL9vSUt}\n}", "github": "", "project": "", "reviewers": "VS2g;AS8e;y6gY", "site": "https://openreview.net/forum?id=6FEULL9vSUt", "pdf_size": 800146, "recommendation": "5;5;6", "confidence": "3;3;3", "correctness": "3;2;3", "technical_novelty": "3;3;3", "empirical_novelty": "0;2;3", "wc_summary_paper": "97;83;47", "wc_strength_and_weaknesses": "56;223;224", "wc_clarity_quality_novelty_and_reproducibility": "376;14;43", "wc_summary_review": "34;43;32", "wc_review": "563;363;346", "wc_reply_reviewers": "0;65;0", "wc_reply_authors": "2405;1777;964", "reply_reviewers": "0;1;0", "reply_authors": "5;4;3", "recommendation_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.0 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], 
"technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 1.6666666666666667, 1.247219128924647 ], "wc_summary_paper_avg": [ 75.66666666666667, 21.060758665241753 ], "wc_strength_and_weaknesses_avg": [ 167.66666666666666, 78.96131260870028 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 144.33333333333334, 164.24033879923925 ], "wc_summary_review_avg": [ 36.333333333333336, 4.784233364802441 ], "wc_review_avg": [ 424.0, 98.53256652836495 ], "wc_reply_reviewers_avg": [ 21.666666666666668, 30.641293851417057 ], "wc_reply_authors_avg": [ 1715.3333333333333, 589.8996148121777 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 4.0, 0.816496580927726 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.4999999999999999, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17708144385337506766&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Beijing Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.bit.edu.cn/", "aff_unique_abbr": "BIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "6Fq1-57gff", "title": "The World is Changing: Improving Fair Training under Correlation Shifts", "track": "main", "status": "Reject", "tldr": "We analyze fundamental limits in accuracy and fairness of in-processing fair algorithms when the data bias changes with correlation shifts and propose a novel pre-processing step that improves their performances.", "abstract": "Model fairness is an essential element for Trustworthy AI. While many techniques for model fairness have been proposed, most of them assume that the training and deployment data distributions are identical, which is often not true in practice. In particular, when the bias between labels and sensitive groups changes, the fairness of the trained model is directly influenced and can worsen. We make two contributions for solving this problem. First, we analytically show that existing in-processing fair algorithms have fundamental limits in accuracy and fairness. We introduce the notion of correlation shifts, which can explicitly capture the change of the above bias. Second, we propose a novel pre-processing step that samples the input data to reduce correlation shifts and thus enables the in-processing approaches to overcome their limitations. We formulate an optimization problem for adjusting the data ratio among labels and sensitive groups to reflect the shifted correlation. A key advantage of our approach lies in decoupling the roles of pre-processing and in-processing approaches: correlation adjustment via pre-processing and unfairness mitigation on the processed data via in-processing. Experiments show that our framework effectively improves existing in-processing fair algorithms w.r.t. 
accuracy and fairness, both on synthetic and real datasets.", "keywords": "trustworthy AI;fairness;correlation shifts", "primary_area": "", "supplementary_material": "/attachment/ee63cd30a40a1cda99b1979c0dcac4f74c096a3d.zip", "author": "Yuji Roh;Kangwook Lee;Steven Euijong Whang;Changho Suh", "authorids": "~Yuji_Roh1;~Kangwook_Lee1;~Steven_Euijong_Whang1;~Changho_Suh1", "gender": "F;M;M;M", "homepage": ";http://kangwooklee.com/;http://www.stevenwhang.com;https://csuh.kaist.ac.kr", "dblp": "230/3981;88/9826-1;w/StevenEuijongWhang;75/1420", "google_scholar": ";sCEl8r-n5VEC;w6hts30AAAAJ;https://scholar.google.com.tw/citations?user=B1guGw8AAAAJ", "orcid": ";;0000-0001-6419-931X;0000-0002-3101-4291", "linkedin": ";;steven-euijong-whang-1612b5a/;changho-suh-584aa732/?originalSubdomain=kr", "or_profile": "~Yuji_Roh1;~Kangwook_Lee1;~Steven_Euijong_Whang1;~Changho_Suh1", "aff": "Korea Advanced Institute of Science & Technology;KRAFTON;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;krafton.com;kaist.ac.kr;kaist.ac.kr", "position": "PhD student;Researcher;Associate Professor;Associate Professor", "bibtex": "@misc{\nroh2023the,\ntitle={The World is Changing: Improving Fair Training under Correlation Shifts},\nauthor={Yuji Roh and Kangwook Lee and Steven Euijong Whang and Changho Suh},\nyear={2023},\nurl={https://openreview.net/forum?id=6Fq1-57gff}\n}", "github": "", "project": "", "reviewers": "9fri;H3mp;gS56;c8Rf", "site": "https://openreview.net/forum?id=6Fq1-57gff", "pdf_size": 4274464, "recommendation": "5;5;6;8", "confidence": "4;4;3;4", "correctness": "3;3;3;4", "technical_novelty": "3;3;3;3", "empirical_novelty": "3;3;3;4", "wc_summary_paper": "75;88;36;105", "wc_strength_and_weaknesses": "326;370;80;103", "wc_clarity_quality_novelty_and_reproducibility": "26;40;7;18", "wc_summary_review": "27;281;17;22", "wc_review": "454;779;140;248", "wc_reply_reviewers": "80;48;0;0", "wc_reply_authors": "1049;1603;249;258", "reply_reviewers": "1;1;0;0", "reply_authors": "3;4;1;1", "recommendation_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 76.0, 25.42636427018224 ], "wc_strength_and_weaknesses_avg": [ 219.75, 129.4456932462413 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 22.75, 12.028611723719408 ], "wc_summary_review_avg": [ 86.75, 112.20600474127933 ], "wc_review_avg": [ 405.25, 243.49063123660426 ], "wc_reply_reviewers_avg": [ 32.0, 33.94112549695428 ], "wc_reply_authors_avg": [ 789.75, 570.9104023399819 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.25, 1.299038105676658 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.9428090415820632, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:6Gh59rLnk9gJ:scholar.google.com/&scioq=The+World+is+Changing:+Improving+Fair+Training+under+Correlation+Shifts&hl=en&as_sdt=0,23", "gs_version_total": 2, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;KRAFTON Inc.", "aff_unique_dep": ";", "aff_unique_url": "https://www.kaist.ac.kr;https://www.krafton.com", "aff_unique_abbr": "KAIST;KRAFTON", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", 
"aff_country_unique": "South Korea" }, { "id": "6G1MXNU8VtV", "title": "Black-Box Adversarial Attack Guided by Model Behavior for Programming Pre-trained Language Models", "track": "main", "status": "Withdraw", "tldr": "We use the uncertainty of model outputs to guide searching for adversarial examples by the variable name replacement.", "abstract": "Pre-trained models for programming languages are widely used to solve code tasks in Software Engineering (SE) community, such as code clone detection and bug identification. Reliability is the primary concern of these machine learning applications in SE because software failure can lead to intolerable loss. However, deep neural networks are known to suffer from adversarial attacks. In this paper, we propose a novel black-box adversarial attack based on model behaviors for pre-trained programming language models, named Representation Nearest Neighbor Search(RNNS). The proposed approach can efficiently identify adversarial examples via variable replacement in an ample search space of real variable names under similarity constraints. We evaluate RNNS on 6 code tasks (e.g., clone detection), 3 programming languages (Java, Python, and C), and 3 pre-trained code models: CodeBERT, GraphCodeBERT, and CodeT5. The results demonstrate that RNNS outperforms the state-of-the-art black-box attacking method (MHM) in terms of both attack success rate and quality of generated adversarial examples. ", "keywords": "black-box;adversarial attack;pre-trained models for programming languages;code model", "primary_area": "", "supplementary_material": "", "author": "Jie Zhang;Wei Ma;Xiaofei Xie;Qiang Hu;Yang Liu", "authorids": "clark.zhang@huawei.com;~Wei_Ma1;~Xiaofei_Xie2;~Qiang_Hu3;~Yang_Liu36", "gender": ";M;M;;M", "homepage": ";https://marvinmw.github.io/weima/;http://xiaofeixie.bitbucket.io/;https://wellido.github.io/;https://personal.ntu.edu.sg/yangliu/", "dblp": ";;127/0713;;51/3710-3", "google_scholar": ";ZubTNs0AAAAJ;FfcZfJgAAAAJ;UTWWmz4AAAAJ;https://scholar.google.com.sg/citations?hl=en", "orcid": ";;0000-0002-1288-6502;;0000-0001-7300-9215", "linkedin": ";;;;", "or_profile": "clark.zhang@huawei.com;~Wei_Ma1;~Xiaofei_Xie2;~Qiang_Hu3;~Yang_Liu36", "aff": ";Nanyang Technological University;Singapore Management University;University of Luxembourg;Nanyang Technological University", "aff_domain": ";ntu.edu.sg;smu.edu.sg;uni.lu;ntu.edu.sg", "position": ";Researcher;Assistant Professor;PhD student;Full Professor", "bibtex": "@misc{\nzhang2023blackbox,\ntitle={Black-Box Adversarial Attack Guided by Model Behavior for Programming Pre-trained Language Models},\nauthor={Jie Zhang and Wei Ma and Xiaofei Xie and Qiang Hu and Yang Liu},\nyear={2023},\nurl={https://openreview.net/forum?id=6G1MXNU8VtV}\n}", "github": "", "project": "", "reviewers": "kWkK;RhVp;cjtq", "site": "https://openreview.net/forum?id=6G1MXNU8VtV", "pdf_size": 1489902, "recommendation": "3;5;6", "confidence": "4;4;4", "correctness": "2;3;3", "technical_novelty": "2;2;3", "empirical_novelty": "2;3;3", "wc_summary_paper": "133;56;37", "wc_strength_and_weaknesses": "184;32;83", "wc_clarity_quality_novelty_and_reproducibility": "258;14;41", "wc_summary_review": "16;247;22", "wc_review": "591;349;183", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "372;281;224", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 
2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 75.33333333333333, 41.507696742759514 ], "wc_strength_and_weaknesses_avg": [ 99.66666666666667, 63.16292864929203 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 104.33333333333333, 109.21640088476741 ], "wc_summary_review_avg": [ 95.0, 107.50813922675809 ], "wc_review_avg": [ 374.3333333333333, 167.5257857432368 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 292.3333333333333, 60.94988834189025 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.9449111825230683, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3838023517024859599&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Nanyang Technological University;Singapore Management University;University of Luxembourg", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ntu.edu.sg;https://www.smu.edu.sg;https://wwwen.uniluxembourg.lu", "aff_unique_abbr": "NTU;SMU;Uni Lu", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Singapore;Luxembourg" }, { "id": "6G5DwFLYRM", "title": "META-LEARNING FOR UNSUPERVISED OUTLIER DETECTION WITH OPTIMAL TRANSPORT", "track": "main", "status": "Withdraw", "tldr": "A new meta-learning approach for unsupervised machine learning problems with optimal transport.", "abstract": "Automated machine learning has been widely researched and adopted in the field of supervised classification and regression, but progress in unsupervised settings has been limited. We propose a novel approach to automate outlier detection based on meta-learning from previous datasets with outliers. Our premise is that the selection of the optimal outlier detection technique depends on inherent properties of the data distribution. In particular, we leverage the Gromov-Wasserstein distance to find the dataset with the most similar underlying distribution, and then apply the outlier detection techniques that proved to work best for that data distribution. We evaluate the robustness of our approach and find that it outperforms state-of-the-art methods in unsupervised outlier detection. 
This approach can also be easily generalized to automate other unsupervised settings.", "keywords": "unsupervised learning;automl;optimal transport", "primary_area": "", "supplementary_material": "/attachment/788829bc77850b2cf12c1934c13f49eaf453aa1c.zip", "author": "prabhant singh;Joaquin Vanschoren", "authorids": "~prabhant_singh1;~Joaquin_Vanschoren1", "gender": "M;M", "homepage": "https://prabhant.github.io/;http://www.win.tue.nl/~jvanscho/", "dblp": "243/3553;85/5045", "google_scholar": "https://scholar.google.com/citations?hl=en;HhDsD9UAAAAJ", "orcid": ";0000-0001-7044-9805", "linkedin": ";", "or_profile": "~prabhant_singh1;~Joaquin_Vanschoren1", "aff": "Eindhoven University of Technology;Eindhoven University of Technology", "aff_domain": "tue.nl;tue.nl", "position": "PhD student;Associate Professor", "bibtex": "@misc{\nsingh2023metalearning,\ntitle={{META}-{LEARNING} {FOR} {UNSUPERVISED} {OUTLIER} {DETECTION} {WITH} {OPTIMAL} {TRANSPORT}},\nauthor={prabhant singh and Joaquin Vanschoren},\nyear={2023},\nurl={https://openreview.net/forum?id=6G5DwFLYRM}\n}", "github": "", "project": "", "reviewers": "vhGa;dXjK;E9xX;hzxw", "site": "https://openreview.net/forum?id=6G5DwFLYRM", "pdf_size": 503537, "recommendation": "1;3;3;5", "confidence": "4;3;4;2", "correctness": "2;2;2;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;2;2;4", "wc_summary_paper": "47;56;81;104", "wc_strength_and_weaknesses": "135;359;258;156", "wc_clarity_quality_novelty_and_reproducibility": "161;20;62;145", "wc_summary_review": "19;32;55;73", "wc_review": "362;467;456;478", "wc_reply_reviewers": "0;124;50;64", "wc_reply_authors": "402;366;456;543", "reply_reviewers": "0;1;1;1", "reply_authors": "2;2;3;3", "recommendation_avg": [ 3.0, 1.4142135623730951 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 2.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 72.0, 22.282279955157193 ], "wc_strength_and_weaknesses_avg": [ 227.0, 89.2888570875448 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 97.0, 58.210823735796765 ], "wc_summary_review_avg": [ 44.75, 20.78911975048487 ], "wc_review_avg": [ 440.75, 46.12686310600364 ], "wc_reply_reviewers_avg": [ 59.5, 44.189930979805794 ], "wc_reply_authors_avg": [ 441.75, 66.65723891671482 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.8528028654224418, "corr_recommendation_correctness": 0.816496580927726, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17197427278832617668&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Eindhoven University of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.tue.nl", "aff_unique_abbr": "TU/e", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Netherlands" }, { "title": "A Kernel Perspective of Skip Connections in Convolutional Networks", "status": "Top-5%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10759", "id": "6H_uOfcwiVh", "poster": "/media/PosterPDFs/ICLR%202023/10759.png?t=1682523112.2907224", "openreview": "https://openreview.net/forum?id=6H_uOfcwiVh", "slides": "https://iclr.cc/virtual/2023/poster/10759", "video": "https://iclr.cc/virtual/2023/poster/10759", "author_site": "Daniel 
Barzilai, Amnon Geifman, Meirav Galun, Ronen Basri", "tldr": "", "abstract": "Over-parameterized residual networks (ResNets) are amongst the most successful convolutional neural architectures for image processing. Here we study their properties through their Gaussian Process and Neural Tangent kernels. We derive explicit formulas for these kernels, analyze their spectra, and provide bounds on their implied condition numbers. Our results indicate that (1) with ReLU activation, the eigenvalues of these residual kernels decay polynomially at a similar rate compared to the same kernels when skip connections are not used, thus maintaining a similar frequency bias; (2) however, residual kernels are more locally biased. Our analysis further shows that the matrices obtained by these residual kernels yield more favorable condition numbers at finite depths than those obtained without the skip connections, therefore enabling faster convergence of training with gradient descent.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Daniel Barzilai;Amnon Geifman;Meirav Galun;Ronen Basri", "authorids": "~Daniel_Barzilai1;~Amnon_Geifman1;~Meirav_Galun1;~Ronen_Basri1", "gender": "M;M;F;M", "homepage": ";https://scholar.google.co.il/citations?user=Drcgf9wAAAAJ&hl=en;https://www.weizmann.ac.il/math/meirav/;https://www.weizmann.ac.il/math/ronen/", "dblp": "334/4656;232/2462;92/3521;b/RonenBasri.html", "google_scholar": "B6zVOFoAAAAJ;https://scholar.google.co.il/citations?user=Drcgf9wAAAAJ;https://scholar.google.co.il/citations?user=oVsC3XcAAAAJ;d6vuvHIAAAAJ", "orcid": ";;;", "linkedin": "daniel-barzilai-1a9b61219/;;;", "or_profile": "~Daniel_Barzilai1;~Amnon_Geifman1;~Meirav_Galun1;~Ronen_Basri1", "aff": "Weizmann Institute of Science;Weizmann Institute, Technion;Weizmann Institute;Meta Platforms Inc.", "aff_domain": "weizmann.ac.il;weizmann.ac.il;weizmann.ac.il;meta.com", "position": "MS student;PhD student;Assistant Professor;Researcher", "bibtex": "@inproceedings{\nbarzilai2023a,\ntitle={A Kernel Perspective of Skip Connections in Convolutional Networks},\nauthor={Daniel Barzilai and Amnon Geifman and Meirav Galun and Ronen Basri},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=6H_uOfcwiVh}\n}", "github": "", "project": "", "reviewers": "zQoE;TKdt;RWs1;ciS9", "pdf_size": 678629, "recommendation": "5;8;8;8", "confidence": "2;4;3;3", "correctness": "3;4;4;4", "technical_novelty": "3;4;4;3", "empirical_novelty": "2;0;2;3", "wc_summary_paper": "42;99;107;89", "wc_strength_and_weaknesses": "42;244;298;432", "wc_clarity_quality_novelty_and_reproducibility": "46;25;90;95", "wc_summary_review": "22;6;50;36", "wc_review": "152;374;545;652", "wc_reply_reviewers": "0;0;0;14", "wc_reply_authors": "448;133;286;760", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;1", "recommendation_avg": [ 7.25, 1.299038105676658 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 3.5, 0.5 ], "empirical_novelty_avg": [ 1.75, 1.0897247358851685 ], "wc_summary_paper_avg": [ 84.25, 25.21284394906691 ], "wc_strength_and_weaknesses_avg": [ 254.0, 140.2355161861645 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 64.0, 29.50423698386386 ], "wc_summary_review_avg": [ 28.5, 16.332482971061076 ], "wc_review_avg": [ 430.75, 189.02827169500333 ], "wc_reply_reviewers_avg": [ 3.5, 6.06217782649107 ], 
"wc_reply_authors_avg": [ 406.75, 232.38263166596596 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.816496580927726, "corr_recommendation_correctness": 1.0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11679353657180537861&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=6H_uOfcwiVh", "email": "weizmann.ac.il;weizmann.ac.il;weizmann.ac.il;meta.com", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Weizmann Institute of Science;Meta", "aff_unique_dep": ";Meta Platforms Inc.", "aff_unique_url": "https://www.weizmann.org.il;https://www.meta.com", "aff_unique_abbr": "Weizmann;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "Israel;United States" }, { "title": "On the Performance of Temporal Difference Learning With Neural Networks", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11766", "id": "6JMXLWX68Kj", "poster": "/media/PosterPDFs/ICLR%202023/11766.png?t=1680788608.2117293", "openreview": "https://openreview.net/forum?id=6JMXLWX68Kj", "slides": "https://iclr.cc/virtual/2023/poster/11766", "video": "https://iclr.cc/virtual/2023/poster/11766", "author_site": "Haoxing Tian, Ioannis Paschalidis, Alex Olshevsky", "tldr": "", "abstract": "Neural Temporal Difference (TD) Learning is an approximate temporal difference method for policy evaluation that uses a neural network for function approximation. Analysis of Neural TD Learning has proven to be challenging. In this paper we provide a convergence analysis of Neural TD Learning with a projection onto $B(\\theta_0, \\omega)$, a ball of fixed radius $\\omega$ around the initial point $\\theta_0$. We show an approximation bound of $O(\\epsilon + 1/\\sqrt{m})$ where $\\epsilon$ is the approximation quality of the best neural network in $B(\\theta_0, \\omega)$ and $m$ is the width of all hidden layers in the network. 
", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "HAOXING TIAN;Ioannis Paschalidis;Alex Olshevsky", "authorids": "~HAOXING_TIAN1;~Ioannis_Paschalidis1;~Alex_Olshevsky1", "gender": "M;M;M", "homepage": ";http://sites.bu.edu/paschalidis/;http://sites.bu.edu/aolshevsky/", "dblp": "350/3752;44/2060;21/4206", "google_scholar": "g8jTnD0AAAAJ;Es_hZ0QAAAAJ;YKwHoFMAAAAJ", "orcid": ";0000-0002-3343-2913;", "linkedin": ";yannis-paschalidis-75a921/;alex-olshevsky-43336698/", "or_profile": "~HAOXING_TIAN1;~Ioannis_Paschalidis1;~Alexander_Olshevsky1", "aff": "Boston University;Boston University;Boston University", "aff_domain": "bu.edu;bu.edu;bu.edu", "position": "PhD student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\ntian2023on,\ntitle={On the Performance of Temporal Difference Learning With Neural Networks},\nauthor={HAOXING TIAN and Ioannis Paschalidis and Alex Olshevsky},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=6JMXLWX68Kj}\n}", "github": "", "project": "", "reviewers": "bfdn;SRvC;oRiv;ZfL8", "pdf_size": 922778, "recommendation": "6;6;6;8", "confidence": "4;3;4;2", "correctness": "4;3;3;4", "technical_novelty": "4;2;2;3", "empirical_novelty": "0;2;0;2", "wc_summary_paper": "44;23;87;130", "wc_strength_and_weaknesses": "137;180;422;94", "wc_clarity_quality_novelty_and_reproducibility": "15;24;15;18", "wc_summary_review": "36;30;31;11", "wc_review": "232;257;555;253", "wc_reply_reviewers": "0;23;0;0", "wc_reply_authors": "912;1092;2353;337", "reply_reviewers": "0;1;0;0", "reply_authors": "2;2;4;1", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 1.0, 1.0 ], "wc_summary_paper_avg": [ 71.0, 41.140004861448425 ], "wc_strength_and_weaknesses_avg": [ 208.25, 127.09912470194277 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 18.0, 3.6742346141747673 ], "wc_summary_review_avg": [ 27.0, 9.513148795220223 ], "wc_review_avg": [ 324.25, 133.56154948187745 ], "wc_reply_reviewers_avg": [ 5.75, 9.959292143521045 ], "wc_reply_authors_avg": [ 1173.5, 735.8629288121532 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.8703882797784891, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8542089570375555757&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 7, "pdf": "https://openreview.net/pdf?id=6JMXLWX68Kj", "email": "bu.edu;bu.edu;bu.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Boston University", "aff_unique_dep": "", "aff_unique_url": "https://www.bu.edu", "aff_unique_abbr": "BU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Uni-Mol: A Universal 3D Molecular Representation Learning Framework", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11617", "id": "6K2RM6wVqKu", "poster": "/media/PosterPDFs/ICLR%202023/11617.png?t=1681370539.1384828", "openreview": "https://openreview.net/forum?id=6K2RM6wVqKu", "slides": "https://iclr.cc/virtual/2023/poster/11617", "video": "https://iclr.cc/virtual/2023/poster/11617", 
"author_site": "Gengmo Zhou, Zhifeng Gao, Qiankun Ding, Hang Zheng, Hongteng Xu, Zhewei Wei, Linfeng Zhang, Guolin Ke", "tldr": "A universal 3D molecular pretraining framework that significantly enlarges the representation ability and application scope in drug design.", "abstract": "Molecular representation learning (MRL) has gained tremendous attention due to its critical role in learning from limited supervised data for applications like drug design. In most MRL methods, molecules are treated as 1D sequential tokens or 2D topology graphs, limiting their ability to incorporate 3D information for downstream tasks and, in particular, making it almost impossible for 3D geometry prediction/generation. In this paper, we propose a universal 3D MRL framework, called Uni-Mol, that significantly enlarges the representation ability and application scope of MRL schemes. Uni-Mol contains two pretrained models with the same SE(3) Transformer architecture: a molecular model pretrained by 209M molecular conformations; a pocket model pretrained by 3M candidate protein pocket data. Besides, Uni-Mol contains several finetuning strategies to apply the pretrained models to various downstream tasks. By properly incorporating 3D information, Uni-Mol outperforms SOTA in 14/15 molecular property prediction tasks. Moreover, Uni-Mol achieves superior performance in 3D spatial tasks, including protein-ligand binding pose prediction, molecular conformation generation, etc. The code, model, and data are made publicly available at https://github.com/dptech-corp/Uni-Mol.", "keywords": "Representation Learning;Large-Scale 3D Molecular Pretraining;Molecular Property;Protein-Ligand Complex", "primary_area": "", "supplementary_material": "", "author": "Gengmo Zhou;Zhifeng Gao;Qiankun Ding;Hang Zheng;Hongteng Xu;Zhewei Wei;Linfeng Zhang;Guolin Ke", "authorids": "~Gengmo_Zhou1;~Zhifeng_Gao1;~Qiankun_Ding1;~Hang_Zheng2;~Hongteng_Xu1;~Zhewei_Wei1;~Linfeng_Zhang1;~Guolin_Ke3", "gender": ";M;;M;M;M;M;M", "homepage": "https://zhougengmo.github.io/;;https://www.dp.tech/;;https://hongtengxu.github.io;http://weizhewei.com;;https://guolinke.github.io", "dblp": ";71/6161;;;38/10816;94/4260;;190/7810", "google_scholar": "z76EQ7YAAAAJ;uBo3SJcAAAAJ;;;7gYVOO8AAAAJ;https://scholar.google.com.hk/citations?user=qZ7dj4gAAAAJ;;M2qJgtoAAAAJ", "orcid": ";;;0000-0002-2825-0576;0000-0003-4192-5360;0000-0003-3620-5086;0000-0002-8470-5846;", "linkedin": ";;;;;;;", "or_profile": "~Gengmo_Zhou1;~Zhifeng_Gao1;~Qiankun_Ding1;~Hang_Zheng2;~Hongteng_Xu1;~Zhewei_Wei1;~Linfeng_Zhang1;~guolin_ke1", "aff": "Renmin University of China;DP Technology;;Peking University;Renmin University of China;Renmin University of China;DP Technology;DP Technology", "aff_domain": "ruc.edu.cn;dp.tech;;pku.edu.cn;ruc.edu.cn;ruc.edu.cn;dp.tech;dp.tech", "position": "MS student;Researcher;;PhD student;Associate Professor;Full Professor;Researcher;Senior Researcher", "bibtex": "@inproceedings{\nzhou2023unimol,\ntitle={Uni-Mol: A Universal 3D Molecular Representation Learning Framework},\nauthor={Gengmo Zhou and Zhifeng Gao and Qiankun Ding and Hang Zheng and Hongteng Xu and Zhewei Wei and Linfeng Zhang and Guolin Ke},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=6K2RM6wVqKu}\n}", "github": "", "project": "", "reviewers": "LVDL;uSFd;6Ybw;CPRe", "pdf_size": 2992275, "recommendation": "5;8;8;10", "confidence": "4;4;4;4", "correctness": "2;4;4;3", "technical_novelty": "2;3;3;3", "empirical_novelty": 
"2;0;3;4", "wc_summary_paper": "51;101;89;185", "wc_strength_and_weaknesses": "466;155;277;88", "wc_clarity_quality_novelty_and_reproducibility": "23;7;51;342", "wc_summary_review": "70;2;68;30", "wc_review": "610;265;485;645", "wc_reply_reviewers": "510;0;0;0", "wc_reply_authors": "2887;184;211;356", "reply_reviewers": "3;0;0;0", "reply_authors": "8;1;1;1", "recommendation_avg": [ 7.75, 1.7853571071357126 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 1.479019945774904 ], "wc_summary_paper_avg": [ 106.5, 48.936182932468284 ], "wc_strength_and_weaknesses_avg": [ 246.5, 143.70542787243633 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 105.75, 137.3050891263685 ], "wc_summary_review_avg": [ 42.5, 28.297526393662043 ], "wc_review_avg": [ 501.25, 148.80251173955364 ], "wc_reply_reviewers_avg": [ 127.5, 220.83647796503186 ], "wc_reply_authors_avg": [ 909.5, 1143.5822008058713 ], "reply_reviewers_avg": [ 0.75, 1.299038105676658 ], "reply_authors_avg": [ 2.75, 3.031088913245535 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5488604301969737, "gs_citation": 377, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13603750970208662640&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "pdf": "https://openreview.net/pdf?id=6K2RM6wVqKu", "email": "ruc.edu.cn;dp.tech;;pku.edu.cn;ruc.edu.cn;ruc.edu.cn;dp.tech;dp.tech", "author_num": 8, "aff_unique_index": "0;1;2;0;0;1;1", "aff_unique_norm": "Renmin University of China;DP Technology;Peking University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.ruc.edu.cn;;http://www.pku.edu.cn", "aff_unique_abbr": "RUC;;Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China;" }, { "id": "6KYPBGeYxv", "title": "CacheGNN: Enhancing Graph Neural Networks with Global Information Caching", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Graph neural networks (GNNs) have achieved impressive results on various graph learning tasks. Most GNNs merely leverage information from a limited range of local neighbors, which is difficult to effectively capture global information in the graph. However, utilising global information enables GNNs to capture long-range dependencies and learn more informative node representations. To this end, we propose CacheGNN, an approach that leverages information from global similar nodes to enhance GNNs. Our CacheGNN uses a cache to store node representations and utilises those cached embeddings to efficiently find global similar nodes. To quickly and efficiently making predictions at test time, our CacheGNN retrieves global similar nodes from a set of representative nodes, which is selected from a sparse node selection distribution with Dirichlet prior. We conduct node classification experiments on seven real-world datasets under inductive and transductive settings. 
Experimental results verify the effectiveness of our CacheGNN.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Ruihong Zeng;Jinyuan Fang;Zaiqiao Meng;Shangsong Liang", "authorids": "~Ruihong_Zeng1;~Jinyuan_Fang1;~Zaiqiao_Meng1;~Shangsong_Liang1", "gender": "M;M;M;M", "homepage": "https://github.com/jyfang6;https://mengzaiqiao.github.io/;;https://zengrh3.github.io/", "dblp": "251/9517;185/0748;57/7731;", "google_scholar": "LOWJnPsAAAAJ;https://scholar.google.com/citations?hl=en;4uggVcIAAAAJ;", "orcid": ";;;0000-0002-3751-9127", "linkedin": ";;;", "or_profile": "~Jinyuan_Fang1;~Zaiqiao_Meng1;~Shangsong_Liang1;~Peter_Tseng_Ruihong2", "aff": "University of Glasgow;University of Glasgow;SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY", "aff_domain": "glasgow.ac.uk;glasgow.ac.uk;sysu.edu.cn;sysu.edu.cn", "position": "PhD student;Lecturer;Associate Professor;MS student", "bibtex": "@misc{\nzeng2023cachegnn,\ntitle={Cache{GNN}: Enhancing Graph Neural Networks with Global Information Caching},\nauthor={Ruihong Zeng and Jinyuan Fang and Zaiqiao Meng and Shangsong Liang},\nyear={2023},\nurl={https://openreview.net/forum?id=6KYPBGeYxv}\n}", "github": "", "project": "", "reviewers": "8tsT;6Vui;92mG", "site": "https://openreview.net/forum?id=6KYPBGeYxv", "pdf_size": 548187, "recommendation": "3;3;5", "confidence": "3;4;4", "correctness": "3;2;3", "technical_novelty": "2;2;3", "empirical_novelty": "2;2;2", "wc_summary_paper": "45;66;95", "wc_strength_and_weaknesses": "141;244;462", "wc_clarity_quality_novelty_and_reproducibility": "68;64;102", "wc_summary_review": "93;18;64", "wc_review": "347;392;723", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 68.66666666666667, 20.49932248202906 ], "wc_strength_and_weaknesses_avg": [ 282.3333333333333, 133.82160596190073 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 78.0, 17.048949136725895 ], "wc_summary_review_avg": [ 58.333333333333336, 30.879694874715902 ], "wc_review_avg": [ 487.3333333333333, 167.65109271605982 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.5, "corr_recommendation_correctness": 0.5, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:1UGAOlkDyxYJ:scholar.google.com/&scioq=CacheGNN:+Enhancing+Graph+Neural+Networks+with+Global+Information+Caching&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;1;1", "aff_unique_norm": "University of Glasgow;Sun Yat-sen University", "aff_unique_dep": ";", "aff_unique_url": "https://www.gla.ac.uk;http://www.sysu.edu.cn", "aff_unique_abbr": "Glasgow;SYSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1", "aff_country_unique": "United Kingdom;China" }, { "id": "6Lh_wgIaT9l", "title": "Optimal Membership Inference Bounds for Adaptive Composition of Sampled Gaussian Mechanisms", "track": "main", "status": "Reject", "tldr": "We prove optimal membership inference bounds for DP-SGD, beating previously known upper bounds on 
membership inference.", "abstract": "Given a trained model and a data sample, membership-inference (MI) attacks predict whether the sample was in the model\u2019s training set. A common counter- measure against MI attacks is to utilize differential privacy (DP) during model training to mask the presence of individual examples. While this use of DP is a principled approach to limit the efficacy of MI attacks, there is a gap between the bounds provided by DP and the empirical performance of MI attacks. In this paper, we derive bounds for the advantage of an adversary mounting a MI attack, and demonstrate tightness for the widely-used Gaussian mechanism. Our analysis answers an open problem in the field of differential privacy, namely the fact that membership inference is not 100% successful even for relatively high budgets ($\\epsilon> 10$). Finally, using our analysis, we provide MI metrics for models trained on CIFAR10 dataset. To the best of our knowledge, our analysis provides the state-of-the-art membership inference bounds.", "keywords": "Membership inference;DP-SGD;Gaussian Mechanism", "primary_area": "", "supplementary_material": "/attachment/ecfaa8f4cbb760de1ba4bbf922d852da896a2c17.zip", "author": "Saeed Mahloujifar;Alexandre Sablayrolles;Graham Cormode;Somesh Jha", "authorids": "~Saeed_Mahloujifar1;~Alexandre_Sablayrolles1;~Graham_Cormode1;~Somesh_Jha1", "gender": "M;;M;M", "homepage": "https://www.cs.virginia.edu/~sm5fd/;;http://dimacs.rutgers.edu/~graham/;", "dblp": "208/0825;186/7749;c/GrahamCormode;j/SomeshJha", "google_scholar": "kW-hl3YAAAAJ;Wy8wM-cAAAAJ;https://scholar.google.co.uk/citations?user=gpLVKmEAAAAJ;BaI7l8QAAAAJ", "orcid": ";;0000-0002-0698-0922;", "linkedin": ";;;", "or_profile": "~Saeed_Mahloujifar1;~Alexandre_Sablayrolles1;~Graham_Cormode1;~Somesh_Jha1", "aff": "Princeton University;Meta Facebook;The university of Warwick;Department of Computer Science, University of Wisconsin, Madison", "aff_domain": "princeton.edu;fb.com;warwick.ac.uk;cs.wisc.edu", "position": "Postdoc;Researcher;Full Professor;Full Professor", "bibtex": "@misc{\nmahloujifar2023optimal,\ntitle={Optimal Membership Inference Bounds for Adaptive Composition of Sampled Gaussian Mechanisms},\nauthor={Saeed Mahloujifar and Alexandre Sablayrolles and Graham Cormode and Somesh Jha},\nyear={2023},\nurl={https://openreview.net/forum?id=6Lh_wgIaT9l}\n}", "github": "", "project": "", "reviewers": "eNLu;H7BJ;919G;8xNP", "site": "https://openreview.net/forum?id=6Lh_wgIaT9l", "pdf_size": 727729, "recommendation": "3;3;5;5", "confidence": "4;4;4;4", "correctness": "3;3;3;4", "technical_novelty": "2;3;2;2", "empirical_novelty": "0;0;2;2", "wc_summary_paper": "60;126;108;83", "wc_strength_and_weaknesses": "307;204;439;43", "wc_clarity_quality_novelty_and_reproducibility": "25;302;46;306", "wc_summary_review": "29;18;13;18", "wc_review": "421;650;606;450", "wc_reply_reviewers": "39;0;0;92", "wc_reply_authors": "323;743;449;414", "reply_reviewers": "1;0;0;1", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.0, 1.0 ], "wc_summary_paper_avg": [ 94.25, 24.983744715314394 ], "wc_strength_and_weaknesses_avg": [ 248.25, 144.84711767929662 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 169.75, 134.4625877335402 ], "wc_summary_review_avg": [ 19.5, 5.852349955359813 ], "wc_review_avg": [ 531.75, 98.03666405993219 ], 
"wc_reply_reviewers_avg": [ 32.75, 37.7317836843158 ], "wc_reply_authors_avg": [ 482.25, 157.41247568093198 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16756664407086380180&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Princeton University;Meta;University of Warwick;University of Wisconsin-Madison", "aff_unique_dep": ";Meta Platforms, Inc.;;Department of Computer Science", "aff_unique_url": "https://www.princeton.edu;https://meta.com;https://warwick.ac.uk;https://www.wisc.edu", "aff_unique_abbr": "Princeton;Meta;Warwick;UW-Madison", "aff_campus_unique_index": "1", "aff_campus_unique": ";Madison", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "6MMOFoiWnDM", "title": "GT-CausIn: a novel causal-based insight for traffic prediction", "track": "main", "status": "Withdraw", "tldr": "A model fusing causal knowledge, space dependency and temporal dependency is proposed in this work.", "abstract": "Traffic forecasting is an important issue of spatiotemporal series prediction. Among different methods, graph neural networks have achieved so far the most promising results, learning relations between graph nodes then becomes a crucial task. However, improvement space is very limited when these relations are learned in a node-to-node manner. The challenge stems from (1) obscure temporal dependencies between different stations, (2) difficulties in defining variables beyond the node level, and (3) no ready-made method to validate the learned relations. To confront these challenges, we define legitimate traffic variables to discover the causal structure of the traffic network. The causal relation is carefully checked with statistic tools and case analysis. We then present a novel model named Graph Spatial-Temporal Network Based on Causal Insight (GT-CausIn), where graph diffusion layers and temporal convolutional network (TCN) layers are integrated with causal knowledge to capture dependencies in spatiotemporal space. 
Experiments are carried out on two real-world traffic datasets: PEMS-BAY and METR-LA, which show that GT-CausIn significantly outperforms the state-of-the-art models.", "keywords": "spatiotemporal forecasting;causal discovery;graph neural networks", "primary_area": "", "supplementary_material": "", "author": "Ting GAO;Rodrigo Kappes Marques;Lei Yu", "authorids": "~Ting_GAO1;rodrigokmarques@poli.ufrj.br;~Lei_Yu9", "gender": "F;;M", "homepage": ";;http://ecpkn.buaa.edu.cn/info/1968/4611.htm", "dblp": ";;01/2775-9", "google_scholar": ";;", "orcid": ";;", "linkedin": "https://www.linkedin.cn/incareer/in/ACoAADg1qxYBW-42puJU6QBFmdwAAebGlzpB0L0;;", "or_profile": "~Ting_GAO1;rodrigokmarques@poli.ufrj.br;~Lei_Yu9", "aff": ";;Beihang University", "aff_domain": ";;buaa.edu.cn", "position": ";;Associate Professor", "bibtex": "@misc{\ngao2023gtcausin,\ntitle={{GT}-CausIn: a novel causal-based insight for traffic prediction},\nauthor={Ting GAO and Rodrigo Kappes Marques and Lei Yu},\nyear={2023},\nurl={https://openreview.net/forum?id=6MMOFoiWnDM}\n}", "github": "", "project": "", "reviewers": "Nju9;aEiU;qK7e;GXFt", "site": "https://openreview.net/forum?id=6MMOFoiWnDM", "pdf_size": 12758091, "recommendation": "3;3;3;5", "confidence": "4;4;3;4", "correctness": "3;3;3;2", "technical_novelty": "2;2;2;2", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "100;14;79;50", "wc_strength_and_weaknesses": "122;104;165;40", "wc_clarity_quality_novelty_and_reproducibility": "114;21;27;6", "wc_summary_review": "43;21;51;63", "wc_review": "379;160;322;159", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 60.75, 32.306152664778885 ], "wc_strength_and_weaknesses_avg": [ 107.75, 44.95761893161158 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 42.0, 42.26700841081611 ], "wc_summary_review_avg": [ 44.5, 15.321553446044563 ], "wc_review_avg": [ 255.0, 97.60379090998464 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": -1.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10349499411114170931&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 2, "aff_unique_index": "0", "aff_unique_norm": "Beihang University", "aff_unique_dep": "", "aff_unique_url": "http://www.buaa.edu.cn/", "aff_unique_abbr": "BUAA", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "title": "Particle-based Variational Inference with Preconditioned Functional Gradient Flow", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11968", "id": "6OphWWAE3cS", "poster": "", "openreview": "https://openreview.net/forum?id=6OphWWAE3cS", "slides": "https://iclr.cc/virtual/2023/poster/11968", "video": "https://iclr.cc/virtual/2023/poster/11968", "author_site": "Hanze Dong, Xi Wang, LIN Yong, Tong Zhang", "tldr": "", "abstract": "Particle-based variational inference (VI) minimizes the KL divergence between model samples and the target posterior with gradient flow estimates. 
With the popularity of Stein variational gradient descent (SVGD), the focus of particle-based VI algorithms has been on the properties of functions in Reproducing Kernel Hilbert Space (RKHS) to approximate the gradient flow. However, the requirement of RKHS restricts the function class and algorithmic flexibility. This paper offers a general solution to this problem by introducing a functional regularization term that encompasses the RKHS norm as a special case. This allows us to propose a new particle-based VI algorithm called preconditioned functional gradient flow (PFG). Compared to SVGD, PFG has several advantages. It has a larger function class, improved scalability in large particle-size scenarios, better adaptation to ill-conditioned distributions, and provable continuous-time convergence in KL divergence. Additionally, non-linear function classes such as neural networks can be incorporated to estimate the gradient flow. Our theory and experiments demonstrate the effectiveness of the proposed framework.", "keywords": "Posterior Sampling;Particle-based VI", "primary_area": "", "supplementary_material": "/attachment/1dec479333b7181245aed90cd89a6585057440f4.zip", "author": "Hanze Dong;Xi Wang;LIN Yong;Tong Zhang", "authorids": "~Hanze_Dong1;~Xi_Wang4;~LIN_Yong1;~Tong_Zhang2", "gender": "M;M;;M", "homepage": "https://hendrydong.github.io/;;;http://tongzhang-ml.org", "dblp": "228/7798;;;07/4227-1", "google_scholar": "g9WLzWoAAAAJ;giztudUAAAAJ;;LurWtuYAAAAJ", "orcid": ";;;0000-0002-5511-2558", "linkedin": "hanze-dong/;wang-xi-660a47153/;;", "or_profile": "~Hanze_Dong1;~Xi_Wang4;~LIN_Yong1;~Tong_Zhang2", "aff": "Hong Kong University of Science and Technology;University of Massachusetts, Amherst;;Hong Kong University of Science and Technology", "aff_domain": "ust.hk;umass.edu;;ust.hk", "position": "PhD student;PhD student;;Full Professor", "bibtex": "@inproceedings{\ndong2023particlebased,\ntitle={Particle-based Variational Inference with Preconditioned Functional Gradient Flow},\nauthor={Hanze Dong and Xi Wang and LIN Yong and Tong Zhang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=6OphWWAE3cS}\n}", "github": "", "project": "", "reviewers": "vc1L;m3yE;9msK", "pdf_size": 2293448, "recommendation": "6;8;8", "confidence": "4;4;5", "correctness": "3;4;4", "technical_novelty": "2;2;3", "empirical_novelty": "3;2;3", "wc_summary_paper": "48;95;65", "wc_strength_and_weaknesses": "76;213;190", "wc_clarity_quality_novelty_and_reproducibility": "65;52;7", "wc_summary_review": "131;41;22", "wc_review": "320;401;284", "wc_reply_reviewers": "0;31;0", "wc_reply_authors": "642;1145;279", "reply_reviewers": "0;1;0", "reply_authors": "1;4;1", "recommendation_avg": [ 7.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 69.33333333333333, 19.430788855719562 ], "wc_strength_and_weaknesses_avg": [ 159.66666666666666, 59.901771444776344 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 41.333333333333336, 24.850665092821068 ], "wc_summary_review_avg": [ 64.66666666666667, 47.541794478355804 ], "wc_review_avg": [ 335.0, 48.92851929090027 ], "wc_reply_reviewers_avg": [ 10.333333333333334, 14.613540144521982 ], "wc_reply_authors_avg": [ 
688.6666666666666, 355.0796467773893 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 1.4142135623730951 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.4999999999999999, "corr_recommendation_correctness": 0.9999999999999998, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11677812087054761061&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=6OphWWAE3cS", "email": "ust.hk;umass.edu;;ust.hk", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Hong Kong University of Science and Technology;University of Massachusetts Amherst", "aff_unique_dep": ";", "aff_unique_url": "https://www.ust.hk;https://www.umass.edu", "aff_unique_abbr": "HKUST;UMass Amherst", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Hong Kong SAR;Amherst", "aff_country_unique_index": "0;1;0", "aff_country_unique": "China;United States" }, { "id": "6OxI4WqGr6", "title": "Semi-Supervised Offline Reinforcement Learning with Action-Free Trajectories", "track": "main", "status": "Reject", "tldr": "", "abstract": "Natural agents can effectively learn from multiple data sources that differ in size, quality, and types of measurements. We study this heterogeneity in the context of offline reinforcement learning (RL) by introducing a new, practically motivated semi-supervised setting. Here, an agent has access to two sets of trajectories: labelled trajectories containing state, action, reward triplets at every timestep, along with unlabelled trajectories that contain only state and reward information. For this setting, we develop a simple meta-algorithmic pipeline that learns an inverse-dynamics model on the labelled data to obtain proxy-labels for the unlabelled data, followed by the use of any offline RL algorithm on the true and proxy-labelled trajectories. Empirically, we find this simple pipeline to be highly successful: on several D4RL benchmarks (Fu et al., 2020), certain offline RL algorithms can match the performance of variants trained on a fully labeled dataset even when we label only 10% of trajectories from the low-return regime. 
Finally, we perform a large-scale controlled empirical study investigating the interplay of data-centric properties of the labelled and unlabelled datasets, with algorithmic design choices (e.g., inverse dynamics, offline RL algorithm) to identify general trends and best practices for training RL agents on semi-supervised offline datasets.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Qinqing Zheng;Mikael Henaff;Brandon Amos;Aditya Grover", "authorids": "~Qinqing_Zheng1;~Mikael_Henaff1;~Brandon_Amos1;~Aditya_Grover1", "gender": ";M;;M", "homepage": "https://enosair.github.io;http://www.mikaelhenaff.com;http://bamos.github.io;https://aditya-grover.github.io", "dblp": "160/8439;86/10571;133/4801.html;162/5052", "google_scholar": "Jwnl3v0AAAAJ;bX__wkYAAAAJ;d8gdZR4AAAAJ;oOhnPUgAAAAJ", "orcid": "0000-0003-1096-9635;;;", "linkedin": ";;bdamos;", "or_profile": "~Qinqing_Zheng1;~Mikael_Henaff1;~Brandon_Amos1;~Aditya_Grover1", "aff": "Meta Facebook;Meta;Meta;University of California, Los Angeles", "aff_domain": "fb.com;meta.com;meta.com;ucla.edu", "position": "Researcher;Researcher;Research Scientist;Assistant Professor", "bibtex": "@misc{\nzheng2023semisupervised,\ntitle={Semi-Supervised Offline Reinforcement Learning with Action-Free Trajectories},\nauthor={Qinqing Zheng and Mikael Henaff and Brandon Amos and Aditya Grover},\nyear={2023},\nurl={https://openreview.net/forum?id=6OxI4WqGr6}\n}", "github": "", "project": "", "reviewers": "az7q;omLk;Mz9R", "site": "https://openreview.net/forum?id=6OxI4WqGr6", "pdf_size": 1146698, "recommendation": "3;5;6", "confidence": "2;5;3", "correctness": "3;3;3", "technical_novelty": "2;1;3", "empirical_novelty": "2;3;3", "wc_summary_paper": "73;99;64", "wc_strength_and_weaknesses": "298;285;153", "wc_clarity_quality_novelty_and_reproducibility": "29;36;11", "wc_summary_review": "46;274;4", "wc_review": "446;694;232", "wc_reply_reviewers": "0;373;0", "wc_reply_authors": "446;1543;303", "reply_reviewers": "0;1;0", "reply_authors": "1;3;1", "recommendation_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.3333333333333335, 1.247219128924647 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.0, 0.816496580927726 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 78.66666666666667, 14.83988619303471 ], "wc_strength_and_weaknesses_avg": [ 245.33333333333334, 65.50487683286558 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 25.333333333333332, 10.530379332620875 ], "wc_summary_review_avg": [ 108.0, 118.62546101069533 ], "wc_review_avg": [ 457.3333333333333, 188.7808841546787 ], "wc_reply_reviewers_avg": [ 124.33333333333333, 175.83388625505484 ], "wc_reply_authors_avg": [ 764.0, 553.9211736941157 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.49999999999999994, "corr_recommendation_correctness": 0.0, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6496384104347053282&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Meta;University of California, Los Angeles", "aff_unique_dep": "Meta Platforms, Inc.;", "aff_unique_url": "https://meta.com;https://www.ucla.edu", "aff_unique_abbr": "Meta;UCLA", "aff_campus_unique_index": "1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": 
"0;0;0;0", "aff_country_unique": "United States" }, { "title": "FedDAR: Federated Domain-Aware Representation Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11776", "id": "6P9Y25Pljl6", "poster": "/media/PosterPDFs/ICLR%202023/11776.png?t=1682974383.5504367", "openreview": "https://openreview.net/forum?id=6P9Y25Pljl6", "slides": "https://iclr.cc/virtual/2023/poster/11776", "video": "https://iclr.cc/virtual/2023/poster/11776", "author_site": "Aoxiao Zhong, Hao He, Zhaolin Ren, Na Li, Quanzheng Li", "tldr": "", "abstract": "Cross-silo Federated learning (FL) has become a promising tool in machine learning applications for healthcare. It allows hospitals/institutions to train models with sufficient data while the data is kept private. To make sure the FL model is robust when facing heterogeneous data among FL clients, most efforts focus on personalizing models for clients. However, the latent relationships between clients' data are ignored. In this work, we focus on a special non-iid FL problem, called Domain-mixed FL, where each client's data distribution is assumed to be a mixture of several predefined domains. Recognizing the diversity of domains and the similarity within domains, we propose a novel method, FedDAR, which learns a domain shared representation and domain-wise personalized prediction heads in a decoupled manner. For simplified linear regression settings, we have theoretically proved that FedDAR enjoys a linear convergence rate. For general settings, we have performed intensive empirical studies on both synthetic and real-world medical datasets which demonstrate its superiority over prior FL methods. Our code is available at https://github.com/zlz0414/FedDAR. ", "keywords": "federated learning;healthcare;fairness;personalization", "primary_area": "", "supplementary_material": "", "author": "Aoxiao Zhong;Hao He;Zhaolin Ren;Na Li;Quanzheng Li", "authorids": "~Aoxiao_Zhong1;~Hao_He1;~Zhaolin_Ren1;~Na_Li3;~Quanzheng_Li1", "gender": ";M;M;F;M", "homepage": ";http://people.csail.mit.edu/hehaodele;;https://nali.seas.harvard.edu/;https://camca.mgh.harvard.edu/people/faculty/", "dblp": "203/3014;;;;", "google_scholar": "pG5Zz4kAAAAJ;https://scholar.google.com/citations?hl=en;;qdGelXoAAAAJ;MHq2z7oAAAAJ", "orcid": ";;;;", "linkedin": ";;zhaolin-ren-1b1b94108;;", "or_profile": "~Aoxiao_Zhong1;~Hao_He1;~Zhaolin_Ren1;~Na_Li3;~Quanzheng_Li1", "aff": "Harvard University;Massachusetts Institute of Technology;Harvard University;Harvard University;Harvard University", "aff_domain": "harvard.edu;mit.edu;harvard.edu;harvard.edu;harvard.edu", "position": "PhD student;PhD student;PhD student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nzhong2023feddar,\ntitle={Fed{DAR}: Federated Domain-Aware Representation Learning},\nauthor={Aoxiao Zhong and Hao He and Zhaolin Ren and Na Li and Quanzheng Li},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=6P9Y25Pljl6}\n}", "github": "", "project": "", "reviewers": "eZTL;XSkK;GcPS;p1BK", "pdf_size": 507835, "recommendation": "6;6;6;8", "confidence": "3;4;3;3", "correctness": "3;3;3;3", "technical_novelty": "3;4;2;3", "empirical_novelty": "3;3;2;3", "wc_summary_paper": "128;78;85;170", "wc_strength_and_weaknesses": "144;384;317;144", "wc_clarity_quality_novelty_and_reproducibility": "43;72;45;346", "wc_summary_review": "53;86;70;36", "wc_review": "368;620;517;696", "wc_reply_reviewers": "130;0;0;19", 
"wc_reply_authors": "954;1480;1048;781", "reply_reviewers": "1;0;0;1", "reply_authors": "2;3;2;1", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 115.25, 36.9552093756753 ], "wc_strength_and_weaknesses_avg": [ 247.25, 105.93246669458802 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 126.5, 127.24484272456782 ], "wc_summary_review_avg": [ 61.25, 18.673175948402566 ], "wc_review_avg": [ 550.25, 122.9112993178414 ], "wc_reply_reviewers_avg": [ 37.25, 54.10810937373436 ], "wc_reply_authors_avg": [ 1065.75, 257.62800216591364 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": 0.0, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4299155065459129572&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=6P9Y25Pljl6", "email": "harvard.edu;mit.edu;harvard.edu;harvard.edu;harvard.edu", "author_num": 5, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "Harvard University;Massachusetts Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.harvard.edu;https://web.mit.edu", "aff_unique_abbr": "Harvard;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Understanding DDPM Latent Codes Through Optimal Transport", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11105", "id": "6PIrhAx1j4i", "poster": "", "openreview": "https://openreview.net/forum?id=6PIrhAx1j4i", "slides": "https://iclr.cc/virtual/2023/poster/11105", "video": "https://iclr.cc/virtual/2023/poster/11105", "author_site": "Valentin Khrulkov, Gleb Ryzhakov, Andrei Chertkov, Ivan Oseledets", "tldr": "ddim encoder is almost equal to optimal transport", "abstract": "Diffusion models have recently outperformed alternative approaches to model the distribution of natural images. Such diffusion models allow for deterministic sampling via the probability flow ODE, giving rise to a latent space and an encoder map. While having important practical applications, such as the estimation of the likelihood, the theoretical properties of this map are not yet fully understood. In the present work, we partially address this question for the popular case of the VP-SDE (DDPM) approach. We show that, perhaps surprisingly, the DDPM encoder map coincides with the optimal transport map for common distributions; we support this claim by extensive numerical experiments using advanced tensor train solver for multidimensional Fokker-Planck equation. 
We provide additional theoretical evidence for the case of multivariate normal distributions.", "keywords": "diffusion models;ddpm;optimal transport;theory", "primary_area": "", "supplementary_material": "/attachment/65489ccd50a55888d01dedaf2fd804b284aa6145.zip", "author": "Valentin Khrulkov;Gleb Ryzhakov;Andrei Chertkov;Ivan Oseledets", "authorids": "~Valentin_Khrulkov1;~Gleb_Ryzhakov1;~Andrei_Chertkov1;~Ivan_Oseledets1", "gender": "M;M;M;M", "homepage": ";;http://oseledets.github.io;", "dblp": ";285/5751;56/7175;222/9593", "google_scholar": "https://scholar.google.ru/citations?user=GS5HTlkAAAAJ;Hf_pNoQAAAAJ;https://scholar.google.ru/citations?user=5kMqBQEAAAAJ;ZqmOtcwAAAAJ", "orcid": "0009-0000-6694-5398;;;", "linkedin": ";;;", "or_profile": "~Valentin_Khrulkov1;~Andrei_Chertkov1;~Ivan_Oseledets1;~Gleb_Vladimirovich_Ryzhakov1", "aff": "Skolkovo Institute of Science and Technology;Skolkovo Institute of Science and Technology;Institute of Numerical Mathematics;Skolkovo Institute of Science and Technology", "aff_domain": "skolkovotech.ru;skolkovotech.ru;inm.ras.ru;skoltech.ru", "position": "PhD student;Researcher;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nkhrulkov2023understanding,\ntitle={Understanding {DDPM} Latent Codes Through Optimal Transport},\nauthor={Valentin Khrulkov and Gleb Ryzhakov and Andrei Chertkov and Ivan Oseledets},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=6PIrhAx1j4i}\n}", "github": "", "project": "", "reviewers": "qZHA;R2Hy;CjpS;6Sgh", "pdf_size": 10284609, "recommendation": "5;6;6;8", "confidence": "4;3;4;4", "correctness": "4;2;3;4", "technical_novelty": "2;3;3;3", "empirical_novelty": "3;3;2;3", "wc_summary_paper": "38;49;171;66", "wc_strength_and_weaknesses": "414;101;191;302", "wc_clarity_quality_novelty_and_reproducibility": "17;33;62;63", "wc_summary_review": "22;100;152;68", "wc_review": "491;283;576;499", "wc_reply_reviewers": "441;92;0;0", "wc_reply_authors": "1013;231;115;290", "reply_reviewers": "1;1;0;0", "reply_authors": "7;5;3;4", "recommendation_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 81.0, 52.91030145444269 ], "wc_strength_and_weaknesses_avg": [ 252.0, 117.54360892877162 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 43.75, 19.587942719948924 ], "wc_summary_review_avg": [ 85.5, 47.35768153108849 ], "wc_review_avg": [ 462.25, 108.6815876770302 ], "wc_reply_reviewers_avg": [ 133.25, 181.60585755971638 ], "wc_reply_authors_avg": [ 412.25, 352.51054948752954 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 4.75, 1.479019945774904 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.13245323570650439, "corr_recommendation_correctness": 0.20751433915982243, "gs_citation": 60, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2885441022121331133&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=6PIrhAx1j4i", "email": "skolkovotech.ru;skolkovotech.ru;inm.ras.ru;skoltech.ru", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Skolkovo Institute of Science and Technology;Institute of Numerical Mathematics", "aff_unique_dep": ";", "aff_unique_url": "https://www.skoltech.ru;", "aff_unique_abbr": 
"Skoltech;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Russian Federation;" }, { "id": "6Pv8AMSylux", "title": "DIVISION: Memory Efficient Training via Dual Activation Precision", "track": "main", "status": "Reject", "tldr": "A simple and transparent framework to reduce the memory cost of DNN training.", "abstract": "Activation compressed training (ACT) has been shown to be a promising way to reduce the memory cost of training deep neural networks (DNNs). However, existing work of ACT relies on searching for optimal bit-width during DNN training to reduce the quantization noise, which makes the procedure complicated and less transparent. To this end, we propose a simple and effective method to compress DNN training. Our method is motivated by an instructive observation: DNN backward propagation mainly utilizes the low-frequency component (LFC) of the activation maps, while the majority of memory is for caching the high-frequency component (HFC) during the training. This indicates the HFC of activation maps is highly redundant and compressible during DNN training, which inspires our proposed Dual Activation Precision (DIVISION). During the training, DIVISION preserves the high-precision copy of LFC and compresses the HFC into a light-weight copy with low numerical precision. This can significantly reduce the memory cost without negatively affecting the precision of backward propagation such that DIVISION maintains competitive model accuracy. Experimental results show DIVISION achieves over 10\u00d7 compression of activation maps, and significantly higher training throughput than state-of-the-art ACT methods, without loss of model accuracy. The code is available at https://anonymous.4open.science/r/division-5CC0/\n", "keywords": "DNN training;activation compressed training;memory efficient training;frequency domain", "primary_area": "", "supplementary_material": "", "author": "Guanchu Wang;Zirui Liu;Zhimeng Jiang;Ninghao Liu;Na Zou;Xia Hu", "authorids": "~Guanchu_Wang1;~Zirui_Liu1;~Zhimeng_Jiang1;~Ninghao_Liu2;~Na_Zou2;~Xia_Hu4", "gender": "M;M;M;F;M;M", "homepage": "https://guanchuwang.github.io/home;https://zirui-ray-liu.github.io/;http://www.zhimengjiang.com/;https://nzou1.github.io/;https://cobweb.cs.uga.edu/~ninghaoliu/;https://cs.rice.edu/~xh37/index.html", "dblp": "213/0985;196/8629-1.html;217/3235;152/0090-1.html;145/4489;256/9406.html", "google_scholar": "_QL5218AAAAJ;https://scholar.google.com/citations?hl=zh-CN;5Es3Yk4AAAAJ;https://scholar.google.com/citations?hl=en;Nir-EDYAAAAJ;https://scholar.google.com.tw/citations?user=pcCS60IAAAAJ", "orcid": ";;0000-0001-6933-3952;0000-0003-1984-795X;0000-0002-9170-2424;", "linkedin": ";;;na-zou-a1721535/;;", "or_profile": "~Guanchu_Wang1;~Zirui_Liu1;~Zhimeng_Jiang1;~Na_Zou2;~Ninghao_Liu1;~Xia_Hu2", "aff": "Rice University;Rice University;Texas A&M University;Texas A&M University - College Station;University of Georgia;Rice University", "aff_domain": "rice.edu;rice.edu;tamu.edu;tamu.edu;uga.edu;rice.edu", "position": "PhD student;PhD student;PhD student;Assistant Professor;Assistant Professor;Associate Professor", "bibtex": "@misc{\nwang2023division,\ntitle={{DIVISION}: Memory Efficient Training via Dual Activation Precision},\nauthor={Guanchu Wang and Zirui Liu and Zhimeng Jiang and Ninghao Liu and Na Zou and Xia Hu},\nyear={2023},\nurl={https://openreview.net/forum?id=6Pv8AMSylux}\n}", "github": "", "project": "", "reviewers": "Eerz;R65G;PUeD;xXpe", "site": 
"https://openreview.net/forum?id=6Pv8AMSylux", "pdf_size": 1690445, "recommendation": "3;5;6;8", "confidence": "5;3;4;5", "correctness": "2;3;3;4", "technical_novelty": "3;3;3;3", "empirical_novelty": "1;3;3;3", "wc_summary_paper": "112;78;111;67", "wc_strength_and_weaknesses": "400;240;281;185", "wc_clarity_quality_novelty_and_reproducibility": "45;12;56;40", "wc_summary_review": "11;33;24;53", "wc_review": "568;363;472;345", "wc_reply_reviewers": "169;100;27;0", "wc_reply_authors": "1667;1350;2169;1025", "reply_reviewers": "1;1;1;0", "reply_authors": "8;7;7;2", "recommendation_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 92.0, 19.887181801351343 ], "wc_strength_and_weaknesses_avg": [ 276.5, 79.02056694304338 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 38.25, 16.223054582907622 ], "wc_summary_review_avg": [ 30.25, 15.286840746210448 ], "wc_review_avg": [ 437.0, 89.89716347026751 ], "wc_reply_reviewers_avg": [ 74.0, 65.92799102050661 ], "wc_reply_authors_avg": [ 1552.75, 422.032211448368 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 6.0, 2.345207879911715 ], "replies_avg": [ 38, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.08362420100070908, "corr_recommendation_correctness": 0.9805806756909202, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17267138215214751745&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff_unique_index": "0;0;1;1;2;0", "aff_unique_norm": "Rice University;Texas A&M University;University of Georgia", "aff_unique_dep": ";;", "aff_unique_url": "https://www.rice.edu;https://www.tamu.edu;https://www.uga.edu", "aff_unique_abbr": "Rice;TAMU;UGA", "aff_campus_unique_index": "1", "aff_campus_unique": ";College Station", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "A VAE for Transformers with Nonparametric Variational Information Bottleneck", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10953", "id": "6QkjC_cs03X", "poster": "/media/PosterPDFs/ICLR%202023/10953.png?t=1681837860.0667808", "openreview": "https://openreview.net/forum?id=6QkjC_cs03X", "slides": "https://iclr.cc/virtual/2023/poster/10953", "video": "https://iclr.cc/virtual/2023/poster/10953", "author_site": "James Henderson, Fabio Fehr", "tldr": "We propose a Variational AutoEncoder using Bayesian nonparametrics to regularise a Transformer encoder-decoder with latent mixture distributions.", "abstract": "We propose a Variational AutoEncoder (VAE) for Transformers by developing a Variational Information Bottleneck (VIB) regulariser for Transformer embeddings. We formalise such attention-based representations as mixture distributions, and use Bayesian nonparametrics to develop a Nonparametric VIB (NVIB) for them. The variable number of mixture components supported by nonparametrics captures the variable number of vectors supported by attention, and exchangeable distributions from nonparametrics capture the permutation invariance of attention. Our Transformer VAE (NVAE) uses NVIB to regularise the information passing from the Transformer encoder to the Transformer decoder. 
Evaluations of an NVAE, trained on natural language text, demonstrate that NVIB can regularise the number of mixture components in the induced embedding whilst maintaining generation quality and reconstruction capacity.", "keywords": "VAE;VIB;Bayesian nonparametrics;Transformers;natural language", "primary_area": "", "supplementary_material": "", "author": "James Henderson;Fabio James Fehr", "authorids": "~James_Henderson1;~Fabio_James_Fehr1", "gender": "M;M", "homepage": "http://idiap.ch/~jhenderson/;https://fjfehr.github.io/", "dblp": "h/JamesHenderson.html;315/4886.html", "google_scholar": "CSib0ooAAAAJ;WaZWY0wAAAAJ", "orcid": "0000-0003-3714-4799;", "linkedin": "james-henderson-3b68346b/;fabio-j-fehr", "or_profile": "~James_Henderson1;~Fabio_James_Fehr1", "aff": "Idiap Research Institute;Idiap Research Institute", "aff_domain": "idiap.ch;idiap.ch", "position": "Senior Researcher;PhD student", "bibtex": "@inproceedings{\nhenderson2023a,\ntitle={A {VAE} for Transformers with Nonparametric Variational Information Bottleneck},\nauthor={James Henderson and Fabio James Fehr},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=6QkjC_cs03X}\n}", "github": "", "project": "", "reviewers": "fw7U;Lu5K;tsdp;w4Qq", "pdf_size": 1111455, "recommendation": "5;5;6;6", "confidence": "4;3;3;3", "correctness": "3;3;4;4", "technical_novelty": "2;3;3;4", "empirical_novelty": "2;0;3;4", "wc_summary_paper": "45;74;51;64", "wc_strength_and_weaknesses": "268;135;111;253", "wc_clarity_quality_novelty_and_reproducibility": "69;77;6;29", "wc_summary_review": "16;15;45;19", "wc_review": "398;301;213;365", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "465;313;169;549", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.25, 1.479019945774904 ], "wc_summary_paper_avg": [ 58.5, 11.280514172678478 ], "wc_strength_and_weaknesses_avg": [ 191.75, 69.47436577616236 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 45.25, 29.054904921544658 ], "wc_summary_review_avg": [ 23.75, 12.356678356257397 ], "wc_review_avg": [ 319.25, 70.56335805501323 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 374.0, 145.4750837772572 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 1.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14451763274570161984&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=6QkjC_cs03X", "email": "idiap.ch;idiap.ch", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Idiap Research Institute", "aff_unique_dep": "", "aff_unique_url": "https://www.idiap.ch", "aff_unique_abbr": "Idiap", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Switzerland" }, { "id": "6R1unINH63", "title": "Variance Double-Down: The Small Batch Size Anomaly in Multistep Deep Reinforcement Learning", "track": "main", "status": "Reject", "tldr": "We perform an exhaustive investigation into the interplay of batch size and update horizon and uncover a surprising phenomenon: when increasing the update horizon, it is more
beneficial to decrease the batch size", "abstract": "State of the art results in reinforcement learning suggest that multi-step learning is necessary. However, the increased variance that comes with it makes it difficult to increase the update horizon beyond relatively small numbers. In this paper, we report the counterintuitive finding that decreasing the batch size substantially improves performance across a large swath of deep RL agents. It is well-known that gradient variance decreases with increasing batch sizes, so obtaining improved performance by increasing variance on two fronts is a rather surprising finding. We conduct a broad set of experiments to better understand this variance double-down phenomenon.", "keywords": "Reinforcement Learning;Deep Reinforcement Learning;Value based;Batch Size;Multi step learning", "primary_area": "", "supplementary_material": "/attachment/33d2b530cfd0ed613183a622f0671bbf4551d186.zip", "author": "Johan Samir Obando Ceron;Marc G Bellemare;Pablo Samuel Castro", "authorids": "~Johan_Samir_Obando_Ceron1;~Marc_G_Bellemare1;~Pablo_Samuel_Castro1", "gender": "M;M;M", "homepage": "https://johanobandoc.github.io;http://www.marcgbellemare.info;https://psc-g.github.io/", "dblp": ";38/4525;05/5455", "google_scholar": "KViAb3EAAAAJ;https://scholar.google.co.uk/citations?user=uyYPun0AAAAJ;https://scholar.google.ca/citations?user=jn5r6TsAAAAJ", "orcid": ";;", "linkedin": "johan-obando/;;pablo-samuel-castro-2113641b/", "or_profile": "~Johan_Samir_Obando_Ceron1;~Marc_G_Bellemare1;~Pablo_Samuel_Castro1", "aff": "Mila - Quebec AI Institute, Universit\u00e9 de Montr\u00e9al;Google;Google", "aff_domain": "mila.umontreal.ca;google.com;google.com", "position": "MS student;Research Scientist;Researcher", "bibtex": "@misc{\nceron2023variance,\ntitle={Variance Double-Down: The Small Batch Size Anomaly in Multistep Deep Reinforcement Learning},\nauthor={Johan Samir Obando Ceron and Marc G Bellemare and Pablo Samuel Castro},\nyear={2023},\nurl={https://openreview.net/forum?id=6R1unINH63}\n}", "github": "", "project": "", "reviewers": "74JP;fBLk;iTrT", "site": "https://openreview.net/forum?id=6R1unINH63", "pdf_size": 3391986, "recommendation": "3;3;5", "confidence": "3;4;3", "correctness": "2;2;3", "technical_novelty": "1;2;3", "empirical_novelty": "2;2;3", "wc_summary_paper": "342;38;71", "wc_strength_and_weaknesses": "565;344;266", "wc_clarity_quality_novelty_and_reproducibility": "296;12;86", "wc_summary_review": "42;54;54", "wc_review": "1245;448;477", "wc_reply_reviewers": "1217;0;0", "wc_reply_authors": "2275;722;797", "reply_reviewers": "2;0;0", "reply_authors": "4;1;2", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.0, 0.816496580927726 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 150.33333333333334, 136.1967531021043 ], "wc_strength_and_weaknesses_avg": [ 391.6666666666667, 126.63420636708797 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 131.33333333333334, 120.29223674960168 ], "wc_summary_review_avg": [ 50.0, 5.656854249492381 ], "wc_review_avg": [ 723.3333333333334, 369.0639812041026 ], "wc_reply_reviewers_avg": [ 405.6666666666667, 573.6993018026856 ], "wc_reply_authors_avg": [ 1264.6666666666667, 715.0693828029713 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 2.3333333333333335, 
1.247219128924647 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.5, "corr_recommendation_correctness": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ExmVy7InpqIJ:scholar.google.com/&scioq=Variance+Double-Down:+The+Small+Batch+Size+Anomaly+in+Multistep+Deep+Reinforcement+Learning&hl=en&as_sdt=0,33", "gs_version_total": 2, "aff_unique_index": "0;1;1", "aff_unique_norm": "Universit\u00e9 de Montr\u00e9al;Google", "aff_unique_dep": "Mila - Quebec AI Institute;Google", "aff_unique_url": "https://www.mila.quebec/;https://www.google.com", "aff_unique_abbr": "Mila;Google", "aff_campus_unique_index": "0;1;1", "aff_campus_unique": "Montr\u00e9al;Mountain View", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Canada;United States" }, { "id": "6RWJe6lPbQ", "title": "Deep Graph-Level Clustering Using Pseudo-Label-Guided Mutual Information Maximization Network", "track": "main", "status": "Reject", "tldr": "", "abstract": "In this work, we study the problem of partitioning a set of graphs into different groups such that the graphs in the same group are similar while the graphs in different groups are dissimilar. This problem was rarely studied previously, although there have been a lot of work on node clustering and graph classification. The problem is challenging because it is difficult to measure the similarity or distance between graphs. One feasible approach is using graph kernels to compute a similarity matrix for the graphs and then performing spectral clustering, but the effectiveness of existing graph kernels in measuring the similarity between graphs is very limited. To solve the problem, we propose a novel method called Deep Graph-Level Clustering (DGLC). DGLC utilizes a graph isomorphism network to learn graph-level representations by maximizing the mutual information between the representations of entire graphs and substructures, under the regularization of a clustering module that ensures discriminative representations via pseudo labels. DGLC achieves graph-level representation learning and graph-level clustering in an end-to-end manner. 
The experimental results on six benchmark datasets of graphs show that our DGLC has state-of-the-art performance in comparison to many baselines.", "keywords": "Graph-level clustering;Graph representation learning;Deep learning;Unsupervised learning", "primary_area": "", "supplementary_material": "/attachment/45c6a05ba87fcfccc013c86ae4c65a8e4ce1bcae.zip", "author": "Jinyu Cai;Yi Han;Wenzhong Guo;Jicong Fan", "authorids": "~Jinyu_Cai2;119020013@link.cuhk.edu.cn;guowenzhong@fzu.edu.cn;~Jicong_Fan2", "gender": "M;;;M", "homepage": "https://jinyucai95.github.io/;;;https://jicongfan.github.io/", "dblp": "223/9427;;;139/1570", "google_scholar": "g9TVoA0AAAAJ;;;vdJsnhIAAAAJ", "orcid": "0000-0003-2241-2754;;;0000-0001-9665-0355", "linkedin": ";;;", "or_profile": "~Jinyu_Cai2;119020013@link.cuhk.edu.cn;guowenzhong@fzu.edu.cn;~Jicong_Fan2", "aff": "Fuzhou University;;;The Chinese University of Hong Kong, Shenzhen", "aff_domain": "fzu.edu.cn;;;cuhk.edu.cn", "position": "PhD student;;;Research Assistant Professor", "bibtex": "@misc{\ncai2023deep,\ntitle={Deep Graph-Level Clustering Using Pseudo-Label-Guided Mutual Information Maximization Network},\nauthor={Jinyu Cai and Yi Han and Wenzhong Guo and Jicong Fan},\nyear={2023},\nurl={https://openreview.net/forum?id=6RWJe6lPbQ}\n}", "github": "", "project": "", "reviewers": "jD5G;RqLm;vbKc", "site": "https://openreview.net/forum?id=6RWJe6lPbQ", "pdf_size": 1845579, "recommendation": "3;5;6", "confidence": "4;4;4", "correctness": "2;3;3", "technical_novelty": "1;2;3", "empirical_novelty": "1;2;2", "wc_summary_paper": "45;69;51", "wc_strength_and_weaknesses": "180;120;220", "wc_clarity_quality_novelty_and_reproducibility": "70;57;30", "wc_summary_review": "65;421;23", "wc_review": "360;667;324", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.0, 0.816496580927726 ], "empirical_novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "wc_summary_paper_avg": [ 55.0, 10.198039027185569 ], "wc_strength_and_weaknesses_avg": [ 173.33333333333334, 41.096093353126506 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 52.333333333333336, 16.659998666133067 ], "wc_summary_review_avg": [ 169.66666666666666, 178.54473451273276 ], "wc_review_avg": [ 450.3333333333333, 153.90978598588921 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.9449111825230683, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10216186387333079874&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1", "aff_unique_norm": "Fuzhou University;Chinese University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "https://www.fznu.edu.cn;https://www.cuhk.edu.cn", "aff_unique_abbr": "FZU;CUHK", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Learning Multimodal Data Augmentation in Feature Space", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11438", "id": "6SRDbbvU8s", "poster": "", "openreview": 
"https://openreview.net/forum?id=6SRDbbvU8s", "slides": "https://iclr.cc/virtual/2023/poster/11438", "video": "https://iclr.cc/virtual/2023/poster/11438", "author_site": "Zichang Liu, Zhiqiang Tang, Xingjian Shi, Aston Zhang, Mu Li, Anshumali Shrivastava, Andrew Wilson", "tldr": "", "abstract": "The ability to jointly learn from multiple modalities, such as text, audio, and visual data, is a defining feature of intelligent systems. While there have been promising advances in designing neural networks to harness multimodal data, the enormous success of data augmentation currently remains limited to single-modality tasks like image classification. Indeed, it is particularly difficult to augment each modality while preserving the overall semantic structure of the data; for example, a caption may no longer be a good description of an image after standard augmentations have been applied, such as translation. Moreover, it is challenging to specify reasonable transformations that are not tailored to a particular modality. In this paper, we introduce LeMDA, Learning Multimodal Data Augmentation, an easy-to-use method that automatically learns to jointly augment multimodal data in feature space, with no constraints on the identities of the modalities or the relationship between modalities. We show that LeMDA can (1) profoundly improve the performance of multimodal deep learning architectures, (2) apply to combinations of modalities that have not been previously considered, and (3) achieve state-of-the-art results on a wide range of applications comprised of image, text, and tabular data.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Zichang Liu;Zhiqiang Tang;Xingjian Shi;Aston Zhang;Mu Li;Anshumali Shrivastava;Andrew Gordon Wilson", "authorids": "~Zichang_Liu1;~Zhiqiang_Tang1;~Xingjian_Shi1;~Aston_Zhang2;~Mu_Li4;~Anshumali_Shrivastava1;~Andrew_Gordon_Wilson1", "gender": "F;M;M;;;M;Not Specified", "homepage": ";https://sites.google.com/site/zhiqiangtanghomepage/home;https://sxjscience.github.io/;;https://github.com/mli;https://www.cs.rice.edu/~as143/;https://cims.nyu.edu/~andrewgw", "dblp": "227/4714;71/10098-1;145/9987;;;63/9828;65/10453", "google_scholar": ";https://scholar.google.com/citations?view_op=list_works;https://scholar.google.com.hk/citations?user=P4G6H7oAAAAJ;;;https://scholar.google.com.tw/citations?user=SGT23RAAAAAJ;https://scholar.google.com.tw/citations?user=twWX2LIAAAAJ", "orcid": "0009-0004-1098-2869;;;;;;", "linkedin": "zichang-liu/;;;;;;", "or_profile": "~Zichang_Liu1;~Zhiqiang_Tang1;~Xingjian_Shi1;~Aston_Zhang2;~Mu_Li4;~Anshumali_Shrivastava1;~Andrew_Gordon_Wilson1", "aff": "Rice University;AWS;Amazon Web Services;;Amazon;ThirdAI Corp.;New York University", "aff_domain": "rice.edu;amazon.com;amazon.com;;amazon.com;thirdai.com;nyu.edu", "position": "PhD student;Applied Scientist;Applied Scientist;;Researcher;CEO;Associate Professor", "bibtex": "@inproceedings{\nliu2023learning,\ntitle={Learning Multimodal Data Augmentation in Feature Space},\nauthor={Zichang Liu and Zhiqiang Tang and Xingjian Shi and Aston Zhang and Mu Li and Anshumali Shrivastava and Andrew Gordon Wilson},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=6SRDbbvU8s}\n}", "github": "", "project": "", "reviewers": "cbnb;nNn5;PrKb;huNS", "pdf_size": 4475569, "recommendation": "3;6;6;8", "confidence": "3;3;4;4", "correctness": "3;3;3;3", "technical_novelty": "3;3;4;3", "empirical_novelty": "3;3;4;2", 
"wc_summary_paper": "16;70;61;88", "wc_strength_and_weaknesses": "30;191;242;381", "wc_clarity_quality_novelty_and_reproducibility": "36;63;58;129", "wc_summary_review": "15;38;29;50", "wc_review": "97;362;390;648", "wc_reply_reviewers": "0;18;18;31", "wc_reply_authors": "57;644;902;617", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;2;1", "recommendation_avg": [ 5.75, 1.7853571071357126 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 58.75, 26.52710877574109 ], "wc_strength_and_weaknesses_avg": [ 211.0, 125.52091459195157 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 71.5, 34.7167106736799 ], "wc_summary_review_avg": [ 33.0, 12.786711852544421 ], "wc_review_avg": [ 374.25, 195.0671358789071 ], "wc_reply_reviewers_avg": [ 16.75, 11.031205736455105 ], "wc_reply_authors_avg": [ 555.0, 308.2928802291743 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.7001400420140049, "corr_recommendation_correctness": 0.0, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12322477023773725071&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=6SRDbbvU8s", "email": "rice.edu;amazon.com;amazon.com;;amazon.com;thirdai.com;nyu.edu", "author_num": 7, "aff_unique_index": "0;1;1;1;2;3", "aff_unique_norm": "Rice University;Amazon;ThirdAI Corp.;New York University", "aff_unique_dep": ";Amazon Web Services;;", "aff_unique_url": "https://www.rice.edu;https://aws.amazon.com;;https://www.nyu.edu", "aff_unique_abbr": "Rice;AWS;;NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "6TugHflAGRU", "title": "Eigenvalue Initialisation and Regularisation for Koopman Autoencoders", "track": "main", "status": "Reject", "tldr": "Using eigenvalues to regularise and initialise Koopman autoencoders improves performance significantly", "abstract": "Regularising the parameter matrices of neural networks is ubiquitous in training deep models. Typical regularisation approaches suggest initialising weights using small random values, and to penalise weights to promote sparsity. However, these widely used techniques may be less effective in certain scenarios. Here, we study the Koopman autoencoder model which includes an encoder, a Koopman operator layer, and a decoder. These models have been designed and dedicated to tackle physics-related problems with interpretable dynamics and an ability to incorporate physics-related constraints. However, the majority of existing work employs standard regularisation practices. In our work, we take a step toward augmenting Koopman autoencoders with initialisation and penalty schemes tailored for physics-related settings. Specifically, we propose the \"eigeninit\" initialisation scheme that samples initial Koopman operators from specific eigenvalue distributions. In addition, we suggest the \"eigenloss\" penalty scheme that penalises the eigenvalues of the Koopman operator during training. We demonstrate the utility of these schemes on two synthetic data sets: a driven pendulum and flow past a cylinder; and two real-world problems: ocean surface temperatures and cyclone wind fields. 
We find on these datasets that eigenloss and eigeninit improve the convergence rate by a factor of 2 to 5, and that they reduce the cumulative long-term prediction error by up to a factor of 2.5. Such a finding points to the utility of incorporating similar schemes as an inductive bias in other physics-related deep learning approaches.", "keywords": "koopman;deep learning;dynamical systems;autoencoders;physics-constrained learning;neural networks", "primary_area": "", "supplementary_material": "/attachment/0dbe82c98d565752d926ed65ab082340f38b759a.zip", "author": "Jack William Miller;Charles O'Neill;Navid C Constantinou;Omri Azencot", "authorids": "~Jack_William_Miller1;~Charles_O'Neill2;~Navid_C_Constantinou1;~Omri_Azencot1", "gender": "M;M;;Unspecified", "homepage": "https://jackwmiller.com;https://charlesponeill.com;https://www.navidconstantinou.com;http://omriazencot.com", "dblp": "336/6526;;;132/3985.html", "google_scholar": "geDj6_gAAAAJ;https://scholar.google.com.au/citations?user=-P4K8hsAAAAJ;https://scholar.google.com.au/citations?user=Hf4FZvQAAAAJ;https://scholar.google.co.il/citations?user=MEGuRmAAAAAJ", "orcid": "0000-0002-8410-4936;;0000-0002-8149-4094;", "linkedin": "jack-miller-8574211aa/;charles-o-neill/;;omri-azencot-a8812417/", "or_profile": "~Jack_William_Miller1;~Charles_O'Neill2;~Navid_C_Constantinou1;~Omri_Azencot1", "aff": "ETHZ - ETH Zurich;Australian National University;Australian National University;Ben-Gurion University of the Negev", "aff_domain": "eth.edu;anu.edu.au;anu.edu.au;bgu.ac.il", "position": "Undergrad student;Undergrad student;Researcher;Assistant Professor", "bibtex": "@misc{\nmiller2023eigenvalue,\ntitle={Eigenvalue Initialisation and Regularisation for Koopman Autoencoders},\nauthor={Jack William Miller and Charles O'Neill and Navid C Constantinou and Omri Azencot},\nyear={2023},\nurl={https://openreview.net/forum?id=6TugHflAGRU}\n}", "github": "", "project": "", "reviewers": "ibgi;AUHg;eMgi", "site": "https://openreview.net/forum?id=6TugHflAGRU", "pdf_size": 710698, "recommendation": "3;5;5", "confidence": "4;3;3", "correctness": "2;3;3", "technical_novelty": "3;3;3", "empirical_novelty": "2;3;3", "wc_summary_paper": "54;126;59", "wc_strength_and_weaknesses": "475;1378;117", "wc_clarity_quality_novelty_and_reproducibility": "823;34;69", "wc_summary_review": "122;54;30", "wc_review": "1474;1592;275", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1400;1159;365", "reply_reviewers": "0;0;0", "reply_authors": "2;2;1", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 79.66666666666667, 32.826141344293816 ], "wc_strength_and_weaknesses_avg": [ 656.6666666666666, 530.5860491527793 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 308.6666666666667, 363.9691684134187 ], "wc_summary_review_avg": [ 68.66666666666667, 38.96437118987322 ], "wc_review_avg": [ 1113.6666666666667, 594.9802984600041 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 974.6666666666666, 442.1842250565808 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.9999999999999998, "corr_recommendation_correctness": 0.9999999999999998, "gs_citation": 2,
"gs_cited_by_link": "https://scholar.google.com/scholar?cites=764353678603286649&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "ETH Zurich;Australian National University;Ben-Gurion University of the Negev", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ethz.ch;https://www.anu.edu.au;https://www.bgu.ac.il", "aff_unique_abbr": "ETHZ;ANU;BGU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;2", "aff_country_unique": "Switzerland;Australia;Israel" }, { "title": "Diffusion Probabilistic Modeling of Protein Backbones in 3D for the motif-scaffolding problem", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11916", "id": "6TxBxqNME1Y", "poster": "/media/PosterPDFs/ICLR%202023/11916.png?t=1680741445.2650843", "openreview": "https://openreview.net/forum?id=6TxBxqNME1Y", "slides": "https://iclr.cc/virtual/2023/poster/11916", "video": "https://iclr.cc/virtual/2023/poster/11916", "author_site": "Brian Trippe, Jason Yim, Doug Tischer, David Baker, Tamara Broderick, Regina Barzilay, Tommi Jaakkola", "tldr": "We have created the first generative modeling approach to motif-scaffolding by developing a diffusion probabilistic model of protein backbones and a procedure for generating scaffolds conditional on a motif.", "abstract": "Construction of a scaffold structure that supports a desired motif, conferring protein function, shows promise for the design of vaccines and enzymes. But a general solution to this motif-scaffolding problem remains open. Current machine-learning techniques for scaffold design are either limited to unrealistically small scaffolds (up to length 20) or struggle to produce multiple diverse scaffolds. We propose to learn a distribution over diverse and longer protein backbone structures via an E(3)-equivariant graph neural network. We develop SMCDiff to efficiently sample scaffolds from this distribution conditioned on a given motif; our algorithm is the first to theoretically guarantee conditional samples from a diffusion model in the large-compute limit. We evaluate our designed backbones by how well they align with AlphaFold2-predicted structures. We show that our method can (1) sample scaffolds up to 80 residues and (2) achieve structurally diverse scaffolds for a fixed motif.", "keywords": "Diffusion Models;Sequential Monte Carlo;Protein Design;Geometric Deep Learning", "primary_area": "", "supplementary_material": "/attachment/bb00291cfd4893dd554dd5f852dd76bdea49c496.zip", "author": "Brian L. Trippe;Jason Yim;Doug Tischer;David Baker;Tamara Broderick;Regina Barzilay;Tommi S. 
Jaakkola", "authorids": "~Brian_L._Trippe1;~Jason_Yim1;dtischer@uw.edu;~David_Baker1;~Tamara_Broderick2;~Regina_Barzilay1;~Tommi_S._Jaakkola1", "gender": ";;;;;female;", "homepage": ";http://people.csail.mit.edu/jyim/;;https://www.ipd.uw.edu/david-baker/;http://tamarabroderick.com/;https://www.regina.csail.mit.edu/;", "dblp": ";278/7337;;;40/7412;b/ReginaBarzilay;", "google_scholar": ";8wDe9NAAAAAJ;;;dPX0wQcAAAAJ;;", "orcid": ";0000-0003-0575-7400;;;;;", "linkedin": ";;;;tamara-broderick-b20243139/;;", "or_profile": "~Brian_L._Trippe1;~Jason_Yim1;dtischer@uw.edu;~David_Baker1;~Tamara_Broderick2;~Regina_Barzilay1;~Tommi_S._Jaakkola1", "aff": ";Massachusetts Institute of Technology;;University of Washington;Massachusetts Institute of Technology;Massachusetts Institute of Technology;", "aff_domain": ";mit.edu;;u.washington.edu;mit.edu;mit.edu;", "position": ";PhD student;;Full Professor;Associate Professor;Professor;", "bibtex": "@inproceedings{\ntrippe2023diffusion,\ntitle={Diffusion Probabilistic Modeling of Protein Backbones in 3D for the motif-scaffolding problem},\nauthor={Brian L. Trippe and Jason Yim and Doug Tischer and David Baker and Tamara Broderick and Regina Barzilay and Tommi S. Jaakkola},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=6TxBxqNME1Y}\n}", "github": "", "project": "", "reviewers": "BXA4;EPhs;eV4U;KTe1", "pdf_size": 6716643, "recommendation": "6;6;6;8", "confidence": "4;4;4;4", "correctness": "3;3;3;4", "technical_novelty": "2;3;3;4", "empirical_novelty": "2;3;3;2", "wc_summary_paper": "17;76;83;119", "wc_strength_and_weaknesses": "164;89;362;148", "wc_clarity_quality_novelty_and_reproducibility": "176;70;47;92", "wc_summary_review": "23;241;37;366", "wc_review": "380;476;529;725", "wc_reply_reviewers": "25;0;0;0", "wc_reply_authors": "558;501;475;926", "reply_reviewers": "1;0;0;0", "reply_authors": "1;1;1;2", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 73.75, 36.601741761834234 ], "wc_strength_and_weaknesses_avg": [ 190.75, 102.74087550726829 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 96.25, 48.715372317164935 ], "wc_summary_review_avg": [ 166.75, 143.7991220418261 ], "wc_review_avg": [ 527.5, 125.91366089507524 ], "wc_reply_reviewers_avg": [ 6.25, 10.825317547305483 ], "wc_reply_authors_avg": [ 615.0, 182.04807057477979 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 1.0, "gs_citation": 275, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13914392105282756395&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=6TxBxqNME1Y", "email": ";mit.edu;;u.washington.edu;mit.edu;mit.edu;", "author_num": 7, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Massachusetts Institute of Technology;University of Washington", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.washington.edu", "aff_unique_abbr": "MIT;UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "An Adaptive Policy to Employ Sharpness-Aware 
Minimization", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11349", "id": "6Wl7-M2BC-", "poster": "/media/PosterPDFs/ICLR%202023/11349.png?t=1681797841.0008154", "openreview": "https://openreview.net/forum?id=6Wl7-M2BC-", "slides": "https://iclr.cc/virtual/2023/poster/11349", "video": "https://iclr.cc/virtual/2023/poster/11349", "author_site": "Weisen JIANG, Hansi Yang, Yu Zhang, James Kwok", "tldr": "We design an adaptive policy to employ SAM and propose two efficient algorithms to reduce the fraction of SAM updates.", "abstract": "Sharpness-aware minimization (SAM), which searches for flat minima by min-max optimization, has been shown to be useful in improving model generalization. However, since each SAM update requires computing two gradients, its computational cost and training time are both doubled compared to standard empirical risk minimization (ERM). Recent state-of-the-arts reduce the fraction of SAM updates and thus accelerate SAM by switching between SAM and ERM updates randomly or periodically. In this paper, we design an adaptive policy to employ SAM based on the loss landscape geometry. Two efficient algorithms, AE-SAM and AE-LookSAM, are proposed. We theoretically show that AE-SAM has the same convergence rate as SAM. Experimental results on various datasets and architectures demonstrate the efficiency and effectiveness of the adaptive policy.", "keywords": "Sharpness-aware minimization;model generalization;loss landscape", "primary_area": "", "supplementary_material": "", "author": "Weisen Jiang;Hansi Yang;Yu Zhang;James Kwok", "authorids": "~Weisen_Jiang1;~Hansi_Yang1;~Yu_Zhang3;~James_Kwok1", "gender": "M;M;M;", "homepage": "https://wayson-ust.github.io/;https://www.linkedin.com/in/%E7%80%9A%E6%80%9D-%E6%9D%A8-6463a4a1;http://cse.sustech.edu.cn/faculty/~zhangy/;", "dblp": "302/7625;252/5354;50/671-6;", "google_scholar": "https://scholar.google.com/citations?hl=en;;https://scholar.google.com.hk/citations?user=jaRS5w4AAAAJ;", "orcid": ";0000-0002-0479-9898;;", "linkedin": ";%E7%80%9A%E6%80%9D-%E6%9D%A8-6463a4a1;;", "or_profile": "~Weisen_Jiang1;~Hansi_Yang1;~Yu_Zhang3;~James_Kwok1", "aff": "Hong Kong University of Science and Technology;Department of Computer Science and Engineering, Hong Kong University of Science and Technology;Southern University of Science and Technology;", "aff_domain": "ust.hk;cse.ust.hk;sustc.edu.cn;", "position": "PhD student;PhD student;Associate Professor;", "bibtex": "@inproceedings{\njiang2023an,\ntitle={An Adaptive Policy to Employ Sharpness-Aware Minimization},\nauthor={Weisen Jiang and Hansi Yang and Yu Zhang and James Kwok},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=6Wl7-M2BC-}\n}", "github": "", "project": "", "reviewers": "rg2h;4reb;qUYU", "pdf_size": 1603718, "recommendation": "5;6;6", "confidence": "5;3;4", "correctness": "3;3;3", "technical_novelty": "2;2;2", "empirical_novelty": "2;2;0", "wc_summary_paper": "44;61;107", "wc_strength_and_weaknesses": "246;62;20", "wc_clarity_quality_novelty_and_reproducibility": "13;30;11", "wc_summary_review": "4;52;109", "wc_review": "307;205;247", "wc_reply_reviewers": "76;29;0", "wc_reply_authors": "1962;312;788", "reply_reviewers": "1;1;0", "reply_authors": "4;1;2", "recommendation_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": 
[ 1.3333333333333333, 0.9428090415820634 ], "wc_summary_paper_avg": [ 70.66666666666667, 26.612444874949432 ], "wc_strength_and_weaknesses_avg": [ 109.33333333333333, 98.14728161741868 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 18.0, 8.524474568362947 ], "wc_summary_review_avg": [ 55.0, 42.91852746774987 ], "wc_review_avg": [ 253.0, 41.8568990729127 ], "wc_reply_reviewers_avg": [ 35.0, 31.31559781748812 ], "wc_reply_authors_avg": [ 1020.6666666666666, 693.4096111887179 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 1.247219128924647 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.8660254037844385, "corr_recommendation_correctness": 0.0, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11096684811397339892&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=6Wl7-M2BC-", "email": "ust.hk;cse.ust.hk;sustc.edu.cn;", "author_num": 4, "aff_unique_index": "0;0;1", "aff_unique_norm": "Hong Kong University of Science and Technology;Southern University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.ust.hk;https://www.sustech.edu.cn", "aff_unique_abbr": "HKUST;SUSTech", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "6Ysgo5RXUvn", "title": "DSPNet: Towards Slimmable Pretrained Networks based on Discriminative Self-supervised Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Self-supervised learning (SSL) has achieved promising downstream performance. However, when facing various resource budgets in real-world applications, pretraining multiple networks of various sizes one by one incurs a huge computational burden. In this paper, we propose Discriminative-SSL-based Slimmable Pretrained Networks (DSPNet), which can be trained once and then slimmed to multiple sub-networks of various sizes, each of which faithfully learns good representations and can serve as a good initialization for downstream tasks with various resource budgets. Specifically, we extend the idea of slimmable networks to a discriminative SSL paradigm, by integrating SSL and knowledge distillation gracefully. We show that DSPNet achieves comparable or improved performance on ImageNet relative to the networks individually pretrained one by one, under the linear evaluation and semi-supervised evaluation protocols, while greatly reducing training cost. The pretrained models also generalize well on downstream detection and segmentation tasks. 
Code will be made public.", "keywords": "Self-supervised Learning;Dynamic Neural Networks;Knowledge Distillation", "primary_area": "", "supplementary_material": "/attachment/ec462c35cecc9c24397af21848adbc47d02ef404.zip", "author": "Shaoru Wang;Zeming Li;Jin Gao;Weiming Hu", "authorids": "~Shaoru_Wang1;~Zeming_Li2;~Jin_Gao1;~Weiming_Hu1", "gender": "M;;M;M", "homepage": ";;https://people.ucas.edu.cn/~jgao?language=en;http://weiminghu.people-ai.net/", "dblp": "255/5225;;;", "google_scholar": "Vl6LhukAAAAJ;;W1o3B-0AAAAJ;", "orcid": ";;;0000-0001-9237-8825", "linkedin": ";;;", "or_profile": "~Shaoru_Wang1;~Zeming_Li2;~Jin_Gao1;~Weiming_Hu1", "aff": "Institute of Automation, Chinese Academy of Sciences;;Institute of automation, Chinese Academy of Sciences;Institute of automation, Chinese academy of science", "aff_domain": "ia.ac.cn;;ia.ac.cn;nlpr.ia.ac.cn", "position": "PhD student;;Associate Professor;Full Professor", "bibtex": "@misc{\nwang2023dspnet,\ntitle={{DSPN}et: Towards Slimmable Pretrained Networks based on Discriminative Self-supervised Learning},\nauthor={Shaoru Wang and Zeming Li and Jin Gao and Weiming Hu},\nyear={2023},\nurl={https://openreview.net/forum?id=6Ysgo5RXUvn}\n}", "github": "", "project": "", "reviewers": "P1zK;xwkC;NqU9;ssnC", "site": "https://openreview.net/forum?id=6Ysgo5RXUvn", "pdf_size": 634372, "recommendation": "3;3;5;6", "confidence": "4;4;5;4", "correctness": "2;3;3;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;3;2;3", "wc_summary_paper": "20;92;70;62", "wc_strength_and_weaknesses": "300;178;267;216", "wc_clarity_quality_novelty_and_reproducibility": "14;8;25;33", "wc_summary_review": "18;57;44;41", "wc_review": "352;335;406;352", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 61.0, 26.095976701399778 ], "wc_strength_and_weaknesses_avg": [ 240.25, 46.76737644982878 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 20.0, 9.669539802906858 ], "wc_summary_review_avg": [ 40.0, 14.053469322555197 ], "wc_review_avg": [ 361.25, 26.75233634656981 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 0.5555555555555555, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ohSXFnr3FPQJ:scholar.google.com/&scioq=DSPNet:+Towards+Slimmable+Pretrained+Networks+based+on+Discriminative+Self-supervised+Learning&hl=en&as_sdt=0,44", "gs_version_total": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Chinese Academy of Sciences", "aff_unique_dep": "Institute of Automation", "aff_unique_url": "http://www.ia.cas.cn", "aff_unique_abbr": "CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Generalize Learned Heuristics to Solve Large-scale Vehicle Routing Problems in Real-time", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11865", "id": "6ZajpxqTlQ", "poster": "/media/PosterPDFs/ICLR%202023/11865.png?t=1680778047.1444209", "openreview": 
"https://openreview.net/forum?id=6ZajpxqTlQ", "slides": "https://iclr.cc/virtual/2023/poster/11865", "video": "https://iclr.cc/virtual/2023/poster/11865", "author_site": "Qingchun Hou, Jingwei Yang, Yiqiang Su, Xiaoqing Wang, Yuming Deng", "tldr": "Propose a zero-shot method to generalize the data-driven heuristics trained on small-scale VRPs to solve large-scale VRPs in real-time", "abstract": "Large-scale Vehicle Routing Problems (VRPs) are widely used in logistics, transportation, supply chain, and robotic systems. Recently, data-driven VRP heuristics are proposed to generate real-time VRP solutions with up to 100 nodes. Despite this progress, current heuristics for large-scale VRPs still face three major challenges: 1) Difficulty in generalizing the heuristics learned on small-scale VRPs to large-scale VRPs without retraining; 2) Challenge in generating real-time solutions for large-scale VRPs; 3) Difficulty in embedding global constraints into learned heuristics. We contribute in the three directions: We propose a Two-stage Divide Method (TAM) to generate sub-route sequence rather than node sequence for generalizing the heuristics learned on small-scale VRPs to solve large-scale VRPs in real-time. A two-step reinforcement learning method with new reward and padding techniques is proposed to train our TAM. A global mask function is proposed to keep the global constraints satisfied when dividing a large-scale VRP into several small-scale Traveling Salesman Problems (TSPs). As result, we can solve the small-scale TSPs in parallel quickly. The experiments on synthetic and real-world large-scale VRPs show our method could generalize the learned heuristics trained on datasets of VRP 100 to solve VRPs with over 5000 nodes in real-time while keeping the solution quality better than data-driven heuristics and competitive with traditional heuristics.", "keywords": "Learning;Vehicle Routing Problem;Large-scale Vehicle Routing Problem;Generalization;Combinatorial Optimization;Reinforcement Learning;Attention", "primary_area": "", "supplementary_material": "", "author": "Qingchun Hou;Jingwei Yang;Yiqiang Su;Xiaoqing Wang;Yuming Deng", "authorids": "~Qingchun_Hou1;~Jingwei_Yang2;yiqiang.syq@alibaba-inc.com;~Xiaoqing_Wang1;~Yuming_Deng1", "gender": ";M;;;M", "homepage": "https://person.zju.edu.cn/en/houqingchun;;;;", "dblp": "249/5756;;;;", "google_scholar": "MBVZbRMAAAAJ;ayX_-lwAAAAJ;;;", "orcid": "0000-0001-8334-9897;;;;", "linkedin": ";;;https://www.linkedin.cn/in/xiaoqing-wang-20b85b85;yuming-deng-a38780b", "or_profile": "~Qingchun_Hou1;~Jingwei_Yang2;yiqiang.syq@alibaba-inc.com;~Xiaoqing_Wang1;~Yuming_Deng1", "aff": "Alibaba Group;Alibaba Group;;Alibaba Group;", "aff_domain": "alibaba-inc.com;alibaba-inc.com;;alibaba-inc.com;", "position": "Algorithm Expert;Researcher;;Director;", "bibtex": "@inproceedings{\nhou2023generalize,\ntitle={Generalize Learned Heuristics to Solve Large-scale Vehicle Routing Problems in Real-time},\nauthor={Qingchun Hou and Jingwei Yang and Yiqiang Su and Xiaoqing Wang and Yuming Deng},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=6ZajpxqTlQ}\n}", "github": "", "project": "", "reviewers": "7d5S;6VLJ;cdJ3;o9TS", "pdf_size": 31185039, "recommendation": "5;6;6;8", "confidence": "3;4;5;4", "correctness": "3;3;3;4", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;3;3;2", "wc_summary_paper": "41;101;250;80", "wc_strength_and_weaknesses": "162;62;21;196", 
"wc_clarity_quality_novelty_and_reproducibility": "310;150;45;41", "wc_summary_review": "25;99;355;43", "wc_review": "538;412;671;360", "wc_reply_reviewers": "0;0;359;21", "wc_reply_authors": "3438;617;3592;507", "reply_reviewers": "0;0;5;1", "reply_authors": "6;1;7;2", "recommendation_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 118.0, 79.19280270327602 ], "wc_strength_and_weaknesses_avg": [ 110.25, 71.28244875142829 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 136.5, 109.28975249308601 ], "wc_summary_review_avg": [ 130.5, 132.45659666471883 ], "wc_review_avg": [ 495.25, 120.35234729742498 ], "wc_reply_reviewers_avg": [ 95.0, 152.6613900107031 ], "wc_reply_authors_avg": [ 2038.5, 1478.0153077691718 ], "reply_reviewers_avg": [ 1.5, 2.0615528128088303 ], "reply_authors_avg": [ 4.0, 2.5495097567963922 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.3244428422615251, "corr_recommendation_correctness": 0.9271726499455306, "gs_citation": 58, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2538234724753917949&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=6ZajpxqTlQ", "email": "alibaba-inc.com;alibaba-inc.com;;alibaba-inc.com;", "author_num": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "Alibaba Group", "aff_unique_dep": "", "aff_unique_url": "https://www.alibaba.com", "aff_unique_abbr": "Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "6aKcyoDJBaX", "title": "Federated Learning on Adaptively Weighted Nodes by Bilevel Optimization", "track": "main", "status": "Reject", "tldr": "We propose a federated learning method with adaptively weighted nodes and analyze its generalization performance.", "abstract": "We propose a federated learning method with weighted nodes in which the weights can be modified to optimize the model\u2019s performance on a separate validation set. The problem is formulated as a bilevel optimization problem where the inner problem is a federated learning problem with weighted nodes and the outer problem focuses on optimizing the weights based on the validation performance of the model returned from the inner problem. A communication-efficient federated optimization algorithm is designed to solve this bilevel optimization problem. We analyze the generalization performance of the output model and identify the scenarios when our method is in theory superior to training a model locally and superior to federated learning with static and evenly distributed weights. 
", "keywords": "federated learning;bilevel optimization;distributed optimization;generalization performance", "primary_area": "", "supplementary_material": "/attachment/0e76b6bd8779f816e82b2a15a9338174de877972.zip", "author": "Yankun Huang;Qihang Lin;Nick Street;Stephen Baek", "authorids": "~Yankun_Huang1;~Qihang_Lin1;~Nick_Street1;~Stephen_Baek1", "gender": ";;M;", "homepage": ";https://tippie.uiowa.edu/people/qihang-lin;;http://www.stephenbaek.com", "dblp": ";02/8146;;", "google_scholar": ";sPtFRB8AAAAJ;-kKtBb8AAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Yankun_Huang1;~Qihang_Lin1;~Nick_Street1;~Stephen_Baek1", "aff": ";University of Iowa;University of Iowa;University of Virginia, Charlottesville", "aff_domain": ";uiowa.edu;uiowa.edu;virginia.edu", "position": ";Associate Professor;Full Professor;Associate Professor", "bibtex": "@misc{\nhuang2023federated,\ntitle={Federated Learning on Adaptively Weighted Nodes by Bilevel Optimization},\nauthor={Yankun Huang and Qihang Lin and Nick Street and Stephen Baek},\nyear={2023},\nurl={https://openreview.net/forum?id=6aKcyoDJBaX}\n}", "github": "", "project": "", "reviewers": "F6ZM;gCZG;9pHr;Caqk", "site": "https://openreview.net/forum?id=6aKcyoDJBaX", "pdf_size": 4216826, "recommendation": "3;3;6;6", "confidence": "4;4;3;3", "correctness": "3;4;3;3", "technical_novelty": "2;2;3;2", "empirical_novelty": "2;2;3;2", "wc_summary_paper": "62;34;60;169", "wc_strength_and_weaknesses": "262;140;149;471", "wc_clarity_quality_novelty_and_reproducibility": "64;8;14;12", "wc_summary_review": "32;9;32;57", "wc_review": "420;191;255;709", "wc_reply_reviewers": "0;0;0;21", "wc_reply_authors": "806;653;700;600", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.5, 1.5 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 81.25, 51.85255538543882 ], "wc_strength_and_weaknesses_avg": [ 255.5, 133.38384459896184 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 24.5, 22.907422377910613 ], "wc_summary_review_avg": [ 32.5, 16.977926846349646 ], "wc_review_avg": [ 393.75, 200.26903779665992 ], "wc_reply_reviewers_avg": [ 5.25, 9.093266739736606 ], "wc_reply_authors_avg": [ 689.75, 75.86954263734559 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": -0.5773502691896258, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3810276202724558904&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Iowa;University of Virginia", "aff_unique_dep": ";", "aff_unique_url": "https://www.uiowa.edu;https://www.virginia.edu", "aff_unique_abbr": "UIowa;UVA", "aff_campus_unique_index": "1", "aff_campus_unique": ";Charlottesville", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "6apN9AQ-3fN", "title": "Distance VS. 
Coordinate: Distance Based Embedding Improves Model Generalization for Routing Problems", "track": "main", "status": "Reject", "tldr": "Distance based embedding is a better choice for routing problems, compared to coordinate based embedding.", "abstract": "Routing problems, such as the traveling salesman problem (TSP) and the vehicle routing problem, are among the most classic research topics in combinatorial optimization and operations research (OR). In recent years, with the rapid development of online service platforms, there has been renewed interest in applying this study to facilitate emerging industrial applications, such as food delivery and logistics services. While OR methods remain the mainstream technique, increasing efforts have been put into exploiting deep learning (DL) models for tackling routing problems. The existing ML methods often consider the embedding of the route point coordinate as a key model input and are capable of delivering competitive performance in synthetic or simplified settings. However, it is empirically noted that this line of work appears to lack the robustness and generalization ability that are crucial for real-world applications. In this paper, we demonstrate that the coordinate can unexpectedly lead to these problems. There are two factors that make the coordinate rather `poisonous' for DL models: i) the definition of distance between route points is far more complex than what the coordinate can depict; ii) the coordinate can hardly be sufficiently `traversed' by the training data. To circumvent these limitations, we propose to abandon the coordinate and instead use the relative distance for route point embedding. We show in both a synthetic TSP and a real-world food pickup and delivery route prediction problem that our design can significantly improve the model's generalization ability, and deliver competitive or better performance compared with existing models. ", "keywords": "routing problems;travelling salesman problem;combinatorial optimization;pickup and delivery;embedding", "primary_area": "", "supplementary_material": "", "author": "Hongsen Liao;Ruiyuan Wu;Yuyang Han;Yuncong Hu;Ke Xing;Jinghua Hao;Renqing He", "authorids": "liaohongsen@meituan.com;wuruiyuan@meituan.com;hanyuyang02@meituan.com;huyuncong03@meituan.com;xingke@meituan.com;haojinghua@meituan.com;herenqing@meituan.com", "gender": ";;;;;;", "homepage": ";;;;;;", "dblp": ";;;;;;", "google_scholar": ";;;;;;", "orcid": ";;;;;;", "linkedin": ";;;;;;", "or_profile": ";;;;;;", "aff": ";;;;;;", "aff_domain": ";;;;;;", "position": ";;;;;;", "bibtex": "@misc{\nliao2023distance,\ntitle={Distance {VS}. 
Coordinate: Distance Based Embedding Improves Model Generalization for Routing Problems},\nauthor={Hongsen Liao and Ruiyuan Wu and Yuyang Han and Yuncong Hu and Ke Xing and Jinghua Hao and Renqing He},\nyear={2023},\nurl={https://openreview.net/forum?id=6apN9AQ-3fN}\n}", "github": "", "project": "", "reviewers": "jQZV;2rgX;F45x;CEky", "site": "https://openreview.net/forum?id=6apN9AQ-3fN", "pdf_size": 615053, "recommendation": "3;3;3;5", "confidence": "4;5;4;3", "correctness": "3;4;2;2", "technical_novelty": "1;2;2;3", "empirical_novelty": "0;2;2;3", "wc_summary_paper": "105;39;64;119", "wc_strength_and_weaknesses": "51;119;440;95", "wc_clarity_quality_novelty_and_reproducibility": "175;37;202;45", "wc_summary_review": "52;80;61;29", "wc_review": "383;275;767;288", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 2.75, 0.82915619758885 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 1.75, 1.0897247358851685 ], "wc_summary_paper_avg": [ 81.75, 31.901214710415026 ], "wc_strength_and_weaknesses_avg": [ 176.25, 154.21636586303023 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 114.75, 74.41899959015842 ], "wc_summary_review_avg": [ 55.5, 18.33712082089225 ], "wc_review_avg": [ 428.25, 199.97171675014445 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.816496580927726, "corr_recommendation_correctness": -0.5222329678670935, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9252816129354772651&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 0 }, { "title": "Compositional Task Representations for Large Language Models", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10832", "id": "6axIMJA7ME3", "poster": "/media/PosterPDFs/ICLR%202023/10832.png?t=1677614334.6011186", "openreview": "https://openreview.net/forum?id=6axIMJA7ME3", "slides": "https://iclr.cc/virtual/2023/poster/10832", "video": "https://iclr.cc/virtual/2023/poster/10832", "author_site": "NAN SHAO, Zefan Cai, Hanwei Xu, Chonghua Liao, Yanan Zheng, Zhilin Yang", "tldr": "", "abstract": "Large language models have shown a remarkable cross-task generalization ability. Most prior work assumed that prompts effectively extract knowledge from language models to facilitate generalization to new tasks. This perspective led to numerous studies on improving prompts. In contrast, we introduce a new perspective, compositional generalization, that views each task as a composition of latent codes and generalizes to test tasks by a new composition of seen codes. To this end, we propose a novel prompt-free approach, Compositional Task Representations (CTR), that employs multi-task training to learn a discrete, compositional codebook. Empirically, our CTR substantially outperforms prompt-based methods in zero-label learning on average. 
According to our analysis, some of the learned CTR codes are interpretable to humans and demonstrate a certain degree of controllability.\n", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "NAN SHAO;Zefan Cai;Hanwei xu;Chonghua Liao;Yanan Zheng;Zhilin Yang", "authorids": "~NAN_SHAO2;~Zefan_Cai1;xuhanwei@rcrai.com;~Chonghua_Liao1;~Yanan_Zheng1;~Zhilin_Yang2", "gender": "M;;;M;F;", "homepage": ";;;;https://zheng-yanan.githun.io;", "dblp": "00/8841;;;304/4739.html;93/7107;", "google_scholar": "https://scholar.google.com.hk/citations?user=CjbZcTsAAAAJ;;;;0DqJ8eIAAAAJ;", "orcid": "0000-0003-4483-1693;;;;;", "linkedin": ";;;;;", "or_profile": "~NAN_SHAO2;~Zefan_Cai1;xuhanwei@rcrai.com;~Chonghua_Liao1;~Yanan_Zheng1;~Zhilin_Yang2", "aff": "Recurrent AI;;;Tsinghua University;Moonshot AI;", "aff_domain": "rcrai.com;;;tsinghua.edu.cn;moonshot.cn;", "position": "Engineer;;;PhD student;Researcher;", "bibtex": "@inproceedings{\nshao2023compositional,\ntitle={Compositional Task Representations for Large Language Models},\nauthor={NAN SHAO and Zefan Cai and Hanwei xu and Chonghua Liao and Yanan Zheng and Zhilin Yang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=6axIMJA7ME3}\n}", "github": "", "project": "", "reviewers": "zu2t;FsjZ;BP2L;xT8s", "pdf_size": 829228, "recommendation": "6;6;6;8", "confidence": "5;3;4;3", "correctness": "3;3;3;3", "technical_novelty": "3;3;3;3", "empirical_novelty": "3;2;3;3", "wc_summary_paper": "197;102;169;70", "wc_strength_and_weaknesses": "389;53;402;27", "wc_clarity_quality_novelty_and_reproducibility": "52;224;25;61", "wc_summary_review": "76;60;35;14", "wc_review": "714;439;631;172", "wc_reply_reviewers": "246;194;175;0", "wc_reply_authors": "592;1004;847;126", "reply_reviewers": "2;2;1;0", "reply_authors": "3;3;2;1", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 134.5, 50.77647092896473 ], "wc_strength_and_weaknesses_avg": [ 217.75, 178.04686882953038 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 90.5, 78.20645753388911 ], "wc_summary_review_avg": [ 46.25, 23.668280461410795 ], "wc_review_avg": [ 489.0, 208.4334426141832 ], "wc_reply_reviewers_avg": [ 153.75, 92.49425657844924 ], "wc_reply_authors_avg": [ 642.25, 332.34949601285695 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.5222329678670935, "corr_recommendation_correctness": 0.0, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16789793034502347690&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "pdf": "https://openreview.net/pdf?id=6axIMJA7ME3", "email": "rcrai.com;;;tsinghua.edu.cn;moonshot.cn;", "author_num": 6, "aff_unique_index": "0;1;2", "aff_unique_norm": "Recurrent AI;Tsinghua University;Moonshot AI", "aff_unique_dep": ";;", "aff_unique_url": "https://www.recurrent.ai;https://www.tsinghua.edu.cn;https://moonshot.ai", "aff_unique_abbr": "Recurrent AI;THU;Moonshot AI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;China" }, { "id": "6bRKHpeZi7", "title": "Visual Expertise and the Log-Polar Transform Explain Image 
Inversion Effects", "track": "main", "status": "Reject", "tldr": "", "abstract": "Visual expertise can be defined as the ability to discriminate among subordinate-level objects in homogeneous classes, such as identities of faces within the class \"face\". Despite being able to discriminate many faces, subjects perform poorly at recognizing even familiar faces once inverted. This face-inversion effect is in contrast to subjects\u2019 performance identifying inverted objects for which their experience is at a basic level, which results in less impairment. Experimental results have suggested that when identifying mono-oriented objects, such as cars, car novices' performance is between that of faces and other objects. We build an anatomically-inspired neurocomputational model to explore this effect. Our model includes a foveated retina and the log-polar mapping from the visual field to V1. This transformation causes changes in scale to appear as horizontal translations, leading to scale equivariance. Rotation is similarly equivariant, leading to vertical translations. When fed into a standard convolutional network, this provides rotation and scale invariance. It may be surprising that a rotation-invariant network shows any inversion effect at all. This is because there is a crucial topological difference between scale and rotation: Rotational invariance is discontinuous, with V1 ranging from 90 degrees (vertically up) to 270 degrees (vertically down). Hence when a face is inverted, the configural information in the face is disrupted while feature information is relatively unaffected. We show that the inversion effect arises as a result of visual expertise, where configural information becomes relevant as more identities are learned at the subordinate level. Our model matches the classic result: faces suffer more from inversion than mono-oriented objects, which are more disrupted than non-mono-oriented objects when objects are only familiar at a basic level.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Martha Gahl;Shubham Kulkarni;Nikhil Pathak;Alex Russell;Garrison W. Cottrell", "authorids": "~Martha_Gahl1;~Shubham_Kulkarni1;~Nikhil_Pathak1;~Alex_Russell1;~Garrison_W._Cottrell1", "gender": "F;M;;M;M", "homepage": ";;;;https://cseweb.ucsd.edu/~gary/", "dblp": "284/4648;;;;c/GWCottrell", "google_scholar": "https://scholar.google.com/citations?hl=en;;;;5Aut7EEAAAAJ", "orcid": ";;;;0000-0001-7538-1715", "linkedin": "martha-gahl-588139105/;shubhamkulk/;nikhilpathak5/;alex-russell-b93418235;", "or_profile": "~Martha_Gahl1;~Shubham_Kulkarni1;~Nikhil_Pathak1;~Alex_Russell1;~Garrison_Cottrell2", "aff": "University of California, San Diego, University of California, San Diego;University of California, San Diego;;University of California, San Diego;University of California, San Diego", "aff_domain": "eng.ucsd.edu;ucsd.edu;;ucsd.edu;ucsd.edu", "position": "PhD student;MS student;;Undergrad student;Emeritus", "bibtex": "@misc{\ngahl2023visual,\ntitle={Visual Expertise and the Log-Polar Transform Explain Image Inversion Effects},\nauthor={Martha Gahl and Shubham Kulkarni and Nikhil Pathak and Alex Russell and Garrison W. 
Cottrell},\nyear={2023},\nurl={https://openreview.net/forum?id=6bRKHpeZi7}\n}", "github": "", "project": "", "reviewers": "UAVL;8Y2A;yrmA;rdQQ", "site": "https://openreview.net/forum?id=6bRKHpeZi7", "pdf_size": 654340, "recommendation": "3;5;5;5", "confidence": "4;5;4;5", "correctness": "3;3;3;2", "technical_novelty": "3;3;2;2", "empirical_novelty": "4;3;2;3", "wc_summary_paper": "65;63;211;66", "wc_strength_and_weaknesses": "434;43;393;80", "wc_clarity_quality_novelty_and_reproducibility": "64;4;256;69", "wc_summary_review": "24;4;123;328", "wc_review": "587;114;983;543", "wc_reply_reviewers": "0;0;134;0", "wc_reply_authors": "0;0;232;0", "reply_reviewers": "0;0;1;0", "reply_authors": "0;0;1;0", "recommendation_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 4.5, 0.5 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 101.25, 63.3733974156349 ], "wc_strength_and_weaknesses_avg": [ 237.5, 177.0797842781609 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 98.25, 94.60014534872555 ], "wc_summary_review_avg": [ 119.75, 128.39854944663512 ], "wc_review_avg": [ 556.75, 307.7420795081492 ], "wc_reply_reviewers_avg": [ 33.5, 58.023702053557386 ], "wc_reply_authors_avg": [ 58.0, 100.45894683899488 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 0.25, 0.4330127018922193 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": -0.3333333333333333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7592372523517567557&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of California, San Diego", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsd.edu", "aff_unique_abbr": "UCSD", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "San Diego", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "6dZqGFB8g-O", "title": "STay-On-the-Ridge (STON'R): Guaranteed Convergence to Local Minimax Equilibrium in Nonconvex-Nonconcave Games", "track": "main", "status": "Reject", "tldr": "", "abstract": "Min-max optimization problems involving nonconvex-nonconcave objectives have found important applications in adversarial training and other multi-agent learning settings. Yet, no known gradient descent-based method is guaranteed to converge to (even local notions of) min-max equilibrium in the nonconvex-nonconcave setting. For all known methods, there exist relatively simple objectives for which they cycle or exhibit other undesirable behavior different from converging to a point, let alone to some game-theoretically meaningful one [Flokas et al. '19, Hsieh et al. '21]. The only known convergence guarantees hold under the strong assumption that the initialization is very close to a local min-max equilibrium [Wang et al. '19]. Moreover, the afore-described challenges are not just theoretical curiosities. All known methods are unstable in practice, even in simple settings.\n \nWe propose the first method that is guaranteed to converge to a local min-max equilibrium for smooth nonconvex-nonconcave objectives. Our method is second-order and provably escapes limit cycles as long as it is initialized at an easy-to-find initial point. 
Both the definition of our method and its convergence analysis are motivated by the topological nature of the problem. In particular, our method is not designed to decrease some potential function, such as the distance of its iterate from the set of local min-max equilibria or the projected gradient of the objective, but is designed to satisfy a topological property that guarantees the avoidance of cycles and implies its convergence. ", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/c0429aae5dc043dfc046e60fb00dfd63e34fd521.zip", "author": "Constantinos Costis Daskalakis;Noah Golowich;EFSTRATIOS PANTELEIMON SKOULAKIS;Emmanouil Zampetakis", "authorids": "~Constantinos_Costis_Daskalakis1;~Noah_Golowich1;~EFSTRATIOS_PANTELEIMON_SKOULAKIS1;~Emmanouil_Zampetakis1", "gender": "M;;M;M", "homepage": "http://people.csail.mit.edu/costis/;https://noahgol.github.io;https://mzampet.com/;http://www.corelab.ntua.gr/~sskoul/", "dblp": ";150/1861;;183/0979.html", "google_scholar": "iTv2cOgAAAAJ;roUlyWcAAAAJ;;Juo2Tk8AAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Constantinos_Costis_Daskalakis1;~Noah_Golowich1;~Emmanouil_Zampetakis1;~Stratis_Skoulakis2", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Yale University;EPFL - EPF Lausanne", "aff_domain": "mit.edu;mit.edu;yale.edu;epfl.ch", "position": "Full Professor;PhD student;Assistant Professor;Postdoc", "bibtex": "@misc{\ndaskalakis2023stayontheridge,\ntitle={{ST}ay-On-the-Ridge ({STON}'R): Guaranteed Convergence to Local Minimax Equilibrium in Nonconvex-Nonconcave Games},\nauthor={Constantinos Costis Daskalakis and Noah Golowich and EFSTRATIOS PANTELEIMON SKOULAKIS and Emmanouil Zampetakis},\nyear={2023},\nurl={https://openreview.net/forum?id=6dZqGFB8g-O}\n}", "github": "", "project": "", "reviewers": "5vqs;PZ8h;Ej7m", "site": "https://openreview.net/forum?id=6dZqGFB8g-O", "pdf_size": 425261, "recommendation": "5;5;5", "confidence": "4;3;3", "correctness": "2;3;4", "technical_novelty": "3;3;4", "empirical_novelty": "0;3;3", "wc_summary_paper": "61;23;73", "wc_strength_and_weaknesses": "544;41;394", "wc_clarity_quality_novelty_and_reproducibility": "104;306;8", "wc_summary_review": "50;63;60", "wc_review": "759;433;535", "wc_reply_reviewers": "0;10;0", "wc_reply_authors": "1792;282;426", "reply_reviewers": "0;1;0", "reply_authors": "3;1;1", "recommendation_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 1.4142135623730951 ], "wc_summary_paper_avg": [ 52.333333333333336, 21.31248981752771 ], "wc_strength_and_weaknesses_avg": [ 326.3333333333333, 210.84960411524503 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 139.33333333333334, 124.19697616644653 ], "wc_summary_review_avg": [ 57.666666666666664, 5.557777333511022 ], "wc_review_avg": [ 575.6666666666666, 136.16003655829252 ], "wc_reply_reviewers_avg": [ 3.3333333333333335, 4.714045207910316 ], "wc_reply_authors_avg": [ 833.3333333333334, 680.4240507866318 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:7ds0PaPk65UJ:scholar.google.com/&scioq=STay-On-the-Ridge+(STON%27R):+Guaranteed+Convergence+to+Local+Minimax+Equilibrium+in+Nonconvex-Nonconcave+Games&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Massachusetts Institute of Technology;Yale University;EPFL", "aff_unique_dep": ";;", "aff_unique_url": "https://web.mit.edu;https://www.yale.edu;https://www.epfl.ch", "aff_unique_abbr": "MIT;Yale;EPFL", "aff_campus_unique_index": "1", "aff_campus_unique": ";Lausanne", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "United States;Switzerland" }, { "id": "6dlC7E1H_9", "title": "Teaching Algorithmic Reasoning via In-context Learning", "track": "main", "status": "Reject", "tldr": "We study how to teach algorithmic reasoning to LLMs via in-context learning. We show that algorithmic reasoning can be taught by increasing specificity in the way we explain the steps of an algorithm along with running examples.", "abstract": "Large language models (LLMs) have shown increasing in-context learning capabilities through scaling up model and data size. Despite this progress, LLMs are still unable to solve algorithmic reasoning problems. While providing a rationale with the final answer has led to further improvements in multi-step reasoning problems, Anil et al. 2022 showed that even simple algorithmic reasoning tasks such as parity are far from solved. In this work, we identify and study four key stages for successfully teaching algorithmic reasoning to LLMs: (1) formulating algorithms as skills, (2) teaching multiple skills simultaneously (skill accumulation), (3) teaching how to combine skills (skill composition) and (4) teaching how to use skills as tools. We show that it is possible to teach algorithmic reasoning to LLMs via in-context learning, which we refer to as Algorithmic Prompting. We evaluate our approach on a variety of arithmetic and quantitative reasoning tasks, and demonstrate significant boosts in performance over existing prompting techniques. In particular, for long parity, addition, multiplication and subtraction and parity tasks, we achieve an error reduction of approximately 10x, 9x, 5x and 2x respectively compared to the best available baselines. 
", "keywords": "in-context learning;algorithmic reasoning;LLMs;prompting", "primary_area": "", "supplementary_material": "", "author": "Hattie Zhou;Azade Nova;Aaron Courville;Hugo Larochelle;Behnam Neyshabur;Hanie Sedghi", "authorids": "~Hattie_Zhou1;~Azade_Nova1;~Aaron_Courville3;~Hugo_Larochelle1;~Behnam_Neyshabur1;~Hanie_Sedghi1", "gender": "F;;M;M;F;F", "homepage": "http://hattiezhou.com;;https://mila.quebec/en/directory/hugo-larochelle;https://www.neyshabur.net;https://haniesedghi.com/;https://sites.google.com/site/azadenazi/", "dblp": ";56/1688;86/3862.html;131/9898;66/8332;99/7868.html", "google_scholar": ";https://scholar.google.ca/citations?user=km6CP8cAAAAJ;https://scholar.google.ca/citations?user=U89FHq4AAAAJ;e1ucbCYAAAAJ;_9GX96fDWAMC;https://scholar.google.com/citations?hl=en", "orcid": ";;;;;", "linkedin": ";;;;hanie-sedghi-71bb2582;", "or_profile": "~Hattie_Zhou1;~Aaron_Courville3;~Hugo_Larochelle1;~Behnam_Neyshabur1;~Hanie_Sedghi1;~Azade_Nazi1", "aff": "University of Montreal;Universit\u00e9 de Montr\u00e9al;Google;Google;Google Research, Brain team;", "aff_domain": "umontreal.ca; ;google.com;google.com;google.com;", "position": "PhD student;Assistant Professor;Research Scientist;Research Scientist;Senior Research Scientist;", "bibtex": "@misc{\nzhou2023teaching,\ntitle={Teaching Algorithmic Reasoning via In-context Learning},\nauthor={Hattie Zhou and Azade Nova and Aaron Courville and Hugo Larochelle and Behnam Neyshabur and Hanie Sedghi},\nyear={2023},\nurl={https://openreview.net/forum?id=6dlC7E1H_9}\n}", "github": "", "project": "", "reviewers": "Ke6m;9LM1;UYXX", "site": "https://openreview.net/forum?id=6dlC7E1H_9", "pdf_size": 2296454, "recommendation": "5;5;8", "confidence": "4;4;3", "correctness": "4;4;3", "technical_novelty": "2;2;2", "empirical_novelty": "2;3;3", "wc_summary_paper": "105;63;98", "wc_strength_and_weaknesses": "353;364;168", "wc_clarity_quality_novelty_and_reproducibility": "44;180;228", "wc_summary_review": "35;60;76", "wc_review": "537;667;570", "wc_reply_reviewers": "0;494;93", "wc_reply_authors": "1133;2523;1222", "reply_reviewers": "0;3;2", "reply_authors": "2;4;3", "recommendation_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 88.66666666666667, 18.372685039360892 ], "wc_strength_and_weaknesses_avg": [ 295.0, 89.91477446263582 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 150.66666666666666, 77.92874237974644 ], "wc_summary_review_avg": [ 57.0, 16.87206764645835 ], "wc_review_avg": [ 591.3333333333334, 55.174470747096635 ], "wc_reply_reviewers_avg": [ 195.66666666666666, 214.3429235801567 ], "wc_reply_authors_avg": [ 1626.0, 635.3146202210891 ], "reply_reviewers_avg": [ 1.6666666666666667, 1.247219128924647 ], "reply_authors_avg": [ 3.0, 0.816496580927726 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": -1.0, "gs_citation": 43, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15469085261765926014&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;2;2;2", "aff_unique_norm": "University of Montreal;Universit\u00e9 de Montr\u00e9al;Google", "aff_unique_dep": ";;Google", "aff_unique_url": "https://wwwumontreal.ca;https://www.umontreal.ca;https://www.google.com", 
"aff_unique_abbr": "UM;UdeM;Google", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;1;1;1", "aff_country_unique": "Canada;United States" }, { "title": "Excess Risk of Two-Layer ReLU Neural Networks in Teacher-Student Settings and its Superiority to Kernel Methods", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10813", "id": "6doXHqwMayf", "poster": "", "openreview": "https://openreview.net/forum?id=6doXHqwMayf", "slides": "https://iclr.cc/virtual/2023/poster/10813", "video": "https://iclr.cc/virtual/2023/poster/10813", "author_site": "Akiyama Shunta, Taiji Suzuki", "tldr": "", "abstract": "While deep learning has outperformed other methods for various tasks, theoretical frameworks that explain its reason have not been fully established. We investigate the excess risk of two-layer ReLU neural networks in a teacher-student regression model, in which a student network learns an unknown teacher network through its outputs. Especially, we consider the student network that has the same width as the teacher network and is trained in two phases: first by noisy gradient descent and then by the vanilla gradient descent. Our result shows that the student network provably reaches a near-global optimal solution and outperforms any kernel methods estimator (more generally, linear estimators), including neural tangent kernel approach, random feature model, and other kernel methods, in a sense of the minimax optimal rate. The key concept inducing this superiority is the non-convexity of the neural network models. Even though the loss landscape is highly non-convex, the student network adaptively learns the teacher neurons.", "keywords": "Deep learning theory;optimization;learning theory;excess risk", "primary_area": "", "supplementary_material": "/attachment/461a9b71a1812b124bfc058149e0775fd95641ef.zip", "author": "Shunta Akiyama;Taiji Suzuki", "authorids": "~Shunta_Akiyama1;~Taiji_Suzuki1", "gender": "M;M", "homepage": "https://shuntaak.github.io/;http://ibis.t.u-tokyo.ac.jp/suzuki/", "dblp": "280/3821;08/312", "google_scholar": "https://scholar.google.co.jp/citations?user=RlTfkjQAAAAJ;x8osrBsAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Shunta_Akiyama1;~Taiji_Suzuki1", "aff": "The University of Tokyo;The University of Tokyo", "aff_domain": "u-tokyo.ac.jp;tokyo.ac.jp", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nakiyama2023excess,\ntitle={Excess Risk of Two-Layer Re{LU} Neural Networks in Teacher-Student Settings and its Superiority to Kernel Methods},\nauthor={Shunta Akiyama and Taiji Suzuki},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=6doXHqwMayf}\n}", "github": "", "project": "", "reviewers": "Xjf3;xk6b;51MF;vbHH;VLSM", "pdf_size": 526742, "recommendation": "5;6;8;8;8", "confidence": "3;4;3;3;4", "correctness": "3;2;4;4;4", "technical_novelty": "3;3;3;3;3", "empirical_novelty": "0;0;3;3;0", "wc_summary_paper": "50;70;70;46;69", "wc_strength_and_weaknesses": "153;453;242;100;240", "wc_clarity_quality_novelty_and_reproducibility": "18;49;31;17;29", "wc_summary_review": "92;85;69;46;47", "wc_review": "313;657;412;209;385", "wc_reply_reviewers": "0;598;0;0;0", "wc_reply_authors": "506;2386;404;450;184", "reply_reviewers": "0;3;0;0;0", "reply_authors": "1;4;1;1;1", "recommendation_avg": [ 7.0, 1.2649110640673518 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "correctness_avg": [ 
3.4, 0.8 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 1.2, 1.4696938456699067 ], "wc_summary_paper_avg": [ 61.0, 10.69579356569675 ], "wc_strength_and_weaknesses_avg": [ 237.6, 120.42690729234891 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 28.8, 11.565465835840769 ], "wc_summary_review_avg": [ 67.8, 18.925115587493778 ], "wc_review_avg": [ 395.2, 148.5616370399842 ], "wc_reply_reviewers_avg": [ 119.6, 239.2 ], "wc_reply_authors_avg": [ 786.0, 807.418602708657 ], "reply_reviewers_avg": [ 0.6, 1.2000000000000002 ], "reply_authors_avg": [ 1.6, 1.2000000000000002 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.7905694150420948, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2461237120298070066&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=6doXHqwMayf", "email": "u-tokyo.ac.jp;tokyo.ac.jp", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Tokyo", "aff_unique_dep": "", "aff_unique_url": "https://www.u-tokyo.ac.jp", "aff_unique_abbr": "UTokyo", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Japan" }, { "id": "6dtI3qPVwVp", "title": "Link Prediction without Graph Neural Networks", "track": "main", "status": "Reject", "tldr": "We address key limitations of GNN-based link prediction methods in handling class imbalance and, moreover, present a simpler, more accurate, and more efficient alternative. ", "abstract": "Link prediction, which consists of predicting edges based on graph features, is a fundamental task in many graph applications. As for several related problems, Graph Neural Networks (GNNs), which are based on an attribute-centric message-passing paradigm, have become the predominant framework for link prediction. GNNs have consistently outperformed traditional topology-based heuristics, but what contributes to their performance? Are there simpler approaches that achieve comparable or better results? To answer these questions, we first identify important limitations in how GNN-based link prediction methods handle the intrinsic class imbalance of the problem---due to the graph sparsity---in their training and evaluation. Moreover, we propose Gelato, a novel topology-centric framework that applies a topological heuristic to a graph enhanced by attribute information via graph learning. Our model is trained end-to-end with an N-pair loss on an unbiased training set to address class imbalance. 
Experiments show that Gelato is 145% more accurate, trains 11 times faster, infers 6,000 times faster, and has less than half of the trainable parameters compared to state-of-the-art GNNs for link prediction.", "keywords": "Link Prediction;Graph Neural Networks;Graph Learning;Topological Heuristics", "primary_area": "", "supplementary_material": "", "author": "Zexi Huang;Mert Kosan;Arlei Lopes da Silva;Ambuj Singh", "authorids": "~Zexi_Huang1;~Mert_Kosan1;~Arlei_Lopes_da_Silva1;~Ambuj_Singh1", "gender": "M;M;M;", "homepage": "https://zexihuang.com/;https://www.mertkosan.com;https://cs.rice.edu/~al110/index.html;", "dblp": "299/4829;304/8019;19/2546;", "google_scholar": "56TmFA4AAAAJ;12lDpTAAAAAJ;atGtis4AAAAJ;", "orcid": "0000-0002-1480-4494;0000-0002-8092-5024;0000-0003-1792-0076;", "linkedin": "zexihuang/;https://linkedin.com/in/mertkosan;;", "or_profile": "~Zexi_Huang1;~Mert_Kosan1;~Arlei_Lopes_da_Silva1;~Ambuj_Singh1", "aff": "UC Santa Barbara;University of California, Santa Barbara;Rice University;", "aff_domain": "ucsb.edu;ucsb.edu;rice.edu;", "position": "PhD student;PhD student;Assistant Professor;", "bibtex": "@misc{\nhuang2023link,\ntitle={Link Prediction without Graph Neural Networks},\nauthor={Zexi Huang and Mert Kosan and Arlei Lopes da Silva and Ambuj Singh},\nyear={2023},\nurl={https://openreview.net/forum?id=6dtI3qPVwVp}\n}", "github": "", "project": "", "reviewers": "maA8;L8KX;p64G;SSk7", "site": "https://openreview.net/forum?id=6dtI3qPVwVp", "pdf_size": 14523542, "recommendation": "1;3;3;6", "confidence": "5;4;3;5", "correctness": "3;3;3;3", "technical_novelty": "2;2;2;2", "empirical_novelty": "3;3;2;2", "wc_summary_paper": "46;70;51;70", "wc_strength_and_weaknesses": "104;303;201;118", "wc_clarity_quality_novelty_and_reproducibility": "30;34;34;18", "wc_summary_review": "18;42;36;9", "wc_review": "198;449;322;215", "wc_reply_reviewers": "55;0;0;0", "wc_reply_authors": "825;597;766;255", "reply_reviewers": "1;0;0;0", "reply_authors": "2;1;1;1", "recommendation_avg": [ 3.25, 1.7853571071357126 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 59.25, 10.894379284750462 ], "wc_strength_and_weaknesses_avg": [ 181.5, 79.34261150226907 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 29.0, 6.557438524302 ], "wc_summary_review_avg": [ 26.25, 13.311179511974137 ], "wc_review_avg": [ 296.0, 100.31201323869439 ], "wc_reply_reviewers_avg": [ 13.75, 23.81569860407206 ], "wc_reply_authors_avg": [ 610.75, 221.78410109834294 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.1266600992762247, "corr_recommendation_correctness": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=593873435520105695&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of California, Santa Barbara;Rice University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucsb.edu;https://www.rice.edu", "aff_unique_abbr": "UCSB;Rice", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Santa Barbara;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "6f47WT-HtuH", "title": "Unfair geometries: exactly solvable data model with fairness implications", "track": "main", "status": "Reject", "tldr": 
"We propose a generative model, exactly solvable using statistical physics, which emphasize the impact of data geometry in inducing bias in classification.", "abstract": "Machine learning (ML) may be oblivious to human bias but it is not immune to its perpetuation. Marginalisation and iniquitous group representation are often traceable in the very data used for training, and may be reflected or even enhanced by the learning models.\nIn the present work, we aim at clarifying the role played by data geometry in the emergence of ML bias. We introduce an exactly solvable high-dimensional model of data imbalance, where parametric control over the many bias-inducing factors allows for an extensive exploration of the bias inheritance mechanism.Through the tools of statistical physics, we analytically characterise the typical properties of learning models trained in this synthetic framework and obtain exact predictions for the observables that are commonly employed for fairness assessment.\nDespite the simplicity of the data model, we retrace and unpack typical unfairness behaviour observed on real-world datasets. \nWe also obtain a detailed analytical characterisation of a class of bias mitigation strategies. We first consider a basic loss-reweighing scheme, which allows for an implicit minimisation of different unfairness metrics, and quantify the incompatibilities between some existing fairness criteria. Then, we consider a novel mitigation strategy based on a matched inference approach, consisting in the introduction of coupled learning models. Our theoretical analysis of this approach shows that the coupled strategy can strike superior fairness-accuracy trade-offs.", "keywords": "statistical physics;statistical mechanics of learning;generalization model;modelling structured data;data imbalance;bias;fairness;bias mitigation", "primary_area": "", "supplementary_material": "/attachment/dc3150f1fcced88c76dbb179875ac289016c6305.zip", "author": "Stefano Sarao Mannelli;Federica Gerace;Negar Rostamzadeh;Luca Saglietti", "authorids": "~Stefano_Sarao_Mannelli1;~Federica_Gerace1;~Negar_Rostamzadeh1;~Luca_Saglietti1", "gender": "M;F;F;M", "homepage": "https://stefsmlab.github.io/;;;", "dblp": "232/3343;;126/0982;180/5743", "google_scholar": "https://scholar.google.it/citations?user=Kq272_MAAAAJ;dvDLaPkAAAAJ;https://scholar.google.ca/citations?user=t5ak3j0AAAAJ;klxwxyUAAAAJ", "orcid": ";;;", "linkedin": ";;;luca-saglietti-325208169/", "or_profile": "~Stefano_Sarao_Mannelli1;~Federica_Gerace1;~Negar_Rostamzadeh1;~Luca_Saglietti1", "aff": "University College London;International Higher School for Advanced Studies Trieste;Google;Bocconi University", "aff_domain": "ucl.ac.uk;sissa.it;google.com;unibocconi.it", "position": "Postdoc;Postdoc;Research Scientist;Assistant Professor", "bibtex": "@misc{\nmannelli2023unfair,\ntitle={Unfair geometries: exactly solvable data model with fairness implications},\nauthor={Stefano Sarao Mannelli and Federica Gerace and Negar Rostamzadeh and Luca Saglietti},\nyear={2023},\nurl={https://openreview.net/forum?id=6f47WT-HtuH}\n}", "github": "", "project": "", "reviewers": "jtpj;9w79;NozC;CwVZ", "site": "https://openreview.net/forum?id=6f47WT-HtuH", "pdf_size": 2912722, "recommendation": "3;3;5;5", "confidence": "3;3;2;3", "correctness": "3;2;3;4", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "96;47;71;81", "wc_strength_and_weaknesses": "234;481;149;204", "wc_clarity_quality_novelty_and_reproducibility": "108;68;37;21", "wc_summary_review": 
"17;60;79;64", "wc_review": "455;656;336;370", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 73.75, 17.82379028153103 ], "wc_strength_and_weaknesses_avg": [ 267.0, 127.2576127388849 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 58.5, 33.20015060206806 ], "wc_summary_review_avg": [ 55.0, 23.054283766797006 ], "wc_review_avg": [ 454.25, 124.28269187622224 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.7071067811865475, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11049095007331810290&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "University College London;International Higher School for Advanced Studies;Google;Bocconi University", "aff_unique_dep": ";;Google;", "aff_unique_url": "https://www.ucl.ac.uk;https://www.sissa.it;https://www.google.com;https://www.bocconi.edu", "aff_unique_abbr": "UCL;SISSA;Google;Bocconi", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Trieste;Mountain View", "aff_country_unique_index": "0;1;2;1", "aff_country_unique": "United Kingdom;Italy;United States" }, { "title": "Multifactor Sequential Disentanglement via Structured Koopman Autoencoders", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11931", "id": "6fuPIe9tbnC", "poster": "", "openreview": "https://openreview.net/forum?id=6fuPIe9tbnC", "slides": "https://iclr.cc/virtual/2023/poster/11931", "video": "https://iclr.cc/virtual/2023/poster/11931", "author_site": "Nimrod Berman, Ilan Naiman, Omri Azencot", "tldr": "A new method for learning multifactor disentangled representations of sequential data", "abstract": "Disentangling complex data to its latent factors of variation is a fundamental task in representation learning. Existing work on sequential disentanglement mostly provides two factor representations, i.e., it separates the data to time-varying and time-invariant factors. In contrast, we consider multifactor disentanglement in which multiple (more than two) semantic disentangled components are generated. Key to our approach is a strong inductive bias where we assume that the underlying dynamics can be represented linearly in the latent space. Under this assumption, it becomes natural to exploit the recently introduced Koopman autoencoder models. However, disentangled representations are not guaranteed in Koopman approaches, and thus we propose a novel spectral loss term which leads to structured Koopman matrices and disentanglement. Overall, we propose a simple and easy to code new deep model that is fully unsupervised and it supports multifactor disentanglement. We showcase new disentangling abilities such as swapping of individual static factors between characters, and an incremental swap of disentangled factors from the source to the target. 
Moreover, we evaluate our method extensively on standard two-factor benchmark tasks where we significantly improve over competing unsupervised approaches, and we perform competitively in comparison to weakly- and self-supervised state-of-the-art approaches. The code is available at https://github.com/azencot-group/SKD.", "keywords": "Koopman methods;Sequential Disentanglement", "primary_area": "", "supplementary_material": "/attachment/bfff9d538e012f5bab63814d7173609342bcb3fe.zip", "author": "Nimrod Berman;Ilan Naiman;Omri Azencot", "authorids": "~Nimrod_Berman1;~Ilan_Naiman1;~Omri_Azencot1", "gender": "M;M;Unspecified", "homepage": ";https://www.linkedin.com/in/ilan-naiman-80071a190;http://omriazencot.com", "dblp": ";285/4824;132/3985.html", "google_scholar": ";Fglytk8AAAAJ;https://scholar.google.co.il/citations?user=MEGuRmAAAAAJ", "orcid": ";;", "linkedin": "nimrod-berman-a26250143/;ilan-naiman-80071a190;omri-azencot-a8812417/", "or_profile": "~Nimrod_Berman1;~Ilan_Naiman1;~Omri_Azencot1", "aff": "Ben-Gurion University of the Negev;Ben Gurion University of the Negev, Technion;Ben-Gurion University of the Negev", "aff_domain": "bgu.ac.il;bgu.ac.il;bgu.ac.il", "position": "MS student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nberman2023multifactor,\ntitle={Multifactor Sequential Disentanglement via Structured Koopman Autoencoders},\nauthor={Nimrod Berman and Ilan Naiman and Omri Azencot},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=6fuPIe9tbnC}\n}", "github": "", "project": "", "reviewers": "oLc4;WaMN;tz1i", "pdf_size": 3917449, "recommendation": "6;8;8", "confidence": "4;3;4", "correctness": "4;4;3", "technical_novelty": "3;3;4", "empirical_novelty": "3;3;3", "wc_summary_paper": "74;73;34", "wc_strength_and_weaknesses": "212;75;250", "wc_clarity_quality_novelty_and_reproducibility": "28;26;58", "wc_summary_review": "32;355;35", "wc_review": "346;529;377", "wc_reply_reviewers": "0;5;94", "wc_reply_authors": "667;1100;759", "reply_reviewers": "0;1;1", "reply_authors": "1;2;2", "recommendation_avg": [ 7.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 60.333333333333336, 18.62495339293199 ], "wc_strength_and_weaknesses_avg": [ 179.0, 75.1576121671429 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 37.333333333333336, 14.636332266733433 ], "wc_summary_review_avg": [ 140.66666666666666, 151.5615020012961 ], "wc_review_avg": [ 417.3333333333333, 79.96804917521052 ], "wc_reply_reviewers_avg": [ 33.0, 43.18178628387977 ], "wc_reply_authors_avg": [ 842.0, 186.25967536390334 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.49999999999999983, "corr_recommendation_correctness": -0.49999999999999983, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9706326587878964713&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=6fuPIe9tbnC", "email": "bgu.ac.il;bgu.ac.il;bgu.ac.il", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Ben-Gurion University of the Negev;Ben Gurion University of 
the Negev", "aff_unique_dep": ";", "aff_unique_url": "https://www.bgu.ac.il;https://www.bgu.ac.il", "aff_unique_abbr": "BGU;BGU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Israel" }, { "id": "6i6ajdIinJm", "title": "Local Stochastic Bilevel Optimization with Momentum-Based Variance Reduction", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Bilevel Optimization has witnessed notable progress recently with new emerging efficient algorithms and has been applied to many machine learning tasks such as data cleaning, few-shot learning, and neural architecture search. However, little attention has been paid to solving the bilevel problems under distributed setting. Federated learning (FL) is an emerging paradigm which solves machine learning tasks over distributed-located data. FL problems are challenging to solve due to the heterogeneity and communication bottleneck. However, it is unclear how these challenges will affect the convergence of Bilevel Optimization algorithms. In this paper, we study Federated Bilevel Optimization problems. Specifically, we first propose the FedBiO, a deterministic gradient-based algorithm and we show it requires $O(\\epsilon^{-1.5})$ number of steps/communication steps to reach an $\\epsilon$-stationary point. Then we propose FedBiOAcc to accelerate FedBiO with the momentum-based variance-reduction technique under the stochastic scenario. We show FedBiOAcc needs $O(\\epsilon^{-1.5})$ number of steps and $O(\\epsilon^{-1})$ communication steps, this matches the best known rate for single-level stochastic federated algorithms. Finally, we validate our proposed algorithms via the important Fair Federated Learning task. More specifically, we define a bilevel-based group fair FL objective. 
Our algorithms show superior performance compared to other baselines in numerical experiments.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Junyi Li;Feihu Huang;Heng Huang", "authorids": "~Junyi_Li1;~Feihu_Huang1;~Heng_Huang1", "gender": "M;M;M", "homepage": ";;https://www.cs.umd.edu/~heng/", "dblp": ";169/6247;03/281", "google_scholar": "MzvZSs0AAAAJ;tRQwlHUAAAAJ;4OqLaDwAAAAJ", "orcid": ";0000-0003-0806-6074;", "linkedin": ";;", "or_profile": "~Junyi_Li1;~Feihu_Huang1;~Heng_Huang1", "aff": "University of Pittsburgh;Nanjing University of Aeronautics and Astronautics;University of Pittsburgh", "aff_domain": "pitt.edu;nuaa.edu.cn;pitt.edu", "position": "PhD student;Full Professor;Full Professor", "bibtex": "@misc{\nli2023local,\ntitle={Local Stochastic Bilevel Optimization with Momentum-Based Variance Reduction},\nauthor={Junyi Li and Feihu Huang and Heng Huang},\nyear={2023},\nurl={https://openreview.net/forum?id=6i6ajdIinJm}\n}", "github": "", "project": "", "reviewers": "aHE2;6Gia;3mhv;w13W", "site": "https://openreview.net/forum?id=6i6ajdIinJm", "pdf_size": 509778, "recommendation": "3;3;3;6", "confidence": "4;5;3;4", "correctness": "2;1;4;4", "technical_novelty": "2;1;2;3", "empirical_novelty": "1;2;2;3", "wc_summary_paper": "35;55;41;142", "wc_strength_and_weaknesses": "421;232;69;200", "wc_clarity_quality_novelty_and_reproducibility": "26;77;42;368", "wc_summary_review": "92;35;246;51", "wc_review": "574;399;398;761", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.75, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 2.75, 1.299038105676658 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 68.25, 43.19360485071835 ], "wc_strength_and_weaknesses_avg": [ 230.5, 125.80242445994433 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 128.25, 139.64307179377 ], "wc_summary_review_avg": [ 106.0, 83.45957105089865 ], "wc_review_avg": [ 533.0, 149.87161172149982 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5555555555555556, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13099785294947644394&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Pittsburgh;Nanjing University of Aeronautics and Astronautics", "aff_unique_dep": ";", "aff_unique_url": "https://www.pitt.edu;http://www.nuaa.edu.cn", "aff_unique_abbr": "Pitt;NUAA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;China" }, { "title": "Implicit Bias of Large Depth Networks: a Notion of Rank for Nonlinear Functions", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10988", "id": "6iDHce-0B-a", "poster": "/media/PosterPDFs/ICLR%202023/10988.png?t=1682532218.2285876", "openreview": "https://openreview.net/forum?id=6iDHce-0B-a", "slides": "https://iclr.cc/virtual/2023/poster/10988", "video": "https://iclr.cc/virtual/2023/poster/10988", "tldr": "The representation cost of DNNs converges to a notion of nonlinear rank as the depth grows to infinity. 
This bias towards low-rank functions extends to large but finite widths.", "abstract": "We show that the representation cost of fully connected neural networks with homogeneous nonlinearities - which describes the implicit bias in function space of networks with $L_2$-regularization or with losses such as the cross-entropy - converges as the depth of the network goes to infinity to a notion of rank over nonlinear functions. We then inquire under which conditions the global minima of the loss recover the `true' rank of the data: we show that for too large depths the global minimum will be approximately rank 1 (underestimating the rank); we then argue that there is a range of depths which grows with the number of datapoints where the true rank is recovered. Finally, we discuss the effect of the rank of a classifier on the topology of the resulting class boundaries and show that autoencoders with optimal nonlinear rank are naturally denoising.", "keywords": "Deep Neural Networks;implicit bias;representation cost;sparsity", "primary_area": "", "supplementary_material": "/attachment/c0db0dc2d580fa92feba1bdaf423793fc17a29a8.zip", "author": "Arthur Jacot", "authorids": "~Arthur_Jacot1", "gender": "M", "homepage": "", "dblp": "222/2747", "google_scholar": "https://scholar.google.ch/citations?user=G6OhFawAAAAJ", "orcid": "", "linkedin": "", "or_profile": "~Arthur_Jacot1", "aff": "NYU, New York University", "aff_domain": "cims.nyu.edu", "position": "Assistant Professor", "bibtex": "@inproceedings{\njacot2023implicit,\ntitle={Implicit Bias of Large Depth Networks: a Notion of Rank for Nonlinear Functions},\nauthor={Arthur Jacot},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=6iDHce-0B-a}\n}", "github": "", "project": "", "reviewers": "44KN;BGRP;YMQ3;JJ6D;mVym;jUL5", "pdf_size": 430774, "recommendation": "5;6;8;8;8;10", "confidence": "3;3;3;4;5;4", "correctness": "3;3;4;4;4;4", "technical_novelty": "2;3;4;4;4;4", "empirical_novelty": "2;0;4;3;4;4", "wc_summary_paper": "88;154;403;92;75;238", "wc_strength_and_weaknesses": "103;831;71;523;287;229", "wc_clarity_quality_novelty_and_reproducibility": "268;38;607;134;2;7", "wc_summary_review": "77;91;127;51;3;16", "wc_review": "536;1114;1208;800;367;490", "wc_reply_reviewers": "0;65;0;0;0;0", "wc_reply_authors": "302;883;121;561;307;289", "reply_reviewers": "0;1;0;0;0;0", "reply_authors": "1;2;1;1;1;1", "recommendation_avg": [ 7.5, 1.6072751268321592 ], "confidence_avg": [ 3.6666666666666665, 0.7453559924999299 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.5, 0.7637626158259734 ], "empirical_novelty_avg": [ 2.8333333333333335, 1.4624940645653537 ], "wc_summary_paper_avg": [ 175.0, 116.08330342186741 ], "wc_strength_and_weaknesses_avg": [ 340.6666666666667, 264.01620320646146 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 176.0, 213.6469049623701 ], "wc_summary_review_avg": [ 60.833333333333336, 42.81906377096797 ], "wc_review_avg": [ 752.5, 317.4921258866116 ], "wc_reply_reviewers_avg": [ 10.833333333333334, 24.224069756247722 ], "wc_reply_authors_avg": [ 410.5, 247.30665848968428 ], "reply_reviewers_avg": [ 0.16666666666666666, 0.372677996249965 ], "reply_authors_avg": [ 1.1666666666666667, 0.3726779962499649 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 1, 0 ], "corr_recommendation_confidence": 0.556486674912202, "corr_recommendation_correctness": 0.8798826901281201, "gs_citation": 39, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=2007121030539715660&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=6iDHce-0B-a", "email": "cims.nyu.edu", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "New York University", "aff_unique_dep": "", "aff_unique_url": "https://www.nyu.edu", "aff_unique_abbr": "NYU", "aff_campus_unique_index": "0", "aff_campus_unique": "New York", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Learning Continuous Normalizing Flows For Faster Convergence To Target Distribution via Ascent Regularizations", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10684", "id": "6iEoTr-jeB7", "poster": "", "openreview": "https://openreview.net/forum?id=6iEoTr-jeB7", "slides": "https://iclr.cc/virtual/2023/poster/10684", "video": "https://iclr.cc/virtual/2023/poster/10684", "author_site": "Shuangshuang Chen, Sihao Ding, Yiannis Karayiannidis, M\u00e5rten Bj\u00f6rkman", "tldr": "", "abstract": "Normalizing flows (NFs) have been shown to be advantageous in modeling complex distributions and improving sampling efficiency for unbiased sampling. In this work, we propose a new class of continuous NFs, ascent continuous normalizing flows (ACNFs), that makes a base distribution converge faster to a target distribution. As solving such a flow is non-trivial and barely possible, we propose a practical implementation to learn flexibly parametric ACNFs via ascent regularization and apply it in two learning cases: maximum likelihood learning for density estimation and minimizing reverse KL divergence for unbiased sampling and variational inference. The learned ACNFs demonstrate faster convergence towards the target distributions, therefore, achieving better density estimations, unbiased sampling and variational approximation at lower computational costs. 
Furthermore, the flows are shown to stabilize themselves to mitigate performance deterioration and are less sensitive to the choice of training flow length $T$.\n", "keywords": "normalizing flows;gradient flows;density estimation;unbiased sampling;variational inference", "primary_area": "", "supplementary_material": "/attachment/285c037b68a9766f05a0dc7a35820137efc8f626.zip", "author": "Shuangshuang Chen;Sihao Ding;Yiannis Karayiannidis;M\u00e5rten Bj\u00f6rkman", "authorids": "~Shuangshuang_Chen1;~Sihao_Ding1;~Yiannis_Karayiannidis1;~M\u00e5rten_Bj\u00f6rkman2", "gender": "F;M;M;M", "homepage": "https://ssajj1212.github.io/;;http://yiannis.info/;https://www.kth.se/profile/celle", "dblp": ";;;", "google_scholar": "MegdTh0AAAAJ;https://scholar.google.com/citations?hl=en;bgYtbpcAAAAJ;https://scholar.google.se/citations?user=jKjp9h4AAAAJ", "orcid": ";;;", "linkedin": ";sihao-ding-27333710b/;;", "or_profile": "~Shuangshuang_Chen1;~Sihao_Ding1;~Yiannis_Karayiannidis1;~Marten_Bjoerkman1", "aff": "KTH Royal Institute of Technology, Stockholm, Sweden;Volvo Car;;KTH Royal Institute of Technology, Stockholm, Sweden", "aff_domain": "kth.se;volvocars.com;;kth.se", "position": "PhD student;Principal Researcher;;Associate Professor", "bibtex": "@inproceedings{\nchen2023learning,\ntitle={Learning Continuous Normalizing Flows For Faster Convergence To Target Distribution via Ascent Regularizations},\nauthor={Shuangshuang Chen and Sihao Ding and Yiannis Karayiannidis and M{\\r{a}}rten Bj{\\\"o}rkman},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=6iEoTr-jeB7}\n}", "github": "", "project": "", "reviewers": "niTY;XQ2y;XKEH", "pdf_size": 18804515, "recommendation": "6;6;8", "confidence": "4;3;3", "correctness": "3;3;4", "technical_novelty": "2;3;3", "empirical_novelty": "3;3;3", "wc_summary_paper": "51;87;88", "wc_strength_and_weaknesses": "132;128;247", "wc_clarity_quality_novelty_and_reproducibility": "40;313;39", "wc_summary_review": "13;95;25", "wc_review": "236;623;399", "wc_reply_reviewers": "0;102;0", "wc_reply_authors": "261;1174;519", "reply_reviewers": "0;1;0", "reply_authors": "1;2;1", "recommendation_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 75.33333333333333, 17.21110752456745 ], "wc_strength_and_weaknesses_avg": [ 169.0, 55.17849822772152 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 130.66666666666666, 128.92978278461842 ], "wc_summary_review_avg": [ 44.333333333333336, 36.160137659521645 ], "wc_review_avg": [ 419.3333333333333, 158.64495649790516 ], "wc_reply_reviewers_avg": [ 34.0, 48.08326112068523 ], "wc_reply_authors_avg": [ 651.3333333333334, 384.29704945812716 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.49999999999999983, "corr_recommendation_correctness": 0.9999999999999998, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=637613992198457729&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=6iEoTr-jeB7", "email": "kth.se;volvocars.com;;kth.se", "author_num": 4, "aff_unique_index":
"0;1;0", "aff_unique_norm": "KTH Royal Institute of Technology;Volvo Cars", "aff_unique_dep": ";", "aff_unique_url": "https://www.kth.se;https://www.volvocars.com", "aff_unique_abbr": "KTH;Volvo", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stockholm;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Sweden" }, { "title": "Contrastive Meta-Learning for Partially Observable Few-Shot Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11167", "id": "6iVJOtr2zL2", "poster": "/media/PosterPDFs/ICLR%202023/11167.png?t=1680802668.6705232", "openreview": "https://openreview.net/forum?id=6iVJOtr2zL2", "slides": "https://iclr.cc/virtual/2023/poster/11167", "video": "https://iclr.cc/virtual/2023/poster/11167", "author_site": "Adam Jelley, Amos Storkey, Antreas Antoniou, Sam Devlin", "tldr": "An approach for meta-learning contrastive representations under partial observability.", "abstract": "Many contrastive and meta-learning approaches learn representations by identifying common features in multiple views. However, the formalism for these approaches generally assumes features to be shared across views to be captured coherently. We consider the problem of learning a unified representation from partial observations, where useful features may be present in only some of the views. We approach this through a probabilistic formalism enabling views to map to representations with different levels of uncertainty in different components; these views can then be integrated with one another through marginalisation over that uncertainty. Our approach, Partial Observation Experts Modelling (POEM), then enables us to meta-learn consistent representations from partial observations. We evaluate our approach on an adaptation of a comprehensive few-shot learning benchmark, Meta-Dataset, and demonstrate the benefits of POEM over other meta-learning methods at representation learning from partial observations. 
We further demonstrate the utility of POEM by meta-learning to represent an environment from partial views observed by an agent exploring the environment.", "keywords": "Contrastive Learning;Meta-Learning;Few-Shot Learning;Partial Observability", "primary_area": "", "supplementary_material": "", "author": "Adam Jelley;Amos Storkey;Antreas Antoniou;Sam Devlin", "authorids": "~Adam_Jelley1;~Amos_Storkey1;~Antreas_Antoniou3;~Sam_Devlin2", "gender": "M;Not Specified;M;M", "homepage": "https://adamjelley.github.io;http://homepages.inf.ed.ac.uk/amos/;;https://antreasantoniou.github.io/", "dblp": "339/0093;;64/7502;", "google_scholar": "39t3yJcAAAAJ;;https://scholar.google.com/citations?hl=en;", "orcid": "0000-0002-0052-482X;;0000-0002-7769-3090;", "linkedin": "adamjelley/;;https://www.linkedin.com/pub/sam-devlin/83/810/b23;", "or_profile": "~Adam_Jelley1;~Amos_Storkey1;~Sam_Devlin2;~Antreas_Antoniou2", "aff": "University of Edinburgh, University of Edinburgh;University of Edinburgh;Microsoft Research;", "aff_domain": "ed.ac.uk;ed.ac.uk;microsoft.com;", "position": "PhD student;Full Professor;Principal Researcher;", "bibtex": "@inproceedings{\njelley2023contrastive,\ntitle={Contrastive Meta-Learning for Partially Observable Few-Shot Learning},\nauthor={Adam Jelley and Amos Storkey and Antreas Antoniou and Sam Devlin},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=6iVJOtr2zL2}\n}", "github": "", "project": "", "reviewers": "fe5b;EVnw;crwa;YAXs", "pdf_size": 1179732, "recommendation": "6;6;6;6", "confidence": "4;4;3;4", "correctness": "4;3;4;3", "technical_novelty": "3;2;3;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "72;171;198;119", "wc_strength_and_weaknesses": "367;370;136;179", "wc_clarity_quality_novelty_and_reproducibility": "74;198;51;169", "wc_summary_review": "93;58;33;74", "wc_review": "606;797;418;541", "wc_reply_reviewers": "109;0;0;45", "wc_reply_authors": "1575;1567;534;1146", "reply_reviewers": "1;0;0;1", "reply_authors": "3;2;1;3", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 140.0, 48.45100618150257 ], "wc_strength_and_weaknesses_avg": [ 263.0, 106.59502802663921 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 123.0, 61.89911146373589 ], "wc_summary_review_avg": [ 64.5, 22.005681084665387 ], "wc_review_avg": [ 590.5, 137.01186080044312 ], "wc_reply_reviewers_avg": [ 38.5, 44.65702632285316 ], "wc_reply_authors_avg": [ 1205.5, 424.75434076651885 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1784953512769401012&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=6iVJOtr2zL2", "email": "ed.ac.uk;ed.ac.uk;microsoft.com;", "author_num": 4, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Edinburgh;Microsoft", "aff_unique_dep": ";Microsoft Research", "aff_unique_url": "https://www.ed.ac.uk;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "Edinburgh;MSR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United 
Kingdom;United States" }, { "id": "6j3bPQtQS-3", "title": "Low-Entropy Features Hurt Out-of-Distribution Performance", "track": "main", "status": "Withdraw", "tldr": "We hypothesize that low-entropy features tend to be more domain-specific. This paper studies how the entropy of the intermediate representation affect the model's robustness against out-of-distribution (OOD) data.", "abstract": "We study the relationship between the entropy of intermediate representations and a model's robustness to distributional shift. We train two feed-forward networks end-to-end separated by a discrete $n$-bit channel on an unsupervised contrastive learning task. Different \\textit{masking strategies} are implemented that remove a proportion $p_{\\text{mask}}$ of low-entropy bits, high-entropy bits, or random bits, and the effects on performance are compared to the baseline accuracy with no mask. When testing in-distribution (InD) we find that the removal of bits via any strategy leads to an \\textit{increase} in performance, when masking out a relatively low $p_{\\text{mask}}$. We hypothesize that the entropy of a bit serves as a guide to its usefulness out-of-distribution (OOD). Through experiment on three OOD datasets we demonstrate that the removal of low-entropy bits can notably benefit OOD performance. Conversely, we show that top-entropy masking disproportionately harms performance both InD and OOD.", "keywords": "Generalization;Out-of-Distribution;Entropy-based Methods;Unsupervised Contrastive Learning;Latent Representations", "primary_area": "", "supplementary_material": "/attachment/e2eb2ef13f10df205c73c08ec1c913e4e0b9f729.zip", "author": "Nandi Schoots;Dylan Cope", "authorids": "~Nandi_Schoots1;~Dylan_Cope1", "gender": "F;", "homepage": "https://safeandtrustedai.org/person/nandi-schoots/;https://dylancope.com", "dblp": ";290/7911", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Nandi_Schoots1;~Dylan_Cope1", "aff": "Imperial College London;University of California, Berkeley", "aff_domain": "ic.ac.uk;berkeley.edu", "position": "PhD student;Visiting Scholar", "bibtex": "@misc{\nschoots2023lowentropy,\ntitle={Low-Entropy Features Hurt Out-of-Distribution Performance},\nauthor={Nandi Schoots and Dylan Cope},\nyear={2023},\nurl={https://openreview.net/forum?id=6j3bPQtQS-3}\n}", "github": "", "project": "", "reviewers": "tzxC;2p4c;ewSo;9gW1", "site": "https://openreview.net/forum?id=6j3bPQtQS-3", "pdf_size": 899111, "recommendation": "3;3;3;3", "confidence": "3;3;4;3", "correctness": "3;4;3;2", "technical_novelty": "2;4;3;2", "empirical_novelty": "3;4;3;2", "wc_summary_paper": "60;160;89;208", "wc_strength_and_weaknesses": "434;275;288;466", "wc_clarity_quality_novelty_and_reproducibility": "63;308;25;209", "wc_summary_review": "98;49;55;289", "wc_review": "655;792;457;1172", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 129.25, 58.22961016527588 ], "wc_strength_and_weaknesses_avg": [ 365.75, 85.13041465892199 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 151.25, 113.61420465769234 ], "wc_summary_review_avg": [ 122.75, 97.82733513696466 ], "wc_review_avg": [ 769.0, 261.3799915831355 ], "wc_reply_reviewers_avg": [ 0, 0 ], 
"wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:4mkNPlq7sFoJ:scholar.google.com/&scioq=Low-Entropy+Features+Hurt+Out-of-Distribution+Performance&hl=en&as_sdt=0,31", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Imperial College London;University of California, Berkeley", "aff_unique_dep": ";", "aff_unique_url": "https://www.imperial.ac.uk;https://www.berkeley.edu", "aff_unique_abbr": "ICL;UC Berkeley", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Offline Reinforcement Learning with Differentiable Function Approximation is Provably Efficient", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10793", "id": "6jfbOWzWTcE", "poster": "", "openreview": "https://openreview.net/forum?id=6jfbOWzWTcE", "slides": "https://iclr.cc/virtual/2023/poster/10793", "video": "https://iclr.cc/virtual/2023/poster/10793", "author_site": "Ming Yin, Mengdi Wang, Yu-Xiang Wang", "tldr": "", "abstract": "Offline reinforcement learning, which aims at optimizing sequential decision-making strategies with historical data, has been extensively applied in real-life applications. State-Of-The-Art algorithms usually leverage powerful function approximators (e.g. neural networks) to alleviate the sample complexity hurdle for better empirical performances. Despite the successes, a more systematic under- standing of the statistical complexity for function approximation remains lacking. Towards bridging the gap, we take a step by considering offline reinforcement learning with differentiable function class approximation (DFA). This function class naturally incorporates a wide range of models with nonlinear/nonconvex structures. We show offline RL with differentiable function approximation is provably efficient by analyzing the pessimistic fitted Q-learning (PFQL) algorithm, and our results provide the theoretical basis for understanding a variety of practical heuristics that rely on Fitted Q-Iteration style design. In addition, we further im- prove our guarantee with a tighter instance-dependent characterization. 
We hope our work could draw interest in studying reinforcement learning with differentiable function approximation beyond the scope of current research.\n", "keywords": "Reinforcement Learning Theory", "primary_area": "", "supplementary_material": "/attachment/50b58565c1ed2fe2d4556b142c8f36936b343895.zip", "author": "Ming Yin;Mengdi Wang;Yu-Xiang Wang", "authorids": "~Ming_Yin4;~Mengdi_Wang1;~Yu-Xiang_Wang1", "gender": "M;F;", "homepage": "https://mingyin0312.github.io;http://mwang.princeton.edu;http://www.cs.ucsb.edu/~yuxiangw/publications.html", "dblp": "89/453.html;;62/1637-3.html", "google_scholar": "ncBRYIUAAAAJ;;HGNZ1fkAAAAJ", "orcid": "0000-0001-6458-0751;;", "linkedin": ";;", "or_profile": "~Ming_Yin4;~Mengdi_Wang1;~Yu-Xiang_Wang1", "aff": "UC, Santa Barbara;Princeton University;UC Santa Barbara", "aff_domain": "ucsb.edu;princeton.edu;ucsb.edu", "position": "PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nyin2023offline,\ntitle={Offline Reinforcement Learning with Differentiable Function Approximation is Provably Efficient},\nauthor={Ming Yin and Mengdi Wang and Yu-Xiang Wang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=6jfbOWzWTcE}\n}", "github": "", "project": "", "reviewers": "tREF;4N32;7cjK;eR8G", "pdf_size": 441056, "recommendation": "6;6;6;8", "confidence": "3;4;3;4", "correctness": "4;4;3;4", "technical_novelty": "3;3;3;3", "empirical_novelty": "2;0;0;0", "wc_summary_paper": "79;170;73;53", "wc_strength_and_weaknesses": "125;174;86;143", "wc_clarity_quality_novelty_and_reproducibility": "52;67;231;27", "wc_summary_review": "55;6;63;19", "wc_review": "311;417;453;242", "wc_reply_reviewers": "0;0;56;0", "wc_reply_authors": "213;618;712;258", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;2;1", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 0.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 93.75, 45.06315013400639 ], "wc_strength_and_weaknesses_avg": [ 132.0, 31.81980515339464 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 94.25, 80.2352011276846 ], "wc_summary_review_avg": [ 35.75, 23.8681272830526 ], "wc_review_avg": [ 355.75, 83.89092620778483 ], "wc_reply_reviewers_avg": [ 14.0, 24.24871130596428 ], "wc_reply_authors_avg": [ 450.25, 217.8880159623287 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4824050143542522599&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=6jfbOWzWTcE", "email": "ucsb.edu;princeton.edu;ucsb.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of California, Santa Barbara;Princeton University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucsb.edu;https://www.princeton.edu", "aff_unique_abbr": "UCSB;Princeton", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Santa Barbara;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "6jqSG88Mf_D", "title": "3D Neural Embedding Likelihood for Robust Sim-to-Real Transfer in Inverse Graphics", "track": 
"main", "status": "Reject", "tldr": "We propose 3D Neural Embedding Likelihoods (3DNEL), a 3D likelihood that models both shape information from depth and appearance information from RGB via neural embeddings and bridges the sim-to-real gap in 3D inverse graphics.", "abstract": "A central challenge in 3D scene perception via inverse graphics is robustly modeling the gap between 3D graphics and real-world data. We propose a novel 3D Neural Embedding Likelihood (3DNEL) over RGB-D images to address this gap. 3DNEL uses neural embeddings to predict 2D-3D correspondences from RGB and combines this with depth in a principled manner. 3DNEL is trained entirely from synthetic images and generalizes to real-world data. To showcase this capability, we develop a multi-stage inverse graphics pipeline that uses 3DNEL for 6D object pose estimation from real RGB-D images. Our method outperforms the previous state-of-the-art in sim-to-real pose estimation on the YCB-Video dataset, and improves robustness, with significantly fewer large-error predictions. Unlike existing bottom-up, discriminative approaches that are specialized for pose estimation, 3DNEL adopts a probabilistic generative formulation that jointly models multi-object scenes. This generative formulation enables easy extension of 3DNEL to additional tasks like object and camera tracking from video, using principled inference in the same probabilistic model without task specific retraining.", "keywords": "3D inverse graphics;probabilistic inference;likelihood;RGB-D;neural embedding;object pose estimation", "primary_area": "", "supplementary_material": "", "author": "Guangyao Zhou;Nishad Gothoskar;Lirui Wang;Joshua B. Tenenbaum;Dan Gutfreund;Miguel Lazaro-Gredilla;Dileep George;Vikash Mansinghka", "authorids": "~Guangyao_Zhou1;~Nishad_Gothoskar1;~Lirui_Wang1;~Joshua_B._Tenenbaum1;~Dan_Gutfreund1;~Miguel_Lazaro-Gredilla1;~Dileep_George1;~Vikash_Mansinghka1", "gender": "M;M;M;;;M;;M", "homepage": "https://stanniszhou.github.io;http://www.nishadg.com;https://liruiw.github.io/;;https://researcher.watson.ibm.com/researcher/view.php?person=us-dgutfre;;;", "dblp": ";;221/9612;t/JoshuaBTenenbaum;g/DanGutfreund;77/4660;;23/1731", "google_scholar": "RW94MCIAAAAJ;;EM9YhH0AAAAJ;;fRJbyD8AAAAJ;SFjDQk8AAAAJ;;", "orcid": ";;;;;;;", "linkedin": ";;;;;miguel-lazaro-g/;;", "or_profile": "~Guangyao_Zhou1;~Nishad_Gothoskar1;~Lirui_Wang1;~Joshua_B._Tenenbaum1;~Dan_Gutfreund1;~Miguel_Lazaro-Gredilla1;~Dileep_George1;~Vikash_Mansinghka1", "aff": "Google DeepMind;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;MIT-IBM Watson AI Lab;Google Deepmind;Vicarious AI;Massachusetts Institute of Technology", "aff_domain": "google.com;mit.edu;mit.edu;mit.edu;mit.edu;google.com;vicarious.com;mit.edu", "position": "Research Scientist;PhD student;PhD student;Professor;Principal Researcher;Research Scientist;Co-founder;Principal Research Scientist", "bibtex": "@misc{\nzhou2023d,\ntitle={3D Neural Embedding Likelihood for Robust Sim-to-Real Transfer in Inverse Graphics},\nauthor={Guangyao Zhou and Nishad Gothoskar and Lirui Wang and Joshua B. 
Tenenbaum and Dan Gutfreund and Miguel Lazaro-Gredilla and Dileep George and Vikash Mansinghka},\nyear={2023},\nurl={https://openreview.net/forum?id=6jqSG88Mf_D}\n}", "github": "", "project": "", "reviewers": "a254;4Xvb;aQXb", "site": "https://openreview.net/forum?id=6jqSG88Mf_D", "pdf_size": 19348747, "recommendation": "6;6;6", "confidence": "3;4;3", "correctness": "3;2;4", "technical_novelty": "3;2;3", "empirical_novelty": "2;2;3", "wc_summary_paper": "41;85;57", "wc_strength_and_weaknesses": "45;101;240", "wc_clarity_quality_novelty_and_reproducibility": "8;273;39", "wc_summary_review": "90;106;58", "wc_review": "184;565;394", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "456;1613;1647", "reply_reviewers": "0;0;0", "reply_authors": "1;3;3", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 61.0, 18.184242262647807 ], "wc_strength_and_weaknesses_avg": [ 128.66666666666666, 81.97696153323946 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 106.66666666666667, 118.29435977913552 ], "wc_summary_review_avg": [ 84.66666666666667, 19.955506062794353 ], "wc_review_avg": [ 381.0, 155.8139916695545 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1238.6666666666667, 553.6029463633862 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.3333333333333335, 0.9428090415820634 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15230456737693672175&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;1;1;1;2;3;1", "aff_unique_norm": "Google;Massachusetts Institute of Technology;DeepMind;Vicarious AI", "aff_unique_dep": "Google DeepMind;;DeepMind;", "aff_unique_url": "https://deepmind.com;https://web.mit.edu;https://deepmind.com;https://www.vicarious.com", "aff_unique_abbr": "DeepMind;MIT;DeepMind;Vicarious AI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1;0;1;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "The Augmented Image Prior: Distilling 1000 Classes by Extrapolating from a Single Image", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12236", "id": "6kxApT2r2i", "poster": "", "openreview": "https://openreview.net/forum?id=6kxApT2r2i", "slides": "https://iclr.cc/virtual/2023/poster/12236", "video": "https://iclr.cc/virtual/2023/poster/12236", "author_site": "Yuki Asano, Aaqib Saeed", "tldr": "We show that it is possible to extrapolate to semantic classes such as those of ImageNet or Kinetics using just a single datum plus heavy augmentations as visual inputs.", "abstract": "What can neural networks learn about the visual world when provided with only a single image as input? While any image obviously cannot contain the multitudes of all existing objects, scenes and lighting conditions -- within the space of all $256^{3\\cdot224\\cdot224}$ possible $224$-sized square images, it might still provide a strong prior for natural images. 
To analyze this ``augmented image prior'' hypothesis, we develop a simple framework for training neural networks from scratch using a single image and augmentations using knowledge distillation from a supervised pretrained teacher. With this, we find the answer to the above question to be: `surprisingly, a lot'. In quantitative terms, we find accuracies of $94\\%$/$74\\%$ on CIFAR-10/100, $69$\\% on ImageNet, and by extending this method to video and audio, $51\\%$ on Kinetics-400 and $84$\\% on SpeechCommands. In extensive analyses spanning 13 datasets, we disentangle the effect of augmentations, choice of data and network architectures and also provide qualitative evaluations that include lucid ``panda neurons'' in networks that have never even seen one. ", "keywords": "Augmentations;Single Image Learning;Distillation", "primary_area": "", "supplementary_material": "/attachment/e06fe92d8d46fcf932892cc71b7ee0b387c809f1.zip", "author": "Yuki M Asano;Aaqib Saeed", "authorids": "~Yuki_M_Asano1;~Aaqib_Saeed1", "gender": ";M", "homepage": "http://aqibsaeed.github.io/;https://yukimasano.github.io/", "dblp": "210/1023;239/8823", "google_scholar": "O0nlHrkAAAAJ;CdpLhlgAAAAJ", "orcid": "0000-0003-1473-0322;", "linkedin": "aqibsaeed/;", "or_profile": "~Aaqib_Saeed1;~Yuki_Asano1", "aff": "Philips Research;University of Amsterdam", "aff_domain": "philips.com;uva.nl", "position": "Researcher;Assistant Professor", "bibtex": "@inproceedings{\nasano2023the,\ntitle={The Augmented Image Prior: Distilling 1000 Classes by Extrapolating from a Single Image},\nauthor={Yuki M Asano and Aaqib Saeed},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=6kxApT2r2i}\n}", "github": "", "project": "", "reviewers": "AYLh;rxVL;rSwz", "pdf_size": 7311978, "recommendation": "6;6;8", "confidence": "4;4;4", "correctness": "3;3;3", "technical_novelty": "3;2;3", "empirical_novelty": "3;1;3", "wc_summary_paper": "50;45;103", "wc_strength_and_weaknesses": "326;386;253", "wc_clarity_quality_novelty_and_reproducibility": "10;24;64", "wc_summary_review": "26;60;50", "wc_review": "412;515;470", "wc_reply_reviewers": "24;25;24", "wc_reply_authors": "723;253;1423", "reply_reviewers": "1;1;1", "reply_authors": "2;1;2", "recommendation_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.9428090415820634 ], "wc_summary_paper_avg": [ 66.0, 26.242459234352765 ], "wc_strength_and_weaknesses_avg": [ 321.6666666666667, 54.38341250377321 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 32.666666666666664, 22.88133640230735 ], "wc_summary_review_avg": [ 45.333333333333336, 14.267289706021797 ], "wc_review_avg": [ 465.6666666666667, 42.16106682183879 ], "wc_reply_reviewers_avg": [ 24.333333333333332, 0.4714045207910317 ], "wc_reply_authors_avg": [ 799.6666666666666, 480.7170569980733 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10445117875203863081&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=6kxApT2r2i", "email": "philips.com;uva.nl", "author_num": 2, 
"aff_unique_index": "0;1", "aff_unique_norm": "Philips Research;University of Amsterdam", "aff_unique_dep": ";", "aff_unique_url": "https://www.philips.com/research;https://www.uva.nl", "aff_unique_abbr": "Philips Research;UvA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Netherlands" }, { "id": "6l46OaYQvu3", "title": "Learning Robust \bGoal Space with Hypothetical Analogy-Making", "track": "main", "status": "Withdraw", "tldr": "Generating hypothetical observation and maximizing mutual information between the original observation using analogy-making module helps the RL agent\u2019s learned policy generalize by revealing a robust goal context space.", "abstract": "Learning compact state representations from high dimensional and noisy observations is the cornerstone of reinforcement learning (RL). However, these representations are often biased toward the current goal context and overfitted to goal-irrelevant features, making it hard to generalize to other tasks. Inspired by the human analogy-making process, we propose a novel representation learning framework called hypothetical analogy-making (HAM) for learning robust goal space and generalizable policy for RL. It consists of encoding goal-relevant and other task-related features, hypothetical observation generation with different feature combination, and analogy-making between the original and hypothetical observations using discriminators. Our model introduces an analogy-making objective that maximizes the mutual information between the generated hypothetical observation and the original observation to enhance disentangled representation. Experiments on various challenging RL environments showed that our model helps the RL agent\u2019s learned policy generalize by revealing a robust goal space.", "keywords": "GAN;information regularization;neuroscience-inspired AI;generalizable policy", "primary_area": "", "supplementary_material": "", "author": "Shinyoung Joo;Sang Wan Lee", "authorids": "~Shinyoung_Joo1;~Sang_Wan_Lee1", "gender": ";M", "homepage": "https://sineong.github.io;https://aibrain.kaist.ac.kr/sang-wan-lee", "dblp": ";77/6650", "google_scholar": ";0rMoHW4AAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Shinyoung_Joo1;~Sang_Wan_Lee1", "aff": "NAVER Cloud;Korea Advanced Institute of Science & Technology", "aff_domain": "navercorp.com;kaist.ac.kr", "position": "Researcher;Associate Professor", "bibtex": "@misc{\njoo2023learning,\ntitle={Learning Robust \bGoal Space with Hypothetical Analogy-Making},\nauthor={Shinyoung Joo and Sang Wan Lee},\nyear={2023},\nurl={https://openreview.net/forum?id=6l46OaYQvu3}\n}", "github": "", "project": "", "reviewers": "QSuL;CQnq;nhqy;nDKY", "site": "https://openreview.net/forum?id=6l46OaYQvu3", "pdf_size": 9673617, "recommendation": "3;5;6;6", "confidence": "3;4;4;3", "correctness": "2;3;3;3", "technical_novelty": "2;3;4;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "182;103;95;128", "wc_strength_and_weaknesses": "152;608;418;214", "wc_clarity_quality_novelty_and_reproducibility": "159;95;98;23", "wc_summary_review": "41;92;74;23", "wc_review": "534;898;685;388", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "72;75;68;75", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.5, 0.5 ], 
"wc_summary_paper_avg": [ 127.0, 34.007352146263905 ], "wc_strength_and_weaknesses_avg": [ 348.0, 179.49373248110922 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 93.75, 48.17351450745524 ], "wc_summary_review_avg": [ 57.5, 27.04163456597992 ], "wc_review_avg": [ 626.25, 188.794034598554 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 72.5, 2.8722813232690143 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.40824829046386296, "corr_recommendation_correctness": 0.9428090415820632, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ZIFjyIZyqzMJ:scholar.google.com/&scioq=Learning+Robust+%3FGoal+Space+with+Hypothetical+Analogy-Making&hl=en&as_sdt=0,33", "gs_version_total": 2, "aff_unique_index": "0;1", "aff_unique_norm": "NAVER Corporation;Korea Advanced Institute of Science and Technology", "aff_unique_dep": "Cloud Division;", "aff_unique_url": "https://www.naver.com;https://www.kaist.ac.kr", "aff_unique_abbr": "NAVER;KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "title": "Imitating Graph-Based Planning with Goal-Conditioned Policies", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10768", "id": "6lUEy1J5R7p", "poster": "", "openreview": "https://openreview.net/forum?id=6lUEy1J5R7p", "slides": "https://iclr.cc/virtual/2023/poster/10768", "video": "https://iclr.cc/virtual/2023/poster/10768", "author_site": "Junsu Kim, Younggyo Seo, Sungsoo Ahn, Kyunghwan Son, Jinwoo Shin", "tldr": "We train goal-conditioned policies guided by decisions from graph-based planning.", "abstract": "Recently, graph-based planning algorithms have gained much attention to solve goal-conditioned reinforcement learning (RL) tasks: they provide a sequence of subgoals to reach the target-goal, and the agents learn to execute subgoal-conditioned policies. However, the sample-efficiency of such RL schemes still remains a challenge, particularly for long-horizon tasks. To address this issue, we present a simple yet effective self-imitation scheme which distills a subgoal-conditioned policy into the target-goal-conditioned policy. Our intuition here is that to reach a target-goal, an agent should pass through a subgoal, so target-goal- and subgoal- conditioned policies should be similar to each other. We also propose a novel scheme of stochastically skipping executed subgoals in a planned path, which further improves performance. Unlike prior methods that only utilize graph-based planning in an execution phase, our method transfers knowledge from a planner along with a graph into policy learning. 
We empirically show that our method can significantly boost the sample-efficiency of the existing goal-conditioned RL methods under various long-horizon control tasks.", "keywords": "Reinforcement Learning;Goal-Conditioned Reinforcement Learning", "primary_area": "", "supplementary_material": "", "author": "Junsu Kim;Younggyo Seo;Sungsoo Ahn;Kyunghwan Son;Jinwoo Shin", "authorids": "~Junsu_Kim1;~Younggyo_Seo1;~Sungsoo_Ahn1;~Kyunghwan_Son1;~Jinwoo_Shin1", "gender": "M;M;M;M;M", "homepage": "https://sites.google.com/view/junsu-kim;https://younggyo.me/;https://sungsooahn.super.site/;http://lanada.kaist.ac.kr/;https://sites.google.com/site/mijirim/", "dblp": ";265/5586;90/5164;206/9135;31/7062", "google_scholar": "1o9cS8UAAAAJ;tI1-YwIAAAAJ;XTenHs0AAAAJ;;https://scholar.google.com.tw/citations?user=m3eDp7kAAAAJ", "orcid": ";;;;", "linkedin": "junsu-kim-b170b3168/;;;;", "or_profile": "~Junsu_Kim1;~Younggyo_Seo1;~Sungsoo_Ahn1;~Kyunghwan_Son1;~Jinwoo_Shin1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Pohang University of Science and Technology;;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;postech.ac.kr;;kaist.ac.kr", "position": "Ph.D. student;PhD student;Assistant Professor;;Full Professor", "bibtex": "@inproceedings{\nkim2023imitating,\ntitle={Imitating Graph-Based Planning with Goal-Conditioned Policies},\nauthor={Junsu Kim and Younggyo Seo and Sungsoo Ahn and Kyunghwan Son and Jinwoo Shin},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=6lUEy1J5R7p}\n}", "github": "", "project": "", "reviewers": "syAR;X3ba;tgwg;4xSo", "pdf_size": 1299456, "recommendation": "6;6;6;8", "confidence": "4;4;3;3", "correctness": "3;4;4;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "88;42;124;99", "wc_strength_and_weaknesses": "398;111;181;482", "wc_clarity_quality_novelty_and_reproducibility": "10;9;43;59", "wc_summary_review": "76;22;22;41", "wc_review": "572;184;370;681", "wc_reply_reviewers": "0;73;0;0", "wc_reply_authors": "1466;916;808;1437", "reply_reviewers": "0;1;0;0", "reply_authors": "2;3;1;2", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 88.25, 29.71847068743612 ], "wc_strength_and_weaknesses_avg": [ 293.0, 151.99835525425925 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 30.25, 21.510172012329424 ], "wc_summary_review_avg": [ 40.25, 22.049659861322123 ], "wc_review_avg": [ 451.75, 190.6493836863891 ], "wc_reply_reviewers_avg": [ 18.25, 31.60992723813201 ], "wc_reply_authors_avg": [ 1156.75, 297.38979051070334 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": -0.5773502691896257, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1586819259107747400&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=6lUEy1J5R7p", "email": "kaist.ac.kr;kaist.ac.kr;postech.ac.kr;;kaist.ac.kr", "author_num": 5, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Pohang University of Science and 
Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.kaist.ac.kr;https://www.postech.ac.kr", "aff_unique_abbr": "KAIST;POSTECH", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pohang", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "6lUU0QaTOC", "title": "TI-VAE: A temporally independent VAE with applications to latent factor learning in neuroimaging", "track": "main", "status": "Withdraw", "tldr": " Our approach extends temporal ICA to the non-linear case and generalizes weight sharing to non-Euclidean neuroimaging data. ", "abstract": "Functional magnetic resonance imaging (fMRI) data contain complex spatiotemporal dynamics, thus researchers have developed approaches that reduce the dimensionality of the signal while extracting relevant and interpretable dynamics. Recently, the feasibility of latent factor analysis, which can identify the lower-dimensional trajectory of neuronal population activations, has been demonstrated on both spiking and calcium imaging data. In this work, we propose a new framework inspired by latent factor analysis and apply it to functional MRI data from the human somatomotor cortex. Models of fMRI data that can perform whole-brain discovery of dynamical latent factors are understudied. The benefits of approaches such as linear independent component analysis models have been widely appreciated, however, nonlinear extensions are rare and present challenges in terms of identification. Deep learning methods are potentially well-suited, but without adequate inductive biases with respect to spatial weight-sharing may heavily overparameterize the model for the dataset size. Due to the underspecification of neuroimaging approaches, this increases the chances of overfitting and picking up on spurious correlations. Our approach extends temporal ICA to the non-linear case and generalizes weight sharing to non-Euclidean neuroimaging data. We evaluate our model on data with multiple motor sub-tasks to assess whether the model captures disentangled latent factors corresponding to each sub-task. Then, to evaluate the latent factors we find further, we compare the spatial location of each latent factor to the known motor homunculus. Finally, we show that our latent factors correlate better to the task than the current gold standard of source signal separation for neuroimaging data, independent component analysis (ICA). 
", "keywords": "variational autoencoder;computational neuroscience;latent factor analysis;latent factor learning;fMRI;sequential variational autoencoder;somatomotor cortex;weight sharing;inductive bias", "primary_area": "", "supplementary_material": "", "author": "Eloy Geenjaar;Amrit Kashyap;Noah Lewis;Robyn Miller;Vince Calhoun", "authorids": "~Eloy_Geenjaar1;~Amrit_Kashyap1;~Noah_Lewis1;~Robyn_Miller1;~Vince_Calhoun1", "gender": "M;;;;", "homepage": "http://eloygeenjaar.nl;https://neurologie.charite.de/en/metas/person_detail/person/address_detail/amrit_kashyap_phd-1/;;;", "dblp": "289/0786;;;;48/3821.html", "google_scholar": "https://scholar.google.com/user=NMq1qHIAAAAJ;;;zPpJc94AAAAJ;WNOoGKIAAAAJ", "orcid": "0000-0001-5448-6358;;0000-0002-5712-2434;;", "linkedin": "eloy-geenjaar-23904bb0/;;;;", "or_profile": "~Eloy_Geenjaar1;~Amrit_Kashyap1;~Noah_Lewis1;~Robyn_Miller1;~Vince_Calhoun1", "aff": "Georgia Institute of Technology;;Georgia Institute of Technology;Georgia State University;Emory University", "aff_domain": "gatech.edu;;gatech.edu;gsu.edu;emory.edu", "position": "PhD student;;PhD student;Assistant Professor;Full Professor", "bibtex": "@misc{\ngeenjaar2023tivae,\ntitle={{TI}-{VAE}: A temporally independent {VAE} with applications to latent factor learning in neuroimaging},\nauthor={Eloy Geenjaar and Amrit Kashyap and Noah Lewis and Robyn Miller and Vince Calhoun},\nyear={2023},\nurl={https://openreview.net/forum?id=6lUU0QaTOC}\n}", "github": "", "project": "", "reviewers": "GHgW;UKiL;3TLL;JDeS", "site": "https://openreview.net/forum?id=6lUU0QaTOC", "pdf_size": 3968381, "recommendation": "3;3;3;5", "confidence": "4;4;3;3", "correctness": "2;2;2;3", "technical_novelty": "2;3;2;2", "empirical_novelty": "1;2;2;2", "wc_summary_paper": "89;116;67;104", "wc_strength_and_weaknesses": "6;85;190;198", "wc_clarity_quality_novelty_and_reproducibility": "6;650;72;90", "wc_summary_review": "114;67;45;55", "wc_review": "215;918;374;447", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 94.0, 18.289341158171883 ], "wc_strength_and_weaknesses_avg": [ 119.75, 79.38001952632665 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 204.5, 259.1037437012441 ], "wc_summary_review_avg": [ 70.25, 26.43269755435491 ], "wc_review_avg": [ 488.5, 261.7751898098825 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:EjdboCEOvscJ:scholar.google.com/&scioq=TI-VAE:+A+temporally+independent+VAE+with+applications+to+latent+factor+learning+in+neuroimaging&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Georgia Institute of Technology;Georgia State University;Emory University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.gatech.edu;https://www.gsu.edu;https://www.emory.edu", "aff_unique_abbr": "Georgia Tech;GSU;Emory", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", 
"aff_country_unique": "United States" }, { "title": "Thalamus: a brain-inspired algorithm for biologically-plausible continual learning and disentangled representations", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12052", "id": "6orC5MvgPBK", "poster": "", "openreview": "https://openreview.net/forum?id=6orC5MvgPBK", "slides": "https://iclr.cc/virtual/2023/poster/12052", "video": "https://iclr.cc/virtual/2023/poster/12052", "tldr": "A brain-inspired algorithm that alternates optimizing in weight space with optimizing the latent embedding space in the same neural network leading to open-ended discovery of tasks and disentangled learning.", "abstract": "Animals thrive in a constantly changing environment and leverage the temporal structure to learn well-factorized causal representations. In contrast, traditional neural networks suffer from forgetting in changing environments and many methods have been proposed to limit forgetting with different trade-offs. Inspired by the brain thalamocortical circuit, we introduce a simple algorithm that uses optimization at inference time to generate internal representations of the current task dynamically. The algorithm alternates between updating the model weights and a latent task embedding, allowing the agent to parse the stream of temporal experience into discrete events and organize learning about them. On a continual learning benchmark, it achieves competitive end average accuracy by mitigating forgetting, but importantly, the interaction between the weights dynamics and the latent dynamics organizes knowledge into flexible structures with a cognitive interface to control them. Tasks later in the sequence can be solved through knowledge transfer as they become reachable within the well-factorized latent space. 
The algorithm meets many of the desiderata of an ideal continually learning agent in open-ended environments, and its simplicity suggests fundamental computations in circuits with abundant feedback control loops such as the thalamocortical circuits in the brain.", "keywords": "brain-inspired learning;neuroscience;recurrent neural networks;context inference;bayesian brain", "primary_area": "", "supplementary_material": "", "author": "Ali Hummos", "authorids": "~Ali_Hummos1", "gender": "M", "homepage": "", "dblp": "", "google_scholar": "YFDOLsUAAAAJ", "orcid": " 0000-0003-4831-305X", "linkedin": "ali-hummos-b77a1422/", "or_profile": "~Ali_Hummos1", "aff": "Massachusetts Institute of Technology", "aff_domain": "mit.edu", "position": "Postdoc", "bibtex": "@inproceedings{\nhummos2023thalamus,\ntitle={Thalamus: a brain-inspired algorithm for biologically-plausible continual learning and disentangled representations},\nauthor={Ali Hummos},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=6orC5MvgPBK}\n}", "github": "", "project": "", "reviewers": "ugKD;JbgB;5VKC;W9vo", "pdf_size": 12128515, "recommendation": "6;6;8;8", "confidence": "3;3;2;3", "correctness": "3;3;3;4", "technical_novelty": "2;2;3;3", "empirical_novelty": "3;0;3;3", "wc_summary_paper": "93;19;159;53", "wc_strength_and_weaknesses": "92;14;204;137", "wc_clarity_quality_novelty_and_reproducibility": "37;60;111;351", "wc_summary_review": "75;80;39;54", "wc_review": "297;173;513;595", "wc_reply_reviewers": "15;48;35;118", "wc_reply_authors": "887;781;794;882", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;1;2", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.25, 1.299038105676658 ], "wc_summary_paper_avg": [ 81.0, 52.09606511052442 ], "wc_strength_and_weaknesses_avg": [ 111.75, 69.08825877093734 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 139.75, 124.87068310856635 ], "wc_summary_review_avg": [ 62.0, 16.47725705328408 ], "wc_review_avg": [ 394.5, 167.93674404370236 ], "wc_reply_reviewers_avg": [ 54.0, 38.77499194068259 ], "wc_reply_authors_avg": [ 836.0, 48.74935897014442 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 1, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17849911164403545175&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=6orC5MvgPBK", "email": "mit.edu", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Improving Deep Policy Gradients with Value Function Search", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11568", "id": "6qZC7pfenQm", "poster": "", "openreview": "https://openreview.net/forum?id=6qZC7pfenQm", "slides": "https://iclr.cc/virtual/2023/poster/11568", "video": "https://iclr.cc/virtual/2023/poster/11568", "author_site": "Enrico Marchesini, Christopher Amato", "tldr": "We present a Value Function Search that employs a
gradient-free population of perturbed value networks to improve Deep Policy Gradient primitives, leading to higher returns and better sample efficiency.", "abstract": "Deep Policy Gradient (PG) algorithms employ value networks to drive the learning of parameterized policies and reduce the variance of the gradient estimates. However, value function approximation gets stuck in local optima and struggles to fit the actual return, limiting the variance reduction efficacy and leading policies to sub-optimal performance. This paper focuses on improving value approximation and analyzing the effects on Deep PG primitives such as value prediction, variance reduction, and correlation of gradient estimates with the true gradient. To this end, we introduce a Value Function Search that employs a population of perturbed value networks to search for a better approximation. Our framework does not require additional environment interactions, gradient computations, or ensembles, providing a computationally inexpensive approach to enhance the supervised learning task on which value networks train. Crucially, we show that improving Deep PG primitives results in improved sample efficiency and policies with higher returns using common continuous control benchmark domains.", "keywords": "Deep Reinforcement Learning;Deep Policy Gradients", "primary_area": "", "supplementary_material": "", "author": "Enrico Marchesini;Christopher Amato", "authorids": "~Enrico_Marchesini1;~Christopher_Amato1", "gender": "M;M", "homepage": "https://emarche.github.io;http://www.ccs.neu.edu/home/camato/index.html", "dblp": "https://dblp.uni-trier.de/pid/238/2360.html;10/3254", "google_scholar": "https://scholar.google.it/citations?user=9V1_SGkAAAAJ;-8-sD-sAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Enrico_Marchesini1;~Christopher_Amato1", "aff": "Northeastern University;Northeastern University", "aff_domain": "neu.edu;northeastern.edu", "position": "Postdoc;Associate Professor", "bibtex": "@inproceedings{\nmarchesini2023improving,\ntitle={Improving Deep Policy Gradients with Value Function Search},\nauthor={Enrico Marchesini and Christopher Amato},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=6qZC7pfenQm}\n}", "github": "", "project": "", "reviewers": "7FyJ;qNbH;KmE6;ur4C", "pdf_size": 7748082, "recommendation": "5;5;5;6", "confidence": "4;4;4;4", "correctness": "2;2;3;3", "technical_novelty": "3;4;2;3", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "94;198;109;64", "wc_strength_and_weaknesses": "225;477;515;155", "wc_clarity_quality_novelty_and_reproducibility": "55;107;131;15", "wc_summary_review": "74;49;129;14", "wc_review": "448;831;884;248", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 116.25, 49.90177852541931 ], "wc_strength_and_weaknesses_avg": [ 343.0, 155.56991997169632 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 77.0, 45.12205669071391 ], "wc_summary_review_avg": [ 66.5, 41.907636535600524 ], "wc_review_avg": [ 602.75, 265.0446896279946 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], 
"authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6251628957984410095&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=6qZC7pfenQm", "email": "neu.edu;northeastern.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Northeastern University", "aff_unique_dep": "", "aff_unique_url": "https://www.northeastern.edu", "aff_unique_abbr": "NEU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Mitigating Memorization of Noisy Labels via Regularization between Representations", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11563", "id": "6qcYDVlVLnK", "poster": "/media/PosterPDFs/ICLR%202023/11563.png?t=1681025677.0680423", "openreview": "https://openreview.net/forum?id=6qcYDVlVLnK", "slides": "https://iclr.cc/virtual/2023/poster/11563", "video": "https://iclr.cc/virtual/2023/poster/11563", "author_site": "Hao Cheng, Zhaowei Zhu, Xing Sun, Yang Liu", "tldr": "We theoretically show the memorization effect of DNN with resepct to the model capacity and propose a representation-based regularizer to mitigate the memorization effect. ", "abstract": "Designing robust loss functions is popular in learning with noisy labels while existing designs did not explicitly consider the overfitting property of deep neural networks (DNNs). As a result, applying these losses may still suffer from overfitting/memorizing noisy labels as training proceeds. In this paper, we first theoretically analyze the memorization effect and show that a lower-capacity model may perform better on noisy datasets. However, it is non-trivial to design a neural network with the best capacity given an arbitrary task. To circumvent this dilemma, instead of changing the model architecture, we decouple DNNs into an encoder followed by a linear classifier and propose to restrict the function space of a DNN by a representation regularizer. Particularly, we require the distance between two self-supervised features to be positively related to the distance between the corresponding two supervised model outputs. \nOur proposed framework is easily extendable and can incorporate many other robust loss functions to further improve performance. Extensive experiments and theoretical analyses support our claims. 
Code is available at https://github.com/UCSC-REAL/SelfSup_NoisyLabel.", "keywords": "learning with noisy labels;representation learning", "primary_area": "", "supplementary_material": "/attachment/8931aaf22ea3fba0a93ff3b8a0d8960f782b0510.zip", "author": "Hao Cheng;Zhaowei Zhu;Xing Sun;Yang Liu", "authorids": "~Hao_Cheng5;~Zhaowei_Zhu1;~Xing_Sun1;~Yang_Liu3", "gender": "M;M;M;M", "homepage": "https://haochenglouis.github.io;https://www.zzw.ai;https://www.sunxing.org;http://www.yliuu.com", "dblp": ";202/1712;;51/3710-18", "google_scholar": "ftlVqVIAAAAJ;YS8pSQoAAAAJ;IUtix9IAAAAJ;jKrIVCIAAAAJ", "orcid": "0000-0001-8864-7818;0000-0003-3894-5862;0000-0001-8132-9083;0000-0001-8420-6011", "linkedin": ";;sunxings/;", "or_profile": "~Hao_Cheng5;~Zhaowei_Zhu1;~Xing_Sun1;~Yang_Liu3", "aff": "University of California, Santa Cruz;University of California, Santa Cruz;Tencent YouTu Lab;University of California, Santa Cruz", "aff_domain": "ucsc.edu;ucsc.edu;tencent.com;ucsc.edu", "position": "PhD student;PhD student;Principal Researcher;Assistant Professor", "bibtex": "@inproceedings{\ncheng2023mitigating,\ntitle={Mitigating Memorization of Noisy Labels via Regularization between Representations},\nauthor={Hao Cheng and Zhaowei Zhu and Xing Sun and Yang Liu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=6qcYDVlVLnK}\n}", "github": "", "project": "", "reviewers": "L66U;DbNo;wiWY;kNp2;Y66C", "pdf_size": 668558, "recommendation": "3;6;8;8;8", "confidence": "4;4;5;4;3", "correctness": "2;3;3;3;4", "technical_novelty": "3;2;3;3;3", "empirical_novelty": "3;3;3;2;2", "wc_summary_paper": "76;67;167;212;105", "wc_strength_and_weaknesses": "77;149;558;249;383", "wc_clarity_quality_novelty_and_reproducibility": "227;47;45;72;47", "wc_summary_review": "33;26;49;32;33", "wc_review": "413;289;819;565;568", "wc_reply_reviewers": "0;0;0;0;172", "wc_reply_authors": "406;482;1518;1084;2155", "reply_reviewers": "0;0;0;0;4", "reply_authors": "1;1;3;2;5", "recommendation_avg": [ 6.6, 1.9595917942265424 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "correctness_avg": [ 3.0, 0.6324555320336759 ], "technical_novelty_avg": [ 2.8, 0.39999999999999997 ], "empirical_novelty_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_paper_avg": [ 125.4, 55.672614452709155 ], "wc_strength_and_weaknesses_avg": [ 283.2, 171.56503140208963 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 87.6, 70.409090322202 ], "wc_summary_review_avg": [ 34.6, 7.657675887630659 ], "wc_review_avg": [ 530.8, 177.7395847862822 ], "wc_reply_reviewers_avg": [ 34.4, 68.8 ], "wc_reply_authors_avg": [ 1129.0, 655.341132540908 ], "reply_reviewers_avg": [ 0.8, 1.6 ], "reply_authors_avg": [ 2.4, 1.4966629547095764 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.8068715304598785, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12075636214868930420&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=6qcYDVlVLnK", "email": "ucsc.edu;ucsc.edu;tencent.com;ucsc.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of California, Santa Cruz;Tencent", "aff_unique_dep": ";YouTu Lab", "aff_unique_url": "https://www.ucsc.edu;https://www.tencent.com", "aff_unique_abbr": "UCSC;Tencent", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Santa Cruz;", "aff_country_unique_index": "0;0;1;0", 
"aff_country_unique": "United States;China" }, { "title": "Planning Goals for Exploration", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11390", "id": "6qeBuZSo7Pr", "poster": "/media/PosterPDFs/ICLR%202023/11390.png?t=1681839567.2696724", "openreview": "https://openreview.net/forum?id=6qeBuZSo7Pr", "slides": "https://iclr.cc/virtual/2023/poster/11390", "video": "https://iclr.cc/virtual/2023/poster/11390", "author_site": "Edward Hu, Richard Chang, Oleh Rybkin, Dinesh Jayaraman", "tldr": "We use world models to generate goals for exploration.", "abstract": "Dropped into an unknown environment, what should an agent do to quickly learn about the environment and how to accomplish diverse tasks within it? We address this question within the goal-conditioned reinforcement learning paradigm, by identifying how the agent should set its goals at training time to maximize exploration. We propose \"Planning Exploratory Goals\" (PEG), a method that sets goals for each training episode to directly optimize an intrinsic exploration reward. PEG first chooses goal commands such that the agent's goal-conditioned policy, at its current level of training, will end up in states with high exploration potential. It then launches an exploration policy starting at those promising states. To enable this direct optimization, PEG learns world models and adapts sampling-based planning algorithms to \"plan goal commands\". In challenging simulated robotics environments including a multi-legged ant robot in a maze, and a robot arm on a cluttered tabletop, PEG exploration enables more efficient and effective training of goal-conditioned policies relative to baselines and ablations. Our ant successfully navigates a long maze, and the robot arm successfully builds a stack of three blocks upon command. Website: https://sites.google.com/view/exploratory-goals", "keywords": "model-based reinforcement learning;exploration;goal-conditioned reinforcement learning;planning;intrinsic motivation;reinforcement learning", "primary_area": "", "supplementary_material": "/attachment/0d43ce144b0933fda5695689b53a2e642db4d34e.zip", "author": "Edward S. Hu;Richard Chang;Oleh Rybkin;Dinesh Jayaraman", "authorids": "~Edward_S._Hu1;~Richard_Chang2;~Oleh_Rybkin1;~Dinesh_Jayaraman2", "gender": "M;M;M;M", "homepage": ";http://olehrybkin.com/;https://www.seas.upenn.edu/~dineshj/;https://www.edwardshu.com", "dblp": ";217/2946;145/3870;245/4627", "google_scholar": ";https://scholar.google.com/citations?view_op=list_works;QxLpghAAAAAJ;", "orcid": ";0000-0002-5898-006X;0000-0002-6888-3095;", "linkedin": "richard-chang-bb5475168/;oleh-rybkin/;dinesh-jayaraman-44b31539/;", "or_profile": "~Richard_Chang2;~Oleh_Rybkin1;~Dinesh_Jayaraman2;~Edward_Shichao_Hu1", "aff": "University of Pennsylvania;University of Pennsylvania;University of Pennsylvania;University of Pennsylvania", "aff_domain": "upenn.edu;upenn.edu;upenn.edu;upenn.edu", "position": "Undergrad student;PhD student;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nhu2023planning,\ntitle={Planning Goals for Exploration},\nauthor={Edward S. 
Hu and Richard Chang and Oleh Rybkin and Dinesh Jayaraman},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=6qeBuZSo7Pr}\n}", "github": "", "project": "", "reviewers": "WR3C;p4aC;i4Jc;rUvT;5sEN", "pdf_size": 7705038, "recommendation": "6;8;8;8;8", "confidence": "4;5;4;4;3", "correctness": "3;4;4;3;3", "technical_novelty": "3;2;3;3;3", "empirical_novelty": "3;3;3;2;3", "wc_summary_paper": "70;25;92;74;78", "wc_strength_and_weaknesses": "183;454;308;189;369", "wc_clarity_quality_novelty_and_reproducibility": "70;103;78;22;92", "wc_summary_review": "38;115;78;42;44", "wc_review": "361;697;556;327;583", "wc_reply_reviewers": "288;78;99;18;48", "wc_reply_authors": "2054;1071;569;1275;1687", "reply_reviewers": "2;1;1;1;1", "reply_authors": "6;3;2;3;3", "recommendation_avg": [ 7.6, 0.7999999999999999 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "correctness_avg": [ 3.4, 0.4898979485566356 ], "technical_novelty_avg": [ 2.8, 0.39999999999999997 ], "empirical_novelty_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_paper_avg": [ 67.8, 22.648620267027304 ], "wc_strength_and_weaknesses_avg": [ 300.6, 104.45017951157384 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 73.0, 27.914154115788644 ], "wc_summary_review_avg": [ 63.4, 29.513386793114748 ], "wc_review_avg": [ 504.8, 139.97771251167094 ], "wc_reply_reviewers_avg": [ 106.2, 94.93661043032871 ], "wc_reply_authors_avg": [ 1331.2, 510.06760336253467 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 3.4, 1.3564659966250538 ], "replies_avg": [ 32, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.408248290463863, "gs_citation": 34, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14640808844240736021&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=6qeBuZSo7Pr", "email": "upenn.edu;upenn.edu;upenn.edu;upenn.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Pennsylvania", "aff_unique_dep": "", "aff_unique_url": "https://www.upenn.edu", "aff_unique_abbr": "UPenn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "UL2: Unifying Language Learning Paradigms", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10748", "id": "6ruVLB727MC", "poster": "", "openreview": "https://openreview.net/forum?id=6ruVLB727MC", "slides": "https://iclr.cc/virtual/2023/poster/10748", "video": "https://iclr.cc/virtual/2023/poster/10748", "author_site": "Yi Tay, Mostafa Dehghani, Vinh Tran, Xavier Garcia, Jason Wei, Xuezhi Wang, Hyung Won Chung, Dara Bahri, Tal Schuster, Huaixiu Steven Zheng, Denny Zhou, Neil Houlsby, Donald Metzler", "tldr": "How to train a language model properly", "abstract": "Existing pre-trained models are generally geared towards a particular class of problems. To date, there seems to be still no consensus on what the right architecture and pre-training setup should be. This paper presents a unified framework for pre-training models that are universally effective across datasets and setups. We begin by disentangling architectural archetypes with pre-training objectives -- two concepts that are commonly conflated. 
Next, we present a generalized and unified perspective for self-supervision in NLP and show how different pre-training objectives can be cast as one another and how interpolating between different objectives can be effective. We then propose Mixture-of-Denoisers (MoD), a pre-training objective that combines diverse pre-training paradigms together. We furthermore introduce a notion of mode switching, wherein downstream fine-tuning is associated with specific pre-training schemes. We conduct extensive ablative experiments to compare multiple pre-training objectives and find that our method pushes the Pareto-frontier by outperforming T5 and/or GPT-like models across multiple diverse setups. Finally, by scaling our model up to 20B parameters, we achieve SOTA performance on 50 well-established supervised NLP tasks ranging from language generation (with automated and human evaluation), language understanding, text classification, question answering, commonsense reasoning, long text reasoning, structured knowledge grounding and information retrieval. Our model also achieve strong results at in-context learning, outperforming 175B GPT-3 on zero-shot SuperGLUE and tripling the performance of T5-XXL on one-shot summarization. Finally, we show that UL2 20B works well with chain-of-thought prompting and reasoning, making it an appealing choice for research into reasoning at a small to medium scale of 20B parameters. We release Flax-based T5X model checkpoints for the 20B model publicly.\n", "keywords": "language models;pretraining;transformers", "primary_area": "", "supplementary_material": "", "author": "Yi Tay;Mostafa Dehghani;Vinh Q. Tran;Xavier Garcia;Jason Wei;Xuezhi Wang;Hyung Won Chung;Dara Bahri;Tal Schuster;Steven Zheng;Denny Zhou;Neil Houlsby;Donald Metzler", "authorids": "~Yi_Tay1;~Mostafa_Dehghani1;~Vinh_Q._Tran1;~Xavier_Garcia1;~Jason_Wei1;~Xuezhi_Wang3;~Hyung_Won_Chung1;~Dara_Bahri1;~Tal_Schuster1;stevenzheng@google.com;~Denny_Zhou1;~Neil_Houlsby1;~Donald_Metzler1", "gender": "M;M;M;;M;;M;M;Not Specified;;;M;M", "homepage": "http://yitay.net;http://mostafadehghani.com/;https://vqtran.github.io;;https://jasonwei20.github.io;https://research.google/people/105995/;;http://www.dara.run;https://people.csail.mit.edu/tals/;;;https://neilhoulsby.github.io/;https://research.google/people/DonaldMetzler/", "dblp": ";125/4062;77/2885-2.html;;02/11220.html;70/4090-2;;231/7656;190/7491;;;91/10669;95/2272", "google_scholar": "VBclY_cAAAAJ;https://scholar.google.nl/citations?user=MiHOX3QAAAAJ;ot3WsOwAAAAJ;;;ScLUQ-YAAAAJ;1CAlXvYAAAAJ;j5PpTOwAAAAJ;oo8QRmIAAAAJ;;;https://scholar.google.com/citations?hl=en;bmXpOd8AAAAJ", "orcid": ";;;;;;;;;;;;0000-0003-4276-6269", "linkedin": ";;vinh-tran-32597468/;;;;;;;;;;donmetzler/", "or_profile": "~Yi_Tay1;~Mostafa_Dehghani1;~Vinh_Q._Tran1;~Xavier_Garcia1;~Jason_Wei1;~Xuezhi_Wang3;~Hyung_Won_Chung1;~Dara_Bahri1;~Tal_Schuster1;stevenzheng@google.com;~Denny_Zhou1;~Neil_Houlsby1;~Donald_Metzler1", "aff": "Google;Google DeepMind;Google;;OpenAI;Google DeepMind;Google Brain;Google Research;Google;;;Google;Google", "aff_domain": "google.com;google.com;google.com;;openai.com;google.com;google.com;google.com;google.com;;;google.com;google.com", "position": "Research Scientist;Research Scientist;Researcher;;Researcher;Research Scientist;Researcher;Research Scientist;Researcher;;;Researcher;Research Scientist", "bibtex": "@inproceedings{\ntay2023ul,\ntitle={{UL}2: Unifying Language Learning Paradigms},\nauthor={Yi Tay and Mostafa Dehghani and Vinh Q. 
Tran and Xavier Garcia and Jason Wei and Xuezhi Wang and Hyung Won Chung and Dara Bahri and Tal Schuster and Steven Zheng and Denny Zhou and Neil Houlsby and Donald Metzler},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=6ruVLB727MC}\n}", "github": "", "project": "", "reviewers": "m24X;qkXg;Laoj;9xAc", "pdf_size": 401387, "recommendation": "3;6;8;8", "confidence": "4;4;4;4", "correctness": "2;3;4;4", "technical_novelty": "2;3;3;3", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "116;33;179;132", "wc_strength_and_weaknesses": "504;165;315;113", "wc_clarity_quality_novelty_and_reproducibility": "56;23;30;43", "wc_summary_review": "6;16;30;24", "wc_review": "682;237;554;312", "wc_reply_reviewers": "164;0;0;0", "wc_reply_authors": "367;266;270;21", "reply_reviewers": "1;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 6.25, 2.0463381929681126 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 115.0, 52.70199237220544 ], "wc_strength_and_weaknesses_avg": [ 274.25, 151.97265379008158 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 38.0, 12.62933094031509 ], "wc_summary_review_avg": [ 19.0, 9.0 ], "wc_review_avg": [ 446.25, 179.5805877593678 ], "wc_reply_reviewers_avg": [ 41.0, 71.01408311032397 ], "wc_reply_authors_avg": [ 231.0, 127.81040646207178 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 13, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.9945577827230725, "gs_citation": 488, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9085524785372886362&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=6ruVLB727MC", "email": "google.com;google.com;google.com;;openai.com;google.com;google.com;google.com;google.com;;;google.com;google.com", "author_num": 13, "aff_unique_index": "0;0;0;1;0;0;0;0;0;0", "aff_unique_norm": "Google;OpenAI", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://openai.com", "aff_unique_abbr": "Google;OpenAI", "aff_campus_unique_index": "0;0;0;0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;1;0;0;1;0;0;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "6s5HaPx6ndR", "title": "Extracting Meaningful Attention on Source Code: An Empirical Study of Developer and Neural Model Code Exploration", "track": "main", "status": "Reject", "tldr": "We compare how developers and GPT-like language models navigate snippets of source code.", "abstract": "The high effectiveness of neural models of code, such as OpenAI Codex and AlphaCode, suggests coding capabilities of models that are at least comparable to those of humans. However, previous work has only used these models for their raw completion, ignoring how the model reasoning, in the form of attention weights, can be used for other downstream tasks. Disregarding the attention weights means discarding a considerable portion of what those models compute when queried. To profit more from the knowledge embedded in these large pre-trained models, this work compares multiple approaches to post-process these valuable attention weights for supporting code exploration. 
Specifically, we compare to which extent the transformed attention signal of CodeGen, a large and publicly available pre-trained neural model, agrees with how developers look at and explore code when each answering the same sense-making questions about code. At the core of our experimental evaluation, we collect, manually annotate, and open-source a novel eye-tracking dataset comprising 25 developers answering sense-making questions on code over 92 sessions. We empirically evaluate five attention-agnostic heuristics and ten attention-based post processing approaches of the attention signal against our ground truth of developers exploring code, including the novel concept of follow-up attention which exhibits the highest agreement. Beyond the dataset contribution and the empirical study, we also introduce a novel practical application of the attention signal of pre-trained models with completely analytical solutions, going beyond how neural models\u2019 attention mechanisms have traditionally been used.\n", "keywords": "eye-tracking;transformers;self-attention;code exploration;source code;neural models of code", "primary_area": "", "supplementary_material": "", "author": "Matteo Paltenghi;Rahul Pandita;Austin Henley;Albert Ziegler", "authorids": "~Matteo_Paltenghi1;~Rahul_Pandita1;~Austin_Henley2;~Albert_Ziegler1", "gender": "M;M;M;", "homepage": "https://matteopaltenghi.com/;http://rahulpandita.me/;https://austinhenley.com/;", "dblp": ";05/9103;;61/607.html", "google_scholar": "https://scholar.google.de/citations?user=L1VH9VMAAAAJ;ikALA9IAAAAJ;V7gfCwsAAAAJ;", "orcid": "0000-0003-2266-453X;;;0000-0003-0937-7464", "linkedin": "matteo-paltenghi/;panditarahul;;albert-ziegler-6b3b24138/", "or_profile": "~Matteo_Paltenghi1;~Rahul_Pandita1;~Austin_Henley2;~Albert_Ziegler1", "aff": "Universit\u00e4t Stuttgart;;Microsoft;", "aff_domain": "uni-stuttgart.de;;microsoft.com;", "position": "PhD student;;Researcher;", "bibtex": "@misc{\npaltenghi2023extracting,\ntitle={Extracting Meaningful Attention on Source Code: An Empirical Study of Developer and Neural Model Code Exploration},\nauthor={Matteo Paltenghi and Rahul Pandita and Austin Henley and Albert Ziegler},\nyear={2023},\nurl={https://openreview.net/forum?id=6s5HaPx6ndR}\n}", "github": "", "project": "", "reviewers": "oGLs;6UaZ;q7Ss;AZZZ;ZiWC", "site": "https://openreview.net/forum?id=6s5HaPx6ndR", "pdf_size": 936036, "recommendation": "3;5;5;6;6", "confidence": "4;4;4;3;3", "correctness": "2;3;3;3;4", "technical_novelty": "2;2;2;3;2", "empirical_novelty": "2;2;2;3;3", "wc_summary_paper": "54;181;64;240;232", "wc_strength_and_weaknesses": "103;338;402;451;224", "wc_clarity_quality_novelty_and_reproducibility": "58;205;88;193;54", "wc_summary_review": "49;75;45;113;49", "wc_review": "264;799;599;997;559", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "607;570;690;556;238", "reply_reviewers": "0;0;0;0;0", "reply_authors": "1;1;1;1;1", "recommendation_avg": [ 5.0, 1.0954451150103321 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "correctness_avg": [ 3.0, 0.6324555320336759 ], "technical_novelty_avg": [ 2.2, 0.39999999999999997 ], "empirical_novelty_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_paper_avg": [ 154.2, 80.38507324124299 ], "wc_strength_and_weaknesses_avg": [ 303.6, 125.83258719425585 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 119.6, 65.99575743939909 ], "wc_summary_review_avg": [ 66.2, 25.72469630530164 ], "wc_review_avg": [ 643.6, 245.87931999255244 ], 
"wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 532.2, 154.30152299961267 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.74535599249993, "corr_recommendation_correctness": 0.8660254037844387, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "aff_unique_index": "0;1", "aff_unique_norm": "University of Stuttgart;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.uni-stuttgart.de;https://www.microsoft.com", "aff_unique_abbr": "Uni Stuttgart;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Germany;United States" }, { "id": "6s8XPvu7bI8", "title": "Fine-grain Inference on Out-of-Distribution Data with Hierarchical Classification", "track": "main", "status": "Reject", "tldr": "", "abstract": "Machine learning methods must be trusted to make appropriate decisions in real-world environments, even when faced with out-of-distribution (OOD) samples. Many current approaches simply aim to detect OOD examples and alert the user when an unrecognized input is given. However, when the OOD sample significantly overlaps with the training data, a binary anomaly detection is not interpretable or explainable, and provides little information to the user. We propose a new model for OOD detection that makes predictions at varying levels of granularity\u2014as the inputs become more ambiguous, the model predictions become coarser and more conservative. Consider an animal classifier that encounters an unknown bird species and a car. Both cases are OOD, but the user gains more information if the classifier recognizes that its uncertainty over the particular species is too large and predicts \u201cbird\u201d instead of detecting it as OOD. Furthermore, we diagnose the classifier\u2019s performance at each level of the hierarchy improving the explainability and interpretability of the model\u2019s predictions. 
We demonstrate the effectiveness of hierarchical classifiers for both fine- and coarse-grained OOD tasks.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/d9efe23fbd44fb3234dd507094a2ef8138ccdb69.zip", "author": "Randolph Linderman;Jingyang Zhang;Nathan Inkawhich;Hai Li;Yiran Chen", "authorids": "~Randolph_Linderman1;~Jingyang_Zhang2;~Nathan_Inkawhich1;~Hai_Li1;~Yiran_Chen1", "gender": "M;M;;F;M", "homepage": ";;;https://ece.duke.edu/faculty/hai-helen-li;https://ece.duke.edu/people/yiran-chen/", "dblp": "329/5247;;230/7843;30/5330-1;80/1641", "google_scholar": "uPaeUDIAAAAJ;f3DQwmgAAAAJ;NZh50oIAAAAJ;E6Tpfq8AAAAJ;", "orcid": "0000-0001-5695-7766;;;0000-0003-3228-6544;0000-0002-1486-8412", "linkedin": "randolph-linderman-74b8a661;;;;", "or_profile": "~Randolph_Linderman1;~Jingyang_Zhang2;~Nathan_Inkawhich1;~Hai_Li1;~Yiran_Chen1", "aff": "Duke University;Electrical and Computer Engineering, Duke University;Air Force Research Laboratory;Duke University;Duke University", "aff_domain": "duke.edu;duke.edu;us.af.mil;duke.edu;duke.edu", "position": "PhD student;PhD student;Researcher;Professor;Professor", "bibtex": "@misc{\nlinderman2023finegrain,\ntitle={Fine-grain Inference on Out-of-Distribution Data with Hierarchical Classification},\nauthor={Randolph Linderman and Jingyang Zhang and Nathan Inkawhich and Hai Li and Yiran Chen},\nyear={2023},\nurl={https://openreview.net/forum?id=6s8XPvu7bI8}\n}", "github": "", "project": "", "reviewers": "NPqs;Wf9S;XEo4;KPxY", "site": "https://openreview.net/forum?id=6s8XPvu7bI8", "pdf_size": 2573854, "recommendation": "3;5;6;8", "confidence": "2;4;2;4", "correctness": "4;3;3;4", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;3;1;3", "wc_summary_paper": "165;52;50;80", "wc_strength_and_weaknesses": "119;262;80;199", "wc_clarity_quality_novelty_and_reproducibility": "66;70;98;38", "wc_summary_review": "23;33;47;88", "wc_review": "373;417;275;405", "wc_reply_reviewers": "0;83;0;0", "wc_reply_authors": "390;729;947;110", "reply_reviewers": "0;1;0;0", "reply_authors": "1;1;2;1", "recommendation_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 3.0, 1.0 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 86.75, 46.70853776345391 ], "wc_strength_and_weaknesses_avg": [ 165.0, 70.5443123150265 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 68.0, 21.2602916254693 ], "wc_summary_review_avg": [ 47.75, 24.752525123712125 ], "wc_review_avg": [ 367.5, 55.774097930849585 ], "wc_reply_reviewers_avg": [ 20.75, 35.94005425705421 ], "wc_reply_authors_avg": [ 544.0, 319.6505904890526 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.5547001962252291, "corr_recommendation_correctness": 0.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9793850610669941824&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Duke University;Air Force Research Laboratory", "aff_unique_dep": ";", "aff_unique_url": "https://www.duke.edu;https://www.afrl.af.mil/", "aff_unique_abbr": "Duke;AFRL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", 
"aff_country_unique": "United States" }, { "id": "6sQr2-BlARv", "title": "Learning Top-k Classification with Label Ranking", "track": "main", "status": "Reject", "tldr": "", "abstract": "Class confusability and multi-label nature of examples inevitably arise in classification tasks with the increasing number of classes, which poses a huge challenge to classification. To mitigate this problem, top-$k$ classification is proposed, where the classifier is allowed to predict $k$ label candidates and the prediction result is considered correct as long as the ground truth label is included in the $k$ labels. However, existing top-k classification methods neglect the ranking of the ground truth label among the predicted $k$ labels, which has high application value. In this paper, we propose a novel three-stage approach to learn top-$k$ classification with label ranking. We first propose an ensemble based relabeling method and relabel the training data with $k$ labels, which is used to train the top-$k$ classifier. We then propose a novel top-$k$ classification loss function that aims to improve the ranking of the ground truth label. Finally, we have conducted extensive experiments on four text datasets and four image datasets, and the experimental results show that our method could significantly improve the performance of existing methods.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Bin Cao;Kai Wang;JING FAN;Jianwei Yin", "authorids": "~Bin_Cao3;~Kai_Wang17;~JING_FAN2;~Jianwei_Yin1", "gender": "M;M;F;M", "homepage": "http://www.cs.zjut.edu.cn/staffs-en/bincao.html;;http://www.cs.zjut.edu.cn/staffs/jingfan.html;https://person.zju.edu.cn/0001038", "dblp": "17/1169-4;;;74/3786", "google_scholar": "m4CUeVAAAAAJ;https://scholar.google.com.hk/citations?user=avOwNSEAAAAJ;;0s1A5fwAAAAJ", "orcid": ";;;0000-0003-4703-7348", "linkedin": ";;;", "or_profile": "~Bin_Cao3;~Kai_Wang17;~JING_FAN2;~Jianwei_Yin1", "aff": "Zhejiang University of Technology;Zhejiang University of Technology;Zhejiang University of Technology;Zhejiang University", "aff_domain": "zjut.edu.cn;zjut.edu.cn;zjut.edu.cn;zju.edu.cn", "position": "Associate Professor;PhD student;Full Professor;Full Professor", "bibtex": "@misc{\ncao2023learning,\ntitle={Learning Top-k Classification with Label Ranking},\nauthor={Bin Cao and Kai Wang and JING FAN and Jianwei Yin},\nyear={2023},\nurl={https://openreview.net/forum?id=6sQr2-BlARv}\n}", "github": "", "project": "", "reviewers": "ALCH;Pewp;6iPJ;iYfY", "site": "https://openreview.net/forum?id=6sQr2-BlARv", "pdf_size": 1978363, "recommendation": "5;5;5;6", "confidence": "3;4;4;4", "correctness": "3;3;3;3", "technical_novelty": "2;2;1;3", "empirical_novelty": "2;3;2;2", "wc_summary_paper": "70;65;16;137", "wc_strength_and_weaknesses": "67;99;117;113", "wc_clarity_quality_novelty_and_reproducibility": "217;58;30;36", "wc_summary_review": "34;211;42;33", "wc_review": "388;433;205;319", "wc_reply_reviewers": "0;0;55;0", "wc_reply_authors": "209;566;419;254", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 72.0, 43.05229378325852 ], "wc_strength_and_weaknesses_avg": [ 99.0, 19.6468827043885 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 85.25, 76.77686826121524 ], "wc_summary_review_avg": 
[ 80.0, 75.71327492586752 ], "wc_review_avg": [ 336.25, 85.9691078236828 ], "wc_reply_reviewers_avg": [ 13.75, 23.81569860407206 ], "wc_reply_authors_avg": [ 362.0, 141.36654484000096 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:8p0z9RlW7lcJ:scholar.google.com/&scioq=Learning+Top-k+Classification+with+Label+Ranking&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Zhejiang University of Technology;Zhejiang University", "aff_unique_dep": ";", "aff_unique_url": "https://www.zjut.edu.cn;https://www.zju.edu.cn", "aff_unique_abbr": "ZJUT;ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Editing models with task arithmetic", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12254", "id": "6t0Kwf8-jrj", "poster": "", "openreview": "https://openreview.net/forum?id=6t0Kwf8-jrj", "slides": "https://iclr.cc/virtual/2023/poster/12254", "video": "https://iclr.cc/virtual/2023/poster/12254", "author_site": "Gabriel Ilharco, Marco Tulio Ribeiro, Mitchell Wortsman, Ludwig Schmidt, Hannaneh Hajishirzi, Ali Farhadi", "tldr": "We study a new paradigm for editing pre-trained models, where weight vectors obtained via fine-tuning can be combined to efficiently and effectively steer model behavior.", "abstract": "Changing how pre-trained models behave---e.g., improving their performance on a downstream task or mitigating biases learned during pre-training---is a common practice when developing machine learning systems. In this work, we propose a new paradigm for steering the behavior of neural networks, centered around task vectors. A task vector specifies a direction in the weight space of a pre-trained model, such that movement in that direction improves performance on the task. We build task vectors by subtracting the weights of a pre-trained model from the weights of the same model after fine-tuning on a task. We show that these task vectors can be modified and combined together through arithmetic operations such as negation and addition, and the behavior of the resulting model is steered accordingly. Moreover, task vectors can be added together to improve performance on multiple tasks at once. 
Finally, when tasks are linked by an analogy relationship of the form \"A is to B as C is to D\", combining task vectors from three of the tasks can improve performance on the fourth, even when no data from the fourth task is used for training.", "keywords": "pre-trained models;model editing;model patching;fine-tuning;transfer learning;weight interpolation;merging models", "primary_area": "", "supplementary_material": "", "author": "Gabriel Ilharco;Marco Tulio Ribeiro;Mitchell Wortsman;Ludwig Schmidt;Hannaneh Hajishirzi;Ali Farhadi", "authorids": "~Gabriel_Ilharco1;~Marco_Tulio_Ribeiro1;~Mitchell_Wortsman1;~Ludwig_Schmidt1;~Hannaneh_Hajishirzi1;~Ali_Farhadi3", "gender": "M;M;M;M;F;M", "homepage": "http://gabrielilharco.com/;;https://mitchellnw.github.io/;http://people.csail.mit.edu/ludwigs/;https://homes.cs.washington.edu/~hannaneh/;https://homes.cs.washington.edu/~ali/", "dblp": "249/2616;21/10105;232/2273;141/2720;52/1296;37/5826", "google_scholar": "https://scholar.google.com/citations?hl=en;rmsIyGMAAAAJ;fzRnjFgAAAAJ;SWMKy70AAAAJ;LOV6_WIAAAAJ;jeOFRDsAAAAJ", "orcid": ";;;;;", "linkedin": ";;;ludwig-schmidt-87ba3612/;;", "or_profile": "~Gabriel_Ilharco1;~Marco_Tulio_Ribeiro1;~Mitchell_Wortsman1;~Ludwig_Schmidt1;~Hannaneh_Hajishirzi1;~Ali_Farhadi3", "aff": "Department of Computer Science, University of Washington;Microsoft;Google;Allen Institute for Artificial Intelligence;University of Washington;University of Washington", "aff_domain": "cs.washington.edu;microsoft.com;google.com;allenai.org;uw.edu;cs.uw.edu", "position": "PhD student;Researcher;Intern;Researcher;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nilharco2023editing,\ntitle={Editing models with task arithmetic},\nauthor={Gabriel Ilharco and Marco Tulio Ribeiro and Mitchell Wortsman and Ludwig Schmidt and Hannaneh Hajishirzi and Ali Farhadi},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=6t0Kwf8-jrj}\n}", "github": "", "project": "", "reviewers": "t87y;gv8P;9yQa", "pdf_size": 7536093, "recommendation": "5;5;6", "confidence": "3;4;3", "correctness": "3;3;3", "technical_novelty": "3;3;3", "empirical_novelty": "0;3;3", "wc_summary_paper": "99;103;59", "wc_strength_and_weaknesses": "286;152;136", "wc_clarity_quality_novelty_and_reproducibility": "9;14;58", "wc_summary_review": "47;66;41", "wc_review": "441;335;294", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "969;508;403", "reply_reviewers": "0;0;0", "reply_authors": "2;1;1", "recommendation_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 1.4142135623730951 ], "wc_summary_paper_avg": [ 87.0, 19.86621923433512 ], "wc_strength_and_weaknesses_avg": [ 191.33333333333334, 67.25738290742181 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 27.0, 22.015146301277824 ], "wc_summary_review_avg": [ 51.333333333333336, 10.656244908763853 ], "wc_review_avg": [ 356.6666666666667, 61.93724422528195 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 626.6666666666666, 245.83237287947972 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.4999999999999999, "corr_recommendation_correctness": 0.0, "gs_citation": 588, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=11642314124177379139&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=6t0Kwf8-jrj", "email": "cs.washington.edu;microsoft.com;google.com;allenai.org;uw.edu;cs.uw.edu", "author_num": 6, "aff_unique_index": "0;1;2;3;0;0", "aff_unique_norm": "University of Washington;Microsoft;Google;Allen Institute for Artificial Intelligence", "aff_unique_dep": "Department of Computer Science;Microsoft Corporation;Google;", "aff_unique_url": "https://www.washington.edu;https://www.microsoft.com;https://www.google.com;https://allenai.org", "aff_unique_abbr": "UW;Microsoft;Google;AI2", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Seattle;;Mountain View", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "6tPGEjCN4iI", "title": "TimelyFL: Heterogeneity-aware Asynchronous Federated Learning with Adaptive Partial Training", "track": "main", "status": "Reject", "tldr": "An inclusiveness asynchronous federated learning with adaptive partial training.", "abstract": "In cross-device Federated Learning (FL) environments, scaling synchronous FL methods is challenging as stragglers hinder the training process. Moreover, the availability of each client to join the training is highly variable over time due to system heterogeneities and intermittent connectivity. Recent asynchronous FL methods (e.g., FedBuff) have been proposed to overcome these issues by allowing slower users to continue their work on local training based on stale models and to contribute to aggregation when ready. However, we show empirically that this method can lead to a substantial drop in training accuracy as well as a slower convergence rate. The primary reason is that fast-speed devices contribute to many more rounds of aggregation while others join more intermittently or not at all, and with stale model updates. To overcome this barrier, we propose TimelyFL, a heterogeneity-aware asynchronous FL framework with adaptive partial training. During the training, TimelyFL adjusts the local training workload based on the real-time resource capabilities of each client, aiming to allow more available clients to join in the global update without staleness. We demonstrate the performance benefits of TimelyFL by conducting extensive experiments on various datasets (e.g., CIFAR-10, Google Speech, and Reddit) and models (e.g., ResNet20, VGG11, and ALBERT). We also validate the feasibility of TimelyFL by deploying it on an Android-based mobile device testbed. 
In comparison with the state-of-the-art (i.e., FedBuff), our evaluations reveal that TimelyFL improves participation rate by 21.13%, harvests 1.28x - 2.89x more efficiency on convergence rate, and provides a 6.25% increment on test accuracy.", "keywords": "Submodel Training;Federated Learning", "primary_area": "", "supplementary_material": "/attachment/f7124b4f5b27b377329c0f85c023e9c951003dc2.zip", "author": "Tuo Zhang;Lei Gao;Sunwoo Lee;Mi Zhang;Chaoyang He;Salman Avestimehr", "authorids": "~Tuo_Zhang2;~Lei_Gao3;~Sunwoo_Lee1;~Mi_Zhang1;~Chaoyang_He1;~Salman_Avestimehr1", "gender": "M;M;M;M;M;", "homepage": ";;https://sites.google.com/view/sunwoolee;https://mi-zhang.github.io/;http://chaoyanghe.com;", "dblp": ";;56/7811-1;84/2519-2.html;222/6721-1.html;", "google_scholar": "Rki45F4AAAAJ;TxzNHuIAAAAJ;WA9KNNcAAAAJ;https://scholar.google.com.tw/citations?user=r3A90uAAAAAJ;2z2camUAAAAJ;", "orcid": ";;0000-0001-6334-3068;;;", "linkedin": "tuo-zhang-ultraz/;;sunwoo-lee-90a7308a;mizhang/;;", "or_profile": "~Tuo_Zhang2;~Lei_Gao3;~Sunwoo_Lee1;~Mi_Zhang1;~Chaoyang_He1;~Salman_Avestimehr1", "aff": "University of Southern California;University of Southern California;Inha University;The Ohio State University;TensorOpera AI;", "aff_domain": "usc.edu;usc.edu;inha.ac.kr;osu.edu;tensoropera.ai;", "position": "PhD student;PhD student;Assistant Professor;Associate Professor;Researcher;", "bibtex": "@misc{\nzhang2023timelyfl,\ntitle={Timely{FL}: Heterogeneity-aware Asynchronous Federated Learning with Adaptive Partial Training },\nauthor={Tuo Zhang and Lei Gao and Sunwoo Lee and Mi Zhang and Chaoyang He and Salman Avestimehr},\nyear={2023},\nurl={https://openreview.net/forum?id=6tPGEjCN4iI}\n}", "github": "", "project": "", "reviewers": "L6W9;touo;21P2;rkLW", "site": "https://openreview.net/forum?id=6tPGEjCN4iI", "pdf_size": 7123415, "recommendation": "3;6;6;6", "confidence": "4;3;4;3", "correctness": "3;2;3;4", "technical_novelty": "1;2;2;3", "empirical_novelty": "1;3;2;3", "wc_summary_paper": "53;78;127;136", "wc_strength_and_weaknesses": "278;405;323;211", "wc_clarity_quality_novelty_and_reproducibility": "65;172;83;23", "wc_summary_review": "11;76;102;29", "wc_review": "407;731;635;399", "wc_reply_reviewers": "943;0;115;0", "wc_reply_authors": "2784;1269;1444;755", "reply_reviewers": "2;0;1;0", "reply_authors": "12;6;9;2", "recommendation_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 98.5, 34.311076928595526 ], "wc_strength_and_weaknesses_avg": [ 304.25, 70.51019429841334 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 85.75, 54.34783804347695 ], "wc_summary_review_avg": [ 54.5, 36.26637561157718 ], "wc_review_avg": [ 543.0, 144.08330923462302 ], "wc_reply_reviewers_avg": [ 264.5, 394.53548636339417 ], "wc_reply_authors_avg": [ 1563.0, 749.0497313262986 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 7.25, 3.6996621467371855 ], "replies_avg": [ 38, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.0, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14032456062219899291&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "aff_unique_index": "0;0;1;2;3", "aff_unique_norm": "University of Southern California;Inha University;Ohio State University;TensorOpera AI", 
"aff_unique_dep": ";;;", "aff_unique_url": "https://www.usc.edu;https://www.inha.edu/;https://www.osu.edu;", "aff_unique_abbr": "USC;Inha;OSU;", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;South Korea;" }, { "title": "Neural Networks Efficiently Learn Low-Dimensional Representations with SGD", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11421", "id": "6taykzqcPD", "poster": "/media/PosterPDFs/ICLR%202023/11421.png?t=1680477175.2751217", "openreview": "https://openreview.net/forum?id=6taykzqcPD", "slides": "https://iclr.cc/virtual/2023/poster/11421", "video": "https://iclr.cc/virtual/2023/poster/11421", "author_site": "Alireza Mousavi-Hosseini, Sejun Park, Manuela Girotti, Ioannis Mitliagkas, Murat A Erdogdu", "tldr": "We prove that SGD on neural networks can learn low-dimensional features in certain settings, and use this to derive novel generalization and excess risk bounds.", "abstract": "We study the problem of training a two-layer neural network (NN) of arbitrary width using stochastic gradient descent (SGD) where the input $\\boldsymbol{x}\\in \\mathbb{R}^d$ is Gaussian and the target $y \\in \\mathbb{R}$ follows a multiple-index model, i.e., $y=g(\\langle\\boldsymbol{u_1},\\boldsymbol{x}\\rangle,...,\\langle\\boldsymbol{u_k},\\boldsymbol{x}\\rangle)$ with a noisy link function $g$. We prove that the first-layer weights in the NN converge to the $k$-dimensional principal subspace spanned by the vectors $\\boldsymbol{u_1},...,\\boldsymbol{u_k}$ of the true model, when online SGD with weight decay is used for training. This phenomenon has several important consequences when $k \\ll d$. First, by employing uniform convergence on this smaller subspace, we establish a generalization error bound of $\\mathcal{O}(\\sqrt{{kd}/{T}})$ after $T$ iterations of SGD, which is independent of the width of the NN. We further demonstrate that, by recovering the principal direction, SGD-trained ReLU NNs can learn a single-index target of the form $y=f(\\langle\\boldsymbol{u},\\boldsymbol{x}\\rangle) + \\epsilon$ with a sample complexity linear in $d$ (up to log factors), where $f$ is a monotonic function with at most polynomial growth, and $\\epsilon$ is the noise. This is in contrast to the known $d^{\\Omega(p)}$ samples required to learn any degree $p$ polynomial in the kernel regime, and shows that SGD-trained NNs can outperform the Neural Tangent Kernel at initialization. 
Finally, we establish compressibility guarantees for NNs using that SGD produces an approximately rank-$k$ first-layer weight matrix.", "keywords": "feature learning;generalization;compressibility;sgd;neural networks", "primary_area": "", "supplementary_material": "", "author": "Alireza Mousavi-Hosseini;Sejun Park;Manuela Girotti;Ioannis Mitliagkas;Murat A Erdogdu", "authorids": "~Alireza_Mousavi-Hosseini1;~Sejun_Park1;~Manuela_Girotti1;~Ioannis_Mitliagkas1;~Murat_A_Erdogdu1", "gender": ";F;M;M;M", "homepage": ";https://mathemanu.github.io/;http://mitliagkas.github.io/;http://www.cs.toronto.edu/~erdogdu/;https://www.cs.toronto.edu/~mousavi/", "dblp": "155/9882;;83/8757;139/1292;296/4041", "google_scholar": ";P69Py8IAAAAJ;K757SxgAAAAJ;Lqc4cdAAAAAJ;", "orcid": ";0000-0003-2261-1251;;;", "linkedin": ";mathemanu/;;;", "or_profile": "~Sejun_Park1;~Manuela_Girotti1;~Ioannis_Mitliagkas1;~Murat_A_Erdogdu1;~Alireza_Mousavi1", "aff": "Korea University;Concordia University, Montreal;Mila - Quebec AI Institute;Vector Institute;Department of Computer Science, University of Toronto", "aff_domain": "korea.ac.kr;concordia.ca;mila.quebec;vectorinstitute.ai;cs.toronto.edu", "position": "Assistant Professor;Affiliate Assistant Professor;Principal Researcher;Faculty;PhD student", "bibtex": "@inproceedings{\nmousavi-hosseini2023neural,\ntitle={Neural Networks Efficiently Learn Low-Dimensional Representations with {SGD}},\nauthor={Alireza Mousavi-Hosseini and Sejun Park and Manuela Girotti and Ioannis Mitliagkas and Murat A Erdogdu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=6taykzqcPD}\n}", "github": "", "project": "", "reviewers": "vyfX;qmRw;k8J3", "pdf_size": 982316, "recommendation": "6;8;8", "confidence": "3;3;3", "correctness": "3;4;4", "technical_novelty": "3;3;3", "empirical_novelty": "3;0;0", "wc_summary_paper": "26;63;284", "wc_strength_and_weaknesses": "182;195;480", "wc_clarity_quality_novelty_and_reproducibility": "5;64;62", "wc_summary_review": "17;24;64", "wc_review": "230;346;890", "wc_reply_reviewers": "0;31;30", "wc_reply_authors": "225;359;780", "reply_reviewers": "0;1;1", "reply_authors": "1;1;1", "recommendation_avg": [ 7.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.0 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 1.0, 1.4142135623730951 ], "wc_summary_paper_avg": [ 124.33333333333333, 113.90736977425513 ], "wc_strength_and_weaknesses_avg": [ 285.6666666666667, 137.5168676522589 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 43.666666666666664, 27.35365098523819 ], "wc_summary_review_avg": [ 35.0, 20.704266871026046 ], "wc_review_avg": [ 488.6666666666667, 287.7097302645305 ], "wc_reply_reviewers_avg": [ 20.333333333333332, 14.383632673594278 ], "wc_reply_authors_avg": [ 454.6666666666667, 236.46047355859616 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.9999999999999998, "gs_citation": 71, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11875551295892975315&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "pdf": "https://openreview.net/pdf?id=6taykzqcPD", "email": "korea.ac.kr;concordia.ca;mila.quebec;vectorinstitute.ai;cs.toronto.edu", "author_num": 5, "aff_unique_index": 
"0;1;2;3;4", "aff_unique_norm": "Korea University;Concordia University;Quebec AI Institute;Vector Institute;University of Toronto", "aff_unique_dep": ";;AI Institute;;Department of Computer Science", "aff_unique_url": "https://www.korea.ac.kr;https://www.concordia.ca;https://mila.quebec;https://vectorinstitute.ai/;https://www.utoronto.ca", "aff_unique_abbr": "KU;Concordia;Mila;Vector Institute;U of T", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Montreal;Toronto", "aff_country_unique_index": "0;1;1;1;1", "aff_country_unique": "South Korea;Canada" }, { "title": "Interpretable Geometric Deep Learning via Learnable Randomness Injection", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10915", "id": "6u7mf9s2A9", "poster": "", "openreview": "https://openreview.net/forum?id=6u7mf9s2A9", "slides": "https://iclr.cc/virtual/2023/poster/10915", "video": "https://iclr.cc/virtual/2023/poster/10915", "author_site": "Siqi Miao, Yunan Luo, Mia Liu, Pan Li", "tldr": "", "abstract": "Point cloud data is ubiquitous in scientific fields. Recently, geometric deep learning (GDL) has been widely applied to solve prediction tasks with such data. However, GDL models are often complicated and hardly interpretable, which poses concerns to scientists who are to deploy these models in scientific analysis and experiments. This work proposes a general mechanism, learnable randomness injection (LRI), which allows building inherently interpretable models based on general GDL backbones. LRI-induced models, once trained, can detect the points in the point cloud data that carry information indicative of the prediction label. We also propose four datasets from real scientific applications that cover the domains of high-energy physics and biochemistry to evaluate the LRI mechanism. Compared with previous post-hoc interpretation methods, the points detected by LRI align much better and stabler with the ground-truth patterns that have actual scientific meanings. LRI is grounded by the information bottleneck principle, and thus LRI-induced models are also more robust to distribution shifts between training and test scenarios. 
Our code and datasets are available at https://github.com/Graph-COM/LRI.", "keywords": "Geometric Deep Learning;Interpretation;Graph Neural Networks", "primary_area": "", "supplementary_material": "/attachment/d915aef28bea5b8176a0a2549b1c18bca62d5b19.zip", "author": "Siqi Miao;Yunan Luo;Mia Liu;Pan Li", "authorids": "~Siqi_Miao1;~Yunan_Luo1;liu3173@purdue.edu;~Pan_Li2", "gender": ";;;", "homepage": "https://siqi.plus/;https://faculty.cc.gatech.edu/~yunan/;;", "dblp": "312/7014-1;225/8950;;https://dblp.org/pers/hd/l/Li_0005:Pan", "google_scholar": "bVF_CzUAAAAJ;N8RBFoAAAAAJ;;IroP0EwAAAAJ", "orcid": ";0000-0001-7728-6412;;", "linkedin": ";;;pan-li-b951105a/", "or_profile": "~Siqi_Miao1;~Yunan_Luo1;liu3173@purdue.edu;~Pan_Li2", "aff": "Georgia Institute of Technology;Georgia Institute of Technology;;Purdue University", "aff_domain": "gatech.edu;gatech.edu;;purdue.edu", "position": "PhD student;Assistant Professor;;Assistant Professor", "bibtex": "@inproceedings{\nmiao2023interpretable,\ntitle={Interpretable Geometric Deep Learning via Learnable Randomness Injection},\nauthor={Siqi Miao and Yunan Luo and Mia Liu and Pan Li},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=6u7mf9s2A9}\n}", "github": "", "project": "", "reviewers": "Wziv;vQZw;HwR5;sW6V", "pdf_size": 3436514, "recommendation": "6;6;8;8", "confidence": "4;3;3;4", "correctness": "3;3;3;4", "technical_novelty": "3;2;3;3", "empirical_novelty": "3;3;3;4", "wc_summary_paper": "47;42;119;105", "wc_strength_and_weaknesses": "272;167;359;76", "wc_clarity_quality_novelty_and_reproducibility": "37;38;46;60", "wc_summary_review": "17;28;32;11", "wc_review": "373;275;556;252", "wc_reply_reviewers": "82;0;139;0", "wc_reply_authors": "2607;819;1497;438", "reply_reviewers": "1;0;1;0", "reply_authors": "7;3;4;1", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 78.25, 34.15680752061 ], "wc_strength_and_weaknesses_avg": [ 218.5, 106.72511419530082 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 45.25, 9.202581159652981 ], "wc_summary_review_avg": [ 22.0, 8.396427811873332 ], "wc_review_avg": [ 364.0, 119.80191985106082 ], "wc_reply_reviewers_avg": [ 55.25, 58.810607036486196 ], "wc_reply_authors_avg": [ 1340.25, 823.8602354161778 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 3.75, 2.165063509461097 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 38, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6558103168934843552&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=6u7mf9s2A9", "email": "gatech.edu;gatech.edu;;purdue.edu", "author_num": 4, "aff_unique_index": "0;0;1", "aff_unique_norm": "Georgia Institute of Technology;Purdue University", "aff_unique_dep": ";", "aff_unique_url": "https://www.gatech.edu;https://www.purdue.edu", "aff_unique_abbr": "Georgia Tech;Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "6uv5W_DXvRr", "title": "Linearised Implicit Variational Inference", "track": "main", "status": "Reject", "tldr": "A novel bound for training implicit variational 
approximations for Bayesian Neural Networks", "abstract": "Bayesian neural networks (BNNs) are touted for robustness under data drift, resilience to overfitting and catastrophic forgetting whilst also producing actionable uncertainty estimates. In variational inference, these elegant properties are contingent on the expressivity of the variational approximation. Posteriors over parameters of large models are usually multimodal and highly correlated and hence cannot be well-approximated by simple, prescribed densities. We posit implicit variational distributions specified using differentiable generators are more flexible and propose a novel bound for training BNNs using such approximations (amortized neural samplers). The proposed bound uses an approximation of the variational distribution's entropy by locally linearising the generator. Unlike existing works, our method does not require a discriminator network and moves away from an unfavourable adversarial objective. Our formulation resembles normalizing flows but does not necessitate invertibility of the generator. Moreover, we use a differentiable numerical lower bound on the Jacobians of the generator, mitigating computational concerns. We report log-likelihoods on UCI datasets competitive with deep ensembles and test our method on out-of-distribution benchmarks.", "keywords": "Implicit models;Variational Inference;Bayesian Deep Learning", "primary_area": "", "supplementary_material": "", "author": "Anshuk Uppal;Wouter Boomsma;Jes Frellsen", "authorids": "~Anshuk_Uppal1;~Wouter_Boomsma1;~Jes_Frellsen1", "gender": "M;M;M", "homepage": "https://uppalanshuk.github.io/;;https://frellsen.org", "dblp": ";06/5945;83/8247", "google_scholar": "XBi06jkAAAAJ;EwqU_jsAAAAJ;Yj2sBWkAAAAJ", "orcid": ";0000-0002-8257-3827;0000-0001-9224-1271", "linkedin": "anshuk-uppal-58011a134/;;frellsen/", "or_profile": "~Anshuk_Uppal1;~Wouter_Boomsma1;~Jes_Frellsen1", "aff": "Technical University of Denmark;University of Copenhagen;Technical University of Denmark", "aff_domain": "dtu.dk;ku.dk;dtu.dk", "position": "PhD student;Full Professor;Associate Professor", "bibtex": "@misc{\nuppal2023linearised,\ntitle={Linearised Implicit Variational Inference},\nauthor={Anshuk Uppal and Wouter Boomsma and Jes Frellsen},\nyear={2023},\nurl={https://openreview.net/forum?id=6uv5W_DXvRr}\n}", "github": "", "project": "", "reviewers": "xXZT;mGTw;9D7T", "site": "https://openreview.net/forum?id=6uv5W_DXvRr", "pdf_size": 477158, "recommendation": "3;3;5", "confidence": "4;4;4", "correctness": "2;3;3", "technical_novelty": "3;2;2", "empirical_novelty": "2;2;3", "wc_summary_paper": "86;61;90", "wc_strength_and_weaknesses": "311;420;447", "wc_clarity_quality_novelty_and_reproducibility": "79;17;224", "wc_summary_review": "170;13;57", "wc_review": "646;511;818", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 79.0, 12.832251036613439 ], "wc_strength_and_weaknesses_avg": [ 392.6666666666667, 58.78964383479647 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 106.66666666666667, 86.74227471205849 ], "wc_summary_review_avg": [ 80.0, 66.1261420821347 ], "wc_review_avg": [ 658.3333333333334, 
125.63527459365154 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:7kGITpEiUT0J:scholar.google.com/&scioq=Linearised+Implicit+Variational+Inference&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;0", "aff_unique_norm": "Technical University of Denmark;University of Copenhagen", "aff_unique_dep": ";", "aff_unique_url": "https://www.dtu.dk;https://www.ku.dk", "aff_unique_abbr": "DTU;UCPH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Denmark" }, { "title": "MEDFAIR: Benchmarking Fairness for Medical Imaging", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11452", "id": "6ve2CkeQe5S", "poster": "/media/PosterPDFs/ICLR%202023/11452.png?t=1682161086.7581112", "openreview": "https://openreview.net/forum?id=6ve2CkeQe5S", "slides": "https://iclr.cc/virtual/2023/poster/11452", "video": "https://iclr.cc/virtual/2023/poster/11452", "author_site": "Yongshuo Zong, Yongxin Yang, Timothy Hospedales", "tldr": "We develop a fairness benchmark for medical imaging and find that the state-of-the-art bias mitigation algorithm does not significantly outperform ERM.", "abstract": "A multitude of work has shown that machine learning-based medical diagnosis systems can be biased against certain subgroups of people. This has motivated a growing number of bias mitigation algorithms that aim to address fairness issues in machine learning. However, it is difficult to compare their effectiveness in medical imaging for two reasons. First, there is little consensus on the criteria to assess fairness. Second, existing bias mitigation algorithms are developed under different settings, e.g., datasets, model selection strategies, backbones, and fairness metrics, making a direct comparison and evaluation based on existing results impossible. In this work, we introduce MEDFAIR, a framework to benchmark the fairness of machine learning models for medical imaging. MEDFAIR covers eleven algorithms from various categories, ten datasets from different imaging modalities, and three model selection criteria. Through extensive experiments, we find that the under-studied issue of model selection criterion can have a significant impact on fairness outcomes; while in contrast, state-of-the-art bias mitigation algorithms do not significantly improve fairness outcomes over empirical risk minimization (ERM) in both in-distribution and out-of-distribution settings. We evaluate fairness from various perspectives and make recommendations for different medical application scenarios that require different ethical principles. Our framework provides a reproducible and easy-to-use entry point for the development and evaluation of future bias mitigation algorithms in deep learning.
Code is available at https://github.com/ys-zong/MEDFAIR.", "keywords": "Fairness;Bias Mitigation;Medical Imaging;Benchmark", "primary_area": "", "supplementary_material": "/attachment/ec90176413847af39ce2ee80f1a7df4702f58aa3.zip", "author": "Yongshuo Zong;Yongxin Yang;Timothy Hospedales", "authorids": "~Yongshuo_Zong1;~Yongxin_Yang1;~Timothy_Hospedales1", "gender": ";M;M", "homepage": "https://ys-zong.github.io/;http://homepages.inf.ed.ac.uk/thospeda/;", "dblp": ";32/3545;150/4258", "google_scholar": "38-dM-MAAAAJ;https://scholar.google.fr/citations?user=nHhtvqkAAAAJ;https://scholar.google.co.uk/citations?user=F7PtrL8AAAAJ", "orcid": ";0000-0003-4867-7486;", "linkedin": ";timothyhospedales/;", "or_profile": "~Yongshuo_Zong1;~Timothy_Hospedales1;~Yongxin_Yang3", "aff": "University of Edinburgh;Samsung AI Research Centre;Queen Mary University of London", "aff_domain": "ed.ac.uk;samsung.com;qmul.ac.uk", "position": "PhD student;Principal Researcher;Assistant Professor", "bibtex": "@inproceedings{\nzong2023medfair,\ntitle={{MEDFAIR}: Benchmarking Fairness for Medical Imaging},\nauthor={Yongshuo Zong and Yongxin Yang and Timothy Hospedales},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=6ve2CkeQe5S}\n}", "github": "", "project": "", "reviewers": "q4yd;sgSV;zqn8;aeZu", "pdf_size": 1305888, "recommendation": "5;6;8;8", "confidence": "5;4;4;4", "correctness": "3;3;4;4", "technical_novelty": "2;2;3;2", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "26;49;112;52", "wc_strength_and_weaknesses": "163;199;179;136", "wc_clarity_quality_novelty_and_reproducibility": "97;19;17;14", "wc_summary_review": "30;25;24;59", "wc_review": "316;292;332;261", "wc_reply_reviewers": "0;0;0;16", "wc_reply_authors": "1906;948;1033;308", "reply_reviewers": "0;0;0;1", "reply_authors": "4;2;2;1", "recommendation_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 59.75, 31.799174517587716 ], "wc_strength_and_weaknesses_avg": [ 169.25, 23.047505287991584 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 36.75, 34.83084121866711 ], "wc_summary_review_avg": [ 34.5, 14.326548781894402 ], "wc_review_avg": [ 300.25, 26.76167969317322 ], "wc_reply_reviewers_avg": [ 4.0, 6.928203230275509 ], "wc_reply_authors_avg": [ 1048.75, 568.7676920325205 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.7777777777777777, "corr_recommendation_correctness": 0.9622504486493761, "gs_citation": 83, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6426199858304000749&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "pdf": "https://openreview.net/pdf?id=6ve2CkeQe5S", "email": "ed.ac.uk;samsung.com;qmul.ac.uk", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Edinburgh;Samsung;Queen Mary University of London", "aff_unique_dep": ";AI Research;", "aff_unique_url": "https://www.ed.ac.uk;https://www.samsung.com/global/researchers/samsung-ai-research-centre/;https://www.qmul.ac.uk", "aff_unique_abbr": "Edinburgh;SARC;QMUL", "aff_campus_unique_index": "1", "aff_campus_unique": ";London", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United Kingdom;South 
Korea" }, { "title": "Beyond calibration: estimating the grouping loss of modern neural networks", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11792", "id": "6w1k-IixnL8", "poster": "/media/PosterPDFs/ICLR%202023/11792.png?t=1682975140.5679522", "openreview": "https://openreview.net/forum?id=6w1k-IixnL8", "slides": "https://iclr.cc/virtual/2023/poster/11792", "video": "https://iclr.cc/virtual/2023/poster/11792", "author_site": "Alexandre Perez-Lebel, Marine Le Morvan, Gael Varoquaux", "tldr": "We provide an estimator to evaluate confidence scores beyond calibration, revealing the subgroups heterogeneities that undermine individual predicted probabilities.", "abstract": "The ability to ensure that a classifier gives reliable confidence scores is essential to ensure informed decision-making. To this end, recent work has focused on miscalibration, i.e., the over or under confidence of model scores. Yet calibration is not enough: even a perfectly calibrated classifier with the best possible accuracy can have confidence scores that are far from the true posterior probabilities. This is due to the grouping loss, created by samples with the same confidence scores but different true posterior probabilities. Proper scoring rule theory shows that given the calibration loss, the missing piece to characterize individual errors is the grouping loss. While there are many estimators of the calibration loss, none exists for the grouping loss in standard settings. Here, we propose an estimator to approximate the grouping loss. We show that modern neural network architectures in vision and NLP exhibit grouping loss, notably in distribution shifts settings, which highlights the importance of pre-production validation.", "keywords": "calibration;grouping loss;decision making;model evaluation", "primary_area": "", "supplementary_material": "/attachment/6598f68d671e925a146b995100f714e3ad4e33e3.zip", "author": "Alexandre Perez-Lebel;Marine Le Morvan;Gael Varoquaux", "authorids": "~Alexandre_Perez-Lebel1;~Marine_Le_Morvan2;~Gael_Varoquaux1", "gender": ";F;M", "homepage": ";https://marinelm.github.io/;http://gael-varoquaux.info", "dblp": ";202/2253;36/7585", "google_scholar": "A0M2jTMAAAAJ;wbTRhwcAAAAJ;https://scholar.google.fr/citations?user=OGGu384AAAAJ", "orcid": "my-orcid?orcid=0000-0003-0556-0763;;", "linkedin": "perez-alexandre/;;", "or_profile": "~Alexandre_Perez-Lebel1;~Marine_Le_Morvan2;~Gael_Varoquaux1", "aff": "INRIA;INRIA;INRIA", "aff_domain": "inria.fr;inria.fr;inria.fr", "position": "PhD student;Researcher;Full Professor", "bibtex": "@inproceedings{\nperez-lebel2023beyond,\ntitle={Beyond calibration: estimating the grouping loss of modern neural networks},\nauthor={Alexandre Perez-Lebel and Marine Le Morvan and Gael Varoquaux},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=6w1k-IixnL8}\n}", "github": "", "project": "", "reviewers": "rpCw;sCnp;LCym", "pdf_size": 6195479, "recommendation": "3;8;8", "confidence": "5;3;2", "correctness": "4;4;4", "technical_novelty": "2;4;3", "empirical_novelty": "2;4;3", "wc_summary_paper": "61;40;104", "wc_strength_and_weaknesses": "113;422;282", "wc_clarity_quality_novelty_and_reproducibility": "466;31;59", "wc_summary_review": "13;38;168", "wc_review": "653;531;613", "wc_reply_reviewers": "209;133;147", "wc_reply_authors": "1457;1257;777", "reply_reviewers": "1;1;1", "reply_authors": "3;3;2", "recommendation_avg": [ 6.333333333333333, 
2.357022603955158 ], "confidence_avg": [ 3.3333333333333335, 1.247219128924647 ], "correctness_avg": [ 4.0, 0.0 ], "technical_novelty_avg": [ 3.0, 0.816496580927726 ], "empirical_novelty_avg": [ 3.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 68.33333333333333, 26.637484032009397 ], "wc_strength_and_weaknesses_avg": [ 272.3333333333333, 126.33377308630587 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 185.33333333333334, 198.79023003044074 ], "wc_summary_review_avg": [ 73.0, 67.94605703546503 ], "wc_review_avg": [ 599.0, 50.78057371344544 ], "wc_reply_reviewers_avg": [ 163.0, 33.0252428706689 ], "wc_reply_authors_avg": [ 1163.6666666666667, 285.345794120436 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.9449111825230683, "corr_recommendation_correctness": 0.0, "gs_citation": 37, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8806679321330250984&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=6w1k-IixnL8", "email": "inria.fr;inria.fr;inria.fr", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "INRIA", "aff_unique_dep": "", "aff_unique_url": "https://www.inria.fr", "aff_unique_abbr": "INRIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "France" }, { "title": "SGDA with shuffling: faster convergence for nonconvex-P\u0141 minimax optimization", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11501", "id": "6xXtM8bFFJ", "poster": "/media/PosterPDFs/ICLR%202023/11501.png?t=1682774858.6094716", "openreview": "https://openreview.net/forum?id=6xXtM8bFFJ", "slides": "https://iclr.cc/virtual/2023/poster/11501", "video": "https://iclr.cc/virtual/2023/poster/11501", "author_site": "Hanseul Cho, Chulhee Yun", "tldr": "We study the convergence bounds of (mini-batch) SGDA with random reshuffling for nonconvex-P\u0141 and primal-P\u0141-P\u0141 problems.", "abstract": "Stochastic gradient descent-ascent (SGDA) is one of the main workhorses for solving finite-sum minimax optimization problems. Most practical implementations of SGDA randomly reshuffle components and sequentially use them (i.e., without-replacement sampling); however, there are few theoretical results on this approach for minimax algorithms, especially outside the easier-to-analyze (strongly-)monotone setups. To narrow this gap, we study the convergence bounds of SGDA with random reshuffling (SGDA-RR) for smooth nonconvex-nonconcave objectives with Polyak-{\\L}ojasiewicz (P{\\L}) geometry. We analyze both simultaneous and alternating SGDA-RR for nonconvex-P{\\L} and primal-P{\\L}-P{\\L} objectives, and obtain convergence rates faster than with-replacement SGDA. Our rates extend to mini-batch SGDA-RR, recovering known rates for full-batch gradient descent-ascent (GDA). 
Lastly, we present a comprehensive lower bound for GDA with an arbitrary step-size ratio, which matches the full-batch upper bound for the primal-P{\\L}-P{\\L} case.", "keywords": "minimax optimization;SGDA;without-replacement sampling;random reshuffling;Polyak-\u0141ojasiewicz", "primary_area": "", "supplementary_material": "", "author": "Hanseul Cho;Chulhee Yun", "authorids": "~Hanseul_Cho1;~Chulhee_Yun1", "gender": "M;M", "homepage": "https://hanseuljo.github.io/;https://chulheeyun.github.io/", "dblp": "233/5755-2;138/0148.html", "google_scholar": "IczOXwsAAAAJ;Ukl64ggAAAAJ", "orcid": "0009-0001-0410-0290;", "linkedin": "hanseul-cho-66b01a260/;", "or_profile": "~Hanseul_Cho1;~Chulhee_Yun1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr", "position": "MS student;Assistant Professor", "bibtex": "@inproceedings{\ncho2023sgda,\ntitle={{SGDA} with shuffling: faster convergence for nonconvex-P{\\L} minimax optimization},\nauthor={Hanseul Cho and Chulhee Yun},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=6xXtM8bFFJ}\n}", "github": "", "project": "", "reviewers": "CLz9;9Nrg;8wv3;s6TB", "pdf_size": 4616406, "recommendation": "6;6;8;8", "confidence": "4;4;3;4", "correctness": "4;4;4;4", "technical_novelty": "3;2;3;4", "empirical_novelty": "0;0;0;0", "wc_summary_paper": "32;64;62;66", "wc_strength_and_weaknesses": "148;62;42;224", "wc_clarity_quality_novelty_and_reproducibility": "8;36;13;8", "wc_summary_review": "22;447;207;109", "wc_review": "210;609;324;407", "wc_reply_reviewers": "0;623;0;0", "wc_reply_authors": "583;3293;874;134", "reply_reviewers": "0;2;0;0", "reply_authors": "1;6;2;1", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 4.0, 0.0 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 0, 0 ], "wc_summary_paper_avg": [ 56.0, 13.92838827718412 ], "wc_strength_and_weaknesses_avg": [ 119.0, 72.53275122315436 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 16.25, 11.583932838203095 ], "wc_summary_review_avg": [ 196.25, 158.87632768918093 ], "wc_review_avg": [ 387.5, 145.75750409498647 ], "wc_reply_reviewers_avg": [ 155.75, 269.76691327885266 ], "wc_reply_authors_avg": [ 1221.0, 1224.969999632644 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.5, 2.0615528128088303 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16039842393787333051&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=6xXtM8bFFJ", "email": "kaist.ac.kr;kaist.ac.kr", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "id": "6yaLHYv5L91", "title": "The Ultimate Combo: Boosting Adversarial Example Transferability by Composing Data Augmentations", "track": "main", "status": "Reject", "tldr": "We comprehensively studied data-augmentation methods for enhancing the transferability of adversarial examples, 
finding compositions that work best, and advancing the state of the art.", "abstract": "Transferring adversarial examples from surrogate (ML) models to evade target models is a common method for evaluating adversarial robustness in black-box settings. Researchers have invested substantial efforts to enhance transferability. Chiefly, attacks leveraging data augmentation have been found to help adversarial examples generalize better from surrogate to target models. Still, prior work has explored a limited set of augmentation techniques and their composition. To fill the gap, we conducted a systematic, comprehensive study of how data augmentation affects transferability. Particularly, we explored ten augmentation techniques of six categories originally proposed to help ML models generalize to unseen benign samples, and assessed how they influence transferability, both when applied individually and when composed. Our extensive experiments with the ImageNet dataset showed that simple color-space augmentations (e.g., color to greyscale) outperform the state of the art when combined with standard augmentations, such as translation and scaling. Additionally, except for two methods that may harm transferability, we found that composing augmentation methods impacts transferability monotonically (i.e., more methods composed $\\rightarrow$ $\\ge$transferability)---the best composition we found significantly outperformed the state of the art (e.g., 95.6% vs. 90.9% average transferability from normally trained surrogates to other normally trained models). We provide intuitive, empirically supported explanations for why certain augmentations fail to improve transferability.", "keywords": "Adversarial machine learning;transferability;evasion;black-box attacks", "primary_area": "", "supplementary_material": "", "author": "Zebin Yun;Achi-Or Weingarten;Eyal Ronen;Mahmood Sharif", "authorids": "~Zebin_Yun1;~Achi-Or_Weingarten1;~Eyal_Ronen1;~Mahmood_Sharif2", "gender": "M;M;;M", "homepage": "https://yundaqwe.github.io/;;https://eyalro.net;https://mahmoods01.github.io/", "dblp": ";189/1578.html;180/7297.html;136/8393.html", "google_scholar": ";;jDyCADyMIQgC;nen9rA4AAAAJ", "orcid": ";;0000-0002-6013-7426;0000-0001-7661-2220", "linkedin": ";;;mahmood-sharif/", "or_profile": "~Zebin_Yun1;~Achi-Or_Weingarten1;~Eyal_Ronen1;~Mahmood_Sharif2", "aff": "Southern University of Science and Technology;Weizmann Institute of Science;Tel Aviv University;Tel Aviv University", "aff_domain": "sustech.edu;weizmann.ac.il;tau.ac.il;tau.ac.il", "position": "Undergrad student;MS student;Assistant Professor;Assistant Professor", "bibtex": "@misc{\nyun2023the,\ntitle={The Ultimate Combo: Boosting Adversarial Example Transferability by Composing Data Augmentations},\nauthor={Zebin Yun and Achi-Or Weingarten and Eyal Ronen and Mahmood Sharif},\nyear={2023},\nurl={https://openreview.net/forum?id=6yaLHYv5L91}\n}", "github": "", "project": "", "reviewers": "fNhP;T7Qz;KVr6;DKMA", "site": "https://openreview.net/forum?id=6yaLHYv5L91", "pdf_size": 1317995, "recommendation": "3;3;3;3", "confidence": "5;3;4;4", "correctness": "3;2;4;3", "technical_novelty": "1;2;2;2", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "31;19;81;49", "wc_strength_and_weaknesses": "176;207;486;193", "wc_clarity_quality_novelty_and_reproducibility": "41;31;33;32", "wc_summary_review": "40;33;114;22", "wc_review": "288;290;714;296", "wc_reply_reviewers": "77;0;0;0", "wc_reply_authors": "479;607;825;444", "reply_reviewers": "1;0;0;0", "reply_authors": "1;1;2;1", 
"recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 45.0, 23.366642891095847 ], "wc_strength_and_weaknesses_avg": [ 265.5, 127.77812801884366 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 34.25, 3.960744879438715 ], "wc_summary_review_avg": [ 52.25, 36.22412869897632 ], "wc_review_avg": [ 397.0, 183.04371062672436 ], "wc_reply_reviewers_avg": [ 19.25, 33.34197804570089 ], "wc_reply_authors_avg": [ 588.75, 149.28559039639424 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:25JwV78-N_EJ:scholar.google.com/&scioq=The+Ultimate+Combo:+Boosting+Adversarial+Example+Transferability+by+Composing+Data+Augmentations&hl=en&as_sdt=0,48", "gs_version_total": 6, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "Southern University of Science and Technology;Weizmann Institute of Science;Tel Aviv University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.sustech.edu.cn;https://www.weizmann.org.il;https://www.tau.ac.il", "aff_unique_abbr": "SUSTech;Weizmann;TAU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "China;Israel" }, { "title": "Parallel Deep Neural Networks Have Zero Duality Gap", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11942", "id": "6zrOr_Rdhjs", "poster": "/media/PosterPDFs/ICLR%202023/11942.png?t=1681778655.0613575", "openreview": "https://openreview.net/forum?id=6zrOr_Rdhjs", "slides": "https://iclr.cc/virtual/2023/poster/11942", "video": "https://iclr.cc/virtual/2023/poster/11942", "author_site": "Yifei Wang, Tolga Ergen, Mert Pilanci", "tldr": "", "abstract": "Training deep neural networks is a challenging non-convex optimization problem. Recent work has proven that the strong duality holds (which means zero duality gap) for regularized finite-width two-layer ReLU networks and consequently provided an equivalent convex training problem. However, extending this result to deeper networks remains to be an open problem. In this paper, we prove that the duality gap for deeper linear networks with vector outputs is non-zero. In contrast, we show that the zero duality gap can be obtained by stacking standard deep networks in parallel, which we call a parallel architecture, and modifying the regularization. Therefore, we prove the strong duality and existence of equivalent convex problems that enable globally optimal training of deep networks. As a by-product of our analysis, we demonstrate that the weight decay regularization on the network parameters explicitly encourages low-rank solutions via closed-form expressions. 
In addition, we show that strong duality holds for three-layer standard ReLU networks given rank-1 data matrices.", "keywords": "Deep neural networks;Convex duality;Convex optimization", "primary_area": "", "supplementary_material": "", "author": "Yifei Wang;Tolga Ergen;Mert Pilanci", "authorids": "~Yifei_Wang2;~Tolga_Ergen1;~Mert_Pilanci3", "gender": "M;M;M", "homepage": "http://web.stanford.edu/~wangyf18/;https://tolgaergen.github.io/;https://stanford.edu/~pilanci/", "dblp": ";202/7477.html;45/8056", "google_scholar": ";https://scholar.google.com.tr/citations?user=T1pWaCsAAAAJ;aSAS-aAAAAAJ", "orcid": ";0000-0003-4806-0224;", "linkedin": ";;mert-pilanci-ba615743/", "or_profile": "~Yifei_Wang2;~Tolga_Ergen1;~Mert_Pilanci3", "aff": "Stanford University;Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nwang2023parallel,\ntitle={Parallel Deep Neural Networks Have Zero Duality Gap},\nauthor={Yifei Wang and Tolga Ergen and Mert Pilanci},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=6zrOr_Rdhjs}\n}", "github": "", "project": "", "reviewers": "Lv6H;BG6i;jwcy;RuMQ", "pdf_size": 409439, "recommendation": "3;6;6;8", "confidence": "4;3;3;3", "correctness": "3;3;4;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "1;0;0;0", "wc_summary_paper": "84;84;68;95", "wc_strength_and_weaknesses": "129;137;124;97", "wc_clarity_quality_novelty_and_reproducibility": "384;33;478;22", "wc_summary_review": "16;46;111;22", "wc_review": "613;300;781;236", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.75, 1.7853571071357126 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 0.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 82.75, 9.627434756984853 ], "wc_strength_and_weaknesses_avg": [ 121.75, 15.022899187573616 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 229.25, 204.50595957086435 ], "wc_summary_review_avg": [ 48.75, 37.652191171298384 ], "wc_review_avg": [ 482.5, 223.71913194896854 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.8892972917998875, "corr_recommendation_correctness": 0.08084520834544431, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9697613792468524971&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "pdf": "https://openreview.net/pdf?id=6zrOr_Rdhjs", "email": "stanford.edu;stanford.edu;stanford.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "70-hEqC4Wo8", "title": "Accelerating spiking neural network training using the $d$-block model", "track": "main", "status": "Reject", "tldr": "We propose a new SNN model which obtains accelerated training and state-of-the-art performance across various neuromorphic datasets without 
the need for any regularisation and using fewer spikes compared to standard SNNs.", "abstract": "There is a growing interest in using spiking neural networks (SNNs) to study the brain \textit{in silico} and in emulating them on neuromorphic computers due to their lower energy consumption compared to artificial neural networks (ANNs). Significant progress has been made in directly training SNNs to perform on a par with ANNs in terms of accuracy. However, these methods are slow due to their sequential nature and require careful network regularisation to avoid overfitting. We propose a new SNN model, the $d$-block model, with stochastic absolute refractory periods and recurrent conductance latencies, which reduces the number of sequential computations using fast vectorised operations. Our model obtains accelerated training speeds and state-of-the-art performance across various neuromorphic datasets without the need for any regularisation and using fewer spikes compared to standard SNNs.", "keywords": "spiking neural networks;accelerated training;stochastic refractory period;stochastic recurrent conductance latency", "primary_area": "", "supplementary_material": "", "author": "Luke Taylor;Andrew J King;Nicol Spencer Harper", "authorids": "~Luke_Taylor1;~Andrew_J_King1;~Nicol_Spencer_Harper1", "gender": ";M;", "homepage": ";;https://www.dpag.ox.ac.uk/team/nicol-harper", "dblp": "205/2581;;", "google_scholar": "https://scholar.google.co.za/citations?user=C3DoHSkAAAAJ;;https://scholar.google.co.uk/citations?user=GUALUxwAAAAJ", "orcid": ";0000-0001-5180-7179;", "linkedin": "luke-t-7963078a/;;", "or_profile": "~Luke_Taylor1;~Andrew_J_King1;~Nicol_Spencer_Harper1", "aff": "University of Oxford;University of Oxford;University of Oxford", "aff_domain": "ox.ac.uk;oxford.ac.uk;ox.ac.uk", "position": "PhD student;Full Professor;Postdoc", "bibtex": "@misc{\ntaylor2023accelerating,\ntitle={Accelerating spiking neural network training using the \\$d\\$-block model},\nauthor={Luke Taylor and Andrew J King and Nicol Spencer Harper},\nyear={2023},\nurl={https://openreview.net/forum?id=70-hEqC4Wo8}\n}", "github": "", "project": "", "reviewers": "jrgn;Yn85;b7YL;D9Qe;ddei", "site": "https://openreview.net/forum?id=70-hEqC4Wo8", "pdf_size": 2854514, "recommendation": "3;3;3;5;6", "confidence": "3;4;4;4;4", "correctness": "3;2;3;3;4", "technical_novelty": "2;2;2;2;2", "empirical_novelty": "3;2;3;3;3", "wc_summary_paper": "92;54;33;65;34", "wc_strength_and_weaknesses": "40;169;195;156;80", "wc_clarity_quality_novelty_and_reproducibility": "36;46;53;45;43", "wc_summary_review": "14;20;22;64;14", "wc_review": "182;289;303;330;171", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;0;0", "reply_authors": "0;0;0;0;0", "recommendation_avg": [ 4.0, 1.2649110640673518 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "correctness_avg": [ 3.0, 0.6324555320336759 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_paper_avg": [ 55.6, 21.87784267243916 ], "wc_strength_and_weaknesses_avg": [ 128.0, 58.312948819280265 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 44.6, 5.4626001134990645 ], "wc_summary_review_avg": [ 26.8, 18.87220177933672 ], "wc_review_avg": [ 255.0, 65.5286197016235 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence":
0.39528470752104744, "corr_recommendation_correctness": 0.7499999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:kD3UoO7RSncJ:scholar.google.com/&scioq=Accelerating+spiking+neural+network+training+using+the+%24d%24-block+model&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Oxford", "aff_unique_dep": "", "aff_unique_url": "https://www.ox.ac.uk", "aff_unique_abbr": "Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "70BaDC5ceIO", "title": "Neural Network Approximations of PDEs Beyond Linearity: Representational Perspective", "track": "main", "status": "Reject", "tldr": "", "abstract": "A burgeoning line of research has developed deep neural networks capable of approximating the solutions to high dimensional PDEs, opening related lines of theoretical inquiry focused on explaining how it is that these models appear to evade the curse of dimensionality. However, most theoretical analyses thus far have been limited to simple linear PDEs. In this work, we take a step towards studying the representational power of neural networks for approximating solutions to nonlinear PDEs. We focus on a class of PDEs known as nonlinear elliptic variational PDEs, whose solutions minimize an Euler-Lagrange energy functional $\mathcal{E}(u) = \int_\Omega L(\nabla u) dx$. We show that if composing a function with Barron norm $b$ with $L$ produces a function of Barron norm at most $B_L b^p$, the solution to the PDE can be $\epsilon$-approximated in the $L^2$ sense by a function with Barron norm $O\left(\left(dB_L\right)^{p^{\log(1/\epsilon)}}\right)$. By a classical result due to \cite{barron1993universal}, this correspondingly bounds the size of a 2-layer neural network needed to approximate the solution. Treating $p, \epsilon, B_L$ as constants, this quantity is polynomial in dimension, thus showing neural networks can evade the curse of dimensionality. Our proof technique involves neurally simulating (preconditioned) gradient descent in an appropriate Hilbert space, which converges exponentially fast to the solution of the PDE, and such that we can bound the increase of the Barron norm at each iterate. Our results subsume and substantially generalize analogous prior results for linear elliptic PDEs. 
", "keywords": "PDE;Partial Differential Equations;Deep Learning Theory;Universal Approximation", "primary_area": "", "supplementary_material": "/attachment/d5e5b310b8b9e13fbb5d5aef4fca2b1ce143a857.zip", "author": "Tanya Marwah;Zachary Chase Lipton;Jianfeng Lu;Andrej Risteski", "authorids": "~Tanya_Marwah1;~Zachary_Chase_Lipton1;~Jianfeng_Lu1;~Andrej_Risteski2", "gender": "F;Unspecified;M;M", "homepage": "https://tm157.github.io/;http://zacklipton.com;https://services.math.duke.edu/~jianfeng/;", "dblp": "190/7486;;82/6187-1.html;63/11143", "google_scholar": "_Y_XvN4AAAAJ;MN9Kfg8AAAAJ;ej9SRrAAAAAJ;", "orcid": ";;0000-0001-6255-5165;", "linkedin": ";;;", "or_profile": "~Tanya_Marwah1;~Zachary_Chase_Lipton1;~Jianfeng_Lu1;~Andrej_Risteski2", "aff": "Carnegie Mellon University;Carnegie Mellon University;Duke University;Carnegie Mellon University", "aff_domain": "cmu.edu;cmu.edu;duke.edu;cmu.edu", "position": "PhD student;Assistant Professor;Professor;Assistant Professor", "bibtex": "@misc{\nmarwah2023neural,\ntitle={Neural Network Approximations of {PDE}s Beyond Linearity: Representational Perspective},\nauthor={Tanya Marwah and Zachary Chase Lipton and Jianfeng Lu and Andrej Risteski},\nyear={2023},\nurl={https://openreview.net/forum?id=70BaDC5ceIO}\n}", "github": "", "project": "", "reviewers": "AUeL;23dt;N9yD;4yVB", "site": "https://openreview.net/forum?id=70BaDC5ceIO", "pdf_size": 358945, "recommendation": "3;6;6;8", "confidence": "3;2;3;4", "correctness": "3;3;3;4", "technical_novelty": "2;3;3;3", "empirical_novelty": "0;3;0;0", "wc_summary_paper": "151;62;131;106", "wc_strength_and_weaknesses": "140;84;532;132", "wc_clarity_quality_novelty_and_reproducibility": "162;27;62;107", "wc_summary_review": "1077;38;105;63", "wc_review": "1530;211;830;408", "wc_reply_reviewers": "1615;10;47;0", "wc_reply_authors": "3273;335;550;120", "reply_reviewers": "8;1;1;0", "reply_authors": "10;2;2;1", "recommendation_avg": [ 5.75, 1.7853571071357126 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 0.75, 1.299038105676658 ], "wc_summary_paper_avg": [ 112.5, 33.23025729662652 ], "wc_strength_and_weaknesses_avg": [ 222.0, 180.2553743997665 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 89.5, 50.55937104039171 ], "wc_summary_review_avg": [ 320.75, 437.27701460287165 ], "wc_review_avg": [ 744.75, 505.51329112101496 ], "wc_reply_reviewers_avg": [ 418.0, 691.309988355441 ], "wc_reply_authors_avg": [ 1069.5, 1281.2428536386067 ], "reply_reviewers_avg": [ 2.5, 3.2015621187164243 ], "reply_authors_avg": [ 3.75, 3.6314597615834874 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.39605901719066966, "corr_recommendation_correctness": 0.7276068751089989, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15063349438366195415&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Carnegie Mellon University;Duke University", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://www.duke.edu", "aff_unique_abbr": "CMU;Duke", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "70_umOqc6_-", "title": "Motif-based Graph Representation Learning with Application to Chemical Molecules", "track": "main", "status": "Withdraw", "tldr": "We propose a motif-based 
representation learning method to better capture local structure information and demonstrate the performance and explainability advantages on molecular benchmarks. ", "abstract": "This work considers the task of representation learning on the attributed relational graph (ARG). Both the nodes and edges in an ARG are associated with attributes/features allowing ARGs to encode rich structural information widely observed in real applications. Existing graph neural networks offer limited ability to capture complex interactions within local structural contexts, which hinders them from taking advantage of the expression power of ARGs. We propose Motif Convolution Module (MCM), a new motif-based graph representation learning technique to better utilize local structural information. The ability to handle continuous edge and node features is one of MCM's advantages over existing motif-based models. MCM builds a motif vocabulary in an unsupervised way and deploys a novel motif convolution operation to extract the local structural context of individual nodes, which is then used to learn higher-level node representations via multilayer perceptron and/or message passing in graph neural networks. When compared with other graph learning approaches to classifying synthetic graphs, our approach is substantially better in capturing structural context. We also demonstrate the performance and explainability advantages of our approach by applying it to several molecular benchmarks.", "keywords": "Graph Neural Networks;Molecular Graph Representation", "primary_area": "", "supplementary_material": "/attachment/0bb33d24b77a68f8f95b585ad78406f54feb4f87.zip", "author": "Yifei Wang;Shiyang Chen;Guobin Chen;Ethan Shurberg;Hang Liu;Pengyu Hong", "authorids": "~Yifei_Wang3;~Shiyang_Chen1;~Guobin_Chen4;~Ethan_Shurberg1;~Hang_Liu3;~Pengyu_Hong1", "gender": "F;;M;;M;M", "homepage": "https://yifeiwang15.github.io;;;;http://www.cs.brandeis.edu/~hong/;https://asherliu.github.io/", "dblp": "00/555-2;;;;89/4734;43/6690-1.html", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;;;https://scholar.google.com.tw/citations?user=pvDa8pcAAAAJ;TGDrPLOkJIIC", "orcid": "0000-0002-8295-5534;0000-0003-2626-7865;;;0000-0002-3177-2754;", "linkedin": "yifei-wang-4980691b8/;;guobin-chen-a197141a9/;ethan-shurberg-741568178/;;", "or_profile": "~Yifei_Wang3;~Shiyang_Chen1;~Guobin_Chen4;~Ethan_Shurberg1;~Pengyu_Hong1;~Hang_Liu2", "aff": "Brandeis University;Rutgers University;;Brandeis University;Brandeis University;Rutgers University", "aff_domain": "brandeis.edu;stevens.edu;;brandeis.edu;brandeis.edu;rutgers.edu", "position": "PhD student;PhD student;;Undergrad student;Full Professor;Assistant Professor", "bibtex": "@misc{\nwang2023motifbased,\ntitle={Motif-based Graph Representation Learning with Application to Chemical Molecules},\nauthor={Yifei Wang and Shiyang Chen and Guobin Chen and Ethan Shurberg and Hang Liu and Pengyu Hong},\nyear={2023},\nurl={https://openreview.net/forum?id=70_umOqc6_-}\n}", "github": "", "project": "", "reviewers": "q2Jg;VGPD;fhx7;kUW6", "site": "https://openreview.net/forum?id=70_umOqc6_-", "pdf_size": 5215717, "recommendation": "3;5;5;5", "confidence": "4;3;2;5", "correctness": "4;4;2;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "0;3;2;3", "wc_summary_paper": "24;40;21;173", "wc_strength_and_weaknesses": "136;127;308;1005", "wc_clarity_quality_novelty_and_reproducibility": "2;40;7;77", "wc_summary_review": "19;21;32;95", "wc_review": "181;228;368;1350", "wc_reply_reviewers": "0;0;0;0", 
"wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.0, 1.224744871391589 ], "wc_summary_paper_avg": [ 64.5, 63.05751343020116 ], "wc_strength_and_weaknesses_avg": [ 394.0, 360.05902293929535 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 31.5, 30.054117854297438 ], "wc_summary_review_avg": [ 41.75, 31.13980571551467 ], "wc_review_avg": [ 531.75, 477.3983530553913 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.2581988897471611, "corr_recommendation_correctness": -0.5222329678670935, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15859396423170566826&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 13, "aff_unique_index": "0;1;0;0;1", "aff_unique_norm": "Brandeis University;Rutgers University", "aff_unique_dep": ";", "aff_unique_url": "https://www.brandeis.edu;https://www.rutgers.edu", "aff_unique_abbr": "Brandeis;Rutgers", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "72ICa7Wb4ui", "title": "Automatic Clipping: Differentially Private Deep Learning Made Easier and Stronger", "track": "main", "status": "Reject", "tldr": "We propose automatic DP optimizers that do not need to tune the clipping threshold, with convergence proof and SOTA accuracy.", "abstract": "Per-example gradient clipping is a key algorithmic step that enables practical differential private (DP) training for deep learning models. The choice of clipping threshold $R$, however, is shown to be vital for achieving high accuracy under DP. We propose an easy-to-use replacement, called automatic clipping, that eliminates the need to tune $R$ for any DP optimizers, including DP-SGD, DP-Adam, DP-LAMB and many others.\nThe automatic variants are as private and computationally efficient as existing DP optimizers, but require no DP-specific hyperparameters and thus make DP training as amenable as the standard non-private training. 
We give a rigorous convergence analysis of automatic DP-SGD in the non-convex setting, which shows that it can enjoy an asymptotic convergence rate that matches the standard SGD, under a symmetric gradient noise assumption of the per-sample gradients. We also demonstrate on various language and vision tasks that automatic clipping outperforms or matches the state-of-the-art, and can be easily employed with minimal changes to existing codebases.", "keywords": "deep learning;differential privacy;per-sample gradient clipping;convergence", "primary_area": "", "supplementary_material": "/attachment/2ca597a3234ccd2c8268be3d02bc85e30c573e7d.zip", "author": "Zhiqi Bu;Yu-Xiang Wang;Sheng Zha;George Karypis", "authorids": "~Zhiqi_Bu1;~Yu-Xiang_Wang1;~Sheng_Zha1;~George_Karypis1", "gender": "M;;M;M", "homepage": "https://sites.google.com/view/zhiqi-bu;http://www.cs.ucsb.edu/~yuxiangw/publications.html;https://github.com/szha;", "dblp": "245/2573;62/1637-3.html;218/5471;", "google_scholar": "MEvTLxIAAAAJ;HGNZ1fkAAAAJ;;ElqwScwAAAAJ", "orcid": ";;;", "linkedin": ";;shengzha/;", "or_profile": "~Zhiqi_Bu1;~Yu-Xiang_Wang1;~Sheng_Zha1;~George_Karypis1", "aff": "Amazon;UC Santa Barbara;Amazon;University of Minnesota, Minneapolis", "aff_domain": "amazon.com;ucsb.edu;amazon.com;umn.edu", "position": "Researcher;Assistant Professor;Researcher;Full Professor", "bibtex": "@misc{\nbu2023automatic,\ntitle={Automatic Clipping: Differentially Private Deep Learning Made Easier and Stronger},\nauthor={Zhiqi Bu and Yu-Xiang Wang and Sheng Zha and George Karypis},\nyear={2023},\nurl={https://openreview.net/forum?id=72ICa7Wb4ui}\n}", "github": "", "project": "", "reviewers": "adTx;LbVy;6ygh", "site": "https://openreview.net/forum?id=72ICa7Wb4ui", "pdf_size": 512882, "recommendation": "3;5;6", "confidence": "4;4;3", "correctness": "3;3;3", "technical_novelty": "2;2;4", "empirical_novelty": "2;2;4", "wc_summary_paper": "51;67;297", "wc_strength_and_weaknesses": "140;517;458", "wc_clarity_quality_novelty_and_reproducibility": "51;121;57", "wc_summary_review": "53;46;101", "wc_review": "295;751;913", "wc_reply_reviewers": "209;0;0", "wc_reply_authors": "900;794;493", "reply_reviewers": "1;0;0", "reply_authors": "2;1;1", "recommendation_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.6666666666666665, 0.9428090415820634 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.9428090415820634 ], "wc_summary_paper_avg": [ 138.33333333333334, 112.38426145249265 ], "wc_strength_and_weaknesses_avg": [ 371.6666666666667, 165.57442099819914 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 76.33333333333333, 31.678944988044595 ], "wc_summary_review_avg": [ 66.66666666666667, 24.44494948973214 ], "wc_review_avg": [ 653.0, 261.6409753842085 ], "wc_reply_reviewers_avg": [ 69.66666666666667, 98.52354484532562 ], "wc_reply_authors_avg": [ 729.0, 172.3968290504981 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.7559289460184545, "corr_recommendation_correctness": 0.0, "gs_citation": 80, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14351327138950702901&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Amazon;University of California, Santa Barbara;University of
Minnesota", "aff_unique_dep": "Amazon.com, Inc.;;", "aff_unique_url": "https://www.amazon.com;https://www.ucsb.edu;https://www.minnesota.edu", "aff_unique_abbr": "Amazon;UCSB;UMN", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Santa Barbara;Minneapolis", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "72lzvXrKqqd", "title": "On the Importance of In-distribution Class Prior for Out-of-distribution Detection", "track": "main", "status": "Reject", "tldr": "", "abstract": "Given a pre-trained in-distribution (ID) model, the task of inference-time out-of-distribution (OOD) detection methods aims to recognize upcoming OOD data in inference time. However, some representative methods share an unproven assumption that the probability that OOD data belong to every ID class should be the same, i.e., probabilities that OOD data belong to ID classes form a uniform distribution. In this paper, we theoretically and empirically show that this assumption makes these methods incapable of recognizing OOD data when the ID model is trained with class-imbalanced data. Fortunately, by analyzing the causal relations between ID/OOD classes and features, we identify several common scenarios where probabilities that OOD data belong to ID classes should be the ID-class-prior distribution. Based on the above finding, we propose two effective strategies to modify previous inference-time OOD detection methods: 1) if they explicitly use the uniform distribution, we can replace the uniform distribution with the ID-class-prior distribution; 2) otherwise, we can reweight their scores according to the similarity between the ID-class-prior distribution and the softmax outputs of the pre-trained model. Extensive experiments show that both strategies significantly improve the accuracy of recognizing OOD data when the ID model is pre-trained with imbalanced data. 
As a highlight, when evaluated on the iNaturalist dataset, our method achieves a ~36% increase in AUROC and a ~61% decrease in FPR95 compared with the original Energy method, reflecting the importance of the ID-class prior in OOD detection and opening a new direction for studying this problem.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Xue Jiang;Feng Liu;Zhen Fang;Hong Chen;Tongliang Liu;Feng Zheng;Bo Han", "authorids": "~Xue_Jiang3;~Feng_Liu2;~Zhen_Fang2;~Hong_Chen1;~Tongliang_Liu1;~Feng_Zheng1;~Bo_Han1", "gender": "F;M;M;;M;M;M", "homepage": ";https://fengliu90.github.io/index.html;https://fang-zhen.github.io/index.html;https://chenhongml.github.io/;https://tongliang-liu.github.io/;http://faculty.sustech.edu.cn/fengzheng/;https://bhanml.github.io/", "dblp": ";77/1318-3;;https://dblp.uni-trier.de/pers/hd/c/Chen_0004:Hong;150/6667;39/800;241/0472-3", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en;OzD6WJcAAAAJ;;https://scholar.google.com.au/citations?user=EiLdZ_YAAAAJ;PcmyXHMAAAAJ;nTNjqHwAAAAJ", "orcid": "0000-0003-2577-2296;0000-0002-5005-9129;0000-0003-0602-6255;;;0000-0002-1701-9141;", "linkedin": ";alexfengliu;;;;;", "or_profile": "~Xue_Jiang3;~Feng_Liu2;~Zhen_Fang2;~Hong_Chen1;~Tongliang_Liu1;~Feng_Zheng1;~bo_han2", "aff": "Southern University of Science and Technology;University of Melbourne;University of Technology Sydney;Huazhong Agricultural University;University of Sydney;Southern University of Science and Technology;RIKEN", "aff_domain": "sustech.edu.cn;unimelb.edu.au;uts.edu.au;hzau.edu.cn;sydney.edu.au;sustech.edu.cn;riken.jp", "position": "PhD student;Assistant Professor;Postdoc;Full Professor;Lecturer;Associate Professor;Adjunct Scientist", "bibtex": "@misc{\njiang2023on,\ntitle={On the Importance of In-distribution Class Prior for Out-of-distribution Detection},\nauthor={Xue Jiang and Feng Liu and Zhen Fang and Hong Chen and Tongliang Liu and Feng Zheng and Bo Han},\nyear={2023},\nurl={https://openreview.net/forum?id=72lzvXrKqqd}\n}", "github": "", "project": "", "reviewers": "G6VC;uGHp;dyEV;vuMZ", "site": "https://openreview.net/forum?id=72lzvXrKqqd", "pdf_size": 1066620, "recommendation": "3;6;6;8", "confidence": "3;3;4;4", "correctness": "2;4;3;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "22;102;51;137", "wc_strength_and_weaknesses": "634;111;284;311", "wc_clarity_quality_novelty_and_reproducibility": "83;51;38;39", "wc_summary_review": "24;21;42;216", "wc_review": "763;285;415;703", "wc_reply_reviewers": "986;0;0;0", "wc_reply_authors": "4555;753;1763;1004", "reply_reviewers": "4;0;0;0", "reply_authors": "17;11;13;12", "recommendation_avg": [ 5.75, 1.7853571071357126 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 78.0, 44.50280890011326 ], "wc_strength_and_weaknesses_avg": [ 335.0, 188.914001598611 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 52.75, 18.198557635153396 ], "wc_summary_review_avg": [ 75.75, 81.37067960881241 ], "wc_review_avg": [ 541.5, 198.07763629445904 ], "wc_reply_reviewers_avg": [ 246.5, 426.95052406572825 ], "wc_reply_authors_avg": [ 2018.75, 1510.7790002180993 ], "reply_reviewers_avg": [ 1.0, 1.7320508075688772 ], "reply_authors_avg": [ 13.25, 2.277608394786075 ], "replies_avg": [ 66, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence":
0.7001400420140049, "corr_recommendation_correctness": 0.5940885257860046, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Ax56xWLjXooJ:scholar.google.com/&scioq=On+the+Importance+of+In-distribution+Class+Prior+for+Out-of-distribution+Detection&hl=en&as_sdt=0,14", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;4;0;5", "aff_unique_norm": "Southern University of Science and Technology;University of Melbourne;University of Technology Sydney;Huazhong Agricultural University;University of Sydney;RIKEN", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://www.sustech.edu.cn;https://www.unimelb.edu.au;https://www.uts.edu.au;http://www.hzau.edu.cn/;https://www.sydney.edu.au;https://www.riken.jp", "aff_unique_abbr": "SUSTech;UniMelb;UTS;HAU;USYD;RIKEN", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;1;0;2", "aff_country_unique": "China;Australia;Japan" }, { "id": "73U_NlKaNx", "title": "Time Series Subsequence Anomaly Detection via Graph Neural Networks", "track": "main", "status": "Reject", "tldr": "A graph neural network-based time series subsequence anomaly detection method consdering multiple effective heuristics. ", "abstract": "Time series subsequence anomaly detection is an important task in a large variety of real-world applications ranging from health monitoring to AIOps, and is challenging due to complicated underlying temporal dynamics and unpredictable anomalous patterns. Firstly, how to effectively learn the temporal dependency in time series remains a challenge. Secondly, diverse and complicated anomalous subsequences as well as the lack of labels make accurate detection difficult. For example, the popular subsequence anomaly detection algorithm---time series discord---fails to handle recurring anomalies. Thirdly, many existing algorithms require a proper subsequence length for effective detection, which is difficult or impossible in practice. In this paper, we present a novel approach to subsequence anomaly detection which combines practical heuristics of time series discords and temporal relationships with deep neural networks. By performing length selection considering multi-scale information and incorporating prior knowledge using graph neural networks, our method can adaptively learn the appropriate subsequence length as well as integrated representations from both priors and raw data favorable to anomaly detection. In particular, our graph incorporates both semantic and temporal relationships between subsequences. 
The experimental results demonstrate the effectiveness of the proposed algorithm, which achieves superior performance on multiple time series anomaly benchmarks in comparison with state-of-the-art algorithms.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Weiqi Chen;Zhiqiang Zhou;Qingsong Wen;Liang Sun", "authorids": "~Weiqi_Chen1;zhouzhiqiang.zzq@alibaba-inc.com;~Qingsong_Wen2;~Liang_Sun2", "gender": "M;;;M", "homepage": "https://github.com/DAMO-DI-ML;;;https://www.linkedin.com/in/liang-sun-a0a87621/", "dblp": ";;;18/5837-1", "google_scholar": "dMg_soMAAAAJ;;;D_cOMBgAAAAJ", "orcid": "0009-0007-9246-9402;;;0009-0002-5835-7259", "linkedin": ";;;", "or_profile": "~Weiqi_Chen1;zhouzhiqiang.zzq@alibaba-inc.com;~Qingsong_Wen2;~Liang_Sun2", "aff": "Alibaba Group;;;Alibaba Group", "aff_domain": "alibaba-inc.com;;;alibaba-inc.com", "position": "Researcher;;;Staff Software Engineer", "bibtex": "@misc{\nchen2023time,\ntitle={Time Series Subsequence Anomaly Detection via Graph Neural Networks},\nauthor={Weiqi Chen and Zhiqiang Zhou and Qingsong Wen and Liang Sun},\nyear={2023},\nurl={https://openreview.net/forum?id=73U_NlKaNx}\n}", "github": "", "project": "", "reviewers": "s3os;8d5u;s2pX;GTLu", "site": "https://openreview.net/forum?id=73U_NlKaNx", "pdf_size": 3377719, "recommendation": "3;5;5;6", "confidence": "4;3;4;4", "correctness": "2;3;3;3", "technical_novelty": "2;3;2;2", "empirical_novelty": "2;3;3;2", "wc_summary_paper": "74;158;119;91", "wc_strength_and_weaknesses": "445;69;211;243", "wc_clarity_quality_novelty_and_reproducibility": "4;22;77;42", "wc_summary_review": "33;38;47;123", "wc_review": "556;287;454;499", "wc_reply_reviewers": "307;0;0;48", "wc_reply_authors": "1064;365;896;286", "reply_reviewers": "1;0;0;1", "reply_authors": "2;1;2;1", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 110.5, 31.784430150625635 ], "wc_strength_and_weaknesses_avg": [ 242.0, 134.25721582097552 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 36.25, 27.095894522971555 ], "wc_summary_review_avg": [ 60.25, 36.574410453211684 ], "wc_review_avg": [ 449.0, 100.27212972705826 ], "wc_reply_reviewers_avg": [ 88.75, 127.52132174660048 ], "wc_reply_authors_avg": [ 652.75, 333.76741527596727 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.13245323570650439, "corr_recommendation_correctness": 0.9271726499455306, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17419554235237377231&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Alibaba Group", "aff_unique_dep": "", "aff_unique_url": "https://www.alibaba.com", "aff_unique_abbr": "Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Subquadratic Algorithms for Kernel Matrices via Kernel Density Estimation", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11134", "id": "74A-FDAyiL", "poster": "", "openreview": "https://openreview.net/forum?id=74A-FDAyiL", "slides": "https://iclr.cc/virtual/2023/poster/11134", "video": "https://iclr.cc/virtual/2023/poster/11134", "author_site": "Ainesh 
Bakshi, Piotr Indyk, Praneeth Kacham, Sandeep Silwal, Samson Zhou", "tldr": "We give a framework for using recently developed tools for kernel density estimation to solve downstream kernel problems in sub-quadratic time.", "abstract": "Kernel matrices, as well as weighted graphs represented by them, are ubiquitous objects in machine learning, statistics and other related fields. The main drawback of using kernel methods (learning and inference using kernel matrices) is efficiency -- given $n$ input points, most kernel-based algorithms need to materialize the full $n \\times n$ kernel matrix before performing any subsequent computation, thus incurring $\\Omega(n^2)$ runtime. Breaking this quadratic barrier for various problems has therefore been a subject of extensive research efforts. \n\nWe break the quadratic barrier and obtain \\emph{subquadratic} time algorithms for several fundamental linear-algebraic and graph processing primitives, including approximating the top eigenvalue and eigenvector, spectral sparsification, solving linear systems, local clustering, low-rank approximation, arboricity estimation and counting weighted triangles. We build on the recently developed Kernel Density Estimation framework, which (after preprocessing in time subquadratic in $n$) can return estimates of row/column sums of the kernel matrix. In particular, we develop efficient reductions from \\emph{weighted vertex} and \\emph{weighted edge sampling} on kernel graphs, \\emph{simulating random walks} on kernel graphs, and \\emph{importance sampling} on matrices to Kernel Density Estimation and show that we can generate samples from these distributions in \\emph{sublinear} (in the support of the distribution) time. Our reductions are the central ingredient in each of our applications and we believe they may be of independent interest.
We empirically demonstrate the efficacy of our algorithms on low-rank approximation (LRA) and spectral sparsification, where we observe a $\\textbf{9x}$ decrease in the number of kernel evaluations over baselines for LRA and a $\\textbf{41x}$ reduction in the graph size for spectral sparsification.", "keywords": "kernel density estimation;sublinear time algorithms", "primary_area": "", "supplementary_material": "/attachment/a36c54baf74da5bbc443f56a7881b0e97742cab7.zip", "author": "Ainesh Bakshi;Piotr Indyk;Praneeth Kacham;Sandeep Silwal;Samson Zhou", "authorids": "~Ainesh_Bakshi1;~Piotr_Indyk1;~Praneeth_Kacham1;~Sandeep_Silwal1;~Samson_Zhou1", "gender": "M;;M;M;", "homepage": "http://aineshbakshi.com/;https://people.csail.mit.edu/indyk/;https://www.praneethkacham.com;https://sandeepsilwal.com;https://samsonzhou.github.io/", "dblp": "132/1905;i/PiotrIndyk;255/5684;225/4637;179/2683", "google_scholar": ";oOwNKsAAAAAJ;hKhPmTkAAAAJ;MnDnUvcAAAAJ;NpjsgocAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Ainesh_Bakshi1;~Piotr_Indyk1;~Praneeth_Kacham1;~Sandeep_Silwal1;~Samson_Zhou1", "aff": "School of Computer Science, Carnegie Mellon University;Massachusetts Institute of Technology;Carnegie Mellon University;Massachusetts Institute of Technology;University of California, Berkeley", "aff_domain": "cs.cmu.edu;mit.edu;cmu.edu;mit.edu;berkeley.edu", "position": "PhD student;Full Professor;PhD student;PhD student;Postdoc", "bibtex": "@inproceedings{\nbakshi2023subquadratic,\ntitle={Subquadratic Algorithms for Kernel Matrices via Kernel Density Estimation},\nauthor={Ainesh Bakshi and Piotr Indyk and Praneeth Kacham and Sandeep Silwal and Samson Zhou},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=74A-FDAyiL}\n}", "github": "", "project": "", "reviewers": "dkUW;vw4c;Eddv", "pdf_size": 1524190, "recommendation": "8;8;8", "confidence": "3;3;4", "correctness": "4;3;4", "technical_novelty": "3;4;3", "empirical_novelty": "2;2;0", "wc_summary_paper": "51;127;87", "wc_strength_and_weaknesses": "96;401;169", "wc_clarity_quality_novelty_and_reproducibility": "58;62;111", "wc_summary_review": "36;565;16", "wc_review": "241;1155;383", "wc_reply_reviewers": "9;513;0", "wc_reply_authors": "666;1845;87", "reply_reviewers": "1;3;0", "reply_authors": "3;4;1", "recommendation_avg": [ 8.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.3333333333333333, 0.9428090415820634 ], "wc_summary_paper_avg": [ 88.33333333333333, 31.04119127152751 ], "wc_strength_and_weaknesses_avg": [ 222.0, 130.03332906092447 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 77.0, 24.097026095903757 ], "wc_summary_review_avg": [ 205.66666666666666, 254.21819149873775 ], "wc_review_avg": [ 593.0, 401.60013280210285 ], "wc_reply_reviewers_avg": [ 174.0, 239.73735628808456 ], "wc_reply_authors_avg": [ 866.0, 731.5011961712707 ], "reply_reviewers_avg": [ 1.3333333333333333, 1.247219128924647 ], "reply_authors_avg": [ 2.6666666666666665, 1.247219128924647 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6881553754487689173&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": 
"https://openreview.net/pdf?id=74A-FDAyiL", "email": "cs.cmu.edu;mit.edu;cmu.edu;mit.edu;berkeley.edu", "author_num": 5, "aff_unique_index": "0;1;0;1;2", "aff_unique_norm": "Carnegie Mellon University;Massachusetts Institute of Technology;University of California, Berkeley", "aff_unique_dep": "School of Computer Science;;", "aff_unique_url": "https://www.cmu.edu;https://web.mit.edu;https://www.berkeley.edu", "aff_unique_abbr": "CMU;MIT;UC Berkeley", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Pittsburgh;;Berkeley", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Learning Group Importance using the Differentiable Hypergeometric Distribution", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10707", "id": "75O7S_L4oY", "poster": "/media/PosterPDFs/ICLR%202023/10707.png?t=1682575109.4512513", "openreview": "https://openreview.net/forum?id=75O7S_L4oY", "slides": "https://iclr.cc/virtual/2023/poster/10707", "video": "https://iclr.cc/virtual/2023/poster/10707", "author_site": "Thomas Sutter, Laura Manduchi, Alain Ryser, Julia E Vogt", "tldr": "We propose the differentiable hypergeometric distribution and show the advantage of explicitly learning subset sizes.", "abstract": "Partitioning a set of elements into subsets of a priori unknown sizes is essential in many applications. These subset sizes are rarely explicitly learned - be it the cluster sizes in clustering applications or the number of shared versus independent generative latent factors in weakly-supervised learning. Probability distributions over correct combinations of subset sizes are non-differentiable due to hard constraints, which prohibit gradient-based optimization. In this work, we propose the differentiable hypergeometric distribution. The hypergeometric distribution models the probability of different group sizes based on their relative importance. We introduce reparameterizable gradients to learn the importance between groups and highlight the advantage of explicitly learning the size of subsets in two typical applications: weakly-supervised learning and clustering. In both applications, we outperform previous approaches, which rely on suboptimal heuristics to model the unknown size of groups.", "keywords": "hypergeometric distribution;weakly-supervised learning;reparameterization trick;group importance;variational clustering;gumbel softmax", "primary_area": "", "supplementary_material": "/attachment/3a50a5ec4d177ec36491d983c78f270688dc764f.zip", "author": "Thomas M. 
Sutter;Laura Manduchi;Alain Ryser;Julia E Vogt", "authorids": "~Thomas_M._Sutter1;~Laura_Manduchi2;~Alain_Ryser1;~Julia_E_Vogt1", "gender": "F;M;F;", "homepage": "https://mds.inf.ethz.ch/team/detail/laura-manduchi/;https://mds.inf.ethz.ch/team/detail/alain-ryser;http://mds.inf.ethz.ch;https://mds.inf.ethz.ch/", "dblp": "249/9257;230/3590;13/8412;259/0609", "google_scholar": ";https://scholar.google.ch/citations?user=l9tQ2agAAAAJ;UoeV-8kAAAAJ;eySN1UkAAAAJ", "orcid": ";;;", "linkedin": ";alain-r-0554441b5/;julia-vogt-50b53895;", "or_profile": "~Laura_Manduchi2;~Alain_Ryser1;~Julia_E_Vogt1;~Thomas_Marco_Sutter1", "aff": "Swiss Federal Institute of Technology;ETHZ - ETH Zurich;Swiss Federal Institute of Technology;ETH Zurich", "aff_domain": "ethz.ch;ethz.ch;ethz.ch;ethz.ch", "position": "PhD student;PhD student;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nsutter2023learning,\ntitle={Learning Group Importance using the Differentiable Hypergeometric Distribution},\nauthor={Thomas M. Sutter and Laura Manduchi and Alain Ryser and Julia E Vogt},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=75O7S_L4oY}\n}", "github": "", "project": "", "reviewers": "yMNr;xbJ8;uGmy;gdaG", "pdf_size": 2377022, "recommendation": "6;8;8;8", "confidence": "2;3;3;2", "correctness": "4;4;4;3", "technical_novelty": "3;3;3;3", "empirical_novelty": "2;3;4;4", "wc_summary_paper": "64;97;16;143", "wc_strength_and_weaknesses": "82;96;93;147", "wc_clarity_quality_novelty_and_reproducibility": "27;42;117;81", "wc_summary_review": "16;13;19;90", "wc_review": "189;248;245;461", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 2.5, 0.5 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 80.0, 46.39504283864818 ], "wc_strength_and_weaknesses_avg": [ 104.5, 25.084855989221865 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 66.75, 35.074028853269766 ], "wc_summary_review_avg": [ 34.5, 32.113081446662825 ], "wc_review_avg": [ 285.75, 103.87342056560956 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": -0.3333333333333333, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18110733746755532103&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "pdf": "https://openreview.net/pdf?id=75O7S_L4oY", "email": "ethz.ch;ethz.ch;ethz.ch;ethz.ch", "author_num": 4, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "Swiss Federal Institute of Technology;ETH Zurich", "aff_unique_dep": ";", "aff_unique_url": "https://www.ethz.ch;https://www.ethz.ch", "aff_unique_abbr": "ETH Zurich;ETHZ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Switzerland" }, { "id": "77aKxP46geN", "title": "Dateformer: Transformer Extends Look-back Horizon to Predict Longer-term Time Series", "track": "main", "status": "Reject", "tldr": "We propose (1) splitting time series into patches thereby enabling vanilla Transformer to predict long-term series; (2) tapping whole training set
time series to break information bottlenecks. Our work surpasses SOTA by 33.6% on 7 real-world datasets.", "abstract": "Transformers have demonstrated impressive strength in long-term series forecasting. Existing prediction research mostly focused on mapping past short sub-series (lookback window) to future series (forecast window). The longer training dataset time series will be discarded once training is completed. Models can merely rely on lookback window information for inference, which impedes models from analyzing time series from a global perspective. Moreover, the windows used by Transformers are quite narrow because they must model each time-step therein. Under this point-wise processing style, broadening windows will rapidly exhaust their model capacity. This, for fine-grained time series, leads to a bottleneck in information input and prediction output, which is fatal to long-term series forecasting. To overcome the barrier, we propose a brand-new methodology to use Transformer for time series prediction. Specifically, we split time series into patches by day and reform point-wise processing into patch-wise processing, which considerably enhances the information input and output of Transformers. To further help models leverage the whole training set's global information during inference, we distill the information, store it in time representations, and replace series with time representations as the main modeling entities. Our time-modeling Transformer---Dateformer---yields state-of-the-art accuracy on 7 real-world datasets with a 33.6% relative improvement and extends the maximum forecast range to half a year.", "keywords": "Time-series forecasting;Long-term forecasting;Transformer;Time-modeling method", "primary_area": "", "supplementary_material": "", "author": "Julong Young;Junhui Chen;Feihu Huang;Jian Peng", "authorids": "~Julong_Young1;~Junhui_Chen1;~Feihu_Huang2;~Jian_Peng5", "gender": "M;F;M;M", "homepage": "https://scholar.google.com/citations?user=3Jee5d8AAAAJ&hl=zh-CN;https://cs.scu.edu.cn/info/1287/16231.htm;https://cs.scu.edu.cn/info/1285/13597.htm;https://github.com/sakurfall", "dblp": "149/0983;;29/4181-2;", "google_scholar": "3Jee5d8AAAAJ;;;", "orcid": "0000-0002-7467-6324;;0000-0001-5831-2240; 0000-0003-1015-8592", "linkedin": ";;;", "or_profile": "~Junhui_Chen1;~Feihu_Huang2;~jian_peng4;~Julong_Yang1", "aff": "Sichuan University;;Sichuan University;Sichuan University", "aff_domain": "scu.edu.cn;;scu.edu.cn;scu.edu.cn", "position": "PhD student;;Full Professor;MS student", "bibtex": "@misc{\nyoung2023dateformer,\ntitle={Dateformer: Transformer Extends Look-back Horizon to Predict Longer-term Time Series},\nauthor={Julong Young and Junhui Chen and Feihu Huang and Jian Peng},\nyear={2023},\nurl={https://openreview.net/forum?id=77aKxP46geN}\n}", "github": "", "project": "", "reviewers": "Xz1K;atRd;Q1u4;VVpa", "site": "https://openreview.net/forum?id=77aKxP46geN", "pdf_size": 2209124, "recommendation": "5;6;6;6", "confidence": "3;3;4;3", "correctness": "2;3;3;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "29;46;201;22", "wc_strength_and_weaknesses": "154;356;456;48", "wc_clarity_quality_novelty_and_reproducibility": "172;87;179;50", "wc_summary_review": "20;67;111;198", "wc_review": "375;556;947;318", "wc_reply_reviewers": "183;0;47;0", "wc_reply_authors": "2665;1302;1722;925", "reply_reviewers": "1;0;1;0", "reply_authors": "8;4;4;4", "recommendation_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ],
"correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 74.5, 73.55440163579607 ], "wc_strength_and_weaknesses_avg": [ 253.5, 160.9681645543615 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 122.0, 55.131660595342126 ], "wc_summary_review_avg": [ 99.0, 65.59344479443048 ], "wc_review_avg": [ 549.0, 246.01321102737552 ], "wc_reply_reviewers_avg": [ 57.5, 74.95498649189392 ], "wc_reply_authors_avg": [ 1653.5, 648.4768692867926 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 5.0, 1.7320508075688772 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:9pzM5_l4GuMJ:scholar.google.com/&scioq=Dateformer:+Transformer+Extends+Look-back+Horizon+to+Predict+Longer-term+Time+Series&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Sichuan University", "aff_unique_dep": "", "aff_unique_url": "https://www.scu.edu.cn", "aff_unique_abbr": "SCU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Variational Information Pursuit for Interpretable Predictions", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11063", "id": "77lSWa-Tm3Z", "poster": "/media/PosterPDFs/ICLR%202023/11063.png?t=1682665923.5050147", "openreview": "https://openreview.net/forum?id=77lSWa-Tm3Z", "slides": "https://iclr.cc/virtual/2023/poster/11063", "video": "https://iclr.cc/virtual/2023/poster/11063", "author_site": "Aditya Chattopadhyay, Kwan Ho Ryan Chan, Benjamin Haeffele, Donald Geman, Rene Vidal", "tldr": "A Framework for Interpretable ML", "abstract": "There is a growing interest in the machine learning community in developing predictive algorithms that are interpretable by design. To this end, recent work proposes to sequentially ask interpretable queries about data until a high confidence prediction can be made based on the answers obtained (the history). To promote short query-answer chains, a greedy procedure called Information Pursuit (IP) is used, which adaptively chooses queries in order of information gain. Generative models are employed to learn the distribution of query-answers and labels, which is in turn used to estimate the most informative query. However, learning and inference with a full generative model of the data is often intractable for complex tasks. In this work, we propose Variational Information Pursuit (V-IP), a variational characterization of IP which bypasses the need to learn generative models. V-IP is based on finding a query selection strategy and a classifier that minimize the expected cross-entropy between true and predicted labels. We prove that the IP strategy is the optimal solution to this problem. Therefore, instead of learning generative models, we can use our optimal strategy to directly pick the most informative query given any history. We then develop a practical algorithm by defining a finite-dimensional parameterization of our strategy and classifier using deep networks and train them end-to-end using our objective. Empirically, V-IP is 10-100x faster than IP on different Vision and NLP tasks with competitive performance. 
Moreover, V-IP finds much shorter query chains when compared to reinforcement learning, which is typically used in sequential decision-making problems. Finally, we demonstrate the utility of V-IP on challenging tasks like medical diagnosis, where the performance is far superior to the generative modeling approach.", "keywords": "Interpretable ML;Explainable AI;Information Pursuit", "primary_area": "", "supplementary_material": "", "author": "Aditya Chattopadhyay;Kwan Ho Ryan Chan;Benjamin David Haeffele;Donald Geman;Rene Vidal", "authorids": "~Aditya_Chattopadhyay1;~Kwan_Ho_Ryan_Chan1;~Benjamin_David_Haeffele1;~Donald_Geman2;~Rene_Vidal1", "gender": "M;M;;M;", "homepage": ";https://ryanchankh.github.io/;;http://www.cis.jhu.edu/people/faculty/geman/;http://www.vision.jhu.edu", "dblp": "207/8574;267/5496;;;v/ReneVidal", "google_scholar": "aekzv1gAAAAJ;DBXWBqcAAAAJ;;;https://scholar.google.com/citations?hl=en", "orcid": ";;;;", "linkedin": ";ryanchankh/;;;rene-vidal-74844928/", "or_profile": "~Aditya_Chattopadhyay1;~Kwan_Ho_Ryan_Chan1;~Benjamin_David_Haeffele1;~Donald_Geman2;~Rene_Vidal1", "aff": "Johns Hopkins University;University of Pennsylvania ;;Johns Hopkins University;Amazon", "aff_domain": "jhu.edu;seas.upenn.edu;;jh.edu;amazon.com", "position": "PhD student;PhD student;;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\nchattopadhyay2023variational,\ntitle={Variational Information Pursuit for Interpretable Predictions},\nauthor={Aditya Chattopadhyay and Kwan Ho Ryan Chan and Benjamin David Haeffele and Donald Geman and Rene Vidal},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=77lSWa-Tm3Z}\n}", "github": "", "project": "", "reviewers": "VaCA;G1eQ;jPMz", "pdf_size": 16610504, "recommendation": "6;8;8", "confidence": "3;3;4", "correctness": "3;4;4", "technical_novelty": "2;3;3", "empirical_novelty": "3;3;3", "wc_summary_paper": "271;220;65", "wc_strength_and_weaknesses": "373;230;272", "wc_clarity_quality_novelty_and_reproducibility": "302;41;207", "wc_summary_review": "89;39;39", "wc_review": "1035;530;583", "wc_reply_reviewers": "0;11;0", "wc_reply_authors": "2272;467;1226", "reply_reviewers": "0;1;0", "reply_authors": "4;1;2", "recommendation_avg": [ 7.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 185.33333333333334, 87.59883307188261 ], "wc_strength_and_weaknesses_avg": [ 291.6666666666667, 60.01296156294534 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 183.33333333333334, 107.85896140588206 ], "wc_summary_review_avg": [ 55.666666666666664, 23.570226039551585 ], "wc_review_avg": [ 716.0, 226.60244188151782 ], "wc_reply_reviewers_avg": [ 3.6666666666666665, 5.185449728701348 ], "wc_reply_authors_avg": [ 1321.6666666666667, 739.9866365159727 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 1.247219128924647 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.49999999999999983, "corr_recommendation_correctness": 0.9999999999999998, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3332453611956449585&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "pdf":
"https://openreview.net/pdf?id=77lSWa-Tm3Z", "email": "jhu.edu;seas.upenn.edu;;jh.edu;amazon.com", "author_num": 5, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Johns Hopkins University;University of Pennsylvania;Amazon", "aff_unique_dep": ";;Amazon.com, Inc.", "aff_unique_url": "https://www.jhu.edu;https://www.upenn.edu;https://www.amazon.com", "aff_unique_abbr": "JHU;UPenn;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "78IUEPOGjG6", "title": "Mitigating Forgetting in Online Continual Learning via Contrasting Semantically Distinct Augmentations", "track": "main", "status": "Withdraw", "tldr": "Leverage the strong data augmentation to mitigate catastrophic forgetting", "abstract": "Online continual learning (OCL) aims to enable model learning from a non-stationary data stream to continuously acquire new knowledge as well as retain the learnt one, under the constraints of having limited system size and computational cost, in which the main challenge comes from the \"catastrophic forgetting\" issue -- the inability to well remember the learnt knowledge while learning the new ones. With the specific focus on the class-incremental OCL scenario, i.e. OCL for classification, the recent advance incorporates the contrastive learning technique for learning more generalised feature representation to achieve the state-of-the-art performance but is still unable to fully resolve the catastrophic forgetting. In this paper, we follow the strategy of adopting contrastive learning but further introduce the semantically distinct augmentation technique, in which it leverages strong augmentation to generate more data samples, and we show that considering these samples semantically different from their original classes (thus being related to the out-of-distribution samples) in the contrastive learning mechanism contributes to alleviate forgetting and facilitate model stability. Moreover, in addition to contrastive learning, the typical classification mechanism and objective (i.e. softmax classifier and cross-entropy loss) are included in our model design for faster convergence and utilising the label information, but particularly equipped with a sampling strategy to tackle the tendency of favouring the new classes (i.e. model bias towards the recently learnt classes). 
Upon conducting extensive experiments on the CIFAR-10, CIFAR-100, and Mini-Imagenet datasets, we show that our proposed method achieves superior performance against various baselines.", "keywords": "continual learning;representation learning;memory replay", "primary_area": "", "supplementary_material": "", "author": "Sheng-Feng Yu;Wei-Chen Chiu", "authorids": "~Sheng-Feng_Yu1;~Wei-Chen_Chiu3", "gender": "M;M", "homepage": ";https://walonchiu.github.io/", "dblp": "152/6568;148/9413", "google_scholar": "0f0pRGIAAAAJ;FiFOBS8AAAAJ", "orcid": ";0000-0001-7715-8306", "linkedin": ";", "or_profile": "~Sheng_Feng_Yu1;~Wei-chen_Chiu2", "aff": "National Chiao Tung University;National Chiao Tung University", "aff_domain": "nctu.edu.tw;nctu.edu.tw", "position": "PhD student;Associate Professor", "bibtex": "@misc{\nyu2023mitigating,\ntitle={Mitigating Forgetting in Online Continual Learning via Contrasting Semantically Distinct Augmentations},\nauthor={Sheng-Feng Yu and Wei-Chen Chiu},\nyear={2023},\nurl={https://openreview.net/forum?id=78IUEPOGjG6}\n}", "github": "", "project": "", "reviewers": "fB63;7pUt;2TzN;VXMJ", "site": "https://openreview.net/forum?id=78IUEPOGjG6", "pdf_size": 3074207, "recommendation": "3;5;5;5", "confidence": "4;4;4;4", "correctness": "3;3;3;3", "technical_novelty": "1;2;2;3", "empirical_novelty": "1;2;2;3", "wc_summary_paper": "22;90;112;69", "wc_strength_and_weaknesses": "135;292;280;304", "wc_clarity_quality_novelty_and_reproducibility": "21;87;21;17", "wc_summary_review": "57;54;62;49", "wc_review": "235;523;475;439", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 73.25, 33.26691299174001 ], "wc_strength_and_weaknesses_avg": [ 252.75, 68.51049189722696 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 36.5, 29.201883500897676 ], "wc_summary_review_avg": [ 55.5, 4.716990566028302 ], "wc_review_avg": [ 418.0, 109.77704678119193 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=785274654827376483&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0", "aff_unique_norm": "National Chiao Tung University", "aff_unique_dep": "", "aff_unique_url": "https://www.nctu.edu.tw", "aff_unique_abbr": "NCTU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Taiwan", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Sparse tree-based Initialization for Neural Networks", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11753", "id": "78xgBm6ckZr", "poster": "/media/PosterPDFs/ICLR%202023/11753.png?t=1682874517.015519", "openreview": "https://openreview.net/forum?id=78xgBm6ckZr", "slides": "https://iclr.cc/virtual/2023/poster/11753", "video": "https://iclr.cc/virtual/2023/poster/11753", "author_site": "Patrick Lutz, Ludovic Arnould, Claire Boyer, Erwan Scornet", "tldr": "", "abstract": "Dedicated neural network (NN) architectures have been designed to handle specific data types (such as
CNN for images or RNN for text), which ranks them among state-of-the-art methods for dealing with these data. Unfortunately, no architecture has been found for dealing with tabular data yet, for which tree ensemble methods (tree boosting, random forests) usually show the best predictive performances. In this work, we propose a new sparse initialization technique for (potentially deep) multilayer perceptrons (MLP): we first train a tree-based procedure to detect feature interactions and use the resulting information to initialize the network, which is subsequently trained via standard gradient descent (GD) strategies. Numerical experiments on several tabular data sets show the benefits of this new, simple and easy-to-use method, both in terms of generalization capacity and computation time, compared to default MLP initialization and even to existing complex deep learning solutions. In fact, this wise MLP initialization raises the performance of the resulting NN methods to that of gradient boosting on tabular data. Besides, such initializations are able to preserve the sparsity of weights introduced in the first layers of the network throughout the training, which emphasizes that the first layers act as a sparse feature extractor (like convolutional layers in CNN).", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Patrick Lutz;Ludovic Arnould;Claire Boyer;Erwan Scornet", "authorids": "~Patrick_Lutz1;~Ludovic_Arnould1;~Claire_Boyer1;~Erwan_Scornet1", "gender": "Not Specified;M;;M", "homepage": ";https://ludovic-arnould.github.io/;https://www.imo.universite-paris-saclay.fr/~claire.boyer/;https://erwanscornet.github.io/", "dblp": ";277/6031;;176/1062", "google_scholar": ";;;", "orcid": ";;;", "linkedin": "patrick-lutz-bb0b84174/;;;", "or_profile": "~Patrick_Lutz1;~Ludovic_Arnould1;~Claire_Boyer1;~Erwan_Scornet1", "aff": "Boston University, Boston University;LPSM, Sorbonne Universite;Sorbonne Universit\u00e9 ;Ecole polytechnique", "aff_domain": "bu.edu;sorbonne-universite.fr;sorbonne-universite.fr;polytechnique.edu", "position": "PhD student;PhD student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nlutz2023sparse,\ntitle={Sparse tree-based Initialization for Neural Networks},\nauthor={Patrick Lutz and Ludovic Arnould and Claire Boyer and Erwan Scornet},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=78xgBm6ckZr}\n}", "github": "", "project": "", "reviewers": "c3Rb;znja;mdb9", "pdf_size": 3060271, "recommendation": "5;6;8", "confidence": "2;4;3", "correctness": "3;3;3", "technical_novelty": "1;3;3", "empirical_novelty": "1;3;3", "wc_summary_paper": "101;55;94", "wc_strength_and_weaknesses": "196;283;108", "wc_clarity_quality_novelty_and_reproducibility": "16;71;329", "wc_summary_review": "82;41;53", "wc_review": "395;450;584", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "290;482;278", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.3333333333333335, 0.9428090415820634 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.9428090415820634 ], "wc_summary_paper_avg": [ 83.33333333333333, 20.23747898221405 ], "wc_strength_and_weaknesses_avg": [ 195.66666666666666, 71.44383963801374 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 138.66666666666666, 136.44616357947027 ],
"wc_summary_review_avg": [ 58.666666666666664, 17.21110752456745 ], "wc_review_avg": [ 476.3333333333333, 79.37393918801196 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 350.0, 93.46657156438339 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.3273268353539886, "corr_recommendation_correctness": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1439197995500331553&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "pdf": "https://openreview.net/pdf?id=78xgBm6ckZr", "email": "bu.edu;sorbonne-universite.fr;sorbonne-universite.fr;polytechnique.edu", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Boston University;Sorbonne Universite;Sorbonne Universit\u00e9;Ecole Polytechnique", "aff_unique_dep": ";LPSM;;", "aff_unique_url": "https://www.bu.edu;https://www.sorbonne-universite.fr;https://www.sorbonne-universite.fr;https://www.polytechnique.edu", "aff_unique_abbr": "BU;Sorbonne;Sorbonne U;X", "aff_campus_unique_index": "0", "aff_campus_unique": "Boston;", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "United States;France" }, { "id": "79xEHFvjx9p", "title": "Feature-Driven Talking Face Generation with StyleGAN2", "track": "main", "status": "Reject", "tldr": "Audio and image features are extracted through Gan to generate talking face.", "abstract": "In this work, we wish to use a face image that generate a more natural and real face talking animation video. This is not an easy task because face appearance variation and semantics of speech are coupled together when tacking face have a micro movement. Audio features sometimes contain information about expressions, but they are not accurate enough. So a single audio feature cannot fully represent the movement of the face. For the above reason, we want to use different features to generate talking faces. The StyleGan series show good performance in the direction of image processing, and can perform the style migration task of portraits very well at the same time. We find that StyleGan can be used as a talking face generator. At the same time, we also encode and extract non-identity features and non-lip features, and try to find the subtle relationship between the features and the talking face. 
We also use evaluations and an ablation study to measure the quality of the generated videos and to examine whether our approach is effective and feasible.", "keywords": "Talking face;GAN;Feature Selection", "primary_area": "", "supplementary_material": "/attachment/915a63af33d04c5fef8318e8c3fb4c813dc46d97.zip", "author": "Tao Zhang;Kai Tang;Weiwu Zhang;Kazushige Ouchi", "authorids": "~Tao_Zhang20;tangkai@toshiba.com.cn;zhangweiwu98@163.com;kazushige.ouchi@toshiba.co.jp", "gender": "M;;;", "homepage": ";;;", "dblp": ";;;", "google_scholar": ";;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Tao_Zhang20;tangkai@toshiba.com.cn;zhangweiwu98@163.com;kazushige.ouchi@toshiba.co.jp", "aff": "Toshiba (China) Co.,Ltd R&D Center;;;", "aff_domain": "com.cn;;;", "position": "Researcher;;;", "bibtex": "@misc{\nzhang2023featuredriven,\ntitle={Feature-Driven Talking Face Generation with Style{GAN}2},\nauthor={Tao Zhang and Kai Tang and Weiwu Zhang and Kazushige Ouchi},\nyear={2023},\nurl={https://openreview.net/forum?id=79xEHFvjx9p}\n}", "github": "", "project": "", "reviewers": "34UQ;FwS3;gYGv;MS16", "site": "https://openreview.net/forum?id=79xEHFvjx9p", "pdf_size": 957356, "recommendation": "1;1;3;3", "confidence": "4;5;4;4", "correctness": "1;2;2;2", "technical_novelty": "2;1;1;2", "empirical_novelty": "1;1;0;2", "wc_summary_paper": "70;49;34;40", "wc_strength_and_weaknesses": "88;243;14;37", "wc_clarity_quality_novelty_and_reproducibility": "430;40;20;210", "wc_summary_review": "72;12;52;3", "wc_review": "660;344;120;290", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 2.0, 1.0 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 1.75, 0.4330127018922193 ], "technical_novelty_avg": [ 1.5, 0.5 ], "empirical_novelty_avg": [ 1.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 48.25, 13.645054048995188 ], "wc_strength_and_weaknesses_avg": [ 95.5, 89.27065587302471 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 175.0, 164.69669092000603 ], "wc_summary_review_avg": [ 34.75, 28.331740151286155 ], "wc_review_avg": [ 353.5, 195.31192999916826 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Jjgq3TeaUgoJ:scholar.google.com/&scioq=Feature-Driven+Talking+Face+Generation+with+StyleGAN2&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Toshiba (China) Co., Ltd", "aff_unique_dep": "R&D Center", "aff_unique_url": "https://www.toshiba.com.cn", "aff_unique_abbr": "Toshiba China", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "id": "7AwPeT4XbAh", "title": "Multi-Modality Alone is Not Enough: Generating Scene Graphs using Cross-Relation-Modality Tokens", "track": "main", "status": "Reject", "tldr": "Introducing a novel cross-relational multi-modal token generation strategy for scene graphs", "abstract": "Recent years have seen a growing interest in Scene Graph Generation (SGG), a comprehensive visual scene understanding task that aims to predict the relationships between objects detected in a scene.
One of its key challenges is the strong bias of the visual world around us toward a few frequently occurring relationships, leaving a long tail of under-represented classes. Although infusing additional modalities is one prominent way to improve SGG performance on under-represented classes, we argue that using additional modalities alone is not enough. We propose to inject entity relation information (Cross-Relation) and modality dependencies (Cross-Modality) into each embedding token of a transformer, which we term primal fusion. The resulting Cross-RElAtion-Modality (CREAM) token acts as a strong inductive bias for the SGG framework. Our experimental results on the Visual Genome dataset demonstrate that our CREAM model outperforms state-of-the-art SGG models by around 20% while being simpler and requiring substantially less computation. Additionally, to analyse the generalisability of the CREAM model, we also evaluate it on the Open Images dataset. Finally, we examine the impact of the depth-map quality on SGG performance and empirically show the superiority of our model over the prior state of the art by better capturing the depth data, boosting the performance by a margin of around 25%.", "keywords": "scene graphs;transformers;fusion strategies;multi-modal", "primary_area": "", "supplementary_material": "", "author": "Gopika Sudhakaran;Devendra Singh Dhami;Stefan Roth;Kristian Kersting", "authorids": "~Gopika_Sudhakaran1;~Devendra_Singh_Dhami1;~Stefan_Roth1;~Kristian_Kersting1", "gender": "F;M;M;M", "homepage": ";https://sites.google.com/view/devendradhami;https://www.visinf.tu-darmstadt.de/visual_inference/people_vi/stefan_roth.en.jsp;http://www.ml.informatik.tu-darmstadt.de/", "dblp": "213/0247;201/2130;24/3452;40/3793", "google_scholar": "QZS6FjoAAAAJ;aVlaHfkAAAAJ;0yDoR0AAAAAJ;QY-earAAAAAJ", "orcid": "0009-0007-3721-5602;;0000-0001-9002-9832;0000-0002-2873-9152", "linkedin": "https://de.linkedin.com/in/gopika-sudhakaran-7a289755;;stefanroth13;", "or_profile": "~Gopika_Sudhakaran1;~Devendra_Singh_Dhami1;~Stefan_Roth1;~Kristian_Kersting1", "aff": "Technische Universit\u00e4t Darmstadt;CS Department, TU Darmstadt, TU Darmstadt;Technische Universit\u00e4t Darmstadt;TU Darmstadt", "aff_domain": "tu-darmstadt.de;cs.tu-darmstadt.de;tu-darmstadt.de;tu-darmstadt.de", "position": "PhD student;Postdoctoral researcher;Full Professor;Full Professor", "bibtex": "@misc{\nsudhakaran2023multimodality,\ntitle={Multi-Modality Alone is Not Enough: Generating Scene Graphs using Cross-Relation-Modality Tokens},\nauthor={Gopika Sudhakaran and Devendra Singh Dhami and Stefan Roth and Kristian Kersting},\nyear={2023},\nurl={https://openreview.net/forum?id=7AwPeT4XbAh}\n}", "github": "", "project": "", "reviewers": "JG3W;XdXf;adsx", "site": "https://openreview.net/forum?id=7AwPeT4XbAh", "pdf_size": 1396427, "recommendation": "3;3;5", "confidence": "4;4;4", "correctness": "2;3;3", "technical_novelty": "2;3;3", "empirical_novelty": "0;2;3", "wc_summary_paper": "101;99;61", "wc_strength_and_weaknesses": "201;217;142", "wc_clarity_quality_novelty_and_reproducibility": "23;40;107", "wc_summary_review": "39;53;38", "wc_review": "364;409;348", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [
1.6666666666666667, 1.247219128924647 ], "wc_summary_paper_avg": [ 87.0, 18.40289832245635 ], "wc_strength_and_weaknesses_avg": [ 186.66666666666666, 32.25247621845836 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 56.666666666666664, 36.261396675926434 ], "wc_summary_review_avg": [ 43.333333333333336, 6.847546194724712 ], "wc_review_avg": [ 373.6666666666667, 25.82419193099542 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:_3PEOcr70t0J:scholar.google.com/&scioq=Multi-Modality+Alone+is+Not+Enough:+Generating+Scene+Graphs+using+Cross-Relation-Modality+Tokens&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Technische Universit\u00e4t Darmstadt", "aff_unique_dep": "", "aff_unique_url": "https://www.tu-darmstadt.de", "aff_unique_abbr": "TUD", "aff_campus_unique_index": "1", "aff_campus_unique": ";Darmstadt", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "id": "7BfWbjOqgMf", "title": "Universal Speech Enhancement with Score-based Diffusion", "track": "main", "status": "Reject", "tldr": "We propose to consider the task of speech enhancement as a universal endeavor, and provide a diffusion-based approach to deal with 55 different distortions at the same time.", "abstract": "Removing background noise from speech audio has been the subject of considerable effort, especially in recent years due to the rise of virtual communication and amateur recordings. Yet background noise is not the only unpleasant disturbance that can prevent intelligibility: reverb, clipping, codec artifacts, problematic equalization, limited bandwidth, or inconsistent loudness are equally disturbing and ubiquitous. In this work, we propose to consider the task of speech enhancement as a holistic endeavor, and present a universal speech enhancement system that tackles 55 different distortions at the same time. Our approach consists of a generative model that employs score-based diffusion, together with a multi-resolution conditioning network that performs enhancement with mixture density networks. We show that this approach significantly outperforms the state of the art in a subjective test performed by expert listeners. We also show that it achieves competitive objective scores with just 4-8 diffusion steps, despite not considering any particular strategy for fast sampling. 
We hope that both our methodology and technical contributions encourage researchers and practitioners to adopt a universal approach to speech enhancement, possibly framing it as a generative task.", "keywords": "Speech enhancement;audio;score matching;diffusion;mixture density networks", "primary_area": "", "supplementary_material": "/attachment/e6188305b13a15450bd8cdfc2fb5c827abca8dd3.zip", "author": "Joan Serr\u00e0;Santiago Pascual;Jordi Pons;Recep Oguz Araz;Davide Scaini", "authorids": "~Joan_Serr\u00e01;~Santiago_Pascual1;~Jordi_Pons1;~Recep_Oguz_Araz1;~Davide_Scaini1", "gender": "M;Not Specified;M;;M", "homepage": "https://serrjoa.github.io/;;;;http://www.jordipons.me", "dblp": "67/3884;189/4003;;;https://dblp.uni-trier.de/pers/hd/p/Pons:Jordi", "google_scholar": "sZLj96sAAAAJ;;https://scholar.google.com/citations?view_op=list_works;1R60o-UAAAAJ;https://scholar.google.es/citations?user=wPzfRiwAAAAJ", "orcid": ";;0000-0002-8456-0990;;", "linkedin": "joan-serra-julia/;;r-o\u011fuz-araz-930a39157/;davide-scaini/;https://linkedin.com/in/jordiponspuig", "or_profile": "~Joan_Serr\u00e01;~Santiago_Pascual1;~Recep_Oguz_Araz1;~Davide_Scaini1;~Jordi_Puig_Puig1", "aff": "Dolby Laboratories;Dolby Labs;Universitat Pompeu Fabra;;Dolby Laboratories", "aff_domain": "dolby.com;dolby.com;upf.edu;;dolby.com", "position": "Researcher;Researcher;MS student;;Postdoc", "bibtex": "@misc{\nserr{\\`a}2023universal,\ntitle={Universal Speech Enhancement with Score-based Diffusion},\nauthor={Joan Serr{\\`a} and Santiago Pascual and Jordi Pons and Recep Oguz Araz and Davide Scaini},\nyear={2023},\nurl={https://openreview.net/forum?id=7BfWbjOqgMf}\n}", "github": "", "project": "", "reviewers": "XfqC;5RnM;yPpC;BhNH", "site": "https://openreview.net/forum?id=7BfWbjOqgMf", "pdf_size": 803977, "recommendation": "5;6;6;6", "confidence": "4;4;3;4", "correctness": "3;3;3;2", "technical_novelty": "1;3;3;3", "empirical_novelty": "2;3;3;2", "wc_summary_paper": "54;101;87;69", "wc_strength_and_weaknesses": "227;304;183;134", "wc_clarity_quality_novelty_and_reproducibility": "19;49;264;31", "wc_summary_review": "404;59;154;440", "wc_review": "704;513;688;674", "wc_reply_reviewers": "84;340;87;84", "wc_reply_authors": "1414;639;1133;980", "reply_reviewers": "1;1;1;1", "reply_authors": "3;1;3;2", "recommendation_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.8660254037844386 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 77.75, 17.795715776557007 ], "wc_strength_and_weaknesses_avg": [ 212.0, 62.477996126636455 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 90.75, 100.59417229641089 ], "wc_summary_review_avg": [ 264.25, 161.78747633855966 ], "wc_review_avg": [ 644.75, 76.80291335620024 ], "wc_reply_reviewers_avg": [ 148.75, 110.42503112972167 ], "wc_reply_authors_avg": [ 1041.5, 279.6949230858508 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": -0.3333333333333333, "gs_citation": 104, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18075280620064161482&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Dolby Laboratories;Universitat Pompeu Fabra", "aff_unique_dep": ";", "aff_unique_url": "https://www.dolby.com;https://www.upf.edu/", 
"aff_unique_abbr": "Dolby;UPF", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;Spain" }, { "title": "Sequential Latent Variable Models for Few-Shot High-Dimensional Time-Series Forecasting", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11796", "id": "7C9aRX2nBf2", "poster": "/media/PosterPDFs/ICLR%202023/11796.png?t=1681679218.8339658", "openreview": "https://openreview.net/forum?id=7C9aRX2nBf2", "slides": "https://iclr.cc/virtual/2023/poster/11796", "video": "https://iclr.cc/virtual/2023/poster/11796", "author_site": "Xiajun Jiang, Ryan Missel, Zhiyuan Li, Linwei Wang", "tldr": "We present the very first step toward few-shot high-dimensional sequence forecasting by a Bayesian meta-learning model that learns the process of learning latent dynamics that changes with the small number of observations that are available.", "abstract": "Modern applications increasingly require learning and forecasting latent dynamics from high-dimensional time-series. Compared to univariate time-series forecasting, this adds a new challenge of reasoning about the latent dynamics of an unobserved abstract state. Sequential latent variable models (LVMs) present an attractive solution, although existing works either struggle with long-term forecasting or have difficulty learning across diverse dynamics. In this paper, we first present a conceptual framework of sequential LVMs to unify existing works, contrast their fundamental limitations, and identify an intuitive solution to long-term forecasting for diverse dynamics via meta-learning. We then present the first framework of few-shot forecasting for high-dimensional time-series: instead of learning a single dynamic function, we leverage data of diverse dynamics and learn to adapt latent dynamic functions to few-shot support series. This is realized via Bayesian meta-learning underpinned by: 1) a latent dynamic function conditioned on knowledge derived from few-shot support series, and 2) a meta-model that learns to extract such dynamic-specific knowledge via feed-forward embedding of support set. We compared the presented framework with a comprehensive set of baseline models trained 1) globally on the large meta-training set with diverse dynamics, and 2) individually on single dynamics, both with and without fine-tuning to k-shot support series used by the meta-models. 
We demonstrated that the presented framework is agnostic to the latent dynamic function of choice and, at meta-test time, is able to forecast for new dynamics given a variable number of support series.", "keywords": "Time series;generative models;Bayesian meta-learning", "primary_area": "", "supplementary_material": "", "author": "Xiajun Jiang;Ryan Missel;Zhiyuan Li;Linwei Wang", "authorids": "~Xiajun_Jiang1;~Ryan_Missel1;~Zhiyuan_Li5;~Linwei_Wang1", "gender": "M;M;M;F", "homepage": ";;;https://people.rit.edu/lxwast", "dblp": "45/10201;278/4319;39/7780-7;02/6162", "google_scholar": "P9klFBUAAAAJ;;T0yPXRwAAAAJ;https://scholar.google.com.tw/citations?user=CG56DzcAAAAJ", "orcid": "0000-0003-1075-6736;0000-0002-9509-6775;;", "linkedin": ";;;", "or_profile": "~Xiajun_Jiang1;~Ryan_Missel1;~Zhiyuan_Li5;~Linwei_Wang1", "aff": "Rochester Institute of Technology;Rochester Institute of Technology;Rochester Institute of Technology;Rochester Institute of Technology", "aff_domain": "rit.edu;rit.edu;rit.edu;rit.edu", "position": "PhD student;PhD student;PhD student;Professor", "bibtex": "@inproceedings{\njiang2023sequential,\ntitle={Sequential Latent Variable Models for Few-Shot High-Dimensional Time-Series Forecasting},\nauthor={Xiajun Jiang and Ryan Missel and Zhiyuan Li and Linwei Wang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=7C9aRX2nBf2}\n}", "github": "", "project": "", "reviewers": "wdac;EkYh;Chgq", "pdf_size": 2896627, "recommendation": "6;6;8", "confidence": "3;3;3", "correctness": "3;4;4", "technical_novelty": "2;3;3", "empirical_novelty": "2;3;2", "wc_summary_paper": "64;135;48", "wc_strength_and_weaknesses": "373;176;612", "wc_clarity_quality_novelty_and_reproducibility": "147;16;155", "wc_summary_review": "49;70;46", "wc_review": "633;397;861", "wc_reply_reviewers": "14;0;46", "wc_reply_authors": "1166;706;450", "reply_reviewers": "1;0;1", "reply_authors": "3;2;2", "recommendation_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.0 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 82.33333333333333, 37.80946383586463 ], "wc_strength_and_weaknesses_avg": [ 387.0, 178.2713287847114 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 106.0, 63.72336044706577 ], "wc_summary_review_avg": [ 55.0, 10.677078252031311 ], "wc_review_avg": [ 630.3333333333334, 189.43659156092895 ], "wc_reply_reviewers_avg": [ 20.0, 19.252705437591537 ], "wc_reply_authors_avg": [ 774.0, 296.23414162899365 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.49999999999999983, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1628516791058427041&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "pdf": "https://openreview.net/pdf?id=7C9aRX2nBf2", "email": "rit.edu;rit.edu;rit.edu;rit.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Rochester Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.rit.edu", "aff_unique_abbr": "RIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0",
"aff_country_unique": "United States" }, { "title": "Understanding The Robustness of Self-supervised Learning Through Topic Modeling", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11120", "id": "7Cb7Faxa1OB", "poster": "/media/PosterPDFs/ICLR%202023/11120.png?t=1682117711.864098", "openreview": "https://openreview.net/forum?id=7Cb7Faxa1OB", "slides": "https://iclr.cc/virtual/2023/poster/11120", "video": "https://iclr.cc/virtual/2023/poster/11120", "author_site": "Zeping Luo, Shiyou Wu, Cindy Weng, Mo Zhou, Rong Ge", "tldr": "", "abstract": "Self-supervised learning has significantly improved the performance of many NLP tasks. However, how can self-supervised learning discover useful features, and why is it better than traditional approaches such as probabilistic models are still largely unknown. In this paper, we focus on the context of topic modeling and highlight a key advantage of self-supervised learning - when applied to data generated by topic models, self-supervised learning can be oblivious to the specific model, and hence is less susceptible to model misspecification. In particular, we prove that commonly used self-supervised objectives based on reconstruction or contrastive samples can both recover useful posterior information for general topic models. Empirically, we show that the same objectives can perform on par with posterior inference using the correct model, while outperforming posterior inference using misspecified models.", "keywords": "deep learning theory;self-supervised learning", "primary_area": "", "supplementary_material": "/attachment/f86c1605942ce58eeaf6bb5b7eb677d0bba76021.zip", "author": "Zeping Luo;Shiyou Wu;Cindy Weng;Mo Zhou;Rong Ge", "authorids": "~Zeping_Luo1;~Shiyou_Wu1;~Cindy_Weng1;~Mo_Zhou3;~Rong_Ge1", "gender": "M;;;M;M", "homepage": ";;https://github.com/wengcindy;https://mozhou7.github.io/;https://users.cs.duke.edu/~rongge/", "dblp": ";;;;89/6869-1.html", "google_scholar": ";;;j_SEFF8AAAAJ;https://scholar.google.com.tw/citations?user=MVxcjEoAAAAJ", "orcid": ";;;;", "linkedin": "zeping-luo-a78b28178/;shiyou-tony-wu/;;;", "or_profile": "~Zeping_Luo1;~Shiyou_Wu1;~Cindy_Weng1;~Mo_Zhou3;~Rong_Ge1", "aff": "Duke University;Duke University;Duke University;Duke University;Google (visiting)", "aff_domain": "duke.edu;duke.edu;duke.edu;duke.edu;google.com", "position": "Undergrad student;Undergrad student;Undergrad student;PhD student;Researcher", "bibtex": "@inproceedings{\nluo2023understanding,\ntitle={Understanding The Robustness of Self-supervised Learning Through Topic Modeling},\nauthor={Zeping Luo and Shiyou Wu and Cindy Weng and Mo Zhou and Rong Ge},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=7Cb7Faxa1OB}\n}", "github": "", "project": "", "reviewers": "KUB2;6Fra;DvbM", "pdf_size": 4650254, "recommendation": "6;6;6", "confidence": "3;3;3", "correctness": "4;4;3", "technical_novelty": "4;3;4", "empirical_novelty": "3;3;3", "wc_summary_paper": "328;88;87", "wc_strength_and_weaknesses": "139;138;164", "wc_clarity_quality_novelty_and_reproducibility": "765;30;58", "wc_summary_review": "41;17;60", "wc_review": "1273;273;369", "wc_reply_reviewers": "180;0;13", "wc_reply_authors": "2009;313;119", "reply_reviewers": "2;0;1", "reply_authors": "4;1;1", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.0, 0.0 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.6666666666666665, 
0.4714045207910317 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 167.66666666666666, 113.37352228609446 ], "wc_strength_and_weaknesses_avg": [ 147.0, 12.027745701779143 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 284.3333333333333, 340.0748283670652 ], "wc_summary_review_avg": [ 39.333333333333336, 17.594190960528863 ], "wc_review_avg": [ 638.3333333333334, 450.4851705538029 ], "wc_reply_reviewers_avg": [ 64.33333333333333, 81.96069518711731 ], "wc_reply_authors_avg": [ 813.6666666666666, 848.9308308428641 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 2.0, 1.4142135623730951 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9846665411334482476&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=7Cb7Faxa1OB", "email": "duke.edu;duke.edu;duke.edu;duke.edu;google.com", "author_num": 5, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Duke University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.duke.edu;https://www.google.com", "aff_unique_abbr": "Duke;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Moderate Coreset: A Universal Method of Data Selection for Real-world Data-efficient Deep Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11366", "id": "7D5EECbOaf9", "poster": "", "openreview": "https://openreview.net/forum?id=7D5EECbOaf9", "slides": "https://iclr.cc/virtual/2023/poster/11366", "video": "https://iclr.cc/virtual/2023/poster/11366", "author_site": "Xiaobo Xia, Jiale Liu, Jun Yu, Xu Shen, Bo Han, Tongliang Liu", "tldr": "", "abstract": "Deep learning methods nowadays rely on massive data, resulting in substantial costs of data storage and model training. Data selection is a useful tool to alleviate such costs, where a coreset of massive data is extracted to practically perform on par with full data. Based on carefully-designed score criteria, existing methods first compute the score of each data point and then select the data points whose scores lie in a certain range to construct a coreset. These methods work well in their respective preconceived scenarios but are not robust to the change of scenarios, since the optimal range of scores varies as the scenario changes. The issue limits the application of these methods, because realistic scenarios often mismatch preconceived ones, and it is inconvenient or unfeasible to tune the criteria and methods accordingly. In this paper, to address the issue, a concept of the moderate coreset is discussed. Specifically, given any score criterion of data selection, different scenarios prefer data points with scores in different intervals. As the score median is a proxy of the score distribution in statistics, the data points with scores close to the score median can be seen as a proxy of full data and generalize to different scenarios, which are used to construct the moderate coreset. As a proof-of-concept, a universal method that inherits the moderate coreset and uses the distance of a data point to its class center as the score criterion is proposed to meet complex realistic scenarios.
Extensive experiments confirm the advantage of our method over prior state-of-the-art methods, leading to a strong baseline for future research. The implementation is available at https://github.com/tmllab/Moderate-DS.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/9b836041e1d1ceaef57bd000a050496665d93f35.zip", "author": "Xiaobo Xia;Jiale Liu;Jun Yu;Xu Shen;Bo Han;Tongliang Liu", "authorids": "~Xiaobo_Xia1;~Jiale_Liu2;~Jun_Yu3;~Xu_Shen1;~Bo_Han1;~Tongliang_Liu1", "gender": "M;;M;M;;M", "homepage": "https://xiaoboxia.github.io/;;https://faculty.ustc.edu.cn/yujun_AI/en/index.htm;;;https://tongliang-liu.github.io/", "dblp": "242/8072;;50/5754-1.html;09/10130-1.html;;150/6667", "google_scholar": "jRsugY0AAAAJ;;efZyqyQAAAAJ;38jwGs8AAAAJ;;https://scholar.google.com.au/citations?user=EiLdZ_YAAAAJ", "orcid": ";;0000-0002-3197-8103;;;", "linkedin": ";;;;;", "or_profile": "~Xiaobo_Xia1;~Jiale_Liu2;~Jun_Yu3;~Xu_Shen1;~Bo_Han1;~Tongliang_Liu1", "aff": "The University of Sydney;;University of Science and Technology of China;Alibaba Group;;University of Sydney", "aff_domain": "sydney.edu.au;;ustc.edu.cn;alibaba-inc.com;;sydney.edu.au", "position": "PhD student;;Associate Professor;Researcher;;Lecturer", "bibtex": "@inproceedings{\nxia2023moderate,\ntitle={Moderate Coreset: A Universal Method of Data Selection for Real-world Data-efficient Deep Learning},\nauthor={Xiaobo Xia and Jiale Liu and Jun Yu and Xu Shen and Bo Han and Tongliang Liu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=7D5EECbOaf9}\n}", "github": "", "project": "", "reviewers": "vgUK;GiR4;Ergu;vuQC", "pdf_size": 662100, "recommendation": "5;6;8;8", "confidence": "4;4;3;5", "correctness": "3;3;3;4", "technical_novelty": "1;2;3;2", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "261;86;57;107", "wc_strength_and_weaknesses": "141;373;34;187", "wc_clarity_quality_novelty_and_reproducibility": "37;54;100;389", "wc_summary_review": "125;34;61;92", "wc_review": "564;547;252;775", "wc_reply_reviewers": "128;15;24;26", "wc_reply_authors": "2598;1612;900;1010", "reply_reviewers": "1;1;1;1", "reply_authors": "12;5;4;4", "recommendation_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 127.75, 78.95370478957906 ], "wc_strength_and_weaknesses_avg": [ 183.75, 122.55483466595678 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 145.0, 142.74627841033194 ], "wc_summary_review_avg": [ 78.0, 34.022051672408 ], "wc_review_avg": [ 534.5, 186.19411913376857 ], "wc_reply_reviewers_avg": [ 48.25, 46.2297252857942 ], "wc_reply_authors_avg": [ 1530.0, 673.5443563715755 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 6.25, 3.344772040064913 ], "replies_avg": [ 35, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5555555555555555, "gs_citation": 102, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=909200736300260421&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "pdf": "https://openreview.net/pdf?id=7D5EECbOaf9", "email": "sydney.edu.au;;ustc.edu.cn;alibaba-inc.com;;sydney.edu.au", "author_num": 6, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Sydney;University of Science and Technology of China;Alibaba Group", "aff_unique_dep": 
";;", "aff_unique_url": "https://www.sydney.edu.au;http://www.ustc.edu.cn;https://www.alibaba.com", "aff_unique_abbr": "USYD;USTC;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "Australia;China" }, { "id": "7Di4aNrBAhv", "title": "Contrastive Graph Few-Shot Learning", "track": "main", "status": "Withdraw", "tldr": "We propose CGFL, a general and effective framework to mitigate the distribution shift impact for learning more generalizable representations on graph few-shot-learning tasks.", "abstract": "Prevailing supervised deep graph learning models often suffer from label sparsity issue. Although many graph few-shot learning (GFL) methods have been developed to avoid performance degradation in face of limited annotated data, they excessively rely on labeled data, where the distribution shift in the test phase might result in impaired generalization ability. Additionally, they lack a general purpose as their designs are coupled with task or data-specific characteristics. To this end, we propose a general and effective Contrastive Graph Few-shot Learning framework (CGFL). CGFL leverages a self-distilled contrastive learning procedure to boost GFL. Specifically, our model firstly pre-trains a graph encoder with contrastive learning using unlabeled data. Later, the trained encoder is frozen as a teacher model to distill a student model with a contrastive loss. The distilled model is finally fed to GFL. CGFL learns data representation in a self-supervised manner, thus mitigating the distribution shift impact for better generalization and making model task and data-independent for a general graph mining purpose. Furthermore, we introduce an information-based method to quantitatively measure the capability of CGFL. Comprehensive experiments demonstrate that CGFL outperforms state- of-the-art baselines on several graph mining tasks across various datasets in the few-shot scenario. 
We also provide a quantitative measurement of CGFL\u2019s success.", "keywords": "Graph representation learning;Few-shot learning;Contrastive learning", "primary_area": "", "supplementary_material": "/attachment/b5a4ae916f877ef47592e3b37212725ca34f142c.zip", "author": "Chunhui Zhang;Hongfu Liu;Jundong Li;Yanfang Ye;Chuxu Zhang", "authorids": "~Chunhui_Zhang1;~Hongfu_Liu2;~Jundong_Li2;~Yanfang_Ye1;~Chuxu_Zhang2", "gender": "M;M;M;;", "homepage": "https://chunhuizng.github.io;http://hongfuliu.com/;https://jundongli.github.io/;http://yes-lab.org/;", "dblp": "62/3401;32/9075-1;144/7997.html;;", "google_scholar": "https://scholar.google.com.hk/citations?user=jlqnbkAAAAAJ;https://scholar.google.com/citations?hl=en;uY6ek7sAAAAJ;egjr888AAAAJ;", "orcid": ";;;;", "linkedin": "chunhui-zhang-541827161/;;;;", "or_profile": "~Chunhui_Zhang1;~Hongfu_Liu2;~Jundong_Li2;~Yanfang_Ye1;~Chuxu_Zhang2", "aff": "Brandeis University;Brandeis University;University of Virginia;University of Notre Dame;", "aff_domain": "brandeis.edu;brandeis.edu;virginia.edu;nd.edu;", "position": "MS student;Assistant Professor;Assistant Professor;Associate Professor;", "bibtex": "@misc{\nzhang2023contrastive,\ntitle={Contrastive Graph Few-Shot Learning},\nauthor={Chunhui Zhang and Hongfu Liu and Jundong Li and Yanfang Ye and Chuxu Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=7Di4aNrBAhv}\n}", "github": "", "project": "", "reviewers": "Y3ks;Y6TK;xg15;aDa5;R1dy", "site": "https://openreview.net/forum?id=7Di4aNrBAhv", "pdf_size": 569596, "recommendation": "3;3;5;5;6", "confidence": "4;4;3;3;3", "correctness": "3;2;3;3;4", "technical_novelty": "2;2;2;2;2", "empirical_novelty": "2;2;2;0;2", "wc_summary_paper": "92;107;102;65;111", "wc_strength_and_weaknesses": "101;652;261;239;161", "wc_clarity_quality_novelty_and_reproducibility": "17;55;54;90;490", "wc_summary_review": "7;7;53;50;99", "wc_review": "217;821;470;444;861", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;0;0", "reply_authors": "0;0;0;0;0", "recommendation_avg": [ 4.4, 1.2 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "correctness_avg": [ 3.0, 0.6324555320336759 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 1.6, 0.8 ], "wc_summary_paper_avg": [ 95.4, 16.475436261295176 ], "wc_strength_and_weaknesses_avg": [ 282.8, 193.17805258362037 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 141.2, 175.92202818294246 ], "wc_summary_review_avg": [ 43.2, 34.28352373954579 ], "wc_review_avg": [ 562.6, 244.0873614098034 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.9525793444156803, "corr_recommendation_correctness": 0.7905694150420948, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14945782772589777137&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Brandeis University;University of Virginia;University of Notre Dame", "aff_unique_dep": ";;", "aff_unique_url": "https://www.brandeis.edu;https://www.virginia.edu;https://www.nd.edu", "aff_unique_abbr": "Brandeis;UVA;Notre Dame", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "7DtgxVZGj-y", "title": "Contrastive Unsupervised Learning of World Model with Invariant Causal Features", 
"track": "main", "status": "Reject", "tldr": "We present a world model, which learns the causal features using invariance principle and achieves state-of-the-art performance on out-of-distribution generalisation.", "abstract": "In this paper we present a world model, which learns the causal features using invariance principle. We use contrastive unsupervised learning to learn the invariant causal features, which enforces invariance across augmentations of irrelevant parts or styles of the observation. Since the world model based reinforcement learning methods optimize representation learning and policy of the agent independently, contrastive loss collapses due to lack of supervisory signal to the representation learning module. We propose depth reconstruction as an auxiliary task to explicitly enforce the invariance and data augmentation as style intervention on the RGB space to mitigate this issue. Our design help us to leverage state-of-the-art unsupervised representation learning method to learn the world model with invariant causal features, which outperforms current state-of-the-art model-based as well as model-free reinforcement learning methods on out-of-distribution point navigation tasks on Gibson and iGibson dataset at 100k and 500k interaction step benchmarks. Further experiments on DeepMind control suite even without depth reconstruction, our proposed model performs on par with the state-of-the-art counterpart models.", "keywords": "world models;causality;contrastive learning;model-based reinforcement learning;reinforcement learning;out-of-distribution generalisation;sim-to-real transfer;robot navigation", "primary_area": "", "supplementary_material": "", "author": "Rudra P. K. Poudel;Harit Pandya;Roberto Cipolla", "authorids": "~Rudra_P._K._Poudel1;~Harit_Pandya1;~Roberto_Cipolla1", "gender": "M;M;M", "homepage": ";https://mi.eng.cam.ac.uk/~cipolla/;https://www.rudrapoudel.com", "dblp": "155/3207;c/RobertoCipolla;08/11431", "google_scholar": "https://scholar.google.co.uk/citations?user=bOWP5gQAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.co.uk/citations?user=Rw4cmbUAAAAJ", "orcid": ";0000-0002-8999-2151;", "linkedin": ";;https://linkedin.com/in/rudrapoudel", "or_profile": "~Harit_Pandya1;~Roberto_Cipolla1;~Rudra_Poudel1", "aff": "Toshiba Europe;University of Cambridge;Toshiba Europe Ltd", "aff_domain": "toshiba.eu;cam.ac.uk;crl.toshiba.co.uk", "position": "Researcher;Full Professor;Senior Research Scientist", "bibtex": "@misc{\npoudel2023contrastive,\ntitle={Contrastive Unsupervised Learning of World Model with Invariant Causal Features},\nauthor={Rudra P. K. 
Poudel and Harit Pandya and Roberto Cipolla},\nyear={2023},\nurl={https://openreview.net/forum?id=7DtgxVZGj-y}\n}", "github": "", "project": "", "reviewers": "acab;Hdwq;12bZ;tiQV", "site": "https://openreview.net/forum?id=7DtgxVZGj-y", "pdf_size": 8555877, "recommendation": "1;3;3;6", "confidence": "3;5;5;4", "correctness": "2;2;3;4", "technical_novelty": "1;2;2;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "138;82;106;137", "wc_strength_and_weaknesses": "496;1042;128;490", "wc_clarity_quality_novelty_and_reproducibility": "69;57;121;55", "wc_summary_review": "29;51;84;33", "wc_review": "732;1232;439;715", "wc_reply_reviewers": "0;0;0;88", "wc_reply_authors": "934;460;598;670", "reply_reviewers": "0;0;0;1", "reply_authors": "2;1;1;2", "recommendation_avg": [ 3.25, 1.7853571071357126 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "correctness_avg": [ 2.75, 0.82915619758885 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 115.75, 23.34925052330374 ], "wc_strength_and_weaknesses_avg": [ 539.0, 326.41231594411386 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 75.5, 26.80951323690902 ], "wc_summary_review_avg": [ 49.25, 21.706853756359994 ], "wc_review_avg": [ 779.5, 285.96896684780324 ], "wc_reply_reviewers_avg": [ 22.0, 38.1051177665153 ], "wc_reply_authors_avg": [ 665.5, 172.40867147565402 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.2955402316445243, "corr_recommendation_correctness": 0.8866206949335731, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=156367615605998644&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Toshiba;University of Cambridge", "aff_unique_dep": ";", "aff_unique_url": "https://toshiba.eu;https://www.cam.ac.uk", "aff_unique_abbr": "Toshiba;Cambridge", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Unknown;United Kingdom" }, { "id": "7EUu177KXY", "title": "D3C2-Net: Dual-Domain Deep Convolutional Coding Network for Compressive Sensing", "track": "main", "status": "Withdraw", "tldr": "We propose a novel D3C2-Net for compressive sensing based on our newly proposed generalized dual-domain optimization framework, achieving higher performance than other state-of-the-art methods.", "abstract": "Mapping optimization algorithms into neural networks, deep unfolding networks (DUNs) have achieved impressive success in compressive sensing (CS). From the perspective of optimization, DUNs inherit a well-defined and interpretable structure from iterative steps. However, from the viewpoint of neural network design, most existing DUNs are inherently established based on traditional image-domain unfolding, which takes single-channel images as inputs and outputs between adjacent stages, resulting in insufficient information transmission capability and the inevitable loss of image details. In this paper, to break the above bottleneck, we propose a generalized dual-domain optimization framework, which is general for inverse imaging problems and integrates the merits of both (1) image-domain and (2) convolutional-coding-domain priors to constrain the feasible region of the solution space. 
By unfolding the proposed optimization framework into deep neural networks, we further design a novel Dual-Domain Deep Convolutional Coding Network ($\\mathrm{D^3C^2}$-Net) for CS imaging with the ability to transmit high-capacity features through all the unfolded stages. Experiments on multiple natural and MR image datasets demonstrate that our $\\mathrm{D^3C^2}$-Net achieves higher performance and better accuracy-complexity trade-offs than other state-of-the-art methods.", "keywords": "image reconstruction;compressive sensing (CS);convolutional coding;dual-domain optimization;deep unfolding networks", "primary_area": "", "supplementary_material": "", "author": "Weiqi Li;Bin Chen;Jian Zhang", "authorids": "~Weiqi_Li1;~Bin_Chen8;~Jian_Zhang22", "gender": "M;M;M", "homepage": "https://villa.jianzhang.tech/people/weiqi-li-%E6%9D%8E%E7%8E%AE%E7%90%A6/;https://villa.jianzhang.tech/people/bin-chen-%E9%99%88%E6%96%8C/;http://jianzhang.tech/", "dblp": ";22/5523-1.html;07/314-18", "google_scholar": "SIkQdEsAAAAJ;https://scholar.google.com.hk/citations?user=aZDNm98AAAAJ;7brFI_4AAAAJ", "orcid": ";0000-0002-9056-3717;0000-0001-5486-3125", "linkedin": ";;", "or_profile": "~Weiqi_Li1;~Bin_Chen8;~Jian_Zhang22", "aff": "Peking University;Peking University;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;pku.edu.cn", "position": "PhD student;MS student;Assistant Professor", "bibtex": "@misc{\nli2023dcnet,\ntitle={D3C2-Net: Dual-Domain Deep Convolutional Coding Network for Compressive Sensing},\nauthor={Weiqi Li and Bin Chen and Jian Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=7EUu177KXY}\n}", "github": "", "project": "", "reviewers": "a79H;hEC2;yUST", "site": "https://openreview.net/forum?id=7EUu177KXY", "pdf_size": 21740721, "recommendation": "3;5;5", "confidence": "5;3;4", "correctness": "4;3;3", "technical_novelty": "1;2;2", "empirical_novelty": "0;2;2", "wc_summary_paper": "41;60;70", "wc_strength_and_weaknesses": "163;107;302", "wc_clarity_quality_novelty_and_reproducibility": "27;17;53", "wc_summary_review": "34;50;51", "wc_review": "265;234;476", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.3333333333333333, 0.9428090415820634 ], "wc_summary_paper_avg": [ 57.0, 12.027745701779143 ], "wc_strength_and_weaknesses_avg": [ 190.66666666666666, 81.97696153323946 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 32.333333333333336, 15.173075568988057 ], "wc_summary_review_avg": [ 45.0, 7.788880963698615 ], "wc_review_avg": [ 325.0, 107.52054067324376 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.8660254037844385, "corr_recommendation_correctness": -0.9999999999999998, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16854954474123259421&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": 
"0;0;0", "aff_country_unique": "China" }, { "title": "Towards Robustness Certification Against Universal Perturbations", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11891", "id": "7GEvPKxjtt", "poster": "", "openreview": "https://openreview.net/forum?id=7GEvPKxjtt", "slides": "https://iclr.cc/virtual/2023/poster/11891", "video": "https://iclr.cc/virtual/2023/poster/11891", "author_site": "Yi Zeng, Zhouxing Shi, Ming Jin, Feiyang Kang, Lingjuan Lyu, Cho-Jui Hsieh, Ruoxi Jia", "tldr": "A robustness certification framework against universal perturbations (including both universal adversarial noise and backdoor attacks).", "abstract": "In this paper, we investigate the problem of certifying neural network robustness against universal perturbations (UPs), which have been widely used in universal adversarial attacks and backdoor attacks. Existing robustness certification methods aim to provide robustness guarantees for each sample with respect to the worst-case perturbations given a neural network. However, those sample-wise bounds will be loose when considering the UP threat model as they overlook the important constraint that the perturbation should be shared across all samples. We propose a method based on a combination of linear relaxation-based perturbation analysis and Mixed Integer Linear Programming to establish the first robust certification method for UP. In addition, we develop a theoretical framework for computing error bounds on the entire population using the certification results from a randomly sampled batch. Aside from an extensive evaluation of the proposed certification, we further show how the certification facilitates efficient comparison of robustness among different models or efficacy among different universal adversarial attack defenses and enables accurate detection of backdoor target classes.", "keywords": "Universal Perturbation;Adversarial Attack;Backdoor Attack;Certified Robustness;Poisoning Attack", "primary_area": "", "supplementary_material": "", "author": "Yi Zeng;Zhouxing Shi;Ming Jin;Feiyang Kang;Lingjuan Lyu;Cho-Jui Hsieh;Ruoxi Jia", "authorids": "~Yi_Zeng3;~Zhouxing_Shi1;~Ming_Jin2;~Feiyang_Kang1;~Lingjuan_Lyu1;~Cho-Jui_Hsieh1;~Ruoxi_Jia1", "gender": "M;;M;M;F;M;", "homepage": "https://yizeng623.github.io/;https://shizhouxing.github.io;http://www.jinming.tech/;;https://sites.google.com/view/lingjuan-lyu;http://web.cs.ucla.edu/~chohsieh/index.html;https://ruoxijia.info/", "dblp": "75/148;232/2169;;218/1175;178/9876;14/2770;147/5355-1", "google_scholar": "slUNmHQAAAAJ;YFIr4PwAAAAJ;YdxdTtkAAAAJ;_6mV_iEAAAAJ;;Wy89g4IAAAAJ;JCrug-YAAAAJ", "orcid": "0000-0002-6901-9194;;;;;;", "linkedin": "chnyizeng/;;;;;;", "or_profile": "~Yi_Zeng3;~Zhouxing_Shi1;~Ming_Jin2;~Feiyang_Kang1;~Lingjuan_Lyu1;~Cho-Jui_Hsieh1;~Ruoxi_Jia1", "aff": "Virginia Tech;University of California, Los Angeles;Virginia Tech;Virginia Tech;Sony;Amazon;Virginia Tech", "aff_domain": "vt.edu;ucla.edu;vt.edu;vt.edu;sony.com;amazon.com;vt.edu", "position": "PhD student;PhD student;Assistant Professor;PhD student;scientist;visiting scholar;Assistant Professor", "bibtex": "@inproceedings{\nzeng2023towards,\ntitle={Towards Robustness Certification Against Universal Perturbations},\nauthor={Yi Zeng and Zhouxing Shi and Ming Jin and Feiyang Kang and Lingjuan Lyu and Cho-Jui Hsieh and Ruoxi Jia},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=7GEvPKxjtt}\n}", "github": "", "project": "", 
"reviewers": "wZWn;Kpjw;GEmp;e7K7", "pdf_size": 1588189, "recommendation": "5;5;8;8", "confidence": "4;3;3;5", "correctness": "2;3;3;3", "technical_novelty": "2;2;4;4", "empirical_novelty": "2;2;3;4", "wc_summary_paper": "60;39;145;108", "wc_strength_and_weaknesses": "70;93;230;380", "wc_clarity_quality_novelty_and_reproducibility": "325;63;51;82", "wc_summary_review": "31;39;50;70", "wc_review": "486;234;476;640", "wc_reply_reviewers": "66;175;0;22", "wc_reply_authors": "3017;1686;86;94", "reply_reviewers": "2;1;0;1", "reply_authors": "15;10;1;1", "recommendation_avg": [ 6.5, 1.5 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 1.0 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 88.0, 41.33400537088077 ], "wc_strength_and_weaknesses_avg": [ 193.25, 123.96244390943573 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 130.25, 112.98091653018221 ], "wc_summary_review_avg": [ 47.5, 14.637281168304447 ], "wc_review_avg": [ 459.0, 145.2618325645109 ], "wc_reply_reviewers_avg": [ 65.75, 67.40317129037773 ], "wc_reply_authors_avg": [ 1220.75, 1224.7647478189433 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 6.75, 6.015604707757983 ], "replies_avg": [ 37, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.3015113445777637, "corr_recommendation_correctness": 0.5773502691896258, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17291053604651641242&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "pdf": "https://openreview.net/pdf?id=7GEvPKxjtt", "email": "vt.edu;ucla.edu;vt.edu;vt.edu;sony.com;amazon.com;vt.edu", "author_num": 7, "aff_unique_index": "0;1;0;0;2;3;0", "aff_unique_norm": "Virginia Tech;University of California, Los Angeles;Sony Corporation;Amazon", "aff_unique_dep": ";;;Amazon.com, Inc.", "aff_unique_url": "https://www.vt.edu;https://www.ucla.edu;https://www.sony.com;https://www.amazon.com", "aff_unique_abbr": "VT;UCLA;Sony;Amazon", "aff_campus_unique_index": "1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;0;0;0;1;0;0", "aff_country_unique": "United States;Japan" }, { "id": "7GQfA9xAqxN", "title": "Hypothetical Training for Robust Machine Reading Comprehension of Tabular Context", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Machine Reading Comprehension (MRC) models easily learn the spurious correlations from complex context such as tabular data. Counterfactual training\u2014using the original and augmented data\u2014has become a promising solution. However, it is costly to construct faithful counterfactual examples because it is tricky to maintain the consistency and dependency of the table entries. In this paper, we take a more economic fashion to ask hypothetical questions, e.g., \u201cin which year would the net profit be larger if the revenue in 2019 were $38,298?\u201d, whose effects on the answers are equivalent to those expensive counterfactual tables. We propose a hypothetical training framework that uses pairs of examples with different hypothetical questions to supervise the direction of model gradient w.r.t. the input towards the answer change. We conduct experiments on MRC datasets with factual and hypothetical examples. 
Performance gain on a newly constructed stress test validates the effectiveness and rationality of our approach.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Moxin Li;Wenjie Wang;Fuli Feng;Hanwang Zhang;Qifan Wang;Tat-Seng Chua", "authorids": "~Moxin_Li2;~Wenjie_Wang1;~Fuli_Feng1;~Hanwang_Zhang3;~Qifan_Wang2;~Tat-Seng_Chua2", "gender": "M;M;M;M;F;M", "homepage": "https://wenjiewwj.github.io/;https://fulifeng.github.io/;https://mreallab.github.io/index.html;https://wqfcr.github.io/;https://li-moxin.github.io/HelloFromMoxin/;http://www.comp.nus.edu.sg/~chuats/", "dblp": "38/1956-7;183/9198;79/8116.html;33/8610;266/2836;", "google_scholar": "Ma5DtmoAAAAJ;https://scholar.google.com.sg/citations?user=QePM4u8AAAAJ;YG0DFyYAAAAJ;LrSyLosAAAAJ;5Yp7L3kAAAAJ;https://scholar.google.com.tw/citations?user=Z9DWCBEAAAAJ", "orcid": "0000-0002-5199-1428;0000-0002-5828-9842;;0000-0002-7570-5756;;0000-0001-6097-7807", "linkedin": ";;;;;", "or_profile": "~Wenjie_Wang1;~Fuli_Feng1;~Hanwang_Zhang3;~Qifan_Wang2;~Li_Moxin1;~Tat-seng_Chua1", "aff": "National University of Singapore;University of Science and Technology of China;Nanyang Technological University;Meta AI;National University of Singapore;National University of Singapore", "aff_domain": "nus.edu.sg;ustc.edu.cn;ntu.edu.sg;fb.com;nus.edu.sg;nus.edu.sg", "position": "PhD student;Full Professor;Associate Professor;Principal Researcher;PhD student;Full Professor", "bibtex": "@misc{\nli2023hypothetical,\ntitle={Hypothetical Training for Robust Machine Reading Comprehension of Tabular Context},\nauthor={Moxin Li and Wenjie Wang and Fuli Feng and Hanwang Zhang and Qifan Wang and Tat-Seng Chua},\nyear={2023},\nurl={https://openreview.net/forum?id=7GQfA9xAqxN}\n}", "github": "", "project": "", "reviewers": "dvUJ;k6fA;9LLL;aWjz", "site": "https://openreview.net/forum?id=7GQfA9xAqxN", "pdf_size": 574837, "recommendation": "3;3;6;8", "confidence": "3;3;4;5", "correctness": "2;3;3;4", "technical_novelty": "3;3;2;4", "empirical_novelty": "2;3;2;4", "wc_summary_paper": "132;107;118;50", "wc_strength_and_weaknesses": "83;574;185;137", "wc_clarity_quality_novelty_and_reproducibility": "39;33;708;26", "wc_summary_review": "69;33;59;30", "wc_review": "323;747;1070;243", "wc_reply_reviewers": "102;93;37;0", "wc_reply_authors": "614;1112;824;107", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;1", "recommendation_avg": [ 5.0, 2.1213203435596424 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 101.75, 31.163881337214722 ], "wc_strength_and_weaknesses_avg": [ 244.75, 193.48691816244323 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 201.5, 292.4641003610529 ], "wc_summary_review_avg": [ 47.75, 16.663958113245485 ], "wc_review_avg": [ 595.75, 334.14620677182614 ], "wc_reply_reviewers_avg": [ 58.0, 41.731283229730664 ], "wc_reply_authors_avg": [ 664.25, 367.1010589742285 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.994936676326182, "corr_recommendation_correctness": 0.8333333333333334, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5755396363880229776&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2;3;0;0", "aff_unique_norm": "National 
University of Singapore;University of Science and Technology of China;Nanyang Technological University;Meta", "aff_unique_dep": ";;;Meta AI", "aff_unique_url": "https://www.nus.edu.sg;http://www.ustc.edu.cn;https://www.ntu.edu.sg;https://meta.com", "aff_unique_abbr": "NUS;USTC;NTU;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;2;0;0", "aff_country_unique": "Singapore;China;United States" }, { "id": "7HSHJQwkna0", "title": "Learn Together, Stop Apart: An Inclusive Approach To Early Stopping", "track": "main", "status": "Desk Reject", "tldr": "We propose a new scheme for GB pruning based on adaptive stops for different data regions", "abstract": "Gradient Boosting is the most popular method of constructing ensembles, allowing one to obtain state-of-the-art results on many tasks. One of the critical parameters affecting the quality of the learned model is the number of members in the ensemble or the number of boosting iterations. Unfortunately, the problem of selecting the optimal number of models still remains open and understudied. This paper proposes a new look at the optimal stop selection problem in Gradient Boosting. In contrast to the classical approaches that select a universal ensemble size using a hold-out validation set, our algorithm takes into account the heterogeneity of data in the feature space and adaptively sets different numbers of models for different regions of data, but it still uses the same common ensemble trained for the whole task. Experiments on SOTA implementations of Gradient Boosting show that the proposed method does not affect the complexity of learning algorithms and significantly increases quality on most standard benchmarks, by up to 2%.", "keywords": "ensemble;boosting;regularization;clusterization", "primary_area": "", "supplementary_material": "", "author": "Bulat Ibragimov;Gleb Gennadjevich Gusev", "authorids": "~Bulat_Ibragimov4;~Gleb_Gennadjevich_Gusev1", "gender": "M;M", "homepage": ";https://sberlabs.com/laboratories/sber-ai-lab", "dblp": ";117/9143.html", "google_scholar": "oQXXE_0AAAAJ;https://scholar.google.ru/citations?user=RWX4sYcAAAAJ", "orcid": "0000-0001-8540-0684;0009-0003-7298-1848", "linkedin": ";gleb-gusev-55a6a0ab/", "or_profile": "~Bulat_Ibragimov4;~Gleb_Gennadjevich_Gusev1", "aff": "Moscow Institute of Physics and Technology;ARTIFICIAL INTELLIGENCE RESEARCH INSTITUTE (AIRI)", "aff_domain": "phystech.edu;airi.net", "position": "PhD student;Principal Researcher", "bibtex": "@misc{\nibragimov2023learn,\ntitle={Learn Together, Stop Apart: An Inclusive Approach To Early Stopping},\nauthor={Bulat Ibragimov and Gleb Gennadjevich Gusev},\nyear={2023},\nurl={https://openreview.net/forum?id=7HSHJQwkna0}\n}", "github": "", "project": "", "reviewers": "", "site": "https://openreview.net/forum?id=7HSHJQwkna0", "pdf_size": 13098633, "recommendation": "", "confidence": "", "correctness": "", "technical_novelty": "", "empirical_novelty": "", "wc_summary_paper": "", "wc_strength_and_weaknesses": "", "wc_clarity_quality_novelty_and_reproducibility": "", "wc_summary_review": "", "wc_review": "", "wc_reply_reviewers": "", "wc_reply_authors": "", "reply_reviewers": "", "reply_authors": "", "recommendation_avg": [ 0, 0 ], "confidence_avg": [ 0, 0 ], "correctness_avg": [ 0, 0 ], "technical_novelty_avg": [ 0, 0 ], "empirical_novelty_avg": [ 0, 0 ], "wc_summary_paper_avg": [ 0, 0 ], "wc_strength_and_weaknesses_avg": [ 0, 0 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 0, 0 ], "wc_summary_review_avg": [ 0, 0 
], "wc_review_avg": [ 0, 0 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 1, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0, "corr_recommendation_correctness": 0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:GSV_PqUAOaoJ:scholar.google.com/&scioq=Learn+Together,+Stop+Apart:+An+Inclusive+Approach+To+Early+Stopping&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Moscow Institute of Physics and Technology;", "aff_unique_dep": ";", "aff_unique_url": "https://www.mipt.ru/en;", "aff_unique_abbr": "MIPT;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0", "aff_country_unique": "Russian Federation;" }, { "id": "7HgnhMmbIB", "title": "Cross-Protein Wasserstein Transformer for Protein-Protein Interactions", "track": "main", "status": "Reject", "tldr": "", "abstract": "Previous studies reveal intimate relationships between the structure and function of proteins. Motivated by this, for protein-protein interactions (PPIs), we hypothesize that cross-protein structural correspondence, including both global correlation and local co-occurrence, poses a great influence. Accordingly, a novel deep learning framework named Cross-Protein Wasserstein Transformer (CPWT) is proposed to predict PPI sites through fine-grained cross-graph structural modeling. Considering the irregular architecture of acid sequences, for a pair of proteins, graphs are constructed to describe them. Then, a core Cross-Graph Transformer (CGT) module of two branches (e.g. ligand and receptor branches) is proposed for cross-protein structural modeling. Specifically, in this module, Wasserstein affinity across graphs is calculated through cross-graph query (i.e. ligand (query) - receptor (key) or the converse), based on which the multi-head attention is derived to adaptively mine fine-grained cues of PPI sites. By stacking CGT modules, the two branches in CGT are co-evolved in a deep architecture during forward inference, hence being powerful and advantageous in cross-protein structural representation and fine-grained learning. 
We verify the effectiveness of our CPWT framework by conducting comprehensive experiments on multiple PPI datasets, and further visualize the learned fine-grained saliencies for intuitive understanding.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Gongping Xu;Tong Zhang;Wenting Zhao;Zhen Cui;Jian Yang", "authorids": "~Gongping_Xu1;~Tong_Zhang8;~Wenting_Zhao2;~Zhen_Cui4;~Jian_Yang1", "gender": "M;M;;M;M", "homepage": "https://github.com/XuBlack;https://vgg-ai.cn/teachers/ZhangTong/;;http://aip.seu.edu.cn/zcui/;", "dblp": ";07/4227-21;41/10049-1.html;59/8491-1;y/JianYang3.html", "google_scholar": "Kbk4OtwAAAAJ;;;ChRyl3kAAAAJ;https://scholar.google.com.hk/citations?user=6CIDtZQAAAAJ", "orcid": ";0000-0001-6212-4891;0000-0003-0313-5149;;", "linkedin": ";;;;", "or_profile": "~Gongping_Xu1;~Tong_Zhang8;~Wenting_Zhao2;~Zhen_Cui4;~Jian_Yang1", "aff": "Nanjing University of Science and Technology;Nanjing University of Science and Technology;Nanjing University of Science and Technology;Nanjing University of Science and Technology;Nanjing University of Science and Technology", "aff_domain": "njust.edu.cn;njust.edu.cn;njust.edu.cn;njust.edu.cn;njust.edu.cn", "position": "MS student;Associate Professor;PhD student;Full Professor;Full Professor", "bibtex": "@misc{\nxu2023crossprotein,\ntitle={Cross-Protein Wasserstein Transformer for Protein-Protein Interactions},\nauthor={Gongping Xu and Tong Zhang and Wenting Zhao and Zhen Cui and Jian Yang},\nyear={2023},\nurl={https://openreview.net/forum?id=7HgnhMmbIB}\n}", "github": "", "project": "", "reviewers": "sndu;1voW;HBkA;jcwb", "site": "https://openreview.net/forum?id=7HgnhMmbIB", "pdf_size": 821617, "recommendation": "3;3;3;5", "confidence": "4;4;4;4", "correctness": "2;2;3;4", "technical_novelty": "2;1;2;2", "empirical_novelty": "3;1;2;0", "wc_summary_paper": "40;108;66;156", "wc_strength_and_weaknesses": "43;96;53;201", "wc_clarity_quality_novelty_and_reproducibility": "89;48;104;22", "wc_summary_review": "82;246;62;17", "wc_review": "254;498;285;396", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.75, 0.82915619758885 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.5, 1.118033988749895 ], "wc_summary_paper_avg": [ 92.5, 43.96305266925854 ], "wc_strength_and_weaknesses_avg": [ 98.25, 62.57545445300418 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 65.75, 32.529794035622174 ], "wc_summary_review_avg": [ 101.75, 86.54586934106099 ], "wc_review_avg": [ 358.25, 96.42192437407584 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.8703882797784891, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:3qn8deUGB6UJ:scholar.google.com/&scioq=Cross-Protein+Wasserstein+Transformer+for+Protein-Protein+Interactions&hl=en&as_sdt=0,34", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Nanjing University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.nust.edu.cn/", "aff_unique_abbr": "NUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": 
"China" }, { "title": "Domain Generalisation via Domain Adaptation: An Adversarial Fourier Amplitude Approach", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11223", "id": "7IG0wsTND7w", "poster": "/media/PosterPDFs/ICLR%202023/11223.png?t=1680796367.4392729", "openreview": "https://openreview.net/forum?id=7IG0wsTND7w", "slides": "https://iclr.cc/virtual/2023/poster/11223", "video": "https://iclr.cc/virtual/2023/poster/11223", "author_site": "Minyoung Kim, Da Li, Timothy Hospedales", "tldr": "We tackle the domain generalisation problem by posing it as a domain adaptation task where we adversarially synthesise the worst-case target domain via Fourier amplitude generation.", "abstract": "We tackle the domain generalisation (DG) problem by posing it as a domain adaptation (DA) task where we adversarially synthesise the worst-case `target' domain and adapt a model to that worst-case domain, thereby improving the model\u2019s robustness. To synthesise data that is challenging yet semantics-preserving, we generate Fourier amplitude images and combine them with source domain phase images, exploiting the widely believed conjecture from signal processing that amplitude spectra mainly determines image style, while phase data mainly captures image semantics. To synthesise a worst-case domain for adaptation, we train the classifier and the amplitude generator adversarially. Specifically, we exploit the maximum classifier discrepancy (MCD) principle from DA that relates the target domain performance to the discrepancy of classifiers in the model hypothesis space. By Bayesian hypothesis modeling, we express the model hypothesis space effectively as a posterior distribution over classifiers given the source domains, making adversarial MCD minimisation feasible. 
On the DomainBed benchmark including the large-scale DomainNet dataset, the proposed approach yields significantly improved domain generalisation performance over the state-of-the-art.", "keywords": "Domain generalisation;Domain adaptation;Fourier analysis", "primary_area": "", "supplementary_material": "", "author": "Minyoung Kim;Da Li;Timothy Hospedales", "authorids": "~Minyoung_Kim2;~Da_Li3;~Timothy_Hospedales1", "gender": "M;M;M", "homepage": "https://sites.google.com/site/mikim21/;https://dali-dl.github.io/;http://homepages.inf.ed.ac.uk/thospeda/", "dblp": ";43/4804-1;32/3545", "google_scholar": ";RPvaE3oAAAAJ;https://scholar.google.fr/citations?user=nHhtvqkAAAAJ", "orcid": ";0000-0002-2101-2989;0000-0003-4867-7486", "linkedin": ";;timothyhospedales/", "or_profile": "~Minyoung_Kim2;~Da_Li3;~Timothy_Hospedales1", "aff": "Samsung AI Center, Cambridge, UK;University of Edinburgh;Samsung AI Research Centre", "aff_domain": "samsung.com;ed.ac.uk;samsung.com", "position": "Senior Researcher;Visiting Scholar;Principal Researcher", "bibtex": "@inproceedings{\nkim2023domain,\ntitle={Domain Generalisation via Domain Adaptation: An Adversarial Fourier Amplitude Approach},\nauthor={Minyoung Kim and Da Li and Timothy Hospedales},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=7IG0wsTND7w}\n}", "github": "", "project": "", "reviewers": "pXLL;71uu;5QDe;M2ax", "pdf_size": 8398724, "recommendation": "5;5;6;6", "confidence": "3;3;5;3", "correctness": "3;3;4;3", "technical_novelty": "2;3;3;2", "empirical_novelty": "3;3;0;3", "wc_summary_paper": "66;77;56;75", "wc_strength_and_weaknesses": "144;119;245;105", "wc_clarity_quality_novelty_and_reproducibility": "67;42;17;118", "wc_summary_review": "43;119;44;50", "wc_review": "320;357;362;348", "wc_reply_reviewers": "0;137;0;0", "wc_reply_authors": "552;489;460;188", "reply_reviewers": "0;1;0;0", "reply_authors": "1;3;1;1", "recommendation_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.25, 1.299038105676658 ], "wc_summary_paper_avg": [ 68.5, 8.32165848854662 ], "wc_strength_and_weaknesses_avg": [ 153.25, 54.78309502027062 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 61.0, 37.3563916887057 ], "wc_summary_review_avg": [ 64.0, 31.866910738256383 ], "wc_review_avg": [ 346.75, 16.23845743905498 ], "wc_reply_reviewers_avg": [ 34.25, 59.322740159234044 ], "wc_reply_authors_avg": [ 422.25, 139.27378611928376 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14264906480525916640&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=7IG0wsTND7w", "email": "samsung.com;ed.ac.uk;samsung.com", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Samsung;University of Edinburgh", "aff_unique_dep": "AI Center;", "aff_unique_url": "https://www.samsung.com/global/research-innovation/ai-research/;https://www.ed.ac.uk", "aff_unique_abbr": "SAC;Edinburgh", "aff_campus_unique_index": "0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United Kingdom;South 
Korea" }, { "id": "7IMneQViz6h", "title": "Mutual Information-guided Knowledge Transfer for Open-World Semi-Supervised Learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "We tackle the open-world semi-supervised learning problem, aiming to cluster novel classes and classify seen classes in unlabeled data based on labeled data from seen classes. The main challenge is to transfer knowledge contained in seen class data to unseen ones. Previous methods mostly transfer knowledge through sharing representation space. However, they learn the seen and unseen classes classifier in a disjoint manner, neglecting the underlying relation between predictions on the seen and unseen classes. Therefore, the learned representations and classifiers are less effective for clustering unseen classes. In this paper, we propose a novel and general method to transfer knowledge between seen and unseen classes. Our insight is to utilize mutual information to measure the generic statistical dependency between seen and unseen classes in the classifier output space, which couple the learning of classifier and promote transferring knowledge between two data sets. To validate the effectiveness and generalization of our method, we conduct extensive experiments on several benchmarks, including CIFAR10/100, Imagenet100, Oxford-IIIT Pet and FGVC-Aicraft datasets. Our results show that the proposed method outperforms previous SOTA by a significant margin on almost all benchmarks.\n", "keywords": "Novel Class Discovery;Open-world Semi-supervised learning;Knowledge Transfer;Mutual Information", "primary_area": "", "supplementary_material": "", "author": "Chuyu Zhang;Ruijie Xu;ChuanYang Hu;Xuming He", "authorids": "~Chuyu_Zhang1;~Ruijie_Xu1;~ChuanYang_Hu1;~Xuming_He3", "gender": "M;F;M;M", "homepage": ";https://github.com/RikkiXu;https://plus.sist.shanghaitech.edu.cn/author/chuanyang-hu/;https://faculty.sist.shanghaitech.edu.cn/faculty/hexm/index.html", "dblp": "270/8658;;;03/4230", "google_scholar": "V7IktkcAAAAJ;;;0KyeZ2QAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Chuyu_Zhang1;~Ruijie_Xu1;~ChuanYang_Hu1;~Xuming_He3", "aff": "ShanghaiTech University;ShanghaiTech University;ShanghaiTech University;ShanghaiTech University", "aff_domain": "shanghaitech.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cn", "position": "PhD student;MS student;MS student;Associate Professor", "bibtex": "@misc{\nzhang2023mutual,\ntitle={Mutual Information-guided Knowledge Transfer for Open-World Semi-Supervised Learning},\nauthor={Chuyu Zhang and Ruijie Xu and ChuanYang Hu and Xuming He},\nyear={2023},\nurl={https://openreview.net/forum?id=7IMneQViz6h}\n}", "github": "", "project": "", "reviewers": "Gwxx;NA5A;UVLc;m3Ej", "site": "https://openreview.net/forum?id=7IMneQViz6h", "pdf_size": 3188612, "recommendation": "3;5;6;6", "confidence": "4;4;4;3", "correctness": "2;3;3;3", "technical_novelty": "1;3;2;3", "empirical_novelty": "1;3;1;3", "wc_summary_paper": "62;45;66;137", "wc_strength_and_weaknesses": "70;184;228;158", "wc_clarity_quality_novelty_and_reproducibility": "345;23;27;120", "wc_summary_review": "42;49;59;51", "wc_review": "519;301;380;466", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.82915619758885 ], "empirical_novelty_avg": [ 2.0, 
1.0 ], "wc_summary_paper_avg": [ 77.5, 35.245567097154215 ], "wc_strength_and_weaknesses_avg": [ 160.0, 57.67148342118486 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 128.75, 130.74474176807266 ], "wc_summary_review_avg": [ 50.25, 6.057020719792859 ], "wc_review_avg": [ 416.5, 83.1098670435707 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.4714045207910316, "corr_recommendation_correctness": 0.9428090415820632, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:gcZ7kc5J7rYJ:scholar.google.com/&scioq=Mutual+Information-guided+Knowledge+Transfer+for+Open-World+Semi-Supervised+Learning&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "ShanghaiTech University", "aff_unique_dep": "", "aff_unique_url": "https://www.shanghaitech.edu.cn", "aff_unique_abbr": "ShanghaiTech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Learning Structured Representations by Embedding Class Hierarchy", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10936", "id": "7J-30ilaUZM", "poster": "/media/PosterPDFs/ICLR%202023/10936.png?t=1681050034.9021764", "openreview": "https://openreview.net/forum?id=7J-30ilaUZM", "slides": "https://iclr.cc/virtual/2023/poster/10936", "video": "https://iclr.cc/virtual/2023/poster/10936", "author_site": "Siqi Zeng, Remi Tachet des Combes, Han Zhao", "tldr": "We propose to learn structured representations that preserve the hierarchy between label classes by using CPCC as a regularizer.", "abstract": "Existing models for learning representations in supervised classification problems are permutation invariant with respect to class labels. However, structured knowledge about the classes, such as hierarchical label structures, widely exists in many real-world datasets, e.g., the ImageNet and CIFAR benchmarks. How to learn representations that can preserve such structures among the classes remains an open problem. To approach this problem, given a tree of class hierarchy, we first define a tree metric between any pair of nodes in the tree to be the length of the shortest path connecting them. We then provide a method to learn the hierarchical relationship of class labels by approximately embedding the tree metric in the Euclidean space of features. More concretely, during supervised training, we propose to use the Cophenetic Correlation Coefficient (CPCC) as a regularizer for the cross-entropy loss to correlate the tree metric of classes and the Euclidean distance in the class-conditioned representations. Our proposed regularizer is computationally lightweight and easy to implement. 
Empirically, we demonstrate that this approach can help to learn more interpretable representations due to the preservation of the tree metric, and leads to better in-distribution generalization as well as under sub-population shifts over six real-world datasets.", "keywords": "structured representations;representation learning;tree embedding", "primary_area": "", "supplementary_material": "/attachment/7b9827cff771424eba7daa3c2921e26d8aaa1383.zip", "author": "Siqi Zeng;Remi Tachet des Combes;Han Zhao", "authorids": "~Siqi_Zeng1;~Remi_Tachet_des_Combes1;~Han_Zhao1", "gender": ";M;M", "homepage": "https://cindy2000sh.github.io/;;https://hanzhaoml.github.io/", "dblp": "135/7166;146/0392;03/3520-2", "google_scholar": "5If-3u4AAAAJ;1MZF70cAAAAJ;x942ipYAAAAJ", "orcid": "0009-0008-2042-0754;;0000-0002-8579-1600", "linkedin": "siqi-zeng-91b067175/;;", "or_profile": "~Siqi_Zeng1;~Remi_Tachet_des_Combes1;~Han_Zhao1", "aff": "Carnegie Mellon University;Microsoft Research;University of Illinois, Urbana Champaign", "aff_domain": "andrew.cmu.edu;microsoft.com;illinois.edu", "position": "Undergrad student;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nzeng2023learning,\ntitle={Learning Structured Representations by Embedding Class Hierarchy},\nauthor={Siqi Zeng and Remi Tachet des Combes and Han Zhao},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=7J-30ilaUZM}\n}", "github": "", "project": "", "reviewers": "EJjb;SKmB;NCZE;siuS", "pdf_size": 1475475, "recommendation": "5;5;6;8", "confidence": "4;4;4;3", "correctness": "3;3;3;4", "technical_novelty": "2;2;2;4", "empirical_novelty": "2;2;2;4", "wc_summary_paper": "64;126;124;88", "wc_strength_and_weaknesses": "475;219;293;307", "wc_clarity_quality_novelty_and_reproducibility": "11;35;45;50", "wc_summary_review": "42;22;123;50", "wc_review": "592;402;585;495", "wc_reply_reviewers": "0;0;24;40", "wc_reply_authors": "1447;857;548;368", "reply_reviewers": "0;0;1;2", "reply_authors": "3;3;1;1", "recommendation_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.8660254037844386 ], "empirical_novelty_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 100.5, 25.937424698685874 ], "wc_strength_and_weaknesses_avg": [ 323.5, 93.64160400164022 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 35.25, 15.006248698458919 ], "wc_summary_review_avg": [ 59.25, 38.192767639960316 ], "wc_review_avg": [ 518.5, 77.37732225917358 ], "wc_reply_reviewers_avg": [ 16.0, 16.97056274847714 ], "wc_reply_authors_avg": [ 805.0, 409.8432627236905 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.0, 1.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.9428090415820632, "corr_recommendation_correctness": 0.9428090415820632, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14122574272819658545&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=7J-30ilaUZM", "email": "andrew.cmu.edu;microsoft.com;illinois.edu", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Carnegie Mellon University;Microsoft;University of Illinois Urbana-Champaign", "aff_unique_dep": ";Microsoft Research;", "aff_unique_url": "https://www.cmu.edu;https://www.microsoft.com/en-us/research;https://illinois.edu", "aff_unique_abbr": 
"CMU;MSR;UIUC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Fast and Precise: Adjusting Planning Horizon with Adaptive Subgoal Search", "status": "Top-5%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11591", "id": "7JsGYvjE88d", "poster": "/media/PosterPDFs/ICLR%202023/11591.png?t=1682632479.1982944", "openreview": "https://openreview.net/forum?id=7JsGYvjE88d", "slides": "https://iclr.cc/virtual/2023/poster/11591", "video": "https://iclr.cc/virtual/2023/poster/11591", "author_site": "Micha\u0142 Zawalski, Micha\u0142 Tyrolski, Konrad Czechowski, Tomasz Odrzyg\u00f3\u017ad\u017a, Damian Stachura, Piotr Pi\u0119kos, Yuhuai Wu, \u0141ukasz Kuci\u0144ski, Piotr Mi\u0142o\u015b", "tldr": "We propose Adaptive Subgoal Search (AdaSubS), a search algorithm that adjusts the planning horizon to match the local complexity of the solved problems.", "abstract": "Complex reasoning problems contain states that vary in the computational cost required to determine the right action plan. To take advantage of this property, we propose Adaptive Subgoal Search (AdaSubS), a search method that adaptively adjusts the planning horizon. To this end, AdaSubS generates diverse sets of subgoals at different distances. A verification mechanism is employed to filter out unreachable subgoals swiftly, making it possible to focus on feasible further subgoals. In this way, AdaSubS benefits from the efficiency of planning with longer-term subgoals and the fine control with shorter-term ones, and thus scales well to difficult planning problems. We show that AdaSubS significantly surpasses hierarchical planning algorithms on three complex reasoning tasks: Sokoban, the Rubik\u2019s Cube, and the inequality-proving benchmark INT. 
", "keywords": "search;adaptive horizon;verification;deep learning;hierarchical planning", "primary_area": "", "supplementary_material": "", "author": "Micha\u0142 Zawalski;Micha\u0142 Tyrolski;Konrad Czechowski;Tomasz Odrzyg\u00f3\u017ad\u017a;Damian Stachura;Piotr Pi\u0119kos;Yuhuai Wu;\u0141ukasz Kuci\u0144ski;Piotr Mi\u0142o\u015b", "authorids": "~Micha\u0142_Zawalski1;~Micha\u0142_Tyrolski1;~Konrad_Czechowski1;~Tomasz_Odrzyg\u00f3\u017ad\u017a1;~Damian_Stachura1;~Piotr_Pi\u0119kos2;~Yuhuai_Wu1;~\u0141ukasz_Kuci\u0144ski1;~Piotr_Mi\u0142o\u015b1", "gender": "M;Not Specified;;M;M;M;M;M;", "homepage": "https://michalzawalski.github.io/;;https://www.linkedin.com/in/konrad-czechowski-723bb6150/;;;https://piotrpiekos.github.io;http://www.cs.toronto.edu/~ywu/;https://sites.google.com/view/lukaszkucinski;", "dblp": "300/4651.html;304/8577;237/9612;;;;;250/9699;208/0989.html", "google_scholar": "ljbCuVkAAAAJ;https://scholar.google.com/citations?view_op=list_works;ni7tRv4AAAAJ;J2ERJ7cAAAAJ;https://scholar.google.com/citations?hl=en;;https://scholar.google.ca/citations?user=bOQGfFIAAAAJ;l6dK-VUAAAAJ;Se68XecAAAAJ", "orcid": "0000-0002-4063-2411;;;;;;;0000-0002-5617-8129;", "linkedin": "micha\u0142-zawalski;mtyrolski/;;tomasz-odrzygozdz/;damian-stachura-210bb3119/;;;https://linkedin.com/in/lukasz-kucinski;piotr-milos-4b02151/", "or_profile": "~Micha\u0142_Zawalski1;~Micha\u0142_Tyrolski1;~Konrad_Czechowski1;~Tomasz_Odrzyg\u00f3\u017ad\u017a1;~Damian_Stachura1;~Piotr_Pi\u0119kos2;~Yuhuai_Wu1;~\u0141ukasz_Kuci\u0144ski1;~Piotr_Mi\u0142o\u015b1", "aff": "University of Warsaw;University of Warsaw;University of Warsaw;IDEAS NCBR;Jagiellonian University Cracow;King Abdullah University of Science and Technology;Stanford University;Institute of Mathematics Polish Academy of Sciences;IDEAS NCBR", "aff_domain": "uw.edu.pl;mimuw.edu.pl;mimuw.edu.pl;ideas-ncbr.pl;uj.edu.pl;kaust.edu.sa;stanford.edu;impan.pl;ideas-ncbr.pl", "position": "PhD student;MS student;PhD student;Postdoc;PhD student;PhD student;Postdoc;Assistant Professor;Researcher", "bibtex": "@inproceedings{\nzawalski2023fast,\ntitle={Fast and Precise: Adjusting Planning Horizon with Adaptive Subgoal Search},\nauthor={Micha{\\l} Zawalski and Micha{\\l} Tyrolski and Konrad Czechowski and Tomasz Odrzyg{\\'o}{\\'z}d{\\'z} and Damian Stachura and Piotr Pi{\\k{e}}kos and Yuhuai Wu and {\\L}ukasz Kuci{\\'n}ski and Piotr Mi{\\l}o{\\'s}},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=7JsGYvjE88d}\n}", "github": "", "project": "", "reviewers": "1BT4;1Tzk;EUhR;Kkdv", "pdf_size": 1296921, "recommendation": "8;8;8;8", "confidence": "4;4;4;5", "correctness": "4;4;3;3", "technical_novelty": "3;3;3;3", "empirical_novelty": "3;3;2;4", "wc_summary_paper": "119;102;63;44", "wc_strength_and_weaknesses": "469;140;297;1293", "wc_clarity_quality_novelty_and_reproducibility": "100;45;60;888", "wc_summary_review": "52;28;117;36", "wc_review": "740;315;537;2261", "wc_reply_reviewers": "0;30;0;194", "wc_reply_authors": "926;200;837;1854", "reply_reviewers": "0;1;0;2", "reply_authors": "3;1;2;4", "recommendation_avg": [ 8.0, 0.0 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 82.0, 29.891470355270247 ], "wc_strength_and_weaknesses_avg": [ 549.75, 444.6118391361166 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 273.25, 
355.49498941616605 ], "wc_summary_review_avg": [ 58.25, 35.00267846894006 ], "wc_review_avg": [ 963.25, 764.1846553680596 ], "wc_reply_reviewers_avg": [ 56.0, 80.61017305526642 ], "wc_reply_authors_avg": [ 954.25, 590.1247219868017 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 9, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=671152867223674120&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=7JsGYvjE88d", "email": "uw.edu.pl;mimuw.edu.pl;mimuw.edu.pl;ideas-ncbr.pl;uj.edu.pl;kaust.edu.sa;stanford.edu;impan.pl;ideas-ncbr.pl", "author_num": 9, "aff_unique_index": "0;0;0;1;2;3;4;5;1", "aff_unique_norm": "University of Warsaw;Institute for Development, Economic Analysis, and Simulation (IDEAS);Jagiellonian University;King Abdullah University of Science and Technology;Stanford University;Polish Academy of Sciences", "aff_unique_dep": ";;;;;Institute of Mathematics", "aff_unique_url": "https://www.uw.edu.pl;https://ideas-ncbr.pl;https://www.uj.edu.pl;https://www.kaust.edu.sa;https://www.stanford.edu;https://www.impan.pl/", "aff_unique_abbr": "UW;IDEAS;UJ;KAUST;Stanford;PAS", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Cracow;Stanford", "aff_country_unique_index": "0;0;0;0;0;1;2;0;0", "aff_country_unique": "Poland;Saudi Arabia;United States" }, { "id": "7KSeWGIOYM", "title": "Bootstrap Motion Forecasting With Self-Consistent Constraints", "track": "main", "status": "Reject", "tldr": "We introduce self-consistent constraints to improve the performance of motion forecasting in autonomous driving, which can be easily incorporated into other motion forecasting approaches.", "abstract": "We present a novel framework to bootstrap Motion forecasting with self-consistent constraints (MISC). The motion forecasting task aims at predicting future trajectories of vehicles by incorporating spatial and temporal information from the past. A key design of MISC is the proposed Dual Consistency Constraints that regularize the predicted trajectories under spatial and temporal perturbation during training. Also, to model the multi-modality in motion forecasting, we design a novel self-ensembling scheme to obtain accurate teacher targets to enforce the self-constraints with multi-modality supervision. With explicit constraints from multiple teacher targets, we observe a clear improvement in the prediction performance. Extensive experiments on the Argoverse motion forecasting benchmark show that MISC significantly outperforms the state-of-the-art methods. 
As the proposed strategies are general and can be easily incorporated into other motion forecasting approaches, we also demonstrate that our proposed scheme consistently improves the prediction performance of several existing methods.", "keywords": "Motion Forecasting;Autonomous Driving;Trajectory prediction", "primary_area": "", "supplementary_material": "", "author": "Maosheng Ye;jiamiao xu;xunnong xu;Tengfei Wang;Tongyi Cao;Qifeng Chen", "authorids": "~Maosheng_Ye1;jiamiaoxu@deeproute.ai;xunnongxu@deeproute.ai;~Tengfei_Wang1;~Tongyi_Cao1;~Qifeng_Chen1", "gender": "M;;;M;M;M", "homepage": ";;;https://tengfei-wang.github.io/;;http://cqf.io/", "dblp": "126/6604;;;142/3354-2;161/2748;117/4819", "google_scholar": "gg51qZYAAAAJ;;;HjpeWKcAAAAJ;;lLMX9hcAAAAJ", "orcid": "0000-0001-8470-685X;;;;;", "linkedin": ";;;;;", "or_profile": "~Maosheng_Ye1;jiamiaoxu@deeproute.ai;xunnongxu@deeproute.ai;~Tengfei_Wang1;~Tongyi_Cao1;~Qifeng_Chen1", "aff": "DeepRoute.Ai;;;Hong Kong University of Science and Technology;Department of Computer Science, University of Massachusetts, Amherst;Hong Kong University of Science and Technology", "aff_domain": "deeproute.ai;;;ust.hk;cs.umass.edu;hkust.edu", "position": "Intern;;;PhD student;PhD student;Assistant Professor", "bibtex": "@misc{\nye2023bootstrap,\ntitle={Bootstrap Motion Forecasting With Self-Consistent Constraints},\nauthor={Maosheng Ye and jiamiao xu and xunnong xu and Tengfei Wang and Tongyi Cao and Qifeng Chen},\nyear={2023},\nurl={https://openreview.net/forum?id=7KSeWGIOYM}\n}", "github": "", "project": "", "reviewers": "xHGP;mCiK;5FaF;MgTW", "site": "https://openreview.net/forum?id=7KSeWGIOYM", "pdf_size": 6669094, "recommendation": "5;5;6;8", "confidence": "4;4;5;3", "correctness": "3;3;3;3", "technical_novelty": "2;2;3;2", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "87;228;53;90", "wc_strength_and_weaknesses": "223;142;218;356", "wc_clarity_quality_novelty_and_reproducibility": "207;131;25;91", "wc_summary_review": "26;126;187;107", "wc_review": "543;627;483;644", "wc_reply_reviewers": "128;157;98;374", "wc_reply_authors": "724;569;638;390", "reply_reviewers": "1;2;1;3", "reply_authors": "2;4;2;4", "recommendation_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 114.5, 67.12115910798919 ], "wc_strength_and_weaknesses_avg": [ 234.75, 77.01095701262255 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 113.5, 65.92988700126826 ], "wc_summary_review_avg": [ 111.5, 57.53477209479499 ], "wc_review_avg": [ 574.25, 65.09752299435056 ], "wc_reply_reviewers_avg": [ 189.25, 108.68618817494705 ], "wc_reply_authors_avg": [ 580.25, 122.80141489412897 ], "reply_reviewers_avg": [ 1.75, 0.82915619758885 ], "reply_authors_avg": [ 3.0, 1.0 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.0, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14398077860603192038&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "DeepRoute Ai;Hong Kong University of Science and Technology;University of Massachusetts Amherst", "aff_unique_dep": ";;Department of Computer Science", "aff_unique_url": ";https://www.ust.hk;https://www.umass.edu", "aff_unique_abbr": "DeepRoute Ai;HKUST;UMass Amherst", 
"aff_campus_unique_index": "1;2;1", "aff_campus_unique": ";Hong Kong SAR;Amherst", "aff_country_unique_index": "1;2;1", "aff_country_unique": ";China;United States" }, { "id": "7KdrFjpmJf7", "title": "Learning Sampling Policy to Achieve Fewer Queries for Zeroth-Order Optimization", "track": "main", "status": "Withdraw", "tldr": "TL", "abstract": "Zeroth-order (ZO) methods, which use the finite difference of two function evaluations (also called ZO gradient) to approximate first-order gradient, have attracted much attention recently in machine learning because of its broad applications.\nThe accurateness of ZO gradient highly depends on how many finite differences are averaged, which are intrinsically determined by the number of perturbations randomly drawn from a distribution. \nExisting ZO methods try to learn a data-driven distribution for sampling the perturbations to improve the efficiency of ZO optimization (ZOO) algorithms. \nIn this paper, we explore a new and parallel direction, i.e. , learn an optimal sampling policy instead of using totally random strategy to generate perturbations based on the techniques of reinforcement\nlearning (RL), which makes it possible to approximate the gradient with only two function evaluations. Specifically, we first formulate the problem of learning a sampling policy as a Markov decision process. Then, we propose our ZO-RL algorithm, \\textit{i.e.}, using deep deterministic policy gradient, an actor-critic RL algorithm to learn a sampling policy which can guide the generation of perturbed vectors in getting ZO gradients as accurate as possible. Importantly, the existing ZOO algorithms of learning a distribution can be plugged in to improve the exploration of ZO-RL.\nExperimental results with different ZO estimators show that our ZO-RL algorithm can effectively reduce the query complexity of ZOO algorithms and converge faster than existing ZOO algorithms especially in the later stage of the optimization process.", "keywords": "Zeroth-order optimization;reinforcement learning", "primary_area": "", "supplementary_material": "", "author": "Zhou Zhai;Xiang Li;Heng Huang;Bin Gu", "authorids": "~Zhou_Zhai2;~Xiang_Li1;~Heng_Huang1;~Bin_Gu1", "gender": "M;M;M;M", "homepage": "https://www.linkedin.com/in/xiang-li-2703005a/;https://www.cs.umd.edu/~heng/;https://mbzuai.ac.ae/study/faculty/bin-gu/;", "dblp": "40/1491-12;03/281;29/1758-1;https://dblp.uni-trier.de/pers/hd/z/Zhai:Zhou", "google_scholar": ";4OqLaDwAAAAJ;Vo8OgCgAAAAJ;", "orcid": ";;0000-0001-6049-1815;", "linkedin": ";;;", "or_profile": "~Xiang_Li1;~Heng_Huang1;~Bin_Gu1;~Zhou_Zhaizhai1", "aff": ";University of Pittsburgh;Mohamed bin Zayed University of Artificial Intelligence;", "aff_domain": ";pitt.edu;mbzuai.ac.ae;", "position": ";Full Professor;Assistant Professor;", "bibtex": "@misc{\nzhai2023learning,\ntitle={Learning Sampling Policy to Achieve Fewer Queries for Zeroth-Order Optimization},\nauthor={Zhou Zhai and Xiang Li and Heng Huang and Bin Gu},\nyear={2023},\nurl={https://openreview.net/forum?id=7KdrFjpmJf7}\n}", "github": "", "project": "", "reviewers": "BMpH;ikYZ;cBx2;E5F1", "site": "https://openreview.net/forum?id=7KdrFjpmJf7", "pdf_size": 16639562, "recommendation": "1;3;5;6", "confidence": "4;3;4;3", "correctness": "1;2;3;3", "technical_novelty": "1;2;2;3", "empirical_novelty": "2;3;2;3", "wc_summary_paper": "119;61;96;92", "wc_strength_and_weaknesses": "442;415;87;178", "wc_clarity_quality_novelty_and_reproducibility": "28;81;261;1", "wc_summary_review": "70;36;18;2", "wc_review": 
"659;593;462;273", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.75, 1.920286436967152 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 92.0, 20.65187642806338 ], "wc_strength_and_weaknesses_avg": [ 280.5, 151.75720740709482 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 92.75, 101.31232649584156 ], "wc_summary_review_avg": [ 31.5, 25.273503912200223 ], "wc_review_avg": [ 496.75, 147.360739343965 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.39056673294247163, "corr_recommendation_correctness": 0.9813358399735743, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:6p-YQpxrkTcJ:scholar.google.com/&scioq=Learning+Sampling+Policy+to+Achieve+Fewer+Queries+for+Zeroth-Order+Optimization&hl=en&as_sdt=0,5", "gs_version_total": 4, "aff_unique_index": "0;1", "aff_unique_norm": "University of Pittsburgh;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "https://www.pitt.edu;https://mbzuai.ac.ae", "aff_unique_abbr": "Pitt;MBZUAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;United Arab Emirates" }, { "id": "7Kf5_7-b7q", "title": "Comparing Auxiliary Tasks for Learning Representations for Reinforcement Learning", "track": "main", "status": "Withdraw", "tldr": "This paper empirically compares common auxiliary tasks used to learn representations for reinforcement learning (RL) across diverse continuous control environments and RL algorithms.", "abstract": "Learning state representations has gained steady popularity in reinforcement learning (RL) due to its potential to improve both sample efficiency and returns on many environments. A straightforward and efficient method is to generate representations with a distinct neural network trained on an auxiliary task, i.e. a task that differs from the actual RL task. While a whole range of such auxiliary tasks has been proposed in the literature, a comparison on typical continuous control benchmark environments is computationally expensive and has, to the best of our knowledge, not been performed before. This paper presents such a comparison of common auxiliary tasks, based on hundreds of agents trained with state-of-the-art off-policy RL algorithms. We compare possible improvements in both sample efficiency and returns for environments ranging from simple pendulum to a complex simulated robotics task. Our findings show that representation learning with auxiliary tasks is beneficial for environments of higher dimension and complexity, and that learning environment dynamics is preferable to predicting rewards. 
We believe these insights will enable other researchers to make more informed decisions on how to utilize representation learning for their specific problem.", "keywords": "Reinforcement learning;Representation learning;Auxiliary task;Comparison", "primary_area": "", "supplementary_material": "", "author": "Moritz Lange;Noah Krystiniak;Raphael Engelhardt;Wolfgang Konen;Laurenz Wiskott", "authorids": "~Moritz_Lange1;~Noah_Krystiniak1;~Raphael_Engelhardt1;~Wolfgang_Konen1;~Laurenz_Wiskott1", "gender": ";;M;M;M", "homepage": ";https://github.com/NoKryst13;https://www.th-koeln.de/personen/raphael.engelhardt/;https://blogs.gm.fh-koeln.de/konen/en/;https://www.ini.rub.de/the_institute/people/laurenz-wiskott/", "dblp": ";;342/0022;;61/4046", "google_scholar": "RP42Zj4AAAAJ;https://scholar.google.com/citations?hl=de;https://scholar.google.de/citations?user=0R45IFQAAAAJ;https://scholar.google.de/citations?user=XzM5OZUAAAAJ;Uk8Le0YAAAAJ", "orcid": "0000-0001-7109-7813;;0000-0003-1463-2706;;0000-0001-6237-740X", "linkedin": ";;;;", "or_profile": "~Moritz_Lange1;~Noah_Krystiniak1;~Raphael_Engelhardt1;~Wolfgang_Konen1;~Laurenz_Wiskott1", "aff": "INI/RUB, Ruhr-Universit\u00e4t Bochum;;Faculty of Computer Science and Engineering Science;TH K\u00f6ln;Ruhr-Universit\u00e4t Bochum", "aff_domain": "ini.rub.de;;th-koeln.de;th-koeln.de;ruhr-uni-bochum.de", "position": "PhD student;;PhD student;Full Professor;Full Professor", "bibtex": "@misc{\nlange2023comparing,\ntitle={Comparing Auxiliary Tasks for Learning Representations for Reinforcement Learning},\nauthor={Moritz Lange and Noah Krystiniak and Raphael Engelhardt and Wolfgang Konen and Laurenz Wiskott},\nyear={2023},\nurl={https://openreview.net/forum?id=7Kf5_7-b7q}\n}", "github": "", "project": "", "reviewers": "CyMB;o8CW;vAXy;8ZY1", "site": "https://openreview.net/forum?id=7Kf5_7-b7q", "pdf_size": 3095984, "recommendation": "3;3;3;3", "confidence": "3;4;4;5", "correctness": "3;3;4;2", "technical_novelty": "2;1;2;2", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "171;97;70;61", "wc_strength_and_weaknesses": "765;479;206;763", "wc_clarity_quality_novelty_and_reproducibility": "45;84;15;33", "wc_summary_review": "46;46;51;92", "wc_review": "1027;706;342;949", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 99.75, 43.21675022488387 ], "wc_strength_and_weaknesses_avg": [ 553.25, 231.80204377873807 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 44.25, 25.31180554602931 ], "wc_summary_review_avg": [ 58.75, 19.30511590226798 ], "wc_review_avg": [ 756.0, 266.73301257999543 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:OIKCwMQ_6F8J:scholar.google.com/&scioq=Comparing+Auxiliary+Tasks+for+Learning+Representations+for+Reinforcement+Learning&hl=en&as_sdt=0,47", "gs_version_total": 0, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Ruhr-Universit\u00e4t Bochum;Faculty of Computer Science and Engineering Science;TH K\u00f6ln", 
"aff_unique_dep": "INI;Computer Science and Engineering Science;", "aff_unique_url": "https://www.ruhr-uni-bochum.de;;https://www.th-koeln.de", "aff_unique_abbr": "RUB;;TH K\u00f6ln", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany;" }, { "title": "$\\rm A^2Q$: Aggregation-Aware Quantization for Graph Neural Networks", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11655", "id": "7L2mgi0TNEP", "poster": "/media/PosterPDFs/ICLR%202023/11655.png?t=1680841903.3493876", "openreview": "https://openreview.net/forum?id=7L2mgi0TNEP", "slides": "https://iclr.cc/virtual/2023/poster/11655", "video": "https://iclr.cc/virtual/2023/poster/11655", "author_site": "Zeyu Zhu, Fanrong Li, Zitao Mo, Qinghao Hu, Gang Li, Zejian Liu, Xiaoyao Liang, Jian Cheng", "tldr": "We propose an Aggregation-Aware mixed-precision Quantization method that fully utilizes the property of GNNs, achieving up to $2\\times$ speedup and $11.4\\%$ accuracy improvement compared to the state-of-the-art quantization method on GNNs.", "abstract": "As graph data size increases, the vast latency and memory consumption during inference pose a significant challenge to the real-world deployment of Graph Neural Networks (GNNs). While quantization is a powerful approach to reducing GNNs complexity, most previous works on GNNs quantization fail to exploit the unique characteristics of GNNs, suffering from severe accuracy degradation. Through an in-depth analysis of the topology of GNNs, we observe that the topology of the graph leads to significant differences between nodes, and most of the nodes in a graph appear to have a small aggregation value. Motivated by this, in this paper, we propose the Aggregation-Aware mixed-precision Quantization ($\\rm A^2Q$) for GNNs, where an appropriate bitwidth is automatically learned and assigned to each node in the graph. To mitigate the vanishing gradient problem caused by sparse connections between nodes, we propose a Local Gradient method to serve the quantization error of the node features as the supervision during training. We also develop a Nearest Neighbor Strategy to deal with the generalization on unseen graphs. Extensive experiments on eight public node-level and graph-level datasets demonstrate the generality and robustness of our proposed method. Compared to the FP32 models, our method can achieve up to $18.8\\times$ (i.e., 1.70bits) compression ratio with negligible accuracy degradation. 
Moreover, compared to the state-of-the-art quantization method, our method can achieve up to $11.4\\%$ and $9.5\\%$ accuracy improvements on the node-level and graph-level tasks, respectively, and up to $2\\times$ speedup on a dedicated hardware accelerator.", "keywords": "Graph Neural Networks;MPNN framework;Mixed-precision;Quantization", "primary_area": "", "supplementary_material": "/attachment/886c75fadfeb571e65d89131fb2f3a224b84259f.zip", "author": "Zeyu Zhu;Fanrong Li;Zitao Mo;Qinghao Hu;Gang Li;Zejian Liu;Xiaoyao Liang;Jian Cheng", "authorids": "~Zeyu_Zhu1;~Fanrong_Li1;~Zitao_Mo1;~Qinghao_Hu2;~Gang_Li18;~Zejian_Liu1;~Xiaoyao_Liang1;~Jian_Cheng7", "gender": "M;M;M;M;;M;M;M", "homepage": "https://github.com/weihai-98;;;;;;http://www.cs.sjtu.edu.cn/en/PeopleDetail.aspx?id=160;https://people.ucas.ac.cn/~chengjian?language=en", "dblp": "192/2177;218/1183;249/5473;;;https://dblp.uni-trier.de/pers/hd/l/Liu:Zejian;;14/6145-1", "google_scholar": ";;;fjuWXroAAAAJ;;;https://scholar.google.com.tw/citations?user=OIDtL6QAAAAJ;ZGCIUJ8AAAAJ", "orcid": ";;;0000-0003-0422-5509;;;;0000-0003-1289-2758", "linkedin": ";;;;;;;", "or_profile": "~Zeyu_Zhu1;~Fanrong_Li1;~Zitao_Mo1;~Qinghao_Hu2;~Gang_Li18;~Zejian_Liu1;~Xiaoyao_Liang1;~Jian_Cheng7", "aff": "Institute of Automation, Chinese Academy of Sciences;NVIDIA;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;;Institute of automation, Chinese academy of science;Shanghai Jiaotong University;Institute of Automation, Chinese Academy of Sciences", "aff_domain": "ia.ac.cn;nvidia.com;ia.ac.cn;ia.ac.cn;;ia.ac.cn;;ia.ac.cn", "position": "PhD student;Senior architect;Engineer;Associate Professor;;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nzhu2023rm,\ntitle={\\${\\textbackslash}rm A{\\textasciicircum}2Q\\$: Aggregation-Aware Quantization for Graph Neural Networks},\nauthor={Zeyu Zhu and Fanrong Li and Zitao Mo and Qinghao Hu and Gang Li and Zejian Liu and Xiaoyao Liang and Jian Cheng},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=7L2mgi0TNEP}\n}", "github": "", "project": "", "reviewers": "eih6;MZPa;wtuK;WF4b;ZCCp", "pdf_size": 2390280, "recommendation": "6;6;8;8;8", "confidence": "3;2;5;4;3", "correctness": "3;3;4;4;4", "technical_novelty": "3;3;3;4;3", "empirical_novelty": "2;2;3;4;3", "wc_summary_paper": "80;60;137;100;54", "wc_strength_and_weaknesses": "581;211;151;292;208", "wc_clarity_quality_novelty_and_reproducibility": "41;18;70;25;25", "wc_summary_review": "32;43;200;32;38", "wc_review": "734;332;558;449;325", "wc_reply_reviewers": "437;0;0;0;39", "wc_reply_authors": "3731;1071;564;1266;1726", "reply_reviewers": "2;0;0;0;1", "reply_authors": "8;3;2;3;4", "recommendation_avg": [ 7.2, 0.9797958971132712 ], "confidence_avg": [ 3.4, 1.019803902718557 ], "correctness_avg": [ 3.6, 0.4898979485566356 ], "technical_novelty_avg": [ 3.2, 0.39999999999999997 ], "empirical_novelty_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_paper_avg": [ 86.2, 30.109134826494103 ], "wc_strength_and_weaknesses_avg": [ 288.6, 152.94521895110026 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 35.8, 18.69117438792972 ], "wc_summary_review_avg": [ 69.0, 65.62926176637978 ], "wc_review_avg": [ 479.6, 153.29005186247412 ], "wc_reply_reviewers_avg": [ 95.2, 171.56619713684861 ], "wc_reply_authors_avg": [ 1671.6, 1095.088781788947 ], "reply_reviewers_avg": [ 0.6, 0.7999999999999999 ], "reply_authors_avg": [ 
4.0, 2.0976176963403033 ], "replies_avg": [ 32, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": 0.7205766921228921, "corr_recommendation_correctness": 1.0, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17779667518329714260&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=7L2mgi0TNEP", "email": "ia.ac.cn;nvidia.com;ia.ac.cn;ia.ac.cn;;ia.ac.cn;;ia.ac.cn", "author_num": 8, "aff_unique_index": "0;1;0;0;0;2;0", "aff_unique_norm": "Chinese Academy of Sciences;NVIDIA;Shanghai Jiao Tong University", "aff_unique_dep": "Institute of Automation;NVIDIA Corporation;", "aff_unique_url": "http://www.ia.cas.cn;https://www.nvidia.com;https://www.sjtu.edu.cn", "aff_unique_abbr": "CAS;NVIDIA;SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;0;0", "aff_country_unique": "China;United States" }, { "id": "7MthJsb-nm", "title": "Fast Bayesian Updates for Deep Learning with a Use Case in Active Learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Retraining deep neural networks when new data arrives is typically computationally expensive. Moreover, certain applications do not allow such costly retraining due to time or computational constraints. Fast Bayesian updates are a possible solution to this issue. Therefore, we propose a Bayesian update based on Monte-Carlo samples and a last-layer Laplace approximation for different Bayesian neural network types, i.e., Dropout, Ensemble, and Spectral Normalized Neural Gaussian Process (SNGP). In a large-scale evaluation study, we show that our updates combined with SNGP represent a fast and competitive alternative to costly retraining. As a use case, we combine the Bayesian updates for SNGP with different sequential query strategies to exemplarily demonstrate their improved selection performance in active learning.", "keywords": "Bayesian Neural Networks;Deep Learning;Active Learning", "primary_area": "", "supplementary_material": "/attachment/28e29cc28ab487df149de69d6f4306beb59c7d6d.zip", "author": "Marek Herde;Zhixin Huang;Denis Huseljic;Daniel Kottke;Stephan Vogt;Bernhard Sick", "authorids": "~Marek_Herde1;zhixin.huang@uni-kassel.de;~Denis_Huseljic1;~Daniel_Kottke1;stephan.vogt@uni-kassel.de;~Bernhard_Sick1", "gender": "M;;M;Not Specified;;M", "homepage": ";;https://www.uni-kassel.de/eecs/ies/denis-huseljic;;;", "dblp": ";;;;;21/4593", "google_scholar": "pwRDfMQAAAAJ;;https://scholar.google.de/citations?user=sFeKFT4AAAAJ;https://scholar.google.de/citations?user=NU--dFsAAAAJ;;https://scholar.google.de/citations?user=sGAKnroAAAAJ", "orcid": "0000-0003-4908-122X;;;;;", "linkedin": ";;;;;bernhard-sick-71915b76/?originalSubdomain=de", "or_profile": "~Marek_Herde1;zhixin.huang@uni-kassel.de;~Denis_Huseljic1;~Daniel_Kottke1;stephan.vogt@uni-kassel.de;~Bernhard_Sick1", "aff": "Universit\u00e4t Kassel;;Universit\u00e4t Kassel;Universit\u00e4t Kassel;;Universit\u00e4t Kassel", "aff_domain": "uni-kassel.de;;uni-kassel.de;uni-kassel.de;;uni-kassel.de", "position": "PhD student;;PhD student;Postdoc;;Full Professor", "bibtex": "@misc{\nherde2023fast,\ntitle={Fast Bayesian Updates for Deep Learning with a Use Case in Active Learning},\nauthor={Marek Herde and Zhixin Huang and Denis Huseljic and Daniel Kottke and Stephan Vogt and Bernhard Sick},\nyear={2023},\nurl={https://openreview.net/forum?id=7MthJsb-nm}\n}", "github": "", "project": "", "reviewers": "Wgk8;mJJL;gkXW;a5hN", "site": 
"https://openreview.net/forum?id=7MthJsb-nm", "pdf_size": 2151887, "recommendation": "3;5;5;6", "confidence": "4;3;3;4", "correctness": "2;3;3;3", "technical_novelty": "1;2;3;3", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "55;47;93;63", "wc_strength_and_weaknesses": "688;246;371;174", "wc_clarity_quality_novelty_and_reproducibility": "137;41;68;126", "wc_summary_review": "62;45;74;107", "wc_review": "942;379;606;470", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "614;111;278;268", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.82915619758885 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 64.5, 17.399712641305314 ], "wc_strength_and_weaknesses_avg": [ 369.75, 196.7973259472801 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 93.0, 39.85599076675927 ], "wc_summary_review_avg": [ 72.0, 22.68259244442751 ], "wc_review_avg": [ 599.25, 213.73976583686996 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 317.75, 183.41534150664714 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.2294157338705618, "corr_recommendation_correctness": 0.9271726499455306, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17707027478230726873&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Kassel", "aff_unique_dep": "", "aff_unique_url": "https://www.uni-kassel.de", "aff_unique_abbr": "UKassel", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "id": "7NUTyhyQt9x", "title": "Current Anomaly Detectors are Anomalous: On Semantic Treatment of OOD Inputs", "track": "main", "status": "Reject", "tldr": "We propose that in-distribution should not be tied to the training distribution but to the distribution of semantic information in training data, and therefore OOD detection should be performed on the semantic information extracted from training data", "abstract": "Machine learning models have achieved impressive performance across different modalities. It is well known that these models are prone to making mistakes on out-of-distribution inputs. OOD detection has, therefore, gained a lot of attention recently. We observe that most existing detectors use the distribution estimated by the training dataset for OOD detection. This can be a serious impediment since faulty OOD detectors can potentially restrict utility of the model. Such detectors, tied to the bias in data collection process, can be impermeable to inputs lying outside the training distribution but with the same semantic information (e.g., class labels) as the training data. We argue that in-distribution should not be tied to just the training distribution but to the distribution of the semantic information contained in the training data. To support our argument, we perform OOD detection on semantic information extracted from the training data of MNIST and COCO datasets, and show that it not only reduces false alarms but also significantly improves detection of OOD inputs with spurious features from training data. 
", "keywords": "machine learning;training distribution;out-of-distribution;OODs;detection;semantic information", "primary_area": "", "supplementary_material": "", "author": "Ramneet Kaur;Xiayan Ji;Souradeep Dutta;Yahan Yang;Michele Caprio;Elena Bernardis;Oleg Sokolsky;Insup Lee", "authorids": "~Ramneet_Kaur1;~Xiayan_Ji1;~Souradeep_Dutta2;~Yahan_Yang1;~Michele_Caprio1;~Elena_Bernardis2;~Oleg_Sokolsky1;~Insup_Lee1", "gender": "F;;M;F;M;;M;", "homepage": ";;https://sites.google.com/site/duttasouradeep39/;https://www.linkedin.com/in/yahan-yang-3637021a3/;https://mc6034.wixsite.com/caprio;;https://www.cis.upenn.edu/~sokolsky/;https://www.cis.upenn.edu/~lee/", "dblp": ";;;131/7592.html;322/9067;91/2101;31/4030;l/InsupLee.html", "google_scholar": "uE3rQk8AAAAJ;dChT9WwAAAAJ;;E5CWhTAAAAAJ;6rngqVgAAAAJ;https://scholar.google.com/citations?hl=en;J0SKz5YAAAAJ;qPlUgrgAAAAJ", "orcid": ";;;;0000-0002-7569-097X;;0000-0001-5282-0658;0000-0003-2672-1132", "linkedin": "ramneet-kaur-550b0b169/;xiayan-ji-b52b7117b/;;;michele-caprio-5866b162/;;;", "or_profile": "~Ramneet_Kaur1;~Xiayan_Ji1;~Souradeep_Dutta2;~Yahan_Yang1;~Michele_Caprio1;~Elena_Bernardis2;~Oleg_Sokolsky1;~Insup_Lee1", "aff": "University of Pennsylvania;University of Pennsylvania;University of Pennsylvania;School of Engineering and Applied Science, University of Pennsylvania;University of Pennsylvania;University of Pennsylvania;University of Pennsylvania;University of Pennsylvania", "aff_domain": "seas.upenn.edu;seas.upenn.edu;upenn.edu;seas.upenn.edu;seas.upenn.edu;upenn.edu;upenn.edu;upenn.edu", "position": "PhD student;PhD student;Postdoc;PhD student;Postdoc;Assistant Professor;Research Professor;Full Professor", "bibtex": "@misc{\nkaur2023current,\ntitle={Current Anomaly Detectors are Anomalous: On Semantic Treatment of {OOD} Inputs},\nauthor={Ramneet Kaur and Xiayan Ji and Souradeep Dutta and Yahan Yang and Michele Caprio and Elena Bernardis and Oleg Sokolsky and Insup Lee},\nyear={2023},\nurl={https://openreview.net/forum?id=7NUTyhyQt9x}\n}", "github": "", "project": "", "reviewers": "iQtU;mNjq;sQTF;fcgs", "site": "https://openreview.net/forum?id=7NUTyhyQt9x", "pdf_size": 7066764, "recommendation": "3;3;5;5", "confidence": "4;4;3;4", "correctness": "3;1;2;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "0;2;3;2", "wc_summary_paper": "56;109;153;138", "wc_strength_and_weaknesses": "201;455;235;172", "wc_clarity_quality_novelty_and_reproducibility": "8;78;197;139", "wc_summary_review": "8;6;50;48", "wc_review": "273;648;635;497", "wc_reply_reviewers": "0;166;82;0", "wc_reply_authors": "829;1849;597;422", "reply_reviewers": "0;1;1;0", "reply_authors": "2;4;1;1", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.75, 1.0897247358851685 ], "wc_summary_paper_avg": [ 114.0, 37.03376837428241 ], "wc_strength_and_weaknesses_avg": [ 265.75, 111.51541373281094 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 105.5, 70.2797979507625 ], "wc_summary_review_avg": [ 28.0, 21.02379604162864 ], "wc_review_avg": [ 513.25, 150.8018153073762 ], "wc_reply_reviewers_avg": [ 62.0, 68.74590896918885 ], "wc_reply_authors_avg": [ 924.25, 553.0783737410097 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 
0.30151134457776363, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:1KE6nNfVbDEJ:scholar.google.com/&scioq=Current+Anomaly+Detectors+are+Anomalous:+On+Semantic+Treatment+of+OOD+Inputs&hl=en&as_sdt=0,40", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0;0;0;0", "aff_unique_norm": "University of Pennsylvania", "aff_unique_dep": "", "aff_unique_url": "https://www.upenn.edu", "aff_unique_abbr": "UPenn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "7PURWDjJCf3", "title": "Slimmable Networks for Contrastive Self-supervised Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Self-supervised learning has made great progress in large-model pre-training but struggles when training small models. Previous solutions to this problem mainly rely on knowledge distillation and indeed have a two-stage learning procedure: first train a large teacher model, then distill it to improve the generalization ability of small ones. In this work, we present a new one-stage solution to obtain pre-trained small models without extra teachers: slimmable networks for contrastive self-supervised learning (SlimCLR). A slimmable network contains a full network and several weight-sharing sub-networks. We can pre-train only once and obtain various networks, including small ones with low computation costs. However, in self-supervised cases, the interference between weight-sharing networks leads to severe performance degradation. One piece of evidence of the interference is gradient imbalance: a small proportion of parameters produces dominant gradients during backpropagation, and the main parameters may not be fully optimized. The divergence in gradient directions of various networks may also cause interference between networks. To overcome these problems, we make the main parameters produce dominant gradients and provide consistent guidance for sub-networks via three techniques: slow-start training of sub-networks, online distillation, and loss re-weighting according to model sizes. Besides, a switchable linear probe layer is applied during linear evaluation to avoid the interference of weight-sharing linear layers. 
We instantiate SlimCLR with typical contrastive learning frameworks and achieve better performance than previous arts with fewer parameters and FLOPs.", "keywords": "self-supervised learning;contrastive learning;slimmable networks", "primary_area": "", "supplementary_material": "/attachment/1349fe5fd001342b5b605f423d5be7465997424a.zip", "author": "Shuai Zhao;Xiaohan Wang;Linchao Zhu;Yi Yang", "authorids": "~Shuai_Zhao1;~Xiaohan_Wang2;~Linchao_Zhu1;~Yi_Yang22", "gender": "M;M;M;M", "homepage": ";https://wxh1996.github.io/;http://ffmpbgrnn.github.io/;https://person.zju.edu.cn/yiyang", "dblp": "116/8682-6;;172/1383.html;33/4854-1.html", "google_scholar": ";iGA10XoAAAAJ;9ZukE28AAAAJ;RMSuNFwAAAAJ", "orcid": "0000-0003-1320-4283;;;", "linkedin": ";%E6%99%93%E6%99%97-%E6%B1%AA-883895bb/;;", "or_profile": "~Shuai_Zhao1;~Xiaohan_Wang2;~Linchao_Zhu1;~Yi_Yang22", "aff": "Baidu;Stanford University;Zhejiang University;Zhejiang University", "aff_domain": "baidu.com;stanford.edu;zju.edu.cn;zju.edu.cn", "position": "Intern;Postdoc;Assistant Professor;Full Professor", "bibtex": "@misc{\nzhao2023slimmable,\ntitle={Slimmable Networks for Contrastive Self-supervised Learning},\nauthor={Shuai Zhao and Xiaohan Wang and Linchao Zhu and Yi Yang},\nyear={2023},\nurl={https://openreview.net/forum?id=7PURWDjJCf3}\n}", "github": "", "project": "", "reviewers": "5mjb;1XPr;enLj;yYC4", "site": "https://openreview.net/forum?id=7PURWDjJCf3", "pdf_size": 2204918, "recommendation": "3;3;5;5", "confidence": "3;5;4;3", "correctness": "3;3;3;3", "technical_novelty": "1;1;2;2", "empirical_novelty": "2;2;3;2", "wc_summary_paper": "67;60;109;40", "wc_strength_and_weaknesses": "403;848;103;105", "wc_clarity_quality_novelty_and_reproducibility": "70;40;23;30", "wc_summary_review": "37;5;47;33", "wc_review": "577;953;282;208", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "974;1648;353;318", "reply_reviewers": "0;0;0;0", "reply_authors": "2;3;1;1", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 1.5, 0.5 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 69.0, 25.129663746258124 ], "wc_strength_and_weaknesses_avg": [ 364.75, 304.53930370315095 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 40.75, 17.93564885918544 ], "wc_summary_review_avg": [ 30.5, 15.580436450882884 ], "wc_review_avg": [ 505.0, 293.18338970685227 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 823.25, 542.9895832334171 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.30151134457776363, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14449355225875086784&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "Baidu;Stanford University;Zhejiang University", "aff_unique_dep": "Baidu, Inc.;;", "aff_unique_url": "https://www.baidu.com;https://www.stanford.edu;https://www.zju.edu.cn", "aff_unique_abbr": "Baidu;Stanford;ZJU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "China;United States" }, { "id": "7P_yIFi6zaA", "title": "Are vision transformers more robust than CNNs for Backdoor attacks?", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Transformer architectures are 
based on a self-attention mechanism that processes images as a sequence of patches. As their design is quite different compared to CNNs, it is interesting to study if transformers are vulnerable to backdoor attacks and how different transformer architectures affect attack success rates. Backdoor attacks happen when an attacker poisons a small part of the training images with a specific trigger or backdoor which will be activated later. The model performance is good on clean test images, but the attacker can manipulate the decision of the model by showing the trigger on an image at test time. In this paper, we perform a comparative study of state-of-the-art architectures through the lens of backdoor robustness, specifically how attention mechanisms affect robustness. We show that the popular vision transformer architecture (ViT) is the least robust architecture and ResMLP, which belongs to a class called Feed Forward Networks (FFN), is the most robust one to backdoor attacks among state-of-the-art architectures. We also find an intriguing difference between transformers and CNNs \u2013 interpretation algorithms effectively highlight the trigger on test images for transformers but not for CNNs. Based on this observation, we find that a test-time image blocking defense reduces the attack success rate by a large margin for transformers. We also show that such blocking mechanisms can be incorporated during the training process to improve robustness even further. We believe our experimental findings will encourage the community to understand the building block components in developing novel architectures robust to backdoor attacks.", "keywords": "Backdoor Attacks;Vision Transformers;Robustness", "primary_area": "", "supplementary_material": "/attachment/bbf2ac040f706d31e5d7ef456c6d3b0ccfdbc1aa.zip", "author": "Akshayvarun Subramanya;Aniruddha Saha;Soroush Abbasi Koohpayegani;Ajinkya Tejankar;Hamed Pirsiavash", "authorids": "~Akshayvarun_Subramanya2;~Aniruddha_Saha1;~Soroush_Abbasi_Koohpayegani1;~Ajinkya_Tejankar1;~Hamed_Pirsiavash1", "gender": "M;M;M;M;", "homepage": "https://ani0075saha.github.io/;http://soroush-abbasi.github.io;https://ajtejankar.github.io;https://web.cs.ucdavis.edu/~hpirsiav/;https://aksvarun.github.io", "dblp": "221/8102;277/5486;255/5662;07/6340;190/7249", "google_scholar": "xfjALj0AAAAJ;JS10DM0AAAAJ;zt4D3G4AAAAJ;https://scholar.google.com.tw/citations?user=c9XXy4MAAAAJ;2_3SWFwAAAAJ", "orcid": ";;;;", "linkedin": ";;ajinkya-tejankar-79854445/;hpirsiav/;", "or_profile": "~Aniruddha_Saha1;~Soroush_Abbasi_Koohpayegani1;~Ajinkya_Tejankar1;~Hamed_Pirsiavash1;~Akshayvarun_Subramanya1", "aff": "University of Maryland, College Park;University of California, Davis;University of California, Davis;University of California, Davis;", "aff_domain": "umd.edu;ucdavis.edu;ucdavis.edu;ucdavis.edu;", "position": "Postdoc;PhD student;PhD student;Associate Professor;", "bibtex": "@misc{\nsubramanya2023are,\ntitle={Are vision transformers more robust than {CNN}s for Backdoor attacks?},\nauthor={Akshayvarun Subramanya and Aniruddha Saha and Soroush Abbasi Koohpayegani and Ajinkya Tejankar and Hamed Pirsiavash},\nyear={2023},\nurl={https://openreview.net/forum?id=7P_yIFi6zaA}\n}", "github": "", "project": "", "reviewers": "4JuB;xUii;Jb75;JDpz", "site": "https://openreview.net/forum?id=7P_yIFi6zaA", "pdf_size": 3928302, "recommendation": "3;3;3;5", "confidence": "4;4;5;4", "correctness": "2;4;2;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "71;149;74;75", 
"wc_strength_and_weaknesses": "43;142;449;212", "wc_clarity_quality_novelty_and_reproducibility": "454;1;23;22", "wc_summary_review": "23;54;25;34", "wc_review": "591;346;571;343", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "102;89;583;255", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.82915619758885 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 92.25, 32.797675222491 ], "wc_strength_and_weaknesses_avg": [ 211.5, 149.69051406151294 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 125.0, 190.15125558354853 ], "wc_summary_review_avg": [ 34.0, 12.267844146385297 ], "wc_review_avg": [ 462.75, 118.46597612816939 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 257.25, 199.078345130755 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": 0.17407765595569782, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8236973220602202267&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "University of Maryland;University of California, Davis", "aff_unique_dep": ";", "aff_unique_url": "https://www/umd.edu;https://www.ucdavis.edu", "aff_unique_abbr": "UMD;UC Davis", "aff_campus_unique_index": "0;1;1;1", "aff_campus_unique": "College Park;Davis", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "7QTldIMkkqX", "title": "Masked Autoencoders Enable Efficient Knowledge Distillers", "track": "main", "status": "Withdraw", "tldr": "This paper studies the potential of distilling knowledge from self-supervised pre-trained models, especially Masked Autoencoders", "abstract": "This paper studies the potential of distilling knowledge from self-supervised pre-trained models, especially Masked Autoencoders. Our approach is simple: in addition to optimizing the pixel reconstruction loss on masked inputs, we minimize the distance between the intermediate feature map of the teacher model and that of the student model. This design leads to a computationally efficient knowledge distillation framework, given 1) only a small visible subset of patches is used, and 2) the teacher model only needs to forward propagate inputs through the first few layers for obtaining intermediate feature maps. \n\nCompared to directly distilling fine-tuned models, distilling pre-trained models substantially improves the performance of downstream representation learning, meanwhile incurring little extra pre-training cost. For example, by distilling the knowledge from an MAE pre-trained ViT-L into a ViT-B, our method achieves an 84.0% ImageNet top-1 accuracy, outperforming the baseline of distilling a fine-tuned ViT-L by 1.2%, with no extra training time at all. More interestingly, our method can robustly tackle different masking ratios: e.g., by pushing to the extreme 95% masking ratio where merely TEN patches are visible during distillation, our ViT-B still secures a top-1 accuracy of 83.8%, meanwhile further reducing total training time by 13% of that of the distilling during fine-tuning baseline. 
", "keywords": "Transformer;Pretraining;Knowledge Distillation", "primary_area": "", "supplementary_material": "", "author": "Yutong Bai;Zeyu Wang;Junfei Xiao;Chen Wei;Huiyu Wang;Alan Yuille;Yuyin Zhou;Cihang Xie", "authorids": "~Yutong_Bai1;~Zeyu_Wang2;~Junfei_Xiao1;~Chen_Wei2;~Huiyu_Wang1;~Alan_Yuille1;~Yuyin_Zhou1;~Cihang_Xie3", "gender": "F;;M;;;M;;", "homepage": "https://yutongbai.com/;;;https://weichen582.github.io/;http://csrhddlam.github.io/;;https://yuyinzhou.github.io/;", "dblp": "216/8431;;246/7952;181/2831-5;;y/AlanLYuille;192/1413;", "google_scholar": "N1-l4GsAAAAJ;;rv-aTqkAAAAJ;https://scholar.google.com/citations?hl=en;SnmuYloAAAAJ;;eiqVLC0AAAAJ;", "orcid": ";;;;;;;", "linkedin": "%E9%9B%A8%E6%A1%90-%E7%99%BD-59a44a136/;;;;;;;", "or_profile": "~Yutong_Bai1;~Zeyu_Wang2;~Junfei_Xiao1;~Chen_Wei2;~Huiyu_Wang1;~Alan_Yuille1;~Yuyin_Zhou1;~Cihang_Xie3", "aff": "Johns Hopkins University;;Google;Johns Hopkins University;Meta Platforms;Johns Hopkins University;University of California, Santa Cruz;", "aff_domain": "jhu.edu;;google.com;jhu.edu;meta.com;johnshopkins.edu;ucsc.edu;", "position": "PhD student;;Researcher;PhD student;Researcher;Full Professor;Assistant Professor;", "bibtex": "@misc{\nbai2023masked,\ntitle={Masked Autoencoders Enable Efficient Knowledge Distillers},\nauthor={Yutong Bai and Zeyu Wang and Junfei Xiao and Chen Wei and Huiyu Wang and Alan Yuille and Yuyin Zhou and Cihang Xie},\nyear={2023},\nurl={https://openreview.net/forum?id=7QTldIMkkqX}\n}", "github": "", "project": "", "reviewers": "tKJt;bPBQ;neYs;wVjj", "site": "https://openreview.net/forum?id=7QTldIMkkqX", "pdf_size": 430889, "recommendation": "3;3;3;3", "confidence": "4;5;4;5", "correctness": "3;3;4;3", "technical_novelty": "2;2;2;2", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "89;56;82;33", "wc_strength_and_weaknesses": "555;196;346;265", "wc_clarity_quality_novelty_and_reproducibility": "32;7;28;19", "wc_summary_review": "65;28;5;66", "wc_review": "741;287;461;383", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 4.5, 0.5 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 65.0, 22.192341021172147 ], "wc_strength_and_weaknesses_avg": [ 340.5, 134.74141902176925 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 21.5, 9.604686356149273 ], "wc_summary_review_avg": [ 41.0, 25.816661286851172 ], "wc_review_avg": [ 468.0, 169.23652088128023 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 59, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5030683813836303943&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 9, "aff_unique_index": "0;1;0;2;0;3", "aff_unique_norm": "Johns Hopkins University;Google;Meta;University of California, Santa Cruz", "aff_unique_dep": ";Google;Meta Platforms, Inc.;", "aff_unique_url": "https://www.jhu.edu;https://www.google.com;https://www.meta.com;https://www.ucsc.edu", "aff_unique_abbr": "JHU;Google;Meta;UCSC", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Mountain View;Santa Cruz", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": 
"7RBvBi3p3Et", "title": "AdaStride: Using Adaptive Strides in Sequential Data for Effective Downsampling", "track": "main", "status": "Reject", "tldr": "In this paper, we propose a novel downsampling methods called AdaStride, which learns to use adaptive strides in a sequential data for effective downsampling.", "abstract": "The downsampling layer has been one of the most commonly used deep learning (DL) components in sequential data processing due to its several advantages. First, it improves the generalization performance of networks by acting as an information bottleneck, where it extracts task-relevant features and discards others. Second, it reduces data resolution allowing CNN layers to have larger receptive fields with smaller kernel sizes. Third, the reduced data resolution facilitates the use of Transformer networks in case of high-resolution data. Accordingly, there have been many studies on downsampling methods, but they have a limitation in that they apply the same downsampling ratio across a data instance. Using the same downsampling ratio uniformly for an entire data instance does not reflect the fact that the task-relevant information is not uniformly distributed in real data. In this paper, we introduce AdaStride, a downsampling method that can apply adaptively varying downsampling ratios across a sequential data instance given an overall downsampling ratio. Specifically, AdaStride learns to deploy adaptive strides in a sequential data instance. Therefore, it can preserve more information from task-relevant parts of a data instance by using smaller strides for those parts and larger strides for less relevant parts. To achieve this, we propose a novel training method called vector positioning that rearranges each time step of an input on a one-dimensional line segment without reordering, which is used to build an alignment matrix for the downsampling. In experiments conducted on three different tasks of audio classification, automatic speech recognition, and discrete representation learning, AdaStride outperforms other widely used standard downsampling methods showing its generality and effectiveness. 
In addition, we analyze how our AdaStride learns the effective adaptive strides to improve its performance in the tasks.", "keywords": "Downsampling;Pooling;Strides;Learning algorithm", "primary_area": "", "supplementary_material": "/attachment/33077321d0fef46cd6dba79e93ef82124903916d.zip", "author": "Yoonhyung Lee;Kyomin Jung", "authorids": "~Yoonhyung_Lee2;~Kyomin_Jung1", "gender": "M;M", "homepage": "http://milab.snu.ac.kr/kjung/index.html;", "dblp": "48/3867;210/4243", "google_scholar": "https://scholar.google.co.kr/citations?user=u3uMl4MAAAAJ;N2p8CLkAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Kyomin_Jung1;~YOONHYUNG_LEE1", "aff": "Seoul National University;", "aff_domain": "snu.ac.kr;", "position": "Full Professor;", "bibtex": "@misc{\nlee2023adastride,\ntitle={AdaStride: Using Adaptive Strides in Sequential Data for Effective Downsampling},\nauthor={Yoonhyung Lee and Kyomin Jung},\nyear={2023},\nurl={https://openreview.net/forum?id=7RBvBi3p3Et}\n}", "github": "", "project": "", "reviewers": "PVam;PyHD;MsFg;kiYh", "site": "https://openreview.net/forum?id=7RBvBi3p3Et", "pdf_size": 3379654, "recommendation": "5;6;6;6", "confidence": "3;2;3;4", "correctness": "3;3;3;4", "technical_novelty": "2;3;2;3", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "77;67;14;59", "wc_strength_and_weaknesses": "21;132;484;251", "wc_clarity_quality_novelty_and_reproducibility": "137;47;21;21", "wc_summary_review": "8;39;18;20", "wc_review": "243;285;537;351", "wc_reply_reviewers": "0;0;0;67", "wc_reply_authors": "779;440;1244;827", "reply_reviewers": "0;0;0;1", "reply_authors": "2;2;3;2", "recommendation_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 54.25, 24.097458372201828 ], "wc_strength_and_weaknesses_avg": [ 222.0, 171.74545117702536 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 56.5, 47.67336782733101 ], "wc_summary_review_avg": [ 21.25, 11.211043662389331 ], "wc_review_avg": [ 354.0, 112.44998888394787 ], "wc_reply_reviewers_avg": [ 16.75, 29.011851026778693 ], "wc_reply_authors_avg": [ 822.5, 285.4299388641633 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:TQa3BS77bWYJ:scholar.google.com/&scioq=AdaStride:+Using+Adaptive+Strides+in+Sequential+Data+for+Effective+Downsampling&hl=en&as_sdt=0,31", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Seoul National University", "aff_unique_dep": "", "aff_unique_url": "https://www.snu.ac.kr", "aff_unique_abbr": "SNU", "aff_country_unique_index": "0", "aff_country_unique": "South Korea" }, { "id": "7T2XgpklLDA", "title": "Does progress on ImageNet transfer to real world datasets?", "track": "main", "status": "Reject", "tldr": "", "abstract": "Does progress on ImageNet transfer to real world datasets? We investigate this question by evaluating ImageNet pre-trained models with varying accuracy (57% - 83%) on six practical image classification datasets. 
In particular, we study datasets collected with the goal of solving real world tasks (e.g., classifying images from camera traps or satellites), as opposed to web-scraped benchmarks collected for comparing models. On multiple datasets, models with higher ImageNet accuracy do not consistently yield performance improvements. For certain tasks, interventions such as data augmentation improve performance even when architectures do not. We hope that future benchmarks will include more diverse datasets to encourage a more comprehensive approach to improving learning algorithms.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/c05959062fa49994323666d8e7fc2754f81aa26b.zip", "author": "Alex Fang;Simon Kornblith;Ludwig Schmidt", "authorids": "~Alex_Fang1;~Simon_Kornblith1;~Ludwig_Schmidt1", "gender": ";M;M", "homepage": ";;http://people.csail.mit.edu/ludwigs/", "dblp": "260/0449;220/4059;141/2720", "google_scholar": ";1O3RPmsAAAAJ;SWMKy70AAAAJ", "orcid": ";;", "linkedin": "alex-fang-8a11a8115/;;ludwig-schmidt-87ba3612/", "or_profile": "~Alex_Fang1;~Simon_Kornblith1;~Ludwig_Schmidt1", "aff": "Department of Computer Science, University of Washington;Google;Allen Institute for Artificial Intelligence", "aff_domain": "cs.washington.edu;google.com;allenai.org", "position": "PhD student;Research Scientist;Researcher", "bibtex": "@misc{\nfang2023does,\ntitle={Does progress on ImageNet transfer to real world datasets?},\nauthor={Alex Fang and Simon Kornblith and Ludwig Schmidt},\nyear={2023},\nurl={https://openreview.net/forum?id=7T2XgpklLDA}\n}", "github": "", "project": "", "reviewers": "ZbHU;FJNw;yyk7;QVyG", "site": "https://openreview.net/forum?id=7T2XgpklLDA", "pdf_size": 5813751, "recommendation": "3;5;8;8", "confidence": "3;4;4;5", "correctness": "3;3;3;3", "technical_novelty": "2;2;3;2", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "32;104;55;112", "wc_strength_and_weaknesses": "463;159;186;184", "wc_clarity_quality_novelty_and_reproducibility": "24;44;100;122", "wc_summary_review": "117;51;96;85", "wc_review": "636;358;437;503", "wc_reply_reviewers": "0;0;0;11", "wc_reply_authors": "1218;529;445;787", "reply_reviewers": "0;0;0;1", "reply_authors": "2;1;1;1", "recommendation_avg": [ 6.0, 2.1213203435596424 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 75.75, 33.379447269240394 ], "wc_strength_and_weaknesses_avg": [ 248.0, 124.58531213590148 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 72.5, 39.909272105614754 ], "wc_summary_review_avg": [ 87.25, 23.878599205146017 ], "wc_review_avg": [ 483.5, 101.91785908269463 ], "wc_reply_reviewers_avg": [ 2.75, 4.763139720814412 ], "wc_reply_authors_avg": [ 744.75, 300.89398049811496 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.8333333333333334, "corr_recommendation_correctness": 0.0, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12582372480364496193&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Washington;Google;Allen Institute for Artificial Intelligence", "aff_unique_dep": "Department of Computer Science;Google;", "aff_unique_url": "https://www.washington.edu;https://www.google.com;https://allenai.org", 
"aff_unique_abbr": "UW;Google;AI2", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Seattle;Mountain View;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "7TKYqsMjNh", "title": "Fully Continuous Gated Recurrent Units For processing Time Series", "track": "main", "status": "Withdraw", "tldr": "Previous GRU-based models are piece-wise continuous. We proposed the first fully continuous GRU.", "abstract": "For a long time, RNN-based models, such as RNNs, LSTMs, and GRUs, have been used to process time series data. However, RNN-based models do not fit well with real-world sporadically observed data. As a result, many researchers have suggested various enhancements to overcome the limitation. Among them, differential equation-based models, e.g., GRU-ODE-Bayes, ODE-RNN, and so forth, show good accuracy in many cases. Those methods try to continuously model the hidden state of RNNs (or GRUs). However, existing methods' hidden states are piece-wise continuous. In this paper, we represent GRUs as delay differential equations and present fully continuous GRUs. To our knowledge, we propose the first model that continuously generalizes all the parts of GRUs, including their hidden state and various gates. After reconstructing a continuous path $x(t)$ from discrete time series observations $\\{(x_i, t_i)\\}_{i=0}^{N-1}$ (with an appropriate interpolation algorithm), we calculate the time derivatives of the reset gate $r(t)$, the update gate $z(t)$, the update vector $g(t)$, and the hidden state $h(t)$. Then, we develop an augmented delay differential equation (DDE) that continuously generalizes all the parts. In our experiments with 3 real-world datasets and 13 baselines, our fully continuous GRU method outperforms existing baselines by non-trivial margins. 
", "keywords": "Time Series forecasting;Continuous GRU", "primary_area": "", "supplementary_material": "/attachment/c4512c9a378dc990f0f7858a1617bb8e2be65c9d.zip", "author": "Sheo yon Jhin;Noseong Park", "authorids": "~Sheo_yon_Jhin1;~Noseong_Park1", "gender": "F;", "homepage": "https://sheoyonj.space/;", "dblp": "280/3334.html;", "google_scholar": "S_EBNdgAAAAJ;", "orcid": ";", "linkedin": "sheoyon-jhin/;", "or_profile": "~Sheo_yon_Jhin1;~Noseong_Park1", "aff": "Yonsei University;", "aff_domain": "yonsei.ac.kr;", "position": "MS student;", "bibtex": "@misc{\njhin2023fully,\ntitle={Fully Continuous Gated Recurrent Units For processing Time Series},\nauthor={Sheo yon Jhin and Noseong Park},\nyear={2023},\nurl={https://openreview.net/forum?id=7TKYqsMjNh}\n}", "github": "", "project": "", "reviewers": "kaeJ;XiD9;jDxm;NQ2g;XaJG", "site": "https://openreview.net/forum?id=7TKYqsMjNh", "pdf_size": 975340, "recommendation": "1;3;3;5;6", "confidence": "5;4;3;4;3", "correctness": "3;3;3;3;4", "technical_novelty": "1;2;2;2;3", "empirical_novelty": "2;2;2;2;1", "wc_summary_paper": "49;123;76;69;28", "wc_strength_and_weaknesses": "311;416;674;298;173", "wc_clarity_quality_novelty_and_reproducibility": "148;102;127;19;77", "wc_summary_review": "33;53;115;24;63", "wc_review": "541;694;992;410;341", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;0;0", "reply_authors": "0;0;0;0;0", "recommendation_avg": [ 3.6, 1.7435595774162693 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "correctness_avg": [ 3.2, 0.39999999999999997 ], "technical_novelty_avg": [ 2.0, 0.6324555320336759 ], "empirical_novelty_avg": [ 1.8, 0.4 ], "wc_summary_paper_avg": [ 69.0, 31.767908335299634 ], "wc_strength_and_weaknesses_avg": [ 374.4, 168.46910696029704 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 94.6, 44.67930169552787 ], "wc_summary_review_avg": [ 57.6, 31.87224497897818 ], "wc_review_avg": [ 595.6, 231.976378107772 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.6744532734334624, "corr_recommendation_correctness": 0.6882472016116853, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:HDqH9oTEnfMJ:scholar.google.com/&scioq=Fully+Continuous+Gated+Recurrent+Units+For+processing+Time+Series&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Yonsei University", "aff_unique_dep": "", "aff_unique_url": "https://www.yonsei.ac.kr", "aff_unique_abbr": "Yonsei", "aff_country_unique_index": "0", "aff_country_unique": "South Korea" }, { "id": "7UrHaeZ5Ie7", "title": "An Efficient Mean-field Approach to High-Order Markov Logic", "track": "main", "status": "Reject", "tldr": "This paper proposes a method to perform mean-field iteration of MLN efficiently via a novel neural network.", "abstract": "Markov logic networks (MLNs) are powerful models for symbolic reasoning, which combine probabilistic modeling with relational logic. Inference algorithms for MLNs often perform at the level of propositional logic or require building a first-order probabilistic graph, and the computational efficiency remains a challenge. 
The mean-field algorithm generalizes message passing for approximate inference in many intractable probabilistic graphical models, but in MLNs it still suffers from the high-order dependencies among the massive groundings, resulting in time complexity exponential in both the length and the arity of logic rules. We propose a novel method, LogicMP, to simplify logic message passing. In most practical cases, it can significantly reduce the complexity to polynomial for formulae in conjunctive normal form (CNF). We exploit the property of CNF logic rules to sidestep the expectation computation over high-order dependencies, and then formulate the logic message passing by Einstein summation to facilitate parallel computation, which can be optimized by sequentially contracting the rule arguments. With LogicMP, we achieve clear improvements over competing methods on several reasoning benchmark datasets, in both performance and efficiency. Specifically, the AUC-PR on the UW-CSE and Cora datasets improves by more than 11\\% in absolute terms, and inference is about ten times faster.\n", "keywords": "Logic Rules;Mean-field Algorithm;Markov Logic Network;Symbolic Reasoning", "primary_area": "", "supplementary_material": "/attachment/695633528350488cfe0b3aee7539f7eab32da800.zip", "author": "Weidi Xu;Jianshan He;Jingwei Wang;Hongting Zhou;Xiaopei Wan;Taifeng Wang;Ruopeng Li;Wei Chu", "authorids": "~Weidi_Xu1;~Jianshan_He1;~Jingwei_Wang1;zhouhongting.zht@antgruop.com;~Xiaopei_Wan2;~Taifeng_Wang2;ruopeng.lrp@antgroup.com;~Wei_Chu1", "gender": "M;M;;;M;M;;M", "homepage": ";;;;;https://scholar.google.com/citations?user=aMNBEk0AAAAJ&hl=zh-CN;;http://weichu.github.io", "dblp": "00/11534;225/5402;;;;01/1483;;", "google_scholar": ";https://scholar.google.com.hk/citations?user=0bq6rAkAAAAJ;;;zU9TT-AAAAAJ;aMNBEk0AAAAJ;;3J4zb7gAAAAJ", "orcid": "0000-0002-7279-9339;;;;;;;", "linkedin": ";;;;;;;", "or_profile": "~Weidi_Xu1;~Jianshan_He1;~Jingwei_Wang1;zhouhongting.zht@antgruop.com;~Xiaopei_Wan2;~Taifeng_Wang2;ruopeng.lrp@antgroup.com;~Wei_Chu1", "aff": "Ant Group;Ant Group;;;;BioMap;;Ant Group", "aff_domain": "antgroup.com;antgroup.com;;;;biomap.com;;antgroup.com", "position": "Researcher;Researcher;;;;Principal Researcher;;Researcher", "bibtex": "@misc{\nxu2023an,\ntitle={An Efficient Mean-field Approach to High-Order Markov Logic},\nauthor={Weidi Xu and Jianshan He and Jingwei Wang and Hongting Zhou and Xiaopei Wan and Taifeng Wang and Ruopeng Li and Wei Chu},\nyear={2023},\nurl={https://openreview.net/forum?id=7UrHaeZ5Ie7}\n}", "github": "", "project": "", "reviewers": "qmoi;cRow;gHdR;sKHS", "site": "https://openreview.net/forum?id=7UrHaeZ5Ie7", "pdf_size": 2275721, "recommendation": "3;5;6;6", "confidence": "3;4;4;3", "correctness": "2;3;4;4", "technical_novelty": "3;2;3;3", "empirical_novelty": "2;2;3;4", "wc_summary_paper": "43;176;10;81", "wc_strength_and_weaknesses": "45;214;13;139", "wc_clarity_quality_novelty_and_reproducibility": "579;34;17;190", "wc_summary_review": "46;62;98;40", "wc_review": "713;486;138;450", "wc_reply_reviewers": "0;0;0;549", "wc_reply_authors": "1404;774;469;1877", "reply_reviewers": "0;0;0;3", "reply_authors": "2;1;1;3", "recommendation_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 77.5, 62.17113478134367 ], "wc_strength_and_weaknesses_avg": [ 102.75, 79.1845155317629 ],
"wc_clarity_quality_novelty_and_reproducibility_avg": [ 205.0, 226.21118451570868 ], "wc_summary_review_avg": [ 61.5, 22.555487137279922 ], "wc_review_avg": [ 446.75, 204.79669797142725 ], "wc_reply_reviewers_avg": [ 137.25, 237.72397333882842 ], "wc_reply_authors_avg": [ 1131.0, 546.9776046603737 ], "reply_reviewers_avg": [ 0.75, 1.299038105676658 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": 0.40824829046386296, "corr_recommendation_correctness": 0.9847319278346618, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:r0dz-VF2hhsJ:scholar.google.com/&scioq=An+Efficient+Mean-field+Approach+to+High-Order+Markov+Logic&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Ant Group;BioMap", "aff_unique_dep": ";", "aff_unique_url": "https://www.antgroup.com;", "aff_unique_abbr": "Ant Group;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China;" }, { "id": "7UudBVsIrr", "title": "MolJET: Multimodal Joint Embedding Transformer for Conditional de novo Molecular Design and Multi-Property Optimization", "track": "main", "status": "Reject", "tldr": "MolJET is a foundational generative chemistry model for molecular design that uses joint embeddings learned from three chemistry-related modalities to perform conditional multi-property optimization.", "abstract": "Multi-property constrained optimization of molecules using generative de novo design models is vital for the successful application of Artificial Intelligence (AI) towards materials and drug discovery. Yet there remains a gap between the reported performance of such models in the literature and their practical utility in real world design scenarios. Furthermore, existing models are largely inaccessible to chemists without an extensive background in computer science. To address these challenges, we propose a generative foundation model, the Multimodal Joint Embedding Transformer (MolJET), which performs conditional generation of desired molecular distributions based on human-interpretable chemistry prompts in a zero-shot manner. We assess MolJET on the standard benchmarks available in the GuacaMol and MIMOSA evaluation frameworks. These include structure-based sampling tasks as well as a range of multi-property optimization tasks that probe a models ability to design drug-like molecules given realistic property constraints. We demonstrate that with self-supervised pretraining, MolJET outperforms 80% of task-optimized models while using zero-shot inferences and beats all baselines after minimal supervision. Moreover, the performance of MolJET on text-only conditioning tasks improves with the inclusion of property modalities during training, highlighting the importance of a multimodal approach to molecular design. MolJET is the first example of text-based de novo molecular design using large-scale multimodal foundation models and should serve as a building block towards further improvements to accessible AI for chemists.", "keywords": "Transformers;Multimodal;Molecules;Generative;Drug-design;LLM", "primary_area": "", "supplementary_material": "", "author": "Orion Walker Dollar;Sameera Horawalavithana;Scott Vasquez;W. 
James Pfaendtner;Svitlana Volkova", "authorids": "~Orion_Walker_Dollar1;~Sameera_Horawalavithana1;~Scott_Vasquez1;~W._James_Pfaendtner1;~Svitlana_Volkova1", "gender": "M;M;M;;F", "homepage": ";https://samtube405.github.io/_profile/;https://www.pnnl.gov/science/staff/staff_info.asp?staff_num=10254;;https://www.linkedin.com/in/svitlanavolkova/", "dblp": ";205/3214;;;19/8609", "google_scholar": "3KMT33YAAAAJ;zp7o6VYAAAAJ;;;DwrriFYAAAAJ", "orcid": ";;;;0000-0002-6131-3073", "linkedin": "orion-dollar/;;;;svitlanavolkova/", "or_profile": "~Orion_Walker_Dollar1;~Sameera_Horawalavithana1;~Scott_Vasquez1;~W._James_Pfaendtner1;~Svitlana_Volkova1", "aff": "University of Washington;Pacific Northwest National Laboratory;Pacific Northwest National Laboratory;University of Washington;Aptima, Inc.", "aff_domain": "uw.edu;pnnl.gov;pnnl.gov;u.washington.edu;aptima.com", "position": "PhD student;Researcher;Researcher;;Principal Researcher", "bibtex": "@misc{\ndollar2023moljet,\ntitle={Mol{JET}: Multimodal Joint Embedding Transformer for Conditional de novo Molecular Design and Multi-Property Optimization},\nauthor={Orion Walker Dollar and Sameera Horawalavithana and Scott Vasquez and W. James Pfaendtner and Svitlana Volkova},\nyear={2023},\nurl={https://openreview.net/forum?id=7UudBVsIrr}\n}", "github": "", "project": "", "reviewers": "cdBp;qqLn;TDmG;rKPy;X1nX;KXSs", "site": "https://openreview.net/forum?id=7UudBVsIrr", "pdf_size": 693123, "recommendation": "3;3;3;3;3;8", "confidence": "3;3;5;3;4;4", "correctness": "3;2;2;3;3;4", "technical_novelty": "3;2;2;4;2;3", "empirical_novelty": "1;2;2;4;3;3", "wc_summary_paper": "55;36;62;114;55;55", "wc_strength_and_weaknesses": "750;346;306;38;365;99", "wc_clarity_quality_novelty_and_reproducibility": "86;65;97;70;27;76", "wc_summary_review": "47;59;100;54;157;26", "wc_review": "938;506;565;276;604;256", "wc_reply_reviewers": "0;0;0;0;0;0", "wc_reply_authors": "0;0;0;0;0;0", "reply_reviewers": "0;0;0;0;0;0", "reply_authors": "0;0;0;0;0;0", "recommendation_avg": [ 3.8333333333333335, 1.8633899812498245 ], "confidence_avg": [ 3.6666666666666665, 0.7453559924999298 ], "correctness_avg": [ 2.8333333333333335, 0.6871842709362768 ], "technical_novelty_avg": [ 2.6666666666666665, 0.7453559924999298 ], "empirical_novelty_avg": [ 2.5, 0.9574271077563381 ], "wc_summary_paper_avg": [ 62.833333333333336, 24.23094898861555 ], "wc_strength_and_weaknesses_avg": [ 317.3333333333333, 229.53552133723346 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 70.16666666666667, 21.965250333702603 ], "wc_summary_review_avg": [ 73.83333333333333, 43.24895631367562 ], "wc_review_avg": [ 524.1666666666666, 228.4472343647191 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.20000000000000004, "corr_recommendation_correctness": 0.7592566023652967, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8554895151111183628&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;1;0;2", "aff_unique_norm": "University of Washington;Pacific Northwest National Laboratory;Aptima, Inc.", "aff_unique_dep": ";;", "aff_unique_url": "https://www.washington.edu;https://www.pnnl.gov;https://www.aptima.com", "aff_unique_abbr": "UW;PNNL;Aptima", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { 
"id": "7WgLZCURXxT", "title": "Multi-instance Interactive Segmentation with Self-Supervised Transformer", "track": "main", "status": "Reject", "tldr": "Multi-instance interactive segmentation using Label Propagation and self-supervised representations from Vision Transformer.", "abstract": "The rise of Vision Transformers (ViT) combined with better self-supervised learning pre-tasks has taken representation learning to the next level, beating supervised results on ImageNet. In particular, self-attention mechanism of ViT allows to easily visualize semantic information learned by the network. Following revealing of attention maps of DINO, many tried to leverage its representations for unsupervised segmentation. Despite very promising results for basic images with a single clear object in a simple background, representation of ViT are not able to segment images, with several classes and object instance, in an unsupervised fashion yet. In this paper, we propose SALT: Semi-supervised Segmentation with Self-supervised Attention Layers in Transformers, an interactive algorithm for multi-class/multi-instance segmentation. We follow previous works path and take it a step further by discriminating between different objects, using sparse human help to select said objects. We show that remarkable results are achieved with very sparse labels. Different pre-tasks are compared, and we show that self-supervised ones are more robust for panoptic segmentation, and overall achieve very similar performance. Evaluation is carried out on Pascal VOC 2007 and COCO-panoptic. Performance is evaluated for extreme conditions such as very noisy, and sparse interactions going to as little as one interaction per class.", "keywords": "Vision Transformer;Self-supervised learning;Interactive Image Segmentation;Semi-supervised learning", "primary_area": "", "supplementary_material": "", "author": "Xavier Jim\u00e9nez;Jaonary Rabarisoa;Valentin Belissen;Quoc Cuong PHAM", "authorids": "~Xavier_Jim\u00e9nez1;~Jaonary_Rabarisoa1;~Valentin_Belissen1;~Quoc_Cuong_PHAM1", "gender": "M;;;M", "homepage": ";;;http://www.kalisteo.eu", "dblp": ";69/1894;;72/5867", "google_scholar": ";;9z3YfasAAAAJ;3rZlzWQAAAAJ", "orcid": "0000-0001-8533-4007;;;0000-0003-3032-7090", "linkedin": "xavier-jimenez-875896154/;;;quoc-cuong-pham-39b6b616/", "or_profile": "~Xavier_Jim\u00e9nez1;~Jaonary_Rabarisoa1;~Valentin_Belissen1;~Quoc_Cuong_PHAM1", "aff": ";CEA;CEA;CEA", "aff_domain": ";cea.fr;cea.fr;cea.fr", "position": ";Research scientist;Researcher;Researcher", "bibtex": "@misc{\njim{\\'e}nez2023multiinstance,\ntitle={Multi-instance Interactive Segmentation with Self-Supervised Transformer},\nauthor={Xavier Jim{\\'e}nez and Jaonary Rabarisoa and Valentin Belissen and Quoc Cuong PHAM},\nyear={2023},\nurl={https://openreview.net/forum?id=7WgLZCURXxT}\n}", "github": "", "project": "", "reviewers": "KmRX;JhQx;yrjR;Uyni", "site": "https://openreview.net/forum?id=7WgLZCURXxT", "pdf_size": 21857621, "recommendation": "3;3;3;6", "confidence": "4;4;4;3", "correctness": "3;3;2;3", "technical_novelty": "2;2;1;2", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "126;113;32;55", "wc_strength_and_weaknesses": "176;175;67;97", "wc_clarity_quality_novelty_and_reproducibility": "49;59;14;26", "wc_summary_review": "71;48;11;39", "wc_review": "422;395;124;217", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.75, 1.299038105676658 ], "confidence_avg": [ 3.75, 
0.4330127018922193 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 81.5, 39.131189606246316 ], "wc_strength_and_weaknesses_avg": [ 128.75, 47.93941489004637 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 37.0, 17.874562931719478 ], "wc_summary_review_avg": [ 42.25, 21.48691462262556 ], "wc_review_avg": [ 289.5, 123.82750098423209 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:qx_xsD7zYYwJ:scholar.google.com/&scioq=Multi-instance+Interactive+Segmentation+with+Self-Supervised+Transformer&hl=en&as_sdt=0,31", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Commissariat \u00e0 l'\u00c9nergie Atomique et aux \u00c9nergies Alternatives", "aff_unique_dep": "", "aff_unique_url": "https://www.cea.fr", "aff_unique_abbr": "CEA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "France" }, { "id": "7WiIzqeqBNL", "title": "Multi-View Independent Component Analysis with Shared and Individual Sources", "track": "main", "status": "Reject", "tldr": "We investigate the special setting of noisy linear ICA where the observations are split among different views, each of which receives a mixture of shared and individual sources. ", "abstract": "Independent component analysis (ICA) is a blind source separation method for linear disentanglement of independent latent sources from observed data. We investigate the special setting of noisy linear ICA where the observations are split among different views, each receiving a mixture of shared and individual sources. We prove that the corresponding linear structure is identifiable, and the shared sources can be recovered, provided that sufficiently many diverse views and data points are available. To computationally estimate the sources, we optimize a constrained form of the joint log-likelihood of the observed data among all views. We show empirically that our objective recovers the sources in high-dimensional settings, even when the measurements are corrupted by noise. 
Finally, we apply the proposed model in a challenging real-life application, where the estimated shared sources from two large transcriptome datasets (observed data) provided by two different labs (two different views) lead to a more plausible representation of the underlying graph structure than existing baselines.", "keywords": "multiview independent component analysis;independent component analysis;blind source separation;multiview representation learning", "primary_area": "", "supplementary_material": "/attachment/1c8fec5e497498acc14b5b183960eec53fbb1481.zip", "author": "Teodora Pandeva;Patrick Forr\u00e9", "authorids": "~Teodora_Pandeva1;~Patrick_Forr\u00e91", "gender": ";", "homepage": "https://amlab.science.uva.nl/people/TeodoraPandeva/;", "dblp": "254/1091;", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Teodora_Pandeva1;~Patrick_Forr\u00e91", "aff": "University of Amsterdam;", "aff_domain": "uva.nl;", "position": "PhD student;", "bibtex": "@misc{\npandeva2023multiview,\ntitle={Multi-View Independent Component Analysis with Shared and Individual Sources},\nauthor={Teodora Pandeva and Patrick Forr{\\'e}},\nyear={2023},\nurl={https://openreview.net/forum?id=7WiIzqeqBNL}\n}", "github": "", "project": "", "reviewers": "Kgmf;g5xN;vDv7;8FCr", "site": "https://openreview.net/forum?id=7WiIzqeqBNL", "pdf_size": 3049936, "recommendation": "3;3;5;8", "confidence": "4;4;3;5", "correctness": "3;3;2;4", "technical_novelty": "3;2;2;4", "empirical_novelty": "1;2;2;4", "wc_summary_paper": "54;62;35;85", "wc_strength_and_weaknesses": "66;96;52;225", "wc_clarity_quality_novelty_and_reproducibility": "29;418;144;102", "wc_summary_review": "133;50;24;48", "wc_review": "282;626;255;460", "wc_reply_reviewers": "0;230;0;0", "wc_reply_authors": "189;668;443;138", "reply_reviewers": "0;1;0;0", "reply_authors": "2;7;3;1", "recommendation_avg": [ 4.75, 2.0463381929681126 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 2.25, 1.0897247358851685 ], "wc_summary_paper_avg": [ 59.0, 17.930421077041107 ], "wc_strength_and_weaknesses_avg": [ 109.75, 68.41189589537773 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 173.25, 147.17570281809427 ], "wc_summary_review_avg": [ 63.75, 41.26969226926705 ], "wc_review_avg": [ 405.75, 149.57669437449138 ], "wc_reply_reviewers_avg": [ 57.5, 99.59292143521044 ], "wc_reply_authors_avg": [ 359.5, 212.2951954237307 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.25, 2.277608394786075 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.5183210553488161, "corr_recommendation_correctness": 0.5183210553488161, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12721787444630490920&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 9, "aff_unique_index": "0", "aff_unique_norm": "University of Amsterdam", "aff_unique_dep": "", "aff_unique_url": "https://www.uva.nl", "aff_unique_abbr": "UvA", "aff_country_unique_index": "0", "aff_country_unique": "Netherlands" }, { "id": "7XHiDnUb_hj", "title": "Detecting Small Query Graphs in A Large Graph via Neural Subgraph Search", "track": "main", "status": "Reject", "tldr": "", "abstract": "Recent advances have shown the success of using reinforcement learning and search to solve NP-hard graph-related tasks, such as Traveling Salesman Optimization, Graph Edit Distance computation, etc. 
However, it remains unclear how one can efficiently and accurately detect the occurrences of a small query graph in a large target graph, which is a core operation in graph database search, biomedical analysis, social group finding, etc. This task is called Subgraph Matching, which essentially performs a subgraph isomorphism check between a query graph and a large target graph. One promising approach to this classical problem is the \u201clearning-to-search\u201d paradigm, where a reinforcement learning (RL) agent is designed with a learned policy to guide a search algorithm to quickly find the solution without any solved instances for supervision. However, for the specific task of Subgraph Matching, though the query graph, given by the user as input, is usually small, the target graph is often orders of magnitude larger. This poses challenges for the neural network design and can lead to solution and reward sparsity. In this paper, we propose NSUBS with two innovations to tackle the challenges: (1) A novel encoder-decoder neural network architecture to dynamically compute the matching information between the query and the target graphs at each search state; (2) A novel look-ahead loss function for training the policy network. Experiments on six large real-world target graphs show that NSUBS can significantly improve subgraph matching performance.", "keywords": "subgraph matching;subgraph isomorphism search", "primary_area": "", "supplementary_material": "/attachment/e6901cfc0bd82edceec38c3621318e082948b834.zip", "author": "Yunsheng Bai;Derek Qiang Xu;Yizhou Sun;Wei Wang", "authorids": "~Yunsheng_Bai2;~Derek_Qiang_Xu1;~Yizhou_Sun1;~Wei_Wang13", "gender": "F;F;M;M", "homepage": "http://web.cs.ucla.edu/~yzsun/;http://www.cs.ucla.edu/~weiwang;https://derekqxu.github.io;https://yunshengb.com/", "dblp": "37/3868;w/WeiWang.html;155/0712;225/5377.html", "google_scholar": "https://scholar.google.com.tw/citations?user=TQgOjK0AAAAJ;UedS9LQAAAAJ;07nfvIgAAAAJ;", "orcid": ";0000-0002-8180-2886;0009-0008-2992-9768;", "linkedin": ";wei-wang-8800845/;derekqxu/;", "or_profile": "~Yizhou_Sun1;~Wei_Wang13;~Derek_Qiang_Xu2;~Yunsheng_Bai1", "aff": "University of California, Los Angeles;University of California, Los Angeles;University of California, Los Angeles;University of California, Los Angeles", "aff_domain": "ucla.edu;ucla.edu;ucla.edu;cs.ucla.edu", "position": "Associate Professor;Full Professor;PhD student;PhD student", "bibtex": "@misc{\nbai2023detecting,\ntitle={Detecting Small Query Graphs in A Large Graph via Neural Subgraph Search},\nauthor={Yunsheng Bai and Derek Qiang Xu and Yizhou Sun and Wei Wang},\nyear={2023},\nurl={https://openreview.net/forum?id=7XHiDnUb_hj}\n}", "github": "", "project": "", "reviewers": "181W;vmC2;HPUs;AyD2", "site": "https://openreview.net/forum?id=7XHiDnUb_hj", "pdf_size": 3969948, "recommendation": "5;5;6;6", "confidence": "3;3;3;4", "correctness": "3;3;3;3", "technical_novelty": "3;1;2;3", "empirical_novelty": "3;1;0;2", "wc_summary_paper": "55;44;37;53", "wc_strength_and_weaknesses": "108;18;264;249", "wc_clarity_quality_novelty_and_reproducibility": "153;37;25;26", "wc_summary_review": "45;77;25;43", "wc_review": "361;176;351;371", "wc_reply_reviewers": "0;51;0;0", "wc_reply_authors": "703;1944;315;647", "reply_reviewers": "0;1;0;0", "reply_authors": "2;6;3;2", "recommendation_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.25, 0.82915619758885 ], "empirical_novelty_avg": [ 1.5, 
1.118033988749895 ], "wc_summary_paper_avg": [ 47.25, 7.224091638399945 ], "wc_strength_and_weaknesses_avg": [ 159.75, 101.98621230342854 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 60.25, 53.75581363908466 ], "wc_summary_review_avg": [ 47.5, 18.728320800328042 ], "wc_review_avg": [ 314.75, 80.41882553233415 ], "wc_reply_reviewers_avg": [ 12.75, 22.083647796503186 ], "wc_reply_authors_avg": [ 902.25, 619.4672610396775 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.25, 1.6393596310755 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:vN_JbxyrnHIJ:scholar.google.com/&scioq=Detecting+Small+Query+Graphs+in+A+Large+Graph+via+Neural+Subgraph+Search&hl=en&as_sdt=0,5", "gs_version_total": 3, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of California, Los Angeles", "aff_unique_dep": "", "aff_unique_url": "https://www.ucla.edu", "aff_unique_abbr": "UCLA", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Encoding Recurrence into Transformers", "status": "Top-5%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10686", "id": "7YfHla7IxBJ", "poster": "/media/PosterPDFs/ICLR%202023/10686.png?t=1681695562.363925", "openreview": "https://openreview.net/forum?id=7YfHla7IxBJ", "slides": "https://iclr.cc/virtual/2023/poster/10686", "video": "https://iclr.cc/virtual/2023/poster/10686", "author_site": "Feiqing Huang, Kexin Lu, Yuxi Cai, Zhen Qin, Yanwen Fang, Guangjian Tian, Guodong Li", "tldr": "We propose a new module to encode the recurrent dynamics of an RNN layer into Transformers, so that higher sample efficiency can be achieved.", "abstract": "This paper breaks down an RNN layer, with negligible loss, into a sequence of simple RNNs, each of which can be further rewritten as a lightweight positional encoding matrix of a self-attention, named the Recurrence Encoding Matrix (REM). Thus, recurrent dynamics introduced by the RNN layer can be encapsulated into the positional encodings of a multihead self-attention, and this makes it possible to seamlessly incorporate these recurrent dynamics into a Transformer, leading to a new module, Self-Attention with Recurrence (RSA). The proposed module can leverage the recurrent inductive bias of REMs to achieve better sample efficiency than its corresponding baseline Transformer, while the self-attention is used to model the remaining non-recurrent signals. 
The relative proportions of these two components are controlled by a data-driven gated mechanism, and the effectiveness of RSA modules is demonstrated on four sequential learning tasks.", "keywords": "Recurrent models;Transformers;sample efficiency;gated mechanism", "primary_area": "", "supplementary_material": "/attachment/7bd80d8f46af4f56d77c37d2d709049d79431c11.zip", "author": "Feiqing Huang;Kexin Lu;Yuxi CAI;Zhen Qin;Yanwen Fang;Guangjian Tian;Guodong Li", "authorids": "~Feiqing_Huang1;~Kexin_Lu1;~Yuxi_CAI1;~Zhen_Qin7;~Yanwen_Fang1;~Guangjian_Tian1;~Guodong_Li1", "gender": ";;;F;F;M;M", "homepage": ";https://github.com/neithen-Lu;;https://qin7zhen.github.io/;https://www.researchgate.net/profile/Yanwen-Fang-2;;https://saasweb.hku.hk/staff/gdli/", "dblp": ";;;;;52/7695.html;", "google_scholar": ";;;;https://scholar.google.com/citations?hl=zh-CN;;whNuLsEAAAAJ", "orcid": ";;0000-0003-4065-1193;;;;", "linkedin": ";;;;;;", "or_profile": "~Feiqing_Huang1;~Kexin_Lu1;~Yuxi_CAI1;~Zhen_Qin7;~Yanwen_Fang1;~Guangjian_Tian1;~Guodong_Li1", "aff": ";University of Hong Kong;University of Hong Kong;Huawei Technologies Ltd.;The University of Hong Kong;Huawei Technologies Ltd.;The University of Hong Kong", "aff_domain": ";hku.hk;hku.hk;huawei.com;hku.hk;huawei.com;hku.hk", "position": ";PhD student;PhD student;Researcher;PhD student;Researcher;Professor", "bibtex": "@inproceedings{\nhuang2023encoding,\ntitle={Encoding Recurrence into Transformers},\nauthor={Feiqing Huang and Kexin Lu and Yuxi CAI and Zhen Qin and Yanwen Fang and Guangjian Tian and Guodong Li},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=7YfHla7IxBJ}\n}", "github": "", "project": "", "reviewers": "mvWh;ZrmK;f1my", "pdf_size": 4199905, "recommendation": "6;8;8", "confidence": "5;3;4", "correctness": "3;3;3", "technical_novelty": "2;3;3", "empirical_novelty": "2;3;3", "wc_summary_paper": "41;121;46", "wc_strength_and_weaknesses": "185;231;59", "wc_clarity_quality_novelty_and_reproducibility": "40;52;8", "wc_summary_review": "15;19;31", "wc_review": "281;423;144", "wc_reply_reviewers": "201;16;0", "wc_reply_authors": "3821;2652;225", "reply_reviewers": "2;1;0", "reply_authors": "10;7;1", "recommendation_avg": [ 7.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 69.33333333333333, 36.59083066683358 ], "wc_strength_and_weaknesses_avg": [ 158.33333333333334, 72.70641114937679 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 33.333333333333336, 18.571184369578827 ], "wc_summary_review_avg": [ 21.666666666666668, 6.79869268479038 ], "wc_review_avg": [ 282.6666666666667, 113.90736977425512 ], "wc_reply_reviewers_avg": [ 72.33333333333333, 91.21525213593516 ], "wc_reply_authors_avg": [ 2232.6666666666665, 1497.7058752935734 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 6.0, 3.7416573867739413 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.8660254037844385, "corr_recommendation_correctness": 0.0, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10868113070989045285&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 2, "pdf": "https://openreview.net/pdf?id=7YfHla7IxBJ", "email": 
";hku.hk;hku.hk;huawei.com;hku.hk;huawei.com;hku.hk", "author_num": 7, "aff_unique_index": "0;0;1;0;1;0", "aff_unique_norm": "University of Hong Kong;Huawei", "aff_unique_dep": ";Huawei Technologies", "aff_unique_url": "https://www.hku.hk;https://www.huawei.com", "aff_unique_abbr": "HKU;Huawei", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "7ZaJfk915b1", "title": "Shared Knowledge Lifelong Learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "In Lifelong Learning (LL), agents continually learn as they encounter new conditions and tasks. Most current LL is limited to a single agent that learns tasks sequentially. Dedicated LL machinery is then deployed to mitigate the forgetting of old tasks as new tasks are learned. This is inherently slow. We propose a new Shared Knowledge Lifelong Learning (SKILL) learning paradigm, which deploys a population of LL agents that each learn different tasks independently and in parallel. After learning their respective tasks, agents share and consolidate their knowledge over a communication network, so that, in the end, all agents can master all tasks. Our approach relies on a frozen backbone embedded in all agents at manufacturing time, so that only the last layer head plus some small adjustments to the backbone beneficial biases are learned for each task. To eliminate the need for a task oracle, agents also learn and share summary statistics about their training datasets (Gaussian Mixture Clusters), or share a few training images, to help other agents assign test samples to the correct head using a Mahalanobis task mapper. On a new, very challenging dataset with 102 image classification tasks, we achieve significant speedup over 18 LL baselines (e.g., >9,000x speedup over single-agent EWC) while also achieving higher (and SOTA) accuracy.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Yunhao Ge;Yuecheng Li;Di Wu;Ao Xu;Adam M. 
Jones;Amanda Sofie Rios;Iordanis Fostiropoulos;shixian wen;Po-Hsuan Huang;Zachary William Murdock;Kiran Lekkala;Gozde Sahin;Sumedh Anand Sontakke;Laurent Itti", "authorids": "~Yunhao_Ge1;~Yuecheng_Li4;~Di_Wu18;~Ao_Xu1;~Adam_M._Jones1;~Amanda_Sofie_Rios1;~Iordanis_Fostiropoulos1;~shixian_wen1;~Po-Hsuan_Huang1;~Zachary_William_Murdock1;~Kiran_Lekkala1;~Gozde_Sahin1;~Sumedh_Anand_Sontakke1;~Laurent_Itti1", "gender": "M;M;M;M;;F;M;M;M;M;M;;M;M", "homepage": "https://gyhandy.github.io/;;;;;;https://iordanis.me;;;;http://ilab.usc.edu/people/;;https://sumedh7.github.io/;http://ilab.usc.edu", "dblp": "204/1908;;;;;230/3641.html;227/5493;;;;;;276/0127;31/3256", "google_scholar": "https://scholar.google.ca/citations?user=QhjGr4oAAAAJ;;;;;acUiYtUAAAAJ;XTLv1v8AAAAJ;;;GtSR1hwAAAAJ;uDKnFSgAAAAJ;;https://scholar.google.com/citations?hl=en;xhUvqK8AAAAJ", "orcid": ";;;;;0000-0002-2410-6614;;;;;;;;0000-0002-0168-2977", "linkedin": "yunhao-ge-720727135/;yuecheng-li-087709150/;diwu123/;ao-xu-183b4123a/;;amanda-rios-49456217a/;;;pohsuanhuang/;zwmurdock/;;;sumedh-sontakke-0ab24210a/;", "or_profile": "~Yunhao_Ge1;~Yuecheng_Li4;~Di_Wu18;~Ao_Xu1;~Adam_M._Jones1;~Amanda_Sofie_Rios1;~Iordanis_Fostiropoulos1;~shixian_wen1;~Po-Hsuan_Huang1;~Zachary_William_Murdock1;~Kiran_Lekkala1;~Gozde_Sahin1;~Sumedh_Anand_Sontakke1;~Laurent_Itti1", "aff": "University of Southern California;University of Southern California;;University of Southern California;;Intel;University of Southern California;University of Southern California;University of Southern California;University of Southern California;University of Southern California;;University of Southern California;University of Southern California", "aff_domain": "usc.edu;usc.edu;;usc.edu;;intel.com;usc.edu;usc.edu;usc.edu;usc.edu;usc.edu;;usc.edu;usc.edu", "position": "PhD student;MS student;;MS student;;AI Researcher and Engineer;PhD student;PhD student;PhD student;PhD student;PhD student;;PhD student;Professor", "bibtex": "@misc{\nge2023shared,\ntitle={Shared Knowledge Lifelong Learning},\nauthor={Yunhao Ge and Yuecheng Li and Di Wu and Ao Xu and Adam M. 
Jones and Amanda Sofie Rios and Iordanis Fostiropoulos and shixian wen and Po-Hsuan Huang and Zachary William Murdock and Kiran Lekkala and Gozde Sahin and Sumedh Anand Sontakke and Laurent Itti},\nyear={2023},\nurl={https://openreview.net/forum?id=7ZaJfk915b1}\n}", "github": "", "project": "", "reviewers": "hWZC;DhE6;UHwg;Ft7Z", "site": "https://openreview.net/forum?id=7ZaJfk915b1", "pdf_size": 8629624, "recommendation": "3;3;3;3", "confidence": "4;5;3;4", "correctness": "1;2;3;2", "technical_novelty": "3;2;2;1", "empirical_novelty": "0;2;2;3", "wc_summary_paper": "140;106;131;86", "wc_strength_and_weaknesses": "556;160;170;949", "wc_clarity_quality_novelty_and_reproducibility": "36;2185;5;43", "wc_summary_review": "74;160;74;46", "wc_review": "806;2611;380;1124", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 2.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 1.75, 1.0897247358851685 ], "wc_summary_paper_avg": [ 115.75, 21.21762239271875 ], "wc_strength_and_weaknesses_avg": [ 458.75, 324.9733642931371 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 567.25, 934.117865956968 ], "wc_summary_review_avg": [ 88.5, 42.83398183685472 ], "wc_review_avg": [ 1230.25, 839.74292941352 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 14, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "aff_unique_index": "0;0;0;1;0;0;0;0;0;0;0", "aff_unique_norm": "University of Southern California;Intel", "aff_unique_dep": ";Intel Corporation", "aff_unique_url": "https://www.usc.edu;https://www.intel.com", "aff_unique_abbr": "USC;Intel", "aff_campus_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "7ZcyRF7Y3S", "title": "Synergies Between Disentanglement and Sparsity: a Multi-Task Learning Perspective", "track": "main", "status": "Reject", "tldr": "We show how disentangled representations combined with sparse base-predictors can improve generalization and how, in a multi-task learning setting, sparsity regularization on the task-specific predictors can induce disentanglement.", "abstract": "Although disentangled representations are often said to be beneficial for downstream tasks, current empirical and theoretical understanding is limited. In this work, we provide evidence that disentangled representations coupled with sparse base-predictors improve generalization. In the context of multi-task learning, we prove a new identifiability result that provides conditions under which maximally sparse base-predictors yield disentangled representations. Motivated by this theoretical result, we propose a practical approach to learn disentangled representations based on a sparsity-promoting bi-level optimization problem. Finally, we explore a meta-learning version of this algorithm based on group Lasso multiclass SVM base-predictors, for which we derive a tractable dual formulation. 
It obtains competitive results on standard few-shot classification benchmarks, while each task uses only a fraction of the learned representations. ", "keywords": "Disentanglement;identifiability;multi-task learning;sparsity;transfer learning;meta-learning", "primary_area": "", "supplementary_material": "/attachment/dfe9c840bccbae98431763da1400c36187001f66.zip", "author": "Sebastien Lachapelle;Tristan Deleu;Divyat Mahajan;Ioannis Mitliagkas;Yoshua Bengio;Simon Lacoste-Julien;Quentin Bertrand", "authorids": "~Sebastien_Lachapelle1;~Tristan_Deleu1;~Divyat_Mahajan1;~Ioannis_Mitliagkas1;~Yoshua_Bengio1;~Simon_Lacoste-Julien1;~Quentin_Bertrand1", "gender": "M;;M;M;M;M;M", "homepage": "https://slachapelle.github.io/;https://tristandeleu.github.io/;http://divyat09.github.io/;http://mitliagkas.github.io/;http://yoshuabengio.org;http://www.iro.umontreal.ca/~slacoste/;https://qb3.github.io/index.html", "dblp": "224/0080;192/1896;242/8911.html;83/8757;56/953;94/446.html;", "google_scholar": "uxHoJp8AAAAJ;nLNwh-wAAAAJ;https://scholar.google.co.in/citations?user=z5bDMO4AAAAJ;K757SxgAAAAJ;kukA0LcAAAAJ;oejm5IUAAAAJ;Uxr3P78AAAAJ", "orcid": ";;;;;0000-0001-6485-6180;", "linkedin": "s%C3%A9bastien-lachapelle-a4321a122/;;divyat-mahajan-6221a0a6/;;yoshuabengio/?originalSubdomain=ca;simon-lacoste-julien-355b9a3;", "or_profile": "~Sebastien_Lachapelle1;~Tristan_Deleu1;~Divyat_Mahajan1;~Ioannis_Mitliagkas1;~Yoshua_Bengio1;~Simon_Lacoste-Julien1;~Quentin_Bertrand1", "aff": "University of Montreal;University of Montreal;Montreal Institute of Learning Algorithms;Mila - Quebec AI Institute;University of Montreal;Samsung - SAIT AI Lab, Montreal;", "aff_domain": "umontreal.ca;umontreal.ca;mila.quebec;mila.quebec;umontreal.ca;samsung.com;", "position": "PhD student;PhD student;PhD student;Principal Researcher;Full Professor;VP Lab Director;", "bibtex": "@misc{\nlachapelle2023synergies,\ntitle={Synergies Between Disentanglement and Sparsity: a Multi-Task Learning Perspective},\nauthor={Sebastien Lachapelle and Tristan Deleu and Divyat Mahajan and Ioannis Mitliagkas and Yoshua Bengio and Simon Lacoste-Julien and Quentin Bertrand},\nyear={2023},\nurl={https://openreview.net/forum?id=7ZcyRF7Y3S}\n}", "github": "", "project": "", "reviewers": "5SbL;iXc7;GZww;wTbQ", "site": "https://openreview.net/forum?id=7ZcyRF7Y3S", "pdf_size": 3483454, "recommendation": "6;6;6;6", "confidence": "2;3;3;4", "correctness": "4;3;4;4", "technical_novelty": "4;3;3;4", "empirical_novelty": "4;3;3;3", "wc_summary_paper": "53;57;77;101", "wc_strength_and_weaknesses": "107;120;272;199", "wc_clarity_quality_novelty_and_reproducibility": "47;11;23;6", "wc_summary_review": "58;20;74;36", "wc_review": "265;208;446;342", "wc_reply_reviewers": "0;0;0;44", "wc_reply_authors": "207;267;643;592", "reply_reviewers": "0;0;0;1", "reply_authors": "2;2;2;1", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 3.5, 0.5 ], "empirical_novelty_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 72.0, 19.05255888325765 ], "wc_strength_and_weaknesses_avg": [ 174.5, 66.39465339920075 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 21.75, 15.833114033569013 ], "wc_summary_review_avg": [ 47.0, 20.615528128088304 ], "wc_review_avg": [ 315.25, 89.21708076371922 ], "wc_reply_reviewers_avg": [ 11.0, 19.05255888325765 ], "wc_reply_authors_avg": [ 427.25, 192.2763310966797 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], 
"reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "aff_unique_index": "0;0;1;2;0;3", "aff_unique_norm": "University of Montreal;Montreal Institute of Learning Algorithms;Quebec AI Institute;Samsung", "aff_unique_dep": ";Learning Algorithms;AI Institute;SAIT AI Lab", "aff_unique_url": "https://wwwumontreal.ca;https://mila.quebec;https://mila.quebec;https://www.samsung.com", "aff_unique_abbr": "UM;MILA;Mila;Samsung", "aff_campus_unique_index": "1", "aff_campus_unique": ";Montreal", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "Canada" }, { "id": "7_3oRsaogr", "title": "Style Balancing and Test-Time Style Shifting for Domain Generalization", "track": "main", "status": "Withdraw", "tldr": "We propose style balancing and test-time style shifting for domain generalization, to handle the imbalance issues and the issue on large style gap between source and target domains.", "abstract": "Given a training set that consists of multiple source domains, the goal of domain generalization (DG) is to train the model to have generalization capability on the unseen target domain. Although various solutions have been proposed, existing ideas suffer from severe cross-domain data/class imbalance issues that naturally arise in DG. Moreover, the performance of prior works are degraded in practice\nwhere the gap between the style statistics of source and target domains is large. In this paper, we propose a new strategy to handle these issues in DG. We first propose style balancing, which strategically balances the number of samples for each class across all source domains in the style-space, providing a great platform for the model to get exposed to various styles per classes during training. Based on\nthe model trained with our style balancing, we also propose test-time style shifting, which shifts the style of the test sample (that has a large style gap with the source domains) to the nearest source domain that the model is already familiar with, to further improve the prediction performance. Our style balancing and test-time style shifting work in a highly complementary fashion, and can successfully work in conjunction with various other DG schemes. 
Experimental results on benchmark datasets show the improved performance of our scheme over existing methods.", "keywords": "Domain generalization;Style mixing;Arbitrary style transfer", "primary_area": "", "supplementary_material": "/attachment/164b87f152b4b677fc06360630521d168b4aa97a.zip", "author": "Jungwuk Park;Dong-Jun Han;Soyeong Kim;Jaekyun Moon", "authorids": "~Jungwuk_Park1;~Dong-Jun_Han1;~Soyeong_Kim1;~Jaekyun_Moon2", "gender": "M;M;F;M", "homepage": ";https://sites.google.com/view/djhan930/home?authuser=0;http://;http://comstolab.kaist.ac.kr/people.html", "dblp": "307/4735;201/0078;;78/2744", "google_scholar": "ek4xQy0AAAAJ;https://scholar.google.co.kr/citations?user=-YR-GxUAAAAJ;;", "orcid": ";;;", "linkedin": "jungwuk-park-458b25199;;;", "or_profile": "~Jungwuk_Park1;~Dong-Jun_Han1;~Soyeong_Kim1;~Jaekyun_Moon2", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;;KAIST", "aff_domain": "kaist.ac.kr;kaist.ac.kr;;kaist.edu", "position": "PhD student;Postdoc;;Full Professor", "bibtex": "@misc{\npark2023style,\ntitle={Style Balancing and Test-Time Style Shifting for Domain Generalization},\nauthor={Jungwuk Park and Dong-Jun Han and Soyeong Kim and Jaekyun Moon},\nyear={2023},\nurl={https://openreview.net/forum?id=7_3oRsaogr}\n}", "github": "", "project": "", "reviewers": "ZHwx;4j4g;WgpP;1Ng3", "site": "https://openreview.net/forum?id=7_3oRsaogr", "pdf_size": 581201, "recommendation": "3;5;5;6", "confidence": "5;4;4;2", "correctness": "2;3;3;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "135;53;98;89", "wc_strength_and_weaknesses": "476;178;341;204", "wc_clarity_quality_novelty_and_reproducibility": "103;41;15;15", "wc_summary_review": "66;65;26;34", "wc_review": "780;337;480;342", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 93.75, 29.166547618804664 ], "wc_strength_and_weaknesses_avg": [ 299.75, 119.11837599631721 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 43.5, 35.95483277669359 ], "wc_summary_review_avg": [ 47.75, 17.977416388346796 ], "wc_review_avg": [ 484.75, 179.86296867337646 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.894736842105263, "corr_recommendation_correctness": 0.9271726499455306, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17482138953832830259&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "Measuring Forgetting of Memorized Training Examples", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11472", "id": "7bJizxLKrR", "poster": "", "openreview": "https://openreview.net/forum?id=7bJizxLKrR", "slides": 
"https://iclr.cc/virtual/2023/poster/11472", "video": "https://iclr.cc/virtual/2023/poster/11472", "author_site": "Matthew Jagielski, Om Thakkar, Florian Tramer, Daphne Ippolito, Katherine Lee, Nicholas Carlini, Eric Wallace, Shuang Song, Abhradeep Guha Thakurta, Nicolas Papernot, Chiyuan Zhang", "tldr": "When models are trained on large datasets, we show that privacy attacks become less effective on examples seen early in training, and investigate why.", "abstract": "Machine learning models exhibit two seemingly contradictory phenomena: training data memorization and various forms of forgetting. In memorization, models overfit specific training examples and become susceptible to privacy attacks. In forgetting, examples which appeared early in training are forgotten by the end. In this work, we connect these phenomena.\nWe propose a technique to measure to what extent models ``forget'' the specifics of training examples, becoming less susceptible to privacy attacks on examples they have not seen recently.\nWe show that, while non-convexity can prevent forgetting from happening in the worst-case, standard image,speech, and language models empirically do forget examples over time.\nWe identify nondeterminism as a potential explanation, showing that deterministically trained models do not forget.\nOur results suggest that examples seen early when training with extremely large datasets---for instance those examples used to pre-train a model---may observe privacy benefits at the expense of examples seen later.", "keywords": "forgetting;memorization;membership inference;canary extraction;nondeterminism;convexity", "primary_area": "", "supplementary_material": "", "author": "Matthew Jagielski;Om Thakkar;Florian Tramer;Daphne Ippolito;Katherine Lee;Nicholas Carlini;Eric Wallace;Shuang Song;Abhradeep Guha Thakurta;Nicolas Papernot;Chiyuan Zhang", "authorids": "~Matthew_Jagielski1;~Om_Thakkar1;~Florian_Tramer1;~Daphne_Ippolito1;~Katherine_Lee1;~Nicholas_Carlini1;~Eric_Wallace1;~Shuang_Song3;~Abhradeep_Guha_Thakurta1;~Nicolas_Papernot1;~Chiyuan_Zhang1", "gender": "M;M;F;F;;M;;M;M;M;M", "homepage": "https://jagielski.github.io/;http://floriantramer.com;http://www.daphnei.com;https://katelee168.github.io/;http://nicholas.carlini.com;http://www.ericswallace.com/;;https://athakurta.squarespace.com/;https://www.papernot.fr;http://pluskid.org;http://www.omthakkar.com/", "dblp": "218/5156;158/7224;192/2031.html;115/5082.html;145/1806;218/6165;86/4211-1;31/8315;162/1405;21/8315;https://dblp.uni-trier.de/pid/166/1707", "google_scholar": "_8rw_GMAAAAJ;https://scholar.google.ch/citations?user=ijH0-a8AAAAJ;;bjdB4K8AAAAJ;;SgST3LkAAAAJ;;1rV69hMAAAAJ;cGxq0cMAAAAJ;l_G2vr0AAAAJ;j5N3bKYAAAAJ", "orcid": ";;;;;;;;;;", "linkedin": ";;;;;;;;nicolaspapernot;;", "or_profile": "~Matthew_Jagielski1;~Florian_Tramer1;~Daphne_Ippolito1;~Katherine_Lee1;~Nicholas_Carlini1;~Eric_Wallace1;~Shuang_Song3;~Abhradeep_Guha_Thakurta1;~Nicolas_Papernot1;~Chiyuan_Zhang1;~Om_Dipakbhai_Thakkar1", "aff": "Google;ETHZ - ETH Zurich;Carnegie Mellon University;Cornell University;Google;University of California, Berkeley;Google;Google;Google;Google;Google", "aff_domain": "google.com;ethz.ch;cmu.edu;cornell.edu;google.com;berkeley.edu;google.com;google.com;google.com;google.com;google.com", "position": "Researcher;Assistant Professor;Assistant Professor;PhD student;Researcher;PhD student;Software Engineer;Senior Research Scientist;Research Scientist;Research Scientist;Researcher", "bibtex": "@inproceedings{\njagielski2023measuring,\ntitle={Measuring 
Forgetting of Memorized Training Examples},\nauthor={Matthew Jagielski and Om Thakkar and Florian Tramer and Daphne Ippolito and Katherine Lee and Nicholas Carlini and Eric Wallace and Shuang Song and Abhradeep Guha Thakurta and Nicolas Papernot and Chiyuan Zhang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=7bJizxLKrR}\n}", "github": "", "project": "", "reviewers": "2BVh;TqY1;Qkp9;hxrq", "pdf_size": 806411, "recommendation": "6;6;6;8", "confidence": "3;3;3;3", "correctness": "3;3;3;2", "technical_novelty": "3;3;4;3", "empirical_novelty": "3;3;4;3", "wc_summary_paper": "83;92;58;51", "wc_strength_and_weaknesses": "309;422;136;332", "wc_clarity_quality_novelty_and_reproducibility": "61;35;38;5", "wc_summary_review": "42;64;64;7", "wc_review": "495;613;296;395", "wc_reply_reviewers": "135;0;0;20", "wc_reply_authors": "397;484;175;337", "reply_reviewers": "1;0;0;1", "reply_authors": "1;1;1;1", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.0, 0.0 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 71.0, 16.98528775146303 ], "wc_strength_and_weaknesses_avg": [ 299.75, 103.54316732648273 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 34.75, 19.904459299363044 ], "wc_summary_review_avg": [ 44.25, 23.306383245797704 ], "wc_review_avg": [ 449.75, 117.61669736903855 ], "wc_reply_reviewers_avg": [ 38.75, 56.166604846652426 ], "wc_reply_authors_avg": [ 348.25, 112.8569337701499 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 11, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": -1.0, "gs_citation": 112, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14449375294828538389&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=7bJizxLKrR", "email": "google.com;ethz.ch;cmu.edu;cornell.edu;google.com;berkeley.edu;google.com;google.com;google.com;google.com;google.com", "author_num": 11, "aff_unique_index": "0;1;2;3;0;4;0;0;0;0;0", "aff_unique_norm": "Google;ETH Zurich;Carnegie Mellon University;Cornell University;University of California, Berkeley", "aff_unique_dep": "Google;;;;", "aff_unique_url": "https://www.google.com;https://www.ethz.ch;https://www.cmu.edu;https://www.cornell.edu;https://www.berkeley.edu", "aff_unique_abbr": "Google;ETHZ;CMU;Cornell;UC Berkeley", "aff_campus_unique_index": "0;0;2;0;0;0;0;0", "aff_campus_unique": "Mountain View;;Berkeley", "aff_country_unique_index": "0;1;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States;Switzerland" }, { "id": "7bcrAxy00Jw", "title": "Improve distance metric learning by learning positions of class centers", "track": "main", "status": "Reject", "tldr": "", "abstract": "Deep metric learning aims at learning a deep neural network by letting similar samples have small distances while dissimilar samples have large distances. To achieve this goal, the current DML algorithms mainly focus on pulling similar samples in each class as close together as possible. However, pulling similar samples only considers the local distribution of the data samples and ignores the global distribution of the data set, i.e., the center positions of different classes. The global distribution helps distance metric learning. 
For example, expanding the distance between centers can increase the discriminative ability of the extracted features. However, how to increase the distance between centers is a challenging task. In this paper, we design an ingenious function, named the skewed mean function, which considers only the largest distances within a set of samples, so that maximizing the value of the skewed mean function makes the largest distances larger. We also prove that the current energy functions used for uniformity regularization on centers are special cases of our skewed mean function. Finally, we conduct extensive experiments to illustrate the superiority of our method.", "keywords": "distance metric learning;skewed mean function", "primary_area": "", "supplementary_material": "", "author": "Kun Song;Lantian Chu;Junwei Han;Fakhri Karray", "authorids": "~Kun_Song2;~Lantian_Chu1;~Junwei_Han1;~Fakhri_Karray1", "gender": "M;M;M;M", "homepage": "https://vision-intelligence.com.cn/;https://mbzuai.ac.ae/study/faculty/professor-fakhreddine-fakhri-karray/;;", "dblp": ";k/FakhriKarray;0000-0001-5545-7217;", "google_scholar": ";9_Hpd5kAAAAJ;;https://scholar.google.com/citations?hl=zh-TW", "orcid": ";0000-0002-6900-315X;;", "linkedin": ";;;", "or_profile": "~Lantian_Chu1;~Fakhri_Karray1;~Junwei_Han3;~Kun_Kun1", "aff": "Northwest Polytechnical University Xi'an;Mohamed bin Zayed University of Artificial Intelligence;Northwest Polytechnical University Xi'an;Mohamed bin Zayed University of Artificial Intelligence", "aff_domain": "nwpu.edu.cn;mbzuai.ac.ae;nwpu.edu.cn;mbzuai.ac.ae", "position": "MS student;Full Professor;Full Professor;Postdoc", "bibtex": "@misc{\nsong2023improve,\ntitle={Improve distance metric learning by learning positions of class centers},\nauthor={Kun Song and Lantian Chu and Junwei Han and Fakhri Karray},\nyear={2023},\nurl={https://openreview.net/forum?id=7bcrAxy00Jw}\n}", "github": "", "project": "", "reviewers": "tGz6;CgdK;SAjL", "site": "https://openreview.net/forum?id=7bcrAxy00Jw", "pdf_size": 467035, "recommendation": "1;3;3", "confidence": "5;5;4", "correctness": "2;1;3", "technical_novelty": "2;2;2", "empirical_novelty": "2;2;2", "wc_summary_paper": "59;142;59", "wc_strength_and_weaknesses": "51;423;81", "wc_clarity_quality_novelty_and_reproducibility": "1;46;17", "wc_summary_review": "4;75;7", "wc_review": "115;686;164", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 2.3333333333333335, 0.9428090415820634 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "correctness_avg": [ 2.0, 0.816496580927726 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 86.66666666666667, 39.12657522565563 ], "wc_strength_and_weaknesses_avg": [ 185.0, 168.7364809399556 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 21.333333333333332, 18.624953392931992 ], "wc_summary_review_avg": [ 28.666666666666668, 32.7854981491648 ], "wc_review_avg": [ 321.6666666666667, 258.39805640823914 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.4999999999999999, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link":
"https://scholar.google.com/scholar?q=related:y0U1_qHkGEgJ:scholar.google.com/&scioq=Improve+distance+metric+learning+by+learning+positions+of+class+centers&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "Northwest Polytechnical University;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "http://www.nwpu.edu.cn;https://mbzuai.ac.ae", "aff_unique_abbr": "NWPU;MBZUAI", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Xi'an;", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "China;United Arab Emirates" }, { "id": "7bns2VTdMAx", "title": "Deep Learning of Intrinsically Motivated Options in the Arcade Learning Environment", "track": "main", "status": "Reject", "tldr": "", "abstract": "In Reinforcement Learning, Intrinsic Motivation motivates directed behaviors through a wide range of reward-generating methods. Depending on the task and environment, these rewards can be useful, might complement each other, but can also break down entirely, as seen with the noisy TV problem for curiosity. We therefore argue that scalability and robustness, among others, are key desirable properties of a method to incorporate intrinsic rewards, which a simple weighted sum of reward lacks. In a tabular setting, Explore Options let the agent call an intrinsically motivated policy in order to learn from its trajectories. We introduce Deep Explore Options, revising Explore Options within the Deep Reinforcement Learning paradigm to tackle complex visual problems. Deep Explore Options can naturally learn from several unrelated intrinsic rewards, ignore harmful intrinsic rewards, learn to balance exploration, but also isolate exploitative and exploratory behaviors for independent usage. \nWe test Deep Explore Options on hard and easy exploration games of the Atari Suite, following a benchmarking study to ensure fairness. 
Our empirical results show that they achieve results similar to weighted-sum baselines, while maintaining their key properties.\n", "keywords": "reinforcement learning;intrinsic motivation;exploration;options;auxiliary task learning", "primary_area": "", "supplementary_material": "", "author": "Louis Bagot;Kevin Mets;Tom De Schepper;Steven Latre", "authorids": "~Louis_Bagot1;~Kevin_Mets1;~Tom_De_Schepper1;~Steven_Latre1", "gender": "M;M;M;M", "homepage": ";;;https://www.uantwerpen.be/en/staff/steven-latre/", "dblp": ";;;", "google_scholar": ";avinyLUAAAAJ;Z2OO2QEAAAAJ;", "orcid": ";0000-0002-4812-4841;0000-0002-2969-3133;", "linkedin": "louis-bagot-32ba48152/;;tomdeschepper/;", "or_profile": "~Louis_Bagot1;~Kevin_Mets1;~Tom_De_Schepper1;~Steven_Latre1", "aff": "University of Antwerp;University of Antwerp, IDLab, imec;imec;University of Antwerp", "aff_domain": "uantwerpen.be;uantwerpen.be;imec.be;uantwerpen.be", "position": "PhD student;Postdoc;Principal Researcher;Full Professor", "bibtex": "@misc{\nbagot2023deep,\ntitle={Deep Learning of Intrinsically Motivated Options in the Arcade Learning Environment},\nauthor={Louis Bagot and Kevin Mets and Tom De Schepper and Steven Latre},\nyear={2023},\nurl={https://openreview.net/forum?id=7bns2VTdMAx}\n}", "github": "", "project": "", "reviewers": "aMNX;u5M2;xbC8;dS6L", "site": "https://openreview.net/forum?id=7bns2VTdMAx", "pdf_size": 1661880, "recommendation": "1;1;3;3", "confidence": "5;5;4;2", "correctness": "1;1;3;3", "technical_novelty": "1;1;1;3", "empirical_novelty": "1;2;1;2", "wc_summary_paper": "37;111;52;91", "wc_strength_and_weaknesses": "111;746;189;1454", "wc_clarity_quality_novelty_and_reproducibility": "35;682;716;116", "wc_summary_review": "52;109;139;99", "wc_review": "235;1648;1096;1760", "wc_reply_reviewers": "0;44;15;0", "wc_reply_authors": "0;263;245;443", "reply_reviewers": "0;1;1;0", "reply_authors": "0;1;1;1", "recommendation_avg": [ 2.0, 1.0 ], "confidence_avg": [ 4.0, 1.224744871391589 ], "correctness_avg": [ 2.0, 1.0 ], "technical_novelty_avg": [ 1.5, 0.8660254037844386 ], "empirical_novelty_avg": [ 1.5, 0.5 ], "wc_summary_paper_avg": [ 72.75, 29.600464523382062 ], "wc_strength_and_weaknesses_avg": [ 625.0, 537.6276592587104 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 387.25, 313.2932930976978 ], "wc_summary_review_avg": [ 99.75, 31.251999936004097 ], "wc_review_avg": [ 1184.75, 603.2028576689603 ], "wc_reply_reviewers_avg": [ 14.75, 17.963504669189696 ], "wc_reply_authors_avg": [ 237.75, 157.593424672478 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 0.75, 0.4330127018922193 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.8164965809277259, "corr_recommendation_correctness": 1.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16395956927318658105&as_sdt=4005&sciodt=0,6&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Antwerp;IMEC", "aff_unique_dep": ";", "aff_unique_url": "https://www.uantwerp.be;https://www.imec-int.com", "aff_unique_abbr": "UA;imec", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Belgium" }, { "id": "7bvWopYY1H", "title": "GeoVeX: Geospatial Vectors with Hexagonal Convolutional Autoencoders", "track": "main", "status": "Reject", "tldr": "We introduce a new geospatial representation model called GeoVeX to learn global vectors for all geographical locations on Earth's land cover
(200+ million embeddings). ", "abstract": "We introduce a new geospatial representation model called GeoVeX to learn global vectors for all geographical locations on Earth's land cover (200+ million embeddings). GeoVeX is built on a novel model architecture named Hexagonal Convolutional Autoencoders (HCAE) combined with a Zero-Inflated Poisson (ZIP) reconstruction layer, applied to a grid of Uber's H3 hexagons, each one described by the histogram of OpenStreetMap (OSM) geographical tag occurrences. GeoVeX is novel in three aspects: 1) it produces the first geospatial vectors trained on worldwide open data, enabling wide adoption in any downstream task that may benefit from enriched geographical information, requiring only location coordinates; 2) it represents the first use of hexagonal convolutions within autoencoder architectures, to learn latent representations of a hexagonal grid; and 3) it introduces a spatial-contextual Poisson reconstruction loss function for autoencoder architectures suitable for training on sparse geographical count data. Experiments demonstrate that GeoVeX embeddings can improve upon state-of-the-art geospatial location representations on two different downstream tasks: price prediction in the travel industry and hyperlocal interpolation of climate data from weather stations. ", "keywords": "Representation learning;Geospatial Embedding;Convolutional Autoencoders on hexagonal grids;OpenStreetMap;H3 hexagons", "primary_area": "", "supplementary_material": "/attachment/922da603222752a2323fa2cdac7c56c34c787fab.zip", "author": "Daniele Donghi;Anne Morvan", "authorids": "~Daniele_Donghi1;~Anne_Morvan1", "gender": "M;F", "homepage": ";https://annemorvan.github.io/", "dblp": ";https://dblp.org/pers/m/Morvan:Anne.html", "google_scholar": ";K6VhfJcAAAAJ", "orcid": ";", "linkedin": "danieledonghi;annemorvan/", "or_profile": "~Daniele_Donghi1;~Anne_Morvan1", "aff": ";Expedia Group", "aff_domain": ";expediagroup.com", "position": ";Machine Learning Scientist", "bibtex": "@misc{\ndonghi2023geovex,\ntitle={GeoVeX: Geospatial Vectors with Hexagonal Convolutional Autoencoders},\nauthor={Daniele Donghi and Anne Morvan},\nyear={2023},\nurl={https://openreview.net/forum?id=7bvWopYY1H}\n}", "github": "", "project": "", "reviewers": "Fj26;3Ztx;zeEd;pWwW", "site": "https://openreview.net/forum?id=7bvWopYY1H", "pdf_size": 4275148, "recommendation": "3;3;5;6", "confidence": "3;4;4;3", "correctness": "2;2;3;3", "technical_novelty": "3;2;3;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "73;21;34;78", "wc_strength_and_weaknesses": "618;107;48;92", "wc_clarity_quality_novelty_and_reproducibility": "155;46;50;16", "wc_summary_review": "54;25;9;30", "wc_review": "900;199;141;216", "wc_reply_reviewers": "1734;68;46;0", "wc_reply_authors": "2937;642;995;695", "reply_reviewers": "3;1;1;0", "reply_authors": "6;1;2;1", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 51.5, 24.5 ], "wc_strength_and_weaknesses_avg": [ 216.25, 232.96177261516533 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 66.75, 52.61831905334871 ], "wc_summary_review_avg": [ 29.5, 16.132265804901678 ], "wc_review_avg": [ 364.0, 310.70645310324664 ], "wc_reply_reviewers_avg": [ 462.0, 734.7992923241012 ], "wc_reply_authors_avg": [ 1317.25, 944.8006072711851 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ],
"reply_authors_avg": [ 2.5, 2.0615528128088303 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.19245008972987526, "corr_recommendation_correctness": 0.9622504486493761, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13821440064780663622&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0", "aff_unique_norm": "Expedia Group", "aff_unique_dep": "", "aff_unique_url": "https://www.expediagroup.com", "aff_unique_abbr": "Expedia Group", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "7d-d0BFz6Hf", "title": "Mesh-Independent Operator Learning for PDEs using Set Representations", "track": "main", "status": "Reject", "tldr": "We propose an attention-based operator learning model for obtaining the continuous solution of PDEs, independent of the discretization formats.", "abstract": "Operator learning, learning the mapping between infinite-dimensional function spaces, has been attracted as an alternative approach to traditional numerical methods to solve partial differential equations (PDEs). In practice, the functions of the physical systems are often observed by sparse or even irregularly distributed measurements, thus the functions are discretized and usually represented by finite structured arrays, which are given as data of input-output pairs. Through training with the arrays, the solution of the trained models should be independent of the discretization of the input function and can be queried at any point continuously. Therefore, the architectures for operator learning should be flexibly compatible with arbitrary sizes and locations of the measurements, otherwise, it can restrict the scalability when the observations have discrepancies between measurement formats. In this paper, we propose to treat the discretized functions as set-valued data and construct an attention-based model, called mesh-independent operator learner (MIOL), to provide proper treatments of input functions and query coordinates for the solution functions by detaching the dependencies on input and output meshes. Our models pre-trained with benchmark datasets of operator learning are evaluated by downstream tasks to demonstrate the generalization abilities to varying discretization formats of the system, which are natural characteristics of the continuous solution of the PDEs. 
", "keywords": "partial differential equations;operator learning;set representations;attention-based model;implicit neural representation", "primary_area": "", "supplementary_material": "", "author": "Seungjun Lee", "authorids": "~Seungjun_Lee1", "gender": "M", "homepage": "https://7tl7qns7ch.github.io/seungjunlee.github.io/", "dblp": "", "google_scholar": "https://scholar.google.com/citations?hl=ko", "orcid": "0009-0001-4314-0260", "linkedin": "seungjun-lee-656946213/", "or_profile": "~Seungjun_Lee1", "aff": "Alsemy", "aff_domain": "alsemy.com", "position": "Researcher", "bibtex": "@misc{\nlee2023meshindependent,\ntitle={Mesh-Independent Operator Learning for {PDE}s using Set Representations},\nauthor={Seungjun Lee},\nyear={2023},\nurl={https://openreview.net/forum?id=7d-d0BFz6Hf}\n}", "github": "", "project": "", "reviewers": "YQEY;kSWf;p5hq", "site": "https://openreview.net/forum?id=7d-d0BFz6Hf", "pdf_size": 4346314, "recommendation": "5;5;6", "confidence": "3;3;3", "correctness": "3;3;3", "technical_novelty": "3;3;3", "empirical_novelty": "3;3;3", "wc_summary_paper": "31;112;71", "wc_strength_and_weaknesses": "192;136;118", "wc_clarity_quality_novelty_and_reproducibility": "84;220;6", "wc_summary_review": "26;41;32", "wc_review": "333;509;227", "wc_reply_reviewers": "0;0;11", "wc_reply_authors": "368;919;495", "reply_reviewers": "0;0;1", "reply_authors": "1;2;2", "recommendation_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.0 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 71.33333333333333, 33.06895153396242 ], "wc_strength_and_weaknesses_avg": [ 148.66666666666666, 31.510139461590594 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 103.33333333333333, 88.42825089051324 ], "wc_summary_review_avg": [ 33.0, 6.164414002968976 ], "wc_review_avg": [ 356.3333333333333, 116.3022881211811 ], "wc_reply_reviewers_avg": [ 3.6666666666666665, 5.185449728701348 ], "wc_reply_authors_avg": [ 594.0, 235.58579470474587 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 1, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ocUZfKkizr8J:scholar.google.com/&scioq=Mesh-Independent+Operator+Learning+for+PDEs+using+Set+Representations&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Alsemy", "aff_unique_dep": "", "aff_unique_url": "", "aff_unique_abbr": "" }, { "title": "Cycle to Clique (Cy2C) Graph Neural Network: A Sight to See beyond Neighborhood Aggregation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11691", "id": "7d-g8KozkiE", "poster": "", "openreview": "https://openreview.net/forum?id=7d-g8KozkiE", "slides": "https://iclr.cc/virtual/2023/poster/11691", "video": "https://iclr.cc/virtual/2023/poster/11691", "author_site": "YunYoung Choi, Sun Woo Park, Youngho Woo, U Jin Choi", "tldr": "", "abstract": "Graph neural networks have been successfully adapted for learning vector representations of graphs through various neighborhood aggregation schemes. Previous researches suggest, however, that they possess limitations in incorporating key non-Euclidean topological properties of graphs. 
This paper mathematically characterizes the capability of graph neural networks to classify isomorphism classes of graphs with continuous node attributes up to their local topological properties. In light of these observations, we construct the Cycle to Clique graph neural network, a novel yet simple algorithm which topologically enriches the input data of conventional graph neural networks while preserving their architectural components. This method theoretically outperforms conventional graph neural networks in classifying isomorphism classes of graphs while ensuring comparable time complexity in representing random graphs. Empirical results further support that the novel algorithm produces comparable or enhanced results in classifying benchmark graph data sets compared to contemporary variants of graph neural networks.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/98216ebc69f8ee27ed98d1ac19b2e4e0e420ef4c.zip", "author": "Yun Young Choi;Sun Woo Park;Youngho Woo;U Jin Choi", "authorids": "~Yun_Young_Choi1;spark483@wisc.edu;~Youngho_Woo1;~U_Jin_Choi1", "gender": ";;M;M", "homepage": ";;;https://mathsci.kaist.ac.kr/home/?s=%EC%B5%9C%EC%9A%B0%EC%A7%84", "dblp": ";;;", "google_scholar": ";;https://scholar.google.com/citations?hl=en;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Yun_Young_Choi1;spark483@wisc.edu;~Youngho_Woo1;~U_Jin_Choi1", "aff": ";;National Institute for mathematical Sciences ;", "aff_domain": ";;nims.re.kr;", "position": ";;Full Professor;", "bibtex": "@inproceedings{\nchoi2023cycle,\ntitle={Cycle to Clique (Cy2C) Graph Neural Network: A Sight to See beyond Neighborhood Aggregation},\nauthor={Yun Young Choi and Sun Woo Park and Youngho Woo and U Jin Choi},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=7d-g8KozkiE}\n}", "github": "", "project": "", "reviewers": "KiJD;Qt5k;L1b5", "pdf_size": 1960529, "recommendation": "5;6;8", "confidence": "3;2;3", "correctness": "3;4;4", "technical_novelty": "3;3;4", "empirical_novelty": "2;2;2", "wc_summary_paper": "105;81;111", "wc_strength_and_weaknesses": "537;90;197", "wc_clarity_quality_novelty_and_reproducibility": "60;22;87", "wc_summary_review": "76;17;56", "wc_review": "778;210;451", "wc_reply_reviewers": "0;46;0", "wc_reply_authors": "807;732;360", "reply_reviewers": "0;1;0", "reply_authors": "2;2;1", "recommendation_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 99.0, 12.96148139681572 ], "wc_strength_and_weaknesses_avg": [ 274.6666666666667, 190.57165464873194 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 56.333333333333336, 26.662499674428293 ], "wc_summary_review_avg": [ 49.666666666666664, 24.499433100017278 ], "wc_review_avg": [ 479.6666666666667, 232.76931832944726 ], "wc_reply_reviewers_avg": [ 15.333333333333334, 21.684607956387456 ], "wc_reply_authors_avg": [ 633.0, 195.45331923505418 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.18898223650461363, "corr_recommendation_correctness": 0.7559289460184545, "gs_citation": 14, "gs_cited_by_link":
"https://scholar.google.com/scholar?cites=6900940488514379496&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "pdf": "https://openreview.net/pdf?id=7d-g8KozkiE", "email": ";;nims.re.kr;", "author_num": 4, "aff_unique_index": "0", "aff_unique_norm": "National Institute for Mathematical Sciences", "aff_unique_dep": "", "aff_unique_url": "http://www.nims.re.kr/english/", "aff_unique_abbr": "NIMS", "aff_country_unique_index": "0", "aff_country_unique": "South Korea" }, { "id": "7frgl8pKJpY", "title": "Supplementing Domain Knowledge to BERT with Semi-structured Information of Documents", "track": "main", "status": "Withdraw", "tldr": "A new domain adaptation method is proposed, which emphasize the importance of semi-structured information of documents for BERT capturing domain knowledge.", "abstract": "Adapting BERT on in-domain text corpus is a good way to boost its performance on domain-specific natural language processing (NLP) tasks. Common domain adaptation methods, however, can be deficient in capturing domain knowledge. Meanwhile, the context fragmentation inherent in Transformer-based models also hinders the acquisition of domain knowledge. Given the semi-structural characteristics of documents and their potential for alleviating these problems, we leverage semi-structured information of documents to supplement domain knowledge to BERT. To this end, we propose a topic-based domain adaptation method, which enhances the capture of domain knowledge at various levels of text granularity. Specifically, topic masked language model is designed at the paragraph level for pre-training; topic subsection matching degree dataset is automatically constructed at the subsection level for intermediate fine-tuning. Experiments are conducted over three biomedical NLP tasks across five datasets, and the results highlight the importance of the previously overlooked semi-structured information for domain adaptation. 
Our method benefits BERT, RoBERTa, BioBERT, and PubMedBERT in nearly all cases and yields significant gains on the topic-related task, question answering, with an average accuracy improvement of 4.8.", "keywords": "Domain adaptation;Semi-structured information;BERT;Pre-trained language model;Biomedical question answering", "primary_area": "", "supplementary_material": "", "author": "Chen Jing;Zhihua Wei;Jiaqi Wang;Rui Wang;Chuanyang Gong", "authorids": "~Chen_Jing1;~Zhihua_Wei1;~Jiaqi_Wang6;~Rui_Wang27;~Chuanyang_Gong1", "gender": "F;F;M;M;M", "homepage": "https://github.com/freeflover;;https://calmaqi.github.io/;https://github.com/mechanicalsea;https://github.com/gongchuanyang/", "dblp": ";55/3674-1;;https://dblp.org/rec/journals/corr/abs-2103-13581;", "google_scholar": ";;;1rsbf7IAAAAJ;", "orcid": ";;;0000-0002-5211-2114;", "linkedin": ";;;%E7%91%9E-%E7%8E%8B-b98085b8/;", "or_profile": "~Chen_Jing1;~Zhihua_Wei1;~Jiaqi_Wang6;~Rui_Wang27;~Chuanyang_Gong1", "aff": "Tongji University;Tongji University;Tongji University;Tongji University;Tongji University", "aff_domain": "tongji.edu.cn;tongji.edu.cn;tongji.edu.cn;tongji.edu.cn;tongji.edu.cn", "position": "PhD student;Full Professor;PhD student;PhD student;PhD student", "bibtex": "@misc{\njing2023supplementing,\ntitle={Supplementing Domain Knowledge to {BERT} with Semi-structured Information of Documents},\nauthor={Chen Jing and Zhihua Wei and Jiaqi Wang and Rui Wang and Chuanyang Gong},\nyear={2023},\nurl={https://openreview.net/forum?id=7frgl8pKJpY}\n}", "github": "", "project": "", "reviewers": "a6om;V5TH;pprb;Top2", "site": "https://openreview.net/forum?id=7frgl8pKJpY", "pdf_size": 529232, "recommendation": "3;3;5;6", "confidence": "4;4;4;3", "correctness": "3;3;3;4", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "73;171;87;63", "wc_strength_and_weaknesses": "175;557;465;76", "wc_clarity_quality_novelty_and_reproducibility": "100;169;123;15", "wc_summary_review": "56;83;35;7", "wc_review": "404;980;710;161", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 98.5, 42.717092597694425 ], "wc_strength_and_weaknesses_avg": [ 318.25, 198.58420757955554 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 101.75, 55.90784828626478 ], "wc_summary_review_avg": [ 45.25, 27.87808278917329 ], "wc_review_avg": [ 563.75, 309.1847142081898 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.7777777777777777, "corr_recommendation_correctness": 0.7777777777777777, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8403354691174680063&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Tongji University", "aff_unique_dep": "", "aff_unique_url": "https://www.tongji.edu.cn", "aff_unique_abbr": "Tongji", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Forward Super-Resolution: How Can GANs Learn Hierarchical Generative Models for 
Real-World Distributions", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10861", "id": "7h5KSs2PCRi", "poster": "", "openreview": "https://openreview.net/forum?id=7h5KSs2PCRi", "slides": "https://iclr.cc/virtual/2023/poster/10861", "video": "https://iclr.cc/virtual/2023/poster/10861", "author_site": "Zeyuan Allen-Zhu, Yuanzhi Li", "tldr": "We provide a theory to study how generative adversarial networks (GANs) can efficiently learn certain hierarchically generated distributions that are close to the distribution of images in practice.", "abstract": "Generative adversarial networks (GANs) are among the most successful models for learning high-complexity, real-world distributions. However, in theory, due to the highly non-convex, non-concave landscape of the minmax training objective, GAN remains one of the least understood deep learning models. In this work, we formally study how GANs can efficiently learn certain hierarchically generated distributions that are close to the distribution of real-life images. We prove that when a distribution has a structure that we refer to as \\emph{forward super-resolution}, then simply training generative adversarial networks using stochastic gradient descent ascent (SGDA) can learn this distribution efficiently, both in sample and time complexities.\nWe also provide empirical evidence that our assumption ``forward super-resolution'' is very natural in practice, and the underlying learning mechanisms that we study in this paper (which allow us to efficiently train GANs via SGDA in theory) simulate the actual learning process of GANs on real-world problems.\n", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Zeyuan Allen-Zhu;Yuanzhi Li", "authorids": "~Zeyuan_Allen-Zhu1;~Yuanzhi_Li1", "gender": ";M", "homepage": ";", "dblp": ";73/3628", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Zeyuan_Allen-Zhu1;~Yuanzhi_Li1", "aff": ";Carnegie Mellon University", "aff_domain": ";andrew.cmu.edu", "position": ";Assistant Professor", "bibtex": "@inproceedings{\nallen-zhu2023forward,\ntitle={Forward Super-Resolution: How Can {GAN}s Learn Hierarchical Generative Models for Real-World Distributions},\nauthor={Zeyuan Allen-Zhu and Yuanzhi Li},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=7h5KSs2PCRi}\n}", "github": "", "project": "", "reviewers": "okYU;EgRr;2F3d", "pdf_size": 1222959, "recommendation": "6;6;6", "confidence": "2;2;3", "correctness": "4;4;3", "technical_novelty": "4;3;3", "empirical_novelty": "3;0;3", "wc_summary_paper": "116;102;161", "wc_strength_and_weaknesses": "188;136;261", "wc_clarity_quality_novelty_and_reproducibility": "98;59;47", "wc_summary_review": "110;92;74", "wc_review": "512;389;543", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "354;607;1079", "reply_reviewers": "0;0;0", "reply_authors": "1;1;2", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 2.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 1.4142135623730951 ], "wc_summary_paper_avg": [ 126.33333333333333, 25.170529504870483 ], "wc_strength_and_weaknesses_avg": [ 195.0, 51.27052434554055 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 68.0, 21.77154105707724 ], "wc_summary_review_avg": [ 92.0, 14.696938456699069 ], "wc_review_avg": [ 
481.3333333333333, 66.50480350237034 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 680.0, 300.44744410073895 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16263482295549153413&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=7h5KSs2PCRi", "email": ";andrew.cmu.edu", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "7hYCGFacpz", "title": "Renamer: A Transformer Architecture In-variant to Variable Renaming", "track": "main", "status": "Reject", "tldr": "", "abstract": "Modeling tasks often take inputs from languages including programming languages and natural language. Many such tasks involve learning functions which are invariant to certain types of input transformations. In this work we consider a specific class of invariance: semantics-preserving variable renaming. We first show that transformer networks trained on such tasks do not always mirror the invariance of the underlying function. We then propose Renamer, a transformer architecture which is invariant to semantics-preserving variable renaming. Renamer improves over a vanilla transformer, with a 24.79% to 52.80% reduction in error, in a case study on learning a surrogate of a large-scale CPU simulator. Furthermore, the invariant network does not experience the same sensitivity to variable renaming, and its error remains constant when evaluated on a variable-renamed version of the test set. 
Finally, the invariant network is more efficient to train, and matches the best error of the vanilla network with a 25.15% to 60.00% reduction in training epochs.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Zachary Ankner;Alex Renda;Michael Carbin", "authorids": "~Zachary_Ankner1;~Alex_Renda2;~Michael_Carbin1", "gender": ";M;M", "homepage": "https://zackankner.com/;https://alexrenda.com;http://people.csail.mit.edu/mcarbin/", "dblp": ";206/6568;07/3119", "google_scholar": "AaKpmFYAAAAJ;4BCuJ2AAAAAJ;mtejbKYAAAAJ", "orcid": "0000-0001-8307-3456;;", "linkedin": ";;", "or_profile": "~Zachary_Ankner1;~Alex_Renda2;~Michael_Carbin1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;mit.edu", "position": "Undergrad student;PhD student;Associate Professor", "bibtex": "@misc{\nankner2023renamer,\ntitle={Renamer: A Transformer Architecture In-variant to Variable Renaming},\nauthor={Zachary Ankner and Alex Renda and Michael Carbin},\nyear={2023},\nurl={https://openreview.net/forum?id=7hYCGFacpz}\n}", "github": "", "project": "", "reviewers": "1jmm;KJQk;uTBf", "site": "https://openreview.net/forum?id=7hYCGFacpz", "pdf_size": 523240, "recommendation": "5;6;6", "confidence": "5;4;4", "correctness": "3;4;4", "technical_novelty": "2;3;3", "empirical_novelty": "2;3;3", "wc_summary_paper": "122;26;79", "wc_strength_and_weaknesses": "304;61;405", "wc_clarity_quality_novelty_and_reproducibility": "75;44;12", "wc_summary_review": "101;18;20", "wc_review": "602;149;516", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "663;33;638", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 75.66666666666667, 39.262648351270904 ], "wc_strength_and_weaknesses_avg": [ 256.6666666666667, 144.37066491808122 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 43.666666666666664, 25.72072229848057 ], "wc_summary_review_avg": [ 46.333333333333336, 38.663792996664064 ], "wc_review_avg": [ 422.3333333333333, 196.43885110186892 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 444.6666666666667, 291.2711604139498 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.9999999999999998, "corr_recommendation_correctness": 0.9999999999999997, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2653961014304750797&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "7hdmA0qtr5", "title": "scFormer: a universal representation learning approach for single-cell data using transformers", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Single-cell sequencing has emerged as a promising technique to decode cellular heterogeneity and analyze gene functions. 
With the high throughput of modern techniques and resulting large-scale sequencing data, deep learning has been used extensively to learn representations of individual cells for downstream tasks. However, most existing methods rely on fully connected networks and are unable to model complex relationships between both cell and gene representations. We hereby propose scFormer, a novel transformer-based deep learning framework to jointly optimize cell and gene embeddings for single-cell biology in an unsupervised manner. By drawing parallels between natural language processing and genomics, scFormer applies self-attention to learn salient gene and cell embeddings through masked gene modelling. scFormer provides a unified framework to readily address a variety of downstream tasks such as data integration, analysis of gene function, and perturbation response prediction. Extensive experiments using scFormer show state-of-the-art performance on seven datasets across the relevant tasks. The scFormer model implementation is available at https://anonymous.4open.science/r/scFormer-E7E4/", "keywords": "single-cell genomics;self-supervised learning;transformer", "primary_area": "", "supplementary_material": "", "author": "Haotian Cui;Chloe Wang;Hassaan Maan;Nan Duan;BO WANG", "authorids": "~Haotian_Cui1;~Chloe_Wang1;~Hassaan_Maan1;~Nan_Duan1;~BO_WANG11", "gender": "M;F;;M;M", "homepage": ";;https://hsmaan.com/;https://nanduan.github.io/;https://wanglab.ai/", "dblp": "206/7051;;;;", "google_scholar": ";;U9Z5rYMAAAAJ;Qaa6OxIAAAAJ;37FDILIAAAAJ", "orcid": ";;;;", "linkedin": ";https://ca.linkedin.com/in/chloe-xueqi-wang-979712158;;;", "or_profile": "~Haotian_Cui1;~Chloe_Wang1;~Hassaan_Maan1;~Nan_Duan1;~BO_WANG11", "aff": "Toronto University;University of Toronto;Microsoft Research;Microsoft Research Asia;Vector Institute", "aff_domain": "utoronto.ca;utoronto.ca;research.microsoft.com;microsoft.com;vectorinstitute.ai", "position": "PhD student;PhD student;Intern;Principal Researcher;Assistant Professor", "bibtex": "@misc{\ncui2023scformer,\ntitle={scFormer: a universal representation learning approach for single-cell data using transformers},\nauthor={Haotian Cui and Chloe Wang and Hassaan Maan and Nan Duan and BO WANG},\nyear={2023},\nurl={https://openreview.net/forum?id=7hdmA0qtr5}\n}", "github": "", "project": "", "reviewers": "evkH;bgoV;j75F;BBct", "site": "https://openreview.net/forum?id=7hdmA0qtr5", "pdf_size": 2726023, "recommendation": "1;3;3;6", "confidence": "3;5;4;4", "correctness": "1;2;3;4", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "73;63;207;162", "wc_strength_and_weaknesses": "799;242;65;129", "wc_clarity_quality_novelty_and_reproducibility": "101;47;81;35", "wc_summary_review": "77;47;467;353", "wc_review": "1050;399;820;679", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.25, 1.7853571071357126 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 2.5, 1.118033988749895 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 126.25, 60.4870853984551 ], "wc_strength_and_weaknesses_avg": [ 308.75, 290.0537665675107 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 66.0, 26.324893162176366 ], "wc_summary_review_avg": [ 236.0, 178.92177061498134 ], "wc_review_avg": [ 737.0, 235.83150764899926 ], "wc_reply_reviewers_avg": [ 0, 0 ], 
"wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.39605901719066966, "corr_recommendation_correctness": 0.9393364366277244, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4827781957854084436&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;1;1;2", "aff_unique_norm": "University of Toronto;Microsoft;Vector Institute", "aff_unique_dep": ";Microsoft Research;", "aff_unique_url": "https://www.utoronto.ca;https://www.microsoft.com/en-us/research;https://vectorinstitute.ai/", "aff_unique_abbr": "U of T;MSR;Vector Institute", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;0;1;2;0", "aff_country_unique": "Canada;United States;China" }, { "id": "7hvbaJ1AbaM", "title": "Formal Interpretability with Merlin-Arthur Classifiers", "track": "main", "status": "Reject", "tldr": "We introduce a new type of interpretable classifier with theoretical guarantees based on the Merlin-Arthur protocol from Interactive Proof Systems.", "abstract": "We propose a new type of multi-agent interactive classifier that provides, for the first time, provable interpretability guarantees even for complex agents such as neural networks. In our setting, which is inspired by the Merlin-Arthur protocol from Interactive Proof Systems, two agents cooperate to provide a classification: the prover selects a small set of features as a certificate and presents it to the verifier who decides the class. A second, adversarial prover ensures the truthfulness of the system and allows us to connect the game-theoretic equilibrium between the provers and the verifier to guarantees on the exchanged features. We define completeness and soundness metrics to provide a lower bound on the mutual information between the features and the class. 
Our experiments demonstrate good agreement between theory and practice using neural network classifiers, and we show how our setup practically prevents manipulation.", "keywords": "interpretability;explainable ai", "primary_area": "", "supplementary_material": "/attachment/9a8fedec5344539d8e455ba0b06df51a91a49c96.zip", "author": "Stephan Waeldchen;Kartikey Sharma;Max Zimmer;Berkant Turan;Sebastian Pokutta", "authorids": "~Stephan_Waeldchen1;~Kartikey_Sharma1;~Max_Zimmer1;~Berkant_Turan1;~Sebastian_Pokutta1", "gender": "M;;;M;M", "homepage": "https://stephanw.net;;;https://b-turan.github.io/;http://www.pokutta.com", "dblp": "236/5739.html;;;;75/7718", "google_scholar": "me6yCJYAAAAJ;;;beC77-AAAAAJ;", "orcid": "0000-0001-7629-7021;;;;", "linkedin": "stwaeldchen/;;;;", "or_profile": "~Stephan_Waeldchen1;~Kartikey_Sharma1;~Max_Zimmer1;~Berkant_Turan1;~Sebastian_Pokutta1", "aff": "Zuse Institut Berlin;;;Zuse Institute Berlin;TU Berlin", "aff_domain": "zib.de;;;zib.de;tu-berlin.de", "position": "Researcher;;;PhD student;Full Professor", "bibtex": "@misc{\nwaeldchen2023formal,\ntitle={Formal Interpretability with Merlin-Arthur Classifiers},\nauthor={Stephan Waeldchen and Kartikey Sharma and Max Zimmer and Berkant Turan and Sebastian Pokutta},\nyear={2023},\nurl={https://openreview.net/forum?id=7hvbaJ1AbaM}\n}", "github": "", "project": "", "reviewers": "YnKy;LYB7;zBZW;Pt1p", "site": "https://openreview.net/forum?id=7hvbaJ1AbaM", "pdf_size": 1500278, "recommendation": "3;5;5;5", "confidence": "3;3;3;4", "correctness": "2;2;2;4", "technical_novelty": "2;3;3;2", "empirical_novelty": "2;3;3;2", "wc_summary_paper": "59;170;54;292", "wc_strength_and_weaknesses": "332;385;1061;869", "wc_clarity_quality_novelty_and_reproducibility": "49;67;26;146", "wc_summary_review": "53;18;111;96", "wc_review": "493;640;1252;1403", "wc_reply_reviewers": "0;0;222;339", "wc_reply_authors": "386;255;1363;789", "reply_reviewers": "0;0;1;2", "reply_authors": "1;1;2;3", "recommendation_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 2.5, 0.8660254037844386 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 143.75, 97.34571125632603 ], "wc_strength_and_weaknesses_avg": [ 661.75, 311.3192694004019 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 72.0, 45.1275968781853 ], "wc_summary_review_avg": [ 69.5, 36.568429006453094 ], "wc_review_avg": [ 947.0, 387.72606309094056 ], "wc_reply_reviewers_avg": [ 140.25, 146.22307444449388 ], "wc_reply_authors_avg": [ 698.25, 431.3057934922739 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16135789163935892551&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;2", "aff_unique_norm": "Zuse Institut Berlin;Zuse Institute Berlin;Technische Universit\u00e4t Berlin", "aff_unique_dep": ";;", "aff_unique_url": "https://www.zib.de;https://www.zib.de;https://www.tu-berlin.de", "aff_unique_abbr": "ZIB;ZIB;TU Berlin", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berlin", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "title": "Strong inductive biases provably prevent harmless interpolation", "status": "Poster", "track": 
"main", "site": "https://iclr.cc/virtual/2023/poster/11125", "id": "7i6OZa7oij", "poster": "/media/PosterPDFs/ICLR%202023/11125.png?t=1683482760.7646763", "openreview": "https://openreview.net/forum?id=7i6OZa7oij", "slides": "https://iclr.cc/virtual/2023/poster/11125", "video": "https://iclr.cc/virtual/2023/poster/11125", "author_site": "Michael Aerni, Marco Milanta, Konstantin Donhauser, Fanny Yang", "tldr": "We show that the strength of a model\u2019s inductive bias determines whether interpolation of noisy data is harmless or harmful.", "abstract": "Classical wisdom suggests that estimators should avoid fitting noise to achieve good generalization. In contrast, modern overparameterized models can yield small test error despite interpolating noise \u2014 a phenomenon often called \"benign overfitting\" or \"harmless interpolation\". This paper argues that the degree to which interpolation is harmless hinges upon the strength of an estimator's inductive bias, i.e., how heavily the estimator favors solutions with a certain structure: while strong inductive biases prevent harmless interpolation, weak inductive biases can even require fitting noise to generalize well. Our main theoretical result establishes tight non-asymptotic bounds for high-dimensional kernel regression that reflect this phenomenon for convolutional kernels, where the filter size regulates the strength of the inductive bias. We further provide empirical evidence of the same behavior for deep neural networks with varying filter sizes and rotational invariance.", "keywords": "high-dimensional statistics;non-parametric regression;deep learning theory;generalization bounds;benign overfitting", "primary_area": "", "supplementary_material": "/attachment/1c032410c631166c047bb9518f4d0429ce6b2d9d.zip", "author": "Michael Aerni;Marco Milanta;Konstantin Donhauser;Fanny Yang", "authorids": "~Michael_Aerni1;~Marco_Milanta1;~Konstantin_Donhauser1;~Fanny_Yang1", "gender": "M;M;M;", "homepage": "https://www.michaelaerni.com/;https://mmilanta.github.io/;;http://www.fanny-yang.de", "dblp": "299/1497;;238/0076;126/4852", "google_scholar": "8LsoDewAAAAJ;https://scholar.google.com/citations?hl=it;;BfDKicQAAAAJ", "orcid": "0000-0003-3276-2678;0000-0002-2711-1830;;", "linkedin": "michael-aerni/;marco-milanta/;konstantin-donhauser-5a5704192/;", "or_profile": "~Michael_Aerni1;~Marco_Milanta1;~Konstantin_Donhauser1;~Fanny_Yang1", "aff": "ETH Zurich;;Swiss Federal Institute of Technology;Swiss Federal Institute of Technology", "aff_domain": "inf.ethz;;ethz.ch;ethz.ch", "position": "PhD student;;PhD student;Professor", "bibtex": "@inproceedings{\naerni2023strong,\ntitle={Strong inductive biases provably prevent harmless interpolation},\nauthor={Michael Aerni and Marco Milanta and Konstantin Donhauser and Fanny Yang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=7i6OZa7oij}\n}", "github": "", "project": "", "reviewers": "KVHo;NQJb;fKcA", "pdf_size": 924415, "recommendation": "8;8;8", "confidence": "3;4;4", "correctness": "3;4;4", "technical_novelty": "4;3;3", "empirical_novelty": "3;0;3", "wc_summary_paper": "153;122;147", "wc_strength_and_weaknesses": "287;143;89", "wc_clarity_quality_novelty_and_reproducibility": "35;119;59", "wc_summary_review": "18;41;33", "wc_review": "493;425;328", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "709;328;1070", "reply_reviewers": "0;0;0", "reply_authors": "1;1;2", "recommendation_avg": [ 8.0, 0.0 ], "confidence_avg": [ 
3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 1.4142135623730951 ], "wc_summary_paper_avg": [ 140.66666666666666, 13.424687043734844 ], "wc_strength_and_weaknesses_avg": [ 173.0, 83.57032966310472 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 71.0, 35.32704346531139 ], "wc_summary_review_avg": [ 30.666666666666668, 9.533566430716728 ], "wc_review_avg": [ 415.3333333333333, 67.70688459988557 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 702.3333333333334, 302.95690929386126 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5173396815349617962&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=7i6OZa7oij", "email": "inf.ethz;;ethz.ch;ethz.ch", "author_num": 4, "aff_unique_index": "0;1;1", "aff_unique_norm": "ETH Zurich;Swiss Federal Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.ethz.ch;https://www.ethz.ch", "aff_unique_abbr": "ETHZ;ETH Zurich", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Switzerland" }, { "title": "Chasing All-Round Graph Representation Robustness: Model, Training, and Optimization", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10796", "id": "7jk5gWjC18M", "poster": "", "openreview": "https://openreview.net/forum?id=7jk5gWjC18M", "slides": "https://iclr.cc/virtual/2023/poster/10796", "video": "https://iclr.cc/virtual/2023/poster/10796", "author_site": "Chunhui Zhang, Yijun Tian, Mingxuan Ju, Zheyuan Liu, Yanfang Ye, Nitesh Chawla, Chuxu Zhang", "tldr": "We identify a fundamental issue in graph adversarial learning and then propose a novel method to enlarge the model capacity and enrich the representation diversity of adversarial samples.", "abstract": "Graph Neural Networks (GNNs) have achieved state-of-the-art results on a variety of graph learning tasks, however, it has been demonstrated that they are vulnerable to adversarial attacks, raising serious security concerns. Many studies have been developed to train GNNs in a noisy environment and increase their robustness against adversarial attacks. However, existing methods have not uncovered a principled difficulty: the convoluted mixture distribution between clean and attacked data samples, which leads to sub-optimal model design and limits their frameworks\u2019 robustness. In this work, we begin by identifying the root cause of the mixture distribution; then, to tackle it, we propose a novel method, GAME (Graph Adversarial Mixture of Experts), to enlarge the model capacity and enrich the representation diversity of adversarial samples, from the three perspectives of model, training, and optimization. Specifically, we first propose a plug-and-play GAME layer that can be easily incorporated into any GNN and enhance its adversarial learning capabilities. Second, we design a decoupling-based graph adversarial training in which the component of the model used to generate adversarial graphs is separated from the component used to update weights. 
Third, we introduce a graph diversity regularization that enables the model to learn diverse representations and further improves model performance. Extensive experiments demonstrate the effectiveness and advantages of GAME over the state-of-the-art adversarial training methods across various datasets under different attacks.", "keywords": "Graph neural networks;Mixture of experts;Graph adversarial learning", "primary_area": "", "supplementary_material": "", "author": "Chunhui Zhang;Yijun Tian;Mingxuan Ju;Zheyuan Liu;Yanfang Ye;Nitesh Chawla;Chuxu Zhang", "authorids": "~Chunhui_Zhang1;~Yijun_Tian1;~Mingxuan_Ju1;~Zheyuan_Liu3;~Yanfang_Ye1;~Nitesh_Chawla1;~Chuxu_Zhang2", "gender": "M;;M;M;;M;", "homepage": "https://chunhuizng.github.io;https://www.yijuntian.com/;https://jumxglhf.github.io;https://franciscoliu.github.io/;http://yes-lab.org/;http://niteshchawla.nd.edu;", "dblp": "62/3401;234/9123-1;234/2715;191/0249-10;;c/NiteshVChawla.html;", "google_scholar": "https://scholar.google.com.hk/citations?user=jlqnbkAAAAAJ;dbaBgV0AAAAJ;qNoO67AAAAAJ;NLA-nSUAAAAJ;egjr888AAAAJ;hDLBEhkAAAAJ;", "orcid": ";0000-0003-2795-6080;0009-0008-9054-3856;0000-0001-7809-4586;;;", "linkedin": "chunhui-zhang-541827161/;yijun-tian/;;zheyuan-frank-liu-371738185/;;;", "or_profile": "~Chunhui_Zhang1;~Yijun_Tian1;~Mingxuan_Ju1;~Zheyuan_Liu3;~Yanfang_Ye1;~Nitesh_Chawla1;~Chuxu_Zhang2", "aff": "Brandeis University;University of Notre Dame;University of Notre Dame;Brandeis University;University of Notre Dame;University of Notre Dame;", "aff_domain": "brandeis.edu;nd.edu;nd.edu;brandeis.edu;nd.edu;nd.edu;", "position": "MS student;PhD student;PhD student;Undergrad student;Associate Professor;Full Professor;", "bibtex": "@inproceedings{\nzhang2023chasing,\ntitle={Chasing All-Round Graph Representation Robustness: Model, Training, and Optimization},\nauthor={Chunhui Zhang and Yijun Tian and Mingxuan Ju and Zheyuan Liu and Yanfang Ye and Nitesh Chawla and Chuxu Zhang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=7jk5gWjC18M}\n}", "github": "", "project": "", "reviewers": "SpxS;jUru;iMjk;QLnJ", "pdf_size": 2275149, "recommendation": "5;8;8;8", "confidence": "5;4;4;4", "correctness": "3;4;4;4", "technical_novelty": "3;3;3;3", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "43;93;77;53", "wc_strength_and_weaknesses": "89;314;401;449", "wc_clarity_quality_novelty_and_reproducibility": "520;37;20;23", "wc_summary_review": "36;47;26;42", "wc_review": "688;491;524;567", "wc_reply_reviewers": "538;0;0;0", "wc_reply_authors": "3878;487;442;626", "reply_reviewers": "2;0;0;0", "reply_authors": "7;1;1;1", "recommendation_avg": [ 7.25, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 66.5, 19.665960439297137 ], "wc_strength_and_weaknesses_avg": [ 313.25, 138.21789862387578 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 150.0, 213.7159329577465 ], "wc_summary_review_avg": [ 37.75, 7.8222439235810075 ], "wc_review_avg": [ 567.5, 74.60730527233912 ], "wc_reply_reviewers_avg": [ 134.5, 232.960833618014 ], "wc_reply_authors_avg": [ 1358.25, 1456.358536727821 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.5, 2.598076211353316 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -1.0, 
"corr_recommendation_correctness": 1.0, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11065790509935314037&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=7jk5gWjC18M", "email": "brandeis.edu;nd.edu;nd.edu;brandeis.edu;nd.edu;nd.edu;", "author_num": 7, "aff_unique_index": "0;1;1;0;1;1", "aff_unique_norm": "Brandeis University;University of Notre Dame", "aff_unique_dep": ";", "aff_unique_url": "https://www.brandeis.edu;https://www.nd.edu", "aff_unique_abbr": "Brandeis;Notre Dame", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "7kpmIkHVpHu", "title": "Hyperbolic Contrastive Learning for Visual Representations beyond Objects", "track": "main", "status": "Withdraw", "tldr": "We use hyperbolic objective to learn scene-object hypernymy, and show significant improvements for multiple datasets across multiple SSL tasks.", "abstract": " Although self-/un-supervised methods have led to rapid progress in visual representation learning, these methods generally treat objects and scenes using the same lens. In this paper, we focus on learning representations for objects and scenes \\that preserve the structure among them.\n Motivated by the observation that visually similar objects are close in the representation space, we argue that the scenes and objects should instead follow a hierarchical structure based on their compositionality. To exploit such a structure, we propose a contrastive learning framework where a Euclidean loss is used to learn object representations and a hyperbolic loss is used to encourage representations of scenes to lie close to representations of their constituent objects in a hyperbolic space. This novel hyperbolic objective encourages the scene-object hypernymy among the representations by optimizing the magnitude of their norms. We show that when pretraining on the COCO and OpenImages datasets, the hyperbolic loss improves downstream performance of several baselines across multiple datasets and tasks, including image classification, object detection, and semantic segmentation. 
We also show that the properties of the learned representations allow us to solve various vision tasks that involve the interaction between scenes and objects in a zero-shot way.", "keywords": "Self Supervised learning for Scene images;Hyperbolic objective;Hierarchical scene-object structure.", "primary_area": "", "supplementary_material": "", "author": "Songwei Ge;Shlok Kumar Mishra;Simon Kornblith;Chun-Liang Li;David Jacobs", "authorids": "~Songwei_Ge2;~Shlok_Kumar_Mishra1;~Simon_Kornblith1;~Chun-Liang_Li1;~David_Jacobs1", "gender": ";M;M;M;M", "homepage": "https://songweige.github.io/;https://shlokk.github.io/shlokmishra.github.io/;;http://chunliangli.github.io;http://www.cs.umd.edu/~djacobs", "dblp": "228/2581;173/6664;220/4059;;j/DavidWJacobs.html", "google_scholar": "https://scholar.google.com/scholar?hl=en;6XJ-4S0AAAAJ;1O3RPmsAAAAJ;https://scholar.google.com.tw/citations?user=vqHIt_sAAAAJ;WH2KmRgAAAAJ", "orcid": ";;;;", "linkedin": ";shlokk/;;;", "or_profile": "~Songwei_Ge2;~Shlok_Kumar_Mishra1;~Simon_Kornblith1;~Chun-Liang_Li1;~David_W._Jacobs1", "aff": "NVIDIA;University of Maryland, College Park;Google;Google;University of Maryland, College Park", "aff_domain": "nvidia.com;umd.edu;google.com;google.com;umd.edu", "position": "Intern;PhD student;Research Scientist;Researcher;Professor", "bibtex": "@misc{\nge2023hyperbolic,\ntitle={Hyperbolic Contrastive Learning for Visual Representations beyond Objects},\nauthor={Songwei Ge and Shlok Kumar Mishra and Simon Kornblith and Chun-Liang Li and David Jacobs},\nyear={2023},\nurl={https://openreview.net/forum?id=7kpmIkHVpHu}\n}", "github": "", "project": "", "reviewers": "dJwh;E5tj;rgJz;iaEV", "site": "https://openreview.net/forum?id=7kpmIkHVpHu", "pdf_size": 4167281, "recommendation": "3;5;5;5", "confidence": "3;4;3;4", "correctness": "2;3;3;2", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "59;46;82;40", "wc_strength_and_weaknesses": "42;227;509;688", "wc_clarity_quality_novelty_and_reproducibility": "303;12;50;193", "wc_summary_review": "65;33;59;91", "wc_review": "469;318;700;1012", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 56.75, 16.11482236948332 ], "wc_strength_and_weaknesses_avg": [ 366.5, 249.2132620869122 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 139.5, 116.03986383997527 ], "wc_summary_review_avg": [ 62.0, 20.615528128088304 ], "wc_review_avg": [ 624.75, 261.71489735970323 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 70, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6750819842213893633&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 9, "aff_unique_index": "0;1;2;2;1", "aff_unique_norm": "NVIDIA;University of Maryland;Google", "aff_unique_dep": "NVIDIA Corporation;;Google", "aff_unique_url": "https://www.nvidia.com;https://www/umd.edu;https://www.google.com", "aff_unique_abbr": "NVIDIA;UMD;Google", "aff_campus_unique_index": "1;2;2;1", "aff_campus_unique": ";College Park;Mountain View", 
"aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "7ks5PS09q1", "title": "Quasi-Taylor Samplers for Diffusion Generative Models based on Ideal Derivatives", "track": "main", "status": "Reject", "tldr": "Taylor-expansion approach for diffusion generative models is discussed.", "abstract": "Diffusion generative models have emerged as a new challenger to popular deep neural generative models such as GANs, but have the drawback that they often require a huge number of neural function evaluations (NFEs) during synthesis unless some sophisticated sampling strategies are employed. This paper proposes new efficient samplers based on the numerical schemes derived by the familiar Taylor expansion, which directly solves the ODE/SDE of interest. In general, it is not easy to compute the derivatives that are required in higher-order Taylor schemes, but in the case of diffusion models, this difficulty is alleviated by the trick that the authors call ``ideal derivative substitution,'' in which the higher-order derivatives are replaced by tractable ones. To derive ideal derivatives, the authors argue the ``single point approximation,'' in which the true score function is approximated by a conditional one, holds in many cases, and considered the derivatives of this approximation. Applying thus obtained new quasi-Taylor samplers to image generation tasks, the authors experimentally confirmed that the proposed samplers could synthesize plausible images in small number of NFEs, and that the performance was better or at the same level as DDIM and Runge-Kutta methods. The paper also argues the relevance of the proposed samplers to the existing ones mentioned above. ", "keywords": "diffusion models;score based models;neural generative models", "primary_area": "", "supplementary_material": "", "author": "Hideyuki Tachibana;Mocho Go;Muneyoshi Inahara;Yotaro Katayama;Yotaro Watanabe", "authorids": "~Hideyuki_Tachibana1;~Mocho_Go1;m_inahara@pkshatech.com;~Yotaro_Katayama1;y_watanabe@pkshatech.com", "gender": "M;M;;M;", "homepage": "https://tachi-hi.github.io/index.html;;;;", "dblp": "20/8053;309/8912;;;", "google_scholar": "https://scholar.google.co.jp/citations?user=wAXtttwAAAAJ;;;;", "orcid": "0000-0001-5162-1294;;;0000-0003-2645-5938;", "linkedin": "hideyuki-tachibana-5a513451;;;;", "or_profile": "~Hideyuki_Tachibana1;~Mocho_Go1;m_inahara@pkshatech.com;~Yotaro_Katayama1;y_watanabe@pkshatech.com", "aff": "PKSHA Technology Inc.;PKSHA Technology Inc.;;;", "aff_domain": "pkshatech.com;pkshatech.com;;;", "position": "Researcher;Researcher;;;", "bibtex": "@misc{\ntachibana2023quasitaylor,\ntitle={Quasi-Taylor Samplers for Diffusion Generative Models based on Ideal Derivatives},\nauthor={Hideyuki Tachibana and Mocho Go and Muneyoshi Inahara and Yotaro Katayama and Yotaro Watanabe},\nyear={2023},\nurl={https://openreview.net/forum?id=7ks5PS09q1}\n}", "github": "", "project": "", "reviewers": "rnag;jtaQ;Qqb3;hEQV", "site": "https://openreview.net/forum?id=7ks5PS09q1", "pdf_size": 18874807, "recommendation": "3;3;5;5", "confidence": "4;4;4;4", "correctness": "3;2;3;2", "technical_novelty": "3;2;2;2", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "127;97;43;45", "wc_strength_and_weaknesses": "589;132;156;343", "wc_clarity_quality_novelty_and_reproducibility": "83;211;23;28", "wc_summary_review": "29;58;0;26", "wc_review": "828;498;222;442", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1032;1167;527;844", "reply_reviewers": "0;0;0;0", "reply_authors": "2;2;1;1", 
"recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 78.0, 35.62302626111375 ], "wc_strength_and_weaknesses_avg": [ 305.0, 183.1870628619827 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 86.25, 75.77392361492177 ], "wc_summary_review_avg": [ 28.25, 20.54720175595694 ], "wc_review_avg": [ 497.5, 216.9164585733411 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 892.5, 240.18378379899005 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13988674658173300826&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0", "aff_unique_norm": "PKSHA Technology Inc.", "aff_unique_dep": "", "aff_unique_url": "https://www.pkshatech.com", "aff_unique_abbr": "", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Japan" }, { "id": "7lvuPvDNhI4", "title": "Unified Algorithms for RL with Decision-Estimation Coefficients: No-Regret, PAC, and Reward-Free Learning", "track": "main", "status": "Reject", "tldr": "We design new unified algorithms for no-regret, PAC, and reward-free reinforcement learning with general model classes, building on the Decision-Estimation Coefficient and a strong model estimation procedure.", "abstract": "Finding unified complexity measures and algorithms for sample-efficient learning is a central topic of research in reinforcement learning (RL). The Decision-Estimation Coefficient (DEC) is recently proposed by Foster et al. (2021) as a necessary and sufficient complexity measure for sample-efficient no-regret RL. This paper makes progress towards a unified theory for RL with the DEC framework. First, we propose two new DEC-type complexity measures: Explorative DEC (EDEC), and Reward-Free DEC (RFDEC). We show that they are necessary and sufficient for sample-efficient PAC learning and reward-free learning, thereby extending the original DEC which only captures no-regret learning. Next, we design new unified sample-efficient algorithms for all three learning goals. Our algorithms instantiate variants of the Estimation-To-Decisions (E2D) meta-algorithm with a strong and general model estimation subroutine. Even in the no-regret setting, our algorithm \\textsc{E2D-TA} improves upon the algorithms of Foster et al. (2021) which require either bounding a variant of the DEC which may be prohibitively large, or designing problem-specific estimation subroutines. As applications, we recover existing and obtain new sample-efficient learning results for a wide range of tractable RL problems using essentially a single algorithm. 
Finally, as a connection, we re-analyze two existing optimistic model-based algorithms based on Posterior Sampling or Maximum Likelihood Estimation, showing that they enjoy similar regret bounds as \\textsc{E2D-TA} under similar structural conditions as the DEC.", "keywords": "reinforcement learning theory;decision-estimation coefficient;function approximation", "primary_area": "", "supplementary_material": "", "author": "Fan Chen;Song Mei;Yu Bai", "authorids": "~Fan_Chen4;~Song_Mei1;~Yu_Bai1", "gender": "M;M;", "homepage": "https://sites.google.com/view/chen-fan;https://www.stat.berkeley.edu/~songmei/;https://yubai.org", "dblp": ";https://dblp.org/pers/hd/m/Mei:Song;03/6325-17.html", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.hk/citations?hl=en;owqhKD8AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Fan_Chen4;~Song_Mei1;~Yu_Bai1", "aff": "Peking University;University of California, Berkeley;Salesforce Research", "aff_domain": "pku.edu.cn;berkeley.edu;salesforce.com", "position": "Undergrad student;Assistant Professor;Research Scientist", "bibtex": "@misc{\nchen2023unified,\ntitle={Unified Algorithms for {RL} with Decision-Estimation Coefficients: No-Regret, {PAC}, and Reward-Free Learning},\nauthor={Fan Chen and Song Mei and Yu Bai},\nyear={2023},\nurl={https://openreview.net/forum?id=7lvuPvDNhI4}\n}", "github": "", "project": "", "reviewers": "tzo7;Tm8H;oWcL", "site": "https://openreview.net/forum?id=7lvuPvDNhI4", "pdf_size": 754762, "recommendation": "5;5;5", "confidence": "4;3;3", "correctness": "3;3;3", "technical_novelty": "2;2;2", "empirical_novelty": "0;0;0", "wc_summary_paper": "22;48;99", "wc_strength_and_weaknesses": "241;109;330", "wc_clarity_quality_novelty_and_reproducibility": "10;39;31", "wc_summary_review": "14;39;115", "wc_review": "287;235;575", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "572;311;605", "reply_reviewers": "0;0;0", "reply_authors": "2;2;2", "recommendation_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 0, 0 ], "wc_summary_paper_avg": [ 56.333333333333336, 31.98263417682929 ], "wc_strength_and_weaknesses_avg": [ 226.66666666666666, 90.79035680560402 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 26.666666666666668, 12.229290885229428 ], "wc_summary_review_avg": [ 56.0, 42.949582846247374 ], "wc_review_avg": [ 365.6666666666667, 149.5355773349235 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 496.0, 131.5066538240556 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 40, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18349282512743054619&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;2", "aff_unique_norm": "Peking University;University of California, Berkeley;Salesforce", "aff_unique_dep": ";;Salesforce Research", "aff_unique_url": "http://www.pku.edu.cn;https://www.berkeley.edu;https://research.salesforce.com", "aff_unique_abbr": "Peking U;UC Berkeley;Salesforce", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;1;1", "aff_country_unique": "China;United States" }, { "title": "Mitigating Dataset Bias by Using Per-Sample Gradient", "status": "Poster", "track": "main", "site": 
"https://iclr.cc/virtual/2023/poster/11518", "id": "7mgUec-7GMv", "poster": "", "openreview": "https://openreview.net/forum?id=7mgUec-7GMv", "slides": "https://iclr.cc/virtual/2023/poster/11518", "video": "https://iclr.cc/virtual/2023/poster/11518", "author_site": "Sumyeong Ahn, SeongYoon Kim, Se-Young Yun", "tldr": "We solve the dataset bias problem by using the per-sample gradient. Furthermore, we provide the mathematical background of the proposed algorithm.", "abstract": "The performance of deep neural networks is strongly influenced by the training dataset setup. In particular, when attributes with a strong correlation with the target attribute are present, the trained model can provide unintended prejudgments and show significant inference errors (i.e., the dataset bias problem). Various methods have been proposed to mitigate dataset bias, and their emphasis is on weakly correlated samples, called bias-conflicting samples. These methods are based on explicit bias labels provided by humans. However, such methods require human costs. Recently, several studies have sought to reduce human intervention by utilizing the output space values of neural networks, such as feature space, logits, loss, or accuracy. However, these output space values may be insufficient for the model to understand the bias attributes well. In this study, we propose a debiasing algorithm leveraging gradient called Per-sample Gradient-based Debiasing (PGD). PGD is comprised of three steps: (1) training a model on uniform batch sampling, (2) setting the importance of each sample in proportion to the norm of the sample gradient, and (3) training the model using importance-batch sampling, whose probability is obtained in step (2). Compared with existing baselines for various datasets, the proposed method showed state-of-the-art accuracy for the classification task. Furthermore, we describe theoretical understandings of how PGD can mitigate dataset bias. 
\n", "keywords": "Dataset bias;Debiasing;Gradient-norm based debiasing", "primary_area": "", "supplementary_material": "", "author": "Sumyeong Ahn;Seongyoon Kim;Se-Young Yun", "authorids": "~Sumyeong_Ahn1;~Seongyoon_Kim1;~Se-Young_Yun1", "gender": "M;M;M", "homepage": "https://sumyeongahn.github.io;https://github.com/curisam;https://fbsqkd.github.io", "dblp": "217/5462;;23/8862", "google_scholar": "krxhvIYAAAAJ;;X_IAjb8AAAAJ", "orcid": ";;", "linkedin": ";;seyoung-yun-395130ab/", "or_profile": "~Sumyeong_Ahn1;~Seongyoon_Kim1;~Se-Young_Yun1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;KAIST", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nahn2023mitigating,\ntitle={Mitigating Dataset Bias by Using Per-Sample Gradient},\nauthor={Sumyeong Ahn and Seongyoon Kim and Se-Young Yun},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=7mgUec-7GMv}\n}", "github": "", "project": "", "reviewers": "Lo2Z;oD49;jyHT", "pdf_size": 4953730, "recommendation": "8;8;8", "confidence": "3;4;3", "correctness": "4;3;3", "technical_novelty": "4;3;2", "empirical_novelty": "4;3;2", "wc_summary_paper": "33;143;59", "wc_strength_and_weaknesses": "68;394;185", "wc_clarity_quality_novelty_and_reproducibility": "12;238;27", "wc_summary_review": "35;93;62", "wc_review": "148;868;333", "wc_reply_reviewers": "0;316;83", "wc_reply_authors": "72;2198;585", "reply_reviewers": "0;2;2", "reply_authors": "1;5;3", "recommendation_avg": [ 8.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.816496580927726 ], "empirical_novelty_avg": [ 3.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 78.33333333333333, 46.942044646090515 ], "wc_strength_and_weaknesses_avg": [ 215.66666666666666, 134.84394272227763 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 92.33333333333333, 103.18376271918412 ], "wc_summary_review_avg": [ 63.333333333333336, 23.697163449568293 ], "wc_review_avg": [ 449.6666666666667, 305.2958492275248 ], "wc_reply_reviewers_avg": [ 133.0, 133.76347284167926 ], "wc_reply_authors_avg": [ 951.6666666666666, 905.8338086475294 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.9428090415820634 ], "reply_authors_avg": [ 3.0, 1.632993161855452 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11468249995162835258&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=7mgUec-7GMv", "email": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "id": "7o6iMO1gkeJ", "title": "DetectBench: An Object Detection Benchmark for OOD Generalization Algorithms", "track": "main", "status": "Reject", "tldr": "", "abstract": "The consensus about practical machine learning tasks, such as object detection, is still the test data are drawn from the same 
distribution as the training data, which is known as IID (Independent and Identically Distributed). However, it can not avoid being confronted with OOD (Out-of-Distribution) scenarios in real practice. It is risky to apply an object detection algorithm without figuring out its OOD generalization performance. On the other hand, a plethora of OOD generalization algorithms has been proposed to amortize the gap between the in-house and open-world performances of machine learning systems. However, their effectiveness was only demonstrated in the image classification tasks. It is still an opening question of how these algorithms perform on complex and practical tasks. In this paper, we first specify the setting of OOD-OD (OOD generalization object detection). Then, we propose DetectBench consisting of four OOD-OD benchmark datasets to evaluate various object detection and OOD generalization algorithms. From extensive experiments on DetectBench, we find that existing OOD generalization algorithms fail dramatically when applied to the more practical object detection tasks. This raises questions over the current progress on a large number of these algorithms and whether they can be effective in practice beyond simple toy examples. For future work, we sincerely hope that DetectBench can serve as a foothold for OOD-OD research.", "keywords": "Out-of-Distribution;object detection;benchmark", "primary_area": "", "supplementary_material": "/attachment/5701d22c789a414ff40d79849195eaffdc7e33a3.zip", "author": "Fan Wu;Nanyang Ye;Lanqing HONG;Chensheng Peng;Bikang Pan;Huaihai Lyu;Heyuan Shi", "authorids": "~Fan_Wu14;~Nanyang_Ye1;~Lanqing_HONG1;~Chensheng_Peng1;~Bikang_Pan1;~Huaihai_Lyu1;~Heyuan_Shi1", "gender": ";;F;;M;M;M", "homepage": ";;https://racheltechie.github.io/;;https://panbikang.github.io/home/;https://github.com/huaihailv;https://shiheyuan.github.io/", "dblp": ";175/2581;226/4258;;;;192/6867", "google_scholar": ";;https://scholar.google.com.sg/citations?user=2p7x6OUAAAAJ;;;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";;;;;;0000-0002-9040-7247", "linkedin": ";;;;;;", "or_profile": "~Fan_Wu14;~Nanyang_Ye1;~Lanqing_HONG1;~Chensheng_Peng1;~Bikang_Pan1;~Huaihai_Lyu1;~Heyuan_Shi1", "aff": ";Shanghai Jiaotong University;Huawei Technologies Ltd.;;ShanghaiTech University;University of Chinese Academy of Sciences;Central South University", "aff_domain": ";sjtu.edu.cn;huawei.com;;shanghaitech.edu.cn;ucas.ac.cn;csu.edu.cn", "position": ";Assistant Professor;Researcher;;Undergrad student;Undergrad student;Associate Professor", "bibtex": "@misc{\nwu2023detectbench,\ntitle={DetectBench: An Object Detection Benchmark for {OOD} Generalization Algorithms},\nauthor={Fan Wu and Nanyang Ye and Lanqing HONG and Chensheng Peng and Bikang Pan and Huaihai Lyu and Heyuan Shi},\nyear={2023},\nurl={https://openreview.net/forum?id=7o6iMO1gkeJ}\n}", "github": "", "project": "", "reviewers": "aJ6J;Sn2d;JACh;m6Le", "site": "https://openreview.net/forum?id=7o6iMO1gkeJ", "pdf_size": 22758892, "recommendation": "3;3;6;8", "confidence": "4;4;4;4", "correctness": "3;2;3;4", "technical_novelty": "2;2;3;4", "empirical_novelty": "2;2;2;4", "wc_summary_paper": "151;84;74;182", "wc_strength_and_weaknesses": "763;330;269;652", "wc_clarity_quality_novelty_and_reproducibility": "16;124;72;131", "wc_summary_review": "77;19;29;82", "wc_review": "1007;557;444;1047", "wc_reply_reviewers": "183;656;0;0", "wc_reply_authors": "1355;2061;319;205", "reply_reviewers": "1;3;0;0", 
"reply_authors": "3;5;1;1", "recommendation_avg": [ 5.0, 2.1213203435596424 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 122.75, 45.240330458563186 ], "wc_strength_and_weaknesses_avg": [ 503.5, 208.85700850103163 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 85.75, 46.27296727031886 ], "wc_summary_review_avg": [ 51.75, 28.030117730755254 ], "wc_review_avg": [ 763.75, 266.63962102433317 ], "wc_reply_reviewers_avg": [ 209.75, 268.25582472706907 ], "wc_reply_authors_avg": [ 985.0, 765.9360286603575 ], "reply_reviewers_avg": [ 1.0, 1.224744871391589 ], "reply_authors_avg": [ 2.5, 1.6583123951777 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.8333333333333334, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3957218932618486450&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Shanghai Jiao Tong University;Huawei;ShanghaiTech University;University of Chinese Academy of Sciences;Central South University", "aff_unique_dep": ";Huawei Technologies;;;", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.huawei.com;https://www.shanghaitech.edu.cn;http://www.ucas.ac.cn;https://www.csu.edu.cn", "aff_unique_abbr": "SJTU;Huawei;ShanghaiTech;UCAS;CSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Certified Training: Small Boxes are All You Need", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10739", "id": "7oFuxtJtUMH", "poster": "", "openreview": "https://openreview.net/forum?id=7oFuxtJtUMH", "slides": "https://iclr.cc/virtual/2023/poster/10739", "video": "https://iclr.cc/virtual/2023/poster/10739", "author_site": "Mark N M\u00fcller, Franziska Eckert, Marc Fischer, Martin Vechev", "tldr": "We propose a novel certified training method based on propagating small input regions, establishing a new state of the art for certified accuracy.", "abstract": "To obtain, deterministic guarantees of adversarial robustness, specialized training methods are used. We propose, SABR, a novel such certified training method, based on the key insight that propagating interval bounds for a small but carefully selected subset of the adversarial input region is sufficient to approximate the worst-case loss over the whole region while significantly reducing approximation errors. 
We show in an extensive empirical evaluation that SABR outperforms existing certified defenses in terms of both standard and certifiable accuracies across perturbation magnitudes and datasets, pointing to a new class of certified training methods promising to alleviate the robustness-accuracy trade-off.", "keywords": "Certified Training;Certified Robustness;Adversarial Robustness;Robustness Verification", "primary_area": "", "supplementary_material": "/attachment/741a992c2ae6a717f45866c6ee3328d96fde92e7.zip", "author": "Mark Niklas Mueller;Franziska Eckert;Marc Fischer;Martin Vechev", "authorids": "~Mark_Niklas_Mueller2;~Franziska_Eckert1;~Marc_Fischer1;~Martin_Vechev1", "gender": "M;F;M;M", "homepage": "https://www.sri.inf.ethz.ch/people/mark;;;https://www.sri.inf.ethz.ch/people/martin", "dblp": "287/4254;;37/9373-2;93/2189.html", "google_scholar": "RBpmcCAAAAAJ;;;https://scholar.google.ch/citations?user=aZ1Rh50AAAAJ", "orcid": "0000-0002-2496-6542;;;", "linkedin": "mark-m%C3%BCller-8bb4b1140/;franziska-eckert-a1a475187/;;", "or_profile": "~Mark_Niklas_Mueller2;~Franziska_Eckert1;~Marc_Fischer1;~Martin_Vechev1", "aff": "Swiss Federal Institute of Technology;;Swiss Federal Institute of Technology;Swiss Federal Institute of Technology", "aff_domain": "ethz.ch;;ethz.ch;ethz.ch", "position": "PhD student;;PhD student;Full Professor", "bibtex": "@inproceedings{\nmueller2023certified,\ntitle={Certified Training: Small Boxes are All You Need},\nauthor={Mark Niklas Mueller and Franziska Eckert and Marc Fischer and Martin Vechev},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=7oFuxtJtUMH}\n}", "github": "", "project": "", "reviewers": "Bdkn;1PLc;6kss;mQ2H", "pdf_size": 1383570, "recommendation": "6;8;8;8", "confidence": "4;3;4;4", "correctness": "3;3;4;4", "technical_novelty": "3;3;4;4", "empirical_novelty": "2;3;4;4", "wc_summary_paper": "78;42;199;73", "wc_strength_and_weaknesses": "306;340;16;89", "wc_clarity_quality_novelty_and_reproducibility": "24;70;652;501", "wc_summary_review": "69;30;48;18", "wc_review": "477;482;915;681", "wc_reply_reviewers": "156;26;70;35", "wc_reply_authors": "729;430;709;438", "reply_reviewers": "1;1;3;1", "reply_authors": "1;1;2;2", "recommendation_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.5, 0.5 ], "empirical_novelty_avg": [ 3.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 98.0, 59.92078103629825 ], "wc_strength_and_weaknesses_avg": [ 187.75, 138.21428110003683 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 311.75, 270.5682677255409 ], "wc_summary_review_avg": [ 41.25, 19.253246479490155 ], "wc_review_avg": [ 638.75, 179.46639657607216 ], "wc_reply_reviewers_avg": [ 71.75, 51.34381657025508 ], "wc_reply_authors_avg": [ 576.5, 142.70336366042673 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 65, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15074378880052440116&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=7oFuxtJtUMH", "email": "ethz.ch;;ethz.ch;ethz.ch", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Swiss Federal Institute of Technology", "aff_unique_dep": "", 
"aff_unique_url": "https://www.ethz.ch", "aff_unique_abbr": "ETH Zurich", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Switzerland" }, { "title": "DynaMS: Dyanmic Margin Selection for Efficient Deep Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11185", "id": "7oPAgqxNb20", "poster": "/media/PosterPDFs/ICLR%202023/11185.png?t=1682962592.0985887", "openreview": "https://openreview.net/forum?id=7oPAgqxNb20", "slides": "https://iclr.cc/virtual/2023/poster/11185", "video": "https://iclr.cc/virtual/2023/poster/11185", "author_site": "Jiaxing Wang, Yong Li, Jingwei Zhuo, Xupeng Shi, WEIZHONG ZHANG, Lixing Gong, Tong Tao, pengzhang liu, Yongjun Bao, Weipeng Yan", "tldr": "A general dynamic data selection framework for efficient deep neural network training that enjoy both theoretical and practical advantages.", "abstract": "The great success of deep learning is largely driven by training over-parameterized models on massive datasets. To avoid excessive computation, extracting and training only on the most informative subset is drawing increasing attention. Nevertheless, it is still an open question how to select such a subset on which the model trained generalizes on par with the full data. In this paper, we propose dynamic margin selection (DynaMS). DynaMS leverages the distance from candidate samples to the classification boundary to construct the subset, and the subset is dynamically updated during model training. We show that DynaMS converges with large probability, and for the first time show both in theory and practice that dynamically updating the subset can result in better generalization over previous works. To reduce the additional computation incurred by the selection, a light parameter sharing proxy (PSP) is designed. PSP is able to faithfully evaluate instances with respect to the current model, which is necessary for dynamic selection. 
Extensive analysis and experiments demonstrate the superiority of the proposed approach in data selection against many state-of-the-art counterparts on benchmark datasets.", "keywords": "efficient training;data selection", "primary_area": "", "supplementary_material": "/attachment/30eeab0b6f5f8b2c2fd949ef7de4a2d682b88e63.zip", "author": "Jiaxing Wang;Yong Li;Jingwei Zhuo;Xupeng Shi;WEIZHONG ZHANG;Lixing Gong;Tong Tao;Pengzhang Liu;Yongjun Bao;Weipeng Yan", "authorids": "~Jiaxing_Wang1;~Yong_Li15;~Jingwei_Zhuo1;~Xupeng_Shi1;~WEIZHONG_ZHANG2;~Lixing_Gong1;taotong@jd.com;liupengzhang@jd.com;~Yongjun_Bao2;paul.yan@jd.com", "gender": "M;M;M;M;;M;;;M;", "homepage": ";http://www.foreverlee.net/;http://ml.cs.tsinghua.edu.cn/~jingwei/;;;https://www.linkedin.com/in/lance-k-04b95912b/;;;;", "dblp": ";93/2334-34.html;165/3129;;;;;;52/5055.html;", "google_scholar": "https://scholar.google.com.hk/citations?user=EHGfstcAAAAJ;aOhO0jMAAAAJ;z1ZMZWIAAAAJ;https://scholar.google.com/citations?hl=en;;;;;;", "orcid": ";;;;;;;;;", "linkedin": ";;;;;lance-k-04b95912b/;;;;", "or_profile": "~Jiaxing_Wang1;~Yong_Li15;~Jingwei_Zhuo1;~Xupeng_Shi1;~WEIZHONG_ZHANG2;~Lixing_Gong1;taotong@jd.com;liupengzhang@jd.com;~Yongjun_Bao2;paul.yan@jd.com", "aff": "JD.com;;Alibaba Group;Huawei Technologies Ltd.;;JingDong;;;;", "aff_domain": "jd.com;;alibaba-inc.com;huawei.com;;jd.com;;;;", "position": "Researcher;;Researcher;Principal Researcher;;Researcher;;;;", "bibtex": "@inproceedings{\nwang2023dynams,\ntitle={Dyna{MS}: Dyanmic Margin Selection for Efficient Deep Learning},\nauthor={Jiaxing Wang and Yong Li and Jingwei Zhuo and Xupeng Shi and WEIZHONG ZHANG and Lixing Gong and Tong Tao and Pengzhang Liu and Yongjun Bao and Weipeng Yan},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=7oPAgqxNb20}\n}", "github": "", "project": "", "reviewers": "UJJg;6uhW;8YeC;hmw5", "pdf_size": 1982772, "recommendation": "3;3;5;6", "confidence": "3;4;4;4", "correctness": "2;3;3;3", "technical_novelty": "2;3;2;2", "empirical_novelty": "2;3;2;2", "wc_summary_paper": "70;71;58;58", "wc_strength_and_weaknesses": "182;170;118;167", "wc_clarity_quality_novelty_and_reproducibility": "35;24;14;32", "wc_summary_review": "19;33;83;54", "wc_review": "306;298;273;311", "wc_reply_reviewers": "0;0;0;54", "wc_reply_authors": "586;530;430;482", "reply_reviewers": "0;0;0;1", "reply_authors": "2;2;2;2", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 64.25, 6.2599920127744575 ], "wc_strength_and_weaknesses_avg": [ 159.25, 24.468091466234142 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 26.25, 8.13557004763649 ], "wc_summary_review_avg": [ 47.25, 24.107830678018296 ], "wc_review_avg": [ 297.0, 14.611639196202457 ], "wc_reply_reviewers_avg": [ 13.5, 23.382685902179844 ], "wc_reply_authors_avg": [ 507.0, 57.7148161220323 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 10, 0 ], "corr_recommendation_confidence": 0.5555555555555555, "corr_recommendation_correctness": 0.5555555555555555, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1007524047402446821&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "pdf": 
"https://openreview.net/pdf?id=7oPAgqxNb20", "email": "jd.com;;alibaba-inc.com;huawei.com;;jd.com;;;;", "author_num": 10, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "JD.com;Alibaba Group;Huawei", "aff_unique_dep": ";;Huawei Technologies", "aff_unique_url": "https://www.jd.com;https://www.alibaba.com;https://www.huawei.com", "aff_unique_abbr": "JD;Alibaba;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "7pl0FRiS0Td", "title": "Contextual Transformer for Offline Reinforcement Learning", "track": "main", "status": "Reject", "tldr": "This paper explores how prompts help sequence-modeling based offline-RL algorithms", "abstract": "Recently, the pretrain-tuning paradigm in large-scale sequence models has made significant progress in Natural Language Processing and Computer Vision. However, such a paradigm is still hindered by intractable challenges in Reinforcement Learning (RL), including the lack of self-supervised large-scale pretraining methods based on offline data and efficient fine-tuning/prompt-tuning over unseen downstream tasks. In this work, we explore how prompts can help sequence-modeling-based offline Reinforcement Learning (offline-RL) algorithms. Firstly, we propose prompt tuning for offline RL, where a context vector sequence is concatenated with the input to guide the conditional generation. As such, we can pretrain a model on the offline dataset with supervised loss and learn a prompt to guide the policy to play the desired actions. Secondly, we extend the framework to the Meta-RL setting and propose Contextual Meta Transformer (CMT), which leverages the context among different tasks as the prompt to improve the performance on unseen tasks. We conduct extensive experiments across three different offline-RL settings: offline single-agent RL on the D4RL dataset, offline Meta-RL on the MuJoCo benchmark, and offline MARL on the SMAC benchmark. 
The results validate the strong performance and generality of our methods.", "keywords": "Offline Meta Reinforcement Learning;Prompt Tuning;Transformer", "primary_area": "", "supplementary_material": "/attachment/8ba04db5cad22e4de84afb8abb0ccc879ee58449.zip", "author": "Runji Lin;Ye Li;Xidong Feng;Zhaowei Zhang;Xian Hong Wu Fung;Haifeng Zhang;Jun Wang;Yali Du;Yaodong Yang", "authorids": "~Runji_Lin1;~Ye_Li2;~Xidong_Feng1;~Zhaowei_Zhang2;~Xian_Hong_Wu_Fung2;~Haifeng_Zhang3;~Jun_Wang2;~Yali_Du1;~Yaodong_Yang1", "gender": ";M;;M;M;;M;;M", "homepage": ";;https://waterhorse1.github.io/;https://zowiezhang.github.io;;https://pkuzhf.github.io;http://www0.cs.ucl.ac.uk/staff/jun.wang/;;https://www.yangyaodong.com", "dblp": ";;;127/1796;;93/7133-2;w/JunWang12;;170/1496-1", "google_scholar": ";https://scholar.google.com.tw/citations?hl=zh-CN;JfOLNu8AAAAJ;https://scholar.google.com.hk/citations?view_op=list_works;;;https://scholar.google.co.uk/citations?user=wIE1tY4AAAAJ;;https://scholar.google.co.uk/citations?user=6yL0xw8AAAAJ", "orcid": ";0009-0008-9690-2119;;;;;;;0000-0001-8132-5613", "linkedin": ";;;;xianhong-wu-604459286/;;;;yaodong-yang", "or_profile": "~Runji_Lin1;~Ye_Li2;~Xidong_Feng1;~Zhaowei_Zhang2;~Xian_Hong_Wu_Fung2;~Haifeng_Zhang3;~Jun_Wang2;~Yali_Du1;~Yaodong_Yang1", "aff": ";Nankai University;University College London;Wuhan University;Peking University;Institute of Automation, Chinese Academy of Sciences;University College London;;Peking University", "aff_domain": ";nankai.edu.cn;ucl.ac.uk;whu.edu.cn;pku.edu.cn;ia.ac.cn;ucl.ac.uk;;pku.edu.cn", "position": ";MS student;PhD student;Undergrad student;Undergrad student;Associate Professor;Professor;;Assistant Professor", "bibtex": "@misc{\nlin2023contextual,\ntitle={Contextual Transformer for Offline Reinforcement Learning},\nauthor={Runji Lin and Ye Li and Xidong Feng and Zhaowei Zhang and Xian Hong Wu Fung and Haifeng Zhang and Jun Wang and Yali Du and Yaodong Yang},\nyear={2023},\nurl={https://openreview.net/forum?id=7pl0FRiS0Td}\n}", "github": "", "project": "", "reviewers": "7JkM;9AEp;SQ2X;yV6V", "site": "https://openreview.net/forum?id=7pl0FRiS0Td", "pdf_size": 1654518, "recommendation": "3;3;5;6", "confidence": "2;4;5;4", "correctness": "3;3;3;4", "technical_novelty": "2;2;2;1", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "30;88;76;52", "wc_strength_and_weaknesses": "197;437;322;112", "wc_clarity_quality_novelty_and_reproducibility": "79;367;47;30", "wc_summary_review": "39;440;47;20", "wc_review": "345;1332;492;214", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 61.5, 22.332711434127294 ], "wc_strength_and_weaknesses_avg": [ 267.0, 123.3389638354401 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 130.75, 137.5288606075103 ], "wc_summary_review_avg": [ 136.5, 175.5 ], "wc_review_avg": [ 595.75, 436.30171613231136 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 9, 0 ], "corr_recommendation_confidence": 0.5739640213948524, "corr_recommendation_correctness": 0.7777777777777777, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:b0QKSU_SRQgJ:scholar.google.com/&scioq=Contextual+Transformer+for+Offline+Reinforcement+Learning&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;4;1;3", "aff_unique_norm": "Nankai University;University College London;Wuhan University;Peking University;Chinese Academy of Sciences", "aff_unique_dep": ";;;;Institute of Automation", "aff_unique_url": "http://www.nankai.edu.cn;https://www.ucl.ac.uk;http://www.whu.edu.cn/;http://www.pku.edu.cn;http://www.ia.cas.cn", "aff_unique_abbr": "NKU;UCL;WHU;Peking U;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;1;0", "aff_country_unique": "China;United Kingdom" }, { "id": "7qSpaOSbRVO", "title": "Data Poisoning Attacks Against Multimodal Encoders", "track": "main", "status": "Reject", "tldr": "", "abstract": "Traditional machine learning (ML) models, e.g., image classifiers, usually rely on large-scale labeled datasets to achieve strong performance. However, such labeled datasets are often challenging and expensive to obtain. Also, the predefined categories limit the model's ability to generalize to other visual concepts as additional labeled data is required. On the contrary, the newly emerged multimodal model, which contains both visual and linguistic modalities, learns the concept of images from the raw text. It is a promising way to solve the above problems as it can use easy-to-collect image-text pairs to construct the training dataset and the raw texts contain almost unlimited categories according to their semantics. However, learning from a large-scale unlabeled dataset also exposes the model to the risk of potential poisoning attacks, whereby the adversary aims to perturb the model's training dataset to trigger malicious behaviors in it. Previous work mainly focuses on the visual modality. In this paper, we instead focus on answering two questions: (1) Is the linguistic modality also vulnerable to poisoning attacks? and (2) Which modality is most vulnerable? To answer the two questions, we conduct three types of poisoning attacks against CLIP, the most representative multimodal contrastive learning framework. Extensive evaluations on different datasets and model architectures show that all three attacks can perform well on the linguistic modality with only a relatively low poisoning rate and limited epochs. Also, we observe that the poisoning effect differs between different modalities, i.e., with lower MinRank in the visual modality and with higher Hit@K when K is small in the linguistic modality. To mitigate the attacks, we propose both pre-training and post-training defenses. 
We empirically show that both defenses can significantly reduce the attack performance while preserving the model's utility.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Ziqing Yang;Xinlei He;Zheng Li;Michael Backes;Mathias Humbert;Pascal Berrang;Yang Zhang", "authorids": "~Ziqing_Yang3;~Xinlei_He1;~Zheng_Li17;~Michael_Backes1;~Mathias_Humbert2;~Pascal_Berrang1;~Yang_Zhang15", "gender": ";M;M;;M;M;M", "homepage": ";https://xinleihe.github.io/;https://zhenglisec.github.io/;;http://mhumbert.com;https://pascal-berrang.de;https://yangzhangalmo.github.io/", "dblp": ";227/7262;10/1143-23;;83/8816;;06/6785-16", "google_scholar": ";6hZNEtoAAAAJ;xEAaaGsAAAAJ;;;;Xeb2888AAAAJ", "orcid": ";;0000-0002-4466-7523;;;;0000-0003-3612-7348", "linkedin": ";;;;;;", "or_profile": "~Ziqing_Yang3;~Xinlei_He1;~Zheng_Li17;~Michael_Backes1;~Mathias_Humbert2;~Pascal_Berrang1;~Yang_Zhang15", "aff": ";CISPA Helmholtz Center for Information Security;CISPA Helmholtz Center for Information Security;;Universit\u00e9 de Lausanne;University of Birmingham;CISPA Helmholtz Center for Information Security", "aff_domain": ";cispa.de;cispa.de;;unil.ch;bham.ac.uk;cispa.de", "position": ";PhD student;PhD student;;Associate Professor;Assistant Professor;Assistant Professor", "bibtex": "@misc{\nyang2023data,\ntitle={Data Poisoning Attacks Against Multimodal Encoders},\nauthor={Ziqing Yang and Xinlei He and Zheng Li and Michael Backes and Mathias Humbert and Pascal Berrang and Yang Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=7qSpaOSbRVO}\n}", "github": "", "project": "", "reviewers": "ygKY;zCXF;KeYj", "site": "https://openreview.net/forum?id=7qSpaOSbRVO", "pdf_size": 985510, "recommendation": "5;6;6", "confidence": "4;4;4", "correctness": "3;3;3", "technical_novelty": "2;1;3", "empirical_novelty": "3;3;3", "wc_summary_paper": "84;49;256", "wc_strength_and_weaknesses": "492;159;140", "wc_clarity_quality_novelty_and_reproducibility": "24;55;58", "wc_summary_review": "56;65;33", "wc_review": "656;328;487", "wc_reply_reviewers": "0;0;92", "wc_reply_authors": "489;413;295", "reply_reviewers": "0;0;1", "reply_authors": "1;1;1", "recommendation_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.0, 0.816496580927726 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 129.66666666666666, 90.46669123065251 ], "wc_strength_and_weaknesses_avg": [ 263.6666666666667, 161.64226619984706 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 45.666666666666664, 15.369522511198006 ], "wc_summary_review_avg": [ 51.333333333333336, 13.474255287605159 ], "wc_review_avg": [ 490.3333333333333, 133.92618198926684 ], "wc_reply_reviewers_avg": [ 30.666666666666668, 43.36921591277491 ], "wc_reply_authors_avg": [ 399.0, 79.8164561144296 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 59, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11816704560819330585&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 17, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "CISPA Helmholtz Center for Information Security;Universit\u00e9 de Lausanne;University of Birmingham", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cispa.de/;https://www.unil.ch;https://www.birmingham.ac.uk", 
"aff_unique_abbr": "CISPA;UNIL;Birmingham", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;2;0", "aff_country_unique": "Germany;Switzerland;United Kingdom" }, { "id": "7qyLeRm1e3", "title": "Improving Generative Flow Networks with Path Regularization", "track": "main", "status": "Reject", "tldr": "We propose a novel path regularization method based on optimal transport theory that places prior constraints on the underlying structure of the GFlowNets", "abstract": "Generative Flow Networks (GFlowNets) are recently proposed models for learning stochastic policies that generate compositional objects by sequences of actions with the probability proportional to a given reward function. The central problem of GFlowNets is to improve their exploration and generalization. In this work, we propose a novel path regularization method based on optimal transport theory that places prior constraints on the underlying structure of the GFlowNets. The prior is designed to help the GFlowNets better discover the latent structure of the target distribution or enhance its ability to explore the environment in the context of active learning. The path regularization controls the flow in GFlowNets to generate more diverse and novel candidates via maximizing the optimal transport distances between two forward policies or to improve the generalization via minimizing the optimal transport distances. In addition, we derive an efficient implementation of the regularization by finding its closed form solutions in specific cases and a meaningful upper bound that can be used as an approximation to minimize the regularization term. We empirically demonstrate the advantage of our path regularization on a wide range of tasks, including synthetic hypergrid environment modeling, discrete probabilistic modeling, and biological sequence design.", "keywords": "generative flow networks;path regularization;optimal transport", "primary_area": "", "supplementary_material": "/attachment/a50bbcb4e585c8f803854a574c00c3ef3e2ba334.zip", "author": "Anh Do;Duy Dinh;Tan Minh Nguyen;Nguyen Duy Khuong;Stanley Osher;Nhat Ho", "authorids": "~Anh_Do1;~Duy_Dinh1;~Tan_Minh_Nguyen1;~Nguyen_Duy_Khuong1;~Stanley_Osher1;~Nhat_Ho1", "gender": "M;M;M;M;M;M", "homepage": "https://anhhndo.github.io/;;https://tanmnguyen89.github.io/;https://khuongnd.github.io/;https://www.math.ucla.edu/~sjo/;https://nhatptnk8912.github.io/", "dblp": ";;255/4725;;;203/4479", "google_scholar": ";;OizOh88AAAAJ;vAOT46YAAAAJ;;https://scholar.google.ca/citations?user=Xs7cKMwAAAAJ", "orcid": ";;;;;", "linkedin": ";duy-dinh-2b5408215/;;;;nhat-pham-minh-ho-267b8164/", "or_profile": "~Anh_Do1;~Duy_Dinh1;~Tan_Minh_Nguyen1;~Nguyen_Duy_Khuong1;~Stanley_Osher1;~Nhat_Ho1", "aff": "FPT Software;FPT Software;University of California, Los Angeles;FPT Software Ltd. 
- FPT Corporation;University of California, Los Angeles;University of Texas, Austin", "aff_domain": "fsoft.com.vn;fsoft.com.vn;ucla.edu;fpt-software.com;ucla.edu;utexas.edu", "position": "FPT AI Residency;FPT AI Residency;Postdoc;Researcher;Full Professor;Assistant Professor", "bibtex": "@misc{\ndo2023improving,\ntitle={Improving Generative Flow Networks with Path Regularization},\nauthor={Anh Do and Duy Dinh and Tan Minh Nguyen and Nguyen Duy Khuong and Stanley Osher and Nhat Ho},\nyear={2023},\nurl={https://openreview.net/forum?id=7qyLeRm1e3}\n}", "github": "", "project": "", "reviewers": "86A8;ayzy;MjKh;J937", "site": "https://openreview.net/forum?id=7qyLeRm1e3", "pdf_size": 1689291, "recommendation": "5;5;5;6", "confidence": "4;5;3;4", "correctness": "3;3;3;3", "technical_novelty": "3;3;3;4", "empirical_novelty": "2;2;2;4", "wc_summary_paper": "76;64;123;150", "wc_strength_and_weaknesses": "663;827;276;453", "wc_clarity_quality_novelty_and_reproducibility": "56;75;147;137", "wc_summary_review": "49;89;58;51", "wc_review": "844;1055;604;791", "wc_reply_reviewers": "0;0;226;0", "wc_reply_authors": "2830;1258;2085;2115", "reply_reviewers": "0;0;1;0", "reply_authors": "5;2;4;4", "recommendation_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 103.25, 34.85236720798173 ], "wc_strength_and_weaknesses_avg": [ 554.75, 208.5022481893181 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 103.75, 38.99599338393625 ], "wc_summary_review_avg": [ 61.75, 16.08376510646683 ], "wc_review_avg": [ 823.5, 160.66191210115733 ], "wc_reply_reviewers_avg": [ 56.5, 97.86087062764156 ], "wc_reply_authors_avg": [ 2072.0, 556.591861241251 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.75, 1.0897247358851685 ], "replies_avg": [ 32, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:jrbt8qpk4HMJ:scholar.google.com/&scioq=Improving+Generative+Flow+Networks+with+Path+Regularization&hl=en&as_sdt=0,5", "gs_version_total": 4, "aff_unique_index": "0;0;1;0;1;2", "aff_unique_norm": "FPT Corporation;University of California, Los Angeles;University of Texas at Austin", "aff_unique_dep": ";;", "aff_unique_url": "https://www.fpt-software.com;https://www.ucla.edu;https://www.utexas.edu", "aff_unique_abbr": "FPT;UCLA;UT Austin", "aff_campus_unique_index": "1;1;2", "aff_campus_unique": ";Los Angeles;Austin", "aff_country_unique_index": "0;0;1;0;1;1", "aff_country_unique": "Vietnam;United States" }, { "id": "7sWLxZBLPO5", "title": "Multiple Modes for Continual Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Adapting model parameters to incoming streams of data is a crucial factor to deep learning scalability. Interestingly, prior continual learning strategies in online settings inadvertently anchor their updated parameters to a local parameter subspace to remember old tasks, else drift away from the subspace and forget. From this observation, we formulate a trade-off between constructing multiple parameter modes and allocating tasks per mode. Mode-Optimized Task Allocation (MOTA), our contributed adaptation strategy, trains multiple modes in parallel, then optimizes task allocation per mode. 
We empirically demonstrate improvements over baseline continual learning strategies and across varying distribution shifts, namely sub-population, domain, and task shift.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Siddhartha Datta;Nigel Shadbolt", "authorids": "~Siddhartha_Datta1;~Nigel_Shadbolt1", "gender": ";M", "homepage": "http://siddharthadatta.ml/;https://www.cs.ox.ac.uk/people/nigel.shadbolt/", "dblp": ";s/NigelShadbolt", "google_scholar": ";wTAM67UAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Siddhartha_Datta1;~Nigel_Shadbolt1", "aff": "University of Oxford;University of Oxford", "aff_domain": "ox.ac.uk;ox.ac.uk", "position": "PhD student;Full Professor", "bibtex": "@misc{\ndatta2023multiple,\ntitle={Multiple Modes for Continual Learning},\nauthor={Siddhartha Datta and Nigel Shadbolt},\nyear={2023},\nurl={https://openreview.net/forum?id=7sWLxZBLPO5}\n}", "github": "", "project": "", "reviewers": "MaED;ydF1;x9Gv;EVp1", "site": "https://openreview.net/forum?id=7sWLxZBLPO5", "pdf_size": 2227649, "recommendation": "3;6;6;8", "confidence": "3;4;3;2", "correctness": "2;4;4;3", "technical_novelty": "2;2;3;4", "empirical_novelty": "2;2;2;4", "wc_summary_paper": "56;81;45;182", "wc_strength_and_weaknesses": "635;264;109;157", "wc_clarity_quality_novelty_and_reproducibility": "45;57;13;61", "wc_summary_review": "21;63;34;62", "wc_review": "757;465;201;462", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "370;619;114;72", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 5.75, 1.7853571071357126 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 91.0, 54.13409276971399 ], "wc_strength_and_weaknesses_avg": [ 291.25, 206.24303018526467 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 44.0, 18.841443681416774 ], "wc_summary_review_avg": [ 45.0, 18.096961070853858 ], "wc_review_avg": [ 471.25, 196.7312570487974 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 293.75, 219.7070492724346 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.39605901719066966, "corr_recommendation_correctness": 0.5488604301969737, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3406962528913236929&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0", "aff_unique_norm": "University of Oxford", "aff_unique_dep": "", "aff_unique_url": "https://www.ox.ac.uk", "aff_unique_abbr": "Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "title": "Exploring The Role of Mean Teachers in Self-supervised Masked Auto-Encoders", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11930", "id": "7sn6Vxp92xV", "poster": "/media/PosterPDFs/ICLR%202023/11930.png?t=1682401281.7992334", "openreview": "https://openreview.net/forum?id=7sn6Vxp92xV", "slides": "https://iclr.cc/virtual/2023/poster/11930", "video": "https://iclr.cc/virtual/2023/poster/11930", "author_site": "Youngwan Lee, Jeff Willette, Jonghee Kim, Juho Lee, Sung Ju Hwang", "tldr": "We conduct analysis of the dynamics of the self-distillation scheme in masked auto-encoder.", "abstract": "Masked image modeling 
(MIM) has become a popular strategy for self-supervised learning (SSL) of visual representations with Vision Transformers. A representative MIM model, the masked auto-encoder (MAE), randomly masks a subset of image patches and reconstructs the masked patches given the unmasked patches. Concurrently, many recent works in self-supervised learning utilize the student/teacher paradigm which provides the student with an additional target based on the output of a teacher composed of an exponential moving average (EMA) of previous students. Although common, relatively little is known about the dynamics of the interaction between the student and teacher. \nThrough analysis on a simple linear model, we find that the teacher conditionally removes previous gradient directions based on feature similarities which effectively acts as a conditional momentum regularizer. From this analysis, we present a simple SSL method, the Reconstruction-Consistent Masked Auto-Encoder (RC-MAE) by adding an EMA teacher to MAE. We find that RC-MAE converges faster and requires less memory usage than state-of-the-art self-distillation methods during pre-training, which may provide a way to enhance the practicality of prohibitively expensive self-supervised learning of Vision Transformer models. Additionally, we show that RC-MAE achieves more robustness and better performance compared to MAE on downstream tasks such as ImageNet-1K classification, object detection, and instance segmentation.", "keywords": "self-supervised learning;masked auto-encoder", "primary_area": "", "supplementary_material": "/attachment/eecfab490e1b1c0f70751cc6951acb8443391969.zip", "author": "Youngwan Lee;Jeffrey Ryan Willette;Jonghee Kim;Juho Lee;Sung Ju Hwang", "authorids": "~Youngwan_Lee1;~Jeffrey_Ryan_Willette1;jhkim27@etri.re.kr;~Juho_Lee2;~Sung_Ju_Hwang1", "gender": "M;M;;M;", "homepage": "https://youngwanlee.github.io/;https://jeffwillette.github.io;;https://juho.lee.github.io;", "dblp": "184/5625;286/0937;;55/3410-1;", "google_scholar": "EqemKYsAAAAJ;https://scholar.google.com/citations?hl=en;;Py4URJUAAAAJ;", "orcid": "0000-0001-8644-155X;;;;", "linkedin": "youngwanlee/;;;;", "or_profile": "~Youngwan_Lee1;~Jeffrey_Ryan_Willette1;jhkim27@etri.re.kr;~Juho_Lee2;~Sung_Ju_Hwang1", "aff": "Electronics and Telecommunication Research Institute;Korea Advanced Institute of Science & Technology;;Korea Advanced Institute of Science & Technology;", "aff_domain": "etri.re.kr;kaist.ac.kr;;kaist.ac.kr;", "position": "Researcher;Student;;Assistant Professor;", "bibtex": "@inproceedings{\nlee2023exploring,\ntitle={Exploring The Role of Mean Teachers in Self-supervised Masked Auto-Encoders},\nauthor={Youngwan Lee and Jeffrey Ryan Willette and Jonghee Kim and Juho Lee and Sung Ju Hwang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=7sn6Vxp92xV}\n}", "github": "", "project": "", "reviewers": "5RHA;jGgU;JA1p;rLZG", "pdf_size": 1902628, "recommendation": "5;5;6;6", "confidence": "4;4;2;3", "correctness": "2;3;3;3", "technical_novelty": "2;3;3;2", "empirical_novelty": "2;0;2;2", "wc_summary_paper": "123;57;93;88", "wc_strength_and_weaknesses": "318;96;70;146", "wc_clarity_quality_novelty_and_reproducibility": "245;12;58;17", "wc_summary_review": "136;4;36;36", "wc_review": "822;169;257;287", "wc_reply_reviewers": "1329;0;0;0", "wc_reply_authors": "3275;742;583;788", "reply_reviewers": "3;0;0;0", "reply_authors": "5;1;1;1", "recommendation_avg": [ 5.5, 0.5 ], "confidence_avg": [ 
3.25, 0.82915619758885 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 1.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 90.25, 23.40272420040026 ], "wc_strength_and_weaknesses_avg": [ 157.5, 96.60615922393355 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 83.0, 95.21817053483016 ], "wc_summary_review_avg": [ 53.0, 49.66890375275057 ], "wc_review_avg": [ 383.75, 256.7132398221798 ], "wc_reply_reviewers_avg": [ 332.25, 575.4738808147595 ], "wc_reply_authors_avg": [ 1347.0, 1115.7268931060146 ], "reply_reviewers_avg": [ 0.75, 1.299038105676658 ], "reply_authors_avg": [ 2.0, 1.7320508075688772 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.9045340337332909, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18138944436492483876&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=7sn6Vxp92xV", "email": "etri.re.kr;kaist.ac.kr;;kaist.ac.kr;", "author_num": 5, "aff_unique_index": "0;1;1", "aff_unique_norm": "Electronics and Telecommunication Research Institute;Korea Advanced Institute of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "http://www.etri.re.kr;https://www.kaist.ac.kr", "aff_unique_abbr": "ETRI;KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "id": "7t3ggLCjl7G", "title": "When Do Models Generalize? A Perspective From Data-Algorithm Compatibility", "track": "main", "status": "Reject", "tldr": "We propose data-algorithm-compatibility to characterize generalization, and study it in the overparameterized linear regression regime.", "abstract": "One of the major open problems in machine learning is to characterize generalization in the overparameterized regime, where most traditional generalization bounds become inconsistent (Nagarajan and Kolter, 2019). In many scenarios, their failure can be attributed to obscuring the crucial interplay between the training algorithm and the underlying data distribution. To address this issue, we propose a concept named compatibility, which quantitatively characterizes generalization in both a data-relevant and an algorithm-relevant manner. By considering the entire training trajectory and focusing on early-stopping iterates, compatibility exploits the data and the algorithm information and is therefore a more suitable notion for generalization. We validate this by theoretically studying compatibility under the setting of solving overparameterized linear regression with gradient descent. Specifically, we perform a data-dependent trajectory analysis and derive a sufficient condition for compatibility in such a setting.
Our theoretical results demonstrate that in the sense of compatibility, generalization holds with significantly weaker restrictions on the problem instance than the previous last iterate analysis.", "keywords": "generalization;data-algorithm compatibility;early stopping;overparameterized linear regression", "primary_area": "", "supplementary_material": "/attachment/a58f63d5ae524c3c41559a340ab6be2699ff8420.zip", "author": "Jing Xu;Jiaye Teng;Yang Yuan;Andrew C Yao", "authorids": "~Jing_Xu4;~Jiaye_Teng2;~Yang_Yuan4;~Andrew_C_Yao1", "gender": "M;M;M;M", "homepage": "https://jingxuthu.github.io;http://www.tengjiaye.com;http://people.iiis.tsinghua.edu.cn/~yuanyang/index.html;https://iiis.tsinghua.edu.cn/en/yao/", "dblp": "07/1951-27;266/8187;;y/AndrewChiChihYao", "google_scholar": "jlrroGQAAAAJ;NGqfK2wAAAAJ;;", "orcid": ";0000-0002-4385-5792;;", "linkedin": ";;;", "or_profile": "~Jing_Xu4;~Jiaye_Teng2;~Yang_Yuan4;~Andrew_C_Yao1", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "thu.edu.cn;iiis.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;PhD student;Assistant Professor;Full Professor", "bibtex": "@misc{\nxu2023when,\ntitle={When Do Models Generalize? A Perspective From Data-Algorithm Compatibility},\nauthor={Jing Xu and Jiaye Teng and Yang Yuan and Andrew C Yao},\nyear={2023},\nurl={https://openreview.net/forum?id=7t3ggLCjl7G}\n}", "github": "", "project": "", "reviewers": "rKAH;kDAx;9FRm;a4RZ", "site": "https://openreview.net/forum?id=7t3ggLCjl7G", "pdf_size": 2579141, "recommendation": "5;6;6;6", "confidence": "3;3;3;3", "correctness": "3;3;3;3", "technical_novelty": "2;3;4;3", "empirical_novelty": "0;0;0;3", "wc_summary_paper": "35;60;90;183", "wc_strength_and_weaknesses": "291;275;528;946", "wc_clarity_quality_novelty_and_reproducibility": "6;8;35;91", "wc_summary_review": "35;69;243;26", "wc_review": "367;412;896;1246", "wc_reply_reviewers": "335;83;0;328", "wc_reply_authors": "1330;766;932;2215", "reply_reviewers": "3;1;0;2", "reply_authors": "5;2;2;5", "recommendation_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.0 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 0.75, 1.299038105676658 ], "wc_summary_paper_avg": [ 92.0, 56.03124128555426 ], "wc_strength_and_weaknesses_avg": [ 510.0, 270.92711196925273 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 35.0, 34.30014577228499 ], "wc_summary_review_avg": [ 93.25, 87.93285790874762 ], "wc_review_avg": [ 730.25, 362.87213657154774 ], "wc_reply_reviewers_avg": [ 186.5, 147.96029872908477 ], "wc_reply_authors_avg": [ 1310.75, 560.8571007841481 ], "reply_reviewers_avg": [ 1.5, 1.118033988749895 ], "reply_authors_avg": [ 3.5, 1.5 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17545033280708260759&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Neural-based classification rule learning for sequential data", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11253", 
"id": "7tJyBmu9iCj", "poster": "/media/PosterPDFs/ICLR%202023/11253.png?t=1681718703.2318575", "openreview": "https://openreview.net/forum?id=7tJyBmu9iCj", "slides": "https://iclr.cc/virtual/2023/poster/11253", "video": "https://iclr.cc/virtual/2023/poster/11253", "author_site": "Marine Collery, Philippe Bonnard, Fran\u00e7ois Fages, Remy Kusters", "tldr": "", "abstract": "Discovering interpretable patterns for classification of sequential data is of key importance for a variety of fields, ranging from genomics to fraud detection or more generally interpretable decision-making.\nIn this paper, we propose a novel differentiable fully interpretable method to discover both local and global patterns (i.e. catching a relative or absolute temporal dependency) for rule-based binary classification.\nIt consists of a convolutional binary neural network with an interpretable neural filter and a training strategy based on dynamically-enforced sparsity.\nWe demonstrate the validity and usefulness of the approach on synthetic datasets and on an open-source peptides dataset.\nKey to this end-to-end differentiable method is that the expressive patterns used in the rules are learned alongside the rules themselves.", "keywords": "classification rule learning;binary neural network;interpretable AI;sequential data", "primary_area": "", "supplementary_material": "", "author": "Marine Collery;Philippe Bonnard;Fran\u00e7ois Fages;Remy Kusters", "authorids": "~Marine_Collery1;philippe.bonnard@fr.ibm.com;francois.fages@inria.fr;~Remy_Kusters1", "gender": ";;;", "homepage": ";;;", "dblp": ";;;", "google_scholar": "o4cVSFMAAAAJ;;;https://scholar.google.fr/citations?user=442FIp8AAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Marine_Collery1;philippe.bonnard@fr.ibm.com;francois.fages@inria.fr;~Remy_Kusters1", "aff": "INRIA;;;Gourmey (Supreme SAS)", "aff_domain": "inria.fr;;;gourmey.com", "position": "PhD student;;;Principal Researcher", "bibtex": "@inproceedings{\ncollery2023neuralbased,\ntitle={Neural-based classification rule learning for sequential data},\nauthor={Marine Collery and Philippe Bonnard and Fran{\\c{c}}ois Fages and Remy Kusters},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=7tJyBmu9iCj}\n}", "github": "", "project": "", "reviewers": "r7kZ;JYCZ;UfDD", "pdf_size": 468467, "recommendation": "6;6;8", "confidence": "3;3;2", "correctness": "3;3;3", "technical_novelty": "3;3;3", "empirical_novelty": "3;0;2", "wc_summary_paper": "41;81;75", "wc_strength_and_weaknesses": "303;59;132", "wc_clarity_quality_novelty_and_reproducibility": "10;11;107", "wc_summary_review": "10;102;19", "wc_review": "364;253;333", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "775;579;520", "reply_reviewers": "0;0;0", "reply_authors": "1;2;1", "recommendation_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 1.6666666666666667, 1.247219128924647 ], "wc_summary_paper_avg": [ 65.66666666666667, 17.613126418163876 ], "wc_strength_and_weaknesses_avg": [ 164.66666666666666, 102.25567085605678 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 42.666666666666664, 45.492368102304326 ], "wc_summary_review_avg": [ 43.666666666666664, 41.41121694527811 ], "wc_review_avg": [ 316.6666666666667, 46.764183825753754 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 
624.6666666666666, 108.99643215363622 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.9999999999999998, "corr_recommendation_correctness": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16335124883748642769&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=7tJyBmu9iCj", "email": "inria.fr;;;gourmey.com", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "INRIA;Supreme SAS", "aff_unique_dep": ";", "aff_unique_url": "https://www.inria.fr;https://www.supremesas.com", "aff_unique_abbr": "INRIA;Supreme SAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "France" }, { "id": "7tmlbL5JQyt", "title": "Optimizing Connectivity through Network Gradients for the Restricted Machine", "track": "main", "status": "Withdraw", "tldr": "This paper proposes a novel methodology that optimizes the network connectivity of an RBM using the idea of connection gradients jointly with other model parameters. ", "abstract": "Leveraging sparse networks to connect successive layers in deep neural networks has recently been shown to provide benefits to large scale state-of-the-art models. However, network connectivity also plays a significant role in the learning performance of shallow networks, such as the classic Restricted Boltzmann Machines (RBM). Efficiently finding sparse connectivity patterns that improve the learning performance of shallow networks is a fundamental problem. While recent principled approaches explicitly include network connections as model parameters that must be optimized, they often rely on explicit penalization or have network sparsity as a hyperparameter. This work presents a method to find optimal connectivity patterns for RBMs based on the idea of network gradients (NCG): computing the gradient of every possible connection, given a specific connection pattern, and using the gradient to drive a continuous connection strength parameter that in turn is used to determine the connection pattern. Thus, learning RBM parameters and learning network connections are truly jointly performed, albeit with different learning rates, and without changes to the objective function. The method is applied to MNIST and other datasets, showing that better RBM models are found for the benchmark tasks of sample generation and input classification. Results also show that NCG is robust to network initialization, both adding and removing network connections while learning. ", "keywords": "Neural Networks;Restricted Boltzmann Machine;Neural Architecture Search;Network Pruning;Network Optimization;AutoML", "primary_area": "", "supplementary_material": "", "author": "Amanda C N de Oliveira;Daniel R.
Figueiredo", "authorids": "~Amanda_C_N_de_Oliveira1;~Daniel_R._Figueiredo1", "gender": "F;M", "homepage": ";https://www.cos.ufrj.br/~daniel/", "dblp": ";46/4473", "google_scholar": "https://scholar.google.com.br/citations?hl=pt-BR;j4YbANwAAAAJ", "orcid": "0000-0003-0711-5970;0000-0001-9341-6619", "linkedin": "amanda-camacho-3b809911b;", "or_profile": "~Amanda_C_N_de_Oliveira1;~Daniel_R._Figueiredo1", "aff": "Universidade Federal do Rio de Janeiro;Universidade Federal do Rio de Janeiro", "aff_domain": "cos.ufrj.br;ufrj.br", "position": "PhD student;Associate Professor", "bibtex": "@misc{\noliveira2023optimizing,\ntitle={Optimizing Connectivity through Network Gradients for the Restricted Machine},\nauthor={Amanda C N de Oliveira and Daniel R. Figueiredo},\nyear={2023},\nurl={https://openreview.net/forum?id=7tmlbL5JQyt}\n}", "github": "", "project": "", "reviewers": "UouD;pSj3;bCKv", "site": "https://openreview.net/forum?id=7tmlbL5JQyt", "pdf_size": 683955, "recommendation": "3;3;3", "confidence": "4;4;3", "correctness": "3;3;2", "technical_novelty": "1;2;2", "empirical_novelty": "1;2;3", "wc_summary_paper": "108;56;102", "wc_strength_and_weaknesses": "346;188;457", "wc_clarity_quality_novelty_and_reproducibility": "82;512;152", "wc_summary_review": "49;44;100", "wc_review": "585;800;811", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 88.66666666666667, 23.22833518691246 ], "wc_strength_and_weaknesses_avg": [ 330.3333333333333, 110.37612463249872 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 248.66666666666666, 188.3849486792639 ], "wc_summary_review_avg": [ 64.33333333333333, 25.30261295246446 ], "wc_review_avg": [ 732.0, 104.04165832332099 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:zFidOf0mXKEJ:scholar.google.com/&scioq=Optimizing+Connectivity+through+Network+Gradients+for+the+Restricted+Machine&hl=en&as_sdt=0,23", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Universidade Federal do Rio de Janeiro", "aff_unique_dep": "", "aff_unique_url": "https://www.ufrj.br", "aff_unique_abbr": "UFRJ", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Rio de Janeiro", "aff_country_unique_index": "0;0", "aff_country_unique": "Brazil" }, { "id": "7uIycrR-KOa", "title": "Test-Time Adaptation for Real-World Denoising Networks via Noise-Aware Image Generation", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Image denoising aims for a challenging task of recovering clean images from unseen noise, which can follow different distributions depending on scenes, camera models, ISO settings, etc. Previous works have attempted to handle unseen noise by adapting denoising neural networks to each given noisy image. However, a single noisy image can only provide a limited amount of information for training networks. 
Therefore, we propose to generate noisy images with diverse yet realistic noise that is similar to noise in a given input image. Such noise generation is difficult to achieve given only a single noisy image. To address the challenge, we propose a normalizing flow (NF) framework that can learn the latent representation of noise, conditioned on noisy images. We also employ the Gaussian mixture model to better handle real-world unseen noise by leveraging multiple noise distributions. Using the proposed NF model, our framework can generate multiple synthetic noisy images to facilitate the adaptation of denoising networks to each given image. To further improve the adaptation to unseen noise, we integrate a meta-learning algorithm into our framework. The experimental results demonstrate that our framework substantially improves the performance of several denoising networks on unseen real-world noise across numerous real-world benchmark datasets.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/4008b8e7a08a4055bea1a4bb05143fd03b4955e4.zip", "author": "Dongjin Kim;Donggoo Jung;Dajin Han;Kyeonga Kim;Sungyong Baik;Tae Hyun Kim", "authorids": "~Dongjin_Kim3;~Donggoo_Jung1;~Dajin_Han1;~Kyeonga_Kim1;~Sungyong_Baik1;~Tae_Hyun_Kim2", "gender": ";M;M;;M;M", "homepage": "https://sites.google.com/view/lliger9/;https://donggoo-jung.github.io;;;https://dsybaik-hy.github.io/;https://sites.google.com/view/lliger9/", "dblp": "16/9611-4;;;;243/2775;43/11343-6", "google_scholar": "https://scholar.google.co.kr/citations?user=6I9aJxYAAAAJ;https://scholar.google.co.kr/citations?user=yXJ05SwAAAAJ;;;lQ4gotkAAAAJ;https://scholar.google.co.kr/citations?user=8soccsoAAAAJ", "orcid": ";;;;;0000-0002-7995-3984", "linkedin": ";;dajin-han-1aabb4199/;kyeonga-kim-25b942205/;;", "or_profile": "~Dongjin_Kim3;~Donggoo_Jung1;~Dajin_Han1;~Kyeonga_Kim1;~Sungyong_Baik1;~Tae_Hyun_Kim2", "aff": "Hanyang University;Hanyang University;Hanyang University;;Hanyang University;Hanyang University", "aff_domain": "hanyang.ac.kr;hanyang.ac.kr;hanyang.ac.kr;;hanyang.ac.kr;hanyang.ac.kr", "position": "MS student;PhD student;MS student;;Assistant Professor;Associate Professor", "bibtex": "@misc{\nkim2023testtime,\ntitle={Test-Time Adaptation for Real-World Denoising Networks via Noise-Aware Image Generation},\nauthor={Dongjin Kim and Donggoo Jung and Dajin Han and Kyeonga Kim and Sungyong Baik and Tae Hyun Kim},\nyear={2023},\nurl={https://openreview.net/forum?id=7uIycrR-KOa}\n}", "github": "", "project": "", "reviewers": "bnr3;TYGP;vPkE;gg2V", "site": "https://openreview.net/forum?id=7uIycrR-KOa", "pdf_size": 975348, "recommendation": "3;3;5;6", "confidence": "5;4;2;4", "correctness": "3;2;3;3", "technical_novelty": "1;2;2;3", "empirical_novelty": "1;2;2;3", "wc_summary_paper": "55;122;88;128", "wc_strength_and_weaknesses": "201;438;149;156", "wc_clarity_quality_novelty_and_reproducibility": "30;28;116;87", "wc_summary_review": "69;38;92;25", "wc_review": "355;626;445;396", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 98.25, 29.260681810238122 ], "wc_strength_and_weaknesses_avg": [ 236.0, 118.3194827574901 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 
65.25, 37.67874069020885 ], "wc_summary_review_avg": [ 56.0, 26.22022120425379 ], "wc_review_avg": [ 455.5, 103.46617804867444 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.4856618642571827, "corr_recommendation_correctness": 0.5555555555555555, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:_kV85xaMt1IJ:scholar.google.com/&scioq=Test-Time+Adaptation+for+Real-World+Denoising+Networks+via+Noise-Aware+Image+Generation&hl=en&as_sdt=0,23", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Hanyang University", "aff_unique_dep": "", "aff_unique_url": "https://www.hanyang.ac.kr", "aff_unique_abbr": "HYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "7uTvSvC7hGO", "title": "Exploring Visual Interpretability for Contrastive Language-Image Pretraining", "track": "main", "status": "Withdraw", "tldr": "A visual interpretability work for CLIP. We observe CLIP shows opposite visualization results, and find the reason is semantic shift at the pooling layer. Then, we solve this problem with nontrivial improvements.", "abstract": "Contrastive Language-Image Pre-training (CLIP) learns rich representations via readily available supervision of natural language. It improves the performance of downstream vision tasks, including but not limited to zero-shot, long-tail, segmentation, retrieval, caption, and video tasks. However, the visual interpretability of CLIP is rarely studied, especially in the aspect of the raw feature map. To provide visual explanations of its predictions, we propose the Image-Text Similarity Map (ITSM). Based on it, we surprisingly find that CLIP prefers the background regions over the foregrounds, and shows erroneous visualizations that conflict with human understanding. Experimentally, we find the devil is in the pooling part, where inappropriate pooling methods lead to a phenomenon called semantic shift. To correct and boost the visualization results, we propose the Masked Max Pooling, with an attention map from the self-supervised image encoder. Meanwhile, interpretability and recognition require different representations. To address the problem, we propose the dual projections to cater to this requirement. We integrate the above methods as Interpretable Contrastive Language-Image Pre-training (ICLIP). Our experiments suggest that ICLIP greatly improves the interpretability of CLIP, e.g.
nontrivial improvements of 32.85% and 49.10% on the VOC 2012 dataset.", "keywords": "Visual Interpretability;Explainability;Contrastive Language-Image Pretraining;Multimodality", "primary_area": "", "supplementary_material": "", "author": "Yi Li;Hualiang Wang;Yiqun Duan;Hang Xu;Xiaomeng Li", "authorids": "~Yi_Li15;~Hualiang_Wang1;~Yiqun_Duan1;~Hang_Xu1;~Xiaomeng_Li1", "gender": "M;M;M;M;F", "homepage": "https://none.com;https://github.com/SiLangWHL;https://github.com/DuanYiqun;;https://xmengli.github.io/", "dblp": ";;248/5526;;02/9850-1", "google_scholar": "qGsK180AAAAJ;4lzd8NsAAAAJ;https://scholar.google.com.au/citations?user=GoQKrD0AAAAJ;https://scholar.google.com.hk/citations?user=J_8TX6sAAAAJ;uVTzPpoAAAAJ", "orcid": ";0009-0006-0157-8885;;0000-0003-3645-8972;", "linkedin": ";;;;", "or_profile": "~Yi_Li15;~Hualiang_Wang1;~Yiqun_Duan1;~Hang_Xu1;~Xiaomeng_Li1", "aff": "Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;University of Technology Sydney;Huawei Noah\u2018s Ark Lab;Hong Kong University of Science and Technology", "aff_domain": "ust.hk;ust.hk;uts.edu.au;huawei.com;ust.hk", "position": "PhD student;PhD student;PhD student;Researcher;Assistant Professor", "bibtex": "@misc{\nli2023exploring,\ntitle={Exploring Visual Interpretability for Contrastive Language-Image Pretraining},\nauthor={Yi Li and Hualiang Wang and Yiqun Duan and Hang Xu and Xiaomeng Li},\nyear={2023},\nurl={https://openreview.net/forum?id=7uTvSvC7hGO}\n}", "github": "", "project": "", "reviewers": "cPkF;F7pA;ANM9;9fvA;b4Yc", "site": "https://openreview.net/forum?id=7uTvSvC7hGO", "pdf_size": 12374885, "recommendation": "3;3;3;5;6", "confidence": "4;4;4;4;4", "correctness": "3;3;3;2;3", "technical_novelty": "2;2;3;2;3", "empirical_novelty": "3;2;2;2;3", "wc_summary_paper": "57;72;132;148;72", "wc_strength_and_weaknesses": "371;268;624;304;85", "wc_clarity_quality_novelty_and_reproducibility": "90;38;185;156;9", "wc_summary_review": "71;32;25;60;33", "wc_review": "589;410;966;668;199", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;0;0", "reply_authors": "0;0;0;0;0", "recommendation_avg": [ 4.0, 1.2649110640673518 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.8, 0.39999999999999997 ], "technical_novelty_avg": [ 2.4, 0.4898979485566356 ], "empirical_novelty_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_paper_avg": [ 96.2, 36.53163013061421 ], "wc_strength_and_weaknesses_avg": [ 330.4, 174.73477043794117 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 95.6, 67.06593770312914 ], "wc_summary_review_avg": [ 44.2, 17.948816116947658 ], "wc_review_avg": [ 566.4, 256.8879911556786 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": -0.3952847075210474, "gs_citation": 34, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3842450027936874648&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Hong Kong University of Science and Technology;University of Technology Sydney;Huawei", "aff_unique_dep": ";;Noah's Ark Lab", "aff_unique_url": "https://www.ust.hk;https://www.uts.edu.au;https://www.huawei.com", "aff_unique_abbr": "HKUST;UTS;Huawei", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index":
"0;0;1;0;0", "aff_country_unique": "China;Australia" }, { "id": "7wk9PqiiW2D", "title": "ProsodyBERT: Self-Supervised Prosody Representation for Style-Controllable TTS", "track": "main", "status": "Reject", "tldr": "a self-supervised approach to learning prosody representations from raw audio", "abstract": "We propose ProsodyBERT, a self-supervised approach to learning prosody representations from raw audio. Different from most previous works, which use information bottlenecks to disentangle prosody features from speech content and speaker information, we perform an offline clustering of speaker-normalized prosody-related features (energy, pitch, their dynamics, etc.) and use the cluster labels as targets for HuBERT-like masked unit prediction. A span boundary loss is also introduced to capture long-range prosodic information. We demonstrate the effectiveness of ProsodyBERT on a multi-speaker style-controllable text-to-speech (TTS) system. Experiments show that the TTS system trained with ProsodyBERT features can generate natural and expressive speech samples, surpassing the model supervised by energy and pitch on subjective human evaluation. Also, the style and expressiveness of synthesized audio can be controlled by manipulating the prosody features. In addition, We achieve new state-of-the-art results on the IEMOCAP emotion recognition task by combining our prosody features with HuBERT features, showing that ProsodyBERT is complementary to popular pretrained speech self-supervised models.", "keywords": "prosody;self-supervised learning;text-to-speech;speech processing;emotion recognition;speech synthesis", "primary_area": "", "supplementary_material": "/attachment/a2e5020bc0c9d0f9bdd1a491f7d2c0033028ddac.zip", "author": "Yushi Hu;Chunlei Zhang;Jiatong Shi;Jiachen Lian;Mari Ostendorf;Dong Yu", "authorids": "~Yushi_Hu1;~Chunlei_Zhang1;~Jiatong_Shi1;~Jiachen_Lian1;~Mari_Ostendorf1;~Dong_Yu2", "gender": "M;M;M;M;F;M", "homepage": "https://yushi-hu.github.io;;http://shijt.site;https://jlian2.github.io;https://people.ece.uw.edu/ostendorf/;https://sites.google.com/view/dongyu888/", "dblp": "268/5766;;229/3529.html;249/9914;85/2189;71/4598-1", "google_scholar": "mXN51X0AAAAJ;NCKZGb0AAAAJ;FEDNbgkAAAAJ;https://scholar.google.com/citations?view_op=list_works;exS-GecAAAAJ;tMY31_gAAAAJ", "orcid": ";;;;0000-0001-9385-9655;0000-0003-0520-6844", "linkedin": ";;jiatong-shi-608b3016b/;jiachenlian/en;mari-ostendorf-66820a1/;dongyu/", "or_profile": "~Yushi_Hu1;~Chunlei_Zhang1;~Jiatong_Shi1;~Jiachen_Lian1;~Mari_Ostendorf1;~Dong_Yu2", "aff": "University of Washington;Tencent AI Lab;Carnegie Mellon University;Electrical Engineering & Computer Science Department, University of California Berkeley;University of Washington;Tencent AI Lab", "aff_domain": "uw.edu;tencent.com;andrew.cmu.edu;eecs.berkeley.edu;u.washington.edu;tencent.com", "position": "PhD student;Researcher;PhD student;PhD student;Full Professor;Distinguished Scientist", "bibtex": "@misc{\nhu2023prosodybert,\ntitle={Prosody{BERT}: Self-Supervised Prosody Representation for Style-Controllable {TTS}},\nauthor={Yushi Hu and Chunlei Zhang and Jiatong Shi and Jiachen Lian and Mari Ostendorf and Dong Yu},\nyear={2023},\nurl={https://openreview.net/forum?id=7wk9PqiiW2D}\n}", "github": "", "project": "", "reviewers": "J32w;CWFe;925W;PaY1", "site": "https://openreview.net/forum?id=7wk9PqiiW2D", "pdf_size": 4318349, "recommendation": "3;5;5;8", "confidence": "5;4;4;4", "correctness": "2;3;3;4", "technical_novelty": "2;2;3;4", "empirical_novelty": "0;3;3;4", 
"wc_summary_paper": "56;80;106;56", "wc_strength_and_weaknesses": "31;300;368;53", "wc_clarity_quality_novelty_and_reproducibility": "269;85;315;96", "wc_summary_review": "26;58;62;91", "wc_review": "382;523;851;296", "wc_reply_reviewers": "204;0;155;0", "wc_reply_authors": "1516;548;913;99", "reply_reviewers": "1;0;1;0", "reply_authors": "3;1;2;1", "recommendation_avg": [ 5.25, 1.7853571071357126 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 2.5, 1.5 ], "wc_summary_paper_avg": [ 74.5, 20.657928260113597 ], "wc_strength_and_weaknesses_avg": [ 188.0, 148.1705098864143 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 191.25, 102.1282894207085 ], "wc_summary_review_avg": [ 59.25, 23.036655573238058 ], "wc_review_avg": [ 513.0, 211.3019166974119 ], "wc_reply_reviewers_avg": [ 89.75, 91.40671474240828 ], "wc_reply_authors_avg": [ 769.0, 518.7692165115428 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.7276068751089989, "corr_recommendation_correctness": 0.9901475429766743, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12782719406193819464&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;0;1", "aff_unique_norm": "University of Washington;Tencent;Carnegie Mellon University;University of California, Berkeley", "aff_unique_dep": ";Tencent AI Lab;;Electrical Engineering & Computer Science Department", "aff_unique_url": "https://www.washington.edu;https://ai.tencent.com;https://www.cmu.edu;https://www.berkeley.edu", "aff_unique_abbr": "UW;Tencent AI Lab;CMU;UC Berkeley", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;1;0;0;0;1", "aff_country_unique": "United States;China" }, { "title": "First Steps Toward Understanding the Extrapolation of Nonlinear Models to Unseen Domains", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10920", "id": "7wrq3vHcMM", "poster": "/media/PosterPDFs/ICLR%202023/10920.png?t=1682014631.3467917", "openreview": "https://openreview.net/forum?id=7wrq3vHcMM", "slides": "https://iclr.cc/virtual/2023/poster/10920", "video": "https://iclr.cc/virtual/2023/poster/10920", "author_site": "Kefan Dong, Tengyu Ma", "tldr": "We prove the first result for understanding the extrapolation of nonlinear model class with structured domain shifts.", "abstract": "Real-world machine learning applications often involve deploying neural networks to domains that are not seen in the training time. Hence, we need to understand the extrapolation of \\textit{nonlinear} models---under what conditions on the distributions and function class, models can be guaranteed to extrapolate to new test distributions. The question is very challenging because even two-layer neural networks cannot be guaranteed to extrapolate outside the support of the training distribution without further assumptions on the domain shift. This paper makes some initial steps towards analyzing the extrapolation of nonlinear models for structured domain shift. We primarily consider settings where the \\textit{marginal} distribution of each coordinate of the data (or subset of coordinates) do not shift significantly across the training and test distributions, but the joint distribution may have a much bigger shift. 
We prove that the family of nonlinear models of the form $f(x)=\\sum f_i(x_i)$, where $f_i$ is an \\emph{arbitrary} function on the subset of features $x_i$, can extrapolate to unseen distributions, if the covariance of the features is well-conditioned. To the best of our knowledge, this is the first result that goes beyond linear models and the bounded density ratio assumption, even though the assumptions on the distribution shift and function class are stylized.", "keywords": "extrapolation of nonlinear models;theory;structured domain shift;gaussian kernel", "primary_area": "", "supplementary_material": "/attachment/bce566ca0905dff0b77093efa993e451340f7f85.zip", "author": "Kefan Dong;Tengyu Ma", "authorids": "~Kefan_Dong1;~Tengyu_Ma1", "gender": "M;M", "homepage": "https://kfdong.github.io/;http://ai.stanford.edu/~tengyuma/", "dblp": "234/8542;54/9061", "google_scholar": "XalUZEoAAAAJ;i38QlUwAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Kefan_Dong1;~Tengyu_Ma1", "aff": "Stanford University;Facebook AI Research", "aff_domain": "stanford.edu;fb.com", "position": "PhD student;Visiting Scientist", "bibtex": "@inproceedings{\ndong2023first,\ntitle={First Steps Toward Understanding the Extrapolation of Nonlinear Models to Unseen Domains},\nauthor={Kefan Dong and Tengyu Ma},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=7wrq3vHcMM}\n}", "github": "", "project": "", "reviewers": "JEBk;1mvE;CdV3;fhp8", "pdf_size": 396106, "recommendation": "5;6;6;6", "confidence": "3;3;3;2", "correctness": "3;4;3;4", "technical_novelty": "4;4;2;2", "empirical_novelty": "0;0;3;0", "wc_summary_paper": "69;150;51;45", "wc_strength_and_weaknesses": "224;385;195;261", "wc_clarity_quality_novelty_and_reproducibility": "19;32;62;3", "wc_summary_review": "77;122;67;63", "wc_review": "389;689;375;372", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "343;260;521;319", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.0, 1.0 ], "empirical_novelty_avg": [ 0.75, 1.299038105676658 ], "wc_summary_paper_avg": [ 78.75, 42.073596233267246 ], "wc_strength_and_weaknesses_avg": [ 266.25, 72.44092420724628 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 29.0, 21.644860821913362 ], "wc_summary_review_avg": [ 82.25, 23.509306667785847 ], "wc_review_avg": [ 456.25, 134.53136251447094 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 360.75, 97.32516375532074 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12721639972456273822&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=7wrq3vHcMM", "email": "stanford.edu;fb.com", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Stanford University;Meta", "aff_unique_dep": ";Facebook AI Research", "aff_unique_url": "https://www.stanford.edu;https://research.facebook.com", "aff_unique_abbr": "Stanford;FAIR", "aff_campus_unique_index": "0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "OTOv2: Automatic, Generic, 
User-Friendly", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12112", "id": "7ynoX1ojPMt", "poster": "", "openreview": "https://openreview.net/forum?id=7ynoX1ojPMt", "slides": "https://iclr.cc/virtual/2023/poster/12112", "video": "https://iclr.cc/virtual/2023/poster/12112", "author_site": "Tianyi Chen, Luming Liang, Tianyu Ding, Zhihui Zhu, Ilya Zharkov", "tldr": "", "abstract": "The existing model compression methods via structured pruning typically require complicated multi-stage procedures. Each individual stage necessitates numerous engineering efforts and domain-knowledge from the end-users which prevent their wider applications onto broader scenarios. We propose the second generation of Only-Train-Once (OTOv2), which first automatically trains and compresses a general DNN only once from scratch to produce a more compact model with competitive performance without fine-tuning. OTOv2 is automatic and pluggable into various deep learning applications, and requires almost minimal engineering efforts from the users. Methodologically, OTOv2 proposes two major improvements: (i) Autonomy: automatically exploits the dependency of general DNNs, partitions the trainable variables into Zero-Invariant Groups (ZIGs), and constructs the compressed model; and (ii) Dual Half-Space Projected Gradient (DHSPG): a novel optimizer to more reliably solve structured-sparsity problems. Numerically, we demonstrate the generality and autonomy of OTOv2 on a variety of model architectures such as VGG, ResNet, CARN, ConvNeXt, DenseNet and StackedUnets, the majority of which cannot be handled by other methods without extensive handcrafting efforts. Together with benchmark datasets including CIFAR10/100, DIV2K, Fashion-MNIST, SVNH and ImageNet, its effectiveness is validated by performing competitively or even better than the state-of-the-arts. 
The source code is available at https://github.com/tianyic/only_train_once.", "keywords": "Model Compression;One Shot;Automatic;Generic;User-Friendly", "primary_area": "", "supplementary_material": "", "author": "Tianyi Chen;Luming Liang;Tianyu DING;Zhihui Zhu;Ilya Zharkov", "authorids": "~Tianyi_Chen3;~Luming_Liang2;~Tianyu_DING2;~Zhihui_Zhu1;~Ilya_Zharkov1", "gender": "M;M;M;M;M", "homepage": ";;https://www.tianyuding.com;https://zhihuizhu.github.io/;", "dblp": ";46/6624;134/4796;71/8081;217/3421", "google_scholar": "2BahjdkAAAAJ;vTgdAS4AAAAJ;Qi7zTOcAAAAJ;gmSwszcAAAAJ;", "orcid": ";;0000-0001-8445-4330;;", "linkedin": "tianyi-chen-b65502b3/;luming-liang-76185b19/;tianyuding/;;", "or_profile": "~Tianyi_Chen3;~Luming_Liang2;~Tianyu_DING2;~Zhihui_Zhu1;~Ilya_Zharkov1", "aff": "Microsoft;Microsoft;;Ohio State University, Columbus;Microsoft", "aff_domain": "microsoft.com;microsoft.com;;osu.edu;microsoft.com", "position": "Senior Researcher;Principal Researcher;;Assistant Professor;Principal Research Manager", "bibtex": "@inproceedings{\nchen2023otov,\ntitle={{OTO}v2: Automatic, Generic, User-Friendly},\nauthor={Tianyi Chen and Luming Liang and Tianyu DING and Zhihui Zhu and Ilya Zharkov},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=7ynoX1ojPMt}\n}", "github": "", "project": "", "reviewers": "eYjt;iQEU;F5Jh", "pdf_size": 927580, "recommendation": "6;6;8", "confidence": "2;3;2", "correctness": "3;3;3", "technical_novelty": "2;3;4", "empirical_novelty": "3;4;3", "wc_summary_paper": "45;65;67", "wc_strength_and_weaknesses": "125;209;112", "wc_clarity_quality_novelty_and_reproducibility": "52;135;9", "wc_summary_review": "2;23;50", "wc_review": "224;432;238", "wc_reply_reviewers": "0;73;0", "wc_reply_authors": "587;244;351", "reply_reviewers": "0;1;0", "reply_authors": "1;1;1", "recommendation_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 2.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 3.0, 0.816496580927726 ], "empirical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 59.0, 9.93310961716756 ], "wc_strength_and_weaknesses_avg": [ 148.66666666666666, 42.99095512107427 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 65.33333333333333, 52.29616514515083 ], "wc_summary_review_avg": [ 25.0, 19.6468827043885 ], "wc_review_avg": [ 298.0, 94.92453142716411 ], "wc_reply_reviewers_avg": [ 24.333333333333332, 34.41253001774532 ], "wc_reply_authors_avg": [ 394.0, 143.29224217195664 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.49999999999999983, "corr_recommendation_correctness": 0.0, "gs_citation": 45, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10490559896952385613&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=7ynoX1ojPMt", "email": "microsoft.com;microsoft.com;;osu.edu;microsoft.com", "author_num": 5, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Microsoft;Ohio State University", "aff_unique_dep": "Microsoft Corporation;", "aff_unique_url": "https://www.microsoft.com;https://www.osu.edu", "aff_unique_abbr": "Microsoft;OSU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Columbus", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": 
"7zv_wSgP-LN", "title": "Walking the Tightrope: An Investigation of the Convolutional Autoencoder Bottleneck", "track": "main", "status": "Reject", "tldr": "We investigate the effect of feature map size vs. number of channels in the bottleneck of convolutional autoecoders and find that tuning the former is significantly more important than the latter.", "abstract": "In this paper, we present an in-depth investigation of the convolutional autoencoder (CAE) bottleneck.\nAutoencoders (AE), and especially their convolutional variants, play a vital role in the current deep learning toolbox.\nResearchers and practitioners employ CAEs for various tasks, ranging from outlier detection and compression to transfer and representation learning.\nDespite their widespread adoption, we have limited insight into how the bottleneck shape impacts the CAE's emergent properties.\nWe demonstrate that increased bottleneck area (i.e., height $\\times$ width) drastically improves generalization in terms of reconstruction error while also speeding up training.\nThe number of channels in the bottleneck, on the other hand, is of secondary importance.\nFurthermore, we show empirically that CAEs do not learn to copy their input, even when all layers have the same number of neurons as there are pixels in the input (i.e. there is no bottleneck).\nBesides raising important questions for further research, our findings are directly applicable to two of the most common use-cases for CAEs:\nIn image compression, it is advantageous to increase the feature map size in the bottleneck as this greatly improves reconstruction quality.\nFor reconstruction-based outlier detection, we recommend decreasing the feature map size so that out-of-distribution samples will yield a higher reconstruction error.", "keywords": "autoencoders;unsupervised learning;representation learning;investigation", "primary_area": "", "supplementary_material": "/attachment/30876c1f5b9711897a48686341cc4429f7e52267.zip", "author": "Ilja Manakov;Markus Rohm;Volker Tresp", "authorids": "~Ilja_Manakov1;~Markus_Rohm1;~Volker_Tresp1", "gender": "M;;M", "homepage": ";;https://www.dbs.ifi.lmu.de/~tresp/", "dblp": ";;t/VolkerTresp", "google_scholar": "cbLe6t0AAAAJ;;xIJHTUwAAAAJ", "orcid": ";;0000-0001-9428-3686", "linkedin": "imanakov/;;volker-tresp-8110a118/", "or_profile": "~Ilja_Manakov1;~Markus_Rohm1;~Volker_Tresp1", "aff": "Institute of Computer Science;Institut f\u00fcr Informatik;Siemens Corporate Research", "aff_domain": "dbs.ifi.lmu.de;lmu.de;siemens.com", "position": "PhD student;PhD student;Principal Researcher", "bibtex": "@misc{\nmanakov2023walking,\ntitle={Walking the Tightrope: An Investigation of the Convolutional Autoencoder Bottleneck},\nauthor={Ilja Manakov and Markus Rohm and Volker Tresp},\nyear={2023},\nurl={https://openreview.net/forum?id=7zv_wSgP-LN}\n}", "github": "", "project": "", "reviewers": "AyQZ;fPDr;VXUw;7HZy", "site": "https://openreview.net/forum?id=7zv_wSgP-LN", "pdf_size": 7673979, "recommendation": "3;5;5;6", "confidence": "3;4;3;3", "correctness": "2;3;4;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;2;3;2", "wc_summary_paper": "192;98;68;61", "wc_strength_and_weaknesses": "159;398;118;113", "wc_clarity_quality_novelty_and_reproducibility": "17;54;51;18", "wc_summary_review": "38;89;32;19", "wc_review": "406;639;269;211", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.25, 
0.4330127018922193 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 104.75, 52.25598051897983 ], "wc_strength_and_weaknesses_avg": [ 197.0, 117.4116689260484 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 35.0, 17.53567791675018 ], "wc_summary_review_avg": [ 44.5, 26.5941722939444 ], "wc_review_avg": [ 381.25, 164.7974135112563 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.13245323570650439, "corr_recommendation_correctness": 0.6488856845230502, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3185020448338485632&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Institute of Computer Science;Institut f\u00fcr Informatik;Siemens AG", "aff_unique_dep": "Computer Science;Department of Computer Science;Corporate Research", "aff_unique_url": ";;https://www.siemens.com/research", "aff_unique_abbr": ";;Siemens", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1;1", "aff_country_unique": ";Germany" }, { "id": "7zxPlqOT5us", "title": "Learning Critically in Federated Learning with Noisy and Heterogeneous Clients", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Federated learning (FL) is a distributed learning framework for collaboratively training models with privacy guarantee. Class imbalance problem is a main problem in FL with heterogeneous clients. Besides, Label noise is also an inherent problem in scenarios since clients have varied expertise in annotations. However, the co-existence of heterogeneous label noise and class-imbalance distribution in FL\u2019s small local datasets renders conventional label-noise learning methods ineffective. Thus, in this paper, we propose algorithm FedCNI, including a noise-resilience local solver and a robust global aggregator, to address the challenges of noisy and highly-skewed data in FL without using an additional clean proxy dataset. For the local solver, we first design a prototypical classifier to detect the noisy samples by evaluating the similarity between samples and prototypes. Then, we introduce a curriculum pseudo labeling method with thresholds for different classes cautiously from the noisy samples. For the global aggregator, We aggregate critically by switching re-weighted aggregation from data-size to noise level in different learning periods. 
Experiments on real-world datasets demonstrate that our method can substantially outperform state-of-the-art solutions and is robust in mix-heterogeneous FL environments.", "keywords": "Federated learning;Noisy labels;Class imbalance", "primary_area": "", "supplementary_material": "", "author": "Chenrui Wu;Zexi Li;Fangxin Wang;Chao Wu", "authorids": "~Chenrui_Wu2;~Zexi_Li1;~Fangxin_Wang1;~Chao_Wu1", "gender": ";M;M;M", "homepage": ";https://zexilee.github.io/about-zexili/;https://mypage.cuhk.edu.cn/academics/wangfangxin/;", "dblp": ";151/9187-1;142/0351;45/3158-1", "google_scholar": ";https://scholar.google.com.hk/citations?user=6lMg5eoAAAAJ;;gpTPt58AAAAJ", "orcid": ";0000-0003-0831-3549;;0000-0003-0885-6869", "linkedin": ";;;", "or_profile": "~Chenrui_Wu2;~Zexi_Li1;~Fangxin_Wang1;~Chao_Wu1", "aff": ";Zhejiang University;The Chinese University of Hong Kong, Shenzhen;Zhejiang University", "aff_domain": ";zju.edu.cn;cuhk.edu.cn;zju.edu.cn", "position": ";PhD student;Assistant Professor;Associate Professor", "bibtex": "@misc{\nwu2023learning,\ntitle={Learning Critically in Federated Learning with Noisy and Heterogeneous Clients},\nauthor={Chenrui Wu and Zexi Li and Fangxin Wang and Chao Wu},\nyear={2023},\nurl={https://openreview.net/forum?id=7zxPlqOT5us}\n}", "github": "", "project": "", "reviewers": "WMvh;ujZf;ojgr;Lk77", "site": "https://openreview.net/forum?id=7zxPlqOT5us", "pdf_size": 792107, "recommendation": "3;3;5;6", "confidence": "4;3;4;3", "correctness": "2;2;3;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "0;2;3;3", "wc_summary_paper": "71;50;101;69", "wc_strength_and_weaknesses": "251;300;300;216", "wc_clarity_quality_novelty_and_reproducibility": "59;22;46;83", "wc_summary_review": "47;18;27;36", "wc_review": "428;390;474;404", "wc_reply_reviewers": "0;168;44;45", "wc_reply_authors": "1473;1396;1780;756", "reply_reviewers": "0;1;1;1", "reply_authors": "2;2;3;1", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 1.224744871391589 ], "wc_summary_paper_avg": [ 72.75, 18.25342433627181 ], "wc_strength_and_weaknesses_avg": [ 266.75, 35.47798613224826 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 52.5, 22.051077071199945 ], "wc_summary_review_avg": [ 32.0, 10.747092630102339 ], "wc_review_avg": [ 424.0, 31.906112267087632 ], "wc_reply_reviewers_avg": [ 64.25, 62.595427149273455 ], "wc_reply_authors_avg": [ 1351.25, 372.4831372022095 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.19245008972987526, "corr_recommendation_correctness": 0.9622504486493761, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:2V8mYqeckUEJ:scholar.google.com/&scioq=Learning+Critically+in+Federated+Learning+with+Noisy+and+Heterogeneous+Clients&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;0", "aff_unique_norm": "Zhejiang University;Chinese University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "https://www.zju.edu.cn;https://www.cuhk.edu.cn", "aff_unique_abbr": "ZJU;CUHK", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "8-2sjUPp_YD", "title": "ADVL: Adaptive Distillation for Vision-Language Tasks", "track": "main", "status": 
"Reject", "tldr": "Leveraging Pretrained Unimodal Encoders for Vision-Language Tasks via Adaptive Knowledge Distillation", "abstract": "Large-scale image-text pairs, such as image-captions and image-phrases, enable the strong representation of vision-language (VL) models. Nevertheless, they lose diversity and complexity due to the constraints in collecting data. Meanwhile, models pre-trained with image-only or text-only data (we call them unimodal pretrained models) continue to flourish and impress the community. Compared to image-text pairs, unimodal data has less constraints during the collection process resulting in more diverse styles. A natural question is how to leverage unimodal pretrained models to benefit downstream VL tasks? Most existing works focus on fusing VL information in the expensive pre-training stage. They directly plug in unimodal pre-trained encoders into a VL framework and redo an additional pre-training step on paired image-text data. This causes additional computation expense and the unimodal pretrained knowledge might be forgotten. In this paper, we take a different route and investigate how to fuse VL information in the finetuning stage oaly. To directly transfer pretrained knowledge from unimodal models to belp downstream VL tasks, we propose $\\mathrm{ADVL}$, which avoids redoing any pre-training step and is generalizable to be applied of top of various VL models. To comprehensively demonstrate the effectiveness of ADVL, we conduct evaluation across three mostly recognized highly semantic VL benchmarks: VCR, VQA, and SNLI-VE under three settings, low-shot, full-shot and domainshifted settings. Results show that ADVL consistently improves the performance with different VL base models across all settings. It even achieves state-of-theart (SOTA) performance on VCR among models pre-trained with image-text data and delivers competitive results on VQA and SNLI-VE, Based on our analysis, we also discover that ADVL can improve the robustness of VL models and regulate them to better use vision information.", "keywords": "vision language;kowledge distillation;vcr;vqa;snli-ve;visual question answering;commonsense reasoning;pretraining;multimodal;robust;low-shot;zero-shot;domain-shift;debiased", "primary_area": "", "supplementary_material": "", "author": "Zhecan Wang;Noel C Codella;Haoxuan You;Long Chen;Yen-Chun Chen;Yulei Niu;Jianwei Yang;Luowei Zhou;Lu Yuan;Kai-Wei Chang;Shih-Fu Chang", "authorids": "~Zhecan_Wang2;~Noel_C_Codella1;~Haoxuan_You1;~Long_Chen8;~Yen-Chun_Chen1;~Yulei_Niu1;~Jianwei_Yang1;~Luowei_Zhou1;~Lu_Yuan1;~Kai-Wei_Chang1;~Shih-Fu_Chang3", "gender": "M;M;M;M;M;M;;M;M;M;M", "homepage": "https://www.zhecanwang.com/;http://www.noelcodella.com/;https://hxyou.github.io/;https://zjuchenlong.github.io/;;https://yuleiniu.github.io;https://luoweizhou.github.io;https://www.microsoft.com/en-us/research/people/luyuan/;http://kwchang.net;http://www.ee.columbia.edu/~sfchang/;https://jwyang.github.io/", "dblp": "167/4251;;210/2628;64/5725-16;160/0623-1;165/2982;122/7357;;18/2428;c/ShihFuChang;", "google_scholar": "uqHPnmgAAAAJ;8BnjC-4AAAAJ;BhysChMAAAAJ;https://scholar.google.com.sg/citations?user=-gtmMpIAAAAJ;Gptgy4YAAAAJ;WXd3dDwAAAAJ;M-3cIR0AAAAJ;k9TsUVsAAAAJ;fqDBtzYAAAAJ;OMVTRscAAAAJ;Cl9byD8AAAAJ", "orcid": "0009-0003-7785-4637;;;0000-0001-6148-9709;;;;;0000-0001-5365-0072;;", "linkedin": "jameszhecanwang/;noel-c-f-codella-ph-d-1b1b1723/;;;;;;;kai-wei-chang-41239040;;", "or_profile": 
"~Zhecan_Wang2;~Noel_C_Codella1;~Haoxuan_You1;~Long_Chen8;~Yen-Chun_Chen1;~Yulei_Niu1;~Luowei_Zhou1;~Lu_Yuan1;~Kai-Wei_Chang1;~Shih-Fu_Chang3;~Jianwei_Yang2", "aff": "Columbia University;Microsoft;Columbia University;Columbia University;Microsoft;Columbia University;Google;Microsoft;Amazon;Columbia University;Microsoft", "aff_domain": "columbia.edu;microsoft.com;columbia.edu;columbia.edu;microsoft.com;columbia.edu;google.com;microsoft.com;amazon.com;ee.columbia.edu;microsoft.com", "position": "PhD student;Principal Researcher;PhD student;Postdoc;Researcher;Postdoc;Research Scientist;Principal Research Manager;Researcher;Full Professor;Researcher", "bibtex": "@misc{\nwang2023advl,\ntitle={{ADVL}: Adaptive Distillation for Vision-Language Tasks},\nauthor={Zhecan Wang and Noel C Codella and Haoxuan You and Long Chen and Yen-Chun Chen and Yulei Niu and Jianwei Yang and Luowei Zhou and Lu Yuan and Kai-Wei Chang and Shih-Fu Chang},\nyear={2023},\nurl={https://openreview.net/forum?id=8-2sjUPp_YD}\n}", "github": "", "project": "", "reviewers": "LFJd;JhCz;chYB", "site": "https://openreview.net/forum?id=8-2sjUPp_YD", "pdf_size": 5509510, "recommendation": "3;3;3", "confidence": "5;4;4", "correctness": "3;3;2", "technical_novelty": "2;3;1", "empirical_novelty": "3;2;0", "wc_summary_paper": "67;134;179", "wc_strength_and_weaknesses": "216;255;458", "wc_clarity_quality_novelty_and_reproducibility": "19;53;34", "wc_summary_review": "74;23;61", "wc_review": "376;465;732", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.0, 0.816496580927726 ], "empirical_novelty_avg": [ 1.6666666666666667, 1.247219128924647 ], "wc_summary_paper_avg": [ 126.66666666666667, 46.0169051062276 ], "wc_strength_and_weaknesses_avg": [ 309.6666666666667, 106.08906111795358 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 35.333333333333336, 13.912424503139471 ], "wc_summary_review_avg": [ 52.666666666666664, 21.638443156156644 ], "wc_review_avg": [ 524.3333333333334, 151.27091223658596 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 11, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:AQPMvBEA3iIJ:scholar.google.com/&scioq=ADVL:+Adaptive+Distillation+for+Vision-Language+Tasks&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;0;0;1;0;2;1;3;0;1", "aff_unique_norm": "Columbia University;Microsoft;Google;Amazon", "aff_unique_dep": ";Microsoft Corporation;Google;Amazon.com, Inc.", "aff_unique_url": "https://www.columbia.edu;https://www.microsoft.com;https://www.google.com;https://www.amazon.com", "aff_unique_abbr": "Columbia;Microsoft;Google;Amazon", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "8-aqFHleFyC", "title": "On $\\mathcal{O}(1/K)$ Convergence and Low Sample Complexity for Single-Timescale Policy Evaluation with Nonlinear Function Approximation", "track": "main", "status": "Reject", "tldr": "", "abstract": "\tLearning an accurate value function for a given policy is a critical 
step in solving reinforcement learning (RL) problems. So far, however, the convergence speed and sample complexity performances of most existing policy evaluation algorithms remain unsatisfactory, particularly with {\\em non-linear} function approximation. This challenge motivates us to develop a new variance-reduced primal-dual method (VRPD) that is able to achieve a fast convergence speed for RL policy evaluation with nonlinear function approximation. To lower the high sample complexity limitation of variance-reduced approaches (due to the periodic full gradient evaluation with all training data), we further propose an enhanced VRPD method with an adaptive-batch adjustment (VRPD$^+$). The main features of VRPD include: i) VRPD allows the use of {\\em{constant}} step sizes and achieves the $\\mathcal{O}(1/K)$ convergence rate to the first-order stationary points of non-convex policy evaluation problems; ii) VRPD is a generic {\\em{single}}-timescale algorithm that is also applicable for solving a large class of non-convex strongly-concave minimax optimization problems; iii) By adaptively adjusting the batch size via historical stochastic gradient information, VRPD$^+$ is more sample-efficient in practice without loss of theoretical convergence rate. Our extensive numerical experiments verify our theoretical findings and showcase the high efficiency of the proposed VRPD and VRPD$^+$ algorithms compared with the state-of-the-art methods.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/b1efcee2fe438d9880da1e0afe7ef744a02f83bf.zip", "author": "Zhuqing Liu;Xin Zhang;Jia Liu;Zhengyuan Zhu;Songtao Lu", "authorids": "~Zhuqing_Liu2;~Xin_Zhang16;~Jia_Liu1;~Zhengyuan_Zhu1;~Songtao_Lu1", "gender": "F;M;M;M;M", "homepage": "https://github.com/Zhuqing-Liu;https://xinzhang-nac.github.io/;https://kevinliu-osu.github.io/index.html;;https://songtaogithub.github.io/", "dblp": "195/1161;76/1584-54.html;;68/151;05/2887", "google_scholar": ";9u5Pa0gAAAAJ;Ofx3dScAAAAJ;ixDds0sAAAAJ;LRsjX7kAAAAJ", "orcid": "0000-0003-0146-5101;0000-0002-0784-2038;;;", "linkedin": ";;;;", "or_profile": "~Zhuqing_Liu2;~Xin_Zhang16;~Jia_Liu1;~Zhengyuan_Zhu1;~Songtao_Lu1", "aff": "Ohio State University;Meta Facebook;The Ohio State University;Iowa State University;IBM Thomas J. 
Watson Research Center", "aff_domain": "osu.edu;fb.com;osu.edu;iastate.edu;ibm.com", "position": "PhD student;Research Scientist;Assistant Professor;Full Professor;Researcher", "bibtex": "@misc{\nliu2023on,\ntitle={On \\${\\textbackslash}mathcal\\{O\\}(1/K)\\$ Convergence and Low Sample Complexity for Single-Timescale Policy Evaluation with Nonlinear Function Approximation},\nauthor={Zhuqing Liu and Xin Zhang and Jia Liu and Zhengyuan Zhu and Songtao Lu},\nyear={2023},\nurl={https://openreview.net/forum?id=8-aqFHleFyC}\n}", "github": "", "project": "", "reviewers": "ZMya;Mbzc;iPgL;vYTZ", "site": "https://openreview.net/forum?id=8-aqFHleFyC", "pdf_size": 481250, "recommendation": "3;5;6;6", "confidence": "4;4;2;3", "correctness": "2;3;4;3", "technical_novelty": "1;2;3;2", "empirical_novelty": "2;3;0;2", "wc_summary_paper": "43;126;39;59", "wc_strength_and_weaknesses": "186;369;164;265", "wc_clarity_quality_novelty_and_reproducibility": "17;46;104;89", "wc_summary_review": "52;116;23;57", "wc_review": "298;657;330;470", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1514;2612;1099;1254", "reply_reviewers": "0;0;0;0", "reply_authors": "3;5;2;2", "recommendation_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 1.75, 1.0897247358851685 ], "wc_summary_paper_avg": [ 66.75, 35.01696017646306 ], "wc_strength_and_weaknesses_avg": [ 246.0, 80.33367911405527 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 64.0, 34.489128721961066 ], "wc_summary_review_avg": [ 62.0, 33.771289581536564 ], "wc_review_avg": [ 438.75, 141.6392865697932 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1619.75, 591.7551753047877 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 3.0, 1.224744871391589 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.7385489458759963, "corr_recommendation_correctness": 0.8660254037844386, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:cBErJKYm-rUJ:scholar.google.com/&scioq=On+%24%5Cmathcal%7BO%7D(1/K)%24+Convergence+and+Low+Sample+Complexity+for+Single-Timescale+Policy+Evaluation+with+Nonlinear+Function+Approximation&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;0;2;3", "aff_unique_norm": "Ohio State University;Meta;Iowa State University;IBM", "aff_unique_dep": ";Meta Platforms, Inc.;;Research", "aff_unique_url": "https://www.osu.edu;https://meta.com;https://www.iastate.edu;https://www.ibm.com/research", "aff_unique_abbr": "OSU;Meta;ISU;IBM", "aff_campus_unique_index": "1", "aff_campus_unique": ";Yorktown Heights", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Markup-to-Image Diffusion Models with Scheduled Sampling", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11768", "id": "81VJDmOE2ol", "poster": "/media/PosterPDFs/ICLR%202023/11768.png?t=1682956354.0782828", "openreview": "https://openreview.net/forum?id=81VJDmOE2ol", "slides": "https://iclr.cc/virtual/2023/poster/11768", "video": "https://iclr.cc/virtual/2023/poster/11768", "author_site": "Yuntian Deng, Noriyuki Kojima, Alexander M Rush", "tldr": "", "abstract": "Building on recent advances in image generation, we present a fully data-driven approach to rendering markup into images. 
The approach is based on diffusion models, which parameterize the distribution of data using a sequence of denoising operations on top of a Gaussian noise distribution. We view the diffusion denoising process as a sequential decision making process, and show that it exhibits compounding errors similar to exposure bias issues in imitation learning problems. To mitigate these issues, we adapt the scheduled sampling algorithm to diffusion training. We conduct experiments on four markup datasets: formulas (LaTeX), table layouts (HTML), sheet music (LilyPond), and molecular images (SMILES). These experiments each verify the effectiveness of diffusion and the use of scheduled sampling to fix generation issues. These results also show that the markup-to-image task presents a useful controlled compositional setting for diagnosing and analyzing generative image models.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Yuntian Deng;Noriyuki Kojima;Alexander M Rush", "authorids": "~Yuntian_Deng2;~Noriyuki_Kojima1;~Alexander_M_Rush1", "gender": ";;M", "homepage": "https://yuntiandeng.com;https://kojimano.github.io/;http://rush.seas.harvard.edu/", "dblp": "166/1720;185/7884;http://dblp.uni-trier.de/pers/hd/r/Rush:Alexander_M=", "google_scholar": "tk0e5lYAAAAJ;Dgu63dgAAAAJ;LIjnUGgAAAAJ", "orcid": ";;0000-0002-9900-1606", "linkedin": ";;sasha-rush-a69b6917/", "or_profile": "~Yuntian_Deng2;~Noriyuki_Kojima1;~Alexander_M_Rush1", "aff": "Harvard University;Cornell University;School of Engineering and Applied Sciences, Harvard University", "aff_domain": "harvard.edu;cornell.edu;seas.harvard.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\ndeng2023markuptoimage,\ntitle={Markup-to-Image Diffusion Models with Scheduled Sampling},\nauthor={Yuntian Deng and Noriyuki Kojima and Alexander M Rush},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=81VJDmOE2ol}\n}", "github": "", "project": "", "reviewers": "NDpz;LE3F;aUAh;uJv3", "pdf_size": 2104966, "recommendation": "3;6;6;8", "confidence": "3;4;4;4", "correctness": "3;3;4;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;4;3;3", "wc_summary_paper": "61;69;34;132", "wc_strength_and_weaknesses": "190;246;146;236", "wc_clarity_quality_novelty_and_reproducibility": "16;53;78;206", "wc_summary_review": "41;66;82;53", "wc_review": "308;434;340;627", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "370;317;326;464", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 5.75, 1.7853571071357126 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 74.0, 35.90960874195095 ], "wc_strength_and_weaknesses_avg": [ 204.5, 39.834030677299026 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 88.25, 71.47158526295607 ], "wc_summary_review_avg": [ 60.5, 15.239750654128171 ], "wc_review_avg": [ 427.25, 124.27665710019723 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 369.25, 58.26394682820586 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.8892972917998875, "corr_recommendation_correctness": 0.08084520834544431, "gs_citation": 2, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=18381331475638575973&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=81VJDmOE2ol", "email": "harvard.edu;cornell.edu;seas.harvard.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Harvard University;Cornell University", "aff_unique_dep": ";", "aff_unique_url": "https://www.harvard.edu;https://www.cornell.edu", "aff_unique_abbr": "Harvard;Cornell", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "83piwkGNzOP", "title": "A unified optimization framework of ANN-SNN Conversion: towards optimal mapping from activation values to firing rates", "track": "main", "status": "Reject", "tldr": "", "abstract": "Spiking Neural Networks (SNNs) have attracted great attention as a primary candidate for running large-scale deep artificial neural networks (ANNs) in real-time due to their distinctive properties of energy-efficient and event-driven fast computation. Training an SNN directly from scratch is usually difficult because of the discreteness of spikes. Converting an ANN to an SNN, i.e., ANN-SNN conversion, is an alternative method to obtain deep SNNs.\nThe performance of the converted SNN is determined by both the ANN performance and the conversion error. The existing ANN-SNN conversion methods usually redesign the ANN with a new activation function instead of the regular ReLU, train the tailored ANN and convert it to an SNN. The performance loss between the regular ANN with ReLU and the tailored ANN has never been considered, which will be inherited by the converted SNN. \nIn this work, we formulate the ANN-SNN conversion as a unified optimization problem which considers the performance loss between the regular ANN and the tailored ANN, as well as the conversion error simultaneously. Following the unified optimization framework, we propose the SlipReLU activation function to replace the regular ReLU activation function in the tailored ANN. The SlipReLU is a weighted sum of the threshold-ReLU and the step function, which improves the performance of either as an activation function alone.\nThe SlipReLU method covers a family of activation functions mapping from activation values in source ANNs to firing rates in target SNNs; most of the state-of-the-art optimal ANN-SNN conversion methods are special cases of our proposed SlipReLU method. We demonstrate through two theorems that the expected conversion error between SNNs and ANNs can theoretically be zero on a range of shift values $\\delta \\in [-\\frac{1}{2},\\frac{1}{2}]$ rather than a fixed shift term $\\frac{1}{2}$, enabling us to achieve converted SNNs with high accuracy and ultra-low latency. We evaluate our proposed SlipReLU method on the CIFAR-10 dataset, and the results show that the SlipReLU outperforms the state-of-the-art ANN-SNN conversion in both accuracy and latency. 
To our knowledge, this is the first work to explore a high-performance ANN-SNN conversion method considering the ANN performance and the conversion error simultaneously.", "keywords": "ANN-SNN conversion", "primary_area": "", "supplementary_material": "/attachment/1e9c0be9257ff56aebced1301bbb0612f0bc63fb.zip", "author": "Haiyan Jiang;Srinivas Anumasa;Giulia De Masi;Huan Xiong;Bin Gu", "authorids": "~Haiyan_Jiang1;~Srinivas_Anumasa1;giulia.demasi@tii.ae;~Huan_Xiong1;~Bin_Gu1", "gender": "F;M;;M;M", "homepage": ";https://sites.google.com/view/brainiith/people?authuser=0#h.p_TmRjBsUKAuEP;;https://scholar.google.com/citations?user=l4hm14MAAAAJ&hl=en;https://mbzuai.ac.ae/study/faculty/bin-gu/", "dblp": ";256/7962;;;29/1758-1", "google_scholar": "vpHnhJsAAAAJ;OjAbXBAAAAAJ;;l4hm14MAAAAJ;Vo8OgCgAAAAJ", "orcid": "0000-0002-4099-480X;;;;0000-0001-6049-1815", "linkedin": ";;;;", "or_profile": "~Haiyan_Jiang1;~Srinivas_Anumasa1;giulia.demasi@tii.ae;~Huan_Xiong1;~Bin_Gu1", "aff": "Mohamed bin Zayed University of Artificial Intelligence;Mohamed bin Zayed University of Artificial Intelligence;;;Mohamed bin Zayed University of Artificial Intelligence", "aff_domain": "mbzuai.ac.ae;mbzuai.ac.ae;;;mbzuai.ac.ae", "position": "Researcher;Postdoc;;;Assistant Professor", "bibtex": "@misc{\njiang2023a,\ntitle={A unified optimization framework of {ANN}-{SNN} Conversion: towards optimal mapping from activation values to firing rates},\nauthor={Haiyan Jiang and Srinivas Anumasa and Giulia De Masi and Huan Xiong and Bin Gu},\nyear={2023},\nurl={https://openreview.net/forum?id=83piwkGNzOP}\n}", "github": "", "project": "", "reviewers": "FzYt;rSjR;FPAL;7AYG", "site": "https://openreview.net/forum?id=83piwkGNzOP", "pdf_size": 739495, "recommendation": "1;6;8;8", "confidence": "4;5;4;5", "correctness": "2;3;3;4", "technical_novelty": "1;3;4;3", "empirical_novelty": "1;2;4;3", "wc_summary_paper": "9;69;38;69", "wc_strength_and_weaknesses": "113;230;109;165", "wc_clarity_quality_novelty_and_reproducibility": "4;37;8;8", "wc_summary_review": "18;26;8;45", "wc_review": "144;362;163;287", "wc_reply_reviewers": "296;167;13;39", "wc_reply_authors": "2376;1504;289;514", "reply_reviewers": "1;2;1;1", "reply_authors": "5;3;2;2", "recommendation_avg": [ 5.75, 2.8613807855648994 ], "confidence_avg": [ 4.5, 0.5 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.75, 1.0897247358851685 ], "empirical_novelty_avg": [ 2.5, 1.118033988749895 ], "wc_summary_paper_avg": [ 46.25, 24.953707139421187 ], "wc_strength_and_weaknesses_avg": [ 154.25, 48.99681112072499 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 14.25, 13.235841491949047 ], "wc_summary_review_avg": [ 24.25, 13.571569548139964 ], "wc_review_avg": [ 239.0, 89.76914837515169 ], "wc_reply_reviewers_avg": [ 128.75, 112.79267485080757 ], "wc_reply_authors_avg": [ 1170.75, 832.5393008741389 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.0, 1.224744871391589 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.4368520283305189, "corr_recommendation_correctness": 0.8649228885013015, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17630055219270755265&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0;0", "aff_unique_norm": "Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": "", "aff_unique_url": "https://mbzuai.ac.ae", "aff_unique_abbr": "MBZUAI", "aff_campus_unique_index": "", 
"aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Arab Emirates" }, { "id": "83xscrmnw6Q", "title": "Knowledge-Driven New Drug Recommendation", "track": "main", "status": "Reject", "tldr": "recommendation for new drugs with limited historical prescription data", "abstract": "Drug recommendation assists doctors in prescribing personalized medications to patients based on their health conditions. Existing drug recommendation solutions adopt the supervised multi-label classification setup and only work with existing drugs with sufficient prescription data from many patients. However, newly approved drugs do not have much historical prescription data and cannot leverage existing drug recommendation methods. To address this, we formulate the new drug recommendation as a few-shot learning problem. Yet, directly applying existing few-shot learning algorithms faces two challenges: (1) complex relations among diseases and drugs and (2) numerous false-negative patients who were eligible but did not yet use the new drugs. To tackle these challenges, we propose EDGE, which can quickly adapt to the recommendation for a new drug with limited prescription data from a few support patients. EDGE maintains a drug-dependent multi-phenotype few-shot learner to bridge the gap between existing and new drugs. Specifically, EDGE leverages the drug ontology to link new drugs to existing drugs with similar treatment effects and learns ontology-based drug representations. Such drug representations are used to customize the metric space of the phenotype-driven patient representations, which are composed of a set of phenotypes capturing complex patient health status. Lastly, EDGE eliminates the false-negative supervision signal using an external drug-disease knowledge base. We evaluate EDGE on two real-world datasets: the public EHR data (MIMIC-IV) and private industrial claims data. 
Results show that EDGE achieves a 7.3% improvement on the ROC-AUC score over the best baseline.", "keywords": "drug recommendation;medication recommendation;healthcare;electronic health record;few-shot learning", "primary_area": "", "supplementary_material": "/attachment/039645ac62235142d803f94ace79bbfeff58cd96.zip", "author": "Zhenbang Wu;Huaxiu Yao;Zhe Su;David M Liebovitz;Lucas M Glass;James Zou;Chelsea Finn;Jimeng Sun", "authorids": "~Zhenbang_Wu1;~Huaxiu_Yao1;~Zhe_Su2;david.liebovitz@nm.org;~Lucas_M_Glass2;~James_Zou1;~Chelsea_Finn1;~Jimeng_Sun3", "gender": "M;M;M;;;;F;", "homepage": ";http://huaxiuyao.mystrikingly.com;;;https://www.linkedin.com/in/lucas-glass-76207b45/;;https://ai.stanford.edu/~cbfinn/;http://sunlab.org", "dblp": "315/0212;197/1635;;;;;131/1783;", "google_scholar": "N8p-spIAAAAJ;A20BZnQAAAAJ;zvcvNE0AAAAJ;;XZ7SbIUAAAAJ;23ZXZvEAAAAJ;vfPE6hgAAAAJ;9jmmp5sAAAAJ", "orcid": ";;;;;;;0000-0003-1512-6426", "linkedin": ";huaxiuyao/;zhe-su-b134b823a/;;;;;jimengsun/", "or_profile": "~Zhenbang_Wu1;~Huaxiu_Yao1;~Zhe_Su2;david.liebovitz@nm.org;~Lucas_M_Glass2;~James_Zou1;~Chelsea_Finn1;~Jimeng_Sun3", "aff": "University of Illinois Urbana Champaign;Computer Science Department, Stanford University;Zhejiang University;;;Stanford University;Google;Georgia Institute of Technology", "aff_domain": "illinois.edu;cs.stanford.edu;zju.edu.cn;;;stanford.edu;google.com;gatech.edu", "position": "PhD student;Postdoc;Undergrad student;;;Assistant Professor;Research Scientist;Associate Professor", "bibtex": "@misc{\nwu2023knowledgedriven,\ntitle={Knowledge-Driven New Drug Recommendation},\nauthor={Zhenbang Wu and Huaxiu Yao and Zhe Su and David M Liebovitz and Lucas M Glass and James Zou and Chelsea Finn and Jimeng Sun},\nyear={2023},\nurl={https://openreview.net/forum?id=83xscrmnw6Q}\n}", "github": "", "project": "", "reviewers": "GvBg;YURq;54ZT;usk8", "site": "https://openreview.net/forum?id=83xscrmnw6Q", "pdf_size": 748054, "recommendation": "3;3;5;5", "confidence": "4;4;4;4", "correctness": "3;4;2;3", "technical_novelty": "2;1;2;3", "empirical_novelty": "3;0;2;2", "wc_summary_paper": "74;16;110;55", "wc_strength_and_weaknesses": "193;46;264;132", "wc_clarity_quality_novelty_and_reproducibility": "185;28;67;51", "wc_summary_review": "16;45;80;11", "wc_review": "468;135;521;249", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 1.75, 1.0897247358851685 ], "wc_summary_paper_avg": [ 63.75, 33.914414339628514 ], "wc_strength_and_weaknesses_avg": [ 158.75, 80.12295239193324 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 82.75, 60.63981777677107 ], "wc_summary_review_avg": [ 38.0, 27.504545078950134 ], "wc_review_avg": [ 343.25, 157.64576588034328 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": -0.7071067811865475, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6099476625693337510&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2;1;3;4", "aff_unique_norm": "University of Illinois Urbana-Champaign;Stanford University;Zhejiang University;Google;Georgia 
Institute of Technology", "aff_unique_dep": ";Computer Science Department;;Google;", "aff_unique_url": "https://illinois.edu;https://www.stanford.edu;https://www.zju.edu.cn;https://www.google.com;https://www.gatech.edu", "aff_unique_abbr": "UIUC;Stanford;ZJU;Google;Georgia Tech", "aff_campus_unique_index": "0;1;1;3", "aff_campus_unique": "Urbana-Champaign;Stanford;;Mountain View", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "United States;China" }, { "id": "86_enbV-pNB", "title": "Semi-Supervised Single Domain Generalization with Label-Free Adversarial Data Augmentation", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Domain generalization (DG) has attracted increasing attention recently, as it seeks to improve the generalization ability of visual recognition models to unseen target domains. DG leverages multiple source domains for model training, while single domain generalization (SDG) further restricts such setting by exploiting only a single source domain. Nevertheless, both DG and SDG assume that the source domains are fully labeled, which might not be practical in many real world scenarios. In this paper, we present a new problem, i.e., semi-supervised single domain generalization (SS-SDG), which aims to train a model with a partially labeled single source domain to generalize to multiple unseen testing domains. We propose an effective framework to address this problem. In particular, we design a label-free adversarial data augmentation strategy to diversify the source domain, and propose a novel multi-pair FixMatch loss to generalize classifiers to unseen testing domains. Extensive experiments on OfficeHome, PACS and DomainNet20 datasets show that our method surpasses the latest SDG and semi-supervised methods. 
Moreover, on PACS and DomainNet20, our method approaches the fully supervised ERM upper bound within a $5\\%$ gap, but only uses less than $8\\%$ of the labels.", "keywords": "Representation Learning;Domain Generalization", "primary_area": "", "supplementary_material": "", "author": "Ronghang Zhu;Ronghang Zhu;Xiang Yu;Sheng Li", "authorids": "~Ronghang_Zhu2;ronghangzhu@foxmail.com;~Xiang_Yu1;~Sheng_Li3", "gender": ";;M;M", "homepage": ";;https://sites.google.com/site/xiangyurutgers/;http://sheng-li.org", "dblp": ";;19/2453-2.html;23/3439-1", "google_scholar": ";;QJbtEKMAAAAJ;DEncVcYAAAAJ", "orcid": ";;;0000-0003-1205-8632", "linkedin": ";;;sheng-li-15a70022/", "or_profile": "~Ronghang_Zhu2;ronghangzhu@foxmail.com;~Xiang_Yu1;~Sheng_Li3", "aff": ";;Amazon;University of Virginia, Charlottesville", "aff_domain": ";;amazon.com;virginia.edu", "position": ";;Researcher;Assistant Professor", "bibtex": "@misc{\nzhu2023semisupervised,\ntitle={Semi-Supervised Single Domain Generalization with Label-Free Adversarial Data Augmentation},\nauthor={Ronghang Zhu and Ronghang Zhu and Xiang Yu and Sheng Li},\nyear={2023},\nurl={https://openreview.net/forum?id=86_enbV-pNB}\n}", "github": "", "project": "", "reviewers": "Unh3;ELk2;j45M;JqBr", "site": "https://openreview.net/forum?id=86_enbV-pNB", "pdf_size": 867917, "recommendation": "5;5;5;5", "confidence": "5;5;4;4", "correctness": "3;3;4;3", "technical_novelty": "3;3;2;2", "empirical_novelty": "3;0;2;3", "wc_summary_paper": "122;79;42;91", "wc_strength_and_weaknesses": "186;222;99;247", "wc_clarity_quality_novelty_and_reproducibility": "102;18;29;53", "wc_summary_review": "37;53;17;19", "wc_review": "447;372;187;410", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.0, 0.0 ], "confidence_avg": [ 4.5, 0.5 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.0, 1.224744871391589 ], "wc_summary_paper_avg": [ 83.5, 28.64000698323937 ], "wc_strength_and_weaknesses_avg": [ 188.5, 56.037933580745104 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 50.5, 32.31485726411305 ], "wc_summary_review_avg": [ 31.5, 14.654350889752845 ], "wc_review_avg": [ 354.0, 99.99749996874922 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2986288630003651923&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Amazon;University of Virginia", "aff_unique_dep": "Amazon.com, Inc.;", "aff_unique_url": "https://www.amazon.com;https://www.virginia.edu", "aff_unique_abbr": "Amazon;UVA", "aff_campus_unique_index": "1", "aff_campus_unique": ";Charlottesville", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "87n67AtiHo", "title": "Improved Fully Quantized Training via Rectifying Batch Normalization", "track": "main", "status": "Reject", "tldr": "", "abstract": "Quantization-aware Training (QAT) is able to reduce the training cost by quantizing neural network weights and activations in the forward pass and improve the speed at the inference stage. 
QAT can be extended to Fully-Quantized Training (FQT), which further accelerates the training by quantizing gradients in the backward pass, as back-propagation typically occupies half of the training time. Unfortunately, gradient quantization is challenging as Stochastic Gradient Descent (SGD) based training is sensitive to the precision of the gradient signal. Particularly, the noise introduced by gradient quantization accumulates during the backward pass, which causes the exploding gradient problem and results in unstable training and a significant accuracy drop. Though Batch Normalization (BatchNorm) is a de-facto resort to stabilize training in the regular full-precision scenario, we observe that it fails to prevent the gradient explosion when gradient quantizers are injected in the backward pass. Surprisingly, our theory shows that BatchNorm could amplify the noise accumulation, which in turn hastens the explosion of gradients. A BatchNorm rectification method is derived from our theory to suppress the amplification effect and bridge the performance gap between full-precision training and FQT. Adding this simple rectification loss to baselines generates better results than most prior FQT algorithms on various neural network architectures and datasets, regardless of the gradient bit-widths used (8, 4, and 2 bits).", "keywords": "Model Compression;Gradient Quantization;Convolution Neural Networks;Batch Normalization", "primary_area": "", "supplementary_material": "", "author": "Kaixin Xu;Jie Lin;Zhe Wang;Peng Hu;Ziyuan Zhao", "authorids": "~Kaixin_Xu1;~Jie_Lin1;~Zhe_Wang12;~Peng_Hu2;~Ziyuan_Zhao1", "gender": ";M;M;M;M", "homepage": ";;https://www.linkedin.com/in/wangzhemark/?originalSubdomain=sg;https://penghu-cs.github.io/;https://jacobzhaoziyuan.github.io/", "dblp": ";88/6731;;11/6278-2;147/1229.html", "google_scholar": ";;Xqu6fAkAAAAJ;gvESkwYAAAAJ;2vL2XTsAAAAJ", "orcid": ";;;0000-0003-3868-3997;0000-0002-4403-825X", "linkedin": ";;;;zhaoziyuan/", "or_profile": "~Kaixin_Xu1;~Jie_Lin1;~Zhe_Wang12;~Peng_Hu2;~Ziyuan_Zhao1", "aff": ";;, A*STAR;Sichuan University;I2R, A*STAR", "aff_domain": ";;i2r.a-star.edu.sg;scu.edu.cn;i2r.a-star.edu.sg", "position": ";;Researcher;Associate Professor;Snr Research Engineer", "bibtex": "@misc{\nxu2023improved,\ntitle={Improved Fully Quantized Training via Rectifying Batch Normalization},\nauthor={Kaixin Xu and Jie Lin and Zhe Wang and Peng Hu and Ziyuan Zhao},\nyear={2023},\nurl={https://openreview.net/forum?id=87n67AtiHo}\n}", "github": "", "project": "", "reviewers": "FcHZ;GegD;dU93", "site": "https://openreview.net/forum?id=87n67AtiHo", "pdf_size": 725802, "recommendation": "3;5;6", "confidence": "4;3;2", "correctness": "2;3;4", "technical_novelty": "1;3;4", "empirical_novelty": "3;3;3", "wc_summary_paper": "77;40;75", "wc_strength_and_weaknesses": "210;140;133", "wc_clarity_quality_novelty_and_reproducibility": "442;26;63", "wc_summary_review": "53;74;42", "wc_review": "782;280;313", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "694;533;246", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 2.6666666666666665, 1.247219128924647 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 64.0, 16.990193249832878 ], "wc_strength_and_weaknesses_avg": [ 161.0, 34.76588366008646 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 177.0, 187.99113454274024 ], 
"wc_summary_review_avg": [ 56.333333333333336, 13.274871834493252 ], "wc_review_avg": [ 458.3333333333333, 229.2630706318738 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 491.0, 185.29076249685698 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.9819805060619659, "corr_recommendation_correctness": 0.9819805060619659, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:k9IQVjGQDFUJ:scholar.google.com/&scioq=Improved+Fully+Quantized+Training+via+Rectifying+Batch+Normalization&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;0", "aff_unique_norm": "A*STAR;Sichuan University", "aff_unique_dep": ";", "aff_unique_url": "https://www.a-star.edu.sg;https://www.scu.edu.cn", "aff_unique_abbr": "A*STAR;SCU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Singapore;China" }, { "id": "88Z7kxbZLL3", "title": "Semi-Supervised Semantic Segmentation via Boosting Uncertainty on Unlabeled Data", "track": "main", "status": "Withdraw", "tldr": "We theoretically analyze and experimentally prove that appropriately boosting uncertainty on unlabeled data can help minimize the distribution gap in semi-supervised semantic segmentation.", "abstract": "We bring a new perspective to semi-supervised semantic segmentation by providing an analysis on the labeled and unlabeled distributions in training datasets. We firstly figure out that the distribution gap between labeled and unlabeled datasets cannot be ignored, even though the two datasets are sampled from the same distribution. To address this issue, we theoretically analyze and experimentally prove that appropriately boosting uncertainty on unlabeled data can help minimize the distribution gap, which benefits the generalization of the model. We propose two strategies and design an algorithm of uncertainty booster specially for semi-supervised semantic segmentation. Extensive experiments are carried out based on these theories, and the results confirmed the efficacy of the algorithm and strategies. Our plug-and play uncertainty booster is tiny, efficient and robust to hyper parameters, but can significantly promote the performance. 
In our experiments, our method achieves state-of-the-art performance compared to the current semi-supervised semantic segmentation methods on the popular benchmark: Cityscapes and PASCAL VOC 2012 with different train settings.", "keywords": "Semantic Segmentation;Semi-supervised Learning;Uncertainty in Deep Learning", "primary_area": "", "supplementary_material": "/attachment/d1fe150c40bd0b6730a868e475562d8338fa005b.zip", "author": "Daoan Zhang;Yunhao Luo;Jie-Neng Chen;Lingyun Huang;Jianguo Zhang", "authorids": "~Daoan_Zhang2;~Yunhao_Luo1;~Jie-Neng_Chen1;~Lingyun_Huang1;~Jianguo_Zhang2", "gender": "M;M;M;M;M", "homepage": "https://devinluo27.github.io/;;https://scholar.google.com/citations?hl=en&user=ypSmZtIAAAAJ&view_op=list_works;https://dwan.ch;https://beckschen.github.io", "dblp": ";;90/6415-1;331/5467;", "google_scholar": ";;https://scholar.google.com/citations?hl=en;yeLdvGoAAAAJ;yLYj88sAAAAJ", "orcid": ";;;0000-0002-6959-165X;", "linkedin": ";lingyun-huang-65bb1b4/;;;jieneng-chen-53254011a/", "or_profile": "~Yunhao_Luo1;~Lingyun_Huang1;~Jianguo_Zhang2;~Zhang_Daoan1;~J_Chen1", "aff": "Brown University;;Southern University for Science and Technology;southern university of science and technology;Johns Hopkins University", "aff_domain": "brown.edu;;sustech.edu;sustech.edu.cn;jhu.edu", "position": "MS student;;Full Professor;MS student;PhD student", "bibtex": "@misc{\nzhang2023semisupervised,\ntitle={Semi-Supervised Semantic Segmentation via Boosting Uncertainty on Unlabeled Data},\nauthor={Daoan Zhang and Yunhao Luo and Jie-Neng Chen and Lingyun Huang and Jianguo Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=88Z7kxbZLL3}\n}", "github": "", "project": "", "reviewers": "Wa4P;YHx1;mdKN;YVsx", "site": "https://openreview.net/forum?id=88Z7kxbZLL3", "pdf_size": 809233, "recommendation": "3;5;5;5", "confidence": "4;3;4;4", "correctness": "3;3;3;3", "technical_novelty": "2;3;3;2", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "44;81;69;39", "wc_strength_and_weaknesses": "246;80;398;259", "wc_clarity_quality_novelty_and_reproducibility": "26;49;5;6", "wc_summary_review": "2;8;50;16", "wc_review": "318;218;522;320", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 58.25, 17.36915369268175 ], "wc_strength_and_weaknesses_avg": [ 245.75, 112.7261615597728 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 21.5, 17.95132307101624 ], "wc_summary_review_avg": [ 19.0, 18.57417562100671 ], "wc_review_avg": [ 344.5, 110.46605813551962 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:89uvNaxkax8J:scholar.google.com/&scioq=Semi-Supervised+Semantic+Segmentation+via+Boosting+Uncertainty+on+Unlabeled+Data&hl=en&as_sdt=0,44", "gs_version_total": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Brown University;Southern University for Science and Technology;Southern University of Science and Technology;Johns Hopkins University", "aff_unique_dep": ";;;", 
"aff_unique_url": "https://www.brown.edu;https://www.sustech.edu.cn;https://www.sustech.edu.cn;https://www.jhu.edu", "aff_unique_abbr": "Brown;SUSTech;SUSTech;JHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "United States;China" }, { "title": "Universal Few-shot Learning of Dense Prediction Tasks with Visual Token Matching", "status": "Top-5%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10974", "id": "88nT0j5jAn", "poster": "/media/PosterPDFs/ICLR%202023/10974.png?t=1682639460.5686665", "openreview": "https://openreview.net/forum?id=88nT0j5jAn", "slides": "https://iclr.cc/virtual/2023/poster/10974", "video": "https://iclr.cc/virtual/2023/poster/10974", "author_site": "Donggyun Kim, Jinwoo Kim, Seongwoong Cho, Chong Luo, Seunghoon Hong", "tldr": "a universal few-shot learner for general dense prediction tasks", "abstract": "Dense prediction tasks are a fundamental class of problems in computer vision. As supervised methods suffer from high pixel-wise labeling cost, a few-shot learning solution that can learn any dense task from a few labeled images is desired. Yet, current few-shot learning methods target a restricted set of tasks such as semantic segmentation, presumably due to challenges in designing a general and unified model that is able to flexibly and efficiently adapt to arbitrary tasks of unseen semantics. We propose Visual Token Matching (VTM), a universal few-shot learner for arbitrary dense prediction tasks. It employs non-parametric matching on patch-level embedded tokens of images and labels that encapsulates all tasks. Also, VTM flexibly adapts to any task with a tiny amount of task-specific parameters that modulate the matching algorithm. We implement VTM as a powerful hierarchical encoder-decoder architecture involving ViT backbones where token matching is performed at multiple feature hierarchies. We experiment VTM on a challenging variant of Taskonomy dataset and observe that it robustly few-shot learns various unseen dense prediction tasks. Surprisingly, it is competitive with fully supervised baselines using only 10 labeled examples of novel tasks ($0.004\\%$ of full supervision) and sometimes outperforms using $0.1\\%$ of full supervision. 
Codes are available at https://github.com/GitGyun/visual_token_matching.", "keywords": "few-shot learning;dense prediction tasks", "primary_area": "", "supplementary_material": "", "author": "Donggyun Kim;Jinwoo Kim;Seongwoong Cho;Chong Luo;Seunghoon Hong", "authorids": "~Donggyun_Kim1;~Jinwoo_Kim4;~Seongwoong_Cho1;~Chong_Luo1;~Seunghoon_Hong2", "gender": ";M;M;F;M", "homepage": ";https://jw9730.github.io/;https://www.github.com/seongwoongcho;https://www.microsoft.com/en-us/research/people/cluo/;https://maga33.github.io/", "dblp": ";;;79/3712;142/3014.html", "google_scholar": "g_CtB50AAAAJ;kSJAiE4AAAAJ;;01iBf38AAAAJ;hvr3ALkAAAAJ", "orcid": ";;;0000-0003-0939-474X;", "linkedin": "%EB%8F%99%EA%B7%A0-%EA%B9%80-37a890187/;jw9730/;;;seunghoon-hong-194489a4/", "or_profile": "~Donggyun_Kim1;~Jinwoo_Kim4;~Seongwoong_Cho1;~Chong_Luo1;~Seunghoon_Hong1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Microsoft Research Asia;", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;microsoft.com;", "position": "PhD student;PhD student;MS student;Principal Researcher;", "bibtex": "@inproceedings{\nkim2023universal,\ntitle={Universal Few-shot Learning of Dense Prediction Tasks with Visual Token Matching},\nauthor={Donggyun Kim and Jinwoo Kim and Seongwoong Cho and Chong Luo and Seunghoon Hong},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=88nT0j5jAn}\n}", "github": "", "project": "", "reviewers": "hDLX;arpe;1qaw", "pdf_size": 17448273, "recommendation": "8;10;10", "confidence": "4;3;4", "correctness": "4;3;4", "technical_novelty": "4;4;3", "empirical_novelty": "4;4;4", "wc_summary_paper": "75;140;277", "wc_strength_and_weaknesses": "272;428;450", "wc_clarity_quality_novelty_and_reproducibility": "41;2;129", "wc_summary_review": "55;17;104", "wc_review": "443;587;960", "wc_reply_reviewers": "0;29;51", "wc_reply_authors": "532;1299;2287", "reply_reviewers": "0;1;1", "reply_authors": "1;2;4", "recommendation_avg": [ 9.333333333333334, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 4.0, 0.0 ], "wc_summary_paper_avg": [ 164.0, 84.194219912454 ], "wc_strength_and_weaknesses_avg": [ 383.3333333333333, 79.23523346480543 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 57.333333333333336, 53.1183165730575 ], "wc_summary_review_avg": [ 58.666666666666664, 35.6121078036982 ], "wc_review_avg": [ 663.3333333333334, 217.85673171962247 ], "wc_reply_reviewers_avg": [ 26.666666666666668, 20.885933597094056 ], "wc_reply_authors_avg": [ 1372.6666666666667, 718.3668205651544 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 1.247219128924647 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.4999999999999999, "corr_recommendation_correctness": -0.4999999999999999, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16301186139394092348&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=88nT0j5jAn", "email": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;microsoft.com;", "author_num": 5, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Korea Advanced Institute of Science 
and Technology;Microsoft", "aff_unique_dep": ";Research", "aff_unique_url": "https://www.kaist.ac.kr;https://www.microsoft.com/en-us/research/group/asia", "aff_unique_abbr": "KAIST;MSR Asia", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "South Korea;China" }, { "title": "Function-space regularized R\u00e9nyi divergences", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10987", "id": "89GT-S49mGd", "poster": "/media/PosterPDFs/ICLR%202023/10987.png?t=1680882946.043871", "openreview": "https://openreview.net/forum?id=89GT-S49mGd", "slides": "https://iclr.cc/virtual/2023/poster/10987", "video": "https://iclr.cc/virtual/2023/poster/10987", "author_site": "Jeremiah Birrell, Yannis Pantazis, Paul Dupuis, Luc Rey-Bellet, Markos Katsoulakis", "tldr": "", "abstract": "We propose a new family of regularized R\u00e9nyi divergences parametrized not only by the order $\\alpha$ but also by a variational function space. These new objects are defined by taking the infimal convolution of the standard R\u00e9nyi divergence with the integral probability metric (IPM) associated with the chosen function space. We derive a novel dual variational representation that can be used to construct numerically tractable divergence estimators. This representation avoids risk-sensitive terms and therefore exhibits lower variance, making it well-behaved when $\\alpha>1$; this addresses a notable weakness of prior approaches. We prove several properties of these new divergences, showing that they interpolate between the classical R\u00e9nyi divergences and IPMs. We also study the $\\alpha\\to\\infty$ limit, which leads to a regularized worst-case-regret and a new variational representation in the classical case. Moreover, we show that the proposed regularized R\u00e9nyi divergences inherit features from IPMs such as the ability to compare distributions that are not absolutely continuous, e.g., empirical measures and distributions with low-dimensional support. 
We present numerical results on both synthetic and real datasets, showing the utility of these new divergences in both estimation and GAN training applications; in particular, we demonstrate significantly reduced variance and improved training performance.", "keywords": "R\u00e9nyi divergence;integral probability metrics;variational formulas;worst-case-regret", "primary_area": "", "supplementary_material": "", "author": "Jeremiah Birrell;Yannis Pantazis;Paul Dupuis;Luc Rey-Bellet;Markos Katsoulakis", "authorids": "~Jeremiah_Birrell1;~Yannis_Pantazis1;~Paul_Dupuis1;~Luc_Rey-Bellet1;~Markos_Katsoulakis1", "gender": "M;M;;M;M", "homepage": "https://www.researchgate.net/profile/Jeremiah-Birrell;https://sites.google.com/site/yannispantazis/;https://appliedmath.brown.edu/people/paul-dupuis;https://luc-umass.github.io/;https://www.math.umass.edu/directory/faculty/markos-katsoulakis", "dblp": ";;;75/4765;", "google_scholar": "R60hJGUAAAAJ;https://scholar.google.gr/citations?user=MypIGOYAAAAJ;;GElhocQAAAAJ;2PpEwFQAAAAJ", "orcid": ";0000-0002-2009-7562;;0000-0003-1166-8957;0000-0003-4354-1766", "linkedin": ";;;;", "or_profile": "~Jeremiah_Birrell1;~Yannis_Pantazis1;~Paul_Dupuis1;~Luc_Rey-Bellet1;~Markos_Katsoulakis1", "aff": "University of Massachusetts, Amherst;Foundation for Research and Technology - Hellas;Brown University;University of Massachusetts at Amherst;University of Massachusetts at Amherst", "aff_domain": "umass.edu;forth.gr;brown.edu;umass.edu;umass.edu", "position": "Postdoc;Researcher;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nbirrell2023functionspace,\ntitle={Function-space regularized R\\'enyi divergences},\nauthor={Jeremiah Birrell and Yannis Pantazis and Paul Dupuis and Luc Rey-Bellet and Markos Katsoulakis},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=89GT-S49mGd}\n}", "github": "", "project": "", "reviewers": "ht2a;qiWv;Fzvh", "pdf_size": 799940, "recommendation": "5;6;8", "confidence": "3;3;3", "correctness": "4;3;3", "technical_novelty": "3;3;3", "empirical_novelty": "2;3;3", "wc_summary_paper": "16;52;133", "wc_strength_and_weaknesses": "229;82;296", "wc_clarity_quality_novelty_and_reproducibility": "9;274;45", "wc_summary_review": "23;34;78", "wc_review": "277;442;552", "wc_reply_reviewers": "0;132;61", "wc_reply_authors": "692;662;312", "reply_reviewers": "0;1;1", "reply_authors": "1;1;1", "recommendation_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.0, 0.0 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 67.0, 48.92851929090027 ], "wc_strength_and_weaknesses_avg": [ 202.33333333333334, 89.37685507010315 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 109.33333333333333, 117.36079224151291 ], "wc_summary_review_avg": [ 45.0, 23.762715894162152 ], "wc_review_avg": [ 423.6666666666667, 113.0142567210979 ], "wc_reply_reviewers_avg": [ 64.33333333333333, 53.94029621308688 ], "wc_reply_authors_avg": [ 555.3333333333334, 172.49798710580816 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": -0.7559289460184545, "gs_citation": 5, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=16389556735546930927&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "pdf": "https://openreview.net/pdf?id=89GT-S49mGd", "email": "umass.edu;forth.gr;brown.edu;umass.edu;umass.edu", "author_num": 5, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "University of Massachusetts Amherst;Foundation for Research and Technology - Hellas;Brown University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.umass.edu;https://www.forth.gr;https://www.brown.edu", "aff_unique_abbr": "UMass Amherst;FORTH;Brown", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Amherst;", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "United States;Greece" }, { "id": "8CDeu0f4i2", "title": "REDUCING OVERSMOOTHING IN GRAPH NEURAL NETWORKS BY CHANGING THE ACTIVATION FUNCTION", "track": "main", "status": "Reject", "tldr": "", "abstract": "The performance of Graph Neural Networks (GNNs) deteriorates as the depth of the network increases. That performance drop is mainly attributed to oversmoothing, which leads to similar node representations through repeated graph convolutions. We show that in deep GNNs the activation function plays a crucial role in oversmoothing. We explain theoretically why this is the case and propose a simple modification to the slope of ReLU to reduce oversmoothing. The proposed approach enables deep architectures without the need to change the network architecture or to add residual connections. We verify the theoretical results experimentally and further show that deep networks, which do not suffer from oversmoothing are beneficial in the presence of the \u201ccold start\u201d problem, i.e. when there is no feature information about unlabeled nodes.", "keywords": "Graph Neural Networks;Oversmoothing", "primary_area": "", "supplementary_material": "", "author": "Dimitrios Kelesis;Dimitrios Vogiatzis;Georgios Katsimpras;Dimitris Fotakis;Georgios Paliouras", "authorids": "~Dimitrios_Kelesis1;dimitrv@iit.demokritos.gr;~Georgios_Katsimpras2;~Dimitris_Fotakis1;~Georgios_Paliouras1", "gender": ";;;M;M", "homepage": ";;;http://www.softlab.ntua.gr/~fotakis/;https://users.iit.demokritos.gr/~paliourg", "dblp": "309/5763;;;95/4731;55/2039", "google_scholar": ";;HmR-kEsAAAAJ;zFDLf0UAAAAJ;-pec7wIAAAAJ", "orcid": ";;;0000-0001-6864-8960;0000-0001-9629-2367", "linkedin": "dimitrios-kelesis-b614451b5/;;;;georgios-paliouras-a203a79/", "or_profile": "~Dimitrios_Kelesis1;dimitrv@iit.demokritos.gr;~Georgios_Katsimpras2;~Dimitris_Fotakis1;~Georgios_Paliouras1", "aff": "National Centre For Scientific Research Demokritos;;NCSR Demokritos;National Technical University of Athens;NCSR \u201cDemokritos\u201d", "aff_domain": "iit.demokritos.gr;;iit.demokritos.gr;ntua.gr;demokritos.gr", "position": "Researcher;;Researcher;Full Professor;Researcher", "bibtex": "@misc{\nkelesis2023reducing,\ntitle={{REDUCING} {OVERSMOOTHING} {IN} {GRAPH} {NEURAL} {NETWORKS} {BY} {CHANGING} {THE} {ACTIVATION} {FUNCTION}},\nauthor={Dimitrios Kelesis and Dimitrios Vogiatzis and Georgios Katsimpras and Dimitris Fotakis and Georgios Paliouras},\nyear={2023},\nurl={https://openreview.net/forum?id=8CDeu0f4i2}\n}", "github": "", "project": "", "reviewers": "kwRc;XeNT;rfT6;WALS", "site": "https://openreview.net/forum?id=8CDeu0f4i2", "pdf_size": 2713394, "recommendation": "3;5;5;6", "confidence": "4;4;3;3", "correctness": "3;3;3;4", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "88;120;80;117", "wc_strength_and_weaknesses": 
"380;343;390;272", "wc_clarity_quality_novelty_and_reproducibility": "112;20;68;32", "wc_summary_review": "86;40;43;42", "wc_review": "666;523;581;463", "wc_reply_reviewers": "274;0;0;108", "wc_reply_authors": "613;271;522;330", "reply_reviewers": "2;0;0;1", "reply_authors": "3;1;1;2", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 101.25, 17.512495538900218 ], "wc_strength_and_weaknesses_avg": [ 346.25, 46.30537225851877 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 58.0, 35.832945734337834 ], "wc_summary_review_avg": [ 52.75, 19.22725929507375 ], "wc_review_avg": [ 558.25, 74.90452256039018 ], "wc_reply_reviewers_avg": [ 95.5, 112.09259565198765 ], "wc_reply_authors_avg": [ 434.0, 138.89744418094958 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.6882472016116854, "corr_recommendation_correctness": 0.6622661785325219, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14838311068490908583&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "National Centre for Scientific Research 'Demokritos';National Technical University of Athens;National Centre for Scientific Research \u201cDemokritos\u201d", "aff_unique_dep": ";;", "aff_unique_url": "https://www.demokritos.gr;https://www.ntua.gr;https://www.demokritos.gr", "aff_unique_abbr": "NCSR Demokritos;NTUA;NCSR Demokritos", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Greece" }, { "id": "8CJrjp73sfk", "title": "Few-bit Backward: Quantized Gradients of Activation Functions for Memory Footprint Reduction", "track": "main", "status": "Reject", "tldr": "", "abstract": "Memory footprint is one of the main limiting factors for large neural network training. In backpropagation, one needs to store the input to each operation in the computational graph. Every modern neural network model has quite a few pointwise nonlinearities in its architecture, and such operation induces additional memory costs which --- as we show -- can be significantly reduced by quantization of the gradients.\nWe propose a systematic approach to compute optimal quantization of the retained gradients of the pointwise nonlinear functions with only a few bits per each element.\nWe show that such approximation can be achieved by computing optimal piecewise-constant approximation of the derivative of the activation function, which can be done by dynamic programming. The drop-in replacements are implemented for all popular nonlinearities and can be used in any existing pipeline. 
We confirm the memory reduction and the same convergence on several open benchmarks.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/7db78f21571ec2bc7dc720b79f309cb641271b19.zip", "author": "Georgii Sergeevich Novikov;Daniel Bershatsky;Julia Gusak;Alex Shonenkov;Denis Valerievich Dimitrov;Ivan Oseledets", "authorids": "~Georgii_Sergeevich_Novikov1;~Daniel_Bershatsky1;~Julia_Gusak1;~Alex_Shonenkov1;~Denis_Valerievich_Dimitrov1;~Ivan_Oseledets1", "gender": "Non-Binary;M;F;M;M;M", "homepage": ";https://github.com/daskol;https://juliagusak.github.io/about/;https://t.me/shonenkovAI;https://t.me/dendimitrov;http://oseledets.github.io", "dblp": ";;179/6722;287/9081.html;82/8368;56/7175", "google_scholar": "M5fp_DMAAAAJ;XthC2z8AAAAJ;QriHoq4AAAAJ;https://scholar.google.com/citations?hl=en;3JSIJpYAAAAJ;https://scholar.google.ru/citations?user=5kMqBQEAAAAJ", "orcid": ";0000-0001-8917-8187;;0000-0002-2348-5624;0000-0002-9756-5424;", "linkedin": ";;julia-gusak-0b265688/;http://linkedin.com/in/shonenkov/;denis-dimitrov-66bbb3116/;", "or_profile": "~Georgii_Sergeevich_Novikov1;~Daniel_Bershatsky1;~Julia_Gusak1;~Alex_Shonenkov1;~Denis_Valerievich_Dimitrov1;~Ivan_Oseledets1", "aff": "Skolkovo Institute of Science and Technology;Skoltech;INRIA;;Sber;Institute of Numerical Mathematics", "aff_domain": "skoltech.ru;skoltech.ru;inria.fr;;sberbank.com;inm.ras.ru", "position": "PhD student;MS student;Researcher;;Principal Researcher;Researcher", "bibtex": "@misc{\nnovikov2023fewbit,\ntitle={Few-bit Backward: Quantized Gradients of Activation Functions for Memory Footprint Reduction},\nauthor={Georgii Sergeevich Novikov and Daniel Bershatsky and Julia Gusak and Alex Shonenkov and Denis Valerievich Dimitrov and Ivan Oseledets},\nyear={2023},\nurl={https://openreview.net/forum?id=8CJrjp73sfk}\n}", "github": "", "project": "", "reviewers": "Wx7m;1fcj;KDUc", "site": "https://openreview.net/forum?id=8CJrjp73sfk", "pdf_size": 719159, "recommendation": "3;6;6", "confidence": "5;3;4", "correctness": "3;4;3", "technical_novelty": "2;2;2", "empirical_novelty": "2;2;2", "wc_summary_paper": "107;111;39", "wc_strength_and_weaknesses": "118;354;131", "wc_clarity_quality_novelty_and_reproducibility": "13;91;25", "wc_summary_review": "25;54;37", "wc_review": "263;610;232", "wc_reply_reviewers": "61;171;0", "wc_reply_authors": "461;1222;356", "reply_reviewers": "1;1;0", "reply_authors": "5;10;2", "recommendation_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 85.66666666666667, 33.038697848970315 ], "wc_strength_and_weaknesses_avg": [ 201.0, 108.31743473082561 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 43.0, 34.292856398964496 ], "wc_summary_review_avg": [ 38.666666666666664, 11.897712198383164 ], "wc_review_avg": [ 368.3333333333333, 171.3521390457544 ], "wc_reply_reviewers_avg": [ 77.33333333333333, 70.75937315217602 ], "wc_reply_authors_avg": [ 679.6666666666666, 385.87591557678513 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 5.666666666666667, 3.299831645537222 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.8660254037844387, "corr_recommendation_correctness": 0.5, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4223555193894842487&as_sdt=5,48&sciodt=0,48&hl=en", 
"gs_version_total": 7, "aff_unique_index": "0;0;1;2;3", "aff_unique_norm": "Skolkovo Institute of Science and Technology;INRIA;Sberbank;Institute of Numerical Mathematics", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.skoltech.ru;https://www.inria.fr;https://www.sberbank.ru;", "aff_unique_abbr": "Skoltech;INRIA;Sber;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Russian Federation;France;" }, { "title": "Sharper Bounds for Uniformly Stable Algorithms with Stationary Mixing Process", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11666", "id": "8E5Yazboyh", "poster": "/media/PosterPDFs/ICLR%202023/11666.png?t=1680750097.116154", "openreview": "https://openreview.net/forum?id=8E5Yazboyh", "slides": "https://iclr.cc/virtual/2023/poster/11666", "video": "https://iclr.cc/virtual/2023/poster/11666", "author_site": "Shi Fu, Yunwen Lei, Qiong Cao, Xinmei Tian, Dacheng Tao", "tldr": "We develop stability and generalization bounds for learning with mixing sequences.", "abstract": " Generalization analysis of learning algorithms often builds on a critical assumption that training examples are independently and identically distributed, which is often violated in practical problems such as time series prediction. In this paper, we use algorithmic stability to study the generalization performance of learning algorithms with $\\psi$-mixing data, where the dependency between observations weakens over time. We show uniformly stable algorithms guarantee high-probability generalization bounds of the order $O(1/\\sqrt{n})$ (within a logarithmic factor), where $n$ is the sample size. We apply our general result to specific algorithms including regularization schemes, stochastic gradient descent and localized iterative regularization, and develop excess population risk bounds for learning with $\\psi$-mixing data. Our analysis builds on a novel moment bound for weakly-dependent random variables on a $\\varphi$-mixing sequence and a novel error decomposition of generalization error.", "keywords": "Algorithmic Stability;Non-I.I.D. 
Learning;Generalization Error;Learning Theory", "primary_area": "", "supplementary_material": "", "author": "Shi Fu;Yunwen Lei;Qiong Cao;Xinmei Tian;Dacheng Tao", "authorids": "~Shi_Fu1;~Yunwen_Lei1;~Qiong_Cao1;~Xinmei_Tian1;~Dacheng_Tao1", "gender": "M;M;F;F;", "homepage": "http:// home.ustc.edu.cn/~fs311;https://leiyw.github.io/;;https://faculty.ustc.edu.cn/tianxinmei1/zh_CN/index.htm;", "dblp": ";https://dblp.org/pers/l/Lei:Yunwen;22/7733;03/5204-1;", "google_scholar": ";https://scholar.google.com.hk/citations?user=g3dg0rsAAAAJ;JYtbNBsAAAAJ;https://scholar.google.com.au/citations?hl=zh-CN;", "orcid": ";;;0000-0002-5952-8753;", "linkedin": ";;;;", "or_profile": "~Shi_Fu1;~Yunwen_Lei1;~Qiong_Cao1;~Xinmei_Tian1;~Dacheng_Tao1", "aff": "University of Science and Technology of China;University of Hong Kong;JD Explore Academy;University of Science and Technology of China;", "aff_domain": "ustc.edu.cn;hku.hk;jd.com;ustc.edu.cn;", "position": "MS student;Assistant Professor;Research Scientist;Full Professor;", "bibtex": "@inproceedings{\nfu2023sharper,\ntitle={Sharper Bounds for Uniformly Stable Algorithms with Stationary Mixing Process},\nauthor={Shi Fu and Yunwen Lei and Qiong Cao and Xinmei Tian and Dacheng Tao},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=8E5Yazboyh}\n}", "github": "", "project": "", "reviewers": "Ue97;2VCS;PGse;enwM;uJFn;HM4R", "pdf_size": 368879, "recommendation": "5;6;6;6;6;8", "confidence": "2;3;3;4;3;2", "correctness": "3;3;3;4;4;4", "technical_novelty": "3;2;3;3;3;3", "empirical_novelty": "0;0;0;0;0;0", "wc_summary_paper": "430;149;87;233;78;44", "wc_strength_and_weaknesses": "395;290;285;209;344;92", "wc_clarity_quality_novelty_and_reproducibility": "56;90;62;90;25;188", "wc_summary_review": "55;131;22;88;50;25", "wc_review": "936;660;456;620;497;349", "wc_reply_reviewers": "26;49;0;0;20;0", "wc_reply_authors": "958;620;979;674;418;563", "reply_reviewers": "1;1;0;0;1;0", "reply_authors": "3;1;2;1;1;1", "recommendation_avg": [ 6.166666666666667, 0.8975274678557507 ], "confidence_avg": [ 2.8333333333333335, 0.6871842709362768 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.8333333333333335, 0.3726779962499649 ], "empirical_novelty_avg": [ 0, 0 ], "wc_summary_paper_avg": [ 170.16666666666666, 131.17344328110863 ], "wc_strength_and_weaknesses_avg": [ 269.1666666666667, 97.57632340321544 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 85.16666666666667, 51.03729573121557 ], "wc_summary_review_avg": [ 61.833333333333336, 37.85682090309339 ], "wc_review_avg": [ 586.3333333333334, 187.13690769653704 ], "wc_reply_reviewers_avg": [ 15.833333333333334, 18.13299632039767 ], "wc_reply_authors_avg": [ 702.0, 204.02205763103166 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.7637626158259734 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.22518867455552252, "corr_recommendation_correctness": 0.5570860145311556, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9345933168421056771&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "pdf": "https://openreview.net/pdf?id=8E5Yazboyh", "email": "ustc.edu.cn;hku.hk;jd.com;ustc.edu.cn;", "author_num": 5, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Science and Technology of China;University of Hong Kong;JD", "aff_unique_dep": ";;JD Explore Academy", "aff_unique_url": 
"http://www.ustc.edu.cn;https://www.hku.hk;", "aff_unique_abbr": "USTC;HKU;", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China;" }, { "id": "8FL8vRvlk59", "title": "Reinforced Sample Reweighting Policy for Semi-supervised Learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Semi-supervised learning (SSL) has been shown to be an effective paradigm for learning with less labeled data. To improve the performance of SSL, existing methods build sample reweighting or thresholding strategies to handle the category bias or erroneous pseudo labels. However, most of these existing methods are based on the heuristic hand-crafted rules, which require laborious adjustment, and may lead to sub-optimal solutions that cannot improve the model performance to the greatest extent. Here, to the best of our knowledge, we pioneer to develop an automatic strategy that boosts the performance of SSL. We introduce an end-to-end sample reweighting policy for semi-supervised learning, with a delicately designed Markov Decision Process (MDP) framework. The MDP framework is constructed with an agent network, which is optimized in a reward-driven manner, and receives the carefully designed state and action representations for decision reference. We also design a memory paradigm for computation-efficient representation construction and MDP solving. We further introduce a \"pretraining-boosting\" two-stage MDP curriculum where the agent network is firstly pretrained and then optimized continuously in the deployment phase to catch up with the constantly updated classification network. Extensive experiments demonstrate that our method achieves state-of-the-art performance on multiple datasets, outperforming previous advanced approaches such as FixMatch.", "keywords": "Semi-supervised Learning", "primary_area": "", "supplementary_material": "", "author": "Lanyun Zhu;Tianrun Chen;Jianxiong Yin;Simon See;Jun Liu", "authorids": "~Lanyun_Zhu1;~Tianrun_Chen1;~Jianxiong_Yin1;~Simon_See1;~Jun_Liu8", "gender": "M;M;;M;M", "homepage": "https://lanyunzhu.site;http://tianrun-chen.github.io;;;", "dblp": "245/2640;317/5235;;62/6547;95/3736-36", "google_scholar": "urOSnlQAAAAJ;;;ebIHTEoAAAAJ;Q5Ild8UAAAAJ", "orcid": ";;;0000-0002-4958-9237;", "linkedin": ";https://www.linkedin.cn/incareer/in/tianrun-chen-3441731a2;;simonsee/;", "or_profile": "~Lanyun_Zhu1;~Tianrun_Chen1;~Jianxiong_Yin1;~Simon_See1;~Jun_Liu8", "aff": "Singapore University of Technology and Design;Zhejiang University;;NVIDIA;Singapore University of Technology and Design", "aff_domain": "sutd.edu.sg;zju.edu.cn;;nvidia.com;sutd.edu.sg", "position": "PhD student;PhD student;;Associate Professor;Assistant Professor", "bibtex": "@misc{\nzhu2023reinforced,\ntitle={Reinforced Sample Reweighting Policy for Semi-supervised Learning},\nauthor={Lanyun Zhu and Tianrun Chen and Jianxiong Yin and Simon See and Jun Liu},\nyear={2023},\nurl={https://openreview.net/forum?id=8FL8vRvlk59}\n}", "github": "", "project": "", "reviewers": "pcYv;DziM;EvPU;eQW6", "site": "https://openreview.net/forum?id=8FL8vRvlk59", "pdf_size": 574238, "recommendation": "3;5;5;6", "confidence": "5;5;4;3", "correctness": "2;2;2;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "41;59;65;14", "wc_strength_and_weaknesses": "109;318;136;18", "wc_clarity_quality_novelty_and_reproducibility": "17;7;4;12", "wc_summary_review": "16;30;25;26", "wc_review": "183;414;230;70", 
"wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "correctness_avg": [ 2.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 44.75, 19.828956099603428 ], "wc_strength_and_weaknesses_avg": [ 145.25, 108.8976009836764 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 10.0, 4.949747468305833 ], "wc_summary_review_avg": [ 24.25, 5.11737237261468 ], "wc_review_avg": [ 224.25, 124.02897846874335 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.7608859102526822, "corr_recommendation_correctness": 0.6622661785325219, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:gq6Vy2fdSmEJ:scholar.google.com/&scioq=Reinforced+Sample+Reweighting+Policy+for+Semi-supervised+Learning&hl=en&as_sdt=0,23", "gs_version_total": 0, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Singapore University of Technology and Design;Zhejiang University;NVIDIA", "aff_unique_dep": ";;NVIDIA Corporation", "aff_unique_url": "https://www.sutd.edu.sg;https://www.zju.edu.cn;https://www.nvidia.com", "aff_unique_abbr": "SUTD;ZJU;NVIDIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;0", "aff_country_unique": "Singapore;China;United States" }, { "title": "Representation Learning for Low-rank General-sum Markov Games", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12229", "id": "8FroynZv4C", "poster": "", "openreview": "https://openreview.net/forum?id=8FroynZv4C", "slides": "https://iclr.cc/virtual/2023/poster/12229", "video": "https://iclr.cc/virtual/2023/poster/12229", "author_site": "Chengzhuo Ni, Yuda Song, Xuezhou Zhang, Zihan Ding, Chi Jin, Mengdi Wang", "tldr": "We provide a general representation learning framework for multi-player general-sum Markov games.", "abstract": "We study multi-agent general-sum Markov games with nonlinear function approximation. We focus on low-rank Markov games whose transition matrix admits a hidden low-rank structure on top of an unknown non-linear representation. The goal is to design an algorithm that (1) finds an $\\varepsilon$-equilibrium policy sample efficiently without prior knowledge of the environment or the representation, and (2) permits a deep-learning friendly implementation. We leverage representation learning and present a model-based and a model-free approach to construct an effective representation from collected data. For both approaches, the algorithm achieves a sample complexity of poly$(H,d,A,1/\\varepsilon)$, where $H$ is the game horizon, $d$ is the dimension of the feature vector, $A$ is the size of the joint action space and $\\varepsilon$ is the optimality gap. When the number of players is large, the above sample complexity can scale exponentially with the number of players in the worst case. To address this challenge, we consider Markov Games with a factorized transition structure and present an algorithm that escapes such exponential scaling. To our best knowledge, this is the first sample-efficient algorithm for multi-agent general-sum Markov games that incorporates (non-linear) function approximation. 
We accompany our theoretical result with a neural network-based implementation of our algorithm and evaluate it against the widely used deep RL baseline, DQN with fictitious play.", "keywords": "Reinforcement Learning;Multi Agent;Representation Learning", "primary_area": "", "supplementary_material": "/attachment/a33d768797f87af440a2f0b5ef27a96429e5c599.zip", "author": "Chengzhuo Ni;Yuda Song;Xuezhou Zhang;Zihan Ding;Chi Jin;Mengdi Wang", "authorids": "~Chengzhuo_Ni1;~Yuda_Song2;~Xuezhou_Zhang2;~Zihan_Ding1;~Chi_Jin1;~Mengdi_Wang1", "gender": "M;M;M;M;F;M", "homepage": ";https://yudasong.github.io/;https://quantumiracle.github.io/webpage/;https://sites.google.com/view/cjin/home;http://mwang.princeton.edu;https://zhangxz1123.github.io/", "dblp": "241/5404;250/4880-1;;126/1802-1;;213/7993", "google_scholar": ";0QDCG8IAAAAJ;t5DgPBAAAAAJ;GINhGvwAAAAJ;;tR-p-r8AAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Chengzhuo_Ni1;~Yuda_Song2;~Zihan_Ding1;~Chi_Jin1;~Mengdi_Wang1;~Xuezhou_Zhang1", "aff": "Princeton University;Carnegie Mellon University;Princeton University;Princeton University;Princeton University;Princeton University", "aff_domain": "princeton.edu;andrew.cmu.edu;princeton.edu;princeton.edu;princeton.edu;princeton.edu", "position": "Graduate student;PhD student;PhD student;Assistant Professor;Full Professor;Postdoc", "bibtex": "@inproceedings{\nni2023representation,\ntitle={Representation Learning for Low-rank General-sum Markov Games},\nauthor={Chengzhuo Ni and Yuda Song and Xuezhou Zhang and Zihan Ding and Chi Jin and Mengdi Wang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=8FroynZv4C}\n}", "github": "", "project": "", "reviewers": "oMoF;xksf;H4wD;tHaH", "pdf_size": 1093407, "recommendation": "6;6;8;8", "confidence": "4;3;4;2", "correctness": "4;3;4;4", "technical_novelty": "3;2;3;3", "empirical_novelty": "0;2;3;2", "wc_summary_paper": "77;107;158;100", "wc_strength_and_weaknesses": "242;260;143;371", "wc_clarity_quality_novelty_and_reproducibility": "66;331;156;25", "wc_summary_review": "7;93;38;74", "wc_review": "392;791;495;570", "wc_reply_reviewers": "0;0;0;23", "wc_reply_authors": "409;1145;642;624", "reply_reviewers": "0;0;0;1", "reply_authors": "1;2;1;1", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.75, 1.0897247358851685 ], "wc_summary_paper_avg": [ 110.5, 29.58462438497403 ], "wc_strength_and_weaknesses_avg": [ 254.0, 80.91662375556706 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 144.5, 117.64034172000692 ], "wc_summary_review_avg": [ 53.0, 33.09833832687073 ], "wc_review_avg": [ 562.0, 146.53839087420062 ], "wc_reply_reviewers_avg": [ 5.75, 9.959292143521045 ], "wc_reply_authors_avg": [ 705.0, 270.067584134046 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.30151134457776363, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10375658813485953807&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 0, "pdf": "https://openreview.net/pdf?id=8FroynZv4C", "email": "princeton.edu;andrew.cmu.edu;princeton.edu;princeton.edu;princeton.edu;princeton.edu", "author_num": 6, 
"aff_unique_index": "0;1;0;0;0;0", "aff_unique_norm": "Princeton University;Carnegie Mellon University", "aff_unique_dep": ";", "aff_unique_url": "https://www.princeton.edu;https://www.cmu.edu", "aff_unique_abbr": "Princeton;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Reliability of CKA as a Similarity Measure in Deep Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12037", "id": "8HRvyxc606", "poster": "/media/PosterPDFs/ICLR%202023/12037.png?t=1682959373.3363323", "openreview": "https://openreview.net/forum?id=8HRvyxc606", "slides": "https://iclr.cc/virtual/2023/poster/12037", "video": "https://iclr.cc/virtual/2023/poster/12037", "author_site": "MohammadReza Davari, Stefan Horoi, Amine Natik, Guillaume Lajoie, Guy Wolf, Eugene Belilovsky", "tldr": "We extensively study a broad class of cases where the very popular CKA analysis method for deep representations can give unreliable results.", "abstract": "Comparing learned neural representations in neural networks is a challenging but important problem, which has been approached in different ways. The Centered Kernel Alignment (CKA) similarity metric, particularly its linear variant, has recently become a popular approach and has been widely used to compare representations of a network's different layers, of architecturally similar networks trained differently, or of models with different architectures trained on the same data. A wide variety of claims about similarity and dissimilarity of these various representations have been made using CKA results. In this work we present analysis that formally characterizes CKA sensitivity to a large class of simple transformations, which can naturally occur in the context of modern machine learning. This provides a concrete explanation to CKA sensitivity to outliers, which has been observed in past works, and to transformations that preserve the linear separability of the data, an important generalization attribute. We empirically investigate several weaknesses of the CKA similarity metric, demonstrating situations in which it gives unexpected or counterintuitive results. Finally we study approaches for modifying representations to maintain functional behaviour while changing the CKA value. 
Our results illustrate that, in many cases, the CKA value can be easily manipulated without substantial changes to the functional behaviour of the models, and call for caution when leveraging activation alignment metrics.", "keywords": "Representation Learning;Similarity Measures;Centered Kernel Alignment (CKA)", "primary_area": "", "supplementary_material": "/attachment/bfcb004dfa33cf8903100a8af731004adb87426d.zip", "author": "MohammadReza Davari;Stefan Horoi;Amine Natik;Guillaume Lajoie;Guy Wolf;Eugene Belilovsky", "authorids": "~MohammadReza_Davari1;~Stefan_Horoi1;~Amine_Natik2;~Guillaume_Lajoie1;~Guy_Wolf1;~Eugene_Belilovsky1", "gender": "M;;M;M;M;M", "homepage": "https://davari.io/;;https://dms.umontreal.ca/~natika/;https://dms.umontreal.ca/~lajoie/;http://guywolf.org;http://eugenium.github.io", "dblp": "239/8727;256/5511;;31/10384;120/1308;42/11445", "google_scholar": "https://scholar.google.ca/citations?user=4AztFtEAAAAJ;https://scholar.google.fr/citations?user=jUm5G6sAAAAJ;;;g0k3SjcAAAAJ;https://scholar.google.fr/citations?user=CffJDoEAAAAJ", "orcid": ";0000-0003-2951-2600;;;0000-0002-6740-059X;", "linkedin": "rezadavari/;;;;;", "or_profile": "~MohammadReza_Davari1;~Stefan_Horoi1;~Amine_Natik2;~Guillaume_Lajoie1;~Guy_Wolf1;~Eugene_Belilovsky1", "aff": "Concordia University, Montreal;Universit\u00e9 de Montr\u00e9al;University of Montreal;Mila - Quebec Artificial Intelligence Institute;University of Montreal;Concordia University, Montreal", "aff_domain": "concordia.ca;umontreal.ca;umontreal.ca;mila.quebec;umontreal.ca;concordia.ca", "position": "PhD student;PhD student;PhD student;Associate Professor;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\ndavari2023reliability,\ntitle={Reliability of {CKA} as a Similarity Measure in Deep Learning},\nauthor={MohammadReza Davari and Stefan Horoi and Amine Natik and Guillaume Lajoie and Guy Wolf and Eugene Belilovsky},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=8HRvyxc606}\n}", "github": "", "project": "", "reviewers": "wkkL;UWFM;YJ8i;U5e1", "pdf_size": 1221548, "recommendation": "6;6;6;8", "confidence": "4;4;4;4", "correctness": "3;3;4;4", "technical_novelty": "3;2;2;4", "empirical_novelty": "0;3;2;4", "wc_summary_paper": "58;104;32;155", "wc_strength_and_weaknesses": "438;300;449;162", "wc_clarity_quality_novelty_and_reproducibility": "162;209;4;7", "wc_summary_review": "106;58;60;55", "wc_review": "764;671;545;379", "wc_reply_reviewers": "309;201;144;0", "wc_reply_authors": "1644;720;1363;222", "reply_reviewers": "3;2;1;0", "reply_authors": "4;3;3;1", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 2.25, 1.479019945774904 ], "wc_summary_paper_avg": [ 87.25, 46.847491928597414 ], "wc_strength_and_weaknesses_avg": [ 337.25, 116.9815690611132 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 95.5, 91.52731832627896 ], "wc_summary_review_avg": [ 69.75, 21.00446381129497 ], "wc_review_avg": [ 589.75, 144.3803570434704 ], "wc_reply_reviewers_avg": [ 163.5, 111.45514792955954 ], "wc_reply_authors_avg": [ 987.25, 554.4228417913533 ], "reply_reviewers_avg": [ 1.5, 1.118033988749895 ], "reply_authors_avg": [ 2.75, 1.0897247358851685 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5773502691896257, 
"gs_citation": 51, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15043180937751747427&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=8HRvyxc606", "email": "concordia.ca;umontreal.ca;umontreal.ca;mila.quebec;umontreal.ca;concordia.ca", "author_num": 6, "aff_unique_index": "0;1;2;3;2;0", "aff_unique_norm": "Concordia University;Universit\u00e9 de Montr\u00e9al;University of Montreal;Quebec Artificial Intelligence Institute", "aff_unique_dep": ";;;Artificial Intelligence", "aff_unique_url": "https://www.concordia.ca;https://www.umontreal.ca;https://wwwumontreal.ca;https://mila.quebec", "aff_unique_abbr": "Concordia;UdeM;UM;Mila", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Montreal;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "Canada" }, { "id": "8IBtyLQ8GKw", "title": "Ahead-of-Time P-Tuning", "track": "main", "status": "Reject", "tldr": "A novel method for parameter efficient fine-tuning. Can perform multi-task inference like P-Tuning, but up to 1.3x times faster than it.", "abstract": "This paper proposes a new parameter-efficient method for fine-tuning, AoT P-Tuning. This method adds input-dependent biases before evaluating the Transformer layer, reducing the required evaluation time when compared to P-Tuning. Same as P-Tuning, AoT P-Tuning allows multi-task inference with a single backbone model for evaluating different tasks in a single batch.\nWe experimented with the proposed method on the GLUE and SuperGLUE benchmarking datasets using RoBERTa-Base, RoBERTa-Large, and DeBERTa-XL backbone models. Our observations show that AoT P-tuning performed on par with or better than P-Tuning v2 while being up to $1.3\\times$ times faster during inference.", "keywords": "Efficient Fine-Tuning;P-Tuning;Multi-Task Inference;Transformers;GLUE;SuperGLUE", "primary_area": "", "supplementary_material": "/attachment/cf7409f0e7496557b112eead49dc54883ba09bde.zip", "author": "Daniil Gavrilov", "authorids": "~Daniil_Gavrilov1", "gender": "", "homepage": "https://kefirski.me", "dblp": "234/8563", "google_scholar": "https://scholar.google.ru/citations?user=PAZUwukAAAAJ", "orcid": "", "linkedin": "", "or_profile": "~Daniil_Gavrilov1", "aff": "T-Bank", "aff_domain": "tbank.ru", "position": "Principal Researcher", "bibtex": "@misc{\ngavrilov2023aheadoftime,\ntitle={Ahead-of-Time P-Tuning},\nauthor={Daniil Gavrilov},\nyear={2023},\nurl={https://openreview.net/forum?id=8IBtyLQ8GKw}\n}", "github": "", "project": "", "reviewers": "YHHD;ozTx;BA6Y;8wRe", "site": "https://openreview.net/forum?id=8IBtyLQ8GKw", "pdf_size": 3727219, "recommendation": "3;5;5;6", "confidence": "4;4;3;4", "correctness": "2;3;3;4", "technical_novelty": "2;3;3;3", "empirical_novelty": "1;3;3;3", "wc_summary_paper": "80;51;94;39", "wc_strength_and_weaknesses": "1000;214;100;45", "wc_clarity_quality_novelty_and_reproducibility": "122;29;31;43", "wc_summary_review": "131;22;79;20", "wc_review": "1333;316;304;147", "wc_reply_reviewers": "145;150;0;0", "wc_reply_authors": "729;291;49;49", "reply_reviewers": "1;1;0;0", "reply_authors": "2;2;1;1", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 66.0, 21.988633427296023 ], "wc_strength_and_weaknesses_avg": [ 339.75, 386.0378057910909 ], 
"wc_clarity_quality_novelty_and_reproducibility_avg": [ 56.25, 38.336503492102665 ], "wc_summary_review_avg": [ 63.0, 45.8530260724415 ], "wc_review_avg": [ 525.0, 471.2403845172865 ], "wc_reply_reviewers_avg": [ 73.75, 73.77118339839751 ], "wc_reply_authors_avg": [ 279.5, 277.6882244532526 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 1, 0 ], "corr_recommendation_confidence": -0.13245323570650439, "corr_recommendation_correctness": 0.9733285267845754, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18338333452342674870&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0", "aff_unique_norm": "T-Bank", "aff_unique_dep": "", "aff_unique_url": "https://www.tbank.com.cn", "aff_unique_abbr": "", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "id": "8IMz713Bxcq", "title": "Towards Unsupervised Time Series Representation Learning: A Decomposition Perspective", "track": "main", "status": "Reject", "tldr": "An unsupervised time series representation learning approach with the help of time series decomposition and contrastive learning", "abstract": "Existing contrastive methods of universal time series representation learning mainly rely on distilling invariant patterns at varying scales and building contrastive loss with the help of negative sampling. However, the invariance assumptions may not hold in real-world time-series data, and the infamous negative sampling could bring in new biases for representation learning. In this work, we propose a novel contrastive learning approach toward time series representation learning on top of trend-seasonality decomposition, namely TS-DC. TS-DC differentiates itself from prior methods in three folds: 1) a time series decomposition approach is devised to distill different aspects/components of a complex time series; 2) a novel component-wise contrastive loss is proposed in which negative sampling is not necessary; 3) the informative signals of time series can be captured comprehensively by means of adaptive contrasting. Extensive experiments on different public benchmark datasets validate the superior performance of our proposed representation learning method. 
", "keywords": "Time Series;Representation Learning;Contrastive Learning", "primary_area": "", "supplementary_material": "/attachment/5cbbd6da3f22b2b57f787feb2e9f770f5ff76c8b.zip", "author": "Yan Li;Xinjiang Lu;Jingjing Gu;Haishuai Wang;Dejing Dou", "authorids": "~Yan_Li18;~Xinjiang_Lu2;gujingjing@nuaa.edu.cn;~Haishuai_Wang2;~Dejing_Dou3", "gender": ";M;;M;", "homepage": ";;;https://www.linkedin.com/in/haishuai-wang-b5241775/;", "dblp": ";82/10445;;163/0767;", "google_scholar": ";J08FRggAAAAJ;;;", "orcid": ";0000-0002-3602-0391;;0000-0003-1617-0920;", "linkedin": ";;;;", "or_profile": "~Yan_Li18;~Xinjiang_Lu2;gujingjing@nuaa.edu.cn;~Haishuai_Wang2;~Dejing_Dou3", "aff": ";Baidu;;Zhejiang University;", "aff_domain": ";baidu.com;;zju.edu.cn;", "position": ";Researcher;;Research Professor;", "bibtex": "@misc{\nli2023towards,\ntitle={Towards Unsupervised Time Series Representation Learning: A Decomposition Perspective},\nauthor={Yan Li and Xinjiang Lu and Jingjing Gu and Haishuai Wang and Dejing Dou},\nyear={2023},\nurl={https://openreview.net/forum?id=8IMz713Bxcq}\n}", "github": "", "project": "", "reviewers": "9QzX;BQk8;BofU;HiGm", "site": "https://openreview.net/forum?id=8IMz713Bxcq", "pdf_size": 9572551, "recommendation": "3;3;6;6", "confidence": "2;4;3;3", "correctness": "3;4;3;3", "technical_novelty": "3;4;3;3", "empirical_novelty": "0;3;2;3", "wc_summary_paper": "126;24;81;62", "wc_strength_and_weaknesses": "214;399;85;124", "wc_clarity_quality_novelty_and_reproducibility": "13;45;28;1", "wc_summary_review": "53;73;243;14", "wc_review": "406;541;437;201", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1616;1092;737;373", "reply_reviewers": "0;0;0;0", "reply_authors": "3;2;1;1", "recommendation_avg": [ 4.5, 1.5 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 1.224744871391589 ], "wc_summary_paper_avg": [ 73.25, 36.72448093574639 ], "wc_strength_and_weaknesses_avg": [ 205.5, 121.11667928076628 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 21.75, 16.48294573187693 ], "wc_summary_review_avg": [ 95.75, 87.62241436984033 ], "wc_review_avg": [ 396.25, 123.31945304776534 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 954.5, 458.7856253197129 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": -0.5773502691896258, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:gvxF4MSOBwQJ:scholar.google.com/&scioq=Towards+Unsupervised+Time+Series+Representation+Learning:+A+Decomposition+Perspective&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Baidu;Zhejiang University", "aff_unique_dep": "Baidu, Inc.;", "aff_unique_url": "https://www.baidu.com;https://www.zju.edu.cn", "aff_unique_abbr": "Baidu;ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Visually-Augmented Language Modeling", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11283", "id": "8IN-qLkl215", "poster": "/media/PosterPDFs/ICLR%202023/11283.png?t=1682668710.0185053", "openreview": "https://openreview.net/forum?id=8IN-qLkl215", "slides": "https://iclr.cc/virtual/2023/poster/11283", "video": 
"https://iclr.cc/virtual/2023/poster/11283", "author_site": "Weizhi Wang, Li Dong, Hao Cheng, Haoyu Song, Xiaodong Liu, Xifeng Yan, Jianfeng Gao, Furu Wei", "tldr": "We propose a novel pre-trained framework, to Visually-augment text tokens with retrieved relevant images for multimodal grounded Language Modeling.", "abstract": "Human language is grounded on multimodal knowledge including visual knowledge like colors, sizes, and shapes. However, current large-scale pre-trained language models rely on the text-only self-supervised training with massive text data, which precludes them from utilizing relevant visual information when necessary. To address this, we propose a novel pre-training framework, named VaLM, to Visually-augment text tokens with retrieved relevant images for Language Modeling. Specifically, VaLM builds on a novel latent text-image alignment method via an image retrieval module to fetch corresponding images given a textual context. With the visually-augmented context, VaLM uses a visual knowledge fusion layer to enable multimodal grounded language modeling by attending on both text context and visual knowledge in images. We evaluate VaLM on various visual knowledge intensive commonsense reasoning tasks, which require visual information to excel. The experimental results illustrate that VaLM outperforms all strong language-only and vision-language baselines with substantial gains on reasoning object commonsense including color, size, and shape.", "keywords": "visually-grounded language modeling;visual commonsense reasoning;pre-trained visually-augmented language model", "primary_area": "", "supplementary_material": "", "author": "Weizhi Wang;Li Dong;Hao Cheng;Haoyu Song;Xiaodong Liu;Xifeng Yan;Jianfeng Gao;Furu Wei", "authorids": "~Weizhi_Wang1;~Li_Dong1;~Hao_Cheng4;~Haoyu_Song1;~Xiaodong_Liu1;~Xifeng_Yan1;~Jianfeng_Gao1;~Furu_Wei1", "gender": "M;M;M;M;;;M;M", "homepage": "https://victorwz.github.io;http://dong.li;https://sites.google.com/site/hcheng2site/Home;https://songhaoyu.github.io/;;https://sites.cs.ucsb.edu/~xyan/;https://www.microsoft.com/en-us/research/people/jfgao/;https://www.microsoft.com/en-us/research/people/fuwei/", "dblp": "98/6969;85/5090-4;09/5158-2;55/5450-2;65/622;y/XifengYan;92/5339;72/5870", "google_scholar": "UC2_V1MAAAAJ;wEfQgPgAAAAJ;https://scholar.google.com/citations?hl=en;CmNI8ecAAAAJ;NIewcxMAAAAJ;XZV2eogAAAAJ;https://scholar.google.com/citations?hl=en;G-V1VpwAAAAJ", "orcid": ";;0000-0001-7988-3149;;;;;", "linkedin": ";;;hysong-10021b150/;;;;", "or_profile": "~Weizhi_Wang1;~Li_Dong1;~Hao_Cheng4;~Haoyu_Song1;~Xiaodong_Liu1;~Xifeng_Yan1;~Jianfeng_Gao1;~Furu_Wei1", "aff": "University of California, Santa Barbara;Microsoft Research;Microsoft Research;Harbin Institute of Technology;Microsoft Research;UC Santa Barbara;Microsoft Research;Microsoft Research", "aff_domain": "ucsb.edu;microsoft.com;microsoft.com;ir.hit.edu.cn;microsoft.com;ucsb.edu;microsoft.com;microsoft.com", "position": "PhD student;Principal Researcher;Researcher;PhD student;Researcher;Full Professor;Principal Researcher;Distinguished Scientist", "bibtex": "@inproceedings{\nwang2023visuallyaugmented,\ntitle={Visually-Augmented Language Modeling},\nauthor={Weizhi Wang and Li Dong and Hao Cheng and Haoyu Song and Xiaodong Liu and Xifeng Yan and Jianfeng Gao and Furu Wei},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=8IN-qLkl215}\n}", "github": "", "project": "", "reviewers": "j6aD;9KzG;f9V4;FCDh", 
"pdf_size": 2460392, "recommendation": "6;6;6;10", "confidence": "4;4;4;4", "correctness": "3;3;4;4", "technical_novelty": "3;3;3;4", "empirical_novelty": "3;3;3;4", "wc_summary_paper": "94;41;77;285", "wc_strength_and_weaknesses": "443;212;191;454", "wc_clarity_quality_novelty_and_reproducibility": "55;27;28;114", "wc_summary_review": "36;47;71;22", "wc_review": "628;327;367;875", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "802;514;128;629", "reply_reviewers": "0;0;0;0", "reply_authors": "2;1;1;1", "recommendation_avg": [ 7.0, 1.7320508075688772 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 124.25, 94.76121305681983 ], "wc_strength_and_weaknesses_avg": [ 325.0, 123.78408621466654 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 56.0, 35.31996602489872 ], "wc_summary_review_avg": [ 44.0, 17.930421077041107 ], "wc_review_avg": [ 549.25, 220.75141562400003 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 518.25, 247.5342148067616 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6813624765517199122&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=8IN-qLkl215", "email": "ucsb.edu;microsoft.com;microsoft.com;ir.hit.edu.cn;microsoft.com;ucsb.edu;microsoft.com;microsoft.com", "author_num": 8, "aff_unique_index": "0;1;1;2;1;0;1;1", "aff_unique_norm": "University of California, Santa Barbara;Microsoft;Harbin Institute of Technology", "aff_unique_dep": ";Microsoft Research;", "aff_unique_url": "https://www.ucsb.edu;https://www.microsoft.com/en-us/research;http://www.hit.edu.cn/", "aff_unique_abbr": "UCSB;MSR;HIT", "aff_campus_unique_index": "0;2;0", "aff_campus_unique": "Santa Barbara;;Harbin", "aff_country_unique_index": "0;0;0;1;0;0;0;0", "aff_country_unique": "United States;China" }, { "title": "Provable Memorization Capacity of Transformers", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11761", "id": "8JCg5xJCTPR", "poster": "", "openreview": "https://openreview.net/forum?id=8JCg5xJCTPR", "slides": "https://iclr.cc/virtual/2023/poster/11761", "video": "https://iclr.cc/virtual/2023/poster/11761", "author_site": "Junghwan Kim, Michelle Kim, Barzan Mozafari", "tldr": "We provide the memorization capacity of Transformer architecture in sequence input.", "abstract": "Quantifying memorization capacity is essential for understanding the expressiveness and generalizability of deep learning model architectures. However, the memorization capacity of the Transformer architecture has yet to be explored. In this work, we present the first study of the memorization capacity of the Transformer architecture. We prove that Transformers are capable of memorizing $N$ sequence-to-sequence mappings of length $n$ with $d$-dimensional input tokens using $\\tilde{O}(d + n + \\sqrt{nN})$ parameters. Our theory supports memorization both with and without permutation equivariance, utilizing positional encodings in the latter case. Building on our theory, we also analyze the memorization capacity of Transformers in the sequence classification and language modeling tasks. 
To verify these theoretical findings, we conduct experiments analyzing the memorization capacity of Transformers in the natural language domain.", "keywords": "Transformer;Expressiveness;Memorization;Deep learning theory;contextual mapping;permutation equivariance", "primary_area": "", "supplementary_material": "", "author": "Junghwan Kim;Michelle Kim;Barzan Mozafari", "authorids": "~Junghwan_Kim1;~Michelle_Kim2;~Barzan_Mozafari1", "gender": "M;F;M", "homepage": ";https://cozymichelle.github.io/;https://web.eecs.umich.edu/~mozafari/", "dblp": ";;", "google_scholar": "msafJ3UAAAAJ;;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Junghwan_Kim1;~Michelle_Kim2;~Barzan_Mozafari1", "aff": "University of Michigan - Ann Arbor;Michigan State University;University of Michigan", "aff_domain": "umich.edu;msu.edu;umich.edu", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nkim2023provable,\ntitle={Provable Memorization Capacity of Transformers},\nauthor={Junghwan Kim and Michelle Kim and Barzan Mozafari},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=8JCg5xJCTPR}\n}", "github": "", "project": "", "reviewers": "Nzx7;YzgV;XAtG;SL1m", "pdf_size": 543355, "recommendation": "5;8;8;8", "confidence": "3;3;3;2", "correctness": "3;4;3;4", "technical_novelty": "3;3;4;2", "empirical_novelty": "2;0;0;2", "wc_summary_paper": "50;162;99;62", "wc_strength_and_weaknesses": "552;214;54;82", "wc_clarity_quality_novelty_and_reproducibility": "36;59;70;51", "wc_summary_review": "65;36;135;15", "wc_review": "703;471;358;210", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1477;333;557;255", "reply_reviewers": "0;0;0;0", "reply_authors": "2;1;1;1", "recommendation_avg": [ 7.25, 1.299038105676658 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 1.0, 1.0 ], "wc_summary_paper_avg": [ 93.25, 43.6083420918521 ], "wc_strength_and_weaknesses_avg": [ 225.5, 197.95138292015037 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 54.0, 12.389511693363866 ], "wc_summary_review_avg": [ 62.75, 45.334175850014084 ], "wc_review_avg": [ 435.5, 180.05068730776898 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 655.5, 487.0757128003818 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13223636017361904397&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "pdf": "https://openreview.net/pdf?id=8JCg5xJCTPR", "email": "umich.edu;msu.edu;umich.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Michigan;Michigan State University", "aff_unique_dep": ";", "aff_unique_url": "https://www.umich.edu;https://www.msu.edu", "aff_unique_abbr": "UM;MSU", "aff_campus_unique_index": "0", "aff_campus_unique": "Ann Arbor;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "8JEpyIgQS0t", "title": "Dynamical Signatures of Learning in Recurrent Networks", "track": "main", "status": "Reject", "tldr": "Self-organized learning of temporal sequences results in subcritical dynamics, which we propose is a signature of specialization.
", "abstract": "Recurrent neural networks (RNNs) are powerful computational tools that operate best near the edge of chaos, where small perturbations in neuronal firing are transmitted between neurons with minimal amplification or loss. In this article, we depart from the observation that both stimulus and noise can be seen as perturbations to the intrinsic dynamics of a recurrent network, however stimulus information must be reliably preserved, while noise must be discarded. First, we show that self-organizing recurrent networks (SORNs) that learn the spatio-temporal structure of their inputs, increase their recurrent memory by preferentially propagating the relevant stimulus-specific structured signals, while becoming more robust to random perturbation. We find that the computational advantages gained through self-supervised learning are accompanied by a shift from critical to ordered dynamics, and that this dynamical shift varies with the structure of the stimulus. Next, we show that SORNs with subcritical dynamics can outperform their random RNNs counterparts with critical dynamics, on a range of tasks, including a temporal MNIST and a sequential shape-rotation task. Interestingly, when a shape is rotated, both the invariant (shape) and the variant (motion direction) aspects of the stimulus sequence are improved through learning in the subcritical SORNs. We propose that the shift in criticality is a signature of specialization and we expect it to be found in all cases in which general-purpose recurrent networks acquire self-correcting properties by internalizing the statistical structure of their inputs.", "keywords": "RNNs;self-organization;criticality;spatio-temporal dynamics", "primary_area": "", "supplementary_material": "", "author": "Marius Schneider;Andreea Lazar", "authorids": "marius.schneider@esi-frankfurt.de;andreea.lazar@esi-frankfurt.de", "gender": ";", "homepage": ";", "dblp": ";", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": ";", "aff": ";", "aff_domain": ";", "position": ";", "bibtex": "@misc{\nschneider2023dynamical,\ntitle={Dynamical Signatures of Learning in Recurrent Networks},\nauthor={Marius Schneider and Andreea Lazar},\nyear={2023},\nurl={https://openreview.net/forum?id=8JEpyIgQS0t}\n}", "github": "", "project": "", "reviewers": "vp6e;CT5J;B89g;RfJy", "site": "https://openreview.net/forum?id=8JEpyIgQS0t", "pdf_size": 2446826, "recommendation": "3;3;3;5", "confidence": "5;4;4;4", "correctness": "3;2;3;2", "technical_novelty": "1;2;2;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "84;57;54;54", "wc_strength_and_weaknesses": "126;95;158;609", "wc_clarity_quality_novelty_and_reproducibility": "128;15;58;19", "wc_summary_review": "70;18;42;76", "wc_review": "408;185;312;758", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 62.25, 12.616952880945542 ], "wc_strength_and_weaknesses_avg": [ 247.0, 210.1844428115459 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 55.0, 45.37069538810266 ], "wc_summary_review_avg": [ 51.5, 23.21098877687032 ], "wc_review_avg": [ 415.75, 212.84075620049842 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], 
"reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:2OVNLpQWX1oJ:scholar.google.com/&scioq=Dynamical+Signatures+of+Learning+in+Recurrent+Networks&hl=en&as_sdt=0,33", "gs_version_total": 0 }, { "id": "8JRQza2MaO4", "title": "Revitalize Region Feature for Democratizing Video-language Pre-training of Retrieval", "track": "main", "status": "Reject", "tldr": "", "abstract": "Recent dominant methods for video-language pre-training (VLP) learn transferable representations from the raw pixels in an end-to-end manner to achieve advanced performance on downstream video-language retrieval. Despite the impressive results, VLP research becomes extremely expensive with the need for massive data and a long training time, preventing further explorations. In this work, we revitalize region features of sparsely sampled video clips to significantly reduce both spatial and temporal visual redundancy towards democratizing VLP research at the same time achieving state-of-the-art results. Specifically, to fully explore the potential of region features, we introduce a novel bidirectional region-word alignment regularization that properly optimizes the fine-grained relations between regions and certain words in sentences, eliminating the domain/modality disconnections between pre-extracted region features and text. Extensive results of downstream video-language retrieval tasks on four datasets demonstrate the superiority of our method on both effectiveness and efficiency, e.g., our method achieves competing results with 80% fewer data and 85% less pre-training time compared to the most efficient VLP method so far.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/36427dbf797f7e78729c18f9649f56ae56c629dd.zip", "author": "Guanyu Cai;Yixiao Ge;Binjie Zhang;Jinpeng Wang;Rui Yan;Xudong Lin;Ying Shan;Lianghua He;Xiaohu Qie;Jianping Wu;Mike Zheng Shou", "authorids": "~Guanyu_Cai1;~Yixiao_Ge2;~Binjie_Zhang1;~Jinpeng_Wang2;~Rui_Yan5;~Xudong_Lin1;~Ying_Shan2;~Lianghua_He1;~Xiaohu_Qie1;~Jianping_Wu1;~Mike_Zheng_Shou1", "gender": "M;F;M;M;M;M;M;M;;M;", "homepage": ";https://geyixiao.com/;https://binjiezhang.github.io/;https://fingerrec.github.io/;https://ruiyan1995.github.io/;;;;;https://www.tsinghua.edu.cn/publish/csen/4623/2010/20101224194435414856631/20101224194435414856631_.html;http://www.columbia.edu/~zs2262/", "dblp": "218/6201;228/6649;275/3673;308/1365;;23/7723-3;68/5910;24/2365;62/1827;;284/0807", "google_scholar": "Des__9gAAAAJ;TtU74NAAAAAJ;https://scholar.google.com/citations?hl=en;UtaAVacAAAAJ;https://scholar.google.com.hk/citations?user=PWy5LfMAAAAJ;https://scholar.google.com.hk/citations?hl=en;4oXBp9UAAAAJ;;https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=Y-nqSYgAAAAJ;h1-3lSoAAAAJ", "orcid": ";;;;;;0000-0001-7673-8325;;;;", "linkedin": ";;;;;;YingShanProfile/;;;;", "or_profile": "~Guanyu_Cai1;~Yixiao_Ge2;~Binjie_Zhang1;~Jinpeng_Wang2;~Rui_Yan5;~Xudong_Lin1;~Ying_Shan2;~Lianghua_He1;~Xiaohu_Qie1;~Jianping_Wu1;~Zheng_Shou1", "aff": "Tongji University;Tencent;National University of Singapore;National University of Singapore;Nanjing University;Columbia University;Tencent PCG ARC Lab;Tongji University;Tencent;;National University of Singapore", "aff_domain": 
"tongji.edu.cn;tencent.com;nus.edu;nus.edu;nju.edu.cn;columbia.edu;arc.tencent.com;tongji.edu.cn;tencent.com;;nus.edu.sg", "position": "PhD student;Researcher;PhD student;PhD student;Researcher;PhD student;Director;Full Professor;VP;;Assistant Professor", "bibtex": "@misc{\ncai2023revitalize,\ntitle={Revitalize Region Feature for Democratizing Video-language Pre-training of Retrieval},\nauthor={Guanyu Cai and Yixiao Ge and Binjie Zhang and Jinpeng Wang and Rui Yan and Xudong Lin and Ying Shan and Lianghua He and Xiaohu Qie and Jianping Wu and Mike Zheng Shou},\nyear={2023},\nurl={https://openreview.net/forum?id=8JRQza2MaO4}\n}", "github": "", "project": "", "reviewers": "nWq8;yXUD;ymrc", "site": "https://openreview.net/forum?id=8JRQza2MaO4", "pdf_size": 1117434, "recommendation": "3;5;6", "confidence": "5;3;5", "correctness": "3;3;4", "technical_novelty": "2;2;2", "empirical_novelty": "2;0;2", "wc_summary_paper": "69;90;70", "wc_strength_and_weaknesses": "216;183;243", "wc_clarity_quality_novelty_and_reproducibility": "8;38;33", "wc_summary_review": "77;98;22", "wc_review": "370;409;368", "wc_reply_reviewers": "119;0;0", "wc_reply_authors": "1778;690;1173", "reply_reviewers": "1;0;0", "reply_authors": "5;2;2", "recommendation_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 4.333333333333333, 0.9428090415820634 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 1.3333333333333333, 0.9428090415820634 ], "wc_summary_paper_avg": [ 76.33333333333333, 9.672412085697939 ], "wc_strength_and_weaknesses_avg": [ 214.0, 24.535688292770594 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 26.333333333333332, 13.123346456686352 ], "wc_summary_review_avg": [ 65.66666666666667, 32.04510709747884 ], "wc_review_avg": [ 382.3333333333333, 18.87385022252275 ], "wc_reply_reviewers_avg": [ 39.666666666666664, 56.09713797413277 ], "wc_reply_authors_avg": [ 1213.6666666666667, 445.1039828574392 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 3.0, 1.4142135623730951 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 11, 0 ], "corr_recommendation_confidence": -0.18898223650461363, "corr_recommendation_correctness": 0.7559289460184545, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=104728638321013330&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2;2;3;4;1;0;1;2", "aff_unique_norm": "Tongji University;Tencent;National University of Singapore;Nanjing University;Columbia University", "aff_unique_dep": ";Tencent Holdings Limited;;;", "aff_unique_url": "https://www.tongji.edu.cn;https://www.tencent.com;https://www.nus.edu.sg;https://www.nju.edu.cn;https://www.columbia.edu", "aff_unique_abbr": "Tongji;Tencent;NUS;Nanjing U;Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1;0;2;0;0;0;1", "aff_country_unique": "China;Singapore;United States" }, { "title": "Unified Discrete Diffusion for Simultaneous Vision-Language Generation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11819", "id": "8JqINxA-2a", "poster": "/media/PosterPDFs/ICLR%202023/11819.png?t=1680763740.0453675", "openreview": "https://openreview.net/forum?id=8JqINxA-2a", "slides": "https://iclr.cc/virtual/2023/poster/11819", "video": "https://iclr.cc/virtual/2023/poster/11819", "author_site": "Minghui HU, Chuanxia Zheng, Zuopeng Yang, Tat-Jen Cham, Heliang Zheng, 
Chaoyue Wang, Dacheng Tao, Ponnuthurai Suganthan", "tldr": "We propose a Unified Discrete Denoising Diffusion model, which allows us to construct a joint vision-language probability distribution and thereby generate cross-domain results simultaneously.", "abstract": "The recently developed discrete diffusion model performs extraordinarily well in generation tasks, especially in the text-to-image task, showing great potential for modeling multimodal signals. In this paper, we leverage these properties and present a unified multimodal generation model, which can perform text-based, image-based, and even simultaneous vision-language generation using a single model. Specifically, we unify the discrete diffusion process for multimodal signals by proposing a unified Markov transition matrix and a unified objective. Moreover, we design a multimodal mutual attention module to highlight the inter-modal linkages, which is vital for multimodal generation. Extensive experiments indicate that our proposed method can perform comparably to the state-of-the-art solutions in various generation tasks.", "keywords": "Multi-modal;Image generation;Image Caption.", "primary_area": "", "supplementary_material": "/attachment/e652e7b4146fdb09e40aadfab59701afccb484b9.zip", "author": "Minghui Hu;Chuanxia Zheng;Zuopeng Yang;Tat-Jen Cham;Heliang Zheng;Chaoyue Wang;Dacheng Tao;Ponnuthurai N. Suganthan", "authorids": "~Minghui_Hu1;~Chuanxia_Zheng1;~Zuopeng_Yang1;~Tat-Jen_Cham1;~Heliang_Zheng1;~Chaoyue_Wang2;~Dacheng_Tao1;~Ponnuthurai_N._Suganthan1", "gender": "M;M;M;M;M;M;;M", "homepage": "https://mhh0318.github.io/;http://www.chuanxiaz.com/;http://www.pami.sjtu.edu.cn/StudentDetail/54;https://personal.ntu.edu.sg/astjcham/;;;;https://github.com/P-N-Suganthan", "dblp": "163/9000-1;195/8988;254/9451.html;29/3808;208/4220;174/7172;;s/PNSuganthan", "google_scholar": "https://scholar.google.es/citations?user=9jfGj64AAAAJ;mvpE6bIAAAAJ;qbuqD10AAAAJ;Lx3X7W0AAAAJ;VRgciTQAAAAJ;https://scholar.google.com.au/citations?user=ioj1BycAAAAJ;;https://scholar.google.com.sg/citations?hl=en", "orcid": ";;0000-0003-2151-1777;0000-0001-5264-2572;;;;0000-0003-0901-5105", "linkedin": ";chuanxia-zheng-80a3b8110/;;tatjencham/;;;;", "or_profile": "~Minghui_Hu1;~Chuanxia_Zheng1;~Zuopeng_Yang1;~Tat-Jen_Cham1;~Heliang_Zheng1;~Chaoyue_Wang2;~Dacheng_Tao1;~Ponnuthurai_N._Suganthan1", "aff": "Nanyang Technological University;University of Oxford;Shanghai Jiaotong University;Nanyang Technological University;USTC;JD.com;;University of Qatar", "aff_domain": "ntu.edu.sg;ox.ac.uk;sjtu.edu;ntu.edu.sg;ustc.edu;jd.com;;qu.edu.qa", "position": "PhD student;Postdoc;PhD student;Associate Professor;Researcher;Researcher;;Full Professor", "bibtex": "@inproceedings{\nhu2023unified,\ntitle={Unified Discrete Diffusion for Simultaneous Vision-Language Generation},\nauthor={Minghui Hu and Chuanxia Zheng and Zuopeng Yang and Tat-Jen Cham and Heliang Zheng and Chaoyue Wang and Dacheng Tao and Ponnuthurai N. 
Suganthan},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=8JqINxA-2a}\n}", "github": "", "project": "", "reviewers": "1mAH;bEZ6;cr8c;tuqJ", "pdf_size": 17105657, "recommendation": "5;6;6;8", "confidence": "3;4;4;3", "correctness": "2;3;4;4", "technical_novelty": "2;3;2;4", "empirical_novelty": "3;3;2;3", "wc_summary_paper": "83;44;185;102", "wc_strength_and_weaknesses": "263;176;165;285", "wc_clarity_quality_novelty_and_reproducibility": "12;45;2;26", "wc_summary_review": "4;24;28;33", "wc_review": "362;289;380;446", "wc_reply_reviewers": "107;92;38;0", "wc_reply_authors": "1096;1594;1196;295", "reply_reviewers": "1;2;1;0", "reply_authors": "3;4;3;1", "recommendation_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 103.5, 51.4902903468217 ], "wc_strength_and_weaknesses_avg": [ 222.25, 52.47558956314831 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 21.25, 16.145819892467525 ], "wc_summary_review_avg": [ 22.25, 11.008519428151999 ], "wc_review_avg": [ 369.25, 55.89890428264225 ], "wc_reply_reviewers_avg": [ 59.25, 42.763155870445296 ], "wc_reply_authors_avg": [ 1045.25, 471.5142495195665 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.75, 1.0897247358851685 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": -0.2294157338705618, "corr_recommendation_correctness": 0.7608859102526822, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9159296411950159363&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=8JqINxA-2a", "email": "ntu.edu.sg;ox.ac.uk;sjtu.edu;ntu.edu.sg;ustc.edu;jd.com;;qu.edu.qa", "author_num": 8, "aff_unique_index": "0;1;2;0;3;4;5", "aff_unique_norm": "Nanyang Technological University;University of Oxford;Shanghai Jiao Tong University;University of Science and Technology of China;JD.com;Qatar University", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://www.ntu.edu.sg;https://www.ox.ac.uk;https://www.sjtu.edu.cn;https://www.ustc.edu.cn;https://www.jd.com;https://www.qu.edu.qa", "aff_unique_abbr": "NTU;Oxford;SJTU;USTC;JD;QU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;0;2;2;3", "aff_country_unique": "Singapore;United Kingdom;China;Qatar" }, { "title": "Correlative Information Maximization Based Biologically Plausible Neural Networks for Correlated Source Separation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11170", "id": "8JsaP7j1cL0", "poster": "/media/PosterPDFs/ICLR%202023/11170.png?t=1681333943.50839", "openreview": "https://openreview.net/forum?id=8JsaP7j1cL0", "slides": "https://iclr.cc/virtual/2023/poster/11170", "video": "https://iclr.cc/virtual/2023/poster/11170", "author_site": "Bariscan Bozkurt, Ate\u015f \u0130sfendiyaro\u011flu, Cengiz Pehlevan, Alper Erdogan", "tldr": "This paper proposes biologically plausible neural networks for blind separation of correlated sources exploiting prior domain assumptions via an information maximization criterion.", "abstract": "The brain effortlessly extracts latent causes of stimuli, but how it does this at the network level remains unknown. 
Most prior attempts at this problem proposed neural networks that implement independent component analysis, which works under the limitation that latent elements are mutually independent. Here, we relax this limitation and propose a biologically plausible neural network that extracts correlated latent sources by exploiting information about their domains. To derive this network, we choose maximum correlative information transfer from inputs to outputs as the separation objective under the constraint that the outputs are restricted to their presumed sets. The online formulation of this optimization problem naturally leads to neural networks with local learning rules. Our framework incorporates infinitely many source domain choices and flexibly models complex latent structures. Choices of simplex or polytopic source domains result in networks with piecewise-linear activation functions. We provide numerical examples to demonstrate the superior correlated source separation capability for both synthetic and natural sources.", "keywords": "Biologically Plausible Neural Networks;Blind Correlated Source Separation;Correlative Information Maximization", "primary_area": "", "supplementary_material": "/attachment/18f42a894b9bd2a6ebaa0bb38f7e369c112057ef.zip", "author": "Bariscan Bozkurt;Ate\u015f \u0130sfendiyaro\u011flu;Cengiz Pehlevan;Alper Tunga Erdogan", "authorids": "~Bariscan_Bozkurt1;~Ate\u015f_\u0130sfendiyaro\u011flu1;~Cengiz_Pehlevan2;~Alper_Tunga_Erdogan1", "gender": "M;M;;M", "homepage": ";;https://pehlevan.seas.harvard.edu/;https://aspc.ku.edu.tr", "dblp": "321/6640;;145/3480;46/5196", "google_scholar": "https://scholar.google.com/citations?hl=en;;veDLTPEAAAAJ;CW8eBF8AAAAJ", "orcid": ";;0000-0001-9767-6063;0000-0003-0876-2897", "linkedin": "bar%C4%B1%C5%9Fcan-bozkurt-436a5610b/;ate%C5%9F-isfendiyaro%C4%9Flu-751139243/;;", "or_profile": "~Bariscan_Bozkurt1;~Ate\u015f_\u0130sfendiyaro\u011flu1;~Cengiz_Pehlevan2;~Alper_Tunga_Erdogan1", "aff": "Ko\u00e7 University;\u00dcsk\u00fcdar American Academy;School of Engineering and Applied Sciences, Harvard University;Ko\u00e7 University", "aff_domain": "ku.edu.tr;my.uaa.k12.tr;seas.harvard.edu;ku.edu.tr", "position": "MS student;Undergrad student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nbozkurt2023correlative,\ntitle={Correlative Information Maximization Based Biologically Plausible Neural Networks for Correlated Source Separation},\nauthor={Bariscan Bozkurt and Ate{\\c{s}} {\\.I}sfendiyaro{\\u{g}}lu and Cengiz Pehlevan and Alper Tunga Erdogan},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=8JsaP7j1cL0}\n}", "github": "", "project": "", "reviewers": "C2ij;hwJs;LCHG", "pdf_size": 18274896, "recommendation": "5;6;8", "confidence": "3;2;4", "correctness": "4;4;4", "technical_novelty": "2;2;3", "empirical_novelty": "2;2;3", "wc_summary_paper": "119;165;22", "wc_strength_and_weaknesses": "131;29;46", "wc_clarity_quality_novelty_and_reproducibility": "237;121;263", "wc_summary_review": "98;19;12", "wc_review": "585;334;343", "wc_reply_reviewers": "0;0;65", "wc_reply_authors": "2476;2169;526", "reply_reviewers": "0;0;1", "reply_authors": "5;5;2", "recommendation_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "correctness_avg": [ 4.0, 0.0 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": 
[ 102.0, 59.604250407724 ], "wc_strength_and_weaknesses_avg": [ 68.66666666666667, 44.61937795273359 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 207.0, 61.73059749157355 ], "wc_summary_review_avg": [ 43.0, 38.99572626156188 ], "wc_review_avg": [ 420.6666666666667, 116.25928875673644 ], "wc_reply_reviewers_avg": [ 21.666666666666668, 30.641293851417057 ], "wc_reply_authors_avg": [ 1723.6666666666667, 856.102148629992 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 4.0, 1.4142135623730951 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.6546536707079772, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=301569233832155819&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=8JsaP7j1cL0", "email": "ku.edu.tr;my.uaa.k12.tr;seas.harvard.edu;ku.edu.tr", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Ko\u00e7 University;\u00dcsk\u00fcdar American Academy;Harvard University", "aff_unique_dep": ";;School of Engineering and Applied Sciences", "aff_unique_url": "https://www.ku.edu.tr;https://www.usakamericanacademy.org;https://www.harvard.edu", "aff_unique_abbr": "Ko\u00e7;;Harvard", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "T\u00fcrkiye;United States" }, { "title": "NAGphormer: A Tokenized Graph Transformer for Node Classification in Large Graphs", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11258", "id": "8KYeilT3Ow", "poster": "/media/PosterPDFs/ICLR%202023/11258.png?t=1682649375.7757351", "openreview": "https://openreview.net/forum?id=8KYeilT3Ow", "slides": "https://iclr.cc/virtual/2023/poster/11258", "video": "https://iclr.cc/virtual/2023/poster/11258", "author_site": "Jinsong Chen, Kaiyuan Gao, Gaichao Li, Kun He", "tldr": "We propose a novel Graph Transformer that utilizes the neighborhood aggregation of multiple hops to build the input sequence of token vectors and thereby can handle large graphs efficiently.", "abstract": "The graph Transformer emerges as a new architecture and has shown superior performance on various graph mining tasks. In this work, we observe that existing graph Transformers treat nodes as independent tokens and construct a single long sequence composed of all node tokens so as to train the Transformer model, making it hard to scale to large graphs due to the quadratic complexity in the number of nodes for the self-attention computation. To this end, we propose a Neighborhood Aggregation Graph Transformer (NAGphormer) that treats each node as a sequence containing a series of tokens constructed by our proposed Hop2Token module. For each node, Hop2Token aggregates the neighborhood features from different hops into different representations and thereby produces a sequence of token vectors as one input. In this way, NAGphormer could be trained in a mini-batch manner and thus could scale to large graphs. Moreover, we mathematically show that as compared to a category of advanced Graph Neural Networks (GNNs), the decoupled Graph Convolutional Network, NAGphormer could learn more informative node representations from the multi-hop neighborhoods. Extensive experiments on benchmark datasets from small to large are conducted to demonstrate that NAGphormer consistently outperforms existing graph Transformers and mainstream GNNs.
Code is available at https://github.com/JHL-HUST/NAGphormer.", "keywords": "Graph Transformer;node classification;neighborhood aggregation;multi-hop neighborhood", "primary_area": "", "supplementary_material": "/attachment/c0208c9c1216eb9f9d92f08fb01b90f20bc38ea3.zip", "author": "Jinsong Chen;Kaiyuan Gao;Gaichao Li;Kun He", "authorids": "~Jinsong_Chen2;~Kaiyuan_Gao1;~Gaichao_Li1;~Kun_He1", "gender": "M;M;M;F", "homepage": "https://guangnianchenai.github.io/;https://kygao.github.io;https://arxiv.org/abs/2211.07970;http://faculty.hust.edu.cn/hekun/zh_CN/more/1411001/jsjjgd/index.htm", "dblp": "14/7450-2;180/6731;322/4040;59/1028-1", "google_scholar": "F470g5wAAAAJ;Or77MPQAAAAJ;;YTQnGJsAAAAJ", "orcid": "0000-0001-7588-6713;0009-0002-8862-8320;;0000-0001-7627-4604", "linkedin": ";;;", "or_profile": "~Jinsong_Chen2;~Kaiyuan_Gao1;~Gaichao_Li1;~Kun_He1", "aff": "Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Sceince and Technology", "aff_domain": "hust.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn", "position": "PhD student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nchen2023nagphormer,\ntitle={{NAG}phormer: A Tokenized Graph Transformer for Node Classification in Large Graphs},\nauthor={Jinsong Chen and Kaiyuan Gao and Gaichao Li and Kun He},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=8KYeilT3Ow}\n}", "github": "", "project": "", "reviewers": "zQFG;wvBc;b2CU;gZ4g", "pdf_size": 700614, "recommendation": "5;5;8;8", "confidence": "3;4;3;4", "correctness": "3;3;4;4", "technical_novelty": "3;2;3;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "68;70;42;26", "wc_strength_and_weaknesses": "182;202;138;279", "wc_clarity_quality_novelty_and_reproducibility": "57;105;14;22", "wc_summary_review": "37;59;18;27", "wc_review": "344;436;212;354", "wc_reply_reviewers": "0;0;0;13", "wc_reply_authors": "661;471;485;609", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;2", "recommendation_avg": [ 6.5, 1.5 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 51.5, 18.405162319305962 ], "wc_strength_and_weaknesses_avg": [ 200.25, 51.02144157116692 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 49.5, 35.892199709686224 ], "wc_summary_review_avg": [ 35.25, 15.270478054075452 ], "wc_review_avg": [ 336.5, 80.25428337478318 ], "wc_reply_reviewers_avg": [ 3.25, 5.629165124598851 ], "wc_reply_authors_avg": [ 556.5, 80.77592463104338 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 1.0, "gs_citation": 175, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1843081330295016257&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=8KYeilT3Ow", "email": "hust.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Huazhong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.hust.edu.cn", "aff_unique_abbr": "HUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", 
"aff_country_unique": "China" }, { "id": "8MneBPDxV9L", "title": "Finding the smallest tree in the forest: Monte Carlo Forest Search for UNSAT solving", "track": "main", "status": "Withdraw", "tldr": "We develop Monte Carlo Forest Search (MCFS), an algorithm for finding small search trees within a forest that retains the benefits of the best MCTS approaches.", "abstract": "Monte Carlo Tree Search (MCTS) is an effective approach for finding low-cost paths through any large combinatorial space that can naturally be structured as a search tree. However, some combinatorial problems do not have a natural interpretation as searches for a good path. For example, solving a CSP can be represented as a path (assign variables sequentially and check the solution); however, proving that no solution exists (via existing methods) requires enumerating multiple paths to build out a \u201cproof tree\u201d demonstrating that every possible variable assignment leads to a conflict. Rather than finding a good path (solution) within a tree, the search problem becomes searching for a small proof tree within a forest of candidate trees. In this paper we develop Monte Carlo Forest Search (MCFS), an algorithm for finding small search trees. Our method leverages the benefits of the best MCTS approaches and further introduces two key ideas. First, we estimate tree size via the linear (i.e., path-based) and unbiased approximation from Knuth (1975). Second, we query a strong solver at a user-defined depth rather than learning a policy across the whole tree, in order to (1) reduce the variance of our tree-size estimates and (2) focus our policy search on early decisions, which offer the greatest potential for reducing tree size. We evaluated our approach on the Boolean satisfiability (SAT) problem, and found that it matched or improved performance over a strong baseline on two well-known distributions (\\texttt{sgen}, \\texttt{random}). 
Notably, we improved walltime by 9\\% on \\texttt{sgen} over the \\texttt{kcnfs} solver and even further over the strongest UNSAT solver from the 2021 SAT competition.", "keywords": "Monte Carlo Tree Search;Reinforcement learning;Combinatorial optimization;SAT", "primary_area": "", "supplementary_material": "/attachment/b52cf5e9db4b854e5f204d47e8c71e1b35fcab1d.zip", "author": "Chris Cameron;Jason Hartford;Taylor Lundy;Tuan Truong;Alan Milligan;Rex Chen;Kevin Leyton-Brown", "authorids": "~Chris_Cameron1;~Jason_Hartford1;~Taylor_Lundy1;~Tuan_Truong1;alanmil@student.ubc.ca;rexc@cmu.edu;~Kevin_Leyton-Brown1", "gender": ";M;M;M;;;Not Specified", "homepage": "https://www.cs.ubc.ca/~cchris13/;https://jhartford.github.io;https://cs.ubc.ca/~tlundy;;;;http://cs.ubc.ca/~kevinlb", "dblp": "183/0895;191/6716;243/2600;;;;81/1149", "google_scholar": ";https://scholar.google.ca/citations?user=eBNK7SsAAAAJ;;;;;_4dnp0IAAAAJ", "orcid": ";;;;;;0000-0002-7644-5327", "linkedin": ";jasonhartford1/;taylor-lundy-8b915418b/;tuan-truong-620a5119b/;;;kevinleytonbrown/", "or_profile": "~Chris_Cameron1;~Jason_Hartford1;~Taylor_Lundy1;~Tuan_Truong1;alanmil@student.ubc.ca;rexc@cmu.edu;~Kevin_Leyton-Brown1", "aff": "University of British Columbia;Montreal Institute for Learning Algorithms, University of Montreal, University of Montreal;University of British Columbia;University of British Columbia;;;University of British Columbia", "aff_domain": "cs.ubc.ca;mila.umontreal.ca;ubc.ca;cs.ubc.ca;;;ubc.ca", "position": "PhD student;Postdoc;PhD student;Undergrad student;;;Full Professor", "bibtex": "@misc{\ncameron2023finding,\ntitle={Finding the smallest tree in the forest: Monte Carlo Forest Search for {UNSAT} solving},\nauthor={Chris Cameron and Jason Hartford and Taylor Lundy and Tuan Truong and Alan Milligan and Rex Chen and Kevin Leyton-Brown},\nyear={2023},\nurl={https://openreview.net/forum?id=8MneBPDxV9L}\n}", "github": "", "project": "", "reviewers": "AVmx;2LN6;WMiN;ZSJE", "site": "https://openreview.net/forum?id=8MneBPDxV9L", "pdf_size": 1090612, "recommendation": "3;3;3;6", "confidence": "2;3;4;5", "correctness": "2;3;3;4", "technical_novelty": "2;3;2;3", "empirical_novelty": "3;3;2;2", "wc_summary_paper": "106;135;131;195", "wc_strength_and_weaknesses": "284;278;279;119", "wc_clarity_quality_novelty_and_reproducibility": "195;62;108;53", "wc_summary_review": "58;108;103;33", "wc_review": "643;583;621;400", "wc_reply_reviewers": "0;0;53;0", "wc_reply_authors": "748;174;396;207", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 3.75, 1.299038105676658 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 141.75, 32.69078616368839 ], "wc_strength_and_weaknesses_avg": [ 240.0, 69.89635183612947 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 104.5, 56.26055456534356 ], "wc_summary_review_avg": [ 75.5, 31.32491021535417 ], "wc_review_avg": [ 561.75, 95.82112241045812 ], "wc_reply_reviewers_avg": [ 13.25, 22.949673200287624 ], "wc_reply_authors_avg": [ 381.25, 228.05632527952386 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.7745966692414834, "corr_recommendation_correctness": 0.816496580927726, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:AvI2ixGsClsJ:scholar.google.com/&scioq=Finding+the+smallest+tree+in+the+forest:+Monte+Carlo+Forest+Search+for+UNSAT+solving&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "University of British Columbia;University of Montreal", "aff_unique_dep": ";Montreal Institute for Learning Algorithms", "aff_unique_url": "https://www.ubc.ca;https://www.umontreal.ca", "aff_unique_abbr": "UBC;UM", "aff_campus_unique_index": "1", "aff_campus_unique": ";Montreal", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Canada" }, { "id": "8NLta1E_BPR", "title": "Meta-Learning via Classifier(-free) Guidance", "track": "main", "status": "Reject", "tldr": "We develop a meta-learning method that uses classifier(-free) guidance from the generative modeling literature to generate zero-shot adapted network weights.", "abstract": "State-of-the-art meta-learning techniques do not optimize for zero-shot adaptation to unseen tasks, a setting in which humans excel. On the contrary, meta-learning algorithms learn hyperparameters and weight initializations that explicitly optimize for few-shot learning performance. In this work, we take inspiration from recent advances in generative modeling and language-conditioned image synthesis to propose meta-learning techniques that use natural language guidance to achieve higher zero-shot performance compared to the state-of-the-art. We do so by recasting the meta-learning problem as a multi-modal generative modeling problem: given a task, we consider its adapted neural network weights and its natural language description as equivalent multi-modal task representations. We first train an unconditional generative hypernetwork model to produce neural network weights; then we train a second \"guidance\" model that, given a natural language task description, traverses the hypernetwork latent space to find high-performance task-adapted weights in a zero-shot manner. We explore two alternative approaches for latent space guidance: \"HyperCLIP\"-based classifier guidance and a conditional Hypernetwork Latent Diffusion Model (\"HyperLDM\"), which we show to benefit from the classifier-free guidance technique common in image generation. Finally, we demonstrate that our approaches outperform existing meta-learning methods with zero-shot learning experiments on our Meta-VQA dataset, which we specifically constructed to reflect the multi-modal meta-learning setting.", "keywords": "deep leaning;meta learning;hypernetworks;generative models;classifier guidance;contrastive learning;clip;classifier-free guidance;latent diffusion;diffusion models", "primary_area": "", "supplementary_material": "/attachment/4e17f2ca9cf41bf8a456e46cd7349e38535b7fdd.zip", "author": "Elvis Nava;Seijin Kobayashi;Yifei Yin;Robert K. 
Katzschmann;Benjamin F Grewe", "authorids": "~Elvis_Nava1;~Seijin_Kobayashi1;~Yifei_Yin1;~Robert_K._Katzschmann1;~Benjamin_F_Grewe1", "gender": "M;;;;M", "homepage": "https://www.elvisnava.com/;;;;https://www.ini.uzh.ch/en/institute/people?uname=bgrewe", "dblp": "303/0379;;;;", "google_scholar": "c5Z7qwgAAAAJ;;;;https://scholar.google.de/citations?user=ZA-1rh8AAAAJ", "orcid": ";;;;0000-0001-8560-2120", "linkedin": "elvisnava/;;;;", "or_profile": "~Elvis_Nava1;~Seijin_Kobayashi1;~Yifei_Yin1;~Robert_K._Katzschmann1;~Benjamin_F_Grewe1", "aff": "Swiss Federal Institute of Technology;;;;ETHZ - ETH Zurich", "aff_domain": "ethz.ch;;;;ethz.ch", "position": "PhD student;;;;Assistant Professor", "bibtex": "@misc{\nnava2023metalearning,\ntitle={Meta-Learning via Classifier(-free) Guidance},\nauthor={Elvis Nava and Seijin Kobayashi and Yifei Yin and Robert K. Katzschmann and Benjamin F Grewe},\nyear={2023},\nurl={https://openreview.net/forum?id=8NLta1E_BPR}\n}", "github": "", "project": "", "reviewers": "5YoJ;mvke;TMAm;HRh8", "site": "https://openreview.net/forum?id=8NLta1E_BPR", "pdf_size": 1631705, "recommendation": "3;3;3;6", "confidence": "4;4;4;4", "correctness": "3;2;2;3", "technical_novelty": "3;2;3;3", "empirical_novelty": "1;2;2;4", "wc_summary_paper": "43;117;190;100", "wc_strength_and_weaknesses": "170;412;222;198", "wc_clarity_quality_novelty_and_reproducibility": "68;2;129;57", "wc_summary_review": "103;22;67;34", "wc_review": "384;553;608;389", "wc_reply_reviewers": "0;0;91;0", "wc_reply_authors": "860;1432;1041;412", "reply_reviewers": "0;0;1;0", "reply_authors": "2;3;2;2", "recommendation_avg": [ 3.75, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 1.0897247358851685 ], "wc_summary_paper_avg": [ 112.5, 52.47142079265626 ], "wc_strength_and_weaknesses_avg": [ 250.5, 95.04078072069905 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 64.0, 45.09434554353794 ], "wc_summary_review_avg": [ 56.5, 31.5 ], "wc_review_avg": [ 483.5, 98.94569217505126 ], "wc_reply_reviewers_avg": [ 22.75, 39.40415587219196 ], "wc_reply_authors_avg": [ 936.25, 366.5353836943986 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16387882098285992982&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1", "aff_unique_norm": "Swiss Federal Institute of Technology;ETH Zurich", "aff_unique_dep": ";", "aff_unique_url": "https://www.ethz.ch;https://www.ethz.ch", "aff_unique_abbr": "ETH Zurich;ETHZ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Switzerland" }, { "id": "8OFAtZzIf7T", "title": "Logit Margin Matters: Improving Transferable Targeted Adversarial Attack by Logit Calibration", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Previous works have extensively studied the transferability of adversarial samples in untargeted black-box scenarios. However, it still remains challenging to craft the targeted adversarial examples with higher transferability than non-targeted ones. 
Recent studies reveal that the traditional Cross-Entropy (CE) loss function is insufficient to learn transferable targeted perturbations due to the issue of vanishing gradient. In this work, we provide a comprehensive investigation of the CE loss function and find that the logit margin between the targeted and untargeted classes quickly saturates in CE, which largely limits the transferability. Therefore, in this paper, we pursue the goal of enlarging logit margins and propose two simple and effective logit calibration methods, which are achieved by downscaling the logits with a temperature factor and an adaptive margin, respectively.\nBoth of them can effectively encourage the optimization to produce larger logit margins and lead to higher transferability. Besides, we show that minimizing the cosine distance between the adversarial examples and the classifier of the target class can further improve the transferability, which benefits from downscaling logits via L2-normalization.\nExperiments conducted on the ImageNet dataset validate the effectiveness of the proposed methods, which outperform the state-of-the-art methods in black-box targeted attacks. The source code is available at \href{https://anonymous.4open.science/r/Target-Attack-72EB/README.md}{Link}.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/a1fe0847fe79d3217717437a89e9cb554bfdbe6a.zip", "author": "Juanjuan Weng;Zhiming Luo;Zhun Zhong;Shaozi Li;Nicu Sebe", "authorids": "~Juanjuan_Weng1;~Zhiming_Luo1;~Zhun_Zhong1;~Shaozi_Li1;~Nicu_Sebe1", "gender": "F;M;M;M;M", "homepage": "https://github.com/WJJLL;https://sites.google.com/view/zhimingluo;http://zhunzhong.site;;http://disi.unitn.it/~sebe/", "dblp": "342/2875;75/9709;32/6525;51/2064;20/3519", "google_scholar": "https://scholar.google.com.hk/citations?hl=zh-CN;RdRCIIYAAAAJ;nZizkQ0AAAAJ;CT8_b_QAAAAJ;https://scholar.google.it/citations?user=stFCYOAAAAAJ", "orcid": "0000-0003-0825-2272;;;;0000-0002-6597-7248", "linkedin": ";;;;", "or_profile": "~Juanjuan_Weng1;~Zhiming_Luo1;~Zhun_Zhong1;~Shaozi_Li1;~Nicu_Sebe1", "aff": "Xiamen University;Xiamen University;University of Trento;Xiamen University;University of Trento", "aff_domain": "xmu.edu.cn;xmu.edu.cn;unitn.it;xmu.edu.cn;unitn.it", "position": "PhD student;Associate Professor;Assistant Professor;Full Professor;Full Professor", "bibtex": "@misc{\nweng2023logit,\ntitle={Logit Margin Matters: Improving Transferable Targeted Adversarial Attack by Logit Calibration},\nauthor={Juanjuan Weng and Zhiming Luo and Zhun Zhong and Shaozi Li and Nicu Sebe},\nyear={2023},\nurl={https://openreview.net/forum?id=8OFAtZzIf7T}\n}", "github": "", "project": "", "reviewers": "TBJW;gqom;4ThG;f89A", "site": "https://openreview.net/forum?id=8OFAtZzIf7T", "pdf_size": 455492, "recommendation": "5;5;5;5", "confidence": "3;3;3;3", "correctness": "3;3;3;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "128;80;39;70", "wc_strength_and_weaknesses": "227;337;161;275", "wc_clarity_quality_novelty_and_reproducibility": "14;46;59;24", "wc_summary_review": "16;40;45;61", "wc_review": "385;503;304;430", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.0, 0.0 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 79.25,
31.948200262299597 ], "wc_strength_and_weaknesses_avg": [ 250.0, 64.50581369148055 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 35.75, 17.725334975678173 ], "wc_summary_review_avg": [ 40.5, 16.132265804901678 ], "wc_review_avg": [ 405.5, 72.16127770487438 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14127407136363892065&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff_unique_index": "0;0;1;0;1", "aff_unique_norm": "Xiamen University;University of Trento", "aff_unique_dep": ";", "aff_unique_url": "https://www.xmu.edu.cn;https://www.unitn.it", "aff_unique_abbr": "XMU;UniTN", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;1", "aff_country_unique": "China;Italy" }, { "title": "Autoencoders as Cross-Modal Teachers: Can Pretrained 2D Image Transformers Help 3D Representation Learning?", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11988", "id": "8Oun8ZUVe8N", "poster": "/media/PosterPDFs/ICLR%202023/11988.png?t=1681120585.2747166", "openreview": "https://openreview.net/forum?id=8Oun8ZUVe8N", "slides": "https://iclr.cc/virtual/2023/poster/11988", "video": "https://iclr.cc/virtual/2023/poster/11988", "author_site": "Runpei Dong, Zekun Qi, Linfeng Zhang, Junbo Zhang, Jianjian Sun, Zheng Ge, Li Yi, Kaisheng Ma", "tldr": "This paper shows that pretrained 2D image Transformers can help self-supervised 3D representation learning by training autoencoders as cross-modal teachers.", "abstract": "The success of deep learning heavily relies on large-scale data with comprehensive labels, which is more expensive and time-consuming to fetch in 3D compared to 2D images or natural languages. This promotes the potential of utilizing models pretrained on modalities beyond 3D as teachers for cross-modal knowledge transfer. In this paper, we revisit masked modeling in a unified fashion of knowledge distillation, and we show that foundational Transformers pretrained with 2D images or natural languages can help self-supervised 3D representation learning through training Autoencoders as Cross-Modal Teachers (ACT). The pretrained Transformers are transferred as cross-modal 3D teachers using discrete variational autoencoding self-supervision, during which the Transformers are frozen with prompt tuning for better knowledge inheritance. The latent features encoded by the 3D teachers are used as the target of masked point modeling, wherein the dark knowledge is distilled to the 3D Transformer students as foundational geometry understanding. Our ACT pretrained 3D learner achieves state-of-the-art generalization capacity across various downstream benchmarks, e.g., 88.21% overall accuracy on ScanObjectNN.
Codes have been released at https://github.com/RunpeiDong/ACT.", "keywords": "Representation Learning;Cross-Modal Learning;3D Point Clouds", "primary_area": "", "supplementary_material": "/attachment/d42606216832757038625654a846aae99e3352b6.zip", "author": "Runpei Dong;Zekun Qi;Linfeng Zhang;Junbo Zhang;Jianjian Sun;Zheng Ge;Li Yi;Kaisheng Ma", "authorids": "~Runpei_Dong1;~Zekun_Qi2;~Linfeng_Zhang2;~Junbo_Zhang3;~Jianjian_Sun1;~Zheng_Ge1;~Li_Yi2;~Kaisheng_Ma1", "gender": "M;M;M;M;M;M;M;M", "homepage": "https://runpeidong.web.illinois.edu/;https://qizekun.github.io/;http://www.zhanglinfeng.tech/;;;;https://ericyi.github.io/;http://group.iiis.tsinghua.edu.cn/~maks/index.html", "dblp": "298/8727;182/3981;93/488-1;75/8471;322/9274;231/1007;26/4239-1;133/4053.html", "google_scholar": "z2SoXI8AAAAJ;ap8yc3oAAAAJ;AK9VF30AAAAJ;https://scholar.google.cz/citations?user=rSP0pGQAAAAJ;https://scholar.google.com/citations?hl=en;hJ-VrrIAAAAJ;UyZL660AAAAJ;VtDpVoEAAAAJ", "orcid": ";;0000-0002-3341-183X;;;;;0000-0001-9226-3366", "linkedin": ";;;;;;;", "or_profile": "~Runpei_Dong1;~Zekun_Qi2;~Linfeng_Zhang2;~Junbo_Zhang3;~Jianjian_Sun1;~Zheng_Ge1;~Li_Yi2;~Kaisheng_Ma1", "aff": ";Xi'an Jiaotong University;Tsinghua University;Tsinghua University;Megvii Technology Inc.;Megvii Technology Inc.;Tsinghua University;", "aff_domain": ";xjtu.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;megvii.com;megvii.com;tsinghua.edu.cn;", "position": ";MS student;PhD student;PhD student;Researcher;Researcher;Assistant Professor;", "bibtex": "@inproceedings{\ndong2023autoencoders,\ntitle={Autoencoders as Cross-Modal Teachers: Can Pretrained 2D Image Transformers Help 3D Representation Learning?},\nauthor={Runpei Dong and Zekun Qi and Linfeng Zhang and Junbo Zhang and Jianjian Sun and Zheng Ge and Li Yi and Kaisheng Ma},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=8Oun8ZUVe8N}\n}", "github": "", "project": "", "reviewers": "Dx8R;FxNn;8fVd;8SEp", "pdf_size": 4604994, "recommendation": "5;6;8;10", "confidence": "5;5;4;4", "correctness": "2;3;4;4", "technical_novelty": "3;3;4;4", "empirical_novelty": "2;3;4;4", "wc_summary_paper": "99;60;93;41", "wc_strength_and_weaknesses": "352;193;94;88", "wc_clarity_quality_novelty_and_reproducibility": "24;65;6;56", "wc_summary_review": "20;21;10;14", "wc_review": "495;339;203;199", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "2521;2057;465;46", "reply_reviewers": "0;0;0;0", "reply_authors": "4;3;1;1", "recommendation_avg": [ 7.25, 1.920286436967152 ], "confidence_avg": [ 4.5, 0.5 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 3.5, 0.5 ], "empirical_novelty_avg": [ 3.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 73.25, 23.81569860407206 ], "wc_strength_and_weaknesses_avg": [ 181.75, 106.7716605659011 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 37.75, 23.836683913665507 ], "wc_summary_review_avg": [ 16.25, 4.493050188902857 ], "wc_review_avg": [ 309.0, 121.27654348636426 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1272.25, 1040.4987686201266 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.25, 1.299038105676658 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": -0.911322376865767, "corr_recommendation_correctness": 0.9028289727756884, "gs_citation": 106, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16657130336390881372&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, 
"pdf": "https://openreview.net/pdf?id=8Oun8ZUVe8N", "email": ";xjtu.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;megvii.com;megvii.com;tsinghua.edu.cn;", "author_num": 8, "aff_unique_index": "0;1;1;2;2;1", "aff_unique_norm": "Xi'an Jiao Tong University;Tsinghua University;Megvii Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.xjtu.edu.cn;https://www.tsinghua.edu.cn;https://www.megvii.com", "aff_unique_abbr": "XJTU;THU;Megvii", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "8RExG-EKC22", "title": "Adaptive IMLE for Few-shot Image Synthesis", "track": "main", "status": "Reject", "tldr": "", "abstract": "Despite their success on large datasets, GANs have been difficult to apply in the few-shot setting, where only a limited number of training examples are provided. Due to mode collapse, GANs tend to ignore some training examples, causing overfitting to a subset of the training dataset, which is small to begin with. A recent method called Implicit Maximum Likelihood Estimation (IMLE) is an alternative to GAN that tries to address this issue. It uses the same kind of generators as GANs but trains it with a different objective that encourages mode coverage. However, the theoretical guarantees of IMLE hold under restrictive conditions, such as the requirement for the optimal likelihood at all data points to be the same. In this paper, we present a more generalized formulation of IMLE which includes the original formulation as a special case, and we prove that the theoretical guarantees hold under weaker conditions. Using this generalized formulation, we further derive a new algorithm, which we dub Adaptive IMLE, which can adapt to the varying difficulty of different training examples. We demonstrate on multiple few-shot image synthesis datasets that our method significantly outperforms existing methods. 
", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/c079ad2147e30093483137964a3999527ff80729.zip", "author": "Mehran Aghabozorgi;Shichong Peng;Ke Li", "authorids": "~Mehran_Aghabozorgi1;~Shichong_Peng1;~Ke_Li1", "gender": "M;M;M", "homepage": ";https://sites.google.com/view/niopeng/home;http://www.sfu.ca/~keli/", "dblp": ";221/4790;75/6627-11", "google_scholar": "sKWTHpsAAAAJ;;vQc8tI4AAAAJ", "orcid": ";;", "linkedin": "mehran-aghabozorg/;;", "or_profile": "~Mehran_Aghabozorgi1;~Shichong_Peng1;~Ke_Li1", "aff": "Computing Science, Simon Fraser University;Simon Fraser University;Simon Fraser University", "aff_domain": "cs.sfu.ca;sfu.ca;sfu.ca", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@misc{\naghabozorgi2023adaptive,\ntitle={Adaptive {IMLE} for Few-shot Image Synthesis},\nauthor={Mehran Aghabozorgi and Shichong Peng and Ke Li},\nyear={2023},\nurl={https://openreview.net/forum?id=8RExG-EKC22}\n}", "github": "", "project": "", "reviewers": "jG1N;uhYC;xM5B;owTn;Me9N", "site": "https://openreview.net/forum?id=8RExG-EKC22", "pdf_size": 16662786, "recommendation": "3;6;6;6;6", "confidence": "4;4;3;4;3", "correctness": "4;3;3;3;3", "technical_novelty": "2;4;3;2;2", "empirical_novelty": "0;0;3;2;3", "wc_summary_paper": "102;40;74;65;133", "wc_strength_and_weaknesses": "254;189;251;390;113", "wc_clarity_quality_novelty_and_reproducibility": "22;19;37;85;347", "wc_summary_review": "10;33;36;55;49", "wc_review": "388;281;398;595;642", "wc_reply_reviewers": "178;43;101;110;55", "wc_reply_authors": "984;364;276;666;462", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;2", "recommendation_avg": [ 5.4, 1.2 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "correctness_avg": [ 3.2, 0.39999999999999997 ], "technical_novelty_avg": [ 2.6, 0.8 ], "empirical_novelty_avg": [ 1.6, 1.3564659966250538 ], "wc_summary_paper_avg": [ 82.8, 31.98374587192688 ], "wc_strength_and_weaknesses_avg": [ 239.4, 91.1539357351069 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 102.0, 124.7621737547082 ], "wc_summary_review_avg": [ 36.6, 15.576905982896603 ], "wc_review_avg": [ 460.8, 135.95205037070974 ], "wc_reply_reviewers_avg": [ 97.4, 47.8020920044301 ], "wc_reply_authors_avg": [ 550.4, 252.6765521373125 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.4082482904638631, "corr_recommendation_correctness": -1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:4G40PJjICv0J:scholar.google.com/&scioq=Adaptive+IMLE+for+Few-shot+Image+Synthesis&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Simon Fraser University", "aff_unique_dep": "Computing Science", "aff_unique_url": "https://www.sfu.ca", "aff_unique_abbr": "SFU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "title": "Progressively Compressed Auto-Encoder for Self-supervised Representation Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12083", "id": "8T4qmZbTkW7", "poster": "/media/PosterPDFs/ICLR%202023/12083.png?t=1682137877.100378", "openreview": "https://openreview.net/forum?id=8T4qmZbTkW7", "slides": "https://iclr.cc/virtual/2023/poster/12083", "video": "https://iclr.cc/virtual/2023/poster/12083", "author_site": "Li Jin, Yaoming wang, XIAOPENG ZHANG, Yabo Chen, Dongsheng Jiang, 
Wenrui Dai, Chenglin Li, Hongkai Xiong, Qi Tian", "tldr": "", "abstract": "As a typical self-supervised learning strategy, Masked Image Modeling (MIM) is driven by recovering all masked patches from visible ones. However, patches from the same image are highly correlated and it is redundant to reconstruct all the masked patches. We find that this redundancy is neglected by existing MIM based methods and causes non-negligible overheads in computation that do not necessarily benefit self-supervised representation. In this paper, we present a novel approach named PCAE, short for Progressively Compressed AutoEncoder, to address the redundant reconstruction issue by progressively compacting tokens and only retaining necessary information for forward propagation and reconstruction. In particular, we identify those redundant tokens in an image via a simple yet effective similarity metric between each token and the mean of the token sequence. Those redundant tokens that other ones can probably represent are progressively dropped accordingly during the forward propagation, and importantly, we only focus on reconstructing these retained tokens. As a result, we are able to achieve a better trade-off between performance and efficiency for pre-training. Besides, benefitting from the flexible strategy, PCAE can also be directly employed for downstream fine-tuning tasks and enable scalable deployment. Experiments show that PCAE achieves comparable performance to MAE with only 1/8 GPU days. The code is available at https://github.com/caddyless/PCAE/.", "keywords": "MIM;Transformer;self-supervised learning", "primary_area": "", "supplementary_material": "", "author": "Jin Li;Yaoming Wang;XIAOPENG ZHANG;Yabo Chen;Dongsheng Jiang;Wenrui Dai;Chenglin Li;Hongkai Xiong;Qi Tian", "authorids": "~Jin_Li10;~Yaoming_Wang1;~XIAOPENG_ZHANG7;~Yabo_Chen1;~Dongsheng_Jiang2;~Wenrui_Dai1;~Chenglin_Li2;~Hongkai_Xiong1;~Qi_Tian3", "gender": ";;M;M;;;M;M;M", "homepage": ";;https://sites.google.com/site/zxphistory/;;;;https://min.sjtu.edu.cn/En/FacultyShow/4?Vid=17;http://min.sjtu.edu.cn;https://www.qitian1987.com/index.html", "dblp": ";;;96/8624.html;;16/5135.html;;21/3569;78/1467-1.html", "google_scholar": ";;Ud6aBAcAAAAJ;6aHx1rgAAAAJ;;Xg8MhyAAAAAJ;ltW2JMcAAAAJ;bB16iN4AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;;;;;0000-0003-4552-0029;0000-0002-7252-5047", "linkedin": ";;;;;;;;", "or_profile": "~Jin_Li10;~Yaoming_Wang1;~XIAOPENG_ZHANG7;~Yabo_Chen1;~Dongsheng_Jiang2;~Wenrui_Dai1;~Chenglin_Li2;~Hongkai_Xiong1;~Qi_Tian3", "aff": ";;Huawei Technologies Ltd.;Shanghai Jiaotong University;;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Huawei Technologies Ltd.", "aff_domain": ";;huawei.com;sjtu.edu.cn;;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;huawei.com", "position": ";;Principal Researcher;PhD student;;Associate Professor;Full Professor;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\nli2023progressively,\ntitle={Progressively Compressed Auto-Encoder for Self-supervised Representation Learning},\nauthor={Jin Li and Yaoming Wang and XIAOPENG ZHANG and Yabo Chen and Dongsheng Jiang and Wenrui Dai and Chenglin Li and Hongkai Xiong and Qi Tian},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=8T4qmZbTkW7}\n}", "github": "", "project": "", "reviewers": "B38C;qAL9;Z7NJ;MSzM;u9Ph;8Ziy", "pdf_size": 3203588, "recommendation": "5;6;6;6;6;8", "confidence": "4;3;4;3;4;4", "correctness":
"3;4;3;4;3;3", "technical_novelty": "2;3;3;2;2;3", "empirical_novelty": "2;3;2;2;2;3", "wc_summary_paper": "82;84;43;50;111;104", "wc_strength_and_weaknesses": "271;258;353;191;384;383", "wc_clarity_quality_novelty_and_reproducibility": "5;31;94;39;46;117", "wc_summary_review": "5;60;26;62;97;68", "wc_review": "363;433;516;342;638;672", "wc_reply_reviewers": "0;0;94;0;310;0", "wc_reply_authors": "757;421;1254;506;1523;874", "reply_reviewers": "0;0;1;0;2;0", "reply_authors": "2;2;3;1;4;2", "recommendation_avg": [ 6.166666666666667, 0.8975274678557507 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 79.0, 25.232254490367417 ], "wc_strength_and_weaknesses_avg": [ 306.6666666666667, 71.8486062649946 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 55.333333333333336, 38.247730506382844 ], "wc_summary_review_avg": [ 53.0, 29.78814081699852 ], "wc_review_avg": [ 494.0, 127.07609268990502 ], "wc_reply_reviewers_avg": [ 67.33333333333333, 113.82247385975916 ], "wc_reply_authors_avg": [ 889.1666666666666, 391.2677755990419 ], "reply_reviewers_avg": [ 0.5, 0.7637626158259734 ], "reply_authors_avg": [ 2.3333333333333335, 0.9428090415820634 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 9, 0 ], "corr_recommendation_confidence": 0.13130643285972254, "corr_recommendation_correctness": -0.13130643285972254, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9043701951140659378&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "pdf": "https://openreview.net/pdf?id=8T4qmZbTkW7", "email": ";;huawei.com;sjtu.edu.cn;;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;huawei.com", "author_num": 9, "aff_unique_index": "0;1;1;1;1;0", "aff_unique_norm": "Huawei;Shanghai Jiao Tong University", "aff_unique_dep": "Huawei Technologies;", "aff_unique_url": "https://www.huawei.com;https://www.sjtu.edu.cn", "aff_unique_abbr": "Huawei;SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "8TKFt2x3Sx", "title": "Injecting Image Details into CLIP's Feature Space", "track": "main", "status": "Reject", "tldr": "We propose a framework, including a model-agnostic complete cover scheme to obtain image patches, a fusing model, the corresponding query proxy loss and a new text-image retrieval benchmark.", "abstract": "Although CLIP-like Visual Language Models provide a functional joint feature space for image and text, due to the limitation of the CILP-like model's image input size (e.g., 224), subtle details are lost in the feature representation if we input high-resolution images (e.g., 2240). In this work, we introduce an efficient framework that can produce a single feature representation for a high-resolution image that injects image details and shares the same semantic space as the original CLIP. In the framework, we train a feature fusing model based on CLIP features extracted from a carefully designed image patch method (Complete Cover) that can cover objects of any scale, weakly supervised by image-agnostic class prompted queries. We validate our framework by retrieving images from class prompted queries on the existing real-world and synthetic datasets, showing significant performance improvement on these tasks. 
Furthermore, to fully demonstrate our framework's detail retrieval ability, we construct a CLEVR-like synthetic dataset called CLEVR-DS, which is fully annotated and has a controllable object scale. ", "keywords": "Text-Based Information Retrieval;Fine-Detail;CLIP;Single Feature;Detail Compression;Complete Cover;Feature Space Alignment;Self-Supervised", "primary_area": "", "supplementary_material": "", "author": "Zilun Zhang;Cuifeng Shen;Shen Yuan;Huixin Xiong;Xinyu Zhou", "authorids": "~Zilun_Zhang2;~Cuifeng_Shen1;~Shen_Yuan2;~Huixin_Xiong1;~Xinyu_Zhou1", "gender": "M;M;M;;M", "homepage": ";;https://github.com/shen453011331;https://github.com/xionghuixin;", "dblp": "262/0557;;;;27/3481-4", "google_scholar": ";;;;Jv4LCj8AAAAJ", "orcid": ";0000-0001-5292-9784;;;", "linkedin": "zilun-zhang-a2537113b/;;;;xinyu-zhou-59aa3462/", "or_profile": "~Zilun_Zhang2;~Cuifeng_Shen1;~Shen_Yuan2;~Huixin_Xiong1;~Xinyu_Zhou1", "aff": "Binjiang Institute;Peking University;Megvii Technology Inc.;Megvii Technology Inc.;Megvii Technology Inc.", "aff_domain": "zju-bj.com;pku.edu.cn;megvii.com;megvii.com;megvii.com", "position": "PhD student;MS student;Researcher;Researcher;Researcher", "bibtex": "@misc{\nzhang2023injecting,\ntitle={Injecting Image Details into {CLIP}'s Feature Space},\nauthor={Zilun Zhang and Cuifeng Shen and Shen Yuan and Huixin Xiong and Xinyu Zhou},\nyear={2023},\nurl={https://openreview.net/forum?id=8TKFt2x3Sx}\n}", "github": "", "project": "", "reviewers": "9bd7;hJAs;qWGQ;PHZe", "site": "https://openreview.net/forum?id=8TKFt2x3Sx", "pdf_size": 3753099, "recommendation": "3;3;3;3", "confidence": "4;3;4;4", "correctness": "2;2;3;3", "technical_novelty": "2;2;2;2", "empirical_novelty": "1;3;2;2", "wc_summary_paper": "101;237;70;107", "wc_strength_and_weaknesses": "366;354;263;119", "wc_clarity_quality_novelty_and_reproducibility": "52;39;56;22", "wc_summary_review": "86;48;32;37", "wc_review": "605;678;421;285", "wc_reply_reviewers": "41;57;45;17", "wc_reply_authors": "783;397;326;313", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 128.75, 64.05612773185716 ], "wc_strength_and_weaknesses_avg": [ 275.5, 98.74335420675156 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 42.25, 13.273563952458284 ], "wc_summary_review_avg": [ 50.75, 21.158627082114755 ], "wc_review_avg": [ 497.25, 154.22771313872224 ], "wc_reply_reviewers_avg": [ 40.0, 14.52583904633395 ], "wc_reply_authors_avg": [ 454.75, 192.19309951192315 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Ol8De0eJ4O0J:scholar.google.com/&scioq=Injecting+Image+Details+into+CLIP%27s+Feature+Space&hl=en&as_sdt=0,10", "gs_version_total": 3, "aff_unique_index": "0;1;2;2;2", "aff_unique_norm": "Binjiang Institute;Peking University;Megvii Technology", "aff_unique_dep": ";;", "aff_unique_url": ";http://www.pku.edu.cn;https://www.megvii.com", "aff_unique_abbr": ";Peking U;Megvii", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "8TjyUm_XarL", "title": "Domain Generalization in
Regression", "track": "main", "status": "Withdraw", "tldr": "We propose a new domain generalization setting in regression scenario and a weighted meta-learning solution.", "abstract": "In the context of classification, \\textit{domain generalization} (DG) aims to predict the labels of unseen target-domain data only using labeled source-domain data, where the source and target domains usually share \\textit{the same label set}. However, in the context of regression, DG is not well studied in the literature, and the main reason is that ranges of response variable in both domains are often \\textit{different}, even disjoint under some extreme conditions. In this paper, we study a new problem setting: \\textit{domain generalization in regression} (DGR), and propose a weighted meta-learning strategy to get optimal meta-initialization across disjoint domains to help address the DGR problem. The motivation is that when the meta-model performs well on one domain, we hope such a model also performs well in other related domains. To measure the relatedness regarding domains in the context of regression, we use the feature discrepancy in meta-space to calculate the discrepancy between any two domains and treat the discrepancy as the weight of a meta-training task in the meta-learning framework. The extensive regression experiments on standard domain generalization benchmark demonstrate the superiority of the proposed method.", "keywords": "domain generalization;regression;meta-learning", "primary_area": "", "supplementary_material": "", "author": "Ning Ma;Feng Liu;Haishuai Wang;Sheng Zhou;Jiajun Bu;Bo Han", "authorids": "~Ning_Ma1;~Feng_Liu2;~Haishuai_Wang2;~Sheng_Zhou1;~Jiajun_Bu1;~Bo_Han1", "gender": "M;M;M;M;M;M", "homepage": "https://ningma-ai.github.io/;https://fengliu90.github.io/index.html;https://www.linkedin.com/in/haishuai-wang-b5241775/;https://zhoushengisnoob.github.io/;https://person.zju.edu.cn/bjj;https://bhanml.github.io/", "dblp": "60/3634/;77/1318-3;163/0767;34/4858-4.html;50/3147;241/0472-3", "google_scholar": "ZjX-TDIAAAAJ;https://scholar.google.com/citations?hl=en;;https://scholar.google.co.jp/citations?user=Ss76nMwAAAAJ;OgZP2okAAAAJ;nTNjqHwAAAAJ", "orcid": ";0000-0002-5005-9129;0000-0003-1617-0920;0000-0003-3645-1041;0000-0002-1097-2044;", "linkedin": ";alexfengliu;;;;", "or_profile": "~Ning_Ma1;~Feng_Liu2;~Haishuai_Wang2;~Sheng_Zhou1;~Jiajun_Bu1;~bo_han2", "aff": "Zhejiang University;University of Melbourne;Zhejiang University;Zhejiang University;Zhejiang University;RIKEN", "aff_domain": "zju.edu.cn;unimelb.edu.au;zju.edu.cn;zju.edu.cn;zju.edu.cn;riken.jp", "position": "PhD student;Assistant Professor;Research Professor;Associate Professor;Full Professor;Adjunct Scientist", "bibtex": "@misc{\nma2023domain,\ntitle={Domain Generalization in Regression},\nauthor={Ning Ma and Feng Liu and Haishuai Wang and Sheng Zhou and Jiajun Bu and Bo Han},\nyear={2023},\nurl={https://openreview.net/forum?id=8TjyUm_XarL}\n}", "github": "", "project": "", "reviewers": "rq95;Wdrk;5Saq;MRvD", "site": "https://openreview.net/forum?id=8TjyUm_XarL", "pdf_size": 1540780, "recommendation": "3;3;3;6", "confidence": "5;5;4;2", "correctness": "3;4;4;3", "technical_novelty": "3;1;2;3", "empirical_novelty": "2;1;3;3", "wc_summary_paper": "74;39;54;65", "wc_strength_and_weaknesses": "815;593;21;102", "wc_clarity_quality_novelty_and_reproducibility": "118;11;113;10", "wc_summary_review": "121;2;27;46", "wc_review": "1128;645;215;223", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", 
"reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.75, 1.299038105676658 ], "confidence_avg": [ 4.0, 1.224744871391589 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.25, 0.82915619758885 ], "empirical_novelty_avg": [ 2.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 58.0, 13.057564857200596 ], "wc_strength_and_weaknesses_avg": [ 382.75, 331.93702339449874 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 63.0, 52.53094326204318 ], "wc_summary_review_avg": [ 49.0, 44.401576548586654 ], "wc_review_avg": [ 552.75, 374.91090608303193 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.9428090415820632, "corr_recommendation_correctness": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:sgtXHujP_x8J:scholar.google.com/&scioq=Domain+Generalization+in+Regression&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;0;0;0;2", "aff_unique_norm": "Zhejiang University;University of Melbourne;RIKEN", "aff_unique_dep": ";;", "aff_unique_url": "https://www.zju.edu.cn;https://www.unimelb.edu.au;https://www.riken.jp", "aff_unique_abbr": "ZJU;UniMelb;RIKEN", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;2", "aff_country_unique": "China;Australia;Japan" }, { "id": "8Tr3v4ueNd7", "title": "Exphormer: Scaling Graph Transformers with Expander Graphs", "track": "main", "status": "Reject", "tldr": "We show how to use expander graphs to devise sparse graph transformers that are powerful and scalable.", "abstract": "Graph transformers have emerged as a promising architecture for a variety of graph learning and representation tasks. Despite their successes, it remains challenging to scale graph transformers to large graphs while maintaining accuracy competitive with message-passing networks. In this paper, we introduce Exphormer, a framework for building powerful and scalable graph transformers. Exphormer consists of a sparse attention mechanism based on expander graphs, whose mathematical characteristics, such as spectral expansion, and sparsity, yield graph transformers with complexity only linear in the size of the graph, while allowing us to prove desirable theoretical properties of the resulting transformer models. We show that incorporating Exphormer into the recently-proposed GraphGPS framework produces models with competitive empirical results on a wide variety of graph datasets, including state-of-the-art results on three datasets. We also show that Exphormer can scale to datasets on larger graphs than shown in previous graph transformer architectures.", "keywords": "Graph neural networks;Transformers", "primary_area": "", "supplementary_material": "/attachment/263eae3d14b4c60f8a2534aa717f7c1f04f961fe.zip", "author": "Hamed Shirzad;Ameya Velingker;Balaji Venkatachalam;Danica J. 
Sutherland;Ali Kemal Sinop", "authorids": "~Hamed_Shirzad1;~Ameya_Velingker1;~Balaji_Venkatachalam1;~Danica_J._Sutherland1;~Ali_Kemal_Sinop1", "gender": "M;M;;M;F", "homepage": "https://sites.google.com/view/hamedshirzad/home;http://www.ameyavelingker.com;https://ijalabv.wordpress.com/;;http://www.djsutherland.ml", "dblp": "295/9054;117/3666.html;39/1070.html;29/2539;92/10966", "google_scholar": "https://scholar.google.ca/citations?user=A2CbSLIAAAAJ;6dFFudUAAAAJ;e_YXLdYAAAAJ;;https://scholar.google.co.uk/citations?user=uO_NqicAAAAJ", "orcid": ";;;;0000-0002-1525-3532", "linkedin": "hamed-shirzad-84181473/?originalSubdomain=ir;ameya-velingker-5811b711;;;", "or_profile": "~Hamed_Shirzad1;~Ameya_Velingker1;~Balaji_Venkatachalam1;~Ali_Kemal_Sinop1;~Danica_J._Sutherland2", "aff": "University of British Columbia;Google;Google;Google;University of British Columbia", "aff_domain": "cs.ubc.ca;google.com;google.com;google.com;cs.ubc.ca", "position": "PhD student;Research Scientist;Researcher;Researcher;Assistant Professor", "bibtex": "@misc{\nshirzad2023exphormer,\ntitle={Exphormer: Scaling Graph Transformers with Expander Graphs},\nauthor={Hamed Shirzad and Ameya Velingker and Balaji Venkatachalam and Danica J. Sutherland and Ali Kemal Sinop},\nyear={2023},\nurl={https://openreview.net/forum?id=8Tr3v4ueNd7}\n}", "github": "", "project": "", "reviewers": "X95y;U7gQ;4gAE;eDs8", "site": "https://openreview.net/forum?id=8Tr3v4ueNd7", "pdf_size": 325610, "recommendation": "3;3;5;6", "confidence": "4;4;3;4", "correctness": "2;3;3;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "107;105;198;117", "wc_strength_and_weaknesses": "382;310;172;270", "wc_clarity_quality_novelty_and_reproducibility": "76;45;190;192", "wc_summary_review": "41;18;130;81", "wc_review": "606;478;690;660", "wc_reply_reviewers": "16;16;55;53", "wc_reply_authors": "406;487;118;419", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 131.75, 38.518664307060284 ], "wc_strength_and_weaknesses_avg": [ 283.5, 75.86006854729305 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 125.75, 66.16787362459218 ], "wc_summary_review_avg": [ 67.5, 42.547032799009614 ], "wc_review_avg": [ 608.5, 81.1341481744894 ], "wc_reply_reviewers_avg": [ 35.0, 19.013153341831543 ], "wc_reply_authors_avg": [ 357.5, 141.65539170818738 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": 0.5555555555555555, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8039511906627030151&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;1;1;0", "aff_unique_norm": "University of British Columbia;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.ubc.ca;https://www.google.com", "aff_unique_abbr": "UBC;Google", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;1;1;0", "aff_country_unique": "Canada;United States" }, { "title": "Rethinking Self-Supervised Visual Representation Learning in Pre-training for 3D Human Pose and Shape Estimation", "status": "Poster", "track": "main", "site": 
"https://iclr.cc/virtual/2023/poster/11695", "id": "8U4joMeLRF", "poster": "/media/PosterPDFs/ICLR%202023/11695.png?t=1682445064.490041", "openreview": "https://openreview.net/forum?id=8U4joMeLRF", "slides": "https://iclr.cc/virtual/2023/poster/11695", "video": "https://iclr.cc/virtual/2023/poster/11695", "author_site": "Hongsuk Choi, Hyeongjin Nam, Taeryung Lee, Gyeongsik Moon, Kyoung Mu Lee", "tldr": "Empirical Study of Pre-training a Backbone for 3D Human Pose and Shape Estimation", "abstract": "Recently, a few self-supervised representation learning (SSL) methods have outperformed the ImageNet classification pre-training for vision tasks such as object detection. However, its effects on 3D human body pose and shape estimation (3DHPSE) are open to question, whose target is fixed to a unique class, the human, and has an inherent task gap with SSL. We empirically study and analyze the effects of SSL and further compare it with other pre-training alternatives for 3DHPSE. The alternatives are 2D annotation-based pre-training and synthetic data pre-training, which share the motivation of SSL that aims to reduce the labeling cost. They have been widely utilized as a source of weak-supervision or fine-tuning, but have not been remarked as a pre-training source. SSL methods underperform the conventional ImageNet classification pre-training on multiple 3DHPSE benchmarks by 7.7% on average. In contrast, despite a much less amount of pre-training data, the 2D annotation-based pre-training improves accuracy on all benchmarks and shows faster convergence during fine-tuning. Our observations challenge the naive application of the current SSL pre-training to 3DHPSE and relight the value of other data types in the pre-training aspect.", "keywords": "pre-training;3D human pose and shape estimation;self-supervised representation learning", "primary_area": "", "supplementary_material": "", "author": "Hongsuk Choi;Hyeongjin Nam;Taeryung Lee;Gyeongsik Moon;Kyoung Mu Lee", "authorids": "~Hongsuk_Choi1;namhj28@gmail.com;trlee94@snu.ac.kr;~Gyeongsik_Moon1;~Kyoung_Mu_Lee2", "gender": "M;;;M;", "homepage": "https://hongsukchoi.github.io/;;;https://mks0601.github.io/;", "dblp": "229/3689;;;185/6852;", "google_scholar": "CZbowncAAAAJ;;;2f2D258AAAAJ;", "orcid": ";;;;", "linkedin": ";;;gyeongsik-moon-bb9a73152/;", "or_profile": "~Hongsuk_Choi1;namhj28@gmail.com;trlee94@snu.ac.kr;~Gyeongsik_Moon1;~Kyoung_Mu_Lee2", "aff": "Samsung Research America;;;Meta;", "aff_domain": "samsung.com;;;meta.com;", "position": "Researcher;;;Postdoc;", "bibtex": "@inproceedings{\nchoi2023rethinking,\ntitle={Rethinking Self-Supervised Visual Representation Learning in Pre-training for 3D Human Pose and Shape Estimation},\nauthor={Hongsuk Choi and Hyeongjin Nam and Taeryung Lee and Gyeongsik Moon and Kyoung Mu Lee},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=8U4joMeLRF}\n}", "github": "", "project": "", "reviewers": "C9uS;jtxR;EdAX;s7cS", "pdf_size": 2503383, "recommendation": "6;6;8;8", "confidence": "5;5;4;4", "correctness": "3;4;4;3", "technical_novelty": "3;2;3;2", "empirical_novelty": "2;3;3;4", "wc_summary_paper": "20;46;71;173", "wc_strength_and_weaknesses": "366;159;196;136", "wc_clarity_quality_novelty_and_reproducibility": "18;9;36;71", "wc_summary_review": "73;159;76;78", "wc_review": "477;373;379;458", "wc_reply_reviewers": "199;0;114;0", "wc_reply_authors": "1876;1407;793;1049", "reply_reviewers": "2;0;2;0", "reply_authors": "5;3;4;3", 
"recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 4.5, 0.5 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 77.5, 58.01077486122729 ], "wc_strength_and_weaknesses_avg": [ 214.25, 90.18973056839675 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 33.5, 23.732888572611635 ], "wc_summary_review_avg": [ 96.5, 36.12824379900025 ], "wc_review_avg": [ 421.75, 46.28917260008003 ], "wc_reply_reviewers_avg": [ 78.25, 83.82235680294369 ], "wc_reply_authors_avg": [ 1281.25, 406.77658179890346 ], "reply_reviewers_avg": [ 1.0, 1.0 ], "reply_authors_avg": [ 3.75, 0.82915619758885 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7026637370081343085&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=8U4joMeLRF", "email": "samsung.com;;;meta.com;", "author_num": 5, "aff_unique_index": "0;1", "aff_unique_norm": "Samsung;Meta", "aff_unique_dep": "Samsung Research America;Meta Platforms, Inc.", "aff_unique_url": "https://www.samsung.com/us/careers/research/;https://meta.com", "aff_unique_abbr": "SRA;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "8VCiVV97Pji", "title": "Outlier Robust Adversarial Training", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Supervised learning models are challenged by the intrinsic complexities of training data such as outliers and minority subpopulations and intentional attacks at inference time with adversarial samples. While traditional robust learning methods and the recent adversarial training approaches are designed to handle each of the two challenges, to date, no work has been done to develop models that are robust with regard to the low-quality training data and the potential adversarial attack at inference time simultaneously. It is for this reason that we introduce Outlier Robust Adversarial Training (ORAT) in this work. ORAT is based on a bi-level optimization formulation of adversarial training with a robust rank-based loss function. Theoretically, we show that the learning objective of ORAT satisfies the H-consistency in binary classification, which establishes it as a proper surrogate to adversarial 0/1 loss. Furthermore, we analyze its generalization ability and provide uniform convergence rates in high probability. ORAT can be optimized with a simple algorithm. Experimental evaluations on three benchmark datasets demonstrate the effectiveness and robustness of ORAT in handling outliers and adversarial attacks. 
", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/413d19c46e3978008aef8a04173bba02f93acec8.zip", "author": "Shu Hu;Zhenhuan Yang;Xin Wang;Yiming Ying;Siwei Lyu", "authorids": "~Shu_Hu1;~Zhenhuan_Yang1;~Xin_Wang35;~Yiming_Ying1;~Siwei_Lyu1", "gender": "M;M;M;M;M", "homepage": "https://web.ics.purdue.edu/~hu968/;https://zhenhuan-yang.github.io/;;https://www.sydney.edu.au/science/about/our-people/academic-staff/yiming-ying.html;https://www.cse.buffalo.edu/~siweilyu", "dblp": ";119/6965;;41/2012;51/4482", "google_scholar": "q4qu28QAAAAJ;pIkw46EAAAAJ;jUWx8fcAAAAJ;xnA_lMMAAAAJ;wefAEM4AAAAJ", "orcid": ";;;;0000-0002-0992-685X", "linkedin": ";zhenhuan-yang-phd-86722b115/;;;siwei-lyu-0806022/", "or_profile": "~Shu_Hu1;~Zhenhuan_Yang1;~Xin_Wang35;~Yiming_Ying1;~Siwei_Lyu1", "aff": "Carnegie Mellon University;;Keya Medical;State University of New York at Albany;State University of New York, Buffalo", "aff_domain": "cmu.edu;;keyamedna.com;albany.edu;buffalo.edu", "position": "Postdoc;;Researcher;Full Professor;Full Professor", "bibtex": "@misc{\nhu2023outlier,\ntitle={Outlier Robust Adversarial Training},\nauthor={Shu Hu and Zhenhuan Yang and Xin Wang and Yiming Ying and Siwei Lyu},\nyear={2023},\nurl={https://openreview.net/forum?id=8VCiVV97Pji}\n}", "github": "", "project": "", "reviewers": "A9jx;AkMH;K56T;TxSx", "site": "https://openreview.net/forum?id=8VCiVV97Pji", "pdf_size": 2571055, "recommendation": "3;5;5;8", "confidence": "4;4;3;3", "correctness": "2;3;3;4", "technical_novelty": "3;3;2;3", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "107;87;296;25", "wc_strength_and_weaknesses": "521;377;175;327", "wc_clarity_quality_novelty_and_reproducibility": "29;34;308;49", "wc_summary_review": "57;72;72;39", "wc_review": "714;570;851;440", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.25, 1.7853571071357126 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 128.75, 101.18392905990555 ], "wc_strength_and_weaknesses_avg": [ 350.0, 123.61634196173254 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 105.0, 117.43295959823205 ], "wc_summary_review_avg": [ 60.0, 13.583077707206124 ], "wc_review_avg": [ 643.75, 153.98112709030286 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.7001400420140049, "corr_recommendation_correctness": 0.9901475429766743, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=866946738225240122&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Carnegie Mellon University;Keya Medical;State University of New York;State University of New York at Buffalo", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.cmu.edu;;https://www.albany.edu;https://www.buffalo.edu", "aff_unique_abbr": "CMU;;SUNY Albany;SUNY Buffalo", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Albany;Buffalo", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States;" }, { "id": "8VvQ4SpvZVi", "title": "Dual personalization for federated recommendation on devices", "track": "main", "status": "Reject", "tldr": "", "abstract": 
"Federated recommendation is a new Internet service architecture that aims to provide privacy-preserving recommendation services in federated settings. Existing solutions are used to combine distributed recommendation algorithms and privacy-preserving mechanisms. Thus it inherently takes the form of heavyweight models at the server and hinders the deployment of on-device intelligent models to end-users. This paper proposes a novel Personalized Federated Recommendation (PFedRec) framework to learn many user-specific lightweight models to be deployed on smart devices rather than a heavyweight model on a server. Moreover, we propose a new dual personalization mechanism to effectively learn fine-grained personalization on both users and items. The overall learning process is formulated into a unified federated optimization framework. Specifically, unlike previous methods that share exactly the same item embeddings across users in a federated system, dual personalization allows mild finetuning of item embeddings for each user to generate user-specific views for item representations which can be integrated into existing federated recommendation methods to gain improvements immediately. Experiments on multiple benchmark datasets have demonstrated the effectiveness of PFedRec and the dual personalization mechanism. Moreover, we provide visualizations and in-depth analysis of the personalization techniques in item embedding, which shed novel insights on the design of RecSys in federated settings.", "keywords": "federated learning;personalization;recommmendation system", "primary_area": "", "supplementary_material": "/attachment/af8da7cad069ea7e460ad0b2e9131a98349b68d0.zip", "author": "Chunxu Zhang;Guodong Long;Tianyi Zhou;Zijian Zhang;Bo Yang", "authorids": "~Chunxu_Zhang1;~Guodong_Long2;~Tianyi_Zhou1;zhangzj2114@mails.jlu.edu.cn;ybo@jlu.edu.cn", "gender": "F;M;M;;", "homepage": "https://zhangcx19.github.io/;https://www.uts.edu.au/staff/guodong.long;https://tianyizhou.github.io/;;", "dblp": "192/1046.html;34/10089;88/8205-1;;", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.au/citations?user=Pl8m7hMAAAAJ;OKvgizMAAAAJ;;", "orcid": "0000-0003-0825-872X;0000-0003-3740-9515;0000-0001-5348-0632;;", "linkedin": ";;tianyizhou;;", "or_profile": "~Chunxu_Zhang1;~Guodong_Long2;~Tianyi_Zhou1;zhangzj2114@mails.jlu.edu.cn;ybo@jlu.edu.cn", "aff": "Jilin University;University of Technology Sydney;University of Maryland, College Park;;", "aff_domain": "jlu.edu.cn;uts.edu.au;umd.edu;;", "position": "PhD student;Associate Professor;Assistant Professor;;", "bibtex": "@misc{\nzhang2023dual,\ntitle={Dual personalization for federated recommendation on devices},\nauthor={Chunxu Zhang and Guodong Long and Tianyi Zhou and Zijian Zhang and Bo Yang},\nyear={2023},\nurl={https://openreview.net/forum?id=8VvQ4SpvZVi}\n}", "github": "", "project": "", "reviewers": "kuz5;2m4p;rUEH;EAiQ", "site": "https://openreview.net/forum?id=8VvQ4SpvZVi", "pdf_size": 805659, "recommendation": "3;5;6;6", "confidence": "5;4;3;3", "correctness": "2;4;3;3", "technical_novelty": "1;3;3;2", "empirical_novelty": "2;3;3;1", "wc_summary_paper": "59;33;68;82", "wc_strength_and_weaknesses": "259;85;152;34", "wc_clarity_quality_novelty_and_reproducibility": "49;40;17;71", "wc_summary_review": "13;54;33;172", "wc_review": "380;212;270;359", "wc_reply_reviewers": "62;0;0;0", "wc_reply_authors": "1519;1268;1098;1407", "reply_reviewers": "1;0;0;0", "reply_authors": "4;4;4;4", "recommendation_avg": [ 5.0, 
1.224744871391589 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.25, 0.82915619758885 ], "empirical_novelty_avg": [ 2.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 60.5, 17.867568385205637 ], "wc_strength_and_weaknesses_avg": [ 132.5, 84.1739270795892 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 44.25, 19.356846334049358 ], "wc_summary_review_avg": [ 68.0, 61.7697336889192 ], "wc_review_avg": [ 305.25, 67.85047899609847 ], "wc_reply_reviewers_avg": [ 15.5, 26.846787517317598 ], "wc_reply_authors_avg": [ 1323.0, 157.4182327432245 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 4.0, 0.0 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.9847319278346618, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:0D57teJG_m0J:scholar.google.com/&scioq=Dual+personalization+for+federated+recommendation+on+devices&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2", "aff_unique_norm": "Jilin University;University of Technology Sydney;University of Maryland", "aff_unique_dep": ";;", "aff_unique_url": "http://www.jlu.edu.cn;https://www.uts.edu.au;https://www/umd.edu", "aff_unique_abbr": "JLU;UTS;UMD", "aff_campus_unique_index": "1", "aff_campus_unique": ";College Park", "aff_country_unique_index": "0;1;2", "aff_country_unique": "China;Australia;United States" }, { "id": "8Vxuz_PJNus", "title": "Federated Learning of Large Models at the Edge via Principal Sub-Model Training", "track": "main", "status": "Reject", "tldr": "We provide a sub-model training method that enables resource-constrained clients to train large models in FL.", "abstract": "Limited compute, memory, and communication capabilities of edge users create a significant bottleneck for federated learning (FL) of large models. Current literature typically tackles the challenge with a heterogeneous client setting or allows training to be offloaded to the server. However, the former requires a fraction of clients to train near-full models, which may not be achievable at the edge; while the latter can compromise privacy with sharing of intermediate representations or labels. In this work, we consider a realistic, but much less explored, cross-device FL setting in which no client has the capacity to train a full large model nor is willing to share any intermediate representations with the server. To this end, we present Principal Sub-Model (PriSM) training methodology, which leverages models\u2019 low-rank structure and kernel orthogonality to train sub-models in the orthogonal kernel space. More specifically, by applying singular value decomposition to original kernels in the server model, PriSM first obtains a set of principal orthogonal kernels with importance weighed by their singular values. Thereafter, PriSM utilizes a novel sampling strategy that selects different subsets of the principal kernels independently to create sub-models for clients with reduced computation and communication requirements. Importantly, a kernel with a large singular value is assigned with a high sampling probability. Thus, each sub-model is a low-rank approximation of the full large model, and all clients together achieve nearly full coverage of the principal kernels.
To further improve memory efficiency, PriSM exploits low-rank structure in intermediate representations and allows each sub-model to learn only a subset of them while still preserving training performance. Our extensive evaluations on multiple datasets in various resource-constrained settings demonstrate that PriSM can yield an improved performance of up to $10\\%$ compared to existing alternatives, when training sub-models with only $20\\%$ principal kernels ($\\sim 5\\%$ of the full server model).", "keywords": "Federated Learning;Resource-Constrained Clients;Sub-Model Training", "primary_area": "", "supplementary_material": "", "author": "Yue Niu;Saurav Prakash;Souvik Kundu;Sunwoo Lee;Salman Avestimehr", "authorids": "~Yue_Niu1;~Saurav_Prakash1;~Souvik_Kundu2;~Sunwoo_Lee1;~Salman_Avestimehr1", "gender": ";M;M;M;", "homepage": ";https://sauravpr.com/;https://ksouvik52.github.io;https://sites.google.com/view/sunwoolee;", "dblp": ";194/2710.html;126/2210;56/7811-1;", "google_scholar": ";VhnTrugAAAAJ;https://scholar.google.com/citations?hl=en;WA9KNNcAAAAJ;", "orcid": ";0000-0002-1911-4062;0000-0002-3533-9405;0000-0001-6334-3068;", "linkedin": ";saurav-prakash-38504264/;souvik-kundu-64922b50/;sunwoo-lee-90a7308a;", "or_profile": "~Yue_Niu1;~Saurav_Prakash1;~Souvik_Kundu2;~Sunwoo_Lee1;~Salman_Avestimehr1", "aff": ";University of Illinois, Urbana Champaign;Intel;Inha University;", "aff_domain": ";uiuc.edu;intel.com;inha.ac.kr;", "position": ";Postdoc;Researcher;Assistant Professor;", "bibtex": "@misc{\nniu2023federated,\ntitle={Federated Learning of Large Models at the Edge via Principal Sub-Model Training},\nauthor={Yue Niu and Saurav Prakash and Souvik Kundu and Sunwoo Lee and Salman Avestimehr},\nyear={2023},\nurl={https://openreview.net/forum?id=8Vxuz_PJNus}\n}", "github": "", "project": "", "reviewers": "59uv;v26F;vmY3", "site": "https://openreview.net/forum?id=8Vxuz_PJNus", "pdf_size": 1267211, "recommendation": "3;6;6", "confidence": "4;3;3", "correctness": "4;4;3", "technical_novelty": "2;3;2", "empirical_novelty": "2;3;3", "wc_summary_paper": "139;78;66", "wc_strength_and_weaknesses": "186;73;113", "wc_clarity_quality_novelty_and_reproducibility": "64;19;95", "wc_summary_review": "222;100;40", "wc_review": "611;270;314", "wc_reply_reviewers": "0;0;46", "wc_reply_authors": "0;0;53", "reply_reviewers": "0;0;1", "reply_authors": "0;0;1", "recommendation_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 94.33333333333333, 31.961782734314987 ], "wc_strength_and_weaknesses_avg": [ 124.0, 46.783187863447985 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 59.333333333333336, 31.201851796897497 ], "wc_summary_review_avg": [ 120.66666666666667, 75.72464738922343 ], "wc_review_avg": [ 398.3333333333333, 151.44709380579812 ], "wc_reply_reviewers_avg": [ 15.333333333333334, 21.684607956387456 ], "wc_reply_authors_avg": [ 17.666666666666668, 24.984439601924677 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 0.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": -0.5, "gs_citation": 16, "gs_cited_by_link":
"https://scholar.google.com/scholar?cites=2078413536393909320&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Illinois Urbana-Champaign;Intel;Inha University", "aff_unique_dep": ";Intel Corporation;", "aff_unique_url": "https://illinois.edu;https://www.intel.com;https://www.inha.edu/", "aff_unique_abbr": "UIUC;Intel;Inha", "aff_campus_unique_index": "0", "aff_campus_unique": "Urbana-Champaign;", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;South Korea" }, { "title": "Agent-based Graph Neural Networks", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11765", "id": "8WTAh0tj2jC", "poster": "/media/PosterPDFs/ICLR%202023/11765.png?t=1682500610.8720565", "openreview": "https://openreview.net/forum?id=8WTAh0tj2jC", "slides": "https://iclr.cc/virtual/2023/poster/11765", "video": "https://iclr.cc/virtual/2023/poster/11765", "author_site": "Karolis Martinkus, P\u00e1l Andr\u00e1s Papp, Benedikt Schesch, Roger Wattenhofer", "tldr": "We present a new agent-based sublinear and expressive GNN architecture for graph-level tasks.", "abstract": "We present a novel graph neural network we call AgentNet, which is designed specifically for graph-level tasks. AgentNet is inspired by sublinear algorithms, featuring a computational complexity that is independent of the graph size. The architecture of AgentNet differs fundamentally from the architectures of traditional graph neural networks. In AgentNet, some trained \\textit{neural agents} intelligently walk the graph, and then collectively decide on the output. We provide an extensive theoretical analysis of AgentNet: We show that the agents can learn to systematically explore their neighborhood and that AgentNet can distinguish some structures that are even indistinguishable by 2-WL. Moreover, AgentNet is able to separate any two graphs which are sufficiently different in terms of subgraphs. We confirm these theoretical results with synthetic experiments on hard-to-distinguish graphs and real-world graph classification tasks. 
In both cases, we compare favorably not only to standard GNNs but also to computationally more expensive GNN extensions.", "keywords": "Graph Neural Networks;GNN;Graph Classification;Expressive Graph Neural Networks;Sublinear algorithms", "primary_area": "", "supplementary_material": "", "author": "Karolis Martinkus;P\u00e1l Andr\u00e1s Papp;Benedikt Schesch;Roger Wattenhofer", "authorids": "~Karolis_Martinkus1;~P\u00e1l_Andr\u00e1s_Papp1;~Benedikt_Schesch1;~Roger_Wattenhofer1", "gender": "M;M;M;Not Specified", "homepage": "https://disco.ethz.ch/members/mkarolis;https://disco.ethz.ch/members/apapp;;https://disco.ethz.ch/members/wroger", "dblp": "276/5531;132/0356.html;;w/RogerWattenhofer", "google_scholar": "https://scholar.google.ch/citations?user=Sr6ho54AAAAJ;1GV5aZIAAAAJ;;https://scholar.google.ch/citations?user=EG3VPm4AAAAJ", "orcid": "0000-0002-5344-4321;;;", "linkedin": ";;benedikt-schesch-563a46143/;roger-wattenhofer-4466731/", "or_profile": "~Karolis_Martinkus1;~P\u00e1l_Andr\u00e1s_Papp1;~Benedikt_Schesch1;~Roger_Wattenhofer1", "aff": "Swiss Federal Institute of Technology;Huawei Technologies Ltd.;Department of Computer Science, ETHZ - ETH Zurich;Swiss Federal Institute of Technology", "aff_domain": "ethz.ch;huawei.com;inf.ethz.ch;ethz.ch", "position": "PhD student;Postdoc;MS student;Full Professor", "bibtex": "@inproceedings{\nmartinkus2023agentbased,\ntitle={Agent-based Graph Neural Networks},\nauthor={Karolis Martinkus and P{\\'a}l Andr{\\'a}s Papp and Benedikt Schesch and Roger Wattenhofer},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=8WTAh0tj2jC}\n}", "github": "", "project": "", "reviewers": "pbyu;wMRf;z2nH;6DSe;PGLs", "pdf_size": 556266, "recommendation": "5;5;6;6;8", "confidence": "5;4;4;3;3", "correctness": "2;3;4;3;4", "technical_novelty": "3;2;4;3;3", "empirical_novelty": "2;2;2;3;3", "wc_summary_paper": "72;215;69;52;178", "wc_strength_and_weaknesses": "424;617;162;174;148", "wc_clarity_quality_novelty_and_reproducibility": "30;326;45;46;105", "wc_summary_review": "93;82;61;30;46", "wc_review": "619;1240;337;302;477", "wc_reply_reviewers": "170;0;31;0;12", "wc_reply_authors": "2450;3463;168;658;162", "reply_reviewers": "1;0;1;0;1", "reply_authors": "6;6;1;1;1", "recommendation_avg": [ 6.0, 1.0954451150103321 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "correctness_avg": [ 3.2, 0.7483314773547882 ], "technical_novelty_avg": [ 3.0, 0.6324555320336759 ], "empirical_novelty_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_paper_avg": [ 117.2, 66.14952758712643 ], "wc_strength_and_weaknesses_avg": [ 305.0, 186.4210288567253 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 110.4, 110.81624429658316 ], "wc_summary_review_avg": [ 62.4, 22.983472322519066 ], "wc_review_avg": [ 595.0, 341.449264166728 ], "wc_reply_reviewers_avg": [ 42.6, 64.7011591859064 ], "wc_reply_authors_avg": [ 1380.2, 1338.4723232103083 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 3.0, 2.449489742783178 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.7319250547113999, "corr_recommendation_correctness": 0.7319250547113999, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=590003782233259466&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "pdf": "https://openreview.net/pdf?id=8WTAh0tj2jC", "email": "ethz.ch;huawei.com;inf.ethz.ch;ethz.ch", "author_num": 4, "aff_unique_index": "0;1;2;0", 
"aff_unique_norm": "Swiss Federal Institute of Technology;Huawei;ETH Zurich", "aff_unique_dep": ";Huawei Technologies;Department of Computer Science", "aff_unique_url": "https://www.ethz.ch;https://www.huawei.com;https://www.ethz.ch", "aff_unique_abbr": "ETH Zurich;Huawei;ETHZ", "aff_campus_unique_index": "1", "aff_campus_unique": ";Zurich", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "Switzerland;China" }, { "id": "8XQd91fDSf9", "title": "Two-Dimensional Weisfeiler-Lehman Graph Neural Networks for Link Prediction", "track": "main", "status": "Reject", "tldr": "We propose provably powerful 2-WL variants for link prediction and successfully implement them to get competitive results and speed advantage.", "abstract": "Link prediction is one important application of graph neural networks (GNNs). Most existing GNNs for link prediction are based on one-dimensional Weisfeiler-Lehman ($1$-WL) test. $1$-WL-GNNs first compute node representations by iteratively passing neighboring node features to the center, and then obtain link representations by aggregating the pairwise node representations. As pointed out by previous works, this two-step procedure results in low discriminating power, as $1$-WL-GNNs by nature learn node-level representations instead of link-level. In this paper, we study a completely different approach which can directly obtain node pair (link) representations based on \\textit{two-dimensional Weisfeiler-Lehman ($2$-WL) tests}. $2$-WL tests directly use links (2-tuples) as message passing units instead of nodes, and thus can directly obtain link representations. We theoretically analyze the expressive power of $2$-WL tests to discriminate non-isomorphic links, and prove their superior link discriminating power than $1$-WL. Based on different $2$-WL variants, we propose a series of novel $2$-WL-GNN models for link prediction. 
Experiments on a wide range of real-world datasets demonstrate their competitive performance to state-of-the-art baselines and superiority over plain $1$-WL-GNNs.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/550e60baca14171d2aa080a36363f5694439f4f8.zip", "author": "Yang Hu;Xiyuan Wang;Zhouchen Lin;Pan Li;Muhan Zhang", "authorids": "~Yang_Hu8;~Xiyuan_Wang1;~Zhouchen_Lin1;~Pan_Li2;~Muhan_Zhang1", "gender": "M;;M;;M", "homepage": "https://www.researchgate.net/profile/Yang-Hu-6;;https://zhouchenlin.github.io;;https://muhanzhang.github.io/", "dblp": ";95/8542;l/ZhouchenLin;https://dblp.org/pers/hd/l/Li_0005:Pan;157/5518", "google_scholar": ";;https://scholar.google.com.tw/citations?user=TanjFwoAAAAJ;IroP0EwAAAAJ;https://scholar.google.com.hk/citations?user=OBBqkosAAAAJ", "orcid": ";;0000-0003-1493-7569;;0000-0002-7680-6401", "linkedin": ";%E5%B8%8C%E5%85%83-%E7%8E%8B-969660221/;;pan-li-b951105a/;jerry-muhan-zhang-a33a1777/", "or_profile": "~Yang_Hu8;~Xiyuan_Wang1;~Zhouchen_Lin1;~Pan_Li2;~Muhan_Zhang1", "aff": ";Peking University;Peking University;Purdue University;Peking University", "aff_domain": ";pku.edu.cn;pku.edu.cn;purdue.edu;pku.edu.cn", "position": ";PhD student;Professor;Assistant Professor;Assistant Professor", "bibtex": "@misc{\nhu2023twodimensional,\ntitle={Two-Dimensional Weisfeiler-Lehman Graph Neural Networks for Link Prediction},\nauthor={Yang Hu and Xiyuan Wang and Zhouchen Lin and Pan Li and Muhan Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=8XQd91fDSf9}\n}", "github": "", "project": "", "reviewers": "5pff;aR3Z;kWUU", "site": "https://openreview.net/forum?id=8XQd91fDSf9", "pdf_size": 587470, "recommendation": "3;5;5", "confidence": "4;4;3", "correctness": "3;4;4", "technical_novelty": "1;1;2", "empirical_novelty": "2;2;2", "wc_summary_paper": "89;243;131", "wc_strength_and_weaknesses": "431;438;420", "wc_clarity_quality_novelty_and_reproducibility": "81;89;65", "wc_summary_review": "40;116;31", "wc_review": "641;886;647", "wc_reply_reviewers": "0;167;0", "wc_reply_authors": "835;542;420", "reply_reviewers": "0;1;0", "reply_authors": "2;1;1", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 1.3333333333333333, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 154.33333333333334, 64.9991452935259 ], "wc_strength_and_weaknesses_avg": [ 429.6666666666667, 7.408703590297623 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 78.33333333333333, 9.977753031397176 ], "wc_summary_review_avg": [ 62.333333333333336, 38.12552367582058 ], "wc_review_avg": [ 724.6666666666666, 114.10618836076429 ], "wc_reply_reviewers_avg": [ 55.666666666666664, 78.7245549721023 ], "wc_reply_authors_avg": [ 599.0, 174.15127523698087 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.49999999999999983, "corr_recommendation_correctness": 0.9999999999999998, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10876962340918458825&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Peking University;Purdue University", "aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;https://www.purdue.edu", 
"aff_unique_abbr": "Peking U;Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "China;United States" }, { "id": "8XfHh4XSQ0Q", "title": "Adaptive Block-wise Learning for Knowledge Distillation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Knowledge distillation allows the student network to improve its performance under the supervision of transferred knowledge. Existing knowledge distillation methods are implemented under the implicit hypothesis that knowledge from teacher and student contributes to each layer of the student network to the same extent. In this work, we argue that there should be different contributions of knowledge from the teacher and the student during training for each layer. Experimental results evidence this argument. To the end, we propose a novel Adaptive Block-wise Learning~(ABL) for Knowledge Distillation to automatically balance teacher-guided knowledge between self-knowledge in each block. Specifically, to solve the problem that the error backpropagation algorithm cannot assign weights to each block of the student network independently, we leverage the local error signals to approximate the global error signals on student objectives. Moreover, we utilize a set of meta variables to control the contribution of the student knowledge and teacher knowledge to each block during the training process. Finally, the extensive experiments prove the effectiveness of our method. Meanwhile, ABL provides an insightful view that in the shallow blocks, the weight of teacher guidance is greater, while in the deep blocks, student knowledge has more influence.", "keywords": "Knowledge distillation;Local error signals;Bilevel optimization", "primary_area": "", "supplementary_material": "", "author": "Tianyi Lei;Junyu Xie;Wang qian;Dezhong Peng;Xu Wang", "authorids": "~Tianyi_Lei1;~Junyu_Xie2;~Wang_qian2;~Dezhong_Peng1;~Xu_Wang12", "gender": ";M;M;M;", "homepage": "https://github.com/Tianyi-Lei;https://github.com/XieJunYu1106?tab=repositories;;https://cs.scu.edu.cn/info/1249/10284.htm;https://github.com/wangxu-scu", "dblp": ";;;;181/2815-28", "google_scholar": ";;https://scholar.google.com/citations?hl=en;0gupif8AAAAJ;https://scholar.google.com.hk/citations?user=XTOXhy4AAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Tianyi_Lei1;~Junyu_Xie2;~Wang_qian2;~Dezhong_Peng1;~Xu_Wang12", "aff": "Sichuan University;Sichuan University;Sichuan University;Sichuan University;Sichuan University", "aff_domain": "scu.edu.cn;scu.edu.cn;scu.edu.cn;scu.edu.cn;scu.edu.cn", "position": "MS student;MS student;PhD student;Full Professor;Associate Professor", "bibtex": "@misc{\nlei2023adaptive,\ntitle={Adaptive Block-wise Learning for Knowledge Distillation},\nauthor={Tianyi Lei and Junyu Xie and Wang qian and Dezhong Peng and Xu Wang},\nyear={2023},\nurl={https://openreview.net/forum?id=8XfHh4XSQ0Q}\n}", "github": "", "project": "", "reviewers": "Tp5S;Jk2C;BeZK;XFGM", "site": "https://openreview.net/forum?id=8XfHh4XSQ0Q", "pdf_size": 1585586, "recommendation": "3;5;6;8", "confidence": "4;3;5;2", "correctness": "3;3;3;3", "technical_novelty": "2;3;2;3", "empirical_novelty": "0;3;2;3", "wc_summary_paper": "132;35;53;101", "wc_strength_and_weaknesses": "583;16;381;207", "wc_clarity_quality_novelty_and_reproducibility": "42;84;6;56", "wc_summary_review": "20;26;46;91", "wc_review": "777;161;486;455", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", 
"reply_authors": "0;0;0;0", "recommendation_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.0, 1.224744871391589 ], "wc_summary_paper_avg": [ 80.25, 38.40166012036459 ], "wc_strength_and_weaknesses_avg": [ 296.75, 209.70976968181526 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 47.0, 28.089143810376278 ], "wc_summary_review_avg": [ 45.75, 27.842189209902298 ], "wc_review_avg": [ 469.75, 218.06578709187738 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.4961389383568338, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:XLPMHTchnfUJ:scholar.google.com/&scioq=Adaptive+Block-wise+Learning+for+Knowledge+Distillation&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Sichuan University", "aff_unique_dep": "", "aff_unique_url": "https://www.scu.edu.cn", "aff_unique_abbr": "SCU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Causality Compensated Attention for Contextual Biased Visual Recognition", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11804", "id": "8XqDnrmZQNF", "poster": "/media/PosterPDFs/ICLR%202023/11804.png?t=1681392343.0343797", "openreview": "https://openreview.net/forum?id=8XqDnrmZQNF", "slides": "https://iclr.cc/virtual/2023/poster/11804", "video": "https://iclr.cc/virtual/2023/poster/11804", "author_site": "Ruyang Liu, Jingjia Huang, Thomas Li, Ge Li", "tldr": "", "abstract": "Visual attention does not always capture the essential object representation desired for robust predictions. Attention modules tend to underline not only the target object but also the common co-occurring context that the module thinks helpful in the training. The problem is rooted in the confounding effect of the context leading to incorrect causalities between objects and predictions, which is further exacerbated by visual attention. In this paper, to learn causal object features robust for contextual bias, we propose a novel attention module named Interventional Dual Attention (IDA) for visual recognition. Specifically, IDA adopts two attention layers with multiple sampling intervention, which compensates the attention against the confounder context. Note that our method is model-agnostic and thus can be implemented on various backbones. Extensive experiments show our model obtains significant improvements in classification and detection with lower computation. In particular, we achieve the state-of-the-art results in multi-label classification on MS-COCO and PASCAL-VOC. ", "keywords": "Causal Inference;Object Recognition;Attention Mechanism;Confounding Context;Interventional Dual Attention", "primary_area": "", "supplementary_material": "/attachment/f111321e2394d706d955a9232e7a02d20f3f4a00.zip", "author": "Ruyang Liu;Jingjia Huang;Thomas H. 
Li;Ge Li", "authorids": "~Ruyang_Liu1;~Jingjia_Huang1;~Thomas_H._Li1;~Ge_Li2", "gender": "M;M;M;M", "homepage": "https://www.google.com.hk/search?q=cmt&newwindow=1&biw=1415&bih=615&sxsrf=ALiCzsZOeY-nTjWQ8wJ70XnFOqZ8vr1nXQ%3A1663598510580&ei=rn8oY82JI--UseMPpN-m2A8&ved=0ahUKEwjNmJayi6H6AhVvSmwGHaSvCfsQ4dUDCA4&uact=5&oq=cmt&gs_lcp=Cgdnd3Mtd2l6EANKBAhBGABKBAhGGABQAFgAYABoAHAAeACAAQCIAQCSAQCYAQA&sclient=gws-wiz;;https://dblp.org/pid/24/712-2.html;http://pku.edu.cn", "dblp": "276/3202;202/1720;24/712-2.html;213/4037", "google_scholar": "pZ3sWH0AAAAJ;ZOKmWQ4AAAAJ;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Ruyang_Liu1;~Jingjia_Huang1;~Ge_Li2;~Thomas_H._Li3", "aff": "Peking University;ByteDance Inc.;Peking University Shenzhen Graduate School;AIIT, Peking University", "aff_domain": "pku.edu.cn;bytedance.com;pku.edu.cn;aiit.org.cn", "position": "MS student;Researcher;Full Professor;Researcher", "bibtex": "@inproceedings{\nliu2023causality,\ntitle={Causality Compensated Attention for Contextual Biased Visual Recognition},\nauthor={Ruyang Liu and Jingjia Huang and Thomas H. Li and Ge Li},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=8XqDnrmZQNF}\n}", "github": "", "project": "", "reviewers": "UmuF;JL6E;GZYH;sj7K", "pdf_size": 2930370, "recommendation": "3;6;6;6", "confidence": "2;3;3;3", "correctness": "2;3;3;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;3;2;3", "wc_summary_paper": "134;59;116;55", "wc_strength_and_weaknesses": "293;519;377;307", "wc_clarity_quality_novelty_and_reproducibility": "51;53;30;142", "wc_summary_review": "77;78;57;83", "wc_review": "555;709;580;587", "wc_reply_reviewers": "0;596;402;47", "wc_reply_authors": "1274;2166;748;839", "reply_reviewers": "0;2;2;1", "reply_authors": "4;4;2;2", "recommendation_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 91.0, 34.61935874622752 ], "wc_strength_and_weaknesses_avg": [ 374.0, 89.56003573022959 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 69.0, 43.0987238790199 ], "wc_summary_review_avg": [ 73.75, 9.934158243152764 ], "wc_review_avg": [ 607.75, 59.65473577177256 ], "wc_reply_reviewers_avg": [ 261.25, 248.0034021944054 ], "wc_reply_authors_avg": [ 1256.75, 561.3320652697474 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 3.0, 1.0 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 1.0, "corr_recommendation_correctness": 1.0, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17220010694440099272&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "pdf": "https://openreview.net/pdf?id=8XqDnrmZQNF", "email": "pku.edu.cn;bytedance.com;pku.edu.cn;aiit.org.cn", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Peking University;ByteDance", "aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;https://www.bytedance.com", "aff_unique_abbr": "Peking U;ByteDance", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Shenzhen;Beijing", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "8Ygoj2IeXfW", "title": "Diversity Boosted Learning for Domain Generalization with a Large Number of Domains", "track": "main", "status": "Reject", "tldr": "We propose a novel 
sampling framework to efficiently sample the most informative domains and data points to help train robust models against two kinds of spurious correlations in Domain Generalization field.", "abstract": "Machine learning algorithms minimizing the average training loss typically suffer from poor generalization performance. It inspires various works for domain generalization (DG), among which a series of methods work by $O(n^2)$ pairwise domain operations with n domains, where each one is often costly. Moreover, while a common objective in the DG literature is to learn invariant representations against spurious correlations induced by domains, we point out the insufficiency of it and highlight the importance of alleviating spurious correlations caused by objects. Based on the observation that diversity helps mitigate spurious correlations, we propose a Diversity boosted twO-level saMplIng framework (DOMI) to efficiently sample the most informative ones among a large number of domains and data points. We show that DOMI helps train robust models against spurious correlations from both domain-side and object-side, substantially enhancing the performance of five backbone DG algorithms on Rotated MNIST and Rotated Fashion MNIST.", "keywords": "Domain Generalization;Spurious Correlation", "primary_area": "", "supplementary_material": "/attachment/2fa3161d3b1929c780a1a6505bc147ae8dd2a0df.zip", "author": "XI LENG;Xiaoying Tang;Yatao Bian", "authorids": "~XI_LENG1;~Xiaoying_Tang2;~Yatao_Bian1", "gender": "M;F;M", "homepage": "https://github.com/fake-fickle;https://sse.cuhk.edu.cn/en/faculty/tangxiaoying;https://yataobian.com", "dblp": ";134/9714-2;222/2694", "google_scholar": ";https://scholar.google.com/citations?hl=zh-TW;oZBTlBkAAAAJ", "orcid": ";0000-0003-3955-1195;0000-0002-2368-4084", "linkedin": ";;", "or_profile": "~XI_LENG1;~Xiaoying_Tang2;~An_Bian1", "aff": "CUHKSZ;The Chinese University of Hong Kong, Shenzhen;Tencent AI Lab", "aff_domain": "cuhk.edu.cn;cuhk.edu.cn;tencent.com", "position": "PhD student;Assistant Professor;Senior researcher ", "bibtex": "@misc{\nleng2023diversity,\ntitle={Diversity Boosted Learning for Domain Generalization with a Large Number of Domains},\nauthor={XI LENG and Xiaoying Tang and Yatao Bian},\nyear={2023},\nurl={https://openreview.net/forum?id=8Ygoj2IeXfW}\n}", "github": "", "project": "", "reviewers": "xNqP;3y4o;kTC5;eKSv", "site": "https://openreview.net/forum?id=8Ygoj2IeXfW", "pdf_size": 1690132, "recommendation": "3;5;5;6", "confidence": "4;3;5;3", "correctness": "3;3;2;2", "technical_novelty": "2;3;1;3", "empirical_novelty": "2;3;0;3", "wc_summary_paper": "46;85;129;195", "wc_strength_and_weaknesses": "121;90;114;296", "wc_clarity_quality_novelty_and_reproducibility": "28;601;38;5", "wc_summary_review": "28;68;185;20", "wc_review": "223;844;466;516", "wc_reply_reviewers": "0;0;199;27", "wc_reply_authors": "796;1410;1819;624", "reply_reviewers": "0;0;3;1", "reply_authors": "4;4;5;3", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.25, 0.82915619758885 ], "empirical_novelty_avg": [ 2.0, 1.224744871391589 ], "wc_summary_paper_avg": [ 113.75, 55.341553104335624 ], "wc_strength_and_weaknesses_avg": [ 155.25, 82.0712343028908 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 168.0, 250.27884449149911 ], "wc_summary_review_avg": [ 75.25, 65.92182870643077 ], "wc_review_avg": [ 512.25, 221.28982692387828 ], "wc_reply_reviewers_avg": [ 56.5, 
83.00752977893029 ], "wc_reply_authors_avg": [ 1162.25, 478.683807434511 ], "reply_reviewers_avg": [ 1.0, 1.224744871391589 ], "reply_authors_avg": [ 4.0, 0.7071067811865476 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.3458572319330373, "corr_recommendation_correctness": -0.6882472016116854, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:MCAxBdlSzykJ:scholar.google.com/&scioq=Diversity+Boosted+Learning+for+Domain+Generalization+with+a+Large+Number+of+Domains&hl=en&as_sdt=0,5", "gs_version_total": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Chinese University of Hong Kong, Shenzhen;Chinese University of Hong Kong;Tencent", "aff_unique_dep": ";;Tencent AI Lab", "aff_unique_url": "https://www.cuhk.edu.cn/shenzhen;https://www.cuhk.edu.cn;https://ai.tencent.com", "aff_unique_abbr": "CUHKSZ;CUHK;Tencent AI Lab", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Shenzhen;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "8YnDrbx8bnh", "title": "Bias Mitigation Framework for Intersectional Subgroups in Neural Networks", "track": "main", "status": "Withdraw", "tldr": "This papers proposes a bias mitigation approach for intersectional subgroups.", "abstract": "We propose a fairness-aware learning framework that mitigates intersectional subgroup bias associated with protected attributes. Prior research has primarily focused on mitigating one kind of bias by incorporating complex fairness-driven constraints into optimization objectives or designing additional layers that focus on specific protected attributes. We introduce a simple and generic bias mitigation framework that prevents models from learning relationships between protected attributes and output variable by reducing mutual information. We demonstrate that our approach is effective in reducing bias with little or no drop in accuracy. We also show that our approach mitigates intersectional bias even when other attributes in the dataset are correlated with protected attributes. Finally, we validate our approach by studying feature interactions between protected and non-protected attributes. 
We demonstrate that these interactions are significantly reduced when applying our bias mitigation.\n", "keywords": "Fairness;Feature Interactions;Bias Mitigation", "primary_area": "", "supplementary_material": "", "author": "Narine Kokhlikyan;Bilal Alsallakh;Fulton Wang;Vivek Miglani;Aobo Yang;David Adkins", "authorids": "~Narine_Kokhlikyan1;~Bilal_Alsallakh1;~Fulton_Wang1;~Vivek_Miglani1;aoboyang@fb.com;~David_Adkins1", "gender": ";M;;;;M", "homepage": ";https://twitter.com/bilalalsallakh;https://scholar.google.com/citations?user=jKi3eEIAAAAJ&hl=en;;;https://davidadkins.com", "dblp": "136/9284;95/7852;;;;", "google_scholar": "oZjHXwUAAAAJ;0TZaxxwAAAAJ;;;;", "orcid": "0000-0002-5827-5141;;;;;", "linkedin": ";bilalalsallakh;;;;davidjadkins/", "or_profile": "~Narine_Kokhlikyan1;~Bilal_Alsallakh1;~Fulton_Wang1;~Vivek_Miglani1;aoboyang@fb.com;~David_Adkins1", "aff": "Meta;Voxel AI;Meta;;;Meta Facebook", "aff_domain": "meta.com;voxelai.com;meta.com;;;facebook.com", "position": "Research Scientist;Principal Researcher;Researcher;;;Researcher", "bibtex": "@misc{\nkokhlikyan2023bias,\ntitle={Bias Mitigation Framework for Intersectional Subgroups in Neural Networks},\nauthor={Narine Kokhlikyan and Bilal Alsallakh and Fulton Wang and Vivek Miglani and Aobo Yang and David Adkins},\nyear={2023},\nurl={https://openreview.net/forum?id=8YnDrbx8bnh}\n}", "github": "", "project": "", "reviewers": "PkHg;d3Up;gDyz;nwpN", "site": "https://openreview.net/forum?id=8YnDrbx8bnh", "pdf_size": 791239, "recommendation": "3;3;5;8", "confidence": "4;4;4;3", "correctness": "2;2;3;3", "technical_novelty": "3;3;2;3", "empirical_novelty": "3;3;2;3", "wc_summary_paper": "206;56;76;74", "wc_strength_and_weaknesses": "308;327;503;107", "wc_clarity_quality_novelty_and_reproducibility": "766;32;30;6", "wc_summary_review": "66;102;32;55", "wc_review": "1346;517;641;242", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.75, 2.0463381929681126 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 103.0, 59.9749947894954 ], "wc_strength_and_weaknesses_avg": [ 311.25, 140.30747485433554 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 208.5, 322.0353241493858 ], "wc_summary_review_avg": [ 63.75, 25.262373206015305 ], "wc_review_avg": [ 686.5, 407.22260497177706 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.9169493006161777, "corr_recommendation_correctness": 0.8551861104941366, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12957636017561887348&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Meta;Voxel AI", "aff_unique_dep": "Meta Platforms, Inc.;", "aff_unique_url": "https://meta.com;https://voxelai.com", "aff_unique_abbr": "Meta;Voxel AI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "8Z6OZ3qKHDD", "title": "Robust Transfer Learning Based on Minimax Principle", "track": "main", "status": "Reject", "tldr": "", "abstract": "The similarity between target and source tasks is a crucial quantity for 
theoretical analyses and algorithm designs in transfer learning studies. However, this quantity is often difficult to be precisely captured. To address this issue, we make a boundedness assumption on the task similarity and then propose a mathematical framework based on the minimax principle, which minimizes the worst-case expected population risk under this assumption. Furthermore, our proposed minimax problem can be solved analytically, which provides a guideline for designing robust transfer learning models. According to the analytical expression, we interpret the influences of sample sizes, task distances, and the model dimensionality in knowledge transferring. Then, practical algorithms are developed based on the theoretical results. Finally, experiments conducted on image classification tasks show that our approaches can achieve robust and competitive accuracies under random selections of training sets.", "keywords": "Transfer Learning;Minimax Principle;Robustness", "primary_area": "", "supplementary_material": "/attachment/f31f006c353309afcadd54d355cc2c990620dff6.zip", "author": "Xinyi Tong;Xiangxiang Xu;Shao-Lun Huang;Lizhong Zheng", "authorids": "~Xinyi_Tong1;~Xiangxiang_Xu1;~Shao-Lun_Huang3;~Lizhong_Zheng1", "gender": "M;M;M;M", "homepage": "https://github.com/txyyaohui;https://xiangxiangxu.com/;https://sites.google.com/view/slhuang/home;http://lizhongzheng.mit.edu", "dblp": "171/0531;147/5345-1;64/2243;", "google_scholar": ";u-BAw9sAAAAJ;;", "orcid": ";0000-0002-4178-0934;;", "linkedin": ";xiangxiangxu/;;", "or_profile": "~Xinyi_Tong1;~Xiangxiang_Xu1;~Shao-Lun_Huang3;~Lizhong_Zheng1", "aff": "Tsinghua University;Massachusetts Institute of Technology;Tsinghua University;Massachusetts Institute of Technology", "aff_domain": "tsinghua.edu.cn;mit.edu;tsinghua.edu.cn;mit.edu", "position": "PhD student;Postdoc;Associate Professor;Full Professor", "bibtex": "@misc{\ntong2023robust,\ntitle={Robust Transfer Learning Based on Minimax Principle},\nauthor={Xinyi Tong and Xiangxiang Xu and Shao-Lun Huang and Lizhong Zheng},\nyear={2023},\nurl={https://openreview.net/forum?id=8Z6OZ3qKHDD}\n}", "github": "", "project": "", "reviewers": "snNr;cYFo;Axde;Vs9B", "site": "https://openreview.net/forum?id=8Z6OZ3qKHDD", "pdf_size": 441787, "recommendation": "3;3;5;6", "confidence": "4;4;5;2", "correctness": "2;3;3;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;2;3;2", "wc_summary_paper": "56;47;69;89", "wc_strength_and_weaknesses": "682;357;387;151", "wc_clarity_quality_novelty_and_reproducibility": "7;11;69;46", "wc_summary_review": "75;28;74;25", "wc_review": "820;443;599;311", "wc_reply_reviewers": "353;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 65.25, 15.785673884886892 ], "wc_strength_and_weaknesses_avg": [ 394.25, 189.3480591397757 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 33.25, 25.616157010761782 ], "wc_summary_review_avg": [ 50.5, 24.026027553467927 ], "wc_review_avg": [ 543.25, 189.53149474427727 ], "wc_reply_reviewers_avg": [ 88.25, 152.85348376795343 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 4, 0 ], 
"corr_recommendation_confidence": -0.4856618642571827, "corr_recommendation_correctness": 0.5555555555555555, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17121150822447340899&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "Tsinghua University;Massachusetts Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;https://web.mit.edu", "aff_unique_abbr": "THU;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "China;United States" }, { "title": "Is Reinforcement Learning (Not) for Natural Language Processing: Benchmarks, Baselines, and Building Blocks for Natural Language Policy Optimization", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10970", "id": "8aHzds2uUyB", "poster": "", "openreview": "https://openreview.net/forum?id=8aHzds2uUyB", "slides": "https://iclr.cc/virtual/2023/poster/10970", "video": "https://iclr.cc/virtual/2023/poster/10970", "author_site": "Rajkumar Ramamurthy, Prithviraj Ammanabrolu, Kiant\u00e9 Brantley, Jack Hessel, Rafet Sifa, Christian Bauckhage, Hannaneh Hajishirzi, Yejin Choi", "tldr": "We provide an open-source framework, benchmark, and novel algorithm to train large language models to better align to automated measures of human preferences.", "abstract": "We tackle the problem of aligning pre-trained large language models (LMs) with human preferences. If we view text generation as a sequential decision-making problem, reinforcement learning (RL) appears to be a natural conceptual framework. However, using RL for LM-based generation faces empirical challenges, including training instability due to the combinatorial action space, as well as a lack of open-source libraries and benchmarks customized for LM alignment. Thus, a question rises in the research community: is RL a practical paradigm for NLP?\n\nTo help answer this, we first introduce an open-source modular library, $RL4LMs$ (Reinforcement Learning for Language Models), for optimizing language generators with RL. The library consists of on-policy RL algorithms that can be used to train any encoder or encoder-decoder LM in the HuggingFace library (Wolf et al. 2020) with an arbitrary reward function. Next, we present the $GRUE$ (General Reinforced-language Understanding Evaluation) benchmark, a set of 6 language generation tasks which are supervised not by target strings, but by reward functions which capture automated measures of human preference.GRUE is the first leaderboard-style evaluation of RL algorithms for NLP tasks. Finally, we introduce an easy-to-use, performant RL algorithm, $NLPO$ (Natural Language Policy Optimization)} that learns to effectively reduce the combinatorial action space in language generation. We show 1) that RL techniques are generally better than supervised methods at aligning LMs to human preferences; and 2) that NLPO exhibits greater stability and performance than previous policy gradient methods (e.g., PPO (Schulman et al. 
2017)), based on both automatic and human evaluations.", "keywords": "natural language processing;reinforcement learning;language models;feedback learning", "primary_area": "", "supplementary_material": "", "author": "Rajkumar Ramamurthy;Prithviraj Ammanabrolu;Kiant\u00e9 Brantley;Jack Hessel;Rafet Sifa;Christian Bauckhage;Hannaneh Hajishirzi;Yejin Choi", "authorids": "~Rajkumar_Ramamurthy1;~Prithviraj_Ammanabrolu1;~Kiant\u00e9_Brantley2;~Jack_Hessel1;~Rafet_Sifa1;~Christian_Bauckhage1;~Hannaneh_Hajishirzi1;~Yejin_Choi1", "gender": ";M;;M;;M;F;F", "homepage": ";http://prithvirajva.com;;https://www.jmhessel.com;https://www.b-it-center.de/research-groups/applied-machine-learning-group;;https://homes.cs.washington.edu/~hannaneh/;https://yejinc.github.io/", "dblp": "199/2181;202/2351;;https://dblp.uni-trier.de/pid/132/5250.html;122/7972.html;44/2560;52/1296;89/579-1", "google_scholar": "vVzcztcAAAAJ;2yaiWZ8AAAAJ;;SxQQ1msAAAAJ;https://scholar.google.com/citations?hl=en;f9iP-80AAAAJ;LOV6_WIAAAAJ;vhP-tlcAAAAJ", "orcid": ";;;0000-0002-4012-8979;;0000-0001-6615-2128;;", "linkedin": ";rajammanabrolu/;;;;;;", "or_profile": "~Rajkumar_Ramamurthy1;~Prithviraj_Ammanabrolu1;~Kiant\u00e9_Brantley2;~Jack_Hessel1;~Rafet_Sifa1;~Christian_Bauckhage1;~Hannaneh_Hajishirzi1;~Yejin_Choi1", "aff": "Fraunhofer Institute IAIS, Fraunhofer IAIS;Allen Institute for Artificial Intelligence;;Allen Institute for Artificial Intelligence;Rheinische Friedrich-Wilhelms Universit\u00e4t Bonn;University of Bonn;University of Washington;Department of Computer Science, University of Washington", "aff_domain": "iais.fraunhofer.de;allenai.org;;allenai.org;uni-bonn.de;uni-bonn.de;uw.edu;cs.washington.edu", "position": "Researcher;Researcher;;Researcher;Full Professor;Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nramamurthy2023is,\ntitle={Is Reinforcement Learning (Not) for Natural Language Processing: Benchmarks, Baselines, and Building Blocks for Natural Language Policy Optimization},\nauthor={Rajkumar Ramamurthy and Prithviraj Ammanabrolu and Kiant{\\'e} Brantley and Jack Hessel and Rafet Sifa and Christian Bauckhage and Hannaneh Hajishirzi and Yejin Choi},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=8aHzds2uUyB}\n}", "github": "", "project": "", "reviewers": "a1G9;pVj7;H8br;zDTr", "pdf_size": 2251371, "recommendation": "6;6;8;8", "confidence": "4;3;3;4", "correctness": "3;4;4;4", "technical_novelty": "3;2;3;3", "empirical_novelty": "3;1;3;4", "wc_summary_paper": "125;41;110;47", "wc_strength_and_weaknesses": "285;126;95;360", "wc_clarity_quality_novelty_and_reproducibility": "42;23;144;53", "wc_summary_review": "41;42;110;70", "wc_review": "493;232;459;530", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "458;266;148;282", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_paper_avg": [ 80.75, 37.19122880465231 ], "wc_strength_and_weaknesses_avg": [ 216.5, 109.81461651346783 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 65.5, 46.57520799738848 ], "wc_summary_review_avg": [ 65.75, 28.07467720206236 ], "wc_review_avg": [ 428.5, 116.19487940524746 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 288.5, 110.70117433884792 ], 
"reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 108, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1754135811034002123&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=8aHzds2uUyB", "email": "iais.fraunhofer.de;allenai.org;;allenai.org;uni-bonn.de;uni-bonn.de;uw.edu;cs.washington.edu", "author_num": 8, "aff_unique_index": "0;1;1;2;3;4;4", "aff_unique_norm": "Fraunhofer Institute for Intelligent Analysis and Information Systems;Allen Institute for Artificial Intelligence;Rheinische Friedrich-Wilhelms Universit\u00e4t Bonn;University of Bonn;University of Washington", "aff_unique_dep": "Intelligent Analysis and Information Systems;;;;", "aff_unique_url": "https://www.iais.fraunhofer.de/;https://allenai.org;https://www.uni-bonn.de/;https://www.uni-bonn.de/;https://www.washington.edu", "aff_unique_abbr": "Fraunhofer IAIS;AI2;Uni Bonn;UBonn;UW", "aff_campus_unique_index": "1", "aff_campus_unique": ";Seattle", "aff_country_unique_index": "0;1;1;0;0;1;1", "aff_country_unique": "Germany;United States" }, { "id": "8abnSMeFaqA", "title": "Finding and only finding local Nash equilibria by both pretending to be a follower", "track": "main", "status": "Reject", "tldr": "We propose double Follow-the-Ridge (double-FTR), an algorithm with local convergence guarantee to differential Nash equilibria in general-sum two-player differential games.", "abstract": "Finding (local) Nash equilibria in two-player differentiable games is a classical problem in game theory with important relevance in machine learning. We propose double Follow-the-Ridge (double-FTR), an algorithm that locally converges to and only to local Nash equilibria in general-sum two-player differentiable games. To our knowledge, double-FTR is the first algorithm with such guarantees for general-sum games. Furthermore, we show that by varying its preconditioner, double-FTR leads to a broader family of algorithms with the same convergence guarantee. In addition, double-FTR avoids oscillation near equilibria due to the real-eigenvalues of its Jacobian at fixed points. 
\nEmpirically, we validate the double-FTR algorithm on a range of simple zero-sum and general-sum games, as well as simple Generative Adversarial Network (GAN) tasks.", "keywords": "game theory;general-sum games;local Nash equilibrium;optimization", "primary_area": "", "supplementary_material": "/attachment/7562fea23b2bdb9fcf578691a0b554e5e1703821.zip", "author": "Xuchan Bao;Guodong Zhang", "authorids": "~Xuchan_Bao1;~Guodong_Zhang1", "gender": "F;M", "homepage": "https://www.cs.toronto.edu/~jennybao/;http://www.cs.toronto.edu/~gdzhang/", "dblp": "188/5742;28/4937", "google_scholar": "gnBwBZ4AAAAJ;B_TZBtwAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Xuchan_Bao1;~Guodong_Zhang1", "aff": "Meta Facebook;Department of Computer Science, University of Toronto", "aff_domain": "meta.com;cs.toronto.edu", "position": "Intern;PhD student", "bibtex": "@misc{\nbao2023finding,\ntitle={Finding and only finding local Nash equilibria by both pretending to be a follower},\nauthor={Xuchan Bao and Guodong Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=8abnSMeFaqA}\n}", "github": "", "project": "", "reviewers": "2AFw;2z6c;PeZQ;tEUJ", "site": "https://openreview.net/forum?id=8abnSMeFaqA", "pdf_size": 5279022, "recommendation": "5;5;5;6", "confidence": "3;5;4;4", "correctness": "4;3;3;4", "technical_novelty": "3;3;4;3", "empirical_novelty": "3;3;2;0", "wc_summary_paper": "29;42;61;98", "wc_strength_and_weaknesses": "267;104;606;106", "wc_clarity_quality_novelty_and_reproducibility": "19;91;2;7", "wc_summary_review": "56;21;51;66", "wc_review": "371;258;720;277", "wc_reply_reviewers": "50;0;83;0", "wc_reply_authors": "532;502;651;229", "reply_reviewers": "1;0;1;0", "reply_authors": "2;1;2;1", "recommendation_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 1.224744871391589 ], "wc_summary_paper_avg": [ 57.5, 26.004807247891687 ], "wc_strength_and_weaknesses_avg": [ 270.75, 204.54507449459643 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 29.75, 35.89829383132296 ], "wc_summary_review_avg": [ 48.5, 16.77050983124842 ], "wc_review_avg": [ 406.5, 185.9872307444788 ], "wc_reply_reviewers_avg": [ 33.25, 35.23758646672612 ], "wc_reply_authors_avg": [ 478.5, 154.45144868210204 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11369666993675096837&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Meta;University of Toronto", "aff_unique_dep": "Meta Platforms, Inc.;Department of Computer Science", "aff_unique_url": "https://meta.com;https://www.utoronto.ca", "aff_unique_abbr": "Meta;U of T", "aff_campus_unique_index": "1", "aff_campus_unique": ";Toronto", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;Canada" }, { "title": "Deep Variational Implicit Processes", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12053", "id": "8aeSJNbmbQq", "poster": "/media/PosterPDFs/ICLR%202023/12053.png?t=1681121273.9051702", "openreview": "https://openreview.net/forum?id=8aeSJNbmbQq", "slides": "https://iclr.cc/virtual/2023/poster/12053", "video": "https://iclr.cc/virtual/2023/poster/12053",
"author_site": "Luis A. Ortega, Simon Rodriguez, Daniel Hern\u00e1ndez-Lobato", "tldr": " We propose here a multi-layer generalization of IPs called the Deep Variational Implicit process, similar to that of deep GPs over GPs.", "abstract": "Implicit processes (IPs) are a generalization of Gaussian processes (GPs). IPs may lack a closed-form expression but are easy to sample from. Examples include, among others, Bayesian neural networks or neural samplers. IPs can be used as priors over functions, resulting in flexible models with well-calibrated prediction uncertainty estimates. Methods based on IPs usually carry out function-space approximate inference, which overcomes some of the difficulties of parameter-space approximate inference. Nevertheless, the approximations employed often limit the expressiveness of the final model, resulting, e.g., in a Gaussian predictive distribution, which can be restrictive. We propose here a multi-layer generalization of IPs called the Deep Variational Implicit process (DVIP). This generalization is similar to that of deep GPs over GPs, but it is more flexible due to the use of IPs as the prior distribution over the latent functions. We describe a scalable variational inference algorithm for training DVIP and show that it outperforms previous IP-based methods and also deep GPs. We support these claims via extensive regression and classification experiments. We also evaluate DVIP on large datasets with up to several million data instances to illustrate its good scalability and performance.", "keywords": "Gaussian process;implicit process;variational implicit process;Bayesian inference;function-space inference;implicit process concatenation", "primary_area": "", "supplementary_material": "/attachment/cc0a44ef9fe4c7b30cbb5f87c8843ef7da7ea246.zip", "author": "Luis A. Ortega;Simon Rodriguez Santana;Daniel Hern\u00e1ndez-Lobato", "authorids": "~Luis_A._Ortega1;~Simon_Rodriguez_Santana1;~Daniel_Hern\u00e1ndez-Lobato1", "gender": "M;M;M", "homepage": ";http://dhnzl.org;", "dblp": "249/2890;95/166;304/8839", "google_scholar": "https://scholar.google.es/citations?user=9x_tXzwAAAAJ;https://scholar.google.es/citations?user=rL6cvTUAAAAJ;1Ly8qeoAAAAJ", "orcid": "0000-0003-3760-0520;;", "linkedin": ";;ludvins", "or_profile": "~Simon_Rodriguez_Santana1;~Daniel_Hern\u00e1ndez-Lobato1;~Luis_Antonio_Ortega_Andr\u00e9s1", "aff": "ICMAT-CSIC;Universidad Aut\u00f3noma de Madrid;Universidad Aut\u00f3noma de Madrid", "aff_domain": "icmat.es;uam.es;uam.es", "position": "Postdoc;Associate Professor;PhD student", "bibtex": "@inproceedings{\nortega2023deep,\ntitle={Deep Variational Implicit Processes},\nauthor={Luis A. 
Ortega and Simon Rodriguez Santana and Daniel Hern{\\'a}ndez-Lobato},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=8aeSJNbmbQq}\n}", "github": "", "project": "", "reviewers": "AATZ;cZop;Lo8f;GNWm", "pdf_size": 1743469, "recommendation": "6;6;6;8", "confidence": "4;3;4;3", "correctness": "3;4;3;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "3;3;2;2", "wc_summary_paper": "96;101;63;112", "wc_strength_and_weaknesses": "629;189;506;439", "wc_clarity_quality_novelty_and_reproducibility": "258;51;58;48", "wc_summary_review": "119;25;85;27", "wc_review": "1102;366;712;626", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 93.0, 18.261982367749674 ], "wc_strength_and_weaknesses_avg": [ 440.75, 160.52784026454725 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 103.75, 89.13017165920864 ], "wc_summary_review_avg": [ 64.0, 39.8622628559895 ], "wc_review_avg": [ 701.5, 263.9938446252109 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": -0.3333333333333333, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14997309462411179718&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "pdf": "https://openreview.net/pdf?id=8aeSJNbmbQq", "email": "icmat.es;uam.es;uam.es", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Instituto de Ciencias Matem\u00e1ticas (ICMAT);Universidad Aut\u00f3noma de Madrid", "aff_unique_dep": ";", "aff_unique_url": "https://icmat.csic.es/;https://www.uam.es", "aff_unique_abbr": "ICMAT;UAM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Spain" }, { "id": "8cST_EWo9X", "title": "Understanding ReLU Network Robustness Through Test Set Certification Performance", "track": "main", "status": "Withdraw", "tldr": "Robustness certificates for ReLU networks are strongly correlated with network accuracy for data in-distribution and are highly unreliable for data out-of-distribution.", "abstract": "Neural networks might exhibit weak robustness against input perturbations within the learning distribution, and this weakness becomes more severe under distributional shifts or for data outside the distribution.\nFor their safer use, robustness certificates provide formal guarantees on the stability of the prediction in the vicinity of the input.\nHowever, the relationship between correctness and robustness remains unclear.\nIn this work, we investigate the unexpected outcomes of verification methods applied to piecewise linear classifiers for clean, perturbed, in- and out-of-distribution samples.\nIn our experiments, we conduct a thorough analysis for image classification tasks and show that robustness certificates are strongly correlated with prediction correctness for in-distribution data.\nIn addition, we provide a theoretical demonstration that formal verification methods robustly certify samples sufficiently far from the training
distribution.\nThese results are integrated with an experimental analysis and demonstrate the weakness of such certificates compared to standard out-of-distribution detection methods.", "keywords": "Robustness Certificates;Robust Machine Learning;Out-Of-Distribution Detection", "primary_area": "", "supplementary_material": "", "author": "Nicola Franco;Jeanette Miriam Lorenz;Karsten Roscher;Stephan G\u00fcnnemann", "authorids": "~Nicola_Franco1;jeanette.miriam.lorenz@iks.fraunhofer.de;~Karsten_Roscher1;~Stephan_G\u00fcnnemann1", "gender": "M;;;M", "homepage": ";;;http://www.daml.in.tum.de", "dblp": ";;143/9305;43/3011", "google_scholar": "cOpIhYQAAAAJ;;InBqYG4AAAAJ;", "orcid": ";;0000-0002-9458-104X;", "linkedin": ";;;", "or_profile": "~Nicola_Franco1;jeanette.miriam.lorenz@iks.fraunhofer.de;~Karsten_Roscher1;~Stephan_G\u00fcnnemann1", "aff": "Fraunhofer IKS;;Fraunhofer IKS;Technical University Munich", "aff_domain": "iks.fraunhofer.de;;iks.fraunhofer.de;tum.de", "position": "Researcher;;Researcher;Professor", "bibtex": "@misc{\nfranco2023understanding,\ntitle={Understanding Re{LU} Network Robustness Through Test Set Certification Performance},\nauthor={Nicola Franco and Jeanette Miriam Lorenz and Karsten Roscher and Stephan G{\\\"u}nnemann},\nyear={2023},\nurl={https://openreview.net/forum?id=8cST_EWo9X}\n}", "github": "", "project": "", "reviewers": "7Uwc;uVrN;WTte;YRTu", "site": "https://openreview.net/forum?id=8cST_EWo9X", "pdf_size": 2039755, "recommendation": "1;3;3;6", "confidence": "4;4;3;5", "correctness": "1;3;3;4", "technical_novelty": "1;2;2;3", "empirical_novelty": "1;2;2;3", "wc_summary_paper": "33;47;64;68", "wc_strength_and_weaknesses": "71;521;190;140", "wc_clarity_quality_novelty_and_reproducibility": "30;60;71;10", "wc_summary_review": "2;79;21;39", "wc_review": "136;707;346;257", "wc_reply_reviewers": "0;56;0;0", "wc_reply_authors": "323;231;428;289", "reply_reviewers": "0;1;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 3.25, 1.7853571071357126 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 2.75, 1.0897247358851685 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 53.0, 13.982131454109563 ], "wc_strength_and_weaknesses_avg": [ 230.5, 172.9602555502275 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 42.75, 24.138920854089562 ], "wc_summary_review_avg": [ 35.25, 28.44622119016865 ], "wc_review_avg": [ 361.5, 212.94424152815216 ], "wc_reply_reviewers_avg": [ 14.0, 24.24871130596428 ], "wc_reply_authors_avg": [ 317.75, 71.64975575673654 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.5940885257860046, "corr_recommendation_correctness": 0.9316142209946916, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:t5_D4gyhxXgJ:scholar.google.com/&scioq=Understanding+ReLU+Network+Robustness+Through+Test+Set+Certification+Performance&hl=en&as_sdt=0,33", "gs_version_total": 5, "aff_unique_index": "0;0;1", "aff_unique_norm": "Fraunhofer Institute for Integrated Systems and Device Technology;Technical University of Munich", "aff_unique_dep": ";", "aff_unique_url": "https://www.iks.fraunhofer.de/;https://www.tum.de", "aff_unique_abbr": "Fraunhofer IKS;TUM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "title": "GReTo: Remedying
dynamic graph topology-task discordance via target homophily", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11963", "id": "8duT3mi_5n", "poster": "/media/PosterPDFs/ICLR%202023/11963.png?t=1680745835.7917893", "openreview": "https://openreview.net/forum?id=8duT3mi_5n", "slides": "https://iclr.cc/virtual/2023/poster/11963", "video": "https://iclr.cc/virtual/2023/poster/11963", "author_site": "Zhengyang Zhou, Qihe Huang, Gengyu Lin, Kuo Yang, LEI BAI, Yang Wang", "tldr": "This paper revisits how node-wise relation modeling can facilitate regressions on dynamic graphs, from a new perspective of target-homophily. ", "abstract": "Dynamic graphs are ubiquitous across disciplines where observations usually change over time. Regressions on dynamic graphs often contribute to diverse critical tasks, such as climate early-warning and traffic control. Existing homophily Graph Neural Networks (GNNs) adopt physical connections or feature similarity as the adjacency matrix to perform node-level aggregations. However, on dynamic graphs with diverse node-wise relations, exploiting a pre-defined fixed topology for message passing inevitably leads to the aggregations of target-deviated neighbors. We designate such a phenomenon as the topology-task discordance, which naturally challenges the homophily assumption. In this work, we revisit node-wise relationships and explore novel homophily measurements on dynamic graphs with both signs and distances, capturing multiple node-level spatial relations and temporal evolutions. We discover that advancing homophily aggregations to signed target-oriented message passing can effectively resolve the discordance and promote aggregation capacity. Therefore, a GReTo is proposed, which performs signed message passing in the immediate neighborhood, and exploits both local environments and target awareness to realize high-order message propagation. Empirically, our solution achieves significant improvements against the best baselines, notably improving 24.79% on KnowAir and 3.60% on Metr-LA. 
", "keywords": "Dynamic graph;graph homophily theory;Graph Neural Network;topology-task discordance", "primary_area": "", "supplementary_material": "/attachment/dc6e95ee1e8bc3d7cc6acbb11c78cc9f616f7bc7.zip", "author": "Zhengyang Zhou;Qihe Huang;Gengyu Lin;Kuo Yang;LEI BAI;Yang Wang", "authorids": "~Zhengyang_Zhou1;~Qihe_Huang2;~Gengyu_Lin1;~Kuo_Yang2;~LEI_BAI1;~Yang_Wang32", "gender": "M;M;;M;M;M", "homepage": "http://home.ustc.edu.cn/~zzy0929/Home/;;https://github.com/lingy-qd;;http://leibai.site/;http://staff.ustc.edu.cn/~angyan/", "dblp": "246/8238;;;;119/1223-1;", "google_scholar": "dPElQLUAAAAJ;;;;https://scholar.google.com.au/citations?user=sakOO04AAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": "0000-0003-4728-7347;0000-0001-8960-6583;;0000-0003-3346-5130;0000-0003-3378-7201;0000-0002-6079-7053", "linkedin": ";;;https://www.linkedin.cn/incareer/in/kuo-yang-440a241b4;lei-bai-641370153/;", "or_profile": "~Zhengyang_Zhou1;~Qihe_Huang2;~Gengyu_Lin1;~Kuo_Yang2;~LEI_BAI1;~Yang_Wang32", "aff": "University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;Shanghai AI Laboratory;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;pjlab.org.cn;ustc.edu.cn", "position": "PhD student;PhD student;MS student;PhD student;Researcher;Associate Professor", "bibtex": "@inproceedings{\nzhou2023greto,\ntitle={{GR}eTo: Remedying dynamic graph topology-task discordance via target homophily},\nauthor={Zhengyang Zhou and Qihe Huang and Gengyu Lin and Kuo Yang and LEI BAI and Yang Wang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=8duT3mi_5n}\n}", "github": "", "project": "", "reviewers": "1rTB;bmba;dBq6;XKsq;mrA2", "pdf_size": 3064971, "recommendation": "6;6;6;8;8", "confidence": "4;3;3;4;3", "correctness": "3;3;2;3;4", "technical_novelty": "3;2;3;3;3", "empirical_novelty": "3;2;2;2;3", "wc_summary_paper": "61;55;61;43;72", "wc_strength_and_weaknesses": "135;20;178;150;122", "wc_clarity_quality_novelty_and_reproducibility": "43;95;214;68;17", "wc_summary_review": "48;23;20;27;22", "wc_review": "287;193;473;288;233", "wc_reply_reviewers": "0;53;60;102;0", "wc_reply_authors": "1585;929;1455;586;246", "reply_reviewers": "0;1;1;2;0", "reply_authors": "3;3;4;3;1", "recommendation_avg": [ 6.8, 0.9797958971132712 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "correctness_avg": [ 3.0, 0.6324555320336759 ], "technical_novelty_avg": [ 2.8, 0.39999999999999997 ], "empirical_novelty_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_paper_avg": [ 58.4, 9.457272334029511 ], "wc_strength_and_weaknesses_avg": [ 121.0, 53.82936001848805 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 87.4, 68.39473663959822 ], "wc_summary_review_avg": [ 28.0, 10.256705123966467 ], "wc_review_avg": [ 294.8, 95.96332632834273 ], "wc_reply_reviewers_avg": [ 43.0, 38.90501253052105 ], "wc_reply_authors_avg": [ 960.2, 507.2046529755026 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 2.8, 0.9797958971132712 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.16666666666666663, "corr_recommendation_correctness": 0.6454972243679028, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6282463193527772086&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 3, 
"pdf": "https://openreview.net/pdf?id=8duT3mi_5n", "email": "ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;pjlab.org.cn;ustc.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "University of Science and Technology of China;Shanghai AI Laboratory", "aff_unique_dep": ";", "aff_unique_url": "http://www.ustc.edu.cn;https://www.shanghai-ai-lab.com", "aff_unique_abbr": "USTC;SAIL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "8e5xTOIQpj7", "title": "Confounder Identification-free Causal Visual Feature Learning", "track": "main", "status": "Reject", "tldr": "We propose a casual visual representation learning paradigm (CICF) for generalization without requiring to identify the existing confounders. ", "abstract": "Confounders in deep learning are in general detrimental to model's generalization where they infiltrate feature representations. Therefore, learning causal features that are free of interference from confounders is important. Most previous causal learning based approaches employ back-door criterion to mitigate the adverse effect of certain specific confounder, which require the explicit identification of confounder. However, in real scenarios, confounders are typically diverse and difficult to be identified. In this paper, we propose a novel Confounder Identification-free Causal Visual Feature Learning (CICF) method, which obviates the need for identifying confounders. CICF models the interventions among different samples based on front-door criterion, and then approximates the global-scope intervening effect upon the instance-level interventions from the perspective of optimization. In this way, we aim to find a reliable optimization direction, which avoids the intervening effects of confounders, to learn causal features. Furthermore, we uncover the relation between CICF and the popular meta-learning strategy MAML, and provide an interpretation of why MAML works from the theoretical perspective of causal learning for the first time. Thanks to the effective learning of causal features, our CICF enables models to have superior generalization capability. 
Extensive experiments on domain generalization benchmark datasets demonstrate the effectiveness of our CICF, which achieves state-of-the-art performance.", "keywords": "Domain Generalization;Causal Learning;Front-door criterion;Confounder identification-free", "primary_area": "", "supplementary_material": "", "author": "Xin Li;Zhizheng Zhang;Guoqiang Wei;Cuiling Lan;Wenjun Zeng;Xin Jin;Zhibo Chen", "authorids": "~Xin_Li28;~Zhizheng_Zhang1;~Guoqiang_Wei1;~Cuiling_Lan1;~Wenjun_Zeng3;~Xin_Jin8;~Zhibo_Chen1", "gender": "M;M;M;F;M;M;M", "homepage": "https://lixinustc.github.io;;https://guoqiangwei.xyz/;https://www.microsoft.com/en-us/research/people/culan/;https://www.eias.ac.cn/h-col-187.html;http://home.ustc.edu.cn/~jinxustc/;https://faculty.ustc.edu.cn/chenzhibo", "dblp": "09/1365-82;67/4758;234/8900;95/8115;57/145;68/3340-14;54/6561.html", "google_scholar": "sbiY97gAAAAJ;X7M0I8kAAAAJ;https://scholar.google.com/citations?hl=en;XZugqiwAAAAJ;_cUfvYQAAAAJ;byaSC-kAAAAJ;1ayDJfsAAAAJ", "orcid": ";;0000-0003-1846-5693;0000-0001-9145-9957;;0000-0002-1820-8358;", "linkedin": ";;;;;;", "or_profile": "~Xin_Li28;~Zhizheng_Zhang1;~Guoqiang_Wei1;~Cuiling_Lan1;~Wenjun_Zeng3;~Xin_Jin8;~Zhibo_Chen1", "aff": "University of Science and Technology of China;Microsoft Research;University of Science and Technology of China;Microsoft;Eastern Institute for Advanced Study;Eastern Institute of Technology, Ningbo;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;microsoft.com;ustc.edu.cn;microsoft.com;eias.ac.cn;eitech.edu.cn;ustc.edu.cn", "position": "PhD student;Senior Researcher;PhD student;Principal Researcher;Full Professor;Assistant Professor;Full Professor", "bibtex": "@misc{\nli2023confounder,\ntitle={Confounder Identification-free Causal Visual Feature Learning},\nauthor={Xin Li and Zhizheng Zhang and Guoqiang Wei and Cuiling Lan and Wenjun Zeng and Xin Jin and Zhibo Chen},\nyear={2023},\nurl={https://openreview.net/forum?id=8e5xTOIQpj7}\n}", "github": "", "project": "", "reviewers": "K8gB;pVYZ;wmrP;jBAD", "site": "https://openreview.net/forum?id=8e5xTOIQpj7", "pdf_size": 11280132, "recommendation": "1;5;5;8", "confidence": "5;4;3;3", "correctness": "1;3;3;4", "technical_novelty": "1;3;4;3", "empirical_novelty": "0;3;2;3", "wc_summary_paper": "79;124;49;101", "wc_strength_and_weaknesses": "262;190;306;87", "wc_clarity_quality_novelty_and_reproducibility": "42;22;26;33", "wc_summary_review": "13;56;27;56", "wc_review": "396;392;408;277", "wc_reply_reviewers": "244;127;0;0", "wc_reply_authors": "1332;897;1228;172", "reply_reviewers": "1;1;0;0", "reply_authors": "3;3;3;1", "recommendation_avg": [ 4.75, 2.48746859276655 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 2.75, 1.0897247358851685 ], "technical_novelty_avg": [ 2.75, 1.0897247358851685 ], "empirical_novelty_avg": [ 2.0, 1.224744871391589 ], "wc_summary_paper_avg": [ 88.25, 27.6891224129621 ], "wc_strength_and_weaknesses_avg": [ 211.25, 82.82926717048751 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 30.75, 7.595228765481656 ], "wc_summary_review_avg": [ 38.0, 18.66815470259447 ], "wc_review_avg": [ 368.25, 53.0112016464445 ], "wc_reply_reviewers_avg": [ 92.75, 101.55632673546242 ], "wc_reply_authors_avg": [ 907.25, 453.8696811861308 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.8787878787878788, "corr_recommendation_correctness": 0.9914573982080402, 
"gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=722493863971987196&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;0;1;2;3;0", "aff_unique_norm": "University of Science and Technology of China;Microsoft;Eastern Institute for Advanced Study;Eastern Institute of Technology", "aff_unique_dep": ";Microsoft Research;;", "aff_unique_url": "http://www.ustc.edu.cn;https://www.microsoft.com/en-us/research;;https://www.eit.edu.cn", "aff_unique_abbr": "USTC;MSR;;", "aff_campus_unique_index": "1", "aff_campus_unique": ";Ningbo", "aff_country_unique_index": "0;1;0;1;0;0", "aff_country_unique": "China;United States;" }, { "title": "Multiple sequence alignment as a sequence-to-sequence learning problem", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10923", "id": "8efJYMBrNb", "poster": "/media/PosterPDFs/ICLR%202023/10923.png?t=1682599065.436587", "openreview": "https://openreview.net/forum?id=8efJYMBrNb", "slides": "https://iclr.cc/virtual/2023/poster/10923", "video": "https://iclr.cc/virtual/2023/poster/10923", "author_site": "Edo Dotan, Yonatan Belinkov, Oren Avram, Elya Wygoda, Noa Ecker, Michael Alburquerque, Omri Keren, Gil Loewenthal, Tal Pupko", "tldr": "", "abstract": "The sequence alignment problem is one of the most fundamental problems in bioinformatics and a plethora of methods were devised to tackle it. Here we introduce BetaAlign, a methodology for aligning sequences using an NLP approach. BetaAlign accounts for the possible variability of the evolutionary process among different datasets by using an ensemble of transformers, each trained on millions of samples generated from a different evolutionary model. Our approach leads to alignment accuracy that is similar and often better than commonly used methods, such as MAFFT, DIALIGN, ClustalW, T-Coffee, PRANK, and MUSCLE.", "keywords": "sequence alignment;molecular evolution;natural language processing;bioinformatics", "primary_area": "", "supplementary_material": "", "author": "Edo Dotan;Yonatan Belinkov;Oren Avram;Elya Wygoda;Noa Ecker;Michael Alburquerque;Omri Keren;Gil Loewenthal;Tal Pupko", "authorids": "~Edo_Dotan1;~Yonatan_Belinkov1;~Oren_Avram1;elya.wygoda@gmail.com;n12345e@gmail.com;michaelalb@gmail.com;~Omri_Keren1;gilloe@tauex.tau.ac.il;talp@tauex.tau.ac.il", "gender": ";M;;;;;M;;", "homepage": ";https://www.belinkov.com;;;;;;;", "dblp": ";136/8705;;;;;302/4390;;", "google_scholar": ";https://scholar.google.com/citations?authorid=K-6ujU4AAAAJ;LGY293MAAAAJ;;;;;;", "orcid": ";;0000-0003-1984-2139;;;;;;", "linkedin": "edo-dotan-76b7a0232;;orenavram/;;;;;;", "or_profile": "~Edo_Dotan1;~Yonatan_Belinkov1;~Oren_Avram1;elya.wygoda@gmail.com;n12345e@gmail.com;michaelalb@gmail.com;~Omri_Keren1;gilloe@tauex.tau.ac.il;talp@tauex.tau.ac.il", "aff": "Tel Aviv University;Technion, Technion;, University of California, Los Angeles;;;;Loora AI;;", "aff_domain": "tau.ac.il;technion.ac.il;cs.ucla.edu;;;;loora.ai;;", "position": "PhD student;Assistant Professor;Postdoc;;;;Researcher;;", "bibtex": "@inproceedings{\ndotan2023multiple,\ntitle={Multiple sequence alignment as a sequence-to-sequence learning problem},\nauthor={Edo Dotan and Yonatan Belinkov and Oren Avram and Elya Wygoda and Noa Ecker and Michael Alburquerque and Omri Keren and Gil Loewenthal and Tal Pupko},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=8efJYMBrNb}\n}", "github": "", "project": "", 
"reviewers": "AUpf;8ZqM;JNkr", "pdf_size": 1857600, "recommendation": "3;6;6", "confidence": "5;4;3", "correctness": "2;3;3", "technical_novelty": "2;3;2", "empirical_novelty": "2;0;3", "wc_summary_paper": "38;29;118", "wc_strength_and_weaknesses": "278;295;448", "wc_clarity_quality_novelty_and_reproducibility": "26;226;35", "wc_summary_review": "46;109;32", "wc_review": "388;659;633", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1241;1026;816", "reply_reviewers": "0;0;0", "reply_authors": "2;2;2", "recommendation_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.6666666666666667, 1.247219128924647 ], "wc_summary_paper_avg": [ 61.666666666666664, 40.00277768133385 ], "wc_strength_and_weaknesses_avg": [ 340.3333333333333, 76.4475128583149 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 95.66666666666667, 92.23279725178506 ], "wc_summary_review_avg": [ 62.333333333333336, 33.48963355361709 ], "wc_review_avg": [ 560.0, 122.0846700723177 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1027.6666666666667, 173.5095258351989 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 9, 0 ], "corr_recommendation_confidence": -0.8660254037844387, "corr_recommendation_correctness": 1.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3886145136849020303&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=8efJYMBrNb", "email": "tau.ac.il;technion.ac.il;cs.ucla.edu;;;;loora.ai;;", "author_num": 9, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Tel Aviv University;Technion - Israel Institute of Technology;University of California, Los Angeles;Loora AI", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.tau.ac.il;https://www.technion.ac.il/en/;https://www.ucla.edu;https://www.loora.ai", "aff_unique_abbr": "TAU;Technion;UCLA;Loora AI", "aff_campus_unique_index": "1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;0;1;1", "aff_country_unique": "Israel;United States" }, { "id": "8foynpwwRb", "title": "Randomized Sharpness-Aware Training for Boosting Computational Efficiency in Deep Learning", "track": "main", "status": "Reject", "tldr": "We propose a randomized training policy, called randomized sharpness-aware training, for boosting the compuation efficiency in sharpness-aware training.", "abstract": "By driving optimizers to converge to flat minima, sharpness-aware learning algorithms (such as SAM) have shown the power to achieve state-of-art performances. However, these algorithms will generally incur one extra forward-backward propagation at each training iteration, which largely burdens the computation especially for scalable models. To this end, we propose an efficient training scheme, called Randomized Sharpness-Aware Training (RST). Optimizers in RST would perform a Bernoulli trial at each iteration to choose randomly from base algorithms (SGD) and sharpness-aware algorithms (SAM) with a probability arranged by a predefined scheduling function. Due to the mixture of base algorithms, the overall count of propagation pairs could be largely reduced. Also, we give theoretical analysis on the convergence of RST. 
Then, we empirically study the computation cost and effect of various types of scheduling functions, and give directions on setting appropriate scheduling functions. Further, we extend the RST to a general framework (G-RST), where we can freely adjust the degree of regularization on sharpness for any scheduling function. We show that G-RST can outperform SAM in most cases while saving 50\\% extra computation cost.\n", "keywords": "Optimization;Sharpness-aware Training;Computation Efficiency.", "primary_area": "", "supplementary_material": "", "author": "Yang Zhao", "authorids": "~Yang_Zhao11", "gender": "M", "homepage": "", "dblp": "50/2082-16", "google_scholar": "KF9ag1sAAAAJ", "orcid": "0000-0001-5883-2799", "linkedin": "", "or_profile": "~Yang_Zhao11", "aff": "Tsinghua University", "aff_domain": "tsinghua.edu.cn", "position": "PhD student", "bibtex": "@misc{\nzhao2023randomized,\ntitle={Randomized Sharpness-Aware Training for Boosting Computational Efficiency in Deep Learning},\nauthor={Yang Zhao},\nyear={2023},\nurl={https://openreview.net/forum?id=8foynpwwRb}\n}", "github": "", "project": "", "reviewers": "5xxY;hEbB;eiyQ;cVKR", "site": "https://openreview.net/forum?id=8foynpwwRb", "pdf_size": 2090220, "recommendation": "5;5;5;8", "confidence": "4;4;3;4", "correctness": "3;3;3;4", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "152;96;68;114", "wc_strength_and_weaknesses": "116;190;201;70", "wc_clarity_quality_novelty_and_reproducibility": "26;5;23;54", "wc_summary_review": "34;44;22;8", "wc_review": "328;335;314;246", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1078;737;590;163", "reply_reviewers": "0;0;0;0", "reply_authors": "2;1;1;1", "recommendation_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 107.5, 30.475399915341555 ], "wc_strength_and_weaknesses_avg": [ 144.25, 53.9090669553833 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 27.0, 17.53567791675018 ], "wc_summary_review_avg": [ 27.0, 13.45362404707371 ], "wc_review_avg": [ 305.75, 35.31554190437972 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 642.0, 328.3542294534974 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 1, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 1.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6316982438668062451&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "aff_unique_index": "0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "id": "8gU_8IdHN9g", "title": "Generated Graph Detection", "track": "main", "status": "Reject", "tldr": "We propose a general framework to detect generated graphs using GNN-based methods.", "abstract": "Graph generative models are becoming increasingly effective for data distribution approximation and data augmentation. Although still in sandboxes, they have aroused public concerns about their malicious misuses or misinformation broadcasts, just as Deepfake visual and auditory media have been delivering to society. It is never too early to regulate the prevalence of generated graphs. 
As a preventive response, we pioneer the formulation of the generated graph detection problem to distinguish generated graphs from real ones. We propose the first framework to systematically investigate a set of sophisticated models and their performance in four classification scenarios. Each scenario switches between seen and unseen datasets/generators during testing to get closer to real-world settings and progressively challenge the classifiers. Extensive experiments evidence that all the models are qualified for generated graph detection, with specific models having advantages in specific scenarios. Resulting from the validated generality and obliviousness of the classifiers to unseen datasets/generators, we draw a safe conclusion that our solution can sustain for a decent while to curb generated graph misuses.", "keywords": "Generated Graph;Graph Neural Network;Contrastive Learning;Metric Learning", "primary_area": "", "supplementary_material": "", "author": "Yihan Ma;Zhikun Zhang;Ning Yu;Xinlei He;Michael Backes;Yun Shen;Yang Zhang", "authorids": "~Yihan_Ma1;~Zhikun_Zhang1;~Ning_Yu2;~Xinlei_He1;~Michael_Backes1;~Yun_Shen3;~Yang_Zhang15", "gender": "F;;;M;;M;M", "homepage": "https://yihanma1.github.io/;;;https://xinleihe.github.io/;;https://uk.linkedin.com/in/yun-shen-24336257;https://yangzhangalmo.github.io/", "dblp": "199/0302-1.html;;;227/7262;;;06/6785-16", "google_scholar": "uRTNFX4AAAAJ;;;6hZNEtoAAAAJ;;Gx_JJ6cAAAAJ;Xeb2888AAAAJ", "orcid": ";;;;;;0000-0003-3612-7348", "linkedin": ";;;;;;", "or_profile": "~Yihan_Ma1;~Zhikun_Zhang1;~Ning_Yu2;~Xinlei_He1;~Michael_Backes1;~Yun_Shen3;~Yang_Zhang15", "aff": "CISPA Helmholtz Center for Information Security;;;CISPA Helmholtz Center for Information Security;;NetApp;CISPA Helmholtz Center for Information Security", "aff_domain": "cispa.de;;;cispa.de;;netapp.com;cispa.de", "position": "PhD student;;;PhD student;;Technical Director;Assistant Professor", "bibtex": "@misc{\nma2023generated,\ntitle={Generated Graph Detection},\nauthor={Yihan Ma and Zhikun Zhang and Ning Yu and Xinlei He and Michael Backes and Yun Shen and Yang Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=8gU_8IdHN9g}\n}", "github": "", "project": "", "reviewers": "4WuZ;chCC;eg3n", "site": "https://openreview.net/forum?id=8gU_8IdHN9g", "pdf_size": 1347561, "recommendation": "3;5;6", "confidence": "5;3;4", "correctness": "3;3;4", "technical_novelty": "1;2;3", "empirical_novelty": "2;2;3", "wc_summary_paper": "95;35;77", "wc_strength_and_weaknesses": "130;60;73", "wc_clarity_quality_novelty_and_reproducibility": "82;24;14", "wc_summary_review": "196;17;96", "wc_review": "503;136;260", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "893;503;245", "reply_reviewers": "0;0;0", "reply_authors": "2;1;1", "recommendation_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.0, 0.816496580927726 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 69.0, 25.13961017995307 ], "wc_strength_and_weaknesses_avg": [ 87.66666666666667, 30.40102337458761 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 40.0, 29.97776954122282 ], "wc_summary_review_avg": [ 103.0, 73.2438848414437 ], "wc_review_avg": [ 299.6666666666667, 152.42994748043736 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 547.0, 266.36816626616627 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.3333333333333333,
0.4714045207910317 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.6546536707079772, "corr_recommendation_correctness": 0.7559289460184545, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:VII2l0SWcPAJ:scholar.google.com/&scioq=Generated+Graph+Detection&hl=en&as_sdt=0,5", "gs_version_total": 10, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "CISPA Helmholtz Center for Information Security;NetApp", "aff_unique_dep": ";", "aff_unique_url": "https://www.cispa.de/;https://www.netapp.com", "aff_unique_abbr": "CISPA;NetApp", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Germany;United States" }, { "title": "Hebbian Deep Learning Without Feedback", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11073", "id": "8gd4M-_Rj1", "poster": "/media/PosterPDFs/ICLR%202023/11073.png?t=1682893397.1910844", "openreview": "https://openreview.net/forum?id=8gd4M-_Rj1", "slides": "https://iclr.cc/virtual/2023/poster/11073", "video": "https://iclr.cc/virtual/2023/poster/11073", "author_site": "Adrien Journ\u00e9, Hector Garcia Rodriguez, Qinghai Guo, Timoleon Moraitis", "tldr": "Advancing the state of the art in bio-plausible Deep Learning, and the plausibility of DL, through Hebbian plasticity and soft winner-take-all nets.", "abstract": "Recent approximations to backpropagation (BP) have mitigated many of BP's computational inefficiencies and incompatibilities with biology, but important limitations still remain. Moreover, the approximations significantly decrease accuracy in benchmarks, suggesting that an entirely different approach may be more fruitful. Here, grounded on recent theory for Hebbian learning in soft winner-take-all networks, we present multilayer SoftHebb, i.e. an algorithm that trains deep neural networks, without any feedback, target, or error signals. As a result, it achieves efficiency by avoiding weight transport, non-local plasticity, time-locking of layer updates, iterative equilibria, and (self-) supervisory or other feedback signals \u2013 which were necessary in other approaches. Its increased efficiency and biological compatibility do not trade off accuracy compared to state-of-the-art bio-plausible learning, but rather improve it. With up to five hidden layers and an added linear classifier, accuracies on MNIST, CIFAR-10, STL-10, and ImageNet, respectively reach 99.4%, 80.3%, 76.2%, and 27.3%. In conclusion, SoftHebb shows with a radically different approach from BP that Deep Learning over few layers may be plausible in the brain and increases the accuracy of bio-plausible machine learning. 
Code is available at https://github.com/NeuromorphicComputing/SoftHebb.", "keywords": "Hebbian;winner-take-all;cortical circuits;unsupervised;online;biologically plausible;neuromorphic", "primary_area": "", "supplementary_material": "", "author": "Adrien Journ\u00e9;Hector Garcia Rodriguez;Qinghai Guo;Timoleon Moraitis", "authorids": "~Adrien_Journ\u00e91;hector.garcia.rodriguez@huawei.com;~Qinghai_Guo1;~Timoleon_Moraitis1", "gender": "M;;M;M", "homepage": ";;https://www.semanticscholar.org/author/Qinghai-Guo/47747957;https://www.tmoraitis.com", "dblp": ";;12/8502;", "google_scholar": ";;;https://scholar.google.ch/citations?user=w3KiO1MAAAAJ", "orcid": ";;0000-0003-4697-9464;0000-0002-6521-0717", "linkedin": "adrien-journe;;;timoleon-moraitis-56a81217/", "or_profile": "~Adrien_Journ\u00e91;hector.garcia.rodriguez@huawei.com;~Qinghai_Guo1;~Timoleon_Moraitis1", "aff": ";;Huawei Technologies Ltd.;Huawei Technologies Ltd.", "aff_domain": ";;huawei.com;huawei.com", "position": ";;Researcher;Researcher", "bibtex": "@inproceedings{\njourn{\\'e}2023hebbian,\ntitle={Hebbian Deep Learning Without Feedback},\nauthor={Adrien Journ{\\'e} and Hector Garcia Rodriguez and Qinghai Guo and Timoleon Moraitis},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=8gd4M-_Rj1}\n}", "github": "", "project": "", "reviewers": "ba7W;72et;39im;ACU4", "pdf_size": 31413365, "recommendation": "6;6;6;8", "confidence": "3;3;4;4", "correctness": "4;4;3;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "53;90;59;172", "wc_strength_and_weaknesses": "98;191;188;136", "wc_clarity_quality_novelty_and_reproducibility": "85;122;92;670", "wc_summary_review": "50;218;51;63", "wc_review": "286;621;390;1041", "wc_reply_reviewers": "0;0;0;107", "wc_reply_authors": "760;1633;2924;2897", "reply_reviewers": "0;0;0;2", "reply_authors": "1;3;5;5", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 93.5, 47.447339229929426 ], "wc_strength_and_weaknesses_avg": [ 153.25, 38.674119253061214 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 242.25, 247.35235495139318 ], "wc_summary_review_avg": [ 95.5, 70.91015442092902 ], "wc_review_avg": [ 584.5, 290.1107547127476 ], "wc_reply_reviewers_avg": [ 26.75, 46.332359102467464 ], "wc_reply_authors_avg": [ 2053.5, 910.9370175813474 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 3.5, 1.6583123951777 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": -0.5773502691896257, "gs_citation": 61, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17367619793819174590&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=8gd4M-_Rj1", "email": ";;huawei.com;huawei.com", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Huawei", "aff_unique_dep": "Huawei Technologies", "aff_unique_url": "https://www.huawei.com", "aff_unique_abbr": "Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "8is5PNk68ql", "title": "Inferring Causal Relations between Temporal Events", "track": "main", "status": "Withdraw", "tldr": "method to infer causal 
relations between temporal events", "abstract": "Due to the popularity of event-based data, causal inference from event datasets has attracted increasing interest. However, inferring causalities from observational event sequences is challenging because of the heterogeneous and irregular nature of event-based data. Existing work on causal inference for temporal events disregards the event durations, and is thus unable to capture their impact on the causal relations. In the present paper, we overcome this limitation by proposing a new modeling approach for temporal events that captures and utilizes event durations. Based on this new temporal model, we propose a set of novel Duration-based Event Causality (DEC) scores, including the Duration-based Necessity and Sufficiency Trade-off score, and the Duration-based Conditional Intensity Rates scores that take into consideration event durations when inferring causal relations between temporal events. We prove that the proposed scores follow the causality hypothesis testing framework. We conduct an extensive experimental evaluation using both synthetic datasets, and two real-world event datasets in the medical and environmental domains to evaluate our proposed scores, and compare them against the closest baseline. The experimental results show that our proposed scores outperforms the baseline with a large margin using the popular evaluation metric Hits@K.", "keywords": "causality;temporal event", "primary_area": "", "supplementary_material": "", "author": "Nguyen Ho;Trinh Cong Le;Torben Bach Pedersen;Van Long Ho;Nguyen Tuong Huynh", "authorids": "~Nguyen_Ho1;lecongtrinh@qnu.edu.vn;tbp@cs.aau.dk;vlh@cs.aau.dk;htnguyen@hcmut.edu.vn", "gender": ";;;;", "homepage": ";;;;", "dblp": "158/8728;;;;", "google_scholar": "https://scholar.google.com/citations?hl=en;;;;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Nguyen_Ho1;lecongtrinh@qnu.edu.vn;tbp@cs.aau.dk;vlh@cs.aau.dk;htnguyen@hcmut.edu.vn", "aff": "Aalborg University;;;;", "aff_domain": "cs.aau.dk;;;;", "position": "Assistant Professor;;;;", "bibtex": "@misc{\nho2023inferring,\ntitle={Inferring Causal Relations between Temporal Events},\nauthor={Nguyen Ho and Trinh Cong Le and Torben Bach Pedersen and Van Long Ho and Nguyen Tuong Huynh},\nyear={2023},\nurl={https://openreview.net/forum?id=8is5PNk68ql}\n}", "github": "", "project": "", "reviewers": "K3YC;jrUg;cLt3;kyMH", "site": "https://openreview.net/forum?id=8is5PNk68ql", "pdf_size": 464065, "recommendation": "1;3;3;3", "confidence": "5;4;4;4", "correctness": "2;2;4;2", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "124;41;158;38", "wc_strength_and_weaknesses": "478;198;316;329", "wc_clarity_quality_novelty_and_reproducibility": "97;22;16;51", "wc_summary_review": "49;80;97;46", "wc_review": "748;341;587;464", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 2.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 2.5, 0.8660254037844386 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 90.25, 52.16500263586689 ], "wc_strength_and_weaknesses_avg": [ 330.25, 99.40416238769883 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 46.5, 32.01952529317073 ], "wc_summary_review_avg": [ 68.0, 21.38924963620744 ], "wc_review_avg": [ 535.0, 150.62370331392069 ], 
"wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:VQlIaVlEYXMJ:scholar.google.com/&scioq=Inferring+Causal+Relations+between+Temporal+Events&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Aalborg University", "aff_unique_dep": "", "aff_unique_url": "https://www.aau.dk", "aff_unique_abbr": "AAU", "aff_country_unique_index": "0", "aff_country_unique": "Denmark" }, { "title": "Supervision Complexity and its Role in Knowledge Distillation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10878", "id": "8jU7wy7N7mA", "poster": "", "openreview": "https://openreview.net/forum?id=8jU7wy7N7mA", "slides": "https://iclr.cc/virtual/2023/poster/10878", "video": "https://iclr.cc/virtual/2023/poster/10878", "author_site": "Hrayr Harutyunyan, Ankit Singh Rawat, Aditya Krishna Menon, Seungyeon Kim, Sanjiv Kumar", "tldr": "We provide a new theoretical perspective on knowledge distillation through the lens of supervision complexity -- a measure of alignment between the teacher-provided supervision and the student's neural tangent kernel.", "abstract": "Despite the popularity and efficacy of knowledge distillation, there is limited understanding of why it helps. In order to study the generalization behavior of a distilled student, we propose a new theoretical framework that leverages supervision complexity: a measure of alignment between teacher-provided supervision and the student's neural tangent kernel. The framework highlights a delicate interplay among the teacher's accuracy, the student's margin with respect to the teacher predictions, and the complexity of the teacher predictions. Specifically, it provides a rigorous justification for the utility of various techniques that are prevalent in the context of distillation, such as early stopping and temperature scaling. Our analysis further suggests the use of online distillation, where a student receives increasingly more complex supervision from teachers in different stages of their training. 
We demonstrate the efficacy of online distillation and validate the theoretical findings on a range of image classification benchmarks and model architectures.", "keywords": "distillation;kernel methods;neural tangent kernel", "primary_area": "", "supplementary_material": "", "author": "Hrayr Harutyunyan;Ankit Singh Rawat;Aditya Krishna Menon;Seungyeon Kim;Sanjiv Kumar", "authorids": "~Hrayr_Harutyunyan1;~Ankit_Singh_Rawat1;~Aditya_Krishna_Menon1;~Seungyeon_Kim1;~Sanjiv_Kumar1", "gender": ";M;;;M", "homepage": "https://hrayrhar.github.io/;https://ankitsrawat.github.io/home/;https://www.seungyeon.ai;http://www.sanjivk.com/;https://akmenon.github.io/", "dblp": "198/1465;https://dblp.org/pers/hd/r/Rawat:Ankit_Singh;74/7997-1.html;;89/3514", "google_scholar": "GaCGz8wAAAAJ;http://scholar.google.com/citations?user=U0_ab4cAAAAJ;zbcN_QIAAAAJ;https://scholar.google.com/citations?hl=en;", "orcid": ";;;;", "linkedin": "harhrayr;;;;", "or_profile": "~Hrayr_Harutyunyan1;~Ankit_Singh_Rawat1;~Seungyeon_Kim1;~Sanjiv_Kumar1;~Aditya_Menon1", "aff": "University of Southern California;Google;Google;Google;Google", "aff_domain": "usc.edu;google.com;google.com;google.com;google.com", "position": "PhD student;Research Scientist;Researcher;Research Scientist;Research Scientist", "bibtex": "@inproceedings{\nharutyunyan2023supervision,\ntitle={Supervision Complexity and its Role in Knowledge Distillation},\nauthor={Hrayr Harutyunyan and Ankit Singh Rawat and Aditya Krishna Menon and Seungyeon Kim and Sanjiv Kumar},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=8jU7wy7N7mA}\n}", "github": "", "project": "", "reviewers": "oX8Z;jR2a;AC7X", "pdf_size": 1441614, "recommendation": "5;6;8", "confidence": "4;3;2", "correctness": "3;4;4", "technical_novelty": "3;3;3", "empirical_novelty": "3;2;2", "wc_summary_paper": "114;387;68", "wc_strength_and_weaknesses": "402;299;99", "wc_clarity_quality_novelty_and_reproducibility": "17;34;26", "wc_summary_review": "98;51;66", "wc_review": "631;771;259", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1443;838;88", "reply_reviewers": "0;0;0", "reply_authors": "2;2;1", "recommendation_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 189.66666666666666, 140.79378142833187 ], "wc_strength_and_weaknesses_avg": [ 266.6666666666667, 125.79436482697554 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 25.666666666666668, 6.944222218666553 ], "wc_summary_review_avg": [ 71.66666666666667, 19.601587237318874 ], "wc_review_avg": [ 553.6666666666666, 216.05760548726096 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 789.6666666666666, 554.2311992501164 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.9819805060619659, "corr_recommendation_correctness": 0.7559289460184545, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2850633941371432767&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=8jU7wy7N7mA", "email": "usc.edu;google.com;google.com;google.com;google.com", "author_num": 5, "aff_unique_index": "0;1;1;1;1", "aff_unique_norm":
"University of Southern California;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.usc.edu;https://www.google.com", "aff_unique_abbr": "USC;Google", "aff_campus_unique_index": "0;1;1;1;1", "aff_campus_unique": "Los Angeles;Mountain View", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "8l5GjEqGiRG", "title": "A Close Look at Token Mixer: From Attention to Convolution", "track": "main", "status": "Withdraw", "tldr": "We take a close look at two classical token-mixers, convolution and attention. Detailed comparison and visual analysis motivate us to present a novel fully convolutional vision transformer, which achieves promising performance on several benchmarks.", "abstract": "There is an increasingly intensive debate about the effectiveness of ConvNets and Transformers in vision fields. Originating from the language processing community, Transformers show great promise for many vision tasks due to the insightful architecture design and attention mechanism. Nevertheless, we witnessed the revenge of ConvNets soon, surpassing Transformer variants in mainstream vision tasks. In this paper, we are not engaging in this debate; instead, we look into the details of attention and convolution. By looking into the self-attention responses in Transformers, we empirically find that 1.) Vision Transformers present a query-irrelevant behavior in deep layers, where the attention maps exhibit nearly consistent contexts in global scope, regardless of the query patch position (also head-irrelevant). This phenomenon indicates that a global context may hide behind the self-attention mechanism. 2.) The attention maps are intrinsically sparse; introducing the knowledge from ConvNets would largely smooth the attention and improve the performance. Motivated by these, we generalize self-attention formulation to abstract the query-irrelevant global context directly and further integrate the global context into convolutions. The resulting model, a Fully Convolutional Vision Transformer (i.e., FCViT), purely consists of convolutional layers and firmly inherits the merits of both attention mechanism and convolutions, including dynamic property, weight sharing, and shortand long-range feature modeling, etc. Experimental results demonstrate the effectiveness of FCViT. With less than 14M parameters, our FCViT-S12 outperforms related work ResT-Lite by 3.7% top-1 accuracy on ImageNet-1K. When scaling FCViT to larger models, we still perform better than previous state-of-the-art ConvNeXt with even fewer parameters. FCViTbased models also demonstrate promising transferability to downstream tasks, like object detection, instance segmentation, and semantic segmentation. 
Code and pretrained models are available at: https://anonymous.4open.science/r/FCViT-pytorch.", "keywords": "Convolution;Attention;Visual Representation", "primary_area": "", "supplementary_material": "", "author": "Xu Ma;Huan Wang;Can Qin;Kunpeng Li;Xingchen Zhao;Jie Fu;Yun Fu", "authorids": "~Xu_Ma2;~Huan_Wang3;~Can_Qin1;~Kunpeng_Li1;~Xingchen_Zhao1;~Jie_Fu2;~Yun_Fu1", "gender": "M;M;M;M;;;M", "homepage": "https://ma-xu.github.io/;https://huanwang.tech/;http://canqin.tech;https://kunpengli1994.github.io/;;;http://www1.ece.neu.edu/~yunfu/", "dblp": "77/9370-5;70/6155-14;214/2488;;;;00/5815-1", "google_scholar": "Ya7frcEAAAAJ;0-On0y4AAAAJ;QCik-YcAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;;https://scholar.google.com.tw/citations?user=h-JEcQ8AAAAJ", "orcid": ";0000-0001-6951-901X;;;;;0000-0002-3330-783X", "linkedin": ";huanwang-zju/;;;;;furaymond/", "or_profile": "~Xu_Ma2;~Huan_Wang3;~Can_Qin1;~Kunpeng_Li1;~Xingchen_Zhao1;~Jie_Fu2;~Yun_Fu1", "aff": "Adobe Systems;Northeastern University;Northeastern University;Meta;;;Northeastern University", "aff_domain": "adobe.com;neu.edu;neu.edu;fb.com;;;northeastern.edu", "position": "Intern;PhD student;PhD student;Researcher;;;Full Professor", "bibtex": "@misc{\nma2023a,\ntitle={A Close Look at Token Mixer: From Attention to Convolution},\nauthor={Xu Ma and Huan Wang and Can Qin and Kunpeng Li and Xingchen Zhao and Jie Fu and Yun Fu},\nyear={2023},\nurl={https://openreview.net/forum?id=8l5GjEqGiRG}\n}", "github": "", "project": "", "reviewers": "dAMX;8jLn;GChF", "site": "https://openreview.net/forum?id=8l5GjEqGiRG", "pdf_size": 5188587, "recommendation": "5;5;5", "confidence": "4;5;5", "correctness": "3;3;3", "technical_novelty": "2;2;2", "empirical_novelty": "3;3;2", "wc_summary_paper": "61;251;56", "wc_strength_and_weaknesses": "360;610;336", "wc_clarity_quality_novelty_and_reproducibility": "55;98;15", "wc_summary_review": "70;62;65", "wc_review": "546;1021;472", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "52;48;28", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 5.0, 0.0 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 122.66666666666667, 90.76832536126733 ], "wc_strength_and_weaknesses_avg": [ 435.3333333333333, 123.89601374629542 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 56.0, 33.891985286593446 ], "wc_summary_review_avg": [ 65.66666666666667, 3.2998316455372216 ], "wc_review_avg": [ 679.6666666666666, 243.24244878081805 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 42.666666666666664, 10.498677165349081 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15581662182302908102&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;1;2;1", "aff_unique_norm": "Adobe;Northeastern University;Meta", "aff_unique_dep": "Adobe Systems Incorporated;;Meta Platforms, Inc.", "aff_unique_url": "https://www.adobe.com;https://www.northeastern.edu;https://meta.com", "aff_unique_abbr": "Adobe;NEU;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id":
"8mQSpCL36Lg", "title": "Neural Autoregressive Refinement for Self-Supervised Outlier Detection beyond Images", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Many self-supervised methods have been proposed with the target of image anomaly detection. These methods often rely on the paradigm of data augmentation with predefined transformations. However, it is not straightforward to apply these techniques to non-image data, such as time series or tabular data. Here we propose a novel data refinement (DR) scheme that relies on neural autoregressive flows (NAF) for self-supervised anomaly detection. Flow-based models allow to explicitly learn the probability density and thus can assign accurate likelihoods to normal data which makes it usable to detect anomalies. The proposed NAF-DR method is achieved by efficiently generating random samples from latent space and transforming them into feature space along with likelihoods via invertible mapping. The samples with lower likelihoods are selected and further checked by outlier detection using Mahalanobis distance. The augmented samples incorporated with normal samples are used to train a better detector to approach decision boundaries. Compared with random transformations, NAF-DR can be interpreted as a likelihood-oriented data augmentation that is more efficient and robust. Extensive experiments show that our approach outperforms existing baselines on multiple tabular and time series datasets, and {\\color{blue}one real-world application}, significantly improving accuracy and robustness over the state-of-the-art baselines. ", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Sirui Bi;Victor Fung;Jiaxin Zhang", "authorids": "siruijhu@gmail.com;~Victor_Fung1;~Jiaxin_Zhang2", "gender": ";;M", "homepage": ";;https://jxzhangjhu.github.io/", "dblp": ";;32/7698-5.html", "google_scholar": ";2QsddMIAAAAJ;LiDm8jEAAAAJ", "orcid": ";;", "linkedin": ";;jiaxin-zhang-1425289b/", "or_profile": "siruijhu@gmail.com;~Victor_Fung1;~Jiaxin_Zhang2", "aff": ";Georgia Institute of Technology;Intuit AI Research", "aff_domain": ";gatech.edu;intuit.com", "position": ";Assistant Professor;Researcher", "bibtex": "@misc{\nbi2023neural,\ntitle={Neural Autoregressive Refinement for Self-Supervised Outlier Detection beyond Images},\nauthor={Sirui Bi and Victor Fung and Jiaxin Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=8mQSpCL36Lg}\n}", "github": "", "project": "", "reviewers": "i6vC;1PU5;pLJo;GFEL;ERj1;zdb4", "site": "https://openreview.net/forum?id=8mQSpCL36Lg", "pdf_size": 1838433, "recommendation": "1;3;5;5;5;6", "confidence": "4;3;4;4;5;4", "correctness": "1;2;3;3;3;3", "technical_novelty": "3;2;2;3;3;2", "empirical_novelty": "3;3;2;2;4;2", "wc_summary_paper": "35;65;65;156;54;224", "wc_strength_and_weaknesses": "19;337;247;152;303;348", "wc_clarity_quality_novelty_and_reproducibility": "297;63;74;115;220;180", "wc_summary_review": "29;32;41;41;66;43", "wc_review": "380;497;427;464;643;795", "wc_reply_reviewers": "0;0;0;0;0;0", "wc_reply_authors": "0;0;0;0;0;0", "reply_reviewers": "0;0;0;0;0;0", "reply_authors": "0;0;0;0;0;0", "recommendation_avg": [ 4.166666666666667, 1.6749792701868151 ], "confidence_avg": [ 4.0, 0.5773502691896257 ], "correctness_avg": [ 2.5, 0.7637626158259734 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.7453559924999299 ], "wc_summary_paper_avg": [ 99.83333333333333, 67.45718806933938 ], "wc_strength_and_weaknesses_avg": [ 234.33333333333334, 
116.53564070942225 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 158.16666666666666, 83.24545366298123 ], "wc_summary_review_avg": [ 42.0, 11.88836966675134 ], "wc_review_avg": [ 534.3333333333334, 142.26462041639945 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.3446909937728556, "corr_recommendation_correctness": 0.9771071235928813, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:5KGO0dC_bM4J:scholar.google.com/&scioq=Neural+Autoregressive+Refinement+for+Self-Supervised+Outlier+Detection+beyond+Images&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Georgia Institute of Technology;Intuit", "aff_unique_dep": ";Intuit AI Research", "aff_unique_url": "https://www.gatech.edu;https://intuit.com/", "aff_unique_abbr": "Georgia Tech;Intuit", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "8mQrCW_JO3", "title": "Less Is More: Training on Low-Fidelity Images Improves Robustness to Adversarial Attacks", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Since adversarial attacks are defined relative to human perception, it may be fruitful to investigate why human perception (and biological perception in general) is robust to the types of perturbations that DNNs are convincingly deceived by. In the context of vision, we hypothesize that a factor contributing to the robustness of human visual perception is our constant exposure to low-fidelity visual stimuli. To investigate the impact, vis-\u00e0-vis adversarial robustness, of exposure to low-fidelity visual stimuli, we train and evaluate object recognition DNNs on images which have been blurred and have had their color saturation reduced. We find that DNNs trained on such images can achieve high classification accuracy over a small number of classes, while becoming significantly more robust to low-magnitude adversarial attacks. Furthermore, we design a blurring module that simulates the loss of visual acuity with increasing eccentricity by selecting the intensity of Gaussian blur at each pixel based on its distance from a given fixation point.
Our results indicate that using this retina-inspired blurring mechanism, instead of blurring the entire image with the same Gaussian kernel, yields better robustness while keeping the accuracy on clean data unchanged.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Muhammad A Shah;Bhiksha Raj", "authorids": "~Muhammad_A_Shah1;~Bhiksha_Raj1", "gender": ";M", "homepage": ";https://www.cs.cmu.edu/directory/bhikshar/", "dblp": "142/5481;60/3996", "google_scholar": "74MwzTcAAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Muhammad_A_Shah1;~Bhiksha_Raj1", "aff": "Carnegie Mellon University;Mohamed bin Zayed University of Artificial Intelligence", "aff_domain": "cmu.edu;mbzuai.ac.ae", "position": "PhD student;Full Professor", "bibtex": "@misc{\nshah2023less,\ntitle={Less Is More: Training on Low-Fidelity Images Improves Robustness to Adversarial Attacks},\nauthor={Muhammad A Shah and Bhiksha Raj},\nyear={2023},\nurl={https://openreview.net/forum?id=8mQrCW_JO3}\n}", "github": "", "project": "", "reviewers": "82f1;2VPZ;MPri;ocRZ", "site": "https://openreview.net/forum?id=8mQrCW_JO3", "pdf_size": 5575425, "recommendation": "3;5;5;6", "confidence": "4;3;2;4", "correctness": "3;4;4;3", "technical_novelty": "3;4;2;3", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "32;105;78;61", "wc_strength_and_weaknesses": "148;236;403;651", "wc_clarity_quality_novelty_and_reproducibility": "4;28;37;21", "wc_summary_review": "23;66;43;45", "wc_review": "207;435;561;778", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 69.0, 26.504716561397142 ], "wc_strength_and_weaknesses_avg": [ 359.5, 191.60441017888914 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 22.5, 12.093386622447824 ], "wc_summary_review_avg": [ 44.25, 15.22128443988877 ], "wc_review_avg": [ 495.25, 206.75392982964073 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.20751433915982243, "corr_recommendation_correctness": 0.2294157338705618, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2404848622409670931&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Carnegie Mellon University;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://mbzuai.ac.ae", "aff_unique_abbr": "CMU;MBZUAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;United Arab Emirates" }, { "id": "8mWlBArp1qx", "title": "MaPLe: Multi-modal Prompt Learning", "track": "main", "status": "Withdraw", "tldr": "Multi-modal prompt learning for improving synergy between learned language and vision representations for fine-tuning CLIP on downstream image recognition tasks.", "abstract": "Pre-trained vision-language (V-L) models such as CLIP have shown excellent generalization ability to downstream tasks. 
However, they are sensitive to the choice of input text prompts and require careful selection of prompt templates to perform well. Inspired by the Natural Language Processing (NLP) literature, recent CLIP adaptation approaches learn prompts as the textual inputs to fine-tune CLIP for downstream tasks. We note that using prompting to adapt representations in a single branch of CLIP (language or vision) is sub-optimal since it does not allow the flexibility to dynamically adjust both representation spaces on a downstream task. In this work, we propose Multi-modal Prompt Learning (MaPLe) for both vision and language branches to improve alignment between the vision and language representations. Our design promotes strong coupling between the vision-language prompts to ensure mutual synergy and discourages learning independent uni-modal solutions. Further, we learn separate prompts across different early stages to progressively model the stage-wise feature relationships to allow rich context learning. We evaluate the effectiveness of our approach on three representative tasks of generalization to novel classes, new target datasets and unseen domain shifts. Compared with the state-of-the-art method Co-CoOp, MaPLe exhibits favorable performance and achieves an absolute gain of 3.45% on novel classes and 2.72% on overall harmonic-mean, averaged over 11 diverse image recognition datasets. Our code and models will be publicly released.", "keywords": "Vision-language models;Prompt learning;Generalization;Fine-tuning;Transfer learning", "primary_area": "", "supplementary_material": "/attachment/973298a4b7829a62d59c67268c05f11457c602dd.zip", "author": "Muhammad Uzair Khattak;Hanoona Abdul Rasheed;Muhammad Maaz;Salman Khan;Fahad Khan", "authorids": "~Muhammad_Uzair_Khattak1;~Hanoona_Abdul_Rasheed1;~Muhammad_Maaz1;~Salman_Khan4;~Fahad_Khan1", "gender": "F;M;M;M;M", "homepage": "https://www.hanoonarasheed.com/;https://mmaaz60.github.io;https://salman-h-khan.github.io/;https://sites.google.com/view/fahadkhans/home;https://muzairkhattak.github.io/", "dblp": "293/7463;247/6083-1;32/11535-1;05/8618;324/2256.html", "google_scholar": "yhDdEuEAAAAJ;vTy9Te8AAAAJ;https://scholar.google.es/citations?user=M59O9lkAAAAJ;zvaeYnUAAAAJ;https://scholar.google.es/citations?user=M6fFL4gAAAAJ", "orcid": ";;0000-0002-9502-1749;;", "linkedin": ";mmaaz60/;;;muhammad-uzair-khattak-204ba1150/", "or_profile": "~Hanoona_Abdul_Rasheed1;~Muhammad_Maaz1;~Salman_Khan4;~Fahad_Khan1;~Muhammd_Uzair_Khattak1", "aff": "Mohamed bin Zayed University of Artificial Intelligence;Mohamed bin Zayed University of Artificial Intelligence;Australian National University;Link\u00f6ping University;Mohamed bin Zayed University of Artificial Intelligence", "aff_domain": "mbzuai.ac.ae;mbzuai.ac.ae;anu.edu.au;liu.se;mbzuai.ac.ae", "position": "PhD student;PhD student;Lecturer;Associate Professor;MS student", "bibtex": "@misc{\nkhattak2023maple,\ntitle={Ma{PL}e: Multi-modal Prompt Learning},\nauthor={Muhammad Uzair Khattak and Hanoona Abdul Rasheed and Muhammad Maaz and Salman Khan and Fahad Khan},\nyear={2023},\nurl={https://openreview.net/forum?id=8mWlBArp1qx}\n}", "github": "", "project": "", "reviewers": "N9kH;AgtR;iPLr;wst5", "site": "https://openreview.net/forum?id=8mWlBArp1qx", "pdf_size": 8406194, "recommendation": "3;5;6;8", "confidence": "5;4;3;4", "correctness": "2;3;4;4", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;3;0;3", "wc_summary_paper": "117;41;60;41", "wc_strength_and_weaknesses": "710;200;139;48", 
"wc_clarity_quality_novelty_and_reproducibility": "42;20;8;15", "wc_summary_review": "93;20;8;4", "wc_review": "962;281;215;108", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 1.224744871391589 ], "wc_summary_paper_avg": [ 64.75, 31.1478329904345 ], "wc_strength_and_weaknesses_avg": [ 274.25, 257.32894804121827 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 21.25, 12.71563997602952 ], "wc_summary_review_avg": [ 31.25, 36.134298111351214 ], "wc_review_avg": [ 391.5, 335.11378664567053 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.5883484054145521, "corr_recommendation_correctness": 0.9198662110077999, "gs_citation": 845, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13763474262075251533&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 9, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Mohamed bin Zayed University of Artificial Intelligence;Australian National University;Link\u00f6ping University", "aff_unique_dep": ";;", "aff_unique_url": "https://mbzuai.ac.ae;https://www.anu.edu.au;https://www.liu.se", "aff_unique_abbr": "MBZUAI;ANU;LiU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;2;0", "aff_country_unique": "United Arab Emirates;Australia;Sweden" }, { "title": "Represent to Control Partially Observed Systems: Representation Learning with Provable Sample Efficiency", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11354", "id": "8oJHwb3Sgp", "poster": "", "openreview": "https://openreview.net/forum?id=8oJHwb3Sgp", "slides": "https://iclr.cc/virtual/2023/poster/11354", "video": "https://iclr.cc/virtual/2023/poster/11354", "author_site": "Lingxiao Wang, Qi Cai, Zhuoran Yang, Zhaoran Wang", "tldr": "", "abstract": "Reinforcement learning in partially observed Markov decision processes (POMDPs) faces two challenges. (i) It often takes the full history to predict the future, which induces a sample complexity that scales exponentially with the horizon. (ii) The observation and state spaces are often continuous, which induces a sample complexity that scales exponentially with the extrinsic dimension. Addressing such challenges requires learning a minimal but sufficient representation of the observation and state histories by exploiting the structure of the POMDP.\n\nTo this end, we propose a reinforcement learning algorithm named Represent to Control (RTC), which learns the representation at two levels while optimizing the policy.~(i) For each step, RTC learns to represent the state with a low-dimensional feature, which factorizes the transition kernel. (ii) Across multiple steps, RTC learns to represent the full history with a low-dimensional embedding, which assembles the per-step feature. We integrate (i) and (ii) in a unified framework that allows a variety of estimators (including maximum likelihood estimators and generative adversarial networks). 
For a class of POMDPs with a low-rank structure in the transition kernel, RTC attains an $O(1/\\epsilon^2)$ sample complexity that scales polynomially with the horizon and the intrinsic dimension (that is, the rank). Here $\\epsilon$ is the optimality gap. To our best knowledge, RTC is the first sample-efficient algorithm that bridges representation learning and policy optimization in POMDPs with infinite observation and state spaces.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/705db7f328aef942efe7ff525e815e150265a3b2.zip", "author": "Lingxiao Wang;Qi Cai;Zhuoran Yang;Zhaoran Wang", "authorids": "~Lingxiao_Wang6;~Qi_Cai2;~Zhuoran_Yang1;~Zhaoran_Wang1", "gender": "M;M;M;Not Specified", "homepage": ";;https://zhuoranyang.github.io/;https://zhaoranwang.github.io/", "dblp": "140/1229;;;117/2756", "google_scholar": ";FX6bV4UAAAAJ;;https://scholar.google.com.tw/citations?user=HSx0BgQAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Lingxiao_Wang6;~Qi_Cai2;~Zhuoran_Yang1;~Zhaoran_Wang1", "aff": "Northwestern University;Northwestern University;Yale University;", "aff_domain": "northwestern.edu;u.northwestern.edu;yale.edu;", "position": "PhD student;PhD student;Assistant Professor;", "bibtex": "@inproceedings{\nwang2023represent,\ntitle={Represent to Control Partially Observed Systems: Representation Learning with Provable Sample Efficiency},\nauthor={Lingxiao Wang and Qi Cai and Zhuoran Yang and Zhaoran Wang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=8oJHwb3Sgp}\n}", "github": "", "project": "", "reviewers": "nfDQ;iXuJ;6Ax4;reeX", "pdf_size": 264572, "recommendation": "6;6;6;6", "confidence": "3;2;3;4", "correctness": "3;4;4;4", "technical_novelty": "3;2;2;3", "empirical_novelty": "0;0;0;0", "wc_summary_paper": "69;80;116;58", "wc_strength_and_weaknesses": "181;48;181;496", "wc_clarity_quality_novelty_and_reproducibility": "148;29;106;31", "wc_summary_review": "35;44;63;33", "wc_review": "433;201;466;618", "wc_reply_reviewers": "124;15;84;0", "wc_reply_authors": "856;372;380;709", "reply_reviewers": "2;1;1;0", "reply_authors": "3;1;2;1", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 0, 0 ], "wc_summary_paper_avg": [ 80.75, 21.787324296480282 ], "wc_strength_and_weaknesses_avg": [ 226.5, 164.79760313790973 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 78.5, 50.727211632416775 ], "wc_summary_review_avg": [ 43.75, 11.861176164276458 ], "wc_review_avg": [ 429.5, 149.23890243498843 ], "wc_reply_reviewers_avg": [ 55.75, 50.558752951393096 ], "wc_reply_authors_avg": [ 579.25, 209.80869262258892 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2994348922007687424&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=8oJHwb3Sgp", "email": "northwestern.edu;u.northwestern.edu;yale.edu;", "author_num": 4, "aff_unique_index": "0;0;1", "aff_unique_norm": "Northwestern University;Yale University", "aff_unique_dep": ";", "aff_unique_url": "https://www.northwestern.edu;https://www.yale.edu", "aff_unique_abbr": "NU;Yale", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "8onXkaNWLHA", "title": "Self-supervised video pretraining yields strong image representations", "track": "main", "status": "Reject", "tldr": "We achieve SoTA transfer to image scene understanding tasks using frame-based models pre-trained using contrastive learning on videos. ", "abstract": "Videos contain far more information than still images, and hold the potential for learning rich representations of the visual world. Yet, pretraining on image datasets has remained the dominant paradigm for learning representations that capture spatial information and previous attempts at video pretraining have fallen short on image understanding tasks. In this work we revisit self-supervised learning of image representations from the dynamic evolution of video frames. To that end, we propose a dataset curation procedure that addresses the domain mismatch between video and image datasets, and develop a contrastive learning framework which handles the complex transformations present in natural videos. This simple paradigm for distilling knowledge from videos to image representations, called VITO, performs surprisingly well on a variety of image-based transfer learning tasks. For the first time, our video-pretrained model closes the gap with ImageNet pretraining on semantic segmentation on PASCAL and ADE20k and object detection on COCO and LVIS, raising the possibility of video-pretraining becoming the new default for learning image representations. ", "keywords": "self-supervised;contrastive;video;representation learning;image segmentation;object detection", "primary_area": "", "supplementary_material": "", "author": "Nikhil Parthasarathy;S. M. Ali Eslami;Joao Carreira;Olivier J Henaff", "authorids": "~Nikhil_Parthasarathy1;~S._M._Ali_Eslami1;~Joao_Carreira1;~Olivier_J_Henaff1", "gender": "M;M;M;", "homepage": ";http://arkitus.com/research;;https://www.olivierhenaff.com/", "dblp": "209/4951;117/4847;61/5621-1;156/0035.html", "google_scholar": "X9mO4ckAAAAJ;skyUvycAAAAJ;https://scholar.google.pt/citations?user=IUZ-7_cAAAAJ;Sx75CVsAAAAJ", "orcid": ";;;0000-0001-8183-9489", "linkedin": "nikparth/;;jo%C3%A3o-carreira-56238a7/;", "or_profile": "~Nikhil_Parthasarathy1;~S._M._Ali_Eslami1;~Joao_Carreira1;~Olivier_J_Henaff1", "aff": "New York University;Google;Google DeepMind;Google DeepMind", "aff_domain": "nyu.edu;google.com;google.com;google.com", "position": "PhD student;Researcher;Research Scientist;Research Scientist", "bibtex": "@misc{\nparthasarathy2023selfsupervised,\ntitle={Self-supervised video pretraining yields strong image representations},\nauthor={Nikhil Parthasarathy and S. M. 
Ali Eslami and Joao Carreira and Olivier J Henaff},\nyear={2023},\nurl={https://openreview.net/forum?id=8onXkaNWLHA}\n}", "github": "", "project": "", "reviewers": "zSuF;kndf;ACJo;iRvv", "site": "https://openreview.net/forum?id=8onXkaNWLHA", "pdf_size": 2278737, "recommendation": "3;3;5;6", "confidence": "4;5;4;4", "correctness": "3;4;3;3", "technical_novelty": "2;2;1;2", "empirical_novelty": "2;2;3;2", "wc_summary_paper": "58;100;158;88", "wc_strength_and_weaknesses": "298;160;270;121", "wc_clarity_quality_novelty_and_reproducibility": "19;19;66;171", "wc_summary_review": "45;27;70;92", "wc_review": "420;306;564;472", "wc_reply_reviewers": "0;0;320;59", "wc_reply_authors": "861;801;1688;768", "reply_reviewers": "0;0;2;1", "reply_authors": "2;1;3;2", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 101.0, 36.29049462324811 ], "wc_strength_and_weaknesses_avg": [ 212.25, 73.73050589816945 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 68.75, 62.07404852271197 ], "wc_summary_review_avg": [ 58.5, 24.642443060703215 ], "wc_review_avg": [ 440.5, 93.21346469260759 ], "wc_reply_reviewers_avg": [ 94.75, 132.25992401328529 ], "wc_reply_authors_avg": [ 1029.5, 381.644140528844 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.5555555555555555, "corr_recommendation_correctness": -0.5555555555555555, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2036981961471876902&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "New York University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.nyu.edu;https://www.google.com", "aff_unique_abbr": "NYU;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;1;1", "aff_country_unique": "United States;United Kingdom" }, { "title": "LPT: Long-tailed Prompt Tuning for Image Classification", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12129", "id": "8pOVAeo8ie", "poster": "", "openreview": "https://openreview.net/forum?id=8pOVAeo8ie", "slides": "https://iclr.cc/virtual/2023/poster/12129", "video": "https://iclr.cc/virtual/2023/poster/12129", "author_site": "Bowen Dong, Pan Zhou, shuicheng YAN, Wangmeng Zuo", "tldr": "", "abstract": "For long-tailed classification tasks, most works often pretrain a big model on a large-scale (unlabeled) dataset, and then fine-tune the whole pretrained model for adapting to long-tailed data. Though promising, fine-tuning the whole pretrained model tends to suffer from high cost in computation and deployment of different models for different tasks, as well as weakened generalization capability due to overfitting to certain features of long-tailed data. To alleviate these issues, we propose an effective Long-tailed Prompt Tuning (LPT) method for long-tailed classification tasks. LPT introduces several trainable prompts into a frozen pretrained model to adapt it to long-tailed data.
For better effectiveness, we divide prompts into two groups: 1) a shared prompt for the whole long-tailed dataset to learn general features and to adapt a pretrained model into the target long-tailed domain; and 2) group-specific prompts to gather group-specific features for the samples which have similar features and also to empower the pretrained model with fine-grained discrimination ability. Then we design a two-phase training paradigm to learn these prompts. In the first phase, we train the shared prompt via conventional supervised prompt tuning to adapt a pretrained model to the desired long-tailed domain. In the second phase, we use the learnt shared prompt as a query to select a small best-matched set for a group of similar samples from the group-specific prompt set to mine the common features of these similar samples, and then optimize these prompts with a dual sampling strategy and the asymmetric Gaussian Clouded Logit loss. By only fine-tuning a few prompts while fixing the pretrained model, LPT can reduce training cost and deployment cost by storing a few prompts, and enjoys the strong generalization ability of the pretrained model. Experiments show that on various long-tailed benchmarks, with only $\\sim$1.1\\% extra trainable parameters, LPT achieves comparable or higher performance than previous whole model fine-tuning methods, and is more robust to domain-shift.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Bowen Dong;Pan Zhou;Shuicheng YAN;Wangmeng Zuo", "authorids": "~Bowen_Dong1;~Pan_Zhou6;~Shuicheng_YAN3;~Wangmeng_Zuo3", "gender": "M;M;M;M", "homepage": ";https://www.researchgate.net/profile/Pan-Zhou-8;https://yanshuicheng.ai/;", "dblp": ";;y/ShuichengYan;93/2671", "google_scholar": "t0WhKEYAAAAJ;;https://scholar.google.com.hk/citations?user=DNuiPHwAAAAJ;rUOpCEYAAAAJ", "orcid": "0000-0001-7379-1286;;;0000-0002-3330-783X", "linkedin": ";;;", "or_profile": "~Bowen_Dong1;~Pan_Zhou6;~Shuicheng_YAN3;~Wangmeng_Zuo3", "aff": "Harbin Institute of Technology;;sea Group;Harbin Institute of Technology", "aff_domain": "hit.edu.cn;;sea.com;hit.edu.cn", "position": "PhD student;;Researcher;Full Professor", "bibtex": "@inproceedings{\ndong2023lpt,\ntitle={{LPT}: Long-tailed Prompt Tuning for Image Classification},\nauthor={Bowen Dong and Pan Zhou and Shuicheng YAN and Wangmeng Zuo},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=8pOVAeo8ie}\n}", "github": "", "project": "", "reviewers": "ferv;qoSe;q9x4;mA6r", "pdf_size": 708000, "recommendation": "6;6;8;8", "confidence": "3;4;4;3", "correctness": "3;3;3;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "79;51;46;110", "wc_strength_and_weaknesses": "112;185;169;375", "wc_clarity_quality_novelty_and_reproducibility": "46;108;20;99", "wc_summary_review": "56;29;7;204", "wc_review": "293;373;242;788", "wc_reply_reviewers": "0;49;14;157", "wc_reply_authors": "412;1566;655;2006", "reply_reviewers": "0;2;1;1", "reply_authors": "1;3;1;3", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 71.5, 25.53918557824427 ], "wc_strength_and_weaknesses_avg": [ 210.25, 98.9125244850216 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 68.25, 36.567574434189645 ], "wc_summary_review_avg": [ 74.0, 77.03570600701988 ],
"wc_review_avg": [ 424.0, 215.28005016721823 ], "wc_reply_reviewers_avg": [ 55.0, 61.53454314448105 ], "wc_reply_authors_avg": [ 1159.75, 650.9763340552405 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 1.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 84, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16368921315993980358&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=8pOVAeo8ie", "email": "hit.edu.cn;;sea.com;hit.edu.cn", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Harbin Institute of Technology;Sea Group", "aff_unique_dep": ";", "aff_unique_url": "http://www.hit.edu.cn/;", "aff_unique_abbr": "HIT;", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Harbin;", "aff_country_unique_index": "0;0", "aff_country_unique": "China;" }, { "title": "Denoising Diffusion Samplers", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10913", "id": "8pvnfTAbu1f", "poster": "", "openreview": "https://openreview.net/forum?id=8pvnfTAbu1f", "slides": "https://iclr.cc/virtual/2023/poster/10913", "video": "https://iclr.cc/virtual/2023/poster/10913", "author_site": "Francisco Vargas, Will Grathwohl, Arnaud Doucet", "tldr": "How to use denoising diffusion models ideas to sample unnormalized target densities and estimate their normalizing constants", "abstract": "Denoising diffusion models are a popular class of generative models providing state-of-the-art results in many domains. One adds gradually noise to data using a diffusion to transform the data distribution into a Gaussian distribution. Samples from the generative model are then obtained by simulating an approximation of the time-reversal of this diffusion initialized by Gaussian samples. Practically, the intractable score terms appearing in the time-reversed process are approximated using score matching techniques. We explore here a similar idea to sample approximately from unnormalized probability density functions and estimate their normalizing constants. We consider a process where the target density diffuses towards a Gaussian. Denoising Diffusion Samplers (DDS) are obtained by approximating the corresponding time-reversal. While score matching is not applicable in this context, we can leverage many of the ideas introduced in generative modeling for Monte Carlo sampling. Existing theoretical results from denoising diffusion models also provide theoretical guarantees for DDS. 
We discuss the connections between DDS, optimal control and Schr\\\"odinger bridges and finally demonstrate DDS experimentally on a variety of challenging sampling tasks.", "keywords": "diffusion models;importance sampling;monte carlo;variational inference", "primary_area": "", "supplementary_material": "/attachment/840d14234d9267c7d5906c8b8332a01340a959f6.zip", "author": "Francisco Vargas;Will Sussman Grathwohl;Arnaud Doucet", "authorids": "~Francisco_Vargas1;~Will_Sussman_Grathwohl2;~Arnaud_Doucet2", "gender": "M;M;", "homepage": ";http://www.cs.toronto.edu/~wgrathwohl/;https://www.stats.ox.ac.uk/~doucet/", "dblp": "79/7431-1;192/1565;68/1628", "google_scholar": ";;W4SZGV8AAAAJ", "orcid": ";;0000-0002-7662-419X", "linkedin": ";will-grathwohl-b44a383b/;", "or_profile": "~Francisco_Vargas1;~Will_Sussman_Grathwohl2;~Arnaud_Doucet2", "aff": "University of Cambridge;Google DeepMind;University of Oxford", "aff_domain": "cam.ac.uk;deepmind.com;ox.ac.uk", "position": "PhD student;Senior Research Scientist;Full Professor", "bibtex": "@inproceedings{\nvargas2023denoising,\ntitle={Denoising Diffusion Samplers},\nauthor={Francisco Vargas and Will Sussman Grathwohl and Arnaud Doucet},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=8pvnfTAbu1f}\n}", "github": "", "project": "", "reviewers": "cpvn;SEx8;bg9B;Vb4W", "pdf_size": 3949089, "recommendation": "5;6;6;6", "confidence": "1;3;3;4", "correctness": "4;3;3;3", "technical_novelty": "3;3;3;2", "empirical_novelty": "2;3;3;2", "wc_summary_paper": "157;21;87;100", "wc_strength_and_weaknesses": "252;25;138;796", "wc_clarity_quality_novelty_and_reproducibility": "69;23;56;114", "wc_summary_review": "30;34;44;139", "wc_review": "508;103;325;1149", "wc_reply_reviewers": "0;34;26;476", "wc_reply_authors": "903;492;842;3304", "reply_reviewers": "0;1;1;2", "reply_authors": "2;3;3;8", "recommendation_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 2.75, 1.0897247358851685 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 91.25, 48.35480844755772 ], "wc_strength_and_weaknesses_avg": [ 302.75, 295.8710656688146 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 65.5, 32.63816784073518 ], "wc_summary_review_avg": [ 61.75, 44.890839822841365 ], "wc_review_avg": [ 521.25, 389.7732513911133 ], "wc_reply_reviewers_avg": [ 134.0, 197.8534811419804 ], "wc_reply_authors_avg": [ 1385.25, 1118.8367564126593 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 4.0, 2.345207879911715 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.9271726499455306, "corr_recommendation_correctness": -1.0, "gs_citation": 84, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7383989856361204303&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=8pvnfTAbu1f", "email": "cam.ac.uk;deepmind.com;ox.ac.uk", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Cambridge;Google;University of Oxford", "aff_unique_dep": ";Google DeepMind;", "aff_unique_url": "https://www.cam.ac.uk;https://deepmind.com;https://www.ox.ac.uk", "aff_unique_abbr": "Cambridge;DeepMind;Oxford", "aff_campus_unique_index": "0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "title": 
"Combinatorial-Probabilistic Trade-Off: P-Values of Community Properties Test in the Stochastic Block Models", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11748", "id": "8qjSA5QACb40", "poster": "/media/PosterPDFs/ICLR%202023/11748.png?t=1682624686.160325", "openreview": "https://openreview.net/forum?id=8qjSA5QACb40", "slides": "https://iclr.cc/virtual/2023/poster/11748", "video": "https://iclr.cc/virtual/2023/poster/11748", "author_site": "Shuting Shen, Junwei Lu", "tldr": "We propose an inferential framework testing the general community combinatorial properties of the stochastic block model and prove the minimax lower bound of the general community property test.", "abstract": "We propose an inferential framework testing the general community combinatorial properties of the stochastic block model. We aim to test the hypothesis on whether a certain community property is satisfied, e.g., whether a given set of nodes belong to the same community, and provide p-values for uncertainty quantification. Our framework is applicable to all symmetric community properties. To ease the challenges caused by the combinatorial nature of community properties, we develop a novel shadowing bootstrap method. Utilizing the symmetry, our method can find a shadowing representative of the true assignment and the number of tested assignments in the alternative is largely reduced. In theory, we introduce a combinatorial distance between two community classes and show a combinatorial-probabilistic trade-off phenomenon. Our test is honest as long as the product of the combinatorial distance between two communities and the probabilistic distance between two connection probabilities is sufficiently large. Besides, we show that such trade-off also exists in the information-theoretic lower bound. 
We also conduct numerical experiments to show the validity of our method.", "keywords": "combinatorial inference;stochastic block models;community properties;minimax lower bound", "primary_area": "", "supplementary_material": "/attachment/5a6903c26aa7292675c23a901d0c7befb8990c28.zip", "author": "Shuting Shen;Junwei Lu", "authorids": "~Shuting_Shen1;~Junwei_Lu1", "gender": "F;M", "homepage": ";https://junwei-lu.github.io/", "dblp": ";", "google_scholar": ";", "orcid": ";", "linkedin": "shuting-shen-9a4843174/;", "or_profile": "~Shuting_Shen1;~Junwei_Lu1", "aff": "Harvard University;Harvard University", "aff_domain": "g.harvard.edu;harvard.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nshen2023combinatorialprobabilistic,\ntitle={Combinatorial-Probabilistic Trade-Off: P-Values of Community Properties Test in the Stochastic Block Models},\nauthor={Shuting Shen and Junwei Lu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=8qjSA5QACb40}\n}", "github": "", "project": "", "reviewers": "XzCa;f6aS;uY7c;on1z", "pdf_size": 2628406, "recommendation": "8;8;8;8", "confidence": "4;5;4;3", "correctness": "3;2;4;4", "technical_novelty": "3;3;3;3", "empirical_novelty": "0;2;3;2", "wc_summary_paper": "137;226;72;233", "wc_strength_and_weaknesses": "168;180;135;153", "wc_clarity_quality_novelty_and_reproducibility": "142;79;196;12", "wc_summary_review": "2;107;14;49", "wc_review": "449;592;417;447", "wc_reply_reviewers": "0;0;60;0", "wc_reply_authors": "789;319;888;255", "reply_reviewers": "0;0;1;0", "reply_authors": "2;1;2;1", "recommendation_avg": [ 8.0, 0.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 1.75, 1.0897247358851685 ], "wc_summary_paper_avg": [ 167.0, 66.63707676661694 ], "wc_strength_and_weaknesses_avg": [ 159.0, 16.837458240482736 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 107.25, 68.83812533763539 ], "wc_summary_review_avg": [ 43.0, 40.786027019066225 ], "wc_review_avg": [ 476.25, 68.01975815893496 ], "wc_reply_reviewers_avg": [ 15.0, 25.98076211353316 ], "wc_reply_authors_avg": [ 562.75, 278.88203151153357 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11014394536851281358&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=8qjSA5QACb40", "email": "g.harvard.edu;harvard.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Harvard University", "aff_unique_dep": "", "aff_unique_url": "https://www.harvard.edu", "aff_unique_abbr": "Harvard", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "8re-nA0wDxW", "title": "The Curse of Low Task Diversity: On the Failure of Transfer Learning to Outperform MAML and their Empirical Equivalence", "track": "main", "status": "Reject", "tldr": "when the task diversity of few-shot learning benchmarks is low and comparison is fair, MAML and transfer learning perform the same -- opposite of claims that transfer learning is better", "abstract": "Recently, it has been observed that a transfer learning solution might be
all we need to solve many few-shot learning benchmarks -- thus raising important questions about when and how meta-learning algorithms should be deployed. \nIn this paper, we seek to clarify these questions by \n1. proposing a novel metric -- the diversity coefficient -- to measure the diversity of tasks in a few-shot learning benchmark and \n2. comparing MAML and transfer learning under fair conditions (same architecture, same optimizer and all models trained to convergence).\nUsing the diversity coefficient, we show that the popular MiniImagenet and Cifar-fs few-shot learning benchmarks have low diversity. \nThis novel insight contextualizes claims that transfer learning solutions are better than meta-learned solutions in the regime of low diversity under a fair comparison. \nSpecifically, we empirically find that a low diversity coefficient correlates with a high similarity between transfer learning and Model-Agnostic Meta-Learning (MAML) learned solutions in terms of accuracy at meta-test time and classification layer similarity (using feature-based distance metrics like SVCCA, PWCCA, CKA, and OPD). \nTo further support our claim, we find that this meta-test accuracy holds even as the model size changes. \nTherefore, we conclude that in the low diversity regime, MAML and transfer learning have equivalent meta-test performance when both are compared fairly.\nWe also hope our work inspires more thoughtful constructions and quantitative evaluations of meta-learning benchmarks in the future.", "keywords": "meta-learning;machine learning;transfer learning;deep learning", "primary_area": "", "supplementary_material": "", "author": "Brando Miranda;Patrick Yu;Yu-Xiong Wang;Oluwasanmi O Koyejo", "authorids": "~Brando_Miranda1;~Patrick_Yu1;~Yu-Xiong_Wang1;~Oluwasanmi_O_Koyejo1", "gender": "M;;;M", "homepage": "https://cbmm.mit.edu/about/people/miranda;;https://yxw.cs.illinois.edu/;https://cs.stanford.edu/~sanmi/", "dblp": ";230/7692;35/10700;14/8885", "google_scholar": "_NQJoBkAAAAJ;3MpLP34AAAAJ;T_Q-xDkAAAAJ;EaaOeJwAAAAJ", "orcid": ";;;0000-0002-4023-419X", "linkedin": "brando-miranda-40821046/;;;sanmi-koyejo-984754/", "or_profile": "~Brando_Miranda1;~Patrick_Yu1;~Yu-Xiong_Wang1;~Oluwasanmi_O_Koyejo1", "aff": "Stanford University;Department of Computer Science, University of Illinois at Urbana-Champaign;Department of Computer Science, University of Illinois Urbana-Champaign;Google", "aff_domain": "stanford.edu;cs.illinois.edu;cs.illinois.edu;google.com", "position": "PhD student;Undergrad student;Assistant Professor;Research Scientist", "bibtex": "@misc{\nmiranda2023the,\ntitle={The Curse of Low Task Diversity: On the Failure of Transfer Learning to Outperform {MAML} and their Empirical Equivalence},\nauthor={Brando Miranda and Patrick Yu and Yu-Xiong Wang and Oluwasanmi O Koyejo},\nyear={2023},\nurl={https://openreview.net/forum?id=8re-nA0wDxW}\n}", "github": "", "project": "", "reviewers": "nDz4;NZ7N;NP6p;Bxys", "site": "https://openreview.net/forum?id=8re-nA0wDxW", "pdf_size": 4769002, "recommendation": "1;3;3;6", "confidence": "4;4;4;4", "correctness": "2;1;3;3", "technical_novelty": "2;3;2;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "108;90;79;84", "wc_strength_and_weaknesses": "563;818;456;312", "wc_clarity_quality_novelty_and_reproducibility": "48;2;101;18", "wc_summary_review": "55;24;34;15", "wc_review": "774;934;670;429", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [
3.25, 1.7853571071357126 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 90.25, 10.96300597464035 ], "wc_strength_and_weaknesses_avg": [ 537.25, 184.94779668868728 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 42.25, 37.72515738867102 ], "wc_summary_review_avg": [ 32.0, 14.882876066137216 ], "wc_review_avg": [ 701.75, 183.41261543307212 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.46442036401282394, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:_pFojgKbm6wJ:scholar.google.com/&scioq=The+Curse+of+Low+Task+Diversity:+On+the+Failure+of+Transfer+Learning+to+Outperform+MAML+and+their+Empirical+Equivalence&hl=en&as_sdt=0,5", "gs_version_total": 7, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Stanford University;University of Illinois Urbana-Champaign;Google", "aff_unique_dep": ";Department of Computer Science;Google", "aff_unique_url": "https://www.stanford.edu;https://illinois.edu;https://www.google.com", "aff_unique_abbr": "Stanford;UIUC;Google", "aff_campus_unique_index": "0;1;1;2", "aff_campus_unique": "Stanford;Urbana-Champaign;Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Learning Soft Constraints From Constrained Expert Demonstrations", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11754", "id": "8sSnD78NqTN", "poster": "/media/PosterPDFs/ICLR%202023/11754.png?t=1682911467.7604208", "openreview": "https://openreview.net/forum?id=8sSnD78NqTN", "slides": "https://iclr.cc/virtual/2023/poster/11754", "video": "https://iclr.cc/virtual/2023/poster/11754", "author_site": "Ashish Gaurav, Kasra Rezaee, Guiliang Liu, Pascal Poupart", "tldr": "", "abstract": "Inverse reinforcement learning (IRL) methods assume that the expert data is generated by an agent optimizing some reward function. However, in many settings, the agent may optimize a reward function subject to some constraints, where the constraints induce behaviors that may be otherwise difficult to express with just a reward function. We consider the setting where the reward function is given, and the constraints are unknown, and propose a method that is able to recover these constraints satisfactorily from the expert data. While previous work has focused on recovering hard constraints, our method can recover cumulative soft constraints that the agent satisfies on average per episode. In IRL fashion, our method solves this problem by adjusting the constraint function iteratively through a constrained optimization procedure, until the agent behavior matches the expert behavior. 
We demonstrate our approach on synthetic environments, robotics environments, and real-world highway driving scenarios.", "keywords": "inverse reinforcement learning;constraint learning", "primary_area": "", "supplementary_material": "/attachment/ecf58c5e058aaae3db43179a16bfda43e4e006cf.zip", "author": "Ashish Gaurav;Kasra Rezaee;Guiliang Liu;Pascal Poupart", "authorids": "~Ashish_Gaurav1;~Kasra_Rezaee1;~Guiliang_Liu1;~Pascal_Poupart2", "gender": "M;M;M;M", "homepage": ";;http://guiliang.me/;https://cs.uwaterloo.ca/~ppoupart", "dblp": "236/4881;189/9684;220/5411;26/2122", "google_scholar": "https://scholar.google.ca/citations?user=5CqEn6YAAAAJ;https://scholar.google.com/citations?hl=en;CuMylvEAAAAJ;https://scholar.google.ca/citations?user=KhAJWroAAAAJ", "orcid": ";;;", "linkedin": "ashishgaurav13/;kasrarezaee;;", "or_profile": "~Ashish_Gaurav1;~Kasra_Rezaee1;~Guiliang_Liu1;~Pascal_Poupart2", "aff": "University of Waterloo;Huawei Noah's Ark Lab;The Chinese University of Hong Kong, Shenzhen;University of Waterloo", "aff_domain": "uwaterloo.ca;huawei.com;cuhk.edu.hk;uwaterloo.ca", "position": "PhD student;senior researcher ;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\ngaurav2023learning,\ntitle={Learning Soft Constraints From Constrained Expert Demonstrations},\nauthor={Ashish Gaurav and Kasra Rezaee and Guiliang Liu and Pascal Poupart},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=8sSnD78NqTN}\n}", "github": "", "project": "", "reviewers": "8Rs7;5hKt;3D7X;EuC1", "pdf_size": 4571356, "recommendation": "5;6;6;8", "confidence": "3;3;4;3", "correctness": "3;4;3;3", "technical_novelty": "3;3;3;4", "empirical_novelty": "2;0;3;0", "wc_summary_paper": "65;78;137;98", "wc_strength_and_weaknesses": "133;213;126;252", "wc_clarity_quality_novelty_and_reproducibility": "13;61;1;107", "wc_summary_review": "27;54;133;50", "wc_review": "238;406;397;507", "wc_reply_reviewers": "0;0;30;136", "wc_reply_authors": "841;887;911;1053", "reply_reviewers": "0;0;1;2", "reply_authors": "2;2;2;3", "recommendation_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.25, 1.299038105676658 ], "wc_summary_paper_avg": [ 94.5, 27.207535720825582 ], "wc_strength_and_weaknesses_avg": [ 181.0, 53.37134062397159 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 45.5, 42.00892762258994 ], "wc_summary_review_avg": [ 66.0, 40.03123780249619 ], "wc_review_avg": [ 387.0, 96.25746724280667 ], "wc_reply_reviewers_avg": [ 41.5, 55.91734972260399 ], "wc_reply_authors_avg": [ 923.0, 79.1580697086532 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.13245323570650439, "corr_recommendation_correctness": -0.13245323570650439, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2440991622304282478&as_sdt=4005&sciodt=0,6&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=8sSnD78NqTN", "email": "uwaterloo.ca;huawei.com;cuhk.edu.hk;uwaterloo.ca", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Waterloo;Huawei;Chinese University of Hong Kong", "aff_unique_dep": ";Noah's Ark Lab;", "aff_unique_url":
"https://uwaterloo.ca;https://www.huawei.com;https://www.cuhk.edu.cn", "aff_unique_abbr": "UW;Huawei;CUHK", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "Canada;China" }, { "id": "8sqKEkAO3jv", "title": "A simple but effective and efficient global modeling paradigm for image restoration", "track": "main", "status": "Reject", "tldr": "This is the first attempt to propose a theoretically feasible, simple but effective global modeling paradigm for image restoration.", "abstract": "Global modelling-based image restoration frameworks (e.g., Transformer-like architecture) has gained popularity. Despite the remarkable advancement, the success may be at the cost of model parameters and FLOPs while the intrinsic characteristics of specific task are ignored. The objective of our work is orthogonal to previous studies and we thus tailor a simple yet effective global modelling paradigm for image restoration. The key insights which motivate our study are two-fold: 1) Fourier transform is capable of disentangling image degradation and content component, acting as the image degradation prior embedded into image restoration framework; 2) Fourier domain innately embraces global property where each pixel of Fourier space is involved with all the spatial pixels. We obey the de facto global modeling rule ``spatial interaction + channel evolution\" of previous studies. Differently, we customize the core designs: multi-scale Fourier period spatial modeling and Fourier channel evolution. Equipped with above designs, our image restoration paradigm is verified on mainstream image restoration tasks including image de-raining, image enhancement, image de-hazing, and guided image super-resolution. The extensive experiments suggest that our paradigm achieves the competitive performance with fewer computational resources. Our main focus is not to beat previous frameworks but hopes to provide an alternative global modelling-based customized image restoration framework. 
Code will be publicly available.", "keywords": "image restoration;image de-raining;image de-hazing;image enhancement", "primary_area": "", "supplementary_material": "", "author": "man zhou;Jie Huang;Jie Xiao;Hu Yu;Danfeng Hong;Chongyi Li", "authorids": "~man_zhou1;~Jie_Huang4;~Jie_Xiao3;~Hu_Yu2;~Danfeng_Hong2;~Chongyi_Li1", "gender": "M;M;M;M;M;", "homepage": ";;https://jiexiaou.github.io/;https://yuhuustc.github.io/;https://sites.google.com/view/danfeng-hong;", "dblp": "165/8237;;15/3437-2;;153/2550;", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en;https://scholar.google.com.hk/citations?hl=zh-CN;n7gL0_IAAAAJ;", "orcid": "0000-0003-2872-605X;0000-0002-3518-3404;0000-0002-5677-270X;0000-0003-0598-8989;;", "linkedin": ";;;;;", "or_profile": "~man_zhou1;~Jie_Huang4;~Jie_Xiao3;~Hu_Yu2;~Danfeng_Hong2;~Chongyi_Li1", "aff": "University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;Chinese Academy of Sciences, Aerospace Information Research Institute;", "aff_domain": "ustc.edu.cn;ustc.edu.cn;mail.ustc.edu.cn;ustc.edu.cn;aircas.ac.cn;", "position": "Postdoc;PhD student;PhD student;PhD student;Full Professor;", "bibtex": "@misc{\nzhou2023a,\ntitle={A simple but effective and efficient global modeling paradigm for image restoration},\nauthor={man zhou and Jie Huang and Jie Xiao and Hu Yu and Danfeng Hong and Chongyi Li},\nyear={2023},\nurl={https://openreview.net/forum?id=8sqKEkAO3jv}\n}", "github": "", "project": "", "reviewers": "DwsU;vJqt;FGSi;8mcG", "site": "https://openreview.net/forum?id=8sqKEkAO3jv", "pdf_size": 25899388, "recommendation": "3;3;8;8", "confidence": "5;5;4;5", "correctness": "2;2;3;3", "technical_novelty": "2;2;4;4", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "106;98;92;51", "wc_strength_and_weaknesses": "177;302;56;254", "wc_clarity_quality_novelty_and_reproducibility": "41;2;90;25", "wc_summary_review": "56;19;23;54", "wc_review": "380;421;261;384", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1128;802;658;105", "reply_reviewers": "0;0;0;0", "reply_authors": "5;5;3;2", "recommendation_avg": [ 5.5, 2.5 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 3.0, 1.0 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 86.75, 21.22940178149163 ], "wc_strength_and_weaknesses_avg": [ 197.25, 92.94453991494068 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 39.5, 32.283896914715854 ], "wc_summary_review_avg": [ 38.0, 17.073371078963874 ], "wc_review_avg": [ 361.5, 60.18513105410671 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 673.25, 369.63317965247654 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 3.75, 1.299038105676658 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:8ABKv6fkbvwJ:scholar.google.com/&scioq=A+simple+but+effective+and+efficient+global+modeling+paradigm+for+image+restoration&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "University of Science and Technology of China;Chinese Academy of Sciences", "aff_unique_dep": ";Aerospace Information Research Institute", "aff_unique_url": 
"http://www.ustc.edu.cn;http://www.cas.ac.cn", "aff_unique_abbr": "USTC;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Linearly Mapping from Image to Text Space", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10815", "id": "8tYRqb05pVn", "poster": "/media/PosterPDFs/ICLR%202023/10815.png?t=1682629159.044836", "openreview": "https://openreview.net/forum?id=8tYRqb05pVn", "slides": "https://iclr.cc/virtual/2023/poster/10815", "video": "https://iclr.cc/virtual/2023/poster/10815", "author_site": "Jack Merullo, Louis Castricato, Carsten Eickhoff, Ellie Pavlick", "tldr": "Language models (LMs) can 'understand' images through a single tuned linear layer between a frozen image encoder and the LM input, showcasing the similarities in their conceptual representation spaces.", "abstract": "The extent to which text-only language models (LMs) learn to represent the physical, non-linguistic world is an open question. Prior work has shown that pretrained LMs can be taught to ``understand'' visual inputs when the models' parameters are updated on image captioning tasks. We test a stronger hypothesis: that the conceptual representations learned by text-only models are functionally equivalent (up to a linear transformation) to those learned by models trained on vision tasks. Specifically, we show that the image representations from vision models can be transferred as continuous prompts to frozen LMs by training only a single linear projection. Using these to prompt the LM achieves competitive performance on captioning and visual question answering tasks compared to models that tune both the image encoder and text decoder (such as the MAGMA model). We compare three image encoders with increasing amounts of linguistic supervision seen during pretraining: BEIT (no linguistic information), NF-ResNET (lexical category information), and CLIP (full natural language descriptions). We find that all three encoders perform equally well at transferring visual property information to the language model (e.g., whether an animal is large or small), but that image encoders pretrained with linguistic supervision more saliently encode category information (e.g., distinguishing hippo vs.\\ elephant) and thus perform significantly better on benchmark language-and-vision tasks. 
Our results indicate that LMs encode conceptual information structurally similarly to vision-based models, even those that are solely trained on images.", "keywords": "representation learning;deep learning;grounded language learning;nlp;dl;image;image captioning;language grounding;grounded", "primary_area": "", "supplementary_material": "", "author": "Jack Merullo;Louis Castricato;Carsten Eickhoff;Ellie Pavlick", "authorids": "~Jack_Merullo2;~Louis_Castricato2;~Carsten_Eickhoff1;~Ellie_Pavlick1", "gender": "M;M;F;M", "homepage": "https://louiscastricato.com;https://health-nlp.org;http://cs.brown.edu/people/epavlick/;https://jmerullo.github.io/", "dblp": ";42/8700;141/4059;248/8361", "google_scholar": ";QQi1_rAAAAAJ;sFyrSa8AAAAJ;7w0xLF4AAAAJ", "orcid": ";0000-0001-9895-4061;;", "linkedin": ";;;", "or_profile": "~Louis_Castricato2;~Carsten_Eickhoff1;~Ellie_Pavlick1;~jack_merullo1", "aff": "Georgia Institute of Technology;Eberhard-Karls-Universit\u00e4t T\u00fcbingen;Brown University;Brown University", "aff_domain": "gatech.edu;uni-tuebingen.de;brown.edu;brown.edu", "position": "MS student;Full Professor;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nmerullo2023linearly,\ntitle={Linearly Mapping from Image to Text Space},\nauthor={Jack Merullo and Louis Castricato and Carsten Eickhoff and Ellie Pavlick},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=8tYRqb05pVn}\n}", "github": "", "project": "", "reviewers": "s1Pu;Sxvp;N4fG;kxWt", "pdf_size": 21574775, "recommendation": "5;6;8;8", "confidence": "4;4;3;4", "correctness": "4;3;4;4", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;0;4;3", "wc_summary_paper": "37;47;170;104", "wc_strength_and_weaknesses": "250;402;193;52", "wc_clarity_quality_novelty_and_reproducibility": "28;12;7;69", "wc_summary_review": "67;30;31;336", "wc_review": "382;491;401;561", "wc_reply_reviewers": "0;0;116;0", "wc_reply_authors": "247;420;129;264", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.25, 1.479019945774904 ], "wc_summary_paper_avg": [ 89.5, 53.04007918546125 ], "wc_strength_and_weaknesses_avg": [ 224.25, 125.40409682303047 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 29.0, 24.361855430159665 ], "wc_summary_review_avg": [ 116.0, 127.88862341897344 ], "wc_review_avg": [ 458.75, 71.97351943597033 ], "wc_reply_reviewers_avg": [ 29.0, 50.22947341949744 ], "wc_reply_authors_avg": [ 265.0, 103.49637674817414 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.5555555555555555, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 115, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13264068773841650102&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=8tYRqb05pVn", "email": "gatech.edu;uni-tuebingen.de;brown.edu;brown.edu", "author_num": 4, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "Georgia Institute of Technology;Eberhard Karls University of T\u00fcbingen;Brown University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.gatech.edu;https://www.uni-tuebingen.de/;https://www.brown.edu", "aff_unique_abbr": 
"Georgia Tech;Uni T\u00fcbingen;Brown", "aff_campus_unique_index": "1", "aff_campus_unique": ";T\u00fcbingen", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;Germany" }, { "id": "8taH4yjN62m", "title": "UNDERSTANDING THE ROLE OF POSITIONAL ENCODINGS IN SENTENCE REPRESENTATIONS", "track": "main", "status": "Reject", "tldr": "In this work, we investigate the role of positional encodings systematically.", "abstract": "Positional encodings are used to inject word-order information into transformer-based language models. While they can significantly enhance the quality of sentence representations, their specific contribution to language models are not fully understood, especially given recent findings that building natural-language understanding from language models with positional encodings is insensitive to word order. In this work, we investigate the role of positional encodings systematically. (1) We uncover the core function of existing positional encodings is to symmetrically combine local units by identifying two common properties, Locality, and Symmetry. (2) We reveal that positional and contextual encodings play a distinct role in understanding sentences. (3) Based on these findings, we propose a simplified new method to inject positional information into such models. Empirical studies demonstrate that this method can improve the performance of the BERT-based model on 10 downstream tasks. We hope these new probing results and findings can shed light on how to design and inject positional encodings into language models.\n", "keywords": "Positional Encodings;Sentence Representations;Pre-trained Language Models", "primary_area": "", "supplementary_material": "/attachment/f9628fb9f3ad08d1377762b53df8c378d9ca2474.zip", "author": "Lihu Chen;Gael Varoquaux;Fabian M. Suchanek", "authorids": "~Lihu_Chen1;~Gael_Varoquaux1;~Fabian_M._Suchanek1", "gender": "M;M;", "homepage": "https://chenlihu.com;http://gael-varoquaux.info;", "dblp": ";36/7585;", "google_scholar": "oRs8regAAAAJ;https://scholar.google.fr/citations?user=OGGu384AAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Lihu_Chen1;~Gael_Varoquaux1;~Fabian_M._Suchanek1", "aff": "Institut Polytechnique de Paris;INRIA;", "aff_domain": "telecom-paristech.fr;inria.fr;", "position": "PhD student;Full Professor;", "bibtex": "@misc{\nchen2023understanding,\ntitle={{UNDERSTANDING} {THE} {ROLE} {OF} {POSITIONAL} {ENCODINGS} {IN} {SENTENCE} {REPRESENTATIONS}},\nauthor={Lihu Chen and Gael Varoquaux and Fabian M. 
Suchanek},\nyear={2023},\nurl={https://openreview.net/forum?id=8taH4yjN62m}\n}", "github": "", "project": "", "reviewers": "gutD;LpaP;eCeS;WDLJ", "site": "https://openreview.net/forum?id=8taH4yjN62m", "pdf_size": 4629694, "recommendation": "5;5;5;6", "confidence": "4;3;4;3", "correctness": "2;3;3;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "1;2;2;2", "wc_summary_paper": "114;120;53;240", "wc_strength_and_weaknesses": "970;159;191;142", "wc_clarity_quality_novelty_and_reproducibility": "72;69;35;81", "wc_summary_review": "83;58;14;122", "wc_review": "1239;406;293;585", "wc_reply_reviewers": "127;0;9;0", "wc_reply_authors": "2029;743;470;242", "reply_reviewers": "3;0;1;0", "reply_authors": "5;3;2;1", "recommendation_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 131.75, 67.77305880657889 ], "wc_strength_and_weaknesses_avg": [ 365.5, 349.4513556991874 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 64.25, 17.455300054711177 ], "wc_summary_review_avg": [ 69.25, 39.213358693180055 ], "wc_review_avg": [ 630.75, 366.28156860535586 ], "wc_reply_reviewers_avg": [ 34.0, 53.819141576208736 ], "wc_reply_authors_avg": [ 871.0, 691.6989952284158 ], "reply_reviewers_avg": [ 1.0, 1.224744871391589 ], "reply_authors_avg": [ 2.75, 1.479019945774904 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:CI7QZoNZjxAJ:scholar.google.com/&scioq=UNDERSTANDING+THE+ROLE+OF+POSITIONAL+ENCODINGS+IN+SENTENCE+REPRESENTATIONS&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Institut Polytechnique de Paris;INRIA", "aff_unique_dep": ";", "aff_unique_url": "https://www.ipparis.fr;https://www.inria.fr", "aff_unique_abbr": "IP Paris;INRIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "France" }, { "id": "8thVleggPV_", "title": "AUTOJOIN: EFFICIENT ADVERSARIAL TRAINING FOR ROBUST MANEUVERING VIA DENOISING AUTOEN- CODER AND JOINT LEARNING", "track": "main", "status": "Reject", "tldr": "", "abstract": "As a result of increasingly adopted machine learning algorithms and ubiquitous sensors, many \u2018perception-to-control\u2019 systems are developed and deployed. For these systems to be trustworthy, we need to improve their robustness with adversarial training being one approach. We propose a gradient-free adversarial training technique, called AutoJoin, which is a very simple yet effective and efficient approach to produce robust models for imaged-based maneuvering. Compared to other SOTA methods with testing on over 5M perturbed and clean images, AutoJoin achieves significant performance increases up to the 40% range under gradient-free perturbations while improving on clean performance up to 300%. Regarding efficiency, AutoJoin demonstrates strong advantages over other SOTA techniques by saving up to 83% time per training epoch and 90% training data. Although not the focus of AutoJoin, it even demonstrates superb ability in defending gradient-based attacks. The core idea of AutoJoin is to use a decoder attachment to the original regression model creating a denoising autoencoder within the architecture. 
This architecture allows the tasks \u2018maneuvering\u2019 and \u2018denoising sensor input\u2019 to be jointly learnt and to reinforce each other\u2019s performance.", "keywords": "autonomous driving;machine learning;robust training", "primary_area": "", "supplementary_material": "", "author": "Taylor Michael Villarreal;Bibek Poudel;Ryan Wickman;Yu Shen;Weizi Li", "authorids": "~Taylor_Michael_Villarreal2;~Bibek_Poudel1;~Ryan_Wickman1;~Yu_Shen1;~Weizi_Li1", "gender": "M;M;M;M;M", "homepage": "http://www.tmichaelvillarreal.com/home;https://poudel-bibek.github.io;;http://www.cs.umd.edu/~yushen/;http://weizi-li.github.io/", "dblp": ";298/8061;;;60/7775", "google_scholar": ";PzBn1jgAAAAJ;;krGKS_QAAAAJ;", "orcid": ";;;;", "linkedin": ";;ryan-wickman-771160144/;;", "or_profile": "~Taylor_Michael_Villarreal2;~Bibek_Poudel1;~Ryan_Wickman1;~Yu_Shen1;~Weizi_Li1", "aff": "University of Memphis;University of Tennessee, Knoxville;University of Memphis;University of Maryland, College Park;University of Memphis", "aff_domain": "memphis.edu;utk.edu;memphis.edu;umd.edu;memphis.edu", "position": "PhD student;PhD student;PhD student;PhD student;Assistant Professor", "bibtex": "@misc{\nvillarreal2023autojoin,\ntitle={{AUTOJOIN}: {EFFICIENT} {ADVERSARIAL} {TRAINING} {FOR} {ROBUST} {MANEUVERING} {VIA} {DENOISING} {AUTOEN}- {CODER} {AND} {JOINT} {LEARNING}},\nauthor={Taylor Michael Villarreal and Bibek Poudel and Ryan Wickman and Yu Shen and Weizi Li},\nyear={2023},\nurl={https://openreview.net/forum?id=8thVleggPV_}\n}", "github": "", "project": "", "reviewers": "wNFL;LDET;mroR;Jfom", "site": "https://openreview.net/forum?id=8thVleggPV_", "pdf_size": 5098533, "recommendation": "5;5;6;6", "confidence": "4;3;2;2", "correctness": "2;3;3;4", "technical_novelty": "3;2;2;3", "empirical_novelty": "3;2;3;3", "wc_summary_paper": "93;41;53;52", "wc_strength_and_weaknesses": "514;63;193;89", "wc_clarity_quality_novelty_and_reproducibility": "10;9;34;17", "wc_summary_review": "84;19;37;26", "wc_review": "701;132;317;184", "wc_reply_reviewers": "0;0;0;59", "wc_reply_authors": "581;467;466;445", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;2", "recommendation_avg": [ 5.5, 0.5 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 59.75, 19.76581645164196 ], "wc_strength_and_weaknesses_avg": [ 214.75, 179.48868348728843 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 17.5, 10.012492197250394 ], "wc_summary_review_avg": [ 41.5, 25.362373705944798 ], "wc_review_avg": [ 333.5, 222.6437737732632 ], "wc_reply_reviewers_avg": [ 14.75, 25.54774941164094 ], "wc_reply_authors_avg": [ 489.75, 53.410556072746516 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.9045340337332909, "corr_recommendation_correctness": 0.7071067811865475, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7326885346363040672&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "University of Memphis;University of Tennessee;University of Maryland", "aff_unique_dep": ";;", "aff_unique_url": "https://www.memphis.edu;https://www.utk.edu;https://www.umd.edu", "aff_unique_abbr": "UM;UT;UMD", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Knoxville;College 
Park", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "8u9eXwu5GAb", "title": "Transferring Pretrained Diffusion Probabilistic Models", "track": "main", "status": "Reject", "tldr": "We propose a new tuning approach for transferring pretrained diffusion probabilistic models to new tasks with limited data and training resources.", "abstract": "Diffusion Probabilistic Models (DPMs) achieve impressive performance in visual generative tasks recently. However, the success of DPMs heavily relies on large amounts of data and optimization steps, which limits the application of DPMs to small datasets and limited computational resources. In this paper, we investigate transfer learning in DPMs to leverage the DPMs pretrained on large-scale datasets for generation with limited data. Firstly, we show that previous methods like training from scratch or determining the transferable parts is not suitable for the DPM due to its U-Net based denoising architecture with the external denoising timestep input. To address it, we present a condition-based tuning approach to take full advantages of existing pretrained models. Concretely, we obtain the semantic embeddings of condition images by the pretrained CLIP model, and then inject these semantic informations to the pretrained DPM via a ''Attention-NonLinear'' (ANL) module. The adaptation to a new task can be achieved by only tuning the ANL module inserted into the pretrained DPM hierarchically. To further enhance the diversity of generated images, we introduce a masked sampling strategy based on the condition mechanism. Extensive experiments validate the effectiveness and efficiency of our proposed tuning approach in generative task transfer and data augmentation for semi-supervised learning. 
", "keywords": "transfer learning;diffusion probabilistic models;cross-attention;fine-tuning", "primary_area": "", "supplementary_material": "", "author": "Fuming You;Zhou Zhao", "authorids": "~Fuming_You3;~Zhou_Zhao2", "gender": "M;M", "homepage": "https://dblp.uni-trier.de/pid/75/7785.html?;", "dblp": "75/7785;277/1388", "google_scholar": "https://scholar.google.com.hk/citations?user=IIoFY90AAAAJ;W6Nf_CAAAAAJ", "orcid": "0000-0001-6121-0384;", "linkedin": ";", "or_profile": "~Zhou_Zhao2;~fuming_you2", "aff": "Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn", "position": "Associate Professor;MS student", "bibtex": "@misc{\nyou2023transferring,\ntitle={Transferring Pretrained Diffusion Probabilistic Models},\nauthor={Fuming You and Zhou Zhao},\nyear={2023},\nurl={https://openreview.net/forum?id=8u9eXwu5GAb}\n}", "github": "", "project": "", "reviewers": "wtTC;uRV2;2H8p;YaAB", "site": "https://openreview.net/forum?id=8u9eXwu5GAb", "pdf_size": 15427129, "recommendation": "5;5;6;6", "confidence": "4;4;4;4", "correctness": "3;3;4;4", "technical_novelty": "3;3;3;3", "empirical_novelty": "2;3;2;3", "wc_summary_paper": "80;74;65;69", "wc_strength_and_weaknesses": "360;147;393;89", "wc_clarity_quality_novelty_and_reproducibility": "8;41;72;61", "wc_summary_review": "26;65;34;57", "wc_review": "474;327;564;276", "wc_reply_reviewers": "0;271;0;155", "wc_reply_authors": "1026;585;616;356", "reply_reviewers": "0;1;0;2", "reply_authors": "3;3;2;3", "recommendation_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 72.0, 5.612486080160912 ], "wc_strength_and_weaknesses_avg": [ 247.25, 131.3856441929635 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 45.5, 24.336187047275914 ], "wc_summary_review_avg": [ 45.5, 16.00781059358212 ], "wc_review_avg": [ 410.25, 114.73529317520394 ], "wc_reply_reviewers_avg": [ 106.5, 114.12383624817384 ], "wc_reply_authors_avg": [ 645.75, 241.41289837123452 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:P5rx300Y11gJ:scholar.google.com/&scioq=Transferring+Pretrained+Diffusion+Probabilistic+Models&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Zhejiang University", "aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", "aff_unique_abbr": "ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "8uf1JIb07M", "title": "MERMADE: $K$-shot Robust Adaptive Mechanism Design via Model-Based Meta-Learning", "track": "main", "status": "Reject", "tldr": "We propose MERMADE, a deep RL approach to mechanism design that learns a world model together with a meta-learned mechanism which can be quickly adapted to perform well on unseen test agents that learn.", "abstract": "Mechanism design (MD) studies how rules and rewards shape the behavior of intelligent agents, e.g., in auctions or the economy. Simulations with AI agents are powerful tools for MD, but real-world agents may behave and learn differently than simulated agents under a given mechanism. 
Also, the mechanism designer may not fully observe an agent's learning strategy or rewards, and executing a mechanism may be costly, e.g., enforcing a tax might require extra labor. Hence, it is key to design robust adaptive mechanisms that generalize well to agents with unseen (learning) behavior, are few-shot adaptable, and are cost-efficient. Here, we introduce MERMADE, a model-based meta-learning framework to learn mechanisms that can quickly adapt when facing out-of-distribution agents with different learning strategies and reward functions. First, we show that meta-learning allows adapting to the theoretically known and appropriate Stackelberg equilibrium in a simple matrix game at meta-test time, with few interactions with the agent. Second, with bandit agents, we show empirically that our approach yields strong meta-test time performance against agents with various unseen explore-exploit behaviors. Finally, we outperform baselines that separately use either meta-learning or agent behavior modeling to learn a cost-effective mechanism that is $K$-shot adaptable with only partial information about the agents.", "keywords": "Mechanism design;Robustness;Meta-learning;Adaptive agents;Simulation based learning", "primary_area": "", "supplementary_material": "/attachment/42296bd35bdfa5f48c84e5ca5121d07f47ab629d.zip", "author": "Arundhati Banerjee;Soham Rajesh Phade;Stefano Ermon;Stephan Zheng", "authorids": "~Arundhati_Banerjee1;~Soham_Rajesh_Phade1;~Stefano_Ermon1;~Stephan_Zheng1", "gender": ";M;M;M", "homepage": "https://banerjee-arundhati.github.io;;http://cs.stanford.edu/~ermon/;http://www.stephanzheng.com", "dblp": "187/0739;206/9094.html;47/8135;https://dblp.org/pers/hd/z/Zheng:Stephan", "google_scholar": "asP4w6cAAAAJ;;;7mnKGGEAAAAJ", "orcid": ";;;", "linkedin": ";;;stephanzheng", "or_profile": "~Arundhati_Banerjee1;~Soham_Rajesh_Phade1;~Stefano_Ermon1;~Stephan_Zheng1", "aff": "Carnegie Mellon University;SalesForce.com;Stanford University;SalesForce.com", "aff_domain": "cmu.edu;salesforce.com;stanford.edu;salesforce.com", "position": "PhD student;Researcher;Associate Professor;Lead Research Scientist", "bibtex": "@misc{\nbanerjee2023mermade,\ntitle={{MERMADE}: \\$K\\$-shot Robust Adaptive Mechanism Design via Model-Based Meta-Learning},\nauthor={Arundhati Banerjee and Soham Rajesh Phade and Stefano Ermon and Stephan Zheng},\nyear={2023},\nurl={https://openreview.net/forum?id=8uf1JIb07M}\n}", "github": "", "project": "", "reviewers": "2WWc;9dtX;C92e;4J8Y", "site": "https://openreview.net/forum?id=8uf1JIb07M", "pdf_size": 1664557, "recommendation": "5;5;6;6", "confidence": "3;3;4;3", "correctness": "3;3;3;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;0;3;3", "wc_summary_paper": "120;80;51;170", "wc_strength_and_weaknesses": "429;90;240;54", "wc_clarity_quality_novelty_and_reproducibility": "106;61;73;36", "wc_summary_review": "83;41;83;206", "wc_review": "738;272;447;466", "wc_reply_reviewers": "206;0;847;0", "wc_reply_authors": "1432;199;1426;434", "reply_reviewers": "1;0;2;0", "reply_authors": "4;2;4;2", "recommendation_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 1.224744871391589 ], "wc_summary_paper_avg": [ 105.25, 44.69549753610536 ], "wc_strength_and_weaknesses_avg": [ 203.25, 147.8299276195453 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 69.0, 25.18928343562 ], "wc_summary_review_avg": [ 103.25, 
61.75101213745407 ], "wc_review_avg": [ 480.75, 166.66639583311328 ], "wc_reply_reviewers_avg": [ 263.25, 347.3624727859934 ], "wc_reply_authors_avg": [ 872.75, 562.4248283104151 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 3.0, 1.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:TglZNU0XSdAJ:scholar.google.com/&scioq=MERMADE:+%24K%24-shot+Robust+Adaptive+Mechanism+Design+via+Model-Based+Meta-Learning&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Carnegie Mellon University;Salesforce;Stanford University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cmu.edu;https://www.salesforce.com;https://www.stanford.edu", "aff_unique_abbr": "CMU;Salesforce;Stanford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Self-supervised learning with rotation-invariant kernels", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11124", "id": "8uu6JStuYm", "poster": "/media/PosterPDFs/ICLR%202023/11124.png?t=1682325817.6139667", "openreview": "https://openreview.net/forum?id=8uu6JStuYm", "slides": "https://iclr.cc/virtual/2023/poster/11124", "video": "https://iclr.cc/virtual/2023/poster/11124", "author_site": "L\u00e9on Zheng, Gilles Puy, Elisa Riccietti, Patrick Perez, R\u00e9mi Gribonval", "tldr": "A regularization loss based on kernel mean embeddings with rotation-invariant kernels on the hypersphere for self-supervised learning of image representations", "abstract": "We introduce a regularization loss based on kernel mean embeddings with rotation-invariant kernels on the hypersphere (also known as dot-product kernels) for self-supervised learning of image representations. Besides being fully competitive with the state of the art, our method significantly reduces time and memory complexity for self-supervised training, making it implementable for very large embedding dimensions on existing devices and more easily adjustable than previous methods to settings with limited resources. Our work follows the major paradigm where the model learns to be invariant to some predefined image transformations (cropping, blurring, color jittering, etc.), while avoiding a degenerate solution by regularizing the embedding distribution. Our particular contribution is to propose a loss family promoting the embedding distribution to be close to the uniform distribution on the hypersphere, with respect to the maximum mean discrepancy pseudometric. We demonstrate that this family encompasses several regularizers of former methods, including uniformity-based and information-maximization methods, which are variants of our flexible regularization loss with different kernels. 
Beyond its practical consequences for state of the art self-supervised learning with limited resources, the proposed generic regularization approach opens perspectives to leverage more widely the literature on kernel methods in order to improve self-supervised learning methods.", "keywords": "Self-supervised learning;maximum mean discrepancy;rotation-invariant kernel;hypersphere", "primary_area": "", "supplementary_material": "", "author": "L\u00e9on Zheng;Gilles Puy;Elisa Riccietti;Patrick Perez;R\u00e9mi Gribonval", "authorids": "~L\u00e9on_Zheng1;~Gilles_Puy2;~Elisa_Riccietti1;~Patrick_Perez1;~R\u00e9mi_Gribonval1", "gender": "M;;F;;", "homepage": ";;http://perso.ens-lyon.fr/elisa.riccietti/;;", "dblp": ";;179/5701;;", "google_scholar": ";;NtPpissAAAAJ;;", "orcid": ";;;;", "linkedin": "leon-zheng/;;;;", "or_profile": "~L\u00e9on_Zheng1;~Gilles_Puy2;~Elisa_Riccietti1;~Patrick_Perez1;~R\u00e9mi_Gribonval1", "aff": "Valeo;;ENS Lyon;;", "aff_domain": "valeo.com;;ens.fr;;", "position": "PhD student;;Associate Professor;;", "bibtex": "@inproceedings{\nzheng2023selfsupervised,\ntitle={Self-supervised learning with rotation-invariant kernels},\nauthor={L{\\'e}on Zheng and Gilles Puy and Elisa Riccietti and Patrick Perez and R{\\'e}mi Gribonval},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=8uu6JStuYm}\n}", "github": "", "project": "", "reviewers": "uDSt;3r4M;7yz3;w91x", "pdf_size": 605079, "recommendation": "6;6;8;8", "confidence": "3;3;3;4", "correctness": "3;3;4;4", "technical_novelty": "2;4;2;4", "empirical_novelty": "2;3;2;3", "wc_summary_paper": "68;31;93;173", "wc_strength_and_weaknesses": "246;216;212;153", "wc_clarity_quality_novelty_and_reproducibility": "10;17;65;52", "wc_summary_review": "11;16;125;33", "wc_review": "335;280;495;411", "wc_reply_reviewers": "50;13;0;0", "wc_reply_authors": "749;729;761;43", "reply_reviewers": "1;1;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.0, 1.0 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 91.25, 52.09786463954161 ], "wc_strength_and_weaknesses_avg": [ 206.75, 33.699962907991456 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 36.0, 23.097618924902193 ], "wc_summary_review_avg": [ 46.25, 46.19185534269001 ], "wc_review_avg": [ 380.25, 80.94867200887239 ], "wc_reply_reviewers_avg": [ 15.75, 20.474068965401088 ], "wc_reply_authors_avg": [ 570.5, 304.76671406175575 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 1.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10036989327569331075&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "pdf": "https://openreview.net/pdf?id=8uu6JStuYm", "email": "valeo.com;;ens.fr;;", "author_num": 5, "aff_unique_index": "0;1", "aff_unique_norm": "Valeo;Ecole Normale Sup\u00e9rieure de Lyon", "aff_unique_dep": ";", "aff_unique_url": "https://www.valeo.com;https://www.ens-lyon.fr", "aff_unique_abbr": ";ENS Lyon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "France" }, { "id": "8vJcsZ-3Ly", "title": "Does the Half Adversarial Robustness Represent the Whole? It Depends... 
A Theoretical Perspective of Subnetwork Robustness", "track": "main", "status": "Reject", "tldr": "We prove with theory and experimental results that if a subnetwork is adversarially robust and highly correlated with the rest of the network, then the remaining layers are also robust.", "abstract": "Adversarial robustness of deep neural networks has been studied extensively and can bring security against adversarial attacks/examples. However, adversarially robust training approaches require a training mechanism on the entire deep network which can come at the cost of efficiency and computational complexity such as runtime. As a pilot study, we develop in this paper a novel theoretical framework that aims to answer the question of how can we make a whole model robust to adversarial examples by making part of a model robust? Toward promoting subnetwork robustness, we propose for the first time a new concept of semirobustness, which indicates adversarial robustness of a part of the network. We provide a theoretical analysis to show that if a subnetwork is robust and highly correlated with the rest of the network, then the remaining layers are also guaranteed to be robust. To guide the empirical investigation of our theoretical findings, we implemented our method at multiple layer depths and across multiple common image classification datasets. Experiments demonstrate that our method, with sufficient dependency between subnetworks, successfully utilizes subnetwork robustness to match fully-robust models' performance across AlexNet, VGG16, and ResNet50 benchmarks, for attack types FGSM, I-FGSM, PGD, and C&W.", "keywords": "Adversarial Learning;Adversarial Robustness;Subnetworks;Semirobustness;Information-Theoretic Measures;Mutual Dependency", "primary_area": "", "supplementary_material": "/attachment/951ca01b7a7c4b6c53a1f4b77ae4e0d1a0ee4e10.zip", "author": "Jovon Craig;Joshua Andle;Theodore Stein Nowak;Salimeh Yasaei Sekeh", "authorids": "~Jovon_Craig1;~Joshua_Andle1;~Theodore_Stein_Nowak1;~Salimeh_Yasaei_Sekeh1", "gender": ";M;M;F", "homepage": "https://salimehyasaei.wixsite.com/sekeh-lab;;http://theodorenowak.com;https://www.salimeh.info", "dblp": ";;;151/6376", "google_scholar": ";;44IvvoQAAAAJ;s17L6jAAAAAJ", "orcid": ";0000-0002-0156-732X;;", "linkedin": ";;tsnowak/;", "or_profile": "~Jovon_Craig1;~Joshua_Andle1;~Theodore_Stein_Nowak1;~Salimeh_Yasaei_Sekeh1", "aff": "University of Maine;Cisco;Pacific Northwest National Laboratory;University of Maine", "aff_domain": "umaine.edu;cisco.com;pnnl.gov;umaine.edu", "position": "PhD student;Intern;Data Scientist;Assistant Professor", "bibtex": "@misc{\ncraig2023does,\ntitle={Does the Half Adversarial Robustness Represent the Whole? It Depends... 
A Theoretical Perspective of Subnetwork Robustness},\nauthor={Jovon Craig and Joshua Andle and Theodore Stein Nowak and Salimeh Yasaei Sekeh},\nyear={2023},\nurl={https://openreview.net/forum?id=8vJcsZ-3Ly}\n}", "github": "", "project": "", "reviewers": "WXTT;fA2z;6THo;3nVE", "site": "https://openreview.net/forum?id=8vJcsZ-3Ly", "pdf_size": 401976, "recommendation": "3;5;5;8", "confidence": "4;4;3;3", "correctness": "3;3;3;4", "technical_novelty": "2;3;2;4", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "54;73;41;52", "wc_strength_and_weaknesses": "131;461;208;422", "wc_clarity_quality_novelty_and_reproducibility": "67;195;17;19", "wc_summary_review": "24;60;5;148", "wc_review": "276;789;271;641", "wc_reply_reviewers": "324;35;0;35", "wc_reply_authors": "2106;818;783;793", "reply_reviewers": "2;1;0;1", "reply_authors": "3;1;1;1", "recommendation_avg": [ 5.25, 1.7853571071357126 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 55.0, 11.510864433221338 ], "wc_strength_and_weaknesses_avg": [ 305.5, 139.38167024397433 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 74.5, 72.39302452584779 ], "wc_summary_review_avg": [ 59.25, 54.91527565259051 ], "wc_review_avg": [ 494.25, 226.87372589173916 ], "wc_reply_reviewers_avg": [ 98.5, 130.97423410732355 ], "wc_reply_authors_avg": [ 1125.0, 566.5240506809927 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.7001400420140049, "corr_recommendation_correctness": 0.8892972917998875, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:a8gJVPtFUHsJ:scholar.google.com/&scioq=Does+the+Half+Adversarial+Robustness+Represent+the+Whole%3F+It+Depends...+A+Theoretical+Perspective+of+Subnetwork+Robustness&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Maine;Cisco Systems;Pacific Northwest National Laboratory", "aff_unique_dep": ";;", "aff_unique_url": "https://www.umaine.edu;https://www.cisco.com;https://www.pnnl.gov", "aff_unique_abbr": "UMaine;Cisco;PNNL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "8vz6hO1S4o5", "title": "Fair Clustering via Equalized Confidence", "track": "main", "status": "Withdraw", "tldr": "fair clustering based on equality of predicted confidence between different demographic groups", "abstract": "Fair clustering aims at eliminating the effects of sensitive information in clustering assignment. Existing work on fair clustering addresses this problem as vanilla clustering with the constraint that the distribution of protected groups in each cluster should be similar. However, existing criteria for fair clustering do not take into account clustering accuracy, and may restrain the performance of clustering algorithms. To tackle this problem, in this work, we propose a novel metric, equalized confidence, for fair clustering based on the predicted clustering confidence. 
Instead of enforcing similar distributions of sensitive attributes across different clusters, equalized confidence requires similar predicted confidence across different sensitive groups, bypassing the problem of disparities in statistical features across demographic groups. In light of the new metric, we propose a fair clustering method to learn a fair and good representation for clustering. Compared with conventional methods for fair clustering, which try to adjust the clustering assignment, our method focuses on learning a fair representation for downstream tasks. Our method proposes to eliminate the disparities of predicted soft labels of samples in different demographic groups using Sinkhorn divergence, as well as to learn clustering-favorable representations for clustering. Experimental results show that our method performs better than or comparably to state-of-the-art methods, and that our proposed metric aligns better with clustering accuracy.", "keywords": "fair clustering;Sinkhorn divergence;equalized confidence", "primary_area": "", "supplementary_material": "/attachment/f0037fe3eddc95cab0b53dec4a1b56d6db47f843.zip", "author": "Junyi Chai;Xiaoqian Wang", "authorids": "~Junyi_Chai1;~Xiaoqian_Wang1", "gender": "M;F", "homepage": ";https://engineering.purdue.edu/~joywang/", "dblp": "323/9078;151/3215-1", "google_scholar": "fucMzpYAAAAJ;I3tc214AAAAJ", "orcid": "0000-0002-4324-5361;", "linkedin": "junyi-chai-260869256/?trk=opento_sprofile_details;", "or_profile": "~Junyi_Chai1;~Xiaoqian_Wang1", "aff": "Purdue University;Purdue University", "aff_domain": "purdue.edu;purdue.edu", "position": "PhD student;Assistant Professor", "bibtex": "@misc{\nchai2023fair,\ntitle={Fair Clustering via Equalized Confidence},\nauthor={Junyi Chai and Xiaoqian Wang},\nyear={2023},\nurl={https://openreview.net/forum?id=8vz6hO1S4o5}\n}", "github": "", "project": "", "reviewers": "nAzw;H27L;bamj;dugb", "site": "https://openreview.net/forum?id=8vz6hO1S4o5", "pdf_size": 560256, "recommendation": "3;3;5;6", "confidence": "5;3;5;2", "correctness": "3;4;2;4", "technical_novelty": "2;2;1;3", "empirical_novelty": "2;3;0;2", "wc_summary_paper": "52;254;62;111", "wc_strength_and_weaknesses": "254;160;386;87", "wc_clarity_quality_novelty_and_reproducibility": "38;88;20;41", "wc_summary_review": "15;31;55;54", "wc_review": "359;533;523;293", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 1.299038105676658 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 1.75, 1.0897247358851685 ], "wc_summary_paper_avg": [ 119.75, 80.66094160124837 ], "wc_strength_and_weaknesses_avg": [ 221.75, 111.79082028503056 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 46.75, 25.13339412017406 ], "wc_summary_review_avg": [ 38.75, 16.7388022271607 ], "wc_review_avg": [ 427.0, 103.72077901751413 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.4074074074074074, "corr_recommendation_correctness": -0.058025885318565944, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:DG57jPHOCUEJ:scholar.google.com/&scioq=Fair+Clustering+via+Equalized+Confidence&hl=en&as_sdt=0,14", "gs_version_total": 0, "aff_unique_index": 
"0;0", "aff_unique_norm": "Purdue University", "aff_unique_dep": "", "aff_unique_url": "https://www.purdue.edu", "aff_unique_abbr": "Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Trainable Weight Averaging: Efficient Training by Optimizing Historical Solutions", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11905", "id": "8wbnpOJY-f", "poster": "/media/PosterPDFs/ICLR%202023/11905.png?t=1680768391.872985", "openreview": "https://openreview.net/forum?id=8wbnpOJY-f", "slides": "https://iclr.cc/virtual/2023/poster/11905", "video": "https://iclr.cc/virtual/2023/poster/11905", "author_site": "Tao Li, Zhehao Huang, Qinghua Tao, Yingwen Wu, Xiaolin Huang", "tldr": "We propose trainable weight averaging (TWA) to optimize historical solutions in DNNs' training to achieve efficiency and better performance.", "abstract": "Stochastic gradient descent (SGD) and its variants are considered as the de-facto methods to train deep neural networks (DNNs). While recent improvements to SGD mainly focus on the descent algorithm itself, few works pay attention to utilizing the historical solutions---as an iterative method, SGD has gone through substantial explorations before convergence. Recently, an interesting attempt is stochastic weight averaging (SWA), which significantly improves the generalization by simply averaging the solutions at the tail stage of training. In this paper, we realize that the averaging coefficients could be determined in a trainable manner and propose Trainable Weight Averaging (TWA), a novel optimization method in the reduced subspace spanned by historical solutions. TWA has much greater flexibility and can be applied to the head stage of training to achieve training efficiency while preserving good generalization capability. Further, we propose a distributed training scheme to resolve the memory burden of large-scale training with efficient parallel computation. 
In the extensive numerical experiments, (i) TWA achieves consistent improvements over SWA with less sensitivity to learning rate; (ii) applying TWA in the head stage of training largely speeds up the convergence, resulting in over $40\\%$ time saving on CIFAR and $30\\%$ on ImageNet with improved generalization compared with regular training.", "keywords": "efficient training;weight averaging;optimization", "primary_area": "", "supplementary_material": "/attachment/99bb915fb0009e5e0c35c9e23bcb3b36a54218c6.zip", "author": "Tao Li;Zhehao Huang;Qinghua Tao;Yingwen Wu;Xiaolin Huang", "authorids": "~Tao_Li12;~Zhehao_Huang1;~Qinghua_Tao1;~Yingwen_Wu1;~Xiaolin_Huang1", "gender": "M;M;F;F;M", "homepage": "https://nblt.github.io/;https://github.com/K1nght;https://qinghua-tao.github.io/;https://github.com/snowien;http://www.pami.sjtu.edu.cn/en/xiaolin", "dblp": ";258/1555;182/9643.html;236/4329;61/2227", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;_dZHZD8AAAAJ;https://scholar.google.com.hk/citations?user=PcJzfBEAAAAJ;DR-gBcEAAAAJ", "orcid": ";;0000-0001-9705-7748;;", "linkedin": ";;;;", "or_profile": "~Tao_Li12;~Zhehao_Huang1;~Qinghua_Tao1;~Yingwen_Wu1;~Xiaolin_Huang1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;(ESAT) Department of Electrical Engineering, KU Leuven, Belgium, KU Leuven;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu;sjtu.edu.cn;esat.kuleuven.be;sjtu.edu;sjtu.edu.cn", "position": "PhD student;PhD student;Postdoc;PhD student;Associate Professor", "bibtex": "@inproceedings{\nli2023trainable,\ntitle={Trainable Weight Averaging: Efficient Training by Optimizing Historical Solutions},\nauthor={Tao Li and Zhehao Huang and Qinghua Tao and Yingwen Wu and Xiaolin Huang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=8wbnpOJY-f}\n}", "github": "", "project": "", "reviewers": "nodd;Tq6j;zFxA;NH8Y", "pdf_size": 370572, "recommendation": "5;6;6;8", "confidence": "3;4;4;3", "correctness": "3;3;3;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "106;49;47;18", "wc_strength_and_weaknesses": "207;356;201;176", "wc_clarity_quality_novelty_and_reproducibility": "49;13;29;7", "wc_summary_review": "74;25;47;17", "wc_review": "436;443;324;218", "wc_reply_reviewers": "0;0;56;0", "wc_reply_authors": "1237;1362;906;723", "reply_reviewers": "0;0;1;0", "reply_authors": "3;3;4;1", "recommendation_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 55.0, 31.898275815473163 ], "wc_strength_and_weaknesses_avg": [ 235.0, 70.8201948599409 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 24.5, 16.27114009527298 ], "wc_summary_review_avg": [ 40.75, 22.117583502724706 ], "wc_review_avg": [ 355.25, 92.24254712441542 ], "wc_reply_reviewers_avg": [ 14.0, 24.24871130596428 ], "wc_reply_authors_avg": [ 1057.0, 254.8440699722087 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 1.0897247358851685 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.2294157338705618, "corr_recommendation_correctness": 0.0, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=427090211572423446&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "pdf": 
"https://openreview.net/pdf?id=8wbnpOJY-f", "email": "sjtu.edu;sjtu.edu.cn;esat.kuleuven.be;sjtu.edu;sjtu.edu.cn", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Shanghai Jiao Tong University;KU Leuven", "aff_unique_dep": ";Department of Electrical Engineering", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.kuleuven.be", "aff_unique_abbr": "SJTU;KU Leuven", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "China;Belgium" }, { "id": "8xZogWcm73f", "title": "Automatic Dictionary Generation: Could Brothers Grimm Create a Dictionary with BERT?", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "The creation of the most famous German dictionary, also referred to as ``Deutsches W\u00f6rterbuch'' or in English ``The German Dictionary'', by the two brothers Jacob and Wilhelm Grimm, took more than a lifetime to be finished (1838--1961). In our work we examine the question, if it would be possible for them to create a dictionary using present technology, i.e., language models such as BERT. Starting with the definition of the task of Automatic Dictionary Generation, we propose a method based on contextualized word embeddings and hierarchical clustering to create a dictionary given unannotated text corpora. We justify our design choices by running variants of our method on English texts, where ground truth dictionaries are available. Finally, we apply of our approach to Shakespeare's work and automatically generate a dictionary tailored to Shakespearean vocabulary and contexts without human intervention.", "keywords": "dictionary generation;natural language processing;transformers", "primary_area": "", "supplementary_material": "/attachment/fbc3ae65d462cd726a46e46641f5877fef157c80.zip", "author": "Hendryk Tobias Weiland;Maike Behrendt;Stefan Harmeling", "authorids": "~Hendryk_Tobias_Weiland1;~Maike_Behrendt1;~Stefan_Harmeling1", "gender": "M;F;Unspecified", "homepage": ";;", "dblp": ";274/6560;67/3271", "google_scholar": ";https://scholar.google.de/citations?hl=de;https://scholar.google.de/citations?user=TA2fG64AAAAJ", "orcid": ";;0000-0001-9709-8160", "linkedin": "hendryk-w41352/;;", "or_profile": "~Hendryk_Tobias_Weiland1;~Maike_Behrendt1;~Stefan_Harmeling1", "aff": ";Heinrich-Heine Universit\u00e4t D\u00fcsseldorf;Technische Universit\u00e4t Dortmund", "aff_domain": ";uni-duesseldorf.de;tu-dortmund.de", "position": ";PhD student;Full Professor", "bibtex": "@misc{\nweiland2023automatic,\ntitle={Automatic Dictionary Generation: Could Brothers Grimm Create a Dictionary with {BERT}?},\nauthor={Hendryk Tobias Weiland and Maike Behrendt and Stefan Harmeling},\nyear={2023},\nurl={https://openreview.net/forum?id=8xZogWcm73f}\n}", "github": "", "project": "", "reviewers": "rS8Z;XJCG;4KJg;xcJF", "site": "https://openreview.net/forum?id=8xZogWcm73f", "pdf_size": 1730144, "recommendation": "1;3;3;3", "confidence": "5;4;4;4", "correctness": "1;3;3;2", "technical_novelty": "1;1;1;2", "empirical_novelty": "1;2;2;2", "wc_summary_paper": "130;113;55;42", "wc_strength_and_weaknesses": "129;178;30;140", "wc_clarity_quality_novelty_and_reproducibility": "62;83;104;3", "wc_summary_review": "76;55;15;8", "wc_review": "397;429;204;193", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 2.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 2.25, 0.82915619758885 ], 
"technical_novelty_avg": [ 1.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 85.0, 37.275997639231605 ], "wc_strength_and_weaknesses_avg": [ 119.25, 54.64144489304799 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 63.0, 37.689521090085506 ], "wc_summary_review_avg": [ 38.5, 28.111385593741193 ], "wc_review_avg": [ 305.75, 107.91518660503719 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": 0.8703882797784891, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2501505895518491210&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1", "aff_unique_norm": "Heinrich-Heine Universit\u00e4t;Technische Universit\u00e4t Dortmund", "aff_unique_dep": ";", "aff_unique_url": "https://www.hhu.de;https://www.tu-dortmund.de", "aff_unique_abbr": "HHU;TU Dortmund", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "id": "8xoV4ZrIgbk", "title": "Homeomorphism Alignment in Two Spaces for Unsupervised Domain Adaptation", "track": "main", "status": "Reject", "tldr": "A new appraoch uses Homeomorphism property to do Unsupervised Domain Adaptation.", "abstract": "The existing unsupervised domain adaptation methods always align the features from the source and target domains explicitly or implicitly in a common space, i.e., the domain invariant space. Explicit distribution matching always ignores the discriminability of the learned features, while implicit distribution matching such as self-supervised learning suffers from the pseudo-label noises. It is difficult to find a common space which maintains discriminative structure of the source and target domain data when aligning the data distributions. We propose a novel approach dubbed as HomeomorphisM Alignment (HMA) so that the source and target features can be aligned in two different spaces. Specifically, an invertible neural network based homeomorphism is constructed. Distribution matching method is used as a sewing up tool for connecting homeomorphism mapping between the source and target feature spaces. Theoretically, we show this mapping can preserve data topological structure, i.e., the samples in the same cluster are still in the same projected cluster. Based on this property, we adapt the model by the cross entropy of transformed and original source features and prediction consistency between target features and transformed target features. 
Extensive experiments demonstrate that our method can achieve state-of-the-art results.", "keywords": "Homeomorphism Alignment;Unsupervised Domain Adaptation;Self-supervised Learning", "primary_area": "", "supplementary_material": "", "author": "Lihua Zhou;Mao Ye;Siying Xiao;Xu-Qian Fan;Xiatian Zhu;Ferrante Neri", "authorids": "~Lihua_Zhou1;~Mao_Ye1;~Siying_Xiao1;~Xu-Qian_Fan1;~Xiatian_Zhu3;~Ferrante_Neri1", "gender": "M;M;F;M;;M", "homepage": ";http://en.uestc.edu.cn/index.php?m=content&c=index&a=show&catid=79&id=5422;;;https://x-up-lab.github.io;https://www.surrey.ac.uk/people/ferrante-neri", "dblp": "dblp.uni-trier.de/pers/hd/y/Zhou_0001:Lihua;dblp.uni-trier.de/pers/hd/y/Ye_0001:Mao;;;128/7935;", "google_scholar": "7wECe98AAAAJ;V5gL_H0AAAAJ;TosYOUsAAAAJ;;ZbA-z1cAAAAJ;h0JDq7sAAAAJ", "orcid": "0000-0003-0370-3337;;;0000-0001-8505-0637;0000-0002-9284-2955;0000-0002-6100-6532", "linkedin": ";;;;;ferrante-neri-b4aa6219a/", "or_profile": "~Lihua_Zhou1;~Mao_Ye1;~Siying_Xiao1;~Xu-Qian_Fan1;~Xiatian_Zhu3;~Ferrante_Neri1", "aff": "University of Electronic Science and Technology of China;University of Electronic Science and Technology of China;University of Electronic Science and Technology of China;Jinan University;University of Surrey;University of Surrey", "aff_domain": "uestc.edu.cn;uestc.edu.cn;uestc.edu.cn;jnu.edu.cn;surrey.ac.uk;surrey.ac.uk", "position": "PhD student;Full Professor;Undergrad student;Associate Professor;Associate Professor;Full Professor", "bibtex": "@misc{\nzhou2023homeomorphism,\ntitle={Homeomorphism Alignment in Two Spaces for Unsupervised Domain Adaptation},\nauthor={Lihua Zhou and Mao Ye and Siying Xiao and Xu-Qian Fan and Xiatian Zhu and Ferrante Neri},\nyear={2023},\nurl={https://openreview.net/forum?id=8xoV4ZrIgbk}\n}", "github": "", "project": "", "reviewers": "UL2Y;XjCb;X6x3;1hQA", "site": "https://openreview.net/forum?id=8xoV4ZrIgbk", "pdf_size": 1328653, "recommendation": "5;5;5;6", "confidence": "5;5;4;4", "correctness": "3;4;3;4", "technical_novelty": "3;3;3;3", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "41;43;86;81", "wc_strength_and_weaknesses": "410;66;184;194", "wc_clarity_quality_novelty_and_reproducibility": "88;24;30;39", "wc_summary_review": "54;25;27;37", "wc_review": "593;158;327;351", "wc_reply_reviewers": "0;0;57;20", "wc_reply_authors": "790;246;905;268", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;2;1", "recommendation_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.5, 0.5 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 62.75, 20.837166314064877 ], "wc_strength_and_weaknesses_avg": [ 213.5, 124.1158732797703 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 45.25, 25.252475126212875 ], "wc_summary_review_avg": [ 35.75, 11.4755174175285 ], "wc_review_avg": [ 357.25, 155.10701950588825 ], "wc_reply_reviewers_avg": [ 19.25, 23.27418097377435 ], "wc_reply_authors_avg": [ 552.25, 298.13786659865934 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:zEYlUusffHIJ:scholar.google.com/&scioq=Homeomorphism+Alignment+in+Two+Spaces+for+Unsupervised+Domain+Adaptation&hl=en&as_sdt=0,10", "gs_version_total": 0, "aff_unique_index": "0;0;0;1;2;2",
"aff_unique_norm": "University of Electronic Science and Technology of China;Jinan University;University of Surrey", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uestc.edu.cn;https://www.jnu.edu.cn;https://www.surrey.ac.uk", "aff_unique_abbr": "UESTC;JNU;Surrey", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;1", "aff_country_unique": "China;United Kingdom" }, { "id": "8xuFD1yCoH", "title": "TuneUp: A Training Strategy for Improving Generalization of Graph Neural Networks", "track": "main", "status": "Reject", "tldr": "We develop a curriculum learning strategy to train GNNs with high generalization performance especially on tail nodes.", "abstract": "Despite many advances in Graph Neural Networks (GNNs), their training strategies simply focus on minimizing a loss over nodes in a graph. However, such simplistic training strategies may be sub-optimal as they neglect that certain nodes are much harder to make accurate predictions on than others. Here we present TuneUp, a curriculum learning strategy for better training GNNs. Crucially, TuneUp trains a GNN in two stages. The first stage aims to produce a strong base GNN. Such base GNNs tend to perform well on head nodes (nodes with large degrees) but less so on tail nodes (nodes with small degrees). So, the second stage of TuneUp specifically focuses on improving prediction on tail nodes. Concretely, TuneUp synthesizes many additional supervised tail node data by dropping edges from head nodes and reusing the supervision on the original head nodes. TuneUp then minimizes the loss over the synthetic tail nodes to finetune the base GNN. TuneUp is a general training strategy that can be used with any GNN architecture and any loss, making TuneUp applicable to a wide range of prediction tasks. Extensive evaluation of TuneUp on two GNN architectures, three types of prediction tasks, and both inductive and transductive settings shows that TuneUp significantly improves the performance of the base GNN on tail nodes, while often even improving the performance on head nodes, which together leads up to 58.5% relative improvement in GNN predictive performance. 
Moreover, TuneUp significantly outperforms its variants without the two-stage curriculum learning, existing graph data augmentation techniques, as well as other specialized methods for tail nodes.", "keywords": "Graph Neural Networks;Curriculum learning;Tail nodes", "primary_area": "", "supplementary_material": "", "author": "Weihua Hu;Kaidi Cao;Kexin Huang;Edward W Huang;Karthik Subbian;Jure Leskovec", "authorids": "~Weihua_Hu1;~Kaidi_Cao1;~Kexin_Huang1;~Edward_W_Huang1;~Karthik_Subbian1;~Jure_Leskovec1", "gender": "M;M;M;M;M;", "homepage": "http://web.stanford.edu/~weihuahu/;https://ai.stanford.edu/~kaidicao/;https://www.kexinhuang.com/;;http://mailtosuka.googlepages.com;http://cs.stanford.edu/~jure/", "dblp": "42/1232;203/8207;;192/2417.html;32/5843;l/JureLeskovec", "google_scholar": "wAFMjfkAAAAJ;https://scholar.google.com.hk/citations?user=4Zw1PJ8AAAAJ;ogEXTOgAAAAJ;EqvdkCAAAAAJ;;Q_kKkIUAAAAJ", "orcid": ";;;0000-0002-4461-8545;;0000-0002-5411-923X", "linkedin": "weihua-hu-a8284228/;;;ewhuang/;;leskovec/", "or_profile": "~Weihua_Hu1;~Kaidi_Cao1;~Kexin_Huang1;~Edward_W_Huang1;~Karthik_Subbian1;~Jure_Leskovec1", "aff": ";Stanford University;Stanford University;Amazon;Amazon;Kumo.AI", "aff_domain": ";stanford.edu;stanford.edu;amazon.com;amazon.com;kumo.ai", "position": ";PhD student;PhD student;Applied Scientist;Researcher;Chief Scientist", "bibtex": "@misc{\nhu2023tuneup,\ntitle={TuneUp: A Training Strategy for Improving Generalization of Graph Neural Networks},\nauthor={Weihua Hu and Kaidi Cao and Kexin Huang and Edward W Huang and Karthik Subbian and Jure Leskovec},\nyear={2023},\nurl={https://openreview.net/forum?id=8xuFD1yCoH}\n}", "github": "", "project": "", "reviewers": "1BhX;Yj9L;xBXx;khNG", "site": "https://openreview.net/forum?id=8xuFD1yCoH", "pdf_size": 961569, "recommendation": "3;3;3;3", "confidence": "4;4;4;3", "correctness": "2;2;3;2", "technical_novelty": "3;2;2;2", "empirical_novelty": "3;0;3;2", "wc_summary_paper": "77;114;38;31", "wc_strength_and_weaknesses": "238;293;384;198", "wc_clarity_quality_novelty_and_reproducibility": "13;57;12;28", "wc_summary_review": "16;53;59;35", "wc_review": "344;517;493;292", "wc_reply_reviewers": "0;0;47;0", "wc_reply_authors": "220;348;230;273", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 1.224744871391589 ], "wc_summary_paper_avg": [ 65.0, 33.279122584587476 ], "wc_strength_and_weaknesses_avg": [ 278.25, 69.75089605159205 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 27.5, 18.172781845386247 ], "wc_summary_review_avg": [ 40.75, 16.798437427332342 ], "wc_review_avg": [ 411.5, 95.66739256402883 ], "wc_reply_reviewers_avg": [ 11.75, 20.351596988934308 ], "wc_reply_authors_avg": [ 267.75, 50.43002577829998 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9507240193277347499&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;1;1;2", "aff_unique_norm": "Stanford University;Amazon;Kumo.AI", "aff_unique_dep": ";Amazon.com, Inc.;", "aff_unique_url": "https://www.stanford.edu;https://www.amazon.com;https://www.kumo.ai", 
"aff_unique_abbr": "Stanford;Amazon;Kumo.AI", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "8yVy6LdhER4", "title": "Approximating How Single Head Attention Learns", "track": "main", "status": "Reject", "tldr": "Why do models often attend to salient words, and how does this evolve throughout training? We define a model property, Knowledge to Translate Individual Words, and claim that it drives the learning of the attention.", "abstract": "Why do models often attend to salient words, and how does this evolve throughout training? We approximate model training as a two stage process: early on in training when the attention weights are uniform, the model learns to translate individual input word `i` to `o` if they co-occur frequently. Later, the model learns to attend to `i` while the correct output is o because it knows `i` translates to `o`. To formalize, we define a model property, Knowledge to Translate Individual Words (KTIW) (e.g. knowing that `i` translates to `o`), and claim that it drives the learning of the attention. This claim is supported by the fact that before the attention mechanism is learned, KTIW can be learned from word co-occurrence statistics, but not the other way around. Particularly, we can construct a training distribution that makes KTIW hard to learn, the learning of the attention fails, and the model cannot even learn the simple task of copying the input words to the output. Our approximation explains why models sometimes attend to salient words, and inspires a toy example where a multi-head attention model can overcome the above hard training distribution by improving learning dynamics rather than expressiveness. 
We end by discussing the limitation of our approximation framework and suggest future directions.", "keywords": "NLP;training dynamics;attention", "primary_area": "", "supplementary_material": "/attachment/50fbe934a38e5a2f9fff136d76a78cdd7c0d665b.zip", "author": "Charlie Victor Snell;Ruiqi Zhong;Dan Klein;Jacob Steinhardt", "authorids": "~Charlie_Victor_Snell1;~Ruiqi_Zhong1;~Dan_Klein1;~Jacob_Steinhardt1", "gender": "M;M;;", "homepage": "https://sea-snell.github.io;https://ruiqi-zhong.github.io;http://people.eecs.berkeley.edu/~klein/;", "dblp": ";222/3024;;35/10625", "google_scholar": "dD7EpwQAAAAJ;GskOShAAAAAJ;;", "orcid": ";;;", "linkedin": ";;dan-klein/;", "or_profile": "~Charlie_Victor_Snell1;~Ruiqi_Zhong1;~Dan_Klein1;~Jacob_Steinhardt1", "aff": "University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;University of California, Berkeley", "aff_domain": "berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu", "position": "PhD student;PhD student;Full Professor;Assistant Professor", "bibtex": "@misc{\nsnell2023approximating,\ntitle={Approximating How Single Head Attention Learns},\nauthor={Charlie Victor Snell and Ruiqi Zhong and Dan Klein and Jacob Steinhardt},\nyear={2023},\nurl={https://openreview.net/forum?id=8yVy6LdhER4}\n}", "github": "", "project": "", "reviewers": "AtcN;H7cW;8y4q", "site": "https://openreview.net/forum?id=8yVy6LdhER4", "pdf_size": 1674186, "recommendation": "3;3;3", "confidence": "3;2;3", "correctness": "2;2;2", "technical_novelty": "2;2;2", "empirical_novelty": "2;2;2", "wc_summary_paper": "46;212;21", "wc_strength_and_weaknesses": "114;284;75", "wc_clarity_quality_novelty_and_reproducibility": "50;79;104", "wc_summary_review": "40;101;36", "wc_review": "250;676;236", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 2.0, 0.0 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 93.0, 84.76241305358565 ], "wc_strength_and_weaknesses_avg": [ 157.66666666666666, 90.73894178110938 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 77.66666666666667, 22.065558884580486 ], "wc_summary_review_avg": [ 59.0, 29.743346594938952 ], "wc_review_avg": [ 387.3333333333333, 204.19816083620557 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 41, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11476437474321968170&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "8znaO_qG0H", "title": "Federated Training of Dual Encoding Models on Small Non-IID Client Datasets", "track": "main", "status": "Reject", "tldr": "Novel approach for training dual encoding models on distributed data composed of many small, non-IID client datasets.", "abstract": "Dual encoding models that encode a pair of inputs are widely used 
for representation learning. Many approaches train dual encoding models by maximizing agreement between pairs of encodings on centralized training data. However, in many scenarios, datasets are inherently decentralized across many clients (user devices or organizations) due to privacy concerns, motivating federated learning. In this work, we focus on federated training of dual encoding models on decentralized data composed of many small, non-IID (independent and identically distributed) client datasets. We show that existing approaches that work well in centralized settings perform poorly when naively adapted to this setting using federated averaging. We observe that we can simulate large-batch loss computation on individual clients for loss functions that are based on encoding statistics. Based on this insight, we propose a novel federated training approach, Distributed Cross Correlation Optimization (DCCO), which trains dual encoding models using encoding statistics aggregated across clients, without sharing individual data samples. Our experimental results on two datasets demonstrate that the proposed DCCO approach outperforms federated variants of existing approaches by a large margin.", "keywords": "dual encoding models;federated learning;representation learning;self-supervised learning;federated self-supervised learning", "primary_area": "", "supplementary_material": "", "author": "Raviteja Vemulapalli;Warren Richard Morningstar;Philip Andrew Mansfield;Hubert Eichner;Karan Singhal;Arash Afkanpour;Bradley Green", "authorids": "~Raviteja_Vemulapalli1;~Warren_Richard_Morningstar1;~Philip_Andrew_Mansfield1;~Hubert_Eichner1;~Karan_Singhal1;~Arash_Afkanpour1;~Bradley_Green3", "gender": "M;M;M;;;;", "homepage": "http://ravitejav.weebly.com/;;https://www.linkedin.com/in/philipmansfield/;http://myselph.de;https://karansinghal.com;;", "dblp": "135/4940;260/0779;207/8144;51/4346;;87/2927;", "google_scholar": "0OFqm7YAAAAJ;https://scholar.google.com/citations?view_op=search_authors;https://scholar.google.ca/citations?user=Pm_geMkAAAAJ;;nMfflL0AAAAJ;egEZSjcAAAAJ;", "orcid": ";;0000-0003-4969-0543;;;;", "linkedin": "raviteja-vemulapalli-85146113?utm_source=share&utm_campaign=share_via&utm_content=profile&utm_medium=ios_app;;philipmansfield/;;karan1149/;;brad-green-b0247915/", "or_profile": "~Raviteja_Vemulapalli1;~Warren_Richard_Morningstar1;~Philip_Andrew_Mansfield1;~Hubert_Eichner1;~Karan_Singhal1;~Arash_Afkanpour1;~Bradley_Green3", "aff": "Apple;Google;Simon Fraser University;;Google Research;Google;Google", "aff_domain": "apple.com;google.com;sfu.ca;;google.com;google.com;google.com", "position": "Researcher;Software Engineer;Assistant Professor;;Researcher;Software Engineer;Director of Research", "bibtex": "@misc{\nvemulapalli2023federated,\ntitle={Federated Training of Dual Encoding Models on Small Non-{IID} Client Datasets},\nauthor={Raviteja Vemulapalli and Warren Richard Morningstar and Philip Andrew Mansfield and Hubert Eichner and Karan Singhal and Arash Afkanpour and Bradley Green},\nyear={2023},\nurl={https://openreview.net/forum?id=8znaO_qG0H}\n}", "github": "", "project": "", "reviewers": "RUBX;xR7P;G6CA;j562", "site": "https://openreview.net/forum?id=8znaO_qG0H", "pdf_size": 383171, "recommendation": "3;3;5;6", "confidence": "5;5;3;2", "correctness": "2;2;3;3", "technical_novelty": "2;1;2;3", "empirical_novelty": "2;1;2;3", "wc_summary_paper": "85;63;32;81", "wc_strength_and_weaknesses": "144;313;107;143", "wc_clarity_quality_novelty_and_reproducibility": "39;50;24;61",
"wc_summary_review": "9;38;16;101", "wc_review": "277;464;179;386", "wc_reply_reviewers": "28;0;0;31", "wc_reply_authors": "860;855;395;388", "reply_reviewers": "1;0;0;1", "reply_authors": "2;2;2;2", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 1.299038105676658 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 65.25, 20.90902915010642 ], "wc_strength_and_weaknesses_avg": [ 176.75, 80.06364655697367 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 43.5, 13.683932183404009 ], "wc_summary_review_avg": [ 41.0, 36.2560339805666 ], "wc_review_avg": [ 326.5, 107.99652772195965 ], "wc_reply_reviewers_avg": [ 14.75, 14.788086421170252 ], "wc_reply_authors_avg": [ 624.5, 233.01984893995618 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": 0.9622504486493761, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4260092016755532282&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;2;1;1;1", "aff_unique_norm": "Apple;Google;Simon Fraser University", "aff_unique_dep": "Apple Inc.;Google;", "aff_unique_url": "https://www.apple.com;https://www.google.com;https://www.sfu.ca", "aff_unique_abbr": "Apple;Google;SFU", "aff_campus_unique_index": "1;1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "United States;Canada" }, { "id": "8zsK9lbna9L", "title": "RephraseTTS: Dynamic Length Text based Speech Insertion with Speaker Style Transfer", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "We propose a method for the task of text-conditioned speech insertion, i.e.\\ inserting a speech sample in an input speech sample, conditioned on the corresponding complete text transcript. An example use case of the task would be to update the speech audio when corrections are done on the corresponding text transcript. The proposed method follows a transformer-based non-autoregressive approach that allows speech insertions of variable lengths, which are dynamically determined during inference, based on the text transcript and tempo of the available partial input. It is capable of maintaining the speaker's voice characteristics, prosody and other spectral properties of the available speech input. Results from our experiments and user study on LibriTTS show that our method outperforms baselines based on an existing adaptive text to speech method. 
We also provide numerous qualitative results to appreciate the quality of the output from the proposed method.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/7818992fc9d5e9451ed1d5f8b390b5a59683a03e.zip", "author": "Neeraj Matiyali;Siddharth Srivastava;Gaurav Sharma", "authorids": "~Neeraj_Matiyali1;~Siddharth_Srivastava3;~Gaurav_Sharma1", "gender": "M;M;M", "homepage": ";https://siddharthsrivastava.github.io/;http://www.grvsharma.com/research.html", "dblp": "230/4592;;s/GauravSharma4", "google_scholar": "3skMlf8AAAAJ;tMpFlPIAAAAJ;tmZ8MaAAAAAJ", "orcid": ";;", "linkedin": ";;gaurav-sharma-b9b0b13", "or_profile": "~Neeraj_Matiyali1;~Siddharth_Srivastava3;~Gaurav_Sharma1", "aff": "IIT Kanpur;TensorTour Inc;TensorTour Inc.", "aff_domain": "iitk.ac.in;tensortour.com;tensortour.com", "position": "PhD student;Researcher;CEO", "bibtex": "@misc{\nmatiyali2023rephrasetts,\ntitle={Rephrase{TTS}: Dynamic Length Text based Speech Insertion with Speaker Style Transfer},\nauthor={Neeraj Matiyali and Siddharth Srivastava and Gaurav Sharma},\nyear={2023},\nurl={https://openreview.net/forum?id=8zsK9lbna9L}\n}", "github": "", "project": "", "reviewers": "6xh6;VxVo;LWo4;DiRd", "site": "https://openreview.net/forum?id=8zsK9lbna9L", "pdf_size": 419851, "recommendation": "3;5;6;6", "confidence": "4;4;4;2", "correctness": "3;3;3;4", "technical_novelty": "2;3;2;3", "empirical_novelty": "2;0;0;2", "wc_summary_paper": "46;71;47;147", "wc_strength_and_weaknesses": "319;189;55;82", "wc_clarity_quality_novelty_and_reproducibility": "31;56;71;179", "wc_summary_review": "52;14;18;88", "wc_review": "448;330;191;496", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 1.0, 1.0 ], "wc_summary_paper_avg": [ 77.75, 41.215136782497765 ], "wc_strength_and_weaknesses_avg": [ 161.25, 103.95281381473038 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 84.25, 56.53925627384923 ], "wc_summary_review_avg": [ 43.0, 29.88310559496787 ], "wc_review_avg": [ 366.25, 117.83966861799978 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.4714045207910316, "corr_recommendation_correctness": 0.4714045207910316, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:g6LfTZb4xnUJ:scholar.google.com/&scioq=RephraseTTS:+Dynamic+Length+Text+based+Speech+Insertion+with+Speaker+Style+Transfer&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;2", "aff_unique_norm": "Indian Institute of Technology Kanpur;TensorTour Inc;TensorTour Inc.", "aff_unique_dep": ";;", "aff_unique_url": "https://www.iitk.ac.in;;", "aff_unique_abbr": "IITK;;", "aff_campus_unique_index": "0", "aff_campus_unique": "Kanpur;", "aff_country_unique_index": "0;1;1", "aff_country_unique": "India;United States" }, { "title": "Masked Frequency Modeling for Self-Supervised Visual Pre-Training", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12072", "id": "9-umxtNPx5E", "poster": "/media/PosterPDFs/ICLR%202023/12072.png?t=1682871534.9645417", "openreview": "https://openreview.net/forum?id=9-umxtNPx5E", "slides": 
"https://iclr.cc/virtual/2023/poster/12072", "video": "https://iclr.cc/virtual/2023/poster/12072", "author_site": "Jiahao Xie, Wei Li, Xiaohang Zhan, Ziwei Liu, Yew-Soon Ong, Chen Change Loy", "tldr": "", "abstract": "We present Masked Frequency Modeling (MFM), a unified frequency-domain-based approach for self-supervised pre-training of visual models. Instead of randomly inserting mask tokens to the input embeddings in the spatial domain, in this paper, we shift the perspective to the frequency domain. Specifically, MFM first masks out a portion of frequency components of the input image and then predicts the missing frequencies on the frequency spectrum. Our key insight is that predicting masked components in the frequency domain is more ideal to reveal underlying image patterns rather than predicting masked patches in the spatial domain, due to the heavy spatial redundancy. Our findings suggest that with the right configuration of mask-and-predict strategy, both the structural information within high-frequency components and the low-level statistics among low-frequency counterparts are useful in learning good representations. For the first time, MFM demonstrates that, for both ViT and CNN, a simple non-Siamese framework can learn meaningful representations even using none of the following: (i) extra data, (ii) extra model, (iii) mask token. Experimental results on image classification and semantic segmentation, as well as several robustness benchmarks show the competitive performance and advanced robustness of MFM compared with recent masked image modeling approaches. Furthermore, we also comprehensively investigate the effectiveness of classical image restoration tasks for representation learning from a unified frequency perspective and reveal their intriguing relations with our MFM approach. 
Project page: https://www.mmlab-ntu.com/project/mfm/index.html.", "keywords": "unsupervised learning;self-supervised learning;representation learning;masked frequency modeling", "primary_area": "", "supplementary_material": "", "author": "Jiahao Xie;Wei Li;Xiaohang Zhan;Ziwei Liu;Yew-Soon Ong;Chen Change Loy", "authorids": "~Jiahao_Xie2;~Wei_Li51;~Xiaohang_Zhan1;~Ziwei_Liu1;~Yew-Soon_Ong1;~Chen_Change_Loy2", "gender": ";M;M;M;M;M", "homepage": "https://jiahao000.github.io/;https://weivision.github.io/;https://xiaohangzhan.github.io/;https://liuziwei7.github.io/;https://www.mmlab-ntu.com/person/ccloy/index.html;http://www.ntu.edu.sg/home/asysong/", "dblp": "217/4325-2;;211/7010;05/6300-2;01/5855;64/4136", "google_scholar": "yA9qseUAAAAJ;41KAd6AAAAAJ;QfquhDEAAAAJ;https://scholar.google.com.hk/citations?user=lc45xlcAAAAJ;https://scholar.google.co.uk/citations?user=559LF80AAAAJ;https://scholar.google.com.tw/citations?user=h9oWOsEAAAAJ", "orcid": "0000-0001-9237-2802;;0000-0003-2136-7592;;0000-0001-5345-1591;0000-0002-4480-169X", "linkedin": ";;xiaohang-zhan-%EF%BC%88%E8%A9%B9%E6%99%93%E8%88%AA%EF%BC%89-7659b2b8/;;;", "or_profile": "~Jiahao_Xie2;~Wei_Li51;~Xiaohang_Zhan1;~Ziwei_Liu1;~Chen_Change_Loy2;~Yew_Soon_Ong1", "aff": "Nanyang Technological University;Nanyang Technological University;Huawei Technologies Ltd.;Nanyang Technological University;Nanyang Technological University;Nanyang Technological University", "aff_domain": "ntu.edu.sg;ntu.edu.sg;huawei.com;ntu.edu.sg;ntu.edu.sg;ntu.edu.sg", "position": "PhD student;Postdoc;Researcher;Assistant Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nxie2023masked,\ntitle={Masked Frequency Modeling for Self-Supervised Visual Pre-Training},\nauthor={Jiahao Xie and Wei Li and Xiaohang Zhan and Ziwei Liu and Yew-Soon Ong and Chen Change Loy},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=9-umxtNPx5E}\n}", "github": "", "project": "", "reviewers": "34gh;Un1w;Nxgg;Lxfd", "pdf_size": 10265177, "recommendation": "5;5;6;8", "confidence": "4;4;3;4", "correctness": "3;3;3;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "3;2;2;4", "wc_summary_paper": "43;87;44;82", "wc_strength_and_weaknesses": "339;183;488;186", "wc_clarity_quality_novelty_and_reproducibility": "25;176;80;65", "wc_summary_review": "46;124;114;67", "wc_review": "453;570;726;400", "wc_reply_reviewers": "0;0;19;0", "wc_reply_authors": "1025;1209;1392;578", "reply_reviewers": "0;0;1;0", "reply_authors": "3;3;3;2", "recommendation_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 64.0, 20.579115627256677 ], "wc_strength_and_weaknesses_avg": [ 299.0, 126.04165977961414 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 86.5, 55.44591959738787 ], "wc_summary_review_avg": [ 87.75, 32.31389020220252 ], "wc_review_avg": [ 537.25, 125.13467744794006 ], "wc_reply_reviewers_avg": [ 4.75, 8.227241335952167 ], "wc_reply_authors_avg": [ 1051.0, 302.3450016123964 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 88, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=138700868719032581&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=9-umxtNPx5E", "email": "ntu.edu.sg;ntu.edu.sg;huawei.com;ntu.edu.sg;ntu.edu.sg;ntu.edu.sg", "author_num": 6, "aff_unique_index": "0;0;1;0;0;0", "aff_unique_norm": "Nanyang Technological University;Huawei", "aff_unique_dep": ";Huawei Technologies", "aff_unique_url": "https://www.ntu.edu.sg;https://www.huawei.com", "aff_unique_abbr": "NTU;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "Singapore;China" }, { "id": "917v6o8fO7", "title": "Generalizable Person Re-identification Without Demographics", "track": "main", "status": "Reject", "tldr": "", "abstract": "Domain generalizable person re-identification (DG-ReID) aims to learn a ready-to-use domain-agnostic model directly for cross-dataset/domain evaluation, while current methods mainly explore the demographic information such as domain and/or camera labels for domain-invariant representation learning. However, the above-mentioned demographic information is not always accessible in practice due to privacy and security issues. In this paper, we consider the problem of person re-identification in a more general setting, \\ie domain generalizable person re-identification without demographics (\\textbf{DGWD-ReID}). To address the underlying uncertainty of domain distribution, we introduce distributionally robust optimization (DRO) to learn robust person re-identification models that perform well on all possible data distributions within the uncertainty set without demographics. However, directly applying the popular Kullback-Leibler divergence constrained DRO (or KL-DRO) fails to generalize well under the distribution shifts in real-world scenarios, since the convex condition may not hold for overparameterized neural networks. Inspired by this, we analyze and reformulate the popular KL-DRO by applying the change-of-measure technique, and then propose a simple yet efficient approach, \\textbf{Unit-DRO}, which minimizes the loss over a new dataset with hard samples upweighted and other samples downweighted. 
We perform extensive experiments on both domain generalizable and cross-domain person re-identification tasks, and the empirical results on several large-scale benchmarks show that Unit-DRO achieves superior performance compared to all baselines without using demographics.\n", "keywords": "Generalizable Person Re-Identification;Distributionally robust optimization;Change-of-measure technique", "primary_area": "", "supplementary_material": "", "author": "YiFan Zhang;Feng Li;Zhang Zhang;Baosheng Yu;Liang Wang;Dacheng Tao;Tieniu Tan", "authorids": "~YiFan_Zhang8;~Feng_Li9;~Zhang_Zhang1;~Baosheng_Yu1;~Liang_Wang3;~Dacheng_Tao1;~Tieniu_Tan1", "gender": ";M;;;M;;", "homepage": ";https://fengli-ust.github.io/;https://zhangzhang80.github.io/;https://dr.ntu.edu.sg/cris/rp/rp02563;;;", "dblp": ";92/2954-40.html;94/2468-1;178/8725;56/4499-1;;", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;rnRNwEMAAAAJ;fjzIdMQAAAAJ;;;", "orcid": ";;0000-0001-9425-3065;;;;", "linkedin": ";;;;;;", "or_profile": "~YiFan_Zhang8;~Feng_Li9;~Zhang_Zhang1;~Baosheng_Yu1;~Liang_Wang3;~Dacheng_Tao1;~Tieniu_Tan1", "aff": ";Hong Kong University of Science and Technology;Institute of Automation, Chinese Academy of Sciences;The University of Sydney;Institute of Automation, CAS, China;;", "aff_domain": ";ust.hk;ia.ac.cn;sydney.edu.au;ia.ac.cn;;", "position": ";PhD student;Associate Professor;Research Fellow;Full Professor;;", "bibtex": "@misc{\nzhang2023generalizable,\ntitle={Generalizable Person Re-identification Without Demographics},\nauthor={YiFan Zhang and Feng Li and Zhang Zhang and Baosheng Yu and Liang Wang and Dacheng Tao and Tieniu Tan},\nyear={2023},\nurl={https://openreview.net/forum?id=917v6o8fO7}\n}", "github": "", "project": "", "reviewers": "42Bp;xvAD;2SbQ", "site": "https://openreview.net/forum?id=917v6o8fO7", "pdf_size": 2818808, "recommendation": "6;6;6", "confidence": "3;4;4", "correctness": "3;3;4", "technical_novelty": "3;3;3", "empirical_novelty": "3;2;3", "wc_summary_paper": "133;59;87", "wc_strength_and_weaknesses": "154;369;174", "wc_clarity_quality_novelty_and_reproducibility": "63;17;24", "wc_summary_review": "92;36;62", "wc_review": "442;481;347", "wc_reply_reviewers": "0;33;0", "wc_reply_authors": "466;2058;865", "reply_reviewers": "0;1;0", "reply_authors": "2;4;2", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 93.0, 30.506829836393468 ], "wc_strength_and_weaknesses_avg": [ 232.33333333333334, 96.9822435065077 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 34.666666666666664, 20.237478982214054 ], "wc_summary_review_avg": [ 63.333333333333336, 22.88133640230735 ], "wc_review_avg": [ 423.3333333333333, 56.2751178487339 ], "wc_reply_reviewers_avg": [ 11.0, 15.556349186104045 ], "wc_reply_authors_avg": [ 1129.6666666666667, 676.3393296924917 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.9428090415820634 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12525195099936833347&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Hong Kong University 
of Science and Technology;Chinese Academy of Sciences;University of Sydney", "aff_unique_dep": ";Institute of Automation;", "aff_unique_url": "https://www.ust.hk;http://www.ia.cas.cn;https://www.sydney.edu.au", "aff_unique_abbr": "HKUST;CAS;USYD", "aff_campus_unique_index": "0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "China;Australia" }, { "id": "91efl6aSU2d", "title": "Dual-Domain Diffusion Based Progressive Style Rendering towards Semantic Structure Preservation", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "In this paper, we propose a Dual-Domain Diffusion based Progressive Style Rendering (D3PSR) method to achieve style rendering from the semantic Domain A to the style Domain B. Different from the classic diffusion models, our model takes two unpaired images from two domains as inputs, and the output is obtained at the midst layer. With the benefits from diffusion models, a dynamic rendering process was leveraged to progressively incorporate the texture strokes from the style domain while preserving the semantic structure in the noise-adding steps. Our experiments show that a range of artistic styles can be successfully transferred into the target images without breaking their semantic structures, demonstrating the merits of our new diffusion-based approach with beyond state-of-the-art performance in style transfer. A further study utilized the similarity scores to measure such a diffusion-based process, showing how semantic structures were rendered in our progressive process in a quantitative view.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Mengjun Tao;Richard Jiang", "authorids": "~Mengjun_Tao1;~Richard_Jiang3", "gender": "M;M", "homepage": "https://www.lancaster.ac.uk/scc/about-us/people/mengjun-tao;https://wp.lancs.ac.uk/autobrain", "dblp": ";70/8305", "google_scholar": ";NuyoNc4AAAAJ", "orcid": ";0000-0003-1721-9474", "linkedin": ";", "or_profile": "~Mengjun_Tao1;~Richard_M._Jiang1", "aff": "Lancaster University;Shanghai Jiaotong University", "aff_domain": "lancaster.ac.uk;sjtu.edu.cn", "position": "PhD student;Visiting Scholar", "bibtex": "@misc{\ntao2023dualdomain,\ntitle={Dual-Domain Diffusion Based Progressive Style Rendering towards Semantic Structure Preservation},\nauthor={Mengjun Tao and Richard Jiang},\nyear={2023},\nurl={https://openreview.net/forum?id=91efl6aSU2d}\n}", "github": "", "project": "", "reviewers": "1j63;t82C;o8gJ", "site": "https://openreview.net/forum?id=91efl6aSU2d", "pdf_size": 20023088, "recommendation": "3;5;6", "confidence": "3;4;4", "correctness": "2;2;4", "technical_novelty": "2;2;2", "empirical_novelty": "2;2;2", "wc_summary_paper": "149;89;163", "wc_strength_and_weaknesses": "298;392;123", "wc_clarity_quality_novelty_and_reproducibility": "202;68;40", "wc_summary_review": "26;72;26", "wc_review": "675;621;352", "wc_reply_reviewers": "0;230;0", "wc_reply_authors": "581;526;206", "reply_reviewers": "0;1;0", "reply_authors": "1;1;1", "recommendation_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.9428090415820634 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 133.66666666666666, 32.097074979228594 ], "wc_strength_and_weaknesses_avg": [ 271.0, 111.46598883366471 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 103.33333333333333, 70.69810621383165 ],
"wc_summary_review_avg": [ 41.333333333333336, 21.684607956387456 ], "wc_review_avg": [ 549.3333333333334, 141.26649362896433 ], "wc_reply_reviewers_avg": [ 76.66666666666667, 108.42303978193728 ], "wc_reply_authors_avg": [ 437.6666666666667, 165.3447576698121 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.9449111825230683, "corr_recommendation_correctness": 0.7559289460184546, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12830717330254962027&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Lancaster University;Shanghai Jiao Tong University", "aff_unique_dep": ";", "aff_unique_url": "https://www.lancaster.ac.uk;https://www.sjtu.edu.cn", "aff_unique_abbr": "Lancaster;SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United Kingdom;China" }, { "title": "Large Language Models are Human-Level Prompt Engineers", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10850", "id": "92gvk82DE-", "poster": "/media/PosterPDFs/ICLR%202023/10850.png?t=1681178669.1183963", "openreview": "https://openreview.net/forum?id=92gvk82DE-", "slides": "https://iclr.cc/virtual/2023/poster/10850", "video": "https://iclr.cc/virtual/2023/poster/10850", "author_site": "Yongchao Zhou, Andrei Muresanu, Ziwen Han, Keiran Paster, Silviu Pitis, Harris Chan, Jimmy Ba", "tldr": "We propose an algorithm for automatic instruction generation and selection for large language models with human level performance.", "abstract": "By conditioning on natural language instructions, large language models (LLMs) have displayed impressive capabilities as general-purpose computers. However, task performance depends significantly on the quality of the prompt used to steer the model, and most effective prompts have been handcrafted by humans. Inspired by classical program synthesis and the human approach to prompt engineering, we propose Automatic Prompt Engineer (APE) for automatic instruction generation and selection. In our method, we treat the instruction as the \"program,\" optimized by searching over a pool of instruction candidates proposed by an LLM in order to maximize a chosen score function. To evaluate the quality of the selected instruction, we evaluate the zero-shot performance of another LLM following the selected instruction. Experiments on 24 NLP tasks show that our automatically generated instructions outperform the prior LLM baseline by a large margin and achieve better or comparable performance to the instructions generated by human annotators on 21/24 tasks. We conduct extensive qualitative and quantitative analyses to explore the performance of APE. 
We show that APE-engineered prompts can be applied to steer models toward truthfulness and/or informativeness, as well as to improve few-shot learning performance by simply prepending them to standard in-context learning prompts.", "keywords": "few-shot learning;automated reasoning;large language models", "primary_area": "", "supplementary_material": "/attachment/a7ae3d05be590cee96224a25d85a81478122585c.zip", "author": "Yongchao Zhou;Andrei Ioan Muresanu;Ziwen Han;Keiran Paster;Silviu Pitis;Harris Chan;Jimmy Ba", "authorids": "~Yongchao_Zhou1;~Andrei_Ioan_Muresanu1;~Ziwen_Han1;~Keiran_Paster1;~Silviu_Pitis1;~Harris_Chan1;~Jimmy_Ba1", "gender": "M;M;Not Specified;M;M;M;M", "homepage": ";https://andreimuresanu.com/;;http://keirp.com;https://silviupitis.com;http://www.cs.toronto.edu/~hchan/;http://jimmylba.github.io", "dblp": ";;;;https://dblp.org/pers/hd/p/Pitis:Silviu;227/3248;https://dblp.org/pers/b/Ba:Jimmy.html", "google_scholar": "35M6rhsAAAAJ;Zb0NwBUAAAAJ;https://scholar.google.com/citations?hl=en;;oYlo1ycAAAAJ;0tLCTHYAAAAJ;https://scholar.google.ca/citations?user=ymzxRhAAAAAJ", "orcid": ";;;;;;", "linkedin": "yongchao-zhou-a298a7158/;andreimuresanu/;;;;theharrischan/;", "or_profile": "~Yongchao_Zhou1;~Andrei_Ioan_Muresanu1;~Ziwen_Han1;~Keiran_Paster1;~Silviu_Pitis1;~Harris_Chan1;~Jimmy_Ba1", "aff": "University of Toronto;University of Waterloo;Qualcomm;University of Toronto;;University of Toronto;Department of Computer Science, University of Toronto", "aff_domain": "mail.utoronto.ca;uwaterloo.ca;qti.qualcomm.com;toronto.edu;;toronto.edu;cs.toronto.edu", "position": "PhD student;Undergrad student;Intern;PhD student;;PhD;Assistant Professor", "bibtex": "@inproceedings{\nzhou2023large,\ntitle={Large Language Models are Human-Level Prompt Engineers},\nauthor={Yongchao Zhou and Andrei Ioan Muresanu and Ziwen Han and Keiran Paster and Silviu Pitis and Harris Chan and Jimmy Ba},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=92gvk82DE-}\n}", "github": "", "project": "", "reviewers": "3MTv;y9rU;7jaz", "pdf_size": 3830725, "recommendation": "6;6;8", "confidence": "4;4;3", "correctness": "3;3;3", "technical_novelty": "3;3;4", "empirical_novelty": "3;3;4", "wc_summary_paper": "95;75;109", "wc_strength_and_weaknesses": "303;186;253", "wc_clarity_quality_novelty_and_reproducibility": "4;99;26", "wc_summary_review": "66;104;54", "wc_review": "468;464;442", "wc_reply_reviewers": "0;243;0", "wc_reply_authors": "1437;1790;1259", "reply_reviewers": "0;1;0", "reply_authors": "3;4;2", "recommendation_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 93.0, 13.9522996909709 ], "wc_strength_and_weaknesses_avg": [ 247.33333333333334, 47.93282336307299 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 43.0, 40.60377650744653 ], "wc_summary_review_avg": [ 74.66666666666667, 21.312489817527705 ], "wc_review_avg": [ 458.0, 11.430952132988164 ], "wc_reply_reviewers_avg": [ 81.0, 114.5512985522207 ], "wc_reply_authors_avg": [ 1495.3333333333333, 220.6691842756684 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 3.0, 0.816496580927726 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 
-0.9999999999999998, "corr_recommendation_correctness": 0.0, "gs_citation": 1085, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14636664847015873910&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "pdf": "https://openreview.net/pdf?id=92gvk82DE-", "email": "mail.utoronto.ca;uwaterloo.ca;qti.qualcomm.com;toronto.edu;;toronto.edu;cs.toronto.edu", "author_num": 7, "aff_unique_index": "0;1;2;0;0;0", "aff_unique_norm": "University of Toronto;University of Waterloo;Qualcomm Incorporated", "aff_unique_dep": ";;", "aff_unique_url": "https://www.utoronto.ca;https://uwaterloo.ca;https://www.qualcomm.com", "aff_unique_abbr": "U of T;UW;Qualcomm", "aff_campus_unique_index": "1", "aff_campus_unique": ";Toronto", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "Canada;United States" }, { "id": "935WW9F8ALr", "title": "Learning to Improve Code Efficiency", "track": "main", "status": "Reject", "tldr": "We propose a generative model trained on programming competition submissions to transform programs into faster versions of those programs.", "abstract": "Improvements in the performance of computing systems, driven by Moore\u2019s Law, have transformed society. As such hardware-driven gains slow down, it becomes even more important for software developers to focus on performance and efficiency during development. While several studies have demonstrated the potential from such improved code efficiency (e.g., 2x better generational improvements compared to hardware), unlocking these gains in practice has been challenging. Reasoning about algorithmic complexity and the interaction of coding patterns on hardware can be challenging for the average programmer, especially when combined with pragmatic constraints around development velocity and multi-person development.\n\nThis paper seeks to address this problem. We analyze a large competitive programming dataset from the Google Code Jam competition and find that efficient code is indeed rare, with a 2x runtime difference between the median and the 90th percentile of solutions. We propose using machine learning to automatically provide prescriptive feedback in the form of hints, to guide programmers towards writing high-performance code. To automatically learn these hints from the dataset, we propose a novel discrete variational auto-encoder, where each discrete latent variable represents a different learned category of code-edit that increases performance. 
We show that this method represents the multi-modal space of code efficiency edits better than a sequence-to-sequence baseline and generates a distribution of more efficient solutions.", "keywords": "Machine Learning for Code;Program Synthesis;Program Optimization", "primary_area": "", "supplementary_material": "", "author": "Binghong Chen;Daniel Tarlow;Kevin Swersky;Martin Maas;Pablo Heiber;Ashish V Naik;Milad Hashemi;Parthasarathy Ranganathan", "authorids": "~Binghong_Chen1;~Daniel_Tarlow1;~Kevin_Swersky1;~Martin_Maas1;~Pablo_Heiber1;~Ashish_V_Naik1;~Milad_Hashemi1;~Parthasarathy_Ranganathan1", "gender": "M;;M;M;M;M;;M", "homepage": "http://binghongchen.net/;;http://www.cs.toronto.edu/~kswersky;http://www.martin-maas.com/;;;;http://www.parthasarathys.com", "dblp": "192/2022;;35/9381;https://dblp.uni-trier.de/pers/hd/m/Maas_0001:Martin;17/101.html;217/3855.html;127/9046;12/6848", "google_scholar": "6Px5HxsAAAAJ;;https://scholar.google.ca/citations?user=IrixA8MAAAAJ;yVLaR1QAAAAJ;;0gR9f0cAAAAJ;;S3gQoMgAAAAJ", "orcid": ";;;0000-0001-7653-8139;;;;0000-0002-9751-5902", "linkedin": "binghong-chen-91b697181/;;;;;ashish-naik-7a11041/;;partharanganathan/", "or_profile": "~Binghong_Chen1;~Daniel_Tarlow1;~Kevin_Swersky1;~Martin_Maas1;~Pablo_Heiber1;~Ashish_V_Naik1;~Milad_Hashemi1;~Parthasarathy_Ranganathan1", "aff": ";;Google Deepmind;Google;Computer Science Department, Universidad de Buenos Aires;;Google;Google", "aff_domain": ";;google.com;google.com;dc.uba.ar;;google.com;google.com", "position": ";;Research Scientist;Research Scientist;Lecturer;;Research Scientist;Researcher", "bibtex": "@misc{\nchen2023learning,\ntitle={Learning to Improve Code Efficiency},\nauthor={Binghong Chen and Daniel Tarlow and Kevin Swersky and Martin Maas and Pablo Heiber and Ashish V Naik and Milad Hashemi and Parthasarathy Ranganathan},\nyear={2023},\nurl={https://openreview.net/forum?id=935WW9F8ALr}\n}", "github": "", "project": "", "reviewers": "hCSu;39mZ;SDKK", "site": "https://openreview.net/forum?id=935WW9F8ALr", "pdf_size": 960236, "recommendation": "3;5;5", "confidence": "5;4;3", "correctness": "1;2;3", "technical_novelty": "3;4;2", "empirical_novelty": "1;2;2", "wc_summary_paper": "45;168;46", "wc_strength_and_weaknesses": "745;36;127", "wc_clarity_quality_novelty_and_reproducibility": "29;872;179", "wc_summary_review": "68;117;74", "wc_review": "887;1193;426", "wc_reply_reviewers": "156;0;0", "wc_reply_authors": "449;120;129", "reply_reviewers": "1;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "correctness_avg": [ 2.0, 0.816496580927726 ], "technical_novelty_avg": [ 3.0, 0.816496580927726 ], "empirical_novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "wc_summary_paper_avg": [ 86.33333333333333, 57.74849685393455 ], "wc_strength_and_weaknesses_avg": [ 302.6666666666667, 314.9754840548 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 360.0, 367.1811542004845 ], "wc_summary_review_avg": [ 86.33333333333333, 21.82251640444388 ], "wc_review_avg": [ 835.3333333333334, 315.25051766632976 ], "wc_reply_reviewers_avg": [ 52.0, 73.53910524340094 ], "wc_reply_authors_avg": [ 232.66666666666666, 153.01488671222666 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": -0.8660254037844385, "corr_recommendation_correctness": 0.8660254037844385, "gs_citation": 7, 
"gs_cited_by_link": "https://scholar.google.com/scholar?cites=5519312551574019923&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2;1;1", "aff_unique_norm": "DeepMind;Google;Universidad de Buenos Aires", "aff_unique_dep": "DeepMind;Google;Computer Science Department", "aff_unique_url": "https://deepmind.com;https://www.google.com;https://www.db.uba.ar/", "aff_unique_abbr": "DeepMind;Google;UBA", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;2;1;1", "aff_country_unique": "United Kingdom;United States;Argentina" }, { "id": "94WEPuo8D_a", "title": "Joint Generator-Ranker Learning for Natural Language Generation", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Generate-then-rank is a widely used mechanism for text generation, where a generator produces multiple candidates and a ranker chooses the best one. However, existing methods usually train the generator and the ranker separately, which causes a lack of mutual feedback and a misalignment of their objectives. This results in suboptimal generation quality. To address this issue, we propose JGR, a novel joint training algorithm that integrates the generator and the ranker in a single framework. JGR optimizes the generator with a hybrid objective that combines data likelihood and ranker reward, and trains the ranker with a contrastive loss that compares the generator outputs. By alternately updating the generator and the ranker, JGR can effectively harmonize their learning and enhance their quality jointly. We evaluate JGR on various text generation tasks and demonstrate that it surpasses existing methods on four public datasets across three common generation scenarios. We will make our code and models publicly available for reproducibility.\n", "keywords": "natural language processing;natural language generation", "primary_area": "", "supplementary_material": "", "author": "Weizhou Shen;Yeyun Gong;Yelong Shen;Song Wang;Xiaojun Quan;Nan Duan;Weizhu Chen", "authorids": "~Weizhou_Shen1;~Yeyun_Gong2;~Yelong_Shen2;~Song_Wang10;~Xiaojun_Quan1;~Nan_Duan1;~Weizhu_Chen1", "gender": "M;M;;M;M;M;M", "homepage": ";;;;https://sites.google.com/site/xiaojunquan/;https://nanduan.github.io/;https://www.microsoft.com/en-us/research/people/wzchen/", "dblp": "245/3622;06/10400.html;37/9376;62/3151-12;90/5936;;79/2536", "google_scholar": "387Sg1wAAAAJ;piUkwMYAAAAJ;;ho1SePQAAAAJ;dRpg4t8AAAAJ;Qaa6OxIAAAAJ;LG_E-4EAAAAJ", "orcid": ";;;;;;", "linkedin": ";;;;;;", "or_profile": "~Weizhou_Shen1;~Yeyun_Gong2;~Yelong_Shen2;~Song_Wang10;~Xiaojun_Quan1;~Nan_Duan1;~Weizhu_Chen1", "aff": "SUN YAT-SEN UNIVERSITY;Microsoft;;Microsoft Azure AI;SUN YAT-SEN UNIVERSITY;Microsoft Research Asia;Microsoft GenAI", "aff_domain": "sysu.edu.cn;microsoft.com;;microsoft.com;sysu.edu.cn;microsoft.com;microsoft.com", "position": "PhD student;Researcher;;Senior Applied Scientist;Full Professor;Principal Researcher;Vice President", "bibtex": "@misc{\nshen2023joint,\ntitle={Joint Generator-Ranker Learning for Natural Language Generation},\nauthor={Weizhou Shen and Yeyun Gong and Yelong Shen and Song Wang and Xiaojun Quan and Nan Duan and Weizhu Chen},\nyear={2023},\nurl={https://openreview.net/forum?id=94WEPuo8D_a}\n}", "github": "", "project": "", "reviewers": "L47e;nPC9;xGN5;cvJv", "site": "https://openreview.net/forum?id=94WEPuo8D_a", "pdf_size": 455993, "recommendation": "6;6;6;6", "confidence": "4;3;4;4", "correctness": "3;3;3;3", "technical_novelty": "3;2;2;3", 
"empirical_novelty": "3;3;3;3", "wc_summary_paper": "54;83;58;72", "wc_strength_and_weaknesses": "75;266;232;101", "wc_clarity_quality_novelty_and_reproducibility": "23;31;45;80", "wc_summary_review": "33;46;64;106", "wc_review": "185;426;399;359", "wc_reply_reviewers": "0;96;0;0", "wc_reply_authors": "870;933;516;544", "reply_reviewers": "0;1;0;0", "reply_authors": "3;3;1;1", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 66.75, 11.519006033508273 ], "wc_strength_and_weaknesses_avg": [ 168.5, 81.91001159809466 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 44.75, 21.821720830401986 ], "wc_summary_review_avg": [ 62.25, 27.55335732719336 ], "wc_review_avg": [ 342.25, 93.86526247765997 ], "wc_reply_reviewers_avg": [ 24.0, 41.569219381653056 ], "wc_reply_authors_avg": [ 715.75, 187.34243379437558 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 1.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8170479421013054192&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;1;0;1;1", "aff_unique_norm": "Sun Yat-sen University;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "http://www.sysu.edu.cn;https://www.microsoft.com", "aff_unique_abbr": "SYSU;Microsoft", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;1;1;0;0;1", "aff_country_unique": "China;United States" }, { "id": "94bybXmOLz-", "title": "Generative Adversarial Federated Model", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "As an emerging technique, vertical federated learning collaborates with different data sources to jointly train a machine learning model without data exchange. However, federated learning is computationally expensive and inefficient in modeling due to complex encryption algorithms or secure computation protocols. Split learning offers a solution to the high computational cost and low modeling efficiency of traditional federated learning using encryption algorithms or secure computation protocols. However, vanilla split learning still suffers privacy leakage, especially the label leakage from the active party. Here, we propose the Generative Adversarial Federated Model (GAFM) built upon the vanilla split learning framework and the Generative Adversarial Network (GAN) for improved label privacy protection against commonly used attacks. In our empirical studies on two publicly available datasets, GAFM showed significant performance improvement for prediction and label privacy protection compared to existing models, including Marvell and SplitNN, which is an application of split learning to neural networks. We provide intuition on why GAFM can improve over SplitNN and Marvell and demonstrate that GAFM offers label protection through gradient perturbation compared to SplitNN. 
", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/ccb38a2aaffe012cf4180fc85227d02d56e44ab6.zip", "author": "Yujin Han;Leying Guan", "authorids": "~Yujin_Han1;~Leying_Guan1", "gender": "F;", "homepage": "https://yujinhanml.github.io/;https://campuspress.yale.edu/lguan/research/", "dblp": "317/6852;", "google_scholar": "https://scholar.google.co.kr/citations?user=SxpbS5YAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";", "linkedin": ";", "or_profile": "~Yujin_Han1;~Leying_Guan1", "aff": "Yale University;Yale University", "aff_domain": "yale.edu;yale.edu", "position": "MS student;Assistant Professor", "bibtex": "@misc{\nhan2023generative,\ntitle={Generative Adversarial Federated Model},\nauthor={Yujin Han and Leying Guan},\nyear={2023},\nurl={https://openreview.net/forum?id=94bybXmOLz-}\n}", "github": "", "project": "", "reviewers": "EPy5;Dt6C;btEZ", "site": "https://openreview.net/forum?id=94bybXmOLz-", "pdf_size": 857571, "recommendation": "3;5;6", "confidence": "3;4;2", "correctness": "2;3;4", "technical_novelty": "3;3;4", "empirical_novelty": "2;3;4", "wc_summary_paper": "29;179;45", "wc_strength_and_weaknesses": "166;445;155", "wc_clarity_quality_novelty_and_reproducibility": "106;192;26", "wc_summary_review": "25;81;81", "wc_review": "326;897;307", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "35;67;51", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 84.33333333333333, 67.25738290742181 ], "wc_strength_and_weaknesses_avg": [ 255.33333333333334, 134.18974956712933 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 108.0, 67.78397057318689 ], "wc_summary_review_avg": [ 62.333333333333336, 26.398653164297777 ], "wc_review_avg": [ 510.0, 273.76023572949134 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 51.0, 13.063945294843617 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.3273268353539886, "corr_recommendation_correctness": 0.9819805060619659, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "aff_unique_index": "0;0", "aff_unique_norm": "Yale University", "aff_unique_dep": "", "aff_unique_url": "https://www.yale.edu", "aff_unique_abbr": "Yale", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "96kgRrpnkgS", "title": "Topic and Hyperbolic Transformer to Handle Multi-modal Dependencies", "track": "main", "status": "Reject", "tldr": "", "abstract": "As multi-modal search relies on jointly learning image-text representations and has been investigated in the literature,\nour innovation is to develop Chimera, a framework in which to learn their representations and similarities.\nBecause the core of multi-modal search is learning the modalities in a shared semantic space and measuring their similarities,\nsearch quality depends on which expressive space is utilized in learning.\nThis motivates us to identify the space that can elucidate their semantic and complex relationships with small information loss.\nNovelty is assured by introducing the topic and hyperbolic as spaces,\nand performing 
contrastive/metric learning tasks to ensure the cooperation of these spaces with Transformer.\nExperiments show that Chimera empowers pre-trained models for multi-modal search tasks and demonstrate the ability of the layers it introduces.", "keywords": "Multi-modal search;Hyperbolic space;Hyperbolic geometry;Lorentz model;Transformer;Topic models", "primary_area": "", "supplementary_material": "", "author": "Noriaki Kawamae", "authorids": "~Noriaki_Kawamae1", "gender": "", "homepage": "", "dblp": "70/1704", "google_scholar": "https://scholar.google.co.jp/citations?user=ylU8pzwAAAAJ", "orcid": "0000-0002-0746-9624", "linkedin": "", "or_profile": "~Noriaki_Kawamae1", "aff": "NTT Comware", "aff_domain": "nttcom.co.jp", "position": "Evangelist", "bibtex": "@misc{\nkawamae2023topic,\ntitle={Topic and Hyperbolic Transformer to Handle Multi-modal Dependencies},\nauthor={Noriaki Kawamae},\nyear={2023},\nurl={https://openreview.net/forum?id=96kgRrpnkgS}\n}", "github": "", "project": "", "reviewers": "Gm2Z;hwZK;YYrR", "site": "https://openreview.net/forum?id=96kgRrpnkgS", "pdf_size": 263683, "recommendation": "5;5;5", "confidence": "3;4;4", "correctness": "3;4;3", "technical_novelty": "3;2;2", "empirical_novelty": "2;2;2", "wc_summary_paper": "60;37;26", "wc_strength_and_weaknesses": "192;84;70", "wc_clarity_quality_novelty_and_reproducibility": "89;20;15", "wc_summary_review": "47;4;19", "wc_review": "388;145;130", "wc_reply_reviewers": "203;0;0", "wc_reply_authors": "715;443;166", "reply_reviewers": "2;0;0", "reply_authors": "2;1;1", "recommendation_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 41.0, 14.165686240583852 ], "wc_strength_and_weaknesses_avg": [ 115.33333333333333, 54.511976258025676 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 41.333333333333336, 33.76717669901087 ], "wc_summary_review_avg": [ 23.333333333333332, 17.82008853949821 ], "wc_review_avg": [ 221.0, 118.2455073142316 ], "wc_reply_reviewers_avg": [ 67.66666666666667, 95.69511772057942 ], "wc_reply_authors_avg": [ 441.3333333333333, 224.13140986682097 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 1, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:InTFnxGB9zMJ:scholar.google.com/&scioq=Topic+and+Hyperbolic+Transformer+to+Handle+Multi-modal+Dependencies&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "NTT Comware", "aff_unique_dep": "", "aff_unique_url": "https://www.ntt-comware.co.jp", "aff_unique_abbr": "", "aff_country_unique_index": "0", "aff_country_unique": "Japan" }, { "id": "98J48HZXxd5", "title": "Autoregressive Diffusion Model for Graph Generation", "track": "main", "status": "Reject", "tldr": "A new autoregressive diffusion model for graph generation", "abstract": " Diffusion-based graph generative models have recently obtained promising results for graph generation. However, existing diffusion-based graph generative models are all one-shot generative models that apply Gaussian diffusion in the dequantized adjacency matrix space. 
Such a strategy can suffer from difficulty in model training, slow sampling speed, and incapability of incorporating constraints. We propose an \\emph{autoregressive diffusion} model for graph generation. Unlike existing methods, we define a node-absorbing diffusion process that operates directly in the discrete graph space. For forward diffusion, we design a \\emph{diffusion ordering network}, which learns an optimal node absorbing ordering from graph topology. For reverse generation, we design a \\emph{denoising network} that uses the reverse node ordering to efficiently reconstruct the graph by predicting one row of the adjacency matrix at a time. Based on permutation invariance of graph generation, we show that the two networks can be jointly trained by optimizing a simple lower bound of data likelihood. Our experiments on six diverse datasets show that our model achieves generation performance better than or comparable to the previous state-of-the-art, while enjoying fast generation speed.", "keywords": "graph generation;diffusion based generative model", "primary_area": "", "supplementary_material": "", "author": "Lingkai Kong;Jiaming Cui;Haotian Sun;Yuchen Zhuang;B. Aditya Prakash;Chao Zhang", "authorids": "~Lingkai_Kong1;~Jiaming_Cui1;~Haotian_Sun1;~Yuchen_Zhuang1;~B._Aditya_Prakash2;~Chao_Zhang15", "gender": "M;;M;M;;", "homepage": "https://lingkai-kong.com/;https://sites.cc.gatech.edu/~jcui75/index.html;https://haotiansun.tech/;https://night-chen.github.io/;https://www.cc.gatech.edu/~badityap/;http://chaozhang.org/", "dblp": "20/10253;;12/8162;191/5231.html;06/3956;94/3019-14", "google_scholar": "https://scholar.google.com/citations?hl=en;;lcWkVCQAAAAJ;T-f6XlEAAAAJ;C-NftTgAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0001-6480-513X;;0000-0001-9013-7016;;0000-0002-3252-455X;0000-0003-3009-598X", "linkedin": ";;haotian-sun-159597218/;;;", "or_profile": "~Lingkai_Kong1;~Jiaming_Cui1;~Haotian_Sun1;~Yuchen_Zhuang1;~B._Aditya_Prakash2;~Chao_Zhang15", "aff": "Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology;Adobe Systems;Georgia Institute of Technology;Georgia Institute of Technology", "aff_domain": "gatech.edu;gatech.edu;gatech.edu;adobe.com;gatech.edu;gatech.edu", "position": "PhD student;PhD student;PhD student;Intern;Associate Professor;Assistant Professor", "bibtex": "@misc{\nkong2023autoregressive,\ntitle={Autoregressive Diffusion Model for Graph Generation},\nauthor={Lingkai Kong and Jiaming Cui and Haotian Sun and Yuchen Zhuang and B. 
Aditya Prakash and Chao Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=98J48HZXxd5}\n}", "github": "", "project": "", "reviewers": "dB1j;KoQP;uUZJ;EBGH", "site": "https://openreview.net/forum?id=98J48HZXxd5", "pdf_size": 678769, "recommendation": "1;6;6;6", "confidence": "5;4;4;3", "correctness": "1;4;4;3", "technical_novelty": "2;2;4;2", "empirical_novelty": "3;3;3;2", "wc_summary_paper": "23;65;21;73", "wc_strength_and_weaknesses": "291;179;38;145", "wc_clarity_quality_novelty_and_reproducibility": "91;55;7;29", "wc_summary_review": "60;49;18;35", "wc_review": "465;348;84;282", "wc_reply_reviewers": "1059;30;0;0", "wc_reply_authors": "4031;701;69;454", "reply_reviewers": "5;1;0;0", "reply_authors": "10;1;1;1", "recommendation_avg": [ 4.75, 2.165063509461097 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.0, 1.224744871391589 ], "technical_novelty_avg": [ 2.5, 0.8660254037844386 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 45.5, 23.680160472429236 ], "wc_strength_and_weaknesses_avg": [ 163.25, 90.26177208541831 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 45.5, 31.284980421921315 ], "wc_summary_review_avg": [ 40.5, 15.724185193516387 ], "wc_review_avg": [ 294.75, 138.2016190209073 ], "wc_reply_reviewers_avg": [ 272.25, 454.395408757615 ], "wc_reply_authors_avg": [ 1313.75, 1584.8882255540925 ], "reply_reviewers_avg": [ 1.5, 2.0615528128088303 ], "reply_authors_avg": [ 3.25, 3.897114317029974 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.816496580927726, "corr_recommendation_correctness": 0.9428090415820632, "gs_citation": 71, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4846116731833002689&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "Georgia Institute of Technology;Adobe", "aff_unique_dep": ";Adobe Systems Incorporated", "aff_unique_url": "https://www.gatech.edu;https://www.adobe.com", "aff_unique_abbr": "Georgia Tech;Adobe", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Prompting GPT-3 To Be Reliable", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11550", "id": "98p5x51L5af", "poster": "", "openreview": "https://openreview.net/forum?id=98p5x51L5af", "slides": "https://iclr.cc/virtual/2023/poster/11550", "video": "https://iclr.cc/virtual/2023/poster/11550", "author_site": "Chenglei Si, Zhe Gan, Zhengyuan Yang, Shuohang Wang, Jianfeng Wang, Jordan Boyd-Graber, Lijuan Wang", "tldr": "We establish simple and effective prompting methods to make GPT-3 reliable in terms of: robustness, fairness, calibration, factuality. ", "abstract": "Large language models (LLMs) show impressive abilities via few-shot prompting. Commercialized APIs such as OpenAI GPT-3 further increase their use in real-world language applications. However, the crucial problem of how to improve the reliability of GPT-3 is still under-explored. While reliability is a broad and vaguely defined term, we decompose reliability into four main facets that correspond to the existing framework of ML safety and are well-recognized to be important: generalizability, social biases, calibration, and factuality. 
Our core contribution is to establish simple and effective prompts that improve GPT-3\u2019s reliability as it: 1) generalizes out-of-distribution, 2) balances demographic distribution and uses natural language instructions to reduce social biases, 3) calibrates output probabilities, and 4) updates the LLM\u2019s factual knowledge and reasoning chains. With appropriate prompts, GPT-3 is more reliable than smaller-scale supervised models on all these facets. We release all processed datasets, evaluation scripts, and model predictions. Our systematic empirical study not only sheds new insights on the reliability of prompting LLMs, but more importantly, our prompting strategies can help practitioners more reliably use LLMs like GPT-3.", "keywords": "prompting;GPT-3;large language models;reliability;robustness;biases;calibration;knowledge updating", "primary_area": "", "supplementary_material": "/attachment/d62a578352df4702ac8b12eaa7c7dab432865f51.zip", "author": "Chenglei Si;Zhe Gan;Zhengyuan Yang;Shuohang Wang;Jianfeng Wang;Jordan Lee Boyd-Graber;Lijuan Wang", "authorids": "~Chenglei_Si1;~Zhe_Gan1;~Zhengyuan_Yang1;~Shuohang_Wang1;~Jianfeng_Wang4;~Jordan_Lee_Boyd-Graber1;~Lijuan_Wang1", "gender": "M;M;M;M;M;M;F", "homepage": "https://noviscl.github.io/;http://zhegan27.github.io/;http://zhengyuan.info/;;;http://boydgraber.org;https://www.microsoft.com/en-us/research/people/lijuanw/", "dblp": "251/8778;41/7845;163/9713;173/5469.html;;57/5950;51/2527.html", "google_scholar": "https://scholar.google.com.sg/citations?user=CyKr1q8AAAAJ;E64XWyMAAAAJ;https://scholar.google.com/citations?hl=zh-CN;mN-IO6wAAAAJ;vJWEw_8AAAAJ;BT4XTP4AAAAJ;cDcWXuIAAAAJ", "orcid": ";;;;;0000-0002-7770-4431;", "linkedin": ";zhe-gan-a2229a78/;;;;jordan-boyd-graber-99a83994;", "or_profile": "~Chenglei_Si1;~Zhe_Gan1;~Zhengyuan_Yang1;~Shuohang_Wang1;~Jianfeng_Wang4;~Jordan_Lee_Boyd-Graber1;~Lijuan_Wang1", "aff": "Stanford University;Apple;Microsoft;Microsoft;Microsoft;University of Maryland, College Park;Microsoft", "aff_domain": "stanford.edu;apple.com;microsoft.com;microsoft.com;microsoft.com;umd.edu;microsoft.com", "position": "PhD student;Principal Researcher;Researcher;Researcher;Principal Researcher;Associate Professor;Principal Researcher", "bibtex": "@inproceedings{\nsi2023prompting,\ntitle={Prompting {GPT}-3 To Be Reliable},\nauthor={Chenglei Si and Zhe Gan and Zhengyuan Yang and Shuohang Wang and Jianfeng Wang and Jordan Lee Boyd-Graber and Lijuan Wang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=98p5x51L5af}\n}", "github": "", "project": "", "reviewers": "UbF3;d2Hh;S8uY;r9LV", "pdf_size": 1316275, "recommendation": "5;6;6;6", "confidence": "4;3;4;2", "correctness": "4;3;3;3", "technical_novelty": "2;2;2;2", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "27;102;73;14", "wc_strength_and_weaknesses": "245;642;316;67", "wc_clarity_quality_novelty_and_reproducibility": "28;229;48;23", "wc_summary_review": "54;250;53;23", "wc_review": "354;1223;490;127", "wc_reply_reviewers": "0;64;47;0", "wc_reply_authors": "382;2071;502;181", "reply_reviewers": "0;1;1;0", "reply_authors": "1;4;1;1", "recommendation_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 54.0, 35.33411948810951 ], "wc_strength_and_weaknesses_avg": [ 317.5, 208.1519877397283 ], 
"wc_clarity_quality_novelty_and_reproducibility_avg": [ 82.0, 85.38442480921213 ], "wc_summary_review_avg": [ 95.0, 90.35208907380061 ], "wc_review_avg": [ 548.5, 410.44640332204153 ], "wc_reply_reviewers_avg": [ 27.75, 28.39344114403888 ], "wc_reply_authors_avg": [ 784.0, 751.8487214859117 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.5222329678670935, "corr_recommendation_correctness": -1.0, "gs_citation": 287, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17725272328953429589&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=98p5x51L5af", "email": "stanford.edu;apple.com;microsoft.com;microsoft.com;microsoft.com;umd.edu;microsoft.com", "author_num": 7, "aff_unique_index": "0;1;2;2;2;3;2", "aff_unique_norm": "Stanford University;Apple;Microsoft;University of Maryland", "aff_unique_dep": ";Apple Inc.;Microsoft Corporation;", "aff_unique_url": "https://www.stanford.edu;https://www.apple.com;https://www.microsoft.com;https://www/umd.edu", "aff_unique_abbr": "Stanford;Apple;Microsoft;UMD", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Stanford;;College Park", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Distilling Model Failures as Directions in Latent Space", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11743", "id": "99RpBVpLiX", "poster": "/media/PosterPDFs/ICLR%202023/11743.png?t=1682724175.0475805", "openreview": "https://openreview.net/forum?id=99RpBVpLiX", "slides": "https://iclr.cc/virtual/2023/poster/11743", "video": "https://iclr.cc/virtual/2023/poster/11743", "author_site": "Saachi Jain, Hannah Lawrence, Ankur Moitra, Aleksander Madry", "tldr": "We present a scalable method for automatically distilling and captioning a model's failure modes as directions in a latent space.", "abstract": "Existing methods for isolating hard subpopulations and spurious correlations in datasets often require human intervention. This can make these methods labor-intensive and dataset-specific. To address these shortcomings, we present a scalable method for automatically distilling a model's failure modes. Specifically, we harness linear classifiers to identify consistent error patterns, and, in turn, induce a natural representation of these failure modes as directions within the feature space. We demonstrate that this framework allows us to discover and automatically caption challenging subpopulations within the training dataset. 
Moreover, by combining our framework with off-the-shelf diffusion models, we can generate images that are especially challenging for the analyzed model, and thus can be used to perform synthetic data augmentation that helps remedy the model's failure modes.", "keywords": "datasets;biases;subpopulations", "primary_area": "", "supplementary_material": "/attachment/cf68800d62e70258a55e3370c07c56e0ad7cd3e2.zip", "author": "Saachi Jain;Hannah Lawrence;Ankur Moitra;Aleksander Madry", "authorids": "~Saachi_Jain1;~Hannah_Lawrence1;~Ankur_Moitra1;~Aleksander_Madry1", "gender": "F;F;M;M", "homepage": "http://people.csail.mit.edu/saachij/;https://hannahlawrence.github.io/;http://people.csail.mit.edu/moitra/;https://people.csail.mit.edu/madry/", "dblp": "227/2617;251/5474;04/952;67/2454", "google_scholar": "6hsn3EYAAAAJ;;https://scholar.google.com.tw/citations?user=umFQktIAAAAJ;SupjsEUAAAAJ", "orcid": ";;;", "linkedin": ";hannah-lawrence-417b5a130/;;", "or_profile": "~Saachi_Jain1;~Hannah_Lawrence1;~Ankur_Moitra1;~Aleksander_Madry1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;mit.edu;mit.edu", "position": "PhD student;PhD student;;Professor", "bibtex": "@inproceedings{\njain2023distilling,\ntitle={Distilling Model Failures as Directions in Latent Space},\nauthor={Saachi Jain and Hannah Lawrence and Ankur Moitra and Aleksander Madry},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=99RpBVpLiX}\n}", "github": "", "project": "", "reviewers": "ySph;ZiBp;HQAk;ZdLg", "pdf_size": 17244961, "recommendation": "6;8;8;8", "confidence": "3;4;4;3", "correctness": "3;4;3;4", "technical_novelty": "2;4;3;3", "empirical_novelty": "3;4;3;3", "wc_summary_paper": "41;75;126;96", "wc_strength_and_weaknesses": "224;253;254;122", "wc_clarity_quality_novelty_and_reproducibility": "32;19;16;31", "wc_summary_review": "32;28;15;46", "wc_review": "329;375;411;295", "wc_reply_reviewers": "0;72;0;0", "wc_reply_authors": "523;819;625;242", "reply_reviewers": "0;1;0;0", "reply_authors": "1;2;1;1", "recommendation_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 84.5, 30.97176133189716 ], "wc_strength_and_weaknesses_avg": [ 213.25, 54.04338534918034 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 24.5, 7.088723439378913 ], "wc_summary_review_avg": [ 30.25, 11.053845484717073 ], "wc_review_avg": [ 352.5, 44.121989982320606 ], "wc_reply_reviewers_avg": [ 18.0, 31.176914536239792 ], "wc_reply_authors_avg": [ 552.25, 208.30191429749271 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 86, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14192028416031944207&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=99RpBVpLiX", "email": "mit.edu;mit.edu;mit.edu;mit.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", 
"aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "99XwOpGYAH", "title": "Structural Adversarial Objectives for Self-Supervised Representation Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Within the framework of generative adversarial networks (GANs), we propose objectives that task the discriminator with additional structural modeling responsibilities. In combination with an efficient smoothness regularizer imposed on the network, these objectives guide the discriminator to learn to extract informative representations, while maintaining a generator capable of sampling from the domain. Specifically, we influence the features produced by the discriminator at two levels of granularity. At coarse scale, we impose a Gaussian assumption encouraging smoothness and diversified representation, while at finer scale, we group features forming local clusters. Experiments demonstrate that augmenting GANs with these self-supervised objectives suffices to produce discriminators which, evaluated in terms of representation learning, compete with networks trained by state-of-the-art contrastive approaches. Furthermore, operating within the GAN framework frees our system from the reliance on data augmentation schemes that is prevalent across purely contrastive representation learning methods.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Xiao Zhang;Michael Maire", "authorids": "~Xiao_Zhang11;~Michael_Maire1", "gender": "M;M", "homepage": "https://computerscience.uchicago.edu/people/profile/xiao-zhang/;http://people.cs.uchicago.edu/~mmaire/", "dblp": ";73/1498.html", "google_scholar": ";HXowq5YAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Xiao_Zhang11;~Michael_Maire1", "aff": "University of Chicago;University of Chicago", "aff_domain": "uchicago.edu;uchicago.edu", "position": "PhD student;Associate Professor", "bibtex": "@misc{\nzhang2023structural,\ntitle={Structural Adversarial Objectives for Self-Supervised Representation Learning},\nauthor={Xiao Zhang and Michael Maire},\nyear={2023},\nurl={https://openreview.net/forum?id=99XwOpGYAH}\n}", "github": "", "project": "", "reviewers": "XWaT;GyrJ;9r54", "site": "https://openreview.net/forum?id=99XwOpGYAH", "pdf_size": 3131784, "recommendation": "3;5;6", "confidence": "4;3;4", "correctness": "2;3;3", "technical_novelty": "2;4;3", "empirical_novelty": "2;3;3", "wc_summary_paper": "136;83;85", "wc_strength_and_weaknesses": "482;527;197", "wc_clarity_quality_novelty_and_reproducibility": "46;22;54", "wc_summary_review": "69;42;20", "wc_review": "733;674;356", "wc_reply_reviewers": "389;753;0", "wc_reply_authors": "2132;1511;436", "reply_reviewers": "2;3;0", "reply_authors": "4;4;1", "recommendation_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.816496580927726 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 101.33333333333333, 24.526629518862872 ], "wc_strength_and_weaknesses_avg": [ 402.0, 146.11639196202458 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 40.666666666666664, 13.59738536958076 ], "wc_summary_review_avg": [ 43.666666666666664, 20.038851153585515 ], "wc_review_avg": [ 587.6666666666666, 165.57442099819914 ], "wc_reply_reviewers_avg": [ 
380.6666666666667, 307.46743278308713 ], "wc_reply_authors_avg": [ 1359.6666666666667, 700.6094172615406 ], "reply_reviewers_avg": [ 1.6666666666666667, 1.247219128924647 ], "reply_authors_avg": [ 3.0, 1.4142135623730951 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.18898223650461363, "corr_recommendation_correctness": 0.9449111825230683, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1986005654652160777&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0", "aff_unique_norm": "University of Chicago", "aff_unique_dep": "", "aff_unique_url": "https://www.uchicago.edu", "aff_unique_abbr": "UChicago", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "9AuIMiZhkL2", "title": "Ti-MAE: Self-Supervised Masked Time Series Autoencoders", "track": "main", "status": "Reject", "tldr": "Self-Supervised Masked Time Series Autoencoders for time series forecasting and classification", "abstract": "Multivariate Time Series forecasting has been an increasingly popular topic in various applications and scenarios. Recently, contrastive learning and Transformer-based models have achieved good performance in many long-term series forecasting tasks. However, there are still several issues in existing methods. First, the training paradigms of contrastive learning and downstream prediction tasks are inconsistent, leading to suboptimal prediction accuracy. Second, existing Transformer-based models, which learn similar patterns in historical time series data to predict future values, always induce severe distribution shift problems and do not fully leverage the sequence information compared to self-supervised methods. To address these issues, we propose a novel framework named Ti-MAE, in which the input time series are assumed to follow an integrated distribution. In detail, Ti-MAE randomly masks out embedded time series data and learns an autoencoder to reconstruct them at the point-level. Ti-MAE adopts mask modeling as the auxiliary task rather than contrastive learning and bridges the connection between existing representation learning and generative Transformer-based methods, reducing the difference between upstream and downstream forecasting tasks while maintaining the utilization of original time series data. Experiments on several public real-world datasets demonstrate that our framework of masked autoencoding could learn strong representations directly from the raw data, yielding better performance in time series forecasting and classification tasks. 
The code will be made public after this paper is accepted.", "keywords": "Time-Series;Autoencoders;Representation Learning;Self-Supervised Learning", "primary_area": "", "supplementary_material": "/attachment/68be4ade099e8502a98b0efafd053752349bbb48.zip", "author": "Zhe Li;Pengyun Wang;Zhongwen Rao;Lujia Pan;Zenglin Xu", "authorids": "~Zhe_Li13;~Pengyun_Wang1;~Zhongwen_Rao1;~Lujia_Pan2;~Zenglin_Xu1", "gender": "M;M;M;F;M", "homepage": "https://plumprc.github.io/;;;;https://faculty.fudan.edu.cn/xuzenglin/en/index.htm", "dblp": ";130/5404.html;338/6279;72/11359.html;68/1538", "google_scholar": "https://scholar.google.com/citations?hl=en;;;;gF0H9nEAAAAJ", "orcid": ";;;0000-0002-8988-4740;0000-0001-5550-6461", "linkedin": ";;;lujia-pan-246909a/?originalSubdomain=cn;", "or_profile": "~Zhe_Li13;~Pengyun_Wang1;~Zhongwen_Rao1;~Lujia_Pan2;~Zenglin_Xu1", "aff": "Harbin Institute of Technology, Shenzhen;Noah's Ark Lab, Huawei Technologies Ltd.;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Harbin Institute of Technology Shenzhen", "aff_domain": "hit.edu.cn;huawei.com;huawei.com;huawei.com;hit.edu.cn", "position": "MS student;Researcher;Researcher;Researcher;Full Professor", "bibtex": "@misc{\nli2023timae,\ntitle={Ti-{MAE}: Self-Supervised Masked Time Series Autoencoders},\nauthor={Zhe Li and Pengyun Wang and Zhongwen Rao and Lujia Pan and Zenglin Xu},\nyear={2023},\nurl={https://openreview.net/forum?id=9AuIMiZhkL2}\n}", "github": "", "project": "", "reviewers": "o7kt;5eEm;jXSb;6CpA", "site": "https://openreview.net/forum?id=9AuIMiZhkL2", "pdf_size": 441529, "recommendation": "5;5;5;6", "confidence": "4;4;3;4", "correctness": "3;3;3;3", "technical_novelty": "2;2;2;2", "empirical_novelty": "3;2;2;3", "wc_summary_paper": "39;57;73;13", "wc_strength_and_weaknesses": "254;181;103;41", "wc_clarity_quality_novelty_and_reproducibility": "63;27;18;16", "wc_summary_review": "43;170;128;42", "wc_review": "399;435;322;112", "wc_reply_reviewers": "178;0;0;0", "wc_reply_authors": "1858;986;536;493", "reply_reviewers": "1;0;0;0", "reply_authors": "4;2;2;1", "recommendation_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 45.5, 22.28788908802267 ], "wc_strength_and_weaknesses_avg": [ 144.75, 80.2445481014131 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 31.0, 18.934096228761486 ], "wc_summary_review_avg": [ 95.75, 55.28279569631044 ], "wc_review_avg": [ 317.0, 125.19784343190581 ], "wc_reply_reviewers_avg": [ 44.5, 77.07626093681505 ], "wc_reply_authors_avg": [ 968.25, 548.7879257964775 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 0.0, "gs_citation": 53, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14829556359561006719&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;1;1;0", "aff_unique_norm": "Harbin Institute of Technology;Huawei", "aff_unique_dep": ";Noah's Ark Lab", "aff_unique_url": "http://en.hhit.edu.cn/;https://www.huawei.com", "aff_unique_abbr": "HIT;Huawei", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Shenzhen;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "9BXSGPfRhX", "title": "Improving Aspect Ratio Distribution Fairness in 
Detector Pretraining via Cooperating RPN\u2019s", "track": "main", "status": "Reject", "tldr": "We propose Cooperating RPN\u2019s for improving the fairness to object aspect ratio distribution in few-shot object detection.", "abstract": "Region proposal networks (RPN) are a key component of modern object detectors. An RPN identifies image boxes likely to contain objects, and so worth further investigation. An RPN false negative is unrecoverable, so the performance of an object detector can be significantly affected by RPN behavior, particularly in low-data regimes. The RPN for a few shot detector is trained on base classes. Our experiments demonstrate that, if the distribution of box aspect ratios for base classes is different from that for novel classes, errors caused by RPN failure to propose a good box become significant. This is predictable: for example, an RPN trained on base classes that are mostly square will tend to miss short wide boxes. It has not been noticed to date because the (relatively few) standard base/novel class splits on current datasets do not display this effect. But changing the base/novel split highlights the problem. We describe datasets where the distribution shift is severe using PASCAL VOC, COCO, and LVIS datasets.\nWe show that the effect can be mitigated by training multiple distinct but cooperating specialized RPNs. Each specializes in a different aspect ratio, but cooperation constraints reduce the extent to which the RPNs are tuned. This means that if a box is missed by one RPN, it has a good chance of being picked up by another. Experimental evaluation confirms this approach results in substantial improvements in performance on the ARShift benchmarks, while remaining comparable to SOTA on conventional splits. Our approach applies to any few-shot detector and consistently improves performance of detectors.", "keywords": "Few-Shot Learning;Object Detection;Distribution Shift", "primary_area": "", "supplementary_material": "/attachment/1ab542d9feab2532901856028eddb6bf5b101a40.zip", "author": "Weilin Zhang;Xiang Li;Yu-Xiong Wang;David Forsyth", "authorids": "~Weilin_Zhang1;~Xiang_Li19;~Yu-Xiong_Wang1;~David_Forsyth1", "gender": ";M;;M", "homepage": ";https://ryanxli.github.io/;https://yxw.cs.illinois.edu/;https://cs.illinois.edu/directory/profile/daf", "dblp": ";;35/10700;f/DavidAForsyth", "google_scholar": ";3Ds7hOQAAAAJ;T_Q-xDkAAAAJ;https://scholar.google.com.tw/citations?user=5H0arvkAAAAJ", "orcid": ";;;0000-0002-2278-0752", "linkedin": "weilin-zhang/;;;", "or_profile": "~Weilin_Zhang1;~Xiang_Li19;~Yu-Xiong_Wang1;~David_Forsyth1", "aff": ";University of Illinois, Urbana Champaign;Department of Computer Science, University of Illinois Urbana-Champaign;University of Illinois, Urbana-Champaign", "aff_domain": ";illinois.edu;cs.illinois.edu;uiuc.edu", "position": ";PhD student;Assistant Professor;Full Professor", "bibtex": "@misc{\nzhang2023improving,\ntitle={Improving Aspect Ratio Distribution Fairness in Detector Pretraining via Cooperating {RPN}{\\textquoteright}s},\nauthor={Weilin Zhang and Xiang Li and Yu-Xiong Wang and David Forsyth},\nyear={2023},\nurl={https://openreview.net/forum?id=9BXSGPfRhX}\n}", "github": "", "project": "", "reviewers": "haU4;ZdBo;NdSa;UxEa", "site": "https://openreview.net/forum?id=9BXSGPfRhX", "pdf_size": 2895134, "recommendation": "1;3;3;5", "confidence": "5;4;5;5", "correctness": "3;3;2;3", "technical_novelty": "1;3;3;3", "empirical_novelty": "1;2;2;3", "wc_summary_paper": "152;143;93;81", "wc_strength_and_weaknesses": 
"490;480;258;120", "wc_clarity_quality_novelty_and_reproducibility": "51;159;22;8", "wc_summary_review": "33;57;75;20", "wc_review": "726;839;448;229", "wc_reply_reviewers": "0;172;0;72", "wc_reply_authors": "1499;989;870;372", "reply_reviewers": "0;1;0;1", "reply_authors": "2;3;2;1", "recommendation_avg": [ 3.0, 1.4142135623730951 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.8660254037844386 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 117.25, 30.711357833869865 ], "wc_strength_and_weaknesses_avg": [ 337.0, 155.87494987970325 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 60.0, 59.22415047934415 ], "wc_summary_review_avg": [ 46.25, 21.25294097295713 ], "wc_review_avg": [ 560.5, 238.4853244960788 ], "wc_reply_reviewers_avg": [ 61.0, 70.50531894828929 ], "wc_reply_authors_avg": [ 932.5, 400.68098282798496 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:V6L7I3bCHzkJ:scholar.google.com/&scioq=Improving+Aspect+Ratio+Distribution+Fairness+in+Detector+Pretraining+via+Cooperating+RPN%E2%80%99s&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Illinois Urbana-Champaign;University of Illinois", "aff_unique_dep": ";", "aff_unique_url": "https://illinois.edu;https://illinois.edu", "aff_unique_abbr": "UIUC;UIUC", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Urbana-Champaign", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "9CGiwZeCAd", "title": "Closing the Performance Gap between Cumbersome and Lightweight Contrastive Models", "track": "main", "status": "Withdraw", "tldr": "We successfully improve the linear evaluation results from 36.3\\% to 62.3\\% of MobileNet-V3-Large and from 42.2\\% to 65.8\\% of EfficientNet-B0 on ImageNet, closing the accuracy gap to ResNet-50 which contains $5\\times$ parameters.", "abstract": "While self-supervised contrastive learning has made continuous progress utilizing big models, the performance lags far behind when the model size decreases. A common practice to address this problem requires a two-stage training procedure, where a larger model is pretrained in a self-supervised manner first, then its representational knowledge is transferred to a smaller model in the second stage. Despite its effectiveness, this method is highly time-consuming and is inapplicable to some resource-limited scenarios. In this work, we are aiming at directly training a lightweight contrastive model with satisfactory performance in the absence of a pretrained teacher model. Specifically, by empirically exploring the training recipes (e.g., MLP, lower temperature, et al), we boost the accuracy of different lightweight models by a large margin. Besides, we observe that smaller models are more sensitive to noisy labels, and propose a smooth version of InfoNCE loss to alleviate this problem. With these combined techniques, we successfully improve the linear evaluation results from 36.3\\% to 62.3\\% of MobileNet-V3-Large and from 42.2\\% to 65.8\\% of EfficientNet-B0 on ImageNet, closing the accuracy gap to ResNet-50 which contains $5\\times$ parameters. 
These results suggest the feasibility of training lightweight self-supervised models without distillation.\n\n", "keywords": "self-supervised learning;contrastive learning;lightweight model", "primary_area": "", "supplementary_material": "/attachment/e3dd9e3f84c75a8302e55b77e45f0191f2e5d3a3.zip", "author": "Wenye Lin;Yifeng Ding;Zhixiong Cao;Hai-Tao Zheng", "authorids": "~Wenye_Lin1;~Yifeng_Ding1;~Zhixiong_Cao1;~Hai-Tao_Zheng2", "gender": ";M;M;M", "homepage": ";https://github.com/VERSPD0;https://github.com/czxnlp;https://www.sigs.tsinghua.edu.cn/fg3/105069.jhtml", "dblp": ";;;20/134-2", "google_scholar": ";;;https://scholar.google.com.hk/citations?user=7VPeORoAAAAJ", "orcid": ";;;0000-0001-5128-5649", "linkedin": "wenye-lin-a816b5182/;;;", "or_profile": "~Wenye_Lin1;~Yifeng_Ding1;~Zhixiong_Cao1;~Hai-Tao_Zheng2", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "MS student;MS student;MS student;Associate Professor", "bibtex": "@misc{\nlin2023closing,\ntitle={Closing the Performance Gap between Cumbersome and Lightweight Contrastive Models},\nauthor={Wenye Lin and Yifeng Ding and Zhixiong Cao and Hai-Tao Zheng},\nyear={2023},\nurl={https://openreview.net/forum?id=9CGiwZeCAd}\n}", "github": "", "project": "", "reviewers": "tXZs;FSQQ;xL2r;Jemd;cYjm", "site": "https://openreview.net/forum?id=9CGiwZeCAd", "pdf_size": 1260306, "recommendation": "3;3;3;5;6", "confidence": "4;3;4;4;4", "correctness": "3;3;3;3;3", "technical_novelty": "2;2;1;2;2", "empirical_novelty": "2;3;3;3;3", "wc_summary_paper": "36;62;51;87;69", "wc_strength_and_weaknesses": "335;194;270;202;256", "wc_clarity_quality_novelty_and_reproducibility": "15;13;42;26;74", "wc_summary_review": "17;49;14;28;44", "wc_review": "403;318;377;343;443", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;0;0", "reply_authors": "0;0;0;0;0", "recommendation_avg": [ 4.0, 1.2649110640673518 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 1.8, 0.4 ], "empirical_novelty_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_paper_avg": [ 61.0, 17.123083834403193 ], "wc_strength_and_weaknesses_avg": [ 251.4, 51.168740457431625 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 34.0, 22.494443758403985 ], "wc_summary_review_avg": [ 30.4, 14.03709371629327 ], "wc_review_avg": [ 376.8, 43.974538087397804 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.39528470752104744, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:c9-zuwyWMakJ:scholar.google.com/&scioq=Closing+the+Performance+Gap+between+Cumbersome+and+Lightweight+Contrastive+Models&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "9D5FH6LFbRu", "title": "Functional Risk Minimization", "track": "main", "status": "Reject", "tldr": "We propose to model uncertainty in function space rather than output space. 
We derive a learning framework, with experimental results, and show connections to recent theory on over-parameterized generalization.", "abstract": "In this work, we break the classic assumption of data coming from a single function $f_{\\theta^*}(x)$ followed by some noise in output space $p(y|f_{\\theta^*}(x))$. Instead, we model each data point $(x_i,y_i)$ as coming from its own function $f_{\\theta_i}$. We show that this model subsumes Empirical Risk Minimization for many common loss functions, and provides an avenue for more realistic noise processes. We derive Functional Risk Minimization~(FRM), a general framework for scalable training objectives which results in better performance in small-scale experiments in regression and reinforcement learning. We also show that FRM can be seen as finding the simplest model that memorizes the training data, providing an avenue towards understanding generalization in the over-parameterized regime.", "keywords": "learning framework;theory;meta-learning;supervised learning", "primary_area": "", "supplementary_material": "/attachment/9cc2dc4c0ac6df87a9003739ff13bab4a3539a52.zip", "author": "Ferran Alet;Clement Gehring;Tom\u00e1s Lozano-P\u00e9rez;Joshua B. Tenenbaum;Leslie Pack Kaelbling", "authorids": "~Ferran_Alet1;~Clement_Gehring1;~Tom\u00e1s_Lozano-P\u00e9rez1;~Joshua_B._Tenenbaum1;~Leslie_Pack_Kaelbling1", "gender": "M;M;M;;F", "homepage": "http://alet-etal.com;http://people.csail.mit.edu/gehring/;http://people.csail.mit.edu/tlp/;;http://people.csail.mit.edu/lpk/", "dblp": "207/7626;131/5247;90/752;t/JoshuaBTenenbaum;k/LesliePackKaelbling", "google_scholar": "1lmBq3QAAAAJ;KvX7mJUAAAAJ;gQOKAggAAAAJ;;IcasIiwAAAAJ", "orcid": ";;;;0000-0001-6054-7145", "linkedin": ";;;;", "or_profile": "~Ferran_Alet1;~Clement_Gehring1;~Tom\u00e1s_Lozano-P\u00e9rez1;~Joshua_B._Tenenbaum1;~Leslie_Pack_Kaelbling1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;mit.edu;mit.edu;mit.edu", "position": "PhD student;PhD student;Full Professor;Professor;Full Professor", "bibtex": "@misc{\nalet2023functional,\ntitle={Functional Risk Minimization},\nauthor={Ferran Alet and Clement Gehring and Tom{\\'a}s Lozano-P{\\'e}rez and Joshua B. 
Tenenbaum and Leslie Pack Kaelbling},\nyear={2023},\nurl={https://openreview.net/forum?id=9D5FH6LFbRu}\n}", "github": "", "project": "", "reviewers": "vBxE;6yNo;EeLk", "site": "https://openreview.net/forum?id=9D5FH6LFbRu", "pdf_size": 10824120, "recommendation": "3;5;6", "confidence": "3;3;4", "correctness": "2;3;3", "technical_novelty": "4;3;4", "empirical_novelty": "3;3;3", "wc_summary_paper": "104;52;73", "wc_strength_and_weaknesses": "503;220;71", "wc_clarity_quality_novelty_and_reproducibility": "29;20;18", "wc_summary_review": "40;43;15", "wc_review": "676;335;177", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "731;557;508", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 76.33333333333333, 21.35935912480106 ], "wc_strength_and_weaknesses_avg": [ 264.6666666666667, 179.16906975132608 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 22.333333333333332, 4.784233364802441 ], "wc_summary_review_avg": [ 32.666666666666664, 12.552113589175153 ], "wc_review_avg": [ 396.0, 208.2322421400362 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 598.6666666666666, 95.68815089770636 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.7559289460184545, "corr_recommendation_correctness": 0.9449111825230683, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:pErOGPWLWmoJ:scholar.google.com/&scioq=Functional+Risk+Minimization&hl=en&as_sdt=0,44", "gs_version_total": 3, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Gradient-Guided Importance Sampling for Learning Binary Energy-Based Models", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12032", "id": "9DZKk85Z4zA", "poster": "", "openreview": "https://openreview.net/forum?id=9DZKk85Z4zA", "slides": "https://iclr.cc/virtual/2023/poster/12032", "video": "https://iclr.cc/virtual/2023/poster/12032", "author_site": "Meng Liu, Haoran Liu, Shuiwang Ji", "tldr": "", "abstract": "Learning energy-based models (EBMs) is known to be difficult especially on discrete data where gradient-based learning strategies cannot be applied directly. Although ratio matching is a sound method to learn discrete EBMs, it suffers from expensive computation and excessive memory requirements, thereby resulting in difficulties in learning EBMs on high-dimensional data. Motivated by these limitations, in this study, we propose ratio matching with gradient-guided importance sampling (RMwGGIS). Particularly, we use the gradient of the energy function w.r.t. the discrete data space to approximately construct the provably optimal proposal distribution, which is subsequently used by importance sampling to efficiently estimate the original ratio matching objective. 
We perform experiments on density modeling over synthetic discrete data, graph generation, and training Ising models to evaluate our proposed method. The experimental results demonstrate that our method can significantly alleviate the limitations of ratio matching, perform more effectively in practice, and scale to high-dimensional problems. Our implementation is available at https://github.com/divelab/RMwGGIS.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Meng Liu;Haoran Liu;Shuiwang Ji", "authorids": "~Meng_Liu3;~Haoran_Liu1;~Shuiwang_Ji1", "gender": "M;;M", "homepage": "https://mengliu1998.github.io;;http://people.tamu.edu/~sji", "dblp": "41/7841-15;;84/6405", "google_scholar": "https://scholar.google.com/citations?hl=en;;BZGj6sAAAAAJ", "orcid": ";;0000-0002-4205-4563", "linkedin": "meng-liu-4a1813197/;;shuiwang-ji-9a040715/", "or_profile": "~Meng_Liu3;~Haoran_Liu1;~Shuiwang_Ji1", "aff": "Texas A&M University - College Station;;Texas A&M University", "aff_domain": "tamu.edu;;tamu.edu", "position": "PhD student;;Professor", "bibtex": "@inproceedings{\nliu2023gradientguided,\ntitle={Gradient-Guided Importance Sampling for Learning Binary Energy-Based Models},\nauthor={Meng Liu and Haoran Liu and Shuiwang Ji},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=9DZKk85Z4zA}\n}", "github": "", "project": "", "reviewers": "wsgm;D4Cv;KMq7;pBMS", "pdf_size": 11107914, "recommendation": "5;6;8;8", "confidence": "4;3;4;4", "correctness": "3;3;4;4", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "49;118;129;66", "wc_strength_and_weaknesses": "162;170;231;46", "wc_clarity_quality_novelty_and_reproducibility": "40;98;22;35", "wc_summary_review": "77;63;94;13", "wc_review": "328;449;476;160", "wc_reply_reviewers": "0;0;85;0", "wc_reply_authors": "598;490;1010;431", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;3;1", "recommendation_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 90.5, 33.76758800980609 ], "wc_strength_and_weaknesses_avg": [ 152.25, 66.89684222741758 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 48.75, 29.18368551091517 ], "wc_summary_review_avg": [ 61.75, 30.21071829665756 ], "wc_review_avg": [ 353.25, 124.71843287982735 ], "wc_reply_reviewers_avg": [ 21.25, 36.80607966083864 ], "wc_reply_authors_avg": [ 632.25, 226.16628285400986 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 0.9622504486493761, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9809580698156727772&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=9DZKk85Z4zA", "email": "tamu.edu;;tamu.edu", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Texas A&M University", "aff_unique_dep": "", "aff_unique_url": "https://www.tamu.edu", "aff_unique_abbr": "TAMU", "aff_campus_unique_index": "0", "aff_campus_unique": "College Station;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Optimizing Bi-Encoder for Named Entity Recognition via 
Contrastive Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11046", "id": "9EAQVEINuum", "poster": "", "openreview": "https://openreview.net/forum?id=9EAQVEINuum", "slides": "https://iclr.cc/virtual/2023/poster/11046", "video": "https://iclr.cc/virtual/2023/poster/11046", "author_site": "Sheng Zhang, Hao Cheng, Jianfeng Gao, Hoifung Poon", "tldr": "", "abstract": "We present a bi-encoder framework for named entity recognition (NER), which applies contrastive learning to map candidate text spans and entity types into the same vector representation space. Prior work predominantly approaches NER as sequence labeling or span classification. We instead frame NER as a representation learning problem that maximizes the similarity between the vector representations of an entity mention and its type. This makes it easy to handle nested and flat NER alike, and can better leverage noisy self-supervision signals. A major challenge to this bi-encoder formulation for NER lies in separating non-entity spans from entity mentions. Instead of explicitly labeling all non-entity spans as the same class $\\texttt{Outside}$ ($\\texttt{O}$) as in most prior methods, we introduce a novel dynamic thresholding loss, learned in conjunction with the standard contrastive loss. Experiments show that our method performs well in both supervised and distantly supervised settings, for nested and flat NER alike, establishing new state of the art across standard datasets in the general domain (e.g., ACE2004, ACE2005, CoNLL2003) and high-value verticals such as biomedicine (e.g., GENIA, NCBI, BC5CDR, JNLPBA). We release the code at https://github.com/microsoft/binder.", "keywords": "Named Entity Recognition;NER;Bi-Encoder;Contrastive Learning", "primary_area": "", "supplementary_material": "", "author": "Sheng Zhang;Hao Cheng;Jianfeng Gao;Hoifung Poon", "authorids": "~Sheng_Zhang9;~Hao_Cheng4;~Jianfeng_Gao1;~Hoifung_Poon1", "gender": "M;M;M;M", "homepage": "https://sheng-z.github.io/;https://sites.google.com/site/hcheng2site/Home;https://www.microsoft.com/en-us/research/people/jfgao/;https://www.microsoft.com/en-us/research/people/hoifung/", "dblp": "69/6137-12;09/5158-2;92/5339;78/4609", "google_scholar": "-LVEXQ8AAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;yqqmVbkAAAAJ", "orcid": ";0000-0001-7988-3149;;0000-0002-9067-0918", "linkedin": "sheng-z/;;;hoifung-poon-9559943/", "or_profile": "~Sheng_Zhang9;~Hao_Cheng4;~Jianfeng_Gao1;~Hoifung_Poon1", "aff": "Microsoft;Microsoft Research;Microsoft Research;Microsoft", "aff_domain": "microsoft.com;microsoft.com;microsoft.com;microsoft.com", "position": "Researcher;Researcher;Principal Researcher;General Manager", "bibtex": "@inproceedings{\nzhang2023optimizing,\ntitle={Optimizing Bi-Encoder for Named Entity Recognition via Contrastive Learning},\nauthor={Sheng Zhang and Hao Cheng and Jianfeng Gao and Hoifung Poon},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=9EAQVEINuum}\n}", "github": "", "project": "", "reviewers": "QTaJ;FXLT;mFSQ;wC16", "pdf_size": 506641, "recommendation": "5;6;6;8", "confidence": "4;4;4;4", "correctness": "3;3;2;4", "technical_novelty": "3;3;2;3", "empirical_novelty": "3;3;2;3", "wc_summary_paper": "32;88;72;104", "wc_strength_and_weaknesses": "63;89;149;437", "wc_clarity_quality_novelty_and_reproducibility": "57;42;20;69", "wc_summary_review": "16;51;36;27", "wc_review": 
"168;270;277;637", "wc_reply_reviewers": "0;0;99;24", "wc_reply_authors": "748;492;812;628", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;2;1", "recommendation_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 74.0, 26.758176320519304 ], "wc_strength_and_weaknesses_avg": [ 184.5, 149.07967668330917 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 47.0, 18.289341158171883 ], "wc_summary_review_avg": [ 32.5, 12.816005617976296 ], "wc_review_avg": [ 338.0, 177.93678652824997 ], "wc_reply_reviewers_avg": [ 30.75, 40.604033050917494 ], "wc_reply_authors_avg": [ 670.0, 122.16382443260362 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.6488856845230502, "gs_citation": 68, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2431243175994725987&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=9EAQVEINuum", "email": "microsoft.com;microsoft.com;microsoft.com;microsoft.com", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Microsoft", "aff_unique_dep": "Microsoft Corporation", "aff_unique_url": "https://www.microsoft.com", "aff_unique_abbr": "Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "9EcAsB7wgM", "title": "Take 5: Interpretable Image Classification with a Handful of Features", "track": "main", "status": "Reject", "tldr": "", "abstract": "Deep Neural Networks use thousands of mostly incomprehensible features to identify a single class, a decision no human can follow. We propose an interpretable sparse and low dimensional final decision layer in a deep neural network with measurable aspects of interpretability and demonstrate it on fine-grained image classification. We argue that a human can only understand the decision of a machine learning model, if the input features are interpretable and only very few of them are used for a single decision. For that matter, the final layer has to be sparse and \u2013 to make interpreting the features feasible \u2013 low dimensional. We call a model with a Sparse Low-Dimensional Decision \u201cSLDD-Model\u201d. We show that a SLDD-Model is easier to interpret locally and globally than a dense high-dimensional decision layer while being able to maintain competitive accuracy. Additionally, we propose a loss function that improves a model\u2019s feature diversity and accuracy. 
Our interpretable SLDD-Model only uses 5 out of just 50 features per class, while maintaining 97% to 100% of the accuracy on four common benchmark datasets compared to the baseline model with 2048 features.", "keywords": "xAI;interpretability;fine-grained image classification;sparsity;image classification;interpretability by design", "primary_area": "", "supplementary_material": "/attachment/d324922e9e890876b2b3587a30c5a9a4bbe0df3a.zip", "author": "Thomas Norrenbrock;Marco Rudolph;Bodo Rosenhahn", "authorids": "~Thomas_Norrenbrock1;~Marco_Rudolph1;~Bodo_Rosenhahn1", "gender": "M;M;M", "homepage": ";https://www.tnt.uni-hannover.de/en/staff/rudolph/;http://www.tnt.uni-hannover.de/staff/rosenhahn/", "dblp": "344/0668;246/4999;09/2973", "google_scholar": ";https://scholar.google.de/citations?user=XpayVjAAAAAJ;qq3TxtcAAAAJ\\", "orcid": "0000-0001-7284-0568;;", "linkedin": "thomas-norrenbrock-8812111b2/;https://www.linkedin.com/mwlite/in/marco-rudolph-118a3024a;b-rosenhahn-a397b1183/", "or_profile": "~Thomas_Norrenbrock1;~Marco_Rudolph1;~Bodo_Rosenhahn1", "aff": "Universit\u00e4t Hannover;Universit\u00e4t Hannover;Institut f\u00fcr Informationsverarbeitung", "aff_domain": "uni-hannover.de;uni-hannover.de;tnt.uni-hannover.de", "position": "PhD student;PhD student;Professor", "bibtex": "@misc{\nnorrenbrock2023take,\ntitle={Take 5: Interpretable Image Classification with a Handful of Features},\nauthor={Thomas Norrenbrock and Marco Rudolph and Bodo Rosenhahn},\nyear={2023},\nurl={https://openreview.net/forum?id=9EcAsB7wgM}\n}", "github": "", "project": "", "reviewers": "Vk6D;iicb;9CdM", "site": "https://openreview.net/forum?id=9EcAsB7wgM", "pdf_size": 47376289, "recommendation": "3;5;5", "confidence": "3;4;4", "correctness": "2;3;3", "technical_novelty": "3;2;2", "empirical_novelty": "2;2;2", "wc_summary_paper": "57;50;87", "wc_strength_and_weaknesses": "238;203;360", "wc_clarity_quality_novelty_and_reproducibility": "2;4;52", "wc_summary_review": "53;35;128", "wc_review": "350;292;627", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "615;565;518", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 64.66666666666667, 16.048537489614297 ], "wc_strength_and_weaknesses_avg": [ 267.0, 67.29536883520787 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 19.333333333333332, 23.113247764479624 ], "wc_summary_review_avg": [ 72.0, 40.27406113120453 ], "wc_review_avg": [ 423.0, 146.18025402449769 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 566.0, 39.60639678974429 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.9999999999999998, "corr_recommendation_correctness": 0.9999999999999998, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1154830473207687025&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Hanover;Institut f\u00fcr Informationsverarbeitung", "aff_unique_dep": ";Department of Information Processing", "aff_unique_url": "https://www.uni-hannover.de;", "aff_unique_abbr": "Uni Hanover;", "aff_campus_unique_index": "", "aff_campus_unique": "", 
"aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "title": "Generalization and Estimation Error Bounds for Model-based Neural Networks", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11496", "id": "9F_xlC7sk9", "poster": "", "openreview": "https://openreview.net/forum?id=9F_xlC7sk9", "slides": "https://iclr.cc/virtual/2023/poster/11496", "video": "https://iclr.cc/virtual/2023/poster/11496", "author_site": "Avner Shultzman, Eyar Azar, Miguel Rodrigues, Yonina Eldar", "tldr": "", "abstract": "Model-based neural networks provide unparalleled performance for various tasks, such as sparse coding and compressed sensing problems. Due to the strong connection with the sensing model, these networks are interpretable and inherit prior structure of the problem. In practice, model-based neural networks exhibit higher generalization capability compared to ReLU neural networks. However, this phenomenon was not addressed theoretically. Here, we leverage complexity measures including the global and local Rademacher complexities, in order to provide upper bounds on the generalization and estimation errors of model-based networks. We show that the generalization abilities of model-based networks for sparse recovery outperform those of regular ReLU networks, and derive practical design rules that allow to construct model-based networks with guaranteed high generalization. We demonstrate through a series of experiments that our theoretical insights shed light on a few behaviours experienced in practice, including the fact that ISTA and ADMM networks exhibit higher generalization abilities (especially for small number of training samples), compared to ReLU networks.", "keywords": "Model based neural networks;Generalization error;Estimation error;Local Rademacher complexity.", "primary_area": "", "supplementary_material": "/attachment/7bc9bfdbf79066e55f08430a9a416e37cac45221.zip", "author": "Avner Shultzman;Eyar Azar;Miguel R. D. Rodrigues;Yonina C. Eldar", "authorids": "~Avner_Shultzman1;~Eyar_Azar1;~Miguel_R._D._Rodrigues1;~Yonina_C._Eldar2", "gender": "M;M;M;F", "homepage": ";;https://www.ee.ucl.ac.uk/iiml/;https://www.weizmann.ac.il/math/yonina/", "dblp": ";321/6591;21/6763;", "google_scholar": ";6AA9ikkAAAAJ;;https://scholar.google.co.il/citations?user=vyX6kpwAAAAJ", "orcid": " 0000-0001-6664-5600;;;", "linkedin": ";eyar-azar-a7b5ab160;;", "or_profile": "~Avner_Shultzman1;~Eyar_Azar1;~Miguel_R._D._Rodrigues1;~Yonina_C._Eldar2", "aff": ";Weizmann Institute of Science;University College London;", "aff_domain": ";weizmann.ac.il;ucl.ac.uk;", "position": ";MS student;Full Professor;", "bibtex": "@inproceedings{\nshultzman2023generalization,\ntitle={Generalization and Estimation Error Bounds for Model-based Neural Networks},\nauthor={Avner Shultzman and Eyar Azar and Miguel R. D. Rodrigues and Yonina C. 
Eldar},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=9F_xlC7sk9}\n}", "github": "", "project": "", "reviewers": "CF5w;FqeZ;zELY;4ojw", "pdf_size": 876985, "recommendation": "6;6;8;8", "confidence": "3;2;3;4", "correctness": "4;3;4;4", "technical_novelty": "2;3;4;4", "empirical_novelty": "2;3;3;4", "wc_summary_paper": "83;67;591;100", "wc_strength_and_weaknesses": "203;81;241;644", "wc_clarity_quality_novelty_and_reproducibility": "14;27;41;4", "wc_summary_review": "36;33;41;63", "wc_review": "336;208;914;811", "wc_reply_reviewers": "0;0;18;34", "wc_reply_authors": "495;224;1090;804", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;2;1", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 3.25, 0.82915619758885 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 210.25, 220.13561161247856 ], "wc_strength_and_weaknesses_avg": [ 292.25, 211.5104902835791 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 21.5, 13.901438774457844 ], "wc_summary_review_avg": [ 43.25, 11.755317945508747 ], "wc_review_avg": [ 567.25, 300.9097663752375 ], "wc_reply_reviewers_avg": [ 13.0, 14.177446878757825 ], "wc_reply_authors_avg": [ 653.25, 325.10565590281567 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.7071067811865475, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14030303429742432781&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=9F_xlC7sk9", "email": ";weizmann.ac.il;ucl.ac.uk;", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Weizmann Institute of Science;University College London", "aff_unique_dep": ";", "aff_unique_url": "https://www.weizmann.org.il;https://www.ucl.ac.uk", "aff_unique_abbr": "Weizmann;UCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Israel;United Kingdom" }, { "id": "9GOjmbRQ2o", "title": "Sensitivity-aware Visual Parameter-efficient Tuning", "track": "main", "status": "Reject", "tldr": "We propose a visual parameter-efficient tuning approach to identify and tune the parameters at task-specific important positions while being inference-efficient.", "abstract": "Visual Parameter-efficient Tuning (VPT) has become a powerful alternative for full fine-tuning, which only updates a small number of parameters while freezing the remaining vast majority of parameters to significantly reduce the storage costs for adapting the pre-trained vision models to downstream tasks. Although the storage burden is largely alleviated, VPT approaches still face many challenges, e.g., lower inference speed and lacking effective configurations for trainable parameters tailored for each task. In this paper, we present a simple yet effective approach termed Sensitivity-aware visual Parameter-efficient Tuning (SPT) to tackle these challenges. Given a desired tunable parameter budget, SPT quickly identifies the important parameters to the given task in a data-dependent way before fine-tuning, without the complex selection schedule. 
To increase the representational capacity at a negligible cost within the same parameter budget, we employ low-rank reparameterization to achieve a better trade-off between parameter efficiency and accuracy. Through extensive experiments on a wide range of downstream recognition tasks, our SPT achieves better overall transfer performance than the full fine-tuning and the other VPT approaches, with no additional computational or memory overhead during inference. For instance, SPT saves 99.35% of the trainable parameters than the full fine-tuning while achieving a 7.3% higher average top-1 accuracy on VTAB-1k benchmark with the supervised pre-trained ViT-B backbone. Notably, SPT is also the first work that bridges the gap between full fine-tuning and VPT approaches for backbones under self-supervised pre-training strategies MAE and MoCo v3 on the challenging VTAB-1k benchmark.", "keywords": "Visual Parameter-efficient Tuning;Fine-tuning;Visual Task Adaptation", "primary_area": "", "supplementary_material": "/attachment/2fb9c852794e49cb1361ea61af27a82ff461f607.zip", "author": "Haoyu He;Jianfei Cai;Jing Zhang;Dacheng Tao;Bohan Zhuang", "authorids": "~Haoyu_He2;~Jianfei_Cai1;~Jing_Zhang17;~Dacheng_Tao1;~Bohan_Zhuang1", "gender": "M;M;M;;M", "homepage": ";https://jianfei-cai.github.io/;;;https://bohanzhuang.github.io/", "dblp": "184/4312;83/6096;05/3499-37.html;;145/1096", "google_scholar": "aU1zMhUAAAAJ;https://scholar.google.com.tw/citations?user=N6czCoUAAAAJ;https://scholar.google.com/citations?hl=en;;https://scholar.google.com.au/citations?user=DFuDBBwAAAAJ", "orcid": ";;0000-0001-6595-7661;;", "linkedin": ";;;;bohan-zhuang/", "or_profile": "~Haoyu_He2;~Jianfei_Cai1;~Jing_Zhang17;~Dacheng_Tao1;~Bohan_Zhuang1", "aff": "Monash University;Monash University;The University of Sydney;;Monash University", "aff_domain": "monash.edu;monash.edu;sydney.edu.au;;monash.edu", "position": "PhD student;Full Professor;Research Fellow;;Assistant Professor", "bibtex": "@misc{\nhe2023sensitivityaware,\ntitle={Sensitivity-aware Visual Parameter-efficient Tuning},\nauthor={Haoyu He and Jianfei Cai and Jing Zhang and Dacheng Tao and Bohan Zhuang},\nyear={2023},\nurl={https://openreview.net/forum?id=9GOjmbRQ2o}\n}", "github": "", "project": "", "reviewers": "MqQs;y14U;Z771;Sx5t;hvdx", "site": "https://openreview.net/forum?id=9GOjmbRQ2o", "pdf_size": 1618978, "recommendation": "3;5;5;5;6", "confidence": "4;4;3;4;5", "correctness": "4;2;2;3;4", "technical_novelty": "2;3;2;3;3", "empirical_novelty": "2;3;2;2;3", "wc_summary_paper": "37;79;71;92;122", "wc_strength_and_weaknesses": "127;141;86;122;271", "wc_clarity_quality_novelty_and_reproducibility": "30;23;20;57;66", "wc_summary_review": "39;45;23;71;42", "wc_review": "233;288;200;342;501", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "1002;612;319;972;552", "reply_reviewers": "0;0;0;0;0", "reply_authors": "5;4;4;5;4", "recommendation_avg": [ 4.8, 0.9797958971132712 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "correctness_avg": [ 3.0, 0.8944271909999159 ], "technical_novelty_avg": [ 2.6, 0.4898979485566356 ], "empirical_novelty_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_paper_avg": [ 80.2, 27.708482455739073 ], "wc_strength_and_weaknesses_avg": [ 149.4, 63.449507484298096 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 39.2, 18.712562625145708 ], "wc_summary_review_avg": [ 44.0, 15.491933384829668 ], "wc_review_avg": [ 312.8, 105.81001842925839 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 691.4, 260.6250947242034 ], 
"reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 4.4, 0.48989794855663565 ], "replies_avg": [ 34, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.3227486121839514, "corr_recommendation_correctness": -0.22821773229381923, "gs_citation": 63, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16465826929847177759&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Monash University;University of Sydney", "aff_unique_dep": ";", "aff_unique_url": "https://www.monash.edu;https://www.sydney.edu.au", "aff_unique_abbr": "Monash;USYD", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Australia" }, { "id": "9HFobmKAmGv", "title": "A Class-Aware Representation Refinement Framework for Graph Classification", "track": "main", "status": "Withdraw", "tldr": "CARE computes simple yet powerful class representations and injects them to steer the learning of graph representations towards better class separability", "abstract": "Graph Neural Networks (GNNs) are widely used for graph representation learning. Despite its prevalence, GNN suffers from two drawbacks in the graph classification task, the neglect of graph-level relationships, and the generalization issue. Each graph is treated separately in GNN message passing/graph pooling, and existing methods to address overfitting operate on each individual graph. This makes the graph representations learnt less effective in the downstream classification. In this paper, we propose a Class-Aware Representation rEfinement (CARE) framework for the task of graph classification. CARE computes simple yet powerful class representations and injects them to steer the learning of graph representations towards better class separability. CARE is a plug-and-play framework that is highly flexible and able to incorporate arbitrary GNN backbones without significantly increasing the computational cost. We also theoretically prove that CARE has a better generalization upper bound than its GNN backbone through Vapnik-Chervonenkis (VC) dimension analysis. 
Our extensive experiments with 10 well-known GNN backbones on 9 benchmark datasets validate the superiority and effectiveness of CARE over its GNN counterparts.", "keywords": "Graph Neural Network;Representation Learning;Graph Classification", "primary_area": "", "supplementary_material": "/attachment/0247676f5a072d9eb59551a6b0ff3c3ca0a837a8.zip", "author": "Jiaxing Xu;Jinjie Ni;Sophi Shilpa Gururajapathy;Yiping Ke", "authorids": "~Jiaxing_Xu2;~Jinjie_Ni1;sophi.sg@ntu.edu.sg;~Yiping_Ke1", "gender": ";M;;F", "homepage": ";;;https://keyiping.wixsite.com/index", "dblp": ";257/4822;;07/3111", "google_scholar": ";TXfiHo8AAAAJ;;https://scholar.google.com.tw/citations?user=30Fp0YYAAAAJ", "orcid": ";;;0000-0001-9473-3202", "linkedin": ";;;", "or_profile": "~Jiaxing_Xu2;~Jinjie_Ni1;sophi.sg@ntu.edu.sg;~Yiping_Ke1", "aff": ";Nanyang Technological University;;Nanyang Technological University", "aff_domain": ";ntu.edu.sg;;ntu.edu.sg", "position": ";PhD student;;Associate Professor", "bibtex": "@misc{\nxu2023a,\ntitle={A Class-Aware Representation Refinement Framework for Graph Classification},\nauthor={Jiaxing Xu and Jinjie Ni and Sophi Shilpa Gururajapathy and Yiping Ke},\nyear={2023},\nurl={https://openreview.net/forum?id=9HFobmKAmGv}\n}", "github": "", "project": "", "reviewers": "J7S2;1G71;kwmy;PTM8", "site": "https://openreview.net/forum?id=9HFobmKAmGv", "pdf_size": 891854, "recommendation": "5;5;5;5", "confidence": "3;4;4;4", "correctness": "3;3;4;2", "technical_novelty": "2;3;3;4", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "49;66;78;86", "wc_strength_and_weaknesses": "419;425;185;317", "wc_clarity_quality_novelty_and_reproducibility": "24;44;21;76", "wc_summary_review": "16;52;32;89", "wc_review": "508;587;316;568", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1125;793;628;923", "reply_reviewers": "0;0;0;0", "reply_authors": "2;1;1;2", "recommendation_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 69.75, 13.935117509371782 ], "wc_strength_and_weaknesses_avg": [ 336.5, 97.43074463432987 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 41.25, 21.924586655168667 ], "wc_summary_review_avg": [ 47.25, 27.270634389394026 ], "wc_review_avg": [ 494.75, 107.24125838500777 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 867.25, 181.86310098532908 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=464495818544165969&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0", "aff_unique_norm": "Nanyang Technological University", "aff_unique_dep": "", "aff_unique_url": "https://www.ntu.edu.sg", "aff_unique_abbr": "NTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Singapore" }, { "title": "simpleKT: A Simple But Tough-to-Beat Baseline for Knowledge Tracing", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11407", "id": "9HiGqC9C-KA", "poster": "/media/PosterPDFs/ICLR%202023/11407.png?t=1680403470.359604", "openreview": "https://openreview.net/forum?id=9HiGqC9C-KA", "slides": "https://iclr.cc/virtual/2023/poster/11407", "video": 
"https://iclr.cc/virtual/2023/poster/11407", "author_site": "Zitao Liu, Qiongqiong Liu, Jiahao Chen, Shuyan Huang, Weiqi Luo", "tldr": "We propose \\textsc{simpleKT}, a simple but tough-to-beat KT baseline that is simple to implement, computationally friendly and robust to a wide range of KT datasets across different domains", "abstract": "Knowledge tracing (KT) is the problem of predicting students' future performance based on their historical interactions with intelligent tutoring systems. Recently, many works present lots of special methods for applying deep neural networks to KT from different perspectives like model architecture, adversarial augmentation and etc., which make the overall algorithm and system become more and more complex. Furthermore, due to the lack of standardized evaluation protocol \\citep{liu2022pykt}, there is no widely agreed KT baselines and published experimental comparisons become inconsistent and self-contradictory, i.e., the reported AUC scores of DKT on ASSISTments2009 range from 0.721 to 0.821 \\citep{minn2018deep,yeung2018addressing}. Therefore, in this paper, we provide a strong but simple baseline method to deal with the KT task named \\textsc{simpleKT}. Inspired by the Rasch model in psychometrics, we explicitly model question-specific variations to capture the individual differences among questions covering the same set of knowledge components that are a generalization of terms of concepts or skills needed for learners to accomplish steps in a task or a problem. Furthermore, instead of using sophisticated representations to capture student forgetting behaviors, we use the ordinary dot-product attention function to extract the time-aware information embedded in the student learning interactions. Extensive experiments show that such a simple baseline is able to always rank top 3 in terms of AUC scores and achieve 57 wins, 3 ties and 16 loss against 12 DLKT baseline methods on 7 public datasets of different domains. We believe this work serves as a strong baseline for future KT research. 
Code is available at \\url{https://github.com/pykt-team/pykt-toolkit}\\footnote{We merged our model to the \\textsc{pyKT} benchmark at \\url{https://pykt.org/}.}.", "keywords": "knowledge tracing;assessment;ai for education", "primary_area": "", "supplementary_material": "/attachment/9bb9b369c086de4913a8bc1f3ce5aa9955710f44.zip", "author": "Zitao Liu;Qiongqiong Liu;Jiahao Chen;Shuyan Huang;Weiqi Luo", "authorids": "~Zitao_Liu1;~Qiongqiong_Liu1;~Jiahao_Chen6;~Shuyan_Huang1;~Weiqi_Luo1", "gender": "M;F;M;M;F", "homepage": "http://www.zitaoliu.com/;https://github.com/Liu-lqq;http://www.tabchen.com/;https://;https://sonyawong.github.io/", "dblp": "210/0898;;;;", "google_scholar": "rRTzNm0AAAAJ;;;;j_wq3bwAAAAJ", "orcid": "0000-0003-0491-307X;;;;0000-0003-0217-7494", "linkedin": ";;;;https://www.linkedin.cn/injobs/in/shyann-wong-7778a5223", "or_profile": "~Zitao_Liu1;~Qiongqiong_Liu1;~Jiahao_Chen6;~Weiqi_Luo1;~Shyann_Wong2", "aff": "Jinan University;;;Jinan University;Tomorrow Advancing Life, TAL", "aff_domain": "jnu.edu.cn;;;jnu.edu.cn;tal.com", "position": "Full Professor;;;Full Professor;Intern", "bibtex": "@inproceedings{\nliu2023simplekt,\ntitle={simple{KT}: A Simple But Tough-to-Beat Baseline for Knowledge Tracing},\nauthor={Zitao Liu and Qiongqiong Liu and Jiahao Chen and Shuyan Huang and Weiqi Luo},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=9HiGqC9C-KA}\n}", "github": "", "project": "", "reviewers": "vuA2;EgtS;nkiF", "pdf_size": 4451839, "recommendation": "3;6;8", "confidence": "4;2;3", "correctness": "3;4;3", "technical_novelty": "2;3;2", "empirical_novelty": "2;3;3", "wc_summary_paper": "29;75;38", "wc_strength_and_weaknesses": "57;42;97", "wc_clarity_quality_novelty_and_reproducibility": "54;18;292", "wc_summary_review": "22;34;65", "wc_review": "162;169;492", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 5.666666666666667, 2.0548046676563256 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 47.333333333333336, 19.90533150244482 ], "wc_strength_and_weaknesses_avg": [ 65.33333333333333, 23.21398046197353 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 121.33333333333333, 121.57119541879794 ], "wc_summary_review_avg": [ 40.333333333333336, 18.116904322268255 ], "wc_review_avg": [ 274.3333333333333, 153.9401037922073 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.5960395606792698, "corr_recommendation_correctness": 0.1147078669352809, "gs_citation": 50, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3447640824155809708&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=9HiGqC9C-KA", "email": "jnu.edu.cn;;;jnu.edu.cn;tal.com", "author_num": 5, "aff_unique_index": "0;0;1", "aff_unique_norm": "Jinan University;Tomorrow Advancing Life", "aff_unique_dep": ";", "aff_unique_url": "https://www.jnu.edu.cn;", "aff_unique_abbr": "JNU;TAL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": 
"China;" }, { "id": "9IUxnGC8e9u", "title": "Exploring The Capacity Mismatch Problem in Knowledge Distillation from the View of Soft Labels", "track": "main", "status": "Withdraw", "tldr": "The main contributions of our work are the discovery, analysis, and validation of the effect of the smoothed soft label and a less time-consuming and adaptive transfer of the teacher's knowledge method.", "abstract": "Knowledge distillation (KD) has been extensively employed to transfer the knowledge using the soft label from a large teacher model to the smaller students, where the parameters of the teacher are fixed (or partially) during training. Recent studies show that this mode may cause difficulties in knowledge transfer due to the mismatched model capacities. To alleviate the mismatch problem, adjustment of temperature parameters, label smoothing and teacher-student joint training methods (online distillation) to smooth the soft label of a teacher network, have been proposed. But those methods rarely explain the effect of smoothed soft labels to enhance the KD performance. The main contributions of our work are the discovery, analysis, and validation of the effect of the smoothed soft label and a less time-consuming and adaptive transfer of the teacher's knowledge method, namely PESF-KD by adaptive tuning soft labels of the teacher network. Technically, we first mathematically formulate the mismatch as the sharpness gap between teachers' and students' predictive distributions, where we show such a gap can be narrowed with the appropriate smoothness of the soft label. Then, we introduce an adapter module for the teacher and only update the adapter to obtain soft labels with appropriate smoothness. Experiments on various benchmarks show that PESF-KD can significantly reduce the training cost while obtaining competitive results compared to advanced online distillation methods. 
", "keywords": "knowledge distillation;parameter-efficiency;transfer learning", "primary_area": "", "supplementary_material": "/attachment/a9f9ad8550d9d94ac6270755ef8d00393c132620.zip", "author": "Jun Rao;Xv Meng;Liang Ding;Shuhan Qi;Xuebo Liu;Dacheng Tao", "authorids": "~Jun_Rao1;~Xv_Meng1;~Liang_Ding3;~Shuhan_Qi1;~Xuebo_Liu1;~Dacheng_Tao1", "gender": "M;M;M;M;M;", "homepage": "https://github.com/raojay7/;https://www.researchgate.net/profile/Xv-Meng;http://liamding.cc/;https://faculty.hitsz.edu.cn/qishuhan;https://sunbowliu.github.io/;", "dblp": "r/JunRao;;88/3340-6.html;38/10422;166/0029-2;", "google_scholar": "https://scholar.google.com.hk/citations?user=gbhq6EoAAAAJ;;lFCLvOAAAAAJ;;XkDl9aoAAAAJ;", "orcid": ";;;0000-0002-6903-145X;;", "linkedin": ";;;;xuebo-liu-47877b195/;", "or_profile": "~Jun_Rao1;~Xv_Meng1;~Liang_Ding3;~Shuhan_Qi1;~Xuebo_Liu1;~Dacheng_Tao1", "aff": "Harbin Institute of Technology;;JD Explore Academy, JD.com Inc.;Harbin Insitute of Technology, Shenzhen;Harbin Institute of Technolgy, Shenzhen;", "aff_domain": "hit.edu.cn;;jd.com;cs.hitsz.edu.cn;hit.edu.cn;", "position": "PhD student;;Research Scientist;Associate Professor;Assistant Professor;", "bibtex": "@misc{\nrao2023exploring,\ntitle={Exploring The Capacity Mismatch Problem in Knowledge Distillation from the View of Soft Labels},\nauthor={Jun Rao and Xv Meng and Liang Ding and Shuhan Qi and Xuebo Liu and Dacheng Tao},\nyear={2023},\nurl={https://openreview.net/forum?id=9IUxnGC8e9u}\n}", "github": "", "project": "", "reviewers": "6bch;dRUb;s5wY;7EDN;TfTn", "site": "https://openreview.net/forum?id=9IUxnGC8e9u", "pdf_size": 1232807, "recommendation": "3;5;5;5;5", "confidence": "4;4;3;3;4", "correctness": "3;3;3;2;4", "technical_novelty": "2;2;3;3;2", "empirical_novelty": "3;0;3;2;2", "wc_summary_paper": "53;117;105;60;112", "wc_strength_and_weaknesses": "419;544;343;585;198", "wc_clarity_quality_novelty_and_reproducibility": "24;34;175;15;41", "wc_summary_review": "19;20;154;39;62", "wc_review": "515;715;777;699;413", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;0;0", "reply_authors": "0;0;0;0;0", "recommendation_avg": [ 4.6, 0.7999999999999999 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "correctness_avg": [ 3.0, 0.6324555320336759 ], "technical_novelty_avg": [ 2.4, 0.4898979485566356 ], "empirical_novelty_avg": [ 2.0, 1.0954451150103321 ], "wc_summary_paper_avg": [ 89.4, 27.222049886075812 ], "wc_strength_and_weaknesses_avg": [ 417.8, 139.85049159727683 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 57.8, 59.260104623599844 ], "wc_summary_review_avg": [ 58.8, 50.109480140987294 ], "wc_review_avg": [ 623.8, 136.90639137746638 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.408248290463863, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:lKasfMLkLwsJ:scholar.google.com/&scioq=Exploring+The+Capacity+Mismatch+Problem+in+Knowledge+Distillation+from+the+View+of+Soft+Labels&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Harbin Institute of Technology;JD.com Inc.", "aff_unique_dep": ";JD Explore Academy", "aff_unique_url": "http://www.hit.edu.cn/;https://www.jd.com", "aff_unique_abbr": "HIT;JD.com", "aff_campus_unique_index": "0;2;2", "aff_campus_unique": 
"Harbin;;Shenzhen", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Deconstructing Distributions: A Pointwise Framework of Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11397", "id": "9IaN4FkVSR1", "poster": "", "openreview": "https://openreview.net/forum?id=9IaN4FkVSR1", "slides": "https://iclr.cc/virtual/2023/poster/11397", "video": "https://iclr.cc/virtual/2023/poster/11397", "author_site": "Gal Kaplun, Nikhil Ghosh, Saurabh Garg, Boaz Barak, Preetum Nakkiran", "tldr": "We propose a new lens for studying the pointwise performance of learning algorithms which reveals new insights into their behavior and goes beyond traditional notions of in-distribution and \"out-of-distribution\" learning. ", "abstract": "In machine learning, we traditionally evaluate the performance of a single model, averaged over a collection of test inputs. In this work, we propose a new approach: we measure the performance of a collection of models when evaluated at *single input point*. Specifically, we study a point's *profile*: the relationship between models' average performance on the test distribution and their pointwise performance on this individual point. We find that profiles can yield new insights into the structure of both models and data---in and out-of-distribution. For example, we empirically show that real data distributions consist of points with qualitatively different profiles. On one hand, there are ``compatible'' points with strong correlation between the pointwise and average performance. On the other hand, there are points with weak and even *negative* correlation: cases where improving overall model accuracy actually *hurts* performance on these inputs. As an application, we use profiles to construct a dataset we call CIFAR-10-NEG: a subset of CINIC-10 such that for standard models, accuracy on CIFAR-10-NEG is *negatively correlated* with CIFAR-10 accuracy. 
Illustrating for the first time an OOD dataset that completely inverts ``accuracy-on-the-line'' (Miller et al., 2021).", "keywords": "understanding deep learning;empirical investigation;distribution shift", "primary_area": "", "supplementary_material": "", "author": "Gal Kaplun;Nikhil Ghosh;Saurabh Garg;Boaz Barak;Preetum Nakkiran", "authorids": "~Gal_Kaplun1;~Nikhil_Ghosh1;~Saurabh_Garg3;~Boaz_Barak2;~Preetum_Nakkiran1", "gender": "M;M;M;M;", "homepage": "http://www.galkaplun.com;;http://saurabhgarg1996.github.io/;https://boazbarak.org;http://preetum.nakkiran.org", "dblp": "237/9816;251/8779;80/208;b/BBarak;151/6343", "google_scholar": "y4BzFYsAAAAJ;0Fv4bikAAAAJ;SAnJ1hIAAAAJ;I0fbJ6cAAAAJ;zithBbUAAAAJ", "orcid": ";;;0000-0002-4053-8927;", "linkedin": "gal-kaplun-865496151/;nikhil-ghosh-03389199/;saurabh-garg-b680b5b8/;;", "or_profile": "~Gal_Kaplun1;~Nikhil_Ghosh1;~Saurabh_Garg3;~Boaz_Barak2;~Preetum_Nakkiran1", "aff": "Harvard University;University of California, Berkeley;Carnegie Mellon University;Harvard University;Apple", "aff_domain": "harvard.edu;berkeley.edu;cmu.edu;fas.harvard.edu;apple.com", "position": "PhD student;PhD student;PhD student;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\nkaplun2023deconstructing,\ntitle={Deconstructing Distributions: A Pointwise Framework of Learning},\nauthor={Gal Kaplun and Nikhil Ghosh and Saurabh Garg and Boaz Barak and Preetum Nakkiran},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=9IaN4FkVSR1}\n}", "github": "", "project": "", "reviewers": "diDC;5UuZ;NDc7;JEZe", "pdf_size": 14120008, "recommendation": "6;6;8;8", "confidence": "3;3;4;4", "correctness": "3;4;3;4", "technical_novelty": "3;4;3;3", "empirical_novelty": "3;4;4;3", "wc_summary_paper": "42;95;100;93", "wc_strength_and_weaknesses": "380;165;638;125", "wc_clarity_quality_novelty_and_reproducibility": "15;15;33;13", "wc_summary_review": "16;44;20;23", "wc_review": "453;319;791;254", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "469;405;527;308", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.5, 0.5 ], "wc_summary_paper_avg": [ 82.5, 23.521266972678152 ], "wc_strength_and_weaknesses_avg": [ 327.0, 204.06984098587424 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 19.0, 8.12403840463596 ], "wc_summary_review_avg": [ 25.75, 10.825317547305483 ], "wc_review_avg": [ 454.25, 207.24065117635584 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 427.25, 81.25384606281723 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 1.0, "corr_recommendation_correctness": 0.0, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5333930227879130753&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=9IaN4FkVSR1", "email": "harvard.edu;berkeley.edu;cmu.edu;fas.harvard.edu;apple.com", "author_num": 5, "aff_unique_index": "0;1;2;0;3", "aff_unique_norm": "Harvard University;University of California, Berkeley;Carnegie Mellon University;Apple", "aff_unique_dep": ";;;Apple Inc.", "aff_unique_url": "https://www.harvard.edu;https://www.berkeley.edu;https://www.cmu.edu;https://www.apple.com", "aff_unique_abbr": 
"Harvard;UC Berkeley;CMU;Apple", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "9IlzJa5cAv", "title": "DT+GNN: A Fully Explainable Graph Neural Network using Decision Trees", "track": "main", "status": "Reject", "tldr": "A new GNN architecture that allows for full explanation not only of the important imputs but also the full decision making process how the inputs are used.", "abstract": "We propose a new Decision Tree Graph Neural Network (DT+GNN) architecture for Graph Neural Network (GNN) explanation. Existing post-hoc explanation methods highlight important inputs but fail to reveal how a GNN uses these inputs. In contrast DT+GNN is fully explainable: Humans can inspect and understand the decision making of DT+GNN at every step. DT+GNN internally uses a novel GNN layer that is restricted to categorical state spaces for nodes and messages. After training with gradient descent we can easily distill these layers into decision trees. These trees are further pruned using our newly proposed method to ensure they are small and easy to interpret. DT+GNN can also compute node-level importance scores like the existing explanation methods. We demonstrate on real-world GNN benchmarks that DT+GNN has competitive classification accuracy and computes competitive explanations. Furthermore, we leverage DT+GNN's full explainability to inspect the decision processes in synthetic and real-world datasets with surprising results. We make this inspection accessible through an interactive web tool.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/2993a949e1719a99f0fb1b1a18ce769f65a492f7.zip", "author": "Peter M\u00fcller;Lukas Faber;Karolis Martinkus;Roger Wattenhofer", "authorids": "~Peter_M\u00fcller2;~Lukas_Faber1;~Karolis_Martinkus1;~Roger_Wattenhofer1", "gender": ";;M;Not Specified", "homepage": ";;https://disco.ethz.ch/members/mkarolis;https://disco.ethz.ch/members/wroger", "dblp": ";;276/5531;w/RogerWattenhofer", "google_scholar": ";;https://scholar.google.ch/citations?user=Sr6ho54AAAAJ;https://scholar.google.ch/citations?user=EG3VPm4AAAAJ", "orcid": ";;0000-0002-5344-4321;", "linkedin": "muepe/;;;roger-wattenhofer-4466731/", "or_profile": "~Peter_M\u00fcller2;~Lukas_Faber1;~Karolis_Martinkus1;~Roger_Wattenhofer1", "aff": ";;Swiss Federal Institute of Technology;Swiss Federal Institute of Technology", "aff_domain": ";;ethz.ch;ethz.ch", "position": ";;PhD student;Full Professor", "bibtex": "@misc{\nm{\\\"u}ller2023dtgnn,\ntitle={{DT}+{GNN}: A Fully Explainable Graph Neural Network using Decision Trees},\nauthor={Peter M{\\\"u}ller and Lukas Faber and Karolis Martinkus and Roger Wattenhofer},\nyear={2023},\nurl={https://openreview.net/forum?id=9IlzJa5cAv}\n}", "github": "", "project": "", "reviewers": "9FEb;gfpK;Sxw4;6BLn", "site": "https://openreview.net/forum?id=9IlzJa5cAv", "pdf_size": 5170803, "recommendation": "6;6;6;6", "confidence": "3;3;3;4", "correctness": "4;2;4;3", "technical_novelty": "3;2;3;3", "empirical_novelty": "3;2;3;3", "wc_summary_paper": "62;88;43;110", "wc_strength_and_weaknesses": "65;170;71;231", "wc_clarity_quality_novelty_and_reproducibility": "58;74;31;12", "wc_summary_review": "33;31;55;28", "wc_review": "218;363;200;381", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "33;602;408;913", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;2", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], 
"correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 75.75, 25.420218331084413 ], "wc_strength_and_weaknesses_avg": [ 134.25, 69.70428609490237 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 43.75, 23.920441049445557 ], "wc_summary_review_avg": [ 36.75, 10.685855136581255 ], "wc_review_avg": [ 290.5, 81.99542670173746 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 489.0, 318.9992162999778 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16473954616370332184&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Swiss Federal Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.ethz.ch", "aff_unique_abbr": "ETH Zurich", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Switzerland" }, { "title": "Finding the Global Semantic Representation in GAN through Fr\u00e9chet Mean", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11625", "id": "9ImtNIZ7bYx", "poster": "/media/PosterPDFs/ICLR%202023/11625.png?t=1681999442.646878", "openreview": "https://openreview.net/forum?id=9ImtNIZ7bYx", "slides": "https://iclr.cc/virtual/2023/poster/11625", "video": "https://iclr.cc/virtual/2023/poster/11625", "author_site": "Jaewoong Choi, Geonho Hwang, Hyunsoo Cho, Myungjoo Kang", "tldr": "We propose the global basis for semantics in the latent space of GAN through Fr\u00e9chet Mean.", "abstract": "The ideally disentangled latent space in GAN involves the global representation of latent space using semantic attribute coordinates. In other words, in this disentangled space, there exists the global semantic basis as a vector space where each basis component describes one attribute of generated images. In this paper, we propose an unsupervised method for finding this global semantic basis in the intermediate latent space in GANs. This semantic basis represents sample-independent meaningful perturbations that change the same semantic attribute of an image on the entire latent space. The proposed global basis, called Fr\u00e9chet basis, is derived by introducing Fr\u00e9chet mean to the local semantic perturbations in a latent space. Fr\u00e9chet basis is discovered in two stages. First, the global semantic subspace is discovered by the Fr\u00e9chet mean in the Grassmannian manifold of the local semantic subspaces. Second, Fr\u00e9chet basis is found by optimizing a basis of the semantic subspace via the Fr\u00e9chet mean in the Special Orthogonal Group. Experimental results demonstrate that Fr\u00e9chet basis provides better semantic factorization and robustness compared to the previous methods. Moreover, we suggest the basis refinement scheme for the previous methods. 
The quantitative experiments show that the refined basis achieves better semantic factorization while being constrained to the same semantic subspace given by the previous method.", "keywords": "generative adversarial network;disentanglement;semantic factorization", "primary_area": "", "supplementary_material": "/attachment/6ca3426793c408938d6661f52059ab98385e77c8.zip", "author": "Jaewoong Choi;Geonho Hwang;Hyunsoo Cho;Myungjoo Kang", "authorids": "~Jaewoong_Choi1;~Geonho_Hwang1;~Hyunsoo_Cho2;~Myungjoo_Kang1", "gender": "M;;M;", "homepage": ";;;http://ncia.snu.ac.kr/", "dblp": "63/11483;;;64/5657.html", "google_scholar": "e4ZLjREAAAAJ;UJ_Mw6YAAAAJ;;", "orcid": ";;0000-0003-1172-2458;", "linkedin": ";;hyunsoo-cho-82569478;", "or_profile": "~Jaewoong_Choi1;~Geonho_Hwang1;~Hyunsoo_Cho2;~Myungjoo_Kang1", "aff": "Korea Institute for Advanced Study;Seoul National University;Seoul National University;Seoul National University", "aff_domain": "kias.re.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr", "position": "Postdoc;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nchoi2023finding,\ntitle={Finding the Global Semantic Representation in {GAN} through Fr\\'echet Mean},\nauthor={Jaewoong Choi and Geonho Hwang and Hyunsoo Cho and Myungjoo Kang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=9ImtNIZ7bYx}\n}", "github": "", "project": "", "reviewers": "hbhj;McwQ;G2MK;toHN", "pdf_size": 45110272, "recommendation": "6;6;8;8", "confidence": "4;4;2;4", "correctness": "4;3;4;3", "technical_novelty": "3;3;3;2", "empirical_novelty": "2;3;2;0", "wc_summary_paper": "77;49;90;73", "wc_strength_and_weaknesses": "229;83;122;158", "wc_clarity_quality_novelty_and_reproducibility": "71;45;37;50", "wc_summary_review": "62;43;22;33", "wc_review": "439;220;271;314", "wc_reply_reviewers": "0;164;0;18", "wc_reply_authors": "850;915;280;777", "reply_reviewers": "0;1;0;1", "reply_authors": "2;2;1;3", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.75, 1.0897247358851685 ], "wc_summary_paper_avg": [ 72.25, 14.821858857781638 ], "wc_strength_and_weaknesses_avg": [ 148.0, 53.76337043006139 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 50.75, 12.577261228105266 ], "wc_summary_review_avg": [ 40.0, 14.713938969562161 ], "wc_review_avg": [ 311.0, 81.04628307331558 ], "wc_reply_reviewers_avg": [ 45.5, 68.80951969022891 ], "wc_reply_authors_avg": [ 705.5, 250.4660655657768 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1721828828326147419&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=9ImtNIZ7bYx", "email": "kias.re.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr", "author_num": 4, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Korea Institute for Advanced Study;Seoul National University", "aff_unique_dep": ";", "aff_unique_url": "http://www.kaist.edu;https://www.snu.ac.kr", "aff_unique_abbr": "KIAS;SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Link Prediction with Non-Contrastive 
Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11383", "id": "9Jaz4APHtWD", "poster": "/media/PosterPDFs/ICLR%202023/11383.png?t=1682151695.0723298", "openreview": "https://openreview.net/forum?id=9Jaz4APHtWD", "slides": "https://iclr.cc/virtual/2023/poster/11383", "video": "https://iclr.cc/virtual/2023/poster/11383", "author_site": "William Shiao, Zhichun Guo, Tong Zhao, Evangelos Papalexakis, Yozen Liu, Neil Shah", "tldr": "We evaluate the performance of non-contrastive methods on link prediction and propose a new method to improve its performance in the inductive setting.", "abstract": "Graph neural networks (GNNs) are prominent in the graph machine learning domain, owing to their strong performance across various tasks. A recent focal area is the space of graph self-supervised learning (SSL), which aims to derive useful node representations without labeled data. Notably, many state-of-the-art graph SSL methods are contrastive methods, which use a combination of positive and negative samples to learn node representations. Owing to challenges in negative sampling (slowness and model sensitivity), recent literature introduced non-contrastive methods, which instead only use positive samples. Though such methods have shown promising performance in node-level tasks, their suitability for link prediction tasks, which are concerned with predicting link existence between pairs of nodes (and have broad applicability to recommendation systems contexts) is yet unexplored. In this work, we extensively evaluate the performance of existing non-contrastive methods for link prediction in both transductive and inductive settings. While most existing non-contrastive methods perform poorly overall, we find that, surprisingly, BGRL generally performs well in transductive settings. However, it performs poorly in the more realistic inductive settings where the model has to generalize to links to/from unseen nodes. We find that non-contrastive models tend to overfit to the training graph and use this analysis to propose T-BGRL, a novel non-contrastive framework that incorporates cheap corruptions to improve the generalization ability of the model. This simple modification strongly improves inductive performance in 5/6 of our datasets, with up to a 120% improvement in Hits@50 - all with comparable speed to other non-contrastive baselines, and up to $14\\times$ faster than the best-performing contrastive baseline. Our work imparts interesting findings about non-contrastive learning for link prediction and paves the way for future researchers to further expand upon this area.", "keywords": "graph learning;graph neural networks;non-contrastive learning;link prediction", "primary_area": "", "supplementary_material": "", "author": "William Shiao;Zhichun Guo;Tong Zhao;Evangelos E. 
Papalexakis;Yozen Liu;Neil Shah", "authorids": "~William_Shiao1;~Zhichun_Guo1;~Tong_Zhao3;~Evangelos_E._Papalexakis1;~Yozen_Liu1;~Neil_Shah2", "gender": "M;;M;M;;M", "homepage": "https://shiao.me;;https://tzhao.io/;http://www.cs.ucr.edu/~epapalex;https://www.linkedin.com/in/yozen-liu-531a67130/;http://nshah.net", "dblp": "304/3898;;94/6503-3;48/9024;242/8056.html;71/7771", "google_scholar": "TIq-P5AAAAAJ;;05cRc-MAAAAJ;https://scholar.google.com.tw/citations?user=2P1kinAAAAAJ;i3U2JjEAAAAJ;Qut69OgAAAAJ", "orcid": "0000-0001-5813-2266;;0000-0001-7660-1732;0000-0002-3411-8483;;0000-0003-3261-8430", "linkedin": "will-shiao;;;;;", "or_profile": "~William_Shiao1;~Zhichun_Guo1;~Tong_Zhao3;~Evangelos_E._Papalexakis1;~Yozen_Liu1;~Neil_Shah2", "aff": "University of California, Riverside;;Snap Inc.;University of California, Riverside;Snap Inc.;Snap Inc.", "aff_domain": "ucr.edu;;snap.com;ucr.edu;snapchat.com;snap.com", "position": "PhD student;;Researcher;Associate Professor;Researcher;Research Scientist", "bibtex": "@inproceedings{\nshiao2023link,\ntitle={Link Prediction with Non-Contrastive Learning},\nauthor={William Shiao and Zhichun Guo and Tong Zhao and Evangelos E. Papalexakis and Yozen Liu and Neil Shah},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=9Jaz4APHtWD}\n}", "github": "", "project": "", "reviewers": "N6ym;Qz7f;myZB;tANx", "pdf_size": 674096, "recommendation": "5;5;6;6", "confidence": "3;3;4;4", "correctness": "2;3;4;3", "technical_novelty": "2;3;2;3", "empirical_novelty": "0;2;2;3", "wc_summary_paper": "74;36;57;53", "wc_strength_and_weaknesses": "308;92;38;103", "wc_clarity_quality_novelty_and_reproducibility": "48;25;249;51", "wc_summary_review": "19;18;25;33", "wc_review": "449;171;369;240", "wc_reply_reviewers": "0;0;0;13", "wc_reply_authors": "1416;785;1279;581", "reply_reviewers": "0;0;0;1", "reply_authors": "5;4;2;2", "recommendation_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 1.75, 1.0897247358851685 ], "wc_summary_paper_avg": [ 55.0, 13.509256086106296 ], "wc_strength_and_weaknesses_avg": [ 135.25, 102.72627463312392 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 93.25, 90.4830785285293 ], "wc_summary_review_avg": [ 23.75, 5.973901572674261 ], "wc_review_avg": [ 307.25, 108.38905618188582 ], "wc_reply_reviewers_avg": [ 3.25, 5.629165124598851 ], "wc_reply_authors_avg": [ 1015.25, 343.42129738849917 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.25, 1.299038105676658 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 1.0, "corr_recommendation_correctness": 0.7071067811865475, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6856746238920668095&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=9Jaz4APHtWD", "email": "ucr.edu;;snap.com;ucr.edu;snapchat.com;snap.com", "author_num": 6, "aff_unique_index": "0;1;0;1;1", "aff_unique_norm": "University of California, Riverside;Snap Inc.", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucr.edu;https://www.snapinc.com", "aff_unique_abbr": "UCR;Snap", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Riverside;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "9JjGZsDvHb", "title": "Metro: Memory-Enhanced Transformer for 
Retrosynthetic Planning via Reaction Tree", "track": "main", "status": "Reject", "tldr": "We use a reaction database to search for retrosynthetic routes and introduce a memory network to learn the context information of the route.", "abstract": "Retrosynthetic planning plays a critical role in drug discovery and organic chemistry. Starting from a target molecule as the root node, it aims to find a complete reaction tree subject to the constraint that all leaf nodes belong to a set of starting materials. The multi-step reactions are crucial because they determine the flow chart of production in the organic chemical industry. However, existing datasets lack curation of tree-structured multi-step reactions and fail to provide such reaction trees, limiting models' understanding of organic molecule transformations. In this work, we first develop a benchmark curated for the retrosynthetic planning task, which consists of 124,869 reaction trees retrieved from the public USPTO-full dataset. On top of that, we propose Metro: Memory-Enhanced Transformer for RetrOsynthetic planning. Specifically, the dependency among molecules in the reaction tree is captured as context information for multi-step retrosynthesis predictions through transformers with a memory module. Extensive experiments show that Metro dramatically outperforms existing single-step retrosynthesis models by at least 10.7% in top-1 accuracy. The experiments demonstrate the superiority of exploiting context information in the retrosynthetic planning task. Moreover, the proposed model can be directly used for synthetic accessibility analysis, as it is trained on reaction trees with the shortest depths. Our work is the first step towards a brand new formulation for retrosynthetic planning in the aspects of data construction, model design, and evaluation.", "keywords": "Retrosynthetic Planning;Transformer;Memory Network;Reaction Database;Reaction tree", "primary_area": "", "supplementary_material": "", "author": "Songtao Liu;Zhitao Ying;Zuobai Zhang;Peilin Zhao;Jian Tang;Lu Lin;Dinghao Wu", "authorids": "~Songtao_Liu2;~Zhitao_Ying1;~Zuobai_Zhang1;~Peilin_Zhao2;~Jian_Tang1;~Lu_Lin2;~Dinghao_Wu1", "gender": "M;M;M;;;F;", "homepage": "https://songtaoliu0823.github.io/;https://www.cs.yale.edu/homes/ying-rex;https://oxer11.github.io/;;http://www.jian-tang.com;https://louise-lulin.github.io;", "dblp": ";209/4936;256/9098.html;84/8411;181/2667-5;86/2209-1;", "google_scholar": "https://scholar.google.com.tw/citations?hl=zh-CN;6fqNXooAAAAJ;UCDbNccAAAAJ;https://scholar.google.com.hk/citations?user=HPeX_YcAAAAJ;https://scholar.google.ca/citations?user=1ir6WUEAAAAJ;8N04pBgAAAAJ;", "orcid": ";;;0000-0001-8543-3953;;0000-0002-2539-3352;", "linkedin": ";rex-ying-92770148/;;;;lulin92/;", "or_profile": "~Songtao_Liu2;~Zhitao_Ying1;~Zuobai_Zhang1;~Peilin_Zhao2;~Jian_Tang1;~Lu_Lin2;~Dinghao_Wu1", "aff": "Shanghai Jiaotong University;Yale University;Mila - Universit\u00e9 de Montr\u00e9al;Tencent;Mila, HEC Montreal;Pennsylvania State University;", "aff_domain": "sjtu.edu.cn;yale.edu;umontreal.ca;tencent.com;hec.ca;psu.edu;", "position": "Intern;Assistant Professor;PhD student;Researcher;Assistant Professor;Assistant Professor;", "bibtex": "@misc{\nliu2023metro,\ntitle={Metro: Memory-Enhanced Transformer for Retrosynthetic Planning via Reaction Tree},\nauthor={Songtao Liu and Zhitao Ying and Zuobai Zhang and Peilin Zhao and Jian Tang and Lu Lin and Dinghao Wu},\nyear={2023},\nurl={https://openreview.net/forum?id=9JjGZsDvHb}\n}", "github": "", "project": "", 
"reviewers": "tdV3;f3iV;r9kB;1qQ1", "site": "https://openreview.net/forum?id=9JjGZsDvHb", "pdf_size": 656673, "recommendation": "3;3;3;5", "confidence": "5;3;5;4", "correctness": "3;1;3;3", "technical_novelty": "2;3;2;3", "empirical_novelty": "2;3;1;3", "wc_summary_paper": "60;157;31;14", "wc_strength_and_weaknesses": "358;298;71;342", "wc_clarity_quality_novelty_and_reproducibility": "36;126;206;40", "wc_summary_review": "43;68;29;41", "wc_review": "497;649;337;437", "wc_reply_reviewers": "141;0;0;0", "wc_reply_authors": "1761;827;1115;771", "reply_reviewers": "1;0;0;0", "reply_authors": "4;2;3;2", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "correctness_avg": [ 2.5, 0.8660254037844386 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 65.5, 55.328564051491526 ], "wc_strength_and_weaknesses_avg": [ 267.25, 115.41528278352048 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 102.0, 69.98571282769076 ], "wc_summary_review_avg": [ 45.25, 14.184057952504283 ], "wc_review_avg": [ 480.0, 113.07961796893373 ], "wc_reply_reviewers_avg": [ 35.25, 61.054790966802926 ], "wc_reply_authors_avg": [ 1118.5, 393.2387951359835 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.17407765595569782, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13788983991118944741&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;4;5", "aff_unique_norm": "Shanghai Jiao Tong University;Yale University;Universit\u00e9 de Montr\u00e9al;Tencent;HEC Montreal;Pennsylvania State University", "aff_unique_dep": ";;Mila;Tencent Holdings Limited;HEC Business School;", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.yale.edu;https://www.umontreal.ca;https://www.tencent.com;https://www.hec.ca;https://www.psu.edu", "aff_unique_abbr": "SJTU;Yale;UdeM;Tencent;HEC;PSU", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Montr\u00e9al;Montreal", "aff_country_unique_index": "0;1;2;0;2;1", "aff_country_unique": "China;United States;Canada" }, { "id": "9KmnrUpU2DG", "title": "Lost Domain Generalization Is a Natural Consequence of Lack of Training Domains", "track": "main", "status": "Reject", "tldr": "We show a hardness result for the number of training domains required to achieve a small population error on the test domain.", "abstract": "We show a hardness result for the number of training domains required to achieve a small population error in the test domain. Although many domain generalization algorithms have been developed under various domain-invariance assumptions, there is significant evidence to indicate that out-of-distribution (o.o.d.) test accuracy of state-of-the-art o.o.d. algorithms is on par with empirical risk minimization and random guessing on domain generalization benchmarks such as DomainBed. In this work, we analyze its cause and attribute the lost domain generalization to the lack of training domains. 
We show that, in a minimax lower bound fashion, \\emph{any} learning algorithm that outputs a classifier with an $\\epsilon$ excess error to the Bayes optimal classifier requires at least $\\mathrm{poly}(1/\\epsilon)$ training domains, even though the amount of training data sampled from each training domain is large. Experiments on the DomainBed benchmark demonstrate that o.o.d. test accuracy is monotonically increasing as the number of training domains increases. Our result sheds light on the intrinsic hardness of domain generalization and suggests benchmarking o.o.d. algorithms on datasets with a sufficient number of training domains.", "keywords": "Domain Generalization;Domain Complexity", "primary_area": "", "supplementary_material": "", "author": "Yimu Wang;Hongyang Zhang", "authorids": "~Yimu_Wang1;~Hongyang_Zhang1", "gender": "M;M", "homepage": "https://yimuwangcs.github.io;https://hongyanz.github.io/", "dblp": "140/7766;23/10537-1", "google_scholar": "TV2vnN8AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";", "linkedin": "yimu-wang-854743151/;", "or_profile": "~Yimu_Wang1;~Hongyang_Zhang1", "aff": "University of Waterloo;School of Computer Science, University of Waterloo", "aff_domain": "uwaterloo.ca;uwaterloo.ca", "position": "PhD student;Assistant Professor", "bibtex": "@misc{\nwang2023lost,\ntitle={Lost Domain Generalization Is a Natural Consequence of Lack of Training Domains},\nauthor={Yimu Wang and Hongyang Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=9KmnrUpU2DG}\n}", "github": "", "project": "", "reviewers": "8Svr;2RNe;qD5b", "site": "https://openreview.net/forum?id=9KmnrUpU2DG", "pdf_size": 1333915, "recommendation": "3;3;6", "confidence": "4;3;4", "correctness": "2;3;4", "technical_novelty": "2;2;4", "empirical_novelty": "2;2;4", "wc_summary_paper": "50;41;88", "wc_strength_and_weaknesses": "1404;456;322", "wc_clarity_quality_novelty_and_reproducibility": "92;18;67", "wc_summary_review": "164;4;33", "wc_review": "1710;519;510", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.0, 1.4142135623730951 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 2.6666666666666665, 0.9428090415820634 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.9428090415820634 ], "wc_summary_paper_avg": [ 59.666666666666664, 20.368821489936252 ], "wc_strength_and_weaknesses_avg": [ 727.3333333333334, 481.59272788898664 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 59.0, 30.735430152621365 ], "wc_summary_review_avg": [ 67.0, 69.60363975157239 ], "wc_review_avg": [ 913.0, 563.5760818203696 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.5, "corr_recommendation_correctness": 0.8660254037844387, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7003579963241617127&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0", "aff_unique_norm": "University of Waterloo", "aff_unique_dep": "", "aff_unique_url": "https://uwaterloo.ca", "aff_unique_abbr": "UW", "aff_campus_unique_index": "1", "aff_campus_unique": ";Waterloo", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { "id": "9L1Ts8t66YK", "title": "Towards Equivariant 
Graph Contrastive Learning via Cross-Graph Augmentation", "track": "main", "status": "Reject", "tldr": "We propose a cross-graph augmentation to achieve equivariant self-supervised learning on graphs. ", "abstract": "Leading graph contrastive learning (GCL) frameworks conform to the invariance mechanism by encouraging insensitivity to different augmented views of the same graph. Despite the promising performance, invariance worsens representation when augmentations cause aggressive semantic shifts. For example, dropping the super-node can dramatically change a social network's topology. In this case, encouraging invariance to the original graph can bring together dissimilar patterns and hurt the task of instance discrimination. To resolve the problem, we draw inspiration from equivariant self-supervised learning and propose Equivariant Graph Contrastive Learning (E-GCL) to encourage the sensitivity to global semantic shifts. Viewing each graph as a transformation to others, we ground the equivariance principle as a cross-graph augmentation -- graph interpolation -- to simulate global semantic shifts. Without using annotation, we supervise the representation of cross-graph augmented views by linearly combining the representations of their original samples. This simple but effective equivariance principle empowers E-GCL with the ability of cross-graph discrimination. It shows significant improvements over the state-of-the-art GCL models in unsupervised learning and transfer learning. Further experiments demonstrate E-GCL's generalization to various graph pre-training frameworks. Code is available at \\url{https://anonymous.4open.science/r/E-GCL/}", "keywords": "equivariant;self-supervised learning;contrastive learning;graph neural networks", "primary_area": "", "supplementary_material": "", "author": "Zhiyuan Liu;An Zhang;Yu Sun;Yicong Li;Yaorui Shi;Sihang Li;Xiang Wang;Xiangnan He;Tat-Seng Chua", "authorids": "~Zhiyuan_Liu5;~An_Zhang2;~Yu_Sun15;~Yicong_Li1;~Yaorui_Shi2;~Sihang_Li1;~Xiang_Wang6;~Xiangnan_He2;~Tat-Seng_Chua2", "gender": "M;;M;;M;;M;;", "homepage": "https://acharkq.github.io/;;;https://yl3800.github.io/;;;https://github.com/xiangwang1223;;", "dblp": "53/3245-10;;;302/5062-4;;;31/2864-10;;", "google_scholar": "https://scholar.google.com.sg/citations?user=zF0AH64AAAAJ;;;BEsjkHgAAAAJ;EWU3rdIAAAAJ;;https://scholar.google.com.sg/citations?user=HdhaQB0AAAAJ;;", "orcid": ";;0000-0003-1092-9035;;;;0000-0002-6148-6329;;", "linkedin": ";;;;https://www.linkedin.cn/incareer/in/ACoAADX8m7MBB85jekmcqEP6gMuGa_pp35cLmbo;;;;", "or_profile": "~Zhiyuan_Liu5;~An_Zhang2;~Yu_Sun15;~Yicong_Li1;~Yaorui_Shi2;~Sihang_Li1;~Xiang_Wang6;~Xiangnan_He2;~Tat-Seng_Chua2", "aff": "National University of Singapore;;National University of Singapore;National University of Singapore;Xi'an Jiaotong University;;University of Science and Technology of China;;", "aff_domain": "nus.edu.sg;;u.nus.edu;u.nus.edu;xjtu.edu.cn;;ustc.edu.cn;;", "position": "PhD student;;MS student;PhD student;Undergrad student;;Full Professor;;", "bibtex": "@misc{\nliu2023towards,\ntitle={Towards Equivariant Graph Contrastive Learning via Cross-Graph Augmentation},\nauthor={Zhiyuan Liu and An Zhang and Yu Sun and Yicong Li and Yaorui Shi and Sihang Li and Xiang Wang and Xiangnan He and Tat-Seng Chua},\nyear={2023},\nurl={https://openreview.net/forum?id=9L1Ts8t66YK}\n}", "github": "", "project": "", "reviewers": "sATH;LCfW;itQe;f85p", "site": "https://openreview.net/forum?id=9L1Ts8t66YK", "pdf_size": 1218392, "recommendation": "3;6;6;8", 
"confidence": "4;3;4;4", "correctness": "3;3;3;4", "technical_novelty": "2;3;3;3", "empirical_novelty": "0;2;3;4", "wc_summary_paper": "45;56;68;37", "wc_strength_and_weaknesses": "127;227;312;94", "wc_clarity_quality_novelty_and_reproducibility": "11;8;16;32", "wc_summary_review": "45;68;54;32", "wc_review": "228;359;450;195", "wc_reply_reviewers": "0;160;0;0", "wc_reply_authors": "1014;3244;781;1062", "reply_reviewers": "0;2;0;0", "reply_authors": "4;9;3;3", "recommendation_avg": [ 5.75, 1.7853571071357126 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 1.479019945774904 ], "wc_summary_paper_avg": [ 51.5, 11.672617529928752 ], "wc_strength_and_weaknesses_avg": [ 190.0, 85.78752823108962 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 16.75, 9.256754290786809 ], "wc_summary_review_avg": [ 49.75, 13.12202347201071 ], "wc_review_avg": [ 308.0, 102.38896424908302 ], "wc_reply_reviewers_avg": [ 40.0, 69.2820323027551 ], "wc_reply_authors_avg": [ 1525.25, 997.9963364161213 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 4.75, 2.48746859276655 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 9, 0 ], "corr_recommendation_confidence": -0.08084520834544431, "corr_recommendation_correctness": 0.7276068751089989, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9777178389631179191&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0;1;2", "aff_unique_norm": "National University of Singapore;Xi'an Jiao Tong University;University of Science and Technology of China", "aff_unique_dep": ";;", "aff_unique_url": "https://www.nus.edu.sg;https://www.xjtu.edu.cn;http://www.ustc.edu.cn", "aff_unique_abbr": "NUS;XJTU;USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;1", "aff_country_unique": "Singapore;China" }, { "id": "9MDjKb9lGi", "title": "The batch size can affect inference results", "track": "main", "status": "Reject", "tldr": "", "abstract": "When performing matrix multiplication using GPUs, the cuBLAS library is commonly used for computational efficiency. Because of the cuBLAS\u2019 heuristics, a vast, deep neural network model with GPUs may produce different test results owing to the batch sizes in both the training and inference stages. In this paper, we show that the batch size affects the inference results of deep neural network models. Our test models were the well-known bidirectional encoder representations from transformers (BERT) and generative pre-trained transformer (GPT) natural language processing (NLP) models, and the super-resolution generative adversarial network (SRGAN) image generation model in FP32 and TF32. In the TF32 setting, the evaluation loss in BERT using the general language understanding evaluation (GLUE) data sometimes varied for different batch sizes. The GPT generated sentences depending on batch size, and we show the logit's mean square error by increasing the token length. The SRGAN model produced different images from batch to batch. However, these phenomena were not observed under the FP32 setting. 
Therefore, the batch size must be carefully managed in large-sized deep neural networks under the TF32 setting.", "keywords": "Matrix operation;Floating-point;Batch size;GEMM", "primary_area": "", "supplementary_material": "", "author": "Yunkyung Park;Kyungsoo Kim;Deok-kyu Jang", "authorids": "~Yunkyung_Park1;~Kyungsoo_Kim1;dkjang@khu.ac.kr", "gender": ";;", "homepage": ";;", "dblp": ";;", "google_scholar": ";https://scholar.google.co.kr/citations?user=Za6a-YUAAAAJ;", "orcid": ";;", "linkedin": "yunkyung-park-936ab7228/;;", "or_profile": "~Yunkyung_Park1;~Kyungsoo_Kim1;dkjang@khu.ac.kr", "aff": "Kyung Hee University;Kyung Hee University;", "aff_domain": "khu.ac.kr;khu.ac.kr;", "position": "PhD student;Full Professor;", "bibtex": "@misc{\npark2023the,\ntitle={The batch size can affect inference results},\nauthor={Yunkyung Park and Kyungsoo Kim and Deok-kyu Jang},\nyear={2023},\nurl={https://openreview.net/forum?id=9MDjKb9lGi}\n}", "github": "", "project": "", "reviewers": "ReKx;YAmr;7wfT", "site": "https://openreview.net/forum?id=9MDjKb9lGi", "pdf_size": 253309, "recommendation": "1;3;3", "confidence": "4;3;5", "correctness": "2;4;3", "technical_novelty": "1;3;3", "empirical_novelty": "1;3;3", "wc_summary_paper": "40;70;30", "wc_strength_and_weaknesses": "110;31;129", "wc_clarity_quality_novelty_and_reproducibility": "178;218;12", "wc_summary_review": "43;27;24", "wc_review": "371;346;195", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 2.3333333333333335, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 2.3333333333333335, 0.9428090415820634 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.9428090415820634 ], "wc_summary_paper_avg": [ 46.666666666666664, 16.99673171197595 ], "wc_strength_and_weaknesses_avg": [ 90.0, 42.43426288586461 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 136.0, 89.18893802858439 ], "wc_summary_review_avg": [ 31.333333333333332, 8.339997335464536 ], "wc_review_avg": [ 304.0, 77.74745440634483 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.8660254037844387, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:BGyv5quq3Z0J:scholar.google.com/&scioq=The+batch+size+can+affect+inference+results&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Kyung Hee University", "aff_unique_dep": "", "aff_unique_url": "http://www.khu.ac.kr", "aff_unique_abbr": "KHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "title": "Self-Ensemble Protection: Training Checkpoints Are Good Data Protectors", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10872", "id": "9MO7bjoAfIA", "poster": "/media/PosterPDFs/ICLR%202023/10872.png?t=1681127946.6561382", "openreview": "https://openreview.net/forum?id=9MO7bjoAfIA", "slides": "https://iclr.cc/virtual/2023/poster/10872", "video": "https://iclr.cc/virtual/2023/poster/10872", "author_site": "Sizhe Chen, Geng Yuan, Xinwen Cheng, Yifan Gong, Minghai Qin, Yanzhi Wang, Xiaolin Huang", "tldr": "We protect proprietary datasets by using intermediate checkpoints 
in a self-ensemble way, which more than halves the testing accuracy in unauthorized training compared to the best baselines.", "abstract": "As data becomes increasingly vital, a company would be very cautious about releasing data, because the competitors could use it to train high-performance models, thereby posing a tremendous threat to the company's commercial competence. To prevent training good models on the data, we could add imperceptible perturbations to it. Since such perturbations aim at hurting the entire training process, they should reflect the vulnerability of DNN training, rather than that of a single model. Based on this new idea, we seek perturbed examples that are always unrecognized (never correctly classified) in training. In this paper, we uncover them by model checkpoints' gradients, forming the proposed self-ensemble protection (SEP), which is very effective because (1) learning on examples ignored during normal training tends to yield DNNs ignoring normal examples; (2) checkpoints' cross-model gradients are close to orthogonal, meaning that they are as diverse as DNNs with different architectures. That is, our amazing performance of ensemble only requires the computation of training one model. By extensive experiments with 9 baselines on 3 datasets and 5 architectures, SEP is verified to be a new state-of-the-art, e.g., our small $\\ell_\\infty=2/255$ perturbations reduce the accuracy of a CIFAR-10 ResNet18 from 94.56% to 14.68%, compared to 41.35% by the best-known method. Code is available at https://github.com/Sizhe-Chen/SEP.", "keywords": "data protection;poisoning attack;self-ensemble;deep neural network", "primary_area": "", "supplementary_material": "/attachment/428dc55cfc48c41c68dd5632cc47d5b3ed63bf5a.zip", "author": "Sizhe Chen;Geng Yuan;Xinwen Cheng;Yifan Gong;Minghai Qin;Yanzhi Wang;Xiaolin Huang", "authorids": "~Sizhe_Chen1;~Geng_Yuan1;~Xinwen_Cheng1;~Yifan_Gong2;~Minghai_Qin1;~Yanzhi_Wang3;~Xiaolin_Huang1", "gender": "M;M;F;F;M;M;M", "homepage": "https://sizhe-chen.github.io;;;https://yifanfanfanfan.github.io/;https://sites.google.com/site/minghaiqin/home;https://web.northeastern.edu/yanzhiwang/;http://www.pami.sjtu.edu.cn/en/xiaolin", "dblp": ";205/3007;;49/3073-4.html;;;61/2227", "google_scholar": ";tBIAgtgAAAAJ;;U_gevVgAAAAJ;MSgWKbYAAAAJ;https://scholar.google.com/citations?hl=en;DR-gBcEAAAAJ", "orcid": ";0000-0001-9844-992X;0000-0001-6080-0614;0000-0002-3912-097X;;;", "linkedin": ";;;yifan-gong-3059b8132/;;;", "or_profile": "~Sizhe_Chen1;~Geng_Yuan1;~Xinwen_Cheng1;~Yifan_Gong2;~Minghai_Qin1;~Yanzhi_Wang3;~Xiaolin_Huang1", "aff": "Shanghai Jiaotong University;Northeastern University;Shanghai Jiaotong University;Northeastern University;Western Digital Corporation;Northeastern University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;northeastern.edu;sjtu.edu.cn;neu.edu;wdc.com;northeastern.edu;sjtu.edu.cn", "position": "MS student;PhD student;PhD student;PhD student;senior technologist;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nchen2023selfensemble,\ntitle={Self-Ensemble Protection: Training Checkpoints Are Good Data Protectors},\nauthor={Sizhe Chen and Geng Yuan and Xinwen Cheng and Yifan Gong and Minghai Qin and Yanzhi Wang and Xiaolin Huang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=9MO7bjoAfIA}\n}", "github": "", "project": "", "reviewers": "FTbn;bFJ9;JA31", "pdf_size": 2820308, "recommendation": "6;6;6", "confidence": 
"4;3;4", "correctness": "3;4;3", "technical_novelty": "3;2;2", "empirical_novelty": "3;2;3", "wc_summary_paper": "36;92;83", "wc_strength_and_weaknesses": "137;108;120", "wc_clarity_quality_novelty_and_reproducibility": "66;29;44", "wc_summary_review": "35;18;47", "wc_review": "274;247;294", "wc_reply_reviewers": "0;44;0", "wc_reply_authors": "734;905;737", "reply_reviewers": "0;1;0", "reply_authors": "2;3;2", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 70.33333333333333, 24.553795814270526 ], "wc_strength_and_weaknesses_avg": [ 121.66666666666667, 11.897712198383164 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 46.333333333333336, 15.195028426721974 ], "wc_summary_review_avg": [ 33.333333333333336, 11.897712198383164 ], "wc_review_avg": [ 271.6666666666667, 19.25847576753905 ], "wc_reply_reviewers_avg": [ 14.666666666666666, 20.741798914805393 ], "wc_reply_authors_avg": [ 792.0, 79.91245209602819 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3909749085867735425&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=9MO7bjoAfIA", "email": "sjtu.edu.cn;northeastern.edu;sjtu.edu.cn;neu.edu;wdc.com;northeastern.edu;sjtu.edu.cn", "author_num": 7, "aff_unique_index": "0;1;0;1;2;1;0", "aff_unique_norm": "Shanghai Jiao Tong University;Northeastern University;Western Digital Corporation", "aff_unique_dep": ";;", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.northeastern.edu;https://www.westerndigital.com", "aff_unique_abbr": "SJTU;NEU;WDC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1;1;1;0", "aff_country_unique": "China;United States" }, { "title": "ImaginaryNet: Learning Object Detectors without Real Images and Annotations", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11847", "id": "9MbhFHqrti9", "poster": "/media/PosterPDFs/ICLR%202023/11847.png?t=1681023990.4910338", "openreview": "https://openreview.net/forum?id=9MbhFHqrti9", "slides": "https://iclr.cc/virtual/2023/poster/11847", "video": "https://iclr.cc/virtual/2023/poster/11847", "author_site": "Minheng Ni, Zitong Huang, Kailai Feng, Wangmeng Zuo", "tldr": "This paper propose ImaginaryNet obtain about 70% performance in object detection trained without real images or annotations and improve the performance by incorporating real images and annotations.", "abstract": "Without the demand of training in reality, humans are able of detecting a new category of object simply based on the language description on its visual characteristics. Empowering deep learning with this ability undoubtedly enables the neural network to handle complex vision tasks, e.g., object detection, without collecting and annotating real images. To this end, this paper introduces a novel challenging learning paradigm Imaginary-Supervised Object Detection (ISOD), where neither real images nor manual annotations are allowed for training object detectors. 
To resolve this challenge, we propose ImaginaryNet, a framework to synthesize images by combining pretrained language model and text-to-image synthesis model. Given a class label, the language model is used to generate a full description of a scene with a target object, and the text-to-image model is deployed to generate a photo-realistic image. With the synthesized images and class labels, weakly supervised object detection can then be leveraged to accomplish ISOD. By gradually introducing real images and manual annotations, ImaginaryNet can collaborate with other supervision settings to further boost detection performance. Experiments show that ImaginaryNet can (i) obtain about 75% performance in ISOD compared with the weakly supervised counterpart of the same backbone trained on real data, (ii) significantly improve the baseline while achieving state-of-the-art or comparable performance by incorporating ImaginaryNet with other supervision settings. Our code will be publicly available at https://github.com/kodenii/ImaginaryNet.", "keywords": "Object detection;Visual synthesis;Generative model", "primary_area": "", "supplementary_material": "", "author": "Minheng Ni;Zitong Huang;Kailai Feng;Wangmeng Zuo", "authorids": "~Minheng_Ni1;~Zitong_Huang1;~Kailai_Feng1;~Wangmeng_Zuo3", "gender": "M;M;M;M", "homepage": "https://kodenii.github.io;https://scholar.google.com/citations?user=WHVC7kkAAAAJ&hl=zh-CN;https://github.com/carlofkl;", "dblp": "263/9969;69/10218;331/2340;93/2671", "google_scholar": "-ybr4_cAAAAJ;WHVC7kkAAAAJ;;rUOpCEYAAAAJ", "orcid": ";;;0000-0002-3330-783X", "linkedin": "https://linkedin.com/in/minheng-ni-7b8a99146;;;", "or_profile": "~Minheng_Ni1;~Zitong_Huang1;~Kailai_Feng1;~Wangmeng_Zuo3", "aff": "Microsoft;Megvii Technology Inc.;Harbin Institute of Technology;Harbin Institute of Technology", "aff_domain": "microsoft.com;megvii.com;hit.edu.cn;hit.edu.cn", "position": "Research Intern;Intern;Undergrad student;Full Professor", "bibtex": "@inproceedings{\nni2023imaginarynet,\ntitle={ImaginaryNet: Learning Object Detectors without Real Images and Annotations},\nauthor={Minheng Ni and Zitong Huang and Kailai Feng and Wangmeng Zuo},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=9MbhFHqrti9}\n}", "github": "", "project": "", "reviewers": "va3v;n5tS;w1NM;DNcv", "pdf_size": 3780897, "recommendation": "6;6;6;8", "confidence": "4;3;4;5", "correctness": "3;4;3;3", "technical_novelty": "3;2;3;3", "empirical_novelty": "2;0;3;3", "wc_summary_paper": "81;83;102;92", "wc_strength_and_weaknesses": "137;189;86;326", "wc_clarity_quality_novelty_and_reproducibility": "59;110;69;16", "wc_summary_review": "119;73;85;11", "wc_review": "396;455;342;445", "wc_reply_reviewers": "61;0;107;18", "wc_reply_authors": "2277;703;2533;511", "reply_reviewers": "1;0;2;1", "reply_authors": "6;3;6;3", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 1.224744871391589 ], "wc_summary_paper_avg": [ 89.5, 8.32165848854662 ], "wc_strength_and_weaknesses_avg": [ 184.5, 89.4441166315594 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 63.5, 33.425289826716536 ], "wc_summary_review_avg": [ 72.0, 39.05124837953327 ], "wc_review_avg": [ 409.5, 44.91380634058975 ], "wc_reply_reviewers_avg": [ 46.5, 41.36725758374611 ], "wc_reply_authors_avg": [ 
1506.0, 906.091055027032 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 4.5, 1.5 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.816496580927726, "corr_recommendation_correctness": -0.3333333333333333, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15710408780350224235&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=9MbhFHqrti9", "email": "microsoft.com;megvii.com;hit.edu.cn;hit.edu.cn", "author_num": 4, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "Microsoft;Megvii Technology;Harbin Institute of Technology", "aff_unique_dep": "Microsoft Corporation;;", "aff_unique_url": "https://www.microsoft.com;https://www.megvii.com;http://www.hit.edu.cn/", "aff_unique_abbr": "Microsoft;Megvii;HIT", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Harbin", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "United States;China" }, { "id": "9MniHf5dmH", "title": "Label Distribution Learning via Implicit Distribution Representation", "track": "main", "status": "Reject", "tldr": "", "abstract": "In contrast to multi-label learning, label distribution learning characterizes the polysemy of examples by a label distribution to represent richer semantics. In the learning process of label distribution, the training data is collected mainly by manual annotation or label enhancement algorithms to generate label distribution. Unfortunately, the complexity of the manual annotation task or the inaccuracy of the label enhancement algorithm leads to noise and uncertainty in the label distribution training set. To alleviate this problem, we introduce the implicit distribution in the label distribution learning framework to characterize the uncertainty of each label value. Specifically, we use deep implicit representation learning to construct a label distribution matrix with Gaussian prior constraints, where each row component corresponds to the distribution estimate of each label value, and this row component is constrained by a prior Gaussian distribution to moderate the noise and uncertainty interference of the label distribution dataset. Finally, each row component of the label distribution matrix is transformed into a standard label distribution form by using the self-attention algorithm. 
In addition, several techniques with a regularizing effect are applied in the training phase to improve the performance of the model.", "keywords": "label distribution learning;implicit distribution;Gaussian distribution;self-attention algorithm", "primary_area": "", "supplementary_material": "", "author": "Zhuoran Zheng;Xiuyi Jia", "authorids": "~Zhuoran_Zheng1;~Xiuyi_Jia1", "gender": "M;", "homepage": "https://zzr-idam.github.io/Zhuoranzheng.github.io;", "dblp": "293/8326;23/5047", "google_scholar": "pXzPL-sAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";", "linkedin": ";", "or_profile": "~Zhuoran_Zheng1;~Xiuyi_Jia1", "aff": ";Nanjing University of Science and Technology", "aff_domain": ";njust.edu.cn", "position": ";Full Professor", "bibtex": "@misc{\nzheng2023label,\ntitle={Label Distribution Learning via Implicit Distribution Representation},\nauthor={Zhuoran Zheng and Xiuyi Jia},\nyear={2023},\nurl={https://openreview.net/forum?id=9MniHf5dmH}\n}", "github": "", "project": "", "reviewers": "eABN;7uiR;Paus;vTyL;JLrG", "site": "https://openreview.net/forum?id=9MniHf5dmH", "pdf_size": 508453, "recommendation": "5;5;5;6;8", "confidence": "3;3;3;3;4", "correctness": "3;2;3;3;4", "technical_novelty": "2;3;3;3;3", "empirical_novelty": "3;2;2;3;3", "wc_summary_paper": "67;106;69;100;179", "wc_strength_and_weaknesses": "181;965;93;153;306", "wc_clarity_quality_novelty_and_reproducibility": "8;73;115;50;49", "wc_summary_review": "20;73;60;60;15", "wc_review": "276;1217;337;363;549", "wc_reply_reviewers": "97;515;0;0;0", "wc_reply_authors": "1055;1619;640;372;288", "reply_reviewers": "1;2;0;0;0", "reply_authors": "3;5;2;2;1", "recommendation_avg": [ 5.8, 1.16619037896906 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "correctness_avg": [ 3.0, 0.6324555320336759 ], "technical_novelty_avg": [ 2.8, 0.39999999999999997 ], "empirical_novelty_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_paper_avg": [ 104.2, 40.59261016490563 ], "wc_strength_and_weaknesses_avg": [ 339.6, 320.32458538176553 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 59.0, 34.968557305099104 ], "wc_summary_review_avg": [ 45.6, 23.482759633399137 ], "wc_review_avg": [ 548.4, 346.494213515897 ], "wc_reply_reviewers_avg": [ 122.4, 199.86255277064788 ], "wc_reply_authors_avg": [ 794.8, 491.2390049660145 ], "reply_reviewers_avg": [ 0.6, 0.7999999999999999 ], "reply_authors_avg": [ 2.6, 1.3564659966250538 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.9432422182837987, "corr_recommendation_correctness": 0.8134892168199606, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9421081079792660701&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0", "aff_unique_norm": "Nanjing University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.nust.edu.cn/", "aff_unique_abbr": "NUST", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "id": "9NHWYzbKHLd", "title": "Continuous Monte Carlo Graph Search", "track": "main", "status": "Withdraw", "tldr": "This paper proposes Continuous Monte Carlo Graph Search (CMCGS), a novel extension of MCTS to online planning in environments with continuous state and action spaces.", "abstract": "In many complex sequential decision making tasks, online planning is crucial for high performance. For efficient online planning, Monte Carlo Tree Search (MCTS) employs a principled mechanism for trading off between exploration and exploitation.
MCTS outperforms comparison methods in various discrete decision making domains such as Go, Chess, and Shogi. Subsequently, extensions of MCTS to continuous domains have been proposed. However, the inherently high branching factor and the resulting explosion of search tree size limit existing methods. To solve this problem, this paper proposes Continuous Monte Carlo Graph Search (CMCGS), a novel extension of MCTS to online planning in environments with continuous state and action spaces. CMCGS takes advantage of the insight that, during planning, sharing the same action policy between several states can yield high performance. To implement this idea, at each time step CMCGS clusters similar states into a limited number of stochastic action bandit nodes, which produce a layered graph instead of an MCTS search tree. Experimental evaluation with limited sample budgets shows that CMCGS outperforms comparison methods in several complex continuous DeepMind Control Suite benchmarks and a 2D navigation task.", "keywords": "online planning;sequential decision making;monte carlo tree search;MCTS;continuous control", "primary_area": "", "supplementary_material": "/attachment/2d9bb8fa21d23e8b0b6da9d064d09e69c6d17d30.zip", "author": "Amin Babadi;Yi Zhao;Juho Kannala;Alexander Ilin;Joni Pajarinen", "authorids": "~Amin_Babadi1;~Yi_Zhao6;~Juho_Kannala5;~Alexander_Ilin1;~Joni_Pajarinen2", "gender": "M;M;M;M;", "homepage": ";https://zhaoyi11.github.io/;https://users.aalto.fi/~kannalj1/;https://users.aalto.fi/~alexilin/;", "dblp": ";51/4138-1;47/4656.html;85/5835;23/8355", "google_scholar": "https://scholar.google.fi/citations?user=lP0ai_AAAAAJ;https://scholar.google.com/citations?hl=en;c4mWQPQAAAAJ;i2gcTBQAAAAJ;https://scholar.google.fi/citations?user=-2fJStwAAAAJ", "orcid": ";0009-0002-9979-595X;0000-0001-5088-4041;;0000-0003-4469-8191", "linkedin": "amin-babadi-8442a9132/;;;alexanderilin/;", "or_profile": "~Amin_Babadi1;~Yi_Zhao6;~Juho_Kannala5;~Alexander_Ilin1;~Joni_Pajarinen2", "aff": ";Aalto University;Aalto University;Aalto University;Aalto University", "aff_domain": ";aalto.fi;aalto.fi;aalto.fi;aalto.fi", "position": ";PhD student;Assistant Professor;Assistant Professor;Assistant Professor", "bibtex": "@misc{\nbabadi2023continuous,\ntitle={Continuous Monte Carlo Graph Search},\nauthor={Amin Babadi and Yi Zhao and Juho Kannala and Alexander Ilin and Joni Pajarinen},\nyear={2023},\nurl={https://openreview.net/forum?id=9NHWYzbKHLd}\n}", "github": "", "project": "", "reviewers": "FVMA;4vMm;YoX3", "site": "https://openreview.net/forum?id=9NHWYzbKHLd", "pdf_size": 2370703, "recommendation": "3;3;5", "confidence": "3;5;3", "correctness": "3;3;3", "technical_novelty": "3;1;3", "empirical_novelty": "1;1;2", "wc_summary_paper": "123;34;73", "wc_strength_and_weaknesses": "464;167;241", "wc_clarity_quality_novelty_and_reproducibility": "83;57;50", "wc_summary_review": "123;38;57", "wc_review": "793;296;421", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.3333333333333335, 0.9428090415820634 ], "empirical_novelty_avg": [ 1.3333333333333333, 0.4714045207910317 ], "wc_summary_paper_avg": [ 76.66666666666667, 36.42648609032841 ], "wc_strength_and_weaknesses_avg": [ 290.6666666666667, 126.23346976491176 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [
63.333333333333336, 14.197026292697903 ], "wc_summary_review_avg": [ 72.66666666666667, 36.42648609032841 ], "wc_review_avg": [ 503.3333333333333, 211.08660676498536 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.5000000000000001, "corr_recommendation_correctness": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17157513411576742780&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Aalto University", "aff_unique_dep": "", "aff_unique_url": "https://www.aalto.fi", "aff_unique_abbr": "Aalto", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Finland" }, { "title": "Leveraging Importance Weights in Subset Selection", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11171", "id": "9Nj_gNdvqYf", "poster": "/media/PosterPDFs/ICLR%202023/11171.png?t=1681410137.5612319", "openreview": "https://openreview.net/forum?id=9Nj_gNdvqYf", "slides": "https://iclr.cc/virtual/2023/poster/11171", "video": "https://iclr.cc/virtual/2023/poster/11171", "author_site": "Gui Citovsky, Giulia DeSalvo, Sanjiv Kumar, Srikumar Ramalingam, Afshin Rostamizadeh, Yunjuan Wang", "tldr": "", "abstract": "We present a subset selection algorithm designed to work with arbitrary model families in a practical batch setting. In such a setting, an algorithm can sample examples one at a time but, in order to limit overhead costs, is only able to update its state (i.e. further train model weights) once a large enough batch of examples is selected. Our algorithm, IWeS, selects examples by importance sampling where the sampling probability assigned to each example is based on the entropy of models trained on previously selected batches. IWeS admits significant performance improvement compared to other subset selection algorithms for seven publicly available datasets. Additionally, it is competitive in an active learning setting, where the label information is not available at selection time. 
We also provide an initial theoretical analysis to support our importance weighting approach, proving generalization and sampling rate bounds.", "keywords": "data subset selection;importance weighted sampling", "primary_area": "", "supplementary_material": "", "author": "Gui Citovsky;Giulia DeSalvo;Sanjiv Kumar;Srikumar Ramalingam;Afshin Rostamizadeh;Yunjuan Wang", "authorids": "~Gui_Citovsky1;~Giulia_DeSalvo1;~Sanjiv_Kumar1;~Srikumar_Ramalingam2;~Afshin_Rostamizadeh1;~Yunjuan_Wang1", "gender": ";F;;M;;F", "homepage": ";https://research.google/people/106565/;http://www.sanjivk.com/;https://www.cs.utah.edu/~srikumar/;;https://yunjuanwang.github.io/", "dblp": "https://dblp.uni-trier.de/pid/156/1823.html;169/0277;;17/4216;97/4479;31/560", "google_scholar": "trYRB5oAAAAJ;;https://scholar.google.com/citations?hl=en;6m1ptOgAAAAJ;;t_VSEEwAAAAJ", "orcid": ";;;;;", "linkedin": ";;;srikumar-ramalingam-17728b22/;;yunjuan-wang-12ab85169/", "or_profile": "~Gui_Citovsky1;~Giulia_DeSalvo1;~Sanjiv_Kumar1;~Srikumar_Ramalingam2;~Afshin_Rostamizadeh1;~Yunjuan_Wang1", "aff": "Google;Google;Google;Google;Google;Johns Hopkins University", "aff_domain": "google.com;google.com;google.com;google.com;google.com;jhu.edu", "position": "Software Engineer;Research Scientist;Research Scientist;Research Scientist;Researcher;PhD student", "bibtex": "@inproceedings{\ncitovsky2023leveraging,\ntitle={Leveraging Importance Weights in Subset Selection},\nauthor={Gui Citovsky and Giulia DeSalvo and Sanjiv Kumar and Srikumar Ramalingam and Afshin Rostamizadeh and Yunjuan Wang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=9Nj_gNdvqYf}\n}", "github": "", "project": "", "reviewers": "j2Bp;q4Qj;EWd7;7Jex;DSZY", "pdf_size": 3512706, "recommendation": "3;6;6;8;8", "confidence": "4;4;3;4;3", "correctness": "2;3;3;4;4", "technical_novelty": "2;3;2;3;3", "empirical_novelty": "2;3;3;2;3", "wc_summary_paper": "47;39;134;76;57", "wc_strength_and_weaknesses": "203;80;282;157;64", "wc_clarity_quality_novelty_and_reproducibility": "56;184;91;44;105", "wc_summary_review": "29;71;55;61;22", "wc_review": "335;374;562;338;248", "wc_reply_reviewers": "0;0;174;39;0", "wc_reply_authors": "560;242;209;156;0", "reply_reviewers": "0;0;1;1;0", "reply_authors": "1;1;1;1;0", "recommendation_avg": [ 6.2, 1.8330302779823362 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "correctness_avg": [ 3.2, 0.7483314773547882 ], "technical_novelty_avg": [ 2.6, 0.4898979485566356 ], "empirical_novelty_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_paper_avg": [ 70.6, 34.027048064738146 ], "wc_strength_and_weaknesses_avg": [ 157.2, 80.39751239932738 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 96.0, 49.30314391598167 ], "wc_summary_review_avg": [ 47.6, 18.8849146145806 ], "wc_review_avg": [ 371.4, 103.92612760995186 ], "wc_reply_reviewers_avg": [ 42.6, 67.41394514490308 ], "wc_reply_authors_avg": [ 233.4, 183.19563313572735 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 0.8, 0.4 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.3563483225498992, "corr_recommendation_correctness": 0.9914601339836673, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8010779852766839937&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=9Nj_gNdvqYf", "email": "google.com;google.com;google.com;google.com;google.com;jhu.edu", "author_num": 6, 
"aff_unique_index": "0;0;0;0;0;1", "aff_unique_norm": "Google;Johns Hopkins University", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://www.jhu.edu", "aff_unique_abbr": "Google;JHU", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "9NzCUqU7i1", "title": "Learning from Interval-valued Data", "track": "main", "status": "Reject", "tldr": "Learn a classifier with interval-valued observations using multi-view learning.", "abstract": "The classification problem concerning crisp-valued data has been well resolved. However, interval-valued data, where all of the observations\u2019 features are described by intervals, is also a common type of data in real-world scenarios. For example, the data extracted by many measuring devices are not exact numbers but intervals. In this paper, we focus on a highly challenging problem called learning from interval-valued data (LIND), where we aim to learn a classifier with high performance on interval-valued observations. First, we obtain the estimation error bound of the LIND problem based on Rademacher complexity. Then, we give the theoretical analysis to show the strengths of multi-view learning on classification problems, which inspires us to construct a new framework called multi-view interval information extraction (Mv-IIE) approach for improving classification accuracy on interval-valued data. The experiment comparisons with several baselines on both synthetic and real-world datasets illustrate the superiority of the proposed framework in handling interval-valued data. Moreover, we describe an application of the Mv-IIE framework that we can prevent data privacy leakage by transforming crisp-valued (raw) data into interval-valued data.", "keywords": "Machine learning;Interval-valued data;Classification", "primary_area": "", "supplementary_material": "/attachment/8a7914b5fb7a070366a8d12ce7024b5d2bb59e70.zip", "author": "Guangzhi Ma;Jie Lu;Zhen Fang;Feng Liu;Guangquan Zhang", "authorids": "~Guangzhi_Ma1;~Jie_Lu3;~Zhen_Fang2;~Feng_Liu2;~Guangquan_Zhang2", "gender": "M;;M;M;", "homepage": ";;https://fang-zhen.github.io/index.html;https://fengliu90.github.io/index.html;", "dblp": ";;;77/1318-3;", "google_scholar": ";;OzD6WJcAAAAJ;https://scholar.google.com/citations?hl=en;_1RMrhsAAAAJ", "orcid": " 0000-0001-5726-1672;;0000-0003-0602-6255;0000-0002-5005-9129;", "linkedin": ";;;alexfengliu;", "or_profile": "~Guangzhi_Ma1;~Jie_Lu3;~Zhen_Fang2;~Feng_Liu2;~Guangquan_Zhang2", "aff": "University of Technology Sydney;;University of Technology Sydney;University of Melbourne;University of Technology Sydney (UTS)", "aff_domain": "uts.edu.au;;uts.edu.au;unimelb.edu.au;uts.eud.au", "position": "PhD student;;Postdoc;Assistant Professor;Associate Professor", "bibtex": "@misc{\nma2023learning,\ntitle={Learning from Interval-valued Data},\nauthor={Guangzhi Ma and Jie Lu and Zhen Fang and Feng Liu and Guangquan Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=9NzCUqU7i1}\n}", "github": "", "project": "", "reviewers": "4H3Z;Nqqb;mFK1", "site": "https://openreview.net/forum?id=9NzCUqU7i1", "pdf_size": 1079378, "recommendation": "3;3;8", "confidence": "3;4;4", "correctness": "3;3;4", "technical_novelty": "3;2;4", "empirical_novelty": "3;1;4", "wc_summary_paper": "86;83;139", "wc_strength_and_weaknesses": "432;158;321", "wc_clarity_quality_novelty_and_reproducibility": "108;78;28", "wc_summary_review": "67;41;12", 
"wc_review": "693;360;500", "wc_reply_reviewers": "0;154;0", "wc_reply_authors": "2099;902;1037", "reply_reviewers": "0;1;0", "reply_authors": "5;3;2", "recommendation_avg": [ 4.666666666666667, 2.357022603955158 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.816496580927726 ], "empirical_novelty_avg": [ 2.6666666666666665, 1.247219128924647 ], "wc_summary_paper_avg": [ 102.66666666666667, 25.72072229848057 ], "wc_strength_and_weaknesses_avg": [ 303.6666666666667, 112.52950230445742 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 71.33333333333333, 32.99831645537222 ], "wc_summary_review_avg": [ 40.0, 22.464787260658994 ], "wc_review_avg": [ 517.6666666666666, 136.51943288614834 ], "wc_reply_reviewers_avg": [ 51.333333333333336, 72.59629620181887 ], "wc_reply_authors_avg": [ 1346.0, 535.2961796986785 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 3.3333333333333335, 1.247219128924647 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.5000000000000001, "corr_recommendation_correctness": 0.9999999999999998, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:vk3nM82zRgoJ:scholar.google.com/&scioq=Learning+from+Interval-valued+Data&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Technology Sydney;University of Melbourne", "aff_unique_dep": ";", "aff_unique_url": "https://www.uts.edu.au;https://www.unimelb.edu.au", "aff_unique_abbr": "UTS;UniMelb", "aff_campus_unique_index": "1", "aff_campus_unique": ";Sydney", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Australia" }, { "id": "9OEW_t2uO4u", "title": "CLIP-PAE: Projection-Augmentation Embedding to Extract Relevant Features for a Disentangled, Interpretable and Controllable Text-Guided Image Manipulation", "track": "main", "status": "Reject", "tldr": "We propose a novel approach to enforce better disentanglement, interpretability and controllability for text-guided image manipulation.", "abstract": "Recently introduced Contrastive Language-Image Pre-Training (CLIP) bridges images and text by embedding them into a joint latent space. This opens the door to ample literature that aims to manipulate an input image by providing a textual explanation. However, due to the discrepancy between image and text embeddings in the joint space, using text embeddings as the optimization target often introduces undesired artifacts in the resulting images. Disentanglement, interpretability, and controllability are also hard to guarantee for manipulation. To alleviate these problems, we propose to define corpus subspaces spanned by prompts to capture specific image characteristics. We introduce CLIP projection-augmentation embedding (PAE) as an optimization target to improve the performance of text-guided image manipulation. Our method is a simple and general paradigm that can be easily computed and adapted, and smoothly incorporated into any CLIP-based latent manipulation algorithm to improve performance. To demonstrate the effectiveness of our method, we conduct several theoretical and empirical system studies. As a case study, we utilize the method for text-guided semantic face editing. 
We quantitatively and qualitatively demonstrate that PAE facilitates a more disentangled, interpretable, and controllable image manipulation method with state of the art quality and accuracy.", "keywords": "computer vision;text-guided image manipulation;latent manipulation", "primary_area": "", "supplementary_material": "/attachment/cfa2ec327c46c1a89fb39375da1ee1581509a572.zip", "author": "Chenliang Zhou;Fangcheng Zhong;Cengiz Oztireli", "authorids": "~Chenliang_Zhou1;~Fangcheng_Zhong1;~Cengiz_Oztireli1", "gender": ";;", "homepage": ";https://www.cl.cam.ac.uk/~fz261/;", "dblp": ";253/0188;", "google_scholar": ";;", "orcid": ";;", "linkedin": ";fangcheng-zhong-125b9a85/;", "or_profile": "~Chenliang_Zhou1;~Fangcheng_Zhong1;~Cengiz_Oztireli1", "aff": ";University of Cambridge;", "aff_domain": ";cam.ac.uk;", "position": ";Researcher;", "bibtex": "@misc{\nzhou2023clippae,\ntitle={{CLIP}-{PAE}: Projection-Augmentation Embedding to Extract Relevant Features for a Disentangled, Interpretable and Controllable Text-Guided Image Manipulation},\nauthor={Chenliang Zhou and Fangcheng Zhong and Cengiz Oztireli},\nyear={2023},\nurl={https://openreview.net/forum?id=9OEW_t2uO4u}\n}", "github": "", "project": "", "reviewers": "62cf;Qecw;DXMN;WWDd", "site": "https://openreview.net/forum?id=9OEW_t2uO4u", "pdf_size": 9534532, "recommendation": "5;5;5;6", "confidence": "4;4;4;3", "correctness": "3;3;3;3", "technical_novelty": "2;2;2;4", "empirical_novelty": "0;0;2;3", "wc_summary_paper": "70;59;68;46", "wc_strength_and_weaknesses": "181;541;156;362", "wc_clarity_quality_novelty_and_reproducibility": "30;134;17;18", "wc_summary_review": "28;61;14;30", "wc_review": "309;795;255;456", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "331;784;531;824", "reply_reviewers": "0;0;0;0", "reply_authors": "1;2;1;2", "recommendation_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.5, 0.8660254037844386 ], "empirical_novelty_avg": [ 1.25, 1.299038105676658 ], "wc_summary_paper_avg": [ 60.75, 9.470348462437906 ], "wc_strength_and_weaknesses_avg": [ 310.0, 155.25946025927053 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 49.75, 48.90999386628463 ], "wc_summary_review_avg": [ 33.25, 17.166464400102893 ], "wc_review_avg": [ 453.75, 210.30379811120864 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 617.5, 199.95562007605588 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": 0.0, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4384829099178201231&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0", "aff_unique_norm": "University of Cambridge", "aff_unique_dep": "", "aff_unique_url": "https://www.cam.ac.uk", "aff_unique_abbr": "Cambridge", "aff_campus_unique_index": "0", "aff_campus_unique": "Cambridge", "aff_country_unique_index": "0", "aff_country_unique": "United Kingdom" }, { "id": "9OL2fIfDLK", "title": "ESC: A Benchmark For Multi-Domain End-to-End Speech Recognition", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Speech recognition applications cover a range of different audio and text distributions, with different speaking styles, background noise, transcription punctuation and character casing. 
However, many speech recognition systems require dataset-specific tuning (audio filtering, punctuation removal and normalisation of casing), therefore assuming a-priori knowledge of both the audio and text distributions. This tuning requirement can lead to systems failing to generalise to other datasets and domains. To promote the development of multi-domain speech systems, we introduce the End-to-end Speech Challenge (ESC) for evaluating the performance of a single automatic speech recognition (ASR) system across a broad set of speech datasets. Benchmarked systems must use the same data pre- and post-processing algorithm across datasets, assuming the audio and text data distributions are a-priori unknown. We compare a series of state-of-the-art (SoTA) end-to-end (E2E) systems on this benchmark, demonstrating how a single speech system can be applied and evaluated on a wide range of data distributions. We find E2E systems to be effective across datasets: in a fair comparison, E2E systems achieve within 2.6% of SoTA systems tuned to a specific dataset. Our analysis reveals that transcription artefacts, such as punctuation and casing, pose difficulties for ASR systems and should be included in evaluation. We believe E2E benchmarking over a range of datasets promotes the research of multi-domain speech recognition systems.", "keywords": "speech;end-to-end;evaluation;benchmark", "primary_area": "", "supplementary_material": "", "author": "Sanchit Gandhi;Patrick Von Platen;Alexander M Rush", "authorids": "~Sanchit_Gandhi1;~Patrick_Von_Platen1;~Alexander_M_Rush1", "gender": "M;M;M", "homepage": ";;http://rush.seas.harvard.edu/", "dblp": ";;http://dblp.uni-trier.de/pers/hd/r/Rush:Alexander_M=", "google_scholar": "etOmu7gAAAAJ;;LIjnUGgAAAAJ", "orcid": ";;0000-0002-9900-1606", "linkedin": ";https://www.linkedin.com/;sasha-rush-a69b6917/", "or_profile": "~Sanchit_Gandhi1;~Patrick_Von_Platen1;~Alexander_M_Rush1", "aff": "Hugging Face;;School of Engineering and Applied Sciences, Harvard University", "aff_domain": "huggingface.co;;seas.harvard.edu", "position": "Researcher;;Assistant Professor", "bibtex": "@misc{\ngandhi2023esc,\ntitle={{ESC}: A Benchmark For Multi-Domain End-to-End Speech Recognition},\nauthor={Sanchit Gandhi and Patrick Von Platen and Alexander M Rush},\nyear={2023},\nurl={https://openreview.net/forum?id=9OL2fIfDLK}\n}", "github": "", "project": "", "reviewers": "KonF;aTqQ;PhhZ;h1XK", "site": "https://openreview.net/forum?id=9OL2fIfDLK", "pdf_size": 255837, "recommendation": "3;3;5;5", "confidence": "2;5;4;4", "correctness": "4;4;4;3", "technical_novelty": "1;2;1;2", "empirical_novelty": "1;2;2;3", "wc_summary_paper": "36;72;120;45", "wc_strength_and_weaknesses": "52;58;142;82", "wc_clarity_quality_novelty_and_reproducibility": "167;12;93;16", "wc_summary_review": "28;15;151;168", "wc_review": "283;157;506;311", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "356;74;368;607", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 1.5, 0.5 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 68.25, 32.683137854251385 ], "wc_strength_and_weaknesses_avg": [ 83.5, 35.59143155311401 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 72.0, 63.64353855655733 ], "wc_summary_review_avg": [ 90.5, 69.41361537911709 ], "wc_review_avg": [ 314.25, 124.98274880958571 ],
"wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 351.25, 188.79800713990602 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.22941573387056177, "corr_recommendation_correctness": -0.5773502691896257, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5085170785538617498&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Hugging Face;Harvard University", "aff_unique_dep": ";School of Engineering and Applied Sciences", "aff_unique_url": "https://huggingface.co;https://www.harvard.edu", "aff_unique_abbr": "Hugging Face;Harvard", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "AE-FLOW: Autoencoders with Normalizing Flows for Medical Images Anomaly Detection", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11150", "id": "9OmCr1q54Z", "poster": "/media/PosterPDFs/ICLR%202023/11150.png?t=1680703583.8031437", "openreview": "https://openreview.net/forum?id=9OmCr1q54Z", "slides": "https://iclr.cc/virtual/2023/poster/11150", "video": "https://iclr.cc/virtual/2023/poster/11150", "author_site": "Yuzhong Zhao, Qiaoqiao Ding, Xiaoqun Zhang", "tldr": "We propose a normalizing flow based autoencoder for medical anomaly detection and it outperformed the other approaches by a large margin.", "abstract": "Anomaly detection from medical images is an important task for clinical screening and diagnosis. In general, a large dataset of normal images are available while only few abnormal images can be collected in clinical practice. By mimicking the diagnosis process of radiologists, we attempt to tackle this problem by learning a tractable distribution of normal images and identify anomalies by differentiating the original image and the reconstructed normal image. More specifically, we propose a normalizing flow-based autoencoder for an efficient and tractable representation of normal medical images. The anomaly score consists of the likelihood originated from the normalizing flow and the reconstruction error of the autoencoder, which allows to identify the abnormality and provide an interpretability at both image and pixel levels. 
Experimental evaluation on two medical image datasets showed that the proposed model outperformed the other approaches by a large margin, which validated the effectiveness and robustness of the proposed method.", "keywords": "Anomaly Detection;Normalizing Flow;Auto-encoder.", "primary_area": "", "supplementary_material": "", "author": "Yuzhong Zhao;Qiaoqiao Ding;Xiaoqun Zhang", "authorids": "~Yuzhong_Zhao2;~Qiaoqiao_Ding1;~Xiaoqun_Zhang1", "gender": ";F;F", "homepage": ";https://qqding.github.io;http://math.sjtu.edu.cn/faculty/xqzhang/", "dblp": ";272/5499;", "google_scholar": ";https://scholar.google.com.sg/citations?user=3WYB4xAAAAAJ;", "orcid": "0000-0001-6818-3649;;", "linkedin": ";;", "or_profile": "~Yuzhong_Zhao2;~Qiaoqiao_Ding1;~Xiaoqun_Zhang1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "position": "MS student;Researcher;Full Professor", "bibtex": "@inproceedings{\nzhao2023aeflow,\ntitle={{AE}-{FLOW}: Autoencoders with Normalizing Flows for Medical Images Anomaly Detection },\nauthor={Yuzhong Zhao and Qiaoqiao Ding and Xiaoqun Zhang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=9OmCr1q54Z}\n}", "github": "", "project": "", "reviewers": "Ngn6;hWE4;XDoM", "pdf_size": 13311408, "recommendation": "6;6;8", "confidence": "3;3;4", "correctness": "2;2;3", "technical_novelty": "2;2;3", "empirical_novelty": "3;2;3", "wc_summary_paper": "97;46;188", "wc_strength_and_weaknesses": "140;60;210", "wc_clarity_quality_novelty_and_reproducibility": "67;68;7", "wc_summary_review": "38;132;21", "wc_review": "342;306;426", "wc_reply_reviewers": "29;359;0", "wc_reply_authors": "784;254;353", "reply_reviewers": "1;1;0", "reply_authors": "2;2;1", "recommendation_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 110.33333333333333, 58.73291713813946 ], "wc_strength_and_weaknesses_avg": [ 136.66666666666666, 61.28258770283412 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 47.333333333333336, 28.522895287041877 ], "wc_summary_review_avg": [ 63.666666666666664, 48.8148429157453 ], "wc_review_avg": [ 358.0, 50.27922035990614 ], "wc_reply_reviewers_avg": [ 129.33333333333334, 162.8298361958138 ], "wc_reply_authors_avg": [ 463.6666666666667, 230.08742299878588 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.9999999999999998, "corr_recommendation_correctness": 0.9999999999999998, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2699074461662533529&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "pdf": "https://openreview.net/pdf?id=9OmCr1q54Z", "email": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id":
"9OoFFWDPDQ", "title": "Delving into the Openness of CLIP", "track": "main", "status": "Reject", "tldr": "", "abstract": "Contrastive Language-Image Pre-training (CLIP) has demonstrated great potential in realizing open-vocabulary visual recognition in a matching style, due to its holistic use of natural language supervision that covers unconstrained real-world visual concepts. However, it is, in turn, also difficult to evaluate and analyze the openness of CLIP-like models, since they are in theory open to any vocabulary but the actual accuracy varies. To address the insufficiency of conventional studies on openness, we resort to an incremental perspective and define the extensibility, which essentially approximates the model's ability to deal with new visual concepts, by evaluating openness through vocabulary expansions. Our evaluation based on extensibility shows that CLIP-like models are hardly truly open and their performances degrade as the vocabulary expands to different degrees. Further analysis reveals that the over-estimation of openness is not because CLIP-like models fail to capture the general similarity of image and text features of novel visual concepts, but because of the confusion among competing text features, that is, they are not stable with respect to the vocabulary. In light of this, we propose to improve the openness of CLIP in feature space by enforcing the distinguishability of text features. Our method retrieves relevant texts from the pre-training corpus to enhance prompts for inference, which boosts the extensibility and stability of CLIP even without fine-tuning.", "keywords": "Contrastive Language-Image Pre-training;CLIP;Openness;Vision-and-Language", "primary_area": "", "supplementary_material": "/attachment/dc9e615ebc4714ac5f0dc2dda46c13bba89cee2d.zip", "author": "Shuhuai Ren;Lei Li;Xuancheng Ren;Guangxiang Zhao;Xu Sun", "authorids": "~Shuhuai_Ren1;~Lei_Li14;~Xuancheng_Ren1;~Guangxiang_Zhao3;~Xu_Sun1", "gender": "M;;;;M", "homepage": "https://renshuhuai-andy.github.io/;;;;https://xusun.org/", "dblp": "50/9511.html;;;;37/1971-1", "google_scholar": "https://scholar.google.com.hk/citations?user=3X8yS-cAAAAJ;;;;https://scholar.google.com/citations?hl=en", "orcid": ";;;;", "linkedin": "shuhuai-ren-69580817a/;;;;", "or_profile": "~Shuhuai_Ren1;~Lei_Li14;~Xuancheng_Ren1;~Guangxiang_Zhao3;~Xu_Sun1", "aff": "Peking University;;;;Peking University", "aff_domain": "pku.edu.cn;;;;pku.edu.cn", "position": "PhD student;;;;Associate Professor", "bibtex": "@misc{\nren2023delving,\ntitle={Delving into the Openness of {CLIP}},\nauthor={Shuhuai Ren and Lei Li and Xuancheng Ren and Guangxiang Zhao and Xu Sun},\nyear={2023},\nurl={https://openreview.net/forum?id=9OoFFWDPDQ}\n}", "github": "", "project": "", "reviewers": "hc5X;c4Um;8DmZ;No3A", "site": "https://openreview.net/forum?id=9OoFFWDPDQ", "pdf_size": 1672879, "recommendation": "5;5;5;6", "confidence": "4;4;5;4", "correctness": "1;2;4;4", "technical_novelty": "2;3;2;2", "empirical_novelty": "2;3;2;2", "wc_summary_paper": "72;63;413;111", "wc_strength_and_weaknesses": "409;273;551;179", "wc_clarity_quality_novelty_and_reproducibility": "14;39;94;111", "wc_summary_review": "57;46;78;137", "wc_review": "552;421;1136;538", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1587;1698;1433;755", "reply_reviewers": "0;0;0;0", "reply_authors": "3;3;3;1", "recommendation_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 2.75, 1.299038105676658 ], "technical_novelty_avg": [ 
2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 164.75, 144.4582552158235 ], "wc_strength_and_weaknesses_avg": [ 353.0, 140.54892386638895 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 64.5, 39.474675426151386 ], "wc_summary_review_avg": [ 79.5, 35.131894341182345 ], "wc_review_avg": [ 661.75, 278.49270636768927 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1368.25, 366.3518629678304 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": 0.5555555555555555, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4002508575139143484&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "9Q7wZ0Uq4Z6", "title": "Policy-Induced Self-Supervision Improves Representation Finetuning in Visual RL", "track": "main", "status": "Reject", "tldr": "We study the transfer of visual representations in RL, show that they can be partially frozen, and propose a self-supervised method to accelerate their finetuning.", "abstract": "We study how to transfer representations pretrained on source tasks to target tasks in visual percept based RL. We analyze two popular approaches: freezing or finetuning the pretrained representations. Empirical studies on a set of popular tasks reveal several properties of pretrained representations. First, finetuning is required even when pretrained representations perfectly capture the information required to solve the target task. Second, finetuned representations improve learnability and are more robust to noise.\nThird, pretrained bottom layers are task-agnostic and readily transferable to new tasks, while top layers encode task-specific information and require adaptation. Building on these insights, we propose a self-supervised objective that \\emph{clusters representations according to the policy they induce}, as opposed to traditional representation similarity measures which are policy-agnostic (\\eg Euclidean norm, cosine similarity). 
Together with freezing the bottom layers, this objective results in significantly better representation than frozen, finetuned, and self-supervised alternatives on a wide range of benchmarks.", "keywords": "visual reinforcement learning;representation learning;transfer learning", "primary_area": "", "supplementary_material": "/attachment/661eadc7628200ece9361f707066346f85971a94.zip", "author": "S\u00e9b Arnold;Fei Sha", "authorids": "~S\u00e9b_Arnold1;~Fei_Sha3", "gender": "M;", "homepage": "http://feisha.org;http://sebarnold.net", "dblp": "13/3601;206/7057", "google_scholar": "HDHOS0QAAAAJ;qoEFB7UAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Fei_Sha2;~Sebastien_Arnold1", "aff": "Google;University of Southern California", "aff_domain": "google.com;usc.edu", "position": "research scientist;PhD student", "bibtex": "@misc{\narnold2023policyinduced,\ntitle={Policy-Induced Self-Supervision Improves Representation Finetuning in Visual {RL}},\nauthor={S{\\'e}b Arnold and Fei Sha},\nyear={2023},\nurl={https://openreview.net/forum?id=9Q7wZ0Uq4Z6}\n}", "github": "", "project": "", "reviewers": "ZmQR;b3ER;nmYq", "site": "https://openreview.net/forum?id=9Q7wZ0Uq4Z6", "pdf_size": 1840043, "recommendation": "3;6;6", "confidence": "4;3;4", "correctness": "4;3;3", "technical_novelty": "2;3;3", "empirical_novelty": "2;2;2", "wc_summary_paper": "48;95;90", "wc_strength_and_weaknesses": "160;251;1055", "wc_clarity_quality_novelty_and_reproducibility": "16;51;34", "wc_summary_review": "26;36;129", "wc_review": "250;433;1308", "wc_reply_reviewers": "0;29;0", "wc_reply_authors": "432;768;810", "reply_reviewers": "0;1;0", "reply_authors": "1;1;2", "recommendation_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 77.66666666666667, 21.076579946049648 ], "wc_strength_and_weaknesses_avg": [ 488.6666666666667, 402.177683230794 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 33.666666666666664, 14.29063407348401 ], "wc_summary_review_avg": [ 63.666666666666664, 46.37767662236746 ], "wc_review_avg": [ 663.6666666666666, 461.69711090954667 ], "wc_reply_reviewers_avg": [ 9.666666666666666, 13.67073110293992 ], "wc_reply_authors_avg": [ 670.0, 169.162643630324 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.5, "corr_recommendation_correctness": -1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:1N3XCECjhiMJ:scholar.google.com/&scioq=Policy-Induced+Self-Supervision+Improves+Representation+Finetuning+in+Visual+RL&hl=en&as_sdt=0,5", "gs_version_total": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Google;University of Southern California", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://www.usc.edu", "aff_unique_abbr": "Google;USC", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Mountain View;Los Angeles", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "9RDD2hefT94", "title": "An interpretable contrastive logical knowledge learning method for sentiment analysis", "track": "main", "status": "Withdraw", "tldr": "We present a novel contrastive logical knowledge learning (CLK) 
method to learn interpretable TPK models and generate explanations for sentiment analysis tasks. ", "abstract": "Current interpretable sentiment analysis (ISA) methods frequently underperform state-of-the-art models, and few of them cast light on the inner workings of pre-trained models. In this work, we fill the gap by addressing four key research challenges in ISA\u2014knowledge acquisition, knowledge representation, knowledge learning and knowledge reasoning\u2014in one unified framework. Theoretically, we propose a novel contrastive logical knowledge learning (CLK) framework that can visualize the decisions made through deterministic Talmudic public announcement logic semantics. We apply CLK to current popular sentiment analysis models to obtain CLK-based interpretable ones. Empirically, experimental results of both binary sentiment analysis tasks and fine-grained sentiment analysis tasks indicate that CLK can achieve an effective trade-off between accuracy and interpretability. Furthermore, we find that CLK can reduce the uncertainty of logical knowledge for discriminative labels by visualizing the learned feature representations and model output. Besides, we carry out a case study to investigate the fidelity of model interpretability through knowledge reasoning, which demonstrates that the explanations provided by our method are reasonable and consistent for sentiment analysis tasks. ", "keywords": "interpretable sentiment analysis;Talmudic public announcement logic;contrastive logical knowledge learning;knowledge reasoning", "primary_area": "", "supplementary_material": "", "author": "Yulin Chen;Bo Yuan;Dongheng Chen;Dov Gabbay;Beishui Liao", "authorids": "sylvia_cyl@qq.com;byuan.186@qq.com;chen_dongheng@qq.com;dov.gabbay@kcl.ac.uk;baiseliao@zju.edu.cn", "gender": ";;;;", "homepage": ";;;;", "dblp": ";;;;", "google_scholar": ";;;;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": ";;;;", "aff": ";;;;", "aff_domain": ";;;;", "position": ";;;;", "bibtex": "@misc{\nchen2023an,\ntitle={An interpretable contrastive logical knowledge learning method for sentiment analysis},\nauthor={Yulin Chen and Bo Yuan and Dongheng Chen and Dov Gabbay and Beishui Liao},\nyear={2023},\nurl={https://openreview.net/forum?id=9RDD2hefT94}\n}", "github": "", "project": "", "reviewers": "dFzD;cdQ3;aDFb", "site": "https://openreview.net/forum?id=9RDD2hefT94", "pdf_size": 1439986, "recommendation": "3;3;5", "confidence": "4;2;3", "correctness": "2;2;3", "technical_novelty": "2;3;4", "empirical_novelty": "2;2;4", "wc_summary_paper": "817;77;34", "wc_strength_and_weaknesses": "2;280;73", "wc_clarity_quality_novelty_and_reproducibility": "2;74;13", "wc_summary_review": "39;43;25", "wc_review": "860;474;145", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "correctness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.816496580927726 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.9428090415820634 ], "wc_summary_paper_avg": [ 309.3333333333333, 359.40351819214135 ], "wc_strength_and_weaknesses_avg": [ 118.33333333333333, 117.93312605973873 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 29.666666666666668, 31.668421004036322 ], "wc_summary_review_avg": [ 35.666666666666664, 7.717224601860151 ], "wc_review_avg": [ 493.0, 292.2065479530989 ], "wc_reply_reviewers_avg": [ 0, 0 ], 
"wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:PDp99rnmy94J:scholar.google.com/&scioq=An+interpretable+contrastive+logical+knowledge+learning+method+for+sentiment+analysis&hl=en&as_sdt=0,23", "gs_version_total": 0 }, { "id": "9RHjy5oHmfe", "title": "EIT: Enhanced Interactive Transformer for Sequence Generation", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "In this work, we tackle the head degradation problem in attention. We propose an \\textbf{E}nhanced \\textbf{I}nteractive \\textbf{T}ransformer (\\textsc{Eit}) architecture in which the standard multi-head self-attention is replaced with the enhanced multi-head attention (EMHA). EMHA removes the one-to-one mapping constraint among queries and keys in multiple subspaces and allows each query to attend to multiple keys. On top of that, we develop a method to make full use of many-to-many mapping by introducing two interaction models, namely Inner-Subspace Interaction and Cross-Subspace Interaction. Extensive experiments on a wide range of sequence generation tasks (e.g. machine translation, abstractive summarization and grammar correction), show its superiority, with a very modest increase in model size. ", "keywords": "Transformer;Multi-head self-attention;Sequence Generation;Machine Translation", "primary_area": "", "supplementary_material": "/attachment/e219afc7b3557fed63c9ca97a3620c5de071ce4e.zip", "author": "Tong Zheng;Bei Li;Huiwen Bao;Yi Jing;Tong Xiao;JingBo Zhu", "authorids": "~Tong_Zheng1;~Bei_Li1;~Huiwen_Bao1;~Yi_Jing2;~Tong_Xiao4;~JingBo_Zhu2", "gender": "M;M;;M;;F", "homepage": "https://kidzheng.github.io/;https://libeineu.github.io/;https://github.com/qinger521;https://www.nlplab.com/members/xiaotong.html;https://dblp.org/pid/73/2129.html;", "dblp": ";;;05/5091;;", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;wzbJ5EIAAAAJ;;-fov7zkAAAAJ;;3PnRUyQAAAAJ", "orcid": "0000-0002-3472-4387;;;;;", "linkedin": ";;;tong-xiao-168bb081/;;", "or_profile": "~Tong_Zheng1;~Bei_Li1;~Yi_Jing2;~Tong_Xiao4;~JingBo_Zhu2;~bao_huiwen1", "aff": ";Northeastern University;Northeastern University;Northeastern University;Northeastern University;Northeastern University", "aff_domain": ";neu.edu.cn;neu.edu.cn;mail.neu.edu.cn;mail.neu.edu.cn;neu.edu.cn", "position": ";PhD student;MS student;Full Professor;Full Professor;Intern", "bibtex": "@misc{\nzheng2023eit,\ntitle={{EIT}: Enhanced Interactive Transformer for Sequence Generation},\nauthor={Tong Zheng and Bei Li and Huiwen Bao and Yi Jing and Tong Xiao and JingBo Zhu},\nyear={2023},\nurl={https://openreview.net/forum?id=9RHjy5oHmfe}\n}", "github": "", "project": "", "reviewers": "vEt7;Bhuf;zUXt;wR9p", "site": "https://openreview.net/forum?id=9RHjy5oHmfe", "pdf_size": 6220996, "recommendation": "3;3;5;5", "confidence": "2;4;3;4", "correctness": "3;2;3;2", "technical_novelty": "3;2;2;3", "empirical_novelty": "3;2;2;2", "wc_summary_paper": "41;77;94;45", "wc_strength_and_weaknesses": "49;161;96;298", "wc_clarity_quality_novelty_and_reproducibility": "16;25;98;26", "wc_summary_review": "44;8;55;20", "wc_review": "150;271;343;389", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "23;641;304;985", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;3", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 
3.25, 0.82915619758885 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 64.25, 22.128883839904805 ], "wc_strength_and_weaknesses_avg": [ 151.0, 93.72566350792081 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 41.25, 32.99526481178777 ], "wc_summary_review_avg": [ 31.75, 18.659782956937093 ], "wc_review_avg": [ 288.25, 90.22021669226915 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 488.25, 360.7279965569626 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.30151134457776363, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:SAhVJehoWusJ:scholar.google.com/&scioq=EIT:+Enhanced+Interactive+Transformer+for+Sequence+Generation&hl=en&as_sdt=0,31", "gs_version_total": 2, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Northeastern University", "aff_unique_dep": "", "aff_unique_url": "https://www.northeastern.edu", "aff_unique_abbr": "NEU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "9RQh6MOOaD", "title": "Efficient Hyperdimensional Computing", "track": "main", "status": "Reject", "tldr": "Based on a detailed analysis of dimension, accuracy, and orthogonality, this paper proposes a suite of novel techniques that reduce the hypervector dimension significantly while maintaining state-of-art accuracies and efficiency.", "abstract": "Hyperdimensional computing (HDC) uses binary vectors of high dimensions to perform classification. Due to its simplicity and massive parallelism, HDC can be highly energy-efficient and well-suited for resource-constrained platforms. However, in trading off orthogonality with efficiency, hypervectors may use tens of thousands of dimensions. In this paper, we will examine the necessity for such high dimensions. In particular, we give a detailed theoretical analysis of the relationship among dimensions of hypervectors, accuracy, and orthogonality. The main conclusion of this study is that a much lower dimension, typically less than 100, can also achieve similar or even higher detecting accuracy compared with other state-of-the-art HDC models. Based on this insight, we propose a suite of novel techniques to build HDC models that use binary hypervectors of dimensions that are orders of magnitude smaller than those found in the state-of-the-art HDC models, yet yield equivalent or even improved accuracy and efficiency. For image classification, we achieved an HDC accuracy of 96.88\\% with a dimension of only 32 on the MNIST dataset. 
We further explore our methods on more complex datasets like CIFAR-10 and show the limits of HDC computing.", "keywords": "Hyperdimensional computing", "primary_area": "", "supplementary_material": "", "author": "Zhanglu Yan;Shida Wang;Kaiwen Tang;Weng-Fai Wong", "authorids": "~Zhanglu_Yan1;~Shida_Wang1;~Kaiwen_Tang1;~Weng-Fai_Wong1", "gender": "M;M;;M", "homepage": ";https://radarfudan.github.io;;https://www.comp.nus.edu.sg/~wongwf/", "dblp": "280/2812;245/6187;283/3219;37/1143", "google_scholar": "pBjJBj0AAAAJ;vA2YMfgAAAAJ;;https://scholar.google.com.tw/citations?user=SL1cTsIAAAAJ", "orcid": ";;;0000-0002-4281-2053", "linkedin": ";;kaiwen-tang-1824741bb/;", "or_profile": "~Zhanglu_Yan1;~Shida_Wang1;~Kaiwen_Tang1;~Weng-Fai_Wong1", "aff": "National University of Singapore;National University of Singapore;National University of Singapore;National University of Singapore", "aff_domain": "nus.edu.sg;nus.edu.sg;nus.edu.sg;nus.edu.sg", "position": "PhD student;PhD student;MS student;Associate Professor", "bibtex": "@misc{\nyan2023efficient,\ntitle={Efficient Hyperdimensional Computing},\nauthor={Zhanglu Yan and Shida Wang and Kaiwen Tang and Weng-Fai Wong},\nyear={2023},\nurl={https://openreview.net/forum?id=9RQh6MOOaD}\n}", "github": "", "project": "", "reviewers": "jTeX;781W;5WTt", "site": "https://openreview.net/forum?id=9RQh6MOOaD", "pdf_size": 2140894, "recommendation": "5;5;6", "confidence": "4;2;2", "correctness": "2;2;3", "technical_novelty": "2;3;3", "empirical_novelty": "2;2;3", "wc_summary_paper": "59;63;16", "wc_strength_and_weaknesses": "470;77;160", "wc_clarity_quality_novelty_and_reproducibility": "14;349;35", "wc_summary_review": "30;79;40", "wc_review": "573;568;251", "wc_reply_reviewers": "387;581;0", "wc_reply_authors": "1233;1836;49", "reply_reviewers": "1;4;0", "reply_authors": "2;4;1", "recommendation_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 2.6666666666666665, 0.9428090415820634 ], "correctness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 46.0, 21.275964529643932 ], "wc_strength_and_weaknesses_avg": [ 235.66666666666666, 169.1278280538783 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 132.66666666666666, 153.21082062163742 ], "wc_summary_review_avg": [ 49.666666666666664, 21.139746660943903 ], "wc_review_avg": [ 464.0, 150.62757604989423 ], "wc_reply_reviewers_avg": [ 322.6666666666667, 241.51512490019246 ], "wc_reply_authors_avg": [ 1039.3333333333333, 742.2813183393177 ], "reply_reviewers_avg": [ 1.6666666666666667, 1.699673171197595 ], "reply_authors_avg": [ 2.3333333333333335, 1.247219128924647 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.4999999999999999, "corr_recommendation_correctness": 0.9999999999999997, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9820506546867328496&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "National University of Singapore", "aff_unique_dep": "", "aff_unique_url": "https://www.nus.edu.sg", "aff_unique_abbr": "NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Singapore" }, { "id": "9TpJYSI1n9t", "title": "Prompt Generation Networks for Efficient Adaptation of Frozen Vision Transformers", "track": "main", "status": 
"Withdraw", "tldr": "A novel method for adapting frozen pretrained vision transformer models by adding prompts that vary based on each input, which can surpass even full-finetuning.", "abstract": "Large-scale pretrained models, especially those trained from vision-language data have demonstrated the tremendous value that can be gained from both larger training datasets and models. Thus, in order to benefit from these developments, there is renewed interest in transfer learning and adapting models from large-scale general pretraining to particular downstream tasks. However, the continuously increasing size of the models means that even the classic approach of finetuning is becoming infeasible for all but big institutions. Prompt learning has emerged as a flexible way to adapt models by solely learning additional inputs to a model that is kept frozen, but so far performances remained inferior to finetuning. To address this, we propose the Prompt Generation Network (PGN) that generates input-dependent prompts by sampling from a learned library of tokens. We show the PGN is effective in adapting pretrained models to various new datasets. It surpasses previous prompt-learning methods by a large margin and even full-finetuning on 5 out of 12 datasets while requiring 100x less parameters. PGN can even be used for training and inferring on multiple datasets simultaneously and learns to allocate tokens between domains. Given these findings, we conclude that PGN is a viable and scalable approach for downstream adaptation of frozen models.", "keywords": "model adaptation;pretrained models;prompting;vision transformers", "primary_area": "", "supplementary_material": "", "author": "Jochem Loedeman;Maarten Stol;Tengda Han;Yuki M Asano", "authorids": "~Jochem_Loedeman1;maarten.stol@braincreators.com;~Tengda_Han1;~Yuki_M_Asano1", "gender": ";;M;", "homepage": ";;https://tengdahan.github.io/;", "dblp": ";;203/8188;", "google_scholar": ";;https://scholar.google.co.uk/citations?user=SbAuWREAAAAJ;", "orcid": ";;0000-0002-1874-9664;", "linkedin": "https://nl.linkedin.com/in/jochem-loedeman-811322124;;;", "or_profile": "~Jochem_Loedeman1;maarten.stol@braincreators.com;~Tengda_Han1;~Yuki_M_Asano1", "aff": ";;University of Oxford;", "aff_domain": ";;robots.ox.ac.uk;", "position": ";;Postdoc;", "bibtex": "@misc{\nloedeman2023prompt,\ntitle={Prompt Generation Networks for Efficient Adaptation of Frozen Vision Transformers},\nauthor={Jochem Loedeman and Maarten Stol and Tengda Han and Yuki M Asano},\nyear={2023},\nurl={https://openreview.net/forum?id=9TpJYSI1n9t}\n}", "github": "", "project": "", "reviewers": "ogqw;x7zq;JeYv;EG6J", "site": "https://openreview.net/forum?id=9TpJYSI1n9t", "pdf_size": 11606389, "recommendation": "5;5;5;5", "confidence": "4;5;5;5", "correctness": "3;3;4;4", "technical_novelty": "2;3;2;2", "empirical_novelty": "2;3;2;3", "wc_summary_paper": "33;73;71;69", "wc_strength_and_weaknesses": "160;161;368;264", "wc_clarity_quality_novelty_and_reproducibility": "51;35;11;35", "wc_summary_review": "9;41;6;36", "wc_review": "253;310;456;404", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.0, 0.0 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 61.5, 16.515144564913744 ], "wc_strength_and_weaknesses_avg": [ 238.25, 86.00690379266074 ], 
"wc_clarity_quality_novelty_and_reproducibility_avg": [ 33.0, 14.2828568570857 ], "wc_summary_review_avg": [ 23.0, 15.636495771111889 ], "wc_review_avg": [ 355.75, 79.1023861839831 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "aff_unique_index": "0", "aff_unique_norm": "University of Oxford", "aff_unique_dep": "", "aff_unique_url": "https://www.ox.ac.uk", "aff_unique_abbr": "Oxford", "aff_country_unique_index": "0", "aff_country_unique": "United Kingdom" }, { "id": "9U-cIq9P2p4", "title": "Learning Antidote Data to Individual Unfairness", "track": "main", "status": "Reject", "tldr": "", "abstract": "Fairness is an essential factor for machine learning systems deployed in high-stake applications. Among all fairness notions, individual fairness, following a consensus that `similar individuals should be treated similarly,' is a vital notion to guarantee fair treatment for individual cases. Previous methods typically characterize individual fairness as a prediction-invariant problem when perturbing sensitive attributes, and solve it by adopting the Distributionally Robust Optimization (DRO) paradigm. However, adversarial perturbations along a direction covering sensitive information do not consider the inherent feature correlations or innate data constraints, and thus mislead the model to optimize at off-manifold and unrealistic samples. In light of this, we propose a method to learn and generate antidote data that approximately follows the data distribution to remedy individual unfairness. These on-manifold antidote data can be used through a generic optimization procedure with original training data, resulting in a pure pre-processing approach to individual unfairness, or can also fit well with the in-processing DRO paradigm. 
Through extensive experiments, we demonstrate our antidote data resists individual unfairness at a minimal or zero cost to the model's predictive utility.", "keywords": "Individual Fairness;Antidote Data;Machine Learning Fairness", "primary_area": "", "supplementary_material": "/attachment/5cecda2bb2742844945059dc3a461e056f46ac45.zip", "author": "Peizhao Li;Ethan Xia;Hongfu Liu", "authorids": "~Peizhao_Li1;~Ethan_Xia1;~Hongfu_Liu2", "gender": "M;;M", "homepage": "https://peizhaoli.com;https://github.com/Torchee;http://hongfuliu.com/", "dblp": "232/1771;;32/9075-1", "google_scholar": "h8UyqB4AAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": ";;", "linkedin": "peizhao-li-099037182/;;", "or_profile": "~Peizhao_Li1;~Ethan_Xia1;~Hongfu_Liu2", "aff": "Brandeis University;;Brandeis University", "aff_domain": "brandeis.edu;;brandeis.edu", "position": "PhD student;;Assistant Professor", "bibtex": "@misc{\nli2023learning,\ntitle={Learning Antidote Data to Individual Unfairness},\nauthor={Peizhao Li and Ethan Xia and Hongfu Liu},\nyear={2023},\nurl={https://openreview.net/forum?id=9U-cIq9P2p4}\n}", "github": "", "project": "", "reviewers": "rYKn;rsds;Q48M;pdNt", "site": "https://openreview.net/forum?id=9U-cIq9P2p4", "pdf_size": 1029249, "recommendation": "3;3;5;5", "confidence": "4;5;3;4", "correctness": "3;2;3;3", "technical_novelty": "2;1;3;3", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "77;70;55;117", "wc_strength_and_weaknesses": "638;555;379;296", "wc_clarity_quality_novelty_and_reproducibility": "74;14;27;27", "wc_summary_review": "45;130;22;19", "wc_review": "834;769;483;459", "wc_reply_reviewers": "303;0;81;730", "wc_reply_authors": "1627;949;522;1072", "reply_reviewers": "2;0;1;1", "reply_authors": "4;2;1;3", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.82915619758885 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 79.75, 22.92787604642 ], "wc_strength_and_weaknesses_avg": [ 467.0, 135.98713174414704 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 35.5, 22.85278976405288 ], "wc_summary_review_avg": [ 54.0, 45.0166635813895 ], "wc_review_avg": [ 636.25, 167.05594122927803 ], "wc_reply_reviewers_avg": [ 278.5, 283.2935756419478 ], "wc_reply_authors_avg": [ 1042.5, 394.38971842582305 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.7071067811865475, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16067901438858534201&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 9, "aff_unique_index": "0;0", "aff_unique_norm": "Brandeis University", "aff_unique_dep": "", "aff_unique_url": "https://www.brandeis.edu", "aff_unique_abbr": "Brandeis", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "9WdB5yVICCA", "title": "CausalAgents: A Robustness Benchmark for Motion Forecasting Using Causal Relationships", "track": "main", "status": "Reject", "tldr": "We construct a benchmark to measure the robustness of motion forecasting models for autonomous driving; we find models are sensitive to deleting irrelevant agents from the scene.", "abstract": "As machine learning models become increasingly prevalent in motion forecasting 
systems for autonomous vehicles (AVs), it is critical that we ensure that model predictions are safe and reliable. However, exhaustively collecting and labeling the data necessary to fully test the long tail of rare and challenging scenarios is difficult and expensive. In this work, we construct a new benchmark for evaluating and improving model robustness by applying perturbations to existing data. Specifically, we conduct an extensive labeling effort to identify causal agents, or agents whose presence influences human driver behavior in any way, in the Waymo Open Motion Dataset (WOMD), and we use these labels to perturb the data by deleting non-causal agents from the scene. We then evaluate a diverse set of state-of-the-art deep-learning model architectures on our proposed benchmark and find that all models exhibit large shifts under perturbation. Under non-causal perturbations, we observe a 25-38% relative change in minADE as compared to the original. We then investigate techniques to improve model robustness, including increasing the training dataset size and using targeted data augmentations that drop agents throughout training. We provide the causal agent labels as an additional attribute to WOMD and release the robustness benchmarks to aid the community in building more reliable and safe deep-learning models for motion forecasting. \n", "keywords": "robustness;motion forecasting;self-driving cars", "primary_area": "", "supplementary_material": "/attachment/08ba0600943e55b7c5d4c0f94350896ef0e3a1ab.zip", "author": "Rebecca Roelofs;Liting Sun;Benjamin Caine;Khaled S. Refaat;Benjamin Sapp;Scott Ettinger;Wei Chai", "authorids": "~Rebecca_Roelofs1;~Liting_Sun1;~Benjamin_Caine1;~Khaled_S._Refaat1;~Benjamin_Sapp3;~Scott_Ettinger1;chaiwei@google.com", "gender": "F;;M;;M;M;", "homepage": ";;;http://khaledrefaat.com/;;;", "dblp": "145/2224;;;42/2444;54/5582;72/174;", "google_scholar": ";BitIg-YAAAAJ;KS-nDCMAAAAJ;;aPqcyU4AAAAJ;-4n1jYUAAAAJ;", "orcid": ";;;;;;", "linkedin": ";;;;;scott-ettinger-5961b147/;", "or_profile": "~Rebecca_Roelofs1;~Liting_Sun1;~Benjamin_Caine1;~Khaled_S._Refaat1;~Benjamin_Sapp3;~Scott_Ettinger1;chaiwei@google.com", "aff": "Google;;Google Brain;Waymo;Waymo;Waymo LLC;", "aff_domain": "google.com;;google.com;waymo.com;waymo.com;waymo.com;", "position": "Research scientist;;Research Software Engineer;Research Scientist;Researcher;Researcher;", "bibtex": "@misc{\nroelofs2023causalagents,\ntitle={CausalAgents: A Robustness Benchmark for Motion Forecasting Using Causal Relationships},\nauthor={Rebecca Roelofs and Liting Sun and Benjamin Caine and Khaled S. 
Refaat and Benjamin Sapp and Scott Ettinger and Wei Chai},\nyear={2023},\nurl={https://openreview.net/forum?id=9WdB5yVICCA}\n}", "github": "", "project": "", "reviewers": "vJxd;Chzn;3msf;UcfZ;oUDJ", "site": "https://openreview.net/forum?id=9WdB5yVICCA", "pdf_size": 6289536, "recommendation": "3;5;5;6;6", "confidence": "5;5;3;4;4", "correctness": "3;2;3;3;3", "technical_novelty": "2;2;2;2;3", "empirical_novelty": "4;2;2;4;3", "wc_summary_paper": "77;166;69;97;81", "wc_strength_and_weaknesses": "354;297;307;1093;252", "wc_clarity_quality_novelty_and_reproducibility": "73;14;36;80;39", "wc_summary_review": "59;42;23;73;44", "wc_review": "563;519;435;1343;416", "wc_reply_reviewers": "0;139;0;115;0", "wc_reply_authors": "571;257;230;782;397", "reply_reviewers": "0;1;0;1;0", "reply_authors": "1;1;1;1;1", "recommendation_avg": [ 5.0, 1.0954451150103321 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "correctness_avg": [ 2.8, 0.39999999999999997 ], "technical_novelty_avg": [ 2.2, 0.39999999999999997 ], "empirical_novelty_avg": [ 3.0, 0.8944271909999159 ], "wc_summary_paper_avg": [ 98.0, 35.20227265390688 ], "wc_strength_and_weaknesses_avg": [ 460.6, 317.8569489566022 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 48.4, 24.613817257792423 ], "wc_summary_review_avg": [ 48.2, 16.868906307167634 ], "wc_review_avg": [ 655.2, 348.0875751876243 ], "wc_reply_reviewers_avg": [ 50.8, 62.6782258842734 ], "wc_reply_authors_avg": [ 447.4, 206.53774473446734 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.48795003647426666, "corr_recommendation_correctness": 0.0, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6030481932152549966&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;1;1;1", "aff_unique_norm": "Google;Waymo", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://www.waymo.com", "aff_unique_abbr": "Google;Waymo", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Learning Hierarchical Protein Representations via Complete 3D Graph Networks", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11707", "id": "9X-hgLDLYkQ", "poster": "", "openreview": "https://openreview.net/forum?id=9X-hgLDLYkQ", "slides": "https://iclr.cc/virtual/2023/poster/11707", "video": "https://iclr.cc/virtual/2023/poster/11707", "author_site": "Limei Wang, Haoran Liu, Yi Liu, Jerry Kurtin, Shuiwang Ji", "tldr": "", "abstract": "We consider representation learning for proteins with 3D structures. We build 3D graphs based on protein structures and develop graph networks to learn their representations. Depending on the levels of details that we wish to capture, protein representations can be computed at different levels, \\emph{e.g.}, the amino acid, backbone, or all-atom levels. Importantly, there exist hierarchical relations among different levels. In this work, we propose to develop a novel hierarchical graph network, known as ProNet, to capture the relations. Our ProNet is very flexible and can be used to compute protein representations at different levels of granularity. By treating each amino acid as a node in graph modeling as well as harnessing the inherent hierarchies, our ProNet is more effective and efficient than existing methods. 
We also show that, given a base 3D graph network that is complete, our ProNet representations are also complete at all levels. Experimental results show that ProNet outperforms recent methods on most datasets. In addition, results indicate that different downstream tasks may require representations at different levels. Our code is publicly available as part of the DIG library (\\url{https://github.com/divelab/DIG}).", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Limei Wang;Haoran Liu;Yi Liu;Jerry Kurtin;Shuiwang Ji", "authorids": "~Limei_Wang1;~Haoran_Liu1;~Yi_Liu12;~Jerry_Kurtin1;~Shuiwang_Ji1", "gender": ";;;M;M", "homepage": "https://limei0307.github.io/;;;;http://people.tamu.edu/~sji", "dblp": "57/2674;;;;84/6405", "google_scholar": "https://scholar.google.com/citations?hl=en;;;;BZGj6sAAAAAJ", "orcid": ";;;;0000-0002-4205-4563", "linkedin": ";;;http://www.linkedin.com/in/jerry-kurtin;shuiwang-ji-9a040715/", "or_profile": "~Limei_Wang1;~Haoran_Liu1;~Yi_Liu12;~Jerry_Kurtin1;~Shuiwang_Ji1", "aff": "Texas A&M;;;Texas A&M University - College Station;Texas A&M University", "aff_domain": "tamu.edu;;;tamu.edu;tamu.edu", "position": "PhD student;;;Undergrad student;Professor", "bibtex": "@inproceedings{\nwang2023learning,\ntitle={Learning Hierarchical Protein Representations via Complete 3D Graph Networks},\nauthor={Limei Wang and Haoran Liu and Yi Liu and Jerry Kurtin and Shuiwang Ji},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=9X-hgLDLYkQ}\n}", "github": "", "project": "", "reviewers": "KnXi;hFzJ;X8gX;S9jc", "pdf_size": 1918041, "recommendation": "3;6;6;8", "confidence": "4;3;4;3", "correctness": "3;4;4;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "0;2;0;3", "wc_summary_paper": "18;44;73;77", "wc_strength_and_weaknesses": "391;138;98;61", "wc_clarity_quality_novelty_and_reproducibility": "39;16;6;45", "wc_summary_review": "19;21;27;27", "wc_review": "467;219;204;210", "wc_reply_reviewers": "0;0;31;0", "wc_reply_authors": "3950;2244;1488;1218", "reply_reviewers": "0;0;1;0", "reply_authors": "8;5;3;3", "recommendation_avg": [ 5.75, 1.7853571071357126 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.25, 1.299038105676658 ], "wc_summary_paper_avg": [ 53.0, 23.885141824992374 ], "wc_strength_and_weaknesses_avg": [ 172.0, 129.3387026376869 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 26.5, 16.03901493234544 ], "wc_summary_review_avg": [ 23.5, 3.570714214271425 ], "wc_review_avg": [ 275.0, 110.97972787856348 ], "wc_reply_reviewers_avg": [ 7.75, 13.423393758658799 ], "wc_reply_authors_avg": [ 2225.0, 1064.5661087973822 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 4.75, 2.0463381929681126 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.7001400420140049, "corr_recommendation_correctness": 0.14002800840280097, "gs_citation": 76, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2293587538847040248&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=9X-hgLDLYkQ", "email": "tamu.edu;;;tamu.edu;tamu.edu", "author_num": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "Texas A&M University", "aff_unique_dep": "", "aff_unique_url": "https://www.tamu.edu", "aff_unique_abbr": "TAMU", "aff_campus_unique_index": "1", 
"aff_campus_unique": ";College Station", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "9X3UZJSGIg9", "title": "Adversarial Text to Continuous Image Generation", "track": "main", "status": "Reject", "tldr": "Receiving text input, hypernetworks generate weights for INR-GAN to synthesize image.", "abstract": "Implicit Neural Representations (INR) provide a natural way to parametrize images as a continuous signal, using an MLP that predicts the RGB color at an (x, y) image location. Recently, it has been demonstrated that high-quality INR-decoders can be designed and integrated with Generative Adversarial Networks (GANs) to facilitate unconditional continuous image generation, that are no longer bounded to a spatial resolution. In this paper, we introduce HyperCGAN, a conceptually simple approach for Adversarial Text to Continuous Image Generation based on HyperNetworks, which are networks that produce parameters for another network. HyperCGAN utilizes HyperNetworks to condition an INR-based GAN model on text. In this setting, the generator and the discriminator weights are controlled by their corresponding HyperNetworks, which modulate weight parameters using the provided text query. We propose an effective Word-level hyper-modulation Attention operator, termed WhAtt, which encourages grounding words to independent pixels at input (x, y) coordinates. To the best of our knowledge, our work is the first that explores text-controllable continuous image generation. We conduct comprehensive experiments on the COCO 256x256, CUB 256x256, and the ArtEmis 256x256 benchmark which we introduce in this paper. HyperCGAN improves the performance of text-controllable image generators over the baselines while significantly reducing the gap between text-to-continuous and text-to-discrete image synthesis. 
Additionally, we show that HyperCGAN, when conditioned on text, retains the desired properties of continuous generative models (e.g., extrapolation outside of image boundaries, accelerated inference of low-resolution images, out-of-the-box superresolution).", "keywords": "gan;generative modelling;text-to-image;text2image;hypernetworks", "primary_area": "", "supplementary_material": "", "author": "Kilichbek Haydarov;Aashiq Muhamed;Jovana Lazarevic;Ivan Skorokhodov;Xiaoqian Shen;Chamuditha Jayanga Galappaththige;Mohamed Elhoseiny", "authorids": "~Kilichbek_Haydarov2;~Aashiq_Muhamed1;~Jovana_Lazarevic1;~Ivan_Skorokhodov1;~Xiaoqian_Shen3;~Chamuditha_Jayanga_Galappaththige1;~Mohamed_Elhoseiny1", "gender": "M;M;F;M;F;M;M", "homepage": "https://kilichbek.github.io/webpage/;https://github.com/aashiqmuhamed;;https://universome.github.io/;https://xiaoqian-shen.github.io;;http://www.mohamed-elhoseiny.com", "dblp": "259/1409;294/0107;;223/0010;197/6114;;125/2894", "google_scholar": "IW4UWrMAAAAJ;GbVC5NYAAAAJ;;https://scholar.google.com/citations?hl=en;uToGtIwAAAAJ;ZVnuYSYAAAAJ;iRBUTOAAAAAJ", "orcid": "0000-0002-3062-2228;;;0000-0002-7611-9310;;;0000-0001-9659-1551", "linkedin": "kilichbek-haydarov/;aashiq-muhamed-52169421/;jovana-lazarevi%C4%87-a940831bb/;ivan-skorokhodov;xiaoqian-shen-759991264;chamuditha-jayanga-624958198;mohamed-elhoseiny-8a836215/", "or_profile": "~Kilichbek_Haydarov2;~Aashiq_Muhamed1;~Jovana_Lazarevic1;~Ivan_Skorokhodov1;~Xiaoqian_Shen3;~Chamuditha_Jayanga_Galappaththige1;~Mohamed_Elhoseiny1", "aff": "King Abdullah University of Science and Technology;Amazon;;KAUST;King Abdullah University of Science and Technology;University of Moratuwa;KAUST", "aff_domain": "kaust.edu.sa;amazon.com;;kaust.edu.sa;kaust.edu.sa;mrt.ac.lk;kaust.edu.sa", "position": "PhD student;Researcher;;PhD student;PhD student;Undergrad student;Associate Professor", "bibtex": "@misc{\nhaydarov2023adversarial,\ntitle={Adversarial Text to Continuous Image Generation},\nauthor={Kilichbek Haydarov and Aashiq Muhamed and Jovana Lazarevic and Ivan Skorokhodov and Xiaoqian Shen and Chamuditha Jayanga Galappaththige and Mohamed Elhoseiny},\nyear={2023},\nurl={https://openreview.net/forum?id=9X3UZJSGIg9}\n}", "github": "", "project": "", "reviewers": "cHep;xx7K;3nfM;opUk", "site": "https://openreview.net/forum?id=9X3UZJSGIg9", "pdf_size": 14645681, "recommendation": "3;5;5;6", "confidence": "3;4;5;3", "correctness": "3;3;3;4", "technical_novelty": "2;2;3;2", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "83;95;54;49", "wc_strength_and_weaknesses": "326;225;298;94", "wc_clarity_quality_novelty_and_reproducibility": "46;82;148;71", "wc_summary_review": "28;53;38;54", "wc_review": "483;455;538;268", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1476;1230;1008;709", "reply_reviewers": "0;0;0;0", "reply_authors": "3;3;3;1", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 70.25, 19.30511590226798 ], "wc_strength_and_weaknesses_avg": [ 235.75, 89.7618376594419 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 86.75, 37.692008436802624 ], "wc_summary_review_avg": [ 43.25, 10.848386976873567 ], "wc_review_avg": [ 436.0, 101.48645229783136 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1105.75, 282.6166794440838 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 
2.5, 0.8660254037844386 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.20751433915982243, "corr_recommendation_correctness": 0.6622661785325219, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7860740978810480707&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;0;0;2;0", "aff_unique_norm": "King Abdullah University of Science and Technology;Amazon;University of Moratuwa", "aff_unique_dep": ";Amazon.com, Inc.;", "aff_unique_url": "https://www.kast.kau.edu.sa;https://www.amazon.com;https://www.mrt.ac.lk", "aff_unique_abbr": "KAUST;Amazon;UoM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;2;0", "aff_country_unique": "Saudi Arabia;United States;Sri Lanka" }, { "id": "9XAZBUfnefS", "title": "ProtFIM: Fill-in-Middle Protein Sequence Design via Protein Language Models", "track": "main", "status": "Reject", "tldr": " We propose a new evaluation scheme and protein language model for fill-in-middle protein sequence design.", "abstract": "Following the observation that a protein's sequence determines its structure and function, engineering protein sequences allows us to optimize the functions of proteins for specific purposes such as enhancement of catalytic activity or binding affinity maturation. In protein engineering, there are many cases where the amino acids in the middle of a protein sequence are changed while the remaining residues are kept unchanged, to avoid unwanted functional changes from those residues. However, existing research on protein sequence design via protein language models (PLMs) has focused on modifying suffix residues by prompting prefix residues to the model or mutating the overall sequence residues. This is unsuitable for scenarios where the residues located in the middle of the sequence are to be optimized. In this work, we suggest a PLM-based framework to solve fill-in-middle (FIM) protein engineering tasks. To evaluate the performance of PLMs on the FIM tasks, we design a novel evaluation scheme where PLMs are tasked to generate new sequences while maintaining the secondary structures. Also, we propose a new PROTein language model specialized for the Fill-In-Middle task, ProtFIM. Experiments confirm that ProtFIM performs FIM engineering efficiently, especially for alpha-helix structures, and provides decent protein representations of sequence-function relationships. 
Finally, we demonstrate an artificial protein sequence design framework composed of ProtFIM and a high-quality structure predictor as a novel tool to optimize protein sequences.", "keywords": "Protein language modeling;Protein engineering;Text infilling", "primary_area": "", "supplementary_material": "", "author": "Youhan Lee;Hasun Yu", "authorids": "~Youhan_Lee1;~Hasun_Yu2", "gender": "M;M", "homepage": ";", "dblp": "190/1819;153/5409.html", "google_scholar": "https://scholar.google.co.kr/citations?user=EFNg9UcAAAAJ;https://scholar.google.co.kr/citations?user=CvbGPQYAAAAJ", "orcid": ";", "linkedin": "youhanlee/;https://kr.linkedin.com/in/hasun-yu-733291119", "or_profile": "~Youhan_Lee1;~Hasun_Yu2", "aff": "Kakao Brain Corp;Kakao Brain", "aff_domain": "kakaobrain.com;kakaobrain.com", "position": "Researcher;Researcher", "bibtex": "@misc{\nlee2023protfim,\ntitle={Prot{FIM}: Fill-in-Middle Protein Sequence Design via Protein Language Models},\nauthor={Youhan Lee and Hasun Yu},\nyear={2023},\nurl={https://openreview.net/forum?id=9XAZBUfnefS}\n}", "github": "", "project": "", "reviewers": "ps6t;c4rP;tRtc;2Lrn", "site": "https://openreview.net/forum?id=9XAZBUfnefS", "pdf_size": 6157424, "recommendation": "3;5;5;5", "confidence": "4;4;4;4", "correctness": "3;2;3;4", "technical_novelty": "2;2;2;1", "empirical_novelty": "1;2;2;2", "wc_summary_paper": "30;107;40;86", "wc_strength_and_weaknesses": "103;215;197;174", "wc_clarity_quality_novelty_and_reproducibility": "74;52;21;73", "wc_summary_review": "15;82;36;111", "wc_review": "222;456;294;444", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "649;1110;768;552", "reply_reviewers": "0;0;0;0", "reply_authors": "1;2;2;1", "recommendation_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 65.75, 31.830606340439072 ], "wc_strength_and_weaknesses_avg": [ 172.25, 42.540421953713626 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 55.0, 21.50581316760657 ], "wc_summary_review_avg": [ 61.0, 37.689521090085506 ], "wc_review_avg": [ 354.0, 99.40824915468535 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 769.75, 210.8131577961869 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2631115812121782662&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Kakao Brain", "aff_unique_dep": "Corp", "aff_unique_url": "https://www.kakaobrain.com", "aff_unique_abbr": "Kakao Brain", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "title": "Progress measures for grokking via mechanistic interpretability", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11385", "id": "9XFSbDPmdW", "poster": "", "openreview": "https://openreview.net/forum?id=9XFSbDPmdW", "slides": "https://iclr.cc/virtual/2023/poster/11385", "video": "https://iclr.cc/virtual/2023/poster/11385", "author_site": "Neel Nanda, Lawrence Chan, Tom Lieberum, Jess Smith, Jacob Steinhardt", "tldr": "We fully reverse engineer how one-layer transformers implement modular addition, and use this 
knowledge to explain grokking. ", "abstract": "Neural networks often exhibit emergent behavior in which qualitatively new capabilities arise from scaling up the number of parameters, training data, or even the number of steps. One approach to understanding emergence is to find the continuous \textit{progress measures} that underlie the seemingly discontinuous qualitative changes. In this work, we argue that progress measures can be found via mechanistic interpretability---that is, by reverse engineering learned models into components and measuring the progress of each component over the course of training. As a case study, we study small transformers trained on a modular arithmetic task with emergent grokking behavior. We fully reverse engineer the algorithm learned by these networks, which uses discrete Fourier transforms and trigonometric identities to convert addition to rotation about a circle. After confirming the algorithm via ablation, we then use our understanding of the algorithm to define progress measures that precede the grokking phase transition on this task. We see our result as demonstrating both that it is possible to fully reverse engineer trained networks, and that doing so can be invaluable to understanding their training dynamics. ", "keywords": "interpretability;grokking;progress measures;mechanistic interpretability;circuits", "primary_area": "", "supplementary_material": "", "author": "Neel Nanda;Lawrence Chan;Tom Lieberum;Jess Smith;Jacob Steinhardt", "authorids": "~Neel_Nanda1;~Lawrence_Chan2;~Tom_Lieberum1;smith.jessk@gmail.com;~Jacob_Steinhardt1", "gender": "M;M;;;", "homepage": "https://neelnanda.io;https://chanlawrence.me/;http://tomfrederik.github.io/;;", "dblp": "285/6389;28/2626;;;35/10625", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?view_op=list_works;;;", "orcid": ";;;;", "linkedin": "https://linkedin.com/in/neel-nanda-993580151;;;;", "or_profile": "~Neel_Nanda1;~Lawrence_Chan2;~Tom_Lieberum1;smith.jessk@gmail.com;~Jacob_Steinhardt1", "aff": "Google DeepMind;University of California, Berkeley;;;University of California, Berkeley", "aff_domain": "deepmind.com;berkeley.edu;;;berkeley.edu", "position": "Researcher;PhD student;;;Assistant Professor", "bibtex": "@inproceedings{\nnanda2023progress,\ntitle={Progress measures for grokking via mechanistic interpretability},\nauthor={Neel Nanda and Lawrence Chan and Tom Lieberum and Jess Smith and Jacob Steinhardt},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=9XFSbDPmdW}\n}", "github": "", "project": "", "reviewers": "WFHw;u853;g9KN", "pdf_size": 2981941, "recommendation": "8;8;8", "confidence": "4;4;4", "correctness": "3;4;4", "technical_novelty": "3;3;4", "empirical_novelty": "0;4;4", "wc_summary_paper": "89;192;149", "wc_strength_and_weaknesses": "74;1072;310", "wc_clarity_quality_novelty_and_reproducibility": "53;109;127", "wc_summary_review": "110;222;24", "wc_review": "326;1595;610", "wc_reply_reviewers": "59;127;0", "wc_reply_authors": "551;1305;769", "reply_reviewers": "1;1;0", "reply_authors": "1;2;1", "recommendation_avg": [ 8.0, 0.0 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 1.8856180831641267 ], "wc_summary_paper_avg": [ 143.33333333333334, 42.24005471376928 ], 
"wc_strength_and_weaknesses_avg": [ 485.3333333333333, 425.8773949807099 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 96.33333333333333, 31.510139461590594 ], "wc_summary_review_avg": [ 118.66666666666667, 81.0651315644128 ], "wc_review_avg": [ 843.6666666666666, 543.7771193748001 ], "wc_reply_reviewers_avg": [ 62.0, 51.89091121445707 ], "wc_reply_authors_avg": [ 875.0, 316.8132993841431 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 397, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11834409806013703356&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=9XFSbDPmdW", "email": "deepmind.com;berkeley.edu;;;berkeley.edu", "author_num": 5, "aff_unique_index": "0;1;1", "aff_unique_norm": "Google;University of California, Berkeley", "aff_unique_dep": "Google DeepMind;", "aff_unique_url": "https://deepmind.com;https://www.berkeley.edu", "aff_unique_abbr": "DeepMind;UC Berkeley", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United Kingdom;United States" }, { "id": "9XFX-DdkGp9", "title": "SPI-GAN: Denoising Diffusion GANs with Straight-Path Interpolations", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Score-based generative models (SGMs) are a recently proposed paradigm for deep generative tasks and now show the state-of-the-art sampling performance. It is known that the original SGM design solves the two problems of the generative trilemma: i) sampling quality, and ii) sampling diversity. However, the last problem of the trilemma was not solved, i.e., their training/sampling complexity is notoriously high. To this end, combining SGMs with simpler models, e.g., generative adversarial networks (GANs), is gathering much attention currently. We present an enhanced denoising method using GANs, called straight-path interpolation GAN (SPI-GAN), which drastically reduces the sampling time while achieving as high sampling quality and diversity as SGMs. Our SPI-GAN can be compared to the state-of-the-art shortcut-based denoising method using GANs, called denoising diffusion GAN (DD-GAN). However, our method corresponds to an extreme method that does not use any intermediate shortcut information of the reverse SDE path, in which case DD-GAN ($K=1$) fails to obtain good results. Nevertheless, our straight-path interpolation method greatly stabilizes the overall training process. 
As a result, SPI-GAN is one of the best-balanced models in terms of the sampling quality/diversity/time for CIFAR-10, CelebA-HQ-256, and LSUN-Church-256.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/0836b441825ea73814142675b8066e1997210a13.zip", "author": "Jinsung Jeon;Noseong Park", "authorids": "~Jinsung_Jeon1;~Noseong_Park1", "gender": ";", "homepage": "https://sites.google.com/view/npark/home?authuser=0;", "dblp": "294/0098;", "google_scholar": "0R6W6lsAAAAJ;", "orcid": "0000-0002-9693-2739;", "linkedin": "jinsung-jeon-994942289/;", "or_profile": "~Jinsung_Jeon1;~Noseong_Park1", "aff": "Yonsei University;", "aff_domain": "yonsei.ac.kr;", "position": "PhD student;", "bibtex": "@misc{\njeon2023spigan,\ntitle={{SPI}-{GAN}: Denoising Diffusion {GAN}s with Straight-Path Interpolations},\nauthor={Jinsung Jeon and Noseong Park},\nyear={2023},\nurl={https://openreview.net/forum?id=9XFX-DdkGp9}\n}", "github": "", "project": "", "reviewers": "uu8L;Araf;4xZo;ntMy;57Ba;vNnN", "site": "https://openreview.net/forum?id=9XFX-DdkGp9", "pdf_size": 45753798, "recommendation": "3;3;5;5;5;6", "confidence": "3;3;4;4;3;4", "correctness": "2;2;3;3;3;4", "technical_novelty": "2;2;3;3;3;3", "empirical_novelty": "3;0;2;3;2;2", "wc_summary_paper": "127;71;49;87;90;112", "wc_strength_and_weaknesses": "935;254;77;151;323;64", "wc_clarity_quality_novelty_and_reproducibility": "306;84;34;17;165;36", "wc_summary_review": "78;33;428;30;83;54", "wc_review": "1446;442;588;285;661;266", "wc_reply_reviewers": "342;0;99;89;92;0", "wc_reply_authors": "559;375;469;217;340;152", "reply_reviewers": "1;0;1;1;1;0", "reply_authors": "1;1;1;1;1;1", "recommendation_avg": [ 4.5, 1.118033988749895 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.8333333333333335, 0.6871842709362768 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 1.0 ], "wc_summary_paper_avg": [ 89.33333333333333, 25.4994553318737 ], "wc_strength_and_weaknesses_avg": [ 300.6666666666667, 298.187115005923 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 107.0, 101.62348809863462 ], "wc_summary_review_avg": [ 117.66666666666667, 140.23155454065568 ], "wc_review_avg": [ 614.6666666666666, 398.7554249021433 ], "wc_reply_reviewers_avg": [ 103.66666666666667, 114.5057009740369 ], "wc_reply_authors_avg": [ 352.0, 138.70592393021047 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.7453559924999299, "corr_recommendation_correctness": 0.976187060183953, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:cfDWqu4Oi9YJ:scholar.google.com/&scioq=SPI-GAN:+Denoising+Diffusion+GANs+with+Straight-Path+Interpolations&hl=en&as_sdt=0,39", "gs_version_total": 4, "aff_unique_index": "0", "aff_unique_norm": "Yonsei University", "aff_unique_dep": "", "aff_unique_url": "https://www.yonsei.ac.kr", "aff_unique_abbr": "Yonsei", "aff_country_unique_index": "0", "aff_country_unique": "South Korea" }, { "id": "9Y0P3YoERSy", "title": "The GANfather: Controllable generation of malicious activity to expose detection weaknesses and improve defence systems.", "track": "main", "status": "Reject", "tldr": "", "abstract": "Criminal activities are typically adversarial in nature, where an attacker and a defence system are constantly adapting to each other's behaviour. 
If the defence systems are helped by automated detection methods, then those methods need to be updated frequently. In practice, this means that the defence systems are always one step behind the attackers. For example, in anti-money laundering systems, new labels representing suspicious activity are frequently delayed by weeks or months and some money laundering activity may never be found, leading to detection systems that are inaccurate and resulting in an estimated undetected \u20ac0.7-3 trillion being laundered annually.\n\nTo tackle the problem of missing or delayed labels in adversarial settings, we propose The GANfather, an adversarial and label-free method to both (1) generate a variety of meaningful attacks, as guided by a custom, user-defined objective function; and (2) train a defence system to detect such attacks. Optionally, we can ensure that the generated attacks escape an existing detection system, revealing current weaknesses which the new defence system actively corrects. Our method is inspired by generative adversarial networks (GANs), but unlike GANs we nudge our generator to produce out-of-distribution data using a loss function that characterises criminal activity. Importantly, our method does not require any labelled examples.\n\nWe test our framework in two real-world use-cases, namely injection attacks in recommendation systems and anti-money laundering. In the former, we show how an injection attack with a limited number of generated fake profiles is sufficient to successfully recommend an item to a large number of users. These generated injection attacks are more effective in recommending the target item than naive \u2018bombing\u2019 strategies and harder to detect. In the latter, the generated attacks are able to simulate money laundering and move cumulative amounts close to 250 thousand dollars through a network of accounts without being detected by existing systems. We also show how we can train a new defence system that captures all these synthetic attacks, potentially saving millions of dollars in detected criminal activity. 
Our method is generic and applicable in a variety of adversarial domains, exposing current liabilities with the generated data and strengthening the defence systems against current and future malicious attacks.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Ricardo Ribeiro Pereira;Jacopo Bono;Jo\u00e3o Tiago Ascens\u00e3o;David Oliveira Aparicio;Pedro Manuel Pinto Ribeiro;Pedro Bizarro", "authorids": "~Ricardo_Ribeiro_Pereira1;~Jacopo_Bono1;~Jo\u00e3o_Tiago_Ascens\u00e3o1;~David_Oliveira_Aparicio1;~Pedro_Manuel_Pinto_Ribeiro1;~Pedro_Bizarro1", "gender": "M;;M;;F;", "homepage": ";;;;https://www.dcc.fc.up.pt/~pribeiro/;", "dblp": ";;;;82/451mp;b/PedroBizarro.html", "google_scholar": "https://scholar.google.com/citations?hl=en;;tAmfrwsAAAAJ;QcKZvPkAAAAJ;DbR0sO4AAAAJ;", "orcid": ";;;;;", "linkedin": ";;jtascensao/;;;https://www.linkedin.com/mwlite/in/pedrobizarro", "or_profile": "~Ricardo_Ribeiro_Pereira1;~Jacopo_Bono1;~Jo\u00e3o_Tiago_Ascens\u00e3o1;~David_Oliveira_Aparicio1;~Pedro_Manuel_Pinto_Ribeiro1;~Pedro_Bizarro1", "aff": "Feedzai;;Feedzai;;Universidade do Porto;Feedzai", "aff_domain": "feedzai.com;;feedzai.com;;up.pt;feedzai.com", "position": "Researcher;;Researcher;;Assistant Professor;Principal Researcher", "bibtex": "@misc{\npereira2023the,\ntitle={The {GAN}father: Controllable generation of malicious activity to expose detection weaknesses and improve defence systems.},\nauthor={Ricardo Ribeiro Pereira and Jacopo Bono and Jo{\\~a}o Tiago Ascens{\\~a}o and David Oliveira Aparicio and Pedro Manuel Pinto Ribeiro and Pedro Bizarro},\nyear={2023},\nurl={https://openreview.net/forum?id=9Y0P3YoERSy}\n}", "github": "", "project": "", "reviewers": "kgFf;x8ad;hdrh", "site": "https://openreview.net/forum?id=9Y0P3YoERSy", "pdf_size": 712302, "recommendation": "1;3;6", "confidence": "4;4;5", "correctness": "2;3;1", "technical_novelty": "1;2;1", "empirical_novelty": "0;2;0", "wc_summary_paper": "59;58;99", "wc_strength_and_weaknesses": "163;312;32", "wc_clarity_quality_novelty_and_reproducibility": "2;30;84", "wc_summary_review": "21;23;1273", "wc_review": "245;423;1488", "wc_reply_reviewers": "0;0;1385", "wc_reply_authors": "401;761;2185", "reply_reviewers": "0;0;3", "reply_authors": "1;1;4", "recommendation_avg": [ 3.3333333333333335, 2.0548046676563256 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 2.0, 0.816496580927726 ], "technical_novelty_avg": [ 1.3333333333333333, 0.4714045207910317 ], "empirical_novelty_avg": [ 0.6666666666666666, 0.9428090415820634 ], "wc_summary_paper_avg": [ 72.0, 19.096247449870006 ], "wc_strength_and_weaknesses_avg": [ 169.0, 114.38822783252945 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 38.666666666666664, 34.03266404826725 ], "wc_summary_review_avg": [ 439.0, 589.7276207425481 ], "wc_review_avg": [ 718.6666666666666, 548.8329031276784 ], "wc_reply_reviewers_avg": [ 461.6666666666667, 652.8952612955788 ], "wc_reply_authors_avg": [ 1115.6666666666667, 770.2836418416847 ], "reply_reviewers_avg": [ 1.0, 1.4142135623730951 ], "reply_authors_avg": [ 2.0, 1.4142135623730951 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.917662935482247, "corr_recommendation_correctness": -0.5960395606792697, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5923133927734312247&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Feedzai;Universidade do Porto", 
"aff_unique_dep": ";", "aff_unique_url": "https://www.feedzai.com;https://www.up.pt", "aff_unique_abbr": "Feedzai;UPorto", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Portugal" }, { "title": "Disentanglement with Biological Constraints: A Theory of Functional Cell Types", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10981", "id": "9Z_GfhZnGH", "poster": "", "openreview": "https://openreview.net/forum?id=9Z_GfhZnGH", "slides": "https://iclr.cc/virtual/2023/poster/10981", "video": "https://iclr.cc/virtual/2023/poster/10981", "author_site": "James Whittington, Will Dorrell, Surya Ganguli, Timothy Behrens", "tldr": "We prove biological constraints of nonnegativity and energy efficiency lead to disentanged representations, and empirically demonstrate this in machine learning and neuroscience tasks.", "abstract": "Neurons in the brain are often finely tuned for specific task variables. Moreover, such disentangled representations are highly sought after in machine learning. Here we mathematically prove that simple biological constraints on neurons, namely nonnegativity and energy efficiency in both activity and weights, promote such sought after disentangled representations by enforcing neurons to become selective for single factors of task variation. We demonstrate these constraints lead to disentanglement in a variety of tasks and architectures, including variational autoencoders. We also use this theory to explain why the brain partitions its cells into distinct cell types such as grid and object-vector cells, and also explain when the brain instead entangles representations in response to entangled task factors. Overall, this work provides a mathematical understanding of why single neurons in the brain often represent single human-interpretable factors, and steps towards an understanding task structure shapes the structure of brain representation.", "keywords": "Disentangling;neurosciece;representation learning;hippocampus;cortex", "primary_area": "", "supplementary_material": "/attachment/e164cdd96c9fc3eb8f7bb808af24fed55db04d69.zip", "author": "James C. R. Whittington;Will Dorrell;Surya Ganguli;Timothy Behrens", "authorids": "~James_C._R._Whittington1;~Will_Dorrell1;~Surya_Ganguli1;behrens@fmrib.ox.ac.uk", "gender": ";M;M;", "homepage": "http://www.jcrwhittington.com;http://www.williamdorrell.co.uk/;http://ganguli-gang.stanford.edu/surya.html;", "dblp": "198/7308;;56/10453;", "google_scholar": "https://scholar.google.co.uk/citations?user=zUu0JKYAAAAJ;GyVPmtYAAAAJ;;", "orcid": "0000-0001-5680-5586;;;", "linkedin": ";;;", "or_profile": "~James_C._R._Whittington1;~Will_Dorrell1;~Surya_Ganguli1;behrens@fmrib.ox.ac.uk", "aff": "University of Oxford;University College London, University of London;Stanford University;", "aff_domain": "oxford.ac.uk;ucl.ac.uk;@stanford.edu;", "position": "Postdoc;PhD student;Assistant Professor;", "bibtex": "@inproceedings{\nwhittington2023disentanglement,\ntitle={Disentanglement with Biological Constraints: A Theory of Functional Cell Types},\nauthor={James C. R. 
Whittington and Will Dorrell and Surya Ganguli and Timothy Behrens},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=9Z_GfhZnGH}\n}", "github": "", "project": "", "reviewers": "9LCk;pBZV;GUiS;4iCk", "pdf_size": 9586297, "recommendation": "6;8;8;10", "confidence": "4;3;4;4", "correctness": "3;4;4;4", "technical_novelty": "3;3;4;4", "empirical_novelty": "3;3;4;4", "wc_summary_paper": "106;164;146;62", "wc_strength_and_weaknesses": "174;489;376;202", "wc_clarity_quality_novelty_and_reproducibility": "803;133;22;21", "wc_summary_review": "189;39;100;79", "wc_review": "1272;825;644;364", "wc_reply_reviewers": "0;0;0;11", "wc_reply_authors": "4138;1610;869;585", "reply_reviewers": "0;0;0;1", "reply_authors": "7;3;3;2", "recommendation_avg": [ 8.0, 1.4142135623730951 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 3.5, 0.5 ], "empirical_novelty_avg": [ 3.5, 0.5 ], "wc_summary_paper_avg": [ 119.5, 39.27785635698567 ], "wc_strength_and_weaknesses_avg": [ 310.25, 128.99297461489908 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 244.75, 325.5045122575108 ], "wc_summary_review_avg": [ 101.75, 54.933482503842775 ], "wc_review_avg": [ 776.25, 329.99422343428984 ], "wc_reply_reviewers_avg": [ 2.75, 4.763139720814412 ], "wc_reply_authors_avg": [ 1800.5, 1400.4757227456676 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.75, 1.920286436967152 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.816496580927726, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9236933147820031901&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=9Z_GfhZnGH", "email": "oxford.ac.uk;ucl.ac.uk;@stanford.edu;", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Oxford;University College London;Stanford University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ox.ac.uk;https://www.ucl.ac.uk;https://www.stanford.edu", "aff_unique_abbr": "Oxford;UCL;Stanford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Symmetries, Flat Minima, and the Conserved Quantities of Gradient Flow", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11070", "id": "9ZpciCOunFb", "poster": "/media/PosterPDFs/ICLR%202023/11070.png?t=1681736763.9236643", "openreview": "https://openreview.net/forum?id=9ZpciCOunFb", "slides": "https://iclr.cc/virtual/2023/poster/11070", "video": "https://iclr.cc/virtual/2023/poster/11070", "author_site": "Bo Zhao, Iordan Ganev, Robin Walters, Rose Yu, Nima Dehmamy", "tldr": "We introduce a framework for finding linear and nonlinear continuous symmetries in deep learning and show how they lead to extended local minima and conserved quantities", "abstract": "Empirical studies of the loss landscape of deep networks have revealed that many local minima are connected through low-loss valleys. Yet, little is known about the theoretical origin of such valleys. We present a general framework for finding continuous symmetries in the parameter space, which carve out low-loss valleys. 
Our framework uses equivariances of the activation functions and can be applied to different layer architectures. To generalize this framework to nonlinear neural networks, we introduce a novel set of nonlinear, data-dependent symmetries. These symmetries can transform a trained model such that it performs similarly on new samples, which allows ensemble building that improves robustness under certain adversarial attacks. We then show that conserved quantities associated with linear symmetries can be used to define coordinates along low-loss valleys. The conserved quantities help reveal that using common initialization methods, gradient flow only explores a small part of the global minimum. By relating conserved quantities to convergence rate and sharpness of the minimum, we provide insights on how initialization impacts convergence and generalizability.\n", "keywords": "symmetry;gradient flow;conserved quantity;flat minima;Lie group;Lie algebra", "primary_area": "", "supplementary_material": "", "author": "Bo Zhao;Iordan Ganev;Robin Walters;Rose Yu;Nima Dehmamy", "authorids": "~Bo_Zhao6;~Iordan_Ganev1;~Robin_Walters1;~Rose_Yu1;~Nima_Dehmamy1", "gender": ";;M;F;M", "homepage": "https://b-zhao.github.io;https://ivganev.github.io/;http://www.robinwalters.com;http://roseyu.com;", "dblp": ";;258/3416;164/7314;198/1338", "google_scholar": "ZCCrFoIAAAAJ;;fnprJmUAAAAJ;;gvHpUtgAAAAJ", "orcid": ";;;;0000-0003-1617-5502", "linkedin": ";;;;nima-dehmamy-57770a4a/", "or_profile": "~Bo_Zhao6;~Iordan_Ganev1;~Robin_Walters1;~Rose_Yu1;~Nima_Dehmamy1", "aff": "University of California, San Diego;Institute for Computing and Information Sciences, Radboud University Nijmegen, Radboud University;Northeastern University ;University of California, San Diego;International Business Machines", "aff_domain": "ucsd.edu;cs.ru.nl;northeastern.edu;ucsd.edu;ibm.com", "position": "PhD student;Postdoc;Assistant Professor;Assistant Professor;Researcher", "bibtex": "@inproceedings{\nzhao2023symmetries,\ntitle={Symmetries, Flat Minima, and the Conserved Quantities of Gradient Flow},\nauthor={Bo Zhao and Iordan Ganev and Robin Walters and Rose Yu and Nima Dehmamy},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=9ZpciCOunFb}\n}", "github": "", "project": "", "reviewers": "M5tH;z99y;J3kj;FC6G", "pdf_size": 2373666, "recommendation": "6;6;6;8", "confidence": "4;2;2;2", "correctness": "3;4;3;4", "technical_novelty": "3;3;3;3", "empirical_novelty": "0;2;2;3", "wc_summary_paper": "58;78;25;34", "wc_strength_and_weaknesses": "724;194;231;89", "wc_clarity_quality_novelty_and_reproducibility": "165;73;19;183", "wc_summary_review": "74;68;18;42", "wc_review": "1021;413;293;348", "wc_reply_reviewers": "130;40;24;18", "wc_reply_authors": "2171;239;457;235", "reply_reviewers": "2;1;1;1", "reply_authors": "3;1;1;1", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 2.5, 0.8660254037844386 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 1.75, 1.0897247358851685 ], "wc_summary_paper_avg": [ 48.75, 20.753011829611623 ], "wc_strength_and_weaknesses_avg": [ 309.5, 244.91478109742582 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 110.0, 67.08949247087803 ], "wc_summary_review_avg": [ 50.5, 22.28788908802267 ], "wc_review_avg": [ 518.75, 293.06857132759905 ], "wc_reply_reviewers_avg": [ 53.0, 45.17742799230607 ], "wc_reply_authors_avg": [ 775.5, 810.6841246749563 ], 
"reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5556018160709910467&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=9ZpciCOunFb", "email": "ucsd.edu;cs.ru.nl;northeastern.edu;ucsd.edu;ibm.com", "author_num": 5, "aff_unique_index": "0;1;2;0;3", "aff_unique_norm": "University of California, San Diego;Radboud University;Northeastern University;International Business Machines Corporation", "aff_unique_dep": ";Institute for Computing and Information Sciences;;", "aff_unique_url": "https://www.ucsd.edu;https://www.ru.nl;https://www.northeastern.edu;https://www.ibm.com", "aff_unique_abbr": "UCSD;RU;NEU;IBM", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "San Diego;Nijmegen;", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "United States;Netherlands" }, { "id": "9Zx6tTcX0SE", "title": "A Study of Biologically Plausible Neural Network: the Role and Interactions of Brain-Inspired Mechanisms in Continual Learning", "track": "main", "status": "Reject", "tldr": "a comprehensive study on the role and interactions of different mechanisms inspired by the brain including sparse non-overlapping representations, Hebbian learning, synaptic consolidation, and replay of past activations", "abstract": "Humans excel at continually acquiring, consolidating, and retaining information from an ever-changing environment, whereas artificial neural networks (ANNs) exhibit catastrophic forgetting. There are considerable differences in the complexity of synapses, the processing of information, and the learning mechanisms in biological neural networks and their artificial counterpart, which may explain the mismatch in performance. We consider a biologically plausible framework that constitutes separate populations of exclusively excitatory and inhibitory neurons which adhere to Dale's principle and the excitatory pyramidal neurons are augmented with dendritic-like structures for context-dependent processing of stimuli. We then conduct a comprehensive study on the role and interactions of different mechanisms inspired by the brain including sparse non-overlapping representations, Hebbian learning, synaptic consolidation, and replay of past activations that accompanied the learning event. 
Our study suggests that employing multiple complementary mechanisms in a biologically plausible architecture, similar to the brain, can be effective in enabling continual learning in ANNs.", "keywords": "Continual Learning;Catastrophic Forgetting;Brain-inspired Mechanisms;Active Dendrites;Dale's Principle;Hebbian Learning;Sparsity", "primary_area": "", "supplementary_material": "", "author": "Fahad Sarfraz;Elahe Arani;Bahram Zonooz", "authorids": "~Fahad_Sarfraz1;~Elahe_Arani1;~Bahram_Zonooz1", "gender": "M;F;M", "homepage": "https://www.fahadsarfraz.com/;https://sites.google.com/view/elahe-arani;https://sites.google.com/view/bahramzonooz", "dblp": "250/9424;;250/9573", "google_scholar": "Zhx_sM4AAAAJ;e_I_v6cAAAAJ;", "orcid": ";0000-0002-0952-7007;", "linkedin": "fahadsarfraz/;elahe-arani-630870b2/;", "or_profile": "~Fahad_Sarfraz1;~Elahe_Arani1;~Bahram_Zonooz1", "aff": "Navinfo Europe;Advanced Research Lab, NavInfo Europe;Eindhoven University of Technology", "aff_domain": "navinfo.eu;navinfo.eu;tue.nl", "position": "Researcher;Sr. AI Manager & Sr. Research Scientist;Assistant Professor", "bibtex": "@misc{\nsarfraz2023a,\ntitle={A Study of Biologically Plausible Neural Network: the Role and Interactions of Brain-Inspired Mechanisms in Continual Learning},\nauthor={Fahad Sarfraz and Elahe Arani and Bahram Zonooz},\nyear={2023},\nurl={https://openreview.net/forum?id=9Zx6tTcX0SE}\n}", "github": "", "project": "", "reviewers": "jDQX;Ds8Z;Bd22;TqnH", "site": "https://openreview.net/forum?id=9Zx6tTcX0SE", "pdf_size": 407538, "recommendation": "3;3;6;8", "confidence": "4;3;4;4", "correctness": "2;3;3;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;2;3;4", "wc_summary_paper": "103;55;21;83", "wc_strength_and_weaknesses": "174;84;136;215", "wc_clarity_quality_novelty_and_reproducibility": "35;70;13;119", "wc_summary_review": "75;18;23;48", "wc_review": "387;227;193;465", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.0, 2.1213203435596424 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 65.5, 30.834234221073174 ], "wc_strength_and_weaknesses_avg": [ 152.25, 48.30307961196677 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 59.25, 40.03982392568679 ], "wc_summary_review_avg": [ 41.0, 22.68259244442751 ], "wc_review_avg": [ 318.0, 112.11155159036913 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.5443310539518174, "corr_recommendation_correctness": 0.5443310539518174, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5865678745936905000&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;2", "aff_unique_norm": "NavInfo;NavInfo Europe;Eindhoven University of Technology", "aff_unique_dep": ";Advanced Research Lab;", "aff_unique_url": ";;https://www.tue.nl", "aff_unique_abbr": ";;TU/e", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Unknown;Netherlands" }, { "id": "9_VrvV7d-FK", "title": "Unsupervised Adaptation for Fairness under Covariate Shift", "track": "main", "status": "Reject", "tldr": "We propose 
an unsupervised adaptation algorithm to address fairness under covariate shift. Our proposed objective involves the standard training loss along with a novel min-max entropy formulation to handle shift and a Wasserstein loss for fairness.", "abstract": "Training fair models typically involves optimizing a composite objective accounting for both prediction accuracy and some fairness measure. However, due to a shift in the distribution of the covariates at test time, the learnt fairness tradeoffs may no longer be valid, which we verify experimentally. To address this, we consider an unsupervised adaptation problem of training fair classifiers when only a small set of unlabeled test samples is available along with a large labeled training set. We propose a novel modification to the traditional composite objective by adding a weighted entropy objective on the unlabeled test dataset. This involves a min-max optimization where weights are optimized to mimic the importance weighting ratios followed by classifier optimization. We demonstrate that our weighted entropy objective provides an upper bound on the standard importance-sampled training objective common in covariate shift formulations under some mild conditions. Experimentally, we demonstrate that a Wasserstein-distance-based penalty for representation matching across protected subgroups, together with the above loss, outperforms existing baselines. Our method achieves the best accuracy-equalized odds tradeoff under the covariate shift setup. We find that, for the same accuracy, we get up to a 2x improvement in equalized odds on notable benchmarks.", "keywords": "Out of Distribution;Fairness;Unsupervised;Adaptation", "primary_area": "", "supplementary_material": "", "author": "Jatin Chauhan;Shreyas Havaldar;Karthikeyan Shanmugam;Jay Nandy;Aravindan Raghuveer", "authorids": "~Jatin_Chauhan3;~Shreyas_Havaldar1;~Karthikeyan_Shanmugam1;~Jay_Nandy1;~Aravindan_Raghuveer1", "gender": "M;M;M;M;M", "homepage": "https://chauhanjatin10.github.io/;https://sites.google.com/corp/view/karthikeyan-shanmugam/;;;https://shreyashavaldar7.github.io", "dblp": "242/7749;;193/4096;20/1664;260/7163.html", "google_scholar": "kTiFFPcAAAAJ;https://scholar.google.ca/citations?user=m4DyPcUAAAAJ;https://scholar.google.co.in/citations?user=8N_wxz8AAAAJ;;Q2aGAk8AAAAJ", "orcid": ";0009-0008-2879-5868;;;", "linkedin": ";;jay-nandy-36654b34/;;shreyas-havaldar-71377b182", "or_profile": "~Jatin_Chauhan3;~Karthikeyan_Shanmugam1;~Jay_Nandy1;~Aravindan_Raghuveer1;~Shreyas_Jayant_Havaldar1", "aff": "University of California, Los Angeles;Google Research;Google;Google;Google Deepmind", "aff_domain": "ucla.edu;google.com;google.com;google.com;google.com", "position": "MS student;Researcher;Postdoc;Principal Researcher;Researcher", "bibtex": "@misc{\nchauhan2023unsupervised,\ntitle={Unsupervised Adaptation for Fairness under Covariate Shift},\nauthor={Jatin Chauhan and Shreyas Havaldar and Karthikeyan Shanmugam and Jay Nandy and Aravindan Raghuveer},\nyear={2023},\nurl={https://openreview.net/forum?id=9_VrvV7d-FK}\n}", "github": "", "project": "", "reviewers": "HTwH;DiLH;iDgW", "site": "https://openreview.net/forum?id=9_VrvV7d-FK", "pdf_size": 741093, "recommendation": "3;5;8", "confidence": "4;3;3", "correctness": "2;3;3", "technical_novelty": "2;3;3", "empirical_novelty": "2;3;0", "wc_summary_paper": "74;40;76", "wc_strength_and_weaknesses": "458;184;260", "wc_clarity_quality_novelty_and_reproducibility": "9;39;47", "wc_summary_review": "31;27;58", "wc_review": "572;290;441",
"wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 5.333333333333333, 2.0548046676563256 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.6666666666666667, 1.247219128924647 ], "wc_summary_paper_avg": [ 63.333333333333336, 16.519348924485154 ], "wc_strength_and_weaknesses_avg": [ 300.6666666666667, 115.4969937078691 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 31.666666666666668, 16.35712552851373 ], "wc_summary_review_avg": [ 38.666666666666664, 13.767917618708923 ], "wc_review_avg": [ 434.3333333333333, 115.22249008862038 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.8029550685469661, "corr_recommendation_correctness": 0.8029550685469661, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:dFI2T75z9q0J:scholar.google.com/&scioq=Unsupervised+Adaptation+for+Fairness+under+Covariate+Shift&hl=en&as_sdt=0,31", "gs_version_total": 0, "aff_unique_index": "0;1;1;1;2", "aff_unique_norm": "University of California, Los Angeles;Google;DeepMind", "aff_unique_dep": ";Google Research;DeepMind", "aff_unique_url": "https://www.ucla.edu;https://research.google;https://deepmind.com", "aff_unique_abbr": "UCLA;Google Research;DeepMind", "aff_campus_unique_index": "0;1;1;1", "aff_campus_unique": "Los Angeles;Mountain View;", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "United States;United Kingdom" }, { "id": "9_cba-ImPGb", "title": "Robustness Guarantees for Adversarially Trained Neural Networks", "track": "main", "status": "Reject", "tldr": "", "abstract": "We study robust adversarial training of two-layer neural networks with Leaky ReLU activation function as a bi-level optimization problem. In particular, for the inner-loop that implements the PGD attack, we propose maximizing a lower bound on the 0/1-loss by reflecting a surrogate loss about the origin. 
This allows us to give a convergence guarantee for the inner-loop PGD attack and precise iteration complexity results for end-to-end adversarial training, which hold for any width and initialization in a realizable setting.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/83c85740a2178e713e769d033ccf58a1efefd0f4.zip", "author": "Poorya Mianjy;Raman Arora", "authorids": "~Poorya_Mianjy1;~Raman_Arora1", "gender": "M;M", "homepage": "https://www.cs.jhu.edu/~r3831/;http://www.cs.jhu.edu/~raman/Home.html", "dblp": "182/8944;", "google_scholar": "PTG3GAsAAAAJ;Spe0xdkAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Poorya_Mianjy1;~Raman_Arora1", "aff": "Citadel Securities;Johns Hopkins University", "aff_domain": "citadelsecurities.com;jhu.edu", "position": "Researcher;Associate Professor", "bibtex": "@misc{\nmianjy2023robustness,\ntitle={Robustness Guarantees for Adversarially Trained Neural Networks},\nauthor={Poorya Mianjy and Raman Arora},\nyear={2023},\nurl={https://openreview.net/forum?id=9_cba-ImPGb}\n}", "github": "", "project": "", "reviewers": "zEHs;KemA;VXHq;6erv", "site": "https://openreview.net/forum?id=9_cba-ImPGb", "pdf_size": 611240, "recommendation": "5;5;6;6", "confidence": "4;3;2;4", "correctness": "3;3;3;4", "technical_novelty": "3;2;4;3", "empirical_novelty": "2;2;3;2", "wc_summary_paper": "47;47;99;91", "wc_strength_and_weaknesses": "87;334;533;174", "wc_clarity_quality_novelty_and_reproducibility": "10;19;31;14", "wc_summary_review": "40;42;64;53", "wc_review": "184;442;727;332", "wc_reply_reviewers": "0;97;41;0", "wc_reply_authors": "216;230;266;264", "reply_reviewers": "0;1;1;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 71.0, 24.166091947189145 ], "wc_strength_and_weaknesses_avg": [ 282.0, 169.84846187116327 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 18.5, 7.88986691902975 ], "wc_summary_review_avg": [ 49.75, 9.60143218483576 ], "wc_review_avg": [ 421.25, 198.85091777510107 ], "wc_reply_reviewers_avg": [ 34.5, 39.777506206397604 ], "wc_reply_authors_avg": [ 244.0, 21.587033144922902 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.30151134457776363, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9162151635322800457&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1", "aff_unique_norm": "Citadel Securities;Johns Hopkins University", "aff_unique_dep": ";", "aff_unique_url": "https://www.citadel.com;https://www.jhu.edu", "aff_unique_abbr": "Citadel;JHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Pseudoinverse-Guided Diffusion Models for Inverse Problems", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11030", "id": "9_gsMA8MRKQ", "poster": "", "openreview": "https://openreview.net/forum?id=9_gsMA8MRKQ", "slides": "https://iclr.cc/virtual/2023/poster/11030", "video": "https://iclr.cc/virtual/2023/poster/11030", "author_site": "Jiaming Song, Arash Vahdat, Morteza Mardani, Jan Kautz", "tldr": "We introduce pseudoinverse 
guidance, an approach to solve inverse problems with generative diffusion models.", "abstract": "Diffusion models have become competitive candidates for solving various inverse problems. Models trained for specific inverse problems work well but are limited to their particular use cases, whereas methods that use problem-agnostic models are general but often perform worse empirically. To address this dilemma, we introduce Pseudoinverse-guided Diffusion Models ($\\Pi$GDM), an approach that uses problem-agnostic models to close the gap in performance. $\\Pi$GDM directly estimates conditional scores from the measurement model of the inverse problem without additional training. It can address inverse problems with noisy, non-linear, or even non-differentiable measurements, in contrast to many existing approaches that are limited to noiseless linear ones. We illustrate the empirical effectiveness of $\\Pi$GDM on several image restoration tasks, including super-resolution, inpainting and JPEG restoration. On ImageNet, $\\Pi$GDM is competitive with state-of-the-art diffusion models trained on specific tasks, and is the first to achieve this with problem-agnostic diffusion models. $\\Pi$GDM can also solve a wider set of inverse problems where the measurement processes are composed of several simpler ones.", "keywords": "diffusion models;inverse problems", "primary_area": "", "supplementary_material": "", "author": "Jiaming Song;Arash Vahdat;Morteza Mardani;Jan Kautz", "authorids": "~Jiaming_Song1;~Arash_Vahdat3;~Morteza_Mardani1;~Jan_Kautz1", "gender": "M;M;M;", "homepage": "http://tsong.me;http://latentspace.cc/;http://web.stanford.edu/~morteza/;http://jankautz.com", "dblp": "173/5104;92/8108;74/258;48/6214", "google_scholar": ";https://scholar.google.ca/citations?user=p9-nlRIAAAAJ;H7edsyEAAAAJ;P9FclNEAAAAJ", "orcid": ";;;", "linkedin": "jiamings/;;;", "or_profile": "~Jiaming_Song1;~Arash_Vahdat3;~Morteza_Mardani1;~Jan_Kautz1", "aff": "NVIDIA;NVIDIA;NVIDIA;NVIDIA", "aff_domain": "nvidia.com;nvidia.com;nvidia.com;nvidia.com", "position": "Researcher;Research Scientist;Principal Researcher;VP Research", "bibtex": "@inproceedings{\nsong2023pseudoinverseguided,\ntitle={Pseudoinverse-Guided Diffusion Models for Inverse Problems},\nauthor={Jiaming Song and Arash Vahdat and Morteza Mardani and Jan Kautz},\nbooktitle={International Conference on Learning Representations},\nyear={2023},\nurl={https://openreview.net/forum?id=9_gsMA8MRKQ}\n}", "github": "", "project": "", "reviewers": "v3Ma;diJL;JCDz;4ai2", "pdf_size": 18729002, "recommendation": "6;6;6;8", "confidence": "4;4;3;2", "correctness": "1;3;3;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "0;3;3;3", "wc_summary_paper": "99;32;68;57", "wc_strength_and_weaknesses": "246;175;141;164", "wc_clarity_quality_novelty_and_reproducibility": "16;20;29;27", "wc_summary_review": "20;2;8;24", "wc_review": "381;229;246;272", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "992;462;913;654", "reply_reviewers": "0;0;0;0", "reply_authors": "2;1;2;1", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 2.5, 0.8660254037844386 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 1.299038105676658 ], "wc_summary_paper_avg": [ 64.0, 24.052026941611388 ], "wc_strength_and_weaknesses_avg": [ 181.5, 39.207779840230685 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 23.0, 5.244044240850758 ], "wc_summary_review_avg": [ 13.5, 8.874119674649425 ], 
"wc_review_avg": [ 282.0, 59.173473786824445 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 755.25, 210.46540689624032 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.8703882797784891, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 285, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17338448631534650518&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "pdf": "https://openreview.net/pdf?id=9_gsMA8MRKQ", "email": "nvidia.com;nvidia.com;nvidia.com;nvidia.com", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "NVIDIA", "aff_unique_dep": "NVIDIA Corporation", "aff_unique_url": "https://www.nvidia.com", "aff_unique_abbr": "NVIDIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "9_pgtXEB652", "title": "PBFormer: Capturing Complex Scene Text Shape with Polynomial Band Transformer", "track": "main", "status": "Reject", "tldr": "This paper presents PBFormer, an efficient yet powerful scene text detector that unifies the transformer with a novel text shape representation, Polynomial Band, which performs well for complex shape or crowded texts.", "abstract": "We present PBFormer, an efficient yet powerful scene text detector that unifies the transformer with a novel text shape representation Polynomial Band (PB). The representation has four polynomial curves to fit a text's top, bottom, left, and right sides, which can capture a text with a complex shape by varying polynomial coefficients. PB has appealing features compared with conventional representations: 1) It can model different curvatures with a fixed number of parameters, while polygon-points-based methods need to utilize a different number of points. 2) It can distinguish adjacent or overlapping texts as they have apparent different curve coefficients, while segmentation-based methods suffer from adhesive spatial positions. PBFormer combines the PB with the transformer, which can directly generate smooth text contours sampled from predicted curves without interpolation. To leverage the advantage of PB, PBFormer has a parameter-free cross-scale pixel attention module. The module can enlarge text features and suppress irrelevant areas to benefit from detecting texts with diverse scale variations. Furthermore, PBFormer is trained with a shape-contained loss, which not only enforces the piecewise alignment between the ground truth and the predicted curves but also makes curves' position and shapes consistent with each other. Without bells and whistles about text pre-training, our method is superior to the previous state-of-the-art text detectors on the arbitrary-shaped CTW1500 and Total-Text datasets. 
Code will be made public.", "keywords": "Complex Shape Text Detection;Text Representation;Transformer;Computer Vision;Application", "primary_area": "", "supplementary_material": "/attachment/cdc77cf16ad8b3bca6434705ef36922b5315ddbb.zip", "author": "Ruijin Liu;Ning Lu;Dapeng Chen;Cheng LI;Zejian Yuan;Wei Peng", "authorids": "~Ruijin_Liu1;~Ning_Lu3;~Dapeng_Chen4;~Cheng_LI12;~Zejian_Yuan4;~Wei_Peng6", "gender": "M;M;M;M;M;M", "homepage": ";;;;http://www.aiar.xjtu.edu.cn/szdw/js/2.htm;https://www.rmit.edu.au/profiles/p/wei-peng3", "dblp": "254/7956;29/2864-3;04/3068;;;", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;VOgUUfEAAAAJ;-Wpd7FcAAAAJ;K162I_IAAAAJ;;", "orcid": "0000-0002-3672-5332;0000-0002-3399-3681;;;;", "linkedin": ";;;;;wei-peng-phd-in-ai-4515ba22/?originalSubdomain=au", "or_profile": "~Ruijin_Liu1;~Ning_Lu3;~Dapeng_Chen4;~Cheng_LI12;~Zejian_Yuan4;~Wei_Peng6", "aff": "Xi'an Jiaotong University;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Xi'an Jiaotong University;Huawei Technologies Ltd.", "aff_domain": "stu.xjtu;huawei.com;huawei.com;huawei.com;xjtu.edu.cn;huawei.com", "position": "PhD student;Researcher;Researcher;Researcher;Full Professor;Principal Researcher", "bibtex": "@misc{\nliu2023pbformer,\ntitle={{PBF}ormer: Capturing Complex Scene Text Shape with Polynomial Band Transformer},\nauthor={Ruijin Liu and Ning Lu and Dapeng Chen and Cheng LI and Zejian Yuan and Wei Peng},\nyear={2023},\nurl={https://openreview.net/forum?id=9_pgtXEB652}\n}", "github": "", "project": "", "reviewers": "FmtQ;Tjeh;Lb4V;cjr7", "site": "https://openreview.net/forum?id=9_pgtXEB652", "pdf_size": 34405538, "recommendation": "3;3;5;5", "confidence": "4;5;4;5", "correctness": "3;2;3;3", "technical_novelty": "2;2;2;2", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "79;42;86;85", "wc_strength_and_weaknesses": "284;221;404;190", "wc_clarity_quality_novelty_and_reproducibility": "43;36;70;41", "wc_summary_review": "59;21;54;52", "wc_review": "465;320;614;368", "wc_reply_reviewers": "165;0;0;57", "wc_reply_authors": "2503;1705;1038;873", "reply_reviewers": "2;0;0;1", "reply_authors": "7;6;3;2", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.5, 0.5 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 73.0, 18.096961070853858 ], "wc_strength_and_weaknesses_avg": [ 274.75, 81.94929834965032 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 47.5, 13.238202294873727 ], "wc_summary_review_avg": [ 46.5, 14.941552797483935 ], "wc_review_avg": [ 441.75, 112.330705953448 ], "wc_reply_reviewers_avg": [ 55.5, 67.36653471865687 ], "wc_reply_authors_avg": [ 1529.75, 642.4692113245584 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 4.5, 2.0615528128088303 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1197567002971139226&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;1;1;0;1", "aff_unique_norm": "Xi'an Jiao Tong University;Huawei", "aff_unique_dep": ";Huawei Technologies", "aff_unique_url": "https://www.xjtu.edu.cn;https://www.huawei.com", "aff_unique_abbr": "XJTU;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title":
"FiT: Parameter Efficient Few-shot Transfer Learning for Personalized and Federated Image Classification", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11913", "id": "9aokcgBVIj1", "poster": "/media/PosterPDFs/ICLR%202023/11913.png?t=1681325036.5152946", "openreview": "https://openreview.net/forum?id=9aokcgBVIj1", "slides": "https://iclr.cc/virtual/2023/poster/11913", "video": "https://iclr.cc/virtual/2023/poster/11913", "author_site": "Aliaksandra Shysheya, John Bronskill, Massimiliano Patacchiola, Sebastian Nowozin, Richard E Turner", "tldr": "We propose FiT, a parameter efficient few-shot image classification system that uses a Naive Bayes head, FiLM layers that modulate a pretrained backbone, and an episodic fine-tuning protocol that achieves SOTA on the VTAB-1k benchmark.", "abstract": "Modern deep learning systems are increasingly deployed in situations such as personalization and federated learning where it is necessary to support i) learning on small amounts of data, and ii) communication efficient distributed training protocols. In this work, we develop FiLM Transfer (FiT) which fulfills these requirements in the image classification setting by combining ideas from transfer learning (fixed pretrained backbones and fine-tuned FiLM adapter layers) and meta-learning (automatically configured Naive Bayes classifiers and episodic training) to yield parameter efficient models with superior classification accuracy at low-shot. The resulting parameter efficiency is key for enabling few-shot learning, inexpensive model updates for personalization, and communication efficient federated learning. We experiment with FiT on a wide range of downstream datasets and show that it achieves better classification accuracy than the leading Big Transfer (BiT) algorithm at low-shot and achieves state-of-the art accuracy on the challenging VTAB-1k benchmark, with fewer than 1% of the updateable parameters. 
Finally, we demonstrate the parameter efficiency and superior accuracy of FiT in distributed low-shot applications including model personalization and federated learning where model update size is an important performance metric.", "keywords": "few-shot learning;transfer learning;federated learning", "primary_area": "", "supplementary_material": "/attachment/48521a61775850b7465103b92bc61967508cbe84.zip", "author": "Aliaksandra Shysheya;John F Bronskill;Massimiliano Patacchiola;Sebastian Nowozin;Richard E Turner", "authorids": "~Aliaksandra_Shysheya1;~John_F_Bronskill1;~Massimiliano_Patacchiola1;~Sebastian_Nowozin1;~Richard_E_Turner1", "gender": "F;M;M;M;M", "homepage": ";;https://mpatacchiola.github.io/;http://www.nowozin.net/sebastian/;https://rich-turner-group.github.io/", "dblp": "241/6203;;177/8630;https://dblp.org/pers/n/Nowozin:Sebastian.html;40/5352", "google_scholar": ";https://scholar.google.co.nz/citations?user=aH2jZsoAAAAJ;L4GcSrsAAAAJ;https://scholar.google.co.uk/citations?user=7-B7aQkAAAAJ;https://scholar.google.co.uk/citations?user=DgLEyZgAAAAJ", "orcid": ";;0000-0002-9500-6899;;", "linkedin": ";;mpatacchiola/;;", "or_profile": "~Aliaksandra_Shysheya1;~John_F_Bronskill1;~Massimiliano_Patacchiola1;~Sebastian_Nowozin1;~Richard_E_Turner1", "aff": "University of Cambridge;University of Cambridge;University of Cambridge;Microsoft;Microsoft Research", "aff_domain": "cam.ac.uk;cam.ac.uk;cam.ac.uk;microsoft.com;research.microsoft.com", "position": "PhD student;Research Associate;Postdoc;Researcher;Researcher", "bibtex": "@inproceedings{\nshysheya2023fit,\ntitle={FiT: Parameter Efficient Few-shot Transfer Learning for Personalized and Federated Image Classification},\nauthor={Aliaksandra Shysheya and John F Bronskill and Massimiliano Patacchiola and Sebastian Nowozin and Richard E Turner},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=9aokcgBVIj1}\n}", "github": "", "project": "", "reviewers": "8TEt;YJfE;BNuH;DFdq;upHx", "pdf_size": 644617, "recommendation": "6;6;6;8;8", "confidence": "4;2;4;3;2", "correctness": "3;3;3;4;4", "technical_novelty": "4;3;2;4;3", "empirical_novelty": "3;3;3;4;3", "wc_summary_paper": "37;76;81;39;92", "wc_strength_and_weaknesses": "74;76;336;165;109", "wc_clarity_quality_novelty_and_reproducibility": "26;36;20;23;61", "wc_summary_review": "50;20;30;13;58", "wc_review": "187;208;467;240;320", "wc_reply_reviewers": "0;0;36;0;0", "wc_reply_authors": "323;155;1642;379;362", "reply_reviewers": "0;0;1;0;0", "reply_authors": "1;1;4;1;1", "recommendation_avg": [ 6.8, 0.9797958971132712 ], "confidence_avg": [ 3.0, 0.8944271909999159 ], "correctness_avg": [ 3.4, 0.4898979485566356 ], "technical_novelty_avg": [ 3.2, 0.7483314773547882 ], "empirical_novelty_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_paper_avg": [ 65.0, 22.653917983430592 ], "wc_strength_and_weaknesses_avg": [ 152.0, 97.70772743237865 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 33.2, 14.905032707109367 ], "wc_summary_review_avg": [ 34.2, 17.232527382830412 ], "wc_review_avg": [ 284.4, 101.88738881726236 ], "wc_reply_reviewers_avg": [ 7.2, 14.400000000000002 ], "wc_reply_authors_avg": [ 572.2, 540.7659012918621 ], "reply_reviewers_avg": [ 0.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.6, 1.2000000000000002 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.45643546458763845, "corr_recommendation_correctness": 1.0, "gs_citation": 29, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=10512979607739161369&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=9aokcgBVIj1", "email": "cam.ac.uk;cam.ac.uk;cam.ac.uk;microsoft.com;research.microsoft.com", "author_num": 5, "aff_unique_index": "0;0;0;1;1", "aff_unique_norm": "University of Cambridge;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.cam.ac.uk;https://www.microsoft.com", "aff_unique_abbr": "Cambridge;Microsoft", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;0;0;1;1", "aff_country_unique": "United Kingdom;United States" }, { "id": "9bVBH1GD5sr", "title": "FOCUS: Fairness via Agent-Awareness for Federated Learning on Heterogeneous Data", "track": "main", "status": "Reject", "tldr": "We propose a formal definition of fairness via agent-awareness for FL (FAA) on heterogeneous data and a fair FL training algorithm based on agent clustering (FOCUS) to achieve FAA.", "abstract": "Federated learning (FL) provides an effective collaborative training paradigm, allowing local agents to train a global model jointly without sharing their local data to protect privacy.\nOn the other hand, due to the heterogeneous nature of local agents, it is challenging to optimize or even define the fairness for agents, which may discourage valuable participation. For instance, the trained global model may sacrifice the performance of a minority user with high-quality data based on loss optimization over all users.\nExisting work usually considers accuracy equity as fairness for different users in FL, which is limited especially under the heterogeneous setting, since it is intuitively \"unfair\" that agents with low-quality data would achieve similar accuracy.\nIn this work, we aim to address such limitations and propose a formal fairness definition in FL, fairness via agent-awareness (FAA), which takes the heterogeneous data contributions of local agents into account. In addition, we propose a fair FL training algorithm based on agent clustering (FOCUS) to achieve FAA. Theoretically, we prove the convergence and optimality of FOCUS under mild conditions for linear and general convex loss functions with bounded smoothness. We also prove that FOCUS always achieves higher fairness measured by FAA compared with standard FedAvg protocol under both linear and general convex loss functions. 
Empirically, we evaluate FOCUS on four datasets, including synthetic data, images, and texts under different settings, and we show that FOCUS achieves significantly higher fairness based on FAA while maintaining similar or even higher prediction accuracy compared with FedAvg and other existing fair FL algorithms.\n", "keywords": "federated learning;fairness;data heterogeneity;clustering;expectation\u2013maximization (EM)", "primary_area": "", "supplementary_material": "/attachment/0e5aea073d758ef2fe1241b02b18ba0b75fbc84c.zip", "author": "Wenda Chu;Chulin Xie;Boxin Wang;Linyi Li;Lang Yin;Han Zhao;Bo Li", "authorids": "~Wenda_Chu1;~Chulin_Xie1;~Boxin_Wang1;~Linyi_Li1;~Lang_Yin1;~Han_Zhao1;~Bo_Li19", "gender": "M;F;;M;M;M;F", "homepage": "https://chuwd19.github.io;;https://wbx.life;http://linyil.com;;https://hanzhaoml.github.io/;http://boli.cs.illinois.edu/", "dblp": "312/6559;245/4284;236/6319;99/4340-1.html;324/7991.html;03/3520-2;50/3402-26", "google_scholar": ";WeJnzAgAAAAJ;YOf2ATIAAAAJ;-b0sk-YAAAAJ;;x942ipYAAAAJ;K8vJkTcAAAAJ", "orcid": ";;;;;0000-0002-8579-1600;", "linkedin": ";;;;lang-yin-813222a0/;;", "or_profile": "~Wenda_Chu1;~Chulin_Xie1;~Boxin_Wang1;~Linyi_Li1;~Lang_Yin1;~Han_Zhao1;~Bo_Li19", "aff": "Tsinghua University;University of Illinois, Urbana Champaign;Department of Computer Science, University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign", "aff_domain": "tsinghua.edu.cn;illinois.edu;cs.illinois.edu;illinois.edu;illinois.edu;illinois.edu;illinois.edu", "position": "Undergrad student;PhD student;PhD student;PhD student;MS student;Assistant Professor;Assistant Professor", "bibtex": "@misc{\nchu2023focus,\ntitle={{FOCUS}: Fairness via Agent-Awareness for Federated Learning on Heterogeneous Data},\nauthor={Wenda Chu and Chulin Xie and Boxin Wang and Linyi Li and Lang Yin and Han Zhao and Bo Li},\nyear={2023},\nurl={https://openreview.net/forum?id=9bVBH1GD5sr}\n}", "github": "", "project": "", "reviewers": "MUHj;wM5N;p3Qb", "site": "https://openreview.net/forum?id=9bVBH1GD5sr", "pdf_size": 628158, "recommendation": "3;6;6", "confidence": "3;3;3", "correctness": "2;4;4", "technical_novelty": "2;2;3", "empirical_novelty": "2;2;3", "wc_summary_paper": "62;115;40", "wc_strength_and_weaknesses": "415;295;409", "wc_clarity_quality_novelty_and_reproducibility": "30;36;19", "wc_summary_review": "75;85;32", "wc_review": "582;531;500", "wc_reply_reviewers": "425;0;178", "wc_reply_authors": "2957;1576;576", "reply_reviewers": "1;0;1", "reply_authors": "7;4;2", "recommendation_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 3.0, 0.0 ], "correctness_avg": [ 3.3333333333333335, 0.9428090415820634 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 72.33333333333333, 31.478387647541428 ], "wc_strength_and_weaknesses_avg": [ 373.0, 55.20869496736904 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 28.333333333333332, 7.039570693980958 ], "wc_summary_review_avg": [ 64.0, 22.992752481307377 ], "wc_review_avg": [ 537.6666666666666, 33.8066397160216 ], "wc_reply_reviewers_avg": [ 201.0, 174.26608008062462 ], "wc_reply_authors_avg": [ 1703.0, 976.1786038767018 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 4.333333333333333, 2.0548046676563256 ], "replies_avg": [ 20, 0 ], 
"authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 1.0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5054571541365837895&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;1;1;1;1;1", "aff_unique_norm": "Tsinghua University;University of Illinois Urbana-Champaign", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;https://illinois.edu", "aff_unique_abbr": "THU;UIUC", "aff_campus_unique_index": "1;1;1;1;1;1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "0;1;1;1;1;1;1", "aff_country_unique": "China;United States" }, { "id": "9czfKu1QqcN", "title": "ErGOT: entropy-regularized graph optimal transport", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Graph comparison is a fundamental task, which not only relates to graph matching, an NP-hard problem, but also has various applications in graph learning. We tackle this task by studying optimal graph representation and the entropy-regularized optimal transport between graphs (ErGOT). First, we analytically derive a family of Gaussian variables that optimally represent graph topology and node relation. Second, we realize graph comparison by formulating ErGOT, a framework with low sample complexity, on represented graph information. Third, we control biases in the solution by defining ErGOT with a 2-Sinkhorn divergence, whose closed-form expression can be derived on the manifold of Gaussian variables. As the Gaussian geometry changes with entropy regularization magnitude, ErGOT defined with 2-Sinkhorn divergence wanders between pure optimal transport and maximum mean discrepancy among graphs. We demonstrate that these statistically efficient, principally unbiased, and in-between properties ensure theoretically faster convergence of our approach to empirically higher performance than the state-of-art algorithms on graph alignment, sketching, and retrieval tasks. 
", "keywords": "Graph comparison;entropy-regularized optimal transport;NP-hard problem;graph matching;graph alignment;graph sketching;graph retrieval", "primary_area": "", "supplementary_material": "/attachment/0a900884b1526d15de340bc70af6cfb7d3b761f8.zip", "author": "Yang Tian;Yunhui Xu;Jie Sun;Yaoyuan Wang;Ziyang Zhang;Weihua He;Zeren Tan;Pei Sun", "authorids": "~Yang_Tian2;xuyunhui18@mails.tsinghua.edu.cn;~Jie_Sun3;~Yaoyuan_Wang1;~Ziyang_Zhang2;~Weihua_He1;~Zeren_Tan1;peisun@tsinghua.edu.cn", "gender": "M;;;;M;;M;", "homepage": ";;https://sites.google.com/view/jiesun;https://www.huawei.com/;https://www.huawei.com/cn/?ic_medium=direct&ic_source=surlent;;;", "dblp": "64/5869;;;242/8119;;116/4594;220/5551;", "google_scholar": "mRAghwIAAAAJ;;BWNJnfMAAAAJ;;;PRX4APYAAAAJ;GSgL6zEAAAAJ;", "orcid": "0000-0003-1970-0413;;;0000-0003-1060-4898;;;0009-0000-2266-8739;", "linkedin": ";;;;;;;", "or_profile": "~Yang_Tian2;xuyunhui18@mails.tsinghua.edu.cn;~Jie_Sun3;~Yaoyuan_Wang1;~Ziyang_Zhang2;~Weihua_He1;~Zeren_Tan1;peisun@tsinghua.edu.cn", "aff": ";;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Tsinghua University;Tsinghua University;", "aff_domain": ";;huawei.com;huawei.com;huawei.com;tsinghua.edu.cn;tsinghua.edu.cn;", "position": ";;Researcher;Researcher;Researcher;PhD student;PhD student;", "bibtex": "@misc{\ntian2023ergot,\ntitle={Er{GOT}: entropy-regularized graph optimal transport},\nauthor={Yang Tian and Yunhui Xu and Jie Sun and Yaoyuan Wang and Ziyang Zhang and Weihua He and Zeren Tan and Pei Sun},\nyear={2023},\nurl={https://openreview.net/forum?id=9czfKu1QqcN}\n}", "github": "", "project": "", "reviewers": "k1zm;auE5;jDqu;6Rbi", "site": "https://openreview.net/forum?id=9czfKu1QqcN", "pdf_size": 6464855, "recommendation": "3;3;3;3", "confidence": "4;2;4;4", "correctness": "3;3;3;4", "technical_novelty": "1;2;2;2", "empirical_novelty": "1;2;2;2", "wc_summary_paper": "71;62;76;53", "wc_strength_and_weaknesses": "44;141;89;345", "wc_clarity_quality_novelty_and_reproducibility": "74;39;232;38", "wc_summary_review": "16;48;15;53", "wc_review": "205;290;412;489", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 65.5, 8.789197915623474 ], "wc_strength_and_weaknesses_avg": [ 154.75, 115.07904891855858 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 95.75, 79.98867107284632 ], "wc_summary_review_avg": [ 33.0, 17.592612085759182 ], "wc_review_avg": [ 349.0, 109.3000457456446 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:VHJvLuI8k18J:scholar.google.com/&scioq=ErGOT:+entropy-regularized+graph+optimal+transport&hl=en&as_sdt=0,23", "gs_version_total": 0, "aff_unique_index": "0;0;0;1;1", "aff_unique_norm": "Huawei;Tsinghua University", "aff_unique_dep": "Huawei Technologies;", "aff_unique_url": "https://www.huawei.com;https://www.tsinghua.edu.cn", "aff_unique_abbr": "Huawei;THU", "aff_campus_unique_index": "", "aff_campus_unique": 
"", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "9d13HEFFaea", "title": "Learning to represent and predict evolving visual signals via polar straightening", "track": "main", "status": "Reject", "tldr": "", "abstract": "Observer motion and continuous deformations of objects and textures imbue natural videos with distinct temporal structures, enabling the prediction of future frames from past ones. Conventional methods proceed by estimating local motion, or optic flow, and then using this to predict future frames by warping and copying content. Here, we explore a more direct methodology, in which frames are transformed into an alternative representation where temporal structure and evolution are more readily accessible. As a base case, a rigidly translating pattern can be described in the frequency domain as a linear combination of sinusoids, each with constant amplitude and phase that cycles at a rate proportional to its frequency. This fundamental property of Fourier representation reduces prediction to angular extrapolation. Motivated by the geometry of this well-known case, we formulate a self-supervised learning problem which seeks a transformation of video frames to facilitate next-frame prediction in these natural polar coordinates. We construct a network architecture in which pairs of convolutional channels are used to factorize signals into slowly evolving amplitudes and linearly advancing phases. We train this network to predict future frames, and compare its performance with that of conventional methods using optic flow, and other learned predictive neural networks, evaluated on natural videos from the DAVIS dataset. We find that the polar predictor achieves high prediction performance while remaining interpretable and fast, thereby demonstrating the potential of a flow-free video processing methodology that is trained end-to-end to predict natural video content.", "keywords": "Video prediction;self-supervised representation learning;phase prediction;invariance / equivariance factorization", "primary_area": "", "supplementary_material": "", "author": "Pierre-\u00c9tienne H Fiquet;Eero P Simoncelli", "authorids": "~Pierre-\u00c9tienne_H_Fiquet1;~Eero_P_Simoncelli1", "gender": "M;M", "homepage": "https://www.cns.nyu.edu/~fiquet/;https://www.cns.nyu.edu/~eero/", "dblp": "342/3591;30/5604", "google_scholar": "15zzdOAAAAAJ;MplR7_cAAAAJ", "orcid": ";0000-0002-1206-527X", "linkedin": ";eero-simoncelli-445782123", "or_profile": "~Pierre-\u00c9tienne_H_Fiquet1;~Eero_Peter_Simoncelli1", "aff": "New York University;New York University", "aff_domain": "nyu.edu;nyu.edu", "position": "PhD student;Full Professor", "bibtex": "@misc{\nfiquet2023learning,\ntitle={Learning to represent and predict evolving visual signals via polar straightening},\nauthor={Pierre-{\\'E}tienne H Fiquet and Eero P Simoncelli},\nyear={2023},\nurl={https://openreview.net/forum?id=9d13HEFFaea}\n}", "github": "", "project": "", "reviewers": "67e4;zsxR;gEGq", "site": "https://openreview.net/forum?id=9d13HEFFaea", "pdf_size": 2551756, "recommendation": "5;5;6", "confidence": "4;2;3", "correctness": "3;4;3", "technical_novelty": "2;3;4", "empirical_novelty": "2;3;3", "wc_summary_paper": "109;122;53", "wc_strength_and_weaknesses": "495;60;225", "wc_clarity_quality_novelty_and_reproducibility": "70;398;66", "wc_summary_review": "31;113;13", "wc_review": "705;693;357", "wc_reply_reviewers": "282;0;0", "wc_reply_authors": "813;442;460", "reply_reviewers": "1;0;0", "reply_authors": "2;1;1", 
"recommendation_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.816496580927726 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 94.66666666666667, 29.93697082575694 ], "wc_strength_and_weaknesses_avg": [ 260.0, 179.3042107704111 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 178.0, 155.5720626162251 ], "wc_summary_review_avg": [ 52.333333333333336, 43.52266331719857 ], "wc_review_avg": [ 585.0, 161.29476122924763 ], "wc_reply_reviewers_avg": [ 94.0, 132.93607486307093 ], "wc_reply_authors_avg": [ 571.6666666666666, 170.8065832715147 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": -0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:gIB-PY2RK60J:scholar.google.com/&scioq=Learning+to+represent+and+predict+evolving+visual+signals+via+polar+straightening&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "New York University", "aff_unique_dep": "", "aff_unique_url": "https://www.nyu.edu", "aff_unique_abbr": "NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "9dFQcu9vmX", "title": "MemoNav: Working Memory Model for Visual Navigation", "track": "main", "status": "Reject", "tldr": "The MemoNav learns three types of scene representations, which contain goal-relevant information and scene-level features and are utilized to improve navigation performance in both 1-goal and multi-goal ImageNav tasks.", "abstract": "We present MemoNav, a novel memory model for image-goal navigation, which utilizes a working memory-inspired pipeline to improve navigation performance. Specifically, the node features on the topological map are stored in the short-term memory (STM), as these features are dynamically updated. The MemoNav retains the informative fraction of the STM via a forgetting module to improve navigation efficiency. To learn a global representation of 3D scenes, we introduce long-term memory (LTM) that continuously aggregates the STM. Afterward, a graph attention module encodes the retained STM and the LTM to generate working memory (WM). After encoding, the WM contains the informative features in the retained STM and the scene-level feature in the LTM and is finally used to generate actions. Consequently, the synergy of these three types of memory increases navigation performance by selectively retaining goal-relevant information and learning a high-level scene feature. When evaluated on multi-goal tasks, the MemoNav outperforms the SoTA methods at all difficulty levels in both Gibson and Matterport3D scenes. The MemoNav also achieves consistent improvements on traditional 1-goal tasks. 
Moreover, the qualitative results show that our model is less likely to be trapped in a deadlock.", "keywords": "Image-Goal Navigation;Memory mechanism;Embodied visual navigation;Embodied AI", "primary_area": "", "supplementary_material": "/attachment/1961bd486d5ecb14c62dfc34fd8906c72055c07b.zip", "author": "Hongxin Li;Xu Yang;Zeyu Wang;yuran Yang;Shuqi Mei;Zhaoxiang Zhang", "authorids": "~Hongxin_Li1;~Xu_Yang1;~Zeyu_Wang5;~yuran_Yang1;~Shuqi_Mei1;~Zhaoxiang_Zhang3", "gender": "M;M;M;M;M;M", "homepage": ";http://people.ucas.ac.cn/~XuYang;;https://ieeexplore.ieee.org/author/37090059964;;http://zhaoxiangzhang.net", "dblp": ";63/1534-4.html;132/7882;327/3762;;55/2285-1.html", "google_scholar": "BO1d4M8AAAAJ;https://scholar.google.com.hk/citations?hl=zh-CN;;;;qxWfV6cAAAAJ", "orcid": "0000-0002-1445-7357;0000-0003-0553-4581;0000-0001-5514-9716;;;", "linkedin": ";;;;https://www.linkedin.cn/incareer/in/shuqi-mei-a2346538?withFullProfile=true;", "or_profile": "~Hongxin_Li1;~Xu_Yang1;~Zeyu_Wang5;~yuran_Yang1;~Shuqi_Mei1;~Zhaoxiang_Zhang3", "aff": "Institute of Automation\uff0cChinese Academy of Sciences;Institute of Automation of Chinese academy of science;Institute of Automation, Chinese Academy of Sciences;Tencent;Tencent T-Lab;Institute of Automation, Chinese Academy of Sciences", "aff_domain": "ia.ac.cn;ia.ac.cn;ia.ac.cn;tencent.com;tencent.com;ia.ac.cn", "position": "PhD student;Associate Professor;MS student;Researcher;Researcher;Full Professor", "bibtex": "@misc{\nli2023memonav,\ntitle={MemoNav: Working Memory Model for Visual Navigation},\nauthor={Hongxin Li and Xu Yang and Zeyu Wang and yuran Yang and Shuqi Mei and Zhaoxiang Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=9dFQcu9vmX}\n}", "github": "", "project": "", "reviewers": "LEke;aALd;EedW", "site": "https://openreview.net/forum?id=9dFQcu9vmX", "pdf_size": 11917239, "recommendation": "5;6;6", "confidence": "4;4;3", "correctness": "3;3;3", "technical_novelty": "3;2;2", "empirical_novelty": "2;3;3", "wc_summary_paper": "54;89;93", "wc_strength_and_weaknesses": "685;301;203", "wc_clarity_quality_novelty_and_reproducibility": "19;33;42", "wc_summary_review": "59;58;54", "wc_review": "817;481;392", "wc_reply_reviewers": "656;100;0", "wc_reply_authors": "2975;1137;815", "reply_reviewers": "1;3;0", "reply_authors": "5;5;2", "recommendation_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 78.66666666666667, 17.518244457961217 ], "wc_strength_and_weaknesses_avg": [ 396.3333333333333, 208.0021367411616 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 31.333333333333332, 9.463379711052259 ], "wc_summary_review_avg": [ 57.0, 2.160246899469287 ], "wc_review_avg": [ 563.3333333333334, 183.01244644984 ], "wc_reply_reviewers_avg": [ 252.0, 288.5735030571356 ], "wc_reply_authors_avg": [ 1642.3333333333333, 951.4624999908767 ], "reply_reviewers_avg": [ 1.3333333333333333, 1.247219128924647 ], "reply_authors_avg": [ 4.0, 1.4142135623730951 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.4999999999999999, "corr_recommendation_correctness": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16865496830611548831&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0;0;1;1;0", 
"aff_unique_norm": "Chinese Academy of Sciences;Tencent", "aff_unique_dep": "Institute of Automation;Tencent Holdings Limited", "aff_unique_url": "http://www.ia.cas.cn;https://www.tencent.com", "aff_unique_abbr": "CAS;Tencent", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "9eT2pA9P-vI", "title": "Adam Accumulation to Reduce Memory Footprints of both Activations and Gradients for Large-scale DNN Training", "track": "main", "status": "Reject", "tldr": "This is an algorithm-system co-designed work to ensure memory-efficient training for large-scale DNN models", "abstract": "Running out of GPU memory has become a main bottleneck for large-scale DNN training. How to reduce the memory footprint during training has received intensive research attention. We find that previous gradient accumulation reduces activation memory but fails to be compatible with gradient memory reduction due to a contradiction between preserving gradients and releasing gradients. To address this issue, we propose a novel optimizer accumulation method for Adam, named Adam Accumulation (AdamA), which enables reducing both activation and gradient memory. Specifically, AdamA directly integrates gradients into optimizer states and accumulates optimizer states over micro-batches, so that gradients can be released immediately after use. We mathematically and experimentally demonstrate AdamA yields the same convergence properties as Adam. Evaluated on transformer-based models, AdamA achieves up to 23% memory reduction compared to gradient accumulation with less than 2% degradation in training throughput. Notably, AdamA can work together with memory reduction methods for optimizer states to fit 1.26x~3.14x larger models over PyTorch and DeepSpeed baseline on GPUs with different memory capacities.", "keywords": "Large model training;Memory reduction", "primary_area": "", "supplementary_material": "/attachment/bcbb4b5fd749fe154840792f8610e1bda2de3ae2.zip", "author": "Yijia Zhang;Yibo Han;Shijie Cao;Guohao Dai;Youshan Miao;Ting Cao;Fan Yang;Ningyi Xu", "authorids": "~Yijia_Zhang2;~Yibo_Han1;~Shijie_Cao1;daiguohao1992@gmail.com;~Youshan_Miao1;~Ting_Cao1;~Fan_Yang28;xuningyi@sjtu.edu.cn", "gender": "M;Not Specified;M;;M;;M;", "homepage": "https://github.com/Chocolife-96;https://github.com/harryhan618;https://www.microsoft.com/en-us/research/people/shijiecao/;;;https://www.microsoft.com/en-us/research/people/ticao/;https://fanyangcs.github.io/;", "dblp": "65/2747;;;;https://dblp.uni-trier.de/pid/06/11190.html;;29/3081-24.html;", "google_scholar": "https://scholar.google.com.hk/citations?user=Y5Aq7I8AAAAJ;;StqnQfsAAAAJ;;;;https://scholar.google.com/citations?hl=en;", "orcid": ";;;;;;0000-0002-0378-060X;", "linkedin": ";;;;;;;", "or_profile": "~Yijia_Zhang2;~Yibo_Han1;~Shijie_Cao1;daiguohao1992@gmail.com;~Youshan_Miao1;~Ting_Cao1;~Fan_Yang28;xuningyi@sjtu.edu.cn", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Microsoft Research Asia;;;Microsoft Research;Microsoft Research;", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;microsoft.com;;;microsoft.com;research.microsoft.com;", "position": "PhD student;PhD student;Researcher;;;Principal Researcher;Senior Principal Researcher;", "bibtex": "@misc{\nzhang2023adam,\ntitle={Adam Accumulation to Reduce Memory Footprints of both Activations and Gradients for Large-scale {DNN} Training},\nauthor={Yijia Zhang and Yibo Han and Shijie Cao and Guohao Dai and Youshan Miao and Ting Cao and Fan Yang 
and Ningyi Xu},\nyear={2023},\nurl={https://openreview.net/forum?id=9eT2pA9P-vI}\n}", "github": "", "project": "", "reviewers": "3RgH;u5im;a8MG;6Jc2", "site": "https://openreview.net/forum?id=9eT2pA9P-vI", "pdf_size": 449640, "recommendation": "3;3;6;6", "confidence": "5;4;3;3", "correctness": "3;2;3;2", "technical_novelty": "2;2;4;3", "empirical_novelty": "2;2;3;4", "wc_summary_paper": "49;91;122;182", "wc_strength_and_weaknesses": "85;630;620;233", "wc_clarity_quality_novelty_and_reproducibility": "81;2;96;87", "wc_summary_review": "46;2;74;43", "wc_review": "261;725;912;545", "wc_reply_reviewers": "0;557;0;0", "wc_reply_authors": "588;1745;738;336", "reply_reviewers": "0;2;0;0", "reply_authors": "2;4;2;2", "recommendation_avg": [ 4.5, 1.5 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 111.0, 48.49226742481733 ], "wc_strength_and_weaknesses_avg": [ 392.0, 238.82943704660863 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 66.5, 37.61980861195336 ], "wc_summary_review_avg": [ 41.25, 25.68438241422207 ], "wc_review_avg": [ 610.75, 240.02747238597502 ], "wc_reply_reviewers_avg": [ 139.25, 241.18807495396615 ], "wc_reply_authors_avg": [ 851.75, 535.349593723578 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": -0.9045340337332909, "corr_recommendation_correctness": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9026234974162387686&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;1;1;1", "aff_unique_norm": "Shanghai Jiao Tong University;Microsoft", "aff_unique_dep": ";Research", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.microsoft.com/en-us/research/group/asia", "aff_unique_abbr": "SJTU;MSR Asia", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;0;0;1;1", "aff_country_unique": "China;United States" }, { "id": "9gRIOMVLCiH", "title": "Toward Learning Geometric Eigen-Lengths Crucial for Robotic Fitting Tasks", "track": "main", "status": "Reject", "tldr": "We formulate a novel learning problem and explore learning frameworks to discover useful low-dimensional yet sufficient geometric eigen-lengths for fitting tasks.", "abstract": "Some extremely low-dimensional yet crucial geometric eigen-lengths often determine whether an object can be fitted in the environment or not. For example, the {\\em height} of an object is important to measure to check if it can fit between the shelves of a cabinet, while the {\\em width} of a couch is crucial when trying to move it through a doorway. Humans have materialized such crucial geometric eigen-lengths in common sense since they are very useful in serving as succinct yet effective, highly interpretable, and universal object representations. However, it remains obscure and underexplored if learning systems can be equipped with similar capabilities of automatically discovering such key geometric quantities in doing robotic fitting tasks. In this work, we therefore for the first time formulate and propose a novel learning problem on this question and set up a benchmark suite including the tasks, the data, and the evaluation metrics for studying the problem. 
We explore potential solutions and demonstrate the feasibility of learning such eigen-lengths from simply observing successful and failed fitting trials. We also attempt geometric grounding for more accurate eigen-length measurement and study the reusability of the learned geometric eigen-lengths across multiple tasks. Our work marks the first exploratory step toward learning crucial geometric eigen-lengths and we hope it can inspire future research in tackling this important yet underexplored problem. \n\n", "keywords": "Visual Representation Learning;Shape Understanding", "primary_area": "", "supplementary_material": "", "author": "Yijia Weng;Kaichun Mo;Ruoxi Shi;Yanchao Yang;Leonidas Guibas", "authorids": "~Yijia_Weng1;~Kaichun_Mo1;~Ruoxi_Shi1;~Yanchao_Yang1;~Leonidas_Guibas1", "gender": "F;M;Not Specified;M;M", "homepage": "https://yijiaweng.github.io/;https://cs.stanford.edu/~kaichun/;https://rshi.top/;https://yanchaoyang.github.io/;http://geometry.stanford.edu/", "dblp": "264/9900;172/1283;190/7068;84/8637-1;g/LeonidasJGuibas", "google_scholar": "yeuv8L4AAAAJ;pL7JsOsAAAAJ;Z7zLvdkAAAAJ;r2tKnV4AAAAJ;https://scholar.google.com.tw/citations?user=5JlEyTAAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Yijia_Weng1;~Kaichun_Mo1;~Ruoxi_Shi1;~Yanchao_Yang1;~Leonidas_Guibas1", "aff": "Stanford University;NVIDIA;Shanghai Jiaotong University;University of Hong Kong;Stanford University", "aff_domain": "stanford.edu;nvidia.com;sjtu.edu.cn;hku.hk;stanford.edu", "position": "PhD student;Researcher;Undergrad student;Assistant Professor;Full Professor", "bibtex": "@misc{\nweng2023toward,\ntitle={Toward Learning Geometric Eigen-Lengths Crucial for Robotic Fitting Tasks},\nauthor={Yijia Weng and Kaichun Mo and Ruoxi Shi and Yanchao Yang and Leonidas Guibas},\nyear={2023},\nurl={https://openreview.net/forum?id=9gRIOMVLCiH}\n}", "github": "", "project": "", "reviewers": "rLUp;rqrT;4TGL;VXuJ", "site": "https://openreview.net/forum?id=9gRIOMVLCiH", "pdf_size": 13495423, "recommendation": "3;5;5;8", "confidence": "3;3;3;4", "correctness": "3;2;4;4", "technical_novelty": "2;3;2;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "48;103;46;72", "wc_strength_and_weaknesses": "415;402;82;123", "wc_clarity_quality_novelty_and_reproducibility": "96;80;19;68", "wc_summary_review": "30;199;39;48", "wc_review": "589;784;186;311", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.25, 1.7853571071357126 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 67.25, 23.036655573238058 ], "wc_strength_and_weaknesses_avg": [ 255.5, 153.75386174012021 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 65.75, 28.760867511255636 ], "wc_summary_review_avg": [ 79.0, 69.57370192824297 ], "wc_review_avg": [ 467.5, 233.81028634343699 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.8892972917998875, "corr_recommendation_correctness": 0.46442036401282394, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:_eeGzPkfetMJ:scholar.google.com/&scioq=Toward+Learning+Geometric+Eigen-Lengths+Crucial+for+Robotic+Fitting+Tasks&hl=en&as_sdt=0,14", 
"gs_version_total": 0, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "Stanford University;NVIDIA;Shanghai Jiao Tong University;University of Hong Kong", "aff_unique_dep": ";NVIDIA Corporation;;", "aff_unique_url": "https://www.stanford.edu;https://www.nvidia.com;https://www.sjtu.edu.cn;https://www.hku.hk", "aff_unique_abbr": "Stanford;NVIDIA;SJTU;HKU", "aff_campus_unique_index": "0;2;0", "aff_campus_unique": "Stanford;;Hong Kong SAR", "aff_country_unique_index": "0;0;1;1;0", "aff_country_unique": "United States;China" }, { "title": "NeRN: Learning Neural Representations for Neural Networks", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11919", "id": "9gfir3fSy3J", "poster": "/media/PosterPDFs/ICLR%202023/11919.png?t=1682753658.5638378", "openreview": "https://openreview.net/forum?id=9gfir3fSy3J", "slides": "https://iclr.cc/virtual/2023/poster/11919", "video": "https://iclr.cc/virtual/2023/poster/11919", "author_site": "Maor Ashkenazi, Zohar Rimon, Ron Vainshtein, Shir Levi, Elad Richardson, Pinchas Mintz, Eran Treister", "tldr": "In this paper we present NerN: a neural representation for the weights of a pretrained neural network, which is obtained by applying smoothness over the reconstructed weights and various knowledge distillation techniques", "abstract": "Neural Representations have recently been shown to effectively reconstruct a wide range of signals from 3D meshes and shapes to images and videos. We show that, when adapted correctly, neural representations can be used to directly represent the weights of a pre-trained convolutional neural network, resulting in a Neural Representation for Neural Networks (NeRN). Inspired by coordinate inputs of previous neural representation methods, we assign a coordinate to each convolutional kernel in our network based on its position in the architecture, and optimize a predictor network to map coordinates to their corresponding weights. Similarly to the spatial smoothness of visual scenes, we show that incorporating a smoothness constraint over the original network's weights aids NeRN towards a better reconstruction. In addition, since slight perturbations in pre-trained model weights can result in a considerable accuracy loss, we employ techniques from the field of knowledge distillation to stabilize the learning process. We demonstrate the effectiveness of NeRN in reconstructing widely used architectures on CIFAR-10, CIFAR-100, and ImageNet. 
Finally, we present two applications using NeRN, demonstrating the capabilities of the learned representations.", "keywords": "Convolutional Neural Networks;Neural Representations;Implicit Representations", "primary_area": "", "supplementary_material": "/attachment/bcf5ec7864fde8d72756ed6f516572a3a518615e.zip", "author": "Maor Ashkenazi;Zohar Rimon;Ron Vainshtein;Shir Levi;Elad Richardson;Pinchas Mintz;Eran Treister", "authorids": "~Maor_Ashkenazi1;~Zohar_Rimon1;~Ron_Vainshtein1;~Shir_Levi1;~Elad_Richardson2;~Pinchas_Mintz1;~Eran_Treister1", "gender": "M;M;M;F;;M;M", "homepage": ";https://zoharri.github.io/;;https://www.facebook.com/Leshir26;;;https://www.cs.bgu.ac.il/~erant/", "dblp": "190/8394;322/9338;336/9090;;;;22/10384", "google_scholar": ";GV8imVUAAAAJ;;;https://scholar.google.co.il/citations?user=9npMV2kAAAAJ;;https://scholar.google.co.il/citations?user=5nNoFlEAAAAJ", "orcid": ";;;;;;0000-0002-5351-0966", "linkedin": ";;ron-vainshtein-284038192;;;pinky-mintz-a69bb7146;", "or_profile": "~Maor_Ashkenazi1;~Zohar_Rimon1;~Ron_Vainshtein1;~Shir_Levi1;~Elad_Richardson2;~Pinchas_Mintz1;~Eran_Treister1", "aff": "Ben Gurion University of the Negev;Technion - Israel Institute of Technology, Technion - Israel Institute of Technology;Technion - Israel Institute of Technology, Technion;;Tel Aviv University;;Ben-Gurion University of the Negev", "aff_domain": "bgu.ac.il;campus.technion.ac.il;technion.ac.il;;tau.ac.il;;bgu.ac.il", "position": "PhD student;MS student;MS student;;PhD student;;Associate Professor", "bibtex": "@inproceedings{\nashkenazi2023nern,\ntitle={Ne{RN}: Learning Neural Representations for Neural Networks},\nauthor={Maor Ashkenazi and Zohar Rimon and Ron Vainshtein and Shir Levi and Elad Richardson and Pinchas Mintz and Eran Treister},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=9gfir3fSy3J}\n}", "github": "", "project": "", "reviewers": "uYwd;MGd2;HBhL;jngE", "pdf_size": 603994, "recommendation": "6;6;8;8", "confidence": "4;3;5;4", "correctness": "4;3;3;4", "technical_novelty": "4;2;4;3", "empirical_novelty": "2;0;4;3", "wc_summary_paper": "53;49;60;105", "wc_strength_and_weaknesses": "43;133;268;248", "wc_clarity_quality_novelty_and_reproducibility": "29;8;59;68", "wc_summary_review": "17;11;30;44", "wc_review": "142;201;417;465", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "459;428;541;818", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;2", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.25, 0.82915619758885 ], "empirical_novelty_avg": [ 2.25, 1.479019945774904 ], "wc_summary_paper_avg": [ 66.75, 22.431841208425134 ], "wc_strength_and_weaknesses_avg": [ 173.0, 91.03570728016562 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 41.0, 23.90606617576384 ], "wc_summary_review_avg": [ 25.5, 12.698425099200294 ], "wc_review_avg": [ 306.25, 137.4070140131136 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 561.5, 153.73760112607457 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.7071067811865475, "corr_recommendation_correctness": 0.0, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13192173266744109243&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 5, "pdf": 
"https://openreview.net/pdf?id=9gfir3fSy3J", "email": "bgu.ac.il;campus.technion.ac.il;technion.ac.il;;tau.ac.il;;bgu.ac.il", "author_num": 7, "aff_unique_index": "0;1;1;2;3", "aff_unique_norm": "Ben Gurion University of the Negev;Technion - Israel Institute of Technology;Tel Aviv University;Ben-Gurion University of the Negev", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.bgu.ac.il;https://www.technion.ac.il/en/;https://www.tau.ac.il;https://www.bgu.ac.il", "aff_unique_abbr": "BGU;Technion;TAU;BGU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Israel" }, { "id": "9hp9PIFDhsK", "title": "SuperFed: Weight Shared Federated Learning", "track": "main", "status": "Reject", "tldr": "Federated Training of K models in O(1) (amortized) communication and computation cost. ", "abstract": "Federated Learning (FL) is a well-established technique for privacy preserving\ndistributed training. Much attention has been given to various aspects of FL training. A growing number of applications that consume FL-trained models, however, increasingly operate under dynamically and unpredictably variable conditions, rendering a single model insufficient. We argue for training a global \u201cfamily of models\u201d cost efficiently in a federated fashion. Training them independently for different tradeoff points incurs \u2248 O(k) cost for any k architectures of interest, however.\nStraightforward applications of FL techniques to recent weight-shared training\napproaches is either infeasible or prohibitively expensive. We propose SuperFed \u2014 an architectural framework that incurs O(1) cost to co-train a large family ofmodels in a federated fashion by leveraging weight-shared learning. We achieve an order of magnitude cost savings on both communication and computation by proposing two novel training mechanisms: (a) distribution of weight-shared models to federated clients, (b) central aggregation of arbitrarily overlapping weight-shared model parameters. 
The combination of these mechanisms is shown to reach an order of magnitude (9.43x) reduction in computation and communication cost for training a 5*10^18-sized family of models, compared to independently training as few as k = 9 DNNs without any accuracy loss.", "keywords": "Weight Shared;Federated Learning", "primary_area": "", "supplementary_material": "", "author": "Alind Khare;Animesh Agrawal;Alexey Tumanov", "authorids": "~Alind_Khare1;~Animesh_Agrawal1;~Alexey_Tumanov1", "gender": "M;M;", "homepage": "https://www.cc.gatech.edu/~akhare39/;https://www.animeshagrawal.tech;", "dblp": "211/0360;;", "google_scholar": "zOqYHzsAAAAJ;;", "orcid": "0000-0003-4649-9022;;", "linkedin": ";animesh7agrawal;", "or_profile": "~Alind_Khare1;~Animesh_Agrawal1;~Alexey_Tumanov1", "aff": "Georgia Institute of Technology;Georgia Institute of Technology;", "aff_domain": "gatech.edu;gatech.edu;", "position": "PhD student;MS student;", "bibtex": "@misc{\nkhare2023superfed,\ntitle={SuperFed: Weight Shared Federated Learning},\nauthor={Alind Khare and Animesh Agrawal and Alexey Tumanov},\nyear={2023},\nurl={https://openreview.net/forum?id=9hp9PIFDhsK}\n}", "github": "", "project": "", "reviewers": "uEYf;8Rfx;nfyJ;EQW3", "site": "https://openreview.net/forum?id=9hp9PIFDhsK", "pdf_size": 757205, "recommendation": "5;5;6;6", "confidence": "3;4;3;2", "correctness": "3;2;3;3", "technical_novelty": "3;3;3;3", "empirical_novelty": "3;3;4;3", "wc_summary_paper": "44;67;96;45", "wc_strength_and_weaknesses": "237;230;272;77", "wc_clarity_quality_novelty_and_reproducibility": "17;58;69;35", "wc_summary_review": "30;32;87;31", "wc_review": "328;387;524;188", "wc_reply_reviewers": "0;189;0;0", "wc_reply_authors": "608;805;548;90", "reply_reviewers": "0;2;0;0", "reply_authors": "3;5;1;1", "recommendation_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 63.0, 21.15419580130618 ], "wc_strength_and_weaknesses_avg": [ 204.0, 75.0299940023988 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 44.75, 20.17888748172208 ], "wc_summary_review_avg": [ 45.0, 24.259018941416407 ], "wc_review_avg": [ 356.75, 120.6137948163476 ], "wc_reply_reviewers_avg": [ 47.25, 81.83940065762945 ], "wc_reply_authors_avg": [ 512.75, 261.9364188118941 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.5, 1.6583123951777 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.7071067811865475, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:bR4bNnz7r4IJ:scholar.google.com/&scioq=SuperFed:+Weight+Shared+Federated+Learning&hl=en&as_sdt=0,33", "gs_version_total": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Georgia Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.gatech.edu", "aff_unique_abbr": "Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "9irBKvxsw9", "title": "Deep Learning-based Source Code Complexity Prediction", "track": "main", "status": "Withdraw", "tldr": "We suggest a deep-learning based approach for estimating computational (time) complexity of given programs and provide the largest code complexity dataset as the benchmark.", "abstract": "Deciding the 
computational complexity of algorithms is a really challenging problem even for human algorithm experts. Theoretically, the problem of deciding the computational complexity of a given program is undecidable due to the famous Halting problem. In this paper, we tackle the problem by designing a neural network that comprehends the algorithmic nature of codes and estimates the worst-case complexity.\nFirst, we construct a code dataset called the CodeComplex that consists of 4,120 Java codes submitted to programming competitions by human programmers and their complexity labels annotated by a group of algorithm experts. As far as we are aware, the CodeComplex dataset is by far the largest code dataset for the complexity prediction problem. Then, we present several baseline algorithms using the previous code understanding neural models such as CodeBERT, GraphCodeBERT, PLBART, and CodeT5. As the previous code understanding models do not work well on longer codes due to the code length limit, we propose the hierarchical Transformer architecture which takes method-level code snippets instead of whole codes and combines the method-level embeddings to the class-level embedding and ultimately to the code-level embedding. Moreover, we introduce pre-training objectives for the proposed model to induce the model to learn both the intrinsic property of the method-level codes and the relationship between the components.\nLastly, we demonstrate that the proposed hierarchical architecture and pre-training objectives achieve state-of-the-art performance in terms of complexity prediction accuracy compared to the previous code understanding models.", "keywords": "computational complexity;code classification;programming language;data augmentation;code understanding", "primary_area": "", "supplementary_material": "/attachment/d1e12c6fe911f83896916bdb7451a8262a45df2e.zip", "author": "Mingi Jeon;Seung-yeop Baik;Joonghyuk Hahn;Yo-Sub Han;Sang-Ki Ko", "authorids": "~Mingi_Jeon1;sybaik2006@yonsei.ac.kr;~Joonghyuk_Hahn1;~Yo-Sub_Han1;~Sang-Ki_Ko1", "gender": "M;;M;;M", "homepage": "https://ckawoalt.github.io/;;https://peer0.github.io;http://toc.yonsei.ac.kr/~emmous/;https://sites.google.com/site/sangkikotoc/home", "dblp": ";;304/4027;h/YoSubHan;71/9491.html", "google_scholar": ";;08ccS2oAAAAJ;yDOh26sAAAAJ;https://scholar.google.com/scholar?hl=en", "orcid": ";;0009-0000-5890-4916;;", "linkedin": ";;joonghyuk-hahn;;", "or_profile": "~Mingi_Jeon1;sybaik2006@yonsei.ac.kr;~Joonghyuk_Hahn1;~Yo-Sub_Han1;~Sang-Ki_Ko1", "aff": "Kangwon National University;;Yonsei University;Yonsei University;Kangwon National University", "aff_domain": "kangwon.ac.kr;;yonsei.ac.kr;yonsei.ac.kr;kangwon.ac.kr", "position": "MS student;;PhD student;Full Professor;Assistant Professor", "bibtex": "@misc{\njeon2023deep,\ntitle={Deep Learning-based Source Code Complexity Prediction},\nauthor={Mingi Jeon and Seung-yeop Baik and Joonghyuk Hahn and Yo-Sub Han and Sang-Ki Ko},\nyear={2023},\nurl={https://openreview.net/forum?id=9irBKvxsw9}\n}", "github": "", "project": "", "reviewers": "uq2X;ZDEZ;ySD6;anef", "site": "https://openreview.net/forum?id=9irBKvxsw9", "pdf_size": 562156, "recommendation": "3;5;6;6", "confidence": "4;4;5;5", "correctness": "2;3;3;3", "technical_novelty": "2;2;4;4", "empirical_novelty": "2;0;4;2", "wc_summary_paper": "199;62;184;54", "wc_strength_and_weaknesses": "381;330;264;182", "wc_clarity_quality_novelty_and_reproducibility": "28;214;24;23", "wc_summary_review": "109;38;39;34", "wc_review": "717;644;511;293",
"wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 4.5, 0.5 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 1.0 ], "empirical_novelty_avg": [ 2.0, 1.4142135623730951 ], "wc_summary_paper_avg": [ 124.75, 67.02005296924197 ], "wc_strength_and_weaknesses_avg": [ 289.25, 74.52977592881922 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 72.25, 81.86078120809745 ], "wc_summary_review_avg": [ 55.0, 31.232995373482833 ], "wc_review_avg": [ 541.25, 161.23643353783288 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.8164965809277259, "corr_recommendation_correctness": 0.9428090415820632, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2417950689638685925&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Kangwon National University;Yonsei University", "aff_unique_dep": ";", "aff_unique_url": "http://www.kangwon.ac.kr;https://www.yonsei.ac.kr", "aff_unique_abbr": "KNU;Yonsei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "9jW_Oynp0au", "title": "PointConvFormer: Revenge of the Point-Based Convolution", "track": "main", "status": "Withdraw", "tldr": "We introduce PointConvFormer, a novel building block using transformer attention to improve point convolution.", "abstract": "We introduce PointConvFormer, a novel building block for point cloud based deep network architectures. Inspired by generalization theory, PointConvFormer combines ideas from point convolution, where filter weights are only based on relative position, and Transformers which utilize feature-based attention. In PointConvFormer, attention computed from feature difference between points in the neighborhood is used to modify the convolutional weights at each point. Hence, we preserved the invariances from point convolution, whereas attention helps to select relevant points in the neighborhood for convolution. We experiment on both semantic segmentation and scene flow estimation tasks on point clouds with multiple datasets including ScanNet, SemanticKitti, FlyingThings3D and KITTI. Our results show that PointConvFormer substantially outperforms classic convolutions, regular transformers, and voxelized sparse convolution approaches with much smaller and faster networks. Visualizations show that PointConvFormer performs similarly to convolution on flat areas, whereas the neighborhood selection effect is stronger on object boundaries, showing that it has got the best of both worlds. 
The code will be available with the final version.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/c9f9fb3c26c2671bb77602cd2f0a3c4d4a020db7.zip", "author": "Wenxuan Wu;Li Fuxin;Qi Shan", "authorids": "~Wenxuan_Wu1;~Li_Fuxin1;~Qi_Shan1", "gender": "M;M;M", "homepage": ";http://shanqi.github.io;http://web.engr.oregonstate.edu/~lif/", "dblp": ";14/1682;03/2783", "google_scholar": "uv3I78wAAAAJ;0FbnKXwAAAAJ;snDpfA0AAAAJ", "orcid": ";;", "linkedin": ";qi-shan-2b8672b6/;", "or_profile": "~Wenxuan_Wu1;~Qi_Shan1;~Fuxin_Li1", "aff": "Institute of Automation, Chinese Academy of Sciences;Apple;Apple", "aff_domain": "ia.ac.cn;apple.com;apple.com", "position": "Postdoc;Researcher;Researcher", "bibtex": "@misc{\nwu2023pointconvformer,\ntitle={PointConvFormer: Revenge of the Point-Based Convolution},\nauthor={Wenxuan Wu and Li Fuxin and Qi Shan},\nyear={2023},\nurl={https://openreview.net/forum?id=9jW_Oynp0au}\n}", "github": "", "project": "", "reviewers": "FbQT;w5vE;PyjR", "site": "https://openreview.net/forum?id=9jW_Oynp0au", "pdf_size": 4556008, "recommendation": "3;3;5", "confidence": "3;5;4", "correctness": "4;3;3", "technical_novelty": "2;2;2", "empirical_novelty": "3;2;2", "wc_summary_paper": "56;60;38", "wc_strength_and_weaknesses": "111;262;198", "wc_clarity_quality_novelty_and_reproducibility": "21;82;10", "wc_summary_review": "33;61;27", "wc_review": "221;465;273", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 51.333333333333336, 9.568466729604882 ], "wc_strength_and_weaknesses_avg": [ 190.33333333333334, 61.88340290866005 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 37.666666666666664, 31.668421004036322 ], "wc_summary_review_avg": [ 40.333333333333336, 14.817407180595247 ], "wc_review_avg": [ 319.6666666666667, 104.93595930640534 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": -0.5, "gs_citation": 40, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5474187684369403866&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;1", "aff_unique_norm": "Chinese Academy of Sciences;Apple", "aff_unique_dep": "Institute of Automation;Apple Inc.", "aff_unique_url": "http://www.ia.cas.cn;https://www.apple.com", "aff_unique_abbr": "CAS;Apple", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "China;United States" }, { "id": "9jXqR128vKs", "title": "In-Context Policy Iteration", "track": "main", "status": "Reject", "tldr": "We present a novel algorithm for performing policy iteration through in-context adaptation", "abstract": "This work presents In-Context Policy Iteration, an algorithm for performing Reinforcement Learning (RL), in-context, using foundation models. 
While the application of foundation models to RL has received considerable attention, most approaches rely on either (1) the curation of expert demonstrations (either through manual design or task-specific pretraining) or (2) adaptation to the task of interest using gradient methods (either fine-tuning or training of adapter layers). Both of these techniques have drawbacks. Collecting demonstrations is labor-intensive, and algorithms that rely on them do not outperform the experts from which the demonstrations were derived. All gradient techniques are inherently slow, sacrificing the \u201cfew-shot\u201d quality that made in-context learning attractive to begin with. In this work, we present an algorithm, ICPI, that learns to perform RL tasks without expert demonstrations or gradients. Instead we present a policy-iteration method in which the prompt content is the entire locus of learning. ICPI iteratively updates the contents of the prompt from which it derives its policy through trial-and-error interaction with an RL environment. In order to eliminate the role of in-weights learning (on which approaches like Decision Transformer rely heavily), we demonstrate our algorithm using Codex (Chen et al., 2021b), a language model with no prior knowledge of the domains on which we evaluate it.", "keywords": "Reinforcement Learning;In-Context Learning;Foundation Models", "primary_area": "", "supplementary_material": "/attachment/75f02357a44c4610c4f976e3c0aa0ad41b860f56.zip", "author": "Ethan Brooks;Logan A Walls;Richard Lewis;Satinder Singh", "authorids": "~Ethan_Brooks1;~Logan_A_Walls1;~Richard_Lewis1;~Satinder_Singh2", "gender": "M;;M;", "homepage": "https://ethanabrooks.github.io/;https://github.com/LoganWalls;;", "dblp": ";217/9343;12/590;", "google_scholar": "MxDHjTUAAAAJ;;;", "orcid": ";0000-0002-5678-441X;;", "linkedin": ";;;", "or_profile": "~Ethan_Brooks1;~Logan_A_Walls1;~Richard_Lewis1;~Satinder_Baveja2", "aff": "University of Michigan;University of Michigan - Ann Arbor;University of Michigan - Ann Arbor;Google DeepMind", "aff_domain": "umich.edu;umich.edu;umich.edu;google.com", "position": "PhD student;PhD student;Full Professor;Research Scientist", "bibtex": "@misc{\nbrooks2023incontext,\ntitle={In-Context Policy Iteration},\nauthor={Ethan Brooks and Logan A Walls and Richard Lewis and Satinder Singh},\nyear={2023},\nurl={https://openreview.net/forum?id=9jXqR128vKs}\n}", "github": "", "project": "", "reviewers": "jeCL;Kfri;8XAi;wJ8R", "site": "https://openreview.net/forum?id=9jXqR128vKs", "pdf_size": 879380, "recommendation": "5;5;6;6", "confidence": "4;2;2;4", "correctness": "3;4;4;3", "technical_novelty": "4;2;3;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "118;13;19;141", "wc_strength_and_weaknesses": "389;28;64;649", "wc_clarity_quality_novelty_and_reproducibility": "40;13;10;164", "wc_summary_review": "62;87;29;50", "wc_review": "609;141;122;1004", "wc_reply_reviewers": "261;44;37;235", "wc_reply_authors": "984;674;294;1457", "reply_reviewers": "1;1;1;2", "reply_authors": "4;4;3;8", "recommendation_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.0, 1.0 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 72.75, 57.3688722217894 ], "wc_strength_and_weaknesses_avg": [ 282.5, 254.0556041499577 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 56.75, 63.013391433884905 ], "wc_summary_review_avg": [ 57.0, 20.96425529323663 ], "wc_review_avg": [ 469.0,
365.3142482849526 ], "wc_reply_reviewers_avg": [ 144.25, 104.18583157032438 ], "wc_reply_authors_avg": [ 852.25, 426.17389349888623 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 4.75, 1.920286436967152 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2153365501952918586&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University of Michigan;Google", "aff_unique_dep": ";Google DeepMind", "aff_unique_url": "https://www.umich.edu;https://deepmind.com", "aff_unique_abbr": "UM;DeepMind", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Ann Arbor", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "United States;United Kingdom" }, { "title": "Optimistic Exploration with Learned Features Provably Solves Markov Decision Processes with Neural Dynamics", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11131", "id": "9kBCMNb5mc", "poster": "", "openreview": "https://openreview.net/forum?id=9kBCMNb5mc", "slides": "https://iclr.cc/virtual/2023/poster/11131", "video": "https://iclr.cc/virtual/2023/poster/11131", "author_site": "Sirui Zheng, Lingxiao Wang, Shuang Qiu, Zuyue Fu, Zhuoran Yang, Csaba Szepesvari, Zhaoran Wang", "tldr": "We identify a class of Markov decision processes with neural network parameterization and propose an oracle-efficient algorithm whose sample complexity does not depend on the Eluder dimension of the NN class.", "abstract": "Incorporated with the recent advances in deep learning, deep reinforcement learning (DRL) has achieved tremendous success in empirical study. However, analyzing DRL is still challenging due to the complexity of the neural network class. In this paper, we address such a challenge by analyzing the Markov decision process (MDP) with neural dynamics, which covers several existing models as special cases, including the kernelized nonlinear regulator (KNR) model and the linear MDP. We propose a novel algorithm that designs exploration incentives via learnable representations of the dynamics model by embedding the neural dynamics into a kernel space induced by the system noise. We further establish an upper bound on the sample complexity of the algorithm, which demonstrates the sample efficiency of the algorithm. 
We highlight that, unlike previous analyses of RL algorithms with function approximation, our bound on the sample complexity does not depend on the Eluder dimension of the neural network class, which is known to be exponentially large (Dong et al., 2021).", "keywords": "Reinforcement Learning;Neural Network;Representation Learning.", "primary_area": "", "supplementary_material": "", "author": "Sirui Zheng;Lingxiao Wang;Shuang Qiu;Zuyue Fu;Zhuoran Yang;Csaba Szepesvari;Zhaoran Wang", "authorids": "~Sirui_Zheng2;~Lingxiao_Wang6;~Shuang_Qiu2;~Zuyue_Fu1;~Zhuoran_Yang1;~Csaba_Szepesvari1;~Zhaoran_Wang1", "gender": "M;M;M;;M;M;Not Specified", "homepage": ";;https://shq-ml.github.io/;;https://zhuoranyang.github.io/;https://sites.ualberta.ca/~szepesva/;https://zhaoranwang.github.io/", "dblp": ";140/1229;;https://dblp.uni-trier.de/pid/250/3176;;http://dblp.uni-trier.de/pers/hd/s/Szepesv=aacute=ri:Csaba;117/2756", "google_scholar": ";;-Z7fY00AAAAJ;;;https://scholar.google.ca/citations?user=zvC19mQAAAAJ;https://scholar.google.com.tw/citations?user=HSx0BgQAAAAJ", "orcid": ";;;;;;", "linkedin": "%E6%80%9D%E9%94%90-%E9%83%91-448756212/;;;;;csaba-szepesvari-09376b1?trk=hp-identity-name;", "or_profile": "~Sirui_Zheng2;~Lingxiao_Wang6;~Shuang_Qiu2;~Zuyue_Fu1;~Zhuoran_Yang1;~Csaba_Szepesvari1;~Zhaoran_Wang1", "aff": "Northwestern University;Northwestern University;;;Yale University;Google DeepMind;", "aff_domain": "northwestern.edu;northwestern.edu;;;yale.edu;google.com;", "position": "PhD student;PhD student;;;Assistant Professor;Research Scientist;", "bibtex": "@inproceedings{\nzheng2023optimistic,\ntitle={Optimistic Exploration with Learned Features Provably Solves Markov Decision Processes with Neural Dynamics},\nauthor={Sirui Zheng and Lingxiao Wang and Shuang Qiu and Zuyue Fu and Zhuoran Yang and Csaba Szepesvari and Zhaoran Wang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=9kBCMNb5mc}\n}", "github": "", "project": "", "reviewers": "DNrX;rz87;UMwu", "pdf_size": 580877, "recommendation": "3;6;8", "confidence": "4;2;4", "correctness": "1;3;4", "technical_novelty": "2;3;3", "empirical_novelty": "2;0;0", "wc_summary_paper": "32;35;100", "wc_strength_and_weaknesses": "16;798;357", "wc_clarity_quality_novelty_and_reproducibility": "376;18;22", "wc_summary_review": "5;31;38", "wc_review": "429;882;517", "wc_reply_reviewers": "324;149;221", "wc_reply_authors": "1095;1284;1678", "reply_reviewers": "1;1;1", "reply_authors": "4;4;4", "recommendation_avg": [ 5.666666666666667, 2.0548046676563256 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "correctness_avg": [ 2.6666666666666665, 1.247219128924647 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 0.6666666666666666, 0.9428090415820634 ], "wc_summary_paper_avg": [ 55.666666666666664, 31.372316175606514 ], "wc_strength_and_weaknesses_avg": [ 390.3333333333333, 320.11907506773514 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 138.66666666666666, 167.82795423356094 ], "wc_summary_review_avg": [ 24.666666666666668, 14.197026292697903 ], "wc_review_avg": [ 609.3333333333334, 196.1229772928767 ], "wc_reply_reviewers_avg": [ 231.33333333333334, 71.8161232283918 ], "wc_reply_authors_avg": [ 1352.3333333333333, 242.86393081083264 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 4.0, 0.0 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 
-0.11470786693528094, "corr_recommendation_correctness": 0.997176464952738, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3812667520132523778&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=9kBCMNb5mc", "email": "northwestern.edu;northwestern.edu;;;yale.edu;google.com;", "author_num": 7, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Northwestern University;Yale University;Google", "aff_unique_dep": ";;Google DeepMind", "aff_unique_url": "https://www.northwestern.edu;https://www.yale.edu;https://deepmind.com", "aff_unique_abbr": "NU;Yale;DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "United States;United Kingdom" }, { "title": "Explicitly Minimizing the Blur Error of Variational Autoencoders", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11021", "id": "9krnQ-ue9M", "poster": "/media/PosterPDFs/ICLR%202023/11021.png?t=1682713603.8661942", "openreview": "https://openreview.net/forum?id=9krnQ-ue9M", "slides": "https://iclr.cc/virtual/2023/poster/11021", "video": "https://iclr.cc/virtual/2023/poster/11021", "author_site": "Gustav Bredell, Kyriakos Flouris, Krishna Chaitanya, Ertunc Erdil, Ender Konukoglu", "tldr": "We propose a new reconstruction term for VAEs that explicitly focuses on minimizing the blur of generated/reconstructed images while still optimizing the ELBO.", "abstract": "Variational autoencoders (VAEs) are powerful generative modelling methods; however, they suffer from blurry generated samples and reconstructions compared to the images they have been trained on. Significant research effort has been spent to increase the generative capabilities by creating more flexible models but often flexibility comes at the cost of higher complexity and computational cost. Several works have focused on altering the reconstruction term of the evidence lower bound (ELBO), however, often at the expense of losing the mathematical link to maximizing the likelihood of the samples under the modeled distribution. Here we propose a new formulation of the reconstruction term for the VAE that specifically penalizes the generation of blurry images while at the same time still maximizing the ELBO under the modeled distribution. 
\nWe show the potential of the proposed loss on three different data sets, where it outperforms several recently proposed reconstruction losses for VAEs.", "keywords": "Variational Autoencoders;Generative Modelling;Blur", "primary_area": "", "supplementary_material": "", "author": "Gustav Bredell;Kyriakos Flouris;Krishna Chaitanya;Ertunc Erdil;Ender Konukoglu", "authorids": "~Gustav_Bredell1;~Kyriakos_Flouris1;~Krishna_Chaitanya1;~Ertunc_Erdil1;~Ender_Konukoglu1", "gender": "M;Non-Binary;M;;", "homepage": ";https://k-flouris.github.io/;http://krishnabits001.github.io/;;http://www.vision.ee.ethz.ch/~kender", "dblp": "224/0051;;;84/8711;45/7041", "google_scholar": "rH0fjOQAAAAJ;JUKtIdkAAAAJ;https://scholar.google.ch/citations?user=-uexBRMAAAAJ;https://scholar.google.com.tr/citations?user=JJsyRqAAAAAJ;https://scholar.google.ch/citations?user=OeEMrhQAAAAJ", "orcid": ";0000-0001-7952-1922;0000-0002-9036-6967;;", "linkedin": "gustavbredell/;kyriakosflouris123/;krishnachaitanya001/;;", "or_profile": "~Gustav_Bredell1;~Kyriakos_Flouris1;~Krishna_Chaitanya1;~Ertunc_Erdil1;~Ender_Konukoglu1", "aff": "University of Zurich;Swiss Federal Institute of Technology;Janssen Pharmaceuticals, Johnson and Johnson;Swiss Federal Institute of Technology;ETHZ - ETH Zurich", "aff_domain": "uzh.ch;ethz.ch;jnj.com;ethz.ch;ethz.ch", "position": "PhD student;Postdoc;Researcher;Postdoc;Associate Professor", "bibtex": "@inproceedings{\nbredell2023explicitly,\ntitle={Explicitly Minimizing the Blur Error of Variational Autoencoders},\nauthor={Gustav Bredell and Kyriakos Flouris and Krishna Chaitanya and Ertunc Erdil and Ender Konukoglu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=9krnQ-ue9M}\n}", "github": "", "project": "", "reviewers": "QwSD;Hcu2;q8AD", "pdf_size": 17088851, "recommendation": "6;6;8", "confidence": "2;4;4", "correctness": "3;4;4", "technical_novelty": "3;3;3", "empirical_novelty": "2;3;3", "wc_summary_paper": "56;103;113", "wc_strength_and_weaknesses": "179;279;592", "wc_clarity_quality_novelty_and_reproducibility": "27;46;533", "wc_summary_review": "28;73;43", "wc_review": "290;501;1281", "wc_reply_reviewers": "0;49;48", "wc_reply_authors": "754;1046;1388", "reply_reviewers": "0;1;1", "reply_authors": "1;2;2", "recommendation_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 90.66666666666667, 24.850665092821068 ], "wc_strength_and_weaknesses_avg": [ 350.0, 175.92233134729275 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 202.0, 234.18084180108897 ], "wc_summary_review_avg": [ 48.0, 18.708286933869708 ], "wc_review_avg": [ 690.6666666666666, 426.2239891053634 ], "wc_reply_reviewers_avg": [ 32.333333333333336, 22.866763848189994 ], "wc_reply_authors_avg": [ 1062.6666666666667, 259.0975792159308 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.5, "corr_recommendation_correctness": 0.49999999999999983, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7698007756764269609&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "pdf": 
"https://openreview.net/pdf?id=9krnQ-ue9M", "email": "uzh.ch;ethz.ch;jnj.com;ethz.ch;ethz.ch", "author_num": 5, "aff_unique_index": "0;1;2;1;3", "aff_unique_norm": "University of Zurich;Swiss Federal Institute of Technology;Janssen Pharmaceuticals;ETH Zurich", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.unizh.ch;https://www.ethz.ch;https://www.janssen.com;https://www.ethz.ch", "aff_unique_abbr": "UZH;ETH Zurich;Janssen;ETHZ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "Switzerland;United States" }, { "id": "9pA3oXBwYh7", "title": "Towards biologically plausible Dreaming and Planning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Humans and animals can learn new skills after practicing for a few hours, while current reinforcement learning algorithms require a large amount of data to achieve good performances. \nRecent model-based approaches show promising results by reducing the number of necessary interactions with the environment to learn a desirable policy. However, these methods require biological implausible ingredients, such as the detailed storage of older experiences, and long periods of offline learning. The optimal way to learn and exploit word-models is still an open question.\nTaking inspiration from biology, we suggest that dreaming might be an efficient expedient to use an inner model. We propose a two-module (agent and model) neural network in which \"dreaming\" (living new experiences in a model-based simulated environment) significantly boosts learning. We also explore \"planning\", an online alternative to dreaming, that shows comparable performances. Importantly, our model does not require the detailed storage of experiences, and learns online the world-model. 
This is a key ingredient for biological plausibility and implementability (e.g., in neuromorphic hardware).", "keywords": "Reinforcement Learning;Model based;Biologically Plausible", "primary_area": "", "supplementary_material": "", "author": "Cristiano Capone;Pier Stanislao Paolucci", "authorids": "~Cristiano_Capone1;~Pier_Stanislao_Paolucci1", "gender": "M;", "homepage": ";https://sites.google.com/site/pierstanislaopaolucci/", "dblp": "228/9143;95/2151.html", "google_scholar": "wyHFdf4AAAAJ;https://scholar.google.it/citations?user=jhvLaT8AAAAJ", "orcid": ";0000-0003-1937-6086", "linkedin": ";", "or_profile": "~Cristiano_Capone1;~Pier_Stanislao_Paolucci1", "aff": "INFN;INFN (Istituto Nazionale di Fisica Nucleare)", "aff_domain": "infn.it;infn.it", "position": "Postdoc;Principal Researcher", "bibtex": "@misc{\ncapone2023towards,\ntitle={Towards biologically plausible Dreaming and Planning},\nauthor={Cristiano Capone and Pier Stanislao Paolucci},\nyear={2023},\nurl={https://openreview.net/forum?id=9pA3oXBwYh7}\n}", "github": "", "project": "", "reviewers": "w9LE;k79x;7i2g;MjQE", "site": "https://openreview.net/forum?id=9pA3oXBwYh7", "pdf_size": 2702642, "recommendation": "1;3;3;6", "confidence": "3;3;3;3", "correctness": "2;3;2;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "1;1;2;3", "wc_summary_paper": "62;72;82;184", "wc_strength_and_weaknesses": "220;265;259;112", "wc_clarity_quality_novelty_and_reproducibility": "118;37;749;48", "wc_summary_review": "68;59;146;59", "wc_review": "468;433;1236;403", "wc_reply_reviewers": "123;201;74;0", "wc_reply_authors": "430;287;253;34", "reply_reviewers": "2;2;1;0", "reply_authors": "2;2;1;1", "recommendation_avg": [ 3.25, 1.7853571071357126 ], "confidence_avg": [ 3.0, 0.0 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 1.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 100.0, 49.01020301937138 ], "wc_strength_and_weaknesses_avg": [ 214.0, 61.371817636436354 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 238.0, 296.65720958709227 ], "wc_summary_review_avg": [ 83.0, 36.55817282086182 ], "wc_review_avg": [ 635.0, 347.74919122839094 ], "wc_reply_reviewers_avg": [ 99.5, 73.15223851667152 ], "wc_reply_authors_avg": [ 251.0, 141.80091678123947 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.7001400420140049, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5351672059214613061&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Istituto Nazionale di Fisica Nucleare", "aff_unique_dep": "", "aff_unique_url": "https://www.infn.it", "aff_unique_abbr": "INFN", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Italy" }, { "title": "SMART: Self-supervised Multi-task pretrAining with contRol Transformers", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11019", "id": "9piH3Hg8QEf", "poster": "", "openreview": "https://openreview.net/forum?id=9piH3Hg8QEf", "slides": "https://iclr.cc/virtual/2023/poster/11019", "video": "https://iclr.cc/virtual/2023/poster/11019", "author_site": "Yanchao Sun, shuang ma, Ratnesh Madaan, Rogerio Bonatti, Furong Huang, Ashish Kapoor", "tldr": "We propose a pretraining framework for sequential decision making based on a 
self-supervised objective and a control transformer architecture, leading to significantly higher learning efficiency in various downstream control tasks.", "abstract": "Self-supervised pretraining has been extensively studied in language and vision domains, where a unified model can be easily adapted to various downstream tasks by pretraining representations without explicit labels. When it comes to sequential decision-making tasks, however, it is difficult to properly design such a pretraining approach that can cope with both high-dimensional perceptual information and the complexity of sequential control over long interaction horizons. The challenge becomes combinatorially more complex if we want to pretrain representations amenable to a large variety of tasks. To tackle this problem, in this work, we formulate a general pretraining-finetuning pipeline for sequential decision making, under which we propose a generic pretraining framework \textit{Self-supervised Multi-task pretrAining with contRol Transformer (SMART)}. By systematically investigating pretraining regimes, we carefully design a Control Transformer (CT) coupled with a novel control-centric pretraining objective in a self-supervised manner. SMART encourages the representation to capture the common essential information relevant to short-term control and long-term control, which is transferrable across tasks. We show by extensive experiments in DeepMind Control Suite that SMART significantly improves the learning efficiency among seen and unseen downstream tasks and domains under different learning scenarios including Imitation Learning (IL) and Reinforcement Learning (RL). Benefiting from the proposed control-centric objective, SMART is resilient to distribution shift between pretraining and finetuning, and even works well with low-quality pretraining datasets that are randomly collected. 
The codebase, pretrained models and datasets are provided at https://github.com/microsoft/smart.", "keywords": "pretrain;transformer;multi-task reinforcement learning;sequential decision making;self-supervised", "primary_area": "", "supplementary_material": "/attachment/d2ca26ed0a3d656e825de541268d8a6f064318a7.zip", "author": "Yanchao Sun;Shuang Ma;Ratnesh Madaan;Rogerio Bonatti;Furong Huang;Ashish Kapoor", "authorids": "~Yanchao_Sun1;~Shuang_Ma3;~Ratnesh_Madaan1;~Rogerio_Bonatti1;~Furong_Huang1;~Ashish_Kapoor1", "gender": "F;;;F;;F", "homepage": "https://ycsun2017.github.io/home/index.html;https://ratneshmadaan.github.io/;http://rogeriobonatti.com/;https://furong-huang.com;;https://www.shuangma.me/", "dblp": "132/6840;;184/4631;72/8513;93/161;98/3906", "google_scholar": "bloBY_QAAAAJ;4F9L0-cAAAAJ;https://scholar.google.com.br/citations?user=WFgFAB8AAAAJ;13yyuCcAAAAJ;4D1n8scAAAAJ;IHPRZuMAAAAJ", "orcid": "0000-0002-1137-9939;;;;;", "linkedin": ";;;;ashish-kapoor-a2971b6/;", "or_profile": "~Yanchao_Sun1;~Ratnesh_Madaan1;~Rogerio_Bonatti1;~Furong_Huang1;~Ashish_Kapoor1;~shuang_ma1", "aff": "University of Maryland, College Park;;Microsoft;University of Maryland;Microsoft;Microsoft", "aff_domain": "umd.edu;;microsoft.com;cs.umd.edu;microsoft.com;microsoft.com", "position": "PhD student;;Researcher;Assistant Professor;Researcher;Senior Research Scientist", "bibtex": "@inproceedings{\nsun2023smart,\ntitle={{SMART}: Self-supervised Multi-task pretrAining with contRol Transformers},\nauthor={Yanchao Sun and Shuang Ma and Ratnesh Madaan and Rogerio Bonatti and Furong Huang and Ashish Kapoor},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=9piH3Hg8QEf}\n}", "github": "", "project": "", "reviewers": "JbXg;Jx8K;CXdh;EgV4", "pdf_size": 911563, "recommendation": "6;8;8;8", "confidence": "2;4;3;3", "correctness": "4;4;3;4", "technical_novelty": "2;3;3;4", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "58;74;81;113", "wc_strength_and_weaknesses": "65;140;318;185", "wc_clarity_quality_novelty_and_reproducibility": "22;117;48;1", "wc_summary_review": "69;42;106;42", "wc_review": "214;373;553;341", "wc_reply_reviewers": "0;10;22;0", "wc_reply_authors": "866;909;1511;384", "reply_reviewers": "0;1;1;0", "reply_authors": "3;2;3;1", "recommendation_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 81.5, 20.006249023742555 ], "wc_strength_and_weaknesses_avg": [ 177.0, 92.00271735117393 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 47.0, 43.70926675202868 ], "wc_summary_review_avg": [ 64.75, 26.242856170775315 ], "wc_review_avg": [ 370.25, 121.11435711756059 ], "wc_reply_reviewers_avg": [ 8.0, 9.055385138137417 ], "wc_reply_authors_avg": [ 917.5, 399.87154187313706 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.816496580927726, "corr_recommendation_correctness": -0.3333333333333333, "gs_citation": 54, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13936195855868127683&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=9piH3Hg8QEf", "email": "umd.edu;;microsoft.com;cs.umd.edu;microsoft.com;microsoft.com", "author_num": 
6, "aff_unique_index": "0;1;0;1;1", "aff_unique_norm": "University of Maryland;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www/umd.edu;https://www.microsoft.com", "aff_unique_abbr": "UMD;Microsoft", "aff_campus_unique_index": "0", "aff_campus_unique": "College Park;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "9qgOs_IwRS3", "title": "Neighborhood Gradient Clustering: An Efficient Decentralized Learning Method for Non-IID Data Distributions", "track": "main", "status": "Reject", "tldr": "Proposed a novel decentralized learning algorithm to improve the performance over non-IID data distributions through manipulation of local-gradients", "abstract": "Decentralized learning algorithms enable the training of deep learning models over large distributed datasets generated at different devices and locations, without the need for a central server. In practical scenarios, the distributed datasets can have significantly different data distributions across the agents. The current state-of-the-art decentralized algorithms mostly assume the data distributions to be Independent and Identically Distributed (IID). This paper focuses on improving decentralized learning over non-IID data distributions with minimal compute and memory overheads. We propose Neighborhood Gradient Clustering (NGC), a novel decentralized learning algorithm that modifies the local gradients of each agent using self- and cross-gradient information. Cross-gradients for a pair of neighboring agents are the derivatives of the model parameters of an agent with respect to the dataset of the other agent. In particular, the proposed method replaces the local gradients of the model with the weighted mean of the self-gradients, model-variant cross-gradients (derivatives of the received neighbors\u2019 model parameters with respect to the local dataset - computed locally), and data-variant cross-gradients (derivatives of the local model with respect to its neighbors\u2019 datasets - received through communication). The data-variant cross-gradients are aggregated through an additional communication round without breaking the privacy constraints of the decentralized setting. Further, we present CompNGC, a compressed version of NGC that reduces the communication overhead by $32 \\times$ by compressing the cross-gradients. We demonstrate the empirical convergence and efficiency of the proposed technique over non-IID data distributions sampled from the CIFAR-10 dataset on various model architectures and graph topologies. Our experiments demonstrate that NGC and CompNGC outperform the existing state-of-the-art (SoTA) decentralized learning algorithm over non-IID data by $1-5\\%$ with significantly less compute and memory requirements. Further, we also show that the proposed NGC method outperforms the baseline by $5-40\\%$ with no additional communication. 
", "keywords": "Federated Learning;Distributed Machine Learning;Decentralized Learning;Communication Efficient;Energy Efficient;Non-IID Data Distribution;Convergence", "primary_area": "", "supplementary_material": "/attachment/8d0a2831c97823322497f48ef9c381169995d65b.zip", "author": "Sai Aparna Aketi;Sangamesh Kodge;Kaushik Roy", "authorids": "~Sai_Aparna_Aketi1;~Sangamesh_Kodge1;~Kaushik_Roy1", "gender": "F;M;M", "homepage": "https://aparna-aketi.github.io/;;https://engineering.purdue.edu/NRL/Group", "dblp": "217/0935;203/5657.html;r/KaushikRoy", "google_scholar": "YGtRZCUAAAAJ;;to4P8KgAAAAJ", "orcid": ";0000-0001-9713-5400;", "linkedin": "sai-aparna-aketi;sangameshkodge;", "or_profile": "~Sai_Aparna_Aketi1;~Sangamesh_Kodge1;~Kaushik_Roy1", "aff": "Purdue University;Purdue University;Purdue University", "aff_domain": "purdue.edu;purdue.edu;purdue.edu", "position": "PhD student;PhD student;Full Professor", "bibtex": "@misc{\naketi2023neighborhood,\ntitle={Neighborhood Gradient Clustering: An Efficient Decentralized Learning Method for Non-{IID} Data Distributions},\nauthor={Sai Aparna Aketi and Sangamesh Kodge and Kaushik Roy},\nyear={2023},\nurl={https://openreview.net/forum?id=9qgOs_IwRS3}\n}", "github": "", "project": "", "reviewers": "b2jc;LFjx;y4qJ", "site": "https://openreview.net/forum?id=9qgOs_IwRS3", "pdf_size": 696053, "recommendation": "5;5;5", "confidence": "5;4;4", "correctness": "3;3;3", "technical_novelty": "1;2;3", "empirical_novelty": "1;2;2", "wc_summary_paper": "108;33;47", "wc_strength_and_weaknesses": "296;137;250", "wc_clarity_quality_novelty_and_reproducibility": "36;4;20", "wc_summary_review": "56;14;17", "wc_review": "496;188;334", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 5.0, 0.0 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.0, 0.816496580927726 ], "empirical_novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "wc_summary_paper_avg": [ 62.666666666666664, 32.5610537640019 ], "wc_strength_and_weaknesses_avg": [ 227.66666666666666, 66.8048567762022 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 20.0, 13.063945294843617 ], "wc_summary_review_avg": [ 29.0, 19.131126469708992 ], "wc_review_avg": [ 339.3333333333333, 125.79701462629743 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2681740409233609258&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0;0", "aff_unique_norm": "Purdue University", "aff_unique_dep": "", "aff_unique_url": "https://www.purdue.edu", "aff_unique_abbr": "Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "9rRhMKNOkeT", "title": "Concept-based Explanations for Out-of-Distribution Detectors", "track": "main", "status": "Reject", "tldr": "We propose the first work to provide concept-based explanations for out-of-distribution detectors.", "abstract": "Out-of-distribution (OOD) detection plays a crucial role in ensuring the safe deployment of deep neural network (DNN) classifiers.\nWhile a myriad of methods have focused on improving the 
performance of OOD detectors, a critical gap remains in interpreting their decisions.\nWe help bridge this gap by providing explanations for OOD detectors based on learned high-level concepts.\nWe first propose two new metrics for assessing the effectiveness of a particular set of concepts for explaining OOD detectors: 1) $\\textit{detection completeness}$, which quantifies the sufficiency of concepts for explaining an OOD-detector's decisions, and 2) $\\textit{concept separability}$, which captures the distributional separation between in-distribution and OOD data in the concept space.\nBased on these metrics, we propose a framework for learning a set of concepts that satisfy the desired properties of detection completeness and concept separability, and demonstrate the framework's effectiveness in providing concept-based explanations for diverse OOD detection techniques.\nWe also show how to identify prominent concepts that contribute to the detection results via a modified Shapley value-based importance score.", "keywords": "out-of-distribution detection;interpretability;concept-based explanations", "primary_area": "", "supplementary_material": "/attachment/3343f686ce2bed714b65a601b7562170c1b3ceb4.zip", "author": "Jihye Choi;Jayaram Raghuram;Ryan Feng;Jiefeng Chen;Somesh Jha;Atul Prakash", "authorids": "~Jihye_Choi1;~Jayaram_Raghuram1;~Ryan_Feng1;~Jiefeng_Chen2;~Somesh_Jha1;~Atul_Prakash1", "gender": ";M;;M;M;", "homepage": "https://jihyechoi77.github.io/;;http://www-personal.umich.edu/~rtfeng/;https://jfc43.github.io/;;https://www.eecs.umich.edu/~aprakash", "dblp": "232/3097;117/7273;193/6789;199/3381;j/SomeshJha;p/AtulPrakash", "google_scholar": "lEa3R0sAAAAJ;xvjzWWEAAAAJ;TIJw4tQAAAAJ;5mOfQfAAAAAJ;BaI7l8QAAAAJ;kIkHa2IAAAAJ", "orcid": "0009-0000-9719-3758;0000-0002-9473-3357;0000-0002-4767-274X;;;0000-0002-4907-3687", "linkedin": "jihye-choi-a473a8148/;jayaram-raghuram-32b66410/;;jiefeng-chen-aa1769122/;;atul-prakash-8729a44/", "or_profile": "~Jihye_Choi1;~Jayaram_Raghuram1;~Ryan_Feng1;~Jiefeng_Chen2;~Somesh_Jha1;~Atul_Prakash1", "aff": "University of Wisconsin - Madison;University of Wisconsin - Madison;KLA;University of Wisconsin, Madison;Department of Computer Science, University of Wisconsin, Madison;University of Michigan", "aff_domain": "wisc.edu;cs.wisc.edu;kla.com;wisc.edu;cs.wisc.edu;umich.edu", "position": "PhD student;Researcher;Intern;PhD student;Full Professor;Professor", "bibtex": "@misc{\nchoi2023conceptbased,\ntitle={Concept-based Explanations for Out-of-Distribution Detectors},\nauthor={Jihye Choi and Jayaram Raghuram and Ryan Feng and Jiefeng Chen and Somesh Jha and Atul Prakash},\nyear={2023},\nurl={https://openreview.net/forum?id=9rRhMKNOkeT}\n}", "github": "", "project": "", "reviewers": "qdMm;H68v;v8DY;YcEH", "site": "https://openreview.net/forum?id=9rRhMKNOkeT", "pdf_size": 2695565, "recommendation": "5;6;6;6", "confidence": "4;5;4;4", "correctness": "3;4;3;3", "technical_novelty": "3;3;3;4", "empirical_novelty": "2;2;4;2", "wc_summary_paper": "13;53;44;46", "wc_strength_and_weaknesses": "156;93;88;238", "wc_clarity_quality_novelty_and_reproducibility": "39;262;47;172", "wc_summary_review": "48;81;73;92", "wc_review": "256;489;252;548", "wc_reply_reviewers": "0;166;0;0", "wc_reply_authors": "0;29;0;0", "reply_reviewers": "0;1;0;0", "reply_authors": "0;1;0;0", "recommendation_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], 
"empirical_novelty_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 39.0, 15.378556499229699 ], "wc_strength_and_weaknesses_avg": [ 143.75, 60.6563063497935 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 130.0, 92.67955545857997 ], "wc_summary_review_avg": [ 73.5, 16.194134740701646 ], "wc_review_avg": [ 386.25, 133.89244750918553 ], "wc_reply_reviewers_avg": [ 41.5, 71.88010851410841 ], "wc_reply_authors_avg": [ 7.25, 12.55736835487436 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 0.25, 0.4330127018922193 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17771035506931374957&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff_unique_index": "0;0;1;2;0;3", "aff_unique_norm": "University of Wisconsin-Madison;KLA Corporation;University of Wisconsin;University of Michigan", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.wisc.edu;https://www.kla.com;https://www.wisc.edu;https://www.umich.edu", "aff_unique_abbr": "UW-Madison;KLA;UW;UM", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Madison;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "9sPDt0z3oL4", "title": "Finite-time Analysis of Single-timescale Actor-Critic on Linear Quadratic Regulator", "track": "main", "status": "Reject", "tldr": "Finite-time convergence of single-sample single-timescale actor-critic with a global optimality guarantee", "abstract": "Actor-critic (AC) methods have achieved state-of-the-art performance in many challenging tasks. However, their convergence in most practical applications are still poorly understood. Existing works mostly consider the uncommon double-loop or two-timescale stepsize variants for the ease of analysis. We investigate the practical yet more challenging vanilla single-sample single-timescale AC for solving the canonical linear quadratic regulator problem. Specifically, the actor and the critic update only once with a single sample in each iteration using proportional stepsizes. We prove that the vanilla AC can attain an $\\epsilon$-optimal solution with a sample complexity of $\\tilde{\\mathcal{O}}(\\epsilon^{-2})$, which elucidates on the practical efficiency of single-sample single-timescale AC. We develop a novel analysis framework that directly bounds the whole interconnected iteration system without the conservative decoupling commonly adopted in previous analysis of AC. 
Our work presents the first finite-time analysis of single-sample single-timescale AC with a global optimality guarantee.", "keywords": "single-timescale actor-critic;linear quadratic regulator", "primary_area": "", "supplementary_material": "/attachment/06910dd594d115e29871db718ad6f7edcd19ec8e.zip", "author": "Xuyang Chen;Jingliang Duan;Lin Zhao", "authorids": "~Xuyang_Chen1;~Jingliang_Duan1;~Lin_Zhao3", "gender": "M;M;M", "homepage": ";;https://sites.google.com/view/lzhao", "dblp": ";208/9091;", "google_scholar": "n7GqLNQAAAAJ;https://scholar.google.com/citations?hl=zh-CN;091lFhYAAAAJ", "orcid": ";;0000-0002-1078-887X", "linkedin": ";;", "or_profile": "~Xuyang_Chen1;~Jingliang_Duan1;~Lin_Zhao3", "aff": "National University of Singapore;University of Science and Technology Beijing;National University of Singapore", "aff_domain": "u.nus.edu;ustb.edu.cn;nus.edu.sg", "position": "PhD student;Associate Professor;Assistant Professor", "bibtex": "@misc{\nchen2023finitetime,\ntitle={Finite-time Analysis of Single-timescale Actor-Critic on Linear Quadratic Regulator},\nauthor={Xuyang Chen and Jingliang Duan and Lin Zhao},\nyear={2023},\nurl={https://openreview.net/forum?id=9sPDt0z3oL4}\n}", "github": "", "project": "", "reviewers": "HWQj;Me62;KvYy", "site": "https://openreview.net/forum?id=9sPDt0z3oL4", "pdf_size": 2315724, "recommendation": "3;5;6", "confidence": "4;4;4", "correctness": "2;4;3", "technical_novelty": "2;2;3", "empirical_novelty": "0;2;0", "wc_summary_paper": "35;41;49", "wc_strength_and_weaknesses": "460;54;121", "wc_clarity_quality_novelty_and_reproducibility": "30;39;13", "wc_summary_review": "60;119;284", "wc_review": "585;253;467", "wc_reply_reviewers": "327;0;0", "wc_reply_authors": "1307;373;406", "reply_reviewers": "1;0;0", "reply_authors": "3;2;1", "recommendation_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 0.6666666666666666, 0.9428090415820634 ], "wc_summary_paper_avg": [ 41.666666666666664, 5.734883511361751 ], "wc_strength_and_weaknesses_avg": [ 211.66666666666666, 177.71575306902 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 27.333333333333332, 10.780641085864152 ], "wc_summary_review_avg": [ 154.33333333333334, 94.79920299711854 ], "wc_review_avg": [ 435.0, 137.41421566441613 ], "wc_reply_reviewers_avg": [ 109.0, 154.14927829866735 ], "wc_reply_authors_avg": [ 695.3333333333334, 432.72341692535605 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.6546536707079772, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:yJA7NrHHsB8J:scholar.google.com/&scioq=Finite-time+Analysis+of+Single-timescale+Actor-Critic+on+Linear+Quadratic+Regulator&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;0", "aff_unique_norm": "National University of Singapore;University of Science and Technology Beijing", "aff_unique_dep": ";", "aff_unique_url": "https://www.nus.edu.sg;http://www.ustb.edu.cn", "aff_unique_abbr": "NUS;USTB", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Singapore;China" }, { "id": "9ts90B3xUvP", "title": "Multiple Invertible and Equivariant Transformation for 
Disentanglement in VAEs", "track": "main", "status": "Withdraw", "tldr": "We improve disentangled representation learning with Multiple Invertible and Equivariant transformation (MIE-transformation) in VAEs.", "abstract": "Disentanglement learning is a core issue for understanding and re-using trained information in Variational AutoEncoder (VAE), and effective inductive bias has been reported as a key factor. However, the actual implementation of such bias is still vague. In this paper, we propose a novel method, called MIE-transformation, to inject inductive bias by 1) guaranteeing the invertibility of latent-to-latent vector transformation while preserving a certain portion of equivariance of input-to-latent vector transformation, called IE-transformation, 2) extending the form of prior and posterior in VAE frameworks to an unrestricted form through a learnable conversion to an approximated exponential family, called EF-conversion, and 3) integrating multiple units of IE-transformation and EF-conversion, and their training. In experiments on 3D Cars, 3D Shapes, and dSprites datasets, MIE-transformation improves the disentanglement performance of state-of-the-art VAEs.", "keywords": "Variational AutoEncoder (VAE);Unsupervised Disentanglement Learning;Invertible and Equivariant function;Exponential Family", "primary_area": "", "supplementary_material": "/attachment/5599536646001873e9347a54a3ae882a0552b000.zip", "author": "Hee-Jun Jung;Kangil Kim", "authorids": "~Hee-Jun_Jung1;~Kangil_Kim1", "gender": "M;M", "homepage": ";", "dblp": ";45/8372", "google_scholar": "wE6eHi0AAAAJ;RZggOtkAAAAJ", "orcid": "0000-0002-9805-8192;0000-0003-3220-6401", "linkedin": ";", "or_profile": "~Hee-Jun_Jung1;~Kangil_Kim1", "aff": "Gwangju Institute of Science and Technology;Gwangju Institute of Science and Technology", "aff_domain": "gist.ac.kr;gist.ac.kr", "position": "PhD student;Associate Professor", "bibtex": "@misc{\njung2023multiple,\ntitle={Multiple Invertible and Equivariant Transformation for Disentanglement in {VAE}s},\nauthor={Hee-Jun Jung and Kangil Kim},\nyear={2023},\nurl={https://openreview.net/forum?id=9ts90B3xUvP}\n}", "github": "", "project": "", "reviewers": "KCsj;fgZf;Hh4E;Cd5y", "site": "https://openreview.net/forum?id=9ts90B3xUvP", "pdf_size": 4498124, "recommendation": "3;5;5;5", "confidence": "2;3;2;3", "correctness": "2;4;3;2", "technical_novelty": "2;3;3;2", "empirical_novelty": "2;3;3;2", "wc_summary_paper": "105;60;63;71", "wc_strength_and_weaknesses": "174;169;129;312", "wc_clarity_quality_novelty_and_reproducibility": "147;36;57;12", "wc_summary_review": "40;36;29;38", "wc_review": "466;301;278;433", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "750;560;239;852", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;2", "recommendation_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 2.5, 0.5 ], "correctness_avg": [ 2.75, 0.82915619758885 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 74.75, 17.92170471802278 ], "wc_strength_and_weaknesses_avg": [ 196.0, 69.20621359386742 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 63.0, 51.044098581520664 ], "wc_summary_review_avg": [ 35.75, 4.14578098794425 ], "wc_review_avg": [ 369.5, 81.2542306590863 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 600.25, 233.4120551728209 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 2, 0 ], 
"corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.5222329678670935, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:vF_m_9D4KHoJ:scholar.google.com/&scioq=Multiple+Invertible+and+Equivariant+Transformation+for+Disentanglement+in+VAEs&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Gwangju Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.gist.ac.kr", "aff_unique_abbr": "GIST", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Gwangju", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "title": "Fair Attribute Completion on Graph with Missing Attributes", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12038", "id": "9vcXCMp9VEp", "poster": "/media/PosterPDFs/ICLR%202023/12038.png?t=1681110146.8576603", "openreview": "https://openreview.net/forum?id=9vcXCMp9VEp", "slides": "https://iclr.cc/virtual/2023/poster/12038", "video": "https://iclr.cc/virtual/2023/poster/12038", "author_site": "Dongliang Guo, Zhixuan Chu, Sheng Li", "tldr": "", "abstract": "Tackling unfairness in graph learning models is a challenging task, as the unfairness issues on graphs involve both attributes and topological structures. Existing work on fair graph learning simply assumes that attributes of all nodes are available for model training and then makes fair predictions. In practice, however, the attributes of some nodes might not be accessible due to missing data or privacy concerns, which makes fair graph learning even more challenging. In this paper, we propose FairAC, a fair attribute completion method, to complement missing information and learn fair node embeddings for graphs with missing attributes. FairAC adopts an attention mechanism to deal with the attribute missing problem and meanwhile, it mitigates two types of unfairness, i.e., feature unfairness from attributes and topological unfairness due to attribute completion. FairAC can work on various types of homogeneous graphs and generate fair embeddings for them and thus can be applied to most downstream tasks to improve their fairness performance. To our best knowledge, FairAC is the first method that jointly addresses the graph attribution completion and graph unfairness problems. Experimental results on benchmark datasets show that our method achieves better fairness performance with less sacrifice in accuracy, compared with the state-of-the-art methods of fair graph learning. 
Code is available at: https://github.com/donglgcn/FairAC.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Dongliang Guo;Zhixuan Chu;Sheng Li", "authorids": "~Dongliang_Guo1;~Zhixuan_Chu1;~Sheng_Li3", "gender": "M;M;M", "homepage": "https://donglgcn.github.io/;;http://sheng-li.org", "dblp": "48/7696-2.html;258/1233;23/3439-1", "google_scholar": ";a4IuTngAAAAJ;DEncVcYAAAAJ", "orcid": "0000-0003-2856-4011;;0000-0003-1205-8632", "linkedin": ";;sheng-li-15a70022/", "or_profile": "~Dongliang_Guo1;~Zhixuan_Chu1;~Sheng_Li3", "aff": "University of Georgia;Ant Group;University of Virginia, Charlottesville", "aff_domain": "uga.edu;antgroup.com;virginia.edu", "position": "PhD student;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nguo2023fair,\ntitle={Fair Attribute Completion on Graph with Missing Attributes},\nauthor={Dongliang Guo and Zhixuan Chu and Sheng Li},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=9vcXCMp9VEp}\n}", "github": "", "project": "", "reviewers": "ntMx;Rh4z;ifZW;sBeK", "pdf_size": 438006, "recommendation": "5;6;6;6", "confidence": "2;4;4;4", "correctness": "2;3;4;3", "technical_novelty": "2;3;2;2", "empirical_novelty": "2;3;2;3", "wc_summary_paper": "104;46;81;141", "wc_strength_and_weaknesses": "21;256;151;271", "wc_clarity_quality_novelty_and_reproducibility": "15;76;19;60", "wc_summary_review": "19;22;60;76", "wc_review": "159;400;311;548", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "96;1016;648;509", "reply_reviewers": "0;0;0;0", "reply_authors": "2;2;2;2", "recommendation_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 93.0, 34.56153931757091 ], "wc_strength_and_weaknesses_avg": [ 174.75, 100.08590060542993 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 42.5, 26.158172719056658 ], "wc_summary_review_avg": [ 44.25, 24.437420076595647 ], "wc_review_avg": [ 354.5, 141.08951059522462 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 567.25, 329.1529849477291 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 1.0, "corr_recommendation_correctness": 0.816496580927726, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1679431367838230545&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=9vcXCMp9VEp", "email": "uga.edu;antgroup.com;virginia.edu", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Georgia;Ant Group;University of Virginia", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uga.edu;https://www.antgroup.com;https://www.virginia.edu", "aff_unique_abbr": "UGA;Ant Group;UVA", "aff_campus_unique_index": "1", "aff_campus_unique": ";Charlottesville", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;China" }, { "id": "9wx-QXt-JaN", "title": "Can Language Models Make Fun? A Case Study in Chinese Comical Crosstalk", "track": "main", "status": "Withdraw", "tldr": "Testing whether AI could make fun!", "abstract": "Language is the principal tool for human communication, in which humor is one of the most attractive parts. 
Producing natural language like humans using computers, a.k.a., Natural Language Generation (NLG), has been widely used for dialogue systems, chatbots, machine translation, as well as computer-aided creation, e.g., idea generation and scriptwriting. However, the humor aspect of natural language is relatively under-investigated, especially in the age of pre-trained language models. \nIn this work, we aim to preliminarily test whether \textit{NLG can generate humor as humans do}. We build a new dataset consisting of numerous digitized \textbf{C}hinese \textbf{C}omical \textbf{C}rosstalk scripts (called \textbf{C}$^3$ for short), which is for a popular Chinese performing art called `Xiangsheng' or `\u76f8\u58f0' since the 1800s \footnote{For the convenience of non-Chinese speakers, we use `crosstalk' for `Xiangsheng' in this paper.}. We benchmark various generation approaches including training-from-scratch Seq2seq, fine-tuned middle-scale PLMs, and large-scale PLMs (with and without fine-tuning). Moreover, we conduct a human assessment, showing that 1) \textit{large-scale pretraining largely improves crosstalk generation quality}; and 2) \textit{even the scripts generated from the best PLM are far from what we expect}. We conclude that humor generation could be largely improved using large-scale PLMs, but it is still in its infancy. The data and benchmarking code are publicly available at \url{https://github.com/anonNo2/crosstalk-generation}.", "keywords": "humor generation;Chinese crosstalk;pre-trained language model;GPT;natural language generation", "primary_area": "", "supplementary_material": "", "author": "Benyou Wang;Wu Xiangbo;xiaokang liu;Jianquan Li;Prayag Tiwari;Yongqiang Gao;Qianqian Xie", "authorids": "~Benyou_Wang2;~Wu_Xiangbo1;~xiaokang_liu3;~Jianquan_Li1;~Prayag_Tiwari1;~Yongqiang_Gao1;~Qianqian_Xie1", "gender": "M;M;M;M;M;M;F", "homepage": "https://wabyking.github.io/old.html;https://github.com/anonNo2;;;https://prayagtiwari.github.io/;;", "dblp": "169/1793;;;;198/3643;99/9999;", "google_scholar": "Jk4vJU8AAAAJ;;;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;;UYW7X_0AAAAJ", "orcid": "0000-0002-1501-9914;;;;0000-0002-2851-4260;;0000-0002-9588-7454", "linkedin": ";;;;;;", "or_profile": "~Benyou_Wang2;~Wu_Xiangbo1;~xiaokang_liu3;~Jianquan_Li1;~Prayag_Tiwari1;~Yongqiang_Gao1;~Qianqian_Xie1", "aff": "The Chinese University of Hong Kong, Shenzhen;SRIBD;;;Halmstad University;;Yale University", "aff_domain": "cuhk.edu.cn;sirbd.cn;;;hh.se;;yale.edu", "position": "Assistant Professor;Software Engineer;;;Assistant Professor;;Postdoc", "bibtex": "@misc{\nwang2023can,\ntitle={Can Language Models Make Fun? 
A Case Study in Chinese Comical Crosstalk},\nauthor={Benyou Wang and Wu Xiangbo and xiaokang liu and Jianquan Li and Prayag Tiwari and Yongqiang Gao and Qianqian Xie},\nyear={2023},\nurl={https://openreview.net/forum?id=9wx-QXt-JaN}\n}", "github": "", "project": "", "reviewers": "dRcS;YSME;jJPD;514E", "site": "https://openreview.net/forum?id=9wx-QXt-JaN", "pdf_size": 903281, "recommendation": "3;3;5;5", "confidence": "3;5;4;4", "correctness": "2;3;2;3", "technical_novelty": "3;1;2;3", "empirical_novelty": "3;1;3;3", "wc_summary_paper": "52;87;47;71", "wc_strength_and_weaknesses": "38;229;89;262", "wc_clarity_quality_novelty_and_reproducibility": "115;35;29;146", "wc_summary_review": "82;28;26;26", "wc_review": "287;379;191;505", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.25, 0.82915619758885 ], "empirical_novelty_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 64.25, 15.896147331979533 ], "wc_strength_and_weaknesses_avg": [ 154.5, 93.5 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 81.25, 50.4993811843274 ], "wc_summary_review_avg": [ 40.5, 23.973944189473706 ], "wc_review_avg": [ 340.5, 115.92562270697536 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6608778503684450644&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Chinese University of Hong Kong;SRIBD;Halmstad University;Yale University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.cuhk.edu.cn;;https://www.hh.se;https://www.yale.edu", "aff_unique_abbr": "CUHK;;HH;Yale", "aff_campus_unique_index": "0", "aff_campus_unique": "Shenzhen;", "aff_country_unique_index": "0;2;3", "aff_country_unique": "China;;Sweden;United States" }, { "title": "Sample Complexity of Nonparametric Off-Policy Evaluation on Low-Dimensional Manifolds using Deep Networks", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12183", "id": "9x3CO0ZU9LR", "poster": "/media/PosterPDFs/ICLR%202023/12183.png?t=1681432052.252472", "openreview": "https://openreview.net/forum?id=9x3CO0ZU9LR", "slides": "https://iclr.cc/virtual/2023/poster/12183", "video": "https://iclr.cc/virtual/2023/poster/12183", "author_site": "Xiang Ji, Minshuo Chen, Mengdi Wang, Tuo Zhao", "tldr": "", "abstract": "We consider the off-policy evaluation problem of reinforcement learning using deep convolutional neural networks. We analyze the deep fitted Q-evaluation method for estimating the expected cumulative reward of a target policy, when the data are generated from an unknown behavior policy. We show that, by choosing network size appropriately, one can leverage any low-dimensional manifold structure in the Markov decision process and obtain a sample-efficient estimator without suffering from the curse of high data ambient dimensionality. 
Specifically, we establish a sharp error bound for fitted Q-evaluation, which depends on the intrinsic dimension of the state-action space, the smoothness of Bellman operator, and a function class-restricted $\\chi^2$-divergence. It is noteworthy that the restricted $\\chi^2$-divergence measures the behavior and target policies' {\\it mismatch in the function space}, which can be small even if the two policies are not close to each other in their tabular forms. We also develop a novel approximation result for convolutional neural networks in Q-function estimation. Numerical experiments are provided to support our theoretical analysis.", "keywords": "RL theory;deep off-policy evaluation;neural network function approximation;manifold data", "primary_area": "", "supplementary_material": "/attachment/eb357c3fba08792a9426b598735809029885092b.zip", "author": "Xiang Ji;Minshuo Chen;Mengdi Wang;Tuo Zhao", "authorids": "~Xiang_Ji3;~Minshuo_Chen1;~Mengdi_Wang1;~Tuo_Zhao1", "gender": ";M;F;M", "homepage": ";https://minshuochen.github.io;http://mwang.princeton.edu;http://www2.isye.gatech.edu/~tzhao80", "dblp": ";217/1509;;", "google_scholar": "oCcK0LoAAAAJ;qU9WvTgAAAAJ;;EJXN6tYAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Xiang_Ji3;~Minshuo_Chen1;~Mengdi_Wang1;~Tuo_Zhao1", "aff": "Princeton University;Princeton University;Princeton University;Georgia Institute of Technology", "aff_domain": "princeton.edu;princeton.edu;princeton.edu;gatech.edu", "position": "PhD student;Postdoc;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nji2023sample,\ntitle={Sample Complexity of Nonparametric Off-Policy Evaluation on Low-Dimensional Manifolds using Deep Networks},\nauthor={Xiang Ji and Minshuo Chen and Mengdi Wang and Tuo Zhao},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=9x3CO0ZU9LR}\n}", "github": "", "project": "", "reviewers": "Mweq;5uSb;sfri;cmJs", "pdf_size": 717601, "recommendation": "5;6;8;8", "confidence": "3;3;3;3", "correctness": "3;4;4;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;2;1;2", "wc_summary_paper": "94;133;67;167", "wc_strength_and_weaknesses": "205;330;169;149", "wc_clarity_quality_novelty_and_reproducibility": "17;51;135;16", "wc_summary_review": "15;39;462;55", "wc_review": "331;553;833;387", "wc_reply_reviewers": "0;0;127;128", "wc_reply_authors": "762;1590;1603;736", "reply_reviewers": "0;0;1;1", "reply_authors": "3;5;6;3", "recommendation_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 3.0, 0.0 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 115.25, 37.989307706248084 ], "wc_strength_and_weaknesses_avg": [ 213.25, 70.32913692062486 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 54.75, 48.427135987997474 ], "wc_summary_review_avg": [ 142.75, 184.86802725187502 ], "wc_review_avg": [ 526.0, 195.143536915779 ], "wc_reply_reviewers_avg": [ 63.75, 63.7509803846184 ], "wc_reply_authors_avg": [ 1172.75, 423.8746129458569 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 4.25, 1.299038105676658 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.19245008972987526, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3286980572535351687&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "pdf": 
"https://openreview.net/pdf?id=9x3CO0ZU9LR", "email": "princeton.edu;princeton.edu;princeton.edu;gatech.edu", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Princeton University;Georgia Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.princeton.edu;https://www.gatech.edu", "aff_unique_abbr": "Princeton;Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "9xlU4lhri9", "title": "Rethinking the Structure of Stochastic Gradients: Empirical and Statistical Evidence", "track": "main", "status": "Reject", "tldr": "We rethink the heavy-tail phenomenon and the covariance structure of stochastic gradients via novel empirical and statistical evidences.", "abstract": "It is well known that stochastic gradients significantly improve both optimization and generalization of deep neural networks (DNNs). Some works attempted to explain the success of stochastic optimization for deep learning by the arguably heavy-tail properties of gradient noise, while other works presented theoretical and empirical evidence against the heavy-tail hypothesis on gradient noise. Unfortunately, formal statistical tests for analyzing the structure and heavy tails of stochastic gradients in deep learning are still under-explored. In this paper, we mainly make two contributions. First, we conduct formal statistical tests on the distribution of stochastic gradients and gradient noise across both parameters and iterations. Our statistical tests reveal that dimension-wise gradients usually exhibit power-law heavy tails, while iteration-wise gradients and stochastic gradient noise caused by minibatch training usually do not exhibit power-law heavy tails. Second, we further discover that the covariance spectra of stochastic gradients have the power-law structures in deep learning. While previous papers believed that the anisotropic structure of stochastic gradients matters to deep learning, they did not expect the gradient covariance can have such an elegant mathematical structure. Our work challenges the existing belief and provides novel insights on the structure of stochastic gradients. 
The novel structure of stochastic gradients may help understand the success of stochastic optimization for deep learning.", "keywords": "Gradient Noise;SGD;Deep Learning", "primary_area": "", "supplementary_material": "", "author": "Zeke Xie;Qian-Yuan Tang;Zheng He;Mingming Sun;Ping Li", "authorids": "~Zeke_Xie1;~Qian-Yuan_Tang1;~Zheng_He1;~Mingming_Sun1;~Ping_Li3", "gender": "M;;F;M;M", "homepage": "https://sites.google.com/view/zeke-xie;;https://github.com/he-zh;;http://www.stat.rutgers.edu/home/pingli/", "dblp": "210/1039;;;87/8665-1.html;62/5860-1", "google_scholar": "https://scholar.google.co.jp/citations?user=ysXmZCMAAAAJ;;;;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Zeke_Xie1;~Qian-Yuan_Tang1;~Zheng_He1;~Mingming_Sun1;~Ping_Li3", "aff": "Baidu;;Beihang University;Baidu;LinkedIn", "aff_domain": "baidu.com;;buaa.edu.cn;baidu.com;linkedin.com", "position": "Researcher;;MS student;Principal Researcher;Engineer", "bibtex": "@misc{\nxie2023rethinking,\ntitle={Rethinking the Structure of Stochastic Gradients: Empirical and Statistical Evidence},\nauthor={Zeke Xie and Qian-Yuan Tang and Zheng He and Mingming Sun and Ping Li},\nyear={2023},\nurl={https://openreview.net/forum?id=9xlU4lhri9}\n}", "github": "", "project": "", "reviewers": "cbEm;ZG6J;2nic", "site": "https://openreview.net/forum?id=9xlU4lhri9", "pdf_size": 1370968, "recommendation": "5;5;5", "confidence": "4;3;3", "correctness": "3;3;3", "technical_novelty": "2;2;3", "empirical_novelty": "3;2;2", "wc_summary_paper": "90;47;122", "wc_strength_and_weaknesses": "129;180;170", "wc_clarity_quality_novelty_and_reproducibility": "705;24;61", "wc_summary_review": "58;7;90", "wc_review": "982;258;443", "wc_reply_reviewers": "213;116;49", "wc_reply_authors": "1273;517;439", "reply_reviewers": "1;1;2", "reply_authors": "2;2;2", "recommendation_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 86.33333333333333, 30.728199137310703 ], "wc_strength_and_weaknesses_avg": [ 159.66666666666666, 22.065558884580486 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 263.3333333333333, 312.67057566852833 ], "wc_summary_review_avg": [ 51.666666666666664, 34.179265969622904 ], "wc_review_avg": [ 561.0, 307.1232108888331 ], "wc_reply_reviewers_avg": [ 126.0, 67.32508200267317 ], "wc_reply_authors_avg": [ 743.0, 376.1170030721823 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4010115354501993090&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Baidu;Beihang University;LinkedIn Corporation", "aff_unique_dep": "Baidu, Inc.;;", "aff_unique_url": "https://www.baidu.com;http://www.buaa.edu.cn/;https://www.linkedin.com", "aff_unique_abbr": "Baidu;BUAA;LinkedIn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "China;United States" }, { "title": "Hidden Markov Transformer for Simultaneous Machine Translation", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11939", "id": "9y0HFvaAYD6", "poster": 
"", "openreview": "https://openreview.net/forum?id=9y0HFvaAYD6", "slides": "https://iclr.cc/virtual/2023/poster/11939", "video": "https://iclr.cc/virtual/2023/poster/11939", "author_site": "Shaolei Zhang, Yang Feng", "tldr": "", "abstract": "Simultaneous machine translation (SiMT) outputs the target sequence while receiving the source sequence, and hence learning when to start translating each target token is the core challenge for SiMT task. However, it is non-trivial to learn the optimal moment among many possible moments of starting translating, as the moments of starting translating always hide inside the model and can only be supervised with the observed target sequence. In this paper, we propose a Hidden Markov Transformer (HMT), which treats the moments of starting translating as hidden events and the target sequence as the corresponding observed events, thereby organizing them as a hidden Markov model. HMT explicitly models multiple moments of starting translating as the candidate hidden events, and then selects one to generate the target token. During training, by maximizing the marginal likelihood of the target sequence over multiple moments of starting translating, HMT learns to start translating at the moments that target tokens can be generated more accurately. Experiments on multiple SiMT benchmarks show that HMT outperforms strong baselines and achieves state-of-the-art performance.", "keywords": "Simultaneous machine translation;Machine translation;Natural language processing;Transformer", "primary_area": "", "supplementary_material": "", "author": "Shaolei Zhang;Yang Feng", "authorids": "~Shaolei_Zhang1;~Yang_Feng4", "gender": "M;", "homepage": "https://zhangshaolei1998.github.io/;http://people.ucas.edu.cn/~yangfeng?language=en", "dblp": ";07/6095-4.html", "google_scholar": "https://scholar.google.com.hk/citations?user=gWwAWo4AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-7254-9380;", "linkedin": ";", "or_profile": "~Shaolei_Zhang1;~Yang_Feng4", "aff": "Key Laboratory of Intelligent Information Processing Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences", "aff_domain": "ict.ac.cn;ict.ac.cn", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nzhang2023hidden,\ntitle={Hidden Markov Transformer for Simultaneous Machine Translation},\nauthor={Shaolei Zhang and Yang Feng},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=9y0HFvaAYD6}\n}", "github": "", "project": "", "reviewers": "NSCn;WoTh;4GZM;C1gX", "pdf_size": 9463502, "recommendation": "6;8;8;8", "confidence": "5;4;4;4", "correctness": "4;3;3;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "4;2;3;3", "wc_summary_paper": "96;164;44;113", "wc_strength_and_weaknesses": "160;50;118;180", "wc_clarity_quality_novelty_and_reproducibility": "26;26;65;27", "wc_summary_review": "32;22;30;30", "wc_review": "314;262;257;350", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "816;203;1113;608", "reply_reviewers": "0;0;0;0", "reply_authors": "2;1;2;1", "recommendation_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 104.25, 42.85075845303091 ], "wc_strength_and_weaknesses_avg": [ 127.0, 49.76946855251722 ], 
"wc_clarity_quality_novelty_and_reproducibility_avg": [ 36.0, 16.748134224444225 ], "wc_summary_review_avg": [ 28.5, 3.840572873934304 ], "wc_review_avg": [ 295.75, 38.46020670771284 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 685.0, 331.1336588146847 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": -1.0, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14752250516239004729&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=9y0HFvaAYD6", "email": "ict.ac.cn;ict.ac.cn", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Chinese Academy of Sciences", "aff_unique_dep": "Institute of Computing Technology", "aff_unique_url": "http://www.ict.ac.cn", "aff_unique_abbr": "CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Spotlight: Mobile UI Understanding using Vision-Language Models with a Focus", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10862", "id": "9yE2xEj0BH7", "poster": "", "openreview": "https://openreview.net/forum?id=9yE2xEj0BH7", "slides": "https://iclr.cc/virtual/2023/poster/10862", "video": "https://iclr.cc/virtual/2023/poster/10862", "author_site": "Gang Li, Yang Li", "tldr": "We propose an enhanced vision-language model for UI tasks that achieves SoTA on representative UI tasks and supports few-shot and multitask learning.", "abstract": "Mobile UI understanding is important for enabling various interaction tasks such as UI automation and accessibility. Previous mobile UI modeling often depends on the view hierarchy information of a screen, which directly provides the structural data of the UI, with the hope to bypass challenging tasks of visual modeling from screen pixels. However, view hierarchies are not always available, and are often corrupted with missing object descriptions or misaligned structure information. As a result, despite the use of view hierarchies could offer short-term gains, it may ultimately hinder the applicability and performance of the model. In this paper, we propose Spotlight, a vision-only approach for mobile UI understanding. Specifically, we enhance a vision-language model that only takes the screenshot of the UI and a region of interest on the screen---the focus---as the input. This general architecture of Spotlight is easily scalable and capable of performing a range of UI modeling tasks. Our experiments show that our model establishes SoTA results on several representative UI tasks and outperforms previous methods that use both screenshots and view hierarchies as inputs. 
Furthermore, we explore multi-task learning and few-shot prompting capacities of the proposed models, demonstrating promising results in the multi-task learning direction.", "keywords": "vision-language;UI;few-shot;finetuning;multi-task", "primary_area": "", "supplementary_material": "", "author": "Gang Li;Yang Li", "authorids": "~Gang_Li13;~Yang_Li2", "gender": ";M", "homepage": ";http://yangl.org", "dblp": "62/2655-21;37/4190-58", "google_scholar": "gmBt9v8AAAAJ;ZZdB48QAAAAJ", "orcid": "0000-0002-9490-2990;", "linkedin": ";yang-li-127a2a41/", "or_profile": "~Gang_Li13;~Yang_Li2", "aff": "Google;Google", "aff_domain": "google.com;google.com", "position": "Software Engineer;Research Scientist", "bibtex": "@inproceedings{\nli2023spotlight,\ntitle={Spotlight: Mobile {UI} Understanding using Vision-Language Models with a Focus},\nauthor={Gang Li and Yang Li},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=9yE2xEj0BH7}\n}", "github": "", "project": "", "reviewers": "WFS1;V2L4;cKCz", "pdf_size": 8824770, "recommendation": "5;5;6", "confidence": "3;4;4", "correctness": "4;3;4", "technical_novelty": "3;2;3", "empirical_novelty": "3;2;3", "wc_summary_paper": "144;51;61", "wc_strength_and_weaknesses": "302;262;384", "wc_clarity_quality_novelty_and_reproducibility": "109;5;12", "wc_summary_review": "78;83;53", "wc_review": "633;401;510", "wc_reply_reviewers": "0;45;0", "wc_reply_authors": "639;878;864", "reply_reviewers": "0;1;0", "reply_authors": "2;3;2", "recommendation_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 85.33333333333333, 41.6839963961657 ], "wc_strength_and_weaknesses_avg": [ 316.0, 50.78057371344544 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 42.0, 47.462265713582056 ], "wc_summary_review_avg": [ 71.33333333333333, 13.12334645668635 ], "wc_review_avg": [ 514.6666666666666, 94.7710691907375 ], "wc_reply_reviewers_avg": [ 15.0, 21.213203435596427 ], "wc_reply_authors_avg": [ 793.6666666666666, 109.51509282083248 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.4999999999999999, "corr_recommendation_correctness": 0.4999999999999999, "gs_citation": 64, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17864670093735276430&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=9yE2xEj0BH7", "email": "google.com;google.com", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "A09CypdRq8D", "title": "Partial Advantage Estimator for Proximal Policy Optimization", "track": "main", "status": "Reject", "tldr": "", "abstract": "Estimation of value in policy gradient methods is a fundamental problem. 
Generalized Advantage Estimation (GAE) is an exponentially-weighted estimator of an advantage function similar to TD($\\lambda$). It substantially reduces the variance of policy gradient estimates at the expense of bias. In practical applications, a truncated GAE is used due to the incompleteness of the trajectory, which results in a large bias during estimation. To address this challenge, instead of using the entire truncated GAE, we propose to take a part of the calculated GAE for updates, which significantly reduces the bias due to the incomplete trajectory. We perform experiments in MuJoCo and $\\mu$RTS to investigate the effect of different partial coefficients and sampling lengths. We show that our partial GAE approach yields better empirical results in both environments.", "keywords": "Reinforcement learning;value estimator", "primary_area": "", "supplementary_material": "/attachment/d0de4dd6bea61ea8ef6fe8d807aeb8fbd40e8d5d.zip", "author": "Xiulei Song;Yizhao Jin;Gregory Slabaugh;Simon Lucas", "authorids": "~Xiulei_Song2;~Yizhao_Jin1;~Gregory_Slabaugh2;~Simon_Lucas1", "gender": "M;M;M;M", "homepage": "https://github.com/decatt;https://www.eecs.qmul.ac.uk/~gslabaugh/publications.html;http://eecs.qmul.ac.uk/people/profiles/lucassimon.html;https://www.jumpw.com/", "dblp": ";s/GregoryGSlabaugh.html;50/4174;", "google_scholar": ";oUK2gu8AAAAJ;https://scholar.google.co.uk/citations?user=Jz8DDVAAAAAJ;", "orcid": ";0000-0003-4060-5226;0000-0002-3180-7451;", "linkedin": ";greg-slabaugh-a5b03a1/;simon-lucas-game-AI/;", "or_profile": "~Yizhao_Jin1;~Gregory_Slabaugh2;~Simon_Mark_Lucas1;~xiulei_song1", "aff": "Queen Mary, University of London;Queen Mary University London;Queen Mary University of London;", "aff_domain": "qmul.ac.uk;qmul.ac.uk;qmul.ac.uk;", "position": "PhD student;Professor;Full Professor;", "bibtex": "@misc{\nsong2023partial,\ntitle={Partial Advantage Estimator for Proximal Policy Optimization},\nauthor={Xiulei Song and Yizhao Jin and Gregory Slabaugh and Simon Lucas},\nyear={2023},\nurl={https://openreview.net/forum?id=A09CypdRq8D}\n}", "github": "", "project": "", "reviewers": "zF4x;VSUL;QPtY", "site": "https://openreview.net/forum?id=A09CypdRq8D", "pdf_size": 842140, "recommendation": "3;3;5", "confidence": "4;4;3", "correctness": "2;2;4", "technical_novelty": "3;2;2", "empirical_novelty": "2;1;2", "wc_summary_paper": "346;29;63", "wc_strength_and_weaknesses": "147;177;36", "wc_clarity_quality_novelty_and_reproducibility": "546;30;4", "wc_summary_review": "102;36;4", "wc_review": "1141;272;107", "wc_reply_reviewers": "212;0;0", "wc_reply_authors": "77;0;0", "reply_reviewers": "1;0;0", "reply_authors": "1;0;0", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.9428090415820634 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "wc_summary_paper_avg": [ 146.0, 142.10090311699875 ], "wc_strength_and_weaknesses_avg": [ 120.0, 60.64651680022522 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 193.33333333333334, 249.59878917085226 ], "wc_summary_review_avg": [ 47.333333333333336, 40.80304999493161 ], "wc_review_avg": [ 506.6666666666667, 453.5712610923325 ], "wc_reply_reviewers_avg": [ 70.66666666666667, 99.93775840769871 ], "wc_reply_authors_avg": [ 25.666666666666668, 36.29814810090944 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ],
"reply_authors_avg": [ 0.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": 1.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15246960489382989696&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;1", "aff_unique_norm": "Queen Mary, University of London;Queen Mary University of London", "aff_unique_dep": ";", "aff_unique_url": "https://www.qmul.ac.uk;https://www.qmul.ac.uk", "aff_unique_abbr": "QMUL;QMUL", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "London", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "A2EeU2Jn3iX", "title": "Game-Theoretic Understanding of Misclassification", "track": "main", "status": "Reject", "tldr": "", "abstract": "This paper analyzes various types of image misclassification from a game-theoretic view. Particularly, we consider the misclassification of clean, adversarial, and corrupted images and characterize it through the distribution of multi-order interactions. We discover that the distribution of multi-order interactions varies across the types of misclassification. For example, misclassified adversarial images have a higher strength of high-order interactions than correctly classified clean images, which indicates that adversarial perturbations create spurious features that arise from complex cooperation between pixels. By contrast, misclassified corrupted images have a lower strength of low-order interactions than correctly classified clean images, which indicates that corruptions break the local cooperation between pixels. We also provide the first analysis of Vision Transformers using interactions. We found that Vision Transformers show a different tendency in the distribution of interactions from that in CNNs, and this implies that they exploit the features that CNNs do not use for the prediction. Our study demonstrates that the recent game-theoretic analysis of deep learning models can be broadened to analyze various malfunctions of deep learning models including Vision Transformers by using the distribution, order, and sign of interactions. 
", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Kosuke Sumiyasu;Kazuhiko Kawamoto;Kera Hiroshi", "authorids": "~Kosuke_Sumiyasu1;kawa@faculty.chiba-u.jp;kera@chiba-u.jp", "gender": "M;;", "homepage": "https://www.kawa-lab.org/;;", "dblp": "330/9740;;", "google_scholar": ";;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Kosuke_Sumiyasu1;kawa@faculty.chiba-u.jp;kera@chiba-u.jp", "aff": "Chiba University;;", "aff_domain": "chiba-u.ac.jp;;", "position": "MS student;;", "bibtex": "@misc{\nsumiyasu2023gametheoretic,\ntitle={Game-Theoretic Understanding of Misclassification},\nauthor={Kosuke Sumiyasu and Kazuhiko Kawamoto and Kera Hiroshi},\nyear={2023},\nurl={https://openreview.net/forum?id=A2EeU2Jn3iX}\n}", "github": "", "project": "", "reviewers": "C5ku;QLY7;zrJN;oeXE;Mq4K", "site": "https://openreview.net/forum?id=A2EeU2Jn3iX", "pdf_size": 2571077, "recommendation": "1;3;5;6;6", "confidence": "5;4;3;1;3", "correctness": "3;3;4;3;3", "technical_novelty": "1;2;2;3;2", "empirical_novelty": "1;3;2;3;3", "wc_summary_paper": "77;81;171;52;80", "wc_strength_and_weaknesses": "511;464;261;85;96", "wc_clarity_quality_novelty_and_reproducibility": "8;23;10;12;13", "wc_summary_review": "196;47;47;23;28", "wc_review": "792;615;489;172;217", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "1760;988;690;313;358", "reply_reviewers": "0;0;0;0;0", "reply_authors": "3;2;1;1;1", "recommendation_avg": [ 4.2, 1.9390719429665317 ], "confidence_avg": [ 3.2, 1.32664991614216 ], "correctness_avg": [ 3.2, 0.39999999999999997 ], "technical_novelty_avg": [ 2.0, 0.6324555320336759 ], "empirical_novelty_avg": [ 2.4, 0.8 ], "wc_summary_paper_avg": [ 92.2, 40.81862320069113 ], "wc_strength_and_weaknesses_avg": [ 283.4, 178.55038504579036 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 13.2, 5.192301994298868 ], "wc_summary_review_avg": [ 68.2, 64.63868810549917 ], "wc_review_avg": [ 457.0, 235.38819001810606 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 821.8, 528.9992060485536 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.870758578999572, "corr_recommendation_correctness": 0.20628424925175864, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:XL0S_wJMhQ8J:scholar.google.com/&scioq=Game-Theoretic+Understanding+of+Misclassification&hl=en&as_sdt=0,44", "gs_version_total": 3, "aff_unique_index": "0", "aff_unique_norm": "Chiba University", "aff_unique_dep": "", "aff_unique_url": "https://www.chiba-u.ac.jp", "aff_unique_abbr": "Chiba U", "aff_country_unique_index": "0", "aff_country_unique": "Japan" }, { "title": "Contextual Image Masking Modeling via Synergized Contrasting without View Augmentation for Faster and Better Visual Pretraining", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12239", "id": "A3sgyt4HWp", "poster": "/media/PosterPDFs/ICLR%202023/12239.png?t=1680958628.7166064", "openreview": "https://openreview.net/forum?id=A3sgyt4HWp", "slides": "https://iclr.cc/virtual/2023/poster/12239", "video": "https://iclr.cc/virtual/2023/poster/12239", "author_site": "Shaofeng Zhang, Feng Zhu, Rui Zhao, Junchi Yan", "tldr": "We propose a novel framework for synergizing MIM and contrastive learning in a close-loop.", "abstract": "We propose a new contextual masking image modeling (MIM) approach called contrasting-aided contextual MIM (ccMIM), under the MIM paradigm for visual 
pretraining. Specifically, we adopt importance sampling to select the masked patches with richer semantic information for reconstruction, instead of random sampling as done in previous MIM works. As such, the resulting patch reconstruction task from the remaining less semantic patches could be more difficult and thus aids learning. To speed up the possibly slowed convergence due to our more difficult reconstruction task, we further propose a new contrastive loss that aligns the tokens of the vision transformer extracted from the selected masked patches and the remaining ones, respectively. The hope is that it serves as a regularizer for patch feature learning such that the image-level global information could be captured in both masked and unmasked patches, and notably such a single-view contrasting avoids the tedious image augmentation step required in recent efforts of introducing contrastive learning to MIM (to speed up convergence and improve discriminative ability). Meanwhile, the attention score from the contrastive global feature can also carry effective semantic clues to in turn guide our above masking patch selection scheme. In consequence, our contextual MIM and contrastive learning are synergetically performed in a loop (semantic patch selection-token alignment contrasting) to achieve the best of the two worlds: fast convergence and strong performance on downstream tasks without ad-hoc augmentations, which are verified by empirical results on ImageNet-1K for both classification and dense vision tasks. ", "keywords": "Mask Image Modeling;Self-supervised learning", "primary_area": "", "supplementary_material": "", "author": "Shaofeng Zhang;Feng Zhu;Rui Zhao;Junchi Yan", "authorids": "~Shaofeng_Zhang1;~Feng_Zhu1;~Rui_Zhao6;~Junchi_Yan2", "gender": "M;M;M;M", "homepage": "https://sherrylone.github.io;http://home.ustc.edu.cn/~zhufengx/;http://zhaorui.xyz/;http://thinklab.sjtu.edu.cn/", "dblp": "132/2540;71/2791-6;26/2578-1;60/7949.html", "google_scholar": "VoVVJIgAAAAJ;oO53gjEAAAAJ;1c9oQNMAAAAJ;ga230VoAAAAJ", "orcid": ";;;0000-0001-9639-7679", "linkedin": ";;;", "or_profile": "~Shaofeng_Zhang1;~Feng_Zhu1;~Rui_Zhao6;~Junchi_Yan1", "aff": "Shanghai Jiaotong University;SenseTime Group LTD;SenseTime Research;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sensetime.com;sensetime.com;sjtu.edu.cn", "position": "PhD student;Researcher;Researcher;Associate Professor", "bibtex": "@inproceedings{\nzhang2023contextual,\ntitle={Contextual Image Masking Modeling via Synergized Contrasting without View Augmentation for Faster and Better Visual Pretraining},\nauthor={Shaofeng Zhang and Feng Zhu and Rui Zhao and Junchi Yan},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=A3sgyt4HWp}\n}", "github": "", "project": "", "reviewers": "tKmv;ZDzp;pWuU", "pdf_size": 619582, "recommendation": "6;6;6", "confidence": "3;4;3", "correctness": "3;3;4", "technical_novelty": "3;2;3", "empirical_novelty": "3;2;3", "wc_summary_paper": "75;69;199", "wc_strength_and_weaknesses": "101;204;147", "wc_clarity_quality_novelty_and_reproducibility": "7;87;58", "wc_summary_review": "16;35;38", "wc_review": "199;395;442", "wc_reply_reviewers": "5;100;0", "wc_reply_authors": "251;653;318", "reply_reviewers": "1;1;0", "reply_authors": "1;2;1", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665,
0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 114.33333333333333, 59.91846311632352 ], "wc_strength_and_weaknesses_avg": [ 150.66666666666666, 42.129430198958175 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 50.666666666666664, 33.06895153396242 ], "wc_summary_review_avg": [ 29.666666666666668, 9.741092797468305 ], "wc_review_avg": [ 345.3333333333333, 105.23729799310172 ], "wc_reply_reviewers_avg": [ 35.0, 46.007245806140865 ], "wc_reply_authors_avg": [ 407.3333333333333, 175.85284251959712 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1005210190063453926&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 2, "pdf": "https://openreview.net/pdf?id=A3sgyt4HWp", "email": "sjtu.edu.cn;sensetime.com;sensetime.com;sjtu.edu.cn", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Shanghai Jiao Tong University;SenseTime Group;SenseTime", "aff_unique_dep": ";;SenseTime Research", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.sensetime.com;https://www.sensetime.com", "aff_unique_abbr": "SJTU;SenseTime;SenseTime", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "A4fSkNAs6E1", "title": "Hierarchical Gaussian Mixture based Task Generative Model for Robust Meta-Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Meta-learning enables quick adaptation of machine learning models to new tasks with limited data. While tasks could come from varying distributions in reality, most of the existing meta-learning methods consider both training and testing tasks as from the same uni-component distribution, overlooking two critical needs of a practical solution: (1) the various sources of tasks may compose a multi-component mixture distribution, and (2) novel tasks may come from a distribution that is unseen during meta-training. In this paper, we demonstrate these two challenges can be solved jointly by modeling the density of task instances. We develop a meta-training framework underlain by a novel Hierarchical Gaussian Mixture based Task Generative Model (HTGM). HTGM extends the widely used empirical process of sampling tasks to a theoretical model, which learns task embeddings, fits mixture distribution of tasks, and enables density-based scoring of novel tasks. The framework is agnostic to the encoder and scales well with large backbone networks. The model parameters are learned end-to-end by maximum likelihood estimation via an Expectation-Maximization algorithm. 
Extensive experiments on benchmark datasets indicate the effectiveness of our method for both sample classification and novel task detection.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Yizhou Zhang;Jingchao Ni;Wei Cheng;Zhengzhang Chen;Liang Tong;Haifeng Chen", "authorids": "~Yizhou_Zhang3;~Jingchao_Ni1;~Wei_Cheng1;~Zhengzhang_Chen1;~Liang_Tong1;~Haifeng_Chen1", "gender": ";M;M;M;M;", "homepage": "https://yizhouzhang1997.netlify.app/;;https://chengw07.github.io/;https://zhengzhangchen.github.io/;;https://haifengchen.gitlab.io/intro/", "dblp": ";151/3208;89/2506-2.html;14/3744;71/6379;08/57-1.html", "google_scholar": "k127fcwAAAAJ;rH9MTZMAAAAJ;PRrGVmoAAAAJ;2t7wQ24AAAAJ;;QzakB68AAAAJ", "orcid": ";;;0000-0002-6803-0535;;", "linkedin": ";jingchao-ni-930a3871/;wei-cheng-ml/;;;", "or_profile": "~Yizhou_Zhang3;~Jingchao_Ni1;~Wei_Cheng1;~Zhengzhang_Chen1;~Liang_Tong1;~Haifeng_Chen1", "aff": "University of Southern California;Amazon;NEC-Labs;NEC Labs America;Stellar Cyber Inc.;NEC-Labs", "aff_domain": "usc.edu;amazon.com;nec-labs.com;nec-labs.com;stellarcyber.ai;nec-labs.com", "position": "PhD student;Applied Scientist;Principal Researcher;Senior Research Scientist;Researcher;Researcher", "bibtex": "@misc{\nzhang2023hierarchical,\ntitle={Hierarchical Gaussian Mixture based Task Generative Model for Robust Meta-Learning},\nauthor={Yizhou Zhang and Jingchao Ni and Wei Cheng and Zhengzhang Chen and Liang Tong and Haifeng Chen},\nyear={2023},\nurl={https://openreview.net/forum?id=A4fSkNAs6E1}\n}", "github": "", "project": "", "reviewers": "DtsZ;gaxY;A867", "site": "https://openreview.net/forum?id=A4fSkNAs6E1", "pdf_size": 832327, "recommendation": "5;6;6", "confidence": "4;3;4", "correctness": "3;3;4", "technical_novelty": "2;3;3", "empirical_novelty": "3;2;3", "wc_summary_paper": "56;150;63", "wc_strength_and_weaknesses": "152;583;149", "wc_clarity_quality_novelty_and_reproducibility": "19;37;14", "wc_summary_review": "30;29;44", "wc_review": "257;799;270", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1376;1745;1361", "reply_reviewers": "0;0;0", "reply_authors": "5;4;3", "recommendation_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 89.66666666666667, 42.7577153531643 ], "wc_strength_and_weaknesses_avg": [ 294.6666666666667, 203.8861337991271 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 23.333333333333332, 9.877021593352701 ], "wc_summary_review_avg": [ 34.333333333333336, 6.847546194724712 ], "wc_review_avg": [ 442.0, 252.49290419072506 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1494.0, 177.5894140989265 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 4.0, 0.816496580927726 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.4999999999999999, "corr_recommendation_correctness": 0.4999999999999999, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3654565701645929376&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff_unique_index": "0;1;2;3;4;2", "aff_unique_norm": "University of Southern California;Amazon;NEC Laboratories;NEC Labs America;Stellar Cyber Inc.", "aff_unique_dep": ";Amazon.com, Inc.;;;", "aff_unique_url": 
"https://www.usc.edu;https://www.amazon.com;https://www.nec-labs.com;https://www.nec-labs.com;https://www.stellarcyber.com", "aff_unique_abbr": "USC;Amazon;NEC-Labs;NEC LA;Stellar Cyber", "aff_campus_unique_index": "0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "A6MliD2e5Xp", "title": "Window Projection Features are All You Need for Time Series Anomaly Detection", "track": "main", "status": "Withdraw", "tldr": "A simple hand-crafted representation combined with a Gaussian estimator obtains SOTA results in time series anomaly detection.", "abstract": "The challenge of time series anomaly detection has motivated the development of increasingly more complex deep representations and anomaly metrics. In this paper we demonstrate that a simple approach based on window projection features can achieve better results. Projection features are a common way to discretize multivariate data; they first multiply the data by a projection matrix followed by discretization of each output dimension. We first show that short temporal windows, encoded by projection features, are often already sufficiently expressive for linearly separating between normal and anomalous time series. However, we find that while the expressivity of projection features is sufficient, current one-class classification methods are unable to use them effectively to detect anomalies. We hypothesize this is due to the difficulty of density estimation. The difficulty can be overcome by estimating the probability density of the sample mean, which follows the Gaussian distribution when the conditions of the Central Limit Theorem are met. Simply put, we fit a multivariate Gaussian model to the average of the projection features of adjacent windows within a time series. Despite its simplicity, our method outperforms the state-of-the-art in diverse settings including: five UEA datasets, video trajectory anomaly detection and standard anomaly segmentation benchmarks. 
Code is provided.", "keywords": "time series;anomaly detection", "primary_area": "", "supplementary_material": "/attachment/8c56affa635c648571943a81e09035d1f2ebf488.zip", "author": "Issar Tzachor;Yedid Hoshen", "authorids": "~Issar_Tzachor1;~Yedid_Hoshen3", "gender": ";M", "homepage": ";https://www.cs.huji.ac.il/~ydidh/", "dblp": ";136/0280", "google_scholar": ";https://scholar.google.co.il/citations?user=6y1-qS4AAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Issar_Tzachor1;~Yedid_Hoshen3", "aff": ";Hebrew University of Jerusalem", "aff_domain": ";huji.ac.il", "position": ";Assistant Professor", "bibtex": "@misc{\ntzachor2023window,\ntitle={Window Projection Features are All You Need for Time Series Anomaly Detection},\nauthor={Issar Tzachor and Yedid Hoshen},\nyear={2023},\nurl={https://openreview.net/forum?id=A6MliD2e5Xp}\n}", "github": "", "project": "", "reviewers": "omkS;Bv2M;tDcR;Z8nD", "site": "https://openreview.net/forum?id=A6MliD2e5Xp", "pdf_size": 319483, "recommendation": "3;3;5;6", "confidence": "3;5;2;1", "correctness": "3;3;3;3", "technical_novelty": "3;2;2;3", "empirical_novelty": "0;2;2;3", "wc_summary_paper": "73;52;33;80", "wc_strength_and_weaknesses": "1232;320;66;101", "wc_clarity_quality_novelty_and_reproducibility": "569;28;36;20", "wc_summary_review": "237;43;103;2", "wc_review": "2111;443;238;203", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 2.75, 1.479019945774904 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 1.75, 1.0897247358851685 ], "wc_summary_paper_avg": [ 59.5, 18.445866745696716 ], "wc_strength_and_weaknesses_avg": [ 429.75, 473.2971450368151 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 163.25, 234.32816198656107 ], "wc_summary_review_avg": [ 96.25, 88.84642648975816 ], "wc_review_avg": [ 748.75, 791.8201737137038 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.8783100656536799, "corr_recommendation_correctness": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10946104840477751495&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Hebrew University of Jerusalem", "aff_unique_dep": "", "aff_unique_url": "https://www.huji.ac.il", "aff_unique_abbr": "HUJI", "aff_campus_unique_index": "0", "aff_campus_unique": "Jerusalem", "aff_country_unique_index": "0", "aff_country_unique": "Israel" }, { "title": "Bridge the Inference Gaps of Neural Processes via Expectation Maximization", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11763", "id": "A7v2DqLjZdq", "poster": "", "openreview": "https://openreview.net/forum?id=A7v2DqLjZdq", "slides": "https://iclr.cc/virtual/2023/poster/11763", "video": "https://iclr.cc/virtual/2023/poster/11763", "author_site": "Qi Wang, Marco Federici, Herke van Hoof", "tldr": "", "abstract": "The neural process (NP) is a family of computationally efficient models for learning distributions over functions. However, it suffers from under-fitting and shows suboptimal performance in practice. Researchers have primarily focused on incorporating diverse structural inductive biases, e.g. attention or convolution, in modeling. 
The topic of inference suboptimality and an analysis of the NP from the optimization objective perspective have hardly been studied in earlier work. To fix this issue, we propose a surrogate objective of the target log-likelihood of the meta dataset within the expectation maximization framework. The resulting model, referred to as the Self-normalized Importance weighted Neural Process (SI-NP), can learn a more accurate functional prior and has an improvement guarantee concerning the target log-likelihood. Experimental results show the competitive performance of SI-NP over other NP objectives and illustrate that structural inductive biases, such as attention modules, can also augment our method to achieve SOTA performance.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Qi Wang;Marco Federici;Herke van Hoof", "authorids": "~Qi_Wang11;~Marco_Federici1;~Herke_van_Hoof4", "gender": "M;M;M", "homepage": "https://sites.google.com/view/albert-q-wang-at-ai-community/home;;https://staff.fnwi.uva.nl/h.c.vanhoof/", "dblp": "375/3186;166/5511.html;123/6759", "google_scholar": "Mvbvv3IAAAAJ;TfInmkIAAAAJ;https://scholar.google.ca/citations?user=9owUkLYAAAAJ", "orcid": "0000-0001-6135-6965;;", "linkedin": "qi-cheems-wang-518a421a1/;marco-federici-2b027b149/;", "or_profile": "~Qi_Wang11;~Marco_Federici1;~Herke_van_Hoof4", "aff": "Tsinghua University;University of Amsterdam;University of Amsterdam", "aff_domain": "cs.tsinghua.edu.cn;uva.nl;uva.nl", "position": "Postdoc;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nwang2023bridge,\ntitle={Bridge the Inference Gaps of Neural Processes via Expectation Maximization},\nauthor={Qi Wang and Marco Federici and Herke van Hoof},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=A7v2DqLjZdq}\n}", "github": "", "project": "", "reviewers": "BUNX;ki6V;HNS9;sw5r", "pdf_size": 1714551, "recommendation": "3;6;6;8", "confidence": "4;1;4;3", "correctness": "2;3;4;3", "technical_novelty": "3;3;3;4", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "80;88;119;61", "wc_strength_and_weaknesses": "53;32;256;253", "wc_clarity_quality_novelty_and_reproducibility": "1025;75;113;57", "wc_summary_review": "45;49;111;8", "wc_review": "1203;244;599;379", "wc_reply_reviewers": "725;0;0;0", "wc_reply_authors": "1401;278;349;425", "reply_reviewers": "2;0;0;0", "reply_authors": "3;1;1;1", "recommendation_avg": [ 5.75, 1.7853571071357126 ], "confidence_avg": [ 3.0, 1.224744871391589 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 87.0, 20.91650066335189 ], "wc_strength_and_weaknesses_avg": [ 148.5, 106.26499894132593 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 317.5, 408.9752437495453 ], "wc_summary_review_avg": [ 53.25, 36.9754986443726 ], "wc_review_avg": [ 606.25, 367.09356777257756 ], "wc_reply_reviewers_avg": [ 181.25, 313.934208871859 ], "wc_reply_authors_avg": [ 613.25, 457.768705243161 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.34299717028501764, "corr_recommendation_correctness": 0.5940885257860046, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2073654597654679092&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4,
"pdf": "https://openreview.net/pdf?id=A7v2DqLjZdq", "email": "cs.tsinghua.edu.cn;uva.nl;uva.nl", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Tsinghua University;University of Amsterdam", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.uva.nl", "aff_unique_abbr": "THU;UvA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "China;Netherlands" }, { "id": "A85gMNB01t1", "title": "HRDFuse: Monocular 360$^\\circ$ Depth Estimation by Collaboratively Learning Holistic-with-Regional Depth Distributions", "track": "main", "status": "Withdraw", "tldr": "This paper proposed a novel solution for monocular 360$^\\circ$ depth estimation, which predicts an ERP format depth map by collaboratively learning the holistic-with-regional information from the ERP image and its TP patches.", "abstract": "Depth estimation from a monocular 360$^\\circ$ image is a burgeoning problem as a 360$^\\circ$ image provides holistic sensing of a scene with a wide field of view. Recently, some methods, \\eg, OmniFusion, have applied the tangent projection (TP) to represent a 360$^\\circ$ image and predicted depth values via patch-wise regressions, which are merged to get a depth map with equirectangular projection (ERP) format. However, these methods suffer from 1) non-trivial process of merging a large number of patches; 2) less smooth and accurate depth results caused by ignoring the holistic contextual information contained only in the ERP image and directly regressing the depth value of each pixel. In this paper, we propose a novel framework, HRDFuse, that subtly combines the potential of convolutional neural networks (CNNs) and transformers by collaboratively learning the holistic contextual information from the ERP and the regional structural information from the TP. Firstly, we propose a spatial feature alignment (SFA) module that learns feature similarities between the TP and ERP to aggregate the TP features into a complete ERP feature map in a pixel-wise manner. Secondly, we propose a collaborative depth distribution classification CDDC module that learns the holistic-with-regional histograms capturing the ERP and TP depth distributions. As such, the final depth values can be predicted as a linear combination of histogram bin centers. Lastly, we adaptively combine the depth predictions from two projections to obtain the final depth map. 
Extensive experiments on three benchmark datasets show that our method achieves smoother and more accurate depth results while favorably surpassing the SOTA methods by a significant margin.", "keywords": "3D Computer Vision;Scene Analysis and Understanding;Depth distribution classification;Feature representation learning", "primary_area": "", "supplementary_material": "/attachment/541d481fd368bb4edf113a6c69a8b3d31e1552b6.zip", "author": "Hao Ai;Cao Zidong;Yan-Pei Cao;Ying Shan;Lin Wang", "authorids": "~Hao_Ai2;~Cao_Zidong1;~Yan-Pei_Cao1;~Ying_Shan2;~Lin_Wang2", "gender": "M;M;M;M;M", "homepage": ";;https://yanpei.me/;;https://dr.ntu.edu.sg/cris/rp/rp02550", "dblp": ";;141/6343;68/5910;", "google_scholar": "QNlF0DsAAAAJ;https://scholar.google.com.hk/citations?user=q1FcZzIAAAAJ;50194vkAAAAJ;4oXBp9UAAAAJ;SReb2csAAAAJ", "orcid": ";;;0000-0001-7673-8325;0000-0002-7485-4493", "linkedin": ";;;YingShanProfile/;", "or_profile": "~Hao_Ai2;~Cao_Zidong1;~Yan-Pei_Cao1;~Ying_Shan2;~Lin_Wang2", "aff": "Hong Kong University of Science and Technology;;Tencent ARC Lab, Tencent AI Lab;Tencent PCG ARC Lab;Hong Kong University of Science and Technology", "aff_domain": "connect.hkust-gz.edu.cn;;tencent.com;arc.tencent.com;ust.hk", "position": "PhD student;;Principal Researcher;Director;Assistant Professor", "bibtex": "@misc{\nai2023hrdfuse,\ntitle={{HRDF}use: Monocular 360\\${\\textasciicircum}{\\textbackslash}circ\\$ Depth Estimation by Collaboratively Learning Holistic-with-Regional Depth Distributions},\nauthor={Hao Ai and Cao Zidong and Yan-Pei Cao and Ying Shan and Lin Wang},\nyear={2023},\nurl={https://openreview.net/forum?id=A85gMNB01t1}\n}", "github": "", "project": "", "reviewers": "GvLJ;qjcz;fDgi", "site": "https://openreview.net/forum?id=A85gMNB01t1", "pdf_size": 48624181, "recommendation": "3;5;5", "confidence": "5;5;4", "correctness": "3;3;2", "technical_novelty": "3;3;3", "empirical_novelty": "0;3;2", "wc_summary_paper": "43;72;131", "wc_strength_and_weaknesses": "196;328;476", "wc_clarity_quality_novelty_and_reproducibility": "20;30;86", "wc_summary_review": "10;51;53", "wc_review": "269;481;746", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 1.6666666666666667, 1.247219128924647 ], "wc_summary_paper_avg": [ 82.0, 36.615115275889366 ], "wc_strength_and_weaknesses_avg": [ 333.3333333333333, 114.37171367470582 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 45.333333333333336, 29.044027881055953 ], "wc_summary_review_avg": [ 38.0, 19.8158185969358 ], "wc_review_avg": [ 498.6666666666667, 195.1347113036416 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.4999999999999999, "corr_recommendation_correctness": -0.49999999999999983, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13952612648072421886&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Hong Kong University of Science and Technology;Tencent", "aff_unique_dep": ";Tencent ARC Lab", "aff_unique_url": "https://www.ust.hk;https://ai.tencent.com",
"aff_unique_abbr": "HKUST;Tencent AI Lab", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Panning for Gold in Federated Learning: Targeted Text Extraction under Arbitrarily Large-Scale Aggregation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10906", "id": "A9WQaxYsfx", "poster": "", "openreview": "https://openreview.net/forum?id=A9WQaxYsfx", "slides": "https://iclr.cc/virtual/2023/poster/10906", "video": "https://iclr.cc/virtual/2023/poster/10906", "author_site": "Hong-Min Chu, Jonas Geiping, Liam H Fowl, Micah Goldblum, Tom Goldstein", "tldr": "We propose a method that extracts target sequences by keywords under extremely large-scale aggregation in federated learning.", "abstract": "As federated learning (FL) matures, privacy attacks against FL systems in turn become more numerous and complex. Attacks on language models have progressed from recovering single sentences in simple classification tasks to recovering larger parts of user data. Current attacks against federated language models are sequence-agnostic and aim to extract as much data as possible from an FL update - often at the expense of fidelity for any particular sequence. Because of this, current attacks fail to extract any meaningful data under large-scale aggregation. In realistic settings, an attacker cares most about a small portion of user data that contains sensitive personal information, for example sequences containing the phrase \"my credit card number is ...\". In this work, we propose the first attack on FL that achieves targeted extraction of sequences that contain privacy-critical phrases, whereby we employ maliciously modified parameters to allow the transformer itself to filter relevant sequences from aggregated user data and encode them in the gradient update. 
Our attack can effectively extract sequences of interest even against extremely large-scale aggregation.", "keywords": "Federated Learning;Privacy;Security;Privacy attack", "primary_area": "", "supplementary_material": "/attachment/761e330bd4b6bd24d33db53b2b91fbea757ed244.zip", "author": "Hong-Min Chu;Jonas Geiping;Liam H Fowl;Micah Goldblum;Tom Goldstein", "authorids": "~Hong-Min_Chu1;~Jonas_Geiping1;~Liam_H_Fowl1;~Micah_Goldblum1;~Tom_Goldstein1", "gender": ";M;;;M", "homepage": ";https://jonasgeiping.github.io/;;;https://www.cs.umd.edu/~tomg/", "dblp": "185/0720;190/7229;241/6940;241/7231;25/8184", "google_scholar": ";https://scholar.google.de/citations?user=206vNCEAAAAJ;IXv3ToAAAAAJ;pGDKzuUAAAAJ;KmSuVtgAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Hong-Min_Chu1;~Jonas_Geiping1;~Liam_H_Fowl1;~Micah_Goldblum1;~Tom_Goldstein1", "aff": "Department of Computer Science, University of Maryland, College Park;University of Maryland, College Park;Google;New York University;University of Maryland, College Park", "aff_domain": "cs.umd.edu;umd.edu;google.com;nyu.edu;umd.edu", "position": "PhD student;Postdoc;Google;Postdoc;Full Professor", "bibtex": "@inproceedings{\nchu2023panning,\ntitle={Panning for Gold in Federated Learning: Targeted Text Extraction under Arbitrarily Large-Scale Aggregation},\nauthor={Hong-Min Chu and Jonas Geiping and Liam H Fowl and Micah Goldblum and Tom Goldstein},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=A9WQaxYsfx}\n}", "github": "", "project": "", "reviewers": "q4yB;dBzB;7zTS;8KC8;zieL", "pdf_size": 2762830, "recommendation": "5;6;6;6;6", "confidence": "3;3;3;2;3", "correctness": "2;4;3;3;3", "technical_novelty": "3;4;3;3;3", "empirical_novelty": "3;4;3;3;3", "wc_summary_paper": "102;123;71;150;114", "wc_strength_and_weaknesses": "591;205;280;107;307", "wc_clarity_quality_novelty_and_reproducibility": "100;40;85;43;123", "wc_summary_review": "39;25;46;61;80", "wc_review": "832;393;482;361;624", "wc_reply_reviewers": "1045;0;0;0;0", "wc_reply_authors": "2738;735;649;537;673", "reply_reviewers": "6;0;0;0;0", "reply_authors": "7;1;1;1;1", "recommendation_avg": [ 5.8, 0.39999999999999997 ], "confidence_avg": [ 2.8, 0.39999999999999997 ], "correctness_avg": [ 3.0, 0.6324555320336759 ], "technical_novelty_avg": [ 3.2, 0.39999999999999997 ], "empirical_novelty_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_paper_avg": [ 112.0, 25.88435821108957 ], "wc_strength_and_weaknesses_avg": [ 298.0, 162.10120295667147 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 78.2, 32.33202746503844 ], "wc_summary_review_avg": [ 50.2, 18.882796403075474 ], "wc_review_avg": [ 538.4, 172.8127310125038 ], "wc_reply_reviewers_avg": [ 209.0, 418.0 ], "wc_reply_authors_avg": [ 1066.4, 838.2509409478762 ], "reply_reviewers_avg": [ 1.2, 2.4000000000000004 ], "reply_authors_avg": [ 2.2, 2.4000000000000004 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.2500000000000001, "corr_recommendation_correctness": 0.790569415042095, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2200247901519460730&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=A9WQaxYsfx", "email": "cs.umd.edu;umd.edu;google.com;nyu.edu;umd.edu", "author_num": 5, "aff_unique_index": "0;1;2;3;1", "aff_unique_norm": "University of Maryland, College Park;University of Maryland;Google;New York University", 
"aff_unique_dep": "Department of Computer Science;;Google;", "aff_unique_url": "https://www/umd.edu;https://www/umd.edu;https://www.google.com;https://www.nyu.edu", "aff_unique_abbr": "UMD;UMD;Google;NYU", "aff_campus_unique_index": "0;0;1;0", "aff_campus_unique": "College Park;Mountain View;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "AB4xZG9uzGl", "title": "Active Topological Mapping by Metric-Free Exploration via Task and Motion Imitation", "track": "main", "status": "Reject", "tldr": "A novel framework of building metric-free topological map for exploration and navigation", "abstract": "Topological map is an effective environment representation for visual navigation. It is a graph of image nodes and spatial neighborhood edges without metric information such as global or relative agent poses. However, currently such a map construction relies on either less-efficient random exploration or more demanding training involving metric information. To overcome these issues, we propose active topological mapping (ATM), consisting of an active visual exploration and a topological mapping by visual place recognition. Our main novelty is the simple and lightweight active exploration policy that works entirely in the image feature space involving no metric information. More specifically, ATM's metric-free exploration is based on task and motion planning (TAMP). The task planner is a recurrent neural network using the latest local image observation sequence to hallucinate a feature as the next-step best exploration goal. The motion planner then fuses the current and the hallucinated feature to generate an action taking the agent towards the hallucinated feature goal. The two planners are jointly trained via deeply-supervised imitation learning from expert exploration demonstrations. 
Extensive experiments in both exploration and navigation tasks on the photo-realistic Gibson and MP3D datasets validate ATM's effectiveness and generalizability.", "keywords": "Topological Mapping;Feature-Space Task and Motion Planning;Visual Navigation;Deeply-Supervised Learning", "primary_area": "", "supplementary_material": "/attachment/273c983d13e5de675b45eda623b83f56a83fcef9.zip", "author": "Yuhang He;Irving Fang;Yiming Li;Chen Feng", "authorids": "~Yuhang_He3;~Irving_Fang1;~Yiming_Li2;~Chen_Feng2", "gender": "M;M;M;M", "homepage": "https://yuhanghe01.github.io/;https://irvingf7.github.io/;https://yimingli-page.github.io/;https://ai4ce.github.io/", "dblp": ";284/8283;l/YimingLi-3;01/161-2", "google_scholar": "H1p3ve8AAAAJ;0jVr_XwAAAAJ;https://scholar.google.com/citations?hl=en;YeG8ZM0AAAAJ", "orcid": ";;0000-0002-0157-6218;0000-0003-3211-1576", "linkedin": ";irving-fang-4396711a6;yiming-li-58b519173/;simbaforrest/", "or_profile": "~Yuhang_He3;~Irving_Fang1;~Yiming_Li2;~Chen_Feng2", "aff": "University of Oxford;New York University;New York University;New York University", "aff_domain": "ox.ac.uk;nyu.edu;nyu.edu;nyu.edu", "position": "PhD student;MS student;PhD student;Assistant Professor", "bibtex": "@misc{\nhe2023active,\ntitle={Active Topological Mapping by Metric-Free Exploration via Task and Motion Imitation},\nauthor={Yuhang He and Irving Fang and Yiming Li and Chen Feng},\nyear={2023},\nurl={https://openreview.net/forum?id=AB4xZG9uzGl}\n}", "github": "", "project": "", "reviewers": "ZEmd;9Cpk;CpyF;bznr;1W7X", "site": "https://openreview.net/forum?id=AB4xZG9uzGl", "pdf_size": 7190265, "recommendation": "3;3;5;6;6", "confidence": "3;5;3;4;3", "correctness": "3;3;3;3;3", "technical_novelty": "2;2;3;3;3", "empirical_novelty": "2;2;2;2;3", "wc_summary_paper": "164;38;72;177;309", "wc_strength_and_weaknesses": "334;283;269;366;193", "wc_clarity_quality_novelty_and_reproducibility": "245;41;31;43;361", "wc_summary_review": "101;46;45;128;76", "wc_review": "844;408;417;714;939", "wc_reply_reviewers": "0;0;0;527;92", "wc_reply_authors": "1009;1112;366;1553;508", "reply_reviewers": "0;0;0;1;1", "reply_authors": "2;2;1;3;1", "recommendation_avg": [ 4.6, 1.3564659966250536 ], "confidence_avg": [ 3.6, 0.8 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.6, 0.4898979485566356 ], "empirical_novelty_avg": [ 2.2, 0.39999999999999997 ], "wc_summary_paper_avg": [ 152.0, 94.67206557374777 ], "wc_strength_and_weaknesses_avg": [ 289.0, 59.3397000329459 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 144.2, 134.81008864324656 ], "wc_summary_review_avg": [ 79.2, 32.058696168122616 ], "wc_review_avg": [ 664.4, 217.74719286365095 ], "wc_reply_reviewers_avg": [ 123.8, 204.72459549355568 ], "wc_reply_authors_avg": [ 909.6, 429.3234677955539 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.33174440134851857, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:pTjWtTwW-VgJ:scholar.google.com/&scioq=Active+Topological+Mapping+by+Metric-Free+Exploration+via+Task+and+Motion+Imitation&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "University of Oxford;New York University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ox.ac.uk;https://www.nyu.edu", "aff_unique_abbr": "Oxford;NYU", "aff_campus_unique_index": "", 
"aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "United Kingdom;United States" }, { "id": "ABqIh51jNQm", "title": "Spatial Reasoning Network for Zero-shot Constrained Scene Generation", "track": "main", "status": "Reject", "tldr": "This paper introduces the Spatial Reasoning Network for zero-shot constrained scene generation.", "abstract": "Constrained scene generation (CSG) generates images satisfying a given set of constraints. Zero-shot CSG generates images satisfying constraints not presented in the training set without retraining. Recent neural-based models generate images with excellent details, but largely cannot satisfy constraints, especially in complex scenes involving multiple objects. Such difficulty is due to the lack of effective approaches combining low-level visual element generation with high-level spatial reasoning. We introduce a Spatial Reasoning Network for constrained scene generation (SPREN). SPREN adds to the state-of-the-art image generation networks (for low-level visual element generation) a spatial reasoning module (for high-level spatial reasoning). The spatial reasoning module decides objects' positions following the output of a Recursive Neural Network (RNN), which is trained to learn implicit spatial knowledge (such as trees growing from the ground) from an image dataset. During inference, explicit constraints can be enforced by a forward-checking algorithm, which blocks invalid decisions from the RNN in a zero-shot manner. In experiments, we demonstrate SPREN is able to generate images with excellent detail while satisfying complex spatial constraints. SPREN also transfers good quality scene generation to unseen constraints without retraining. ", "keywords": "Spatial Reasoning Network;Constrained Scene Generation", "primary_area": "", "supplementary_material": "/attachment/1f05ad57e9b9f3a4ecd9b36ec0257cc220f9bdf5.zip", "author": "Maxwell J Jacobson;Yexiang Xue", "authorids": "~Maxwell_J_Jacobson1;~Yexiang_Xue1", "gender": "M;M", "homepage": "http://mjj.epizy.com;https://www.cs.purdue.edu/people/faculty/yexiang/", "dblp": ";117/4903", "google_scholar": ";", "orcid": ";", "linkedin": "jacobs57/;", "or_profile": "~Maxwell_J_Jacobson1;~Yexiang_Xue1", "aff": "Purdue University;Purdue University", "aff_domain": "purdue.edu;purdue.edu", "position": "PhD student;Assistant Professor", "bibtex": "@misc{\njacobson2023spatial,\ntitle={Spatial Reasoning Network for Zero-shot Constrained Scene Generation},\nauthor={Maxwell J Jacobson and Yexiang Xue},\nyear={2023},\nurl={https://openreview.net/forum?id=ABqIh51jNQm}\n}", "github": "", "project": "", "reviewers": "wJp9;9XhE;4LXR", "site": "https://openreview.net/forum?id=ABqIh51jNQm", "pdf_size": 3860011, "recommendation": "1;3;5", "confidence": "5;4;4", "correctness": "3;3;3", "technical_novelty": "1;2;3", "empirical_novelty": "2;2;3", "wc_summary_paper": "28;84;107", "wc_strength_and_weaknesses": "316;408;320", "wc_clarity_quality_novelty_and_reproducibility": "34;86;84", "wc_summary_review": "12;50;89", "wc_review": "390;628;600", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.0, 1.632993161855452 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.0, 0.816496580927726 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 73.0, 33.1762967593833 ], 
"wc_strength_and_weaknesses_avg": [ 348.0, 42.45782220824175 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 68.0, 24.055491403558285 ], "wc_summary_review_avg": [ 50.333333333333336, 31.436002007606216 ], "wc_review_avg": [ 539.3333333333334, 106.21152898291639 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.8660254037844385, "corr_recommendation_correctness": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=262088194868164706&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Purdue University", "aff_unique_dep": "", "aff_unique_url": "https://www.purdue.edu", "aff_unique_abbr": "Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "AFhaaOZTkKA", "title": "Populating memory in Continual Learning with Consistency Aware Sampling", "track": "main", "status": "Reject", "tldr": "", "abstract": "Continual Learning (CL) methods aim to mitigate Catastrophic Forgetting (CF), where knowledge from previously learned tasks is often lost in favor of the new one. Among those algorithms, some have shown the relevance of keeping a rehearsal buffer with previously seen examples, referred to as $memory$. Yet, despite their popularity, limited research has been done to understand which elements are more beneficial to store in memory. It is common for this memory to be populated through random sampling, with little guiding principles that may aid in retaining prior knowledge. In this paper, and consistent with previous work, we found that some storage policies behave similarly given a certain memory size or compute budget, but when these constraints are relevant, results differ considerably. Based on these insights, we propose CAWS (Consistency AWare Sampling), an original storage policy that leverages a learning consistency score (C-Score) to populate the memory with elements that are $easy$ $to$ $learn$ and $representative$ of previous tasks. 
Because of the impracticality of directly using the C-Score in CL, we propose more feasible and efficient proxies to calculate the score that yield state-of-the-art results on CIFAR-100 and Tiny Imagenet.", "keywords": "Continual Learning;Learning Consistency;Populating Memory;Memory-based Continual Learning", "primary_area": "", "supplementary_material": "/attachment/823b99649a648149f474d661714193d50084bfef.zip", "author": "Julio Hurtado;Alain Raymond;Vladimir Araujo;Vincenzo Lomonaco;Alvaro Soto;Davide Bacciu", "authorids": "~Julio_Hurtado1;~Alain_Raymond1;~Vladimir_Araujo1;~Vincenzo_Lomonaco1;~Alvaro_Soto1;~Davide_Bacciu1", "gender": "M;M;M;M;M;M", "homepage": "https://warwick.ac.uk/fac/sci/camacs/people/hurtado;https://ialab.ing.puc.cl/;https://vgaraujov.github.io/;https://vincenzolomonaco.com;http://asoto.ing.puc.cl;http://pages.di.unipi.it/bacciu/", "dblp": "178/4255;294/8655;248/8695;157/5127;25/3682;07/6626", "google_scholar": "https://scholar.google.com/citations?hl=es;j8KhaCIAAAAJ;p4TbBLEAAAAJ;https://scholar.google.it/citations?user=rQLINtQAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.it/citations?user=1d5n2WkAAAAJ", "orcid": ";;0000-0001-5760-8410;;;0000-0001-5213-2468", "linkedin": ";;vgaraujov/;;;bacciu/", "or_profile": "~Julio_Hurtado1;~Alain_Raymond1;~Vladimir_Araujo1;~Vincenzo_Lomonaco1;~Alvaro_Soto1;~Davide_Bacciu1", "aff": "University of Pisa;Pontificia Universidad Catolica de Chile;Pontificia Universidad Catolica de Chile;University of Pisa;Universidad Cat\u00f3lica de Chile;University of Pisa", "aff_domain": "unipi.it;uc.cl;uc.cl;unipi.it;uc.cl;unipi.it", "position": "Postdoc;PhD student;PhD student;Assistant Professor;Associate Professor;Full Professor", "bibtex": "@misc{\nhurtado2023populating,\ntitle={Populating memory in Continual Learning with Consistency Aware Sampling},\nauthor={Julio Hurtado and Alain Raymond and Vladimir Araujo and Vincenzo Lomonaco and Alvaro Soto and Davide Bacciu},\nyear={2023},\nurl={https://openreview.net/forum?id=AFhaaOZTkKA}\n}", "github": "", "project": "", "reviewers": "Yna5;gaRy;1H7t;zPPV", "site": "https://openreview.net/forum?id=AFhaaOZTkKA", "pdf_size": 822992, "recommendation": "3;3;3;6", "confidence": "4;4;5;4", "correctness": "3;2;2;3", "technical_novelty": "2;3;2;3", "empirical_novelty": "1;2;2;2", "wc_summary_paper": "79;66;46;155", "wc_strength_and_weaknesses": "44;270;200;355", "wc_clarity_quality_novelty_and_reproducibility": "77;94;35;41", "wc_summary_review": "66;98;26;105", "wc_review": "266;528;307;656", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "274;646;596;545", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 3.75, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 86.5, 41.2583324917525 ], "wc_strength_and_weaknesses_avg": [ 217.25, 114.09508096320367 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 61.75, 24.5903944661325 ], "wc_summary_review_avg": [ 73.75, 31.24399942388938 ], "wc_review_avg": [ 439.25, 159.97089579045308 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 515.25, 143.79042909734986 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 2, 
"gs_cited_by_link": "https://scholar.google.com/scholar?cites=7401770902150693260&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;1;0;2;0", "aff_unique_norm": "University of Pisa;Pontificia Universidad Catolica de Chile;Universidad Cat\u00f3lica de Chile", "aff_unique_dep": ";;", "aff_unique_url": "https://www.unipi.it;https://www.puc.cl;https://www.uc.cl", "aff_unique_abbr": "UNIP;PUC;PUC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;1;0", "aff_country_unique": "Italy;Chile" }, { "id": "AGLG_ncNp0X", "title": "Personalized Federated Hypernetworks for Privacy Preservation in Multi-Task Reinforcement Learning", "track": "main", "status": "Reject", "tldr": "We use hypernetworks to aggregate learning across multiple reinforcement learning agents in a microgrid energy demand response setting while preserving privacy.", "abstract": "Multi-Agent Reinforcement Learning currently focuses on implementations where all data and training can be centralized to one machine. But what if local agents are split across multiple tasks, and need to keep data private between each? We develop the first application of Personalized Federated Hypernetworks (PFH) to Reinforcement Learning (RL). We then present a novel application of PFH to few-shot transfer, and demonstrate significant initial increases in learning. PFH has never been demonstrated beyond supervised learning benchmarks, so we apply PFH to an important domain: RL price-setting for energy demand response. We consider a general case across where agents are split across multiple microgrids, wherein energy consumption data must be kept private within each microgrid. Together, our work explores how the fields of personalized federated learning and RL can come together to make learning efficient across multiple tasks while keeping data secure.", "keywords": "microgrid clusters;energy demand response;transactive energy control;neural networks;multi-agent reinforcement learning;reinforcement learning;multi-task learning;transfer learning;hypernetworks;federated learning;personalized federated learning;microgrids", "primary_area": "", "supplementary_material": "/attachment/af90eda788e444bd653459c4a480fa745ef2255e.zip", "author": "Doseok Jang;Larry Yan;Lucas Spangher;Selvaprabu Nadarajah;Costas Spanos", "authorids": "~Doseok_Jang1;yanlarry@berkeley.edu;~Lucas_Spangher1;~Selvaprabu_Nadarajah1;~Costas_Spanos1", "gender": ";;M;M;", "homepage": ";;;https://selvan.people.uic.edu/;https://www2.eecs.berkeley.edu/Faculty/Homepages/spanos.html", "dblp": "291/4869;;267/8772;;", "google_scholar": "hH0DZdMAAAAJ;;https://scholar.google.com/citations?hl=en;;", "orcid": "0000-0002-8612-4420;;;;", "linkedin": "austin-jang-065156140/;;lucasspangher;;", "or_profile": "~Doseok_Jang1;yanlarry@berkeley.edu;~Lucas_Spangher1;~Selvaprabu_Nadarajah1;~Costas_Spanos1", "aff": "Electrical Engineering & Computer Science Department, University of California, Berkeley;;University of California, Berkeley;University of Illinois, Chicago;University of California, Berkeley", "aff_domain": "eecs.berkeley.edu;;berkeley.edu;uic.edu;berkeley.edu", "position": "MS student;;PhD student;Assistant Professor;Emeritus", "bibtex": "@misc{\njang2023personalized,\ntitle={Personalized Federated Hypernetworks for Privacy Preservation in Multi-Task Reinforcement Learning},\nauthor={Doseok Jang and Larry Yan and Lucas Spangher and Selvaprabu Nadarajah and Costas 
Spanos},\nyear={2023},\nurl={https://openreview.net/forum?id=AGLG_ncNp0X}\n}", "github": "", "project": "", "reviewers": "D68C;C5ih;a54n", "site": "https://openreview.net/forum?id=AGLG_ncNp0X", "pdf_size": 3616499, "recommendation": "1;3;3", "confidence": "4;4;4", "correctness": "2;4;3", "technical_novelty": "1;2;2", "empirical_novelty": "1;1;0", "wc_summary_paper": "23;95;79", "wc_strength_and_weaknesses": "144;130;760", "wc_clarity_quality_novelty_and_reproducibility": "4;70;35", "wc_summary_review": "22;67;55", "wc_review": "193;362;929", "wc_reply_reviewers": "0;0;39", "wc_reply_authors": "230;396;1128", "reply_reviewers": "0;0;1", "reply_authors": "1;1;3", "recommendation_avg": [ 2.3333333333333335, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "empirical_novelty_avg": [ 0.6666666666666666, 0.4714045207910317 ], "wc_summary_paper_avg": [ 65.66666666666667, 30.8688984074406 ], "wc_strength_and_weaknesses_avg": [ 344.6666666666667, 293.7406263279827 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 36.333333333333336, 26.960877005188255 ], "wc_summary_review_avg": [ 48.0, 19.026297590440446 ], "wc_review_avg": [ 494.6666666666667, 314.7743459827832 ], "wc_reply_reviewers_avg": [ 13.0, 18.384776310850235 ], "wc_reply_authors_avg": [ 584.6666666666666, 390.125905602566 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.8660254037844387, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9109573615986875695&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of California, Berkeley;University of Illinois at Chicago", "aff_unique_dep": "Electrical Engineering & Computer Science Department;", "aff_unique_url": "https://www.berkeley.edu;https://www.uic.edu", "aff_unique_abbr": "UC Berkeley;UIC", "aff_campus_unique_index": "0;0;1;0", "aff_campus_unique": "Berkeley;Chicago", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Diffusion Policies as an Expressive Policy Class for Offline Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11133", "id": "AHvFDPi-FA", "poster": "/media/PosterPDFs/ICLR%202023/11133.png?t=1681255017.4809937", "openreview": "https://openreview.net/forum?id=AHvFDPi-FA", "slides": "https://iclr.cc/virtual/2023/poster/11133", "video": "https://iclr.cc/virtual/2023/poster/11133", "author_site": "Zhendong Wang, Jonathan J Hunt, Mingyuan Zhou", "tldr": "Diffusion models serve as expressive policies to boost offline RL performance. ", "abstract": "Offline reinforcement learning (RL), which aims to learn an optimal policy using a previously collected static dataset, is an important paradigm of RL. Standard RL methods often perform poorly in this regime due to the function approximation errors on out-of-distribution actions. While a variety of regularization methods have been proposed to mitigate this issue, they are often constrained by policy classes with limited expressiveness that can lead to highly suboptimal solutions. 
In this paper, we propose representing the policy as a diffusion model, a recent class of highly-expressive deep generative models. We introduce Diffusion Q-learning (Diffusion-QL) that utilizes a conditional diffusion model to represent the policy. In our approach, we learn an action-value function and we add a term maximizing action-values into the training loss of the conditional diffusion model, which results in a loss that seeks optimal actions that are near the behavior policy. We show the expressiveness of the diffusion model-based policy, and the coupling of the behavior cloning and policy improvement under the diffusion model both contribute to the outstanding performance of Diffusion-QL. We illustrate the superiority of our method compared to prior works in a simple 2D bandit example with a multimodal behavior policy. We then show that our method can achieve state-of-the-art performance on the majority of the D4RL benchmark tasks.", "keywords": "offline RL;diffusion models;behavior cloning;policy regularization;Q-learning", "primary_area": "", "supplementary_material": "/attachment/6098025cf961480e2453f8051b0d492ec54f35c0.zip", "author": "Zhendong Wang;Jonathan J Hunt;Mingyuan Zhou", "authorids": "~Zhendong_Wang1;~Jonathan_J_Hunt1;~Mingyuan_Zhou1", "gender": "M;;M", "homepage": "https://zhendong-wang.github.io/;https://me.net.nz;http://mingyuanzhou.github.io", "dblp": ";28/10525;", "google_scholar": "lRiIjhcAAAAJ;IdhJtDwAAAAJ;LXwCIisAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Zhendong_Wang1;~Jonathan_J_Hunt1;~Mingyuan_Zhou1", "aff": "University of Texas at Austin;Twitter;Google", "aff_domain": "utexas.edu;twitter.com;google.com", "position": "PhD student;Research Scientist;Researcher", "bibtex": "@inproceedings{\nwang2023diffusion,\ntitle={Diffusion Policies as an Expressive Policy Class for Offline Reinforcement Learning},\nauthor={Zhendong Wang and Jonathan J Hunt and Mingyuan Zhou},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=AHvFDPi-FA}\n}", "github": "", "project": "", "reviewers": "Qge1;JRTd;J2jU;jsYy", "pdf_size": 1143995, "recommendation": "6;8;8;8", "confidence": "4;4;4;4", "correctness": "2;4;4;4", "technical_novelty": "2;3;3;4", "empirical_novelty": "3;3;2;0", "wc_summary_paper": "72;131;200;61", "wc_strength_and_weaknesses": "220;369;135;184", "wc_clarity_quality_novelty_and_reproducibility": "14;12;208;24", "wc_summary_review": "57;28;42;21", "wc_review": "363;540;585;290", "wc_reply_reviewers": "150;43;25;0", "wc_reply_authors": "1253;560;444;42", "reply_reviewers": "1;1;1;0", "reply_authors": "3;1;1;1", "recommendation_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.5, 0.8660254037844386 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.0, 1.224744871391589 ], "wc_summary_paper_avg": [ 116.0, 55.32178594369491 ], "wc_strength_and_weaknesses_avg": [ 227.0, 87.35845694607936 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 64.5, 82.9743936404479 ], "wc_summary_review_avg": [ 37.0, 13.80217374184226 ], "wc_review_avg": [ 444.5, 121.83287733612795 ], "wc_reply_reviewers_avg": [ 54.5, 57.21232384722718 ], "wc_reply_authors_avg": [ 574.75, 436.22206214266606 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, 
"corr_recommendation_correctness": 1.0, "gs_citation": 377, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2214087461476385173&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=AHvFDPi-FA", "email": "utexas.edu;twitter.com;google.com", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Texas at Austin;Twitter, Inc.;Google", "aff_unique_dep": ";;Google", "aff_unique_url": "https://www.utexas.edu;https://twitter.com;https://www.google.com", "aff_unique_abbr": "UT Austin;Twitter;Google", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Austin;;Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Robust Active Distillation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11991", "id": "ALDM5SN2r7M", "poster": "/media/PosterPDFs/ICLR%202023/11991.png?t=1680640802.862064", "openreview": "https://openreview.net/forum?id=ALDM5SN2r7M", "slides": "https://iclr.cc/virtual/2023/poster/11991", "video": "https://iclr.cc/virtual/2023/poster/11991", "author_site": "Cenk Baykal, Khoa Trinh, Fotis Iliopoulos, Gaurav Menghani, Erik Vee", "tldr": "A new way of actively soft-labeling points in semi-supervised knowledge distillation to teach the student model in an efficient and robust way", "abstract": "Distilling knowledge from a large teacher model to a lightweight one is a widely successful approach for generating compact, powerful models in the semi-supervised learning setting where a limited amount of labeled data is available. In large-scale applications, however, the teacher tends to provide a large number of incorrect soft-labels that impairs student performance. The sheer size of the teacher additionally constrains the number of soft-labels that can be queried due to prohibitive computational and/or financial costs. The difficulty in achieving simultaneous \\emph{efficiency} (i.e., minimizing soft-label queries) and \\emph{robustness} (i.e., avoiding student inaccuracies due to incorrect labels) hurts the widespread application of knowledge distillation to many modern tasks. In this paper, we present a parameter-free approach with provable guarantees to query the soft-labels of points that are simultaneously informative and correctly labeled by the teacher. At the core of our work lies a game-theoretic formulation that explicitly considers the inherent trade-off between the informativeness and correctness of input instances. We establish bounds on the expected performance of our approach that hold even in worst-case distillation instances. 
We present empirical evaluations on popular benchmarks that demonstrate the improved distillation performance enabled by our work relative to that of state-of-the-art active learning and active distillation methods.", "keywords": "knowledge distillation;active learning;semi-supervised learning;model compression", "primary_area": "", "supplementary_material": "", "author": "Cenk Baykal;Khoa Trinh;Fotis Iliopoulos;Gaurav Menghani;Erik Vee", "authorids": "~Cenk_Baykal1;~Khoa_Trinh2;~Fotis_Iliopoulos1;~Gaurav_Menghani1;~Erik_Vee1", "gender": "M;M;M;M;", "homepage": "https://people.csail.mit.edu/baykal/;;http://www.filiop.org/;http://gaurav.ai;", "dblp": "151/9349;47/9680;147/4790;137/0537.html;", "google_scholar": "lRxoOlwAAAAJ;pVTeodYAAAAJ;v3e5F-AAAAAJ;XvncD4IAAAAJ;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Cenk_Baykal1;~Khoa_Trinh2;~Fotis_Iliopoulos1;~Gaurav_Menghani1;~Erik_Vee1", "aff": "Google;;Google;Google Research;", "aff_domain": "google.com;;google.com;google.com;", "position": "Research Scientist;;Researcher;Software Engineer;", "bibtex": "@inproceedings{\nbaykal2023robust,\ntitle={Robust Active Distillation},\nauthor={Cenk Baykal and Khoa Trinh and Fotis Iliopoulos and Gaurav Menghani and Erik Vee},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=ALDM5SN2r7M}\n}", "github": "", "project": "", "reviewers": "fKaY;FQ16;26jz", "pdf_size": 17378323, "recommendation": "6;6;8", "confidence": "4;3;3", "correctness": "3;4;4", "technical_novelty": "2;3;4", "empirical_novelty": "3;3;3", "wc_summary_paper": "74;54;114", "wc_strength_and_weaknesses": "209;177;69", "wc_clarity_quality_novelty_and_reproducibility": "36;12;38", "wc_summary_review": "64;30;65", "wc_review": "383;273;286", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "784;310;100", "reply_reviewers": "0;0;0", "reply_authors": "2;1;1", "recommendation_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.816496580927726 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 80.66666666666667, 24.94438257849294 ], "wc_strength_and_weaknesses_avg": [ 151.66666666666666, 59.89620652057654 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 28.666666666666668, 11.8133634311129 ], "wc_summary_review_avg": [ 53.0, 16.268579122549905 ], "wc_review_avg": [ 314.0, 49.07816894166557 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 398.0, 286.0908946471383 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.49999999999999983, "corr_recommendation_correctness": 0.49999999999999983, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3875257582974791620&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=ALDM5SN2r7M", "email": "google.com;;google.com;google.com;", "author_num": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "ALMbHbLb3PK", "title": "Multiple Instance Learning via 
Iterative Self-Paced Supervised Contrastive Learning", "track": "main", "status": "Withdraw", "tldr": "We propose a framework for multiple instance learning, which iteratively improves instance-level features by jointly estimating latent instance-level pseudo labels, and show that it outperforms existing methods on three real-world medical datasets.", "abstract": "Learning representations for individual instances when only bag-level labels are available is a fundamental challenge in multiple instance learning (MIL). Recent works have shown promising results using contrastive self-supervised learning (CSSL), which learns to push apart representations corresponding to two different randomly-selected instances. Unfortunately, in real-world applications such as medical image classification, there is often class imbalance, so randomly-selected instances mostly belong to the same majority class, which precludes CSSL from learning inter-class differences. To address this issue, we propose a novel framework, Iterative Self-paced Supervised Contrastive Learning for MIL Representations (ItS2CLR), which improves the learned representation by exploiting instance-level pseudo labels derived from the bag-level labels. The framework employs a novel self-paced sampling strategy to ensure the accuracy of pseudo labels. We evaluate ItS2CLR on three medical datasets, showing that it improves the quality of instance-level pseudo labels and representations, and outperforms existing MIL methods in terms of both bag and instance level accuracy.", "keywords": "multiple instance learning;whole slide image;contrastive learning;medical imaging", "primary_area": "", "supplementary_material": "", "author": "Kangning Liu;Weicheng Zhu;Yiqiu Shen;Sheng Liu;Narges Razavian;Krzysztof J. Geras;Carlos Fernandez-Granda", "authorids": "~Kangning_Liu1;~Weicheng_Zhu1;~Yiqiu_Shen1;~Sheng_Liu2;~Narges_Razavian1;~Krzysztof_J._Geras1;~Carlos_Fernandez-Granda1", "gender": "M;M;M;;;;M", "homepage": "https://kangning-liu.github.io/;;https://seyiqi.github.io/;https://shengliu66.github.io/;;https://cims.nyu.edu/~cfgranda/;https://cs.nyu.edu/~kgeras/", "dblp": "259/1458;180/5811;https://dblp.uni-trier.de/pers/hd/s/Shen:Yiqiu;;https://dblp.org/pers/hd/r/Razavian:Narges;77/11141;124/8920", "google_scholar": "F3F2qAkAAAAJ;Glw83HYAAAAJ;XaeN2zgAAAAJ;rzhzR-cAAAAJ;;GX-PtukAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0002-7726-2514;;;;", "linkedin": ";;yiqiu-shen-a2317782/;;;;", "or_profile": "~Kangning_Liu1;~Weicheng_Zhu1;~Yiqiu_Shen1;~Sheng_Liu2;~Narges_Razavian1;~Carlos_Fernandez-Granda1;~Krzysztof_Jerzy_Geras1", "aff": "Adobe Systems;New York University;New York University;New York University;New York University;New York University;NYU Grossman School of Medicine", "aff_domain": "adobe.com;nyu.edu;nyu.edu;nyu.edu;nyu.edu;nyu.edu;nyulangone.org", "position": "Intern;PhD student;PhD student;PhD student;Assistant Professor;Associate Professor;Assistant Professor", "bibtex": "@misc{\nliu2023multiple,\ntitle={Multiple Instance Learning via Iterative Self-Paced Supervised Contrastive Learning},\nauthor={Kangning Liu and Weicheng Zhu and Yiqiu Shen and Sheng Liu and Narges Razavian and Krzysztof J. 
Geras and Carlos Fernandez-Granda},\nyear={2023},\nurl={https://openreview.net/forum?id=ALMbHbLb3PK}\n}", "github": "", "project": "", "reviewers": "QmCR;sA1k;TTL4;n9qY", "site": "https://openreview.net/forum?id=ALMbHbLb3PK", "pdf_size": 2438614, "recommendation": "3;5;5;6", "confidence": "4;5;4;4", "correctness": "3;4;3;4", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "84;147;76;130", "wc_strength_and_weaknesses": "113;169;507;165", "wc_clarity_quality_novelty_and_reproducibility": "49;95;42;43", "wc_summary_review": "26;261;90;38", "wc_review": "272;672;715;376", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 109.25, 29.99479121447589 ], "wc_strength_and_weaknesses_avg": [ 238.5, 156.58464164789598 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 57.25, 21.958768180387533 ], "wc_summary_review_avg": [ 103.75, 93.92117705821195 ], "wc_review_avg": [ 508.75, 188.98594524461336 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.13245323570650439, "corr_recommendation_correctness": 0.6882472016116854, "gs_citation": 44, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2226974126852111732&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff_unique_index": "0;1;1;1;1;1;2", "aff_unique_norm": "Adobe;New York University;New York University Grossman School of Medicine", "aff_unique_dep": "Adobe Systems Incorporated;;School of Medicine", "aff_unique_url": "https://www.adobe.com;https://www.nyu.edu;https://med.nyu.edu", "aff_unique_abbr": "Adobe;NYU;NYU Grossman SOM", "aff_campus_unique_index": "1", "aff_campus_unique": ";New York", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "ALbEpTC4hBp", "title": "Revisiting Instance-Reweighted Adversarial Training", "track": "main", "status": "Reject", "tldr": "We clarify a weakness of previous methods and propose a method to resolve the weakness by transforming margins into an appropriate representation.", "abstract": "Instance-reweighted adversarial training (IRAT) is a type of adversarial training that assigns large weights to high-importance examples and then minimizes the weighted loss. The importance often uses the margins between decision boundaries and each example. In particular, IRAT can alleviate robust overfitting and obtain excellent robustness by computing margins with an estimated probability. However, previous works implicitly dealt with binary classification even in the multi-class cases, because they computed margins with only the true class and the most confusing class. The computed margins can become equal even with different true probability examples, because of the complex decision boundaries in multi-class classification. In this paper, first, we clarify the above problem with a specific example. Then, we propose \\textit{margin reweighting}, which can transform the previous margins into appropriate representations for multi-class classification by leveraging the relations between the most confusing class and other classes. 
Experimental results on the CIFAR-10/100 datasets demonstrate that the proposed method is effective in boosting the robustness against several attacks as compared to the previous methods.", "keywords": "Adversarial training;Adversarial robustness;Instance-reweighted", "primary_area": "", "supplementary_material": "", "author": "Hiroki Adachi;Tsubasa Hirakawa;Takayoshi Yamashita;Hironobu Fujiyoshi", "authorids": "~Hiroki_Adachi1;~Tsubasa_Hirakawa1;~Takayoshi_Yamashita1;~Hironobu_Fujiyoshi2", "gender": "M;M;M;M", "homepage": "https://hirokiadachi.github.io/;https://thirakawa.github.io;http://mprg.cs.chubu.ac.jp/~takayoshi/index.html;http://mprg.jp/en/", "dblp": ";141/9933;64/5510;79/2304", "google_scholar": ";NSx1e0wAAAAJ;https://scholar.google.co.jp/citations?user=hkguTPgAAAAJ;CIHKZpEAAAAJ", "orcid": ";0000-0003-3851-5221;0000-0003-2325-126X;0000-0001-7391-4725", "linkedin": ";;;", "or_profile": "~Hiroki_Adachi1;~Tsubasa_Hirakawa1;~Takayoshi_Yamashita1;~Hironobu_Fujiyoshi2", "aff": "Chubu university;Chubu University;Chubu University;Chubu University", "aff_domain": "mprg.cs.chubu.ac.jp;chubu.ac.jp;chubu.ac.jp;chubu.ac.jp", "position": "PhD student;Lecturer;Full Professor;Full Professor", "bibtex": "@misc{\nadachi2023revisiting,\ntitle={Revisiting Instance-Reweighted Adversarial Training},\nauthor={Hiroki Adachi and Tsubasa Hirakawa and Takayoshi Yamashita and Hironobu Fujiyoshi},\nyear={2023},\nurl={https://openreview.net/forum?id=ALbEpTC4hBp}\n}", "github": "", "project": "", "reviewers": "QLpw;UgLM;mrgM;tft5", "site": "https://openreview.net/forum?id=ALbEpTC4hBp", "pdf_size": 1182236, "recommendation": "3;3;3;5", "confidence": "4;5;5;4", "correctness": "3;3;3;3", "technical_novelty": "3;2;2;3", "empirical_novelty": "2;1;0;0", "wc_summary_paper": "90;73;74;90", "wc_strength_and_weaknesses": "208;267;159;122", "wc_clarity_quality_novelty_and_reproducibility": "81;43;149;10", "wc_summary_review": "23;17;38;34", "wc_review": "402;400;420;256", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 4.5, 0.5 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 0.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 81.75, 8.257572282456872 ], "wc_strength_and_weaknesses_avg": [ 189.0, 54.39209501388966 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 70.75, 51.69320554966581 ], "wc_summary_review_avg": [ 28.0, 8.396427811873332 ], "wc_review_avg": [ 369.5, 65.99052962357554 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:a-XE0jijDrkJ:scholar.google.com/&scioq=Revisiting+Instance-Reweighted+Adversarial+Training&hl=en&as_sdt=0,47", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Chubu University", "aff_unique_dep": "", "aff_unique_url": "https://www.chubu.ac.jp", "aff_unique_abbr": "Chubu U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Japan" }, { "id": "ALcz2n3Wsdf", "title": "Mugs: A Multi-Granular Self-Supervised Learning Framework", "track": "main", "status": "Withdraw", "tldr": "we propose 
an effective MUlti-Granular Self-supervised learning (Mugs) framework to explicitly learn multi-granular visual features.", "abstract": "In self-supervised learning, multi-granular features are heavily desired though rarely investigated, as different downstream tasks (e.g., general and fine-grained classification) often require different or multi-granular features, e.g.~fine- or coarse-grained one or their mixture. In this work, for the first time, we propose an effective MUlti-Granular Self-supervised learning (Mugs) framework to explicitly learn multi-granular visual features. Mugs has three complementary granular supervisions: 1) an instance discrimination supervision (IDS), 2) a novel local-group discrimination supervision (LGDS), and 3) a group discrimination supervision (GDS). IDS distinguishes different instances to learn instance-level fine-grained features. LGDS aggregates features of an image and its neighbors into a local-group feature, and pulls local-group features from different crops of the same image together and push them away for others. It provides complementary instance supervision to IDS via an extra alignment on local neighbors, and scatters different local-groups separately to increase discriminability. Accordingly, it helps learn high-level fine-grained features at a local-group level. Finally, to prevent similar local-groups from being scattered randomly or far away, GDS brings similar samples close and thus pulls similar local-groups together, capturing coarse-grained features at a (semantic) group level. Consequently, Mugs can capture three granular features that often enjoy higher generality on diverse downstream tasks over single-granular features, e.g.~instance-level fine-grained features in contrastive learning. By only pretraining on ImageNet-1K, Mugs sets new SoTA linear probing accuracy 81.9$\\%$ on ImageNet-1K and improves previous SoTA by $0.9\\%$. 
\tIt also surpasses SoTAs on other tasks, e.g.~transfer learning, detection and segmentation.", "keywords": "multi-granular learning;contrastive learning;self-supervised learning", "primary_area": "", "supplementary_material": "", "author": "Pan Zhou;Yichen Zhou;Chenyang Si;Weihao Yu;Teck Khim Ng;Shuicheng YAN", "authorids": "~Pan_Zhou3;~Yichen_Zhou2;~Chenyang_Si2;~Weihao_Yu2;~Teck_Khim_Ng1;~Shuicheng_YAN3", "gender": ";M;M;;M;M", "homepage": ";;http://chenyangsi.top/;http://whyu.me;https://www.comp.nus.edu.sg/~ngtk/;https://yanshuicheng.ai/", "dblp": ";;220/3068;222/7846-1.html;88/1588;y/ShuichengYan", "google_scholar": ";https://scholar.google.com/citations?hl=en;XdahAuoAAAAJ;LYxjt1QAAAAJ;;https://scholar.google.com.hk/citations?user=DNuiPHwAAAAJ", "orcid": ";0000-0002-0596-2087;;;;", "linkedin": ";zhou-yichen;;;;", "or_profile": "~Pan_Zhou3;~Yichen_Zhou2;~Chenyang_Si2;~Weihao_Yu2;~Teck_Khim_Ng1;~Shuicheng_YAN3", "aff": ";National University of Singapore;Sea AI Lab;National University of Singapore;National University of Singapore;sea Group", "aff_domain": ";nus.edu.sg;sea.com;u.nus.edu;nus.edu.sg;sea.com", "position": ";PhD student;Researcher;PhD student;Associate Professor;Researcher", "bibtex": "@misc{\nzhou2023mugs,\ntitle={Mugs: A Multi-Granular Self-Supervised Learning Framework},\nauthor={Pan Zhou and Yichen Zhou and Chenyang Si and Weihao Yu and Teck Khim Ng and Shuicheng YAN},\nyear={2023},\nurl={https://openreview.net/forum?id=ALcz2n3Wsdf}\n}", "github": "", "project": "", "reviewers": "LDAn;KXhk;oyQB", "site": "https://openreview.net/forum?id=ALcz2n3Wsdf", "pdf_size": 11409737, "recommendation": "3;3;8", "confidence": "4;5;4", "correctness": "3;2;4", "technical_novelty": "2;2;3", "empirical_novelty": "2;2;3", "wc_summary_paper": "50;64;124", "wc_strength_and_weaknesses": "218;301;138", "wc_clarity_quality_novelty_and_reproducibility": "84;47;62", "wc_summary_review": "33;46;58", "wc_review": "385;458;382", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.666666666666667, 2.357022603955158 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 79.33333333333333, 32.097074979228594 ], "wc_strength_and_weaknesses_avg": [ 219.0, 66.54822812567339 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 64.33333333333333, 15.195028426721974 ], "wc_summary_review_avg": [ 45.666666666666664, 10.208928554075703 ], "wc_review_avg": [ 408.3333333333333, 35.140985883849204 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.5, "corr_recommendation_correctness": 0.8660254037844387, "gs_citation": 61, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6324564557035161048&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;0;0;2", "aff_unique_norm": "National University of Singapore;Sea AI Lab;Sea Group", "aff_unique_dep": ";;", "aff_unique_url": "https://www.nus.edu.sg;;", "aff_unique_abbr": "NUS;;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Singapore;" }, { "id": "ALuRpkAeQP", "title": 
"Quasi-Conservative Score-based Generative Models", "track": "main", "status": "Withdraw", "tldr": "In this paper, we propose Quasi-Conservative Score-based Generative Models (QCSGMs), which are designed to maintain both the architectural flexibility and the property of conservativeness of score-based generative models.", "abstract": "Existing Score-based Generative Models (SGMs) can be categorized into constrained SGMs (CSGMs) or unconstrained SGMs (USGMs) according to their parameterization approaches. CSGMs model the probability density functions as Boltzmann distributions, and assign their predictions as the negative gradients of some scalar-valued energy functions. On the other hand, USGMs employ flexible architectures capable of directly estimating scores without the need to explicitly model energy functions. In this paper, we demonstrate that the architectural constraints of CSGMs may limit their score-matching ability. In addition, we show that USGMs' inability to preserve the property of conservativeness may lead to serious sampling inefficiency and degraded sampling performance in practice. To address the above issues, we propose Quasi-Conservative Score-based Generative Models (QCSGMs) for keeping the advantages of both CSGMs and USGMs. Our theoretical derivations demonstrate that the training objective of QCSGMs can be efficiently integrated into the training processes by leveraging the Hutchinson trace estimator. In addition, our experimental results on the Cifar-10, Cifar-100, ImageNet, and SVHN datasets validate the effectiveness of QCSGMs. Finally, we justify the advantage of QCSGMs using an example of a one-layered autoencoder.", "keywords": "Score-based Generative Models;Conservativeness", "primary_area": "", "supplementary_material": "", "author": "Chen-Hao Chao;Wei-Fang Sun;Bo-Wun Cheng;Chun-Yi Lee", "authorids": "~Chen-Hao_Chao2;~Wei-Fang_Sun1;~Bo-Wun_Cheng1;~Chun-Yi_Lee1", "gender": "M;M;M;M", "homepage": ";https://github.com/bobcheng15;https://elsalab.ai;https://chen-hao-chao.github.io/", "dblp": "275/9039;291/4696;36/3668;291/4406", "google_scholar": "TgMlVRUAAAAJ;https://scholar.google.com.tw/citations?user=WYk_3QgAAAAJ;https://scholar.google.com.tw/citations?user=5mYNdo0AAAAJ;puKAQDgAAAAJ", "orcid": ";;0000-0002-4680-4800;0000-0003-1409-7467", "linkedin": ";\u67cf\u6587-\u912d-489a541b9/;;", "or_profile": "~Wei-Fang_Sun1;~Bo-Wun_Cheng1;~Chun-Yi_Lee1;~CHEN-HAO_CHAO1", "aff": "Department of Computer Science, National Tsing Hua University, National Tsing Hua University;;National Tsing Hua University;Department of Computer Science, National Tsing Hua University, National Tsing Hua University", "aff_domain": "cs.nthu.edu.tw;;nthu.edu.tw;cs.nthu.edu.tw", "position": "MS student;;Associate Professor;MS student", "bibtex": "@misc{\nchao2023quasiconservative,\ntitle={Quasi-Conservative Score-based Generative Models},\nauthor={Chen-Hao Chao and Wei-Fang Sun and Bo-Wun Cheng and Chun-Yi Lee},\nyear={2023},\nurl={https://openreview.net/forum?id=ALuRpkAeQP}\n}", "github": "", "project": "", "reviewers": "YXv3;2fxK;TYb7;xCU1", "site": "https://openreview.net/forum?id=ALuRpkAeQP", "pdf_size": 29514094, "recommendation": "3;5;5;5", "confidence": "4;3;4;4", "correctness": "3;3;3;3", "technical_novelty": "2;2;2;2", "empirical_novelty": "1;2;2;2", "wc_summary_paper": "153;125;88;180", "wc_strength_and_weaknesses": "259;182;449;304", "wc_clarity_quality_novelty_and_reproducibility": "35;39;45;82", "wc_summary_review": "18;76;27;61", "wc_review": "465;422;609;627", 
"wc_reply_reviewers": "47;0;240;244", "wc_reply_authors": "982;455;797;595", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;2;1", "recommendation_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 136.5, 34.09178786746157 ], "wc_strength_and_weaknesses_avg": [ 298.5, 97.2278252353718 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 50.25, 18.673175948402566 ], "wc_summary_review_avg": [ 45.5, 23.817010727629107 ], "wc_review_avg": [ 530.75, 88.79294735506869 ], "wc_reply_reviewers_avg": [ 132.75, 110.51555320406264 ], "wc_reply_authors_avg": [ 707.25, 199.8579182819635 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14063286086547692170&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "National Tsing Hua University", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.nthu.edu.tw", "aff_unique_abbr": "NTHU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Taiwan", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "AO8F51yRk67", "title": "Covariance-Robust Minimax Probability Machines for Algorithmic Recourse", "track": "main", "status": "Reject", "tldr": "We propose a novel pipeline to generate a model-agnostic recourse that is robust to model shifts.", "abstract": "Algorithmic recourse is rising as a prominent technique to promote the explainability and transparency of the predictive model in ethical machine learning. Existing approaches to algorithmic recourse often assume an invariant predictive model; however, this model, in reality, is usually updated temporally upon the input of new data. Thus, a recourse that is valid respective to the present model may become invalid for the future model. To resolve this issue, we propose a pipeline to generate a model-agnostic recourse that is robust to model shifts. Our pipeline first estimates a linear surrogate of the nonlinear (black-box) model using covariance-robust minimax probability machines (MPM); then, the recourse is generated with respect to this robust linear surrogate. We show that the covariance-robust MPM recovers popular regularization schemes, including l_2-regularization and class-reweighting. We also show that our covariance-robust MPM pushes the decision boundary in an intuitive manner, which facilitates an interpretable generation of a robust recourse. The numerical results demonstrate the usefulness and robustness of our pipeline. 
", "keywords": "Optimization and Learning under Uncertainty;Algorithmic Recourse;Trustworthy ML and Statistics", "primary_area": "", "supplementary_material": "/attachment/969dcec11810db21110513ad4f16778c384f1fcf.zip", "author": "Ngoc Bui;Duy Nguyen;Kim-Cuc Nguyen;Man-Chung Yue;Viet Anh Nguyen", "authorids": "~Ngoc_Bui1;~Duy_Nguyen2;~Kim-Cuc_Nguyen1;~Man-Chung_Yue1;~Viet_Anh_Nguyen2", "gender": "M;M;;;M", "homepage": "http://ngocbh.github.io;https://duykhuongnguyen.github.io/;;;http://www.vietanhnguyen.net", "dblp": "312/6811;;;;", "google_scholar": ";y323M_cAAAAJ;;;3iyf-EoAAAAJ", "orcid": ";;;;", "linkedin": ";duy-nguyen-89272a17b/;;;", "or_profile": "~Ngoc_Bui1;~Duy_Nguyen2;~Kim-Cuc_Nguyen1;~Man-Chung_Yue1;~Viet_Anh_Nguyen2", "aff": "Hanoi University of Science and Technology;VinAI Research;;;The Chinese University of Hong Kong", "aff_domain": "hust.edu.vn;vinai.io;;;cuhk.edu.hk", "position": "MS student;Research Resident;;;Assistant Professor", "bibtex": "@misc{\nbui2023covariancerobust,\ntitle={Covariance-Robust Minimax Probability Machines for Algorithmic Recourse},\nauthor={Ngoc Bui and Duy Nguyen and Kim-Cuc Nguyen and Man-Chung Yue and Viet Anh Nguyen},\nyear={2023},\nurl={https://openreview.net/forum?id=AO8F51yRk67}\n}", "github": "", "project": "", "reviewers": "14wF;NHFj;33C8;CMDf", "site": "https://openreview.net/forum?id=AO8F51yRk67", "pdf_size": 3011023, "recommendation": "3;3;8;8", "confidence": "4;4;3;2", "correctness": "2;3;4;4", "technical_novelty": "3;2;3;3", "empirical_novelty": "2;1;3;3", "wc_summary_paper": "155;51;106;75", "wc_strength_and_weaknesses": "455;84;350;221", "wc_clarity_quality_novelty_and_reproducibility": "143;19;77;161", "wc_summary_review": "45;28;37;78", "wc_review": "798;182;570;535", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1735;978;669;782", "reply_reviewers": "0;0;0;0", "reply_authors": "4;3;2;2", "recommendation_avg": [ 5.5, 2.5 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 96.75, 38.87399516386243 ], "wc_strength_and_weaknesses_avg": [ 277.5, 139.1015815869827 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 100.0, 56.25833271614081 ], "wc_summary_review_avg": [ 47.0, 18.881207588499205 ], "wc_review_avg": [ 521.25, 220.3671652038933 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1041.0, 415.6531005538152 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.9045340337332909, "corr_recommendation_correctness": 0.9045340337332909, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:YbLrtOZwkz8J:scholar.google.com/&scioq=Covariance-Robust+Minimax+Probability+Machines+for+Algorithmic+Recourse&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2", "aff_unique_norm": "Hanoi University of Science and Technology;VinAI Research;Chinese University of Hong Kong", "aff_unique_dep": ";;", "aff_unique_url": "https://www.hust.edu.vn;https://www.vinai.io/;https://www.cuhk.edu.hk", "aff_unique_abbr": "HUST;VinAI;CUHK", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Hanoi;;Hong Kong SAR", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Vietnam;China" }, { "id": "AONW9iXn22", "title": "Neural Operator Variational Inference based on Regularized Stein 
Discrepancy for Deep Gaussian Processes", "track": "main", "status": "Reject", "tldr": "", "abstract": "A Deep Gaussian Process (DGP) model is a hierarchical composition of GP models that provides a deep Bayesian nonparametric approach to infer the posterior. Exact Bayesian inference is usually intractable for DGPs, motivating the use of various approximations. We theoretically demonstrate that the traditional alternative of mean-field Gaussian assumptions across the hierarchy leads to lack of expressiveness and efficacy of DGP models, whilst stochastic approximation often incurs a significant computational cost. To address this issue, we propose Neural Operator Variational Inference (NOVI) for Deep Gaussian Processes, where a sampler is obtained from a neural generator through minimizing Regularized Stein Discrepancy in L2 space between the approximate distribution and true posterior, wherein a minimax problem is obtained and solved by Monte Carlo estimation and subsampling stochastic optimization. We experimentally demonstrate the effectiveness and efficiency of the proposed model, by applying it to a more flexible and wider class of posterior approximations on data ranging in size from hundreds to tens of thousands. By comparison, NOVI is superior to previous methods in both classification and regression.", "keywords": "Deep Gaussian processes;Operator variational inference;Stein discrepancy", "primary_area": "", "supplementary_material": "/attachment/93390f8ace254359db161e8b776550ce4ba5cb9a.zip", "author": "JIAN XU;Shian Du;Junmei Yang;Qianli Ma;Delu Zeng", "authorids": "~JIAN_XU5;~Shian_Du1;~Junmei_Yang1;~Qianli_Ma3;~Delu_Zeng4", "gender": "M;M;F;M;M", "homepage": ";;http://www2.scut.edu.cn/eemd/2013/1231/c4581a76497/page.htm;http://www2.scut.edu.cn/qianlima;", "dblp": "73/1149-21;317/1383;157/9330.html;57/8221-1;38/5665", "google_scholar": "https://scholar.google.com.hk/citations?user=DublkSoAAAAJ;SUgR5VAAAAAJ;;https://scholar.google.com/citations?hl=en;08RCdoIAAAAJ", "orcid": ";;;0000-0002-9356-2883;", "linkedin": ";;;;", "or_profile": "~JIAN_XU5;~Shian_Du1;~Junmei_Yang1;~Qianli_Ma3;~Delu_zeng1", "aff": "South China University of Technology;South China University of Technology;South China University of Technology;South China University of Technology;South China University of Technology", "aff_domain": "scut.edu.cn;scut.edu.cn;scut.edu.cn;scut.edu.cn;scut.edu.cn", "position": "PhD student;Undergrad student;Associate Professor;Full Professor;Full Professor", "bibtex": "@misc{\nxu2023neural,\ntitle={Neural Operator Variational Inference based on Regularized Stein Discrepancy for Deep Gaussian Processes},\nauthor={JIAN XU and Shian Du and Junmei Yang and Qianli Ma and Delu Zeng},\nyear={2023},\nurl={https://openreview.net/forum?id=AONW9iXn22}\n}", "github": "", "project": "", "reviewers": "GwnC;vt6c;C7ZF;wts5", "site": "https://openreview.net/forum?id=AONW9iXn22", "pdf_size": 480286, "recommendation": "3;3;3;5", "confidence": "4;4;5;3", "correctness": "3;3;3;2", "technical_novelty": "1;2;2;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "217;107;77;79", "wc_strength_and_weaknesses": "177;695;159;326", "wc_clarity_quality_novelty_and_reproducibility": "730;154;36;35", "wc_summary_review": "209;77;47;89", "wc_review": "1333;1033;319;529", "wc_reply_reviewers": "0;0;0;29", "wc_reply_authors": "592;586;199;322", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;1", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ],
"correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 120.0, 57.245087125446844 ], "wc_strength_and_weaknesses_avg": [ 339.25, 215.3768499630357 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 238.75, 287.71980727784455 ], "wc_summary_review_avg": [ 105.5, 61.68265558485626 ], "wc_review_avg": [ 803.5, 400.9772437433326 ], "wc_reply_reviewers_avg": [ 7.25, 12.55736835487436 ], "wc_reply_authors_avg": [ 424.75, 169.92259267089824 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.816496580927726, "corr_recommendation_correctness": -1.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9401369323610323985&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "South China University of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.scut.edu.cn", "aff_unique_abbr": "SCUT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Interactive Portrait Harmonization", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12122", "id": "AP0iZoaRaS", "poster": "/media/PosterPDFs/ICLR%202023/12122.png?t=1681077631.6179488", "openreview": "https://openreview.net/forum?id=AP0iZoaRaS", "slides": "https://iclr.cc/virtual/2023/poster/12122", "video": "https://iclr.cc/virtual/2023/poster/12122", "author_site": "Jeya Maria Jose Valanarasu, HE Zhang, Jianming Zhang, Yilin Wang, Zhe Lin, Jose Echevarria, Yinglan Ma, Zijun Wei, Kalyan Sunkavalli, Vishal Patel", "tldr": "A new flexible framework that allows users to pick certain regions of the background image and use it to guide the harmonization.", "abstract": "Current image harmonization methods consider the entire background as the guidance for harmonization. However, this may limit the capability for user to choose any specific object/person in the background to guide the harmonization. To enable flexible interaction between user and harmonization, we introduce interactive harmonization, a new setting where the harmonization is performed with respect to a selected region in the reference image instead of the entire background. A new flexible framework that allows users to pick certain regions of the background image and use it to guide the harmonization is proposed. Inspired by professional portrait harmonization users, we also introduce a new luminance matching loss to optimally match the color/luminance conditions between the composite foreground and select reference region. This framework provides more control to the image harmonization pipeline achieving visually pleasing portrait edits. Furthermore, we also introduce a new dataset carefully curated for validating portrait harmonization. 
Extensive experiments on both synthetic and real-world datasets show that the proposed approach is efficient and robust compared to previous harmonization baselines, especially for portraits.", "keywords": "harmonization;image editing;low-level vision", "primary_area": "", "supplementary_material": "/attachment/53ae55896b1e09c96b14b4e4f81e4539b1f3806c.zip", "author": "Jeya Maria Jose Valanarasu;HE Zhang;Jianming Zhang;Yilin Wang;Zhe Lin;Jose Echevarria;Yinglan Ma;Zijun Wei;Kalyan Sunkavalli;Vishal Patel", "authorids": "~Jeya_Maria_Jose_Valanarasu1;~HE_Zhang2;~Jianming_Zhang1;~Yilin_Wang4;~Zhe_Lin1;~Jose_Echevarria1;yingma@adobe.com;~Zijun_Wei2;~Kalyan_Sunkavalli1;~Vishal_Patel2", "gender": "M;M;M;M;M;M;;M;M;M", "homepage": "https://jeya-maria-jose.github.io/research/;https://scholar.google.com/citations?user=HZLiJt0AAAAJ&hl=en;https://jimmie33.github.io/;https://www.yilinwang.org;https://sites.google.com/site/zhelin625/;http://www.jiechevarria.com/;;;http://www.kalyans.org/;https://engineering.jhu.edu/vpatel36/", "dblp": "275/7027;24/2058-4;;47/3464-2.html;42/1680-1;03/9592;;157/3589;42/5978;76/6100", "google_scholar": "https://scholar.google.co.in/citations?user=vphpzPYAAAAJ;HZLiJt0AAAAJ;TkVHKDgAAAAJ;fYqdLx4AAAAJ;R0bnqaAAAAAJ;AUsvKdEAAAAJ;;8l3bFYYAAAAJ;j7uL6VEAAAAJ;AkEXTbIAAAAJ", "orcid": ";;0000-0002-9954-6294;;0000-0003-1154-9907;0000-0001-6802-0911;;;;", "linkedin": ";;;;;;;;;", "or_profile": "~Jeya_Maria_Jose_Valanarasu1;~HE_Zhang2;~Jianming_Zhang1;~Yilin_Wang4;~Zhe_Lin1;~Jose_Echevarria1;yingma@adobe.com;~Zijun_Wei2;~Kalyan_Sunkavalli1;~Vishal_Patel2", "aff": "Johns Hopkins University;Adobe Systems;Adobe Systems;Adobe Systems;Adobe Research;Adobe Research;;Adobe Systems;Adobe Research;Johns Hopkins University", "aff_domain": "jhu.edu;adobe.com;adobe.com;adobe.com;adobe.com;adobe.com;;adobe.com;adobe.com;jhu.edu", "position": "PhD student;Researcher;Research Scientist;research scientist;Principal Researcher;Senior Research Scientist;;Research Scientist;Principal Scientist;Assistant Professor", "bibtex": "@inproceedings{\nvalanarasu2023interactive,\ntitle={Interactive Portrait Harmonization},\nauthor={Jeya Maria Jose Valanarasu and HE Zhang and Jianming Zhang and Yilin Wang and Zhe Lin and Jose Echevarria and Yinglan Ma and Zijun Wei and Kalyan Sunkavalli and Vishal Patel},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=AP0iZoaRaS}\n}", "github": "", "project": "", "reviewers": "MxTM;DBoQ;53Gf;UNoj", "pdf_size": 39347769, "recommendation": "5;6;6;8", "confidence": "5;4;5;5", "correctness": "3;4;3;3", "technical_novelty": "3;3;3;3", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "74;46;80;75", "wc_strength_and_weaknesses": "217;76;265;165", "wc_clarity_quality_novelty_and_reproducibility": "11;43;122;176", "wc_summary_review": "70;65;33;14", "wc_review": "372;230;500;430", "wc_reply_reviewers": "163;0;15;0", "wc_reply_authors": "949;389;728;392", "reply_reviewers": "1;0;1;0", "reply_authors": "2;1;1;1", "recommendation_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 68.75, 13.329947486768281 ], "wc_strength_and_weaknesses_avg": [ 180.75, 70.05845773352422 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 88.0, 64.91147818375421 ], "wc_summary_review_avg": [ 45.5, 23.070543990118654 ], "wc_review_avg": [ 
383.0, 99.28242543370907 ], "wc_reply_reviewers_avg": [ 44.5, 68.68951885113187 ], "wc_reply_authors_avg": [ 614.5, 237.238803740029 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 10, 0 ], "corr_recommendation_confidence": 0.13245323570650439, "corr_recommendation_correctness": -0.13245323570650439, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5010572693356584456&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=AP0iZoaRaS", "email": "jhu.edu;adobe.com;adobe.com;adobe.com;adobe.com;adobe.com;;adobe.com;adobe.com;jhu.edu", "author_num": 10, "aff_unique_index": "0;1;1;1;1;1;1;1;0", "aff_unique_norm": "Johns Hopkins University;Adobe", "aff_unique_dep": ";Adobe Systems Incorporated", "aff_unique_url": "https://www.jhu.edu;https://www.adobe.com", "aff_unique_abbr": "JHU;Adobe", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "APkMDZtY9HL", "title": "Interpretable Out-of-Distribution Detection using Pattern Identification", "track": "main", "status": "Reject", "tldr": "We apply pattern detection to Out-of-Distribution detection on an extensive benchmark. ", "abstract": "Out-of-distribution (OoD) detection for data-based programs is a goal of paramount importance. Common approaches in the literature tend to train binary classifiers requiring inside-of-distribution (IoD) and OoD validation samples, and/or implement confidence metrics that are often abstract and therefore difficult to interpret. In this work, we propose to use the PARTICUL pattern identification algorithm in order to build more interpretable and robust OoD detectors for visual classifiers. Crucially, this approach does not require retraining the classifier and is tuned directly to the IoD dataset, making it applicable to domains where OoD does not have a clear definition. Moreover, pattern identification allows us to provide images from the IoD dataset as reference points to better explain our confidence scores. We illustrate the generalization abilities of our approach through an extensive benchmark across four datasets and two definitions of OoD. 
Our experiments show that the robustness of all metrics under test does not solely depend on the nature of the IoD dataset or the OoD definition, but also on the architecture of the classifier, which stresses the need for thorough experimentation in future work on OoD detection.", "keywords": "out-of-distribution detection;pattern detection;interpretable artificial intelligence;confidence;metric", "primary_area": "", "supplementary_material": "/attachment/59ad6865e8adac1100c98fdf278598c4f93ba894.zip", "author": "Romain Xu-Darme;Julien Girard-Satabin;Darryl Hond;Gabriele Incorvaia;Zakaria Chihani", "authorids": "~Romain_Xu-Darme2;~Julien_Girard-Satabin1;~Darryl_Hond1;~Gabriele_Incorvaia1;~Zakaria_Chihani1", "gender": "M;M;;Not Specified;", "homepage": ";https://julien.girard-satabin.fr/;https://ieeexplore.ieee.org/author/37088761332;https://ieeexplore.ieee.org/author/37088224943;", "dblp": ";;;;130/9747.html", "google_scholar": ";;;;mgzCh30AAAAJ", "orcid": "0000-0002-8630-5635;0000-0001-6374-3694;;;", "linkedin": ";julien-girard-smartsystems/;;;zakaria-chihani/", "or_profile": "~Romain_Xu-Darme2;~Julien_Girard-Satabin1;~Darryl_Hond1;~Gabriele_Incorvaia1;~Zakaria_Chihani1", "aff": "CEA;CEA;Thales UK;Thales UK;", "aff_domain": "cea.fr;cea.fr;thalesgroup.com;thalesgroup.com;", "position": "PhD student;Researcher;Researcher;Researcher;", "bibtex": "@misc{\nxu-darme2023interpretable,\ntitle={Interpretable Out-of-Distribution Detection using Pattern Identification},\nauthor={Romain Xu-Darme and Julien Girard-Satabin and Darryl Hond and Gabriele Incorvaia and Zakaria Chihani},\nyear={2023},\nurl={https://openreview.net/forum?id=APkMDZtY9HL}\n}", "github": "", "project": "", "reviewers": "DuyP;CPyj;2viJ;w3oX", "site": "https://openreview.net/forum?id=APkMDZtY9HL", "pdf_size": 5136251, "recommendation": "3;3;3;3", "confidence": "3;4;4;4", "correctness": "3;3;2;3", "technical_novelty": "2;2;2;2", "empirical_novelty": "2;0;2;2", "wc_summary_paper": "49;40;92;51", "wc_strength_and_weaknesses": "131;197;372;88", "wc_clarity_quality_novelty_and_reproducibility": "40;59;37;38", "wc_summary_review": "50;27;17;175", "wc_review": "270;323;518;352", "wc_reply_reviewers": "0;0;47;0", "wc_reply_authors": "269;406;571;100", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 1.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 58.0, 20.062402647738878 ], "wc_strength_and_weaknesses_avg": [ 197.0, 108.23816332514147 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 43.5, 9.013878188659973 ], "wc_summary_review_avg": [ 67.25, 63.34972375630378 ], "wc_review_avg": [ 365.75, 92.68865896106168 ], "wc_reply_reviewers_avg": [ 11.75, 20.351596988934308 ], "wc_reply_authors_avg": [ 336.5, 173.4279389256529 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "aff_unique_index": "0;0;1;1", "aff_unique_norm": "Commissariat \u00e0 l'\u00c9nergie Atomique et aux \u00c9nergies Alternatives;Thales", "aff_unique_dep": ";", "aff_unique_url": "https://www.cea.fr;https://www.thalesgroup.com", "aff_unique_abbr": "CEA;Thales", "aff_campus_unique_index": "", "aff_campus_unique": "",
"aff_country_unique_index": "0;0;1;1", "aff_country_unique": "France;United Kingdom" }, { "title": "Surgical Fine-Tuning Improves Adaptation to Distribution Shifts", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12230", "id": "APuPRxjHvZ", "poster": "", "openreview": "https://openreview.net/forum?id=APuPRxjHvZ", "slides": "https://iclr.cc/virtual/2023/poster/12230", "video": "https://iclr.cc/virtual/2023/poster/12230", "author_site": "Yoonho Lee, Annie Chen, Fahim Tajwar, Ananya Kumar, Huaxiu Yao, Percy Liang, Chelsea Finn", "tldr": "Selectively fine-tuning a subset of layers outperforms full fine-tuning when transferring to tasks with various distribution shifts.", "abstract": "A common approach to transfer learning under distribution shift is to fine-tune the last few layers of a pre-trained model, preserving learned features while also adapting to the new task. This paper shows that in such settings, selectively fine-tuning a subset of layers (which we term surgical fine-tuning) matches or outperforms commonly used fine-tuning approaches. Moreover, the type of distribution shift influences which subset is more effective to tune: for example, for image corruptions, fine-tuning only the first few layers works best. We validate our findings systematically across seven real-world data tasks spanning three types of distribution shifts. Theoretically, we prove that for two-layer neural networks in an idealized setting, first-layer tuning can outperform fine-tuning all layers. Intuitively, fine-tuning more parameters on a small target dataset can cause information learned during pre-training to be forgotten, and the relevant information depends on the type of shift.", "keywords": "Transfer learning;fine-tuning;parameter freezing;distortion of pre-trained models", "primary_area": "", "supplementary_material": "", "author": "Yoonho Lee;Annie S Chen;Fahim Tajwar;Ananya Kumar;Huaxiu Yao;Percy Liang;Chelsea Finn", "authorids": "~Yoonho_Lee1;~Annie_S_Chen1;~Fahim_Tajwar1;~Ananya_Kumar1;~Huaxiu_Yao1;~Percy_Liang1;~Chelsea_Finn1", "gender": "M;F;M;M;M;;F", "homepage": "https://yoonholee.com/;https://anniesch.github.io/;https://tajwarfahim.github.io/;https://ananyakumar.wordpress.com/;http://huaxiuyao.mystrikingly.com;https://cs.stanford.edu/~pliang/;https://ai.stanford.edu/~cbfinn/", "dblp": "213/8086;277/1527.html;292/1504;192/0474;197/1635;04/1701;131/1783", "google_scholar": "https://scholar.google.co.kr/citations?user=BAAZ_ysAAAAJ;;iMlmLO4AAAAJ;tP5IBFkAAAAJ;A20BZnQAAAAJ;pouyVyUAAAAJ;vfPE6hgAAAAJ", "orcid": ";;0000-0001-9257-6282;;;;", "linkedin": ";annie-s-chen/;fahim-tajwar-8a5377162/;;huaxiuyao/;;", "or_profile": "~Yoonho_Lee1;~Annie_S_Chen1;~Fahim_Tajwar1;~Ananya_Kumar1;~Huaxiu_Yao1;~Percy_Liang1;~Chelsea_Finn1", "aff": "Stanford University;Stanford University;Stanford University;Stanford University;Computer Science Department, Stanford University;Stanford University;Google", "aff_domain": "stanford.edu;stanford.edu;stanford.edu;stanford.edu;cs.stanford.edu;stanford.edu;google.com", "position": "PhD student;PhD student;MS student;PhD student;Postdoc;Associate Professor;Research Scientist", "bibtex": "@inproceedings{\nlee2023surgical,\ntitle={Surgical Fine-Tuning Improves Adaptation to Distribution Shifts},\nauthor={Yoonho Lee and Annie S Chen and Fahim Tajwar and Ananya Kumar and Huaxiu Yao and Percy Liang and Chelsea Finn},\nbooktitle={The Eleventh International Conference on Learning Representations 
},\nyear={2023},\nurl={https://openreview.net/forum?id=APuPRxjHvZ}\n}", "github": "", "project": "", "reviewers": "eWpB;Y4ag;QAba", "pdf_size": 1900353, "recommendation": "6;8;8", "confidence": "4;3;3", "correctness": "4;3;3", "technical_novelty": "3;3;3", "empirical_novelty": "3;3;3", "wc_summary_paper": "104;56;113", "wc_strength_and_weaknesses": "468;64;265", "wc_clarity_quality_novelty_and_reproducibility": "36;52;65", "wc_summary_review": "55;36;81", "wc_review": "663;208;524", "wc_reply_reviewers": "28;64;0", "wc_reply_authors": "857;459;593", "reply_reviewers": "1;2;0", "reply_authors": "2;2;2", "recommendation_avg": [ 7.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 91.0, 25.019992006393608 ], "wc_strength_and_weaknesses_avg": [ 265.6666666666667, 164.93298302307178 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 51.0, 11.86029791643813 ], "wc_summary_review_avg": [ 57.333333333333336, 18.445113776342563 ], "wc_review_avg": [ 465.0, 190.38032111189082 ], "wc_reply_reviewers_avg": [ 30.666666666666668, 26.195843605851334 ], "wc_reply_authors_avg": [ 636.3333333333334, 165.3467736472519 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.9999999999999998, "corr_recommendation_correctness": -0.9999999999999998, "gs_citation": 229, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4644856885286773964&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=APuPRxjHvZ", "email": "stanford.edu;stanford.edu;stanford.edu;stanford.edu;cs.stanford.edu;stanford.edu;google.com", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0;1", "aff_unique_norm": "Stanford University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.stanford.edu;https://www.google.com", "aff_unique_abbr": "Stanford;Google", "aff_campus_unique_index": "0;0;0;0;0;0;1", "aff_campus_unique": "Stanford;Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "AR4rOT4sECN", "title": "Offline RL of the Underlying MDP from Heterogeneous Data Sources", "track": "main", "status": "Reject", "tldr": "This work investigated the problem of learning an underlying MDP with offline datasets from heterogeneous sources and proposed several provably efficient designs.", "abstract": "Most of the existing offline reinforcement learning (RL) studies assume the available dataset is sampled directly from the target environment. However, in some practical applications, the available data are often coming from several related but heterogeneous environments. A theoretical understanding of efficient learning from heterogeneous offline datasets remains lacking. In this work, we study the problem of learning a (hidden) underlying Markov decision process (MDP) based on heterogeneous offline datasets collected from multiple randomly perturbed data sources. A novel HetPEVI algorithm is proposed, which jointly considers two types of uncertainties: sample uncertainties from the finite number of data samples per data source, and source uncertainties due to a finite number of data sources. 
Building on HetPEVI, we further incorporate reference-advantage decompositions and Bernstein-type penalties to propose the HetPEVI-Adv algorithm. Theoretical analysis not only proves the effectiveness of both HetPEVI and HetPEVI-Adv but also demonstrates the advantage of the latter. More importantly, the results explicitly characterize the learning loss due to the finite heterogeneously realized environments compared with sampling directly from the underlying MDP. Finally, we extend the study to MDPs with linear function approximation and propose the HetPEVI-Lin algorithm that provides additional efficiency guarantees beyond the tabular case.", "keywords": "RL Theory;Offline RL;Underlying MDP;Heterogeneous Data Sources;Provable Efficiency", "primary_area": "", "supplementary_material": "", "author": "Chengshuai Shi;Wei Xiong;Cong Shen;Jing Yang", "authorids": "~Chengshuai_Shi1;~Wei_Xiong9;~Cong_Shen1;~Jing_Yang3", "gender": "M;M;M;", "homepage": "https://chengshuai-shi.github.io/;https://weixiongust.github.io/WeiXiongUST/index.html;https://cshen317.github.io/;http://www.ee.psu.edu/yang", "dblp": "259/3938;33/4054-15;79/6027-1.html;", "google_scholar": "twvDiW8AAAAJ;m2-OwQEAAAAJ;70LBhKcAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-2727-8251;;0000-0002-3148-4453;", "linkedin": ";;cong-shen-3372404/;", "or_profile": "~Chengshuai_Shi1;~Wei_Xiong9;~Cong_Shen1;~Jing_Yang3", "aff": "University of Virginia;Hong Kong University of Science and Technology;University of Virginia;Pennsylvania State University", "aff_domain": "virginia.edu;ust.hk;virginia.edu;psu.edu", "position": "PhD student;MS student;Assistant Professor;Associate Professor", "bibtex": "@misc{\nshi2023offline,\ntitle={Offline {RL} of the Underlying {MDP} from Heterogeneous Data Sources},\nauthor={Chengshuai Shi and Wei Xiong and Cong Shen and Jing Yang},\nyear={2023},\nurl={https://openreview.net/forum?id=AR4rOT4sECN}\n}", "github": "", "project": "", "reviewers": "HaHm;LDfB;hDmb;cUra", "site": "https://openreview.net/forum?id=AR4rOT4sECN", "pdf_size": 386078, "recommendation": "3;5;5;6", "confidence": "4;4;3;3", "correctness": "2;4;3;4", "technical_novelty": "2;2;2;3", "empirical_novelty": "0;0;0;0", "wc_summary_paper": "48;113;65;139", "wc_strength_and_weaknesses": "160;234;431;221", "wc_clarity_quality_novelty_and_reproducibility": "58;22;12;1", "wc_summary_review": "20;24;28;54", "wc_review": "286;393;536;415", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "890;603;1309;559", "reply_reviewers": "0;0;0;0", "reply_authors": "2;1;2;1", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 0, 0 ], "wc_summary_paper_avg": [ 91.25, 36.44430682562093 ], "wc_strength_and_weaknesses_avg": [ 261.5, 101.77057531526488 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 23.25, 21.39363223017541 ], "wc_summary_review_avg": [ 31.5, 13.294735800308331 ], "wc_review_avg": [ 407.5, 88.79893017373577 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 840.25, 298.99446065102944 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.6882472016116854, "corr_recommendation_correctness": 0.899228803025897, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:sP9LjFPyaakJ:scholar.google.com/&scioq=Offline+RL+of+the+Underlying+MDP+from+Heterogeneous+Data+Sources&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "University of Virginia;Hong Kong University of Science and Technology;Pennsylvania State University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.virginia.edu;https://www.ust.hk;https://www.psu.edu", "aff_unique_abbr": "UVA;HKUST;PSU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;China" }, { "title": "Enhancing the Inductive Biases of Graph Neural ODE for Modeling Physical Systems", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11168", "id": "ATLEl_izD87", "poster": "/media/PosterPDFs/ICLR%202023/11168.png?t=1681123500.2170877", "openreview": "https://openreview.net/forum?id=ATLEl_izD87", "slides": "https://iclr.cc/virtual/2023/poster/11168", "video": "https://iclr.cc/virtual/2023/poster/11168", "author_site": "Suresh Bishnoi, Ravinder Bhattoo, Jayadeva Jayadeva, Sayan Ranu, N. M. Anoop Krishnan", "tldr": "Inferring the dynamics of physical systems can be significantly enhanced by Graph neural ODEs with appropriate inductive biases", "abstract": "Neural networks with physics-based inductive biases such as Lagrangian neural networks (LNNs), and Hamiltonian neural networks (HNNs) learn the dynamics of physical systems by encoding strong inductive biases. Alternatively, Neural ODEs with appropriate inductive biases have also been shown to give similar performances. However, these models, when applied to particle-based systems, are transductive in nature and hence, do not generalize to large system sizes. In this paper, we present a graph-based neural ODE, GNODE, to learn the time evolution of dynamical systems. Further, we carefully analyze the role of different inductive biases on the performance of GNODE. We show that similar to LNN and HNN, encoding the constraints explicitly can significantly improve the training efficiency and performance of GNODE significantly. Our experiments also assess the value of additional inductive biases, such as Newton\u2019s third law, on the final performance of the model. We demonstrate that inducing these biases can enhance the performance of the model by orders of magnitude in terms of both energy violation and rollout error. Interestingly, we observe that the GNODE trained with the most effective inductive biases, namely MCGNODE, outperforms the graph versions of LNN and HNN, namely, Lagrangian graph networks (LGN) and Hamiltonian graph networks (HGN) in terms of energy violation error by \u223c4 orders of magnitude for a pendulum system, and \u223c2 orders of magnitude for spring systems. 
These results suggest that NODE-based systems can achieve performance competitive with energy-conserving neural networks by employing appropriate inductive biases.", "keywords": "Neural ODE;Graph neural network;physical systems;Graph Neural ODE", "primary_area": "", "supplementary_material": "", "author": "Suresh Bishnoi;Ravinder Bhattoo;Jayadeva Jayadeva;Sayan Ranu;N M Anoop Krishnan", "authorids": "~Suresh_Bishnoi1;~Ravinder_Bhattoo1;~Jayadeva_Jayadeva1;~Sayan_Ranu2;~N_M_Anoop_Krishnan1", "gender": "M;M;M;M;M", "homepage": "https://web.iitd.ac.in/~srz208500/;https://ravinderbhattoo.github.io;;https://www.cse.iitd.ac.in/~sayan/index.html;", "dblp": "329/6194;;58/4288;38/768;", "google_scholar": "Wy6q2QwAAAAJ;lPTdGRMAAAAJ;;K4w5qYUAAAAJ;https://scholar.google.co.in/citations?user=fGnjHcEAAAAJ", "orcid": ";0000-0003-0323-9108;;0000-0003-4147-9372;0000-0003-1500-4947", "linkedin": "sureshb1999/;;;;", "or_profile": "~Suresh_Bishnoi1;~Ravinder_Bhattoo1;~Jayadeva_Jayadeva1;~Sayan_Ranu2;~N_M_Anoop_Krishnan1", "aff": "Indian Institute of Technology Delhi;Indian Institute of Technology, Indore;Indian Institute of Technology Delhi;Indian Institute of Technology Delhi;Indian Institute of Technology Delhi", "aff_domain": "iitd.ac.in;iiti.ac.in;iitd.ac.in;iitd.ac.in;iitd.ac.in", "position": "PhD student;Assistant Professor;Full Professor;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nbishnoi2023enhancing,\ntitle={Enhancing the Inductive Biases of Graph Neural {ODE} for Modeling Physical Systems},\nauthor={Suresh Bishnoi and Ravinder Bhattoo and Jayadeva Jayadeva and Sayan Ranu and N M Anoop Krishnan},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=ATLEl_izD87}\n}", "github": "", "project": "", "reviewers": "okft;gQW5;zGuL;UJsG", "pdf_size": 8327177, "recommendation": "6;6;8;8", "confidence": "3;3;3;4", "correctness": "3;3;4;3", "technical_novelty": "3;2;3;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "76;59;191;71", "wc_strength_and_weaknesses": "241;202;85;530", "wc_clarity_quality_novelty_and_reproducibility": "32;56;29;3", "wc_summary_review": "27;50;224;20", "wc_review": "376;367;529;624", "wc_reply_reviewers": "28;46;23;142", "wc_reply_authors": "1278;2113;840;2380", "reply_reviewers": "1;2;1;2", "reply_authors": "6;7;3;6", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 99.25, 53.33092442476504 ], "wc_strength_and_weaknesses_avg": [ 264.5, 163.68338339611628 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 30.0, 18.774983355518586 ], "wc_summary_review_avg": [ 80.25, 83.73283406167499 ], "wc_review_avg": [ 474.0, 107.90968445881028 ], "wc_reply_reviewers_avg": [ 59.75, 48.25129531940049 ], "wc_reply_authors_avg": [ 1652.75, 620.8306431709053 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 5.5, 1.5 ], "replies_avg": [ 39, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5919353151171804022&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "pdf": "https://openreview.net/pdf?id=ATLEl_izD87", "email": "iitd.ac.in;iiti.ac.in;iitd.ac.in;iitd.ac.in;iitd.ac.in", "author_num":
5, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "Indian Institute of Technology Delhi;Indian Institute of Technology Indore", "aff_unique_dep": ";", "aff_unique_url": "https://www.iitd.ac.in;https://www.iiti.ac.in", "aff_unique_abbr": "IIT Delhi;IIT Indore", "aff_campus_unique_index": "0;1;0;0;0", "aff_campus_unique": "Delhi;Indore", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "India" }, { "id": "ATWW-bUtxH", "title": "FEW-SHOT NODE PROMPT TUNING", "track": "main", "status": "Withdraw", "tldr": "In this paper, we propose Few-shot Node Prompt Tuning as a effective method to tackle general few-shot node classification tasks.", "abstract": "Despite the powerful representation ability of GNNs, recent works have demonstrated that the performance of GNNs can severely degrade when the number of labeled nodes is limited in training data. \\textit{Few-shot Node Classification} is one of the problems with an extreme shortage of node labels and has drawn growing attention lately. The current modus operandi, i.e., meta-learning, has succeeded in transferring the structural knowledge learned from \\textit{base classes} with abundant labeled nodes to few-shot \\textit{novel classes}. However, for real-world scenarios, it is often the case that all the classes on the graph have limited labeled nodes, thus meta-learning cannot be directly deployed. In this work, we generalize the few-shot node classification by removing the assumption that there exist abundant labeled nodes for the base classes. In the meantime, we propose a novel \\textit{Few-shot Node Prompt Tuning} method to effectively elicit substantial prior knowledge in the input graph for solving few-shot node classification tasks without labeled base classes. Specifically, we fix a pretrained graph transformer as the encoder and inject virtual nodes as soft prompts in the embedding space to bridge the gap of training objectives between the pretexts and downstream few-shot node classification tasks. Such prompts are small tensors and can be efficiently optimized with a simple classifier corresponding to the few labeled nodes. Since a single pretrained encoder is shared across different tasks, the proposed method retains the efficiency and potential for the model ensemble. Extensive experiments on four prevalent node classification datasets show that the proposed method, FS-NPT, is an efficient and effective way to tackle the general few-shot node classification problem. 
Our implementation is released\\footnote{\\url{https://github.com/Anonymous-submit-23/FS-NPT.git}}.", "keywords": "node classification;few-shot learning;graph neural networks", "primary_area": "", "supplementary_material": "", "author": "Zhen Tan;Ruocheng Guo;Kaize Ding;Aditya Rajeshbhai Boghara;huan liu", "authorids": "~Zhen_Tan2;~Ruocheng_Guo1;~Kaize_Ding1;~Aditya_Rajeshbhai_Boghara1;~huan_liu1", "gender": "M;M;M;M;", "homepage": "https://zhen-tan-dmml.github.io/;https://rguo12.github.io;https://kaize0409.github.io/;;", "dblp": "13/10345-1.html;167/4378;234/6878;;", "google_scholar": "yMV7JtIAAAAJ;8Nuj8NwAAAAJ;PI3myr8AAAAJ;;", "orcid": "0009-0006-9548-2330;;;;", "linkedin": ";;;www.linkedin.com/in/adityaboghara;", "or_profile": "~Zhen_Tan2;~Ruocheng_Guo1;~Kaize_Ding1;~Aditya_Rajeshbhai_Boghara1;~huan_liu1", "aff": "Arizona State University;Bytedance Research;Arizona State University;Arizona State University;", "aff_domain": "asu.edu;bytedance.com;asu.edu;asu.edu;", "position": "PhD student;Researcher;PhD student;Undergrad student;", "bibtex": "@misc{\ntan2023fewshot,\ntitle={{FEW}-{SHOT} {NODE} {PROMPT} {TUNING}},\nauthor={Zhen Tan and Ruocheng Guo and Kaize Ding and Aditya Rajeshbhai Boghara and huan liu},\nyear={2023},\nurl={https://openreview.net/forum?id=ATWW-bUtxH}\n}", "github": "", "project": "", "reviewers": "ErSP;fQ2F;VdsZ", "site": "https://openreview.net/forum?id=ATWW-bUtxH", "pdf_size": 575362, "recommendation": "3;5;5", "confidence": "4;3;4", "correctness": "4;4;3", "technical_novelty": "2;2;2", "empirical_novelty": "2;2;2", "wc_summary_paper": "53;50;55", "wc_strength_and_weaknesses": "495;168;118", "wc_clarity_quality_novelty_and_reproducibility": "50;36;6", "wc_summary_review": "32;55;15", "wc_review": "630;309;194", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 52.666666666666664, 2.0548046676563256 ], "wc_strength_and_weaknesses_avg": [ 260.3333333333333, 167.1851933901112 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 30.666666666666668, 18.354533197248273 ], "wc_summary_review_avg": [ 34.0, 16.391054470858997 ], "wc_review_avg": [ 377.6666666666667, 184.49992472145408 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.49999999999999983, "corr_recommendation_correctness": -0.49999999999999983, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:cRxQnIytSz0J:scholar.google.com/&scioq=FEW-SHOT+NODE+PROMPT+TUNING&hl=en&as_sdt=0,11", "gs_version_total": 0, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Arizona State University;ByteDance", "aff_unique_dep": ";Bytedance Research", "aff_unique_url": "https://www.asu.edu;https://www.bytedance.com", "aff_unique_abbr": "ASU;Bytedance", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;China" }, { "id": "AV_bv4Ydcr9", "title": "Attention Enables Zero Approximation Error", "track": "main", "status": "Reject", "tldr": "", "abstract": "Attention-based architectures have become the
core backbone of many state-of-the-art models for various tasks, including language translation and image classification. However, theoretical properties of attention-based models are seldom considered. In this work, we show that with suitable adaptations, the single-head self-attention transformer with a fixed number of transformer encoder blocks and free parameters is able to generate any desired polynomial of the input with no error. The number of transformer encoder blocks is the same as the degree of the target polynomial. Even more exciting, we find that these transformer encoder blocks in this model do not need to be trained. As a direct consequence, we show that the single-head self-attention transformer with increasing numbers of free parameters is universal. Also, we show that our proposed model can avoid the classical trade-off between approximation error and sample error in the mean squared error analysis for the regression task if the target function is a polynomial. We conduct various experiments and ablation studies to verify our theoretical results.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/c9eba253d4861eb375d3947dc553c89cf679b756.zip", "author": "Zhiying Fang;Yidong Ouyang;Ding-Xuan Zhou;Guang Cheng", "authorids": "~Zhiying_Fang1;~Yidong_Ouyang1;~Ding-Xuan_Zhou1;~Guang_Cheng1", "gender": "M;M;M;M", "homepage": ";https://yidongouyang.github.io/;https://www.cityu.edu.hk/rcms/DXZhou.htm;http://www.stat.ucla.edu/~guangcheng/", "dblp": "255/0335;270/0351;;99/4812", "google_scholar": ";fQwCFK0AAAAJ;k-V5V0gAAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Zhiying_Fang1;~Yidong_Ouyang1;~Ding-Xuan_Zhou1;~Guang_Cheng1", "aff": "Shenzhen Polytechnic;;City University of Hong Kong;University of California, Los Angeles", "aff_domain": "szpt.edu.cn;;cityu.edu.hk;ucla.edu", "position": "Lecturer;;Chair Professor;Full Professor", "bibtex": "@misc{\nfang2023attention,\ntitle={Attention Enables Zero Approximation Error},\nauthor={Zhiying Fang and Yidong Ouyang and Ding-Xuan Zhou and Guang Cheng},\nyear={2023},\nurl={https://openreview.net/forum?id=AV_bv4Ydcr9}\n}", "github": "", "project": "", "reviewers": "sof3;HdBP;8KLD;4Zo8;H8Qq", "site": "https://openreview.net/forum?id=AV_bv4Ydcr9", "pdf_size": 807853, "recommendation": "3;5;5;5;6", "confidence": "4;3;5;4;4", "correctness": "2;3;3;3;3", "technical_novelty": "2;4;3;3;3", "empirical_novelty": "1;2;3;0;3", "wc_summary_paper": "79;73;26;52;71", "wc_strength_and_weaknesses": "345;85;149;87;123", "wc_clarity_quality_novelty_and_reproducibility": "190;35;40;199;8", "wc_summary_review": "76;50;20;32;22", "wc_review": "690;243;235;370;224", "wc_reply_reviewers": "172;0;0;0;0", "wc_reply_authors": "269;374;292;507;236", "reply_reviewers": "1;0;0;0;0", "reply_authors": "1;1;1;1;1", "recommendation_avg": [ 4.8, 0.9797958971132712 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "correctness_avg": [ 2.8, 0.39999999999999997 ], "technical_novelty_avg": [ 3.0, 0.6324555320336759 ], "empirical_novelty_avg": [ 1.8, 1.1661903789690602 ], "wc_summary_paper_avg": [ 60.2, 19.343215865000317 ], "wc_strength_and_weaknesses_avg": [ 157.8, 96.58654150553275 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 94.4, 82.50236360250535 ], "wc_summary_review_avg": [ 40.0, 20.89976076418101 ], "wc_review_avg": [ 352.4, 176.93004267223813 ], "wc_reply_reviewers_avg": [ 34.4, 68.8 ], "wc_reply_authors_avg": [ 335.6, 97.06616300235629 ], "reply_reviewers_avg": [ 0.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.0, 
0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.9185586535436918, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9905746641718256594&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Shenzhen Polytechnic;City University of Hong Kong;University of California, Los Angeles", "aff_unique_dep": ";;", "aff_unique_url": "http://www.szpt.edu.cn;https://www.cityu.edu.hk;https://www.ucla.edu", "aff_unique_abbr": ";CityU;UCLA", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Hong Kong SAR;Los Angeles", "aff_country_unique_index": "0;0;1", "aff_country_unique": "China;United States" }, { "id": "AW0i0lOhzqJ", "title": "First-order Context-based Adaptation for Generalizing to New Dynamical Systems", "track": "main", "status": "Reject", "tldr": "We propose FOCA, a learning framework to model sets of systems governed by common but unknown laws that differentiate themselves by their context and train FOCA with a simple and efficient EMA-based method.", "abstract": "In this paper, we propose FOCA (First-Order Context-based Adaptation), a learning framework to model sets of systems governed by common but unknown laws that differentiate themselves by their context. Inspired by classical modeling-and-identification approaches, FOCA learns to represent the common law through shared parameters and relies on online optimization to compute system-specific context. Due to the online optimization-based context inference, the training of FOCA involves a bi-level optimization problem. To train FOCA efficiently, we utilize an exponential moving average (EMA)-based method that allows for fast training using only first-order derivatives. 
We test FOCA on polynomial regression and time-series prediction tasks composed of three ODEs and one PDE, empirically finding it outperforms baselines.", "keywords": "physical system modeling;differential equation;generalization;context;adaptation", "primary_area": "", "supplementary_material": "/attachment/b31bcc2d748af839e9b105be70a39930755bc361.zip", "author": "Junyoung Park;Federico Berto;Arec Jamgochian;Mykel Kochenderfer;Jinkyoo Park", "authorids": "~Junyoung_Park1;~Federico_Berto1;~Arec_Jamgochian1;~Mykel_Kochenderfer1;~Jinkyoo_Park1", "gender": ";M;;M;M", "homepage": ";https://fedebotu.github.io/;;https://mykel.kochenderfer.com;http://silab.kaist.ac.kr/", "dblp": ";317/1711;;34/2029.html;156/7535", "google_scholar": ";https://scholar.google.com/citations?hl=en;;cAy9G6oAAAAJ;sH2a0nkAAAAJ", "orcid": ";0000-0002-7438-8365;;0000-0002-7238-9663;0000-0003-2620-1479", "linkedin": ";federicoberto/;;mykel-kochenderfer;", "or_profile": "~Junyoung_Park1;~Federico_Berto1;~Arec_Jamgochian1;~Mykel_Kochenderfer1;~Jinkyoo_Park1", "aff": ";Korea Advanced Institute of Science & Technology;;Stanford University;Korea Advanced Institute of Science & Technology", "aff_domain": ";kaist.ac.kr;;stanford.edu;kaist.ac.kr", "position": ";PhD student;;Associate Professor;Associate Professor", "bibtex": "@misc{\npark2023firstorder,\ntitle={First-order Context-based Adaptation for Generalizing to New Dynamical Systems},\nauthor={Junyoung Park and Federico Berto and Arec Jamgochian and Mykel Kochenderfer and Jinkyoo Park},\nyear={2023},\nurl={https://openreview.net/forum?id=AW0i0lOhzqJ}\n}", "github": "", "project": "", "reviewers": "dXrp;t6gz;fp24", "site": "https://openreview.net/forum?id=AW0i0lOhzqJ", "pdf_size": 1805969, "recommendation": "5;5;8", "confidence": "4;3;4", "correctness": "3;2;4", "technical_novelty": "2;3;4", "empirical_novelty": "0;3;3", "wc_summary_paper": "42;158;72", "wc_strength_and_weaknesses": "551;454;113", "wc_clarity_quality_novelty_and_reproducibility": "10;163;5", "wc_summary_review": "21;113;13", "wc_review": "624;888;203", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "765;1876;141", "reply_reviewers": "0;0;0", "reply_authors": "2;4;1", "recommendation_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 3.0, 0.816496580927726 ], "empirical_novelty_avg": [ 2.0, 1.4142135623730951 ], "wc_summary_paper_avg": [ 90.66666666666667, 49.16186417223099 ], "wc_strength_and_weaknesses_avg": [ 372.6666666666667, 187.83385093096388 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 59.333333333333336, 73.33181816616546 ], "wc_summary_review_avg": [ 49.0, 45.37253207246281 ], "wc_review_avg": [ 571.6666666666666, 282.08785077623526 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 927.3333333333334, 717.5515467353006 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.3333333333333335, 1.247219128924647 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.5, "corr_recommendation_correctness": 0.8660254037844387, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Oar6q3RU_DcJ:scholar.google.com/&scioq=First-order+Context-based+Adaptation+for+Generalizing+to+New+Dynamical+Systems&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Stanford University", "aff_unique_dep": ";", 
"aff_unique_url": "https://www.kaist.ac.kr;https://www.stanford.edu", "aff_unique_abbr": "KAIST;Stanford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;1;0", "aff_country_unique": "South Korea;United States" }, { "title": "Powderworld: A Platform for Understanding Generalization via Rich Task Distributions", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11470", "id": "AWZgXGmsbA", "poster": "", "openreview": "https://openreview.net/forum?id=AWZgXGmsbA", "slides": "https://iclr.cc/virtual/2023/poster/11470", "video": "https://iclr.cc/virtual/2023/poster/11470", "author_site": "Kevin Frans, Phillip Isola", "tldr": "Powderworld is an environment supporting the study of generalization by providing diverse tasks arising from the same core rules.", "abstract": "One of the grand challenges of reinforcement learning is the ability to generalize to new tasks. However, general agents require a set of rich, diverse tasks to train on. Designing a `foundation environment' for such tasks is tricky -- the ideal environment would support a range of emergent phenomena, an expressive task space, and fast runtime. To take a step towards addressing this research bottleneck, this work presents Powderworld, a lightweight yet expressive simulation environment running directly on the GPU. Within Powderworld, two motivating task distributions are presented, one for world-modelling and one for reinforcement learning. Each contains hand-designed test tasks to examine generalization. Experiments indicate that increasing the environment's complexity improves generalization for world models, yet causes reinforcement learning agents to struggle. Powderworld aims to support the study of generalization by providing a source of diverse tasks arising from the same core rules.", "keywords": "reinforcement learning;environment;generalization;out-of-distribution;multi-task", "primary_area": "", "supplementary_material": "/attachment/910a9af121317edc7a43ab973d613003673e2fda.zip", "author": "Kevin Frans;Phillip Isola", "authorids": "~Kevin_Frans1;~Phillip_Isola1", "gender": "M;M", "homepage": "http://kvfrans.com;http://web.mit.edu/phillipi/", "dblp": "199/2314;36/9988", "google_scholar": "NQ2ZWBoAAAAJ;ROILf3EAAAAJ", "orcid": ";0000-0002-1411-6704", "linkedin": ";phillip-isola-a9955b20/", "or_profile": "~Kevin_Frans1;~Phillip_Isola1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu", "position": "Undergrad student;Associate Professor", "bibtex": "@inproceedings{\nfrans2023powderworld,\ntitle={Powderworld: A Platform for Understanding Generalization via Rich Task Distributions},\nauthor={Kevin Frans and Phillip Isola},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=AWZgXGmsbA}\n}", "github": "", "project": "", "reviewers": "oDrU;fLgh;EMHS;pDhR", "pdf_size": 10684288, "recommendation": "8;8;8;8", "confidence": "4;4;4;4", "correctness": "3;4;3;3", "technical_novelty": "2;4;3;2", "empirical_novelty": "2;4;3;3", "wc_summary_paper": "42;28;46;115", "wc_strength_and_weaknesses": "327;396;286;213", "wc_clarity_quality_novelty_and_reproducibility": "23;30;70;30", "wc_summary_review": "109;23;112;159", "wc_review": "501;477;514;517", "wc_reply_reviewers": "110;25;62;73", "wc_reply_authors": "898;775;715;617", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "recommendation_avg": [ 8.0, 0.0 
], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 57.75, 33.72221078161988 ], "wc_strength_and_weaknesses_avg": [ 305.5, 66.31176366226433 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 38.25, 18.552290963651902 ], "wc_summary_review_avg": [ 100.75, 49.07328703072579 ], "wc_review_avg": [ 502.25, 15.769828787910159 ], "wc_reply_reviewers_avg": [ 67.5, 30.30264014900352 ], "wc_reply_authors_avg": [ 751.25, 101.7800938297858 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14243782061275744099&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=AWZgXGmsbA", "email": "mit.edu;mit.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "AXP2Sf6qqSZ", "title": "Counterfactual Explanation via Search in Gaussian Mixture Distributed Latent Space", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Counterfactual Explanations (CEs) are an important tool in Algorithmic Recourse for addressing two questions: 1. What are the crucial factors that led to an automated prediction/decision? 2. How can these factors be changed to achieve a more favorable outcome from a user's perspective? Thus, guiding the user's interaction with AI systems by proposing easy-to-understand explanations and easy-to-attain actionable changes is essential for the trustworthy adoption and long-term acceptance of AI systems. In the literature, various methods have been proposed to generate CEs, and different quality measures have been suggested to evaluate these methods. However, the generation of CEs is usually computationally expensive, and the resulting suggestions are unrealistic and thus non-actionable. In this paper, we introduce a new method to generate CEs for a pre-trained binary classifier by first shaping the latent space of an autoencoder to be a mixture of Gaussian distributions. CEs are then generated in latent space by linear interpolation between the query sample and the centroid of the target class. We show that our method maintains the characteristics of the input sample during the counterfactual search. 
In various experiments, we show that the proposed method is competitive on different quality measures on image and tabular datasets: it efficiently returns results that are closer to the original data manifold than those of three state-of-the-art methods, which is essential for realistic high-dimensional machine learning applications.", "keywords": "XAI;Counterfactual Explanation;Autoencoder;Gaussian-Mixture Distribution;Disentanglement", "primary_area": "", "supplementary_material": "/attachment/cd4585eed927f14437752c3b7e80e9cb743f01f4.zip", "author": "Xuan Zhao;Klaus Broelemann;Gjergji Kasneci", "authorids": "~Xuan_Zhao5;~Klaus_Broelemann1;~Gjergji_Kasneci2", "gender": "F;;M", "homepage": "https://uni-tuebingen.de/jp/fakultaeten/mathematisch-naturwissenschaftliche-fakultaet/fachbereiche/informatik/lehrstuehle/data-science-analytics/team/xuan-zhao/;;https://www.gov.sot.tum.de/rds/prof-dr-gjergji-kasneci/", "dblp": ";00/7271.html;69/3216", "google_scholar": ";;Zbc8GK4AAAAJ", "orcid": ";;0000-0002-3123-7268", "linkedin": ";;", "or_profile": "~Xuan_Zhao5;~Klaus_Broelemann1;~Gjergji_Kasneci2", "aff": "SCHUFA Holding AG;SCHUFA;University of Tuebingen", "aff_domain": "schufa.de;schufa.de;uni-tuebingen.de", "position": "PhD student;Principal Researcher;Professor", "bibtex": "@misc{\nzhao2023counterfactual,\ntitle={Counterfactual Explanation via Search in Gaussian Mixture Distributed Latent Space},\nauthor={Xuan Zhao and Klaus Broelemann and Gjergji Kasneci},\nyear={2023},\nurl={https://openreview.net/forum?id=AXP2Sf6qqSZ}\n}", "github": "", "project": "", "reviewers": "nTFN;Y5Zt;DRPb;PkaT", "site": "https://openreview.net/forum?id=AXP2Sf6qqSZ", "pdf_size": 562437, "recommendation": "3;3;3;5", "confidence": "5;3;4;5", "correctness": "3;2;2;3", "technical_novelty": "1;2;2;2", "empirical_novelty": "2;1;2;2", "wc_summary_paper": "85;91;42;30", "wc_strength_and_weaknesses": "140;158;149;83", "wc_clarity_quality_novelty_and_reproducibility": "438;213;35;278", "wc_summary_review": "131;159;18;72", "wc_review": "794;621;244;463", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 62.0, 26.42915057280502 ], "wc_strength_and_weaknesses_avg": [ 132.5, 29.278831943914703 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 241.0, 144.3935594131539 ], "wc_summary_review_avg": [ 95.0, 54.4288526426931 ], "wc_review_avg": [ 530.5, 202.64562664908414 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.5222329678670935, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16880794727557555540&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "SCHUFA Holding AG;University of Tuebingen", "aff_unique_dep": ";", "aff_unique_url": "https://www.schufa.de;https://www.uni-tuebingen.de/", "aff_unique_abbr": "SCHUFA;Uni T\u00fcbingen", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "id":
"AYvLkPnDguL", "title": "The power of choices in decision tree learning", "track": "main", "status": "Reject", "tldr": "We propose a simple generalization of greedy decision tree learning algorithms which parameterizes the greediness in these algorithms by a parameter $k$, and validate the effectiveness of having this parameter, both theoretically and empirically.", "abstract": "We propose a simple and natural generalization of standard and empirically successful decision tree learning algorithms such as ID3, C4.5, and CART. These classic algorithms, which have been central to machine learning for decades, are greedy in nature: they grow a decision tree by iteratively splitting on the \"best\" attribute. We augment these algorithms with an additional greediness parameter $k$ and our resulting algorithm, Top-$k$, considers the $k$ best attributes as possible splits instead of just the single best attribute.\n\nWe demonstrate, theoretically and empirically, the power of this simple generalization. We first prove a sharp greediness hierarchy theorem showing that for every $k\\in \\mathbb{N}$, Top-$(k+1)$ can be much more powerful than Top-$k$: there are data distributions for which the former achieves accuracy $1-\\epsilon$, whereas the latter only achieves accuracy $\\frac{1}{2}+\\epsilon$. We then show, through extensive experiments, that Top-$k$ compares favorably with the two main approaches to decision tree learning: classic greedy algorithms and more recent \"optimal decision tree\" algorithms. On one hand, Top-$k$ consistently enjoys significant accuracy gains over the greedy algorithms across a wide range of benchmarks, at the cost of only a mild training slowdown. On the other hand, Top-$k$ is markedly more scalable than optimal decision tree algorithms, and is able to handle dataset and feature set sizes that remain beyond the reach of these algorithms. 
\n\nTaken together, our results highlight the potential practical impact of the power of choices in decision tree learning.", "keywords": "Decision Trees;Decision Tree Learning;Top-k;ID3;Greedy Algorithms", "primary_area": "", "supplementary_material": "", "author": "Guy Blanc;Jane Lange;Chirag Pabbaraju;Li-Yang Tan", "authorids": "~Guy_Blanc1;~Jane_Lange1;~Chirag_Pabbaraju1;~Li-Yang_Tan2", "gender": "M;Not Specified;M;", "homepage": ";;https://web.stanford.edu/~cpabbara/;", "dblp": "211/7035;254/1613.html;231/7619;", "google_scholar": "XDJL3bwAAAAJ;;IAGcpHkAAAAJ;", "orcid": ";0000-0002-0642-9815;0000-0002-3424-691X;", "linkedin": ";;chirag-pabbaraju-277a4ba5/;", "or_profile": "~Guy_Blanc1;~Jane_Lange1;~Chirag_Pabbaraju1;~Li-Yang_Tan2", "aff": "Stanford University;Massachusetts Institute of Technology;Stanford University;", "aff_domain": "stanford.edu;mit.edu;cs.stanford.edu;", "position": "PhD student;PhD student;PhD student;", "bibtex": "@misc{\nblanc2023the,\ntitle={The power of choices in decision tree learning},\nauthor={Guy Blanc and Jane Lange and Chirag Pabbaraju and Li-Yang Tan},\nyear={2023},\nurl={https://openreview.net/forum?id=AYvLkPnDguL}\n}", "github": "", "project": "", "reviewers": "E3aU;uFXj;Nivx;o6dK", "site": "https://openreview.net/forum?id=AYvLkPnDguL", "pdf_size": 484881, "recommendation": "3;5;6;8", "confidence": "4;4;4;3", "correctness": "2;4;3;4", "technical_novelty": "2;3;3;3", "empirical_novelty": "3;3;4;3", "wc_summary_paper": "61;78;207;103", "wc_strength_and_weaknesses": "353;244;682;47", "wc_clarity_quality_novelty_and_reproducibility": "21;23;89;35", "wc_summary_review": "42;39;48;25", "wc_review": "477;384;1026;210", "wc_reply_reviewers": "0;0;124;0", "wc_reply_authors": "630;550;1135;212", "reply_reviewers": "0;0;1;0", "reply_authors": "2;2;4;2", "recommendation_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 112.25, 56.70703219178376 ], "wc_strength_and_weaknesses_avg": [ 331.5, 230.1678735184387 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 42.0, 27.65863337187866 ], "wc_summary_review_avg": [ 38.5, 8.440971508067067 ], "wc_review_avg": [ 524.25, 305.12651064763287 ], "wc_reply_reviewers_avg": [ 31.0, 53.693575034635195 ], "wc_reply_authors_avg": [ 631.75, 330.20325180106875 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.8006407690254357, "corr_recommendation_correctness": 0.7526178090063818, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:dOEzfL1tW5YJ:scholar.google.com/&scioq=The+power+of+choices+in+decision+tree+learning&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;0", "aff_unique_norm": "Stanford University;Massachusetts Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.stanford.edu;https://web.mit.edu", "aff_unique_abbr": "Stanford;MIT", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Trainability Preserving Neural Pruning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12213", "id": "AZFvpnnewr", "poster": "", "openreview": 
"https://openreview.net/forum?id=AZFvpnnewr", "slides": "https://iclr.cc/virtual/2023/poster/12213", "video": "https://iclr.cc/virtual/2023/poster/12213", "author_site": "Huan Wang, Yun Fu", "tldr": "We present a new filter pruning approach that effectively preserves trainability during pruning with encouraging performance. ", "abstract": "Many recent works have shown trainability plays a central role in neural network pruning -- unattended broken trainability can lead to severe under-performance and unintentionally amplify the effect of retraining learning rate, resulting in biased (or even misinterpreted) benchmark results. This paper introduces trainability preserving pruning (TPP), a scalable method to preserve network trainability against pruning, aiming for improved pruning performance and being more robust to retraining hyper-parameters (e.g., learning rate). Specifically, we propose to penalize the gram matrix of convolutional filters to decorrelate the pruned filters from the retained filters. In addition to the convolutional layers, per the spirit of preserving the trainability of the whole network, we also propose to regularize the batch normalization parameters (scale and bias). Empirical studies on linear MLP networks show that TPP can perform on par with the oracle trainability recovery scheme. On nonlinear ConvNets (ResNet56/VGG19) on CIFAR10/100, TPP outperforms the other counterpart approaches by an obvious margin. Moreover, results on ImageNet-1K with ResNets suggest that TPP consistently performs more favorably against other top-performing structured pruning approaches. Code: https://github.com/MingSun-Tse/TPP.", "keywords": "neural network structured pruning;trainability;kernel orthogonalization", "primary_area": "", "supplementary_material": "", "author": "Huan Wang;Yun Fu", "authorids": "~Huan_Wang3;~Yun_Fu1", "gender": "M;M", "homepage": "https://huanwang.tech/;http://www1.ece.neu.edu/~yunfu/", "dblp": "70/6155-14;00/5815-1", "google_scholar": "0-On0y4AAAAJ;https://scholar.google.com.tw/citations?user=h-JEcQ8AAAAJ", "orcid": "0000-0001-6951-901X;0000-0002-5098-2853", "linkedin": "huanwang-zju/;furaymond/", "or_profile": "~Huan_Wang3;~Yun_Fu1", "aff": "Northeastern University;Northeastern University", "aff_domain": "neu.edu;northeastern.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nwang2023trainability,\ntitle={Trainability Preserving Neural Pruning},\nauthor={Huan Wang and Yun Fu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=AZFvpnnewr}\n}", "github": "", "project": "", "reviewers": "LVfu;Qf3U;aS72;p5ry", "pdf_size": 944217, "recommendation": "6;6;6;6", "confidence": "5;4;3;4", "correctness": "3;3;2;4", "technical_novelty": "3;3;2;3", "empirical_novelty": "3;2;2;3", "wc_summary_paper": "47;96;41;159", "wc_strength_and_weaknesses": "244;373;130;210", "wc_clarity_quality_novelty_and_reproducibility": "73;53;27;9", "wc_summary_review": "121;20;35;6", "wc_review": "485;542;233;384", "wc_reply_reviewers": "144;358;61;12", "wc_reply_authors": "1471;3188;1144;1183", "reply_reviewers": "2;2;1;1", "reply_authors": "4;7;4;3", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 85.75, 47.36757857437933 ], "wc_strength_and_weaknesses_avg": [ 239.25, 87.61100102156121 
], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 40.5, 24.428467000612216 ], "wc_summary_review_avg": [ 45.5, 44.78001786511479 ], "wc_review_avg": [ 411.0, 117.31368206650066 ], "wc_reply_reviewers_avg": [ 143.75, 132.39028476440407 ], "wc_reply_authors_avg": [ 1746.5, 841.7780289363699 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 4.5, 1.5 ], "replies_avg": [ 32, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 50, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6292392234810325924&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=AZFvpnnewr", "email": "neu.edu;northeastern.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Northeastern University", "aff_unique_dep": "", "aff_unique_url": "https://www.northeastern.edu", "aff_unique_abbr": "NEU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Hyper-Decision Transformer for Efficient Online Policy Adaptation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10999", "id": "AatUEvC-Wjv", "poster": "/media/PosterPDFs/ICLR%202023/10999.png?t=1683165937.4506133", "openreview": "https://openreview.net/forum?id=AatUEvC-Wjv", "slides": "https://iclr.cc/virtual/2023/poster/10999", "video": "https://iclr.cc/virtual/2023/poster/10999", "author_site": "Mengdi Xu, Yuchen Lu, Yikang Shen, Shun Zhang, DING ZHAO, Chuang Gan", "tldr": "We propose Hyper-Decision Transformer (HDT), a transformer-based model which generalizes to novel unseen tasks maintaining strong data and parameter efficiency.", "abstract": "Decision Transformers (DT) have demonstrated strong performances in offline reinforcement learning settings, but quickly adapting to unseen novel tasks remains challenging. To address this challenge, we propose a new framework, called Hyper-Decision Transformer (HDT), that can generalize to novel tasks from a handful of demonstrations in a data- and parameter-efficient manner. To achieve such a goal, we propose to augment the base DT with an adaptation module, whose parameters are initialized by a hyper-network. When encountering unseen tasks, the hyper-network takes a handful of demonstrations as inputs and initializes the adaptation module accordingly. This initialization enables HDT to efficiently adapt to novel tasks by only fine-tuning the adaptation module. We validate HDT's generalization capability on object manipulation tasks. We find that with a single expert demonstration and fine-tuning only 0.5% of DT parameters, HDT adapts faster to unseen tasks than fine-tuning the whole DT model. Finally, we explore a more challenging setting where expert actions are not available, and we show that HDT outperforms state-of-the-art baselines in terms of task success rates by a large margin. 
Demos are available on our project page: https://sites.google.com/view/hdtforiclr2023/home.", "keywords": "Offline Reinforcement Learning;One-shot Imitation Learning;Parameter-efficient Fine-tuning", "primary_area": "", "supplementary_material": "", "author": "Mengdi Xu;Yuchen Lu;Yikang Shen;Shun Zhang;Ding Zhao;Chuang Gan", "authorids": "~Mengdi_Xu3;~Yuchen_Lu1;~Yikang_Shen1;~Shun_Zhang6;~Ding_Zhao1;~Chuang_Gan1", "gender": "F;M;M;;;M", "homepage": "https://mxu34.github.io/;http://jackhaha363.github.io/;;https://shunzh.github.io/;https://safeai-lab.github.io;http://people.csail.mit.edu/ganchuang/", "dblp": ";223/4762;152/8226;;;139/6993", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.ca/citations?hl=en;qff5rRYAAAAJ;;z7tPc9IAAAAJ;PTeSCbIAAAAJ", "orcid": "0000-0001-9332-4175;;;;;", "linkedin": ";;;;;", "or_profile": "~Mengdi_Xu3;~Yuchen_Lu1;~Yikang_Shen1;~Shun_Zhang6;~Ding_Zhao1;~Chuang_Gan1", "aff": "Carnegie Mellon University;University of Montreal;International Business Machines;MIT-IBM Watson AI Lab;Carnegie Mellon University;MIT-IBM Watson AI Lab", "aff_domain": "cmu.edu;umontreal.ca;ibm.com;ibm.com;cmu.edu;ibm.com", "position": "PhD student;PhD student;Researcher;Researcher;Associate Professor;PhD student", "bibtex": "@inproceedings{\nxu2023hyperdecision,\ntitle={Hyper-Decision Transformer for Efficient Online Policy Adaptation},\nauthor={Mengdi Xu and Yuchen Lu and Yikang Shen and Shun Zhang and Ding Zhao and Chuang Gan},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=AatUEvC-Wjv}\n}", "github": "", "project": "", "reviewers": "k7sT;aTrS;FfBQ;9MRj", "pdf_size": 5803985, "recommendation": "6;6;8;8", "confidence": "3;4;4;4", "correctness": "3;3;3;3", "technical_novelty": "2;3;4;3", "empirical_novelty": "2;3;4;3", "wc_summary_paper": "164;189;66;72", "wc_strength_and_weaknesses": "451;620;210;279", "wc_clarity_quality_novelty_and_reproducibility": "53;174;22;37", "wc_summary_review": "47;78;11;124", "wc_review": "715;1061;309;512", "wc_reply_reviewers": "0;26;0;0", "wc_reply_authors": "646;2108;185;565", "reply_reviewers": "0;1;0;0", "reply_authors": "1;4;1;1", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 122.75, 54.51318647813573 ], "wc_strength_and_weaknesses_avg": [ 390.0, 159.17129138132918 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 71.5, 60.18513105410671 ], "wc_summary_review_avg": [ 65.0, 41.50301193889427 ], "wc_review_avg": [ 649.25, 277.6998154482642 ], "wc_reply_reviewers_avg": [ 6.5, 11.258330249197702 ], "wc_reply_authors_avg": [ 876.0, 732.2782940931678 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.0, "gs_citation": 44, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12102069685557133245&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=AatUEvC-Wjv", "email": "cmu.edu;umontreal.ca;ibm.com;ibm.com;cmu.edu;ibm.com", "author_num": 6, "aff_unique_index": "0;1;2;3;0;3", "aff_unique_norm": "Carnegie Mellon University;University of Montreal;International Business Machines 
Corporation;Massachusetts Institute of Technology", "aff_unique_dep": ";;;IBM Watson AI Lab", "aff_unique_url": "https://www.cmu.edu;https://www.umontreal.ca;https://www.ibm.com;https://www.mitibmwatsonailab.org", "aff_unique_abbr": "CMU;UM;IBM;MIT-IBM AI Lab", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;0", "aff_country_unique": "United States;Canada" }, { "id": "Ab8hkaJSJI", "title": "Multi-Epoch Matrix Factorization Mechanisms for Private Machine Learning", "track": "main", "status": "Reject", "tldr": "We enable generation of factorization-based methods under multiple participations, which allows us to achieve new SOTA results in private training without amplification.", "abstract": "We introduce new differentially private (DP) mechanisms for gradient-based machine learning (ML) training involving multiple passes (epochs) of a dataset, substantially improving the achievable privacy-utility-computation tradeoffs. Our key contribution is an extension of the online matrix factorization DP mechanism to multiple participations, substantially generalizing the approach of DMRST2022. We first give a non-trivial reduction of the problem with per-iteration vector contributions to the simpler one of scalar contributions. Using this, we formulate the construction of optimal (in total squared error at each iterate) matrix mechanisms for SGD variants as a convex program. We provide a closed-form solution to the dual function, leading directly to an efficient optimization algorithm.\n\nWhile tractable, both solving the convex problem offline and computing the necessary noise masks during training can become prohibitively expensive when many training steps are necessary. To address this, we design a Fourier-transform-based mechanism with significantly less computation and only a minor utility decrease.\n\nExtensive empirical evaluation on two tasks, example-level DP for image classification and user-level DP for language modeling, demonstrates substantial improvements over the previous state-of-the-art. Though our primary application is to ML, we note our main DP results are applicable to arbitrary linear queries and hence may have much broader applicability.", "keywords": "differential privacy;matrix mechanism;machine learning;artificial intelligence;private learning", "primary_area": "", "supplementary_material": "", "author": "Christopher A. Choquette-Choo;Hugh Brendan McMahan;J Keith Rush;Abhradeep Guha Thakurta", "authorids": "~Christopher_A._Choquette-Choo1;~Hugh_Brendan_McMahan1;~J_Keith_Rush1;~Abhradeep_Guha_Thakurta1", "gender": "M;M;;M", "homepage": "https://www.christopherchoquette.com;;https://www.jkrush.com;https://athakurta.squarespace.com/", "dblp": "250/9674;;249/8135;31/8315", "google_scholar": "oDE4I64AAAAJ;;OrUyRAcAAAAJ;1rV69hMAAAAJ", "orcid": ";;;", "linkedin": "christopher-choquette-choo/;;;", "or_profile": "~Christopher_A._Choquette-Choo1;~Hugh_Brendan_McMahan1;~J_Keith_Rush1;~Abhradeep_Guha_Thakurta1", "aff": "Google Research, Brain Team;Google;Google;Google", "aff_domain": "google.com;google.com;google.com;google.com", "position": "Researcher;Research Scientist;Researcher;Senior Research Scientist", "bibtex": "@misc{\nchoquette-choo2023multiepoch,\ntitle={Multi-Epoch Matrix Factorization Mechanisms for Private Machine Learning},\nauthor={Christopher A.
Choquette-Choo and Hugh Brendan McMahan and J Keith Rush and Abhradeep Guha Thakurta},\nyear={2023},\nurl={https://openreview.net/forum?id=Ab8hkaJSJI}\n}", "github": "", "project": "", "reviewers": "rY9u;PQTm;SJLq;6QXF", "site": "https://openreview.net/forum?id=Ab8hkaJSJI", "pdf_size": 7346324, "recommendation": "5;5;6;6", "confidence": "4;3;3;3", "correctness": "4;3;4;4", "technical_novelty": "3;2;2;3", "empirical_novelty": "3;3;2;3", "wc_summary_paper": "77;61;180;85", "wc_strength_and_weaknesses": "122;344;552;155", "wc_clarity_quality_novelty_and_reproducibility": "27;36;69;50", "wc_summary_review": "63;41;44;4", "wc_review": "289;482;845;294", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "240;545;916;206", "reply_reviewers": "0;0;0;0", "reply_authors": "2;2;3;2", "recommendation_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 100.75, 46.56380031741396 ], "wc_strength_and_weaknesses_avg": [ 293.25, 171.73144004520546 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 45.5, 15.850867484147358 ], "wc_summary_review_avg": [ 38.0, 21.365860619221497 ], "wc_review_avg": [ 477.5, 225.987278403011 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 476.75, 285.899785764173 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 41, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13193330051933381559&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google Research", "aff_unique_url": "https://research.google", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "AbRe0e_R07", "title": "TEAS: Exploiting Spiking Activity for Temporal-wise Adaptive Spiking Neural Networks", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Spiking neural networks (SNNs) are energy-efficient alternatives to commonly used deep artificial neural networks (ANNs). However, their sequential computation pattern over multiple time steps makes processing latency a significant hindrance to deployment. In existing SNNs deployed on time-driven hardware, all layers generate and receive spikes in a synchronized manner, forcing them to share the same time steps. This often leads to considerable time redundancy in the spike sequences and repetitive processing. Motivated by the effectiveness of dynamic neural networks for boosting efficiency, we propose a temporal-wise adaptive SNN, namely TEAS, in which each layer is configured with an independent number of time steps to fully exploit the potential of SNNs.\nSpecifically, given an SNN, the number of time steps of each layer is configured according to its contribution to the final performance of the whole network. Then, we exploit a temporal transforming module to produce a dynamic policy that adapts the temporal information during inference.
The adaptive configuration generation process also enables trading off model complexity against accuracy.\nExtensive experiments on a variety of challenging datasets demonstrate that our method provides significant savings in energy and processing latency at similar accuracy, outperforming existing state-of-the-art methods.", "keywords": "Spiking Neural Network", "primary_area": "", "supplementary_material": "", "author": "Fangxin Liu;Wenbo Zhao;Xiaokang Yang;Li Jiang", "authorids": "~Fangxin_Liu1;zhaowenbo@sjtu.edu.cn;~Xiaokang_Yang1;~Li_Jiang1", "gender": "M;;M;", "homepage": "https://mxhx7199.github.io/;;https://icne.sjtu.edu.cn/info/1064/1078.htm;", "dblp": "198/2194;;06/3071-1.html;", "google_scholar": "https://scholar.google.com/citations?hl=zh-TW;;yDEavdMAAAAJ;", "orcid": ";;0000-0003-4029-3322;", "linkedin": ";;;", "or_profile": "~Fangxin_Liu1;zhaowenbo@sjtu.edu.cn;~Xiaokang_Yang1;~Li_Jiang1", "aff": "Shanghai Jiaotong University;;Shanghai Jiaotong University;", "aff_domain": "sjtu.edu.cn;;sjtu.edu.cn;", "position": "PhD student;;Full Professor;", "bibtex": "@misc{\nliu2023teas,\ntitle={{TEAS}: Exploiting Spiking Activity for Temporal-wise Adaptive Spiking Neural Networks},\nauthor={Fangxin Liu and Wenbo Zhao and Xiaokang Yang and Li Jiang},\nyear={2023},\nurl={https://openreview.net/forum?id=AbRe0e_R07}\n}", "github": "", "project": "", "reviewers": "H3nR;1GqH;Dudj;2EE2", "site": "https://openreview.net/forum?id=AbRe0e_R07", "pdf_size": 3667821, "recommendation": "3;3;5;5", "confidence": "5;4;5;4", "correctness": "1;3;3;3", "technical_novelty": "2;3;3;2", "empirical_novelty": "2;2;3;2", "wc_summary_paper": "112;37;85;15", "wc_strength_and_weaknesses": "169;97;222;503", "wc_clarity_quality_novelty_and_reproducibility": "54;3;19;13", "wc_summary_review": "34;18;17;14", "wc_review": "369;155;343;545", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.5, 0.5 ], "correctness_avg": [ 2.5, 0.8660254037844386 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 62.25, 38.284298348017295 ], "wc_strength_and_weaknesses_avg": [ 247.75, 153.90155132421506 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 22.25, 19.201236939322424 ], "wc_summary_review_avg": [ 20.75, 7.790218225441442 ], "wc_review_avg": [ 353.0, 138.2244551445221 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5258613810101241858&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "AcyZ0Q5p6G8", "title": "Learning in Compressed Domain via Knowledge Transfer", "track": "main", "status": "Withdraw", "tldr": "We propose learning in compressed domain by transferring the knowledge learned in pixel domain.", "abstract": "Learning in compressed domain aims to perform vision tasks directly on
compressed latent representations instead of reconstructed images. Existing reports show that learning in compressed domain can achieve performance comparable to that in pixel domain for certain compression models. However, we observe that when using the state-of-the-art learned compression models, the performance gap between compressed-domain and pixel-domain vision tasks is still large due to the lack of some natural inductive biases in pixel-domain convolutional neural networks. In this paper, we attempt to address this problem by transferring knowledge from pixel domain to compressed domain. We first modify neural networks for pixel-domain vision tasks to better suit compressed-domain inputs. In addition, we propose a knowledge transfer loss to narrow the gap between compressed domain and pixel domain. Experimental results on classification and instance segmentation show that the proposed method improves the accuracy of compressed-domain vision tasks significantly, which even outperforms learning on reconstructed images while avoiding the computational cost of image reconstruction.", "keywords": "compressed-domain vision;image compression;knowledge transfer", "primary_area": "", "supplementary_material": "", "author": "Hanyue Tu;Li Li;Wengang Zhou;Houqiang Li", "authorids": "~Hanyue_Tu1;~Li_Li1;~Wengang_Zhou1;~Houqiang_Li1", "gender": "M;M;M;M", "homepage": ";https://faculty.ustc.edu.cn/lil1/en;http://staff.ustc.edu.cn/~zhwg/index.html;https://staff.ustc.edu.cn/~lihq/", "dblp": "263/2155.html;53/2189-40;22/4544-1;59/7017.html", "google_scholar": ";dEm6VKAAAAAJ;8s1JF8YAAAAJ;7sFMIKoAAAAJ", "orcid": ";0000-0002-7163-6263;0000-0003-1690-9836;0000-0003-2188-3028", "linkedin": "%E6%B6%B5%E8%B6%8A-%E6%B6%82-8a171a18a/;;;", "or_profile": "~Hanyue_Tu1;~Li_Li1;~Wengang_Zhou1;~Houqiang_Li1", "aff": "University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn", "position": "PhD student;Professor;Full Professor;Professor", "bibtex": "@misc{\ntu2023learning,\ntitle={Learning in Compressed Domain via Knowledge Transfer},\nauthor={Hanyue Tu and Li Li and Wengang Zhou and Houqiang Li},\nyear={2023},\nurl={https://openreview.net/forum?id=AcyZ0Q5p6G8}\n}", "github": "", "project": "", "reviewers": "gMtE;tqMk;9jzx;b5a3", "site": "https://openreview.net/forum?id=AcyZ0Q5p6G8", "pdf_size": 5756359, "recommendation": "3;3;3;3", "confidence": "4;4;5;4", "correctness": "3;4;2;3", "technical_novelty": "1;2;2;2", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "51;42;56;85", "wc_strength_and_weaknesses": "286;121;461;77", "wc_clarity_quality_novelty_and_reproducibility": "27;19;32;301", "wc_summary_review": "60;48;94;35", "wc_review": "424;230;643;498", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 58.5, 16.101242188104617 ], "wc_strength_and_weaknesses_avg": [ 236.25, 151.35285758782356 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 94.75, 119.1687354132786 ], "wc_summary_review_avg": [ 59.25, 21.924586655168667 ], "wc_review_avg": [ 448.75, 148.8478669648981 ],
"wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14119569142617302391&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Science and Technology of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ustc.edu.cn", "aff_unique_abbr": "USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "VoGE: A Differentiable Volume Renderer using Gaussian Ellipsoids for Analysis-by-Synthesis", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11755", "id": "AdPJb9cud_Y", "poster": "/media/PosterPDFs/ICLR%202023/11755.png?t=1680901264.9776359", "openreview": "https://openreview.net/forum?id=AdPJb9cud_Y", "slides": "https://iclr.cc/virtual/2023/poster/11755", "video": "https://iclr.cc/virtual/2023/poster/11755", "author_site": "Angtian Wang, Peng Wang, Jian Sun, Adam Kortylewski, Alan Yuille", "tldr": "VoGE is a differentiable renderer based on ray tracing volume densities, which gives better gradients for occlusion reasoning and yields better pose estimation results.", "abstract": "Differentiable rendering allows the application of computer graphics on vision tasks, e.g. object pose and shape fitting, via analysis-by-synthesis, where gradients at occluded regions are important when inverting the rendering process.To obtain those gradients, state-of-the-art (SoTA) differentiable renderers use rasterization to collect a set of nearest components for each pixel and aggregate them based on the viewing distance. In this paper, we propose VoGE, which uses ray tracing to capture nearest components with their volume density distributions on the rays and aggregates via integral of the volume densities based on Gaussian ellipsoids, which brings more efficient and stable gradients. To efficiently render via VoGE, we propose an approximate close-form solution for the volume density aggregation and a coarse-to-fine rendering strategy. Finally, we provide a CUDA implementation of VoGE, which gives a competitive rendering speed in comparison to PyTorch3D. Quantitative and qualitative experiment results show VoGE outperforms SoTA counterparts when applied to various vision tasks, e.g., object pose estimation, shape/texture fitting, and occlusion reasoning. 
The VoGE code is available at: https://github.com/Angtian/VoGE.", "keywords": "Differentiable Rendering;Analysis-by-Synthesis;Pose Estimation", "primary_area": "", "supplementary_material": "", "author": "Angtian Wang;Peng Wang;Jian Sun;Adam Kortylewski;Alan Yuille", "authorids": "~Angtian_Wang2;~Peng_Wang2;geomtop@gmail.com;~Adam_Kortylewski1;~Alan_Yuille1", "gender": "M;M;;;M", "homepage": "https://angtianwang.github.io/;https://pengwangucla.github.io/peng-wang.github.io/;;https://gvrl.mpi-inf.mpg.de/;", "dblp": ";95/4442;;161/0772;y/AlanLYuille", "google_scholar": "YR7re-cAAAAJ;Svk4ntYAAAAJ;;https://scholar.google.ch/citations?user=tRLUOBIAAAAJ;", "orcid": ";;;0000-0002-9146-4403;", "linkedin": ";;;;", "or_profile": "~Angtian_Wang2;~Peng_Wang2;geomtop@gmail.com;~Adam_Kortylewski1;~Alan_Yuille1", "aff": "Johns Hopkins University;Bytedance US AILab;;Albert-Ludwigs-Universit\u00e4t Freiburg;Johns Hopkins University", "aff_domain": "jhu.edu;bytedance.com;;uni-freiburg.de;johnshopkins.edu", "position": "PhD student;Research Scientist;;Research Group Leader;Full Professor", "bibtex": "@inproceedings{\nwang2023voge,\ntitle={Vo{GE}: A Differentiable Volume Renderer using Gaussian Ellipsoids for Analysis-by-Synthesis},\nauthor={Angtian Wang and Peng Wang and Jian Sun and Adam Kortylewski and Alan Yuille},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=AdPJb9cud_Y}\n}", "github": "", "project": "", "reviewers": "cWiy;qZYG;ppJY;LqFK", "pdf_size": 20459799, "recommendation": "3;5;5;8", "confidence": "4;4;5;4", "correctness": "2;3;3;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;0;2;0", "wc_summary_paper": "154;124;74;71", "wc_strength_and_weaknesses": "461;254;135;352", "wc_clarity_quality_novelty_and_reproducibility": "69;284;297;104", "wc_summary_review": "68;92;64;38", "wc_review": "752;754;570;565", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "987;534;289;588", "reply_reviewers": "0;0;0;0", "reply_authors": "3;2;2;2", "recommendation_avg": [ 5.25, 1.7853571071357126 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.0, 1.0 ], "wc_summary_paper_avg": [ 105.75, 34.91686555233731 ], "wc_strength_and_weaknesses_avg": [ 300.5, 120.37960790765187 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 188.5, 102.85061983284301 ], "wc_summary_review_avg": [ 65.5, 19.150718002205558 ], "wc_review_avg": [ 660.25, 92.76953972075101 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 599.5, 250.49401190447648 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.08084520834544431, "corr_recommendation_correctness": 0.7276068751089989, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13229176929567700969&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=AdPJb9cud_Y", "email": "jhu.edu;bytedance.com;;uni-freiburg.de;johnshopkins.edu", "author_num": 5, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Johns Hopkins University;ByteDance;Albert-Ludwigs-Universit\u00e4t Freiburg", "aff_unique_dep": ";AI Lab;", "aff_unique_url": "https://www.jhu.edu;https://www.bytedance.com;https://www.uni-freiburg.de", "aff_unique_abbr": "JHU;;Albert-Ludwigs-Universit\u00e4t", 
"aff_campus_unique_index": "1;2", "aff_campus_unique": ";United States;Freiburg", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;Germany" }, { "id": "AeTl9sbF-VT", "title": "Exploiting Certified Defences to Attack Randomised Smoothing", "track": "main", "status": "Reject", "tldr": "Certified defences can be used to attack the models they certify, yielding smaller adversarial perturbations", "abstract": "Certified guarantees of adversarial robustness play an important role in providing assurances regarding a models output, irrespective of the behaviour of an attacker. However, while the development of such guarantees has drawn upon an improved understanding of attacker behaviour, so too can certified guarantees be exploited in order to generate more efficient adversarial attacks. Within this work, we explore this heretofore undiscovered additional attack surface, while also considering how previously discovered attacks could be applied to models defended by randomised smoothing. In all bar one experiment our approach generates smaller adversarial perturbations for more than $70 \\%$ of tested samples, reducing the average magnitude of the adversarial perturbation by $13 \\%$.", "keywords": "adversarial;attack;certified robustness;machine learning", "primary_area": "", "supplementary_material": "/attachment/7ad45a9a137b4967131e7cd3b471e2777ccbd2e4.zip", "author": "Andrew Craig Cullen;Paul Montague;Shijie Liu;Sarah Monazam Erfani;Benjamin I. P. Rubinstein", "authorids": "~Andrew_Craig_Cullen1;~Paul_Montague1;~Shijie_Liu4;~Sarah_Monazam_Erfani1;~Benjamin_I._P._Rubinstein1", "gender": "M;M;M;;M", "homepage": "https://www.andrewcraigcullen.com;;https://github.com/shijiel2;https://people.eng.unimelb.edu.au/smonazam/;http://www.bipr.net/", "dblp": "238/6828;50/805;;136/0170;90/1092", "google_scholar": "BeXBviIAAAAJ;;https://scholar.google.com.au/citations?user=lH5nxwMAAAAJ;https://scholar.google.com.au/citations?user=Jq9ocx4AAAAJ;https://scholar.google.com.au/citations?user=hMG_gR4AAAAJ", "orcid": "0000-0001-8243-6470;0000-0001-9461-7471;0009-0008-2980-6266;;0000-0002-2947-6980", "linkedin": ";;;;benjaminrubinstein/", "or_profile": "~Andrew_Craig_Cullen1;~Paul_Montague1;~Shijie_Liu4;~Sarah_Monazam_Erfani1;~Benjamin_I._P._Rubinstein1", "aff": "The University of Melbourne;Defence Science and Technology Group;The University of Melbourne;The University of Melbourne;The University of Melbourne", "aff_domain": "unimelb.edu.au;dst.defence.gov.au;unimelb.edu.au;unimelb.edu.au;unimelb.edu.au", "position": "Postdoc;Researcher;PhD student;Associate Professor;Associate Professor", "bibtex": "@misc{\ncullen2023exploiting,\ntitle={Exploiting Certified Defences to Attack Randomised Smoothing},\nauthor={Andrew Craig Cullen and Paul Montague and Shijie Liu and Sarah Monazam Erfani and Benjamin I. P. 
Rubinstein},\nyear={2023},\nurl={https://openreview.net/forum?id=AeTl9sbF-VT}\n}", "github": "", "project": "", "reviewers": "YX18;5baj;Ms23;rMga", "site": "https://openreview.net/forum?id=AeTl9sbF-VT", "pdf_size": 517829, "recommendation": "3;3;5;5", "confidence": "3;4;4;4", "correctness": "3;3;4;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;1;2;2", "wc_summary_paper": "87;80;27;24", "wc_strength_and_weaknesses": "162;152;76;169", "wc_clarity_quality_novelty_and_reproducibility": "30;78;96;2", "wc_summary_review": "55;62;208;30", "wc_review": "334;372;407;225", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "729;416;757;534", "reply_reviewers": "0;0;0;0", "reply_authors": "2;2;2;2", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 54.5, 29.124731758421397 ], "wc_strength_and_weaknesses_avg": [ 139.75, 37.298625980054545 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 51.5, 37.399866309921485 ], "wc_summary_review_avg": [ 88.75, 69.86907398842494 ], "wc_review_avg": [ 334.5, 68.28799308809712 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 609.0, 140.6929280383346 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3365051701692850038&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "University of Melbourne;Defence Science and Technology Group", "aff_unique_dep": ";", "aff_unique_url": "https://www.unimelb.edu.au;https://www.dst.defence.gov.au/", "aff_unique_abbr": "UniMelb;DST Group", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Australia" }, { "id": "Af43zsue7kw", "title": "StructViT: Learning Correlation Structures for Vision Transformers", "track": "main", "status": "Withdraw", "tldr": "We introduce structural self-attention (StructSA) that exploits geometric structures of query-key correlations and the proposed network StructViT achieves state-of-the-art results on various image and video classification benchmarks.", "abstract": "We introduce the structural self-attention (StructSA) mechanism that leverages structural patterns of query-key correlation for visual representation learning. StructSA generates attention by recognizing space-time structures of correlations and performs long-range interactions across entire locations, effectively capturing structural patterns, e.g., spatial layouts, motion, or inter-object relations in images and videos. 
Using StructSA as a main building block, we develop the structural vision transformer (StructViT) and evaluate its effectiveness on both image and video classification tasks, achieving state-of-the-art results on ImageNet-1K, Kinetics-400, Something-Something V1 & V2, Diving-48, and FineGym.", "keywords": "transformer;self-attention;image classification;video classification;correlation", "primary_area": "", "supplementary_material": "", "author": "Manjin Kim;Paul Hongsuck Seo;Cordelia Schmid;Minsu Cho", "authorids": "~Manjin_Kim1;~Paul_Hongsuck_Seo1;~Cordelia_Schmid1;~Minsu_Cho1", "gender": "M;M;F;M", "homepage": "https://kimmanjin.github.io/;https://phseo.github.io;https://cordeliaschmid.github.io/;http://cvlab.postech.ac.kr/~mcho/", "dblp": "270/8837;172/0938;s/CordeliaSchmid;", "google_scholar": "https://scholar.google.co.kr/citations?user=kqddtlwAAAAJ;https://scholar.google.co.kr/citations?user=Tp7U8_UAAAAJ;IvqCXP4AAAAJ;5TyoF5QAAAAJ", "orcid": ";;;", "linkedin": ";;cordelia-schmid-47985a9;minsu-cho-062b3750/", "or_profile": "~Manjin_Kim1;~Paul_Hongsuck_Seo1;~Cordelia_Schmid1;~Minsu_Cho1", "aff": "POSTECH;Google;Inria;POSTECH", "aff_domain": "postech.ac.kr;google.com;inria.fr;postech.ac.kr", "position": "PhD student;Researcher;Researcher;Associate Professor", "bibtex": "@misc{\nkim2023structvit,\ntitle={StructViT: Learning Correlation Structures for Vision Transformers},\nauthor={Manjin Kim and Paul Hongsuck Seo and Cordelia Schmid and Minsu Cho},\nyear={2023},\nurl={https://openreview.net/forum?id=Af43zsue7kw}\n}", "github": "", "project": "", "reviewers": "XwQL;CQKi;GCi8", "site": "https://openreview.net/forum?id=Af43zsue7kw", "pdf_size": 1282848, "recommendation": "3;3;5", "confidence": "4;3;3", "correctness": "3;3;3", "technical_novelty": "2;2;2", "empirical_novelty": "2;2;2", "wc_summary_paper": "31;76;42", "wc_strength_and_weaknesses": "48;159;42", "wc_clarity_quality_novelty_and_reproducibility": "4;16;4", "wc_summary_review": "11;22;22", "wc_review": "94;273;110", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 49.666666666666664, 19.154343864744856 ], "wc_strength_and_weaknesses_avg": [ 83.0, 53.79591062525106 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 8.0, 5.656854249492381 ], "wc_summary_review_avg": [ 18.333333333333332, 5.185449728701348 ], "wc_review_avg": [ 159.0, 80.87438819964369 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.5, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:W-_s5agHGIoJ:scholar.google.com/&scioq=StructViT:+Learning+Correlation+Structures+for+Vision+Transformers&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Pohang University of Science and Technology;Google;INRIA", "aff_unique_dep": ";Google;", "aff_unique_url": "https://www.postech.ac.kr;https://www.google.com;https://www.inria.fr", "aff_unique_abbr": "POSTECH;Google;Inria", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Pohang;Mountain View;", 
"aff_country_unique_index": "0;1;2;0", "aff_country_unique": "South Korea;United States;France" }, { "id": "AfmFjelAqW6", "title": "Hierarchical Neural Program Synthesis", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Program synthesis aims to automatically construct human-readable programs that satisfy given task specifications such as input/output pairs or demonstrations. Recent works have demonstrated encouraging results in a variety of domains such as string transformation, tensor manipulation, and describing behaviors of embodied agents. Most existing program synthesis methods are designed to synthesize programs from scratch, generating a program token by token, line by line. This fundamentally prevents these methods from scaling up to synthesize programs that are longer or more complex. In this work, we present a scalable program synthesis framework that instead synthesizes a program by hierarchically composing programs. Specifically, we first learn a task embedding space and a program decoder that can decode a task embedding into a program. Then, we train a high-level module to comprehend the task specification (e.g. input/output pairs or demonstrations) from long programs and produce a sequence of task embeddings, which are then decoded by the program decoder and composed to yield the synthesized program. We extensively evaluate our proposed framework in a string transformation domain with input/output pairs. The experimental results demonstrate that the proposed framework can synthesize programs that are significantly longer and more complex than the programs considered in prior program synthesis works", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Linghan Zhong;Ryan Lindeborg;Jesse Zhang;Joseph J Lim;Shao-Hua Sun", "authorids": "~Linghan_Zhong1;~Ryan_Lindeborg1;~Jesse_Zhang3;~Joseph_J_Lim1;~Shao-Hua_Sun1", "gender": ";;M;M;M", "homepage": "https://thoughtp0lice.github.io/;;https://jessezhang.net;http://people.csail.mit.edu/lim/;http://shaohua0116.github.io", "dblp": ";;;08/3086;158/9680", "google_scholar": ";;fSXCOfEAAAAJ;jTnQTBoAAAAJ;uXsfnaQAAAAJ", "orcid": ";;;;0000-0001-7579-6734", "linkedin": ";;;;shaohua0116/", "or_profile": "~Linghan_Zhong1;~Ryan_Lindeborg1;~Jesse_Zhang3;~Joseph_J_Lim1;~Shao-Hua_Sun1", "aff": "University of Southern California;;Amazon;Korea Advanced Institute of Science & Technology;National Taiwan University", "aff_domain": "usc.edu;;amazon.com;kaist.ac.kr;ntu.edu.tw", "position": "MS student;;Intern;Associate Professor;Assistant Professor", "bibtex": "@misc{\nzhong2023hierarchical,\ntitle={Hierarchical Neural Program Synthesis},\nauthor={Linghan Zhong and Ryan Lindeborg and Jesse Zhang and Joseph J Lim and Shao-Hua Sun},\nyear={2023},\nurl={https://openreview.net/forum?id=AfmFjelAqW6}\n}", "github": "", "project": "", "reviewers": "6Mah;CgK9;SvF9;WXTe", "site": "https://openreview.net/forum?id=AfmFjelAqW6", "pdf_size": 700402, "recommendation": "3;3;3;5", "confidence": "4;3;4;4", "correctness": "2;2;2;2", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;2;0;3", "wc_summary_paper": "58;98;69;86", "wc_strength_and_weaknesses": "265;309;98;540", "wc_clarity_quality_novelty_and_reproducibility": "87;82;261;210", "wc_summary_review": "44;73;125;31", "wc_review": "454;562;553;867", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], 
"correctness_avg": [ 2.0, 0.0 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 1.75, 1.0897247358851685 ], "wc_summary_paper_avg": [ 77.75, 15.368392889303683 ], "wc_strength_and_weaknesses_avg": [ 303.0, 157.85594698965255 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 160.0, 77.64341568993471 ], "wc_summary_review_avg": [ 68.25, 36.120458191999724 ], "wc_review_avg": [ 609.0, 154.86607117118973 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17408360599214467817&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "University of Southern California;Amazon;Korea Advanced Institute of Science and Technology;National Taiwan University", "aff_unique_dep": ";Amazon.com, Inc.;;", "aff_unique_url": "https://www.usc.edu;https://www.amazon.com;https://www.kaist.ac.kr;https://www.ntu.edu.tw", "aff_unique_abbr": "USC;Amazon;KAIST;NTU", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Los Angeles;;Taiwan", "aff_country_unique_index": "0;0;1;2", "aff_country_unique": "United States;South Korea;China" }, { "id": "AgQ4GpzzRT", "title": "Unsupervised Pretraining for Neural Value Approximation", "track": "main", "status": "Withdraw", "tldr": "The paper presents an unsupervised pretraining approach that learns initializations of the critic/value network which possess desirable generalization properties in the context of deep reinforcement learning. ", "abstract": "Deep neural networks are powerful function approximators and have successfully been employed for the parameterization of value functions in deep reinforcement learning. Neural value approximation is a powerful paradigm for model-free control but it can often result in instability and divergence, especially when combined with off-policy learning and bootstrapping. Recent works have revealed some intrinsic connections between the unstable behavior of neural value approximation and the generalization properties of the value network/critic. Motivated by this, we propose a simple and computationally efficient unsupervised pretraining method to be performed before neural value learning. The method learns initializations of the critic parameters that correspond to Neural Tangent Kernels with desirable generalization structures. We demonstrate the merits of our approach by combining it with the Soft Actor-Critic algorithm and testing its performance on the continuous control environments of the DeepMind Control Suite. Our approach results in considerable improvements in reward accumulation, sample efficiency and stability for the majority of the domain environments. 
Furthermore, the use of the proposed pretraining enables us to retain the performance gains when changing the activation function used between layers of the critic architecture.", "keywords": "reinforcement learning;Neural Tangent Kernels;unsupervised pretraining;neural value approximation", "primary_area": "", "supplementary_material": "", "author": "Spilios Evmorfos;Suat Gumussoy", "authorids": "~Spilios_Evmorfos1;~Suat_Gumussoy1", "gender": "M;M", "homepage": ";http://gumussoysuat.github.io", "dblp": ";70/98", "google_scholar": "ddQSMq4AAAAJ;yW-r4J8AAAAJ", "orcid": ";0000-0003-2064-3196", "linkedin": ";gumussoysuat/", "or_profile": "~Spilios_Evmorfos1;~Suat_Gumussoy1", "aff": "Rutgers University, New Brunswick;Siemens Corporate Research", "aff_domain": "rutgers.edu;siemens.com", "position": "PhD student;Senior Key Expert", "bibtex": "@misc{\nevmorfos2023unsupervised,\ntitle={Unsupervised Pretraining for Neural Value Approximation},\nauthor={Spilios Evmorfos and Suat Gumussoy},\nyear={2023},\nurl={https://openreview.net/forum?id=AgQ4GpzzRT}\n}", "github": "", "project": "", "reviewers": "iUkV;Yp6v;XRX1;1sio", "site": "https://openreview.net/forum?id=AgQ4GpzzRT", "pdf_size": 1653493, "recommendation": "3;3;5;5", "confidence": "4;4;4;2", "correctness": "4;2;2;3", "technical_novelty": "3;3;3;3", "empirical_novelty": "3;1;2;3", "wc_summary_paper": "167;132;52;170", "wc_strength_and_weaknesses": "347;604;273;219", "wc_clarity_quality_novelty_and_reproducibility": "25;58;415;9", "wc_summary_review": "27;39;40;60", "wc_review": "566;833;780;458", "wc_reply_reviewers": "0;619;336;0", "wc_reply_authors": "1070;1545;2553;674", "reply_reviewers": "0;1;1;0", "reply_authors": "2;3;5;1", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "correctness_avg": [ 2.75, 0.82915619758885 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 130.25, 47.58347927589995 ], "wc_strength_and_weaknesses_avg": [ 360.75, 147.6082230094245 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 126.75, 167.35646835422884 ], "wc_summary_review_avg": [ 41.5, 11.84271928232701 ], "wc_review_avg": [ 659.25, 153.26998238402717 ], "wc_reply_reviewers_avg": [ 238.75, 258.86808899514824 ], "wc_reply_authors_avg": [ 1460.5, 702.0984617558993 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.75, 1.479019945774904 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": -0.30151134457776363, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:NqOtY5vpBe4J:scholar.google.com/&scioq=Unsupervised+Pretraining+for+Neural+Value+Approximation&hl=en&as_sdt=0,7", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Rutgers University;Siemens AG", "aff_unique_dep": ";Corporate Research", "aff_unique_url": "https://www.rutgers.edu;https://www.siemens.com/research", "aff_unique_abbr": "Rutgers;Siemens", "aff_campus_unique_index": "0", "aff_campus_unique": "New Brunswick;", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;Germany" }, { "title": "Simplified State Space Layers for Sequence Modeling", "status": "Top-5%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11401", "id": "Ai8Hw3AXqks", "poster": "/media/PosterPDFs/ICLR%202023/11401.png?t=1682525261.5068254", "openreview": "https://openreview.net/forum?id=Ai8Hw3AXqks", "slides":
"https://iclr.cc/virtual/2023/poster/11401", "video": "https://iclr.cc/virtual/2023/poster/11401", "author_site": "Jimmy Smith, andrew warrington, Scott Linderman", "tldr": "We introduce a new state space sequence modeling layer, building on the recent S4 layer, that increases the state of the art on many long-range benchmark tasks.", "abstract": "Models using structured state space sequence (S4) layers have achieved state-of-the-art performance on long-range sequence modeling tasks. An S4 layer combines linear state space models (SSMs), the HiPPO framework, and deep learning to achieve high performance. We build on the design of the S4 layer and introduce a new state space layer, the S5 layer. Whereas an S4 layer uses many independent single-input, single-output SSMs, the S5 layer uses one multi-input, multi-output SSM. We establish a connection between S5 and S4, and use this to develop the initialization and parameterization used by the S5 model. The result is a state space layer that can leverage efficient and widely implemented parallel scans, allowing S5 to match the computational efficiency of S4, while also achieving state-of-the-art performance on several long-range sequence modeling tasks. S5 averages $87.4\\%$ on the long range arena benchmark, and $98.5\\%$ on the most difficult Path-X task.", "keywords": "sequence models;state space;S4;RNN;transformers;long range arena", "primary_area": "", "supplementary_material": "", "author": "Jimmy T.H. Smith;Andrew Warrington;Scott Linderman", "authorids": "~Jimmy_T.H._Smith1;~Andrew_Warrington2;~Scott_Linderman1", "gender": "M;M;M", "homepage": "https://jimmysmith1919.github.io/;;https://web.stanford.edu/~swl1/", "dblp": "305/3641;207/8575;142/2484", "google_scholar": "GC9Vv1wAAAAJ;https://scholar.google.co.uk/citations?hl=en;6mD3I24AAAAJ", "orcid": "0000-0003-2016-2480;;", "linkedin": "jimmy-t-h-smith-1679b122/;;", "or_profile": "~Jimmy_T.H._Smith1;~Andrew_Warrington2;~Scott_W_Linderman1", "aff": "NVIDIA;Stanford University;Stanford University", "aff_domain": "nvidia.com;stanford.edu;stanford.edu", "position": "Intern;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nsmith2023simplified,\ntitle={Simplified State Space Layers for Sequence Modeling},\nauthor={Jimmy T.H. 
Smith and Andrew Warrington and Scott Linderman},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=Ai8Hw3AXqks}\n}", "github": "", "project": "", "reviewers": "Gzd2;w99z;2DkJ", "pdf_size": 1329684, "recommendation": "8;8;8", "confidence": "3;4;3", "correctness": "4;4;3", "technical_novelty": "3;3;3", "empirical_novelty": "3;0;3", "wc_summary_paper": "158;50;53", "wc_strength_and_weaknesses": "220;224;56", "wc_clarity_quality_novelty_and_reproducibility": "87;54;6", "wc_summary_review": "27;127;6", "wc_review": "492;455;121", "wc_reply_reviewers": "26;0;0", "wc_reply_authors": "293;634;59", "reply_reviewers": "1;0;0", "reply_authors": "2;2;2", "recommendation_avg": [ 8.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 1.4142135623730951 ], "wc_summary_paper_avg": [ 87.0, 50.21951811795888 ], "wc_strength_and_weaknesses_avg": [ 166.66666666666666, 78.27018731434225 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 49.0, 33.25657829663178 ], "wc_summary_review_avg": [ 53.333333333333336, 52.79099249766847 ], "wc_review_avg": [ 356.0, 166.85522666871023 ], "wc_reply_reviewers_avg": [ 8.666666666666666, 12.256517540566824 ], "wc_reply_authors_avg": [ 328.6666666666667, 236.0936725586313 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 588, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16338332501923598217&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=Ai8Hw3AXqks", "email": "nvidia.com;stanford.edu;stanford.edu", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "NVIDIA;Stanford University", "aff_unique_dep": "NVIDIA Corporation;", "aff_unique_url": "https://www.nvidia.com;https://www.stanford.edu", "aff_unique_abbr": "NVIDIA;Stanford", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Contrastive Learning Can Find An Optimal Basis For Approximately View-Invariant Functions", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11388", "id": "AjC0KBjiMu", "poster": "", "openreview": "https://openreview.net/forum?id=AjC0KBjiMu", "slides": "https://iclr.cc/virtual/2023/poster/11388", "video": "https://iclr.cc/virtual/2023/poster/11388", "author_site": "Daniel D Johnson, Ayoub El Hanchi, Chris Maddison", "tldr": "We show that existing contrastive objectives approximate a \"positive-pair kernel\", and that applying Kernel PCA produces a representation that is provably optimal for supervised learning of functions that assign similar values to positive pairs.", "abstract": "Contrastive learning is a powerful framework for learning self-supervised representations that generalize well to downstream supervised tasks. We show that multiple existing contrastive learning methods can be reinterpreted as learning kernel functions that approximate a fixed *positive-pair kernel*.
We then prove that a simple representation obtained by combining this kernel with PCA provably minimizes the worst-case approximation error of linear predictors, under a straightforward assumption that positive pairs have similar labels. Our analysis is based on a decomposition of the target function in terms of the eigenfunctions of a positive-pair Markov chain, and a surprising equivalence between these eigenfunctions and the output of Kernel PCA. We give generalization bounds for downstream linear prediction using our kernel PCA representation, and show empirically on a set of synthetic tasks that applying kernel PCA to contrastive learning models can indeed approximately recover the Markov chain eigenfunctions, although the accuracy depends on the kernel parameterization as well as on the augmentation strength.", "keywords": "contrastive learning;self-supervised learning;representation learning;kernel;kernel PCA;positive definite;eigenfunction;spectral clustering;invariance;Markov chain;minimax optimal", "primary_area": "", "supplementary_material": "", "author": "Daniel D. Johnson;Ayoub El Hanchi;Chris J. Maddison", "authorids": "~Daniel_D._Johnson1;~Ayoub_El_Hanchi1;~Chris_J._Maddison1", "gender": "M;M;M", "homepage": "http://www.danieldjohnson.com;https://www.cs.toronto.edu/~aelhan/;http://www.cs.toronto.edu/~cmaddis/", "dblp": "120/9868-1;;139/1388", "google_scholar": "44R4pgMAAAAJ;5ZzcGmgAAAAJ;https://scholar.google.ca/citations?user=WjCG3owAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Daniel_D._Johnson1;~Ayoub_El_Hanchi1;~Chris_J_Maddison1", "aff": "Google;University of Toronto;Google", "aff_domain": "google.com;toronto.edu;google.com", "position": "Researcher;PhD student;Researcher", "bibtex": "@inproceedings{\njohnson2023contrastive,\ntitle={Contrastive Learning Can Find An Optimal Basis For Approximately View-Invariant Functions},\nauthor={Daniel D. Johnson and Ayoub El Hanchi and Chris J. 
Maddison},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=AjC0KBjiMu}\n}", "github": "", "project": "", "reviewers": "Ry9G;76h8;7QsK", "pdf_size": 1447113, "recommendation": "5;6;8", "confidence": "3;3;4", "correctness": "3;3;4", "technical_novelty": "3;4;3", "empirical_novelty": "2;2;4", "wc_summary_paper": "120;132;152", "wc_strength_and_weaknesses": "68;188;110", "wc_clarity_quality_novelty_and_reproducibility": "37;82;99", "wc_summary_review": "115;99;85", "wc_review": "340;501;446", "wc_reply_reviewers": "0;43;0", "wc_reply_authors": "369;408;105", "reply_reviewers": "0;1;0", "reply_authors": "1;1;1", "recommendation_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.9428090415820634 ], "wc_summary_paper_avg": [ 134.66666666666666, 13.199326582148888 ], "wc_strength_and_weaknesses_avg": [ 122.0, 49.71921157862421 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 72.66666666666667, 26.157641755751268 ], "wc_summary_review_avg": [ 99.66666666666667, 12.256517540566824 ], "wc_review_avg": [ 429.0, 66.81816120387231 ], "wc_reply_reviewers_avg": [ 14.333333333333334, 20.27039439401436 ], "wc_reply_authors_avg": [ 294.0, 134.58826100369973 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.944911182523068, "corr_recommendation_correctness": 0.944911182523068, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2700447575378396174&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "pdf": "https://openreview.net/pdf?id=AjC0KBjiMu", "email": "google.com;toronto.edu;google.com", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Google;University of Toronto", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://www.utoronto.ca", "aff_unique_abbr": "Google;U of T", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;Canada" }, { "id": "Ajk3Bfo9AUW", "title": "Using Planning to Improve Semantic Parsing of Instructional Texts", "track": "main", "status": "Withdraw", "tldr": "Integrating symbolic planning information as a decoding constraint improves few-shot semantic parsing of instructional texts", "abstract": "We develop a method for few-shot semantic parsing of instructional texts. The system takes long-form instructional texts as input and produces sequences of actions in a formal language that enable execution of the instructions. This task poses unique challenges since input texts may contain long context dependencies and ambiguous and domain-specific language. Valid semantic parses also require sequences of steps that constitute an executable plan. We build on recent progress in semantic parsing by leveraging large language models to learn parsers from small amounts of training data. During decoding, our method employs planning methods and domain information to rank and correct candidate parses. To validate our method, we investigate recipe interpretation in two cooking domains. 
We present results for few-shot semantic parsing using leave-one-out cross-validation. We show that utilizing planning domain information improves the quality of generated plans. Through ablations we also explore the effects of our decoder design choices and model size.", "keywords": "nlp;semantic parsing;planning", "primary_area": "", "supplementary_material": "", "author": "Vanya Cohen;Ray Mooney", "authorids": "~Vanya_Cohen1;~Ray_Mooney1", "gender": ";M", "homepage": ";https://www.cs.utexas.edu/~mooney/", "dblp": ";m/RaymondJMooney.html", "google_scholar": ";p9RsPG4AAAAJ", "orcid": ";0000-0002-4504-0490", "linkedin": ";", "or_profile": "~Vanya_Cohen1;~Ray_Mooney1", "aff": ";University of Texas at Austin", "aff_domain": ";cs.utexas.edu", "position": ";Full Professor", "bibtex": "@misc{\ncohen2023using,\ntitle={Using Planning to Improve Semantic Parsing of Instructional Texts},\nauthor={Vanya Cohen and Ray Mooney},\nyear={2023},\nurl={https://openreview.net/forum?id=Ajk3Bfo9AUW}\n}", "github": "", "project": "", "reviewers": "HVec;hp8K;kQif;gtZ1", "site": "https://openreview.net/forum?id=Ajk3Bfo9AUW", "pdf_size": 229833, "recommendation": "1;3;3;5", "confidence": "4;3;5;3", "correctness": "3;2;2;3", "technical_novelty": "1;2;2;2", "empirical_novelty": "2;1;2;3", "wc_summary_paper": "55;107;97;112", "wc_strength_and_weaknesses": "395;141;93;259", "wc_clarity_quality_novelty_and_reproducibility": "109;47;82;22", "wc_summary_review": "100;137;162;47", "wc_review": "659;432;434;440", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.0, 1.4142135623730951 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 92.75, 22.454119889231908 ], "wc_strength_and_weaknesses_avg": [ 222.0, 116.72617529928752 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 65.0, 33.15870926317851 ], "wc_summary_review_avg": [ 111.5, 43.281058212571466 ], "wc_review_avg": [ 491.25, 96.89523982115944 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.4264014327112209, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17897357233243251863&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "NTFields: Neural Time Fields for Physics-Informed Robot Motion Planning", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11502", "id": "ApF0dmi1_9K", "poster": "/media/PosterPDFs/ICLR%202023/11502.png?t=1681931177.0008454", "openreview": "https://openreview.net/forum?id=ApF0dmi1_9K", "slides": "https://iclr.cc/virtual/2023/poster/11502", "video": "https://iclr.cc/virtual/2023/poster/11502", "author_site": "Ruiqi Ni, Ahmed Qureshi", "tldr": "A physics-informed neural time fields model for robot motion planning.", "abstract": "Neural Motion Planners (NMPs) have emerged as a 
promising tool for solving robot navigation tasks in complex environments. However, these methods often require expert data for learning, which limits their application to scenarios where data generation is time-consuming. Recent developments have also led to physics-informed deep neural models capable of representing complex dynamical Partial Differential Equations (PDEs). Inspired by these developments, we propose Neural Time Fields (NTFields) for robot motion planning in cluttered scenarios. Our framework represents a wave propagation model generating continuous arrival time to find path solutions informed by a nonlinear first-order PDE called Eikonal Equation. We evaluate our method in various cluttered 3D environments, including the Gibson dataset, and demonstrate its ability to solve motion planning problems for 4-DOF and 6-DOF robot manipulators where the traditional grid-based Eikonal planners often face the curse of dimensionality. Furthermore, the results show that our method exhibits high success rates and significantly lower computational times than the state-of-the-art methods, including NMPs that require training data from classical planners.", "keywords": "Robotics;Motion Planning;Neural Fields;Implicit Neural Representation;Physics Informed Deep Learning", "primary_area": "", "supplementary_material": "/attachment/ff843c8e24c9e90980c923c5c464f3637f87e4ef.zip", "author": "Ruiqi Ni;Ahmed H Qureshi", "authorids": "~Ruiqi_Ni1;~Ahmed_H_Qureshi1", "gender": ";M", "homepage": "https://ruiqini.github.io/;https://qureshiahmed.github.io/", "dblp": "224/0702;222/2796", "google_scholar": "-Bzyvw0AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";", "linkedin": ";", "or_profile": "~Ruiqi_Ni1;~Ahmed_Qureshi1", "aff": "Purdue University;Purdue University", "aff_domain": "purdue.edu;purdue.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nni2023ntfields,\ntitle={{NTF}ields: Neural Time Fields for Physics-Informed Robot Motion Planning},\nauthor={Ruiqi Ni and Ahmed H Qureshi},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=ApF0dmi1_9K}\n}", "github": "", "project": "", "reviewers": "8txW;t8K7;9pD7;E33u", "pdf_size": 7137116, "recommendation": "6;8;8;8", "confidence": "3;4;4;4", "correctness": "4;3;4;3", "technical_novelty": "3;3;3;3", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "120;35;37;59", "wc_strength_and_weaknesses": "200;164;425;69", "wc_clarity_quality_novelty_and_reproducibility": "104;13;81;254", "wc_summary_review": "26;34;87;109", "wc_review": "450;246;630;491", "wc_reply_reviewers": "33;25;146;49", "wc_reply_authors": "470;512;1603;689", "reply_reviewers": "1;1;1;2", "reply_authors": "2;3;3;3", "recommendation_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 62.75, 34.368408458932166 ], "wc_strength_and_weaknesses_avg": [ 214.5, 130.6148919534063 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 113.0, 88.01420339922414 ], "wc_summary_review_avg": [ 64.0, 34.99285641384538 ], "wc_review_avg": [ 454.25, 137.4997727270849 ], "wc_reply_reviewers_avg": [ 63.25, 48.55087537830806 ], "wc_reply_authors_avg": [ 818.5, 460.32732919087044 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 
23, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 1.0, "corr_recommendation_correctness": -0.5773502691896257, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14279864575788368824&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=ApF0dmi1_9K", "email": "purdue.edu;purdue.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Purdue University", "aff_unique_dep": "", "aff_unique_url": "https://www.purdue.edu", "aff_unique_abbr": "Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "ApNK_ApJoec", "title": "Disentangling Writer and Character Styles for Handwriting Generation", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Training machines for synthesizing diverse handwritings is an intriguing task. Recently, some RNN-based methods have been proposed to generate stylized online Chinese characters. However, these methods mainly focus on learning a person\u2019s overall writing style and hence neglect the detailed style inconsistencies between characters from the same writer. For example, one person\u2019s handwriting always exhibits an overall uniformity (e.g., character slant and aspect ratios), but small style differences still exist between local regions (e.g., stroke length and curvature) of characters. Motivated by this, in this paper, we propose to disentangle the style representations at both writer and character levels from individual handwritings. Specifically, we propose the style-disentangled transformer (SDT), equipped with two complementary contrastive objectives, to extract the overall writer-wise and detailed character-wise style representations, respectively, which boosts the generation quality of online handwritings. Extensive experiments on various language scripts verify the superiority of SDT.
Particularly, we empirically find that the two learned style representations provide information with different frequency magnitudes, which demonstrates the necessity of separate style extraction.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Gang Dai;Yifan Zhang;Qingfeng Wang;Qing Du;Zhuliang Yu;Shuangping Huang", "authorids": "~Gang_Dai1;~Yifan_Zhang1;~Qingfeng_Wang1;~Qing_Du1;~Zhuliang_Yu1;~Shuangping_Huang1", "gender": "M;M;M;F;M;F", "homepage": "https://eedaigang.cn;https://sites.google.com/view/yifan-zhang/%E9%A6%96%E9%A1%B5;;;;", "dblp": "41/8779-2;57/4707-4;;91/8490.html;;26/7950", "google_scholar": "https://scholar.google.com.hk/citations?user=a2SwkisAAAAJ;https://scholar.google.com.hk/citations?user=zuYIUJEAAAAJ;;zIyNFOoAAAAJ;oAUB9cQAAAAJ;", "orcid": "0000-0001-8864-908X;;0000-0003-0780-3861;0000-0002-8411-6734;;", "linkedin": ";;;;;", "or_profile": "~Gang_Dai1;~Yifan_Zhang1;~Qingfeng_Wang1;~Qing_Du1;~Zhuliang_Yu1;~Shuangping_Huang1", "aff": "South China University of Technology;National University of Singapore;South China University of Technology;South China University of Technology;South China University of Technology;South China University of Technology", "aff_domain": "scut.edu.cn;nus.edu;scut.edu.cn;scut.edu.cn;scut.edu.cn;scut.edu.cn", "position": "PhD student;PhD student;MS student;Associate Professor;Full Professor;Full Professor", "bibtex": "@misc{\ndai2023disentangling,\ntitle={Disentangling Writer and Character Styles for Handwriting Generation},\nauthor={Gang Dai and Yifan Zhang and Qingfeng Wang and Qing Du and Zhuliang Yu and Shuangping Huang},\nyear={2023},\nurl={https://openreview.net/forum?id=ApNK_ApJoec}\n}", "github": "", "project": "", "reviewers": "mg7S;nW7d;SRZg;aSVJ", "site": "https://openreview.net/forum?id=ApNK_ApJoec", "pdf_size": 5439377, "recommendation": "3;5;5;6", "confidence": "4;4;4;5", "correctness": "3;4;3;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;3;0;3", "wc_summary_paper": "41;89;205;82", "wc_strength_and_weaknesses": "240;164;159;167", "wc_clarity_quality_novelty_and_reproducibility": "58;43;160;1", "wc_summary_review": "49;59;101;33", "wc_review": "388;355;625;283", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 1.224744871391589 ], "wc_summary_paper_avg": [ 104.25, 60.98924085443268 ], "wc_strength_and_weaknesses_avg": [ 182.5, 33.32041416309227 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 65.5, 58.42302628245134 ], "wc_summary_review_avg": [ 60.5, 25.154522456210533 ], "wc_review_avg": [ 412.75, 128.28946761133588 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.6622661785325219, "corr_recommendation_correctness": 0.13245323570650439, "gs_citation": 41, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14249460145112364513&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff_unique_index": "0;1;0;0;0;0", "aff_unique_norm": "South China University of Technology;National University of Singapore", "aff_unique_dep": ";", "aff_unique_url": 
"https://www.scut.edu.cn;https://www.nus.edu.sg", "aff_unique_abbr": "SCUT;NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;0", "aff_country_unique": "China;Singapore" }, { "id": "ApV_xBR9UUC", "title": "ML-ViG: Multi-Label Image Recognition with Vision Graph Convolutional Network", "track": "main", "status": "Withdraw", "tldr": "The first fully graph convolutional model for the task of multi-label image recognition.", "abstract": "Multi-Label Image Recognition (MLIR) aims to predict multiple object labels in a single image. Graph representations have been used to model label correlation or visual relationships separately. However, the representations of label embeddings and visual features are not well aligned, which hinders effective representation learning and leads to inferior performance. In this work, we propose the first fully graph convolutional model, termed Multi-Label Vision Graph Convolutional Network (ML-ViG), for the task of MLIR. ML-ViG unifies the representation of visual features and label embeddings, enabling the graph structures to capture the (1) spatial relationship among visual region features, (2) semantic relationship among object labels, and (3) cross-level relationship between labels and regions. In order to effectively pass messages between visual features and labels, Multi-Label Graph Convolutional Network (MLG) module is proposed. \nML-ViG achieves state-of-the-art performance with significantly lower computational costs on MS-COCO, VOC2007, and VG-500 datasets. Codes and models will be released.", "keywords": "Multi-Label Image Recognition;Graph Convolutional Network", "primary_area": "", "supplementary_material": "", "author": "Ruijie Yao;Sheng Jin;Wentao Liu;Chen Qian;Ping Luo;Ji Wu", "authorids": "~Ruijie_Yao1;~Sheng_Jin1;~Wentao_Liu1;~Chen_Qian1;~Ping_Luo2;~Ji_Wu3", "gender": "M;M;M;M;M;", "homepage": ";https://jin-s13.github.io/;;;http://speech.tsinghua.edu.cn/en/;http://luoping.me/", "dblp": ";70/6780-7;30/3943-2;;91/4957-2;54/4989-2.html", "google_scholar": "GWPPV90AAAAJ;wrNd--oAAAAJ;KZn9NWEAAAAJ;AerkT0YAAAAJ;;https://scholar.google.com.hk/citations?hl=en", "orcid": ";0000-0001-5736-7434;;;0000-0001-6170-726X;0000-0002-6685-7950", "linkedin": ";;;;;", "or_profile": "~Ruijie_Yao1;~Sheng_Jin1;~Wentao_Liu1;~Chen_Qian1;~Ji_Wu3;~Luo_Ping2", "aff": "Tsinghua University;The University of Hong Kong;Sensetime;Tsinghua University;Tsinghua University;The University of Hong Kong", "aff_domain": "tsinghua.edu.cn;hku.hk;sensetime.com;mails.tsinghua.edu.cn;tsinghua.edu.cn;hku.hk", "position": "MS student;PhD student;Senior Researcher;PhD student;Full Professor;Assistant Professor", "bibtex": "@misc{\nyao2023mlvig,\ntitle={{ML}-ViG: Multi-Label Image Recognition with Vision Graph Convolutional Network},\nauthor={Ruijie Yao and Sheng Jin and Wentao Liu and Chen Qian and Ping Luo and Ji Wu},\nyear={2023},\nurl={https://openreview.net/forum?id=ApV_xBR9UUC}\n}", "github": "", "project": "", "reviewers": "XtfX;BvVV;Qa77;12VP", "site": "https://openreview.net/forum?id=ApV_xBR9UUC", "pdf_size": 3327840, "recommendation": "3;3;3;5", "confidence": "4;4;5;4", "correctness": "3;2;3;3", "technical_novelty": "2;2;1;2", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "98;48;104;76", "wc_strength_and_weaknesses": "177;207;265;113", "wc_clarity_quality_novelty_and_reproducibility": "37;15;96;10", "wc_summary_review": "52;15;77;7", "wc_review": "364;285;542;206", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", 
"reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 81.5, 21.97157254271983 ], "wc_strength_and_weaknesses_avg": [ 190.5, 54.79735395071554 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 39.5, 34.16504061171302 ], "wc_summary_review_avg": [ 37.75, 28.314086600135983 ], "wc_review_avg": [ 349.25, 124.51782000982831 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11909048182438010471&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;2;0;0;1", "aff_unique_norm": "Tsinghua University;University of Hong Kong;SenseTime", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.hku.hk;https://www.sensetime.com", "aff_unique_abbr": "THU;HKU;SenseTime", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "AqX3oSbzyQ1", "title": "Object-Centric Learning with Slot Mixture Models", "track": "main", "status": "Reject", "tldr": "We propose to use Gaussian Mixture Model to represent slots in object-centric tasks, which leads to a more expressive slots representation and the state-of-the-art results in the set property prediction task.", "abstract": "Object-centric architectures usually apply some differentiable module on the whole feature map to decompose it into sets of entities representations called slots. Some of these methods structurally resemble clustering algorithms, where the center of the cluster in latent space serves as slot representation. Slot Attention is an example of such a method as a learnable analog of the soft k-Means algorithm. In our work, we use the learnable clustering method based on Gaussian Mixture Model, unlike other approaches we represent slots not only as centers of clusters but we also use information about the distance between clusters and assigned vectors, which leads to more expressive slots representations. 
Our experiments demonstrate that using this approach instead of Slot Attention improves performance in different scenarios, achieving state-of-the-art results in the set property prediction task.", "keywords": "object-centric task;gaussian mixture model;slot attention", "primary_area": "", "supplementary_material": "/attachment/3c170e24fd2605ef9200eb1ba6e958ec8ab6134c.zip", "author": "Daniil Kirilenko;Alexey Kovalev;Aleksandr Panov", "authorids": "~Daniil_Kirilenko1;~Alexey_Kovalev3;~Aleksandr_Panov1", "gender": "M;M;M", "homepage": ";;http://grafft.github.io", "dblp": "304/3977;245/7675;177/9975", "google_scholar": ";https://scholar.google.com/citations?view_op=list_works;https://scholar.google.ru/citations?hl=ru", "orcid": "0000-0002-4835-9413;0000-0003-2180-0990;0000-0002-9747-3837", "linkedin": ";alexey-kovalev-831433286/;", "or_profile": "~Daniil_Kirilenko1;~Alexey_Kovalev3;~Aleksandr_Panov1", "aff": "Universita della Svizzera Italiana;Federal Research Center \u00abComputer Science and Control\u00bb of Russian Academy of Sciences;Federal Research Center \u00abComputer Science and Control\u00bb of Russian Academy of Sciences", "aff_domain": "usi.ch;frccsc.ru;frccsc.ru", "position": "PhD student;Researcher;Principal Researcher", "bibtex": "@misc{\nkirilenko2023objectcentric,\ntitle={Object-Centric Learning with Slot Mixture Models},\nauthor={Daniil Kirilenko and Alexey Kovalev and Aleksandr Panov},\nyear={2023},\nurl={https://openreview.net/forum?id=AqX3oSbzyQ1}\n}", "github": "", "project": "", "reviewers": "oeon;wLsE;LBbF;koSa", "site": "https://openreview.net/forum?id=AqX3oSbzyQ1", "pdf_size": 4791524, "recommendation": "3;3;5;5", "confidence": "4;4;4;3", "correctness": "2;3;3;3", "technical_novelty": "3;1;2;3", "empirical_novelty": "2;2;3;2", "wc_summary_paper": "76;49;79;60", "wc_strength_and_weaknesses": "226;194;327;53", "wc_clarity_quality_novelty_and_reproducibility": "105;54;166;38", "wc_summary_review": "43;50;145;25", "wc_review": "450;347;717;176", "wc_reply_reviewers": "0;0;289;0", "wc_reply_authors": "646;513;740;69", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.82915619758885 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 66.0, 12.186057606953941 ], "wc_strength_and_weaknesses_avg": [ 200.0, 98.04335775563789 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 90.75, 49.996874902337645 ], "wc_summary_review_avg": [ 65.75, 46.65498365662558 ], "wc_review_avg": [ 422.5, 196.18167600466666 ], "wc_reply_reviewers_avg": [ 72.25, 125.14067084685138 ], "wc_reply_authors_avg": [ 492.0, 257.19156284761755 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6165928678608744211&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;1", "aff_unique_norm": "Universita della Svizzera Italiana;Russian Academy of Sciences", "aff_unique_dep": ";Computer Science and Control", "aff_unique_url": "https://www.usi.ch;https://www.ras.ru", "aff_unique_abbr": "USI;RAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1",
"aff_country_unique": "Switzerland;Russian Federation" }, { "id": "AqiB_Tqqc8z", "title": "Kuiper: Moderated Asynchronous Federated Learning on Heterogeneous Mobile Devices with Non-IID Data", "track": "main", "status": "Reject", "tldr": "We develop a moderated asynchronous algorithm for training on a video action recognition task on embedded devices with mobile GPUs. ", "abstract": "Federated learning allows multiple clients to jointly learn an ML model while keeping their data private. While synchronous federated learning (Sync-FL) requires the devices to share local gradients synchronously to provide better guarantees, it suffers from the problem of stragglers. This is the scenario where the faster clients have to wait for the slower ones, slowing the entire training process. Conventional techniques completely drop the updates from the stragglers and lose the opportunity to learn from the data they hold, which is especially important in a non-iid setting. Asynchronous learning (Async-FL) provides a potential solution to allow the clients to function at their own pace, which typically achieves faster convergence. Since edge devices have a low compute, it is hard to train a video action recognition task on them. We present Kuiper, a variant of Async-FL, to help heterogeneous edge devices with limited resources learn a heavy model on video-action-recognition tasks with data distributed non-IID. Kuiper introduces a novel aggregation scheme, which solves the straggler problem while considering the different data distribution at different clients. Kuiper shows a 11% faster convergence compared to Oort15 [OSDI-21], up to 12% and 9% improvement in test accuracy compared to FedBuff16 [AISTAT-22] and Oort [OSDI-21] on HMDB51, and 10% and 9% on UCF101.", "keywords": "Federated Learning;Edge devices;Non-IID;Video action recognition", "primary_area": "", "supplementary_material": "", "author": "Dipesh Tamboli;Pranjal Jain;Atul Sharma;Biplab Banerjee;Saurabh Bagchi;Somali Chaterji", "authorids": "~Dipesh_Tamboli1;~Pranjal_Jain1;~Atul_Sharma1;~Biplab_Banerjee1;~Saurabh_Bagchi1;~Somali_Chaterji1", "gender": "M;M;M;M;M;F", "homepage": "https://dipeshtamboli.github.io/;;https://sharm438.github.io/;https://biplab-banerjee.github.io;https://saurabhbagchi.us;https://schaterji.io", "dblp": "260/0079;;;87/9571;57/95.html;157/2828", "google_scholar": "https://scholar.google.co.in/citations?hl=en;;0gIenGAAAAAJ;IEcsMPAAAAAJ;https://scholar.google.com.tw/citations?user=3EfsOvYAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0003-2765-5763;;;0000-0001-8371-8138;;0000-0002-3651-6362", "linkedin": "dipesh-tamboli-76b20a147/;pranjal-jain-9482b2167/;atul-sharma-8b7296137;;;", "or_profile": "~Dipesh_Tamboli1;~Pranjal_Jain1;~Atul_Sharma1;~Biplab_Banerjee1;~Saurabh_Bagchi1;~Somali_Chaterji1", "aff": "Purdue University;University of California, Los Angeles;Purdue University;Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology;Purdue University;", "aff_domain": "purdue.edu;cs.ucla.edu;purdue.edu;iitb.ac.in;purdue.edu;", "position": "PhD student;MS student;PhD student;Associate Professor;Full Professor;", "bibtex": "@misc{\ntamboli2023kuiper,\ntitle={Kuiper: Moderated Asynchronous Federated Learning on Heterogeneous Mobile Devices with Non-{IID} Data},\nauthor={Dipesh Tamboli and Pranjal Jain and Atul Sharma and Biplab Banerjee and Saurabh Bagchi and Somali Chaterji},\nyear={2023},\nurl={https://openreview.net/forum?id=AqiB_Tqqc8z}\n}", "github": "", 
"project": "", "reviewers": "t9vz;x4EL;cvzu", "site": "https://openreview.net/forum?id=AqiB_Tqqc8z", "pdf_size": 2514715, "recommendation": "3;5;6", "confidence": "3;4;3", "correctness": "2;4;3", "technical_novelty": "1;2;2", "empirical_novelty": "1;3;3", "wc_summary_paper": "40;119;89", "wc_strength_and_weaknesses": "112;132;90", "wc_clarity_quality_novelty_and_reproducibility": "18;143;682", "wc_summary_review": "10;130;121", "wc_review": "180;524;982", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "588;487;853", "reply_reviewers": "0;0;0", "reply_authors": "3;1;3", "recommendation_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.9428090415820634 ], "wc_summary_paper_avg": [ 82.66666666666667, 32.5610537640019 ], "wc_strength_and_weaknesses_avg": [ 111.33333333333333, 17.15290710702481 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 281.0, 288.10530482215466 ], "wc_summary_review_avg": [ 87.0, 54.57105459856901 ], "wc_review_avg": [ 562.0, 328.5158545133959 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 642.6666666666666, 154.33801288801868 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.3333333333333335, 0.9428090415820634 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.18898223650461363, "corr_recommendation_correctness": 0.6546536707079772, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11174846623708453590&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "Purdue University;University of California, Los Angeles;Indian Institute of Technology, Bombay", "aff_unique_dep": ";;", "aff_unique_url": "https://www.purdue.edu;https://www.ucla.edu;https://www.iitb.ac.in", "aff_unique_abbr": "Purdue;UCLA;IIT Bombay", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Los Angeles;Bombay", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "United States;India" }, { "id": "ArPM-xtsFrk", "title": "Gated Neural ODEs: Trainability, Expressivity and Interpretability", "track": "main", "status": "Reject", "tldr": "", "abstract": "Understanding how the dynamics in biological and artificial neural networks implement the computations required for a task is a salient open question in machine learning and neuroscience. In particular, computations requiring complex memory storage and retrieval pose significant challenge for these networks to implement or learn. Recently, a family of models described by neural ordinary differential equations (nODEs) has emerged as powerful dynamical neural network models capable of capturing complex dynamics. Here, we extend nODEs by endowing them with adaptive timescales using gating interactions. We refer to these as gated neural ODEs (gnODEs). Using a task that requires memory of continuous quantities, we demonstrate the inductive bias of the gnODEs to learn (approximate) continuous attractors. We further show how reduced-dimensional gnODEs retain their modeling power while greatly improving interpretability, even allowing explicit visualization of the structure of learned attractors. We introduce a novel measure of expressivity which probes the capacity of a neural network to generate complex trajectories. 
Using this measure, we explore how the phase-space dimension of the nODEs and the complexity of the function modeling the flow field contribute to expressivity. We see that a more complex function for modeling the flow field allows a lower-dimensional nODE to capture a given target dynamics. Finally, we demonstrate the benefit of gating in nODEs on several real-world tasks.", "keywords": "Computational Neuroscience;Dynamical Systems;Differential Equations;Neural ODEs;Gating;Interpretability", "primary_area": "", "supplementary_material": "", "author": "Timothy Doyeon Kim;Tankut Can;Kamesh Krishnamurthy", "authorids": "~Timothy_Doyeon_Kim1;~Tankut_Can1;~Kamesh_Krishnamurthy1", "gender": ";M;M", "homepage": "https://timkimd.github.io/;https://sites.google.com/view/tankut-can;https://kameshkk.github.io", "dblp": "352/2551;;", "google_scholar": "FTWUPusAAAAJ;H5MicWUAAAAJ;EV1i2WIAAAAJ", "orcid": ";0000-0002-0999-2355;0000-0002-2011-2590", "linkedin": ";;", "or_profile": "~Timothy_Doyeon_Kim1;~Tankut_Can1;~Kamesh_Krishnamurthy1", "aff": "Princeton University;Institute for Advanced Study, Princeton;Princeton University", "aff_domain": "princeton.edu;ias.edu;princeton.edu", "position": "PhD student;Researcher;Postdoc", "bibtex": "@misc{\nkim2023gated,\ntitle={Gated Neural {ODE}s: Trainability, Expressivity and Interpretability},\nauthor={Timothy Doyeon Kim and Tankut Can and Kamesh Krishnamurthy},\nyear={2023},\nurl={https://openreview.net/forum?id=ArPM-xtsFrk}\n}", "github": "", "project": "", "reviewers": "fH84;5GnR;XxBD;oDgk", "site": "https://openreview.net/forum?id=ArPM-xtsFrk", "pdf_size": 9610534, "recommendation": "3;5;6;8", "confidence": "3;4;4;4", "correctness": "2;3;3;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "21;39;137;56", "wc_strength_and_weaknesses": "221;133;129;294", "wc_clarity_quality_novelty_and_reproducibility": "3;44;21;37", "wc_summary_review": "10;50;84;42", "wc_review": "255;266;371;429", "wc_reply_reviewers": "0;398;0;0", "wc_reply_authors": "1608;1659;425;1037", "reply_reviewers": "0;2;0;0", "reply_authors": "4;4;1;2", "recommendation_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 63.25, 44.34171286723146 ], "wc_strength_and_weaknesses_avg": [ 194.25, 68.32779449096832 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 26.25, 15.801503093060482 ], "wc_summary_review_avg": [ 46.5, 26.320144376503713 ], "wc_review_avg": [ 330.25, 72.80582050907744 ], "wc_reply_reviewers_avg": [ 99.5, 172.33905535310328 ], "wc_reply_authors_avg": [ 1182.25, 500.7690959913561 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.8006407690254357, "corr_recommendation_correctness": 0.8006407690254357, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:2f19cOP29PcJ:scholar.google.com/&scioq=Gated+Neural+ODEs:+Trainability,+Expressivity+and+Interpretability&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;1;0", "aff_unique_norm": "Princeton University;Institute for Advanced Study", "aff_unique_dep": ";", "aff_unique_url": "https://www.princeton.edu;https://ias.edu", "aff_unique_abbr": "Princeton;IAS", "aff_campus_unique_index": "1", 
"aff_campus_unique": ";Princeton", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "AsOLzq1S-p", "title": "Offline Policy Comparison with Confidence: Benchmarks and Baselines", "track": "main", "status": "Reject", "tldr": "We introduce a benchmark and baselines to study uncertainty estimation via policy comparisons in offline reinforcement learning datasets.", "abstract": "Decision makers often wish to use offline historical data to compare sequential-action policies at various world states. Importantly, computational tools should produce confidence values for such offline policy comparison (OPC) to account for statistical variance and limited data coverage. Nevertheless, there is little work that directly evaluates the quality of confidence values for OPC. In this work, we address this issue by creating benchmarks for OPC with Confidence (OPCC), derived by adding sets of policy comparison queries to datasets from offline reinforcement learning. In addition, we present an empirical evaluation of the \"risk versus coverage\" trade-off for a class of model-based baselines. In particular, the baselines learn ensembles of dynamics models, which are used in various ways to produce simulations for answering queries with confidence values. While our results suggest advantages for certain baseline variations, there appears to be significant room for improvement in future work.", "keywords": "offline reinforcement learning;reinforcement learning;benchmark;uncertainty;model based reinforcement learning", "primary_area": "", "supplementary_material": "", "author": "Anurag Koul;Mariano Phielipp;Alan Fern", "authorids": "~Anurag_Koul1;~Mariano_Phielipp2;~Alan_Fern1", "gender": "M;M;M", "homepage": "http://koulanurag.github.io/;https://www.intel.com/content/www/us/en/research/researchers/mariano-phielipp.html;http://www.eecs.oregonstate.edu/~afern", "dblp": "209/9666;23/4518;49/6764", "google_scholar": "K-Q0Xq4AAAAJ;YArRsvEAAAAJ;https://scholar.google.com.tw/citations?user=GaKxFrcAAAAJ", "orcid": ";;", "linkedin": "koulanurag/;mariano-phielipp-941624;", "or_profile": "~Anurag_Koul1;~Mariano_Phielipp2;~Alan_Fern1", "aff": "Microsoft;Intel Labs;", "aff_domain": "microsoft.com;intel.com;", "position": "Postdoc;Principal Researcher;", "bibtex": "@misc{\nkoul2023offline,\ntitle={Offline Policy Comparison with Confidence: Benchmarks and Baselines},\nauthor={Anurag Koul and Mariano Phielipp and Alan Fern},\nyear={2023},\nurl={https://openreview.net/forum?id=AsOLzq1S-p}\n}", "github": "", "project": "", "reviewers": "SRAe;ErjK;J8DJ;GVaQ", "site": "https://openreview.net/forum?id=AsOLzq1S-p", "pdf_size": 10351002, "recommendation": "3;5;6;6", "confidence": "4;2;3;4", "correctness": "3;3;3;3", "technical_novelty": "3;3;2;3", "empirical_novelty": "2;3;4;2", "wc_summary_paper": "78;136;54;71", "wc_strength_and_weaknesses": "259;112;56;509", "wc_clarity_quality_novelty_and_reproducibility": "84;96;331;58", "wc_summary_review": "28;27;33;48", "wc_review": "449;371;474;686", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 84.75, 30.849432733844555 ], "wc_strength_and_weaknesses_avg": [ 234.0, 175.22699563708784 ], 
"wc_clarity_quality_novelty_and_reproducibility_avg": [ 142.25, 109.83709528205851 ], "wc_summary_review_avg": [ 34.0, 8.396427811873332 ], "wc_review_avg": [ 495.0, 116.63404305776251 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.24618298195866545, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Y5PmxhtW03cJ:scholar.google.com/&scioq=Offline+Policy+Comparison+with+Confidence:+Benchmarks+and+Baselines&hl=en&as_sdt=0,23", "gs_version_total": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Microsoft;Intel", "aff_unique_dep": "Microsoft Corporation;Intel Labs", "aff_unique_url": "https://www.microsoft.com;https://www.intel.com", "aff_unique_abbr": "Microsoft;Intel", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "AsSdrNJ-DZG", "title": "Sweet Gradient Matters: Designing Consistent and Efficient Estimator for Zero-Shot Neural Architecture Search", "track": "main", "status": "Reject", "tldr": "We observe Sweet Gradient and propose Sweetimator, a consistent and efficient performance estimator in Zero-Shot Neural Architecture Search.", "abstract": "Neural architecture search (NAS) is one of the core technologies of AutoML for designing high-performance networks. Recently, Zero-Shot NAS has gained growing interest due to its training-free property and super-fast search speed. However, existing Zero-Shot estimators commonly suffer from low consistency, which limits the reliability and applicability. In this paper, we observe that Sweet Gradient of parameters, i.e., the absolute gradient values within a certain interval, brings higher consistency in network performance compared to the overall number of parameters. We further demonstrate a positive correlation between the network depth and the parameter ratio of sweet gradients in each layer. Based on the analysis, we propose a training-free method to find the Sweet Gradient interval and obtain an estimator, named Sweetimator. Experiments show that Sweetimator has superior consistency to existing Zero-Shot estimators on four benchmarks with eight search spaces. 
Moreover, Sweetimator achieves state-of-the-art performance on NAS-Bench-201 and DARTS search spaces.", "keywords": "Neural Architecture Search;Zero-Shot;Estimator;Sweet Gradient", "primary_area": "", "supplementary_material": "/attachment/f10fff2cf421a55d1cf5ecc1d6f15fe7d76c2a8f.zip", "author": "Longxing Yang;Yanxin Fu;Shun Lu;Zihao Sun;Jilin Mei;Wenxiao Zhao;Yu Hu", "authorids": "~Longxing_Yang1;~Yanxin_Fu1;~Shun_Lu1;~Zihao_Sun1;~Jilin_Mei1;~Wenxiao_Zhao1;~Yu_Hu5", "gender": "M;;M;M;M;M;", "homepage": ";;https://shunlu91.github.io/;;;http://lsc.amss.cas.cn/;", "dblp": "309/0621;;;;212/1446.html;;", "google_scholar": "FhdrIgcAAAAJ;;-zX83WMAAAAJ;oSmC9pMAAAAJ;;;", "orcid": ";;;0000-0003-0412-9760;;;", "linkedin": ";;;;;;", "or_profile": "~Longxing_Yang1;~Yanxin_Fu1;~Shun_Lu1;~Zihao_Sun1;~Jilin_Mei1;~Wenxiao_Zhao1;~Yu_Hu5", "aff": "Institute of Computing Technology, Chinese Academy of Sciences;;Institute of Computing Technology, Chinese Academy of Sciences ;Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;Academy of Mathematics and Systems Science, Chinese Academy of Sciences, Chinese Academy of Sciences;", "aff_domain": "ict.ac.cn;;ucas.ac.cn;ict.ac.cn;ict.ac.cn;amss.ac.cn;", "position": "PhD student;;PhD student;PhD student;Assistant Professor;Full Professor;", "bibtex": "@misc{\nyang2023sweet,\ntitle={Sweet Gradient Matters: Designing Consistent and Efficient Estimator for Zero-Shot Neural Architecture Search},\nauthor={Longxing Yang and Yanxin Fu and Shun Lu and Zihao Sun and Jilin Mei and Wenxiao Zhao and Yu Hu},\nyear={2023},\nurl={https://openreview.net/forum?id=AsSdrNJ-DZG}\n}", "github": "", "project": "", "reviewers": "FxrW;tGTD;3NxU;g7DU", "site": "https://openreview.net/forum?id=AsSdrNJ-DZG", "pdf_size": 2799395, "recommendation": "5;5;5;5", "confidence": "4;4;4;4", "correctness": "2;3;3;3", "technical_novelty": "3;3;2;3", "empirical_novelty": "2;3;2;3", "wc_summary_paper": "200;56;77;72", "wc_strength_and_weaknesses": "578;124;135;107", "wc_clarity_quality_novelty_and_reproducibility": "36;9;24;11", "wc_summary_review": "37;14;30;15", "wc_review": "851;203;266;205", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.0, 0.0 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 101.25, 57.53857401778393 ], "wc_strength_and_weaknesses_avg": [ 236.0, 197.70558919767544 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 20.0, 10.88577052853862 ], "wc_summary_review_avg": [ 24.0, 9.82344135219425 ], "wc_review_avg": [ 381.25, 272.389771283725 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2480235810846516670&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Chinese Academy of Sciences", "aff_unique_dep": "Institute of Computing Technology", "aff_unique_url": "http://www.ict.ac.cn", "aff_unique_abbr": "CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", 
"aff_country_unique": "China" }, { "id": "At0BdxvACds", "title": "Curiosity-Driven Unsupervised Data Collection for Offline Reinforcement Learning", "track": "main", "status": "Reject", "tldr": "We propose a novel adaptive reachability-based method to improve the data collection process in offline reinforcement learning. ", "abstract": "In offline reinforcement learning (RL), while the majority of efforts are focusing on engineering sophisticated learning algorithms given a fixed dataset, very few works have been carried out to improve the dataset quality itself. More importantly, it is even challenging to collect a task-agnostic dataset such that the offline RL agent can learn multiple skills from it. In this paper, we propose a Curiosity-driven Unsupervised Data Collection (CUDC) method to improve the data collection process. Specifically, we quantify the agent's internal belief to estimate the probability of the k-step future states being reachable from the current states. Different from existing approaches that implicitly assume limited feature space with fixed environment steps, CUDC is capable of adapting the number of environment steps to explore. Thus, the feature representation can be substantially diversified with the dynamics information. With this adaptive reachability mechanism in place, the agent can navigate itself to collect higher-quality data with curiosity. Empirically, CUDC surpasses existing unsupervised methods in sample efficiency and learning performance in various downstream offline RL tasks of the DeepMind control suite.", "keywords": "Offline Reinforcement Learning;Data Collection;Reachability;Unsupervised Learning;Curiosity-Driven Learning", "primary_area": "", "supplementary_material": "", "author": "Chenyu Sun;Hangwei Qian;Chunyan Miao", "authorids": "~Chenyu_Sun1;~Hangwei_Qian1;~Chunyan_Miao1", "gender": "M;F;F", "homepage": ";http://hangwei12358.github.io/;http://www.ntulily.org/ascymiao/", "dblp": "312/4407.html;37/7950;m/ChunyanMiao", "google_scholar": "https://scholar.google.com.sg/citations?view_op=list_works;https://scholar.google.com.sg/citations?user=RO3fS_wAAAAJ;https://scholar.google.com.tw/citations?user=fmXGRJgAAAAJ", "orcid": ";0000-0003-4831-0748;0000-0002-0300-3448", "linkedin": ";qianhangwei/;", "or_profile": "~Chenyu_Sun1;~Hangwei_Qian1;~Chunyan_Miao1", "aff": "Nanyang Technological University;Centre For Frontier AI Research (CFAR), IHPC, A*STAR, Singapore;School of Computer Science and Engineering, Nanyang Technological University", "aff_domain": "e.ntu.edu.sg;cfar.a-star.edu.sg;scse.ntu.edu.sg", "position": "PhD student;Scientist;Full Professor", "bibtex": "@misc{\nsun2023curiositydriven,\ntitle={Curiosity-Driven Unsupervised Data Collection for Offline Reinforcement Learning},\nauthor={Chenyu Sun and Hangwei Qian and Chunyan Miao},\nyear={2023},\nurl={https://openreview.net/forum?id=At0BdxvACds}\n}", "github": "", "project": "", "reviewers": "bF1X;LA4m;S2Ud;F8ph", "site": "https://openreview.net/forum?id=At0BdxvACds", "pdf_size": 959738, "recommendation": "3;5;6;6", "confidence": "4;3;3;4", "correctness": "3;3;4;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "89;258;170;84", "wc_strength_and_weaknesses": "545;464;293;176", "wc_clarity_quality_novelty_and_reproducibility": "120;5;103;5", "wc_summary_review": "62;57;33;26", "wc_review": "816;784;599;291", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1710;1415;958;990", "reply_reviewers": "0;0;0;0", "reply_authors": "3;2;2;2", 
"recommendation_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 150.25, 70.95905509517443 ], "wc_strength_and_weaknesses_avg": [ 369.5, 144.0702953422391 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 58.25, 53.58812834947681 ], "wc_summary_review_avg": [ 44.5, 15.305227865013967 ], "wc_review_avg": [ 622.5, 208.54795611561386 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1268.25, 312.39268157240815 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.40824829046386296, "corr_recommendation_correctness": 0.4714045207910316, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "aff_unique_index": "0;1;0", "aff_unique_norm": "Nanyang Technological University;A*STAR", "aff_unique_dep": ";Centre For Frontier AI Research (CFAR)", "aff_unique_url": "https://www.ntu.edu.sg;https://www.a-star.edu.sg", "aff_unique_abbr": "NTU;A*STAR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Singapore" }, { "id": "AtWKqgziLF", "title": "Show and Write: Entity-aware Article Generation with Image Information", "track": "main", "status": "Reject", "tldr": "", "abstract": "Prior work for article generation has primarily focused on generating articles using a human-written prompt to provide topical context and metadata about the article. However, for many applications, such as generating news stories, these articles are also often paired with images and their captions or alt-text, which in turn are based on real-world events and may reference many different named entities that are difficult to be correctly recognized and predicted by language models. To address this shortcoming, this paper introduces an ENtity-aware article Generation method with Image iNformation, ENGIN, to incorporate an article's image information into language models. ENGIN represents articles that can be conditioned on metadata used by prior work and information such as captions and named entities extracted from images. Our key contribution is a novel Entity-aware mechanism to help our model recognize and predict the entity names in articles. We perform experiments on three public datasets, GoodNews, VisualNews, and WikiText. Quantitative results show that our approach improves generated article perplexity by 4-5 points over the base models. Qualitative results demonstrate the text generated by ENGIN is more consistent with embedded article images. We also perform article quality annotation experiments on the generated articles to validate that our model produces higher-quality articles. Finally, we investigate the effect ENGIN has on methods that automatically detect machine-generated articles.", "keywords": "image-to-text generation;language modeling;named entity recognition", "primary_area": "", "supplementary_material": "", "author": "Zhongping Zhang;Yiwen Gu;Bryan A. 
Plummer", "authorids": "~Zhongping_Zhang1;yiweng@bu.edu;~Bryan_A._Plummer1", "gender": "M;;", "homepage": "http://cs-people.bu.edu/zpzhang/;;", "dblp": "132/6203;;", "google_scholar": "6C20vTwAAAAJ;;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Zhongping_Zhang1;yiweng@bu.edu;~Bryan_A._Plummer1", "aff": "Boston University;;", "aff_domain": "bu.edu;;", "position": "PhD student;;", "bibtex": "@misc{\nzhang2023show,\ntitle={Show and Write: Entity-aware Article Generation with Image Information},\nauthor={Zhongping Zhang and Yiwen Gu and Bryan A. Plummer},\nyear={2023},\nurl={https://openreview.net/forum?id=AtWKqgziLF}\n}", "github": "", "project": "", "reviewers": "ewLW;2PvE;g2Tq;5iPM;HgLz;LeSY", "site": "https://openreview.net/forum?id=AtWKqgziLF", "pdf_size": 17361997, "recommendation": "3;5;5;6;6;6", "confidence": "5;4;3;4;3;4", "correctness": "3;3;3;4;3;3", "technical_novelty": "2;2;2;3;3;2", "empirical_novelty": "2;2;2;2;3;3", "wc_summary_paper": "316;68;66;65;31;126", "wc_strength_and_weaknesses": "691;391;110;119;108;172", "wc_clarity_quality_novelty_and_reproducibility": "136;69;19;26;16;12", "wc_summary_review": "107;18;19;24;18;137", "wc_review": "1250;546;214;234;173;447", "wc_reply_reviewers": "0;0;0;0;0;0", "wc_reply_authors": "1337;883;680;332;252;322", "reply_reviewers": "0;0;0;0;0;0", "reply_authors": "2;2;1;1;1;1", "recommendation_avg": [ 5.166666666666667, 1.0671873729054746 ], "confidence_avg": [ 3.8333333333333335, 0.6871842709362768 ], "correctness_avg": [ 3.1666666666666665, 0.3726779962499649 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 112.0, 95.42361692299588 ], "wc_strength_and_weaknesses_avg": [ 265.1666666666667, 214.47254421538952 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 46.333333333333336, 44.37216344611663 ], "wc_summary_review_avg": [ 53.833333333333336, 49.01502037357755 ], "wc_review_avg": [ 477.3333333333333, 370.5453938303496 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 634.3333333333334, 385.7454543550132 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.3333333333333333, 0.47140452079103173 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.6439209162167846, "corr_recommendation_correctness": 0.34921514788478913, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:UngYt5upSXYJ:scholar.google.com/&scioq=Show+and+Write:+Entity-aware+Article+Generation+with+Image+Information&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Boston University", "aff_unique_dep": "", "aff_unique_url": "https://www.bu.edu", "aff_unique_abbr": "BU", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "AtyO3IZYVEy", "title": "HagSeg: Hardness-adaptive Guidance for Semi-supervised Semantic Segmentation", "track": "main", "status": "Withdraw", "tldr": "A instance-specific and hardness-adaptive SSS framework", "abstract": "Recently, semi-supervised semantic segmentation has achieved promising performance with a small fraction of labelled data. However, most existing studies treat all unlabeled data equally and barely consider the differences and training difficulties among unlabeled instances. Differentiating unlabeled instances can promote instance-specific supervision to adapt to the model's evolution dynamically. 
In this paper, we emphasize the crucial role of instance differences and propose instance-specific, hardness-adaptive guidance for semi-supervised semantic segmentation, named HagSeg. Relying on the model's performance, HagSeg employs the class-weighted symmetric intersection-over-union to evaluate the hardness of each unlabeled instance and then supervises the training on unlabeled data in a hardness-adaptive manner. Specifically, HagSeg learns from unlabeled instances progressively by weighing their corresponding consistency losses based on the evaluated hardness. Meanwhile, HagSeg dynamically adjusts the augmentation for each instance such that the distortion degree of augmented instances is adapted to the model's generalization capability across the training course. Without introducing additional losses or training procedures, HagSeg obtains remarkable performance gains over current state-of-the-art approaches on segmentation benchmarks under different semi-supervised partition protocols.", "keywords": "Semi-supervised;Semantic Segmentation;Hardness-adaptive guidance", "primary_area": "", "supplementary_material": "/attachment/9be5bc40bbc7b1e9516deac5aff784eb625afc72.zip", "author": "Zhen Zhao;Tianyi Wu;Jimin Pi;Sifan Long;Luping Zhou;Jingdong Wang", "authorids": "~Zhen_Zhao4;~Tianyi_Wu2;~Jimin_Pi1;~Sifan_Long1;~Luping_Zhou3;~Jingdong_Wang1", "gender": "M;;M;F;M;M", "homepage": "http://zhaozhen.me/;http://jiminpi.github.io;;https://sites.google.com/view/lupingzhou;https://jingdongwang2017.github.io/;https://scholar.google.com/citations?user=FHdkcWsAAAAJ&hl=en", "dblp": "29/1773-1;;;45/933;49/3441;", "google_scholar": "7mpuhO8AAAAJ;;;https://scholar.google.com.au/citations?user=SgofT2MAAAAJ;z5SPCmgAAAAJ;", "orcid": "0000-0002-0796-4078;;0000-0001-7060-1133;;0000-0002-4888-4445;", "linkedin": ";;;;;", "or_profile": "~Zhen_Zhao4;~Jimin_Pi1;~Sifan_Long1;~Luping_Zhou3;~Jingdong_Wang1;~Wu_Tianyi1", "aff": "University of Sydney;Google Deepmind;Jilin University;University of Sydney;Baidu;", "aff_domain": "usyd.edu.au;google.com;jlu.edu.cn;sydney.edu.au;baidu.com;", "position": "PhD student;Research Engineer;PhD student;Associate Professor;Chief Scientist for Computer Vision;", "bibtex": "@misc{\nzhao2023hagseg,\ntitle={HagSeg: Hardness-adaptive Guidance for Semi-supervised Semantic Segmentation},\nauthor={Zhen Zhao and Tianyi Wu and Jimin Pi and Sifan Long and Luping Zhou and Jingdong Wang},\nyear={2023},\nurl={https://openreview.net/forum?id=AtyO3IZYVEy}\n}", "github": "", "project": "", "reviewers": "GjUS;vpgR;Cigi", "site": "https://openreview.net/forum?id=AtyO3IZYVEy", "pdf_size": 1118194, "recommendation": "3;3;5", "confidence": "5;5;5", "correctness": "4;1;3", "technical_novelty": "1;2;2", "empirical_novelty": "0;3;2", "wc_summary_paper": "84;62;132", "wc_strength_and_weaknesses": "258;219;349", "wc_clarity_quality_novelty_and_reproducibility": "20;128;16", "wc_summary_review": "37;119;7", "wc_review": "399;528;504", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "764;508;377", "reply_reviewers": "0;0;0", "reply_authors": "2;2;3", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 5.0, 0.0 ], "correctness_avg": [ 2.6666666666666665, 1.247219128924647 ], "technical_novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.6666666666666667, 1.247219128924647 ], "wc_summary_paper_avg": [ 92.66666666666667, 29.227080289043965 ], "wc_strength_and_weaknesses_avg": [ 275.3333333333333, 54.46915538989832 ],
"wc_clarity_quality_novelty_and_reproducibility_avg": [ 54.666666666666664, 51.88020388891659 ], "wc_summary_review_avg": [ 54.333333333333336, 47.33802793620462 ], "wc_review_avg": [ 477.0, 56.01785429664367 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 549.6666666666666, 160.7157601343302 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.18898223650461354, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ZOLmblgU09IJ:scholar.google.com/&scioq=HagSeg:+Hardness-adaptive+Guidance+for+Semi-supervised+Semantic+Segmentation&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;2;0;3", "aff_unique_norm": "University of Sydney;DeepMind;Jilin University;Baidu", "aff_unique_dep": ";DeepMind;;Baidu, Inc.", "aff_unique_url": "https://www.sydney.edu.au;https://deepmind.com;http://www.jlu.edu.cn;https://www.baidu.com", "aff_unique_abbr": "USYD;DeepMind;JLU;Baidu", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;0;2", "aff_country_unique": "Australia;United Kingdom;China" }, { "title": "A theoretical study of inductive biases in contrastive learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10770", "id": "AuEgNlEAmed", "poster": "/media/PosterPDFs/ICLR%202023/10770.png?t=1683012353.278001", "openreview": "https://openreview.net/forum?id=AuEgNlEAmed", "slides": "https://iclr.cc/virtual/2023/poster/10770", "video": "https://iclr.cc/virtual/2023/poster/10770", "author_site": "Jeff Z. HaoChen, Tengyu Ma", "tldr": "We provide the first theoretical analysis of self-supervised learning that incorporates the effect of inductive biases of model classes.", "abstract": "Understanding self-supervised learning is important but challenging. Previous theoretical works study the role of pretraining losses, and view neural networks as general black boxes. However, the recent work of [Saunshi et al.] argues that the model architecture --- a component largely ignored by previous works --- also has significant influences on the downstream performance of self-supervised learning. In this work, we provide the first theoretical analysis of self-supervised learning that incorporates the effect of inductive biases originating from the model class. In particular, we focus on contrastive learning --- a popular self-supervised learning method that is widely used in the vision domain. We show that when the model has limited capacity, contrastive representations would recover certain special clustering structures that are compatible with the model architecture, but ignore many other clustering structures in the data distribution. As a result, our theory can capture the more realistic setting where contrastive representations have much lower dimensionality than the number of clusters in the data distribution. We instantiate our theory on several synthetic data distributions, and provide empirical evidence to support the theory.", "keywords": "theory of self-supervised learning;theory of contrastive learning", "primary_area": "", "supplementary_material": "", "author": "Jeff Z. 
HaoChen;Tengyu Ma", "authorids": "~Jeff_Z._HaoChen1;~Tengyu_Ma1", "gender": ";M", "homepage": "https://cs.stanford.edu/~jhaochen/;http://ai.stanford.edu/~tengyuma/", "dblp": "267/5319;54/9061", "google_scholar": "SWQxcO8AAAAJ;i38QlUwAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Jeff_Z._HaoChen1;~Tengyu_Ma1", "aff": "Stanford University;Facebook AI Research", "aff_domain": "stanford.edu;fb.com", "position": "PhD student;Visiting Scientist", "bibtex": "@inproceedings{\nhaochen2023a,\ntitle={A theoretical study of inductive biases in contrastive learning},\nauthor={Jeff Z. HaoChen and Tengyu Ma},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=AuEgNlEAmed}\n}", "github": "", "project": "", "reviewers": "HMnY;XWTN;1bvx;tjye", "pdf_size": 496129, "recommendation": "6;6;6;6", "confidence": "4;3;4;3", "correctness": "3;4;3;2", "technical_novelty": "3;3;3;3", "empirical_novelty": "0;2;2;2", "wc_summary_paper": "85;53;192;97", "wc_strength_and_weaknesses": "376;213;209;272", "wc_clarity_quality_novelty_and_reproducibility": "42;34;247;43", "wc_summary_review": "25;61;125;27", "wc_review": "528;361;773;439", "wc_reply_reviewers": "59;0;277;0", "wc_reply_authors": "546;277;1227;518", "reply_reviewers": "1;0;2;0", "reply_authors": "1;1;3;1", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 1.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 106.75, 51.78018443381599 ], "wc_strength_and_weaknesses_avg": [ 267.5, 67.4258852370512 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 91.5, 89.84570106577165 ], "wc_summary_review_avg": [ 59.5, 40.432041749088064 ], "wc_review_avg": [ 525.25, 154.76171199621695 ], "wc_reply_reviewers_avg": [ 84.0, 114.00219296136369 ], "wc_reply_authors_avg": [ 642.0, 353.5682395238577 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11763191824283607892&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=AuEgNlEAmed", "email": "stanford.edu;fb.com", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Stanford University;Meta", "aff_unique_dep": ";Facebook AI Research", "aff_unique_url": "https://www.stanford.edu;https://research.facebook.com", "aff_unique_abbr": "Stanford;FAIR", "aff_campus_unique_index": "0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "AvSIqjCWVId", "title": "Abstract Visual Reasoning by Self-supervised Contrastive Learning", "track": "main", "status": "Reject", "tldr": "Demonstration of an unsupervised model to solve analogy reasoning in Raven\u2019s Progressive Matrices task and its variant. ", "abstract": "Neuro-symbolic models of artificial intelligence (AI) have been recently developed to perform tasks involving abstract visual reasoning that is a hallmark of human intelligence but remains challenging for deep neural network methods. 
However, most of the current neuro-symbolic models also rely on supervised learning and auxiliary annotations, unlike human cognitive processes, which depend largely on the general cognitive abilities of entity and rule recognition rather than on learning how to solve specific tasks from examples. In this work, we propose a neuro-symbolic model based on self-supervised contrastive learning (NS-SSCL) with unique and invariant representations of entities and rules in the perception and reasoning modules, respectively, to solve Raven\u2019s Progressive Matrices (RPMs) and its variant, a typical type of visual reasoning task used to test human intelligence. The perception module parses each object into invariant representations of attributes. The reasoning module grounds the representations of object attributes to form the latent rule representations, also through SSCL. Further, the relationships between the neural representations of object attributes and symbols used for rule reasoning are coherently mapped. Finally, the scene generation engine aggregates all attribute and rule representation distributions to produce a probabilistic representation of the target. NS-SSCL obtains state-of-the-art performance among unsupervised models on the RAVEN and V-PROM benchmarks, even outperforming most supervised models. The success of the proposed model suggests that constructing general cognitive abilities like those of humans may enable AI algorithms to solve complex tasks involving higher-level cognition, such as abstract reasoning, in a human-like manner.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Weiwen Lu;Aihua Yin;Sidong Wang;Hongzhi You;Ru-Yuan Zhang;Dahui Wang;Zonglei Zhen;Xiaohong Wan", "authorids": "~Weiwen_Lu1;~Aihua_Yin1;~Sidong_Wang1;~Hongzhi_You1;~Ru-Yuan_Zhang1;~Dahui_Wang1;~Zonglei_Zhen1;~Xiaohong_Wan1", "gender": ";;M;M;M;M;M;M", "homepage": "https://github.com/Lu-WW;;;http://www.visionsenselab.com/;https://ruyuanzhang.github.io;;http://bnupsych.bnu.edu.cn/tabid/324/ArticleID/6360/frtid/307/Default.aspx;", "dblp": ";;;;296/9298;14/4994;;", "google_scholar": ";;;-YrT4k0AAAAJ;kERYZ90AAAAJ;;;", "orcid": ";0000-0001-5769-614X;0000-0001-9011-3778;;0000-0002-0654-715X;;0000-0002-6748-6434;my-orcid?orcid=0000-0002-6472-1435", "linkedin": ";;;;;;;", "or_profile": "~Weiwen_Lu1;~Aihua_Yin1;~Sidong_Wang1;~Hongzhi_You1;~Ru-Yuan_Zhang1;~Dahui_Wang1;~Zonglei_Zhen1;~Xiaohong_Wan1", "aff": ";;Beijing Normal University;University of Electronic Science and Technology of China;Shanghai Jiaotong University;Beijing Normal University;Beijing Normal University;Beijing Normal University", "aff_domain": ";;bnu.edu.cn;uestc.edu.cn;sjtu.edu.cn;bnu.edu.cn;bnu.edu.cn;bnu.edu.cn", "position": ";;PhD student;Associate Professor;Associate Professor;Full Professor;Associate Professor;Full Professor", "bibtex": "@misc{\nlu2023abstract,\ntitle={Abstract Visual Reasoning by Self-supervised Contrastive Learning},\nauthor={Weiwen Lu and Aihua Yin and Sidong Wang and Hongzhi You and Ru-Yuan Zhang and Dahui Wang and Zonglei Zhen and Xiaohong Wan},\nyear={2023},\nurl={https://openreview.net/forum?id=AvSIqjCWVId}\n}", "github": "", "project": "", "reviewers": "QQJu;mdUd;Naed;FVX7", "site": "https://openreview.net/forum?id=AvSIqjCWVId", "pdf_size": 1717149, "recommendation": "3;3;3;3", "confidence": "3;5;4;4", "correctness": "2;2;2;2", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "134;100;107;43", "wc_strength_and_weaknesses":
"697;197;530;708", "wc_clarity_quality_novelty_and_reproducibility": "158;11;44;58", "wc_summary_review": "43;19;6;48", "wc_review": "1032;327;687;857", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 2.0, 0.0 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 96.0, 33.12853754695489 ], "wc_strength_and_weaknesses_avg": [ 533.0, 206.4134201063487 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 67.75, 54.82871054475019 ], "wc_summary_review_avg": [ 29.0, 17.219175357722563 ], "wc_review_avg": [ 725.75, 260.5373053902262 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:N8VvUl6_nTUJ:scholar.google.com/&scioq=Abstract+Visual+Reasoning+by+Self-supervised+Contrastive+Learning&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;2;0;0;0", "aff_unique_norm": "Beijing Normal University;University of Electronic Science and Technology of China;Shanghai Jiao Tong University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.bnu.edu.cn;https://www.uestc.edu.cn;https://www.sjtu.edu.cn", "aff_unique_abbr": "BNU;UESTC;SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "AvwF6IvT8et", "title": "Deep reinforced active learning for multi-class image classification", "track": "main", "status": "Reject", "tldr": "", "abstract": "High accuracy medical image classification can be limited by the costs of acquiring more data as well as the time and expertise needed to label existing images. In this paper, we apply active learning to medical image classification, a method which aims to maximise model performance on a minimal subset from a larger pool of data. We present a new active learning framework, based on deep reinforcement learning, to learn an active learning query strategy to label images based on predictions from a convolutional neural network. Our framework modifies the deep-Q network formulation, allowing us to pick data based additionally on geometric arguments in the latent space of the classifier, allowing for high accuracy multi-class classification in a batch-based active learning setting, enabling the agent to label datapoints that are both diverse and about which it is most uncertain. 
We apply our framework to two medical imaging datasets and compare with standard query strategies as well as the most recent reinforcement learning based active learning approach for image classification.", "keywords": "Reinforcement learning;active learning;image classification", "primary_area": "", "supplementary_material": "", "author": "Emma Slade;Kim Branson", "authorids": "~Emma_Slade1;~Kim_Branson1", "gender": "F;M", "homepage": ";https://gsk.ai", "dblp": ";45/2850", "google_scholar": "https://scholar.google.co.uk/citations?user=hrCtIbsAAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Emma_Slade1;~Kim_Branson1", "aff": "GlaxoSmithKline;GSK plc", "aff_domain": "gsk.com;gsk.com", "position": "Researcher;Principal Researcher", "bibtex": "@misc{\nslade2023deep,\ntitle={Deep reinforced active learning for multi-class image classification},\nauthor={Emma Slade and Kim Branson},\nyear={2023},\nurl={https://openreview.net/forum?id=AvwF6IvT8et}\n}", "github": "", "project": "", "reviewers": "CZBN;6vUj;tpMj", "site": "https://openreview.net/forum?id=AvwF6IvT8et", "pdf_size": 326786, "recommendation": "3;3;3", "confidence": "5;4;4", "correctness": "2;2;2", "technical_novelty": "2;2;2", "empirical_novelty": "1;2;3", "wc_summary_paper": "62;49;88", "wc_strength_and_weaknesses": "1085;20;299", "wc_clarity_quality_novelty_and_reproducibility": "160;261;98", "wc_summary_review": "47;43;29", "wc_review": "1354;373;514", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 2.0, 0.0 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 66.33333333333333, 16.21384867602041 ], "wc_strength_and_weaknesses_avg": [ 468.0, 450.9079728725142 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 173.0, 67.17638473948018 ], "wc_summary_review_avg": [ 39.666666666666664, 7.717224601860151 ], "wc_review_avg": [ 747.0, 433.05657828971954 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1402907589419321701&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1", "aff_unique_norm": "GlaxoSmithKline;GlaxoSmithKline plc", "aff_unique_dep": ";", "aff_unique_url": "https://www.gsk.com;https://www.gsk.com", "aff_unique_abbr": "GSK;GSK", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "title": "Q-Pensieve: Boosting Sample Efficiency of Multi-Objective RL Through Memory Sharing of Q-Snapshots", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11674", "id": "AwWaBXLIJE", "poster": "/media/PosterPDFs/ICLR%202023/11674.png?t=1682572574.3604517", "openreview": "https://openreview.net/forum?id=AwWaBXLIJE", "slides": "https://iclr.cc/virtual/2023/poster/11674", "video": "https://iclr.cc/virtual/2023/poster/11674", "author_site": "Wei Hung, Bo Kai Huang, Ping-Chun Hsieh, Xi Liu", "tldr": "We boost the sample efficiency of multi-objective RL by using Q snapshots ", "abstract": "Many real-world continuous control problems are in the dilemma of 
weighing the pros and cons; multi-objective reinforcement learning (MORL) serves as a generic framework for learning control policies under different preferences over objectives. However, the existing MORL methods either rely on multiple passes of explicit search for finding the Pareto front, and therefore are not sample-efficient, or utilize a shared policy network for coarse knowledge sharing among policies. To boost the sample efficiency of MORL, we propose $Q$-Pensieve, a policy improvement scheme that stores a collection of $Q$-snapshots to jointly determine the policy update direction and thereby enables data sharing at the policy level. We show that $Q$-Pensieve can be naturally integrated with soft policy iteration with convergence guarantee. To substantiate this concept, we propose the technique of a $Q$ replay buffer, which stores the learned $Q$-networks from past iterations, and arrive at a practical actor-critic implementation. Through extensive experiments and an ablation study, we demonstrate that with far fewer samples, the proposed algorithm can outperform the benchmark MORL methods on a variety of MORL benchmark tasks.", "keywords": "Multi-objective reinforcement learning;sample efficiency", "primary_area": "", "supplementary_material": "/attachment/f7e628f72586460d5d5fbe1c044fa3cda57a99d6.zip", "author": "Wei Hung;Bo Kai Huang;Ping-Chun Hsieh;Xi Liu", "authorids": "~Wei_Hung1;~Bo_Kai_Huang2;~Ping-Chun_Hsieh1;~Xi_Liu1", "gender": "M;M;M;M", "homepage": "https://github.com/ndsl7109256;https://pinghsieh.github.io/;;https://github.com/redway1225", "dblp": ";163/7352;https://dblp.uni-trier.de/pers/hd/l/Liu_0011:Xi;", "google_scholar": ";ix38JgoAAAAJ;https://scholar.google.com/citations?hl=en;", "orcid": ";;;", "linkedin": "%E6%9F%8F%E6%84%B7-%E9%BB%83-761175197/;;xi-liu-2b0285173/;", "or_profile": "~Bo_Kai_Huang2;~Ping-Chun_Hsieh1;~Xi_Liu1;~\u5049_\u6d2a1", "aff": ";National Yang Ming Chiao Tung University;Meta AI;National Yang Ming Chiao Tung University", "aff_domain": ";nycu.edu.tw;fb.com;cs.nycu.edu.tw", "position": ";Assistant Professor;Research Scientist;PhD student", "bibtex": "@inproceedings{\nhung2023qpensieve,\ntitle={Q-Pensieve: Boosting Sample Efficiency of Multi-Objective {RL} Through Memory Sharing of Q-Snapshots},\nauthor={Wei Hung and Bo Kai Huang and Ping-Chun Hsieh and Xi Liu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=AwWaBXLIJE}\n}", "github": "", "project": "", "reviewers": "g5HG;ecaS;jdUh;VKcR", "pdf_size": 2796554, "recommendation": "5;6;6;8", "confidence": "3;4;3;4", "correctness": "3;3;3;4", "technical_novelty": "2;3;3;3", "empirical_novelty": "3;3;2;3", "wc_summary_paper": "56;90;52;78", "wc_strength_and_weaknesses": "115;90;100;441", "wc_clarity_quality_novelty_and_reproducibility": "21;185;81;64", "wc_summary_review": "37;27;103;29", "wc_review": "229;392;336;612", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1169;346;396;987", "reply_reviewers": "0;0;0;0", "reply_authors": "2;1;1;3", "recommendation_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 69.0, 15.652475842498529 ], "wc_strength_and_weaknesses_avg": [ 186.5, 147.2047893242608 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 87.75, 60.2551864987571 ], "wc_summary_review_avg": [
49.0, 31.400636936215164 ], "wc_review_avg": [ 392.25, 139.7361352692996 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 724.5, 359.7433112651297 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.6882472016116854, "corr_recommendation_correctness": 0.9271726499455306, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6072589676460986384&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=AwWaBXLIJE", "email": ";nycu.edu.tw;fb.com;cs.nycu.edu.tw", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "National Yang Ming Chiao Tung University;Meta", "aff_unique_dep": ";Meta AI", "aff_unique_url": "https://www.nycu.edu.tw;https://meta.com", "aff_unique_abbr": "NYCU;Meta", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Taiwan;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "China;United States" }, { "id": "AykEgQNPJEK", "title": "Score-Based Graph Generative Modeling with Self-Guided Latent Diffusion", "track": "main", "status": "Reject", "tldr": "We propose a novel and unified latent-based framework Score-Based Graph Generative Model powered by Self-Guided Latent Diffusion to promote graph generation in different scenarios.", "abstract": "Graph generation is a fundamental task in machine learning, and it is critical for numerous real-world applications such as biomedical discovery and social science. Existing diffusion-based graph generation methods have two limitations: (i) they conduct the diffusion process directly in a complex graph space (i.e., node feature, adjacency matrix, or both), resulting in hard optimization with costly network evaluations; (ii) they usually neglect to sufficiently cover the whole distribution of the target unlabeled graph set and thus fail to achieve semantically controllable generation. In this paper, we first propose a unified latent-based graph generative framework, Score-Based Graph Generative Model (SGGM), powered by Self-Guided Latent Diffusion (SLD), to address both limitations. Specifically, we pretrain a variational graph autoencoder to map raw graphs from the high-dimensional discrete space to a low-dimensional topology-injected latent space, and apply a score-based generative model there, yielding a smoother, faster and more expressive graph generation procedure. To sufficiently cover the whole semantic distribution of the unlabeled graph set, we propose SLD, which provides controllable self-guidance of sample generation with gradients from a designed assigning function towards hierarchical pseudo labels, produced by iteratively clustering the latent embeddings. In addition, we periodically update the pseudo labels during training to achieve mutual adaptation between self-guidance and score-based generation. Experiments show that our SGGM powered by SLD outperforms previous graph generation baselines on both generic and molecular graph datasets, demonstrating its generality and extensibility, which we further support with theoretical proofs.", "keywords": "Generative Model;Diffusion Model;Graph Generation", "primary_area": "", "supplementary_material": "/attachment/0e3bc91e32710f6e2a5d1137e926e4dcfd6751f9.zip", "author": "Ling Yang;Zhilong Zhang;Wentao Zhang;Shenda Hong", "authorids": "~Ling_Yang1;~Zhilong_Zhang1;~Wentao_Zhang1;~Shenda_Hong1", "gender": "M;;;", "homepage": "https://yangling0818.github.io/;;;", "dblp": "01/24-6.html;06/8799;;", "google_scholar": "https://scholar.google.com.hk/citations?user=sIKujqAAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;", "orcid": "0000-0003-1905-8053;0009-0009-9307-8440;;", "linkedin": ";;;", "or_profile": "~Ling_Yang1;~Zhilong_Zhang1;~Wentao_Zhang1;~Shenda_Hong1", "aff": "Peking University;Korea Advanced Institute of Science & Technology;;", "aff_domain": "pku.edu.cn;kaist.ac.kr;;", "position": "PhD student;Intern;;", "bibtex": "@misc{\nyang2023scorebased,\ntitle={Score-Based Graph Generative Modeling with Self-Guided Latent Diffusion},\nauthor={Ling Yang and Zhilong Zhang and Wentao Zhang and Shenda Hong},\nyear={2023},\nurl={https://openreview.net/forum?id=AykEgQNPJEK}\n}", "github": "", "project": "", "reviewers": "XiBv;hZ5a;MazB;ASDF", "site": "https://openreview.net/forum?id=AykEgQNPJEK", "pdf_size": 1019939, "recommendation": "3;3;5;5", "confidence": "4;5;4;4", "correctness": "2;2;3;3", "technical_novelty": "2;3;4;2", "empirical_novelty": "2;2;3;2", "wc_summary_paper": "68;33;168;40", "wc_strength_and_weaknesses": "330;127;345;277", "wc_clarity_quality_novelty_and_reproducibility": "77;176;99;10", "wc_summary_review": "40;40;37;53", "wc_review": "515;376;649;380", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 77.25, 54.00636536557519 ], "wc_strength_and_weaknesses_avg": [ 269.75, 86.20143560289469 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 90.5, 59.25580140374443 ], "wc_summary_review_avg": [ 42.5, 6.18465843842649 ], "wc_review_avg": [ 480.0, 112.47444154117859 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 1.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7565363633778834817&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Peking University;Korea Advanced Institute of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;https://www.kaist.ac.kr", "aff_unique_abbr": "Peking U;KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "China;South Korea" }, { "id": "Azw-0kVtsX", "title": "One-Vs-All AUC Maximization: an effective solution to the low-resource named entity recognition problem", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Named entity recognition (NER), a sequence
labelling/token classification task, has been traditionally considered a multi-class classification problem, the learning objective of which is to either optimise the multi-class cross entropy loss (CE) or train a conditional random field (CRF). However, these standard learning objectives, though scalable to large NER datasets and used in state-of-the-art work, largely ignore the problem of imbalanced label distributions that is inherent in all NER corpora. We show this leads to degraded performance in low-resource settings. Reformulating this standard multi-class labelling problem as a one-vs-all (OVA) learning problem, we propose to optimise the NER model with an AUC-based alternative loss function that is more capable of handling imbalanced datasets. As OVA often leads to a higher training time compared to the standard multi-class setting, we also develop two training strategies: one trains together labels that share similar linguistic characteristics, and the other employs a meta-learning approach to speed convergence. In order to motivate some of our experiments and better interpret the results, we also develop a Bayesian theory of the AUC function during learning. Experimental results under low-resource NER settings from benchmark corpora show that our methods can achieve consistently better performance compared with the learning objectives commonly used in NER. We also give evidence that our methods are robust and agnostic to the underlying NER embeddings, models, domains, and label distributions. The code to replicate this work will be released upon the publication of this paper.", "keywords": "NLP;NER;Low-Resource;Imbalanced Distribution;AUC Maximization;One-Vs-All", "primary_area": "", "supplementary_material": "/attachment/5280dcba11133d127a3a4f0b48fc161ce592865b.zip", "author": "Ngoc Dang Nguyen;Wei Tan;Lan Du;Wray Buntine", "authorids": "~Ngoc_Dang_Nguyen1;~Wei_Tan2;~Lan_Du1;~Wray_Buntine1", "gender": "M;M;M;M", "homepage": ";https://davidtw999.github.io/;https://research.monash.edu/en/persons/lan-du;https://bayesian-models.org/", "dblp": "333/0601;73/6520;98/1504-2;72/3885", "google_scholar": "https://scholar.google.com.au/citations?user=iv5B-RMAAAAJ;5c6UyZwAAAAJ;https://scholar.google.com.au/citations?user=HtiTsgwAAAAJ;J2pGGuAAAAAJ", "orcid": ";0000-0002-9778-9970;0000-0002-9925-0223;0000-0001-9292-1015", "linkedin": ";davidtw999;;wray-buntine-07693921a/", "or_profile": "~Ngoc_Dang_Nguyen1;~Wei_Tan2;~Lan_Du1;~Wray_Buntine1", "aff": "Monash University;Monash University;Monash University;VinUniversity", "aff_domain": "monash.edu;monash.edu;monash.edu;vinunu.edu.vn", "position": "PhD student;PhD student;Senior Lecturer;Full Professor", "bibtex": "@misc{\nnguyen2023onevsall,\ntitle={One-Vs-All {AUC} Maximization: an effective solution to the low-resource named entity recognition problem},\nauthor={Ngoc Dang Nguyen and Wei Tan and Lan Du and Wray Buntine},\nyear={2023},\nurl={https://openreview.net/forum?id=Azw-0kVtsX}\n}", "github": "", "project": "", "reviewers": "6Haz;xWfy;H8Rr", "site": "https://openreview.net/forum?id=Azw-0kVtsX", "pdf_size": 6115663, "recommendation": "3;5;8", "confidence": "3;4;3", "correctness": "3;3;4", "technical_novelty": "2;3;3", "empirical_novelty": "2;3;3", "wc_summary_paper": "39;32;143", "wc_strength_and_weaknesses": "178;136;302", "wc_clarity_quality_novelty_and_reproducibility": "71;22;48", "wc_summary_review": "37;21;64", "wc_review": "325;211;557", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "631;512;724",
"reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 5.333333333333333, 2.0548046676563256 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 71.33333333333333, 50.756499310159505 ], "wc_strength_and_weaknesses_avg": [ 205.33333333333334, 70.47142841054254 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 47.0, 20.016659728003237 ], "wc_summary_review_avg": [ 40.666666666666664, 17.745108872274887 ], "wc_review_avg": [ 364.3333333333333, 143.96604537953462 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 622.3333333333334, 86.76532845683363 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.1147078669352809, "corr_recommendation_correctness": 0.9176629354822472, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:7_sVaEj2DbAJ:scholar.google.com/&scioq=One-Vs-All+AUC+Maximization:+an+effective+solution+to+the+low-resource+named+entity+recognition+problem&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Monash University;VinUniversity", "aff_unique_dep": ";", "aff_unique_url": "https://www.monash.edu;https://vinuni.edu.vn", "aff_unique_abbr": "Monash;VinUni", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "Australia;Vietnam" }, { "id": "B-dM7df9Axo", "title": "Learning PDE Solution Operator for Continuous Modeling of Time-Series", "track": "main", "status": "Reject", "tldr": " PDE-based approach for modeling time-series.", "abstract": "Learning underlying dynamics from data is important and challenging in many real-world scenarios. Incorporating differential equations (DEs) to design continuous networks has drawn much attention recently, the most prominent of which is Neural ODE. Most prior works make specific assumptions on the type of DEs or restrict them to first or second-order DEs, making the model specialized for certain problems. Furthermore, due to the use of numerical integration, they suffer from computational expensiveness and numerical instability. Building upon recent Fourier neural operator (FNO), this work proposes a partial differential equation (PDE) based framework which improves the dynamics modeling capability and circumvents the need for costly numerical integration. FNO is hard to be directly applied to real applications because it is mainly confined to physical PDE problems. To fill this void, we propose a continuous-in-time FNO to deal with irregularly-sampled time series and provide a theoretical result demonstrating its universality. Moreover, we reveal an intrinsic property of PDEs that increases the stability of the model. Several numerical evidence shows that our method represents a broader range of problems, including synthetic, image classification, and irregular time-series. 
Our framework opens up a new way for a continuous representation of neural networks that can be readily adopted for real-world applications.", "keywords": "Neural ODEs;Partial differential equations;Neural operators;Time-series", "primary_area": "", "supplementary_material": "/attachment/37d00bbf61b949047d8a684238d6471977455a4b.zip", "author": "Yesom Park;Jaemoo Choi;Changyeon Yoon;Chang hoon Song;Myungjoo Kang", "authorids": "~Yesom_Park1;~Jaemoo_Choi1;~Changyeon_Yoon1;~Chang_hoon_Song1;~Myungjoo_Kang1", "gender": "F;M;M;M;", "homepage": ";https://github.com/JaemooC;https://github.com/shinypond;;http://ncia.snu.ac.kr/", "dblp": "213/0699;295/8916;;;64/5657.html", "google_scholar": "https://scholar.google.com/citations?hl=ko;Ba2G6sIAAAAJ;;https://scholar.google.co.kr/citations?user=DAaFII4AAAAJ;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Yesom_Park1;~Jaemoo_Choi1;~Changyeon_Yoon1;~Chang_hoon_Song1;~Myungjoo_Kang1", "aff": "Seoul National University;Seoul National University;Seoul National University;Seoul National University;Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr", "position": "PhD student;PhD student;PhD student;PhD student;Full Professor", "bibtex": "@misc{\npark2023learning,\ntitle={Learning {PDE} Solution Operator for Continuous Modeling of Time-Series},\nauthor={Yesom Park and Jaemoo Choi and Changyeon Yoon and Chang hoon Song and Myungjoo Kang},\nyear={2023},\nurl={https://openreview.net/forum?id=B-dM7df9Axo}\n}", "github": "", "project": "", "reviewers": "FEU2;Jaqe;dm3F;92q6", "site": "https://openreview.net/forum?id=B-dM7df9Axo", "pdf_size": 4835523, "recommendation": "5;5;6;6", "confidence": "3;5;4;3", "correctness": "3;3;3;4", "technical_novelty": "3;3;3;2", "empirical_novelty": "3;3;2;3", "wc_summary_paper": "14;47;75;59", "wc_strength_and_weaknesses": "263;418;369;94", "wc_clarity_quality_novelty_and_reproducibility": "12;53;71;21", "wc_summary_review": "50;26;33;30", "wc_review": "339;544;548;204", "wc_reply_reviewers": "838;0;45;0", "wc_reply_authors": "2489;594;865;588", "reply_reviewers": "3;0;1;0", "reply_authors": "4;1;3;1", "recommendation_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 48.75, 22.38721733489895 ], "wc_strength_and_weaknesses_avg": [ 286.0, 124.20346210955636 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 39.25, 23.836683913665507 ], "wc_summary_review_avg": [ 34.75, 9.148087231765993 ], "wc_review_avg": [ 408.75, 145.3192605954214 ], "wc_reply_reviewers_avg": [ 220.75, 356.84266490990115 ], "wc_reply_authors_avg": [ 1134.0, 790.2692579115045 ], "reply_reviewers_avg": [ 1.0, 1.224744871391589 ], "reply_authors_avg": [ 2.25, 1.299038105676658 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.30151134457776363, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11785624197376158710&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Seoul National University", "aff_unique_dep": "", "aff_unique_url": "https://www.snu.ac.kr", "aff_unique_abbr": "SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Causal 
Imitation Learning via Inverse Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11443", "id": "B-z41MBL_tH", "poster": "/media/PosterPDFs/ICLR%202023/11443.png?t=1682795105.2884648", "openreview": "https://openreview.net/forum?id=B-z41MBL_tH", "slides": "https://iclr.cc/virtual/2023/poster/11443", "video": "https://iclr.cc/virtual/2023/poster/11443", "author_site": "Kangrui Ruan, Junzhe Zhang, Xuan Di, Elias Bareinboim", "tldr": "This paper proposes novel inverse reinforcement learning methods to learn effective imitating policies from the expert's demonstrations when unobserved confounders are present.", "abstract": "One of the most common ways children learn when unfamiliar with the environment is by mimicking adults. Imitation learning concerns an imitator learning to behave in an unknown environment from an expert's demonstration; reward signals remain latent to the imitator. This paper studies imitation learning through causal lenses and extends the analysis and tools developed for behavior cloning (Zhang, Kumor, Bareinboim, 2020) to inverse reinforcement learning. First, we propose novel graphical conditions that allow the imitator to learn a policy performing as well as the expert's behavior policy, even when the imitator and the expert's state-action space disagree, and unobserved confounders (UCs) are present. When provided with parametric knowledge about the unknown reward function, such a policy may outperform the expert's. Also, our method is easily extensible and allows one to leverage existing IRL algorithms even when UCs are present, including the multiplicative-weights algorithm (MWAL) (Syed & Schapire, 2008) and the generative adversarial imitation learning (GAIL) (Ho & Ermon, 2016). 
Finally, we validate our framework by simulations using real-world and synthetic data.", "keywords": "Causal Inference;Graphical Models", "primary_area": "", "supplementary_material": "", "author": "Kangrui Ruan;Junzhe Zhang;Xuan Di;Elias Bareinboim", "authorids": "~Kangrui_Ruan1;~Junzhe_Zhang3;~Xuan_Di1;~Elias_Bareinboim2", "gender": ";;F;M", "homepage": "https://darrenruan.github.io/;;https://sharondi-columbia.wixsite.com/ditectlab;https://causalai.net", "dblp": "324/0593;;;85/9005", "google_scholar": "https://scholar.google.com/citations?authuser=2;;https://scholar.google.com/citations?hl=en;r5U-D7YAAAAJ", "orcid": ";;0000-0003-2925-7697;", "linkedin": "kangrui-ruan/;;;", "or_profile": "~Kangrui_Ruan1;~Junzhe_Zhang3;~Xuan_Di1;~Elias_Bareinboim2", "aff": "Columbia University;;Columbia University;Columbia University", "aff_domain": "columbia.edu;;columbia.edu;columbia.edu", "position": "PhD student;;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nruan2023causal,\ntitle={Causal Imitation Learning via Inverse Reinforcement Learning},\nauthor={Kangrui Ruan and Junzhe Zhang and Xuan Di and Elias Bareinboim},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=B-z41MBL_tH}\n}", "github": "", "project": "", "reviewers": "o8Xw;iWpt;U7KX;Ne7b", "pdf_size": 448287, "recommendation": "6;6;6;8", "confidence": "3;3;4;3", "correctness": "3;3;3;4", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "80;57;137;45", "wc_strength_and_weaknesses": "114;143;264;88", "wc_clarity_quality_novelty_and_reproducibility": "31;85;103;59", "wc_summary_review": "15;128;88;15", "wc_review": "240;413;592;207", "wc_reply_reviewers": "0;242;63;0", "wc_reply_authors": "695;2223;2397;212", "reply_reviewers": "0;1;1;0", "reply_authors": "1;5;5;1", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 79.75, 35.36506044106245 ], "wc_strength_and_weaknesses_avg": [ 152.25, 67.38833356004584 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 69.5, 27.179955849853766 ], "wc_summary_review_avg": [ 61.5, 48.602983447521 ], "wc_review_avg": [ 363.0, 153.62779696396092 ], "wc_reply_reviewers_avg": [ 76.25, 99.0918134862815 ], "wc_reply_authors_avg": [ 1381.75, 945.8296292144796 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 3.0, 2.0 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": 1.0, "gs_citation": 43, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12058027487019962066&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=B-z41MBL_tH", "email": "columbia.edu;;columbia.edu;columbia.edu", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Columbia University", "aff_unique_dep": "", "aff_unique_url": "https://www.columbia.edu", "aff_unique_abbr": "Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "B2ww5cqWq14", "title": "Towards Diverse Perspective Learning with Switch over Multiple Temporal Pooling", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Pooling is a widely 
used method for classification problems. In particular, poolings that consider temporal relationships have been proposed in the time series classification (TSC) domain. However, we found that there exists a data dependency in temporal poolings. Since each pooling has only one perspective, existing temporal poolings cannot solve the data dependency problem with fixed-perspective learning. In this paper, we propose a novel pooling architecture for diverse perspective learning: switch over multiple temporal pooling (SoM-TP). A massive case study using layer-wise relevance propagation (LRP) reveals the distinct view that each pooling has and ultimately emphasizes the necessity of diverse perspective learning. Therefore, SoM-TP dynamically selects temporal poolings according to time series data characteristics. An ablation study on SoM-TP shows how diverse perspective learning is achieved. Furthermore, pooling classification is investigated through input attribution by LRP. Extensive experiments are conducted on the UCR/UEA repositories.", "keywords": "timeseries classification;temporal pooling;temporal relationship;perspective learning", "primary_area": "", "supplementary_material": "/attachment/5147dd9a1516d180d03c8fad39924fc2f5fe993f.zip", "author": "Jihyeon Seong;JUNGMIN KIM;Jaesik Choi", "authorids": "~Jihyeon_Seong1;~JUNGMIN_KIM1;~Jaesik_Choi1", "gender": "F;M;M", "homepage": "http://sailab.kaist.ac.kr/members/#GraduateStudents;;https://sailab.kaist.ac.kr/jaesik", "dblp": ";;13/1402", "google_scholar": "DdSo9q4AAAAJ;;RqMLVzUAAAAJ", "orcid": "0000-0002-3591-131X;;", "linkedin": "jihyeon-seong-302571267/;jungmin-kim-jm/;", "or_profile": "~Jihyeon_Seong1;~JUNGMIN_KIM1;~Jaesik_Choi1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "position": "MS student;MS student;Associate Professor", "bibtex": "@misc{\nseong2023towards,\ntitle={Towards Diverse Perspective Learning with Switch over Multiple Temporal Pooling},\nauthor={Jihyeon Seong and JUNGMIN KIM and Jaesik Choi},\nyear={2023},\nurl={https://openreview.net/forum?id=B2ww5cqWq14}\n}", "github": "", "project": "", "reviewers": "QzyE;2KQe;FNQF", "site": "https://openreview.net/forum?id=B2ww5cqWq14", "pdf_size": 4309981, "recommendation": "1;3;5", "confidence": "4;4;3", "correctness": "2;3;3", "technical_novelty": "2;2;3", "empirical_novelty": "0;3;3", "wc_summary_paper": "56;83;53", "wc_strength_and_weaknesses": "234;700;145", "wc_clarity_quality_novelty_and_reproducibility": "23;242;92", "wc_summary_review": "34;149;37", "wc_review": "347;1174;327", "wc_reply_reviewers": "59;274;0", "wc_reply_authors": "1177;1527;541", "reply_reviewers": "1;1;0", "reply_authors": "4;5;2", "recommendation_avg": [ 3.0, 1.632993161855452 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 1.4142135623730951 ], "wc_summary_paper_avg": [ 64.0, 13.490737563232042 ], "wc_strength_and_weaknesses_avg": [ 359.6666666666667, 243.37944768520524 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 119.0, 91.42209798511517 ], "wc_summary_review_avg": [ 73.33333333333333, 53.51842880935708 ], "wc_review_avg": [ 616.0, 394.650055956751 ], "wc_reply_reviewers_avg": [ 111.0, 117.74831916705506 ], "wc_reply_authors_avg": [ 1081.6666666666667,
408.1383207797191 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 3.6666666666666665, 1.247219128924647 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.8660254037844385, "corr_recommendation_correctness": 0.8660254037844385, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:eaW67yn4jYcJ:scholar.google.com/&scioq=Towards+Diverse+Perspective+Learning+with+Switch+over+Multiple+Temporal+Pooling&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "Stateful Active Facilitator: Coordination and Environmental Heterogeneity in Cooperative Multi-Agent Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10810", "id": "B4maZQLLW0_", "poster": "", "openreview": "https://openreview.net/forum?id=B4maZQLLW0_", "slides": "https://iclr.cc/virtual/2023/poster/10810", "video": "https://iclr.cc/virtual/2023/poster/10810", "author_site": "Dianbo Liu, Vedant Shah, Oussama Boussif, Cristian Meo, Anirudh Goyal, Tianmin Shu, Michael Mozer, Nicolas Heess, Yoshua Bengio", "tldr": "", "abstract": "In cooperative multi-agent reinforcement learning, a team of agents works together to achieve a common goal. Different environments or tasks may require varying degrees of coordination among agents in order to achieve the goal in an optimal way. The nature of coordination will depend on properties of the environment\u2014its spatial layout, distribution of obstacles, dynamics, etc. We term this variation of properties within an environment as heterogeneity. Existing literature has not sufficiently addressed the fact that different environments may have different levels of heterogeneity. We formalize the notions of coordination level and heterogeneity level of an environment and present HECOGrid, a suite of multi-agent RL environments that facilitates empirical evaluation of different MARL approaches across different levels of coordination and environmental heterogeneity by providing a quantitative control over coordination and heterogeneity levels of the environment. Further, we propose a Centralized Training Decentralized Execution learning approach called Stateful Active Facilitator (SAF) that enables agents to work efficiently in high-coordination and high-heterogeneity environments through a differentiable and shared knowledge source used during training and dynamic selection from a shared pool of policies. We evaluate SAF and compare its performance against baselines IPPO and MAPPO on HECOGrid. Our results show that SAF consistently outperforms the baselines across different tasks and different heterogeneity and coordination levels.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Dianbo Liu;Vedant Shah;Oussama Boussif;Cristian Meo;Anirudh Goyal;Tianmin Shu;Michael Curtis Mozer;Nicolas Heess;Yoshua Bengio", "authorids": "~Dianbo_Liu2;~Vedant_Shah2;~Oussama_Boussif1;c.meo@tudelft.nl;~Anirudh_Goyal1;~Tianmin_Shu1;~Michael_Curtis_Mozer1;~Nicolas_Heess1;~Yoshua_Bengio1", "gender": ";M;M;;M;;M;;M", "homepage": ";https://veds12.github.io/;https://jaggbow.github.io/;;https://anirudh9119.github.io/;;https://www.cs.colorado.edu/~mozer;;http://yoshuabengio.org", "dblp": ";;321/0990;;172/1039;163/2175.html;m/MichaelCMozer;76/9181;56/953", "google_scholar": ";;RwtLLioAAAAJ;;krrh6OUAAAAJ;YT_ffdwAAAAJ;lmjR_qMAAAAJ;79k7bGEAAAAJ;kukA0LcAAAAJ", "orcid": ";;;;;;;;", "linkedin": ";veds12/;oussama-boussif/;;;;;;yoshuabengio/?originalSubdomain=ca", "or_profile": "~Dianbo_Liu2;~Vedant_Shah2;~Oussama_Boussif1;c.meo@tudelft.nl;~Anirudh_Goyal1;~Tianmin_Shu1;~Michael_Curtis_Mozer1;~Nicolas_Heess1;~Yoshua_Bengio1", "aff": ";Mila - Quebec Artificial Intelligence Institute;Universit\u00e9 de Montr\u00e9al;;Google DeepMind;Massachusetts Institute of Technology;Google DeepMind;Google DeepMind;University of Montreal", "aff_domain": ";mila.quebec;umontreal.ca;;google.com;mit.edu;google.com;google.com;umontreal.ca", "position": ";MS student;PhD student;;Researcher;Postdoc;Research Scientist;Research Scientist;Full Professor", "bibtex": "@inproceedings{\nliu2023stateful,\ntitle={Stateful Active Facilitator: Coordination and Environmental Heterogeneity in Cooperative Multi-Agent Reinforcement Learning},\nauthor={Dianbo Liu and Vedant Shah and Oussama Boussif and Cristian Meo and Anirudh Goyal and Tianmin Shu and Michael Curtis Mozer and Nicolas Heess and Yoshua Bengio},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=B4maZQLLW0_}\n}", "github": "", "project": "", "reviewers": "y8q8;PAoT;UJ6m;RJCt", "pdf_size": 3048547, "recommendation": "6;6;6;8", "confidence": "4;3;4;4", "correctness": "3;3;3;3", "technical_novelty": "3;3;2;2", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "91;46;79;117", "wc_strength_and_weaknesses": "437;231;375;332", "wc_clarity_quality_novelty_and_reproducibility": "168;27;164;13", "wc_summary_review": "84;48;78;104", "wc_review": "780;352;696;566", "wc_reply_reviewers": "0;29;18;0", "wc_reply_authors": "0;119;46;0", "reply_reviewers": "0;1;1;0", "reply_authors": "0;1;1;0", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 83.25, 25.518375732009275 ], "wc_strength_and_weaknesses_avg": [ 343.75, 75.03790708701835 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 93.0, 73.18128175975056 ], "wc_summary_review_avg": [ 78.5, 20.068632240389476 ], "wc_review_avg": [ 598.5, 161.45200525233497 ], "wc_reply_reviewers_avg": [ 11.75, 12.376893794486563 ], "wc_reply_authors_avg": [ 41.25, 48.65888921872344 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 0.5, 0.5 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 9, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 0.0, "gs_citation": 10, "gs_cited_by_link":
"https://scholar.google.com/scholar?cites=18300947863075215&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=B4maZQLLW0_", "email": ";mila.quebec;umontreal.ca;;google.com;mit.edu;google.com;google.com;umontreal.ca", "author_num": 9, "aff_unique_index": "0;1;2;3;2;2;4", "aff_unique_norm": "Quebec Artificial Intelligence Institute;Universit\u00e9 de Montr\u00e9al;Google;Massachusetts Institute of Technology;University of Montreal", "aff_unique_dep": "Artificial Intelligence;;Google DeepMind;;", "aff_unique_url": "https://mila.quebec;https://www.umontreal.ca;https://deepmind.com;https://web.mit.edu;https://wwwumontreal.ca", "aff_unique_abbr": "Mila;UdeM;DeepMind;MIT;UM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;2;1;1;0", "aff_country_unique": "Canada;United Kingdom;United States" }, { "title": "Continuous PDE Dynamics Forecasting with Implicit Neural Representations", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12133", "id": "B73niNjbPs", "poster": "/media/PosterPDFs/ICLR%202023/12133.png?t=1682363365.9181695", "openreview": "https://openreview.net/forum?id=B73niNjbPs", "slides": "https://iclr.cc/virtual/2023/poster/12133", "video": "https://iclr.cc/virtual/2023/poster/12133", "author_site": "Yuan Yin, Matthieu Kirchmeyer, Jean-Yves Franceschi, alain rakotomamonjy, patrick gallinari", "tldr": "We propose a continuous-time, continuous-space data-driven PDE forecasting model with extensive spatiotemporal extrapolation capabilities including generalization to unseen sparse meshes and resolutions.", "abstract": "Effective data-driven PDE forecasting methods often rely on fixed spatial and / or temporal discretizations. This raises limitations in real-world applications like weather prediction where flexible extrapolation at arbitrary spatiotemporal locations is required. We address this problem by introducing a new data-driven approach, DINo, that models a PDE's flow with continuous-time dynamics of spatially continuous functions. This is achieved by embedding spatial observations independently of their discretization via Implicit Neural Representations in a small latent space temporally driven by a learned ODE. This separate and flexible treatment of time and space makes DINo the first data-driven model to combine the following advantages. It extrapolates at arbitrary spatial and temporal locations; it can learn from sparse irregular grids or manifolds; at test time, it generalizes to new grids or resolutions. 
DINo outperforms alternative neural PDE forecasters in a variety of challenging generalization scenarios on representative PDE systems.", "keywords": "spatiotemporal forecasting;Partial Differential Equations;PDEs;Implicit Neural Representations;INRs;continuous models;generalization;dynamical systems;physics", "primary_area": "", "supplementary_material": "/attachment/bf8ec3fb30847132318787051601182e5d15d563.zip", "author": "Yuan Yin;Matthieu Kirchmeyer;Jean-Yves Franceschi;Alain Rakotomamonjy;patrick gallinari", "authorids": "~Yuan_Yin1;~Matthieu_Kirchmeyer1;~Jean-Yves_Franceschi1;~Alain_Rakotomamonjy1;~patrick_gallinari1", "gender": "M;;M;;M", "homepage": "https://www.isir.upmc.fr/personnel/yin/;https://mkirchmeyer.github.io;http://jyfranceschi.fr;;", "dblp": ";241/9725;215/4886;;g/PatrickGallinari", "google_scholar": "https://scholar.google.com/citations?hl=fr;oJkKtrkAAAAJ;https://scholar.google.fr/citations?user=IL2OzksAAAAJ;;rFaxB20AAAAJ", "orcid": "0000-0003-1515-0696;;;;", "linkedin": "yuan-yin-nn/;;;;", "or_profile": "~Yuan_Yin1;~Matthieu_Kirchmeyer1;~Jean-Yves_Franceschi1;~Alain_Rakotomamonjy1;~patrick_gallinari1", "aff": "Sorbonne Universit\u00e9, CNRS, ISIR;Criteo AI Lab;Criteo;;Sorbonne Universite", "aff_domain": "isir.upmc.fr;criteo.com;criteo.com;;sorbonne-universite.fr", "position": "PhD student;Researcher;Researcher;;Full Professor", "bibtex": "@inproceedings{\nyin2023continuous,\ntitle={Continuous {PDE} Dynamics Forecasting with Implicit Neural Representations},\nauthor={Yuan Yin and Matthieu Kirchmeyer and Jean-Yves Franceschi and Alain Rakotomamonjy and patrick gallinari},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=B73niNjbPs}\n}", "github": "", "project": "", "reviewers": "P8h2;swAv;4pqH;eLsK", "pdf_size": 4030135, "recommendation": "6;6;8;8", "confidence": "4;3;3;4", "correctness": "4;3;3;4", "technical_novelty": "4;3;4;4", "empirical_novelty": "3;2;4;3", "wc_summary_paper": "61;62;48;92", "wc_strength_and_weaknesses": "204;162;838;315", "wc_clarity_quality_novelty_and_reproducibility": "26;37;27;39", "wc_summary_review": "23;87;209;26", "wc_review": "314;348;1122;472", "wc_reply_reviewers": "0;0;92;0", "wc_reply_authors": "608;1292;2146;972", "reply_reviewers": "0;0;1;0", "reply_authors": "1;3;5;2", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 65.75, 16.13032857693854 ], "wc_strength_and_weaknesses_avg": [ 379.75, 270.4111452954556 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 32.25, 5.80409338312195 ], "wc_summary_review_avg": [ 86.25, 75.33052170269366 ], "wc_review_avg": [ 564.0, 327.48435077114755 ], "wc_reply_reviewers_avg": [ 23.0, 39.83716857408418 ], "wc_reply_authors_avg": [ 1254.5, 568.7589559734422 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 1.479019945774904 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 82, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15360136814724624385&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 20, "pdf": "https://openreview.net/pdf?id=B73niNjbPs", "email": "isir.upmc.fr;criteo.com;criteo.com;;sorbonne-universite.fr", "author_num": 5, "aff_unique_index": "0;1;1;2", 
"aff_unique_norm": "Sorbonne Universit\u00e9;Criteo;Sorbonne University", "aff_unique_dep": "CNRS, ISIR;Criteo AI Lab;", "aff_unique_url": "https://www.sorbonne-universite.fr;https://www.criteo.com;https://www.sorbonne-universite.fr", "aff_unique_abbr": "Sorbonne U;Criteo;Sorbonne", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "France" }, { "id": "B7HJ9KLFV9U", "title": "Thinking Two Moves Ahead: Anticipating Other Users Improves Backdoor Attacks in Federated Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Federated learning is particularly susceptible to model poisoning and backdoor attacks because individual users have direct control over the training data and model updates. At the same time, the attack power of an individual user is limited because their updates are quickly drowned out by those of many other users. Existing attacks do not account for future behaviors of other users, and thus require many sequential updates and their effects are quickly erased. We propose an attack that anticipates and accounts for the entire federated learning pipeline, including behaviors of other clients, and ensures that backdoors are effective quickly and persist even after multiple rounds of community updates. We show that this new attack is effective in realistic scenarios where the attacker only contributes to a small fraction of randomly sampled rounds and demonstrate this attack on image classification, next-word prediction, and sentiment analysis.", "keywords": "Privacy;Federated Learning", "primary_area": "", "supplementary_material": "/attachment/f8102888f3396a32a4efdca82967db57cc6b0cd7.zip", "author": "Yuxin Wen;Jonas Geiping;Liam H Fowl;Hossein Souri;Rama Chellappa;Micah Goldblum;Tom Goldstein", "authorids": "~Yuxin_Wen2;~Jonas_Geiping1;~Liam_H_Fowl1;~Hossein_Souri1;~Rama_Chellappa1;~Micah_Goldblum1;~Tom_Goldstein1", "gender": ";M;;M;;;M", "homepage": "https://yuxinwenrick.github.io/;https://jonasgeiping.github.io/;;https://hsouri.github.io/;;;https://www.cs.umd.edu/~tomg/", "dblp": ";190/7229;241/6940;250/2286;;241/7231;25/8184", "google_scholar": "oUYfjg0AAAAJ;https://scholar.google.de/citations?user=206vNCEAAAAJ;IXv3ToAAAAAJ;rurbhy0AAAAJ;;pGDKzuUAAAAJ;KmSuVtgAAAAJ", "orcid": ";;;0000-0001-5264-798X;;;", "linkedin": ";;;hossein-souri-b7574795/;;;", "or_profile": "~Yuxin_Wen2;~Jonas_Geiping1;~Liam_H_Fowl1;~Hossein_Souri1;~Rama_Chellappa1;~Micah_Goldblum1;~Tom_Goldstein1", "aff": "University of Maryland, College Park;University of Maryland, College Park;Google;Johns Hopkins University;;New York University;University of Maryland, College Park", "aff_domain": "umd.edu;umd.edu;google.com;jhu.edu;;nyu.edu;umd.edu", "position": "PhD student;Postdoc;Google;PhD student;;Postdoc;Full Professor", "bibtex": "@misc{\nwen2023thinking,\ntitle={Thinking Two Moves Ahead: Anticipating Other Users Improves Backdoor Attacks in Federated Learning},\nauthor={Yuxin Wen and Jonas Geiping and Liam H Fowl and Hossein Souri and Rama Chellappa and Micah Goldblum and Tom Goldstein},\nyear={2023},\nurl={https://openreview.net/forum?id=B7HJ9KLFV9U}\n}", "github": "", "project": "", "reviewers": "xjs7;VoJT;Fbk1", "site": "https://openreview.net/forum?id=B7HJ9KLFV9U", "pdf_size": 1590399, "recommendation": "3;5;5", "confidence": "5;3;4", "correctness": "3;3;3", "technical_novelty": "2;3;2", "empirical_novelty": "2;2;2", "wc_summary_paper": "77;53;52", "wc_strength_and_weaknesses": "255;155;219", 
"wc_clarity_quality_novelty_and_reproducibility": "164;7;6", "wc_summary_review": "51;4;21", "wc_review": "547;219;298", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "550;221;350", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 60.666666666666664, 11.55662388223981 ], "wc_strength_and_weaknesses_avg": [ 209.66666666666666, 41.354833118055524 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 59.0, 74.2473344078201 ], "wc_summary_review_avg": [ 25.333333333333332, 19.430788855719562 ], "wc_review_avg": [ 354.6666666666667, 139.77203662472053 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 373.6666666666667, 135.352215431526 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.8660254037844385, "corr_recommendation_correctness": 0.0, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6671897135280579899&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;1;2;3;0", "aff_unique_norm": "University of Maryland;Google;Johns Hopkins University;New York University", "aff_unique_dep": ";Google;;", "aff_unique_url": "https://www/umd.edu;https://www.google.com;https://www.jhu.edu;https://www.nyu.edu", "aff_unique_abbr": "UMD;Google;JHU;NYU", "aff_campus_unique_index": "0;0;1;0", "aff_campus_unique": "College Park;Mountain View;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "B7gBcrKQCl4", "title": "Neural Layered Min-sum Decoders for Algebraic Codes", "track": "main", "status": "Withdraw", "tldr": "A neural min-sum decoder based on the layered min-sum algorithm with reduced weights and better error rates.", "abstract": "In this article, we propose low-complexity neural network decoders based on the layered min-sum algorithm to decode binary algebraic codes. By generalizing the layered min-sum algorithm to its neural network counterpart, the number of iterations required for convergence is reduced. Consequently, the number of network weights decreases while retaining a good error correction performance. The Bose-Chaudhuri-Hocquenghem (BCH) codes and quadratic residue (QR) codes are selected as two exemplary binary algebraic codes. Simulation results show that the proposed decoders achieve superior performance with less computational complexity, compared with the decoders proposed by Chen & Ye (2021). 
Further, a neural decoder incorporating the modified random redundant decoding (mRRD) algorithm is investigated to approach the performance of maximum-likelihood (ML) decoding for some short codes.", "keywords": "Error correction code", "primary_area": "", "supplementary_material": "", "author": "Ming Wang;Rui Liu;Huihui Wu;Yong Li", "authorids": "~Ming_Wang4;liurui_cs@cqu.edu.cn;huihui.wu.phd@gmail.com;yongli@cqu.edu.cn", "gender": "M;;;", "homepage": ";;;", "dblp": ";;;", "google_scholar": ";;;", "orcid": "0000-0002-8346-0197;;;", "linkedin": ";;;", "or_profile": "~Ming_Wang4;liurui_cs@cqu.edu.cn;huihui.wu.phd@gmail.com;yongli@cqu.edu.cn", "aff": "Chongqing University;;;", "aff_domain": "cqu.edu.cn;;;", "position": "MS student;;;", "bibtex": "@misc{\nwang2023neural,\ntitle={Neural Layered Min-sum Decoders for Algebraic Codes},\nauthor={Ming Wang and Rui Liu and Huihui Wu and Yong Li},\nyear={2023},\nurl={https://openreview.net/forum?id=B7gBcrKQCl4}\n}", "github": "", "project": "", "reviewers": "5bqH;fvTc;bLcm", "site": "https://openreview.net/forum?id=B7gBcrKQCl4", "pdf_size": 400777, "recommendation": "3;3;3", "confidence": "4;3;4", "correctness": "4;3;4", "technical_novelty": "3;2;2", "empirical_novelty": "2;2;2", "wc_summary_paper": "99;62;68", "wc_strength_and_weaknesses": "290;329;158", "wc_clarity_quality_novelty_and_reproducibility": "11;53;43", "wc_summary_review": "63;36;25", "wc_review": "463;480;294", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 76.33333333333333, 16.21384867602041 ], "wc_strength_and_weaknesses_avg": [ 259.0, 73.17103251970687 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 35.666666666666664, 17.913371790059205 ], "wc_summary_review_avg": [ 41.333333333333336, 15.965240019770729 ], "wc_review_avg": [ 412.3333333333333, 83.96163144886809 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:XoGlBya5u-MJ:scholar.google.com/&scioq=Neural+Layered+Min-sum+Decoders+for+Algebraic+Codes&hl=en&as_sdt=0,31", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Chongqing University", "aff_unique_dep": "", "aff_unique_url": "https://www.cqu.edu.cn", "aff_unique_abbr": "CQU", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "id": "B8_T6-8-tCU", "title": "On The Implicit Bias of Weight Decay in Shallow Univariate ReLU Networks", "track": "main", "status": "Reject", "tldr": "Minimal \\ell_2-norm interpolation by univariate scalar one layer ReLU is completely characterized in terms of the convexity of the learned predictor, giving new sharp generalization bounds on 1d Lipschitz functions.", "abstract": "We give a complete characterization of the implicit bias of infinitesimal weight decay in the modest setting of univariate one layer ReLU networks. 
Our main result is a surprisingly simple geometric description of all one layer ReLU networks that exactly fit a dataset $\\mathcal D= \\set{(x_i,y_i)}$ with the minimum value of the $\\ell_2$-norm of the neuron weights. Specifically, we prove that such functions must be either concave or convex between any two consecutive data sites $x_i$ and $x_{i+1}$. Our description implies that interpolating ReLU networks with weak $\\ell_2$-regularization achieve the best possible generalization for learning $1d$ Lipschitz functions, up to universal constants. ", "keywords": "theory;implicit bias;generalization;interpolation;theoretical;shallow ReLU networks;ReLU networks;analysis of weight decay", "primary_area": "", "supplementary_material": "/attachment/8aac20a70d534da243c7587003b5e644342a01fb.zip", "author": "Boris Hanin", "authorids": "~Boris_Hanin1", "gender": "", "homepage": "https://hanin.princeton.edu", "dblp": "205/2534", "google_scholar": "", "orcid": "", "linkedin": "", "or_profile": "~Boris_Hanin1", "aff": "Princeton University", "aff_domain": "princeton.edu", "position": "Assistant Professor", "bibtex": "@misc{\nhanin2023on,\ntitle={On The Implicit Bias of Weight Decay in Shallow Univariate Re{LU} Networks},\nauthor={Boris Hanin},\nyear={2023},\nurl={https://openreview.net/forum?id=B8_T6-8-tCU}\n}", "github": "", "project": "", "reviewers": "LdBk;Y8Pp;izUL;oSFb", "site": "https://openreview.net/forum?id=B8_T6-8-tCU", "pdf_size": 384439, "recommendation": "3;5;5;8", "confidence": "3;4;4;3", "correctness": "2;4;3;4", "technical_novelty": "1;2;4;3", "empirical_novelty": "1;0;0;3", "wc_summary_paper": "98;64;142;179", "wc_strength_and_weaknesses": "20;129;45;82", "wc_clarity_quality_novelty_and_reproducibility": "164;27;58;27", "wc_summary_review": "39;28;119;44", "wc_review": "321;248;364;332", "wc_reply_reviewers": "0;0;146;0", "wc_reply_authors": "264;577;832;46", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;2;1", "recommendation_avg": [ 5.25, 1.7853571071357126 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.5, 1.118033988749895 ], "empirical_novelty_avg": [ 1.0, 1.224744871391589 ], "wc_summary_paper_avg": [ 120.75, 43.53949356618655 ], "wc_strength_and_weaknesses_avg": [ 69.0, 41.06701839676214 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 69.0, 56.28943062422998 ], "wc_summary_review_avg": [ 57.5, 35.975686233899694 ], "wc_review_avg": [ 316.25, 42.45217897823385 ], "wc_reply_reviewers_avg": [ 36.5, 63.21985447626402 ], "wc_reply_authors_avg": [ 429.75, 299.2593983486567 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 1, 0 ], "corr_recommendation_confidence": -0.14002800840280097, "corr_recommendation_correctness": 0.8021806287494232, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:KPCl1tnJ1lUJ:scholar.google.com/&scioq=On+The+Implicit+Bias+of+Weight+Decay+in+Shallow+Univariate+ReLU+Networks&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Princeton University", "aff_unique_dep": "", "aff_unique_url": "https://www.princeton.edu", "aff_unique_abbr": "Princeton", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "From $t$-SNE to UMAP with contrastive learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11861", "id": "B8a1FcY0vi", "poster": 
"/media/PosterPDFs/ICLR%202023/11861.png?t=1682082948.8276627", "openreview": "https://openreview.net/forum?id=B8a1FcY0vi", "slides": "https://iclr.cc/virtual/2023/poster/11861", "video": "https://iclr.cc/virtual/2023/poster/11861", "author_site": "Sebastian Damrich, Niklas B\u00f6hm, Fred A Hamprecht, Dmitry Kobak", "tldr": "We show that UMAP is effectively negative sampling applied to the t-SNE loss function.", "abstract": "Neighbor embedding methods $t$-SNE and UMAP are the de facto standard for visualizing high-dimensional datasets. Motivated from entirely different viewpoints, their loss functions appear to be unrelated. In practice, they yield strongly differing embeddings and can suggest conflicting interpretations of the same data. The fundamental reasons for this and, more generally, the exact relationship between $t$-SNE and UMAP have remained unclear. In this work, we uncover their conceptual connection via a new insight into contrastive learning methods. Noise-contrastive estimation can be used to optimize $t$-SNE, while UMAP relies on negative sampling, another contrastive method. We find the precise relationship between these two contrastive methods, and provide a mathematical characterization of the distortion introduced by negative sampling. Visually, this distortion results in UMAP generating more compact embeddings with tighter clusters compared to $t$-SNE. We exploit this new conceptual connection to propose and implement a generalization of negative sampling, allowing us to interpolate between (and even extrapolate beyond) $t$-SNE and UMAP and their respective embeddings. Moving along this spectrum of embeddings leads to a trade-off between discrete / local and continuous / global structures, mitigating the risk of over-interpreting ostensible features of any single embedding. 
We provide a PyTorch implementation.", "keywords": "visualization;dimensionality reduction;t-SNE;UMAP", "primary_area": "", "supplementary_material": "", "author": "Sebastian Damrich;Niklas B\u00f6hm;Fred A Hamprecht;Dmitry Kobak", "authorids": "~Sebastian_Damrich1;~Niklas_B\u00f6hm1;~Fred_A_Hamprecht1;~Dmitry_Kobak2", "gender": ";M;M;", "homepage": ";https://jnboehm.com/;https://sciai-lab.org/;https://dkobak.github.io/", "dblp": "252/5237;270/9446;18/4529;236/5191", "google_scholar": "-ClpooYAAAAJ;CWHChI8AAAAJ;lO62bt0AAAAJ;BUQbD5kAAAAJ", "orcid": "0000-0003-1394-6236;;;", "linkedin": "sebastian-damrich-a44a8131;;;", "or_profile": "~Sebastian_Damrich1;~Niklas_B\u00f6hm1;~Fred_A_Hamprecht1;~Dmitry_Kobak2", "aff": "Eberhard-Karls-Universit\u00e4t T\u00fcbingen;University of T\u00fcbingen;Heidelberg University;Eberhard-Karls-Universit\u00e4t T\u00fcbingen", "aff_domain": "uni-tuebingen.de;uni-tuebingen.de;uni-heidelberg.de;uni-tuebingen.de", "position": "Postdoc;PhD student;Full Professor;Researcher", "bibtex": "@inproceedings{\ndamrich2023from,\ntitle={From \\$t\\$-{SNE} to {UMAP} with contrastive learning},\nauthor={Sebastian Damrich and Niklas B{\\\"o}hm and Fred A Hamprecht and Dmitry Kobak},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=B8a1FcY0vi}\n}", "github": "", "project": "", "reviewers": "fZKZ;gEqf;d2PD;ZbpN;xsMD", "pdf_size": 44142746, "recommendation": "3;5;6;8;8", "confidence": "4;2;2;3;3", "correctness": "2;3;2;4;3", "technical_novelty": "2;2;2;4;3", "empirical_novelty": "2;2;2;3;0", "wc_summary_paper": "58;45;44;75;34", "wc_strength_and_weaknesses": "142;176;63;175;194", "wc_clarity_quality_novelty_and_reproducibility": "56;19;68;39;24", "wc_summary_review": "46;64;25;32;47", "wc_review": "302;304;200;321;299", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "844;902;375;437;512", "reply_reviewers": "0;0;0;0;0", "reply_authors": "2;2;1;1;1", "recommendation_avg": [ 6.0, 1.8973665961010275 ], "confidence_avg": [ 2.8, 0.7483314773547882 ], "correctness_avg": [ 2.8, 0.7483314773547882 ], "technical_novelty_avg": [ 2.6, 0.8 ], "empirical_novelty_avg": [ 1.8, 0.9797958971132712 ], "wc_summary_paper_avg": [ 51.2, 14.133647795243801 ], "wc_strength_and_weaknesses_avg": [ 150.0, 46.62617290749907 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 41.2, 18.605375567292374 ], "wc_summary_review_avg": [ 42.8, 13.49666625504239 ], "wc_review_avg": [ 285.2, 43.28232895767047 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 614.0, 216.65548689105475 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.28171808490950556, "corr_recommendation_correctness": 0.7042952122737638, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1568814659161622322&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=B8a1FcY0vi", "email": "uni-tuebingen.de;uni-tuebingen.de;uni-heidelberg.de;uni-tuebingen.de", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Eberhard Karls University of T\u00fcbingen;University of T\u00fcbingen;Heidelberg University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uni-tuebingen.de/;https://www.uni-tuebingen.de/;https://www.uni-heidelberg.de", "aff_unique_abbr": "Uni T\u00fcbingen;Uni T\u00fcbingen;Uni Heidelberg", "aff_campus_unique_index": "0;0", 
"aff_campus_unique": "T\u00fcbingen;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "title": "Re-parameterizing Your Optimizers rather than Architectures", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11873", "id": "B92TMCG_7rp", "poster": "/media/PosterPDFs/ICLR%202023/11873.png?t=1680667264.789494", "openreview": "https://openreview.net/forum?id=B92TMCG_7rp", "slides": "https://iclr.cc/virtual/2023/poster/11873", "video": "https://iclr.cc/virtual/2023/poster/11873", "author_site": "Xiaohan Ding, Honghao Chen, Xiangyu Zhang, Kaiqi Huang, Jungong Han, Guiguang Ding", "tldr": "Modify gradient flow to incorporate model-specific prior knowledge into the optimizers for training simple and efficient models.", "abstract": "The well-designed structures in neural networks reflect the prior knowledge incorporated into the models. However, though different models have various priors, we are used to training them with model-agnostic optimizers such as SGD. In this paper, we propose to incorporate model-specific prior knowledge into optimizers by modifying the gradients according to a set of model-specific hyper-parameters. Such a methodology is referred to as Gradient Re-parameterization, and the optimizers are named RepOptimizers. For the extreme simplicity of model structure, we focus on a VGG-style plain model and showcase that such a simple model trained with a RepOptimizer, which is referred to as RepOpt-VGG, performs on par with or better than the recent well-designed models. From a practical perspective, RepOpt-VGG is a favorable base model because of its simple structure, high inference speed and training efficiency. Compared to Structural Re-parameterization, which adds priors into models via constructing extra training-time structures, RepOptimizers require no extra forward/backward computations and solve the problem of quantization. We hope to spark further research beyond the realms of model structure design. 
Code and models https://github.com/DingXiaoH/RepOptimizers.", "keywords": "Deep Learning;Model Architecture;Optimizer;Re-parameterization", "primary_area": "", "supplementary_material": "", "author": "Xiaohan Ding;Honghao Chen;Xiangyu Zhang;Kaiqi Huang;Jungong Han;Guiguang Ding", "authorids": "~Xiaohan_Ding1;~Honghao_Chen1;~Xiangyu_Zhang1;~Kaiqi_Huang1;~Jungong_Han1;~Guiguang_Ding1", "gender": "M;M;M;M;M;M", "homepage": "https://dingxiaohan.xyz/;;;https://people.ucas.ac.cn/~huangkaiqi?language=en;https://jungonghan.github.io/;http://ise.thss.tsinghua.edu.cn/MIG/dgg.html", "dblp": "218/7379;279/9807;95/3760-5.html;89/7026;98/6127;51/740", "google_scholar": "CIjw0KoAAAAJ;https://scholar.google.com/citations?hl=en;yuB-cfoAAAAJ;caQ-OmYAAAAJ;hNi1gxAAAAAJ;https://scholar.google.com.tw/citations?user=B7F3yt4AAAAJ", "orcid": ";;0000-0003-2138-4608;;0000-0003-4361-956X;0000-0003-0137-9975", "linkedin": ";;;;;", "or_profile": "~Xiaohan_Ding1;~Honghao_Chen1;~Xiangyu_Zhang1;~Kaiqi_Huang1;~Jungong_Han1;~Guiguang_Ding1", "aff": "Tencent AI Lab;Institute of Automation, Chinese Academy of Sciences;MEGVII Technology;Institute of automation, Chinese academy of science;University of Sheffield;Tsinghua University", "aff_domain": "tencent.com;ia.ac.cn;megvii.com;nlpr.ia.ac.cn;sheffield.ac.uk;tsinghua.edu.cn", "position": "Researcher;PhD student;Principal Researcher;Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nding2023reparameterizing,\ntitle={Re-parameterizing Your Optimizers rather than Architectures},\nauthor={Xiaohan Ding and Honghao Chen and Xiangyu Zhang and Kaiqi Huang and Jungong Han and Guiguang Ding},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=B92TMCG_7rp}\n}", "github": "", "project": "", "reviewers": "96Hr;Hu4S;yhDx;FagT", "pdf_size": 1005991, "recommendation": "3;6;8;8", "confidence": "5;3;3;4", "correctness": "2;3;3;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "39;60;54;240", "wc_strength_and_weaknesses": "87;157;258;254", "wc_clarity_quality_novelty_and_reproducibility": "186;46;32;18", "wc_summary_review": "52;33;11;76", "wc_review": "364;296;355;588", "wc_reply_reviewers": "0;0;0;26", "wc_reply_authors": "1464;740;387;537", "reply_reviewers": "0;0;0;1", "reply_authors": "2;1;1;1", "recommendation_avg": [ 6.25, 2.0463381929681126 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 98.25, 82.1960309260733 ], "wc_strength_and_weaknesses_avg": [ 189.0, 71.4387849840687 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 70.5, 67.41476099490377 ], "wc_summary_review_avg": [ 43.0, 23.947860029656095 ], "wc_review_avg": [ 400.75, 111.21909683143448 ], "wc_reply_reviewers_avg": [ 6.5, 11.258330249197702 ], "wc_reply_authors_avg": [ 782.0, 413.2003146174988 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.6998739952495694, "corr_recommendation_correctness": 0.9169493006161777, "gs_citation": 65, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4676948259687534557&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=B92TMCG_7rp", "email": 
"tencent.com;ia.ac.cn;megvii.com;nlpr.ia.ac.cn;sheffield.ac.uk;tsinghua.edu.cn", "author_num": 6, "aff_unique_index": "0;1;2;1;3;4", "aff_unique_norm": "Tencent;Chinese Academy of Sciences;Megvii Technology;University of Sheffield;Tsinghua University", "aff_unique_dep": "Tencent AI Lab;Institute of Automation;;;", "aff_unique_url": "https://ai.tencent.com;http://www.ia.cas.cn;https://www.megvii.com;https://www.sheffield.ac.uk;https://www.tsinghua.edu.cn", "aff_unique_abbr": "Tencent AI Lab;CAS;;Sheffield;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;0", "aff_country_unique": "China;United Kingdom" }, { "id": "BAakXAV6Cf", "title": "Break the Wall Between Homophily and Heterophily for Graph Representation Learning", "track": "main", "status": "Withdraw", "tldr": "This work proposes a new GNN model called OGNN (Omnipotent Graph Neural Network) that extracts different aspects of graph representations to generalize well on the whole spectrum of homophily.", "abstract": "Homophily and heterophily are intrinsic properties of graphs that describe whether two linked nodes share similar properties. Although many Graph Neural Network (GNN) models have been proposed, it remains unclear how to design a model so that it can generalize well to the whole spectrum of homophily. This work addresses the challenge by identifying three graph features, including the ego node feature, the aggregated node feature, and the graph structure feature, that are essential for graph representation learning. It further proposes a new GNN model called OGNN (Omnipotent Graph Neural Network) that extracts all three graph features and adaptively fuses them to achieve generalizability across the whole spectrum of homophily. Extensive experiments on both synthetic and real datasets demonstrate the superiority (average rank 1.56) of our OGNN compared with state-of-the-art methods. 
Our code will be available at https://*.", "keywords": "Graph Neural Networks;Graph Homophily;Graph Heterophily", "primary_area": "", "supplementary_material": "", "author": "Xiao Liu;Lijun Zhang;Hui Guan", "authorids": "~Xiao_Liu9;~Lijun_Zhang4;~Hui_Guan1", "gender": "M;F;F", "homepage": "https://johnsmith2012.github.io/resume/;https://zhanglijun95.github.io/resume/;https://guanh01.github.io/", "dblp": "82/1364-30;;77/6645-1.html", "google_scholar": ";;L2P0jCsAAAAJ", "orcid": ";;0000-0001-9128-2231", "linkedin": ";;", "or_profile": "~Xiao_Liu9;~Lijun_Zhang4;~Hui_Guan1", "aff": "University of Massachusetts at Amherst;University of Massachusetts, Amherst;University of Massachusetts, Amherst", "aff_domain": "umass.edu;umass.edu;umass.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@misc{\nliu2023break,\ntitle={Break the Wall Between Homophily and Heterophily for Graph Representation Learning},\nauthor={Xiao Liu and Lijun Zhang and Hui Guan},\nyear={2023},\nurl={https://openreview.net/forum?id=BAakXAV6Cf}\n}", "github": "", "project": "", "reviewers": "212T;M8Mx;b86o", "site": "https://openreview.net/forum?id=BAakXAV6Cf", "pdf_size": 923269, "recommendation": "1;3;3", "confidence": "4;5;5", "correctness": "3;2;3", "technical_novelty": "2;2;2", "empirical_novelty": "2;2;2", "wc_summary_paper": "28;62;73", "wc_strength_and_weaknesses": "295;172;195", "wc_clarity_quality_novelty_and_reproducibility": "8;13;128", "wc_summary_review": "24;38;115", "wc_review": "355;285;511", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 2.3333333333333335, 0.9428090415820634 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 54.333333333333336, 19.154343864744856 ], "wc_strength_and_weaknesses_avg": [ 220.66666666666666, 53.39371581833286 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 49.666666666666664, 55.42763049438631 ], "wc_summary_review_avg": [ 59.0, 40.00833246545858 ], "wc_review_avg": [ 383.6666666666667, 94.46457301843668 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 1.0, "corr_recommendation_correctness": -0.5, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:0-1--U4ri0EJ:scholar.google.com/&scioq=Break+the+Wall+Between+Homophily+and+Heterophily+for+Graph+Representation+Learning&hl=en&as_sdt=0,31", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Massachusetts Amherst", "aff_unique_dep": "", "aff_unique_url": "https://www.umass.edu", "aff_unique_abbr": "UMass Amherst", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Amherst", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "BCfxM1tR8E", "title": "Exploring interactions between modalities for deepfake detection", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "As face forgery techniques have become more mature, the proliferation of deepfakes may threaten the security of human society.
Although existing deepfake detection methods achieve good performance in in-dataset evaluation, their generalization ability still remains to be improved, where the representation of the imperceptible artifacts plays a significant role. In this paper, we propose an Interactive Two-Stream Network (ITSNet) to explore the discriminant inconsistency representation from the perspective of cross-modality. Specifically, the patch-wise Decomposable Discrete Cosine Transform (DDCT) is adopted to extract fine-grained high-frequency clues, and information from different modalities is communicated via a designed interaction module. To perceive the temporal inconsistency, we first develop a Short-term Embedding Module (SEM) to refine subtle local inconsistency representation between adjacent frames, and then a Long-term Embedding Module (LEM) is designed to further refine the erratic temporal inconsistency representation from the long-range perspective. Extensive experimental results on three public datasets show that ITSNet outperforms the state-of-the-art methods in terms of both in-dataset and cross-dataset evaluations.", "keywords": "cross-modality representation learning;inconsistency representation;interaction", "primary_area": "", "supplementary_material": "", "author": "Jianghao Wu;Baopeng Zhang;Zhaoyang Li;Guilin Pang;Zhu Teng;Jianping Fan", "authorids": "~Jianghao_Wu2;~Baopeng_Zhang1;21112025@bjtu.edu.cn;~Guilin_Pang1;~Zhu_Teng2;~Jianping_Fan4", "gender": "M;M;;;Unspecified;M", "homepage": ";http://faculty.bjtu.edu.cn/8221/;;;http://faculty.bjtu.edu.cn/8902/;", "dblp": ";22/3524;;;132/2247;69/2360.html", "google_scholar": ";;;;;", "orcid": "0000-0001-6756-5207;0000-0003-2592-2354;;;0000-0002-1754-4878;", "linkedin": ";;;;;", "or_profile": "~Jianghao_Wu2;~Baopeng_Zhang1;21112025@bjtu.edu.cn;~Guilin_Pang1;~Zhu_Teng2;~Jianping_Fan4", "aff": "Beijing Jiaotong University;Beijing jiaotong university;;;Beijing jiaotong university;Northwest University", "aff_domain": "bjtu.edu.cn;bjtu.edu.cn;;;bjtu.edu.cn;nwu.edu.cn", "position": "MS student;Associate Professor;;;Associate Professor;Full Professor", "bibtex": "@misc{\nwu2023exploring,\ntitle={Exploring interactions between modalities for deepfake detection},\nauthor={Jianghao Wu and Baopeng Zhang and Zhaoyang Li and Guilin Pang and Zhu Teng and Jianping Fan},\nyear={2023},\nurl={https://openreview.net/forum?id=BCfxM1tR8E}\n}", "github": "", "project": "", "reviewers": "cXbs;uiFT;rgmw;i38n;YyCY;x7bE", "site": "https://openreview.net/forum?id=BCfxM1tR8E", "pdf_size": 2711095, "recommendation": "3;3;5;5;6;6", "confidence": "5;4;4;4;4;2", "correctness": "2;3;3;3;3;3", "technical_novelty": "3;3;2;3;2;2", "empirical_novelty": "3;3;2;2;2;2", "wc_summary_paper": "76;83;106;92;80;65", "wc_strength_and_weaknesses": "330;249;217;112;182;153", "wc_clarity_quality_novelty_and_reproducibility": "31;165;37;31;54;19", "wc_summary_review": "36;57;59;29;30;26", "wc_review": "473;554;419;264;346;263", "wc_reply_reviewers": "0;0;0;0;0;0", "wc_reply_authors": "0;0;0;0;0;0", "reply_reviewers": "0;0;0;0;0;0", "reply_authors": "0;0;0;0;0;0", "recommendation_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.8333333333333335, 0.8975274678557507 ], "correctness_avg": [ 2.8333333333333335, 0.3726779962499649 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 83.66666666666667, 12.840906856172149 ], "wc_strength_and_weaknesses_avg": [
207.16666666666666, 70.18883260335808 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 56.166666666666664, 49.77421242191807 ], "wc_summary_review_avg": [ 39.5, 13.425721582097552 ], "wc_review_avg": [ 386.5, 106.84373324314971 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.6451791670811048, "corr_recommendation_correctness": 0.5976143046671968, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:fOGyu18fPYQJ:scholar.google.com/&scioq=Exploring+interactions+between+modalities+for+deepfake+detection&hl=en&as_sdt=0,48", "gs_version_total": 0, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Beijing Jiao Tong University;Northwest University", "aff_unique_dep": ";", "aff_unique_url": "http://www.njtu.edu.cn/en;https://www.nwu.edu.cn", "aff_unique_abbr": "BJTU;NWU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "BDjGGZk9yz", "title": "Supervised Random Feature Regression via Projection Pursuit", "track": "main", "status": "Reject", "tldr": "", "abstract": "Random feature methods and neural network models are two popular nonparametric modeling methods, which are regarded as representatives of shallow learning and deep learning, respectively. In practice, random feature methods lack the capacity for feature learning, while neural network methods are computationally heavy. This paper proposes a flexible yet computationally efficient method for general nonparametric problems. Precisely, our proposed method is a feed-forward two-layer nonparametric estimator: the first layer learns a series of univariate basis functions for each projection variable and then searches for their optimal linear combination within each group of these learnt functions. Based on all the features derived in the first layer, the second layer learns a single-index function with an unknown activation function.
Our nonparametric estimation takes advantage of both random features and neural networks, and can be seen as an intermediate bridge between them.", "keywords": "Random Feature;multi-kernel;projection pursuit;semi-parametric regression;neural networks", "primary_area": "", "supplementary_material": "/attachment/173691885d93f4ad342f583174d0375e100f5bbd.zip", "author": "Jingran Zhou;Ling Zhou;shaogao lv", "authorids": "~Jingran_Zhou1;~Ling_Zhou9;~shaogao_lv1", "gender": "M;;M", "homepage": ";;", "dblp": ";;https://dblp.uni-trier.de/search?q=shaogao%20Lv", "google_scholar": ";5wpl_sUAAAAJ;http://cncc.bingj.com/cache.aspx?q=shaogao+lv", "orcid": ";0000-0002-2664-9583;", "linkedin": "jingranzhou-peter;;", "or_profile": "~Jingran_Zhou1;~Ling_Zhou9;~shaogao_lv1", "aff": "Southwest University of Finance and Economics;Southwest University of Finance and Economics;Nanjing Audit University", "aff_domain": "swufe.edu.cn;swufe.edu.cn;nau.edu.cn", "position": "PhD student;Associate Professor;Professor", "bibtex": "@misc{\nzhou2023supervised,\ntitle={Supervised Random Feature Regression via Projection Pursuit},\nauthor={Jingran Zhou and Ling Zhou and shaogao lv},\nyear={2023},\nurl={https://openreview.net/forum?id=BDjGGZk9yz}\n}", "github": "", "project": "", "reviewers": "hYRF;ZaoF;iHFk", "site": "https://openreview.net/forum?id=BDjGGZk9yz", "pdf_size": 510117, "recommendation": "1;3;3", "confidence": "3;4;3", "correctness": "3;2;3", "technical_novelty": "1;1;2", "empirical_novelty": "2;2;2", "wc_summary_paper": "20;121;36", "wc_strength_and_weaknesses": "165;46;260", "wc_clarity_quality_novelty_and_reproducibility": "209;35;57", "wc_summary_review": "49;262;27", "wc_review": "443;464;380", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 2.3333333333333335, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 1.3333333333333333, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 59.0, 44.3245605355165 ], "wc_strength_and_weaknesses_avg": [ 157.0, 87.54808202734465 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 100.33333333333333, 77.36206362868617 ], "wc_summary_review_avg": [ 112.66666666666667, 105.97588824298143 ], "wc_review_avg": [ 429.0, 35.6931365951495 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.5, "corr_recommendation_correctness": -0.5, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:JlR2FxGFSB4J:scholar.google.com/&scioq=Supervised+Random+Feature+Regression+via+Projection+Pursuit&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;0;1", "aff_unique_norm": "Southwest University of Finance and Economics;Nanjing Audit University", "aff_unique_dep": ";", "aff_unique_url": "https://www.swufe.edu.cn;http://www.nau.edu.cn/", "aff_unique_abbr": "SWUFE;NAU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "BEpJFTH50iT", "title": "Searching optimal adjustment features for treatment effect estimation", "track": "main", "status": "Withdraw", "tldr": "We construct a reinforcement-learning based framework to search the optimal adjustment 
features for more precise treatment effect estimation.", "abstract": "Most efforts devoted to causal inference focus on controlling the adjustment features to further alleviate the confounding effect. In realistic applications, the collected covariates often contain variables correlated with only one of the treatment (e.g., instrumental variables) and the outcome (e.g., precision variables). Due to the absence of prior knowledge, the brute-force approach for the practitioner is to include every covariate for adjustment. However, previous literature shows that adjusting the former covariates (treatment-only) hurts the treatment effect estimation, while adjusting the latter covariates (outcome-only) brings benefits. Consequently, it is meaningful to find an optimal adjustment set rather than the brute-force approach for more efficient treatment effect estimation. To this end, we establish a computationally tractable variance metric to measure the optimality of the adjustment set. From the non-parametric viewpoint, we theoretically show that our metric is minimized if and only if the adjustment features contain the confounders and the outcome-only variables. As optimizing the proposed variance metric is a combinatorial optimization problem, we incorporate Reinforcement Learning (RL) to search for the corresponding optimal adjustment set. More specifically, we adopt the encoder-decoder model as the actor to generate the binary feature mask on the original covariates, which serves as the differentiable policy. Meanwhile, the proposed variance metric serves as the reward to guide the policy update. Empirical results on synthetic and real-world datasets demonstrate that (a) our method successfully searches for the optimal adjustment sets and (b) the searched adjustment features achieve more precise treatment effect estimation.", "keywords": "treatment effect estimation;covariate separation;confounder balancing", "primary_area": "", "supplementary_material": "", "author": "Haotian Wang;Kun Kuang;Mingyang Geng;Haoang Chi;Ruoxuan Xiong;Longqi Yang;Wanrong Huang;Wenjing Yang", "authorids": "~Haotian_Wang2;~Kun_Kuang1;~Mingyang_Geng1;~Haoang_Chi1;~Ruoxuan_Xiong1;~Longqi_Yang2;~Wanrong_Huang1;~Wenjing_Yang1", "gender": "M;M;M;M;;M;M;F", "homepage": "https://www.researchgate.net/profile/Haotian-Wang-9;http://kunkuang.github.io;;;http://www.ruoxuanxiong.com/;;;https://www.researchgate.net/scientific-contributions/Wen-Jing-Yang-2056467943", "dblp": "63/11345-1;194/4245;204/1555;284/9320;222/2927;143/7359;184/0874;48/3396-2", "google_scholar": "CbH1UJAAAAAJ;https://scholar.google.com.hk/citations?user=FOsNiMQAAAAJ;;;lg_0u-0AAAAJ;;;", "orcid": "0000-0003-2928-5575;0009-0000-7528-8131;0000-0002-7239-1819;;;;0000-0001-5778-9055;", "linkedin": ";;;;;;;", "or_profile": "~Haotian_Wang2;~Kun_Kuang1;~Mingyang_Geng1;~Haoang_Chi1;~Ruoxuan_Xiong1;~Longqi_Yang2;~Wanrong_Huang1;~Wenjing_Yang1", "aff": "National University of Defense Technology;Zhejiang University;National University of Defense Technology;Intelligent Game and Decision Laboratory, Beijing;Emory University;;National University of Defense Technology;National University of Defense Technology", "aff_domain": "nudt.edu.cn;zju.edu.cn;nudt.edu.cn;nudt.edu.cn;emory.edu;;nudt.edu.cn;nudt.edu.cn", "position": "PhD student;Associate Professor;PhD student;PhD student;Assistant Professor;;Assistant Research Fellow;Associate Professor", "bibtex": "@misc{\nwang2023searching,\ntitle={Searching optimal adjustment features for treatment effect
estimation},\nauthor={Haotian Wang and Kun Kuang and Mingyang Geng and Haoang Chi and Ruoxuan Xiong and Longqi Yang and Wanrong Huang and Wenjing Yang},\nyear={2023},\nurl={https://openreview.net/forum?id=BEpJFTH50iT}\n}", "github": "", "project": "", "reviewers": "FqNX;pRn8;Tc6M;euT9", "site": "https://openreview.net/forum?id=BEpJFTH50iT", "pdf_size": 604954, "recommendation": "1;1;3;3", "confidence": "4;5;3;4", "correctness": "1;2;2;2", "technical_novelty": "1;2;1;2", "empirical_novelty": "1;2;1;1", "wc_summary_paper": "52;60;49;85", "wc_strength_and_weaknesses": "287;523;50;146", "wc_clarity_quality_novelty_and_reproducibility": "36;31;448;27", "wc_summary_review": "17;29;43;14", "wc_review": "392;643;590;272", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "94;130;272;43", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 2.0, 1.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 1.75, 0.4330127018922193 ], "technical_novelty_avg": [ 1.5, 0.5 ], "empirical_novelty_avg": [ 1.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 61.5, 14.150971698084906 ], "wc_strength_and_weaknesses_avg": [ 251.5, 177.97822900568485 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 135.5, 180.45013161535792 ], "wc_summary_review_avg": [ 25.75, 11.431863365173676 ], "wc_review_avg": [ 474.25, 149.62014403147725 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 134.75, 85.05696620500875 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": -0.7071067811865475, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:d_--PBWBcCUJ:scholar.google.com/&scioq=Searching+optimal+adjustment+features+for+treatment+effect+estimation&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;0;2;3;0;0", "aff_unique_norm": "National University of Defense Technology;Zhejiang University;Intelligent Game and Decision Laboratory;Emory University", "aff_unique_dep": ";;Intelligent Game and Decision Laboratory;", "aff_unique_url": "http://www.nudt.edu.cn/;https://www.zju.edu.cn;;https://www.emory.edu", "aff_unique_abbr": "NUDT;ZJU;;Emory", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;0;0", "aff_country_unique": "China;United States" }, { "title": "Learning to Linearize Deep Neural Networks for Secure and Efficient Private Inference", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12068", "id": "BGF9IeDfmlH", "poster": "", "openreview": "https://openreview.net/forum?id=BGF9IeDfmlH", "slides": "https://iclr.cc/virtual/2023/poster/12068", "video": "https://iclr.cc/virtual/2023/poster/12068", "author_site": "SOUVIK KUNDU, Shunlin Lu, Yuke Zhang, Jacqueline Liu, Peter Beerel", "tldr": "We present an automated linearization method to train a DNN with a limited ReLU budget for private inference, yielding models that perform significantly better than the existing private inference SOTA in terms of both potentially improved latency and accuracy.", "abstract": "The large number of ReLU non-linearity operations in existing deep neural networks makes them ill-suited for latency-efficient private inference (PI). Existing techniques to reduce ReLU operations often involve manual effort and sacrifice significant accuracy.
In this paper, we first present a novel measure of non-linearity layers\u2019 ReLU sensitivity, enabling mitigation of the time-consuming manual efforts in identifying the same. Based on this sensitivity, we then present SENet, a three-stage training method that for a given ReLU budget, automatically assigns per-layer ReLU counts, decides the ReLU locations for each layer\u2019s activation map, and trains a model with significantly fewer ReLUs to potentially yield latency and communication efficient PI. Experimental evaluations with multiple models on various datasets show SENet\u2019s superior performance both in terms of reduced ReLUs and improved classification accuracy compared to existing alternatives. In particular, SENet can yield models that require up to \u223c2\u00d7 fewer ReLUs while yielding similar accuracy. For a similar ReLU budget SENet can yield models with \u223c2.32% improved classification accuracy, evaluated on CIFAR-100.", "keywords": "Efficient private inference;cryptographic inference;machine learning as a service;efficient cryptographic inference;automated ReLU reduction", "primary_area": "", "supplementary_material": "/attachment/e27997a7838217d96e8951124e9bde065283baea.zip", "author": "Souvik Kundu;Shunlin Lu;Yuke Zhang;Jacqueline Tiffany Liu;Peter Anthony Beerel", "authorids": "~Souvik_Kundu2;~Shunlin_Lu1;~Yuke_Zhang1;~Jacqueline_Tiffany_Liu1;~Peter_Anthony_Beerel2", "gender": "M;M;F;F;M", "homepage": "https://ksouvik52.github.io;https://shunlinlu.github.io/;;;http://sites.usc.edu/eessc.html", "dblp": "126/2210;333/0021;;;29/6330", "google_scholar": "https://scholar.google.com/citations?hl=en;;CJ5iMiwAAAAJ;;JSdH7PsAAAAJ", "orcid": "0000-0002-3533-9405;;0000-0001-5253-5478;0009-0004-7085-0024;", "linkedin": "souvik-kundu-64922b50/;shunlin-lu-401aa61a6/;;jacquelineliu-mlds/;peter-beerel-b9902a1/", "or_profile": "~Souvik_Kundu2;~Shunlin_Lu1;~Yuke_Zhang1;~Jacqueline_Tiffany_Liu1;~Peter_Anthony_Beerel1", "aff": "Intel;The Chinese University of HongKong, ShenZhen;University of Southern California;University of Southern California;University of Southern California", "aff_domain": "intel.com;cuhk.edu.cn;usc.edu;usc.edu;usc.edu", "position": "Researcher;PhD student;PhD student;MS student;Full Professor", "bibtex": "@inproceedings{\nkundu2023learning,\ntitle={Learning to Linearize Deep Neural Networks for Secure and Efficient Private Inference},\nauthor={Souvik Kundu and Shunlin Lu and Yuke Zhang and Jacqueline Tiffany Liu and Peter Anthony Beerel},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=BGF9IeDfmlH}\n}", "github": "", "project": "", "reviewers": "uQYb;oSdq;e8en", "pdf_size": 2571758, "recommendation": "6;6;8", "confidence": "3;3;3", "correctness": "4;3;3", "technical_novelty": "3;3;3", "empirical_novelty": "3;3;3", "wc_summary_paper": "64;99;77", "wc_strength_and_weaknesses": "111;130;115", "wc_clarity_quality_novelty_and_reproducibility": "2;622;30", "wc_summary_review": "2;60;47", "wc_review": "179;911;269", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.0 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 80.0, 14.445299120013633 ], "wc_strength_and_weaknesses_avg": [ 118.66666666666667, 
8.178562764256865 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 218.0, 285.8997493294925 ], "wc_summary_review_avg": [ 36.333333333333336, 24.850665092821068 ], "wc_review_avg": [ 453.0, 325.93250835103885 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": -0.49999999999999983, "gs_citation": 44, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1444830135516010604&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=BGF9IeDfmlH", "email": "intel.com;cuhk.edu.cn;usc.edu;usc.edu;usc.edu", "author_num": 5, "aff_unique_index": "0;1;2;2;2", "aff_unique_norm": "Intel;Chinese University of Hong Kong;University of Southern California", "aff_unique_dep": "Intel Corporation;;", "aff_unique_url": "https://www.intel.com;https://www.cuhk.edu.cn;https://www.usc.edu", "aff_unique_abbr": "Intel;CUHK;USC", "aff_campus_unique_index": "1;2;2;2", "aff_campus_unique": ";Shenzhen;Los Angeles", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "United States;China" }, { "id": "BGId14emsBj", "title": "On the Impact of Adversarially Robust Models on Algorithmic Recourse", "track": "main", "status": "Reject", "tldr": "", "abstract": "The widespread deployment of machine learning models in various high-stakes settings has underscored the need for ensuring that individuals who are adversely impacted by model predictions are provided with a means for recourse. To this end, several algorithms have been proposed in recent literature to generate recourses. Recent research has also demonstrated that the recourses generated by these algorithms often correspond to adversarial examples. This key finding emphasizes the need for a deeper understanding of the impact of adversarially robust models (which are designed to guard against adversarial examples) on algorithmic recourse. In this work, we make one of the first attempts at studying the impact of adversarially robust models on algorithmic recourse. We theoretically and empirically analyze the cost (ease of implementation) and validity (probability of obtaining a positive model prediction) of the recourses output by state-of-the-art algorithms when the underlying models are adversarially robust. More specifically, we construct theoretical bounds on the differences between the cost and the validity of the recourses generated by various state-of-the-art algorithms when the underlying models are adversarially robust vs. non-robust. We also carry out extensive empirical analysis with multiple real-world datasets to not only validate our theoretical results, but also analyze the impact of varying degrees of model robustness on the cost and validity of the resulting recourses. 
Our theoretical and empirical analyses demonstrate that adversarially robust models significantly increase the cost and reduce the validity of the resulting recourses, thereby shedding light on the inherent trade-offs between achieving adversarial robustness in predictive models and providing easy-to-implement and reliable algorithmic recourse.", "keywords": "Algorithmic Recourse;Adversarial Robustness;Machine Learning", "primary_area": "", "supplementary_material": "/attachment/88da9da8dcac103678a5ce34e136c7e4f8dc4e5b.zip", "author": "Satyapriya Krishna;Chirag Agarwal;Himabindu Lakkaraju", "authorids": "~Satyapriya_Krishna2;~Chirag_Agarwal1;~Himabindu_Lakkaraju1", "gender": "M;M;F", "homepage": "http://satyapriyakrishna.com/;https://chirag-agarwall.github.io/;http://web.stanford.edu/~himalv", "dblp": "251/9225;173/8821;68/9376", "google_scholar": "Q5bfPlkAAAAJ;https://scholar.google.com/citations?hl=en;", "orcid": ";;", "linkedin": "satyapriya-krishna-50553084/;chirag-agarwal-0a6a43a1/;", "or_profile": "~Satyapriya_Krishna2;~Chirag_Agarwal1;~Hima_Lakkaraju1", "aff": "Harvard University;Adobe Systems;Harvard University", "aff_domain": "harvard.edu;adobe.com;harvard.edu", "position": "PhD student;Researcher;Assistant Professor", "bibtex": "@misc{\nkrishna2023on,\ntitle={On the Impact of Adversarially Robust Models on Algorithmic Recourse},\nauthor={Satyapriya Krishna and Chirag Agarwal and Himabindu Lakkaraju},\nyear={2023},\nurl={https://openreview.net/forum?id=BGId14emsBj}\n}", "github": "", "project": "", "reviewers": "aAvX;G7SB;T2PC;em4b", "site": "https://openreview.net/forum?id=BGId14emsBj", "pdf_size": 1702245, "recommendation": "3;3;3;6", "confidence": "3;3;3;4", "correctness": "3;1;2;3", "technical_novelty": "3;2;2;3", "empirical_novelty": "3;2;2;3", "wc_summary_paper": "63;31;78;113", "wc_strength_and_weaknesses": "474;154;297;175", "wc_clarity_quality_novelty_and_reproducibility": "24;78;16;101", "wc_summary_review": "25;66;59;58", "wc_review": "586;329;450;447", "wc_reply_reviewers": "25;48;62;24", "wc_reply_authors": "464;516;286;216", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "recommendation_avg": [ 3.75, 1.299038105676658 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 2.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 71.25, 29.4819860253681 ], "wc_strength_and_weaknesses_avg": [ 275.0, 127.2065249898762 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 54.75, 35.800663401674555 ], "wc_summary_review_avg": [ 52.0, 15.890248582070704 ], "wc_review_avg": [ 453.0, 90.98076719834802 ], "wc_reply_reviewers_avg": [ 39.75, 16.037066439969625 ], "wc_reply_authors_avg": [ 370.5, 123.41292476884259 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 1.0, "corr_recommendation_correctness": 0.5222329678670935, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14075747277077215938&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1;0", "aff_unique_norm": "Harvard University;Adobe", "aff_unique_dep": ";Adobe Systems Incorporated", "aff_unique_url": "https://www.harvard.edu;https://www.adobe.com", "aff_unique_abbr": "Harvard;Adobe", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "BGqYCl1k1fN", "title": 
"Rank-1 Matrix Completion with Gradient Descent and Small Random Initialization", "track": "main", "status": "Withdraw", "tldr": "Proves the convergence of gradient descent with small random initialization for rank-1 matrix completion.", "abstract": "The nonconvex formulation of matrix completion problem has received significant attention in recent years due to its affordable complexity compared to the convex formulation. Gradient descent (GD) is the simplest yet efficient baseline algorithm for solving nonconvex optimization problems. The success of GD has been witnessed in many different problems in both theory and practice when it is combined with random initialization. However, previous works on matrix completion require either careful initialization or regularizer to prove the convergence of GD. In this work, we study the rank-1 symmetric matrix completion and prove that GD converges to the ground truth when small random initialization is used. We show that in logarithmic amount of iterations, the trajectory enters the region where local convergence occurs. We provide an upper bound on the initialization size that is sufficient to guarantee the convergence and show that a larger initialization can be used as more samples are available. We observe that implicit regularization effect of GD plays a critical role in the analysis, and for the entire trajectory, it prevents each entry from becoming much larger than the others.", "keywords": "Matrix Completion;Small Initialization;Gradient Descent", "primary_area": "", "supplementary_material": "", "author": "Daesung Kim;Hye Won Chung", "authorids": "~Daesung_Kim1;~Hye_Won_Chung2", "gender": "M;F", "homepage": ";https://iids.kaist.ac.kr/", "dblp": "129/1057;https://dblp.uni-trier.de/pers/hd/c/Chung:Hye_Won", "google_scholar": "AvyPJgYAAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Daesung_Kim1;~Hye_Won_Chung2", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr", "position": "PhD student;Associate Professor", "bibtex": "@misc{\nkim2023rank,\ntitle={Rank-1 Matrix Completion with Gradient Descent and Small Random Initialization},\nauthor={Daesung Kim and Hye Won Chung},\nyear={2023},\nurl={https://openreview.net/forum?id=BGqYCl1k1fN}\n}", "github": "", "project": "", "reviewers": "kvJH;wHNp;dDzu;Qifu", "site": "https://openreview.net/forum?id=BGqYCl1k1fN", "pdf_size": 1011300, "recommendation": "1;3;3;6", "confidence": "4;3;4;3", "correctness": "2;4;3;3", "technical_novelty": "3;2;2;3", "empirical_novelty": "2;0;2;3", "wc_summary_paper": "138;37;57;28", "wc_strength_and_weaknesses": "502;139;139;143", "wc_clarity_quality_novelty_and_reproducibility": "33;15;263;38", "wc_summary_review": "30;40;41;16", "wc_review": "703;231;500;225", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.25, 1.7853571071357126 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 1.75, 1.0897247358851685 ], "wc_summary_paper_avg": [ 65.0, 43.433857760967996 ], "wc_strength_and_weaknesses_avg": [ 230.75, 156.61477420728863 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 87.25, 101.82920749961673 ], "wc_summary_review_avg": [ 31.75, 10.059199769365355 ], "wc_review_avg": [ 414.75, 200.07795355810694 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 
0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.7001400420140049, "corr_recommendation_correctness": 0.39605901719066966, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11665059671613022858&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "id": "BGvOEUEMBzE", "title": "Pareto Optimization for Active Learning under Out-of-Distribution Data Scenarios", "track": "main", "status": "Reject", "tldr": "", "abstract": "Pool-based Active Learning (AL) has achieved great success in minimizing labeling costs by sequentially selecting the most informative unlabeled samples from a large unlabeled data pool and querying their labels from oracle/annotators. However, existing AL sampling schemes might not work well under out-of-distribution (OOD) data scenarios, where the unlabeled data pool contains data samples that do not belong to the pre-defined categories of the target task. Achieving good AL performance under OOD data scenarios is a challenging task due to the natural conflict between AL sampling strategies and OOD sample detection -- both more informative in-distribution (ID) data and OOD data in unlabeled data pool may be assigned high informativeness scores (e.g., high entropy) during AL processes. In this paper, we propose a sampling scheme, Monte-Carlo Pareto Optimization for Active Learning (POAL), which selects optimal subsets of unlabeled samples with \\emph{fixed batch size} from the unlabeled data pool. We cast the AL sampling task as a multi-objective optimization problem and utilize Pareto optimization based on two conflicting objectives: (1) the typical AL sampling scheme (e.g., maximum entropy), and (2) the confidence of not being an OOD data sample. Experimental results show the effectiveness of our POAL on classical Machine Learning (ML) and Deep Learning (DL) tasks.", "keywords": "active learning;pareto optimization;out-of-distribution", "primary_area": "", "supplementary_material": "/attachment/6999fe03580e6db15edd2fa3812d477754aa7ec0.zip", "author": "Xueying Zhan;Zeyu Dai;Qingzhong Wang;Haoyi Xiong;Dejing Dou;Qing Li;Antoni B. 
Chan", "authorids": "~Xueying_Zhan1;~Zeyu_Dai2;~Qingzhong_Wang3;~Haoyi_Xiong1;~Dejing_Dou3;~Qing_Li5;~Antoni_B._Chan1", "gender": "F;M;M;M;M;M;M", "homepage": ";https://scholar.google.com/citations?user=D-6MHNUAAAAJ&hl=en;https://qingzwang.github.io/;https://sites.google.com/site/haoyixiongshomepage/;https://www4.comp.polyu.edu.hk/~csqli/;http://www.cs.cityu.edu.hk/~abchan/;https://ix.cs.uoregon.edu/~dou/", "dblp": "187/4471;262/2474.html;;06/2700;(2024-11-14-1812689);55/5814;26/2854.html", "google_scholar": "3lLypDIAAAAJ;D-6MHNUAAAAJ;e7ZsEIcAAAAJ;f_Kcie0AAAAJ;https://scholar.google.co.in/citations?user=D1LEg-YAAAAJ;j4vFSn8AAAAJ;qBHsQ04AAAAJ", "orcid": ";0000-0002-1351-476X;;;0000-0003-3370-471X;0000-0002-2886-2513;", "linkedin": ";;;;;;", "or_profile": "~Xueying_Zhan1;~Zeyu_Dai2;~Qingzhong_Wang3;~Haoyi_Xiong1;~Qing_Li5;~Antoni_B._Chan1;~Dejing_Dou4", "aff": "City University of Hong Kong;The Hong Kong Polytechnic University;Baidu;Baidu;Hong Kong Polytechnic University;City University of Hong Kong;", "aff_domain": "cityu.edu.hk;polyu.edu.hk;baidu.com;baidu.com;polyu.edu.hk;cityu.edu.hk;", "position": "PhD student;PhD student;Researcher;Principal Researcher;Full Professor;Full Professor;", "bibtex": "@misc{\nzhan2023pareto,\ntitle={Pareto Optimization for Active Learning under Out-of-Distribution Data Scenarios},\nauthor={Xueying Zhan and Zeyu Dai and Qingzhong Wang and Haoyi Xiong and Dejing Dou and Qing Li and Antoni B. Chan},\nyear={2023},\nurl={https://openreview.net/forum?id=BGvOEUEMBzE}\n}", "github": "", "project": "", "reviewers": "SDAU;hK87;6RQ4", "site": "https://openreview.net/forum?id=BGvOEUEMBzE", "pdf_size": 2807601, "recommendation": "3;6;8", "confidence": "5;3;4", "correctness": "1;3;3", "technical_novelty": "3;2;2", "empirical_novelty": "3;2;2", "wc_summary_paper": "42;83;83", "wc_strength_and_weaknesses": "466;428;241", "wc_clarity_quality_novelty_and_reproducibility": "25;71;28", "wc_summary_review": "496;66;36", "wc_review": "1029;648;388", "wc_reply_reviewers": "350;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 5.666666666666667, 2.0548046676563256 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "correctness_avg": [ 2.3333333333333335, 0.9428090415820634 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 69.33333333333333, 19.3275853524323 ], "wc_strength_and_weaknesses_avg": [ 378.3333333333333, 98.34067769183254 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 41.333333333333336, 21.01322334996598 ], "wc_summary_review_avg": [ 199.33333333333334, 210.1322334996598 ], "wc_review_avg": [ 688.3333333333334, 263.2366911271215 ], "wc_reply_reviewers_avg": [ 116.66666666666667, 164.99158227686107 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.5960395606792698, "corr_recommendation_correctness": 0.9176629354822472, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5983492998564544805&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;2;2;1;0", "aff_unique_norm": "City University of Hong Kong;Hong Kong Polytechnic University;Baidu", "aff_unique_dep": ";;Baidu, Inc.", "aff_unique_url": 
"https://www.cityu.edu.hk;https://www.polyu.edu.hk;https://www.baidu.com", "aff_unique_abbr": "CityU;PolyU;Baidu", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "BKuboEUJd8u", "title": "Return Augmentation gives Supervised RL Temporal Compositionality", "track": "main", "status": "Reject", "tldr": "We propose a new data augmentation algorithm that enables RL via supervised methods to extrapolate beyond the best performing trajectories in the offline dataset using bootstrapping.", "abstract": "Offline Reinforcement Learning (RL) methods that use supervised learning or sequence modeling (e.g., Decision Transformer) work by training a return-conditioned policy. A fundamental limitation of these approaches, as compared to value-based methods, is that they have trouble generalizing to behaviors that have a higher return than what was seen at training. Value-based offline-RL algorithms like CQL use bootstrapping to combine training data from multiple trajectories to learn strong behaviors from sub-optimal data. We set out to endow RL via Supervised Learning (RvS) methods with this form of temporal compositionality. To do this, we introduce SuperB, a dynamic programming algorithm for data augmentation that augments the returns in the offline dataset by combining rewards from intersecting trajectories. We show theoretically that SuperB can improve sample complexity and enable RvS to find optimal policies in cases where it previously fell behind the performance of value-based methods. Empirically, we find that SuperB improves the performance of RvS in several offline RL environments, surpassing the prior state-of-the-art RvS agents in AntMaze by orders of magnitude and offering performance competitive with value-based algorithms on the D4RL-gym tasks.", "keywords": "reinforcement learning;offline reinforcement learning;decision transformer;behavioral cloning;dynamic programming;data augmentation", "primary_area": "", "supplementary_material": "/attachment/398159a10765d997b51c0f0d22994529f8f12bd8.zip", "author": "Keiran Paster;Silviu Pitis;Sheila A. McIlraith;Jimmy Ba", "authorids": "~Keiran_Paster1;~Silviu_Pitis1;~Sheila_A._McIlraith1;~Jimmy_Ba1", "gender": "M;M;F;M", "homepage": "http://keirp.com;https://silviupitis.com;http://www.cs.toronto.edu/~sheila/;http://jimmylba.github.io", "dblp": ";https://dblp.org/pers/hd/p/Pitis:Silviu;66/3221;https://dblp.org/pers/b/Ba:Jimmy.html", "google_scholar": ";oYlo1ycAAAAJ;https://scholar.google.com.tw/citations?user=ny2zuvMAAAAJ;https://scholar.google.ca/citations?user=ymzxRhAAAAAJ", "orcid": ";;0000-0003-4953-0945;", "linkedin": ";;sheila-mcilraith-a76aa513/?originalSubdomain=ca;", "or_profile": "~Keiran_Paster1;~Silviu_Pitis1;~Sheila_A._McIlraith1;~Jimmy_Ba1", "aff": "University of Toronto;;Department of Computer Science, University of Toronto;Department of Computer Science, University of Toronto", "aff_domain": "toronto.edu;;cs.toronto.edu;cs.toronto.edu", "position": "PhD student;;Full Professor;Assistant Professor", "bibtex": "@misc{\npaster2023return,\ntitle={Return Augmentation gives Supervised {RL} Temporal Compositionality},\nauthor={Keiran Paster and Silviu Pitis and Sheila A. 
McIlraith and Jimmy Ba},\nyear={2023},\nurl={https://openreview.net/forum?id=BKuboEUJd8u}\n}", "github": "", "project": "", "reviewers": "RPpv;JPnx;FZYT;sHVj", "site": "https://openreview.net/forum?id=BKuboEUJd8u", "pdf_size": 801746, "recommendation": "5;5;6;6", "confidence": "4;5;3;4", "correctness": "3;3;4;4", "technical_novelty": "3;3;2;4", "empirical_novelty": "2;3;2;3", "wc_summary_paper": "171;27;95;51", "wc_strength_and_weaknesses": "186;374;232;172", "wc_clarity_quality_novelty_and_reproducibility": "1369;2;81;37", "wc_summary_review": "85;40;79;31", "wc_review": "1811;443;487;291", "wc_reply_reviewers": "369;0;118;0", "wc_reply_authors": "1960;693;361;313", "reply_reviewers": "1;2;0;1", "reply_authors": "3;2;1;1", "recommendation_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 86.0, 54.79963503528103 ], "wc_strength_and_weaknesses_avg": [ 241.0, 79.93122043357026 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 372.25, 576.1542219753319 ], "wc_summary_review_avg": [ 58.75, 23.562417108607512 ], "wc_review_avg": [ 758.0, 612.2834311003361 ], "wc_reply_reviewers_avg": [ 121.75, 150.6591766206095 ], "wc_reply_authors_avg": [ 831.75, 667.62765633248 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.7071067811865475, "corr_recommendation_correctness": 1.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5975893136887571600&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Toronto", "aff_unique_dep": "", "aff_unique_url": "https://www.utoronto.ca", "aff_unique_abbr": "U of T", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Toronto", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "id": "BLBulxMHuOp", "title": "Decomposing Texture and Semantics for Out-of-distribution Detection", "track": "main", "status": "Reject", "tldr": "We propose a novel OOD detection framework that decomposes the definition of the in-distribution as texture and semantics. ", "abstract": "Out-of-distribution (OOD) detection has made significant progress in recent years because the distribution mismatch between the training and testing can severely deteriorate the reliability of a machine learning system. Nevertheless, the lack of precise interpretation of the in-distribution limits the application of OOD detection methods to real-world system pipelines. To tackle this issue, we decompose the definition of the in-distribution into texture and semantics, motivated by real-world scenarios. In addition, we design new benchmarks to measure the robustness that OOD detection methods should have. To achieve a good balance between the OOD detection performance and robustness, our method takes a divide-and-conquer approach. That is, the model first tackles each component of the texture and semantics separately, and then combines them later.
Such a design philosophy is empirically proven by a series of benchmarks including not only ours but also the conventional counterpart.", "keywords": "Out-of-distribution detection;Fourier analysis;Normalizing flow model", "primary_area": "", "supplementary_material": "", "author": "Jeong-Hyeon Moon;Namhyuk Ahn;Kyung-Ah Sohn", "authorids": "~Jeong-Hyeon_Moon1;~Namhyuk_Ahn1;~Kyung-Ah_Sohn1", "gender": "M;M;F", "homepage": ";https://nmhkahn.github.io;https://sites.google.com/site/kasohn", "dblp": ";217/1998;65/3835", "google_scholar": ";cFSb6QQAAAAJ;-QsSytMAAAAJ", "orcid": "0000-0002-2805-7063;;0000-0001-8941-1188", "linkedin": ";;", "or_profile": "~Jeong-Hyeon_Moon1;~Namhyuk_Ahn1;~Kyung-Ah_Sohn1", "aff": "Ajou University;NAVER WEBTOON Corp.;Ajou University", "aff_domain": "ajou.ac.kr;webtoonscorp.com;ajou.ac.kr", "position": "PhD student;Researcher;Full Professor", "bibtex": "@misc{\nmoon2023decomposing,\ntitle={Decomposing Texture and Semantics for Out-of-distribution Detection},\nauthor={Jeong-Hyeon Moon and Namhyuk Ahn and Kyung-Ah Sohn},\nyear={2023},\nurl={https://openreview.net/forum?id=BLBulxMHuOp}\n}", "github": "", "project": "", "reviewers": "xaty;UnLe;W1ZC;BBXZ", "site": "https://openreview.net/forum?id=BLBulxMHuOp", "pdf_size": 4178601, "recommendation": "5;5;6;6", "confidence": "5;3;3;3", "correctness": "2;2;4;3", "technical_novelty": "3;2;2;2", "empirical_novelty": "3;2;4;3", "wc_summary_paper": "75;66;48;64", "wc_strength_and_weaknesses": "376;165;68;207", "wc_clarity_quality_novelty_and_reproducibility": "92;36;171;27", "wc_summary_review": "22;54;39;43", "wc_review": "565;321;326;341", "wc_reply_reviewers": "120;0;0;0", "wc_reply_authors": "1107;576;207;584", "reply_reviewers": "1;0;0;0", "reply_authors": "2;1;1;1", "recommendation_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "correctness_avg": [ 2.75, 0.82915619758885 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 63.25, 9.730750228014282 ], "wc_strength_and_weaknesses_avg": [ 204.0, 111.36651202224122 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 81.5, 57.36070083253865 ], "wc_summary_review_avg": [ 39.5, 11.5 ], "wc_review_avg": [ 388.25, 102.31171731527137 ], "wc_reply_reviewers_avg": [ 30.0, 51.96152422706632 ], "wc_reply_authors_avg": [ 618.5, 320.5311997294491 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.9045340337332909, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16179099271050242422&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Ajou University;NAVER WEBTOON Corp.", "aff_unique_dep": ";", "aff_unique_url": "https://www.ajou.ac.kr;https://www.webtoons.com", "aff_unique_abbr": "Ajou;Naver Webtoon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "id": "BLNZwf-9k09", "title": "Architectural Backdoors in Neural Networks", "track": "main", "status": "Withdraw", "tldr": "paper demonstrates a backdoor that can be planted at the architectural definition level of a neural network", "abstract": "Machine learning is vulnerable to adversarial manipulation.
Previous literature has demonstrated that at the training stage attackers can manipulate data (Gu et al.) and data sampling procedures (Shumailov et al.) to control model behaviour. A common attack goal is to plant backdoors i.e. force the victim model to learn to recognise a trigger known only by the adversary. In this paper, we introduce a new class of backdoor attacks that hide inside model architectures i.e. in the inductive bias of the functions used to train. These backdoors are simple to implement, for instance by publishing open-source code for a backdoored model architecture that others will reuse unknowingly. We demonstrate that model architectural backdoors represent a real threat and, unlike other approaches, can survive a complete re-training from scratch. We formalise the main construction principles behind architectural backdoors, such as a link between the input and the output, and describe some possible protections against them. We evaluate our attacks on computer vision benchmarks of different scales and demonstrate the underlying vulnerability is pervasive in a variety of common training settings. ", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Mikel Bober-Irizar;Ilia Shumailov;Yiren Zhao;Robert D. Mullins;Nicolas Papernot", "authorids": "~Mikel_Bober-Irizar1;~Ilia_Shumailov1;~Yiren_Zhao2;~Robert_D._Mullins1;~Nicolas_Papernot1", "gender": ";M;M;Unspecified;M", "homepage": "https://mxbi.net;https://aaronzhao.me;https://www.papernot.fr;https://www.cl.cam.ac.uk/~is410/;https://www.csat.cam.ac.uk/~rdm34", "dblp": ";https://dblp.uni-trier.de/pers/hd/z/Zhao:Yiren;162/1405;213/8587;31/789", "google_scholar": ";lOOmgEgAAAAJ;cGxq0cMAAAAJ;https://scholar.google.co.uk/citations?hl=en;zjXO2HMAAAAJ", "orcid": ";;;;", "linkedin": ";yiren-aaron-zhao-baa8b5116/;nicolaspapernot;ilia-shumailov/;", "or_profile": "~Mikel_Bober-Irizar1;~Yiren_Zhao2;~Nicolas_Papernot1;~I_Shumailov1;~Robert_Mullins1", "aff": "University of Cambridge;Imperial College London;Google;University of Oxford;University of Cambridge", "aff_domain": "cam.ac.uk;ic.ac.uk;google.com;ox.ac.uk;cam.ac.uk", "position": "Undergrad student;Assistant Professor;Research Scientist;Fellowship;Associate Professor", "bibtex": "@misc{\nbober-irizar2023architectural,\ntitle={Architectural Backdoors in Neural Networks},\nauthor={Mikel Bober-Irizar and Ilia Shumailov and Yiren Zhao and Robert D. 
Mullins and Nicolas Papernot},\nyear={2023},\nurl={https://openreview.net/forum?id=BLNZwf-9k09}\n}", "github": "", "project": "", "reviewers": "QFzx;KBqE;bM4M", "site": "https://openreview.net/forum?id=BLNZwf-9k09", "pdf_size": 2394286, "recommendation": "3;3;5", "confidence": "5;4;5", "correctness": "3;2;3", "technical_novelty": "2;4;4", "empirical_novelty": "2;2;2", "wc_summary_paper": "59;140;142", "wc_strength_and_weaknesses": "194;370;216", "wc_clarity_quality_novelty_and_reproducibility": "22;105;122", "wc_summary_review": "65;16;40", "wc_review": "340;631;520", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.3333333333333335, 0.9428090415820634 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 113.66666666666667, 38.663792996664064 ], "wc_strength_and_weaknesses_avg": [ 260.0, 78.29857384822962 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 83.0, 43.688289811649376 ], "wc_summary_review_avg": [ 40.333333333333336, 20.00555478416488 ], "wc_review_avg": [ 497.0, 119.90829829498874 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.4999999999999999, "corr_recommendation_correctness": 0.5, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10847641246950337384&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "University of Cambridge;Imperial College London;Google;University of Oxford", "aff_unique_dep": ";;Google;", "aff_unique_url": "https://www.cam.ac.uk;https://www.imperial.ac.uk;https://www.google.com;https://www.ox.ac.uk", "aff_unique_abbr": "Cambridge;ICL;Google;Oxford", "aff_campus_unique_index": "0;2;0", "aff_campus_unique": "Cambridge;;Mountain View", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "United Kingdom;United States" }, { "id": "BLOkjU9iS24", "title": "Constrained Reinforcement Learning for Safety-Critical Tasks via Scenario-Based Programming", "track": "main", "status": "Reject", "tldr": "A novel technique for incorporating domain-expert knowledge to train a constrained DRL agent, based on the scenario-based programming paradigm; we validated our method on the popular robotic mapless navigation problem, both physically and in simulation.", "abstract": "Deep reinforcement learning (DRL) has achieved groundbreaking successes in various applications, including robotics. A natural consequence is the adoption of this paradigm for safety-critical tasks, where human safety and expensive hardware can be involved. In this context, it is crucial to optimize the performance of DRL-based agents while providing guarantees about their behavior. This paper presents a novel technique for incorporating domain-expert knowledge into a constrained DRL training loop. Our technique exploits the scenario-based programming paradigm, designed to specify such knowledge in a simple and intuitive way.
While our approach can be considered general purpose, we validated our method by performing experiments on a synthetic set of benchmark environments, and the popular robotic mapless navigation problem, in simulation and on the actual platform. Our results demonstrate that using our approach to leverage expert knowledge dramatically improves the safety and performance of the agent.", "keywords": "Constrained Reinforcement Learning;Scenario Based Programming;Safety;Robotic Navigation", "primary_area": "", "supplementary_material": "/attachment/ec7a15b30f489fa2b6f640ff315f1c0cfeb421d2.zip", "author": "Davide Corsi;Raz Yerushalmi;Guy Amir;Alessandro Farinelli;David Harel;Guy Katz", "authorids": "~Davide_Corsi1;~Raz_Yerushalmi1;~Guy_Amir1;~Alessandro_Farinelli1;david.harel@weizmann.ac.il;~Guy_Katz1", "gender": "M;M;M;M;;M", "homepage": ";https://webhome.weizmann.ac.il/home/razyeru/;https://guyam2.github.io/;http://profs.sci.univr.it/~farinelli/;;http://www.katz-lab.com", "dblp": "184/6703;;277/9596;f/AlessandroFarinelli;;23/10321", "google_scholar": "qgtEMwEAAAAJ;;CSJEObYAAAAJ;https://scholar.google.co.uk/citations?user=KHAIAA8AAAAJ;;https://scholar.google.com.tw/citations?user=3nYG5BMAAAAJ", "orcid": ";;;0000-0002-2592-5814;;", "linkedin": ";raz-yerushalmi-b53a9a/;https://linkedin.com/in/guy-amir-a335a3ba;alessandro-farinelli/;;", "or_profile": "~Davide_Corsi1;~Raz_Yerushalmi1;~Guy_Amir1;~Alessandro_Farinelli1;david.harel@weizmann.ac.il;~Guy_Katz1", "aff": "University of Verona;Weizmann Institute of Science;The Hebrew University of Jerusalem;Universit\u00e0 degli Studi di Verona;;Hebrew University of Jerusalem", "aff_domain": "univr.it;weizmann.ac.il;huji.ac.il;univr.it;;huji.ac.il", "position": "PhD student;PhD student;PhD student;Full Professor;;Associate Professor", "bibtex": "@misc{\ncorsi2023constrained,\ntitle={Constrained Reinforcement Learning for Safety-Critical Tasks via Scenario-Based Programming},\nauthor={Davide Corsi and Raz Yerushalmi and Guy Amir and Alessandro Farinelli and David Harel and Guy Katz},\nyear={2023},\nurl={https://openreview.net/forum?id=BLOkjU9iS24}\n}", "github": "", "project": "", "reviewers": "XYtz;7ji2;ENWz", "site": "https://openreview.net/forum?id=BLOkjU9iS24", "pdf_size": 26761470, "recommendation": "3;3;3", "confidence": "4;4;4", "correctness": "4;2;4", "technical_novelty": "2;2;3", "empirical_novelty": "2;2;3", "wc_summary_paper": "90;34;157", "wc_strength_and_weaknesses": "220;370;76", "wc_clarity_quality_novelty_and_reproducibility": "115;24;243", "wc_summary_review": "42;33;15", "wc_review": "467;461;491", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.3333333333333335, 0.9428090415820634 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 93.66666666666667, 50.28143019268865 ], "wc_strength_and_weaknesses_avg": [ 222.0, 120.03332870498926 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 127.33333333333333, 89.83070497082585 ], "wc_summary_review_avg": [ 30.0, 11.224972160321824 ], "wc_review_avg": [ 473.0, 12.96148139681572 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, 
"corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:aaLfP_kF-HAJ:scholar.google.com/&scioq=Constrained+Reinforcement+Learning+for+Safety-Critical+Tasks+via+Scenario-Based+Programming&hl=en&as_sdt=0,11", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;2", "aff_unique_norm": "University of Verona;Weizmann Institute of Science;Hebrew University of Jerusalem;Universit\u00e0 degli Studi di Verona", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.univr.it;https://www.weizmann.org.il;https://www.huji.ac.il;https://www.univr.it", "aff_unique_abbr": "UniVR;Weizmann;HUJI;UniVR", "aff_campus_unique_index": "1", "aff_campus_unique": ";Jerusalem", "aff_country_unique_index": "0;1;1;0;1", "aff_country_unique": "Italy;Israel" }, { "id": "BLsM6WymMo6", "title": "SRBGCN: Tangent space-Free Lorentz Transformations for Graph Feature Learning", "track": "main", "status": "Reject", "tldr": "This work introduces a fully hyperbolic network that uses direct Lorentz transformations to learn the features directly on the manifold.", "abstract": "Hyperbolic graph convolutional networks have been successfully applied to represent complex graph data structures. However, optimization on Riemannian manifolds is nontrivial thus most of the existing hyperbolic networks build the network operations on the tangent space of the manifold, which is a Euclidean local approximation. This distorts the learnt features, limits the representation capacity of the network and makes it hard to optimize the network. In this work, we introduce a fully hyperbolic graph convolutional network (GCN), referred to as SRBGCN, which performs neural computations such as feature transformation and aggregation directly on the manifold, using manifold-preserving Lorentz transformations that include spatial rotation (SR) and boost (B) operations. 
Experiments conducted on static graph datasets for node classification and link prediction tasks validate the performance of the proposed method.", "keywords": "fully hyperbolic network;Lorentz transformations;boost and rotation;graph convolutional networks;hyperbolic rotations", "primary_area": "", "supplementary_material": "/attachment/535ae9db7c223e1763945426a451aa4f0d511139.zip", "author": "Abdelrahman Mostafa;Wei Peng;Guoying Zhao", "authorids": "~Abdelrahman_Mostafa1;~Wei_Peng4;~Guoying_Zhao3", "gender": "M;M;", "homepage": ";https://xiaoiker.github.io/;https://gyzhao-nm.github.io/Guoying/", "dblp": ";16/5560-9.html;35/814", "google_scholar": ";TDFM0QYAAAAJ;hzywrFMAAAAJ", "orcid": ";0000-0002-2892-5764;0000-0003-3694-206X", "linkedin": "abdelrahman-mostafa-bassiouny;;", "or_profile": "~Abdelrahman_Mostafa1;~Wei_Peng4;~Guoying_Zhao3", "aff": "University of Oulu;Stanford University;University of Oulu", "aff_domain": "oulu.fi;stanford.edu;oulu.fi", "position": "PhD student;Postdoc;Full Professor", "bibtex": "@misc{\nmostafa2023srbgcn,\ntitle={{SRBGCN}: Tangent space-Free Lorentz Transformations for Graph Feature Learning},\nauthor={Abdelrahman Mostafa and Wei Peng and Guoying Zhao},\nyear={2023},\nurl={https://openreview.net/forum?id=BLsM6WymMo6}\n}", "github": "", "project": "", "reviewers": "ehJR;Ht7h;8Du2;x9HY", "site": "https://openreview.net/forum?id=BLsM6WymMo6", "pdf_size": 10156094, "recommendation": "3;5;5;6", "confidence": "5;4;3;4", "correctness": "3;4;2;1", "technical_novelty": "1;2;2;2", "empirical_novelty": "1;2;3;0", "wc_summary_paper": "37;105;105;27", "wc_strength_and_weaknesses": "503;283;129;272", "wc_clarity_quality_novelty_and_reproducibility": "15;22;681;36", "wc_summary_review": "27;64;78;48", "wc_review": "582;474;993;383", "wc_reply_reviewers": "0;0;276;15", "wc_reply_authors": "451;181;792;215", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;2;2", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 2.5, 1.118033988749895 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.5, 1.118033988749895 ], "wc_summary_paper_avg": [ 68.5, 36.670833096617805 ], "wc_strength_and_weaknesses_avg": [ 296.75, 133.67942063010298 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 188.5, 284.44551323583926 ], "wc_summary_review_avg": [ 54.25, 18.978606376654742 ], "wc_review_avg": [ 608.0, 233.17482711476381 ], "wc_reply_reviewers_avg": [ 72.75, 117.50611686205957 ], "wc_reply_authors_avg": [ 409.75, 243.96247149920416 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.6488856845230502, "corr_recommendation_correctness": -0.5129891760425771, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11898564388568039464&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Oulu;Stanford University", "aff_unique_dep": ";", "aff_unique_url": "https://www.oulu.fi;https://www.stanford.edu", "aff_unique_abbr": "UOulu;Stanford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Finland;United States" }, { "id": "BM10-kHq8uX", "title": "Functional Relation Field: A Model-Agnostic Framework for Multivariate Time Series Forecasting", "track": "main", "status": "Reject", "tldr": "Functional Relation Field: A 
Model-Agnostic Framework for Multivariate Time Series Forecasting", "abstract": "In multivariate time series forecasting, the most popular strategy for modeling the relationship between multiple time series is the construction of a graph, where each time series is represented as a node and related nodes are connected by edges, i.e. spatial-temporal graph neural networks. The graph structure is either given a priori or learned based on the similarity between nodes. However, the relationship between multiple time series is typically complicated, for instance, the sum of outflows from upstream nodes may be equal to the inflows of downstream nodes. Such relations widely exist in many real-world multivariate time series forecasting scenarios, yet are far from well studied. In these cases, a graph might only be a crude description of the dependency between nodes. To this end, we explore a new framework to model the inter-node relationship in a more precise way based on our proposed inductive bias for graphs, Functional Relation Field, where a group of functions parameterized by neural networks are learned to characterize the dependency between multiple time series. These learned functions are versatile: they can then be used to discover the underlying graph structure by identifying the most relevant neighbors of the target node; and on the other hand, the learned functions will form a \u201cfield\u201d where the nodes in the backbone prediction networks are enforced to satisfy the constraints defined by these functions. The experiment is conducted on one toy dataset to show that our approach can well recover the true constraint relationship between nodes. Two real-world MiniApp calling traffic and road network datasets are also considered with various backbone networks.
Results show that the prediction error can be reduced remarkably with the aid of the proposed functional relation field framework.", "keywords": "Functional Relation Field;Spatio-Temporal Forecasting;Constraint Optimization;Multivariate Time Series", "primary_area": "", "supplementary_material": "/attachment/dfed157d5d9f4f71fe580dcaf1b7e11a2f442089.zip", "author": "Bing Yu;Ting Li;Jianguo Li;Bin Dong;Zhanxing Zhu", "authorids": "~Bing_Yu1;~Ting_Li3;~Jianguo_Li2;~Bin_Dong1;~Zhanxing_Zhu1", "gender": ";F;M;M;M", "homepage": ";https://liting.com;http://bicmr.pku.edu.cn/~dongbin;https://zhanxingzhu.github.io/;https://sites.google.com/site/leeplus/", "dblp": "47/2129;;11/6024;87/7756.html;70/6237", "google_scholar": ";;zLXcC90AAAAJ;a2sHceIAAAAJ;n44GlFcAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Bing_Yu1;~Ting_Li3;~Bin_Dong1;~Zhanxing_Zhu1;~jianguo_Li1", "aff": "Peking University;AntGroup;Peking University;University of Southampton;Ant Group", "aff_domain": "pku.edu.cn;antgroup.com; ;soton.ac.uk;antgroup.com", "position": "PhD student;Algorithm Engineer;Associate Professor;Associate Professor;Director", "bibtex": "@misc{\nyu2023functional,\ntitle={Functional Relation Field: A Model-Agnostic Framework for Multivariate Time Series Forecasting},\nauthor={Bing Yu and Ting Li and Jianguo Li and Bin Dong and Zhanxing Zhu},\nyear={2023},\nurl={https://openreview.net/forum?id=BM10-kHq8uX}\n}", "github": "", "project": "", "reviewers": "RLjD;3BB6;n4W8;dWww", "site": "https://openreview.net/forum?id=BM10-kHq8uX", "pdf_size": 604348, "recommendation": "3;5;6;6", "confidence": "4;3;5;4", "correctness": "3;3;3;4", "technical_novelty": "2;3;4;3", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "52;30;148;85", "wc_strength_and_weaknesses": "404;148;935;72", "wc_clarity_quality_novelty_and_reproducibility": "33;21;147;14", "wc_summary_review": "73;21;260;23", "wc_review": "562;220;1490;194", "wc_reply_reviewers": "434;0;0;38", "wc_reply_authors": "2791;609;1890;359", "reply_reviewers": "4;0;0;1", "reply_authors": "7;1;3;1", "recommendation_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 78.75, 44.516148755255095 ], "wc_strength_and_weaknesses_avg": [ 389.75, 337.9751285227953 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 53.75, 54.264974891729196 ], "wc_summary_review_avg": [ 94.25, 97.93716097580122 ], "wc_review_avg": [ 616.5, 524.8073456040797 ], "wc_reply_reviewers_avg": [ 118.0, 183.10106498871053 ], "wc_reply_authors_avg": [ 1412.25, 985.3606890372682 ], "reply_reviewers_avg": [ 1.25, 1.6393596310755 ], "reply_authors_avg": [ 3.0, 2.449489742783178 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.28867513459481287, "corr_recommendation_correctness": 0.4714045207910316, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12065981940282725573&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;0;2;1", "aff_unique_norm": "Peking University;Ant Group;University of Southampton", "aff_unique_dep": ";;", "aff_unique_url": "http://www.pku.edu.cn;https://www.antgroup.com;https://www.southampton.ac.uk", "aff_unique_abbr": "Peking U;AntGroup;Southampton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": 
"China;United Kingdom" }, { "id": "BMsqS_XALQU", "title": "SuperMarioDomains: Generalizing to Domains with Evolving Graphics", "track": "main", "status": "Withdraw", "tldr": "SuperMarioDomains is a new challenging Domain Generalization benchmark featuring domains derived from evolving video game graphics.", "abstract": "Domains in previous Domain Generalization (DG) benchmarks have been sampled from various image collections of different styles such as photographs, sketches, cartoons, paintings, product images, and etc. However, from these existing DG datasets, it is still difficult to quantify the magnitude of domain shift between different domains and relate that to the performance gap across domains. It is also unclear how to measure the overlap between different domains. Therefore, we present a new DG dataset, SuperMarioDomains, containing four domains that are derived from four chronological titles in the Mario video game franchise on four generations of video game hardware. The discrepancy between our domains is quantified in terms of image representation complexity that reflect the hardware evolution in image resolution, color palette, and presence of 3D rendering. We benchmark state-of-the-art DG algorithms under both Multi-Source and Single-Source DG settings on our dataset and find that they can only surpass the random average baseline in our dataset by at most 18.0% and 10.4% respectively. In addition, we show that adding our dataset as part of the pre-training process improves performance of existing DG algorithms on the PACS benchmark.", "keywords": "Domain Generalization;Domain;Shift;Domain Adaptation", "primary_area": "", "supplementary_material": "", "author": "Yiran Luo;Joshua Feinglass;Tejas Gokhale;Chitta Baral;Yezhou Yang", "authorids": "~Yiran_Luo1;~Joshua_Feinglass1;~Tejas_Gokhale1;~Chitta_Baral1;~Yezhou_Yang1", "gender": ";M;;M;M", "homepage": ";https://www.joshuafeinglass.com;;http://chitta.orissalinks.com;https://yezhouyang.engineering.asu.edu", "dblp": ";294/5562;;b/ChittaBaral;78/7455", "google_scholar": ";V2h3z7oAAAAJ;;9Yd716IAAAAJ;k2suuZgAAAAJ", "orcid": ";0000-0002-1731-0086;;0000-0002-7549-723X;", "linkedin": ";joshua-feinglass-b1ba23a2;;chitta-baral-8a8438b;", "or_profile": "~Yiran_Luo1;~Joshua_Feinglass1;~Tejas_Gokhale1;~Chitta_Baral1;~Yezhou_Yang1", "aff": ";Microsoft Research;;Arizona State University;Arizona State University", "aff_domain": ";research.microsoft.com;;asu.edu;asu.edu", "position": ";Intern;;Full Professor;Associate Professor", "bibtex": "@misc{\nluo2023supermariodomains,\ntitle={SuperMarioDomains: Generalizing to Domains with Evolving Graphics},\nauthor={Yiran Luo and Joshua Feinglass and Tejas Gokhale and Chitta Baral and Yezhou Yang},\nyear={2023},\nurl={https://openreview.net/forum?id=BMsqS_XALQU}\n}", "github": "", "project": "", "reviewers": "BbbM;yFez;JR4F;oMCF", "site": "https://openreview.net/forum?id=BMsqS_XALQU", "pdf_size": 1409729, "recommendation": "3;3;3;5", "confidence": "3;4;2;3", "correctness": "2;3;3;4", "technical_novelty": "2;1;2;2", "empirical_novelty": "2;1;2;2", "wc_summary_paper": "120;54;54;135", "wc_strength_and_weaknesses": "685;965;225;162", "wc_clarity_quality_novelty_and_reproducibility": "83;48;33;23", "wc_summary_review": "220;88;21;31", "wc_review": "1108;1155;333;351", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 
3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 90.75, 37.13068138345969 ], "wc_strength_and_weaknesses_avg": [ 509.25, 331.6537162463282 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 46.75, 22.741756748325315 ], "wc_summary_review_avg": [ 90.0, 79.28745171841507 ], "wc_review_avg": [ 736.75, 395.15084145171704 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:icexrD8oit4J:scholar.google.com/&scioq=SuperMarioDomains:+Generalizing+to+Domains+with+Evolving+Graphics&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;1", "aff_unique_norm": "Microsoft;Arizona State University", "aff_unique_dep": "Microsoft Research;", "aff_unique_url": "https://www.microsoft.com/en-us/research;https://www.asu.edu", "aff_unique_abbr": "MSR;ASU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "BN_P4LNiK2", "title": "TOAST: Topological Algorithm for Singularity Tracking", "track": "main", "status": "Reject", "tldr": "We develop a multi-scale score that characterises singularities of arbitrary (i.e. non-manifold) data spaces", "abstract": "The manifold hypothesis, which assumes that data lie on or close to an unknown manifold of low intrinsic dimensionality, is a staple of modern machine learning research. However, recent work has shown that real-world data exhibit distinct non-manifold structures, which result in singularities that can lead to erroneous conclusions about the data. Detecting such singularities is therefore crucial as a precursor to interpolation and inference tasks. We address detecting singularities by developing (i) persistent local homology, a new topology-driven framework for quantifying the intrinsic dimension of a data set locally, and (ii) Euclidicity, a topology-based multi-scale measure for assessing the \u2018manifoldness\u2019 of individual points. 
We show that our approach can reliably identify singularities of complex spaces, while also capturing singular structures in real-world data sets.", "keywords": "topology;persistent homology;topological data analysis;tda;stratified spaces;singularities", "primary_area": "", "supplementary_material": "/attachment/b0d60bdb85facb946c759e26e7868711f8323113.zip", "author": "Julius Von Rohrscheidt;Bastian Rieck", "authorids": "~Julius_Von_Rohrscheidt1;~Bastian_Rieck1", "gender": ";M", "homepage": "https://rohrscheidt.com/;https://bastian.rieck.me", "dblp": "330/4381;119/8860", "google_scholar": ";https://scholar.google.ch/citations?user=La7zuKQAAAAJ", "orcid": ";0000-0003-4335-0302", "linkedin": ";br-ml/", "or_profile": "~Julius_Von_Rohrscheidt1;~Bastian_Rieck1", "aff": "Technische Universit\u00e4t M\u00fcnchen;Helmholtz Zentrum M\u00fcnchen", "aff_domain": "tum.de;helmholtz-munich.de", "position": "PhD student;Principal Investigator", "bibtex": "@misc{\nrohrscheidt2023toast,\ntitle={{TOAST}: Topological Algorithm for Singularity Tracking},\nauthor={Julius Von Rohrscheidt and Bastian Rieck},\nyear={2023},\nurl={https://openreview.net/forum?id=BN_P4LNiK2}\n}", "github": "", "project": "", "reviewers": "8kd2;vmhb;ntAA", "site": "https://openreview.net/forum?id=BN_P4LNiK2", "pdf_size": 3947153, "recommendation": "3;6;6", "confidence": "2;2;4", "correctness": "4;4;4", "technical_novelty": "3;3;3", "empirical_novelty": "2;3;4", "wc_summary_paper": "137;77;40", "wc_strength_and_weaknesses": "271;73;133", "wc_clarity_quality_novelty_and_reproducibility": "9;83;39", "wc_summary_review": "46;177;128", "wc_review": "463;410;340", "wc_reply_reviewers": "0;0;561", "wc_reply_authors": "699;299;2467", "reply_reviewers": "0;0;2", "reply_authors": "1;1;4", "recommendation_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 2.6666666666666665, 0.9428090415820634 ], "correctness_avg": [ 4.0, 0.0 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 84.66666666666667, 39.96943276499625 ], "wc_strength_and_weaknesses_avg": [ 159.0, 82.89752710425083 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 43.666666666666664, 30.390056853443948 ], "wc_summary_review_avg": [ 117.0, 54.043192602460735 ], "wc_review_avg": [ 404.3333333333333, 50.37415563119203 ], "wc_reply_reviewers_avg": [ 187.0, 264.45793616376875 ], "wc_reply_authors_avg": [ 1155.0, 941.9865533364405 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 2.0, 1.4142135623730951 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.5000000000000001, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15738386471806072143&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Technische Universit\u00e4t M\u00fcnchen;Helmholtz Zentrum M\u00fcnchen", "aff_unique_dep": ";", "aff_unique_url": "https://www.tum.de;https://www.helmholtz-muenchen.de", "aff_unique_abbr": "TUM;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "id": "BNsuf5g-JRd", "title": "Solving Partial Label Learning Problem with Multi-Agent Reinforcement Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Partial label learning (PLL) deals with classifications when a set of candidate labels instead of the true one is given for each 
training instance. As a weakly supervised learning problem, the main target of PLL is to discover latent relationships within training samples, and utilize such information to disambiguate noisy labels. Many existing methods choose nearest neighbors of each partially-labeled instance in an unsupervised way such that the obtained instance similarities can be empirically non-optimal and unrelated to the downstream classification task. To address this issue, we propose a novel multi-agent reinforcement learning (MARL) framework which models the connection between each pair of training samples as a reinforcement learning (RL) agent. We use attention-based graph neural network (GNN) to learn the instance similarity, and adaptively refine it using a deterministic policy gradient approach until some pre-defined scoring function is optimized. Different from those two-stage and alternative optimization algorithms whose training procedures are not end-to-end, our RL-based approach directly optimizes the objective function and estimates the instance similarities more precisely. The experimental results show that our method outperforms state-of-the-art competitors with a higher classification accuracy in both synthetic and real examples. ", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/c80a98e14de67f59fc33b6af5249cc9d3224e083.zip", "author": "Xinyi Zhang;Xingdong Feng;Fan Zhou", "authorids": "~Xinyi_Zhang5;~Xingdong_Feng1;~Fan_Zhou7", "gender": "F;M;", "homepage": ";https://bb9.sufe.edu.cn/bbcswebdav/users/2011000070/index.htm;", "dblp": ";;", "google_scholar": ";nQyBQOsAAAAJ;4QJkjl0AAAAJ", "orcid": ";;", "linkedin": "https://www.linkedin.cn/incareer/in/ACoAAC4vABQBVzOpPjYJIdfNdMRVtpxYvzGxqd0;;", "or_profile": "~Xinyi_Zhang5;~Xingdong_Feng1;~Fan_Zhou7", "aff": "Shanghai University of Finance and Economics;Shanghai University of Finance and Economics;Shanghai University of Finance and Economics", "aff_domain": "shufe.edu.cn;sufe.edu.cn;shufe.edu", "position": "PhD student;Full Professor;Associate Professor", "bibtex": "@misc{\nzhang2023solving,\ntitle={Solving Partial Label Learning Problem with Multi-Agent Reinforcement Learning},\nauthor={Xinyi Zhang and Xingdong Feng and Fan Zhou},\nyear={2023},\nurl={https://openreview.net/forum?id=BNsuf5g-JRd}\n}", "github": "", "project": "", "reviewers": "UdWa;h8m7;cHoo", "site": "https://openreview.net/forum?id=BNsuf5g-JRd", "pdf_size": 1159278, "recommendation": "3;3;5", "confidence": "2;3;4", "correctness": "2;1;4", "technical_novelty": "3;3;2", "empirical_novelty": "3;3;2", "wc_summary_paper": "158;87;86", "wc_strength_and_weaknesses": "119;181;178", "wc_clarity_quality_novelty_and_reproducibility": "24;46;98", "wc_summary_review": "28;29;58", "wc_review": "329;343;420", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "correctness_avg": [ 2.3333333333333335, 1.247219128924647 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 110.33333333333333, 33.70789554721894 ], "wc_strength_and_weaknesses_avg": [ 159.33333333333334, 28.546258754675524 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 56.0, 31.026870075253587 ], "wc_summary_review_avg": [ 38.333333333333336, 13.912424503139471 ], "wc_review_avg": [ 364.0, 40.00833246545858 ], 
"wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.8660254037844387, "corr_recommendation_correctness": 0.9449111825230683, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:xPyPvCBy8PwJ:scholar.google.com/&scioq=Solving+Partial+Label+Learning+Problem+with+Multi-Agent+Reinforcement+Learning&hl=en&as_sdt=0,10", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Shanghai University of Finance and Economics", "aff_unique_dep": "", "aff_unique_url": "http://www.sufe.edu.cn", "aff_unique_abbr": "SUFE", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "BO5_Lm7iD_", "title": "Social Network Structure Shapes Innovation: Experience-sharing in RL with SAPIENS", "track": "main", "status": "Reject", "tldr": "We show that a group's ability to collectively solve tasks depends on the social network structure that determines who shares information with whom, with dynamically changing structures performing best..", "abstract": "\nThe human cultural repertoire relies on innovation: our ability to continuously explore how existing elements can be combined to create new ones. Innovation is not solitary, it relies on collective accumulation and merging of previous solutions. Machine learning approaches commonly assume that fully connected multi-agent networks are best suited for innovation. However, human laboratory and field studies have shown that hierarchical innovation is more robustly achieved by dynamic social network structures. In dynamic settings, humans oscillate between innovating individually or in small clusters, and then sharing outcomes with others. To our knowledge, the role of multi-agent topology on innovation has not been systematically studied in machine learning. It remains unclear a) which social network topologies are optimal for which innovation tasks, and b) which properties of experience sharing improve multi-level innovation. Here we use a multi-level hierarchical problem setting (WordCraft), with three different innovation tasks. We systematically design networks of DQNs sharing experiences from their replay buffers in varying topologies (fully connected, small world, dynamic, ring). Comparing the level of innovation achieved by different experience-sharing topologies across different tasks shows that, first, consistent with human findings, experience sharing within a dynamic topology achieves the highest level of innovation across tasks. Second, experience sharing is not as helpful when there is a single clear path to innovation. Third, two metrics we propose, conformity and diversity of shared experience, can explain the success of different topologies on different tasks. 
These contributions can advance our understanding of optimal AI-AI, human-human, and human-AI collaborative networks, inspiring future tools for fostering collective innovation in large organizations.", "keywords": "collective innovation;social network;multi-agent model;collective dynamics;communication topology;collective cognition", "primary_area": "", "supplementary_material": "/attachment/5d168ad56bd19d55bcf0794e716cf9ac85e9b170.zip", "author": "Eleni Nisioti;Mat\u00e9o Mahaut;Pierre-Yves Oudeyer;Ida Momennejad;Cl\u00e9ment Moulin-Frier", "authorids": "~Eleni_Nisioti1;~Mat\u00e9o_Mahaut1;~Pierre-Yves_Oudeyer1;~Ida_Momennejad1;~Cl\u00e9ment_Moulin-Frier2", "gender": "F;;M;F;M", "homepage": "https://eleninisioti.github.io/;https://mahautm.github.io;http://www.pyoudeyer.com;https://www.momen-nejad.org;http://clement-moulin-frier.github.io/", "dblp": ";322/4063;33/5513;;124/0220", "google_scholar": "qkR7XKUAAAAJ;;https://scholar.google.fr/citations?user=gCqGj4sAAAAJ;https://scholar.google.de/citations?user=OFdUAJwAAAAJ;rBnV60QAAAAJ", "orcid": ";;;0000-0003-0830-3973;", "linkedin": ";mateo-mahaut/;pierreyvesoudeyer/;ida-momennejad-8661a710/;", "or_profile": "~Eleni_Nisioti1;~Mat\u00e9o_Mahaut1;~Pierre-Yves_Oudeyer1;~Ida_Momennejad1;~Cl\u00e9ment_Moulin-Frier2", "aff": "IT University of Copenhagen;Amazon;Inria;Microsoft Research;Inria", "aff_domain": "itu.dk;amazon.com;inria.fr;research.microsoft.com;inria.fr", "position": "Postdoc;Intern;Research director;Principal Researcher;Associate Professor", "bibtex": "@misc{\nnisioti2023social,\ntitle={Social Network Structure Shapes Innovation: Experience-sharing in {RL} with {SAPIENS}},\nauthor={Eleni Nisioti and Mat{\\'e}o Mahaut and Pierre-Yves Oudeyer and Ida Momennejad and Cl{\\'e}ment Moulin-Frier},\nyear={2023},\nurl={https://openreview.net/forum?id=BO5_Lm7iD_}\n}", "github": "", "project": "", "reviewers": "Gs4s;DXUd;GCRF;3Lpq", "site": "https://openreview.net/forum?id=BO5_Lm7iD_", "pdf_size": 2480051, "recommendation": "3;5;6;6", "confidence": "4;4;2;5", "correctness": "2;2;4;4", "technical_novelty": "3;3;3;4", "empirical_novelty": "3;4;3;4", "wc_summary_paper": "85;105;101;138", "wc_strength_and_weaknesses": "1502;438;227;419", "wc_clarity_quality_novelty_and_reproducibility": "94;426;56;8", "wc_summary_review": "89;183;77;419", "wc_review": "1770;1152;461;984", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "3255;3381;709;979", "reply_reviewers": "0;0;0;0", "reply_authors": "10;11;1;2", "recommendation_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "correctness_avg": [ 3.0, 1.0 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.5, 0.5 ], "wc_summary_paper_avg": [ 107.25, 19.266226926930972 ], "wc_strength_and_weaknesses_avg": [ 646.5, 500.7716545492566 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 146.0, 164.50531906294094 ], "wc_summary_review_avg": [ 192.0, 137.33535597216036 ], "wc_review_avg": [ 1091.75, 467.2014420996579 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 2081.0, 1241.4773457457852 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 6.0, 4.527692569068709 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.18731716231633877, "corr_recommendation_correctness": 0.8164965809277259, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14537945693751902333&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;2;3;2", 
"aff_unique_norm": "IT University of Copenhagen;Amazon;INRIA;Microsoft", "aff_unique_dep": ";Amazon.com, Inc.;;Microsoft Research", "aff_unique_url": "https://itu.dk;https://www.amazon.com;https://www.inria.fr;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "ITU;Amazon;Inria;MSR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;1;2", "aff_country_unique": "Denmark;United States;France" }, { "title": "Jointly Learning Visual and Auditory Speech Representations from Raw Data", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11585", "id": "BPwIgvf5iQ", "poster": "/media/PosterPDFs/ICLR%202023/11585.png?t=1682931628.7431693", "openreview": "https://openreview.net/forum?id=BPwIgvf5iQ", "slides": "https://iclr.cc/virtual/2023/poster/11585", "video": "https://iclr.cc/virtual/2023/poster/11585", "author_site": "Alexandros Haliassos, Pingchuan Ma, Rodrigo Mira, Stavros Petridis, Maja Pantic", "tldr": "We propose a self-supervised audiovisual approach to jointly learn visual and auditory speech representations.", "abstract": "We present RAVEn, a self-supervised multi-modal approach to jointly learn visual and auditory speech representations. Our pre-training objective involves encoding masked inputs, and then predicting contextualised targets generated by slowly-evolving momentum encoders. Driven by the inherent differences between video and audio, our design is asymmetric w.r.t. the two modalities' pretext tasks: Whereas the auditory stream predicts both the visual and auditory targets, the visual one predicts only the auditory targets. We observe strong results in low- and high-resource labelled data settings when fine-tuning the visual and auditory encoders resulting from a single pre-training stage, in which the encoders are jointly trained. Notably, RAVEn surpasses all self-supervised methods on visual speech recognition (VSR) on LRS3, and combining RAVEn with self-training using only 30 hours of labelled data even outperforms a recent semi-supervised method trained on 90,000 hours of non-public data. At the same time, we achieve state-of-the-art results in the LRS3 low-resource setting for auditory speech recognition (as well as for VSR). Our findings point to the viability of learning powerful speech representations entirely from raw video and audio, i.e., without relying on handcrafted features. 
Code and models are available at https://github.com/ahaliassos/raven.", "keywords": "self-supervised learning;lipreading;speech recognition", "primary_area": "", "supplementary_material": "", "author": "Alexandros Haliassos;Pingchuan Ma;Rodrigo Mira;Stavros Petridis;Maja Pantic", "authorids": "~Alexandros_Haliassos1;~Pingchuan_Ma5;~Rodrigo_Mira1;~Stavros_Petridis1;~Maja_Pantic2", "gender": "M;M;M;M;F", "homepage": ";https://mpc001.github.io/;https://miraodasilva.github.io/;http://ibug.doc.ic.ac.uk/people/spetridis;https://ibug.doc.ic.ac.uk/maja/", "dblp": "257/3052;215/4446-1;291/3819;57/2474;p/MajaPantic", "google_scholar": "qejRKDYAAAAJ;ZUW256sAAAAJ;08YfKjcAAAAJ;https://scholar.google.co.uk/citations?user=6v-UKEMAAAAJ;ygpxbK8AAAAJ", "orcid": ";0000-0003-3752-0803;0000-0002-9493-3842;;", "linkedin": "alexandros-haliassos-692495150/;;rodrigo-mira-670bbb151/;;maja-pantic-3922952b/?originalSubdomain=uk", "or_profile": "~Alexandros_Haliassos1;~Pingchuan_Ma5;~Rodrigo_Mira1;~Stavros_Petridis1;~Maja_Pantic1", "aff": "Imperial College London;Meta;Imperial College London;Meta Facebook;Meta Facebook", "aff_domain": "imperial.ac.uk;meta.com;ic.ac.uk;meta.com;fb.com", "position": "PhD student;Postdoc;PhD student;Researcher;Research Lead", "bibtex": "@inproceedings{\nhaliassos2023jointly,\ntitle={Jointly Learning Visual and Auditory Speech Representations from Raw Data},\nauthor={Alexandros Haliassos and Pingchuan Ma and Rodrigo Mira and Stavros Petridis and Maja Pantic},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=BPwIgvf5iQ}\n}", "github": "", "project": "", "reviewers": "XgXU;Hokd;ykA5;CL4k", "pdf_size": 2766729, "recommendation": "6;6;6;8", "confidence": "4;4;4;4", "correctness": "3;4;3;4", "technical_novelty": "3;1;3;3", "empirical_novelty": "3;2;3;3", "wc_summary_paper": "48;72;116;81", "wc_strength_and_weaknesses": "218;255;150;101", "wc_clarity_quality_novelty_and_reproducibility": "14;56;231;56", "wc_summary_review": "5;88;52;37", "wc_review": "285;471;549;275", "wc_reply_reviewers": "22;68;0;10", "wc_reply_authors": "801;1797;892;580", "reply_reviewers": "1;2;0;1", "reply_authors": "2;4;3;1", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.5, 0.8660254037844386 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 79.25, 24.40671014290947 ], "wc_strength_and_weaknesses_avg": [ 181.0, 59.594462830031446 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 89.25, 83.61631120780203 ], "wc_summary_review_avg": [ 45.5, 29.837057495671385 ], "wc_review_avg": [ 395.0, 118.31314381758267 ], "wc_reply_reviewers_avg": [ 25.0, 26.019223662515376 ], "wc_reply_authors_avg": [ 1017.5, 464.1252524911783 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 57, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15834014254103896480&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=BPwIgvf5iQ", "email": "imperial.ac.uk;meta.com;ic.ac.uk;meta.com;fb.com", "author_num": 5, "aff_unique_index": "0;1;0;1;1", "aff_unique_norm": "Imperial College London;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": 
"https://www.imperial.ac.uk;https://meta.com", "aff_unique_abbr": "ICL;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1;1", "aff_country_unique": "United Kingdom;United States" }, { "id": "BR1qoDGxjWp", "title": "Feed-Forward Latent Domain Adaptation", "track": "main", "status": "Reject", "tldr": "Cross-attention based meta-learning approach for fast latent domain adaptation", "abstract": "We study the highly practical but comparatively under-studied problem of latent-domain adaptation, where a source model should be adapted to a target dataset that contains a mixture of unlabelled domain-relevant and domain-irrelevant examples. Furthermore, motivated by the requirements for data privacy and the need for embedded and resource-constrained devices of all kinds to adapt to local data distributions, we focus on the setting of feed-forward source-free domain adaptation, where adaptation should not require access to the source dataset, and also be back propagation-free. Our solution is to meta-learn a network capable of embedding the mixed-relevance target dataset and dynamically adapting inference for target examples using cross-attention. The resulting framework leads to consistent improvement on strong ERM baselines. We also show that our framework sometimes even improves on the upper bound of domain-supervised adaptation, where only domain-relevant instances are provided for adaptation. This suggests that human annotated domain labels may not always be optimal, and raises the possibility of doing better through automated instance selection.", "keywords": "latent domain adaptation;source-free;cross-attention;meta-learning", "primary_area": "", "supplementary_material": "/attachment/bd29cfaa394956237c0a64921cbdbd58130c6d4c.zip", "author": "Ondrej Bohdal;Da Li;Shell Xu Hu;Timothy Hospedales", "authorids": "~Ondrej_Bohdal1;~Da_Li3;~Shell_Xu_Hu1;~Timothy_Hospedales1", "gender": "M;M;M;M", "homepage": "https://ondrejbohdal.github.io/;https://dali-dl.github.io/;http://homepages.inf.ed.ac.uk/thospeda/;http://hushell.github.io/", "dblp": "267/5714.html;43/4804-1;32/3545;", "google_scholar": "aKppg0QAAAAJ;RPvaE3oAAAAJ;https://scholar.google.fr/citations?user=nHhtvqkAAAAJ;https://scholar.google.fr/citations?user=jU7nGnEAAAAJ", "orcid": ";0000-0002-2101-2989;0000-0003-4867-7486;", "linkedin": ";;timothyhospedales/;", "or_profile": "~Ondrej_Bohdal1;~Da_Li3;~Timothy_Hospedales1;~Xu_Shell_Hu1", "aff": "University of Edinburgh;University of Edinburgh;Samsung AI Research Centre;Samsung", "aff_domain": "ed.ac.uk;ed.ac.uk;samsung.com;samsung.com", "position": "PhD student;Visiting Scholar;Principal Researcher;Researcher", "bibtex": "@misc{\nbohdal2023feedforward,\ntitle={Feed-Forward Latent Domain Adaptation},\nauthor={Ondrej Bohdal and Da Li and Shell Xu Hu and Timothy Hospedales},\nyear={2023},\nurl={https://openreview.net/forum?id=BR1qoDGxjWp}\n}", "github": "", "project": "", "reviewers": "J7SR;zikv;tuYo;wyNm;tHMU", "site": "https://openreview.net/forum?id=BR1qoDGxjWp", "pdf_size": 2365753, "recommendation": "3;3;3;6;8", "confidence": "3;4;4;2;4", "correctness": "3;2;3;3;4", "technical_novelty": "2;2;2;3;4", "empirical_novelty": "0;1;2;3;3", "wc_summary_paper": "129;101;33;87;88", "wc_strength_and_weaknesses": "400;338;185;58;231", "wc_clarity_quality_novelty_and_reproducibility": "4;24;3;81;142", "wc_summary_review": "22;23;16;22;20", "wc_review": "555;486;237;248;481", "wc_reply_reviewers": "189;0;0;0;0", "wc_reply_authors": "1294;1497;684;223;167", 
"reply_reviewers": "2;0;0;0;0", "reply_authors": "3;2;1;1;1", "recommendation_avg": [ 4.6, 2.0591260281974 ], "confidence_avg": [ 3.4, 0.8 ], "correctness_avg": [ 3.0, 0.6324555320336759 ], "technical_novelty_avg": [ 2.6, 0.8 ], "empirical_novelty_avg": [ 1.8, 1.1661903789690602 ], "wc_summary_paper_avg": [ 87.6, 31.225630498037987 ], "wc_strength_and_weaknesses_avg": [ 242.4, 119.50330539361661 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 50.8, 53.70809994777324 ], "wc_summary_review_avg": [ 20.6, 2.4979991993593593 ], "wc_review_avg": [ 401.4, 132.39728093884708 ], "wc_reply_reviewers_avg": [ 37.8, 75.6 ], "wc_reply_authors_avg": [ 773.0, 542.8211491826751 ], "reply_reviewers_avg": [ 0.4, 0.8000000000000002 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.14569287935358965, "corr_recommendation_correctness": 0.767868896042439, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9034163121250033981&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 9, "aff_unique_index": "0;0;1;1", "aff_unique_norm": "University of Edinburgh;Samsung", "aff_unique_dep": ";AI Research", "aff_unique_url": "https://www.ed.ac.uk;https://www.samsung.com/global/researchers/samsung-ai-research-centre/", "aff_unique_abbr": "Edinburgh;SARC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1", "aff_country_unique": "United Kingdom;South Korea" }, { "title": "Explaining Temporal Graph Models through an Explorer-Navigator Framework", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11343", "id": "BR_ZhvcYbGJ", "poster": "/media/PosterPDFs/ICLR%202023/11343.png?t=1681215304.8221433", "openreview": "https://openreview.net/forum?id=BR_ZhvcYbGJ", "slides": "https://iclr.cc/virtual/2023/poster/11343", "video": "https://iclr.cc/virtual/2023/poster/11343", "author_site": "Wenwen Xia, Mincai Lai, Caihua Shan, Yao Zhang, Xinnan Dai, Xiang Li, Dongsheng Li", "tldr": "A MCTS-based explainer for temporal graph models.", "abstract": "While GNN explanation has recently received significant attention, existing works are consistently designed for static graphs. Due to the prevalence of temporal graphs, many temporal graph models have been proposed, but explaining their predictions remains to be explored. To bridge the gap, in this paper, we propose T-GNNExplainer for temporal graph model explanation. Specifically, we regard a temporal graph constituted by a sequence of temporal events. Given a target event, our task is to find a subset of previously occurred events that lead to the model's prediction for it. To handle this combinatorial optimization problem, T-GNNExplainer includes an explorer to find the event subsets with Monte Carlo Tree Search (MCTS) and a navigator that learns the correlations between events and helps reduce the search space. In particular, the navigator is trained in advance and then integrated with the explorer to speed up searching and achieve better results. To the best of our knowledge, T-GNNExplainer is the first explainer tailored for temporal graph models. We conduct extensive experiments to evaluate the performance of T-GNNExplainer. Experimental results on both real-world and synthetic datasets demonstrate that T-GNNExplainer can achieve superior performance with up to about 50% improvement in Area under Fidelity-Sparsity Curve. 
", "keywords": "graph neural networks;gnn explainers;temporal graphs", "primary_area": "", "supplementary_material": "/attachment/8647094641767ca5dae8a214fa263108a7700c05.zip", "author": "Wenwen Xia;Mincai Lai;Caihua Shan;Yao Zhang;Xinnan Dai;Xiang Li;Dongsheng Li", "authorids": "~Wenwen_Xia1;~Mincai_Lai1;~Caihua_Shan1;~Yao_Zhang6;~Xinnan_Dai1;~Xiang_Li24;~Dongsheng_Li2", "gender": "M;M;F;M;F;M;M", "homepage": "https://xiawenwen49.github.io/;https://github.com/morningsky;;https://github.com/yzhang1918;;https://lixiang3776.github.io;http://recmind.cn", "dblp": "252/7958;;;57/3892-9;;40/1491-67.html;254/0830-2.html", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;-knurggAAAAJ;UwKOx_IAAAAJ;LGKDd2AAAAAJ;JnxxNtsAAAAJ;VNg5rA8AAAAJ", "orcid": "0000-0003-2928-7298;;;0000-0003-1481-8826;;0009-0003-0142-2483;0000-0003-3103-8442", "linkedin": ";;;;;;", "or_profile": "~Wenwen_Xia1;~Mincai_Lai1;~Caihua_Shan1;~Yao_Zhang6;~Xinnan_Dai1;~Xiang_Li24;~Dongsheng_Li2", "aff": "Shanghai Jiaotong University;;Microsoft;Fudan University;Michigan State University;East China Normal University;Microsoft Research Asia", "aff_domain": "sjtu.edu.cn;;microsoft.com;fudan.edu.cn;msu.edu;ecnu.edu.cn;microsoft.com", "position": "PhD student;;Researcher;Postdoc;PhD student;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\nxia2023explaining,\ntitle={Explaining Temporal Graph Models through an Explorer-Navigator Framework},\nauthor={Wenwen Xia and Mincai Lai and Caihua Shan and Yao Zhang and Xinnan Dai and Xiang Li and Dongsheng Li},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=BR_ZhvcYbGJ}\n}", "github": "", "project": "", "reviewers": "J8qS;zSpG;3TUm", "pdf_size": 1087745, "recommendation": "5;6;6", "confidence": "4;3;4", "correctness": "3;3;3", "technical_novelty": "2;3;3", "empirical_novelty": "2;3;3", "wc_summary_paper": "36;75;71", "wc_strength_and_weaknesses": "287;321;193", "wc_clarity_quality_novelty_and_reproducibility": "50;59;24", "wc_summary_review": "76;66;36", "wc_review": "449;521;324", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1382;936;879", "reply_reviewers": "0;0;0", "reply_authors": "4;4;3", "recommendation_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 60.666666666666664, 17.518244457961217 ], "wc_strength_and_weaknesses_avg": [ 267.0, 54.135632135098106 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 44.333333333333336, 14.83988619303471 ], "wc_summary_review_avg": [ 59.333333333333336, 16.99673171197595 ], "wc_review_avg": [ 431.3333333333333, 81.38932498935117 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1065.6666666666667, 224.88861440475122 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 3.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.4999999999999999, "corr_recommendation_correctness": 0.0, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5762505050039206449&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 0, "pdf": "https://openreview.net/pdf?id=BR_ZhvcYbGJ", "email": "sjtu.edu.cn;;microsoft.com;fudan.edu.cn;msu.edu;ecnu.edu.cn;microsoft.com", "author_num": 
7, "aff_unique_index": "0;1;2;3;4;1", "aff_unique_norm": "Shanghai Jiao Tong University;Microsoft;Fudan University;Michigan State University;East China Normal University", "aff_unique_dep": ";Microsoft Corporation;;;", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.microsoft.com;https://www.fudan.edu.cn;https://www.msu.edu;http://www.ecnu.edu.cn", "aff_unique_abbr": "SJTU;Microsoft;Fudan;MSU;ECNU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;1;0;1;0;0", "aff_country_unique": "China;United States" }, { "id": "BSUoWl5yfv", "title": "Training Instability and Disharmony Between ReLU and Batch Normalization", "track": "main", "status": "Reject", "tldr": "We mathematically show how the disharmony between ReLU and BN causes temporal gradient explosion and training instability. We also propose a better solution of the problem.", "abstract": "Deep neural networks based on batch normalization and ReLU-like activation functions experience instability during early stages of training owing to the high gradient induced by temporal gradient explosion. ReLU reduces the variance by more than the expected amount and batch normalization amplifies the gradient during its recovery. In this paper, we explain the explosion of a gradient mathematically while the forward propagation remains stable, and also the alleviation of the problem during training. Based on this, we propose a Layer-wise Asymmetric Learning rate Clipping (LALC) algorithm, which outperforms existing learning rate scaling methods in large batch training and can also be used to replace WarmUp in small batch training.", "keywords": "Deep learning;Gradient Exploding;ReLU;Batch normalization;Training instability;LARS;WarmUp", "primary_area": "", "supplementary_material": "", "author": "Inyoung Paik;Jaesik Choi", "authorids": "~Inyoung_Paik1;~Jaesik_Choi1", "gender": ";M", "homepage": ";https://sailab.kaist.ac.kr/jaesik", "dblp": ";13/1402", "google_scholar": "https://scholar.google.com/citations?hl=ko;RqMLVzUAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Inyoung_Paik1;~Jaesik_Choi1", "aff": "KAIST AI graduate school;Korea Advanced Institute of Science & Technology", "aff_domain": "gsai.kaist.ac.kr;kaist.ac.kr", "position": "MS student;Associate Professor", "bibtex": "@misc{\npaik2023training,\ntitle={Training Instability and Disharmony Between Re{LU} and Batch Normalization},\nauthor={Inyoung Paik and Jaesik Choi},\nyear={2023},\nurl={https://openreview.net/forum?id=BSUoWl5yfv}\n}", "github": "", "project": "", "reviewers": "NA9i;eGDm;EVtb;dvc8", "site": "https://openreview.net/forum?id=BSUoWl5yfv", "pdf_size": 637229, "recommendation": "3;3;3;6", "confidence": "3;3;4;4", "correctness": "3;3;2;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;3;2;3", "wc_summary_paper": "84;56;105;56", "wc_strength_and_weaknesses": "230;137;255;177", "wc_clarity_quality_novelty_and_reproducibility": "15;333;19;51", "wc_summary_review": "6;52;23;43", "wc_review": "335;578;402;327", "wc_reply_reviewers": "12;0;0;0", "wc_reply_authors": "318;293;408;240", "reply_reviewers": "1;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 3.75, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 75.25, 20.632195714465293 ], "wc_strength_and_weaknesses_avg": [ 199.75, 45.8877707020073 ], 
"wc_clarity_quality_novelty_and_reproducibility_avg": [ 104.5, 132.66028041580495 ], "wc_summary_review_avg": [ 31.0, 17.84656829757475 ], "wc_review_avg": [ 410.5, 100.99628706046575 ], "wc_reply_reviewers_avg": [ 3.0, 5.196152422706632 ], "wc_reply_authors_avg": [ 314.75, 60.75925855373813 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:j_QFndomOtwJ:scholar.google.com/&scioq=Training+Instability+and+Disharmony+Between+ReLU+and+Batch+Normalization&hl=en&as_sdt=0,48", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "KAIST;Korea Advanced Institute of Science and Technology", "aff_unique_dep": "AI graduate school;", "aff_unique_url": "https://www.kaist.edu;https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST;KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "id": "BSZN74xLdqn", "title": "FedFA: Federated Learning with Feature Alignment for Heterogeneous Data", "track": "main", "status": "Withdraw", "tldr": "A federated learning framework with feature alignment is proposed to tackle the data heterogeneity problem, including label and feature distribution skews across clients, from a novel perspective of shared feature space by feature anchors.", "abstract": "Federated learning allows multiple clients to collaboratively train a model without exchanging their data, thus preserving data privacy. Unfortunately, it suffers significant performance degradation under heterogeneous data at clients. Common solutions involve designing specific regularizers for local-model training or developing aggregation schemes for global-model aggregation. Nevertheless, we found that these methods fail to achieve the desired performance due to neglecting the importance of feature mapping consistency across client models. We first observe and analyze that, with heterogeneous data, a vicious cycle exists between classifier divergence and feature mapping inconsistency across clients, thereby shifting the aggregated global model from the expected optima. We then propose a simple yet effective framework named Federated learning with Feature Alignment (FedFA) to tackle the data heterogeneity problem from a novel perspective of shared feature space. A key insight of FedFA is introducing feature anchors to align the feature mappings and calibrate the classifier updates across clients during their local updates, such that client models are updated in a shared feature space. We prove that this modification brings a property of consistent classifier updates if features are class-discriminative. 
Extensive experiments show that FedFA significantly outperforms the state-of-the-art federated learning algorithms on various image classification datasets under both label and feature distribution skews.", "keywords": "Federated learning;feature alignment;data heterogeneity;heterogeneous label distribution;heterogeneous feature distribution", "primary_area": "", "supplementary_material": "/attachment/99548e9e293e7a81d9eda8751e2d6ec3300980e4.zip", "author": "Tailin ZHOU;Jun Zhang;Danny Tsang", "authorids": "~Tailin_ZHOU1;~Jun_Zhang25;~Danny_Tsang1", "gender": "M;;", "homepage": ";https://eejzhang.people.ust.hk/;https://eetsang.home.ece.ust.hk/", "dblp": "297/2863;z/JunZhang4;72/5231.html", "google_scholar": "3rKwPGEAAAAJ;1Is687QAAAAJ;https://scholar.google.com.hk/citations?user=27LmFbwAAAAJ", "orcid": ";0000-0002-5222-1898;", "linkedin": ";;", "or_profile": "~Tailin_ZHOU1;~Jun_Zhang25;~Danny_Tsang1", "aff": "Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology", "aff_domain": "hkust.edu;ust.hk;ust.hk", "position": "PhD student;Associate Professor;Full Professor", "bibtex": "@misc{\nzhou2023fedfa,\ntitle={Fed{FA}: Federated Learning with Feature Alignment for Heterogeneous Data},\nauthor={Tailin ZHOU and Jun Zhang and Danny Tsang},\nyear={2023},\nurl={https://openreview.net/forum?id=BSZN74xLdqn}\n}", "github": "", "project": "", "reviewers": "jH4v;HuqA;4mEf", "site": "https://openreview.net/forum?id=BSZN74xLdqn", "pdf_size": 2892054, "recommendation": "3;5;6", "confidence": "4;4;4", "correctness": "3;3;4", "technical_novelty": "3;3;3", "empirical_novelty": "2;3;3", "wc_summary_paper": "57;49;118", "wc_strength_and_weaknesses": "192;186;260", "wc_clarity_quality_novelty_and_reproducibility": "40;6;41", "wc_summary_review": "34;2;32", "wc_review": "323;243;451", "wc_reply_reviewers": "0;0;25", "wc_reply_authors": "3187;2077;3165", "reply_reviewers": "0;0;1", "reply_authors": "6;4;9", "recommendation_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 74.66666666666667, 30.81485933045218 ], "wc_strength_and_weaknesses_avg": [ 212.66666666666666, 33.559234529741914 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 29.0, 16.268579122549905 ], "wc_summary_review_avg": [ 22.666666666666668, 14.636332266733431 ], "wc_review_avg": [ 339.0, 85.66601815578139 ], "wc_reply_reviewers_avg": [ 8.333333333333334, 11.785113019775793 ], "wc_reply_authors_avg": [ 2809.6666666666665, 518.1514150216025 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 6.333333333333333, 2.0548046676563256 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.7559289460184545, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5335272284730611351&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Hong Kong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.ust.hk", "aff_unique_abbr": "HKUST", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "BSww-NrOzJ", "title": "Steering 
Prototypes with Prompt Tuning for Rehearsal-free Continual Learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Prototypes, as representations of class embeddings, have been explored to reduce the memory footprint or avoid bias towards the latest task in continual learning. However, prototype-based methods still suffer from performance deterioration due to semantic drift and prototype interference. In this work, we propose a simple and novel framework for rehearsal-free continual learning. We show that task-specific prompt tuning, when coupled with a contrastive loss design, can effectively address both issues and largely improve the potency of prototypes. The proposed framework excels on three challenging benchmarks, resulting in 3% to 6% absolute improvements over state-of-the-art methods without the use of a rehearsal buffer or a test-time oracle. Furthermore, the proposed framework largely bridges the performance gap between incremental learning and offline joint learning, demonstrating a promising design schema for continual learning.", "keywords": "Continual Learning;Prompt Tuning;Prototype;Contrastive Learning", "primary_area": "", "supplementary_material": "", "author": "Zhuowei Li;Long Zhao;Zizhao Zhang;Han Zhang;Ting Liu;Dimitris N. Metaxas", "authorids": "~Zhuowei_Li1;~Long_Zhao2;~Zizhao_Zhang3;~Han_Zhang1;~Ting_Liu4;~Dimitris_N._Metaxas1", "gender": "M;M;M;M;;M", "homepage": "https://lzvv123456.github.io/;http://garyzhao.github.io/;https://sites.google.com/corp/view/zizhaozhang;https://sites.google.com/corp/view/hanzhang;http://tliu.org;https://www.cs.rutgers.edu/~dnm/", "dblp": "l/ZhuoweiLi;31/5383-3;;;52/5150-5;m/DNMetaxas", "google_scholar": "51OJEPcAAAAJ;YTyBTmgAAAAJ;https://scholar.google.dk/citations?hl=en;cxEoVL4AAAAJ;4wSfAIQAAAAJ;https://scholar.google.com.tw/citations?user=a7VNhCIAAAAJ", "orcid": ";0000-0001-8921-8564;;;;", "linkedin": "jack-li;garyzhao9012/;;;;dimitris-metaxas-1bb74914/", "or_profile": "~Zhuowei_Li1;~Long_Zhao2;~Zizhao_Zhang3;~Han_Zhang1;~Ting_Liu4;~Dimitris_Metaxas1", "aff": "Rutgers University, New Brunswick;Google DeepMind;Google;Google;Google DeepMind;Rutgers University", "aff_domain": "cs.rutgers.edu;google.com;google.com;google.com;google.com;cs.rutgers.edu", "position": "PhD student;Research scientist;Researcher;Researcher;Researcher;Full Professor", "bibtex": "@misc{\nli2023steering,\ntitle={Steering Prototypes with Prompt Tuning for Rehearsal-free Continual Learning},\nauthor={Zhuowei Li and Long Zhao and Zizhao Zhang and Han Zhang and Ting Liu and Dimitris N.
Metaxas},\nyear={2023},\nurl={https://openreview.net/forum?id=BSww-NrOzJ}\n}", "github": "", "project": "", "reviewers": "frDv;YaMZ;gkaF;n83G", "site": "https://openreview.net/forum?id=BSww-NrOzJ", "pdf_size": 23537912, "recommendation": "6;6;6;6", "confidence": "2;3;4;3", "correctness": "3;4;3;3", "technical_novelty": "3;3;3;3", "empirical_novelty": "4;3;3;3", "wc_summary_paper": "157;70;62;126", "wc_strength_and_weaknesses": "211;82;215;210", "wc_clarity_quality_novelty_and_reproducibility": "130;72;10;38", "wc_summary_review": "38;35;25;105", "wc_review": "536;259;312;479", "wc_reply_reviewers": "0;23;0;0", "wc_reply_authors": "410;270;677;587", "reply_reviewers": "0;1;0;0", "reply_authors": "2;2;2;2", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 103.75, 39.410499869958514 ], "wc_strength_and_weaknesses_avg": [ 179.5, 56.32273075766125 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 62.5, 44.72974401894113 ], "wc_summary_review_avg": [ 50.75, 31.68891762114951 ], "wc_review_avg": [ 396.5, 114.36017663505072 ], "wc_reply_reviewers_avg": [ 5.75, 9.959292143521045 ], "wc_reply_authors_avg": [ 486.0, 157.4118801107464 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 34, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12742742558579228053&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;1;1;1;0", "aff_unique_norm": "Rutgers University;Google", "aff_unique_dep": ";Google DeepMind", "aff_unique_url": "https://www.rutgers.edu;https://deepmind.com", "aff_unique_abbr": "Rutgers;DeepMind", "aff_campus_unique_index": "0;2;2", "aff_campus_unique": "New Brunswick;;Mountain View", "aff_country_unique_index": "0;1;0;0;1;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "FunkNN: Neural Interpolation for Functional Generation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10939", "id": "BT4N_v7CLrk", "poster": "/media/PosterPDFs/ICLR%202023/10939.png?t=1682508003.2373033", "openreview": "https://openreview.net/forum?id=BT4N_v7CLrk", "slides": "https://iclr.cc/virtual/2023/poster/10939", "video": "https://iclr.cc/virtual/2023/poster/10939", "author_site": "AmirEhsan Khorashadizadeh, Anadi Chaman, Valentin Debarnot, Ivan Dokmanic", "tldr": "", "abstract": "Can we build continuous generative models which generalize across scales, can be evaluated at any coordinate, admit calculation of exact derivatives, and are conceptually simple? Existing MLP-based architectures generate worse samples than the grid-based generators with favorable convolutional inductive biases. Models that focus on generating images at different scales do better, but employ complex architectures not designed for continuous evaluation of images and derivatives.\nWe take a signal-processing perspective and treat continuous signal generation as interpolation from samples. Indeed, correctly sampled discrete images contain all information about the low spatial frequencies. The question is then how to extrapolate the spectrum in a data-driven way while meeting the above design criteria. 
Our answer is FunkNN---a novel convolutional network which learns how to reconstruct continuous images at arbitrary coordinates and can be applied to any image dataset. Combined with a discrete generative model it becomes a functional generator which can act as a prior in continuous ill-posed inverse problems. We show that FunkNN generates high-quality continuous images and exhibits strong out-of-distribution performance thanks to its patch-based design. We further showcase its performance in several stylized inverse problems with exact spatial derivatives.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/28323313d79cbed851c83df3fa1a592442228df7.zip", "author": "AmirEhsan Khorashadizadeh;Anadi Chaman;Valentin Debarnot;Ivan Dokmani\u0107", "authorids": "~AmirEhsan_Khorashadizadeh1;achaman2@illinois.edu;valentin.debarnot@unibas.ch;~Ivan_Dokmani\u01071", "gender": "M;;;", "homepage": "https://amirehsan95.github.io/;;;", "dblp": ";;;", "google_scholar": "Rou2vXcAAAAJ;;;", "orcid": ";;;", "linkedin": "https://ch.linkedin.com/in/amir-ehsan;;;", "or_profile": "~AmirEhsan_Khorashadizadeh1;achaman2@illinois.edu;valentin.debarnot@unibas.ch;~Ivan_Dokmani\u01071", "aff": "University of Basel;;;", "aff_domain": "unibas.ch;;;", "position": "PhD student;;;", "bibtex": "@inproceedings{\nkhorashadizadeh2023funknn,\ntitle={Funk{NN}: Neural Interpolation for Functional Generation},\nauthor={AmirEhsan Khorashadizadeh and Anadi Chaman and Valentin Debarnot and Ivan Dokmani{\\'c}},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=BT4N_v7CLrk}\n}", "github": "", "project": "", "reviewers": "Zsei;kWFU;9a5t;BzrP", "pdf_size": 5268825, "recommendation": "6;6;8;8", "confidence": "4;4;4;3", "correctness": "4;3;3;3", "technical_novelty": "3;3;3;3", "empirical_novelty": "3;3;3;0", "wc_summary_paper": "104;22;123;206", "wc_strength_and_weaknesses": "167;246;530;578", "wc_clarity_quality_novelty_and_reproducibility": "33;60;77;44", "wc_summary_review": "33;30;70;77", "wc_review": "337;358;800;905", "wc_reply_reviewers": "0;0;86;44", "wc_reply_authors": "570;462;460;467", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;1;1", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.25, 1.299038105676658 ], "wc_summary_paper_avg": [ 113.75, 65.40021024431037 ], "wc_strength_and_weaknesses_avg": [ 380.25, 176.79702344779452 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 53.5, 16.62077013859466 ], "wc_summary_review_avg": [ 52.5, 21.17191535974013 ], "wc_review_avg": [ 600.0, 255.3223452814109 ], "wc_reply_reviewers_avg": [ 32.5, 35.731638641405745 ], "wc_reply_authors_avg": [ 489.75, 46.40245144386232 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": -0.5773502691896257, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15911384740474175639&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=BT4N_v7CLrk", "email": "unibas.ch;;;", "author_num": 4, "aff_unique_index": "0", "aff_unique_norm": "University of Basel", "aff_unique_dep": "", "aff_unique_url": "https://www.unibas.ch", "aff_unique_abbr": "UniBas", 
"aff_country_unique_index": "0", "aff_country_unique": "Switzerland" }, { "id": "BUewet8vCFr", "title": "RetinexUTV: ROBUST RETINEX MODEL WITH UNFOLDING TOTAL VARIATION", "track": "main", "status": "Reject", "tldr": "", "abstract": "Digital images are underexposed due to poor scene lighting or hardware limitations, reducing visibility and level of detail in the image, which will affect subsequent high-level tasks and image aesthetics. Therefore, it is of great practical significance to enhance low-light images. Among existing low-light image enhancement techniques, retinex-based methods are the focus today. However, most retinex methods either ignore or poorly handle noise during enhancement, which can produce unpleasant visual effects in low-light image enhancement and affect high-level tasks. In this paper, we propose a robust low-light image enhancement method RetinexUTV, which aims to enhance low-light images well while suppressing noise. In RetinexUTV, we propose an adaptive illumination estimation unfolded total variational network, which approximates the noise level of the real low-light image by learning the balance parameter of the total variation regularization term of the model, obtains the noise level map and the smooth noise-free sub-map of the image. The initial illumination map is then estimated by obtaining the illumination information of the smooth sub-map. The initial reflection map is obtained through the initial illumination map and original image. Under the guidance of the noise level map, the noise of the reflection map is suppressed, and finally it is multiplied by the adjusted illumination map to obtain the final enhancement result. We test our method on real low-light datasets LOL, VELOL, and experiments demonstrate that our method outperforms state-of-the-art methods.", "keywords": "low light iamge enhancement;retinex;noise suppression;total variation", "primary_area": "", "supplementary_material": "", "author": "Guiyu Guo;Daming shi;Zunjin Zhao;Muhammad Tahir Rasheed", "authorids": "~Guiyu_Guo1;dshi@szu.edu.cn;~Zunjin_Zhao1;tahir@email.szu.edu.cn", "gender": ";;M;", "homepage": "http://futuremedia.szu.edu.cn/People.aspx;;https://sites.google.com/view/tituszhao;", "dblp": ";;;", "google_scholar": ";;mG5J9akAAAAJ;", "orcid": ";;0000-0003-3224-650X;", "linkedin": ";;;", "or_profile": "~Guiyu_Guo1;dshi@szu.edu.cn;~Zunjin_Zhao1;tahir@email.szu.edu.cn", "aff": "Shenzhen University;;Shenzhen University;", "aff_domain": "szu.edu.cn;;szu.edu.cn;", "position": "MS student;;PhD student;", "bibtex": "@misc{\nguo2023retinexutv,\ntitle={Retinex{UTV}: {ROBUST} {RETINEX} {MODEL} {WITH} {UNFOLDING} {TOTAL} {VARIATION}},\nauthor={Guiyu Guo and Daming shi and Zunjin Zhao and Muhammad Tahir Rasheed},\nyear={2023},\nurl={https://openreview.net/forum?id=BUewet8vCFr}\n}", "github": "", "project": "", "reviewers": "UJzM;dZQX;A79g;e8NH", "site": "https://openreview.net/forum?id=BUewet8vCFr", "pdf_size": 7498010, "recommendation": "1;3;3;5", "confidence": "4;5;4;4", "correctness": "3;3;3;3", "technical_novelty": "1;1;2;2", "empirical_novelty": "1;1;3;2", "wc_summary_paper": "57;82;78;124", "wc_strength_and_weaknesses": "37;319;145;244", "wc_clarity_quality_novelty_and_reproducibility": "288;49;83;64", "wc_summary_review": "48;11;54;66", "wc_review": "430;461;360;498", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.0, 1.4142135623730951 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], 
"correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 1.5, 0.5 ], "empirical_novelty_avg": [ 1.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 85.25, 24.304063446263466 ], "wc_strength_and_weaknesses_avg": [ 186.25, 105.9890914198249 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 121.0, 97.1673813581492 ], "wc_summary_review_avg": [ 44.75, 20.535031044534605 ], "wc_review_avg": [ 437.25, 50.68222074850312 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:8rPWh5snR84J:scholar.google.com/&scioq=RetinexUTV:+ROBUST+RETINEX+MODEL+WITH+UNFOLDING+TOTAL+VARIATION&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Shenzhen University", "aff_unique_dep": "", "aff_unique_url": "https://www.szu.edu.cn", "aff_unique_abbr": "SZU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "BVaytYu5Yj", "title": "Improving Continual Learning by Accurate Gradient Reconstructions of the Past", "track": "main", "status": "Reject", "tldr": "We propose a new, principled yet practical continual learning method that combines the complementary benefits of function-regularisation, weight-regularisation and experience replay.", "abstract": "Knowledge reuse is essential for continual learning, and current methods attempt to realize it through regularization or experience replay. These two strategies have complementary strengths, e.g., regularization methods are compact, but replay methods can mimic batch training more accurately. At present, little has been done to find principled ways to combine the two methods and current heuristics can give suboptimal performance. Here, we provide a principled approach to combine and improve them by using a recently proposed principle of adaptation, where the goal is to reconstruct the \u201cgradients of the past\u201d, i.e., to mimic batch training by estimating gradients from past data. Using this principle, we design a prior that provably gives better gradient reconstructions by utilizing two types of replay and a quadratic weight-regularizer. This improves performance on standard benchmarks such as Split CIFAR, Split TinyImageNet, and ImageNet-1000. 
Our work shows that a good combination of replay and regularizer-based methods can be very effective in reducing forgetting, and can sometimes even completely eliminate it.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Erik Daxberger;Siddharth Swaroop;Kazuki Osawa;Rio Yokota;Richard E Turner;Jos\u00e9 Miguel Hern\u00e1ndez-Lobato;Mohammad Emtiyaz Khan", "authorids": "~Erik_Daxberger1;~Siddharth_Swaroop2;~Kazuki_Osawa1;~Rio_Yokota1;~Richard_E_Turner1;~Jos\u00e9_Miguel_Hern\u00e1ndez-Lobato1;~Mohammad_Emtiyaz_Khan1", "gender": "M;;M;M;M;M;", "homepage": ";https://siddharthswaroop.github.io/;https://kazukiosawa.github.io/;https://www.rio.scrc.iir.isct.ac.jp/en/index.html;https://rich-turner-group.github.io/;https://emtiyaz.github.io/;http://jmhl.org", "dblp": ";230/8333;206/0054;61/7413;40/5352;58/10432;40/6058", "google_scholar": "7L4W8KwAAAAJ;https://scholar.google.com/citations?hl=en;IHdZHh8AAAAJ;klw9KE0AAAAJ;https://scholar.google.co.uk/citations?user=DgLEyZgAAAAJ;https://scholar.google.com/citations?hl=en;BEBccCQAAAAJ", "orcid": ";;;0000-0001-7573-7873;;;0000-0001-7610-949X", "linkedin": "edaxberger;;;rio-yokota-62857235/?originalSubdomain=jp;;;", "or_profile": "~Erik_Daxberger1;~Siddharth_Swaroop2;~Kazuki_Osawa1;~Rio_Yokota1;~Richard_E_Turner1;~Mohammad_Emtiyaz_Khan1;~Jose_Miguel_Hernandez_Lobato1", "aff": "Max-Planck Institute for Intelligent Systems;School of Engineering and Applied Sciences, Harvard University;Department of Computer Science, ETH Zurich;Institute of Science Tokyo;Microsoft Research;RIKEN Center for AI Project;University of Cambridge", "aff_domain": "mpg.de;seas.harvard.edu;inf.ethz.ch;isct.ac.jp;research.microsoft.com;riken.jp;cam.ac.uk", "position": "PhD student;Postdoc;Postdoc;Full Professor;Researcher;Full Professor;Associate Professor", "bibtex": "@misc{\ndaxberger2023improving,\ntitle={Improving Continual Learning by Accurate Gradient Reconstructions of the Past},\nauthor={Erik Daxberger and Siddharth Swaroop and Kazuki Osawa and Rio Yokota and Richard E Turner and Jos{\\'e} Miguel Hern{\\'a}ndez-Lobato and Mohammad Emtiyaz Khan},\nyear={2023},\nurl={https://openreview.net/forum?id=BVaytYu5Yj}\n}", "github": "", "project": "", "reviewers": "ozeD;goJh;6uVZ;jV4b", "site": "https://openreview.net/forum?id=BVaytYu5Yj", "pdf_size": 1016000, "recommendation": "3;3;5;6", "confidence": "5;2;3;4", "correctness": "2;4;3;4", "technical_novelty": "2;1;2;3", "empirical_novelty": "2;2;3;2", "wc_summary_paper": "52;52;43;203", "wc_strength_and_weaknesses": "182;36;72;152", "wc_clarity_quality_novelty_and_reproducibility": "27;26;31;41", "wc_summary_review": "30;35;29;25", "wc_review": "291;149;175;421", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 87.5, 66.78510312936561 ], "wc_strength_and_weaknesses_avg": [ 110.5, 58.879113444412525 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 31.25, 5.931905258852336 ], "wc_summary_review_avg": [ 29.75, 3.5619517121937516 ], "wc_review_avg": [ 259.0, 107.73114684249862 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 
7, 0 ], "corr_recommendation_confidence": 0.08606629658238704, "corr_recommendation_correctness": 0.4061811972299616, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=322960878715398926&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2;3;4;5;6", "aff_unique_norm": "Max-Planck Institute for Intelligent Systems;Harvard University;ETH Zurich;Institute of Science, Tokyo;Microsoft;RIKEN;University of Cambridge", "aff_unique_dep": ";School of Engineering and Applied Sciences;Department of Computer Science;;Microsoft Research;Center for AI Project;", "aff_unique_url": "https://www.mpi-is.mpg.de;https://www.harvard.edu;https://www.ethz.ch;https://www.iost.jp;https://www.microsoft.com/en-us/research;https://www.riken.jp/en/;https://www.cam.ac.uk", "aff_unique_abbr": "MPI-IS;Harvard;ETHZ;IoST;MSR;RIKEN;Cambridge", "aff_campus_unique_index": "1;2;1", "aff_campus_unique": ";Cambridge;Tokyo", "aff_country_unique_index": "0;1;2;3;1;3;4", "aff_country_unique": "Germany;United States;Switzerland;Japan;United Kingdom" }, { "id": "BW2A403ema", "title": "The Robustness Limits of SoTA Vision Models to Natural Variation", "track": "main", "status": "Withdraw", "tldr": "Even today's best vision models are not robust and struggle to generalize changes in factors such as pose, size, and position.", "abstract": "Recent state-of-the-art vision models introduced new architectures, learning paradigms, and larger pretraining data, leading to impressive performance on tasks such as classification. While previous generations of vision models were shown to lack robustness to factors such as pose, it\u2019s unclear the extent to which this next generation of models are more robust. To study this question, we develop a dataset of more than 7 million images with controlled changes in pose, position, background, lighting, and size. We study not only how robust recent state-of-the-art models are, but also the extent to which models can generalize variation in factors when they\u2019re present during training. We consider a catalog of recent vision models, including vision transformers (ViT), self-supervised models such as masked autoencoders (MAE), and models trained on larger datasets such as CLIP. We find out-of-the-box, even today\u2019s best models are not robust to common changes in pose, size, and background. When some samples varied during training, we found models required a significant portion of diversity to generalize\u2014though eventually robustness did improve. When diversity is only seen for some classes however, we found models did not generalize to other classes, unless the classes were very similar to those seen varying during training. We hope our work will shed further light on the blind spots of SoTA models and spur the development of more robust vision models", "keywords": "robustness;computer vision;generalization;deep learning", "primary_area": "", "supplementary_material": "/attachment/4cc900270d610d5e418ef46c25b311ad1ed99303.zip", "author": "Mark Ibrahim;Quentin Garrido;Ari S. 
Morcos;Diane Bouchacourt", "authorids": "~Mark_Ibrahim1;~Quentin_Garrido1;~Ari_S._Morcos1;~Diane_Bouchacourt3", "gender": ";M;M;F", "homepage": "https://markibrahim.me/;https://garridoq.com;http://www.arimorcos.com;https://dianebouchacourt.github.io/", "dblp": "180/5660;285/6628;217/3720;176/1498", "google_scholar": "AqYyoCMAAAAJ;RQaZUNsAAAAJ;v-A_7UsAAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Mark_Ibrahim1;~Quentin_Garrido1;~Ari_Morcos1;~Diane_Nicole_Bouchacourt1", "aff": "Facebook AI Research (FAIR) Meta;Research, Facebook;Meta AI (FAIR);Meta AI Research", "aff_domain": "ai.facebook.com;research.facebook.com;meta.com;meta.com", "position": "Researcher;PhD student;Research Scientist;Researcher", "bibtex": "@misc{\nibrahim2023the,\ntitle={The Robustness Limits of So{TA} Vision Models to Natural Variation},\nauthor={Mark Ibrahim and Quentin Garrido and Ari S. Morcos and Diane Bouchacourt},\nyear={2023},\nurl={https://openreview.net/forum?id=BW2A403ema}\n}", "github": "", "project": "", "reviewers": "1gBd;3AEh;A7av;6R9U", "site": "https://openreview.net/forum?id=BW2A403ema", "pdf_size": 20570319, "recommendation": "3;3;5;5", "confidence": "4;3;4;3", "correctness": "3;1;2;3", "technical_novelty": "1;2;3;2", "empirical_novelty": "2;1;3;2", "wc_summary_paper": "102;72;53;199", "wc_strength_and_weaknesses": "358;100;157;277", "wc_clarity_quality_novelty_and_reproducibility": "403;28;106;143", "wc_summary_review": "76;40;55;99", "wc_review": "939;240;371;718", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 106.5, 56.189411813970786 ], "wc_strength_and_weaknesses_avg": [ 223.0, 100.77946219344496 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 170.0, 140.7817459758189 ], "wc_summary_review_avg": [ 67.5, 22.23173407541571 ], "wc_review_avg": [ 567.0, 276.82575747209654 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.30151134457776363, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13642960466132999274&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Meta", "aff_unique_dep": "Facebook AI Research", "aff_unique_url": "https://www.meta.com", "aff_unique_abbr": "Meta AI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "BW9KtL-bott", "title": "SPIDER: Searching Personalized Neural Architecture for Federated Learning", "track": "main", "status": "Reject", "tldr": "SPIDER searches and trains heterogeneous architectures in a federated learning setting to achieve the objective of personalization.", "abstract": "Federated learning (FL) is an efficient learning framework that assists distributed machine learning when data cannot be shared with a centralized server. Recent advancements in FL use predefined architecture-based learning for all the clients. 
However, given that clients' data are invisible to the server and data distributions are non-identical across clients, a predefined architecture discovered in a centralized setting may not be an optimal solution for all the clients in FL. Motivated by this challenge, we introduce SPIDER, an algorithmic framework that aims to Search Personalized neural architecture for feDERated learning. SPIDER is designed based on two unique features: (1) alternately optimizing one architecture-homogeneous global model (Supernet) in a generic FL manner and one architecture-heterogeneous local model that is connected to the global model by weight-sharing-based regularization; and (2) achieving an architecture-heterogeneous local model via an operation-level, perturbation-based neural architecture search method. Experimental results demonstrate that SPIDER outperforms other state-of-the-art personalization methods with much less hyperparameter tuning.", "keywords": "Personalized Neural Architecture Search;Data Heterogeneity;Personalized Federated Learning", "primary_area": "", "supplementary_material": "/attachment/ef85576a0af5cc7e4ddc9757854240b152b44f29.zip", "author": "Erum Mushtaq;Chaoyang He;Jie Ding;Salman Avestimehr", "authorids": "~Erum_Mushtaq1;~Chaoyang_He1;~Jie_Ding2;~Salman_Avestimehr1", "gender": ";M;M;", "homepage": "https://scholar.google.com/citations?user=C5IpcRYAAAAJ&hl=en;http://chaoyanghe.com;http://jding.org;", "dblp": ";222/6721-1.html;94/1825-2;", "google_scholar": ";2z2camUAAAAJ;ZyqvoqcAAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Erum_Mushtaq1;~Chaoyang_He1;~Jie_Ding2;~Salman_Avestimehr1", "aff": "University of Southern California;TensorOpera AI;University of Minnesota, Minneapolis;", "aff_domain": "usc.edu;tensoropera.ai;umn.edu;", "position": "PhD student;Researcher;Assistant Professor;", "bibtex": "@misc{\nmushtaq2023spider,\ntitle={{SPIDER}: Searching Personalized Neural Architecture for Federated Learning},\nauthor={Erum Mushtaq and Chaoyang He and Jie Ding and Salman Avestimehr},\nyear={2023},\nurl={https://openreview.net/forum?id=BW9KtL-bott}\n}", "github": "", "project": "", "reviewers": "fLM4;AAmq;p6Ea;pg98", "site": "https://openreview.net/forum?id=BW9KtL-bott", "pdf_size": 853236, "recommendation": "3;3;3;5", "confidence": "3;5;4;5", "correctness": "3;3;2;4", "technical_novelty": "2;2;2;2", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "52;66;57;54", "wc_strength_and_weaknesses": "190;334;373;388", "wc_clarity_quality_novelty_and_reproducibility": "45;115;91;10", "wc_summary_review": "15;76;47;64", "wc_review": "302;591;568;516", "wc_reply_reviewers": "0;25;0;0", "wc_reply_authors": "653;964;902;766", "reply_reviewers": "0;1;0;0", "reply_authors": "1;2;2;2", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 57.25, 5.356071321407137 ], "wc_strength_and_weaknesses_avg": [ 321.25, 78.29870688587393 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 65.25, 40.62249992307219 ], "wc_summary_review_avg": [ 50.5, 22.940139493908923 ], "wc_review_avg": [ 494.25, 114.27242668290545 ], "wc_reply_reviewers_avg": [ 6.25, 10.825317547305483 ], "wc_reply_authors_avg": [ 821.25, 120.68424710789722 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0
], "corr_recommendation_confidence": 0.5222329678670935, "corr_recommendation_correctness": 0.816496580927726, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2451139749023356163&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Southern California;TensorOpera AI;University of Minnesota", "aff_unique_dep": ";;", "aff_unique_url": "https://www.usc.edu;;https://www.minnesota.edu", "aff_unique_abbr": "USC;;UMN", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Los Angeles;;Minneapolis", "aff_country_unique_index": "0;0", "aff_country_unique": "United States;" }, { "title": "Score-based Continuous-time Discrete Diffusion Models", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10977", "id": "BYWWwSY2G5s", "poster": "/media/PosterPDFs/ICLR%202023/10977.png?t=1681109942.6815124", "openreview": "https://openreview.net/forum?id=BYWWwSY2G5s", "slides": "https://iclr.cc/virtual/2023/poster/10977", "video": "https://iclr.cc/virtual/2023/poster/10977", "author_site": "Haoran Sun, Lijun Yu, Bo Dai, Dale Schuurmans, Hanjun Dai", "tldr": "a generalized discrete score matching for learning continuous-time diffusion in categorical spaces, with new parameterization and novel analytical sampling.", "abstract": "Score-based modeling through stochastic differential equations (SDEs) has provided a new perspective on diffusion models, and demonstrated superior performance on continuous data. However, the gradient of the log-likelihood function, \\ie, the score function, is not properly defined for discrete spaces. This makes it non-trivial to adapt SDE with score functions to categorical data. In this paper, we extend diffusion models to discrete variables by introducing a stochastic jump process where the reverse process denoises via a continuous-time Markov chain. This formulation admits an analytical simulation during backward sampling. To learn the reverse process, we extend score matching to general categorical data, and show that an unbiased estimator can be obtained via simple matching of the conditional marginal distributions. 
We demonstrate the effectiveness of the proposed method on a set of synthetic and real-world music and image benchmarks.", "keywords": "discrete space diffusion;discrete score matching;continuous-time diffusion", "primary_area": "", "supplementary_material": "/attachment/dea62454444da121dd5372e7aa9c22cb7761cbfa.zip", "author": "Haoran Sun;Lijun Yu;Bo Dai;Dale Schuurmans;Hanjun Dai", "authorids": "~Haoran_Sun2;~Lijun_Yu1;~Bo_Dai1;~Dale_Schuurmans1;~Hanjun_Dai1", "gender": "M;M;;;M", "homepage": ";https://me.lj-y.com/;https://bo-dai.github.io/;;https://hanjun-dai.github.io", "dblp": ";94/5561;64/2903;;144/7311", "google_scholar": "p7of_yoAAAAJ;IaDc0OcAAAAJ;TIKl_foAAAAJ;;obpl7GQAAAAJ", "orcid": ";0000-0003-0645-1657;0009-0002-8070-574X;;", "linkedin": ";lijun-yu/;;;hanjun-dai", "or_profile": "~Haoran_Sun2;~Lijun_Yu1;~Bo_Dai1;~Dale_Schuurmans1;~Hanjun_Dai1", "aff": "Georgia Institute of Technology;School of Computer Science, Carnegie Mellon University;Google Brain;;Google Research", "aff_domain": "gatech.edu;cs.cmu.edu;google.com;;google.com", "position": "PhD student;PhD student;Research Scientist;;Researcher", "bibtex": "@inproceedings{\nsun2023scorebased,\ntitle={Score-based Continuous-time Discrete Diffusion Models},\nauthor={Haoran Sun and Lijun Yu and Bo Dai and Dale Schuurmans and Hanjun Dai},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=BYWWwSY2G5s}\n}", "github": "", "project": "", "reviewers": "4YQP;tzeK;grv3;eZKv", "pdf_size": 1053044, "recommendation": "5;6;6;10", "confidence": "4;4;3;4", "correctness": "3;3;3;4", "technical_novelty": "2;3;3;4", "empirical_novelty": "2;0;2;4", "wc_summary_paper": "156;79;100;78", "wc_strength_and_weaknesses": "309;540;232;254", "wc_clarity_quality_novelty_and_reproducibility": "72;191;28;30", "wc_summary_review": "38;38;45;23", "wc_review": "575;848;405;385", "wc_reply_reviewers": "0;61;0;0", "wc_reply_authors": "756;1838;273;124", "reply_reviewers": "0;1;0;0", "reply_authors": "1;4;1;1", "recommendation_avg": [ 6.75, 1.920286436967152 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.0, 1.4142135623730951 ], "wc_summary_paper_avg": [ 103.25, 31.696805832764916 ], "wc_strength_and_weaknesses_avg": [ 333.75, 122.33637030744373 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 80.25, 66.31129240182248 ], "wc_summary_review_avg": [ 36.0, 8.031189202104505 ], "wc_review_avg": [ 553.25, 185.49713609649072 ], "wc_reply_reviewers_avg": [ 15.25, 26.413774815425377 ], "wc_reply_authors_avg": [ 747.75, 671.4098506128727 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.22549380840084865, "corr_recommendation_correctness": 0.9771398364036774, "gs_citation": 80, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12193507618573900807&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "pdf": "https://openreview.net/pdf?id=BYWWwSY2G5s", "email": "gatech.edu;cs.cmu.edu;google.com;;google.com", "author_num": 5, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "Georgia Institute of Technology;Carnegie Mellon University;Google", "aff_unique_dep": ";School of Computer Science;Google Brain", "aff_unique_url": "https://www.gatech.edu;https://www.cmu.edu;https://brain.google.com", 
"aff_unique_abbr": "Georgia Tech;CMU;Google Brain", "aff_campus_unique_index": "1;2;2", "aff_campus_unique": ";Pittsburgh;Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "GRACE-C: Generalized Rate Agnostic Causal Estimation via Constraints", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12103", "id": "B_pCIsX8KL_", "poster": "/media/PosterPDFs/ICLR%202023/12103.png?t=1682969267.4942443", "openreview": "https://openreview.net/forum?id=B_pCIsX8KL_", "slides": "https://iclr.cc/virtual/2023/poster/12103", "video": "https://iclr.cc/virtual/2023/poster/12103", "author_site": "Mohammadsajad Abavisani, David Danks, Sergey Plis", "tldr": "A novel method for causal structure discovery in undersampled time-series with three orders of magnitude speedup under the same theoretical guarantees.", "abstract": "Graphical structures estimated by causal learning algorithms from time series data can provide highly misleading causal information if the causal timescale of the generating process fails to match the measurement timescale of the data. Existing algorithms provide limited resources to respond to this challenge, and so researchers must either use models that they know are likely misleading, or else forego causal learning entirely. Existing methods face up-to-four distinct shortfalls, as they might a) require that the difference between causal and measurement timescales is known; b) only handle very small number of random variables when the timescale difference is unknown; c) only apply to pairs of variables (albeit with fewer assumptions about prior knowledge); or d) be unable to find a solution given statistical noise in the data. This paper aims to address these challenges. We present an approach that combines constraint programming with both theoretical insights into the problem structure and prior information about admissible causal interactions to achieve speed up of multiple orders of magnitude. The resulting system scales to significantly larger sets of random variables ($>100$) without knowledge of the timescale difference while maintaining theoretical guarantees. 
This method is also robust to edge misidentification and can use parametric connection strengths, while optionally finding the optimal among many possible solutions.", "keywords": "Causal structure learning;causal learning;graph theory;brain imaging;fMRI", "primary_area": "", "supplementary_material": "", "author": "Mohammadsajad Abavisani;David Danks;Sergey Plis", "authorids": "~Mohammadsajad_Abavisani1;~David_Danks1;~Sergey_Plis1", "gender": "M;M;M", "homepage": ";https://www.daviddanks.org;", "dblp": "246/5202;99/4097;07/227", "google_scholar": "tuDtPWgAAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0003-0040-0365", "linkedin": "sajadabavisani;;sergeyplis/", "or_profile": "~Mohammadsajad_Abavisani1;~David_Danks1;~Sergey_Plis1", "aff": "Georgia Institute of Technology;University of California, San Diego;Georgia State University", "aff_domain": "gatech.edu;ucsd.edu;gsu.edu", "position": "MS student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nabavisani2023gracec,\ntitle={{GRACE}-C: Generalized Rate Agnostic Causal Estimation via Constraints},\nauthor={Mohammadsajad Abavisani and David Danks and Sergey Plis},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=B_pCIsX8KL_}\n}", "github": "", "project": "", "reviewers": "FQ5L;yZ3C;9RAD;kYJa;3inH", "pdf_size": 6595027, "recommendation": "6;6;6;6;8", "confidence": "3;3;2;4;4", "correctness": "4;4;3;4;4", "technical_novelty": "4;2;3;3;3", "empirical_novelty": "2;2;3;2;3", "wc_summary_paper": "50;124;86;80;67", "wc_strength_and_weaknesses": "372;338;94;171;95", "wc_clarity_quality_novelty_and_reproducibility": "19;96;19;83;98", "wc_summary_review": "260;52;24;39;77", "wc_review": "701;610;223;373;337", "wc_reply_reviewers": "97;0;0;0;0", "wc_reply_authors": "660;355;353;425;334", "reply_reviewers": "2;0;0;0;0", "reply_authors": "3;1;1;1;1", "recommendation_avg": [ 6.4, 0.7999999999999999 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "correctness_avg": [ 3.8, 0.39999999999999997 ], "technical_novelty_avg": [ 3.0, 0.6324555320336759 ], "empirical_novelty_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_paper_avg": [ 81.4, 24.621941434419828 ], "wc_strength_and_weaknesses_avg": [ 214.0, 118.95377253370319 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 63.0, 36.29325006113396 ], "wc_summary_review_avg": [ 90.4, 86.56696829622716 ], "wc_review_avg": [ 448.8, 178.22502630102207 ], "wc_reply_reviewers_avg": [ 19.4, 38.8 ], "wc_reply_authors_avg": [ 425.4, 121.31710514185541 ], "reply_reviewers_avg": [ 0.4, 0.8000000000000002 ], "reply_authors_avg": [ 1.4, 0.8000000000000002 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.5345224838248488, "corr_recommendation_correctness": 0.25000000000000006, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17195299173300992870&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=B_pCIsX8KL_", "email": "gatech.edu;ucsd.edu;gsu.edu", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Georgia Institute of Technology;University of California, San Diego;Georgia State University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.gatech.edu;https://www.ucsd.edu;https://www.gsu.edu", "aff_unique_abbr": "Georgia Tech;UCSD;GSU", "aff_campus_unique_index": "1", "aff_campus_unique": ";San Diego", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United 
States" }, { "id": "BaWtp9o25zN", "title": "Limitations of Piecewise Linearity for Efficient Robustness Certification", "track": "main", "status": "Reject", "tldr": "We show that piecewise linearity imposes fundamental limitations for efficient robustness certification, e.g., Lipschitz-based certification; this imposes additional capacity requirements on networks that must be certified by such techniques.", "abstract": "Certified defenses against small-norm adversarial examples have received growing attention in recent years; though certified accuracies of state-of-the-art methods remain far below their non-robust counterparts, despite the fact that benchmark datasets have been shown to be well-separated at far larger radii than the literature generally attempts to certify. In this work, we offer insights that identify potential factors in this performance gap. Specifically, our analysis reveals that piecewise linearity imposes fundamental limitations on the tightness of leading certification techniques. These limitations are felt in practical terms as a greater need for capacity in models hoped to be certified efficiently. Moreover, this is _in addition_ to the capacity necessary to learn a robust boundary, studied in prior work. However, we argue that addressing the limitations of piecewise linearity through scaling up model capacity may give rise to potential difficulties---particularly regarding robust generalization---therefore, we conclude by suggesting that developing _smooth_ activation functions may be the way forward for advancing the performance of certified neural networks.", "keywords": "robustness;certification;Lipschitz;limitations;adversarial examples", "primary_area": "", "supplementary_material": "/attachment/3fdefadbb8be254b675aac15147a09cd72f8c6f1.zip", "author": "Klas Leino", "authorids": "~Klas_Leino1", "gender": "M", "homepage": "https://klas.leino.tech", "dblp": "", "google_scholar": "", "orcid": "", "linkedin": "", "or_profile": "~Klas_Leino1", "aff": "Roam HQ", "aff_domain": "ro.am", "position": "Principal Researcher", "bibtex": "@misc{\nleino2023limitations,\ntitle={Limitations of Piecewise Linearity for Efficient Robustness Certification},\nauthor={Klas Leino},\nyear={2023},\nurl={https://openreview.net/forum?id=BaWtp9o25zN}\n}", "github": "", "project": "", "reviewers": "uK69;SHQ9;gENj;qESg", "site": "https://openreview.net/forum?id=BaWtp9o25zN", "pdf_size": 793009, "recommendation": "5;5;6;6", "confidence": "5;3;4;3", "correctness": "2;3;4;3", "technical_novelty": "3;2;4;3", "empirical_novelty": "2;0;0;0", "wc_summary_paper": "24;61;108;81", "wc_strength_and_weaknesses": "175;538;234;442", "wc_clarity_quality_novelty_and_reproducibility": "15;34;225;21", "wc_summary_review": "23;53;135;91", "wc_review": "237;686;702;635", "wc_reply_reviewers": "40;0;339;66", "wc_reply_authors": "1035;1477;1192;1192", "reply_reviewers": "1;0;1;1", "reply_authors": "2;4;3;3", "recommendation_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 0.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 68.5, 30.630866784993206 ], "wc_strength_and_weaknesses_avg": [ 347.25, 148.20488352277735 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 73.75, 87.5938782107517 ], "wc_summary_review_avg": [ 75.5, 41.961291686505554 ], "wc_review_avg": [ 565.0, 190.98036548294695 ], "wc_reply_reviewers_avg": [ 111.25, 133.57652301209222 ], 
"wc_reply_authors_avg": [ 1224.0, 159.51332232763508 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 1, 0 ], "corr_recommendation_confidence": -0.30151134457776363, "corr_recommendation_correctness": 0.7071067811865475, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8997001737155301146&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 3, "aff_unique_index": "0", "aff_unique_norm": "Roam", "aff_unique_dep": "", "aff_unique_url": "https://www.roamhq.com", "aff_unique_abbr": "", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "Bc_R_YyycnK", "title": "Learning Multi-Object Positional Relationships via Emergent Communication", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "The study of emergent communication has been dedicated to interactive artificial intelligence. While existing work focuses on communication about single objects or complex image scenes, we argue that communicating relationships between multiple objects is important in more realistic tasks, but understudied. In this paper, we try to fill this gap and focus on emergent communication about positional relationships between two objects. We train agents in the referential game where observations contain two objects, and find that generalization is the major problem when the positional relationship is involved. The key factor affecting the generalization ability of the emergent language is the input variation between Speaker and Listener, which is realized by a random image generator in our work. Further, we find that the learned language can generalize well in a new multi-step MDP task where the positional relationship describes the goal, and performs better than raw-pixel images as well as pre-trained image features, verifying the strong generalization ability of discrete sequences. We also show that language transfer from the referential game performs better in the new task than learning language directly in this task, implying the potential benefits of pre-training in referential games. 
All in all, our experiments demonstrate the viability and merit of having agents learn to communicate positional relationships between multiple objects through emergent communication.", "keywords": "emergent communication", "primary_area": "", "supplementary_material": "", "author": "Yicheng Feng;Boshi An;Zongqing Lu", "authorids": "~Yicheng_Feng1;~Boshi_An1;~Zongqing_Lu2", "gender": "M;M;", "homepage": "https://takenpeanut.github.io/;https://boshi-an.github.io/;", "dblp": "340/4016;330/2184;", "google_scholar": ";https://scholar.google.com.hk/citations?hl=zh-CN;", "orcid": ";;", "linkedin": ";https://www.linkedin.cn/incareer/in/%E5%8D%9A%E6%96%BD-%E5%AE%89-2b3275236;", "or_profile": "~Yicheng_Feng1;~Boshi_An1;~Zongqing_Lu2", "aff": "Peking University;Peking University;", "aff_domain": "pku.edu.cn;pku.edu.cn;", "position": "PhD student;Undergrad student;", "bibtex": "@misc{\nfeng2023learning,\ntitle={Learning Multi-Object Positional Relationships via Emergent Communication},\nauthor={Yicheng Feng and Boshi An and Zongqing Lu},\nyear={2023},\nurl={https://openreview.net/forum?id=Bc_R_YyycnK}\n}", "github": "", "project": "", "reviewers": "qZvu;k5Ej;3Qz8;y3tf", "site": "https://openreview.net/forum?id=Bc_R_YyycnK", "pdf_size": 1997570, "recommendation": "5;5;6;8", "confidence": "4;4;3;4", "correctness": "4;3;3;3", "technical_novelty": "2;2;3;4", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "231;64;131;105", "wc_strength_and_weaknesses": "709;91;106;176", "wc_clarity_quality_novelty_and_reproducibility": "26;523;18;40", "wc_summary_review": "178;183;17;20", "wc_review": "1144;861;272;341", "wc_reply_reviewers": "293;614;0;0", "wc_reply_authors": "1071;985;78;221", "reply_reviewers": "1;2;0;0", "reply_authors": "3;2;1;1", "recommendation_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 132.75, 61.54825342769687 ], "wc_strength_and_weaknesses_avg": [ 270.5, 255.192574343377 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 151.75, 214.48586783282482 ], "wc_summary_review_avg": [ 99.5, 81.02623032080415 ], "wc_review_avg": [ 654.5, 362.9190681129885 ], "wc_reply_reviewers_avg": [ 226.75, 253.56594309962054 ], "wc_reply_authors_avg": [ 588.75, 443.1942999407822 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": -0.4714045207910316, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=990275603477080054&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "BcbwGQWB-Kd", "title": "What Spurious Features Can Pretrained Language Models Combat?", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Machine learning models are known to exploit spurious features: features that are predictive during training (e.g., the exclamation mark) but are not useful in general (e.g., the exclamation mark does not imply sentiment).
Relying on such features may result in significant performance drops under distribution shifts. Recent work has found that Pretrained Language Models (PLMs) improve robustness against spurious features. However, existing evaluation of PLMs only focuses on a small set of spurious features, painting a limited picture of the inductive bias in PLMs. In this work, we conduct a comprehensive empirical analysis to compare the generalization patterns of PLMs on diverse categories of spurious features as a way to analyze the inductive biases of PLMs. We find systematic patterns when finetuning BERT and few-shot prompting GPT-3: they exploit certain types of spurious features (e.g., content words) to a much larger extent than others (e.g., function words). Our findings inform the kinds of settings where pretraining alone can be expected to confer robustness, and the kinds of spurious features where other mitigation methods are necessary, for which we also study how different finetuning and prompting methods affect the robustness of PLMs.", "keywords": "spurious correlation;pretrained language models", "primary_area": "", "supplementary_material": "", "author": "Chenglei Si;Dan Friedman;Nitish Joshi;Shi Feng;Danqi Chen;He He", "authorids": "~Chenglei_Si1;~Dan_Friedman2;~Nitish_Joshi1;~Shi_Feng1;~Danqi_Chen1;~He_He2", "gender": "M;;M;M;F;F", "homepage": "https://noviscl.github.io/;http://danfriedman0.github.io/;https://joshinh.github.io;https://ihsgnef.github.io/;https://www.cs.princeton.edu/~danqic/;http://hhexiy.github.io", "dblp": "251/8778;205/9386;242/7973;97/1374.html;87/7949;08/8618-1", "google_scholar": "https://scholar.google.com.sg/citations?user=CyKr1q8AAAAJ;1UMQ_KwAAAAJ;;d0npq2oAAAAJ;sVR8ktkAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Chenglei_Si1;~Dan_Friedman2;~Nitish_Joshi1;~Shi_Feng1;~Danqi_Chen1;~He_He1", "aff": "Stanford University;Princeton University;New York University;University of Chicago;Princeton University;New York University", "aff_domain": "stanford.edu;princeton.edu;nyu.edu;uchicago.edu;cs.princeton.edu;nyu.edu", "position": "PhD student;PhD student;PhD student;Postdoc;Assistant Professor;Assistant Professor", "bibtex": "@misc{\nsi2023what,\ntitle={What Spurious Features Can Pretrained Language Models Combat?},\nauthor={Chenglei Si and Dan Friedman and Nitish Joshi and Shi Feng and Danqi Chen and He He},\nyear={2023},\nurl={https://openreview.net/forum?id=BcbwGQWB-Kd}\n}", "github": "", "project": "", "reviewers": "pR2u;uqS3;nD44;m6CX", "site": "https://openreview.net/forum?id=BcbwGQWB-Kd", "pdf_size": 379388, "recommendation": "5;5;5;6", "confidence": "4;4;2;3", "correctness": "3;2;3;4", "technical_novelty": "3;2;2;3", "empirical_novelty": "3;2;2;3", "wc_summary_paper": "119;98;64;44", "wc_strength_and_weaknesses": "277;457;138;124", "wc_clarity_quality_novelty_and_reproducibility": "59;96;132;21", "wc_summary_review": "28;144;64;26", "wc_review": "483;795;398;215", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 81.25, 29.11507341567251 ], "wc_strength_and_weaknesses_avg": [ 249.0, 134.1584883635769 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 77.0, 41.37027918687521 ], 
"wc_summary_review_avg": [ 65.5, 47.778133073614335 ], "wc_review_avg": [ 472.75, 209.7455303457025 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.17407765595569782, "corr_recommendation_correctness": 0.816496580927726, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12750277770977376391&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;1;2", "aff_unique_norm": "Stanford University;Princeton University;New York University;University of Chicago", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.stanford.edu;https://www.princeton.edu;https://www.nyu.edu;https://www.uchicago.edu", "aff_unique_abbr": "Stanford;Princeton;NYU;UChicago", "aff_campus_unique_index": "0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "BckALoxD8ow", "title": "OPERA: Omni-Supervised Representation Learning with Hierarchical Supervisions", "track": "main", "status": "Withdraw", "tldr": "We propose an omni-supervised representation learning with hierarchical supervisions method for better transferability.", "abstract": "The pretrain-finetune paradigm in modern computer vision facilitates the success of self-supervised learning, which tends to achieve better transferability than supervised learning. However, with the availability of massive labeled data, a natural question emerges: how to train a better model with both self and full supervision signals? In this paper, we propose omni-supervised representation learning with hierarchical supervisions (OPERA) as a solution. We provide a unified perspective of supervisions from labeled and unlabeled data and propose a unified framework of fully supervised and self-supervised learning. We extract a set of hierarchical proxy representations for each image and impose self and full supervisions on the corresponding proxy representations. 
Extensive experiments on both convolutional neural networks and vision transformers demonstrate the superiority of OPERA in image classification, segmentation, and object detection.", "keywords": "Representation learning;omni-supervised learning.", "primary_area": "", "supplementary_material": "/attachment/56d6916873efcb46d7e1cbc4f16ac7b1bc3e434a.zip", "author": "Chengkun Wang;Wenzhao Zheng;Zheng Zhu;Jie Zhou;Jiwen Lu", "authorids": "~Chengkun_Wang1;~Wenzhao_Zheng1;~Zheng_Zhu1;~Jie_Zhou3;~Jiwen_Lu1", "gender": ";M;M;M;M", "homepage": "https://wzzheng.net;http://www.zhengzhu.net/;https://www.tsinghua.edu.cn/publish/auen/1713/2011/20110506105532098625469/20110506105532098625469_.html;http://ivg.au.tsinghua.edu.cn/Jiwen_Lu/;http://ivg.au.tsinghua.edu.cn/people.php", "dblp": "230/1277;29/4319.html/;00/5012-1;http://dblp.uni-trier.de/pers/hd/l/Lu:Jiwen;", "google_scholar": "LdK9scgAAAAJ;https://scholar.google.com.hk/citations?user=NmwjI0AAAAAJ;;TN8uDQoAAAAJ;", "orcid": ";;;0000-0002-6121-5529;", "linkedin": ";;;;", "or_profile": "~Wenzhao_Zheng1;~Zheng_Zhu1;~Jie_Zhou3;~Jiwen_Lu1;~Wang_Chengkun1", "aff": "Tsinghua University;PhiGent Robotics;Tsinghua University;Tsinghua University;Tsinghua university", "aff_domain": "tsinghua.edu.cn;phigent.ai;tsinghua.edu.cn;tsinghua.edu.cn;mails.tsinghua.edu.cn", "position": "PhD student;Researcher;Full Professor;Associate Professor;PhD student", "bibtex": "@misc{\nwang2023opera,\ntitle={{OPERA}: Omni-Supervised Representation Learning with Hierarchical Supervisions},\nauthor={Chengkun Wang and Wenzhao Zheng and Zheng Zhu and Jie Zhou and Jiwen Lu},\nyear={2023},\nurl={https://openreview.net/forum?id=BckALoxD8ow}\n}", "github": "", "project": "", "reviewers": "nHMn;HfcB;2qns;kHg8", "site": "https://openreview.net/forum?id=BckALoxD8ow", "pdf_size": 3730411, "recommendation": "3;3;5;5", "confidence": "5;4;4;4", "correctness": "2;2;2;4", "technical_novelty": "2;3;2;3", "empirical_novelty": "2;0;2;2", "wc_summary_paper": "43;60;131;79", "wc_strength_and_weaknesses": "353;568;228;149", "wc_clarity_quality_novelty_and_reproducibility": "19;40;171;176", "wc_summary_review": "122;64;69;33", "wc_review": "537;732;599;437", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 2.5, 0.8660254037844386 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 1.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 78.25, 33.0104150231408 ], "wc_strength_and_weaknesses_avg": [ 324.5, 158.28534360451695 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 101.5, 72.40338389882064 ], "wc_summary_review_avg": [ 72.0, 31.992186546092782 ], "wc_review_avg": [ 576.25, 106.89568513274986 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5627833352137365624&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "Tsinghua University;PhiGent Robotics", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;", "aff_unique_abbr": "THU;", "aff_campus_unique_index": "", "aff_campus_unique": "", 
"aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China;" }, { "id": "BcmrpOpUGN2", "title": "Warped Convolutional Networks: Bridge Homography to $\\mathfrak{sl}(3)$ algebra by Group Convolution", "track": "main", "status": "Withdraw", "tldr": "We propose a Warped Convolution Networks to effectively learn the homography on $\\mathfrak{sl}(3)$ algebra with group convolution. ", "abstract": "Homography has an essential relationship with the special linear group and the embedding Lie algebra structure. Although the Lie algebra representation is elegant, few researchers have established the connection between homography and algebra expression in neural networks. In this paper, we propose Warped Convolution Networks (WCN) to effectively learn and represent the homography by $SL(3)$ group and $\\mathfrak{sl}(3)$ algebra with group convolution. To this end, six commutative subgroups within the $SL(3)$ group are composed to form a homography. For each subgroup, a warping function is proposed to bridge the Lie algebra structure to its corresponding parameters in homography. By taking advantage of the warped convolution, homography learning is formulated into several simple pseudo-translation regressions. By walking along the Lie topology, our proposed WCN is able to learn the features that are invariant to homography. Moreover, it can be easily plugged into other popular CNN-based methods. Extensive experiments on the POT benchmark, S-COCO-Proj, and MNIST-Proj dataset show that our proposed method is effective for planar object tracking, homography estimation, and classification. ", "keywords": "SL(3);Homography Learning;Lie algebra;Equivariance;Group Equivariant Architecture", "primary_area": "", "supplementary_material": "/attachment/93d37ed4c1d4ed3c66956aa4685bf5b9593a7e0a.zip", "author": "Xinrui Zhan;Yang Li;Wenyu Liu;Jianke Zhu", "authorids": "~Xinrui_Zhan1;~Yang_Li46;~Wenyu_Liu4;~Jianke_Zhu1", "gender": "M;M;M;M", "homepage": ";http://ihpdep.github.io/;;https://person.zju.edu.cn/en/jkzhu", "dblp": "309/6307;37/4190-41;42/4110-5;10/4016", "google_scholar": ";N1ZDSHYAAAAJ;https://scholar.google.com/citations?hl=zh-CN;SC-WmzwAAAAJ", "orcid": ";0000-0001-9427-7665;0000-0002-3035-987X;0000-0003-1831-0106", "linkedin": ";;;https://www.linkedin.cn/incareer/in/jianke-zhu-b83bba8", "or_profile": "~Xinrui_Zhan1;~Yang_Li46;~Wenyu_Liu4;~Jianke_Zhu1", "aff": "Zhejiang University;East China Normal University;Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;ecnu.edu.cn;zju.edu.cn;zju.edu.cn", "position": "MS student;Associate Professor;PhD student;Full Professor", "bibtex": "@misc{\nzhan2023warped,\ntitle={Warped Convolutional Networks: Bridge Homography to \\${\\textbackslash}mathfrak\\{sl\\}(3)\\$ algebra by Group Convolution},\nauthor={Xinrui Zhan and Yang Li and Wenyu Liu and Jianke Zhu},\nyear={2023},\nurl={https://openreview.net/forum?id=BcmrpOpUGN2}\n}", "github": "", "project": "", "reviewers": "UqX7;1jU3;cztz", "site": "https://openreview.net/forum?id=BcmrpOpUGN2", "pdf_size": 3565599, "recommendation": "3;5;8", "confidence": "3;2;2", "correctness": "3;3;4", "technical_novelty": "2;2;3", "empirical_novelty": "0;2;3", "wc_summary_paper": "57;69;80", "wc_strength_and_weaknesses": "479;130;154", "wc_clarity_quality_novelty_and_reproducibility": "39;19;63", "wc_summary_review": "19;34;40", "wc_review": "594;252;337", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 5.333333333333333, 
2.0548046676563256 ], "confidence_avg": [ 2.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.6666666666666667, 1.247219128924647 ], "wc_summary_paper_avg": [ 68.66666666666667, 9.392668535736915 ], "wc_strength_and_weaknesses_avg": [ 254.33333333333334, 159.16518323916057 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 40.333333333333336, 17.98765008430939 ], "wc_summary_review_avg": [ 31.0, 8.831760866327848 ], "wc_review_avg": [ 394.3333333333333, 145.38760454576436 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.8029550685469661, "corr_recommendation_correctness": 0.9176629354822472, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17502235996074859657&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Zhejiang University;East China Normal University", "aff_unique_dep": ";", "aff_unique_url": "https://www.zju.edu.cn;http://www.ecnu.edu.cn", "aff_unique_abbr": "ZJU;ECNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "BAYES RISK CTC: CONTROLLABLE CTC ALIGNMENT IN SEQUENCE-TO-SEQUENCE TASKS", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11642", "id": "Bd7GueaTxUz", "poster": "", "openreview": "https://openreview.net/forum?id=Bd7GueaTxUz", "slides": "https://iclr.cc/virtual/2023/poster/11642", "video": "https://iclr.cc/virtual/2023/poster/11642", "author_site": "Jinchuan Tian, Brian Yan, Jianwei Yu, CHAO WENG, Dong Yu, Shinji Watanabe", "tldr": "A Bayes risk function is applied to each CTC path to express the preference for selected paths and achieve controllable CTC alignment prediction", "abstract": "Sequence-to-Sequence (seq2seq) tasks transcribe the input sequence to a target sequence. The Connectionist Temporal Classification (CTC) criterion is widely used in multiple seq2seq tasks. Besides predicting the target sequence, a side product of CTC is to predict the alignment, which is the most probable input-long sequence that specifies a hard aligning relationship between the input and target units. As there are multiple potential aligning sequences (called paths) that are equally considered in CTC formulation, the choice of which path will be most probable and become the predicted alignment is always uncertain. In addition, it is usually observed that the alignment predicted by vanilla CTC will drift compared with its reference and rarely provides practical functionalities. Thus, the motivation of this work is to make the CTC alignment prediction controllable and thus equip CTC with extra functionalities. The Bayes risk CTC (BRCTC) criterion is then proposed in this work, in which a customizable Bayes risk function is adopted to enforce the desired characteristics of the predicted alignment. With the risk function, the BRCTC is a general framework to adopt some customizable preference over the paths in order to concentrate the posterior into a particular subset of the paths. In applications, we explore one particular preference which yields models with the down-sampling ability and reduced inference costs. 
By using BRCTC with another preference for early emissions, we obtain an improved performance-latency trade-off for online models. Experimentally, the proposed BRCTC reduces the inference cost of offline models by up to 47% without performance degradation and cuts down the overall latency of online systems to an unseen level.", "keywords": "CTC;alignment;sequence-to-sequence;speech recognition", "primary_area": "", "supplementary_material": "/attachment/89f552b67b5346891ae4334ea96a12cd41562631.zip", "author": "Jinchuan Tian;Brian Yan;Jianwei Yu;CHAO WENG;Dong Yu;Shinji Watanabe", "authorids": "~Jinchuan_Tian1;~Brian_Yan1;~Jianwei_Yu1;~CHAO_WENG1;~Dong_Yu2;~Shinji_Watanabe1", "gender": "M;;M;M;M;M", "homepage": ";https://brianyan918.github.io/;https://cweng6.github.io/;https://sites.google.com/view/dongyu888/;https://sites.google.com/view/shinjiwatanabe;https://msldcherrypick.github.io/yujianwei1994/", "dblp": ";;;71/4598-1;39/3245-1;23/2761", "google_scholar": "https://scholar.google.com.hk/citations?user=KE5I4R0AAAAJ;Pn3DcuUAAAAJ;;tMY31_gAAAAJ;U5xRA6QAAAAJ;fY1IJ4wAAAAJ", "orcid": ";;;0000-0003-0520-6844;0000-0002-5970-8631;0000-0002-2449-1436", "linkedin": ";;;dongyu/;shinji-watanabe-82533520;", "or_profile": "~Jinchuan_Tian1;~Brian_Yan1;~CHAO_WENG1;~Dong_Yu2;~Shinji_Watanabe1;~Jianwei_Yu2", "aff": "Peking University;School of Computer Science, Carnegie Mellon University;Tencent AI Lab;Tencent AI Lab;Carnegie Mellon University;Tencent AI Lab", "aff_domain": "pku.edu.cn;cs.cmu.edu;tencent.com;tencent.com;cmu.edu;tencent.com", "position": "MS student;PhD student;Principal Researcher;Distinguished Scientist;Associate Professor;Researcher", "bibtex": "@inproceedings{\ntian2023bayes,\ntitle={{BAYES} {RISK} {CTC}: {CONTROLLABLE} {CTC} {ALIGNMENT} {IN} {SEQUENCE}-{TO}-{SEQUENCE} {TASKS}},\nauthor={Jinchuan Tian and Brian Yan and Jianwei Yu and CHAO WENG and Dong Yu and Shinji Watanabe},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=Bd7GueaTxUz}\n}", "github": "", "project": "", "reviewers": "NBYg;GviL;TGbL;YKWK", "pdf_size": 3457532, "recommendation": "6;8;8;8", "confidence": "4;3;4;5", "correctness": "4;4;3;3", "technical_novelty": "3;3;3;4", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "107;108;99;121", "wc_strength_and_weaknesses": "120;361;186;189", "wc_clarity_quality_novelty_and_reproducibility": "8;17;16;270", "wc_summary_review": "63;33;19;41", "wc_review": "298;519;320;621", "wc_reply_reviewers": "0;19;0;19", "wc_reply_authors": "744;787;634;849", "reply_reviewers": "0;1;0;1", "reply_authors": "1;1;1;1", "recommendation_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 108.75, 7.8859051477937525 ], "wc_strength_and_weaknesses_avg": [ 214.0, 89.23844463010323 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 77.75, 111.05038270983131 ], "wc_summary_review_avg": [ 39.0, 15.937377450509228 ], "wc_review_avg": [ 439.5, 135.61434289926711 ], "wc_reply_reviewers_avg": [ 9.5, 9.5 ], "wc_reply_authors_avg": [ 753.5, 78.44265421312565 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": -0.5773502691896257, "gs_citation": 10, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=18056181405908782202&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=Bd7GueaTxUz", "email": "pku.edu.cn;cs.cmu.edu;tencent.com;tencent.com;cmu.edu;tencent.com", "author_num": 6, "aff_unique_index": "0;1;2;2;1;2", "aff_unique_norm": "Peking University;Carnegie Mellon University;Tencent", "aff_unique_dep": ";School of Computer Science;Tencent AI Lab", "aff_unique_url": "http://www.pku.edu.cn;https://www.cmu.edu;https://ai.tencent.com", "aff_unique_abbr": "Peking U;CMU;Tencent AI Lab", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pittsburgh", "aff_country_unique_index": "0;1;0;0;1;0", "aff_country_unique": "China;United States" }, { "id": "BdcfKgE9dhF", "title": "Robust Training through Adversarially Selected Data Subsets", "track": "main", "status": "Reject", "tldr": "Develops robust learning strategy where a subset of instances are selectively chosen for perturbation and the selection strategy is never revealed to the learner.", "abstract": "Robustness to adversarial perturbations often comes at the cost of a drop in accuracy on unperturbed or clean instances. Most existing defense mechanisms attempt to defend the learner from attack on all possible instances, which often degrades the accuracy on clean instances significantly. However, in practice, an attacker might only select a small subset of instances to attack, $e.g.$, in facial recognition systems an adversary might aim to target specific faces. Moreover, the subset selection strategy of the attacker is seldom known to the defense mechanism a priori, making it challenging to attune the mechanism beforehand. This motivates designing defense mechanisms which can (i) defend against attacks on subsets instead of all instances to prevent degradation of clean accuracy and, (ii) ensure good overall performance for attacks on any selected subset. In this work, we take a step towards solving this problem. We cast the training problem as a min-max game involving worst-case subset selection along with optimization of model parameters, rendering the problem NP-hard. To tackle this, we first show that, for a given learner's model, the objective can be expressed as a difference between a $\\gamma$-weakly submodular and a modular function. We use this property to propose ROGET, an iterative algorithm, which admits approximation guarantees for a class of loss functions. 
Our experiments show that ROGET obtains better overall accuracy compared to several state-of-the-art defense methods for different adversarial subset selection techniques.", "keywords": "Subset selection;Robust learning", "primary_area": "", "supplementary_material": "/attachment/462afb783d262ff307cae706c956b9ca051b9324.zip", "author": "Hitvarth Diwanji;Divyanshu Shende;Rishi Agarwal;Swaprava Nath;Abir De", "authorids": "~Hitvarth_Diwanji1;~Divyanshu_Shende1;~Rishi_Agarwal1;~Swaprava_Nath2;~Abir_De1", "gender": "M;M;M;M;M", "homepage": "https://homepages.iitb.ac.in/~190100057/HitvarthDiwanji/;https://github.com/divush;https://rishiagarwal2000.github.io;https://www.cse.iitb.ac.in/~swaprava/;", "dblp": ";;;70/9376;118/7174", "google_scholar": ";;mKJs6cAAAAAJ;TlpsH9cAAAAJ;https://scholar.google.co.in/citations?user=_9ZKKbIAAAAJ", "orcid": ";;0000-0002-1284-2593;0000-0001-8309-5006;", "linkedin": ";;rishi-agarwal-a473a2202/?originalSubdomain=in;swaprava/;", "or_profile": "~Hitvarth_Diwanji1;~Divyanshu_Shende1;~Rishi_Agarwal1;~Swaprava_Nath2;~Abir_De1", "aff": "Indian Institute of Technology, Bombay;;Stanford University;Computer Science and Engineering, Indian Institute of Technology Bombay;Indian Institute of Technology Bombay,", "aff_domain": "iitb.ac.in;;stanford.edu;cse.iitb.ac.in;iitb.ac.in", "position": "Undergrad student;;MS student;Assistant Professor;Assistant Professor", "bibtex": "@misc{\ndiwanji2023robust,\ntitle={Robust Training through Adversarially Selected Data Subsets},\nauthor={Hitvarth Diwanji and Divyanshu Shende and Rishi Agarwal and Swaprava Nath and Abir De},\nyear={2023},\nurl={https://openreview.net/forum?id=BdcfKgE9dhF}\n}", "github": "", "project": "", "reviewers": "H4So;SByk;iAPp;gdCM", "site": "https://openreview.net/forum?id=BdcfKgE9dhF", "pdf_size": 771970, "recommendation": "5;5;6;6", "confidence": "4;3;2;3", "correctness": "3;2;2;3", "technical_novelty": "3;3;3;4", "empirical_novelty": "2;2;3;4", "wc_summary_paper": "99;129;99;74", "wc_strength_and_weaknesses": "206;265;302;168", "wc_clarity_quality_novelty_and_reproducibility": "17;22;65;29", "wc_summary_review": "20;2;42;50", "wc_review": "342;418;508;321", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1436;1491;489;309", "reply_reviewers": "0;0;0;0", "reply_authors": "2;2;1;1", "recommendation_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 100.25, 19.48557158514987 ], "wc_strength_and_weaknesses_avg": [ 235.25, 51.76569810212164 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 33.25, 18.819869818890883 ], "wc_summary_review_avg": [ 28.5, 18.83480820183736 ], "wc_review_avg": [ 397.25, 73.42130140497375 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 931.25, 536.3936870433879 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.7071067811865475, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:qTU_E4F9nPIJ:scholar.google.com/&scioq=Robust+Training+through+Adversarially+Selected+Data+Subsets&hl=en&as_sdt=0,47", "gs_version_total": 0, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Indian Institute of Technology Bombay;Stanford University", "aff_unique_dep": ";", "aff_unique_url": 
"https://www.iitb.ac.in;https://www.stanford.edu", "aff_unique_abbr": "IIT Bombay;Stanford", "aff_campus_unique_index": "0;1;0;0", "aff_campus_unique": "Bombay;Stanford", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "India;United States" }, { "id": "BeI1fdNH_X", "title": "Improving Explanation Reliability through Group Attribution", "track": "main", "status": "Reject", "tldr": "We have proposed the group-wise attribution methods to yield more reliable explanation in understanding a model's prediction", "abstract": "Although input attribution methods are mainstream in understanding predictions of DNNs for straightforward interpretations, the non-linearity of DNNs often makes the attributed scores unreliable in explaining a given prediction, deteriorating the faithfulness of the explanation. \nHowever, the challenge could be mitigated by attributing scores to groups of explanatory components instead of the individuals, termed group attribution. While a group attribution would explain the group-wise contribution more reliably, it does not explain the component-wise contributions so that estimating component-wise scores yields less reliable explanation, indicating the trade-off of group attributions.\nIn this work, we introduce the generalized definition of reliability loss and group attribution, and formulate the optimization problem of the trade-off with these terms. We apply our formalization to Shapley value attribution to propose the optimization method G-SHAP. We show the effectiveness and explanatory benefits of our method through empirical results on image classification tasks.", "keywords": "Explainable AI;Attribution methods;Group attribution;Attribution reliability", "primary_area": "", "supplementary_material": "", "author": "Sangmin Lee;Jongseong Jang;Woohyung Lim", "authorids": "~Sangmin_Lee2;~Jongseong_Jang1;~Woohyung_Lim1", "gender": "M;M;M", "homepage": ";https://sites.google.com/view/jongseong-jang;", "dblp": ";140/4388;86/7195", "google_scholar": ";https://scholar.google.co.kr/citations?user=-DJPQqgAAAAJ;https://scholar.google.co.kr/citations?user=gtvxdcUAAAAJ", "orcid": ";;0000-0003-0525-9065", "linkedin": "smlee89/;;woohyunglim/", "or_profile": "~Sangmin_Lee2;~Jongseong_Jang1;~Woohyung_Lim1", "aff": "LG AI Research;LG AI Research;LG AI Research", "aff_domain": "lgresearch.ai;lgresearch.ai;lgresearch.ai", "position": "Research Scientist;Researcher;Vice President", "bibtex": "@misc{\nlee2023improving,\ntitle={Improving Explanation Reliability through Group Attribution},\nauthor={Sangmin Lee and Jongseong Jang and Woohyung Lim},\nyear={2023},\nurl={https://openreview.net/forum?id=BeI1fdNH_X}\n}", "github": "", "project": "", "reviewers": "FuXt;BDjb;ssJm;boPc", "site": "https://openreview.net/forum?id=BeI1fdNH_X", "pdf_size": 2592736, "recommendation": "3;5;6;6", "confidence": "3;3;4;3", "correctness": "3;3;3;4", "technical_novelty": "3;3;3;3", "empirical_novelty": "2;3;1;3", "wc_summary_paper": "80;53;115;47", "wc_strength_and_weaknesses": "413;41;237;142", "wc_clarity_quality_novelty_and_reproducibility": "49;71;52;16", "wc_summary_review": "57;306;19;2", "wc_review": "599;471;423;207", "wc_reply_reviewers": "15;18;0;0", "wc_reply_authors": "699;558;539;312", "reply_reviewers": "1;1;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.25, 0.82915619758885 ], 
"wc_summary_paper_avg": [ 73.75, 26.864242032858474 ], "wc_strength_and_weaknesses_avg": [ 208.25, 137.03170253631092 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 47.0, 19.78635893740938 ], "wc_summary_review_avg": [ 96.0, 122.86781515108015 ], "wc_review_avg": [ 425.0, 141.35062787267697 ], "wc_reply_reviewers_avg": [ 8.25, 8.317902379807062 ], "wc_reply_authors_avg": [ 527.0, 138.66686698703478 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.4714045207910316, "corr_recommendation_correctness": 0.4714045207910316, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Acrj5VjEMq8J:scholar.google.com/&scioq=Improving+Explanation+Reliability+through+Group+Attribution&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "LG", "aff_unique_dep": "LG AI Research", "aff_unique_url": "https://www.lgaires.com", "aff_unique_abbr": "LG AI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "id": "BgLTe3a4FO", "title": "Fed-Cor: Federated Correlation Test with Secure Aggregation", "track": "main", "status": "Reject", "tldr": "We propose the first secure federated correlation test protocol Fed-Cor, which minimizes both privacy leakage and communication cost.", "abstract": "In this paper, we propose the first federated correlation test framework compatible with secure aggregation, namely Fed-Cor. In Fed-Cor, correlation tests are recast as frequency moment estimation problems. To estimate the frequency moments, the clients collaboratively generate a shared projection matrix and then use stable projection to encode the local information in a compact vector. As such encodings can be linearly aggregated, secure aggregation can be applied to conceal the individual updates. We formally establish the security guarantee of Fed-Cor by proving that only the minimum necessary information (i.e., the correlation statistics) is revealed to the server. 
The evaluation results show that Fed-Cor achieves good accuracy with small client-side computation overhead and performs comparably to the centralized correlation test in several real-world case studies.", "keywords": "Federated Analytics;Privacy and Security", "primary_area": "", "supplementary_material": "/attachment/3e070ba04669ea51b10392560ec0bfa703e263c1.zip", "author": "Lun Wang;Qi Pang;Shuai Wang;Wenting Zheng;Dawn Song", "authorids": "~Lun_Wang1;~Qi_Pang1;~Shuai_Wang7;wenting@cmu.edu;~Dawn_Song1", "gender": ";;M;;F", "homepage": "https://wanglun1996.github.io/;;https://home.cse.ust.hk/~shuaiw/;;", "dblp": ";;42/1503-11;;s/DXSong", "google_scholar": ";;;;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Lun_Wang1;~Qi_Pang1;~Shuai_Wang7;wenting@cmu.edu;~Dawn_Song1", "aff": "Google;;;;University of California, Berkeley", "aff_domain": "google.com;;;;berkeley.edu", "position": "Researcher;;;;Full Professor", "bibtex": "@misc{\nwang2023fedcor,\ntitle={Fed-Cor: Federated Correlation Test with Secure Aggregation},\nauthor={Lun Wang and Qi Pang and Shuai Wang and Wenting Zheng and Dawn Song},\nyear={2023},\nurl={https://openreview.net/forum?id=BgLTe3a4FO}\n}", "github": "", "project": "", "reviewers": "jFiZ;Nyg7;2ZL2", "site": "https://openreview.net/forum?id=BgLTe3a4FO", "pdf_size": 1188982, "recommendation": "3;6;6", "confidence": "4;3;3", "correctness": "4;4;3", "technical_novelty": "2;2;4", "empirical_novelty": "4;2;2", "wc_summary_paper": "96;40;22", "wc_strength_and_weaknesses": "214;177;74", "wc_clarity_quality_novelty_and_reproducibility": "19;97;12", "wc_summary_review": "84;48;12", "wc_review": "413;362;120", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.9428090415820634 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.9428090415820634 ], "wc_summary_paper_avg": [ 52.666666666666664, 31.510139461590597 ], "wc_strength_and_weaknesses_avg": [ 155.0, 59.23399924592857 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 42.666666666666664, 38.5256047612782 ], "wc_summary_review_avg": [ 48.0, 29.393876913398138 ], "wc_review_avg": [ 298.3333333333333, 127.80801574584002 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": -0.5, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:aWR1w3Z8aqYJ:scholar.google.com/&scioq=Fed-Cor:+Federated+Correlation+Test+with+Secure+Aggregation&hl=en&as_sdt=0,31", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Google;University of California, Berkeley", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://www.berkeley.edu", "aff_unique_abbr": "Google;UC Berkeley", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Mountain View;Berkeley", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "BgMo9ofIQi6", "title": "GENERALIZED MATRIX LOCAL LOW RANK REPRESENTATION BY RANDOM PROJECTION AND SUBMATRIX PROPAGATION", "track": "main", "status": "Reject", "tldr": "We developed a sub-matrix propagation based approach to solve the 
fundamental mathematical problem of matrix local low rank representation.", "abstract": "Detecting distinct submatrices of low rank property is a highly desirable matrix representation learning technique for the ease of data interpretation, called the matrix local low rank representation (MLLRR). Based on different mathematical assumptions of the local pattern, the MLLRR problem could be categorized into two sub-problems, namely local constant variation (LCV) and local linear low rank (LLR). Existing solutions on MLLRR only focused on the LCV problem, which misses a substantial amount of true and interesting patterns. In this work, we develop a novel matrix computational framework called RPSP (Random Probing based submatrix Propagation) that provides an effective solution for both of the LCV and LLR problems. RPSP detects local low rank patterns that grow from small submatrices of low rank property, which are determined by a random projection approach. RPSP is supported by theories of random projection. Experiments on synthetic data demonstrate that RPSP outperforms all state-of-the-art methods, with the capacity to robustly and correctly identify the low rank matrices under both LCV and LLR settings. On real-world datasets, RPSP also demonstrates its effectiveness in identifying interpretable local low rank matrices.\n", "keywords": "Matrix decomposition;Local Low Rank matrix detection;Representation learning;Subspace learning", "primary_area": "", "supplementary_material": "", "author": "Pengtao Dang;Wennan Chang;Haiqi Zhu;Changlin Wan;Tong Zhao;Tingo Guo;Paul Salama;Sha Cao;Chi Zhang", "authorids": "~Pengtao_Dang1;~Wennan_Chang1;~Haiqi_Zhu1;~Changlin_Wan1;~Tong_Zhao2;guoti@iu.edu;~Paul_Salama1;~Sha_Cao1;~Chi_Zhang18", "gender": "M;;;M;M;;M;F;M", "homepage": "https://zcslab.github.io/people/pengtao/;https://zcslab.github.io/;;https://clwan.github.io/;;;;https://www.ohsu.edu/people/sha-cao-phd;https://zcslab.github.io", "dblp": "312/3705;;;15/158;;;;169/1773;91/195-21", "google_scholar": "p1j1-YIAAAAJ;;;DISpxbgAAAAJ;SSBJh9oAAAAJ;;r5wLPJkAAAAJ;wob2pT4AAAAJ;8r9Eb_sAAAAJ", "orcid": ";;;;;;0000-0002-7643-3879;0000-0002-8645-848X;0000-0001-9553-0925", "linkedin": ";;haiqi-zhu-480855138/;;;;;;", "or_profile": "~Pengtao_Dang1;~Wennan_Chang1;~Haiqi_Zhu1;~Changlin_Wan1;~Tong_Zhao2;guoti@iu.edu;~Paul_Salama1;~Sha_Cao1;~Chi_Zhang18", "aff": "Purdue University;Purdue University;Indiana University;Genentech;Uber;;Indiana University/Purdue University at Indianapolis;Indiana University, School of Medicine;Indiana University", "aff_domain": "purdue.edu;purdue.edu;cs.indiana.edu;gene.com;uber.com;;iupui.edu;iu.edu;iu.edu", "position": "PhD student;PhD student;PhD student;Researcher;Researcher;;Full Professor;Assistant Professor;Associate Professor", "bibtex": "@misc{\ndang2023generalized,\ntitle={{GENERALIZED} {MATRIX} {LOCAL} {LOW} {RANK} {REPRESENTATION} {BY} {RANDOM} {PROJECTION} {AND} {SUBMATRIX} {PROPAGATION}},\nauthor={Pengtao Dang and Wennan Chang and Haiqi Zhu and Changlin Wan and Tong Zhao and Tingo Guo and Paul Salama and Sha Cao and Chi Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=BgMo9ofIQi6}\n}", "github": "", "project": "", "reviewers": "gxjs;8ATp;8ysf", "site": "https://openreview.net/forum?id=BgMo9ofIQi6", "pdf_size": 16933243, "recommendation": "3;3;5", "confidence": "3;4;4", "correctness": "2;3;3", "technical_novelty": "3;2;3", "empirical_novelty": "3;2;3", "wc_summary_paper": "54;113;98", "wc_strength_and_weaknesses": "243;349;148", 
"wc_clarity_quality_novelty_and_reproducibility": "70;59;50", "wc_summary_review": "22;158;41", "wc_review": "389;679;337", "wc_reply_reviewers": "764;0;0", "wc_reply_authors": "932;1277;578", "reply_reviewers": "1;0;0", "reply_authors": "2;3;2", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 88.33333333333333, 25.037749277618563 ], "wc_strength_and_weaknesses_avg": [ 246.66666666666666, 82.09885640020951 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 59.666666666666664, 8.178562764256865 ], "wc_summary_review_avg": [ 73.66666666666667, 60.135033235396335 ], "wc_review_avg": [ 468.3333333333333, 150.46889674909193 ], "wc_reply_reviewers_avg": [ 254.66666666666666, 360.15305388434825 ], "wc_reply_authors_avg": [ 929.0, 285.3734395489531 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 9, 0 ], "corr_recommendation_confidence": 0.5, "corr_recommendation_correctness": 0.5, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17835256243009911700&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;1;2;3;4;1;1", "aff_unique_norm": "Purdue University;Indiana University;Genentech;Uber Technologies Inc.;Indiana University-Purdue University Indianapolis", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.purdue.edu;https://www.indiana.edu;https://www.genentech.com;https://www.uber.com;https://www.iupui.edu", "aff_unique_abbr": "Purdue;IU;Genentech;Uber;IUPUI", "aff_campus_unique_index": "1", "aff_campus_unique": ";Indianapolis", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "Bgcp4BniE-U", "title": "Generated Distributions Are All You Need for Membership Inference Attacks Against Generative Models", "track": "main", "status": "Withdraw", "tldr": "Our work proposes a generalized membership inference against various generative models.", "abstract": "Generative models have shown their promising performance on various real-world tasks, which, at the same time, introduce the threat of leaking private information of their training data. Several membership inference attacks against generative models have been proposed in recent years and exhibit their effectiveness in different settings. However, these attacks all suffer from their own limitations and cannot be generalized to all generative models under all scenarios. In this paper, we propose the first generalized membership inference attack for generative models, which can be utilized to quantitatively evaluate the privacy leakage of various existing generative models. Compared with previous works, our attack has three main advantages, i.e., (i) only requires black-box access to the target model, (ii) is computationally efficient, and (iii) can be generalized to numerous generative models. Extensive experiments show that various existing generative models in a variety of applications are vulnerable to our attack. For example, our attack could achieve the AUC of 0.997 (0.997) and 0.998 (0.999) against the generative model of DDPM (DDIM) on the CelebA and CIFAR-10 datasets. 
These results demonstrate that private information can be effectively exploited by attackers in an efficient way, which calls on the community to be aware of privacy threats when designing generative models.", "keywords": "generative models;diffusion models;membership inference", "primary_area": "", "supplementary_material": "", "author": "Minxing Zhang;Ning Yu;Rui Wen;Michael Backes;Yang Zhang", "authorids": "~Minxing_Zhang1;~Ning_Yu2;~Rui_Wen3;director@cispa.de;~Yang_Zhang15", "gender": ";;M;;M", "homepage": "https://minxingzhang.github.io/;;https://ruiwen-ai.github.io/;;https://yangzhangalmo.github.io/", "dblp": "302/0867;;63/10765-2;;06/6785-16", "google_scholar": "wsSLja0AAAAJ;;https://scholar.google.com/citations?hl=en;;Xeb2888AAAAJ", "orcid": "0009-0005-6368-263X;;0009-0009-0691-7569;;0000-0003-3612-7348", "linkedin": ";;;;", "or_profile": "~Minxing_Zhang1;~Ning_Yu2;~Rui_Wen3;director@cispa.de;~Yang_Zhang15", "aff": "CISPA Helmholtz Center for Information Security;;CISPA Helmholtz Center for Information Security;;CISPA Helmholtz Center for Information Security", "aff_domain": "cispa.saarland;;cispa.de;;cispa.de", "position": "PhD student;;PhD student;;Assistant Professor", "bibtex": "@misc{\nzhang2023generated,\ntitle={Generated Distributions Are All You Need for Membership Inference Attacks Against Generative Models},\nauthor={Minxing Zhang and Ning Yu and Rui Wen and Michael Backes and Yang Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=Bgcp4BniE-U}\n}", "github": "", "project": "", "reviewers": "EQ1k;1W21;nE9X", "site": "https://openreview.net/forum?id=Bgcp4BniE-U", "pdf_size": 662832, "recommendation": "3;3;5", "confidence": "4;5;3", "correctness": "3;1;3", "technical_novelty": "3;2;2", "empirical_novelty": "2;2;1", "wc_summary_paper": "51;125;64", "wc_strength_and_weaknesses": "203;230;55", "wc_clarity_quality_novelty_and_reproducibility": "46;37;26", "wc_summary_review": "42;13;101", "wc_review": "342;405;246", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "correctness_avg": [ 2.3333333333333335, 0.9428090415820634 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "wc_summary_paper_avg": [ 80.0, 32.25936556516056 ], "wc_strength_and_weaknesses_avg": [ 162.66666666666666, 76.92564952963059 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 36.333333333333336, 8.178562764256865 ], "wc_summary_review_avg": [ 52.0, 36.615115275889366 ], "wc_review_avg": [ 331.0, 65.37583651472461 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.8660254037844387, "corr_recommendation_correctness": 0.5000000000000001, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7418057502111667427&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff_unique_index": "0;0;0", "aff_unique_norm": "CISPA Helmholtz Center for Information Security", "aff_unique_dep": "", "aff_unique_url": "https://www.cispa.de/", "aff_unique_abbr": "CISPA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "id": "BgoOPulkznY", "title": "Unsupervised 3D Scene 
Representation Learning via Movable Object Inference", "track": "main", "status": "Reject", "tldr": "Unsupervised, category-agnostic, object-centric 3D representation learning for complex scenes ", "abstract": "Unsupervised, category-agnostic, object-centric 3D representation learning for complex scenes remains an open problem in computer vision. While a few recent methods can now discover 3D object radiance fields from a single image without supervision, they are limited to simplistic scenes with objects of a single category, often with a uniform color. This is because they discover objects purely based on appearance cues\u2014objects are made of pixels that look alike. In this work, we propose Movable Object Radiance Fields (MORF), aiming at scaling to complex scenes with diverse categories of objects. Inspired by cognitive science of object learning in babies, MORF learns 3D object representations via movable object inference. During training, MORF first obtains 2D masks of movable objects via a self-supervised movable object segmentation method; it then bridges the gap to 3D object representations via conditional neural rendering in multiple views. During testing, MORF can discover, reconstruct, and move unseen objects from novel categories, all from a single image. Experiments show that MORF extracts accurate object geometry and supports realistic object and scene reconstruction and editing, significantly outperforming the state-of-the-art.", "keywords": "3D representation learning;self-supervised learning;object-discovery;neural rendering", "primary_area": "", "supplementary_material": "/attachment/11414fd8d6f192dbd4b54b15782d884abe5bb4f7.zip", "author": "Honglin Chen;Wanhee Lee;Hong-Xing Yu;Rahul Mysore Venkatesh;Joshua B. Tenenbaum;Daniel Bear;Jiajun Wu;Daniel LK Yamins", "authorids": "~Honglin_Chen2;~Wanhee_Lee1;~Hong-Xing_Yu1;~Rahul_Mysore_Venkatesh1;~Joshua_B._Tenenbaum1;~Daniel_Bear1;~Jiajun_Wu1;~Daniel_LK_Yamins1", "gender": "M;M;M;;M;M;M;M", "homepage": "https://web.stanford.edu/~honglinc/;;https://kovenyu.com;;;https://jiajunwu.com;https://Neuroailab.stanford.edu;", "dblp": "224/0552;;205/2676.html;t/JoshuaBTenenbaum;223/5736;117/4768;;225/4643", "google_scholar": "https://scholar.google.com/citations?hl=en;BdHgmrUAAAAJ;kNKncZcAAAAJ;;uYbkEzYAAAAJ;2efgcS0AAAAJ;;", "orcid": ";;;;;0000-0002-4176-343X;;", "linkedin": ";;;;;jiajunwu/;;", "or_profile": "~Honglin_Chen2;~Wanhee_Lee1;~Hong-Xing_Yu1;~Joshua_B._Tenenbaum1;~Daniel_Bear1;~Jiajun_Wu1;~Daniel_LK_Yamins1;~Rahul_M._V.1", "aff": "Stanford University;Stanford University;Stanford University;Massachusetts Institute of Technology;;Stanford University;Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;cs.stanford.edu;mit.edu;;stanford.edu;stanford.edu;stanford.edu", "position": "PhD student;PhD student;PhD student;Professor;;Assistant Professor;Assistant Professor;PhD student", "bibtex": "@misc{\nchen2023unsupervised,\ntitle={Unsupervised 3D Scene Representation Learning via Movable Object Inference},\nauthor={Honglin Chen and Wanhee Lee and Hong-Xing Yu and Rahul Mysore Venkatesh and Joshua B. 
Tenenbaum and Daniel Bear and Jiajun Wu and Daniel LK Yamins},\nyear={2023},\nurl={https://openreview.net/forum?id=BgoOPulkznY}\n}", "github": "", "project": "", "reviewers": "NgLD;kstW;dyX9;Kin7", "site": "https://openreview.net/forum?id=BgoOPulkznY", "pdf_size": 37562899, "recommendation": "3;5;6;6", "confidence": "3;4;3;4", "correctness": "3;3;4;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "54;49;98;87", "wc_strength_and_weaknesses": "175;238;81;258", "wc_clarity_quality_novelty_and_reproducibility": "9;47;167;227", "wc_summary_review": "9;53;32;39", "wc_review": "247;387;378;611", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "594;736;433;776", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 72.0, 20.940391591371924 ], "wc_strength_and_weaknesses_avg": [ 188.0, 68.95288246331694 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 112.5, 88.15185760946845 ], "wc_summary_review_avg": [ 33.25, 15.911866640969563 ], "wc_review_avg": [ 405.75, 130.81547117982643 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 634.75, 134.6910817389184 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": 0.40824829046386296, "corr_recommendation_correctness": 0.4714045207910316, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11242883452669306071&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0;1;0;0;0", "aff_unique_norm": "Stanford University;Massachusetts Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.stanford.edu;https://web.mit.edu", "aff_unique_abbr": "Stanford;MIT", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "BnznzofWMi", "title": "Representation Mutual Learning for End-to-End Weakly-Supervised Semantic Segmentation", "track": "main", "status": "Withdraw", "tldr": "An efficient and decoder-free Representation Mutual Learning (RML) framework for WSSS that combines instance-level, feature-level and pixel-level mutual learning strategies to improve segmentation quality.", "abstract": "In recent years, end-to-end solutions for Weakly Supervised Semantic Segmentation (WSSS) with image-level labels have been developed rapidly. Previous end-to-end methods usually rely on segmentation branches or decoders to predict segmentation masks, bringing additional parameter numbers and consumption time. In this paper, we propose a decoder-free Representation Mutual Learning (RML) framework to directly predict segmentation masks, which leverages collaborative learning and mutual teaching among multi-level feature representations to improve segmentation performance. Our RML is a straightforward and efficient end-to-end WSSS framework, which incorporates the instance-level, feature-level and pixel-level representation mutual learning strategies to improve segmentation quality. 
To enhance the Class Activation Map (CAM) representations, we propose a CAM-driven Instance-level Mutual Learning strategy that preserves the equivariance of CAMs and expands the distance between different classes of semantic prototypes. Besides, we design a Multi-scale Feature-level Mutual Learning strategy, which can align aggregated contextual representations and facilitate the representation capability of contextual representations. Furthermore, we also provide an Affinity-aware Pixel-level Mutual Learning strategy to learn semantic affinity representations. Experiments validate that our RML yields a significant performance improvement over recent end-to-end methods on the Pascal VOC 2012 dataset and the MS COCO 2014 dataset. The release code is available in the supplementary material.", "keywords": "Weakly Supervised Semantic Segmentation;Representation Mutual Learning;End-to-End", "primary_area": "", "supplementary_material": "/attachment/aef323b4eed9edd8c20a023bcca06a67a8b6c663.zip", "author": "Rongtao Xu;Changwei Wang;Shibiao Xu;Weiliang Meng;Xiaopeng Zhang", "authorids": "~Rongtao_Xu1;~Changwei_Wang2;~Shibiao_Xu2;~Weiliang_Meng1;~Xiaopeng_Zhang4", "gender": ";M;M;M;", "homepage": "http://www.nlpr.ia.ac.cn/ivc/;;https://people.ucas.ac.cn/~mengweiliang?language=en;http://people.ucas.edu.cn/~0005319?language=en;http://jsxb.scsc.cn/introlist_76/1168.html", "dblp": "93/4025;76/8401;58/7599;;61/8501", "google_scholar": ";;;;", "orcid": ";;;0000-0002-0092-6474;0000-0001-8259-7717", "linkedin": ";;;;", "or_profile": "~Rongtao_Xu1;~Shibiao_Xu2;~Weiliang_Meng1;~Xiaopeng_Zhang4;~Wang_Changwei1", "aff": "Institute of Automation, Chinese Academy of Sciences;Beijing University of Posts and Telecommunications;Institute of automation, Chinese academy of sciences, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;University of Chinese Academy of Sciences", "aff_domain": "ia.ac.cn;bupt.edu.cn;ia.ac.cn;ia.ac.cn;ucas.ac.cn", "position": "PhD student;Full Professor;Associate Professor;Full Professor;PhD student", "bibtex": "@misc{\nxu2023representation,\ntitle={Representation Mutual Learning for End-to-End Weakly-Supervised Semantic Segmentation},\nauthor={Rongtao Xu and Changwei Wang and Shibiao Xu and Weiliang Meng and Xiaopeng Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=BnznzofWMi}\n}", "github": "", "project": "", "reviewers": "efo1;E4tC;W7kd", "site": "https://openreview.net/forum?id=BnznzofWMi", "pdf_size": 2978594, "recommendation": "3;3;5", "confidence": "5;4;4", "correctness": "3;3;3", "technical_novelty": "2;2;3", "empirical_novelty": "2;2;3", "wc_summary_paper": "56;42;66", "wc_strength_and_weaknesses": "388;215;140", "wc_clarity_quality_novelty_and_reproducibility": "5;31;74", "wc_summary_review": "56;54;45", "wc_review": "505;342;325", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 54.666666666666664, 9.843215373488933 ], "wc_strength_and_weaknesses_avg": [ 247.66666666666666, 103.84710984048725 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 36.666666666666664, 28.4526897771644 ], "wc_summary_review_avg": [ 51.666666666666664, 4.784233364802441 ],
"wc_review_avg": [ 390.6666666666667, 81.14322043290014 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.4999999999999999, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Oa--D70v7BkJ:scholar.google.com/&scioq=Representation+Mutual+Learning+for+End-to-End+Weakly-Supervised+Semantic+Segmentation&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;0;0;2", "aff_unique_norm": "Chinese Academy of Sciences;Beijing University of Posts and Telecommunications;University of Chinese Academy of Sciences", "aff_unique_dep": "Institute of Automation;;", "aff_unique_url": "http://www.ia.cas.cn;http://www.bupt.edu.cn/;http://www.ucas.ac.cn", "aff_unique_abbr": "CAS;BUPT;UCAS", "aff_campus_unique_index": "1", "aff_campus_unique": ";Beijing", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "Bo-1bxmCrrA", "title": "Domain-Invariant Auxiliary Learning for Robust Few-Shot Predictions from Noisy Data", "track": "main", "status": "Withdraw", "tldr": "We propose a novel MetaAux framework using auxiliary tasks to effectively learn a robust representation for better generalization and adaptation in unseen few-shot tasks.", "abstract": "Modern meta-learning approaches produce state-of-the-art performance by imitating the test condition for few-shot learning (FSL) using episodic training. However, overfitting and memorizing corrupted labels has been a long-standing issue. Data cleansing offers a promising solution for dealing with noisy labels. Nevertheless, in FSL, data cleansing exacerbates the severity of the problem as the available training data becomes much more limited and the model is typically inadequately trained. In this work, we address overfitting in a noisy setting by exploiting auxiliary tasks to learn a better shared representation. Unsupervised auxiliary tasks are designed with no extra labeling overhead and Wasserstein distance is leveraged to align the primary and auxiliary distributions that ensure the learned knowledge is domain-invariant. Building upon the theoretical advances on PAC-Bayesian analysis, we gain ground on \nderiving novel generalization bounds of meta-learning with auxiliary tasks and under the effect of noisy corruptions. Extensive experiments on FSL tasks with noisy labels are conducted to show the effectiveness and robustness of our proposed method. 
", "keywords": "meta-learning;few-shot learning;auxiliary task", "primary_area": "", "supplementary_material": "", "author": "Xiaofan Que;Qi Yu", "authorids": "~Xiaofan_Que1;~Qi_Yu1", "gender": "F;M", "homepage": "https://sites.google.com/d/1AhoIzPoUGppAbU5lGkI9i1CHfCQNDv3l/p/1XvVyabWCP44ubZvpcs2wQnO-wugU8P1j/edit;https://www.rit.edu/mining/", "dblp": "208/4881.html;58/6957-1", "google_scholar": "cgxoMiIAAAAJ;L3gWdfEAAAAJ", "orcid": ";0000-0002-0426-5407", "linkedin": ";", "or_profile": "~Xiaofan_Que1;~Qi_Yu1", "aff": "Rochester Institute of Technology;Rochester Institute of Technology", "aff_domain": "rit.edu;rit.edu", "position": "PhD student;Professor", "bibtex": "@misc{\nque2023domaininvariant,\ntitle={Domain-Invariant Auxiliary Learning for Robust Few-Shot Predictions from Noisy Data},\nauthor={Xiaofan Que and Qi Yu},\nyear={2023},\nurl={https://openreview.net/forum?id=Bo-1bxmCrrA}\n}", "github": "", "project": "", "reviewers": "vJb6;kCFb;yBKH;mKi1", "site": "https://openreview.net/forum?id=Bo-1bxmCrrA", "pdf_size": 2849161, "recommendation": "3;3;6;6", "confidence": "4;4;4;4", "correctness": "2;3;3;3", "technical_novelty": "2;2;4;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "86;229;29;63", "wc_strength_and_weaknesses": "1150;114;163;228", "wc_clarity_quality_novelty_and_reproducibility": "80;1266;7;17", "wc_summary_review": "66;30;14;8", "wc_review": "1382;1639;213;316", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.5, 1.5 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 101.75, 76.21474594853676 ], "wc_strength_and_weaknesses_avg": [ 413.75, 426.9931937396661 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 342.5, 533.9168942822469 ], "wc_summary_review_avg": [ 29.5, 22.555487137279922 ], "wc_review_avg": [ 887.5, 630.6435205407251 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5773502691896258, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:RclB3Y975Q4J:scholar.google.com/&scioq=Domain-Invariant+Auxiliary+Learning+for+Robust+Few-Shot+Predictions+from+Noisy+Data&hl=en&as_sdt=0,47", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Rochester Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.rit.edu", "aff_unique_abbr": "RIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Multi-lingual Evaluation of Code Generation Models", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12102", "id": "Bo7eeXm6An8", "poster": "", "openreview": "https://openreview.net/forum?id=Bo7eeXm6An8", "slides": "https://iclr.cc/virtual/2023/poster/12102", "video": "https://iclr.cc/virtual/2023/poster/12102", "author_site": "Ben Athiwaratkun, Sanjay Krishna Gouda, Zijian Wang, Xiaopeng Li, YUCHEN TIAN, Ming Tan, Wasi Ahmad, Shiqi Wang, Qing Sun, Mingyue Shang, Sujan Kumar Gonugondla, Hantian Ding, Varun Kumar, Nathan Fulton, Arash Farahani, Siddhartha Jain, Robert Giaquinto, Haifeng Qian, Murali Krishna Ramanathan, 
Ramesh Nallapati, Baishakhi Ray, Parminder Bhatia, Sudipta Sengupta, Dan Roth, Bing Xiang", "tldr": "", "abstract": "We present two new benchmarks, MBXP and Multilingual HumanEval, designed to evaluate code completion models in over 10 programming languages. These datasets are generated using a conversion framework that transpiles prompts and test cases from the original MBPP and HumanEval datasets into the corresponding data in the target language. By using these benchmarks, we are able to assess the performance of code generation models in a multi-lingual fashion, and discovered generalization ability of language models on out-of-domain languages, advantages of multi-lingual models over mono-lingual, the ability of few-shot prompting to teach the model new languages, and zero-shot translation abilities. In addition, we use our code generation model to perform large-scale bootstrapping to obtain synthetic canonical solutions in several languages, which can be used for other code-related evaluations such as code insertion, robustness, or summarization tasks.", "keywords": "code generation;execution-based evaluation;test-based evaluation;language models;multi-lingual code generation benchmark;code insertion;code summarization;robustness for code;code translation;zero-shot code translation;multi-lingual;mono-lingual;language models.", "primary_area": "", "supplementary_material": "", "author": "Ben Athiwaratkun;Sanjay Krishna Gouda;Zijian Wang;Xiaopeng Li;Yuchen Tian;Ming Tan;Wasi Uddin Ahmad;Shiqi Wang;Qing Sun;Mingyue Shang;Sujan Kumar Gonugondla;Hantian Ding;Varun Kumar;Nathan Fulton;Arash Farahani;Siddhartha Jain;Robert Giaquinto;Haifeng Qian;Murali Krishna Ramanathan;Ramesh Nallapati;Baishakhi Ray;Parminder Bhatia;Sudipta Sengupta;Dan Roth;Bing Xiang", "authorids": "~Ben_Athiwaratkun1;~Sanjay_Krishna_Gouda1;~Zijian_Wang1;~Xiaopeng_Li1;tiayuche@amazon.com;~Ming_Tan2;~Wasi_Uddin_Ahmad1;~Shiqi_Wang2;~Qing_Sun2;~Mingyue_Shang1;~Sujan_Kumar_Gonugondla1;~Hantian_Ding1;~Varun_Kumar3;~Nathan_Fulton2;~Arash_Farahani1;~Siddhartha_Jain1;~Robert_Giaquinto1;~Haifeng_Qian1;~Murali_Krishna_Ramanathan1;~Ramesh_Nallapati1;~Baishakhi_Ray2;~Parminder_Bhatia1;sudipta@amazon.com;~Dan_Roth3;~Bing_Xiang2", "gender": "M;M;;M;;;M;M;F;;;M;M;M;M;M;;M;M;M;F;M;;M;", "homepage": "https://benathi.github.io;;;http://eelxpeng.github.io/;;https://www.linkedin.com/in/ming-tan-18b3436a/;http://wasiahmad.github.io/;https://shiqi-wang.github.io;https://computing.ece.vt.edu/~sunqing/;;https://gsujankumar.github.io;;https://varunkumar-dev.github.io/;https://nfulton.org;;https://tmfs10.github.io/;https://www-users.cs.umn.edu/~smit7982/;https://sites.google.com/view/haifengqian;;;http://rayb.info/;;;https://www.cis.upenn.edu/~danroth/;", "dblp": "166/1659;;;;;;183/0576;58/9145-2;https://dblp.uni-trier.de/pers/hd/s/Sun:Qing;;166/6408.html;242/8095;;120/0131.html;;81/8212;213/8085;61/6767;75/541.html;59/4797;74/1969;168/8615;;r/DanRoth;", "google_scholar": "KZpZTTQAAAAJ;_zJ8IOEAAAAJ;;https://scholar.google.com.hk/citations?user=vUZu9msAAAAJ;;;YCHJZOMAAAAJ;u_MzXeMAAAAJ;sSlAO5sAAAAJ;;F_ud9E4AAAAJ;nEuMO58AAAAJ;d-La2lQAAAAJ;E9HcZ6YAAAAJ;ZAmO1WAAAAAJ;mBJIa8cAAAAJ;s7MpvDUAAAAJ;https://scholar.google.com/citations?hl=en;;;https://scholar.google.com.tw/citations?user=VaAEb5YAAAAJ;;;E-bpPWgAAAAJ;A6yjdJAAAAAJ", "orcid": ";;;;;;;0000-0002-6338-1432;;;0000-0003-4743-6461;;;;;;;0000-0002-7189-6903;;;;;;;", "linkedin": 
";;;eelxpeng/;;;ahmadwasi/;tcwangshiqi/;;;sujan-kumar-gonugondla-ab6787142/;;varunin/;;arashfarahani/;;;haifengqian;;;;;;dan-roth-8667361/;", "or_profile": "~Ben_Athiwaratkun1;~Sanjay_Krishna_Gouda1;~Zijian_Wang1;~Xiaopeng_Li1;tiayuche@amazon.com;~Ming_Tan2;~Wasi_Uddin_Ahmad1;~Shiqi_Wang2;~Qing_Sun2;~Mingyue_Shang1;~Sujan_Kumar_Gonugondla1;~Hantian_Ding1;~Varun_Kumar3;~Nathan_Fulton2;~Arash_Farahani1;~Siddhartha_Jain1;~Robert_Giaquinto1;~Haifeng_Qian1;~Murali_Krishna_Ramanathan1;~Ramesh_Nallapati1;~Baishakhi_Ray2;~Parminder_Bhatia1;sudipta@amazon.com;~Dan_Roth3;~Bing_Xiang2", "aff": "Amazon;Amazon;;Amazon;;Amazon;Amazon;Amazon;Amazon;;Amazon;Amazon;Amazon;;Amazon;Amazon;Amazon;Amazon;Amazon;Amazon Web Services;Columbia University;Amazon;;Amazon;Goldman Sachs", "aff_domain": "amazon.com;amazon.com;;amazon.com;;amazon.com;amazon.com;amazon.com;amazon.com;;amazon.com;amazon.com;amazon.com;;amazon.com;amazon.com;amazon.com;amazon.com;amazon.com;amazon.com;columbia.edu;amazon.com;;amazon.com;gs.com", "position": "AI Scientist;Researcher;;Applied Scientist;;Applied Scientist;Applied Scientist;Researcher;Researcher;;Researcher;Researcher;Senior Applied Scientist;;Researcher;Applied Scientist;Researcher;Senior Applied Scientist;Principal Researcher;Senior Principal Scientist;Assistant Professor;Principal Researcher;;VP and Distinguished Scientist;Managing Director", "bibtex": "@inproceedings{\nathiwaratkun2023multilingual,\ntitle={Multi-lingual Evaluation of Code Generation Models},\nauthor={Ben Athiwaratkun and Sanjay Krishna Gouda and Zijian Wang and Xiaopeng Li and Yuchen Tian and Ming Tan and Wasi Uddin Ahmad and Shiqi Wang and Qing Sun and Mingyue Shang and Sujan Kumar Gonugondla and Hantian Ding and Varun Kumar and Nathan Fulton and Arash Farahani and Siddhartha Jain and Robert Giaquinto and Haifeng Qian and Murali Krishna Ramanathan and Ramesh Nallapati and Baishakhi Ray and Parminder Bhatia and Sudipta Sengupta and Dan Roth and Bing Xiang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=Bo7eeXm6An8}\n}", "github": "", "project": "", "reviewers": "DMjf;3QeX;rPgV;n1wH", "pdf_size": 2166353, "recommendation": "6;6;8;8", "confidence": "4;3;4;4", "correctness": "3;3;4;4", "technical_novelty": "3;3;2;3", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "86;76;196;134", "wc_strength_and_weaknesses": "271;106;334;53", "wc_clarity_quality_novelty_and_reproducibility": "57;49;170;18", "wc_summary_review": "44;62;132;17", "wc_review": "458;293;832;222", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "430;301;966;145", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;2;1", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 123.0, 47.50789408087881 ], "wc_strength_and_weaknesses_avg": [ 191.0, 115.2367129000129 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 73.5, 57.586890869363664 ], "wc_summary_review_avg": [ 63.75, 42.5345447842104 ], "wc_review_avg": [ 451.25, 235.91033784046004 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 460.5, 308.80454983694784 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 25, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 1.0, 
"gs_citation": 137, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6842308431728854839&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=Bo7eeXm6An8", "email": "amazon.com;amazon.com;;amazon.com;;amazon.com;amazon.com;amazon.com;amazon.com;;amazon.com;amazon.com;amazon.com;;amazon.com;amazon.com;amazon.com;amazon.com;amazon.com;amazon.com;columbia.edu;amazon.com;;amazon.com;gs.com", "author_num": 25, "aff_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1;0;0;2", "aff_unique_norm": "Amazon;Columbia University;Goldman Sachs", "aff_unique_dep": "Amazon.com, Inc.;;", "aff_unique_url": "https://www.amazon.com;https://www.columbia.edu;https://www.goldmansachs.com", "aff_unique_abbr": "Amazon;Columbia;GS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "Bq1-IOPKet", "title": "Optimal Transport-Based Supervised Graph Summarization", "track": "main", "status": "Reject", "tldr": "", "abstract": "Graph summarization is the problem of producing smaller graph representations of an input graph dataset, in such a way that\n the smaller ``compressed'' graphs capture relevant structural information for downstream tasks. One graph summarization\n method, recently proposed in Garg & Jaakkola (2019), formulates an optimal transport-based framework that allows prior information\n about node, edge, and attribute importance to be incorporated into the graph summarization process. We extend the optimal transport framework to a supervised graph summarization setting, wherein we seek to preserve relevant information about a class label. We first formulate the problem in terms of maximizing the mutual information between the summarized graph and the class label. We then propose a method that incorporates mutual information estimates between random variables associated with sample graphs and class labels into\n the optimal transport compression framework from Garg & Jaakkola (2019). We empirically show performance improvements over the previous work by Garg & Jaakkola (2019), in terms of classification and compression on synthetic and real datasets. We then theoretically show limitations of the optimal transport approach: e.g., that it fails to satisfy a certain desirable information monotonicity property. 
", "keywords": "Graph Summarization;Optimal Transport;Supervised Learning;Mutual Information", "primary_area": "", "supplementary_material": "/attachment/0487504f29ae25245f5ed1027821911ddceb70e9.zip", "author": "Sepideh Neshatfar;Abram Magner;Salimeh Yasaei Sekeh", "authorids": "~Sepideh_Neshatfar1;~Abram_Magner1;~Salimeh_Yasaei_Sekeh1", "gender": "F;;F", "homepage": "https://sepidehnsf.github.io/;https://www.albany.edu/faculty/amagner;https://www.salimeh.info", "dblp": ";141/2242;151/6376", "google_scholar": ";;s17L6jAAAAAJ", "orcid": ";;", "linkedin": "sepideh-neshatfar-26922b186;;", "or_profile": "~Sepideh_Neshatfar1;~Abram_Magner1;~Salimeh_Yasaei_Sekeh1", "aff": "University of Maine;State University of New York, Albany;University of Maine", "aff_domain": "umaine.edu;albany.edu;umaine.edu", "position": "PhD student;Assistant professor;Assistant Professor", "bibtex": "@misc{\nneshatfar2023optimal,\ntitle={Optimal Transport-Based Supervised Graph Summarization},\nauthor={Sepideh Neshatfar and Abram Magner and Salimeh Yasaei Sekeh},\nyear={2023},\nurl={https://openreview.net/forum?id=Bq1-IOPKet}\n}", "github": "", "project": "", "reviewers": "9TND;MXgV;u1qZ;atLk", "site": "https://openreview.net/forum?id=Bq1-IOPKet", "pdf_size": 363492, "recommendation": "3;5;6;8", "confidence": "3;2;3;3", "correctness": "2;4;3;3", "technical_novelty": "2;2;3;2", "empirical_novelty": "1;2;2;2", "wc_summary_paper": "21;35;56;86", "wc_strength_and_weaknesses": "27;443;97;366", "wc_clarity_quality_novelty_and_reproducibility": "66;38;23;6", "wc_summary_review": "17;42;15;6", "wc_review": "131;558;191;464", "wc_reply_reviewers": "331;51;57;14", "wc_reply_authors": "1213;560;420;738", "reply_reviewers": "2;1;1;1", "reply_authors": "2;1;1;1", "recommendation_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 49.5, 24.479583329787296 ], "wc_strength_and_weaknesses_avg": [ 233.25, 175.1576075995559 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 33.25, 22.038318901404434 ], "wc_summary_review_avg": [ 20.0, 13.360389215887388 ], "wc_review_avg": [ 336.0, 179.38645433811328 ], "wc_reply_reviewers_avg": [ 113.25, 126.7919062874283 ], "wc_reply_authors_avg": [ 732.75, 299.3003299363367 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.16012815380508713, "corr_recommendation_correctness": 0.39223227027636803, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:x9Gusz5eN30J:scholar.google.com/&scioq=Optimal+Transport-Based+Supervised+Graph+Summarization&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Maine;State University of New York", "aff_unique_dep": ";", "aff_unique_url": "https://www.umaine.edu;https://www.albany.edu", "aff_unique_abbr": "UMaine;SUNY Albany", "aff_campus_unique_index": "1", "aff_campus_unique": ";Albany", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "SIMPLE: Specialized Model-Sample Matching for Domain Generalization", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12235", "id": "BqrPeZ_e5P", "poster": "/media/PosterPDFs/ICLR%202023/12235.png?t=1682527238.5141883", 
"openreview": "https://openreview.net/forum?id=BqrPeZ_e5P", "slides": "https://iclr.cc/virtual/2023/poster/12235", "video": "https://iclr.cc/virtual/2023/poster/12235", "author_site": "Ziyue Li, Kan Ren, XINYANG JIANG, Yifei Shen, Haipeng Zhang, Dongsheng Li", "tldr": "", "abstract": "In domain generalization (DG), most existing methods aspire to fine-tune a specific pretrained model through novel DG algorithms. In this paper, we propose an alternative direction, i.e., to efficiently leverage a pool of pretrained models without fine-tuning. Through extensive empirical and theoretical evidence, we demonstrate that (1) pretrained models have possessed generalization to some extent while there is no single best pretrained model across all distribution shifts, and (2) out-of-distribution (OOD) generalization error depends on the fitness between the pretrained model and unseen test distributions. This analysis motivates us to incorporate diverse pretrained models and to dispatch the best matched models for each OOD sample by means of recommendation techniques. To this end, we propose SIMPLE, a specialized model-sample matching method for domain generalization. First, the predictions of pretrained models are adapted to the target domain by a linear label space transformation. A matching network aware of model specialty is then proposed to dynamically recommend proper pretrained models to predict each test sample. The experiments on DomainBed show that our method achieves significant performance improvements (up to 12.2% for individual dataset and 3.9% on average) compared to state-of-the-art (SOTA) methods and further achieves 6.1% gain via enlarging the pretrained model pool. Moreover, our method is highly efficient and achieves more than 1000 times training speedup compared to the conventional DG methods with fine-tuning a pretrained model. 
Code and supplemental materials are available at https://seqml.github.io/simple.", "keywords": "domain generalization;ensemble learning;pretrained model", "primary_area": "", "supplementary_material": "", "author": "Ziyue Li;Kan Ren;XINYANG JIANG;Yifei Shen;Haipeng Zhang;Dongsheng Li", "authorids": "~Ziyue_Li1;~Kan_Ren1;~XINYANG_JIANG2;~Yifei_Shen1;~Haipeng_Zhang3;~Dongsheng_Li2", "gender": "F;M;M;M;M;M", "homepage": "https://litzy0619.github.io/;https://saying.ren;;https://openreview.net/profile?id=~Yifei_Shen1;https://faculty.sist.shanghaitech.edu.cn/zhanghp/;http://recmind.cn", "dblp": ";28/7458;155/6316;51/609.html;;254/0830-2.html", "google_scholar": "NQVzCSkAAAAJ;USnQVWgAAAAJ;JiTfWVMAAAAJ;;377DmKgAAAAJ;VNg5rA8AAAAJ", "orcid": ";;;;;0000-0003-3103-8442", "linkedin": "litzyli/;;xinyang-jiang-ab5416b0/;;;", "or_profile": "~Ziyue_Li1;~Kan_Ren1;~XINYANG_JIANG2;~Yifei_Shen1;~Haipeng_Zhang3;~Dongsheng_Li2", "aff": "University of Maryland, College Park;Microsoft;Microsoft;Microsoft Research Asia;ShanghaiTech University;Microsoft Research Asia", "aff_domain": "umd.edu;microsoft.com;microsoft.com;microsoft.com;shanghaitech.edu.cn;microsoft.com", "position": "PhD student;Researcher;Senior Researcher;Research Cheerleader;Assistant Professor;Principal Researcher", "bibtex": "@inproceedings{\nli2023simple,\ntitle={{SIMPLE}: Specialized Model-Sample Matching for Domain Generalization},\nauthor={Ziyue Li and Kan Ren and XINYANG JIANG and Yifei Shen and Haipeng Zhang and Dongsheng Li},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=BqrPeZ_e5P}\n}", "github": "", "project": "", "reviewers": "ySiZ;qJrT;At6g;KF3m", "pdf_size": 4634833, "recommendation": "5;5;6;8", "confidence": "3;4;4;5", "correctness": "2;3;3;4", "technical_novelty": "2;3;2;3", "empirical_novelty": "2;3;4;4", "wc_summary_paper": "64;71;31;127", "wc_strength_and_weaknesses": "103;282;479;121", "wc_clarity_quality_novelty_and_reproducibility": "121;96;34;79", "wc_summary_review": "31;69;87;98", "wc_review": "319;518;631;425", "wc_reply_reviewers": "24;0;18;0", "wc_reply_authors": "1159;1262;1723;655", "reply_reviewers": "1;0;1;0", "reply_authors": "3;3;5;2", "recommendation_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 3.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 73.25, 34.513584282134474 ], "wc_strength_and_weaknesses_avg": [ 246.25, 151.3759805913739 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 82.5, 31.737202145116697 ], "wc_summary_review_avg": [ 71.25, 25.439880109780393 ], "wc_review_avg": [ 473.25, 115.11814583287901 ], "wc_reply_reviewers_avg": [ 10.5, 10.712142642814275 ], "wc_reply_authors_avg": [ 1199.75, 379.499258892557 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 3.25, 1.0897247358851685 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.8660254037844386, "corr_recommendation_correctness": 0.8660254037844386, "gs_citation": 45, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4845196712867668290&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "pdf": "https://openreview.net/pdf?id=BqrPeZ_e5P", "email": "umd.edu;microsoft.com;microsoft.com;microsoft.com;shanghaitech.edu.cn;microsoft.com", "author_num": 6, "aff_unique_index": "0;1;1;1;2;1", "aff_unique_norm": "University of 
Maryland;Microsoft;ShanghaiTech University", "aff_unique_dep": ";Microsoft Corporation;", "aff_unique_url": "https://www.umd.edu;https://www.microsoft.com;https://www.shanghaitech.edu.cn", "aff_unique_abbr": "UMD;Microsoft;ShanghaiTech", "aff_campus_unique_index": "0;2;2", "aff_campus_unique": "College Park;;Asia", "aff_country_unique_index": "0;0;0;1;1;1", "aff_country_unique": "United States;China" }, { "id": "BqxE86ufTzq", "title": "Gradient Estimation for Unseen Domain Risk Minimization with Pre-Trained Models", "track": "main", "status": "Reject", "tldr": "Gradient Estimation for Unseen Domain Risk Minimization with Pre-Trained Models", "abstract": "Domain generalization aims to build generalized models that perform well on unseen domains when only source domains are available for model optimization. Recent studies have demonstrated that large-scale pre-trained models could play an important role in domain generalization by providing their generalization power. However, large-scale pre-trained models are not fully equipped with target task-specific knowledge due to a discrepancy between the pre-training objective and the target task. Although the task-specific knowledge could be learned from source domains by fine-tuning, this hurts the generalization power of the pre-trained models because of gradient bias toward the source domains. To address this issue, we propose a new domain generalization method that estimates unobservable gradients that reduce potential risks in unseen domains, using a large-scale pre-trained model. Our proposed method allows the pre-trained model to learn task-specific knowledge further while preserving its generalization ability with the estimated gradients. Experimental results show that our proposed method outperforms baseline methods on DomainBed, a standard benchmark in domain generalization.
We also provide extensive analyses to demonstrate that the estimated unobserved gradients relieve the gradient bias, and the pre-trained model learns the task-specific knowledge without sacrificing its generalization power.", "keywords": "domain generalization;gradient estimation;pre-trained models", "primary_area": "", "supplementary_material": "/attachment/c45e29b747e7c45ad0e1d704cc12c95d395a6163.zip", "author": "Byounggyu Lew;Donghyun Son;Buru Chang", "authorids": "~Byounggyu_Lew1;~Donghyun_Son2;~Buru_Chang1", "gender": "M;M;Not Specified", "homepage": ";https://dhdroid.github.io/;https://sites.google.com/view/buru-chang", "dblp": ";;221/3390", "google_scholar": ";;https://scholar.google.co.kr/citations?hl=ko", "orcid": ";;0000-0002-7595-9035", "linkedin": "byounggyu-lew-098663193;;", "or_profile": "~Byounggyu_Lew1;~Donghyun_Son2;~Buru_Chang1", "aff": "Hyperconnect;Seoul National University;Hyperconnect", "aff_domain": "hpcnt.com;snu.ac.kr;hpcnt.com", "position": "Researcher;Undergrad student;Research Scientist", "bibtex": "@misc{\nlew2023gradient,\ntitle={Gradient Estimation for Unseen Domain Risk Minimization with Pre-Trained Models},\nauthor={Byounggyu Lew and Donghyun Son and Buru Chang},\nyear={2023},\nurl={https://openreview.net/forum?id=BqxE86ufTzq}\n}", "github": "", "project": "", "reviewers": "e3A6;TqRb;ENaU;seAG", "site": "https://openreview.net/forum?id=BqxE86ufTzq", "pdf_size": 2650090, "recommendation": "5;5;5;5", "confidence": "4;4;3;3", "correctness": "3;3;3;3", "technical_novelty": "3;2;3;3", "empirical_novelty": "3;2;3;3", "wc_summary_paper": "116;48;55;64", "wc_strength_and_weaknesses": "136;187;210;392", "wc_clarity_quality_novelty_and_reproducibility": "54;30;20;69", "wc_summary_review": "103;63;52;27", "wc_review": "409;328;337;552", "wc_reply_reviewers": "0;0;0;59", "wc_reply_authors": "212;327;765;596", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;2;2", "recommendation_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 70.75, 26.733639856929322 ], "wc_strength_and_weaknesses_avg": [ 231.25, 96.59548384888394 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 43.25, 19.330998422223306 ], "wc_summary_review_avg": [ 61.25, 27.407799984675894 ], "wc_review_avg": [ 406.5, 89.67859276326764 ], "wc_reply_reviewers_avg": [ 14.75, 25.54774941164094 ], "wc_reply_authors_avg": [ 475.0, 217.83824273988256 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=137441493589925407&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;0", "aff_unique_norm": "Hyperconnect;Seoul National University", "aff_unique_dep": ";", "aff_unique_url": ";https://www.snu.ac.kr", "aff_unique_abbr": ";SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1", "aff_country_unique": ";South Korea" }, { "title": "Sub-Task Decomposition Enables Learning in Sequence to Sequence Tasks", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11932", "id": "BrJATVZDWEH", "poster": "/media/PosterPDFs/ICLR%202023/11932.png?t=1682083531.0113723", "openreview": 
"https://openreview.net/forum?id=BrJATVZDWEH", "slides": "https://iclr.cc/virtual/2023/poster/11932", "video": "https://iclr.cc/virtual/2023/poster/11932", "author_site": "Noam Wies, Yoav Levine, Amnon Shashua", "tldr": "", "abstract": "The field of Natural Language Processing (NLP) has experienced a dramatic leap in capabilities with the recent introduction of huge Language Models (LMs). Despite this success, natural language problems that involve several compounded steps are still practically unlearnable, even by the largest LMs. This complies with experimental failures for end-to-end learning of composite problems that were demonstrated in a variety of domains. An effective mitigation is to introduce intermediate supervision for solving sub-tasks of the compounded problem. Recently, several works have demonstrated high gains by taking a straightforward approach for incorporating intermediate supervision in compounded natural language problems: the sequence-to-sequence LM is fed with an augmented input, in which the decomposed tasks' labels are simply concatenated to the original input. In this paper, we prove a positive learning result that motivates these recent efforts. We show that when concatenating intermediate supervision to the input and training a sequence-to-sequence model on this modified input, unlearnable composite problems can become learnable. We show that this is true for any family of tasks which on the one hand, are unlearnable, and on the other hand, can be decomposed into a polynomial number of simple sub-tasks, each of which depends only on $O(1)$ previous sub-task results. Beyond motivating contemporary empirical efforts for incorporating intermediate supervision in sequence-to-sequence language models, our positive theoretical result is the first of its kind in the landscape of results on the benefits of intermediate supervision for neural-network learning: Until now, all theoretical results on the subject are negative, i.e., show cases where learning is impossible without intermediate supervision, while our result is positive, showing that learning is facilitated in the presence of intermediate supervision.", "keywords": "Language Models;Generalization;End-to-End;Composition", "primary_area": "", "supplementary_material": "/attachment/7abb6957d19aed40d556cb7a4974504f766fa889.zip", "author": "Noam Wies;Yoav Levine;Amnon Shashua", "authorids": "~Noam_Wies1;~Yoav_Levine1;~Amnon_Shashua1", "gender": "M;M;M", "homepage": ";;http://www.cs.huji.ac.il/~shashua/", "dblp": "236/6106;199/1895;47/1492", "google_scholar": "https://scholar.google.co.il/citations?user=FxlR8voAAAAJ;;https://scholar.google.com.tw/citations?user=dwi5wvYAAAAJ", "orcid": "0000-0002-1337-2298;;", "linkedin": "noam-wies-a5ab1663/;;", "or_profile": "~Noam_Wies1;~Yoav_Levine1;~Amnon_Shashua1", "aff": "Hebrew University of Jerusalem;AI21 Labs;Hebrew University, Hebrew University of Jerusalem", "aff_domain": "huji.ac.il;ai21.com;cs.huji.ac.il", "position": "PhD student;Principal Researcher;Professor", "bibtex": "@inproceedings{\nwies2023subtask,\ntitle={Sub-Task Decomposition Enables Learning in Sequence to Sequence Tasks},\nauthor={Noam Wies and Yoav Levine and Amnon Shashua},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=BrJATVZDWEH}\n}", "github": "", "project": "", "reviewers": "iwZz;f3DQ;Qt7K;G5w1;T2bv", "pdf_size": 933650, "recommendation": "5;6;6;8;8", "confidence": "2;2;3;3;3", "correctness": "4;3;4;4;4", 
"technical_novelty": "3;3;3;3;4", "empirical_novelty": "2;3;3;0;3", "wc_summary_paper": "33;106;129;248;264", "wc_strength_and_weaknesses": "127;117;151;31;202", "wc_clarity_quality_novelty_and_reproducibility": "8;64;71;11;20", "wc_summary_review": "39;36;61;20;72", "wc_review": "207;323;412;310;558", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "645;199;228;130;318", "reply_reviewers": "0;0;0;0;0", "reply_authors": "1;1;1;1;1", "recommendation_avg": [ 6.6, 1.2 ], "confidence_avg": [ 2.6, 0.4898979485566356 ], "correctness_avg": [ 3.8, 0.39999999999999997 ], "technical_novelty_avg": [ 3.2, 0.39999999999999997 ], "empirical_novelty_avg": [ 2.2, 1.16619037896906 ], "wc_summary_paper_avg": [ 156.0, 87.73368794254576 ], "wc_strength_and_weaknesses_avg": [ 125.6, 55.69057370866276 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 34.8, 27.080620376941145 ], "wc_summary_review_avg": [ 45.6, 18.575252353602092 ], "wc_review_avg": [ 362.0, 117.61462494094857 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 304.0, 180.86127280321787 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.748455199183749, "corr_recommendation_correctness": 0.24999999999999997, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4958960567825191206&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=BrJATVZDWEH", "email": "huji.ac.il;ai21.com;cs.huji.ac.il", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Hebrew University of Jerusalem;AI21 Labs", "aff_unique_dep": ";", "aff_unique_url": "https://www.huji.ac.il;https://www.ai21labs.com", "aff_unique_abbr": "HUJI;AI21", "aff_campus_unique_index": "0", "aff_campus_unique": "Jerusalem;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Israel" }, { "id": "BrKY4Wr6dk2", "title": "Revisiting Activation Function Design for Improving Adversarial Robustness at Scale", "track": "main", "status": "Reject", "tldr": "ReLU significantly weakens adversarial training, but its smooth approximations can fix this issue", "abstract": "Modern ConvNets typically use ReLU activation function. Recently smooth activation functions have been used to improve their accuracy. Here we study the role of smooth activation function from the perspective of adversarial robustness. We find that ReLU activation function significantly weakens adversarial training due to its non-smooth nature. Replacing ReLU with its smooth alternatives allows adversarial training to find harder adversarial training examples and to compute better gradient updates for network optimization.\n\nWe focus our study on the large-scale ImageNet dataset. On ResNet-50, switching from ReLU to the smooth activation function SILU improves adversarial robustness from 33.0% to 42.3%, while also improving accuracy by 0.9% on ImageNet. Smooth activation functions also scale well with larger networks: it helps EfficientNet-L1 to achieve 82.2% accuracy and 58.6% robustness, largely outperforming the previous state-of-the-art defense by 9.5% for accuracy and 11.6% for robustness. 
Models are available at https://rb.gy/qt8jya.", "keywords": "adversarial training;activation function;neural network architecture", "primary_area": "", "supplementary_material": "", "author": "Cihang Xie;Mingxing Tan;Boqing Gong;Alan Yuille;Quoc V Le", "authorids": "~Cihang_Xie3;~Mingxing_Tan3;~Boqing_Gong1;~Alan_Yuille1;~Quoc_V_Le1", "gender": "M;M;M;M;M", "homepage": ";http://boqinggong.info;;;https://cihangxie.github.io/", "dblp": "11/7863;29/7457;y/AlanLYuille;29/6166;175/3366", "google_scholar": "6POeyBoAAAAJ;lv9ZeVUAAAAJ;;;X3vVZPcAAAAJ", "orcid": ";;;;", "linkedin": "mingxing-tan-2724551b/;boqing-gong-46aa5821/;;;", "or_profile": "~Mingxing_Tan3;~Boqing_Gong1;~Alan_Yuille1;~Quoc_V_Le1;~cihang_xie1", "aff": "Google/Waymo;Google;Johns Hopkins University;Google;University of California, Santa Cruz", "aff_domain": "google.com;google.com;johnshopkins.edu;google.com;ucsc.edu", "position": "Researcher;Research Scientist;Full Professor;Scientist;Assistant Professor", "bibtex": "@misc{\nxie2023revisiting,\ntitle={Revisiting Activation Function Design for Improving Adversarial Robustness at Scale},\nauthor={Cihang Xie and Mingxing Tan and Boqing Gong and Alan Yuille and Quoc V Le},\nyear={2023},\nurl={https://openreview.net/forum?id=BrKY4Wr6dk2}\n}", "github": "", "project": "", "reviewers": "zKRu;U7PC;yesB;qQzz", "site": "https://openreview.net/forum?id=BrKY4Wr6dk2", "pdf_size": 1091620, "recommendation": "3;3;5;5", "confidence": "5;4;3;4", "correctness": "2;3;4;3", "technical_novelty": "1;2;3;2", "empirical_novelty": "2;2;4;3", "wc_summary_paper": "68;37;16;59", "wc_strength_and_weaknesses": "211;247;423;384", "wc_clarity_quality_novelty_and_reproducibility": "63;12;111;123", "wc_summary_review": "35;55;97;46", "wc_review": "377;351;647;612", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 45.0, 20.18662923818635 ], "wc_strength_and_weaknesses_avg": [ 316.25, 89.24509790459082 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 77.25, 43.85416171813115 ], "wc_summary_review_avg": [ 58.25, 23.466731770743024 ], "wc_review_avg": [ 496.75, 133.64201248110567 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.7071067811865475, "corr_recommendation_correctness": 0.7071067811865475, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:hhGZFe1l3ioJ:scholar.google.com/&scioq=Revisiting+Activation+Function+Design+for+Improving+Adversarial+Robustness+at+Scale&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;1;0;2", "aff_unique_norm": "Google;Johns Hopkins University;University of California, Santa Cruz", "aff_unique_dep": "Waymo;;", "aff_unique_url": "https://www.google.com;https://www.jhu.edu;https://www.ucsc.edu", "aff_unique_abbr": "Google;JHU;UCSC", "aff_campus_unique_index": "1;1;2", "aff_campus_unique": ";Mountain View;Santa Cruz", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "BsxMeLGAmU", "title": "Likelihood adjusted semidefinite programs for clustering heterogeneous data", "track": "main", 
"status": "Reject", "tldr": "We propose an iterative likelihood adjusted SDP for clustering under data heterogeneity.", "abstract": "Clustering is a widely deployed unsupervised learning tool. Model-based clustering is a flexible framework to tackle data heterogeneity when the clusters have different shapes. Likelihood-based inference for mixture distributions often involves non-convex and high-dimensional objective functions, imposing difficult computational and statistical challenges. The classic expectation-maximization (EM) algorithm is a computationally thrifty iterative method that maximizes a surrogate function minorizing the log-likelihood of observed data in each iteration, which however suffers from bad local maxima even in the special case of the standard Gaussian mixture model with common isotropic covariance matrices. On the other hand, recent studies reveal that the unique global solution of a semidefinite programming (SDP) relaxed $K$-means achieves the information-theoretically sharp threshold for perfectly recovering the cluster labels under the standard Gaussian mixture model. In this paper, we extend the SDP approach to a general setting by integrating cluster labels as model parameters and propose an iterative likelihood adjusted SDP (iLA-SDP) method that directly maximizes the \\emph{exact} observed likelihood in the presence of data heterogeneity. By lifting the cluster assignment to group-specific membership matrices, iLA-SDP avoids centroids estimation -- a key feature that allows exact recovery under well-separateness of centroids without being trapped by their adversarial configurations. Thus iLA-SDP is less sensitive than EM to initialization and more stable on high-dimensional data. Our numeric experiments demonstrate that iLA-SDP can achieve lower mis-clustering errors over several widely used clustering methods including $K$-means, SDP and EM algorithms.", "keywords": "clustering;likelihood inference;semidefinite programming;alternating maximization", "primary_area": "", "supplementary_material": "/attachment/13b7db2dae259219c20a7d0390dada4ebeed776d.zip", "author": "Yubo Zhuang;Xiaohui Chen;Yun Yang", "authorids": "~Yubo_Zhuang1;~Xiaohui_Chen3;~Yun_Yang4", "gender": "M;M;M", "homepage": ";https://the-xiaohuichen.github.io/;https://www-math.umd.edu/people/faculty/item/1811-yy84.html", "dblp": ";;", "google_scholar": ";ZKij_0cAAAAJ;FY_UnPAAAAAJ", "orcid": ";;", "linkedin": "yubo-zhuang-09b282192/;xiaohui-chen-b67677a0/;", "or_profile": "~Yubo_Zhuang1;~Xiaohui_Chen3;~Yun_Yang4", "aff": "University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign", "aff_domain": "illinois.edu;illinois.edu;illinois.edu", "position": "PhD student;Associate Professor;Associate Professor", "bibtex": "@misc{\nzhuang2023likelihood,\ntitle={Likelihood adjusted semidefinite programs for clustering heterogeneous data},\nauthor={Yubo Zhuang and Xiaohui Chen and Yun Yang},\nyear={2023},\nurl={https://openreview.net/forum?id=BsxMeLGAmU}\n}", "github": "", "project": "", "reviewers": "FBAZ;BdFu;cBBj", "site": "https://openreview.net/forum?id=BsxMeLGAmU", "pdf_size": 1478081, "recommendation": "3;5;5", "confidence": "2;3;3", "correctness": "4;3;3", "technical_novelty": "2;3;2", "empirical_novelty": "2;2;2", "wc_summary_paper": "18;76;90", "wc_strength_and_weaknesses": "146;785;279", "wc_clarity_quality_novelty_and_reproducibility": "24;84;39", "wc_summary_review": "27;64;24", "wc_review": "215;1009;432", "wc_reply_reviewers": 
"0;0;0", "wc_reply_authors": "140;763;448", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 61.333333333333336, 31.169785940162562 ], "wc_strength_and_weaknesses_avg": [ 403.3333333333333, 275.2869210276596 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 49.0, 25.495097567963924 ], "wc_summary_review_avg": [ 38.333333333333336, 18.190351532856337 ], "wc_review_avg": [ 552.0, 335.0711367257208 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 450.3333333333333, 254.344036472037 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.9999999999999998, "corr_recommendation_correctness": -0.9999999999999998, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17351086611566894814&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Illinois Urbana-Champaign", "aff_unique_dep": "", "aff_unique_url": "https://illinois.edu", "aff_unique_abbr": "UIUC", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Urbana-Champaign", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "Bvaekygzl2m", "title": "Strength-Adaptive Adversarial Training", "track": "main", "status": "Reject", "tldr": "", "abstract": "Adversarial training (AT) is proved to reliably improve network's robustness against adversarial data. However, current AT with a pre-specified perturbation budget has limitations in learning a robust network. Firstly, applying a pre-specified perturbation budget on networks of various model capacities will yield divergent degree of robustness disparity between natural and robust accuracies, which deviates from robust network's desideratum. Secondly, the attack strength of adversarial training data constrained by the pre-specified perturbation budget fails to upgrade as the growth of network robustness, which leads to robust overfitting and further degrades the adversarial robustness. To overcome these limitations, we propose Strength-Adaptive Adversarial Training (SAAT). Specifically, the adversary employs an adversarial loss constraint to generate adversarial training data. Under this constraint, the perturbation budget will be adaptively adjusted according to the training state of adversarial data, which can effectively avoid robust overfitting. Besides, SAAT explicitly constrains the attack strength of training data through the adversarial loss, which manipulates model capacity scheduling during training, and thereby can flexibly control the degree of robustness disparity and adjust the tradeoff between natural accuracy and robustness. Extensive experiments show that our proposal boosts the robustness of adversarial training. 
", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Chaojian Yu;Dawei Zhou;Li Shen;Jun Yu;Bo Han;Mingming Gong;Nannan Wang;Tongliang Liu", "authorids": "~Chaojian_Yu1;~Dawei_Zhou3;~Li_Shen1;~Jun_Yu3;~Bo_Han1;~Mingming_Gong1;~Nannan_Wang1;~Tongliang_Liu1", "gender": "M;M;M;M;M;M;M;M", "homepage": ";https://sites.google.com/site/mathshenli/home;https://faculty.ustc.edu.cn/yujun_AI/en/index.htm;https://mingming-gong.github.io/;;https://tongliang-liu.github.io/;https://bhanml.github.io/;", "dblp": "223/9872;91/3680-8;50/5754-1.html;98/8479;10/8359-1;150/6667;241/0472-3;39/3130-4", "google_scholar": "b3ltuG8AAAAJ;yVhgENIAAAAJ;efZyqyQAAAAJ;https://scholar.google.com.au/citations?user=6BmiCJIAAAAJ;SRBn7oUAAAAJ;https://scholar.google.com.au/citations?user=EiLdZ_YAAAAJ;nTNjqHwAAAAJ;https://scholar.google.com.hk/citations?user=7H-LIigAAAAJ", "orcid": ";;0000-0002-3197-8103;0000-0001-7147-5589;;;;0000-0002-0694-3603", "linkedin": ";;;;;;;", "or_profile": "~Chaojian_Yu1;~Li_Shen1;~Jun_Yu3;~Mingming_Gong1;~Nannan_Wang1;~Tongliang_Liu1;~bo_han2;~Zhou_Dawei1", "aff": "The University of Sydney;JD Explore Academy;University of Science and Technology of China;University of Melbourne;Xidian University;University of Sydney;RIKEN;Xidian University", "aff_domain": "uni.sydney.edu.au;jd.com;ustc.edu.cn;unimelb.edu.au;xidian.edu.cn;sydney.edu.au;riken.jp;xidian.edu.cn", "position": "PhD student;Researcher;Associate Professor;Assistant Professor;Full Professor;Lecturer;Adjunct Scientist;PhD student", "bibtex": "@misc{\nyu2023strengthadaptive,\ntitle={Strength-Adaptive Adversarial Training},\nauthor={Chaojian Yu and Dawei Zhou and Li Shen and Jun Yu and Bo Han and Mingming Gong and Nannan Wang and Tongliang Liu},\nyear={2023},\nurl={https://openreview.net/forum?id=Bvaekygzl2m}\n}", "github": "", "project": "", "reviewers": "snup;e7uD;HbG6;AoDK", "site": "https://openreview.net/forum?id=Bvaekygzl2m", "pdf_size": 1490380, "recommendation": "3;3;3;5", "confidence": "4;4;4;4", "correctness": "3;2;3;3", "technical_novelty": "2;2;3;2", "empirical_novelty": "0;2;3;2", "wc_summary_paper": "55;56;87;27", "wc_strength_and_weaknesses": "304;220;116;119", "wc_clarity_quality_novelty_and_reproducibility": "20;85;75;7", "wc_summary_review": "30;23;14;45", "wc_review": "409;384;292;198", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1103;823;615;694", "reply_reviewers": "0;0;0;0", "reply_authors": "4;4;3;3", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.75, 1.0897247358851685 ], "wc_summary_paper_avg": [ 56.25, 21.22940178149163 ], "wc_strength_and_weaknesses_avg": [ 189.75, 78.12289997177524 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 46.75, 33.751851801049376 ], "wc_summary_review_avg": [ 28.0, 11.335784048754634 ], "wc_review_avg": [ 320.75, 83.1906695489344 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 808.75, 185.40007416395497 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 3.5, 0.5 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9565202295494464712&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2;3;4;0;5;4", "aff_unique_norm": "University of Sydney;JD;University of 
Science and Technology of China;University of Melbourne;Xidian University;RIKEN", "aff_unique_dep": ";JD Explore Academy;;;;", "aff_unique_url": "https://www.sydney.edu.au;;http://www.ustc.edu.cn;https://www.unimelb.edu.au;http://www.xidian.edu.cn/;https://www.riken.jp", "aff_unique_abbr": "USYD;;USTC;UniMelb;Xidian;RIKEN", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;2;0;2;0;3;2", "aff_country_unique": "Australia;;China;Japan" }, { "id": "Bvnjqe3ZroD", "title": "Principal Trade-off Analysis", "track": "main", "status": "Reject", "tldr": "A decomposition method that represents a game as a sum of planar embeddings ", "abstract": "The focus on equilibrium solutions in games underemphasizes the importance of understanding their overall structure. A different set of tools is needed for learning and representing the general structure of a game. In this paper we illustrate \"Principal Trade-off Analysis\" (PTA), a decomposition method that embeds games into a low-dimensional feature space, and argue that the embeddings are more revealing than previously demonstrated. Here, we develop an analogy to Principal Component Analysis (PCA). PTA represents an arbitrary two-player zero-sum game as the weighted sum of pairs of orthogonal 2D feature planes. We show that each of the feature planes represents unique strategic trade-offs (cyclic modes) and that truncation of the sequence provides insightful model reduction. We demonstrate the validity of PTA on a pair of games (Blotto, Pokemon). In Blotto, PTA identifies game symmetries and specifies strategic trade-offs associated with distinct win conditions. These symmetries reveal limitations of PTA unaddressed in previous work. For Pokemon, PTA recovers clusters that naturally correspond to Pokemon types, correctly identifies the designed trade-off between those types, and discovers a rock-paper-scissors (RPS) cycle in the Pokemon generation type - all absent any specific information except game outcomes. 
", "keywords": "Learning theory;Representation Learning;algorithmic game theory;Functional form games;matrix decomposition", "primary_area": "", "supplementary_material": "/attachment/9770ebdd85cb050d474d067b1b9f69e8854a93b7.zip", "author": "Alexander Strang;David Robert SeWell;Alexander Kim;Kevin Alcedo;David Rosenbluth", "authorids": "~Alexander_Strang1;~David_Robert_SeWell1;~Alexander_Kim1;~Kevin_Alcedo1;~David_Rosenbluth1", "gender": "M;M;M;;M", "homepage": ";;;http://google.com;", "dblp": ";;;;", "google_scholar": "3h0DltIAAAAJ;https://scholar.google.com/citations?hl=en;;;", "orcid": "0000-0001-6618-631X;;;;", "linkedin": ";;alexanderkim49831;;david-rosenbluth-353123bb/", "or_profile": "~Alexander_Strang1;~David_Robert_SeWell1;~Alexander_Kim1;~Kevin_Alcedo1;~David_Rosenbluth1", "aff": ";;KBRA Analytics, LLC;School of Engineering and Applied Science, University of Pennsylvania;", "aff_domain": ";;kbra.com;seas.upenn.edu;", "position": ";;Researcher;MS student;", "bibtex": "@misc{\nstrang2023principal,\ntitle={Principal Trade-off Analysis},\nauthor={Alexander Strang and David Robert SeWell and Alexander Kim and Kevin Alcedo and David Rosenbluth},\nyear={2023},\nurl={https://openreview.net/forum?id=Bvnjqe3ZroD}\n}", "github": "", "project": "", "reviewers": "pi9j;d7g3;1u6d;enae", "site": "https://openreview.net/forum?id=Bvnjqe3ZroD", "pdf_size": 17611258, "recommendation": "3;6;8;8", "confidence": "2;3;3;3", "correctness": "3;4;4;4", "technical_novelty": "3;3;4;4", "empirical_novelty": "3;3;4;3", "wc_summary_paper": "112;60;36;30", "wc_strength_and_weaknesses": "28;140;349;246", "wc_clarity_quality_novelty_and_reproducibility": "5;15;27;22", "wc_summary_review": "453;76;20;55", "wc_review": "598;291;432;353", "wc_reply_reviewers": "0;33;88;0", "wc_reply_authors": "2010;1216;1358;86", "reply_reviewers": "0;1;1;0", "reply_authors": "3;2;2;1", "recommendation_avg": [ 6.25, 2.0463381929681126 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 3.5, 0.5 ], "empirical_novelty_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 59.5, 32.32259271778797 ], "wc_strength_and_weaknesses_avg": [ 190.75, 119.53948092575942 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 17.25, 8.257572282456872 ], "wc_summary_review_avg": [ 151.0, 175.50356121742942 ], "wc_review_avg": [ 418.5, 115.05324854170786 ], "wc_reply_reviewers_avg": [ 30.25, 35.960916284210555 ], "wc_reply_authors_avg": [ 1167.5, 692.4758118519376 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.9169493006161777, "corr_recommendation_correctness": 0.9169493006161777, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2186571808692232459&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1", "aff_unique_norm": "KBRA Analytics;University of Pennsylvania", "aff_unique_dep": ";School of Engineering and Applied Science", "aff_unique_url": "https://www.kbra.com;https://www.upenn.edu", "aff_unique_abbr": "KBRA;UPenn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "BxXXPvrL1Pg", "title": "Movement-to-Action Transformer Networks for Temporal Action Proposal Generation", "track": "main", "status": "Reject", "tldr": "", "abstract": "The task of generating temporal action proposals is 
aimed at identifying temporal intervals containing human actions in untrimmed videos. For arbitrary actions, this requires learning long-range interactions. We propose an end-to-end Movement-and-Action Transformer Network (MatNet) that uses results of human movement studies to encode actions ranging from localized, atomic, body-part movements to longer-range, semantic ones involving movements of subsets of body parts. In particular, we make direct use of the results of Laban Movement Analysis (LMA). We use LMA-based measures of movements as computational definitions of actions. We input RGB + Flow (I3D) features and 3D pose, compute LMA-based low-to-high-level movement features from them, and learn the action proposals by applying two heads on the boundary Transformer and three heads on the proposal Transformer, and using five losses with different weights. We visualize and explain relations between the movement descriptors and the attention map of the action proposals. We report results from extensive experiments on the Thumos14, ActivityNet and PKU-MMD datasets, showing that MatNet achieves SOTA or better performance on the temporal action proposal generation task.", "keywords": "Temporal Action Proposal Generation;Video Action Segmentation", "primary_area": "", "supplementary_material": "/attachment/c480cfb47f2a00cca7b58ea6b586d099f3ea95ea.zip", "author": "Xiaodan Hu;Narendra Ahuja", "authorids": "~Xiaodan_Hu1;~Narendra_Ahuja1", "gender": "F;", "homepage": ";http://vision.ai.illinois.edu/ahuja.html", "dblp": "09/9830;", "google_scholar": "ElA73tgAAAAJ;dY7OSl0AAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Xiaodan_Hu1;~Narendra_Ahuja1", "aff": "University of Illinois, Urbana-Champaign;University of Illinois, Urbana Champaign", "aff_domain": "uiuc.edu;illinois.edu", "position": "PhD student;Research Professor", "bibtex": "@misc{\nhu2023movementtoaction,\ntitle={Movement-to-Action Transformer Networks for Temporal Action Proposal Generation},\nauthor={Xiaodan Hu and Narendra Ahuja},\nyear={2023},\nurl={https://openreview.net/forum?id=BxXXPvrL1Pg}\n}", "github": "", "project": "", "reviewers": "2bvy;Exnj;dxzc;R3np", "site": "https://openreview.net/forum?id=BxXXPvrL1Pg", "pdf_size": 11142025, "recommendation": "3;3;6;8", "confidence": "4;5;3;4", "correctness": "2;3;3;4", "technical_novelty": "2;2;3;4", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "44;57;49;35", "wc_strength_and_weaknesses": "299;332;111;62", "wc_clarity_quality_novelty_and_reproducibility": "20;22;87;16", "wc_summary_review": "30;60;58;239", "wc_review": "393;471;305;352", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "361;645;217;341", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 5.0, 2.1213203435596424 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 46.25, 7.980444849756184 ], "wc_strength_and_weaknesses_avg": [ 201.0, 116.38943251000066 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 36.25, 29.38005275693017 ], "wc_summary_review_avg": [ 96.75, 82.98004278138208 ], "wc_review_avg": [ 380.25, 60.94823623370901 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 391.0, 156.6780137734711 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.5000000000000001, 
"corr_recommendation_correctness": 0.8333333333333334, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:pGA8Kq6anGwJ:scholar.google.com/&scioq=Movement-to-Action+Transformer+Networks+for+Temporal+Action+Proposal+Generation&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "University of Illinois;University of Illinois Urbana-Champaign", "aff_unique_dep": ";", "aff_unique_url": "https://illinois.edu;https://illinois.edu", "aff_unique_abbr": "UIUC;UIUC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Urbana-Champaign", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "ByaNEZdnx2O", "title": "Learned Nearest-Class-Mean for Biased Representations in Long-Tailed Recognition", "track": "main", "status": "Withdraw", "tldr": "Representations in long-tailed recognition exhibit high tail variance; propose Learned NCM to mitigate representation bias.", "abstract": "The problem of long-tailed recognition (LTR) has received attention in recent years due to the fundamental power-law distribution of objects in the real-world. While classifier bias in LTR has been addressed by many works, representation bias has not yet been researched. At the same time, most recent works use softmax classifiers that are unable to cope with representation bias. In this work, we address these shortcomings by firstly making the key observation that intra-class variance in representation space is negatively correlated to class frequency, leading to biased representations; our analysis reveals that high tail variance is due to spurious correlations learned by deep models. Secondly, to counter representation bias, we propose the Learned Nearest-Class-Mean (NCM), which overcomes uncertainty in empirical centroid estimates and jointly learns centroids minimizing average class-distance normalized variance. Further, we adapt the logit adjustment technique in the NCM framework to achieve higher tail class margin. Our Learned NCM with Logit Adjustment achieves 6\\% gain over state-of-the-art in tail accuracy on the benchmark CIFAR100-LT and ImageNet-LT datasets. 
", "keywords": "Long-Tailed Recognition;Representation bias;Nearest-Class-Mean", "primary_area": "", "supplementary_material": "/attachment/70562761e69c478f1b8e572582749226edb4e03e.zip", "author": "Saurabh Sharma;Yongqin Xian;Ambuj Singh", "authorids": "~Saurabh_Sharma3;~Yongqin_Xian1;~Ambuj_Singh1", "gender": "M;M;", "homepage": "https://dynamo.cs.ucsb.edu/people/sharma;https://xianyongqin.github.io/;", "dblp": ";177/9313.html;", "google_scholar": "https://scholar.google.de/citations?user=SgbYgdsAAAAJ;https://scholar.google.de/citations?user=JmdnBzcAAAAJ;", "orcid": ";;", "linkedin": "saurabh-sharma-deeptinkerer/;;", "or_profile": "~Saurabh_Sharma3;~Yongqin_Xian1;~Ambuj_Singh1", "aff": "University of California, Santa Barbara;Google;", "aff_domain": "cs.ucsb.edu;google.com;", "position": "PhD student;Researcher;", "bibtex": "@misc{\nsharma2023learned,\ntitle={Learned Nearest-Class-Mean for Biased Representations in Long-Tailed Recognition},\nauthor={Saurabh Sharma and Yongqin Xian and Ambuj Singh},\nyear={2023},\nurl={https://openreview.net/forum?id=ByaNEZdnx2O}\n}", "github": "", "project": "", "reviewers": "F1h7;ZLtQ;FMwA;umXZ", "site": "https://openreview.net/forum?id=ByaNEZdnx2O", "pdf_size": 5097572, "recommendation": "3;3;3;5", "confidence": "4;4;5;4", "correctness": "2;3;3;3", "technical_novelty": "2;1;2;2", "empirical_novelty": "2;1;0;2", "wc_summary_paper": "33;53;49;107", "wc_strength_and_weaknesses": "147;736;151;118", "wc_clarity_quality_novelty_and_reproducibility": "15;6;4;466", "wc_summary_review": "8;14;39;31", "wc_review": "203;809;243;722", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 60.5, 27.870235018743564 ], "wc_strength_and_weaknesses_avg": [ 288.0, 258.96621401256186 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 122.75, 198.21878695017787 ], "wc_summary_review_avg": [ 23.0, 12.509996003196804 ], "wc_review_avg": [ 494.25, 273.3545088342243 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:CnF3NFf9KD8J:scholar.google.com/&scioq=Learned+Nearest-Class-Mean+for+Biased+Representations+in+Long-Tailed+Recognition&hl=en&as_sdt=0,31", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "University of California, Santa Barbara;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.ucsb.edu;https://www.google.com", "aff_unique_abbr": "UCSB;Google", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Santa Barbara;Mountain View", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "C-7D-u3q62f", "title": "CCMLN: Combinatorial Correction for Multi-Label Classification with Noisy Labels", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Multi-label classification aims to learn classification models from instances associated with multiple labels. 
It is pivotal to learn and utilize the label dependence among multiple labels in multi-label classification. As a result of today\u2019s big and complex data, noisy labels are inevitable, making it pressing to address multi-label classification with noisy labels. Although the importance of label dependence has been shown in multi-label classification with clean labels, it is challenging and unresolved to bring label dependence to the problem of multi-label classification with noisy labels. The issues are that we do not understand why label dependence is helpful in this problem, and how to learn and utilize label dependence using only training data with noisy multiple labels. In this paper, we bring label dependence to tackle the problem of multi-label classification with noisy labels. Specifically, we first provide a high-level understanding of why label dependence helps distinguish the examples with clean/noisy multiple labels. Benefiting from the memorization effect in handling noisy labels, a novel algorithm is then proposed to learn the label dependence by employing only training data with noisy multiple labels, and to utilize the learned dependence to help correct noisy multiple labels to clean ones. We prove that the use of label dependence could bring a higher success rate for recovering correct multiple labels. Empirical evaluations justify our claims and demonstrate the superiority of our algorithm. ", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/fd36ec6d6098b74d08076641024f6645624069cb.zip", "author": "Xiaobo Xia;Jiankang Deng;Wei Bao;Yuxuan Du;Bo Han;Shiguang Shan;Tongliang Liu", "authorids": "~Xiaobo_Xia1;~Jiankang_Deng1;~Wei_Bao1;~Yuxuan_Du2;~Bo_Han1;~Shiguang_Shan2;~Tongliang_Liu1", "gender": "M;M;;M;M;M;M", "homepage": "https://xiaoboxia.github.io/;https://jiankangdeng.github.io/;https://www.sydney.edu.au/engineering/about/our-people/academic-staff/wei-bao.html;https://github.com/yuxuan-du/Yuxuan-Du.github.io;http://vipl.ict.ac.cn/people/sgshan/;https://tongliang-liu.github.io/;https://bhanml.github.io/", "dblp": "242/8072;156/7808;;;s/ShiguangShan;150/6667;241/0472-3", "google_scholar": "jRsugY0AAAAJ;Z_UoQFsAAAAJ;;https://scholar.google.com.au/citations?user=50sFkzIAAAAJ;https://scholar.google.com.tw/citations?user=Vkzd7MIAAAAJ;https://scholar.google.com.au/citations?user=EiLdZ_YAAAAJ;nTNjqHwAAAAJ", "orcid": ";0000-0002-3709-6216;;0000-0002-1193-9756;0000-0002-8348-392X;;", "linkedin": ";jiankang-deng-b45b21b4/?originalSubdomain=uk;;;;;", "or_profile": "~Xiaobo_Xia1;~Jiankang_Deng1;~Wei_Bao1;~Yuxuan_Du2;~Shiguang_Shan2;~Tongliang_Liu1;~bo_han2", "aff": "The University of Sydney;;University of Sydney;JD.com;Institute of Computing Technology, Chinese Academy of Sciences;University of Sydney;RIKEN", "aff_domain": "sydney.edu.au;;sydney.edu.au;jd.com;ict.ac.cn;sydney.edu.au;riken.jp", "position": "PhD student;;Lecturer;Researcher;Full Professor;Lecturer;Adjunct Scientist", "bibtex": "@misc{\nxia2023ccmln,\ntitle={{CCMLN}: Combinatorial Correction for Multi-Label Classification with Noisy Labels},\nauthor={Xiaobo Xia and Jiankang Deng and Wei Bao and Yuxuan Du and Bo Han and Shiguang Shan and Tongliang Liu},\nyear={2023},\nurl={https://openreview.net/forum?id=C-7D-u3q62f}\n}", "github": "", "project": "", "reviewers": "KQwe;KXHj;BFVX;9ufk", "site": "https://openreview.net/forum?id=C-7D-u3q62f", "pdf_size": 914885, "recommendation": "3;5;5;6", "confidence": "4;3;4;2", "correctness": "3;3;3;3", "technical_novelty": "2;2;3;3", "empirical_novelty": 
"2;2;3;3", "wc_summary_paper": "52;116;323;113", "wc_strength_and_weaknesses": "214;259;613;224", "wc_clarity_quality_novelty_and_reproducibility": "94;21;100;21", "wc_summary_review": "22;89;31;107", "wc_review": "382;485;1067;465", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 151.0, 102.53535975457442 ], "wc_strength_and_weaknesses_avg": [ 327.5, 165.6781518486973 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 59.0, 38.05916446797013 ], "wc_summary_review_avg": [ 62.25, 36.45116596214722 ], "wc_review_avg": [ 599.75, 272.51731596359156 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.7608859102526822, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:xDkrcMNCcQ8J:scholar.google.com/&scioq=CCMLN:+Combinatorial+Correction+for+Multi-Label+Classification+with+Noisy+Labels&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;1;2;0;3", "aff_unique_norm": "University of Sydney;JD.com;Chinese Academy of Sciences;RIKEN", "aff_unique_dep": ";;Institute of Computing Technology;", "aff_unique_url": "https://www.sydney.edu.au;https://www.jd.com;http://www.ict.ac.cn;https://www.riken.jp", "aff_unique_abbr": "USYD;JD;CAS;RIKEN", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1;0;2", "aff_country_unique": "Australia;China;Japan" }, { "title": "DEP-RL: Embodied Exploration for Reinforcement Learning in Overactuated and Musculoskeletal Systems", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11618", "id": "C-xa_D3oTj6", "poster": "/media/PosterPDFs/ICLR%202023/11618.png?t=1682073682.7964256", "openreview": "https://openreview.net/forum?id=C-xa_D3oTj6", "slides": "https://iclr.cc/virtual/2023/poster/11618", "video": "https://iclr.cc/virtual/2023/poster/11618", "author_site": "Pierre Schumacher, Daniel Haeufle, Dieter B\u00fcchler, Syn Schmitt, Georg Martius", "tldr": "A technique from the self-organization literature is used to improve performance of RL agents on overactuated systems with up to 120 muscle actuators.", "abstract": "Muscle-actuated organisms are capable of learning an unparalleled diversity of dexterous movements despite their vast amount of muscles. \nReinforcement learning (RL) on large musculoskeletal models, however, has not been able to show similar performance. \nWe conjecture that ineffective exploration in large overactuated action spaces is a key problem.\nThis is supported by the finding that common exploration noise strategies are inadequate in synthetic examples of overactuated systems. \nWe identify differential extrinsic plasticity (DEP), a method from the domain of self-organization, as being able to induce state-space covering exploration within seconds of interaction. 
\nBy integrating DEP into RL, we achieve fast learning of reaching and locomotion in musculoskeletal systems, outperforming current approaches in all considered tasks in sample efficiency and robustness.", "keywords": "reinforcement learning;musculoskeletal;correlated exploration", "primary_area": "", "supplementary_material": "/attachment/a3a1b5fc1575aa780e3e4c170fbec643e764a4e9.zip", "author": "Pierre Schumacher;Daniel Haeufle;Dieter B\u00fcchler;Syn Schmitt;Georg Martius", "authorids": "~Pierre_Schumacher1;~Daniel_Haeufle1;~Dieter_B\u00fcchler1;~Syn_Schmitt1;~Georg_Martius1", "gender": "M;;M;M;M", "homepage": "https://al.is.mpg.de/person/pschumacher;;http://embodied.ml/;https://www.imsb.uni-stuttgart.de/research/cbb/;https://uni-tuebingen.de/de/264672", "dblp": ";;181/4076.html;;47/2706", "google_scholar": ";;https://scholar.google.de/citations?user=8HYQ1tgAAAAJ;;https://scholar.google.de/citations?user=b-JF-UIAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Pierre_Schumacher1;~Daniel_Haeufle1;~Dieter_B\u00fcchler1;~Syn_Schmitt1;~Georg_Martius1", "aff": "Max Planck Institute for Intelligent Systems, Max-Planck Institute;;Max Planck Institute for Intelligent Systems, Max-Planck Institute;Universit\u00e4t Stuttgart;Max Planck Institute for Intelligent Systems", "aff_domain": "tuebingen.mpg.de;;tuebingen.mpg.de;uni-stuttgart.de;tuebingen.mpg.de", "position": "PhD Student;;Group Leader;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nschumacher2023deprl,\ntitle={{DEP}-{RL}: Embodied Exploration for Reinforcement Learning in Overactuated and Musculoskeletal Systems},\nauthor={Pierre Schumacher and Daniel Haeufle and Dieter B{\\\"u}chler and Syn Schmitt and Georg Martius},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=C-xa_D3oTj6}\n}", "github": "", "project": "", "reviewers": "sbak;x7yt;PfGN;Bv9J", "pdf_size": 7384721, "recommendation": "8;8;8;10", "confidence": "4;4;2;4", "correctness": "4;4;4;4", "technical_novelty": "4;3;3;4", "empirical_novelty": "4;3;4;4", "wc_summary_paper": "156;76;118;35", "wc_strength_and_weaknesses": "128;88;247;69", "wc_clarity_quality_novelty_and_reproducibility": "45;23;38;22", "wc_summary_review": "563;36;72;80", "wc_review": "892;223;475;206", "wc_reply_reviewers": "14;18;0;0", "wc_reply_authors": "925;309;226;164", "reply_reviewers": "1;1;0;0", "reply_authors": "2;1;1;1", "recommendation_avg": [ 8.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "correctness_avg": [ 4.0, 0.0 ], "technical_novelty_avg": [ 3.5, 0.5 ], "empirical_novelty_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 96.25, 45.29003753586433 ], "wc_strength_and_weaknesses_avg": [ 133.0, 69.17730841829567 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 32.0, 9.82344135219425 ], "wc_summary_review_avg": [ 187.75, 217.28365677151146 ], "wc_review_avg": [ 449.0, 277.0604627152709 ], "wc_reply_reviewers_avg": [ 8.0, 8.12403840463596 ], "wc_reply_authors_avg": [ 406.0, 304.02878153227533 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 0.0, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7445687133977028728&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=C-xa_D3oTj6", "email": 
"tuebingen.mpg.de;;tuebingen.mpg.de;uni-stuttgart.de;tuebingen.mpg.de", "author_num": 5, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Max Planck Institute for Intelligent Systems;University of Stuttgart", "aff_unique_dep": "Intelligent Systems;", "aff_unique_url": "https://www.mpi-is.mpg.de;https://www.uni-stuttgart.de", "aff_unique_abbr": "MPI-IS;Uni Stuttgart", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "id": "C0oEBO4ZpOj", "title": "MGMA: Mesh Graph Masked Autoencoders for Self-supervised Learning on 3D Shape", "track": "main", "status": "Withdraw", "tldr": "We introduce a self-supervised learning model to extract face nodes and global graph embeddings on meshes.", "abstract": "We introduce a self-supervised learning model to extract face nodes and global graph embeddings on meshes. We define a graph masking on a mesh graph composed of faces. We evaluate our model on shape classification and segmentation benchmarks. The results suggest that our model outperforms prior state-of-the-art mesh encoders: In ModelNet40 classification task, it achieves an accuracy of 89.8% and in ShapeNet segmentation task, it achieves a mean Intersection-over-Union (mIoU) of 78.5. Further, we explore and explain the correlation between test and training masking ratios on MGMA. And we find best performances are obtained when mesh graph masked autoencoders are trained and evaluated under different masking ratios. Our work may open up new opportunities to address label scarcity and improve the learning power in geometric deep learning research.", "keywords": "mesh graph;self-supvervised learning;masked autoencoder;attention", "primary_area": "", "supplementary_material": "/attachment/33af69b13e8629e20f4ac6e8c06ce297d07783b8.zip", "author": "Zhangsihao Yang", "authorids": "~Zhangsihao_Yang1", "gender": "M", "homepage": "", "dblp": "", "google_scholar": "VaRp0cMAAAAJ", "orcid": "", "linkedin": "", "or_profile": "~Zhangsihao_Yang1", "aff": "Arizona State University", "aff_domain": "asu.edu", "position": "PhD student", "bibtex": "@misc{\nyang2023mgma,\ntitle={{MGMA}: Mesh Graph Masked Autoencoders for Self-supervised Learning on 3D Shape},\nauthor={Zhangsihao Yang},\nyear={2023},\nurl={https://openreview.net/forum?id=C0oEBO4ZpOj}\n}", "github": "", "project": "", "reviewers": "Z7zm;hqAC;tuMB;RF7b", "site": "https://openreview.net/forum?id=C0oEBO4ZpOj", "pdf_size": 32407637, "recommendation": "3;3;3;5", "confidence": "4;4;4;3", "correctness": "4;3;3;4", "technical_novelty": "2;2;2;2", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "79;63;106;69", "wc_strength_and_weaknesses": "383;429;50;152", "wc_clarity_quality_novelty_and_reproducibility": "107;27;31;126", "wc_summary_review": "50;16;14;35", "wc_review": "619;535;201;382", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 79.25, 16.467771555374455 ], "wc_strength_and_weaknesses_avg": [ 253.5, 157.54761185114802 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 72.75, 44.285296657016985 ], "wc_summary_review_avg": [ 28.75, 14.7542366796795 ], "wc_review_avg": [ 434.25, 159.23155309171608 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 
], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 1, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:KPHwyVRkOKwJ:scholar.google.com/&scioq=MGMA:+Mesh+Graph+Masked+Autoencoders+for+Self-supervised+Learning+on+3D+Shape&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Arizona State University", "aff_unique_dep": "", "aff_unique_url": "https://www.asu.edu", "aff_unique_abbr": "ASU", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Temporal Dependencies in Feature Importance for Time Series Prediction", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11463", "id": "C0q9oBc3n4", "poster": "", "openreview": "https://openreview.net/forum?id=C0q9oBc3n4", "slides": "https://iclr.cc/virtual/2023/poster/11463", "video": "https://iclr.cc/virtual/2023/poster/11463", "author_site": "Kin Kwan Leung, Clayton Rooke, Jonathan Smith, Saba Zuberi, Maksims Volkovs", "tldr": "New explainability method for multivariate time series predictions", "abstract": "Time series data introduces two key challenges for explainability methods: firstly, observations of the same feature over subsequent time steps are not independent, and secondly, the same feature can have varying importance to model predictions over time. In this paper, we propose Windowed Feature Importance in Time (WinIT), a feature removal based explainability approach to address these issues. Unlike existing feature removal explanation methods, WinIT explicitly accounts for the temporal dependence between different observations of the same feature in the construction of its importance score. Furthermore, WinIT captures the varying importance of a feature over time, by summarizing its importance over a window of past time steps. We conduct an extensive empirical study on synthetic and real-world data, compare against a wide range of leading explainability methods, and explore the impact of various evaluation strategies. 
Our results show that WinIT achieves significant gains over existing methods, with more consistent performance across different evaluation metrics.", "keywords": "time series;recurrent;explainability", "primary_area": "", "supplementary_material": "", "author": "Kin Kwan Leung;Clayton Rooke;Jonathan Smith;Saba Zuberi;Maksims Volkovs", "authorids": "~Kin_Kwan_Leung1;~Clayton_Rooke1;~Jonathan_Smith2;~Saba_Zuberi1;~Maksims_Volkovs3", "gender": "M;;;;M", "homepage": ";;;;https://www.cs.toronto.edu/~mvolkovs", "dblp": "239/5952;;34/2857;;22/1815", "google_scholar": "https://scholar.google.ca/citations?user=WuSpCo8AAAAJ;;URWZ--QAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.ca/citations?user=m9I8jgcAAAAJ", "orcid": ";;0000-0002-4385-3316;;", "linkedin": ";;;;", "or_profile": "~Kin_Kwan_Leung1;~Clayton_Rooke1;~Jonathan_Smith2;~Saba_Zuberi1;~Maksims_Volkovs1", "aff": "Layer 6 AI;;Meta;Layer 6 AI;Layer6 AI", "aff_domain": "layer6.ai;;meta.com;layer6.ai;layer6.ai", "position": "Researcher;;Machine Learning Engineer;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nleung2023temporal,\ntitle={Temporal Dependencies in Feature Importance for Time Series Prediction},\nauthor={Kin Kwan Leung and Clayton Rooke and Jonathan Smith and Saba Zuberi and Maksims Volkovs},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=C0q9oBc3n4}\n}", "github": "", "project": "", "reviewers": "CWz4;4Fvg;vrY7", "pdf_size": 879386, "recommendation": "6;8;8", "confidence": "3;3;3", "correctness": "3;4;4", "technical_novelty": "3;3;3", "empirical_novelty": "2;3;3", "wc_summary_paper": "45;101;79", "wc_strength_and_weaknesses": "303;88;29", "wc_clarity_quality_novelty_and_reproducibility": "39;41;24", "wc_summary_review": "36;21;93", "wc_review": "423;251;225", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "994;254;96", "reply_reviewers": "0;0;0", "reply_authors": "2;1;1", "recommendation_avg": [ 7.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.0 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 75.0, 23.03620339089466 ], "wc_strength_and_weaknesses_avg": [ 140.0, 117.74831916705506 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 34.666666666666664, 7.586537784494029 ], "wc_summary_review_avg": [ 50.0, 31.016124838541646 ], "wc_review_avg": [ 299.6666666666667, 87.85341326449543 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 448.0, 391.43156064204464 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.9999999999999998, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13701646948255642137&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=C0q9oBc3n4", "email": "layer6.ai;;meta.com;layer6.ai;layer6.ai", "author_num": 5, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Layer 6 AI;Meta;Layer6 AI", "aff_unique_dep": ";Meta Platforms, Inc.;", "aff_unique_url": "https://layer6.ai;https://meta.com;https://layer6.ai", "aff_unique_abbr": "Layer 6 AI;Meta;Layer6", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", 
"aff_country_unique": "Canada;United States" }, { "id": "C1A2HD6EEGO", "title": "ImageNet-E: Benchmarking Neural Network Robustness via Attribute Editing", "track": "main", "status": "Withdraw", "tldr": "A new robustness benchmark that can help to evaluate the robustness against different object attributes", "abstract": "Recent studies have shown that higher accuracy on ImageNet usually leads to better robustness against different corruptions. \nIn this paper, instead of following the traditional research paradigm that investigates new out-of-distribution corruptions or perturbations deep models may encounter, we conduct model debugging in in-distribution data to explore which object attributes a model may be sensitive to. To achieve this goal, we create a toolkit for object editing with controls of backgrounds, sizes, positions, and directions, and create a rigorous benchmark named ImageNet-E(diting) for evaluating the image classifier robustness in terms of object attributes.\nWith our ImageNet-E, we evaluate the performance of current deep learning models, including both convolutional neural networks and vision transformers. We find that most models are quite sensitive to attribute changes. An imperceptible change in the background can lead to an average of 10.15% drop rate on top-1 accuracy. We also evaluate some robust models including both adversarially trained models and other robust trained models and find that some models show worse robustness against attribute changes than vanilla models.\nBased on these findings, we discover ways to enhance attribute robustness with preprocessing, architecture designs, and training strategies. We hope this work can provide some insights to the community and open up a new avenue for research in robust computer vision. 
The code and dataset will be publicly available.\n", "keywords": "Robustness;benchmark;attribute", "primary_area": "", "supplementary_material": "/attachment/76caa5fa67348f83ef2ac091ce4d6c8c385e8a09.zip", "author": "Xiaodan Li;YueFeng Chen;Yao Zhu;Cen Chen;Shuhui Wang;Rong Zhang;Hui Xue';Zhao Li", "authorids": "~Xiaodan_Li1;~YueFeng_Chen1;~Yao_Zhu2;~Cen_Chen1;~Shuhui_Wang1;~Rong_Zhang2;~Hui_Xue'1;~Zhao_Li10", "gender": "F;M;M;F;M;M;M;M", "homepage": ";;;https://sites.google.com/site/chencenpersonalwebsite/;https://vipl.ict.ac.cn/people/shwang/;;http://www.alibaba.com;https://sites.google.com/view/zhaoli", "dblp": "126/7789;52/8180;;152/6215-1.html;37/2537;13/5366-2;;l/ZhaoLi-7", "google_scholar": "YximuHAAAAAJ;Kf-IpFsAAAAJ;Te8bmo0AAAAJ;https://scholar.google.com.sg/citations?user=3Mn4S9UAAAAJ;h-JxBSYAAAAJ;;;", "orcid": ";;0000-0003-0991-1970;0000-0003-0325-1705;0000-0002-5931-0527;;;0000-0002-5056-0351", "linkedin": ";;;;;;;", "or_profile": "~Xiaodan_Li1;~YueFeng_Chen1;~Yao_Zhu2;~Cen_Chen1;~Shuhui_Wang1;~Rong_Zhang2;~Hui_Xue'1;~Zhao_Li10", "aff": "Alibaba Group;Alibaba Group;Zhejiang University;East China Normal University;Institute of Computing Technology, Chinese Academy of Sciences;;Alibaba Group;Zhejiang University", "aff_domain": "alibaba-inc.com;alibaba-inc.com;zju.edu.cn;dase.ecnu.edu.cn;ict.ac.cn;;alibaba-inc.com;zju.edu.cn", "position": "Researcher;Staff Algorithm Engineer;PhD student;Associate Professor;Full Professor;;Principal Researcher;Adjunct Professor", "bibtex": "@misc{\nli2023imagenete,\ntitle={ImageNet-E: Benchmarking Neural Network Robustness via Attribute Editing},\nauthor={Xiaodan Li and YueFeng Chen and Yao Zhu and Cen Chen and Shuhui Wang and Rong Zhang and Hui Xue' and Zhao Li},\nyear={2023},\nurl={https://openreview.net/forum?id=C1A2HD6EEGO}\n}", "github": "", "project": "", "reviewers": "ACWD;THGV;mvB1;g57E", "site": "https://openreview.net/forum?id=C1A2HD6EEGO", "pdf_size": 12747744, "recommendation": "5;5;5;6", "confidence": "5;4;3;4", "correctness": "3;2;3;4", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;3;2;3", "wc_summary_paper": "192;70;73;176", "wc_strength_and_weaknesses": "784;161;727;272", "wc_clarity_quality_novelty_and_reproducibility": "192;401;154;138", "wc_summary_review": "107;105;65;58", "wc_review": "1275;737;1019;644", "wc_reply_reviewers": "0;157;499;81", "wc_reply_authors": "1574;1581;2525;330", "reply_reviewers": "0;1;4;1", "reply_authors": "5;4;9;3", "recommendation_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 127.75, 56.543677807514435 ], "wc_strength_and_weaknesses_avg": [ 486.0, 273.08698247994175 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 221.25, 105.61575403319337 ], "wc_summary_review_avg": [ 83.75, 22.398381637966615 ], "wc_review_avg": [ 918.75, 247.73208815169664 ], "wc_reply_reviewers_avg": [ 184.25, 190.01233512590701 ], "wc_reply_authors_avg": [ 1502.5, 779.6693209303544 ], "reply_reviewers_avg": [ 1.5, 1.5 ], "reply_authors_avg": [ 5.25, 2.277608394786075 ], "replies_avg": [ 34, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.816496580927726, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4216360697643297068&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0;1;2;3;0;1", 
"aff_unique_norm": "Alibaba Group;Zhejiang University;East China Normal University;Chinese Academy of Sciences", "aff_unique_dep": ";;;Institute of Computing Technology", "aff_unique_url": "https://www.alibaba.com;https://www.zju.edu.cn;http://www.ecnu.edu.cn;http://www.ict.ac.cn", "aff_unique_abbr": "Alibaba;ZJU;ECNU;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "MetaGL: Evaluation-Free Selection of Graph Learning Models via Meta-Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11433", "id": "C1ns08q9jZ", "poster": "/media/PosterPDFs/ICLR%202023/11433.png?t=1681348493.3220282", "openreview": "https://openreview.net/forum?id=C1ns08q9jZ", "slides": "https://iclr.cc/virtual/2023/poster/11433", "video": "https://iclr.cc/virtual/2023/poster/11433", "author_site": "Namyong Park, Ryan Rossi, Nesreen Ahmed, Christos Faloutsos", "tldr": "We present a meta-learning based framework that tackles the new problem of selecting a graph learning model without any evaluation.", "abstract": "Given a graph learning task, such as link prediction, on a new graph, how can we select the best method as well as its hyperparameters (collectively called a model) without having to train or evaluate any model on the new graph? Model selection for graph learning has been largely ad hoc. A typical approach has been to apply popular methods to new datasets, but this is often suboptimal. On the other hand, systematically comparing models on the new graph quickly becomes too costly, or even impractical. In this work, we develop the first meta-learning approach for evaluation-free graph learning model selection, called MetaGL, which utilizes the prior performances of existing methods on various benchmark graph datasets to automatically select an effective model for the new graph, without any model training or evaluations. To quantify similarities across a wide variety of graphs, we introduce specialized meta-graph features that capture the structural characteristics of a graph. Then we design G-M network, which represents the relations among graphs and models, and develop a graph-based meta-learner operating on this G-M network, which estimates the relevance of each model to different graphs. Extensive experiments show that using MetaGL to select a model for the new graph greatly outperforms several existing meta-learning techniques tailed for graph learning model selection (up to 47% better), while being extremely fast at test time (\u223c1 sec).", "keywords": "evaluation-free model selection;automatic graph learning;link prediction;meta-learning", "primary_area": "", "supplementary_material": "", "author": "Namyong Park;Ryan A. 
Rossi;Nesreen Ahmed;Christos Faloutsos", "authorids": "~Namyong_Park1;~Ryan_A._Rossi2;~Nesreen_Ahmed1;~Christos_Faloutsos1", "gender": ";F;M;M", "homepage": "https://namyongpark.github.io/;http://nesreenahmed.com;https://www.cs.cmu.edu/~christos/;http://ryanrossi.com", "dblp": "116/9404;33/11518;f/CFaloutsos;17/5085", "google_scholar": "YBTXGb8AAAAJ;AFV0nLcAAAAJ;nd8lQQIAAAAJ;_Dc6lbQAAAAJ", "orcid": ";;0000-0003-2996-9790;0000-0001-9758-0635", "linkedin": ";nkahmed/;christos-faloutsos-43a7aa2/;", "or_profile": "~Namyong_Park1;~Nesreen_Ahmed1;~Christos_Faloutsos1;~Ryan_Rossi1", "aff": "Meta AI;Intel AI Research;Carnegie Mellon University;Adobe Research", "aff_domain": "meta.com;intel.com;cmu.edu;adobe.com", "position": "Researcher;Principal Researcher;Full Professor;Senior Research Scientist", "bibtex": "@inproceedings{\npark2023metagl,\ntitle={Meta{GL}: Evaluation-Free Selection of Graph Learning Models via Meta-Learning},\nauthor={Namyong Park and Ryan A. Rossi and Nesreen Ahmed and Christos Faloutsos},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=C1ns08q9jZ}\n}", "github": "", "project": "", "reviewers": "uUz4;P6vP;3hVZ;GdF8", "pdf_size": 1383660, "recommendation": "5;6;6;8", "confidence": "4;3;5;4", "correctness": "3;3;3;4", "technical_novelty": "3;3;3;3", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "58;71;79;54", "wc_strength_and_weaknesses": "345;253;129;173", "wc_clarity_quality_novelty_and_reproducibility": "22;119;7;82", "wc_summary_review": "51;47;43;59", "wc_review": "476;490;258;368", "wc_reply_reviewers": "0;15;0;0", "wc_reply_authors": "1102;664;395;544", "reply_reviewers": "0;1;0;0", "reply_authors": "2;1;1;1", "recommendation_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 65.5, 10.012492197250394 ], "wc_strength_and_weaknesses_avg": [ 225.0, 82.3164625090267 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 57.5, 45.2575960475145 ], "wc_summary_review_avg": [ 50.0, 5.916079783099616 ], "wc_review_avg": [ 398.0, 93.60555539069249 ], "wc_reply_reviewers_avg": [ 3.75, 6.49519052838329 ], "wc_reply_authors_avg": [ 676.25, 263.630778741785 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.9271726499455306, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14678121668944079922&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=C1ns08q9jZ", "email": "meta.com;intel.com;cmu.edu;adobe.com", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Meta;Intel;Carnegie Mellon University;Adobe", "aff_unique_dep": "Meta AI;Intel AI Research;;Adobe Research", "aff_unique_url": "https://meta.com;https://www.intel.com/research;https://www.cmu.edu;https://research.adobe.com", "aff_unique_abbr": "Meta;Intel AI;CMU;Adobe", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Neural Episodic Control with State Abstraction", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12008", "id": "C2fsSj3ZGiU", 
"poster": "/media/PosterPDFs/ICLR%202023/12008.png?t=1680777672.8945425", "openreview": "https://openreview.net/forum?id=C2fsSj3ZGiU", "slides": "https://iclr.cc/virtual/2023/poster/12008", "video": "https://iclr.cc/virtual/2023/poster/12008", "author_site": "Zhuo Li, Derui Zhu, Yujing Hu, Xiaofei Xie, Lei Ma, YAN ZHENG, Yan Song, Yingfeng Chen, Jianjun Zhao", "tldr": "We propose NECSA, a simple and effective state abstraction-based episodic control containing a more comprehensive episodic memory, a novel state measurement, and a multi-step state analysis.", "abstract": "Existing Deep Reinforcement Learning (DRL) algorithms suffer from sample inefficiency. Generally, episodic control-based approaches are solutions that leverage highly rewarded past experiences to improve sample efficiency of DRL algorithms. However, previous episodic control-based approaches fail to utilize the latent information from the historical behaviors (\\eg, state transitions, topological similarities, \\etc) and lack scalability during DRL training. This work introduces Neural Episodic Control with State Abstraction (NECSA), a simple but effective state abstraction-based episodic control containing a more comprehensive episodic memory, a novel state evaluation, and a multi-step state analysis. We evaluate our approach to the MuJoCo and Atari tasks in OpenAI gym domains. The experimental results indicate that NECSA achieves higher sample efficiency than the state-of-the-art episodic control-based approaches. Our data and code are available at the project website\\footnote{\\url{https://sites.google.com/view/drl-necsa}}. ", "keywords": "Deep reinforcement learning;episodic control;sample efficiency;state abstraction", "primary_area": "", "supplementary_material": "/attachment/8c7ba7297579f446b825beb84765b7950c5a6a4b.zip", "author": "Zhuo Li;Derui Zhu;Yujing Hu;Xiaofei Xie;Lei Ma;YAN ZHENG;Yan Song;Yingfeng Chen;Jianjun Zhao", "authorids": "~Zhuo_Li6;~Derui_Zhu2;~Yujing_Hu2;~Xiaofei_Xie2;~Lei_Ma1;~YAN_ZHENG1;~Yan_Song4;~Yingfeng_Chen2;~Jianjun_Zhao1", "gender": "M;;;M;M;M;;;M", "homepage": "https://lizhuo-1994.github.io/;;;http://xiaofeixie.bitbucket.io/;https://www.malei.org;https://yanzzzzz.github.io;;;http://stap.ait.kyushu-u.ac.jp/~zhao/", "dblp": ";;https://dblp.uni-trier.de/pid/160/1923.html;127/0713;20/6534-3;10/2381-2;;;71/6948", "google_scholar": "https://scholar.google.co.jp/citations?user=54d2brwAAAAJ;;IR5WY-wAAAAJ;FfcZfJgAAAAJ;xsfGc58AAAAJ;https://scholar.google.com.hk/citations?user=tJuhd1kAAAAJ;;;https://scholar.google.com/scholar?hl=en", "orcid": ";;;0000-0002-1288-6502;;;;;", "linkedin": ";;;;lei-ma-345a0484;;;;jianjunzhao/", "or_profile": "~Zhuo_Li6;~Derui_Zhu2;~Yujing_Hu2;~Xiaofei_Xie2;~Lei_Ma1;~YAN_ZHENG1;~Yan_Song4;~Yingfeng_Chen2;~Jianjun_Zhao1", "aff": "Kyushu University;;NetEase, Inc.;Singapore Management University;Kyushu University;Tianjin Unibersity, China;;;Kyushu University", "aff_domain": "kyushu-u.ac.jp;;corp.netease.com;smu.edu.sg;kyushu-u.ac.jp;tju.edu.cn;;;kyushu-u.ac.jp", "position": "PhD student;;Researcher;Assistant Professor;Associate Professor;Associate Professor;;;Full Professor", "bibtex": "@inproceedings{\nli2023neural,\ntitle={Neural Episodic Control with State Abstraction},\nauthor={Zhuo Li and Derui Zhu and Yujing Hu and Xiaofei Xie and Lei Ma and YAN ZHENG and Yan Song and Yingfeng Chen and Jianjun Zhao},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=C2fsSj3ZGiU}\n}", "github": "", 
"project": "", "reviewers": "4xFQ;QTYi;D6zD", "pdf_size": 5697671, "recommendation": "6;8;8", "confidence": "4;3;4", "correctness": "4;3;4", "technical_novelty": "4;3;3", "empirical_novelty": "3;3;2", "wc_summary_paper": "168;130;52", "wc_strength_and_weaknesses": "143;296;340", "wc_clarity_quality_novelty_and_reproducibility": "88;98;49", "wc_summary_review": "61;35;94", "wc_review": "460;559;535", "wc_reply_reviewers": "0;19;199", "wc_reply_authors": "2120;1113;3280", "reply_reviewers": "0;1;3", "reply_authors": "4;3;8", "recommendation_avg": [ 7.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 116.66666666666667, 48.28618389928485 ], "wc_strength_and_weaknesses_avg": [ 259.6666666666667, 84.42879972036924 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 78.33333333333333, 21.139746660943903 ], "wc_summary_review_avg": [ 63.333333333333336, 24.143091949642425 ], "wc_review_avg": [ 518.0, 42.16633728461603 ], "wc_reply_reviewers_avg": [ 72.66666666666667, 89.66728624321259 ], "wc_reply_authors_avg": [ 2171.0, 885.408756827414 ], "reply_reviewers_avg": [ 1.3333333333333333, 1.247219128924647 ], "reply_authors_avg": [ 5.0, 2.160246899469287 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 9, 0 ], "corr_recommendation_confidence": -0.49999999999999983, "corr_recommendation_correctness": -0.49999999999999983, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=727549204338714330&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "pdf": "https://openreview.net/pdf?id=C2fsSj3ZGiU", "email": "kyushu-u.ac.jp;;corp.netease.com;smu.edu.sg;kyushu-u.ac.jp;tju.edu.cn;;;kyushu-u.ac.jp", "author_num": 9, "aff_unique_index": "0;1;2;0;3;0", "aff_unique_norm": "Kyushu University;NetEase, Inc.;Singapore Management University;Tianjin University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.kyushu-u.ac.jp;https://www.163.com;https://www.smu.edu.sg;http://www.tju.edu.cn", "aff_unique_abbr": "Kyushu U;NetEase;SMU;TJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;0;1;0", "aff_country_unique": "Japan;China;Singapore" }, { "title": "Computational Language Acquisition with Theory of Mind", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10772", "id": "C2ulri4duIs", "poster": "/media/PosterPDFs/ICLR%202023/10772.png?t=1682117974.693625", "openreview": "https://openreview.net/forum?id=C2ulri4duIs", "slides": "https://iclr.cc/virtual/2023/poster/10772", "video": "https://iclr.cc/virtual/2023/poster/10772", "author_site": "Andy Liu, Hao Zhu, Emmy Liu, Yonatan Bisk, Graham Neubig", "tldr": "Analyzing the effects of Theory of Mind and environment complexity on language acquisition models.", "abstract": "Unlike current state-of-the-art language models, young children actively acquire language through interactions with their surrounding environment and caretakers. One mechanism that has been argued to be critical to language learning is the ability to infer the mental states of other agents in social environments, coined Theory of Mind (ToM) by Premack & Woodruff (1978). Drawing inspiration from the modern operationalized versions of ToM implemented in Rabinowitz et al. (2018) and Zhu et al. 
(2021), we build language-learning agents equipped with ToM, and measure its effects on the learning process. We model ToM by giving the speaker agent an internal listener model that is trained alongside the speaker and used to rerank potential utterances. We experiment with varying task difficulty, hypothesizing that models will acquire more complex language to adapt to stronger environmental pressures. We find that training speakers with a highly weighted ToM listener component leads to performance gains in our image referential game setting. We also find some evidence that increasing task difficulty in the training process results in more fluent and precise utterances in evaluation. This suggests the potential utility of further incorporating ToM, as well as other insights from child language acquisition, into computational models of language acquisition.", "keywords": "language acquisition;theory of mind;referential games;natural language processing", "primary_area": "", "supplementary_material": "", "author": "Andy Liu;Hao Zhu;Emmy Liu;Yonatan Bisk;Graham Neubig", "authorids": "~Andy_Liu1;~Hao_Zhu1;~Emmy_Liu1;~Yonatan_Bisk1;~Graham_Neubig1", "gender": "M;M;F;M;M", "homepage": "https://andyjliu.github.io/;http://www.zhuhao.me;https://nightingal3.github.io/;http://www.YonatanBisk.com;http://phontron.com", "dblp": "63/10289;10/3520-6;249/6997;38/9282;03/8155", "google_scholar": "FtdDAMoAAAAJ;-3yFcsMAAAAJ;;bWoGh8UAAAAJ;wlosgkoAAAAJ", "orcid": ";;;0000-0002-2111-9081;", "linkedin": "andyjliu/;;;yonatanbisk/;", "or_profile": "~Andy_Liu1;~Hao_Zhu1;~Emmy_Liu1;~Yonatan_Bisk1;~Graham_Neubig1", "aff": "Harvey Mudd College;Carnegie Mellon University;School of Computer Science, Carnegie Mellon University;Meta;Carnegie Mellon University", "aff_domain": "hmc.edu;cmu.edu;cs.cmu.edu;meta.com;cmu.edu", "position": "Undergrad student;PhD student;PhD student;Visiting Professor;Associate Professor", "bibtex": "@inproceedings{\nliu2023computational,\ntitle={Computational Language Acquisition with Theory of Mind},\nauthor={Andy Liu and Hao Zhu and Emmy Liu and Yonatan Bisk and Graham Neubig},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=C2ulri4duIs}\n}", "github": "", "project": "", "reviewers": "Z8Jq;NqTJ;eJ14;J8M3", "pdf_size": 1607014, "recommendation": "5;6;6;8", "confidence": "3;4;4;4", "correctness": "3;3;4;3", "technical_novelty": "2;3;3;4", "empirical_novelty": "2;3;3;4", "wc_summary_paper": "160;94;205;338", "wc_strength_and_weaknesses": "185;77;1386;965", "wc_clarity_quality_novelty_and_reproducibility": "35;47;14;106", "wc_summary_review": "186;46;114;40", "wc_review": "566;264;1719;1449", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "489;197;2363;1277", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;5;3", "recommendation_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 199.25, 89.30670467551694 ], "wc_strength_and_weaknesses_avg": [ 653.25, 544.3879016840841 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 50.5, 34.15040263305837 ], "wc_summary_review_avg": [ 96.5, 59.285327021110376 ], "wc_review_avg": [ 999.5, 601.7916998430603 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1081.5, 838.7280548544921 ], "reply_reviewers_avg": [ 0, 0 ], 
"reply_authors_avg": [ 2.5, 1.6583123951777 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.6622661785325219, "corr_recommendation_correctness": -0.13245323570650439, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5239285744195652884&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=C2ulri4duIs", "email": "hmc.edu;cmu.edu;cs.cmu.edu;meta.com;cmu.edu", "author_num": 5, "aff_unique_index": "0;1;1;2;1", "aff_unique_norm": "Harvey Mudd College;Carnegie Mellon University;Meta", "aff_unique_dep": ";;Meta Platforms, Inc.", "aff_unique_url": "https://www.hmc.edu;https://www.cmu.edu;https://meta.com", "aff_unique_abbr": "HMC;CMU;Meta", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pittsburgh", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "C3ukgkqJuh0", "title": "Reinforcement learning for instance segmentation with high-level priors", "track": "main", "status": "Reject", "tldr": "Instance segmentation can be learned from high-level rules only for objects following a regular shape prior.", "abstract": "Instance segmentation is a fundamental computer vision problem which remains challenging despite impressive recent advances due to deep learning-based methods. Given sufficient training data, fully supervised methods can yield excellent performance, but annotation of groundtruth data remains a major bottleneck, especially for biomedical applications where it has to be performed by domain experts. The amount of labels required can be drastically reduced by using rules derived from prior knowledge to guide the segmentation. However, these rules are in general not differentiable and thus cannot be used with existing methods. Here, we revoke this requirement by using stateless actor critic reinforcement learning, which enables non-differentiable rewards. We formulate the instance segmentation problem as graph partitioning and the actor critic predicts the edge weights driven by the rewards, which are based on the conformity of segmented instances to high-level priors on object shape, position or size. 
The experiments on toy and real data demonstrate that a good set of priors is sufficient to reach excellent performance without any direct object-level supervision.", "keywords": "Instance Segmentation;Reinforcement Learning;Biomedical Imaging", "primary_area": "", "supplementary_material": "/attachment/d3fdf99a0f165c914c07f73b23a03692f661f149.zip", "author": "Paul Hilt;Edgar Kaziakhmedov;Maedeh Zarvandi;Sourabh Bhide;Maria Leptin;Constantin Pape;Anna Kreshuk", "authorids": "~Paul_Hilt2;~Edgar_Kaziakhmedov1;~Maedeh_Zarvandi1;~Sourabh_Bhide1;~Maria_Leptin1;~Constantin_Pape1;~Anna_Kreshuk2", "gender": "M;M;F;M;;M;", "homepage": ";;;;https://www.embl.de/research/units/directors_research/leptin/;https://constantinpape.github.io/;", "dblp": ";;;;;213/4097;42/6174", "google_scholar": ";Pe8FMzoAAAAJ;;;;https://scholar.google.de/citations?user=idkzOIUAAAAJ;https://scholar.google.de/citations?user=u4BlQBYAAAAJ", "orcid": ";;;0000-0002-3662-9033;;0000-0001-6562-7187;", "linkedin": "paul-hilt-04a674204/;;maedeh-zarvandi/;;;;", "or_profile": "~Paul_Hilt2;~Edgar_Kaziakhmedov1;~Maedeh_Zarvandi1;~Sourabh_Bhide1;~Maria_Leptin1;~Constantin_Pape1;~Anna_Kreshuk2", "aff": ";State University of New York at Binghamton;European Molecular Biology Laboratory;;;Georg-August Universit\u00e4t G\u00f6ttingen;EMBL", "aff_domain": ";binghamton.edu;embl.de;;;uni-goettingen.de;embl.de", "position": ";PhD student;MS student;;;Assistant Professor;Group Leader", "bibtex": "@misc{\nhilt2023reinforcement,\ntitle={Reinforcement learning for instance segmentation with high-level priors},\nauthor={Paul Hilt and Edgar Kaziakhmedov and Maedeh Zarvandi and Sourabh Bhide and Maria Leptin and Constantin Pape and Anna Kreshuk},\nyear={2023},\nurl={https://openreview.net/forum?id=C3ukgkqJuh0}\n}", "github": "", "project": "", "reviewers": "KFcT;qRMW;qqMa", "site": "https://openreview.net/forum?id=C3ukgkqJuh0", "pdf_size": 4669110, "recommendation": "5;5;5", "confidence": "4;2;4", "correctness": "3;3;2", "technical_novelty": "3;3;3", "empirical_novelty": "3;2;2", "wc_summary_paper": "80;81;81", "wc_strength_and_weaknesses": "126;100;102", "wc_clarity_quality_novelty_and_reproducibility": "64;76;12", "wc_summary_review": "77;11;29", "wc_review": "347;268;224", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;335", "reply_reviewers": "0;0;0", "reply_authors": "0;0;1", "recommendation_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 80.66666666666667, 0.4714045207910317 ], "wc_strength_and_weaknesses_avg": [ 109.33333333333333, 11.8133634311129 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 50.666666666666664, 27.776888874666213 ], "wc_summary_review_avg": [ 39.0, 27.85677655436824 ], "wc_review_avg": [ 279.6666666666667, 50.88767587103537 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 111.66666666666667, 157.92051446499562 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:FEuzQ7ShcvcJ:scholar.google.com/&scioq=Reinforcement+learning+for+instance+segmentation+with+high-level+priors&hl=en&as_sdt=0,33", 
"gs_version_total": 5, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "State University of New York at Binghamton;European Molecular Biology Laboratory;Georg-August Universit\u00e4t G\u00f6ttingen", "aff_unique_dep": ";;", "aff_unique_url": "https://www.binghamton.edu;https://www.embl.org;https://www.uni-goettingen.de", "aff_unique_abbr": "SUNY Binghamton;EMBL;GAU", "aff_campus_unique_index": "0", "aff_campus_unique": "Binghamton;", "aff_country_unique_index": "0;1;2;1", "aff_country_unique": "United States;Unknown;Germany" }, { "id": "C49AIKljGaa", "title": "ConBaT: Control Barrier Transformer for Safety-Critical Policy Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Large-scale self-supervised models have recently revolutionized our ability to perform a variety of tasks within the vision and language domains. However, using such models for autonomous systems is challenging because of safety requirements: besides executing correct actions, an autonomous agent needs to also avoid high cost and potentially fatal critical mistakes. Traditionally, self-supervised training mostly focuses on imitating previously observed behaviors, and the training demonstrations carry no notion of which behaviors should be explicitly avoided. In this work, we propose Control Barrier Transformer (ConBaT), an approach that learns safe behaviors from demonstrations in a self-supervised fashion. ConBaT is inspired by the concept of control barrier functions in control theory and uses a causal transformer that learns to predict safe robot actions autoregressively using a critic that requires minimal safety data labeling. During deployment, we employ a lightweight online optimization to find actions that can ensure future states lie within the safe set. 
We apply our approach to different simulated control tasks and show that our method results in safer control policies compared to other classical and learning-based methods.", "keywords": "Learning from demonstration;Control barrier functions;Transformer models", "primary_area": "", "supplementary_material": "/attachment/c10cd7fd7e124d0832360a114b4004678efecd37.zip", "author": "Yue Meng;Sai Vemprala;Rogerio Bonatti;Chuchu Fan;Ashish Kapoor", "authorids": "~Yue_Meng1;~Sai_Vemprala1;~Rogerio_Bonatti1;~Chuchu_Fan2;~Ashish_Kapoor1", "gender": "M;M;;F;", "homepage": "https://mengyuest.github.io;https://www.saihv.com;http://rogeriobonatti.com/;https://chuchu.mit.edu;", "dblp": ";190/8334.html;184/4631;127/1756;93/161", "google_scholar": "HQHZKyQAAAAJ;PnaHFhUAAAAJ;https://scholar.google.com.br/citations?user=WFgFAB8AAAAJ;J-dq_8EAAAAJ;4D1n8scAAAAJ", "orcid": "0000-0003-0204-4819;;;;", "linkedin": "yuemeng95/;;;chuchu-fan/;ashish-kapoor-a2971b6/", "or_profile": "~Yue_Meng1;~Sai_Vemprala1;~Rogerio_Bonatti1;~Chuchu_Fan2;~Ashish_Kapoor1", "aff": "Massachusetts Institute of Technology;Microsoft;Microsoft;Massachusetts Institute of Technology;Microsoft", "aff_domain": "mit.edu;microsoft.com;microsoft.com;mit.edu;microsoft.com", "position": "PhD student;Senior Researcher;Researcher;Assistant Professor;Researcher", "bibtex": "@misc{\nmeng2023conbat,\ntitle={ConBaT: Control Barrier Transformer for Safety-Critical Policy Learning},\nauthor={Yue Meng and Sai Vemprala and Rogerio Bonatti and Chuchu Fan and Ashish Kapoor},\nyear={2023},\nurl={https://openreview.net/forum?id=C49AIKljGaa}\n}", "github": "", "project": "", "reviewers": "tYu8;okow;iZzR;89Hu", "site": "https://openreview.net/forum?id=C49AIKljGaa", "pdf_size": 1186114, "recommendation": "3;5;5;6", "confidence": "3;3;4;4", "correctness": "2;4;3;3", "technical_novelty": "2;3;3;2", "empirical_novelty": "1;2;2;3", "wc_summary_paper": "54;208;89;74", "wc_strength_and_weaknesses": "105;344;491;302", "wc_clarity_quality_novelty_and_reproducibility": "34;94;2;57", "wc_summary_review": "31;45;63;64", "wc_review": "224;691;645;497", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 106.25, 60.04321360486962 ], "wc_strength_and_weaknesses_avg": [ 310.5, 137.84502167289176 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 46.75, 33.55126674210677 ], "wc_summary_review_avg": [ 50.75, 13.681648292512127 ], "wc_review_avg": [ 514.25, 182.26268817286768 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.6882472016116854, "corr_recommendation_correctness": 0.6488856845230502, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:xntfoboH6qEJ:scholar.google.com/&scioq=ConBaT:+Control+Barrier+Transformer+for+Safety-Critical+Policy+Learning&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;1;1;0;1", "aff_unique_norm": "Massachusetts Institute of Technology;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://web.mit.edu;https://www.microsoft.com", "aff_unique_abbr": "MIT;Microsoft", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "C6CEY8xiA7v", "title": "Automaton Distillation: A Neuro-Symbolic Transfer Learning Approach for Deep RL", "track": "main", "status": "Withdraw", "tldr": "Transfer reinforcement learning using symbolic knowledge extracted from a teacher", "abstract": "Reinforcement learning is a powerful tool for finding optimal policies in sequential decision processes. However, deep learning methods suffer from two weaknesses: collecting the amount of agent experience required for practical RL problems is prohibitively expensive, and the learned policies exhibit poor generalization on tasks outside the training distribution. To mitigate these issues, we introduce automaton distillation, a form of neuro-symbolic transfer learning in which Q-value estimates from a teacher are distilled into a low-dimensional representation in the form of an automaton. We then propose two methods for generating Q-value estimates: static transfer, which reasons over an abstract MDP constructed based on prior knowledge, and dynamic transfer, where symbolic information is extracted from a DQN teacher. The resulting Q-value estimates from either method are used to bootstrap learning in the target environment via a modified DQN loss function. We list several failure modes of existing automaton-based transfer methods and demonstrate that both static and dynamic automaton distillation decrease the time required to find optimal policies for various decision tasks.", "keywords": "reinforcement learning;transfer learning;neuro-symbolic;formal languages;automaton", "primary_area": "", "supplementary_material": "", "author": "Suraj Singireddy;Sumit Kumar Jha;Alvaro Velasquez", "authorids": "~Suraj_Singireddy1;~Sumit_Kumar_Jha2;~Alvaro_Velasquez1", "gender": "M;;M", "homepage": ";http://www.sumitkumarjha.com;", "dblp": ";05/5046-1;151/6275", "google_scholar": ";3kJbs98AAAAJ;1g3pA4cAAAAJ", "orcid": ";0000-0003-0354-2940;0000-0001-6757-105X", "linkedin": "surajsingireddy/;sumit-jha-572a45180/;alvaro-velasquez-b14963246/", "or_profile": "~Suraj_Singireddy1;~Sumit_Kumar_Jha2;~Alvaro_Velasquez1", "aff": "University of Texas at San Antonio;University of Texas, San Antonio;University of Colorado at Boulder", "aff_domain": "utsa.edu;utsa.edu;colorado.edu", "position": "PhD student;Full Professor;Assistant Professor", "bibtex": "@misc{\nsingireddy2023automaton,\ntitle={Automaton Distillation: A Neuro-Symbolic Transfer Learning Approach for Deep {RL}},\nauthor={Suraj Singireddy and Sumit Kumar Jha and Alvaro Velasquez},\nyear={2023},\nurl={https://openreview.net/forum?id=C6CEY8xiA7v}\n}", "github": "", "project": "", "reviewers": "yVEJ;DLmX;CN5a;De5m", "site": "https://openreview.net/forum?id=C6CEY8xiA7v", "pdf_size": 584847, "recommendation": "1;3;3;3", "confidence": "5;4;3;4", "correctness": "1;2;2;3", "technical_novelty": "1;2;2;3", "empirical_novelty": "1;2;2;0", "wc_summary_paper": "24;100;221;208", "wc_strength_and_weaknesses": "175;557;199;427", "wc_clarity_quality_novelty_and_reproducibility": "13;62;62;24", "wc_summary_review": "14;30;111;74", "wc_review": "226;749;593;733", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 2.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 2.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], 
"empirical_novelty_avg": [ 1.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 138.25, 80.97646262958145 ], "wc_strength_and_weaknesses_avg": [ 339.5, 159.50156739041782 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 40.25, 22.094965489902897 ], "wc_summary_review_avg": [ 57.25, 38.022197464112985 ], "wc_review_avg": [ 575.25, 210.57347292572254 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.816496580927726, "corr_recommendation_correctness": 0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:GPHkg9AsuFIJ:scholar.google.com/&scioq=Automaton+Distillation:+A+Neuro-Symbolic+Transfer+Learning+Approach+for+Deep+RL&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Texas at San Antonio;University of Colorado", "aff_unique_dep": ";", "aff_unique_url": "https://www.utsa.edu;https://www.colorado.edu", "aff_unique_abbr": "UTSA;CU", "aff_campus_unique_index": "0;0;1", "aff_campus_unique": "San Antonio;Boulder", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "C8by2OoY6Y2", "title": "Zero-Shot Retrieval with Search Agents and Hybrid Environments", "track": "main", "status": "Reject", "tldr": "A learning to search agent combined with a hybrid dense-sparse retrieval environment achieves sota on zero shot retrieval (BEIR).", "abstract": "Learning to search is the task of building artificial agents that learn to autonomously use a search box to find information. So far, it has been shown that current language models can learn symbolic query reformulation policies, in combination with traditional term-based retrieval, but fall short of outperforming neural retrievers. We extend the previous learning to search setup to a hybrid environment, which accepts discrete query refinement operations, after a first-pass retrieval step performed by a dual encoder. Experiments on the BEIR task show that search agents, trained via behavioral cloning, outperform the underlying search system based on a combined dual encoder retriever and cross encoder reranker. Furthermore, we find that simple heuristic Hybrid Retrieval Environments (HRE) can improve baseline performance by several nDCG points. The search agent based on the HRE environment (HaRE) produces state-of-the-art performance on both zero-shot and in-domain evaluations. 
We carry out an extensive qualitative analysis to shed light on the agents' policies.", "keywords": "learning to search;information retrieval;document ranking;relevance feedback;zero shot;language models;behavioral cloning", "primary_area": "", "supplementary_material": "", "author": "Michelle Chen Huebscher;Christian Buck;Massimiliano Ciaramita;Sascha Rothe", "authorids": "~Michelle_Chen_Huebscher1;~Christian_Buck1;~Massimiliano_Ciaramita2;~Sascha_Rothe1", "gender": "F;M;;M", "homepage": "https://arxiv.org/search/cs?searchtype=author&query=Huebscher%2C+M+C;;;", "dblp": ";;31/916;148/9544", "google_scholar": ";DSb_wQ8AAAAJ;;https://scholar.google.de/citations?user=Vu6r1BEAAAAJ", "orcid": ";;;", "linkedin": ";;;sascha-rothe-53b7b066/", "or_profile": "~Michelle_Chen_Huebscher1;~Christian_Buck1;~Massimiliano_Ciaramita2;~Sascha_Rothe1", "aff": ";Google;Google;Google", "aff_domain": ";google.com;google.com;google.com", "position": ";Researcher;Research Scientist;Researcher", "bibtex": "@misc{\nhuebscher2023zeroshot,\ntitle={Zero-Shot Retrieval with Search Agents and Hybrid Environments},\nauthor={Michelle Chen Huebscher and Christian Buck and Massimiliano Ciaramita and Sascha Rothe},\nyear={2023},\nurl={https://openreview.net/forum?id=C8by2OoY6Y2}\n}", "github": "", "project": "", "reviewers": "ettj;PtJd;iyX7", "site": "https://openreview.net/forum?id=C8by2OoY6Y2", "pdf_size": 450993, "recommendation": "3;5;5", "confidence": "4;3;4", "correctness": "3;3;3", "technical_novelty": "2;2;2", "empirical_novelty": "2;3;2", "wc_summary_paper": "165;255;107", "wc_strength_and_weaknesses": "206;258;373", "wc_clarity_quality_novelty_and_reproducibility": "79;82;13", "wc_summary_review": "56;103;106", "wc_review": "506;698;599", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 175.66666666666666, 60.889699913495676 ], "wc_strength_and_weaknesses_avg": [ 279.0, 69.77583153690587 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 58.0, 31.843366656181317 ], "wc_summary_review_avg": [ 88.33333333333333, 22.89589968143253 ], "wc_review_avg": [ 601.0, 78.39642849007855 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.49999999999999983, "corr_recommendation_correctness": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3456939586199980741&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "C9sU3Tnnki8", "title": "Exploring Transformer Backbones for Heterogeneous Treatment Effect Estimation", "track": "main", "status": "Reject", "tldr": "We propose a general-purpose treatment effect estimator which significantly outperforms competitive baselines on a variety of challenging TEE problems.", "abstract": "Previous works on Treatment
Effect Estimation (TEE) are not in widespread use because they are predominantly theoretical, where strong parametric assumptions are made but intractable for practical application. Recent works use Multilayer Perceptron (MLP) for modeling causal relationships; however, MLPs lag far behind recent advances in ML methodology, which limits their applicability and generalizability. To extend beyond the single domain formulation and towards more realistic learning scenarios, we explore model design spaces beyond MLPs, i.e., transformer backbones, which provide flexibility where attention layers govern interactions among treatments and covariates to exploit structural similarities of potential outcomes for confounding control. Through careful model design, Transformers as Treatment Effect Estimators (TransTEE) is proposed. We show empirically that TransTEE can: (1) serve as a general-purpose treatment effect estimator which significantly outperforms competitive baselines on a variety of challenging TEE problems (e.g., discrete, continuous, structured, or dosage-associated treatments) and is applicable both when covariates are tabular and when they consist of structural data (e.g., texts, graphs); (2) yield multiple advantages: compatibility with propensity score modeling, parameter efficiency, robustness to continuous treatment value distribution shifts, explainability in covariate adjustment, and real-world utility in auditing pre-trained language models. ", "keywords": "Causal Inference;Continuous Treatment Effect;Heterogeneous Treatment Effect", "primary_area": "", "supplementary_material": "/attachment/785b985d65ea9e4f17b1ea85076eab8a33893574.zip", "author": "YiFan Zhang;Hanlin Zhang;Zachary Chase Lipton;Li Erran Li;Eric Xing", "authorids": "~YiFan_Zhang8;~Hanlin_Zhang1;~Zachary_Chase_Lipton1;~Li_Erran_Li1;~Eric_Xing1", "gender": "M;Unspecified;;M;M", "homepage": "https://hanlin-zhang.com/;http://zacklipton.com;http://www.cs.columbia.edu/~lierranli/;http://www.cs.cmu.edu/~epxing/;https://yfzhang114.github.io/", "dblp": ";;l/ErranLLi.html;36/3855;", "google_scholar": "h5IXxToAAAAJ;MN9Kfg8AAAAJ;GkMfzy4AAAAJ;https://scholar.google.com.tw/citations?user=5pKTRxEAAAAJ;lUnt8X4AAAAJ", "orcid": "0000-0002-9292-1645;;;;0000-0002-6227-0183", "linkedin": "hanlin-zhang-931b46143/;;;;", "or_profile": "~Hanlin_Zhang1;~Zachary_Chase_Lipton1;~Li_Erran_Li1;~Eric_Xing1;~yifan_zhang7", "aff": "Harvard University;Carnegie Mellon University;Columbia University;School of Computer Science, Carnegie Mellon University;Institute of automation, Chinese academy of science", "aff_domain": "harvard.edu;cmu.edu;columbia.edu;cs.cmu.edu;nlpr.ia.ac.cn", "position": "PhD student;Assistant Professor;Adjunct Professor;Full Professor;PhD student", "bibtex": "@misc{\nzhang2023exploring,\ntitle={Exploring Transformer Backbones for Heterogeneous Treatment Effect Estimation},\nauthor={YiFan Zhang and Hanlin Zhang and Zachary Chase Lipton and Li Erran Li and Eric Xing},\nyear={2023},\nurl={https://openreview.net/forum?id=C9sU3Tnnki8}\n}", "github": "", "project": "", "reviewers": "u5ZN;NSTZ;mZnE", "site": "https://openreview.net/forum?id=C9sU3Tnnki8", "pdf_size": 1659403, "recommendation": "5;6;6", "confidence": "5;3;4", "correctness": "3;4;3", "technical_novelty": "2;3;3", "empirical_novelty": "3;2;3", "wc_summary_paper": "33;43;123", "wc_strength_and_weaknesses": "360;75;162", "wc_clarity_quality_novelty_and_reproducibility": "48;24;56", "wc_summary_review": "33;40;59", "wc_review": "474;182;400", "wc_reply_reviewers": "0;0;0",
"wc_reply_authors": "1337;756;397", "reply_reviewers": "0;0;0", "reply_authors": "3;2;2", "recommendation_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 66.33333333333333, 40.27681991198191 ], "wc_strength_and_weaknesses_avg": [ 199.0, 119.2560271013587 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 42.666666666666664, 13.59738536958076 ], "wc_summary_review_avg": [ 44.0, 10.98483803552272 ], "wc_review_avg": [ 352.0, 123.94622489881112 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 830.0, 387.3043592146449 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.8660254037844385, "corr_recommendation_correctness": 0.4999999999999999, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10956141995533492357&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;2;1;3", "aff_unique_norm": "Harvard University;Carnegie Mellon University;Columbia University;Chinese Academy of Sciences", "aff_unique_dep": ";;;Institute of Automation", "aff_unique_url": "https://www.harvard.edu;https://www.cmu.edu;https://www.columbia.edu;http://www.ia.cas.cn", "aff_unique_abbr": "Harvard;CMU;Columbia;CAS", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pittsburgh", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "United States;China" }, { "id": "C9uEwyfklBE", "title": "Pareto Manifold Learning: Tackling multiple tasks via ensembles of single-task models", "track": "main", "status": "Reject", "tldr": "", "abstract": "In Multi-Task Learning, tasks may compete and limit the performance achieved on each other rather than guiding the optimization trajectory to a common solution, superior to its single-task counterparts. There is often not a single solution that is optimal for all tasks, leading practitioners to balance tradeoffs between tasks' performance, and to resort to optimality in the Pareto sense. Current Multi-Task Learning methodologies either completely neglect this aspect of functional diversity, and produce one solution in the Pareto Front predefined by their optimization schemes, or produce diverse but discrete solutions, each requiring a separate training run. In this paper, we conjecture that there exist Pareto Subspaces, i.e., weight subspaces where multiple optimal functional solutions lie. We propose Pareto Manifold Learning, an ensembling method in weight space that is able to discover such a parameterization and produces a continuous Pareto Front in a single training run, allowing practitioners to modulate the performance on each task during inference on the fly. 
We validate the proposed method on a diverse set of multi-task learning benchmarks, ranging from image classification to tabular datasets and scene understanding, and show that Pareto Manifold Learning outperforms state-of-the-art algorithms.\n", "keywords": "Multi-Task Learning;multitask learning;mode connectivity;loss landscape;pareto optimal;pareto frontier", "primary_area": "", "supplementary_material": "", "author": "Nikolaos Dimitriadis;Pascal Frossard;Fran\u00e7ois Fleuret", "authorids": "~Nikolaos_Dimitriadis1;~Pascal_Frossard1;~Fran\u00e7ois_Fleuret2", "gender": ";;", "homepage": "https://nik-dim.github.io;;", "dblp": "278/8332;;", "google_scholar": "ZG2WrKwAAAAJ;;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Nikolaos_Dimitriadis1;~Pascal_Frossard1;~Fran\u00e7ois_Fleuret2", "aff": "EPFL - EPF Lausanne;;", "aff_domain": "epfl.ch;;", "position": "PhD student;;", "bibtex": "@misc{\ndimitriadis2023pareto,\ntitle={Pareto Manifold Learning: Tackling multiple tasks via ensembles of single-task models},\nauthor={Nikolaos Dimitriadis and Pascal Frossard and Fran{\\c{c}}ois Fleuret},\nyear={2023},\nurl={https://openreview.net/forum?id=C9uEwyfklBE}\n}", "github": "", "project": "", "reviewers": "qexX;SqFR;NfGo", "site": "https://openreview.net/forum?id=C9uEwyfklBE", "pdf_size": 23785593, "recommendation": "5;5;6", "confidence": "3;4;4", "correctness": "3;3;2", "technical_novelty": "3;3;2", "empirical_novelty": "3;3;2", "wc_summary_paper": "60;111;108", "wc_strength_and_weaknesses": "363;204;853", "wc_clarity_quality_novelty_and_reproducibility": "9;31;100", "wc_summary_review": "21;44;59", "wc_review": "453;390;1120", "wc_reply_reviewers": "0;0;1071", "wc_reply_authors": "555;441;2345", "reply_reviewers": "0;0;2", "reply_authors": "1;1;5", "recommendation_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 93.0, 23.366642891095847 ], "wc_strength_and_weaknesses_avg": [ 473.3333333333333, 276.20081261446154 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 46.666666666666664, 38.76711091748892 ], "wc_summary_review_avg": [ 41.333333333333336, 15.627610892974722 ], "wc_review_avg": [ 654.3333333333334, 330.27900663260783 ], "wc_reply_reviewers_avg": [ 357.0, 504.8742417671949 ], "wc_reply_authors_avg": [ 1113.6666666666667, 871.9271121423446 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 2.3333333333333335, 1.8856180831641267 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.4999999999999999, "corr_recommendation_correctness": -0.9999999999999997, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2816294752069119248&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff_unique_index": "0", "aff_unique_norm": "EPFL", "aff_unique_dep": "", "aff_unique_url": "https://www.epfl.ch", "aff_unique_abbr": "EPFL", "aff_campus_unique_index": "0", "aff_campus_unique": "Lausanne", "aff_country_unique_index": "0", "aff_country_unique": "Switzerland" }, { "id": "CAsH4Z_Xzj7", "title": "Architecture Matters in Continual Learning", "track": "main", "status": "Reject", "tldr": "The choice of architecture can significantly impact the continual learning performance.", "abstract": "A large body of 
research in continual learning is devoted to overcoming the catastrophic forgetting of neural networks by designing new algorithms that are robust to the distribution shifts. However, the majority of these works are strictly focused on the \"algorithmic\" part of continual learning for a \"fixed neural network architecture\", and the implications of using different architectures are not clearly understood. The few existing continual learning methods that expand the model also assume a fixed architecture and develop algorithms that can efficiently use the model throughout the learning experience. In contrast, in this work, we build on existing works that study continual learning from a neural network's architecture perspective and provide new insights into how the architecture choice, for the same learning algorithm, can impact stability-plasticity trade-off resulting in markedly different continual learning performance. We empirically analyze the impact of various architectural components providing best practices and recommendations that can improve the continual learning performance irrespective of the learning algorithm.", "keywords": "Continual Learning;Catastrophic Forgetting;Neural Network Architecture", "primary_area": "", "supplementary_material": "", "author": "Seyed Iman Mirzadeh;Arslan Chaudhry;Dong Yin;Timothy Nguyen;Razvan Pascanu;Dilan Gorur;Mehrdad Farajtabar", "authorids": "~Seyed_Iman_Mirzadeh1;~Arslan_Chaudhry1;~Dong_Yin1;~Timothy_Nguyen1;~Razvan_Pascanu1;~Dilan_Gorur1;~Mehrdad_Farajtabar1", "gender": "M;M;M;M;M;;M", "homepage": "https://imirzadeh.me/;http://www.robots.ox.ac.uk/~arslan/;https://dongyin92.github.io/;http://timothynguyen.wordpress.com;https://razp.info;;https://www.cc.gatech.edu/~mfarajta/", "dblp": "236/5113;https://dblp.uni-trier.de/pers/c/Chaudhry:Arslan.html;85/4137;;65/8368.html;g/DilanGorur;21/9988", "google_scholar": "AjKbt44AAAAJ;https://scholar.google.co.uk/citations?user=FO8vjQMAAAAJ;YtM8P88AAAAJ;r4FbY1IAAAAJ;https://scholar.google.ca/citations?user=eSPY8LwAAAAJ;;shkKxnQAAAAJ", "orcid": ";;;;;;", "linkedin": "iman-mirzadeh-a687278b/;arslanch/;dong-yin-6747137b/;;;dilan-gorur-6298124a;", "or_profile": "~Seyed_Iman_Mirzadeh1;~Arslan_Chaudhry1;~Dong_Yin1;~Timothy_Nguyen1;~Razvan_Pascanu1;~Dilan_Gorur1;~Mehrdad_Farajtabar1", "aff": "Washington State University;Google;Google DeepMind;Google;Google DeepMind;Google;Apple", "aff_domain": "wsu.edu;google.com;google.com;google.com;google.com;google.com;apple.com", "position": "PhD student;Research Scientist;Research scientist;Research Engineer;Research Scientist;Researcher;Researcher", "bibtex": "@misc{\nmirzadeh2023architecture,\ntitle={Architecture Matters in Continual Learning},\nauthor={Seyed Iman Mirzadeh and Arslan Chaudhry and Dong Yin and Timothy Nguyen and Razvan Pascanu and Dilan Gorur and Mehrdad Farajtabar},\nyear={2023},\nurl={https://openreview.net/forum?id=CAsH4Z_Xzj7}\n}", "github": "", "project": "", "reviewers": "b2F4;y8Pr;4mDG", "site": "https://openreview.net/forum?id=CAsH4Z_Xzj7", "pdf_size": 920740, "recommendation": "3;5;8", "confidence": "5;5;4", "correctness": "2;3;4", "technical_novelty": "2;2;2", "empirical_novelty": "3;2;4", "wc_summary_paper": "62;29;54", "wc_strength_and_weaknesses": "576;327;220", "wc_clarity_quality_novelty_and_reproducibility": "85;21;106", "wc_summary_review": "61;55;49", "wc_review": "784;432;429", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 5.333333333333333, 
2.0548046676563256 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 3.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 48.333333333333336, 14.055445761538678 ], "wc_strength_and_weaknesses_avg": [ 374.3333333333333, 149.14050049831832 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 70.66666666666667, 36.15091823023157 ], "wc_summary_review_avg": [ 55.0, 4.898979485566356 ], "wc_review_avg": [ 548.3333333333334, 166.64599871850777 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.9176629354822472, "corr_recommendation_correctness": 0.9933992677987828, "gs_citation": 82, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3228301340635126405&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1;1;1;1;1;2", "aff_unique_norm": "Washington State University;Google;Apple", "aff_unique_dep": ";Google;Apple Inc.", "aff_unique_url": "https://wsu.edu;https://www.google.com;https://www.apple.com", "aff_unique_abbr": "WSU;Google;Apple", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;1;0;1;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Evolving Populations of Diverse RL Agents with MAP-Elites", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10728", "id": "CBfYffLqWqb", "poster": "", "openreview": "https://openreview.net/forum?id=CBfYffLqWqb", "slides": "https://iclr.cc/virtual/2023/poster/10728", "video": "https://iclr.cc/virtual/2023/poster/10728", "author_site": "Thomas PIERROT, Arthur Flajolet", "tldr": "", "abstract": "Quality Diversity (QD) has emerged as a powerful alternative optimization paradigm that aims at generating large and diverse collections of solutions, notably with its flagship algorithm MAP-ELITES (ME) which evolves solutions through mutations and crossovers. While very effective for some unstructured problems, early ME implementations relied exclusively on random search to evolve the population of solutions, rendering them notoriously sample-inefficient for high-dimensional problems, such as when evolving neural networks. Follow-up works considered exploiting gradient information to guide the search in order to address these shortcomings through techniques borrowed from either Black-Box Optimization (BBO) or Reinforcement Learning (RL). While mixing RL techniques with ME unlocked state-of-the-art performance for robotics control problems that require a good amount of exploration, it also plagued these ME variants with limitations common among RL algorithms that ME was free of, such as hyperparameter sensitivity, high stochasticity as well as training instability, including when the population size increases as some components are shared across the population in recent approaches. Furthermore, existing approaches mixing ME with RL tend to be tied to a specific RL algorithm, which effectively prevents their use on problems where the corresponding RL algorithm fails. 
To address these shortcomings, we introduce a flexible framework that allows the use of any RL algorithm and alleviates the aforementioned limitations by evolving populations of agents (whose definitions include hyperparameters and all learnable parameters) instead of just policies. We demonstrate the benefits brought about by our framework through extensive numerical experiments on a number of robotics control problems, some of which have deceptive rewards, taken from the QD-RL literature. We open source an efficient JAX-based implementation of our algorithm in the QDax library. ", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Thomas PIERROT;Arthur Flajolet", "authorids": "~Thomas_PIERROT1;~Arthur_Flajolet2", "gender": "M;", "homepage": ";", "dblp": "228/7739;", "google_scholar": "https://scholar.google.fr/citations?user=0zBiyNUAAAAJ;", "orcid": "0000-0002-5227-6194;", "linkedin": "thomas-pierrot-120a43128/;", "or_profile": "~Thomas_PIERROT1;~Arthur_Flajolet2", "aff": "Universit\u00e9 Pierre et Marie Curie - Paris 6, Computer Science Lab - Pierre and Marie Curie University, Paris, France;", "aff_domain": "isir.upmc.fr;", "position": "PhD student;", "bibtex": "@inproceedings{\npierrot2023evolving,\ntitle={Evolving Populations of Diverse {RL} Agents with {MAP}-Elites},\nauthor={Thomas PIERROT and Arthur Flajolet},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=CBfYffLqWqb}\n}", "github": "", "project": "", "reviewers": "XjfY;bFfK;Y8Hf", "pdf_size": 3468497, "recommendation": "5;5;6", "confidence": "4;3;3", "correctness": "2;3;3", "technical_novelty": "2;2;3", "empirical_novelty": "3;2;2", "wc_summary_paper": "141;94;97", "wc_strength_and_weaknesses": "263;103;333", "wc_clarity_quality_novelty_and_reproducibility": "57;26;126", "wc_summary_review": "39;62;190", "wc_review": "500;285;746", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "619;506;758", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 110.66666666666667, 21.483844059096022 ], "wc_strength_and_weaknesses_avg": [ 233.0, 96.26352718795768 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 69.66666666666667, 41.79579989531111 ], "wc_summary_review_avg": [ 97.0, 66.42790578263526 ], "wc_review_avg": [ 510.3333333333333, 188.34424747136705 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 627.6666666666666, 103.06093127638405 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.4999999999999999, "corr_recommendation_correctness": 0.4999999999999999, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13291069620883043552&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=CBfYffLqWqb", "email": "isir.upmc.fr;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Universit\u00e9 Pierre et Marie Curie - Paris 6", "aff_unique_dep": "Computer Science Lab", "aff_unique_url": "https://www.upmc.fr", "aff_unique_abbr": "UPMC", "aff_campus_unique_index": "0",
"aff_campus_unique": "Paris", "aff_country_unique_index": "0", "aff_country_unique": "France" }, { "id": "CCF5eG4UPNS", "title": "Interpreting Class Conditional GANs with Channel Awareness", "track": "main", "status": "Reject", "tldr": "This work discovered that some channels are primarily responsible for a particular class, and some channels are shared by all classes. This finding further facilitates multiple novel applications.", "abstract": "Understanding the mechanism of generative adversarial networks (GANs) helps us better use GANs for downstream applications. Existing efforts mainly target interpreting unconditional models, leaving it less explored how a conditional GAN learns to render images regarding various categories. This work fills in this gap by investigating how a class conditional generator unifies the synthesis of multiple classes. For this purpose, we dive into the widely used class-conditional batch normalization (CCBN), and observe that each feature channel is activated at varying degrees given different categorical embeddings. To describe such a phenomenon, we propose channel awareness, which quantitatively characterizes how a single channel contributes to the final synthesis. Extensive evaluations and analyses on the BigGAN model pre-trained on ImageNet reveal that only a subset of channels is primarily responsible for the generation of a particular category, similar categories (e.g., cat and dog) usually get related to some same channels, and some channels turn out to share information across all classes. For good measure, our algorithm enables several novel applications with conditional GANs. Concretely, we achieve (1) versatile image editing via simply altering a single channel and manage to (2) harmoniously hybridize two different classes. 
We further verify that the proposed channel awareness shows promising potential in (3) segmenting the synthesized image and (4) evaluating the category-wise synthesis performance.", "keywords": "class-conditional GANs;representations;interpretation", "primary_area": "", "supplementary_material": "", "author": "Yingqing He;Zhiyi Zhang;Jiapeng Zhu;Yujun Shen;Qifeng Chen", "authorids": "~Yingqing_He1;~Zhiyi_Zhang1;~Jiapeng_Zhu1;~Yujun_Shen1;~Qifeng_Chen1", "gender": ";M;M;;M", "homepage": "https://github.com/YingqingHe;;;;http://cqf.io/", "dblp": "161/3838;;169/7704;;117/4819", "google_scholar": ";;-ACBm-gAAAAJ;;lLMX9hcAAAAJ", "orcid": "0000-0003-0134-8220;;;;", "linkedin": ";zhiyi-zhang-3088b0163/;;;", "or_profile": "~Yingqing_He1;~Zhiyi_Zhang1;~Jiapeng_Zhu1;~Yujun_Shen1;~Qifeng_Chen1", "aff": "Hong Kong University of Science and Technology;University of Southern California;Hong Kong University of Science and Technology;;Hong Kong University of Science and Technology", "aff_domain": "ust.hk;usc.edu;hkust.edu;;hkust.edu", "position": "PhD student;MS student;PhD student;;Assistant Professor", "bibtex": "@misc{\nhe2023interpreting,\ntitle={Interpreting Class Conditional {GAN}s with Channel Awareness},\nauthor={Yingqing He and Zhiyi Zhang and Jiapeng Zhu and Yujun Shen and Qifeng Chen},\nyear={2023},\nurl={https://openreview.net/forum?id=CCF5eG4UPNS}\n}", "github": "", "project": "", "reviewers": "vDMD;EESf;dhhh", "site": "https://openreview.net/forum?id=CCF5eG4UPNS", "pdf_size": 16665666, "recommendation": "5;5;5", "confidence": "4;4;4", "correctness": "3;3;4", "technical_novelty": "2;2;2", "empirical_novelty": "3;3;2", "wc_summary_paper": "72;76;104", "wc_strength_and_weaknesses": "102;298;351", "wc_clarity_quality_novelty_and_reproducibility": "199;33;29", "wc_summary_review": "46;63;34", "wc_review": "419;470;518", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "180;587;335", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 5.0, 0.0 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 84.0, 14.236104336041748 ], "wc_strength_and_weaknesses_avg": [ 250.33333333333334, 107.09601092270222 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 87.0, 79.21279357948858 ], "wc_summary_review_avg": [ 47.666666666666664, 11.897712198383164 ], "wc_review_avg": [ 469.0, 40.422765862815474 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 367.3333333333333, 167.72265466802295 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11693608256373758323&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Hong Kong University of Science and Technology;University of Southern California", "aff_unique_dep": ";", "aff_unique_url": "https://www.ust.hk;https://www.usc.edu", "aff_unique_abbr": "HKUST;USC", "aff_campus_unique_index": "0;1;0;0", "aff_campus_unique": "Hong Kong SAR;Los Angeles", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "China;United States" }, { "title": "MACTA: A Multi-agent Reinforcement Learning Approach for Cache Timing Attacks and Detection", "status": "Poster", "track": 
"main", "site": "https://iclr.cc/virtual/2023/poster/11111", "id": "CDlHZ78-Xzi", "poster": "/media/PosterPDFs/ICLR%202023/11111.png?t=1682485178.3274672", "openreview": "https://openreview.net/forum?id=CDlHZ78-Xzi", "slides": "https://iclr.cc/virtual/2023/poster/11111", "video": "https://iclr.cc/virtual/2023/poster/11111", "author_site": "Jiaxun Cui, Xiaomeng Yang, Mulong Luo, Geunbae Lee, Peter Stone, Hsien-Hsin Lee, Benjamin Lee, G. Edward Suh, Wenjie Xiong, Yuandong Tian", "tldr": "", "abstract": "Security vulnerabilities in computer systems raise serious concerns as computers process an unprecedented amount of private and sensitive data today. Cache timing attacks (CTA) pose an important practical threat as they can effectively breach many protection mechanisms in today\u2019s systems. However, the current detection techniques for cache timing attacks heavily rely on heuristics and expert knowledge, which can lead to brittleness and the inability to adapt to new attacks. To mitigate the CTA threat, we propose MACTA, a multi-agent reinforcement learning (MARL) approach that leverages population-based training to train both attackers and detectors. Following best practices, we develop a realistic simulated MARL environment, MA-AUTOCAT, which enables training and evaluation of cache-timing attackers and detectors. Our empirical results suggest that MACTA is an effective solution without any manual input from security experts. MACTA detectors can generalize to a heuristic attack not exposed in training with a 97.8% detection rate and reduce the attack bandwidth of adaptive attackers by 20% on average. In the meantime, MACTA attackers are qualitatively more effective than other attacks studied, and the average evasion rate of MACTA attackers against an unseen state-of-the-art detector can reach up to 99%. Furthermore, we found that agents equipped with a Transformer encoder can learn effective policies in situations when agents with multi-layer perceptron encoders do not in this environment, suggesting the potential of Transformer structures in CTA problems.\n", "keywords": "multi-agent reinforcement learning;security;game theory", "primary_area": "", "supplementary_material": "", "author": "Jiaxun Cui;Xiaomeng Yang;Mulong Luo;Geunbae Lee;Peter Stone;Hsien-Hsin S. Lee;Benjamin Lee;G. 
Edward Suh;Wenjie Xiong;Yuandong Tian", "authorids": "~Jiaxun_Cui1;~Xiaomeng_Yang1;~Mulong_Luo1;~Geunbae_Lee1;~Peter_Stone1;~Hsien-Hsin_S._Lee1;~Benjamin_Lee3;~G._Edward_Suh2;~Wenjie_Xiong1;~Yuandong_Tian1", "gender": "F;M;M;M;M;;;;;M", "homepage": "https://cuijiaxun.github.io;;http://mulongluo.me;;http://www.cs.utexas.edu/~pstone;;https://www.seas.upenn.edu/~leebcc/;;https://computing.ece.vt.edu/~wenjiex/;http://yuandong-tian.com", "dblp": "286/8124;;;;s/PeterStone;;;;177/2257.html;t/YuandongTian", "google_scholar": "https://scholar.google.com/citations?hl=en;t8v3JXsAAAAJ;;TY5ME-oAAAAJ;qnwjcfAAAAAJ;;4Tnj6PcAAAAJ;;07UMduYAAAAJ;0mgEF28AAAAJ", "orcid": "0009-0009-1987-9549;0009-0007-3917-6811;;0000-0002-5484-540X;0000-0002-6795-420X;;;;0000-0002-7626-2651;0000-0003-4202-4847", "linkedin": "cuijiaxun/;xiaomeng-yang-356a976b;;glee08350031;;;;;https://linkedin.com/in/wenjie-xiong-07672b86;yuandongtian", "or_profile": "~Jiaxun_Cui1;~Xiaomeng_Yang1;~Mulong_Luo1;~Geunbae_Lee1;~Peter_Stone1;~Hsien-Hsin_S._Lee1;~Benjamin_Lee3;~G._Edward_Suh2;~Wenjie_Xiong1;~Yuandong_Tian1", "aff": "The University of Texas at Austin;Meta;Cornell University;Virginia Polytechnic Institute and State University;University of Texas, Austin;;University of Pennsylvania;;Virginia Polytechnic Institute and State University;Meta AI (FAIR)", "aff_domain": "utexas.edu;meta.com;cornell.edu;vt.edu;utexas.edu;;upenn.edu;;vt.edu;meta.com", "position": "PhD student;Software Engineer;PhD student;PhD student;Full Professor;;Full Professor;;Assistant Professor;Research Scientist", "bibtex": "@inproceedings{\ncui2023macta,\ntitle={{MACTA}: A Multi-agent Reinforcement Learning Approach for Cache Timing Attacks and Detection},\nauthor={Jiaxun Cui and Xiaomeng Yang and Mulong Luo and Geunbae Lee and Peter Stone and Hsien-Hsin S. Lee and Benjamin Lee and G. 
Edward Suh and Wenjie Xiong and Yuandong Tian},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=CDlHZ78-Xzi}\n}", "github": "", "project": "", "reviewers": "aHoC;9uNr;Mrpg", "pdf_size": 788252, "recommendation": "5;6;6", "confidence": "2;2;4", "correctness": "4;3;3", "technical_novelty": "3;3;2", "empirical_novelty": "0;3;2", "wc_summary_paper": "53;40;157", "wc_strength_and_weaknesses": "95;155;34", "wc_clarity_quality_novelty_and_reproducibility": "30;33;129", "wc_summary_review": "31;109;1405", "wc_review": "209;337;1725", "wc_reply_reviewers": "0;0;644", "wc_reply_authors": "402;666;1972", "reply_reviewers": "0;0;2", "reply_authors": "2;1;4", "recommendation_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 2.6666666666666665, 0.9428090415820634 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.6666666666666667, 1.247219128924647 ], "wc_summary_paper_avg": [ 83.33333333333333, 52.359865885576475 ], "wc_strength_and_weaknesses_avg": [ 94.66666666666667, 49.398605468395786 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 64.0, 45.9782557302906 ], "wc_summary_review_avg": [ 515.0, 630.1301452874636 ], "wc_review_avg": [ 757.0, 686.471169581554 ], "wc_reply_reviewers_avg": [ 214.66666666666666, 303.5845113894244 ], "wc_reply_authors_avg": [ 1013.3333333333334, 686.3941206689411 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 2.3333333333333335, 1.247219128924647 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 10, 0 ], "corr_recommendation_confidence": 0.4999999999999999, "corr_recommendation_correctness": -0.9999999999999997, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3658084671503637260&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "pdf": "https://openreview.net/pdf?id=CDlHZ78-Xzi", "email": "utexas.edu;meta.com;cornell.edu;vt.edu;utexas.edu;;upenn.edu;;vt.edu;meta.com", "author_num": 10, "aff_unique_index": "0;1;2;3;0;4;3;1", "aff_unique_norm": "University of Texas at Austin;Meta;Cornell University;Virginia Tech;University of Pennsylvania", "aff_unique_dep": ";Meta Platforms, Inc.;;;", "aff_unique_url": "https://www.utexas.edu;https://meta.com;https://www.cornell.edu;https://www.vt.edu;https://www.upenn.edu", "aff_unique_abbr": "UT Austin;Meta;Cornell;VT;UPenn", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Austin;", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "CEhy-i7_KfC", "title": "Pretraining the Vision Transformer using self-supervised methods for vision based Deep Reinforcement Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "The Vision Transformer architecture has been shown to be competitive in the computer vision (CV) space where it has dethroned convolution-based networks in several benchmarks. Nevertheless, Convolutional Neural Networks (CNN) remain the preferred architecture for the representation module in Reinforcement Learning. In this work, we study pretraining a Vision Transformer using several state-of-the-art self-supervised methods and assess data-efficiency gains from this training framework. 
We propose a new self-supervised learning method called TOV-VICReg that extends VICReg to better capture temporal relations between observations by adding a temporal order verification task. Furthermore, we evaluate the resultant encoders with Atari games in a sample-efficiency regime. Our results show that the vision transformer, when pretrained with TOV-VICReg, outperforms the other self-supervised methods but still struggles to overcome a CNN. Nevertheless, we were able to outperform a CNN in two of the ten games where we perform a 100k steps evaluation. Ultimately, we believe that such approaches in Deep Reinforcement Learning (DRL) might be the key to achieving new levels of performance as seen in natural language processing and computer vision.", "keywords": "Deep Reinforcement Learning;Transformers;Self-Supervised Learning;Pre-training", "primary_area": "", "supplementary_material": "/attachment/640dfaa45f9938d2b78247518802e5ffe2751a71.zip", "author": "Manuel Goul\u00e3o;Arlindo L. Oliveira", "authorids": "~Manuel_Goul\u00e3o1;~Arlindo_L._Oliveira1", "gender": "M;M", "homepage": "https://goulao.pt;http://web.tecnico.ulisboa.pt/arlindo.oliveira/", "dblp": ";o/ArlindoLOliveira", "google_scholar": "ZjeL7rAAAAAJ;dqtEnaoAAAAJ", "orcid": "0000-0001-6478-2038;0000-0001-8638-5594", "linkedin": "manuel-goul%C3%A3o/;arlindo-oliveira-4119a1a/", "or_profile": "~Manuel_Goul\u00e3o1;~Arlindo_L._Oliveira1", "aff": ";INESC-ID", "aff_domain": ";inesc-id.pt", "position": ";Researcher", "bibtex": "@misc{\ngoul{\\~a}o2023pretraining,\ntitle={Pretraining the Vision Transformer using self-supervised methods for vision based Deep Reinforcement Learning},\nauthor={Manuel Goul{\\~a}o and Arlindo L. Oliveira},\nyear={2023},\nurl={https://openreview.net/forum?id=CEhy-i7_KfC}\n}", "github": "", "project": "", "reviewers": "X82N;Wxq5;Wuo3;9PJR;rKFV", "site": "https://openreview.net/forum?id=CEhy-i7_KfC", "pdf_size": 393689, "recommendation": "3;3;3;3;3", "confidence": "4;3;3;3;4", "correctness": "3;2;3;4;3", "technical_novelty": "2;3;2;2;2", "empirical_novelty": "2;2;2;4;2", "wc_summary_paper": "91;73;48;54;37", "wc_strength_and_weaknesses": "236;205;169;98;146", "wc_clarity_quality_novelty_and_reproducibility": "170;43;137;41;168", "wc_summary_review": "98;19;47;18;42", "wc_review": "595;340;401;211;393", "wc_reply_reviewers": "0;0;27;0;0", "wc_reply_authors": "564;298;537;560;519", "reply_reviewers": "0;0;1;0;0", "reply_authors": "1;1;1;1;1", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "correctness_avg": [ 3.0, 0.6324555320336759 ], "technical_novelty_avg": [ 2.2, 0.39999999999999997 ], "empirical_novelty_avg": [ 2.4, 0.8 ], "wc_summary_paper_avg": [ 60.6, 19.168724527208376 ], "wc_strength_and_weaknesses_avg": [ 170.8, 47.62100376934531 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 111.8, 58.18384655555183 ], "wc_summary_review_avg": [ 44.8, 29.075075236360096 ], "wc_review_avg": [ 388.0, 123.83537459062335 ], "wc_reply_reviewers_avg": [ 5.4, 10.8 ], "wc_reply_authors_avg": [ 495.6, 100.13311140676694 ], "reply_reviewers_avg": [ 0.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3054584755390744023&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0", "aff_unique_norm": "INESC-ID", "aff_unique_dep": "", 
"aff_unique_url": "https://www.inesc-id.pt", "aff_unique_abbr": "INESC-ID", "aff_country_unique_index": "0", "aff_country_unique": "Portugal" }, { "title": "Leveraging Future Relationship Reasoning for Vehicle Trajectory Prediction", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11678", "id": "CGBCTp2M6lA", "poster": "/media/PosterPDFs/ICLR%202023/11678.png?t=1682406487.0910153", "openreview": "https://openreview.net/forum?id=CGBCTp2M6lA", "slides": "https://iclr.cc/virtual/2023/poster/11678", "video": "https://iclr.cc/virtual/2023/poster/11678", "author_site": "Daehee Park, Hobin Ryu, Yunseo Yang, Jegyeong Cho, Jiwon Kim, Kuk-Jin Yoon", "tldr": "We defined and modeled Future Relationship to better modeling interaction between vehicles.", "abstract": "Understanding the interaction between multiple agents is crucial for realistic vehicle trajectory prediction. \nExisting methods have attempted to infer the interaction from the observed past trajectories of agents using pooling, attention, or graph-based methods, which rely on a deterministic approach. \nHowever, these methods can fail under complex road structures, as they cannot predict various interactions that may occur in the future. \nIn this paper, we propose a novel approach that uses lane information to predict a stochastic future relationship among agents. \nTo obtain a coarse future motion of agents, our method first predicts the probability of lane-level waypoint occupancy of vehicles. \nWe then utilize the temporal probability of passing adjacent lanes for each agent pair, assuming that agents passing adjacent lanes will highly interact. \nWe also model the interaction using a probabilistic distribution, which allows for multiple possible future interactions. \nThe distribution is learned from the posterior distribution of interaction obtained from ground truth future trajectories. \nWe validate our method on popular trajectory prediction datasets: nuScenes and Argoverse. 
\nThe results show that the proposed method brings a remarkable performance gain in prediction accuracy, and achieves state-of-the-art performance on a long-term prediction benchmark dataset.", "keywords": "Trajectory prediction;Autonomous driving;Neural relation inference;Stochasticity modeling;Multimodal prediction", "primary_area": "", "supplementary_material": "/attachment/a4690f126f8038855aa2b7840b24b1d2451f3f16.zip", "author": "Daehee Park;Hobin Ryu;Yunseo Yang;Jegyeong Cho;Jiwon Kim;Kuk-Jin Yoon", "authorids": "~Daehee_Park1;~Hobin_Ryu2;~Yunseo_Yang1;~Jegyeong_Cho1;~Jiwon_Kim8;~Kuk-Jin_Yoon1", "gender": "M;;;;;M", "homepage": "https://daeheepark.github.io;;;https://vi.kaist.ac.kr;;", "dblp": ";;;331/6513;;42/5677", "google_scholar": "https://scholar.google.com/citations?hl=en;iozt8Y4AAAAJ;;;ddMrczIAAAAJ;1NvBj_gAAAAJ", "orcid": "0000-0002-3961-6932;;;0000-0002-2330-4029;;", "linkedin": "daehee-park-533a701a4/;;;;ichbing1/;", "or_profile": "~Daehee_Park1;~Hobin_Ryu2;~Yunseo_Yang1;~Jegyeong_Cho1;~Jiwon_Kim8;~Kuk-Jin_Yoon1", "aff": "Korea Advanced Institute of Science & Technology;;;Korea Advanced Institute of Science & Technology;Naver Labs;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;;;kaist.ac.kr;naverlabs.com;kaist.ac.kr", "position": "PhD student;;;PhD student;Researcher;Associate Professor", "bibtex": "@inproceedings{\npark2023leveraging,\ntitle={Leveraging Future Relationship Reasoning for Vehicle Trajectory Prediction},\nauthor={Daehee Park and Hobin Ryu and Yunseo Yang and Jegyeong Cho and Jiwon Kim and Kuk-Jin Yoon},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=CGBCTp2M6lA}\n}", "github": "", "project": "", "reviewers": "Y1Xx;2v4y;6gEW", "pdf_size": 1564090, "recommendation": "3;6;8", "confidence": "5;4;3", "correctness": "3;3;3", "technical_novelty": "2;3;4", "empirical_novelty": "2;3;3", "wc_summary_paper": "58;81;105", "wc_strength_and_weaknesses": "111;289;359", "wc_clarity_quality_novelty_and_reproducibility": "40;55;196", "wc_summary_review": "6;26;112", "wc_review": "215;451;772", "wc_reply_reviewers": "109;0;0", "wc_reply_authors": "1059;868;1334", "reply_reviewers": "1;0;0", "reply_authors": "3;2;2", "recommendation_avg": [ 5.666666666666667, 2.0548046676563256 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 3.0, 0.816496580927726 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 81.33333333333333, 19.189117286165672 ], "wc_strength_and_weaknesses_avg": [ 253.0, 104.39667938524993 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 97.0, 70.27090436304346 ], "wc_summary_review_avg": [ 48.0, 45.985504962614755 ], "wc_review_avg": [ 479.3333333333333, 228.27517507507372 ], "wc_reply_reviewers_avg": [ 36.333333333333336, 51.383092766222454 ], "wc_reply_authors_avg": [ 1087.0, 191.27118619035818 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.9933992677987828, "corr_recommendation_correctness": 0.0, "gs_citation": 78, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8254510479289584273&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "pdf": "https://openreview.net/pdf?id=CGBCTp2M6lA", "email": "kaist.ac.kr;;;kaist.ac.kr;naverlabs.com;kaist.ac.kr",
"author_num": 6, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;NAVER LABS", "aff_unique_dep": ";", "aff_unique_url": "https://www.kaist.ac.kr;https://labs.naver.com", "aff_unique_abbr": "KAIST;Naver Labs", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Suppressing the Heterogeneity: A Strong Feature Extractor for Few-shot Segmentation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12224", "id": "CGuvK3U09LH", "poster": "/media/PosterPDFs/ICLR%202023/12224.png?t=1682149684.5434885", "openreview": "https://openreview.net/forum?id=CGuvK3U09LH", "slides": "https://iclr.cc/virtual/2023/poster/12224", "video": "https://iclr.cc/virtual/2023/poster/12224", "author_site": "zhengdong Hu, Yifan Sun, Yi Yang", "tldr": "", "abstract": "This paper tackles the Few-shot Semantic Segmentation (FSS) task with focus on learning the feature extractor. Somehow the feature extractor has been overlooked by recent state-of-the-art methods, which directly use a deep model pretrained on ImageNet for feature extraction (without further fine-tuning). Under this background, we think the FSS feature extractor deserves exploration and observe the heterogeneity (i.e., the intra-class diversity in the raw images) as a critical challenge hindering the intra-class feature compactness. The heterogeneity has three levels from coarse to fine: 1) Sample-level: the inevitable distribution gap between the support and query images makes them heterogeneous from each other. 2) Region-level: the background in FSS actually contains multiple regions with different semantics. 3) Patch-level: some neighboring patches belonging to a same class may appear quite different from each other. Motivated by these observations, we propose a feature extractor with Multi-level Heterogeneity Suppressing (MuHS). MuHS leverages the attention mechanism in transformer backbone to effectively suppress all these three-level heterogeneity. Concretely, MuHS reinforces the attention / interaction between different samples (query and support), different regions and neighboring patches by constructing cross-sample attention, cross-region interaction and a novel masked image segmentation (inspired by the recent masked image modeling), respectively. 
We empirically show that 1) MuHS brings consistent improvement for various FSS heads and 2) using a simple linear classification head, MuHS sets new states of the art on multiple FSS datasets, validating the importance of FSS feature learning.", "keywords": "deep learning;computer vision;few-shot learning;few-shot semantic segmentation", "primary_area": "", "supplementary_material": "", "author": "Zhengdong Hu;Yifan Sun;Yi Yang", "authorids": "~Zhengdong_Hu1;~Yifan_Sun2;~Yi_Yang22", "gender": "M;M;M", "homepage": ";https://yifansun-reid.github.io;https://person.zju.edu.cn/yiyang", "dblp": "323/9595;99/10261-3.html;33/4854-1.html", "google_scholar": "Udl0uiMAAAAJ;uUZEL7UAAAAJ;RMSuNFwAAAAJ", "orcid": ";0000-0003-3532-6521;", "linkedin": ";;", "or_profile": "~Zhengdong_Hu1;~Yifan_Sun2;~Yi_Yang22", "aff": "Baidu;Baidu;Zhejiang University", "aff_domain": "baidu.com;baidu.com;zju.edu.cn", "position": "Researcher;Senior Expert;Full Professor", "bibtex": "@inproceedings{\nhu2023suppressing,\ntitle={Suppressing the Heterogeneity: A Strong Feature Extractor for Few-shot Segmentation},\nauthor={Zhengdong Hu and Yifan Sun and Yi Yang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=CGuvK3U09LH}\n}", "github": "", "project": "", "reviewers": "ZBET;FHxi;UVUC;B9Ww", "pdf_size": 2120096, "recommendation": "5;6;6;8", "confidence": "2;4;4;4", "correctness": "3;3;4;4", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;3;3;4", "wc_summary_paper": "56;74;51;76", "wc_strength_and_weaknesses": "103;107;161;167", "wc_clarity_quality_novelty_and_reproducibility": "43;32;28;47", "wc_summary_review": "105;87;11;21", "wc_review": "307;300;251;311", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "422;693;319;554", "reply_reviewers": "0;0;0;0", "reply_authors": "2;1;1;1", "recommendation_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 64.25, 10.917302780449024 ], "wc_strength_and_weaknesses_avg": [ 134.5, 29.609964538985857 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 37.5, 7.762087348130012 ], "wc_summary_review_avg": [ 56.0, 40.65710270051225 ], "wc_review_avg": [ 292.25, 24.138920854089562 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 497.0, 140.5115653602934 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.6622661785325219, "corr_recommendation_correctness": 0.6882472016116854, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7035735770759255791&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "pdf": "https://openreview.net/pdf?id=CGuvK3U09LH", "email": "baidu.com;baidu.com;zju.edu.cn", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Baidu;Zhejiang University", "aff_unique_dep": "Baidu, Inc.;", "aff_unique_url": "https://www.baidu.com;https://www.zju.edu.cn", "aff_unique_abbr": "Baidu;ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "TempCLR: Temporal Alignment Representation with Contrastive Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12078", "id": "CIFOsnhZvON", 
"poster": "/media/PosterPDFs/ICLR%202023/12078.png?t=1681419516.7501888", "openreview": "https://openreview.net/forum?id=CIFOsnhZvON", "slides": "https://iclr.cc/virtual/2023/poster/12078", "video": "https://iclr.cc/virtual/2023/poster/12078", "author_site": "Yuncong Yang, Jiawei Ma, Shiyuan Huang, Long Chen, Xudong Lin, Guangxing Han, Shih-Fu Chang", "tldr": "Global sequence matching under temporal order consistency matters in contrastive-based video-paragraph/text learning.", "abstract": "Video representation learning has been successful in video-text pre-training for zero-shot transfer, where each sentence is trained to be close to the paired video clips in a common feature space. For long videos, given a paragraph of description where the sentences describe different segments of the video, by matching all sentence-clip pairs, the paragraph and the full video are aligned implicitly. However, such unit-level similarity measure may ignore the global temporal context over a long time span, which inevitably limits the generalization ability. In this paper, we propose a contrastive learning framework TempCLR to compare the full video and the paragraph explicitly. As the video/paragraph is formulated as a sequence of clips/sentences, under the constraint of their temporal order, we use dynamic time warping to compute the minimum cumulative cost over sentence-clip pairs as the sequence-level distance. To explore the temporal dynamics, we break the consistency of temporal order by shuffling the video clips or sentences according to the temporal granularity. In this way, we obtain the representations for clips/sentences, which perceive the temporal information and thus facilitate the sequence alignment. In addition to pre-training on the video and paragraph, our approach can also generalize on the matching between different video instances. We evaluate our approach on video retrieval, action step localization, and few-shot action recognition, and achieve consistent performance gain over all three tasks. Detailed ablation studies are provided to justify the approach design. 
", "keywords": "Representation learning;Global Sequence Alignment;Zero/Few-shot Transfer", "primary_area": "", "supplementary_material": "", "author": "Yuncong Yang;Jiawei Ma;Shiyuan Huang;Long Chen;Xudong Lin;Guangxing Han;Shih-Fu Chang", "authorids": "~Yuncong_Yang1;~Jiawei_Ma1;~Shiyuan_Huang1;~Long_Chen8;~Xudong_Lin1;~Guangxing_Han1;~Shih-Fu_Chang3", "gender": "M;M;F;M;M;M;M", "homepage": "https://yyuncong.github.io;https://blogs.cuit.columbia.edu/jm4743/;https://shiyuanh.github.io/;https://zjuchenlong.github.io/;;https://guangxinghan.github.io/;http://www.ee.columbia.edu/~sfchang/", "dblp": ";201/7741;226/2744/;64/5725-16;23/7723-3;208/4894;c/ShihFuChang", "google_scholar": "https://scholar.google.com/citations?hl=en;kXbWREkAAAAJ;CEtd-cMAAAAJ;https://scholar.google.com.sg/citations?user=-gtmMpIAAAAJ;https://scholar.google.com.hk/citations?hl=en;1dh5WWUAAAAJ;OMVTRscAAAAJ", "orcid": ";0000-0002-8625-5391;;0000-0001-6148-9709;;;", "linkedin": "yuncong-yupsong-yang/;jiawei-ma-ee1128/?locale=en_US;;;;guangxing-han-19b0a999/;", "or_profile": "~Yuncong_Yang1;~Jiawei_Ma1;~Shiyuan_Huang1;~Long_Chen8;~Xudong_Lin1;~Guangxing_Han1;~Shih-Fu_Chang3", "aff": ";Columbia University;Columbia University;Columbia University;Columbia University;Columbia University;Columbia University", "aff_domain": ";columbia.edu;columbia.edu;columbia.edu;columbia.edu;columbia.edu;ee.columbia.edu", "position": ";PhD student;PhD student;Postdoc;PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nyang2023tempclr,\ntitle={Temp{CLR}: Temporal Alignment Representation with Contrastive Learning},\nauthor={Yuncong Yang and Jiawei Ma and Shiyuan Huang and Long Chen and Xudong Lin and Guangxing Han and Shih-Fu Chang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=CIFOsnhZvON}\n}", "github": "", "project": "", "reviewers": "e5YM;kTb8;pEeR;KVqq", "pdf_size": 11296595, "recommendation": "6;6;6;6", "confidence": "4;4;3;4", "correctness": "3;3;3;4", "technical_novelty": "2;3;3;3", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "67;171;101;67", "wc_strength_and_weaknesses": "118;417;292;204", "wc_clarity_quality_novelty_and_reproducibility": "42;148;44;26", "wc_summary_review": "17;57;46;24", "wc_review": "244;793;483;321", "wc_reply_reviewers": "52;86;86;0", "wc_reply_authors": "1311;1391;1452;900", "reply_reviewers": "1;1;1;0", "reply_authors": "3;3;4;3", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 101.5, 42.45880356298326 ], "wc_strength_and_weaknesses_avg": [ 257.75, 110.62634179977208 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 65.0, 48.425200051213004 ], "wc_summary_review_avg": [ 36.0, 16.170961628796228 ], "wc_review_avg": [ 460.25, 210.59128068369782 ], "wc_reply_reviewers_avg": [ 56.0, 35.185224171518364 ], "wc_reply_authors_avg": [ 1263.5, 215.74116436137078 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.25, 0.4330127018922193 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6045548955848313774&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "pdf": "https://openreview.net/pdf?id=CIFOsnhZvON", "email": 
";columbia.edu;columbia.edu;columbia.edu;columbia.edu;columbia.edu;ee.columbia.edu", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Columbia University", "aff_unique_dep": "", "aff_unique_url": "https://www.columbia.edu", "aff_unique_abbr": "Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "AIM: Adapting Image Models for Efficient Video Action Recognition", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11785", "id": "CIoSZ_HKHS7", "poster": "/media/PosterPDFs/ICLR%202023/11785.png?t=1682037896.645195", "openreview": "https://openreview.net/forum?id=CIoSZ_HKHS7", "slides": "https://iclr.cc/virtual/2023/poster/11785", "video": "https://iclr.cc/virtual/2023/poster/11785", "author_site": "Taojiannan Yang, Yi Zhu, Yusheng Xie, Aston Zhang, Chen Chen, Mu Li", "tldr": "We propose a new method to adapt frozen image pre-trained model for efficient video action recognition", "abstract": "Recent vision transformer based video models mostly follow the ``image pre-training then finetuning\" paradigm and have achieved great success on multiple video benchmarks. However, fully finetuning such a video model could be computationally expensive and unnecessary, given the pre-trained image transformer models have demonstrated exceptional transferability. In this work, we propose a novel method to Adapt pre-trained Image Models (AIM) for efficient video understanding. By freezing the pre-trained image model and adding a few lightweight Adapters, we introduce spatial adaptation, temporal adaptation and joint adaptation to gradually equip an image model with spatiotemporal reasoning capability. We show that our proposed AIM can achieve competitive or even better performance than prior arts with substantially fewer tunable parameters on four video action recognition benchmarks. Thanks to its simplicity, our method is also generally applicable to different image pre-trained models, which has the potential to leverage more powerful image foundation models in the future. 
The project webpage is https://adapt-image-models.github.io/.", "keywords": "Video action recognition;efficient finetuning", "primary_area": "", "supplementary_material": "", "author": "Taojiannan Yang;Yi Zhu;Yusheng Xie;Aston Zhang;Chen Chen;Mu Li", "authorids": "~Taojiannan_Yang1;~Yi_Zhu1;~Yusheng_Xie1;~Aston_Zhang2;~Chen_Chen18;~Mu_Li4", "gender": "M;M;;;M;", "homepage": ";https://bryanyzhu.github.io/;;;https://www.crcv.ucf.edu/chenchen/;https://github.com/mli", "dblp": "249/8103;;56/10813;;65/4423-1;", "google_scholar": "Z_--q5UAAAAJ;IXw4UiwAAAAJ;Hs923REAAAAJ;;TuEwcZ0AAAAJ;", "orcid": ";0000-0002-6482-6712;;;0000-0003-3957-7061;", "linkedin": ";yi-zhu-546a437a/;;;dennychen/;", "or_profile": "~Taojiannan_Yang1;~Yi_Zhu1;~Yusheng_Xie1;~Aston_Zhang2;~Chen_Chen18;~Mu_Li4", "aff": "University of Central Florida;Amazon;Amazon;;University of Central Florida;Amazon", "aff_domain": "ucf.edu;amazon.com;amazon.com;;ucf.edu;amazon.com", "position": "PhD student;Applied Scientist;Principal Applied Scientist;;Assistant Professor;Researcher", "bibtex": "@inproceedings{\nyang2023aim,\ntitle={{AIM}: Adapting Image Models for Efficient Video Action Recognition},\nauthor={Taojiannan Yang and Yi Zhu and Yusheng Xie and Aston Zhang and Chen Chen and Mu Li},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=CIoSZ_HKHS7}\n}", "github": "", "project": "", "reviewers": "auyL;UEFx;jsrf", "pdf_size": 1077499, "recommendation": "6;6;8", "confidence": "4;4;4", "correctness": "3;4;3", "technical_novelty": "3;2;3", "empirical_novelty": "3;2;4", "wc_summary_paper": "62;106;85", "wc_strength_and_weaknesses": "233;323;194", "wc_clarity_quality_novelty_and_reproducibility": "41;193;57", "wc_summary_review": "37;36;24", "wc_review": "373;658;360", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 84.33333333333333, 17.96910929592474 ], "wc_strength_and_weaknesses_avg": [ 250.0, 54.01851534427802 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 97.0, 68.19579654690358 ], "wc_summary_review_avg": [ 32.333333333333336, 5.90668171555645 ], "wc_review_avg": [ 463.6666666666667, 137.5168676522589 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": -0.49999999999999983, "gs_citation": 229, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=340463095507824788&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=CIoSZ_HKHS7", "email": "ucf.edu;amazon.com;amazon.com;;ucf.edu;amazon.com", "author_num": 6, "aff_unique_index": "0;1;1;0;1", "aff_unique_norm": "University of Central Florida;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.ucf.edu;https://www.amazon.com", "aff_unique_abbr": "UCF;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "A Non-Asymptotic Analysis of Oversmoothing in 
Graph Neural Networks", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11534", "id": "CJd-BtnwtXq", "poster": "/media/PosterPDFs/ICLR%202023/11534.png?t=1682128515.7010143", "openreview": "https://openreview.net/forum?id=CJd-BtnwtXq", "slides": "https://iclr.cc/virtual/2023/poster/11534", "video": "https://iclr.cc/virtual/2023/poster/11534", "author_site": "Xinyi Wu, Zhengdao Chen, William Wang, Ali Jadbabaie", "tldr": "We precisely characterize the mechanism of overmoothing via a non-asymptotic analysis and answer why oversmoothing happens in shallow GNNs.", "abstract": "Oversmoothing is a central challenge of building more powerful Graph Neural Networks (GNNs). While previous works have only demonstrated that oversmoothing is inevitable when the number of graph convolutions tends to infinity, in this paper, we precisely characterize the mechanism behind the phenomenon via a non-asymptotic analysis. Specifically, we distinguish between two different effects when applying graph convolutions\u2014an undesirable mixing effect that homogenizes node representations in different classes, and a desirable denoising effect that homogenizes node representations in the same class. By quantifying these two effects on random graphs sampled from the Contextual Stochastic Block Model (CSBM), we show that oversmoothing happens once the mixing effect starts to dominate the denoising effect, and the number of layers required for this transition is $O(\\log N/\\log (\\log N))$ for sufficiently dense graphs with $N$ nodes. We also extend our analysis to study the effects of Personalized PageRank (PPR), or equivalently, the effects of initial residual connections on oversmoothing. Our results suggest that while PPR mitigates oversmoothing at deeper layers, PPR-based architectures still achieve their best performance at a shallow depth and are outperformed by the graph convolution approach on certain graphs. 
Finally, we support our theoretical results with numerical experiments, which further suggest that the oversmoothing phenomenon observed in practice can be magnified by the difficulty of optimizing deep GNN models.", "keywords": "graph neural networks;oversmoothing;representational power;theory;deep learning", "primary_area": "", "supplementary_material": "", "author": "Xinyi Wu;Zhengdao Chen;William Wei Wang;Ali Jadbabaie", "authorids": "~Xinyi_Wu3;~Zhengdao_Chen1;~William_Wei_Wang1;~Ali_Jadbabaie1", "gender": "F;;M;M", "homepage": "https://xinyiwu98.github.io;;;http://www.mit.edu/~jadbabai/www", "dblp": "98/7827;;;83/3158", "google_scholar": ";;Gv4kyjQAAAAJ;ZBc_WwYAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Xinyi_Wu3;~Zhengdao_Chen1;~William_Wei_Wang1;~Ali_Jadbabaie1", "aff": "Massachusetts Institute of Technology;;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;;mit.edu;mit.edu", "position": "PhD student;;PhD student;Full Professor", "bibtex": "@inproceedings{\nwu2023a,\ntitle={A Non-Asymptotic Analysis of Oversmoothing in Graph Neural Networks},\nauthor={Xinyi Wu and Zhengdao Chen and William Wei Wang and Ali Jadbabaie},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=CJd-BtnwtXq}\n}", "github": "", "project": "", "reviewers": "2Zpy;KQXD;K5CA", "pdf_size": 4405802, "recommendation": "3;6;8", "confidence": "4;3;2", "correctness": "3;4;4", "technical_novelty": "2;3;4", "empirical_novelty": "2;2;4", "wc_summary_paper": "53;126;63", "wc_strength_and_weaknesses": "264;370;184", "wc_clarity_quality_novelty_and_reproducibility": "20;32;27", "wc_summary_review": "66;52;55", "wc_review": "403;580;329", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "809;845;465", "reply_reviewers": "0;0;0", "reply_authors": "2;2;1", "recommendation_avg": [ 5.666666666666667, 2.0548046676563256 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.816496580927726 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.9428090415820634 ], "wc_summary_paper_avg": [ 80.66666666666667, 32.31442746239244 ], "wc_strength_and_weaknesses_avg": [ 272.6666666666667, 76.18107084804961 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 26.333333333333332, 4.9216076867444665 ], "wc_summary_review_avg": [ 57.666666666666664, 6.018490028422596 ], "wc_review_avg": [ 437.3333333333333, 105.30695872332254 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 706.3333333333334, 171.28014738693125 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.9933992677987828, "corr_recommendation_correctness": 0.9176629354822472, "gs_citation": 53, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8065328409858429577&as_sdt=4005&sciodt=0,6&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=CJd-BtnwtXq", "email": "mit.edu;;mit.edu;mit.edu", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "CJl2S0w1mbq", "title": "A UNIFIED VIEW OF 
FINDING AND TRANSFORMING WINNING LOTTERY TICKETS", "track": "main", "status": "Reject", "tldr": "This paper presents a novel paradigm that combines the increased regularization term and early stopping to find or transform winning tickets.", "abstract": "While over-parameterized deep neural networks obtain prominent results on various machine learning tasks, their superfluous parameters usually make model training and inference notoriously inefficient. The Lottery Ticket Hypothesis (LTH) addresses this issue from a novel perspective: it articulates that there always exist sparse and admirable subnetworks in a randomly initialized dense network, which can be realized by an iterative pruning strategy. The Dual Lottery Ticket Hypothesis (DLTH) further investigates sparse network training from a complementary view. Concretely, it introduces a gradually increased regularization term to transform a dense network into an ultra-light subnetwork without sacrificing learning capacity. After revisiting the success of LTH and DLTH, we unify these two research lines by coupling the stability of iterative pruning with the excellent performance of increased regularization, resulting in two new algorithms (UniLTH and UniDLTH) for finding and transforming winning tickets, respectively. Unlike either LTH, which uses no regularization, or DLTH, which applies regularization throughout training, our methods first train the network without any regularization force until the model reaches a certain point (i.e., the validation loss does not decrease for several epochs), and then employ increased regularization for information extrusion, iteratively performing magnitude pruning until the end. We theoretically prove that the early stopping mechanism acts analogously to regularization and can help the optimization trajectory stop at a better point in parameter space than regularization alone. This not only prevents the parameters from being excessively skewed toward the training distribution (over-fitting), but also better stimulates the network's potential to obtain more powerful subnetworks. Extensive experiments are conducted to show the superiority of our methods in terms of accuracy and sparsity.
", "keywords": "Lottery Tickets Hypothesis;Dual Lottery Tickets Hypothesis;Non-linear increased regularization;early stopping", "primary_area": "", "supplementary_material": "/attachment/9d5c2949c052b13a8e7266c22afe3a70582645ec.zip", "author": "Kun Wang;Yuxuan Liang;Pengkun Wang;Pengfei Gu;Zhengyang Zhou;Chao Huang;Yang Wang", "authorids": "~Kun_Wang15;~Yuxuan_Liang1;~Pengkun_Wang1;~Pengfei_Gu1;~Zhengyang_Zhou1;~Chao_Huang7;~Yang_Wang32", "gender": "M;M;M;;M;M;M", "homepage": "http://home.ustc.edu.cn/~wk520529/#home;https://yuxuanliang.com;http://home.ustc.edu.cn/~pengkun/index.html;https://github.com/gfly007;http://home.ustc.edu.cn/~zzy0929/Home/;;http://staff.ustc.edu.cn/~angyan/", "dblp": ";183/0977;;;246/8238;;", "google_scholar": "UnyqjWQAAAAJ;n9cODgcAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;dPElQLUAAAAJ;Zkv9FqwAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": "0000-0003-0602-169X;0000-0003-2817-7337;0000-0002-2680-4563;;0000-0003-4728-7347;;0000-0002-6079-7053", "linkedin": ";yoshall/;;;;;", "or_profile": "~Kun_Wang15;~Yuxuan_Liang1;~Pengkun_Wang1;~Pengfei_Gu1;~Zhengyang_Zhou1;~Chao_Huang7;~Yang_Wang32", "aff": "University of Science and Technology of China;The Hong Kong University of Science and Technology (Guangzhou);University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Hong Kong;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;hkust-gz.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;hku.hk;ustc.edu.cn", "position": "PhD student;Assistant Professor;PhD student;MS student;PhD student;Assistant Professor;Associate Professor", "bibtex": "@misc{\nwang2023a,\ntitle={A {UNIFIED} {VIEW} {OF} {FINDING} {AND} {TRANSFORMING} {WINNING} {LOTTERY} {TICKETS}},\nauthor={Kun Wang and Yuxuan Liang and Pengkun Wang and Pengfei Gu and Zhengyang Zhou and Chao Huang and Yang Wang},\nyear={2023},\nurl={https://openreview.net/forum?id=CJl2S0w1mbq}\n}", "github": "", "project": "", "reviewers": "FGiK;3BTa;vpf2;4vCa", "site": "https://openreview.net/forum?id=CJl2S0w1mbq", "pdf_size": 1815750, "recommendation": "3;3;6;6", "confidence": "4;4;4;2", "correctness": "3;2;4;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "68;63;63;64", "wc_strength_and_weaknesses": "121;931;278;286", "wc_clarity_quality_novelty_and_reproducibility": "20;46;8;33", "wc_summary_review": "24;46;23;12", "wc_review": "233;1086;372;395", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;693", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;1", "recommendation_avg": [ 4.5, 1.5 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 64.5, 2.0615528128088303 ], "wc_strength_and_weaknesses_avg": [ 404.0, 311.2948762829225 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 26.75, 14.201672436723781 ], "wc_summary_review_avg": [ 26.25, 12.336429791475327 ], "wc_review_avg": [ 521.5, 331.754804034546 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 173.25, 300.07780241130797 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0.25, 0.4330127018922193 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.5773502691896258, "corr_recommendation_correctness": 0.7071067811865476, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:fiaR_zc9yqsJ:scholar.google.com/&scioq=A+UNIFIED+VIEW+OF+FINDING+AND+TRANSFORMING+WINNING+LOTTERY+TICKETS&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;0;0;0;2;0", "aff_unique_norm": "University of Science and Technology of China;Hong Kong University of Science and Technology;University of Hong Kong", "aff_unique_dep": ";;", "aff_unique_url": "http://www.ustc.edu.cn;https://www.ust.hk;https://www.hku.hk", "aff_unique_abbr": "USTC;HKUST;HKU", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Guangzhou;Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "CKATCkQFcdJ", "title": "Gradient Descent Converges Linearly for Logistic Regression on Separable Data", "track": "main", "status": "Reject", "tldr": "We theoretically show that gradient descent with increasing learning rate obtains favorable rates on logistic regression.", "abstract": "We show that running gradient descent on the logistic regression objective guarantees loss $f(x) \\leq 1.1 \\cdot f(x^*) + \\epsilon$, where the error $\\epsilon$ decays exponentially with the number of iterations. This is in contrast to the common intuition that the absence of strong convexity precludes linear convergence of first-order methods, and highlights the importance of variable learning rates for gradient descent. For separable data, our analysis proves that the error between the predictor returned by gradient descent and the hard SVM predictor decays as $\\mathrm{poly}(1/t)$, exponentially faster than the previously known bound of $O(\\log\\log t / \\log t)$. Our key observation is a property of the logistic loss that we call multiplicative smoothness and is (surprisingly) little-explored: As the loss decreases, the objective becomes (locally) smoother and therefore the learning rate can increase. 
Our results also extend to sparse logistic regression, where they lead to an exponential improvement of the sparsity-error tradeoff.\n", "keywords": "logistic regression;gradient descent;sparse optimization", "primary_area": "", "supplementary_material": "", "author": "Kyriakos Axiotis;Maxim Sviridenko", "authorids": "~Kyriakos_Axiotis1;~Maxim_Sviridenko1", "gender": ";", "homepage": ";", "dblp": "176/5139;s/MaximSviridenko", "google_scholar": "Xhv2tkcAAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Kyriakos_Axiotis1;~Maxim_Sviridenko1", "aff": "Google;Yahoo", "aff_domain": "google.com;yahoo.com", "position": "Researcher;Principal Researcher", "bibtex": "@misc{\naxiotis2023gradient,\ntitle={Gradient Descent Converges Linearly for Logistic Regression on Separable Data},\nauthor={Kyriakos Axiotis and Maxim Sviridenko},\nyear={2023},\nurl={https://openreview.net/forum?id=CKATCkQFcdJ}\n}", "github": "", "project": "", "reviewers": "qF3Q;DG4g;9NWd;gbUi", "site": "https://openreview.net/forum?id=CKATCkQFcdJ", "pdf_size": 390847, "recommendation": "5;6;8;8", "confidence": "4;3;4;4", "correctness": "4;3;4;4", "technical_novelty": "2;3;4;3", "empirical_novelty": "0;2;4;3", "wc_summary_paper": "99;91;127;139", "wc_strength_and_weaknesses": "262;220;234;396", "wc_clarity_quality_novelty_and_reproducibility": "12;9;2;57", "wc_summary_review": "32;47;42;25", "wc_review": "405;367;405;617", "wc_reply_reviewers": "916;0;59;0", "wc_reply_authors": "1328;147;99;413", "reply_reviewers": "3;0;1;0", "reply_authors": "4;1;2;1", "recommendation_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.25, 1.479019945774904 ], "wc_summary_paper_avg": [ 114.0, 19.672315572906 ], "wc_strength_and_weaknesses_avg": [ 278.0, 69.7853852894716 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 20.0, 21.66794868002045 ], "wc_summary_review_avg": [ 36.5, 8.558621384311845 ], "wc_review_avg": [ 448.5, 98.51268953794734 ], "wc_reply_reviewers_avg": [ 243.75, 388.8703993620497 ], "wc_reply_authors_avg": [ 496.75, 494.60103871706536 ], "reply_reviewers_avg": [ 1.0, 1.224744871391589 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3802346219137414703&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1", "aff_unique_norm": "Google;Yahoo", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://www.yahoo.com", "aff_unique_abbr": "Google;Yahoo", "aff_campus_unique_index": "0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "CKTmsDxRPn", "title": "On Convergence of Federated Averaging Langevin Dynamics", "track": "main", "status": "Reject", "tldr": "A federated averaging Langevin algorithm (FA-LD) for uncertainty quantification and mean predictions in federated learning.", "abstract": "We propose a federated averaging Langevin algorithm (FA-LD) for uncertainty quantification and mean predictions with distributed clients. In particular, we generalize beyond normal posterior distributions and consider a general class of models. 
We develop theoretical guarantees for FA-LD for strongly log-concave distributions with non-i.i.d. data and study how the injected noise, the stochastic-gradient noise, the heterogeneity of data, and the varying learning rates affect the convergence. Such an analysis sheds light on the optimal choice of local updates to minimize communication cost. Importantly for our approach, the communication efficiency does not deteriorate with the injected noise in the Langevin algorithms. In addition, we examine both independent and correlated noise used over different clients in our FA-LD algorithm. We observe trade-offs among communication, accuracy, and data privacy. As local devices may become inactive in federated networks, we also show convergence results based on different averaging schemes where only partial device updates are available. In such a case, we discover an additional bias that does not decay to zero.", "keywords": "Langevin dynamics;federated learning;posterior inference;MCMC;stochastic gradient Langevin dynamics;differential privacy", "primary_area": "", "supplementary_material": "", "author": "Wei Deng;Qian Zhang;Yian Ma;Zhao Song;Guang Lin", "authorids": "~Wei_Deng1;~Qian_Zhang10;~Yian_Ma1;~Zhao_Song6;~Guang_Lin1", "gender": "M;M;M;M;M", "homepage": "https://waynedw.github.io/;;https://sites.google.com/view/yianma;http://www.math.purdue.edu/~lin491/;https://www.youtube.com/@zhaosong2031", "dblp": "69/508-2;04/2024-67.html;;;76/4051-2", "google_scholar": "IYiyxssAAAAJ;https://scholar.google.com/citations?hl=en;A0TFlacAAAAJ;https://scholar.google.com/citations?hl=en;yDZct7UAAAAJ", "orcid": ";;;0000-0002-0976-1987;", "linkedin": ";;;;", "or_profile": "~Wei_Deng1;~Qian_Zhang10;~Yian_Ma1;~Guang_Lin1;~Zhao_Song3", "aff": "Morgan Stanley;Purdue University;University of California, San Diego;Purdue University;Adobe", "aff_domain": "morganstanley.com;purdue.edu;ucsd.edu;purdue.edu;adobe.com", "position": "Researcher;PhD student;Assistant Professor;Associate Professor;Researcher", "bibtex": "@misc{\ndeng2023on,\ntitle={On Convergence of Federated Averaging Langevin Dynamics},\nauthor={Wei Deng and Qian Zhang and Yian Ma and Zhao Song and Guang Lin},\nyear={2023},\nurl={https://openreview.net/forum?id=CKTmsDxRPn}\n}", "github": "", "project": "", "reviewers": "PJdt;Nk8K;mK8G", "site": "https://openreview.net/forum?id=CKTmsDxRPn", "pdf_size": 787240, "recommendation": "3;5;6", "confidence": "4;3;3", "correctness": "1;2;4", "technical_novelty": "4;3;3", "empirical_novelty": "3;2;3", "wc_summary_paper": "65;148;41", "wc_strength_and_weaknesses": "341;571;397", "wc_clarity_quality_novelty_and_reproducibility": "14;215;115", "wc_summary_review": "10;87;45", "wc_review": "430;1021;598", "wc_reply_reviewers": "110;10;0", "wc_reply_authors": "388;185;323", "reply_reviewers": "4;1;0", "reply_authors": "4;1;2", "recommendation_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 2.3333333333333335, 1.247219128924647 ], "technical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 84.66666666666667, 45.84272631023983 ], "wc_strength_and_weaknesses_avg": [ 436.3333333333333, 97.9296799863158 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 114.66666666666667, 82.05824489687527 ], "wc_summary_review_avg": [ 47.333333333333336, 31.47838764754143 ], "wc_review_avg": [ 683.0,
248.64834606327065 ], "wc_reply_reviewers_avg": [ 40.0, 49.6655480858378 ], "wc_reply_authors_avg": [ 298.6666666666667, 84.64172861078761 ], "reply_reviewers_avg": [ 1.6666666666666667, 1.699673171197595 ], "reply_authors_avg": [ 2.3333333333333335, 1.247219128924647 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.9449111825230683, "corr_recommendation_correctness": 0.9285714285714286, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3803638839151987213&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 9, "aff_unique_index": "0;1;2;1;3", "aff_unique_norm": "Morgan Stanley;Purdue University;University of California, San Diego;Adobe", "aff_unique_dep": ";;;Adobe Inc.", "aff_unique_url": "https://www.morganstanley.com;https://www.purdue.edu;https://www.ucsd.edu;https://www.adobe.com", "aff_unique_abbr": "Morgan Stanley;Purdue;UCSD;Adobe", "aff_campus_unique_index": "1", "aff_campus_unique": ";San Diego", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Weighted Ensemble Self-Supervised Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11408", "id": "CL-sVR9pvF", "poster": "", "openreview": "https://openreview.net/forum?id=CL-sVR9pvF", "slides": "https://iclr.cc/virtual/2023/poster/11408", "video": "https://iclr.cc/virtual/2023/poster/11408", "author_site": "Yangjun Ruan, Saurabh Singh, Warren Morningstar, Alexander Alemi, Sergey Ioffe, Ian Fischer, Joshua Dillon", "tldr": "We efficiently ensemble SSL methods and train them with new objectives to get SOTA results on ImageNet-1K SSL evaluations.", "abstract": "Ensembling has proven to be a powerful technique for boosting model performance, uncertainty estimation, and robustness in supervised learning. Advances in self-supervised learning (SSL) enable leveraging large unlabeled corpora for state-of-the-art few-shot and supervised learning performance. In this paper, we explore how ensemble methods can improve recent SSL techniques by developing a framework that permits data-dependent weighted cross-entropy losses. We refrain from ensembling the representation backbone; this choice yields an efficient ensemble method that incurs a small training cost and requires no architectural changes or computational overhead to downstream evaluation. The effectiveness of our method is demonstrated with two state-of-the-art SSL methods, DINO (Caron et al., 2021) and MSN (Assran et al., 2022). Our method outperforms both in multiple evaluation metrics on ImageNet-1K, particularly in the few-shot setting. We explore several weighting schemes and find that those which increase the diversity of ensemble heads lead to better downstream evaluation results. Thorough experiments yield improved prior art baselines which our method still surpasses; e.g., our overall improvement with MSN ViT-B/16 is 3.9 p.p. for 1-shot learning.", "keywords": "self-supervised learning;ensemble;representation learning", "primary_area": "", "supplementary_material": "", "author": "Yangjun Ruan;Saurabh Singh;Warren Richard Morningstar;Alexander A Alemi;Sergey Ioffe;Ian Fischer;Joshua V. 
Dillon", "authorids": "~Yangjun_Ruan1;~Saurabh_Singh1;~Warren_Richard_Morningstar1;~Alexander_A_Alemi1;~Sergey_Ioffe3;~Ian_Fischer1;~Joshua_V._Dillon1", "gender": "M;M;M;M;M;M;M", "homepage": "http://www.cs.toronto.edu/~yjruan/;http://www.saurabhsingh.info;;https://alexalemi.com;;;", "dblp": "237/3892;75/5436-5;260/0779;160/8158;93/2096;17/5600;", "google_scholar": "https://scholar.google.com.hk/citations?user=9AdCSywAAAAJ;L7fTK1MAAAAJ;https://scholar.google.com/citations?view_op=search_authors;68hTs9wAAAAJ;S5zOyIkAAAAJ;tPnf61gAAAAJ;g8vrSV8AAAAJ", "orcid": ";;;;;;", "linkedin": ";;;;sergey-ioffe-1758821/;iantfischer;jvdillon/", "or_profile": "~Yangjun_Ruan1;~Saurabh_Singh1;~Warren_Richard_Morningstar1;~Alexander_A_Alemi1;~Sergey_Ioffe3;~Ian_Fischer1;~Joshua_V._Dillon1", "aff": "University of Toronto;Google;Google;Google;Google;Google;Google", "aff_domain": "toronto.edu;google.com;google.com;google.com;google.com;google.com;google.com", "position": "PhD student;Research Scientist;Software Engineer;Research Scientist;Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\nruan2023weighted,\ntitle={Weighted Ensemble Self-Supervised Learning},\nauthor={Yangjun Ruan and Saurabh Singh and Warren Richard Morningstar and Alexander A Alemi and Sergey Ioffe and Ian Fischer and Joshua V. Dillon},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=CL-sVR9pvF}\n}", "github": "", "project": "", "reviewers": "37Hd;wYk7;pJds;8Pcm", "pdf_size": 669585, "recommendation": "3;6;6;8", "confidence": "3;4;3;3", "correctness": "3;3;3;3", "technical_novelty": "2;3;2;3", "empirical_novelty": "2;3;3;4", "wc_summary_paper": "68;66;67;51", "wc_strength_and_weaknesses": "289;94;103;313", "wc_clarity_quality_novelty_and_reproducibility": "25;2;28;91", "wc_summary_review": "31;44;60;67", "wc_review": "413;206;258;522", "wc_reply_reviewers": "0;0;40;59", "wc_reply_authors": "777;399;414;394", "reply_reviewers": "0;0;1;1", "reply_authors": "6;3;3;3", "recommendation_avg": [ 5.75, 1.7853571071357126 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 63.0, 6.96419413859206 ], "wc_strength_and_weaknesses_avg": [ 199.75, 101.65474656896254 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 36.5, 33.03407331831786 ], "wc_summary_review_avg": [ 50.5, 14.0089257261219 ], "wc_review_avg": [ 349.75, 125.25249498512994 ], "wc_reply_reviewers_avg": [ 24.75, 25.645418694183956 ], "wc_reply_authors_avg": [ 496.0, 162.40227830914196 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 3.75, 1.299038105676658 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.08084520834544431, "corr_recommendation_correctness": 0.0, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17411145107348589921&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=CL-sVR9pvF", "email": "toronto.edu;google.com;google.com;google.com;google.com;google.com;google.com", "author_num": 7, "aff_unique_index": "0;1;1;1;1;1;1", "aff_unique_norm": "University of Toronto;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.utoronto.ca;https://www.google.com", "aff_unique_abbr": "U of T;Google", "aff_campus_unique_index": "1;1;1;1;1;1", "aff_campus_unique": ";Mountain View", 
"aff_country_unique_index": "0;1;1;1;1;1;1", "aff_country_unique": "Canada;United States" }, { "id": "CLmXXljIf__", "title": "Uncertainty-Aware Self-Supervised Learning with Independent Sub-networks", "track": "main", "status": "Withdraw", "tldr": "We introduce an uncertainty-aware training regime for self-supervised models with an ensemble of independent sub-networks and a novel loss function for encouraging diversity.", "abstract": "Self-supervised learning methods are state-of-the-art across a wide range of tasks in computer vision, natural language processing, and multimodal analysis. Estimating the epistemic -- or model -- uncertainty of self-supervised model predictions is critical for building trustworthy machine learning systems in crucial applications, such as medical diagnosis and autonomous driving. A common approach to estimating model uncertainty is to train a \\emph{model ensemble}. However, deep ensembles induce high computational costs and memory demand. This is particularly challenging in self-supervised deep learning, where even a single network is computationally demanding. Moreover, most existing model uncertainty techniques are built for supervised deep learning. Motivated by this, we propose a novel approach to making self-supervised learning probabilistic. We introduce an uncertainty-aware training regime for self-supervised models with an ensemble of independent sub-networks and a novel loss function for encouraging diversity. Our method builds a sub-model ensemble with high diversity -- and consequently, well-calibrated estimates of model uncertainty -- at low computational overhead over a single model, while performing on par with deep self-supervised ensembles. Extensive experiments across different tasks, such as in-distribution generalization, out-of-distribution detection, dataset corruption, and semi-supervised settings, demonstrate that our approach increases prediction reliability. We show that our method achieves both excellent accuracy and calibration, improving over existing ensemble methods in a wide range of self-supervised architectures for computer vision, natural language processing, and genomics data. 
", "keywords": "uncertainty-awareness;calibration;self-supervised pretraining;independent sub-networks;efficient ensemble", "primary_area": "", "supplementary_material": "", "author": "Amirhossein Vahidi;Lisa Wimmer;H\u00fcseyin Anil G\u00fcnd\u00fcz;Bernd Bischl;Mina Rezaei", "authorids": "a.vahidi@campus.lmu.de;~Lisa_Wimmer1;~H\u00fcseyin_Anil_G\u00fcnd\u00fcz1;~Bernd_Bischl1;~Mina_Rezaei1", "gender": ";F;;M;F", "homepage": ";https://www.slds.stat.uni-muenchen.de/people/wimmer/;https://www.slds.stat.uni-muenchen.de/people/guenduez/;https://www.slds.stat.uni-muenchen.de/;https://www.compstat.statistik.uni-muenchen.de/people/minar/", "dblp": ";;321/9907.html;48/5326;205/2767", "google_scholar": ";https://scholar.google.de/citations?user=l0hl-mAAAAAJ;;https://scholar.google.de/citations?user=s34UckkAAAAJ;https://scholar.google.de/citations?hl=en", "orcid": ";0009-0009-7928-6075;;0000-0001-6002-6980;0000-0001-6994-6345", "linkedin": ";;;;mina-rezaei-b88a3a69/", "or_profile": "a.vahidi@campus.lmu.de;~Lisa_Wimmer1;~H\u00fcseyin_Anil_G\u00fcnd\u00fcz1;~Bernd_Bischl1;~Mina_Rezaei1", "aff": ";LMU Munich;LMU Munich;LMU;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen", "aff_domain": ";stat.uni-muenchen.de;lmu.de;uni-muenchen.de;lmu.de", "position": ";PhD student;PhD student;Full Professor;Principal Researcher", "bibtex": "@misc{\nvahidi2023uncertaintyaware,\ntitle={Uncertainty-Aware Self-Supervised Learning with Independent Sub-networks},\nauthor={Amirhossein Vahidi and Lisa Wimmer and H{\\\"u}seyin Anil G{\\\"u}nd{\\\"u}z and Bernd Bischl and Mina Rezaei},\nyear={2023},\nurl={https://openreview.net/forum?id=CLmXXljIf__}\n}", "github": "", "project": "", "reviewers": "aqUe;uAiq;ywPk;x29N", "site": "https://openreview.net/forum?id=CLmXXljIf__", "pdf_size": 909803, "recommendation": "5;5;5;5", "confidence": "3;3;4;4", "correctness": "3;3;3;3", "technical_novelty": "3;2;2;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "158;42;81;46", "wc_strength_and_weaknesses": "141;579;127;296", "wc_clarity_quality_novelty_and_reproducibility": "416;29;34;22", "wc_summary_review": "55;41;40;53", "wc_review": "770;691;282;417", "wc_reply_reviewers": "27;0;0;117", "wc_reply_authors": "375;1398;615;1505", "reply_reviewers": "1;0;0;1", "reply_authors": "2;2;1;3", "recommendation_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 81.75, 46.56380031741396 ], "wc_strength_and_weaknesses_avg": [ 285.75, 181.83423082577164 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 125.25, 167.9186931225943 ], "wc_summary_review_avg": [ 47.25, 6.796138609534093 ], "wc_review_avg": [ 540.0, 198.36456336755313 ], "wc_reply_reviewers_avg": [ 36.0, 48.0468521341409 ], "wc_reply_authors_avg": [ 973.25, 487.19009380322996 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:wtNqwD8EMF0J:scholar.google.com/&scioq=Uncertainty-Aware+Self-Supervised+Learning+with+Independent+Sub-networks&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Ludwig Maximilian University of Munich;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen", "aff_unique_dep": ";", "aff_unique_url": 
"https://www.lmu.de;https://www.lmu.de", "aff_unique_abbr": "LMU;LMU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Munich;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "title": "Neural Implicit Shape Editing using Boundary Sensitivity", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11096", "id": "CMPIBjmhpo", "poster": "/media/PosterPDFs/ICLR%202023/11096.png?t=1683550189.8389788", "openreview": "https://openreview.net/forum?id=CMPIBjmhpo", "slides": "https://iclr.cc/virtual/2023/poster/11096", "video": "https://iclr.cc/virtual/2023/poster/11096", "author_site": "Arturs Berzins, Moritz Ibing, Leif Kobbelt", "tldr": "", "abstract": "Neural fields are receiving increased attention as a geometric representation due to their ability to compactly store detailed and smooth shapes and easily undergo topological changes. Compared to classic geometry representations, however, neural representations do not allow the user to exert intuitive control over the shape. Motivated by this, we leverage boundary sensitivity to express how perturbations in parameters move the shape boundary. This allows us to interpret the effect of each learnable parameter and study achievable deformations. With this, we perform geometric editing: finding a parameter update that best approximates a globally prescribed deformation. Prescribing the deformation only locally allows the rest of the shape to change according to some prior, such as semantics or deformation rigidity. Our method is agnostic to the model and its training and updates the NN in-place. Furthermore, we show how boundary sensitivity helps to optimize and constrain objectives (such as surface area and volume), which are difficult to compute without first converting to another representation, such as a mesh.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Arturs Berzins;Moritz Ibing;Leif Kobbelt", "authorids": "~Arturs_Berzins1;~Moritz_Ibing1;~Leif_Kobbelt1", "gender": ";M;M", "homepage": "https://arturs-berzins.github.io/;;https://www.graphics.rwth-aachen.de/", "dblp": "268/6949;https://dblp.uni-trier.de/pid/185/2738;k/LeifKobbelt", "google_scholar": "cAU3TUoAAAAJ;WEvcegEAAAAJ;https://scholar.google.de/citations?user=eCsqoJgAAAAJ", "orcid": ";;0000-0002-7880-9470", "linkedin": "arturs-berzins/;;", "or_profile": "~Arturs_Berzins1;~Moritz_Ibing1;~Leif_Kobbelt1", "aff": "SINTEF Digital;Rheinisch Westf\u00e4lische Technische Hochschule Aachen;RWTH Aachen University, Rheinisch Westf\u00e4lische Technische Hochschule Aachen", "aff_domain": "sintef.com;rwth-aachen.de;cs.rwth-aachen.de", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nberzins2023neural,\ntitle={Neural Implicit Shape Editing using Boundary Sensitivity},\nauthor={Arturs Berzins and Moritz Ibing and Leif Kobbelt},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=CMPIBjmhpo}\n}", "github": "", "project": "", "reviewers": "TumX;n2KN;mGb1;VBEG", "pdf_size": 1823343, "recommendation": "5;5;6;6", "confidence": "4;4;4;4", "correctness": "4;3;4;4", "technical_novelty": "3;3;2;3", "empirical_novelty": "2;0;2;3", "wc_summary_paper": "87;43;69;124", "wc_strength_and_weaknesses": "379;56;384;126", "wc_clarity_quality_novelty_and_reproducibility": "68;369;19;68", "wc_summary_review": "365;23;39;39", "wc_review": "899;491;511;357", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": 
"413;326;446;236", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.75, 1.0897247358851685 ], "wc_summary_paper_avg": [ 80.75, 29.46502163583119 ], "wc_strength_and_weaknesses_avg": [ 236.25, 147.3539531196907 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 131.0, 138.8578409741416 ], "wc_summary_review_avg": [ 116.5, 143.62015875217517 ], "wc_review_avg": [ 564.5, 201.99690591689765 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 355.25, 81.61916135320186 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8271714633924082274&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "pdf": "https://openreview.net/pdf?id=CMPIBjmhpo", "email": "sintef.com;rwth-aachen.de;cs.rwth-aachen.de", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "SINTEF;RWTH Aachen University", "aff_unique_dep": "Digital;", "aff_unique_url": "https://www.sintef.no;https://www.rwth-aachen.de", "aff_unique_abbr": "SINTEF;RWTH", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Aachen", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Norway;Germany" }, { "id": "CMsuT6Cmfvs", "title": "Lifting the Curse of Capacity Gap in Distilling Large Language Models", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Large language models (LLMs) have shown compelling performance on various downstream tasks, but unfortunately require a tremendous amount of inference compute. Knowledge distillation finds a path to compress LLMs to small ones with a teacher-student paradigm. However, when capacity gap between the teacher and the student is large, a curse of capacity gap appears, invoking a deficiency in distilling LLMs. While a few studies have been investigated to fill the gap, the curse is not yet well tackled. To the demand, we aim at lifting the curse of capacity gap via enlarging the capacity of the student without notably increasing the inference compute. Largely motivated by sparse activation regime of mixture of experts (MoE), we propose a mixture of minimal experts (MiniMoE), which imposes extra parameters to the student but introduces almost no additional inference compute. Experimental results on GLUE and CoNLL demonstrate the curse of capacity gap is lifted by the magic of MiniMoE to a large extent.MiniMoE also achieves state-of-the-art performance at small FLOPs compared with a range of competitive baselines. 
With compression as much as ~50x, MiniMoE preserves 95% GLUE score of the teacher.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Chen Zhang;Yang Yang;Jiahao Liu;Jingang Wang;Wei Wu;Benyou Wang;Dawei Song", "authorids": "~Chen_Zhang7;~Yang_Yang30;~Jiahao_Liu6;~Jingang_Wang1;~Wei_Wu1;~Benyou_Wang2;~Dawei_Song1", "gender": "M;;M;M;M;M;M", "homepage": "https://genezc7.github.io;https://github.com/YangYang317;https://hit-computer.github.io/;https://sites.google.com/site/bitwjg/;https://sites.google.com/view/wei-wu-homepage;https://wabyking.github.io/old.html;", "dblp": ";;;59/7807;95/6985-14;169/1793;47/6784-1.html", "google_scholar": "IMwAXWcAAAAJ;;https://scholar.google.com.hk/citations?user=IvImF70AAAAJ;janU39IAAAAJ;https://scholar.google.co.jp/citations?hl=en;Jk4vJU8AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;;0000-0001-6079-7697;0000-0002-1501-9914;", "linkedin": ";;;;;;", "or_profile": "~Chen_Zhang7;~Yang_Yang30;~Jiahao_Liu6;~Jingang_Wang1;~Wei_Wu1;~Benyou_Wang2;~Dawei_Song1", "aff": "Beijing Institute of Technology;;Meituan;Meituan;Ant Research;The Chinese University of Hong Kong, Shenzhen;Open University", "aff_domain": "bit.edu.cn;;meituan.com;meituan.com;antgroup.com;cuhk.edu.cn;open.ac.uk", "position": "PhD student;;Researcher;Researcher;Researcher;Assistant Professor;Full Professor", "bibtex": "@misc{\nzhang2023lifting,\ntitle={Lifting the Curse of Capacity Gap in Distilling Large Language Models},\nauthor={Chen Zhang and Yang Yang and Jiahao Liu and Jingang Wang and Wei Wu and Benyou Wang and Dawei Song},\nyear={2023},\nurl={https://openreview.net/forum?id=CMsuT6Cmfvs}\n}", "github": "", "project": "", "reviewers": "89th;yXUa;2V6h;dnTB;tQUP", "site": "https://openreview.net/forum?id=CMsuT6Cmfvs", "pdf_size": 491943, "recommendation": "3;3;5;5;5", "confidence": "5;2;3;4;3", "correctness": "2;3;3;2;4", "technical_novelty": "2;2;3;3;2", "empirical_novelty": "2;2;2;3;2", "wc_summary_paper": "80;76;138;114;226", "wc_strength_and_weaknesses": "185;222;264;148;223", "wc_clarity_quality_novelty_and_reproducibility": "176;73;44;308;47", "wc_summary_review": "63;10;44;73;88", "wc_review": "504;381;490;643;584", "wc_reply_reviewers": "0;0;118;0;0", "wc_reply_authors": "723;727;733;614;632", "reply_reviewers": "0;0;1;0;0", "reply_authors": "2;2;2;2;2", "recommendation_avg": [ 4.2, 0.9797958971132712 ], "confidence_avg": [ 3.4, 1.019803902718557 ], "correctness_avg": [ 2.8, 0.7483314773547882 ], "technical_novelty_avg": [ 2.4, 0.4898979485566356 ], "empirical_novelty_avg": [ 2.2, 0.39999999999999997 ], "wc_summary_paper_avg": [ 126.8, 54.590841722765184 ], "wc_strength_and_weaknesses_avg": [ 208.4, 39.20510170883376 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 129.6, 101.32442943337999 ], "wc_summary_review_avg": [ 55.6, 26.911707489492375 ], "wc_review_avg": [ 520.4, 89.10802433002316 ], "wc_reply_reviewers_avg": [ 23.6, 47.2 ], "wc_reply_authors_avg": [ 685.8, 51.68907041145159 ], "reply_reviewers_avg": [ 0.2, 0.4000000000000001 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.08006407690254354, "corr_recommendation_correctness": 0.3273268353539886, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:W1Dyv1thw6kJ:scholar.google.com/&scioq=Lifting+the+Curse+of+Capacity+Gap+in+Distilling+Large+Language+Models&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;1;2;3;4", "aff_unique_norm": "Beijing 
Institute of Technology;Meituan;Ant Research;Chinese University of Hong Kong;Open University", "aff_unique_dep": ";;;;", "aff_unique_url": "http://www.bit.edu.cn/;https://www.meituan.com;https://www.antgroup.com;https://www.cuhk.edu.cn;https://www.open.ac.uk", "aff_unique_abbr": "BIT;Meituan;Ant Research;CUHK;OU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "China;United Kingdom" }, { "title": "BALTO: fast tensor program optimization with diversity-based active learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11147", "id": "CN223OXgyb5", "poster": "", "openreview": "https://openreview.net/forum?id=CN223OXgyb5", "slides": "https://iclr.cc/virtual/2023/poster/11147", "video": "https://iclr.cc/virtual/2023/poster/11147", "author_site": "Jun Bi, Xiaqing Li, Qi Guo, Rui Zhang, Yuanbo Wen, Xing Hu, Zidong Du, Xinkai Song, Yifan Hao, Yunji Chen", "tldr": "", "abstract": "Tensor program optimization (TPO) based on pre-trained models can effectively reduce the computing time of deep neural networks. However, training such models is prohibitively expensive, as it depends on a large-scale dataset and thus requires a tremendous number of time-consuming performance measurements (more than 1 million) on target platforms. In this paper, we propose BALTO, a fast TPO approach with biased-diversity-based active learning, aiming at much lower training costs under similar optimization accuracy. The key insight is that the random sampling of existing approaches suffers from a heavy redundancy of low-performance programs, which incurs tremendous duplicated time-consuming measurements. Inspired by this, BALTO removes such redundancy by introducing active learning (AL) to TPO for a much lower training cost. However, applying AL in a brute-force way in BALTO can lead to an overestimation problem. To address this, we further propose a biased-diversity-based diversity scheme specially designed for BALTO. We compare BALTO against TenSet on $6$ typical hardware platforms over $2$ learning models. Experimental results show that, on average, BALTO only requires 5% of the total performance measurements of TenSet to achieve the same or higher model accuracy.
Moreover, the optimized tensor programs even outperform those of TenSet by 1.06% due to higher model accuracy.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Jun Bi;Xiaqing Li;Qi Guo;Rui Zhang;Yuanbo Wen;Xing Hu;Zidong Du;Xinkai Song;Yifan Hao;Yunji Chen", "authorids": "~Jun_Bi2;~Xiaqing_Li1;~Qi_Guo4;~Rui_Zhang1;~Yuanbo_Wen1;~Xing_Hu3;~Zidong_Du1;~Xinkai_Song1;~Yifan_Hao1;~Yunji_Chen1", "gender": ";M;M;F;M;F;;;;M", "homepage": "https://github.com/bijun1;https://sciprofiles.com/profile/1301031;http://novel.ict.ac.cn/qguo;;;;https://zidongdu.github.io/;;;", "dblp": ";;67/398-1;60/2536-40;262/3144;49/10052-1;44/11216;;;48/474", "google_scholar": ";;;dse6jAsAAAAJ;;Hc3iRxUAAAAJ;https://scholar.google.com.sg/citations?user=8N9ym9YAAAAJ;;;", "orcid": ";;;;0000-0002-7775-2724;;0000-0002-7603-4210;;;", "linkedin": ";;;;;;;;;", "or_profile": "~Jun_Bi2;~Xiaqing_Li1;~Qi_Guo4;~Rui_Zhang1;~Yuanbo_Wen1;~Xing_Hu3;~Zidong_Du1;~Xinkai_Song1;~Yifan_Hao1;~Yunji_Chen1", "aff": "University of Science and Technology of China;Institute of computing, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, CAS;Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;;;Institute of Computing Technology, Chinese Academy of Sciences", "aff_domain": "ustc.edu.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn;;;ict.ac.cn", "position": "PhD student;Assistant Professor;Full Professor;Assistant Professor;Postdoc;Associate Professor;Full Professor;;;Full Professor", "bibtex": "@inproceedings{\nbi2023balto,\ntitle={{BALTO}: fast tensor program optimization with diversity-based active learning},\nauthor={Jun Bi and Xiaqing Li and Qi Guo and Rui Zhang and Yuanbo Wen and Xing Hu and Zidong Du and Xinkai Song and Yifan Hao and Yunji Chen},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=CN223OXgyb5}\n}", "github": "", "project": "", "reviewers": "7kGb;nyuX;UWiq;3pkd", "pdf_size": 861987, "recommendation": "5;6;6;8", "confidence": "4;4;4;4", "correctness": "2;3;3;3", "technical_novelty": "2;3;2;3", "empirical_novelty": "0;3;2;4", "wc_summary_paper": "60;151;39;67", "wc_strength_and_weaknesses": "206;452;133;58", "wc_clarity_quality_novelty_and_reproducibility": "71;119;18;32", "wc_summary_review": "62;99;25;191", "wc_review": "399;821;215;348", "wc_reply_reviewers": "0;0;18;43", "wc_reply_authors": "860;647;833;690", "reply_reviewers": "0;0;1;1", "reply_authors": "2;1;2;1", "recommendation_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.25, 1.479019945774904 ], "wc_summary_paper_avg": [ 79.25, 42.68708821177664 ], "wc_strength_and_weaknesses_avg": [ 212.25, 147.98036187278365 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 60.0, 39.2109678533953 ], "wc_summary_review_avg": [ 94.25, 61.68214895737664 ], "wc_review_avg": [ 445.75, 226.8252355889882 ], "wc_reply_reviewers_avg": [ 15.25, 17.62632973707232 ], "wc_reply_authors_avg": [ 757.5, 90.79234549233762 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 10, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness":
0.6622661785325219, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4027737220602672708&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "pdf": "https://openreview.net/pdf?id=CN223OXgyb5", "email": "ustc.edu.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn;;;ict.ac.cn", "author_num": 10, "aff_unique_index": "0;1;1;1;1;1;1;1", "aff_unique_norm": "University of Science and Technology of China;Chinese Academy of Sciences", "aff_unique_dep": ";Institute of Computing", "aff_unique_url": "http://www.ustc.edu.cn;http://www.cas.cn", "aff_unique_abbr": "USTC;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Hungry Hungry Hippos: Towards Language Modeling with State Space Models", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11738", "id": "COZDy0WYGg", "poster": "", "openreview": "https://openreview.net/forum?id=COZDy0WYGg", "slides": "https://iclr.cc/virtual/2023/poster/11738", "video": "https://iclr.cc/virtual/2023/poster/11738", "author_site": "Dan Fu, Tri Dao, Khaled Saab, Armin Thomas, Atri Rudra, Christopher Re", "tldr": "We study the expressivity gap between state space models (SSMs) and attention on language modeling and reduce the hardware barrier between SSMs and attention.", "abstract": "State space models (SSMs) have demonstrated state-of-the-art sequence modeling performance in some modalities, but underperform attention in language modeling. Moreover, despite scaling nearly linearly in sequence length instead of quadratically, SSMs are still slower than Transformers due to poor hardware utilization. In this paper, we make progress on understanding the expressivity gap between SSMs and attention in language modeling, and on reducing the hardware barrier between SSMs and attention. First, we use synthetic language modeling tasks to understand the gap between SSMs and attention. We find that existing SSMs struggle with two capabilities: recalling earlier tokens in the sequence and comparing tokens across the sequence. To understand the impact on language modeling, we propose a new SSM layer, H3, that is explicitly designed for these abilities. H3 matches attention on the synthetic languages and comes within 0.4 PPL of Transformers on OpenWebText. Furthermore, a hybrid 125M-parameter H3-attention model that retains two attention layers surprisingly outperforms Transformers on OpenWebText by 1.0 PPL. Next, to improve the efficiency of training SSMs on modern hardware, we propose FlashConv. FlashConv uses a fused block FFT algorithm to improve efficiency on sequences up to 8K, and introduces a novel state passing algorithm that exploits the recurrent properties of SSMs to scale to longer sequences. FlashConv yields 2$\\times$ speedup on the long-range arena benchmark and allows hybrid language models to generate text 2.4$\\times$ faster than Transformers. 
Using FlashConv, we scale hybrid H3-attention language models up to 2.7B parameters on the Pile and find promising initial results, achieving lower perplexity than Transformers and outperforming Transformers in zero- and few-shot learning on a majority of tasks in the SuperGLUE benchmark.", "keywords": "language modeling;state space models;efficiency", "primary_area": "", "supplementary_material": "/attachment/4c012ef368f3eaa6ff0ecd823df3989e121a78f0.zip", "author": "Daniel Y Fu;Tri Dao;Khaled Kamal Saab;Armin W Thomas;Atri Rudra;Christopher Re", "authorids": "~Daniel_Y_Fu1;~Tri_Dao1;~Khaled_Kamal_Saab1;~Armin_W_Thomas1;~Atri_Rudra1;~Christopher_Re1", "gender": ";;;Non-Binary;M;", "homepage": ";https://tridao.me/;https://web.stanford.edu/~ksaab/;;http://www.cse.buffalo.edu/faculty/atri/;", "dblp": ";206/7018;176/4061;228/8292;04/4980;", "google_scholar": ";NQRw0bQAAAAJ;W77CiNUAAAAJ;awtZJwkAAAAJ;https://scholar.google.com.tw/citations?user=_e5H8IoAAAAJ;", "orcid": ";;0000-0003-1427-0469;0000-0002-9947-5705;;", "linkedin": ";;khaled-saab-181034122/;;;", "or_profile": "~Daniel_Y_Fu1;~Tri_Dao1;~Khaled_Kamal_Saab1;~Armin_W_Thomas1;~Atri_Rudra1;~Christopher_Re1", "aff": ";Stanford University;Stanford University;Stanford University;State University of New York, Buffalo;", "aff_domain": ";stanford.edu;stanford.edu;stanford.edu;buffalo.edu;", "position": ";PhD student;PhD student;Postdoc;Professor;", "bibtex": "@inproceedings{\nfu2023hungry,\ntitle={Hungry Hungry Hippos: Towards Language Modeling with State Space Models},\nauthor={Daniel Y Fu and Tri Dao and Khaled Kamal Saab and Armin W Thomas and Atri Rudra and Christopher Re},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=COZDy0WYGg}\n}", "github": "", "project": "", "reviewers": "EkLp;UjQC;PnYM", "pdf_size": 1663930, "recommendation": "6;6;8", "confidence": "4;4;3", "correctness": "3;3;3", "technical_novelty": "3;3;4", "empirical_novelty": "3;2;3", "wc_summary_paper": "74;132;200", "wc_strength_and_weaknesses": "264;71;229", "wc_clarity_quality_novelty_and_reproducibility": "10;156;34", "wc_summary_review": "30;18;36", "wc_review": "378;377;499", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "437;203;102", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 135.33333333333334, 51.49325737954005 ], "wc_strength_and_weaknesses_avg": [ 188.0, 83.9563378588339 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 66.66666666666667, 63.923565468629974 ], "wc_summary_review_avg": [ 28.0, 7.483314773547883 ], "wc_review_avg": [ 418.0, 57.27710420985567 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 247.33333333333334, 140.30997430293004 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.9999999999999998, "corr_recommendation_correctness": 0.0, "gs_citation": 521, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12122410297351643465&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=COZDy0WYGg", "email": ";stanford.edu;stanford.edu;stanford.edu;buffalo.edu;", 
"author_num": 6, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Stanford University;State University of New York at Buffalo", "aff_unique_dep": ";", "aff_unique_url": "https://www.stanford.edu;https://www.buffalo.edu", "aff_unique_abbr": "Stanford;SUNY Buffalo", "aff_campus_unique_index": "0;0;0;1", "aff_campus_unique": "Stanford;Buffalo", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "COrdS9G6TJ8", "title": "AQUILA: Communication Efficient Federated Learning with Adaptive Quantization of Lazily-Aggregated Gradients", "track": "main", "status": "Reject", "tldr": "", "abstract": "The development and deployment of federated learning (FL) have been bottlenecked by the heavy communication overheads of high-dimensional models between the distributed device nodes and the central server. To achieve better error-communication trade-offs, recent efforts have been made to either adaptively reduce the communication frequency by skipping unimportant updates, a.k.a. lazily-aggregated quantization (LAQ), or adjust the quantization bits for each communication. In this paper, we propose a unifying communication efficient framework for FL based on adaptive quantization of lazily-aggregated gradients (AQUILA), which adaptively adjusts two mutually-dependent factors, the communication frequency, and the quantization level, in a synergistic way. Specifically, we start from a careful investigation of the classical LAQ scheme and formulate AQUILA as an optimization problem where the optimal quantization level per communication is selected by minimizing the model deviation caused by update skipping. Meanwhile, we create a new lazy aggregation strategy to fit the novel quantization criterion better and thus keep the communication frequency at an appropriate level. The effectiveness and convergence of the proposed AQUILA framework are theoretically verified. The experimental results demonstrate that AQUILA can reduce around 60% of overall transmitted bits compared to existing methods while achieving the same level of model accuracy in a number of non-homogeneous FL scenarios, including Non-IID data distribution and heterogeneous model architecture. 
The proposed AQUILA is highly adaptive and compatible with existing FL settings.", "keywords": "Federated Learning;communication efficiency;adaptive quantization", "primary_area": "", "supplementary_material": "/attachment/35e15193d012ced770f63cfccc855321f7be4a16.zip", "author": "Zihao Zhao;Yuzhu Mao;Zhenpeng Shi;Muhammad Zeeshan;Yang Liu;Tian Lan;Wenbo Ding", "authorids": "~Zihao_Zhao1;~Yuzhu_Mao1;~Zhenpeng_Shi1;~Muhammad_Zeeshan2;~Yang_Liu59;~Tian_Lan4;~Wenbo_Ding1", "gender": "M;F;;M;F;M;M", "homepage": ";;;;;https://www2.seas.gwu.edu/~tlan/;http://ssr-group.net/", "dblp": ";;;;;;", "google_scholar": "825UyCgAAAAJ;https://scholar.google.com.hk/citations?hl=zh-CN;;;JEieoFsAAAAJ;;xo2FkgIAAAAJ", "orcid": ";;;;;;", "linkedin": ";;https://cn.linkedin.com/in/zhenpeng-shi-848a09194?trk=people-guest_people_search-card;http://linkedin.com/in/muhammad-zeeshan-198553159;;;", "or_profile": "~Zihao_Zhao1;~Yuzhu_Mao1;~Zhenpeng_Shi1;~Muhammad_Zeeshan2;~Yang_Liu59;~Tian_Lan4;~Wenbo_Ding1", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University;George Washington University;Tsinghua Univeresity", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;gwu.edu;sz.tsinghua.edu.cn", "position": "MS student;MS student;MS student;PhD student;Associate Professor;Full Professor;Associate Professor", "bibtex": "@misc{\nzhao2023aquila,\ntitle={{AQUILA}: Communication Efficient Federated Learning with Adaptive Quantization of Lazily-Aggregated Gradients},\nauthor={Zihao Zhao and Yuzhu Mao and Zhenpeng Shi and Muhammad Zeeshan and Yang Liu and Tian Lan and Wenbo Ding},\nyear={2023},\nurl={https://openreview.net/forum?id=COrdS9G6TJ8}\n}", "github": "", "project": "", "reviewers": "NDFY;6iqd;ogm4;qabV", "site": "https://openreview.net/forum?id=COrdS9G6TJ8", "pdf_size": 3641351, "recommendation": "3;5;6;6", "confidence": "4;3;2;3", "correctness": "3;3;4;4", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "72;42;51;58", "wc_strength_and_weaknesses": "480;242;69;246", "wc_clarity_quality_novelty_and_reproducibility": "3;82;17;274", "wc_summary_review": "69;43;5;56", "wc_review": "624;409;142;634", "wc_reply_reviewers": "0;0;0;41", "wc_reply_authors": "856;353;128;641", "reply_reviewers": "0;0;0;1", "reply_authors": "2;1;1;1", "recommendation_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 55.75, 10.96300597464035 ], "wc_strength_and_weaknesses_avg": [ 259.25, 146.11532260512584 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 94.0, 108.11336642617323 ], "wc_summary_review_avg": [ 43.25, 23.920441049445557 ], "wc_review_avg": [ 452.25, 200.41004840077255 ], "wc_reply_reviewers_avg": [ 10.25, 17.75352077758099 ], "wc_reply_authors_avg": [ 494.5, 276.80724340233587 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.8660254037844386, "corr_recommendation_correctness": 0.8164965809277259, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4954507831936334945&as_sdt=8005&sciodt=0,7&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0;0;0;0;1;0", "aff_unique_norm": "Tsinghua University;George Washington University", 
"aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.gwu.edu", "aff_unique_abbr": "THU;GWU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;1;0", "aff_country_unique": "China;United States" }, { "id": "CPDtGLmXEfy", "title": "On Representation Learning Under Class Imbalance", "track": "main", "status": "Reject", "tldr": "We study foundational questions regarding representation learning under imbalanced data for a variety of model classes and across a wide range of domains ", "abstract": "Unlike carefully curated academic benchmarks, real-world datasets are often highly class-imbalanced, involving training and test sets which contain few examples from certain minority classes. While there is a common understanding that neural network generalization is negatively impacted by imbalance, the source of this problem and its resolution are unclear. Through extensive empirical investigation, we study foundational learning behaviors for various models such as neural networks, gradient-boosted decision trees, and SVMs across a range of domains and find that (1) contrary to conventional wisdom, re-balancing the training set to include a higher proportion of minority samples degrades performance on imbalanced test sets; (2) minority samples are hard to fit, yet algorithms which fit them, such as oversampling, do not improve generalization. Motivated by the observation that re-balancing class-imbalanced training data is ineffective, we show that several existing techniques for improving representation learning are effective in this setting: (3) self-supervised pre-training is insensitive to imbalance and can be used for feature learning before fine-tuning on labels; (4) Bayesian inference is effective because neural networks are especially underspecified under class imbalance; (5) flatness-seeking regularization pulls decision boundaries away from minority samples, especially when we seek minima that are particularly flat on the minority samples\u2019 loss.", "keywords": "Class Imbalance;Neural Networks;Representation Learning;Flatness;Self-Supervised Learning;Bayesian Learning", "primary_area": "", "supplementary_material": "", "author": "Ravid Shwartz-Ziv;Micah Goldblum;Yucen Lily Li;C. Bayan Bruss;Andrew Gordon Wilson", "authorids": "~Ravid_Shwartz-Ziv2;~Micah_Goldblum1;~Yucen_Lily_Li1;~C._Bayan_Bruss1;~Andrew_Gordon_Wilson1", "gender": ";;M;Not Specified;M", "homepage": ";https://yucenli.com;https://www.cbbruss.com;https://cims.nyu.edu/~andrewgw;https://www.ravid-shwartz-ziv.com/", "dblp": "241/7231;252/6123;;65/10453;", "google_scholar": "pGDKzuUAAAAJ;;ClqvGRQAAAAJ;https://scholar.google.com.tw/citations?user=twWX2LIAAAAJ;https://scholar.google.co.il/citations?user=SqsLFwMAAAAJ", "orcid": ";;;;", "linkedin": ";;bayan-bruss/;;", "or_profile": "~Micah_Goldblum1;~Yucen_Lily_Li1;~C._Bayan_Bruss1;~Andrew_Gordon_Wilson1;~ravid_ziv1", "aff": "New York University;New York University;Capital One;New York University;New York University", "aff_domain": "nyu.edu;nyu.edu;capitalone.com;nyu.edu;nyu.edu", "position": "Postdoc;PhD student;Director of Applied Research;Associate Professor;Postdoc", "bibtex": "@misc{\nshwartz-ziv2023on,\ntitle={On Representation Learning Under Class Imbalance},\nauthor={Ravid Shwartz-Ziv and Micah Goldblum and Yucen Lily Li and C. 
Bayan Bruss and Andrew Gordon Wilson},\nyear={2023},\nurl={https://openreview.net/forum?id=CPDtGLmXEfy}\n}", "github": "", "project": "", "reviewers": "4VFj;aUfE;Mds7;D5Wc", "site": "https://openreview.net/forum?id=CPDtGLmXEfy", "pdf_size": 1701430, "recommendation": "3;3;3;5", "confidence": "3;5;3;3", "correctness": "2;3;2;2", "technical_novelty": "1;2;2;2", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "79;67;87;100", "wc_strength_and_weaknesses": "62;753;110;215", "wc_clarity_quality_novelty_and_reproducibility": "28;109;14;157", "wc_summary_review": "46;84;31;36", "wc_review": "215;1013;242;508", "wc_reply_reviewers": "0;163;0;0", "wc_reply_authors": "0;606;235;410", "reply_reviewers": "0;1;0;0", "reply_authors": "0;1;1;1", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "correctness_avg": [ 2.25, 0.4330127018922193 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 83.25, 12.007809958522827 ], "wc_strength_and_weaknesses_avg": [ 285.0, 275.8069977357355 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 77.0, 58.72393038617221 ], "wc_summary_review_avg": [ 49.25, 20.777090749188154 ], "wc_review_avg": [ 494.5, 320.507800217093 ], "wc_reply_reviewers_avg": [ 40.75, 70.58107040843176 ], "wc_reply_authors_avg": [ 312.75, 223.2211627512051 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 0.75, 0.4330127018922193 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": -0.3333333333333333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2972960759482878969&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "New York University;Capital One", "aff_unique_dep": ";", "aff_unique_url": "https://www.nyu.edu;https://www.capitalone.com", "aff_unique_abbr": "NYU;Capital One", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Proactive Multi-Camera Collaboration for 3D Human Pose Estimation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11935", "id": "CPIy9TWFYBG", "poster": "", "openreview": "https://openreview.net/forum?id=CPIy9TWFYBG", "slides": "https://iclr.cc/virtual/2023/poster/11935", "video": "https://iclr.cc/virtual/2023/poster/11935", "author_site": "Hai Ci, Mickel Liu, Xuehai Pan, Fangwei Zhong, Yizhou Wang", "tldr": "We propose a novel MARL framework to solve proactive multi-camera collaboration for 3D HPE in human crowds", "abstract": "This paper presents a multi-agent reinforcement learning (MARL) scheme for proactive Multi-Camera Collaboration in 3D Human Pose Estimation in dynamic human crowds. Traditional fixed-viewpoint multi-camera solutions for human motion capture (MoCap) are limited in capture space and susceptible to dynamic occlusions. Active camera approaches proactively control camera poses to find optimal viewpoints for 3D reconstruction. However, current methods still face challenges with credit assignment and environment dynamics. To address these issues, our proposed method introduces a novel Collaborative Triangulation Contribution Reward (CTCR) that improves convergence and alleviates multi-agent credit assignment issues resulting from using 3D reconstruction accuracy as the shared reward.
Additionally, we jointly train our model with multiple world dynamics learning tasks to better capture environment dynamics and encourage anticipatory behaviors for occlusion avoidance. We evaluate our proposed method in four photo-realistic UE4 environments to ensure validity and generalizability. Empirical results show that our method outperforms fixed and active baselines in various scenarios with different numbers of cameras and humans.", "keywords": "Multi-Cameras Collaboration;Multi-Agent Credit Assignment;Active Vision;Human Pose Estimation", "primary_area": "", "supplementary_material": "/attachment/47cb793a9cc61db98ca45772eef2966a35328215.zip", "author": "Hai Ci;Mickel Liu;Xuehai Pan;fangwei zhong;Yizhou Wang", "authorids": "~Hai_Ci1;~Mickel_Liu1;~Xuehai_Pan1;~fangwei_zhong1;~Yizhou_Wang1", "gender": "M;M;M;M;M", "homepage": ";https://mickel-liu.github.io/;https://github.com/XuehaiPan;https://fangweizhong.xyz/;https://cfcs.pku.edu.cn/wangyizhou/", "dblp": "227/4707;;;207/1900;71/3387-1", "google_scholar": "GMrjppAAAAAJ;2oog2ZcAAAAJ;VIwJg4gAAAAJ;ejDz1bYAAAAJ;831z_VcAAAAJ", "orcid": ";;;0000-0002-0428-4552;", "linkedin": ";;;;", "or_profile": "~Hai_Ci1;~Mickel_Liu1;~Xuehai_Pan1;~fangwei_zhong1;~Yizhou_Wang1", "aff": "National University of Singapore;Peking University;Peking University;Peking University;Peking University", "aff_domain": "nus.edu.sg;pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn", "position": "Postdoc;MS student;PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nci2023proactive,\ntitle={Proactive Multi-Camera Collaboration for 3D Human Pose Estimation},\nauthor={Hai Ci and Mickel Liu and Xuehai Pan and fangwei zhong and Yizhou Wang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=CPIy9TWFYBG}\n}", "github": "", "project": "", "reviewers": "W1S5;eqPb;FtmL;XxGE", "pdf_size": 15703831, "recommendation": "6;6;6;8", "confidence": "3;4;3;2", "correctness": "3;3;3;3", "technical_novelty": "3;3;3;3", "empirical_novelty": "3;3;3;0", "wc_summary_paper": "153;73;68;98", "wc_strength_and_weaknesses": "199;101;481;463", "wc_clarity_quality_novelty_and_reproducibility": "63;65;66;73", "wc_summary_review": "50;56;86;65", "wc_review": "465;295;701;699", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "795;537;2299;1543", "reply_reviewers": "0;0;0;0", "reply_authors": "2;1;4;3", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.25, 1.299038105676658 ], "wc_summary_paper_avg": [ 98.0, 33.726843908080106 ], "wc_strength_and_weaknesses_avg": [ 311.0, 164.80898033784445 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 66.75, 3.766629793329841 ], "wc_summary_review_avg": [ 64.25, 13.645054048995188 ], "wc_review_avg": [ 540.0, 170.91810904640855 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1293.5, 688.1269868272861 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.816496580927726, "corr_recommendation_correctness": 0.0, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4383232465151555248&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=CPIy9TWFYBG", "email": "nus.edu.sg;pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn", 
"author_num": 5, "aff_unique_index": "0;1;1;1;1", "aff_unique_norm": "National University of Singapore;Peking University", "aff_unique_dep": ";", "aff_unique_url": "https://www.nus.edu.sg;http://www.pku.edu.cn", "aff_unique_abbr": "NUS;Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1", "aff_country_unique": "Singapore;China" }, { "title": "Win: Weight-Decay-Integrated Nesterov Acceleration for Adaptive Gradient Algorithms", "status": "Top-5%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12120", "id": "CPdc77SQfQ5", "poster": "", "openreview": "https://openreview.net/forum?id=CPdc77SQfQ5", "slides": "https://iclr.cc/virtual/2023/poster/12120", "video": "https://iclr.cc/virtual/2023/poster/12120", "author_site": "Pan Zhou, Xingyu Xie, shuicheng YAN", "tldr": "We propose a new and general Weight-decay-Integrated Nesterov acceleration for adaptive algorithms to enhance their convergence speed, and also analyze their convergence justify their convergence superiority. ", "abstract": "Training deep networks on large-scale datasets is computationally challenging. In this work, we explore the problem of ``\\textit{how to accelerate adaptive gradient algorithms in a general manner}\", and aim to provide practical efficiency-boosting insights. To this end, we propose an effective and general {Weight-decay-Integrated Nesterov acceleration} (Win) to accelerate adaptive algorithms. Taking AdamW and Adam as examples, we minimize a dynamical loss per iteration which combines the vanilla training loss and a dynamic regularizer inspired by proximal point method (PPM) to improve the convexity of the problem. To introduce Nesterov-alike-acceleration into AdamW and Adam, we respectively use the first- and second-order Taylor approximations of vanilla loss to update the variable twice. In this way, we arrive at our Win acceleration for AdamW and Adam that uses a conservative step and a reckless step to update twice and then linearly combines these two updates for acceleration. Next, we extend Win acceleration to LAMB and SGD. Our transparent acceleration derivation could provide insights for other accelerated methods and their integration into adaptive algorithms. Besides, we prove the convergence of Win-accelerated adaptive algorithms and justify their convergence superiority over their non-accelerated counterparts by taking AdamW and Adam as examples. Experimental results testify to the faster convergence speed and superior performance of our Win-accelerated AdamW, Adam, LAMB and SGD over their non-accelerated counterparts on vision classification tasks and language modeling tasks with both CNN and Transformer backbones. We hope Win shall be a default acceleration option for popular optimizers in deep learning community to improve the training efficiency. 
Code will be released at \\url{https://github.com/sail-sg/win}.", "keywords": "Optimization acceleration in deep learning;network optimizers;deep learning optimizer;deep learning algorithm", "primary_area": "", "supplementary_material": "/attachment/8315eae99791a485116b3ad3b92bd7b664ab30c3.zip", "author": "Pan Zhou;Xingyu Xie;Shuicheng YAN", "authorids": "~Pan_Zhou3;~Xingyu_Xie1;~Shuicheng_YAN3", "gender": ";M;M", "homepage": ";;https://yanshuicheng.ai/", "dblp": ";174/9633;y/ShuichengYan", "google_scholar": ";BpFCmZMAAAAJ;https://scholar.google.com.hk/citations?user=DNuiPHwAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Pan_Zhou3;~Xingyu_Xie1;~Shuicheng_YAN3", "aff": ";Peking University;sea Group", "aff_domain": ";pku.edu.cn;sea.com", "position": ";PhD student;Researcher", "bibtex": "@inproceedings{\nzhou2023win,\ntitle={Win: Weight-Decay-Integrated Nesterov Acceleration for Adaptive Gradient Algorithms},\nauthor={Pan Zhou and Xingyu Xie and Shuicheng YAN},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=CPdc77SQfQ5}\n}", "github": "", "project": "", "reviewers": "FLq8;kZaA;hKYG", "pdf_size": 568785, "recommendation": "8;8;8", "confidence": "3;4;4", "correctness": "4;3;4", "technical_novelty": "3;4;3", "empirical_novelty": "3;4;3", "wc_summary_paper": "46;164;34", "wc_strength_and_weaknesses": "93;491;69", "wc_clarity_quality_novelty_and_reproducibility": "28;104;56", "wc_summary_review": "40;184;7", "wc_review": "207;943;166", "wc_reply_reviewers": "54;355;0", "wc_reply_authors": "1137;1248;827", "reply_reviewers": "1;2;0", "reply_authors": "2;3;2", "recommendation_avg": [ 8.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 81.33333333333333, 58.6590904198905 ], "wc_strength_and_weaknesses_avg": [ 217.66666666666666, 193.52404386937445 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 62.666666666666664, 31.38293945583952 ], "wc_summary_review_avg": [ 77.0, 76.8505042273634 ], "wc_review_avg": [ 438.6666666666667, 357.0101150138778 ], "wc_reply_reviewers_avg": [ 136.33333333333334, 156.184363991904 ], "wc_reply_authors_avg": [ 1070.6666666666667, 178.15785759326536 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14841882769720666150&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=CPdc77SQfQ5", "email": ";pku.edu.cn;sea.com", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Peking University;Sea Group", "aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;", "aff_unique_abbr": "Peking U;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0", "aff_country_unique": "China;" }, { "id": "CPg5IRu9PL", "title": "Efficient Large-scale Transformer Training via Random and Layerwise Token Dropping", "track": "main", "status": "Reject", "tldr": "We present a novel random and layerwise token dropping method that can save up to 33.3% theoretical compute cost and 25.6% 
wall-clock time while achieving comparable accuracy to the standard training procedure.", "abstract": "Large-scale transformer models have become the de-facto architectures for various machine learning applications, e.g., CV and NLP. \nHowever, those large models also introduce prohibitive training costs. \nTo mitigate this issue, we propose a novel random and layerwise token dropping method, which skips the computation of a subset of the input tokens at all middle layers.\nParticularly, our method achieves considerable speedups and comparable accuracy to the standard training baseline. \nCompared to other token dropping methods, our method does not require (1) any importance score-based metrics, (2) any special token treatment (e.g., \\texttt{[CLS]}), and (3) many layers in full-sequence-length training, except the first and the last layers. \nBesides, a new LayerToken learning rate schedule is proposed for pretraining problems, which resolves the heavy tuning requirement for our proposed training mechanism. \nFinally, we demonstrate that our method can be applied to broader applications, including GPT and BERT pretraining as well as ViT and GPT finetuning tasks. \nOur results show that our method can save about 33.3\\% theoretical compute cost and 25.6\\% wall-clock training time while achieving similar zero-shot evaluations on GPT as compared to the baseline.", "keywords": "Efficient Training;Large-scale Transformers;Token Dropping;GPT;BERT;ViT", "primary_area": "", "supplementary_material": "/attachment/ae9bf67418f85331956bd56ee024014bdb888045.zip", "author": "Zhewei Yao;Xiaoxia Wu;Conglong Li;Connor Holmes;Minjia Zhang;Cheng Li;Yuxiong He", "authorids": "~Zhewei_Yao1;~Xiaoxia_Wu1;~Conglong_Li1;~Connor_Holmes1;~Minjia_Zhang1;~Cheng_Li10;~Yuxiong_He1", "gender": "M;F;;M;M;F;", "homepage": ";https://sites.google.com/view/xwu/home;;;https://minjiazhang.github.io/;https://chengli.netlify.app/;", "dblp": "195/2887;63/1016;158/7995;;58/9033;;https://dblp.org/pers/hd/h/He:Yuxiong", "google_scholar": "gpSeMjYAAAAJ;Ry0Bdt8AAAAJ;;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;da9Vl6QAAAAJ;SB3_eb0AAAAJ", "orcid": ";;;;0000-0002-8165-166X;;", "linkedin": ";;;;minjia-zhang-05857226/;;", "or_profile": "~Zhewei_Yao1;~Xiaoxia_Wu1;~Conglong_Li1;~Connor_Holmes1;~Minjia_Zhang1;~Cheng_Li10;~Yuxiong_He1", "aff": "Microsoft;Microsoft;Microsoft;Colorado School of Mines;Microsoft ;Microsoft;Microsoft", "aff_domain": "microsoft.com;microsoft.com;microsoft.com;mines.edu;microsoft.com;microsoft.com;microsoft.com", "position": "Researcher;Researcher;Researcher;PhD student;Principle Researcher;Researcher;Researcher", "bibtex": "@misc{\nyao2023efficient,\ntitle={Efficient Large-scale Transformer Training via Random and Layerwise Token Dropping},\nauthor={Zhewei Yao and Xiaoxia Wu and Conglong Li and Connor Holmes and Minjia Zhang and Cheng Li and Yuxiong He},\nyear={2023},\nurl={https://openreview.net/forum?id=CPg5IRu9PL}\n}", "github": "", "project": "", "reviewers": "YEPB;kbJK;1ZdE;JdQ2", "site": "https://openreview.net/forum?id=CPg5IRu9PL", "pdf_size": 2284398, "recommendation": "3;5;5;6", "confidence": "4;4;4;4", "correctness": "2;3;3;4", "technical_novelty": "1;3;2;2", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "46;36;83;82", "wc_strength_and_weaknesses": "304;422;552;43", "wc_clarity_quality_novelty_and_reproducibility": "109;156;73;98", "wc_summary_review": "41;77;113;49", "wc_review": "500;691;821;272", "wc_reply_reviewers": "0;128;0;20",
"wc_reply_authors": "768;660;1287;348", "reply_reviewers": "0;1;0;1", "reply_authors": "2;1;2;1", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 61.75, 21.05201890555868 ], "wc_strength_and_weaknesses_avg": [ 330.25, 187.61180000202546 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 109.0, 30.108138434649195 ], "wc_summary_review_avg": [ 70.0, 28.19574435974337 ], "wc_review_avg": [ 571.0, 206.96738873552036 ], "wc_reply_reviewers_avg": [ 37.0, 53.16954015223378 ], "wc_reply_authors_avg": [ 765.75, 338.1585242160842 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.9733285267845754, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:oParJcFkfz8J:scholar.google.com/&scioq=Efficient+Large-scale+Transformer+Training+via+Random+and+Layerwise+Token+Dropping&hl=en&as_sdt=0,15", "gs_version_total": 0, "aff_unique_index": "0;0;0;1;0;0;0", "aff_unique_norm": "Microsoft;Colorado School of Mines", "aff_unique_dep": "Microsoft Corporation;", "aff_unique_url": "https://www.microsoft.com;https://www.mines.edu", "aff_unique_abbr": "Microsoft;CSM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Git Re-Basin: Merging Models modulo Permutation Symmetries", "status": "Top-5%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10807", "id": "CQsmMYmlP5T", "poster": "", "openreview": "https://openreview.net/forum?id=CQsmMYmlP5T", "slides": "https://iclr.cc/virtual/2023/poster/10807", "video": "https://iclr.cc/virtual/2023/poster/10807", "author_site": "Samuel Ainsworth, Jonathan Hayase, Siddhartha Srinivasa", "tldr": "", "abstract": "The success of deep learning is due in large part to our ability to solve certain massive non-convex optimization problems with relative ease. Though non-convex optimization is NP-hard, simple algorithms -- often variants of stochastic gradient descent -- exhibit surprising effectiveness in fitting large neural networks in practice. We argue that neural network loss landscapes often contain (nearly) a single basin after accounting for all possible permutation symmetries of hidden units a la Entezari et al. 2021. We introduce three algorithms to permute the units of one model to bring them into alignment with a reference model in order to merge the two models in weight space. This transformation produces a functionally equivalent set of weights that lie in an approximately convex basin near the reference model. Experimentally, we demonstrate the single basin phenomenon across a variety of model architectures and datasets, including the first (to our knowledge) demonstration of zero-barrier linear mode connectivity between independently trained ResNet models on CIFAR-10. Additionally, we identify intriguing phenomena relating model width and training time to mode connectivity. 
Finally, we discuss shortcomings of the linear mode connectivity hypothesis, including a counterexample to the single basin theory.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Samuel Ainsworth;Jonathan Hayase;Siddhartha Srinivasa", "authorids": "~Samuel_Ainsworth1;~Jonathan_Hayase2;~Siddhartha_Srinivasa1", "gender": "M;M;M", "homepage": "https://samlikes.pizza/;https://jhayase.github.io/;https://goodrobot.ai", "dblp": "215/4385;244/9599;", "google_scholar": "YvtGXcwAAAAJ;Zw-l1d8AAAAJ;https://scholar.google.com.tw/citations?user=RCi98EAAAAAJ", "orcid": ";0000-0002-3757-6586;", "linkedin": "samuelainsworth/;jonathan-hayase-5ab849128;", "or_profile": "~Samuel_Ainsworth1;~Jonathan_Hayase2;~Siddhartha_Srinivasa1", "aff": "GM Cruise LLC;University of Washington;University of Washington", "aff_domain": "getcruise.com;washington.edu;washington.edu", "position": "Researcher;PhD student;Full Professor", "bibtex": "@inproceedings{\nainsworth2023git,\ntitle={Git Re-Basin: Merging Models modulo Permutation Symmetries},\nauthor={Samuel Ainsworth and Jonathan Hayase and Siddhartha Srinivasa},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=CQsmMYmlP5T}\n}", "github": "", "project": "", "reviewers": "qnqB;fSNF;Nc4c", "pdf_size": 1616003, "recommendation": "8;8;10", "confidence": "5;4;4", "correctness": "3;4;4", "technical_novelty": "3;3;4", "empirical_novelty": "3;3;4", "wc_summary_paper": "68;61;111", "wc_strength_and_weaknesses": "158;235;343", "wc_clarity_quality_novelty_and_reproducibility": "686;35;224", "wc_summary_review": "206;22;25", "wc_review": "1118;353;703", "wc_reply_reviewers": "170;0;0", "wc_reply_authors": "1065;375;620", "reply_reviewers": "1;0;0", "reply_authors": "3;2;2", "recommendation_avg": [ 8.666666666666666, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 80.0, 22.105806175452337 ], "wc_strength_and_weaknesses_avg": [ 245.33333333333334, 75.87855794330541 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 315.0, 273.44834978474455 ], "wc_summary_review_avg": [ 84.33333333333333, 86.04004235754937 ], "wc_review_avg": [ 724.6666666666666, 312.6855004988594 ], "wc_reply_reviewers_avg": [ 56.666666666666664, 80.13876853447539 ], "wc_reply_authors_avg": [ 686.6666666666666, 285.60851216695596 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.4999999999999999, "corr_recommendation_correctness": 0.4999999999999999, "gs_citation": 351, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8283864796943357473&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=CQsmMYmlP5T", "email": "getcruise.com;washington.edu;washington.edu", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "GM Cruise LLC;University of Washington", "aff_unique_dep": ";", "aff_unique_url": "https://www.gmcruise.com;https://www.washington.edu", "aff_unique_abbr": "GM Cruise;UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United 
States" }, { "title": "NORM: Knowledge Distillation via N-to-One Representation Matching", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11985", "id": "CRNwGauQpb6", "poster": "", "openreview": "https://openreview.net/forum?id=CRNwGauQpb6", "slides": "https://iclr.cc/virtual/2023/poster/11985", "video": "https://iclr.cc/virtual/2023/poster/11985", "author_site": "Xiaolong Liu, Lujun Li, Chao Li, Anbang Yao", "tldr": "This paper presents a new knowledge distillation method via n-to-one representation matching", "abstract": "Existing feature distillation methods commonly adopt the One-to-one Representation Matching between any pre-selected teacher-student layer pair. In this paper, we present $N$-to-$O$ne $R$epresentation $M$atching (NORM), a new two-stage knowledge distillation method, which relies on a simpleFeature Transform (FT) module consisting of two linear layers. In view of preserving the intact information learnt by the teacher network, during training, our FT module is merely inserted after the last convolutional layer of the student network. The first linear layer projects the student representation to a feature space having $N$ times feature channels than the teacher representation from the last convolutional layer, and the second linear layer contracts the expanded output back to the original feature space. By sequentially splitting the expanded student representation into $N$ non-overlapping feature segments having the same number of feature channels as the teacher's, they can be readily forced to approximate the intact teacher representation simultaneously, formulating a novel many-to-one representation matching mechanism conditioned on a single teacher-student layer pair. After training, such an FT module will be naturally merged into the subsequent fully connected layer thanks to its linear property, introducing no extra parameters or architectural modifications to the student network at inference. Extensive experiments on different visual recognition benchmarks demonstrate the leading performance of our method. For instance, the ResNet18|MobileNet|ResNet50-1/4 model trained by NORM reaches 72.14%|74.26%|68.03% top-1 accuracy on the ImageNet dataset when using a pre-trained ResNet34|ResNet50|ResNet50 model as the teacher, achieving an absolute improvement of 2.01%|4.63%|3.03% against the individually trained counterpart. 
Code is available at https://github.com/OSVAI/NORM.", "keywords": "Knowledge distillation;model compression;image classification", "primary_area": "", "supplementary_material": "/attachment/5c63b62448a3ba7053b34419c682d0461bf5790d.zip", "author": "Xiaolong Liu;LUKING LI;Chao Li;Anbang Yao", "authorids": "~Xiaolong_Liu2;~LUKING_LI1;~Chao_Li16;~Anbang_Yao1", "gender": "M;;M;", "homepage": ";;https://github.com/chaoli-ai/chaoli.github.io;https://yaoanbang.github.io/", "dblp": ";;;http://dblp.uni-trier.de/pers/hd/y/Yao:Anbang", "google_scholar": "hgFJj0MAAAAJ;;;b9hCmPYAAAAJ", "orcid": ";;;0000-0002-3878-8679", "linkedin": ";;;anbang-yao-1805b712a/", "or_profile": "~Xiaolong_Liu2;~LUKING_LI1;~Chao_Li16;~Anbang_Yao1", "aff": ";;Intel;Intel", "aff_domain": ";;intel.com;intel.com", "position": ";;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nliu2023norm,\ntitle={{NORM}: Knowledge Distillation via N-to-One Representation Matching},\nauthor={Xiaolong Liu and LUKING LI and Chao Li and Anbang Yao},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=CRNwGauQpb6}\n}", "github": "", "project": "", "reviewers": "iHQs;xShr;d7Sn;Xp75", "pdf_size": 1163247, "recommendation": "6;6;6;8", "confidence": "4;4;4;4", "correctness": "3;3;3;4", "technical_novelty": "2;3;2;3", "empirical_novelty": "3;2;3;3", "wc_summary_paper": "92;68;65;144", "wc_strength_and_weaknesses": "358;560;144;208", "wc_clarity_quality_novelty_and_reproducibility": "130;23;32;61", "wc_summary_review": "32;58;61;39", "wc_review": "612;709;302;452", "wc_reply_reviewers": "85;872;63;308", "wc_reply_authors": "2395;4590;2801;2720", "reply_reviewers": "1;6;1;1", "reply_authors": "5;12;6;5", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 92.25, 31.657345119261027 ], "wc_strength_and_weaknesses_avg": [ 317.5, 160.10855692310764 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 61.5, 41.96724913548659 ], "wc_summary_review_avg": [ 47.5, 12.298373876248844 ], "wc_review_avg": [ 518.75, 155.1827551630657 ], "wc_reply_reviewers_avg": [ 332.0, 326.1694344968578 ], "wc_reply_authors_avg": [ 3126.5, 858.5040768685959 ], "reply_reviewers_avg": [ 2.25, 2.165063509461097 ], "reply_authors_avg": [ 7.0, 2.9154759474226504 ], "replies_avg": [ 43, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 1.0, "gs_citation": 84, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1834669473181539460&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=CRNwGauQpb6", "email": ";;intel.com;intel.com", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Intel", "aff_unique_dep": "Intel Corporation", "aff_unique_url": "https://www.intel.com", "aff_unique_abbr": "Intel", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Copy is All You Need", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11172", "id": "CROlOA9Nd8C", "poster": "/media/PosterPDFs/ICLR%202023/11172.png?t=1680840771.7591448", "openreview": "https://openreview.net/forum?id=CROlOA9Nd8C", "slides": "https://iclr.cc/virtual/2023/poster/11172", "video": 
"https://iclr.cc/virtual/2023/poster/11172", "author_site": "Tian Lan, Deng Cai, Yan Wang, Heyan Huang, Xian-Ling Mao", "tldr": "", "abstract": "The dominant text generation models compose the output by sequentially selecting words from a fixed vocabulary. In this paper, we formulate text generation as progressively copying text segments (e.g., words or phrases) from an existing text collection. We compute the contextualized representations of meaningful text segments and index them using efficient vector search toolkits. The task of text generation is then decomposed into a series of copy-and-paste operations: at each time step, we seek suitable text spans from the text collection rather than selecting from a standalone vocabulary. Experiments on the standard language modeling benchmark (WikiText-103) show that our approach achieves better generation quality according to both automatic and human evaluations. Besides, its inference efficiency is comparable to token-level autoregressive models thanks to the reduction of decoding steps. We also show that our approach allows for effective domain adaptation by simply switching to domain-specific text collection without extra training. Finally, we observe that our approach attains additional performance gains by simply scaling up to larger text collections, again without further training.\\footnote{Our source codes are publicly available at \\url{https://github.com/gmftbyGMFTBY/Copyisallyouneed}.}", "keywords": "neural text genertion", "primary_area": "", "supplementary_material": "", "author": "Tian Lan;Deng Cai;Yan Wang;Heyan Huang;Xian-Ling Mao", "authorids": "~Tian_Lan7;~Deng_Cai1;~Yan_Wang17;~Heyan_Huang1;~Xian-Ling_Mao1", "gender": "M;M;M;F;M", "homepage": "https://github.com/gmftbyGMFTBY;https://jcyk.github.io/;https://libertywing.github.io/yanwang.github.io/;https://cs.bit.edu.cn/szdw/jsml/js/hhy/index.htm;https://cs.bit.edu.cn/szdw/jsml/js/mxl/index.htm", "dblp": "31/83-3;c/DCai-2;59/2227-60;27/8686;46/9687.html", "google_scholar": "https://scholar.google.com/citations?hl=en;KpbRLYcAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;b2DzFF8AAAAJ", "orcid": "0000-0002-5200-1537;;;0000-0002-0320-7520;", "linkedin": "%E5%A4%A9-%E5%85%B0-13128318b/;;;;", "or_profile": "~Tian_Lan7;~Deng_Cai1;~Yan_Wang17;~Heyan_Huang1;~Xian-Ling_Mao1", "aff": "Beijing Institute of Technology;Tencent AI Lab;miHoYo;Beijing Institute of Technology;Beijing Institute of Technology", "aff_domain": "bit.edu.cn;tencent.com;mihoyo.com;bit.edu.cn;bit.edu.cn", "position": "PhD student;Research Scientist;Research Scientist;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nlan2023copy,\ntitle={Copy is All You Need},\nauthor={Tian Lan and Deng Cai and Yan Wang and Heyan Huang and Xian-Ling Mao},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=CROlOA9Nd8C}\n}", "github": "", "project": "", "reviewers": "LpvV;8gPK;pM7y;siHy", "pdf_size": 1875725, "recommendation": "5;5;6;8", "confidence": "3;4;4;4", "correctness": "3;4;3;3", "technical_novelty": "3;3;3;4", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "91;63;98;380", "wc_strength_and_weaknesses": "389;319;314;186", "wc_clarity_quality_novelty_and_reproducibility": "7;38;302;220", "wc_summary_review": "34;32;112;113", "wc_review": "521;452;826;899", "wc_reply_reviewers": "0;0;39;0", "wc_reply_authors": "447;666;969;728", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;2;1", "recommendation_avg": [ 6.0, 
1.224744871391589 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 158.0, 128.8390468763255 ], "wc_strength_and_weaknesses_avg": [ 302.0, 73.24274708119569 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 141.75, 123.21196167580484 ], "wc_summary_review_avg": [ 72.75, 39.757860857948586 ], "wc_review_avg": [ 674.5, 191.32498529988183 ], "wc_reply_reviewers_avg": [ 9.75, 16.887495373796554 ], "wc_reply_authors_avg": [ 702.5, 185.93345583837245 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.4714045207910316, "corr_recommendation_correctness": -0.4714045207910316, "gs_citation": 38, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2240115286471241777&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=CROlOA9Nd8C", "email": "bit.edu.cn;tencent.com;mihoyo.com;bit.edu.cn;bit.edu.cn", "author_num": 5, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "Beijing Institute of Technology;Tencent;miHoYo", "aff_unique_dep": ";Tencent AI Lab;", "aff_unique_url": "http://www.bit.edu.cn/;https://ai.tencent.com;https://www.mihoyo.com", "aff_unique_abbr": "BIT;Tencent AI Lab;miHoYo", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "CRhzJqLhnwU", "title": "Federated Learning for Inference at Anytime and Anywhere", "track": "main", "status": "Reject", "tldr": "", "abstract": "Federated learning has been predominantly concerned with collaborative training of deep networks from scratch, and especially the many challenges that arise, such as communication cost, robustness to heterogeneous data, and support for diverse device capabilities. However, there is no unified framework that addresses all these problems together. This paper studies the challenges and opportunities of exploiting pre-trained Transformer models in FL. In particular, we propose to efficiently adapt such pre-trained models by injecting a novel attention-based adapter module at each transformer block that both modulates the forward pass and makes an early prediction. Training only the lightweight adapter by FL leads to fast and communication-efficient learning even in the presence of heterogeneous data and devices. Extensive experiments on standard FL benchmarks, including CIFAR-100, FEMNIST and SpeechCommandsv2, demonstrate that this simple framework provides fast and accurate FL while supporting heterogeneous device capabilities, efficient personalization, and scalable-cost anytime inference.", "keywords": "Federated Learning", "primary_area": "", "supplementary_material": "", "author": "Zicheng Liu;Da Li;Javier Fernandez-Marques;Stefanos Laskaridis;Yan Gao;\u0141ukasz Dudziak;Stan Z. 
Li;Shell Xu Hu;Timothy Hospedales", "authorids": "~Zicheng_Liu2;~Da_Li3;~Javier_Fernandez-Marques1;~Stefanos_Laskaridis1;~Yan_Gao4;~\u0141ukasz_Dudziak1;~Stan_Z._Li2;~Shell_Xu_Hu1;~Timothy_Hospedales1", "gender": "M;M;;M;M;M;M;M;M", "homepage": ";https://dali-dl.github.io/;https://stefanos.cc;https://www.cst.cam.ac.uk/people/yg381;;http://homepages.inf.ed.ac.uk/thospeda/;;http://hushell.github.io/;https://en.westlake.edu.cn/academics/School_of_Engineering/About/Our_People/Faculty/201912/t20191206_2497.shtml", "dblp": "l/ZichengLiu-6;43/4804-1;241/6273;;228/7987;32/3545;171/7908;;l/StanZLi", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;RPvaE3oAAAAJ;https://scholar.google.co.uk/citations?user=TcVC--IAAAAJ;https://scholar.google.com/citations?hl=en;R47NvpoAAAAJ;https://scholar.google.fr/citations?user=nHhtvqkAAAAJ;Htu1YhIAAAAJ;https://scholar.google.fr/citations?user=jU7nGnEAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";0000-0002-2101-2989;;;;0000-0003-4867-7486;;;", "linkedin": ";;stevelaskaridis/;;;timothyhospedales/;jafermarq/;;stan-z-li-%E6%9D%8E%E5%AD%90%E9%9D%92-55753224/", "or_profile": "~Zicheng_Liu2;~Da_Li3;~Stefanos_Laskaridis1;~Yan_Gao4;~\u0141ukasz_Dudziak1;~Timothy_Hospedales1;~Javier_Fern\u00e1ndez_Marqu\u00e9s1;~Xu_Shell_Hu1;~Stan_Z._Li1", "aff": "Zhejiang University;University of Edinburgh;Samsung AI Center Cambridge;University of Cambridge;Samsung;Samsung AI Research Centre;Samsung AI;Samsung;Westlake University", "aff_domain": "zju.edu.cn;ed.ac.uk;samsung.com;cam.ac.uk;samsung.com;samsung.com;samsung.com;samsung.com;westlake.edu.cn", "position": "PhD student;Visiting Scholar;Researcher;PhD student;Software Engineer;Principal Researcher;Researcher;Researcher;Chair Professor", "bibtex": "@misc{\nliu2023federated,\ntitle={Federated Learning for Inference at Anytime and Anywhere},\nauthor={Zicheng Liu and Da Li and Javier Fernandez-Marques and Stefanos Laskaridis and Yan Gao and {\\L}ukasz Dudziak and Stan Z. 
Li and Shell Xu Hu and Timothy Hospedales},\nyear={2023},\nurl={https://openreview.net/forum?id=CRhzJqLhnwU}\n}", "github": "", "project": "", "reviewers": "eyXk;Fwvb;pRUk;sDyc", "site": "https://openreview.net/forum?id=CRhzJqLhnwU", "pdf_size": 376387, "recommendation": "5;5;6;6", "confidence": "3;5;3;4", "correctness": "3;2;2;3", "technical_novelty": "3;2;2;3", "empirical_novelty": "3;2;3;3", "wc_summary_paper": "70;36;92;68", "wc_strength_and_weaknesses": "128;431;570;223", "wc_clarity_quality_novelty_and_reproducibility": "23;32;41;7", "wc_summary_review": "37;53;35;36", "wc_review": "258;552;738;334", "wc_reply_reviewers": "0;34;98;0", "wc_reply_authors": "304;724;738;355", "reply_reviewers": "0;1;1;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 66.5, 19.96872554771586 ], "wc_strength_and_weaknesses_avg": [ 338.0, 173.059238412747 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 25.75, 12.55736835487436 ], "wc_summary_review_avg": [ 40.25, 7.39509972887452 ], "wc_review_avg": [ 470.5, 188.40581201226252 ], "wc_reply_reviewers_avg": [ 33.0, 40.01249804748511 ], "wc_reply_authors_avg": [ 530.25, 201.61891652322706 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 9, 0 ], "corr_recommendation_confidence": -0.30151134457776363, "corr_recommendation_correctness": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12299173207728054134&as_sdt=5,39&sciodt=0,39&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2;3;2;2;2;2;4", "aff_unique_norm": "Zhejiang University;University of Edinburgh;Samsung;University of Cambridge;Westlake University", "aff_unique_dep": ";;AI Center;;", "aff_unique_url": "https://www.zju.edu.cn;https://www.ed.ac.uk;https://www.samsung.com/global/innovation/ai-research/;https://www.cam.ac.uk;https://www.westlake.edu.cn", "aff_unique_abbr": "ZJU;Edinburgh;SAC;Cambridge;WU", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;1;1;1;2;2;2;2;0", "aff_country_unique": "China;United Kingdom;South Korea" }, { "id": "CS7dB_FnGx", "title": "Incremental Unified Parameter Additional Tuning with Basic Memory Replaying", "track": "main", "status": "Withdraw", "tldr": "We propose a novel method for class incremental learning by tuning an unified additional parameter structure and replaying basic memory.", "abstract": "Class incremental learning (CIL) aims to develop an open intelligence system that can continuously learn new concepts from new tasks while retaining the knowledge to distinguish between new and old concepts. Recently, parameter-additional-tuning methods (PAT) have successfully alleviated catastrophic forgetting by starting from a well-pre-trained model and only allowing a few additional parameters to be trained. However, the contradiction between stability and plasticity and the lack of inter-task features still challenge PAT-based CIL methods. To address these, we propose Unified PAT and basic memory replaying (BMR). 
On the one hand, unified PAT transfers the model to sequentially arriving downstream tasks based on a fixed pre-trained vision transformer by unifying the prompt-based and the adapter-based methods, offering more diversified plastic structures to efficiently capture more useful features without large-scale parameters. On the other hand, BMR synthesizes on-call virtual old samples with a fixed-size basic memory to create a global task that covers all the sub-tasks, which makes inter-task features more learnable without a large memory budget. Extensive experiments demonstrate the effectiveness of our method.", "keywords": "class incremental learning;parameter-additional-tuning;basic memory replaying", "primary_area": "", "supplementary_material": "", "author": "Jieren Deng;Jianhua Hu;Haojian Zhang;Yunkuan Wang", "authorids": "~Jieren_Deng2;~Jianhua_Hu2;~Haojian_Zhang1;~Yunkuan_Wang1", "gender": "M;M;M;M", "homepage": "https://jarintotiondin.github.io/;http://www.ia.cas.cn/sourcedb_ia_cas/cn/iaexpert/202008/t20200805_5649727.html;;https://people.ucas.ac.cn/~wangyunkuan", "dblp": "274/1449;53/2957;;43/2005", "google_scholar": "mC2TOZ4AAAAJ;;;", "orcid": "0000-0002-5738-0927;;0000-0001-8447-0269;", "linkedin": ";;;", "or_profile": "~Jieren_Deng2;~Jianhua_Hu2;~Haojian_Zhang1;~Yunkuan_Wang1", "aff": "Chinese academy of science;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Institute of automation, Chinese academy of science", "aff_domain": "ia.ac.cn;ia.ac.cn;ia.ac.cn;ia.ac.cn", "position": "PhD student;Associate Professor;Associate Professor;Full Professor", "bibtex": "@misc{\ndeng2023incremental,\ntitle={Incremental Unified Parameter Additional Tuning with Basic Memory Replaying},\nauthor={Jieren Deng and Jianhua Hu and Haojian Zhang and Yunkuan Wang},\nyear={2023},\nurl={https://openreview.net/forum?id=CS7dB_FnGx}\n}", "github": "", "project": "", "reviewers": "siG9;iGem;BJT6;5yQc", "site": "https://openreview.net/forum?id=CS7dB_FnGx", "pdf_size": 957826, "recommendation": "3;3;3;3", "confidence": "4;4;5;4", "correctness": "2;4;2;2", "technical_novelty": "2;2;2;1", "empirical_novelty": "2;2;2;0", "wc_summary_paper": "53;82;39;41", "wc_strength_and_weaknesses": "207;295;232;189", "wc_clarity_quality_novelty_and_reproducibility": "48;96;17;28", "wc_summary_review": "30;47;38;17", "wc_review": "338;520;326;275", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 2.5, 0.8660254037844386 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 53.75, 17.166464400102893 ], "wc_strength_and_weaknesses_avg": [ 230.75, 40.114679358060435 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 47.25, 30.26032881513352 ], "wc_summary_review_avg": [ 33.0, 11.022703842524301 ], "wc_review_avg": [ 364.75, 92.70214398815165 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ooz21NbKNmsJ:scholar.google.com/&scioq=Incremental+Unified+Parameter+Additional+Tuning+with+Basic+Memory+Replaying&hl=en&as_sdt=0,5", 
"gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Chinese Academy of Sciences", "aff_unique_dep": "", "aff_unique_url": "http://www.cas.cn", "aff_unique_abbr": "CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "CTX5JcDaUX9", "title": "Prefer to Classify: Improving Text Classifier via Pair-wise Preference Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "The development of largely human-annotated benchmarks has driven the success of deep neural networks in various NLP tasks. These benchmarks are collected by aggregating decisions made by different annotators on the target task. Aggregating the annotated decisions via majority is still used as a common practice, despite its inevitable limitation from simple aggregation. In this paper, we establish a novel classification framework, based on task-specific human preference between a pair of samples, which provides an informative training signal to capture fine-grained and complementary task information through pair-wise comparison. Hence, it improves the existing instance-wise annotation system by enabling better task modeling from learning the relation between samples. Specifically, we propose a new multi-task learning framework, called prefer-to-classify (P2C), to effectively learn human preferences in addition to the given classification task.\nWe collect human preference signals in two ways: (1) extracting relative preferences implicitly from annotation records (for free) or (2) collecting subjective preferences explicitly from (paid) crowd workers. In various text classification tasks, we demonstrate that both extractive and subjective preferences are effective in improving the classifier with our preference learning framework. Interestingly, we found that subjective preference shows more significant improvements than extractive preference, revealing the effectiveness of explicit modeling of human preferences. 
Our code and preference dataset will be publicly available upon acceptance.", "keywords": "NLP;text classification;annotation;disagreement;preference", "primary_area": "", "supplementary_material": "/attachment/d0c28385b5b25f650b60fba375e6aca11f92441b.zip", "author": "Jaehyung Kim;Jinwoo Shin;Dongyeop Kang", "authorids": "~Jaehyung_Kim1;~Jinwoo_Shin1;~Dongyeop_Kang2", "gender": "M;M;M", "homepage": "https://sites.google.com/view/jaehyungkim;https://sites.google.com/site/mijirim/;https://dykang.github.io/", "dblp": "02/7206-1;31/7062;69/9056", "google_scholar": "https://scholar.google.co.kr/citations?user=6OYOsGsAAAAJ;https://scholar.google.com.tw/citations?user=m3eDp7kAAAAJ;https://scholar.google.co.kr/citations?user=fMKZOjwAAAAJ", "orcid": ";;0000-0002-9021-1789", "linkedin": ";;dongyeop-kang-30ba0611/", "or_profile": "~Jaehyung_Kim1;~Jinwoo_Shin1;~dongyeop_kang1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;University of Minnesota", "aff_domain": "kaist.ac.kr;kaist.ac.kr;umn.edu", "position": "PhD student;Full Professor;Assistant Professor", "bibtex": "@misc{\nkim2023prefer,\ntitle={Prefer to Classify: Improving Text Classifier via Pair-wise Preference Learning},\nauthor={Jaehyung Kim and Jinwoo Shin and Dongyeop Kang},\nyear={2023},\nurl={https://openreview.net/forum?id=CTX5JcDaUX9}\n}", "github": "", "project": "", "reviewers": "M4mz;cBxH;TEZ4", "site": "https://openreview.net/forum?id=CTX5JcDaUX9", "pdf_size": 1096206, "recommendation": "3;5;8", "confidence": "4;3;3", "correctness": "3;3;4", "technical_novelty": "2;2;2", "empirical_novelty": "2;0;3", "wc_summary_paper": "74;77;29", "wc_strength_and_weaknesses": "196;216;119", "wc_clarity_quality_novelty_and_reproducibility": "92;141;8", "wc_summary_review": "52;37;42", "wc_review": "414;471;198", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 5.333333333333333, 2.0548046676563256 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 1.6666666666666667, 1.247219128924647 ], "wc_summary_paper_avg": [ 60.0, 21.95449840010015 ], "wc_strength_and_weaknesses_avg": [ 177.0, 41.817061908587824 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 80.33333333333333, 54.92014404771916 ], "wc_summary_review_avg": [ 43.666666666666664, 6.236095644623236 ], "wc_review_avg": [ 361.0, 117.58401251870936 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.8029550685469661, "corr_recommendation_correctness": 0.9176629354822472, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Jw8F7osJV3kJ:scholar.google.com/&scioq=Prefer+to+Classify:+Improving+Text+Classifier+via+Pair-wise+Preference+Learning&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;1", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;University of Minnesota", "aff_unique_dep": ";", "aff_unique_url": "https://www.kaist.ac.kr;https://www.minnesota.edu", "aff_unique_abbr": "KAIST;UMN", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "South Korea;United States" }, { "id": "CU8BwVAzLme", "title": 
"Pathfinding Neural Cellular Automata", "track": "main", "status": "Withdraw", "tldr": "We show the algorithmic alignment of Neural Cellular Automata with pathfinding problems using hand-coded networks and learned models", "abstract": "Pathfinding makes up an important sub-component of a broad range of complex tasks in AI, such as robot path planning, transport routing, and game playing. While classical algorithms can efficiently compute shortest paths, neural networks could be better suited to adapting these sub-routines to more complex and intractable tasks. As a step toward developing such networks, we hand-code and learn models for Breadth-First Search (BFS), i.e. shortest path finding, using the unified architectural framework of Neural Cellular Automata, which are iterative neural networks with equal-size inputs and outputs. Similarly, we present a neural implementation of Depth-First Search (DFS), and outline how it can be combined with neural BFS to produce an NCA for computing diameter of a graph. We experiment with architectural modifications inspired by these hand-coded NCAs, training networks from scratch to solve the diameter problem on grid mazes while exhibiting strong generalization ability. Finally, we introduce a scheme in which data points are mutated adversarially during training. We find that adversarially evolving mazes leads to increased generalization on out-of-distribution examples, while at the same time generating data-sets with significantly more complex solutions for reasoning tasks.", "keywords": "pathfinding;neural cellular automata;graph neural networks;algorithmic alignment", "primary_area": "", "supplementary_material": "/attachment/5cf53811205187c120af4b0875e1edf4242e68c0.zip", "author": "Sam Earle;Ozlem Yildiz;Julian Togelius;Chinmay Hegde", "authorids": "~Sam_Earle1;~Ozlem_Yildiz1;~Julian_Togelius1;~Chinmay_Hegde1", "gender": "M;F;M;M", "homepage": ";;http://julian.togelius.com;https://chinmayhegde.github.io/", "dblp": ";;47/767;39/2056", "google_scholar": "pilikrUAAAAJ;I93NXVwAAAAJ;lr4I9BwAAAAJ;eJAV17IAAAAJ", "orcid": ";;0000-0003-3128-4598;", "linkedin": "sam-earle-43a0701ab;ozlem-yildiz-95a47bb5/;togelius/;", "or_profile": "~Sam_Earle1;~Ozlem_Yildiz1;~Julian_Togelius1;~Chinmay_Hegde1", "aff": "New York University;New York University;New York University;New York University", "aff_domain": "nyu.edu;nyu.edu;nyu.edu;nyu.edu", "position": "PhD student;PhD student;Associate Professor;Associate Professor", "bibtex": "@misc{\nearle2023pathfinding,\ntitle={Pathfinding Neural Cellular Automata},\nauthor={Sam Earle and Ozlem Yildiz and Julian Togelius and Chinmay Hegde},\nyear={2023},\nurl={https://openreview.net/forum?id=CU8BwVAzLme}\n}", "github": "", "project": "", "reviewers": "62BN;rbmf;zqfK;8BuJ", "site": "https://openreview.net/forum?id=CU8BwVAzLme", "pdf_size": 336951, "recommendation": "3;3;3;6", "confidence": "2;4;2;3", "correctness": "2;3;4;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "96;754;66;64", "wc_strength_and_weaknesses": "189;444;138;159", "wc_clarity_quality_novelty_and_reproducibility": "101;286;138;42", "wc_summary_review": "49;23;86;86", "wc_review": "435;1507;428;351", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.75, 1.299038105676658 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], 
"empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 245.0, 294.144522301538 ], "wc_strength_and_weaknesses_avg": [ 232.5, 123.44735720135931 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 141.75, 90.04547462254835 ], "wc_summary_review_avg": [ 61.0, 26.636441203734407 ], "wc_review_avg": [ 680.25, 478.4607481288303 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.17407765595569782, "corr_recommendation_correctness": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11323068235114448702&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "New York University", "aff_unique_dep": "", "aff_unique_url": "https://www.nyu.edu", "aff_unique_abbr": "NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Hierarchical Sliced Wasserstein Distance", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11704", "id": "CUOaVn6mYEj", "poster": "/media/PosterPDFs/ICLR%202023/11704.png?t=1680749007.9131477", "openreview": "https://openreview.net/forum?id=CUOaVn6mYEj", "slides": "https://iclr.cc/virtual/2023/poster/11704", "video": "https://iclr.cc/virtual/2023/poster/11704", "author_site": "Khai Nguyen, Tongzheng Ren, Huy Nguyen, Litu Rout, TAN NGUYEN, Nhat Ho", "tldr": "The paper proposes hierarchical sliced Wasserstein distance which is faster than the conventional sliced Wasserstein distance.", "abstract": "Sliced Wasserstein (SW) distance has been widely used in different application scenarios since it can be scaled to a large number of supports without suffering from the curse of dimensionality. The value of sliced Wasserstein distance is the average of transportation cost between one-dimensional representations (projections) of original measures that are obtained by Radon Transform (RT). Despite its efficiency in the number of supports, estimating the sliced Wasserstein requires a relatively large number of projections in high-dimensional settings. Therefore, for applications where the number of supports is relatively small compared with the dimension, e.g., several deep learning applications where the mini-batch approaches are utilized, the complexities from matrix multiplication of Radon Transform become the main computational bottleneck. To address this issue, we propose to derive projections by linearly and randomly combining a smaller number of projections which are named bottleneck projections. We explain the usage of these projections by introducing Hierarchical Radon Transform (HRT) which is constructed by applying Radon Transform variants recursively. We then formulate the approach into a new metric between measures, named Hierarchical Sliced Wasserstein (HSW) distance. By proving the injectivity of HRT, we derive the metricity of HSW. Moreover, we investigate the theoretical properties of HSW including its connection to SW variants and its computational and sample complexities. 
Finally, we compare the computational cost and generative quality of HSW with the conventional SW on the task of deep generative modeling using various benchmark datasets including CIFAR10, CelebA, and Tiny ImageNet.", "keywords": "Sliced Wasserstein;Radon Transform;Optimal Transport;Generative Models", "primary_area": "", "supplementary_material": "/attachment/d49eff92ecff703235e0f14f9ed9e4303c333713.zip", "author": "Khai Nguyen;Tongzheng Ren;Huy Nguyen;Litu Rout;Tan Minh Nguyen;Nhat Ho", "authorids": "~Khai_Nguyen1;~Tongzheng_Ren1;~Huy_Nguyen5;~Litu_Rout1;~Tan_Minh_Nguyen1;~Nhat_Ho1", "gender": "M;M;M;M;M;M", "homepage": "https://khainb.com;https://www.cs.utexas.edu/~tzren/;https://huynm99.github.io/;https://liturout.github.io/;https://tanmnguyen89.github.io/;https://nhatptnk8912.github.io/", "dblp": "120/4308;211/8004;48/6075;206/6445;255/4725;203/4479", "google_scholar": "im5fNaQAAAAJ;VgNDYeYAAAAJ;_YYwzhQAAAAJ;https://scholar.google.co.in/citations?hl=en;OizOh88AAAAJ;https://scholar.google.ca/citations?user=Xs7cKMwAAAAJ", "orcid": ";;;;;", "linkedin": ";;huy-nguyen-081199/;litu-rout-sac-isro/;;nhat-pham-minh-ho-267b8164/", "or_profile": "~Khai_Nguyen1;~Tongzheng_Ren1;~Huy_Nguyen5;~Litu_Rout1;~Tan_Minh_Nguyen1;~Nhat_Ho1", "aff": "University of Texas, Austin;Google;University of Texas at Austin;University of Texas at Austin;University of California, Los Angeles;University of Texas, Austin", "aff_domain": "utexas.edu;google.com;utexas.edu;utexas.edu;ucla.edu;utexas.edu", "position": "PhD student;Intern;PhD student;PhD student;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nnguyen2023hierarchical,\ntitle={Hierarchical Sliced Wasserstein Distance},\nauthor={Khai Nguyen and Tongzheng Ren and Huy Nguyen and Litu Rout and Tan Minh Nguyen and Nhat Ho},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=CUOaVn6mYEj}\n}", "github": "", "project": "", "reviewers": "CrfW;YGxc;XSPt;EUsv", "pdf_size": 9294423, "recommendation": "5;6;6;8", "confidence": "4;3;4;5", "correctness": "4;4;4;4", "technical_novelty": "3;4;3;3", "empirical_novelty": "2;3;3;0", "wc_summary_paper": "172;86;128;185", "wc_strength_and_weaknesses": "231;486;141;347", "wc_clarity_quality_novelty_and_reproducibility": "105;23;132;49", "wc_summary_review": "19;37;29;48", "wc_review": "527;632;430;629", "wc_reply_reviewers": "80;152;13;105", "wc_reply_authors": "1356;1038;783;481", "reply_reviewers": "1;2;1;1", "reply_authors": "4;2;2;2", "recommendation_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 4.0, 0.0 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 1.224744871391589 ], "wc_summary_paper_avg": [ 142.75, 38.98316944528754 ], "wc_strength_and_weaknesses_avg": [ 301.25, 129.2678904446112 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 77.25, 43.3265219005634 ], "wc_summary_review_avg": [ 33.25, 10.638961415476606 ], "wc_review_avg": [ 554.5, 83.38614992910993 ], "wc_reply_reviewers_avg": [ 87.5, 50.18216814765978 ], "wc_reply_authors_avg": [ 914.5, 322.2533940860825 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.6488856845230502, "corr_recommendation_correctness": 0.0, "gs_citation": 29, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=10493608665494568269&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=CUOaVn6mYEj", "email": "utexas.edu;google.com;utexas.edu;utexas.edu;ucla.edu;utexas.edu", "author_num": 6, "aff_unique_index": "0;1;0;0;2;0", "aff_unique_norm": "University of Texas at Austin;Google;University of California, Los Angeles", "aff_unique_dep": ";Google;", "aff_unique_url": "https://www.utexas.edu;https://www.google.com;https://www.ucla.edu", "aff_unique_abbr": "UT Austin;Google;UCLA", "aff_campus_unique_index": "0;1;0;0;2;0", "aff_campus_unique": "Austin;Mountain View;Los Angeles", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "CUOhDJGy3Mn", "title": "Progressive Mixup Augmented Teacher-Student Learning for Unsupervised Domain Adaptation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Unsupervised Domain Adaptation (UDA) aims to transfer knowledge learned from a labeled source domain to an unlabeled target domain, mostly through learning a domain invariant feature representation. Currently, the best performing UDA methods use category level domain alignment to capture fine-grained information, resulting in significantly improved performance over global alignment. While successful, category level UDA methods suffer from the unreliable pseudo-labels for target data. Additionally, most UDA methods directly adapt from source to target domain without regard for the large domain discrepancy. In this paper, we propose an UDA approach with teacher-student learning where the teacher network is used to provide more reliable target pseudo-labels for the student network to train with. Furthermore, we use a progressive mixup augmentation strategy which generates intermediate samples that become increasingly target-dominant as training progresses. Aligning the source and intermediate domains allows the model to gradually transfer fine-grained domain knowledge from the source to the target domain while minimizing the negative impact of noisy target pseudo-labels. 
This progressive mixup augmented teacher-student (PMATS) training strategy along with class subset sampling and clustering-based pseudo-label refinement achieves state-of-the-art performance on two public UDA benchmark datasets: Office-31 and Office-Home.", "keywords": "Unsupervised Domain Adaptation;Progressive Mixup Augmentation;Teacher-Student Learning", "primary_area": "", "supplementary_material": "", "author": "Aotian Zheng;Jie Mei;Farron Wallace;Craig Rose;Rania Hussein;Jenq-Neng Hwang", "authorids": "~Aotian_Zheng1;jiemei@uw.edu;farron.wallace@noaa.gov;craig.rose@noaa.gov;rhussein@uw.edu;~Jenq-Neng_Hwang1", "gender": "M;;;;;M", "homepage": ";;;;;https://people.ece.uw.edu/hwang/", "dblp": "230/2135;;;;;78/4381", "google_scholar": "eF4_0cIAAAAJ;;;;;b365J6kAAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Aotian_Zheng1;jiemei@uw.edu;farron.wallace@noaa.gov;craig.rose@noaa.gov;rhussein@uw.edu;~Jenq-Neng_Hwang1", "aff": "University of Washington;;;;;University of Washington, Seattle", "aff_domain": "uw.edu;;;;;uw.edu", "position": "PhD student;;;;;Full Professor", "bibtex": "@misc{\nzheng2023progressive,\ntitle={Progressive Mixup Augmented Teacher-Student Learning for Unsupervised Domain Adaptation},\nauthor={Aotian Zheng and Jie Mei and Farron Wallace and Craig Rose and Rania Hussein and Jenq-Neng Hwang},\nyear={2023},\nurl={https://openreview.net/forum?id=CUOhDJGy3Mn}\n}", "github": "", "project": "", "reviewers": "6xGt;dVDV;CyYH;XSGj;kMVd", "site": "https://openreview.net/forum?id=CUOhDJGy3Mn", "pdf_size": 512689, "recommendation": "3;3;3;3;5", "confidence": "4;5;3;4;4", "correctness": "2;3;2;2;3", "technical_novelty": "2;1;1;2;2", "empirical_novelty": "2;1;1;2;2", "wc_summary_paper": "88;50;70;69;98", "wc_strength_and_weaknesses": "233;177;248;473;363", "wc_clarity_quality_novelty_and_reproducibility": "65;13;13;51;96", "wc_summary_review": "34;17;17;34;44", "wc_review": "420;257;348;627;601", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;0;0", "reply_authors": "0;0;0;0;0", "recommendation_avg": [ 3.4, 0.8 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "correctness_avg": [ 2.4, 0.4898979485566356 ], "technical_novelty_avg": [ 1.6, 0.4898979485566356 ], "empirical_novelty_avg": [ 1.6, 0.4898979485566356 ], "wc_summary_paper_avg": [ 75.0, 16.637307474468336 ], "wc_strength_and_weaknesses_avg": [ 298.8, 106.03093888106434 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 47.6, 31.78427284051029 ], "wc_summary_review_avg": [ 29.2, 10.609429767899874 ], "wc_review_avg": [ 450.6, 143.304710320352 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.6123724356957946, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3201752183744877086&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0", "aff_unique_norm": "University of Washington", "aff_unique_dep": "", "aff_unique_url": "https://www.washington.edu", "aff_unique_abbr": "UW", "aff_campus_unique_index": "1", "aff_campus_unique": ";Seattle", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "DAVA: Disentangling Adversarial Variational Autoencoder", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11220", "id": "CW6KmU5wPh", "poster": 
"/media/PosterPDFs/ICLR%202023/11220.png?t=1682675900.0989544", "openreview": "https://openreview.net/forum?id=CW6KmU5wPh", "slides": "https://iclr.cc/virtual/2023/poster/11220", "video": "https://iclr.cc/virtual/2023/poster/11220", "author_site": "Benjamin Estermann, Roger Wattenhofer", "tldr": "We propose an adversarial variational auto-encoder that alleviates the issue of hyperparameter selection for disentanglement learning and propose a new unsupervised disentanglement metric.", "abstract": "The use of well-disentangled representations offers many advantages for downstream tasks, e.g. an increased sample efficiency, or better interpretability.\nHowever, the quality of disentangled interpretations is often highly dependent on the choice of dataset-specific hyperparameters, in particular the regularization strength.\nTo address this issue, we introduce DAVA, a novel training procedure for variational auto-encoders. DAVA completely alleviates the problem of hyperparameter selection.\nWe compare DAVA to models with optimal hyperparameters.\nWithout any hyperparameter tuning, DAVA is competitive on a diverse range of commonly used datasets.\nUnderlying DAVA, we discover a necessary condition for unsupervised disentanglement, which we call PIPE.\nWe demonstrate the ability of PIPE to positively predict the performance of downstream models in abstract reasoning.\nWe also thoroughly investigate correlations with existing supervised and unsupervised metrics. The code is available at https://github.com/besterma/dava.", "keywords": "Disentanglement learning;varational auto-encoder;curriculum learning;generative adversarial networks", "primary_area": "", "supplementary_material": "", "author": "Benjamin Estermann;Roger Wattenhofer", "authorids": "~Benjamin_Estermann1;~Roger_Wattenhofer1", "gender": "Not Specified;Not Specified", "homepage": "https://disco.ethz.ch/members/besterma;https://disco.ethz.ch/members/wroger", "dblp": "277/5034;w/RogerWattenhofer", "google_scholar": "zawztfkAAAAJ;https://scholar.google.ch/citations?user=EG3VPm4AAAAJ", "orcid": ";", "linkedin": ";roger-wattenhofer-4466731/", "or_profile": "~Benjamin_Estermann1;~Roger_Wattenhofer1", "aff": "ETHZ - ETH Zurich;Swiss Federal Institute of Technology", "aff_domain": "ethz.ch;ethz.ch", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nestermann2023dava,\ntitle={{DAVA}: Disentangling Adversarial Variational Autoencoder},\nauthor={Benjamin Estermann and Roger Wattenhofer},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=CW6KmU5wPh}\n}", "github": "", "project": "", "reviewers": "SzgX;GT7q;pitB", "pdf_size": 3442680, "recommendation": "6;6;6", "confidence": "4;4;4", "correctness": "4;3;4", "technical_novelty": "3;3;2", "empirical_novelty": "3;3;2", "wc_summary_paper": "77;152;51", "wc_strength_and_weaknesses": "331;346;299", "wc_clarity_quality_novelty_and_reproducibility": "57;49;10", "wc_summary_review": "48;36;39", "wc_review": "513;583;399", "wc_reply_reviewers": "54;0;30", "wc_reply_authors": "631;309;639", "reply_reviewers": "1;0;1", "reply_authors": "1;1;1", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 93.33333333333333, 42.82003684673281 ], "wc_strength_and_weaknesses_avg": [ 
325.3333333333333, 19.601587237318874 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 38.666666666666664, 20.531818125912658 ], "wc_summary_review_avg": [ 41.0, 5.0990195135927845 ], "wc_review_avg": [ 498.3333333333333, 75.83021971629927 ], "wc_reply_reviewers_avg": [ 28.0, 22.090722034374522 ], "wc_reply_authors_avg": [ 526.3333333333334, 153.712574487436 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3314903623643304707&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "pdf": "https://openreview.net/pdf?id=CW6KmU5wPh", "email": "ethz.ch;ethz.ch", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "ETH Zurich;Swiss Federal Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.ethz.ch;https://www.ethz.ch", "aff_unique_abbr": "ETHZ;ETH Zurich", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Switzerland" }, { "title": "MPCFORMER: FAST, PERFORMANT AND PRIVATE TRANSFORMER INFERENCE WITH MPC", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12110", "id": "CWmvjOEhgH-", "poster": "", "openreview": "https://openreview.net/forum?id=CWmvjOEhgH-", "slides": "https://iclr.cc/virtual/2023/poster/12110", "video": "https://iclr.cc/virtual/2023/poster/12110", "author_site": "Dacheng Li, Hongyi Wang, Rulin Shao, Han Guo, Eric Xing, Hao Zhang", "tldr": "We develop a framework that allows fast, performant, and private inference with MPC for Transformer models.", "abstract": "Enabling private inference is crucial for many cloud inference services that are based on Transformer models. However, existing private inference solutions can increase the inference latency by more than 60$\\times$ or significantly compromise the inference quality. In this paper, we design the framework MPCFORMER as a practical solution, using Secure Multi-Party Computation (MPC) and Knowledge Distillation (KD). Through extensive evaluations, we show that MPCFORMER significantly speeds up Transformer inference in MPC settings while achieving similar ML performance to the input model. On the IMDb dataset, it achieves similar performance to $\\text{BERT}_\\text{BASE}$, while being 5.3$\\times$ faster. On the GLUE benchmark, it achieves 97% performance of $\\text{BERT}_\\text{BASE}$ with a 2.2$\\times$ speedup. MPCFORMER remains effective with different trained Transformer weights such as $\\text{ROBERTA}_\\text{BASE}$ and larger models including $\\text{BERT}_\\text{LARGE}$. 
Code is available at https://github.com/MccRee177/MPCFormer.", "keywords": "Secure Multiparty Computation;Privacy;Machine Learning;Transformer model", "primary_area": "", "supplementary_material": "/attachment/8505517d4650b76652fe471fdd6e536b4160ddec.zip", "author": "Dacheng Li;Hongyi Wang;Rulin Shao;Han Guo;Eric Xing;Hao Zhang", "authorids": "~Dacheng_Li1;~Hongyi_Wang1;~Rulin_Shao1;~Han_Guo1;~Eric_Xing1;~Hao_Zhang2", "gender": ";M;;;M;M", "homepage": ";https://hwang595.github.io/;https://rulinshao.github.io/;;http://www.cs.cmu.edu/~epxing/;https://cseweb.ucsd.edu/~haozhang/", "dblp": ";15/832-1.html;;;36/3855;55/2270-25", "google_scholar": ";zYdZORsAAAAJ;Vdwh6bcAAAAJ;;https://scholar.google.com.tw/citations?user=5pKTRxEAAAAJ;H1d4BS8AAAAJ", "orcid": ";;;;;", "linkedin": ";hongyi-wang-b89651102/;;;;", "or_profile": "~Dacheng_Li1;~Hongyi_Wang1;~Rulin_Shao1;~Han_Guo1;~Eric_Xing1;~Hao_Zhang2", "aff": ";Carnegie Mellon University;University of Washington;;School of Computer Science, Carnegie Mellon University;University of California, Berkeley", "aff_domain": ";andrew.cmu.edu;uw.edu;;cs.cmu.edu;berkeley.edu", "position": ";Researcher;PhD student;;Full Professor;Postdoc", "bibtex": "@inproceedings{\nli2023mpcformer,\ntitle={{MPCFORMER}: {FAST}, {PERFORMANT} {AND} {PRIVATE} {TRANSFORMER} {INFERENCE} {WITH} {MPC}},\nauthor={Dacheng Li and Hongyi Wang and Rulin Shao and Han Guo and Eric Xing and Hao Zhang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=CWmvjOEhgH-}\n}", "github": "", "project": "", "reviewers": "dCKB;BURu;ENWC;tn3E", "pdf_size": 1494237, "recommendation": "6;8;8;8", "confidence": "2;3;3;3", "correctness": "3;4;4;4", "technical_novelty": "3;4;2;2", "empirical_novelty": "3;4;3;4", "wc_summary_paper": "36;23;104;113", "wc_strength_and_weaknesses": "52;45;172;123", "wc_clarity_quality_novelty_and_reproducibility": "134;101;55;53", "wc_summary_review": "36;6;53;7", "wc_review": "258;175;384;296", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "899;264;821;113", "reply_reviewers": "0;0;0;0", "reply_authors": "3;1;3;1", "recommendation_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 3.5, 0.5 ], "wc_summary_paper_avg": [ 69.0, 39.89360851063739 ], "wc_strength_and_weaknesses_avg": [ 98.0, 52.502380898393554 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 85.75, 33.83323070591988 ], "wc_summary_review_avg": [ 25.5, 19.93113142799475 ], "wc_review_avg": [ 278.25, 75.11449593786807 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 524.25, 341.08457528888636 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.0, 1.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 1.0, "corr_recommendation_correctness": 1.0, "gs_citation": 89, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12209422273877866411&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=CWmvjOEhgH-", "email": ";andrew.cmu.edu;uw.edu;;cs.cmu.edu;berkeley.edu", "author_num": 6, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Carnegie Mellon University;University of Washington;University of California, Berkeley", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cmu.edu;https://www.washington.edu;https://www.berkeley.edu", "aff_unique_abbr": 
"CMU;UW;UC Berkeley", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Pittsburgh;Berkeley", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "AudioGen: Textually Guided Audio Generation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11521", "id": "CYK7RfcOzQ4", "poster": "", "openreview": "https://openreview.net/forum?id=CYK7RfcOzQ4", "slides": "https://iclr.cc/virtual/2023/poster/11521", "video": "https://iclr.cc/virtual/2023/poster/11521", "author_site": "Felix Kreuk, Gabriel Synnaeve, Adam Polyak, Uriel Singer, Alexandre D\u00e9fossez, Jade Copet, Devi Parikh, Yaniv Taigman, Yossi Adi", "tldr": "We propose a text-to-audio generation model", "abstract": "In this work, we tackle the problem of generating audio samples conditioned on descriptive text captions. We propose AudioGen, an auto-regressive generative model, operating on a learnt discrete audio representation, that generates audio samples conditioned on text inputs. The task of text-to-audio generation poses multiple challenges. Due to the way audio travels through a medium, differentiating ``objects'' can be a difficult task (e.g., separating multiple people simultaneously speaking). This is further complicated by real-world recording conditions (e.g., background noise, reverberation, etc.). Scarce text annotations impose another constraint, limiting the ability to scale models. Finally, modeling high fidelity audio requires one to operate over extremely long sequences. To alleviate the aforementioned challenges we propose an augmentation technique that mixes different audio samples, driving the model to internally learn to separate multiple sources. We curated 10 datasets containing different types of audio and text annotations to handle the scarcity of text-audio data points. For faster inference, we explore the use of multi-stream modeling, allowing the use of shorter sequences while maintaining a similar bitrate and perceptual quality. Finally, we apply classifier-free guidance to improve adherence to text. Comparing to the evaluated baselines, AudioGen outperforms over both objective and subjective metrics. 
We further conduct an ablation study to gauge the effects of pre-trained text and audio components.", "keywords": "text-to-audio;audio generation", "primary_area": "", "supplementary_material": "", "author": "Felix Kreuk;Gabriel Synnaeve;Adam Polyak;Uriel Singer;Alexandre D\u00e9fossez;Jade Copet;Devi Parikh;Yaniv Taigman;Yossi Adi", "authorids": "~Felix_Kreuk1;~Gabriel_Synnaeve1;~Adam_Polyak1;~Uriel_Singer1;~Alexandre_D\u00e9fossez1;~Jade_Copet1;~Devi_Parikh1;~Yaniv_Taigman1;~Yossi_Adi1", "gender": "M;M;;;M;;F;;M", "homepage": "https://scholar.google.co.il/citations?user=UiERcYsAAAAJ&hl=en;;;https://il.linkedin.com/in/urielsinger;https://ai.honu.io/;;https://www.cc.gatech.edu/~parikh/;;http://adiyoss.github.io/", "dblp": "213/7459;http://dblp.uni-trier.de/pers/hd/s/Synnaeve:Gabriel;;238/0243;156/0054;;64/2121;;171/0957.html", "google_scholar": ";wN9rBkcAAAAJ;;nIEep3cAAAAJ;https://scholar.google.fr/citations?user=DubNUU0AAAAJ;GRMLwjAAAAAJ;ijpYJQwAAAAJ;;https://scholar.google.co.il/citations?user=4W-HuYYAAAAJ", "orcid": ";;;0000-0001-8451-8533;;;;;0000-0003-2237-3898", "linkedin": ";;;;;jadecopet/?locale=en_US;;;yossi-adi-31a32858?trk=nav_responsive_tab_profile_pic", "or_profile": "~Felix_Kreuk1;~Gabriel_Synnaeve1;~Adam_Polyak1;~Uriel_Singer1;~Alexandre_D\u00e9fossez1;~Jade_Copet1;~Devi_Parikh1;~Yaniv_Taigman1;~Yossi_Adi1", "aff": "Meta Facebook;Meta Facebook;;Meta AI Research;Meta;Facebook AI Research;FAIR, Meta;;Meta", "aff_domain": "fb.com;fb.com;;meta.com;meta.com;facebook.com;fb.com;;meta.com", "position": "Researcher;Research Scientist;;Researcher;Researcher;Research Engineering Manager;Director;;Research Scientist", "bibtex": "@inproceedings{\nkreuk2023audiogen,\ntitle={AudioGen: Textually Guided Audio Generation},\nauthor={Felix Kreuk and Gabriel Synnaeve and Adam Polyak and Uriel Singer and Alexandre D{\\'e}fossez and Jade Copet and Devi Parikh and Yaniv Taigman and Yossi Adi},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=CYK7RfcOzQ4}\n}", "github": "", "project": "", "reviewers": "pGs2;Tiv8;iK2a;J2ki", "pdf_size": 655823, "recommendation": "8;8;8;8", "confidence": "4;5;4;5", "correctness": "3;3;3;3", "technical_novelty": "3;3;3;3", "empirical_novelty": "3;4;3;4", "wc_summary_paper": "61;133;28;91", "wc_strength_and_weaknesses": "99;421;142;27", "wc_clarity_quality_novelty_and_reproducibility": "453;412;103;62", "wc_summary_review": "39;114;25;143", "wc_review": "652;1080;298;323", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "701;941;176;95", "reply_reviewers": "0;0;0;0", "reply_authors": "1;2;1;1", "recommendation_avg": [ 8.0, 0.0 ], "confidence_avg": [ 4.5, 0.5 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.5, 0.5 ], "wc_summary_paper_avg": [ 78.25, 38.674119253061214 ], "wc_strength_and_weaknesses_avg": [ 172.25, 149.37766734020184 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 257.5, 176.19662312314614 ], "wc_summary_review_avg": [ 80.25, 49.57506933933628 ], "wc_review_avg": [ 588.25, 316.41932226082525 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 478.25, 354.2565278156494 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 9, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 400, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=11166122285082527856&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=CYK7RfcOzQ4", "email": "fb.com;fb.com;;meta.com;meta.com;facebook.com;fb.com;;meta.com", "author_num": 9, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Meta", "aff_unique_dep": "Meta Platforms, Inc.", "aff_unique_url": "https://meta.com", "aff_unique_abbr": "Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Guiding Energy-based Models via Contrastive Latent Variables", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10841", "id": "CZmHHj9MgkP", "poster": "/media/PosterPDFs/ICLR%202023/10841.png?t=1681917100.289699", "openreview": "https://openreview.net/forum?id=CZmHHj9MgkP", "slides": "https://iclr.cc/virtual/2023/poster/10841", "video": "https://iclr.cc/virtual/2023/poster/10841", "author_site": "Hankook Lee, Jongheon Jeong, Sejun Park, Jinwoo Shin", "tldr": "We propose a simple yet effective framework for improving energy-based models (EBMs) via contrastive representation learning.", "abstract": "An energy-based model (EBM) is a popular generative framework that offers both explicit density and architectural flexibility, but training them is difficult since it is often unstable and time-consuming. In recent years, various training techniques have been developed, e.g., better divergence measures or stabilization in MCMC sampling, but there often exists a large gap between EBMs and other generative frameworks like GANs in terms of generation quality. In this paper, we propose a novel and effective framework for improving EBMs via contrastive representation learning (CRL). To be specific, we consider representations learned by contrastive methods as the true underlying latent variable. This contrastive latent variable could guide EBMs to understand the data structure better, so it can improve and accelerate EBM training significantly. To enable the joint training of EBM and CRL, we also design a new class of latent-variable EBMs for learning the joint density of data and the contrastive latent variable. Our experimental results demonstrate that our scheme achieves lower FID scores, compared to prior-art EBM methods (e.g., additionally using variational autoencoders or diffusion techniques), even with significantly faster and more memory-efficient training. We also show conditional and compositional generation abilities of our latent-variable EBMs as their additional benefits, even without explicit conditional training. 
The code is available at https://github.com/hankook/CLEL.", "keywords": "energy-based model;contrastive representation learning", "primary_area": "", "supplementary_material": "", "author": "Hankook Lee;Jongheon Jeong;Sejun Park;Jinwoo Shin", "authorids": "~Hankook_Lee1;~Jongheon_Jeong1;~Sejun_Park1;~Jinwoo_Shin1", "gender": "M;M;;M", "homepage": "https://hankook.github.io;https://jh-jeong.github.io;;https://sites.google.com/site/mijirim/", "dblp": "223/4393;241/5923;155/9882;31/7062", "google_scholar": "CgqswXUAAAAJ;mZB2qfcAAAAJ;;https://scholar.google.com.tw/citations?user=m3eDp7kAAAAJ", "orcid": ";0000-0002-4058-5774;;", "linkedin": ";jongheonj/;;", "or_profile": "~Hankook_Lee1;~Jongheon_Jeong1;~Sejun_Park1;~Jinwoo_Shin1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea University;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;korea.ac.kr;kaist.ac.kr", "position": "Postdoc;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nlee2023guiding,\ntitle={Guiding Energy-based Models via Contrastive Latent Variables},\nauthor={Hankook Lee and Jongheon Jeong and Sejun Park and Jinwoo Shin},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=CZmHHj9MgkP}\n}", "github": "", "project": "", "reviewers": "nYLf;hpmQ;zYAi;eaGu", "pdf_size": 2394553, "recommendation": "5;6;8;8", "confidence": "4;4;5;5", "correctness": "3;3;4;3", "technical_novelty": "2;3;3;4", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "53;84;105;94", "wc_strength_and_weaknesses": "259;141;234;210", "wc_clarity_quality_novelty_and_reproducibility": "86;53;9;53", "wc_summary_review": "84;37;70;51", "wc_review": "482;315;418;408", "wc_reply_reviewers": "454;0;0;0", "wc_reply_authors": "1476;478;449;440", "reply_reviewers": "1;0;0;0", "reply_authors": "3;1;1;1", "recommendation_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 4.5, 0.5 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 84.0, 19.3778223750761 ], "wc_strength_and_weaknesses_avg": [ 211.0, 43.971581731841304 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 50.25, 27.36215452043205 ], "wc_summary_review_avg": [ 60.5, 17.92344832893492 ], "wc_review_avg": [ 405.75, 59.59184088447008 ], "wc_reply_reviewers_avg": [ 113.5, 196.58776665906757 ], "wc_reply_authors_avg": [ 710.75, 442.0403686316443 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.9622504486493761, "corr_recommendation_correctness": 0.5555555555555555, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17990069763960453933&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=CZmHHj9MgkP", "email": "kaist.ac.kr;kaist.ac.kr;korea.ac.kr;kaist.ac.kr", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Korea University", "aff_unique_dep": ";", "aff_unique_url": "https://www.kaist.ac.kr;https://www.korea.ac.kr", "aff_unique_abbr": "KAIST;KU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "DM-NeRF: 
3D Scene Geometry Decomposition and Manipulation from 2D Images", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11810", "id": "C_PRLz8bEJx", "poster": "", "openreview": "https://openreview.net/forum?id=C_PRLz8bEJx", "slides": "https://iclr.cc/virtual/2023/poster/11810", "video": "https://iclr.cc/virtual/2023/poster/11810", "author_site": "Bing WANG, Lu Chen, Bo Yang", "tldr": "In this paper, we study the problem of 3D scene geometry decomposition and manipulation from 2D views.", "abstract": "In this paper, we study the problem of 3D scene geometry decomposition and manipulation from 2D views. By leveraging recent implicit neural representation techniques, particularly the appealing neural radiance fields, we introduce an object field component to learn unique codes for all individual objects in 3D space only from 2D supervision. The key to this component is a series of carefully designed loss functions to enable every 3D point, especially in non-occupied space, to be effectively optimized even without 3D labels. In addition, we introduce an inverse query algorithm to freely manipulate any specified 3D object shape in the learned scene representation. Notably, our manipulation algorithm can explicitly tackle key issues such as object collisions and visual occlusions. Our method, called DM-NeRF, is among the first to simultaneously reconstruct, decompose, manipulate and render complex 3D scenes in a single pipeline. Extensive experiments on three datasets clearly show that our method can accurately decompose all 3D objects from 2D views, allowing any object of interest to be freely manipulated in 3D space via operations such as translation, rotation, size adjustment, and deformation.", "keywords": "3D Scene Decomposition;Object Manipulation;Neural Rendering", "primary_area": "", "supplementary_material": "/attachment/fdd0f32548e304c80bec76a9f7e990e9b7f39e9a.zip", "author": "Bing WANG;Lu Chen;Bo Yang", "authorids": "~Bing_WANG8;lu.chen@polyu.edu.hk;~Bo_Yang7", "gender": "M;;M", "homepage": "http://www.cs.ox.ac.uk/people/bing.wang/;;https://yang7879.github.io/", "dblp": "06/1909-13;;46/999-27", "google_scholar": "W7QhPeUAAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": "0000-0003-0977-0426;;0000-0002-2419-4140", "linkedin": ";;", "or_profile": "~Bing_WANG8;lu.chen@polyu.edu.hk;~Bo_Yang7", "aff": "Hong Kong Polytechnic University;;The Hong Kong Polytechnic University", "aff_domain": "polyu.edu.hk;;polyu.edu.hk", "position": "Assistant Professor;;Assistant Professor", "bibtex": "@inproceedings{\nwang2023dmnerf,\ntitle={{DM}-Ne{RF}: 3D Scene Geometry Decomposition and Manipulation from 2D Images},\nauthor={Bing WANG and Lu Chen and Bo Yang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=C_PRLz8bEJx}\n}", "github": "", "project": "", "reviewers": "1RkG;Tr2a;BSjr;tL8f", "pdf_size": 2101373, "recommendation": "3;6;6;8", "confidence": "5;4;3;4", "correctness": "3;2;3;4", "technical_novelty": "3;2;3;3", "empirical_novelty": "3;2;3;3", "wc_summary_paper": "26;70;77;67", "wc_strength_and_weaknesses": "295;441;136;132", "wc_clarity_quality_novelty_and_reproducibility": "37;21;42;65", "wc_summary_review": "26;26;50;73", "wc_review": "384;558;305;337", "wc_reply_reviewers": "0;27;0;0", "wc_reply_authors": "2279;1702;357;1305", "reply_reviewers": "0;1;0;0", "reply_authors": "5;4;1;2", "recommendation_avg": [ 5.75, 1.7853571071357126 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], 
"correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 60.0, 19.96246477767713 ], "wc_strength_and_weaknesses_avg": [ 251.0, 127.88862341897344 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 41.25, 15.75396775418815 ], "wc_summary_review_avg": [ 43.75, 19.524023663169434 ], "wc_review_avg": [ 396.0, 97.66012492312305 ], "wc_reply_reviewers_avg": [ 6.75, 11.691342951089922 ], "wc_reply_authors_avg": [ 1410.75, 700.0458467129135 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.0, 1.5811388300841898 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.5940885257860046, "corr_recommendation_correctness": 0.39605901719066966, "gs_citation": 75, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7689671824586770576&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=C_PRLz8bEJx", "email": "polyu.edu.hk;;polyu.edu.hk", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Hong Kong Polytechnic University", "aff_unique_dep": "", "aff_unique_url": "https://www.polyu.edu.hk", "aff_unique_abbr": "PolyU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "C__2aUY0_3w", "title": "Semantic Video Synthesis from Video Scene Graphs", "track": "main", "status": "Withdraw", "tldr": "A video scene graph-to-video synthesis framework is proposed with a pre-trained video scene graph encoder, VQ-VAE and auto-regressive Transformer.", "abstract": "Video synthesis has recently attracted a lot of attention, as the natural extension to the image synthesis task. Most image synthesis works use class labels or text as guidance. However, neither labels nor text can provide explicit temporal guidance, such as when action starts or ends. To overcome this limitation, we introduce video scene graphs as input for video synthesis, as they represent the spatial and temporal relationships between objects in the scene. Since video scene graphs are usually temporally discrete annotations, we propose a video scene graph (VSG) encoder that not only encodes the existing video scene graphs but also predicts the graph representations for unlabeled frames. The VSG encoder is pre-trained with different contrastive multi-modal losses. A video scene graph-to-video synthesis framework (SGVS) based on the pre-trained VSG encoder, VQ-VAE, and auto-regressive Transformer is proposed to synthesize a semantic video given an initial scene image and a non-fixed number of video scene graphs. 
We evaluate SGVS and other state-of-the-art video synthesis models on the Action Genome dataset and demonstrate the positive significance of video scene graphs in video synthesis.", "keywords": "video synthesis;scene graph;scene understanding", "primary_area": "", "supplementary_material": "/attachment/3d6acfd9e23f6b428d7c95503e45db4fe9a1b150.zip", "author": "Yuren Cong;Jinhui Yi;Bodo Rosenhahn;Michael Ying Yang", "authorids": "~Yuren_Cong1;~Jinhui_Yi1;~Bodo_Rosenhahn1;~Michael_Ying_Yang2", "gender": "M;M;M;M", "homepage": "https://yrcong.github.io;;http://www.tnt.uni-hannover.de/staff/rosenhahn/;https://sites.google.com/site/michaelyingyang/", "dblp": "256/4899;;09/2973;24/7671", "google_scholar": "6DPcOUEAAAAJ;https://scholar.google.com/citations?hl=en;qq3TxtcAAAAJ;https://scholar.google.co.uk/citations?user=lgGmYBoAAAAJ", "orcid": ";;;", "linkedin": "yuren-cong-78626a193/;;b-rosenhahn-a397b1183/;", "or_profile": "~Yuren_Cong1;~Jinhui_Yi1;~Bodo_Rosenhahn1;~Michael_Yang1", "aff": "Leibniz University Hannover;Rheinische Friedrich-Wilhelms Universit\u00e4t Bonn;Institut f\u00fcr Informationsverarbeitung;University of Twente", "aff_domain": "uni-hannover.de;uni-bonn.de;tnt.uni-hannover.de;utwente.nl", "position": "PhD student;PhD student;Professor;Assistant Professor", "bibtex": "@misc{\ncong2023semantic,\ntitle={Semantic Video Synthesis from Video Scene Graphs},\nauthor={Yuren Cong and Jinhui Yi and Bodo Rosenhahn and Michael Ying Yang},\nyear={2023},\nurl={https://openreview.net/forum?id=C__2aUY0_3w}\n}", "github": "", "project": "", "reviewers": "9ogn;3sgP;bwvU;x36y", "site": "https://openreview.net/forum?id=C__2aUY0_3w", "pdf_size": 5269270, "recommendation": "3;5;6;6", "confidence": "5;4;3;4", "correctness": "2;3;3;3", "technical_novelty": "2;3;2;2", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "55;53;86;236", "wc_strength_and_weaknesses": "700;429;272;305", "wc_clarity_quality_novelty_and_reproducibility": "25;40;43;64", "wc_summary_review": "115;55;33;57", "wc_review": "895;577;434;662", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 107.5, 75.3342551566019 ], "wc_strength_and_weaknesses_avg": [ 426.5, 168.40501773997116 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 43.0, 13.910427743243556 ], "wc_summary_review_avg": [ 65.0, 30.364452901377952 ], "wc_review_avg": [ 642.0, 167.25579212690963 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.8660254037844386, "corr_recommendation_correctness": 0.9428090415820632, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:JcP861UbKIsJ:scholar.google.com/&scioq=Semantic+Video+Synthesis+from+Video+Scene+Graphs&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Leibniz University Hannover;Rheinische Friedrich-Wilhelms Universit\u00e4t Bonn;Institut f\u00fcr Informationsverarbeitung;University of Twente", "aff_unique_dep": ";;Department of Information Processing;", "aff_unique_url": 
"https://www.leibniz.uni-hannover.de;https://www.uni-bonn.de/;;https://www.utwente.nl", "aff_unique_abbr": "LUH;Uni Bonn;;UT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "Germany;Netherlands" }, { "id": "CcXTudu9bvu", "title": "DELTA: Diverse Client Sampling for Fasting Federated Learning", "track": "main", "status": "Reject", "tldr": "We propose a unbiased sampling method that characterizes the impact of client diversity and local variance, and provide a complete theoretical proof and experimental verification.", "abstract": "Partial client participation has been widely adopted in Federated Learning (FL) to efficiently reduce the communication burden. However, an improper client sampling scheme will select unrepresentative subsets, which will cause a large variance in the model update and slows down the convergence. Existing sampling methods are either biased or can be further improved to accelerate the convergence. In this paper, we propose an unbiased sampling scheme, termed DELTA, to alleviate this problem. In particular, DELTA characterizes the impact of client diversity and local variance and samples the representative clients who carry valuable information for global model updates. Moreover, DELTA is a provably optimal unbiased sampling scheme that minimizes the variance caused by partial client participation and achieves better convergence than other unbiased sampling schemes. We corroborate our results with experiments on both synthetic and real data sets.", "keywords": "federated learning;client sampling", "primary_area": "", "supplementary_material": "/attachment/d97590fbcadbfcc0a3cfdea70b2964207936dd9b.zip", "author": "Lin Wang;Yongxin Guo;Tao Lin;Xiaoying Tang", "authorids": "~Lin_Wang14;~Yongxin_Guo1;~Tao_Lin1;~Xiaoying_Tang2", "gender": ";M;M;F", "homepage": ";https://gyxxyg.github.io/yongxinguo/;https://lins-lab.github.io/;https://sse.cuhk.edu.cn/en/faculty/tangxiaoying", "dblp": ";;64/4492-4.html;134/9714-2", "google_scholar": ";5Cl1GZwAAAAJ;QE9pa_cAAAAJ;https://scholar.google.com/citations?hl=zh-TW", "orcid": ";0009-0001-8652-0722;0000-0002-3246-6935;0000-0003-3955-1195", "linkedin": ";;;", "or_profile": "~Lin_Wang14;~Yongxin_Guo1;~Tao_Lin1;~Xiaoying_Tang2", "aff": ";Chinese University of HongKong, Shenzhen;Westlake University;The Chinese University of Hong Kong, Shenzhen", "aff_domain": ";cuhk.edu.cn;westlake.edu;cuhk.edu.cn", "position": ";PhD student;Assistant Professor;Assistant Professor", "bibtex": "@misc{\nwang2023delta,\ntitle={{DELTA}: Diverse Client Sampling for Fasting Federated Learning},\nauthor={Lin Wang and Yongxin Guo and Tao Lin and Xiaoying Tang},\nyear={2023},\nurl={https://openreview.net/forum?id=CcXTudu9bvu}\n}", "github": "", "project": "", "reviewers": "Yzew;LBTW;xCAd;N8Mo", "site": "https://openreview.net/forum?id=CcXTudu9bvu", "pdf_size": 2641510, "recommendation": "3;5;6;6", "confidence": "4;3;3;4", "correctness": "2;3;4;4", "technical_novelty": "3;2;3;3", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "63;38;85;70", "wc_strength_and_weaknesses": "246;551;36;243", "wc_clarity_quality_novelty_and_reproducibility": "25;27;26;74", "wc_summary_review": "17;65;55;76", "wc_review": "351;681;202;463", "wc_reply_reviewers": "0;86;0;0", "wc_reply_authors": "1224;2156;419;1209", "reply_reviewers": "0;1;0;0", "reply_authors": "4;7;2;5", "recommendation_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.25, 0.82915619758885 ], 
"technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 64.0, 16.98528775146303 ], "wc_strength_and_weaknesses_avg": [ 269.0, 183.7239777492312 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 38.0, 20.796634343085422 ], "wc_summary_review_avg": [ 53.25, 22.20782519743885 ], "wc_review_avg": [ 424.25, 174.77324595028838 ], "wc_reply_reviewers_avg": [ 21.5, 37.239092362730865 ], "wc_reply_authors_avg": [ 1252.0, 615.1703016238674 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 4.5, 1.8027756377319946 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.40824829046386296, "corr_recommendation_correctness": 0.9847319278346618, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7563394755737674697&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;0", "aff_unique_norm": "Chinese University of Hong Kong;Westlake University", "aff_unique_dep": ";", "aff_unique_url": "https://www.cuhk.edu.cn;https://www.westlake.edu.cn", "aff_unique_abbr": "CUHK;WU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Shenzhen;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "CdU7ApBxICO", "title": "Self-attentive Rationalization for Graph Contrastive Learning", "track": "main", "status": "Reject", "tldr": "Graph contrastive learning framework with self-attentive rationalization", "abstract": "Graph augmentation is the key component to reveal instance-discriminative features of a graph as its rationale in graph contrastive learning (GCL).\nAnd existing rationale-aware augmentation mechanisms in GCL frameworks roughly fall into two categories and suffer from inherent limitations: (1) non-heuristic methods with the guidance of domain knowledge to preserve salient features, which require expensive expertise and lacks generality, or (2) heuristic augmentations with a co-trained auxiliary model to identify crucial substructures, which face not only the dilemma between system complexity and transformation diversity, but also the instability stemming from the co-training of two separated sub-models. \nInspired by recent studies on transformers, we propose $\\underline{S}$elf-attentive $\\underline{R}$ationale guided $\\underline{G}$raph $\\underline{C}$ontrastive $\\underline{L}$earning (SR-GCL), which integrates rationale finder and encoder together, leverages the self-attention values in transformer module as a natural guidance to delineate semantically informative substructures from both node- and edge-wise views, and contrasts on rationale-aware augmented pairs.\nOn real world biochemistry datasets, visualization results verify the effectiveness of self-attentive rationalization and the performance on downstream tasks demonstrates the state-of-the-art performance of SR-GCL for graph model pre-training. 
", "keywords": "Graph Contrastive Learning;Self-supervised Learning;Transformer;Rationalization;self-attention", "primary_area": "", "supplementary_material": "", "author": "Sihang Li;Yanchen Luo;An Zhang;Xiang Wang;Xiangnan He;Tat-Seng Chua", "authorids": "~Sihang_Li1;~Yanchen_Luo1;~An_Zhang2;~Xiang_Wang6;~Xiangnan_He1;~Tat-Seng_Chua2", "gender": ";M;;M;M;", "homepage": ";https://github.com/lyc0930;;https://github.com/xiangwang1223;http://staff.ustc.edu.cn/~hexn;", "dblp": ";359/3305;;31/2864-10;59/1007;", "google_scholar": ";e5SeNbMAAAAJ;;https://scholar.google.com.sg/citations?user=HdhaQB0AAAAJ;https://scholar.google.com.sg/citations?user=X45Go24AAAAJ;", "orcid": ";0009-0009-2637-176X;;0000-0002-6148-6329;0000-0001-8472-7992;", "linkedin": ";;;;;", "or_profile": "~Sihang_Li1;~Yanchen_Luo1;~An_Zhang2;~Xiang_Wang6;~Xiangnan_He1;~Tat-Seng_Chua2", "aff": ";University of Science and Technology of China;;University of Science and Technology of China;University of Science and Technology of China;", "aff_domain": ";ustc.edu.cn;;ustc.edu.cn;ustc.edu.cn;", "position": ";PhD student;;Full Professor;Professor;", "bibtex": "@misc{\nli2023selfattentive,\ntitle={Self-attentive Rationalization for Graph Contrastive Learning},\nauthor={Sihang Li and Yanchen Luo and An Zhang and Xiang Wang and Xiangnan He and Tat-Seng Chua},\nyear={2023},\nurl={https://openreview.net/forum?id=CdU7ApBxICO}\n}", "github": "", "project": "", "reviewers": "K54f;YpSx;PbQf;KsiL;oC1b", "site": "https://openreview.net/forum?id=CdU7ApBxICO", "pdf_size": 4877585, "recommendation": "3;5;5;6;6", "confidence": "4;4;3;4;3", "correctness": "3;3;3;4;3", "technical_novelty": "3;3;3;4;3", "empirical_novelty": "3;2;3;3;3", "wc_summary_paper": "80;76;48;39;38", "wc_strength_and_weaknesses": "154;289;163;35;123", "wc_clarity_quality_novelty_and_reproducibility": "49;36;44;7;31", "wc_summary_review": "38;32;31;16;26", "wc_review": "321;433;286;97;218", "wc_reply_reviewers": "0;0;0;24;33", "wc_reply_authors": "482;567;692;103;237", "reply_reviewers": "0;0;0;1;1", "reply_authors": "2;2;3;2;2", "recommendation_avg": [ 5.0, 1.0954451150103321 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "correctness_avg": [ 3.2, 0.39999999999999997 ], "technical_novelty_avg": [ 3.2, 0.39999999999999997 ], "empirical_novelty_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_paper_avg": [ 56.2, 18.18130908378162 ], "wc_strength_and_weaknesses_avg": [ 152.8, 81.7567122626638 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 33.4, 14.595889832415152 ], "wc_summary_review_avg": [ 28.6, 7.364781055808788 ], "wc_review_avg": [ 271.0, 111.40377013368982 ], "wc_reply_reviewers_avg": [ 11.4, 14.249210504445502 ], "wc_reply_authors_avg": [ 416.2, 216.0290721176203 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.372677996249965, "corr_recommendation_correctness": 0.4564354645876385, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1013571056263146312&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Science and Technology of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ustc.edu.cn", "aff_unique_abbr": "USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "PGrad: Learning Principal Gradients For Domain 
Generalization", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11505", "id": "CgCmwcfgEdH", "poster": "", "openreview": "https://openreview.net/forum?id=CgCmwcfgEdH", "slides": "https://iclr.cc/virtual/2023/poster/11505", "video": "https://iclr.cc/virtual/2023/poster/11505", "author_site": "Zhe Wang, Jake Grigsby, Yanjun Qi", "tldr": "", "abstract": "Machine learning models fail to perform when facing out-of-distribution (OOD) domains, a challenging task known as domain generalization (DG). In this work, we develop a novel DG training strategy, we call PGrad, to learn a robust gradient direction, improving models' generalization ability on unseen domains. The proposed gradient aggregates the principal directions of a sampled roll-out optimization trajectory that measures the training dynamics across all training domains. PGrad gradient design forces the DG training to ignore domain-dependent noise signals and updates all training domains with a robust direction covering main components of parameter dynamics. We further improve PGrad via bijection-based computational refinement and directional plus length-based calibrations. Our theoretical proof connects PGrad to the spectral analysis of Hessian in training neural networks. Experiments on DomainBed and WILDS benchmarks demonstrate that our approach effectively enables robust DG optimization and leads to smoothly decreased loss curves. Empirically, PGrad achieves competitive results across seven datasets, demonstrating its efficacy across both synthetic and real-world distributional shifts.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Zhe Wang;Jake Grigsby;Yanjun Qi", "authorids": "~Zhe_Wang19;~Jake_Grigsby1;~Yanjun_Qi1", "gender": "M;M;", "homepage": "https://zhexjtu.github.io;https://github.com/jakegrigsby;", "dblp": "75/3158-25.html;276/6109;", "google_scholar": "fqNkQjgAAAAJ;qgUe3jYAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Zhe_Wang19;~Jake_Grigsby1;~Yanjun_Qi1", "aff": "University of Virginia;University of Texas at Austin;", "aff_domain": "virginia.edu;cs.utexas.edu;", "position": "PhD student;PhD student;", "bibtex": "@inproceedings{\nwang2023pgrad,\ntitle={{PG}rad: Learning Principal Gradients For Domain Generalization},\nauthor={Zhe Wang and Jake Grigsby and Yanjun Qi},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=CgCmwcfgEdH}\n}", "github": "", "project": "", "reviewers": "Fpjy;jbQd;ap7X", "pdf_size": 3272293, "recommendation": "3;8;8", "confidence": "4;4;4", "correctness": "2;3;3", "technical_novelty": "3;3;3", "empirical_novelty": "2;3;3", "wc_summary_paper": "43;145;174", "wc_strength_and_weaknesses": "190;530;436", "wc_clarity_quality_novelty_and_reproducibility": "18;164;544", "wc_summary_review": "23;48;18", "wc_review": "274;887;1172", "wc_reply_reviewers": "0;308;0", "wc_reply_authors": "667;622;747", "reply_reviewers": "0;1;0", "reply_authors": "1;1;1", "recommendation_avg": [ 6.333333333333333, 2.357022603955158 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 120.66666666666667, 56.18026541608915 ], "wc_strength_and_weaknesses_avg": [ 385.3333333333333, 143.35348695522623 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 242.0, 221.70851735255158 ], 
"wc_summary_review_avg": [ 29.666666666666668, 13.123346456686352 ], "wc_review_avg": [ 777.6666666666666, 374.66992881142136 ], "wc_reply_reviewers_avg": [ 102.66666666666667, 145.19259240363775 ], "wc_reply_authors_avg": [ 678.6666666666666, 51.69354139756941 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.9999999999999998, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15256679034750048824&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=CgCmwcfgEdH", "email": "virginia.edu;cs.utexas.edu;", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "University of Virginia;University of Texas at Austin", "aff_unique_dep": ";", "aff_unique_url": "https://www.virginia.edu;https://www.utexas.edu", "aff_unique_abbr": "UVA;UT Austin", "aff_campus_unique_index": "1", "aff_campus_unique": ";Austin", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "Ch4e4wk7Ew", "title": "Continuously Parameterized Mixture Models", "track": "main", "status": "Withdraw", "tldr": "We parameterize mixtures of factor analyzers by a neural ordinary differential equation and train with a smooth curriculum to learn an interpretable likelihood model superior to standard mixture results.", "abstract": "Mixture models are universal approximators of smooth densities but are difficult to utilize in complicated datasets due to restrictions on typically available modes and challenges with initialiations.\nWe show that by continuously parameterizing a mixture of factor analyzers using a learned ordinary differential equation, we can improve the fit of mixture models over direct methods.\nOnce trained, the mixture components can be extracted and the neural ODE can be discarded, leaving us with an effective, but low-resource model.\nWe additionally explore the use of a training curriculum from an easy-to-model latent space extracted from a normalizing flow to the more complex input space and show that the smooth curriculum helps to stabilize and improve results with and without the continuous parameterization.\nFinally, we introduce a hierarchical version of the model to enable more flexible, robust classification and clustering, and show substantial improvements against traditional parameterizations of GMMs.", "keywords": "mixture models;normalizing flows;ordinary differential equations;clustering;interpretable learning", "primary_area": "", "supplementary_material": "", "author": "Christopher M Bender;Yifeng Shi;Marc Niethammer;Junier Oliva", "authorids": "~Christopher_M_Bender1;~Yifeng_Shi3;~Marc_Niethammer1;~Junier_Oliva1", "gender": "M;M;M;M", "homepage": ";;http://wwwx.cs.unc.edu/~mn/;http://lupalab.com", "dblp": ";;88/3304;137/8390", "google_scholar": ";u9mELXIAAAAJ;https://scholar.google.com.au/citations?user=KqtBi6MAAAAJ;", "orcid": ";;;", "linkedin": ";https://www.linkedin.com/feed/;;", "or_profile": "~Christopher_M_Bender1;~Yifeng_Shi3;~Marc_Niethammer1;~Junier_Oliva1", "aff": "Department of Computer Science, University of North Carolina, Chapel Hill;Department of Computer Science, University of North Carolina, Chapel Hill;The University of North Carolina at Chapel Hill;", "aff_domain": "cs.unc.edu;cs.unc.edu;unc.edu;", "position": "PhD student;PhD student;Full Professor;", "bibtex": 
"@misc{\nbender2023continuously,\ntitle={Continuously Parameterized Mixture Models},\nauthor={Christopher M Bender and Yifeng Shi and Marc Niethammer and Junier Oliva},\nyear={2023},\nurl={https://openreview.net/forum?id=Ch4e4wk7Ew}\n}", "github": "", "project": "", "reviewers": "z39F;c5hB;uzQD;7fWr", "site": "https://openreview.net/forum?id=Ch4e4wk7Ew", "pdf_size": 5576721, "recommendation": "3;3;3;5", "confidence": "4;2;3;3", "correctness": "2;4;3;3", "technical_novelty": "3;3;2;3", "empirical_novelty": "2;3;1;3", "wc_summary_paper": "77;161;78;104", "wc_strength_and_weaknesses": "442;77;493;293", "wc_clarity_quality_novelty_and_reproducibility": "36;129;72;33", "wc_summary_review": "315;485;31;43", "wc_review": "870;852;674;473", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 105.0, 34.09545424246464 ], "wc_strength_and_weaknesses_avg": [ 326.25, 161.58182911453875 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 67.5, 38.68139087468288 ], "wc_summary_review_avg": [ 218.5, 191.24003241999307 ], "wc_review_avg": [ 717.25, 160.48267040400344 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10893372526961825497&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of North Carolina;University of North Carolina at Chapel Hill", "aff_unique_dep": "Department of Computer Science;", "aff_unique_url": "https://www.unc.edu;https://www.unc.edu", "aff_unique_abbr": "UNC;UNC Chapel Hill", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Chapel Hill", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "Cj4aar5X65H", "title": "Rewarding Episodic Visitation Discrepancy for Exploration in Reinforcement Learning", "track": "main", "status": "Desk Reject", "tldr": "The paper proposes a quantified and computation-efficient intrinsic reward method for improving exploration in reinforcement learning.", "abstract": "Exploration is critical for deep reinforcement learning in complex environments with high-dimensional observations and sparse rewards. To address this problem, recent approaches proposed to leverage intrinsic rewards to improve exploration, such as novelty-based exploration and prediction-based exploration. However, many intrinsic reward modules require sophisticated structures and representation learning, resulting in prohibitive computational complexity and unstable performance. In this paper, we propose Rewarding Episodic Visitation Discrepancy (REVD), a computation-efficient and quantified exploration method. More specifically, REVD provides intrinsic rewards by evaluating the R\u00e9nyi divergence-based visitation discrepancy between episodes. To estimate the divergence efficiently, a $k$-nearest neighbor estimator is utilized with a randomly-initialized state encoder. 
Finally, REVD is tested on Atari games and PyBullet Robotics Environments. Extensive experiments demonstrate that REVD can significantly improve the sample efficiency of reinforcement learning algorithms and outperform the benchmark methods.", "keywords": "reinforcement learning;exploration;intrinsic reward;computation-efficient", "primary_area": "", "supplementary_material": "", "author": "Mingqi Yuan;Bo Li;Xin Jin;Wenjun Zeng", "authorids": "~Mingqi_Yuan1;~Bo_Li27;~Xin_Jin8;~Wenjun_Zeng3", "gender": "M;M;M;M", "homepage": "https://github.com/yuanmingqi;https://www4.comp.polyu.edu.hk/~bo2li/;http://home.ustc.edu.cn/~jinxustc/;https://www.eias.ac.cn/h-col-187.html", "dblp": "282/4291;50/3402-37;68/3340-14;57/145", "google_scholar": "https://scholar.google.com.hk/citations?user=xtj9MIMAAAAJ;;byaSC-kAAAAJ;_cUfvYQAAAAJ", "orcid": ";;0000-0002-1820-8358;", "linkedin": ";;;", "or_profile": "~Mingqi_Yuan1;~Bo_Li27;~Xin_Jin8;~Wenjun_Zeng3", "aff": "The Hong Kong Polytechnic University;The Hong Kong Polytechnic University;Eastern Institute of Technology, Ningbo;Eastern Institute for Advanced Study", "aff_domain": "polyu.edu.hk;polyu.edu.hk;eitech.edu.cn;eias.ac.cn", "position": "PhD student;Assistant Professor;Assistant Professor;Full Professor", "bibtex": "@misc{\nyuan2023rewarding,\ntitle={Rewarding Episodic Visitation Discrepancy for Exploration in Reinforcement Learning},\nauthor={Mingqi Yuan and Bo Li and Xin Jin and Wenjun Zeng},\nyear={2023},\nurl={https://openreview.net/forum?id=Cj4aar5X65H}\n}", "github": "", "project": "", "reviewers": "", "site": "https://openreview.net/forum?id=Cj4aar5X65H", "pdf_size": 814054, "recommendation": "", "confidence": "", "correctness": "", "technical_novelty": "", "empirical_novelty": "", "wc_summary_paper": "", "wc_strength_and_weaknesses": "", "wc_clarity_quality_novelty_and_reproducibility": "", "wc_summary_review": "", "wc_review": "", "wc_reply_reviewers": "", "wc_reply_authors": "", "reply_reviewers": "", "reply_authors": "", "recommendation_avg": [ 0, 0 ], "confidence_avg": [ 0, 0 ], "correctness_avg": [ 0, 0 ], "technical_novelty_avg": [ 0, 0 ], "empirical_novelty_avg": [ 0, 0 ], "wc_summary_paper_avg": [ 0, 0 ], "wc_strength_and_weaknesses_avg": [ 0, 0 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 0, 0 ], "wc_summary_review_avg": [ 0, 0 ], "wc_review_avg": [ 0, 0 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 1, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0, "corr_recommendation_correctness": 0, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4751713315087761866&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Hong Kong Polytechnic University;Eastern Institute of Technology;Eastern Institute for Advanced Study", "aff_unique_dep": ";;", "aff_unique_url": "https://www.polyu.edu.hk;https://www.eit.edu.cn;", "aff_unique_abbr": "PolyU;;", "aff_campus_unique_index": "0;0;1", "aff_campus_unique": "Hong Kong SAR;Ningbo;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China;" }, { "title": "Molecular Geometry Pretraining with SE(3)-Invariant Denoising Distance Matching", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12234", "id": "CjTHVo1dvR", "poster": "/media/PosterPDFs/ICLR%202023/12234.png?t=1682195606.4097044", "openreview": 
"https://openreview.net/forum?id=CjTHVo1dvR", "slides": "https://iclr.cc/virtual/2023/poster/12234", "video": "https://iclr.cc/virtual/2023/poster/12234", "author_site": "Shengchao Liu, Hongyu Guo, Jian Tang", "tldr": "We propose GeoSSL, a self-supervised learning method using the denoising distance matching for molecular goemetry pretraining.", "abstract": "Molecular representation pretraining is critical in various applications for drug and material discovery due to the limited number of labeled molecules, and most existing work focuses on pretraining on 2D molecular graphs. However, the power of pretraining on 3D geometric structures has been less explored. This is owing to the difficulty of finding a sufficient proxy task that can empower the pretraining to effectively extract essential features from the geometric structures. Motivated by the dynamic nature of 3D molecules, where the continuous motion of a molecule in the 3D Euclidean space forms a smooth potential energy surface, we propose GeoSSL, a 3D coordinate denoising pretraining framework to model such an energy landscape. Further by leveraging an SE(3)-invariant score matching method, we propose GeoSSL-DDM in which the coordinate denoising proxy task is effectively boiled down to denoising the pairwise atomic distances in a molecule. Our comprehensive experiments confirm the effectiveness and robustness of our proposed method.", "keywords": "molecule;pretraining;representation;geometry;denoising score matching", "primary_area": "", "supplementary_material": "", "author": "Shengchao Liu;Hongyu Guo;Jian Tang", "authorids": "~Shengchao_Liu1;~Hongyu_Guo1;~Jian_Tang1", "gender": "M;M;", "homepage": "https://chao1224.github.io/;https://hongyuharryguo.github.io/;http://www.jian-tang.com", "dblp": ";;181/2667-5", "google_scholar": "F1ws3XUAAAAJ;https://scholar.google.ca/citations?user=bZUqlakAAAAJ;https://scholar.google.ca/citations?user=1ir6WUEAAAAJ", "orcid": "0000-0003-2030-2367;;", "linkedin": ";harry-h-y-guo-a582087/;", "or_profile": "~Shengchao_Liu1;~Hongyu_Guo1;~Jian_Tang1", "aff": "MILA-UdeM;National Research Council Canada;Mila, HEC Montreal", "aff_domain": "mila.quebec;nrc-cnrc.gc.ca;hec.ca", "position": "PhD student;Senior Research Officer;Assistant Professor", "bibtex": "@inproceedings{\nliu2023molecular,\ntitle={Molecular Geometry Pretraining with {SE}(3)-Invariant Denoising Distance Matching},\nauthor={Shengchao Liu and Hongyu Guo and Jian Tang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=CjTHVo1dvR}\n}", "github": "", "project": "", "reviewers": "wxWw;WmAR;a9bJ", "pdf_size": 1582527, "recommendation": "5;6;6", "confidence": "4;3;3", "correctness": "3;3;3", "technical_novelty": "3;3;3", "empirical_novelty": "3;3;3", "wc_summary_paper": "67;43;35", "wc_strength_and_weaknesses": "92;55;222", "wc_clarity_quality_novelty_and_reproducibility": "27;78;31", "wc_summary_review": "42;309;21", "wc_review": "228;485;309", "wc_reply_reviewers": "0;20;73", "wc_reply_authors": "2392;916;884", "reply_reviewers": "0;1;1", "reply_authors": "4;3;2", "recommendation_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 48.333333333333336, 13.59738536958076 ], "wc_strength_and_weaknesses_avg": [ 123.0, 71.6147098483731 ], 
"wc_clarity_quality_novelty_and_reproducibility_avg": [ 45.333333333333336, 23.156472577277878 ], "wc_summary_review_avg": [ 124.0, 131.0953851209111 ], "wc_review_avg": [ 340.6666666666667, 107.2825967040114 ], "wc_reply_reviewers_avg": [ 31.0, 30.80043289739069 ], "wc_reply_authors_avg": [ 1397.3333333333333, 703.4568611901909 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 3.0, 0.816496580927726 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.9999999999999997, "corr_recommendation_correctness": 0.0, "gs_citation": 92, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11474810957514222597&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=CjTHVo1dvR", "email": "mila.quebec;nrc-cnrc.gc.ca;hec.ca", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Mila;National Research Council Canada;HEC Montreal", "aff_unique_dep": "Montreal Institute for Learning Algorithms;;HEC Business School", "aff_unique_url": "https://mila.quebec;https://www.nrc-cnrc.gc.ca;https://www.hec.ca", "aff_unique_abbr": "MILA;NRC-CNRC;HEC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Montreal", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "id": "Ck1UtnVukP8", "title": "From Images to Textual Prompts: Zero-shot VQA with Frozen Large Language Models", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Large language models (LLMs) have demonstrated excellent zero-shot generalization to new tasks. However, effective utilization of LLMs for zero-shot visual question-answering (VQA) remains challenging, primarily due to the modality disconnection and task disconnection between LLM and VQA task. End-to-end training on vision and language data may bridge the disconnections, but is inflexible and computationally expensive. To address this issue, we propose \\emph{Img2Prompt}, a plug-and-play module that provides the prompts that can bridge the aforementioned modality and task disconnections, so that LLMs can perform VQA tasks without end-to-end training. In order to provide such prompts, we further employ LLM-agnostic models to provide prompts that can describe image content and self-constructed question-answer pairs, which can effectively guide LLM to perform VQA tasks. Img2Prompt offers the following benefits: 1) It is LLM-agnostic and can work with any LLM to perform VQA. 2) It renders end-to-end training unnecessary and significantly reduces the cost of deploying LLM for VQA tasks. 3) It achieves comparable or better performance than methods relying on end-to-end training. 
On the challenging A-OKVQA dataset, our method outperforms some few-shot methods by as much as 20\\%.", "keywords": "Large Language Model;Visual Question Answer;Prompts;Zero-Shot", "primary_area": "", "supplementary_material": "", "author": "Jiaxian Guo;Junnan Li;Dongxu Li;Anthony Tiong;Boyang Li;Dacheng Tao;Steven HOI", "authorids": "~Jiaxian_Guo2;~Junnan_Li2;~Dongxu_Li3;~Anthony_Tiong1;~Boyang_Li1;~Dacheng_Tao1;~Steven_HOI1", "gender": "M;;;;Unspecified;;M", "homepage": ";;;;http://www.boyangli.org;;https://www.smu.edu.sg/faculty/profile/110831/Steven-HOI", "dblp": "206/6264;;;;70/1211-1;;h/StevenCHHoi", "google_scholar": "wQgPocEAAAAJ;;;;QwL4z2UAAAAJ;;https://scholar.google.com.tw/citations?user=JoLjflYAAAAJ", "orcid": ";;;;0000-0002-6230-2376;;", "linkedin": ";;;;;;", "or_profile": "~Jiaxian_Guo2;~Junnan_Li2;~Dongxu_Li3;~Anthony_Tiong1;~Boyang_Li1;~Dacheng_Tao1;~Steven_HOI1", "aff": "The University of Tokyo, The University of Tokyo;;;;Nanyang Technological University;;Singapore Management University", "aff_domain": "weblab.t.u-tokyo.ac.jp;;;;ntu.edu.sg;;", "position": "Postdoc;;;;Associate Professor;;Associate Professor", "bibtex": "@misc{\nguo2023from,\ntitle={From Images to Textual Prompts: Zero-shot {VQA} with Frozen Large Language Models},\nauthor={Jiaxian Guo and Junnan Li and Dongxu Li and Anthony Tiong and Boyang Li and Dacheng Tao and Steven HOI},\nyear={2023},\nurl={https://openreview.net/forum?id=Ck1UtnVukP8}\n}", "github": "", "project": "", "reviewers": "BBCT;GnMz;5P1h;XNvZ", "site": "https://openreview.net/forum?id=Ck1UtnVukP8", "pdf_size": 6544141, "recommendation": "3;3;5;6", "confidence": "5;4;3;5", "correctness": "2;2;3;4", "technical_novelty": "2;3;2;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "43;104;47;62", "wc_strength_and_weaknesses": "173;44;211;124", "wc_clarity_quality_novelty_and_reproducibility": "29;262;8;13", "wc_summary_review": "25;12;35;55", "wc_review": "270;422;301;254", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "878;799;566;170", "reply_reviewers": "0;0;0;0", "reply_authors": "2;2;1;1", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "correctness_avg": [ 2.75, 0.82915619758885 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 64.0, 24.155744658362327 ], "wc_strength_and_weaknesses_avg": [ 138.0, 62.42195126716242 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 78.0, 106.5152571230995 ], "wc_summary_review_avg": [ 31.75, 15.706288549495072 ], "wc_review_avg": [ 311.75, 65.8573268513079 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 603.25, 275.18119030922156 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.058025885318565944, "corr_recommendation_correctness": 0.986440050415621, "gs_citation": 38, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4186752577721482747&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Tokyo;Nanyang Technological University;Singapore Management University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.u-tokyo.ac.jp;https://www.ntu.edu.sg;https://www.smu.edu.sg", "aff_unique_abbr": "UTokyo;NTU;SMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Japan;Singapore" }, { "id": "Cn6JkFnKgPX", "title": 
"Analysis of differentially private synthetic data: a general measurement error approach", "track": "main", "status": "Reject", "tldr": "", "abstract": "Differential private (DP) synthetic datasets have been receiving significant attention from academia, industry, and government. However, little is known about how to perform statistical inference using DP synthetic datasets. Naive approaches that do not take into account the induced uncertainty due to DP mechanism will result in biased estimators and invalid inferences. In this paper, we present a general class of bias-corrected DP estimators with valid asymptotic confidence intervals for parameters in regression settings, by establishing the connection between additive DP mechanisms and measurement error models. Our simulation shows that when the sample covariance between DP noises and data is close to zero, our estimator is far superior to the widely used sufficient statistic perturbation algorithm, and the CIs can achieve better coverage when comparing to the naive CIs obtained from ignoring the DP mechanism.", "keywords": "Measurement Error Model;Differential Privacy;Regression;Statistical Inference", "primary_area": "", "supplementary_material": "", "author": "Yangdi Jiang;Yi Liu;Xiaodong Yan;Anne-Sophie Charest;Linglong Kong;Bei Jiang", "authorids": "~Yangdi_Jiang1;~Yi_Liu13;~Xiaodong_Yan1;~Anne-Sophie_Charest1;~Linglong_Kong2;~Bei_Jiang1", "gender": "M;M;M;F;M;F", "homepage": "https://yangdijiang.github.io/;https://apps.ualberta.ca/directory/person/yliu16;https://yanxiaodong128.github.io/index.html;https://www.mat.ulaval.ca/departement-et-professeurs/direction-personnel-et-etudiants/professeurs/fiche-de-professeur/show/charest-anne-sophie/;https://www.ualberta.ca/~lkong;https://www.ualberta.ca/~bei1", "dblp": ";97/4626-62;;;35/8525;190/4697", "google_scholar": "https://scholar.google.ca/citations?user=OfZ985EAAAAJ;;;F2gY_WkAAAAJ;https://scholar.google.ca/citations?hl=en;https://scholar.google.ca/citations?user=MfOZ8G0AAAAJ", "orcid": ";;;;0000-0003-3011-9216;0000-0002-0033-839X", "linkedin": "https://ca.linkedin.com/in/yangdi-jiang-b50408141;;;;;", "or_profile": "~Yangdi_Jiang1;~Yi_Liu13;~Xiaodong_Yan1;~Anne-Sophie_Charest1;~Linglong_Kong2;~Bei_Jiang1", "aff": "University of Alberta;University of Alberta;Shandong University;Laval university;University of Alberta;University of Alberta", "aff_domain": "ualberta.ca;ualberta.ca;edu.cn;ulaval.ca;ualberta.ca;ualberta.ca", "position": "PhD student;PhD student;Associate Professor;Associate Professor;Full Professor;Associate Professor", "bibtex": "@misc{\njiang2023analysis,\ntitle={Analysis of differentially private synthetic data: a general measurement error approach},\nauthor={Yangdi Jiang and Yi Liu and Xiaodong Yan and Anne-Sophie Charest and Linglong Kong and Bei Jiang},\nyear={2023},\nurl={https://openreview.net/forum?id=Cn6JkFnKgPX}\n}", "github": "", "project": "", "reviewers": "cBN5;cuVk;3s3F;ffBc", "site": "https://openreview.net/forum?id=Cn6JkFnKgPX", "pdf_size": 654264, "recommendation": "3;5;5;5", "confidence": "4;3;4;3", "correctness": "3;4;3;3", "technical_novelty": "2;4;2;3", "empirical_novelty": "2;0;0;3", "wc_summary_paper": "38;65;111;37", "wc_strength_and_weaknesses": "143;63;18;155", "wc_clarity_quality_novelty_and_reproducibility": "132;33;327;9", "wc_summary_review": "45;51;46;24", "wc_review": "358;212;502;225", "wc_reply_reviewers": "0;0;13;0", "wc_reply_authors": "387;82;430;194", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.5, 
0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 1.25, 1.299038105676658 ], "wc_summary_paper_avg": [ 62.75, 30.036436206714004 ], "wc_strength_and_weaknesses_avg": [ 94.75, 56.69380477618344 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 125.25, 125.27245307728272 ], "wc_summary_review_avg": [ 41.5, 10.35615758860399 ], "wc_review_avg": [ 324.25, 117.45717304617884 ], "wc_reply_reviewers_avg": [ 3.25, 5.629165124598851 ], "wc_reply_authors_avg": [ 273.25, 141.74514982883895 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:o3ewsvoEM58J:scholar.google.com/&scioq=Analysis+of+differentially+private+synthetic+data:+a+general+measurement+error+approach&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;1;2;0;0", "aff_unique_norm": "University of Alberta;Shandong University;Laval University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ualberta.ca;http://www.sdu.edu.cn;https://www.laval.ca", "aff_unique_abbr": "UAlberta;SDU;Laval", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "Canada;China" }, { "id": "CnG8rd1hHeT", "title": "OpenFE: Automated Feature Generation beyond Expert-level Performance", "track": "main", "status": "Reject", "tldr": "OpenFE: automated feature generation beyond expert-level performance", "abstract": "The goal of automated feature generation is to liberate machine learning experts from the laborious task of manual feature generation, which is crucial for improving the learning performance of tabular data. The major challenge in automated feature generation is to efficiently and accurately identify useful features from a vast pool of candidate features. In this paper, we present OpenFE, an automated feature generation tool that provides competitive results against machine learning experts. OpenFE achieves efficiency and accuracy with two components: 1) a novel feature boosting method for accurately estimating the incremental performance of candidate features. 2) a feature-scoring framework for retrieving effective features from a large number of candidates through successive featurewise halving and feature importance attribution. Extensive experiments on seven benchmark datasets show that OpenFE outperforms existing baseline methods. We further evaluate OpenFE in two famous Kaggle competitions with thousands of data science teams participating. In one of the competitions, features generated by OpenFE with a simple baseline model can beat 99.3% data science teams, demonstrating for the first time that automated feature generation can outperform human experts. In addition to the empirical results, we provide a theoretical perspective to show that feature generation has benefit provably in a simple yet representative setting. 
Codes and datasets are available in the supplementary materials.", "keywords": "tabular data;feature generation", "primary_area": "", "supplementary_material": "/attachment/676b5a449ee1ffba6310bde2ea5010f3d5dbf246.zip", "author": "Tianping Zhang;Zheyu Zhang;Haoyan Luo;Fengyuan Liu;Wei Cao;Jian Li", "authorids": "~Tianping_Zhang1;~Zheyu_Zhang4;~Haoyan_Luo1;~Fengyuan_Liu1;~Wei_Cao1;~Jian_Li2", "gender": "M;M;M;M;M;M", "homepage": "https://zheyuaqazhang.github.io/;https://clarenceluo78.github.io;;;http://iiis.tsinghua.edu.cn/~jianli;https://scholar.google.com/citations?user=D_AJuY0AAAAJ", "dblp": ";;;54/6265;33/5448-15;", "google_scholar": "-vSxSMMAAAAJ;;7gxp6NkAAAAJ;;zX7i1EkAAAAJ;D_AJuY0AAAAJ", "orcid": "0000-0003-1798-3093;;0009-0008-3451-6132;;;", "linkedin": ";;lfy0x4c-15481919b/;;;", "or_profile": "~Zheyu_Zhang4;~Haoyan_Luo1;~Fengyuan_Liu1;~Wei_Cao1;~Jian_Li2;~\u5929\u5e73_\u5f201", "aff": "Tsinghua University;The Chinese University of Hong Kong, Shenzhen;University of Oxford;;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;cuhk.edu.hk;ox.ac.uk;;tsinghua.edu.cn;tsinghua.edu.cn", "position": "Undergrad student;Undergrad student;MS student;;Associate Professor;PhD student", "bibtex": "@misc{\nzhang2023openfe,\ntitle={Open{FE}: Automated Feature Generation beyond Expert-level Performance},\nauthor={Tianping Zhang and Zheyu Zhang and Haoyan Luo and Fengyuan Liu and Wei Cao and Jian Li},\nyear={2023},\nurl={https://openreview.net/forum?id=CnG8rd1hHeT}\n}", "github": "", "project": "", "reviewers": "YEEM;mToT;kN28", "site": "https://openreview.net/forum?id=CnG8rd1hHeT", "pdf_size": 1008927, "recommendation": "5;6;8", "confidence": "2;3;3", "correctness": "3;3;4", "technical_novelty": "2;3;3", "empirical_novelty": "3;1;3", "wc_summary_paper": "101;61;67", "wc_strength_and_weaknesses": "375;176;224", "wc_clarity_quality_novelty_and_reproducibility": "141;44;220", "wc_summary_review": "36;125;152", "wc_review": "653;406;663", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "341;606;597", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.9428090415820634 ], "wc_summary_paper_avg": [ 76.33333333333333, 17.613126418163876 ], "wc_strength_and_weaknesses_avg": [ 258.3333333333333, 84.79124692770803 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 135.0, 71.9768481295664 ], "wc_summary_review_avg": [ 104.33333333333333, 49.56028876249837 ], "wc_review_avg": [ 574.0, 118.86406802169724 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 514.6666666666666, 122.8558324034946 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.7559289460184545, "corr_recommendation_correctness": 0.944911182523068, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12768649048645583676&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "Tsinghua University;Chinese University of Hong Kong;University of Oxford", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.cuhk.edu.cn;https://www.ox.ac.uk", "aff_unique_abbr": "THU;CUHK;Oxford", 
"aff_campus_unique_index": "1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "China;United Kingdom" }, { "id": "CniFDGvqbUZ", "title": "Make Memory Buffer Stronger in Continual Learning: A Continuous Neural Transformation Approach", "track": "main", "status": "Reject", "tldr": "", "abstract": "Continual learning (CL) focuses on learning non-stationary data distribution without forgetting previous knowledge. However, the most widely used memory-replay approach often suffers from memory overfitting. To mitigate the memory overfitting, we propose a continuous and reversible memory transformation method so that the memory data is hard to overfit, thus improving generalization. The transformation is achieved by optimizing a bi-level optimization objective that jointly learns the CL model and memory transformer. Specifically, we propose a deterministic continuous memory transformer (DCMT) modeled by an ordinary differential equation, allowing for infinite memory transformation and generating diverse and hard memory data. Furthermore, we inject uncertainty into the transformation function and propose a stochastic continuous memory transformer (SCMT) modeled by a stochastic differential equation, which substantially enhances the diversity of the transformed memory buffer. The proposed neural transformation approaches have significant advantages over existing ones: (1) we can obtain infinite many transformed data, thus significantly increasing the memory buffer diversity; (2) the proposed continuous transformations are reversible, i.e., the original raw memory data could be restored from the transformed memory data without the need to make a replica of the memory data. Extensive experiments on both task-aware and task-free CL show significant improvement with our approach compared to strong baselines. 
", "keywords": "Continual Learning", "primary_area": "", "supplementary_material": "", "author": "Zhenyi Wang;Li Shen;Qiuling Suo;Tiehang Duan;Yanjun Zhu;Tongliang Liu;Mingchen Gao", "authorids": "~Zhenyi_Wang1;~Li_Shen1;~Qiuling_Suo1;~Tiehang_Duan1;~Yanjun_Zhu1;~Tongliang_Liu1;~Mingchen_Gao1", "gender": ";M;F;;;M;F", "homepage": ";https://sites.google.com/site/mathshenli/home;https://suoql.github.io/;;;https://tongliang-liu.github.io/;http://engineering.buffalo.edu/computer-science-engineering/people/faculty-directory/mingchen-gao.html", "dblp": ";91/3680-8;192/2607;184/7734;;150/6667;11/9613", "google_scholar": ";yVhgENIAAAAJ;nXxEXv4AAAAJ;gemTJXgAAAAJ;;https://scholar.google.com.au/citations?user=EiLdZ_YAAAAJ;1KUHms8AAAAJ", "orcid": ";;;0000-0003-4323-642X;;;0000-0002-5488-8514", "linkedin": ";;;;;;", "or_profile": "~Zhenyi_Wang1;~Li_Shen1;~Qiuling_Suo1;~Tiehang_Duan1;~Yanjun_Zhu1;~Tongliang_Liu1;~Mingchen_Gao1", "aff": ";JD Explore Academy;State University of New York, Buffalo;Meta Platforms, Inc.;;University of Sydney;University at Buffalo, SUNY", "aff_domain": ";jd.com;buffalo.edu;fb.com;;sydney.edu.au;buffalo.edu", "position": ";Researcher;PhD student;Research Scientist;;Lecturer;Assistant Professor", "bibtex": "@misc{\nwang2023make,\ntitle={Make Memory Buffer Stronger in Continual Learning: A Continuous Neural Transformation Approach},\nauthor={Zhenyi Wang and Li Shen and Qiuling Suo and Tiehang Duan and Yanjun Zhu and Tongliang Liu and Mingchen Gao},\nyear={2023},\nurl={https://openreview.net/forum?id=CniFDGvqbUZ}\n}", "github": "", "project": "", "reviewers": "A5Re;JHx6;XsdS;vLJC", "site": "https://openreview.net/forum?id=CniFDGvqbUZ", "pdf_size": 1587021, "recommendation": "5;5;5;5", "confidence": "5;5;4;3", "correctness": "3;3;3;2", "technical_novelty": "2;3;3;2", "empirical_novelty": "3;2;0;2", "wc_summary_paper": "80;82;198;44", "wc_strength_and_weaknesses": "135;143;369;89", "wc_clarity_quality_novelty_and_reproducibility": "43;25;66;42", "wc_summary_review": "32;31;35;38", "wc_review": "290;281;668;213", "wc_reply_reviewers": "0;0;0;334", "wc_reply_authors": "734;699;1017;1539", "reply_reviewers": "0;0;0;2", "reply_authors": "2;2;3;5", "recommendation_avg": [ 5.0, 0.0 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 1.75, 1.0897247358851685 ], "wc_summary_paper_avg": [ 101.0, 58.008620049092706 ], "wc_strength_and_weaknesses_avg": [ 184.0, 108.77959367454909 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 44.0, 14.577379737113251 ], "wc_summary_review_avg": [ 34.0, 2.7386127875258306 ], "wc_review_avg": [ 363.0, 178.59031328714332 ], "wc_reply_reviewers_avg": [ 83.5, 144.62624243200125 ], "wc_reply_authors_avg": [ 997.25, 336.2055732732579 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 3.0, 1.224744871391589 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15178557777951040245&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "JD;State University of New York at Buffalo;Meta;University of Sydney;University at Buffalo", "aff_unique_dep": "JD Explore Academy;;Meta Platforms, Inc.;;", "aff_unique_url": ";https://www.buffalo.edu;https://www.meta.com;https://www.sydney.edu.au;https://www.buffalo.edu", 
"aff_unique_abbr": ";SUNY Buffalo;Meta;USYD;UB", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Buffalo", "aff_country_unique_index": "1;1;2;1", "aff_country_unique": ";United States;Australia" }, { "title": "IDEAL: Query-Efficient Data-Free Learning from Black-Box Models", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12210", "id": "ConT6H7MWL", "poster": "/media/PosterPDFs/ICLR%202023/12210.png?t=1681128751.6557744", "openreview": "https://openreview.net/forum?id=ConT6H7MWL", "slides": "https://iclr.cc/virtual/2023/poster/12210", "video": "https://iclr.cc/virtual/2023/poster/12210", "author_site": "Jie Zhang, Chen Chen, Lingjuan Lyu", "tldr": "query-efficiently learn from black-box model APIs to train a good student without any real data", "abstract": "Knowledge Distillation (KD) is a typical method for training a lightweight student model with the help of a well-trained teacher model. \nHowever, most KD methods require access to either the teacher's training data or model parameter, which is unrealistic. To tackle this problem, recent works study KD under data-free and black-box settings. Nevertheless, these works require a large number of queries to the teacher model, which incurs significant monetary and computational costs. To address these problems, we propose a novel method called \\emph{query-effIcient Data-free lEarning from blAck-box modeLs} (IDEAL), which aims to query-efficiently learn from black-box model APIs to train a good student without any real data. In detail, IDEAL trains the student model in two stages: data generation and model distillation. Note that IDEAL does not require any query in the data generation stage and queries the teacher only once for each sample in the distillation stage. Extensive experiments on various real-world datasets show the effectiveness of the proposed IDEAL. 
For instance, IDEAL can improve the performance of the best baseline method DFME by 5.83\% on the CIFAR10 dataset with only $0.02\times$ the query budget of DFME.", "keywords": "black-box model;knowledge distillation", "primary_area": "", "supplementary_material": "", "author": "Jie Zhang;Chen Chen;Lingjuan Lyu", "authorids": "~Jie_Zhang14;~Chen_Chen20;~Lingjuan_Lyu1", "gender": "M;M;F", "homepage": "https://zj-jayzhang.github.io/;https://cc233.github.io/;https://sites.google.com/view/lingjuan-lyu", "dblp": "84/6889-81;65/4423-43;178/9876", "google_scholar": "soDBSE8AAAAJ;;", "orcid": ";0000-0001-7359-8515;", "linkedin": ";;", "or_profile": "~Jie_Zhang14;~Chen_Chen20;~Lingjuan_Lyu1", "aff": "Zhejiang University;Zhejiang University;Sony", "aff_domain": "zju.edu.cn;zju.edu.cn;sony.com", "position": "MS student;PhD student;scientist", "bibtex": "@inproceedings{\nzhang2023ideal,\ntitle={{IDEAL}: Query-Efficient Data-Free Learning from Black-Box Models},\nauthor={Jie Zhang and Chen Chen and Lingjuan Lyu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=ConT6H7MWL}\n}", "github": "", "project": "", "reviewers": "XB73;Epxx;Ruz3;S4b4", "pdf_size": 1076487, "recommendation": "5;5;8;8", "confidence": "4;3;4;4", "correctness": "3;2;4;3", "technical_novelty": "2;2;4;3", "empirical_novelty": "3;2;4;3", "wc_summary_paper": "116;99;108;111", "wc_strength_and_weaknesses": "568;197;332;419", "wc_clarity_quality_novelty_and_reproducibility": "142;320;64;13", "wc_summary_review": "61;65;28;69", "wc_review": "887;681;532;612", "wc_reply_reviewers": "0;277;19;33", "wc_reply_authors": "2461;1862;746;807", "reply_reviewers": "0;1;1;1", "reply_authors": "8;7;2;2", "recommendation_avg": [ 6.5, 1.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 108.5, 6.18465843842649 ], "wc_strength_and_weaknesses_avg": [ 379.0, 134.77202973911167 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 134.75, 116.40312495805256 ], "wc_summary_review_avg": [ 55.75, 16.269219403523945 ], "wc_review_avg": [ 678.0, 131.68333227861453 ], "wc_reply_reviewers_avg": [ 82.25, 113.04727993189398 ], "wc_reply_authors_avg": [ 1469.0, 724.4801584584633 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 4.75, 2.7726341266023544 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.5773502691896258, "corr_recommendation_correctness": 0.7071067811865476, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14995725983048791830&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=ConT6H7MWL", "email": "zju.edu.cn;zju.edu.cn;sony.com", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Zhejiang University;Sony Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://www.zju.edu.cn;https://www.sony.com", "aff_unique_abbr": "ZJU;Sony", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "China;Japan" }, { "title": "FluidLab: A Differentiable Environment for Benchmarking Complex Fluid Manipulation", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11304", "id": "Cp-io_BoFaE", "poster": "", "openreview":
"https://openreview.net/forum?id=Cp-io_BoFaE", "slides": "https://iclr.cc/virtual/2023/poster/11304", "video": "https://iclr.cc/virtual/2023/poster/11304", "author_site": "Zhou Xian, Bo Zhu, Zhenjia Xu, Hsiao-Yu Tung, Antonio Torralba, Katerina Fragkiadaki, Chuang Gan", "tldr": "", "abstract": "Humans manipulate various kinds of fluids in their everyday life: creating latte art, scooping floating objects from water, rolling an ice cream cone, etc. Using robots to augment or replace human labors in these daily settings remain as a challenging task due to the multifaceted complexities of fluids. Previous research in robotic fluid manipulation mostly consider fluids governed by an ideal, Newtonian model in simple task settings (e.g., pouring water into a container). However, the vast majority of real-world fluid systems manifest their complexities in terms of the fluid\u2019s complex material behaviors (e.g., elastoplastic deformation) and multi-component interactions (e.g. coffee and frothed milk when making latte art), both of which were well beyond the scope of the current literature. To evaluate robot learning algorithms on understanding and interacting with such complex fluid systems, a comprehensive virtual platform with versatile simulation capabilities and well-established tasks is needed. In this work, we introduce FluidLab, a simulation environment with a diverse set of manipulation tasks involving complex fluid dynamics. These tasks address interactions between solid and fluid as well as among multiple fluids. At the heart of our platform is a fully differentiable physics simulator, FluidEngine, providing GPU-accelerated simulations and gradient calculations for various material types and their couplings, extending the scope of the existing differentiable simulation engines. We identify several challenges for fluid manipulation learning by evaluating a set of reinforcement learning and trajectory optimization methods on our platform. To address these challenges, we propose several domain-specific optimization schemes coupled with differentiable physics, which are empirically shown to be effective in tackling optimization problems featured by fluid system\u2019s non-convex and non-smooth properties. Furthermore, we demonstrate reasonable sim-to-real transfer by deploying optimized trajectories in real-world settings. 
FluidLab is publicly available at: https://fluidlab2023.github.io.", "keywords": "Complex Fluid Manipulation;Differentiable Physics", "primary_area": "", "supplementary_material": "", "author": "Zhou Xian;Bo Zhu;Zhenjia Xu;Hsiao-Yu Tung;Antonio Torralba;Katerina Fragkiadaki;Chuang Gan", "authorids": "~Zhou_Xian1;~Bo_Zhu2;~Zhenjia_Xu1;~Hsiao-Yu_Tung2;~Antonio_Torralba1;~Katerina_Fragkiadaki1;~Chuang_Gan1", "gender": "M;M;M;F;M;F;M", "homepage": ";https://faculty.cc.gatech.edu/~bozhu/;https://www.zhenjiaxu.com/;https://sfish0101.bitbucket.io/;http://web.mit.edu/torralba/www//;https://www.cs.cmu.edu/~katef/;http://people.csail.mit.edu/ganchuang/", "dblp": "258/5020;;238/0000;;t/AntonioBTorralba;21/8780;139/6993", "google_scholar": ";atNjbs0AAAAJ;QE8cLMEAAAAJ;;https://scholar.google.com.tw/citations?user=8cxDHS4AAAAJ;FWp7728AAAAJ;PTeSCbIAAAAJ", "orcid": ";;;;;;", "linkedin": ";;;;;;", "or_profile": "~Zhou_Xian1;~Bo_Zhu2;~Zhenjia_Xu1;~Hsiao-Yu_Tung2;~Antonio_Torralba1;~Katerina_Fragkiadaki1;~Chuang_Gan1", "aff": "Carnegie Mellon University;Dartmouth College;Columbia University;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Carnegie Mellon University;MIT-IBM Watson AI Lab", "aff_domain": "cmu.edu;dartmouth.edu;columbia.edu;mit.edu;mit.edu;cmu.edu;ibm.com", "position": "PhD student;Assistant Professor;PhD student;Postdoc;Full Professor;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nxian2023fluidlab,\ntitle={FluidLab: A Differentiable Environment for Benchmarking Complex Fluid Manipulation},\nauthor={Zhou Xian and Bo Zhu and Zhenjia Xu and Hsiao-Yu Tung and Antonio Torralba and Katerina Fragkiadaki and Chuang Gan},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=Cp-io_BoFaE}\n}", "github": "", "project": "", "reviewers": "3F7A;Frwu;KJH8;fmMN", "pdf_size": 2681464, "recommendation": "6;6;8;10", "confidence": "4;4;4;5", "correctness": "2;3;4;4", "technical_novelty": "2;3;3;4", "empirical_novelty": "3;3;3;4", "wc_summary_paper": "79;46;102;91", "wc_strength_and_weaknesses": "374;380;114;263", "wc_clarity_quality_novelty_and_reproducibility": "37;37;209;186", "wc_summary_review": "133;47;67;64", "wc_review": "623;510;492;604", "wc_reply_reviewers": "127;0;24;0", "wc_reply_authors": "1873;2151;1657;587", "reply_reviewers": "1;0;1;0", "reply_authors": "4;3;3;1", "recommendation_avg": [ 7.5, 1.6583123951777 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 79.5, 20.982135258357285 ], "wc_strength_and_weaknesses_avg": [ 282.75, 107.99392344016398 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 117.25, 80.66094160124837 ], "wc_summary_review_avg": [ 77.75, 32.797675222491 ], "wc_review_avg": [ 557.25, 57.00603038275863 ], "wc_reply_reviewers_avg": [ 37.75, 52.45176355471759 ], "wc_reply_authors_avg": [ 1567.0, 592.2820274159938 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.75, 1.0897247358851685 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.8703882797784891, "corr_recommendation_correctness": 0.8181818181818182, "gs_citation": 50, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18396959536729889173&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=Cp-io_BoFaE", "email": 
"cmu.edu;dartmouth.edu;columbia.edu;mit.edu;mit.edu;cmu.edu;ibm.com", "author_num": 7, "aff_unique_index": "0;1;2;3;3;0;3", "aff_unique_norm": "Carnegie Mellon University;Dartmouth College;Columbia University;Massachusetts Institute of Technology", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.cmu.edu;https://www.dartmouth.edu;https://www.columbia.edu;https://web.mit.edu", "aff_unique_abbr": "CMU;Dartmouth;Columbia;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "CqoBqextqY", "title": "HyperFeel: An Efficient Federated Learning Framework Using Hyperdimensional Computing", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Federated Learning (FL) aims to establish a shared model across decentralized clients under the privacy-preserving constraint. Each client learns an independent model with local data, and only model updates are communicated. However, as the FL model typically employs computation-intensive neural networks, major issues in Federated Learning are (i) significant computation overhead for local training; (ii) the massive communication overhead that arises from sending around the model updates; (iii) notable performance degradation resulting from the non-IID scenario.\n\nIn this work, we propose HyperFeel, an efficient learning framework for federated learning based on Hyper-Dimensional Computing (HDC), that can significantly improve communication/storage efficiency over existing works with nearly no performance degradation. Unlike current solutions that employ neural networks as the learned model, HyperFeel introduces a simple yet effective computing paradigm that encodes and represents data using hyperdimensional vectors. Then, it performs concise and highly parallel operations for encryption, computation, and communication, taking advantage of the lightweight feature representation of hyperdimensional vectors. For further enhance HyperFeel performance, we propose a two-fold optimization scheme combining the characteristics of encoding and updating in hyper-dimensional computing. On the one hand, we design the personalization update based on hyperdimensional computing with a client-specific model, which achieves better accuracy to the non-IID data. On the other hand, we extend the framework from horizontal FL to vertical FL based on a shared encoding mechanism.\nComprehensive experimental results demonstrate our method consistently outperforms the state-of-the-art FL models. Typically, we achieves $26\\times$ storage reduction and up to $81\\times$ communication reduction over FedAvg, with minimal accuracy drops on FEMNIST and Synthetic. 
\emph{Code will be open-sourced in the camera-ready version.}", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Fangxin Liu;Haomin Li;Li Jiang", "authorids": "~Fangxin_Liu1;~Haomin_Li1;~Li_Jiang1", "gender": "M;M;M", "homepage": "https://mxhx7199.github.io/;https://home.epeenofront.com;https://www.cs.sjtu.edu.cn/~jiangli/", "dblp": "198/2194;56/4925-2;45/4954-2", "google_scholar": "https://scholar.google.com/citations?hl=zh-TW;635o82sAAAAJ;wCxFd8YAAAAJ", "orcid": ";0000-0002-2939-6534;0000-0002-7353-8798", "linkedin": ";;", "or_profile": "~Fangxin_Liu1;~Haomin_Li1;~Li_Jiang5", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@misc{\nliu2023hyperfeel,\ntitle={HyperFeel: An Efficient Federated Learning Framework Using Hyperdimensional Computing},\nauthor={Fangxin Liu and Haomin Li and Li Jiang},\nyear={2023},\nurl={https://openreview.net/forum?id=CqoBqextqY}\n}", "github": "", "project": "", "reviewers": "kRzc;3Dfo;csQV", "site": "https://openreview.net/forum?id=CqoBqextqY", "pdf_size": 1808154, "recommendation": "3;5;5", "confidence": "3;4;2", "correctness": "2;3;4", "technical_novelty": "2;2;2", "empirical_novelty": "0;2;3", "wc_summary_paper": "26;41;74", "wc_strength_and_weaknesses": "177;113;71", "wc_clarity_quality_novelty_and_reproducibility": "2;2;8", "wc_summary_review": "32;13;46", "wc_review": "237;169;199", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 1.6666666666666667, 1.247219128924647 ], "wc_summary_paper_avg": [ 47.0, 20.049937655763422 ], "wc_strength_and_weaknesses_avg": [ 120.33333333333333, 43.58389100981641 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 4.0, 2.8284271247461903 ], "wc_summary_review_avg": [ 30.333333333333332, 13.523641850067197 ], "wc_review_avg": [ 201.66666666666666, 27.824849006278942 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.8660254037844385, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4803198316944449968&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Nonlinear Reconstruction for Operator Learning of PDEs with Discontinuities", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12026", "id": "CrfhZAsJDsZ", "poster": "/media/PosterPDFs/ICLR%202023/12026.png?t=1682672229.0005388", "openreview": "https://openreview.net/forum?id=CrfhZAsJDsZ", "slides": "https://iclr.cc/virtual/2023/poster/12026", "video": "https://iclr.cc/virtual/2023/poster/12026", "author_site": "Samuel Lanthaler, Roberto Molinaro, Patrik Hadorn, Siddhartha Mishra", "tldr": "Operator
learning based on non-linear reconstruction (FNOs, shift-DeepONets) outperforms methods based on linear reconstruction (DeepONets, PCA-Net) for PDEs with discontinuities.", "abstract": "Discontinuous solutions arise in a large class of hyperbolic and advection-dominated PDEs. This paper investigates, both theoretically and empirically, the operator learning of PDEs with discontinuous solutions. We rigorously prove, in terms of lower approximation bounds, that methods which entail a linear reconstruction step (e.g., DeepONets or PCA-Nets) fail to efficiently approximate the solution operator of such PDEs. In contrast, we show that certain methods employing a non-linear reconstruction mechanism can overcome these fundamental lower bounds and approximate the underlying operator efficiently. The latter class includes Fourier Neural Operators and a novel extension of DeepONets termed shift-DeepONets. Our theoretical findings are confirmed by empirical results for advection equations, the inviscid Burgers\u2019 equation, and the compressible Euler equations of gas dynamics.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/80383641b8f1818bc2f501b8ac3c27c6add6c1da.zip", "author": "Samuel Lanthaler;Roberto Molinaro;Patrik Hadorn;Siddhartha Mishra", "authorids": "~Samuel_Lanthaler1;~Roberto_Molinaro1;~Patrik_Hadorn1;~Siddhartha_Mishra1", "gender": "M;M;M;M", "homepage": "https://slanthaler.github.io/;;;http://www.sam.math.ethz.ch/", "dblp": ";249/2799;;07/2856.html", "google_scholar": "v-Jv3LoAAAAJ;2ohT8yYAAAAJ;;FmEqyNcAAAAJ", "orcid": "0000-0003-1911-246X;;;", "linkedin": ";;patrik-hadorn-3a92a139;", "or_profile": "~Samuel_Lanthaler1;~Roberto_Molinaro1;~Patrik_Hadorn1;~Siddhartha_Mishra1", "aff": "California Institute of Technology;ETHZ - ETH Zurich;;Swiss Federal Institute of Technology", "aff_domain": "caltech.edu;ethz.ch;;ethz.ch", "position": "Postdoc;PhD student;;Full Professor", "bibtex": "@inproceedings{\nlanthaler2023nonlinear,\ntitle={Nonlinear Reconstruction for Operator Learning of {PDE}s with Discontinuities},\nauthor={Samuel Lanthaler and Roberto Molinaro and Patrik Hadorn and Siddhartha Mishra},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=CrfhZAsJDsZ}\n}", "github": "", "project": "", "reviewers": "koS7;vjUn;ZZzU;sk7n", "pdf_size": 2734542, "recommendation": "3;8;8;8", "confidence": "4;5;3;3", "correctness": "2;4;4;3", "technical_novelty": "2;4;4;3", "empirical_novelty": "1;4;4;3", "wc_summary_paper": "65;90;300;122", "wc_strength_and_weaknesses": "86;64;248;172", "wc_clarity_quality_novelty_and_reproducibility": "48;99;69;92", "wc_summary_review": "42;61;137;32", "wc_review": "241;314;754;418", "wc_reply_reviewers": "0;0;22;37", "wc_reply_authors": "866;812;923;408", "reply_reviewers": "0;0;1;1", "reply_authors": "2;2;3;2", "recommendation_avg": [ 6.75, 2.165063509461097 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 3.25, 0.82915619758885 ], "empirical_novelty_avg": [ 3.0, 1.224744871391589 ], "wc_summary_paper_avg": [ 144.25, 92.16391647494153 ], "wc_strength_and_weaknesses_avg": [ 142.5, 73.06675030408839 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 77.0, 20.087309426600665 ], "wc_summary_review_avg": [ 68.0, 41.176449579826574 ], "wc_review_avg": [ 431.75, 196.3954874736179 ], "wc_reply_reviewers_avg": [ 14.75, 15.674421839417235 ], "wc_reply_authors_avg": [ 752.25,
202.59118317439186 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.17407765595569782, "corr_recommendation_correctness": 0.8703882797784891, "gs_citation": 37, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13103997305256520327&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=CrfhZAsJDsZ", "email": "caltech.edu;ethz.ch;;ethz.ch", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "California Institute of Technology;ETH Zurich;Swiss Federal Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.caltech.edu;https://www.ethz.ch;https://www.ethz.ch", "aff_unique_abbr": "Caltech;ETHZ;ETH Zurich", "aff_campus_unique_index": "0", "aff_campus_unique": "Pasadena;", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United States;Switzerland" }, { "id": "Crw1sKsLDvl", "title": "COMNET : CORTICAL MODULES ARE POWERFUL", "track": "main", "status": "Reject", "tldr": "A novel CNN architecture leveraging biological structures in the visual cortex to cater to real-time applications with low latency and smaller depth", "abstract": "Existing CNN architectures may achieve efficiency in one or two dimensions (FLOPs, depth, accuracy, representation power, latency), but not in all. In this work, we present a pragmatically designed novel CNN architecture, \u201cCoMNet\u201d, which offers multi-dimensional efficiency at once: it is simple yet accurate, with lower latency and FLOPs, high representation power with limited parameters, low memory consumption, negligible branching, smaller depth, and only a few design hyperparameters. The key to achieving this multi-dimensional efficiency is our use of biological underpinnings in CoMNet, primarily the organization of cortical modules in the visual cortex. To realize CoMNet, a few concepts from well-understood CNN designs, such as residual learning, are directly inherited. Our solid experimental evaluations demonstrate the superiority of CoMNet over many state-of-the-art architectures dominant in industry and academia, such as ResNet and RepVGG. For instance, CoMNet surpasses ResNet-50 on ImageNet while being 50% shallower, with 22% fewer parameters, 25% lower FLOPs and latency, and 16% fewer training epochs.
Code will be open-sourced after the reviews.", "keywords": "CNN Architecture;Multi-dimensional efficiencies;Cortical Modules;Columnar Structure;Real-Time Applications;Latency", "primary_area": "", "supplementary_material": "/attachment/548b288b3faa5ade5cc088b75ed430034221adbe.zip", "author": "Ashish Kumar;Laxmidhar Behera", "authorids": "~Ashish_Kumar2;~Laxmidhar_Behera1", "gender": "M;M", "homepage": "https://ashishkumar822.github.io;https://home.iitk.ac.in/~lbehera/", "dblp": "34/5378-6;14/1412", "google_scholar": "n-oRDEYAAAAJ;https://scholar.google.co.in/citations?user=QWTcyP8AAAAJ", "orcid": ";", "linkedin": "ashishkumar822/;laxmidhar-behera-a74a5b174/?originalSubdomain=in", "or_profile": "~Ashish_Kumar2;~Laxmidhar_Behera1", "aff": "Indian Institute of Technology, Kanpur;Indian Institute of Technology, Kanpur", "aff_domain": "iitk.ac.in;iitmandi.ac.in", "position": "PhD student;Director", "bibtex": "@misc{\nkumar2023comnet,\ntitle={{COMNET} : {CORTICAL} {MODULES} {ARE} {POWERFUL}},\nauthor={Ashish Kumar and Laxmidhar Behera},\nyear={2023},\nurl={https://openreview.net/forum?id=Crw1sKsLDvl}\n}", "github": "", "project": "", "reviewers": "8V6y;6Lbu;fyC8", "site": "https://openreview.net/forum?id=Crw1sKsLDvl", "pdf_size": 1550630, "recommendation": "5;5;6", "confidence": "3;4;5", "correctness": "3;2;3", "technical_novelty": "3;2;3", "empirical_novelty": "2;2;3", "wc_summary_paper": "64;83;31", "wc_strength_and_weaknesses": "103;596;338", "wc_clarity_quality_novelty_and_reproducibility": "52;42;17", "wc_summary_review": "31;94;178", "wc_review": "250;815;564", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1016;3844;1634", "reply_reviewers": "0;0;0", "reply_authors": "3;8;4", "recommendation_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 59.333333333333336, 21.483844059096022 ], "wc_strength_and_weaknesses_avg": [ 345.6666666666667, 201.33940388199116 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 37.0, 14.719601443879744 ], "wc_summary_review_avg": [ 101.0, 60.21627686929839 ], "wc_review_avg": [ 543.0, 231.13776555696532 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 2164.6666666666665, 1213.974555838063 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 5.0, 2.160246899469287 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.8660254037844385, "corr_recommendation_correctness": 0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:aTdC7oW2D54J:scholar.google.com/&scioq=COMNET+:+CORTICAL+MODULES+ARE+POWERFUL&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Indian Institute of Technology Kanpur", "aff_unique_dep": "", "aff_unique_url": "https://www.iitk.ac.in", "aff_unique_abbr": "IIT Kanpur", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Kanpur", "aff_country_unique_index": "0;0", "aff_country_unique": "India" }, { "title": "Learning MLPs on Graphs: A Unified View of Effectiveness, Robustness, and Efficiency", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10954", "id": "Cs3r5KLdoj", "poster": "/media/PosterPDFs/ICLR%202023/10954.png?t=1680811839.671119", "openreview":
"https://openreview.net/forum?id=Cs3r5KLdoj", "slides": "https://iclr.cc/virtual/2023/poster/10954", "video": "https://iclr.cc/virtual/2023/poster/10954", "author_site": "Yijun Tian, Chuxu Zhang, Zhichun Guo, Xiangliang Zhang, Nitesh Chawla", "tldr": "We propose NOSMOG, a novel method to learn noise-robust and structure-aware MLPs on graphs, with superior effectiveness, outstanding robustness, and exceptional efficiency.", "abstract": "While Graph Neural Networks (GNNs) have demonstrated their efficacy in dealing with non-Euclidean structural data, they are difficult to be deployed in real applications due to the scalability constraint imposed by the multi-hop data dependency. Existing methods attempt to address this scalability issue by training student multi-layer perceptrons (MLPs) exclusively on node content features using labels derived from the teacher GNNs. However, the trained MLPs are neither effective nor robust. In this paper, we ascribe the lack of effectiveness and robustness to three significant challenges: 1) the misalignment between content feature and label spaces, 2) the strict hard matching to teacher's output, and 3) the sensitivity to node feature noises. To address the challenges, we propose NOSMOG, a novel method to learn NOise-robust Structure-aware MLPs On Graphs, with remarkable effectiveness, robustness, and efficiency. Specifically, we first address the misalignment by complementing node content with position features to capture the graph structural information. We then design an innovative representational similarity distillation strategy to inject soft node similarities into MLPs. Finally, we introduce adversarial feature augmentation to ensure stable learning against feature noises. Extensive experiments and theoretical analyses demonstrate the superiority of NOSMOG by comparing it to GNNs and the state-of-the-art method in both transductive and inductive settings across seven datasets. 
Code is available at https://github.com/meettyj/NOSMOG.", "keywords": "Graph Representation Learning;Knowledge Distillation", "primary_area": "", "supplementary_material": "", "author": "Yijun Tian;Chuxu Zhang;Zhichun Guo;Xiangliang Zhang;Nitesh Chawla", "authorids": "~Yijun_Tian1;~Chuxu_Zhang2;~Zhichun_Guo1;~Xiangliang_Zhang1;~Nitesh_Chawla1", "gender": ";;;F;M", "homepage": "https://www.yijuntian.com/;;;https://sites.nd.edu/xiangliang-zhang/;http://niteshchawla.nd.edu", "dblp": "234/9123-1;;;74/1890-1;c/NiteshVChawla.html", "google_scholar": "dbaBgV0AAAAJ;;;BhRJe4wAAAAJ;hDLBEhkAAAAJ", "orcid": "0000-0003-2795-6080;;;0000-0002-3574-5665;", "linkedin": "yijun-tian/;;;;", "or_profile": "~Yijun_Tian1;~Chuxu_Zhang2;~Zhichun_Guo1;~Xiangliang_Zhang1;~Nitesh_Chawla1", "aff": "University of Notre Dame;;;University of Notre Dame;University of Notre Dame", "aff_domain": "nd.edu;;;nd.edu;nd.edu", "position": "PhD student;;;Associate Professor;Full Professor", "bibtex": "@inproceedings{\ntian2023learning,\ntitle={Learning {MLP}s on Graphs: A Unified View of Effectiveness, Robustness, and Efficiency},\nauthor={Yijun Tian and Chuxu Zhang and Zhichun Guo and Xiangliang Zhang and Nitesh Chawla},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=Cs3r5KLdoj}\n}", "github": "", "project": "", "reviewers": "yeLS;8WF2;44NK;iP1i", "pdf_size": 1194675, "recommendation": "6;8;8;8", "confidence": "3;4;4;4", "correctness": "3;3;4;4", "technical_novelty": "2;3;3;4", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "209;72;50;74", "wc_strength_and_weaknesses": "342;405;312;362", "wc_clarity_quality_novelty_and_reproducibility": "74;38;9;23", "wc_summary_review": "145;49;34;33", "wc_review": "770;564;405;492", "wc_reply_reviewers": "0;17;0;0", "wc_reply_authors": "0;27;0;0", "reply_reviewers": "0;1;0;0", "reply_authors": "0;1;0;0", "recommendation_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 101.25, 62.9181015288923 ], "wc_strength_and_weaknesses_avg": [ 355.25, 33.7888665095472 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 36.0, 24.21776207662467 ], "wc_summary_review_avg": [ 65.25, 46.47781728954147 ], "wc_review_avg": [ 557.75, 134.8561733848325 ], "wc_reply_reviewers_avg": [ 4.25, 7.361215932167728 ], "wc_reply_authors_avg": [ 6.75, 11.691342951089922 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 0.25, 0.4330127018922193 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 1.0, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 79, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3204984314014768821&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "pdf": "https://openreview.net/pdf?id=Cs3r5KLdoj", "email": "nd.edu;;;nd.edu;nd.edu", "author_num": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Notre Dame", "aff_unique_dep": "", "aff_unique_url": "https://www.nd.edu", "aff_unique_abbr": "Notre Dame", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Visual Recognition with Deep Nearest Centroids", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12128", "id": "CsKwavjr7A",
"poster": "", "openreview": "https://openreview.net/forum?id=CsKwavjr7A", "slides": "https://iclr.cc/virtual/2023/poster/12128", "video": "https://iclr.cc/virtual/2023/poster/12128", "author_site": "Wenguan Wang, Cheng Han, Tianfei Zhou, Dongfang Liu", "tldr": "", "abstract": "We devise deep nearest centroids (DNC), a conceptually elegant yet surprisingly effective network for large-scale visual recognition, by revisiting Nearest Centroids, one of the most classic and simple classifiers. Current deep models learn the classifier in a fully parametric manner, ignoring the latent data structure and lacking simplicity and explainability. DNC instead conducts nonparametric, case-based reasoning; it utilizes sub-centroids of training samples to describe class distributions and clearly explains the classification as the proximity of test data and the class sub-centroids in the feature space. Due to the distance-based nature, the network output dimensionality is flexible, and all the learnable parameters are only for data embedding. That means all the knowledge learnt for ImageNet classification can be completely transferred for pixel recognition learning, under the \u2018pre-training and fine-tuning\u2019 paradigm. Apart from its nested simplicity and intuitive decision-making mechanism, DNC can even possess ad-hoc explainability when the sub-centroids are selected as actual training images that humans can view and inspect. Compared with parametric counterparts, DNC performs better on image classification (CIFAR-10, ImageNet) and greatly boots pixel recognition (ADE20K, Cityscapes), with improved transparency and fewer learnable parameters, using various network architectures (ResNet, Swin) and segmentation models (FCN, DeepLabV3, Swin). We feel this work brings fundamental insights into related fields. 
Our code is available at https://github.com/ChengHan111/DNC.", "keywords": "Nearest centroids classifier;Case-based reasoning;Image classification;Image segmentation;Explainable neural networks", "primary_area": "", "supplementary_material": "", "author": "Wenguan Wang;Cheng Han;Tianfei Zhou;Dongfang Liu", "authorids": "~Wenguan_Wang4;~Cheng_Han1;~Tianfei_Zhou2;~Dongfang_Liu1", "gender": "M;M;M;", "homepage": "https://sites.google.com/view/wenguanwang/;https://chenghan111.github.io/;https://www.tfzhou.com/;https://www.rit.edu/directory/dxleec-dongfang-liu", "dblp": "145/1078;53/6096-1.html;150/6710;", "google_scholar": "CqAQQkgAAAAJ;VgkEKZwAAAAJ;https://scholar.google.ae/citations?user=-_33ccMAAAAJ;uICY0vEAAAAJ", "orcid": "0000-0002-0802-9567;0000-0002-8145-3436;0000-0001-5475-1473;", "linkedin": "wenguanwang;chenghan-87129219a/;;", "or_profile": "~Wenguan_Wang4;~Cheng_Han1;~Tianfei_Zhou2;~Dongfang_Liu1", "aff": "University of Technology Sydney;Rochester Institute of Technology;Swiss Federal Institute of Technology;Rochester Institute of Technology", "aff_domain": "uts.edu.au;rit.edu;ethz.ch;rit.edu", "position": "Lecturer;PhD student;Postdoctoral Scholar;Assistant Professor", "bibtex": "@inproceedings{\nwang2023visual,\ntitle={Visual Recognition with Deep Nearest Centroids},\nauthor={Wenguan Wang and Cheng Han and Tianfei Zhou and Dongfang Liu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=CsKwavjr7A}\n}", "github": "", "project": "", "reviewers": "1ipB;GtUv;k9FP;3KaQ", "pdf_size": 8342255, "recommendation": "5;6;8;8", "confidence": "4;4;4;4", "correctness": "3;3;4;4", "technical_novelty": "3;2;3;3", "empirical_novelty": "1;3;3;3", "wc_summary_paper": "62;149;142;60", "wc_strength_and_weaknesses": "373;440;112;162", "wc_clarity_quality_novelty_and_reproducibility": "99;409;693;39", "wc_summary_review": "144;199;53;37", "wc_review": "678;1197;1000;298", "wc_reply_reviewers": "231;0;0;0", "wc_reply_authors": "2550;2096;1876;496", "reply_reviewers": "2;0;0;0", "reply_authors": "9;6;6;2", "recommendation_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 103.25, 42.32832975679527 ], "wc_strength_and_weaknesses_avg": [ 271.75, 137.95357008791038 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 310.0, 261.94083301386974 ], "wc_summary_review_avg": [ 108.25, 66.41300700916952 ], "wc_review_avg": [ 793.25, 340.70322496272325 ], "wc_reply_reviewers_avg": [ 57.75, 100.02593413710267 ], "wc_reply_authors_avg": [ 1754.5, 766.1636574518528 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 5.75, 2.48746859276655 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.9622504486493761, "gs_citation": 183, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5312225196983076861&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=CsKwavjr7A", "email": "uts.edu.au;rit.edu;ethz.ch;rit.edu", "author_num": 4, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "University of Technology Sydney;Rochester Institute of Technology;Swiss Federal Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uts.edu.au;https://www.rit.edu;https://www.ethz.ch",
"aff_unique_abbr": "UTS;RIT;ETH Zurich", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;1", "aff_country_unique": "Australia;United States;Switzerland" }, { "id": "CtR4H2enl90", "title": "SpeechLM: Enhanced Speech Pre-Training with Unpaired Textual Data", "track": "main", "status": "Withdraw", "tldr": "This is the first work to explicitly use phoneme/hidden units as the the shared space of speech and text modalities for pre-training.", "abstract": "How to boost speech pre-training with textual data is an unsolved problem due to the fact that speech and text are very different modalities with distinct characteristics. In this paper, we propose a cross-modal Speech and Language Model (SpeechLM) to explicitly align speech and text pre-training with a pre-defined unified discrete representation. Specifically, we introduce two alternative discrete tokenizers to bridge the speech and text modalities, including phoneme-unit and hidden-unit tokenizers, which can be trained using a small amount of paired speech-text data. Based on the trained tokenizers, we convert the unlabeled speech and text data into tokens of phoneme units or hidden units. The pre-training objective is designed to unify the speech and the text into the same discrete semantic space with a unified Transformer network. Leveraging only 10K text sentences, our SpeechLM gets a 16\\% relative WER reduction over the best base model performance (from 6.8 to 5.7) on the public LibriSpeech ASR benchmark. Moreover, SpeechLM with fewer parameters even outperforms previous SOTA models on CoVoST-2 speech translation tasks. We also evaluate our SpeechLM on various spoken language processing tasks under the universal representation evaluation framework SUPERB, demonstrating significant improvements on content-related tasks. 
Our code and models are available at https://anonymous.", "keywords": "Speech pre-training;Speech-text joint modeling;Unified tokenizer", "primary_area": "", "supplementary_material": "", "author": "Ziqiang Zhang;Sanyuan Chen;Long Zhou;Yu Wu;Shuo Ren;Shujie LIU;Zhuoyuan Yao;Xun Gong;Lirong Dai;Jinyu Li;Furu Wei", "authorids": "~Ziqiang_Zhang1;~Sanyuan_Chen1;~Long_Zhou2;~Yu_Wu1;~Shuo_Ren1;~Shujie_LIU1;~Zhuoyuan_Yao1;v-xungong@microsoft.com;lrdai@ustc.edu.cn;~Jinyu_Li1;~Furu_Wei1", "gender": "M;M;;M;M;M;M;;;M;M", "homepage": ";https://sanyuan-chen.github.io;;https://scholar.google.com/citations?user=aQizmzsAAAAJ&hl=en;https://rshuo.github.io/;https://www.microsoft.com/en-us/research/people/shujliu/;;;;https://www.microsoft.com/en-us/research/people/jinyli;https://www.microsoft.com/en-us/research/people/fuwei/", "dblp": "277/3621;143/4869;;22/0-12;147/6063;;;;;87/4873-1;72/5870", "google_scholar": "https://scholar.google.com/citations?hl=en;XrZRIy0AAAAJ;ZnwgSXIAAAAJ;aQizmzsAAAAJ;bC-AGbgAAAAJ;6mNya-wAAAAJ;pRo0_RoAAAAJ;;;grUvupMAAAAJ;G-V1VpwAAAAJ", "orcid": ";0000-0002-3082-6052;;;;0009-0008-2599-6752;;;;0000-0002-1089-9748;", "linkedin": ";sanyuan-chen-08a495167/;;;;;;;;;", "or_profile": "~Ziqiang_Zhang1;~Sanyuan_Chen1;~Long_Zhou2;~Yu_Wu1;~Shuo_Ren1;~Shujie_LIU1;~Zhuoyuan_Yao1;v-xungong@microsoft.com;lrdai@ustc.edu.cn;~Jinyu_Li1;~Furu_Wei1", "aff": "Microsoft Research Asia;Microsoft;Microsoft Research Asia;Microsoft;;Microsoft;Northwestern Polytechnical University, Northwest Polytechnical University Xi'an;;;Microsoft;Microsoft Research", "aff_domain": "microsoft.com;microsoft.com;microsoft.com;microsoft.com;;microsoft.com;mai.nwpu.edu.cn;;;microsoft.com;microsoft.com", "position": "Intern;Intern;Researcher;Researcher;;Researcher;MS student;;;Researcher;Distinguished Scientist", "bibtex": "@misc{\nzhang2023speechlm,\ntitle={Speech{LM}: Enhanced Speech Pre-Training with Unpaired Textual Data},\nauthor={Ziqiang Zhang and Sanyuan Chen and Long Zhou and Yu Wu and Shuo Ren and Shujie LIU and Zhuoyuan Yao and Xun Gong and Lirong Dai and Jinyu Li and Furu Wei},\nyear={2023},\nurl={https://openreview.net/forum?id=CtR4H2enl90}\n}", "github": "", "project": "", "reviewers": "VrdX;LP8q;EUSA;q8bU", "site": "https://openreview.net/forum?id=CtR4H2enl90", "pdf_size": 1561157, "recommendation": "3;3;5;8", "confidence": "5;5;5;4", "correctness": "2;4;2;4", "technical_novelty": "3;2;3;3", "empirical_novelty": "3;0;3;3", "wc_summary_paper": "74;33;53;100", "wc_strength_and_weaknesses": "215;136;474;144", "wc_clarity_quality_novelty_and_reproducibility": "11;65;91;37", "wc_summary_review": "75;20;38;75", "wc_review": "375;254;656;356", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "673;802;401;358", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.75, 2.0463381929681126 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "correctness_avg": [ 3.0, 1.0 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 1.299038105676658 ], "wc_summary_paper_avg": [ 65.0, 24.869660230891775 ], "wc_strength_and_weaknesses_avg": [ 242.25, 137.28870128309904 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 51.0, 29.966648127543394 ], "wc_summary_review_avg": [ 52.0, 23.86419912756345 ], "wc_review_avg": [ 410.25, 149.1582632642255 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 558.5, 185.34359983554867 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 11, 0 ], 
"corr_recommendation_confidence": -0.9169493006161777, "corr_recommendation_correctness": 0.3665083330689157, "gs_citation": 55, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10394157884098196393&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;0;0;0;1;0;0", "aff_unique_norm": "Microsoft;Northwestern Polytechnical University", "aff_unique_dep": "Research;", "aff_unique_url": "https://www.microsoft.com/en-us/research/group/asia;http://www.nwpu.edu.cn", "aff_unique_abbr": "MSR Asia;NWPU", "aff_campus_unique_index": "0;0;2", "aff_campus_unique": "Asia;;Xi'an", "aff_country_unique_index": "0;1;0;1;1;0;1;1", "aff_country_unique": "China;United States" }, { "title": "Stay Moral and Explore: Learn to Behave Morally in Text-based Games", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11130", "id": "CtS2Rs_aYk", "poster": "", "openreview": "https://openreview.net/forum?id=CtS2Rs_aYk", "slides": "https://iclr.cc/virtual/2023/poster/11130", "video": "https://iclr.cc/virtual/2023/poster/11130", "author_site": "Zijing Shi, Meng Fang, Yunqiu Xu, Ling Chen, Yali Du", "tldr": "", "abstract": "Reinforcement learning (RL) in text-based games has developed rapidly and achieved promising results. However, little effort has been expended to design agents that pursue objectives while behaving morally, which is a critical issue in the field of autonomous agents. In this paper, we propose a general framework named Moral Awareness Adaptive Learning (MorAL) that enhances the morality capacity of an agent using a plugin moral-aware learning model. The framework allows the agent to execute task learning and morality learning adaptively. The agent selects trajectories from past experiences during task learning. Meanwhile, the trajectories are used to conduct self-imitation learning with a moral-enhanced objective. In order to achieve the trade-off between morality and task progress, the agent uses the combination of task policy and moral policy for action selection. We evaluate on the Jiminy Cricket benchmark, a set of text-based games with various scenes and dense morality annotations. 
Our experiments demonstrate that, compared with strong contemporary value alignment approaches, the proposed framework improves task performance while reducing immoral behaviours in various games.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Zijing Shi;Meng Fang;Yunqiu Xu;Ling Chen;Yali Du", "authorids": "~Zijing_Shi1;~Meng_Fang1;~Yunqiu_Xu3;~Ling_Chen5;~Yali_Du1", "gender": "F;M;;F;", "homepage": "https://winni18.github.io/;;;https://profiles.uts.edu.au/Ling.Chen;", "dblp": "313/5293;67/463;;17/1237-6;", "google_scholar": "dIIHQaMAAAAJ;IcNYP1oAAAAJ;6kfxdwoAAAAJ;https://scholar.google.com.au/citations?user=L5aYWQcAAAAJ;", "orcid": ";;;0000-0002-6468-5729;", "linkedin": ";;yunqiu-xu-53052ab4/;;", "or_profile": "~Zijing_Shi1;~Meng_Fang1;~Yunqiu_Xu3;~Ling_Chen5;~Yali_Du1", "aff": "University of Technology Sydney;Eindhoven University of Technology;;University of Technology Sydney;", "aff_domain": "uts.edu.au;tue.nl;;uts.edu.au;", "position": "PhD student;Assistant Professor;;Full Professor;", "bibtex": "@inproceedings{\nshi2023stay,\ntitle={Stay Moral and Explore: Learn to Behave Morally in Text-based Games},\nauthor={Zijing Shi and Meng Fang and Yunqiu Xu and Ling Chen and Yali Du},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=CtS2Rs_aYk}\n}", "github": "", "project": "", "reviewers": "qzhN;uEc3;bjYp;foEH", "pdf_size": 1870224, "recommendation": "5;6;6;6", "confidence": "3;5;4;3", "correctness": "3;3;4;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;3;3;2", "wc_summary_paper": "201;180;141;162", "wc_strength_and_weaknesses": "627;156;305;244", "wc_clarity_quality_novelty_and_reproducibility": "40;426;273;160", "wc_summary_review": "98;88;98;56", "wc_review": "966;850;817;622", "wc_reply_reviewers": "0;180;0;537", "wc_reply_authors": "1293;907;781;801", "reply_reviewers": "0;1;0;2", "reply_authors": "3;2;1;4", "recommendation_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 171.0, 22.147234590350102 ], "wc_strength_and_weaknesses_avg": [ 333.0, 177.81310412902644 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 224.75, 142.438363863111 ], "wc_summary_review_avg": [ 85.0, 17.233687939614086 ], "wc_review_avg": [ 813.75, 123.76666554448335 ], "wc_reply_reviewers_avg": [ 179.25, 219.22975961305983 ], "wc_reply_authors_avg": [ 945.5, 206.26378741795662 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.5222329678670935, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6199373727435011125&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "pdf": "https://openreview.net/pdf?id=CtS2Rs_aYk", "email": "uts.edu.au;tue.nl;;uts.edu.au;", "author_num": 5, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Technology Sydney;Eindhoven University of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.uts.edu.au;https://www.tue.nl", "aff_unique_abbr": "UTS;TU/e", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Australia;Netherlands" }, { "id": "CvfiXFOW2n", "title": 
"ClusTR: Exploring Efficient Self-attention via Clustering for Vision Transformers", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Although Transformers have successfully transitioned from their language modelling origins to image-based applications, their quadratic computational complexity remains a challenge, particularly for dense prediction. In this paper we propose a content-based sparse attention method, as an alternative to dense self-attention, aiming to reduce the computation complexity while retaining the ability to model long-range dependencies. Specifically, we cluster and then aggregate key and value tokens, as a content-based method of reducing the total token count. The resulting clustered-token sequence retains the semantic diversity of the original signal, but can be processed at a lower computational cost. Besides, we further extend the clustering-guided attention from single-scale to multi-scale, which is conducive to dense prediction tasks. We label the proposed Transformer architecture ClusTR, and demonstrate that it achieves state-of-the-art performance on various vision tasks but at lower computational cost and with fewer parameters. For instance, our ClusTR small model with 22.7M parameters achieves 83.2% Top-1 accuracy on ImageNet. Source code and ImageNet models will be made publicly available.", "keywords": "Vision Transformer;Clustering;Multi-scale;Efficient", "primary_area": "", "supplementary_material": "", "author": "Yutong Xie;Jianpeng Zhang;Yong Xia;Anton van den Hengel;Qi Wu", "authorids": "~Yutong_Xie4;~Jianpeng_Zhang2;~Yong_Xia1;~Anton_van_den_Hengel1;~Qi_Wu3", "gender": "Not Specified;M;;M;M", "homepage": "https://ytongxie.github.io/;https://teacher.nwpu.edu.cn/yongxia.html;;http://qi-wu.me/;https://jianpengz.github.io/", "dblp": ";50/2433-1.html;v/AntonvandenHengel;96/3446-1;", "google_scholar": "ddDL9HMAAAAJ;https://scholar.google.com.hk/citations?user=Usw1jeMAAAAJ;https://scholar.google.com.au/citations?user=nMGZ2ZQAAAAJ;https://scholar.google.co.uk/citations?user=aKXe1FEAAAAJ;KBIydr4AAAAJ", "orcid": ";0000-0001-9273-2847;0000-0003-3027-8364;;", "linkedin": ";;;;", "or_profile": "~Yutong_Xie4;~Yong_Xia1;~Anton_van_den_Hengel1;~Qi_Wu3;~Zhang_Jianpeng1", "aff": "University of Adelaide;Northwestern Polytechnical University;University of Adelaide;The University of Adelaide;Alibaba Group", "aff_domain": "adelaide.edu.au;nwpu.edu.cn;adelaide.edu.au;adelaide.edu.au;alibaba-inc.com", "position": "Postdoc;Professor;Professor;Associate Professor;Researcher", "bibtex": "@misc{\nxie2023clustr,\ntitle={Clus{TR}: Exploring Efficient Self-attention via Clustering for Vision Transformers},\nauthor={Yutong Xie and Jianpeng Zhang and Yong Xia and Anton van den Hengel and Qi Wu},\nyear={2023},\nurl={https://openreview.net/forum?id=CvfiXFOW2n}\n}", "github": "", "project": "", "reviewers": "vdv8;nEGw;frEZ;bjyM", "site": "https://openreview.net/forum?id=CvfiXFOW2n", "pdf_size": 7483305, "recommendation": "3;3;3;5", "confidence": "4;4;4;5", "correctness": "3;3;3;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;2;3;0", "wc_summary_paper": "85;87;138;75", "wc_strength_and_weaknesses": "252;216;456;102", "wc_clarity_quality_novelty_and_reproducibility": "46;78;50;62", "wc_summary_review": "43;25;81;17", "wc_review": "426;406;725;256", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 
0.4330127018922193 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.75, 1.0897247358851685 ], "wc_summary_paper_avg": [ 96.25, 24.529319191530774 ], "wc_strength_and_weaknesses_avg": [ 256.5, 127.79964788683887 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 59.0, 12.449899597988733 ], "wc_summary_review_avg": [ 41.5, 24.672859582950654 ], "wc_review_avg": [ 453.25, 170.0961125364128 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 1.0, "corr_recommendation_correctness": 0.0, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5144038930318657233&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;0;0;2", "aff_unique_norm": "University of Adelaide;Northwestern Polytechnical University;Alibaba Group", "aff_unique_dep": ";;", "aff_unique_url": "https://www.adelaide.edu.au;https://www.nwpu.edu.cn;https://www.alibaba.com", "aff_unique_abbr": "Adelaide;NWPU;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;1", "aff_country_unique": "Australia;China" }, { "id": "CvnKNdZQsxb", "title": "Leveraging Human Features at Test-Time", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Machine learning (ML) models can make decisions based on large amounts of data, but they may be missing important context. For example, a model trained to predict psychiatric outcomes may know nothing about a patient\u2019s social support system, and social support may look different for different patients. In this work, we explore strategies for querying for a small, additional set of these human features that are relevant for each specific instance at test time, so as to incorporate this information while minimizing the burden on the user to label feature values. We define the problem of querying users for an instance-specific set of human feature values, and propose algorithms to solve it.
We show in experiments on real datasets that our approach outperforms a feature selection baseline that chooses the same set of human features for all instances.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/578298c84e9a234d323a8a0d48e128c5586c94c0.zip", "author": "Isaac Lage;Sonali Parbhoo;Finale Doshi-Velez", "authorids": "~Isaac_Lage1;~Sonali_Parbhoo2;~Finale_Doshi-Velez1", "gender": ";;F", "homepage": ";;https://finale.seas.harvard.edu/", "dblp": "220/5432;;64/7056", "google_scholar": ";FwEz5s4AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;", "linkedin": ";;", "or_profile": "~Isaac_Lage1;~Sonali_Parbhoo2;~Finale_Doshi-Velez1", "aff": "Colby College;Harvard University;Harvard University", "aff_domain": "colby.edu;harvard.edu;harvard.edu", "position": "Assistant Professor;Postdoc;Professor", "bibtex": "@misc{\nlage2023leveraging,\ntitle={Leveraging Human Features at Test-Time},\nauthor={Isaac Lage and Sonali Parbhoo and Finale Doshi-Velez},\nyear={2023},\nurl={https://openreview.net/forum?id=CvnKNdZQsxb}\n}", "github": "", "project": "", "reviewers": "izJs;R4aK;adjV", "site": "https://openreview.net/forum?id=CvnKNdZQsxb", "pdf_size": 413684, "recommendation": "3;3;6", "confidence": "4;3;4", "correctness": "3;3;4", "technical_novelty": "2;2;3", "empirical_novelty": "2;2;3", "wc_summary_paper": "78;42;190", "wc_strength_and_weaknesses": "487;137;118", "wc_clarity_quality_novelty_and_reproducibility": "29;66;95", "wc_summary_review": "20;21;89", "wc_review": "614;266;492", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "195;313;31", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 4.0, 1.4142135623730951 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 103.33333333333333, 63.020278923181195 ], "wc_strength_and_weaknesses_avg": [ 247.33333333333334, 169.6473466406776 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 63.333333333333336, 27.010286106510527 ], "wc_summary_review_avg": [ 43.333333333333336, 32.293790252754306 ], "wc_review_avg": [ 457.3333333333333, 144.16965314825757 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 179.66666666666666, 115.6354424713961 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.5, "corr_recommendation_correctness": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:0wWBMXWeoEEJ:scholar.google.com/&scioq=Leveraging+Human+Features+at+Test-Time&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;1", "aff_unique_norm": "Colby College;Harvard University", "aff_unique_dep": ";", "aff_unique_url": "https://www.colby.edu;https://www.harvard.edu", "aff_unique_abbr": "Colby;Harvard", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "CwFcw5DBVOR", "title": "AMA: Asymptotic Midpoint Augmentation for Margin Balancing and Moderate Broadening", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Like alignment and uniformity, margin plays an important role in regularization, as shown in the contrastive learning literature.
However, feature augmentation has been rarely analyzed in this framework, despite its effective regularization. In this paper, we focus on the analysis framework for feature augmentations and propose a novel method to gradually push a decision boundary to the midpoint of related representations via their augmentation, called $\\textit{asymptotic midpoint augmentation}$ (AMA). The method induces two effects: 1) balancing the margin for all classes and 2) only moderately broadening the margin until it holds maximal confidence. Each effect addresses the low uniformity of feature augmentations and representation collapse by excessively low alignment of contrastive learning, respectively. We empirically analyze the effects in a toy task for clear visualization and validate the impacts in original, long-tailed, and coarse-to-fine transfer tasks on CIFAR-10 and CIFAR-100.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/43de7f6a99475173b14df96d85d0354c479fd130.zip", "author": "Semi Lee;Hoyong Kim;Kangil Kim", "authorids": "~Semi_Lee1;~Hoyong_Kim1;~Kangil_Kim1", "gender": "F;M;M", "homepage": ";https://khyong.github.io/;", "dblp": ";;45/8372", "google_scholar": ";PaBLiH4AAAAJ;RZggOtkAAAAJ", "orcid": "0000-0002-2010-2982;0000-0002-1608-1939;0000-0003-3220-6401", "linkedin": ";hoyong-kim-66220a175/;", "or_profile": "~Semi_Lee1;~Hoyong_Kim1;~Kangil_Kim1", "aff": "Gwangju Institute of Science and Technology;Gwangju Institute of Science and Technology;Gwangju Institute of Science and Technology", "aff_domain": "gist.ac.kr;gist.ac.kr;gist.ac.kr", "position": "MS student;PhD student;Associate Professor", "bibtex": "@misc{\nlee2023ama,\ntitle={{AMA}: Asymptotic Midpoint Augmentation for Margin Balancing and Moderate Broadening},\nauthor={Semi Lee and Hoyong Kim and Kangil Kim},\nyear={2023},\nurl={https://openreview.net/forum?id=CwFcw5DBVOR}\n}", "github": "", "project": "", "reviewers": "HUbu;QUwu;G8Vs", "site": "https://openreview.net/forum?id=CwFcw5DBVOR", "pdf_size": 1179988, "recommendation": "3;3;5", "confidence": "4;4;4", "correctness": "3;1;3", "technical_novelty": "2;1;3", "empirical_novelty": "2;0;3", "wc_summary_paper": "55;31;53", "wc_strength_and_weaknesses": "129;456;211", "wc_clarity_quality_novelty_and_reproducibility": "170;25;20", "wc_summary_review": "33;14;31", "wc_review": "387;526;315", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.3333333333333335, 0.9428090415820634 ], "technical_novelty_avg": [ 2.0, 0.816496580927726 ], "empirical_novelty_avg": [ 1.6666666666666667, 1.247219128924647 ], "wc_summary_paper_avg": [ 46.333333333333336, 10.873004286866726 ], "wc_strength_and_weaknesses_avg": [ 265.3333333333333, 138.91564186784566 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 71.66666666666667, 69.56212251569735 ], "wc_summary_review_avg": [ 26.0, 8.524474568362947 ], "wc_review_avg": [ 409.3333333333333, 87.57599874141061 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5000000000000001, "gs_citation": 0, "gs_cited_by_link":
"https://scholar.google.com/scholar?q=related:yAhwBN3cXJAJ:scholar.google.com/&scioq=AMA:+Asymptotic+Midpoint+Augmentation+for+Margin+Balancing+and+Moderate+Broadening&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Gwangju Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.gist.ac.kr", "aff_unique_abbr": "GIST", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Gwangju", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "id": "Cx1xYn6vVm2", "title": "A Mutual Information Duality Algorithm for Multi-Agent Specialization", "track": "main", "status": "Reject", "tldr": "The social behavioral change in population learning is impacted by the dual properties of mutual information.", "abstract": "The social behavior change in a population has long been studied as an essential component of multi-agent learning. The learning of behavioral change not only involves reinforcement learning (RL), but also be measured against the general population with mutual information (MI). The combination of RL and MI led us to derive MI optimizations from policy gradient. With MI as multi-agent's optimization objective, we discover that the dual properties of MI can result in distinctly different population behaviors. From MI maximization that maximizes the stability of a population to MI minimization that enables specialization among the agents, the dual of MI creates a significant change in a population's behavioral properties. In this paper, we propose a minimax formulation of MI (M\\&M) that enables agents specialization with stable regularization. Empirically we evaluated M\\&M against the prior SOTA MARL framework, and analyze the social behavior change in performance, diversity, and the stability of their social graphs. ", "keywords": "Multi-agent;Reinforcement Learning;Mutual Information;Duality;Policy Gradient;Social Graph", "primary_area": "", "supplementary_material": "", "author": "Stefan Juang;Qiyang Cao;Yuan Zhou;Ruochen Liu;Nevin Zhang;Elvis S. Liu", "authorids": "~Stefan_Juang1;~Qiyang_Cao1;~Yuan_Zhou9;~Ruochen_Liu2;~Nevin_Zhang1;~Elvis_S._Liu1", "gender": "M;M;;M;M;M", "homepage": ";https://www.linkedin.com/in/qiyang-cao-a0b6ab111/;;https://github.com/RuochenLiu;https://cse.hkust.edu.hk/~lzhang/teach/courses.html;", "dblp": ";;;;https://dblp.uni-trier.de/pid/z/NevinLianwenZhang.html;", "google_scholar": ";;;;;https://scholar.google.co.uk/citations?user=ZxRKSisAAAAJ", "orcid": ";;;;;", "linkedin": "stefan-juang-93b63998;qiyang-cao-a0b6ab111/;https://www.linkedin.cn/incareer/in/\u5706-\u5468-6002b623a;;;", "or_profile": "~Stefan_Juang1;~Qiyang_Cao1;~Yuan_Zhou9;~Ruochen_Liu2;~Nevin_Zhang1;~Elvis_S._Liu1", "aff": "Hong Kong University of Science and Technology;;Tencent;;Hong Kong University of Science and Technology;Tencent", "aff_domain": "ust.hk;;tencent.com;;ust.hk;tencent.com", "position": "PhD student;;Researcher;;Full Professor;Principal Researcher", "bibtex": "@misc{\njuang2023a,\ntitle={A Mutual Information Duality Algorithm for Multi-Agent Specialization},\nauthor={Stefan Juang and Qiyang Cao and Yuan Zhou and Ruochen Liu and Nevin Zhang and Elvis S. 
Liu},\nyear={2023},\nurl={https://openreview.net/forum?id=Cx1xYn6vVm2}\n}", "github": "", "project": "", "reviewers": "Eoc4;BFMo;ryJg;3HQh;khP1;LEQx;fF3t;NL8L", "site": "https://openreview.net/forum?id=Cx1xYn6vVm2", "pdf_size": 6205450, "recommendation": "3;3;3;5;5;6;6;6", "confidence": "3;3;3;3;4;2;3;2", "correctness": "2;3;3;3;2;4;2;3", "technical_novelty": "2;3;3;3;2;3;3;3", "empirical_novelty": "0;3;2;3;2;3;4;3", "wc_summary_paper": "53;79;130;118;208;100;120;64", "wc_strength_and_weaknesses": "161;194;55;262;139;170;156;116", "wc_clarity_quality_novelty_and_reproducibility": "54;60;211;82;40;85;57;65", "wc_summary_review": "16;41;27;80;78;80;50;49", "wc_review": "284;374;423;542;465;435;383;294", "wc_reply_reviewers": "18;23;710;197;245;0;0;34", "wc_reply_authors": "262;881;1773;634;410;715;651;539", "reply_reviewers": "1;1;2;1;2;0;0;1", "reply_authors": "2;3;4;3;3;2;2;3", "recommendation_avg": [ 4.625, 1.3169567191065923 ], "confidence_avg": [ 2.875, 0.5994789404140899 ], "correctness_avg": [ 2.75, 0.6614378277661477 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 1.118033988749895 ], "wc_summary_paper_avg": [ 109.0, 45.53295509847785 ], "wc_strength_and_weaknesses_avg": [ 156.625, 55.88814163129778 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 81.75, 50.738915045554535 ], "wc_summary_review_avg": [ 52.625, 23.17292331580114 ], "wc_review_avg": [ 400.0, 80.46738469715541 ], "wc_reply_reviewers_avg": [ 153.375, 228.2410663640529 ], "wc_reply_authors_avg": [ 733.125, 430.72016365036825 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.75, 0.6614378277661477 ], "replies_avg": [ 41, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.3760349343306476, "corr_recommendation_correctness": 0.1793740008335438, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:fcUzurFgeSwJ:scholar.google.com/&scioq=A+Mutual+Information+Duality+Algorithm+for+Multi-Agent+Specialization&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "Hong Kong University of Science and Technology;Tencent", "aff_unique_dep": ";Tencent Holdings Limited", "aff_unique_url": "https://www.ust.hk;https://www.tencent.com", "aff_unique_abbr": "HKUST;Tencent", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "CxPw6TeByX4", "title": "SoundNeRirF: Receiver-to-Receiver Sound Neural Room Impulse Response Field", "track": "main", "status": "Reject", "tldr": "Propose a receiver-to-receiver sound neural room acoustics rendering field", "abstract": "We present SoundNeRirF, a framework that learns a continuous receiver-to-receiver neural room impulse response field (r2r-RIR) to help a robot efficiently predict the sound to be heard at novel locations. It represents a room acoustic scene as a continuous 6D function, whose input is a reference receiver's 3D position and a target receiver's 3D position, and whose outputs are an inverse room impulse response (inverse-RIR) and a forward room impulse response (forward-RIR) that jointly project the sound from the reference position to the target position. SoundNeRirF requires knowledge of neither sound source (e.g. location and number of sound sources) nor room acoustic properties (e.g. room size, geometry, materials).
Instead, it merely depends on a sparse set of sound receivers' positions, as well as the recorded sound at each position. We instantiate the continuous 6D function as multi-layer perceptrons (MLP), so it is fully differentiable and continuous at any spatial position. SoundNeRirF is encouraged, during the training stage, to implicitly encode the interaction between sound sources, receivers and room acoustic properties by minimizing the discrepancy between the predicted sound and the truly heard sound at the target position. During inference, the sound at a novel position is predicted by giving a reference position and the corresponding reference sound. Extensive experiments on both synthetic and real-world datasets show SoundNeRirF is capable of predicting high-fidelity and audio-realistic sound that fully captures room reverberation characteristics, significantly outperforming existing methods in terms of accuracy and efficiency.", "keywords": "Sound Neural Rendering Field;Sound Prediction;Representation Learning;Receiver-to-Receiver Modelling", "primary_area": "", "supplementary_material": "/attachment/dc8154c7dd883b6389288902c0468373cf3ff02e.zip", "author": "Yuhang He;Jia-Xing Zhong;Zhuangzhuang Dai;Niki Trigoni;Andrew Markham", "authorids": "~Yuhang_He3;~Jia-Xing_Zhong1;~Zhuangzhuang_Dai1;~Niki_Trigoni1;~Andrew_Markham2", "gender": "M;M;M;F;M", "homepage": "https://yuhanghe01.github.io/;;;https://www.cs.ox.ac.uk/people/niki.trigoni/;", "dblp": ";208/4752;;t/NikiTrigoni;83/7169", "google_scholar": "H1p3ve8AAAAJ;dIckm98AAAAJ;;;https://scholar.google.co.uk/citations?user=g3JTO9EAAAAJ", "orcid": ";;my-orcid?orcid=0000-0002-6098-115X;;", "linkedin": ";;;;", "or_profile": "~Yuhang_He3;~Jia-Xing_Zhong1;~Zhuangzhuang_Dai1;~Niki_Trigoni1;~Andrew_Markham2", "aff": "University of Oxford;Department of Computer Science, University of Oxford;;University of Oxford;University of Oxford", "aff_domain": "ox.ac.uk;cs.ox.ac.uk;;ox.ac.uk;ox.ac.uk", "position": "PhD student;PhD student;;Full Professor;Associate Professor", "bibtex": "@misc{\nhe2023soundnerirf,\ntitle={SoundNeRirF: Receiver-to-Receiver Sound Neural Room Impulse Response Field},\nauthor={Yuhang He and Jia-Xing Zhong and Zhuangzhuang Dai and Niki Trigoni and Andrew Markham},\nyear={2023},\nurl={https://openreview.net/forum?id=CxPw6TeByX4}\n}", "github": "", "project": "", "reviewers": "2NMU;cCRH;tmuv;9Fm6", "site": "https://openreview.net/forum?id=CxPw6TeByX4", "pdf_size": 13179184, "recommendation": "3;6;6;6", "confidence": "4;5;4;5", "correctness": "3;4;3;3", "technical_novelty": "1;3;3;3", "empirical_novelty": "1;3;3;2", "wc_summary_paper": "290;45;35;107", "wc_strength_and_weaknesses": "391;271;193;287", "wc_clarity_quality_novelty_and_reproducibility": "22;19;36;95", "wc_summary_review": "32;18;46;55", "wc_review": "735;353;310;544", "wc_reply_reviewers": "448;22;0;0", "wc_reply_authors": "1366;555;930;0", "reply_reviewers": "2;1;0;0", "reply_authors": "3;1;2;0", "recommendation_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 4.5, 0.5 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.8660254037844386 ], "empirical_novelty_avg": [ 2.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 119.25, 102.36790268438638 ], "wc_strength_and_weaknesses_avg": [ 285.5, 70.53190767305249 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 43.0, 30.700162866017504 ], "wc_summary_review_avg": [ 37.75, 14.042346669983617 ], "wc_review_avg": [ 485.5, 168.84090144274876 ], "wc_reply_reviewers_avg": [ 117.5,
191.02552185506522 ], "wc_reply_authors_avg": [ 712.75, 501.70478122098854 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.5, 1.118033988749895 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7689628334841918549&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Oxford", "aff_unique_dep": "", "aff_unique_url": "https://www.ox.ac.uk", "aff_unique_abbr": "Oxford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Oxford", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Diagnosing and Rectifying Vision Models using Language", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12214", "id": "D-zfUK7BR6c", "poster": "/media/PosterPDFs/ICLR%202023/12214.png?t=1682441086.1056924", "openreview": "https://openreview.net/forum?id=D-zfUK7BR6c", "slides": "https://iclr.cc/virtual/2023/poster/12214", "video": "https://iclr.cc/virtual/2023/poster/12214", "author_site": "Yuhui Zhang, Jeff Z. HaoChen, Shih-Cheng Huang, Kuan-Chieh Wang, James Y Zou, Serena Yeung", "tldr": "Our work highlights a distinct advantage of multi-modal embedding space: the ability to diagnose vision classifiers through natural language.", "abstract": "Recent multi-modal contrastive learning models have demonstrated the ability to learn an embedding space suitable for building strong vision classifiers, by leveraging the rich information in large-scale image-caption datasets. Our work highlights a distinct advantage of this multi-modal embedding space: the ability to diagnose vision classifiers through natural language. The traditional process of diagnosing model behaviors in deployment settings involves labor-intensive data acquisition and annotation. Our proposed method can discover high-error data slices, identify influential attributes and further rectify undesirable model behaviors, without requiring any visual data. Through a combination of theoretical explanation and empirical verification, we present conditions under which classifiers trained on embeddings from one modality can be equivalently applied to embeddings from another modality. On a range of image datasets with known error slices, we demonstrate that our method can effectively identify the error slices and influential attributes, and can further use language to rectify failure modes of the classifier.", "keywords": "model diagnosis;multi-modal contrastive learning;vision and language", "primary_area": "", "supplementary_material": "", "author": "Yuhui Zhang;Jeff Z. 
HaoChen;Shih-Cheng Huang;Kuan-Chieh Wang;James Zou;Serena Yeung", "authorids": "~Yuhui_Zhang3;~Jeff_Z._HaoChen1;~Shih-Cheng_Huang1;~Kuan-Chieh_Wang1;~James_Zou1;~Serena_Yeung1", "gender": "M;;;;;F", "homepage": "https://cs.stanford.edu/~yuhuiz/;https://cs.stanford.edu/~jhaochen/;https://www.linkedin.com/in/mschuang/;https://wangkua1.github.io;;http://ai.stanford.edu/~syyeung/", "dblp": ";267/5319;;13/7562;;147/5023", "google_scholar": "X-Agfu8AAAAJ;SWQxcO8AAAAJ;;https://scholar.google.ca/citations?user=LgMuT6IAAAAJ;23ZXZvEAAAAJ;Tw2m5kUAAAAJ", "orcid": ";;;;;0000-0003-0529-0628", "linkedin": ";;;;;", "or_profile": "~Yuhui_Zhang3;~Jeff_Z._HaoChen1;~Shih-Cheng_Huang1;~Kuan-Chieh_Wang1;~James_Zou1;~Serena_Yeung1", "aff": "Stanford University;Stanford University;Stanford University;Stanford University;Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu", "position": "PhD student;PhD student;PhD student;Postdoc;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhang2023diagnosing,\ntitle={Diagnosing and Rectifying Vision Models using Language},\nauthor={Yuhui Zhang and Jeff Z. HaoChen and Shih-Cheng Huang and Kuan-Chieh Wang and James Zou and Serena Yeung},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=D-zfUK7BR6c}\n}", "github": "", "project": "", "reviewers": "n4R4;Wtjn;yEKt;LGzN", "pdf_size": 12909977, "recommendation": "6;6;6;6", "confidence": "3;4;4;3", "correctness": "3;3;4;3", "technical_novelty": "3;4;4;3", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "21;123;132;68", "wc_strength_and_weaknesses": "21;77;281;86", "wc_clarity_quality_novelty_and_reproducibility": "21;172;83;123", "wc_summary_review": "21;69;227;69", "wc_review": "84;441;723;346", "wc_reply_reviewers": "0;212;0;0", "wc_reply_authors": "0;1509;1742;971", "reply_reviewers": "0;1;0;0", "reply_authors": "0;4;5;4", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.5, 0.5 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 86.0, 44.81629168059312 ], "wc_strength_and_weaknesses_avg": [ 116.25, 98.32439931166628 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 99.75, 55.32348054849767 ], "wc_summary_review_avg": [ 96.5, 77.85081887815953 ], "wc_review_avg": [ 398.5, 228.4584207246474 ], "wc_reply_reviewers_avg": [ 53.0, 91.7986928011505 ], "wc_reply_authors_avg": [ 1055.5, 670.4783739987442 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.25, 1.920286436967152 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 55, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3357329248737862883&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=D-zfUK7BR6c", "email": "stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Neural ePDOs: Spatially Adaptive Equivariant 
Partial Differential Operator Based Networks", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12190", "id": "D1Iqfm7WTkk", "poster": "", "openreview": "https://openreview.net/forum?id=D1Iqfm7WTkk", "slides": "https://iclr.cc/virtual/2023/poster/12190", "video": "https://iclr.cc/virtual/2023/poster/12190", "author_site": "Lingshen He, Yuxuan Chen, Zhengyang Shen, Yibo Yang, Zhouchen Lin", "tldr": "We propose a novel spatially adaptive equivariant PDO-based network which achieves superior performance over previous works. ", "abstract": "Endowing deep learning models with symmetry priors can lead to a considerable performance improvement. As an interesting bridge between physics and deep learning, the equivariant partial differential operators (PDOs) have drawn much attention from researchers recently. However, to ensure the PDOs' translation equivariance, previous works have to require coefficient matrices to be constant and spatially shared for their linearity, which could lead to sub-optimal feature learning at each position. In this work, we propose a novel nonlinear PDOs scheme that is both spatially adaptive and translation equivariant. The coefficient matrices are obtained by local features through a generator rather than spatially shared. Besides, we establish a new theory on incorporating more equivariance like rotations for such PDOs. Based on our theoretical results, we efficiently implement the generator with an equivariant multilayer perceptron (EMLP). As such equivariant PDOs are generated by neural networks, we call them Neural ePDOs. In experiments, we show that our method can significantly improve previous works with smaller model size on various datasets. In particular, we achieve state-of-the-art performance on the MNIST-rot dataset with only half the parameters of the previous best model.", "keywords": "Equivariance;Partial differential operators", "primary_area": "", "supplementary_material": "/attachment/2cf284dafe3be848e309cb9aa5d9f95414e91523.zip", "author": "Lingshen He;Yuxuan Chen;Zhengyang Shen;Yibo Yang;Zhouchen Lin", "authorids": "~Lingshen_He1;~Yuxuan_Chen2;~Zhengyang_Shen3;~Yibo_Yang2;~Zhouchen_Lin1", "gender": "M;M;M;M;M", "homepage": ";;https://zero-lab-pku.github.io/personwise/shenzhengyang/;https://iboing.github.io/;https://zhouchenlin.github.io", "dblp": "252/0142;;163/9959;28/7717/;l/ZhouchenLin", "google_scholar": "RQC187sAAAAJ;45GyXBUAAAAJ;e5KqA88AAAAJ;DxXXnCcAAAAJ;https://scholar.google.com.tw/citations?user=TanjFwoAAAAJ", "orcid": ";;;;0000-0003-1493-7569", "linkedin": ";;;;", "or_profile": "~Lingshen_He1;~Yuxuan_Chen2;~Zhengyang_Shen3;~Yibo_Yang2;~Zhouchen_Lin1", "aff": "Peking University;Peking University;Baidu;JD Explore Academy;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;baidu.com;jd.com;pku.edu.cn", "position": "PhD student;Intern;Researcher;Researcher;Professor", "bibtex": "@inproceedings{\nhe2023neural,\ntitle={Neural e{PDO}s: Spatially Adaptive Equivariant Partial Differential Operator Based Networks},\nauthor={Lingshen He and Yuxuan Chen and Zhengyang Shen and Yibo Yang and Zhouchen Lin},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=D1Iqfm7WTkk}\n}", "github": "", "project": "", "reviewers": "rHY6;vWQG;j4DB;V42w", "pdf_size": 334638, "recommendation": "6;8;8;8", "confidence": "3;4;4;4", "correctness": "3;4;3;4", "technical_novelty": "3;3;3;3", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "124;43;108;142",
"wc_strength_and_weaknesses": "211;88;453;488", "wc_clarity_quality_novelty_and_reproducibility": "26;2;194;73", "wc_summary_review": "57;4;52;49", "wc_review": "418;137;807;752", "wc_reply_reviewers": "16;0;70;38", "wc_reply_authors": "709;90;1419;842", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;3;2", "recommendation_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 104.25, 37.35220877003126 ], "wc_strength_and_weaknesses_avg": [ 310.0, 166.74681406251815 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 73.75, 73.97423537962389 ], "wc_summary_review_avg": [ 40.5, 21.266170318136737 ], "wc_review_avg": [ 528.5, 270.64598648418934 ], "wc_reply_reviewers_avg": [ 31.0, 26.248809496813376 ], "wc_reply_authors_avg": [ 765.0, 472.336214999443 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 1.0, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6145270849441442869&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=D1Iqfm7WTkk", "email": "pku.edu.cn;pku.edu.cn;baidu.com;jd.com;pku.edu.cn", "author_num": 5, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Peking University;Baidu;JD", "aff_unique_dep": ";Baidu, Inc.;JD Explore Academy", "aff_unique_url": "http://www.pku.edu.cn;https://www.baidu.com;", "aff_unique_abbr": "Peking U;Baidu;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China;" }, { "id": "D1Sawu2y1QG", "title": "TILDE-Q: a Transformation Invariant Loss Function for Time-Series Forecasting", "track": "main", "status": "Reject", "tldr": "We designed a novel, lightweight, and shape-aware loss function for time-series forecasting.", "abstract": "Time-series forecasting has caught increasing attention in the AI research field due to its importance in solving real-world problems across different domains, such as energy, weather, traffic, and economy. As shown in various types of data, it has been a must-see issue to deal with drastic changes, temporal patterns, and shapes in sequential data that previous models are weak in prediction. This is because most cases in time-series forecasting aim to minimize $L_p$ norm distances as loss functions, such as mean absolute error (MAE) or mean square error (MSE). These loss functions are vulnerable to not only considering temporal dynamics modeling but also capturing the shape of signals. In addition, these functions often make models misbehave and return uncorrelated results to the original time-series. To become an effective loss function, it has to be invariant to the set of distortions between two time-series data instead of just comparing exact values. In this paper, we propose a novel loss function, called TILDE-Q (Transformation Invariant Loss function with Distance EQuilibrium), that not only considers the distortions in amplitude and phase but also allows models to capture the shape of time-series sequences. In addition, TILDE-Q supports modeling periodic and non-periodic temporal dynamics at the same time. 
We evaluate the effectiveness of TILDE-Q by conducting extensive experiments with respect to periodic and non-periodic conditions of data, from naive models to state-of-the-art models. The experiment results indicate that the models trained with TILDE-Q outperform those trained with other training metrics (e.g., MSE, dynamic time warping (DTW), temporal distortion index (TDI), and longest common subsequence (LCSS)).", "keywords": "Time-Series Forecasting;Deep Learning;Loss functions;Time-series similarity", "primary_area": "", "supplementary_material": "", "author": "Hyunwook Lee;Chunggi Lee;Hongkyu Lim;Sungahn Ko", "authorids": "~Hyunwook_Lee1;~Chunggi_Lee1;limhongkyu1219@unist.ac.kr;~Sungahn_Ko1", "gender": "M;M;;M", "homepage": ";https://chungyi347.github.io/;;https://sites.google.com/view/haiv/", "dblp": ";243/0156;;16/9189", "google_scholar": "GTWj-V4AAAAJ;0k8JcJIAAAAJ;;gKnZiVcAAAAJ", "orcid": "0000-0002-5506-7347;;;", "linkedin": "hyunwook-lee-2b15ba283;;;", "or_profile": "~Hyunwook_Lee1;~Chunggi_Lee1;limhongkyu1219@unist.ac.kr;~Sungahn_Ko1", "aff": "Ulsan National Institute of Science and Technology;Naver Webtoon;;Ulsan National Institute of Science and Technology", "aff_domain": "unist.ac.kr;webtoonscorp.com;;unist.ac.kr", "position": "PhD student;Researcher;;Associate Professor", "bibtex": "@misc{\nlee2023tildeq,\ntitle={{TILDE}-Q: a Transformation Invariant Loss Function for Time-Series Forecasting},\nauthor={Hyunwook Lee and Chunggi Lee and Hongkyu Lim and Sungahn Ko},\nyear={2023},\nurl={https://openreview.net/forum?id=D1Sawu2y1QG}\n}", "github": "", "project": "", "reviewers": "iGk7;4GPm;szr1;wdgV;bzqM", "site": "https://openreview.net/forum?id=D1Sawu2y1QG", "pdf_size": 36354129, "recommendation": "1;3;6;8;8", "confidence": "5;4;3;3;4", "correctness": "1;3;3;3;4", "technical_novelty": "1;2;3;3;3", "empirical_novelty": "1;2;3;3;3", "wc_summary_paper": "28;44;46;86;30", "wc_strength_and_weaknesses": "93;278;69;193;43", "wc_clarity_quality_novelty_and_reproducibility": "121;28;12;33;23", "wc_summary_review": "21;31;83;66;62", "wc_review": "263;381;210;378;158", "wc_reply_reviewers": "0;67;0;0;0", "wc_reply_authors": "792;1752;112;420;27", "reply_reviewers": "0;1;0;0;0", "reply_authors": "2;3;1;1;1", "recommendation_avg": [ 5.2, 2.7856776554368237 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "correctness_avg": [ 2.8, 0.9797958971132712 ], "technical_novelty_avg": [ 2.4, 0.8 ], "empirical_novelty_avg": [ 2.4, 0.8 ], "wc_summary_paper_avg": [ 46.8, 20.88444397153058 ], "wc_strength_and_weaknesses_avg": [ 135.2, 87.63195764103413 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 43.4, 39.4187772514572 ], "wc_summary_review_avg": [ 52.6, 23.052982453470094 ], "wc_review_avg": [ 278.0, 89.28381712270146 ], "wc_reply_reviewers_avg": [ 13.4, 26.8 ], "wc_reply_authors_avg": [ 620.6, 626.2138931706961 ], "reply_reviewers_avg": [ 0.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.7483413175420027, "corr_recommendation_correctness": 0.8206945344647689, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11350394722117885277&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Ulsan National Institute of Science and Technology;NAVER Corporation", "aff_unique_dep": ";Naver Webtoon", "aff_unique_url": "https://www.unist.ac.kr;https://www.webtoons.com", "aff_unique_abbr": "UNIST;Naver Webtoon", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "id": "D3lPaQ7iqw", "title": "Indoor Localisation for Detecting Medication Use in Parkinson's Disease", "track": "main", "status": "Reject", "tldr": "A transformer-based network is proposed for indoor localisation utilising dual modalities where the derived in-home mobility features can be used to classify the medication state of a person with Parkinson's disease", "abstract": "Parkinson\u2019s disease (PD) is a slowly progressive debilitating neurodegenerative disease which is prominently characterised by motor symptoms. Indoor localisation, including its in-home mobility features, could provide a digital biomarker that can be used to quantify how mobility changes as this disease progresses. To improve the effectiveness of current methods for indoor localisation, a transformer-based approach utilising multiple modalities, Received Signal Strength Indicator (RSSI) and accelerometer data from wearable devices, which provide complementary views of movement, is proposed. To properly evaluate our proposed method, we use a free-living dataset where the movements and mobility are greatly varied and unstructured as expected in real-world conditions. 12 pairs of people (one with PD, and the other a control participant) lived for five days in a smart home with various sensors. Our evaluation on such a dataset, which includes subjects with and without PD, demonstrates that our proposed network outperforms the current state-of-the-art in indoor localisation. We also show how the accurate room-level localisation predictions can be transformed into in-home mobility features (i.e. room-to-room transition duration) which can be used to effectively classify whether the PD participant is taking their medications or withholding them (increasing their symptoms)", "keywords": "Transformer;Indoor Localisation;Medication State Classification;Parkinson's Disease", "primary_area": "", "supplementary_material": "", "author": "Ferdian Jovan;Catherine Morgan;Ryan McConville;Emma Tonkin;Alan Whone;Ian Craddock", "authorids": "~Ferdian_Jovan1;catherine.morgan@bristol.ac.uk;~Ryan_McConville1;e.l.tonkin@bristol.ac.uk;alan.whone@bristol.ac.uk;ian.craddock@bristol.ac.uk", "gender": "M;;;;;", "homepage": "https://ferdianjovan.github.io;;https://ryanmcconville.com;;;", "dblp": ";;173/4592;;;", "google_scholar": "19dF3RQAAAAJ;;https://scholar.google.co.uk/citations?user=5fslgt0AAAAJ;;;", "orcid": "0000-0003-4911-540X;;http://orcid.org/0000-0002-7708-3110;;;", "linkedin": "ferdian-jovan/;;;;;", "or_profile": "~Ferdian_Jovan1;catherine.morgan@bristol.ac.uk;~Ryan_McConville1;e.l.tonkin@bristol.ac.uk;alan.whone@bristol.ac.uk;ian.craddock@bristol.ac.uk", "aff": "University of Bristol;;University of Bristol;;;", "aff_domain": "bristol.ac.uk;;bristol.ac.uk;;;", "position": "Postdoc;;Assistant Professor;;;", "bibtex": "@misc{\njovan2023indoor,\ntitle={Indoor Localisation for Detecting Medication Use in Parkinson's Disease},\nauthor={Ferdian Jovan and Catherine Morgan and Ryan McConville and Emma Tonkin and Alan Whone and Ian Craddock},\nyear={2023},\nurl={https://openreview.net/forum?id=D3lPaQ7iqw}\n}", "github": "", "project": "", "reviewers": "Ktqs;NCB2;r87h;Q2sw", "site": "https://openreview.net/forum?id=D3lPaQ7iqw", "pdf_size": 2914198, "recommendation": "1;3;3;3", "confidence": "3;4;3;4", "correctness": "1;2;3;3", "technical_novelty": "1;3;2;2", "empirical_novelty": "1;3;2;3", "wc_summary_paper": 
"896;69;116;37", "wc_strength_and_weaknesses": "23;92;93;33", "wc_clarity_quality_novelty_and_reproducibility": "29;79;127;27", "wc_summary_review": "27;26;100;14", "wc_review": "975;266;436;111", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 2.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 279.5, 357.043764824426 ], "wc_strength_and_weaknesses_avg": [ 60.25, 32.44514601600677 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 65.5, 41.16734142496938 ], "wc_summary_review_avg": [ 41.75, 34.01745875282279 ], "wc_review_avg": [ 447.0, 325.79211162948684 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.8703882797784891, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15794325910611769259&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "University of Bristol", "aff_unique_dep": "", "aff_unique_url": "https://www.bristol.ac.uk", "aff_unique_abbr": "Bristol", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "id": "D4JQEKlTyG", "title": "Individual Privacy Accounting for Differentially Private Stochastic Gradient Descent", "track": "main", "status": "Reject", "tldr": "We compute individual privacy parameters for DP-SGD and show the privacy guarantee varies across different groups.", "abstract": "Differentially private stochastic gradient descent (DP-SGD) is the workhorse algorithm for recent advances in private deep learning. It provides a single privacy guarantee to all datapoints in the dataset. We propose an efficient algorithm to compute privacy guarantees for individual examples when releasing models trained by DP-SGD. We use our algorithm to investigate individual privacy parameters across a number of datasets. We find that most examples enjoy stronger privacy guarantees than the worst-case bound. We further discover that the training loss and the privacy parameter of an example are well-correlated. This implies groups that are underserved in terms of model utility are simultaneously underserved in terms of privacy guarantee. For example, on CIFAR-10, the average $\\varepsilon$ of the class with the lowest test accuracy is 43.6% higher than that of the class with the highest accuracy. 
We also run membership inference attacks to show this reflects disparate empirical privacy risks.\n", "keywords": "individual privacy for DP-SGD;fairness in privacy", "primary_area": "", "supplementary_material": "", "author": "Da Yu;Gautam Kamath;Janardhan Kulkarni;Tie-Yan Liu;Jian Yin;Huishuai Zhang", "authorids": "~Da_Yu1;~Gautam_Kamath1;~Janardhan_Kulkarni1;~Tie-Yan_Liu1;~Jian_Yin3;~Huishuai_Zhang3", "gender": "M;M;M;M;M;M", "homepage": ";http://www.gautamkamath.com/;http://member.acm.org/~tieyanliu;http://sai.sysu.edu.cn/teacher/teacher01/1385356.htm;https://huishuai-git.github.io;", "dblp": "48/8545;73/11140;l/TieYanLiu;95/578-1;144/7537;54/1978", "google_scholar": "FcRGdiwAAAAJ;MK6zHkYAAAAJ;Nh832fgAAAAJ;;w1srHyIAAAAJ;_fxnybwAAAAJ", "orcid": ";;0000-0002-0476-8020;;;", "linkedin": ";;;;;", "or_profile": "~Da_Yu1;~Gautam_Kamath1;~Tie-Yan_Liu1;~Jian_Yin3;~Huishuai_Zhang2;~Janardhan_Kulkarni2", "aff": "Microsoft;University of Waterloo;Microsoft;SUN YAT-SEN UNIVERSITY;Microsoft Research Asia;Microsoft Research, Redmond", "aff_domain": "microsoft.com;uwaterloo.ca;microsoft.com;sysu.edu.cn;microsoft.com;microsoft.com", "position": "Research intern;Assistant Professor;Distinguished Scientist;Full Professor;Researcher;Researcher", "bibtex": "@misc{\nyu2023individual,\ntitle={Individual Privacy Accounting for Differentially Private Stochastic Gradient Descent},\nauthor={Da Yu and Gautam Kamath and Janardhan Kulkarni and Tie-Yan Liu and Jian Yin and Huishuai Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=D4JQEKlTyG}\n}", "github": "", "project": "", "reviewers": "JYd4;9tdY;rPUG", "site": "https://openreview.net/forum?id=D4JQEKlTyG", "pdf_size": 1484045, "recommendation": "3;6;6", "confidence": "5;4;3", "correctness": "1;3;3", "technical_novelty": "1;4;2", "empirical_novelty": "1;3;4", "wc_summary_paper": "15;35;50", "wc_strength_and_weaknesses": "400;53;384", "wc_clarity_quality_novelty_and_reproducibility": "9;129;35", "wc_summary_review": "40;14;92", "wc_review": "464;231;561", "wc_reply_reviewers": "0;122;350", "wc_reply_authors": "1228;407;1482", "reply_reviewers": "0;1;3", "reply_authors": "4;2;5", "recommendation_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "correctness_avg": [ 2.3333333333333335, 0.9428090415820634 ], "technical_novelty_avg": [ 2.3333333333333335, 1.247219128924647 ], "empirical_novelty_avg": [ 2.6666666666666665, 1.247219128924647 ], "wc_summary_paper_avg": [ 33.333333333333336, 14.337208778404378 ], "wc_strength_and_weaknesses_avg": [ 279.0, 159.93957192223152 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 57.666666666666664, 51.54501808020722 ], "wc_summary_review_avg": [ 48.666666666666664, 32.42769735204082 ], "wc_review_avg": [ 418.6666666666667, 138.48305151012363 ], "wc_reply_reviewers_avg": [ 157.33333333333334, 145.05477892468377 ], "wc_reply_authors_avg": [ 1039.0, 458.764282248157 ], "reply_reviewers_avg": [ 1.3333333333333333, 1.247219128924647 ], "reply_authors_avg": [ 3.6666666666666665, 1.247219128924647 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.8660254037844387, "corr_recommendation_correctness": 1.0, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11280148459635913493&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;0;2;0;0", "aff_unique_norm": "Microsoft;University of Waterloo;Sun Yat-sen University", "aff_unique_dep": "Microsoft Corporation;;", "aff_unique_url": 
"https://www.microsoft.com;https://uwaterloo.ca;http://www.sysu.edu.cn", "aff_unique_abbr": "Microsoft;UW;SYSU", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Asia;Redmond", "aff_country_unique_index": "0;1;0;2;2;0", "aff_country_unique": "United States;Canada;China" }, { "id": "D4aZrqLDFtE", "title": "Sample Importance in SGD Training", "track": "main", "status": "Reject", "tldr": "Biasing SGD towards important samples after a short warm-up phase yields fast convergence, auto-balancing of classes, efficient use of augmentations, and shows that sample importance is model specific.", "abstract": "Deep learning requires increasingly bigger models and datasets to improve generalization on unseen data, where some training data samples may be more informative than others. We investigate this assumption in supervised image classification by biasing SGD (Stochastic Gradient Descent) to sample important samples more often during training of a classifier. In contrast to state-of-the-art, our approach does not require additional training iterations to estimate the sample importance, because it computes estimates once during training using the training prediction probabilities. In experiments, we see that our learning technique converges on par or faster in terms of training iterations and can achieve higher test accuracy compared to state-of-the-art, especially when datasets are not suitably balanced. Results suggest that sample importance has intrinsic balancing properties and that an importance weighted class distribution can converge faster than the usual balanced class distribution. Finally, in contrast to recent work, we find that sample importance is model dependent. Therefore, calculating sample importance during training, rather than in a pre-processing step, may be the only viable way to go.", "keywords": "Deep Learning;Sample Importance;Hard Example Mining", "primary_area": "", "supplementary_material": "", "author": "Alessio Quercia;Hanno Scharr;Ira Assent", "authorids": "~Alessio_Quercia1;~Hanno_Scharr1;~Ira_Assent1", "gender": "M;M;F", "homepage": "https://alessioquercia.github.io;;https://cs.au.dk/contact/people/show/person/ira@cs.au.dk", "dblp": "305/9890.html;64/2329;a/IraAssent", "google_scholar": "https://scholar.google.co.uk/citations?user=CPfJjQgAAAAJ;https://scholar.google.de/citations?user=RSaoKE0AAAAJ;https://scholar.google.com.tw/citations?user=w2n5LhUAAAAJ", "orcid": "0000-0002-7828-570X;0000-0002-8555-6416;0000-0002-1091-9948", "linkedin": "https://linkedin.com/in/alessioquercia;;ira-assent-954b2431/", "or_profile": "~Alessio_Quercia1;~Hanno_Scharr1;~Ira_Assent1", "aff": "Forschungszentrum J\u00fclich;Forschungszentrum Juelich GmbH;Aarhus University", "aff_domain": "fz-juelich.de;fz-juelich.de;au.dk", "position": "PhD student;Principal Researcher;Full Professor", "bibtex": "@misc{\nquercia2023sample,\ntitle={Sample Importance in {SGD} Training},\nauthor={Alessio Quercia and Hanno Scharr and Ira Assent},\nyear={2023},\nurl={https://openreview.net/forum?id=D4aZrqLDFtE}\n}", "github": "", "project": "", "reviewers": "VNC6;cDss;LLpx;auZ4", "site": "https://openreview.net/forum?id=D4aZrqLDFtE", "pdf_size": 6684602, "recommendation": "3;3;5;5", "confidence": "4;4;4;4", "correctness": "2;3;3;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;2;2;1", "wc_summary_paper": "112;64;71;30", "wc_strength_and_weaknesses": "455;301;251;185", "wc_clarity_quality_novelty_and_reproducibility": "13;4;56;14", "wc_summary_review": "107;30;42;46", "wc_review": 
"687;399;420;275", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 69.25, 29.14939965076468 ], "wc_strength_and_weaknesses_avg": [ 298.0, 99.54396013822235 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 21.75, 20.154093876927337 ], "wc_summary_review_avg": [ 56.25, 29.88624265443885 ], "wc_review_avg": [ 445.25, 150.17052806726093 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:yCrZPqkJlDAJ:scholar.google.com/&scioq=Sample+Importance+in+SGD+Training&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2", "aff_unique_norm": "Forschungszentrum J\u00fclich;Forschungszentrum Juelich;Aarhus University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.fz-juelich.de;https://www.fz-juelich.de;https://au.dk", "aff_unique_abbr": "FZJ;FZJ;AU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Germany;Denmark" }, { "id": "D6gktu1C7C_", "title": "Voting from Nearest Tasks: Meta-Vote Pruning of Pretrained Models for Downstream Tasks", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "As a few large-scale pre-trained models become the major choices of various applications, new challenges arise for model pruning, e.g., can we avoid pruning the same model from scratch for every downstream task? How to reuse the pruning results of previous tasks to accelerate the pruning for a new task? To address these challenges, we create a small model for a new task from the pruned models of similar tasks. We show that a few fine-tuning steps on this model suffice to produce a promising pruned-model for the new task. We study this ``meta-pruning'' from nearest tasks on two major classes of pre-trained models, convolutional neural network (CNN) and vision transformer (ViT), under a limited budget of pruning iterations. Our study begins by investigating the overlap of pruned models for similar tasks and how the overlap changes over different layers and blocks. Inspired by these discoveries, we develop a simple but effective ``Meta-Vote Pruning (MVP)'' method that significantly reduces the pruning iterations for a new task by initializing a sub-network from the pruned models of its nearest tasks. 
In experiments, we demonstrate MVP's advantages in accuracy, efficiency, and generalization through extensive empirical studies and comparisons with popular pruning methods over several datasets.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Haiyan Zhao;Tianyi Zhou;Guodong Long;Jing Jiang;Chengqi Zhang", "authorids": "~Haiyan_Zhao2;~Tianyi_Zhou1;~Guodong_Long2;~Jing_Jiang6;~Chengqi_Zhang1", "gender": "M;M;M;F;M", "homepage": "http://haiyan.tech/;https://tianyizhou.github.io/;https://www.uts.edu.au/staff/guodong.long;https://www.uts.edu.au/staff/jing.jiang;https://research.polyu.edu.hk/en/persons/chengqi-zhang", "dblp": ";88/8205-1;34/10089;68/1974-2;71/964", "google_scholar": ";OKvgizMAAAAJ;https://scholar.google.com.au/citations?user=Pl8m7hMAAAAJ;https://scholar.google.com.au/citations?hl=en;https://scholar.google.com.au/citations?user=B6lBmqEAAAAJ", "orcid": ";0000-0001-5348-0632;0000-0003-3740-9515;;0000-0001-5715-7154", "linkedin": ";tianyizhou;;;chengqi-zhang-55aa8910/", "or_profile": "~Haiyan_Zhao2;~Tianyi_Zhou1;~Guodong_Long2;~Jing_Jiang6;~Chengqi_Zhang1", "aff": ";University of Maryland, College Park;University of Technology Sydney;University of Technology Sydney;University of Technology Sydney", "aff_domain": ";umd.edu;uts.edu.au;uts.edu.au;uts.edu.au", "position": ";Assistant Professor;Associate Professor;Associate Professor;Full Professor", "bibtex": "@misc{\nzhao2023voting,\ntitle={Voting from Nearest Tasks: Meta-Vote Pruning of Pretrained Models for Downstream Tasks},\nauthor={Haiyan Zhao and Tianyi Zhou and Guodong Long and Jing Jiang and Chengqi Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=D6gktu1C7C_}\n}", "github": "", "project": "", "reviewers": "S1rf;shX5;wTgH;b8PR", "site": "https://openreview.net/forum?id=D6gktu1C7C_", "pdf_size": 816461, "recommendation": "3;3;5;6", "confidence": "4;4;3;4", "correctness": "3;3;2;3", "technical_novelty": "3;1;2;3", "empirical_novelty": "2;1;2;3", "wc_summary_paper": "86;126;49;65", "wc_strength_and_weaknesses": "763;410;192;120", "wc_clarity_quality_novelty_and_reproducibility": "76;144;24;31", "wc_summary_review": "38;64;22;29", "wc_review": "963;744;287;245", "wc_reply_reviewers": "0;371;0;0", "wc_reply_authors": "1893;1207;607;473", "reply_reviewers": "0;2;0;0", "reply_authors": "5;3;3;2", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.82915619758885 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 81.5, 28.848743473503312 ], "wc_strength_and_weaknesses_avg": [ 371.25, 250.11334930387062 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 68.75, 47.80886424084973 ], "wc_summary_review_avg": [ 38.25, 15.911866640969563 ], "wc_review_avg": [ 559.75, 304.14583261981414 ], "wc_reply_reviewers_avg": [ 92.75, 160.64771240201338 ], "wc_reply_authors_avg": [ 1045.0, 562.2223759332245 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 3.25, 1.0897247358851685 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": -0.3333333333333333, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15845493828957152147&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "University of Maryland;University of Technology Sydney", 
"aff_unique_dep": ";", "aff_unique_url": "https://www/umd.edu;https://www.uts.edu.au", "aff_unique_abbr": "UMD;UTS", "aff_campus_unique_index": "0", "aff_campus_unique": "College Park;", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "United States;Australia" }, { "id": "D6hDzJMbRt4", "title": "Dynamics-inspired Neuromorphic Representation Learning", "track": "main", "status": "Withdraw", "tldr": "We build a dynamics-inspired neural mechanism that outperform the weight-based one on classification tasks.", "abstract": "This paper investigates the dynamics-inspired neuromorphic architecture for neural representation and learning following Hamilton's principle. The proposed approach converts weight-based neural structure to its dynamics-based form that consists of finite sub-models, whose mutual relations measured by computing path integrals amongst their dynamic states are equivalent to the typical neural weights. The feedback signals interpreted as stress forces amongst sub-models push them to move based on the entropy-reduction process derived from the Euler-Lagrange equations. We first train a dynamics-based neural model from scratch and observe that this model outperforms its corresponding feedforward neural networks on MNIST dataset. Then we convert several pre-trained neural structures (e.g., DenseNet, ResNet, Transformers, etc.) into dynamics-based forms, followed by fine-tuning via entropy-reduction to obtain the stabilized dynamic states. We observe consistent improvements of these transformed models on the ImageNet dataset in terms of computational complexity, the number of trainable units, testing accuracy, and robustness. Moreover, we demonstrate the correlation between the performance of a neural system and its structural entropy.", "keywords": "dynamics-based;neuromorphic representation;neural network;Hamilton's principle", "primary_area": "", "supplementary_material": "/attachment/83a8b095008897a95833db501396eb687d39de81.zip", "author": "Zhengqi Pei;Shuhui Wang", "authorids": "~Zhengqi_Pei1;~Shuhui_Wang1", "gender": "M;M", "homepage": ";https://vipl.ict.ac.cn/people/shwang/", "dblp": "223/2296;37/2537", "google_scholar": "Qs5zacQAAAAJ;h-JxBSYAAAAJ", "orcid": ";0000-0002-5931-0527", "linkedin": ";", "or_profile": "~Zhengqi_Pei1;~Shuhui_Wang1", "aff": "University of Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences", "aff_domain": "ucas.ac.cn;ict.ac.cn", "position": "MS student;Full Professor", "bibtex": "@misc{\npei2023dynamicsinspired,\ntitle={Dynamics-inspired Neuromorphic Representation Learning},\nauthor={Zhengqi Pei and Shuhui Wang},\nyear={2023},\nurl={https://openreview.net/forum?id=D6hDzJMbRt4}\n}", "github": "", "project": "", "reviewers": "eQSZ;r6pt;KJEX", "site": "https://openreview.net/forum?id=D6hDzJMbRt4", "pdf_size": 3468605, "recommendation": "5;5;8", "confidence": "3;2;3", "correctness": "4;2;3", "technical_novelty": "2;3;4", "empirical_novelty": "2;3;4", "wc_summary_paper": "122;94;150", "wc_strength_and_weaknesses": "847;290;311", "wc_clarity_quality_novelty_and_reproducibility": "84;23;79", "wc_summary_review": "92;26;117", "wc_review": "1145;433;657", "wc_reply_reviewers": "652;181;0", "wc_reply_authors": "2080;844;268", "reply_reviewers": "2;1;0", "reply_authors": "5;2;1", "recommendation_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 3.0, 0.816496580927726 ], 
"empirical_novelty_avg": [ 3.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 122.0, 22.861904265976328 ], "wc_strength_and_weaknesses_avg": [ 482.6666666666667, 257.7651816845884 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 62.0, 27.65260686927485 ], "wc_summary_review_avg": [ 78.33333333333333, 38.38691906829143 ], "wc_review_avg": [ 745.0, 297.25858552221274 ], "wc_reply_reviewers_avg": [ 277.6666666666667, 274.8142807222523 ], "wc_reply_authors_avg": [ 1064.0, 755.925922296623 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 2.6666666666666665, 1.699673171197595 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.5, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:x4BlgV3tEgYJ:scholar.google.com/&scioq=Dynamics-inspired+Neuromorphic+Representation+Learning&hl=en&as_sdt=0,31", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "University of Chinese Academy of Sciences;Chinese Academy of Sciences", "aff_unique_dep": ";Institute of Computing Technology", "aff_unique_url": "http://www.ucas.ac.cn;http://www.ict.ac.cn", "aff_unique_abbr": "UCAS;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "D6lZTMvBo3", "title": "I Speak, You Verify: Toward Trustworthy Neural Program Synthesis", "track": "main", "status": "Withdraw", "tldr": "Large language models over source code can be made more trustworthy when they jointly generate programs and specifications", "abstract": "We develop an approach for improving the trustworthiness and overall accuracy of programs synthesizers based on large language models for source code. Given a natural language description of a programming problem, our method samples both candidate programs as well as candidate predicates specifying what the program should compute. Our method learns to analyze the agreement between programs and predicates to judge both which program is most likely to be correct, and also judge whether the language model is able to solve the programming problem in the first place. 
This latter capacity allows favoring high precision over broad recall: fostering trust by only proposing a program when the system is certain that it is correct.", "keywords": "program synthesis;large language models", "primary_area": "", "supplementary_material": "/attachment/56ce67eefb1857aaadc5829cc13ed64e734aa0da.zip", "author": "Darren Key;Wen-Ding Li;Kevin Ellis", "authorids": "dyk34@cornell.edu;~Wen-Ding_Li1;~Kevin_Ellis1", "gender": ";;M", "homepage": ";https://www.cs.cornell.edu/~wdli/;https://www.cs.cornell.edu/~ellisk/", "dblp": ";132/0674;", "google_scholar": ";2G2mr9QAAAAJ;L7XI6asAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "dyk34@cornell.edu;~Wen-Ding_Li1;~Kevin_Ellis1", "aff": ";Cornell University;Cornell University", "aff_domain": ";cornell.edu;cornell.edu", "position": ";PhD student;Assistant Professor", "bibtex": "@misc{\nkey2023i,\ntitle={I Speak, You Verify: Toward Trustworthy Neural Program Synthesis},\nauthor={Darren Key and Wen-Ding Li and Kevin Ellis},\nyear={2023},\nurl={https://openreview.net/forum?id=D6lZTMvBo3}\n}", "github": "", "project": "", "reviewers": "ZSTh;3qf4;myCC", "site": "https://openreview.net/forum?id=D6lZTMvBo3", "pdf_size": 1253932, "recommendation": "1;5;5", "confidence": "5;4;3", "correctness": "2;3;4", "technical_novelty": "2;2;3", "empirical_novelty": "2;3;3", "wc_summary_paper": "72;111;58", "wc_strength_and_weaknesses": "398;179;72", "wc_clarity_quality_novelty_and_reproducibility": "30;97;11", "wc_summary_review": "2;51;353", "wc_review": "502;438;494", "wc_reply_reviewers": "145;100;0", "wc_reply_authors": "887;1288;747", "reply_reviewers": "1;2;0", "reply_authors": "2;3;1", "recommendation_avg": [ 3.6666666666666665, 1.8856180831641267 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 80.33333333333333, 22.425184255405547 ], "wc_strength_and_weaknesses_avg": [ 216.33333333333334, 135.68181733583745 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 46.0, 36.88721549082645 ], "wc_summary_review_avg": [ 135.33333333333334, 155.20810402667627 ], "wc_review_avg": [ 478.0, 28.472208672083497 ], "wc_reply_reviewers_avg": [ 81.66666666666667, 60.59886320899281 ], "wc_reply_authors_avg": [ 974.0, 229.26985555599467 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.8660254037844385, "corr_recommendation_correctness": 0.8660254037844385, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11546390801620258698&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Cornell University", "aff_unique_dep": "", "aff_unique_url": "https://www.cornell.edu", "aff_unique_abbr": "Cornell", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "D7shOsFXMv", "title": "Online Placebos for Class-incremental Learning", "track": "main", "status": "Reject", "tldr": "We design an online learning algorithm to quickly evaluate and select unlabeled data to improve the KD loss in class-incremental learning. 
", "abstract": "Not forgetting old class knowledge is a key challenge for class-incremental learning (CIL) when the model continuously adapts to new coming classes. A common technique to address this is knowledge distillation (KD) which penalizes prediction inconsistencies between old and new models. Such prediction is made with almost new class data, as old class data is extremely scarce due to the strict memory limitation in CIL. In this paper, we take a deep dive into KD losses and find that \u201cusing new class data for KD\u201d not only hinders the model adaption (for learning new classes) but also results in low efficiency for preserving old class knowledge. We address this by \u201cusing the placebos of old classes for KD\u201d, where the placebos are chosen from a free image stream, such as Google Images, in an automatical and economical fashion. To this end, we train an online placebo selection policy to quickly evaluate the quality of streaming images (good or bad placebos) and use only good ones for one-time feed-forward computation of KD. We formulate the policy training process as an online Markov Decision Process (MDP), and introduce an online learning algorithm to solve this MDP problem without causing much computation costs. In experiments, we show that our method 1) is surprisingly effective even when there is no class overlap between placebos and original old class data, 2) does not require any additional supervision or memory budget, and 3) significantly outperforms a number of top-performing CIL methods, in particular when using lower memory budgets for old class exemplars, e.g., five exemplars per class. The code is available in the supplementary. ", "keywords": "incremental learning;continual learning;class-incremental learning", "primary_area": "", "supplementary_material": "/attachment/5a88ff57ea2de698c89f7fd9d6427488d5de22e0.zip", "author": "Yaoyao Liu;Yingying Li;Bernt Schiele;Qianru Sun", "authorids": "~Yaoyao_Liu1;~Yingying_Li3;~Bernt_Schiele1;~Qianru_Sun2", "gender": ";F;M;F", "homepage": "https://yaoyaoliu.web.illinois.edu/;https://yingying.li;http://www.mpi-inf.mpg.de/~schiele;https://qianrusun.com/", "dblp": "12/10033-1;63/5869;s/BerntSchiele;127/6132.html", "google_scholar": "Qi2PSmEAAAAJ;;https://scholar.google.de/citations?user=z76PBfYAAAAJ;https://scholar.google.de/citations?user=fNfrGMIAAAAJ", "orcid": "0000-0002-5316-3028;;0000-0001-9683-5237;0000-0003-2689-317X", "linkedin": ";;;", "or_profile": "~Yaoyao_Liu1;~Yingying_Li3;~Bernt_Schiele1;~Qianru_Sun2", "aff": "Max Planck Institute for Informatics;California Institute of Technology;Amazon;Singapore Management University", "aff_domain": "mpi-inf.mpg.de;caltech.edu;amazon.com;smu.edu.sg", "position": "PhD student;Postdoc;Principal Researcher;Assistant Professor", "bibtex": "@misc{\nliu2023online,\ntitle={Online Placebos for Class-incremental Learning},\nauthor={Yaoyao Liu and Yingying Li and Bernt Schiele and Qianru Sun},\nyear={2023},\nurl={https://openreview.net/forum?id=D7shOsFXMv}\n}", "github": "", "project": "", "reviewers": "5n8q;rXB7;4WyN;DLTo", "site": "https://openreview.net/forum?id=D7shOsFXMv", "pdf_size": 1130541, "recommendation": "3;5;5;8", "confidence": "4;4;5;5", "correctness": "1;3;4;4", "technical_novelty": "2;3;2;4", "empirical_novelty": "2;2;2;4", "wc_summary_paper": "37;49;99;111", "wc_strength_and_weaknesses": "109;181;281;419", "wc_clarity_quality_novelty_and_reproducibility": "27;33;10;90", "wc_summary_review": "25;35;32;46", "wc_review": "198;298;422;666", 
"wc_reply_reviewers": "103;0;0;0", "wc_reply_authors": "765;878;637;285", "reply_reviewers": "1;0;0;0", "reply_authors": "3;3;3;3", "recommendation_avg": [ 5.25, 1.7853571071357126 ], "confidence_avg": [ 4.5, 0.5 ], "correctness_avg": [ 3.0, 1.224744871391589 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 74.0, 31.575306807693888 ], "wc_strength_and_weaknesses_avg": [ 247.5, 116.33894446830777 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 40.0, 30.074906483645133 ], "wc_summary_review_avg": [ 34.5, 7.566372975210778 ], "wc_review_avg": [ 396.0, 174.91712323268982 ], "wc_reply_reviewers_avg": [ 25.75, 44.60030829489859 ], "wc_reply_authors_avg": [ 641.25, 222.65261619841795 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.7001400420140049, "corr_recommendation_correctness": 0.8003267306650411, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15635530112722389896&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Max Planck Institute for Informatics;California Institute of Technology;Amazon;Singapore Management University", "aff_unique_dep": ";;Amazon.com, Inc.;", "aff_unique_url": "https://mpi-inf.mpg.de;https://www.caltech.edu;https://www.amazon.com;https://www.smu.edu.sg", "aff_unique_abbr": "MPII;Caltech;Amazon;SMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pasadena", "aff_country_unique_index": "0;1;1;2", "aff_country_unique": "Germany;United States;Singapore" }, { "title": "HomoDistil: Homotopic Task-Agnostic Distillation of Pre-trained Transformers", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10956", "id": "D7srTrGhAs", "poster": "/media/PosterPDFs/ICLR%202023/10956.png?t=1682901240.893343", "openreview": "https://openreview.net/forum?id=D7srTrGhAs", "slides": "https://iclr.cc/virtual/2023/poster/10956", "video": "https://iclr.cc/virtual/2023/poster/10956", "author_site": "Chen Liang, Haoming Jiang, Zheng Li, Xianfeng Tang, Bing Yin, Tuo Zhao", "tldr": "We propose a novel task-agnostic distillation method for Transformer-based language models equipped with iterative pruning.", "abstract": "Knowledge distillation has been shown to be a powerful model compression approach to facilitate the deployment of pre-trained language models in practice. This paper focuses on task-agnostic distillation. It produces a compact pre-trained model that can be easily fine-tuned on various tasks with small computational costs and memory footprints. Despite the practical benefits, task-agnostic distillation is challenging. Since the teacher model has a significantly larger capacity and stronger representation power than the student model, it is very difficult for the student to produce predictions that match the teacher's over a massive amount of open-domain training data. Such a large prediction discrepancy often diminishes the benefits of knowledge distillation. To address this challenge, we propose Homotopic Distillation (HomoDistil), a novel task-agnostic distillation approach equipped with iterative pruning. Specifically, we initialize the student model from the teacher model, and iteratively prune the student's neurons until the target width is reached. 
Such an approach maintains a small discrepancy between the teacher's and student's predictions throughout the distillation process, which ensures the effectiveness of knowledge transfer. Extensive experiments demonstrate that HomoDistil achieves significant improvements on existing baselines. Our codes will be released.", "keywords": "Knowledge Distillation;Structured Pruning;Pre-trained Transformer Language Models;Model Compression", "primary_area": "", "supplementary_material": "/attachment/2e6806373cacece68f5d3cdf45c7140787b3132c.zip", "author": "Chen Liang;Haoming Jiang;Zheng Li;Xianfeng Tang;Bing Yin;Tuo Zhao", "authorids": "~Chen_Liang3;~Haoming_Jiang1;~Zheng_Li9;~Xianfeng_Tang1;~Bing_Yin1;~Tuo_Zhao1", "gender": "F;M;M;M;M;M", "homepage": "https://cliang1453.github.io/;https://hmjianggatech.github.io;https://xta.ng/;;http://www2.isye.gatech.edu/~tzhao80;https://hsqmlzno1.github.io/", "dblp": "35/3221-6;230/3684;33/7694;;;10/1143-18", "google_scholar": "https://scholar.google.com/citations?hl=en;XaFhuG8AAAAJ;u1PEv-QAAAAJ;qSOxydEAAAAJ;EJXN6tYAAAAJ;https://scholar.google.com.hk/citations?user=P6fwn4AAAAAJ", "orcid": ";;;0000-0002-5890-0031;;", "linkedin": ";;xianfengtang/;bingyin;;", "or_profile": "~Chen_Liang3;~Haoming_Jiang1;~Xianfeng_Tang1;~Bing_Yin1;~Tuo_Zhao1;~zheng_li4", "aff": "Georgia Institute of Technology;Amazon;Amazon;Amazon;Georgia Institute of Technology;Amazon", "aff_domain": "gatech.edu;amazon.com;amazon.com;amazon.com;gatech.edu;amazon.com", "position": "PhD student;Principal Researcher;Researcher;Senior Science Manager;Associate Professor;Researcher", "bibtex": "@inproceedings{\nliang2023homodistil,\ntitle={HomoDistil: Homotopic Task-Agnostic Distillation of Pre-trained Transformers},\nauthor={Chen Liang and Haoming Jiang and Zheng Li and Xianfeng Tang and Bing Yin and Tuo Zhao},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=D7srTrGhAs}\n}", "github": "", "project": "", "reviewers": "Y2q6;na3M;1n6p", "pdf_size": 478942, "recommendation": "6;6;8", "confidence": "2;2;3", "correctness": "3;3;4", "technical_novelty": "2;3;2", "empirical_novelty": "2;3;3", "wc_summary_paper": "16;86;49", "wc_strength_and_weaknesses": "174;88;65", "wc_clarity_quality_novelty_and_reproducibility": "23;21;64", "wc_summary_review": "18;60;194", "wc_review": "231;255;372", "wc_reply_reviewers": "42;12;19", "wc_reply_authors": "1889;899;1131", "reply_reviewers": "1;1;1", "reply_authors": "4;3;3", "recommendation_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 2.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 50.333333333333336, 28.592928418676454 ], "wc_strength_and_weaknesses_avg": [ 109.0, 46.91126375047539 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 36.0, 19.8158185969358 ], "wc_summary_review_avg": [ 90.66666666666667, 75.05257416563998 ], "wc_review_avg": [ 286.0, 61.59545437773797 ], "wc_reply_reviewers_avg": [ 24.333333333333332, 12.814921857827391 ], "wc_reply_authors_avg": [ 1306.3333333333333, 422.75393420864685 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.9999999999999998, 
"corr_recommendation_correctness": 0.9999999999999998, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7368096082398355221&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=D7srTrGhAs", "email": "gatech.edu;amazon.com;amazon.com;amazon.com;gatech.edu;amazon.com", "author_num": 6, "aff_unique_index": "0;1;1;1;0;1", "aff_unique_norm": "Georgia Institute of Technology;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.gatech.edu;https://www.amazon.com", "aff_unique_abbr": "Georgia Tech;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "D8ulVmpjzYX", "title": "PBES: PCA Based Exemplar Sampling Algorithm for Continual Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Traditional machine learning is both data and computation intensive. The most powerful models require huge quantities of data to train and the training is highly time-consuming. In the streaming or incremental model of machine learning, the data is received and processed in a streaming manner, i.e., the entire data stream is not stored, and the models are updated incrementally. While this is closer to the learning process of humans, a common problem associated with this is \u201ccatastrophic forgetting\u201d (CF), i.e., because the entire data is not stored, but just a sketch of it, as more and more data arrives, the older data has invariably a smaller representation in the stored sketch, and this causes models to perform badly on tasks that are closer to older data. One of the approaches to solve this problem stores an \u201cexemplar set\u201d of data items from the stream \u2013 but this raises the central question: how to choose which items to store? Current approaches\nto solve this are based on herding, which is a way to select a random looking sample by a deterministic algorithm. We propose a novel selection approach based on Principal Component analysis and median sampling. This approach avoids the pitfalls due to outliers and is both simple to implement and use across various incremental machine learning models. It also has independent usage as a sampling algorithm. 
We achieve better performance compared to state-of-the-art methods.\n", "keywords": "Continual Learning;Incremental Learning;Machine Learning;PCA;principal directions;principal component analysis;Class-incremental learning", "primary_area": "", "supplementary_material": "", "author": "Sahil Nokhwal;Nirman Kumar", "authorids": "~Sahil_Nokhwal1;~Nirman_Kumar1", "gender": "M;M", "homepage": "https://www.memphis.edu/cs/;http://www.memphis.edu/cs/people/faculty_pages/nirman-kumar.php", "dblp": ";", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Sahil_Nokhwal1;~Nirman_Kumar1", "aff": "University of Memphis;University of Memphis", "aff_domain": "memphis.edu;memphis.edu", "position": "PhD student;Assistant Professor", "bibtex": "@misc{\nnokhwal2023pbes,\ntitle={{PBES}: {PCA} Based Exemplar Sampling Algorithm for Continual Learning},\nauthor={Sahil Nokhwal and Nirman Kumar},\nyear={2023},\nurl={https://openreview.net/forum?id=D8ulVmpjzYX}\n}", "github": "", "project": "", "reviewers": "JcYz;Gq7Z;RPyF", "site": "https://openreview.net/forum?id=D8ulVmpjzYX", "pdf_size": 1635992, "recommendation": "3;3;5", "confidence": "4;4;3", "correctness": "2;2;3", "technical_novelty": "2;1;2", "empirical_novelty": "1;1;2", "wc_summary_paper": "51;37;49", "wc_strength_and_weaknesses": "781;227;100", "wc_clarity_quality_novelty_and_reproducibility": "91;31;13", "wc_summary_review": "47;32;22", "wc_review": "970;327;184", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "776;662;313", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.3333333333333333, 0.4714045207910317 ], "wc_summary_paper_avg": [ 45.666666666666664, 6.182412330330469 ], "wc_strength_and_weaknesses_avg": [ 369.3333333333333, 295.67361885851244 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 45.0, 33.34666400106613 ], "wc_summary_review_avg": [ 33.666666666666664, 10.274023338281626 ], "wc_review_avg": [ 493.6666666666667, 341.84044361205844 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 583.6666666666666, 196.96756642204375 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": 1.0, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11606407044316216268&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0", "aff_unique_norm": "University of Memphis", "aff_unique_dep": "", "aff_unique_url": "https://www.memphis.edu", "aff_unique_abbr": "UM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "D9WJEsALpI1", "title": "SegNeRF: 3D Part Segmentation with Neural Radiance Fields", "track": "main", "status": "Reject", "tldr": "We perform 3D part segmentation on novel objects using only images by leveraging volume rendering.", "abstract": "Recent advances in Neural Radiance Fields (NeRF) boast impressive performances for generative tasks such as novel view synthesis and 3D reconstruction. Methods based on neural radiance fields are able to represent the 3D world implicitly by relying exclusively on posed images. 
Yet, they have seldom been explored in the realm of discriminative tasks such as 3D part segmentation. In this work, we attempt to bridge that gap by proposing SegNeRF: a neural field representation that integrates a semantic field along with the usual radiance field. SegNeRF inherits from previous works the ability to perform novel view synthesis and 3D reconstruction, and enables 3D part segmentation from a few images. Our extensive experiments on PartNet show that SegNeRF is capable of simultaneously predicting geometry, appearance, and semantic information from posed images, even for unseen objects. The predicted semantic fields allow SegNeRF to achieve an average mIoU of 30.30% for 2D novel view segmentation, and 37.46% for 3D part segmentation, boasting competitive performance against point-based methods by using only a few posed images. Additionally, SegNeRF is able to generate an explicit 3D model from a single image of an object taken in the wild, with its corresponding part segmentation.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/d9daae918632f2127b7af6ac5644c238bcad681c.zip", "author": "Jesus Zarzar;Sara Rojas Martinez;Silvio Giancola;Bernard Ghanem", "authorids": "~Jesus_Zarzar1;~Sara_Rojas_Martinez1;~Silvio_Giancola1;~Bernard_Ghanem1", "gender": "M;F;M;M", "homepage": ";;https://silviogiancola.com/;https://ivul.kaust.edu.sa", "dblp": "237/9581;255/4944;173/3571;37/2516", "google_scholar": ";;2kq5Zl0AAAAJ;rVsGTeEAAAAJ", "orcid": ";;0000-0002-3937-9834;0000-0002-5534-587X", "linkedin": ";sara-rojas-martinez-398323170/;silvio-giancola/;bernardghanem/", "or_profile": "~Jesus_Zarzar1;~Sara_Rojas_Martinez1;~Silvio_Giancola1;~Bernard_Ghanem1", "aff": "KAUST;King Abdullah University of Science and Technology;KAUST;King Abdullah University of Science and Technology", "aff_domain": "kaust.edu.sa;kaust.edu.sa;kaust.edu.sa;kaust.edu.sa", "position": "PhD student;PhD student;Research Scientist;Full Professor", "bibtex": "@misc{\nzarzar2023segnerf,\ntitle={SegNe{RF}: 3D Part Segmentation with Neural Radiance Fields},\nauthor={Jesus Zarzar and Sara Rojas Martinez and Silvio Giancola and Bernard Ghanem},\nyear={2023},\nurl={https://openreview.net/forum?id=D9WJEsALpI1}\n}", "github": "", "project": "", "reviewers": "iUC9;zVwm;miT3;ynF6", "site": "https://openreview.net/forum?id=D9WJEsALpI1", "pdf_size": 22472594, "recommendation": "3;5;5;5", "confidence": "5;4;4;4", "correctness": "2;3;3;4", "technical_novelty": "2;2;2;2", "empirical_novelty": "1;2;2;2", "wc_summary_paper": "57;53;50;64", "wc_strength_and_weaknesses": "196;395;184;253", "wc_clarity_quality_novelty_and_reproducibility": "16;34;230;33", "wc_summary_review": "28;57;41;25", "wc_review": "297;539;505;375", "wc_reply_reviewers": "24;0;11;30", "wc_reply_authors": "267;396;679;316", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 56.0, 5.244044240850758 ], "wc_strength_and_weaknesses_avg": [ 257.0, 83.83018549424783 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 78.25, 87.90442252810719 ], "wc_summary_review_avg": [ 37.75, 12.636751956100111 ], "wc_review_avg": [ 429.0, 97.74456506629922 ], "wc_reply_reviewers_avg": [ 16.25, 11.627015954233485 ], "wc_reply_authors_avg": [ 414.5, 159.5 ], "reply_reviewers_avg": [ 0.75, 
0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": 0.816496580927726, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14834790699707984181&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "King Abdullah University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaust.edu.sa", "aff_unique_abbr": "KAUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Saudi Arabia" }, { "id": "DAxQXzdq8SF", "title": "SOM-CPC: Unsupervised Contrastive Learning with Self-Organizing Maps for Structured Representations of High-Rate Time Series", "track": "main", "status": "Reject", "tldr": "This work proposes SOM-CPC, an unsupervised model for interpretable 2D representation learning of high-rate time series.", "abstract": "Continuous monitoring with an ever-increasing number of sensors has become ubiquitous across many application domains. Acquired data are typically high-dimensional and difficult to interpret, but they are also hypothesized to lie on a low-dimensional manifold. Dimensionality reduction techniques have, therefore, been sought for. Popular linear methods like Principle Component Analysis (PCA) have been extended to non-linear techniques such as Self-Organizing Maps (SOMs) or deep learning (DL) models. DL models have the ability to act on raw data, preventing heuristic feature selection, but the resulting latent space is often unstructured and still multi-dimensional. PCA and SOMs, on the other hand, need to be preceded with a feature-extraction step, but can then map high-dimensional features to 2D space. In this work we propose SOM-CPC, a model that jointly optimizes Contrastive Predictive Coding and a SOM to find an organized 2D manifold. We address a largely unexplored and challenging set of scenarios comprising high-rate time series, and show on both synthetic and real-life data (medical sleep data and audio recordings) that SOM-CPC outperforms both DL-based feature extraction, followed by PCA, K-means or a SOM, and strong deep-SOM baselines that jointly optimize a DL model and a SOM. SOM-CPC has great potential to expose latent patterns in high-rate data streams and may therefore contribute to a better understanding of many different processes and systems. ", "keywords": "Contrastive Predictive Coding;Self-Organizing Maps;Time series;Dimensionality Reduction", "primary_area": "", "supplementary_material": "", "author": "Iris A.M. 
Huijben;Arthur Andreas Nijdam;Sebastiaan Overeem;Merel M Van Gilst;Ruud Van Sloun", "authorids": "~Iris_A.M._Huijben1;~Arthur_Andreas_Nijdam1;~Sebastiaan_Overeem1;~Merel_M_Van_Gilst1;~Ruud_Van_Sloun1", "gender": "Non-Binary;;F;F;F", "homepage": ";;;https://www.tue.nl/en/research/researchers/ruud-van-sloun;", "dblp": ";;;162/9715.html;247/0968", "google_scholar": "IRXz-6AAAAAJ;RSI1dewAAAAJ;;gQQJgocAAAAJ;https://scholar.google.nl/citations?user=1ReBr6sAAAAJ", "orcid": ";;0000-0003-2138-5686;;0000-0002-2629-3898", "linkedin": ";;;;", "or_profile": "~Arthur_Andreas_Nijdam1;~Sebastiaan_Overeem1;~Merel_M_Van_Gilst1;~Ruud_Van_Sloun1;~Iris_Anne_Marie_Huijben1", "aff": ";Eindhoven University of Technology;Eindhoven University of Technology;Eindhoven University of Technology;Meta Facebook", "aff_domain": ";tue.nl;tue.nl;tue.nl;meta.com", "position": ";Full Professor;Assistant Professor;Assistant Professor;Intern", "bibtex": "@misc{\nhuijben2023somcpc,\ntitle={{SOM}-{CPC}: Unsupervised Contrastive Learning with Self-Organizing Maps for Structured Representations of High-Rate Time Series},\nauthor={Iris A.M. Huijben and Arthur Andreas Nijdam and Sebastiaan Overeem and Merel M Van Gilst and Ruud Van Sloun},\nyear={2023},\nurl={https://openreview.net/forum?id=DAxQXzdq8SF}\n}", "github": "", "project": "", "reviewers": "hhoQ;eMcU;FiNp", "site": "https://openreview.net/forum?id=DAxQXzdq8SF", "pdf_size": 4109605, "recommendation": "3;6;6", "confidence": "4;3;4", "correctness": "3;3;4", "technical_novelty": "2;2;2", "empirical_novelty": "2;2;4", "wc_summary_paper": "38;59;75", "wc_strength_and_weaknesses": "450;217;151", "wc_clarity_quality_novelty_and_reproducibility": "73;28;59", "wc_summary_review": "55;34;62", "wc_review": "616;338;347", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1680;947;883", "reply_reviewers": "0;0;0", "reply_authors": "3;2;2", "recommendation_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.9428090415820634 ], "wc_summary_paper_avg": [ 57.333333333333336, 15.15109090315135 ], "wc_strength_and_weaknesses_avg": [ 272.6666666666667, 128.25582074726884 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 53.333333333333336, 18.80307303489394 ], "wc_summary_review_avg": [ 50.333333333333336, 11.897712198383164 ], "wc_review_avg": [ 433.6666666666667, 128.98148015208315 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1170.0, 361.56972587132714 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.5, "corr_recommendation_correctness": 0.5, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10183622434431526296&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Eindhoven University of Technology;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://www.tue.nl;https://meta.com", "aff_unique_abbr": "TU/e;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "Netherlands;United States" }, { "id": "DB3BH3arU2Y", "title": "Revisiting Uncertainty Estimation for Node Classification: New Benchmark and Insights", "track": "main", "status": "Reject", "tldr": "We 
analyze uncertainty estimation for node classification problems: we propose a benchmark covering distribution shifts of different types and perform a thorough analysis of various uncertainty estimation techniques.", "abstract": "Uncertainty estimation is an important task that can be essential for high-risk applications of machine learning. This problem is especially challenging for node-level prediction in graph-structured data, as the samples (nodes) are interdependent. Recently, several studies addressed node-level uncertainty estimation. However, there is no established benchmark for evaluating these methods in a unified setup covering diverse distributional shift. In this paper, we address this problem and propose such a benchmark together with a technique for the controllable generation of data splits with various types of distributional shift. Importantly, besides the standard feature-based distributional shift, we also consider shifts specifically designed for graph-structured data. In summary, our benchmark consists of several graph datasets equipped with various distributional shift on which we evaluate the robustness of models and uncertainty estimation performance. This allows us to compare existing solutions in a unified setup. Moreover, we decompose the current state-of-the-art Dirichlet-based framework and perform an ablation study on its components. In our experiments, we demonstrate that when faced with complex yet realistic distributional shift, most models fail to maintain high classification performance and consistency of uncertainty estimates with prediction errors. However, ensembling techniques help to partially overcome significant drops in performance and achieve better results than distinct models. Among single-pass models, Natural Posterior Network with GNN encoder achieves the best performance.", "keywords": "uncertainty estimation;distribution shift;graph;node classification;benchmark", "primary_area": "", "supplementary_material": "", "author": "Gleb Bazhenov;Denis Kuznedelev;Andrey Malinin;Artem Babenko;Liudmila Prokhorenkova", "authorids": "~Gleb_Bazhenov1;~Denis_Kuznedelev1;~Andrey_Malinin1;~Artem_Babenko1;~Liudmila_Prokhorenkova1", "gender": "M;M;M;M;F", "homepage": ";https://github.com/Godofnothing;;;", "dblp": "322/8649.html;322/8616;174/5705;117/4834;45/11468", "google_scholar": "DLt-B68AAAAJ;;;q885d1wAAAAJ;https://scholar.google.ru/citations?user=6JyZlSEAAAAJ", "orcid": ";0009-0005-2420-9620;;0000-0002-1830-8252;", "linkedin": "bazhenov-gleb/;;;;", "or_profile": "~Gleb_Bazhenov1;~Denis_Kuznedelev1;~Andrey_Malinin1;~Artem_Babenko1;~Liudmila_Prokhorenkova1", "aff": "Skolkovo Institute of Science and Technology;;Yandex;Yandex;Yandex", "aff_domain": "skoltech.ru;;yandex.ru;yandex-team.ru;yandex-team.ru", "position": "MS student;;Principal Researcher;Researcher;Researcher", "bibtex": "@misc{\nbazhenov2023revisiting,\ntitle={Revisiting Uncertainty Estimation for Node Classification: New Benchmark and Insights},\nauthor={Gleb Bazhenov and Denis Kuznedelev and Andrey Malinin and Artem Babenko and Liudmila Prokhorenkova},\nyear={2023},\nurl={https://openreview.net/forum?id=DB3BH3arU2Y}\n}", "github": "", "project": "", "reviewers": "zVhc;fvq7;6NVp", "site": "https://openreview.net/forum?id=DB3BH3arU2Y", "pdf_size": 5083896, "recommendation": "5;5;6", "confidence": "4;3;4", "correctness": "3;3;2", "technical_novelty": "3;2;2", "empirical_novelty": "3;1;2", "wc_summary_paper": "130;40;57", "wc_strength_and_weaknesses": "228;129;136", 
"wc_clarity_quality_novelty_and_reproducibility": "79;63;23", "wc_summary_review": "120;29;45", "wc_review": "557;261;261", "wc_reply_reviewers": "70;0;0", "wc_reply_authors": "839;732;608", "reply_reviewers": "1;0;0", "reply_authors": "2;1;1", "recommendation_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 75.66666666666667, 39.04128868547018 ], "wc_strength_and_weaknesses_avg": [ 164.33333333333334, 45.10974272691975 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 55.0, 23.55136231020759 ], "wc_summary_review_avg": [ 64.66666666666667, 39.66806720216597 ], "wc_review_avg": [ 359.6666666666667, 139.53573815414538 ], "wc_reply_reviewers_avg": [ 23.333333333333332, 32.99831645537222 ], "wc_reply_authors_avg": [ 726.3333333333334, 94.39044207733936 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.4999999999999999, "corr_recommendation_correctness": -0.9999999999999997, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11597953431898802833&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Skolkovo Institute of Science and Technology;Yandex", "aff_unique_dep": ";", "aff_unique_url": "https://www.skoltech.ru;https://yandex.com", "aff_unique_abbr": "Skoltech;Yandex", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Russian Federation" }, { "id": "DBDpZIdVIY", "title": "CLR-GAM: Contrastive Point Cloud Learning with Guided Augmentation and Feature Mapping", "track": "main", "status": "Withdraw", "tldr": "efficient augmentation selection strategy, and effective feature association in contrastive learning", "abstract": "Point cloud data plays an essential role in robotics and self-driving applications. Yet, it is time-consuming and nontrivial to annotate point cloud data while they enable learning discriminative 3D representations that empower downstream tasks, such as classification and segmentation. Recently, contrastive learning based frameworks show promising results for learning 3D representations in a self-supervised manner. However, existing contrastive learning methods cannot encode and associate structural features precisely and search the higher dimensional augmentation space efficiently. In this paper, we present CLR-GAM, a novel contrastive learning based framework with Guided Augmentation (GA) for efficient dynamic exploration strategy and Guided Feature Mapping (GFM) for similar structural feature association between augmented point clouds. We empirically demonstrate that the proposed approach achieves state-of-the-art performance on both simulated and real-world 3D point cloud datasets for three different downstream tasks, i.e., 3D point cloud classification, few-shot learning, and object part segmentation. 
The code and pretrained models are made available in the supplementary material.", "keywords": "contrastive learning;point cloud representation learning;few shot learning;self supervised learning", "primary_area": "", "supplementary_material": "/attachment/c26c000ad6b157aa8f5b38729f2c3b75a2860058.zip", "author": "Srikanth Malla;Yi-Ting Chen", "authorids": "~Srikanth_Malla1;~Yi-Ting_Chen2", "gender": "M;M", "homepage": "http://www.srikanthmalla.com;https://sites.google.com/site/yitingchen0524/", "dblp": "223/4108;12/5268-1", "google_scholar": "TjIKwLcAAAAJ;8tRH7RMAAAAJ", "orcid": ";", "linkedin": "srikanthmalla/;", "or_profile": "~Srikanth_Malla1;~Yi-Ting_Chen2", "aff": "Kinetic Automation;National Yang Ming Chiao Tung University", "aff_domain": "kinetic.auto;nycu.edu.tw", "position": "Research Engineer;Assistant Professor", "bibtex": "@misc{\nmalla2023clrgam,\ntitle={{CLR}-{GAM}: Contrastive Point Cloud Learning with Guided Augmentation and Feature Mapping},\nauthor={Srikanth Malla and Yi-Ting Chen},\nyear={2023},\nurl={https://openreview.net/forum?id=DBDpZIdVIY}\n}", "github": "", "project": "", "reviewers": "4neM;Waes;jUuG;YPRn", "site": "https://openreview.net/forum?id=DBDpZIdVIY", "pdf_size": 2900948, "recommendation": "3;3;5;5", "confidence": "4;4;4;4", "correctness": "3;3;3;3", "technical_novelty": "3;2;2;2", "empirical_novelty": "3;2;2;3", "wc_summary_paper": "108;88;69;158", "wc_strength_and_weaknesses": "445;323;241;247", "wc_clarity_quality_novelty_and_reproducibility": "57;36;124;80", "wc_summary_review": "38;51;54;53", "wc_review": "648;498;488;538", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 105.75, 33.16907445196504 ], "wc_strength_and_weaknesses_avg": [ 314.0, 82.24962005991274 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 74.25, 32.66783586342995 ], "wc_summary_review_avg": [ 49.0, 6.442049363362563 ], "wc_review_avg": [ 543.0, 63.4428877022476 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14286231588660632371&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Kinetic Automation;National Yang Ming Chiao Tung University", "aff_unique_dep": ";", "aff_unique_url": ";https://www.nycu.edu.tw", "aff_unique_abbr": ";NYCU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Taiwan", "aff_country_unique_index": "1", "aff_country_unique": ";China" }, { "title": "Understanding new tasks through the lens of training data via exponential tilting", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11773", "id": "DBMttEEoLbw", "poster": "/media/PosterPDFs/ICLR%202023/11773.png?t=1683149350.5841265", "openreview": "https://openreview.net/forum?id=DBMttEEoLbw", "slides": "https://iclr.cc/virtual/2023/poster/11773", "video": "https://iclr.cc/virtual/2023/poster/11773", "author_site": "Subha Maity, Mikhail Yurochkin, Moulinath Banerjee, Yuekai Sun", "tldr": "", "abstract": "Deploying machine learning 
models on new tasks is a major challenge due to differences in distributions of the train (source) data and the new (target) data. However, the training data likely captures some of the properties of the new task. We consider the problem of reweighing the training samples to gain insights into the distribution of the target task. Specifically, we formulate a distribution shift model based on the exponential tilt assumption and learn train data importance weights minimizing the KL divergence between labeled train and unlabeled target datasets. The learned train data weights can then be used for downstream tasks such as target performance evaluation, fine-tuning, and model selection. We demonstrate the efficacy of our method on Waterbirds and Breeds benchmarks.", "keywords": "Out-of-distribution generalization;model selection;subpopulation shift;concept drift", "primary_area": "", "supplementary_material": "/attachment/e685fcb5bd590f1972c46f74f70397f7904c3466.zip", "author": "Subha Maity;Mikhail Yurochkin;Moulinath Banerjee;Yuekai Sun", "authorids": "~Subha_Maity1;~Mikhail_Yurochkin1;~Moulinath_Banerjee1;~Yuekai_Sun1", "gender": "M;M;M;", "homepage": "https://lsa.umich.edu/stats/people/phd-students/smaity.html;https://moonfolk.github.io/;https://lsa.umich.edu/stats/people/faculty/moulib.html;https://yuekai.github.io/", "dblp": "278/2922;191/6719;;", "google_scholar": "eD9vCGMAAAAJ;QjBF9sUAAAAJ;;6T1XtW8AAAAJ", "orcid": ";;;", "linkedin": ";mikhail-yurochkin-a45659114/;;", "or_profile": "~Subha_Maity1;~Mikhail_Yurochkin1;~Moulinath_Banerjee1;~Yuekai_Sun1", "aff": ";IBM Research;University of Michigan - Ann Arbor;University of Michigan - Ann Arbor", "aff_domain": ";ibm.com;umich.edu;umich.edu", "position": ";Researcher;Full Professor;Assistant \u2192 Associate Professor of Statistics", "bibtex": "@inproceedings{\nmaity2023understanding,\ntitle={Understanding new tasks through the lens of training data via exponential tilting},\nauthor={Subha Maity and Mikhail Yurochkin and Moulinath Banerjee and Yuekai Sun},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=DBMttEEoLbw}\n}", "github": "", "project": "", "reviewers": "AxBN;3m7n;UqFX", "pdf_size": 730262, "recommendation": "6;6;6", "confidence": "3;3;4", "correctness": "4;3;4", "technical_novelty": "3;4;3", "empirical_novelty": "3;3;3", "wc_summary_paper": "63;100;74", "wc_strength_and_weaknesses": "77;313;253", "wc_clarity_quality_novelty_and_reproducibility": "28;44;5", "wc_summary_review": "21;59;5", "wc_review": "189;516;337", "wc_reply_reviewers": "0;63;40", "wc_reply_authors": "144;675;869", "reply_reviewers": "0;1;1", "reply_authors": "1;2;2", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 79.0, 15.513435037626794 ], "wc_strength_and_weaknesses_avg": [ 214.33333333333334, 100.15099711047425 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 25.666666666666668, 16.006942938057293 ], "wc_summary_review_avg": [ 28.333333333333332, 22.647050335284035 ], "wc_review_avg": [ 347.3333333333333, 133.69700403856808 ], "wc_reply_reviewers_avg": [ 34.333333333333336, 26.02989734047285 ], "wc_reply_authors_avg": [ 562.6666666666666, 306.4531865645315 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], 
"reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16748833416963429503&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=DBMttEEoLbw", "email": ";ibm.com;umich.edu;umich.edu", "author_num": 4, "aff_unique_index": "0;1;1", "aff_unique_norm": "IBM;University of Michigan", "aff_unique_dep": "IBM Research;", "aff_unique_url": "https://www.ibm.com/research;https://www.umich.edu", "aff_unique_abbr": "IBM;UM", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Ann Arbor", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "DCgjv41MD2M", "title": "Distortion-Aware Network Pruning and Feature Reuse for Real-time Video Segmentation", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Real-time video segmentation is a crucial task for many real-world applications such as autonomous driving and robot control. Since state-of-the-art semantic segmentation models are often too heavy for real-time applications despite their impressive performance, researchers have proposed lightweight architectures with speed-accuracy trade-offs, achieving real-time speed at the expense of reduced accuracy. In this paper, we propose a novel framework to speed up any architecture with skip-connections for real-time vision tasks by exploiting the temporal locality in videos. Specifically, at the arrival of each frame, we transform the features from the previous frame to reuse them at specific spatial bins. We then perform partial computation of the backbone network on the regions of the current frame that captures temporal differences between the current and previous frame. This is done by dynamically dropping out residual blocks using a gating mechanism which decides which blocks to drop based on inter-frame distortion. 
We validate our Spatial-Temporal Mask Generator (STMG) on video semantic segmentation benchmarks with multiple backbone networks, and show that our method largely speeds up inference with minimal loss of accuracy.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Hyunsu Rhee;Dongchan Min;Sunil Hwang;Bruno Andreis;Sung Ju Hwang", "authorids": "~Hyunsu_Rhee1;~Dongchan_Min1;~Sunil_Hwang1;~Bruno_Andreis1;~Sung_Ju_Hwang1", "gender": "M;M;;M;", "homepage": ";https://kevinmin95.github.io/;https://github.com/sunilhoho;https://andreisbruno.github.io/;", "dblp": "322/8595;294/6881;225/3993;225/0404;", "google_scholar": ";;;WzQ_v4IAAAAJ;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Hyunsu_Rhee1;~Dongchan_Min1;~Sunil_Hwang1;~Bruno_Andreis1;~Sung_Ju_Hwang1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;", "position": "MS student;PhD student;MS student;PhD student;", "bibtex": "@misc{\nrhee2023distortionaware,\ntitle={Distortion-Aware Network Pruning and Feature Reuse for Real-time Video Segmentation},\nauthor={Hyunsu Rhee and Dongchan Min and Sunil Hwang and Bruno Andreis and Sung Ju Hwang},\nyear={2023},\nurl={https://openreview.net/forum?id=DCgjv41MD2M}\n}", "github": "", "project": "", "reviewers": "T6Sh;hrTg;hBDa", "site": "https://openreview.net/forum?id=DCgjv41MD2M", "pdf_size": 10234978, "recommendation": "5;5;5", "confidence": "1;4;3", "correctness": "2;4;3", "technical_novelty": "2;2;3", "empirical_novelty": "2;3;2", "wc_summary_paper": "130;51;89", "wc_strength_and_weaknesses": "117;228;179", "wc_clarity_quality_novelty_and_reproducibility": "1;21;9", "wc_summary_review": "14;56;4", "wc_review": "262;356;281", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 5.0, 0.0 ], "confidence_avg": [ 2.6666666666666665, 1.247219128924647 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 90.0, 32.25936556516056 ], "wc_strength_and_weaknesses_avg": [ 174.66666666666666, 45.419036635411906 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 10.333333333333334, 8.219218670625303 ], "wc_summary_review_avg": [ 24.666666666666668, 22.5289936649544 ], "wc_review_avg": [ 299.6666666666667, 40.581878824037815 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10673909186225540480&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "DClS-1HQ_0P", "title": "Jointist: Simultaneous Improvement of Multi-instrument Transcription and Music Source Separation via Joint Training", 
"track": "main", "status": "Reject", "tldr": "Joint training of music transcription and source separation improves the performance of both", "abstract": "In this paper, we introduce Jointist, an instrument-aware multi-instrument framework that is capable of transcribing, recognizing, and separating multiple musical instruments from an audio clip.\nJointist consists of an instrument recognition module that conditions the other two modules: a transcription module that outputs instrument-specific piano rolls, and a source separation module that utilizes instrument information and transcription results. The joint training of the transcription and source separation modules serves to improve the performance of both tasks. The instrument module is optional and can be directly controlled by human users. This makes Jointist a flexible user-controllable framework.\n\nOur challenging problem formulation makes the model highly useful in the real world given that modern popular music typically consists of multiple instruments. Its novelty, however, necessitates a new perspective on how to evaluate such a model. In our experiments, we assess the proposed model from various aspects, providing a new evaluation perspective for multi-instrument transcription. Subjective listening test shows that Jointist achieves state-of-the-art performance on popular music, outperforming existing multi-instrument transcription models such as MT3. %We also argue that transcription models can be used as a preprocessing module for other music analysis tasks. We conducted experiments on several downstream tasks, and found that the proposed method improved transcription by more than 1 percentage points (ppt.); source separation by 5 SDR, downbeat detection by 1.8 ppt., chord recognition by 1.4 ppt., and key estimation by 1.4 ppt., when utilizing transcription results obtained from Jointist. 
", "keywords": "multi-task learning;automatic music transcription;music source separation;instrument recognition", "primary_area": "", "supplementary_material": "/attachment/92900befa7a67c8ed6f1fe760c0b190a13037d85.zip", "author": "Kin Wai Cheuk;Keunwoo Choi;Qiuqiang Kong;Bochen Li;Minz Won;Ju-Chiang Wang;Yun-Ning Hung;Dorien Herremans", "authorids": "~Kin_Wai_Cheuk1;~Keunwoo_Choi1;~Qiuqiang_Kong1;bochen1106@gmail.com;minz.won@upf.edu;ju-chiang.wang@bytedance.com;yunning.hung@tiktok.com;~Dorien_Herremans1", "gender": "M;;M;;;;;F", "homepage": ";;https://qiuqiangkong.github.io/;;;;;http://dorienherremans.com/", "dblp": ";;;;;;;", "google_scholar": "Ib0JOs4AAAAJ;;;;;;;https://scholar.google.com.tw/citations?user=Hp5W5f0AAAAJ", "orcid": ";;;;;;;0000-0001-8607-1640", "linkedin": ";;;;;;;", "or_profile": "~Kin_Wai_Cheuk1;~Keunwoo_Choi1;~Qiuqiang_Kong1;bochen1106@gmail.com;minz.won@upf.edu;ju-chiang.wang@bytedance.com;yunning.hung@tiktok.com;~Dorien_Herremans1", "aff": "Singapore University of Technology and Design;;ByteDance;;;;;Singapore University of Technology and Design", "aff_domain": "sutd.edu.sg;;bytedance.com;;;;;sutd.edu.sg", "position": "PhD student;;Researcher;;;;;Associate Professor", "bibtex": "@misc{\ncheuk2023jointist,\ntitle={Jointist: Simultaneous Improvement of Multi-instrument Transcription and Music Source Separation via Joint Training},\nauthor={Kin Wai Cheuk and Keunwoo Choi and Qiuqiang Kong and Bochen Li and Minz Won and Ju-Chiang Wang and Yun-Ning Hung and Dorien Herremans},\nyear={2023},\nurl={https://openreview.net/forum?id=DClS-1HQ_0P}\n}", "github": "", "project": "", "reviewers": "ZUz4;YdWq;jSBB;ZLUi", "site": "https://openreview.net/forum?id=DClS-1HQ_0P", "pdf_size": 3260595, "recommendation": "3;3;6;6", "confidence": "5;5;3;3", "correctness": "1;2;3;3", "technical_novelty": "1;2;2;2", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "70;107;92;79", "wc_strength_and_weaknesses": "58;994;523;207", "wc_clarity_quality_novelty_and_reproducibility": "43;39;23;33", "wc_summary_review": "350;119;43;62", "wc_review": "521;1259;681;381", "wc_reply_reviewers": "35;347;0;0", "wc_reply_authors": "797;1850;731;317", "reply_reviewers": "1;2;0;0", "reply_authors": "1;5;1;1", "recommendation_avg": [ 4.5, 1.5 ], "confidence_avg": [ 4.0, 1.0 ], "correctness_avg": [ 2.25, 0.82915619758885 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 87.0, 13.946325680981353 ], "wc_strength_and_weaknesses_avg": [ 445.5, 358.4330481414904 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 34.5, 7.533259586659682 ], "wc_summary_review_avg": [ 143.5, 122.45917687131495 ], "wc_review_avg": [ 710.5, 333.9921406260932 ], "wc_reply_reviewers_avg": [ 95.5, 145.90493480345344 ], "wc_reply_authors_avg": [ 923.75, 565.5313320232576 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.0, 1.7320508075688772 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": 0.9045340337332909, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17427538592217632220&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Singapore University of Technology and Design;ByteDance", "aff_unique_dep": ";", "aff_unique_url": "https://www.sutd.edu.sg;https://www.bytedance.com", "aff_unique_abbr": "SUTD;ByteDance", "aff_campus_unique_index": "", "aff_campus_unique": "", 
"aff_country_unique_index": "0;1;0", "aff_country_unique": "Singapore;China" }, { "id": "DD8ZJNdTPtO", "title": "Stochastic Optimization under Strongly Convexity and Lipschitz Hessian: Minimax Sample Complexity", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Optimization of convex functions under stochastic zeroth-order feedback has been a major and challenging question in online learning. In this work we consider the problem of optimizing second-order smooth and strongly convex functions where the algorithm is only accessible to noisy evaluations of the objective function it queries. We provide the first tight characterization for the rate of the minimax simple regret by developing matching upper and lower bounds. We propose an algorithm that features a combination of a bootstrapping stage and a mirror-descent stage. The main innovation of our approach is the usage of a gradient estimation scheme that exploits the local geometry of the objective function, and we provide sharp analysis for the corresponding estimation bounds. ", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/fd31299b575ab80846394be698c8965bc758c67d.zip", "author": "Qian Yu;Yining Wang;Baihe Huang;Qi Lei;Jason D. Lee", "authorids": "~Qian_Yu5;~Yining_Wang1;~Baihe_Huang1;~Qi_Lei1;~Jason_D._Lee1", "gender": ";M;;F;M", "homepage": "https://scholar.princeton.edu/qyu;https://yining-wang.com;;https://cecilialeiqi.github.io/;https://jasondlee88.github.io/", "dblp": "16/3790-1;04/7235;279/4131;;88/3262", "google_scholar": "SxUNhucAAAAJ;HpQGq54AAAAJ;chICXXMAAAAJ;kGOgaowAAAAJ;GR_DsT0AAAAJ", "orcid": "0000-0002-2034-5941;;;;", "linkedin": ";;;;", "or_profile": "~Qian_Yu5;~Yining_Wang1;~Baihe_Huang1;~Qi_Lei1;~Jason_D._Lee1", "aff": "University of California, Santa Barbara;University of Texas at Dallas;University of California, Berkeley;New York University;Princeton University", "aff_domain": "ucsb.edu;cs.utdallas.edu;berkeley.edu;nyu.edu;princeton.edu", "position": "Assistant Professor;Associate Professor;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@misc{\nyu2023stochastic,\ntitle={Stochastic Optimization under Strongly Convexity and Lipschitz Hessian: Minimax Sample Complexity},\nauthor={Qian Yu and Yining Wang and Baihe Huang and Qi Lei and Jason D. 
Lee},\nyear={2023},\nurl={https://openreview.net/forum?id=DD8ZJNdTPtO}\n}", "github": "", "project": "", "reviewers": "YHu2;PQ3G;e1GW;Sb1e", "site": "https://openreview.net/forum?id=DD8ZJNdTPtO", "pdf_size": 498175, "recommendation": "1;3;6;6", "confidence": "3;4;3;2", "correctness": "2;2;4;3", "technical_novelty": "4;2;4;3", "empirical_novelty": "4;0;0;0", "wc_summary_paper": "60;79;67;37", "wc_strength_and_weaknesses": "254;1298;212;168", "wc_clarity_quality_novelty_and_reproducibility": "50;41;95;19", "wc_summary_review": "182;38;23;41", "wc_review": "546;1456;397;265", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.0, 2.1213203435596424 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 2.75, 0.82915619758885 ], "technical_novelty_avg": [ 3.25, 0.82915619758885 ], "empirical_novelty_avg": [ 1.0, 1.7320508075688772 ], "wc_summary_paper_avg": [ 60.75, 15.303185942802891 ], "wc_strength_and_weaknesses_avg": [ 483.0, 471.522003728352 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 51.25, 27.66202270261522 ], "wc_summary_review_avg": [ 71.0, 64.4476531768225 ], "wc_review_avg": [ 666.0, 466.8142028687645 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.5000000000000001, "corr_recommendation_correctness": 0.8528028654224419, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:BjDzpuiQwdkJ:scholar.google.com/&scioq=Stochastic+Optimization+under+Strongly+Convexity+and+Lipschitz+Hessian:+Minimax+Sample+Complexity&hl=en&as_sdt=0,24", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "University of California, Santa Barbara;University of Texas at Dallas;University of California, Berkeley;New York University;Princeton University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.ucsb.edu;https://www.utdallas.edu;https://www.berkeley.edu;https://www.nyu.edu;https://www.princeton.edu", "aff_unique_abbr": "UCSB;UT Dallas;UC Berkeley;NYU;Princeton", "aff_campus_unique_index": "0;1;2", "aff_campus_unique": "Santa Barbara;Dallas;Berkeley;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Dichotomy of Control: Separating What You Can Control from What You Cannot", "status": "Top-5%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10943", "id": "DEGjDDV22pI", "poster": "/media/PosterPDFs/ICLR%202023/10943.png?t=1681275783.7015517", "openreview": "https://openreview.net/forum?id=DEGjDDV22pI", "slides": "https://iclr.cc/virtual/2023/poster/10943", "video": "https://iclr.cc/virtual/2023/poster/10943", "author_site": "Sherry Yang, Dale Schuurmans, Pieter Abbeel, Ofir Nachum", "tldr": "We propose dichotomy of control (DoC) for supervised learning in stochastic environments by separating things within a policy's control (actions) from those outside of a policy\u2019s control (env stochasticity) through a mutual information constraint.", "abstract": "Future- or return-conditioned supervised learning is an emerging paradigm for offline reinforcement learning (RL), in which the future outcome (i.e., return) associated with a sequence of actions in an offline dataset is used as input to a policy trained to imitate those same actions. 
While return-conditioning is at the heart of popular algorithms such as decision transformer (DT), these methods tend to perform poorly in highly stochastic environments, where an occasional high return associated with a sequence of actions may be due more to the randomness of the environment than to the actions themselves. Such situations can lead to a learned policy that is inconsistent with its conditioning inputs; i.e., using the policy \u2013 while conditioned on a specific desired return \u2013 to act in the environment can lead to a distribution of real returns that is wildly different than desired. In this work, we propose the dichotomy of control (DoC), a future-conditioned supervised learning framework that separates mechanisms within a policy\u2019s control (actions) from those outside of a policy\u2019s control (environment stochasticity). We achieve this by conditioning the policy on a latent variable representation of the future and designing a mutual information constraint that removes any future information from the latent variable that is only due to randomness of the environment. Theoretically, we show that DoC yields policies that are consistent with their conditioning inputs, ensuring that conditioning a learned policy on a desired high-return future outcome will correctly induce high-return behavior. Empirically, we show that DoC is able to achieve significantly better performance than DT on environments with highly stochastic rewards (e.g., Bandit) and transitions (e.g., FrozenLake).", "keywords": "Offline reinforcement learning;return-conditioned supervised learning;stochastic environments;decision transformer", "primary_area": "", "supplementary_material": "/attachment/dd819f96090bcc25a2f9ed64b43c745c2dcb27b1.zip", "author": "Sherry Yang;Dale Schuurmans;Pieter Abbeel;Ofir Nachum", "authorids": "~Sherry_Yang1;~Dale_Schuurmans1;~Pieter_Abbeel2;~Ofir_Nachum1", "gender": "F;;M;M", "homepage": "https://sherryy.github.io;;https://people.eecs.berkeley.edu/~pabbeel/;https://scholar.google.com/citations?user=C-ZlBWMAAAAJ&hl=en", "dblp": ";;;", "google_scholar": "7c1B_fIAAAAJ;;https://scholar.google.com.tw/citations?user=vtwH6GkAAAAJ;C-ZlBWMAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Sherry_Yang1;~Dale_Schuurmans1;~Pieter_Abbeel2;~Ofir_Nachum1", "aff": "University of California, Berkeley;;Covariant;OpenAI", "aff_domain": "berkeley.edu;;covariant.ai;openai.com", "position": "Student;;Founder;Researcher", "bibtex": "@inproceedings{\nyang2023dichotomy,\ntitle={Dichotomy of Control: Separating What You Can Control from What You Cannot},\nauthor={Sherry Yang and Dale Schuurmans and Pieter Abbeel and Ofir Nachum},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=DEGjDDV22pI}\n}", "github": "", "project": "", "reviewers": "aa3o;kJLT;eVkG;LLnY", "pdf_size": 1856452, "recommendation": "6;6;8;8", "confidence": "4;4;4;3", "correctness": "3;2;4;3", "technical_novelty": "3;3;4;4", "empirical_novelty": "3;2;3;3", "wc_summary_paper": "41;101;137;111", "wc_strength_and_weaknesses": "220;344;290;111", "wc_clarity_quality_novelty_and_reproducibility": "2;19;43;94", "wc_summary_review": "50;89;44;43", "wc_review": "313;553;514;359", "wc_reply_reviewers": "205;218;0;0", "wc_reply_authors": "557;558;258;208", "reply_reviewers": "2;1;0;0", "reply_authors": "3;2;1;1", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], 
"technical_novelty_avg": [ 3.5, 0.5 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 97.5, 35.16745654721137 ], "wc_strength_and_weaknesses_avg": [ 241.25, 87.1073332159813 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 39.5, 34.67347689517162 ], "wc_summary_review_avg": [ 56.5, 18.953891421024867 ], "wc_review_avg": [ 434.75, 101.02567742905761 ], "wc_reply_reviewers_avg": [ 105.75, 105.84983467157613 ], "wc_reply_authors_avg": [ 395.25, 163.21056185186055 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.7071067811865475, "gs_citation": 50, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17864827318965313304&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=DEGjDDV22pI", "email": "berkeley.edu;;covariant.ai;openai.com", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of California, Berkeley;Covariant;OpenAI", "aff_unique_dep": ";;", "aff_unique_url": "https://www.berkeley.edu;;https://openai.com", "aff_unique_abbr": "UC Berkeley;;OpenAI", "aff_campus_unique_index": "0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States;" }, { "id": "DEhSlPNviW", "title": "Gradient Properties of Hard Thresholding Operator", "track": "main", "status": "Reject", "tldr": "", "abstract": "Sparse optimization receives increasing attention in many applications such as compressed sensing, variable selection in regression problems, and recently neural network compression in machine learning. For example, the problem of compressing a neural network is a bi-level, stochastic, and nonconvex problem that can be cast into a sparse optimization problem. Hence, developing efficient methods for sparse optimization plays a critical role in applications. The goal of this paper is to develop analytical techniques for general, large size sparse optimization problems using the hard thresholding operator. To this end, we study the iterative hard thresholding (IHT) algorithm, which has been extensively studied in the literature because it is scalable, fast, and easily implementable. In spite of extensive research on the IHT scheme, we develop several new techniques that not only recover many known results but also lead to new results. Specifically, we first establish a new and critical gradient descent property of the hard thresholding (HT) operator. Our gradient descent result can be related to the distance between points that are sparse. However, the distance between sparse points cannot provide any information about the gradient in the sparse setting. To the best of our knowledge, the other way around (the gradient to the distance) has not been shown so far in the literature. Also, our gradient descent property allows one to study the IHT when the stepsize is less than or equal to 1/L, where L>0 is the Lipschitz constant of the gradient of an objective function. Note that the existing techniques in the literature can only handle the case when the stepsize is strictly less than 1/L. By exploiting this we introduce and study HT-stable and HT-unstable stationary points and show no matter how close an initialization is to a HT-unstable stationary point (saddle point in sparse sense), the IHT sequence leaves it. 
Finally, we show that no matter what sparse initial point is selected, the IHT sequence converges if the function values at HT-stable stationary points are distinct, where the last condition is a new assumption that has not been found in the literature. We provide a video of 4000 independent runs where the IHT algorithm is initialized very close to a HT-unstable stationary point and show the sequences escape them.", "keywords": "Sparse optimization;Hard thresholding;Iterative hard thresholding;HT-stationary point;HT-stable point;HT-unstable point", "primary_area": "", "supplementary_material": "/attachment/a6c4bf13bdf75cd824f1c083dcc48bb44012460a.zip", "author": "Saeed Damadi;Jinglai Shen", "authorids": "~Saeed_Damadi1;~Jinglai_Shen1", "gender": "M;M", "homepage": ";https://jinglai-shen.github.io", "dblp": ";", "google_scholar": "yR44Le8AAAAJ;", "orcid": ";", "linkedin": "saeed-damadi-54627889/;", "or_profile": "~Saeed_Damadi1;~Jinglai_Shen1", "aff": ";University of Maryland, Baltimore County", "aff_domain": ";umbc.edu", "position": ";Full Professor", "bibtex": "@misc{\ndamadi2023gradient,\ntitle={Gradient Properties of Hard Thresholding Operator},\nauthor={Saeed Damadi and Jinglai Shen},\nyear={2023},\nurl={https://openreview.net/forum?id=DEhSlPNviW}\n}", "github": "", "project": "", "reviewers": "BYNJ;F4td;kzNB;raL8", "site": "https://openreview.net/forum?id=DEhSlPNviW", "pdf_size": 434166, "recommendation": "1;3;3;5", "confidence": "5;3;2;5", "correctness": "4;3;2;4", "technical_novelty": "1;2;2;2", "empirical_novelty": "1;0;2;2", "wc_summary_paper": "104;111;80;83", "wc_strength_and_weaknesses": "209;160;116;74", "wc_clarity_quality_novelty_and_reproducibility": "35;53;90;122", "wc_summary_review": "22;68;22;39", "wc_review": "370;392;308;318", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.0, 1.4142135623730951 ], "confidence_avg": [ 3.75, 1.299038105676658 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 94.5, 13.275918047351754 ], "wc_strength_and_weaknesses_avg": [ 139.75, 50.231339818882 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 75.0, 33.60803475361212 ], "wc_summary_review_avg": [ 37.75, 18.793283374652763 ], "wc_review_avg": [ 347.0, 35.05709628591621 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6325107905172199259&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0", "aff_unique_norm": "University of Maryland, Baltimore County", "aff_unique_dep": "", "aff_unique_url": "https://www.umbc.edu", "aff_unique_abbr": "UMBC", "aff_campus_unique_index": "0", "aff_campus_unique": "Baltimore County", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "DFaFg1u7UT", "title": "Examining the Value of Neural Filter Pruning -- Retrospect and Prospect", "track": "main", "status": "Withdraw", "tldr": "We study the \"value of filter pruning\" issue and show it might be inaccurate due to suboptimal LR setups, more insights provided to explain the reason behind.", "abstract": "Neural network filter 
pruning is one of the major methods in model compression and acceleration. Despite the remarkable progress in the past several years, there is an ongoing debate concerning the value of filter pruning -- Some works in 2019 argue that filter pruning is of no value since they found training the pruned network from scratch can achieve similar or even better performance than pruning a pretrained model. This argument fundamentally challenges the value of many filter pruning works. However, to date, the community has not formally responded to such acute questioning. In this paper, we present extensive empirical analyses to show the seeming contradiction is due to suboptimal learning rate schedule settings. We introduce more strict comparison setups and show filter pruning still has value within the same training epoch budgets. Apart from justifying the value of filter pruning empirically, we further examine the reason behind it and discover that the poor trainability caused by pruning is largely responsible for the sub-optimality of the learning rate schedule, thus calling for an urgent need to recover trainability after pruning. This paper does not target new SOTA performance of filter pruning. Instead, we focus on clarifying the existing mysteries in filter pruning towards a better understanding.", "keywords": "Neural network filter pruning;value of pruning;trainability;dynamical isometry", "primary_area": "", "supplementary_material": "", "author": "Huan Wang;Can Qin;Yue Bai;Yun Fu", "authorids": "~Huan_Wang3;~Can_Qin1;~Yue_Bai1;~Yun_Fu1", "gender": "M;M;M;M", "homepage": "https://huanwang.tech/;http://canqin.tech;https://yueb17.github.io/;http://www1.ece.neu.edu/~yunfu/", "dblp": "70/6155-14;214/2488;119/0848;00/5815-1", "google_scholar": "0-On0y4AAAAJ;QCik-YcAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=h-JEcQ8AAAAJ", "orcid": "0000-0001-6951-901X;;;0000-0002-5098-2853", "linkedin": "huanwang-zju/;;;furaymond/", "or_profile": "~Huan_Wang3;~Can_Qin1;~Yue_Bai1;~Yun_Fu1", "aff": "Northeastern University;Northeastern University;Northeastern University;Northeastern University", "aff_domain": "neu.edu;neu.edu;neu.edu;northeastern.edu", "position": "PhD student;PhD student;PhD student;Full Professor", "bibtex": "@misc{\nwang2023examining,\ntitle={Examining the Value of Neural Filter Pruning -- Retrospect and Prospect},\nauthor={Huan Wang and Can Qin and Yue Bai and Yun Fu},\nyear={2023},\nurl={https://openreview.net/forum?id=DFaFg1u7UT}\n}", "github": "", "project": "", "reviewers": "dgUc;wJEo;i1a4;1E8S", "site": "https://openreview.net/forum?id=DFaFg1u7UT", "pdf_size": 318856, "recommendation": "3;5;5;6", "confidence": "5;5;3;3", "correctness": "2;2;3;4", "technical_novelty": "2;3;3;2", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "71;195;71;99", "wc_strength_and_weaknesses": "689;235;273;229", "wc_clarity_quality_novelty_and_reproducibility": "6;162;41;90", "wc_summary_review": "65;87;102;30", "wc_review": "831;679;487;448", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 4.0, 1.0 ], "correctness_avg": [ 2.75, 0.82915619758885 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 109.0, 50.950956811427986 ], "wc_strength_and_weaknesses_avg": [ 356.5, 192.70897747640092 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 74.75, 
58.546455913231846 ], "wc_summary_review_avg": [ 71.0, 27.08320512790168 ], "wc_review_avg": [ 611.25, 154.0850008923646 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.6882472016116854, "corr_recommendation_correctness": 0.7608859102526822, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:2tEt2VIr6cEJ:scholar.google.com/&scioq=Examining+the+Value+of+Neural+Filter+Pruning+--+Retrospect+and+Prospect&hl=en&as_sdt=0,47", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Northeastern University", "aff_unique_dep": "", "aff_unique_url": "https://www.northeastern.edu", "aff_unique_abbr": "NEU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "DH4v0nW7yJ", "title": "Curved Representation Space of Vision Transformers", "track": "main", "status": "Withdraw", "tldr": "We analyze how the representation space of Transformers is shaped, based on which their characteristics in terms of adversarial robustness, model calibration, and difficulty of training are explained.", "abstract": "Neural networks with self-attention (a.k.a. Transformers) like ViT and Swin have emerged as a better alternative to traditional convolutional neural networks (CNNs) for computer vision tasks. However, our understanding of how the new architecture works is still limited. In this paper, we focus on the phenomenon that Transformers show higher robustness against corruptions than CNNs, while not being overconfident (in fact, we find Transformers are actually underconfident). This is contrary to the intuition that robustness increases with confidence. We resolve this contradiction by investigating how the output of the penultimate layer moves in the representation space as the input data moves within a small area. In particular, we show the following. (1) While CNNs exhibit a fairly linear relationship between the input and output movements, Transformers show a nonlinear relationship for some data. For those data, the output of Transformers moves in a curved trajectory as the input moves linearly. (2) When a data point is located in a curved region, it is hard to move it out of the decision region since the output moves along a curved trajectory instead of a straight line to the decision boundary, resulting in the high robustness of Transformers. (3) If a data point is slightly modified to jump out of the curved region, the movements afterwards become linear and the output goes to the decision boundary directly. Thus, Transformers can be attacked easily after a small random jump and the perturbation in the final attacked data remains imperceptible. In other words, there does exist a decision boundary near the data, which is hard to find only because of the curved representation space. This also explains the underconfident predictions of Transformers. (4) The curved regions in the representation space start to form at an early training stage and grow throughout the training course.
Some data are trapped in the regions, obstructing Transformers from reducing the training loss.", "keywords": "Vision transformers;representation space;robustness;calibration;decision boundary", "primary_area": "", "supplementary_material": "", "author": "Juyeop Kim;Junha Park;Songkuk Kim;Jong-Seok Lee", "authorids": "~Juyeop_Kim1;~Junha_Park1;~Songkuk_Kim1;~Jong-Seok_Lee1", "gender": "M;M;M;", "homepage": ";;;http://mcml.yonsei.ac.kr", "dblp": ";297/4014;78/2018;70/1152", "google_scholar": ";;https://scholar.google.com/citations?hl=en;YGwwt6cAAAAJ", "orcid": "0009-0008-5192-7662;;;", "linkedin": "juyeopkim/;%EC%A4%80%ED%95%98-%EB%B0%95-ba645b219;;", "or_profile": "~Juyeop_Kim1;~Junha_Park1;~Songkuk_Kim1;~Jong-Seok_Lee1", "aff": "Yonsei University;Yonsei University;Yonsei University;Yonsei University", "aff_domain": "yonsei.ac.kr;yonsei.ac.kr;yonsei.ac.kr;yonsei.ac.kr", "position": "PhD student;PhD student;Assistant Professor;Full Professor", "bibtex": "@misc{\nkim2023curved,\ntitle={Curved Representation Space of Vision Transformers},\nauthor={Juyeop Kim and Junha Park and Songkuk Kim and Jong-Seok Lee},\nyear={2023},\nurl={https://openreview.net/forum?id=DH4v0nW7yJ}\n}", "github": "", "project": "", "reviewers": "eJLG;7XVP;2afJ;3oCH", "site": "https://openreview.net/forum?id=DH4v0nW7yJ", "pdf_size": 5982673, "recommendation": "3;5;6;6", "confidence": "4;4;4;3", "correctness": "2;3;4;4", "technical_novelty": "2;2;3;2", "empirical_novelty": "4;3;4;3", "wc_summary_paper": "57;125;200;100", "wc_strength_and_weaknesses": "290;99;204;153", "wc_clarity_quality_novelty_and_reproducibility": "318;264;40;24", "wc_summary_review": "38;46;50;38", "wc_review": "703;534;494;315", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.5, 0.5 ], "wc_summary_paper_avg": [ 120.5, 51.944682114726625 ], "wc_strength_and_weaknesses_avg": [ 186.5, 70.35090617753264 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 161.5, 131.0219447268281 ], "wc_summary_review_avg": [ 43.0, 5.196152422706632 ], "wc_review_avg": [ 511.5, 137.9284234666662 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.4714045207910316, "corr_recommendation_correctness": 0.9847319278346618, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3764987755018267492&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Yonsei University", "aff_unique_dep": "", "aff_unique_url": "https://www.yonsei.ac.kr", "aff_unique_abbr": "Yonsei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Dynamic Prompt Learning via Policy Gradient for Semi-structured Mathematical Reasoning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12073", "id": "DHyHRBwJUTN", "poster": "/media/PosterPDFs/ICLR%202023/12073.png?t=1681072803.1327834", "openreview": "https://openreview.net/forum?id=DHyHRBwJUTN", "slides": "https://iclr.cc/virtual/2023/poster/12073", "video": 
"https://iclr.cc/virtual/2023/poster/12073", "author_site": "Pan Lu, Liang Qiu, Kai-Wei Chang, Yingnian Wu, Song-Chun Zhu, Tanmay Rajpurohit, Peter Clark, Ashwin Kalyan", "tldr": "We present a new tabular math word problem dataset, TabMWP, and we propose a novel approach to it that learns to select in-context examples in few-shot GPT-3 via policy gradient. ", "abstract": "Mathematical reasoning, a core ability of human intelligence, presents unique challenges for machines in abstract thinking and logical reasoning. Recent large pre-trained language models such as GPT-3 have achieved remarkable progress on mathematical reasoning tasks written in text form, such as math word problems (MWP). However, it is unknown if the models can handle more complex problems that involve math reasoning over heterogeneous information, such as tabular data. To fill the gap, we present Tabular Math Word Problems (TabMWP), a new dataset containing 38,431 open-domain grade-level problems that require mathematical reasoning on both textual and tabular data. Each question in TabMWP is aligned with a tabular context, which is presented as an image, semi-structured text, and a structured table. There are two types of questions: free-text and multi-choice, and each problem is annotated with gold solutions to reveal the multi-step reasoning process. We evaluate different pre-trained models on TabMWP, including the GPT-3 model in a few-shot setting. As earlier studies suggest, since few-shot GPT-3 relies on the selection of in-context examples, its performance is unstable and can degrade to near chance. The unstable issue is more severe when handling complex problems like TabMWP. To mitigate this, we further propose a novel approach, PromptPG, which utilizes policy gradient to learn to select in-context examples from a small amount of training data and then constructs the corresponding prompt for the test example. Experimental results show that our method outperforms the best baseline by 5.31% on the accuracy metric and reduces the prediction variance significantly compared to random selection, which verifies its effectiveness in selecting in-context examples. 
The data and code are available at https://promptpg.github.io.", "keywords": "Mathematical Reasoning;Tabular Math Word Problems;Prompt Learning;Policy Gradient", "primary_area": "", "supplementary_material": "/attachment/fea5b2fa1c4a53e1939549aab34aa41915496492.zip", "author": "Pan Lu;Liang Qiu;Kai-Wei Chang;Ying Nian Wu;Song-Chun Zhu;Tanmay Rajpurohit;Peter Clark;Ashwin Kalyan", "authorids": "~Pan_Lu2;~Liang_Qiu2;~Kai-Wei_Chang1;~Ying_Nian_Wu1;~Song-Chun_Zhu1;tanmay.rajpurohit@gmail.com;~Peter_Clark1;~Ashwin_Kalyan6", "gender": ";M;M;;M;;M;", "homepage": ";https://www.lqiu.info/;http://kwchang.net;;https://zhusongchun.net/;;https://allenai.org/team/peterc;", "dblp": ";01/1198-1;18/2428;;10/10313;;34/1184;", "google_scholar": ";mr1VxDwAAAAJ;fqDBtzYAAAAJ;;https://scholar.google.com.tw/citations?user=Al8dyb4AAAAJ;;o-5vyEsAAAAJ;", "orcid": ";0000-0001-9904-2953;0000-0001-5365-0072;;;;;", "linkedin": ";liangqiu/;kai-wei-chang-41239040;;;;peter-clark-a8b556/;", "or_profile": "~Pan_Lu2;~Liang_Qiu2;~Kai-Wei_Chang1;~Ying_Nian_Wu1;~Song-Chun_Zhu1;tanmay.rajpurohit@gmail.com;~Peter_Clark1;~Ashwin_Kalyan6", "aff": ";Amazon;Amazon;;Peking University;;Allen Institute for Artificial Intelligence;", "aff_domain": ";amazon.com;amazon.com;;pku.edu.cn;;allenai.org;", "position": ";Applied Scientist;Researcher;;Full Professor;;Senior Research Manager;", "bibtex": "@inproceedings{\nlu2023dynamic,\ntitle={Dynamic Prompt Learning via Policy Gradient for Semi-structured Mathematical Reasoning},\nauthor={Pan Lu and Liang Qiu and Kai-Wei Chang and Ying Nian Wu and Song-Chun Zhu and Tanmay Rajpurohit and Peter Clark and Ashwin Kalyan},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=DHyHRBwJUTN}\n}", "github": "", "project": "", "reviewers": "MEHB;5Mut;MYrE;fX43;G2Gx", "pdf_size": 2585072, "recommendation": "5;6;6;6;8", "confidence": "3;3;3;3;4", "correctness": "3;2;4;4;4", "technical_novelty": "3;3;3;2;4", "empirical_novelty": "3;3;2;3;3", "wc_summary_paper": "87;119;47;72;96", "wc_strength_and_weaknesses": "273;211;128;128;245", "wc_clarity_quality_novelty_and_reproducibility": "55;36;10;14;46", "wc_summary_review": "62;88;30;52;48", "wc_review": "477;454;215;266;435", "wc_reply_reviewers": "0;11;0;37;0", "wc_reply_authors": "799;2092;755;905;932", "reply_reviewers": "0;1;0;1;0", "reply_authors": "2;6;3;4;3", "recommendation_avg": [ 6.2, 0.9797958971132712 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "correctness_avg": [ 3.4, 0.8 ], "technical_novelty_avg": [ 3.0, 0.6324555320336759 ], "empirical_novelty_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_paper_avg": [ 84.2, 24.044957891416654 ], "wc_strength_and_weaknesses_avg": [ 197.0, 59.66238345892661 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 32.2, 17.6 ], "wc_summary_review_avg": [ 56.0, 19.05780679931455 ], "wc_review_avg": [ 369.4, 107.30256287712797 ], "wc_reply_reviewers_avg": [ 9.6, 14.347125147568763 ], "wc_reply_authors_avg": [ 1096.6, 501.97234983612395 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 3.6, 1.3564659966250536 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": 0.9185586535436918, "corr_recommendation_correctness": 0.4082482904638631, "gs_citation": 273, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6779916582872034145&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "pdf": "https://openreview.net/pdf?id=DHyHRBwJUTN", "email": 
";amazon.com;amazon.com;;pku.edu.cn;;allenai.org;", "author_num": 8, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Amazon;Peking University;Allen Institute for Artificial Intelligence", "aff_unique_dep": "Amazon.com, Inc.;;", "aff_unique_url": "https://www.amazon.com;http://www.pku.edu.cn;https://allenai.org", "aff_unique_abbr": "Amazon;Peking U;AI2", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;China" }, { "title": "RLx2: Training a Sparse Deep Reinforcement Learning Model from Scratch", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11531", "id": "DJEEqoAq7to", "poster": "", "openreview": "https://openreview.net/forum?id=DJEEqoAq7to", "slides": "https://iclr.cc/virtual/2023/poster/11531", "video": "https://iclr.cc/virtual/2023/poster/11531", "author_site": "Yiqin Tan, Pihe Hu, Ling Pan, Jiatai Huang, Longbo Huang", "tldr": "We propose a new framework for training an efficient DRL agent from scratch with an ultra-sparse network with strong performanc without performance degradation.", "abstract": "Training deep reinforcement learning (DRL) models usually requires high computation costs. Therefore, compressing DRL models possesses immense potential for training acceleration and model deployment. However, existing methods that generate small models mainly adopt the knowledge distillation-based approach by iteratively training a dense network. As a result, the training process still demands massive computing resources. Indeed, sparse training from scratch in DRL has not been well explored and is particularly challenging due to non-stationarity in bootstrap training. In this work, we propose a novel sparse DRL training framework, \u201cthe Rigged Reinforcement Learning Lottery\u201d (RLx2), which builds upon gradient-based topology evolution and is capable of training a sparse DRL model based entirely on a sparse network. Specifically, RLx2 introduces a novel multi-step TD target mechanism with a dynamic-capacity replay buffer to achieve robust value learning and efficient topology exploration in sparse models. 
It also reaches state-of-the-art sparse training performance in several tasks, showing $7.5\\times$-$20\\times$ model compression with less than $3\\%$ performance degradation and up to $20\\times$ and $50\\times$ FLOPs reduction for training and inference, respectively.", "keywords": "Deep Reinforcement Learning;Lottery Ticket Hypothesis;Model Compression;Value Learning", "primary_area": "", "supplementary_material": "/attachment/eedd483a7a1d6b37739f9e7b0cdfed676dea2867.zip", "author": "Yiqin Tan;Pihe Hu;Ling Pan;Jiatai Huang;Longbo Huang", "authorids": "~Yiqin_Tan1;~Pihe_Hu1;~Ling_Pan1;~Jiatai_Huang1;~Longbo_Huang2", "gender": "M;M;F;M;M", "homepage": "https://tyq1024.github.io/;https://hupihe.top/;https://ling-pan.github.io/;;http://people.iiis.tsinghua.edu.cn/~huang/", "dblp": ";215/4280;199/9303/;;79/7077", "google_scholar": ";https://scholar.google.com/citations?hl=en;qZ_zlacAAAAJ;Y1w8ziAAAAAJ;", "orcid": "0000-0002-9060-8137;;;;", "linkedin": ";;;;", "or_profile": "~Yiqin_Tan1;~Pihe_Hu1;~Ling_Pan1;~Jiatai_Huang1;~Longbo_Huang2", "aff": "Tsinghua University;Tsinghua University;Montreal Institute for Learning Algorithms (MILA);Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;mila.umontreal.ca;tsinghua.edu.cn;tsinghua.edu.cn", "position": "MS student;PhD student;Postdoc;PhD student;Full Professor", "bibtex": "@inproceedings{\ntan2023rlx,\ntitle={{RL}x2: Training a Sparse Deep Reinforcement Learning Model from Scratch},\nauthor={Yiqin Tan and Pihe Hu and Ling Pan and Jiatai Huang and Longbo Huang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=DJEEqoAq7to}\n}", "github": "", "project": "", "reviewers": "i62R;7aAy;Qrdc;B2aD", "pdf_size": 2100295, "recommendation": "6;8;8;8", "confidence": "3;3;4;4", "correctness": "3;3;4;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;0;3;3", "wc_summary_paper": "76;54;182;84", "wc_strength_and_weaknesses": "224;154;401;550", "wc_clarity_quality_novelty_and_reproducibility": "92;42;21;108", "wc_summary_review": "33;60;76;32", "wc_review": "425;310;680;774", "wc_reply_reviewers": "0;0;0;40", "wc_reply_authors": "1304;559;911;1128", "reply_reviewers": "0;0;0;1", "reply_authors": "3;2;2;3", "recommendation_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 1.224744871391589 ], "wc_summary_paper_avg": [ 99.0, 49.16299421312742 ], "wc_strength_and_weaknesses_avg": [ 332.25, 154.62272633736606 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 65.75, 35.499119707395565 ], "wc_summary_review_avg": [ 50.25, 18.632968094214082 ], "wc_review_avg": [ 547.25, 187.26368441318246 ], "wc_reply_reviewers_avg": [ 10.0, 17.320508075688775 ], "wc_reply_authors_avg": [ 975.5, 277.84932967347606 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8084674374946693622&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=DJEEqoAq7to", "email": "tsinghua.edu.cn;tsinghua.edu.cn;mila.umontreal.ca;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 5, "aff_unique_index": 
"0;0;1;0;0", "aff_unique_norm": "Tsinghua University;Montreal Institute for Learning Algorithms", "aff_unique_dep": ";Artificial Intelligence", "aff_unique_url": "https://www.tsinghua.edu.cn;https://mila.quebec", "aff_unique_abbr": "THU;MILA", "aff_campus_unique_index": "1", "aff_campus_unique": ";Montreal", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "China;Canada" }, { "id": "DL8dTTvCpU", "title": "Deformable Graph Transformer", "track": "main", "status": "Reject", "tldr": "", "abstract": "Transformer-based models have recently shown success in representation learning on graph-structured data beyond natural language processing and computer vision. However, the success is limited to small-scale graphs due to the drawbacks of full dot-product attention on graphs such as the quadratic complexity with respect to the number of nodes and message aggregation from enormous irrelevant nodes. To address these issues, we propose Deformable Graph Transformer (DGT) that performs sparse attention via dynamically sampled relevant nodes for efficiently handling large-scale graphs with a linear complexity in the number of nodes. Specifically, our framework first constructs multiple node sequences with various criteria to consider both structural and semantic proximity. Then, combining with our learnable Katz Positional Encodings, the sparse attention is applied to the node sequences for learning node representations with a significantly reduced computational cost. Extensive experiments demonstrate that our DGT achieves state-of-the-art performance on 7 graph benchmark datasets with 2.5 \u223c 449 times less computational cost compared to transformer-based graph models with full attention.", "keywords": "Graph Transformer;Graph Neural Networks", "primary_area": "", "supplementary_material": "/attachment/6b420e29bf5b98aeba979ba808f4ac30875293f0.zip", "author": "Jinyoung Park;Seongjun Yun;Hyeonjin Park;Jaewoo Kang;Jisu Jeong;Kyung-Min Kim;Jung-Woo Ha;Hyunwoo J. Kim", "authorids": "~Jinyoung_Park1;~Seongjun_Yun1;~Hyeonjin_Park1;~Jaewoo_Kang1;~Jisu_Jeong1;~Kyung-Min_Kim1;~Jung-Woo_Ha1;~Hyunwoo_J._Kim3", "gender": "M;M;;M;M;M;M;M", "homepage": ";https://www.linkedin.com/in/seongjun-yun-01475919b;;https://dmis.korea.ac.kr;;;https://aidljwha.wordpress.com/;https://hyunwoojkim.com/publications", "dblp": "03/1524;72/6305;;k/JaewooKang;126/2963.html/;85/8572;66/867-1;150/4259", "google_scholar": "zThEyOYAAAAJ;8-MZ2RwAAAAJ;;https://scholar.google.co.kr/citations?user=RaBZafQAAAAJ;xEy9hKAAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.co.kr/citations?user=eGj3ay4AAAAJ;https://scholar.google.co.kr/citations?user=LfBoJt8AAAAJ", "orcid": ";;;0000-0001-6798-9106;0000-0003-3614-4199;0000-0003-2426-2198;0000-0002-7400-7681;0000-0002-2181-9264", "linkedin": "jinyoung-park-4861461a3/;;hyeonjin-park-711ab0212/;;;;jung-woo-ha-b2782862?trk=hp-identity-name;", "or_profile": "~Jinyoung_Park1;~Seongjun_Yun1;~Hyeonjin_Park1;~Jaewoo_Kang1;~Jisu_Jeong1;~Kyung-Min_Kim1;~Jung-Woo_Ha1;~Hyunwoo_Kim1", "aff": "Amazon;Amazon;;Korea University;;NAVER;NAVER AI Lab;Korea University", "aff_domain": "amazon.com;amazon.com;;korea.ac.kr;;navercorp.com;navercorp.com;korea.ac.kr", "position": "Intern;Researcher;;Full Professor;;Leader;Head (Executive Director);Assistant Professor", "bibtex": "@misc{\npark2023deformable,\ntitle={Deformable Graph Transformer},\nauthor={Jinyoung Park and Seongjun Yun and Hyeonjin Park and Jaewoo Kang and Jisu Jeong and Kyung-Min Kim and Jung-Woo Ha and Hyunwoo J. 
Kim},\nyear={2023},\nurl={https://openreview.net/forum?id=DL8dTTvCpU}\n}", "github": "", "project": "", "reviewers": "wsk3;HE3Q;PKKU;Xtmm;KagM", "site": "https://openreview.net/forum?id=DL8dTTvCpU", "pdf_size": 2348860, "recommendation": "5;5;5;5;6", "confidence": "4;3;4;3;4", "correctness": "2;4;3;3;3", "technical_novelty": "2;4;2;3;3", "empirical_novelty": "1;4;3;2;3", "wc_summary_paper": "152;48;36;61;202", "wc_strength_and_weaknesses": "656;144;220;134;173", "wc_clarity_quality_novelty_and_reproducibility": "47;6;34;72;232", "wc_summary_review": "70;14;20;545;105", "wc_review": "925;212;310;812;712", "wc_reply_reviewers": "0;0;0;448;118", "wc_reply_authors": "1254;554;685;2013;945", "reply_reviewers": "0;0;0;1;1", "reply_authors": "3;3;2;3;2", "recommendation_avg": [ 5.2, 0.39999999999999997 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "correctness_avg": [ 3.0, 0.6324555320336759 ], "technical_novelty_avg": [ 2.8, 0.7483314773547882 ], "empirical_novelty_avg": [ 2.6, 1.019803902718557 ], "wc_summary_paper_avg": [ 99.8, 65.46571621849104 ], "wc_strength_and_weaknesses_avg": [ 265.4, 197.57084805203425 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 78.2, 79.79072627818348 ], "wc_summary_review_avg": [ 150.8, 199.92138454902715 ], "wc_review_avg": [ 594.2, 281.98893595316815 ], "wc_reply_reviewers_avg": [ 113.2, 173.52625161629004 ], "wc_reply_authors_avg": [ 1090.2, 519.840514004055 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 2.6, 0.4898979485566356 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": 0.408248290463863, "corr_recommendation_correctness": 0.0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15364427099365172497&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;1;2;2;1", "aff_unique_norm": "Amazon;Korea University;NAVER Corporation", "aff_unique_dep": "Amazon.com, Inc.;;", "aff_unique_url": "https://www.amazon.com;https://www.korea.ac.kr;https://www.naver.com", "aff_unique_abbr": "Amazon;KU;NAVER", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1;1;1", "aff_country_unique": "United States;South Korea" }, { "id": "DP_u25iQWg", "title": "Enhanced Spatio-Temporal Image Encoding for Online Human Activity Recognition", "track": "main", "status": "Withdraw", "tldr": "In this work, we propose to improve the spatio-temporal image encoding of 3D skeletons data, by studying the concept of motion energy which focuses mainly on the joints that are the most solicited for an action.", "abstract": "Human Activity Recognition (HAR) based on sensors data can be seen as a time series classification problem where the challenge is to handle both spatial and temporal dependencies, while focusing on the most relevant data variations. It can be done using 3D skeleton data extracted from a RGB+D camera.\nIn this work, we propose to improve the spatio-temporal image encoding of 3D skeletons captured from a Kinect sensor, by studying the concept of motion energy which focuses mainly on skeleton joints that are the most solicited for an action. 
This encoding allows us to achieve better discrimination in the detection of online activities by focusing on the most significant parts of the actions.\nThe article presents this new encoding and its application for HAR using a deep learning model trained on the encoded 3D skeleton data.\nFor this purpose, we propose to investigate the knowledge transferability of several pre-trained CNNs provided by Keras.\nThe article shows a significant improvement in accuracy over the state of the art.", "keywords": "3D Skeleton Data;Spatio-temporal Image Encoding;Motion Energy;Online Action Recognition;Human Activity Recognition;Deep learning", "primary_area": "", "supplementary_material": "", "author": "Nassim MOKHTARI;Vincent Fer;Alexis N\u00e9d\u00e9lec;Marl\u00e8ne Gilles;Pierre De Loor", "authorids": "~Nassim_MOKHTARI1;~Vincent_Fer1;~Alexis_N\u00e9d\u00e9lec1;~Marl\u00e8ne_Gilles1;deloor@enib.fr", "gender": "M;M;M;;", "homepage": ";;https://www.enib.fr/~nedelec/;https://marlenegilles.wordpress.com/;", "dblp": ";;;;", "google_scholar": ";;;l6Qq3qEAAAAJ;", "orcid": "0000-0002-9402-3638;;0000-0003-3970-004X;0000-0003-1806-1672;", "linkedin": "nassim-mokhtari/;vincent-fer-468683138;;;", "or_profile": "~Nassim_MOKHTARI1;~Vincent_Fer1;~Alexis_N\u00e9d\u00e9lec1;~Marl\u00e8ne_Gilles1;deloor@enib.fr", "aff": "Ecole Nationale d'Ingenieurs de Brest;;Ecole Nationale d'Ingenieurs de Brest;Ecole Nationale d'Ingenieurs de Brest;", "aff_domain": "enib.fr;;enib.fr;enib.fr;", "position": "PhD student;;Assistant Professor;Postdoc;", "bibtex": "@misc{\nmokhtari2023enhanced,\ntitle={Enhanced Spatio-Temporal Image Encoding for Online Human Activity Recognition},\nauthor={Nassim MOKHTARI and Vincent Fer and Alexis N{\\'e}d{\\'e}lec and Marl{\\`e}ne Gilles and Pierre De Loor},\nyear={2023},\nurl={https://openreview.net/forum?id=DP_u25iQWg}\n}", "github": "", "project": "", "reviewers": "dQAc;Q2MC;DGBj;Shwk", "site": "https://openreview.net/forum?id=DP_u25iQWg", "pdf_size": 1128250, "recommendation": "3;3;3;3", "confidence": "5;3;4;5", "correctness": "2;3;3;2", "technical_novelty": "1;2;2;2", "empirical_novelty": "1;1;2;2", "wc_summary_paper": "68;58;81;63", "wc_strength_and_weaknesses": "483;92;110;34", "wc_clarity_quality_novelty_and_reproducibility": "64;52;59;216", "wc_summary_review": "36;21;25;68", "wc_review": "651;223;275;381", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.5, 0.5 ], "wc_summary_paper_avg": [ 67.5, 8.558621384311845 ], "wc_strength_and_weaknesses_avg": [ 179.75, 177.319450427752 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 97.75, 68.40458683450986 ], "wc_summary_review_avg": [ 37.5, 18.445866745696716 ], "wc_review_avg": [ 382.5, 165.14463357917506 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14354256004369877796&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0;0", "aff_unique_norm": "Ecole Nationale d'Ingenieurs de Brest", "aff_unique_dep": "",
"aff_unique_url": "https://www.enib.fr", "aff_unique_abbr": "ENIB", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "France" }, { "id": "DQou0RiwkR0", "title": "Exploiting Spatial Separability for Deep Learning Multichannel Speech Enhancement with an Align-and-Filter Network", "track": "main", "status": "Reject", "tldr": "This paper presents an Align-and-Filter network to study spatial separability of sound sources for deep learning multichannel speech enhancement by incorporating relative transfer functions for signal alignment with sequential masking network design.", "abstract": "Multichannel speech enhancement (SE) systems separate the target speech from background noise by performing spatial and spectral filtering. The development of multichannel SE has a long history in the signal processing field, where one crucial step is to exploit spatial separability of sound sources by aligning the microphone signals in response to the target speech source prior to further filtering processes. However, most existing deep learning based multichannel SE works have yet to effectively incorporate or emphasize this spatial alignment aspect in the network design \u2013 we postulate that it is owing to the lack of suitable datasets with sufficient spatial diversity of the speech sources. In this paper, we highlight this important but often overlooked step in deep learning based multichannel SE, i.e., signal alignment, by introducing an Align-and-Filter network (AFnet) featuring a two-stage sequential masking design. The AFnet estimates two sets of masks, the alignment masks and filtering masks, and multiplies the estimated masks with the respective input signals to each stage sequentially, while leveraging the relative transfer functions (RTFs) for guiding the model to align signals with various speech source locations during training. For exploration purposes, we argue that the popular CHiME-3 multichannel dataset has its own limitation in representing spatially diverse speech data as the speakers were mostly located at the front side, and thereby adopt simulated and real-world measured room impulse responses to generate multichannel recordings where the target speech sources might come from arbitrary directions. Our findings suggest that for spatially diverse speaker scenarios, careful consideration of exploiting spatial characteristics is of great importance for deep learning based multichannel SE especially when the number of microphone gets increased. We show that utilizing the RTFs for signal alignment purposes in the two-stage, sequential masking framework consistently improves the capability of the network to separate the target speech from the noise signals, supporting that spatial separability is being effectively exploited by the proposed model. 
Our studies advocate for the advantages and significance of considering the signal alignment aspect, an insight from conventional signal processing, for developing future deep learning based multichannel SE algorithms to improve enhancement outcomes with positionally diverse target speech scenarios.", "keywords": "Multichannel speech enhancement;microphone array beamforming;spatial filtering;signal alignment;relative transfer functions", "primary_area": "", "supplementary_material": "", "author": "Ching-Hua Lee;Chouchang Yang;Yilin Shen;Hongxia Jin", "authorids": "~Ching-Hua_Lee2;~Chouchang_Yang2;~Yilin_Shen1;~Hongxia_Jin1", "gender": ";;M;", "homepage": ";;;", "dblp": ";;30/383;", "google_scholar": ";;9PSFMzAAAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Ching-Hua_Lee2;~Chouchang_Yang2;~Yilin_Shen1;~Hongxia_Jin1", "aff": ";;Samsung Research America;", "aff_domain": ";;gmail.com;", "position": ";;Principal Researcher;", "bibtex": "@misc{\nlee2023exploiting,\ntitle={Exploiting Spatial Separability for Deep Learning Multichannel Speech Enhancement with an Align-and-Filter Network},\nauthor={Ching-Hua Lee and Chouchang Yang and Yilin Shen and Hongxia Jin},\nyear={2023},\nurl={https://openreview.net/forum?id=DQou0RiwkR0}\n}", "github": "", "project": "", "reviewers": "aM96;6rDM;jGjP;4Sgp", "site": "https://openreview.net/forum?id=DQou0RiwkR0", "pdf_size": 3074053, "recommendation": "3;6;6;6", "confidence": "4;3;5;5", "correctness": "2;4;3;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;2;3;2", "wc_summary_paper": "52;47;58;107", "wc_strength_and_weaknesses": "392;18;496;243", "wc_clarity_quality_novelty_and_reproducibility": "11;140;67;14", "wc_summary_review": "44;66;58;164", "wc_review": "499;271;679;528", "wc_reply_reviewers": "582;0;22;977", "wc_reply_authors": "3998;830;1786;4250", "reply_reviewers": "1;0;1;2", "reply_authors": "9;3;7;9", "recommendation_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 66.0, 23.98958107179031 ], "wc_strength_and_weaknesses_avg": [ 287.25, 179.58476410876287 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 58.0, 52.321123841140874 ], "wc_summary_review_avg": [ 83.0, 47.4236228055175 ], "wc_review_avg": [ 494.25, 145.88929878507196 ], "wc_reply_reviewers_avg": [ 395.25, 408.9152571132557 ], "wc_reply_authors_avg": [ 2716.0, 1450.739121965076 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 7.0, 2.449489742783178 ], "replies_avg": [ 39, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.17407765595569782, "corr_recommendation_correctness": 0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:CuC8kXqV7Y4J:scholar.google.com/&scioq=Exploiting+Spatial+Separability+for+Deep+Learning+Multichannel+Speech+Enhancement+with+an+Align-and-Filter+Network&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Samsung", "aff_unique_dep": "Samsung Research America", "aff_unique_url": "https://www.samsung.com/us/careers/research/", "aff_unique_abbr": "SRA", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "DSKD610FRN1", "title": "Decentralized Federated Learning via Overlapping Data Augmentation", "track": "main", "status": "Withdraw", "tldr": "This paper studies the scenario of selective
partial sharing in federated learning.", "abstract": "Recently, there have been rising concerns about the heterogeneity among local clients in federated learning, which could lead to inefficient utilization of the data from other clients. To mitigate the adverse effects of heterogeneity, FL research has mostly focused on learning a globally shared initialization under the assumption that the shared information is consistent among all clients. In this paper, we consider a more general scenario, Selective Partial Sharing (SPS), where each pair of clients may share different patterns or distribution components. We propose a novel FL framework named Fed-SPS to exploit the shared knowledge by a partial and pairwise collaboration. Meanwhile, to reduce data traffic and improve computing efficiency, we realize a decentralized learning paradigm for our framework. Due to privacy concerns, one cannot obtain the overlapped distribution components with direct access to the raw data. While the learned personalized model is an approximation of local distribution, we propose to identify the selective sharing structure by exploring the vulnerability overlap between local models. With the detected sharing structure, we propose an overlapping data augmentation, which efficiently boosts the leveraging of the overlapped data between clients. Comprehensive experiments on a suite of benchmark data sets and a real-world clinical data set show that our approach can achieve better generalization compared with existing methods.", "keywords": "federated learning;personalized federated learning;decentralized federated learning", "primary_area": "", "supplementary_material": "", "author": "Sen Cui;Jian Liang;Weishen Pan;Jingfeng Zhang;Bo Han;Jianwei Zhang;Changshui Zhang;Fei Wang", "authorids": "~Sen_Cui1;~Jian_Liang3;~Weishen_Pan1;~Jingfeng_Zhang1;~Bo_Han1;~Jianwei_Zhang2;~Changshui_Zhang2;~Fei_Wang3", "gender": "M;M;M;M;M;M;;M", "homepage": ";;https://scholar.google.com/citations?user=PtTBMhUAAAAJ;https://zjfheart.github.io;https://tams.informatik.uni-hamburg.de/people/zhang/;http://bigeye.au.tsinghua.edu.cn/english/Introduction.html;https://wcm-wanglab.github.io/index.html;https://bhanml.github.io/", "dblp": "267/5483;19/2208;161/2032;227/2664.html;z/JianweiZhang1;z/ChangshuiZhang;52/3194-9.html;241/0472-3", "google_scholar": "UzQuG1UAAAAJ;mrunnpoAAAAJ;PtTBMhUAAAAJ;NS0P1FkAAAAJ;;GL9M37YAAAAJ;https://scholar.google.com/citations?hl=en;nTNjqHwAAAAJ", "orcid": ";;0009-0006-0431-5642;0000-0003-3491-8074;;;;", "linkedin": ";;;;;;fei-wang-50682425/;", "or_profile": "~Sen_Cui1;~Jian_Liang3;~Weishen_Pan1;~Jingfeng_Zhang1;~Jianwei_Zhang2;~Changshui_Zhang2;~Fei_Wang3;~bo_han2", "aff": "Tsinghua University;Kuaishou Technology;Weill Cornell Medicine, Cornell University;University of Auckland;Universit\u00e4t Hamburg;Tsinghua University;Cornell University;RIKEN", "aff_domain": "tsinghua.edu.cn;kuaishou.com;med.cornell.edu;auckland.ac.nz;uni-hamburg.de;mail.tsinghua.edu.cn;cornell.edu;riken.jp", "position": "PhD student;Senior Algorithm Engineer;Postdoc;Assistant Professor;Full Professor;Full Professor;Full Professor;Adjunct Scientist", "bibtex": "@misc{\ncui2023decentralized,\ntitle={Decentralized Federated Learning via Overlapping Data Augmentation},\nauthor={Sen Cui and Jian Liang and Weishen Pan and Jingfeng Zhang and Bo Han and Jianwei Zhang and Changshui Zhang and Fei Wang},\nyear={2023},\nurl={https://openreview.net/forum?id=DSKD610FRN1}\n}", "github": "", "project": "", "reviewers": "exVj;13M8;LygQ", "site": 
"https://openreview.net/forum?id=DSKD610FRN1", "pdf_size": 878755, "recommendation": "3;3;5", "confidence": "4;3;4", "correctness": "3;3;3", "technical_novelty": "1;3;2", "empirical_novelty": "1;2;0", "wc_summary_paper": "55;49;54", "wc_strength_and_weaknesses": "85;90;224", "wc_clarity_quality_novelty_and_reproducibility": "253;36;57", "wc_summary_review": "27;15;37", "wc_review": "420;190;372", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.0, 0.816496580927726 ], "empirical_novelty_avg": [ 1.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 52.666666666666664, 2.6246692913372702 ], "wc_strength_and_weaknesses_avg": [ 133.0, 64.37908563086825 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 115.33333333333333, 97.72182742640231 ], "wc_summary_review_avg": [ 26.333333333333332, 8.993825042154695 ], "wc_review_avg": [ 327.3333333333333, 99.06675639296071 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": 0.5, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15719593946093798096&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;4;0;2;5", "aff_unique_norm": "Tsinghua University;Kuaishou Technology;Cornell University;University of Auckland;University of Hamburg;RIKEN", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.kuaishou.com;https://www.weill.cornell.edu;https://www.auckland.ac.nz;https://www.uni-hamburg.de;https://www.riken.jp", "aff_unique_abbr": "THU;Kuaishou;Cornell;UoA;UHH;RIKEN", "aff_campus_unique_index": "1", "aff_campus_unique": ";Weill Cornell Medicine", "aff_country_unique_index": "0;0;1;2;3;0;1;4", "aff_country_unique": "China;United States;New Zealand;Germany;Japan" }, { "title": "Voxurf: Voxel-based Efficient and Accurate Neural Surface Reconstruction", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12150", "id": "DSy8tP4WctmZ", "poster": "", "openreview": "https://openreview.net/forum?id=DSy8tP4WctmZ", "slides": "https://iclr.cc/virtual/2023/poster/12150", "video": "https://iclr.cc/virtual/2023/poster/12150", "author_site": "Tong Wu, Jiaqi Wang, Xingang Pan, Xudong XU, Christian Theobalt, Ziwei Liu, Dahua Lin", "tldr": "We present Voxurf, a voxel-based approach for efficient and accurate neural surface reconstruction.", "abstract": "Neural surface reconstruction aims to reconstruct accurate 3D surfaces based on multi-view images. Previous methods based on neural volume rendering mostly train a fully implicit model with MLPs, which typically require hours of training for a single scene. Recent efforts explore the explicit volumetric representation to accelerate the optimization via memorizing significant information with learnable voxel grids. However, existing voxel-based methods often struggle in reconstructing fine-grained geometry, even when combined with an SDF-based volume rendering scheme. 
We reveal that this is because 1) the voxel grids tend to break the color-geometry dependency that facilitates fine-geometry learning, and 2) the under-constrained voxel grids lack spatial coherence and are vulnerable to local minima. In this work, we present Voxurf, a voxel-based surface reconstruction approach that is both efficient and accurate. Voxurf addresses the aforementioned issues via several key designs, including 1) a two-stage training procedure that attains a coherent coarse shape and recovers fine details successively, 2) a dual color network that maintains color-geometry dependency, and 3) a hierarchical geometry feature to encourage information propagation across voxels. Extensive experiments show that Voxurf achieves high efficiency and high quality at the same time. On the DTU benchmark, Voxurf achieves higher reconstruction quality with a 20x training speedup compared to previous fully implicit methods. Our code is publicly available at https://github.com/wutong16/Voxurf/.", "keywords": "Surface Reconstruction;Neural Radiance Field", "primary_area": "", "supplementary_material": "/attachment/babbd940967b424157f96e3f6a6ba4e961958b05.zip", "author": "Tong Wu;Jiaqi Wang;Xingang Pan;Xudong XU;Christian Theobalt;Ziwei Liu;Dahua Lin", "authorids": "~Tong_Wu2;~Jiaqi_Wang1;~Xingang_Pan1;~Xudong_XU1;~Christian_Theobalt2;~Ziwei_Liu1;~Dahua_Lin1", "gender": "F;M;M;M;M;M;M", "homepage": "https://wutong16.github.io/;https://myownskyw7.github.io/;https://xingangpan.github.io/;https://sheldontsui.github.io;https://www.mpi-inf.mpg.de/~theobalt/;https://liuziwei7.github.io/;http://dahua.site", "dblp": "75/5056-2;44/740-3;211/7940;210/2741;55/3346;05/6300-2;53/6088", "google_scholar": "https://scholar.google.com.hk/citations?user=cLUgV4YAAAAJ;https://scholar.google.com.hk/citations?user=GDvt570AAAAJ;https://scholar.google.com.hk/citations?user=uo0q9WgAAAAJ;https://scholar.google.com.hk/citations?user=D8VMkA8AAAAJ;https://scholar.google.com.tw/citations?user=eIWg8NMAAAAJ;https://scholar.google.com.hk/citations?user=lc45xlcAAAAJ;GMzzRRUAAAAJ", "orcid": ";;0000-0002-5825-9467;;;;", "linkedin": ";;;;;;", "or_profile": "~Tong_Wu2;~Jiaqi_Wang1;~Xingang_Pan1;~Xudong_XU1;~Christian_Theobalt2;~Ziwei_Liu1;~Dahua_Lin1", "aff": "The Chinese University of Hong Kong;Shanghai AI Laboratory;Max-Planck Institute Informatics;The Chinese University of Hong Kong;Max-Planck-Institute for Informatics, Saarland Informatics Campus;Nanyang Technological University;The Chinese University of Hong Kong", "aff_domain": "cuhk.edu.hk;pjlab.org.cn;mpi-inf.mpg.de;ie.cuhk.edu;mpi-inf.mpg.de;ntu.edu.sg;cuhk.edu.hk", "position": "PhD student;Research Scientist;Postdoc;PhD student;Director;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nwu2023voxurf,\ntitle={Voxurf: Voxel-based Efficient and Accurate Neural Surface Reconstruction},\nauthor={Tong Wu and Jiaqi Wang and Xingang Pan and Xudong XU and Christian Theobalt and Ziwei Liu and Dahua Lin},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=DSy8tP4WctmZ}\n}", "github": "", "project": "", "reviewers": "kZqD;VHoF;8Ao6;7aTz", "pdf_size": 10001982, "recommendation": "6;6;8;8", "confidence": "4;4;3;3", "correctness": "3;3;3;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "3;2;3;3", "wc_summary_paper": "83;78;94;153", "wc_strength_and_weaknesses": "683;258;198;488", "wc_clarity_quality_novelty_and_reproducibility": "310;70;202;162", "wc_summary_review": 
"133;48;71;58", "wc_review": "1209;454;565;861", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "926;637;642;681", "reply_reviewers": "0;0;0;0", "reply_authors": "2;1;1;1", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 102.0, 30.008332176247315 ], "wc_strength_and_weaknesses_avg": [ 406.75, 192.75551224284092 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 186.0, 86.11620056644394 ], "wc_summary_review_avg": [ 77.5, 33.06433123473088 ], "wc_review_avg": [ 772.25, 292.7724158796385 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 721.5, 119.29061153334742 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": 0.0, "gs_citation": 101, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10976078225004203514&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=DSy8tP4WctmZ", "email": "cuhk.edu.hk;pjlab.org.cn;mpi-inf.mpg.de;ie.cuhk.edu;mpi-inf.mpg.de;ntu.edu.sg;cuhk.edu.hk", "author_num": 7, "aff_unique_index": "0;1;2;0;3;4;0", "aff_unique_norm": "Chinese University of Hong Kong;Shanghai AI Laboratory;Max-Planck Institute for Informatics;Max-Planck-Institute for Informatics;Nanyang Technological University", "aff_unique_dep": ";;Informatics;;", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.shanghai-ai-lab.com;https://www.mpi-inf.mpg.de;https://mpi-inf.mpg.de;https://www.ntu.edu.sg", "aff_unique_abbr": "CUHK;SAIL;MPII;MPII;NTU", "aff_campus_unique_index": "0;0;2;0", "aff_campus_unique": "Hong Kong SAR;;Saarland", "aff_country_unique_index": "0;0;1;0;1;2;0", "aff_country_unique": "China;Germany;Singapore" }, { "id": "DT7btGps59z", "title": "Your Neighbors Are Communicating: Towards Powerful and Scalable Graph Neural Networks", "track": "main", "status": "Reject", "tldr": "We propose a general GNN framework with provably expressive power, while maintaining the scalability of the message passing scheme.", "abstract": "Message passing graph neural networks (GNNs) are known to have their expressiveness upper-bounded by 1-dimensional Weisfeiler-Lehman (1-WL) algorithm. To achieve more powerful GNNs, existing attempts either require ad hoc features, or involve operations that incur high time and space complexities. In this work, we propose a general and provably powerful GNN framework that preserves the scalability of message passing scheme. In particular, we first propose to empower 1-WL for graph isomorphism test by considering edges among neighbors, giving rise to NC-1-WL. The expressiveness of NC-1-WL is shown to be strictly above 1-WL and below 3-WL theoretically. Further, we propose the NC-GNN framework as a differentiable neural version of NC-1-WL. Our simple implementation of NC-GNN is provably as powerful as NC-1-WL. 
Experiments demonstrate that our NC-GNN achieves remarkable performance on various benchmarks.", "keywords": "Graph neural networks;expressiveness;graph isomorphism test", "primary_area": "", "supplementary_material": "", "author": "Meng Liu;Haiyang Yu;Shuiwang Ji", "authorids": "~Meng_Liu3;~Haiyang_Yu6;~Shuiwang_Ji1", "gender": "M;M;M", "homepage": "https://mengliu1998.github.io;https://oceanusity.github.io/;http://people.tamu.edu/~sji", "dblp": "41/7841-15;90/6643-5;84/6405", "google_scholar": "https://scholar.google.com/citations?hl=en;LZKU1hUAAAAJ;BZGj6sAAAAAJ", "orcid": ";;0000-0002-4205-4563", "linkedin": "meng-liu-4a1813197/;;shuiwang-ji-9a040715/", "or_profile": "~Meng_Liu3;~Haiyang_Yu6;~Shuiwang_Ji1", "aff": "Texas A&M University - College Station;Texas A&M University - College Station;Texas A&M University", "aff_domain": "tamu.edu;tamu.edu;tamu.edu", "position": "PhD student;PhD student;Professor", "bibtex": "@misc{\nliu2023your,\ntitle={Your Neighbors Are Communicating: Towards Powerful and Scalable Graph Neural Networks},\nauthor={Meng Liu and Haiyang Yu and Shuiwang Ji},\nyear={2023},\nurl={https://openreview.net/forum?id=DT7btGps59z}\n}", "github": "", "project": "", "reviewers": "Agxt;zieR;nYWZ;xnLi", "site": "https://openreview.net/forum?id=DT7btGps59z", "pdf_size": 1562324, "recommendation": "3;5;5;6", "confidence": "5;3;3;2", "correctness": "3;4;2;3", "technical_novelty": "1;3;3;3", "empirical_novelty": "1;3;2;3", "wc_summary_paper": "47;62;125;90", "wc_strength_and_weaknesses": "160;138;368;253", "wc_clarity_quality_novelty_and_reproducibility": "3;37;86;73", "wc_summary_review": "109;32;64;147", "wc_review": "319;269;643;563", "wc_reply_reviewers": "810;0;0;0", "wc_reply_authors": "2725;513;549;529", "reply_reviewers": "2;0;0;0", "reply_authors": "5;2;2;1", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.25, 1.0897247358851685 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.5, 0.8660254037844386 ], "empirical_novelty_avg": [ 2.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 81.0, 29.723727895403698 ], "wc_strength_and_weaknesses_avg": [ 229.75, 90.74242392618791 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 49.75, 32.41431011143072 ], "wc_summary_review_avg": [ 88.0, 43.68638231760556 ], "wc_review_avg": [ 448.5, 158.05932430578085 ], "wc_reply_reviewers_avg": [ 202.5, 350.74028853269766 ], "wc_reply_authors_avg": [ 1079.0, 950.4041245701746 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.5, 1.5 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10959981273489996946&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Texas A&M University", "aff_unique_dep": "", "aff_unique_url": "https://www.tamu.edu", "aff_unique_abbr": "TAMU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "College Station;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "DUIjRKNFFbG", "title": "LEXA: Language-agnostic Cross-consistency Training for Question Answering Tasks", "track": "main", "status": "Withdraw", "tldr": "We developed a novel pre-training method to improve cross-lingual consistency in a language model. 
We demonstrate the achieved ability on several question answering datasets.", "abstract": "Cross-lingual information retrieval (CLIR) is a knowledge-intensive NLP task that requires a lot of domain-specific data in different languages. In previous works, authors mostly used machine translation and iterative training for data mining. We consider the problem from another angle and present a novel cross-lingual pre-training and fine-tuning approach for CLIR tasks based on cross-lingual alignment. We present a new model, LEXA-LM, that significantly improves cross-lingual knowledge transfer, thus achieving a new state of the art in cross-lingual and monolingual question answering and cross-lingual sentence retrieval. Moreover, we show that our pre-training technique LEXA is a very powerful tool for the zero-shot scenario, allowing it to outperform some supervised methods. ", "keywords": "pre-training;language model;natural language processing", "primary_area": "", "supplementary_material": "", "author": "Nikita Sorokin;Dmitry Abulkhanov;Valentin Malykh", "authorids": "~Nikita_Sorokin1;~Dmitry_Abulkhanov1;~Valentin_Malykh1", "gender": "M;;M", "homepage": ";;http://val.maly.hk", "dblp": ";;186/6505", "google_scholar": "Gmxbe_8AAAAJ;;Q-DWwNAAAAAJ", "orcid": ";;0009-0008-5632-6188", "linkedin": "%D0%BD%D0%B8%D0%BA%D0%B8%D1%82%D0%B0-%D1%81%D0%BE%D1%80%D0%BE%D0%BA%D0%B8%D0%BD-b2b975221/;;val-malykh/", "or_profile": "~Nikita_Sorokin1;~Dmitry_Abulkhanov1;~Valentin_Malykh1", "aff": "Huawei Technologies Ltd.;;", "aff_domain": "huawei.com;;", "position": "Researcher;;", "bibtex": "@misc{\nsorokin2023lexa,\ntitle={{LEXA}: Language-agnostic Cross-consistency Training for Question Answering Tasks},\nauthor={Nikita Sorokin and Dmitry Abulkhanov and Valentin Malykh},\nyear={2023},\nurl={https://openreview.net/forum?id=DUIjRKNFFbG}\n}", "github": "", "project": "", "reviewers": "zdxR;ukDR;FuWT;5Pag", "site": "https://openreview.net/forum?id=DUIjRKNFFbG", "pdf_size": 4639369, "recommendation": "3;3;3;5", "confidence": "4;4;4;2", "correctness": "2;3;3;3", "technical_novelty": "2;2;1;2", "empirical_novelty": "2;3;2;2", "wc_summary_paper": "18;57;4;43", "wc_strength_and_weaknesses": "137;74;45;111", "wc_clarity_quality_novelty_and_reproducibility": "3;114;4;79", "wc_summary_review": "12;41;157;71", "wc_review": "170;286;210;304", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 30.5, 20.71834935510066 ], "wc_strength_and_weaknesses_avg": [ 91.75, 35.06690034776384 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 50.0, 48.119642558938445 ], "wc_summary_review_avg": [ 70.25, 54.25576006287259 ], "wc_review_avg": [ 242.5, 54.742579405797095 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:c3OopUCSbi0J:scholar.google.com/&scioq=LEXA:+Language-agnostic+Cross-consistency+Training+for+Question+Answering+Tasks&hl=en&as_sdt=0,5", "gs_version_total": 0,
"aff_unique_index": "0", "aff_unique_norm": "Huawei", "aff_unique_dep": "Huawei Technologies", "aff_unique_url": "https://www.huawei.com", "aff_unique_abbr": "Huawei", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "id": "DUfpVGCXfwa", "title": "REM: Routing Entropy Minimization for Capsule Networks", "track": "main", "status": "Reject", "tldr": " REM is a technique for Capsule Networks which combines pruning and quantization driving these models towards a higher interpretability. REM generate a significantly lower number of parse trees, with no performance loss.", "abstract": "Capsule Networks aim to build an interpretable and biologically-inspired neural network model. One of their main innovations relies on the routing mechanism which extracts a parse tree: its main purpose is to explicitly build relationships between capsules.\nHowever, their true potential has not surfaced yet: these relationships are extremely heterogeneous and difficult to understand, as the intra-class extracted parse trees are very different from each other. A school of thoughts, giving-up on this side, propose less interpretable versions of Capsule Networks without routing.\nThis paper proposes REM, a technique which minimizes the entropy of the parse tree-like structure. We accomplish this by driving the model parameters distribution towards low entropy configurations, using a pruning mechanism as a proxy. \nThanks to REM, we generate a significantly lower number of parse trees, with essentially no performance loss, showing also that Capsule Networks build stronger and more stable relationships between capsules.", "keywords": "capsule networks;deep learning;entropy;parse tree;routing", "primary_area": "", "supplementary_material": "/attachment/5b8520ea389a8d5cc6502f977e81a0af2fbfdc8f.zip", "author": "Riccardo Renzulli;Enzo Tartaglione;Marco Grangetto", "authorids": "~Riccardo_Renzulli1;~Enzo_Tartaglione1;~Marco_Grangetto1", "gender": "M;M;M", "homepage": "https://riccardorenzulli.github.io/;https://perso.telecom-paristech.fr/etartaglione/index.html;https://www.di.unito.it/~mgrange/", "dblp": "203/2058;170/0115;77/2058", "google_scholar": "JlAby_oAAAAJ;https://scholar.google.it/citations?user=uKuvN64AAAAJ;Pt1gmQYAAAAJ", "orcid": "0000-0003-0532-5966;0000-0003-4274-8298;0000-0002-2709-7864", "linkedin": "riccardo-renzulli/;enzo-tartaglione-490950a2;marco-grangetto-542aa31/", "or_profile": "~Riccardo_Renzulli1;~Enzo_Tartaglione1;~Marco_Grangetto1", "aff": "University of Turin;T\u00e9l\u00e9com Paris;University of Turin", "aff_domain": "unito.it;telecom-paristech.fr;unito.it", "position": "Postdoc;Associate Professor;Full Professor", "bibtex": "@misc{\nrenzulli2023rem,\ntitle={{REM}: Routing Entropy Minimization for Capsule Networks},\nauthor={Riccardo Renzulli and Enzo Tartaglione and Marco Grangetto},\nyear={2023},\nurl={https://openreview.net/forum?id=DUfpVGCXfwa}\n}", "github": "", "project": "", "reviewers": "uZJ5;LZej;1ErG;Ni7P", "site": "https://openreview.net/forum?id=DUfpVGCXfwa", "pdf_size": 748165, "recommendation": "3;5;6;6", "confidence": "4;4;1;4", "correctness": "4;3;3;4", "technical_novelty": "2;3;3;2", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "73;70;50;66", "wc_strength_and_weaknesses": "210;274;59;153", "wc_clarity_quality_novelty_and_reproducibility": "9;33;18;56", "wc_summary_review": "42;83;31;71", "wc_review": "334;460;158;346", "wc_reply_reviewers": "152;0;0;0", "wc_reply_authors": "1032;824;265;563", "reply_reviewers": "1;0;0;0", "reply_authors": "3;2;1;1", 
"recommendation_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.25, 1.299038105676658 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 64.75, 8.870597499605086 ], "wc_strength_and_weaknesses_avg": [ 174.0, 78.99683537965302 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 29.0, 17.790446874657196 ], "wc_summary_review_avg": [ 56.75, 21.05201890555868 ], "wc_review_avg": [ 324.5, 107.9756917088286 ], "wc_reply_reviewers_avg": [ 38.0, 65.81793068761733 ], "wc_reply_authors_avg": [ 671.0, 287.3282095444163 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.4714045207910316, "corr_recommendation_correctness": -0.40824829046386296, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3456987498546985631&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Turin;T\u00e9l\u00e9com Paris", "aff_unique_dep": ";", "aff_unique_url": "https://www.unito.it;https://www.telecom-paris.fr", "aff_unique_abbr": "UNITO;T\u00e9l\u00e9com Paris", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Italy;France" }, { "id": "DWDPhB6Hi7k", "title": "A Representation Bottleneck of Bayesian Neural Networks", "track": "main", "status": "Withdraw", "tldr": "We theoretically prove and empirically verify a representation bottleneck of Bayesian neural networks.", "abstract": "Unlike standard deep neural networks (DNNs), Bayesian neural networks (BNNs) formulate network weights as probability distributions, which results in distinctive representation capacities from standard DNNs. In this paper, we explore the representation bottleneck of BNNs from the perspective of conceptual representations. It is proven that the logic of a neural network can be faithfully mimicked by a specific sparse causal graph, where each causal pattern can be considered as a concept encoded by the neural network. Then, we formally define the complexity of concepts, and prove that compared to standard DNNs, it is more difficult for BNNs to encode complex concepts. Extensive experiments verify our theoretical proofs. 
The code will be released when the paper is accepted.", "keywords": "interpretability;Bayesian neural network", "primary_area": "", "supplementary_material": "", "author": "Qihan Ren;Huiqi Deng;Yunuo Chen;Siyu Lou;Quanshi Zhang", "authorids": "~Qihan_Ren1;~Huiqi_Deng1;~Yunuo_Chen1;~Siyu_Lou1;~Quanshi_Zhang1", "gender": "M;F;M;F;M", "homepage": "https://nebularaid2000.github.io/;;;;http://qszhang.com", "dblp": "268/5838;229/1317;;317/1067;http://dblp.uni-trier.de/pers/hd/z/Zhang:Quanshi", "google_scholar": "ybTy_DwAAAAJ;QEjqzXgAAAAJ;;M2PvE1IAAAAJ;iFFhHK0AAAAJ", "orcid": ";;;0000-0002-8046-0052;", "linkedin": ";;https://www.linkedin.cn/incareer/in/unochenyn;;", "or_profile": "~Qihan_Ren1;~Huiqi_Deng1;~Yunuo_Chen1;~Siyu_Lou1;~Quanshi_Zhang1", "aff": "Shanghai Jiaotong University;Shanghai jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "position": "PhD student;Postdoc;Undergrad student;PhD student;Associate Professor", "bibtex": "@misc{\nren2023a,\ntitle={A Representation Bottleneck of Bayesian Neural Networks},\nauthor={Qihan Ren and Huiqi Deng and Yunuo Chen and Siyu Lou and Quanshi Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=DWDPhB6Hi7k}\n}", "github": "", "project": "", "reviewers": "wyGy;KcdN;LfDd", "site": "https://openreview.net/forum?id=DWDPhB6Hi7k", "pdf_size": 2096370, "recommendation": "3;3;3", "confidence": "3;4;2", "correctness": "2;2;2", "technical_novelty": "2;2;3", "empirical_novelty": "2;2;0", "wc_summary_paper": "44;125;114", "wc_strength_and_weaknesses": "430;532;339", "wc_clarity_quality_novelty_and_reproducibility": "77;46;4", "wc_summary_review": "30;38;37", "wc_review": "581;741;494", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "correctness_avg": [ 2.0, 0.0 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.3333333333333333, 0.9428090415820634 ], "wc_summary_paper_avg": [ 94.33333333333333, 35.87323359956402 ], "wc_strength_and_weaknesses_avg": [ 433.6666666666667, 78.83456658654812 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 42.333333333333336, 29.914693528246094 ], "wc_summary_review_avg": [ 35.0, 3.559026084010437 ], "wc_review_avg": [ 605.3333333333334, 102.29478101165388 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8106214673372348935&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Treeformer: Dense Gradient Trees for Efficient Attention Computation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11681", "id": "DWn1TEb2fK", "poster": "", "openreview": "https://openreview.net/forum?id=DWn1TEb2fK", "slides": "https://iclr.cc/virtual/2023/poster/11681", 
"video": "https://iclr.cc/virtual/2023/poster/11681", "author_site": "Lovish Madaan, Srinadh Bhojanapalli, Himanshu Jain, Prateek Jain", "tldr": "Efficient Decision Tree based attention computation to reduce FLOPs for self-attention", "abstract": "Standard inference and training with transformer based architectures scale quadratically with input sequence length. This is prohibitively large for a variety of applications especially in web-page translation, query-answering etc. Consequently, several approaches have been developed recently to speedup attention computation by enforcing different attention structures such as sparsity, low-rank, approximating attention using kernels. In this work, we view attention computation as that of nearest neighbor retrieval, and use decision tree based hierarchical navigation to reduce the retrieval cost per query token from linear in sequence length to nearly logarithmic. Based on such hierarchical navigation, we design Treeformer which can use one of two efficient attention layers -- TF-Attention and TC-Attention. TF-Attention computes the attention in a fine-grained style, while TC-Attention is a coarse attention layer which also ensures that the gradients are \"dense\". To optimize such challenging discrete layers, we propose a two-level bootstrapped training method. Using extensive experiments on standard NLP benchmarks, especially for long-sequences, we demonstrate that our Treeformer architecture can be almost as accurate as baseline Transformer while using 30x lesser FLOPs in the attention layer. Compared to Linformer, the accuracy can be as much as 12% higher while using similar FLOPs in the attention layer.", "keywords": "Transformers;Attention;Decision Trees", "primary_area": "", "supplementary_material": "", "author": "Lovish Madaan;Srinadh Bhojanapalli;Himanshu Jain;Prateek Jain", "authorids": "~Lovish_Madaan1;~Srinadh_Bhojanapalli1;~Himanshu_Jain3;~Prateek_Jain1", "gender": ";M;M;M", "homepage": "https://lovishmadaan.github.io;https://bsrinadh.github.io/;;http://prateekjain.org", "dblp": "245/6079;131/6700;;https://dblp.uni-trier.de/pers/j/Jain_0002:Prateek.html", "google_scholar": "zc2WyXkAAAAJ;bpSF_9EAAAAJ;JtrH9jQAAAAJ;qYhRbJoAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Lovish_Madaan1;~Srinadh_Bhojanapalli1;~Himanshu_Jain3;~Prateek_Jain1", "aff": "Google;Google;Google;Google", "aff_domain": "google.com;google.com;google.com;google.com", "position": "Researcher;Research Scientist;Researcher;Researcher", "bibtex": "@inproceedings{\nmadaan2023treeformer,\ntitle={Treeformer: Dense Gradient Trees for Efficient Attention Computation},\nauthor={Lovish Madaan and Srinadh Bhojanapalli and Himanshu Jain and Prateek Jain},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=DWn1TEb2fK}\n}", "github": "", "project": "", "reviewers": "xiDb;bNoK;3zNR", "pdf_size": 7669631, "recommendation": "6;6;8", "confidence": "4;4;3", "correctness": "3;3;3", "technical_novelty": "3;3;4", "empirical_novelty": "0;2;3", "wc_summary_paper": "66;88;120", "wc_strength_and_weaknesses": "171;375;369", "wc_clarity_quality_novelty_and_reproducibility": "47;28;69", "wc_summary_review": "48;38;120", "wc_review": "332;529;678", "wc_reply_reviewers": "48;146;0", "wc_reply_authors": "469;583;481", "reply_reviewers": "1;1;0", "reply_authors": "1;1;1", "recommendation_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], 
"correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.6666666666666667, 1.247219128924647 ], "wc_summary_paper_avg": [ 91.33333333333333, 22.17105219775452 ], "wc_strength_and_weaknesses_avg": [ 305.0, 94.78396488858229 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 48.0, 16.753109164172084 ], "wc_summary_review_avg": [ 68.66666666666667, 36.527006751473934 ], "wc_review_avg": [ 513.0, 141.70626897447644 ], "wc_reply_reviewers_avg": [ 64.66666666666667, 60.75817274701039 ], "wc_reply_authors_avg": [ 511.0, 51.146847410177685 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.9999999999999998, "corr_recommendation_correctness": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2987643376523538794&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=DWn1TEb2fK", "email": "google.com;google.com;google.com;google.com", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "DZ4FS-Evau7", "title": "Task-Agnostic Unsupervised Robust Representation Learning", "track": "main", "status": "Withdraw", "tldr": "We propose a method to learn robust representations without any labels or adversarial fine-tuning in downstream tasks, based on a theoretically grounded unsupervised robustness regularizer.", "abstract": "It has been reported that deep learning models are extremely vulnerable to small but intentionally chosen perturbations of their input. In particular, a deep network, despite its near-optimal accuracy on the clean images, often mis-classifies an image with a worst-case but humanly imperceptible perturbation (so-called adversarial examples). To tackle this problem, a great amount of research has been done to study the training procedure of a network to improve its robustness. However, most of the research so far has focused on the case of supervised learning. With the increasing popularity of self-supervised learning methods, it is also important to study and improve the robustness of their resulting representation on downstream tasks. In this paper, we study the problem of robust representation learning with unlabeled data in a task-agnostic manner. Specifically, we first derive an upper bound on the adversarial loss of a prediction model (which is based on the learned representation) on any downstream task, using its loss on the clean data and a robustness regularizer. Importantly, the regularizer is task-independent, thus we propose to minimize it directly during the representation learning phase to make the downstream prediction model more robust. Extensive experiments show that our method results in a robust model for downstream tasks without any supervised adversarial training, and achieves preferable adversarial performance compared to relevant baselines.", "keywords": "unsupervised robustness;transferable adversarial robustness", "primary_area": "", "supplementary_material": "", "author": "A. 
Tuan Nguyen;Philip Torr;Ser-Nam Lim", "authorids": "~A._Tuan_Nguyen1;~Philip_Torr1;~Ser-Nam_Lim3", "gender": "M;;M", "homepage": "https://atuannguyen.com;http://www.robots.ox.ac.uk/~tvg/;https://sites.google.com/site/sernam", "dblp": ";;04/6633", "google_scholar": "V-guxukAAAAJ;;HX0BfLYAAAAJ", "orcid": ";;", "linkedin": "a-tuan-nguyen/;;", "or_profile": "~A._Tuan_Nguyen1;~Philip_Torr1;~Ser-Nam_Lim1", "aff": "University of Oxford;University of Oxford;Meta Facebook", "aff_domain": "ox.ac.uk;ox.ac.uk;facebook.com", "position": "PhD student;Full Professor;Research Scientist Manager", "bibtex": "@misc{\nnguyen2023taskagnostic,\ntitle={Task-Agnostic Unsupervised Robust Representation Learning},\nauthor={A. Tuan Nguyen and Philip Torr and Ser-Nam Lim},\nyear={2023},\nurl={https://openreview.net/forum?id=DZ4FS-Evau7}\n}", "github": "", "project": "", "reviewers": "vmes;3zta;GaGP;Xbsp", "site": "https://openreview.net/forum?id=DZ4FS-Evau7", "pdf_size": 390798, "recommendation": "3;3;5;5", "confidence": "4;3;4;4", "correctness": "3;2;3;2", "technical_novelty": "2;2;3;2", "empirical_novelty": "2;3;3;2", "wc_summary_paper": "79;75;25;56", "wc_strength_and_weaknesses": "113;317;182;341", "wc_clarity_quality_novelty_and_reproducibility": "227;22;68;11", "wc_summary_review": "52;40;51;66", "wc_review": "471;454;326;474", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 58.75, 21.33512362279628 ], "wc_strength_and_weaknesses_avg": [ 238.25, 94.35405396696 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 82.0, 86.40312494348801 ], "wc_summary_review_avg": [ 52.25, 9.229707470987366 ], "wc_review_avg": [ 431.25, 61.24285672631544 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:xVLp7tPiZ3sJ:scholar.google.com/&scioq=Task-Agnostic+Unsupervised+Robust+Representation+Learning&hl=en&as_sdt=0,21", "gs_version_total": 0, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Oxford;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://www.ox.ac.uk;https://meta.com", "aff_unique_abbr": "Oxford;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United Kingdom;United States" }, { "id": "D__ipVB0Z7", "title": "Disentangled Conditional Variational Autoencoder for Unsupervised Anomaly Detection", "track": "main", "status": "Reject", "tldr": "unsupervised anomaly detection architecture incorporating disentangled learning, information theory and conditional variational modeling. ", "abstract": "Recently, generative models have shown promising performance in anomaly detection tasks. Specifically, autoencoders learn representations of high-dimensional data, and their reconstruction ability can be used to assess whether a new instance is likely to be anomalous. 
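A minimal sketch of such reconstruction-based scoring, assuming a trained PyTorch autoencoder and a user-chosen threshold (both stand-ins here):

```python
import torch

def anomaly_scores(autoencoder, x):
    """Per-sample mean squared reconstruction error; higher = more anomalous."""
    with torch.no_grad():
        recon = autoencoder(x)
    return ((x - recon) ** 2).flatten(1).mean(dim=1)

# Toy usage with an untrained stand-in autoencoder (a trained one is assumed in practice).
toy_ae = torch.nn.Sequential(torch.nn.Linear(20, 4), torch.nn.ReLU(), torch.nn.Linear(4, 20))
batch = torch.randn(32, 20)
flagged = anomaly_scores(toy_ae, batch) > 1.0  # threshold is a free parameter
```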
However, the primary challenge of unsupervised anomaly detection (UAD) is in learning appropriate disentangled features and avoiding information loss, while incorporating known sources of variation to improve the reconstruction. In this paper, we propose a novel architecture of generative autoencoder by combining the frameworks of $\\beta$-VAE, conditional variational autoencoder (CVAE), and the principle of total correlation (TC). We show that our architecture improves the disentanglement of latent features, optimizes TC loss more efficiently, and improves the ability to detect anomalies in an unsupervised manner with respect to high-dimensional instances, such as in imaging datasets. Through both qualitative and quantitative experiments on several benchmark datasets, we demonstrate that our proposed method excels in terms of both anomaly detection and capturing disentangled features. Our analysis underlines the importance of learning disentangled features for UAD tasks.", "keywords": "unsupervised anomaly detection;autoencoder;disentanglement learning;representation learning;information theory", "primary_area": "", "supplementary_material": "/attachment/c4e929c27df6ff15d04701157e0a3b56be6192df.zip", "author": "Asif Ahmed Neloy;Maxime Turgeon", "authorids": "~Asif_Ahmed_Neloy1;~Maxime_Turgeon1", "gender": "M;", "homepage": "https://aaneloy.ca/;https://maxturgeon.ca", "dblp": "252/2623.html;", "google_scholar": "WjL1EDcAAAAJ;", "orcid": "0000-0002-2289-2762;0000-0003-4863-6035", "linkedin": "aaneloy/;", "or_profile": "~Asif_Ahmed_Neloy1;~Maxime_Turgeon1", "aff": ";University of Manitoba", "aff_domain": ";umanitoba.ca", "position": ";Assistant Professor", "bibtex": "@misc{\nneloy2023disentangled,\ntitle={Disentangled Conditional Variational Autoencoder for Unsupervised Anomaly Detection},\nauthor={Asif Ahmed Neloy and Maxime Turgeon},\nyear={2023},\nurl={https://openreview.net/forum?id=D__ipVB0Z7}\n}", "github": "", "project": "", "reviewers": "Pbh4;8jzN;HT5F;DSrs", "site": "https://openreview.net/forum?id=D__ipVB0Z7", "pdf_size": 9835040, "recommendation": "3;3;3;3", "confidence": "4;4;4;3", "correctness": "3;3;3;2", "technical_novelty": "1;2;2;2", "empirical_novelty": "1;3;2;2", "wc_summary_paper": "61;31;63;79", "wc_strength_and_weaknesses": "123;472;59;123", "wc_clarity_quality_novelty_and_reproducibility": "184;21;34;17", "wc_summary_review": "21;50;130;61", "wc_review": "389;574;286;280", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "368;689;702;421", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 58.5, 17.342145196024624 ], "wc_strength_and_weaknesses_avg": [ 194.25, 162.4736517100542 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 64.0, 69.56651493355119 ], "wc_summary_review_avg": [ 65.5, 40.00312487793922 ], "wc_review_avg": [ 382.25, 118.88308332138766 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 545.0, 151.7316710512344 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:NVcL6n1AhoEJ:scholar.google.com/&scioq=Disentangled+Conditional+Variational+Autoencoder+for+Unsupervised+Anomaly+Detection&hl=en&as_sdt=0,33", "gs_version_total": 5, "aff_unique_index": "0", "aff_unique_norm": "University of Manitoba", "aff_unique_dep": "", "aff_unique_url": "https://umanitoba.ca", "aff_unique_abbr": "U of M", "aff_country_unique_index": "0", "aff_country_unique": "Canada" }, { "id": "DaYt6DAA-JK", "title": "MiDAS: Multi-integrated Domain Adaptive Supervision for Fake News Detection", "track": "main", "status": "Reject", "tldr": "We use Lipschitz smoothness and probabilistic Lipschitzness to build a theoretical foundation for effective multi-domain adaptation using randomized perturbations on unseen data.", "abstract": "COVID-19 related misinformation and fake news, coined an 'infodemic', has dramatically increased over the past few years. This misinformation exhibits concept drift, where the distribution of fake news changes over time, reducing effectiveness of previously trained models for fake news detection. Given a set of fake news models trained on multiple domains, we propose an adaptive decision module to select the best-fit model for a new sample. We propose MiDAS, a multi-domain adaptative approach for fake news detection that ranks relevancy of existing models to new samples. MiDAS contains 2 components: a doman-invariant encoder, and an adaptive model selector. MiDAS integrates multiple pre-trained and fine-tuned models with their training data to create a domain-invariant representation. Then, MiDAS uses local Lipschitz smoothness of the invariant embedding space to estimate each model's relevance to a new sample. Higher ranked models provide predictions, and lower ranked models abstain. We evaluate MiDAS on generalization to drifted data with 9 fake news datasets, each obtained from different domains and modalities. MiDAS achieves new state-of-the-art performance on multi-domain adaptation for out-of-distribution fake news classification. 
", "keywords": "multi-domain adaptation;lipschitz continuity;text classification;weak supervision;team-of-experts", "primary_area": "", "supplementary_material": "/attachment/fdf2aa084bbb8be320d9452b2db939ce593ec1b2.zip", "author": "Abhijit Suprem;Calton Pu", "authorids": "~Abhijit_Suprem1;~Calton_Pu1", "gender": "M;M", "homepage": "https://asuprem.com/;https://faculty.cc.gatech.edu/~calton/", "dblp": "128/4693;", "google_scholar": "mhcq1sAAAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Abhijit_Suprem1;~Calton_Pu1", "aff": "Georgia Institute of Technology;College of Computing, Georgia Institute of Technology", "aff_domain": "gatech.edu;cc.gatech.edu", "position": "PhD student;Full Professor", "bibtex": "@misc{\nsuprem2023midas,\ntitle={Mi{DAS}: Multi-integrated Domain Adaptive Supervision for Fake News Detection},\nauthor={Abhijit Suprem and Calton Pu},\nyear={2023},\nurl={https://openreview.net/forum?id=DaYt6DAA-JK}\n}", "github": "", "project": "", "reviewers": "WVAw;i2cY;35Bs;5EyG", "site": "https://openreview.net/forum?id=DaYt6DAA-JK", "pdf_size": 618366, "recommendation": "3;5;5;6", "confidence": "4;4;3;2", "correctness": "3;3;3;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;3;3;2", "wc_summary_paper": "69;78;46;45", "wc_strength_and_weaknesses": "284;144;81;245", "wc_clarity_quality_novelty_and_reproducibility": "101;75;111;267", "wc_summary_review": "48;157;147;35", "wc_review": "502;454;385;592", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1404;1184;1640;2000", "reply_reviewers": "0;0;0;0", "reply_authors": "2;2;3;3", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 59.5, 14.361406616345072 ], "wc_strength_and_weaknesses_avg": [ 188.5, 80.38812101299544 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 138.5, 75.34421012924616 ], "wc_summary_review_avg": [ 96.75, 55.55346523845295 ], "wc_review_avg": [ 483.25, 75.31060682267804 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1557.0, 302.35575073082373 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.7608859102526822, "corr_recommendation_correctness": 0.0, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4747511095175284220&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Georgia Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.gatech.edu", "aff_unique_abbr": "Georgia Tech", "aff_campus_unique_index": "1", "aff_campus_unique": ";Atlanta", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "Db8XXy9RCL", "title": "Points2NeRF: Generating Neural Radiance Fields from 3D point cloud", "track": "main", "status": "Withdraw", "tldr": "We convert 3D point clouds into Neural Radiance Fields (NeRFs).", "abstract": "Neural Radiance Fields (NeRFs) offer a state-of-the-art quality in synthesising novel views of complex 3D scenes from a small subset of base images. For NeRFs to perform optimally, the registration of base images has to follow certain assumptions, including maintaining constant distance between the camera and the object. 
We can address this limitation by training NeRFs with 3D point clouds, instead of images, yet a straightforward substitution is impossible due to the sparsity of 3D point clouds in under-sampled regions, which leads to incomplete reconstructions output by NeRFs. To solve this problem, we propose an auto-encoder-based architecture that leverages a hypernetwork paradigm to transfer 3D points with the associated color values through a lower-dimensional latent space and generate the weights of a NeRF model. This way, we are able to accommodate the sparsity of 3D point clouds and fully exploit the potential of point cloud data. As a side benefit, our method offers an implicit way of representing 3D scenes and objects that can be employed to condition NeRFs and hence generalize the models beyond objects seen during training. Empirical evaluation confirms the advantages of our method over conventional NeRFs and proves its superiority in practical applications.", "keywords": "NeRF;Neural Radiance Fields;3D point clouds", "primary_area": "", "supplementary_material": "/attachment/16782a70904bcf5b437f7b71e6b9e69c103119cb.zip", "author": "Dominik Zimny;Tomasz Trzcinski;Przemys\u0142aw Spurek", "authorids": "~Dominik_Zimny1;~Tomasz_Trzcinski2;~Przemys\u0142aw_Spurek1", "gender": "M;M;M", "homepage": ";https://cvlab.ii.pw.edu.pl/ttrzcins/;http://ww2.ii.uj.edu.pl/~spurek/", "dblp": ";05/11408;77/10260", "google_scholar": ";https://scholar.google.pl/citations?user=bJMRBFoAAAAJ;0kp0MbgAAAAJ", "orcid": ";;0000-0003-0097-5521", "linkedin": "dominik-zimny-89a8271b7/;;spurek/", "or_profile": "~Dominik_Zimny1;~Tomasz_Trzcinski2;~Przemys\u0142aw_Spurek1", "aff": ";;Jagiellonian University Cracow", "aff_domain": ";;uj.edu.pl", "position": ";;Associate Professor", "bibtex": "@misc{\nzimny2023pointsnerf,\ntitle={Points2Ne{RF}: Generating Neural Radiance Fields from 3D point cloud},\nauthor={Dominik Zimny and Tomasz Trzcinski and Przemys{\\l}aw Spurek},\nyear={2023},\nurl={https://openreview.net/forum?id=Db8XXy9RCL}\n}", "github": "", "project": "", "reviewers": "QdR8;oS6r;QKTW;8g5d", "site": "https://openreview.net/forum?id=Db8XXy9RCL", "pdf_size": 3402202, "recommendation": "3;3;5;5", "confidence": "4;4;4;2", "correctness": "2;2;3;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "1;2;2;3", "wc_summary_paper": "41;47;80;32", "wc_strength_and_weaknesses": "157;771;314;144", "wc_clarity_quality_novelty_and_reproducibility": "32;14;131;23", "wc_summary_review": "19;62;71;22", "wc_review": "249;894;596;221", "wc_reply_reviewers": "0;13;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 50.0, 18.12456896039186 ], "wc_strength_and_weaknesses_avg": [ 346.5, 254.05363606923638 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 50.0, 47.19639816765682 ], "wc_summary_review_avg": [ 43.5, 23.243278598338918 ], "wc_review_avg": [ 490.0, 276.0860373144575 ], "wc_reply_reviewers_avg": [ 3.25, 5.629165124598851 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 1.0, "gs_citation": 22, "gs_cited_by_link":
"https://scholar.google.com/scholar?cites=9861482819645333512&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff_unique_index": "0", "aff_unique_norm": "Jagiellonian University", "aff_unique_dep": "", "aff_unique_url": "https://www.uj.edu.pl", "aff_unique_abbr": "UJ", "aff_campus_unique_index": "0", "aff_campus_unique": "Cracow", "aff_country_unique_index": "0", "aff_country_unique": "Poland" }, { "id": "DbLtChzghG", "title": "Event-former: A Self-supervised Learning Paradigm for Temporal Point Processes", "track": "main", "status": "Withdraw", "tldr": "We propose a new paradigm for self-supervised learning for multivariate temporal point processes. Our approach demonstrates performance boost of as high as up to 16% compared to state-of-the art models for next event prediction. ", "abstract": "Self-supervision is one of the hallmarks of representation learning in the increasingly popular suite of foundation models including large language models such as BERT and GPT-3, but it has not been pursued in the context of multivariate event streams, to the best of our knowledge. We introduce a new paradigm for self-supervised learning for temporal point processes using a transformer encoder. Specifically, we design a novel pre-training strategy for the encoder where we not only mask random event epochs but also insert randomly sampled \u2018void\u2019 epochs where an event does not occur; this differs from the typical discrete-time pretext tasks such as word-masking in BERT but expands the effectiveness of masking to better capture continuous-time dynamics. The pre-trained model can subsequently be fine-tuned on a potentially much smaller event dataset, similar to other foundation models. We demonstrate the effectiveness of our proposed paradigm on the next-event prediction task using synthetic datasets and 3 real applications, observing a relative performance boost of as high as up to 15% compared to state-of-the art models.", "keywords": "event sequences;self-supervised learning;point process;transformer", "primary_area": "", "supplementary_material": "", "author": "Xiao Shou;Dharmashankar Subramanian;Debarun Bhattacharjya;Tian Gao;Kristin Bennett", "authorids": "~Xiao_Shou2;~Dharmashankar_Subramanian1;~Debarun_Bhattacharjya1;~Tian_Gao1;~Kristin_Bennett1", "gender": "M;M;M;;F", "homepage": "https://www.ecs.baylor.edu/person/dr-xiao-shou;http://researcher.watson.ibm.com/researcher/view.php?person=us-dharmash;https://researcher.watson.ibm.com/researcher/view.php?person=us-debarunb;https://sites.google.com/view/tiangao/home;https://science.rpi.edu/mathematical-sciences/faculty/kristin-bennett", "dblp": ";;98/5604;;24/4209.html", "google_scholar": "https://scholar.google.com/citations?hl=en;j54RzcEAAAAJ;pwfVt-MAAAAJ;5rweipAAAAAJ;GX4ZXSkAAAAJ", "orcid": ";;;0000-0002-0337-6682;0000-0002-8782-105X", "linkedin": ";;;;kristin-bennett-b337637/", "or_profile": "~Xiao_Shou2;~Dharmashankar_Subramanian1;~Debarun_Bhattacharjya1;~Tian_Gao1;~Kristin_Bennett1", "aff": "Rensselaer Polytechnic Institute;International Business Machines;International Business Machines;Rensselaer Polytechnic Institute;Rensselaer Polytechnic Institute", "aff_domain": "rpi.edu;ibm.com;ibm.com;rpi.edu;rpi.edu", "position": "PhD student;Principal Researcher;Researcher;PhD student;Full Professor", "bibtex": "@misc{\nshou2023eventformer,\ntitle={Event-former: A Self-supervised Learning Paradigm for Temporal Point Processes},\nauthor={Xiao Shou and Dharmashankar Subramanian and Debarun Bhattacharjya and Tian Gao and Kristin 
Bennett},\nyear={2023},\nurl={https://openreview.net/forum?id=DbLtChzghG}\n}", "github": "", "project": "", "reviewers": "WD9T;BDPi;CbC5;UHSa", "site": "https://openreview.net/forum?id=DbLtChzghG", "pdf_size": 571981, "recommendation": "1;3;6;6", "confidence": "5;2;1;4", "correctness": "2;3;4;3", "technical_novelty": "1;2;4;3", "empirical_novelty": "2;2;4;3", "wc_summary_paper": "56;52;29;97", "wc_strength_and_weaknesses": "433;182;29;436", "wc_clarity_quality_novelty_and_reproducibility": "3;16;29;2", "wc_summary_review": "79;2;29;55", "wc_review": "571;252;116;590", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.0, 2.1213203435596424 ], "confidence_avg": [ 3.0, 1.5811388300841898 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.5, 1.118033988749895 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 58.5, 24.5 ], "wc_strength_and_weaknesses_avg": [ 270.0, 173.16899260548928 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 12.5, 11.01135777277262 ], "wc_summary_review_avg": [ 41.25, 28.74347752099596 ], "wc_review_avg": [ 382.25, 204.10827396262013 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.521749194749951, "corr_recommendation_correctness": 0.8333333333333334, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:QqRrXtBbuhsJ:scholar.google.com/&scioq=Event-former:+A+Self-supervised+Learning+Paradigm+for+Temporal+Point+Processes&hl=en&as_sdt=0,10", "gs_version_total": 0, "aff_unique_index": "0;1;1;0;0", "aff_unique_norm": "Rensselaer Polytechnic Institute;International Business Machines Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://www.rpi.edu;https://www.ibm.com", "aff_unique_abbr": "RPI;IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "Db_WALIfbdC", "title": "Bayesian Optimal Experimental Design for the Survey Bandit Setting", "track": "main", "status": "Reject", "tldr": "We develop a Bayesian optimal experimental design procedure for the survey bandit setting.", "abstract": "The contextual bandit is a classic problem in sequential decision making under uncertainty that finds broad application to tasks in precision medicine, personalized education, and drug discovery. Here, a decision maker repeatedly receives a context, takes an action, and then observes an associated outcome, with the goal of choosing actions that achieve minimal regret. However, in many settings, the context is not given, and the decision maker must instead collect some information to infer a context before proceeding. For example, when a doctor does not have prior information about a patient, they might ask a sequence of questions before recommending a medical treatment. In this paper, we aim to develop methods for this setting\u2014which we refer to as the \emph{survey bandit}\u2014where the decision maker is not given access to the context but can ask a finite sequence of questions to gain information about the context before taking an action and observing an outcome.
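The question-selection principle underlying this setting can be sketched with a self-contained toy example: choose the question whose answer is expected to most reduce uncertainty about the best action. The types, questions, and deterministic answers below are invented for illustration and are not the paper's model.

```python
import math

# Toy survey bandit: a finite set of user types, a known best action per type,
# and questions whose answers deterministically depend on the type.
types = ["A", "B", "C", "D"]
best_action = {"A": 0, "B": 0, "C": 1, "D": 2}
answer = {("q1", "A"): "yes", ("q1", "B"): "yes", ("q1", "C"): "no", ("q1", "D"): "no",
          ("q2", "A"): "yes", ("q2", "B"): "no", ("q2", "C"): "yes", ("q2", "D"): "no"}

def action_entropy(belief):
    # Entropy of the induced distribution over the *optimal action*.
    probs = {}
    for t, p in belief.items():
        probs[best_action[t]] = probs.get(best_action[t], 0.0) + p
    return -sum(p * math.log(p) for p in probs.values() if p > 0)

def expected_info_gain(belief, q):
    h0, gain = action_entropy(belief), 0.0
    for a in ("yes", "no"):
        p_a = sum(p for t, p in belief.items() if answer[(q, t)] == a)
        if p_a > 0:
            post = {t: (p / p_a if answer[(q, t)] == a else 0.0) for t, p in belief.items()}
            gain += p_a * (h0 - action_entropy(post))
    return gain

belief = {t: 0.25 for t in types}
print(max(["q1", "q2"], key=lambda q: expected_info_gain(belief, q)))  # -> "q1"
```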
Using insights from Bayesian optimal experimental design (BOED) and decision-theoretic information theory, we view the interaction with each user as a BOED task, where the goal is to ask a sequence of questions that elicit the most information about the optimal action for this user. Our procedure is agnostic to the choice of probabilistic model, and we demonstrate its usefulness in a few common classes of distributions. Our algorithm achieves significantly better performance on both synthetic and real data relative to existing baseline methods while remaining statistically efficient, interpretable, and computationally friendly.", "keywords": "Bayesian optimal experimental design;contextual bandit;survey", "primary_area": "", "supplementary_material": "", "author": "Sang T. Truong;Willie Neiswanger;Susan Athey", "authorids": "~Sang_T._Truong1;~Willie_Neiswanger2;~Susan_Athey1", "gender": "M;M;F", "homepage": "https://cs.stanford.edu/~sttruong;https://willieneis.github.io/;https://athey.people.stanford.edu/", "dblp": "301/9134;120/7593.html;59/6032", "google_scholar": "oXPm0dAAAAAJ;QwKHApEAAAAJ;UdaJi94AAAAJ", "orcid": ";;0000-0001-6934-562X", "linkedin": "sangttruong/;;", "or_profile": "~Sang_T._Truong1;~Willie_Neiswanger2;~Susan_Athey1", "aff": "Stanford University;Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu", "position": "PhD student;Postdoc;Full Professor", "bibtex": "@misc{\ntruong2023bayesian,\ntitle={Bayesian Optimal Experimental Design for the Survey Bandit Setting},\nauthor={Sang T. Truong and Willie Neiswanger and Susan Athey},\nyear={2023},\nurl={https://openreview.net/forum?id=Db_WALIfbdC}\n}", "github": "", "project": "", "reviewers": "XEp2;DagW;sGDT;5EZr", "site": "https://openreview.net/forum?id=Db_WALIfbdC", "pdf_size": 3990570, "recommendation": "3;3;3;6", "confidence": "3;4;4;3", "correctness": "2;2;3;4", "technical_novelty": "2;3;2;2", "empirical_novelty": "2;3;2;3", "wc_summary_paper": "130;76;135;86", "wc_strength_and_weaknesses": "175;243;356;204", "wc_clarity_quality_novelty_and_reproducibility": "295;64;70;71", "wc_summary_review": "61;19;22;48", "wc_review": "661;402;583;409", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.75, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.75, 0.82915619758885 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 106.75, 26.05163142684158 ], "wc_strength_and_weaknesses_avg": [ 244.5, 68.7477272351603 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 125.0, 98.18604788868936 ], "wc_summary_review_avg": [ 37.5, 17.64227876437735 ], "wc_review_avg": [ 513.75, 111.7348983084515 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.8703882797784892, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:L9lekL_40CQJ:scholar.google.com/&scioq=Bayesian+Optimal+Experimental+Design+for+the+Survey+Bandit+Setting&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford",
"aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Transformers Learn Shortcuts to Automata", "status": "Top-5%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11118", "id": "De4FYqjFueZ", "poster": "/media/PosterPDFs/ICLR%202023/11118.png?t=1683139230.52861", "openreview": "https://openreview.net/forum?id=De4FYqjFueZ", "slides": "https://iclr.cc/virtual/2023/poster/11118", "video": "https://iclr.cc/virtual/2023/poster/11118", "author_site": "Bingbin Liu, Jordan Ash, Surbhi Goel, Akshay Krishnamurthy, Cyril Zhang", "tldr": "Shallow, non-recurrent Transformers can simulate the recurrent dynamics of finite-state automata, via counterintuitive shortcuts.", "abstract": "Algorithmic reasoning requires capabilities which are most naturally understood through recurrent models of computation, like the Turing machine. However, Transformer models, while lacking recurrence, are able to perform such reasoning using far fewer layers than the number of reasoning steps. This raises the question: what solutions are these shallow and non-recurrent models finding? We investigate this question in the setting of learning automata, discrete dynamical systems naturally suited to recurrent modeling and expressing algorithmic tasks. Our theoretical results completely characterize shortcut solutions, whereby a shallow Transformer with only $o(T)$ layers can exactly replicate the computation of an automaton on an input sequence of length $T$. By representing automata using the algebraic structure of their underlying transformation semigroups, we obtain $O(\\log T)$-depth simulators for all automata and $O(1)$-depth simulators for all automata whose associated groups are solvable. Empirically, we perform synthetic experiments by training Transformers to simulate a wide variety of automata, and show that shortcut solutions can be learned via standard training. We further investigate the brittleness of these solutions and propose potential mitigations.", "keywords": "Transformer;self-attention;group theory;semigroup theory;algebraic automata theory;shortcut learning;theory of deep learning", "primary_area": "", "supplementary_material": "/attachment/a6bb850bd7ce5ce37df76892ec4c3b91474f8d16.zip", "author": "Bingbin Liu;Jordan T. Ash;Surbhi Goel;Akshay Krishnamurthy;Cyril Zhang", "authorids": "~Bingbin_Liu1;~Jordan_T._Ash1;~Surbhi_Goel1;~Akshay_Krishnamurthy1;~Cyril_Zhang1", "gender": "F;;F;M;", "homepage": "https://clarabing.github.io/;http://www.jordantash.com;https://www.surbhigoel.com;https://www.cics.umass.edu/~akshay/;https://cyrilzhang.com", "dblp": "222/1554;176/5225;190/7815;85/8024;203/4448", "google_scholar": "2ud06rQAAAAJ;bmRNH-UAAAAJ;https://scholar.google.co.in/citations?user=Zqz4CQoAAAAJ;https://scholar.google.com.tw/citations?user=K0kaNvkAAAAJ;sXtjq8IAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Bingbin_Liu1;~Jordan_T._Ash1;~Surbhi_Goel1;~Akshay_Krishnamurthy1;~Cyril_Zhang1", "aff": "Carnegie Mellon University;Microsoft Research;University of Pennsylvania;Microsoft Research;Microsoft", "aff_domain": "cmu.edu;research.microsoft.com;upenn.edu;research.microsoft.com;microsoft.com", "position": "PhD student;Postdoc;Assistant Professor;Principal Researcher;Senior Researcher", "bibtex": "@inproceedings{\nliu2023transformers,\ntitle={Transformers Learn Shortcuts to Automata},\nauthor={Bingbin Liu and Jordan T. 
Ash and Surbhi Goel and Akshay Krishnamurthy and Cyril Zhang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=De4FYqjFueZ}\n}", "github": "", "project": "", "reviewers": "zU3W;Czkt;cJyA", "pdf_size": 11111468, "recommendation": "6;8;10", "confidence": "3;4;3", "correctness": "3;4;4", "technical_novelty": "3;4;4", "empirical_novelty": "2;3;4", "wc_summary_paper": "56;144;88", "wc_strength_and_weaknesses": "66;298;71", "wc_clarity_quality_novelty_and_reproducibility": "236;92;518", "wc_summary_review": "43;160;11", "wc_review": "401;694;688", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "766;127;145", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 8.0, 1.632993161855452 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 96.0, 36.368484525295614 ], "wc_strength_and_weaknesses_avg": [ 145.0, 108.20659252867482 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 282.0, 176.92936443677178 ], "wc_summary_review_avg": [ 71.33333333333333, 64.04338807055382 ], "wc_review_avg": [ 594.3333333333334, 136.72925396157507 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 346.0, 297.07574791625115 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.8660254037844385, "gs_citation": 213, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8457247585808812316&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=De4FYqjFueZ", "email": "cmu.edu;research.microsoft.com;upenn.edu;research.microsoft.com;microsoft.com", "author_num": 5, "aff_unique_index": "0;1;2;1;1", "aff_unique_norm": "Carnegie Mellon University;Microsoft;University of Pennsylvania", "aff_unique_dep": ";Microsoft Research;", "aff_unique_url": "https://www.cmu.edu;https://www.microsoft.com/en-us/research;https://www.upenn.edu", "aff_unique_abbr": "CMU;MSR;UPenn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Emergent World Representations: Exploring a Sequence Model Trained on a Synthetic Task", "status": "Top-5%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11827", "id": "DeG07_TcZvT", "poster": "", "openreview": "https://openreview.net/forum?id=DeG07_TcZvT", "slides": "https://iclr.cc/virtual/2023/poster/11827", "video": "https://iclr.cc/virtual/2023/poster/11827", "author_site": "Kenneth Li, Aspen Hopkins, David Bau, Fernanda Vi\u00e9gas, Hanspeter Pfister, Martin Wattenberg", "tldr": "", "abstract": "Language models show a surprising range of capabilities, but the source of their apparent competence is unclear. Do these networks just memorize a collection of surface statistics, or do they rely on internal representations of the process that generates the sequences they see? We investigate this question by applying a variant of the GPT model to the task of predicting legal moves in a simple board game, Othello. Although the network has no a priori knowledge of the game or its rules, we uncover evidence of an emergent nonlinear internal representation of the board state. 
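Probing experiments of this kind are commonly implemented by fitting a small classifier from a layer's hidden states to per-square board labels; the following PyTorch sketch uses random placeholder data and assumed shapes, so it shows the recipe rather than the authors' setup.

```python
import torch

hidden = torch.randn(1000, 512)             # assumed: activations for 1000 positions
squares = torch.randint(0, 3, (1000, 64))   # assumed: per-square label (empty/own/opponent)

probe = torch.nn.Sequential(                # one hidden layer -> a nonlinear probe
    torch.nn.Linear(512, 256), torch.nn.ReLU(), torch.nn.Linear(256, 64 * 3))
opt = torch.optim.Adam(probe.parameters(), lr=1e-3)
for _ in range(100):
    logits = probe(hidden).view(-1, 64, 3)
    loss = torch.nn.functional.cross_entropy(logits.reshape(-1, 3), squares.reshape(-1))
    opt.zero_grad()
    loss.backward()
    opt.step()
# Decodability of the board state = probe accuracy (on held-out positions in practice).
acc = (probe(hidden).view(-1, 64, 3).argmax(-1) == squares).float().mean()
```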
Interventional experiments indicate this representation can be used to control the output of the network and create \"latent saliency maps\" that can help explain predictions in human terms.", "keywords": "world representation;GPT", "primary_area": "", "supplementary_material": "", "author": "Kenneth Li;Aspen K Hopkins;David Bau;Fernanda Vi\u00e9gas;Hanspeter Pfister;Martin Wattenberg", "authorids": "~Kenneth_Li1;~Aspen_K_Hopkins1;~David_Bau1;~Fernanda_Vi\u00e9gas1;~Hanspeter_Pfister1;~Martin_Wattenberg1", "gender": ";;M;;M;M", "homepage": "https://likenneth.github.io/;;https://baulab.info/;;https://vcg.seas.harvard.edu;http://www.bewitched.com", "dblp": "75/6627-12;;47/3614;;p/HanspeterPfister;w/MartinWattenberg", "google_scholar": "v0GItgwAAAAJ;;CYI6cKgAAAAJ;;tvBEoaMAAAAJ;pv54dqMAAAAJ", "orcid": ";;0000-0003-1744-6765;;0000-0002-3620-2582;", "linkedin": ";aspen-hopkins-9ab281107;david-bau-4b8130/;;hpfister/;", "or_profile": "~Kenneth_Li1;~Aspen_K_Hopkins1;~David_Bau1;~Fernanda_Vi\u00e9gas1;~Hanspeter_Pfister1;~Martin_Wattenberg1", "aff": "Harvard University;Massachusetts Institute of Technology;Northeastern University;;Harvard University;Google", "aff_domain": "harvard.edu;mit.edu;northeastern.edu;;harvard.edu;google.com", "position": "PhD student;PhD student;Assistant Professor;;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\nli2023emergent,\ntitle={Emergent World Representations: Exploring a Sequence Model Trained on a Synthetic Task},\nauthor={Kenneth Li and Aspen K Hopkins and David Bau and Fernanda Vi{\\'e}gas and Hanspeter Pfister and Martin Wattenberg},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=DeG07_TcZvT}\n}", "github": "", "project": "", "reviewers": "uxFu;1z26;LWko;VKpz", "pdf_size": 2947091, "recommendation": "6;8;8;8", "confidence": "3;4;4;4", "correctness": "3;3;4;4", "technical_novelty": "2;2;3;4", "empirical_novelty": "3;3;3;4", "wc_summary_paper": "161;241;61;63", "wc_strength_and_weaknesses": "493;293;74;113", "wc_clarity_quality_novelty_and_reproducibility": "181;395;989;3", "wc_summary_review": "88;44;134;39", "wc_review": "923;973;1258;218", "wc_reply_reviewers": "36;53;36;0", "wc_reply_authors": "681;818;300;130", "reply_reviewers": "1;1;1;0", "reply_authors": "1;2;1;1", "recommendation_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 131.5, 75.03832354204084 ], "wc_strength_and_weaknesses_avg": [ 243.25, 166.17817997559126 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 392.0, 371.5709891797259 ], "wc_summary_review_avg": [ 76.25, 38.408169703853375 ], "wc_review_avg": [ 843.0, 382.8021682279242 ], "wc_reply_reviewers_avg": [ 31.25, 19.330998422223306 ], "wc_reply_authors_avg": [ 482.25, 278.1747427427587 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 1.0, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 304, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10210654007916766378&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=DeG07_TcZvT", "email": "harvard.edu;mit.edu;northeastern.edu;;harvard.edu;google.com", "author_num": 6, 
"aff_unique_index": "0;1;2;0;3", "aff_unique_norm": "Harvard University;Massachusetts Institute of Technology;Northeastern University;Google", "aff_unique_dep": ";;;Google", "aff_unique_url": "https://www.harvard.edu;https://web.mit.edu;https://www.northeastern.edu;https://www.google.com", "aff_unique_abbr": "Harvard;MIT;NEU;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "DiKT4rrUD9n", "title": "Friends to Help: Saving Federated Learning from Client Dropout", "track": "main", "status": "Reject", "tldr": "This paper proposes an algorithm to address client dropout in Federated Learning that discovers the ``friendship'' among clients and uses the friend client's local update as a substitute for the dropout client. ", "abstract": "Federated learning (FL) is an outstanding distributed machine learning framework due to its benefits on data privacy and communication efficiency. Since full client participation in many cases is infeasible due to constrained resources, partial participation FL algorithms have been investigated that proactively select/sample a subset of clients, aiming to achieve learning performance close to the full participation case. This paper studies a passive partial client participation scenario that is much less well understood, where partial participation is a result of external events, namely client dropout, rather than a decision of the FL algorithm. We cast FL with client dropout as a special case of a larger class of FL problems where clients can submit substitute (possibly inaccurate) local model updates. Based on our convergence analysis, we develop a new algorithm FL-FDMS that discovers friends of clients (i.e., clients whose data distributions are similar) on-the-fly and uses friends' local updates as substitutes for the dropout clients, thereby reducing the substitution error and improving the convergence performance. A complexity reduction mechanism is also incorporated into FL-FDMS, making it both theoretically sound and practically useful. 
Experiments on MNIST and CIFAR-10 confirmed the superior performance of FL-FDMS in handling client dropout in FL.", "keywords": "Federated Learning;Client Dropout;Partial Participation", "primary_area": "", "supplementary_material": "/attachment/969a9e3b67fd755327d552f367ea37e4e7e764d8.zip", "author": "Heqiang Wang;Jie Xu", "authorids": "~Heqiang_Wang1;~Jie_Xu6", "gender": "M;", "homepage": ";https://jiexu.ece.ufl.edu", "dblp": ";37/5126-1", "google_scholar": "7EILG68AAAAJ;07kG-YsAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Heqiang_Wang1;~Jie_Xu6", "aff": "University of Miami;University of Miami", "aff_domain": "miami.edu;miami.edu", "position": "PhD student;Associate Professor", "bibtex": "@misc{\nwang2023friends,\ntitle={Friends to Help: Saving Federated Learning from Client Dropout},\nauthor={Heqiang Wang and Jie Xu},\nyear={2023},\nurl={https://openreview.net/forum?id=DiKT4rrUD9n}\n}", "github": "", "project": "", "reviewers": "mQor;tupj;7guq;AVQE", "site": "https://openreview.net/forum?id=DiKT4rrUD9n", "pdf_size": 847793, "recommendation": "3;5;5;6", "confidence": "3;4;3;4", "correctness": "3;3;3;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "111;90;70;64", "wc_strength_and_weaknesses": "693;232;146;88", "wc_clarity_quality_novelty_and_reproducibility": "230;286;8;22", "wc_summary_review": "68;25;75;29", "wc_review": "1102;633;299;203", "wc_reply_reviewers": "333;364;0;0", "wc_reply_authors": "776;616;391;356", "reply_reviewers": "1;1;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 83.75, 18.444172521422587 ], "wc_strength_and_weaknesses_avg": [ 289.75, 238.3866344827243 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 136.5, 123.20206978780836 ], "wc_summary_review_avg": [ 49.25, 22.431841208425134 ], "wc_review_avg": [ 559.25, 351.6606140869347 ], "wc_reply_reviewers_avg": [ 174.25, 174.59435128319586 ], "wc_reply_authors_avg": [ 534.75, 171.33209710967762 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.6882472016116854, "corr_recommendation_correctness": 0.0, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2832396752670808260&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0", "aff_unique_norm": "University of Miami", "aff_unique_dep": "", "aff_unique_url": "https://www.miami.edu", "aff_unique_abbr": "UM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Spectral Augmentation for Self-Supervised Learning on Graphs", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11112", "id": "DjzBCrMBJ_p", "poster": "", "openreview": "https://openreview.net/forum?id=DjzBCrMBJ_p", "slides": "https://iclr.cc/virtual/2023/poster/11112", "video": "https://iclr.cc/virtual/2023/poster/11112", "author_site": "Lu Lin, Jinghui Chen, Hongning Wang", "tldr": "We propose a novel spectral augmentation method which uses graph spectrum to capture structural properties and guide topology augmentations for graph self-supervised learning.", "abstract": "Graph contrastive learning (GCL), as an emerging self-supervised learning 
technique on graphs, aims to learn representations via instance discrimination. Its performance heavily relies on graph augmentation to reflect invariant patterns that are robust to small perturbations; yet it still remains unclear about what graph invariance GCL should capture. Recent studies mainly perform topology augmentations in a uniformly random manner in the spatial domain, ignoring its influence on the intrinsic structural properties embedded in the spectral domain. In this work, we aim to find a principled way for topology augmentations by exploring the invariance of graphs from the spectral perspective. We develop spectral augmentation which guides topology augmentations by maximizing the spectral change. Extensive experiments on both graph and node classification tasks demonstrate the effectiveness of our method in self-supervised representation learning. The proposed method also brings promising generalization capability in transfer learning, and is equipped with intriguing robustness property under adversarial attacks. Our study sheds light on a general principle for graph topology augmentation.", "keywords": "graph self-supervised learning;graph spectral theory;graph augmentation", "primary_area": "", "supplementary_material": "", "author": "Lu Lin;Jinghui Chen;Hongning Wang", "authorids": "~Lu_Lin2;~Jinghui_Chen1;~Hongning_Wang1", "gender": "F;M;M", "homepage": "https://louise-lulin.github.io;https://jinghuichen.github.io/;http://www.cs.virginia.edu/~hw5x/", "dblp": "86/2209-1;67/5633;05/6545", "google_scholar": "8N04pBgAAAAJ;mKia7Y4AAAAJ;qkdvKNoAAAAJ", "orcid": "0000-0002-2539-3352;;0000-0002-6524-9195", "linkedin": "lulin92/;;", "or_profile": "~Lu_Lin2;~Jinghui_Chen1;~Hongning_Wang1", "aff": "Pennsylvania State University;Pennsylvania State University;University of Virginia", "aff_domain": "psu.edu;psu.edu;virginia.edu", "position": "Assistant Professor;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nlin2023spectral,\ntitle={Spectral Augmentation for Self-Supervised Learning on Graphs},\nauthor={Lu Lin and Jinghui Chen and Hongning Wang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=DjzBCrMBJ_p}\n}", "github": "", "project": "", "reviewers": "2d5T;fWm4;eFML;SvG5", "pdf_size": 1949342, "recommendation": "6;6;8;8", "confidence": "4;5;3;3", "correctness": "2;2;4;3", "technical_novelty": "3;2;3;3", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "78;76;86;39", "wc_strength_and_weaknesses": "395;419;199;97", "wc_clarity_quality_novelty_and_reproducibility": "137;60;39;21", "wc_summary_review": "79;64;35;33", "wc_review": "689;619;359;190", "wc_reply_reviewers": "299;0;0;0", "wc_reply_authors": "1210;1353;634;237", "reply_reviewers": "1;0;0;0", "reply_authors": "2;3;1;1", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 2.75, 0.82915619758885 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 69.75, 18.14352501582865 ], "wc_strength_and_weaknesses_avg": [ 277.5, 134.6950258918272 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 64.25, 44.21184795956849 ], "wc_summary_review_avg": [ 52.75, 19.49839737004044 ], "wc_review_avg": [ 464.25, 200.4686696219636 ], "wc_reply_reviewers_avg": [ 74.75, 129.4707978657736 ], "wc_reply_authors_avg": [ 858.5, 448.53790252329844 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], 
"reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.9045340337332909, "corr_recommendation_correctness": 0.9045340337332909, "gs_citation": 62, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10322402811874560871&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=DjzBCrMBJ_p", "email": "psu.edu;psu.edu;virginia.edu", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Pennsylvania State University;University of Virginia", "aff_unique_dep": ";", "aff_unique_url": "https://www.psu.edu;https://www.virginia.edu", "aff_unique_abbr": "PSU;UVA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Batch Multivalid Conformal Prediction", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11605", "id": "Dk7QQp8jHEo", "poster": "", "openreview": "https://openreview.net/forum?id=Dk7QQp8jHEo", "slides": "https://iclr.cc/virtual/2023/poster/11605", "video": "https://iclr.cc/virtual/2023/poster/11605", "author_site": "Christopher Jung, Georgy Noarov, Ramya Ramalingam, Aaron Roth", "tldr": "We give algorithms for conformal prediction in the batch setting that have coverage guarantees even when conditioning on group membership for intersecting groups and on the threshold used to produce the prediction set.", "abstract": "We develop fast distribution-free conformal prediction algorithms for obtaining multivalid coverage on exchangeable data in the batch setting. Multivalid coverage guarantees are stronger than marginal coverage guarantees in two ways: (1) They hold even conditional on group membership---that is, the target coverage level $1-\\alpha$ holds conditionally on membership in each of an arbitrary (potentially intersecting) group in a finite collection $\\mathcal{G}$ of regions in the feature space. (2) They hold even conditional on the value of the threshold used to produce the prediction set on a given example. In fact multivalid coverage guarantees hold even when conditioning on group membership and threshold value simultaneously.\n\nWe give two algorithms: both take as input an arbitrary non-conformity score and an arbitrary collection of possibly intersecting groups $\\mathcal{G}$, and then can equip arbitrary black-box predictors with prediction sets. Our first algorithm is a direct extension of quantile regression, needs to solve only a single convex minimization problem, and produces an estimator which has group-conditional guarantees for each group in $\\mathcal{G}$. Our second algorithm is iterative, and gives the full guarantees of multivalid conformal prediction: prediction sets that are valid conditionally both on group membership and non-conformity threshold. We evaluate the performance of both of our algorithms in an extensive set of experiments. 
", "keywords": "Conformal prediction;multicalibration;uncertainty quantification", "primary_area": "", "supplementary_material": "/attachment/c289e962f10a539b9959987e6a7c63fdefc72100.zip", "author": "Christopher Jung;Georgy Noarov;Ramya Ramalingam;Aaron Roth", "authorids": "~Christopher_Jung1;~Georgy_Noarov1;~Ramya_Ramalingam1;~Aaron_Roth1", "gender": ";;F;M", "homepage": ";;;http://www.cis.upenn.edu/~aaroth/", "dblp": "08/8676-1;229/4286;;80/3311", "google_scholar": "UrsQh_0AAAAJ;P0-hDecAAAAJ;;https://scholar.google.com.tw/citations?user=kLUQrrYAAAAJ", "orcid": ";;;", "linkedin": ";;ramya-ramalingam-4483a418b/;", "or_profile": "~Christopher_Jung1;~Georgy_Noarov1;~Ramya_Ramalingam1;~Aaron_Roth1", "aff": "Stanford University;School of Engineering and Applied Science, University of Pennsylvania;University of Pennsylvania;University of Pennsylvania", "aff_domain": "stanford.edu;seas.upenn.edu;seas.upenn.edu;upenn.edu", "position": "Postdoc;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\njung2023batch,\ntitle={Batch Multivalid Conformal Prediction},\nauthor={Christopher Jung and Georgy Noarov and Ramya Ramalingam and Aaron Roth},\nbooktitle={International Conference on Learning Representations},\nyear={2023},\nurl={https://openreview.net/forum?id=Dk7QQp8jHEo}\n}", "github": "", "project": "", "reviewers": "3CEp;FUsZ;auqC;2Lnk", "pdf_size": 10044751, "recommendation": "6;6;8;8", "confidence": "2;3;3;3", "correctness": "4;3;3;3", "technical_novelty": "3;3;3;3", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "111;74;71;112", "wc_strength_and_weaknesses": "174;278;267;557", "wc_clarity_quality_novelty_and_reproducibility": "28;8;2;73", "wc_summary_review": "222;3;20;22", "wc_review": "535;363;360;764", "wc_reply_reviewers": "103;51;0;47", "wc_reply_authors": "600;739;973;999", "reply_reviewers": "1;1;0;1", "reply_authors": "2;3;2;4", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 92.0, 19.53202498462461 ], "wc_strength_and_weaknesses_avg": [ 319.0, 143.22534691876294 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 27.75, 27.842189209902298 ], "wc_summary_review_avg": [ 66.75, 89.93713081925618 ], "wc_review_avg": [ 505.5, 165.20366218701085 ], "wc_reply_reviewers_avg": [ 50.25, 36.46488036453705 ], "wc_reply_authors_avg": [ 827.75, 165.95989726436926 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": -0.5773502691896257, "gs_citation": 63, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13098935030170451157&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=Dk7QQp8jHEo", "email": "stanford.edu;seas.upenn.edu;seas.upenn.edu;upenn.edu", "author_num": 4, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Stanford University;University of Pennsylvania", "aff_unique_dep": ";School of Engineering and Applied Science", "aff_unique_url": "https://www.stanford.edu;https://www.upenn.edu", "aff_unique_abbr": "Stanford;UPenn", "aff_campus_unique_index": "0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "Dk7tsv9fkF", "title": "Correcting Data Distribution Mismatch in 
Offline Meta-Reinforcement Learning with Few-Shot Online Adaptation", "track": "main", "status": "Reject", "tldr": "This paper formalizes the data distribution mismatch between offline meta-training and online adaptation, and proposes a novel data correction algorithm for effective online adaptation.", "abstract": "Offline meta-reinforcement learning (offline meta-RL) extracts knowledge from a given dataset of multiple tasks and achieves fast adaptation to new tasks. Recent offline meta-RL methods typically use task-dependent behavior policies (e.g., training RL agents on each individual task) to collect a multi-task dataset and learn an offline meta-policy. However, these methods always require extra information for fast adaptation, such as offline context for testing tasks or oracle reward functions. Offline meta-RL with few-shot online adaptation remains an open problem. In this paper, we first formally characterize a unique challenge under this setting: data distribution mismatch between offline training and online adaptation. This distribution mismatch may lead to unreliable offline policy evaluation and the regular adaptation methods of online meta-RL will suffer. To address this challenge, we introduce a novel mechanism of data distribution correction, which ensures the consistency between offline and online evaluation by filtering out out-of-distribution episodes in online adaptation. As few-shot out-of-distribution episodes usually have lower returns, we propose a Greedy Context-based data distribution Correction approach, called GCC, which greedily infers how to solve new tasks. GCC diversely samples \u201ctask hypotheses\u201d from the current posterior belief and selects a greedy hypothesis with the highest return to update the task belief. Our method is the first to provide an effective online adaptation without additional information, and can be combined with off-the-shelf context-based offline meta-training algorithms. Empirical experiments show that GCC achieves state-of-the-art performance on the Meta-World ML1 benchmark compared to baselines with/without offline adaptation. 
", "keywords": "offline meta reinforcement learning;offline reinforcement learning;meta-reinforcement learning;few-shot online adaptation;data distribution mismatch correction", "primary_area": "", "supplementary_material": "/attachment/db50772c5c3a557438a51dde9333f913284d6082.zip", "author": "Jianhao Wang;Jin Zhang;Haozhe Jiang;Junyu Zhang;Liwei Wang;Chongjie Zhang", "authorids": "~Jianhao_Wang1;~Jin_Zhang6;~Haozhe_Jiang1;~Junyu_Zhang3;~Liwei_Wang1;~Chongjie_Zhang1", "gender": "M;M;M;F;M;", "homepage": "http://group.iiis.tsinghua.edu.cn/~milab/;http://group.iiis.tsinghua.edu.cn/~milab/person-zhangjin.html;https://astro-eric.github.io;https://jyzhang1208.github.io/;http://www.liweiwang-pku.com/;", "dblp": "https://dblp.uni-trier.de/pid/239/5945;43/6657-16;303/4241;;;29/6693", "google_scholar": ";;HQCpSJMAAAAJ;n3Q40BAAAAAJ;VZHxoh8AAAAJ;LjxqXycAAAAJ", "orcid": ";;;;;", "linkedin": ";;haozhe-jiang-261b83212/;;;", "or_profile": "~Jianhao_Wang1;~Jin_Zhang6;~Haozhe_Jiang1;~Junyu_Zhang3;~Liwei_Wang1;~Chongjie_Zhang1", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;Huazhong University of Science and Technology;Peking University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;mails.tsinghua.edu.cn;hust.edu.cn;pku.edu.cn;tsinghua.edu.cn", "position": "PhD student;PhD student;Undergrad student;Undergrad student;Full Professor;Assistant Professor", "bibtex": "@misc{\nwang2023correcting,\ntitle={Correcting Data Distribution Mismatch in Offline Meta-Reinforcement Learning with Few-Shot Online Adaptation},\nauthor={Jianhao Wang and Jin Zhang and Haozhe Jiang and Junyu Zhang and Liwei Wang and Chongjie Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=Dk7tsv9fkF}\n}", "github": "", "project": "", "reviewers": "HYwC;VSon;NxHR;7zMW", "site": "https://openreview.net/forum?id=Dk7tsv9fkF", "pdf_size": 2469069, "recommendation": "5;5;5;6", "confidence": "4;2;4;4", "correctness": "3;4;3;3", "technical_novelty": "3;2;2;3", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "89;29;154;73", "wc_strength_and_weaknesses": "129;108;412;528", "wc_clarity_quality_novelty_and_reproducibility": "254;2;126;50", "wc_summary_review": "26;40;415;45", "wc_review": "498;179;1107;696", "wc_reply_reviewers": "0;0;0;318", "wc_reply_authors": "1296;325;3070;2585", "reply_reviewers": "0;0;0;1", "reply_authors": "4;2;6;6", "recommendation_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 86.25, 44.862985856940014 ], "wc_strength_and_weaknesses_avg": [ 294.25, 180.62443771538778 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 108.0, 95.18403227432636 ], "wc_summary_review_avg": [ 131.5, 163.8268903446562 ], "wc_review_avg": [ 620.0, 336.2699808189842 ], "wc_reply_reviewers_avg": [ 79.5, 137.69803920172575 ], "wc_reply_authors_avg": [ 1819.0, 1079.0414727896236 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 4.5, 1.6583123951777 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": -0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:A2nXO_Dn69oJ:scholar.google.com/&scioq=Correcting+Data+Distribution+Mismatch+in+Offline+Meta-Reinforcement+Learning+with+Few-Shot+Online+Adaptation&hl=en&as_sdt=0,33", "gs_version_total": 0, 
"aff_unique_index": "0;0;0;1;2;0", "aff_unique_norm": "Tsinghua University;Huazhong University of Science and Technology;Peking University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tsinghua.edu.cn;http://www.hust.edu.cn;http://www.pku.edu.cn", "aff_unique_abbr": "THU;HUST;Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Provably Auditing Ordinary Least Squares in Low Dimensions", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11389", "id": "DlpCotqdTy", "poster": "", "openreview": "https://openreview.net/forum?id=DlpCotqdTy", "slides": "https://iclr.cc/virtual/2023/poster/11389", "video": "https://iclr.cc/virtual/2023/poster/11389", "author_site": "Ankur Moitra, Dhruv Rohatgi", "tldr": "We develop provable and efficient algorithms for estimating stability of OLS to dropping samples in the low-dimensional regime.", "abstract": "Auditing the stability of a machine learning model to small changes in the training procedure is critical for engendering trust in practical applications. For example, a model should not be overly sensitive to removing a small fraction of its training data. However, algorithmically validating this property seems computationally challenging, even for the simplest of models: Ordinary Least Squares (OLS) linear regression. Concretely, recent work defines the stability of a regression as the minimum number of samples that need to be removed so that rerunning the analysis overturns the conclusion (Broderick et al., 2020), specifically meaning that the sign of a particular coefficient of the OLS regressor changes. But the only known approach for estimating this metric, besides the obvious exponential-time algorithm, is a greedy heuristic that may produce severe overestimates and therefore cannot certify stability. We show that stability can be efficiently certified in the low-dimensional regime: when the number of covariates is a constant but the number of samples is large, there are polynomial-time algorithms for estimating (a fractional version of) stability, with provable approximation guarantees. 
Applying our algorithms to the Boston Housing dataset, we exhibit regression analyses where our estimator outperforms the greedy heuristic, and can successfully certify stability even in the regime where a constant fraction of the samples are dropped.", "keywords": "stability;linear regression;ordinary least squares;robustness", "primary_area": "", "supplementary_material": "/attachment/9e215cd9423109f494397f5fcc5e32e4752726a9.zip", "author": "Ankur Moitra;Dhruv Rohatgi", "authorids": "~Ankur_Moitra1;~Dhruv_Rohatgi1", "gender": "M;M", "homepage": "http://people.csail.mit.edu/moitra/;http://www.mit.edu/~drohatgi/", "dblp": "04/952;223/4465", "google_scholar": "https://scholar.google.com.tw/citations?user=umFQktIAAAAJ;NUd_d6UAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Ankur_Moitra1;~Dhruv_Rohatgi1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu", "position": ";PhD student", "bibtex": "@inproceedings{\nmoitra2023provably,\ntitle={Provably Auditing Ordinary Least Squares in Low Dimensions},\nauthor={Ankur Moitra and Dhruv Rohatgi},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=DlpCotqdTy}\n}", "github": "", "project": "", "reviewers": "LbgB;BXKi;hNxp;Hpqm", "pdf_size": 616375, "recommendation": "6;8;8;8", "confidence": "3;4;2;4", "correctness": "3;4;3;4", "technical_novelty": "3;4;3;3", "empirical_novelty": "2;4;3;3", "wc_summary_paper": "63;31;252;68", "wc_strength_and_weaknesses": "128;199;730;51", "wc_clarity_quality_novelty_and_reproducibility": "62;47;467;15", "wc_summary_review": "29;9;35;96", "wc_review": "282;286;1484;230", "wc_reply_reviewers": "0;31;0;0", "wc_reply_authors": "534;187;1650;38", "reply_reviewers": "0;1;0;0", "reply_authors": "1;1;4;1", "recommendation_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 103.5, 86.90368231553828 ], "wc_strength_and_weaknesses_avg": [ 277.0, 266.72551434011706 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 147.75, 185.09912884722067 ], "wc_summary_review_avg": [ 42.25, 32.491345001399985 ], "wc_review_avg": [ 570.5, 527.8719068107338 ], "wc_reply_reviewers_avg": [ 7.75, 13.423393758658799 ], "wc_reply_authors_avg": [ 602.25, 631.119788550478 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.17407765595569782, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=657766699318356820&as_sdt=5,24&sciodt=0,24&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=DlpCotqdTy", "email": "mit.edu;mit.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "DmYnLaFGMoc", "title": "Deep Active Anomaly Detection With Diverse Queries", "track": "main", "status": "Reject", "tldr": "A new active learning approach for deep anomaly detection that leads to
systematic improvements over current approaches.", "abstract": "Selecting informative data points for expert feedback can significantly improve the performance of anomaly detection in various contexts, such as medical diagnostics or fraud detection. In this paper, we determine a set of conditions under which the ranking of anomaly scores generalizes from labeled queries to unlabeled data. Inspired by these conditions, we propose a new querying strategy for active anomaly detection that leads to systematic improvements over current approaches for this problem. It selects a diverse set of data points for labeling, achieving high data coverage with a limited budget. These labeled data points provide weak supervision to the unsupervised anomaly detection problem. However, correctly identifying anomalies requires an estimate of the fraction of anomalies in the data. We show how this anomaly rate can be estimated from the query set by importance-weighting, removing the associated bias due to the non-uniform sampling procedure. Extensive experiments on image, tabular, and video data sets show that our approach results in state-of-the-art active anomaly detection performance.", "keywords": "deep anomaly detection;active learning;diversified sampling", "primary_area": "", "supplementary_material": "/attachment/2653ddebfd9001e6d3297b68e5c2ed023244a0c4.zip", "author": "Aodong Li;Chen Qiu;Padhraic Smyth;Marius Kloft;Stephan Mandt;Maja Rudolph", "authorids": "~Aodong_Li1;~Chen_Qiu1;~Padhraic_Smyth1;~Marius_Kloft1;~Stephan_Mandt1;~Maja_Rudolph4", "gender": "M;M;M;M;F;M", "homepage": "https://aodongli.github.io;;https://www.ics.uci.edu/~smyth;http://ml.informatik.uni-kl.de/;http://maja-rita-rudolph.com/;https://www.stephanmandt.com", "dblp": "207/7672;;s/PadhraicSmyth;73/2217;164/5581;147/5018", "google_scholar": ";uX5Y9XUAAAAJ;OsoQ-dcAAAAJ;https://scholar.google.de/citations?user=l-BJCdAAAAAJ;https://scholar.google.com/citations?hl=en;HOrGe7wAAAAJ", "orcid": ";;0000-0001-9971-8378;;;", "linkedin": ";;;;;stephan-mandt-8702795a/", "or_profile": "~Aodong_Li1;~Chen_Qiu1;~Padhraic_Smyth1;~Marius_Kloft1;~Maja_Rudolph4;~Stephan_M_Mandt1", "aff": "University of California, Irvine;Robert Bosch GmbH, Germany;University of California, Irvine;RPTU Kaiserslautern-Landau;Bosch;University of California, Irvine", "aff_domain": "uci.edu;de.bosch.com;uci.edu;uni-kl.de;bosch.com;uci.edu", "position": "PhD student;PhD student;Full Professor;Professor;Researcher;Associate Professor", "bibtex": "@misc{\nli2023deep,\ntitle={Deep Active Anomaly Detection With Diverse Queries},\nauthor={Aodong Li and Chen Qiu and Padhraic Smyth and Marius Kloft and Stephan Mandt and Maja Rudolph},\nyear={2023},\nurl={https://openreview.net/forum?id=DmYnLaFGMoc}\n}", "github": "", "project": "", "reviewers": "uxaT;7TvR;i6D3", "site": "https://openreview.net/forum?id=DmYnLaFGMoc", "pdf_size": 5517449, "recommendation": "3;6;6", "confidence": "3;4;4", "correctness": "2;3;3", "technical_novelty": "1;3;3", "empirical_novelty": "2;3;2", "wc_summary_paper": "33;127;87", "wc_strength_and_weaknesses": "922;129;203", "wc_clarity_quality_novelty_and_reproducibility": "17;34;42", "wc_summary_review": "39;82;151", "wc_review": "1011;372;483", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1775;297;493", "reply_reviewers": "0;0;0", "reply_authors": "3;1;1", "recommendation_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], 
"technical_novelty_avg": [ 2.3333333333333335, 0.9428090415820634 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 82.33333333333333, 38.5169515350781 ], "wc_strength_and_weaknesses_avg": [ 418.0, 357.6599875114166 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 31.0, 10.424330514074594 ], "wc_summary_review_avg": [ 90.66666666666667, 46.13265895460852 ], "wc_review_avg": [ 622.0, 278.77230852435827 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 855.0, 655.4408185844597 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 1.0, "corr_recommendation_correctness": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:FfjCyzduGg8J:scholar.google.com/&scioq=Deep+Active+Anomaly+Detection+With+Diverse+Queries&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;0;2;1;0", "aff_unique_norm": "University of California, Irvine;Robert Bosch GmbH;Rheinland-Pfalz Technical University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uci.edu;https://www.bosch.com;https://www.rptu.de", "aff_unique_abbr": "UCI;Bosch;RPTU", "aff_campus_unique_index": "0;0;2;0", "aff_campus_unique": "Irvine;;Kaiserslautern-Landau", "aff_country_unique_index": "0;1;0;1;1;0", "aff_country_unique": "United States;Germany" }, { "title": "Learning Sparse Group Models Through Boolean Relaxation", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11391", "id": "Do9MOlwWHu0", "poster": "", "openreview": "https://openreview.net/forum?id=Do9MOlwWHu0", "slides": "https://iclr.cc/virtual/2023/poster/11391", "video": "https://iclr.cc/virtual/2023/poster/11391", "author_site": "Yijie Wang, Yuan Zhou, Xiaoqing Huang, Kun Huang, Jie Zhang, Jianzhu Ma", "tldr": "", "abstract": "We introduce an efficient algorithmic framework for learning sparse group models formulated as the natural convex relaxation of a cardinality-constrained program with Boolean variables. We provide theoretical techniques to characterize the equivalent condition when the relaxation achieves the exact integral optimal solution, as well as a rounding algorithm to produce a feasible integral solution once the optimal relaxation solution is fractional. We demonstrate the power of our equivalent condition by applying it to two ensembles of random problem instances that are challenging and popularly used in literature and prove that our method achieves exactness with overwhelming probability and nearly optimal sample complexity. Empirically, we use synthetic datasets to demonstrate that our proposed method significantly outperforms the state-of-the-art group sparse learning models in terms of individual and group support recovery when the number of samples is small. 
Furthermore, we show the out-performance of our method in cancer drug response prediction.", "keywords": "Structured sparisity;Convex relaxation;Cardinality-constrained program;Small sample size", "primary_area": "", "supplementary_material": "", "author": "Yijie Wang;Yuan Zhou;Xiaoqing Huang;Kun Huang;Jie Zhang;Jianzhu Ma", "authorids": "~Yijie_Wang2;~Yuan_Zhou1;~Xiaoqing_Huang1;~Kun_Huang2;~Jie_Zhang20;~Jianzhu_Ma2", "gender": ";M;;M;;M", "homepage": "https://wyjhxq.github.io/;http://yuanz.web.illinois.edu;;https://medicine.iu.edu/faculty/38697/huang-kun;;https://majianzhu.com/", "dblp": "91/1726-4;40/7018;40/9039;10/4151-1.html;;24/9080.html", "google_scholar": "rloap-cAAAAJ;https://scholar.google.com.tw/citations?user=aR34e1gAAAAJ;https://scholar.google.com/citations?hl=en;CSzbLwUAAAAJ;;", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Yijie_Wang2;~Yuan_Zhou1;~Xiaoqing_Huang1;~Kun_Huang2;~Jie_Zhang20;~Jianzhu_Ma2", "aff": "Indiana University, Bloomington;;;Indiana University;;Tsinghua University", "aff_domain": "iu.edu;;;iu.edu;;tsinghua.edu.cn", "position": "Assistant Professor;;;Full Professor;;Associate Professor", "bibtex": "@inproceedings{\nwang2023learning,\ntitle={Learning Sparse Group Models Through Boolean Relaxation},\nauthor={Yijie Wang and Yuan Zhou and Xiaoqing Huang and Kun Huang and Jie Zhang and Jianzhu Ma},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=Do9MOlwWHu0}\n}", "github": "", "project": "", "reviewers": "fvhG;4Ewe;7cXz;4rdY", "pdf_size": 1008907, "recommendation": "6;8;8;8", "confidence": "3;2;3;3", "correctness": "3;3;3;4", "technical_novelty": "3;3;2;3", "empirical_novelty": "3;3;3;2", "wc_summary_paper": "64;43;66;107", "wc_strength_and_weaknesses": "475;81;163;215", "wc_clarity_quality_novelty_and_reproducibility": "60;35;28;37", "wc_summary_review": "91;8;52;19", "wc_review": "690;167;309;378", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 70.0, 23.18404623873926 ], "wc_strength_and_weaknesses_avg": [ 233.5, 147.38639692997452 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 40.0, 12.020815280171307 ], "wc_summary_review_avg": [ 42.5, 32.34578797927173 ], "wc_review_avg": [ 386.0, 191.29166212880267 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ctOGkc2H0nMJ:scholar.google.com/&scioq=Learning+Sparse+Group+Models+Through+Boolean+Relaxation&hl=en&as_sdt=0,47", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=Do9MOlwWHu0", "email": "iu.edu;;;iu.edu;;tsinghua.edu.cn", "author_num": 6, "aff_unique_index": "0;0;1", "aff_unique_norm": "Indiana University;Tsinghua University", "aff_unique_dep": ";", "aff_unique_url": "https://www.indiana.edu;https://www.tsinghua.edu.cn", "aff_unique_abbr": "IU;THU", "aff_campus_unique_index": "0", "aff_campus_unique": "Bloomington;", 
"aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;China" }, { "title": "A Unified Approach to Reinforcement Learning, Quantal Response Equilibria, and Two-Player Zero-Sum Games", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11487", "id": "DpE5UYUQzZH", "poster": "", "openreview": "https://openreview.net/forum?id=DpE5UYUQzZH", "slides": "https://iclr.cc/virtual/2023/poster/11487", "video": "https://iclr.cc/virtual/2023/poster/11487", "author_site": "Samuel Sokota, Ryan D'Orazio, Zico Kolter, Nicolas Loizou, Marc Lanctot, Ioannis Mitliagkas, Noam Brown, Christian Kroer", "tldr": "A single algorithm for both single-agent reinforcement learning and approximating quantal response and Nash equilibria in two-player zero-sum games.", "abstract": "This work studies an algorithm, which we call magnetic mirror descent, that is inspired by mirror descent and the non-Euclidean proximal gradient algorithm. Our contribution is demonstrating the virtues of magnetic mirror descent as both an equilibrium solver and as an approach to reinforcement learning in two-player zero-sum games. These virtues include: 1) Being the first quantal response equilibria solver to achieve linear convergence for extensive-form games with first order feedback; 2) Being the first standard reinforcement learning algorithm to achieve empirically competitive results with CFR in tabular settings; 3) Achieving favorable performance in 3x3 Dark Hex and Phantom Tic-Tac-Toe as a self-play deep reinforcement learning algorithm.", "keywords": "reinforcement learning;quantal response equilibria;two-player zero-sum games;mirror descent;variational inequalities;Nash equilibria;algorithmic game theory;proximal gradient", "primary_area": "", "supplementary_material": "", "author": "Samuel Sokota;Ryan D'Orazio;J Zico Kolter;Nicolas Loizou;Marc Lanctot;Ioannis Mitliagkas;Noam Brown;Christian Kroer", "authorids": "~Samuel_Sokota1;~Ryan_D'Orazio1;~J_Zico_Kolter1;~Nicolas_Loizou1;~Marc_Lanctot1;~Ioannis_Mitliagkas1;~Noam_Brown2;~Christian_Kroer1", "gender": "M;M;M;M;M;;M;M", "homepage": "https://ssokota.github.io/;https://ryan-dorazio.github.io/;https://nicolasloizou.github.io/;http://mlanctot.info;http://mitliagkas.github.io/;http://www.cs.cmu.edu/~noamb;http://www.columbia.edu/~ck2945/;http://www.zicokolter.com", "dblp": "243/5881;243/5789;173/4958;64/10094.html;83/8757;https://dblp.uni-trier.de/pers/hd/b/Brown:Noam;64/10660;67/2526", "google_scholar": ";vW8ZuWUAAAAJ;https://scholar.google.co.uk/citations?user=mvDmzAQAAAAJ;E_oZZj8AAAAJ;K757SxgAAAAJ;RLDbLcUAAAAJ;https://scholar.google.ch/citations?user=ckHwjPAAAAAJ;UXh1I6UAAAAJ", "orcid": ";;;;;;0000-0002-9009-8683;", "linkedin": "samuel-sokota-87a153149/;;;;;;;", "or_profile": "~Samuel_Sokota1;~Ryan_D'Orazio1;~Nicolas_Loizou1;~Marc_Lanctot1;~Ioannis_Mitliagkas1;~Noam_Brown2;~Christian_Kroer1;~Zico_Kolter1", "aff": "Carnegie Mellon University;University of Montreal, University of Montreal;Johns Hopkins University;Google DeepMind;Mila - Quebec AI Institute;Meta Facebook;Columbia University;Carnegie Mellon University", "aff_domain": "cmu.edu;iro.umontreal.ca;jhu.edu;deepmind.com;mila.quebec;facebook.com;columbia.edu;cmu.edu", "position": "PhD student;PhD student;Assistant Professor;Research Scientist;Principal Researcher;Research Scientist;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nsokota2023a,\ntitle={A Unified Approach to Reinforcement Learning, Quantal Response Equilibria, and Two-Player Zero-Sum 
Games},\nauthor={Samuel Sokota and Ryan D'Orazio and J Zico Kolter and Nicolas Loizou and Marc Lanctot and Ioannis Mitliagkas and Noam Brown and Christian Kroer},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=DpE5UYUQzZH}\n}", "github": "", "project": "", "reviewers": "BKWB;8q9G;UyUG;Mj5Y", "pdf_size": 4928895, "recommendation": "3;5;8;8", "confidence": "3;3;2;2", "correctness": "4;4;3;4", "technical_novelty": "1;2;3;4", "empirical_novelty": "1;2;3;3", "wc_summary_paper": "42;46;156;56", "wc_strength_and_weaknesses": "204;88;403;426", "wc_clarity_quality_novelty_and_reproducibility": "19;13;63;15", "wc_summary_review": "18;58;47;47", "wc_review": "283;205;669;544", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "665;574;787;490", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 6.0, 2.1213203435596424 ], "confidence_avg": [ 2.5, 0.5 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 1.118033988749895 ], "empirical_novelty_avg": [ 2.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 75.0, 47.042533945356304 ], "wc_strength_and_weaknesses_avg": [ 280.25, 140.6100547613861 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 27.5, 20.60946384552495 ], "wc_summary_review_avg": [ 42.5, 14.84082207965583 ], "wc_review_avg": [ 425.25, 188.58734713654573 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 629.0, 110.23384235342611 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": -0.9428090415820635, "corr_recommendation_correctness": -0.5443310539518174, "gs_citation": 70, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2083682077263566343&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=DpE5UYUQzZH", "email": "cmu.edu;iro.umontreal.ca;jhu.edu;deepmind.com;mila.quebec;facebook.com;columbia.edu;cmu.edu", "author_num": 8, "aff_unique_index": "0;1;2;3;4;5;6;0", "aff_unique_norm": "Carnegie Mellon University;University of Montreal;Johns Hopkins University;Google;Quebec AI Institute;Meta;Columbia University", "aff_unique_dep": ";;;Google DeepMind;AI Institute;Meta Platforms, Inc.;", "aff_unique_url": "https://www.cmu.edu;https://www.umontreal.ca;https://www.jhu.edu;https://deepmind.com;https://mila.quebec;https://meta.com;https://www.columbia.edu", "aff_unique_abbr": "CMU;UM;JHU;DeepMind;Mila;Meta;Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;2;1;0;0;0", "aff_country_unique": "United States;Canada;United Kingdom" }, { "title": "Composing Task Knowledge With Modular Successor Feature Approximators", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11042", "id": "DrtSx1z40Ib", "poster": "", "openreview": "https://openreview.net/forum?id=DrtSx1z40Ib", "slides": "https://iclr.cc/virtual/2023/poster/11042", "video": "https://iclr.cc/virtual/2023/poster/11042", "author_site": "Wilka Carvalho, Angelos Filos, Richard Lewis, Honglak Lee, Satinder Singh", "tldr": "A modular neural network for discovering, composing, and transferring predictive knowledge and behavior via Successor Features & Generalized Policy Improvement.", "abstract": "Recently, the Successor Features and Generalized Policy Improvement (SF&GPI) framework has been proposed as a method for learning, composing and 
transferring predictive knowledge and behavior. SF&GPI works by having an agent learn predictive representations (SFs) that can be combined for transfer to new tasks with GPI. However, to be effective this approach requires state features that are useful to predict, and these state-features are typically hand-designed. In this work, we present a novel neural network architecture, \u201cModular Successor Feature Approximators\u201d (MSFA), where modules both discover what is useful to predict, and learn their own predictive representations. We show that MSFA is able to better generalize compared to baseline architectures for learning SFs and a modular network that discovers factored state representations.\n\n", "keywords": "deep reinforcement learning;successor features;generalization;compositional generalization", "primary_area": "", "supplementary_material": "/attachment/07d623857ad73d4b6d855c92df8d3cf84bbc0d9e.zip", "author": "Wilka Torrico Carvalho;Angelos Filos;Richard Lewis;Honglak Lee;Satinder Singh", "authorids": "~Wilka_Torrico_Carvalho1;~Angelos_Filos1;~Richard_Lewis1;~Honglak_Lee2;~Satinder_Singh2", "gender": "M;M;M;M;", "homepage": "https://wcarvalho.github.io/;;;http://web.eecs.umich.edu/~honglak;", "dblp": "230/3919;https://dblp.uni-trier.de/pers/hd/f/Filos:Angelos;12/590;58/2562;", "google_scholar": "tvJTXwoAAAAJ;SGjYdrEAAAAJ;;fmSHtE8AAAAJ;", "orcid": ";;;;", "linkedin": "wilkacarvalho;;;;", "or_profile": "~Wilka_Torrico_Carvalho1;~Angelos_Filos1;~Richard_Lewis1;~Honglak_Lee1;~Satinder_Baveja2", "aff": "Google;Google DeepMind;University of Michigan - Ann Arbor;University of Michigan;Google DeepMind", "aff_domain": "google.com;deepmind.com;umich.edu;umich.edu;google.com", "position": "Research Scientist Intern;Researcher;Full Professor;Associate Professor;Research Scientist", "bibtex": "@inproceedings{\ncarvalho2023composing,\ntitle={Composing Task Knowledge With Modular Successor Feature Approximators},\nauthor={Wilka Torrico Carvalho and Angelos Filos and Richard Lewis and Honglak Lee and Satinder Singh},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=DrtSx1z40Ib}\n}", "github": "", "project": "", "reviewers": "kJTC;H7je;xfgM;GLtt", "pdf_size": 6694198, "recommendation": "3;6;6;8", "confidence": "4;4;4;4", "correctness": "1;4;4;3", "technical_novelty": "2;3;2;3", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "95;46;65;77", "wc_strength_and_weaknesses": "345;97;100;31", "wc_clarity_quality_novelty_and_reproducibility": "87;34;50;17", "wc_summary_review": "67;2;7;135", "wc_review": "594;179;222;260", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1225;89;468;177", "reply_reviewers": "0;0;0;0", "reply_authors": "2;1;1;1", "recommendation_avg": [ 5.75, 1.7853571071357126 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.0, 1.224744871391589 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 70.75, 17.83781096435322 ], "wc_strength_and_weaknesses_avg": [ 143.25, 119.70040726747759 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 47.0, 25.874698065871222 ], "wc_summary_review_avg": [ 52.75, 53.93688441131913 ], "wc_review_avg": [ 313.75, 164.3203806592475 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 489.75, 447.0678779559095 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], 
"corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.6859943405700353, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3145485811145726835&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=DrtSx1z40Ib", "email": "google.com;deepmind.com;umich.edu;umich.edu;google.com", "author_num": 5, "aff_unique_index": "0;0;1;1;0", "aff_unique_norm": "Google;University of Michigan", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://www.umich.edu", "aff_unique_abbr": "Google;UM", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Mountain View;;Ann Arbor", "aff_country_unique_index": "0;1;0;0;1", "aff_country_unique": "United States;United Kingdom" }, { "id": "DswOSXvLfuy", "title": "MLM with Global Co-occurrence", "track": "main", "status": "Withdraw", "tldr": "We present MLM-GC (Masked Language Modeling with Global Co-occurrence) for multilingual tasks.", "abstract": "When pre-training models with the objective of MLM (masked language modeling) on multilingual corpora, the model learns to refine different language spaces to overlap each other for forming isomorphic spaces by understanding structural similarities from local bidirectional information. Global co-occurrence information is the primary source of information available to all methods, which potentially gives additional structural similarities to the model. In this work, we push MLM pre-training further to leverage global co-occurrence information. The result is MLM-GC (MLM with Global Co-occurrence) pre-training that the model learns local bidirectional information from masking and global co-occurrence information from a log-bilinear regression. In our experiments, MLM-GC pre-training substantially outperforms MLM pre-training for 4 downstream multilingual/cross-lingual tasks and 1 additional monolingual task, showing the advantages of capturing embedding analogies.", "keywords": "MLM pre-training;Multilingual model;Machine Learning for NLP;Language Modeling", "primary_area": "", "supplementary_material": "/attachment/9a0f67cc6422fdec104b1b3d9a0a2e1a58889e0f.zip", "author": "Xi Ai;Bin Fang", "authorids": "~Xi_Ai1;~Bin_Fang3", "gender": "M;M", "homepage": "http://www.cs.cqu.edu.cn/info/1274/3792.htm;https://baridxiai.github.io", "dblp": "94/4033-1;294/0314", "google_scholar": ";", "orcid": "0000-0003-1955-6626;0000-0002-4241-3837", "linkedin": ";\u60dc-\u827e-960134114/", "or_profile": "~Bin_Fang3;~Ai_Xi1", "aff": "Chongqing University;Chongqing University", "aff_domain": "cs.cqu.edu.cn;cqu.edu.cn", "position": "Full Professor;PhD student", "bibtex": "@misc{\nai2023mlm,\ntitle={{MLM} with Global Co-occurrence},\nauthor={Xi Ai and Bin Fang},\nyear={2023},\nurl={https://openreview.net/forum?id=DswOSXvLfuy}\n}", "github": "", "project": "", "reviewers": "WTdD;yHFD;g3At;j8eq", "site": "https://openreview.net/forum?id=DswOSXvLfuy", "pdf_size": 969818, "recommendation": "3;3;5;5", "confidence": "4;3;3;4", "correctness": "2;2;3;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "3;2;2;3", "wc_summary_paper": "144;106;56;68", "wc_strength_and_weaknesses": "689;277;195;238", "wc_clarity_quality_novelty_and_reproducibility": "128;69;34;159", "wc_summary_review": "83;86;47;46", "wc_review": "1044;538;332;511", "wc_reply_reviewers": "0;0;86;0", "wc_reply_authors": "728;736;1104;737", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;2;1", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], 
"correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 93.5, 34.50724561595724 ], "wc_strength_and_weaknesses_avg": [ 349.75, 198.00173610349987 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 97.5, 48.87995499179597 ], "wc_summary_review_avg": [ 65.5, 19.03286631067428 ], "wc_review_avg": [ 606.25, 264.84370390855054 ], "wc_reply_reviewers_avg": [ 21.5, 37.239092362730865 ], "wc_reply_authors_avg": [ 826.25, 160.39696848756213 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:5NJBOMtohCIJ:scholar.google.com/&scioq=MLM+with+Global+Co-occurrence&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Chongqing University", "aff_unique_dep": "", "aff_unique_url": "https://www.cqu.edu.cn", "aff_unique_abbr": "CQU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "DvMDIEFtyjV", "title": "CLUTR: Curriculum Learning via Unsupervised Task Representation Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Reinforcement Learning (RL) algorithms are often known for sample inefficiency and difficult generalization. Recently, Unsupervised Environment Design (UED) emerged as a new paradigm for zero-shot generalization by simultaneously learning a task distribution and agent policies on the sampled tasks. This is a non-stationary process where the task distribution evolves along with agent policies; creating an instability over time. While past works demonstrated the potential of such approaches, sampling effectively from the task space remains an open challenge, bottlenecking these approaches. To this end, we introduce CLUTR: a novel curriculum learning algorithm that decouples task representation and curriculum learning into a two-stage optimization. It first trains a recurrent variational autoencoder on randomly generated tasks to learn a latent task manifold. Next, a teacher agent creates a curriculum by optimizing a minimax REGRET-based objective on a set of latent tasks sampled from this manifold. By keeping the task manifold fixed, we show that CLUTR successfully overcomes the non-stationarity problem and improves stability. Our experimental results show CLUTR outperforms PAIRED, a principled and popular UED method, in terms of generalization and sample efficiency in the challenging CarRacing and navigation environments: showing an 18x improvement on the F1 CarRacing benchmark. CLUTR also performs comparably to the non-UED state-of-the-art for CarRacing, outperforming it in nine of the 20 tracks. 
CLUTR also achieves a 33% higher solved rate than PAIRED on a set of 18 out-of-distribution navigation tasks.", "keywords": "reinforcement learning;curriculum learning", "primary_area": "", "supplementary_material": "", "author": "Abdus Salam Azad;Izzeddin Gur;Aleksandra Faust;Pieter Abbeel;Ion Stoica", "authorids": "~Abdus_Salam_Azad1;~Izzeddin_Gur1;~Aleksandra_Faust1;~Pieter_Abbeel2;~Ion_Stoica1", "gender": "M;;F;M;M", "homepage": "https://www.azadsalam.org/;;http://www.afaust.info;https://people.eecs.berkeley.edu/~pabbeel/;http://people.eecs.berkeley.edu/~istoica/", "dblp": ";188/9027;135/8420;;s/IonStoica", "google_scholar": "3h3FvhwAAAAJ;qS_ugJAAAAAJ;RK72t68AAAAJ;https://scholar.google.com.tw/citations?user=vtwH6GkAAAAJ;vN-is70AAAAJ", "orcid": ";;0000-0002-3268-8685;;", "linkedin": ";;aleksandrafaust;;ionstoica", "or_profile": "~Abdus_Salam_Azad1;~Izzeddin_Gur1;~Aleksandra_Faust1;~Pieter_Abbeel2;~Ion_Stoica1", "aff": "University of California, Berkeley;Google;Google Brain;Covariant;University of California, Berkeley", "aff_domain": "berkeley.edu;google.com;google.com;covariant.ai;berkeley.edu", "position": "PhD student;Research Scientist;Principal Researcher;Founder;Full Professor", "bibtex": "@misc{\nazad2023clutr,\ntitle={{CLUTR}: Curriculum Learning via Unsupervised Task Representation Learning},\nauthor={Abdus Salam Azad and Izzeddin Gur and Aleksandra Faust and Pieter Abbeel and Ion Stoica},\nyear={2023},\nurl={https://openreview.net/forum?id=DvMDIEFtyjV}\n}", "github": "", "project": "", "reviewers": "NPYm;eq4e;Ui8Z", "site": "https://openreview.net/forum?id=DvMDIEFtyjV", "pdf_size": 2658059, "recommendation": "3;5;5", "confidence": "4;5;2", "correctness": "1;3;4", "technical_novelty": "3;2;2", "empirical_novelty": "0;2;3", "wc_summary_paper": "174;65;84", "wc_strength_and_weaknesses": "819;378;116", "wc_clarity_quality_novelty_and_reproducibility": "297;97;20", "wc_summary_review": "59;20;67", "wc_review": "1349;560;287", "wc_reply_reviewers": "973;0;0", "wc_reply_authors": "2324;1023;501", "reply_reviewers": "3;0;0", "reply_authors": "7;2;1", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 1.247219128924647 ], "correctness_avg": [ 2.6666666666666665, 1.247219128924647 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.6666666666666667, 1.247219128924647 ], "wc_summary_paper_avg": [ 107.66666666666667, 47.541794478355804 ], "wc_strength_and_weaknesses_avg": [ 437.6666666666667, 290.08312984767355 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 138.0, 116.74188051709064 ], "wc_summary_review_avg": [ 48.666666666666664, 20.531818125912658 ], "wc_review_avg": [ 732.0, 450.29545856026573 ], "wc_reply_reviewers_avg": [ 324.3333333333333, 458.67659872967386 ], "wc_reply_authors_avg": [ 1282.6666666666667, 766.5517305150094 ], "reply_reviewers_avg": [ 1.0, 1.4142135623730951 ], "reply_authors_avg": [ 3.3333333333333335, 2.6246692913372702 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.18898223650461363, "corr_recommendation_correctness": 0.9449111825230679, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1439014644027390653&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;1;2;0", "aff_unique_norm": "University of California, Berkeley;Google;Covariant", "aff_unique_dep": ";Google;", "aff_unique_url": "https://www.berkeley.edu;https://www.google.com;", 
"aff_unique_abbr": "UC Berkeley;Google;", "aff_campus_unique_index": "0;1;1;0", "aff_campus_unique": "Berkeley;Mountain View;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States;" }, { "title": "Searching Lottery Tickets in Graph Neural Networks: A Dual Perspective", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11637", "id": "Dvs-a3aymPe", "poster": "/media/PosterPDFs/ICLR%202023/11637.png?t=1680748731.3430552", "openreview": "https://openreview.net/forum?id=Dvs-a3aymPe", "slides": "https://iclr.cc/virtual/2023/poster/11637", "video": "https://iclr.cc/virtual/2023/poster/11637", "author_site": "Kun Wang, Yuxuan Liang, Pengkun Wang, Xu Wang, Pengfei Gu, Junfeng Fang, Yang Wang", "tldr": "This paper generalizes Dual Lottery Ticket Hypothesis (DLTH) to the graph to address information loss and aggregation failure issues caused by sampling-based GNN pruning algorithms", "abstract": "Graph Neural Networks (GNNs) have shown great promise in various graph learning tasks. However, the computational overheads of fitting GNNs to large-scale graphs grow rapidly, posing obstacles to GNNs from scaling up to real-world applications. To tackle this issue, Graph Lottery Ticket (GLT) hypothesis articulates that there always exists a sparse subnetwork/subgraph with admirable performance in GNNs with random initialization. Such a pair of core subgraph and sparse subnetwork (called graph lottery tickets) can be uncovered by iteratively applying a novel sparsification method. While GLT provides new insights for GNN compression, it requires a full pretraining process to obtain graph lottery tickets, which is not universal and friendly to real-world applications. Moreover, the graph sparsification in GLT utilizes sampling techniques, which may result in massive information loss and aggregation failure. In this paper, we explore the searching of graph lottery tickets from a complementary perspective -- transforming a random ticket into a graph lottery ticket, which allows us to more comprehensively explore the relationships between the original network/graph and their sparse counterpart. To achieve this, we propose regularization-based network pruning and hierarchical graph sparsification, leading to our Dual Graph Lottery Ticket (DGLT) framework for a joint sparsification of network and graph. Compared to GLT, our DGLT helps achieve a triple-win situation of graph lottery tickets with high sparsity, admirable performance, and good explainability. More importantly, we rigorously prove that our model can eliminate noise and maintain reliable information in substructures using the graph information bottleneck theory. 
Extensive experimental results on various graph-related tasks validate the effectiveness of our framework.", "keywords": "Lottery Tickets Hypothesis;Dual Lottery Tickets Hypothesis;Graph pooling;Graph information bottleneck", "primary_area": "", "supplementary_material": "/attachment/a18bcda0ce2bab9bbf457398b17246b24b0f452b.zip", "author": "Kun Wang;Yuxuan Liang;Pengkun Wang;Xu Wang;Pengfei Gu;Junfeng Fang;Yang Wang", "authorids": "~Kun_Wang15;~Yuxuan_Liang1;~Pengkun_Wang1;~Xu_Wang16;~Pengfei_Gu1;~Junfeng_Fang1;~Yang_Wang32", "gender": "M;M;M;M;;M;M", "homepage": "http://home.ustc.edu.cn/~wk520529/#home;https://yuxuanliang.com;http://home.ustc.edu.cn/~pengkun/index.html;http://home.ustc.edu.cn/~wx309/;https://github.com/gfly007;https://scholar.google.com/citations?user=beNNywsAAAAJ&hl=zh-CN;http://staff.ustc.edu.cn/~angyan/", "dblp": ";183/0977;;181/2815-29;;340/7929;", "google_scholar": "UnyqjWQAAAAJ;n9cODgcAAAAJ;https://scholar.google.com/citations?hl=zh-CN;7hYGPC8AAAAJ;;beNNywsAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": "0000-0003-0602-169X;0000-0003-2817-7337;0000-0002-2680-4563;0000-0002-1492-3477;;;0000-0002-6079-7053", "linkedin": ";yoshall/;;;;;", "or_profile": "~Kun_Wang15;~Yuxuan_Liang1;~Pengkun_Wang1;~Xu_Wang16;~Pengfei_Gu1;~Junfeng_Fang1;~Yang_Wang32", "aff": "University of Science and Technology of China;The Hong Kong University of Science and Technology (Guangzhou);University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;hkust-gz.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;;ustc.edu.cn", "position": "PhD student;Assistant Professor;PhD student;PhD student;MS student;;Associate Professor", "bibtex": "@inproceedings{\nwang2023searching,\ntitle={Searching Lottery Tickets in Graph Neural Networks: A Dual Perspective},\nauthor={Kun Wang and Yuxuan Liang and Pengkun Wang and Xu Wang and Pengfei Gu and Junfeng Fang and Yang Wang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=Dvs-a3aymPe}\n}", "github": "", "project": "", "reviewers": "yVH3;PXxs;xzm4;cfDA;rozM", "pdf_size": 8648362, "recommendation": "6;6;6;6;8", "confidence": "3;3;4;3;3", "correctness": "3;3;3;3;3", "technical_novelty": "2;3;2;3;4", "empirical_novelty": "3;3;3;3;4", "wc_summary_paper": "131;47;62;32;108", "wc_strength_and_weaknesses": "355;225;91;131;162", "wc_clarity_quality_novelty_and_reproducibility": "62;24;71;39;44", "wc_summary_review": "59;73;391;32;34", "wc_review": "607;369;615;234;348", "wc_reply_reviewers": "0;76;50;36;0", "wc_reply_authors": "764;800;1499;601;526", "reply_reviewers": "0;1;1;1;0", "reply_authors": "1;2;3;2;1", "recommendation_avg": [ 6.4, 0.7999999999999999 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.8, 0.7483314773547882 ], "empirical_novelty_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_paper_avg": [ 76.0, 37.47532521540007 ], "wc_strength_and_weaknesses_avg": [ 192.8, 92.17027720474752 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 48.0, 16.721243972862784 ], "wc_summary_review_avg": [ 117.8, 137.46766892618786 ], "wc_review_avg": [ 434.6, 151.20132274553683 ], "wc_reply_reviewers_avg": [ 32.4, 29.40476151918257 ], "wc_reply_authors_avg": [ 838.0, 345.6454831181799 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 
1.8, 0.7483314773547883 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.25000000000000006, "corr_recommendation_correctness": 0.0, "gs_citation": 38, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3391574048015739106&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "pdf": "https://openreview.net/pdf?id=Dvs-a3aymPe", "email": "ustc.edu.cn;hkust-gz.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;;ustc.edu.cn", "author_num": 7, "aff_unique_index": "0;1;0;0;0;0", "aff_unique_norm": "University of Science and Technology of China;Hong Kong University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "http://www.ustc.edu.cn;https://www.ust.hk", "aff_unique_abbr": "USTC;HKUST", "aff_campus_unique_index": "1", "aff_campus_unique": ";Guangzhou", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "DwOaHJJKy9", "title": "Using semantic distance for diverse and sample efficient genetic programming", "track": "main", "status": "Reject", "tldr": "We show the importance of diversity in semantic (phenotypic) space when mutating genetic programs, and apply it to learning ML components.", "abstract": "Evolutionary methods, such as genetic programming, search a space of programs to find those with good fitness, often using mutations that manipulate the syntactic structure of programs without being aware of how they affect the semantics. For applications where the semantics are highly sensitive to small syntactic mutations, or where fitness evaluation is expensive, this can make learning programs intractable.\n\nWe introduce a mutation operator that yields mutated programs that are semantically far from previously evaluated programs, while still being semantically close to their parent. For function regression, this leads to an algorithm that is one to two orders of magnitude more sample efficient than other gradient-free methods, such as genetic programming, or learning the weights of a neural network using evolutionary strategies.\n\nWe show how this method can be applied to learning architecture-specific and general purpose neural network optimizers, and to reinforcement learning loss functions. 
The learnt components are simple, interpretable, and high-performing, and contain novel features not seen before, such as weight growth.", "keywords": "genetic programming;meta learning", "primary_area": "", "supplementary_material": "", "author": "David Saxton;Chrisantha Fernando", "authorids": "~David_Saxton1;~Chrisantha_Fernando1", "gender": ";M", "homepage": ";", "dblp": ";", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~David_Saxton1;~Chrisantha_Fernando1", "aff": ";", "aff_domain": "deepmind.com;", "position": "Research engineer;", "bibtex": "@misc{\nsaxton2023using,\ntitle={Using semantic distance for diverse and sample efficient genetic programming},\nauthor={David Saxton and Chrisantha Fernando},\nyear={2023},\nurl={https://openreview.net/forum?id=DwOaHJJKy9}\n}", "github": "", "project": "", "reviewers": "zufj;6oRb;kHGQ", "site": "https://openreview.net/forum?id=DwOaHJJKy9", "pdf_size": 4578321, "recommendation": "1;3;5", "confidence": "4;3;2", "correctness": "2;3;4", "technical_novelty": "2;2;2", "empirical_novelty": "2;2;2", "wc_summary_paper": "129;137;72", "wc_strength_and_weaknesses": "112;211;134", "wc_clarity_quality_novelty_and_reproducibility": "952;89;8", "wc_summary_review": "45;39;90", "wc_review": "1238;476;304", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "810;139;471", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 3.0, 1.632993161855452 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 112.66666666666667, 28.94055209486432 ], "wc_strength_and_weaknesses_avg": [ 152.33333333333334, 42.444735310230826 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 349.6666666666667, 427.19576568854495 ], "wc_summary_review_avg": [ 58.0, 22.759613353482084 ], "wc_review_avg": [ 672.6666666666666, 405.87135345519954 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 473.3333333333333, 273.9395716009078 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:UkQHNsYpo90J:scholar.google.com/&scioq=Using+semantic+distance+for+diverse+and+sample+efficient+genetic+programming&hl=en&as_sdt=0,5", "gs_version_total": 0 }, { "id": "DxmF48B_49z", "title": "Random Weight Factorization improves the training of Continuous Neural Representations", "track": "main", "status": "Reject", "tldr": "A simple drop-in replacement of conventional dense layers for accelerating and improving the training of coordinate-based multi-layer perceptrons (MLPs).", "abstract": "Continuous neural representations have recently emerged as a powerful and flexible alternative to classical discretized representations of signals. However, training them to capture fine details in multi-scale signals is difficult and computationally expensive. Here we propose random weight factorization as a simple drop-in replacement for parameterizing and initializing conventional linear layers in coordinate-based multi-layer perceptrons (MLPs) that significantly accelerates and improves their training. We show how this factorization alters the underlying loss landscape and effectively enables each neuron in the network to learn using its own self-adaptive learning rate.
This not only helps with mitigating spectral bias, but also allows networks to quickly recover from poor initializations and reach better local minima. We demonstrate how random weight factorization can be leveraged to improve the training of neural representations on a variety of tasks, including image regression, shape representation, computed tomography, inverse rendering, solving partial differential equations, and learning operators between function spaces.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/9a8236ef3d3178c88d5c9e3ecd186fe39b8c8279.zip", "author": "Sifan Wang;Hanwen Wang;Jacob H Seidman;Paris Perdikaris", "authorids": "~Sifan_Wang1;~Hanwen_Wang1;~Jacob_H_Seidman1;~Paris_Perdikaris1", "gender": "M;M;;M", "homepage": ";;;https://directory.seas.upenn.edu/paris-perdikaris/", "dblp": ";62/11316;234/2021;180/9141", "google_scholar": "cVbvfXsAAAAJ;4FlLPGoAAAAJ;w3QOLcMAAAAJ;h_zkt1oAAAAJ", "orcid": ";0000-0002-4990-6810;;0000-0002-2816-3229", "linkedin": ";;;paris-perdikaris-093068102/", "or_profile": "~Sifan_Wang1;~Hanwen_Wang1;~Jacob_H_Seidman1;~Paris_Perdikaris1", "aff": "University of Pennsylvania;University of Pennsylvania;University of Pennsylvania;University of Pennsylvania", "aff_domain": "upenn.edu;upenn.edu;upenn.edu;upenn.edu", "position": "PhD student;PhD student;Postdoc;Associate Professor", "bibtex": "@misc{\nwang2023random,\ntitle={Random Weight Factorization improves the training of Continuous Neural Representations},\nauthor={Sifan Wang and Hanwen Wang and Jacob H Seidman and Paris Perdikaris},\nyear={2023},\nurl={https://openreview.net/forum?id=DxmF48B_49z}\n}", "github": "", "project": "", "reviewers": "mJaA;C8fw;chW4;AGKQ", "site": "https://openreview.net/forum?id=DxmF48B_49z", "pdf_size": 9866072, "recommendation": "5;5;6;8", "confidence": "4;3;3;4", "correctness": "4;3;3;4", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "77;47;57;116", "wc_strength_and_weaknesses": "218;436;505;199", "wc_clarity_quality_novelty_and_reproducibility": "9;102;65;65", "wc_summary_review": "27;37;39;158", "wc_review": "331;622;666;538", "wc_reply_reviewers": "61;53;10;28", "wc_reply_authors": "1158;2476;1446;191", "reply_reviewers": "1;1;1;1", "reply_authors": "3;6;3;2", "recommendation_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 74.25, 26.413774815425377 ], "wc_strength_and_weaknesses_avg": [ 339.5, 133.42132513207923 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 60.25, 33.22179254645962 ], "wc_summary_review_avg": [ 65.25, 53.741859848725 ], "wc_review_avg": [ 539.25, 128.72718244411317 ], "wc_reply_reviewers_avg": [ 38.0, 20.23610634484806 ], "wc_reply_authors_avg": [ 1317.75, 814.4134008597845 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.5, 1.5 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.40824829046386296, "corr_recommendation_correctness": 0.40824829046386296, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2502081645337049877&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Pennsylvania", "aff_unique_dep": "", "aff_unique_url": "https://www.upenn.edu", "aff_unique_abbr": "UPenn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", 
"aff_country_unique": "United States" }, { "id": "DyFvlCAj8j_", "title": "Highly Parallel Deep Ensemble Learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "In this paper, we propose a novel highly parallel deep ensemble learning, which leads to highly compact and parallel deep neural networks. The main idea is to \\textit{split data into spectral subsets; train subnetworks separately; and ensemble the output results in the inference stage}. The proposed method has parallel branches with each branch being an independent neural network trained using one spectral subset of the training data. It ensembles the outputs of the parallel branches to produce an overall network with substantially stronger generalization capability. It can also scale up the model to the large scale dataset with limited memory. The joint data/model parallel method is amiable for GPU implementation. Due to the reduced size of inputs, the proposed spectral tensor network exhibits an inherent network compression, which leads to the acceleration of training process. We evaluate the proposed spectral tensor networks on the MNIST, CIFAR-10 and ImageNet data sets, to highlight that they simultaneously achieve network compression, reduction in computation and parallel speedup. Specifically, on both ImageNet-1K and ImageNet-21K dataset, our proposed AlexNet-spectral, VGG-16-spectral, ResNet-34-spectral, CycleMLP-spectral and MobileVit-spectral networks achieve a comparable performance with the vanila ones, and enjoy up to $4 \\times$ compression ratio and $1.5 \\times$ speedups.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/0be5a6da5540db0cc38f77513fb8c0195d5b0704.zip", "author": "Xiao-Yang Liu;Zeliang Zhang;Xiaodong Wang", "authorids": "~Xiao-Yang_Liu1;~Zeliang_Zhang1;~Xiaodong_Wang1", "gender": "M;M;", "homepage": "http://www.tensorlet.org/publications/;https://github.com/ZhangAIPI;http://ee.columbia.edu/~wangx", "dblp": "125/9849;219/9383;", "google_scholar": "https://scholar.google.com/citations?hl=en;7nLfsSgAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Xiao-Yang_Liu1;~Zeliang_Zhang1;~Xiaodong_Wang1", "aff": "Columbia University;University of Rochester;Columbia University", "aff_domain": "columbia.edu;rochester.edu;ee.columbia.edu", "position": "PhD student;PhD student;Full Professor", "bibtex": "@misc{\nliu2023highly,\ntitle={Highly Parallel Deep Ensemble Learning},\nauthor={Xiao-Yang Liu and Zeliang Zhang and Xiaodong Wang},\nyear={2023},\nurl={https://openreview.net/forum?id=DyFvlCAj8j_}\n}", "github": "", "project": "", "reviewers": "JKsA;y6GM;ydb4", "site": "https://openreview.net/forum?id=DyFvlCAj8j_", "pdf_size": 1112899, "recommendation": "3;5;5", "confidence": "2;2;4", "correctness": "2;3;3", "technical_novelty": "2;3;2", "empirical_novelty": "1;2;2", "wc_summary_paper": "63;68;51", "wc_strength_and_weaknesses": "72;184;198", "wc_clarity_quality_novelty_and_reproducibility": "15;8;2", "wc_summary_review": "40;17;31", "wc_review": "190;277;282", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 2.6666666666666665, 0.9428090415820634 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "wc_summary_paper_avg": [ 60.666666666666664, 7.1336448530109 ], 
"wc_strength_and_weaknesses_avg": [ 151.33333333333334, 56.38754787677467 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 8.333333333333334, 5.312459150169743 ], "wc_summary_review_avg": [ 29.333333333333332, 9.46337971105226 ], "wc_review_avg": [ 249.66666666666666, 42.24005471376928 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.5, "corr_recommendation_correctness": 0.9999999999999998, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Tjtb6u5ouYwJ:scholar.google.com/&scioq=Highly+Parallel+Deep+Ensemble+Learning&hl=en&as_sdt=0,5", "gs_version_total": 2, "aff_unique_index": "0;1;0", "aff_unique_norm": "Columbia University;University of Rochester", "aff_unique_dep": ";", "aff_unique_url": "https://www.columbia.edu;https://www.rochester.edu", "aff_unique_abbr": "Columbia;U of R", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "Dyzhru5NO3u", "title": "On the Efficacy of Server-Aided Federated Learning against Partial Client Participation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Although federated learning (FL) has become a prevailing distributed learning framework in recent years due to its benefits in scalability/privacy, there remain many significant challenges in FL system design. Notably, most existing works in the current FL literature assume either full client or uniformly distributed client participation. Unfortunately, this idealistic assumption rarely hold in practice. It has been frequently observed that some clients may never participate in FL training (aka partial/incomplete participation) due to a meld of system heterogeneity factors. To mitigate impacts of partial client participation, an increasingly popular approach in practical FL systems is the sever-aided federated learning (SA-FL) framework, where one equips the server with an auxiliary dataset. However, despite the fact that SA-FL has been empirically shown to be effective in addressing the partial client participation problem, there remains a lack of theoretical understanding for SA-FL. Worse yet, even the ramifications of partial worker participation is not clearly understood in conventional FL so far. These theoretical gaps motivate us to rigorously investigate SA-FL. To this end, we first reveal that conventional FL is {\\em not} PAC-learnable under partial participation in the worst case, which advances our understanding of conventional FL. Then, we show that the PAC-learnability of FL with partial client participation can indeed be revived by SA-FL, which theoretically justifies the use of SA-FL for the first time. 
Lastly, to further make SA-FL communication-efficient, we propose the SAFARI (server-aided federated averaging) algorithm, which enjoys a convergence guarantee and the same level of communication efficiency and privacy as state-of-the-art FL.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/9b11e76755ed20d1a6f2d5c2081dc0770889abb2.zip", "author": "Haibo Yang;Peiwen Qiu;Prashant Khanduri;Jia Liu", "authorids": "~Haibo_Yang1;~Peiwen_Qiu1;~Prashant_Khanduri1;~Jia_Liu1", "gender": "M;F;M;M", "homepage": "https://haibo-yang-osu.github.io/homepage/;;https://sites.google.com/view/khanduri-prashant/home?authuser=0;https://kevinliu-osu.github.io/index.html", "dblp": "43/7829-1;287/6757;158/4888;", "google_scholar": "eyy22VoAAAAJ;LzaQe5sAAAAJ;;Ofx3dScAAAAJ", "orcid": "0000-0002-3245-2728;;;", "linkedin": ";peiwen-qiu/;prashant-khanduri-0497894b/;", "or_profile": "~Haibo_Yang1;~Peiwen_Qiu1;~Prashant_Khanduri1;~Jia_Liu1", "aff": "Ohio State University;Ohio State University, Columbus;Wayne State University;The Ohio State University", "aff_domain": "osu.edu;osu.edu;wayne.edu;osu.edu", "position": "PhD student;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@misc{\nyang2023on,\ntitle={On the Efficacy of Server-Aided Federated Learning against Partial Client Participation},\nauthor={Haibo Yang and Peiwen Qiu and Prashant Khanduri and Jia Liu},\nyear={2023},\nurl={https://openreview.net/forum?id=Dyzhru5NO3u}\n}", "github": "", "project": "", "reviewers": "dvBg;uhzS;m3ZD;fscs", "site": "https://openreview.net/forum?id=Dyzhru5NO3u", "pdf_size": 427911, "recommendation": "3;5;5;6", "confidence": "4;3;4;3", "correctness": "1;3;2;4", "technical_novelty": "3;3;2;3", "empirical_novelty": "3;2;2;3", "wc_summary_paper": "48;83;125;87", "wc_strength_and_weaknesses": "446;443;485;73", "wc_clarity_quality_novelty_and_reproducibility": "1;73;90;5", "wc_summary_review": "74;39;38;35", "wc_review": "569;638;738;200", "wc_reply_reviewers": "1329;0;0;0", "wc_reply_authors": "3292;1847;2132;31", "reply_reviewers": "9;0;0;0", "reply_authors": "6;3;4;1", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.5, 1.118033988749895 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 85.75, 27.270634389394026 ], "wc_strength_and_weaknesses_avg": [ 361.75, 167.53115381922254 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 42.25, 39.73270063813936 ], "wc_summary_review_avg": [ 46.5, 15.945218719101975 ], "wc_review_avg": [ 536.25, 203.21955491536733 ], "wc_reply_reviewers_avg": [ 332.25, 575.4738808147595 ], "wc_reply_authors_avg": [ 1825.5, 1168.8944563133148 ], "reply_reviewers_avg": [ 2.25, 3.897114317029974 ], "reply_authors_avg": [ 3.5, 1.8027756377319946 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.6882472016116854, "corr_recommendation_correctness": 0.9233805168766388, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1520942723054611587&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Ohio State University;Wayne State University", "aff_unique_dep": ";", "aff_unique_url": "https://www.osu.edu;https://wayne.edu", "aff_unique_abbr": "OSU;WSU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Columbus", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, {
"title": "How Does Semi-supervised Learning with Pseudo-labelers Work? A Case Study", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10909", "id": "Dzmd-Cc8OI", "poster": "", "openreview": "https://openreview.net/forum?id=Dzmd-Cc8OI", "slides": "https://iclr.cc/virtual/2023/poster/10909", "video": "https://iclr.cc/virtual/2023/poster/10909", "author_site": "Yiwen Kou, Zixiang Chen, Yuan Cao, Quanquan Gu", "tldr": "", "abstract": "Semi-supervised learning is a popular machine learning paradigm that utilizes a large amount of unlabeled data as well as a small amount of labeled data to facilitate learning tasks. While semi-supervised learning has achieved great success in training neural networks, its theoretical understanding remains largely open. In this paper, we aim to theoretically understand a semi-supervised learning approach based on pre-training and linear probing. In particular, the semi-supervised learning approach we consider first trains a two-layer neural network based on the unlabeled data with the help of pseudo-labelers. Then it linearly probes the pre-trained network on a small amount of labeled data. We prove that, under a certain toy data generation model and two-layer convolutional neural network, the semisupervised learning approach can achieve nearly zero test loss, while a neural network directly trained by supervised learning on the same amount of labeled data can only achieve constant test loss. Through this case study, we demonstrate a separation between semi-supervised learning and supervised learning in terms of test loss provided the same amount of labeled data.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/9d515e0e6b9cc7577f52fbbeae622a23d43c2a5f.zip", "author": "Yiwen Kou;Zixiang Chen;Yuan Cao;Quanquan Gu", "authorids": "~Yiwen_Kou1;~Zixiang_Chen1;~Yuan_Cao1;~Quanquan_Gu1", "gender": "F;M;M;M", "homepage": "https://evankou.github.io/;https://sites.google.com/view/zxchen;https://yuancaohku.github.io/;http://web.cs.ucla.edu/~qgu/", "dblp": "323/9058;137/3624;;50/4597", "google_scholar": "https://scholar.google.com/citations?hl=en;6nrCHr0AAAAJ;-VGnHI4AAAAJ;GU9HgNAAAAAJ", "orcid": ";;;", "linkedin": "yiwen-kou-5a444916b/;;;", "or_profile": "~Yiwen_Kou1;~Zixiang_Chen1;~Yuan_Cao1;~Quanquan_Gu1", "aff": "University of California, Los Angeles; University of California, Los Angeles;University of Hong Kong;University of California, Los Angeles", "aff_domain": "ucla.edu;cs.ucla.edu;hku.hk;cs.ucla.edu", "position": "PhD student;PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nkou2023how,\ntitle={How Does Semi-supervised Learning with Pseudo-labelers Work? 
A Case Study},\nauthor={Yiwen Kou and Zixiang Chen and Yuan Cao and Quanquan Gu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=Dzmd-Cc8OI}\n}", "github": "", "project": "", "reviewers": "eLuC;r8ze;jwNu", "pdf_size": 947908, "recommendation": "5;6;8", "confidence": "3;3;4", "correctness": "2;3;4", "technical_novelty": "2;2;3", "empirical_novelty": "2;0;0", "wc_summary_paper": "73;97;99", "wc_strength_and_weaknesses": "754;466;361", "wc_clarity_quality_novelty_and_reproducibility": "76;51;43", "wc_summary_review": "109;45;41", "wc_review": "1012;659;544", "wc_reply_reviewers": "0;290;240", "wc_reply_authors": "1213;852;789", "reply_reviewers": "0;2;1", "reply_authors": "3;3;3", "recommendation_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 0.6666666666666666, 0.9428090415820634 ], "wc_summary_paper_avg": [ 89.66666666666667, 11.8133634311129 ], "wc_strength_and_weaknesses_avg": [ 527.0, 166.13849644197458 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 56.666666666666664, 14.055445761538678 ], "wc_summary_review_avg": [ 65.0, 31.15552385479446 ], "wc_review_avg": [ 738.3333333333334, 199.12530951360472 ], "wc_reply_reviewers_avg": [ 176.66666666666666, 126.57891697365017 ], "wc_reply_authors_avg": [ 951.3333333333334, 186.80530565865152 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 3.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.944911182523068, "corr_recommendation_correctness": 0.9819805060619659, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12099632725542731485&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=Dzmd-Cc8OI", "email": "ucla.edu;cs.ucla.edu;hku.hk;cs.ucla.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of California, Los Angeles;University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucla.edu;https://www.hku.hk", "aff_unique_abbr": "UCLA;HKU", "aff_campus_unique_index": "0;0;1;0", "aff_campus_unique": "Los Angeles;Hong Kong SAR", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;China" }, { "title": "UNIFIED-IO: A Unified Model for Vision, Language, and Multi-modal Tasks", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11770", "id": "E01k9048soZ", "poster": "/media/PosterPDFs/ICLR%202023/11770.png?t=1682672920.2135084", "openreview": "https://openreview.net/forum?id=E01k9048soZ", "slides": "https://iclr.cc/virtual/2023/poster/11770", "video": "https://iclr.cc/virtual/2023/poster/11770", "author_site": "Jiasen Lu, Christopher Clark, Rowan Zellers, Roozbeh Mottaghi, Aniruddha Kembhavi", "tldr": "", "abstract": "We propose Unified-IO, a model that performs a large variety of AI tasks spanning classical computer vision tasks, including pose estimation, object detection, depth estimation and image generation, vision-and-language tasks such as region captioning and referring expression, to natural language processing tasks such as question answering and paraphrasing. 
Developing a single unified model for such a large variety of tasks poses unique challenges due to the heterogeneous inputs and outputs pertaining to each task, including RGB images, per-pixel maps, binary masks, bounding boxes, and language. We achieve this unification by homogenizing every supported input and output into a sequence of discrete vocabulary tokens. This common representation across all tasks allows us to train a single transformer-based architecture, jointly on over 90 diverse datasets in the vision and language fields. Unified-IO is the first model capable of performing all 7 tasks on the GRIT benchmark and produces strong results across 16 diverse benchmarks like NYUv2-Depth, ImageNet, VQA2.0, OK-VQA, Swig, VizWizGround, BoolQ, and SciTail, with no task-specific fine-tuning. Code and pre-trained models will be made publicly available.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/f20b20659f2134138240091060a33b38914ef019.zip", "author": "Jiasen Lu;Christopher Clark;Rowan Zellers;Roozbeh Mottaghi;Aniruddha Kembhavi", "authorids": "~Jiasen_Lu2;~Christopher_Clark1;~Rowan_Zellers1;~Roozbeh_Mottaghi1;~Aniruddha_Kembhavi1", "gender": "M;M;;M;M", "homepage": "https://chrisc36.github.io;https://rowanzellers.com;http://roozbehm.info;https://anikem.github.io/;https://jiasenlu.github.io/", "dblp": ";182/2175;36/633;81/7583;144/6253", "google_scholar": "CmzeVaEAAAAJ;CFp3IakAAAAJ;CCV58dgAAAAJ;JnUevM0AAAAJ;zP9K32EAAAAJ", "orcid": ";;;;", "linkedin": ";;roozbeh-mottaghi-63397aa0;;", "or_profile": "~Christopher_Clark1;~Rowan_Zellers1;~Roozbeh_Mottaghi1;~Aniruddha_Kembhavi1;~jiasen_lu1", "aff": "Allen Institute for Artificial Intelligence;OpenAI;University of Washington;Allen Institute for Artificial Intelligence;Allen Institute for Artificial Intelligence", "aff_domain": "allenai.org;openai.com;cs.washington.edu;allenai.org;allenai.org", "position": "Research Scientist;Researcher;Affiliate Professor ;Research Manager;Researcher", "bibtex": "@inproceedings{\nlu2023unifiedio,\ntitle={{UNIFIED}-{IO}: A Unified Model for Vision, Language, and Multi-modal Tasks},\nauthor={Jiasen Lu and Christopher Clark and Rowan Zellers and Roozbeh Mottaghi and Aniruddha Kembhavi},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=E01k9048soZ}\n}", "github": "", "project": "", "reviewers": "6qFN;iS57;gX6J;csLY", "pdf_size": 9650160, "recommendation": "6;8;8;8", "confidence": "4;5;4;4", "correctness": "4;4;4;4", "technical_novelty": "2;3;3;3", "empirical_novelty": "3;3;3;4", "wc_summary_paper": "187;69;83;49", "wc_strength_and_weaknesses": "503;280;422;223", "wc_clarity_quality_novelty_and_reproducibility": "25;37;13;95", "wc_summary_review": "115;61;51;64", "wc_review": "830;447;569;431", "wc_reply_reviewers": "79;12;45;19", "wc_reply_authors": "1259;521;598;302", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "recommendation_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 4.0, 0.0 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 97.0, 53.34791467339656 ], "wc_strength_and_weaknesses_avg": [ 357.0, 111.15979489005906 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 42.5, 31.476181471074284 ], "wc_summary_review_avg": [ 72.75, 24.863376681376163 ], "wc_review_avg": [ 569.25, 159.72535021091673 ], "wc_reply_reviewers_avg": [ 38.75, 
26.290445032368698 ], "wc_reply_authors_avg": [ 670.0, 356.9768900083029 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 0.0, "gs_citation": 446, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1828958259233079068&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=E01k9048soZ", "email": "allenai.org;openai.com;cs.washington.edu;allenai.org;allenai.org", "author_num": 5, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "Allen Institute for Artificial Intelligence;OpenAI;University of Washington", "aff_unique_dep": ";;", "aff_unique_url": "https://allenai.org;https://openai.com;https://www.washington.edu", "aff_unique_abbr": "AI2;OpenAI;UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "E08kaoSiQl0", "title": "Transcendental Idealism of Planner: Evaluating Perception from Planning Perspective for Autonomous Driving", "track": "main", "status": "Reject", "tldr": "The paper proposes a systematic and principled framework to evaluate the consequence of perception module error from the perspective of autonomous vehicle planning. ", "abstract": "Evaluating the performance of perception module in autonomous driving is one of the most critical tasks in developing these complex intelligent systems. While module-level unit test methodologies adopted from traditional computer vision tasks are viable to a certain extent, it still remains far less explored to evaluate how changes in a perception module can impact the planning of an autonomous vehicle in a consistent and holistic manner. In this work, we propose a principled framework that provides a coherent and systematic understanding of how perception modules affect the planning of an autonomous vehicle that actually controls the vehicle. Specifically, planning of an autonomous vehicle is formulated as an expected utility maximisation problem, where all input signals from upstream modules jointly provide a world state description, and the planner aims to find the optimal action to execute by finding the solution to maximise the expected utility determined by both the world state and the action. We show that, under some mild conditions, the objective function can be represented as an inner product between the world state description and the utility function in a Hilbert space. This geometric interpretation enables a novel way to formulate, analyse and evaluate the impact of noise in world state estimation on the solution to the problem, and leads to a universal quantitative metric for such purpose. 
The whole framework resembles the idea of transcendental idealism in the classical philosophy literature, which gives the name to our approach.", "keywords": "Autonomous Driving;Utility Maximisation;Hilbert Space;Planning;Perception", "primary_area": "", "supplementary_material": "", "author": "Weixin Li;Xiaodong Yang", "authorids": "~Weixin_Li1;~Xiaodong_Yang4", "gender": "Not Specified;M", "homepage": "http://www.svcl.ucsd.edu/~nicolas/;https://xiaodongyang.org", "dblp": "https://dblp.org/pers/hd/l/Li:Weixin;", "google_scholar": "99Cyd0YAAAAJ;yWsMg_gAAAAJ", "orcid": "0000-0002-0938-8107;", "linkedin": "weixin-li-13571814/;", "or_profile": "~Weixin_Li1;~Xiaodong_Yang4", "aff": "Meta Facebook;QCraft", "aff_domain": "facebook.com;qcraft.ai", "position": "Research Scientist;Researcher", "bibtex": "@misc{\nli2023transcendental,\ntitle={Transcendental Idealism of Planner: Evaluating Perception from Planning Perspective for Autonomous Driving},\nauthor={Weixin Li and Xiaodong Yang},\nyear={2023},\nurl={https://openreview.net/forum?id=E08kaoSiQl0}\n}", "github": "", "project": "", "reviewers": "NfXU;dqEU;34kZ", "site": "https://openreview.net/forum?id=E08kaoSiQl0", "pdf_size": 4652319, "recommendation": "5;6;6", "confidence": "4;2;2", "correctness": "4;3;3", "technical_novelty": "3;3;3", "empirical_novelty": "0;3;3", "wc_summary_paper": "85;128;119", "wc_strength_and_weaknesses": "169;70;153", "wc_clarity_quality_novelty_and_reproducibility": "46;21;115", "wc_summary_review": "17;34;27", "wc_review": "317;253;414", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 2.6666666666666665, 0.9428090415820634 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 1.4142135623730951 ], "wc_summary_paper_avg": [ 110.66666666666667, 18.517259216441534 ], "wc_strength_and_weaknesses_avg": [ 130.66666666666666, 43.39226761634945 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 60.666666666666664, 39.75200903378624 ], "wc_summary_review_avg": [ 26.0, 6.97614984548545 ], "wc_review_avg": [ 328.0, 66.18660488850193 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": -0.9999999999999997, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10158271349097978759&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1", "aff_unique_norm": "Meta;QCraft", "aff_unique_dep": "Meta Platforms, Inc.;", "aff_unique_url": "https://meta.com;", "aff_unique_abbr": "Meta;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0", "aff_country_unique": "United States;" }, { "id": "E1_fqDe3YIC", "title": "Generative Gradual Domain Adaptation with Optimal Transport", "track": "main", "status": "Reject", "tldr": "", "abstract": "Unsupervised domain adaptation (UDA) adapts a model from a labeled source domain to an unlabeled target domain in a one-off way. Though widely applied, UDA faces a great challenge whenever the distribution shift between the source and the target is large. 
Gradual domain adaptation (GDA) mitigates this limitation by using intermediate domains to gradually adapt from the source to the target domain. However, it remains an open problem how to leverage this paradigm when the oracle intermediate domains are missing or scarce. To approach this practical challenge, we propose Generative Gradual Domain Adaptation with Optimal Transport (GOAT), an algorithmic framework that can generate intermediate domains in a data-dependent way. More concretely, we generate intermediate domains along the Wasserstein geodesic between two given consecutive domains in a feature space, and apply gradual self-training, a standard GDA algorithm, to adapt the source-trained classifier to the target along the sequence of intermediate domains. Empirically, we demonstrate that our GOAT framework can improve the performance of standard GDA when the oracle intermediate domains are scarce, significantly broadening the real-world application scenarios of GDA.", "keywords": "Domain Adaptation;Gradual Domain Adaptation;Distribution Shift", "primary_area": "", "supplementary_material": "/attachment/497437f26024264ae92475fae3ca706617492e97.zip", "author": "Yifei He;Haoxiang Wang;Han Zhao", "authorids": "~Yifei_He1;~Haoxiang_Wang1;~Han_Zhao1", "gender": ";M;M", "homepage": "https://yifei-he.github.io/;https://haoxiang-wang.github.io/;https://hanzhaoml.github.io/", "dblp": ";;03/3520-2", "google_scholar": "https://scholar.google.com/citations?hl=en;bcInPlwAAAAJ;x942ipYAAAAJ", "orcid": ";;0000-0002-8579-1600", "linkedin": ";haoxiang-wang-071414ab/;", "or_profile": "~Yifei_He1;~Haoxiang_Wang1;~Han_Zhao1", "aff": "University of Illinois Urbana-Champaign;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign", "aff_domain": "illinois.edu;illinois.edu;illinois.edu", "position": "MS student;PhD student;Assistant Professor", "bibtex": "@misc{\nhe2023generative,\ntitle={Generative Gradual Domain Adaptation with Optimal Transport},\nauthor={Yifei He and Haoxiang Wang and Han Zhao},\nyear={2023},\nurl={https://openreview.net/forum?id=E1_fqDe3YIC}\n}", "github": "", "project": "", "reviewers": "3XRG;tuhk;d4Li;SoJb", "site": "https://openreview.net/forum?id=E1_fqDe3YIC", "pdf_size": 1902699, "recommendation": "3;6;8;8", "confidence": "4;4;4;4", "correctness": "3;3;4;4", "technical_novelty": "3;2;3;3", "empirical_novelty": "3;2;2;2", "wc_summary_paper": "73;101;103;175", "wc_strength_and_weaknesses": "212;313;54;277", "wc_clarity_quality_novelty_and_reproducibility": "18;37;36;167", "wc_summary_review": "33;65;376;43", "wc_review": "336;516;569;662", "wc_reply_reviewers": "0;22;0;0", "wc_reply_authors": "821;794;938;412", "reply_reviewers": "0;1;0;0", "reply_authors": "2;2;2;1", "recommendation_avg": [ 6.25, 2.0463381929681126 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 113.0, 37.70941526992961 ], "wc_strength_and_weaknesses_avg": [ 214.0, 99.2144142753461 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 64.5, 59.659450215368224 ], "wc_summary_review_avg": [ 129.25, 142.93070873678616 ], "wc_review_avg": [ 520.75, 118.77999621148335 ], "wc_reply_reviewers_avg": [ 5.5, 9.526279441628825 ], "wc_reply_authors_avg": [ 741.25, 197.64788766895538 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ],
"corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.8551861104941366, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:4a6_amEmjXgJ:scholar.google.com/&scioq=Generative+Gradual+Domain+Adaptation+with+Optimal+Transport&hl=en&as_sdt=0,5", "gs_version_total": 2, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Illinois Urbana-Champaign", "aff_unique_dep": "", "aff_unique_url": "https://illinois.edu", "aff_unique_abbr": "UIUC", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Urbana-Champaign", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "E2KNgQVJMiP", "title": "UNICO: Efficient Unified Hardware-Software Co-Optimization For Deep Neural Networks", "track": "main", "status": "Withdraw", "tldr": "UNICO is a fast and high-fidelity neural accelerator HW-SW co-search solution that can find high-quality HW configurations that are generalizable to unseen DNN applications at the time of co-search.", "abstract": "Specialized hardware has become an indispensable component to deep neural network acceleration. To keep up with the rapid evolution of neural networks, recently, holistic and automated solutions for jointly optimizing both hardware architectures and software mapping have been proposed. In this paper, we propose UNICO, a Unified Co-Optimization framework for hardware-software co-design, aimed at addressing the efficiency issues of vast design space exploration and the issue of overfitting to specific input neural network workloads that are facing current approaches. UNICO employs multi-objective Bayesian optimization to sample hardware, and performs parallel and adaptive software mapping search for hardware samples with a customized successive halving algorithm. To reduce overfitting, UNICO incorporates quantitative robustness measures to guide the proposed search and evaluation procedure. 
Experiments performed for both open-source spatial accelerators and a real-world commercial environment show that UNICO significantly improves over its counterparts, finding not only superior but also more robust hardware configurations, yet at drastically lower search cost.", "keywords": "Neural accelerator optimization;Hardware-Software co-design;Hardware optimization;HW design robustness;HW design generalizability;Successive halving;Holistic time-efficient search;Multi-objective Bayesian optimization;High-fidelity search;Tensor computation", "primary_area": "", "supplementary_material": "", "author": "Bahador Rashidi;Chao Gao;Shan Lu;Wang Zhisheng;Lu Wei;SHANGLING JUI;Di Niu", "authorids": "~Bahador_Rashidi1;~Chao_Gao1;shan.lu1@huawei.com;wangzhisheng1@hisilicon.com;robin.luwei@hisilicon.com;~SHANGLING_JUI1;~Di_Niu1", "gender": "M;M;;;;M;M", "homepage": ";http://cgao3.github.io;;;;;https://www.ualberta.ca/~dniu", "dblp": ";;;;;;82/4953", "google_scholar": "https://scholar.google.ca/citations?user=0RjNMVYAAAAJ;https://scholar.google.ca/citations?user=N2H5y_MAAAAJ;;;;;https://scholar.google.ca/citations?user=3kC5OogAAAAJ", "orcid": "0000-0002-1508-0061;;;;;0000-0002-1047-4264;0000-0002-5250-7327", "linkedin": "https://ca.linkedin.com/in/bahador-rashidi-ml-da?original_referer=https%3A%2F%2Fwww.google.com%2F;;;;;;", "or_profile": "~Bahador_Rashidi1;~Chao_Gao1;shan.lu1@huawei.com;wangzhisheng1@hisilicon.com;robin.luwei@hisilicon.com;~SHANGLING_JUI1;~Di_Niu1", "aff": "Huawei Technologies Ltd.;Huawei Technologies Canada;;;;Huawei Technologies Ltd.;University of Alberta", "aff_domain": "huawei.com;huawei.com;;;;huawei.com;ualberta.ca", "position": "Researcher;Researcher;;;;Principal Researcher;Associate Professor", "bibtex": "@misc{\nrashidi2023unico,\ntitle={{UNICO}: Efficient Unified Hardware-Software Co-Optimization For Deep Neural Networks},\nauthor={Bahador Rashidi and Chao Gao and Shan Lu and Wang Zhisheng and Lu Wei and SHANGLING JUI and Di Niu},\nyear={2023},\nurl={https://openreview.net/forum?id=E2KNgQVJMiP}\n}", "github": "", "project": "", "reviewers": "JE4S;o1vt;TX1F;sd31", "site": "https://openreview.net/forum?id=E2KNgQVJMiP", "pdf_size": 1299340, "recommendation": "5;5;5;5", "confidence": "4;4;3;2", "correctness": "3;2;3;3", "technical_novelty": "2;2;2;2", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "136;54;66;125", "wc_strength_and_weaknesses": "184;109;112;180", "wc_clarity_quality_novelty_and_reproducibility": "11;278;63;92", "wc_summary_review": "17;57;109;29", "wc_review": "348;498;350;426", "wc_reply_reviewers": "0;334;171;0", "wc_reply_authors": "605;758;1092;662", "reply_reviewers": "0;1;2;0", "reply_authors": "1;1;3;1", "recommendation_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 95.25, 35.716767770894386 ], "wc_strength_and_weaknesses_avg": [ 146.25, 35.79367960967411 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 111.0, 100.69011868103047 ], "wc_summary_review_avg": [ 53.0, 35.4400902933387 ], "wc_review_avg": [ 405.5, 61.97378478034079 ], "wc_reply_reviewers_avg": [ 126.25, 138.7810776006585 ], "wc_reply_authors_avg": [ 779.25, 188.66289380797699 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0,
"gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:vnHSKw8Ej4gJ:scholar.google.com/&scioq=UNICO:+Efficient+Unified+Hardware-Software+Co-Optimization+For+Deep+Neural+Networks&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Huawei;University of Alberta", "aff_unique_dep": "Huawei Technologies;", "aff_unique_url": "https://www.huawei.com;https://www.ualberta.ca", "aff_unique_abbr": "Huawei;UAlberta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "China;Canada" }, { "id": "E2Y_xv8ybf", "title": "On the Calibration Set Difficulty and Out-of-distribution Calibration", "track": "main", "status": "Withdraw", "tldr": "Calibration set difficulity impacts out-of-distribution calibration performance", "abstract": "Model calibration usually requires optimizing some parameters (e.g., temperature) w.r.t an objective function (e.g., negative log-likelihood). In this paper, we report a plain, important but often neglected fact that the objective function is influenced by calibration set difficulty, i.e., the ratio of the number of incorrectly classified samples to that of correctly classified samples. If a test set has a drastically different difficulty level from the calibration set, the optimal calibration parameters of the two datasets would be different. In other words, a calibrator optimal on the calibration set would be suboptimal on the OOD test set and thus has degraded performance. With this knowledge, we propose a simple and effective method named adaptive calibrator ensemble (ACE) to calibrate OOD datasets whose difficulty is usually higher than the calibration set. Specifically, two calibration functions are trained, one for in-distribution data (low difficulty), and the other for severely OOD data (high difficulty). To achieve desirable calibration on a new OOD dataset, ACE uses an adaptive weighting method that strikes a balance between the two extreme functions. When plugged in, ACE generally improves the performance of a few state-of-the-art calibration schemes on a series of OOD benchmarks. 
Importantly, such improvement does not come at the cost of the in-distribution calibration accuracy.", "keywords": "neural network calibration;out-of-distribution calibration", "primary_area": "", "supplementary_material": "", "author": "Yuli Zou;Weijian Deng;Liang Zheng", "authorids": "~Yuli_Zou1;~Weijian_Deng1;~Liang_Zheng4", "gender": "M;M;M", "homepage": ";http://weijiandeng.xyz;http://zheng-lab.cecs.anu.edu.au/", "dblp": ";198/1517;61/7360-1", "google_scholar": "QPUPu2gAAAAJ;https://scholar.google.com.hk/citations?user=lReHnAEAAAAJ;https://scholar.google.com.au/citations?user=vNHqr3oAAAAJ", "orcid": "0000-0001-9437-617X;;", "linkedin": ";;liang-zheng-76341311a/", "or_profile": "~Yuli_Zou1;~Weijian_Deng1;~Liang_Zheng4", "aff": "Hong Kong Polytechnic University;Australian National University;Australian National University", "aff_domain": "polyu.edu.hk;anu.edu.au;anu.edu.au", "position": "PhD student;PhD student;Senior Lecturer", "bibtex": "@misc{\nzou2023on,\ntitle={On the Calibration Set Difficulty and Out-of-distribution Calibration},\nauthor={Yuli Zou and Weijian Deng and Liang Zheng},\nyear={2023},\nurl={https://openreview.net/forum?id=E2Y_xv8ybf}\n}", "github": "", "project": "", "reviewers": "qLK7;oSf7;yPoN;vcBC", "site": "https://openreview.net/forum?id=E2Y_xv8ybf", "pdf_size": 979763, "recommendation": "3;3;3;5", "confidence": "4;4;4;4", "correctness": "3;3;3;4", "technical_novelty": "1;2;3;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "129;128;97;141", "wc_strength_and_weaknesses": "304;78;733;107", "wc_clarity_quality_novelty_and_reproducibility": "39;50;27;122", "wc_summary_review": "76;149;88;43", "wc_review": "548;405;945;413", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.82915619758885 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 123.75, 16.269219403523945 ], "wc_strength_and_weaknesses_avg": [ 305.5, 261.6854027262507 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 59.5, 36.98986347636336 ], "wc_summary_review_avg": [ 89.0, 38.360135557633264 ], "wc_review_avg": [ 577.75, 219.51238575533728 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:xkuVBPB-CT4J:scholar.google.com/&scioq=On+the+Calibration+Set+Difficulty+and+Out-of-distribution+Calibration&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;1", "aff_unique_norm": "Hong Kong Polytechnic University;Australian National University", "aff_unique_dep": ";", "aff_unique_url": "https://www.polyu.edu.hk;https://www.anu.edu.au", "aff_unique_abbr": "PolyU;ANU", "aff_campus_unique_index": "0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;1;1", "aff_country_unique": "China;Australia" }, { "id": "E2y2TrpJhYN", "title": "Perturbation Defocusing for Adversarial Defense", "track": "main", "status": "Reject", "tldr": "propose a new perspective to defend against text adversarial attacks", "abstract": "Recent research indicates adversarial attacks are likely to deceive neural systems, including
large-scale, pre-trained language models. Given a natural sentence, an attacker replaces a subset of words to fool objective models. To defend against adversarial attacks, existing works aim to reconstruct the adversarial examples. However, these methods show limited defense performance on the adversarial examples whilst also damaging the clean performance on natural examples. To achieve better defense performance, our findings indicate that reconstructing adversarial examples is not necessary. More specifically, we inject non-toxic perturbations into adversarial examples, which can disable almost all malicious perturbations. In order to minimize performance sacrifice, we employ an adversarial example detector to distinguish and repair detected adversarial examples, which alleviates the mis-defense on natural examples. Our experimental results on three datasets, two objective models and a variety of adversarial attacks show that the proposed method successfully repairs up to \u223c 97% of correctly identified adversarial examples with \u2264\u223c 2% performance sacrifice. We provide an anonymous demonstration of adversarial detection and repair based on our work.", "keywords": "text adversarial defense;perturbation defocusing", "primary_area": "", "supplementary_material": "/attachment/e58fa1c1e37342e5ee41f2ce93c83f589a03b413.zip", "author": "HENG YANG;Ke Li", "authorids": "~HENG_YANG5;~Ke_Li5", "gender": "M;M", "homepage": "https://yangheng95.github.io;https://colalab.ai/", "dblp": "83/415-8;75/6627-1.html", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.co.uk/citations?user=lUFU8KsAAAAJ", "orcid": "0000-0002-6831-196X;0000-0001-7200-4244", "linkedin": "heng-yang-3b6278243/;ke-li-29423226/", "or_profile": "~HENG_YANG5;~Ke_Li5", "aff": "University of Exeter;University of Exeter", "aff_domain": "exeter.ac.uk;exeter.ac.uk", "position": "PhD student;Associate Professor", "bibtex": "@misc{\nyang2023perturbation,\ntitle={Perturbation Defocusing for Adversarial Defense},\nauthor={HENG YANG and Ke Li},\nyear={2023},\nurl={https://openreview.net/forum?id=E2y2TrpJhYN}\n}", "github": "", "project": "", "reviewers": "Kc7s;6CCu;pngk", "site": "https://openreview.net/forum?id=E2y2TrpJhYN", "pdf_size": 4879934, "recommendation": "1;5;5", "confidence": "3;3;5", "correctness": "2;3;3", "technical_novelty": "2;3;3", "empirical_novelty": "2;3;3", "wc_summary_paper": "56;43;25", "wc_strength_and_weaknesses": "236;114;228", "wc_clarity_quality_novelty_and_reproducibility": "13;202;174", "wc_summary_review": "18;30;36", "wc_review": "323;389;463", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.6666666666666665, 1.8856180831641267 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 41.333333333333336, 12.710450643291745 ], "wc_strength_and_weaknesses_avg": [ 192.66666666666666, 55.7215298505783 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 129.66666666666666, 83.28398538868215 ], "wc_summary_review_avg": [ 28.0, 7.483314773547883 ], "wc_review_avg": [ 391.6666666666667, 57.18585683735291 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ],
"replies_avg": [ 4, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.4999999999999999, "corr_recommendation_correctness": 0.9999999999999997, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:DGXlqTzNbNQJ:scholar.google.com/&scioq=Perturbation+Defocusing+for+Adversarial+Defense&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "University of Exeter", "aff_unique_dep": "", "aff_unique_url": "https://www.exeter.ac.uk", "aff_unique_abbr": "Exeter", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "title": "Distributionally Robust Recourse Action", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11722", "id": "E3ip6qBLF7", "poster": "/media/PosterPDFs/ICLR%202023/11722.png?t=1681126640.46508", "openreview": "https://openreview.net/forum?id=E3ip6qBLF7", "slides": "https://iclr.cc/virtual/2023/poster/11722", "video": "https://iclr.cc/virtual/2023/poster/11722", "author_site": "Duy Nguyen, Ngoc Bui, Viet Anh Nguyen", "tldr": "Distributionally Robust Recourse Action framework generates a recourse action that has high probability of being valid under a mixture of model shifts.", "abstract": "A recourse action aims to explain a particular algorithmic decision by showing one specific way in which the instance could be modified to receive an alternate outcome. Existing recourse generation methods often assume that the machine learning model does not change over time. However, this assumption does not always hold in practice because of data distribution shifts, and in this case, the recourse action may become invalid. To redress this shortcoming, we propose the Distributionally Robust Recourse Action (DiRRAc) framework, which generates a recourse action that has high probability of being valid under a mixture of model shifts. We first formulate the robustified recourse setup as a min-max optimization problem, where the max problem is specified by Gelbrich distance over an ambiguity set around the distribution of model parameters. Then we suggest a projected gradient descent algorithm to find a robust recourse according to the min-max objective. We also show that our DiRRAc framework can be extended to hedge against the misspecification of the mixture weights. 
Numerical experiments on synthetic data and three real-world datasets demonstrate the benefits of our proposed framework over state-of-the-art recourse methods that generate robust recourses.\n", "keywords": "Robust Optimization;Explainable AI;Algorithmic Recourse", "primary_area": "", "supplementary_material": "/attachment/84ef9025c212bb7b300e9f6a7ca745be9e010b27.zip", "author": "Duy Nguyen;Ngoc Bui;Viet Anh Nguyen", "authorids": "~Duy_Nguyen2;~Ngoc_Bui1;~Viet_Anh_Nguyen2", "gender": "M;M;M", "homepage": "https://duykhuongnguyen.github.io/;http://ngocbh.github.io;http://www.vietanhnguyen.net", "dblp": ";312/6811;", "google_scholar": "y323M_cAAAAJ;;3iyf-EoAAAAJ", "orcid": ";;", "linkedin": "duy-nguyen-89272a17b/;;", "or_profile": "~Duy_Nguyen2;~Ngoc_Bui1;~Viet_Anh_Nguyen2", "aff": "VinAI Research;Hanoi University of Science and Technology;The Chinese University of Hong Kong", "aff_domain": "vinai.io;hust.edu.vn;cuhk.edu.hk", "position": "Research Resident;MS student;Assistant Professor", "bibtex": "@inproceedings{\nnguyen2023distributionally,\ntitle={Distributionally Robust Recourse Action},\nauthor={Duy Nguyen and Ngoc Bui and Viet Anh Nguyen},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=E3ip6qBLF7}\n}", "github": "", "project": "", "reviewers": "UpMU;URnu;LdkR;7AqH", "pdf_size": 835408, "recommendation": "6;6;6;8", "confidence": "3;2;4;5", "correctness": "4;3;4;4", "technical_novelty": "3;3;3;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "75;83;114;47", "wc_strength_and_weaknesses": "115;54;232;120", "wc_clarity_quality_novelty_and_reproducibility": "55;3;22;27", "wc_summary_review": "76;80;65;20", "wc_review": "321;220;433;214", "wc_reply_reviewers": "0;17;0;0", "wc_reply_authors": "188;629;823;175", "reply_reviewers": "0;1;0;0", "reply_authors": "1;1;2;1", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 79.75, 23.8681272830526 ], "wc_strength_and_weaknesses_avg": [ 130.25, 64.23540690304687 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 26.75, 18.606114586339622 ], "wc_summary_review_avg": [ 60.25, 23.878599205146017 ], "wc_review_avg": [ 297.0, 89.2888570875448 ], "wc_reply_reviewers_avg": [ 4.25, 7.361215932167728 ], "wc_reply_authors_avg": [ 453.75, 280.7947426502142 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.7745966692414834, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4373094515805631486&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=E3ip6qBLF7", "email": "vinai.io;hust.edu.vn;cuhk.edu.hk", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "VinAI Research;Hanoi University of Science and Technology;Chinese University of Hong Kong", "aff_unique_dep": ";;", "aff_unique_url": "https://www.vinai.io/;https://www.hust.edu.vn;https://www.cuhk.edu.hk", "aff_unique_abbr": "VinAI;HUST;CUHK", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Hanoi;Hong Kong SAR", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Vietnam;China" }, { "id":
"E4-uRvmKkeB", "title": "Beyond Traditional Transfer Learning: Co-finetuning for Action Localisation", "track": "main", "status": "Reject", "tldr": "Instead of pretraining on \"upstream\" datasets and then finetuning on \"downstream\" tasks, we simultaneously train on all datasets, achieving significant performance improvements across all tasks, and particularly on rare classes.", "abstract": "Transfer learning is the predominant paradigm for training deep networks on small target datasets. Models are typically pretrained on large \u201cupstream\u201d datasets for classification, as such labels are easy to collect, and then finetuned on downstream\u201d tasks such as action localisation, which are smaller due to their finer-grained annotations.\n\nIn this paper, we question this approach, and propose co-finetuning -- simultaneously training a single model on multiple \u201cupstream\u201d and \u201cdownstream\u201d tasks. We demonstrate that co-finetuning outperforms traditional transfer learning when using the same total amount of data, and also show how we can easily extend our approach to multiple \u201cupstream\u201d datasets to further improve performance. In particular, co-finetuning significantly improves the performance on rare classes in our downstream task, as it has a regularising effect, and enables the network to learn feature representations that transfer between different datasets. Finally, we observe how co-finetuning with public, video classification datasets, we are able to achieve state-of-the-art results for spatio-temporal action localisation on the challenging AVA and AVA-Kinetics datasets, outperforming recent works which develop intricate models.", "keywords": "transformer;video;action recognition;action detection;multi-task learning;co-training;transfer learning", "primary_area": "", "supplementary_material": "", "author": "Anurag Arnab;Xuehan Xiong;Alexey A. Gritsenko;Rob Romijnders;Josip Djolonga;Mostafa Dehghani;Chen Sun;Mario Lucic;Cordelia Schmid", "authorids": "~Anurag_Arnab1;~Xuehan_Xiong1;~Alexey_A._Gritsenko1;~Rob_Romijnders1;~Josip_Djolonga2;~Mostafa_Dehghani1;~Chen_Sun1;~Mario_Lucic1;~Cordelia_Schmid1", "gender": ";;;M;M;M;M;M;F", "homepage": ";;;http://robromijnders.github.io;;http://mostafadehghani.com/;https://chensun.me;http://lucic.ai;https://cordeliaschmid.github.io/", "dblp": ";;;185/0664;139/1342;125/4062;01/6072-2;155/1945;s/CordeliaSchmid", "google_scholar": ";vM1SktEAAAAJ;;;;https://scholar.google.nl/citations?user=MiHOX3QAAAAJ;vQa7heEAAAAJ;SzZRlcMAAAAJ;IvqCXP4AAAAJ", "orcid": ";;;;;;;;", "linkedin": ";;;robromijnders/;;;;;cordelia-schmid-47985a9", "or_profile": "~Anurag_Arnab1;~Xuehan_Xiong1;~Alexey_A._Gritsenko1;~Rob_Romijnders1;~Josip_Djolonga2;~Mostafa_Dehghani1;~Chen_Sun1;~Mario_Lucic1;~Cordelia_Schmid1", "aff": ";Google;;University of Amsterdam;Google;Google DeepMind;Google;Google;Inria", "aff_domain": ";google.com;;uva.nl;google.com;google.com;google.com;deepmind.com;inria.fr", "position": ";Staff Software Engineer;;PhD student;Research Engineer;Research Scientist;Research Scientist;Senior Staff Research Scientist;Researcher", "bibtex": "@misc{\narnab2023beyond,\ntitle={Beyond Traditional Transfer Learning: Co-finetuning for Action Localisation},\nauthor={Anurag Arnab and Xuehan Xiong and Alexey A. 
Gritsenko and Rob Romijnders and Josip Djolonga and Mostafa Dehghani and Chen Sun and Mario Lucic and Cordelia Schmid},\nyear={2023},\nurl={https://openreview.net/forum?id=E4-uRvmKkeB}\n}", "github": "", "project": "", "reviewers": "kFF7;mBx1;wkyw;L9xn", "site": "https://openreview.net/forum?id=E4-uRvmKkeB", "pdf_size": 1386077, "recommendation": "3;3;3;5", "confidence": "5;4;3;4", "correctness": "3;2;2;3", "technical_novelty": "1;2;2;2", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "112;76;34;47", "wc_strength_and_weaknesses": "314;151;151;245", "wc_clarity_quality_novelty_and_reproducibility": "29;24;3;83", "wc_summary_review": "79;49;56;19", "wc_review": "534;300;244;394", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 67.25, 29.978117018918983 ], "wc_strength_and_weaknesses_avg": [ 215.25, 68.72545016222156 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 34.75, 29.51588555337617 ], "wc_summary_review_avg": [ 50.75, 21.428660714099703 ], "wc_review_avg": [ 368.0, 109.80892495603443 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 9, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:NCH1OQcpUMIJ:scholar.google.com/&scioq=Beyond+Traditional+Transfer+Learning:+Co-finetuning+for+Action+Localisation&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;0;0;0;0;2", "aff_unique_norm": "Google;University of Amsterdam;INRIA", "aff_unique_dep": "Google;;", "aff_unique_url": "https://www.google.com;https://www.uva.nl;https://www.inria.fr", "aff_unique_abbr": "Google;UvA;Inria", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;1;0;2;0;0;3", "aff_country_unique": "United States;Netherlands;United Kingdom;France" }, { "id": "E67OghNSDMf", "title": "SepRep-Net: Multi-source Free Domain Adaptation via Model Separation and Reparameterization", "track": "main", "status": "Withdraw", "tldr": "We introduce a general approach to multi-source free domain adaptation via model separation and reparameterization, which enhances effectiveness, efficiency and generalizability. ", "abstract": "We consider multi-source free domain adaptation, the problem of adapting multiple existing models to a new domain without accessing the source data. This is a practical problem, which often arises in commercial settings but remains an open question despite the advances in recent years. Previous methods, e.g., model ensemble, are effective, but they also incur significantly increased computational costs. Conventional solutions for efficiency, such as distillation, are limited in preserving source knowledge, i.e., maintaining generalizability. In this work, we propose a novel framework called SepRep-Net, which tackles multi-source free domain adaptation via model Separation and Reparameterization. Concretely, SepRep-Net reassembles multiple existing models into a unified network, while maintaining separate pathways (Separation).
During training, separate pathways are optimized in parallel, with information exchange performed regularly via an additional feature merging unit. With our specific design, these pathways can be further reparameterized into a single one to facilitate inference (Reparameterization). SepRep-Net is characterized by 1) effectiveness: competitive performance on the target domain, 2) efficiency: low computational costs, and 3) generalizability: maintaining more source knowledge than existing solutions. As a general approach, SepRep-Net can be seamlessly plugged into various methods. Extensive experiments validate the performance of SepRep-Net on mainstream benchmarks.", "keywords": "multi-source free domain adaptation;generalized domain adaptation", "primary_area": "", "supplementary_material": "/attachment/028813b6e67ec517a837b944356d7f393393616a.zip", "author": "Ying Jin;Jiaqi Wang;Dahua Lin", "authorids": "~Ying_Jin1;~Jiaqi_Wang1;~Dahua_Lin1", "gender": "F;M;M", "homepage": "https://jin-ying.github.io/;https://myownskyw7.github.io/;http://dahua.site", "dblp": "46/176/;44/740-3;53/6088", "google_scholar": "RSqGfysAAAAJ;https://scholar.google.com.hk/citations?user=GDvt570AAAAJ;GMzzRRUAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Ying_Jin1;~Jiaqi_Wang1;~Dahua_Lin1", "aff": "The Chinese University of Hong Kong;Shanghai AI Laboratory;The Chinese University of Hong Kong", "aff_domain": "ie.cuhk.edu;pjlab.org.cn;cuhk.edu.hk", "position": "PhD student;Research Scientist;Associate Professor", "bibtex": "@misc{\njin2023seprepnet,\ntitle={SepRep-Net: Multi-source Free Domain Adaptation via Model Separation and Reparameterization},\nauthor={Ying Jin and Jiaqi Wang and Dahua Lin},\nyear={2023},\nurl={https://openreview.net/forum?id=E67OghNSDMf}\n}", "github": "", "project": "", "reviewers": "yaLS;tA6h;3NTc;ZbyZ", "site": "https://openreview.net/forum?id=E67OghNSDMf", "pdf_size": 334609, "recommendation": "3;3;5;5", "confidence": "5;4;5;4", "correctness": "4;3;2;3", "technical_novelty": "3;2;3;2", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "120;63;52;59", "wc_strength_and_weaknesses": "223;159;579;138", "wc_clarity_quality_novelty_and_reproducibility": "31;54;115;13", "wc_summary_review": "46;11;88;29", "wc_review": "420;287;834;239", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.5, 0.5 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 73.5, 27.13392710243027 ], "wc_strength_and_weaknesses_avg": [ 274.75, 178.42698086332123 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 53.25, 38.499188303131795 ], "wc_summary_review_avg": [ 43.5, 28.5175384631984 ], "wc_review_avg": [ 445.0, 234.1719453734798 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": -0.7071067811865475, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9222606155095006545&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Chinese University of Hong Kong;Shanghai AI Laboratory", "aff_unique_dep": ";", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.shanghai-ai-lab.com",
"aff_unique_abbr": "CUHK;SAIL", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "ChordMixer: A Scalable Neural Attention Model for Sequences with Different Length", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11497", "id": "E8mzu3JbdR", "poster": "/media/PosterPDFs/ICLR%202023/11497.png?t=1680974375.496835", "openreview": "https://openreview.net/forum?id=E8mzu3JbdR", "slides": "https://iclr.cc/virtual/2023/poster/11497", "video": "https://iclr.cc/virtual/2023/poster/11497", "author_site": "Ruslan Khalitov, Tong Yu, Lei Cheng, Zhirong Yang", "tldr": "", "abstract": "Sequential data naturally have different lengths in many domains, with some very long sequences. As an important modeling tool, neural attention should capture long-range interaction in such sequences. However, most existing neural attention models admit only short sequences, or they have to employ chunking or padding to enforce a constant input length. Here we propose a simple neural network building block called ChordMixer which can model the attention for long sequences with variable lengths. Each ChordMixer block consists of a position-wise rotation layer without learnable parameters and an element-wise MLP layer. Repeatedly applying such blocks forms an effective network backbone that mixes the input signals towards the learning targets. We have tested ChordMixer on the synthetic adding problem, long document classification, and DNA sequence-based taxonomy classification. The experiment results show that our method substantially outperforms other neural attention models.", "keywords": "Mixer;Attention;Scalable", "primary_area": "", "supplementary_material": "/attachment/73da6c2df1f4c481af9168b5eec3ef089c9447ae.zip", "author": "Ruslan Khalitov;Tong Yu;Lei Cheng;Zhirong Yang", "authorids": "~Ruslan_Khalitov1;~Tong_Yu4;~Lei_Cheng1;~Zhirong_Yang1", "gender": "M;M;F;M", "homepage": "https://www.ntnu.no/ansatte/ruslan.khalitov;;https://www.ntnu.no/ansatte/lei.cheng;https://folk.ntnu.no/yangzh/", "dblp": "302/0588;;;85/4582", "google_scholar": "bSO_57MAAAAJ;https://scholar.google.com/citations?hl=en;;jxOrsf4AAAAJ", "orcid": ";;;", "linkedin": "ruslan-khalitov/;;;", "or_profile": "~Ruslan_Khalitov1;~Tong_Yu4;~Lei_Cheng1;~Zhirong_Yang1", "aff": "Norwegian University of Science and Technology;Norwegian University of Science and Technology;Norwegian University of Science and Technology;Norwegian University of Science and Technology", "aff_domain": "ntnu.no;ntnu.no;ntnu.no;ntnu.no", "position": "PhD student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nkhalitov2023chordmixer,\ntitle={ChordMixer: A Scalable Neural Attention Model for Sequences with Different Length},\nauthor={Ruslan Khalitov and Tong Yu and Lei Cheng and Zhirong Yang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=E8mzu3JbdR}\n}", "github": "", "project": "", "reviewers": "pZas;Bvdo;9e1K", "pdf_size": 888119, "recommendation": "5;8;8", "confidence": "4;3;4", "correctness": "2;4;4", "technical_novelty": "2;3;3", "empirical_novelty": "2;3;3", "wc_summary_paper": "90;114;147", "wc_strength_and_weaknesses": "319;264;324", "wc_clarity_quality_novelty_and_reproducibility": "93;82;48", "wc_summary_review": "67;54;21", "wc_review": "569;514;540", "wc_reply_reviewers": "37;0;0", "wc_reply_authors": "918;366;337", 
"reply_reviewers": "1;0;0", "reply_authors": "2;1;1", "recommendation_avg": [ 7.0, 1.4142135623730951 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.9428090415820634 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 117.0, 23.366642891095847 ], "wc_strength_and_weaknesses_avg": [ 302.3333333333333, 27.182510717166817 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 74.33333333333333, 19.154343864744856 ], "wc_summary_review_avg": [ 47.333333333333336, 19.362047641943473 ], "wc_review_avg": [ 541.0, 22.464787260658994 ], "wc_reply_reviewers_avg": [ 12.333333333333334, 17.441967269268172 ], "wc_reply_authors_avg": [ 540.3333333333334, 267.3129668052454 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.5, "corr_recommendation_correctness": 1.0, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8742324750477788078&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=E8mzu3JbdR", "email": "ntnu.no;ntnu.no;ntnu.no;ntnu.no", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Norwegian University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.ntnu.no", "aff_unique_abbr": "NTNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Norway" }, { "id": "E94ID_k7CTA", "title": "How and Why We Detect Distribution Shift: Critical Analysis of Methods and Benchmarks", "track": "main", "status": "Withdraw", "tldr": "we aim to provide a consolidated view of the two largest sub-fields: open-set recognition (OSR) and out-of-distribution detection (OOD)", "abstract": "Detecting test-time distribution shift has emerged as a key capability for safely deployed machine learning models, with the question being tackled under various guises in recent years. In this paper, we aim to provide a consolidated view of the two largest sub-fields within the community: open-set recognition (OSR) and out-of-distribution detection (OOD). In particular, we aim to provide rigorous empirical analysis of different methods across settings and provide actionable takeaways for practitioners and researchers. 
Concretely, we make the following contributions:\n(i) For the first time, we perform rigorous cross-evaluation between state-of-the-art methods in the OOD and OSR settings and identify a strong correlation between the performance of methods in the two settings;\n(ii) We propose a new, large-scale benchmark setting which we suggest better disentangles the problems tackled by OOD and OSR;\n(iii) We thoroughly examine SOTA methods for OOD and OSR on our large-scale benchmark; \nand (iv) Finally, we find that the best-performing method on previous benchmarks struggles on our large-scale benchmark, while magnitude-aware scoring rules consistently show promise.", "keywords": "Open-set Recognition;Out of distribution Detection", "primary_area": "", "supplementary_material": "/attachment/3261aec5959f9342210988c9102e61bbd6d36589.zip", "author": "Hongjun Wang;Sagar Vaze;Kai Han", "authorids": "~Hongjun_Wang2;~Sagar_Vaze1;~Kai_Han1", "gender": "M;M;M", "homepage": "https://whj363636.github.io/;https://sgvaze.github.io/;http://www.kaihan.org/", "dblp": "65/3627-5;226/4705;51/4757-1.html", "google_scholar": "DNi-nB0AAAAJ;lvuOknUAAAAJ;tG8S_vMAAAAJ", "orcid": ";0000-0003-2920-9345;0000-0002-7995-9999", "linkedin": ";sagar-vaze-2356ab171/;kaihancs/", "or_profile": "~Hongjun_Wang2;~Sagar_Vaze1;~Kai_Han1", "aff": "The University of Hong Kong;University of Oxford;The University of Hong Kong", "aff_domain": "hku.hk;ox.ac.uk;hku.hk", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@misc{\nwang2023how,\ntitle={How and Why We Detect Distribution Shift: Critical Analysis of Methods and Benchmarks},\nauthor={Hongjun Wang and Sagar Vaze and Kai Han},\nyear={2023},\nurl={https://openreview.net/forum?id=E94ID_k7CTA}\n}", "github": "", "project": "", "reviewers": "QCH2;5MMb;ykpZ", "site": "https://openreview.net/forum?id=E94ID_k7CTA", "pdf_size": 12279338, "recommendation": "3;3;6", "confidence": "4;4;2", "correctness": "3;3;3", "technical_novelty": "2;2;2", "empirical_novelty": "2;0;2", "wc_summary_paper": "42;65;61", "wc_strength_and_weaknesses": "400;671;243", "wc_clarity_quality_novelty_and_reproducibility": "88;40;25", "wc_summary_review": "96;49;37", "wc_review": "626;825;366", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.0, 1.4142135623730951 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 1.3333333333333333, 0.9428090415820634 ], "wc_summary_paper_avg": [ 56.0, 10.03327796219494 ], "wc_strength_and_weaknesses_avg": [ 438.0, 176.78423760807033 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 51.0, 26.870057685088806 ], "wc_summary_review_avg": [ 60.666666666666664, 25.460208605237746 ], "wc_review_avg": [ 605.6666666666666, 187.93675058971894 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:w72l5_hExbcJ:scholar.google.com/&scioq=How+and+Why+We+Detect+Distribution+Shift:+Critical+Analysis+of+Methods+and+Benchmarks&hl=en&as_sdt=0,14", "gs_version_total": 0, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Hong Kong;University of Oxford", "aff_unique_dep": ";",
"aff_unique_url": "https://www.hku.hk;https://www.ox.ac.uk", "aff_unique_abbr": "HKU;Oxford", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "China;United Kingdom" }, { "id": "E9_04otJ62", "title": "Winograd Structured Pruning for Fast Winograd Convolution", "track": "main", "status": "Withdraw", "tldr": "We propose a novel Winograd structured pruning method, which prunes the weights in the Winograd-domain in a structured form with optimized pruning unit size for fast Winograd convolution on parallel processors.", "abstract": "Convolutional Neural Networks (CNNs) are computationally intensive, which limits deployment into mobile devices. \nTo minimize operation counts in CNNs, pruning optimization techniques and Winograd\u2019s minimal filtering algorithm are widely used; however, the benefit of pruning disappears when both optimizations are simply applied together in CNN. \nTo take full advantage of both approaches, two previous pruning methods were proposed: one is to apply pruning after kernel transformation, and the other is applying filter pruning on Winograd convolution. \nUnfortunately, the first method is hardware-unfriendly and the second approach suffers from a significant loss of accuracy.\nThus, we propose structured pruning method specialized for Winograd convolution, that maximizes the hardware utilization by considering the conversion algorithm of parallel processors. \nWe analyze the conversion algorithm of Winograd convolution on parallel processing units; then, we prune the weights in the Winograd-domain in a structured form with optimized pruning unit size, which maximizes the parallelism of the hardware while minimizing the loss of accuracy. \nFor VGG-16 on the ImageNet dataset, the inference time of our method is $1.84$ and $2.89$ times better than previous two pruning methods with less than $1\\%$ accuracy loss.", "keywords": "Winograd convolution;structured pruning;GPU;parallel processor", "primary_area": "", "supplementary_material": "", "author": "Cheonjun Park;Hyun Jae Oh;Mincheol Park;Myung Kuk Yoon;Minsik Kim;Suhyun Kim;Won Woo Ro", "authorids": "~Cheonjun_Park1;~Hyun_Jae_Oh1;~Mincheol_Park1;~Myung_Kuk_Yoon1;~Minsik_Kim1;~Suhyun_Kim1;~Won_Woo_Ro1", "gender": "M;F;M;;M;;M", "homepage": ";http://escal.yonsei.ac.kr/test2/temp/ohj.html;;;https://minsik-kim.github.io/;https://kdst.tistory.com/;http://escal.yonsei.ac.kr/", "dblp": "254/7925;;270/1814;;172/9940;45/6898-1;r/WonWooRo", "google_scholar": "https://scholar.google.com/citations?hl=ko;https://scholar.google.com/citations?hl=ko;kSIW-XAAAAAJ;;;;GVfD5LQAAAAJ", "orcid": ";;;;;;0000-0001-5390-6445", "linkedin": "cheonjun-park-ba7217183;;mincheol-park-66b166186;;;;", "or_profile": "~Cheonjun_Park1;~Hyun_Jae_Oh1;~Mincheol_Park1;~Myung_Kuk_Yoon1;~Minsik_Kim1;~Suhyun_Kim1;~Won_Woo_Ro1", "aff": "Yonsei University;Samsung;Korea Institute of Science and Technology;;Yonsei University;Korea Institute of Science and Technology;Yonsei University", "aff_domain": "yonsei.ac.kr;samsung.com;kist.re.kr;;yonsei.ac.kr;kist.re.kr;yonsei.ac.kr", "position": "PhD student;Researcher;Research assistant;;Research Professor;Principal Researcher;Full Professor", "bibtex": "@misc{\npark2023winograd,\ntitle={Winograd Structured Pruning for Fast Winograd Convolution },\nauthor={Cheonjun Park and Hyun Jae Oh and Mincheol Park and Myung Kuk Yoon and Minsik Kim and Suhyun Kim and Won Woo Ro},\nyear={2023},\nurl={https://openreview.net/forum?id=E9_04otJ62}\n}", 
"github": "", "project": "", "reviewers": "g32B;jfux;EPWE;S8bY", "site": "https://openreview.net/forum?id=E9_04otJ62", "pdf_size": 3204599, "recommendation": "3;3;5;5", "confidence": "5;4;4;4", "correctness": "4;3;3;3", "technical_novelty": "1;1;3;2", "empirical_novelty": "1;0;3;3", "wc_summary_paper": "115;68;181;98", "wc_strength_and_weaknesses": "248;182;496;153", "wc_clarity_quality_novelty_and_reproducibility": "18;5;227;10", "wc_summary_review": "58;5;65;35", "wc_review": "439;260;969;296", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 1.75, 0.82915619758885 ], "empirical_novelty_avg": [ 1.75, 1.299038105676658 ], "wc_summary_paper_avg": [ 115.5, 41.391424232562954 ], "wc_strength_and_weaknesses_avg": [ 269.75, 135.08585233102687 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 65.0, 93.64560854626339 ], "wc_summary_review_avg": [ 40.75, 23.434749838647733 ], "wc_review_avg": [ 491.0, 283.9779921050221 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": -0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1473125149861672166&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;2;0;2;0", "aff_unique_norm": "Yonsei University;Samsung;Korea Institute of Science and Technology", "aff_unique_dep": ";Samsung;", "aff_unique_url": "https://www.yonsei.ac.kr;https://www.samsung.com;https://www.kist.re.kr", "aff_unique_abbr": "Yonsei;Samsung;KIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "EA6YF_qwVe", "title": "Rate-Distortion Optimized Post-Training Quantization for Learned Image Compression", "track": "main", "status": "Reject", "tldr": "", "abstract": "Quantizing floating-point neural network to its fixed-point representation is crucial for Learned Image Compression (LIC) because it ensures the decoding consistency for interoperability and reduces space-time complexity for implementation. Existing solutions often have to retrain the network for model quantization which is time consuming and impractical. This work suggests the use of Post-Training Quantization (PTQ) to directly process pretrained, off-the-shelf LIC models. We theoretically prove that minimizing the mean squared error (MSE) in PTQ is sub-optimal for compression task and thus develop a novel Rate-Distortion (R-D) Optimized PTQ (RDO-PTQ) to best retain the compression performance. Such RDO-PTQ just needs to compress few images (e.g., 10) to optimize the transformation of weight, bias, and activation of underlying LIC model from its native 32-bit floating-point (FP32) format to 8-bit fixed-point (INT8) precision for fixed-point inference onwards. Experiments reveal outstanding efficiency of the proposed method on different LICs, showing the closest coding performance to their floating-point counterparts. And, our method is a lightweight and plug-and-play approach without any need of model retraining which is attractive to practitioners. 
", "keywords": "learned image compression;post-training quantization;rate-distortion optimization", "primary_area": "", "supplementary_material": "/attachment/c75a4a868e956af2fa0132cd5e3355b4c5db05c0.zip", "author": "Junqi Shi;Ming Lu;fangdong chen;Shiliang Pu;Zhan Ma", "authorids": "~Junqi_Shi1;~Ming_Lu3;~fangdong_chen1;~Shiliang_Pu1;~Zhan_Ma1", "gender": "M;;;M;M", "homepage": ";;;;http://vision.nju.edu.cn", "dblp": ";;125/2304;155/3173;", "google_scholar": "UCNz8noAAAAJ;qDtMMVgAAAAJ;;https://scholar.google.com.hk/citations?user=NWR_wpoAAAAJ;78KxtRMAAAAJ", "orcid": "0000-0003-0503-2995;;0009-0006-5671-0716;;", "linkedin": ";;;;", "or_profile": "~Junqi_Shi1;~Ming_Lu3;~fangdong_chen1;~Shiliang_Pu1;~Zhan_Ma1", "aff": "Nanjing University;Nanjing University;Hikvision Research Institute;;Nanjing University", "aff_domain": "nju.edu.cn;nju.edu.cn;hikvision.com;;nju.edu.cn", "position": "MS student;PhD student;Researcher;;Full Professor", "bibtex": "@misc{\nshi2023ratedistortion,\ntitle={Rate-Distortion Optimized Post-Training Quantization for Learned Image Compression},\nauthor={Junqi Shi and Ming Lu and fangdong chen and Shiliang Pu and Zhan Ma},\nyear={2023},\nurl={https://openreview.net/forum?id=EA6YF_qwVe}\n}", "github": "", "project": "", "reviewers": "soh4;8wCS;AqaT;LeDd;6kPp;ra2Y", "site": "https://openreview.net/forum?id=EA6YF_qwVe", "pdf_size": 403315, "recommendation": "3;3;5;5;5;8", "confidence": "3;4;4;4;3;4", "correctness": "3;3;3;4;3;3", "technical_novelty": "2;2;2;2;2;3", "empirical_novelty": "2;2;1;2;2;2", "wc_summary_paper": "105;72;75;207;76;53", "wc_strength_and_weaknesses": "470;191;221;154;269;79", "wc_clarity_quality_novelty_and_reproducibility": "127;52;19;86;56;66", "wc_summary_review": "59;52;37;123;74;26", "wc_review": "761;367;352;570;475;224", "wc_reply_reviewers": "591;0;76;0;0;0", "wc_reply_authors": "1465;476;527;283;381;118", "reply_reviewers": "2;0;1;0;0;0", "reply_authors": "3;1;2;1;1;1", "recommendation_avg": [ 4.833333333333333, 1.674979270186815 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.1666666666666665, 0.3726779962499649 ], "technical_novelty_avg": [ 2.1666666666666665, 0.3726779962499649 ], "empirical_novelty_avg": [ 1.8333333333333333, 0.3726779962499649 ], "wc_summary_paper_avg": [ 98.0, 51.062053751620034 ], "wc_strength_and_weaknesses_avg": [ 230.66666666666666, 121.99544618094927 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 67.66666666666667, 33.18969050105101 ], "wc_summary_review_avg": [ 61.833333333333336, 31.344412934719678 ], "wc_review_avg": [ 458.1666666666667, 172.6821904218524 ], "wc_reply_reviewers_avg": [ 111.16666666666667, 216.37499598818917 ], "wc_reply_authors_avg": [ 541.6666666666666, 433.8370149670905 ], "reply_reviewers_avg": [ 0.5, 0.7637626158259734 ], "reply_authors_avg": [ 1.5, 0.7637626158259734 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.35179877236514595, "corr_recommendation_correctness": 0.0444994159489985, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16491131125577638459&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Nanjing University;Hikvision Research Institute", "aff_unique_dep": ";", "aff_unique_url": "https://www.nju.edu.cn;https://www.hikvision.com/cn/", "aff_unique_abbr": "Nanjing U;Hikvision", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": 
"China" }, { "title": "How gradient estimator variance and bias impact learning in neural networks", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11023", "id": "EBC60mxBwyw", "poster": "/media/PosterPDFs/ICLR%202023/11023.png?t=1682209306.8966718", "openreview": "https://openreview.net/forum?id=EBC60mxBwyw", "slides": "https://iclr.cc/virtual/2023/poster/11023", "video": "https://iclr.cc/virtual/2023/poster/11023", "author_site": "Arna Ghosh, Yuhan Helena Liu, Guillaume Lajoie, Konrad P Kording, Blake A Richards", "tldr": "We characterize the impact of variance and bias in gradient estimates on learning and generalization and study how network architecture properties modulate these effects.", "abstract": "There is growing interest in understanding how real brains may approximate gradients and how gradients can be used to train neuromorphic chips. However, neither real brains nor neuromorphic chips can perfectly follow the loss gradient, so parameter updates would necessarily use gradient estimators that have some variance and/or bias. Therefore, there is a need to understand better how variance and bias in gradient estimators impact learning dependent on network and task properties. Here, we show that variance and bias can impair learning on the training data, but some degree of variance and bias in a gradient estimator can be beneficial for generalization. We find that the ideal amount of variance and bias in a gradient estimator are dependent on several properties of the network and task: the size and activity sparsity of the network, the norm of the gradient, and the curvature of the loss landscape. As such, whether considering biologically-plausible learning algorithms or algorithms for training neuromorphic chips, researchers can analyze these properties to determine whether their approximation to gradient descent will be effective for learning given their network and task properties.", "keywords": "Computational Neuroscience;learning and plasticity;Credit assignment;Imperfect gradient descent;Gradient approximation;Biologically-plausible learning;Neuromorphic computing;Neural networks", "primary_area": "", "supplementary_material": "/attachment/ef61b7339cae4bd7c7d7c4d46f6f42490be45774.zip", "author": "Arna Ghosh;Yuhan Helena Liu;Guillaume Lajoie;Konrad Kording;Blake Aaron Richards", "authorids": "~Arna_Ghosh1;~Yuhan_Helena_Liu1;~Guillaume_Lajoie1;~Konrad_Kording1;~Blake_Aaron_Richards1", "gender": "M;F;M;M;M", "homepage": "https://arnaghosh.github.io/;https://helena-yuhan-liu.github.io/;https://dms.umontreal.ca/~lajoie/;http://www.kordinglab.com;http://linclab.org", "dblp": "190/7223;195/5396;31/10384;;70/10850", "google_scholar": "https://scholar.google.ca/citations?user=YjS546oAAAAJ;hP3kxJQAAAAJ;;MiFqJGcAAAAJ;https://scholar.google.ca/citations?user=1CPY1LsAAAAJ", "orcid": ";;;0000-0001-8408-4499;0000-0001-9662-2151", "linkedin": ";;;;", "or_profile": "~Arna_Ghosh1;~Yuhan_Helena_Liu1;~Guillaume_Lajoie1;~Konrad_Kording1;~Blake_Aaron_Richards1", "aff": "McGill University;University of Washington;Mila - Quebec Artificial Intelligence Institute;University of Pennsylvania;Mila - Quebec Artificial Intelligence Institute", "aff_domain": "mcgill.ca;uw.edu;mila.quebec;upenn.edu;mila.quebec", "position": "PhD student;PhD student;Associate Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nghosh2023how,\ntitle={How gradient estimator variance and bias impact learning in neural networks},\nauthor={Arna Ghosh and Yuhan Helena Liu and 
Guillaume Lajoie and Konrad Kording and Blake Aaron Richards},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=EBC60mxBwyw}\n}", "github": "", "project": "", "reviewers": "HbzZ;Z5BT;27vS;Yx5o", "pdf_size": 2950814, "recommendation": "5;6;8;8", "confidence": "3;3;4;2", "correctness": "3;3;3;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "3;2;3;3", "wc_summary_paper": "190;57;111;44", "wc_strength_and_weaknesses": "137;294;1234;137", "wc_clarity_quality_novelty_and_reproducibility": "129;42;105;71", "wc_summary_review": "100;54;224;41", "wc_review": "556;447;1674;293", "wc_reply_reviewers": "0;0;50;0", "wc_reply_authors": "1399;519;2493;507", "reply_reviewers": "0;0;1;0", "reply_authors": "3;2;5;2", "recommendation_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 100.5, 57.45650528878345 ], "wc_strength_and_weaknesses_avg": [ 450.5, 456.8722469137297 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 86.75, 33.04826016600571 ], "wc_summary_review_avg": [ 104.75, 72.2543251300571 ], "wc_review_avg": [ 742.5, 545.8582691505186 ], "wc_reply_reviewers_avg": [ 12.5, 21.650635094610966 ], "wc_reply_authors_avg": [ 1229.5, 814.2448955934572 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.0, 1.224744871391589 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10016418407181371376&as_sdt=40000005&sciodt=0,22&hl=en", "gs_version_total": 2, "pdf": "https://openreview.net/pdf?id=EBC60mxBwyw", "email": "mcgill.ca;uw.edu;mila.quebec;upenn.edu;mila.quebec", "author_num": 5, "aff_unique_index": "0;1;2;3;2", "aff_unique_norm": "McGill University;University of Washington;Quebec Artificial Intelligence Institute;University of Pennsylvania", "aff_unique_dep": ";;Artificial Intelligence;", "aff_unique_url": "https://www.mcgill.ca;https://www.washington.edu;https://mila.quebec;https://www.upenn.edu", "aff_unique_abbr": "McGill;UW;Mila;UPenn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1;0", "aff_country_unique": "Canada;United States" }, { "id": "EBJG0A0PUo1", "title": "Enabling Probabilistic Inference on Large-Scale Spiking Neural Networks", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Deep spiking neural networks have achieved success in many machine learning tasks. However, most existing works consider deterministic SNNs, which ignore the inherent randomness of neurons. On the other hand, existing works on stochastic SNNs are limited to small networks and are hard to scale to larger SNN topologies. We introduce Noisy SNNs (NSNNs), built upon a stochastic noisy LIF neuron model to enable probabilistic inference on large-scale SNN topologies. By viewing NSNN as a Bayesian Network, we derive a three-factor learning rule called noise-driven learning (NDL) for synaptic optimization. The post-synaptic factor in NDL is obtained using the neuronal membrane noise statistics, avoiding the problematic derivative of the Heaviside spiking function and providing an explanation for surrogate gradients from the standpoint of random noise. 
NDL is backpropagation-compatible, enabling NSNNs to be extended to any SNN topology through modular replacement (Codes are available at https://cutt.ly/9CxT5jI). Evaluations on CIFAR-10/100 and DVS-CIFAR show that NSNNs achieve competitive performance in clean test scenarios. Furthermore, NSNNs exhibit high robustness against challenging perturbations like adversarial perturbation and spike-level disturbance.", "keywords": "spiking neural networks;SNNs", "primary_area": "", "supplementary_material": "", "author": "Gehua Ma;Huajin Tang", "authorids": "~Gehua_Ma1;~Huajin_Tang1", "gender": "M;M", "homepage": "https://genema.github.io;https://person.zju.edu.cn/htang", "dblp": "https://dblp.uni-trier.de/pid/348/6861;18/434", "google_scholar": ";U041O4QAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Gehua_Ma1;~Huajin_Tang1", "aff": "Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn", "position": "PhD student;Full Professor", "bibtex": "@misc{\nma2023enabling,\ntitle={Enabling Probabilistic Inference on Large-Scale Spiking Neural Networks},\nauthor={Gehua Ma and Huajin Tang},\nyear={2023},\nurl={https://openreview.net/forum?id=EBJG0A0PUo1}\n}", "github": "", "project": "", "reviewers": "DsgC;38qu;pjQ1;dsY7", "site": "https://openreview.net/forum?id=EBJG0A0PUo1", "pdf_size": 3212503, "recommendation": "3;5;5;8", "confidence": "5;4;3;4", "correctness": "3;3;3;4", "technical_novelty": "2;4;2;4", "empirical_novelty": "2;3;2;4", "wc_summary_paper": "61;35;23;41", "wc_strength_and_weaknesses": "262;255;492;43", "wc_clarity_quality_novelty_and_reproducibility": "143;2;129;33", "wc_summary_review": "154;33;45;28", "wc_review": "620;325;689;145", "wc_reply_reviewers": "86;304;0;48", "wc_reply_authors": "1317;1766;1882;273", "reply_reviewers": "1;2;0;1", "reply_authors": "3;6;5;3", "recommendation_avg": [ 5.25, 1.7853571071357126 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 1.0 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 40.0, 13.74772708486752 ], "wc_strength_and_weaknesses_avg": [ 263.0, 158.82852388661175 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 76.75, 60.45814668016214 ], "wc_summary_review_avg": [ 65.0, 51.754226880516725 ], "wc_review_avg": [ 444.75, 220.54520511677418 ], "wc_reply_reviewers_avg": [ 109.5, 116.35613434623892 ], "wc_reply_authors_avg": [ 1309.5, 634.5346720235231 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 4.25, 1.299038105676658 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.39605901719066966, "corr_recommendation_correctness": 0.8892972917998875, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:iw6UewXRFqMJ:scholar.google.com/&scioq=Enabling+Probabilistic+Inference+on+Large-Scale+Spiking+Neural+Networks&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Zhejiang University", "aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", "aff_unique_abbr": "ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "SLTUNET: A Simple Unified Model for Sign Language Translation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11236", "id": "EBS4C77p_5S", "poster": "/media/PosterPDFs/ICLR%202023/11236.png?t=1681122866.9070837", "openreview": 
"https://openreview.net/forum?id=EBS4C77p_5S", "slides": "https://iclr.cc/virtual/2023/poster/11236", "video": "https://iclr.cc/virtual/2023/poster/11236", "author_site": "Biao Zhang, Mathias M\u00fcller, Rico Sennrich", "tldr": "A simple unified model for sign language translation that achieves (near) state-of-the-art performance", "abstract": "Despite recent successes with neural models for sign language translation (SLT), translation quality still lags behind spoken languages because of the data scarcity and modality gap between sign video and text. To address both problems, we investigate strategies for cross-modality representation sharing for SLT. We propose SLTUNET, a simple unified neural model designed to support multiple SLTrelated tasks jointly, such as sign-to-gloss, gloss-to-text and sign-to-text translation. Jointly modeling different tasks endows SLTUNET with the capability to explore the cross-task relatedness that could help narrow the modality gap. In addition, this allows us to leverage the knowledge from external resources, such as abundant parallel data used for spoken-language machine translation (MT). We show in experiments that SLTUNET achieves competitive and even state-of-the-art performance on PHOENIX-2014T and CSL-Daily when augmented with MT data and equipped with a set of optimization techniques. We further use the DGS Corpus for end-to-end SLT for the first time. It covers broader domains with a significantly larger vocabulary, which is more challenging and which we consider to allow for a more realistic assessment of the current state of SLT than the former two. Still, SLTUNET obtains improved results on the DGS Corpus. Code is available at https://github.com/bzhangGo/sltunet.", "keywords": "Unified Modeling;Multi-task Learning;Sign Language Translation;Cross-modality Learning", "primary_area": "", "supplementary_material": "/attachment/326f76f0b62fbf9e4452f4e7d5614c30237f4ec6.zip", "author": "Biao Zhang;Mathias M\u00fcller;Rico Sennrich", "authorids": "~Biao_Zhang2;~Mathias_M\u00fcller1;~Rico_Sennrich1", "gender": "M;M;M", "homepage": ";https://www.cl.uzh.ch/de/people/team/compling/mmueller.html;http://cl.uzh.ch/sennrich", "dblp": "https://dblp.uni-trier.de/pers/hd/z/Zhang_0002:Biao;07/9808-2;00/8341.html", "google_scholar": "gqPKjaIAAAAJ;kcpNn2EAAAAJ;https://scholar.google.ch/citations?user=XTpJvCgAAAAJ", "orcid": ";0000-0002-8248-199X;0000-0002-1438-4741", "linkedin": ";;", "or_profile": "~Biao_Zhang2;~Mathias_M\u00fcller1;~Rico_Sennrich1", "aff": "Google DeepMind;University of Zurich;University of Zurich", "aff_domain": "google.com;cl.uzh.ch;uzh.ch", "position": "Researcher;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nzhang2023sltunet,\ntitle={{SLTUNET}: A Simple Unified Model for Sign Language Translation},\nauthor={Biao Zhang and Mathias M{\\\"u}ller and Rico Sennrich},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=EBS4C77p_5S}\n}", "github": "", "project": "", "reviewers": "LzzH;FxDU;x36u;JikT", "pdf_size": 526591, "recommendation": "5;5;6;6", "confidence": "4;4;3;4", "correctness": "3;3;4;3", "technical_novelty": "3;3;3;3", "empirical_novelty": "3;2;3;3", "wc_summary_paper": "106;25;105;114", "wc_strength_and_weaknesses": "268;204;125;262", "wc_clarity_quality_novelty_and_reproducibility": "108;37;60;77", "wc_summary_review": "76;7;43;44", "wc_review": "558;273;333;497", "wc_reply_reviewers": "0;0;0;179", "wc_reply_authors": "477;614;245;869", 
"reply_reviewers": "0;0;0;2", "reply_authors": "2;2;2;4", "recommendation_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 87.5, 36.25258611464843 ], "wc_strength_and_weaknesses_avg": [ 214.75, 57.52988353890524 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 70.5, 25.889186931999237 ], "wc_summary_review_avg": [ 42.5, 24.41823089414956 ], "wc_review_avg": [ 415.25, 116.25483860898005 ], "wc_reply_reviewers_avg": [ 44.75, 77.50927363870726 ], "wc_reply_authors_avg": [ 551.25, 225.94509841994804 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 62, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11641902122381234166&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=EBS4C77p_5S", "email": "google.com;cl.uzh.ch;uzh.ch", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Google;University of Zurich", "aff_unique_dep": "Google DeepMind;", "aff_unique_url": "https://deepmind.com;https://www.unizh.ch", "aff_unique_abbr": "DeepMind;UZH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United Kingdom;Switzerland" }, { "id": "ED2Jjms9A4H", "title": "Efficient Exploration via Fragmentation and Recall", "track": "main", "status": "Reject", "tldr": "We propose a novel framework for exploration based on fragmentation-and-recall.", "abstract": "Efficient exploration and model-building are critical for learning in large state- spaces. However, agents typically face problems like getting stuck locally during exploration and catastrophic forgetting in their construction of models when the environments are heterogeneous. Here, we propose and apply the concept of Fragmentation-and-Recall to solve spatial (FarMap) and reinforcement learning problems (FarCuriosity). Agents construct local maps or local models, respectively, which are used to predict the current observation. High surprisal points lead to a fragmentation event. At fracture points, we store the current map or model fragment in a long-term memory (LTM) and initialize a new fragment. On the other hand, Fragments are recalled (and thus reused) from LTM if the observations of their fracture points match the agent\u2019s current observation during exploration. The set of fracture points defines a set of intrinsic potential subgoals. Agents choose their next subgoal from the set of near and far potential subgoals in the current fragment or LTM, respectively. Thus, local maps and model fragments guide exploration locally and avoid catastrophic forgetting in learning heterogeneous environments, while LTM promotes exploration more globally. 
We evaluate FarMap and FarCuriosity on complex procedurally-generated spatial environments and on reinforcement learning benchmarks and demonstrate that the proposed methods are more efficient at exploration and memory use, and in harvesting extrinsic rewards, respectively.", "keywords": "fragmentation;recall;exploration;cognitive science;neuroscience;curiosity;reinforcement learning;spatial navigation", "primary_area": "", "supplementary_material": "/attachment/fe3a9a42c73844c670e14cc900d788d0ae108fba.zip", "author": "Jaedong Hwang;Zhang-Wei Hong;Eric R Chen;Akhilan Boopathy;Pulkit Agrawal;Ila R Fiete", "authorids": "~Jaedong_Hwang1;~Zhang-Wei_Hong1;~Eric_R_Chen1;~Akhilan_Boopathy1;~Pulkit_Agrawal1;~Ila_R_Fiete1", "gender": "M;M;;M;M;F", "homepage": "https://jd730.github.io/;;https://echen9898.github.io/;;https://people.eecs.berkeley.edu/~pulkitag/;https://fietelab.mit.edu/", "dblp": "239/1982;198/0600;;230/8358;149/2672;", "google_scholar": "https://scholar.google.co.kr/citations?user=bITgqEUAAAAJ;GZkyN4cAAAAJ;;;UpZmJI0AAAAJ;uE-CihIAAAAJ", "orcid": ";;;;;0000-0003-4738-2539", "linkedin": ";;;;;", "or_profile": "~Jaedong_Hwang1;~Zhang-Wei_Hong1;~Eric_R_Chen1;~Akhilan_Boopathy1;~Pulkit_Agrawal1;~Ila_R_Fiete1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;;mit.edu;mit.edu;mit.edu", "position": "PhD student;PhD student;;PhD student;Assistant Professor;Professor", "bibtex": "@misc{\nhwang2023efficient,\ntitle={Efficient Exploration via Fragmentation and Recall},\nauthor={Jaedong Hwang and Zhang-Wei Hong and Eric R Chen and Akhilan Boopathy and Pulkit Agrawal and Ila R Fiete},\nyear={2023},\nurl={https://openreview.net/forum?id=ED2Jjms9A4H}\n}", "github": "", "project": "", "reviewers": "m2i7;FqLm;RmS4;HdLq", "site": "https://openreview.net/forum?id=ED2Jjms9A4H", "pdf_size": 11203876, "recommendation": "5;5;5;6", "confidence": "3;4;4;3", "correctness": "3;3;2;3", "technical_novelty": "2;3;2;3", "empirical_novelty": "3;2;1;2", "wc_summary_paper": "164;98;112;191", "wc_strength_and_weaknesses": "200;113;833;155", "wc_clarity_quality_novelty_and_reproducibility": "165;244;359;641", "wc_summary_review": "51;68;60;141", "wc_review": "580;523;1364;1128", "wc_reply_reviewers": "0;0;454;0", "wc_reply_authors": "759;1180;4229;1307", "reply_reviewers": "0;0;2;0", "reply_authors": "2;3;7;3", "recommendation_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 141.25, 37.81120865563543 ], "wc_strength_and_weaknesses_avg": [ 325.25, 294.7595418302858 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 352.25, 180.418090833486 ], "wc_summary_review_avg": [ 80.0, 35.72814016989969 ], "wc_review_avg": [ 898.75, 357.7019534472799 ], "wc_reply_reviewers_avg": [ 113.5, 196.58776665906757 ], "wc_reply_authors_avg": [ 1868.75, 1377.703229109956 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 3.75, 1.920286436967152 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:LjPPd8zl4gwJ:scholar.google.com/&scioq=Efficient+Exploration+via+Fragmentation+and+Recall&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "ED3WvUgu09", "title": "Kernel Regression with Infinite-Width Neural Networks on Millions of Examples", "track": "main", "status": "Reject", "tldr": "We enable kernel regression with infinite-width neural networks at a larger scale than was previously possible to calculate scaling laws across many orders of magnitude and achieve SotA results on protein and small molecule prediction benchmarks.", "abstract": "While kernel regression remains an important practical method, its connection to neural networks as their width becomes large has initiated fresh research. These neural kernels have drastically increased performance on diverse and nonstandard data modalities but require significantly more compute, which previously limited their application to smaller datasets. We address this by massively parallelizing their computation across many GPUs. We combine this with a distributed, preconditioned conjugate gradients algorithm to enable kernel regression at a large scale (i.e. up to 5 million examples). Using this approach, we study scaling laws of several neural kernels across many orders of magnitude for the CIFAR-5m dataset. Using data augmentation to expand the original CIFAR-10 training dataset by a factor of 20, we obtain a test accuracy of 91.2\\% (SotA for a pure kernel method). 
Finally, we explore other data modalities, obtaining results on protein and small molecule prediction tasks that are competitive with SotA methods.\n", "keywords": "gaussian processes;neural tangent kernel;infinite-width neural networks", "primary_area": "", "supplementary_material": "", "author": "Ben Adlam;Jaehoon Lee;Shreyas Padhy;Zachary Nado;Jasper Snoek", "authorids": "~Ben_Adlam1;~Jaehoon_Lee2;~Shreyas_Padhy1;~Zachary_Nado1;~Jasper_Snoek1", "gender": "M;;M;M;M", "homepage": "http://www.benadlam.com;https://jaehlee.github.io;http://shreyaspadhy.github.io;http://zna.do;", "dblp": ";95/386-1.html;267/9851;228/7785;95/6097", "google_scholar": "Q93u3c0AAAAJ;d3YhiooAAAAJ;JxbV2R0AAAAJ;tazGc34AAAAJ;FM2DTXwAAAAJ", "orcid": ";;;;", "linkedin": ";eejaehoon/;;;", "or_profile": "~Ben_Adlam1;~Jaehoon_Lee2;~Shreyas_Padhy1;~Zachary_Nado1;~Jasper_Snoek1", "aff": "Google;Google;Microsoft Research;Google;Google", "aff_domain": "google.com;google.com;research.microsoft.com;google.com;google.com", "position": "Research Scientist;Research Scientist;Intern;Research Engineer;Research Scientist", "bibtex": "@misc{\nadlam2023kernel,\ntitle={Kernel Regression with Infinite-Width Neural Networks on Millions of Examples},\nauthor={Ben Adlam and Jaehoon Lee and Shreyas Padhy and Zachary Nado and Jasper Snoek},\nyear={2023},\nurl={https://openreview.net/forum?id=ED3WvUgu09}\n}", "github": "", "project": "", "reviewers": "wb1p;c8Ru;nR21;7cjF", "site": "https://openreview.net/forum?id=ED3WvUgu09", "pdf_size": 686040, "recommendation": "3;5;6;8", "confidence": "4;4;4;4", "correctness": "3;2;4;4", "technical_novelty": "1;1;3;3", "empirical_novelty": "2;2;4;3", "wc_summary_paper": "58;142;113;191", "wc_strength_and_weaknesses": "354;58;481;270", "wc_clarity_quality_novelty_and_reproducibility": "53;59;73;145", "wc_summary_review": "19;22;24;57", "wc_review": "484;281;691;663", "wc_reply_reviewers": "0;0;287;0", "wc_reply_authors": "448;221;496;157", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;2;1", "recommendation_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.0, 1.0 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 126.0, 48.15080477001397 ], "wc_strength_and_weaknesses_avg": [ 290.75, 153.94702822724446 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 82.5, 36.806928695559485 ], "wc_summary_review_avg": [ 30.5, 15.402921800749363 ], "wc_review_avg": [ 529.75, 164.1087672856024 ], "wc_reply_reviewers_avg": [ 71.75, 124.27464544306694 ], "wc_reply_authors_avg": [ 330.5, 144.29916839677213 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5853694070049635, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16772338741641477197&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Google;Microsoft", "aff_unique_dep": "Google;Microsoft Research", "aff_unique_url": "https://www.google.com;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "Google;MSR", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "EFTpmFg9cb", "title": "GROOT: Corrective Reward Optimization for Generative Sequential 
Labeling", "track": "main", "status": "Reject", "tldr": "This paper proposes a novel framework for iteratively training Seq2Seq models to directly optimize a given blackbox reward metric, showing its effectiveness on sequential labeling tasks.", "abstract": "Sequential labeling is a fundamental NLP task, forming the backbone of many applications.\nSupervised learning of Seq2Seq models (like T5) has shown great success on these problems.\nHowever there remains a significant disconnect between the training objectives of these models vs the metrics and desiderata we care about in practical applications.\nFor example, a practical sequence tagging application may want to optimize for a certain precision-recall trade-off (of the top-k predictions) which is quite different from the standard objective of maximizing the likelihood of the gold labeled sequence.\nThus to bridge this gap, we propose GROOT -- a simple yet effective framework for Generative Reward Optimization Of Text sequences.\nGROOT works by training a generative sequential labeling model to match the decoder output distribution with that of the (black-box) reward function.\nUsing an iterative training regime, we first generate prediction candidates, then correct errors in them, and finally contrast those candidates (based on their reward values).\nAs demonstrated via extensive experiments on four public benchmarks, GROOT significantly improves all reward metrics.\nFurthermore, GROOT also leads to improvements of the overall decoder distribution as evidenced by the quality gains of the top-k candidates.", "keywords": "sequential labeling;reward optimization;natural language processing", "primary_area": "", "supplementary_material": "", "author": "Kazuma Hashimoto;Karthik Raman", "authorids": "~Kazuma_Hashimoto1;~Karthik_Raman1", "gender": "M;", "homepage": "http://www.logos.t.u-tokyo.ac.jp/~hassy/;", "dblp": "76/2653.html;01/7071-1", "google_scholar": "https://scholar.google.com/citations?hl=en;x1zTxLoAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Kazuma_Hashimoto1;~Karthik_Raman1", "aff": "Google Research;Google", "aff_domain": "google.com;google.com", "position": "Research Scientist;Research Scientist", "bibtex": "@misc{\nhashimoto2023groot,\ntitle={{GROOT}: Corrective Reward Optimization for Generative Sequential Labeling},\nauthor={Kazuma Hashimoto and Karthik Raman},\nyear={2023},\nurl={https://openreview.net/forum?id=EFTpmFg9cb}\n}", "github": "", "project": "", "reviewers": "oMZW;PagF;cdc3", "site": "https://openreview.net/forum?id=EFTpmFg9cb", "pdf_size": 1929876, "recommendation": "5;5;5", "confidence": "3;4;3", "correctness": "2;2;4", "technical_novelty": "3;4;3", "empirical_novelty": "3;0;3", "wc_summary_paper": "113;31;37", "wc_strength_and_weaknesses": "172;166;170", "wc_clarity_quality_novelty_and_reproducibility": "70;6;35", "wc_summary_review": "98;32;29", "wc_review": "453;235;271", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "468;332;525", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.9428090415820634 ], "technical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 1.4142135623730951 ], "wc_summary_paper_avg": [ 60.333333333333336, 37.32142667274241 ], "wc_strength_and_weaknesses_avg": [ 169.33333333333334, 2.494438257849294 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 37.0, 26.166135875720485 ], 
"wc_summary_review_avg": [ 53.0, 31.843366656181317 ], "wc_review_avg": [ 319.6666666666667, 95.41954144140962 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 441.6666666666667, 80.962268303753 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14952069647858170672&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google Research", "aff_unique_url": "https://research.google", "aff_unique_abbr": "Google Research", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "EGIvMUk5duH", "title": "Exploring Connections Between Memorization And Membership Inference", "track": "main", "status": "Reject", "tldr": "", "abstract": "Membership inference (MI) allows privacy adversaries to query trained machine learning models to infer if a particular data sample was used in model training. Prior work has shown that the efficacy of MI is not the same for every sample in the training dataset; they broadly attribute this behavior to various data properties such as distributional difference. However, systematically analyzing the reasons for such disparate behavior has received little attention. In this work, we investigate the cause for such a discrepancy, and observe that the reason is more subtle and fundamental. We first provide empirical insight that an MI adversary is very successful with those samples that are highly $\\textit{likely to be memorized}$, irrespective of whether the sample is from the same or a different distribution. Next, we provide a game-based formulation which lower-bounds the advantage of an adversary with the ability to determine if a sample is memorized or not, under certain assumptions made about the efficacy of the model on the memorized samples. 
Finally, based on our theoretical results, we present a practical instantiation of a highly effective MI attack on memorized samples.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Jihye Choi;Varun Chandrasekaran;Shruti Tople;Somesh Jha", "authorids": "~Jihye_Choi1;~Varun_Chandrasekaran1;~Shruti_Tople2;~Somesh_Jha1", "gender": ";M;;M", "homepage": "https://jihyechoi77.github.io/;http://pages.cs.wisc.edu/~chandrasekaran/;;", "dblp": "232/3097;;;j/SomeshJha", "google_scholar": "lEa3R0sAAAAJ;Sl7nSOsAAAAJ;;BaI7l8QAAAAJ", "orcid": "0009-0000-9719-3758;;;", "linkedin": "jihye-choi-a473a8148/;;;", "or_profile": "~Jihye_Choi1;~Varun_Chandrasekaran1;~Shruti_Tople2;~Somesh_Jha1", "aff": "University of Wisconsin - Madison;Microsoft;;Department of Computer Science, University of Wisconsin, Madison", "aff_domain": "wisc.edu;microsoft.com;;cs.wisc.edu", "position": "PhD student;Postdoc;;Full Professor", "bibtex": "@misc{\nchoi2023exploring,\ntitle={Exploring Connections Between Memorization And Membership Inference},\nauthor={Jihye Choi and Varun Chandrasekaran and Shruti Tople and Somesh Jha},\nyear={2023},\nurl={https://openreview.net/forum?id=EGIvMUk5duH}\n}", "github": "", "project": "", "reviewers": "eP6o;j9KE;HK7T;zop7", "site": "https://openreview.net/forum?id=EGIvMUk5duH", "pdf_size": 1938431, "recommendation": "3;3;3;6", "confidence": "4;4;2;3", "correctness": "2;3;2;3", "technical_novelty": "2;2;2;2", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "51;45;134;80", "wc_strength_and_weaknesses": "62;281;37;229", "wc_clarity_quality_novelty_and_reproducibility": "179;10;27;15", "wc_summary_review": "465;83;152;42", "wc_review": "757;419;350;366", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "606;369;355;529", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 3.75, 1.299038105676658 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 77.5, 35.20298282816387 ], "wc_strength_and_weaknesses_avg": [ 152.25, 104.75536979076539 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 57.75, 70.27579597557042 ], "wc_summary_review_avg": [ 185.5, 166.08807904241652 ], "wc_review_avg": [ 473.0, 165.9442677527609 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 464.75, 106.41046706034139 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.17407765595569782, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17894215590181843485&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Wisconsin-Madison;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.wisc.edu;https://www.microsoft.com", "aff_unique_abbr": "UW-Madison;Microsoft", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Madison;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "EGobBwPc1J-", "title": "TGP: Explainable Temporal Graph Neural Networks for Personalized Recommendation", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "The majority of item retrieval algorithms in typical \"retrieval-rank-rerank\" structured recommendation systems can be separated into 
three categories: deep latent, sequential and graph-based recommenders, which collect collaborative-filtering, sequential and homogeneous signals respectively. However, there is a conceptual overlap between sequential and graph recommenders on a user's past interacted items. This triggers the idea that the sequential, collaborative-filtering and homogeneous signals can be included in one temporal graph formatted data structure, and the sequential, latent and graph learning algorithms can be summarized as one temporal graph encoder. In this paper, Temporal Graph Plugin is proposed as such an explainable temporal graph encoder to supplement deep latent algorithms with aggregated $k$-hop temporal neighborhood messages via a local attention module. We conduct extensive experiments on two public datasets, Reddit and Wikipedia, where TGP exceeds SOTA sequential, latent, graph algorithms by $1.1\\%$, $52.8\\%$ and $98.9\\%$ respectively, partially verifying the proposed hypothesis. Code will be made public upon acceptance.", "keywords": "deep learning;graph neural networks;temporal graph;retrieval models;recommendation system", "primary_area": "", "supplementary_material": "", "author": "Fan Zhang", "authorids": "~Fan_Zhang18", "gender": "F", "homepage": "https://fanfanman.github.io", "dblp": "", "google_scholar": "", "orcid": "0000-0001-5250-1323", "linkedin": "", "or_profile": "~Fan_Zhang18", "aff": "OPPO Research Institute", "aff_domain": "oppo.com", "position": "Researcher", "bibtex": "@misc{\nzhang2023tgp,\ntitle={{TGP}: Explainable Temporal Graph Neural Networks for Personalized Recommendation},\nauthor={Fan Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=EGobBwPc1J-}\n}", "github": "", "project": "", "reviewers": "MNUN;RGXG;vSPs;3i9f", "site": "https://openreview.net/forum?id=EGobBwPc1J-", "pdf_size": 346231, "recommendation": "1;3;3;5", "confidence": "3;4;4;4", "correctness": "3;1;3;4", "technical_novelty": "2;1;2;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "54;39;41;34", "wc_strength_and_weaknesses": "203;38;112;36", "wc_clarity_quality_novelty_and_reproducibility": "23;170;31;35", "wc_summary_review": "21;35;46;34", "wc_review": "301;282;230;139", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.0, 1.4142135623730951 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.75, 1.0897247358851685 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 42.0, 7.3824115301167 ], "wc_strength_and_weaknesses_avg": [ 97.25, 68.30583796426188 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 64.75, 60.91951657720209 ], "wc_summary_review_avg": [ 34.0, 8.860022573334675 ], "wc_review_avg": [ 238.0, 62.789330303802416 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 1, 0 ], "corr_recommendation_confidence": 0.816496580927726, "corr_recommendation_correctness": 0.3244428422615251, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:3abAJtdWTHoJ:scholar.google.com/&scioq=TGP:+Explainable+Temporal+Graph+Neural+Networks+for+Personalized+Recommendation&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "OPPO Research Institute", "aff_unique_dep": "", "aff_unique_url":
"https://www.oppo.com/en", "aff_unique_abbr": "OPPO RI", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "id": "EGx_FtsO1eu", "title": "MixQuant: A Quantization Bit-width Search that Can Optimize the Performance of your Quantization Method", "track": "main", "status": "Reject", "tldr": "We propose MixQuant, a search algorithm that finds the optimal custom quantization bit-width for each layer weight based on roundoff error minimization and can be combined with any quantization method as a form of pre-processing optimization.", "abstract": "Quantization is a technique for creating efficient Deep Neural Networks (DNNs), which involves performing computations and storing tensors at lower bit-widths than f32 floating point precision. Quantization reduces model size and inference latency, and therefore allows for DNNs to be deployed on platforms with constrained computational resources and real-time systems. However, quantization can lead to numerical instability caused by roundoff error which leads to inaccurate computations and therefore, a decrease in quantized model accuracy. In this paper we focus on simulated quantized inference, where the quantized model parameters are stored in low-precision, but the mathematical operations on them (e.g. matrix multiplications and additions) are performed with floating point arithmetic. This means that the DNN parameters are first quantized from f32 to, for example, int4, and then dequantized back to f32 to perform computations. We show that the roundtrip process of quantizing and dequantizing the model parameters leads to roundoff error, which may lead to numerical instability. Similarly to prior works, which have shown that both biases and activations are more sensitive to quantization and are best kept in full precision or quantized with higher bit-widths, we show that some weights are more sensitive than others which should be reflected on their quantization bit-width. To that end we propose MixQuant, a search algorithm that finds the optimal custom quantization bit-width for each layer weight based on roundoff error and can be combined with any quantization method as a form of pre-processing optimization. We show that combining MixQuant with BRECQ, a state-of-the-art quantization method, yields better quantized model accuracy than BRECQ alone. 
Additionally, we combine MixQuant with vanilla asymmetric quantization to show that MixQuant has the potential to optimize the performance of any quantization technique.", "keywords": "neural network quantization;rounding error;bit-width search", "primary_area": "", "supplementary_material": "", "author": "Eliska Kloberdanz;Wei Le", "authorids": "~Eliska_Kloberdanz1;~Wei_Le1", "gender": "F;F", "homepage": ";https://weile.work", "dblp": ";", "google_scholar": "p2EKvPoAAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Eliska_Kloberdanz1;~Wei_Le1", "aff": "Iowa State University;Iowa State University", "aff_domain": "iastate.edu;iastate.edu", "position": "PhD student;Associate Professor", "bibtex": "@misc{\nkloberdanz2023mixquant,\ntitle={MixQuant: A Quantization Bit-width Search that Can Optimize the Performance of your Quantization Method},\nauthor={Eliska Kloberdanz and Wei Le},\nyear={2023},\nurl={https://openreview.net/forum?id=EGx_FtsO1eu}\n}", "github": "", "project": "", "reviewers": "Kh8z;Z2i6;MUWW;6kQQ", "site": "https://openreview.net/forum?id=EGx_FtsO1eu", "pdf_size": 628002, "recommendation": "1;1;3;3", "confidence": "5;4;4;5", "correctness": "1;1;3;2", "technical_novelty": "1;2;2;2", "empirical_novelty": "1;1;2;0", "wc_summary_paper": "45;68;70;39", "wc_strength_and_weaknesses": "299;581;361;99", "wc_clarity_quality_novelty_and_reproducibility": "46;79;99;24", "wc_summary_review": "19;36;43;55", "wc_review": "409;764;573;217", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 2.0, 1.0 ], "confidence_avg": [ 4.5, 0.5 ], "correctness_avg": [ 1.75, 0.82915619758885 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 55.5, 13.683932183404009 ], "wc_strength_and_weaknesses_avg": [ 335.0, 171.88949938841523 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 62.0, 28.97412638890084 ], "wc_summary_review_avg": [ 38.25, 13.026415470113028 ], "wc_review_avg": [ 490.75, 201.89895368723435 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.9045340337332909, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4013296551525513020&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Iowa State University", "aff_unique_dep": "", "aff_unique_url": "https://www.iastate.edu", "aff_unique_abbr": "ISU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "EHi_B2stiNs", "title": "Change Detection for bi-temporal images classification based on Siamese Variational AutoEncoder and Transfer Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Siamese structures empower Deep Learning (DL) models to increase their efficiency by learning how to extract the relevant temporal features from the input data. In this paper, a Siamese Variational Auto-Encoder (VAE) model based on transfer learning (TL) is applied for change detection (CD) using bi-temporal images. The introduced method is trained in a supervised manner for classification tasks.
Firstly, the suggested generative method utilizes two VAEs to extract features from bi-temporal images. Subsequently, it concatenates them into a feature vector. To get a classification map of the source scene, the classifier receives this vector and the ground truth data as input. The source model is fine-tuned to be applied to the target scene with less ground truth data using a TL strategy. Experiments were carried out in two study areas in the arid regions of southern Tunisia. The obtained results reveal that the proposed method outperformed the Siamese Convolution Neural Network (SCNN) by achieving an accuracy of more than 98% in the source scene, and increased the accuracy in the target scene by 1.25% by applying the TL strategy.", "keywords": "Feature extraction;Variational Auto-Encoder;Change Detection;Siamese structure;Transfer Learning;Desertification", "primary_area": "", "supplementary_material": "", "author": "Chouikhi Farah;Ali Ben Abbes;Imed Riadh Farah", "authorids": "~Chouikhi_Farah1;ali.benabbes@yahoo.fr;imedriadh.farah@isamm.uma.tn", "gender": "M;;", "homepage": ";;", "dblp": ";;", "google_scholar": "1-Rvb1kAAAAJ;;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Chouikhi_Farah1;ali.benabbes@yahoo.fr;imedriadh.farah@isamm.uma.tn", "aff": "National School of Computer Science, Manouba, Tunisia;;", "aff_domain": "ensi.rnu.tn;;", "position": "PhD student;;", "bibtex": "@misc{\nfarah2023change,\ntitle={Change Detection for bi-temporal images classification based on Siamese Variational AutoEncoder and Transfer Learning},\nauthor={Chouikhi Farah and Ali Ben Abbes and Imed Riadh Farah},\nyear={2023},\nurl={https://openreview.net/forum?id=EHi_B2stiNs}\n}", "github": "", "project": "", "reviewers": "CkES;Zx8V;8FMz;G4jm", "site": "https://openreview.net/forum?id=EHi_B2stiNs", "pdf_size": 1760711, "recommendation": "1;3;3;3", "confidence": "1;5;4;3", "correctness": "1;3;3;1", "technical_novelty": "1;1;2;1", "empirical_novelty": "0;1;2;0", "wc_summary_paper": "8;87;97;45", "wc_strength_and_weaknesses": "8;77;399;184", "wc_clarity_quality_novelty_and_reproducibility": "8;132;95;55", "wc_summary_review": "8;17;35;29", "wc_review": "32;313;626;313", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 2.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 1.479019945774904 ], "correctness_avg": [ 2.0, 1.0 ], "technical_novelty_avg": [ 1.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 0.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 59.25, 35.442735503908274 ], "wc_strength_and_weaknesses_avg": [ 167.0, 147.89692356502889 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 72.5, 46.13296001775737 ], "wc_summary_review_avg": [ 22.25, 10.473180032826706 ], "wc_review_avg": [ 321.0, 210.1630319537668 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.8783100656536799, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:WOgADFWnXrYJ:scholar.google.com/&scioq=Change+Detection+for+bi-temporal+images+classification+based+on+Siamese+Variational+AutoEncoder+and+Transfer+Learning&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "National School of Computer Science", "aff_unique_dep": "",
"aff_unique_url": "", "aff_unique_abbr": "", "aff_campus_unique_index": "0", "aff_campus_unique": "Manouba", "aff_country_unique_index": "0", "aff_country_unique": "Tunisia" }, { "title": "TDR-CL: Targeted Doubly Robust Collaborative Learning for Debiased Recommendations", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11221", "id": "EIgLnNx_lC", "poster": "", "openreview": "https://openreview.net/forum?id=EIgLnNx_lC", "slides": "https://iclr.cc/virtual/2023/poster/11221", "video": "https://iclr.cc/virtual/2023/poster/11221", "author_site": "Haoxuan Li, Yan Lyu, Chunyuan Zheng, Peng Wu", "tldr": "This paper proposes a principled approach that can effectively reduce the bias and variance simultaneously compared to existing DR estimators for debiased recommendations.", "abstract": "Bias is a common problem inherent in recommender systems, which is entangled with users' preferences and poses a great challenge to unbiased learning. For debiasing tasks, the doubly robust (DR) method and its variants show superior performance due to the double robustness property, that is, DR is unbiased when either imputed errors or learned propensities are accurate.\nHowever, our theoretical analysis reveals that DR usually has a large variance. Meanwhile, DR would suffer unexpectedly large bias and poor generalization caused by inaccurate imputed errors and learned propensities, which usually occur in practice. In this paper, we propose a principled approach that can effectively reduce the bias and variance simultaneously for existing DR approaches when the error imputation model is misspecified. In addition, we further propose a novel semi-parametric collaborative learning approach that decomposes imputed errors into parametric and nonparametric parts and updates them collaboratively, resulting in more accurate predictions. 
Both theoretical analysis and experiments demonstrate the superiority of the proposed methods compared with existing debiasing methods.", "keywords": "Recommender System;Bias;Debias;Doubly Robust", "primary_area": "", "supplementary_material": "/attachment/35e78512a10f9b143be2befc26eb150e502394f3.zip", "author": "Haoxuan Li;Yan Lyu;Chunyuan Zheng;Peng Wu", "authorids": "~Haoxuan_Li6;~Yan_Lyu1;~Chunyuan_Zheng1;~Peng_Wu5", "gender": "M;;M;M", "homepage": "https://haoxuanli-pku.github.io/;https://github.com/lyuyan9527;;https://pengwu.site/", "dblp": "145/4965-1.html;;;15/6146-12", "google_scholar": "gtDqiucAAAAJ;;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?view_op=list_works", "orcid": "0000-0003-3620-3769;;0000-0002-0306-7310;0000-0001-7154-8880", "linkedin": ";;;", "or_profile": "~Haoxuan_Li6;~Yan_Lyu1;~Chunyuan_Zheng1;~Peng_Wu5", "aff": "Peking University;Peking University;Department of Computer Science, University of Illinois at Urbana-Champaign;Beijing Technology and Business University", "aff_domain": "pku.edu.cn;pku.edu.cn;cs.illinois.edu;btbu.edu.cn", "position": "PhD student;MS student;MS student;Associate Professor", "bibtex": "@inproceedings{\nli2023tdrcl,\ntitle={{TDR}-{CL}: Targeted Doubly Robust Collaborative Learning for Debiased Recommendations},\nauthor={Haoxuan Li and Yan Lyu and Chunyuan Zheng and Peng Wu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=EIgLnNx_lC}\n}", "github": "", "project": "", "reviewers": "Z8P2;k4cv;VJ5g", "pdf_size": 671609, "recommendation": "6;6;8", "confidence": "4;3;3", "correctness": "3;4;4", "technical_novelty": "3;3;3", "empirical_novelty": "3;3;3", "wc_summary_paper": "181;75;183", "wc_strength_and_weaknesses": "243;218;176", "wc_clarity_quality_novelty_and_reproducibility": "10;87;53", "wc_summary_review": "14;25;35", "wc_review": "448;405;447", "wc_reply_reviewers": "0;35;44", "wc_reply_authors": "458;1023;704", "reply_reviewers": "0;1;1", "reply_authors": "1;2;1", "recommendation_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 146.33333333333334, 50.446891766380304 ], "wc_strength_and_weaknesses_avg": [ 212.33333333333334, 27.644569488820444 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 50.0, 31.506613062445584 ], "wc_summary_review_avg": [ 24.666666666666668, 8.576453553512405 ], "wc_review_avg": [ 433.3333333333333, 20.038851153585515 ], "wc_reply_reviewers_avg": [ 26.333333333333332, 18.979521127315678 ], "wc_reply_authors_avg": [ 728.3333333333334, 231.30115049913223 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.49999999999999983, "corr_recommendation_correctness": 0.49999999999999983, "gs_citation": 49, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6950923458653340196&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=EIgLnNx_lC", "email": "pku.edu.cn;pku.edu.cn;cs.illinois.edu;btbu.edu.cn", "author_num": 4, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Peking University;University of Illinois Urbana-Champaign;Beijing Technology and 
Business University", "aff_unique_dep": ";Department of Computer Science;", "aff_unique_url": "http://www.pku.edu.cn;https://illinois.edu;http://www.btbu.edu.cn", "aff_unique_abbr": "Peking U;UIUC;BTBU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "China;United States" }, { "id": "EJPWfoJRba", "title": "On the Importance of the Policy Structure in Offline Reinforcement Learning", "track": "main", "status": "Reject", "tldr": "We introduce a structure in a policy representation in offline reinforcement learning, which reduces the critic loss during the training and improves the resulting policy performance. ", "abstract": "Offline reinforcement learning (RL) has attracted a great deal of attention recently as an approach to utilizing past experience to learn a policy. Recent studies have reported the challenges of offline RL, such as estimating the values of actions that are out of the data distribution. To mitigate the issues of offline RL, we propose an algorithm that leverages a mixture of deterministic policies. With our framework, the state-action space is divided by learning discrete latent variables, and sub-policies corresponding to each region are trained. The proposed algorithm, which we call Value-Weighted Variational Auto-Encoder (V2AE), is derived by considering the variational lower bound of the offline RL objective function. The aim of this work is to shed lights on the importance on the policy structure in offline RL. We show empirically that the use of the proposed mixture policy can reduce the accumulation of the approximation error in offline RL, which was reported in previous studies. Experimental results also indicate that introducing the policy structure improves the performance on tasks with D4RL benchmarking datasets.", "keywords": "offline reinforcement learning;discrete latent representations", "primary_area": "", "supplementary_material": "", "author": "Takayuki Osa;Akinobu Hayashi;Pranav Deo;Naoki Morihira;Takahide Yoshiike", "authorids": "~Takayuki_Osa1;~Akinobu_Hayashi1;~Pranav_Deo1;naoki_morihira@jp.honda;takahide_yoshiike@jp.honda", "gender": "M;;M;;", "homepage": ";;;;", "dblp": "27/1571;122/0776;;;", "google_scholar": "https://scholar.google.co.jp/citations?user=LqVev6MAAAAJ;jZMpSNwAAAAJ;;;", "orcid": ";;;;", "linkedin": ";;pranavdeo99;;", "or_profile": "~Takayuki_Osa1;~Akinobu_Hayashi1;~Pranav_Deo1;naoki_morihira@jp.honda;takahide_yoshiike@jp.honda", "aff": "The University of Tokyo;Honda R&D Co.,Ltd.;Honda R&D Co., Ltd.;;", "aff_domain": "u-tokyo.ac.jp;jp.honda;jp.honda;;", "position": "Associate Professor;Assistant Chief Engineer;Researcher;;", "bibtex": "@misc{\nosa2023on,\ntitle={On the Importance of the Policy Structure in Offline Reinforcement Learning},\nauthor={Takayuki Osa and Akinobu Hayashi and Pranav Deo and Naoki Morihira and Takahide Yoshiike},\nyear={2023},\nurl={https://openreview.net/forum?id=EJPWfoJRba}\n}", "github": "", "project": "", "reviewers": "NNyS;chjv;RQRe;7tms", "site": "https://openreview.net/forum?id=EJPWfoJRba", "pdf_size": 2103806, "recommendation": "3;6;6;8", "confidence": "3;3;3;4", "correctness": "3;3;2;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;3;2;3", "wc_summary_paper": "23;206;149;62", "wc_strength_and_weaknesses": "175;141;288;518", "wc_clarity_quality_novelty_and_reproducibility": "12;186;2;26", "wc_summary_review": "24;87;23;59", "wc_review": "234;620;462;665", "wc_reply_reviewers": "0;0;0;207", 
"wc_reply_authors": "1937;376;2234;1328", "reply_reviewers": "0;0;0;4", "reply_authors": "3;2;5;4", "recommendation_avg": [ 5.75, 1.7853571071357126 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 110.0, 71.78091668403239 ], "wc_strength_and_weaknesses_avg": [ 280.5, 147.5237269051999 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 56.5, 75.25124583686306 ], "wc_summary_review_avg": [ 48.25, 26.65872277510684 ], "wc_review_avg": [ 495.25, 168.62291510942396 ], "wc_reply_reviewers_avg": [ 51.75, 89.6336292916894 ], "wc_reply_authors_avg": [ 1468.75, 710.4186705739088 ], "reply_reviewers_avg": [ 1.0, 1.7320508075688772 ], "reply_authors_avg": [ 3.5, 1.118033988749895 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.7276068751089989, "corr_recommendation_correctness": -0.08084520834544431, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:cIGYowL6ruEJ:scholar.google.com/&scioq=On+the+Importance+of+the+Policy+Structure+in+Offline+Reinforcement+Learning&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;1", "aff_unique_norm": "University of Tokyo;Honda R&D Co., Ltd.", "aff_unique_dep": ";", "aff_unique_url": "https://www.u-tokyo.ac.jp;https://www.honda.com", "aff_unique_abbr": "UTokyo;Honda", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Japan" }, { "id": "EJka_dVXEcr", "title": "TabDDPM: Modelling Tabular Data with Diffusion Models", "track": "main", "status": "Reject", "tldr": "Proposed a new state-of-the-art approach for tabular data generation using diffusion models", "abstract": "Denoising diffusion probabilistic models are currently becoming the leading paradigm of generative modeling for many important data modalities. Being the most prevalent in the computer vision community, diffusion models have also recently gained some attention for other domains, including speech, NLP, and graph-like data. In this work, we investigate if the framework of diffusion models can be advantageous for general tabular problems, where datapoints are typically represented by vectors of heterogeneous features. The inherent heterogeneity of tabular data makes it quite challenging for accurate modeling, since the individual features can be of completely different nature, i.e., some of them can be continuous and some of them can be discrete. To address such data types, we introduce TabDDPM --- a diffusion model that can be universally applied to any tabular dataset and handles any types of features. We extensively evaluate TabDDPM on a wide set of benchmarks and demonstrate its superiority over existing GAN/VAE alternatives, which is consistent with the advantage of diffusion models in other fields. 
Additionally, we show that TabDDPM can be successfully used in privacy-oriented setups, where the original datapoints cannot be shared.", "keywords": "tabular data;diffusion models;generative modelling", "primary_area": "", "supplementary_material": "", "author": "Akim Kotelnikov;Dmitry Baranchuk;Ivan Rubachev;Artem Babenko", "authorids": "~Akim_Kotelnikov1;~Dmitry_Baranchuk2;~Ivan_Rubachev1;~Artem_Babenko1", "gender": "M;M;M;M", "homepage": ";;https://github.com/puhsu;", "dblp": "330/3738;215/3712;295/9535;117/4834", "google_scholar": "https://scholar.google.com/citations?hl=en;NiPmk8oAAAAJ;;q885d1wAAAAJ", "orcid": ";0000-0001-7660-3666;;0000-0002-1830-8252", "linkedin": "akimkot;;;", "or_profile": "~Akim_Kotelnikov1;~Dmitry_Baranchuk2;~Ivan_Rubachev1;~Artem_Babenko1", "aff": "Higher School of Economics, Higher School of Economics;Higher School of Economics;Higher School of Economics;Yandex", "aff_domain": "edu.hse.ru;hse.ru;hse.ru;yandex-team.ru", "position": "MS student;PhD student;PhD student;Researcher", "bibtex": "@misc{\nkotelnikov2023tabddpm,\ntitle={Tab{DDPM}: Modelling Tabular Data with Diffusion Models},\nauthor={Akim Kotelnikov and Dmitry Baranchuk and Ivan Rubachev and Artem Babenko},\nyear={2023},\nurl={https://openreview.net/forum?id=EJka_dVXEcr}\n}", "github": "", "project": "", "reviewers": "d279;8b5K;qTDQ;x66q", "site": "https://openreview.net/forum?id=EJka_dVXEcr", "pdf_size": 512026, "recommendation": "1;3;3;5", "confidence": "5;2;3;3", "correctness": "1;1;3;3", "technical_novelty": "1;3;2;2", "empirical_novelty": "1;2;2;3", "wc_summary_paper": "25;95;34;50", "wc_strength_and_weaknesses": "185;279;483;119", "wc_clarity_quality_novelty_and_reproducibility": "13;164;97;8", "wc_summary_review": "18;25;47;51", "wc_review": "241;563;661;228", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "327;553;570;228", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 3.0, 1.4142135623730951 ], "confidence_avg": [ 3.25, 1.0897247358851685 ], "correctness_avg": [ 2.0, 1.0 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 51.0, 26.93510720231126 ], "wc_strength_and_weaknesses_avg": [ 266.5, 137.31988202733064 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 70.5, 64.53100030218035 ], "wc_summary_review_avg": [ 35.25, 14.042346669983617 ], "wc_review_avg": [ 423.25, 191.95881719785626 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 419.5, 146.3736656642854 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.6488856845230502, "corr_recommendation_correctness": 0.7071067811865476, "gs_citation": 323, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9575407485572097634&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Higher School of Economics;Yandex", "aff_unique_dep": ";", "aff_unique_url": "https://www.hse.ru;https://yandex.com", "aff_unique_abbr": "HSE;Yandex", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Russian Federation" }, { "id": "EKdBD-1qHW6", "title": "Implicit regularization via Spectral Neural Networks and non-linear matrix sensing", "track": "main", "status": "Reject", "tldr": "", "abstract": "The phenomenon of \\textit{implicit regularization} has attracted interest in recent
years as a fundamental aspect of the remarkable generalizing ability of neural networks. In a nutshell, it entails that gradient flow dynamics in many neural nets, even without any explicit regularizer in the loss function, converges to the solution of a regularized learning problem. However, known results attempting to theoretically explain this phenomenon focus overwhelmingly on the setting of linear neural nets, and the simplicity of the linear structure is particularly crucial to existing arguments. In this paper, we explore this problem in the context of more realistic neural networks with a general class of non-linear activation functions, and rigorously demonstrate the implicit regularization phenomenon for such networks in the setting of matrix sensing problems. This is coupled with rigorous rate guarantees that ensure exponentially fast convergence of gradient descent, complemented by matching lower bounds which stipulate that the exponential rate is the best achievable. In this vein, we contribute a network architecture called Spectral Neural Networks (\\textit{abbrv.} SNN) that is particularly suitable for matrix learning problems. Conceptually, this entails coordinatizing the space of matrices by their singular values and singular vectors, as opposed to by their entries, a potentially fruitful perspective for matrix learning. We demonstrate that the SNN architecture is inherently much more amenable to theoretical analysis than vanilla neural nets and confirm its effectiveness in the context of matrix sensing, supported via both mathematical guarantees and empirical investigations. We believe that the SNN architecture has the potential to be of wide applicability in a broad class of matrix learning scenarios.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/2bb4fb7ab8aec398abf513ced16cc6756c787a8e.zip", "author": "Subhroshekhar Ghosh;Thanh Lam;Soumendu Sundar Mukherjee", "authorids": "~Subhroshekhar_Ghosh1;~Thanh_Lam1;~Soumendu_Sundar_Mukherjee2", "gender": ";M;M", "homepage": "https://subhro-ghosh.github.io/;;https://soumendu041.gitlab.io/", "dblp": ";280/1674;", "google_scholar": "RpGHEzsAAAAJ;https://scholar.google.com/citations?hl=en;DctLHfsAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Subhroshekhar_Ghosh1;~Thanh_Lam1;~Soumendu_Mukherjee1", "aff": "National University of Singapore;National University of Singapore;Indian Statistical Institute", "aff_domain": "nus.edu.sg;nus.edu.sg;isical.ac.in", "position": "Assistant Professor;PhD student;Assistant Professor", "bibtex": "@misc{\nghosh2023implicit,\ntitle={Implicit regularization via Spectral Neural Networks and non-linear matrix sensing},\nauthor={Subhroshekhar Ghosh and Thanh Lam and Soumendu Sundar Mukherjee},\nyear={2023},\nurl={https://openreview.net/forum?id=EKdBD-1qHW6}\n}", "github": "", "project": "", "reviewers": "p8b1;Ajzu;xZxU;yAZt", "site": "https://openreview.net/forum?id=EKdBD-1qHW6", "pdf_size": 2493322, "recommendation": "3;6;6;8", "confidence": "4;3;4;3", "correctness": "2;3;4;3", "technical_novelty": "2;4;3;3", "empirical_novelty": "2;2;2;4", "wc_summary_paper": "34;37;91;79", "wc_strength_and_weaknesses": "286;96;97;262", "wc_clarity_quality_novelty_and_reproducibility": "42;19;33;25", "wc_summary_review": "36;22;43;71", "wc_review": "398;174;264;437", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1070;523;845;419", "reply_reviewers": "0;0;0;0", "reply_authors": "2;1;2;1", "recommendation_avg": [ 5.75, 1.7853571071357126 ], "confidence_avg": [ 3.5, 0.5 ], 
"correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 60.25, 25.13339412017406 ], "wc_strength_and_weaknesses_avg": [ 185.25, 89.15541206230837 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 29.75, 8.642193008721803 ], "wc_summary_review_avg": [ 43.0, 17.84656829757475 ], "wc_review_avg": [ 318.25, 105.13414050630746 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 714.25, 258.5549989847421 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.7001400420140049, "corr_recommendation_correctness": 0.5940885257860046, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7369967003808495292&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0;1", "aff_unique_norm": "National University of Singapore;Indian Statistical Institute", "aff_unique_dep": ";", "aff_unique_url": "https://www.nus.edu.sg;https://www.isical.ac.in", "aff_unique_abbr": "NUS;ISI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Singapore;India" }, { "title": "SimPer: Simple Self-Supervised Learning of Periodic Targets", "status": "Top-5%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11414", "id": "EKpMeEV0hOo", "poster": "/media/PosterPDFs/ICLR%202023/11414.png?t=1682623392.3937404", "openreview": "https://openreview.net/forum?id=EKpMeEV0hOo", "slides": "https://iclr.cc/virtual/2023/poster/11414", "video": "https://iclr.cc/virtual/2023/poster/11414", "author_site": "Yuzhe Yang, Xin Liu, Jiang Wu, Silviu Borac, Dina Katabi, Ming-Zher Poh, Daniel McDuff", "tldr": "A simple contrastive self-supervised framework for learning periodic targets and tasks.", "abstract": "From human physiology to environmental evolution, important processes in nature often exhibit meaningful and strong periodic or quasi-periodic changes. Due to their inherent label scarcity, learning useful representations for periodic tasks with limited or no supervision is of great benefit. Yet, existing self-supervised learning (SSL) methods overlook the intrinsic periodicity in data, and fail to learn representations that capture periodic or frequency attributes. In this paper, we present SimPer, a simple contrastive SSL regime for learning periodic information in data. To exploit the periodic inductive bias, SimPer introduces customized augmentations, feature similarity measures, and a generalized contrastive loss for learning efficient and robust periodic representations. 
Extensive experiments on common real-world tasks in human behavior analysis, environmental sensing, and healthcare domains verify the superior performance of SimPer compared to state-of-the-art SSL methods, highlighting its intriguing properties including better data efficiency, robustness to spurious correlations, and generalization to distribution shifts.", "keywords": "Periodic learning;Self-supervised learning;Representation learning;Periodic targets;Periodicity", "primary_area": "", "supplementary_material": "", "author": "Yuzhe Yang;Xin Liu;Jiang Wu;Silviu Borac;Dina Katabi;Ming-Zher Poh;Daniel McDuff", "authorids": "~Yuzhe_Yang1;~Xin_Liu8;wujiang@google.com;sborac@google.com;~Dina_Katabi1;~Ming-Zher_Poh2;~Daniel_McDuff1", "gender": "M;M;;;;M;M", "homepage": "https://people.csail.mit.edu/yuzhe/;https://homes.cs.washington.edu/~xliu0/;;;;;http://alumni.media.mit.edu/~djmcduff/", "dblp": "213/0962;76/1820-61;;;k/DinaKatabi;17/8870;63/9606", "google_scholar": "0_bSbIoAAAAJ;p9F83HoAAAAJ;;;;HhErKoYAAAAJ;m7Jr-b4AAAAJ", "orcid": "0000-0002-7634-8295;;;;;0000-0002-3510-1923;", "linkedin": "yuzhe-yang-6809b2131/;;;;;mingzher;", "or_profile": "~Yuzhe_Yang1;~Xin_Liu8;wujiang@google.com;sborac@google.com;~Dina_Katabi1;~Ming-Zher_Poh2;~Daniel_McDuff1", "aff": "Massachusetts Institute of Technology;Department of Computer Science, University of Washington;;;Massachusetts Institute of Technology;Google;Google", "aff_domain": "mit.edu;cs.washington.edu;;;mit.edu;google.com;google.com", "position": "PhD student;PhD student;;;Full Professor;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nyang2023simper,\ntitle={SimPer: Simple Self-Supervised Learning of Periodic Targets},\nauthor={Yuzhe Yang and Xin Liu and Jiang Wu and Silviu Borac and Dina Katabi and Ming-Zher Poh and Daniel McDuff},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=EKpMeEV0hOo}\n}", "github": "", "project": "", "reviewers": "y54W;DG5c;pX9g", "pdf_size": 6017081, "recommendation": "8;8;10", "confidence": "4;2;3", "correctness": "3;4;4", "technical_novelty": "3;3;4", "empirical_novelty": "4;3;3", "wc_summary_paper": "175;91;110", "wc_strength_and_weaknesses": "340;230;249", "wc_clarity_quality_novelty_and_reproducibility": "116;57;260", "wc_summary_review": "58;34;34", "wc_review": "689;412;653", "wc_reply_reviewers": "26;221;66", "wc_reply_authors": "1897;3864;1583", "reply_reviewers": "1;2;1", "reply_authors": "4;8;3", "recommendation_avg": [ 8.666666666666666, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 125.33333333333333, 35.96603335865043 ], "wc_strength_and_weaknesses_avg": [ 273.0, 48.00694394217014 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 144.33333333333334, 85.2616886740789 ], "wc_summary_review_avg": [ 42.0, 11.313708498984761 ], "wc_review_avg": [ 584.6666666666666, 122.97515557578649 ], "wc_reply_reviewers_avg": [ 104.33333333333333, 84.09650541028576 ], "wc_reply_authors_avg": [ 2448.0, 1009.4358160213391 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 5.0, 2.160246899469287 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 
0.4999999999999999, "gs_citation": 52, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12120474438296402027&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "pdf": "https://openreview.net/pdf?id=EKpMeEV0hOo", "email": "mit.edu;cs.washington.edu;;;mit.edu;google.com;google.com", "author_num": 7, "aff_unique_index": "0;1;0;2;2", "aff_unique_norm": "Massachusetts Institute of Technology;University of Washington;Google", "aff_unique_dep": ";Department of Computer Science;Google", "aff_unique_url": "https://web.mit.edu;https://www.washington.edu;https://www.google.com", "aff_unique_abbr": "MIT;UW;Google", "aff_campus_unique_index": "1;2;2", "aff_campus_unique": ";Seattle;Mountain View", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "ELmZduELxm", "title": "UTS: When Monotonic Value Factorisation Meets Non-monotonic and Stochastic Targets", "track": "main", "status": "Reject", "tldr": "We propose a novel value factorisation method to deal with non-monotonic and stochastic target joint action-values. ", "abstract": "Extracting decentralised policies from joint action-values is an attractive way to exploit centralised learning. It is possible to apply monotonic value factorisation to guarantee consistency between the centralised and decentralised policies. However, the best strategy for training decentralised policies when the target joint action-values are non-monotonic and stochastic is still unclear. We propose a novel value factorisation method named uncertainty-based target shaping (UTS) to solve this problem. UTS employs networks that estimate the reward and the following state's embedding, where the large prediction error indicates that the target is stochastic. By replacing deterministic targets for the suboptimal with the best per-agent values, we enforce that all shaped targets become a subset of the space that can be represented by monotonic value factorisation. 
Empirical results show that UTS outperforms state-of-the-art baselines on multiple benchmarks, including matrix games, predator-prey, and challenging tasks in StarCraft II micromanagement.", "keywords": "Value Decomposition;Multi-Agent Reinforcement Learning", "primary_area": "", "supplementary_material": "", "author": "Zeyang Liu;Lipeng Wan;Xue Sui;Xingyu Chen;Xuguang Lan", "authorids": "~Zeyang_Liu2;~Lipeng_Wan1;~Xue_Sui1;~Xingyu_Chen2;~Xuguang_Lan2", "gender": "M;M;F;M;", "homepage": ";http://gr.xjtu.edu.cn/web/zeuslan/team;https://gr.xjtu.edu.cn/web/zeuslan/team;;", "dblp": ";377/4923.html;;;", "google_scholar": "YOOlkJoAAAAJ;;;https://scholar.google.com.hk/citations?user=LR76K-MAAAAJ;", "orcid": "0000-0002-3110-8618;;;0000-0002-5226-963X;", "linkedin": ";;;;", "or_profile": "~Zeyang_Liu2;~Lipeng_Wan1;~Xue_Sui1;~Xingyu_Chen2;~Xuguang_Lan2", "aff": "Xi'an Jiaotong University;Xi'an Jiaotong University;Xi'an Jiaotong University;Xi'an Jiaotong University;", "aff_domain": "xjtu.edu.cn;xjtu.edu.cn;xjtu.edu.cn;xjtu.edu.cn;", "position": "PhD student;PhD student;MS student;Assistant Professor;", "bibtex": "@misc{\nliu2023uts,\ntitle={{UTS}: When Monotonic Value Factorisation Meets Non-monotonic and Stochastic Targets},\nauthor={Zeyang Liu and Lipeng Wan and Xue Sui and Xingyu Chen and Xuguang Lan},\nyear={2023},\nurl={https://openreview.net/forum?id=ELmZduELxm}\n}", "github": "", "project": "", "reviewers": "9h2R;gapB;kwZB", "site": "https://openreview.net/forum?id=ELmZduELxm", "pdf_size": 5773979, "recommendation": "3;3;6", "confidence": "4;5;4", "correctness": "2;2;3", "technical_novelty": "2;2;3", "empirical_novelty": "2;2;0", "wc_summary_paper": "37;33;178", "wc_strength_and_weaknesses": "413;346;204", "wc_clarity_quality_novelty_and_reproducibility": "62;94;275", "wc_summary_review": "66;10;18", "wc_review": "578;483;675", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1248;1032;569", "reply_reviewers": "0;0;0", "reply_authors": "2;2;1", "recommendation_avg": [ 4.0, 1.4142135623730951 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.3333333333333333, 0.9428090415820634 ], "wc_summary_paper_avg": [ 82.66666666666667, 67.4306227828936 ], "wc_strength_and_weaknesses_avg": [ 321.0, 87.13590916876157 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 143.66666666666666, 93.78106892592389 ], "wc_summary_review_avg": [ 31.333333333333332, 24.72964932132188 ], "wc_review_avg": [ 578.6666666666666, 78.38508928503062 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 949.6666666666666, 283.24822251084925 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.5, "corr_recommendation_correctness": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:f_Z0m3mn2-wJ:scholar.google.com/&scioq=UTS:+When+Monotonic+Value+Factorisation+Meets+Non-monotonic+and+Stochastic+Targets&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Xi'an Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.xjtu.edu.cn", "aff_unique_abbr": "XJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0",
"aff_country_unique": "China" }, { "title": "Disentangling Learning Representations with Density Estimation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11547", "id": "EMvG1Jdhw_8", "poster": "/media/PosterPDFs/ICLR%202023/11547.png?t=1681909405.5342534", "openreview": "https://openreview.net/forum?id=EMvG1Jdhw_8", "slides": "https://iclr.cc/virtual/2023/poster/11547", "video": "https://iclr.cc/virtual/2023/poster/11547", "author_site": "Eric Yeats, Frank Liu, Hai Li", "tldr": "We present GCAE, a scalable disentanglement method that uses the dual total correlation criterion", "abstract": "Disentangled learning representations have promising utility in many applications, but they currently suffer from serious reliability issues. We present Gaussian Channel Autoencoder (GCAE), a method which achieves reliable disentanglement via scalable non-parametric density estimation of the latent space. GCAE avoids the curse of dimensionality of density estimation by disentangling subsets of its latent space with the Dual Total Correlation (DTC) metric, thereby representing its high-dimensional latent joint distribution as a collection of many low-dimensional conditional distributions. In our experiments, GCAE achieves highly competitive and reliable disentanglement scores compared with state-of-the-art baselines.", "keywords": "autoencoder;representation learning;disentanglement;density estimation", "primary_area": "", "supplementary_material": "", "author": "Eric Yeats;Frank Y Liu;Hai Li", "authorids": "~Eric_Yeats1;~Frank_Y_Liu1;~Hai_Li1", "gender": "M;;F", "homepage": ";;https://ece.duke.edu/faculty/hai-helen-li", "dblp": "296/9361;18/2008.html;30/5330-1", "google_scholar": ";v69y--0AAAAJ;E6Tpfq8AAAAJ", "orcid": ";0000-0001-6615-0739;0000-0003-3228-6544", "linkedin": "eric-yeats-4a7132134/;;", "or_profile": "~Eric_Yeats1;~Frank_Y_Liu1;~Hai_Li1", "aff": "Duke University;Oak Ridge National Laboratory;Duke University", "aff_domain": "duke.edu;ornl.gov;duke.edu", "position": "PhD student;Principal Researcher;Professor", "bibtex": "@inproceedings{\nyeats2023disentangling,\ntitle={Disentangling Learning Representations with Density Estimation},\nauthor={Eric Yeats and Frank Y Liu and Hai Li},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=EMvG1Jdhw_8}\n}", "github": "", "project": "", "reviewers": "cTbH;qWQn;atH7;d23T", "pdf_size": 4973452, "recommendation": "5;6;6;6", "confidence": "3;4;3;5", "correctness": "3;3;3;4", "technical_novelty": "3;3;2;2", "empirical_novelty": "3;4;2;2", "wc_summary_paper": "127;133;53;47", "wc_strength_and_weaknesses": "373;395;243;362", "wc_clarity_quality_novelty_and_reproducibility": "108;43;42;128", "wc_summary_review": "43;128;38;37", "wc_review": "651;699;376;574", "wc_reply_reviewers": "0;402;26;0", "wc_reply_authors": "304;1311;127;238", "reply_reviewers": "0;1;1;0", "reply_authors": "1;3;1;1", "recommendation_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 90.0, 40.11234224026316 ], "wc_strength_and_weaknesses_avg": [ 343.25, 59.08627167117587 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 80.25, 38.408169703853375 ], "wc_summary_review_avg": [ 61.5, 38.46101922726437 ], "wc_review_avg": [ 575.0, 123.24163257600898 ], 
"wc_reply_reviewers_avg": [ 107.0, 170.6487620816512 ], "wc_reply_authors_avg": [ 495.0, 475.3446118344038 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.5222329678670935, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7649154234806174544&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=EMvG1Jdhw_8", "email": "duke.edu;ornl.gov;duke.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Duke University;Oak Ridge National Laboratory", "aff_unique_dep": ";", "aff_unique_url": "https://www.duke.edu;https://www.ornl.gov", "aff_unique_abbr": "Duke;ORNL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "EN8YE5dkOO", "title": "Cold Rao-Blackwellized Straight-Through Gumbel-Softmax Gradient Estimator", "track": "main", "status": "Reject", "tldr": "Improved gradient estimator for categorical random variables by finding the zero temperature limit of the Rao-Blackwellized Straight-Through Gumbel-Softmax Gradient Estimator", "abstract": "The problem of estimating the gradient of an expectation in discrete random variables arises in many applications: learning with discrete latent representations, training neural networks with quantized weights, activations, conditional blocks, etc. \nThis work contributes to the development of the popular Gumbel-Softmax family of estimator, which is based on approximating argmax with a temperature-parametrized softmax. The state-of-the art in this family, the Gumbel-Rao estimator uses internal MC samples to reduce the variance.\nWe show that in the limit of zero temperature the internal integration has a closed form solution. The limit estimator, called ZGR, has a favorable bias and variance, is simple to implement and computationally inexpensive and is obviously free of the temperature hyperparameter. Furthermore, ZGR is unbiased for the class of quadratic functions of categorical variables and can be decomposed into a sum of two simple but not very well performing on their own estimators: the straight through estimator and the DARN estimator. 
Experiments thoroughly validate the method.", "keywords": "Gumbel-Softmax;categorical variables;Concrete distribution;gradient;straight-through;VAE;quantization", "primary_area": "", "supplementary_material": "", "author": "Alexander Shekhovtsov", "authorids": "~Alexander_Shekhovtsov1", "gender": "M", "homepage": "http://cmp.felk.cvut.cz/~shekhovt/", "dblp": "61/5386", "google_scholar": "https://scholar.google.cz/citations?hl=en", "orcid": "", "linkedin": "", "or_profile": "~Alexander_Shekhovtsov1", "aff": "Czech Technical University in Prague", "aff_domain": "cvut.cz", "position": "Assistant Professor", "bibtex": "@misc{\nshekhovtsov2023cold,\ntitle={Cold Rao-Blackwellized Straight-Through Gumbel-Softmax Gradient Estimator},\nauthor={Alexander Shekhovtsov},\nyear={2023},\nurl={https://openreview.net/forum?id=EN8YE5dkOO}\n}", "github": "", "project": "", "reviewers": "GHDv;PG9N;Waws;wSJz", "site": "https://openreview.net/forum?id=EN8YE5dkOO", "pdf_size": 1061241, "recommendation": "3;3;6;8", "confidence": "3;2;3;3", "correctness": "3;3;4;3", "technical_novelty": "3;2;2;4", "empirical_novelty": "3;1;2;2", "wc_summary_paper": "87;25;74;74", "wc_strength_and_weaknesses": "55;77;327;729", "wc_clarity_quality_novelty_and_reproducibility": "105;54;139;37", "wc_summary_review": "41;35;107;66", "wc_review": "288;191;647;906", "wc_reply_reviewers": "96;835;410;0", "wc_reply_authors": "762;2780;706;1021", "reply_reviewers": "1;4;1;0", "reply_authors": "2;5;2;2", "recommendation_avg": [ 5.0, 2.1213203435596424 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 65.0, 23.695991222145572 ], "wc_strength_and_weaknesses_avg": [ 297.0, 271.3337428334338 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 83.75, 40.54241606022019 ], "wc_summary_review_avg": [ 62.25, 28.331740151286155 ], "wc_review_avg": [ 508.0, 285.7507655282834 ], "wc_reply_reviewers_avg": [ 335.25, 325.9489032041679 ], "wc_reply_authors_avg": [ 1317.25, 852.8380194972548 ], "reply_reviewers_avg": [ 1.5, 1.5 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 1, 0 ], "corr_recommendation_confidence": 0.5443310539518174, "corr_recommendation_correctness": 0.2721655269759087, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:AELkztihv_oJ:scholar.google.com/&scioq=Cold+Rao-Blackwellized+Straight-Through+Gumbel-Softmax+Gradient+Estimator&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Czech Technical University", "aff_unique_dep": "", "aff_unique_url": "https://www.ctu.cz", "aff_unique_abbr": "CTU", "aff_campus_unique_index": "0", "aff_campus_unique": "Prague", "aff_country_unique_index": "0", "aff_country_unique": "Czech Republic" }, { "id": "EO-NrUPaFLz", "title": "On the Forward Invariance of Neural ODEs", "track": "main", "status": "Withdraw", "tldr": "This paper proposes to achieve specification guarantees in the output space of neural ODEs with invariance set propagation.", "abstract": "To ensure robust and trustworthy decision-making, it is highly desirable to enforce constraints over a neural network's parameters and its inputs automatically by back-propagating output specifications. This way, we can guarantee that the network makes reliable decisions under perturbations. 
Here, we propose a new method for achieving a class of specification guarantees for neural Ordinary Differential Equations (ODEs) by using invariance set propagation. An invariance of a neural ODE is defined as an output specification, such as satisfying mathematical formulae, physical laws, and system safety. We use control barrier functions to specify the invariance of a neural ODE on the output layer and propagate it back to the input layer. Through this invariance backpropagation, we map output specifications onto constraints on the neural ODE parameters or its input. The satisfaction of the corresponding constraints implies the satisfaction of output specifications. This allows us to achieve output specification guarantees by changing the input or parameters while maximally preserving the model performance. We demonstrate the invariance propagation on a comprehensive series of representation learning tasks, including spiral curve regression, autoregressive modeling of joint physical dynamics, convexity portrait of a function, and safe neural control of collision avoidance for autonomous vehicles.", "keywords": "Neural ODE;Forward Invariance;Specification Guarantees", "primary_area": "", "supplementary_material": "/attachment/66dcfc8ac63941e7f8ad1c1fe162cf2bb7b5cc99.zip", "author": "Wei Xiao;Tsun-Hsuan Wang;Ramin Hasani;Mathias Lechner;Daniela Rus", "authorids": "~Wei_Xiao2;~Tsun-Hsuan_Wang2;~Ramin_Hasani1;~Mathias_Lechner1;~Daniela_Rus1", "gender": "M;M;Unspecified;F;M", "homepage": ";https://zswang666.github.io/;https://mlech26l.github.io/pages/;https://www.csail.mit.edu/person/daniela-rus;http://www.raminhasani.com", "dblp": "20/4794-3;217/1809.html;209/9862;r/DanielaRus;190/3168", "google_scholar": "BxdZJNQAAAAJ;xE3WSuYAAAAJ;https://scholar.google.at/citations?hl=en;https://scholar.google.com/citations?hl=en;https://scholar.google.at/citations?user=YarJF3QAAAAJ", "orcid": ";;;;0000-0002-9889-5222", "linkedin": ";;;;raminhasani/", "or_profile": "~Wei_Xiao2;~Tsun-Hsuan_Wang2;~Mathias_Lechner1;~Daniela_Rus1;~Ramin_M._Hasani1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;mit.edu;mit.edu;mit.edu", "position": "Postdoc;PhD student;Postdoc;Full Professor;Researcher", "bibtex": "@misc{\nxiao2023on,\ntitle={On the Forward Invariance of Neural {ODE}s},\nauthor={Wei Xiao and Tsun-Hsuan Wang and Ramin Hasani and Mathias Lechner and Daniela Rus},\nyear={2023},\nurl={https://openreview.net/forum?id=EO-NrUPaFLz}\n}", "github": "", "project": "", "reviewers": "q3cH;s4Ax;HpUr;razA", "site": "https://openreview.net/forum?id=EO-NrUPaFLz", "pdf_size": 1176826, "recommendation": "1;3;6;6", "confidence": "4;3;3;3", "correctness": "2;3;3;4", "technical_novelty": "3;3;3;3", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "108;183;42;47", "wc_strength_and_weaknesses": "217;763;298;82", "wc_clarity_quality_novelty_and_reproducibility": "1106;85;105;36", "wc_summary_review": "74;64;38;34", "wc_review": "1505;1095;483;199", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "68;43;125;127", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.0, 2.1213203435596424 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 95.0,
57.06575155029503 ], "wc_strength_and_weaknesses_avg": [ 340.0, 256.1181368040928 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 333.0, 446.9972035706711 ], "wc_summary_review_avg": [ 52.5, 16.9336942218761 ], "wc_review_avg": [ 820.5, 510.8960266042397 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 90.75, 36.34814300621147 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.8164965809277261, "corr_recommendation_correctness": 0.8333333333333334, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13350866493395878576&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 8, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "EONdIvi64h-", "title": "Harnessing spectral representations for subgraph alignment", "track": "main", "status": "Reject", "tldr": "", "abstract": "With the rise and advent of graph learning techniques, graph data has become ubiquitous in the machine learning field. However, while several efforts have been devoted to the design of new convolutional architectures, pooling or positional encoding schemes, relatively little attention has been paid to modeling pairwise problems such as signal transfer, graph isomorphism and subgraph correspondence tasks. With this paper, we anticipate the need for a convenient framework to deal with problems that revolve around the notion of a map among graphs, and focus in particular on the challenging subgraph alignment scenario. We claim that, first and foremost, the representation of a map plays a central role in how these problems should be modeled -- be it a map inference problem or a simpler signal transport task. Taking the hint from recent work in geometry processing, we propose the adoption of a spectral representation for maps that is compact, easy to compute, permutation-equivariant, easy to plug into learning pipelines, and especially effective for a wide range of situations, most notably when dealing with subgraph alignment problems.
We further report for the first time a surprising phenomenon: the partiality arising in subgraph alignment is manifested in the structure of the map coefficients, even in the absence of exact isomorphism, and this behavior is consistently observed over different families of graphs.", "keywords": "Graph alignment;Spectral theory", "primary_area": "", "supplementary_material": "/attachment/0786807597944d32e9fbf76148d9505b7c30399d.zip", "author": "Marco Pegoraro;Riccardo Marin;Arianna Rampini;Simone Melzi;Luca Cosmo;Emanuele Rodol\u00e0", "authorids": "~Marco_Pegoraro1;~Riccardo_Marin1;~Arianna_Rampini1;~Simone_Melzi2;~Luca_Cosmo2;~Emanuele_Rodol\u00e01", "gender": "M;;F;M;M;M", "homepage": ";;;https://sites.google.com/site/melzismn/;;", "dblp": "117/4931-2;;230/7989;160/2770;122/8728;54/8401", "google_scholar": "3YCdMCYAAAAJ;;https://scholar.google.it/citations?user=xI1O33gAAAAJ;https://scholar.google.it/citations?user=hkrUTqEAAAAJ;https://scholar.google.it/citations?hl=it;-EH4wBYAAAAJ", "orcid": "0000-0001-5690-8403;;;0000-0003-2790-9591;0000-0001-7729-4666;0000-0003-0091-7241", "linkedin": ";;;;;", "or_profile": "~Marco_Pegoraro1;~Riccardo_Marin1;~Arianna_Rampini1;~Simone_Melzi2;~Luca_Cosmo2;~Emanuele_Rodol\u00e01", "aff": "University of Roma \"La Sapienza\";;Autodesk;University of Milan - Bicocca;University of Venice;Sapienza University of Rome", "aff_domain": "uniroma1.it;;autodesk.com;unimib.it;unive.it;uniroma1.it", "position": "PhD student;;Researcher;Assistant Professor;Assistant Professor;Full Professor", "bibtex": "@misc{\npegoraro2023harnessing,\ntitle={Harnessing spectral representations for subgraph alignment},\nauthor={Marco Pegoraro and Riccardo Marin and Arianna Rampini and Simone Melzi and Luca Cosmo and Emanuele Rodol{\\`a}},\nyear={2023},\nurl={https://openreview.net/forum?id=EONdIvi64h-}\n}", "github": "", "project": "", "reviewers": "BSxT;iFfS;rw4F", "site": "https://openreview.net/forum?id=EONdIvi64h-", "pdf_size": 41422726, "recommendation": "3;3;5", "confidence": "4;4;3", "correctness": "2;2;2", "technical_novelty": "2;2;2", "empirical_novelty": "2;2;2", "wc_summary_paper": "80;35;57", "wc_strength_and_weaknesses": "364;271;162", "wc_clarity_quality_novelty_and_reproducibility": "95;60;61", "wc_summary_review": "58;39;30", "wc_review": "597;405;310", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "77;0;182", "reply_reviewers": "0;0;0", "reply_authors": "1;0;1", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 2.0, 0.0 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 57.333333333333336, 18.372685039360892 ], "wc_strength_and_weaknesses_avg": [ 265.6666666666667, 82.5523402992846 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 72.0, 16.268579122549905 ], "wc_summary_review_avg": [ 42.333333333333336, 11.67142760000773 ], "wc_review_avg": [ 437.3333333333333, 119.37708695092576 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 86.33333333333333, 74.59371436134697 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0.6666666666666666, 0.4714045207910317 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8212071328200984212&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1;2;3;4",
"aff_unique_norm": "University of Rome La Sapienza;Autodesk;University of Milan;University of Venice;Sapienza University of Rome", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.uniroma1.it;https://www.autodesk.com;https://www.unimib.it;https://www.unive.it;https://www.uniroma1.it", "aff_unique_abbr": "La Sapienza;Autodesk;UNIMIB;Unive;Sapienza", "aff_campus_unique_index": "0;2;0", "aff_campus_unique": "Rome;;Bicocca", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "Italy;United States" }, { "id": "EPUWZhBd9Lb", "title": "Interpolating Compressed Parameter Subspaces", "track": "main", "status": "Reject", "tldr": "", "abstract": "Though distribution shifts have caused growing concern for machine learning scalability, solutions tend to specialize towards a specific type of distribution shift. Methods for label shift may not succeed against domain or task shift, and vice versa. We learn that constructing a Compressed Parameter Subspaces (CPS), a geometric structure representing distance-regularized parameters mapped to a set of train-time distributions, can maximize average accuracy over a broad range of distribution shifts concurrently. We show sampling parameters within a CPS can mitigate backdoor, adversarial, permutation, stylization and rotation perturbations. We also show training a hypernetwork representing a CPS can adapt to seen tasks as well as unseen interpolated tasks.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Siddhartha Datta;Nigel Shadbolt", "authorids": "~Siddhartha_Datta1;~Nigel_Shadbolt1", "gender": ";M", "homepage": "http://siddharthadatta.ml/;https://www.cs.ox.ac.uk/people/nigel.shadbolt/", "dblp": ";s/NigelShadbolt", "google_scholar": ";wTAM67UAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Siddhartha_Datta1;~Nigel_Shadbolt1", "aff": "University of Oxford;University of Oxford", "aff_domain": "ox.ac.uk;ox.ac.uk", "position": "PhD student;Full Professor", "bibtex": "@misc{\ndatta2023interpolating,\ntitle={Interpolating Compressed Parameter Subspaces},\nauthor={Siddhartha Datta and Nigel Shadbolt},\nyear={2023},\nurl={https://openreview.net/forum?id=EPUWZhBd9Lb}\n}", "github": "", "project": "", "reviewers": "326H;jM1a;haBW", "site": "https://openreview.net/forum?id=EPUWZhBd9Lb", "pdf_size": 1378762, "recommendation": "3;3;5", "confidence": "4;4;3", "correctness": "3;2;3", "technical_novelty": "2;3;3", "empirical_novelty": "2;3;3", "wc_summary_paper": "43;73;69", "wc_strength_and_weaknesses": "235;288;65", "wc_clarity_quality_novelty_and_reproducibility": "28;25;49", "wc_summary_review": "21;29;132", "wc_review": "327;415;315", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 61.666666666666664, 13.299958228840001 ], "wc_strength_and_weaknesses_avg": [ 196.0, 95.12447984965104 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 34.0, 10.677078252031311 ], "wc_summary_review_avg": [ 60.666666666666664, 50.54590872552287 ], "wc_review_avg": [ 352.3333333333333, 44.58200932613464 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], 
"reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": 0.5, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5640700568096401548&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0", "aff_unique_norm": "University of Oxford", "aff_unique_dep": "", "aff_unique_url": "https://www.ox.ac.uk", "aff_unique_abbr": "Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "title": "TTN: A Domain-Shift Aware Batch Normalization in Test-Time Adaptation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11100", "id": "EQfeudmWLQ", "poster": "/media/PosterPDFs/ICLR%202023/11100.png?t=1682647217.1022327", "openreview": "https://openreview.net/forum?id=EQfeudmWLQ", "slides": "https://iclr.cc/virtual/2023/poster/11100", "video": "https://iclr.cc/virtual/2023/poster/11100", "author_site": "Hyesu Lim, Byeonggeun Kim, Jaegul Choo, Sungha Choi", "tldr": "We propose a test-time batch normalization method, which interpolates source and current batch statistics considering each layer's domain-shift sensitivity level that shows robust performance over various realistic evaluation scenarios..", "abstract": "This paper proposes a novel batch normalization strategy for test-time adaptation. Recent test-time adaptation methods heavily rely on the modified batch normalization, i.e., transductive batch normalization (TBN), which calculates the mean and the variance from the current test batch rather than using the running mean and variance obtained from the source data, i.e., conventional batch normalization (CBN). Adopting TBN that employs test batch statistics mitigates the performance degradation caused by the domain shift. However, re-estimating normalization statistics using test data depends on impractical assumptions that a test batch should be large enough and be drawn from i.i.d. stream, and we observed that the previous methods with TBN show critical performance drop without the assumptions. In this paper, we identify that CBN and TBN are in a trade-off relationship and present a new test-time normalization (TTN) method that interpolates the statistics by adjusting the importance between CBN and TBN according to the domain-shift sensitivity of each BN layer. Our proposed TTN improves model robustness to shifted domains across a wide range of batch sizes and in various realistic evaluation scenarios. TTN is widely applicable to other test-time adaptation methods that rely on updating model parameters via backpropagation. 
We demonstrate that adopting TTN further improves their performance, achieving state-of-the-art results on various standard benchmarks.", "keywords": "Test time adaptation;Domain adaptation;Batch Normalization", "primary_area": "", "supplementary_material": "", "author": "Hyesu Lim;Byeonggeun Kim;Jaegul Choo;Sungha Choi", "authorids": "~Hyesu_Lim1;~Byeonggeun_Kim1;~Jaegul_Choo1;~Sungha_Choi1", "gender": ";M;M;M", "homepage": "https://sites.google.com/view/hyesulim;https://sites.google.com/view/byeonggeun-kim;https://sites.google.com/site/jaegulchoo/;https://www.sunghachoi.com/", "dblp": "304/7506;250/9485;07/2074;16/1923", "google_scholar": "https://scholar.google.co.kr/citations?hl=ko;Pee89n0AAAAJ;GHJYsLEAAAAJ;JMTnthsAAAAJ", "orcid": ";;;0000-0003-2313-9243", "linkedin": "hyesulim/;byeonggeun-kim-b8112a194/;;sungha-choi-1130185a/", "or_profile": "~Hyesu_Lim1;~Byeonggeun_Kim1;~Jaegul_Choo1;~Sungha_Choi1", "aff": "Carnegie Mellon University;QualComm;Korea Advanced Institute of Science & Technology;Qualcomm AI Research", "aff_domain": "andrew.cmu.edu;qti.qualcomm.com;kaist.ac.kr;qti.qualcomm.com", "position": "Researcher;Researcher;Associate Professor;Researcher", "bibtex": "@inproceedings{\nlim2023ttn,\ntitle={{TTN}: A Domain-Shift Aware Batch Normalization in Test-Time Adaptation},\nauthor={Hyesu Lim and Byeonggeun Kim and Jaegul Choo and Sungha Choi},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=EQfeudmWLQ}\n}", "github": "", "project": "", "reviewers": "yiJu;RNFt;EgJB;4Hgx", "pdf_size": 1002619, "recommendation": "5;6;6;8", "confidence": "4;4;3;4", "correctness": "3;2;4;3", "technical_novelty": "2;3;2;3", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "96;69;71;76", "wc_strength_and_weaknesses": "287;298;215;437", "wc_clarity_quality_novelty_and_reproducibility": "31;21;30;20", "wc_summary_review": "58;23;31;59", "wc_review": "472;411;347;592", "wc_reply_reviewers": "101;137;0;79", "wc_reply_authors": "2267;2039;522;1892", "reply_reviewers": "1;2;0;1", "reply_authors": "7;6;2;6", "recommendation_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 78.0, 10.700467279516348 ], "wc_strength_and_weaknesses_avg": [ 309.25, 80.35040447938019 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 25.5, 5.024937810560445 ], "wc_summary_review_avg": [ 42.75, 16.005858302509115 ], "wc_review_avg": [ 455.5, 90.35623940824452 ], "wc_reply_reviewers_avg": [ 79.25, 50.22138488731668 ], "wc_reply_authors_avg": [ 1680.0, 681.7913903827182 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 5.25, 1.920286436967152 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.13245323570650439, "corr_recommendation_correctness": 0.0, "gs_citation": 111, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12984514498411836030&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=EQfeudmWLQ", "email": "andrew.cmu.edu;qti.qualcomm.com;kaist.ac.kr;qti.qualcomm.com", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Carnegie Mellon University;Qualcomm Incorporated;Korea Advanced Institute of Science and Technology;Qualcomm", "aff_unique_dep": ";;;Qualcomm AI Research", "aff_unique_url":
"https://www.cmu.edu;https://www.qualcomm.com;https://www.kaist.ac.kr;https://www.qualcomm.com/research", "aff_unique_abbr": "CMU;Qualcomm;KAIST;QAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;South Korea" }, { "id": "EQiRSnqUYOh", "title": "Demystifying the Optimization and Generalization of Deep PAC-Bayesian Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "In addition to being a successful generalization bound analysis tool, the PAC-Bayesian bound can also be incorporated into an objective function to train a probabilistic neural network, which we refer to simply as {\\it PAC-Bayesian Learning}. PAC-Bayesian learning has been proven to be able to achieve a competitive expected test set error numerically, while providing a tight generalization bound in practice, through gradient descent training. Despite its empirical success, the theoretical analysis of deep PAC-Bayesian learning for neural networks is rarely explored. To this end, this paper proposes a theoretical convergence and generalization analysis for PAC-Bayesian learning. For a deep and wide probabilistic neural network, we show that when PAC-Bayesian learning is applied, the convergence result corresponds to solving a kernel ridge regression when the probabilistic neural tangent kernel (PNTK) is used as its kernel. Based on this finding, we further obtain an analytic and guaranteed PAC-Bayesian generalization bound for the first time, which is an improvement over the Rademacher complexity-based bound for deterministic neural networks. Finally, drawing insight from our theoretical results, we propose a proxy measure for efficient hyperparameter selection, which is proven to be time-saving on various benchmarks.", "keywords": "PAC-Bayes;Probabilistic Neural Netowrks;Neural Tangent Kernel", "primary_area": "", "supplementary_material": "/attachment/1b86c16b09d9100ecb029bb0ee9b5fe4a2ea2f32.zip", "author": "Wei Huang;Chunrui Liu;Yilan Chen;Richard Yi Da Xu;Miao Zhang;Tsui-Wei Weng", "authorids": "~Wei_Huang6;~Chunrui_Liu1;~Yilan_Chen1;~Richard_Yi_Da_Xu1;~Miao_Zhang4;~Tsui-Wei_Weng1", "gender": "M;M;M;M;M;F", "homepage": "https://weihuang05.github.io/;;https://yilanchen6.github.io/;https://www.math.hkbu.edu.hk/people/xu-yi-da/;https://sites.google.com/view/miaozhang;https://lilywenglab.github.io", "dblp": "81/6685-34;;167/6638-2.html;38/3064;60/7041-1.html;177/9197", "google_scholar": "RZfDh4MAAAAJ;;6wmzpRIAAAAJ;ykOUWa4AAAAJ;6EUV_UMAAAAJ;v8GM4xoAAAAJ", "orcid": "0000-0001-5674-7021;;;0000-0003-2080-4762;0000-0002-1262-4174;", "linkedin": ";kasper-liu-494028126/;;richard-xu-0221a943/;miao-zhang-71b13a177/;", "or_profile": "~Wei_Huang6;~Chunrui_Liu1;~Yilan_Chen1;~Richard_Yi_Da_Xu1;~Miao_Zhang4;~Tsui-Wei_Weng1", "aff": "RIKEN AIP;;University of California, San Diego;Hong Kong Baptist University;Harbin Institute of Technology (Shenzhen);University of California, San Diego", "aff_domain": "riken.jp;;ucsd.edu;hkbu.edu.hk;hit.edu.cn;ucsd.edu", "position": "Postdoc;;PhD student;Full Professor;Full Professor;Assistant Professor", "bibtex": "@misc{\nhuang2023demystifying,\ntitle={Demystifying the Optimization and Generalization of Deep {PAC}-Bayesian Learning},\nauthor={Wei Huang and Chunrui Liu and Yilan Chen and Richard Yi Da Xu and Miao Zhang and Tsui-Wei Weng},\nyear={2023},\nurl={https://openreview.net/forum?id=EQiRSnqUYOh}\n}", "github": "", "project": "", "reviewers": "ag6P;Fsnz;7Cjm;DZ2U", "site": 
"https://openreview.net/forum?id=EQiRSnqUYOh", "pdf_size": 672113, "recommendation": "3;3;5;5", "confidence": "4;2;3;3", "correctness": "3;2;3;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;0;2;2", "wc_summary_paper": "66;64;116;106", "wc_strength_and_weaknesses": "282;52;147;252", "wc_clarity_quality_novelty_and_reproducibility": "245;114;33;7", "wc_summary_review": "43;24;31;49", "wc_review": "636;254;327;414", "wc_reply_reviewers": "130;0;0;0", "wc_reply_authors": "1327;567;570;871", "reply_reviewers": "1;0;0;0", "reply_authors": "3;1;1;2", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 88.0, 23.280893453645632 ], "wc_strength_and_weaknesses_avg": [ 183.25, 90.85531079689288 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 99.75, 92.6805670030131 ], "wc_summary_review_avg": [ 36.75, 9.807522622966516 ], "wc_review_avg": [ 407.75, 143.43705065289095 ], "wc_reply_reviewers_avg": [ 32.5, 56.29165124598851 ], "wc_reply_authors_avg": [ 833.75, 310.40407133283543 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:A4UmBt2E5HcJ:scholar.google.com/&scioq=Demystifying+the+Optimization+and+Generalization+of+Deep+PAC-Bayesian+Learning&hl=en&as_sdt=0,48", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;1", "aff_unique_norm": "RIKEN;University of California, San Diego;Hong Kong Baptist University;Harbin Institute of Technology", "aff_unique_dep": "Advanced Institute for Computational Science;;;", "aff_unique_url": "https://www.aip.riken.jp;https://www.ucsd.edu;https://www.hkbu.edu.hk;http://en.hhit.edu.cn/", "aff_unique_abbr": "RIKEN AIP;UCSD;HKBU;HIT", "aff_campus_unique_index": "1;2;3;1", "aff_campus_unique": ";San Diego;Hong Kong SAR;Shenzhen", "aff_country_unique_index": "0;1;2;2;1", "aff_country_unique": "Japan;United States;China" }, { "id": "ERjQnrmLKH4", "title": "Learning Counterfactually Invariant Predictors", "track": "main", "status": "Reject", "tldr": "We propose a new technique to train predictors that are counterfactually invariant, i.e., robust to interventions on specified covariates.", "abstract": "We propose a method to learn predictors that are invariant under counterfactual changes of certain covariates. This method is useful when the prediction target is causally influenced by covariates that should not affect the predictor output. For instance, this could prevent an object recognition model from being influenced by position, orientation, or scale of the object itself. We propose a model-agnostic regularization term based on conditional kernel mean embeddings to enforce counterfactual invariance during training. We prove the soundness of our method, which can handle mixed categorical and continuous multivariate attributes. 
Empirical results on synthetic and real-world data demonstrate the efficacy of our method in a variety of settings.", "keywords": "causality;kernel mean embeddings;counterfactual fairness;counterfactual invariance", "primary_area": "", "supplementary_material": "/attachment/9c6dd0258611bb79437a7bef1c31a4166683e5e2.zip", "author": "Francesco Quinzan;Cecilia Casolo;Krikamol Muandet;Niki Kilbertus;Yucen Luo", "authorids": "~Francesco_Quinzan1;~Cecilia_Casolo1;~Krikamol_Muandet1;~Niki_Kilbertus1;~Yucen_Luo1", "gender": ";F;M;;F", "homepage": ";;http://krikamol.org;;http://yucenluo.com", "dblp": ";;34/1240;202/1966;http://dblp.uni-trier.de/pers/hd/l/Luo:Yucen", "google_scholar": ";;E2z5uYsAAAAJ;uQZjTq4AAAAJ;__wMZSYAAAAJ", "orcid": ";;0000-0002-4182-5282;;", "linkedin": ";cecilia-casolo-819374171;krikamol-muandet/;;http://www.linkedin.com/in/yucen-luo-5b8071a1/", "or_profile": "~Francesco_Quinzan1;~Cecilia_Casolo1;~Krikamol_Muandet1;~Niki_Kilbertus1;~Yucen_Luo1", "aff": ";Technische Universit\u00e4t M\u00fcnchen;CISPA Helmholtz Center for Information Security;Helmholtz AI;Max Planck Institute for Intelligent Systems, Max-Planck Institute", "aff_domain": ";tum.de;cispa.saarland;helmholtz-muenchen.de;tuebingen.mpg.de", "position": ";PhD student;Associate Professor;Group Leader;Postdoc", "bibtex": "@misc{\nquinzan2023learning,\ntitle={Learning Counterfactually Invariant Predictors},\nauthor={Francesco Quinzan and Cecilia Casolo and Krikamol Muandet and Niki Kilbertus and Yucen Luo},\nyear={2023},\nurl={https://openreview.net/forum?id=ERjQnrmLKH4}\n}", "github": "", "project": "", "reviewers": "zfVq;Udyy;yLyi;RdaH", "site": "https://openreview.net/forum?id=ERjQnrmLKH4", "pdf_size": 740414, "recommendation": "5;5;6;6", "confidence": "3;3;2;2", "correctness": "4;3;3;3", "technical_novelty": "3;3;2;2", "empirical_novelty": "1;2;3;2", "wc_summary_paper": "67;83;61;73", "wc_strength_and_weaknesses": "257;706;60;114", "wc_clarity_quality_novelty_and_reproducibility": "59;24;230;29", "wc_summary_review": "38;52;136;4", "wc_review": "421;865;487;220", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "743;1217;255;345", "reply_reviewers": "0;0;0;0", "reply_authors": "1;2;1;1", "recommendation_avg": [ 5.5, 0.5 ], "confidence_avg": [ 2.5, 0.5 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 71.0, 8.12403840463596 ], "wc_strength_and_weaknesses_avg": [ 284.25, 253.91374027413326 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 85.5, 84.49408263304596 ], "wc_summary_review_avg": [ 57.5, 48.566964080535236 ], "wc_review_avg": [ 498.25, 233.46453156743104 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 640.0, 380.3905887374187 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": -0.5773502691896257, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12603329754815759414&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Technische Universit\u00e4t M\u00fcnchen;CISPA Helmholtz Center for Information Security;Helmholtz Association of German Research Centres;Max Planck Institute for Intelligent Systems", "aff_unique_dep": ";;Helmholtz AI;Intelligent Systems", "aff_unique_url": 
"https://www.tum.de;https://www.cispa.de/;https://www.helmholtz-ai.de;https://www.mpi-is.mpg.de", "aff_unique_abbr": "TUM;CISPA;Helmholtz AI;MPI-IS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "id": "ESR6hysKDsW", "title": "Class-Incremental Learning with Repetition", "track": "main", "status": "Reject", "tldr": "", "abstract": "Real-world data streams naturally include the repetition of previous concepts. From a Continual Learning (CL) perspective, repetition is a property of the environment and, unlike replay, cannot be controlled by the user. Nowadays, Class-Incremental scenarios represent the leading test-bed for assessing and comparing CL strategies. This family of scenarios is very easy to use, but it never allows revisiting previously seen classes, thus completely disregarding the role of repetition. We focus on the family of Class-Incremental with Repetition (CIR) scenarios, where repetition is embedded in the definition of the stream. We propose two stochastic scenario generators that produce a wide range of CIR scenarios starting from a single dataset and a few control parameters. We conduct the first comprehensive evaluation of repetition in CL by studying the behavior of existing CL strategies under different CIR scenarios. We then present a novel replay strategy that exploits repetition and counteracts the natural imbalance present in the stream. On both CIFAR100 and TinyImageNet, our strategy outperforms other replay approaches, which are not designed for environments with repetition.", "keywords": "continual learning;lifelong learning;class-incremental learning;incremental learning", "primary_area": "", "supplementary_material": "", "author": "Hamed Hemati;Andrea Cossu;Antonio Carta;Julio Hurtado;Lorenzo Pellegrini;Davide Bacciu;Vincenzo Lomonaco;Damian Borth", "authorids": "~Hamed_Hemati1;~Andrea_Cossu1;~Antonio_Carta1;~Julio_Hurtado1;~Lorenzo_Pellegrini1;~Davide_Bacciu1;~Vincenzo_Lomonaco1;~Damian_Borth1", "gender": ";M;;M;M;M;M;M", "homepage": ";https://www.andreacossu.com/;http://pages.di.unipi.it/carta/;https://warwick.ac.uk/fac/sci/camacs/people/hurtado;;http://pages.di.unipi.it/bacciu/;https://vincenzolomonaco.com;http://www.hsg.ai", "dblp": ";262/6262;178/6658;178/4255;;07/6626;157/5127;48/1492", "google_scholar": ";0Kst5iEAAAAJ;;https://scholar.google.com/citations?hl=es;X3jGASoAAAAJ;https://scholar.google.it/citations?user=1d5n2WkAAAAJ;https://scholar.google.it/citations?user=rQLINtQAAAAJ;J-8Z038AAAAJ", "orcid": ";;;;;0000-0001-5213-2468;;0000-0002-4660-2627", "linkedin": ";;;;;bacciu/;;damianborth/", "or_profile": "~Hamed_Hemati1;~Andrea_Cossu1;~Antonio_Carta1;~Julio_Hurtado1;~Lorenzo_Pellegrini1;~Davide_Bacciu1;~Vincenzo_Lomonaco1;~Damian_Borth1", "aff": ";Scuola Normale Superiore;University of Pisa;University of Pisa;;University of Pisa;University of Pisa;University of St.Gallen", "aff_domain": ";sns.it;unipi.it;unipi.it;;unipi.it;unipi.it;using.ch", "position": ";PhD student;Assistant Professor;Postdoc;;Full Professor;Assistant Professor;Full Professor", "bibtex": "@misc{\nhemati2023classincremental,\ntitle={Class-Incremental Learning with Repetition},\nauthor={Hamed Hemati and Andrea Cossu and Antonio Carta and Julio Hurtado and Lorenzo Pellegrini and Davide Bacciu and Vincenzo Lomonaco and Damian Borth},\nyear={2023},\nurl={https://openreview.net/forum?id=ESR6hysKDsW}\n}", "github": "", "project": "", "reviewers": "MpJD;U14g;rWm4", "site": 
"https://openreview.net/forum?id=ESR6hysKDsW", "pdf_size": 1545202, "recommendation": "3;6;8", "confidence": "4;4;2", "correctness": "3;3;4", "technical_novelty": "3;2;3", "empirical_novelty": "3;3;3", "wc_summary_paper": "84;85;111", "wc_strength_and_weaknesses": "217;100;41", "wc_clarity_quality_novelty_and_reproducibility": "122;108;16", "wc_summary_review": "27;66;38", "wc_review": "450;359;206", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "764;418;106", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 5.666666666666667, 2.0548046676563256 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 93.33333333333333, 12.498888839501783 ], "wc_strength_and_weaknesses_avg": [ 119.33333333333333, 73.14065596886287 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 82.0, 47.01772715334788 ], "wc_summary_review_avg": [ 43.666666666666664, 16.418147141366337 ], "wc_review_avg": [ 338.3333333333333, 100.67880721493586 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 429.3333333333333, 268.746886286872 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": -0.8029550685469663, "corr_recommendation_correctness": 0.8029550685469661, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16816123823149056860&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff_unique_index": "0;1;1;1;1;2", "aff_unique_norm": "Scuola Normale Superiore;University of Pisa;University of St.Gallen", "aff_unique_dep": ";;", "aff_unique_url": "https://www.sns.it;https://www.unipi.it;https://www.unisg.ch", "aff_unique_abbr": "SNS;UNIP;HSG", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "Italy;Switzerland" }, { "title": "Discovering Latent Knowledge in Language Models Without Supervision", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11480", "id": "ETKGuby0hcs", "poster": "/media/PosterPDFs/ICLR%202023/11480.png?t=1682296361.602752", "openreview": "https://openreview.net/forum?id=ETKGuby0hcs", "slides": "https://iclr.cc/virtual/2023/poster/11480", "video": "https://iclr.cc/virtual/2023/poster/11480", "author_site": "Collin Burns, Haotian Ye, Dan Klein, Jacob Steinhardt", "tldr": "", "abstract": "Existing techniques for training language models can be misaligned with the truth: if we train models with imitation learning, they may reproduce errors that humans make; if we train them to generate text that humans rate highly, they may output errors that human evaluators can't detect. We propose circumventing this issue by directly finding latent knowledge inside the internal activations of a language model in a purely unsupervised way. Specifically, we introduce a method for accurately answering yes-no questions given only unlabeled model activations. It works by finding a direction in activation space that satisfies logical consistency properties, such as that a statement and its negation have opposite truth values. 
We show that despite using no supervision and no model outputs, our method can recover diverse knowledge represented in large language models: across 6 models and 10 question-answering datasets, it outperforms zero-shot accuracy by 4\\% on average. We also find that it cuts prompt sensitivity in half and continues to maintain high accuracy even when models are prompted to generate incorrect answers. Our results provide an initial step toward discovering what language models know, distinct from what they say, even when we don't have access to explicit ground truth labels.", "keywords": "AI safety;AI alignment;truthfulness;large language models;honesty;interpretability", "primary_area": "", "supplementary_material": "/attachment/b71e69e80dccd499e214bb4590f3d2dbb102e085.zip", "author": "Collin Burns;Haotian Ye;Dan Klein;Jacob Steinhardt", "authorids": "~Collin_Burns1;~Haotian_Ye1;~Dan_Klein1;~Jacob_Steinhardt1", "gender": "M;M;;", "homepage": "http://collinpburns.com/;https://haotianye.com;http://people.eecs.berkeley.edu/~klein/;", "dblp": ";284/0539;;35/10625", "google_scholar": "JGS2xjkAAAAJ;VU4chlsAAAAJ;;", "orcid": ";;;", "linkedin": "collin-burns/;;dan-klein/;", "or_profile": "~Collin_Burns1;~Haotian_Ye1;~Dan_Klein1;~Jacob_Steinhardt1", "aff": "University of California, Berkeley;Peking University;University of California, Berkeley;University of California, Berkeley", "aff_domain": "berkeley.edu;pku.edu.cn;berkeley.edu;berkeley.edu", "position": "PhD student;Undergrad student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nburns2023discovering,\ntitle={Discovering Latent Knowledge in Language Models Without Supervision},\nauthor={Collin Burns and Haotian Ye and Dan Klein and Jacob Steinhardt},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=ETKGuby0hcs}\n}", "github": "", "project": "", "reviewers": "32x3;eJhP;r2Hq;YocT", "pdf_size": 1872510, "recommendation": "6;6;6;6", "confidence": "4;3;3;4", "correctness": "2;3;3;3", "technical_novelty": "3;3;4;3", "empirical_novelty": "4;3;4;3", "wc_summary_paper": "166;50;141;89", "wc_strength_and_weaknesses": "362;159;426;282", "wc_clarity_quality_novelty_and_reproducibility": "117;55;191;81", "wc_summary_review": "59;37;50;40", "wc_review": "704;301;808;492", "wc_reply_reviewers": "327;0;542;403", "wc_reply_authors": "1749;347;2301;637", "reply_reviewers": "3;0;2;1", "reply_authors": "4;2;3;2", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.5, 0.5 ], "wc_summary_paper_avg": [ 111.5, 45.0804835821445 ], "wc_strength_and_weaknesses_avg": [ 307.25, 99.64279953915386 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 111.0, 51.16639522186413 ], "wc_summary_review_avg": [ 46.5, 8.674675786448736 ], "wc_review_avg": [ 576.25, 195.50495518016928 ], "wc_reply_reviewers_avg": [ 318.0, 199.1268439964838 ], "wc_reply_authors_avg": [ 1258.5, 797.5730374078602 ], "reply_reviewers_avg": [ 1.5, 1.118033988749895 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 321, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6459101429940897247&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=ETKGuby0hcs", 
"email": "berkeley.edu;pku.edu.cn;berkeley.edu;berkeley.edu", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of California, Berkeley;Peking University", "aff_unique_dep": ";", "aff_unique_url": "https://www.berkeley.edu;http://www.pku.edu.cn", "aff_unique_abbr": "UC Berkeley;Peking U", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;China" }, { "title": "Mutual Partial Label Learning with Competitive Label Noise", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11405", "id": "EUrxG8IBCrC", "poster": "/media/PosterPDFs/ICLR%202023/11405.png?t=1680827204.3608603", "openreview": "https://openreview.net/forum?id=EUrxG8IBCrC", "slides": "https://iclr.cc/virtual/2023/poster/11405", "video": "https://iclr.cc/virtual/2023/poster/11405", "author_site": "Yan Yan, Yuhong Guo", "tldr": "", "abstract": "Partial label learning (PLL) is an important weakly supervised learning problem, where each training instance is associated with a set of candidate labels that include both the true label and additional noisy labels. Most existing PLL methods assume the candidate noisy labels are randomly chosen, which hardly holds in real-world learning scenarios. In this paper, we consider a more realistic PLL scenario with competitive label noise that is more difficult to distinguish from the true label than the random label noise. We propose a novel Mutual Learning based PLL approach named ML-PLL to address this challenging problem. ML-PLL learns a prediction network based classifier and a class-prototype based classifier cooperatively through interactive mutual learning and label correction. Moreover, we use a transformation network to model the association relationships between the true label and candidate labels, and learn it together with the prediction network to match the observed candidate labels in the training data and enhance label correction. 
Extensive experiments are conducted on several benchmark PLL datasets, and the proposed ML-PLL approach demonstrates state-of-the-art performance for partial label learning.", "keywords": "Partial label learning;label noise;classification", "primary_area": "", "supplementary_material": "", "author": "Yan Yan;Yuhong Guo", "authorids": "~Yan_Yan10;~Yuhong_Guo1", "gender": "M;", "homepage": ";", "dblp": ";", "google_scholar": ";", "orcid": ";", "linkedin": "yan-yan-502b19219/?originalSubdomain=ca;", "or_profile": "~Yan_Yan10;~Yuhong_Guo1", "aff": "Carleton University;", "aff_domain": "carleton.ca;", "position": "Postdoc;", "bibtex": "@inproceedings{\nyan2023mutual,\ntitle={Mutual Partial Label Learning with Competitive Label Noise},\nauthor={Yan Yan and Yuhong Guo},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=EUrxG8IBCrC}\n}", "github": "", "project": "", "reviewers": "CWMY;8WLg;qLYR", "pdf_size": 541418, "recommendation": "6;8;8", "confidence": "4;4;4", "correctness": "3;4;3", "technical_novelty": "3;3;3", "empirical_novelty": "3;3;3", "wc_summary_paper": "232;63;74", "wc_strength_and_weaknesses": "451;262;260", "wc_clarity_quality_novelty_and_reproducibility": "273;61;9", "wc_summary_review": "257;21;19", "wc_review": "1213;407;362", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 7.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 123.0, 77.20535387307454 ], "wc_strength_and_weaknesses_avg": [ 324.3333333333333, 89.57058048761819 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 114.33333333333333, 114.1850350187021 ], "wc_summary_review_avg": [ 99.0, 111.72585496055363 ], "wc_review_avg": [ 660.6666666666666, 390.99048013417865 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.49999999999999983, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5094740291123328319&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "pdf": "https://openreview.net/pdf?id=EUrxG8IBCrC", "email": "carleton.ca;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Carleton University", "aff_unique_dep": "", "aff_unique_url": "https://carleton.ca", "aff_unique_abbr": "Carleton", "aff_country_unique_index": "0", "aff_country_unique": "Canada" }, { "title": "Volumetric Optimal Transportation by Fast Fourier Transform", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12187", "id": "EVrz7UM-ZDm", "poster": "", "openreview": "https://openreview.net/forum?id=EVrz7UM-ZDm", "slides": "https://iclr.cc/virtual/2023/poster/12187", "video": "https://iclr.cc/virtual/2023/poster/12187", "author_site": "Na Lei, DONGSHENG An, Min Zhang, Xiaoyin Xu, David Gu", "tldr": "Optimal transport, Monge-Amp\\`ere equation, Elliptic PDE, Fast Fourier transform", "abstract": "The optimal transportation map finds the most economical way to transport one probability measure to another, and it has been applied in a broad range of applications in machine learning and computer vision. 
By Brenier's theorem, computing the optimal transport map is equivalent to solving a Monge-Amp\\`ere equation, which is highly non-linear. Therefore, the computation of optimal transportation maps is intrinsically challenging.\n\nIn this work, we propose a novel and powerful method, the FFT-OT (fast Fourier transform-optimal transport), to solve 3-dimensional OT problems. The method is based on several key ideas: first, the Monge-Amp\\`ere equation is linearized to a sequence of linear elliptic PDEs with spatially and temporally varying coefficients; second, the obliqueness property of optimal transportation maps is reformulated as a Neumann boundary condition; and third, the variable-coefficient elliptic PDEs are approximated by constant-coefficient elliptic PDEs and solved by FFT on GPUs. We also prove that the algorithm converges linearly, namely, the approximation error decreases exponentially fast. Experimental results show that the FFT-OT algorithm is more than a hundred times faster than conventional methods based on convex geometry. Furthermore, the method can be directly applied to sampling from complex 3D density functions in machine learning and to magnifying volumetric data in medical imaging. ", "keywords": "Optimal transport;Monge-Ampere equation;Elliptic PDE;Fast Fourier transform", "primary_area": "", "supplementary_material": "", "author": "Na Lei;DONGSHENG An;Min Zhang;Xiaoyin Xu;David Gu", "authorids": "~Na_Lei1;~DONGSHENG_An1;~Min_Zhang8;~Xiaoyin_Xu2;~David_Gu1", "gender": "F;M;F;M;M", "homepage": "https://faculty.dlut.edu.cn/leina/en;http://ads08.github.io/;https://person.zju.edu.cn/min_zhang;;http://www.cs.stonybrook.edu/~gu", "dblp": "90/2981;173/5382;;35/46;g/XianfengGu.html", "google_scholar": ";https://scholar.google.com/citations?hl=en;;NSht_-IAAAAJ;Y063_CIAAAAJ", "orcid": ";;;0000-0003-0813-7979;0000-0001-8226-5851", "linkedin": ";;;;", "or_profile": "~Na_Lei1;~DONGSHENG_An1;~Min_Zhang8;~Xiaoyin_Xu2;~David_Gu1", "aff": "Dalian University of Technology;Amazon;Zhejiang University;Zhejiang University;State University of New York at Stony Brook", "aff_domain": "dlut.edu.cn;amazon.com;zju.edu.cn;zju.edu.cn;cs.stonybrook.edu", "position": "Full Professor;Researcher;Assistant Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nlei2023volumetric,\ntitle={Volumetric Optimal Transportation by Fast Fourier Transform},\nauthor={Na Lei and DONGSHENG An and Min Zhang and Xiaoyin Xu and David Gu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=EVrz7UM-ZDm}\n}", "github": "", "project": "", "reviewers": "AQZc;Q68w;wDkb", "pdf_size": 21929460, "recommendation": "6;6;8", "confidence": "3;4;4", "correctness": "3;3;4", "technical_novelty": "3;3;4", "empirical_novelty": "3;0;3", "wc_summary_paper": "65;185;37", "wc_strength_and_weaknesses": "192;665;121", "wc_clarity_quality_novelty_and_reproducibility": "179;346;45", "wc_summary_review": "180;286;14", "wc_review": "616;1482;217", "wc_reply_reviewers": "1094;69;0", "wc_reply_authors": "1614;1597;295", "reply_reviewers": "2;1;0", "reply_authors": "3;3;1", "recommendation_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 1.4142135623730951 ], "wc_summary_paper_avg": [ 95.66666666666667, 
64.19414995845719 ], "wc_strength_and_weaknesses_avg": [ 326.0, 241.45530987465708 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 190.0, 123.12865899808487 ], "wc_summary_review_avg": [ 160.0, 111.94046036472544 ], "wc_review_avg": [ 771.6666666666666, 528.0343002326858 ], "wc_reply_reviewers_avg": [ 387.6666666666667, 500.24682796484467 ], "wc_reply_authors_avg": [ 1168.6666666666667, 617.8146072155375 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 2.3333333333333335, 0.9428090415820634 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.49999999999999983, "corr_recommendation_correctness": 0.9999999999999998, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:NCE8e9s82GkJ:scholar.google.com/&scioq=Volumetric+Optimal+Transportation+by+Fast+Fourier+Transform&hl=en&as_sdt=0,5", "gs_version_total": 0, "pdf": "https://openreview.net/pdf?id=EVrz7UM-ZDm", "email": "dlut.edu.cn;amazon.com;zju.edu.cn;zju.edu.cn;cs.stonybrook.edu", "author_num": 5, "aff_unique_index": "0;1;2;2;3", "aff_unique_norm": "Dalian University of Technology;Amazon;Zhejiang University;State University of New York at Stony Brook", "aff_unique_dep": ";Amazon.com, Inc.;;", "aff_unique_url": "http://www.dlut.edu.cn/;https://www.amazon.com;https://www.zju.edu.cn;https://www.stonybrook.edu", "aff_unique_abbr": "DUT;Amazon;ZJU;SUNY Stony Brook", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stony Brook", "aff_country_unique_index": "0;1;0;0;1", "aff_country_unique": "China;United States" }, { "id": "EW00yKKLiX", "title": "K-SAM: Sharpness-Aware Minimization at the Speed of SGD", "track": "main", "status": "Reject", "tldr": "We propose an efficient sharpness-aware minimization method by subsampling the training data with the highest k losses in both gradient calculation steps.", "abstract": "Sharpness-Aware Minimization (SAM) has recently emerged as a robust technique for improving the accuracy of deep neural networks. However, SAM incurs a high computational cost in practice, requiring up to twice as much computation as vanilla SGD. The computational challenge posed by SAM arises because each iteration requires both ascent and descent steps and thus double the gradient computations. To address this challenge, we propose to compute gradients in both stages of SAM on only the top-k samples with the highest loss. 
K-SAM is simple and extremely easy to implement while providing significant generalization boosts over vanilla SGD at little to no additional cost.", "keywords": "deep learning;efficient training", "primary_area": "", "supplementary_material": "/attachment/e3b1604ee9eadacfc8be49d97e03e282fbdc14c2.zip", "author": "Renkun Ni;Ping-yeh Chiang;Jonas Geiping;Micah Goldblum;Andrew Gordon Wilson;Tom Goldstein", "authorids": "~Renkun_Ni1;~Ping-yeh_Chiang1;~Jonas_Geiping1;~Micah_Goldblum1;~Andrew_Gordon_Wilson1;~Tom_Goldstein1", "gender": "M;;M;;Not Specified;M", "homepage": "https://www.cs.umd.edu/~rn9zm/;;https://jonasgeiping.github.io/;;https://cims.nyu.edu/~andrewgw;https://www.cs.umd.edu/~tomg/", "dblp": "183/7067;236/4288;190/7229;241/7231;65/10453;25/8184", "google_scholar": ";WUoMq1IAAAAJ;https://scholar.google.de/citations?user=206vNCEAAAAJ;pGDKzuUAAAAJ;https://scholar.google.com.tw/citations?user=twWX2LIAAAAJ;KmSuVtgAAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Renkun_Ni1;~Ping-yeh_Chiang1;~Jonas_Geiping1;~Micah_Goldblum1;~Andrew_Gordon_Wilson1;~Tom_Goldstein1", "aff": "Department of Computer Science, University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park;New York University;New York University;University of Maryland, College Park", "aff_domain": "cs.umd.edu;umd.edu;umd.edu;nyu.edu;nyu.edu;umd.edu", "position": "PhD student;PhD student;Postdoc;Postdoc;Associate Professor;Full Professor", "bibtex": "@misc{\nni2023ksam,\ntitle={K-{SAM}: Sharpness-Aware Minimization at the Speed of {SGD}},\nauthor={Renkun Ni and Ping-yeh Chiang and Jonas Geiping and Micah Goldblum and Andrew Gordon Wilson and Tom Goldstein},\nyear={2023},\nurl={https://openreview.net/forum?id=EW00yKKLiX}\n}", "github": "", "project": "", "reviewers": "T2UG;Y4Jo;gf5z;wYVX", "site": "https://openreview.net/forum?id=EW00yKKLiX", "pdf_size": 1374531, "recommendation": "3;3;3;6", "confidence": "4;4;5;5", "correctness": "3;3;2;4", "technical_novelty": "2;2;1;2", "empirical_novelty": "2;2;1;4", "wc_summary_paper": "60;56;82;96", "wc_strength_and_weaknesses": "501;245;143;401", "wc_clarity_quality_novelty_and_reproducibility": "68;70;27;213", "wc_summary_review": "62;35;53;83", "wc_review": "691;406;305;793", "wc_reply_reviewers": "97;27;0;0", "wc_reply_authors": "176;324;214;97", "reply_reviewers": "1;1;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 3.75, 1.299038105676658 ], "confidence_avg": [ 4.5, 0.5 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 1.0897247358851685 ], "wc_summary_paper_avg": [ 73.5, 16.332482971061076 ], "wc_strength_and_weaknesses_avg": [ 322.5, 138.06791806933282 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 94.5, 70.53545207907865 ], "wc_summary_review_avg": [ 58.25, 17.282577932704367 ], "wc_review_avg": [ 548.75, 199.80287160098575 ], "wc_reply_reviewers_avg": [ 31.0, 39.667366940597404 ], "wc_reply_authors_avg": [ 202.75, 81.74158978145702 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.816496580927726, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8525232962036950920&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;1;2;2;1", "aff_unique_norm": "University of Maryland, College 
Park;University of Maryland;New York University", "aff_unique_dep": "Department of Computer Science;;", "aff_unique_url": "https://www/umd.edu;https://www/umd.edu;https://www.nyu.edu", "aff_unique_abbr": "UMD;UMD;NYU", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "College Park;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "EWjYk3R2jhr", "title": "Elastic Aggregation for Federated Optimization", "track": "main", "status": "Withdraw", "tldr": "Elastic aggregation works well with other federated optimizers and achieves significant improvements across the board.", "abstract": " Federated learning enables the privacy-preserving training of neural network models using real-world data across distributed clients.\n FedAvg has become the preferred optimizer for federated learning because of its simplicity and effectiveness.\n FedAvg uses na\u00efve aggregation to update the server model, interpolating client models based on the number of instances used in their training.\n However, na\u00efve aggregation suffers from client-drift when the data is heterogeneous (non-IID), leading to unstable and slow convergence.\n In this work, we propose a novel aggregation approach, elastic aggregation, to overcome these issues. Elastic aggregation interpolates client models adaptively according to parameter sensitivity, which is measured by computing how much the overall prediction function output changes when each parameter is changed. This measurement is performed in an unsupervised and online manner.\n Elastic aggregation reduces the magnitudes of updates to the more sensitive parameters so as to prevent the server model from drifting to any one client distribution, and conversely boosts updates to the less sensitive parameters to better explore different client distributions.\n Empirical results on real and synthetic data as well as analytical results show that elastic aggregation leads to efficient training in both convex and non-convex settings, while being fully agnostic to client heterogeneity and robust to large numbers of clients, partial participation, and imbalanced data.\n Finally, elastic aggregation works well with other federated optimizers and achieves significant improvements across the board.", "keywords": "Federated Learning;AI Safety;Autonomous Driving;Drug Discovery;Clinical Diagnosis;Recommender Systems", "primary_area": "", "supplementary_material": "/attachment/e45dce6f5b9b8fd5df475667c76722b412418eef.zip", "author": "Dengsheng Chen;Jie Hu;Vince Jun Kai Tan;Enhua Wu", "authorids": "~Dengsheng_Chen1;~Jie_Hu4;~Vince_Jun_Kai_Tan1;~Enhua_Wu1", "gender": ";;;M", "homepage": ";;;https://www.fst.um.edu.mo/personal/ehwu", "dblp": ";;;", "google_scholar": ";;f63R2FsAAAAJ;", "orcid": ";;;0000-0002-2174-1428", "linkedin": ";;;", "or_profile": "~Dengsheng_Chen1;~Jie_Hu4;~Vince_Jun_Kai_Tan1;~Enhua_Wu1", "aff": ";;Bytedance;Institute of Software, Chinese Academy of Sciences", "aff_domain": ";;bytedance.com;ios.ac.cn", "position": ";;Researcher;Principal Researcher", "bibtex": "@misc{\ndengsheng2023elastic,\ntitle={Elastic Aggregation for Federated Optimization},\nauthor={Dengsheng Chen and Jie Hu and Vince Jun Kai Tan and Enhua Wu},\nyear={2023},\nurl={https://openreview.net/forum?id=EWjYk3R2jhr}\n}", "github": "", "project": "", "reviewers": "UTtt;frR4;EyxF;s3QP", "site": "https://openreview.net/forum?id=EWjYk3R2jhr", "pdf_size": 399120, "recommendation": "3;5;5;6", "confidence": "3;4;3;5", "correctness": "3;2;3;4", "technical_novelty": 
"2;3;2;2", "empirical_novelty": "3;3;2;3", "wc_summary_paper": "62;71;55;71", "wc_strength_and_weaknesses": "235;87;192;60", "wc_clarity_quality_novelty_and_reproducibility": "66;47;27;24", "wc_summary_review": "27;343;20;27", "wc_review": "390;548;294;182", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 64.75, 6.722164829874376 ], "wc_strength_and_weaknesses_avg": [ 143.5, 72.26513682267543 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 41.0, 16.926310879810757 ], "wc_summary_review_avg": [ 104.25, 137.87199679412785 ], "wc_review_avg": [ 353.5, 134.27118082447925 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.7608859102526822, "corr_recommendation_correctness": 0.3244428422615251, "gs_citation": 38, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5891085353074568004&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1", "aff_unique_norm": "ByteDance;Chinese Academy of Sciences", "aff_unique_dep": ";Institute of Software", "aff_unique_url": "https://www.bytedance.com;http://www.ios.ac.cn", "aff_unique_abbr": "Bytedance;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Towards Understanding and Mitigating Dimensional Collapse in Heterogeneous Federated Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11831", "id": "EXnIyMVTL8s", "poster": "/media/PosterPDFs/ICLR%202023/11831.png?t=1681887871.6157773", "openreview": "https://openreview.net/forum?id=EXnIyMVTL8s", "slides": "https://iclr.cc/virtual/2023/poster/11831", "video": "https://iclr.cc/virtual/2023/poster/11831", "author_site": "Yujun Shi, Jian Liang, Wenqing Zhang, Vincent Tan, Song Bai", "tldr": "We show data heterogeneity in federated learning causes dimensional collapse for trained models, and propose FedDecorr to mitigate such problem.", "abstract": "Federated learning aims to train models collaboratively across different clients without sharing data for privacy considerations. However, one major challenge for this learning paradigm is the data heterogeneity problem, which refers to the discrepancies between the local data distributions among various clients. To tackle this problem, we first study how data heterogeneity affects the representations of the globally aggregated models. Interestingly, we find that heterogeneous data results in the global model suffering from severe dimensional collapse, in which representations tend to reside in a lower-dimensional space instead of the ambient space. Moreover, we observe a similar phenomenon on models locally trained on each client and deduce that the dimensional collapse on the global model is inherited from local models. In addition, we theoretically analyze the gradient flow dynamics to shed light on how data heterogeneity result in dimensional collapse for local models. 
To remedy this problem caused by data heterogeneity, we propose FedDecorr, a novel method that can effectively mitigate dimensional collapse in federated learning. Specifically, FedDecorr applies a regularization term during local training that encourages different dimensions of representations to be uncorrelated. FedDecorr, which is implementation-friendly and computationally efficient, yields consistent improvements over baselines on standard benchmark datasets. Code: https://github.com/bytedance/FedDecorr.", "keywords": "federated Learning;representation Learning;data heterogeneity;dimensional collapse", "primary_area": "", "supplementary_material": "", "author": "Yujun Shi;Jian Liang;Wenqing Zhang;Vincent Tan;Song Bai", "authorids": "~Yujun_Shi1;~Jian_Liang1;~Wenqing_Zhang1;~Vincent_Tan1;~Song_Bai3", "gender": "M;M;;M;", "homepage": "https://yujun-shi.github.io/;https://liangjian.xyz;https://hannibalape.github.io/;https://www.ece.nus.edu.sg/stfpage/vtan/pubs.htm;https://songbai.site/", "dblp": "146/4499;19/2208-1;;60/2327;", "google_scholar": "Okeolr8AAAAJ;https://scholar.google.com/citations?hl=en;;dJoAVvAAAAAJ;LXuWMF4AAAAJ", "orcid": ";0000-0003-3890-1894;;0000-0002-5008-4527;", "linkedin": ";;;;", "or_profile": "~Yujun_Shi1;~Jian_Liang1;~Wenqing_Zhang1;~Vincent_Tan1;~Song_Bai3", "aff": "National University of Singapore;Institute of Automation, Chinese Academy of Sciences;ByteDance Inc.;National University of Singapore;ByteDance", "aff_domain": "u.nus.edu;ia.ac.cn;bytedance.com;nus.edu.sg;bytedance.com", "position": "PhD student;Associate Professor;Researcher;Full Professor;Computer Vision Lead", "bibtex": "@inproceedings{\nshi2023towards,\ntitle={Towards Understanding and Mitigating Dimensional Collapse in Heterogeneous Federated Learning},\nauthor={Yujun Shi and Jian Liang and Wenqing Zhang and Vincent Tan and Song Bai},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=EXnIyMVTL8s}\n}", "github": "", "project": "", "reviewers": "deuX;h8K7;QKrC;mWEv", "pdf_size": 1045695, "recommendation": "5;6;8;8", "confidence": "4;4;3;3", "correctness": "3;3;4;4", "technical_novelty": "2;3;3;4", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "55;123;46;104", "wc_strength_and_weaknesses": "249;303;110;51", "wc_clarity_quality_novelty_and_reproducibility": "20;80;131;17", "wc_summary_review": "25;116;79;14", "wc_review": "349;622;366;186", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "729;952;906;322", "reply_reviewers": "0;0;0;0", "reply_authors": "2;2;2;1", "recommendation_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 82.0, 32.36510466536452 ], "wc_strength_and_weaknesses_avg": [ 178.25, 101.75798494467153 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 62.0, 47.10095540432275 ], "wc_summary_review_avg": [ 58.5, 41.31888188225814 ], "wc_review_avg": [ 380.75, 156.00861354425274 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 727.25, 248.3418762512678 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.9622504486493761, "corr_recommendation_correctness": 0.9622504486493761, "gs_citation": 87, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=13682189428741189676&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=EXnIyMVTL8s", "email": "u.nus.edu;ia.ac.cn;bytedance.com;nus.edu.sg;bytedance.com", "author_num": 5, "aff_unique_index": "0;1;2;0;2", "aff_unique_norm": "National University of Singapore;Chinese Academy of Sciences;ByteDance", "aff_unique_dep": ";Institute of Automation;", "aff_unique_url": "https://www.nus.edu.sg;http://www.ia.cas.cn;https://www.bytedance.com", "aff_unique_abbr": "NUS;CAS;ByteDance", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;1", "aff_country_unique": "Singapore;China" }, { "id": "EdH_fkyhAO", "title": "Exploring Over-smoothing in Graph Attention Networks from the Markov Chain Perspective", "track": "main", "status": "Withdraw", "tldr": "We give a theoretical analysis on the over-smoothing in GAT under the perspective of Markov Chains and propose a method to solve this problem.", "abstract": "The over-smoothing problem causing the depth limitation is an obstacle of developing deep graph neural network (GNN). Compared with Graph Convolutional Networks (GCN), over-smoothing in Graph Attention Network (GAT) has not drawed enough attention. In this work, we analyze the over-smoothing problem in GAT from the Markov chain perspective. First we establish a connection between GAT and a time-inhomogeneous random walk on the graph. Then we show that the GAT is not always over-smoothing using conclusions in the time-inhomogeneous Markov chain. Finally, we derive a sufficient condition for GAT to avoid over-smoothing based on our findings about the existence of the limiting distribution of the time-inhomogeneous Markov chain. We design experiments to verify our theoretical findings. 
Results show that our proposed sufficient condition can effectively alleviate the over-smoothing problem in GAT and enhance the performance of the model.", "keywords": "Graph Attention Networks;Over-smoothing;Markov Chain", "primary_area": "", "supplementary_material": "", "author": "Weichen Zhao;Chenguang Wang;Congying Han;Tiande Guo", "authorids": "zhaoweichen14@mails.ucas.ac.cn;~Chenguang_Wang2;~Congying_Han1;~Tiande_Guo1", "gender": ";M;F;M", "homepage": ";https://github.com/Wastedzz/cgwang;http://people.ucas.edu.cn/~hancy;https://people.ucas.ac.cn/~tdguo?language=en", "dblp": ";62/3432-11;07/2808;", "google_scholar": ";Ptf3uO0AAAAJ;;", "orcid": ";0009-0008-4097-1174;0000-0002-3445-4620;0000-0002-3804-9163", "linkedin": ";;;", "or_profile": "zhaoweichen14@mails.ucas.ac.cn;~Chenguang_Wang2;~Congying_Han1;~Tiande_Guo1", "aff": ";The Chinese University of Hong Kong, Shenzhen;University of Chinese Academy of Sciences;University of Chinese Academy of Sciences", "aff_domain": ";cuhk.edu.cn;ucas.ac.cn;ucas.ac.cn", "position": ";PhD student;Full Professor;Full Professor", "bibtex": "@misc{\nzhao2023exploring,\ntitle={Exploring Over-smoothing in Graph Attention Networks from the Markov Chain Perspective},\nauthor={Weichen Zhao and Chenguang Wang and Congying Han and Tiande Guo},\nyear={2023},\nurl={https://openreview.net/forum?id=EdH_fkyhAO}\n}", "github": "", "project": "", "reviewers": "4zqJ;kdQN;qCRC;D6kf", "site": "https://openreview.net/forum?id=EdH_fkyhAO", "pdf_size": 499744, "recommendation": "1;3;3;3", "confidence": "4;4;5;4", "correctness": "2;1;2;2", "technical_novelty": "2;2;2;2", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "76;103;48;99", "wc_strength_and_weaknesses": "94;445;388;232", "wc_clarity_quality_novelty_and_reproducibility": "28;64;44;18", "wc_summary_review": "77;61;28;3", "wc_review": "275;673;508;352", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 2.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 1.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 81.5, 21.914607000811127 ], "wc_strength_and_weaknesses_avg": [ 289.75, 137.30326835148534 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 38.5, 17.399712641305314 ], "wc_summary_review_avg": [ 42.25, 28.734778579275673 ], "wc_review_avg": [ 452.0, 152.7301541935973 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": -0.3333333333333333, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1089176606529710878&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Chinese University of Hong Kong;University of Chinese Academy of Sciences", "aff_unique_dep": ";", "aff_unique_url": "https://www.cuhk.edu.cn;http://www.ucas.ac.cn", "aff_unique_abbr": "CUHK;UCAS", "aff_campus_unique_index": "0", "aff_campus_unique": "Shenzhen;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "EeEU0b9CPD3", "title": "Inferring Fluid Dynamics via Inverse Rendering", "track": "main", "status": "Reject", "tldr": "", "abstract": "Humans have a strong intuitive understanding of physical processes 
such as falling fluid from just a glimpse of a scene picture, an understanding quickly derived from our immersive visual experiences in memory. This work achieves such a photo-to-fluid-dynamics reconstruction functionality learned from unannotated videos, without any supervision of ground-truth fluid dynamics. In a nutshell, a differentiable Euler simulator, modeled with a ConvNet-based pressure projection solver, is integrated with a volumetric renderer, supporting end-to-end/coherent differentiable dynamic simulation and rendering. By endowing each sampled point with a fluid volume value, we derive a NeRF-like differentiable renderer dedicated to fluid data; and thanks to this volume-augmented representation, fluid dynamics could be inversely inferred from the error signal between the rendered result and the ground-truth video frame (i.e., inverse rendering). Experiments on our generated Fluid Fall datasets and the DPI Dam Break dataset are conducted to demonstrate both the effectiveness and the generalization ability of our method.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/de5a0d8ca8c762327f2535af488bc7b822f26634.zip", "author": "Jinxian Liu;Ye Chen;Bingbing Ni;Jiyao Mao;Zhenbo Yu", "authorids": "~Jinxian_Liu1;~Ye_Chen2;~Bingbing_Ni3;~Jiyao_Mao1;~Zhenbo_Yu1", "gender": "M;;M;M;M", "homepage": ";;;https://github.com/matyja256;https://yuzhenbo.github.io/", "dblp": "230/1167;;64/831.html;344/4421;https://dblp.org/rec/conf/iccv/YuWXNZW021.html", "google_scholar": "_2KMjfoAAAAJ;;V9W87PYAAAAJ;;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Jinxian_Liu1;~Ye_Chen2;~Bingbing_Ni3;~Jiyao_Mao1;~Zhenbo_Yu1", "aff": "Shanghai Jiaotong University;;Shanghai Jiaotong University;Shanghai Jiaotong University;", "aff_domain": "sjtu.edu.cn;;sjtu.edu.cn;sjtu.edu.cn;", "position": "PhD student;;Full Professor;Undergrad student;", "bibtex": "@misc{\nliu2023inferring,\ntitle={Inferring Fluid Dynamics via Inverse Rendering},\nauthor={Jinxian Liu and Ye Chen and Bingbing Ni and Jiyao Mao and Zhenbo Yu},\nyear={2023},\nurl={https://openreview.net/forum?id=EeEU0b9CPD3}\n}", "github": "", "project": "", "reviewers": "24gb;zis7;DkWW", "site": "https://openreview.net/forum?id=EeEU0b9CPD3", "pdf_size": 22836452, "recommendation": "5;5;8", "confidence": "4;4;5", "correctness": "2;4;4", "technical_novelty": "3;2;4", "empirical_novelty": "3;2;4", "wc_summary_paper": "52;89;114", "wc_strength_and_weaknesses": "306;203;376", "wc_clarity_quality_novelty_and_reproducibility": "29;33;332", "wc_summary_review": "50;45;45", "wc_review": "437;370;867", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "993;770;622", "reply_reviewers": "0;0;0", "reply_authors": "3;1;1", "recommendation_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.9428090415820634 ], "technical_novelty_avg": [ 3.0, 0.816496580927726 ], "empirical_novelty_avg": [ 3.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 85.0, 25.468935326524086 ], "wc_strength_and_weaknesses_avg": [ 295.0, 71.05397009785355 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 131.33333333333334, 141.90215721482963 ], "wc_summary_review_avg": [ 46.666666666666664, 2.357022603955158 ], "wc_review_avg": [ 558.0, 220.2014229442368 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 795.0, 152.48825091352668 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 5, 0 ], 
"corr_recommendation_confidence": 1.0, "corr_recommendation_correctness": 0.5000000000000001, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9665185766252478774&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "EfTN2tSGlF", "title": "Towards Understanding Convergence and Generalization of AdamW", "track": "main", "status": "Reject", "tldr": "It theoretically proves the convergence of AdamW, and justifies its generalization superiority over both Adam and its $\\ell_2$-regularized variant. ", "abstract": "AdamW modifies vanilla Adam by decaying network weights per training iteration, and shows remarkable generalization superiority over Adam and its $\\ell_2$-regularized variant. In context of adaptive gradient algorithms (\\eg~Adam), the decoupled weight decay in AdamW differs from the widely used $\\ell_2$-regularizer, since the former does not affect optimization steps, while the latter changes the first- and second-order gradient moments and thus the optimization steps. Despite its great success on both vision transformers and CNNs, for AdamW, its convergence behavior and its generalization improvement over ($\\ell_2$-regularized) Adam remain absent yet. To solve this issue, we prove the convergence of AdamW and justify its generalization advantages over Adam and its $\\ell_2$-regularized version. Specifically, AdamW can provably converge but minimizes a dynamically regularized loss that combines a vanilla loss and a dynamical regularization induced by the decoupled weight decay, thus leading to its different behaviors compared with Adam and its $\\ell_2$-regularized version. Moreover, on both general nonconvex problems and P\\L-conditioned problems, we establish the stochastic gradient complexity of AdamW to find a stationary point. Such complexity is also applicable to Adam and its $\\ell_{2}$-regularized variant, and indeed improves their previously known complexity, especially for modern over-parametrized networks. Besides, we theoretically show that AdamW often enjoys smaller generalization error bound than both Adam and its $\\ell_2$-regularized variant from the Bayesian posterior aspect. This result, for the first time, explicitly reveals the benefits of the unique decoupled weight decay in AdamW. We hope the theoretical results in this work could motivate researchers to propose novel optimizers with faster convergence and better generalization. Experimental results testify our theoretical implications. 
", "keywords": "deep learning optimization;network optimizer", "primary_area": "", "supplementary_material": "", "author": "Pan Zhou;Xingyu Xie;Shuicheng YAN", "authorids": "~Pan_Zhou3;~Xingyu_Xie1;~Shuicheng_YAN3", "gender": ";M;M", "homepage": ";;https://yanshuicheng.ai/", "dblp": ";174/9633;y/ShuichengYan", "google_scholar": ";BpFCmZMAAAAJ;https://scholar.google.com.hk/citations?user=DNuiPHwAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Pan_Zhou3;~Xingyu_Xie1;~Shuicheng_YAN3", "aff": ";Peking University;sea Group", "aff_domain": ";pku.edu.cn;sea.com", "position": ";PhD student;Researcher", "bibtex": "@misc{\nzhou2023towards,\ntitle={Towards Understanding Convergence and Generalization of AdamW},\nauthor={Pan Zhou and Xingyu Xie and Shuicheng YAN},\nyear={2023},\nurl={https://openreview.net/forum?id=EfTN2tSGlF}\n}", "github": "", "project": "", "reviewers": "5pbQ;ViR8;yhHR", "site": "https://openreview.net/forum?id=EfTN2tSGlF", "pdf_size": 948540, "recommendation": "3;5;6", "confidence": "5;3;3", "correctness": "3;2;3", "technical_novelty": "1;3;3", "empirical_novelty": "2;3;2", "wc_summary_paper": "75;41;58", "wc_strength_and_weaknesses": "261;148;155", "wc_clarity_quality_novelty_and_reproducibility": "63;25;18", "wc_summary_review": "43;35;31", "wc_review": "442;249;262", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1533;1264;1457", "reply_reviewers": "0;0;0", "reply_authors": "2;2;3", "recommendation_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.9428090415820634 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 58.0, 13.880441875771343 ], "wc_strength_and_weaknesses_avg": [ 188.0, 51.697840058039816 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 35.333333333333336, 19.770910168449223 ], "wc_summary_review_avg": [ 36.333333333333336, 4.988876515698588 ], "wc_review_avg": [ 317.6666666666667, 88.07698652630864 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1418.0, 113.22838277864197 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.944911182523068, "corr_recommendation_correctness": -0.18898223650461363, "gs_citation": 40, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7859121958641363009&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "aff_unique_index": "0;1", "aff_unique_norm": "Peking University;Sea Group", "aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;", "aff_unique_abbr": "Peking U;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0", "aff_country_unique": "China;" }, { "id": "EgJ0PbRPkCW", "title": "Joint Representations of Text and Knowledge Graphs for Retrieval and Evaluation", "track": "main", "status": "Withdraw", "tldr": "We learn joint representations for knowledge base elements and corresponding text, which allows to perform retrieval and referenceless adequacy evaluation", "abstract": "A key feature of neural models is that they can produce semantic vector representations of objects (texts, images, speech, etc.) ensuring that similar objects are close to each other in the vector space. 
While much work has focused on learning text, image, knowledge-base (KB) and image-text representations, there are no aligned cross-modal text-KB representations. One challenge for learning such representations is the lack of parallel data. We train retrieval models on datasets of (graph, text) pairs where the graph is a KB subgraph and the text has been heuristically aligned with the graph. When performing retrieval on WebNLG, a clean parallel corpus, our best model achieves 80\\% accuracy and 99\\% recall@10, showing that similar texts and KB graphs are mapped close to each other. We use this property to create a similarity metric between English text and KB graphs, matching state-of-the-art metrics in terms of correlation with human judgments even though, unlike them, it does not require a reference text to compare against.", "keywords": "Representation learning;Text generation;Knowledge bases;Evaluation", "primary_area": "", "supplementary_material": "", "author": "Teven Le Scao;Claire Gardent", "authorids": "~Teven_Le_Scao1;~Claire_Gardent1", "gender": ";F", "homepage": ";https://members.loria.fr/CGardent/", "dblp": ";71/6819", "google_scholar": ";gHC1paQAAAAJ", "orcid": "0000-0002-7052-3048;0000-0002-3805-6662", "linkedin": ";claire-gardent-70116341/?originalSubdomain=fr", "or_profile": "~Teven_Le_Scao1;~Claire_Gardent1", "aff": ";CNRS", "aff_domain": ";cnrs.fr", "position": ";Principal Researcher", "bibtex": "@misc{\nscao2023joint,\ntitle={Joint Representations of Text and Knowledge Graphs for Retrieval and Evaluation},\nauthor={Teven Le Scao and Claire Gardent},\nyear={2023},\nurl={https://openreview.net/forum?id=EgJ0PbRPkCW}\n}", "github": "", "project": "", "reviewers": "9EbG;WNsg;VCRt", "site": "https://openreview.net/forum?id=EgJ0PbRPkCW", "pdf_size": 612693, "recommendation": "3;3;3", "confidence": "4;4;4", "correctness": "2;2;3", "technical_novelty": "2;1;2", "empirical_novelty": "2;1;2", "wc_summary_paper": "94;81;64", "wc_strength_and_weaknesses": "216;325;158", "wc_clarity_quality_novelty_and_reproducibility": "40;43;181", "wc_summary_review": "16;29;87", "wc_review": "366;478;490", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "wc_summary_paper_avg": [ 79.66666666666667, 12.283683848458853 ], "wc_strength_and_weaknesses_avg": [ 233.0, 69.2290882986817 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 88.0, 65.7723346096214 ], "wc_summary_review_avg": [ 44.0, 30.865298745786774 ], "wc_review_avg": [ 444.6666666666667, 55.841044243658466 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10388309375852503149&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0", "aff_unique_norm": "Centre National de la Recherche Scientifique", "aff_unique_dep": "", "aff_unique_url": "https://www.cnrs.fr", "aff_unique_abbr": "CNRS", "aff_country_unique_index": "0", "aff_country_unique": "France" }, { "title": "Faster federated 
optimization under second-order similarity", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11404", "id": "ElC6LYO4MfD", "poster": "", "openreview": "https://openreview.net/forum?id=ElC6LYO4MfD", "slides": "https://iclr.cc/virtual/2023/poster/11404", "video": "https://iclr.cc/virtual/2023/poster/11404", "author_site": "Ahmed Khaled, Chi Jin", "tldr": "", "abstract": "Federated learning (FL) is a subfield of machine learning where multiple clients try to collaboratively learn a model over a network under communication constraints. We consider finite-sum federated optimization under a second-order function similarity condition and strong convexity, and propose two new algorithms: SVRP and Catalyzed SVRP. This second-order similarity condition has grown popular recently, and is satisfied in many applications including distributed statistical learning and differentially private empirical risk minimization. The first algorithm, SVRP, combines approximate stochastic proximal point evaluations, client sampling, and variance reduction. We show that SVRP is communication efficient and achieves superior performance to many existing algorithms when function similarity is high enough. Our second algorithm, Catalyzed SVRP, is a Catalyst-accelerated variant of SVRP that achieves even better performance and uniformly improves upon existing algorithms for federated optimization under second-order similarity and strong convexity. In the course of analyzing these algorithms, we provide a new analysis of the Stochastic Proximal Point Method (SPPM) that might be of independent interest. Our analysis of SPPM is simple, allows for approximate proximal point evaluations, does not require any smoothness assumptions, and shows a clear benefit in communication complexity over ordinary distributed stochastic gradient descent.", "keywords": "federated learning;distributed optimization;hessian similarity;client sampling;stochastic proximal point;proximal point method;distributed learning", "primary_area": "", "supplementary_material": "/attachment/806d8f9e9e481a06bd1020f0bf0639cd1524d1ec.zip", "author": "Ahmed Khaled;Chi Jin", "authorids": "~Ahmed_Khaled1;~Chi_Jin1", "gender": "M;M", "homepage": "https://www.akhaled.net;https://sites.google.com/view/cjin/home", "dblp": "154/3591-1;126/1802-1", "google_scholar": "Bc3wOdsAAAAJ;GINhGvwAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Ahmed_Khaled1;~Chi_Jin1", "aff": "Princeton University;Princeton University", "aff_domain": "princeton.edu;princeton.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nkhaled2023faster,\ntitle={Faster federated optimization under second-order similarity},\nauthor={Ahmed Khaled and Chi Jin},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=ElC6LYO4MfD}\n}", "github": "", "project": "", "reviewers": "sQ1q;gy4c;Q8H3;JTga;2cGY", "pdf_size": 823744, "recommendation": "5;5;5;5;6", "confidence": "4;2;4;3;4", "correctness": "3;4;3;3;4", "technical_novelty": "3;3;2;2;3", "empirical_novelty": "0;1;2;2;2", "wc_summary_paper": "38;53;72;122;85", "wc_strength_and_weaknesses": "138;267;72;306;567", "wc_clarity_quality_novelty_and_reproducibility": "19;2;29;9;32", "wc_summary_review": "4;26;165;78;33", "wc_review": "199;348;338;515;717", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;0;0", "reply_authors": "0;0;0;0;0", "recommendation_avg": [ 5.2, 
0.39999999999999997 ], "confidence_avg": [ 3.4, 0.8 ], "correctness_avg": [ 3.4, 0.4898979485566356 ], "technical_novelty_avg": [ 2.6, 0.4898979485566356 ], "empirical_novelty_avg": [ 1.4, 0.8 ], "wc_summary_paper_avg": [ 74.0, 28.865203966021095 ], "wc_strength_and_weaknesses_avg": [ 270.0, 170.963153925049 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 18.2, 11.443775600735973 ], "wc_summary_review_avg": [ 61.2, 57.20629336008408 ], "wc_review_avg": [ 423.4, 177.7218050774862 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.37500000000000006, "corr_recommendation_correctness": 0.6123724356957947, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12491893458074219440&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=ElC6LYO4MfD", "email": "princeton.edu;princeton.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Princeton University", "aff_unique_dep": "", "aff_unique_url": "https://www.princeton.edu", "aff_unique_abbr": "Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "ElI9znK_eUz", "title": "Diagnosing and exploiting the computational demands of videos games for deep reinforcement learning", "track": "main", "status": "Reject", "tldr": "Strategies for improving deep reinforcement learning agents can be predicted from their generalization performance.", "abstract": "Humans learn by interacting with their environments and perceiving the outcomes of their actions. A landmark in artificial intelligence has been the development of deep reinforcement learning (dRL) algorithms capable of doing the same in video games, on par with or better than humans. However, it remains unclear whether the successes of dRL models reflect advances in visual representation learning, the effectiveness of reinforcement learning algorithms at discovering better policies, or both. To address this question, we introduce the Learning Challenge Diagnosticator (LCD), a tool that separately measures the perceptual and reinforcement learning demands of a task. We use LCD to discover a novel taxonomy of challenges in the Procgen benchmark, and demonstrate that these predictions are highly reliable and can instruct algorithmic development. 
More broadly, the LCD reveals multiple failure cases that can occur when optimizing dRL algorithms over entire video game benchmarks like Procgen, and provides a pathway towards more efficient progress.", "keywords": "Cognitive Science;Deep Reinforcement Learning;Perceptual Grouping;Neuroscience", "primary_area": "", "supplementary_material": "", "author": "Lakshmi Narasimhan Govindarajan;Rex G Liu;Drew Linsley;Alekh Karkada Ashok;Max Reuter;Michael Frank;Thomas Serre", "authorids": "~Lakshmi_Narasimhan_Govindarajan3;~Rex_G_Liu1;~Drew_Linsley1;~Alekh_Karkada_Ashok1;~Max_Reuter1;~Michael_Frank2;~Thomas_Serre1", "gender": ";;;M;M;M;M", "homepage": ";;;;;http://ski.clps.brown.edu;https://serre-lab.clps.brown.edu/", "dblp": ";;194/2308;230/2212;;33/2026.html;", "google_scholar": ";;cXZlAuQAAAAJ;;;f-xyFpUAAAAJ;kZlPW4wAAAAJ", "orcid": ";;;;;0000-0001-8451-0523;", "linkedin": ";;;;max-reuter/;;", "or_profile": "~Lakshmi_Narasimhan_Govindarajan3;~Rex_G_Liu1;~Drew_Linsley1;~Alekh_Karkada_Ashok1;~Max_Reuter1;~Michael_Frank2;~Thomas_Serre1", "aff": ";;Brown University;Brown University;Brown University;Brown University;Universit\u00e9 de Toulouse", "aff_domain": ";;brown.edu;brown.edu;brown.edu;brown.edu;univ-toulouse.fr", "position": ";;Assistant Professor;PhD student;Research Assistant;Full Professor;Full Professor", "bibtex": "@misc{\ngovindarajan2023diagnosing,\ntitle={Diagnosing and exploiting the computational demands of videos games for deep reinforcement learning},\nauthor={Lakshmi Narasimhan Govindarajan and Rex G Liu and Drew Linsley and Alekh Karkada Ashok and Max Reuter and Michael Frank and Thomas Serre},\nyear={2023},\nurl={https://openreview.net/forum?id=ElI9znK_eUz}\n}", "github": "", "project": "", "reviewers": "SeAm;tmph;Ni8m;8UAX", "site": "https://openreview.net/forum?id=ElI9znK_eUz", "pdf_size": 23175947, "recommendation": "3;3;5;6", "confidence": "4;4;4;3", "correctness": "3;2;4;3", "technical_novelty": "2;2;3;2", "empirical_novelty": "2;1;2;3", "wc_summary_paper": "77;63;92;212", "wc_strength_and_weaknesses": "360;74;161;433", "wc_clarity_quality_novelty_and_reproducibility": "21;48;61;70", "wc_summary_review": "40;45;85;92", "wc_review": "498;230;399;807", "wc_reply_reviewers": "193;0;52;29", "wc_reply_authors": "621;554;416;1141", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;2", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 111.0, 59.20726306797165 ], "wc_strength_and_weaknesses_avg": [ 257.0, 145.16370069683398 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 50.0, 18.479718612576328 ], "wc_summary_review_avg": [ 65.5, 23.200215516240362 ], "wc_review_avg": [ 483.5, 209.9196274768036 ], "wc_reply_reviewers_avg": [ 68.5, 74.20411039827916 ], "wc_reply_authors_avg": [ 683.0, 274.5623790689467 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.7777777777777777, "corr_recommendation_correctness": 0.5443310539518174, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13015657519234015576&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Brown University;Universit\u00e9 de Toulouse", "aff_unique_dep": 
";", "aff_unique_url": "https://www.brown.edu;https://www.univ-toulouse.fr", "aff_unique_abbr": "Brown;UT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "United States;France" }, { "id": "EmABrt4zzz3", "title": "PES: Probabilistic Exponential Smoothing for Time Series Forecasting", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Time series forecasting is a common task in many industries. It helps organizations in setting goals, making plans and taking decisions. Probabilistic forecasting, in addition, summarizes the confidence over future quantities, a useful property when targeting uncertainty. This paper proposes PES - Probabilistic Exponential Smoothing -, a hybrid model for univariate time series forecasting. The contribution is two-fold: we introduce a RNN-like cell incorporating a simple exponential smoothing operator; building on this new cell we develop an intelligible and data-efficient model. The proposed solution shows several desirable characteristics; being easy to implement and fast to train, it can accommodate multiple seasonality and learn them via cross-learning. It can produce intervals as well as point-forecasts and its structure could be a valuable time series decomposition scheme. We test the PES model over a demand forecasting task on a well-known, publicly available, dataset. Finally we show that the results obtained compare favorably to the state-of-the-art.", "keywords": "time series forecast;demand forecast;probabilistic forecast;recurrent neural network;exponential smoothing;automatic differentiation", "primary_area": "", "supplementary_material": "/attachment/1189607f83c34a40c730a1d456140890b2be0033.zip", "author": "Antonio Cifonelli;Stephane Canu", "authorids": "~Antonio_Cifonelli1;~Stephane_Canu1", "gender": "M;M", "homepage": "https://www.linkedin.com/in/antonio-cifonelli-88bb4996;http://asi.insa-rouen.fr/enseignants/~scanu/", "dblp": ";17/122", "google_scholar": ";PpibCZUAAAAJ", "orcid": ";0000-0002-7602-4557", "linkedin": "antonio-cifonelli-88bb4996;st%C3%A9phane-canu-b1127a14/", "or_profile": "~Antonio_Cifonelli1;~Stephane_Canu1", "aff": "Institut National des Sciences Appliqu\u00e9es de Rouen;INSA Rouen Normandy", "aff_domain": "insa-rouen.fr;insa-rouen.fr", "position": "PhD student;Full Professor", "bibtex": "@misc{\ncifonelli2023pes,\ntitle={{PES}: Probabilistic Exponential Smoothing for Time Series Forecasting},\nauthor={Antonio Cifonelli and Stephane Canu},\nyear={2023},\nurl={https://openreview.net/forum?id=EmABrt4zzz3}\n}", "github": "", "project": "", "reviewers": "izXc;T5jp;bwqD", "site": "https://openreview.net/forum?id=EmABrt4zzz3", "pdf_size": 292523, "recommendation": "1;3;3", "confidence": "5;5;3", "correctness": "1;2;3", "technical_novelty": "2;2;2", "empirical_novelty": "1;2;2", "wc_summary_paper": "56;62;42", "wc_strength_and_weaknesses": "251;366;318", "wc_clarity_quality_novelty_and_reproducibility": "167;55;45", "wc_summary_review": "106;69;59", "wc_review": "580;552;464", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "442;647;780", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 2.3333333333333335, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.9428090415820634 ], "correctness_avg": [ 2.0, 0.816496580927726 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "wc_summary_paper_avg": [ 53.333333333333336, 8.379870059984356 ], 
"wc_strength_and_weaknesses_avg": [ 311.6666666666667, 47.16166051171462 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 89.0, 55.3052137385497 ], "wc_summary_review_avg": [ 78.0, 20.215505600075073 ], "wc_review_avg": [ 532.0, 49.42334131426837 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 623.0, 139.0275752024276 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.5, "corr_recommendation_correctness": 0.8660254037844387, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:nSaXia4j404J:scholar.google.com/&scioq=PES:+Probabilistic+Exponential+Smoothing+for+Time+Series+Forecasting&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Institut National des Sciences Appliqu\u00e9es;INSA Rouen Normandy", "aff_unique_dep": ";", "aff_unique_url": "https://www.insa-rouen.fr;https://www.insa-rouen.fr", "aff_unique_abbr": "INSA Rouen;INSA Rouen", "aff_campus_unique_index": "0", "aff_campus_unique": "Rouen;", "aff_country_unique_index": "0;0", "aff_country_unique": "France" }, { "id": "EmH1WE1fRbt", "title": "Exploring Parameter-Efficient Fine-tuning for Improving Communication Efficiency in Federated Learning", "track": "main", "status": "Withdraw", "tldr": "We explore the viability of a parameter-efficient fine-tuning framework in federated learning to leverage strong pre-trained models and significantly reduce communication costs.", "abstract": "Federated learning (FL) has emerged as a promising paradigm for enabling the collaborative training of models without centralized access to the raw data on local devices. In the typical FL paradigm (e.g., FedAvg), model weights are sent to and from the server each round to participating clients. However, this can quickly put a massive communication burden on the system, especially if more capable models beyond very small MLPs are employed.\nRecently, the use of pre-trained models has been shown effective in federated learning optimization and improving convergence. This opens the door for new research questions. Can we adjust the weight-sharing paradigm in federated learning, leveraging strong and readily-available pre-trained models, to significantly reduce the communication burden while simultaneously achieving excellent performance? To this end, we investigate the use of parameter-efficient fine-tuning in federated learning. Specifically, we systemically evaluate the performance of several parameter-efficient fine-tuning methods across a variety of client stability, data distribution, and differential privacy settings. 
By only locally tuning and globally sharing a small portion of the model weights, significant reductions in the total communication overhead can be achieved while maintaining competitive performance in a wide range of federated learning scenarios, providing insight into a new paradigm for practical and effective federated systems.", "keywords": "federated learning;computer vision;vision transformer;fine-tuning", "primary_area": "", "supplementary_material": "", "author": "Guangyu Sun;Matias Mendieta;Taojiannan Yang;Chen Chen", "authorids": "~Guangyu_Sun3;~Matias_Mendieta1;~Taojiannan_Yang1;~Chen_Chen18", "gender": "M;;M;M", "homepage": "https://imguangyu.github.io/;https://sites.google.com/view/matiasmendieta;;https://www.crcv.ucf.edu/chenchen/", "dblp": ";254/1788;249/8103;65/4423-1", "google_scholar": "e4NjsVIAAAAJ;iO5zyPwAAAAJ;Z_--q5UAAAAJ;TuEwcZ0AAAAJ", "orcid": "0000-0002-8523-9074;0000-0002-5497-6207;;0000-0003-3957-7061", "linkedin": "guangyu-sun-686b94198/;matias-mendieta/;;dennychen/", "or_profile": "~Guangyu_Sun3;~Matias_Mendieta1;~Taojiannan_Yang1;~Chen_Chen18", "aff": "University of Central Florida;University of Central Florida;University of Central Florida;University of Central Florida", "aff_domain": "ucf.edu;ucf.edu;ucf.edu;ucf.edu", "position": "PhD student;PhD student;PhD student;Assistant Professor", "bibtex": "@misc{\nsun2023exploring,\ntitle={Exploring Parameter-Efficient Fine-tuning for Improving Communication Efficiency in Federated Learning},\nauthor={Guangyu Sun and Matias Mendieta and Taojiannan Yang and Chen Chen},\nyear={2023},\nurl={https://openreview.net/forum?id=EmH1WE1fRbt}\n}", "github": "", "project": "", "reviewers": "q8ua;5UMf;HwsW;SpNr", "site": "https://openreview.net/forum?id=EmH1WE1fRbt", "pdf_size": 885487, "recommendation": "3;3;5;6", "confidence": "4;4;4;4", "correctness": "3;3;3;3", "technical_novelty": "1;2;3;2", "empirical_novelty": "1;2;3;3", "wc_summary_paper": "54;47;118;50", "wc_strength_and_weaknesses": "104;145;342;244", "wc_clarity_quality_novelty_and_reproducibility": "30;149;37;40", "wc_summary_review": "13;30;61;41", "wc_review": "201;371;558;375", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 67.25, 29.40556920040828 ], "wc_strength_and_weaknesses_avg": [ 208.75, 92.24254712441542 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 64.0, 49.208739061268375 ], "wc_summary_review_avg": [ 36.25, 17.426631917843448 ], "wc_review_avg": [ 376.25, 126.268315503138 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=756539490316220350&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Central Florida", "aff_unique_dep": "", "aff_unique_url": "https://www.ucf.edu", "aff_unique_abbr": "UCF", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "En7lGmzT_x", "title": 
"Sharper Rates and Flexible Framework for Nonconvex SGD with Client and Data Sampling", "track": "main", "status": "Reject", "tldr": "", "abstract": "We revisit the classical problem of finding an approximately stationary point of the average of $n$ smooth and possibly nonconvex functions. The optimal complexity of stochastic first-order methods in terms of the number of gradient evaluations of individual functions is $\\mathcal{O}\\left(n + n^{1/2}\\varepsilon^{-1}\\right)$, attained by the optimal SGD methods SPIDER (Cong Fang et al., 2018) and PAGE (Zhize Li et al., 2020), for example, where $\\varepsilon$ is the error tolerance. However, i) the big-$\\mathcal{O}$ notation hides crucial dependencies on the smoothness constants associated with the functions, and ii) the rates and theory in these methods assume simplistic sampling mechanisms that do not offer any flexibility. In this work we remedy the situation. First, we generalize the PAGE algorithm so that it can provably work with virtually any (unbiased) sampling mechanism. This is particularly useful in federated learning, as it allows us to construct and better understand the impact of various combinations of client and data sampling strategies. Second, our analysis is sharper as we make explicit use of certain novel inequalities that capture the intricate interplay between the smoothness constants and the sampling procedure. Indeed, our analysis is better even for the simple sampling procedure analyzed in the PAGE paper. However, this already improved bound can be further sharpened by a different sampling scheme which we propose. In summary, we provide the most general and most accurate analysis of optimal SGD in the smooth nonconvex regime. Finally, our theoretical findings are supposed with carefully designed experiments.", "keywords": "nonconvex optimization;empirical risk minimization;SGD;variance reduction;data sampling;client sampling;optimal methods;biased gradient estimator;federated learning", "primary_area": "", "supplementary_material": "", "author": "Alexander Tyurin;Lukang Sun;Konstantin Pavlovich Burlachenko;Peter Richt\u00e1rik", "authorids": "~Alexander_Tyurin1;~Lukang_Sun1;~Konstantin_Pavlovich_Burlachenko1;~Peter_Richt\u00e1rik1", "gender": "M;M;M;M", "homepage": "https://k3nfalt.github.io/;https://lukangsun.github.io/;https://burlachenkok.github.io/;https://richtarik.org", "dblp": "203/8919;294/4996.html;;62/8001", "google_scholar": ";hBbg20cAAAAJ;3pA-LoQAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;0000-0003-4380-5848", "linkedin": ";lukang-sun-62a92020b/;burlachenkok/;richtarik/", "or_profile": "~Alexander_Tyurin1;~Lukang_Sun1;~Konstantin_Pavlovich_Konstantin_Burlachenko1;~Peter_Richtarik1", "aff": "KAUST;KAUST;KAUST;King Abdullah University of Science and Technology (KAUST)", "aff_domain": "kaust.edu.sa;kaust.edu.sa;kaust.edu.sa;kaust.edu.sa", "position": "Postdoc;PhD student;PhD student;Full Professor", "bibtex": "@misc{\ntyurin2023sharper,\ntitle={Sharper Rates and Flexible Framework for Nonconvex {SGD} with Client and Data Sampling},\nauthor={Alexander Tyurin and Lukang Sun and Konstantin Pavlovich Burlachenko and Peter Richt{\\'a}rik},\nyear={2023},\nurl={https://openreview.net/forum?id=En7lGmzT_x}\n}", "github": "", "project": "", "reviewers": "paK1;NfNX;CFoS;xtiL", "site": "https://openreview.net/forum?id=En7lGmzT_x", "pdf_size": 1050259, "recommendation": "3;3;5;6", "confidence": "4;4;3;2", "correctness": "3;3;3;4", "technical_novelty": "1;2;2;4", "empirical_novelty": "0;1;2;4", 
"wc_summary_paper": "28;151;159;42", "wc_strength_and_weaknesses": "79;188;181;43", "wc_clarity_quality_novelty_and_reproducibility": "25;47;13;5", "wc_summary_review": "162;51;43;125", "wc_review": "294;437;396;215", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 1.0897247358851685 ], "empirical_novelty_avg": [ 1.75, 1.479019945774904 ], "wc_summary_paper_avg": [ 95.0, 60.27022482121665 ], "wc_strength_and_weaknesses_avg": [ 122.75, 63.0966520506437 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 22.5, 15.835087622113116 ], "wc_summary_review_avg": [ 95.25, 50.071823413972055 ], "wc_review_avg": [ 335.5, 86.89792862893799 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.986440050415621, "corr_recommendation_correctness": 0.7777777777777777, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8798782928345109242&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "King Abdullah University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaust.edu.sa", "aff_unique_abbr": "KAUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Saudi Arabia" }, { "title": "Agnostic Learning of General ReLU Activation Using Gradient Descent", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11445", "id": "EnrY5TOrbQ", "poster": "/media/PosterPDFs/ICLR%202023/11445.png?t=1682922911.6756043", "openreview": "https://openreview.net/forum?id=EnrY5TOrbQ", "slides": "https://iclr.cc/virtual/2023/poster/11445", "video": "https://iclr.cc/virtual/2023/poster/11445", "author_site": "Pranjal Awasthi, Alex Tang, Aravindan Vijayaraghavan", "tldr": "We provide a convergence analysis of gradient descent for the problem of agnostically learning a single ReLU function under Gaussian distributions that achieves loss of O(OPT). ", "abstract": "We provide a convergence analysis of gradient descent for the problem of agnostically learning a single ReLU function under Gaussian distributions. Unlike prior work that studies the setting of zero bias, we consider the more challenging scenario when the bias of the ReLU function is non-zero. Our main result establishes that starting from random initialization, in a polynomial number of iterations gradient descent outputs, with high probability, a ReLU function that achieves an error that is within a constant factor of the optimal i.e., it is guaranteed to achieve an error of $O(OPT)$, where $OPT$ is the error of the best ReLU function. This is a significant improvement over existing guarantees for gradient descent, which only guarantee error of $O(\\sqrt{d \\cdot OPT})$ even in the zero-bias case (Frei et al., 2020). We also provide finite sample guarantees, and obtain similar guarantees for a broader class of marginal distributions beyond Gaussians. 
", "keywords": "agnostic learning;learning ReLU;global convergence;learning theory", "primary_area": "", "supplementary_material": "/attachment/5aa4e20402af992065d6e546bc2be11c2e338777.zip", "author": "Pranjal Awasthi;Alex Tang;Aravindan Vijayaraghavan", "authorids": "~Pranjal_Awasthi3;~Alex_Tang1;~Aravindan_Vijayaraghavan1", "gender": ";;M", "homepage": "https://www.cs.rutgers.edu/~pa336/;https://cplalexandtang.github.io/;http://www.cs.northwestern.edu/~aravindv", "dblp": "57/679;;84/7804", "google_scholar": ";;tokXOxkAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Pranjal_Awasthi3;~Alex_Tang1;~Aravindan_Vijayaraghavan1", "aff": "Rutgers University;Northwestern University;Northwestern University", "aff_domain": "rutgers.edu;northwestern.edu;northwestern.edu", "position": "Assistant Professor;PhD student;Associate Professor", "bibtex": "@inproceedings{\nawasthi2023agnostic,\ntitle={Agnostic Learning of General Re{LU} Activation Using Gradient Descent},\nauthor={Pranjal Awasthi and Alex Tang and Aravindan Vijayaraghavan},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=EnrY5TOrbQ}\n}", "github": "", "project": "", "reviewers": "iUj1;na9i;4WPP;DGFB", "pdf_size": 548655, "recommendation": "5;6;6;8", "confidence": "5;3;3;4", "correctness": "4;3;4;3", "technical_novelty": "2;3;3;4", "empirical_novelty": "2;0;0;0", "wc_summary_paper": "93;108;102;120", "wc_strength_and_weaknesses": "297;437;137;557", "wc_clarity_quality_novelty_and_reproducibility": "11;57;62;51", "wc_summary_review": "14;58;51;10", "wc_review": "415;660;352;738", "wc_reply_reviewers": "0;0;0;293", "wc_reply_authors": "176;275;156;1058", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;2", "recommendation_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 0.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 105.75, 9.807522622966516 ], "wc_strength_and_weaknesses_avg": [ 357.0, 156.8438714135812 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 45.25, 20.154093876927337 ], "wc_summary_review_avg": [ 33.25, 21.44032415799724 ], "wc_review_avg": [ 541.25, 161.6839123104089 ], "wc_reply_reviewers_avg": [ 73.25, 126.87272165442026 ], "wc_reply_authors_avg": [ 416.25, 373.2441392707995 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.20751433915982243, "corr_recommendation_correctness": -0.6882472016116854, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8052882856045749798&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=EnrY5TOrbQ", "email": "rutgers.edu;northwestern.edu;northwestern.edu", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Rutgers University;Northwestern University", "aff_unique_dep": ";", "aff_unique_url": "https://www.rutgers.edu;https://www.northwestern.edu", "aff_unique_abbr": "Rutgers;NU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "Eo89g5X1m5g", "title": "Sample Relationships through the Lens of Learning Dynamics with Label Information", "track": "main", "status": "Withdraw", "tldr": "We propose a new kernel function for neural 
networks which can take the label information into consideration, and show that it helps to improve generalisation performance.", "abstract": "Although much research has been done on proposing new models or loss functions to improve the generalisation of artificial neural networks (ANNs), less attention has been directed to the data, which is also an important factor for training ANNs. In this work, we start by approximating the interaction between two samples, i.e. how learning one sample would modify the model's prediction on the other sample. Through analysing the terms involved in weight updates in supervised learning, we find that the signs of labels influence the interactions between samples. Therefore, we propose the labelled pseudo Neural Tangent Kernel (lpNTK) which takes label information into consideration when measuring the interactions between samples. We first prove that lpNTK would asymptotically converge to the well-known empirical NTK in terms of the Frobenius norm under certain assumptions. Secondly, we illustrate how lpNTK helps to understand learning phenomena identified in previous work, specifically the learning difficulty of samples and forgetting events during learning. Moreover, we also show that lpNTK can help to improve the generalisation performance of ANNs in image classification tasks, compared with using the original whole training sets.", "keywords": "Sample Relationship;Iterated Learning;Generalisation;Neural Networks;Neural Tangent Kernel", "primary_area": "", "supplementary_material": "", "author": "Shangmin Guo;Yi Ren;Stefano V Albrecht;Kenny Smith", "authorids": "~Shangmin_Guo1;~Yi_Ren6;~Stefano_V_Albrecht1;~Kenny_Smith1", "gender": "M;M;;M", "homepage": ";https://joshua-ren.github.io/;https://agents-lab.org/stefano-albrecht/;http://www.ling.ed.ac.uk/~kenny", "dblp": "183/0949;;118/3975;58/6224", "google_scholar": "cpOrbSoAAAAJ;5QNce38AAAAJ;https://scholar.google.co.uk/citations?user=ceSFqCcAAAAJ;", "orcid": "0000-0003-1716-0994;;0000-0002-8735-1465;0000-0002-4530-6914", "linkedin": ";;;", "or_profile": "~Shangmin_Guo1;~Yi_Ren6;~Stefano_V_Albrecht1;~Kenny_Smith1", "aff": "University of Edinburgh;University of British Columbia;University of Edinburgh;University of Edinburgh", "aff_domain": "ed.ac.uk;ubc.ca;ed.ac.uk;ed.ac.uk", "position": "PhD student;PhD student;Associate Professor;Professor", "bibtex": "@misc{\nguo2023sample,\ntitle={Sample Relationships through the Lens of Learning Dynamics with Label Information},\nauthor={Shangmin Guo and Yi Ren and Stefano V Albrecht and Kenny Smith},\nyear={2023},\nurl={https://openreview.net/forum?id=Eo89g5X1m5g}\n}", "github": "", "project": "", "reviewers": "2esP;NeYL;ghqW;KFE7;HxBc", "site": "https://openreview.net/forum?id=Eo89g5X1m5g", "pdf_size": 871458, "recommendation": "5;5;5;6;8", "confidence": "3;4;4;3;3", "correctness": "3;3;3;3;4", "technical_novelty": "2;3;2;3;3", "empirical_novelty": "2;4;2;2;3", "wc_summary_paper": "103;232;45;129;86", "wc_strength_and_weaknesses": "215;653;309;253;255", "wc_clarity_quality_novelty_and_reproducibility": "91;118;3;50;33", "wc_summary_review": "47;53;53;30;59", "wc_review": "456;1056;410;462;433", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "418;637;781;527;440", "reply_reviewers": "0;0;0;0;0", "reply_authors": "1;1;1;1;1", "recommendation_avg": [ 5.8, 1.16619037896906 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "correctness_avg": [ 3.2, 0.39999999999999997 ], "technical_novelty_avg": [ 2.6, 0.4898979485566356 ], "empirical_novelty_avg": [ 2.6, 0.8 ], 
"wc_summary_paper_avg": [ 119.0, 62.75348595894893 ], "wc_strength_and_weaknesses_avg": [ 337.0, 160.81293480314324 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 59.0, 40.982923272992615 ], "wc_summary_review_avg": [ 48.4, 9.951884243699782 ], "wc_review_avg": [ 563.4, 246.9846958821538 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 560.6, 134.47765613662366 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.560112033611204, "corr_recommendation_correctness": 0.9432422182837987, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:wyGbrDx1ftkJ:scholar.google.com/&scioq=Sample+Relationships+through+the+Lens+of+Learning+Dynamics+with+Label+Information&hl=en&as_sdt=0,5", "gs_version_total": 2, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Edinburgh;University of British Columbia", "aff_unique_dep": ";", "aff_unique_url": "https://www.ed.ac.uk;https://www.ubc.ca", "aff_unique_abbr": "Edinburgh;UBC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United Kingdom;Canada" }, { "id": "EpvL_FaLtw", "title": "Multi-Treatment Effect Estimation with Proxy: Contrastive Learning and Rank Weighting", "track": "main", "status": "Reject", "tldr": "", "abstract": "We study the treatment effect estimation problem for continuous and multi-dimensional treatments, in the setting with unobserved confounders, but high-dimension proxy variables for unobserved confounders are available. Existing methods either directly adjust the relationship between observed covariates and treatments or recover the hidden confounders by probabilistic models. However, they either rely on a correctly specified treatment assignment model or require strong prior of the unobserved confounder distribution. To relax these requirements, we propose a Contrastive Regularizer (CR) to learn the proxy representation that contains all the relevant information in unobserved confounders. Based on the CR, we propose a novel ranked weighting method (Rw) to de-bias the treatment assignment. Combining Cr and Rw, we propose a neural network framework named CRNet to estimate the effects of multiple continuous treatments under unobserved confounders, evaluated by the Average Dose-Response Function. 
Empirically, we demonstrate that CRNet achieves state-of-the-art performance on both synthetic and semi-synthetic datasets.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/64f2bdaba70f9fe0f9dd9d03eacc4080dcc275f6.zip", "author": "Minqin Zhu;Anpeng Wu;Ruoxuan Xiong;Kun Kuang", "authorids": "~Minqin_Zhu1;~Anpeng_Wu1;~Ruoxuan_Xiong1;~Kun_Kuang1", "gender": "M;M;;M", "homepage": "https://scholar.google.com/citations?user=bNFv_sUAAAAJ;https://scholar.google.com.hk/citations?user=VQ4m6zQAAAAJ&hl=zh-CN&oi=sra;http://www.ruoxuanxiong.com/;http://kunkuang.github.io", "dblp": "371/6014.html;267/5637;222/2927;194/4245", "google_scholar": "bNFv_sUAAAAJ;https://scholar.google.com.hk/citations?user=VQ4m6zQAAAAJ;lg_0u-0AAAAJ;https://scholar.google.com.hk/citations?user=FOsNiMQAAAAJ", "orcid": "0009-0008-9527-8895;0000-0003-3898-7122;;0009-0000-7528-8131", "linkedin": ";;;", "or_profile": "~Minqin_Zhu1;~Anpeng_Wu1;~Ruoxuan_Xiong1;~Kun_Kuang1", "aff": "Zhejiang University;Mohamed bin Zayed University of Artificial Intelligence;Emory University;Zhejiang University", "aff_domain": "zju.edu.cn;mbzuai.ac.ae;emory.edu;zju.edu.cn", "position": "PhD student;Researcher;Assistant Professor;Associate Professor", "bibtex": "@misc{\nzhu2023multitreatment,\ntitle={Multi-Treatment Effect Estimation with Proxy: Contrastive Learning and Rank Weighting},\nauthor={Minqin Zhu and Anpeng Wu and Ruoxuan Xiong and Kun Kuang},\nyear={2023},\nurl={https://openreview.net/forum?id=EpvL_FaLtw}\n}", "github": "", "project": "", "reviewers": "2J3F;X9JX;S7Ex;kSiR", "site": "https://openreview.net/forum?id=EpvL_FaLtw", "pdf_size": 697104, "recommendation": "3;5;5;5", "confidence": "3;4;4;3", "correctness": "3;2;3;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "58;75;69;74", "wc_strength_and_weaknesses": "218;302;185;554", "wc_clarity_quality_novelty_and_reproducibility": "33;179;156;2", "wc_summary_review": "34;71;51;39", "wc_review": "343;627;461;669", "wc_reply_reviewers": "0;48;29;38", "wc_reply_authors": "173;387;520;852", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;2", "recommendation_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 69.0, 6.745368781616021 ], "wc_strength_and_weaknesses_avg": [ 314.75, 144.5672421401197 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 92.5, 76.23155514614666 ], "wc_summary_review_avg": [ 48.75, 14.254385290148432 ], "wc_review_avg": [ 525.0, 130.728726758888 ], "wc_reply_reviewers_avg": [ 28.75, 17.907749719046222 ], "wc_reply_authors_avg": [ 483.0, 246.3970373198509 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": -0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:YifXXxTi_p4J:scholar.google.com/&scioq=Multi-Treatment+Effect+Estimation+with+Proxy:+Contrastive+Learning+and+Rank+Weighting&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Zhejiang University;Mohamed bin Zayed University of Artificial Intelligence;Emory University", "aff_unique_dep": ";;", "aff_unique_url": 
"https://www.zju.edu.cn;https://mbzuai.ac.ae;https://www.emory.edu", "aff_unique_abbr": "ZJU;MBZUAI;Emory", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;0", "aff_country_unique": "China;United Arab Emirates;United States" }, { "id": "EqDnVOyiVX", "title": "Learning to Optimize Quasi-Newton Methods", "track": "main", "status": "Reject", "tldr": "We introduce a novel machine learning optimizer which combines learning to optimize with quasi-Newton methodology.", "abstract": "We introduce a novel machine learning optimizer called LODO, which online meta-learns an implicit inverse Hessian of the loss as a subroutine of quasi-Newton optimization. Our optimizer merges Learning to Optimize (L2O) techniques with quasi-Newton methods to learn neural representations of symmetric matrix vector products, which are more flexible than those in other quasi-Newton methods. Unlike other L2O methods, ours does not require any meta-training on a training task distribution, and instead learns to optimize on the fly while optimizing on the test task, adapting to the local characteristics of the loss landscape while traversing it. Theoretically, we show that our optimizer approximates the inverse Hessian in noisy loss landscapes and is capable of representing a wide range of inverse Hessians. We experimentally verify our algorithm's performance in the presence of noise, and show that simpler alternatives for representing the inverse Hessians worsen performance. Lastly, we use our optimizer to train a semi-realistic deep neural network with 95k parameters, and obtain competitive results against standard neural network optimizers.", "keywords": "meta-learning;learning to optimize;quasi-Newton;optimization;hypergradients", "primary_area": "", "supplementary_material": "/attachment/7d74777fc470c3a641ecf76ff405001cf9db4d37.zip", "author": "Isaac Liao;Rumen Dangovski;Jakob Nicolaus Foerster;Marin Soljacic", "authorids": "~Isaac_Liao1;~Rumen_Dangovski1;~Jakob_Nicolaus_Foerster1;~Marin_Soljacic1", "gender": "M;M;M;", "homepage": "https://iliao2345.github.io/;http://super-ms.mit.edu/rumen.html;https://www.jakobfoerster.com;https://www.rle.mit.edu/marin/", "dblp": ";207/8546;176/5095;131/2044", "google_scholar": "ktiXUnoAAAAJ;;6z4lQzMAAAAJ;", "orcid": "0009-0003-5556-8322;;;", "linkedin": "isaac-liao-345676276/;;;", "or_profile": "~Isaac_Liao1;~Rumen_Dangovski1;~Jakob_Nicolaus_Foerster1;~Marin_Soljacic1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;University of Oxford, University of Oxford;", "aff_domain": "mit.edu;mit.edu;eng.ox.ac.uk;", "position": "Undergrad student;PhD student;Associate Professor;", "bibtex": "@misc{\nliao2023learning,\ntitle={Learning to Optimize Quasi-Newton Methods},\nauthor={Isaac Liao and Rumen Dangovski and Jakob Nicolaus Foerster and Marin Soljacic},\nyear={2023},\nurl={https://openreview.net/forum?id=EqDnVOyiVX}\n}", "github": "", "project": "", "reviewers": "oyKA;XFBK;w1wa", "site": "https://openreview.net/forum?id=EqDnVOyiVX", "pdf_size": 996714, "recommendation": "3;5;6", "confidence": "4;3;4", "correctness": "2;3;4", "technical_novelty": "2;3;3", "empirical_novelty": "2;2;2", "wc_summary_paper": "60;82;106", "wc_strength_and_weaknesses": "370;219;212", "wc_clarity_quality_novelty_and_reproducibility": "27;23;69", "wc_summary_review": "24;32;95", "wc_review": "481;356;482", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1069;249;699", "reply_reviewers": "0;0;0", "reply_authors": "4;3;3", 
"recommendation_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 82.66666666666667, 18.785337071473826 ], "wc_strength_and_weaknesses_avg": [ 267.0, 72.8880420005001 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 39.666666666666664, 20.805982045769646 ], "wc_summary_review_avg": [ 50.333333333333336, 31.752515210959622 ], "wc_review_avg": [ 439.6666666666667, 59.162675921751735 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 672.3333333333334, 335.29423231278855 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 3.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.18898223650461363, "corr_recommendation_correctness": 0.9819805060619659, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9655001890957609214&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;1", "aff_unique_norm": "Massachusetts Institute of Technology;University of Oxford", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.ox.ac.uk", "aff_unique_abbr": "MIT;Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;United Kingdom" }, { "id": "Ew9gIwAQ7wr", "title": "FFCV: Accelerating Training by Removing Data Bottlenecks", "track": "main", "status": "Withdraw", "tldr": "We present FFCV, an easy-to-use yet highly optimized library for training machine learning models.", "abstract": "We present FFCV, a library for easy, fast, resource-efficient training of machine learning models. FFCV speeds up model training by eliminating (often subtle) data bottlenecks from the training process. In particular, we combine techniques such as an efficient file storage format, caching, data pre-loading, asynchronous data transfer, and just-in-time compilation to (a) make data loading and transfer significantly more efficient, ensuring that GPUs can reach full utilization; and (b) offload as much data processing as possible to the CPU asynchronously, freeing GPU up capacity for training. Using FFCV, we train ResNet-18 and ResNet-50 on the ImageNet dataset with a state-of-the-art tradeoff between accuracy and training time. For example, across the range of ResNet-50 models we test, we obtain the same accuracy as the best baselines in half the time. 
We demonstrate FFCV's performance, ease-of-use, extensibility, and ability to adapt to resource constraints through several case studies.", "keywords": "infrastructure;data loading;fast training;library;usability", "primary_area": "", "supplementary_material": "", "author": "Guillaume Leclerc;Andrew Ilyas;Logan Engstrom;Sung Min Park;Hadi Salman;Aleksander Madry", "authorids": "~Guillaume_Leclerc1;~Andrew_Ilyas1;~Logan_Engstrom1;~Sung_Min_Park2;~Hadi_Salman1;~Aleksander_Madry1", "gender": "M;M;M;;M;M", "homepage": ";http://andrewilyas.com;;https://sungminpark.com;https://hadisalman.com/;https://people.csail.mit.edu/madry/", "dblp": "183/9387;156/5465;207/7298;28/157;192/3204;67/2454", "google_scholar": ";Dtw3YBoAAAAJ;;;Kr8JjF0AAAAJ;SupjsEUAAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Guillaume_Leclerc1;~Andrew_Ilyas1;~Logan_Engstrom1;~Sung_Min_Park2;~Hadi_Salman1;~Aleksander_Madry1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;mit.edu;mit.edu;mit.edu;mit.edu", "position": "PhD student;PhD student;PhD student;PhD student;PhD Student;Professor", "bibtex": "@misc{\nleclerc2023ffcv,\ntitle={{FFCV}: Accelerating Training by Removing Data Bottlenecks},\nauthor={Guillaume Leclerc and Andrew Ilyas and Logan Engstrom and Sung Min Park and Hadi Salman and Aleksander Madry},\nyear={2023},\nurl={https://openreview.net/forum?id=Ew9gIwAQ7wr}\n}", "github": "", "project": "", "reviewers": "eGr1;rVNF;3hj7", "site": "https://openreview.net/forum?id=Ew9gIwAQ7wr", "pdf_size": 861816, "recommendation": "3;3;5", "confidence": "4;5;3", "correctness": "3;3;4", "technical_novelty": "3;2;2", "empirical_novelty": "2;2;2", "wc_summary_paper": "48;57;111", "wc_strength_and_weaknesses": "452;75;183", "wc_clarity_quality_novelty_and_reproducibility": "323;34;73", "wc_summary_review": "24;427;90", "wc_review": "847;593;457", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 72.0, 27.820855486487112 ], "wc_strength_and_weaknesses_avg": [ 236.66666666666666, 158.51883869818823 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 143.33333333333334, 128.03732094805622 ], "wc_summary_review_avg": [ 180.33333333333334, 176.48858949581478 ], "wc_review_avg": [ 632.3333333333334, 161.62783038683517 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.8660254037844387, "corr_recommendation_correctness": 1.0, "gs_citation": 85, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4006279832286260353&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 8, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", 
"aff_country_unique": "United States" }, { "id": "EwoBTLCY-Y", "title": "Neural Optimal Transport with General Cost Functionals", "track": "main", "status": "Reject", "tldr": "", "abstract": "We present a novel neural-networks-based algorithm to compute optimal transport (OT) plans and maps for general cost functionals. The algorithm is based on a saddle point reformulation of the OT problem and generalizes prior OT methods for weak and strong cost functionals. As an application, we construct a functional to map data distributions with preserving the class-wise structure of data.", "keywords": "Optimal Transport;Neural Networks;Generative Modelling;Unpaired Learning", "primary_area": "", "supplementary_material": "/attachment/92046cada7388a7cc8bd091c27f919c4ea4534dd.zip", "author": "Arip Asadulaev;Alexander Korotin;Vage Egiazarian;Evgeny Burnaev", "authorids": "~Arip_Asadulaev1;~Alexander_Korotin2;~Vage_Egiazarian1;~Evgeny_Burnaev1", "gender": "M;M;M;M", "homepage": ";;http://faculty.skoltech.ru/people/evgenyburnaev;https://akorotin.netlify.app", "dblp": "243/2822;232/3274;144/7845;209/9906", "google_scholar": "wcdrgdYAAAAJ;Bktg6JEAAAAJ;https://scholar.google.ru/citations?user=pCRdcOwAAAAJ;https://scholar.google.ru/citations?user=1rIIvjAAAAAJ", "orcid": ";0000-0003-4444-9769;0000-0001-8424-0690;0000-0003-4286-925X", "linkedin": ";;;", "or_profile": "~Arip_Asadulaev1;~Vage_Egiazarian1;~Evgeny_Burnaev1;~Alexander_Andreevich_Korotin1", "aff": "ITMO University;Yandex;Skolkovo Institute of Science and Technology;Skolkovo Institute of Science and Technology", "aff_domain": "itmo.ru;yandex-team.ru;skoltech.ru;skoltech.ru", "position": "PhD student;Researcher;Full Professor;Head of Research Group", "bibtex": "@misc{\nasadulaev2023neural,\ntitle={Neural Optimal Transport with General Cost Functionals},\nauthor={Arip Asadulaev and Alexander Korotin and Vage Egiazarian and Evgeny Burnaev},\nyear={2023},\nurl={https://openreview.net/forum?id=EwoBTLCY-Y}\n}", "github": "", "project": "", "reviewers": "tFkj;MBuL;AhFA;7wv1", "site": "https://openreview.net/forum?id=EwoBTLCY-Y", "pdf_size": 4360900, "recommendation": "3;5;6;6", "confidence": "5;3;4;3", "correctness": "2;4;3;4", "technical_novelty": "2;4;2;3", "empirical_novelty": "2;4;2;2", "wc_summary_paper": "46;153;91;91", "wc_strength_and_weaknesses": "66;304;143;87", "wc_clarity_quality_novelty_and_reproducibility": "34;111;50;12", "wc_summary_review": "37;43;41;34", "wc_review": "183;611;325;224", "wc_reply_reviewers": "0;216;0;0", "wc_reply_authors": "670;465;493;273", "reply_reviewers": "0;1;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 95.25, 38.06819538670043 ], "wc_strength_and_weaknesses_avg": [ 150.0, 93.2603881613196 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 51.75, 36.772102197182036 ], "wc_summary_review_avg": [ 38.75, 3.491060010942235 ], "wc_review_avg": [ 335.75, 167.10681464261114 ], "wc_reply_reviewers_avg": [ 54.0, 93.53074360871938 ], "wc_reply_authors_avg": [ 475.25, 140.75932473552152 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.7385489458759963, "corr_recommendation_correctness": 0.7385489458759963, "gs_citation": 41, 
"gs_cited_by_link": "https://scholar.google.com/scholar?cites=11441074728953273241&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "ITMO University;Yandex;Skolkovo Institute of Science and Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.itmo.ru;https://yandex.com;https://www.skoltech.ru", "aff_unique_abbr": "ITMO;Yandex;Skoltech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Russian Federation" }, { "id": "Ey2ePmtABj", "title": "No-Regret Learning in Strongly Monotone Games Converges to a Nash Equilibrium", "track": "main", "status": "Reject", "tldr": "", "abstract": "This paper studies a class of online games involving multiple agents with continuous actions that aim to minimize their local loss functions. An open question in the study of online games is whether no-regret learning for such agents leads to a Nash equilibrium. We address this question by providing a sufficient condition for strongly monotone games that guarantees Nash equilibrium convergence in a time average sense. Furthermore, we show that the class of games for which no-regret learning leads to a Nash equilibrium can be expanded if some further information on the learning algorithm is known. Specifically, we provide relaxed sufficient conditions for first-order and zeroth-order gradient descent algorithms as well as for best response algorithms in which agents choose actions that best respond to other players' actions during the last episode. We analyze the convergence rate for these algorithms and present numerical experiments on three economic market problems to illustrate their performance.\n", "keywords": "Online game;no-regret learning;Nash equilibrium convergence;monotone game", "primary_area": "", "supplementary_material": "", "author": "Zifan Wang;Yi Shen;Michael Zavlanos;Karl Henrik Johansson", "authorids": "~Zifan_Wang2;~Yi_Shen3;~Michael_Zavlanos1;~Karl_Henrik_Johansson1", "gender": "M;;;", "homepage": "https://www.kth.se/profile/zifanw/;;;https://people.kth.se/~kallej/", "dblp": "35/10147-2;;;", "google_scholar": "LV-DrIUAAAAJ;YYOyml4AAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Zifan_Wang2;~Yi_Shen3;~Michael_Zavlanos1;~Karl_Henrik_Johansson1", "aff": "KTH Royal Institute of Technology;Duke University;;KTH Royal Institute of Technology", "aff_domain": "kth.se;duke.edu;;kth.se", "position": "PhD student;PhD student;;Full Professor", "bibtex": "@misc{\nwang2023noregret,\ntitle={No-Regret Learning in Strongly Monotone Games Converges to a Nash Equilibrium},\nauthor={Zifan Wang and Yi Shen and Michael Zavlanos and Karl Henrik Johansson},\nyear={2023},\nurl={https://openreview.net/forum?id=Ey2ePmtABj}\n}", "github": "", "project": "", "reviewers": "yhnp;MydF;k8bM;yciX", "site": "https://openreview.net/forum?id=Ey2ePmtABj", "pdf_size": 7546760, "recommendation": "3;5;6;6", "confidence": "5;2;4;2", "correctness": "4;4;4;4", "technical_novelty": "3;2;2;3", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "68;40;19;76", "wc_strength_and_weaknesses": "169;19;509;252", "wc_clarity_quality_novelty_and_reproducibility": "230;5;3;50", "wc_summary_review": "65;49;40;15", "wc_review": "532;113;571;393", "wc_reply_reviewers": "80;69;0;0", "wc_reply_authors": "1030;652;530;632", "reply_reviewers": "1;1;0;0", "reply_authors": "2;2;1;1", "recommendation_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.25, 
1.299038105676658 ], "correctness_avg": [ 4.0, 0.0 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 50.75, 22.68672519338126 ], "wc_strength_and_weaknesses_avg": [ 237.25, 177.73347321199796 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 72.0, 93.13699587167282 ], "wc_summary_review_avg": [ 42.25, 18.102140757380052 ], "wc_review_avg": [ 402.25, 179.62652226216483 ], "wc_reply_reviewers_avg": [ 37.25, 37.45246987850067 ], "wc_reply_authors_avg": [ 711.0, 189.89734068701435 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.6285393610547089, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3056308406368296361&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;0", "aff_unique_norm": "KTH Royal Institute of Technology;Duke University", "aff_unique_dep": ";", "aff_unique_url": "https://www.kth.se;https://www.duke.edu", "aff_unique_abbr": "KTH;Duke", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Sweden;United States" }, { "title": "Test-Time Adaptation via Self-Training with Nearest Neighbor Information", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11610", "id": "EzLtB4M1SbM", "poster": "/media/PosterPDFs/ICLR%202023/11610.png?t=1681370155.4042072", "openreview": "https://openreview.net/forum?id=EzLtB4M1SbM", "slides": "https://iclr.cc/virtual/2023/poster/11610", "video": "https://iclr.cc/virtual/2023/poster/11610", "author_site": "Minguk Jang, Sae-Young Chung, Hye Won Chung", "tldr": "This work presents a simple and efficient test-time adaptation method to adapt trained classifiers by utilizing an ensemble of adaptation modules and self-training with nearest neighbor information.", "abstract": "Test-time adaptation (TTA) aims to adapt a trained classifier using online unlabeled test data only, without any information related to the training procedure. Most existing TTA methods adapt the trained classifier using the classifier's prediction on the test data as a pseudo-label.\nHowever, under test-time domain shift, the accuracy of the pseudo-labels cannot be guaranteed, and thus TTA methods often suffer performance degradation in the adapted classifier. To overcome this limitation, we propose a novel test-time adaptation method, called Test-time Adaptation via Self-Training with nearest neighbor information (TAST), which is composed of the following procedures: (1) adds trainable adaptation modules on top of the trained feature extractor; (2) newly defines a pseudo-label distribution for the test data by using the nearest neighbor information; (3) trains these modules only a few times during test time to match the nearest neighbor-based pseudo label distribution and a prototype-based class distribution for the test data; and (4) predicts the label of test data using the average predicted class distribution from these modules. The pseudo-label generation is based on the basic intuition that a test sample and its nearest neighbor in the embedding space are likely to share the same label under the domain shift. By utilizing multiple randomly initialized adaptation modules, TAST extracts useful information for the classification of the test data under the domain shift, using the nearest neighbor information. 
TAST showed better performance than state-of-the-art TTA methods on two standard benchmark tasks: domain generalization (namely VLCS, PACS, OfficeHome, and TerraIncognita) and image corruption (particularly CIFAR-10/100C).\n", "keywords": "test-time adaptation;domain adaptation;domain shift", "primary_area": "", "supplementary_material": "/attachment/d32f002f5f19bf0b3173dd75c99d319e78153783.zip", "author": "Minguk Jang;Sae-Young Chung;Hye Won Chung", "authorids": "~Minguk_Jang1;~Sae-Young_Chung1;~Hye_Won_Chung2", "gender": "M;;F", "homepage": "https://www.facebook.com/minguk.jang.5/;http://itml.kaist.ac.kr;https://iids.kaist.ac.kr/", "dblp": ";https://dblp.uni-trier.de/pers/c/Chung:Sae=Young.html;https://dblp.uni-trier.de/pers/hd/c/Chung:Hye_Won", "google_scholar": ";https://scholar.google.co.kr/citations?user=k-o3JBIAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Minguk_Jang1;~Sae-Young_Chung1;~Hye_Won_Chung2", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "position": "PhD student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\njang2023testtime,\ntitle={Test-Time Adaptation via Self-Training with Nearest Neighbor Information},\nauthor={Minguk Jang and Sae-Young Chung and Hye Won Chung},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=EzLtB4M1SbM}\n}", "github": "", "project": "", "reviewers": "rMac;tB9n;hpzb;Lha6", "pdf_size": 504445, "recommendation": "5;6;6;8", "confidence": "3;4;5;4", "correctness": "3;3;4;4", "technical_novelty": "2;3;2;3", "empirical_novelty": "2;3;2;3", "wc_summary_paper": "35;69;195;118", "wc_strength_and_weaknesses": "380;346;455;189", "wc_clarity_quality_novelty_and_reproducibility": "24;320;661;52", "wc_summary_review": "55;200;98;32", "wc_review": "494;935;1409;391", "wc_reply_reviewers": "127;297;431;0", "wc_reply_authors": "1015;2782;1723;131", "reply_reviewers": "1;2;1;0", "reply_authors": "2;6;4;1", "recommendation_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 104.25, 60.13058705850127 ], "wc_strength_and_weaknesses_avg": [ 342.5, 97.0012886512339 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 264.25, 256.55835106267733 ], "wc_summary_review_avg": [ 96.25, 64.41418710191103 ], "wc_review_avg": [ 807.25, 403.05481947248813 ], "wc_reply_reviewers_avg": [ 213.75, 163.81601722664362 ], "wc_reply_authors_avg": [ 1412.75, 971.1061669560131 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.25, 1.920286436967152 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.3244428422615251, "corr_recommendation_correctness": 0.6882472016116854, "gs_citation": 67, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13183441377692558295&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=EzLtB4M1SbM", "email": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", 
"aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "Accelerating Guided Diffusion Sampling with Splitting Numerical Methods", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11604", "id": "F0KTk2plQzO", "poster": "/media/PosterPDFs/ICLR%202023/11604.png?t=1680877324.235403", "openreview": "https://openreview.net/forum?id=F0KTk2plQzO", "slides": "https://iclr.cc/virtual/2023/poster/11604", "video": "https://iclr.cc/virtual/2023/poster/11604", "author_site": "Suttisak Wisadwongsa, Supasorn Suwajanakorn", "tldr": "We accelerate guided diffusion sampling using splitting numerical methods.", "abstract": "Guided diffusion is a technique for conditioning the output of a diffusion model at sampling time without retraining the network for each specific task. However, one drawback of diffusion models, whether they are guided or unguided, is their slow sampling process. \nRecent techniques can accelerate unguided sampling by applying high-order numerical methods to the sampling process when viewed as differential equations. On the contrary, we discover that the same techniques do not work for guided sampling, and little has been explored about its acceleration. This paper explores the culprit of this problem and provides a solution based on operator splitting methods, motivated by our key finding that classical high-order numerical methods are unsuitable for the conditional function. Our proposed method can re-utilize the high-order methods for guided sampling and can generate images with the same quality as a 250-step DDIM baseline using 32-58% less sampling time on ImageNet256. \nWe also demonstrate usage on a wide variety of conditional generation tasks, such as text-to-image generation, colorization, inpainting, and super-resolution.", "keywords": "Splitting Numerical Methods;Guided Diffusion Models", "primary_area": "", "supplementary_material": "/attachment/401894933b7f321c80e14a96d12fab23c5d84b43.zip", "author": "Suttisak Wizadwongsa;Supasorn Suwajanakorn", "authorids": "~Suttisak_Wizadwongsa1;supasorn.s@vistec.ac.th", "gender": "M;", "homepage": "https://sites.google.com/view/suttisak-wizadwongsa/;", "dblp": "210/6607;", "google_scholar": "https://scholar.google.com/citations?hl=th;", "orcid": ";", "linkedin": ";", "or_profile": "~Suttisak_Wizadwongsa1;supasorn.s@vistec.ac.th", "aff": "Vidyasirimedhi Institute of Science and Technology (VISTEC);", "aff_domain": "vistec.ac.th;", "position": "PhD student;", "bibtex": "@inproceedings{\nwizadwongsa2023accelerating,\ntitle={Accelerating Guided Diffusion Sampling with Splitting Numerical Methods},\nauthor={Suttisak Wizadwongsa and Supasorn Suwajanakorn},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=F0KTk2plQzO}\n}", "github": "", "project": "", "reviewers": "PHsB;3tU3;sMbW;CQRF", "pdf_size": 12880507, "recommendation": "6;6;6;6", "confidence": "4;3;4;3", "correctness": "3;3;3;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;3;2;2", "wc_summary_paper": "99;59;126;89", "wc_strength_and_weaknesses": "413;106;315;80", "wc_clarity_quality_novelty_and_reproducibility": "47;5;104;126", "wc_summary_review": "26;19;107;32", "wc_review": "585;189;652;327", "wc_reply_reviewers": "163;9;94;0", "wc_reply_authors": "2062;193;1112;434", "reply_reviewers": "1;1;1;0", "reply_authors": "3;1;2;1", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.5, 0.5 ], 
"correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 93.25, 23.962209831315644 ], "wc_strength_and_weaknesses_avg": [ 228.5, 140.16151397584144 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 70.5, 47.55260245244207 ], "wc_summary_review_avg": [ 46.0, 35.51760127035608 ], "wc_review_avg": [ 438.25, 188.233066967523 ], "wc_reply_reviewers_avg": [ 66.5, 66.7026985960838 ], "wc_reply_authors_avg": [ 950.25, 724.9297810822784 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12096771542467952621&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=F0KTk2plQzO", "email": "vistec.ac.th;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Vidyasirimedhi Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.vistec.ac.th", "aff_unique_abbr": "VISTEC", "aff_country_unique_index": "0", "aff_country_unique": "Thailand" }, { "id": "F0UQv_MNWCt", "title": "ORCA: Interpreting Prompted Language Models via Locating Supporting Evidence in the Ocean of Pretraining Data", "track": "main", "status": "Reject", "tldr": "We find supporting data evidence from pretraining data to interpret prompted language models.", "abstract": "Prompting large pretrained language models leads to strong performance in a variety of downstream tasks. However, it is still unclear from where the model learns task-specific knowledge, especially in zero-shot setups. In this work, we propose a novel method ORCA to identify evidence of the model's task-specific competence in prompt-based learning. Through an instance attribution approach to model interpretability, by iteratively using gradient information related to the downstream task, ORCA locates a very small subset of pretraining data that directly supports the model's predictions in a given task; we call this subset supporting data evidence. We show that supporting data evidence offers new insights about the prompted language models. 
For example, in the tasks of sentiment analysis and textual entailment, BERT shows a substantial reliance on BookCorpus---the smaller corpus of BERT's two pretraining corpora---as well as on pretraining examples that mask out synonyms to the task labels used in prompts.", "keywords": "interpretability;prompting language models;pretraining data as evidence", "primary_area": "", "supplementary_material": "", "author": "Xiaochuang Han;Yulia Tsvetkov", "authorids": "~Xiaochuang_Han1;~Yulia_Tsvetkov1", "gender": "M;F", "homepage": "https://xhan77.github.io/;https://homes.cs.washington.edu/~yuliats/", "dblp": "216/6755;75/8157", "google_scholar": "GamSVF0AAAAJ;SEDPkrsAAAAJ", "orcid": ";0000-0002-4634-7128", "linkedin": ";", "or_profile": "~Xiaochuang_Han1;~Yulia_Tsvetkov1", "aff": "Department of Computer Science, University of Washington;Department of Computer Science, University of Washington", "aff_domain": "cs.washington.edu;cs.washington.edu", "position": "PhD student;Assistant Professor", "bibtex": "@misc{\nhan2023orca,\ntitle={{ORCA}: Interpreting Prompted Language Models via Locating Supporting Evidence in the Ocean of Pretraining Data},\nauthor={Xiaochuang Han and Yulia Tsvetkov},\nyear={2023},\nurl={https://openreview.net/forum?id=F0UQv_MNWCt}\n}", "github": "", "project": "", "reviewers": "st4M;NC89;uxbN;sFZJ", "site": "https://openreview.net/forum?id=F0UQv_MNWCt", "pdf_size": 431646, "recommendation": "3;5;6;6", "confidence": "4;4;3;3", "correctness": "3;4;3;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "57;112;135;95", "wc_strength_and_weaknesses": "284;589;224;39", "wc_clarity_quality_novelty_and_reproducibility": "7;58;4;6", "wc_summary_review": "30;37;45;4", "wc_review": "378;796;408;144", "wc_reply_reviewers": "335;0;0;0", "wc_reply_authors": "1078;1147;348;39", "reply_reviewers": "2;0;0;0", "reply_authors": "3;2;1;1", "recommendation_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 99.75, 28.472574523565655 ], "wc_strength_and_weaknesses_avg": [ 284.0, 197.895174271633 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 18.75, 22.68672519338126 ], "wc_summary_review_avg": [ 29.0, 15.378556499229699 ], "wc_review_avg": [ 431.5, 233.9503152380864 ], "wc_reply_reviewers_avg": [ 83.75, 145.0592551338935 ], "wc_reply_authors_avg": [ 653.0, 472.9381566336131 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.8164965809277259, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17677159543097011662&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "University of Washington", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.washington.edu", "aff_unique_abbr": "UW", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Seattle", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "F5Cj26wfiu", "title": "xTrimoABFold: Improving Antibody Structure Prediction without Multiple Sequence Alignments", "track": "main", "status": "Reject", "tldr": "", "abstract": "Antibody, used by the immune system to identify and neutralize 
foreign objects such as pathogenic bacteria and viruses, plays an important role in the immune system. In the field of drug engineering, the essential task is designing a novel antibody whose paratope (substructures in the antibody) binds to the epitope of the specific antigen with high precision. Also, understanding the structure of an antibody and its paratope can facilitate a mechanistic understanding of its function. Therefore, antibody structure prediction has always been a highly valuable problem for drug discovery. AlphaFold2, a breakthrough in the field of structural biology, provides a feasible solution to predict protein structure based on protein sequences and computationally expensive coevolutionary multiple sequence alignments (MSAs). However, its limited computational efficiency and undesirable prediction accuracy on antibodies, especially on the complementarity-determining regions (CDRs), limit its applications in industrial high-throughput drug design. In this paper, we present a novel method named xTrimoABFold to predict antibody structure from antibody sequence based on a pretrained antibody language model (ALM) as well as homologous templates, which are searched from the Protein Data Bank (PDB) via fast and cheap algorithms. xTrimoABFold outperforms the MSA-based AlphaFold2 and the protein language model based SOTAs, e.g., OmegaFold, HelixFold-Single and IgFold, by a large margin (30+% improvement in RMSD) while running 151x faster than AlphaFold2. To the best of our knowledge, xTrimoABFold is the best antibody structure predictor to date.", "keywords": "Protein structure prediction;antibody structure prediction;amino acid sequence;homologous structure", "primary_area": "", "supplementary_material": "", "author": "Yining Wang;Xumeng Gong;Shaochuan Li;Bing Yang;YiWu Sun;Chuan Shi;Hui Li;Yangang Wang;Cheng Yang;Le Song", "authorids": "~Yining_Wang3;~Xumeng_Gong1;~Shaochuan_Li1;~Bing_Yang3;~YiWu_Sun1;~Chuan_Shi1;~Hui_Li2;~Yangang_Wang4;~Cheng_Yang6;~Le_Song1", "gender": "F;M;;M;M;M;;;M;M", "homepage": "https://yiningwang2.github.io/;;;;https://github.com/SYW23;http://www.shichuan.org/;;;https://albertyang33.github.io/;http://www.cc.gatech.edu/~lsong", "dblp": ";https://dblp.uni-trier.de/pid/314/0059.html;;;;64/3041-1;;83/9429.html;49/1457-2;94/3481", "google_scholar": ";;;;;tUq_v90AAAAJ;;;OlLjVUcAAAAJ;Xl4E0CsAAAAJ", "orcid": ";;;0000-0003-1983-3988;0000-0002-0061-0779;0000-0002-3734-0266;;;0000-0001-7821-0030;", "linkedin": ";;;;;;;;;", "or_profile": "~Yining_Wang3;~Xumeng_Gong1;~Shaochuan_Li1;~Bing_Yang3;~YiWu_Sun1;~Chuan_Shi1;~Hui_Li2;~Yangang_Wang4;~Cheng_Yang6;~Le_Song1", "aff": "University of Chinese Academy of Sciences;Beijing University of Posts and Telecommunications;;;;Beijing University of Post and Telecommunication;;;Beijing University of Posts and Telecommunications;College of Computing, Georgia Institute of Technology", "aff_domain": "ucas.ac.cn;bupt.edu.cn;;;;bupt.edu.cn;;;bupt.edu.cn;cc.gatech.edu", "position": "MS student;MS student;;;;Full Professor;;;Associate Professor;Associate Professor", "bibtex": "@misc{\nwang2023xtrimoabfold,\ntitle={xTrimo{ABF}old: Improving Antibody Structure Prediction without Multiple Sequence Alignments },\nauthor={Yining Wang and Xumeng Gong and Shaochuan Li and Bing Yang and YiWu Sun and Chuan Shi and Hui Li and Yangang Wang and Cheng Yang and Le Song},\nyear={2023},\nurl={https://openreview.net/forum?id=F5Cj26wfiu}\n}", "github": "", "project": "", "reviewers": "pjvj;1hLD;bKCt;hjqN", "site": 
"https://openreview.net/forum?id=F5Cj26wfiu", "pdf_size": 918390, "recommendation": "1;3;5;6", "confidence": "5;4;2;3", "correctness": "2;4;2;3", "technical_novelty": "1;1;2;2", "empirical_novelty": "1;0;2;2", "wc_summary_paper": "76;65;114;57", "wc_strength_and_weaknesses": "195;79;127;77", "wc_clarity_quality_novelty_and_reproducibility": "444;17;3;177", "wc_summary_review": "145;49;68;12", "wc_review": "860;210;312;323", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.75, 1.920286436967152 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "correctness_avg": [ 2.75, 0.82915619758885 ], "technical_novelty_avg": [ 1.5, 0.5 ], "empirical_novelty_avg": [ 1.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 78.0, 21.85177338341216 ], "wc_strength_and_weaknesses_avg": [ 119.5, 47.96613388631608 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 160.25, 177.51249955989016 ], "wc_summary_review_avg": [ 68.5, 48.541219597369 ], "wc_review_avg": [ 426.25, 254.27187713154595 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 10, 0 ], "corr_recommendation_confidence": -0.873333764609373, "corr_recommendation_correctness": 0.11776030079682893, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2804497642264830359&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;1;1;2", "aff_unique_norm": "University of Chinese Academy of Sciences;Beijing University of Posts and Telecommunications;Georgia Institute of Technology", "aff_unique_dep": ";;College of Computing", "aff_unique_url": "http://www.ucas.ac.cn;http://www.bupt.edu.cn/;https://www.gatech.edu", "aff_unique_abbr": "UCAS;BUPT;Georgia Tech", "aff_campus_unique_index": "1;1;1;2", "aff_campus_unique": ";Beijing;Atlanta", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "China;United States" }, { "id": "F5LPNbgpuo0", "title": "Dual Ensembled Multiagent Q-Learning with Hypernet Regularizer", "track": "main", "status": "Reject", "tldr": "", "abstract": "Overestimation in the temporal-difference single-agent reinforcement learning has been widely studied, where the variance in value estimation causes overestimation of the maximal target value due to Jensen's inequality. Instead, overestimation in multiagent settings has received little attention though it can be even more severe. One kind of pioneer work extends ensemble methods from single-agent deep reinforcement learning to address the multiagent overestimation by discarding the large target values among the ensemble. However, its ability is limited by the ensemble diversity. Another kind of work softens the maximum operator in the Bellman equation to avoid large target values, but also leads to sub-optimal value functions. Unlike previous works, in this paper, we address the multiagent overestimation by analyzing its underlying causes in an estimation-optimization iteration manner. We show that the overestimation in multiagent value-mixing Q-learning not only comes from the overestimation of target Q-values but also accumulates in the online Q-network's optimization step. Therefore, first, we integrate the random ensemble and in-target minimization into the estimation of target Q-values to derive a lower update target. 
Second, we propose a novel hypernet regularizer on the learnable terms of the online global Q-network to further reduce overestimation. Experiments on various kinds of tasks demonstrate that the proposed method consistently addresses the overestimation problem while previous works fail.", "keywords": "multiagent system;deep reinforcement learning;overestimation", "primary_area": "", "supplementary_material": "", "author": "Yaodong Yang;Guangyong Chen;Hongyao Tang;Furui Liu;Danruo DENG;Jianye HAO;Pheng-Ann Heng", "authorids": "~Yaodong_Yang2;~Guangyong_Chen1;~Hongyao_Tang1;~Furui_Liu1;~Danruo_DENG1;~Jianye_HAO1;~Pheng-Ann_Heng1", "gender": "M;M;M;M;;M;M", "homepage": ";https://guangyongchen.github.io/;https://bluecontra.github.io/;;;http://www.icdai.org/jianye.html;http://www.cse.cuhk.edu.hk/~pheng", "dblp": "170/1496-2;175/1354;220/4275;116/7289;304/2088.html;21/7664.html;52/2889", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.hk/citations?user=AUpqepUAAAAJ;yIqzRH4AAAAJ;https://scholar.google.com.hk/citations?user=DJY8NXMAAAAJ;;;https://scholar.google.com/citations?sortby=pubdate", "orcid": ";;;;;0000-0002-0422-8235;", "linkedin": ";;;;;;", "or_profile": "~Yaodong_Yang2;~Guangyong_Chen1;~Hongyao_Tang1;~Furui_Liu1;~Danruo_DENG1;~Jianye_HAO1;~Pheng-Ann_Heng1", "aff": "Department of Computer Science and Engineering, The Chinese University of Hong Kong;Zhejiang Lab;College of Intelligence and Computing, Tianjin University;Zhejiang Lab & UCAS & Zhejiang University;The Chinese University of Hong Kong;Tianjin University;The Chinese University of Hong Kong", "aff_domain": "cse.cuhk.edu.hk;zju.edu.cn;tju.edu.cn;zhejianglab.com;cuhk.edu.hk;tju.edu.cn;cuhk.edu.hk", "position": "PhD student;Principal Researcher;PhD student;Associate Professor;PhD student;Associate Professor;Full Professor", "bibtex": "@misc{\nyang2023dual,\ntitle={Dual Ensembled Multiagent Q-Learning with Hypernet Regularizer},\nauthor={Yaodong Yang and Guangyong Chen and Hongyao Tang and Furui Liu and Danruo DENG and Jianye HAO and Pheng-Ann Heng},\nyear={2023},\nurl={https://openreview.net/forum?id=F5LPNbgpuo0}\n}", "github": "", "project": "", "reviewers": "DqHH;hj7z;dFfz", "site": "https://openreview.net/forum?id=F5LPNbgpuo0", "pdf_size": 1384199, "recommendation": "3;3;5", "confidence": "3;5;3", "correctness": "2;2;3", "technical_novelty": "2;2;3", "empirical_novelty": "3;2;2", "wc_summary_paper": "86;36;100", "wc_strength_and_weaknesses": "270;316;266", "wc_clarity_quality_novelty_and_reproducibility": "77;73;17", "wc_summary_review": "18;14;34", "wc_review": "451;439;417", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "correctness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 74.0, 27.47119703738202 ], "wc_strength_and_weaknesses_avg": [ 284.0, 22.686266036231405 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 55.666666666666664, 27.39018477889885 ], "wc_summary_review_avg": [ 22.0, 8.640987597877148 ], "wc_review_avg": [ 435.6666666666667, 14.079141387961917 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 
], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.5000000000000001, "corr_recommendation_correctness": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:oHeb9hPL7OsJ:scholar.google.com/&scioq=Dual+Ensembled+Multiagent+Q-Learning+with+Hypernet+Regularizer&hl=en&as_sdt=0,33", "gs_version_total": 3, "aff_unique_index": "0;1;2;3;0;2;0", "aff_unique_norm": "Chinese University of Hong Kong;Zhejiang Lab;Tianjin University;Zhejiang University", "aff_unique_dep": "Department of Computer Science and Engineering;;College of Intelligence and Computing;", "aff_unique_url": "https://www.cuhk.edu.hk;http://www.zhejianglab.com;http://www.tju.edu.cn;http://www.zju.edu.cn", "aff_unique_abbr": "CUHK;;;ZJU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "F5uYcwABMu", "title": "Same Pre-training Loss, Better Downstream: Implicit Bias Matters for Language Models", "track": "main", "status": "Reject", "tldr": "We study the role of implicit bias in language modeling", "abstract": "Language modeling on large-scale datasets leads to impressive performance gains on various downstream language tasks. The (validation) pre-training loss (or perplexity in autoregressive language modeling) is often used as the evaluation metric when developing language models since the pre-training loss tends to be well-correlated with downstream performance (which is itself difficult to evaluate comprehensively). Contrary to this conventional wisdom, this paper shows that 1) pre-training loss cannot fully explain downstream performance and 2) flatness of the model is well-correlated with downstream performance where pre-training loss is not. On simplified datasets, we identify three ways to produce models with the same (statistically optimal) pre-training loss but different downstream performance: continue pre-training after convergence, increasing the model size, and changing the training algorithm. These experiments demonstrate the existence of implicit bias of pre-training algorithms/optimizers---among models with the same minimal pre-training loss, they implicitly prefer more transferable ones. Toward understanding this implicit bias, we prove that SGD with standard mini-batch noise implicitly prefers flatter minima in language models, and empirically observe a strong correlation between flatness and downstream performance among models with the same minimal pre-training loss. 
We also prove in a synthetic language setting that among the models with the minimal pre-training loss, the flattest model transfers to downstream tasks.", "keywords": "Language Modeling;Implicit Bias", "primary_area": "", "supplementary_material": "/attachment/316176d187ff4d72e34202a83efa4b1353c9d668.zip", "author": "Hong Liu;Sang Michael Xie;Zhiyuan Li;Tengyu Ma", "authorids": "~Hong_Liu5;~Sang_Michael_Xie1;~Zhiyuan_Li2;~Tengyu_Ma1", "gender": "M;;M;M", "homepage": ";https://cs.stanford.edu/~eix/;https://zhiyuanli.ttic.edu;http://ai.stanford.edu/~tengyuma/", "dblp": ";220/3987;l/ZhiyuanLi;54/9061", "google_scholar": "BUc2uq0AAAAJ;EBNa5IEAAAAJ;https://scholar.google.com/citations?hl=en;i38QlUwAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Hong_Liu5;~Sang_Michael_Xie1;~Zhiyuan_Li2;~Tengyu_Ma1", "aff": "Stanford University;Stanford University;Computer Science Department, Stanford University;Facebook AI Research", "aff_domain": "stanford.edu;stanford.edu;cs.stanford.edu;fb.com", "position": "PhD student;PhD student;Postdoc;Visiting Scientist", "bibtex": "@misc{\nliu2023same,\ntitle={Same Pre-training Loss, Better Downstream: Implicit Bias Matters for Language Models},\nauthor={Hong Liu and Sang Michael Xie and Zhiyuan Li and Tengyu Ma},\nyear={2023},\nurl={https://openreview.net/forum?id=F5uYcwABMu}\n}", "github": "", "project": "", "reviewers": "KN6A;mwuY;8JLN;6MPN", "site": "https://openreview.net/forum?id=F5uYcwABMu", "pdf_size": 901320, "recommendation": "3;6;6;6", "confidence": "4;3;3;3", "correctness": "3;3;4;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "65;56;85;70", "wc_strength_and_weaknesses": "291;570;205;192", "wc_clarity_quality_novelty_and_reproducibility": "6;37;8;42", "wc_summary_review": "22;56;33;51", "wc_review": "384;719;331;355", "wc_reply_reviewers": "0;197;266;124", "wc_reply_authors": "782;1818;629;630", "reply_reviewers": "0;2;1;1", "reply_authors": "2;5;2;2", "recommendation_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 69.0, 10.51189802081432 ], "wc_strength_and_weaknesses_avg": [ 314.5, 152.33925954920485 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 23.25, 16.361158271956175 ], "wc_summary_review_avg": [ 40.5, 13.683932183404009 ], "wc_review_avg": [ 447.25, 158.01325102661485 ], "wc_reply_reviewers_avg": [ 146.75, 98.48699152680013 ], "wc_reply_authors_avg": [ 964.75, 496.5427348174576 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 52, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1539113939437571504&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Stanford University;Meta", "aff_unique_dep": ";Facebook AI Research", "aff_unique_url": "https://www.stanford.edu;https://research.facebook.com", "aff_unique_abbr": "Stanford;FAIR", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Mastering the Game of No-Press Diplomacy via Human-Regularized Reinforcement Learning and Planning", "status": "Top-5%", "track": "main", 
"site": "https://iclr.cc/virtual/2023/poster/11320", "id": "F61FwJTZhb", "poster": "/media/PosterPDFs/ICLR%202023/11320.png?t=1682397246.5151865", "openreview": "https://openreview.net/forum?id=F61FwJTZhb", "slides": "https://iclr.cc/virtual/2023/poster/11320", "video": "https://iclr.cc/virtual/2023/poster/11320", "author_site": "Anton Bakhtin, David Wu, Adam Lerer, Jonathan Gray, Athul Jacob, Gabriele Farina, Alexander Miller, Noam Brown", "tldr": "We train a bot that places first in a no-press Diplomacy tournament with humans by using human-data-regularized reinforcement learning and planning ", "abstract": "No-press Diplomacy is a complex strategy game involving both cooperation and competition that has served as a benchmark for multi-agent AI research. While self-play reinforcement learning has resulted in numerous successes in purely adversarial games like chess, Go, and poker, self-play alone is insufficient for achieving optimal performance in domains involving cooperation with humans. We address this shortcoming by first introducing a planning algorithm we call DiL-piKL that regularizes a reward-maximizing policy toward a human imitation-learned policy. We prove that this is a no-regret learning algorithm under a modified utility function. We then show that DiL-piKL can be extended into a self-play reinforcement learning algorithm we call RL-DiL-piKL that provides a model of human play while simultaneously training an agent that responds well to this human model. We used RL-DiL-piKL to train an agent we name Diplodocus.\nIn a 200-game no-press Diplomacy tournament involving 62 human participants spanning skill levels from beginner to expert, two Diplodocus agents both achieved a higher average score than all other participants who played more than two games, and ranked first and third according to an Elo ratings model.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Anton Bakhtin;David J Wu;Adam Lerer;Jonathan Gray;Athul Paul Jacob;Gabriele Farina;Alexander H Miller;Noam Brown", "authorids": "~Anton_Bakhtin1;~David_J_Wu1;~Adam_Lerer1;~Jonathan_Gray2;~Athul_Paul_Jacob1;~Gabriele_Farina1;~Alexander_H_Miller1;~Noam_Brown2", "gender": ";;M;;;M;;M", "homepage": ";;;;http://apjacob.me/;http://www.cs.cmu.edu/~gfarina/about/;http://www.cs.cmu.edu/~noamb;", "dblp": ";;;;192/1229;;https://dblp.uni-trier.de/pers/hd/b/Brown:Noam;190/7117", "google_scholar": "50O3v1MAAAAJ;;;abPVGwYAAAAJ;https://scholar.google.ca/citations?user=XT3E7RoAAAAJ;sktDNcEAAAAJ;RLDbLcUAAAAJ;3b0l5LEAAAAJ", "orcid": ";0000-0002-5834-4936;;;;;;", "linkedin": ";;;;apjacob/;;;alexholdenmiller/", "or_profile": "~Anton_Bakhtin1;~David_J_Wu1;~Adam_Lerer1;~Jonathan_Gray2;~Athul_Paul_Jacob1;~Gabriele_Farina1;~Noam_Brown2;~Alexander_Miller1", "aff": "Meta Facebook;Meta Facebook;;Facebook AI Research;MIT-IBM Watson AI Lab;FAIR, Meta AI;Meta Facebook;New York University", "aff_domain": "facebook.com;fb.com;;fb.com;ibm.com;meta.com;facebook.com;nyu.edu", "position": "Researcher;Researcher;;Researcher;Research Intern;Researcher;Research Scientist;MS student", "bibtex": "@inproceedings{\nbakhtin2023mastering,\ntitle={Mastering the Game of No-Press Diplomacy via Human-Regularized Reinforcement Learning and Planning},\nauthor={Anton Bakhtin and David J Wu and Adam Lerer and Jonathan Gray and Athul Paul Jacob and Gabriele Farina and Alexander H Miller and Noam Brown},\nbooktitle={The Eleventh International Conference on Learning Representations 
},\nyear={2023},\nurl={https://openreview.net/forum?id=F61FwJTZhb}\n}", "github": "", "project": "", "reviewers": "qUpu;gn6m;FjZ3", "pdf_size": 1015737, "recommendation": "8;8;8", "confidence": "4;3;3", "correctness": "3;4;4", "technical_novelty": "3;3;3", "empirical_novelty": "3;3;3", "wc_summary_paper": "155;103;85", "wc_strength_and_weaknesses": "715;135;165", "wc_clarity_quality_novelty_and_reproducibility": "39;45;94", "wc_summary_review": "78;35;48", "wc_review": "987;318;392", "wc_reply_reviewers": "283;0;0", "wc_reply_authors": "1028;14;256", "reply_reviewers": "1;0;0", "reply_authors": "2;1;1", "recommendation_avg": [ 8.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 114.33333333333333, 29.67977238606942 ], "wc_strength_and_weaknesses_avg": [ 338.3333333333333, 266.62499674428295 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 59.333333333333336, 24.63511495586917 ], "wc_summary_review_avg": [ 53.666666666666664, 18.00617178142601 ], "wc_review_avg": [ 565.6666666666666, 299.4554316681458 ], "wc_reply_reviewers_avg": [ 94.33333333333333, 133.40747938386198 ], "wc_reply_authors_avg": [ 432.6666666666667, 432.4020762618463 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 55, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6052094907704807684&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=F61FwJTZhb", "email": "facebook.com;fb.com;;fb.com;ibm.com;meta.com;facebook.com;nyu.edu", "author_num": 8, "aff_unique_index": "0;0;0;1;0;0;2", "aff_unique_norm": "Meta;Massachusetts Institute of Technology;New York University", "aff_unique_dep": "Meta Platforms, Inc.;IBM Watson AI Lab;", "aff_unique_url": "https://meta.com;https://www.mitibmwatsonailab.org;https://www.nyu.edu", "aff_unique_abbr": "Meta;MIT-IBM AI Lab;NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "F7f4BYnDAIc", "title": "Sampled Transformer for Point Sets", "track": "main", "status": "Reject", "tldr": "", "abstract": "The sparse transformer can reduce the computational complexity of the self-attention layers to $O(n)$, whilst still being a universal approximator of continuous sequence-to-sequence functions. However, this permutation-variant operation is not appropriate for direct application to sets. In this paper, we propose an $O(n)$ complexity sampled transformer that can process point set elements directly without any additional inductive bias. Our sampled transformer introduces random element sampling, which randomly splits point sets into subsets, followed by applying a shared Hamiltonian self-attention mechanism to each subset. The overall attention mechanism can be viewed as a Hamiltonian cycle in the complete attention graph, and the permutation of point set elements is equivalent to randomly sampling Hamiltonian cycles. This mechanism implements a Monte Carlo simulation of the $O(n^2)$ dense attention connections. We show that it is a universal approximator for continuous set-to-set functions.
Experimental results for classification and few-shot learning on point-clouds show comparable or better accuracy with significantly reduced computational complexity compared to the dense transformer or alternative sparse attention schemes.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Shidi Li;Christian Walder;Alexander Soen;Lexing Xie;miaomiao Liu", "authorids": "~Shidi_Li1;~Christian_Walder1;~Alexander_Soen1;~Lexing_Xie1;~miaomiao_Liu2", "gender": "M;;M;F;F", "homepage": ";;https://alexandersoen.github.io/;https://users.cecs.anu.edu.au/~xlx/;http://users.cecs.anu.edu.au/~mliu/", "dblp": ";;245/9661.html;59/4002.html;66/8063-1.html", "google_scholar": "https://scholar.google.com.au/citations?user=6x17jhEAAAAJ;;apRX4awAAAAJ;https://scholar.google.com.tw/citations?user=u0xUDSoAAAAJ;https://scholar.google.com.au/citations?user=ptAR7tUAAAAJ", "orcid": ";;;0000-0001-8319-0118;", "linkedin": ";;;;", "or_profile": "~Shidi_Li1;~Christian_Walder1;~Alexander_Soen1;~Lexing_Xie1;~miaomiao_Liu2", "aff": "Shanghai AI Lab;;Amazon;Australian National University;Australian National University", "aff_domain": "pjlab.org.cn;;amazon.com;anu.edu.au;anu.edu.au", "position": "Intern researcher ;;Intern;Full Professor;Assistant Professor", "bibtex": "@misc{\nli2023sampled,\ntitle={Sampled Transformer for Point Sets},\nauthor={Shidi Li and Christian Walder and Alexander Soen and Lexing Xie and miaomiao Liu},\nyear={2023},\nurl={https://openreview.net/forum?id=F7f4BYnDAIc}\n}", "github": "", "project": "", "reviewers": "AbVu;V8rM;YJ8Q;cTVx", "site": "https://openreview.net/forum?id=F7f4BYnDAIc", "pdf_size": 386729, "recommendation": "5;6;6;8", "confidence": "3;3;2;2", "correctness": "3;2;3;4", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;3;2;2", "wc_summary_paper": "33;47;84;128", "wc_strength_and_weaknesses": "206;117;247;71", "wc_clarity_quality_novelty_and_reproducibility": "12;19;4;27", "wc_summary_review": "21;94;16;69", "wc_review": "272;277;351;295", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "828;540;1004;28", "reply_reviewers": "0;0;0;0", "reply_authors": "3;2;3;1", "recommendation_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 2.5, 0.5 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 73.0, 36.817115585010185 ], "wc_strength_and_weaknesses_avg": [ 160.25, 69.7401426726387 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 15.5, 8.5 ], "wc_summary_review_avg": [ 50.0, 32.76430985081175 ], "wc_review_avg": [ 298.75, 31.355820831226854 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 600.0, 369.4536506789451 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.6882472016116854, "corr_recommendation_correctness": 0.6488856845230502, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15770268464291635265&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "Shanghai AI Lab;Amazon;Australian National University", "aff_unique_dep": ";Amazon.com, Inc.;", "aff_unique_url": "https://www.shanghaiailab.com;https://www.amazon.com;https://www.anu.edu.au", "aff_unique_abbr": "SAIL;Amazon;ANU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;2", 
"aff_country_unique": "China;United States;Australia" }, { "id": "F8OUxtWEQRi", "title": "Kinship Representation Learning with Face Componential Relation", "track": "main", "status": "Withdraw", "tldr": "We achieve the SOTA kinship recognition performance by the learning face componential relation with contrastive learning.", "abstract": "Kinship recognition aims to determine whether the subjects in two facial images are kin or non-kin, which is an emerging and challenging problem. However, most previous methods focus on heuristic designs without considering the spatial correlation between face images. In this paper, we aim to learn discriminative kinship representations embedded with the relation information between face components (e.g., eyes, nose, etc.). To achieve this goal, we propose the Face Componential Relation Network (FaCoRNet), which learns the relationship between face components among images with a cross-attention mechanism, which automatically learns the important facial regions for kinship recognition. Moreover, we propose Relation-Guided Contrastive Learning, which adapts the loss function by the guidance from cross-attention to learn more discriminative feature representations. The proposed FaCoRNet outperforms previous state-of-the-art methods by large margins for the largest public kinship recognition FIW benchmark. The code will be publicly released upon acceptance.", "keywords": "kinship recognition;attention;contrastive learning", "primary_area": "", "supplementary_material": "/attachment/26a242fd77c33f19fe7057927e3df41c665427c7.zip", "author": "Weng-Tai Su;Min-Hung Chen;Chien-Yi Wang;Shang-Hong Lai;Trista Pei-Chun Chen", "authorids": "~Weng-Tai_Su1;~Min-Hung_Chen2;~Chien-Yi_Wang1;~Shang-Hong_Lai1;~Trista_Pei-Chun_Chen2", "gender": "F;M;M;M;F", "homepage": "https://github.com/wtnthu;https://minhungchen.netlify.app/;https://chienyiwang.github.io/;http://www.cs.nthu.edu.tw/~lai/;https://www.linkedin.com/in/tristachen/", "dblp": ";04/6305;12/6741;27/679;13/4630", "google_scholar": ";ovzuxi8AAAAJ;05LW2DcAAAAJ;https://scholar.google.co.uk/citations?user=LlybOXQAAAAJ;zUi7TfgAAAAJ", "orcid": ";0000-0002-4046-3937;;0000-0002-5092-993X;0000-0002-8217-1465", "linkedin": ";chensteven/;chienyiwang/;;tristachen/", "or_profile": "~Weng-Tai_Su1;~Min-Hung_Chen2;~Chien-Yi_Wang1;~Shang-Hong_Lai1;~Trista_Pei-Chun_Chen2", "aff": ";NVIDIA;NVIDIA;National Tsing Hua University;Microsoft", "aff_domain": ";nvidia.com;nvidia.com;cs.nthu.edu.tw;microsoft.com", "position": ";Research Scientist;Researcher;Full Professor;Full Professor", "bibtex": "@misc{\nsu2023kinship,\ntitle={Kinship Representation Learning with Face Componential Relation},\nauthor={Weng-Tai Su and Min-Hung Chen and Chien-Yi Wang and Shang-Hong Lai and Trista Pei-Chun Chen},\nyear={2023},\nurl={https://openreview.net/forum?id=F8OUxtWEQRi}\n}", "github": "", "project": "", "reviewers": "QYTb;eGR9;yT4e;e763", "site": "https://openreview.net/forum?id=F8OUxtWEQRi", "pdf_size": 870478, "recommendation": "3;3;6;8", "confidence": "4;3;3;2", "correctness": "3;2;3;4", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;2;4;3", "wc_summary_paper": "45;37;94;38", "wc_strength_and_weaknesses": "108;207;274;92", "wc_clarity_quality_novelty_and_reproducibility": "20;33;89;30", "wc_summary_review": "5;28;83;7", "wc_review": "178;305;540;167", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.0, 2.1213203435596424 ], "confidence_avg": [ 3.0, 
0.7071067811865476 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 53.5, 23.58495283014151 ], "wc_strength_and_weaknesses_avg": [ 170.25, 74.35178209027676 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 43.0, 26.99073915253156 ], "wc_summary_review_avg": [ 30.75, 31.483130403439873 ], "wc_review_avg": [ 297.5, 150.14409745308006 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.8333333333333334, "corr_recommendation_correctness": 0.8333333333333334, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14441466694278220461&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "NVIDIA;National Tsing Hua University;Microsoft", "aff_unique_dep": "NVIDIA Corporation;;Microsoft Corporation", "aff_unique_url": "https://www.nvidia.com;https://www.nthu.edu.tw;https://www.microsoft.com", "aff_unique_abbr": "NVIDIA;NTHU;Microsoft", "aff_campus_unique_index": "1", "aff_campus_unique": ";Taiwan", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;China" }, { "title": "Neural Compositional Rule Learning for Knowledge Graph Reasoning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10932", "id": "F8VKQyDgRVj", "poster": "/media/PosterPDFs/ICLR%202023/10932.png?t=1681363103.8872805", "openreview": "https://openreview.net/forum?id=F8VKQyDgRVj", "slides": "https://iclr.cc/virtual/2023/poster/10932", "video": "https://iclr.cc/virtual/2023/poster/10932", "author_site": "Kewei Cheng, Nesreen Ahmed, Yizhou Sun", "tldr": "In this paper, we propose an end-to-end neural model for learning compositional logic rules called NCRL. NCRL treats logic rules as a hierarchical tree, and breaks the rule body into small atomic compositions during inference.", "abstract": "Learning logical rules is critical to improving reasoning in KGs. This is due to their ability to provide logical and interpretable explanations when used for predictions, as well as their ability to generalize to other tasks, domains, and data. While recent methods have been proposed to learn logical rules, the majority of these methods are either restricted by their computational complexity and can not handle the large search space of large-scale KGs, or show poor generalization when exposed to data outside the training set. In this paper, we propose an end-to-end neural model for learning compositional logical rules called NCRL. NCRL detects the best compositional structure of a rule body, and breaks it into small compositions in order to infer the rule head. By recurrently merging compositions in the rule body with a recurrent attention unit, NCRL finally predicts a single rule head. Experimental results show that NCRL learns high-quality rules, as well as being generalizable. Specifically, we show that NCRL is scalable, efficient, and yields state-of-the-art results for knowledge graph completion on large-scale KGs. 
Moreover, we test NCRL for systematic generalization by learning to reason on small-scale observed graphs and evaluating on larger unseen ones.", "keywords": "Logical Rule;Knowledge Graph;Reasoning;Compositionality;Systematicity", "primary_area": "", "supplementary_material": "/attachment/cd38dcce7b5c0f41dc3433ae0a2d9feb9b67d6ea.zip", "author": "Kewei Cheng;Nesreen Ahmed;Yizhou Sun", "authorids": "~Kewei_Cheng2;~Nesreen_Ahmed1;~Yizhou_Sun1", "gender": "F;F;F", "homepage": "http://nesreenahmed.com;http://web.cs.ucla.edu/~yzsun/;https://vivian1993.github.io/", "dblp": "33/11518;37/3868;175/1247", "google_scholar": "AFV0nLcAAAAJ;https://scholar.google.com.tw/citations?user=TQgOjK0AAAAJ;1HZGSB8AAAAJ", "orcid": ";;", "linkedin": "nkahmed/;;", "or_profile": "~Nesreen_Ahmed1;~Yizhou_Sun1;~Kewei_Cheng1", "aff": "Intel AI Research;University of California, Los Angeles;University of California, Los Angeles", "aff_domain": "intel.com;ucla.edu;ucla.edu", "position": "Principal Researcher;Associate Professor;PhD student", "bibtex": "@inproceedings{\ncheng2023neural,\ntitle={Neural Compositional Rule Learning for Knowledge Graph Reasoning},\nauthor={Kewei Cheng and Nesreen Ahmed and Yizhou Sun},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=F8VKQyDgRVj}\n}", "github": "", "project": "", "reviewers": "WAeF;XZA5;N8Wt;FLaJ", "pdf_size": 1009408, "recommendation": "6;6;8;8", "confidence": "4;5;5;4", "correctness": "1;3;4;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "52;76;49;84", "wc_strength_and_weaknesses": "228;34;274;302", "wc_clarity_quality_novelty_and_reproducibility": "153;429;3;16", "wc_summary_review": "83;56;3;75", "wc_review": "516;595;329;477", "wc_reply_reviewers": "0;287;0;0", "wc_reply_authors": "695;1392;663;776", "reply_reviewers": "0;2;0;0", "reply_authors": "1;3;1;1", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 4.5, 0.5 ], "correctness_avg": [ 2.75, 1.0897247358851685 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 65.25, 15.056144924913548 ], "wc_strength_and_weaknesses_avg": [ 209.5, 104.71270219032647 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 150.25, 171.32917877582906 ], "wc_summary_review_avg": [ 54.25, 31.171902412268647 ], "wc_review_avg": [ 479.25, 96.60324787500677 ], "wc_reply_reviewers_avg": [ 71.75, 124.27464544306694 ], "wc_reply_authors_avg": [ 881.5, 297.6008232515495 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.6882472016116854, "gs_citation": 60, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1635313134565508714&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=F8VKQyDgRVj", "email": "intel.com;ucla.edu;ucla.edu", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Intel;University of California, Los Angeles", "aff_unique_dep": "Intel AI Research;", "aff_unique_url": "https://www.intel.com/research;https://www.ucla.edu", "aff_unique_abbr": "Intel AI;UCLA", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Causal Balancing for Domain Generalization", "status": 
"Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12091", "id": "F91SROvVJ_6", "poster": "", "openreview": "https://openreview.net/forum?id=F91SROvVJ_6", "slides": "https://iclr.cc/virtual/2023/poster/12091", "video": "https://iclr.cc/virtual/2023/poster/12091", "author_site": "Xinyi Wang, Michael Saxon, Jiachen Li, Hongyang Zhang, Kun Zhang, William Wang", "tldr": "We propose a balanced mini-batch sampling strategy to reduce spurious correlations for domain generalization.", "abstract": "While machine learning models rapidly advance the state-of-the-art on various real-world tasks, out-of-domain (OOD) generalization remains a challenging problem given the vulnerability of these models to spurious correlations. We propose a balanced mini-batch sampling strategy to transform a biased data distribution into a spurious-free balanced distribution, based on the invariance of the underlying causal mechanisms for the data generation process. We argue that the Bayes optimal classifiers trained on such balanced distribution are minimax optimal across a diverse enough environment space. We also provide an identifiability guarantee of the latent variable model of the proposed data generation process, when utilizing enough train environments. Experiments are conducted on DomainBed, demonstrating empirically that our method obtains the best performance across 20 baselines reported on the benchmark.", "keywords": "domain generalization;causality;latent variable model", "primary_area": "", "supplementary_material": "/attachment/cf204506ffaf3bac91913f30442613d10b9650bf.zip", "author": "Xinyi Wang;Michael Saxon;Jiachen Li;Hongyang Zhang;Kun Zhang;William Yang Wang", "authorids": "~Xinyi_Wang2;~Michael_Saxon1;~Jiachen_Li6;~Hongyang_Zhang1;~Kun_Zhang1;~William_Yang_Wang2", "gender": "F;M;M;M;M;M", "homepage": "https://wangxinyilinda.github.io/;https://saxon.me;https://sites.google.com/view/jiachenli/;https://hongyanz.github.io/;http://www.andrew.cmu.edu/user/kunz1/;https://www.cs.ucsb.edu/~william/", "dblp": ";222/6656;;23/10537-1;96/3115-1;08/9282", "google_scholar": "3vvbplcAAAAJ;pAlwjdgAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;RGoypN4AAAAJ;gf8Ms_8AAAAJ", "orcid": ";;;;;", "linkedin": "xinyi-wang-444385133/;;;;;", "or_profile": "~Xinyi_Wang2;~Michael_Saxon1;~Jiachen_Li6;~Hongyang_Zhang1;~Kun_Zhang1;~William_Wang1", "aff": "Microsoft;UC Santa Barbara;Amazon;School of Computer Science, University of Waterloo;Carnegie Mellon University;UC Santa Barbara", "aff_domain": "microsoft.com;ucsb.edu;amazon.com;uwaterloo.ca;cmu.edu;ucsb.edu", "position": "Intern;PhD student;Intern;Assistant Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nwang2023causal,\ntitle={Causal Balancing for Domain Generalization},\nauthor={Xinyi Wang and Michael Saxon and Jiachen Li and Hongyang Zhang and Kun Zhang and William Yang Wang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=F91SROvVJ_6}\n}", "github": "", "project": "", "reviewers": "KTyH;AxQ6;ypmL;2Bti", "pdf_size": 1680729, "recommendation": "6;6;6;8", "confidence": "4;3;3;3", "correctness": "4;3;3;4", "technical_novelty": "3;3;3;3", "empirical_novelty": "3;4;2;4", "wc_summary_paper": "71;72;31;77", "wc_strength_and_weaknesses": "443;119;87;110", "wc_clarity_quality_novelty_and_reproducibility": "44;11;30;35", "wc_summary_review": "57;4;41;54", "wc_review": "615;206;189;276", "wc_reply_reviewers": 
"0;0;0;0", "wc_reply_authors": "395;162;270;314", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 62.75, 18.471261462065875 ], "wc_strength_and_weaknesses_avg": [ 189.75, 146.6788583947939 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 30.0, 12.062338081814818 ], "wc_summary_review_avg": [ 39.0, 21.083168642308014 ], "wc_review_avg": [ 321.5, 172.5608588295735 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 285.25, 84.10521684176315 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 39, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9024663426432978437&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=F91SROvVJ_6", "email": "microsoft.com;ucsb.edu;amazon.com;uwaterloo.ca;cmu.edu;ucsb.edu", "author_num": 6, "aff_unique_index": "0;1;2;3;4;1", "aff_unique_norm": "Microsoft;University of California, Santa Barbara;Amazon;University of Waterloo;Carnegie Mellon University", "aff_unique_dep": "Microsoft Corporation;;Amazon.com, Inc.;School of Computer Science;", "aff_unique_url": "https://www.microsoft.com;https://www.ucsb.edu;https://www.amazon.com;https://uwaterloo.ca;https://www.cmu.edu", "aff_unique_abbr": "Microsoft;UCSB;Amazon;UWaterloo;CMU", "aff_campus_unique_index": "1;2;1", "aff_campus_unique": ";Santa Barbara;Waterloo", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "United States;Canada" }, { "id": "FAHVsSfhWs", "title": "Revisiting Global Pooling through the Lens of Optimal Transport", "track": "main", "status": "Withdraw", "tldr": "We develop a novel and solid global pooling framework through the lens of optimal transport, which covers many existing pooling methods and performs well on various learning problems.", "abstract": "Global pooling is one of the most significant operations in many machine learning models and tasks, whose implementation, however, is often empirical in practice. In this study, we develop a novel and solid global pooling framework through the lens of optimal transport. We demonstrate that most existing global pooling methods are equivalent to solving some specializations of an unbalanced optimal transport (UOT) problem. Making the parameters of the UOT problem learnable, we unify various global pooling methods in the same framework, and accordingly, propose a generalized global pooling layer called UOT-Pooling (UOTP) for neural networks. Besides implementing the UOTP layer based on the classic Sinkhorn-scaling algorithm, we design new model architectures based on the Bregman ADMM algorithm, which has comparable complexity but better numerical stability. We test our UOTP layers in several application scenarios, including multi-instance learning, graph classification, and image classification. In these applications, our UOTP layers can either imitate conventional global pooling layers or learn new pooling mechanisms to perform better. 
", "keywords": "Global pooling;regularized optimal transport;Bregman ADMM;multi-instance learning;graph embedding", "primary_area": "", "supplementary_material": "/attachment/2c7f882f5894ff01e247397e7111349a1f91d410.zip", "author": "Minjie Cheng;Hongteng Xu", "authorids": "~Minjie_Cheng1;~Hongteng_Xu1", "gender": "F;M", "homepage": ";https://hongtengxu.github.io", "dblp": ";38/10816", "google_scholar": "HAsgHvoAAAAJ;7gYVOO8AAAAJ", "orcid": ";0000-0003-4192-5360", "linkedin": ";", "or_profile": "~Minjie_Cheng1;~Hongteng_Xu1", "aff": "Renmin University of China;Renmin University of China", "aff_domain": "ruc.edu.cn;ruc.edu.cn", "position": "PhD student;Associate Professor", "bibtex": "@misc{\ncheng2023revisiting,\ntitle={Revisiting Global Pooling through the Lens of Optimal Transport},\nauthor={Minjie Cheng and Hongteng Xu},\nyear={2023},\nurl={https://openreview.net/forum?id=FAHVsSfhWs}\n}", "github": "", "project": "", "reviewers": "dtQm;ZYts;GKUo;fuCZ", "site": "https://openreview.net/forum?id=FAHVsSfhWs", "pdf_size": 3401564, "recommendation": "3;5;5;5", "confidence": "4;4;3;4", "correctness": "4;3;3;3", "technical_novelty": "1;2;2;3", "empirical_novelty": "1;1;2;2", "wc_summary_paper": "92;98;64;38", "wc_strength_and_weaknesses": "263;630;292;152", "wc_clarity_quality_novelty_and_reproducibility": "70;32;30;46", "wc_summary_review": "30;65;35;43", "wc_review": "455;825;421;279", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 1.5, 0.5 ], "wc_summary_paper_avg": [ 73.0, 23.93741840717165 ], "wc_strength_and_weaknesses_avg": [ 334.25, 178.56703923176863 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 44.5, 15.960889699512368 ], "wc_summary_review_avg": [ 43.25, 13.386093530227555 ], "wc_review_avg": [ 495.0, 201.6382900145704 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": -1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:YomIQww09ooJ:scholar.google.com/&scioq=Revisiting+Global+Pooling+through+the+Lens+of+Optimal+Transport&hl=en&as_sdt=0,33", "gs_version_total": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Renmin University of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ruc.edu.cn", "aff_unique_abbr": "RUC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "FAXVNe1GxX", "title": "PRUDEX-Compass: Towards Systematic Evaluation of Reinforcement Learning in Financial Markets", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "The financial markets, which involve more than $90 trillion market capitals, attract the attention of innumerable investors around the world. Recently, reinforcement learning in financial markets (FinRL) has emerged as a promising direction to train agents for making profitable investment decisions. 
However, the evaluation of most FinRL methods only focuses on profit-related measures, which is far from satisfactory for practitioners seeking to deploy these methods in real-world financial markets. Therefore, we introduce PRUDEX-Compass, which has 6 axes, i.e., Profitability, Risk-control, Universality, Diversity, rEliability, and eXplainability, with a total of 17 measures for a systematic evaluation. Specifically, i) we propose AlphaMix+ as a strong FinRL baseline, which leverages mixture-of-experts (MoE) and risk-sensitive approaches to make diversified risk-aware investment decisions, ii) we evaluate 8 FinRL methods in 4 long-term real-world datasets of influential financial markets to demonstrate the usage of our PRUDEX-Compass, iii) PRUDEX-Compass, together with 4 real-world datasets, standard implementations of 8 FinRL methods, and a portfolio management RL environment, is released as a public resource to facilitate the design and comparison of new FinRL methods. We hope that PRUDEX-Compass can shed light on future FinRL research and prevent untrustworthy results from stalling FinRL's progress toward successful industry deployment.", "keywords": "Evaluation;Reinforcement Learning;Finance;Benchmarking", "primary_area": "", "supplementary_material": "/attachment/71bdef35e1f834bc9e4795384dc73f8bcfc2ecef.zip", "author": "Shuo Sun;Molei Qin;Xinrun Wang;Bo An", "authorids": "~Shuo_Sun2;~Molei_Qin1;~Xinrun_Wang1;~Bo_An2", "gender": "M;M;M;M", "homepage": ";https://rainwangphy.github.io/;https://personal.ntu.edu.sg/boan/;https://github.com/qinmoelei", "dblp": "04/4493;199/6413;42/6178-1.html;339/6915", "google_scholar": "kGgWv8IAAAAJ;ROANfPUAAAAJ;PEEpuNwAAAAJ;", "orcid": ";;0000-0002-7064-7438;", "linkedin": ";;;", "or_profile": "~Shuo_Sun2;~Xinrun_Wang1;~Bo_An2;~Qin_Molei1", "aff": ";Nanyang Technological University;Nanyang Technological University;Nanyang Technological University", "aff_domain": ";ntu.edu.sg;ntu.edu.sg;ntu.edu.sg", "position": ";Postdoc;Full Professor;PhD student", "bibtex": "@misc{\nsun2023prudexcompass,\ntitle={{PRUDEX}-Compass: Towards Systematic Evaluation of Reinforcement Learning in Financial Markets},\nauthor={Shuo Sun and Molei Qin and Xinrun Wang and Bo An},\nyear={2023},\nurl={https://openreview.net/forum?id=FAXVNe1GxX}\n}", "github": "", "project": "", "reviewers": "oRCM;Nq6W;DGag;FC1J", "site": "https://openreview.net/forum?id=FAXVNe1GxX", "pdf_size": 2524708, "recommendation": "1;3;6;8", "confidence": "4;4;4;3", "correctness": "1;2;3;3", "technical_novelty": "1;2;3;3", "empirical_novelty": "0;1;3;3", "wc_summary_paper": "92;74;48;32", "wc_strength_and_weaknesses": "523;446;166;221", "wc_clarity_quality_novelty_and_reproducibility": "20;54;12;39", "wc_summary_review": "17;29;22;29", "wc_review": "652;603;248;321", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.5, 2.692582403567252 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.25, 0.82915619758885 ], "empirical_novelty_avg": [ 1.75, 1.299038105676658 ], "wc_summary_paper_avg": [ 61.5, 23.124662159694356 ], "wc_strength_and_weaknesses_avg": [ 339.0, 149.29668449098259 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 31.25, 16.391689967785506 ], "wc_summary_review_avg": [ 24.25, 5.0682837331783235 ], "wc_review_avg": [ 456.0, 174.29429135803616 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [
0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.7504787743864564, "corr_recommendation_correctness": 0.9518172686249523, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16496763482986319825&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff_unique_index": "0;0;0", "aff_unique_norm": "Nanyang Technological University", "aff_unique_dep": "", "aff_unique_url": "https://www.ntu.edu.sg", "aff_unique_abbr": "NTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Singapore" }, { "title": "Spectral Decomposition Representation for Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11374", "id": "FBMLeaXpZN", "poster": "", "openreview": "https://openreview.net/forum?id=FBMLeaXpZN", "slides": "https://iclr.cc/virtual/2023/poster/11374", "video": "https://iclr.cc/virtual/2023/poster/11374", "author_site": "Tongzheng Ren, Tianjun Zhang, Lisa Lee, Joseph E Gonzalez, Dale Schuurmans, Bo Dai", "tldr": "We propose a new spectral representation learning method that gets rid of the policy dependency and can be easily applied in downstream tasks.", "abstract": "Representation learning often plays a critical role in avoiding the curse of dimensionality in reinforcement learning. A representative class of algorithms exploits spectral decomposition of the stochastic transition dynamics to construct representations that enjoy strong theoretical properties in idealized settings. However, current spectral methods suffer from limited applicability because they are constructed for\nstate-only aggregation and are derived from a policy-dependent transition kernel, without considering the issue of exploration. To address these issues, we propose an alternative spectral method, Spectral Decomposition Representation (SPEDER), that extracts a state-action abstraction from the dynamics without inducing spurious dependence on the data collection policy, while also balancing the exploration-versus-exploitation trade-off during learning. A theoretical analysis establishes the sample efficiency of the proposed algorithm in both the online and offline settings. In addition, an experimental investigation demonstrates superior performance over current state-of-the-art algorithms across several RL benchmarks.", "keywords": "Spectral Representation;Markov Decision Processes;Reinforcement Learning", "primary_area": "", "supplementary_material": "", "author": "Tongzheng Ren;Tianjun Zhang;Lisa Lee;Joseph E. 
Gonzalez;Dale Schuurmans;Bo Dai", "authorids": "~Tongzheng_Ren1;~Tianjun_Zhang1;~Lisa_Lee1;~Joseph_E._Gonzalez1;~Dale_Schuurmans1;~Bo_Dai1", "gender": "M;;;M;;", "homepage": "https://www.cs.utexas.edu/~tzren/;https://tianjunz.github.io;;http://eecs.berkeley.edu/~jegonzal;;https://bo-dai.github.io/", "dblp": "211/8004;;;61/8262;;64/2903", "google_scholar": "VgNDYeYAAAAJ;UE9jz_MAAAAJ;;https://scholar.google.com.tw/citations?user=gM2WW9UAAAAJ;;TIKl_foAAAAJ", "orcid": ";;;0000-0003-2921-956X;;0009-0002-8070-574X", "linkedin": ";;;;;", "or_profile": "~Tongzheng_Ren1;~Tianjun_Zhang1;~Lisa_Lee1;~Joseph_E._Gonzalez1;~Dale_Schuurmans1;~Bo_Dai1", "aff": "Google;University of California, Berkeley;;University of California, Berkeley;;Google Brain", "aff_domain": "google.com;berkeley.edu;;berkeley.edu;;google.com", "position": "Intern;PhD student;;Associate Professor;;Research Scientist", "bibtex": "@inproceedings{\nren2023spectral,\ntitle={Spectral Decomposition Representation for Reinforcement Learning},\nauthor={Tongzheng Ren and Tianjun Zhang and Lisa Lee and Joseph E. Gonzalez and Dale Schuurmans and Bo Dai},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=FBMLeaXpZN}\n}", "github": "", "project": "", "reviewers": "uUqZ;PTm5;jSW9", "pdf_size": 2286811, "recommendation": "5;8;8", "confidence": "4;2;2", "correctness": "3;4;4", "technical_novelty": "2;3;4", "empirical_novelty": "2;3;3", "wc_summary_paper": "101;67;56", "wc_strength_and_weaknesses": "323;51;73", "wc_clarity_quality_novelty_and_reproducibility": "140;39;68", "wc_summary_review": "63;40;58", "wc_review": "627;197;255", "wc_reply_reviewers": "0;17;0", "wc_reply_authors": "803;129;30", "reply_reviewers": "0;1;0", "reply_authors": "2;1;1", "recommendation_avg": [ 7.0, 1.4142135623730951 ], "confidence_avg": [ 2.6666666666666665, 0.9428090415820634 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.816496580927726 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 74.66666666666667, 19.154343864744856 ], "wc_strength_and_weaknesses_avg": [ 149.0, 123.36396016125076 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 82.33333333333333, 42.460439103816256 ], "wc_summary_review_avg": [ 53.666666666666664, 9.877021593352701 ], "wc_review_avg": [ 359.6666666666667, 190.51042549483276 ], "wc_reply_reviewers_avg": [ 5.666666666666667, 8.013876853447538 ], "wc_reply_authors_avg": [ 320.6666666666667, 343.4475538160408 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": 1.0, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1252718171986850375&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "pdf": "https://openreview.net/pdf?id=FBMLeaXpZN", "email": "google.com;berkeley.edu;;berkeley.edu;;google.com", "author_num": 6, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Google;University of California, Berkeley", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://www.berkeley.edu", "aff_unique_abbr": "Google;UC Berkeley", "aff_campus_unique_index": "0;1;1;0", "aff_campus_unique": "Mountain View;Berkeley", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": 
"Dataless Knowledge Fusion by Merging Weights of Language Models", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11949", "id": "FCnohuR6AnM", "poster": "", "openreview": "https://openreview.net/forum?id=FCnohuR6AnM", "slides": "https://iclr.cc/virtual/2023/poster/11949", "video": "https://iclr.cc/virtual/2023/poster/11949", "author_site": "Xisen Jin, Xiang Ren, Daniel Preotiuc-Pietro, Pengxiang Cheng", "tldr": "We study the problem of merging individual models built on different training data sets and propose a novel merging algorithm.", "abstract": "Fine-tuning pre-trained language models has become the prevalent paradigm for building downstream NLP models. Oftentimes fine-tuned models are readily available but their training data is not, due to data privacy or intellectual property concerns. This creates a barrier to fusing knowledge across individual models to yield a better single model. In this paper, we study the problem of merging individual models built on different training data sets to obtain a single model that performs well both across all data set domains and can generalize on out-of-domain data. We propose a data-less knowledge fusion method that merges models in their parameter space, guided by weights that minimize prediction differences between the merged model and the individual models. Over a battery of evaluation settings, we show that the proposed method significantly outperforms baselines such as Fisher-weighted averaging or model ensembling. Further, we find that our method is a promising alternative to multi-task learning that can preserve or sometimes improve over the individual models without access to the training data. Finally, model merging is more efficient than training a multi-task model, thus making it applicable to a wider set of scenarios.", "keywords": "model merging;weight merging", "primary_area": "", "supplementary_material": "", "author": "Xisen Jin;Xiang Ren;Daniel Preotiuc-Pietro;Pengxiang Cheng", "authorids": "~Xisen_Jin3;~Xiang_Ren1;~Daniel_Preotiuc-Pietro2;~Pengxiang_Cheng1", "gender": "M;M;M;M", "homepage": "https://xsjin.xyz;https://shanzhenren.github.io/;https://www.preotiuc.ro/;https://www.pengxiang.me", "dblp": "222/9324;36/360-1;126/8668;163/2279-1", "google_scholar": "https://scholar.google.com/citations?hl=en;_moJlrIAAAAJ;7HSgxLEAAAAJ;hqhIgYUAAAAJ", "orcid": ";;;", "linkedin": ";xren7;danielpreotiuc/;", "or_profile": "~Xisen_Jin3;~Xiang_Ren1;~Daniel_Preotiuc-Pietro2;~Pengxiang_Cheng1", "aff": "University of Southern California;University of Southern California;Bloomberg;Bloomberg", "aff_domain": "usc.edu;usc.edu;bloomberg.com;bloomberg.net", "position": "PhD student;Associate Professor;Researcher;Researcher", "bibtex": "@inproceedings{\njin2023dataless,\ntitle={Dataless Knowledge Fusion by Merging Weights of Language Models},\nauthor={Xisen Jin and Xiang Ren and Daniel Preotiuc-Pietro and Pengxiang Cheng},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=FCnohuR6AnM}\n}", "github": "", "project": "", "reviewers": "MwUi;PCeq;hdqV;fiB5", "pdf_size": 942361, "recommendation": "5;6;8;8", "confidence": "4;3;4;4", "correctness": "3;3;4;4", "technical_novelty": "3;2;4;4", "empirical_novelty": "2;3;3;4", "wc_summary_paper": "44;28;139;116", "wc_strength_and_weaknesses": "245;302;416;499", "wc_clarity_quality_novelty_and_reproducibility": "68;73;13;49", "wc_summary_review": "46;62;48;75", "wc_review": "403;465;616;739", 
"wc_reply_reviewers": "0;0;10;0", "wc_reply_authors": "643;621;426;594", "reply_reviewers": "0;0;1;0", "reply_authors": "2;2;1;1", "recommendation_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.25, 0.82915619758885 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 81.75, 46.81012176869443 ], "wc_strength_and_weaknesses_avg": [ 365.5, 98.64709828474429 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 50.75, 23.562417108607512 ], "wc_summary_review_avg": [ 57.75, 11.712706775122479 ], "wc_review_avg": [ 555.75, 131.12851520550365 ], "wc_reply_reviewers_avg": [ 2.5, 4.330127018922194 ], "wc_reply_authors_avg": [ 571.0, 85.4956139225867 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 0.9622504486493761, "gs_citation": 201, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9179982772635833055&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=FCnohuR6AnM", "email": "usc.edu;usc.edu;bloomberg.com;bloomberg.net", "author_num": 4, "aff_unique_index": "0;0;1;1", "aff_unique_norm": "University of Southern California;Bloomberg", "aff_unique_dep": ";", "aff_unique_url": "https://www.usc.edu;https://www.bloomberg.com", "aff_unique_abbr": "USC;Bloomberg", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "FDiO2xfKnkj", "title": "Affinity-VAE for clustering and classification of objects in multidimensional image data", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "In this work we present affinity-VAE: a framework for automatic clustering and classification of objects in multidimensional image data based on their similarity. The method expands on the concept of $\\beta$-VAEs with an informed similarity-based loss component driven by an affinity matrix. The affinity-VAE is able to create rotationally-invariant, morphologically homogeneous clusters in the latent representation, with improved cluster separation compared with a standard $\\beta$-VAE. We explore the extent of latent disentanglement and continuity of the latent spaces on both 2D and 3D image data, including simulated biological electron cryo-tomography (cryo-ET) volumes as an example of a scientific application. 
", "keywords": "representation learning;VAE;$\\beta$-VAE;affinity;cryo-ET;cryo-electron tommography;structural biology;visual proteomics", "primary_area": "", "supplementary_material": "", "author": "Jola Mirecka;Marjan Famili;Anna Kotanska;Nikolai Juraschko;Beatriz Costa-Gomes;Colin M Palmer;Jeyan Thiyagalingam;Tom Burnley;Mark Basham;Alan R Lowe", "authorids": "~Jola_Mirecka1;mfamili@turing.ac.uk;anna.kotanska@jesus.ox.ac.uk;nikolai.juraschko@rfi.ac.uk;bcostagomes@turing.ac.uk;colin.palmer@stfc.ac.uk;~Jeyan_Thiyagalingam1;tom.burnley@stfc.ac.uk;mark.basham@rfi.ac.uk;alowe@turing.ac.uk", "gender": "F;;;;;;;;;", "homepage": ";;;;;;;;;", "dblp": ";;;;;;;;;", "google_scholar": ";;;;;;;;;", "orcid": "0000-0001-9361-1713;;;;;;;;;", "linkedin": ";;;;;;;;;", "or_profile": "~Jola_Mirecka1;mfamili@turing.ac.uk;anna.kotanska@jesus.ox.ac.uk;nikolai.juraschko@rfi.ac.uk;bcostagomes@turing.ac.uk;colin.palmer@stfc.ac.uk;~Jeyan_Thiyagalingam1;tom.burnley@stfc.ac.uk;mark.basham@rfi.ac.uk;alowe@turing.ac.uk", "aff": "STFC;;;;;;;;;", "aff_domain": "stfc.ac.uk;;;;;;;;;", "position": "Postdoc;;;;;;;;;", "bibtex": "@misc{\nmirecka2023affinityvae,\ntitle={Affinity-{VAE} for clustering and classification of objects in multidimensional image data},\nauthor={Jola Mirecka and Marjan Famili and Anna Kotanska and Nikolai Juraschko and Beatriz Costa-Gomes and Colin M Palmer and Jeyan Thiyagalingam and Tom Burnley and Mark Basham and Alan R Lowe},\nyear={2023},\nurl={https://openreview.net/forum?id=FDiO2xfKnkj}\n}", "github": "", "project": "", "reviewers": "xo7f;BXNM;CTMg;R6EZ", "site": "https://openreview.net/forum?id=FDiO2xfKnkj", "pdf_size": 4470010, "recommendation": "3;3;3;5", "confidence": "5;5;4;2", "correctness": "3;2;2;3", "technical_novelty": "1;2;2;2", "empirical_novelty": "0;2;1;2", "wc_summary_paper": "31;42;116;81", "wc_strength_and_weaknesses": "163;29;413;125", "wc_clarity_quality_novelty_and_reproducibility": "34;135;100;51", "wc_summary_review": "15;26;34;71", "wc_review": "243;232;663;328", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 1.224744871391589 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 67.5, 33.60431519909311 ], "wc_strength_and_weaknesses_avg": [ 182.5, 141.75595225598113 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 80.0, 39.943710393502506 ], "wc_summary_review_avg": [ 36.5, 21.02974084481309 ], "wc_review_avg": [ 366.5, 175.16920391438674 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 10, 0 ], "corr_recommendation_confidence": -0.9428090415820632, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:4x-B4kOqM60J:scholar.google.com/&scioq=Affinity-VAE+for+clustering+and+classification+of+objects+in+multidimensional+image+data&hl=en&as_sdt=0,31", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Science and Technology Facilities Council", "aff_unique_dep": "", "aff_unique_url": "https://www.stfc.ac.uk", "aff_unique_abbr": "STFC", "aff_country_unique_index": "0", "aff_country_unique": "United Kingdom" }, { "id": "FDlfFbnI7AR", "title": "Countering the Attack-Defense 
Complexity Gap for Robust Classifiers", "track": "main", "status": "Reject", "tldr": "We provide a formal rationale for why attacks are more efficient than defenses and introduce a new defensive technique that sidesteps this asymmetry.", "abstract": "We consider the decision version of defending and attacking Machine Learning classifiers. We provide a rationale for the well-known difficulties in building robust models: in particular we prove that, under broad assumptions, attacking a polynomial-time classifier is $NP$-complete, while training a polynomial-time model that is robust on even a single input is $\\Sigma_2^P$-complete. We also provide more general bounds for non-polynomial classifiers. We then show how such a complexity gap can be sidestepped by introducing Counter-Attack (CA), a system that computes on-the-fly robustness certificates for a given input up to an arbitrary distance bound $\\varepsilon$. We also prove that, even when attacked with perturbations of magnitude $\\varepsilon^\\prime > \\varepsilon$, CA still provides computational robustness: specifically, while computing a certificate is $NP$-complete, attacking the system beyond its intended robustness is $\\Sigma_2^P$-complete. Since the exact form of CA can still be computationally expensive, we introduce a relaxation of this method, which we empirically show to be reliable at identifying non-robust inputs. As part of our work, we introduce UG100, a new dataset obtained by applying a provably optimal attack to six limited-scale networks (three for MNIST and three for CIFAR10), each trained in three different manners.", "keywords": "adversarial attacks;adversarial robustness;computational complexity;dataset", "primary_area": "", "supplementary_material": "", "author": "Samuele Marro;Michele Lombardi", "authorids": "~Samuele_Marro1;~Michele_Lombardi1", "gender": "M;M", "homepage": ";https://www.unibo.it/sitoweb/michele.lombardi2", "dblp": "313/1563;l/MicheleLombardi", "google_scholar": ";https://scholar.google.com/citations?hl=en", "orcid": ";0000-0003-4709-8888", "linkedin": "samuelemarro/;", "or_profile": "~Samuele_Marro1;~Michele_Lombardi1", "aff": "Institute of Superior Studies;University of Bologna", "aff_domain": "unibo.it;unibo.it", "position": "MS student;Associate Professor", "bibtex": "@misc{\nmarro2023countering,\ntitle={Countering the Attack-Defense Complexity Gap for Robust Classifiers},\nauthor={Samuele Marro and Michele Lombardi},\nyear={2023},\nurl={https://openreview.net/forum?id=FDlfFbnI7AR}\n}", "github": "", "project": "", "reviewers": "2UCv;wxLx;mBWL", "site": "https://openreview.net/forum?id=FDlfFbnI7AR", "pdf_size": 11032412, "recommendation": "5;6;6", "confidence": "5;3;2", "correctness": "3;4;3", "technical_novelty": "2;2;3", "empirical_novelty": "2;2;0", "wc_summary_paper": "787;202;76", "wc_strength_and_weaknesses": "124;164;107", "wc_clarity_quality_novelty_and_reproducibility": "16;72;21", "wc_summary_review": "77;147;20", "wc_review": "1004;585;224", "wc_reply_reviewers": "178;0;0", "wc_reply_authors": "1555;453;418", "reply_reviewers": "1;0;0", "reply_authors": "3;1;1", "recommendation_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 1.247219128924647 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.3333333333333333, 0.9428090415820634 ], "wc_summary_paper_avg": [ 355.0, 309.77088307328046 ], "wc_strength_and_weaknesses_avg": [ 
131.66666666666666, 23.893281249943232 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 36.333333333333336, 25.302612952464457 ], "wc_summary_review_avg": [ 81.33333333333333, 51.93799722318483 ], "wc_review_avg": [ 604.3333333333334, 318.72698173968405 ], "wc_reply_reviewers_avg": [ 59.333333333333336, 83.91000470080364 ], "wc_reply_authors_avg": [ 808.6666666666666, 527.9307614535156 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.9449111825230683, "corr_recommendation_correctness": 0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:QivCqqERshoJ:scholar.google.com/&scioq=Countering+the+Attack-Defense+Complexity+Gap+for+Robust+Classifiers&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Institute of Superior Studies;University of Bologna", "aff_unique_dep": ";", "aff_unique_url": ";https://www.unibo.it", "aff_unique_abbr": ";Unibo", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1", "aff_country_unique": ";Italy" }, { "title": "Semi-Parametric Inducing Point Networks and Neural Processes", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11018", "id": "FE99-fDrWd5", "poster": "/media/PosterPDFs/ICLR%202023/11018.png?t=1682521420.720694", "openreview": "https://openreview.net/forum?id=FE99-fDrWd5", "slides": "https://iclr.cc/virtual/2023/poster/11018", "video": "https://iclr.cc/virtual/2023/poster/11018", "author_site": "Richa Rastogi, Yair Schiff, Alon Hacohen, Zhaozhi Li, Yi-Yuan Lee, Yuntian Deng, Mert Sabuncu, Volodymyr Kuleshov", "tldr": "", "abstract": "We introduce semi-parametric inducing point networks (SPIN), a general-purpose architecture that can query the training set at inference time in a compute-efficient manner. Semi-parametric architectures are typically more compact than parametric models, but their computational complexity is often quadratic. In contrast, SPIN attains linear complexity via a cross-attention mechanism between datapoints inspired by inducing point methods. Querying large training sets can be particularly useful in meta-learning, as it unlocks additional training signal, but often exceeds the scaling limits of existing models. We use SPIN as the basis of the Inducing Point Neural Process, a probabilistic model which supports large contexts in meta-learning and achieves high accuracy where existing models fail. In our experiments, SPIN reduces memory requirements, improves accuracy across a range of meta-learning tasks, and improves state-of-the-art performance on an important practical problem, genotype imputation.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Richa Rastogi;Yair Schiff;Alon Hacohen;Zhaozhi Li;Ian Lee;Yuntian Deng;Mert R. 
Sabuncu;Volodymyr Kuleshov", "authorids": "~Richa_Rastogi1;~Yair_Schiff1;alonhacohen@campus.technion.ac.il;zl643@cornell.edu;yl759@cornell.edu;~Yuntian_Deng2;~Mert_R._Sabuncu1;~Volodymyr_Kuleshov1", "gender": "F;M;;;;;M;", "homepage": "https://richrast.github.io/;https://github.com/yair-schiff;;;;https://yuntiandeng.com;http://sabuncu.engineering.cornell.edu;https://www.cs.cornell.edu/~kuleshov/", "dblp": "163/0918;;;;;166/1720;36/4898;81/8612", "google_scholar": ";GhFrOdQAAAAJ;;;;tk0e5lYAAAAJ;;RY_t8XAAAAAJ", "orcid": "0000-0002-5073-1667;;;;;;;", "linkedin": "richa-rastogi-8517128/;yair-schiff;;;;;;", "or_profile": "~Richa_Rastogi1;~Yair_Schiff1;alonhacohen@campus.technion.ac.il;zl643@cornell.edu;yl759@cornell.edu;~Yuntian_Deng2;~Mert_R._Sabuncu1;~Volodymyr_Kuleshov1", "aff": "Cornell University;Department of Computer Science, Cornell University;;;;Harvard University;Cornell Tech;Cornell University", "aff_domain": "cs.cornell.edu;cs.cornell.edu;;;;harvard.edu;cornell.edu;cornell.edu", "position": "PhD student;PhD student;;;;PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nrastogi2023semiparametric,\ntitle={Semi-Parametric Inducing Point Networks and Neural Processes},\nauthor={Richa Rastogi and Yair Schiff and Alon Hacohen and Zhaozhi Li and Ian Lee and Yuntian Deng and Mert R. Sabuncu and Volodymyr Kuleshov},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=FE99-fDrWd5}\n}", "github": "", "project": "", "reviewers": "TnrL;GJez;jnqW;wVgc", "pdf_size": 1270466, "recommendation": "6;6;6;8", "confidence": "4;3;3;3", "correctness": "4;3;4;4", "technical_novelty": "2;3;3;4", "empirical_novelty": "3;3;3;4", "wc_summary_paper": "182;53;101;126", "wc_strength_and_weaknesses": "206;305;436;138", "wc_clarity_quality_novelty_and_reproducibility": "243;81;135;96", "wc_summary_review": "135;54;113;66", "wc_review": "766;493;785;426", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "453;819;1043;323", "reply_reviewers": "0;0;0;0", "reply_authors": "1;3;3;1", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 115.5, 46.5 ], "wc_strength_and_weaknesses_avg": [ 271.25, 112.13245515906624 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 138.75, 63.33393640063753 ], "wc_summary_review_avg": [ 92.0, 33.20391543176798 ], "wc_review_avg": [ 617.5, 159.90700422433034 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 659.5, 286.52530429265755 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.0, 1.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6443990310550033962&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "pdf": "https://openreview.net/pdf?id=FE99-fDrWd5", "email": "cs.cornell.edu;cs.cornell.edu;;;;harvard.edu;cornell.edu;cornell.edu", "author_num": 8, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Cornell University;Harvard University", "aff_unique_dep": ";", "aff_unique_url": "https://www.cornell.edu;https://www.harvard.edu", "aff_unique_abbr": "Cornell;Harvard", "aff_campus_unique_index": "1", "aff_campus_unique": 
";New York City", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "FEAIArDldTA", "title": "Improved Training of Physics-Informed Neural Networks with Model Ensembles", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Learning the solution of partial differential equations (PDEs) with a neural network is an attractive alternative to traditional solvers due to its elegance, greater flexibility and the ease of incorporating observed data. However, training such physics-informed neural networks (PINNs) is notoriously difficult in practice since PINNs often converge to wrong solutions. In this paper, we propose a training algorithm that starts approximation of the PDE solution in the neighborhood of initial conditions and gradually expands the solution domain based on agreement of an ensemble of PINNs. PINNs in the ensemble find similar solutions in the vicinity of points with targets (e.g., observed data or initial conditions) while the found solutions may substantially differ farther away from the observations. Therefore, we propose to use the ensemble agreement as the criterion for gradual expansion of the solution interval, that is including new points for computing the loss derived from differential equations. Due to the flexibility of the domain expansion, our algorithm can easily incorporate measurements in arbitrary locations. In contrast to the existing PINN algorithms with time-adaptive strategies, the proposed algorithm does not need a pre-defined schedule of interval expansion and it treats time and space equally. We experimentally show that the proposed algorithm can stabilize PINN training and yield performance competitive to the recent variants of PINNs trained with time adaptation.", "keywords": "Label propagation;Model ensembles;Partial differential equations;Physics-informed neural networks", "primary_area": "", "supplementary_material": "/attachment/5c40f9d703605d7ecda865adb030ba08153bd55d.zip", "author": "Katsiaryna Haitsiukevich;Alexander Ilin", "authorids": "~Katsiaryna_Haitsiukevich1;~Alexander_Ilin1", "gender": "F;M", "homepage": "https://www.aalto.fi/en/people/katsiaryna-haitsiukevich;https://users.aalto.fi/~alexilin/", "dblp": ";85/5835", "google_scholar": ";i2gcTBQAAAAJ", "orcid": ";", "linkedin": "haitsiukevich/;alexanderilin/", "or_profile": "~Katsiaryna_Haitsiukevich1;~Alexander_Ilin1", "aff": "Aalto University;Aalto University", "aff_domain": "aalto.fi;aalto.fi", "position": "PhD student;Assistant Professor", "bibtex": "@misc{\nhaitsiukevich2023improved,\ntitle={Improved Training of Physics-Informed Neural Networks with Model Ensembles},\nauthor={Katsiaryna Haitsiukevich and Alexander Ilin},\nyear={2023},\nurl={https://openreview.net/forum?id=FEAIArDldTA}\n}", "github": "", "project": "", "reviewers": "PC3y;esVi;wiLo;zNYw", "site": "https://openreview.net/forum?id=FEAIArDldTA", "pdf_size": 50551259, "recommendation": "3;3;6;8", "confidence": "5;3;3;5", "correctness": "2;3;4;4", "technical_novelty": "2;2;3;3", "empirical_novelty": "0;2;3;4", "wc_summary_paper": "55;82;66;63", "wc_strength_and_weaknesses": "135;619;155;330", "wc_clarity_quality_novelty_and_reproducibility": "4;69;25;49", "wc_summary_review": "30;36;42;21", "wc_review": "224;806;288;463", "wc_reply_reviewers": "13;0;0;36", "wc_reply_authors": "394;1377;188;597", "reply_reviewers": "1;0;0;1", "reply_authors": "1;2;1;1", "recommendation_avg": [ 5.0, 2.1213203435596424 ], "confidence_avg": [ 4.0, 1.0 ], "correctness_avg": [ 3.25, 0.82915619758885 
], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.25, 1.479019945774904 ], "wc_summary_paper_avg": [ 66.5, 9.810708435174291 ], "wc_strength_and_weaknesses_avg": [ 309.75, 193.99146244100538 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 36.75, 24.498724456591614 ], "wc_summary_review_avg": [ 32.25, 7.75806032459145 ], "wc_review_avg": [ 445.25, 225.90636887879015 ], "wc_reply_reviewers_avg": [ 12.25, 14.703315952532613 ], "wc_reply_authors_avg": [ 639.0, 449.9538865261639 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.23570226039551587, "corr_recommendation_correctness": 0.8528028654224419, "gs_citation": 40, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6788195308843675352&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0", "aff_unique_norm": "Aalto University", "aff_unique_dep": "", "aff_unique_url": "https://www.aalto.fi", "aff_unique_abbr": "Aalto", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Finland" }, { "id": "FEBCwrGzR3j", "title": "PromptSum: Planning with Mixed Prompts for Parameter-Efficient Controllable Abstractive Summarization", "track": "main", "status": "Withdraw", "tldr": "A new prompting mechanism which enables controllable, parameter-efficient and data-efficient summarization. ", "abstract": "Prompt tuning (PT), a technique that only tunes the additional prompt embeddings while keeping the backbone pretrained language model frozen, has shown promising results in language understanding tasks, especially in low-resource scenarios. However, there lacks better prompt design methods for generation tasks such as summarization. At the same time, summarization guided through instructions (discrete prompts) can achieve a desirable double objective of higher quality and controllability in summary generation. Towards a triple goal of data-efficiency, parameter-efficiency and controllability, we introduce PromptSum, a method combining PT with a multi-task objective and discrete entity prompts for abstractive summarization. Our model achieves state-of-the-art results on several popular few-shot benchmarks as well as a strong level of controllability through entities, all while only tuning several orders of magnitude less parameters.", "keywords": "summarization;controllability;parameter-efficiency;prompt-tuning;pre-training;multi-tasking", "primary_area": "", "supplementary_material": "", "author": "Mathieu Ravaut;Hailin Chen;Ruochen Zhao;Chengwei Qin;Shafiq Joty;Nancy F. 
Chen", "authorids": "~Mathieu_Ravaut1;~Hailin_Chen1;~Ruochen_Zhao1;~Chengwei_Qin1;~Shafiq_Joty1;~Nancy_F._Chen1", "gender": "M;;F;M;M;", "homepage": "https://ravoxsg.github.io/;;;;https://raihanjoty.github.io/;http://alum.mit.edu/www/nancychen", "dblp": "200/8525;36/8249;253/2147;195/2732;62/2078;84/8761", "google_scholar": "LVS4IdgAAAAJ;oE4KrU0AAAAJ;;;hR249csAAAAJ;https://scholar.google.com.sg/citations?user=K3Z9UiAAAAAJ", "orcid": "0000-0003-1971-7688;;;;;0000-0003-0872-5877", "linkedin": "mravox/;chenhailin/;esther-ruochen-zhao-855357150/;chengwei-qin-3401a1107/;;nancy-chen-4644865/?originalSubdomain=sg", "or_profile": "~Mathieu_Ravaut1;~Hailin_Chen1;~Ruochen_Zhao1;~Chengwei_Qin1;~Shafiq_Joty1;~Nancy_F._Chen1", "aff": "Nanyang Technological University;National Technological University;Nanyang Technological University;Nanyang Technological University;SalesForce.com;I2R, A*STAR", "aff_domain": "ntu.edu.sg;ntu.edu;ntu.edu.sg;ntu.edu.sg;salesforce.com;i2r.a-star.edu.sg", "position": "PhD student;PhD student;PhD student;PhD student;Principal Researcher;Principal Researcher", "bibtex": "@misc{\nravaut2023promptsum,\ntitle={PromptSum: Planning with Mixed Prompts for Parameter-Efficient Controllable Abstractive Summarization},\nauthor={Mathieu Ravaut and Hailin Chen and Ruochen Zhao and Chengwei Qin and Shafiq Joty and Nancy F. Chen},\nyear={2023},\nurl={https://openreview.net/forum?id=FEBCwrGzR3j}\n}", "github": "", "project": "", "reviewers": "jkwr;wsS5;wCg2;Neu3", "site": "https://openreview.net/forum?id=FEBCwrGzR3j", "pdf_size": 1078079, "recommendation": "3;3;5;5", "confidence": "4;3;4;4", "correctness": "2;2;4;3", "technical_novelty": "2;3;2;2", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "61;127;76;45", "wc_strength_and_weaknesses": "448;568;80;237", "wc_clarity_quality_novelty_and_reproducibility": "138;49;18;14", "wc_summary_review": "101;33;25;53", "wc_review": "748;777;199;349", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "412;219;99;0", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;0", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.82915619758885 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 77.25, 30.74390183434757 ], "wc_strength_and_weaknesses_avg": [ 333.25, 188.19853214092825 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 54.75, 49.936835101956554 ], "wc_summary_review_avg": [ 53.0, 29.5296461204668 ], "wc_review_avg": [ 518.25, 250.15132919894708 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 182.5, 153.52605642040052 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0.75, 0.4330127018922193 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.9045340337332909, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:51a11NoVyBIJ:scholar.google.com/&scioq=PromptSum:+Planning+with+Mixed+Prompts+for+Parameter-Efficient+Controllable+Abstractive+Summarization&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;1;0;0;2;3", "aff_unique_norm": "Nanyang Technological University;National Technological University;Salesforce;A*STAR", "aff_unique_dep": ";;;Institute for Infocomm Research", "aff_unique_url": "https://www.ntu.edu.sg;https://www.ntu.edu;https://www.salesforce.com;https://www.a-star.edu.sg", "aff_unique_abbr": "NTU;NTU;Salesforce;A*STAR", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;1;0", "aff_country_unique": "Singapore;United States" }, { "id": "FELWgMjxZJj", "title": "Open-Vocabulary Semantic Segmentation with Mask-adapted CLIP", "track": "main", "status": "Withdraw", "tldr": "For the first time, zero-shot segmentation model matches the supervised model on ADE-20k without seeing any single training images.", "abstract": "Open-vocabulary semantic segmentation aims to segment an image into semantic regions according to text descriptions, which may not have been seen during training. Recent two-stage methods first generate class-agnostic mask proposals and then leverage pre-trained vision-language models, e.g., CLIP, to classify masked regions. We identify the performance bottleneck of this paradigm to be the pre-trained CLIP model, since it does not perform well on masked images. To address this, we propose to finetune CLIP on a collection of masked image regions and their corresponding text descriptions. We collect training data by mining an existing image-caption dataset (e.g., COCO Captions), using CLIP to match masked image regions to nouns in the image captions. Compared with the more precise and manually annotated segmentation labels with fixed classes (e.g., COCO-Stuff), we find our noisy but diverse dataset can better retain CLIP's generalization ability. Along with finetuning the entire model, we utilize the \"blank\" areas in masked images using a method we dub mask prompt tuning. Experiments demonstrate mask prompt tuning brings significant improvement without modifying any weights of CLIP, and it can further improve a fully finetuned model. In particular, when trained on COCO and evaluated on ADE20K-150, our best model achieves 29.6% mIoU, which is +8.5% higher than the previous state-of-the-art. 
For the first time, open-vocabulary generalist models match the performance of supervised specialist models in 2017 without dataset-specific adaptations.", "keywords": "vision-language models;open-vocabulary;image segmentation", "primary_area": "", "supplementary_material": "", "author": "Feng Liang;Bichen Wu;Xiaoliang Dai;Kunpeng Li;Yinan Zhao;Hang Zhang;Peizhao Zhang;Peter Vajda;Diana Marculescu", "authorids": "~Feng_Liang3;~Bichen_Wu1;~Xiaoliang_Dai1;~Kunpeng_Li1;~Yinan_Zhao1;~Hang_Zhang3;~Peizhao_Zhang1;~Peter_Vajda1;~Diana_Marculescu4", "gender": "M;M;M;M;M;M;M;;", "homepage": "https://jeff-liangf.github.io/;;;https://kunpengli1994.github.io/;;https://hangzhang.org/;;https://sites.google.com/site/vajdap;", "dblp": ";130/1371;192/3904;;172/1380;49/6156-5;23/8011.html;44/5953;", "google_scholar": "ecTFCUMAAAAJ;K3QJPdMAAAAJ;u4olrOcAAAAJ;https://scholar.google.com/citations?hl=zh-CN;6Kyp7rQAAAAJ;gCoWdkUAAAAJ;eqQQkM4AAAAJ;k8QB5VUAAAAJ;", "orcid": ";;;;;;;;", "linkedin": "feng-liang-854a30150/;bichenwu/;;;;;;p%C3%A9ter-vajda-9a03aaa/;", "or_profile": "~Feng_Liang3;~Bichen_Wu1;~Xiaoliang_Dai1;~Kunpeng_Li1;~Yinan_Zhao1;~Hang_Zhang3;~Peizhao_Zhang1;~Peter_Vajda1;~Diana_Marculescu4", "aff": "University of Texas, Austin;Meta Facebook;Meta Facebook;Meta;Meta;Cruise LLC;Meta;Meta;", "aff_domain": "utexas.edu;fb.com;fb.com;fb.com;meta.com;getcruise.com;meta.com;meta.com;", "position": "PhD student;Research Scientist;Research Scientist;Researcher;Researcher;Researcher;Research Scientist;Researcher;", "bibtex": "@misc{\nliang2023openvocabulary,\ntitle={Open-Vocabulary Semantic Segmentation with Mask-adapted {CLIP}},\nauthor={Feng Liang and Bichen Wu and Xiaoliang Dai and Kunpeng Li and Yinan Zhao and Hang Zhang and Peizhao Zhang and Peter Vajda and Diana Marculescu},\nyear={2023},\nurl={https://openreview.net/forum?id=FELWgMjxZJj}\n}", "github": "", "project": "", "reviewers": "AzxJ;SWXX;PRQy;ALTp", "site": "https://openreview.net/forum?id=FELWgMjxZJj", "pdf_size": 1599344, "recommendation": "5;5;5;8", "confidence": "5;4;5;3", "correctness": "4;2;3;4", "technical_novelty": "2;3;2;3", "empirical_novelty": "3;3;0;3", "wc_summary_paper": "69;49;53;189", "wc_strength_and_weaknesses": "151;163;144;115", "wc_clarity_quality_novelty_and_reproducibility": "10;14;19;67", "wc_summary_review": "43;28;35;32", "wc_review": "273;254;251;403", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "597;612;776;299", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.25, 1.299038105676658 ], "wc_summary_paper_avg": [ 90.0, 57.645468165329355 ], "wc_strength_and_weaknesses_avg": [ 143.25, 17.66882848408462 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 27.5, 23.02715787933891 ], "wc_summary_review_avg": [ 34.5, 5.5 ], "wc_review_avg": [ 295.25, 62.77887781730413 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 571.0, 172.02180094395013 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 9, 0 ], "corr_recommendation_confidence": -0.8703882797784892, "corr_recommendation_correctness": 0.5222329678670935, "gs_citation": 519, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14945132931136967765&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 10, "aff_unique_index": "0;1;1;1;1;2;1;1", "aff_unique_norm": 
"University of Texas at Austin;Meta;Cruise LLC", "aff_unique_dep": ";Meta Platforms, Inc.;", "aff_unique_url": "https://www.utexas.edu;https://meta.com;https://www.cruisellc.com", "aff_unique_abbr": "UT Austin;Meta;Cruise", "aff_campus_unique_index": "0", "aff_campus_unique": "Austin;", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "FGlL0dLjpn", "title": "Learning Gradient-based Mixup towards Flatter Minima for Domain Generalization", "track": "main", "status": "Withdraw", "tldr": " We propose a policy to generate the instance weights for mixup based on gradient similarity and optimize a learnable similarity function towards flatter minima for better generalization.", "abstract": "To address the distribution shifts between training and test data, domain generalization (DG) leverages multiple source domains to learn a model that generalizes well to unseen domains. However, existing DG methods generally suffer from overfitting to the source domains, partly due to the limited coverage of the expected region in feature space. Motivated by this, we propose to perform mixup with data interpolation and extrapolation to cover the potential unseen regions. To prevent the detrimental effects of unconstrained extrapolation, we carefully design a policy to generate the instance weights, named Flatness-aware Gradient-based Mixup (FGMix). The policy employs a gradient-based similarity to assign greater weights to instances that carry more invariant information, and learns the similarity function towards flatter minima for better generalization. On the DomainBed benchmark, we validate the efficacy of various designs of FGMix and demonstrate its superiority over other DG algorithms.", "keywords": "Domain Generalization;Mixup;Gradient-based Method;Flatness-aware Optimization", "primary_area": "", "supplementary_material": "", "author": "Danni Peng;Sinno Pan", "authorids": "~Danni_Peng1;~Sinno_Pan1", "gender": ";M", "homepage": ";http://www.cse.cuhk.edu.hk/~sinnopan/", "dblp": ";80/5412", "google_scholar": ";https://scholar.google.com/citations?hl=en", "orcid": ";", "linkedin": ";", "or_profile": "~Danni_Peng1;~Sinno_Pan1", "aff": ";Nanyang Technological University", "aff_domain": ";ntu.edu.sg", "position": ";Full Professor", "bibtex": "@misc{\npeng2023learning,\ntitle={Learning Gradient-based Mixup towards Flatter Minima for Domain Generalization},\nauthor={Danni Peng and Sinno Pan},\nyear={2023},\nurl={https://openreview.net/forum?id=FGlL0dLjpn}\n}", "github": "", "project": "", "reviewers": "6Ead;ccUL;7DBG;tKse", "site": "https://openreview.net/forum?id=FGlL0dLjpn", "pdf_size": 7486559, "recommendation": "3;3;3;5", "confidence": "4;4;4;5", "correctness": "3;3;2;4", "technical_novelty": "2;2;2;4", "empirical_novelty": "3;2;2;3", "wc_summary_paper": "78;64;48;62", "wc_strength_and_weaknesses": "280;200;343;175", "wc_clarity_quality_novelty_and_reproducibility": "8;8;63;6", "wc_summary_review": "46;134;19;2", "wc_review": "412;406;473;245", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.5, 0.8660254037844386 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 63.0, 10.63014581273465 ], "wc_strength_and_weaknesses_avg": [ 249.5, 66.46991800807339 ], 
"wc_clarity_quality_novelty_and_reproducibility_avg": [ 21.25, 24.118198523106987 ], "wc_summary_review_avg": [ 50.25, 50.83490434730845 ], "wc_review_avg": [ 384.0, 84.42452250383178 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 1.0, "corr_recommendation_correctness": 0.816496580927726, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2751950664886827290&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 3, "aff_unique_index": "0", "aff_unique_norm": "Nanyang Technological University", "aff_unique_dep": "", "aff_unique_url": "https://www.ntu.edu.sg", "aff_unique_abbr": "NTU", "aff_country_unique_index": "0", "aff_country_unique": "Singapore" }, { "id": "FH3Mwjb_H8B", "title": "Focusing on what to decode and what to train: Efficient Training with HOI Split Decoders and Split Target Guided DeNoising", "track": "main", "status": "Withdraw", "tldr": "A novel one-stage framework with HOI specific denoising training strategy for human-object interaction detection.", "abstract": "Recent one-stage transformer-based methods achieve notable gains in the Human-object Interaction Detection (HOI) task by leveraging the detection of DETR. However, the current methods redirect the detection target of the object decoder, and the box target is not explicitly separated from the query embeddings, which leads to long and hard training. Furthermore, matching the predicted HOI instances with the ground-truth is more challenging than object detection, simply adapting training strategies from the object detection makes the training more difficult. To clear the ambiguity between human and object detection, we propose a novel one-stage framework (SOV), which consists of a subject decoder, an object decoder, and a well-designed verb decoder. Three split decoders with explicitly defined box queries share the prediction burden and accelerate the training convergence. To further improve the training efficiency, we propose a novel Split Target Guided (STG) DeNoising strategy, which leverages learnable object label embeddings and verb label embeddings to guide the training. In addition, for the prediction part, the label-specific information is directly fed into the decoders by initializing the query embeddings from the learnable label embeddings. 
Extensive experiments show that our method (SOV-STG) requires 3$\times$ fewer training epochs and achieves 4.68\% higher accuracy than the state-of-the-art method.", "keywords": "human-object interaction detection;transformer", "primary_area": "", "supplementary_material": "", "author": "Junwen Chen;Yingcheng Wang;Keiji Yanai", "authorids": "~Junwen_Chen3;wang-y@mm.inf.uec.ac.jp;~Keiji_Yanai1", "gender": "M;;M", "homepage": ";;", "dblp": ";;60/2410", "google_scholar": "https://scholar.google.co.jp/citations?user=LiCkH5MAAAAJ;;GfBhHw0AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Junwen_Chen3;wang-y@mm.inf.uec.ac.jp;~Keiji_Yanai1", "aff": "The University of Electro-Communications;;The University of Electro-Communications, Tokyo", "aff_domain": "uec.ac.jp;;uec.ac.jp", "position": "MS student;;Full Professor", "bibtex": "@misc{\nchen2023focusing,\ntitle={Focusing on what to decode and what to train: Efficient Training with {HOI} Split Decoders and Split Target Guided DeNoising},\nauthor={Junwen Chen and Yingcheng Wang and Keiji Yanai},\nyear={2023},\nurl={https://openreview.net/forum?id=FH3Mwjb_H8B}\n}", "github": "", "project": "", "reviewers": "zfBe;oYan;jeKz;43zf", "site": "https://openreview.net/forum?id=FH3Mwjb_H8B", "pdf_size": 21435790, "recommendation": "5;5;5;6", "confidence": "3;4;4;4", "correctness": "2;3;3;3", "technical_novelty": "2;3;3;2", "empirical_novelty": "2;3;3;2", "wc_summary_paper": "53;53;31;80", "wc_strength_and_weaknesses": "264;148;156;204", "wc_clarity_quality_novelty_and_reproducibility": "36;53;22;15", "wc_summary_review": "30;34;53;64", "wc_review": "383;288;262;363", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 54.25, 17.36915369268175 ], "wc_strength_and_weaknesses_avg": [ 193.0, 46.24932431938871 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 31.5, 14.534441853748634 ], "wc_summary_review_avg": [ 45.25, 13.88119231190174 ], "wc_review_avg": [ 324.0, 50.35374861914453 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11902003801534978870&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Electro-Communications", "aff_unique_dep": "", "aff_unique_url": "https://www.uec.ac.jp", "aff_unique_abbr": "UEC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Tokyo", "aff_country_unique_index": "0;0", "aff_country_unique": "Japan" }, { "id": "FHZUqgxIBYn", "title": "Opportunistic Actor-Critic (OPAC) with Clipped Triple Q-learning", "track": "main", "status": "Withdraw", "tldr": "OPAC achieves higher average rewards than relevant baselines and mitigates the underestimation bias with the help of Clipped Triple Q-learning.", "abstract": "Despite being among the most successful model-free deep reinforcement learning (RL) algorithms in recent years, Soft Actor-Critic (SAC) and Twin Delayed Deep Deterministic Policy Gradient (TD3) have their respective 
downsides--TD3 performs well in simple tasks, while SAC does so in relatively complicated ones. However, they also suffer from underestimation due to Clipped Double Q-learning, i.e., taking a minimum of two Q-values. This paper introduces Opportunistic Actor-Critic (OPAC), an ensemble model-free deep RL algorithm that performs well in simple and complex tasks. OPAC combines the features of TD3 and SAC under one roof to retain their respective benefits. It also employs three critics and considers taking the mean of the smallest two Q-values for updating the shared target, dubbed Clipped Triple Q-learning. Our analytical results establish that Clipped Triple Q-learning incurs less underestimation than Clipped Double Q-learning. Furthermore, we have systematically evaluated OPAC in MuJoCo environments, and the empirical results indicate that OPAC attains higher average rewards than the current baselines.", "keywords": "Model-free Deep RL;Actor-Critic;Estimation Bias;Continuous Control", "primary_area": "", "supplementary_material": "/attachment/caa3702118a148c7ef0bf698470f462db77e2bb0.zip", "author": "Srinjoy Roy;Saptam Bakshi;Tamal Maharaj;Swagatam Das", "authorids": "~Srinjoy_Roy1;~Saptam_Bakshi1;~Tamal_Maharaj1;~Swagatam_Das2", "gender": ";M;;", "homepage": ";;http://cs.rkmvu.ac.in/~tamal/;", "dblp": ";;135/8800.html;", "google_scholar": ";;pMdi9woAAAAJ;", "orcid": ";;;", "linkedin": ";saptam-bakshi;;", "or_profile": "~Srinjoy_Roy1;~Saptam_Bakshi1;~Tamal_Maharaj1;~Swagatam_Das2", "aff": ";;Ramakrishna Mission Vivekananda Educational and Research Institute;", "aff_domain": ";;rkmvu.ac.in;", "position": ";;Assistant Professor;", "bibtex": "@misc{\nroy2023opportunistic,\ntitle={Opportunistic Actor-Critic ({OPAC}) with Clipped Triple Q-learning},\nauthor={Srinjoy Roy and Saptam Bakshi and Tamal Maharaj and Swagatam Das},\nyear={2023},\nurl={https://openreview.net/forum?id=FHZUqgxIBYn}\n}", "github": "", "project": "", "reviewers": "PzML;br6G;xcgL;CdKZ", "site": "https://openreview.net/forum?id=FHZUqgxIBYn", "pdf_size": 2210020, "recommendation": "3;3;3;5", "confidence": "5;4;4;4", "correctness": "2;2;3;3", "technical_novelty": "2;2;2;2", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "54;92;76;61", "wc_strength_and_weaknesses": "146;122;341;195", "wc_clarity_quality_novelty_and_reproducibility": "24;298;42;23", "wc_summary_review": "44;64;98;30", "wc_review": "268;576;557;309", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 70.75, 14.618053906043718 ], "wc_strength_and_weaknesses_avg": [ 201.0, 85.00294112558694 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 96.75, 116.4374832259784 ], "wc_summary_review_avg": [ 59.0, 25.553864678361276 ], "wc_review_avg": [ 427.5, 139.9151528605819 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=889299600703296582&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": 
"Ramakrishna Mission Vivekananda Educational and Research Institute", "aff_unique_dep": "", "aff_unique_url": "http://www.rkmveri.org/", "aff_unique_abbr": "", "aff_country_unique_index": "0", "aff_country_unique": "India" }, { "id": "FI5IysDR8pG", "title": "Learning Dynamic Query Combinations for Transformer-based Object Detection and Segmentation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Transformer-based detection and segmentation methods use a list of learned detection queries to retrieve information from the transformer network and learn to predict the location and category of one specific object from each query. We empirically find that random convex combinations of the learned queries are still good queries for the corresponding models. We then propose to learn a convex combination with dynamic coefficients based on the high-level semantics of the image. The generated dynamic queries better capture the prior of object locations and categories in the different images. Equipped with our dynamic queries, a wide range of DETR-based models achieve consistent and superior performance across multiple tasks (object detection, instance segmentation, panoptic segmentation) and on different benchmarks (MS COCO, CityScapes, YoutubeVIS).", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/81b61cd1b7ac483c0c46110694b3efe7f790b931.zip", "author": "Yiming Cui;Linjie Yang;Haichao Yu", "authorids": "~Yiming_Cui3;~Linjie_Yang4;~Haichao_Yu2", "gender": ";M;M", "homepage": ";https://sites.google.com/site/linjieyang89/;", "dblp": ";126/6794;205/3982.html", "google_scholar": ";;6vBNzOsAAAAJ", "orcid": ";;", "linkedin": ";;haichaoyu/", "or_profile": "~Yiming_Cui3;~Linjie_Yang4;~Haichao_Yu2", "aff": ";ByteDance Inc.;ByteDance Inc.", "aff_domain": ";bytedance.com;bytedance.com", "position": ";Research Scientist;Researcher", "bibtex": "@misc{\ncui2023learning,\ntitle={Learning Dynamic Query Combinations for Transformer-based Object Detection and Segmentation},\nauthor={Yiming Cui and Linjie Yang and Haichao Yu},\nyear={2023},\nurl={https://openreview.net/forum?id=FI5IysDR8pG}\n}", "github": "", "project": "", "reviewers": "ykWm;QkjX;tEAE;LvX2", "site": "https://openreview.net/forum?id=FI5IysDR8pG", "pdf_size": 3444089, "recommendation": "5;5;5;8", "confidence": "5;4;4;5", "correctness": "3;4;3;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "214;42;42;109", "wc_strength_and_weaknesses": "457;403;138;182", "wc_clarity_quality_novelty_and_reproducibility": "22;49;5;185", "wc_summary_review": "69;22;21;79", "wc_review": "762;516;206;555", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "813;505;786;390", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 4.5, 0.5 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 101.75, 70.34335434140172 ], "wc_strength_and_weaknesses_avg": [ 295.0, 137.22791261255853 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 65.25, 70.89560987818639 ], "wc_summary_review_avg": [ 47.75, 26.48938466631492 ], "wc_review_avg": [ 509.75, 198.73396161703212 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 623.5, 180.88739591248475 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 3, 0 ], 
"corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": -0.3333333333333333, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9895752924198109310&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0", "aff_unique_norm": "ByteDance", "aff_unique_dep": "", "aff_unique_url": "https://www.bytedance.com", "aff_unique_abbr": "ByteDance", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "FILleBqk31S", "title": "Do Not Blindly Imitate the Teacher: Loss Perturbation for Knowledge Distillation", "track": "main", "status": "Reject", "tldr": "We propose a perturbed loss function for the knowledge distillation task which outperforms the underlying KL loss and other perturbation methods.", "abstract": "Knowledge distillation (KD) is a popular model compression technique to transfer knowledge from large teacher models to a small student model. Typically, the student learns to imitate the teacher by minimizing the KL divergence of its output distribution with the teacher's output distribution. We argue that such a learning objective is sub-optimal because there exists a discrepancy between the teacher's output distribution and the ground truth label distribution, and forcing the student to blindly imitate the unreliable teacher output distribution leads to inferior performance. To this end, we propose a novel knowledge distillation objective PTLoss by first representing the vanilla KL-based distillation loss function via a Maclaurin series and then perturbing the leading-order terms in this series. This perturbed loss improves the student generalizability by effectively distilling knowledge from a shifted distribution closer to the ground truth data. We also propose a method to compute this shifted teacher distribution, named Proxy Teacher, which enables us to select the perturbation coefficients in PTLoss. We theoretically show the perturbed loss reduces the deviation from the true population risk compared to the vanilla KL-based distillation loss functions. 
Experiments on three tasks with teachers of different scales show that our method significantly outperforms vanilla distillation loss functions and other perturbation methods.", "keywords": "distillation;loss function;natural language processing", "primary_area": "", "supplementary_material": "", "author": "Rongzhi Zhang;Jiaming Shen;Tianqi Liu;Jialu Liu;Michael Bendersky;Marc Najork;Chao Zhang", "authorids": "~Rongzhi_Zhang2;~Jiaming_Shen1;~Tianqi_Liu1;~Jialu_Liu1;~Michael_Bendersky1;~Marc_Najork1;~Chao_Zhang15", "gender": "M;;M;M;;M;", "homepage": "https://rongzhizhang.org/;https://mickeysjm.github.io;;https://jialu.info/;http://bendersky.github.io/;http://marc.najork.org/;http://chaozhang.org/", "dblp": "130/7337;178/3627;134/5653-2;14/8399;80/4305;n/MarcNajork;94/3019-14", "google_scholar": "https://scholar.google.com/citations?hl=en;-ZJ0sCoAAAAJ;pUKhiMIAAAAJ;BUERw4QAAAAJ;C9mxM5IAAAAJ;7HeAnjwAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0002-0467-4956;;;0000-0002-2941-6240;0000-0003-1423-0854;0000-0003-3009-598X", "linkedin": ";jiaming-shen-08186710a/;;;;najork/;", "or_profile": "~Rongzhi_Zhang2;~Jiaming_Shen1;~Tianqi_Liu1;~Jialu_Liu1;~Michael_Bendersky1;~Marc_Najork1;~Chao_Zhang15", "aff": "Zhejiang University;Google Research;Google DeepMind;Google Research;Google;Google Research;Georgia Institute of Technology", "aff_domain": "zju.edu.cn;google.com;google.com;google.com;google.com;google.com;gatech.edu", "position": "Undergrad student;Research Scientist;Software Engineer;Software Engineer Manager;Researcher;Director, Research Engineering;Assistant Professor", "bibtex": "@misc{\nzhang2023do,\ntitle={Do Not Blindly Imitate the Teacher: Loss Perturbation for Knowledge Distillation},\nauthor={Rongzhi Zhang and Jiaming Shen and Tianqi Liu and Jialu Liu and Michael Bendersky and Marc Najork and Chao Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=FILleBqk31S}\n}", "github": "", "project": "", "reviewers": "BGUG;Djov;yv5C", "site": "https://openreview.net/forum?id=FILleBqk31S", "pdf_size": 836099, "recommendation": "3;6;6", "confidence": "4;4;2", "correctness": "3;3;4", "technical_novelty": "2;3;4", "empirical_novelty": "2;3;4", "wc_summary_paper": "58;108;72", "wc_strength_and_weaknesses": "195;762;269", "wc_clarity_quality_novelty_and_reproducibility": "11;24;9", "wc_summary_review": "6;200;22", "wc_review": "270;1094;372", "wc_reply_reviewers": "172;0;0", "wc_reply_authors": "1807;1185;807", "reply_reviewers": "1;0;0", "reply_authors": "4;3;2", "recommendation_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.816496580927726 ], "empirical_novelty_avg": [ 3.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 79.33333333333333, 21.06075866524175 ], "wc_strength_and_weaknesses_avg": [ 408.6666666666667, 251.66423839888114 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 14.666666666666666, 6.649979114420002 ], "wc_summary_review_avg": [ 76.0, 87.92420978699022 ], "wc_review_avg": [ 578.6666666666666, 366.7672589289047 ], "wc_reply_reviewers_avg": [ 57.333333333333336, 81.08157757605744 ], "wc_reply_authors_avg": [ 1266.3333333333333, 412.27930122942416 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 3.0, 0.816496580927726 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.5000000000000001, "corr_recommendation_correctness": 
0.5, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:UREF1d_piKgJ:scholar.google.com/&scioq=Do+Not+Blindly+Imitate+the+Teacher:+Loss+Perturbation+for+Knowledge+Distillation&hl=en&as_sdt=0,14", "gs_version_total": 0, "aff_unique_index": "0;1;1;1;1;1;2", "aff_unique_norm": "Zhejiang University;Google;Georgia Institute of Technology", "aff_unique_dep": ";Google Research;", "aff_unique_url": "https://www.zju.edu.cn;https://research.google;https://www.gatech.edu", "aff_unique_abbr": "ZJU;Google Research;Georgia Tech", "aff_campus_unique_index": "1;1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;2;1;1;1;1", "aff_country_unique": "China;United States;United Kingdom" }, { "title": "Instance-wise Batch Label Restoration via Gradients in Federated Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11872", "id": "FIrQfNSOoTr", "poster": "/media/PosterPDFs/ICLR%202023/11872.png?t=1680773677.5553157", "openreview": "https://openreview.net/forum?id=FIrQfNSOoTr", "slides": "https://iclr.cc/virtual/2023/poster/11872", "video": "https://iclr.cc/virtual/2023/poster/11872", "author_site": "Kailang Ma, Yu Sun, Jian Cui, Dawei Li, Zhenyu Guan, Jianwei Liu", "tldr": "We propose an analytic method to perform instance-wise batch label restoration and enhance the existing gradient inversion attacks.", "abstract": "Gradient inversion attacks have posed a serious threat to the privacy of federated learning. The attacks search for the optimal pair of input and label best matching the shared gradients and the search space of the attacks can be reduced by pre-restoring labels. Recently, label restoration technique allows for the extraction of labels from gradients analytically, but even the state-of-the-art remains limited to identify the presence of categories (i.e., the class-wise label restoration). This work considers the more real-world settings, where there are multiple instances of each class in a training batch. An analytic method is proposed to perform instance-wise batch label restoration from only the gradient of the final layer. On the basis of the approximate recovered class-wise embeddings and post-softmax probabilities, we establish linear equations of the gradients, probabilities and labels to derive the Number of Instances (NoI) per class by the Moore-Penrose pseudoinverse algorithm. Our experimental evaluations reach over 99% Label existence Accuracy (LeAcc) and exceed 96% Label number Accuracy (LnAcc) in most cases on three image datasets and four classification models. The two metrics are used to evaluate class-wise and instance-wise label restoration accuracy, respectively. And the recovery is made feasible even with a batch size of 4096 and partially negative activations (e.g., Leaky ReLU and Swish). Furthermore, we demonstrate that our method facilitates the existing gradient inversion attacks by exploiting the recovered labels, with an increase of 6-7 in PSNR on both MNIST and CIFAR100. 
Our code is available at https://github.com/BUAA-CST/iLRG.", "keywords": "federated learning;batch label restoration;gradient inversion attack.", "primary_area": "", "supplementary_material": "/attachment/6257774975c8582a098e8405fabe1f9aa9c5a882.zip", "author": "Kailang Ma;Yu Sun;Jian Cui;Dawei Li;Zhenyu Guan;Jianwei Liu", "authorids": "~Kailang_Ma1;sunyv@buaa.edu.cn;cuijianw@buaa.edu.cn;lidawei@buaa.edu.cn;~Zhenyu_Guan1;liujianwei@buaa.edu.cn", "gender": ";;;;;", "homepage": ";;;;;", "dblp": ";;;;121/1665;", "google_scholar": ";;;;https://scholar.google.com/citations?hl=zh-TW;", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Kailang_Ma1;sunyv@buaa.edu.cn;cuijianw@buaa.edu.cn;lidawei@buaa.edu.cn;~Zhenyu_Guan1;liujianwei@buaa.edu.cn", "aff": ";;;;Beihang University;", "aff_domain": ";;;;buaa.edu.cn;", "position": ";;;;Full Professor;", "bibtex": "@inproceedings{\nma2023instancewise,\ntitle={Instance-wise Batch Label Restoration via Gradients in Federated Learning},\nauthor={Kailang Ma and Yu Sun and Jian Cui and Dawei Li and Zhenyu Guan and Jianwei Liu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=FIrQfNSOoTr}\n}", "github": "", "project": "", "reviewers": "9FpD;ngm7;CT8P", "pdf_size": 1336108, "recommendation": "6;6;8", "confidence": "4;3;4", "correctness": "2;3;4", "technical_novelty": "2;2;3", "empirical_novelty": "2;2;3", "wc_summary_paper": "70;94;121", "wc_strength_and_weaknesses": "162;367;385", "wc_clarity_quality_novelty_and_reproducibility": "48;26;147", "wc_summary_review": "5;66;43", "wc_review": "285;553;696", "wc_reply_reviewers": "31;26;90", "wc_reply_authors": "920;2399;1237", "reply_reviewers": "1;1;1", "reply_authors": "7;7;3", "recommendation_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 95.0, 20.83266665599966 ], "wc_strength_and_weaknesses_avg": [ 304.6666666666667, 101.14785657090756 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 73.66666666666667, 52.62656701282686 ], "wc_summary_review_avg": [ 38.0, 25.152865973217974 ], "wc_review_avg": [ 511.3333333333333, 170.3571411933047 ], "wc_reply_reviewers_avg": [ 49.0, 29.06314963431642 ], "wc_reply_authors_avg": [ 1518.6666666666667, 635.7999335919717 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 5.666666666666667, 1.8856180831641267 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.49999999999999983, "corr_recommendation_correctness": 0.8660254037844385, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8299640953080419549&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "pdf": "https://openreview.net/pdf?id=FIrQfNSOoTr", "email": ";;;;buaa.edu.cn;", "author_num": 6, "aff_unique_index": "0", "aff_unique_norm": "Beihang University", "aff_unique_dep": "", "aff_unique_url": "http://www.buaa.edu.cn/", "aff_unique_abbr": "BUAA", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "title": "Towards Understanding GD with Hard and Conjugate Pseudo-labels for Test-Time Adaptation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10951", "id": "FJXf1FXN8C", "poster": "", "openreview": 
"https://openreview.net/forum?id=FJXf1FXN8C", "slides": "https://iclr.cc/virtual/2023/poster/10951", "video": "https://iclr.cc/virtual/2023/poster/10951", "author_site": "Jun-Kun Wang, Andre Wibisono", "tldr": "Towards Understanding GD with Hard and Conjugate Pseudo-labels for Test-Time Adaptation", "abstract": "We consider a setting that a model needs to adapt to a new domain under distribution shifts, given that only unlabeled test samples from the new domain are accessible at test time. A common idea in most of the related works is constructing pseudo-labels for the unlabeled test samples and applying gradient descent (GD) to a loss function with the pseudo-labels. Recently, Goyal et al. (2022) propose conjugate labels, which is a new kind of pseudo-labels for self-training at test time. They empirically show that the conjugate label outperforms other ways of pseudo-labeling on many domain adaptation benchmarks. However, provably showing that GD with conjugate labels learns a good classifier for test-time adaptation remains open. In this work, we aim at theoretically understanding GD with hard and conjugate labels for a binary classification problem. We show that for square loss, GD with conjugate labels converges to an $\\epsilon$-optimal predictor under a Gaussian model for any arbitrarily small $\\epsilon$, while GD with hard pseudo-labels fails in this task. We also analyze them under different loss functions for the update. Our results shed lights on understanding when and why GD with hard labels or conjugate labels works in test-time adaptation.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Jun-Kun Wang;Andre Wibisono", "authorids": "~Jun-Kun_Wang1;~Andre_Wibisono1", "gender": "M;M", "homepage": "https://jimwang123.github.io/;http://www.cs.yale.edu/homes/wibisono/", "dblp": "153/5463;64/10962", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Jun-Kun_Wang1;~Andre_Wibisono1", "aff": "Yale University;Yale University", "aff_domain": "yale.edu;yale.edu", "position": "Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nwang2023towards,\ntitle={Towards Understanding {GD} with Hard and Conjugate Pseudo-labels for Test-Time Adaptation},\nauthor={Jun-Kun Wang and Andre Wibisono},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=FJXf1FXN8C}\n}", "github": "", "project": "", "reviewers": "3i1a;w2JR;4HLp;VqqL", "pdf_size": 1418136, "recommendation": "5;6;6;8", "confidence": "2;3;4;3", "correctness": "3;4;4;4", "technical_novelty": "2;3;4;4", "empirical_novelty": "2;3;4;3", "wc_summary_paper": "47;57;53;32", "wc_strength_and_weaknesses": "207;192;185;188", "wc_clarity_quality_novelty_and_reproducibility": "18;37;32;80", "wc_summary_review": "48;5;27;52", "wc_review": "320;291;297;352", "wc_reply_reviewers": "233;0;114;0", "wc_reply_authors": "2290;743;1344;302", "reply_reviewers": "1;0;1;0", "reply_authors": "5;1;3;1", "recommendation_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 3.25, 0.82915619758885 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 47.25, 9.496709956611289 ], "wc_strength_and_weaknesses_avg": [ 193.0, 8.455767262643882 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 41.75, 23.155722834755128 ], "wc_summary_review_avg": [ 33.0, 18.748333259252675 ], "wc_review_avg": [ 315.0, 
23.947860029656095 ], "wc_reply_reviewers_avg": [ 86.75, 96.4141457463582 ], "wc_reply_authors_avg": [ 1169.75, 745.0551573541385 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.5, 1.6583123951777 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.3244428422615251, "corr_recommendation_correctness": 0.6622661785325219, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1406109997056067681&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=FJXf1FXN8C", "email": "yale.edu;yale.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Yale University", "aff_unique_dep": "", "aff_unique_url": "https://www.yale.edu", "aff_unique_abbr": "Yale", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "FJdSi_seSg", "title": "Do We Always Need to Penalize Variance of Losses for Learning with Label Noise?", "track": "main", "status": "Reject", "tldr": "", "abstract": "Algorithms which minimize the averaged loss have been widely designed for dealing with noisy labels. Intuitively, when there is a finite training sample, penalizing the variance of losses will improve the stability and generalization of the algorithms. Interestingly, we found that the variance of losses sometimes needs to be increased for the problem of learning with noisy labels. Specifically, increasing the variance of losses would boost the memorization effect and reduce the harmfulness of incorrect labels. Regularizers can be easily designed to increase the variance of losses and be plugged into many existing algorithms. Empirically, the proposed method of increasing the variance of losses could improve the generalization ability of baselines on both synthetic and real-world datasets.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/64d8f779f579c330c04bd19c328d8b24e2eef47c.zip", "author": "Yexiong Lin;Yu Yao;Yuxuan Du;Jun Yu;Bo Han;Mingming Gong;Tongliang Liu", "authorids": "~Yexiong_Lin1;~Yu_Yao3;~Yuxuan_Du2;~Jun_Yu3;~Bo_Han1;~Mingming_Gong1;~Tongliang_Liu1", "gender": "M;M;M;M;M;M;M", "homepage": "https://yexionglin.github.io/;https://a5507203.github.io/;https://github.com/yuxuan-du/Yuxuan-Du.github.io;https://faculty.ustc.edu.cn/yujun_AI/en/index.htm;https://mingming-gong.github.io/;https://tongliang-liu.github.io/;https://bhanml.github.io/", "dblp": "287/6488;230/9625;;50/5754-1.html;98/8479;150/6667;241/0472-3", "google_scholar": "OfsQPbwAAAAJ;OkcaMKAAAAAJ;https://scholar.google.com.au/citations?user=50sFkzIAAAAJ;efZyqyQAAAAJ;https://scholar.google.com.au/citations?user=6BmiCJIAAAAJ;https://scholar.google.com.au/citations?user=EiLdZ_YAAAAJ;nTNjqHwAAAAJ", "orcid": ";;0000-0002-1193-9756;0000-0002-3197-8103;0000-0001-7147-5589;;", "linkedin": ";yu-yao-150377134/;;;;;", "or_profile": "~Yexiong_Lin1;~Yu_Yao3;~Yuxuan_Du2;~Jun_Yu3;~Mingming_Gong1;~Tongliang_Liu1;~bo_han2", "aff": "University of Sydney;University of Sydney;JD.com;University of Science and Technology of China;University of Melbourne;University of Sydney;RIKEN", "aff_domain": "usyd.edu.au;uni.sydney.edu.au;jd.com;ustc.edu.cn;unimelb.edu.au;sydney.edu.au;riken.jp", "position": "PhD student;PhD student;Researcher;Associate Professor;Assistant Professor;Lecturer;Adjunct Scientist", "bibtex": "@misc{\nlin2023do,\ntitle={Do We Always Need to Penalize Variance of Losses for Learning with Label Noise?},\nauthor={Yexiong Lin and
Yu Yao and Yuxuan Du and Jun Yu and Bo Han and Mingming Gong and Tongliang Liu},\nyear={2023},\nurl={https://openreview.net/forum?id=FJdSi_seSg}\n}", "github": "", "project": "", "reviewers": "zTmG;JfaU;VFMQ", "site": "https://openreview.net/forum?id=FJdSi_seSg", "pdf_size": 2151775, "recommendation": "5;5;6", "confidence": "4;3;4", "correctness": "3;3;3", "technical_novelty": "2;3;3", "empirical_novelty": "2;2;3", "wc_summary_paper": "181;228;110", "wc_strength_and_weaknesses": "237;268;261", "wc_clarity_quality_novelty_and_reproducibility": "81;32;66", "wc_summary_review": "185;52;79", "wc_review": "684;580;516", "wc_reply_reviewers": "59;0;121", "wc_reply_authors": "1544;2410;563", "reply_reviewers": "1;0;1", "reply_authors": "5;5;2", "recommendation_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 173.0, 48.5042953424402 ], "wc_strength_and_weaknesses_avg": [ 255.33333333333334, 13.27487183449325 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 59.666666666666664, 20.499322482029065 ], "wc_summary_review_avg": [ 105.33333333333333, 57.40112271453311 ], "wc_review_avg": [ 593.3333333333334, 69.23069325731824 ], "wc_reply_reviewers_avg": [ 60.0, 49.4031038161234 ], "wc_reply_authors_avg": [ 1505.6666666666667, 754.5216291723444 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 4.0, 1.4142135623730951 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.4999999999999999, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:aEQX5UDeClMJ:scholar.google.com/&scioq=Do+We+Always+Need+to+Penalize+Variance+of+Losses+for+Learning+with+Label+Noise%3F&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;1;2;3;0;4", "aff_unique_norm": "University of Sydney;JD.com;University of Science and Technology of China;University of Melbourne;RIKEN", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.sydney.edu.au;https://www.jd.com;http://www.ustc.edu.cn;https://www.unimelb.edu.au;https://www.riken.jp", "aff_unique_abbr": "USYD;JD;USTC;UniMelb;RIKEN", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1;0;0;2", "aff_country_unique": "Australia;China;Japan" }, { "title": "LightGCL: Simple Yet Effective Graph Contrastive Learning for Recommendation", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11723", "id": "FKXVK9dyMM", "poster": "/media/PosterPDFs/ICLR%202023/11723.png?t=1681739203.260472", "openreview": "https://openreview.net/forum?id=FKXVK9dyMM", "slides": "https://iclr.cc/virtual/2023/poster/11723", "video": "https://iclr.cc/virtual/2023/poster/11723", "author_site": "Xuheng Cai, Chao Huang, Lianghao Xia, Xubin Ren", "tldr": "A new lightweight graph contrastive learning approach to enhance recommender systems", "abstract": "Graph neural network (GNN) is a powerful learning approach for graph-based recommender systems. Recently, GNNs integrated with contrastive learning have shown superior performance in recommendation with their data augmentation schemes, aiming at dealing with highly sparse data. 
Despite their success, most existing graph contrastive learning methods either perform stochastic augmentation (e.g., node/edge perturbation) on the user-item interaction graph, or rely on heuristic-based augmentation techniques (e.g., user clustering) for generating contrastive views. We argue that these methods cannot preserve the intrinsic semantic structures well and are easily biased by noise perturbation. In this paper, we propose a simple yet effective graph contrastive learning paradigm, LightGCL, that mitigates these issues impairing the generality and robustness of CL-based recommenders. Our model exclusively utilizes singular value decomposition for contrastive augmentation, which enables unconstrained structural refinement with global collaborative relation modeling. Experiments conducted on several benchmark datasets demonstrate the significant performance improvement of our model over the state of the art. Further analyses demonstrate LightGCL's robustness against data sparsity and popularity bias. The source code of our model is available at https://github.com/HKUDS/LightGCL.", "keywords": "recommender systems;graph neural networks;contrastive learning", "primary_area": "", "supplementary_material": "/attachment/2cdd627a52cb87e9e61ea6ae23276ee476450fbb.zip", "author": "Xuheng Cai;Chao Huang;Lianghao Xia;Xubin Ren", "authorids": "~Xuheng_Cai1;~Chao_Huang7;~Lianghao_Xia1;~Xubin_Ren1", "gender": "M;M;M;M", "homepage": "https://web.stanford.edu/~xuheng;;https://akaxlh.github.io/;https://ren-xubin.github.io", "dblp": ";;270/6586;318/9196.html", "google_scholar": "IaxzikkAAAAJ;Zkv9FqwAAAAJ;fDDjoUEAAAAJ;https://scholar.google.com/scholar?hl=en", "orcid": ";;0000-0003-0725-2211;0000-0003-3332-1073", "linkedin": "xhcai;;;", "or_profile": "~Xuheng_Cai1;~Chao_Huang7;~Lianghao_Xia1;~Xubin_Ren1", "aff": "University of Hong Kong;University of Hong Kong;University of Hong Kong;Wuhan University", "aff_domain": "hku.hk;hku.hk;hku.hk;whu.edu.cn", "position": "Undergrad student;Assistant Professor;Postdoc;Undergrad student", "bibtex": "@inproceedings{\ncai2023lightgcl,\ntitle={Light{GCL}: Simple Yet Effective Graph Contrastive Learning for Recommendation},\nauthor={Xuheng Cai and Chao Huang and Lianghao Xia and Xubin Ren},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=FKXVK9dyMM}\n}", "github": "", "project": "", "reviewers": "k7CA;rCJG;3y1q;kpFd", "pdf_size": 1473996, "recommendation": "5;8;8;8", "confidence": "4;5;4;5", "correctness": "2;4;4;3", "technical_novelty": "2;3;4;4", "empirical_novelty": "2;4;3;4", "wc_summary_paper": "45;46;159;93", "wc_strength_and_weaknesses": "424;174;173;187", "wc_clarity_quality_novelty_and_reproducibility": "23;40;255;40", "wc_summary_review": "60;23;72;29", "wc_review": "552;283;659;349", "wc_reply_reviewers": "0;0;20;0", "wc_reply_authors": "1669;375;1046;466", "reply_reviewers": "0;0;1;0", "reply_authors": "3;1;2;1", "recommendation_avg": [ 7.25, 1.299038105676658 ], "confidence_avg": [ 4.5, 0.5 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 3.25, 0.82915619758885 ], "empirical_novelty_avg": [ 3.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 85.75, 46.5262022950509 ], "wc_strength_and_weaknesses_avg": [ 239.5, 106.66419267964297 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 89.5, 95.80318366317478 ], "wc_summary_review_avg": [ 46.0, 20.554804791094465 ], "wc_review_avg": [ 460.75,
151.4205649837564 ], "wc_reply_reviewers_avg": [ 5.0, 8.660254037844387 ], "wc_reply_authors_avg": [ 889.0, 518.6940331255026 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.8703882797784892, "gs_citation": 333, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6311827797424842922&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=FKXVK9dyMM", "email": "hku.hk;hku.hk;hku.hk;whu.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University of Hong Kong;Wuhan University", "aff_unique_dep": ";", "aff_unique_url": "https://www.hku.hk;http://www.whu.edu.cn/", "aff_unique_abbr": "HKU;WHU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "FLMvYXMucWk", "title": "Temporary feature collapse phenomenon in early learning of MLPs", "track": "main", "status": "Reject", "tldr": "In this paper, we focus on a typical two-phase phenomenon in the learning of multi-layer perceptrons (MLPs), and we discover and explain the reason for the feature collapse in the first phase.", "abstract": "In this paper, we focus on a typical two-phase phenomenon in the learning of multi-layer perceptrons (MLPs). We discover and explain the reason for the feature collapse phenomenon in the first phase, i.e., the diversity of features over different samples keeps decreasing until samples of different categories share almost the same feature, which hurts the optimization of MLPs. We explain such a phenomenon in terms of the learning dynamics of MLPs. Furthermore, we theoretically analyze why four typical operations can alleviate the feature collapse.
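The collapse described above is straightforward to monitor in practice. Below is a minimal illustrative sketch (not the paper's code) of one possible diversity proxy, the mean pairwise cosine similarity of features across samples; the toy MLP and batch are placeholders, and a value climbing toward 1 during early training would signal the first-phase collapse:

```python
import torch
import torch.nn.functional as F

def feature_diversity(features: torch.Tensor) -> float:
    """Mean pairwise cosine similarity between per-sample feature vectors.

    A value near 1 means features of different samples are almost
    identical, i.e. feature diversity has collapsed."""
    f = F.normalize(features, dim=1)             # (N, D) unit-norm features
    sim = f @ f.t()                              # (N, N) cosine similarities
    n = sim.shape[0]
    off_diag = sim.sum() - sim.diagonal().sum()  # drop self-similarities
    return (off_diag / (n * (n - 1))).item()

# Track the proxy on a toy MLP during early training (placeholder sizes).
mlp = torch.nn.Sequential(torch.nn.Linear(64, 128), torch.nn.ReLU(),
                          torch.nn.Linear(128, 128))
x = torch.randn(256, 64)
print(f"similarity proxy: {feature_diversity(mlp(x)):.3f}")
```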
The code has been attached with the submission.", "keywords": "Neural Networks;Deep Learning Theory;Multi-Layer Perceptrons", "primary_area": "", "supplementary_material": "/attachment/4a9a67093217a7b14668f14b150a471464599aa2.zip", "author": "Dongrui Liu;Shaobo Wang;Jie Ren;Kangrui Wang;Sheng Yin;Huiqi Deng;Quanshi Zhang", "authorids": "~Dongrui_Liu1;~Shaobo_Wang1;~Jie_Ren1;~Kangrui_Wang2;~Sheng_Yin1;~Huiqi_Deng1;~Quanshi_Zhang1", "gender": "M;M;F;M;M;F;M", "homepage": "https://shenqildr.github.io/;https://gszfwsb.github.io/;https://jie-ren.github.io/;https://jameskrw.github.io/;https://shengyin1224.github.io/;;http://qszhang.com", "dblp": "199/9200.html;44/9990-1;r/JieRen-18;216/9159;52/2662;229/1317;http://dblp.uni-trier.de/pers/hd/z/Zhang:Quanshi", "google_scholar": "https://scholar.google.com.hk/citations?hl=zh-CN;https://scholar.google.co.in/citations?hl=zh-CN;https://scholar.google.com/citations?hl=zh-CN;;eS2g0gIAAAAJ;QEjqzXgAAAAJ;iFFhHK0AAAAJ", "orcid": "0000-0003-0087-1124;;0000-0001-9918-3000;;;;", "linkedin": ";;;wang-kangrui-8b9a37257/;;;", "or_profile": "~Dongrui_Liu1;~Shaobo_Wang1;~Jie_Ren1;~Kangrui_Wang2;~Sheng_Yin1;~Huiqi_Deng1;~Quanshi_Zhang1", "aff": "Shanghai Jiao Tong University,;Shanghai Jiaotong University;Shanghai Jiaotong University;;Shanghai Jiaotong University;Shanghai jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;;sjtu.edu.cn;edu.cn;sjtu.edu.cn", "position": "PhD student;MS student;PhD student;;Undergrad student;Postdoc;Associate Professor", "bibtex": "@misc{\nliu2023temporary,\ntitle={Temporary feature collapse phenomenon in early learning of {MLP}s},\nauthor={Dongrui Liu and Shaobo Wang and Jie Ren and Kangrui Wang and Sheng Yin and Huiqi Deng and Quanshi Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=FLMvYXMucWk}\n}", "github": "", "project": "", "reviewers": "4Wyv;nGWd;4cUL;7KPL", "site": "https://openreview.net/forum?id=FLMvYXMucWk", "pdf_size": 7480528, "recommendation": "5;6;6;6", "confidence": "4;3;3;3", "correctness": "3;3;3;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "3;3;3;2", "wc_summary_paper": "57;90;59;59", "wc_strength_and_weaknesses": "485;131;288;153", "wc_clarity_quality_novelty_and_reproducibility": "78;14;109;33", "wc_summary_review": "101;37;37;143", "wc_review": "721;272;493;388", "wc_reply_reviewers": "88;0;110;18", "wc_reply_authors": "4161;1629;1520;1422", "reply_reviewers": "1;0;1;1", "reply_authors": "8;3;4;2", "recommendation_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 66.25, 13.736356867816154 ], "wc_strength_and_weaknesses_avg": [ 264.25, 140.91375908689685 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 58.5, 37.286056375004314 ], "wc_summary_review_avg": [ 79.5, 45.019440245298476 ], "wc_review_avg": [ 468.5, 165.41538622510302 ], "wc_reply_reviewers_avg": [ 54.0, 46.10856753359401 ], "wc_reply_authors_avg": [ 2183.0, 1144.3436983703803 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 4.25, 2.277608394786075 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:4NUmBy3EZxAJ:scholar.google.com/&scioq=Temporary+feature+collapse+phenomenon+in+early+learning+of+MLPs&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "FLr9RRqbwB-", "title": "Batch Normalization and Bounded Activation Functions", "track": "main", "status": "Reject", "tldr": "With bounded activation functions, using batch normalization after activation functions is better because of asymmetric saturation and sparsity. ", "abstract": "Since Batch Normalization was proposed, it has been commonly located in front of activation functions, as proposed by the original paper. Swapping the order, i.e., using Batch Normalization after activation functions, has also been attempted, but it is generally not much different from the conventional order when ReLU is used. However, in the case of bounded activation functions like Tanh, we discovered that the swapped order achieves considerably better performance on various benchmarks and architectures than the conventional order. We report this remarkable phenomenon and closely examine what contributes to this performance improvement in this paper. One noteworthy thing about swapped models is the extreme saturation of activation values, which is usually considered harmful. Looking at the output distribution of individual activation functions, we found that many of them are highly asymmetrically saturated. The experiments inducing a different degree of asymmetric saturation support the hypothesis that asymmetric saturation helps improve performance. In addition, we found that Batch Normalization after bounded activation functions has another important effect: it relocates the asymmetrically saturated output of activation functions near zero. This enables the swapped model to have higher sparsity, further improving performance. Extensive experiments with Tanh, LeLecun Tanh, and Softsign show that the swapped models achieve improved performance with a high degree of asymmetric saturation. 
", "keywords": "Batch Normalization;Activation Functions;Saturation;Sparsity", "primary_area": "", "supplementary_material": "/attachment/93a5b484f6aadf225e9d199888fb06a819999f74.zip", "author": "Dongjin Kim;Woojeong Kim;TaeJoo Park;Suhyun Kim", "authorids": "~Dongjin_Kim1;~Woojeong_Kim1;~TaeJoo_Park1;~Suhyun_Kim1", "gender": "M;F;M;", "homepage": ";https://sites.google.com/view/woojeongkim/;https://github.com/PTAEJOOO;https://kdst.tistory.com/", "dblp": ";243/0064;;45/6898-1", "google_scholar": ";fGCEQQgAAAAJ;;", "orcid": ";;;", "linkedin": "%EB%8F%99%EC%A7%84-%EA%B9%80-ba872923a/;woojeong-kim-072ab4160/;;", "or_profile": "~Dongjin_Kim1;~Woojeong_Kim1;~TaeJoo_Park1;~Suhyun_Kim1", "aff": "Korea University;Cornell University;Yonsei University;Korea Institute of Science and Technology", "aff_domain": "korea.ac.kr;cornell.edu;yonsei.ac.kr;kist.re.kr", "position": "MS student;PhD student;Undergrad student;Principal Researcher", "bibtex": "@misc{\nkim2023batch,\ntitle={Batch Normalization and Bounded Activation Functions},\nauthor={Dongjin Kim and Woojeong Kim and TaeJoo Park and Suhyun Kim},\nyear={2023},\nurl={https://openreview.net/forum?id=FLr9RRqbwB-}\n}", "github": "", "project": "", "reviewers": "DHjF;Qoao;AJ9Z;5V5C", "site": "https://openreview.net/forum?id=FLr9RRqbwB-", "pdf_size": 812663, "recommendation": "3;5;5;5", "confidence": "3;4;2;3", "correctness": "3;2;2;3", "technical_novelty": "2;3;4;3", "empirical_novelty": "3;2;2;1", "wc_summary_paper": "108;97;54;107", "wc_strength_and_weaknesses": "476;851;149;259", "wc_clarity_quality_novelty_and_reproducibility": "41;35;21;4", "wc_summary_review": "46;35;23;36", "wc_review": "671;1018;247;406", "wc_reply_reviewers": "250;0;0;0", "wc_reply_authors": "1122;1755;260;642", "reply_reviewers": "1;0;0;0", "reply_authors": "2;3;1;1", "recommendation_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 91.5, 22.073740054644116 ], "wc_strength_and_weaknesses_avg": [ 433.75, 268.09641456013543 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 25.25, 14.254385290148432 ], "wc_summary_review_avg": [ 35.0, 8.154753215150045 ], "wc_review_avg": [ 585.5, 292.04836928152844 ], "wc_reply_reviewers_avg": [ 62.5, 108.25317547305482 ], "wc_reply_authors_avg": [ 944.75, 558.6731490773474 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:BJAbxFxSih0J:scholar.google.com/&scioq=Batch+Normalization+and+Bounded+Activation+Functions&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Korea University;Cornell University;Yonsei University;Korea Institute of Science and Technology", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.korea.ac.kr;https://www.cornell.edu;https://www.yonsei.ac.kr;https://www.kist.re.kr", "aff_unique_abbr": "KU;Cornell;Yonsei;KIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "South Korea;United States" }, { "id": "FMEXgK9-I8", "title": "E$^2$: Entropy Discrimination and Energy Optimization for Source-free Universal Domain Adaptation", 
"track": "main", "status": "Withdraw", "tldr": "This paper presents a novel source-free universal domain adaptation method by combining two innovative components of confidence-guided Entropy discrimination and likelihood-induced Energy optimization.", "abstract": "Universal domain adaptation (UniDA) aims to tackle the knowledge transfer problem in the presence of both distribution and category shifts. Most existing UniDA methods are developed based on the accessibility assumption of source-domain data during target model adaptation, which may result in privacy policy violation and source-data transfer inefficiency. To address this issue, we propose a novel source-free UniDA method by confidence-guided entropy discrimination and likelihood-induced energy optimization. The entropy-based separation criterion to determine known- and unknown-class target data may be too conservative for known-class prediction. Thus, we derive the confidence-guided entropy by scaling the normalized prediction score with the known-class confidence, such that much more known-class samples are correctly predicted. Without source-domain data for distribution alignment, we constrain the target-domain marginal distribution by maximizing the known-class likelihood and minimizing the unknown-class one. Since the marginal distribution is difficult to estimate but can be written as a function of free energy, the likelihood-induced loss is changed to an equivalent form based on energy optimization. Theoretically, the proposed method amounts to decreasing and increasing internal energy of known and unknown classes in physics, respectively. Extensive experiments on four publicly available datasets demonstrate the superiority of our method for source-free UniDA.", "keywords": "Domain Adaptation;Confidence-guided Entropy;Energy-based Model", "primary_area": "", "supplementary_material": "", "author": "Meng Shen;Jinhua Ma;Pong chi Yuen", "authorids": "~Meng_Shen2;~Jinhua_Ma1;~Pong_chi_Yuen1", "gender": "F;M;M", "homepage": ";https://scholar.google.com/citations?user=nhghITMAAAAJ&hl=en;http://www.comp.hkbu.edu.hk/~pcyuen", "dblp": ";119/1514.html;y/PongChiYuen", "google_scholar": "DbqZf8kAAAAJ;nhghITMAAAAJ;https://scholar.google.com.tw/citations?user=CwhIcHkAAAAJ", "orcid": ";;0000-0002-9343-2202", "linkedin": ";;", "or_profile": "~Meng_Shen2;~Jinhua_Ma1;~Pong_chi_Yuen1", "aff": "Hong Kong Baptist University;SUN YAT-SEN UNIVERSITY;Hong Kong Baptist University", "aff_domain": "hkbu.edu.hk;sysu.edu.cn;hkbu.edu.hk", "position": "PhD student;Associate Professor;Full Professor", "bibtex": "@misc{\nshen2023e,\ntitle={E\\${\\textasciicircum}2\\$: Entropy Discrimination and Energy Optimization for Source-free Universal Domain Adaptation},\nauthor={Meng Shen and Jinhua Ma and Pong chi Yuen},\nyear={2023},\nurl={https://openreview.net/forum?id=FMEXgK9-I8}\n}", "github": "", "project": "", "reviewers": "6Gr7;f4Hb;WpbQ", "site": "https://openreview.net/forum?id=FMEXgK9-I8", "pdf_size": 1258387, "recommendation": "3;5;5", "confidence": "5;5;3", "correctness": "3;4;3", "technical_novelty": "2;3;3", "empirical_novelty": "2;3;3", "wc_summary_paper": "56;99;55", "wc_strength_and_weaknesses": "89;282;131", "wc_clarity_quality_novelty_and_reproducibility": "38;32;29", "wc_summary_review": "18;22;131", "wc_review": "201;435;346", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "518;1567;640", "reply_reviewers": "0;0;0", "reply_authors": "1;3;1", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 
4.333333333333333, 0.9428090415820634 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 70.0, 20.51016008388688 ], "wc_strength_and_weaknesses_avg": [ 167.33333333333334, 82.8747381427068 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 33.0, 3.7416573867739413 ], "wc_summary_review_avg": [ 57.0, 52.351376931907595 ], "wc_review_avg": [ 327.3333333333333, 96.4376597715966 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 908.3333333333334, 468.40319763592083 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.49999999999999983, "corr_recommendation_correctness": 0.49999999999999983, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6670022395427716606&as_sdt=5,33&sciodt=0,33&hl=en&oe=ASCII", "gs_version_total": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Hong Kong Baptist University;Sun Yat-sen University", "aff_unique_dep": ";", "aff_unique_url": "https://www.hkbu.edu.hk;http://www.sysu.edu.cn", "aff_unique_abbr": "HKBU;SYSU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "FOeVcSmRAeQ", "title": "CUSTOMIZING PRE-TRAINED DIFFUSION MODELS FOR YOUR OWN DATA", "track": "main", "status": "Withdraw", "tldr": "We propose a method to utilize pre-trained text-to-image diffusion models to generate a custom dataset.", "abstract": "Recently, several large-scale text-to-image diffusion models have been released, showing unprecedented performance. Since the shift from learning a task-specific model from scratch to leveraging pre-trained large-scale models is an inevitable trend in deep generative modeling, it is necessary to develop methods to better utilize these models. In this paper, we propose a method dubbed Diffusion model for Your Own Data (DYOD) that can effectively utilize a pre-trained text-to-image diffusion model to approximate the implicit distribution of a custom dataset. Specifically, we first obtain a text prompt that can best represent the custom dataset through optimization in the semantic latent space of the diffusion model. To better control generated image content, in particular the geometry of objects, we show that the text prompt alone is not sufficient, but rather an informative initialization that can guide the pre-trained diffusion model is necessary. As representative examples, we demonstrate that initialization from a distribution learned on the user's dataset, or image initialization from the user's sketch, photo, etc., serves the goal of customizing the diffusion model for the user's own data.
Experiments show that the customized DYOD outperforms the Stable Diffusion baselines both qualitatively and quantitatively with accelerated sampling speed.", "keywords": "Diffusion models;score-based models;generative models;personalization", "primary_area": "", "supplementary_material": "", "author": "Sangyun Lee;Jong Chul Ye", "authorids": "~Sangyun_Lee1;~Jong_Chul_Ye1", "gender": ";M", "homepage": "https://sangyun884.github.io/about/;https://bispl.weebly.com/", "dblp": "87/8208;15/5613", "google_scholar": "CGFkx-IAAAAJ;HNMjoNEAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Sangyun_Lee1;~Jong_Chul_Ye1", "aff": "Soongsil University;Korea Advanced Institute of Science & Technology", "aff_domain": "ssu.ac.kr;kaist.ac.kr", "position": "Undergrad student;Full Professor", "bibtex": "@misc{\nlee2023customizing,\ntitle={{CUSTOMIZING} {PRE}-{TRAINED} {DIFFUSION} {MODELS} {FOR} {YOUR} {OWN} {DATA}},\nauthor={Sangyun Lee and Jong Chul Ye},\nyear={2023},\nurl={https://openreview.net/forum?id=FOeVcSmRAeQ}\n}", "github": "", "project": "", "reviewers": "2kk5;FqMS;a7dA;DSFB", "site": "https://openreview.net/forum?id=FOeVcSmRAeQ", "pdf_size": 5936041, "recommendation": "3;5;5;5", "confidence": "4;4;4;4", "correctness": "2;3;3;3", "technical_novelty": "1;3;3;3", "empirical_novelty": "1;3;3;3", "wc_summary_paper": "62;72;73;72", "wc_strength_and_weaknesses": "149;151;233;414", "wc_clarity_quality_novelty_and_reproducibility": "48;40;18;133", "wc_summary_review": "21;66;35;67", "wc_review": "280;329;359;686", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.8660254037844386 ], "empirical_novelty_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 69.75, 4.493050188902857 ], "wc_strength_and_weaknesses_avg": [ 236.75, 107.8016117690269 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 59.75, 43.694250193818405 ], "wc_summary_review_avg": [ 47.25, 19.879323429131084 ], "wc_review_avg": [ 413.5, 159.83507124533088 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:x6zus-tT0YAJ:scholar.google.com/&scioq=CUSTOMIZING+PRE-TRAINED+DIFFUSION+MODELS+FOR+YOUR+OWN+DATA&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Soongsil University;Korea Advanced Institute of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.soongsil.ac.kr;https://www.kaist.ac.kr", "aff_unique_abbr": "SSU;KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "id": "FPdDFUVYVPl", "title": "Is Self-Supervised Contrastive Learning More Robust Than Supervised Learning?", "track": "main", "status": "Reject", "tldr": "", "abstract": "Prior work on self-supervised contrastive learning has primarily focused on evaluating the recognition accuracy, but has overlooked other behavioral aspects. In addition to accuracy, distributional robustness plays a critical role in the reliability of machine learning models. 
We design and conduct a series of robustness tests to quantify the behavioral differences between contrastive learning and supervised learning under downstream and pre-training data distribution changes. These tests leverage data corruptions at multiple levels, ranging from pixel-level distortion to patch-level shuffling to dataset-level distribution shift, including both natural and unnatural corruptions. Our tests unveil intriguing robustness behaviors of contrastive and supervised learning: while we generally observe that contrastive learning is more robust than supervised learning under downstream corruptions, we surprisingly discover the robustness vulnerability of contrastive learning under pixel- and patch-level corruptions during pre-training. Furthermore, we observe a higher dependence of contrastive learning on spatial image coherence during pre-training, e.g., it is particularly sensitive to global patch shuffling. We explain these results by connecting to feature space uniformity and data augmentation. Our analysis has implications for improving the downstream robustness of supervised learning, and calls for more studies on understanding contrastive learning.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Yuanyi Zhong;Haoran Tang;Junkun Chen;Jian Peng;Yu-Xiong Wang", "authorids": "~Yuanyi_Zhong1;~Haoran_Tang2;~Junkun_Chen2;~Jian_Peng1;~Yu-Xiong_Wang1", "gender": ";M;M;M;", "homepage": ";;https://scholar.google.com/citations?user=_m5__wUAAAAJ;http://jianpeng.web.engr.illinois.edu/;https://yxw.cs.illinois.edu/", "dblp": "194/2743;190/7703;333/0859;29/4181-1;35/10700", "google_scholar": "PtmjwooAAAAJ;7PC01c0AAAAJ;_m5__wUAAAAJ;https://scholar.google.com.tw/citations?user=4wcAVXAAAAAJ;T_Q-xDkAAAAJ", "orcid": ";;0000-0002-3431-0870;;", "linkedin": ";;junkun-chen-a24520167/;;", "or_profile": "~Yuanyi_Zhong1;~Haoran_Tang2;~Junkun_Chen2;~Jian_Peng1;~Yu-Xiong_Wang1", "aff": "University of Illinois Urbana Champaign;University of Pennsylvania;Facebook Switzerland;University of Illinois, Urbana Champaign;Department of Computer Science, University of Illinois Urbana-Champaign", "aff_domain": "illinois.edu;seas.upenn.edu;meta.com;illinois.edu;cs.illinois.edu", "position": "PhD student;MS student;Intern;Assistant Professor;Assistant Professor", "bibtex": "@misc{\nzhong2023is,\ntitle={Is Self-Supervised Contrastive Learning More Robust Than Supervised Learning?},\nauthor={Yuanyi Zhong and Haoran Tang and Junkun Chen and Jian Peng and Yu-Xiong Wang},\nyear={2023},\nurl={https://openreview.net/forum?id=FPdDFUVYVPl}\n}", "github": "", "project": "", "reviewers": "CG2S;J9U1;XV1R;fxue", "site": "https://openreview.net/forum?id=FPdDFUVYVPl", "pdf_size": 13465423, "recommendation": "3;5;5;5", "confidence": "3;5;3;3", "correctness": "3;4;3;3", "technical_novelty": "2;2;2;2", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "103;81;35;109", "wc_strength_and_weaknesses": "458;277;185;322", "wc_clarity_quality_novelty_and_reproducibility": "232;43;11;49", "wc_summary_review": "31;75;57;8", "wc_review": "824;476;288;488", "wc_reply_reviewers": "144;0;330;0", "wc_reply_authors": "2422;1644;2142;1390", "reply_reviewers": "1;0;1;0", "reply_authors": "5;3;4;3", "recommendation_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 82.0, 29.068883707497267 ],
"wc_strength_and_weaknesses_avg": [ 310.5, 98.439067447838 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 83.75, 86.80257772670119 ], "wc_summary_review_avg": [ 42.75, 25.439880109780393 ], "wc_review_avg": [ 519.0, 193.12949023906214 ], "wc_reply_reviewers_avg": [ 118.5, 135.52398311738037 ], "wc_reply_authors_avg": [ 1899.5, 405.1798983167847 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 3.75, 0.82915619758885 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14631284782651958860&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "University of Illinois Urbana-Champaign;University of Pennsylvania;Meta", "aff_unique_dep": ";;Facebook", "aff_unique_url": "https://illinois.edu;https://www.upenn.edu;https://www.facebook.com", "aff_unique_abbr": "UIUC;UPenn;FB", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Urbana-Champaign;", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "United States;Switzerland" }, { "id": "FPeVU4Y_Lo6", "title": "Newton Losses: Efficiently Including Second-Order Information into Gradient Descent", "track": "main", "status": "Reject", "tldr": "Applying Newton to the loss and gradient descent to the neural network.", "abstract": "We present Newton losses, a method for incorporating second-order information of losses by approximating them with quadratic functions. The presented method is applied only to the loss function and allows training the neural network with gradient descent. As loss functions are usually substantially cheaper to compute than the neural network, Newton losses can be used at a relatively small additional cost. 
We find that they yield superior performance, especially when applied to non-convex and hard-to-optimize loss functions such as algorithmic losses, which have been popularized in recent research.", "keywords": "differentiable algorithms;backpropagation;differentiable", "primary_area": "", "supplementary_material": "", "author": "Felix Petersen;Christian Borgelt;Tobias Sutter;Hilde Kuehne;Oliver Deussen", "authorids": "~Felix_Petersen1;~Christian_Borgelt1;~Tobias_Sutter1;~Hilde_Kuehne5;~Oliver_Deussen1", "gender": "Not Specified;M;M;F;M", "homepage": "http://www.petersen.ai/;https://www.borgelt.net/;https://sites.google.com/view/suttert/home;https://hildekuehne.github.io;https://graphics.uni-konstanz.de", "dblp": "230/3983;b/ChristianBorgelt.html;01/10961;45/4963;48/2158", "google_scholar": "v8Kat6YAAAAJ;https://scholar.google.de/citations?user=T50Bxb8AAAAJ;https://scholar.google.ch/citations?user=11gxHJIAAAAJ;pxhCcH0AAAAJ;https://scholar.google.de/scholar?hl=en", "orcid": ";;0000-0003-1226-6845;0000-0003-1079-4441;0000-0001-5803-2185", "linkedin": ";christian-borgelt-a2429071/;;hilde-kuehne-8b9aa661;", "or_profile": "~Felix_Petersen1;~Christian_Borgelt1;~Tobias_Sutter1;~Hilde_Kuehne5;~Oliver_Deussen1", "aff": "Stanford University;Paris-Lodron-University of Salzburg;Universit\u00e4t Konstanz;Goethe University Frankfurt;University of Konstanz", "aff_domain": "stanford.edu;sbg.ac.at;uni-konstanz.de;uni-frankfurt.de;uni-konstanz.de", "position": "Postdoc;Full Professor;Assistant Professor;Assistant Professor;Full Professor", "bibtex": "@misc{\npetersen2023newton,\ntitle={Newton Losses: Efficiently Including Second-Order Information into Gradient Descent},\nauthor={Felix Petersen and Christian Borgelt and Tobias Sutter and Hilde Kuehne and Oliver Deussen},\nyear={2023},\nurl={https://openreview.net/forum?id=FPeVU4Y_Lo6}\n}", "github": "", "project": "", "reviewers": "T2yc;fJV7;GdUr;qAQi", "site": "https://openreview.net/forum?id=FPeVU4Y_Lo6", "pdf_size": 381385, "recommendation": "3;3;3;5", "confidence": "4;2;5;3", "correctness": "4;2;3;2", "technical_novelty": "2;1;1;2", "empirical_novelty": "2;2;0;1", "wc_summary_paper": "64;29;33;92", "wc_strength_and_weaknesses": "228;264;138;156", "wc_clarity_quality_novelty_and_reproducibility": "87;40;9;135", "wc_summary_review": "31;15;9;25", "wc_review": "410;348;189;408", "wc_reply_reviewers": "142;0;102;0", "wc_reply_authors": "812;511;578;579", "reply_reviewers": "1;0;2;0", "reply_authors": "2;1;2;1", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "correctness_avg": [ 2.75, 0.82915619758885 ], "technical_novelty_avg": [ 1.5, 0.5 ], "empirical_novelty_avg": [ 1.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 54.5, 25.53918557824427 ], "wc_strength_and_weaknesses_avg": [ 196.5, 51.504854140168185 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 67.75, 47.73559992290869 ], "wc_summary_review_avg": [ 20.0, 8.54400374531753 ], "wc_review_avg": [ 338.75, 89.97603847691896 ], "wc_reply_reviewers_avg": [ 61.0, 62.617888817813075 ], "wc_reply_authors_avg": [ 620.0, 114.22565386111826 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.2581988897471611, "corr_recommendation_correctness": -0.5222329678670935, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14648822801268472286&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 0, 
"aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Stanford University;Paris-Lodron-University of Salzburg;Universit\u00e4t Konstanz;Goethe University Frankfurt;University of Konstanz", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.stanford.edu;https://www.uni-salzburg.at;https://www.uni-konstanz.de;https://www.uni-frankfurt.de;https://www.uni-konstanz.de", "aff_unique_abbr": "Stanford;PLUS;Uni Konstanz;GU Frankfurt;Uni Konstanz", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Stanford;;Frankfurt", "aff_country_unique_index": "0;1;2;2;2", "aff_country_unique": "United States;Austria;Germany" }, { "id": "FQvAlf6xwTy", "title": "Convergence of Generative Deep Linear Networks Trained with Bures-Wasserstein Loss", "track": "main", "status": "Reject", "tldr": "We prove convergence of gradient decent optimization of generative deep linear networks trained with the Bures-Wasserstein loss. ", "abstract": "We consider a deep matrix factorization model of covariance matrices trained with the Bures-Wasserstein distance. While recent works have made important advances in the study of the optimization problem for overparametrized low-rank matrix approximation, much emphasis has been placed on discriminative settings and the square loss. In contrast, our model considers another interesting type of loss and connects with the generative setting. We characterize the critical points and minimizers of the Bures-Wasserstein distance over the space of rank-bounded matrices. For low-rank matrices the Hessian of this loss can blow up, which creates challenges to analyze convergence of optimizaton methods. We establish convergence results for gradient flow using a smooth perturbative version of the loss and convergence results for finite step size gradient descent under certain assumptions on the initial weights", "keywords": "deep linear network;low-rank approximation;Bures-Wasserstein distance;optimal transport;implicit generative model;critical points", "primary_area": "", "supplementary_material": "", "author": "Pierre Br\u00e9chet;Katerina Papagiannouli;Jing An;Guido Montufar", "authorids": "~Pierre_Br\u00e9chet1;katerina.papagiannouli@mis.mpg.de;~Jing_An2;~Guido_Montufar1", "gender": ";;F;M", "homepage": "https://github.com/brechetp;;http://sites.google.com/view/jingan;http://www.math.ucla.edu/~montufar/", "dblp": ";;;", "google_scholar": ";;FZEnNE0AAAAJ;https://scholar.google.de/citations?user=pDIuuVwAAAAJ", "orcid": ";;;0000-0002-0131-2669", "linkedin": ";;;", "or_profile": "~Pierre_Br\u00e9chet1;katerina.papagiannouli@mis.mpg.de;~Jing_An2;~Guido_Montufar1", "aff": "Max Planck Institute for Mathematics in the Sciences, Max-Planck Institute;;Duke University;UCLA ", "aff_domain": "mis.mpg;;duke.edu;math.ucla.edu", "position": "PhD student;;Postdoc;Associate Professor", "bibtex": "@misc{\nbr{\\'e}chet2023convergence,\ntitle={Convergence of Generative Deep Linear Networks Trained with Bures-Wasserstein Loss},\nauthor={Pierre Br{\\'e}chet and Katerina Papagiannouli and Jing An and Guido Montufar},\nyear={2023},\nurl={https://openreview.net/forum?id=FQvAlf6xwTy}\n}", "github": "", "project": "", "reviewers": "LN7P;5WMe;pMGB;8S82", "site": "https://openreview.net/forum?id=FQvAlf6xwTy", "pdf_size": 465292, "recommendation": "3;5;5;5", "confidence": "3;5;4;3", "correctness": "3;3;3;4", "technical_novelty": "2;2;2;2", "empirical_novelty": "0;0;0;0", "wc_summary_paper": "41;85;99;96", "wc_strength_and_weaknesses": "317;405;355;199", "wc_clarity_quality_novelty_and_reproducibility": 
"48;20;39;148", "wc_summary_review": "9;18;40;26", "wc_review": "415;528;533;469", "wc_reply_reviewers": "44;74;60;108", "wc_reply_authors": "662;1332;1508;1707", "reply_reviewers": "1;1;1;1", "reply_authors": "2;3;4;4", "recommendation_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 0, 0 ], "wc_summary_paper_avg": [ 80.25, 23.25268801665734 ], "wc_strength_and_weaknesses_avg": [ 319.0, 75.98684096605149 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 63.75, 49.68085647409875 ], "wc_summary_review_avg": [ 23.25, 11.388041973930374 ], "wc_review_avg": [ 486.25, 48.225382320931374 ], "wc_reply_reviewers_avg": [ 71.5, 23.595550427993835 ], "wc_reply_authors_avg": [ 1302.25, 392.73424538738664 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.25, 0.82915619758885 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.5222329678670935, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:M_JxLY3twF8J:scholar.google.com/&scioq=Convergence+of+Generative+Deep+Linear+Networks+Trained+with+Bures-Wasserstein+Loss&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2", "aff_unique_norm": "Max Planck Institute for Mathematics in the Sciences;Duke University;University of California, Los Angeles", "aff_unique_dep": "Mathematics in the Sciences;;", "aff_unique_url": "https://www.mis.mpg.de;https://www.duke.edu;https://www.ucla.edu", "aff_unique_abbr": "MPI MIS;Duke;UCLA", "aff_campus_unique_index": "1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Germany;United States" }, { "title": "Improved Convergence of Differential Private SGD with Gradient Clipping", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11841", "id": "FRLswckPXQ5", "poster": "", "openreview": "https://openreview.net/forum?id=FRLswckPXQ5", "slides": "https://iclr.cc/virtual/2023/poster/11841", "video": "https://iclr.cc/virtual/2023/poster/11841", "author_site": "Huang Fang, Xiaoyun Li, Chenglin Fan, Ping Li", "tldr": "", "abstract": "Differential private stochastic gradient descent (DP-SGD) with gradient clipping (DP-SGD-GC) is an effective optimization algorithm that can train machine learning models with a privacy guarantee. Despite the popularity of DP-SGD-GC, its convergence in unbounded domain without the Lipschitz continuous assumption is less-understood; existing analysis of DP-SGD-GC either impose additional assumptions or end up with an utility bound that involves an non-vanishing bias term. In this work, for smooth and unconstrained problems, we improve the current analysis and show that DP-SGD-GC can achieve a vanishing utility bound without any bias term. Furthermore, when the noise generated from subsampled gradients is light-tailed, we prove that DP-SGD-GC can achieve nearly the same utility bound as DP-SGD applies to the Lipschitz continuous objectives. As a by-product, we propose a new clipping technique, called value clipping, to mitigate the computational overhead caused by the classic gradient clipping. 
Experiments on standard benchmark datasets are conducted to support our analysis.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/005a1ba62f11ce86e19ef2996ebe57a03f12e66c.zip", "author": "Huang Fang;Xiaoyun Li;Chenglin Fan;Ping Li", "authorids": "~Huang_Fang1;~Xiaoyun_Li1;~Chenglin_Fan1;~Ping_Li3", "gender": "M;M;M;M", "homepage": "https://www.cs.ubc.ca/~hgfang;https://lixiaoyun0239.github.io/cv/;;http://www.stat.rutgers.edu/home/pingli/", "dblp": "17/7697;;76/8243.html;62/5860-1", "google_scholar": "SYYFwD8AAAAJ;;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Huang_Fang1;~Xiaoyun_Li1;~Chenglin_Fan1;~Ping_Li3", "aff": "Baidu;Baidu;Research, Baidu;LinkedIn", "aff_domain": "baidu.com;baidu.com;research.baidu.com;linkedin.com", "position": "Researcher;Researcher;Visiting Researcher;Engineer", "bibtex": "@inproceedings{\nfang2023improved,\ntitle={Improved Convergence of Differential Private {SGD} with Gradient Clipping},\nauthor={Huang Fang and Xiaoyun Li and Chenglin Fan and Ping Li},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=FRLswckPXQ5}\n}", "github": "", "project": "", "reviewers": "aq32;49Nb;bKYj", "pdf_size": 1774072, "recommendation": "6;6;8", "confidence": "4;3;3", "correctness": "4;3;4", "technical_novelty": "3;3;3", "empirical_novelty": "3;3;2", "wc_summary_paper": "115;119;71", "wc_strength_and_weaknesses": "168;262;160", "wc_clarity_quality_novelty_and_reproducibility": "18;154;366", "wc_summary_review": "39;21;71", "wc_review": "340;556;668", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "382;486;501", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 101.66666666666667, 21.74600857373345 ], "wc_strength_and_weaknesses_avg": [ 196.66666666666666, 46.31294515455575 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 179.33333333333334, 143.19528235556118 ], "wc_summary_review_avg": [ 43.666666666666664, 20.677416559027762 ], "wc_review_avg": [ 521.3333333333334, 136.13065619306897 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 456.3333333333333, 52.91712598225854 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.49999999999999983, "corr_recommendation_correctness": 0.49999999999999983, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11942574183327938930&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 2, "pdf": "https://openreview.net/pdf?id=FRLswckPXQ5", "email": "baidu.com;baidu.com;research.baidu.com;linkedin.com", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Baidu;LinkedIn Corporation", "aff_unique_dep": "Baidu, Inc.;", "aff_unique_url": "https://www.baidu.com;https://www.linkedin.com", "aff_unique_abbr": "Baidu;LinkedIn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "China;United States" }, { "id": "FU2FX1wDN2x", "title": "gGN: learning to represent nodes in directed graphs as low-rank Gaussian distributions", "track": "main", "status": "Withdraw", "tldr": 
"Representing graph nodes using low-rank Gaussian distributions", "abstract": "Unsupervised learning of node representations from knowledge graphs is critical for numerous downstream tasks, ranging from large-scale graph analysis to measuring semantic similarity between nodes. This study presents gGN as a novel representation that defines graph nodes as Gaussian distributions. Unlike existing representations that approximate such distributions using diagonal covariance matrices, our proposal approximates them using low-rank perturbations. We demonstrate that this low-rank approximation is more expressive and better suited to represent complex asymmetric relations between nodes. In addition, we provide a computationally affordable algorithm for learning the low-rank representations in an unsupervised fashion. This learning algorithm uses a novel loss function based on the reverse Kullback-Leibler divergence and two ranking metrics whose joint minimization results in node representations that preserve not only node depths but also local and global asymmetric relationships between nodes. We assessed the representation power of the low-rank approximation with an in-depth systematic empirical study. The results show that our proposal was significantly better than the diagonal approximation for preserving graph structures. Moreover, gGN also outperformed 17 methods on the downstream task of measuring semantic similarity between graph nodes.", "keywords": "knowledge graphs;representation learning;low-rank approximation;Gaussian distribution;deep learning", "primary_area": "", "supplementary_material": "", "author": "Alejandro A. Edera;Georgina Stegmayer;Diego H Milone", "authorids": "~Alejandro_A._Edera1;~Georgina_Stegmayer2;~Diego_H_Milone1", "gender": ";F;M", "homepage": "https://sinc.unl.edu.ar/staff/alejandro-edera/;https://sinc.unl.edu.ar/staff/georgina-stegmayer/;https://sinc.unl.edu.ar/staff/diego-milone/", "dblp": ";;08/4690", "google_scholar": ";;nLd_jfgAAAAJ", "orcid": ";;0000-0003-2182-4351", "linkedin": ";;", "or_profile": "~Alejandro_A._Edera1;~Georgina_Stegmayer2;~Diego_H_Milone1", "aff": ";Universidad Nacional del Litoral;Universidad Nacional del Littoral", "aff_domain": ";unl.edu.ar;unl.edu.ar", "position": ";Assistant Professor;Full Professor", "bibtex": "@misc{\nedera2023ggn,\ntitle={g{GN}: learning to represent nodes in directed graphs as low-rank Gaussian distributions},\nauthor={Alejandro A. 
Edera and Georgina Stegmayer and Diego H Milone},\nyear={2023},\nurl={https://openreview.net/forum?id=FU2FX1wDN2x}\n}", "github": "", "project": "", "reviewers": "Gpt7;zNK5;SWaC;KqLR", "site": "https://openreview.net/forum?id=FU2FX1wDN2x", "pdf_size": 6685836, "recommendation": "3;3;5;5", "confidence": "4;3;3;4", "correctness": "3;3;3;4", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "58;104;152;50", "wc_strength_and_weaknesses": "572;859;224;183", "wc_clarity_quality_novelty_and_reproducibility": "25;52;81;25", "wc_summary_review": "14;50;95;44", "wc_review": "669;1065;552;302", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 91.0, 40.80441152620633 ], "wc_strength_and_weaknesses_avg": [ 459.5, 275.7575928238423 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 45.75, 23.14492384951828 ], "wc_summary_review_avg": [ 50.75, 28.960101864461734 ], "wc_review_avg": [ 647.0, 275.3443298853274 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:24lFAOCSF30J:scholar.google.com/&scioq=gGN:+learning+to+represent+nodes+in+directed+graphs+as+low-rank+Gaussian+distributions&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Universidad Nacional del Litoral;Universidad Nacional del Littoral", "aff_unique_dep": ";", "aff_unique_url": "https://www.unl.edu.ar;https://www.unl.edu.ar", "aff_unique_abbr": "UNL;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Argentina" }, { "title": "CROM: Continuous Reduced-Order Modeling of PDEs Using Implicit Neural Representations", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12094", "id": "FUORz1tG8Og", "poster": "/media/PosterPDFs/ICLR%202023/12094.png?t=1680816720.6448858", "openreview": "https://openreview.net/forum?id=FUORz1tG8Og", "slides": "https://iclr.cc/virtual/2023/poster/12094", "video": "https://iclr.cc/virtual/2023/poster/12094", "author_site": "Peter Yichen Chen, Jinxu Xiang, Dong Heon Cho, Yue Chang, G Pershing, Henrique Maia, Maurizio Chiaramonte, Kevin Carlberg, Eitan Grinspun", "tldr": "We accelerate PDE solvers via rapid latent space traversal of continuous vector fields leveraging implicit neural representations.", "abstract": "The long runtime of high-fidelity partial differential equation (PDE) solvers makes them unsuitable for time-critical applications. We propose to accelerate PDE solvers using reduced-order modeling (ROM). Whereas prior ROM approaches reduce the dimensionality of discretized vector fields, our continuous reduced-order modeling (CROM) approach builds a low-dimensional embedding of the continuous vector fields themselves, not their discretization. 
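To make the idea concrete, here is a minimal sketch of a continuous low-dimensional embedding of a field: a coordinate MLP conditioned on a small latent code, queryable at arbitrary spatial points rather than on a fixed grid. All sizes are illustrative assumptions, and this is not the authors' architecture:

```python
import torch
import torch.nn as nn

class LatentNeuralField(nn.Module):
    """Continuous field u(x; z): a spatial coordinate x and a low-dim
    latent z map to a field value, independent of any mesh or grid."""

    def __init__(self, coord_dim=1, latent_dim=8, field_dim=1, width=64):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(coord_dim + latent_dim, width), nn.GELU(),
            nn.Linear(width, width), nn.GELU(),
            nn.Linear(width, field_dim),
        )

    def forward(self, x, z):
        # x: (N, coord_dim) query points; z: (latent_dim,) reduced state
        return self.net(torch.cat([x, z.expand(x.shape[0], -1)], dim=-1))

field = LatentNeuralField()
u = field(torch.rand(100, 1), torch.randn(8))  # evaluate at arbitrary points
```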
We represent this reduced manifold using continuously differentiable neural fields, which may train on any and all available numerical solutions of the continuous system, even when they are obtained using diverse methods or discretizations. We validate our approach on an extensive range of PDEs with training data from voxel grids, meshes, and point clouds. Compared to prior discretization-dependent ROM methods, such as linear subspace proper orthogonal decomposition (POD) and nonlinear manifold neural-network-based autoencoders, CROM features higher accuracy, lower memory consumption, dynamically adaptive resolutions, and applicability to any discretization. For equal latent space dimension, CROM exhibits 79$\\times$ and 49$\\times$ better accuracy, and 39$\\times$ and 132$\\times$ smaller memory footprint, than POD and autoencoder methods, respectively. Experiments demonstrate 109$\\times$ and 89$\\times$ wall-clock speedups over unreduced models on CPUs and GPUs, respectively. Videos and codes are available on the project page: https://crom-pde.github.io", "keywords": "PDE;implicit neural representation;neural field;latent space traversal;reduced-order modeling;numerical methods", "primary_area": "", "supplementary_material": "/attachment/41475a8caf3c6970b181a03a01741218f62703b3.zip", "author": "Peter Yichen Chen;Jinxu Xiang;Dong Heon Cho;Yue Chang;G A Pershing;Henrique Teles Maia;Maurizio M Chiaramonte;Kevin Thomas Carlberg;Eitan Grinspun", "authorids": "~Peter_Yichen_Chen1;~Jinxu_Xiang1;~Dong_Heon_Cho2;~Yue_Chang1;~G_A_Pershing1;~Henrique_Teles_Maia1;~Maurizio_M_Chiaramonte1;~Kevin_Thomas_Carlberg1;~Eitan_Grinspun3", "gender": "M;M;M;F;Non-Binary;M;;;M", "homepage": "https://peterchencyc.com;;;https://changy1506.github.io/;https://github.com/gpershing;http://henrique.is/here;https://mchiaram.github.io;http://www.dgp.toronto.edu/~eitan;https://kevintcarlberg.net", "dblp": "230/7889;321/6636.html;;;;;;;", "google_scholar": "9TX3RmEAAAAJ;uYDfhx4AAAAJ;;;;9oRqw5YAAAAJ;6Y0LlQMAAAAJ;-HyEryoAAAAJ;HBfhtyEAAAAJ", "orcid": ";0009-0003-6230-6048;;;;;0000-0002-2529-3159;;", "linkedin": ";jinxu-xiang-0862631a2/;david-cho-003285131;;;henrique-t-maia;;;kevintcarlberg/", "or_profile": "~Peter_Yichen_Chen1;~Jinxu_Xiang1;~Dong_Heon_Cho2;~Yue_Chang1;~G_A_Pershing1;~Henrique_Teles_Maia1;~Maurizio_M_Chiaramonte1;~Eitan_Grinspun3;~Kevin_Carlberg1", "aff": "MIT;Tencent;Department of Computer Science, Duke University;University of Toronto;;;Stanford University;University of Toronto;Meta Facebook", "aff_domain": "csail.mit.edu;tencent.com;cs.duke.edu;utoronto.ca;;;stanford.edu;toronto.edu;meta.com", "position": "Postdoc;Researcher;PhD student;PhD student;;;PhD student;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\nchen2023crom,\ntitle={{CROM}: Continuous Reduced-Order Modeling of {PDE}s Using Implicit Neural Representations},\nauthor={Peter Yichen Chen and Jinxu Xiang and Dong Heon Cho and Yue Chang and G A Pershing and Henrique Teles Maia and Maurizio M Chiaramonte and Kevin Thomas Carlberg and Eitan Grinspun},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=FUORz1tG8Og}\n}", "github": "", "project": "", "reviewers": "XNfR;7uQY;yRQN;YNJM;NfXC", "pdf_size": 10205999, "recommendation": "6;8;8;8;8", "confidence": "3;4;4;4;4", "correctness": "3;4;4;3;3", "technical_novelty": "3;3;3;3;3", "empirical_novelty": "3;3;3;4;3", "wc_summary_paper": "90;92;85;167;92", "wc_strength_and_weaknesses": "82;278;234;307;248", 
"wc_clarity_quality_novelty_and_reproducibility": "316;21;71;24;29", "wc_summary_review": "48;8;53;68;36", "wc_review": "536;399;443;566;405", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "856;429;409;635;254", "reply_reviewers": "0;0;0;0;0", "reply_authors": "2;1;1;1;1", "recommendation_avg": [ 7.6, 0.7999999999999999 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "correctness_avg": [ 3.4, 0.4898979485566356 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_paper_avg": [ 105.2, 31.00580590792634 ], "wc_strength_and_weaknesses_avg": [ 229.8, 78.07278655203744 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 92.2, 113.35854621509576 ], "wc_summary_review_avg": [ 42.6, 20.11566553708825 ], "wc_review_avg": [ 469.8, 68.65391467352754 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 516.6, 208.52875101529767 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 9, 0 ], "corr_recommendation_confidence": 1.0, "corr_recommendation_correctness": 0.408248290463863, "gs_citation": 60, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17450317891497120706&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "pdf": "https://openreview.net/pdf?id=FUORz1tG8Og", "email": "csail.mit.edu;tencent.com;cs.duke.edu;utoronto.ca;;;stanford.edu;toronto.edu;meta.com", "author_num": 9, "aff_unique_index": "0;1;2;3;4;3;5", "aff_unique_norm": "Massachusetts Institute of Technology;Tencent;Duke University;University of Toronto;Stanford University;Meta", "aff_unique_dep": ";Tencent Holdings Limited;Department of Computer Science;;;Meta Platforms, Inc.", "aff_unique_url": "https://web.mit.edu;https://www.tencent.com;https://www.duke.edu;https://www.utoronto.ca;https://www.stanford.edu;https://meta.com", "aff_unique_abbr": "MIT;Tencent;Duke;U of T;Stanford;Meta", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;1;0;2;0;2;0", "aff_country_unique": "United States;China;Canada" }, { "title": "A Statistical Framework for Personalized Federated Learning and Estimation: Theory, Algorithms, and Privacy", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10968", "id": "FUiDMCr_W4o", "poster": "/media/PosterPDFs/ICLR%202023/10968.png?t=1682612553.8774083", "openreview": "https://openreview.net/forum?id=FUiDMCr_W4o", "slides": "https://iclr.cc/virtual/2023/poster/10968", "video": "https://iclr.cc/virtual/2023/poster/10968", "author_site": "Kaan Ozkara, Antonious Bebawy, Deepesh Data, Suhas Diggavi", "tldr": "We utilize a statistical framework to enable our design of new personalized Federated Learning/Estimation algorithms with privacy guarantees.", "abstract": "A distinguishing characteristic of federated learning is that the (local) client data could have statistical heterogeneity. This heterogeneity has motivated the design of personalized learning, where individual (personalized) models are trained, through collaboration. There have been various personalization methods proposed in literature, with seemingly very different forms and methods ranging from use of a single global model for local regularization and model interpolation, to use of multiple global models for personalized clustering, etc. In this work, we begin with a statistical framework that unifies several different algorithms as well as suggest new algorithms. 
We apply our framework to personalized estimation, and connect it to the classical empirical Bayes' methodology. We develop novel private personalized estimation under this framework. We then use our statistical framework to propose new personalized learning algorithms, including AdaPeD based on information-geometry regularization, which numerically outperforms several known algorithms. We develop privacy for personalized learning methods with guarantees for user-level privacy and composition. We numerically evaluate the performance as well as the privacy for both the estimation and learning problems, demonstrating the advantages of our proposed methods.", "keywords": "Personalized Federated Learning;Personalized Statistical Estimation;Differential Privacy;Empirical/Hierarchical Bayes", "primary_area": "", "supplementary_material": "", "author": "Kaan Ozkara;Antonious M. Girgis;Deepesh Data;Suhas Diggavi", "authorids": "~Kaan_Ozkara1;~Antonious_M._Girgis1;~Deepesh_Data1;~Suhas_Diggavi1", "gender": ";M;M;", "homepage": ";https://www.linkedin.com/in/antoniousmamdouh/;https://sites.google.com/view/deepeshdata/;https://www.ee.ucla.edu/suhas-diggavi/", "dblp": ";183/6262;137/8017.html;d/SNDiggavi.html#j15", "google_scholar": "W-JoHj0AAAAJ;Oi7ZTFEAAAAJ;nI6-huIAAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Kaan_Ozkara1;~Antonious_M._Girgis1;~Deepesh_Data1;~Suhas_Diggavi1", "aff": "University of California, Los Angeles;University of California, Los Angeles;Meta;University of California, Los Angeles", "aff_domain": "ucla.edu;ucla.edu;meta.com;ucla.edu", "position": "PhD student;PhD student;Researcher;Professor", "bibtex": "@inproceedings{\nozkara2023a,\ntitle={A Statistical Framework for Personalized Federated Learning and Estimation: Theory, Algorithms, and Privacy},\nauthor={Kaan Ozkara and Antonious M. 
Girgis and Deepesh Data and Suhas Diggavi},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=FUiDMCr_W4o}\n}", "github": "", "project": "", "reviewers": "MwhW;3hXR;zS9y;t7xg", "pdf_size": 857367, "recommendation": "5;6;6;6", "confidence": "3;3;3;4", "correctness": "3;3;3;3", "technical_novelty": "3;3;3;3", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "47;71;43;122", "wc_strength_and_weaknesses": "195;85;226;188", "wc_clarity_quality_novelty_and_reproducibility": "8;68;17;229", "wc_summary_review": "70;22;19;15", "wc_review": "320;246;305;554", "wc_reply_reviewers": "0;0;29;0", "wc_reply_authors": "620;272;281;1146", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;1;2", "recommendation_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 70.75, 31.467244874631145 ], "wc_strength_and_weaknesses_avg": [ 173.5, 53.05892950295925 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 80.5, 88.7369708746022 ], "wc_summary_review_avg": [ 31.5, 22.36626924634504 ], "wc_review_avg": [ 356.25, 117.47419929499414 ], "wc_reply_reviewers_avg": [ 7.25, 12.55736835487436 ], "wc_reply_authors_avg": [ 579.75, 355.74595921809146 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 0.0, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17099772895062179122&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "pdf": "https://openreview.net/pdf?id=FUiDMCr_W4o", "email": "ucla.edu;ucla.edu;meta.com;ucla.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of California, Los Angeles;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://www.ucla.edu;https://meta.com", "aff_unique_abbr": "UCLA;Meta", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "PAC Reinforcement Learning for Predictive State Representations", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11113", "id": "FVW7Mi2ph6C", "poster": "", "openreview": "https://openreview.net/forum?id=FVW7Mi2ph6C", "slides": "https://iclr.cc/virtual/2023/poster/11113", "video": "https://iclr.cc/virtual/2023/poster/11113", "author_site": "Wenhao Zhan, Masatoshi Uehara, Wen Sun, Jason Lee", "tldr": "PAC learning for PSRs.", "abstract": "In this paper we study online Reinforcement Learning (RL) in partially observable dynamical systems. We focus on the Predictive State Representations (PSRs) model, which is an expressive model that captures other well-known models such as Partially Observable Markov Decision Processes (POMDP). PSR represents the states using a set of predictions of future observations and is defined entirely using observable quantities. We develop a novel model-based algorithm for PSRs that can learn a near optimal policy in sample complexity scaling polynomially with respect to all the relevant parameters of the systems. Our algorithm naturally works with function approximation to extend to systems with potentially large state and observation spaces. 
We show that given a realizable model class, the sample complexity of learning the near optimal policy only scales polynomially with respect to the statistical complexity of the model class, without any explicit polynomial dependence on the size of the state and observation spaces. Notably, our work is the first work that shows polynomial sample complexities to compete with the globally optimal policy in PSRs. Finally, we demonstrate how our general theorem can be directly used to derive sample complexity bounds for special models including $m$-step weakly revealing and $m$-step decodable tabular POMDPs, POMDPs with low-rank latent transition, and POMDPs with linear emission and latent transition. ", "keywords": "Reinforcement learning theory (statistical learning theory)", "primary_area": "", "supplementary_material": "", "author": "Wenhao Zhan;Masatoshi Uehara;Wen Sun;Jason D. Lee", "authorids": "~Wenhao_Zhan1;~Masatoshi_Uehara1;~Wen_Sun1;~Jason_D._Lee1", "gender": "M;M;;M", "homepage": ";https://www.masatoshiuehara.com/;https://wensun.github.io;https://jasondlee88.github.io/", "dblp": "275/3558;225/6517;;88/3262", "google_scholar": ";https://scholar.google.co.jp/citations?user=xuLKJboAAAAJ;iOLC30YAAAAJ;GR_DsT0AAAAJ", "orcid": ";0000-0001-9017-3105;;", "linkedin": ";;;", "or_profile": "~Wenhao_Zhan1;~Masatoshi_Uehara1;~Wen_Sun1;~Jason_D._Lee1", "aff": "Princeton University;Cornell University;Cornell University;Princeton University", "aff_domain": "princeton.edu;cornell.edu;cornell.edu;princeton.edu", "position": "PhD student;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhan2023pac,\ntitle={{PAC} Reinforcement Learning for Predictive State Representations},\nauthor={Wenhao Zhan and Masatoshi Uehara and Wen Sun and Jason D. 
Lee},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=FVW7Mi2ph6C}\n}", "github": "", "project": "", "reviewers": "YxZB;XgNc;gdmT", "pdf_size": 620973, "recommendation": "5;6;8", "confidence": "3;3;5", "correctness": "3;4;4", "technical_novelty": "2;2;3", "empirical_novelty": "0;3;0", "wc_summary_paper": "75;187;127", "wc_strength_and_weaknesses": "437;313;226", "wc_clarity_quality_novelty_and_reproducibility": "117;14;12", "wc_summary_review": "40;44;25", "wc_review": "669;558;390", "wc_reply_reviewers": "251;0;82", "wc_reply_authors": "1937;1014;734", "reply_reviewers": "1;0;1", "reply_authors": "5;3;2", "recommendation_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.0, 1.4142135623730951 ], "wc_summary_paper_avg": [ 129.66666666666666, 45.7626728046147 ], "wc_strength_and_weaknesses_avg": [ 325.3333333333333, 86.58072662101088 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 47.666666666666664, 49.03286879453641 ], "wc_summary_review_avg": [ 36.333333333333336, 8.178562764256865 ], "wc_review_avg": [ 539.0, 114.69088891450794 ], "wc_reply_reviewers_avg": [ 111.0, 104.50199360139818 ], "wc_reply_authors_avg": [ 1228.3333333333333, 513.9755722686525 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 3.3333333333333335, 1.247219128924647 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.944911182523068, "corr_recommendation_correctness": 0.7559289460184545, "gs_citation": 51, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12978248765007981196&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=FVW7Mi2ph6C", "email": "princeton.edu;cornell.edu;cornell.edu;princeton.edu", "author_num": 4, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Princeton University;Cornell University", "aff_unique_dep": ";", "aff_unique_url": "https://www.princeton.edu;https://www.cornell.edu", "aff_unique_abbr": "Princeton;Cornell", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "FWPLpE981t", "title": "Learning to Counter: Stochastic Feature-based Learning for Diverse Counterfactual Explanations", "track": "main", "status": "Reject", "tldr": "", "abstract": "Interpretable machine learning seeks to understand the reasoning process of complex black-box systems that are long notorious for lack of explainability. One growing interpreting approach is through counterfactual explanations, which go beyond why a system arrives at a certain decision to further provide suggestions on what a user can do to alter the outcome. A counterfactual example must be able to counter the original prediction from the black-box classifier, while also satisfying various constraints for practical applications. These constraints exist at trade-offs between one and another presenting radical challenges to existing works. To this end, we propose a stochastic learning-based framework that effectively balances the counterfactual trade-offs. 
The framework consists of a generation and a feature selection module with complementary roles: the former aims to model the distribution of valid counterfactuals whereas the latter serves to enforce additional constraints in a way that allows for differentiable training and amortized optimization. We demonstrate the effectiveness of our method in generating actionable and plausible counterfactuals that are more diverse than the existing methods and particularly in a more efficient manner than counterparts of the same capacity.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/606f684239649891c98034de2008c8752bccbe96.zip", "author": "Vy Vo;Trung Le;Van Nguyen;He Zhao;Edwin V. Bonilla;Gholamreza Haffari;Dinh Phung", "authorids": "~Vy_Vo2;~Trung_Le2;~Van_Nguyen2;~He_Zhao1;~Edwin_V._Bonilla1;~Gholamreza_Haffari2;~Dinh_Phung2", "gender": "F;M;M;;;;", "homepage": "https://isvy08.github.io/;;;;;;", "dblp": "176/4660;;;;;;", "google_scholar": "3CpFpFkAAAAJ;https://scholar.google.com/citations?hl=en;KPpmKZ0AAAAJ;;;;", "orcid": ";;0000-0002-5838-3409;;;;", "linkedin": ";;;;;;", "or_profile": "~Vy_Vo2;~Trung_Le2;~Van_Nguyen2;~He_Zhao1;~Edwin_V._Bonilla1;~Gholamreza_Haffari2;~Dinh_Phung2", "aff": "Monash University;Monash University;Monash University;;;;", "aff_domain": "monash.edu;monash.edu;monash.edu;;;;", "position": "PhD student;Assistant Professor;Postdoc;;;;", "bibtex": "@misc{\nvo2023learning,\ntitle={Learning to Counter: Stochastic Feature-based Learning for Diverse Counterfactual Explanations},\nauthor={Vy Vo and Trung Le and Van Nguyen and He Zhao and Edwin V. Bonilla and Gholamreza Haffari and Dinh Phung},\nyear={2023},\nurl={https://openreview.net/forum?id=FWPLpE981t}\n}", "github": "", "project": "", "reviewers": "DTNp;67mR;Xqce;nrfX", "site": "https://openreview.net/forum?id=FWPLpE981t", "pdf_size": 394749, "recommendation": "3;3;5;5", "confidence": "4;4;5;3", "correctness": "3;2;3;3", "technical_novelty": "1;2;2;2", "empirical_novelty": "1;2;2;2", "wc_summary_paper": "45;117;95;127", "wc_strength_and_weaknesses": "195;268;195;150", "wc_clarity_quality_novelty_and_reproducibility": "244;99;76;34", "wc_summary_review": "16;49;64;81", "wc_review": "500;533;430;392", "wc_reply_reviewers": "51;0;0;0", "wc_reply_authors": "1167;1477;987;657", "reply_reviewers": "1;0;0;0", "reply_authors": "2;3;2;1", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 96.0, 31.63858403911275 ], "wc_strength_and_weaknesses_avg": [ 202.0, 42.30248219667494 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 113.25, 79.00435114599702 ], "wc_summary_review_avg": [ 52.5, 23.921747427811372 ], "wc_review_avg": [ 463.75, 55.670346684747706 ], "wc_reply_reviewers_avg": [ 12.75, 22.083647796503186 ], "wc_reply_authors_avg": [ 1072.0, 296.8585521759479 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Dj2tBr6I35AJ:scholar.google.com/&scioq=Learning+to+Counter:+Stochastic+Feature-based+Learning+for+Diverse+Counterfactual+Explanations&hl=en&as_sdt=0,5", "gs_version_total": 0, 
"aff_unique_index": "0;0;0", "aff_unique_norm": "Monash University", "aff_unique_dep": "", "aff_unique_url": "https://www.monash.edu", "aff_unique_abbr": "Monash", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Australia" }, { "id": "FWl6TFsE7Cp", "title": "Universal Mini-Batch Consistency for Set Encoding Functions", "track": "main", "status": "Reject", "tldr": "We propose a method to make arbitrary set functions produce consistent outputs given mini-batches of a set.", "abstract": "Previous works have established solid foundations for neural set functions, complete with architectures which preserve the necessary properties for operating on sets, such as invariance to permutations of the set elements. Subsequent work has highlighted the utility of Mini-Batch Consistency (MBC), the ability to sequentially process any permutation of a set partition scheme (e.g. streaming chunks of data) while maintaining consistency guarantees on the output, although there are limited options for MBC architectures. We propose a framework which can convert an arbitrary non-MBC model to one which satisfies MBC. In doing so, we allow all set functions to universally be considered in an MBC setting (UMBC). Additionally, we explore a Monte Carlo dropout strategy made possible by our framework which allows performing Monte Carlo dropout on streaming sets while never seeing the entire set at once. We validate UMBC with theoretical proofs, unit tests, and also provide qualitative/quantitative experiments on Gaussian data, clean and corrupted point cloud classification, and amortized clustering on ImageNet. Additionally, we investigate the probabilistic calibration of set-functions under test-time distributional shifts. 
Our results demonstrate the utility of universal mini-batch consistency, and we further discover that our dropout strategy improves uncertainty calibration.", "keywords": "set", "primary_area": "", "supplementary_material": "/attachment/5e82ac145fb9cb04530de57e05b2c7abe5a1adde.zip", "author": "Jeffrey Ryan Willette;Bruno Andreis;Juho Lee;Sung Ju Hwang", "authorids": "~Jeffrey_Ryan_Willette1;~Bruno_Andreis1;~Juho_Lee2;~Sung_Ju_Hwang1", "gender": "M;M;M;", "homepage": "https://jeffwillette.github.io;https://andreisbruno.github.io/;https://juho.lee.github.io;", "dblp": "286/0937;225/0404;55/3410-1;", "google_scholar": "https://scholar.google.com/citations?hl=en;WzQ_v4IAAAAJ;Py4URJUAAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Jeffrey_Ryan_Willette1;~Bruno_Andreis1;~Juho_Lee2;~Sung_Ju_Hwang1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;", "position": "Student;PhD student;Assistant Professor;", "bibtex": "@misc{\nwillette2023universal,\ntitle={Universal Mini-Batch Consistency for Set Encoding Functions},\nauthor={Jeffrey Ryan Willette and Bruno Andreis and Juho Lee and Sung Ju Hwang},\nyear={2023},\nurl={https://openreview.net/forum?id=FWl6TFsE7Cp}\n}", "github": "", "project": "", "reviewers": "BZo6;SNvt;9w5z;cnQu", "site": "https://openreview.net/forum?id=FWl6TFsE7Cp", "pdf_size": 6289682, "recommendation": "3;5;5;5", "confidence": "3;4;2;3", "correctness": "2;3;3;3", "technical_novelty": "2;2;2;2", "empirical_novelty": "2;3;3;2", "wc_summary_paper": "273;99;69;122", "wc_strength_and_weaknesses": "735;244;182;1192", "wc_clarity_quality_novelty_and_reproducibility": "195;307;13;69", "wc_summary_review": "123;62;78;124", "wc_review": "1326;712;342;1507", "wc_reply_reviewers": "0;0;0;762", "wc_reply_authors": "1521;1264;630;2872", "reply_reviewers": "0;0;0;1", "reply_authors": "2;2;1;4", "recommendation_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 140.75, 78.6332467853134 ], "wc_strength_and_weaknesses_avg": [ 588.25, 409.14445798519625 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 146.0, 113.95174417269794 ], "wc_summary_review_avg": [ 96.75, 27.343874999714288 ], "wc_review_avg": [ 971.75, 467.9852428228907 ], "wc_reply_reviewers_avg": [ 190.5, 329.95567884187113 ], "wc_reply_authors_avg": [ 1571.75, 817.7451849445523 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:1ulamLfrVUsJ:scholar.google.com/&scioq=Universal+Mini-Batch+Consistency+for+Set+Encoding+Functions&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "ERL-Re$^2$: Efficient Evolutionary Reinforcement Learning with Shared State Representation and Individual Policy 
Representation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12045", "id": "FYZCHEtt6H0", "poster": "/media/PosterPDFs/ICLR%202023/12045.png?t=1681100988.4979453", "openreview": "https://openreview.net/forum?id=FYZCHEtt6H0", "slides": "https://iclr.cc/virtual/2023/poster/12045", "video": "https://iclr.cc/virtual/2023/poster/12045", "author_site": "Jianye HAO, Pengyi Li, Hongyao Tang, YAN ZHENG, Xian Fu, Zhaopeng Meng", "tldr": "A novel and effective framework to fuse Reinforcement Learning and Evolutionary Algorithm for policy optimization.", "abstract": "Deep Reinforcement Learning (Deep RL) and Evolutionary Algorithm (EA) are two major paradigms of policy optimization with distinct learning principles, i.e., gradient-based v.s. gradient-free. An appealing research direction is integrating Deep RL and EA to devise new methods by fusing their complementary advantages. However, existing works on combining Deep RL and EA have two common drawbacks:1) the RL agent and EA agents learn their policies individually, neglecting efficient sharing of useful common knowledge; 2) parameter-level policy optimization guarantees no semantic level of behavior evolution for the EA side. In this paper, we propose Evolutionary Reinforcement Learning with Two-scale State Representation and Policy Representation (ERL-Re$^2$), a novel solution to the aforementioned two drawbacks. The key idea of ERL-Re$^2$ is two-scale representation: all EA and RL policies share the same nonlinear state representation while maintaining individual linear policy representations. The state representation conveys expressive common features of the environment learned by all the agents collectively; the linear policy representation provides a favorable space for efficient policy optimization, where novel behavior-level crossover and mutation operations can be performed. Moreover, the linear policy representation allows convenient generalization of policy fitness with the help of Policy-extended Value Function Approximator (PeVFA), further improving the sample efficiency of fitness estimation. The experiments on a range of continuous control tasks show that ERL-Re$^2$ consistently outperforms advanced baselines and achieves the State Of The Art (SOTA). 
Our code is available on https://github.com/yeshenpy/ERL-Re2.", "keywords": "Reinforcement Learning;Evolutionary Algorithm;Representation", "primary_area": "", "supplementary_material": "", "author": "Jianye HAO;Pengyi Li;Hongyao Tang;YAN ZHENG;Xian Fu;Zhaopeng Meng", "authorids": "~Jianye_HAO1;~Pengyi_Li1;~Hongyao_Tang1;~YAN_ZHENG1;~Xian_Fu1;~Zhaopeng_Meng1", "gender": "M;M;M;M;M;", "homepage": "http://www.icdai.org/jianye.html;https://yeshenpy.github.io/;https://bluecontra.github.io/;https://yanzzzzz.github.io;https://cyanwatts.github.io/;http://cic.tju.edu.cn/info/1104/1205.htm", "dblp": "21/7664.html;195/6948;220/4275;10/2381-2;54/1085;67/8175", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;yIqzRH4AAAAJ;https://scholar.google.com.hk/citations?user=tJuhd1kAAAAJ;https://scholar.google.com/citations?hl=zh-CN;", "orcid": "0000-0002-0422-8235;0009-0009-8546-2346;;;;", "linkedin": ";;;;;", "or_profile": "~Jianye_HAO1;~Pengyi_Li1;~Hongyao_Tang1;~YAN_ZHENG1;~Xian_Fu1;~Zhaopeng_Meng1", "aff": "Tianjin University;Tianjin University;College of Intelligence and Computing, Tianjin University;Tianjin University, China;Tianjin University;Tianjin University", "aff_domain": "tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn", "position": "Associate Professor;PhD student;PhD student;Associate Professor;MS student;Full Professor", "bibtex": "@inproceedings{\nhao2023erlre,\ntitle={{ERL}-Re\\${\\textasciicircum}2\\$: Efficient Evolutionary Reinforcement Learning with Shared State Representation and Individual Policy Representation },\nauthor={Jianye HAO and Pengyi Li and Hongyao Tang and YAN ZHENG and Xian Fu and Zhaopeng Meng},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=FYZCHEtt6H0}\n}", "github": "", "project": "", "reviewers": "eTSz;oDSW;vR7r;ouqq", "pdf_size": 3339503, "recommendation": "5;6;6;8", "confidence": "4;4;3;4", "correctness": "3;4;3;3", "technical_novelty": "3;3;3;3", "empirical_novelty": "1;3;3;3", "wc_summary_paper": "84;88;102;233", "wc_strength_and_weaknesses": "401;93;113;158", "wc_clarity_quality_novelty_and_reproducibility": "12;29;110;23", "wc_summary_review": "63;59;35;27", "wc_review": "560;269;360;441", "wc_reply_reviewers": "166;9;0;32", "wc_reply_authors": "3497;606;1022;787", "reply_reviewers": "1;1;0;1", "reply_authors": "7;2;3;3", "recommendation_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 126.75, 61.70646238442129 ], "wc_strength_and_weaknesses_avg": [ 191.25, 123.36607110547048 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 43.5, 38.87479903485033 ], "wc_summary_review_avg": [ 46.0, 15.329709716755891 ], "wc_review_avg": [ 407.5, 107.02452989852372 ], "wc_reply_reviewers_avg": [ 51.75, 66.98647251497873 ], "wc_reply_authors_avg": [ 1478.0, 1174.9640420029882 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.75, 1.920286436967152 ], "replies_avg": [ 33, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.13245323570650439, "corr_recommendation_correctness": -0.13245323570650439, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8710139168545477409&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=FYZCHEtt6H0", "email": 
"tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Tianjin University", "aff_unique_dep": "", "aff_unique_url": "http://www.tju.edu.cn", "aff_unique_abbr": "TJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "FZAKltxF4y2", "title": "The Multiple Subnetwork Hypothesis: Enabling Multidomain Learning by Isolating Task-Specific Subnetworks in Feedforward Neural Networks", "track": "main", "status": "Reject", "tldr": "In this paper, we test our \"Multiple Subnetwork Hypothesis,\" which proposes that it is possible to train unused weights within a pruned feedforward neural network to learn subsequent tasks.", "abstract": "Neural networks have seen an explosion of usage and research in the past decade, particularly within the domains of computer vision and natural language processing. However, only recently have advancements in neural networks yielded performance improvements beyond narrow applications and translated to expanded multitask models capable of generalizing across multiple data types and modalities. Simultaneously, it has been shown that neural networks are overparameterized to a high degree, and pruning techniques have proved capable of significantly reducing the number of active weights within the network while largely preserving performance. In this work, we identify a methodology and network representational structure which allows a pruned network to employ previously unused weights to learn subsequent tasks. We employ these methodologies on well-known benchmarking datasets for testing purposes and show that networks trained using our approaches are able to learn multiple tasks, which may be related or unrelated, in parallel or in sequence without sacrificing performance on any task or exhibiting catastrophic forgetting.", "keywords": "Neural Networks;Multitask Learning;Pruning", "primary_area": "", "supplementary_material": "", "author": "Jacob William Renn;Ian Sotnek;Benjamin Harvey;Brian Caffo", "authorids": "~Jacob_William_Renn1;ian.sotnek@squared.ai;benjamin.harvey@squared.ai;bcaffo1@jhu.edu", "gender": "M;;;", "homepage": ";;;", "dblp": ";;;", "google_scholar": ";;;", "orcid": ";;;", "linkedin": "jacob-renn-934009141/;;;", "or_profile": "~Jacob_William_Renn1;ian.sotnek@squared.ai;benjamin.harvey@squared.ai;bcaffo1@jhu.edu", "aff": "AI Squared, Inc.;;;", "aff_domain": "squared.ai;;;", "position": "Researcher;;;", "bibtex": "@misc{\nrenn2023the,\ntitle={The Multiple Subnetwork Hypothesis: Enabling Multidomain Learning by Isolating Task-Specific Subnetworks in Feedforward Neural Networks},\nauthor={Jacob William Renn and Ian Sotnek and Benjamin Harvey and Brian Caffo},\nyear={2023},\nurl={https://openreview.net/forum?id=FZAKltxF4y2}\n}", "github": "", "project": "", "reviewers": "aGRW;3BZv;yCzy;PPCR", "site": "https://openreview.net/forum?id=FZAKltxF4y2", "pdf_size": 450941, "recommendation": "3;3;3;5", "confidence": "4;3;4;4", "correctness": "2;3;2;3", "technical_novelty": "3;1;1;3", "empirical_novelty": "2;1;2;2", "wc_summary_paper": "179;57;39;68", "wc_strength_and_weaknesses": "589;111;122;316", "wc_clarity_quality_novelty_and_reproducibility": "235;43;118;6", "wc_summary_review": "158;135;57;10", "wc_review": "1161;346;336;400", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.5, 
0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.0, 1.0 ], "empirical_novelty_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 85.75, 54.82415070021605 ], "wc_strength_and_weaknesses_avg": [ 284.5, 193.79176969107846 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 100.5, 87.51142782517036 ], "wc_summary_review_avg": [ 90.0, 59.451661036509314 ], "wc_review_avg": [ 560.75, 347.40853112725944 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2402951803319165689&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 3, "aff_unique_index": "0", "aff_unique_norm": "AI Squared, Inc.", "aff_unique_dep": "", "aff_unique_url": "", "aff_unique_abbr": "", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "FZCFlj2_c7z", "title": "Jump-Start Reinforcement Learning", "track": "main", "status": "Reject", "tldr": "Efficiently initializing reinforcement learning policies using a prior policy. ", "abstract": "Reinforcement learning (RL) provides a theoretical framework for continuously improving an agent\u2019s behavior via trial and error. However, efficiently learning policies from scratch can be very difficult, particularly for tasks that present exploration challenges. In such settings, it might be desirable to initialize RL with an existing policy, offline data, or demonstrations. However, naively performing such initialization in RL often works poorly, especially for value-based methods. In this paper, we present a meta algorithm that can use offline data, demonstrations, or a pre-existing policy to initialize an RL policy, and is compatible with any RL approach. In particular, we propose Jump-Start Reinforcement Learning (JSRL), an algorithm that employs two policies to solve tasks: a guide-policy, and an exploration-policy. By using the guide-policy to form a curriculum of starting states for the exploration-policy, we are able to efficiently improve performance on a set of simulated robotic tasks. We show via experiments that it is able to significantly outperform existing imitation and reinforcement learning algorithms, particularly in the small-data regime. 
In addition, we provide an upper bound on the sample complexity of JSRL and show that with the help of a guide-policy, one can improve the sample complexity for non-optimism exploration methods from exponential in horizon to polynomial.", "keywords": "reinforcement learning;offline reinforcement learning;fine-tuning", "primary_area": "", "supplementary_material": "", "author": "Ikechukwu Uchendu;Ted Xiao;Yao Lu;Banghua Zhu;Mengyuan Yan;Jos\u00e9phine Simon;Matthew Bennice;Chuyuan Fu;Cong Ma;Jiantao Jiao;Sergey Levine;Karol Hausman", "authorids": "~Ikechukwu_Uchendu1;~Ted_Xiao1;~Yao_Lu13;~Banghua_Zhu1;~Mengyuan_Yan1;josimon@google.com;~Matthew_Bennice1;~Chuyuan_Fu1;~Cong_Ma1;~Jiantao_Jiao1;~Sergey_Levine1;~Karol_Hausman2", "gender": "M;M;;M;F;;M;F;M;M;M;", "homepage": "https://ikeuchendu.com/;https://www.tedxiao.me;;https://people.eecs.berkeley.edu/~banghua/;;;;;https://congma1028.github.io/;https://scholar.google.com/citations?user=aO8KpGcAAAAJ&hl=en;https://people.eecs.berkeley.edu/~svlevine/;", "dblp": "215/4335;198/0598;26/5662-6;204/5394;164/5672;;;;42/10808;43/8919;80/7594;", "google_scholar": "KcPrLhIAAAAJ;;OI7zFmwAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;;;bDq7MZMAAAAJ;;aO8KpGcAAAAJ;8R35rCwAAAAJ;", "orcid": ";;;;;;;;;;;", "linkedin": "ikeuchendu/;;;;;;matt-b-51776837/;;;;;", "or_profile": "~Ikechukwu_Uchendu1;~Ted_Xiao1;~Yao_Lu13;~Banghua_Zhu1;~Mengyuan_Yan1;josimon@google.com;~Matthew_Bennice1;~Chuyuan_Fu1;~Cong_Ma1;~Jiantao_Jiao1;~Sergey_Levine1;~Karol_Hausman2", "aff": "Harvard University;;Google;University of California, Berkeley;Google;;;Google;University of Chicago;University of California, Berkeley;Google;", "aff_domain": "harvard.edu;;google.com;berkeley.edu;google.com;;;google.com;uchicago.edu;berkeley.edu;google.com;", "position": "PhD student;;Researcher;PhD student;Researcher;;;software engineer;Assistant Professor;Assistant Professor;Research Scientist;", "bibtex": "@misc{\nuchendu2023jumpstart,\ntitle={Jump-Start Reinforcement Learning},\nauthor={Ikechukwu Uchendu and Ted Xiao and Yao Lu and Banghua Zhu and Mengyuan Yan and Jos{\\'e}phine Simon and Matthew Bennice and Chuyuan Fu and Cong Ma and Jiantao Jiao and Sergey Levine and Karol Hausman},\nyear={2023},\nurl={https://openreview.net/forum?id=FZCFlj2_c7z}\n}", "github": "", "project": "", "reviewers": "WwEP;hK9Y;5862;3rP7", "site": "https://openreview.net/forum?id=FZCFlj2_c7z", "pdf_size": 2491337, "recommendation": "3;6;6;8", "confidence": "3;4;4;4", "correctness": "2;3;3;4", "technical_novelty": "2;3;4;3", "empirical_novelty": "2;3;4;3", "wc_summary_paper": "62;53;76;101", "wc_strength_and_weaknesses": "169;244;247;139", "wc_clarity_quality_novelty_and_reproducibility": "99;21;10;17", "wc_summary_review": "22;28;3;30", "wc_review": "352;346;336;287", "wc_reply_reviewers": "0;28;0;0", "wc_reply_authors": "932;1191;799;161", "reply_reviewers": "0;1;0;0", "reply_authors": "2;2;2;1", "recommendation_avg": [ 5.75, 1.7853571071357126 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 73.0, 18.12456896039186 ], "wc_strength_and_weaknesses_avg": [ 199.75, 46.97539249436879 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 36.75, 36.15504805694496 ], "wc_summary_review_avg": [ 20.75, 10.662434056068061 ], "wc_review_avg": [ 330.25, 25.616157010761782 ], "wc_reply_reviewers_avg": [ 7.0, 
12.12435565298214 ], "wc_reply_authors_avg": [ 770.75, 379.21127027028086 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 12, 0 ], "corr_recommendation_confidence": 0.8892972917998875, "corr_recommendation_correctness": 0.9901475429766743, "gs_citation": 145, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15331057141078785906&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 9, "aff_unique_index": "0;1;2;1;1;3;2;1", "aff_unique_norm": "Harvard University;Google;University of California, Berkeley;University of Chicago", "aff_unique_dep": ";Google;;", "aff_unique_url": "https://www.harvard.edu;https://www.google.com;https://www.berkeley.edu;https://www.uchicago.edu", "aff_unique_abbr": "Harvard;Google;UC Berkeley;UChicago", "aff_campus_unique_index": "1;2;1;1;2;1", "aff_campus_unique": ";Mountain View;Berkeley", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Is the Performance of My Deep Network Too Good to Be True? A Direct Approach to Estimating the Bayes Error in Binary Classification", "status": "Top-5%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10824", "id": "FZdJQgy05rz", "poster": "/media/PosterPDFs/ICLR%202023/10824.png?t=1680746333.2635193", "openreview": "https://openreview.net/forum?id=FZdJQgy05rz", "slides": "https://iclr.cc/virtual/2023/poster/10824", "video": "https://iclr.cc/virtual/2023/poster/10824", "author_site": "Takashi Ishida, Ikko Yamane, Nontawat Charoenphakdee, Gang Niu, Masashi Sugiyama", "tldr": "A simple and direct Bayes error estimator that just takes the mean of the labels that show uncertainty of the classes.", "abstract": "There is a fundamental limitation in the prediction performance that a machine learning model can achieve due to the inevitable uncertainty of the prediction target. In classification problems, this can be characterized by the Bayes error, which is the best achievable error with any classifier. The Bayes error can be used as a criterion to evaluate classifiers with state-of-the-art performance and can be used to detect test set overfitting. We propose a simple and direct Bayes error estimator, where we just take the mean of the labels that show \\emph{uncertainty} of the class assignments. Our flexible approach enables us to perform Bayes error estimation even for weakly supervised data. In contrast to others, our method is model-free and even instance-free. Moreover, it has no hyperparameters and gives a more accurate estimate of the Bayes error than several baselines empirically. Experiments using our method suggest that recently proposed deep networks such as the Vision Transformer may have reached, or is about to reach, the Bayes error for benchmark datasets. 
Finally, we discuss how we can study the inherent difficulty of the acceptance/rejection decision for scientific articles, by estimating the Bayes error of the ICLR papers from 2017 to 2023.", "keywords": "Bayes error;best achievable error;irreducible error", "primary_area": "", "supplementary_material": "", "author": "Takashi Ishida;Ikko Yamane;Nontawat Charoenphakdee;Gang Niu;Masashi Sugiyama", "authorids": "~Takashi_Ishida1;~Ikko_Yamane1;~Nontawat_Charoenphakdee1;~Gang_Niu1;~Masashi_Sugiyama1", "gender": "M;M;M;M;M", "homepage": "https://takashiishida.github.io/;https://i-yamane.github.io;https://nolfwin.github.io/;https://niug1984.github.io;http://www.ms.k.u-tokyo.ac.jp/sugi/", "dblp": "84/2290-1;162/6816;227/3074;26/3367-1;35/1228", "google_scholar": "IzoyKyUAAAAJ;IMrAPqkAAAAJ;https://scholar.google.co.jp/citations?user=sEFoFbgAAAAJ;https://scholar.google.co.jp/citations?user=HOkcy00AAAAJ;https://scholar.google.co.jp/citations?user=GkYIrlIAAAAJ", "orcid": ";;0000-0002-0214-4943;;0000-0001-6658-6743", "linkedin": ";;nontawat-charoenphakdee-b07b7385/;;", "or_profile": "~Takashi_Ishida1;~Ikko_Yamane1;~Nontawat_Charoenphakdee1;~Gang_Niu1;~Masashi_Sugiyama1", "aff": "The University of Tokyo;Ecole Nationale de la Statistique et de l'Analyse de l'information;Preferred Networks, Inc.;RIKEN;The University of Tokyo", "aff_domain": "tokyo.ac.jp;ensai.fr;preferred.jp;riken.jp;u-tokyo.ac.jp", "position": "Lecturer;Assistant Professor;Researcher;Research Scientist (tenured);Full Professor", "bibtex": "@inproceedings{\nishida2023is,\ntitle={Is the Performance of My Deep Network Too Good to Be True? A Direct Approach to Estimating the Bayes Error in Binary Classification},\nauthor={Takashi Ishida and Ikko Yamane and Nontawat Charoenphakdee and Gang Niu and Masashi Sugiyama},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=FZdJQgy05rz}\n}", "github": "", "project": "", "reviewers": "5ZHz;bzQr;YXWn;Y3qe", "pdf_size": 589218, "recommendation": "5;8;8;8", "confidence": "2;3;3;2", "correctness": "3;3;4;4", "technical_novelty": "3;3;3;4", "empirical_novelty": "2;4;3;4", "wc_summary_paper": "44;43;75;59", "wc_strength_and_weaknesses": "177;475;140;71", "wc_clarity_quality_novelty_and_reproducibility": "28;30;22;20", "wc_summary_review": "70;33;31;5", "wc_review": "319;581;268;155", "wc_reply_reviewers": "0;23;31;0", "wc_reply_authors": "760;723;713;277", "reply_reviewers": "0;1;1;0", "reply_authors": "1;2;2;1", "recommendation_avg": [ 7.25, 1.299038105676658 ], "confidence_avg": [ 2.5, 0.5 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 55.25, 13.045593125649749 ], "wc_strength_and_weaknesses_avg": [ 215.75, 154.43667796219913 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 25.0, 4.123105625617661 ], "wc_summary_review_avg": [ 34.75, 23.155722834755128 ], "wc_review_avg": [ 330.75, 156.19599066557373 ], "wc_reply_reviewers_avg": [ 13.5, 13.793114224133722 ], "wc_reply_authors_avg": [ 618.25, 197.7970866822866 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11394063579122935472&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 
6, "pdf": "https://openreview.net/pdf?id=FZdJQgy05rz", "email": "tokyo.ac.jp;ensai.fr;preferred.jp;riken.jp;u-tokyo.ac.jp", "author_num": 5, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "University of Tokyo;Ecole Nationale de la Statistique et de l'Analyse de l'information;Preferred Networks, Inc.;RIKEN", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.u-tokyo.ac.jp;https://ensai.fr;https://www.preferred-networks.com;https://www.riken.jp", "aff_unique_abbr": "UTokyo;ENSAI;PFN;RIKEN", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "Japan;France" }, { "title": "Parametrizing Product Shape Manifolds by Composite Networks", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11446", "id": "F_EhNDSamN", "poster": "/media/PosterPDFs/ICLR%202023/11446.png?t=1682342725.682383", "openreview": "https://openreview.net/forum?id=F_EhNDSamN", "slides": "https://iclr.cc/virtual/2023/poster/11446", "video": "https://iclr.cc/virtual/2023/poster/11446", "author_site": "Josua Sassen, Klaus Hildebrandt, Martin Rumpf, Benedikt Wirth", "tldr": "", "abstract": "Parametrizations of data manifolds in shape spaces can be computed using the rich toolbox of Riemannian geometry. This, however, often comes with high computational costs, which raises the question if one can learn an efficient neural network approximation. We show that this is indeed possible for shape spaces with a special product structure, namely those smoothly approximable by a direct sum of low-dimensional manifolds. Our proposed architecture leverages this structure by separately learning approximations for the low-dimensional factors and a subsequent combination. After developing the approach as a general framework, we apply it to a shape space of triangular surfaces. Here, typical examples of data manifolds are given through datasets of articulated models and can be factorized, for example, by a Sparse Principal Geodesic Analysis (SPGA). 
We demonstrate the effectiveness of our proposed approach with experiments on synthetic data as well as manifolds extracted from data via SPGA.", "keywords": "shape spaces;product manifolds;nonlinear statistics;low-dimensional data manifolds", "primary_area": "", "supplementary_material": "/attachment/53d366369d9788f1052b60d4bfa9c9cc517d2b5d.zip", "author": "Josua Sassen;Klaus Hildebrandt;Martin Rumpf;Benedikt Wirth", "authorids": "~Josua_Sassen1;~Klaus_Hildebrandt1;~Martin_Rumpf1;~Benedikt_Wirth1", "gender": "M;M;;M", "homepage": "https://josuasassen.com;https://graphics.tudelft.nl/klaus-hildebrandt/;;https://www.uni-muenster.de/AMM/wirth/", "dblp": "244/2856;;;20/723", "google_scholar": "MC1HRCMAAAAJ;https://scholar.google.com.tw/citations?user=qYZBq5cAAAAJ;;", "orcid": "0000-0002-8069-4713;;;", "linkedin": ";;;", "or_profile": "~Josua_Sassen1;~Klaus_Hildebrandt1;~Martin_Rumpf1;~Benedikt_Wirth1", "aff": "Rheinische Friedrich-Wilhelms Universit\u00e4t Bonn;Delft University of Technology;;Westf\u00e4lische Wilhelms-Universit\u00e4t M\u00fcnster", "aff_domain": "uni-bonn.de;tudelft.nl;;uni-muenster.de", "position": "PhD student;Assistant Professor;;Full Professor", "bibtex": "@inproceedings{\nsassen2023parametrizing,\ntitle={Parametrizing Product Shape Manifolds by Composite Networks},\nauthor={Josua Sassen and Klaus Hildebrandt and Martin Rumpf and Benedikt Wirth},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=F_EhNDSamN}\n}", "github": "", "project": "", "reviewers": "VcAe;SjVS;ADhs", "pdf_size": 9428126, "recommendation": "5;8;8", "confidence": "3;3;3", "correctness": "3;4;3", "technical_novelty": "2;3;3", "empirical_novelty": "2;3;3", "wc_summary_paper": "51;193;130", "wc_strength_and_weaknesses": "262;330;273", "wc_clarity_quality_novelty_and_reproducibility": "46;18;56", "wc_summary_review": "33;48;85", "wc_review": "392;589;544", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "264;277;293", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 7.0, 1.4142135623730951 ], "confidence_avg": [ 3.0, 0.0 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 124.66666666666667, 58.09379389305616 ], "wc_strength_and_weaknesses_avg": [ 288.3333333333333, 29.80305726300948 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 40.0, 16.08311744241976 ], "wc_summary_review_avg": [ 55.333333333333336, 21.853044537445015 ], "wc_review_avg": [ 508.3333333333333, 84.28654828750685 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 278.0, 11.86029791643813 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Q8XHo-h9-RgJ:scholar.google.com/&scioq=Parametrizing+Product+Shape+Manifolds+by+Composite+Networks&hl=en&as_sdt=0,5", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=F_EhNDSamN", "email": "uni-bonn.de;tudelft.nl;;uni-muenster.de", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Rheinische Friedrich-Wilhelms Universit\u00e4t Bonn;Delft University of Technology;Westf\u00e4lische Wilhelms-Universit\u00e4t M\u00fcnster", "aff_unique_dep": 
";;", "aff_unique_url": "https://www.uni-bonn.de/;https://www.tudelft.nl;https://www.uni-muenster.de", "aff_unique_abbr": "Uni Bonn;TU Delft;WWU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Germany;Netherlands" }, { "id": "F_P8Dtg43vF", "title": "Spatio-temporal Self-Attention for Egocentric 3D Pose Estimation", "track": "main", "status": "Reject", "tldr": "spatio-temporal egocentric pose estimation using transformers.", "abstract": "Vision-based ego-centric 3D human pose estimation (ego-HPE) is essential to support critical applications of xR-technologies. However, severe self-occlusions and strong distortion introduced by the fish-eye view from the head mounted camera, make ego-HPE extremely challenging. While current state-of-the-art (SOTA) methods try to address the distortion, they still suffer from large errors in the most critical joints (such as hands) due to self-occlusions. To this end, we propose a spatio-temporal transformer model that can attend to semantically rich feature maps obtained from popular convolutional backbones. Leveraging the complex spatio-temporal information encoded in ego-centric videos, we design a spatial concept called feature map tokens (FMT) which can attend to all the other spatial units in our spatio-temporal feature maps. Powered by this FMT-based transformer, we build Egocentric Spatio-Temporal Self-Attention Network (Ego-STAN), which uses heatmap-based representations and spatio-temporal attention specialized to address distortions and self-occlusions in ego-HPE.\nOur quantitative evaluation on the contemporary sequential xR-EgoPose dataset, achieves a 38.2% improvement on the highest error joints against the SOTA ego-HPE model, while accomplishing a 22% decrease in the number of parameters. Finally, we also demonstrate the generalization capabilities of our model to real-world HPE tasks beyond ego-views.", "keywords": "pose estimation;egocentric vision;computer vision;self-attention;spatio-temporal data analysis", "primary_area": "", "supplementary_material": "/attachment/ec2fb28c4e6e4b9e7fd05669ddb61406a1a55c1b.zip", "author": "Jinman Park;Kimathi Kaai;Saad Hossain;Norikatsu Sumi;Sirisha Rambhatla;Paul W. Fieguth", "authorids": "~Jinman_Park1;~Kimathi_Kaai1;~Saad_Hossain1;~Norikatsu_Sumi1;~Sirisha_Rambhatla1;~Paul_W._Fieguth1", "gender": "M;M;M;M;F;", "homepage": ";https://kimathkaai.com;https://saad-hossain.github.io/;;;", "dblp": "81/3875;322/4009;322/3947;322/4041;123/4808.html;f/PWFieguth", "google_scholar": ";https://scholar.google.ca/citations?user=XpCEjJ4AAAAJ;;https://scholar.google.ca/citations?view_op=list_works;EOSZeBMAAAAJ;TObmBfYAAAAJ", "orcid": "0009-0003-0870-8185;;0009-0006-9844-8437;0000-0002-8716-1565;;0000-0001-7260-2260", "linkedin": "jinmanpark/;kimathikaai/;s42hossa/;;;paul-fieguth-1071461", "or_profile": "~Jinman_Park1;~Kimathi_Kaai1;~Saad_Hossain1;~Norikatsu_Sumi1;~Sirisha_Rambhatla1;~Paul_W._Fieguth1", "aff": "University of Waterloo;University of Waterloo;Deep Breathe;Nissan Motor Co., Ltd.;University of Waterloo;University of Waterloo", "aff_domain": "uwaterloo.ca;uwaterloo.ca;deepbreathe.ai;mail.nisssan.co.jp;uwaterloo.ca;uwaterloo.ca", "position": "PhD student;MS student;Intern;Researcher;Assistant Professor;Full Professor", "bibtex": "@misc{\npark2023spatiotemporal,\ntitle={Spatio-temporal Self-Attention for Egocentric 3D Pose Estimation},\nauthor={Jinman Park and Kimathi Kaai and Saad Hossain and Norikatsu Sumi and Sirisha Rambhatla and Paul W. 
Fieguth},\nyear={2023},\nurl={https://openreview.net/forum?id=F_P8Dtg43vF}\n}", "github": "", "project": "", "reviewers": "kGU6;cA4Q;uHJW", "site": "https://openreview.net/forum?id=F_P8Dtg43vF", "pdf_size": 4871890, "recommendation": "3;6;6", "confidence": "2;4;3", "correctness": "3;3;4", "technical_novelty": "2;3;3", "empirical_novelty": "2;0;3", "wc_summary_paper": "39;61;61", "wc_strength_and_weaknesses": "336;198;52", "wc_clarity_quality_novelty_and_reproducibility": "57;33;18", "wc_summary_review": "35;12;35", "wc_review": "467;304;166", "wc_reply_reviewers": "34;0;0", "wc_reply_authors": "1842;561;611", "reply_reviewers": "1;0;0", "reply_authors": "5;1;1", "recommendation_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.6666666666666667, 1.247219128924647 ], "wc_summary_paper_avg": [ 53.666666666666664, 10.370899457402697 ], "wc_strength_and_weaknesses_avg": [ 195.33333333333334, 115.95784674709265 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 36.0, 16.06237840420901 ], "wc_summary_review_avg": [ 27.333333333333332, 10.842303978193728 ], "wc_review_avg": [ 312.3333333333333, 123.02393624367939 ], "wc_reply_reviewers_avg": [ 11.333333333333334, 16.027753706895076 ], "wc_reply_authors_avg": [ 1004.6666666666666, 592.4358380636862 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 1.8856180831641267 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.8660254037844387, "corr_recommendation_correctness": 0.5, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:jaXXxIa3ciIJ:scholar.google.com/&scioq=Spatio-temporal+Self-Attention+for+Egocentric+3D+Pose+Estimation&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;1;2;0;0", "aff_unique_norm": "University of Waterloo;Deep Breathe;Nissan Motor Corporation", "aff_unique_dep": ";;", "aff_unique_url": "https://uwaterloo.ca;;https://www.nissan-global.com", "aff_unique_abbr": "UW;;Nissan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;2;0;0", "aff_country_unique": "Canada;;Japan" }, { "id": "FbC2VeNlth5", "title": "Differentiable Logic Programming for Probabilistic Reasoning", "track": "main", "status": "Reject", "tldr": "Learn Logic Rules for Reasoning", "abstract": "This paper studies inductive logic programming for probabilistic reasoning. The key problems, i.e. learning rule structures and learning rule weights, have been extensively studied with traditional discrete search methods as well as recent neural-based approaches. In this paper, we present a new approach called Differentiable Logic Programming (DLP), which provides a flexible framework for learning first-order logical rules for reasoning. We propose a continuous version of the optimization problem for learning high-quality rules as a proxy and generalize rule learning and forward chaining algorithms in a differentiable manner, which enables us to efficiently learn rule structures and weights via gradient-based methods. 
Theoretical analysis and empirical results show the effectiveness of our approach.", "keywords": "Inductive Logic Programming;Differentiable Programming;Logic Rules", "primary_area": "", "supplementary_material": "/attachment/ef6ca2c1a191fbcdfdd57692389f05fef86b58a7.zip", "author": "Tuo Xu;Lei Zou", "authorids": "~Tuo_Xu1;~Lei_Zou2", "gender": "M;M", "homepage": "https://github.com/doujzc;https://www.wict.pku.edu.cn/zoulei/", "dblp": ";81/3390-1.html", "google_scholar": ";", "orcid": ";0000-0002-8586-4400", "linkedin": ";", "or_profile": "~Tuo_Xu1;~Lei_Zou2", "aff": "Peking University;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn", "position": "MS student;Full Professor", "bibtex": "@misc{\nxu2023differentiable,\ntitle={Differentiable Logic Programming for Probabilistic Reasoning},\nauthor={Tuo Xu and Lei Zou},\nyear={2023},\nurl={https://openreview.net/forum?id=FbC2VeNlth5}\n}", "github": "", "project": "", "reviewers": "kdMZ;qPBm;qAry;mi4W", "site": "https://openreview.net/forum?id=FbC2VeNlth5", "pdf_size": 1078560, "recommendation": "3;3;5;6", "confidence": "4;3;2;2", "correctness": "3;2;3;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "0;3;3;3", "wc_summary_paper": "88;46;61;14", "wc_strength_and_weaknesses": "225;641;332;77", "wc_clarity_quality_novelty_and_reproducibility": "51;51;20;23", "wc_summary_review": "61;47;9;22", "wc_review": "425;785;422;136", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1577;3675;1169;927", "reply_reviewers": "0;0;0;0", "reply_authors": "3;6;2;2", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.25, 1.299038105676658 ], "wc_summary_paper_avg": [ 52.25, 26.7242867070386 ], "wc_strength_and_weaknesses_avg": [ 318.75, 206.91347829467273 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 36.25, 14.788086421170252 ], "wc_summary_review_avg": [ 34.75, 20.40067400847335 ], "wc_review_avg": [ 442.0, 230.20317113367489 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1837.0, 1086.2973810149779 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 3.25, 1.6393596310755 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.8703882797784892, "corr_recommendation_correctness": 0.5555555555555555, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:N5JSmy7P9_0J:scholar.google.com/&scioq=Differentiable+Logic+Programming+for+Probabilistic+Reasoning&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Accelerating Hamiltonian Monte Carlo via Chebyshev Integration Time", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11075", "id": "FbRY1XVfwK", "poster": "", "openreview": "https://openreview.net/forum?id=FbRY1XVfwK", "slides": "https://iclr.cc/virtual/2023/poster/11075", "video": "https://iclr.cc/virtual/2023/poster/11075", "author_site": "Jun-Kun Wang, Andre Wibisono", "tldr": "", "abstract": "Hamiltonian Monte Carlo (HMC) is a popular method for sampling. 
While quite a few works have studied various aspects of this method, an interesting question is how to choose its integration time to achieve acceleration. In this work, we consider accelerating the process of sampling from a distribution $\\pi(x) \\propto \\exp(-f(x))$ via HMC with time-varying integration time. When the potential $f$ is $L$-smooth and $m$-strongly convex, i.e. for sampling from a log-smooth and strongly log-concave target distribution $\\pi$, it is known that under a constant integration time, the number of iterations that ideal HMC takes to get an $\\epsilon$ Wasserstein-2 distance to the target $\\pi$ is $O( \\kappa \\log \\frac{1}{\\epsilon} )$, where $\\kappa := \\frac{L}{m}$ is the condition number. We propose a scheme of time-varying integration time based on the roots of Chebyshev polynomials. We show that in the case of quadratic potential $f$, i.e. when the target $\\pi$ is a Gaussian distribution, ideal HMC with this choice of integration time takes only $O( \\sqrt{\\kappa} \\log \\frac{1}{\\epsilon} )$ iterations to reach a Wasserstein-2 distance less than $\\epsilon$; this improvement on the dependence on condition number is akin to acceleration in optimization. The design and analysis of HMC with the proposed integration time are built on the tools of Chebyshev polynomials. Experiments demonstrate the advantage of adopting our scheme of time-varying integration time even for sampling from distributions with smooth, strongly convex potentials that are not quadratic.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/dfc2a3fefefe29f69b37957760924757e8171e47.zip", "author": "Jun-Kun Wang;Andre Wibisono", "authorids": "~Jun-Kun_Wang1;~Andre_Wibisono1", "gender": "M;M", "homepage": "https://jimwang123.github.io/;http://www.cs.yale.edu/homes/wibisono/", "dblp": "153/5463;64/10962", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Jun-Kun_Wang1;~Andre_Wibisono1", "aff": "Yale University;Yale University", "aff_domain": "yale.edu;yale.edu", "position": "Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nwang2023accelerating,\ntitle={Accelerating Hamiltonian Monte Carlo via Chebyshev Integration Time},\nauthor={Jun-Kun Wang and Andre Wibisono},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=FbRY1XVfwK}\n}", "github": "", "project": "", "reviewers": "Kxzm;GRbp;dujW;XHWf", "pdf_size": 811555, "recommendation": "5;5;6;8", "confidence": "4;3;2;3", "correctness": "4;4;4;3", "technical_novelty": "3;3;2;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "83;133;104;142", "wc_strength_and_weaknesses": "215;134;110;46", "wc_clarity_quality_novelty_and_reproducibility": "33;101;37;64", "wc_summary_review": "36;156;76;25", "wc_review": "367;524;327;277", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "3428;1587;352;793", "reply_reviewers": "0;0;0;0", "reply_authors": "7;5;2;2", "recommendation_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 115.5, 23.43608329051593 ], "wc_strength_and_weaknesses_avg": [ 126.25, 60.499483468869386 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 58.75, 27.151197027018902 ], "wc_summary_review_avg": [ 73.25, 51.40707636113923 ], "wc_review_avg": [ 373.75, 92.42125026204742 ], 
"wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1540.0, 1176.4465563721967 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 4.0, 2.1213203435596424 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.28867513459481287, "corr_recommendation_correctness": -0.9428090415820632, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6569217075753548162&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=FbRY1XVfwK", "email": "yale.edu;yale.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Yale University", "aff_unique_dep": "", "aff_unique_url": "https://www.yale.edu", "aff_unique_abbr": "Yale", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Learnable Behavior Control: Breaking Atari Human World Records via Sample-Efficient Behavior Selection", "status": "Top-5%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12208", "id": "FeWvD0L_a4", "poster": "/media/PosterPDFs/ICLR%202023/12208.png?t=1681959804.18974", "openreview": "https://openreview.net/forum?id=FeWvD0L_a4", "slides": "https://iclr.cc/virtual/2023/poster/12208", "video": "https://iclr.cc/virtual/2023/poster/12208", "author_site": "Jiajun Fan, Yuzheng Zhuang, Yuecheng Liu, Jianye HAO, Bin Wang, Jiangcheng Zhu, Hao Wang, Shu-Tao Xia", "tldr": "We have constructed a general framework to control the behaviors in RL and achieved SOTA performance in Atari 1B benchmark.", "abstract": "The exploration problem is one of the main challenges in deep reinforcement learning (RL). Recent promising works tried to handle the problem with population-based methods, which collect samples with diverse behaviors derived from a population of different exploratory policies. Adaptive policy selection has been adopted for behavior control. However, the behavior selection space is largely limited by the predefined policy population, which further limits behavior diversity. In this paper, we propose a general framework called Learnable Behavioral Control (LBC) to address the limitation, which a) enables a significantly enlarged behavior selection space via formulating a hybrid behavior mapping from all policies; b) constructs a unified learnable process for behavior selection. We introduce LBC into distributed off-policy actor-critic methods and achieve behavior control via optimizing the selection of the behavior mappings with bandit-based meta-controllers. 
Our agents have achieved a 10077.52% mean human-normalized score and surpassed 24 human world records within 1B training frames in the Arcade Learning Environment, demonstrating significant state-of-the-art (SOTA) performance without degrading sample efficiency.", "keywords": "Deep Reinforcement Learning;The Arcade Learning Environment;Human World Records;Behavioral Control", "primary_area": "", "supplementary_material": "/attachment/6746eb48c97dfbd01313f76908b65a38fde4a943.zip", "author": "Jiajun Fan;Yuzheng Zhuang;Yuecheng Liu;Jianye HAO;Bin Wang;Jiangcheng Zhu;Hao Wang;Shu-Tao Xia", "authorids": "~Jiajun_Fan1;~Yuzheng_Zhuang1;~Yuecheng_Liu2;~Jianye_HAO1;~Bin_Wang12;~Jiangcheng_Zhu1;~Hao_Wang25;~Shu-Tao_Xia1", "gender": ";F;;M;M;M;;M", "homepage": ";;;http://www.icdai.org/jianye.html;http://binwang.top;;;https://www.sigs.tsinghua.edu.cn/xst/list.htm", "dblp": ";;;21/7664.html;13/1898-34;202/5904.html;;03/6195", "google_scholar": ";https://scholar.google.com/citations?hl=en;;;KWZG_YsAAAAJ;ZosT8hcAAAAJ;;https://scholar.google.com.hk/citations?user=koAXTXgAAAAJ", "orcid": ";;;0000-0002-0422-8235;0000-0002-0267-3749;;;0000-0002-8639-982X", "linkedin": ";;;;;https://cn.linkedin.com/in/%E7%96%86%E6%88%90-%E6%9C%B1-85672b169;;", "or_profile": "~Jiajun_Fan1;~Yuzheng_Zhuang1;~Yuecheng_Liu2;~Jianye_HAO1;~Bin_Wang12;~Jiangcheng_Zhu1;~Hao_Wang25;~Shu-Tao_Xia1", "aff": ";Huawei Technologies Ltd.;;Tianjin University;Huawei Noah's Ark Lab;Huawei Technologies Ltd.;;Shenzhen International Graduate School, Tsinghua University", "aff_domain": ";huawei.com;;tju.edu.cn;huawei.com;huawei.com;;sz.tsinghua.edu.cn", "position": ";Research Engineer;;Associate Professor;Senior Researcher;Researcher;;Full Professor", "bibtex": "@inproceedings{\nfan2023learnable,\ntitle={Learnable Behavior Control: Breaking Atari Human World Records via Sample-Efficient Behavior Selection},\nauthor={Jiajun Fan and Yuzheng Zhuang and Yuecheng Liu and Jianye HAO and Bin Wang and Jiangcheng Zhu and Hao Wang and Shu-Tao Xia},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=FeWvD0L_a4}\n}", "github": "", "project": "", "reviewers": "Spxv;qVG5;PtKB", "pdf_size": 884250, "recommendation": "8;8;10", "confidence": "4;4;5", "correctness": "4;4;3", "technical_novelty": "3;3;2", "empirical_novelty": "3;4;2", "wc_summary_paper": "63;88;39", "wc_strength_and_weaknesses": "100;206;963", "wc_clarity_quality_novelty_and_reproducibility": "137;279;66", "wc_summary_review": "32;70;82", "wc_review": "332;643;1150", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 8.666666666666666, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 63.333333333333336, 20.005554784164875 ], "wc_strength_and_weaknesses_avg": [ 423.0, 384.2820144980333 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 160.66666666666666, 88.55255815364994 ], "wc_summary_review_avg": [ 61.333333333333336, 21.31248981752771 ], "wc_review_avg": [ 708.3333333333334, 337.12740750180615 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 8, 
0 ], "corr_recommendation_confidence": 0.9999999999999998, "corr_recommendation_correctness": -0.9999999999999997, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9578396533066026541&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "pdf": "https://openreview.net/pdf?id=FeWvD0L_a4", "email": ";huawei.com;;tju.edu.cn;huawei.com;huawei.com;;sz.tsinghua.edu.cn", "author_num": 8, "aff_unique_index": "0;1;0;0;2", "aff_unique_norm": "Huawei;Tianjin University;Tsinghua University", "aff_unique_dep": "Huawei Technologies;;Shenzhen International Graduate School", "aff_unique_url": "https://www.huawei.com;http://www.tju.edu.cn;https://www.tsinghua.edu.cn", "aff_unique_abbr": "Huawei;TJU;THU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Knowledge Distillation based Degradation Estimation for Blind Super-Resolution", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12136", "id": "Fg3mYW8owg", "poster": "/media/PosterPDFs/ICLR%202023/12136.png?t=1680774367.718447", "openreview": "https://openreview.net/forum?id=Fg3mYW8owg", "slides": "https://iclr.cc/virtual/2023/poster/12136", "video": "https://iclr.cc/virtual/2023/poster/12136", "author_site": "Bin Xia, Yulun Zhang, Yitong Wang, Yapeng Tian, Wenming Yang, Radu Timofte, Luc Van Gool", "tldr": "We propose a knowledge distillation based blind super-resolution network, which can generalize to all degradation processes and achieve SOTA performance efficiently.", "abstract": "Blind image super-resolution (Blind-SR) aims to recover a high-resolution (HR) image from its corresponding low-resolution (LR) input image with unknown degradations. Most of the existing works design an explicit degradation estimator for each degradation to guide SR. However, it is infeasible to provide concrete labels of multiple degradation combinations (\\eg, blur, noise, jpeg compression) to supervise the degradation estimator training. In addition, these special designs for certain degradation, such as blur, impedes the models from being generalized to handle different degradations. To this end, it is necessary to design an implicit degradation estimator that can extract discriminative degradation representation for all degradations without relying on the supervision of degradation ground-truth. In this paper, we propose a Knowledge Distillation based Blind-SR network (KDSR). It consists of a knowledge distillation based implicit degradation estimator network (KD-IDE) and an efficient SR network. To learn the KDSR model, we first train a teacher network: KD-IDE$_{T}$. It takes paired HR and LR patches as inputs and is optimized with the SR network jointly. Then, we further train a student network KD-IDE$_{S}$, which only takes LR images as input and learns to extract the same implicit degradation representation (IDR) as KD-IDE$_{T}$. In addition, to fully use extracted IDR, we design a simple, strong, and efficient IDR based dynamic convolution residual block (IDR-DCRB) to build an SR network. We conduct extensive experiments under classic and real-world degradation settings. The results show that KDSR achieves SOTA performance and can generalize to various degradation processes. 
The source code and pre-trained models will be released.", "keywords": "Image Super-Resolution", "primary_area": "", "supplementary_material": "/attachment/799ec29bf1038d624b0b117c3c73d49343f8e4e2.zip", "author": "Bin Xia;Yulun Zhang;Yitong Wang;Yapeng Tian;Wenming Yang;Radu Timofte;Luc Van Gool", "authorids": "~Bin_Xia2;~Yulun_Zhang1;~Yitong_Wang1;~Yapeng_Tian1;~Wenming_Yang1;~Radu_Timofte1;~Luc_Van_Gool1", "gender": "M;M;M;M;M;M;", "homepage": "https://github.com/Zj-BinXia;http://yulunzhang.com/;;http://www.yapengtian.com/;https://www.sigs.tsinghua.edu.cn/ywm_en/main.htm;https://www.informatik.uni-wuerzburg.de/computervision/;", "dblp": ";166/2763-1.html;;176/4020;75/2339.html;24/8616;61/5017", "google_scholar": "rh2fID8AAAAJ;ORmLjWoAAAAJ;NfFTKfYAAAAJ;lxCqdpoAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.ch/citations?user=u3MwH5kAAAAJ;https://scholar.google.be/citations?user=TwMib_QAAAAJ", "orcid": ";0000-0002-2288-5079;;;0000-0002-2506-1286;0000-0002-1478-0402;", "linkedin": ";yulun-zhang-1116b5b9/;;;;https://ch.linkedin.com/in/radutimofte;", "or_profile": "~Bin_Xia2;~Yulun_Zhang1;~Yitong_Wang1;~Yapeng_Tian1;~Wenming_Yang1;~Radu_Timofte1;~Luc_Van_Gool1", "aff": "Tsinghua University;Swiss Federal Institute of Technology;ByteDance Inc;University of Texas at Dallas;Tsinghua University,;Bayerische Julius-Maximilians-Universit\u00e4t W\u00fcrzburg;KU Leuven", "aff_domain": "tsinghua.edu.cn;ethz.ch;bytedance.com;utdallas.edu;tsinghua.edu.cn;uni-wuerzburg.de;kuleuven.be", "position": "MS student;Postdoc;Researcher;Assistant Professor;Associate Professor;Full Professor;Emeritus", "bibtex": "@inproceedings{\nxia2023knowledge,\ntitle={Knowledge Distillation based Degradation Estimation for Blind Super-Resolution},\nauthor={Bin Xia and Yulun Zhang and Yitong Wang and Yapeng Tian and Wenming Yang and Radu Timofte and Luc Van Gool},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=Fg3mYW8owg}\n}", "github": "", "project": "", "reviewers": "GVEj;HYPQ;cRmZ;y283", "pdf_size": 6553998, "recommendation": "6;6;6;6", "confidence": "5;5;4;4", "correctness": "3;4;4;3", "technical_novelty": "2;3;3;2", "empirical_novelty": "3;0;3;2", "wc_summary_paper": "51;41;74;138", "wc_strength_and_weaknesses": "162;129;176;363", "wc_clarity_quality_novelty_and_reproducibility": "23;19;43;33", "wc_summary_review": "52;70;186;128", "wc_review": "288;259;479;662", "wc_reply_reviewers": "381;0;0;0", "wc_reply_authors": "1953;291;194;800", "reply_reviewers": "2;0;0;0", "reply_authors": "6;2;2;3", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.5, 0.5 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.0, 1.224744871391589 ], "wc_summary_paper_avg": [ 76.0, 37.74254893353124 ], "wc_strength_and_weaknesses_avg": [ 207.5, 91.3851738522174 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 29.5, 9.313968005098578 ], "wc_summary_review_avg": [ 109.0, 52.583267300539625 ], "wc_review_avg": [ 422.0, 162.3068082367465 ], "wc_reply_reviewers_avg": [ 95.25, 164.97783942093557 ], "wc_reply_authors_avg": [ 809.5, 699.1718315264138 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 3.25, 1.6393596310755 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 43, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=9401235376863544178&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=Fg3mYW8owg", "email": "tsinghua.edu.cn;ethz.ch;bytedance.com;utdallas.edu;tsinghua.edu.cn;uni-wuerzburg.de;kuleuven.be", "author_num": 7, "aff_unique_index": "0;1;2;3;0;4;5", "aff_unique_norm": "Tsinghua University;Swiss Federal Institute of Technology;ByteDance;University of Texas at Dallas;University of W\u00fcrzburg;Katholieke Universiteit Leuven", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.ethz.ch;https://www.bytedance.com;https://www.utdallas.edu;https://www.uni-wuerzburg.de;https://www.kuleuven.be", "aff_unique_abbr": "THU;ETH Zurich;ByteDance;UT Dallas;JMU;KU Leuven", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Dallas;W\u00fcrzburg", "aff_country_unique_index": "0;1;0;2;0;3;4", "aff_country_unique": "China;Switzerland;United States;Germany;Belgium" }, { "title": "On The Specialization of Neural Modules", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10955", "id": "Fh97BDaR6I", "poster": "/media/PosterPDFs/ICLR%202023/10955.png?t=1682631507.2782705", "openreview": "https://openreview.net/forum?id=Fh97BDaR6I", "slides": "https://iclr.cc/virtual/2023/poster/10955", "video": "https://iclr.cc/virtual/2023/poster/10955", "author_site": "Devon Jarvis, Richard Klein, Benjamin Rosman, Andrew Saxe", "tldr": "We use the linear neural networks framework to mathematically study the ability of neural modules to specialize and facilitate systematic generalization in modular network architectures.", "abstract": "A number of machine learning models have been proposed with the goal of achieving systematic generalization: the ability to reason about new situations by combining aspects of previous experiences. These models leverage compositional architectures which aim to learn specialized modules dedicated to structures in a task that can be composed to solve novel problems with similar structures. While the compositionality of these architectures is guaranteed by design, the modules specializing is not. Here we theoretically study the ability of network modules to specialize to useful structures in a dataset and achieve systematic generalization. To this end we introduce a minimal space of datasets motivated by practical systematic generalization benchmarks. From this space of datasets we present a mathematical definition of systematicity and study the learning dynamics of linear neural modules when solving components of the task. Our results shed light on the difficulty of module specialization, what is required for modules to successfully specialize, and the necessity of modular architectures to achieve systematicity. 
Finally, we confirm that the theoretical results in our tractable setting generalize to more complex datasets and non-linear architectures.", "keywords": "Systematic Generalization;Linear Neural Networks;Neural Module Networks", "primary_area": "", "supplementary_material": "/attachment/bf7f9111eb11d12a99b6b5e9c5d1907a7aa57181.zip", "author": "Devon Jarvis;Richard Klein;Benjamin Rosman;Andrew M Saxe", "authorids": "~Devon_Jarvis1;~Richard_Klein1;~Benjamin_Rosman1;~Andrew_M_Saxe1", "gender": "M;M;M;M", "homepage": "https://jarvisdevon.github.io/;https://www.wits.ac.za/staff/academic-a-z-listing/k/richardkleinwitsacza/;http://www.raillab.org;https://www.saxelab.org", "dblp": "320/3650;26/8293;45/4591;39/6894", "google_scholar": "https://scholar.google.co.za/citations?user=MJjN5nEAAAAJ;https://scholar.google.co.za/citations?user=QZ_MjosAAAAJ;https://scholar.google.co.za/citations?user=pWJ0SocAAAAJ;h0Al1fcAAAAJ", "orcid": "0000-0003-2362-7538;0000-0003-0783-2072;;0000-0002-9831-8812", "linkedin": "devon-jarvis-6b059a139;;;", "or_profile": "~Devon_Jarvis1;~Richard_Klein1;~Benjamin_Rosman1;~Andrew_M_Saxe1", "aff": "University College London, University of London;University of the Witwatersrand;University of the Witwatersrand;University College London, University of London", "aff_domain": "ucl.ac.uk;wits.ac.za;wits.ac.za;ucl.ac.uk", "position": "Researcher;Associate Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\njarvis2023on,\ntitle={On The Specialization of Neural Modules},\nauthor={Devon Jarvis and Richard Klein and Benjamin Rosman and Andrew M Saxe},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=Fh97BDaR6I}\n}", "github": "", "project": "", "reviewers": "83KP;tTiF;rTdC", "pdf_size": 1721885, "recommendation": "5;6;8", "confidence": "3;2;4", "correctness": "3;4;4", "technical_novelty": "3;4;4", "empirical_novelty": "2;3;4", "wc_summary_paper": "389;83;66", "wc_strength_and_weaknesses": "627;893;129", "wc_clarity_quality_novelty_and_reproducibility": "212;56;115", "wc_summary_review": "34;149;40", "wc_review": "1262;1181;350", "wc_reply_reviewers": "0;406;0", "wc_reply_authors": "2366;2627;96", "reply_reviewers": "0;2;0", "reply_authors": "5;6;1", "recommendation_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 179.33333333333334, 148.419076342033 ], "wc_strength_and_weaknesses_avg": [ 549.6666666666666, 316.6589472743331 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 127.66666666666667, 64.31346843564124 ], "wc_summary_review_avg": [ 74.33333333333333, 52.854096866331524 ], "wc_review_avg": [ 931.0, 412.15773679502854 ], "wc_reply_reviewers_avg": [ 135.33333333333334, 191.39023544115884 ], "wc_reply_authors_avg": [ 1696.3333333333333, 1136.6120221469105 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 4.0, 2.160246899469287 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.6546536707079772, "corr_recommendation_correctness": 0.7559289460184545, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3811265072654282523&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "pdf": 
"https://openreview.net/pdf?id=Fh97BDaR6I", "email": "ucl.ac.uk;wits.ac.za;wits.ac.za;ucl.ac.uk", "author_num": 4, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "University College London;University of the Witwatersrand", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucl.ac.uk;https://www.wits.ac.za", "aff_unique_abbr": "UCL;Wits", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "United Kingdom;South Africa" }, { "id": "FhYkgzYNMQ7", "title": "On Representation Learning in the First Layer of Deep CNNs and the Dynamics of Gradient Descent", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "It has previously been reported that the representation that is learned in the first layer of deep CNNs is very different from the initial representation and highly consistent across initialization and architecture. In this work, we quantify this consistency by considering the set of filters as a filter bank and measuring its energy distribution. We find that the energy distribution is remarkably consistent and try to determine the source of this consistency. We show that this consistency cannot be explained by the fact that CNNs learn a representation that is useful for recognition and that CNNs trained with fixed, random filters in the first layer yield comparable recognition performance to full learning. We then show that similar behavior occurs in simple, linear CNNs and obtain an analytical characterization of the energy profile of linear CNNs trained with gradient descent. Our analysis shows that the energy profile is determined by two factors (1) the correlation of the average patch and the class label and (2) an implicit bias given the dynamics of gradient descent. 
Finally, we show that in commonly used image recognition datasets the correlation between the average patch and the class label is very low and it is the implicit bias that best explains the consistency of representations observed in real-world CNNs.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/2d9481e017ac45bfa423e44cfeb7f6105b369dd1.zip", "author": "Rhea Chowers;Yair Weiss", "authorids": "~Rhea_Chowers1;~Yair_Weiss1", "gender": ";M", "homepage": ";http://www.cs.huji.ac.il/~yweiss/", "dblp": ";44/1092", "google_scholar": "pTu3JKYAAAAJ;https://scholar.google.com.tw/citations?user=9DXQi8gAAAAJ", "orcid": "0009-0001-5066-5794;", "linkedin": "https://il.linkedin.com/in/rhea-chowers-838938146?original_referer=https%3A%2F%2Fwww.google.com%2F;", "or_profile": "~Rhea_Chowers1;~Yair_Weiss1", "aff": "Hebrew University of Jerusalem;Hebrew University of Jerusalem", "aff_domain": "huji.ac.il;huji.ac.il", "position": "PhD student;Professor", "bibtex": "@misc{\nchowers2023on,\ntitle={On Representation Learning in the First Layer of Deep {CNN}s and the Dynamics of Gradient Descent},\nauthor={Rhea Chowers and Yair Weiss},\nyear={2023},\nurl={https://openreview.net/forum?id=FhYkgzYNMQ7}\n}", "github": "", "project": "", "reviewers": "zyNX;SEZy;6fQC;iCuX", "site": "https://openreview.net/forum?id=FhYkgzYNMQ7", "pdf_size": 2723112, "recommendation": "3;3;5;5", "confidence": "3;3;4;3", "correctness": "2;2;2;2", "technical_novelty": "3;2;3;2", "empirical_novelty": "2;2;3;2", "wc_summary_paper": "48;156;191;75", "wc_strength_and_weaknesses": "150;519;418;77", "wc_clarity_quality_novelty_and_reproducibility": "76;45;120;473", "wc_summary_review": "40;33;60;49", "wc_review": "314;753;789;674", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;310;0;144", "reply_reviewers": "0;0;0;0", "reply_authors": "0;2;0;1", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 2.0, 0.0 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 117.5, 58.139917440601856 ], "wc_strength_and_weaknesses_avg": [ 291.0, 182.88657687211492 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 178.5, 172.10534564620588 ], "wc_summary_review_avg": [ 45.5, 10.111874208078342 ], "wc_review_avg": [ 632.5, 188.53182755174257 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 113.5, 127.77617148748823 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0.75, 0.82915619758885 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ZhfgHW9kqK0J:scholar.google.com/&scioq=On+Representation+Learning+in+the+First+Layer+of+Deep+CNNs+and+the+Dynamics+of+Gradient+Descent&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Hebrew University of Jerusalem", "aff_unique_dep": "", "aff_unique_url": "https://www.huji.ac.il", "aff_unique_abbr": "HUJI", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Jerusalem", "aff_country_unique_index": "0;0", "aff_country_unique": "Israel" }, { "id": "Fj1S0SV8p3U", "title": "Augmentation Curriculum Learning For Generalization in RL", "track": "main", "status": "Reject", "tldr": "Combining data augmentation, reinforcement learning and curriculum learning for generalization in reinforcement learning", "abstract": "Many 
Reinforcement Learning tasks rely solely on pixel-based observations of the environment. During deployment, these observations can fall victim to visual perturbations and distortions, causing the agent\u2019s policy to degrade significantly in performance. This motivates the need for robust agents that can generalize in the face of visual distribution shift. One common technique for doing this is to apply augmentations during training; however, it comes at the cost of performance. We propose Augmentation Curriculum Learning, a novel curriculum learning approach that schedules augmentation during training into a weak augmentation phase and a strong augmentation phase. We also introduce a novel visual augmentation strategy that proves beneficial on the benchmarks we evaluate. Our method achieves state-of-the-art performance on the DeepMind Control Generalization Benchmark.", "keywords": "reinforcement learning;generalization;pixel-based RL;embodied learning", "primary_area": "", "supplementary_material": "/attachment/b93c90970819e55d851cd3c7ead6c916048bda57.zip", "author": "Dylan Yung;Andrew Szot;Prithvijit Chattopadhyay;Judy Hoffman;Zsolt Kira", "authorids": "~Dylan_Yung1;~Andrew_Szot1;~Prithvijit_Chattopadhyay1;~Judy_Hoffman1;~Zsolt_Kira1", "gender": ";M;M;F;M", "homepage": ";https://www.andrewszot.com;https://prithv1.xyz/;https://www.cc.gatech.edu/~judy/;https://faculty.cc.gatech.edu/~zk15", "dblp": ";;179/2452;45/10336;36/4127", "google_scholar": ";IwIWKPYAAAAJ;https://scholar.google.co.in/citations?user=rIK7AMkAAAAJ;mqpjAt4AAAAJ;2a5XgNAAAAAJ", "orcid": ";;;;0000-0002-2626-2004", "linkedin": "dylan-yung-4544a6149/;;;;", "or_profile": "~Dylan_Yung1;~Andrew_Szot1;~Prithvijit_Chattopadhyay1;~Judy_Hoffman1;~Zsolt_Kira1", "aff": ";Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology;Georgia Tech Research Institute", "aff_domain": ";gatech.edu;gatech.edu;gatech.edu;gtri.gatech.edu", "position": ";PhD student;PhD;Assistant Professor;Senior Research Scientist", "bibtex": "@misc{\nyung2023augmentation,\ntitle={Augmentation Curriculum Learning For Generalization in {RL}},\nauthor={Dylan Yung and Andrew Szot and Prithvijit Chattopadhyay and Judy Hoffman and Zsolt Kira},\nyear={2023},\nurl={https://openreview.net/forum?id=Fj1S0SV8p3U}\n}", "github": "", "project": "", "reviewers": "wrjJ;QnFd;3K31;iZtt", "site": "https://openreview.net/forum?id=Fj1S0SV8p3U", "pdf_size": 1525182, "recommendation": "3;5;5;6", "confidence": "4;3;2;3", "correctness": "4;3;3;3", "technical_novelty": "2;2;2;2", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "104;82;87;195", "wc_strength_and_weaknesses": "267;287;431;266", "wc_clarity_quality_novelty_and_reproducibility": "179;52;93;92", "wc_summary_review": "109;57;39;120", "wc_review": "659;478;650;673", "wc_reply_reviewers": "507;17;0;8", "wc_reply_authors": "722;649;550;597", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 117.0, 45.765707685995636 ], "wc_strength_and_weaknesses_avg": [ 312.75, 68.78362813926 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 104.0, 46.351914739307155 ], "wc_summary_review_avg": [ 81.25, 34.07620137280563 ], "wc_review_avg": [ 615.0, 79.52043762455033 ], "wc_reply_reviewers_avg": [ 133.0, 216.0127311062938 ],
"wc_reply_authors_avg": [ 629.5, 63.86117756509036 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.6488856845230502, "corr_recommendation_correctness": -0.9271726499455306, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:8p6PuPXLGsUJ:scholar.google.com/&scioq=Augmentation+Curriculum+Learning+For+Generalization+in+RL&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Georgia Institute of Technology;Georgia Tech Research Institute", "aff_unique_dep": ";", "aff_unique_url": "https://www.gatech.edu;https://www.gtri.gatech.edu", "aff_unique_abbr": "Georgia Tech;GTRI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "DreamFusion: Text-to-3D using 2D Diffusion", "status": "Top-5%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10961", "id": "FjNys5c7VyY", "poster": "", "openreview": "https://openreview.net/forum?id=FjNys5c7VyY", "slides": "https://iclr.cc/virtual/2023/poster/10961", "video": "https://iclr.cc/virtual/2023/poster/10961", "author_site": "Ben Poole, Ajay Jain, Jonathan T. Barron, Ben Mildenhall", "tldr": "DeepDream on a pretrained 2D diffusion model enables text-to-3D synthesis", "abstract": "Recent breakthroughs in text-to-image synthesis have been driven by diffusion models trained on billions of image-text pairs. Adapting this approach to 3D synthesis would require large-scale datasets of labeled 3D or multiview data and efficient architectures for denoising 3D data, neither of which currently exist. In this work, we circumvent these limitations by using a pretrained 2D text-to-image diffusion model to perform text-to-3D synthesis. We introduce a loss based on probability density distillation that enables the use of a 2D diffusion model as a prior for optimization of a parametric image generator. Using this loss in a DeepDream-like procedure, we optimize a randomly-initialized 3D model (a Neural Radiance Field, or NeRF) via gradient descent such that its 2D renderings from random angles achieve a low loss. The resulting 3D model of the given text can be viewed from any angle, relit by arbitrary illumination, or composited into any 3D environment. Our approach requires no 3D training data and no modifications to the image diffusion model, demonstrating the effectiveness of pretrained image diffusion models as priors.", "keywords": "diffusion models;score-based generative models;NeRF;neural rendering;3d synthesis", "primary_area": "", "supplementary_material": "/attachment/2085ee205d8d430c77198b35a3a47a814259c85e.zip", "author": "Ben Poole;Ajay Jain;Jonathan T. 
Barron;Ben Mildenhall", "authorids": "~Ben_Poole1;~Ajay_Jain1;~Jonathan_T._Barron1;~Ben_Mildenhall1", "gender": "M;M;M;M", "homepage": "https://cs.stanford.edu/~poole;https://ajayj.com;https://bmild.github.io;https://jonbarron.info/", "dblp": "16/10397;;167/4350;30/9988", "google_scholar": "i5FMLA4AAAAJ;Ih7iLuUAAAAJ;NozIDL8AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;", "linkedin": ";ajay-jain;;", "or_profile": "~Ben_Poole1;~Ajay_Jain1;~Ben_Mildenhall1;~Jonathan_T_Barron2", "aff": "Google;University of California, Berkeley;Google;Google", "aff_domain": "google.com;berkeley.edu;google.com;google.com", "position": "Research Scientist;PhD student;Researcher;Research Scientist", "bibtex": "@inproceedings{\npoole2023dreamfusion,\ntitle={DreamFusion: Text-to-3D using 2D Diffusion},\nauthor={Ben Poole and Ajay Jain and Jonathan T. Barron and Ben Mildenhall},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=FjNys5c7VyY}\n}", "github": "", "project": "", "reviewers": "nnv7;Ccog;UY2y;44Jy", "pdf_size": 12735186, "recommendation": "8;8;8;8", "confidence": "4;3;4;4", "correctness": "3;3;4;4", "technical_novelty": "3;4;4;4", "empirical_novelty": "3;0;4;3", "wc_summary_paper": "42;77;64;76", "wc_strength_and_weaknesses": "208;161;351;209", "wc_clarity_quality_novelty_and_reproducibility": "20;64;15;65", "wc_summary_review": "53;33;15;86", "wc_review": "323;335;445;436", "wc_reply_reviewers": "0;39;0;21", "wc_reply_authors": "359;311;526;381", "reply_reviewers": "0;1;0;1", "reply_authors": "1;1;1;1", "recommendation_avg": [ 8.0, 0.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 1.5 ], "wc_summary_paper_avg": [ 64.75, 14.095655359010449 ], "wc_strength_and_weaknesses_avg": [ 232.25, 71.25087718758274 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 41.0, 23.569047498785352 ], "wc_summary_review_avg": [ 46.75, 26.34743820563965 ], "wc_review_avg": [ 384.75, 56.00167408212008 ], "wc_reply_reviewers_avg": [ 15.0, 16.294170736800325 ], "wc_reply_authors_avg": [ 394.25, 80.16662335411165 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 2361, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9434943370687834774&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=FjNys5c7VyY", "email": "google.com;berkeley.edu;google.com;google.com", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Google;University of California, Berkeley", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://www.berkeley.edu", "aff_unique_abbr": "Google;UC Berkeley", "aff_campus_unique_index": "0;1;0;0", "aff_campus_unique": "Mountain View;Berkeley", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "FkRMv-mlSTy", "title": "Adaptive Computation with Elastic Input Sequence", "track": "main", "status": "Reject", "tldr": "We present a new perspective for embattling dynamic allocation of computation budget to different inputs via introducing elasticity to the input length.", "abstract": "When solving a problem, human beings have the adaptive ability in terms of the type of information they use, the procedure they 
take, and the amount of time they spend approaching and solving the problem. However, most standard neural networks have the same function type and fixed computation budget on different samples regardless of their nature and difficulty. Adaptivity is a powerful paradigm as it not only imbues practitioners with flexibility pertaining to the downstream usage of these models but can also serve as a powerful inductive bias for solving certain challenging classes of problems. In this work, we propose a new strategy, AdaTape, that enables dynamic computation in neural networks via adaptive tape tokens. AdaTape employs an elastic input sequence by equipping an existing architecture with a dynamic read and write tape. Specifically, we adaptively generate input sequences using tape tokens obtained from a tape bank that can either be trainable or generated from input data. We analyze the challenges and requirements to obtain dynamic sequence content and length, and propose the Adaptive Tape Reader (ATR) algorithm to achieve both objectives. Via extensive experiments on image recognition tasks, we show that AdaTape can achieve better performance while maintaining the computational cost.", "keywords": "Adaptive computation;dynamic allocation of computation budget.", "primary_area": "", "supplementary_material": "/attachment/d6fc4c180f265024f4c916c065584ce21965691d.zip", "author": "Fuzhao Xue;Valerii Likhosherstov;Anurag Arnab;Neil Houlsby;Yi Tay;Mostafa Dehghani;Yang You", "authorids": "~Fuzhao_Xue1;~Valerii_Likhosherstov2;~Anurag_Arnab1;~Neil_Houlsby1;~Yi_Tay1;~Mostafa_Dehghani1;~Yang_You1", "gender": "M;;;M;M;M;M", "homepage": "https://xuefuzhao.github.io/;https://valerytyumen.github.io/;;https://neilhoulsby.github.io/;http://yitay.net;http://mostafadehghani.com/;https://www.comp.nus.edu.sg/~youy/", "dblp": "248/1245;232/4391.html;;91/10669;;125/4062;33/8167-1.html", "google_scholar": "JMHsqIkAAAAJ;iiVVfxUAAAAJ;;https://scholar.google.com/citations?hl=en;VBclY_cAAAAJ;https://scholar.google.nl/citations?user=MiHOX3QAAAAJ;jF4dPZwAAAAJ", "orcid": ";;;;;;", "linkedin": "fuzhao-xue-6410561a6/;;;;;;yang-you-0b92914b/", "or_profile": "~Fuzhao_Xue1;~Valerii_Likhosherstov2;~Anurag_Arnab1;~Neil_Houlsby1;~Yi_Tay1;~Mostafa_Dehghani1;~Yang_You1", "aff": "National University of Singapore;Waymo;;Google;Google;Google DeepMind;National University of Singapore", "aff_domain": "nus.edu.sg;waymo.com;;google.com;google.com;google.com;nus.edu.sg", "position": "PhD student;Researcher;;Researcher;Research Scientist;Research Scientist;Professor", "bibtex": "@misc{\nxue2023adaptive,\ntitle={Adaptive Computation with Elastic Input Sequence},\nauthor={Fuzhao Xue and Valerii Likhosherstov and Anurag Arnab and Neil Houlsby and Yi Tay and Mostafa Dehghani and Yang You},\nyear={2023},\nurl={https://openreview.net/forum?id=FkRMv-mlSTy}\n}", "github": "", "project": "", "reviewers": "V6XW;rSKm;vn6P;pZjZ", "site": "https://openreview.net/forum?id=FkRMv-mlSTy", "pdf_size": 554811, "recommendation": "5;5;6;6", "confidence": "3;3;4;5", "correctness": "3;4;3;4", "technical_novelty": "2;3;4;4", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "86;68;64;74", "wc_strength_and_weaknesses": "55;177;476;413", "wc_clarity_quality_novelty_and_reproducibility": "18;28;43;22", "wc_summary_review": "55;52;50;76", "wc_review": "214;325;633;585", "wc_reply_reviewers": "0;0;74;0", "wc_reply_authors": "332;597;459;506", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;2;1", "recommendation_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.75, 
0.82915619758885 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.25, 0.82915619758885 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 73.0, 8.306623862918075 ], "wc_strength_and_weaknesses_avg": [ 280.25, 171.27372098486094 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 27.75, 9.496709956611289 ], "wc_summary_review_avg": [ 58.25, 10.40132203135736 ], "wc_review_avg": [ 439.25, 175.05195657289866 ], "wc_reply_reviewers_avg": [ 18.5, 32.04293994002423 ], "wc_reply_authors_avg": [ 473.5, 95.57850176687224 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.9045340337332909, "corr_recommendation_correctness": 0.0, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12362381145933542821&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;2;2;2;0", "aff_unique_norm": "National University of Singapore;Waymo;Google", "aff_unique_dep": ";;Google", "aff_unique_url": "https://www.nus.edu.sg;https://www.waymo.com;https://www.google.com", "aff_unique_abbr": "NUS;Waymo;Google", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;1;1;2;0", "aff_country_unique": "Singapore;United States;United Kingdom" }, { "title": "Language Modelling with Pixels", "status": "Top-5%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11621", "id": "FkSp8VW8RjH", "poster": "", "openreview": "https://openreview.net/forum?id=FkSp8VW8RjH", "slides": "https://iclr.cc/virtual/2023/poster/11621", "video": "https://iclr.cc/virtual/2023/poster/11621", "author_site": "Phillip Rust, Jonas F. Lotz, Emanuele Bugliarello, Elizabeth Salesky, Miryam de Lhoneux, Desmond Elliott", "tldr": "We train PIXEL, a language model that operates solely on images of rendered text, and show that it is possible to transfer representations across languages based on orthographic similarity or the co-activation of pixels.", "abstract": "Language models are defined over a finite set of inputs, which creates a vocabulary bottleneck when we attempt to scale the number of supported languages. Tackling this bottleneck results in a trade-off between what can be represented in the embedding matrix and computational issues in the output layer. This paper introduces PIXEL, the Pixel-based Encoder of Language, which suffers from neither of these issues. PIXEL is a pretrained language model that renders text as images, making it possible to transfer representations across languages based on orthographic similarity or the co-activation of pixels. PIXEL is trained to reconstruct the pixels of masked patches instead of predicting a distribution over tokens. We pretrain the 86M parameter PIXEL model on the same English data as BERT and evaluate on syntactic and semantic tasks in typologically diverse languages, including various non-Latin scripts. We find that PIXEL substantially outperforms BERT on syntactic and semantic processing tasks on scripts that are not found in the pretraining data, but PIXEL is slightly weaker than BERT when working with Latin scripts. 
Furthermore, we find that PIXEL is more robust than BERT to orthographic attacks and linguistic code-switching, further confirming the benefits of modelling language with pixels.", "keywords": "representation learning;nlp;transformers;language model;masked autoencoder", "primary_area": "", "supplementary_material": "", "author": "Phillip Rust;Jonas F. Lotz;Emanuele Bugliarello;Elizabeth Salesky;Miryam de Lhoneux;Desmond Elliott", "authorids": "~Phillip_Rust1;~Jonas_F._Lotz1;~Emanuele_Bugliarello1;~Elizabeth_Salesky1;~Miryam_de_Lhoneux1;~Desmond_Elliott1", "gender": ";M;M;;F;", "homepage": "https://phillip.rs;;http://e-bug.github.io/;https://esalesky.github.io;http://cl.lingfil.uu.se/~miryam/;", "dblp": "263/9843;;241/9497;184/8920;https://dblp.uni-trier.de/pid/163/1873.html;46/7536", "google_scholar": "6MxyDqcAAAAJ;rQi0nEcAAAAJ;9yc1aXYAAAAJ;9I7TjgMAAAAJ;Z2VK5nIAAAAJ;", "orcid": "0000-0001-5123-821X;0000-0001-6405-0590;0000-0002-2999-7081;0000-0001-6765-1447;0000-0001-8844-2126;", "linkedin": ";jonas-f-lotz-ab7805113/;emanuelebugliarello/;elizabeth-salesky;miryamdelhoneux/;", "or_profile": "~Phillip_Rust1;~Jonas_F._Lotz1;~Emanuele_Bugliarello1;~Elizabeth_Salesky1;~Miryam_de_Lhoneux1;~Desmond_Elliott1", "aff": "University of Copenhagen;University of Copenhagen;University of Copenhagen;Johns Hopkins University;KU Leuven;University of Copenhagen", "aff_domain": "ku.dk;diku.dk;ku.dk;jhu.edu;kuleuven.be;ku.dk", "position": "PhD student;PhD student;PhD student;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nrust2023language,\ntitle={Language Modelling with Pixels},\nauthor={Phillip Rust and Jonas F. Lotz and Emanuele Bugliarello and Elizabeth Salesky and Miryam de Lhoneux and Desmond Elliott},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=FkSp8VW8RjH}\n}", "github": "", "project": "", "reviewers": "znCA;zqCz;2dJC;HZPD", "pdf_size": 2530290, "recommendation": "6;6;8;8", "confidence": "3;3;4;4", "correctness": "4;3;3;4", "technical_novelty": "3;4;4;3", "empirical_novelty": "3;4;4;4", "wc_summary_paper": "87;175;139;100", "wc_strength_and_weaknesses": "53;318;94;218", "wc_clarity_quality_novelty_and_reproducibility": "104;83;102;64", "wc_summary_review": "40;38;20;41", "wc_review": "284;614;355;423", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "607;993;631;654", "reply_reviewers": "0;0;0;0", "reply_authors": "2;2;1;1", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.5, 0.5 ], "empirical_novelty_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 125.25, 34.513584282134474 ], "wc_strength_and_weaknesses_avg": [ 170.75, 104.48773851510042 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 88.25, 16.223054582907622 ], "wc_summary_review_avg": [ 34.75, 8.584142356694699 ], "wc_review_avg": [ 419.0, 122.84339624090504 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 721.25, 157.7725815850143 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 1.0, "corr_recommendation_correctness": 0.0, "gs_citation": 54, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7197505745606582741&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=FkSp8VW8RjH", "email": "ku.dk;diku.dk;ku.dk;jhu.edu;kuleuven.be;ku.dk", 
"author_num": 6, "aff_unique_index": "0;0;0;1;2;0", "aff_unique_norm": "University of Copenhagen;Johns Hopkins University;Katholieke Universiteit Leuven", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ku.dk;https://www.jhu.edu;https://www.kuleuven.be", "aff_unique_abbr": "UCPH;JHU;KU Leuven", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;2;0", "aff_country_unique": "Denmark;United States;Belgium" }, { "title": "Label-free Concept Bottleneck Models", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11326", "id": "FlCg47MNvBA", "poster": "", "openreview": "https://openreview.net/forum?id=FlCg47MNvBA", "slides": "https://iclr.cc/virtual/2023/poster/11326", "video": "https://iclr.cc/virtual/2023/poster/11326", "author_site": "Tuomas Oikarinen, Subhro Das, Lam Nguyen, Tsui-Wei Weng", "tldr": "Scalable, automated and efficient way to create Concept Bottleneck Models without labeled concept data.", "abstract": "Concept bottleneck models (CBM) are a popular way of creating more interpretable neural networks by having hidden layer neurons correspond to human-understandable concepts. However, existing CBMs and their variants have two crucial limitations: first, they need to collect labeled data for each of the predefined concepts, which is time consuming and labor intensive; second, the accuracy of a CBM is often significantly lower than that of a standard neural network, especially on more complex datasets. This poor performance creates a barrier for adopting CBMs in practical real world applications. Motivated by these challenges, we propose Label-free CBM which is a novel framework to transform any neural network into an interpretable CBM without labeled concept data, while retaining a high accuracy. Our Label-free CBM has many advantages, it is: scalable - we present the first CBM scaled to ImageNet, efficient - creating a CBM takes only a few hours even for very large datasets, and automated - training it for a new dataset requires minimal human effort. Our code is available at https://github.com/Trustworthy-ML-Lab/Label-free-CBM.", "keywords": "Interpretability;Explainability;Concept Bottleneck Models", "primary_area": "", "supplementary_material": "", "author": "Tuomas Oikarinen;Subhro Das;Lam M. Nguyen;Tsui-Wei Weng", "authorids": "~Tuomas_Oikarinen1;~Subhro_Das1;~Lam_M._Nguyen1;~Tsui-Wei_Weng1", "gender": "M;;;F", "homepage": "https://tuomaso.github.io/;;;https://lilywenglab.github.io", "dblp": "243/3532;;;177/9197", "google_scholar": "M3KZnPwAAAAJ;;;v8GM4xoAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Tuomas_Oikarinen1;~Subhro_Das1;~Lam_M._Nguyen1;~Tsui-Wei_Weng1", "aff": "University of California, San Diego;;;University of California, San Diego", "aff_domain": "ucsd.edu;;;ucsd.edu", "position": "PhD student;;;Assistant Professor", "bibtex": "@inproceedings{\noikarinen2023labelfree,\ntitle={Label-free Concept Bottleneck Models},\nauthor={Tuomas Oikarinen and Subhro Das and Lam M. 
Nguyen and Tsui-Wei Weng},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=FlCg47MNvBA}\n}", "github": "", "project": "", "reviewers": "8fC7;mHS9;FHRn;CRuQ", "pdf_size": 13995203, "recommendation": "6;6;6;8", "confidence": "3;4;3;3", "correctness": "2;2;3;3", "technical_novelty": "3;3;2;2", "empirical_novelty": "2;3;2;3", "wc_summary_paper": "206;206;144;122", "wc_strength_and_weaknesses": "383;692;42;280", "wc_clarity_quality_novelty_and_reproducibility": "130;95;11;105", "wc_summary_review": "167;143;302;36", "wc_review": "886;1136;499;543", "wc_reply_reviewers": "0;190;0;35", "wc_reply_authors": "1209;2620;2033;1275", "reply_reviewers": "0;1;0;1", "reply_authors": "3;5;5;5", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 169.5, 37.319565913874186 ], "wc_strength_and_weaknesses_avg": [ 349.25, 233.3531390403823 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 85.25, 44.72345581459465 ], "wc_summary_review_avg": [ 162.0, 94.686324250126 ], "wc_review_avg": [ 766.0, 260.92048597225937 ], "wc_reply_reviewers_avg": [ 56.25, 78.53144274747535 ], "wc_reply_authors_avg": [ 1784.25, 581.0771786088317 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 4.5, 0.8660254037844386 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 202, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5903277205127169957&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=FlCg47MNvBA", "email": "ucsd.edu;;;ucsd.edu", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "University of California, San Diego", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsd.edu", "aff_unique_abbr": "UCSD", "aff_campus_unique_index": "0;0", "aff_campus_unique": "San Diego", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "FmpRQpQLs5J", "title": "Model-based Unknown Input Estimation via Partially Observable Markov Decision Processes", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "In the context of condition monitoring for structures and industrial assets, the estimation of unknown inputs, usually referring to acting loads, is of salient importance for guaranteeing safe and performant engineered systems. In this work, we propose a novel method for estimating unknown inputs from measured outputs, particularly for the case of dynamical systems with known or learned dynamics. The objective is to search for those system inputs that will reproduce the actual measured outputs, which can be reformulated as a Partially Observable Markov Decision Process (POMDP) problem and solved with well-established planning algorithms for POMDPs. The cross-entropy method is adopted in this paper for solving the POMDP due to its efficiency and robustness. 
The proposed method is demonstrated using simulated dynamical systems for structures with known dynamics, as well as a real wind turbine with learned dynamics, which is inferred via use of a Replay Overshooting (RO) scheme, a deep learning-based dynamics method for learning stochastic dynamics.", "keywords": "unknown input estimation;partially observable markov decision process;model-based reinforcement learning;model predictive control;cross-entropy method;dynamics modeling", "primary_area": "", "supplementary_material": "", "author": "Wei Liu;Zhilu Lai;Charikleia D. Stoura;Kiran Bacsa;Eleni Chatzi", "authorids": "~Wei_Liu35;zhilulai@ust.hk;charikleia.stoura@ibk.baug.ethz.ch;kiran.bacsa@sec.ethz.ch;~Eleni_Chatzi1", "gender": ";;;;F", "homepage": "https://frs.ethz.ch/people/researchers/Liu-Wei.html;;;;https://chatzi.ibk.ethz.ch/", "dblp": ";;;;281/5425", "google_scholar": ";;;;2n9Mwt8AAAAJ", "orcid": "0000-0002-1103-7699;;;;0000-0002-6870-240X", "linkedin": ";;;;eleni-chatzi-88065010/", "or_profile": "~Wei_Liu35;zhilulai@ust.hk;charikleia.stoura@ibk.baug.ethz.ch;kiran.bacsa@sec.ethz.ch;~Eleni_Chatzi1", "aff": "National University of Singapore;;;;Swiss Federal Institute of Technology", "aff_domain": "nus.edu;;;;ethz.ch", "position": "PhD student;;;;Associate Professor", "bibtex": "@misc{\nliu2023modelbased,\ntitle={Model-based Unknown Input Estimation via Partially Observable Markov Decision Processes},\nauthor={Wei Liu and Zhilu Lai and Charikleia D. Stoura and Kiran Bacsa and Eleni Chatzi},\nyear={2023},\nurl={https://openreview.net/forum?id=FmpRQpQLs5J}\n}", "github": "", "project": "", "reviewers": "BQsP;hX2D;zbmm;noD2", "site": "https://openreview.net/forum?id=FmpRQpQLs5J", "pdf_size": 3940823, "recommendation": "1;3;5;6", "confidence": "5;4;3;3", "correctness": "3;3;4;3", "technical_novelty": "1;1;2;3", "empirical_novelty": "1;1;0;3", "wc_summary_paper": "129;88;66;81", "wc_strength_and_weaknesses": "104;171;143;169", "wc_clarity_quality_novelty_and_reproducibility": "55;319;1;18", "wc_summary_review": "51;78;23;44", "wc_review": "339;656;233;312", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.75, 1.920286436967152 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 1.75, 0.82915619758885 ], "empirical_novelty_avg": [ 1.25, 1.0897247358851685 ], "wc_summary_paper_avg": [ 91.0, 23.33452377915607 ], "wc_strength_and_weaknesses_avg": [ 146.75, 27.040478915877213 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 98.25, 128.93675775355916 ], "wc_summary_review_avg": [ 49.0, 19.6596032513375 ], "wc_review_avg": [ 385.0, 161.23740260869994 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.9813358399735743, "corr_recommendation_correctness": 0.3758230140014144, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3193870314380115487&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1", "aff_unique_norm": "National University of Singapore;Swiss Federal Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.nus.edu.sg;https://www.ethz.ch", "aff_unique_abbr": "NUS;ETH Zurich", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": 
"0;1", "aff_country_unique": "Singapore;Switzerland" }, { "id": "FoRC6dIfO8u", "title": "Cyclophobic Reinforcement Learning", "track": "main", "status": "Reject", "tldr": "Cyclophobic Reinforcement Learning systematically and efficiently explores the state space by penalizing cycles, achieving excellent results in sparse reward environements.", "abstract": "In environments with sparse rewards finding a good inductive bias for exploration is crucial to the agent\u2019s success. However, there are two competing goals: novelty search and systematic exploration. While existing approaches such as curiousity- driven exploration find novelty, they sometimes do not systematically explore the whole state space, akin to depth-first-search vs breadth-first-search. In this paper, we propose a new intrinsic reward that is cyclophobic, i.e. it does not reward novelty, but punishes redundancy by avoiding cycles. Augmenting the cyclophobic intrinsic reward with a sequence of hierarchical representations based on the agent\u2019s cropped observations we are able to achieve excellent results in the MiniGrid and MiniHack environments. Both are particularly hard, as they require complex interactions with different objects in order to be solved. Detailed comparisons with previous approaches and thorough ablation studies show that our newly proposed cyclophobic reinforcement learning is vastly more efficient than other state of the art methods.", "keywords": "Reinforcement learning;intrinsic rewards;exploration;transfer learning;objects", "primary_area": "", "supplementary_material": "", "author": "Stefan Sylvius Wagner;Peter Arndt;Jan Robine;Stefan Harmeling", "authorids": "~Stefan_Sylvius_Wagner1;~Peter_Arndt1;~Jan_Robine1;~Stefan_Harmeling1", "gender": "M;;M;Unspecified", "homepage": ";https://www.cs.hhu.de/lehrstuehle-und-arbeitsgruppen/dialog-systems-and-machine-learning/unser-team/team/arndt.html;https://github.com/jrobine;", "dblp": "295/8868;;276/5929;67/3271", "google_scholar": "https://scholar.google.de/citations?user=nk46qycAAAAJ;;https://scholar.google.de/citations?user=07Qj3h0AAAAJ;https://scholar.google.de/citations?user=TA2fG64AAAAJ", "orcid": ";;;0000-0001-9709-8160", "linkedin": "stefan-wagner-a30423108/;;;", "or_profile": "~Stefan_Sylvius_Wagner1;~Peter_Arndt1;~Jan_Robine1;~Stefan_Harmeling1", "aff": "University of D\u00fcsseldorf;Heinrich-Heine-Universit\u00e4t;Technische Universit\u00e4t Dortmund;Technische Universit\u00e4t Dortmund", "aff_domain": "hhu.de;hhu.de;tu-dortmund.de;tu-dortmund.de", "position": "PhD student;Lecturer;PhD student;Full Professor", "bibtex": "@misc{\nwagner2023cyclophobic,\ntitle={Cyclophobic Reinforcement Learning},\nauthor={Stefan Sylvius Wagner and Peter Arndt and Jan Robine and Stefan Harmeling},\nyear={2023},\nurl={https://openreview.net/forum?id=FoRC6dIfO8u}\n}", "github": "", "project": "", "reviewers": "CKV7;FEf9;FYSy", "site": "https://openreview.net/forum?id=FoRC6dIfO8u", "pdf_size": 3210777, "recommendation": "3;3;5", "confidence": "4;4;3", "correctness": "2;3;3", "technical_novelty": "2;3;2", "empirical_novelty": "2;2;2", "wc_summary_paper": "58;65;218", "wc_strength_and_weaknesses": "336;190;1220", "wc_clarity_quality_novelty_and_reproducibility": "31;23;452", "wc_summary_review": "40;56;118", "wc_review": "465;334;2008", "wc_reply_reviewers": "182;0;0", "wc_reply_authors": "1193;293;1548", "reply_reviewers": "1;0;0", "reply_authors": "2;1;2", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 
0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 113.66666666666667, 73.8301353709235 ], "wc_strength_and_weaknesses_avg": [ 582.0, 455.0545754815203 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 168.66666666666666, 200.37354005845074 ], "wc_summary_review_avg": [ 71.33333333333333, 33.6386021641143 ], "wc_review_avg": [ 935.6666666666666, 760.1378529948075 ], "wc_reply_reviewers_avg": [ 60.666666666666664, 85.79562278396777 ], "wc_reply_authors_avg": [ 1011.3333333333334, 528.2097647294638 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": 0.5, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:y3OnjcdtD2MJ:scholar.google.com/&scioq=Cyclophobic+Reinforcement+Learning&hl=en&as_sdt=0,5", "gs_version_total": 4, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "Heinrich-Heine-Universit\u00e4t D\u00fcsseldorf;Heinrich-Heine-Universit\u00e4t;Technische Universit\u00e4t Dortmund", "aff_unique_dep": ";;", "aff_unique_url": "https://www.hhu.de;https://www.hhu.de;https://www.tu-dortmund.de", "aff_unique_abbr": "HHU;HHU;TU Dortmund", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "id": "Fp0CMUtBtw", "title": "VLG: General Video Recognition with Web Textual Knowledge", "track": "main", "status": "Withdraw", "tldr": "We build a comprehensive video benchmark of Kinetics-GVR including close-set, long-tail, few-shot and open-set, and present a unified video-text framework (VLG) with web textual knowledge to achieve SOTA performance under different settings.", "abstract": "Video recognition in an open world is quite challenging, as we need to handle different settings such as close-set, long-tail, few-shot and open-set. By leveraging semantic knowledge from noisy text descriptions crawled from the Internet, we focus on the general video recognition (GVR) problem of solving different recognition tasks within a unified framework. The contribution of this paper is twofold. First, we build a comprehensive video recognition benchmark of Kinetics-GVR, including four sub-task datasets to cover the mentioned settings. To facilitate the research of GVR, we propose to utilize external textual knowledge from the Internet and provide multi-source text descriptions for all action classes. Second, inspired by the flexibility of language representation, we present a unified visual-linguistic framework (VLG) to solve the problem of GVR by devising an effective two-stage training paradigm. Our VLG is first pre-trained on video and language datasets to learn a shared feature space, and then devises a flexible bi-modal attention head to collaborate high-level semantic concepts under different settings. Extensive results show that our VLG obtains the state-of-the-art performance under four settings. The superior performance demonstrates the effectiveness and generalization ability of our proposed VLG framework. 
We hope our work takes a step towards general video recognition and could serve as a baseline for future research.", "keywords": "Video Recognition;Multi Modality;Video-language representation learning", "primary_area": "", "supplementary_material": "", "author": "Jintao Lin;Zhaoyang Liu;Wenhai Wang;Wayne Wu;Limin Wang", "authorids": "~Jintao_Lin1;~Zhaoyang_Liu1;~Wenhai_Wang2;~Wayne_Wu1;~Limin_Wang1", "gender": "M;M;;;", "homepage": "https://github.com/dreamerlin;https://scholar.google.com/citations?user=btgwZosAAAAJ&hl=en;;;", "dblp": "24/10701;120/5899-1;;;", "google_scholar": ";btgwZosAAAAJ;;;", "orcid": ";0000-0003-0258-3097;;;", "linkedin": ";;;;", "or_profile": "~Jintao_Lin1;~Zhaoyang_Liu1;~Wenhai_Wang2;~Wayne_Wu1;~Limin_Wang1", "aff": "Nanjing University;Shanghai AI Laboratory ;;;", "aff_domain": "nju.edu.cn;pjlab.org.cn;;;", "position": "MS student;Intern;;;", "bibtex": "@misc{\nlin2023vlg,\ntitle={{VLG}: General Video Recognition with Web Textual Knowledge},\nauthor={Jintao Lin and Zhaoyang Liu and Wenhai Wang and Wayne Wu and Limin Wang},\nyear={2023},\nurl={https://openreview.net/forum?id=Fp0CMUtBtw}\n}", "github": "", "project": "", "reviewers": "jUCy;Xk2H;rSDC", "site": "https://openreview.net/forum?id=Fp0CMUtBtw", "pdf_size": 7369239, "recommendation": "3;5;5", "confidence": "4;3;3", "correctness": "4;3;4", "technical_novelty": "1;3;1", "empirical_novelty": "2;2;2", "wc_summary_paper": "39;86;47", "wc_strength_and_weaknesses": "125;247;147", "wc_clarity_quality_novelty_and_reproducibility": "31;131;36", "wc_summary_review": "26;26;15", "wc_review": "221;490;245", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 1.6666666666666667, 0.9428090415820634 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 57.333333333333336, 20.531818125912658 ], "wc_strength_and_weaknesses_avg": [ 173.0, 53.09111664550546 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 66.0, 46.007245806140865 ], "wc_summary_review_avg": [ 22.333333333333332, 5.185449728701348 ], "wc_review_avg": [ 318.6666666666667, 121.54651601570305 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.9999999999999998, "corr_recommendation_correctness": -0.49999999999999983, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14004477125031706037&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1", "aff_unique_norm": "Nanjing University;Shanghai AI Laboratory", "aff_unique_dep": ";", "aff_unique_url": "https://www.nju.edu.cn;https://www.shanghai-ai-lab.com", "aff_unique_abbr": "Nanjing U;SAIL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "FpkVnbE_h6i", "title": "SAAL: Sharpness-Aware Active Learning", "track": "main", "status": "Reject", "tldr": "We propose Sharpness-Aware Active Learning, or SAAL, which adopts the loss sharpness for the acquisition score.", "abstract": "While modern deep neural networks play significant roles in many research areas, they are also prone to overfitting problems under limited 
data instances. Particularly, this overfitting, or generalization issue, could be a problem in the framework of active learning because it selects a few data instances for learning over time. To consider the generalization, this paper introduces the first active learning method to incorporate the sharpness of loss space in the design of the acquisition function, inspired by sharpness-aware minimization (SAM). SAM intends to maximally perturb the training dataset, so the optimization can be led to a flat minimum, which is known to have better generalization ability. Specifically, our active learning, Sharpness-Aware Active Learning (SAAL), constructs its acquisition function by selecting unlabeled instances whose perturbed loss becomes maximum. Over the adaptation of SAM into SAAL, we design a pseudo labeling mechanism to look forward to the perturbed loss w.r.t. the ground-truth label. Furthermore, we present a theoretical analysis between SAAL and recent active learning methods, so the recent works could be reduced to SAAL under a specific condition. We conduct experiments on various benchmark datasets for vision-based tasks in image classification and object detection. The experimental results confirm that SAAL outperforms the baselines by selecting instances that have the potentially maximal perturbation on the loss.", "keywords": "active learning;loss sharpness;SAM", "primary_area": "", "supplementary_material": "", "author": "Yoon-Yeong Kim;JoonHo Jang;Byeonghu Na;Yeongmin Kim;Kyungwoo Song;Wanmo Kang;Il-chul Moon", "authorids": "~Yoon-Yeong_Kim1;~JoonHo_Jang1;~Byeonghu_Na1;~Yeongmin_Kim1;~Kyungwoo_Song1;~Wanmo_Kang1;~Il-chul_Moon1", "gender": "F;;M;M;;M;", "homepage": "https://sites.google.com/view/yykim/home;https://aailab.kaist.ac.kr/xe2/members_phdstudent/16877;https://sites.google.com/view/byeonghu-na;https://sites.google.com/view/yeongmin-space/%ED%99%88;https://mlai.yonsei.ac.kr;https://sites.google.com/site/wanmokang/;", "dblp": "254/0952.html;241/9686;276/5100;;155/4867;;", "google_scholar": ";oYbKry4AAAAJ;https://scholar.google.co.kr/citations?user=mJoqpmEAAAAJ;SBF13JUAAAAJ;HWxRii4AAAAJ;;", "orcid": ";;0000-0003-3463-2674;;0000-0003-0082-4280;;", "linkedin": ";;byeonghu-na-17942120b/;;kyungwoo-song-862863155/;;", "or_profile": "~Yoon-Yeong_Kim1;~JoonHo_Jang1;~Byeonghu_Na1;~Yeongmin_Kim1;~Kyungwoo_Song1;~Wanmo_Kang1;~Il-chul_Moon1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;University of Seoul;Korea Advanced Institute of Science & Technology;", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;uos.ac.kr;kaist.ac.kr;", "position": "PhD student;PhD student;PhD student;MS student;Assistant Professor;Professor;", "bibtex": "@misc{\nkim2023saal,\ntitle={{SAAL}: Sharpness-Aware Active Learning},\nauthor={Yoon-Yeong Kim and JoonHo Jang and Byeonghu Na and Yeongmin Kim and Kyungwoo Song and Wanmo Kang and Il-chul Moon},\nyear={2023},\nurl={https://openreview.net/forum?id=FpkVnbE_h6i}\n}", "github": "", "project": "", "reviewers": "Xi2g;tmqo;3NDR", "site": "https://openreview.net/forum?id=FpkVnbE_h6i", "pdf_size": 855849, "recommendation": "5;6;6", "confidence": "4;4;3", "correctness": "3;3;3", "technical_novelty": "2;3;3", "empirical_novelty": "2;3;2", "wc_summary_paper": "83;55;52", "wc_strength_and_weaknesses": "201;221;122", "wc_clarity_quality_novelty_and_reproducibility": "85;28;39", "wc_summary_review": 
"95;37;72", "wc_review": "464;341;285", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 63.333333333333336, 13.960261060914616 ], "wc_strength_and_weaknesses_avg": [ 181.33333333333334, 42.74212078136611 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 50.666666666666664, 24.689178916188272 ], "wc_summary_review_avg": [ 68.0, 23.84673283002656 ], "wc_review_avg": [ 363.3333333333333, 74.76333028668236 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.4999999999999999, "corr_recommendation_correctness": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5985226111425665224&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;University of Seoul", "aff_unique_dep": ";", "aff_unique_url": "https://www.kaist.ac.kr;http://www.useoul.edu", "aff_unique_abbr": "KAIST;UOS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "Frt6LTRFhui", "title": "General Policy Evaluation and Improvement by Learning to Identify Few But Crucial States", "track": "main", "status": "Reject", "tldr": "", "abstract": "Learning to evaluate and improve policies is a core problem of Reinforcement Learning (RL). Traditional RL algorithms learn a value function defined for a single policy. A recently explored competitive alternative is to learn a single value function for many policies. Here we combine the actor-critic architecture of Parameter-Based Value Functions and the policy embedding of Policy Evaluation Networks to learn a single value function for evaluating (and thus helping to improve) any policy represented by a deep neural network (NN). The method yields competitive experimental results. In continuous control problems with infinitely many states, our value function minimizes its prediction error by simultaneously learning a small set of `probing states' and a mapping from actions produced in probing states to the policy's return. The method extracts crucial abstract knowledge about the environment in form of very few states sufficient to fully specify the behavior of many policies. A policy improves solely by changing actions in probing states, following the gradient of the value function's predictions. Surprisingly, it is possible to clone the behavior of a near-optimal policy in Swimmer-v3 and Hopper-v3 environments only by knowing how to act in 3 and 5 such learned states, respectively. 
Remarkably, our value function trained to evaluate NN policies is also invariant to changes of the policy architecture: we show that it allows for zero-shot learning of linear policies competitive with the best policy seen during training.", "keywords": "Reinforcement Learning;Off-Policy Reinforcement Learning", "primary_area": "", "supplementary_material": "/attachment/8ec34e68c12b6c9c620120d4210f7efa359cac33.zip", "author": "Francesco Faccio;Aditya Ramesh;Vincent Herrmann;Jean Harb;J\u00fcrgen Schmidhuber", "authorids": "~Francesco_Faccio1;~Aditya_Ramesh2;~Vincent_Herrmann1;~Jean_Harb1;~J\u00fcrgen_Schmidhuber1", "gender": "M;M;M;M;M", "homepage": ";https://adityaramesh.in;https://vincentherrmann.github.io;cs.mcgill.ca/~jmerhe1;http://people.idsia.ch/~juergen/", "dblp": "227/3214;;248/8663;;s/JurgenSchmidhuber", "google_scholar": "0z3DkrkAAAAJ;https://scholar.google.ch/citations?user=60K82BkAAAAJ;;;https://scholar.google.ch/citations?user=gLnCTgIAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Francesco_Faccio1;~Aditya_Ramesh2;~Vincent_Herrmann1;~Jean_Harb1;~J\u00fcrgen_Schmidhuber1", "aff": "The Swiss AI Lab IDSIA - USI - SUPSI;Dalle Molle Institute for Artificial Intelligence (IDSIA);The Swiss AI Lab IDSIA;McGill University;IDSIA", "aff_domain": "idsia.ch;idsia.ch;idsia.ch;mcgill.ca;idsia.ch", "position": "PhD student;PhD student;PhD student;PhD student;Scientific Director", "bibtex": "@misc{\nfaccio2023general,\ntitle={General Policy Evaluation and Improvement by Learning to Identify Few But Crucial States},\nauthor={Francesco Faccio and Aditya Ramesh and Vincent Herrmann and Jean Harb and J{\\\"u}rgen Schmidhuber},\nyear={2023},\nurl={https://openreview.net/forum?id=Frt6LTRFhui}\n}", "github": "", "project": "", "reviewers": "xK3x;eb1Q;yJcN;zMK3", "site": "https://openreview.net/forum?id=Frt6LTRFhui", "pdf_size": 4983761, "recommendation": "3;5;5;6", "confidence": "4;3;4;3", "correctness": "2;3;4;2", "technical_novelty": "3;2;2;4", "empirical_novelty": "2;2;3;4", "wc_summary_paper": "72;82;83;255", "wc_strength_and_weaknesses": "329;108;164;596", "wc_clarity_quality_novelty_and_reproducibility": "238;7;334;20", "wc_summary_review": "27;215;157;24", "wc_review": "666;412;738;895", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1547;864;1168;820", "reply_reviewers": "0;0;0;0", "reply_authors": "3;2;3;2", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.75, 0.82915619758885 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 123.0, 76.33151380655306 ], "wc_strength_and_weaknesses_avg": [ 299.25, 189.61457618020825 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 149.75, 140.4891010007538 ], "wc_summary_review_avg": [ 105.75, 82.83530346416315 ], "wc_review_avg": [ 677.75, 174.34789215817895 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1099.75, 290.9161176353074 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.6882472016116854, "corr_recommendation_correctness": 0.20751433915982243, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13842577082735883972&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Swiss AI Lab IDSIA;Dalle Molle Institute for Artificial Intelligence;IDSIA;McGill University;Institute of 
Digital Technologies", "aff_unique_dep": "AI Lab;Artificial Intelligence;Swiss AI Lab;;", "aff_unique_url": "https://www.idsia.ch/;https://www.idsia.ch/;https://www.idsia.ch/;https://www.mcgill.ca;https://www.idsia.ch", "aff_unique_abbr": "IDSIA;IDSIA;IDSIA;McGill;IDSIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "Switzerland;Canada" }, { "title": "Evaluating Representations with Readout Model Switching", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11027", "id": "Fsd-6ax4T1m", "poster": "/media/PosterPDFs/ICLR%202023/11027.png?t=1682892755.014728", "openreview": "https://openreview.net/forum?id=Fsd-6ax4T1m", "slides": "https://iclr.cc/virtual/2023/poster/11027", "video": "https://iclr.cc/virtual/2023/poster/11027", "author_site": "Yazhe Li, Jorg Bornschein, Marcus Hutter", "tldr": "We propose an evaluation framework that is based on MDL and model switching for evaluating representations.", "abstract": "Although much of the success of Deep Learning builds on learning good representations, a rigorous method to evaluate their quality is lacking. In this paper, we treat the evaluation of representations as a model selection problem and propose to use the Minimum Description Length (MDL) principle to devise an evaluation metric. Contrary to the established practice of limiting the capacity of the readout model, we design a hybrid discrete and continuous-valued model space for the readout models and employ a switching strategy to combine their predictions. The MDL score takes model complexity, as well as data efficiency into account. As a result, the most appropriate model for the specific task and representation will be chosen, making it a unified measure for comparison. The proposed metric can be efficiently computed with an online method and we present results for pre-trained vision encoders of various architectures (ResNet and ViT) and objective functions (supervised and self-supervised) on a range of downstream tasks. We compare our methods with accuracy-based approaches and show that the latter are inconsistent when multiple readout models are used. 
Finally, we discuss important properties revealed by our evaluations such as model scaling, preferred readout model, and data efficiency.", "keywords": "Representation Learning;Evaluation;Expert Switching;Minimum Description Length", "primary_area": "", "supplementary_material": "", "author": "Yazhe Li;Jorg Bornschein;Marcus Hutter", "authorids": "~Yazhe_Li2;~Jorg_Bornschein1;~Marcus_Hutter1", "gender": ";M;", "homepage": ";;http://www.hutter1.net/", "dblp": "182/2163;13/8510;h/MarcusHutter", "google_scholar": "lpswgyIAAAAJ;X7kZFnoAAAAJ;https://scholar.google.com.tw/citations?user=7hmCntEAAAAJ", "orcid": ";0000-0002-3356-7922;0000-0002-3263-4097", "linkedin": ";;hutter1/", "or_profile": "~Yazhe_Li2;~Jorg_Bornschein1;~Marcus_Hutter1", "aff": "Google DeepMind;Google Deepmind;Australian National University", "aff_domain": "deepmind.com;google.com;anu.edu.au", "position": "Researcher;Research Scientist;Full Professor", "bibtex": "@inproceedings{\nli2023evaluating,\ntitle={Evaluating Representations with Readout Model Switching},\nauthor={Yazhe Li and Jorg Bornschein and Marcus Hutter},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=Fsd-6ax4T1m}\n}", "github": "", "project": "", "reviewers": "pMpS;mVad;GYkn;Jszx;4XSa", "pdf_size": 630113, "recommendation": "6;6;6;6;8", "confidence": "3;3;4;3;1", "correctness": "3;3;4;3;3", "technical_novelty": "3;3;2;3;3", "empirical_novelty": "2;3;2;2;3", "wc_summary_paper": "97;68;159;79;113", "wc_strength_and_weaknesses": "617;321;751;233;1", "wc_clarity_quality_novelty_and_reproducibility": "75;48;78;54;1", "wc_summary_review": "191;83;63;61;2", "wc_review": "980;520;1051;427;117", "wc_reply_reviewers": "99;0;256;0;0", "wc_reply_authors": "1106;379;1255;332;0", "reply_reviewers": "1;0;1;0;0", "reply_authors": "2;1;3;1;0", "recommendation_avg": [ 6.4, 0.7999999999999999 ], "confidence_avg": [ 2.8, 0.9797958971132712 ], "correctness_avg": [ 3.2, 0.39999999999999997 ], "technical_novelty_avg": [ 2.8, 0.39999999999999997 ], "empirical_novelty_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_paper_avg": [ 103.2, 31.85215848258953 ], "wc_strength_and_weaknesses_avg": [ 384.6, 269.2341731652949 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 51.2, 27.650678111033734 ], "wc_summary_review_avg": [ 80.0, 61.747874457344686 ], "wc_review_avg": [ 619.0, 350.8885863062519 ], "wc_reply_reviewers_avg": [ 71.0, 100.131912994809 ], "wc_reply_authors_avg": [ 614.4, 482.6363434305378 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 1.019803902718557 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.9185586535436918, "corr_recommendation_correctness": -0.25000000000000006, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5214032318584179375&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=Fsd-6ax4T1m", "email": "deepmind.com;google.com;anu.edu.au", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Google;DeepMind;Australian National University", "aff_unique_dep": "Google DeepMind;DeepMind;", "aff_unique_url": "https://deepmind.com;https://deepmind.com;https://www.anu.edu.au", "aff_unique_abbr": "DeepMind;DeepMind;ANU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United Kingdom;Australia" }, { "title": "Guess the Instruction! 
Flipped Learning Makes Language Models Stronger Zero-Shot Learners", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11200", "id": "FtOxgKe_Zg2", "poster": "/media/PosterPDFs/ICLR%202023/11200.png?t=1682574544.2171602", "openreview": "https://openreview.net/forum?id=FtOxgKe_Zg2", "slides": "https://iclr.cc/virtual/2023/poster/11200", "video": "https://iclr.cc/virtual/2023/poster/11200", "author_site": "Seonghyeon Ye, Doyoung Kim, Joel Jang, Joongbo Shin, Minjoon Seo", "tldr": "We introduce Flipped Learning, a meta-training method that computes the likelihood of the task instruction given input instance and label.", "abstract": "Meta-training, which fine-tunes the language model (LM) on various downstream tasks by maximizing the likelihood of the target label given the task instruction and input instance, has improved the zero-shot task generalization performance. However, meta-trained LMs still struggle to generalize to challenging tasks containing novel labels unseen during meta-training. In this paper, we propose Flipped Learning, an alternative method of meta-training which trains the LM to generate the task instruction given the input instance and label. During inference, the LM trained with Flipped Learning, referred to as FLIPPED, selects the label option that is most likely to generate the task instruction. On 14 tasks of the BIG-bench benchmark, the 11B-sized FLIPPED outperforms zero-shot T0-11B (Sanh et al, 2021) and even a 16 times larger 3-shot GPT-3 (175B) (Brown et al, 2020) on average by 8.4% and 9.7% points, respectively. FLIPPED gives particularly large improvements on tasks with unseen labels, outperforming T0-11B by up to +20% average F1 score. This indicates that the strong task generalization of FLIPPED comes from improved generalization to novel labels. We release our code at github.com/seonghyeonye/Flipped-Learning.", "keywords": "natural language processing;zeroshot language models;large language models", "primary_area": "", "supplementary_material": "/attachment/09097cfd782c66fcff067e26bad160a10b96043d.zip", "author": "Seonghyeon Ye;Doyoung Kim;Joel Jang;Joongbo Shin;Minjoon Seo", "authorids": "~Seonghyeon_Ye1;~Doyoung_Kim3;~Joel_Jang1;~Joongbo_Shin1;~Minjoon_Seo1", "gender": "M;M;M;M;M", "homepage": "https://vano1205.github.io/;https://doyoungkim-ml.github.io/;https://joeljang.github.io/;https://joongbo.github.io/;https://seominjoon.github.io", "dblp": "301/8927;;;207/7602;149/1367", "google_scholar": "https://scholar.google.co.kr/citations?user=JfGGjBoAAAAJ;https://scholar.google.co.kr/citations?user=PJR9ogMAAAAJ;xL-7eFEAAAAJ;xzJSvJcAAAAJ;zYze5fIAAAAJ", "orcid": ";;;;", "linkedin": ";doyoung-kim-870a141a2/;joel-jang-1289331a5/;;minjoon-seo/", "or_profile": "~Seonghyeon_Ye1;~Doyoung_Kim3;~Joel_Jang1;~Joongbo_Shin1;~Minjoon_Seo1", "aff": "Korea Advanced Institute of Science & Technology;KAIST;Korea Advanced Institute of Science & Technology;LG AI Research;Twelve Labs", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;lgresearch.ai;twelvelabs.io", "position": "PhD student;MS student;MS student;Researcher;Chief Scientist", "bibtex": "@inproceedings{\nye2023guess,\ntitle={Guess the Instruction! 
Flipped Learning Makes Language Models Stronger Zero-Shot Learners},\nauthor={Seonghyeon Ye and Doyoung Kim and Joel Jang and Joongbo Shin and Minjoon Seo},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=FtOxgKe_Zg2}\n}", "github": "", "project": "", "reviewers": "dMHf;CFao;55Bv", "pdf_size": 1154528, "recommendation": "6;6;8", "confidence": "4;4;5", "correctness": "3;3;4", "technical_novelty": "2;2;4", "empirical_novelty": "3;2;4", "wc_summary_paper": "48;76;126", "wc_strength_and_weaknesses": "173;108;153", "wc_clarity_quality_novelty_and_reproducibility": "48;104;17", "wc_summary_review": "55;42;22", "wc_review": "324;330;318", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "646;650;179", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.9428090415820634 ], "empirical_novelty_avg": [ 3.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 83.33333333333333, 32.262809686834295 ], "wc_strength_and_weaknesses_avg": [ 144.66666666666666, 27.182510717166817 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 56.333333333333336, 36.003086287459055 ], "wc_summary_review_avg": [ 39.666666666666664, 13.572848714334887 ], "wc_review_avg": [ 324.0, 4.898979485566356 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 491.6666666666667, 221.09475093020387 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.9999999999999997, "corr_recommendation_correctness": 0.9999999999999998, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3781716006821861117&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 7, "pdf": "https://openreview.net/pdf?id=FtOxgKe_Zg2", "email": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;lgresearch.ai;twelvelabs.io", "author_num": 5, "aff_unique_index": "0;0;0;1;2", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;LG;Twelve Labs", "aff_unique_dep": ";LG AI Research;", "aff_unique_url": "https://www.kaist.ac.kr;https://www.lgaires.com;https://twelvelabs.com", "aff_unique_abbr": "KAIST;LG AI;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "South Korea;United States" }, { "title": "Unified Detoxifying and Debiasing in Language Generation via Inference-time Adaptive Optimization", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12264", "id": "FvevdI0aA_h", "poster": "", "openreview": "https://openreview.net/forum?id=FvevdI0aA_h", "slides": "https://iclr.cc/virtual/2023/poster/12264", "video": "https://iclr.cc/virtual/2023/poster/12264", "author_site": "Zonghan Yang, Xiaoyuan Yi, Peng Li, Yang Liu, Xing Xie", "tldr": "We propose an inference-time unified detoxifying and debiasing framework, which achieves better balance among effectiveness, computation cost and generation quality.", "abstract": "Recently pre-trained language models (PLMs) have prospered in various natural language generation (NLG) tasks due to their ability to generate fairly fluent text. 
Nevertheless, these models are observed to capture and reproduce harmful contents in training corpora, typically toxic language and social biases, raising severe moral issues. Prior works on ethical NLG tackle detoxifying and debiasing separately, which is problematic since we find debiased models still exhibit toxicity while detoxified ones even exacerbate biases. To address such a challenge, we propose the first unified framework of detoxifying and debiasing called UDDIA, which jointly formalizes these two problems as rectifying the output space. We theoretically interpret our framework as learning a text distribution mixing weighted attributes. Besides, UDDIA conducts adaptive optimization of only a few parameters during decoding based on a parameter-efficient tuning schema without any training data. This leads to minimal generation quality loss and improved rectification performance with acceptable computational cost. Experimental results demonstrate that compared to several strong baselines, UDDIA achieves debiasing and detoxifying simultaneously and better balances efficiency and effectiveness, taking a further step towards practical ethical NLG.", "keywords": "detoxify;debias;language generation", "primary_area": "", "supplementary_material": "", "author": "Zonghan Yang;Xiaoyuan Yi;Peng Li;Yang Liu;Xing Xie", "authorids": "~Zonghan_Yang1;~Xiaoyuan_Yi1;~Peng_Li2;~Yang_Liu19;~Xing_Xie3", "gender": "M;M;M;M;M", "homepage": "https://minicheshire.github.io/;;http://www.lpeng.net/;http://nlp.csai.tsinghua.edu.cn/~ly/;http://research.microsoft.com/en-us/people/xingx/", "dblp": "222/7860;179/2248;83/6353-30;51/3710-5;08/6809-1", "google_scholar": "rt9HOIUAAAAJ;BdpXcLgAAAAJ;hgYzkOQAAAAJ;https://scholar.google.com.hk/citations?user=lVhoKNcAAAAJ;5EQfAFIAAAAJ", "orcid": ";0000-0003-2710-1613;0000-0003-1374-5979;0000-0002-3087-242X;0000-0002-8608-8482", "linkedin": ";xiaoyuan-yi-471212a5/;;;xingx/", "or_profile": "~Zonghan_Yang1;~Xiaoyuan_Yi1;~Peng_Li2;~Yang_Liu19;~Xing_Xie3", "aff": "Department of Computer Science and Technology, Tsinghua University;Microsoft Research;Tsinghua University;Tsinghua University;Microsoft Research Asia", "aff_domain": "cs.tsinghua.edu.cn;research.microsoft.com;tsinghua.edu.cn;tsinghua.edu.cn;microsoft.com", "position": "PhD student;Researcher;Associate Professor;Professor;Senior Principal Researcher", "bibtex": "@inproceedings{\nyang2023unified,\ntitle={Unified Detoxifying and Debiasing in Language Generation via Inference-time Adaptive Optimization},\nauthor={Zonghan Yang and Xiaoyuan Yi and Peng Li and Yang Liu and Xing Xie},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=FvevdI0aA_h}\n}", "github": "", "project": "", "reviewers": "HmKe;4XSk;Syvk", "pdf_size": 2751483, "recommendation": "5;8;8", "confidence": "3;4;3", "correctness": "3;3;3", "technical_novelty": "3;2;3", "empirical_novelty": "3;3;3", "wc_summary_paper": "125;94;87", "wc_strength_and_weaknesses": "322;436;248", "wc_clarity_quality_novelty_and_reproducibility": "55;49;37", "wc_summary_review": "28;41;60", "wc_review": "530;620;432", "wc_reply_reviewers": "0;32;54", "wc_reply_authors": "1014;1770;1486", "reply_reviewers": "0;1;1", "reply_authors": "2;4;4", "recommendation_avg": [ 7.0, 1.4142135623730951 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.0, 0.0 ], 
"wc_summary_paper_avg": [ 102.0, 16.51262143533445 ], "wc_strength_and_weaknesses_avg": [ 335.3333333333333, 77.32758599332811 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 47.0, 7.483314773547883 ], "wc_summary_review_avg": [ 43.0, 13.140268896284683 ], "wc_review_avg": [ 527.3333333333334, 76.77383813658285 ], "wc_reply_reviewers_avg": [ 28.666666666666668, 22.17105219775452 ], "wc_reply_authors_avg": [ 1423.3333333333333, 311.80050602196843 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 3.3333333333333335, 0.9428090415820634 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.5, "corr_recommendation_correctness": 0.0, "gs_citation": 38, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1791124724425231985&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=FvevdI0aA_h", "email": "cs.tsinghua.edu.cn;research.microsoft.com;tsinghua.edu.cn;tsinghua.edu.cn;microsoft.com", "author_num": 5, "aff_unique_index": "0;1;0;0;1", "aff_unique_norm": "Tsinghua University;Microsoft", "aff_unique_dep": "Department of Computer Science and Technology;Microsoft Research", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "THU;MSR", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "China;United States" }, { "id": "FvqcQ_9u7Mo", "title": "ECLAD: Extracting Concepts with Local Aggregated Descriptors", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Convolutional neural networks (CNNs) are increasingly being used in critical systems, where robustness and alignment are crucial. In this context, the field of explainable artificial intelligence has proposed the generation of high-level explanations of the prediction process of CNNs through concept extraction. While these methods can detect whether or not a concept is present in an image, they are unable to determine its location. What is more, a fair comparison of such approaches is difficult due to a lack of proper validation procedures. To address these issues, we propose a novel method for automatic concept extraction and localization based on representations obtained through pixel-wise aggregations of CNN activation maps. Further, we introduce a process for the validation of concept-extraction techniques based on synthetic datasets with pixel-wise annotations of their main components, reducing the need for human intervention. 
Extensive experimentation on both synthetic and real-world datasets demonstrates that our method outperforms state-of-the-art alternatives.", "keywords": "explainable artificial intelligence;deep learning;interpretability;concept extraction", "primary_area": "", "supplementary_material": "/attachment/93225e73184889884f33043f0f848b3a561f0bae.zip", "author": "Andres Felipe Posada Moreno;Nikita Surya;Sebastian Trimpe", "authorids": "~Andres_Felipe_Posada_Moreno1;~Nikita_Surya1;~Sebastian_Trimpe1", "gender": "M;;M", "homepage": ";;https://www.dsme.rwth-aachen.de/trimpe", "dblp": ";;15/8135", "google_scholar": "BnCVQsUAAAAJ;;https://scholar.google.de/citations?user=9kzHZssAAAAJ", "orcid": "0000-0003-3751-0680;;0000-0002-2785-2487", "linkedin": ";nikita-surya/;sebastian-trimpe-2472a0a3/", "or_profile": "~Andres_Felipe_Posada_Moreno1;~Nikita_Surya1;~Sebastian_Trimpe1", "aff": "Rheinisch Westf\u00e4lische Technische Hochschule Aachen;;RWTH Aachen University", "aff_domain": "rwth-aachen.de;;rwth-aachen.de", "position": "PhD student;;Full Professor", "bibtex": "@misc{\nmoreno2023eclad,\ntitle={{ECLAD}: Extracting Concepts with Local Aggregated Descriptors},\nauthor={Andres Felipe Posada Moreno and Nikita Surya and Sebastian Trimpe},\nyear={2023},\nurl={https://openreview.net/forum?id=FvqcQ_9u7Mo}\n}", "github": "", "project": "", "reviewers": "4TNw;qcqK;Vydf;d5Kb", "site": "https://openreview.net/forum?id=FvqcQ_9u7Mo", "pdf_size": 40395792, "recommendation": "3;5;5;6", "confidence": "4;4;2;3", "correctness": "2;2;2;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "70;69;51;68", "wc_strength_and_weaknesses": "172;147;139;268", "wc_clarity_quality_novelty_and_reproducibility": "107;31;54;61", "wc_summary_review": "69;102;51;39", "wc_review": "418;349;295;436", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 2.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 64.5, 7.826237921249264 ], "wc_strength_and_weaknesses_avg": [ 181.5, 51.40282093426391 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 63.25, 27.589626673806226 ], "wc_summary_review_avg": [ 65.25, 23.75263143316967 ], "wc_review_avg": [ 374.5, 56.224994441973934 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.48420012470625223, "corr_recommendation_correctness": 0.6622661785325219, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18233531668825152880&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0", "aff_unique_norm": "RWTH Aachen University", "aff_unique_dep": "", "aff_unique_url": "https://www.rwth-aachen.de", "aff_unique_abbr": "RWTH", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Aachen", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "id": "Fw516fpXI-c", "title": "Zipper: Decoupling the tradeoff Between Robustness and Accuracy", "track": "main", "status": "Withdraw", "tldr": "We propose a bi-expert framework where we simultaneously train base-learners with distribution-aware strategies so that it can obtain both 
satisfying clean accuracy and robustness", "abstract": "Deep neural networks obtained by standard training have been constantly plagued by adversarial examples. Although adversarial training demonstrates its capability to defend against adversarial examples, unfortunately, training robust classifiers leads to an inevitable drop in the natural generalization when performing adversarial training. To address these issues, we decouple the standard generalization and the robust generalization from joint training and formulate different training strategies for each one. Specifically, instead of minimizing a global loss on the expectation over these two generalization errors, we propose a bi-expert framework called \\emph{Zipper} where we simultaneously train base learners with distribution-aware strategies so that they can specialize in their own fields. The parameters of base learners are collected and combined to form a global learner at intervals during the training process, which is then distributed to base learners as initialized parameters for continued training. Theoretically, we show that the risks of Zipper will get lower once the base learners are well trained. Extensive experiments verify the applicability of Zipper to achieving high clean accuracy in the natural setting while remaining considerably robust to the adversarial setting, compared to relevant techniques. ", "keywords": "Adversarial Training", "primary_area": "", "supplementary_material": "", "author": "Hongjun Wang;Yisen Wang", "authorids": "~Hongjun_Wang2;~Yisen_Wang1", "gender": "M;M", "homepage": "https://whj363636.github.io/;https://yisenwang.github.io/", "dblp": "65/3627-5;172/1346-1", "google_scholar": "DNi-nB0AAAAJ;uMWPDboAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Hongjun_Wang2;~Yisen_Wang1", "aff": "The University of Hong Kong;Peking University", "aff_domain": "hku.hk;pku.edu.cn", "position": "PhD student;Assistant Professor", "bibtex": "@misc{\nwang2023zipper,\ntitle={Zipper: Decoupling the tradeoff Between Robustness and Accuracy},\nauthor={Hongjun Wang and Yisen Wang},\nyear={2023},\nurl={https://openreview.net/forum?id=Fw516fpXI-c}\n}", "github": "", "project": "", "reviewers": "Dcgw;NtRf;hV6E", "site": "https://openreview.net/forum?id=Fw516fpXI-c", "pdf_size": 1280986, "recommendation": "3;5;6", "confidence": "4;3;3", "correctness": "3;2;3", "technical_novelty": "2;3;3", "empirical_novelty": "2;3;3", "wc_summary_paper": "100;86;56", "wc_strength_and_weaknesses": "633;225;169", "wc_clarity_quality_novelty_and_reproducibility": "580;31;147", "wc_summary_review": "123;45;40", "wc_review": "1436;387;412", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 80.66666666666667, 18.354533197248273 ], "wc_strength_and_weaknesses_avg": [ 342.3333333333333, 206.79995701697385 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 252.66666666666666, 236.2545707964657 ], "wc_summary_review_avg": [ 69.33333333333333, 38.00292386412159 ], "wc_review_avg": [ 745.0, 488.7173689021771 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], 
"reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.9449111825230683, "corr_recommendation_correctness": -0.18898223650461363, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:aYhMFDHnk54J:scholar.google.com/&scioq=Zipper:+Decoupling+the+tradeoff+Between+Robustness+and+Accuracy&hl=en&as_sdt=0,48", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "University of Hong Kong;Peking University", "aff_unique_dep": ";", "aff_unique_url": "https://www.hku.hk;http://www.pku.edu.cn", "aff_unique_abbr": "HKU;Peking U", "aff_campus_unique_index": "0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "FwlV6h4KxVE", "title": "Test-Time Adaptation for Visual Document Understanding", "track": "main", "status": "Reject", "tldr": "Proposing a novel test-time adaptation approach and three benchmarks for visual document understanding via masked language modeling and pseudo labeling.", "abstract": "Self-supervised pretraining has been able to produce transferable representations for various visual document understanding (VDU) tasks. However, the ability of such representations to adapt to new distribution shifts at test-time has not been studied yet. We propose DocTTA, a novel test-time adaptation approach for documents that leverages cross-modality self-supervised learning via masked visual language modeling as well as pseudo labeling to adapt models learned on a \\textit{source} domain to an unlabeled \\textit{target} domain at test time. We also introduce new benchmarks using existing public datasets for various VDU tasks including entity recognition, key-value extraction, and document visual question answering tasks where DocTTA improves the source model performance up to 1.79\\% in (F1 score), 3.43\\% (F1 score), and 17.68\\% (ANLS score), respectively.", "keywords": "Test-time adaptation;source data-free domain adaptation;visual document understanding", "primary_area": "", "supplementary_material": "/attachment/3354470e558e7293c917d0f52c333b24a38a4c3b.zip", "author": "Sayna Ebrahimi;Sercan O Arik;Tomas Pfister", "authorids": "~Sayna_Ebrahimi1;~Sercan_O_Arik1;~Tomas_Pfister1", "gender": "F;M;M", "homepage": "https://saynaebrahimi.github.io/;https://www.sercanarik.com/;http://tomas.pfister.fi", "dblp": "207/7584;;14/8360", "google_scholar": "wRyjJfMAAAAJ;;ahSpJOAAAAAJ", "orcid": ";0000-0001-6333-1729;0009-0004-4088-8718", "linkedin": "saynaebrahimi/;;", "or_profile": "~Sayna_Ebrahimi1;~Sercan_O_Arik1;~Tomas_Pfister1", "aff": "Google;Google;Google", "aff_domain": "google.com;google.com;google.com", "position": "Research Scientist;Research Scientist;Head of Research @ Cloud AI", "bibtex": "@misc{\nebrahimi2023testtime,\ntitle={Test-Time Adaptation for Visual Document Understanding},\nauthor={Sayna Ebrahimi and Sercan O Arik and Tomas Pfister},\nyear={2023},\nurl={https://openreview.net/forum?id=FwlV6h4KxVE}\n}", "github": "", "project": "", "reviewers": "eUpw;tuFF;F7R6", "site": "https://openreview.net/forum?id=FwlV6h4KxVE", "pdf_size": 4303209, "recommendation": "5;6;6", "confidence": "3;3;4", "correctness": "2;3;3", "technical_novelty": "2;3;3", "empirical_novelty": "2;3;3", "wc_summary_paper": "110;21;57", "wc_strength_and_weaknesses": "434;21;367", "wc_clarity_quality_novelty_and_reproducibility": "46;21;33", "wc_summary_review": "101;21;34", "wc_review": "691;84;491", "wc_reply_reviewers": "0;0;38", "wc_reply_authors": 
"1666;31;1069", "reply_reviewers": "0;0;1", "reply_authors": "3;1;3", "recommendation_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 62.666666666666664, 36.55437350334734 ], "wc_strength_and_weaknesses_avg": [ 274.0, 180.9769782780856 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 33.333333333333336, 10.208928554075703 ], "wc_summary_review_avg": [ 52.0, 35.05234181430203 ], "wc_review_avg": [ 422.0, 252.56418326173383 ], "wc_reply_reviewers_avg": [ 12.666666666666666, 17.913371790059205 ], "wc_reply_authors_avg": [ 922.0, 675.5309023279394 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.9428090415820634 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.4999999999999999, "corr_recommendation_correctness": 0.9999999999999997, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4974275319186080260&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "FzKeidp3qnB", "title": "Learning Layered Implicit Model for 3D Avatar Clothing Representation", "track": "main", "status": "Withdraw", "tldr": "We present a novel 3D cloth represention, i.e., a neural implicit surface model conditioned on volumetric SMPL prior, to capture realistic clothes from raw scans.", "abstract": "Modeling 3D clothed avatars is a popular topic in the computer graphics and vision area. Due to the complicated nature of realistic garments, the most concerned issue is how to represent 3D cloth shapes efficiently and effectively. A desirable cloth model is expected to preserve high-quality geometric details while establishing essential correspondences between clothes and animation-ready templates. However, by far there is no such a 3D cloth representation that can simultaneously satisfy these two requirements.\nIn this work, we thus formulate a novel 3D cloth representation that integrating the neural implicit surface with a statistical body prior.\nDifferent from previous methods using explicit cloth primitives conditioned on the SMPL surface, we adopt a two-layer implicit function to capture the coarse and fine levels of cloth displacements, based on a parametric SMPL volume space. Our approach is aware of the underlying statistical minimal body shapes, and is also capable of modeling challenging clothes like skirts. \nTo evaluate the geometric modeling capacity of our 3D cloth representation, we conduct both qualitative and quantitative experiments on raw scans, which indicate superior performance over existing 3D cloth representations. \nThe effectiveness and flexibility of our 3D cloth representation is further validated in downstream applications, e.g. 
3D virtual try-on.", "keywords": "Geometric Representation;3D Cloth;Human Body;Implicit Surface", "primary_area": "", "supplementary_material": "", "author": "Keyu CHEN;Jingbo Wang;Wayne Wu;Bo Dai", "authorids": "~Keyu_CHEN1;~Jingbo_Wang3;~Wayne_Wu1;~Bo_Dai2", "gender": "M;M;;M", "homepage": ";https://scholar.google.com/citations?user=GStTsxAAAAAJ&hl=en;;http://daibo.info/", "dblp": ";10/1491-3.html;;64/2903-2", "google_scholar": "pTu2NakAAAAJ;GStTsxAAAAAJ;;https://scholar.google.com.hk/citations?user=KNWTvgEAAAAJ", "orcid": ";;;0000-0003-0777-9232", "linkedin": ";;;", "or_profile": "~Keyu_CHEN1;~Jingbo_Wang3;~Wayne_Wu1;~Bo_Dai2", "aff": "Shanghai Artificial Intelligence Laboratory;The Chinese University of Hong Kong;;Shanghai AI Laboratory", "aff_domain": "pjlab.org.cn;cuhk.edu.hk;;pjlab.org.cn", "position": "Research Engineer;PhD student;;Scientist", "bibtex": "@misc{\nchen2023learning,\ntitle={Learning Layered Implicit Model for 3D Avatar Clothing Representation},\nauthor={Keyu CHEN and Jingbo Wang and Wayne Wu and Bo Dai},\nyear={2023},\nurl={https://openreview.net/forum?id=FzKeidp3qnB}\n}", "github": "", "project": "", "reviewers": "ijd6;5Z2P;pnv5;kaPi", "site": "https://openreview.net/forum?id=FzKeidp3qnB", "pdf_size": 4002321, "recommendation": "3;3;5;5", "confidence": "4;4;3;4", "correctness": "2;2;3;3", "technical_novelty": "2;2;2;2", "empirical_novelty": "2;1;2;2", "wc_summary_paper": "78;54;48;71", "wc_strength_and_weaknesses": "384;132;113;273", "wc_clarity_quality_novelty_and_reproducibility": "113;11;393;29", "wc_summary_review": "84;26;26;64", "wc_review": "659;223;580;437", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 62.75, 12.193748398257199 ], "wc_strength_and_weaknesses_avg": [ 225.5, 110.42757807721765 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 136.5, 153.0122544112072 ], "wc_summary_review_avg": [ 50.0, 25.019992006393608 ], "wc_review_avg": [ 474.75, 165.7021046939356 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:i5mlhFa1ilcJ:scholar.google.com/&scioq=Learning+Layered+Implicit+Model+for+3D+Avatar+Clothing+Representation&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2", "aff_unique_norm": "Shanghai Artificial Intelligence Laboratory;Chinese University of Hong Kong;Shanghai AI Laboratory", "aff_unique_dep": ";;", "aff_unique_url": "http://www.shailab.org/;https://www.cuhk.edu.hk;https://www.shanghai-ai-lab.com", "aff_unique_abbr": "Shanghai AI Lab;CUHK;SAIL", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "G-dM79m_EXd", "title": "Time Series Anomaly Detection via Hypothesis Testing for Dynamical Systems", "track": "main", "status": "Reject", "tldr": "We tackle the problem of anomaly detection in dynamical systems from the perspective of hypothesis testing and propose a new algorithm.", "abstract": "Real world 
systems---such as robots, weather, energy systems and stock markets---are complicated and high-dimensional. Hence, without prior knowledge of the system dynamics, detecting or forecasting abnormal events from the sequential observations of the system is challenging. In this work, we address the problem caused by high-dimensionality by viewing time series anomaly detection as hypothesis testing on dynamical systems. This perspective keeps the dimension of the problem from increasing linearly with the time horizon, and naturally leads to a novel anomaly detection model, termed DyAD (Dynamical system Anomaly Detection). Furthermore, as existing time-series anomaly detection algorithms are usually evaluated on relatively small datasets, we release a large-scale dataset on detecting battery failures in electric vehicles. We benchmark several popular algorithms on both public datasets and our new dataset. Our experiments demonstrate that our proposed model achieves state-of-the-art results.", "keywords": "anomaly detection;dynamical system;hypothesis testing", "primary_area": "", "supplementary_material": "/attachment/2dedea6498d916a27457e3f4b3b498a5043dfa9a.zip", "author": "Haowei He;Jingzhao Zhang;Yanan Wang;Benben Jiang;Shaobo Huang;Chen Wang;Yang Zhang;Xuebing Han;Dongxu Guo;Guannan He;Minggao Ouyang", "authorids": "~Haowei_He1;~Jingzhao_Zhang2;~Yanan_Wang4;~Benben_Jiang1;~Shaobo_Huang3;~Chen_Wang18;~Yang_Zhang29;~Xuebing_Han1;~Dongxu_Guo1;~Guannan_He1;~Minggao_Ouyang1", "gender": "M;M;F;;M;Not Specified;M;M;;;M", "homepage": "https://962086838.github.io/;https://sites.google.com/view/jingzhao/home;;http://www.benbenjiang.net/;;;http://www.svm.tsinghua.edu.cn/essay/74/1839.html;;http://www.guannanhe.com/;https://webvpn.tsinghua.edu.cn/;https://ethinkenergy.com/", "dblp": ";220/5559;;;;https://dblp.uni-trier.de/pid/06/6785-25;;;;;", "google_scholar": "IcNEbaMAAAAJ;8NudxYsAAAAJ;;;;;;LxFl7C0AAAAJ;;;", "orcid": ";;0000-0003-0445-1696;;0000-0002-6834-9327;;;;;;", "linkedin": ";;;;;;;;;;", "or_profile": "~Haowei_He1;~Jingzhao_Zhang2;~Yanan_Wang4;~Benben_Jiang1;~Chen_Wang18;~Yang_Zhang29;~Xuebing_Han1;~Dongxu_Guo1;~Guannan_He1;~Minggao_Ouyang1;~shaobo_Huang2", "aff": "Tsinghua University;Tsinghua University;;Tsinghua University;Beihang University;Nanjing University of Aeronautics and Astronautics;Tsinghua University;Tsinghua University;Peking University;Tsinghua University;", "aff_domain": "tsinghua.edu.cn;mail.tsinghua.edu.cn;;tsinghua.edu.cn;buaa.edu.cn;nuaa.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;pku.edu.cn;tsinghua.edu.cn;", "position": "PhD student;Assistant Professor;;Associate Professor;PhD student;Associate Professor;Assistant Professor;Postdoc;Assistant Professor;Full Professor;", "bibtex": "@misc{\nhe2023time,\ntitle={Time Series Anomaly Detection via Hypothesis Testing for Dynamical Systems},\nauthor={Haowei He and Jingzhao Zhang and Yanan Wang and Benben Jiang and Shaobo Huang and Chen Wang and Yang Zhang and Xuebing Han and Dongxu Guo and Guannan He and Minggao Ouyang},\nyear={2023},\nurl={https://openreview.net/forum?id=G-dM79m_EXd}\n}", "github": "", "project": "", "reviewers": "R2Bu;4Vb4;V5eP", "site": "https://openreview.net/forum?id=G-dM79m_EXd", "pdf_size": 466514, "recommendation": "1;5;6", "confidence": "4;2;3", "correctness": "3;3;3", "technical_novelty": "1;2;2", "empirical_novelty": "1;3;2", "wc_summary_paper": "102;111;60", "wc_strength_and_weaknesses": "173;278;72", "wc_clarity_quality_novelty_and_reproducibility": "70;86;64", "wc_summary_review": "96;105;192",
"wc_review": "441;580;388", "wc_reply_reviewers": "0;0;66", "wc_reply_authors": "487;735;589", "reply_reviewers": "0;0;1", "reply_authors": "2;2;1", "recommendation_avg": [ 4.0, 2.160246899469287 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 91.0, 22.22611077089287 ], "wc_strength_and_weaknesses_avg": [ 174.33333333333334, 84.10443243703364 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 73.33333333333333, 9.285592184789413 ], "wc_summary_review_avg": [ 131.0, 43.289721643826724 ], "wc_review_avg": [ 469.6666666666667, 80.962268303753 ], "wc_reply_reviewers_avg": [ 22.0, 31.11269837220809 ], "wc_reply_authors_avg": [ 603.6666666666666, 101.77535174207074 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 11, 0 ], "corr_recommendation_confidence": -0.7559289460184544, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:wsHoTDjKe0IJ:scholar.google.com/&scioq=Time+Series+Anomaly+Detection+via+Hypothesis+Testing+for+Dynamical+Systems&hl=en&as_sdt=0,11", "gs_version_total": 0, "aff_unique_index": "0;0;0;1;2;0;0;3;0", "aff_unique_norm": "Tsinghua University;Beihang University;Nanjing University of Aeronautics and Astronautics;Peking University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.tsinghua.edu.cn;http://www.buaa.edu.cn/;http://www.nuaa.edu.cn;http://www.pku.edu.cn", "aff_unique_abbr": "THU;BUAA;NUAA;Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Efficient Attention via Control Variates", "status": "Top-5%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11348", "id": "G-uNfHKrj46", "poster": "", "openreview": "https://openreview.net/forum?id=G-uNfHKrj46", "slides": "https://iclr.cc/virtual/2023/poster/11348", "video": "https://iclr.cc/virtual/2023/poster/11348", "author_site": "Lin Zheng, Jianbo Yuan, Chong Wang, Lingpeng Kong", "tldr": "We present a novel analysis of random feature attention based on control variates, which characterizes its gap to full softmax attention and induces a novel efficient variant that significantly improves the approximation while remaining efficient.", "abstract": "Random-feature-based attention (RFA) is an efficient approximation of softmax attention with linear runtime and space complexity. However, the approximation gap between RFA and conventional softmax attention is not well studied. Built upon previous progress of RFA, we characterize this gap through the lens of control variates and show that RFA can be decomposed into a sum of multiple control variate estimators for each element in the sequence. This new framework reveals that exact softmax attention can be recovered from RFA by manipulating each control variate. Besides, it allows us to develop a more flexible form of control variates, resulting in a novel attention mechanism that significantly reduces the approximation gap while maintaining linear complexity. 
Extensive experiments demonstrate that our model outperforms state-of-the-art efficient attention mechanisms on both vision and language tasks.", "keywords": "attention mechanism;transformers;random features;control variates;importance sampling", "primary_area": "", "supplementary_material": "", "author": "Lin Zheng;Jianbo Yuan;Chong Wang;Lingpeng Kong", "authorids": "~Lin_Zheng1;~Jianbo_Yuan1;~Chong_Wang8;~Lingpeng_Kong1", "gender": "M;M;M;M", "homepage": "https://lzhengisme.github.io/;;https://ikekonglp.github.io/;https://chongw.github.io", "dblp": ";134/6790;144/7656;w/ChongWang2", "google_scholar": "3NXH0t8AAAAJ;https://scholar.google.com/citations?hl=en;f1hBi5wAAAAJ;vRI2blsAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Lin_Zheng1;~Jianbo_Yuan1;~Lingpeng_Kong1;~Chong_Wang1", "aff": "The University of Hong Kong;Bytedance;Department of Computer Science, The University of Hong Kong;Apple", "aff_domain": "hku.hk;bytedance.com;cs.hku.hk;apple.com", "position": "PhD student;Researcher;Assistant Professor;Researcher", "bibtex": "@inproceedings{\nzheng2023efficient,\ntitle={Efficient Attention via Control Variates},\nauthor={Lin Zheng and Jianbo Yuan and Chong Wang and Lingpeng Kong},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=G-uNfHKrj46}\n}", "github": "", "project": "", "reviewers": "B8ci;xJRA;KXnA;rNYj", "pdf_size": 607270, "recommendation": "6;8;8;8", "confidence": "3;4;3;4", "correctness": "2;3;4;4", "technical_novelty": "3;2;4;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "45;45;173;63", "wc_strength_and_weaknesses": "55;51;117;154", "wc_clarity_quality_novelty_and_reproducibility": "26;169;87;23", "wc_summary_review": "18;56;32;50", "wc_review": "144;321;409;290", "wc_reply_reviewers": "0;27;17;0", "wc_reply_authors": "375;705;704;317", "reply_reviewers": "0;1;1;0", "reply_authors": "1;2;2;1", "recommendation_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 81.5, 53.33619783974107 ], "wc_strength_and_weaknesses_avg": [ 94.25, 43.29766159967534 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 76.25, 59.326954245098406 ], "wc_summary_review_avg": [ 39.0, 15.0 ], "wc_review_avg": [ 291.0, 95.43846184846024 ], "wc_reply_reviewers_avg": [ 11.0, 11.554220008291344 ], "wc_reply_authors_avg": [ 525.25, 180.41947649851997 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.8703882797784891, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11768111876293707640&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=G-uNfHKrj46", "email": "hku.hk;bytedance.com;cs.hku.hk;apple.com", "author_num": 4, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "University of Hong Kong;ByteDance;Apple", "aff_unique_dep": ";;Apple Inc.", "aff_unique_url": "https://www.hku.hk;https://www.bytedance.com;https://www.apple.com", "aff_unique_abbr": "HKU;Bytedance;Apple", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "China;United States" }, { "title": "RoPAWS: Robust Semi-supervised Representation 
Learning from Uncurated Data", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11680", "id": "G1H4NSATlr", "poster": "/media/PosterPDFs/ICLR%202023/11680.png?t=1680965026.2909617", "openreview": "https://openreview.net/forum?id=G1H4NSATlr", "slides": "https://iclr.cc/virtual/2023/poster/11680", "video": "https://iclr.cc/virtual/2023/poster/11680", "author_site": "Sangwoo Mo, Jong-Chyi Su, Chih-Yao Ma, Mahmoud Assran, Ishan Misra, Licheng Yu, Sean Bell", "tldr": "We propose a robust semi-supervised learning method for uncurated data derived from a novel probabilistic view of learned representations", "abstract": "Semi-supervised learning aims to train a model using limited labels. State-of-the-art semi-supervised methods for image classification such as PAWS rely on self-supervised representations learned with large-scale unlabeled but curated data. However, PAWS is often less effective when using real-world unlabeled data that is uncurated, e.g., contains out-of-class data. We propose RoPAWS, a robust extension of PAWS that can work with real-world unlabeled data. We first reinterpret PAWS as a generative classifier that models densities using kernel density estimation. From this probabilistic perspective, we calibrate its prediction based on the densities of labeled and unlabeled data, which leads to a simple closed-form solution from the Bayes' rule. We demonstrate that RoPAWS significantly improves PAWS for uncurated Semi-iNat by +5.3% and curated ImageNet by +0.4%.", "keywords": "semi-supervised learning;representation learning;uncurated data", "primary_area": "", "supplementary_material": "/attachment/c423ffe45c57f06973072c9642ab3f74ef884d74.zip", "author": "Sangwoo Mo;Jong-Chyi Su;Chih-Yao Ma;Mido Assran;Ishan Misra;Licheng Yu;Sean Bell", "authorids": "~Sangwoo_Mo1;~Jong-Chyi_Su1;~Chih-Yao_Ma1;~Mido_Assran1;~Ishan_Misra2;~Licheng_Yu2;~Sean_Bell1", "gender": "M;M;M;M;M;M;M", "homepage": "https://sites.google.com/view/sangwoomo;https://jongchyisu.github.io/;https://chihyaoma.github.io/;https://seanbell.ca/;http://imisra.github.io/;https://lichengunc.github.io/;http://www.midoassran.ca/", "dblp": "198/0432;179/2111.html;198/0963;132/3980;12/10954;32/10805;216/2717", "google_scholar": "https://scholar.google.co.kr/citations?user=Sq9y3NMAAAAJ;jeKOwvsAAAAJ;HrrtgKkAAAAJ;xY1GdVgAAAAJ;WvufSLAAAAAJ;pwpweRQAAAAJ;gcQTTvkAAAAJ", "orcid": ";;;;;;0000-0001-9159-8447", "linkedin": ";jong-chyi-su-a5b2026a/;kevin-chih-yao-ma-9b5b3063/;seanbell3/;ishan-misra-7a140215;licheng-yu/;", "or_profile": "~Sangwoo_Mo1;~Jong-Chyi_Su1;~Chih-Yao_Ma1;~Sean_Bell1;~Ishan_Misra1;~Licheng_Yu4;~Mahmoud_Assran1", "aff": "KAIST;NEC Laboratories America;Meta;Meta Platforms Inc.;Meta Facebook;Meta AI;McGill University", "aff_domain": "kaist.ac.kr;nec-labs.com;meta.com;meta.com;fb.com;fb.com;mcgill.ca", "position": "PhD student;Researcher;Research Scientist;Researcher;Research Scientist;Research Scientist;PhD student", "bibtex": "@inproceedings{\nmo2023ropaws,\ntitle={Ro{PAWS}: Robust Semi-supervised Representation Learning from Uncurated Data},\nauthor={Sangwoo Mo and Jong-Chyi Su and Chih-Yao Ma and Mido Assran and Ishan Misra and Licheng Yu and Sean Bell},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=G1H4NSATlr}\n}", "github": "", "project": "", "reviewers": "NjZV;yYAe;oxEy;Q9GX;R1Ae", "pdf_size": 3183879, "recommendation": "5;5;8;8;8", "confidence": "5;3;3;3;3", "correctness": "2;3;4;3;3", 
"technical_novelty": "2;2;2;3;3", "empirical_novelty": "2;2;2;2;3", "wc_summary_paper": "51;82;75;40;83", "wc_strength_and_weaknesses": "121;127;113;24;295", "wc_clarity_quality_novelty_and_reproducibility": "25;79;22;13;53", "wc_summary_review": "17;34;36;36;57", "wc_review": "214;322;246;113;488", "wc_reply_reviewers": "10;0;0;0;0", "wc_reply_authors": "896;664;322;58;703", "reply_reviewers": "1;0;0;0;0", "reply_authors": "4;1;1;1;1", "recommendation_avg": [ 6.8, 1.469693845669907 ], "confidence_avg": [ 3.4, 0.8000000000000002 ], "correctness_avg": [ 3.0, 0.6324555320336759 ], "technical_novelty_avg": [ 2.4, 0.4898979485566356 ], "empirical_novelty_avg": [ 2.2, 0.39999999999999997 ], "wc_summary_paper_avg": [ 66.2, 17.474552927042225 ], "wc_strength_and_weaknesses_avg": [ 136.0, 87.93179174792243 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 38.4, 24.311314238436392 ], "wc_summary_review_avg": [ 36.0, 12.696456198483101 ], "wc_review_avg": [ 276.6, 125.19680507105602 ], "wc_reply_reviewers_avg": [ 2.0, 4.0 ], "wc_reply_authors_avg": [ 528.6, 299.2922317735627 ], "reply_reviewers_avg": [ 0.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.6, 1.2000000000000002 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.6123724356957944, "corr_recommendation_correctness": 0.6454972243679027, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15863761009212377943&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=G1H4NSATlr", "email": "kaist.ac.kr;nec-labs.com;meta.com;meta.com;fb.com;fb.com;mcgill.ca", "author_num": 7, "aff_unique_index": "0;1;2;2;2;2;3", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;NEC Laboratories America;Meta;McGill University", "aff_unique_dep": ";;Meta Platforms, Inc.;", "aff_unique_url": "https://www.kaist.ac.kr;https://www.nec-labs.com;https://meta.com;https://www.mcgill.ca", "aff_unique_abbr": "KAIST;NEC Labs America;Meta;McGill", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1;1;2", "aff_country_unique": "South Korea;United States;Canada" }, { "id": "G1STYDZDBeH", "title": "Mitigating Out-of-Distribution Data Density Overestimation in Energy-Based Models", "track": "main", "status": "Withdraw", "tldr": "We investigate why EBMs assign high density to OOD data and propose a method to mitigate this problem.", "abstract": "Deep energy-based models (EBMs), which use deep neural networks (DNNs) as energy functions, are receiving increasing attention due to their ability to learn complex distributions. To train deep EBMs, the maximum likelihood estimation (MLE) with short-run Langevin Monte Carlo (LMC) is often used. While the MLE with short-run LMC is computationally efficient compared to an MLE with full Markov Chain Monte Carlo (MCMC), it often assigns high density to out-of-distribution (OOD) data. To address this issue, here we systematically investigate why the MLE with short-run LMC can converge to EBMs with wrong density estimates, and reveal that the heuristic modifications to LMC introduced by previous works were the main problem. We then propose a Uniform Support Partitioning (USP) scheme that optimizes a set of points to evenly partition the support of the EBM and then uses the resulting points to approximate the EBM-MLE loss gradient. 
We empirically demonstrate that USP avoids the pitfalls of short-run LMC, leading to significantly improved OOD data detection performance on Fashion-MNIST.", "keywords": "Energy-Based Model", "primary_area": "", "supplementary_material": "", "author": "Beomsu Kim;Jong Chul Ye", "authorids": "~Beomsu_Kim1;~Jong_Chul_Ye1", "gender": "M;M", "homepage": ";https://bispl.weebly.com/", "dblp": ";15/5613", "google_scholar": "https://scholar.google.co.kr/citations?user=TofIFUgAAAAJ;HNMjoNEAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Beomsu_Kim1;~Jong_Chul_Ye1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr", "position": "MS student;Full Professor", "bibtex": "@misc{\nkim2023mitigating,\ntitle={Mitigating Out-of-Distribution Data Density Overestimation in Energy-Based Models},\nauthor={Beomsu Kim and Jong Chul Ye},\nyear={2023},\nurl={https://openreview.net/forum?id=G1STYDZDBeH}\n}", "github": "", "project": "", "reviewers": "CS3d;6HFb;AYrS;SFWa", "site": "https://openreview.net/forum?id=G1STYDZDBeH", "pdf_size": 1132345, "recommendation": "3;3;5;5", "confidence": "4;5;4;5", "correctness": "3;3;3;3", "technical_novelty": "3;2;2;3", "empirical_novelty": "3;1;3;3", "wc_summary_paper": "421;79;86;94", "wc_strength_and_weaknesses": "267;124;111;598", "wc_clarity_quality_novelty_and_reproducibility": "9;41;49;60", "wc_summary_review": "45;65;52;39", "wc_review": "742;309;298;791", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.5, 0.5 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 170.0, 145.01206846328344 ], "wc_strength_and_weaknesses_avg": [ 275.0, 196.27149563805744 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 39.75, 18.9917745353087 ], "wc_summary_review_avg": [ 50.25, 9.67923034130297 ], "wc_review_avg": [ 535.0, 232.1798871564891 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15334246993383424768&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "id": "G29-Xa55dCXD", "title": "Optical Flow Regularization of Implicit Neural Representations for Video Frame Interpolation", "track": "main", "status": "Withdraw", "tldr": "We show that constraining the derivatives of video INR to satisfy the optical flow constraint equation allows us to reach state-of-the-art VFI on limited motion ranges without relying on additional training data.", "abstract": "Recent works have shown the ability of Implicit Neural Representations (INR) to carry meaningful representations of signal derivatives.
In this work, we leverage this property to perform Video Frame Interpolation (VFI) by explicitly constraining the derivatives of the INR to satisfy the optical flow constraint equation. We achieve state-of-the-art VFI on limited motion ranges using only a target video and its optical flow, without learning the interpolation operator from additional training data. We further show that constraining the INR derivatives not only allows better interpolation of intermediate frames but also improves the ability of narrow networks to fit the observed frames, which suggests potential applications to video compression and INR optimization.", "keywords": "Implicit Neural Representation;Video Representation;Video Frame Interpolation", "primary_area": "", "supplementary_material": "", "author": "Weihao Zhuang;Tristan Hascoet;Ryoichi Takashima;Tetsuya Takiguchi", "authorids": "~Weihao_Zhuang1;~Tristan_Hascoet1;rtakashima@port.kobe-u.ac.jp;~Tetsuya_Takiguchi1", "gender": "M;M;;M", "homepage": ";;;http://www.me.cs.scitec.kobe-u.ac.jp/~takigu", "dblp": "260/2980;152/2262;;79/4485", "google_scholar": ";https://scholar.google.com/citations?hl=en;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Weihao_Zhuang1;~Tristan_Hascoet1;rtakashima@port.kobe-u.ac.jp;~Tetsuya_Takiguchi1", "aff": "Kobe University;Kobe University;;Kobe University", "aff_domain": "kobe-u.ac.jp;kobe-u.ac.jp;;kobe-u.ac.jp", "position": "PhD student;Assistant Professor;;Full Professor", "bibtex": "@misc{\nzhuang2023optical,\ntitle={Optical Flow Regularization of Implicit Neural Representations for Video Frame Interpolation},\nauthor={Weihao Zhuang and Tristan Hascoet and Ryoichi Takashima and Tetsuya Takiguchi},\nyear={2023},\nurl={https://openreview.net/forum?id=G29-Xa55dCXD}\n}", "github": "", "project": "", "reviewers": "syrP;uKuc;nsxF;qbMw", "site": "https://openreview.net/forum?id=G29-Xa55dCXD", "pdf_size": 10403522, "recommendation": "3;3;3;3", "confidence": "3;3;4;4", "correctness": "2;3;3;2", "technical_novelty": "2;2;2;2", "empirical_novelty": "0;2;2;2", "wc_summary_paper": "58;60;38;75", "wc_strength_and_weaknesses": "164;279;186;105", "wc_clarity_quality_novelty_and_reproducibility": "47;66;41;67", "wc_summary_review": "92;26;29;95", "wc_review": "361;431;294;342", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 1.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 57.75, 13.160072188251856 ], "wc_strength_and_weaknesses_avg": [ 183.5, 62.5879381350752 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 55.25, 11.453711188955307 ], "wc_summary_review_avg": [ 60.5, 33.03407331831786 ], "wc_review_avg": [ 357.0, 49.208739061268375 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13148934039809755451&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff_unique_index": "0;0;0", "aff_unique_norm": "Kobe University", "aff_unique_dep": "", "aff_unique_url": "https://www.kobe-u.ac.jp", "aff_unique_abbr": "Kobe U", "aff_campus_unique_index": "", "aff_campus_unique": "",
"aff_country_unique_index": "0;0;0", "aff_country_unique": "Japan" }, { "id": "G2AA1eB1vVE", "title": "Learning Robust Representations via Nuisance-extended Information Bottleneck", "track": "main", "status": "Withdraw", "tldr": "We propose to model the nuisance of information bottleneck for out-of-distribution generalization.", "abstract": "The information bottleneck (IB) is a principled approach to obtain a succinct representation $\\mathbf{x} \\rightarrow \\mathbf{z}$ for a given downstream task $\\mathbf{x} \\rightarrow \\mathbf{y}$: namely, it finds $\\mathbf{z}$ that (a) maximizes the (task-relevant) mutual information $I(\\mathbf{z}; \\mathbf{y})$, while (b) minimizing $I(\\mathbf{x}; \\mathbf{z})$ to constrain the capacity of $\\mathbf{z}$ for better generalization. In practical scenarios where the training data is limited, however, many predictive-yet-compressible signals in the data can be rather from some biases in data acquisition (i.e., less generalizable), so that even the IB objective cannot prevent $\\mathbf{z}$ from co-adapting on such (so-called) \"shortcut\" signals. To bypass such a failure mode, we consider an adversarial threat model of $\\mathbf{x}$ under constraint on the mutual information $I(\\mathbf{x}; \\mathbf{y})$. This motivates us to extend IB to additionally model the nuisance information against $\\mathbf{z}$, namely $\\mathbf{z}_n$, so that $(\\mathbf{z}, \\mathbf{z}_n)$ can reconstruct $\\mathbf{x}$. To enable the idea, we propose an auto-encoder based training upon the variational IB framework, as well as practical encoder designs to facilitate the proposed hybrid discriminative-generative training considering both convolutional- and Transformer-based architectures. Our experimental results show that the proposed scheme improves robustness of learned representations (remarkably without using any domain-specific knowledge), with respect to multiple challenging modern security measures including novelty detection, corruption (or natural) robustness and certified adversarial robustness.", "keywords": "out-of-distribution robustness;information bottleneck;representation learning;autoencoder", "primary_area": "", "supplementary_material": "", "author": "Jongheon Jeong;Sihyun Yu;Hankook Lee;Jinwoo Shin", "authorids": "~Jongheon_Jeong1;~Sihyun_Yu2;~Hankook_Lee1;~Jinwoo_Shin1", "gender": "M;M;M;M", "homepage": "https://jh-jeong.github.io;https://sihyun-yu.github.io;https://hankook.github.io;https://sites.google.com/site/mijirim/", "dblp": "241/5923;287/4627;223/4393;31/7062", "google_scholar": "mZB2qfcAAAAJ;https://scholar.google.com/citations?hl=en;CgqswXUAAAAJ;https://scholar.google.com.tw/citations?user=m3eDp7kAAAAJ", "orcid": "0000-0002-4058-5774;;;", "linkedin": "jongheonj/;;;", "or_profile": "~Jongheon_Jeong1;~Sihyun_Yu2;~Hankook_Lee1;~Jinwoo_Shin1", "aff": "Korea Advanced Institute of Science & Technology;NVIDIA;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;nvidia.com;kaist.ac.kr;kaist.ac.kr", "position": "PhD student;Intern;Postdoc;Full Professor", "bibtex": "@misc{\njeong2023learning,\ntitle={Learning Robust Representations via Nuisance-extended Information Bottleneck},\nauthor={Jongheon Jeong and Sihyun Yu and Hankook Lee and Jinwoo Shin},\nyear={2023},\nurl={https://openreview.net/forum?id=G2AA1eB1vVE}\n}", "github": "", "project": "", "reviewers": "dPLo;dHs6;8gS6", "site": "https://openreview.net/forum?id=G2AA1eB1vVE", "pdf_size": 9248260, "recommendation": "5;5;5", 
"confidence": "4;4;5", "correctness": "2;4;2", "technical_novelty": "3;2;3", "empirical_novelty": "2;2;3", "wc_summary_paper": "125;226;96", "wc_strength_and_weaknesses": "162;227;524", "wc_clarity_quality_novelty_and_reproducibility": "608;9;13", "wc_summary_review": "61;9;86", "wc_review": "956;471;719", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 5.0, 0.0 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.9428090415820634 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 149.0, 55.719535772174794 ], "wc_strength_and_weaknesses_avg": [ 304.3333333333333, 157.57819928178165 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 210.0, 281.433236606245 ], "wc_summary_review_avg": [ 52.0, 32.072833779799794 ], "wc_review_avg": [ 715.3333333333334, 198.0173954199198 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:6AqQ6vAzc1UJ:scholar.google.com/&scioq=Learning+Robust+Representations+via+Nuisance-extended+Information+Bottleneck&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;NVIDIA", "aff_unique_dep": ";NVIDIA Corporation", "aff_unique_url": "https://www.kaist.ac.kr;https://www.nvidia.com", "aff_unique_abbr": "KAIST;NVIDIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "South Korea;United States" }, { "id": "G2GpzH1l9AC", "title": "Learned Neural Network Representations are Spread Diffusely with Redundancy", "track": "main", "status": "Reject", "tldr": "We show that a randomly selected fraction of neurons from a pre-trained representation achieve similar performance as the full representation.", "abstract": "Representations learned by pre-training a neural network on a large dataset are increasingly used successfully to perform a variety of downstream tasks. In this work, we take a closer look at how features are encoded in such pre-trained representations. We find that learned representations in a given layer exhibit a degree of diffuse redundancy, ie, any randomly chosen subset of neurons in the layer that is larger than a threshold size shares a large degree of similarity with the full layer and is able to perform similarly as the whole layer on a variety of downstream tasks. For example, a linear probe trained on 20% of randomly picked neurons from a ResNet50 pre-trained on ImageNet1k achieves an accuracy within 5% of a linear probe trained on the full layer of neurons for downstream CIFAR10 classification. We conduct experiments on different neural architectures (including CNNs and Transformers) pre-trained on both ImageNet1k and ImageNet21k and evaluate a variety of downstream tasks taken from the VTAB benchmark. 
We find that the loss & dataset used during pre-training largely govern the degree of diffuse redundancy and the \"critical mass\" of neurons needed often depends on the downstream task, suggesting that there is a task-inherent sparsity-performance Pareto frontier. Our findings shed light on the nature of representations learned by pre-trained deep neural networks and suggest that entire layers might not be necessary to perform many downstream tasks. We investigate the potential for exploiting this redundancy to achieve efficient generalization for downstream tasks and also draw caution to certain possible unintended consequences.", "keywords": "representation learning;redundancy;transfer learning;fairness", "primary_area": "", "supplementary_material": "/attachment/3bcc030552858f623d31dc2793769c419ef051a0.zip", "author": "Vedant Nanda;Till Speicher;John P Dickerson;Soheil Feizi;Krishna Gummadi;Adrian Weller", "authorids": "~Vedant_Nanda2;~Till_Speicher1;~John_P_Dickerson1;~Soheil_Feizi2;~Krishna_Gummadi1;~Adrian_Weller1", "gender": "M;M;M;M;M;M", "homepage": "https://tillspeicher.com/;https://jpdickerson.com/;https://www.cs.umd.edu/~sfeizi/;http://mlg.eng.cam.ac.uk/adrian/;https://www.mpi-sws.org/~gummadi/;", "dblp": "144/7849;75/8479;57/2132;73/8324;g/PKrishnaGummadi;201/5458", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=QgDpfCQAAAAJ;lptAmrMAAAAJ;https://scholar.google.co.uk/citations?user=Ek4hM10AAAAJ;https://scholar.google.com.tw/citations?user=Bz3APTsAAAAJ;9GfPrCsAAAAJ", "orcid": "0009-0000-1172-2525;0000-0003-2231-680X;;;;", "linkedin": ";john-dickerson-83a74a7/;;;;", "or_profile": "~Till_Speicher1;~John_P_Dickerson1;~Soheil_Feizi2;~Adrian_Weller1;~Krishna_P._Gummadi1;~Vedant_Nanda1", "aff": "MPI-SWS;Optimized Markets, Inc;University of Maryland, College Park;University of Cambridge;MPI-SWS;Amazon", "aff_domain": "mpi-sws.org;optimizedmarkets.com;umd.edu;cam.ac.uk;mpi-sws.org;amazon.com", "position": "PhD student;Consultant;Associate Professor;Principal Researcher;Full Professor;Intern", "bibtex": "@misc{\nnanda2023learned,\ntitle={Learned Neural Network Representations are Spread Diffusely with Redundancy},\nauthor={Vedant Nanda and Till Speicher and John P Dickerson and Soheil Feizi and Krishna Gummadi and Adrian Weller},\nyear={2023},\nurl={https://openreview.net/forum?id=G2GpzH1l9AC}\n}", "github": "", "project": "", "reviewers": "tTBB;JaiD;9A8r", "site": "https://openreview.net/forum?id=G2GpzH1l9AC", "pdf_size": 1141751, "recommendation": "6;6;6", "confidence": "3;3;5", "correctness": "3;3;3", "technical_novelty": "3;2;3", "empirical_novelty": "2;3;2", "wc_summary_paper": "81;379;120", "wc_strength_and_weaknesses": "422;251;413", "wc_clarity_quality_novelty_and_reproducibility": "117;78;14", "wc_summary_review": "63;19;27", "wc_review": "683;727;574", "wc_reply_reviewers": "211;0;258", "wc_reply_authors": "559;948;897", "reply_reviewers": "1;0;3", "reply_authors": "2;2;2", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 193.33333333333334, 132.248083371955 ], "wc_strength_and_weaknesses_avg": [ 362.0, 78.57480512225277 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 69.66666666666667, 42.460439103816256 ], "wc_summary_review_avg": [ 36.333333333333336, 
19.136933459209764 ], "wc_review_avg": [ 661.3333333333334, 64.31346843564124 ], "wc_reply_reviewers_avg": [ 156.33333333333334, 112.19724694582403 ], "wc_reply_authors_avg": [ 801.3333333333334, 172.61582262997277 ], "reply_reviewers_avg": [ 1.3333333333333333, 1.247219128924647 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:tw35RcE1lwYJ:scholar.google.com/&scioq=Learned+Neural+Network+Representations+are+Spread+Diffusely+with+Redundancy&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;0;4", "aff_unique_norm": "Max Planck Institute for Software Systems;Optimized Markets, Inc;University of Maryland;University of Cambridge;Amazon", "aff_unique_dep": ";;;;Amazon.com, Inc.", "aff_unique_url": "https://www.mpi-sws.org;;https://www/umd.edu;https://www.cam.ac.uk;https://www.amazon.com", "aff_unique_abbr": "MPI-SWS;;UMD;Cambridge;Amazon", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";College Park;Cambridge", "aff_country_unique_index": "0;1;1;2;0;1", "aff_country_unique": "Germany;United States;United Kingdom" }, { "title": "Socratic Models: Composing Zero-Shot Multimodal Reasoning with Language", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12101", "id": "G2Q2Mh3avow", "poster": "/media/PosterPDFs/ICLR%202023/12101.png?t=1682439817.206241", "openreview": "https://openreview.net/forum?id=G2Q2Mh3avow", "slides": "https://iclr.cc/virtual/2023/poster/12101", "video": "https://iclr.cc/virtual/2023/poster/12101", "author_site": "Andy Zeng, Maria Attarian, brian ichter, Krzysztof Choromanski, Adrian Wong, Stefan Welker, Federico Tombari, Aveek Purohit, Michael Ryoo, Vikas Sindhwani, Johnny Lee, Vincent Vanhoucke, Pete Florence", "tldr": "We present a modular class of systems in which multiple pretrained models may be composed zero-shot via multimodal-informed prompt engineering to capture new multimodal capabilities, without additional finetuning.", "abstract": "We investigate how multimodal prompt engineering can use language as the intermediate representation to combine complementary knowledge from different pretrained (potentially multimodal) language models for a variety of tasks. This approach is both distinct from and complementary to the dominant paradigm of joint multimodal training. It also recalls a traditional systems-building view as in classical NLP pipelines, but with prompting large pretrained multimodal models. We refer to these as Socratic Models (SMs): a modular class of systems in which multiple pretrained models may be composed zero-shot via multimodal-informed prompting to capture new multimodal capabilities, without additional finetuning. We show that these systems provide competitive state-of-the-art performance for zero-shot image captioning and video-to-text retrieval, and also enable new applications such as (i) answering free-form questions about egocentric video, (ii) engaging in multimodal assistive dialogue with people (e.g., for cooking recipes), and (iii) robot perception and planning. 
We hope this work provides (a) results for stronger zero-shot baseline performance with analysis also highlighting their limitations, (b) new perspectives for building multimodal systems powered by large pretrained models, and (c) practical application advantages in certain regimes limited by data scarcity, training compute, or model access.", "keywords": "prompt engineering;multimodal applications;visual language models;large language models;commonsense reasoning", "primary_area": "", "supplementary_material": "", "author": "Andy Zeng;Maria Attarian;brian ichter;Krzysztof Marcin Choromanski;Adrian Wong;Stefan Welker;Federico Tombari;Aveek Purohit;Michael S Ryoo;Vikas Sindhwani;Johnny Lee;Vincent Vanhoucke;Pete Florence", "authorids": "~Andy_Zeng3;~Maria_Attarian1;~brian_ichter1;~Krzysztof_Marcin_Choromanski1;~Adrian_Wong1;swelker@google.com;~Federico_Tombari1;aveek@google.com;~Michael_S_Ryoo1;~Vikas_Sindhwani1;johnnylee@google.com;~Vincent_Vanhoucke1;~Pete_Florence1", "gender": ";F;;;;;M;;M;M;;M;", "homepage": ";https://jmattarian.com/;;;http://almostsquare.com/;;https://federicotombari.github.io/;;http://michaelryoo.com/;http://vikas.sindhwani.org;;http://vincent.vanhoucke.com;http://www.peteflorence.com/", "dblp": ";;;78/11411;;;16/3539;;r/MichaelSRyoo;26/4825;;69/7157;", "google_scholar": ";6Hk7QdkAAAAJ;-w5DuHgAAAAJ;;9MjZO8wAAAAJ;;TFsE4BIAAAAJ;;vcw0TJIAAAAJ;https://scholar.google.com/citations?hl=en;;T7uctwYAAAAJ;", "orcid": ";;;;;;0000-0001-5598-5212;;;;;0000-0003-0544-2791;", "linkedin": ";maria-attarian/;;;almostsquare;;fedet/;;;vikassindhwani;;vanhoucke;", "or_profile": "~Andy_Zeng3;~Maria_Attarian1;~brian_ichter1;~Krzysztof_Marcin_Choromanski1;~Adrian_Wong1;swelker@google.com;~Federico_Tombari1;aveek@google.com;~Michael_S_Ryoo1;~Vikas_Sindhwani1;johnnylee@google.com;~Vincent_Vanhoucke1;~Pete_Florence1", "aff": ";Google;Google;Google Brain Robotics & Columbia University;Google;;Technical University Munich (TUM);;Google DeepMind;Google;;Google;Google", "aff_domain": ";google.com;google.com;columbia.edu;google.com;;in.tum.de;;google.com;google.com;;google.com;google.com", "position": ";Researcher;Research Scientist;research scientist & adjunct assistant professor;Researcher;;Lecturer;;Research Scientist;Senior Staff Research Scientist;;Principal Scientist;Research Scientist", "bibtex": "@inproceedings{\nzeng2023socratic,\ntitle={Socratic Models: Composing Zero-Shot Multimodal Reasoning with Language},\nauthor={Andy Zeng and Maria Attarian and brian ichter and Krzysztof Marcin Choromanski and Adrian Wong and Stefan Welker and Federico Tombari and Aveek Purohit and Michael S Ryoo and Vikas Sindhwani and Johnny Lee and Vincent Vanhoucke and Pete Florence},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=G2Q2Mh3avow}\n}", "github": "", "project": "", "reviewers": "x3bb;NinX;J1Ki;3Nxe", "pdf_size": 5710987, "recommendation": "6;8;8;8", "confidence": "3;4;4;4", "correctness": "3;4;3;3", "technical_novelty": "3;3;3;3", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "63;91;61;118", "wc_strength_and_weaknesses": "108;324;627;161", "wc_clarity_quality_novelty_and_reproducibility": "35;2;31;22", "wc_summary_review": "54;59;86;40", "wc_review": "260;476;805;341", "wc_reply_reviewers": "0;24;109;0", "wc_reply_authors": "469;341;1735;341", "reply_reviewers": "0;1;1;0", "reply_authors": "1;1;3;1", "recommendation_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], 
"correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 83.25, 23.306383245797704 ], "wc_strength_and_weaknesses_avg": [ 305.0, 202.23130321490785 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 22.5, 12.737739202856996 ], "wc_summary_review_avg": [ 59.75, 16.67895380412093 ], "wc_review_avg": [ 470.5, 207.96694448878168 ], "wc_reply_reviewers_avg": [ 33.25, 44.81838350498599 ], "wc_reply_authors_avg": [ 721.5, 587.4731908776774 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 13, 0 ], "corr_recommendation_confidence": 1.0, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 559, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17485588102904105060&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=G2Q2Mh3avow", "email": ";google.com;google.com;columbia.edu;google.com;;in.tum.de;;google.com;google.com;;google.com;google.com", "author_num": 13, "aff_unique_index": "0;0;0;0;1;0;0;0;0", "aff_unique_norm": "Google;Technical University Munich", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://www.tum.de", "aff_unique_abbr": "Google;TUM", "aff_campus_unique_index": "0;0;0;0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;0;1;2;0;0;0", "aff_country_unique": "United States;Germany;United Kingdom" }, { "id": "G4ywctru8UX", "title": "TPC-NAS: Sub-Five-Minute Neural Architecture Search for Image Classification, Object-Detection, and Super-Resolution", "track": "main", "status": "Reject", "tldr": "", "abstract": "Neural network models have become more sophisticated with the explosive development of AI and its applications. Automating the model search process is essential to explore a full range of neural architectures for satisfactory performance. However, most current NAS algorithms consume significant time and computing resources, and many cater only to image classification applications. This paper proposes the total path count (TPC) score, which requires only simple calculation based on the architecture information, as an efficient accuracy predictor. TPC score is not only simple to come by but also very effective. The Kendall rank correlation coefficient of the TPC scores and the accuracies of 20 architectures for the CIFAR100 problem is as high as 0.87. This paper also proposes TPC-NAS, a zero-shot NAS method leveraging the novel TPC score. TPC-NAS requires no training and inference, and can complete a NAS task for Imagenet and other vision applications in less than five CPU minutes. Then, we apply TPC-NAS to image classification, object detection, and super-resolution applications for further validation. In image classification, TPC-NAS finds an architecture that achieves 76.4% top-1 accuracy in ImageNet with 355M FLOPs, outperforming other NAS solutions. Starting with yolov4-p5, TPC-NAS comes up with a high-performance model with at least 2% mAP improvement over other NAS algorithms\u2019 results in object detection. Finally, in the super-resolution application, TPC-NAS discovers a model with fewer than 300K parameters and generates images with 32.09dB PSNR in the Urban100 dataset. These three experiments convince us that the TPC-NAS method can swiftly deliver high-quality CNN architectures in diverse applications. 
The related source code is available at https://github.com/TPC-NAS/TPC.", "keywords": "NAS;Neural Architecture Search;Image Classification;Object Detection;Super Resolution", "primary_area": "", "supplementary_material": "/attachment/3bbe4addd875df5d101986130a93183104348faf.zip", "author": "Ming-shan Huang;Tzi-Dar Chiueh", "authorids": "~Ming-shan_Huang1;~Tzi-Dar_Chiueh1", "gender": ";", "homepage": ";", "dblp": ";", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": ";", "aff": ";", "aff_domain": ";", "position": ";", "bibtex": "@misc{\nhuang2023tpcnas,\ntitle={{TPC}-{NAS}: Sub-Five-Minute Neural Architecture Search for Image Classification, Object-Detection, and Super-Resolution},\nauthor={Ming-shan Huang and Tzi-Dar Chiueh},\nyear={2023},\nurl={https://openreview.net/forum?id=G4ywctru8UX}\n}", "github": "", "project": "", "reviewers": "F9pB;CdaL;3RDB;3zBH", "site": "https://openreview.net/forum?id=G4ywctru8UX", "pdf_size": 838435, "recommendation": "5;5;5;5", "confidence": "4;4;5;4", "correctness": "4;3;4;3", "technical_novelty": "3;3;1;3", "empirical_novelty": "3;3;2;2", "wc_summary_paper": "128;59;88;75", "wc_strength_and_weaknesses": "217;139;330;186", "wc_clarity_quality_novelty_and_reproducibility": "17;11;12;45", "wc_summary_review": "62;41;56;64", "wc_review": "424;250;486;370", "wc_reply_reviewers": "76;0;0;29", "wc_reply_authors": "772;319;1313;731", "reply_reviewers": "1;0;0;1", "reply_authors": "1;1;2;1", "recommendation_avg": [ 5.0, 0.0 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.5, 0.8660254037844386 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 87.5, 25.53918557824427 ], "wc_strength_and_weaknesses_avg": [ 218.0, 70.37400088100719 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 21.25, 13.899190623917638 ], "wc_summary_review_avg": [ 55.75, 9.01041064547005 ], "wc_review_avg": [ 382.5, 86.81445732134712 ], "wc_reply_reviewers_avg": [ 26.25, 31.06746690671771 ], "wc_reply_authors_avg": [ 783.75, 353.2062959518134 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:XQIePSJZE3IJ:scholar.google.com/&scioq=TPC-NAS:+Sub-Five-Minute+Neural+Architecture+Search+for+Image+Classification,+Object-Detection,+and+Super-Resolution&hl=en&as_sdt=0,5", "gs_version_total": 0 }, { "id": "G6-oxjbc_mK", "title": "Sharper Analysis of Sparsely Activated Wide Neural Networks with Trainable Biases", "track": "main", "status": "Reject", "tldr": "We study convergence and generalization of training one-hidden-layer neural networks with sparse activation.", "abstract": "This work studies training one-hidden-layer overparameterized ReLU networks via gradient descent in the neural tangent kernel (NTK) regime, where, unlike in previous works, the networks' biases are trainable and are initialized to some constant rather than zero. The tantalizing benefit of such initialization is that the neural network will provably have a sparse activation pattern before, during and after training, which can enable fast training procedures and, therefore, reduce the training cost. The first set of results of this work characterizes the convergence of the network's gradient descent dynamics.
The required width is provided to ensure gradient descent can drive the training error towards zero at a linear rate. The contribution over previous work is that not only is the bias allowed to be updated by gradient descent under our setting, but also a finer analysis is given such that the required width to ensure the network's closeness to its NTK is improved. Secondly, the networks' generalization bound after training is provided. A width-sparsity dependence is presented, which yields a sparsity-dependent localized Rademacher complexity and a generalization bound matching previous results (ignoring logarithmic factors). To our knowledge, this is the first sparsity-dependent generalization result via localized Rademacher complexity. As a by-product, if the bias initialization is chosen to be zero, the width requirement improves the previous bound for the shallow networks' generalization. Lastly, since the generalization bound depends on the smallest eigenvalue of the limiting NTK and the bounds from previous works yield vacuous generalization, this work further studies the least eigenvalue of the limiting NTK. Surprisingly, while it is not shown that trainable biases are necessary, trainable biases help to identify a nice data-dependent region where a much finer analysis of the NTK's smallest eigenvalue can be conducted, which leads to a much sharper lower bound than the previously known worst-case bound and, consequently, a non-vacuous generalization bound. Experimental evaluation is provided to support our results. ", "keywords": "Convergence analysis;sparse activation;neural tangent kernel;Rademacher complexity;generalization bound", "primary_area": "", "supplementary_material": "/attachment/d529a5252837e51d1df3a720998cfaed949e40a9.zip", "author": "Hongru Yang;Ziyu Jiang;Ruizhe Zhang;Zhangyang Wang;Yingbin Liang", "authorids": "~Hongru_Yang1;~Ziyu_Jiang1;~Ruizhe_Zhang2;~Zhangyang_Wang1;~Yingbin_Liang1", "gender": "M;M;M;M;F", "homepage": ";https://geekjzy.github.io/;;https://vita-group.github.io;https://sites.google.com/view/yingbinliang/home", "dblp": "234/7562;232/9728;133/6407-1;119/4026;51/332", "google_scholar": "VwBcercAAAAJ;t5KUxs4AAAAJ;;pxFyKAIAAAAJ;lGgLAiIAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Hongru_Yang1;~Ziyu_Jiang1;~Ruizhe_Zhang2;~Zhangyang_Wang1;~Yingbin_Liang1", "aff": "University of Texas at Austin;Texas A&M;The University of Texas at Austin;University of Texas, Austin;The Ohio State University", "aff_domain": "utexas.edu;tamu.edu;utexas.edu;utexas.edu;osu.edu", "position": "PhD student;PhD student;PhD student;Assistant Professor;Professor", "bibtex": "@misc{\nyang2023sharper,\ntitle={Sharper Analysis of Sparsely Activated Wide Neural Networks with Trainable Biases},\nauthor={Hongru Yang and Ziyu Jiang and Ruizhe Zhang and Zhangyang Wang and Yingbin Liang},\nyear={2023},\nurl={https://openreview.net/forum?id=G6-oxjbc_mK}\n}", "github": "", "project": "", "reviewers": "neAa;crhr;dzEj;D4Qh", "site": "https://openreview.net/forum?id=G6-oxjbc_mK", "pdf_size": 530512, "recommendation": "3;5;6;6", "confidence": "4;4;3;4", "correctness": "4;4;4;4", "technical_novelty": "2;2;3;3", "empirical_novelty": "0;0;0;2", "wc_summary_paper": "33;82;119;43", "wc_strength_and_weaknesses": "661;234;505;289", "wc_clarity_quality_novelty_and_reproducibility": "307;7;71;5", "wc_summary_review": "84;11;47;46", "wc_review": "1085;334;742;383", "wc_reply_reviewers": "840;0;0;0", "wc_reply_authors": "3176;907;1028;913", "reply_reviewers": "3;0;0;0", "reply_authors": "6;2;2;2",
"recommendation_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 4.0, 0.0 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 0.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 69.25, 34.06152521540984 ], "wc_strength_and_weaknesses_avg": [ 422.25, 171.0575561032017 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 97.5, 123.83355764896686 ], "wc_summary_review_avg": [ 47.0, 25.816661286851172 ], "wc_review_avg": [ 636.0, 303.33562270198337 ], "wc_reply_reviewers_avg": [ 210.0, 363.73066958946424 ], "wc_reply_authors_avg": [ 1506.0, 965.3799770038738 ], "reply_reviewers_avg": [ 0.75, 1.299038105676658 ], "reply_authors_avg": [ 3.0, 1.7320508075688772 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.4714045207910316, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12792627892904905891&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;0;0;2", "aff_unique_norm": "University of Texas at Austin;Texas A&M University;Ohio State University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.utexas.edu;https://www.tamu.edu;https://www.osu.edu", "aff_unique_abbr": "UT Austin;TAMU;OSU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Austin;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "G7E_K3WaLpK", "title": "Infusing Lattice Symmetry Priors in Neural Networks Using Soft Attention Masks", "track": "main", "status": "Reject", "tldr": "", "abstract": "Infusing inductive biases and knowledge priors in artificial neural networks is a promising approach for achieving sample efficiency in current deep learning models. Core knowledge priors of human intelligence have been studied extensively in developmental science and recent work has postulated the idea that research on artificial intelligence should revolve around the same basic priors. As a step in this direction, in this paper, we introduce LatFormer, a model that incorporates lattice geometry and topology priors in attention masks.\nOur study of the properties of these masks motivates a modification to the standard attention mechanism, where attention weights are scaled using soft attention masks generated by a convolutional neural network. Our experiments on ARC and on synthetic visual reasoning tasks show that LatFormer requires two orders of magnitude less data than standard attention and transformers in these tasks.
Moreover, our results on ARC tasks that incorporate geometric priors provide preliminary evidence that deep learning can tackle this complex dataset, which is widely viewed as an important open challenge for AI research.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Mattia Atzeni;Mrinmaya Sachan;Andreas Loukas", "authorids": "~Mattia_Atzeni1;~Mrinmaya_Sachan3;~Andreas_Loukas1", "gender": ";M;M", "homepage": ";;https://sites.google.com/site/mrinsachan/", "dblp": "204/8455.html;19/10012;86/10440.html", "google_scholar": "GxcjDq0AAAAJ;https://scholar.google.ch/citations?user=-XGXJbQAAAAJ;Tpp9ZjoAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Mattia_Atzeni1;~Andreas_Loukas1;~MRINMAYA_SACHAN2", "aff": "Meta;Roche / Genentech;Swiss Federal Institute of Technology", "aff_domain": "meta.com;roche.com;ethz.ch", "position": "Intern;Principal Researcher;Assistant Professor", "bibtex": "@misc{\natzeni2023infusing,\ntitle={Infusing Lattice Symmetry Priors in Neural Networks Using Soft Attention Masks},\nauthor={Mattia Atzeni and Mrinmaya Sachan and Andreas Loukas},\nyear={2023},\nurl={https://openreview.net/forum?id=G7E_K3WaLpK}\n}", "github": "", "project": "", "reviewers": "vL2a;9teR;YP2T;ch2n;TgJP", "site": "https://openreview.net/forum?id=G7E_K3WaLpK", "pdf_size": 2051026, "recommendation": "5;6;6;6;6", "confidence": "3;4;4;3;2", "correctness": "2;3;4;3;3", "technical_novelty": "2;2;3;3;3", "empirical_novelty": "3;2;3;3;2", "wc_summary_paper": "48;64;75;53;97", "wc_strength_and_weaknesses": "92;84;174;147;164", "wc_clarity_quality_novelty_and_reproducibility": "13;19;27;25;91", "wc_summary_review": "24;8;30;54;123", "wc_review": "177;175;306;279;475", "wc_reply_reviewers": "0;16;23;34;0", "wc_reply_authors": "432;264;561;618;608", "reply_reviewers": "0;1;1;1;0", "reply_authors": "2;2;2;2;2", "recommendation_avg": [ 5.8, 0.39999999999999997 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "correctness_avg": [ 3.0, 0.6324555320336759 ], "technical_novelty_avg": [ 2.6, 0.4898979485566356 ], "empirical_novelty_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_paper_avg": [ 67.4, 17.48828179095934 ], "wc_strength_and_weaknesses_avg": [ 132.2, 37.193547827546645 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 35.0, 28.42534080710379 ], "wc_summary_review_avg": [ 47.8, 40.400000000000006 ], "wc_review_avg": [ 282.4, 109.82458741101648 ], "wc_reply_reviewers_avg": [ 14.6, 13.230268326832983 ], "wc_reply_authors_avg": [ 496.6, 133.84408840139335 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.1336306209562122, "corr_recommendation_correctness": 0.790569415042095, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:gm6k-bH4ypIJ:scholar.google.com/&scioq=Infusing+Lattice+Symmetry+Priors+in+Neural+Networks+Using+Soft+Attention+Masks&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;1;2", "aff_unique_norm": "Meta;Roche;Swiss Federal Institute of Technology", "aff_unique_dep": "Meta Platforms, Inc.;;", "aff_unique_url": "https://meta.com;https://www.roche.com;https://www.ethz.ch", "aff_unique_abbr": "Meta;Roche;ETH Zurich", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United States;Switzerland" }, { "id": "G7_LoXdE2Oe", "title": "Video-based 3D Object Detection with Learnable Object-Centric Global Optimization", 
"track": "main", "status": "Withdraw", "tldr": "", "abstract": "We study utilizing long-term temporal visual correspondence-based optimization for video-based 3D object detection in this work. Visual correspondence refers to one-to-one mappings for pixels across multiple images. Correspondence-based optimization is the cornerstone for 3D scene reconstruction but is less studied in 3D object detection, for that moving objects violate multi-view geometry constraints and are treated as outliers during scene reconstruction. We resolve this issue by treating objects as first-class citizens during correspondence-based optimization. In this work, we propose BA-Det, an end-to-end optimizable object detector with object-centric temporal correspondence learning and object-centric featuremetric bundle adjustment. Empirically, we verify the effectiveness and efficiency of BA-Det for multiple baseline 3D detectors under various setups. Our BA-Det achieves SOTA performance on the large-scale Waymo Open Dataset (WOD) with only marginal computation cost. Codes will be released soon.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Jiawei He;Yuntao Chen;Naiyan Wang;Zhaoxiang Zhang", "authorids": "~Jiawei_He2;~Yuntao_Chen1;~Naiyan_Wang1;~Zhaoxiang_Zhang3", "gender": "M;M;M;M", "homepage": "https://jiaweihe.com/;;http://winsty.net;http://zhaoxiangzhang.net", "dblp": "172/2564-2;203/8284;31/9922;55/2285-1.html", "google_scholar": "35lEP_oAAAAJ;iLOoUqIAAAAJ;yAWtq6QAAAAJ;qxWfV6cAAAAJ", "orcid": "0000-0001-6872-3254;;;", "linkedin": ";;;", "or_profile": "~Jiawei_He2;~Yuntao_Chen1;~Naiyan_Wang1;~Zhaoxiang_Zhang3", "aff": "Institute of automation, Chinese Academy of Sciences;Centre for Artificial Intelligence and Robotics (CAIR), Hong Kong Institute of Science & Innovation, Chinese Academy of Sciences;Tusimple;Institute of Automation, Chinese Academy of Sciences", "aff_domain": "ia.ac.cn;cair-cas.org.hk;tusimple.ai;ia.ac.cn", "position": "PhD student;Assistant Professor;Chief Scientist;Full Professor", "bibtex": "@misc{\nhe2023videobased,\ntitle={Video-based 3D Object Detection with Learnable Object-Centric Global Optimization},\nauthor={Jiawei He and Yuntao Chen and Naiyan Wang and Zhaoxiang Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=G7_LoXdE2Oe}\n}", "github": "", "project": "", "reviewers": "pKfk;vy1X;sfte", "site": "https://openreview.net/forum?id=G7_LoXdE2Oe", "pdf_size": 1557334, "recommendation": "3;5;5", "confidence": "4;5;4", "correctness": "3;3;4", "technical_novelty": "3;3;3", "empirical_novelty": "3;3;0", "wc_summary_paper": "42;91;101", "wc_strength_and_weaknesses": "66;430;557", "wc_clarity_quality_novelty_and_reproducibility": "52;66;116", "wc_summary_review": "37;104;41", "wc_review": "197;691;815", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 1.4142135623730951 ], "wc_summary_paper_avg": [ 78.0, 25.78113005022601 ], "wc_strength_and_weaknesses_avg": [ 351.0, 208.08812235845338 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 78.0, 27.47119703738202 ], "wc_summary_review_avg": [ 60.666666666666664, 30.684777260973487 ], "wc_review_avg": [ 567.6666666666666, 266.9448548962043 ], "wc_reply_reviewers_avg": [ 0, 0 ], 
"wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.4999999999999999, "corr_recommendation_correctness": 0.49999999999999983, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:7rHIMePCBRAJ:scholar.google.com/&scioq=Video-based+3D+Object+Detection+with+Learnable+Object-Centric+Global+Optimization&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Chinese Academy of Sciences;TuSimple", "aff_unique_dep": "Institute of Automation;", "aff_unique_url": "http://www.ia.cas.cn;https://www.tusimple.com", "aff_unique_abbr": "CAS;Tusimple", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "China;United States" }, { "id": "G7iioWGldQ7", "title": "To be robust and to be fair: aligning fairness with robustness", "track": "main", "status": "Withdraw", "tldr": "bridging adversarial robustness of fairness and accuracy in a unified framework", "abstract": "Adversarial training has been shown to be reliable in improving robustness against adversarial samples. However, the problem of adversarial training in terms of fairness has not yet been properly studied, and the relationship between fairness and accuracy attack still remains unclear. Can we simultaneously improve robustness w.r.t. both fairness and accuracy? To tackle this topic, in this paper, we study the problem of adversarial training and adversarial attacks w.r.t. both metrics. We propose a unified structure for fairness attack which bring together common notions in group fairness, and we theoretically prove the equivalence of fairness attacks against different notions. We show the alignment of fairness and accuracy attack in disadvantaged subgroups, and we theoretically demonstrate that robustness of samples w.r.t. adversarial attack against one metric also benefit from robustness of samples w.r.t. adversarial attack against the other metric. Our work unifies adversarial training and attack w.r.t. fairness and accuracy, where both metrics benefit from robustness of the other metric under adversarial attack. Our study suggests a novel way to incorporate adversarial training with fairness, and experimental results show that our proposed method achieves better performance in terms of robustness w.r.t. 
both fairness and accuracy.", "keywords": "fairness;adversarial robustness", "primary_area": "", "supplementary_material": "/attachment/aa8776590a3a288d6f98143b8646b03cae41b841.zip", "author": "Junyi Chai;Xiaoqian Wang", "authorids": "~Junyi_Chai1;~Xiaoqian_Wang1", "gender": "M;F", "homepage": ";https://engineering.purdue.edu/~joywang/", "dblp": "323/9078;151/3215-1", "google_scholar": "fucMzpYAAAAJ;I3tc214AAAAJ", "orcid": "0000-0002-4324-5361;", "linkedin": "junyi-chai-260869256/?trk=opento_sprofile_details;", "or_profile": "~Junyi_Chai1;~Xiaoqian_Wang1", "aff": "Purdue University;Purdue University", "aff_domain": "purdue.edu;purdue.edu", "position": "PhD student;Assistant Professor", "bibtex": "@misc{\nchai2023to,\ntitle={To be robust and to be fair: aligning fairness with robustness},\nauthor={Junyi Chai and Xiaoqian Wang},\nyear={2023},\nurl={https://openreview.net/forum?id=G7iioWGldQ7}\n}", "github": "", "project": "", "reviewers": "6HVC;yzTJ;nBZh;Ew9r", "site": "https://openreview.net/forum?id=G7iioWGldQ7", "pdf_size": 1276298, "recommendation": "3;3;3;8", "confidence": "4;3;3;3", "correctness": "3;3;2;4", "technical_novelty": "3;3;2;3", "empirical_novelty": "2;3;2;3", "wc_summary_paper": "97;58;46;42", "wc_strength_and_weaknesses": "354;504;178;89", "wc_clarity_quality_novelty_and_reproducibility": "30;234;1;28", "wc_summary_review": "47;42;2;19", "wc_review": "528;838;227;178", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.25, 2.165063509461097 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 60.75, 21.741377601246892 ], "wc_strength_and_weaknesses_avg": [ 281.25, 160.10211585110298 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 73.25, 93.5130338509023 ], "wc_summary_review_avg": [ 27.5, 18.117670931993437 ], "wc_review_avg": [ 442.75, 264.6368974651872 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": 0.816496580927726, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11176485612929684989&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Purdue University", "aff_unique_dep": "", "aff_unique_url": "https://www.purdue.edu", "aff_unique_abbr": "Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "GAGpLgWAWX", "title": "Factors Influencing Generalization in Chaotic Dynamical Systems", "track": "main", "status": "Reject", "tldr": "We explore factors influencing in- and out-of-distribution generalisation in forecasting chaotic dynamics.", "abstract": "Many real-world systems exhibit chaotic behaviour, for example: weather, fluid dynamics, stock markets, natural ecosystems, and disease transmission. While chaotic systems are often thought to be completely unpredictable, in fact there are patterns within and across that experts frequently describe and contrast qualitatively. 
We hypothesize that given the right supervision / task definition, representation learning systems will be able to pick up on these patterns, and successfully generalize both in- and out-of-distribution (OOD).\nThus, this work explores and identifies key factors which lead to good generalization. We observe a variety of interesting phenomena, including: learned representations transfer much better when fine-tuned vs. frozen; forecasting appears to be the best pre-training task; OOD robustness falls off very quickly outside the training distribution; recurrent architectures generally outperform others on OOD generalization.\nOur findings are of interest to any domain of prediction where chaotic dynamics play a role.", "keywords": "generalization;multi-task learning;chaos;dynamical systems;representation learning", "primary_area": "", "supplementary_material": "", "author": "Lu\u00e3 Streit;Vikram Voleti;Tegan Maharaj", "authorids": "~Lu\u00e3_Streit1;~Vikram_Voleti1;~Tegan_Maharaj1", "gender": ";M;F", "homepage": ";https://voletiv.github.io;http://teganmaharaj.com", "dblp": ";243/6609;", "google_scholar": ";PPCRqZUAAAAJ;https://scholar.google.ca/citations?user=XpscC-EAAAAJ", "orcid": ";;", "linkedin": "lua-streit/;vikram-voleti-45372222;", "or_profile": "~Lu\u00e3_Streit1;~Vikram_Voleti1;~Tegan_Maharaj1", "aff": ";Meta;Ecole Polytechnique de Montreal", "aff_domain": ";meta.com;polymtl.ca", "position": ";Intern;PhD student", "bibtex": "@misc{\nstreit2023factors,\ntitle={Factors Influencing Generalization in Chaotic Dynamical Systems},\nauthor={Lu{\\~a} Streit and Vikram Voleti and Tegan Maharaj},\nyear={2023},\nurl={https://openreview.net/forum?id=GAGpLgWAWX}\n}", "github": "", "project": "", "reviewers": "k5uo;KoFr;hEVM", "site": "https://openreview.net/forum?id=GAGpLgWAWX", "pdf_size": 11781499, "recommendation": "3;3;5", "confidence": "5;4;3", "correctness": "2;2;2", "technical_novelty": "2;2;2", "empirical_novelty": "2;2;2", "wc_summary_paper": "39;49;45", "wc_strength_and_weaknesses": "603;34;136", "wc_clarity_quality_novelty_and_reproducibility": "89;643;8", "wc_summary_review": "75;29;15", "wc_review": "806;755;204", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "correctness_avg": [ 2.0, 0.0 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 44.333333333333336, 4.109609335312651 ], "wc_strength_and_weaknesses_avg": [ 257.6666666666667, 247.71264714494404 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 246.66666666666666, 282.19418058414243 ], "wc_summary_review_avg": [ 39.666666666666664, 25.629843715654783 ], "wc_review_avg": [ 588.3333333333334, 272.56110426022923 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.8660254037844387, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:D1Fy2Vz1d4cJ:scholar.google.com/&scioq=Factors+Influencing+Generalization+in+Chaotic+Dynamical+Systems&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Meta;Ecole Polytechnique de Montreal", "aff_unique_dep": "Meta Platforms, Inc.;", "aff_unique_url": 
"https://meta.com;https://www.polymtl.ca", "aff_unique_abbr": "Meta;Polytechnique Montreal", "aff_campus_unique_index": "1", "aff_campus_unique": ";Montreal", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;Canada" }, { "id": "GBU1mm8_WkV", "title": "Semi-Autoregressive Energy Flows: Towards Determinant-Free Training of Normalizing Flows", "track": "main", "status": "Reject", "tldr": "", "abstract": "Normalizing flows are a popular approach for constructing probabilistic and generative models. However, maximum likelihood training of flows is challenging due to the need to calculate computationally expensive determinants of Jacobians. This paper takes steps towards addressing this challenge by introducing objectives and model architectures for determinant-free training of flows. Central to our framework is the energy objective, a multidimensional extension of proper scoring rules that admits efficient estimators based on random projections. The energy objective does not require calculating determinants and therefore supports general flow architectures that are not well-suited to maximum likelihood training. In particular, we introduce semi-autoregressive flows, an architecture that can be trained with the energy loss, and that interpolates between fully autoregressive and non-autoregressive models, capturing the benefits of both. We empirically demonstrate that energy flows achieve competitive generative modeling performance while maintaining fast generation and posterior inference.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Phillip Si;Zeyi Chen;Subham Sekhar Sahoo;Yair Schiff;Volodymyr Kuleshov", "authorids": "~Phillip_Si1;~Zeyi_Chen1;~Subham_Sekhar_Sahoo1;~Yair_Schiff1;~Volodymyr_Kuleshov1", "gender": "M;M;M;M;", "homepage": "https://sites.google.com/view/psi6;;;https://github.com/yair-schiff;https://www.cs.cornell.edu/~kuleshov/", "dblp": ";;;;81/8612", "google_scholar": ";;Z7DoDbAAAAAJ;GhFrOdQAAAAJ;RY_t8XAAAAAJ", "orcid": ";;;;", "linkedin": ";zeyi-chen-a429a2218;shakeh3r/;yair-schiff;", "or_profile": "~Phillip_Si1;~Zeyi_Chen1;~Subham_Sekhar_Sahoo1;~Yair_Schiff1;~Volodymyr_Kuleshov1", "aff": "Carnegie Mellon University;Tsinghua University;Department of Computer Science, Cornell University;Department of Computer Science, Cornell University;Cornell University", "aff_domain": "cmu.edu;mails.tsinghua.edu.cn;cs.cornell.edu;cs.cornell.edu;cornell.edu", "position": "MS student;Undergrad student;PhD student;PhD student;Assistant Professor", "bibtex": "@misc{\nsi2023semiautoregressive,\ntitle={Semi-Autoregressive Energy Flows: Towards Determinant-Free Training of Normalizing Flows},\nauthor={Phillip Si and Zeyi Chen and Subham Sekhar Sahoo and Yair Schiff and Volodymyr Kuleshov},\nyear={2023},\nurl={https://openreview.net/forum?id=GBU1mm8_WkV}\n}", "github": "", "project": "", "reviewers": "B3hX;WiBH;gK13;Ljxv", "site": "https://openreview.net/forum?id=GBU1mm8_WkV", "pdf_size": 911529, "recommendation": "3;3;5;6", "confidence": "3;4;3;4", "correctness": "3;3;3;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;3;2;2", "wc_summary_paper": "9;105;92;144", "wc_strength_and_weaknesses": "212;121;424;237", "wc_clarity_quality_novelty_and_reproducibility": "197;110;4;39", "wc_summary_review": "68;329;28;52", "wc_review": "486;665;548;472", "wc_reply_reviewers": "0;76;0;0", "wc_reply_authors": "848;439;463;536", "reply_reviewers": "0;1;0;0", "reply_authors": "2;1;1;1", "recommendation_avg": [ 4.25, 1.299038105676658 ], 
"confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 87.5, 49.19603642571219 ], "wc_strength_and_weaknesses_avg": [ 248.5, 110.13741416975432 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 87.5, 73.8596642288604 ], "wc_summary_review_avg": [ 119.25, 121.933127164032 ], "wc_review_avg": [ 542.75, 76.15567936798936 ], "wc_reply_reviewers_avg": [ 19.0, 32.90896534380867 ], "wc_reply_authors_avg": [ 571.5, 163.5856045011296 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.19245008972987526, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:TTpEKNdlvZQJ:scholar.google.com/&scioq=Semi-Autoregressive+Energy+Flows:+Towards+Determinant-Free+Training+of+Normalizing+Flows&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2;2;2", "aff_unique_norm": "Carnegie Mellon University;Tsinghua University;Cornell University", "aff_unique_dep": ";;Department of Computer Science", "aff_unique_url": "https://www.cmu.edu;https://www.tsinghua.edu.cn;https://www.cornell.edu", "aff_unique_abbr": "CMU;THU;Cornell", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "United States;China" }, { "id": "GC5MsCxrU-", "title": "Continual Active Learning", "track": "main", "status": "Reject", "tldr": "We reduce Active Learning (AL) training time with the help of replay based Continual Learning algorithms all while maintaining performance on par with standard AL. ", "abstract": "While active learning (AL) improves the labeling efficiency of machine learning (by allowing models to query the labels of data samples), a major problem is that compute efficiency is decreased since models are typically retrained from scratch at each query round. In this work, we develop a new framework that circumvents this problem by biasing further training towards the recently labeled sets, thereby complementing existing work on AL acceleration. We employ existing and novel replay-based Continual Learning (CL) algorithms that are effective at quickly learning new samples without forgetting previously learned information, especially when data comes from a shifting or evolving distribution. We call this compute-efficient active learning paradigm $\\textit{``Continual Active Learning\" (CAL)}$. We demonstrate that standard AL with warm starting fails, both to accelerate training, and that naive fine-tuning suffers from catastrophic forgetting due to distribution shifts over query rounds. We then show CAL achieves significant speedups using a plethora of replay schemes that use model distillation, and that select diverse/uncertain points from the history, all while maintaining performance on par with standard AL. We conduct experiments across many data domains, including natural language, vision, medical imaging, and computational biology, each with very different neural architectures (Transformers/CNNs/MLPs). 
CAL consistently provides a 2-6x reduction in training time, thus showing its applicability across differing modalities.", "keywords": "Active Learning;Deep Learning;Efficient Machine Learning;Continual Learning", "primary_area": "", "supplementary_material": "", "author": "Arnav Mohanty Das;Gantavya Bhatt;Megh Manoj Bhalerao;Vianne R. Gao;Rui Yang;Jeff Bilmes", "authorids": "~Arnav_Mohanty_Das1;~Gantavya_Bhatt1;~Megh_Manoj_Bhalerao1;~Vianne_R._Gao1;~Rui_Yang14;~Jeff_Bilmes1", "gender": "M;M;M;F;M;F", "homepage": ";https://sites.google.com/view/gbhatt/;https://meghbhalerao.github.io/;https://sites.google.com/view/ruiyang-compbio/home;http://melodi.ee.washington.edu/people/bilmes;", "dblp": "263/7747;265/5828;;;b/JeffABilmes;294/8646", "google_scholar": "rnRml4EAAAAJ;A18gBf4AAAAJ;7dzVm94AAAAJ;ZbOR7q8AAAAJ;L9QufAsAAAAJ;", "orcid": ";;;;0000-0002-7372-8778;", "linkedin": "arnavdas/;bhattgantavya/;megh-bhalerao-572b29130/;;jbilmes/;vianne-gao-40820513a/", "or_profile": "~Arnav_Mohanty_Das1;~Gantavya_Bhatt1;~Megh_Manoj_Bhalerao1;~Rui_Yang14;~Jeff_Bilmes1;~Vianne_Gao1", "aff": "University of Washington;University of Washington, Seattle;University of Washington, Seattle;Memorial Sloan Kettering Cancer Centre;University of Washington, Seattle;Memorial Sloan Kettering Cancer Centre", "aff_domain": "uw.edu;uw.edu;uw.edu;mskcc.org;uw.edu;mskcc.org", "position": "PhD student;Graduate Student;MS student;PhD student;Full Professor;PhD student", "bibtex": "@misc{\ndas2023continual,\ntitle={Continual Active Learning},\nauthor={Arnav Mohanty Das and Gantavya Bhatt and Megh Manoj Bhalerao and Vianne R. Gao and Rui Yang and Jeff Bilmes},\nyear={2023},\nurl={https://openreview.net/forum?id=GC5MsCxrU-}\n}", "github": "", "project": "", "reviewers": "wt23;nCXE;HMvu;wxxe", "site": "https://openreview.net/forum?id=GC5MsCxrU-", "pdf_size": 644762, "recommendation": "1;3;5;6", "confidence": "2;4;4;4", "correctness": "1;3;2;3", "technical_novelty": "2;2;3;4", "empirical_novelty": "1;2;2;4", "wc_summary_paper": "75;105;47;105", "wc_strength_and_weaknesses": "50;411;254;328", "wc_clarity_quality_novelty_and_reproducibility": "196;4;64;87", "wc_summary_review": "7;32;48;109", "wc_review": "328;552;413;629", "wc_reply_reviewers": "0;0;91;136", "wc_reply_authors": "264;860;746;440", "reply_reviewers": "0;0;1;1", "reply_authors": "1;2;2;1", "recommendation_avg": [ 3.75, 1.920286436967152 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "correctness_avg": [ 2.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 2.25, 1.0897247358851685 ], "wc_summary_paper_avg": [ 83.0, 24.124676163629637 ], "wc_strength_and_weaknesses_avg": [ 260.75, 133.75233642819103 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 87.75, 69.45637119804057 ], "wc_summary_review_avg": [ 49.0, 37.59654239421492 ], "wc_review_avg": [ 480.5, 117.23587334941469 ], "wc_reply_reviewers_avg": [ 56.75, 58.93799708167898 ], "wc_reply_authors_avg": [ 577.5, 237.3747037912844 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.8268106308031117, "corr_recommendation_correctness": 0.6673083711820306, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1618434847150794629&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0;1;0;1", "aff_unique_norm": "University of Washington;Memorial Sloan Kettering Cancer Center", "aff_unique_dep": ";", 
"aff_unique_url": "https://www.washington.edu;https://www.mskcc.org", "aff_unique_abbr": "UW;MSKCC", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Seattle", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "GCF6ZOA6Npk", "title": "Learning Rotation-Equivariant Features for Visual Correspondence", "track": "main", "status": "Withdraw", "tldr": "We introduce a self-supervised learning framework to yield discriminative rotation-invariant descriptors using local features extracted from group-equivariant CNNs for the task of visual correspondence.", "abstract": "Extracting discriminative local features that are invariant to imaging variations is an integral part of establishing correspondences between images. In this work, we introduce a self-supervised learning framework to extract discriminative rotation-invariant descriptors using group-equivariant CNNs. Thanks to employing group-equivariant CNNs, our method effectively learns to obtain rotation-equivariant features and their orientations explicitly, without having to perform sophisticated data augmentations. The resultant features and their orientations are further processed by group aligning, a novel invariant mapping technique that shifts the group-equivariant features by their orientations along the group dimension. Our group aligning technique achieves rotation-invariance without any collapse of the group dimension and thus eschews loss of discriminability. The proposed method is trained end-to-end in a self-supervised manner, where we use an orientation alignment loss for the orientation estimation and a contrastive descriptor loss for robust local descriptors to geometric/photometric variations. Our method demonstrates state-of-the-art matching accuracy among existing rotation-invariant descriptors under varying rotation and also show competitive results when transferred to the task of keypoint matching and camera pose estimation.", "keywords": "visual correspondence;equivariant representation learning;deep local feature extraction;self-supervised learning", "primary_area": "", "supplementary_material": "", "author": "Jongmin Lee;Byungjin Kim;Seungwook Kim;Minsu Cho", "authorids": "~Jongmin_Lee2;~Byungjin_Kim1;~Seungwook_Kim2;~Minsu_Cho1", "gender": "M;M;M;M", "homepage": "https://bluedream1121.github.io/;https://github.com/kbjpc123/;http://cvlab.postech.ac.kr/~mcho/;https://wookiekim.github.io", "dblp": "68/222-5;;;07/10150-5", "google_scholar": "https://scholar.google.co.kr/citations?user=WVVqJX8AAAAJ;;5TyoF5QAAAAJ;kZ4AN54AAAAJ", "orcid": ";;;", "linkedin": ";;minsu-cho-062b3750/;seung-wook-kim-77b9bb117/", "or_profile": "~Jongmin_Lee2;~Byungjin_Kim1;~Minsu_Cho1;~Seung_Wook_Kim2", "aff": "POSTECH;POSTECH;POSTECH;ByteDance", "aff_domain": "postech.ac.kr;postech.ac.kr;postech.ac.kr;bytedance.com", "position": "PhD student;MS student;Associate Professor;Intern", "bibtex": "@misc{\nlee2023learning,\ntitle={Learning Rotation-Equivariant Features for Visual Correspondence},\nauthor={Jongmin Lee and Byungjin Kim and Seungwook Kim and Minsu Cho},\nyear={2023},\nurl={https://openreview.net/forum?id=GCF6ZOA6Npk}\n}", "github": "", "project": "", "reviewers": "yYDa;rFQg;vLq5;gFuX", "site": "https://openreview.net/forum?id=GCF6ZOA6Npk", "pdf_size": 12116931, "recommendation": "3;3;5;5", "confidence": "5;4;4;5", "correctness": "3;3;2;4", "technical_novelty": "2;2;1;2", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "65;178;244;53", "wc_strength_and_weaknesses": "399;399;616;9", 
"wc_clarity_quality_novelty_and_reproducibility": "327;38;157;48", "wc_summary_review": "120;48;163;351", "wc_review": "911;663;1180;461", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.5, 0.5 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 135.0, 79.61469713564199 ], "wc_strength_and_weaknesses_avg": [ 355.75, 218.92164694246205 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 142.5, 116.29810832511421 ], "wc_summary_review_avg": [ 170.5, 112.01897160749157 ], "wc_review_avg": [ 803.75, 269.4228785756696 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11365347038207862874&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Pohang University of Science and Technology;ByteDance", "aff_unique_dep": ";", "aff_unique_url": "https://www.postech.ac.kr;https://www.bytedance.com", "aff_unique_abbr": "POSTECH;ByteDance", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Pohang;", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "South Korea;China" }, { "id": "GF4A49QlqjN", "title": "SuperWeight Ensembles: Automated Compositional Parameter Sharing Across Diverse Architechtures", "track": "main", "status": "Reject", "tldr": " A novel efficient ensembling technique for ensembling models of different architechtures; enabling anytime inference", "abstract": "Neural net ensembles boost task performance, but have excessive storage requirements. Recent work in efficient ensembling has made the memory cost more tractable by sharing learned parameters between ensemble members. Existing efficient ensembles have high predictive accuracy, but they are overly restrictive in two ways: 1) They constrain ensemble members to have the same architecture, limiting their usefulness in applications such as anytime inference, and 2) They reduce the parameter count for a small predictive performance penalty, but do not provide an easy way to trade-off parameter count for predictive performance without increasing inference time. In this paper, we propose SuperWeight Ensembles, an approach for architecture-agnostic parameter sharing. SuperWeight Ensembles share parameters between layers which have sufficiently similar computation, even if they have different shapes. This allows anytime prediction of heterogeneous ensembles by selecting a subset of members during inference, which is a flexibility not supported by prior work. In addition, SuperWeight Ensembles provide control over the total number of parameters used, allowing us to increase or decrease the number of parameters without changing model architecture. On the anytime prediction task, our method shows a consistent boost over prior work while allowing for more flexibility in architectures and efficient parameter sharing. 
SuperWeight Ensembles preserve the performance of prior work in the low-parameter regime, and even outperform fully-parameterized ensembles with 17% fewer parameters on CIFAR-100 and 50% fewer parameters on ImageNet.", "keywords": "efficient ensembles;anytime inference", "primary_area": "", "supplementary_material": "/attachment/993a0d7a0fcbe70c78ce67ce0daa6bf4cda8feea.zip", "author": "Piotr Teterwak;Soren Nelson;Nikoli Dryden;Dina Bashkirova;Kate Saenko;Bryan A. Plummer", "authorids": "~Piotr_Teterwak1;~Soren_Nelson1;~Nikoli_Dryden1;~Dina_Bashkirova1;~Kate_Saenko1;~Bryan_A._Plummer1", "gender": "M;M;;F;F;M", "homepage": "https://scholar.google.com/citations?user=lUkd1AMAAAAJ&hl=en&oi=ao;https://sorennelson.github.io;https://ndryden.com;https://cs-people.bu.edu/dbash/;http://ai.bu.edu;http://bryanplummer.com/", "dblp": "247/6128;;148/1273;;88/2754;163/2330", "google_scholar": "lUkd1AMAAAAJ;oIJ3jbgAAAAJ;nRhl3Q4AAAAJ;qvUTYsUAAAAJ;https://scholar.google.com.tw/citations?user=9xDADY4AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;0000-0003-3557-3215;0000-0002-5704-7614;", "linkedin": ";nelsonsoren/;;;;", "or_profile": "~Piotr_Teterwak1;~Soren_Nelson1;~Nikoli_Dryden1;~Dina_Bashkirova1;~Kate_Saenko1;~Bryan_Allen_Plummer1", "aff": "Boston University;;Lawrence Livermore National Labs;Boston University;Boston University, Boston University;Boston University", "aff_domain": "bu.edu;;llnl.gov;bu.edu;bu.edu;bu.edu", "position": "PhD student;;Researcher;PhD student;Full Professor;Assistant Professor", "bibtex": "@misc{\nteterwak2023superweight,\ntitle={SuperWeight Ensembles: Automated Compositional Parameter Sharing Across Diverse Architechtures},\nauthor={Piotr Teterwak and Soren Nelson and Nikoli Dryden and Dina Bashkirova and Kate Saenko and Bryan A.
Plummer},\nyear={2023},\nurl={https://openreview.net/forum?id=GF4A49QlqjN}\n}", "github": "", "project": "", "reviewers": "Qjhr;d45p;owxX", "site": "https://openreview.net/forum?id=GF4A49QlqjN", "pdf_size": 1097851, "recommendation": "5;5;6", "confidence": "3;2;2", "correctness": "3;3;4", "technical_novelty": "2;2;3", "empirical_novelty": "2;3;3", "wc_summary_paper": "79;24;77", "wc_strength_and_weaknesses": "289;110;209", "wc_clarity_quality_novelty_and_reproducibility": "11;21;147", "wc_summary_review": "46;21;19", "wc_review": "425;176;452", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 2.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 60.0, 25.468935326524086 ], "wc_strength_and_weaknesses_avg": [ 202.66666666666666, 73.2135385172867 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 59.666666666666664, 61.88878914813427 ], "wc_summary_review_avg": [ 28.666666666666668, 12.283683848458853 ], "wc_review_avg": [ 351.0, 124.23365083583433 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.4999999999999999, "corr_recommendation_correctness": 0.9999999999999997, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:b0gZSuT_vbMJ:scholar.google.com/&scioq=SuperWeight+Ensembles:+Automated+Compositional+Parameter+Sharing+Across+Diverse+Architechtures&hl=en&as_sdt=0,14", "gs_version_total": 0, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "Boston University;Lawrence Livermore National Laboratory", "aff_unique_dep": ";", "aff_unique_url": "https://www.bu.edu;https://www.llnl.gov", "aff_unique_abbr": "BU;LLNL", "aff_campus_unique_index": "1", "aff_campus_unique": ";Boston", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "GG0sigkMnxF", "title": "URVoice: An Akl-Toussaint/ Graham- Sklansky Approach towards Convex Hull Computation for Sign Language Interpretation", "track": "main", "status": "Reject", "tldr": "We present URVoice, a vocalizer for the communication impaired, based on the Indian Sign Language Notations, with real-time translation of gesture to text/voice using the convex hull as the computational geometry.", "abstract": "We present URVoice, a vocalizer for the communication impaired, based on the Indian Sign Language Notations. Contemporary psychological theories consider language and speech as devices to understand complex psychological processes and deliver them as cultural products of ideas and communication. Sign and gesture language, offering an intelligent co-ordination of eye-and-hand and ear-and-mouth, has evolved as an intelligent manifestation of speech for the impaired. However, they have very limited modality and iconicity in accommodating a greater range of linguistically relevant meanings. URVoice is an Augmentative and Alternative Communication (AAC) device, which currently features a pipeline of forward communication from signer to collocutor with a novel vision-based approach built on convex hull computation.
The solution achieves real-time translation of gesture to text/voice using the convex hull as the computational geometry, following the Akl-Toussaint heuristic and the Graham-Sklansky scan algorithm. The results are weighed against our other solutions based on conventional Machine Learning and Deep Learning approaches. A futuristic version of URVoice, with voice translated to sign language gestures, will be a complete solution for effectively bridging the cognitive and communication gap between impaired and abled individuals.", "keywords": "Communication disorder;computational geometry;convex hull;sign language;URVoice;vocalizer;computer vision;deep learning", "primary_area": "", "supplementary_material": "/attachment/98ee6b6e3c14a81b77c3a5360dab2c90ffcc4bff.zip", "author": "Madhumitha V;Santhi Natarajan;Bharathi Malarkeddy A", "authorids": "~Madhumitha_V1;santhinatarajan@snuchennai.edu.in;bharathi_m@bmsit.in", "gender": ";;", "homepage": ";;", "dblp": ";;", "google_scholar": ";;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Madhumitha_V1;santhinatarajan@snuchennai.edu.in;bharathi_m@bmsit.in", "aff": ";;", "aff_domain": ";;", "position": ";;", "bibtex": "@misc{\nv2023urvoice,\ntitle={{URV}oice: An Akl-Toussaint/ Graham- Sklansky Approach towards Convex Hull Computation for Sign Language Interpretation},\nauthor={Madhumitha V and Santhi Natarajan and Bharathi Malarkeddy A},\nyear={2023},\nurl={https://openreview.net/forum?id=GG0sigkMnxF}\n}", "github": "", "project": "", "reviewers": "jVQ2;eit5;aLeh;SEnB", "site": "https://openreview.net/forum?id=GG0sigkMnxF", "pdf_size": 1176943, "recommendation": "1;1;1;3", "confidence": "5;5;4;5", "correctness": "1;3;1;2", "technical_novelty": "1;1;3;2", "empirical_novelty": "0;1;3;2", "wc_summary_paper": "29;31;40;20", "wc_strength_and_weaknesses": "136;81;110;394", "wc_clarity_quality_novelty_and_reproducibility": "23;24;151;7", "wc_summary_review": "32;35;39;37", "wc_review": "220;171;340;458", "wc_reply_reviewers": "0;0;13;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;1;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 1.5, 0.8660254037844386 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "correctness_avg": [ 1.75, 0.82915619758885 ], "technical_novelty_avg": [ 1.75, 0.82915619758885 ], "empirical_novelty_avg": [ 1.5, 1.118033988749895 ], "wc_summary_paper_avg": [ 30.0, 7.106335201775948 ], "wc_strength_and_weaknesses_avg": [ 180.25, 124.9327319000109 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 51.25, 57.984372894772264 ], "wc_summary_review_avg": [ 35.75, 2.5860201081971503 ], "wc_review_avg": [ 297.25, 111.32693968667243 ], "wc_reply_reviewers_avg": [ 3.25, 5.629165124598851 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 0.17407765595569782, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:_b9Vkp3wS3gJ:scholar.google.com/&scioq=URVoice:+An+Akl-Toussaint/+Graham-+Sklansky+Approach+towards+Convex+Hull+Computation+for+Sign+Language+Interpretation&hl=en&as_sdt=0,5", "gs_version_total": 0 }, { "id": "GGItImF9oG5", "title": "Scaling Laws vs Model Architectures: How does Inductive Bias Influence Scaling?", "track": "main", "status": "Reject", "tldr": "Your model is pretty cool, but does it scale? Let's find out.
", "abstract": "There have been a lot of interest in the scaling properties of Transformer models \\citep{kaplan2020scaling}. However, not much has been done on the front of investigating the effect of scaling properties of different inductive biases and model architectures. Do model architectures scale differently? If so, how does inductive bias affect scaling behaviour? How does this influence upstream (pretraining) and downstream (transfer)? This paper conducts a systematic study of scaling behaviour of ten diverse model architectures such as Transformers, Switch Transformers, Universal Transformers, Dynamic convolutions, Performers, and recently proposed MLP-Mixers. Via extensive experiments, we show that (1) architecture is an indeed an important consideration when performing scaling and (2) the best performing model can fluctuate at different scales. We believe that the findings outlined in this work has significant implications to how model architectures are currently evaluated in the community.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Yi Tay;Mostafa Dehghani;Samira Abnar;Hyung Won Chung;William Fedus;Jinfeng Rao;Sharan Narang;Vinh Q. Tran;Dani Yogatama;Donald Metzler", "authorids": "~Yi_Tay1;~Mostafa_Dehghani1;~Samira_Abnar1;~Hyung_Won_Chung1;~William_Fedus2;~Jinfeng_Rao2;~Sharan_Narang1;~Vinh_Q._Tran1;~Dani_Yogatama2;~Donald_Metzler1", "gender": "M;M;Unspecified;M;;;M;M;;M", "homepage": "http://yitay.net;http://mostafadehghani.com/;https://samiraabnar.github.io/;;;;;https://vqtran.github.io;;https://research.google/people/DonaldMetzler/", "dblp": ";125/4062;150/5405;;;;;77/2885-2.html;;95/2272", "google_scholar": "VBclY_cAAAAJ;https://scholar.google.nl/citations?user=MiHOX3QAAAAJ;https://scholar.google.nl/citations?user=jbxwjgMAAAAJ;1CAlXvYAAAAJ;;;CWOixywAAAAJ;ot3WsOwAAAAJ;;bmXpOd8AAAAJ", "orcid": ";;;;;;;;;0000-0003-4276-6269", "linkedin": ";;;;;;;vinh-tran-32597468/;;donmetzler/", "or_profile": "~Yi_Tay1;~Mostafa_Dehghani1;~Samira_Abnar1;~Hyung_Won_Chung1;~William_Fedus2;~Jinfeng_Rao2;~Sharan_Narang1;~Vinh_Q._Tran1;~Dani_Yogatama2;~Donald_Metzler1", "aff": "Google;Google DeepMind;Apple;Google Brain;;;Meta;Google;;Google", "aff_domain": "google.com;google.com;apple.com;google.com;;;meta.com;google.com;;google.com", "position": "Research Scientist;Research Scientist;Researcher;Researcher;;;Researcher;Researcher;;Research Scientist", "bibtex": "@misc{\ntay2023scaling,\ntitle={Scaling Laws vs Model Architectures: How does Inductive Bias Influence Scaling?},\nauthor={Yi Tay and Mostafa Dehghani and Samira Abnar and Hyung Won Chung and William Fedus and Jinfeng Rao and Sharan Narang and Vinh Q. 
Tran and Dani Yogatama and Donald Metzler},\nyear={2023},\nurl={https://openreview.net/forum?id=GGItImF9oG5}\n}", "github": "", "project": "", "reviewers": "7UyJ;4um9;1jM9;6T2s", "site": "https://openreview.net/forum?id=GGItImF9oG5", "pdf_size": 6091059, "recommendation": "3;5;5;6", "confidence": "3;3;5;3", "correctness": "3;2;4;3", "technical_novelty": "2;2;1;2", "empirical_novelty": "2;3;0;3", "wc_summary_paper": "65;100;13;48", "wc_strength_and_weaknesses": "92;276;297;184", "wc_clarity_quality_novelty_and_reproducibility": "59;137;61;58", "wc_summary_review": "19;17;51;31", "wc_review": "235;530;422;321", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 1.224744871391589 ], "wc_summary_paper_avg": [ 56.5, 31.34086788842964 ], "wc_strength_and_weaknesses_avg": [ 212.25, 81.40139740815265 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 78.75, 33.647993996670884 ], "wc_summary_review_avg": [ 29.5, 13.518505834595775 ], "wc_review_avg": [ 377.0, 110.37889290983127 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 10, 0 ], "corr_recommendation_confidence": 0.13245323570650439, "corr_recommendation_correctness": 0.0, "gs_citation": 96, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6561537948523517592&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0;1;0;2;0;0", "aff_unique_norm": "Google;Apple;Meta", "aff_unique_dep": "Google;Apple Inc.;Meta Platforms, Inc.", "aff_unique_url": "https://www.google.com;https://www.apple.com;https://meta.com", "aff_unique_abbr": "Google;Apple;Meta", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;1;0;0;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "GGx4EVTZU5r", "title": "Exploit Unlabeled Data on the Server! Federated Learning via Uncertainty-aware Ensemble Distillation and Self-Supervision", "track": "main", "status": "Reject", "tldr": "A federated learning algorithm that tackles data deficiency by exploiting unlabeled data at the server.", "abstract": "Federated Learning (FL) is a distributed machine learning paradigm that involves the cooperation of multiple clients to train a server model. In practice, it is hard to assume that each client possesses large-scale data or that many clients are always available to participate in FL for the same round, which may lead to data deficiency. This deficiency degrades the entire learning process. To resolve this challenge, we propose Federated learning with entropy-weighted ensemble Distillation and Self-supervised learning (FedDS). FedDS reliably deals with situations where not only the amount of data per client but also the number of clients is limited. This advantage is achieved by leveraging the prevalent unlabeled data on the server. We demonstrate the effectiveness of FedDS on classification tasks for CIFAR-10/100 and PathMNIST.
On CIFAR-10, our method improves over FedAVG by 12.54% in the data-deficient regime, and by 17.16% and 23.56% in the more challenging noisy-label and Byzantine-client scenarios, respectively.", "keywords": "Federated Learning;Knowledge Distillation;Ensemble Distillation;Self-supervised Learning;Uncertainty", "primary_area": "", "supplementary_material": "", "author": "Jae-Min Park;Won-Jun Jang;Tae-Hyun Oh;Si-Hyeon Lee", "authorids": "~Jae-Min_Park1;~Won-Jun_Jang2;~Tae-Hyun_Oh3;~Si-Hyeon_Lee1", "gender": ";M;F;M", "homepage": "https://sites.google.com/view/kaist-infolab;https://ami.kaist.ac.kr;https://sites.google.com/view/kaist-infolab/team/professor?authuser=0;https://", "dblp": ";119/1450;;", "google_scholar": ";dMCBjeIAAAAJ;;", "orcid": ";0000-0003-0468-1571;;", "linkedin": ";tae-hyun-oh-at-mit/;;", "or_profile": "~Won-Jun_Jang2;~Tae-Hyun_Oh3;~Si-Hyeon_Lee1;~Jaemin_Park1", "aff": "KAIST, Korea Advanced Institute of Science & Technology;POSTECH;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "ee.kaist.ac.kr;postech.ac.kr;kaist.ac.kr;kaist.ac.kr", "position": "MS student;Assistant Professor;Assistant Professor;PhD student", "bibtex": "@misc{\npark2023exploit,\ntitle={Exploit Unlabeled Data on the Server! Federated Learning via Uncertainty-aware Ensemble Distillation and Self-Supervision},\nauthor={Jae-Min Park and Won-Jun Jang and Tae-Hyun Oh and Si-Hyeon Lee},\nyear={2023},\nurl={https://openreview.net/forum?id=GGx4EVTZU5r}\n}", "github": "", "project": "", "reviewers": "CynN;qQ7S;fQQY", "site": "https://openreview.net/forum?id=GGx4EVTZU5r", "pdf_size": 812858, "recommendation": "3;5;6", "confidence": "4;3;3", "correctness": "3;3;3", "technical_novelty": "2;2;2", "empirical_novelty": "2;2;3", "wc_summary_paper": "86;99;78", "wc_strength_and_weaknesses": "267;245;137", "wc_clarity_quality_novelty_and_reproducibility": "28;32;22", "wc_summary_review": "35;18;40", "wc_review": "416;394;277", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1180;1015;540", "reply_reviewers": "0;0;0", "reply_authors": "2;2;1", "recommendation_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 87.66666666666667, 8.65383665716478 ], "wc_strength_and_weaknesses_avg": [ 216.33333333333334, 56.81157941437252 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 27.333333333333332, 4.109609335312651 ], "wc_summary_review_avg": [ 31.0, 9.41629792788369 ], "wc_review_avg": [ 362.3333333333333, 61.00455356410336 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 911.6666666666666, 271.3034381565327 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.9449111825230683, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:A0BXQ8IKXqsJ:scholar.google.com/&scioq=Exploit+Unlabeled+Data+on+the+Server!+Federated+Learning+via+Uncertainty-aware+Ensemble+Distillation+and+Self-Supervision&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Pohang University of Science and Technology", "aff_unique_dep": ";",
"aff_unique_url": "https://www.kaist.ac.kr;https://www.postech.ac.kr", "aff_unique_abbr": "KAIST;POSTECH", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pohang", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "GHOMWtsFhj", "title": "Object Detection with OOD Generalizable Neural Architecture Search", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "We present a Neural Architecture Search (NAS) framework guided by feature orthogonalization to improve Out-of-Distribution (OOD) Generalization on Object Detection. Specifically, we attribute the failure of generalizing on OOD data to the spurious correlations of category-related features and context-related features. The category-related features describe the causal information for predicting the target objects, e.g., \"a car with four wheels\", while the context-related features describe the non-causal information, e.g., \"a car driving at night\", and the context-related features are always mistaken for causal information due to the existence of distinct data distribution between training and testing sets (OOD) to some degree. Therefore, we aim at automatically discovering an optimal architecture that is able to disentangle the category-related features and the context-related features with a novel weight-based detector head. Both theoretical and experimental results show that the proposed scheme is able to achieve the disentanglement and better performance on both Independent-Identically-Distribution datasets (Pascal VOC 2012 and MS COCO) and OOD datasets (BDD100K-weather and BDD100K-time-of-day).", "keywords": "Out-of-Distribution;neural architecture search", "primary_area": "", "supplementary_material": "/attachment/c45857eea95d1d18c36e0648b9099f5273b01768.zip", "author": "Fan Wu;Nanyang Ye;Lanqing HONG;Zhenguo Li;Chensheng Peng", "authorids": "~Fan_Wu14;~Nanyang_Ye1;~Lanqing_HONG1;~Zhenguo_Li1;~Chensheng_Peng1", "gender": ";;F;M;", "homepage": ";;https://racheltechie.github.io/;http://www.ee.columbia.edu/~zgli/;", "dblp": ";175/2581;226/4258;23/6479;", "google_scholar": ";;https://scholar.google.com.sg/citations?user=2p7x6OUAAAAJ;XboZC1AAAAAJ;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Fan_Wu14;~Nanyang_Ye1;~Lanqing_HONG1;~Zhenguo_Li1;~Chensheng_Peng1", "aff": ";Shanghai Jiaotong University;Huawei Technologies Ltd.;Huawei Noah's Ark Lab;", "aff_domain": ";sjtu.edu.cn;huawei.com;huawei.com;", "position": ";Assistant Professor;Researcher;Principal Researcher;", "bibtex": "@misc{\nwu2023object,\ntitle={Object Detection with {OOD} Generalizable Neural Architecture Search},\nauthor={Fan Wu and Nanyang Ye and Lanqing HONG and Zhenguo Li and Chensheng Peng},\nyear={2023},\nurl={https://openreview.net/forum?id=GHOMWtsFhj}\n}", "github": "", "project": "", "reviewers": "Qqde;w1CP;GmLe", "site": "https://openreview.net/forum?id=GHOMWtsFhj", "pdf_size": 24817972, "recommendation": "3;5;5", "confidence": "4;4;3", "correctness": "2;3;3", "technical_novelty": "2;3;3", "empirical_novelty": "3;3;2", "wc_summary_paper": "107;112;68", "wc_strength_and_weaknesses": "333;372;244", "wc_clarity_quality_novelty_and_reproducibility": "342;35;35", "wc_summary_review": "192;58;26", "wc_review": "974;577;373", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 
0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 95.66666666666667, 19.669491322575904 ], "wc_strength_and_weaknesses_avg": [ 316.3333333333333, 53.568232708906464 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 137.33333333333334, 144.72118788284672 ], "wc_summary_review_avg": [ 92.0, 71.90734779330042 ], "wc_review_avg": [ 641.3333333333334, 249.5386854889549 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.49999999999999983, "corr_recommendation_correctness": 0.9999999999999998, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:EHX9ENuCeiMJ:scholar.google.com/&scioq=Object+Detection+with+OOD+Generalizable+Neural+Architecture+Search&hl=en&as_sdt=0,5", "gs_version_total": 2, "aff_unique_index": "0;1;1", "aff_unique_norm": "Shanghai Jiao Tong University;Huawei", "aff_unique_dep": ";Huawei Technologies", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.huawei.com", "aff_unique_abbr": "SJTU;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "GIZg_kOXqyG", "title": "Private and Efficient Meta-Learning with Low Rank and Sparse decomposition", "track": "main", "status": "Reject", "tldr": "Provable meta-learning via privacy preserving and optimal low-rank+sparse decomposition", "abstract": "Meta-learning is critical for a variety of practical ML systems -- like personalized recommendations systems -- that are required to generalize to new tasks despite a small number of task-specific training points. Existing meta-learning techniques use two complementary approaches of either learning a low-dimensional representation of points for all tasks, or task-specific fine-tuning of a global model trained using all the tasks. In this work, we propose a novel meta-learning framework that combines both the techniques to enable handling of a large number of data-starved tasks. Our framework models network weights as a sum of low-rank and sparse matrices. This allows us to capture information from multiple domains together in the low-rank part while still allowing task specific personalization using the sparse part. We instantiate and study the framework in the linear setting, where the problem reduces to that of estimating the sum of a rank-$r$ and a $k$-column sparse matrix using a small number of linear measurements. We propose an alternating minimization method with hard thresholding -- AMHT-LRS -- to learn the low-rank and sparse part effectively and efficiently. For the realizable, Gaussian data setting, we show that AMHT-LRS indeed solves the problem efficiently with nearly optimal samples. We extend AMHT-LRS to ensure that it preserves privacy of each individual user in the dataset, while still ensuring strong generalization with nearly optimal number of samples. 
Finally, on multiple datasets, we demonstrate that the framework allows personalized models to obtain superior performance in the data-scarce regime.", "keywords": "Meta-learning;Low-rank;Sparse;Privacy", "primary_area": "", "supplementary_material": "/attachment/24048cffaf9cad6e2238ad00363ccc1401e2c2f7.zip", "author": "Soumyabrata Pal;Prateek Varshney;Gagan Madan;Abhradeep Guha Thakurta;Gaurav Aggarwal;Pradeep Shenoy;Gaurav Srivastava;Prateek Jain", "authorids": "~Soumyabrata_Pal1;~Prateek_Varshney1;~Gagan_Madan1;~Abhradeep_Guha_Thakurta1;~Gaurav_Aggarwal4;~Pradeep_Shenoy1;gasrivastava@google.com;~Prateek_Jain1", "gender": "M;M;M;M;;M;;M", "homepage": "https://soumyabratap.github.io/;https://pvarshney1729.github.io/;;https://athakurta.squarespace.com/;;;;http://prateekjain.org", "dblp": "206/6371;;177/8934.html;31/8315;14/5218;12/771;;https://dblp.uni-trier.de/pers/j/Jain_0002:Prateek.html", "google_scholar": "J4UxoTEAAAAJ;GUKPKh0AAAAJ;_61mGn8AAAAJ;1rV69hMAAAAJ;https://scholar.google.co.in/citations?user=9XiIwDQAAAAJ;lXbPKmkAAAAJ;;qYhRbJoAAAAJ", "orcid": ";;;;;;;", "linkedin": ";pvarshney1729/;;;;;;", "or_profile": "~Soumyabrata_Pal1;~Prateek_Varshney1;~Gagan_Madan1;~Abhradeep_Guha_Thakurta1;~Gaurav_Aggarwal4;~Pradeep_Shenoy1;gasrivastava@google.com;~Prateek_Jain1", "aff": "Google;Google;Google;Google;Google;Google;;Google", "aff_domain": "google.com;google.com;google.com;google.com;google.com;google.com;;google.com", "position": "Postdoc;Research Associate;Researcher;Senior Research Scientist;Researcher;Researcher;;Researcher", "bibtex": "@misc{\npal2023private,\ntitle={Private and Efficient Meta-Learning with Low Rank and Sparse decomposition},\nauthor={Soumyabrata Pal and Prateek Varshney and Gagan Madan and Abhradeep Guha Thakurta and Gaurav Aggarwal and Pradeep Shenoy and Gaurav Srivastava and Prateek Jain},\nyear={2023},\nurl={https://openreview.net/forum?id=GIZg_kOXqyG}\n}", "github": "", "project": "", "reviewers": "wf3D;6iVb;aEd6", "site": "https://openreview.net/forum?id=GIZg_kOXqyG", "pdf_size": 425467, "recommendation": "5;5;6", "confidence": "3;3;2", "correctness": "3;3;3", "technical_novelty": "2;3;3", "empirical_novelty": "2;2;2", "wc_summary_paper": "38;43;223", "wc_strength_and_weaknesses": "59;35;236", "wc_clarity_quality_novelty_and_reproducibility": "70;159;122", "wc_summary_review": "41;30;64", "wc_review": "208;267;645", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "504;341;717", "reply_reviewers": "0;0;0", "reply_authors": "2;2;1", "recommendation_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 101.33333333333333, 86.05553762283724 ], "wc_strength_and_weaknesses_avg": [ 110.0, 89.63258336118624 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 117.0, 36.5057073163453 ], "wc_summary_review_avg": [ 45.0, 14.165686240583852 ], "wc_review_avg": [ 373.3333333333333, 193.60153810224637 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 520.6666666666666, 153.9530953100832 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": -0.9999999999999997, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:dR3H1fjQ1gkJ:scholar.google.com/&scioq=Private+and+Efficient+Meta-Learning+with+Low+Rank+and+Sparse+decomposition&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0;0;0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "GK5m7a3Uy4", "title": "A distinct unsupervised reference model from the environment helps continual learning", "track": "main", "status": "Reject", "tldr": "In this paper, we introduced open-set semi-supervised continual learning as a realistic, practical scenario and proposed a novel dual-structured method to perform in this scenario.", "abstract": "The existing continual learning methods are mainly focused on fully-supervised scenarios and are still not able to take advantage of unlabeled data available in the environment. Some recent works tried to investigate semi-supervised continual learning (SSCL) settings in which the unlabeled data are available, but it is only from the same distribution as the labeled data. This assumption is still not general enough for real-world applications and restricts the utilization of unsupervised data. In this work, we introduce Open-Set Semi-Supervised Continual Learning (OSSCL), a more realistic semi-supervised continual learning setting in which out-of-distribution (OoD) unlabeled samples in the environment are assumed to coexist with the in-distribution ones. Under this configuration, we present a model with two distinct parts: (i) the reference network captures general-purpose and task-agnostic knowledge in the environment by using a broad spectrum of unlabeled samples, (ii) the learner network is designed to learn task-specific representations by exploiting supervised samples. The reference model both provides a pivotal representation space and also segregates unlabeled data to exploit them more efficiently. 
By performing a diverse range of experiments, we show the superior performance of our model compared with other competitors and prove the effectiveness of each component of the proposed model.", "keywords": "continual Learning;open-set semi-supervised continual learning;knowledge distillation", "primary_area": "", "supplementary_material": "/attachment/7e5287811656f4a80f179eb725464c5c0e3a4227.zip", "author": "Seyyed AmirHossein Ameli Kalkhoran;Mohammadamin Banayeeanzade;Mahdi Samiei;Mahdieh Soleymani Baghshah", "authorids": "~Seyyed_AmirHossein_Ameli_Kalkhoran1;~Mohammadamin_Banayeeanzade1;~Mahdi_Samiei1;~Mahdieh_Soleymani_Baghshah1", "gender": "M;M;F;M", "homepage": ";;http://sharif.edu/~soleymani/;http://ce.sharif.edu/~banayeean/", "dblp": ";;21/473;", "google_scholar": ";;S1U0KlgAAAAJ;", "orcid": ";;;", "linkedin": "amirhosseinameli/;mmsamiei/;;", "or_profile": "~Seyyed_AmirHossein_Ameli_Kalkhoran1;~Mahdi_Samiei1;~Mahdieh_Baghshah1;~Mohammad_Amin_Banayeean_Zade1", "aff": "TomTom;Sharif University of Technology, Sharif University of Technology;Sharif University of Technology;Department of Computer Science, Viterbi School of Engineering", "aff_domain": "tomtom.com;ce.sharif.edu;sharif.edu;cs.usc.edu", "position": "Researcher;PhD student;Associate Professor;PhD student", "bibtex": "@misc{\nkalkhoran2023a,\ntitle={A distinct unsupervised reference model from the environment helps continual learning},\nauthor={Seyyed AmirHossein Ameli Kalkhoran and Mohammadamin Banayeeanzade and Mahdi Samiei and Mahdieh Soleymani Baghshah},\nyear={2023},\nurl={https://openreview.net/forum?id=GK5m7a3Uy4}\n}", "github": "", "project": "", "reviewers": "HjYX;GwGC;aMML;G7Pp;4qUo", "site": "https://openreview.net/forum?id=GK5m7a3Uy4", "pdf_size": 4030350, "recommendation": "3;5;5;5;5", "confidence": "4;4;4;3;4", "correctness": "3;3;3;3;4", "technical_novelty": "2;2;3;3;2", "empirical_novelty": "2;3;2;1;3", "wc_summary_paper": "182;81;92;101;106", "wc_strength_and_weaknesses": "532;365;149;198;172", "wc_clarity_quality_novelty_and_reproducibility": "78;50;22;23;42", "wc_summary_review": "73;425;23;39;70", "wc_review": "865;921;286;361;390", "wc_reply_reviewers": "24;0;0;0;16", "wc_reply_authors": "1022;646;296;334;372", "reply_reviewers": "1;0;0;0;1", "reply_authors": "2;2;2;2;2", "recommendation_avg": [ 4.6, 0.7999999999999999 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "correctness_avg": [ 3.2, 0.39999999999999997 ], "technical_novelty_avg": [ 2.4, 0.4898979485566356 ], "empirical_novelty_avg": [ 2.2, 0.7483314773547882 ], "wc_summary_paper_avg": [ 112.4, 35.82513084414347 ], "wc_strength_and_weaknesses_avg": [ 283.2, 145.75788143356093 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 43.0, 20.571825392998065 ], "wc_summary_review_avg": [ 126.0, 150.68112025068038 ], "wc_review_avg": [ 564.6, 270.8568625676669 ], "wc_reply_reviewers_avg": [ 8.0, 10.119288512538814 ], "wc_reply_authors_avg": [ 534.0, 273.3408129057935 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.2500000000000001, "corr_recommendation_correctness": 0.2500000000000001, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:7vlqofNEIsAJ:scholar.google.com/&scioq=A+distinct+unsupervised+reference+model+from+the+environment+helps+continual+learning&hl=en&as_sdt=0,33", "gs_version_total": 2, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "TomTom;Sharif University of 
Technology;Viterbi School of Engineering", "aff_unique_dep": ";;Department of Computer Science", "aff_unique_url": "https://www.tomtom.com;https://www.sharif.edu;https://viterbi.usc.edu", "aff_unique_abbr": "TomTom;SUT;Viterbi", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;2", "aff_country_unique": "Netherlands;Iran;United States" }, { "id": "GKB566-8WkZ", "title": "Explainability as statistical inference", "track": "main", "status": "Reject", "tldr": "We propose to embed any classification or regression model in a framework that casts interpretability as a maximum likelihood problem.", "abstract": "A wide variety of model explanation approaches have been proposed in recent years, all guided by very different rationales and heuristics. In this paper, we take a new route and cast interpretability as a statistical inference problem. We propose a general deep probabilistic model designed to produce interpretable predictions. The model\u2019s parameters can be learned via maximum likelihood, and the method can be adapted to any predictor network architecture and any type of prediction problem. Our method is a case of amortized interpretability models, where a neural network is used as a selector to allow for fast interpretation at inference time. Several popular interpretability methods are shown to be particular cases of regularised maximum likelihood for our general model. We propose new datasets with ground-truth selection which allow for the evaluation of feature importance maps. Using these datasets, we show experimentally that using multiple imputation provides more reasonable interpretations.", "keywords": "Interpretability;Explainability;Statistical Learning;Imputation", "primary_area": "", "supplementary_material": "/attachment/680949571c7d76172ed147f463ec5a1cb93cf7ee.zip", "author": "Hugo Henri Joseph Senetaire;Damien Garreau;Jes Frellsen;Pierre-Alexandre Mattei", "authorids": "~Hugo_Henri_Joseph_Senetaire1;~Damien_Garreau1;~Jes_Frellsen1;~Pierre-Alexandre_Mattei3", "gender": "M;M;M;M", "homepage": "https://orbit.dtu.dk/en/persons/hugo-henri-joseph-s%C3%A9n%C3%A9taire;https://sites.google.com/view/damien-garreau/home;https://frellsen.org;http://pamattei.github.io", "dblp": ";151/6584;83/8247;177/7275", "google_scholar": "SY5DUXkAAAAJ;https://scholar.google.fr/citations?user=qn4N61QAAAAJ;Yj2sBWkAAAAJ;https://scholar.google.fr/citations?user=Tqa_-D0AAAAJ", "orcid": ";0000-0002-7855-2847;0000-0001-9224-1271;", "linkedin": "https://fr.linkedin.com/in/hugo-senetaire-5771a1142;damien-garreau-05817858/;frellsen/;", "or_profile": "~Hugo_Henri_Joseph_Senetaire1;~Damien_Garreau1;~Jes_Frellsen1;~Pierre-Alexandre_Mattei3", "aff": "Technical University of Denmark;Universit\u00e9 C\u00f4te d'Azur;Technical University of Denmark;INRIA", "aff_domain": "dtu.dk;unice.fr;dtu.dk;inria.fr", "position": "PhD student;Associate Professor;Associate Professor;Research scientist", "bibtex": "@misc{\nsenetaire2023explainability,\ntitle={Explainability as statistical inference},\nauthor={Hugo Henri Joseph Senetaire and Damien Garreau and Jes Frellsen and Pierre-Alexandre Mattei},\nyear={2023},\nurl={https://openreview.net/forum?id=GKB566-8WkZ}\n}", "github": "", "project": "", "reviewers": "sxVY;DxL9;Hv7W", "site": "https://openreview.net/forum?id=GKB566-8WkZ", "pdf_size": 1986286, "recommendation": "5;6;6", "confidence": "3;3;2", "correctness": "3;4;3", "technical_novelty": "3;3;3", "empirical_novelty": "3;3;2", "wc_summary_paper": "92;48;67",
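The amortized-interpretability model described in the GKB566-8WkZ abstract above lends itself to a short sketch: a selector network emits per-feature selection probabilities, masked-out features are filled by multiple imputation, and selector plus predictor are trained by maximum likelihood. The relaxed Bernoulli sampling and the simple Gaussian imputer below are simplifying assumptions for illustration, not the paper's exact model.

```python
# Hypothetical sketch of an amortized selector trained by maximum likelihood.
import torch
import torch.nn as nn
import torch.nn.functional as F

d, n_classes, n_imputations = 20, 3, 5
selector = nn.Sequential(nn.Linear(d, 64), nn.ReLU(), nn.Linear(64, d))
predictor = nn.Sequential(nn.Linear(d, 64), nn.ReLU(), nn.Linear(64, n_classes))

def nll(x, y, temperature=0.5):
    probs = torch.sigmoid(selector(x))   # per-feature importance, amortized in x
    mask = torch.distributions.RelaxedBernoulli(
        torch.tensor(temperature), probs=probs).rsample()
    losses = []
    for _ in range(n_imputations):       # multiple imputation of masked features
        x_imp = torch.randn_like(x)      # stand-in Gaussian imputer
        x_mixed = mask * x + (1 - mask) * x_imp
        losses.append(F.cross_entropy(predictor(x_mixed), y))
    return torch.stack(losses).mean()

x, y = torch.randn(16, d), torch.randint(0, n_classes, (16,))
nll(x, y).backward()   # one maximum-likelihood step trains both networks
```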
"wc_strength_and_weaknesses": "859;210;121", "wc_clarity_quality_novelty_and_reproducibility": "67;80;36", "wc_summary_review": "135;16;49", "wc_review": "1153;354;273", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1917;578;755", "reply_reviewers": "0;0;0", "reply_authors": "3;1;1", "recommendation_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 69.0, 18.01850900231944 ], "wc_strength_and_weaknesses_avg": [ 396.6666666666667, 328.9319416265654 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 61.0, 18.457157599876172 ], "wc_summary_review_avg": [ 66.66666666666667, 50.16195991209098 ], "wc_review_avg": [ 593.3333333333334, 397.12326661741804 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1083.3333333333333, 593.9036584796861 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.4999999999999999, "corr_recommendation_correctness": 0.4999999999999999, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7605605981586840043&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Technical University of Denmark;Universit\u00e9 C\u00f4te d'Azur;INRIA", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tek.dk;https://www.univ-cotedazur.fr;https://www.inria.fr", "aff_unique_abbr": "DTU;UCA;INRIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "Denmark;France" }, { "id": "GKpwIa9wgwR", "title": "Efficient Data Subset Selection to Generalize Training Across Models: Transductive and Inductive Networks", "track": "main", "status": "Reject", "tldr": "Trainable non-adaptive data subset selection that generalizes across different model training", "abstract": "Subset selection, in recent times, has emerged as a successful approach toward efficient training of models by significantly reducing the amount of data and computational resources required. However, existing methods employ discrete combinatorial and model-specific approaches which lack generalizability--- for each new model, the algorithm has to be executed from the beginning. Therefore, for data subset selection for an unseen architecture, one cannot use the subset chosen for a different model. In this work, we propose SubSelNet, a non-adaptive subset selection framework, which tackles these problems with two main components. First, we introduce an attention-based neural gadget that leverages the graph structure of architectures and acts as a surrogate to trained deep neural networks for quick model prediction. Then, we use these predictions to build subset samplers. This leads us to develop two variants of SubSelNet. The first variant is transductive (called as Transductive-SubSelNet) which computes the subset separately for each model by solving a small optimization problem. Such an optimization is still super fast, thanks to the replacement of explicit model training by the model approximator. The second variant is inductive (called as Inductive-SubSelNet) which computes the subset using a trained subset selector, without any optimization. 
Most state-of-the-art data subset selection approaches are adaptive, in that the subset selection adapts as the training progresses, and as a result, they require access to the entire data at training time. Our approach, in contrast, is non-adaptive and does the subset selection only once in the beginning, thereby achieving resource and memory efficiency along with compute-efficiency at training time. Our experiments show that both transductive and inductive variants of our models outperform several methods on the quality of the subset chosen and further demonstrate that our method can be used for choosing the best architecture from a set of architectures.\n", "keywords": "Data Subset Selection;Efficient Learning", "primary_area": "", "supplementary_material": "/attachment/05281d64c27befefbed03c4b2cab3d52a6079364.zip", "author": "Eeshaan Jain;Tushar Nandy;Gaurav Aggarwal;Ashish V. Tendulkar;Rishabh K Iyer;Abir De", "authorids": "~Eeshaan_Jain1;~Tushar_Nandy1;~Gaurav_Aggarwal4;~Ashish_V._Tendulkar1;~Rishabh_K_Iyer2;~Abir_De1", "gender": "M;M;;;M;M", "homepage": "https://eeshaanjain.github.io;;;;https://www.rishiyer.com;", "dblp": ";;14/5218;08/1521;37/10544.html;118/7174", "google_scholar": "r5rqqJEAAAAJ;;https://scholar.google.co.in/citations?user=9XiIwDQAAAAJ;;l_XxJ1kAAAAJ;https://scholar.google.co.in/citations?user=_9ZKKbIAAAAJ", "orcid": ";;;;;", "linkedin": "eeshaanjain/;tushar-nandy/;;;rishabh-iyer-36893717/;", "or_profile": "~Eeshaan_Jain1;~Tushar_Nandy1;~Gaurav_Aggarwal4;~Ashish_V._Tendulkar1;~Rishabh_K_Iyer2;~Abir_De1", "aff": "Indian Institute of Technology, Bombay;Indian Institute of Technology, Bombay;Google;Google;Microsoft;Indian Institute of Technology Bombay,", "aff_domain": "iitb.ac.in;iitb.ac.in;google.com;google.com;microsoft.com;iitb.ac.in", "position": "Undergrad student;Undergrad student;Researcher;Researcher;Research Scientist;Assistant Professor", "bibtex": "@misc{\njain2023efficient,\ntitle={Efficient Data Subset Selection to Generalize Training Across Models: Transductive and Inductive Networks},\nauthor={Eeshaan Jain and Tushar Nandy and Gaurav Aggarwal and Ashish V. 
Tendulkar and Rishabh K Iyer and Abir De},\nyear={2023},\nurl={https://openreview.net/forum?id=GKpwIa9wgwR}\n}", "github": "", "project": "", "reviewers": "B3Rj;a8HM;VJSk", "site": "https://openreview.net/forum?id=GKpwIa9wgwR", "pdf_size": 1498817, "recommendation": "5;6;6", "confidence": "4;4;4", "correctness": "2;2;3", "technical_novelty": "3;2;3", "empirical_novelty": "3;2;3", "wc_summary_paper": "114;69;70", "wc_strength_and_weaknesses": "327;171;189", "wc_clarity_quality_novelty_and_reproducibility": "71;62;41", "wc_summary_review": "106;51;43", "wc_review": "618;353;343", "wc_reply_reviewers": "0;29;0", "wc_reply_authors": "1273;1423;1010", "reply_reviewers": "0;1;0", "reply_authors": "2;2;2", "recommendation_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 84.33333333333333, 20.98147330914162 ], "wc_strength_and_weaknesses_avg": [ 229.0, 69.68500556073738 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 58.0, 12.569805089976535 ], "wc_summary_review_avg": [ 66.66666666666667, 28.003967972810962 ], "wc_review_avg": [ 438.0, 127.34467663262043 ], "wc_reply_reviewers_avg": [ 9.666666666666666, 13.67073110293992 ], "wc_reply_authors_avg": [ 1235.3333333333333, 170.69726288243626 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.4999999999999999, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=304134140646087627&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0;1;1;2;0", "aff_unique_norm": "Indian Institute of Technology Bombay;Google;Microsoft", "aff_unique_dep": ";Google;Microsoft Corporation", "aff_unique_url": "https://www.iitb.ac.in;https://www.google.com;https://www.microsoft.com", "aff_unique_abbr": "IIT Bombay;Google;Microsoft", "aff_campus_unique_index": "0;0;1;1;0", "aff_campus_unique": "Bombay;Mountain View;", "aff_country_unique_index": "0;0;1;1;1;0", "aff_country_unique": "India;United States" }, { "id": "GKsNIC_mQRG", "title": "Emergence of Exploration in Policy Gradient Reinforcement Learning via Resetting", "track": "main", "status": "Reject", "tldr": "", "abstract": "In reinforcement learning (RL), many exploration methods explicitly promote stochastic policies, e.g., by adding an entropy bonus. We argue that exploration only matters in RL because the agent repeatedly encounters the same or similar states, so that it is beneficial to gradually improve the performance over the encounters; otherwise, the greedy policy would be optimal. Based on this intuition, we propose ReMax, an objective for RL whereby stochastic exploration arises as an emergent property, without adding any explicit exploration bonus. In ReMax, an episode is modified so that the agent can reset to previous states in the trajectory, and the agent\u2019s goal is to maximize the best return in the trajectory tree. We show that this ReMax objective can be directly optimized with an unbiased policy gradient method. 
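The resetting objective in the GKsNIC_mQRG abstract above can be reduced, for illustration only, to a one-step bandit: the agent "resets" K times from the same state and is scored by the best of the K rewards. REINFORCE on the joint distribution of the K samples then gives an unbiased gradient of the best-of-K return; this toy estimator is our simplification, not the paper's method.

```python
# Toy best-of-K bandit: unbiased score-function gradient of E[max_i R(a_i)].
import torch

logits = torch.zeros(4, requires_grad=True)     # policy over 4 actions
rewards = torch.tensor([0.0, 0.2, 0.5, 1.0])    # unknown to the agent
opt = torch.optim.Adam([logits], lr=0.1)
K = 5                                           # number of resets per episode

for step in range(500):
    dist = torch.distributions.Categorical(logits=logits)
    actions = dist.sample((K,))                 # K tries from the same start state
    best = rewards[actions].max()               # objective: best return in the tree
    # REINFORCE on the joint distribution of the K independent samples
    loss = -(best.detach() * dist.log_prob(actions).sum())
    opt.zero_grad()
    loss.backward()
    opt.step()

print(torch.softmax(logits, dim=0))  # mass should shift toward action 3
```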
Experiments confirm that ReMax leads to the emergence of a stochastic exploration policy, and improves the performance compared to RL with no exploration bonus.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/089fe2d322fd660dd4864e444c5fadac9ae03b97.zip", "author": "Sotetsu Koyamada;Paavo Parmas;Tadashi Kozuno;Shin Ishii", "authorids": "~Sotetsu_Koyamada1;~Paavo_Parmas1;~Tadashi_Kozuno1;~Shin_Ishii1", "gender": ";M;M;M", "homepage": ";;;", "dblp": ";222/9823;207/8504;27/3253.html", "google_scholar": ";https://scholar.google.co.jp/citations?user=IXbKCUYAAAAJ;4VJmx8QAAAAJ;https://scholar.google.co.jp/citations?hl=en", "orcid": ";;;", "linkedin": ";paavo-parmas-882591123/?originalSubdomain=jp;;", "or_profile": "~Sotetsu_Koyamada1;~Paavo_Parmas1;~Tadashi_Kozuno1;~Shin_Ishii1", "aff": ";Kyoto University;OMRON SINIC X;Kyoto University", "aff_domain": ";kyoto-u.ac.jp;sinicx.com;kyoto-u.ac.jp", "position": ";Program Specific Assistant Professor;Researcher;Full Professor", "bibtex": "@misc{\nkoyamada2023emergence,\ntitle={Emergence of Exploration in Policy Gradient Reinforcement Learning via Resetting},\nauthor={Sotetsu Koyamada and Paavo Parmas and Tadashi Kozuno and Shin Ishii},\nyear={2023},\nurl={https://openreview.net/forum?id=GKsNIC_mQRG}\n}", "github": "", "project": "", "reviewers": "AvuR;Z1SD;6LaJ;KQHn", "site": "https://openreview.net/forum?id=GKsNIC_mQRG", "pdf_size": 1656351, "recommendation": "1;1;3;3", "confidence": "4;4;4;4", "correctness": "1;2;2;2", "technical_novelty": "1;2;3;3", "empirical_novelty": "1;1;2;0", "wc_summary_paper": "22;93;105;49", "wc_strength_and_weaknesses": "419;372;320;271", "wc_clarity_quality_novelty_and_reproducibility": "2;19;86;21", "wc_summary_review": "13;74;58;47", "wc_review": "456;558;569;388", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "962;642;447;295", "reply_reviewers": "0;0;0;0", "reply_authors": "2;1;1;1", "recommendation_avg": [ 2.0, 1.0 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 1.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.82915619758885 ], "empirical_novelty_avg": [ 1.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 67.25, 33.42435489280234 ], "wc_strength_and_weaknesses_avg": [ 345.5, 55.46395225729952 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 32.0, 32.03903868720159 ], "wc_summary_review_avg": [ 48.0, 22.371857321197094 ], "wc_review_avg": [ 492.75, 74.82437771207991 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 586.5, 249.2553911152174 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:OB2snGI5bpEJ:scholar.google.com/&scioq=Emergence+of+Exploration+in+Policy+Gradient+Reinforcement+Learning+via+Resetting&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;1;0", "aff_unique_norm": "Kyoto University;OMRON Corporation", "aff_unique_dep": ";SINIC X", "aff_unique_url": "https://www.kyoto-u.ac.jp;https://www.omron.com", "aff_unique_abbr": "Kyoto U;OMRON", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Japan" }, { "id": "GLOtO2QbNp", "title": "Uncertainty Calibration via Knowledge Flow under Long-tailed Distribution", "track": "main", "status": "Withdraw", "tldr": "We propose a novel method to 
realize the calibration under long-tailed distribution", "abstract": "How to estimate the uncertainty of a given model is a crucial problem. Current calibration techniques treat different classes equally and thus implicitly assume that the distribution of training data is balanced, but ignore the fact that real-world data often follows a long-tailed distribution. In this paper, we explore the problem of calibrating the model trained from a long-tailed distribution. Due to the difference between the imbalanced training distribution and balanced test distribution, existing calibration methods such as temperature scaling cannot generalize well to this problem. Specific calibration methods for domain adaptation are also not applicable because they rely on unlabeled target domain instances which are not available. Models trained from a long-tailed distribution tend to be more overconfident on head classes. To this end, we propose a novel knowledge flow based calibration method by estimating the importance weight for samples of tail classes to realize long-tailed calibration. Our method models the distribution of each class as a Gaussian distribution and views the source statistics of head classes as a prior to calibrate the target distributions of tail classes. We transfer knowledge from head classes to get the target probability density of tail classes. The importance weight is estimated by the ratio of the target probability density over the source probability density. Extensive experiments on CIFAR-10-LT, MNIST-LT, CIFAR-100-LT, and ImageNet-LT datasets demonstrate the effectiveness of our method.", "keywords": "Long-tailed;Calibration", "primary_area": "", "supplementary_material": "", "author": "Jiahao Chen;Bing Su", "authorids": "~Jiahao_Chen4;~Bing_Su1", "gender": "M;M", "homepage": "https://jiahaochen1.github.io/;https://gsai.ruc.edu.cn/bingsu", "dblp": ";41/5270-1", "google_scholar": "https://scholar.google.com.hk/citations?user=Af4IREwAAAAJ;https://scholar.google.com.sg/citations?user=d3g2VJQAAAAJ", "orcid": ";0000-0001-8560-1910", "linkedin": ";", "or_profile": "~Jiahao_Chen4;~Bing_Su1", "aff": "Renmin University of China;Renmin University of China", "aff_domain": "ruc.edu.cn;ruc.edu.cn", "position": "PhD student;Associate Professor", "bibtex": "@misc{\nchen2023uncertainty,\ntitle={Uncertainty Calibration via Knowledge Flow under Long-tailed Distribution},\nauthor={Jiahao Chen and Bing Su},\nyear={2023},\nurl={https://openreview.net/forum?id=GLOtO2QbNp}\n}", "github": "", "project": "", "reviewers": "6Fe9;guJW;AVLp;n8JR", "site": "https://openreview.net/forum?id=GLOtO2QbNp", "pdf_size": 2738050, "recommendation": "3;5;5;5", "confidence": "2;4;3;4", "correctness": "4;3;3;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "3;2;3;2", "wc_summary_paper": "60;120;94;84", "wc_strength_and_weaknesses": "106;150;349;363", "wc_clarity_quality_novelty_and_reproducibility": "34;5;45;48", "wc_summary_review": "15;40;42;38", "wc_review": "215;315;530;533", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 89.5, 21.511624764298954 ], "wc_strength_and_weaknesses_avg": [ 242.0, 115.16292806281021 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 33.0, 16.98528775146303
], "wc_summary_review_avg": [ 33.75, 10.917302780449024 ], "wc_review_avg": [ 398.25, 137.86474349883656 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.8703882797784891, "corr_recommendation_correctness": -1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:2o6o8xFG1vsJ:scholar.google.com/&scioq=Uncertainty+Calibration+via+Knowledge+Flow+under+Long-tailed+Distribution&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Renmin University of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ruc.edu.cn", "aff_unique_abbr": "RUC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "ISS: Image as Stepping Stone for Text-Guided 3D Shape Generation", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12167", "id": "GMRodZ8OlVr", "poster": "/media/PosterPDFs/ICLR%202023/12167.png?t=1681092459.4237652", "openreview": "https://openreview.net/forum?id=GMRodZ8OlVr", "slides": "https://iclr.cc/virtual/2023/poster/12167", "video": "https://iclr.cc/virtual/2023/poster/12167", "author_site": "Zhengzhe Liu, Peng Dai, Ruihui Li, XIAOJUAN QI, Chi-Wing Fu", "tldr": "An efficient text-guided 3D shape generation framework without needing paired text and shape. ", "abstract": "Text-guided 3D shape generation remains challenging due to the absence of large paired text-shape dataset, the substantial semantic gap between these two modalities, and the structural complexity of 3D shapes. This paper presents a new framework called Image as Stepping Stone (ISS) for the task by introducing 2D image as a stepping stone to connect the two modalities and to eliminate the need for paired text-shape data. Our key contribution is a two-stage feature-space-alignment approach that maps CLIP features to shapes by harnessing a pre-trained single-view reconstruction (SVR) model with multi-view supervisions: first map the CLIP image feature to the detail-rich shape space in the SVR model, then map the CLIP text feature to the shape space and optimize the mapping by encouraging CLIP consistency between the input text and the rendered images. Further, we formulate a textguided shape stylization module to dress up the output shapes with novel structures and textures. Beyond existing works on 3D shape generation from text, our new approach is general for creating shapes in a broad range of categories, without requiring paired text-shape data. Experimental results manifest that our approach outperforms the state-of-the-arts and our baselines in terms of fidelity and consistency with text. Further, our approach can stylize the generated shapes with both realistic and fantasy structures and textures. 
Codes are available at https://github.com/liuzhengzhe/ISS-Image-as-Stepping-Stone-for-Text-Guided-3D-Shape-Generation.", "keywords": "Text;3D shape;CLIP;differentiable rendering", "primary_area": "", "supplementary_material": "/attachment/cd376a997dab9aeecf90bb23091d4829c0f003a8.zip", "author": "Zhengzhe Liu;Peng Dai;Ruihui Li;XIAOJUAN QI;Chi-Wing Fu", "authorids": "~Zhengzhe_Liu1;~Peng_Dai3;~Ruihui_Li1;~XIAOJUAN_QI2;~Chi-Wing_Fu2", "gender": "M;M;M;F;", "homepage": "https://liuzhengzhe.github.io/;https://daipengwa.github.io/;https://liruihui.github.io/;https://xjqi.github.io/;", "dblp": "160/0247;08/3547-3;204/0720;176/1445-1.html;", "google_scholar": "HBpZeWsAAAAJ;2fGIJBsAAAAJ;https://scholar.google.com.hk/citations?user=kFNXpOsAAAAJ;bGn0uacAAAAJ;", "orcid": ";;0000-0002-4266-6420;;", "linkedin": "zhengzhe-liu-767493b3/?originalSubdomain=hk;;;;", "or_profile": "~Zhengzhe_Liu1;~Peng_Dai3;~Ruihui_Li1;~XIAOJUAN_QI2;~Chi-Wing_Fu2", "aff": "The Chinese University of Hong Kong;University of Hong Kong;Hunan University;University of Hong Kong;", "aff_domain": "cuhk.edu.hk;eee.hku.hk;hnu.edu.cn;hku.hk;", "position": "PhD student;PhD student;Associate Professor;Assistant Professor;", "bibtex": "@inproceedings{\nliu2023iss,\ntitle={{ISS}: Image as Stepping Stone for Text-Guided 3D Shape Generation},\nauthor={Zhengzhe Liu and Peng Dai and Ruihui Li and XIAOJUAN QI and Chi-Wing Fu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=GMRodZ8OlVr}\n}", "github": "", "project": "", "reviewers": "YJ7a;CfJ1;Z3WY;HSBz", "pdf_size": 1814652, "recommendation": "6;6;6;6", "confidence": "3;3;5;5", "correctness": "2;4;3;3", "technical_novelty": "2;2;2;2", "empirical_novelty": "3;3;2;3", "wc_summary_paper": "113;95;64;59", "wc_strength_and_weaknesses": "323;119;143;145", "wc_clarity_quality_novelty_and_reproducibility": "34;60;40;226", "wc_summary_review": "33;27;8;41", "wc_review": "503;301;255;471", "wc_reply_reviewers": "0;0;20;0", "wc_reply_authors": "1106;286;1226;1803", "reply_reviewers": "0;0;1;0", "reply_authors": "2;1;3;3", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.0, 1.0 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 82.75, 22.252808811473667 ], "wc_strength_and_weaknesses_avg": [ 182.5, 81.76032044946986 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 90.0, 79.10752176626443 ], "wc_summary_review_avg": [ 27.25, 12.173228823939851 ], "wc_review_avg": [ 382.5, 106.36141217565701 ], "wc_reply_reviewers_avg": [ 5.0, 8.660254037844387 ], "wc_reply_authors_avg": [ 1105.25, 541.4348414167673 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17696489929945497708&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=GMRodZ8OlVr", "email": "cuhk.edu.hk;eee.hku.hk;hnu.edu.cn;hku.hk;", "author_num": 5, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Chinese University of Hong Kong;University of Hong Kong;Hunan University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.hku.hk;http://www.hunu.edu.cn/", "aff_unique_abbr": "CUHK;HKU;HNU", 
"aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "GN6cm7uSjV", "title": "Masked Surfel Prediction for Self-Supervised Point Cloud Learning", "track": "main", "status": "Withdraw", "tldr": "Considering the local geometry information explicitly into the masked auto-encoding", "abstract": "Masked auto-encoding is a popular and effective self-supervised learning approach to point cloud learning. However, most of the existing methods reconstruct only the masked points and overlook the local geometry information, which is also important to understand the point cloud data. In this work, we make the first attempt, to the best of our knowledge, to consider the local geometry information explicitly into the masked auto-encoding, and propose a novel Masked Surfel Prediction (MaskSurf) method. Specifically, given the input point cloud masked at a high ratio, we learn a transformer-based encoder-decoder network to estimate the underlying masked surfels by simultaneously predicting the surfel positions (i.e., points) and per-surfel orientations (i.e., normals). The predictions of points and normals are supervised by the Chamfer Distance and a newly introduced Position-Indexed Normal Distance in a set-to-set manner. Our MaskSurf is validated on six downstream tasks under three fine-tuning strategies. In particular, MaskSurf outperforms its closest competitor, Point-MAE, by 1.2\\% on the real-world dataset of ScanObjectNN under the OBJ-BG setting, justifying the advantages of masked surfel prediction over masked point cloud reconstruction.", "keywords": "Self-supervised point cloud learning;surfel representation;masked auto-encoding", "primary_area": "", "supplementary_material": "/attachment/1364445a265ee5a79c92c7332ce9462c2bf2129e.zip", "author": "Yabin Zhang;Jiehong Lin;Chenhang HE;Yongwei Chen;Kui Jia;Lei Zhang", "authorids": "~Yabin_Zhang2;~Jiehong_Lin1;~Chenhang_HE1;~Yongwei_Chen2;~Kui_Jia1;~Lei_Zhang2", "gender": "M;M;M;;M;M", "homepage": "https://ybzh.github.io/;;https://github.com/skyhehe123;;http://kuijia.site/;http://www4.comp.polyu.edu.hk/~cslzhang/", "dblp": "70/6124-1;239/8762;272/1145;;60/3834;64/5666-6.html", "google_scholar": "p0GLwtoAAAAJ;eSkDBYcAAAAJ;dU6hpFUAAAAJ;https://scholar.google.com.hk/citations?hl=zh-CN;Mf9VHRcAAAAJ;tAK5l1IAAAAJ", "orcid": ";;0000-0001-5069-3587;;;0000-0002-2078-4215", "linkedin": ";;;;;", "or_profile": "~Yabin_Zhang2;~Jiehong_Lin1;~Chenhang_HE1;~Yongwei_Chen2;~Kui_Jia1;~Lei_Zhang2", "aff": "The Hong Kong Polytechnic University;South China University of Technology;The Hong Kong Polytechnic University;;South China University of Technology;The Hong Kong Polytechnic University", "aff_domain": "polyu.edu.hk;scut.edu.cn;polyu.edu.hk;;scut.edu.cn;polyu.edu.hk", "position": "PhD student;PhD student;Assistant Professor;;Full Professor;Chair Professor", "bibtex": "@misc{\nzhang2023masked,\ntitle={Masked Surfel Prediction for Self-Supervised Point Cloud Learning},\nauthor={Yabin Zhang and Jiehong Lin and Chenhang HE and Yongwei Chen and Kui Jia and Lei Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=GN6cm7uSjV}\n}", "github": "", "project": "", "reviewers": "Habi;Zgrd;cXhc;2BiM", "site": "https://openreview.net/forum?id=GN6cm7uSjV", "pdf_size": 3713127, "recommendation": "3;3;3;5", "confidence": "4;5;4;4", "correctness": "3;3;3;4", "technical_novelty": "2;2;2;2", "empirical_novelty": "2;0;2;2", "wc_summary_paper": "84;43;58;210", 
"wc_strength_and_weaknesses": "122;137;197;276", "wc_clarity_quality_novelty_and_reproducibility": "287;31;37;114", "wc_summary_review": "35;20;33;88", "wc_review": "528;231;325;688", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 1.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 98.75, 65.88389408649128 ], "wc_strength_and_weaknesses_avg": [ 183.0, 60.58465152165192 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 117.25, 103.32563815433225 ], "wc_summary_review_avg": [ 44.0, 26.04803255526221 ], "wc_review_avg": [ 443.0, 177.56548087958987 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": 1.0, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13073904280920281707&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;0;1;0", "aff_unique_norm": "Hong Kong Polytechnic University;South China University of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.polyu.edu.hk;https://www.scut.edu.cn", "aff_unique_abbr": "PolyU;SCUT", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "GNFimGDfEiV", "title": "Quadratic models for understanding neural network dynamics", "track": "main", "status": "Reject", "tldr": "Quadratic models capture properties of wide neural networks in both optimization and generalization. ", "abstract": "In this work, we show that recently proposed quadratic models capture optimization and generalization properties of wide neural networks that cannot be captured by linear models. In particular, we prove that quadratic models for shallow ReLU networks exhibit the \"catapult phase\" from Lewkowycz et al. (2020) that arises when training such models with large learning rates. We then empirically show that the behaviour of quadratic models parallels that of neural networks in generalization, especially in the catapult phase regime. Our analysis further demonstrates that quadratic models are an effective tool for analysis of neural networks. 
", "keywords": "quadratic models;wide neural networks;catapult phase;optimization dynamics", "primary_area": "", "supplementary_material": "", "author": "Libin Zhu;Chaoyue Liu;Adityanarayanan Radhakrishnan;Misha Belkin", "authorids": "~Libin_Zhu1;~Chaoyue_Liu2;~Adityanarayanan_Radhakrishnan1;~Misha_Belkin1", "gender": "M;M;M;", "homepage": ";https://cliu212.github.io/;https://aditradha.com/;http://misha.belkin-wang.org/", "dblp": "260/0355;191/6684-1;;", "google_scholar": "hyTGiUcAAAAJ;sRjoMX0AAAAJ;jd7_Ed0AAAAJ;Iwd9DdkAAAAJ", "orcid": ";;;", "linkedin": ";;aditradha/;", "or_profile": "~Libin_Zhu1;~Chaoyue_Liu2;~Adityanarayanan_Radhakrishnan1;~Misha_Belkin1", "aff": "University of California, San Diego;University of California, San Diego;Massachusetts Institute of Technology;University of California, San Diego", "aff_domain": "ucsd.edu;ucsd.edu;mit.edu;ucsd.edu", "position": "PhD student;Postdoc;PhD student;Professor", "bibtex": "@misc{\nzhu2023quadratic,\ntitle={Quadratic models for understanding neural network dynamics},\nauthor={Libin Zhu and Chaoyue Liu and Adityanarayanan Radhakrishnan and Misha Belkin},\nyear={2023},\nurl={https://openreview.net/forum?id=GNFimGDfEiV}\n}", "github": "", "project": "", "reviewers": "uqyZ;Ctfq;LYg1;JdaH", "site": "https://openreview.net/forum?id=GNFimGDfEiV", "pdf_size": 2993424, "recommendation": "5;6;6;8", "confidence": "4;5;2;2", "correctness": "2;4;3;4", "technical_novelty": "4;4;3;4", "empirical_novelty": "4;2;3;4", "wc_summary_paper": "55;202;45;127", "wc_strength_and_weaknesses": "276;316;94;111", "wc_clarity_quality_novelty_and_reproducibility": "248;38;46;63", "wc_summary_review": "100;105;93;35", "wc_review": "679;661;278;336", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "649;639;109;199", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.25, 1.299038105676658 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 3.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 107.25, 63.1916727108881 ], "wc_strength_and_weaknesses_avg": [ 199.25, 97.96268422210572 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 98.75, 86.64114207465181 ], "wc_summary_review_avg": [ 83.25, 28.181332473820326 ], "wc_review_avg": [ 488.5, 182.76556021307735 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 399.0, 247.08298201211673 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.5739640213948524, "corr_recommendation_correctness": 0.7608859102526822, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of California, San Diego;Massachusetts Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucsd.edu;https://web.mit.edu", "aff_unique_abbr": "UCSD;MIT", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "San Diego;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "CLIP-ViP: Adapting Pre-trained Image-Text Model to Video-Language Alignment", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11587", "id": "GNjzMAgawq", "poster": "/media/PosterPDFs/ICLR%202023/11587.png?t=1680776779.8072026", "openreview": "https://openreview.net/forum?id=GNjzMAgawq", "slides": 
"https://iclr.cc/virtual/2023/poster/11587", "video": "https://iclr.cc/virtual/2023/poster/11587", "author_site": "Hongwei Xue, Yuchong Sun, Bei Liu, Jianlong Fu, Ruihua Song, Houqiang Li, Jiebo Luo", "tldr": "", "abstract": "Pre-trained image-text models, like CLIP, have demonstrated the strong power of vision-language representation learned from a large scale of web-collected image-text data. In light of the well-learned visual features, there are works that transfer image representation to the video domain and achieve good results. However, adapting image-text pre-trained models to video-text pre-training (i.e., post-pretraining) has not demonstrated a significant advantage yet. In this paper, we tackle this challenge by raising and addressing two questions: 1) what are the factors hindering post-pretraining CLIP from improving performance on video-text tasks, and 2) how to mitigate the impact of these factors. Through a series of comparative experiments and analyses, we find that the data scale and domain gap between language sources have large impacts. By these observations, we propose an Omnisource Cross-modal Learning method equipped with a Video Proxy mechanism on the basis of CLIP, namely CLIP-ViP. Extensive results show that our approach improves the performance of CLIP on video-text retrieval by a large margin. Our model achieves state-of-the-art results on a variety of datasets, including MSR-VTT, DiDeMo, LSMDC, and ActivityNet. We release our code and pre-trained CLIP-ViP models at \\url{https://github.com/microsoft/XPretrain/tree/main/CLIP-ViP}.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Hongwei Xue;Yuchong Sun;Bei Liu;Jianlong Fu;Ruihua Song;Houqiang Li;Jiebo Luo", "authorids": "~Hongwei_Xue1;~Yuchong_Sun1;~Bei_Liu2;~Jianlong_Fu1;~Ruihua_Song1;~Houqiang_Li1;~Jiebo_Luo1", "gender": ";M;F;M;F;M;M", "homepage": "https://hellwayxue.github.io/;;https://www.microsoft.com/en-us/research/people/libei/;;;https://staff.ustc.edu.cn/~lihq/;https://www.cs.rochester.edu/u/jluo/", "dblp": "272/6488;206/8045;39/3711-1;83/8692;s/RuihuaSong;59/7017.html;25/5545", "google_scholar": "k5CJa5YAAAAJ;DuSxNqgAAAAJ;7IZyaZsAAAAJ;-WqSwu8AAAAJ;v5LctN8AAAAJ;7sFMIKoAAAAJ;CcbnBvgAAAAJ", "orcid": ";;;;;0000-0003-2188-3028;0000-0002-4516-9729", "linkedin": ";;;;;;jieboluo/", "or_profile": "~Hongwei_Xue1;~Yuchong_Sun1;~Bei_Liu2;~Jianlong_Fu1;~Ruihua_Song1;~Houqiang_Li1;~Jiebo_Luo3", "aff": "University of Science and Technology of China;Renmin University of China;Microsoft Research Asia;Microsoft;Renmin University of China;University of Science and Technology of China;University of Rochester", "aff_domain": "ustc.edu.cn;ruc.edu.cn;microsoft.com;microsoft.com;ruc.edu.cn;ustc.edu.cn;rochester.edu", "position": "PhD student;PhD student;Researcher;Senior Researcher;Associate Professor;Professor;Full Professor", "bibtex": "@inproceedings{\nxue2023clipvip,\ntitle={{CLIP}-ViP: Adapting Pre-trained Image-Text Model to Video-Language Alignment},\nauthor={Hongwei Xue and Yuchong Sun and Bei Liu and Jianlong Fu and Ruihua Song and Houqiang Li and Jiebo Luo},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=GNjzMAgawq}\n}", "github": "", "project": "", "reviewers": "XvUh;P4Rx;39Ux;qsPb;XjSo", "pdf_size": 411068, "recommendation": "5;6;6;8;8", "confidence": "5;4;2;4;4", "correctness": "3;3;3;4;3", "technical_novelty": "2;3;3;4;3", "empirical_novelty": "2;3;3;4;3", "wc_summary_paper": "78;107;48;120;97", 
"wc_strength_and_weaknesses": "209;285;304;138;207", "wc_clarity_quality_novelty_and_reproducibility": "54;8;15;22;44", "wc_summary_review": "20;25;25;44;37", "wc_review": "361;425;392;324;385", "wc_reply_reviewers": "0;170;0;45;0", "wc_reply_authors": "498;693;586;145;805", "reply_reviewers": "0;2;0;1;0", "reply_authors": "1;2;1;2;2", "recommendation_avg": [ 6.6, 1.2 ], "confidence_avg": [ 3.8, 0.9797958971132712 ], "correctness_avg": [ 3.2, 0.39999999999999997 ], "technical_novelty_avg": [ 3.0, 0.6324555320336759 ], "empirical_novelty_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_paper_avg": [ 90.0, 25.083859352181037 ], "wc_strength_and_weaknesses_avg": [ 228.6, 59.87520354871455 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 28.6, 17.522556891047607 ], "wc_summary_review_avg": [ 30.2, 8.885943956609225 ], "wc_review_avg": [ 377.4, 33.63688451685144 ], "wc_reply_reviewers_avg": [ 43.0, 65.84831053261732 ], "wc_reply_authors_avg": [ 545.4, 225.11916844196097 ], "reply_reviewers_avg": [ 0.6, 0.7999999999999999 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.06804138174397723, "corr_recommendation_correctness": 0.5833333333333334, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18109512580829051676&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 0, "pdf": "https://openreview.net/pdf?id=GNjzMAgawq", "email": "ustc.edu.cn;ruc.edu.cn;microsoft.com;microsoft.com;ruc.edu.cn;ustc.edu.cn;rochester.edu", "author_num": 7, "aff_unique_index": "0;1;2;2;1;0;3", "aff_unique_norm": "University of Science and Technology of China;Renmin University of China;Microsoft;University of Rochester", "aff_unique_dep": ";;Research;", "aff_unique_url": "http://www.ustc.edu.cn;http://www.ruc.edu.cn;https://www.microsoft.com/en-us/research/group/asia;https://www.rochester.edu", "aff_unique_abbr": "USTC;RUC;MSR Asia;U of R", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;0;0;1;0;0;1", "aff_country_unique": "China;United States" }, { "id": "GOEpRos3w0L", "title": "TopoZero: Digging into Topology Alignment on Zero-Shot Learning", "track": "main", "status": "Reject", "tldr": "we utilize persistent homology to investigate geometry structure alignment, based on which, we propose a TopoZero framework to achieve multi-dimensional structure alignment.", "abstract": "Common space learning, associating semantic and visual domains in a common\nlatent space, is essential to transfer knowledge from seen classes to unseen ones\non Zero-Shot Learning (ZSL) realm. Existing methods for common space learning\nrely heavily on structure alignment due to the heterogeneous nature between\nsemantic and visual domains, but the existing design is sub-optimal. In this paper,\nwe utilize persistent homology to investigate geometry structure alignment,\nand observe two following issues: (i) The sampled mini-batch data points present\na distinct structure gap compared to global data points, thus the learned structure\nalignment space inevitably neglects abundant and accurate global structure\ninformation. (ii) The latent visual and semantic space fail to preserve multiple\ndimensional geometry structure, especially high dimensional structure information.\nTo address the first issue, we propose a Topology-guided Sampling Strategy\n(TGSS) to mitigate the gap between sampled and global data points. 
Both theoretical\nanalyses and empirical results guarantee the effectiveness of the TGSS.\nTo solve the second issue, we introduce a Topology Alignment Module (TAM)\nto preserve multi-dimensional geometry structure in latent visual and semantic\nspace, respectively. The proposed method is dubbed TopoZero. Empirically, our\nTopoZero achieves superior performance on three authoritative ZSL benchmark\ndatasets.", "keywords": "Zero-Shot Learning;Structure Alignment;Persistent Homology", "primary_area": "", "supplementary_material": "", "author": "Yang Liu;Fei Wang;Jiankang Deng;Chen WeiTao;Lei Shang;Baigui Sun;Xuansong Xie", "authorids": "~Yang_Liu51;~Fei_Wang15;~Jiankang_Deng1;~Chen_WeiTao1;~Lei_Shang1;~Baigui_Sun1;~Xuansong_Xie1", "gender": "M;M;M;;M;M;M", "homepage": ";https://jiankangdeng.github.io/;;;;;", "dblp": "27/3367-5;156/7808;;;186/8016;234/8028;52/3194-15", "google_scholar": "t1emSE0AAAAJ;Z_UoQFsAAAAJ;;WO1eMcIAAAAJ;ZNhTHywAAAAJ;M0Ei1zkAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";0000-0002-3709-6216;0000-0003-1796-2671;;0000-0001-7722-4748;;", "linkedin": ";jiankang-deng-b45b21b4/?originalSubdomain=uk;;;;;", "or_profile": "~Yang_Liu51;~Jiankang_Deng1;~Chen_WeiTao1;~Lei_Shang1;~Baigui_Sun1;~Xuansong_Xie1;~Steven_Wang2", "aff": "Alibaba Group;;Alibaba Group;Alibaba Group;Alibaba Group;Alibaba Group;Alibaba Group", "aff_domain": "alibaba-inc.com;;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com", "position": "Researcher at Alibaba Group;;Researcher;Researcher;Researcher;Researcher;Researcher", "bibtex": "@misc{\nliu2023topozero,\ntitle={TopoZero: Digging into Topology Alignment on Zero-Shot Learning},\nauthor={Yang Liu and Fei Wang and Jiankang Deng and Chen WeiTao and Lei Shang and Baigui Sun and Xuansong Xie},\nyear={2023},\nurl={https://openreview.net/forum?id=GOEpRos3w0L}\n}", "github": "", "project": "", "reviewers": "MFWd;ijnG;ugcD;FV59", "site": "https://openreview.net/forum?id=GOEpRos3w0L", "pdf_size": 814701, "recommendation": "3;5;6;8", "confidence": "5;4;4;5", "correctness": "3;3;4;3", "technical_novelty": "2;2;3;4", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "53;69;59;41", "wc_strength_and_weaknesses": "220;82;128;135", "wc_clarity_quality_novelty_and_reproducibility": "60;339;43;43", "wc_summary_review": "56;51;52;38", "wc_review": "389;541;282;257", "wc_reply_reviewers": "132;10;0;0", "wc_reply_authors": "1625;1293;749;655", "reply_reviewers": "1;1;0;0", "reply_authors": "4;5;1;2", "recommendation_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 4.5, 0.5 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 55.5, 10.136567466356647 ], "wc_strength_and_weaknesses_avg": [ 141.25, 49.816538418481066 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 121.25, 125.90944166344318 ], "wc_summary_review_avg": [ 49.25, 6.7592529172978875 ], "wc_review_avg": [ 367.25, 111.89811213778363 ], "wc_reply_reviewers_avg": [ 35.5, 55.863673348608216 ], "wc_reply_authors_avg": [ 1080.5, 397.6741756765204 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 3.0, 1.5811388300841898 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.16012815380508713, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:xOqmROajv4AJ:scholar.google.com/&scioq=TopoZero:+Digging+into+Topology+Alignment+on+Zero-Shot+Learning&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Alibaba Group", "aff_unique_dep": "", "aff_unique_url": "https://www.alibaba.com", "aff_unique_abbr": "Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "SIMPLE: A Gradient Estimator for k-Subset Sampling", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10885", "id": "GPJVuyX4p_h", "poster": "/media/PosterPDFs/ICLR%202023/10885.png?t=1681112540.7106907", "openreview": "https://openreview.net/forum?id=GPJVuyX4p_h", "slides": "https://iclr.cc/virtual/2023/poster/10885", "video": "https://iclr.cc/virtual/2023/poster/10885", "author_site": "Kareem Ahmed, Zhe Zeng, Mathias Niepert, Guy Van den Broeck", "tldr": "", "abstract": "$k$-subset sampling is ubiquitous in machine learning, enabling regularization and interpretability through sparsity. The challenge lies in rendering $k$-subset sampling amenable to end-to-end learning. This has typically involved relaxing the reparameterized samples to allow for backpropagation, but introduces both bias and variance. In this work, we fall back to discrete $k$-subset sampling on the forward pass. This is coupled with using the gradient with respect to the exact marginals, computed efficiently, as a proxy for the true gradient. We show that our gradient estimator exhibits lower bias and variance compared to state-of-the-art estimators. Empirical results show improved performance on learning to explain and sparse models benchmarks. We provide an algorithm for computing the exact ELBO for the $k$-subset distribution, obtaining significantly lower loss compared to state-of-the-art discrete sparse VAEs. 
All of our algorithms are exact and efficient.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/58bf6117dcfad6f8d66c6c3995f9c97505eeeab8.zip", "author": "Kareem Ahmed;Zhe Zeng;Mathias Niepert;Guy Van den Broeck", "authorids": "~Kareem_Ahmed2;~Zhe_Zeng1;~Mathias_Niepert1;~Guy_Van_den_Broeck1", "gender": "M;F;M;M", "homepage": "http://kareemahmed.com;https://zzeng.me/;http://www.matlog.net;http://web.cs.ucla.edu/~guyvdb/", "dblp": "188/6144;27/10464;n/MathiasNiepert;96/7521.html", "google_scholar": "hkM0hbIAAAAJ;PyK6cB0AAAAJ;https://scholar.google.de/citations?user=p5vLzq0AAAAJ;d0KQ9z0AAAAJ", "orcid": ";;;0000-0003-3434-2503", "linkedin": "kareem-yousrii/;;;guyvdb", "or_profile": "~Kareem_Ahmed2;~Zhe_Zeng1;~Mathias_Niepert1;~Guy_Van_den_Broek1", "aff": "University of California, Los Angeles;University of California, Los Angeles;NEC;University of California, Los Angeles", "aff_domain": "cs.ucla.edu;cs.ucla.edu;neclab.eu;ucla.edu", "position": "PhD student;PhD student;Research Scientist;Associate Professor", "bibtex": "@inproceedings{\nahmed2023simple,\ntitle={{SIMPLE}: A Gradient Estimator for k-Subset Sampling},\nauthor={Kareem Ahmed and Zhe Zeng and Mathias Niepert and Guy Van den Broeck},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=GPJVuyX4p_h}\n}", "github": "", "project": "", "reviewers": "6jdF;AhEr;pM21;XcRS", "pdf_size": 473640, "recommendation": "3;6;6;6", "confidence": "4;2;3;4", "correctness": "3;4;2;2", "technical_novelty": "2;2;3;3", "empirical_novelty": "1;2;3;2", "wc_summary_paper": "62;15;31;90", "wc_strength_and_weaknesses": "59;28;172;300", "wc_clarity_quality_novelty_and_reproducibility": "253;15;23;43", "wc_summary_review": "41;20;16;86", "wc_review": "415;78;242;519", "wc_reply_reviewers": "55;0;11;136", "wc_reply_authors": "881;570;851;600", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;2;2", "recommendation_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 2.75, 0.82915619758885 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 49.5, 28.848743473503312 ], "wc_strength_and_weaknesses_avg": [ 139.75, 106.92140805283103 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 83.5, 98.39080241567298 ], "wc_summary_review_avg": [ 40.75, 27.797257058925798 ], "wc_review_avg": [ 313.5, 168.15543404838274 ], "wc_reply_reviewers_avg": [ 50.5, 53.48130514488217 ], "wc_reply_authors_avg": [ 725.5, 141.29844302043813 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.5222329678670935, "corr_recommendation_correctness": -0.17407765595569782, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3880377191562350458&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "pdf": "https://openreview.net/pdf?id=GPJVuyX4p_h", "email": "cs.ucla.edu;cs.ucla.edu;neclab.eu;ucla.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of California, Los Angeles;NEC Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucla.edu;https://www.nec.com", "aff_unique_abbr": "UCLA;NEC", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;Japan" }, { 
"id": "GPPmQdU3k7", "title": "A Probabilistic Approach to Self-Supervised Learning using Cyclical Stochastic Gradient MCMC", "track": "main", "status": "Reject", "tldr": "", "abstract": "In this paper we present a practical Bayesian formulation for self-supervised learning method with Cyclical Stochastic Gradient Hamiltonian Monte Carlo (cSGHMC). Within this framework, we place a prior over the parameters of a self-supervised learning model and use cSGHMC to approximate the high dimensional and multimodal posterior distribution over the embeddings. By exploring an expressive posterior over the embeddings, the Bayesian self-supervised learning produces interpretable and diverse representations. Marginalising over these representations results improvement in semi-supervised learning and out-of-distribution detection tasks. We provide experimental results on multiple classification tasks in semi-supervised learning including Cifar10 and Cifar100. Moreover we demonstrate the effectiveness of the proposed method in out-of distribution detection task using SVHN dataset.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Masoumeh Javanbakhat;Christoph Lippert", "authorids": "~Masoumeh_Javanbakhat1;~Christoph_Lippert1", "gender": "F;M", "homepage": ";http://hpi.de/lippert", "dblp": ";45/7970.html", "google_scholar": ";RVl8TE0AAAAJ", "orcid": ";0000-0001-6363-2556", "linkedin": "masoumeh-javanbakht-99141744/;christoph-lippert-307b8135/", "or_profile": "~Masoumeh_Javanbakhat1;~Christoph_Lippert1", "aff": "Hasso Plattner Institute;Hasso Plattner Institute", "aff_domain": "hpi.de;hpi.de", "position": "Postdoc;Full Professor", "bibtex": "@misc{\njavanbakhat2023a,\ntitle={A Probabilistic Approach to Self-Supervised Learning using Cyclical Stochastic Gradient {MCMC} },\nauthor={Masoumeh Javanbakhat and Christoph Lippert},\nyear={2023},\nurl={https://openreview.net/forum?id=GPPmQdU3k7}\n}", "github": "", "project": "", "reviewers": "YvPk;tf9n;6qBF", "site": "https://openreview.net/forum?id=GPPmQdU3k7", "pdf_size": 273402, "recommendation": "3;3;3", "confidence": "5;5;4", "correctness": "2;3;2", "technical_novelty": "1;1;1", "empirical_novelty": "1;1;1", "wc_summary_paper": "58;47;56", "wc_strength_and_weaknesses": "32;100;191", "wc_clarity_quality_novelty_and_reproducibility": "38;55;33", "wc_summary_review": "94;44;31", "wc_review": "222;246;311", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "correctness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 1.0, 0.0 ], "empirical_novelty_avg": [ 1.0, 0.0 ], "wc_summary_paper_avg": [ 53.666666666666664, 4.784233364802441 ], "wc_strength_and_weaknesses_avg": [ 107.66666666666667, 65.13746148637426 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 42.0, 9.41629792788369 ], "wc_summary_review_avg": [ 56.333333333333336, 27.157974069424906 ], "wc_review_avg": [ 259.6666666666667, 37.59728122558273 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=302239253416965069&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0", 
"aff_unique_norm": "Hasso Plattner Institute", "aff_unique_dep": "", "aff_unique_url": "https://www.hpi.de", "aff_unique_abbr": "HPI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "title": "Free Lunch for Domain Adversarial Training: Environment Label Smoothing", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11317", "id": "GPTjnA57h_3", "poster": "/media/PosterPDFs/ICLR%202023/11317.png?t=1680749710.5052726", "openreview": "https://openreview.net/forum?id=GPTjnA57h_3", "slides": "https://iclr.cc/virtual/2023/poster/11317", "video": "https://iclr.cc/virtual/2023/poster/11317", "author_site": "YiFan Zhang, xue wang, Jian Liang, Zhang Zhang, Liang Wang, Rong Jin, Tieniu Tan", "tldr": "We propose to smooth environment label for domain adversarial training methods, which is experimentally and theoretically shown able to improve training stability, local convergence, and robustness to noisy labels.", "abstract": "A fundamental challenge for machine learning models is how to generalize learned models for out-of-distribution (OOD) data. Among various approaches, exploiting invariant features by Domain Adversarial Training (DAT) received widespread attention. Despite its success, we observe training instability from DAT, mostly due to over-confident domain discriminator and environment label noise. To address this issue, we proposed Environment Label Smoothing (ELS), which encourages the discriminator to output soft probability, which thus reduces the confidence of the discriminator and alleviates the impact of noisy environment labels. We demonstrate, both experimentally and theoretically, that ELS can improve training stability, local convergence, and robustness to noisy environment labels. By incorporating ELS with DAT methods, we are able to yield state-of-art results on a wide range of domain generalization/adaptation tasks, particularly when the environment labels are highly noisy. 
\n", "keywords": "Out-of-Distribution Generalization;Domain adaptation/generalization;Domain adversarial training;environmnt label noise;non-asymptotic convergence", "primary_area": "", "supplementary_material": "/attachment/337a93a72fdb5e2bc6e045929966e1613747b0b6.zip", "author": "YiFan Zhang;xue wang;Jian Liang;Zhang Zhang;Liang Wang;Rong Jin;Tieniu Tan", "authorids": "~YiFan_Zhang8;~xue_wang1;~Jian_Liang1;~Zhang_Zhang1;~Liang_Wang3;~Rong_Jin3;~Tieniu_Tan1", "gender": ";M;M;;M;M;", "homepage": ";https://www.linkedin.com/in/xue-wang-98739572/;https://liangjian.xyz;https://zhangzhang80.github.io/;;https://www.cse.msu.edu/~rongjin/;", "dblp": ";;19/2208-1;94/2468-1;56/4499-1;j/RongJin;", "google_scholar": ";;https://scholar.google.com/citations?hl=en;rnRNwEMAAAAJ;;;", "orcid": ";;0000-0003-3890-1894;0000-0001-9425-3065;;;", "linkedin": ";;;;;;", "or_profile": "~YiFan_Zhang8;~xue_wang1;~Jian_Liang1;~Zhang_Zhang1;~Liang_Wang3;~Rong_Jin3;~Tieniu_Tan1", "aff": ";Alibaba Group US;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Institute of Automation\uff0c CAS\uff0cChina;Twitter;", "aff_domain": ";alibaba-inc.com;ia.ac.cn;ia.ac.cn;ia.ac.cn;twitter.com;", "position": ";Researcher;Associate Professor;Associate Professor;Full Professor;Researcher;", "bibtex": "@inproceedings{\nzhang2023free,\ntitle={Free Lunch for Domain Adversarial Training: Environment Label Smoothing},\nauthor={YiFan Zhang and xue wang and Jian Liang and Zhang Zhang and Liang Wang and Rong Jin and Tieniu Tan},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=GPTjnA57h_3}\n}", "github": "", "project": "", "reviewers": "LfGU;kFo4;hUFN", "pdf_size": 3152442, "recommendation": "5;6;8", "confidence": "4;3;4", "correctness": "3;4;3", "technical_novelty": "2;2;3", "empirical_novelty": "0;3;2", "wc_summary_paper": "22;72;101", "wc_strength_and_weaknesses": "176;393;259", "wc_clarity_quality_novelty_and_reproducibility": "20;29;19", "wc_summary_review": "27;37;59", "wc_review": "245;531;438", "wc_reply_reviewers": "0;75;12", "wc_reply_authors": "1170;1201;551", "reply_reviewers": "0;1;1", "reply_authors": "3;2;2", "recommendation_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.6666666666666667, 1.247219128924647 ], "wc_summary_paper_avg": [ 65.0, 32.629230249374054 ], "wc_strength_and_weaknesses_avg": [ 276.0, 89.40171512150461 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 22.666666666666668, 4.4969125210773475 ], "wc_summary_review_avg": [ 41.0, 13.366625103842281 ], "wc_review_avg": [ 404.6666666666667, 119.11432416893537 ], "wc_reply_reviewers_avg": [ 29.0, 32.89376840679705 ], "wc_reply_authors_avg": [ 974.0, 299.37379088134395 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.18898223650461363, "corr_recommendation_correctness": -0.18898223650461363, "gs_citation": 61, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=448626149885441304&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=GPTjnA57h_3", "email": 
";alibaba-inc.com;ia.ac.cn;ia.ac.cn;ia.ac.cn;twitter.com;", "author_num": 7, "aff_unique_index": "0;1;1;1;2", "aff_unique_norm": "Alibaba Group;Chinese Academy of Sciences;Twitter, Inc.", "aff_unique_dep": ";Institute of Automation;", "aff_unique_url": "https://www.alibaba.com;http://www.ia.cas.cn;https://twitter.com", "aff_unique_abbr": "Alibaba;CAS;Twitter", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;0", "aff_country_unique": "United States;China" }, { "id": "GQVfDsoFSBg", "title": "Causal RL Agents for Out-of-distribution Generalization", "track": "main", "status": "Withdraw", "tldr": "This paper proposes a novel technique GCRL to learn a OOD generalization policy by establishing the dependence of actions on a disentangled representation that captures the information about causal factors. ", "abstract": "Out-of-distribution (OOD) generalization is critical for applying reinforcement learning algorithms to real-world applications. To address the OOD problem, recent works focus on learning an OOD adaptation policy by capturing the causal factors affecting the environmental dynamics. However, these works recover the causal factors with only an entangled or binary form, resulting in a limited generalization of the policy that requires extra data from the testing environments. To break this limitation, we propose Generalizable Causal Reinforcement Learning (GCRL) to learn a disentangled representation of causal factors, on the basis of which we learn a policy that achieves the OOD generalization without extra training. For capturing the causal factors, GCRL deploys a variant of $\\beta$-VAE structure with a two-stage constraint to ensure that all factors can be disentangled. Then, to achieve the OOD generalization through causal factors, we adopt an additional network to establish the dependence of actions on the learned representation. Theoretically, we prove that while the optimal policy can be found in training environments, the established dependence can recover the causal relationship between causal factors and actions. Experimental results show that GCRL achieves the OOD generalization on eight benchmarks from Causal World and Mujoco. 
Moreover, the policy learned by our model is more explainable: it can be controlled to generate semantic actions by intervening on the representation of causal factors.", "keywords": "Reinforcement Learning;Out-of-distribution Generalization;Disentangled Representation", "primary_area": "", "supplementary_material": "", "author": "Sili Huang;Bo Yang;Hechang Chen;Peng Cui;Jifeng Hu;haiyin piao;Lichao Sun", "authorids": "~Sili_Huang1;ybo@jlu.edu.cn;~Hechang_Chen2;~Peng_Cui1;~Jifeng_Hu1;~haiyin_piao1;~Lichao_Sun1", "gender": "M;;M;M;;M;M", "homepage": ";;http://sai.jlu.edu.cn/info/1094/2387.htm;http://pengcui.thumedialab.com/;;https://www.researchgate.net/profile/Haiyin-Piao;https://lichao-sun.github.io/", "dblp": "26/6752;;145/1142;31/891-1;;269/4228.html;121/0780-1.html", "google_scholar": "ZMhi8A0AAAAJ;;EezEcbgAAAAJ;https://scholar.google.com.tw/citations?user=G8x97ZgAAAAJ;;;WhGUE7AAAAAJ", "orcid": "0000-0001-5387-7904;;;0000-0003-2957-8511;;;", "linkedin": ";;;;;;lichao-sun-b273a290/", "or_profile": "~Sili_Huang1;ybo@jlu.edu.cn;~Hechang_Chen2;~Peng_Cui1;~Jifeng_Hu1;~haiyin_piao1;~Lichao_Sun1", "aff": "Jilin University;;Jilin University;Tsinghua University;;Northwestern Polytechnical University;Lehigh University", "aff_domain": "jlu.edu.cn;;jlu.edu.cn;tsinghua.edu.cn;;nwpu.edu.cn;lehigh.edu", "position": "PhD student;;Associate Professor;Associate Professor;;PhD student;Assistant Professor", "bibtex": "@misc{\nhuang2023causal,\ntitle={Causal {RL} Agents for Out-of-distribution Generalization},\nauthor={Sili Huang and Bo Yang and Hechang Chen and Peng Cui and Jifeng Hu and haiyin piao and Lichao Sun},\nyear={2023},\nurl={https://openreview.net/forum?id=GQVfDsoFSBg}\n}", "github": "", "project": "", "reviewers": "AozH;LgAG;83n4", "site": "https://openreview.net/forum?id=GQVfDsoFSBg", "pdf_size": 2568241, "recommendation": "3;6;6", "confidence": "4;4;4", "correctness": "1;3;3", "technical_novelty": "2;3;2", "empirical_novelty": "2;2;3", "wc_summary_paper": "102;145;194", "wc_strength_and_weaknesses": "370;171;177", "wc_clarity_quality_novelty_and_reproducibility": "93;122;32", "wc_summary_review": "24;83;43", "wc_review": "589;521;446", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.3333333333333335, 0.9428090415820634 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 147.0, 37.58545818087983 ], "wc_strength_and_weaknesses_avg": [ 239.33333333333334, 92.42774956088073 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 82.33333333333333, 37.5085175512028 ], "wc_summary_review_avg": [ 50.0, 24.589970855343985 ], "wc_review_avg": [ 518.6666666666666, 58.40281576164705 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:hScJ9uUcHNwJ:scholar.google.com/&scioq=Causal+RL+Agents+for+Out-of-distribution+Generalization&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;1;2;3", "aff_unique_norm": "Jilin University;Tsinghua University;Northwestern Polytechnical University;Lehigh University",
"aff_unique_dep": ";;;", "aff_unique_url": "http://www.jlu.edu.cn;https://www.tsinghua.edu.cn;https://www.nwpu.edu.cn;https://www.lehigh.edu", "aff_unique_abbr": "JLU;THU;NWPU;Lehigh", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "China;United States" }, { "title": "Scalable Batch-Mode Deep Bayesian Active Learning via Equivalence Class Annealing", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11016", "id": "GRZtigJljLY", "poster": "", "openreview": "https://openreview.net/forum?id=GRZtigJljLY", "slides": "https://iclr.cc/virtual/2023/poster/11016", "video": "https://iclr.cc/virtual/2023/poster/11016", "author_site": "Renyu Zhang, Aly Khan, Robert Grossman, Yuxin Chen", "tldr": "We propose a new scalable batch-mode active learning algorithm", "abstract": "Active learning has demonstrated data efficiency in many fields. Existing active learning algorithms, especially in the context of batch-mode deep Bayesian active models, rely heavily on the quality of uncertainty estimations of the model, and are often challenging to scale to large batches. In this paper, we propose Batch-BALanCe, a scalable batch-mode active learning algorithm, which combines insights from decision-theoretic active learning, combinatorial information measure, and diversity sampling. At its core, Batch-BALanCe relies on a novel decision-theoretic acquisition function that facilitates differentiation among different equivalence classes. Intuitively, each equivalence class consists of hypotheses (e.g., posterior samples of deep neural networks) with similar predictions, and Batch-BALanCe adaptively adjusts the size of the equivalence classes as learning progresses. To scale up the computation of queries to large batches, we further propose an efficient batch-mode acquisition procedure, which aims to maximize a novel combinatorial information measure defined through the acquisition function. We show that our algorithm can effectively handle realistic multi-class classification tasks, and achieves compelling performance on several benchmark datasets for active learning under both low- and large-batch regimes.", "keywords": "Bayesian Neural Network;Batch-Mode Active Learning;Decision-Centric Data Acquisition;Scalability", "primary_area": "", "supplementary_material": "", "author": "Renyu Zhang;Aly A Khan;Robert L. Grossman;Yuxin Chen", "authorids": "~Renyu_Zhang2;~Aly_A_Khan1;~Robert_L._Grossman2;~Yuxin_Chen1", "gender": "M;;M;", "homepage": "https://zhangrenyuuchicago.github.io/;http://ttic.uchicago.edu/~aakhan/;https://rgrossman.com;http://yuxinchen.org/", "dblp": "152/4749;46/2390;g/RobertLGrossman.html;11/5123-1", "google_scholar": "-4Hr3l0AAAAJ;xIVBCnEAAAAJ;3KG8arsAAAAJ;-k1N7HAAAAAJ", "orcid": ";0000-0003-3933-8538;0000-0003-3741-5739;", "linkedin": ";;robertgrossman/;", "or_profile": "~Renyu_Zhang2;~Aly_A_Khan1;~Robert_L._Grossman2;~Yuxin_Chen1", "aff": "Department of Computer Science, University of Chicago;Toyota Technological Institute at Chicago;University of Chicago;University of Chicago", "aff_domain": "cs.uchicago.edu;ttic.edu;uchicago.edu;uchicago.edu", "position": "PhD student;Courtesy Faculty;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhang2023scalable,\ntitle={Scalable Batch-Mode Deep Bayesian Active Learning via Equivalence Class Annealing},\nauthor={Renyu Zhang and Aly A Khan and Robert L. 
Grossman and Yuxin Chen},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=GRZtigJljLY}\n}", "github": "", "project": "", "reviewers": "RVjP;wGy4;CBRc;xKhG", "pdf_size": 1317522, "recommendation": "5;6;8;8", "confidence": "4;4;4;3", "correctness": "2;3;2;3", "technical_novelty": "3;3;2;3", "empirical_novelty": "3;3;0;3", "wc_summary_paper": "112;70;43;88", "wc_strength_and_weaknesses": "236;113;181;472", "wc_clarity_quality_novelty_and_reproducibility": "21;291;130;80", "wc_summary_review": "29;50;14;110", "wc_review": "398;524;368;750", "wc_reply_reviewers": "64;41;0;17", "wc_reply_authors": "1461;1303;733;591", "reply_reviewers": "1;1;0;1", "reply_authors": "4;3;1;1", "recommendation_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 1.299038105676658 ], "wc_summary_paper_avg": [ 78.25, 25.222757581200355 ], "wc_strength_and_weaknesses_avg": [ 250.5, 135.10088822802018 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 130.5, 100.3755448304018 ], "wc_summary_review_avg": [ 50.75, 36.519686471819554 ], "wc_review_avg": [ 510.0, 150.41941364066008 ], "wc_reply_reviewers_avg": [ 30.5, 24.212600025606502 ], "wc_reply_authors_avg": [ 1022.0, 367.7512746408909 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 1.299038105676658 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.5555555555555555, "corr_recommendation_correctness": 0.19245008972987526, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=972413762384599656&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=GRZtigJljLY", "email": "cs.uchicago.edu;ttic.edu;uchicago.edu;uchicago.edu", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Chicago;Toyota Technological Institute at Chicago", "aff_unique_dep": "Department of Computer Science;", "aff_unique_url": "https://www.uchicago.edu;https://www.tti-chicago.org", "aff_unique_abbr": "UChicago;TTI Chicago", "aff_campus_unique_index": "1", "aff_campus_unique": ";Chicago", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Neural Agents Struggle to Take Turns in Bidirectional Emergent Communication", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11549", "id": "GULFHQfgw0g", "poster": "/media/PosterPDFs/ICLR%202023/11549.png?t=1682628224.0397797", "openreview": "https://openreview.net/forum?id=GULFHQfgw0g", "slides": "https://iclr.cc/virtual/2023/poster/11549", "video": "https://iclr.cc/virtual/2023/poster/11549", "author_site": "Valentin Taillandier, Dieuwke Hupkes, Beno\u00eet Sagot, Emmanuel Dupoux, Paul Michel", "tldr": "Neural agents struggle to develop a turn-taking protocol when playing cooperative game for which they have to communicate.", "abstract": "The spontaneous exchange of turns is a central aspect of human communication. Although turn-taking conventions come to us naturally, artificial dialogue agents struggle to coordinate, and must rely on hard-coded rules to engage in interactive conversations with human interlocutors. In this paper, we investigate the conditions under which artificial agents may naturally develop turn-taking conventions in a simple language game. 
We describe a cooperative task where success is contingent on the exchange of information along a shared communication channel, in which talking over each other hinders communication. Despite these environmental constraints, neural-network-based agents trained to solve this task with reinforcement learning do not systematically adopt turn-taking conventions. However, we find that agents that do agree on turn-taking protocols end up performing better. \nMoreover, agents that are forced to perform turn-taking can learn to solve the task more quickly. \nThis suggests that turn-taking may help to generate conversations that are easier for speakers to interpret.", "keywords": "language emergence;turn-taking;conversation;communication;neural agents;cooperative game;reinforcement learning", "primary_area": "", "supplementary_material": "", "author": "Valentin Taillandier;Dieuwke Hupkes;Beno\u00eet Sagot;Emmanuel Dupoux;Paul Michel", "authorids": "~Valentin_Taillandier1;~Dieuwke_Hupkes1;~Beno\u00eet_Sagot1;~Emmanuel_Dupoux1;~Paul_Michel1", "gender": "M;;M;M;M", "homepage": ";https://github.com/google/BIG-bench;http://pauillac.inria.fr/~sagot/;http://www.lscp.net/persons/dupoux/;https://pmichel31415.github.io/", "dblp": ";184/8838;66/1016;41/8160;185/1024", "google_scholar": ";https://scholar.google.nl/citations?user=tAtSMTcAAAAJ;https://scholar.google.fr/citations?user=HXUT9ZkAAAAJ;https://scholar.google.fr/citations?user=94c1abIAAAAJ;oyyIf0YAAAAJ", "orcid": ";;0000-0002-0107-8526;0000-0002-7814-2952;", "linkedin": "valentin-taillandier/;;beno\u00eet-sagot-4731735/;emmanuel-dupoux-18034055/;paul-michel-4954b799/", "or_profile": "~Valentin_Taillandier1;~Dieuwke_Hupkes1;~Beno\u00eet_Sagot1;~Emmanuel_Dupoux1;~Paul_Michel1", "aff": ";Meta Facebook;Inria;EHESS;Google DeepMind", "aff_domain": ";facebook.com;inria.fr;ehess.fr;deepmind.com", "position": ";Research Scientist;Research Director;Full Professor;Researcher", "bibtex": "@inproceedings{\ntaillandier2023neural,\ntitle={Neural Agents Struggle to Take Turns in Bidirectional Emergent Communication},\nauthor={Valentin Taillandier and Dieuwke Hupkes and Beno{\\^\\i}t Sagot and Emmanuel Dupoux and Paul Michel},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=GULFHQfgw0g}\n}", "github": "", "project": "", "reviewers": "qXss;31zg;Nn7U;D8G5", "pdf_size": 541132, "recommendation": "3;6;6;8", "confidence": "3;4;3;4", "correctness": "2;3;3;4", "technical_novelty": "2;3;2;3", "empirical_novelty": "2;3;2;4", "wc_summary_paper": "83;189;88;67", "wc_strength_and_weaknesses": "172;35;67;118", "wc_clarity_quality_novelty_and_reproducibility": "5;51;24;51", "wc_summary_review": "28;49;22;52", "wc_review": "288;324;201;288", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "568;438;338;263", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 5.75, 1.7853571071357126 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 106.75, 48.11639533464659 ], "wc_strength_and_weaknesses_avg": [ 98.0, 51.97595597966429 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 32.75, 19.447043477094404 ], "wc_summary_review_avg": [ 37.75, 12.968712349342937 ], "wc_review_avg": [ 275.25, 45.31762902006238 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 401.75, 114.31179947844404 ], "reply_reviewers_avg": [
0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.7001400420140049, "corr_recommendation_correctness": 0.9901475429766743, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11394628775447027013&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 2, "pdf": "https://openreview.net/pdf?id=GULFHQfgw0g", "email": ";facebook.com;inria.fr;ehess.fr;deepmind.com", "author_num": 5, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Meta;INRIA;Ecole des Hautes Etudes en Sciences Sociales;Google", "aff_unique_dep": "Meta Platforms, Inc.;;;Google DeepMind", "aff_unique_url": "https://meta.com;https://www.inria.fr;https://www.ehess.fr;https://deepmind.com", "aff_unique_abbr": "Meta;Inria;EHESS;DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;2", "aff_country_unique": "United States;France;United Kingdom" }, { "id": "GUMLIArCIwB", "title": "Reconciling Security and Communication Efficiency in Federated Learning", "track": "main", "status": "Reject", "tldr": "Uplink communication efficiency with a high privacy and security bar", "abstract": "Cross-device Federated Learning is an increasingly popular machine learning setting to train a model by leveraging a large population of client devices with high privacy and security guarantees. However, communication efficiency remains a major bottleneck when scaling federated learning to production environments, particularly due to bandwidth constraints during uplink communication. In this paper, we formalize and address the problem of compressing client-to-server model updates under the Secure Aggregation primitive, a core component of Federated Learning pipelines that allows the server to aggregate the client updates without accessing them individually. In particular, we adapt standard scalar quantization and pruning methods to Secure Aggregation and propose Secure Indexing, a variant of Secure Aggregation that supports quantization for extreme compression.
We establish state-of-the-art results on LEAF benchmarks in a secure Federated Learning setup, with up to 40x compression in uplink communication and no meaningful loss in utility compared to uncompressed baselines.", "keywords": "Federated Learning;Secure Aggregation;Compression;Efficiency;Product Quantization", "primary_area": "", "supplementary_material": "", "author": "Karthik Prasad;Sayan Ghosh;Graham Cormode;Ilya Mironov;Ashkan Yousefpour;Pierre Stock", "authorids": "~Karthik_Prasad1;~Sayan_Ghosh1;~Graham_Cormode1;~Ilya_Mironov2;~Ashkan_Yousefpour1;~Pierre_Stock1", "gender": ";;M;;M;M", "homepage": "https://ai.facebook.com/people/karthik-prasad;;http://dimacs.rutgers.edu/~graham/;https://www.hireashkan.com;https://research.fb.com/people/stock-pierre/;https://ai.facebook.com/people/ilya-mironov/", "dblp": ";67/6126-4;c/GrahamCormode;194/6896;210/2208;19/5860", "google_scholar": "O7Qb6I8AAAAJ;WC_NlykAAAAJ;https://scholar.google.co.uk/citations?user=gpLVKmEAAAAJ;d-9r49UAAAAJ;https://scholar.google.fr/citations?user=3e2-59cAAAAJ;hg3A9TgAAAAJ", "orcid": ";;0000-0002-0698-0922;;;0000-0002-2149-1916", "linkedin": "prasadkarthik/;;;ashkany/;;ilya-mironov-668216/", "or_profile": "~Karthik_Prasad1;~Sayan_Ghosh1;~Graham_Cormode1;~Ashkan_Yousefpour1;~Pierre_Stock1;~Ilya_Mironov1", "aff": "Meta;Meta Facebook;The university of Warwick;Meta (Facebook);Meta Facebook;Meta Facebook", "aff_domain": "meta.com;fb.com;warwick.ac.uk;fb.com;fb.com;meta.com", "position": "Researcher;Research Scientist;Full Professor;Research Scientist;Research Scientist;Principal Researcher", "bibtex": "@misc{\nprasad2023reconciling,\ntitle={Reconciling Security and Communication Efficiency in Federated Learning},\nauthor={Karthik Prasad and Sayan Ghosh and Graham Cormode and Ilya Mironov and Ashkan Yousefpour and Pierre Stock},\nyear={2023},\nurl={https://openreview.net/forum?id=GUMLIArCIwB}\n}", "github": "", "project": "", "reviewers": "r6Pi;dmTz;Tmdx;wijR", "site": "https://openreview.net/forum?id=GUMLIArCIwB", "pdf_size": 831838, "recommendation": "3;5;5;6", "confidence": "5;3;3;1", "correctness": "3;3;3;3", "technical_novelty": "1;2;3;3", "empirical_novelty": "1;2;3;2", "wc_summary_paper": "171;98;43;44", "wc_strength_and_weaknesses": "213;126;141;32", "wc_clarity_quality_novelty_and_reproducibility": "23;231;40;17", "wc_summary_review": "18;6;45;32", "wc_review": "425;461;269;125", "wc_reply_reviewers": "0;0;0;15", "wc_reply_authors": "251;770;102;88", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.0, 1.4142135623730951 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.25, 0.82915619758885 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 89.0, 52.31156659860226 ], "wc_strength_and_weaknesses_avg": [ 128.0, 64.4476531768225 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 77.75, 88.88018620592555 ], "wc_summary_review_avg": [ 25.25, 14.652218262092603 ], "wc_review_avg": [ 320.0, 133.72733452813603 ], "wc_reply_reviewers_avg": [ 3.75, 6.49519052838329 ], "wc_reply_authors_avg": [ 302.75, 277.226779911321 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.9733285267845754, "corr_recommendation_correctness": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3483146815601313306&as_sdt=2005&sciodt=0,5&hl=en",
"gs_version_total": 7, "aff_unique_index": "0;0;1;0;0;0", "aff_unique_norm": "Meta;University of Warwick", "aff_unique_dep": "Meta Platforms, Inc.;", "aff_unique_url": "https://meta.com;https://warwick.ac.uk", "aff_unique_abbr": "Meta;Warwick", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "GUSf17i8RMZ", "title": "CircuitNet: A Generic Neural Network to Realize Universal Circuit Motif Modeling", "track": "main", "status": "Reject", "tldr": "We proposed CircuitNet by modeling the universal circuit motifs and structures in human brains to function as a generic neural network and tested in several machine learning tasks.", "abstract": "The successes of artificial neural networks (ANNs) are largely attributed to mimicking the human brain structures. Recent advances in neuroscience revealed that neurons interact with each other through various kinds of connectivity patterns to process information, in which the common connectivity patterns are also called circuit motifs. However, many existing ANNs can only model one or two circuit motifs in their architectures, so that their performance may drastically vary among different types of machine learning tasks. \nIn this paper, we propose a new type of neural network inspired by the architectures of neuronal circuits, namely Circuit Neural Network (CircuitNet). In CircuitNet, a group of densely connected neurons, namely circuit motif unit (CMU), form the basic unit of the network, which is capable of modeling universal circuit motifs by adjusting the weights within the CMUs. Compared with traditional feed-forward networks, CircuitNet has the ability to model more types of neuron connections such as feed-back and lateral motifs.\nInspired by the locally dense and globally sparse structure of the human brain, several iterations of signal transmission among different CMUs are achieved by sparse connections through the input ports and output ports of different CMUs. 
\nExperiments have demonstrated that CircuitNet can outperform popular neural network architectures in function approximation, reinforcement learning, image classification, and time series forecasting tasks.", "keywords": "Bio-inspired neural network;Deep Learning", "primary_area": "", "supplementary_material": "", "author": "Yansen Wang;XINYANG JIANG;Kan Ren;Caihua Shan;Xufang Luo;Kaitao Song;Dongsheng Li", "authorids": "~Yansen_Wang2;~XINYANG_JIANG2;~Kan_Ren1;~Caihua_Shan1;~Xufang_Luo1;~Kaitao_Song1;~Dongsheng_Li2", "gender": "M;M;F;F;M;M;M", "homepage": ";https://saying.ren;;;;http://recmind.cn;", "dblp": "155/6316;28/7458;;218/7350;222/2082;254/0830-2.html;134/7817", "google_scholar": "JiTfWVMAAAAJ;USnQVWgAAAAJ;-knurggAAAAJ;;https://scholar.google.com.hk/citations?user=LLk9dR8AAAAJ;VNg5rA8AAAAJ;Hvbzb1kAAAAJ", "orcid": ";;;;;0000-0003-3103-8442;", "linkedin": "xinyang-jiang-ab5416b0/;;;;;;", "or_profile": "~XINYANG_JIANG2;~Kan_Ren1;~Caihua_Shan1;~Xufang_Luo1;~Kaitao_Song1;~Dongsheng_Li2;~Yansen_Wang1", "aff": "Microsoft;Microsoft;Microsoft;Microsoft Research;Microsoft;Microsoft Research Asia;Microsoft Research Asia", "aff_domain": "microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com", "position": "Senior Researcher;Researcher;Researcher;Researcher;Researcher;Principal Researcher;Researcher", "bibtex": "@misc{\nwang2023circuitnet,\ntitle={CircuitNet: A Generic Neural Network to Realize Universal Circuit Motif Modeling},\nauthor={Yansen Wang and XINYANG JIANG and Kan Ren and Caihua Shan and Xufang Luo and Kaitao Song and Dongsheng Li},\nyear={2023},\nurl={https://openreview.net/forum?id=GUSf17i8RMZ}\n}", "github": "", "project": "", "reviewers": "zeXY;CjTa;oKif", "site": "https://openreview.net/forum?id=GUSf17i8RMZ", "pdf_size": 2953778, "recommendation": "6;6;6", "confidence": "4;3;4", "correctness": "3;3;3", "technical_novelty": "2;3;3", "empirical_novelty": "2;3;3", "wc_summary_paper": "79;29;24", "wc_strength_and_weaknesses": "252;189;145", "wc_clarity_quality_novelty_and_reproducibility": "36;62;9", "wc_summary_review": "83;78;9", "wc_review": "450;358;187", "wc_reply_reviewers": "83;0;0", "wc_reply_authors": "170;748;672", "reply_reviewers": "2;0;0", "reply_authors": "2;1;1", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 44.0, 24.8327740429189 ], "wc_strength_and_weaknesses_avg": [ 195.33333333333334, 43.911527213503085 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 35.666666666666664, 21.63844315615664 ], "wc_summary_review_avg": [ 56.666666666666664, 33.76717669901086 ], "wc_review_avg": [ 331.6666666666667, 108.97196377458235 ], "wc_reply_reviewers_avg": [ 27.666666666666668, 39.12657522565563 ], "wc_reply_authors_avg": [ 530.0, 256.442326199609 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:OmW-_1Xej_0J:scholar.google.com/&scioq=CircuitNet:+A+Generic+Neural+Network+to+Realize+Universal+Circuit+Motif+Modeling&hl=en&as_sdt=0,11", "gs_version_total": 5, "aff_unique_index": 
"0;0;0;0;0;0;0", "aff_unique_norm": "Microsoft", "aff_unique_dep": "Microsoft Corporation", "aff_unique_url": "https://www.microsoft.com", "aff_unique_abbr": "Microsoft", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;0;0;0;0;1;1", "aff_country_unique": "United States;China" }, { "id": "GUfVNbxIYv", "title": "$\\Phi$-DVAE: Learning Physically Interpretable Representations with Nonlinear Filtering", "track": "main", "status": "Reject", "tldr": "", "abstract": "Incorporating unstructured data into physical models is a challenging problem that is emerging in data assimilation. Traditional approaches focus on well-defined observation operators whose functional forms are typically assumed to be known. This prevents these methods from achieving a consistent model-data synthesis in configurations where the mapping from data-space to model-space is unknown. To address these shortcomings, in this paper we develop a physics-informed dynamical variational autoencoder ($\\Phi$-DVAE) for embedding diverse data streams into time-evolving physical systems described by differential equations. Our approach combines a standard (possibly nonlinear) filter for the latent state-space model and a VAE, to embed the unstructured data stream into the latent dynamical system. A variational Bayesian framework is used for the joint estimation of the embedding, latent states, and unknown system parameters. To demonstrate the method, we look at three examples: video datasets generated by the advection and Korteweg-de Vries partial differential equations, and a velocity field generated by the Lorenz-63 system. Comparisons with relevant baselines show that the $\\Phi$-DVAE provides a data efficient dynamics encoding methodology that is competitive with standard approaches, with the added benefit of incorporating a physically interpretable latent space.", "keywords": "variational autoencoder;nonlinear filter;physics-informed;parameter estimation;variational inference;Bayesian inverse problems", "primary_area": "", "supplementary_material": "", "author": "Alex John Glyn-Davies;Connor Duffin;Omer Deniz Akyildiz;Mark Girolami", "authorids": "~Alex_John_Glyn-Davies1;~Connor_Duffin1;~Omer_Deniz_Akyildiz1;~Mark_Girolami2", "gender": "M;M;;Not Specified", "homepage": "https://github.com/alexglyndavies;https://connor-duffin.github.io/;http://akyildiz.me;http://www.eng.cam.ac.uk/profiles/mag92", "dblp": ";;139/6538;g/MarkAGirolami.html", "google_scholar": "https://scholar.google.com/citations?hl=en;;X1fsIjIAAAAJ;https://scholar.google.co.uk/citations?hl=en", "orcid": "0000-0001-5201-1228;;0000-0002-5248-1219;", "linkedin": ";;;", "or_profile": "~Alex_John_Glyn-Davies1;~Connor_Duffin1;~Omer_Deniz_Akyildiz1;~Mark_Girolami2", "aff": "University of Cambridge;University of Cambridge;Imperial College London;University of Cambridge", "aff_domain": "cam.ac.uk;cam.ac.uk;imperial.ac.uk;cam.ac.uk", "position": "PhD student;Postdoc;Assistant Professor;Full Professor", "bibtex": "@misc{\nglyn-davies2023phidvae,\ntitle={\\${\\textbackslash}Phi\\$-{DVAE}: Learning Physically Interpretable Representations with Nonlinear Filtering},\nauthor={Alex John Glyn-Davies and Connor Duffin and Omer Deniz Akyildiz and Mark Girolami},\nyear={2023},\nurl={https://openreview.net/forum?id=GUfVNbxIYv}\n}", "github": "", "project": "", "reviewers": "91wk;FFuK;66rK;8UxT", "site": "https://openreview.net/forum?id=GUfVNbxIYv", "pdf_size": 2334485, "recommendation": "3;5;5;8", "confidence": "4;3;2;4", "correctness": 
"2;3;3;4", "technical_novelty": "2;2;3;4", "empirical_novelty": "2;2;2;4", "wc_summary_paper": "101;22;85;85", "wc_strength_and_weaknesses": "352;147;848;286", "wc_clarity_quality_novelty_and_reproducibility": "737;74;203;44", "wc_summary_review": "111;38;113;21", "wc_review": "1301;281;1249;436", "wc_reply_reviewers": "0;0;529;0", "wc_reply_authors": "1182;203;1665;331", "reply_reviewers": "0;0;2;0", "reply_authors": "3;1;3;1", "recommendation_avg": [ 5.25, 1.7853571071357126 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 73.25, 30.30160886817728 ], "wc_strength_and_weaknesses_avg": [ 408.25, 264.4526186295004 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 264.5, 279.2619737808927 ], "wc_summary_review_avg": [ 70.75, 41.69157588770182 ], "wc_review_avg": [ 816.75, 461.88114001331553 ], "wc_reply_reviewers_avg": [ 132.25, 229.06371930098402 ], "wc_reply_authors_avg": [ 845.25, 604.6339285055049 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.0, 1.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.1266600992762247, "corr_recommendation_correctness": 0.9901475429766743, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12913877201043372169&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Cambridge;Imperial College London", "aff_unique_dep": ";", "aff_unique_url": "https://www.cam.ac.uk;https://www.imperial.ac.uk", "aff_unique_abbr": "Cambridge;ICL", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "GVMwL15UrZO", "title": "UNDERSTANDING HTML WITH LARGE LANGUAGE MODELS", "track": "main", "status": "Reject", "tldr": "Large language models are very effective at understanding HTML including navigating web pages, classifying elements, and generating descriptions of elements.", "abstract": "Large language models (LLM) have shown exceptional performance on a variety of natural language tasks. Yet, their capabilities for HTML understanding \u2013 i.e., parsing the raw HTML of a webpage, with applications to automation of web-based tasks, crawling, and browser-assisted retrieval \u2013 have not been fully explored. We contribute HTML understanding models (fine-tuned LLMs) and an in-depth analysis of their capabilities under three tasks: (i) Semantic Classification of HTML elements, (ii) Description Generation for HTML inputs, and (iii) Autonomous Web Navigation of HTML pages. While previous work has developed dedicated architectures and training procedures for HTML understanding, we show that LLMs pretrained on standard natural language corpora transfer remarkably well to HTML understanding tasks. For instance, fine-tuned LLMs are 12% more accurate at semantic classification compared to models trained exclusively on the task dataset. Moreover, when fine-tuned on data from the MiniWoB benchmark, LLMs successfully complete 50% more tasks using 192x less data compared to the previous best supervised model. To promote further research on LLMs for HTML understanding, we create and open-source a large-scale HTML dataset distilled and auto-labeled from CommonCrawl. 
We show evidence that T5-based models, owing to their bidirectional encoder-decoder architecture, are the best choice, and that for practitioners larger models are not necessarily better.", "keywords": "html understanding;web navigation;large language models;semantic classification;description generation", "primary_area": "", "supplementary_material": "/attachment/46e495cedaf517b5994b5b4a87cfd0cbd2a7eb8d.zip", "author": "Izzeddin Gur;Ofir Nachum;Yingjie Miao;Mustafa Safdari;Austin V Huang;Sharan Narang;Aakanksha Chowdhery;Noah Fiedel;Aleksandra Faust", "authorids": "~Izzeddin_Gur1;~Ofir_Nachum1;~Yingjie_Miao1;~Mustafa_Safdari1;~Austin_V_Huang1;~Sharan_Narang1;~Aakanksha_Chowdhery1;~Noah_Fiedel1;~Aleksandra_Faust1", "gender": ";M;;M;;M;;;F", "homepage": ";https://scholar.google.com/citations?user=C-ZlBWMAAAAJ&hl=en;;;https://github.com/austinvhuang;;http://www.achowdhery.com;;http://www.afaust.info", "dblp": "188/9027;;22/10043;05/7184;;;;204/3399;135/8420", "google_scholar": "qS_ugJAAAAAJ;C-ZlBWMAAAAJ;ScqM05wAAAAJ;;;CWOixywAAAAJ;7KDSCpQAAAAJ;;RK72t68AAAAJ", "orcid": ";;;0009-0002-1604-8685;;;;;0000-0002-3268-8685", "linkedin": ";;yingjiemiao/;mustafasafdari/;austin-huang-74a75422/;;;;aleksandrafaust", "or_profile": "~Izzeddin_Gur1;~Ofir_Nachum1;~Yingjie_Miao1;~Mustafa_Safdari1;~Austin_V_Huang1;~Sharan_Narang1;~Aakanksha_Chowdhery1;~Noah_Fiedel1;~Aleksandra_Faust1", "aff": "Google;OpenAI;Google DeepMind;Research, Google;;Meta;Google;Google;Google Brain", "aff_domain": "google.com;openai.com;google.com;research.google.com;;meta.com;google.com;google.com;google.com", "position": "Research Scientist;Researcher;Software Engineer;Researcher;;Researcher;Researcher;Director, Research & Engineering;Principal Researcher", "bibtex": "@misc{\ngur2023understanding,\ntitle={{UNDERSTANDING} {HTML} {WITH} {LARGE} {LANGUAGE} {MODELS}},\nauthor={Izzeddin Gur and Ofir Nachum and Yingjie Miao and Mustafa Safdari and Austin V Huang and Sharan Narang and Aakanksha Chowdhery and Noah Fiedel and Aleksandra Faust},\nyear={2023},\nurl={https://openreview.net/forum?id=GVMwL15UrZO}\n}", "github": "", "project": "", "reviewers": "d1QN;wKTj;hMUi;5MM1", "site": "https://openreview.net/forum?id=GVMwL15UrZO", "pdf_size": 1386919, "recommendation": "5;5;5;6", "confidence": "3;4;3;4", "correctness": "3;3;2;3", "technical_novelty": "2;3;2;2", "empirical_novelty": "3;3;3;2", "wc_summary_paper": "98;138;62;374", "wc_strength_and_weaknesses": "227;97;179;272", "wc_clarity_quality_novelty_and_reproducibility": "50;73;66;94", "wc_summary_review": "90;44;63;87", "wc_review": "465;352;370;827", "wc_reply_reviewers": "0;0;160;0", "wc_reply_authors": "835;303;1019;483", "reply_reviewers": "0;0;3;0", "reply_authors": "2;1;3;1", "recommendation_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 168.0, 121.93440859740946 ], "wc_strength_and_weaknesses_avg": [ 193.75, 64.82042502174758 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 70.75, 15.801503093060482 ], "wc_summary_review_avg": [ 71.0, 18.774983355518586 ], "wc_review_avg": [ 503.5, 191.64354933052144 ], "wc_reply_reviewers_avg": [ 40.0, 69.2820323027551 ], "wc_reply_authors_avg": [ 660.0, 282.0833210241258 ], "reply_reviewers_avg": [ 0.75, 1.299038105676658 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 9, 0 ],
"corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 95, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12552367862749035013&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;0;0;2;0;0;0", "aff_unique_norm": "Google;OpenAI;Meta", "aff_unique_dep": "Google;;Meta Platforms, Inc.", "aff_unique_url": "https://www.google.com;https://openai.com;https://meta.com", "aff_unique_abbr": "Google;OpenAI;Meta", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;1;0;0;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Teacher Guided Training: An Efficient Framework for Knowledge Transfer", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11548", "id": "GVSf7Z7DbYL", "poster": "/media/PosterPDFs/ICLR%202023/11548.png?t=1682801935.0222738", "openreview": "https://openreview.net/forum?id=GVSf7Z7DbYL", "slides": "https://iclr.cc/virtual/2023/poster/11548", "video": "https://iclr.cc/virtual/2023/poster/11548", "author_site": "Manzil Zaheer, Ankit Singh Rawat, Seungyeon Kim, Chong You, Himanshu Jain, Andreas Veit, Rob Fergus, Sanjiv Kumar", "tldr": "We propose and theoretically analyze a novel way to improve the training efficiency of compact student models that better leverages the knowledge of pretrained generative (teacher) models compared to standard distillation methods.", "abstract": "The remarkable performance gains realized by large pretrained models, e.g., GPT-3, hinge on the massive amounts of data they are exposed to during training. Analogously, distilling such large models to compact models for efficient deployment also necessitates a large amount of (labeled or unlabeled) training data. In this paper, we propose the teacher-guided training (TGT) framework for training a high-quality compact model that leverages the knowledge acquired by pretrained generative models, while obviating the need to go through a large volume of data. TGT exploits the fact that the teacher has acquired a good representation of the underlying data domain, which typically corresponds to a much lower dimensional manifold than the input space. Furthermore, we can use the teacher to explore input space more efficiently through sampling or gradient-based methods; thus, making TGT especially attractive for limited data or long-tail settings. We formally capture this benefit of proposed data-domain exploration in our generalization bounds. 
We find that TGT can improve accuracy on several image classification benchmarks as well as a range of text classification and retrieval tasks.", "keywords": "Distillation;Semisupervised learning;Efficient machine learning;Generalization bounds;knowledge distillation", "primary_area": "", "supplementary_material": "", "author": "Manzil Zaheer;Ankit Singh Rawat;Seungyeon Kim;Chong You;Himanshu Jain;Andreas Veit;Rob Fergus;Sanjiv Kumar", "authorids": "~Manzil_Zaheer1;~Ankit_Singh_Rawat1;~Seungyeon_Kim1;~Chong_You2;~Himanshu_Jain3;~Andreas_Veit1;~Rob_Fergus1;~Sanjiv_Kumar1", "gender": "M;M;;M;M;;M;", "homepage": "https://www.aclweb.org/anthology/people/m/manzil-zaheer/;https://ankitsrawat.github.io/home/;https://www.seungyeon.ai;https://sites.google.com/view/cyou;;http://andreasveit.eu/;http://cs.nyu.edu/fergus/;http://www.sanjivk.com/", "dblp": "40/10701;https://dblp.org/pers/hd/r/Rawat:Ankit_Singh;74/7997-1.html;164/7311;;133/1801;77/3763;", "google_scholar": "A33FhJMAAAAJ;http://scholar.google.com/citations?user=U0_ab4cAAAAJ;zbcN_QIAAAAJ;Mfrpm_IAAAAJ;JtrH9jQAAAAJ;UA9Hb2EAAAAJ;https://scholar.google.com.tw/citations?user=GgQ9GEkAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;;;;;", "linkedin": ";;;;;;;", "or_profile": "~Manzil_Zaheer1;~Ankit_Singh_Rawat1;~Seungyeon_Kim1;~Chong_You2;~Himanshu_Jain3;~Andreas_Veit1;~Rob_Fergus1;~Sanjiv_Kumar1", "aff": "Google DeepMind;Google;Google;Google;Google;Google;Google;Google", "aff_domain": "deepmind.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com", "position": "Researcher;Research Scientist;Researcher;Research Scientist;Researcher;Senior Research Scientist;Research scientist;Research Scientist", "bibtex": "@inproceedings{\nzaheer2023teacher,\ntitle={Teacher Guided Training: An Efficient Framework for Knowledge Transfer},\nauthor={Manzil Zaheer and Ankit Singh Rawat and Seungyeon Kim and Chong You and Himanshu Jain and Andreas Veit and Rob Fergus and Sanjiv Kumar},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=GVSf7Z7DbYL}\n}", "github": "", "project": "", "reviewers": "PrCv;Qnjd;hz78;u9os", "pdf_size": 3266262, "recommendation": "6;6;6;8", "confidence": "3;4;4;5", "correctness": "3;3;3;4", "technical_novelty": "3;3;2;4", "empirical_novelty": "3;3;3;4", "wc_summary_paper": "44;67;100;97", "wc_strength_and_weaknesses": "74;167;228;50", "wc_clarity_quality_novelty_and_reproducibility": "32;10;27;38", "wc_summary_review": "74;1;26;74", "wc_review": "224;245;381;259", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "417;363;535;52", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 77.0, 23.010866998007703 ], "wc_strength_and_weaknesses_avg": [ 129.75, 71.6043818491578 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 26.75, 10.425329730996522 ], "wc_summary_review_avg": [ 43.75, 31.514877439076294 ], "wc_review_avg": [ 277.25, 61.18159445454164 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 341.75, 178.4760137945713 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": 0.816496580927726, 
"corr_recommendation_correctness": 1.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7368720953888606901&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "pdf": "https://openreview.net/pdf?id=GVSf7Z7DbYL", "email": "deepmind.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com", "author_num": 8, "aff_unique_index": "0;0;0;0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google DeepMind", "aff_unique_url": "https://deepmind.com", "aff_unique_abbr": "DeepMind", "aff_campus_unique_index": "1;1;1;1;1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;1;1;1;1;1;1", "aff_country_unique": "United Kingdom;United States" }, { "id": "GVWySHBD3Cl", "title": "Estimating Treatment Effects using Neurosymbolic Program Synthesis", "track": "main", "status": "Reject", "tldr": "We estimate treatment effects/ causal effects using neurosymbolic program synthesis by designing a domain specific language ", "abstract": "Estimating treatment effects from observational data is a central problem in causal inference. Methods to solve this problem exploit inductive biases and heuristics from causal inference to design multi-head neural network architectures and regularizers. In this work, we propose to use neurosymbolic program synthesis, a data-efficient, and interpretable technique, to solve the treatment effect estimation problem. We theoretically show that neurosymbolic programming can solve the treatment effect estimation problem. By designing a Domain Specific Language (DSL) for treatment effect estimation based on the inductive biases used in literature, we argue that neurosymbolic programming is a better alternative to treatment effect estimation than traditional models. Our empirical study reveals that our model, which implicitly encodes inductive biases in a DSL, achieves better performance on benchmark datasets than the state-of-the-art models.", "keywords": "Causal effect;treatment effect;neurosymbolic programming;domain specific language", "primary_area": "", "supplementary_material": "/attachment/162bbe4cca81d3f212dfc205b4313c8e31e30e44.zip", "author": "Abbavaram Gowtham Reddy;Vineeth N. Balasubramanian", "authorids": "~Abbavaram_Gowtham_Reddy1;~Vineeth_N._Balasubramanian2", "gender": "M;M", "homepage": "https://gautam0707.github.io;https://people.iith.ac.in/vineethnb/", "dblp": "294/8798;88/4691", "google_scholar": "Iewg-GAAAAAJ;https://scholar.google.co.in/citations?user=7soDcboAAAAJ", "orcid": ";0000-0003-2656-0375", "linkedin": ";vineethnb?originalSubdomain=in", "or_profile": "~Abbavaram_Gowtham_Reddy1;~Vineeth_Balasubramanian1", "aff": "Indian Institute of Technology Hyderabad;Indian Institute of Technology Hyderabad", "aff_domain": "iith.ac.in;iith.ac.in", "position": "PhD student;Full Professor", "bibtex": "@misc{\nreddy2023estimating,\ntitle={Estimating Treatment Effects using Neurosymbolic Program Synthesis},\nauthor={Abbavaram Gowtham Reddy and Vineeth N. 
Balasubramanian},\nyear={2023},\nurl={https://openreview.net/forum?id=GVWySHBD3Cl}\n}", "github": "", "project": "", "reviewers": "K4Ej;Ykau;iRvp", "site": "https://openreview.net/forum?id=GVWySHBD3Cl", "pdf_size": 0, "recommendation": "3;3;8", "confidence": "4;4;3", "correctness": "3;3;3", "technical_novelty": "3;2;3", "empirical_novelty": "2;0;3", "wc_summary_paper": "60;54;101", "wc_strength_and_weaknesses": "491;208;451", "wc_clarity_quality_novelty_and_reproducibility": "16;30;99", "wc_summary_review": "31;33;56", "wc_review": "598;325;707", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.666666666666667, 2.357022603955158 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.6666666666666667, 1.247219128924647 ], "wc_summary_paper_avg": [ 71.66666666666667, 20.885933597094056 ], "wc_strength_and_weaknesses_avg": [ 383.3333333333333, 125.05021213718733 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 48.333333333333336, 36.279777042068794 ], "wc_summary_review_avg": [ 40.0, 11.343133018115703 ], "wc_review_avg": [ 543.3333333333334, 160.67012444411964 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.9999999999999998, "corr_recommendation_correctness": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15580836965532651961&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Indian Institute of Technology Hyderabad", "aff_unique_dep": "", "aff_unique_url": "https://www.iith.ac.in", "aff_unique_abbr": "IIT Hyderabad", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hyderabad", "aff_country_unique_index": "0;0", "aff_country_unique": "India" }, { "id": "GX0uI5T8kd", "title": "Self-Supervised Off-Policy Ranking via Crowd Layer", "track": "main", "status": "Reject", "tldr": "", "abstract": "Off-policy evaluation (OPE) aims to estimate the online performance of target policies given a dataset collected by some behavioral policies. OPE is crucial in many applications where online policy evaluation is expensive. However, existing OPE methods are far from reliable. Fortunately, in many real-world scenarios, we care only about the ranking of the evaluated policies, rather than their exact online performance. Existing works on off-policy ranking (OPR) adopt a supervised training paradigm, which assumes that there are plenty of deployed policies and the labels of their performance are available. However, this assumption does not apply to most OPE scenarios because collecting such training data might be highly expensive. In this paper, we propose a novel OPR framework called SOCCER, where the existing OPE methods are modeled as workers in a crowdsourcing system. SOCCER can be trained in a self-supervised way as it does not require any ground-truth labels of policies. Moreover, in order to capture the relative discrepancies between policies, we propose a novel transformer-based architecture to learn effective pairwise policy representations. Experimental results show that SOCCER achieves remarkably high accuracy in a variety of OPR tasks. 
Surprisingly, SOCCER even performs better than baselines trained in a supervised way using additional labeled data, which further demonstrates the superiority of SOCCER in OPR tasks.", "keywords": "off-policy ranking;policy representation learning;reinforcement learning", "primary_area": "", "supplementary_material": "", "author": "Pengjie Gu;Mengchen Zhao;Jianye HAO;Bo An", "authorids": "~Pengjie_Gu1;~Mengchen_Zhao1;~Jianye_HAO1;~Bo_An2", "gender": "M;M;M;M", "homepage": ";https://batmanzzmc.github.io/;http://www.icdai.org/jianye.html;https://personal.ntu.edu.sg/boan/", "dblp": "226/1222;178/8719;21/7664.html;42/6178-1.html", "google_scholar": ";nLgORGMAAAAJ;;PEEpuNwAAAAJ", "orcid": ";;0000-0002-0422-8235;0000-0002-7064-7438", "linkedin": ";;;", "or_profile": "~Pengjie_Gu1;~Mengchen_Zhao1;~Jianye_HAO1;~Bo_An2", "aff": "Nanyang Technological University;Huawei Noah's Ark Lab;Tianjin University;Nanyang Technological University", "aff_domain": "ntu.edu.sg;huawei.com;tju.edu.cn;ntu.edu.sg", "position": "PhD student;Research Scientist;Associate Professor;Full Professor", "bibtex": "@misc{\ngu2023selfsupervised,\ntitle={Self-Supervised Off-Policy Ranking via Crowd Layer},\nauthor={Pengjie Gu and Mengchen Zhao and Jianye HAO and Bo An},\nyear={2023},\nurl={https://openreview.net/forum?id=GX0uI5T8kd}\n}", "github": "", "project": "", "reviewers": "RMFP;5DpY;KHEq;jfpk", "site": "https://openreview.net/forum?id=GX0uI5T8kd", "pdf_size": 1907689, "recommendation": "3;6;6;6", "confidence": "4;4;4;4", "correctness": "3;3;3;3", "technical_novelty": "3;3;2;3", "empirical_novelty": "3;3;1;3", "wc_summary_paper": "106;150;45;76", "wc_strength_and_weaknesses": "400;256;129;505", "wc_clarity_quality_novelty_and_reproducibility": "82;36;49;62", "wc_summary_review": "46;63;115;77", "wc_review": "634;505;338;720", "wc_reply_reviewers": "891;99;0;195", "wc_reply_authors": "2679;525;479;989", "reply_reviewers": "5;1;0;1", "reply_authors": "7;2;1;3", "recommendation_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 94.25, 38.74516098818019 ], "wc_strength_and_weaknesses_avg": [ 322.5, 142.4578885144659 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 57.25, 16.990806337546196 ], "wc_summary_review_avg": [ 75.25, 25.439880109780393 ], "wc_review_avg": [ 549.25, 143.98155263782925 ], "wc_reply_reviewers_avg": [ 296.25, 350.232333601568 ], "wc_reply_authors_avg": [ 1168.0, 894.8927310018782 ], "reply_reviewers_avg": [ 1.75, 1.920286436967152 ], "reply_authors_avg": [ 3.25, 2.277608394786075 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Yvipe_p0azwJ:scholar.google.com/&scioq=Self-Supervised+Off-Policy+Ranking+via+Crowd+Layer&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Nanyang Technological University;Huawei;Tianjin University", "aff_unique_dep": ";Noah's Ark Lab;", "aff_unique_url": "https://www.ntu.edu.sg;https://www.huawei.com;http://www.tju.edu.cn", "aff_unique_abbr": "NTU;Huawei;TJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "Singapore;China" }, { "id": "G_D6xThdQe4", "title": "Mixture of Quantized Experts (MoQE): Complementary 
Effect of Low-bit Quantization and Robustness", "track": "main", "status": "Reject", "tldr": "We investigate the robustness of MoE experts and apply ultra low-bit quantization to them to achieve more efficient MoE model inference.", "abstract": "Large Mixture of Experts (MoE) models can achieve state-of-the-art quality on various language tasks, including machine translation, thanks to the efficient model scaling capability with expert parallelism. However, this has brought a fundamental issue of larger memory consumption at deployment time. Furthermore, this results in significant inference speed degradation at auto-regressive decoding steps due to the increased memory transfers. In this paper, we propose a simple weight-only quantization method using ultra-low bit-widths such as 2-bit, 3-bit and 4-bit to effectively mitigate the increased memory and latency issues of MoE models. We show that low-bit quantization together with the MoE architecture delivers reliable model performance while reducing the memory size significantly, even without any additional training. In particular, expert layers in MoE models are much more robust to quantization than conventional feedforward network (FFN) layers. In our comprehensive analysis, we show that MoE models with 2-bit and 80\\% sparse expert weights can deliver better model performance than the dense model trained on the same dataset. We present how quantization of different parts of models affects the performance with various experiments using a large MoE model (5.3B). As a result of low-bit quantization, we show the model size can be reduced to 4.9X smaller than the original half-precision floating point (fp16) MoE model. This cuts down the model size of 5.3B parameters from 8.4x of the dense model to only 1.7x of the dense model after 2-bit quantization. It still attains 1.88\\% higher accuracy than the dense model. 
Combined with an optimized GPU runtime implementation, it also achieves 2.7X speed-up which is even slightly faster than the FLOPs equivalent dense model.", "keywords": "MoE;Quantization;Mixture of Experts;Sparse Model;Machine Translation", "primary_area": "", "supplementary_material": "", "author": "Young Jin Kim;Raffy Fahim;Hany Hassan", "authorids": "~Young_Jin_Kim1;~Raffy_Fahim1;~Hany_Hassan1", "gender": "M;M;M", "homepage": "https://www.microsoft.com/en-us/research/people/youki/;;", "dblp": "00/8110-1.html;;83/64", "google_scholar": ";https://scholar.google.com/citations?hl=en;", "orcid": ";;", "linkedin": "ykim362/;raffy-fahim-322316117;", "or_profile": "~Young_Jin_Kim1;~Raffy_Fahim1;~Hany_Hassan1", "aff": "Microsoft;Microsoft;Microsoft", "aff_domain": "microsoft.com;microsoft.com;microsoft.com", "position": "Principal Researcher;Researcher;Research Scientist", "bibtex": "@misc{\nkim2023mixture,\ntitle={Mixture of Quantized Experts (Mo{QE}): Complementary Effect of Low-bit Quantization and Robustness},\nauthor={Young Jin Kim and Raffy Fahim and Hany Hassan},\nyear={2023},\nurl={https://openreview.net/forum?id=G_D6xThdQe4}\n}", "github": "", "project": "", "reviewers": "ZfnQ;X5nA;5W77;MzdQ;P5VH", "site": "https://openreview.net/forum?id=G_D6xThdQe4", "pdf_size": 705340, "recommendation": "3;3;3;5;6", "confidence": "4;4;4;4;4", "correctness": "3;2;4;3;3", "technical_novelty": "2;2;1;2;2", "empirical_novelty": "2;2;2;2;3", "wc_summary_paper": "61;78;166;42;70", "wc_strength_and_weaknesses": "70;35;190;68;196", "wc_clarity_quality_novelty_and_reproducibility": "39;149;139;27;226", "wc_summary_review": "62;27;47;66;47", "wc_review": "232;289;542;203;539", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "229;356;476;200;447", "reply_reviewers": "0;0;0;0;0", "reply_authors": "1;1;1;1;1", "recommendation_avg": [ 4.0, 1.2649110640673518 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.0, 0.6324555320336759 ], "technical_novelty_avg": [ 1.8, 0.4 ], "empirical_novelty_avg": [ 2.2, 0.39999999999999997 ], "wc_summary_paper_avg": [ 83.4, 43.00511597473026 ], "wc_strength_and_weaknesses_avg": [ 111.8, 67.48155303488502 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 116.0, 74.25361944040169 ], "wc_summary_review_avg": [ 49.8, 13.760813929415658 ], "wc_review_avg": [ 361.0, 149.15361209169558 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 341.6, 111.45330860948005 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4004446232834632175&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "Microsoft", "aff_unique_dep": "Microsoft Corporation", "aff_unique_url": "https://www.microsoft.com", "aff_unique_abbr": "Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Graph Signal Sampling for Inductive One-Bit Matrix Completion: a Closed-form Solution", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11357", "id": "G_HSyfLk0m", "poster": "/media/PosterPDFs/ICLR%202023/11357.png?t=1681101008.2675083", "openreview": "https://openreview.net/forum?id=G_HSyfLk0m", "slides": "https://iclr.cc/virtual/2023/poster/11357", "video": 
"https://iclr.cc/virtual/2023/poster/11357", "author_site": "Chao Chen, Haoyu Geng, Gang Zeng, Zhaobing Han, Hua Chai, Xiaokang Yang, Junchi Yan", "tldr": "", "abstract": "Inductive one-bit matrix completion is motivated by modern applications such as recommender systems, where new users would appear at test stage with the ratings consisting of only ones and no zeros. We propose a unified graph signal sampling framework which enjoys the benefits of graph signal analysis and processing. The key idea is to transform each user's ratings on the items to a function (signal) on the vertices of an item-item graph, then learn structural graph properties to recover the function from its values on certain vertices --- the problem of graph signal sampling. We propose a class of regularization functionals that takes into account discrete random label noise in the graph vertex domain, then develop the GS-IMC approach which biases the reconstruction towards functions that vary little between adjacent vertices for noise reduction. Theoretical result shows that accurate reconstructions can be achieved under mild conditions. For the online setting, we develop a Bayesian extension, i.e., BGS-IMC which considers continuous random Gaussian noise in the graph Fourier domain and builds upon a prediction-correction update algorithm to obtain the unbiased and minimum-variance reconstruction. Both GS-IMC and BGS-IMC have closed-form solutions and thus are highly scalable in large data. Experiments show that our methods achieve state-of-the-art performance on public benchmarks.", "keywords": "inductive one-bit matrix completion;graph signal sampling", "primary_area": "", "supplementary_material": "", "author": "Chao Chen;Haoyu Geng;Gang Zeng;Zhaobing Han;Hua Chai;Xiaokang Yang;Junchi Yan", "authorids": "~Chao_Chen5;~Haoyu_Geng1;~Gang_Zeng3;hanzhaobinghzb@didiglobal.com;~Hua_Chai1;~Xiaokang_Yang1;~Junchi_Yan2", "gender": ";M;M;;M;M;", "homepage": ";https://hygeng.site;https://twitter.com/zenggang1988;;;https://icne.sjtu.edu.cn/info/1064/1078.htm;", "dblp": "66/3019-16;289/8341;98/3076;;;06/3071-1.html;", "google_scholar": "gmK_nHYAAAAJ;_R_RZpAAAAAJ;;;;yDEavdMAAAAJ;", "orcid": "0000-0003-3911-8711;0000-0001-7808-3959;;;0000-0003-3381-2526;0000-0003-4029-3322;", "linkedin": ";;;;;;", "or_profile": "~Chao_Chen5;~Haoyu_Geng1;~Gang_Zeng3;hanzhaobinghzb@didiglobal.com;~Hua_Chai1;~Xiaokang_Yang1;~Junchi_Yan2", "aff": ";Shanghai Jiaotong University;Didi Research;;Didi Research;Shanghai Jiaotong University;", "aff_domain": ";sjtu.edu.cn;didichuxing.com;;didichuxing.com;sjtu.edu.cn;", "position": ";PhD student;Full Professor;;Principal Researcher;Full Professor;", "bibtex": "@inproceedings{\nchen2023graph,\ntitle={Graph Signal Sampling for Inductive One-Bit Matrix Completion: a Closed-form Solution},\nauthor={Chao Chen and Haoyu Geng and Gang Zeng and Zhaobing Han and Hua Chai and Xiaokang Yang and Junchi Yan},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=G_HSyfLk0m}\n}", "github": "", "project": "", "reviewers": "fzvN;rJLs;idfJ;ZrCo", "pdf_size": 871315, "recommendation": "3;3;6;8", "confidence": "4;1;4;3", "correctness": "2;2;4;4", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;2;3;2", "wc_summary_paper": "13;41;48;235", "wc_strength_and_weaknesses": "287;100;293;155", "wc_clarity_quality_novelty_and_reproducibility": "2;159;80;300", "wc_summary_review": "2;10;59;43", "wc_review": "304;310;480;733", "wc_reply_reviewers": 
"116;0;0;25", "wc_reply_authors": "3710;317;344;453", "reply_reviewers": "2;0;0;1", "reply_authors": "9;1;2;2", "recommendation_avg": [ 5.0, 2.1213203435596424 ], "confidence_avg": [ 3.0, 1.224744871391589 ], "correctness_avg": [ 3.0, 1.0 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 84.25, 88.01526856176717 ], "wc_strength_and_weaknesses_avg": [ 208.75, 83.57145146519834 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 135.25, 110.13032052981595 ], "wc_summary_review_avg": [ 28.5, 23.371991785040485 ], "wc_review_avg": [ 456.75, 174.44393798581825 ], "wc_reply_reviewers_avg": [ 35.25, 47.72512441052407 ], "wc_reply_authors_avg": [ 1206.0, 1446.5813146864575 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 3.5, 3.2015621187164243 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.2886751345948129, "corr_recommendation_correctness": 0.9428090415820635, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16622238036072617928&as_sdt=5,39&sciodt=0,39&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=G_HSyfLk0m", "email": ";sjtu.edu.cn;didichuxing.com;;didichuxing.com;sjtu.edu.cn;", "author_num": 7, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Shanghai Jiao Tong University;Didi Research", "aff_unique_dep": ";", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.didi.com", "aff_unique_abbr": "SJTU;Didi", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "Gb2Rndy5595", "title": "Context Autoencoder for Self-Supervised Representation Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "We present a novel masked image modeling (MIM) approach, context autoencoder (CAE), for self-supervised representation pretraining. The goal is to pretrain an encoder by solving the pretext task: estimate the masked patches from the visible patches in an image. Our approach first feeds the visible patches into the encoder, extracting the representations. Then, we make predictions from visible patches to masked patches in the encoded representation space. We introduce an alignment constraint, encouraging that the representations for masked patches, predicted from the encoded representations of visible patches, are aligned with the masked patch presentations computed from the encoder. In other words, the predicted representations are expected to lie in the encoded representation space, which empirically shows the benefit to representation learning. Last, the predicted masked patch representations are mapped to the targets of the pretext task through a decoder.\nOne additional characteristic is that our approach encourages the separation of the representation learning part (encoder), and the pretext task completion part that will be replaced by the downstream task part. In contrast, previous MIM methods (e.g., BEiT and MAE) couple the two parts, potentially limiting the representation learning quality. 
We demonstrate the effectiveness of our CAE through superior transfer performance in downstream tasks: semantic segmentation, and object detection and instance segmentation.", "keywords": "Self-Supervised Representation Learning;Masked Image Modeling;Context Autoencoder", "primary_area": "", "supplementary_material": "", "author": "Xiaokang Chen;Mingyu Ding;Xiaodi Wang;Ying Xin;Shentong Mo;Yunhao Wang;Shumin Han;Ping Luo;Gang Zeng;Jingdong Wang", "authorids": "~Xiaokang_Chen1;~Mingyu_Ding1;~Xiaodi_Wang2;~Ying_Xin1;~Shentong_Mo1;~Yunhao_Wang1;~Shumin_Han1;~Ping_Luo2;~Gang_Zeng1;~Jingdong_Wang1", "gender": "M;M;;F;;;M;;M;M", "homepage": "https://charlescxk.github.io/;https://dingmyu.github.io/;https://github.com/bczhangbczhang;https://github.com/xinyingxinying;;;https://www.researchgate.net/scientific-contributions/Shumin-Han-2149208232;;https://www.cis.pku.edu.cn/info/1177/1378.htm;https://jingdongwang2017.github.io/", "dblp": "163/6632;188/5243;https://dblp.uni-trier.de/pid/07/8227.html;;;;https://dblp.uni-trier.de/pid/119/8234;;;49/3441", "google_scholar": "https://scholar.google.com.hk/citations?view_op=list_works;w4yTWwoAAAAJ;;;;;eFoLdbQAAAAJ;;RuHyY6gAAAAJ;z5SPCmgAAAAJ", "orcid": ";0000-0001-6556-8359;;;;;;;;0000-0002-4888-4445", "linkedin": ";dingmyu/;;;;;;;;", "or_profile": "~Xiaokang_Chen1;~Mingyu_Ding1;~Xiaodi_Wang2;~Ying_Xin1;~Shentong_Mo1;~Yunhao_Wang1;~Shumin_Han1;~Ping_Luo2;~Gang_Zeng1;~Jingdong_Wang1", "aff": "Peking University;University of California, Berkeley;Baidu;;;;Baidu;;Peking University;Baidu", "aff_domain": "pku.edu.cn;berkeley.edu;baidu.com;;;;baidu.com;;pku.edu.cn;baidu.com", "position": "PhD student;Postdoc;Software Engineer;;;;Researcher;;Researcher;Chief Scientist for Computer Vision", "bibtex": "@misc{\nchen2023context,\ntitle={Context Autoencoder for Self-Supervised Representation Learning},\nauthor={Xiaokang Chen and Mingyu Ding and Xiaodi Wang and Ying Xin and Shentong Mo and Yunhao Wang and Shumin Han and Ping Luo and Gang Zeng and Jingdong Wang},\nyear={2023},\nurl={https://openreview.net/forum?id=Gb2Rndy5595}\n}", "github": "", "project": "", "reviewers": "b4Df;cbX1;TXRx;KNoM", "site": "https://openreview.net/forum?id=Gb2Rndy5595", "pdf_size": 12318393, "recommendation": "5;6;6;6", "confidence": "4;5;4;4", "correctness": "3;3;3;3", "technical_novelty": "2;3;2;3", "empirical_novelty": "3;3;2;3", "wc_summary_paper": "82;54;79;57", "wc_strength_and_weaknesses": "101;233;155;180", "wc_clarity_quality_novelty_and_reproducibility": "7;29;141;41", "wc_summary_review": "18;15;26;42", "wc_review": "208;331;401;320", "wc_reply_reviewers": "0;12;0;0", "wc_reply_authors": "514;1194;841;485", "reply_reviewers": "0;1;0;0", "reply_authors": "2;3;2;1", "recommendation_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 68.0, 12.589678312014172 ], "wc_strength_and_weaknesses_avg": [ 167.25, 47.49934210070704 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 54.5, 51.407684250508694 ], "wc_summary_review_avg": [ 25.25, 10.473180032826706 ], "wc_review_avg": [ 315.0, 69.14839115988165 ], "wc_reply_reviewers_avg": [ 3.0, 5.196152422706632 ], "wc_reply_authors_avg": [ 758.5, 287.68428876113484 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 10, 0 ], "corr_recommendation_confidence": 
0.3333333333333333, "corr_recommendation_correctness": 0.0, "gs_citation": 448, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8172785740427689603&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff_unique_index": "0;1;2;2;0;2", "aff_unique_norm": "Peking University;University of California, Berkeley;Baidu", "aff_unique_dep": ";;Baidu, Inc.", "aff_unique_url": "http://www.pku.edu.cn;https://www.berkeley.edu;https://www.baidu.com", "aff_unique_abbr": "Peking U;UC Berkeley;Baidu", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;1;0;0;0;0", "aff_country_unique": "China;United States" }, { "id": "GbFK7JJJVTz", "title": "Contrastive Graph Representation Learning with Cross-view Reconstruction", "track": "main", "status": "Withdraw", "tldr": "Our paper proposes a new contrastive learning framework to learn graph representation in accordance with the information bottleneck principle.", "abstract": "Although different graph self-supervised learning strategies have been proposed to tackle the supervision shortage issue in graph learning tasks, graph contrastive learning (GCL) has been the most prevalent approach to this problem. Despite the remarkable performance those GCL methods have achieved, existing GCL methods that heavily depend on various manually designed augmentation techniques still struggle to improve model robustness without risking losing task-relevant information. Consequently, the learned representation is either brittle or unilluminating. In light of this, we introduce GraphCV, which follows the information bottleneck principle to learn minimal yet sufficient representations from graph data. Specifically, our proposed model elicits the predictive (useful for downstream instance discrimination) and other non-predictive features separately. In addition to the conventional contrastive loss, which guarantees the consistency and sufficiency of the representations across different augmentation views, we introduce a cross-view reconstruction mechanism to pursue the disentanglement of the two learned representations. Besides, an adversarial global view is added as the third view of contrastive loss to keep the learned representation from drifting too far away from the original distribution. 
We empirically demonstrate that our proposed model outperforms the state-of-the-art on graph classification task over multiple benchmark datasets.", "keywords": "Graph Neural Network;Graph Contrastive Learning", "primary_area": "", "supplementary_material": "/attachment/c3be56f2a5fdf1f8f497391c9dc329d770edd383.zip", "author": "Qianlong Wen;Zhongyu Ouyang;Chunhui Zhang;Yiyue Qian;Yanfang Ye;Chuxu Zhang", "authorids": "~Qianlong_Wen1;~Zhongyu_Ouyang1;~Chunhui_Zhang1;~Yiyue_Qian2;~Yanfang_Ye1;~Chuxu_Zhang2", "gender": "M;F;M;;;", "homepage": "https://hoytwen.github.io/;https://zyouyang.github.io/;https://chunhuizng.github.io;https://yiyueqian.github.io/;http://yes-lab.org/;", "dblp": "301/6224;326/3910;62/3401;261/9059;;", "google_scholar": "cc-uK9gAAAAJ;ds4NE-gAAAAJ;https://scholar.google.com.hk/citations?user=jlqnbkAAAAAJ;c6c81_kAAAAJ;egjr888AAAAJ;", "orcid": "0000-0003-3812-8395;;;0000-0001-7924-5438;;", "linkedin": "qianlong-wen-87550a1a7/;;chunhui-zhang-541827161/;yiyue-qian-224655212/;;", "or_profile": "~Qianlong_Wen1;~Zhongyu_Ouyang1;~Chunhui_Zhang1;~Yiyue_Qian2;~Yanfang_Ye1;~Chuxu_Zhang2", "aff": "University of Notre Dame;;Brandeis University;University of Notre Dame;University of Notre Dame;", "aff_domain": "nd.edu;;brandeis.edu;nd.edu;nd.edu;", "position": "PhD student;;MS student;PhD student;Associate Professor;", "bibtex": "@misc{\nwen2023contrastive,\ntitle={Contrastive Graph Representation Learning with Cross-view Reconstruction},\nauthor={Qianlong Wen and Zhongyu Ouyang and Chunhui Zhang and Yiyue Qian and Yanfang Ye and Chuxu Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=GbFK7JJJVTz}\n}", "github": "", "project": "", "reviewers": "JZ7m;2jSm;L8jj", "site": "https://openreview.net/forum?id=GbFK7JJJVTz", "pdf_size": 1588959, "recommendation": "1;5;5", "confidence": "5;3;5", "correctness": "3;3;3", "technical_novelty": "3;3;2", "empirical_novelty": "3;3;2", "wc_summary_paper": "54;65;105", "wc_strength_and_weaknesses": "240;175;177", "wc_clarity_quality_novelty_and_reproducibility": "6;62;7", "wc_summary_review": "55;17;42", "wc_review": "355;319;331", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.6666666666666665, 1.8856180831641267 ], "confidence_avg": [ 4.333333333333333, 0.9428090415820634 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 74.66666666666667, 21.913973218524802 ], "wc_strength_and_weaknesses_avg": [ 197.33333333333334, 30.18093585177386 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 25.0, 26.166135875720485 ], "wc_summary_review_avg": [ 38.0, 15.769168230019828 ], "wc_review_avg": [ 335.0, 14.966629547095765 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.4999999999999999, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2518967358782207576&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Notre Dame;Brandeis University", "aff_unique_dep": ";", "aff_unique_url": "https://www.nd.edu;https://www.brandeis.edu", "aff_unique_abbr": "Notre Dame;Brandeis", "aff_campus_unique_index": "", 
"aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "GbsvQSaJV-6", "title": "Towards Skilled Population Curriculum for MARL", "track": "main", "status": "Reject", "tldr": "", "abstract": "Recent advances in multi-agent reinforcement learning (MARL) allow agents to coordinate their behaviors in complex environments. However, common MARL algorithms still suffer from scalability and sparse reward issues. One promising approach to resolve them is automated curriculum learning (ACL), where a student (curriculum learner) train on tasks of increasing difficulty controlled by a teacher (curriculum generator). Unfortunately, in spite of its success, ACL\u2019s applicability is restricted due to: (1) lack of a general student framework to deal with the varying number of agents across tasks and the sparse reward problem, and (2) the non-stationarity in the teacher\u2019s task due to the ever-changing student strategies. As a remedy for ACL, we introduce a novel automatic curriculum learning framework, Skilled Population Curriculum (SPC), adapting curriculum learning to multi-agent coordination. To be specific, we endow the student with population-invariant communication and a hierarchical skill set. Thus, the student can learn cooperation and behavior skills from distinct tasks with a varying number of agents. In addition, we model the teacher as a contextual bandit conditioned by student policies. As a result, a team of agents can change its size while retaining previously acquired skills. We also analyze the inherent non-stationarity of this multi-agent automatic curriculum teaching problem, and provide a corresponding regret bound. Empirical results show that our method improves scalability, sample efficiency, and generalization in multiple MARL environments. 
The source code and the video can be found at https://sites.google.com/view/marl-spc/.", "keywords": "multi-agent reinforcement learning;multi-agent cooperation", "primary_area": "", "supplementary_material": "/attachment/ff48cedb78109cb06c20ac17dc4398b5db70d6ab.zip", "author": "Rundong Wang;Longtao Zheng;Wei Qiu;Bowei He;Bo An;Zinovi Rabinovich;Yujing Hu;Yingfeng Chen;Tangjie Lv;Changjie Fan", "authorids": "~Rundong_Wang1;~Longtao_Zheng1;~Wei_Qiu3;~Bowei_He1;~Bo_An2;~Zinovi_Rabinovich1;~Yujing_Hu2;~Yingfeng_Chen2;~Tangjie_Lv1;~Changjie_Fan1", "gender": "M;M;M;M;M;M;;M;M;M", "homepage": ";https://ltzheng.github.io/;;;https://personal.ntu.edu.sg/boan/;http://zinovi.zinovi.net;;;;http://chyf.ink/", "dblp": "254/1228;293/7155;11/5166-1;179/0894;42/6178-1.html;93/4009;https://dblp.uni-trier.de/pid/160/1923.html;;71/882;37/1835", "google_scholar": "JEVpgE8AAAAJ;https://scholar.google.com/citations?hl=en;gszGlZIAAAAJ;1cH0A9cAAAAJ;PEEpuNwAAAAJ;https://scholar.google.com.tw/citations?user=JwJRnmAAAAAJ;IR5WY-wAAAAJ;EIuWpJcAAAAJ;;SSBrkpMAAAAJ", "orcid": ";;;0000-0002-0360-2950;0000-0002-7064-7438;;;0000-0001-9858-809X;0000-0001-5420-0516;", "linkedin": ";longtaozheng;;;;;;;;", "or_profile": "~Rundong_Wang1;~Longtao_Zheng1;~Wei_Qiu3;~Bowei_He1;~Bo_An2;~Zinovi_Rabinovich1;~Yujing_Hu2;~Tangjie_Lv1;~Changjie_Fan1;~Charles_Chen1", "aff": "Nanyang Technological University;Nanyang Technological University, Singapore;Nanyang Technological University;City University of Hong Kong;Nanyang Technological University;Nanyang Technological University;NetEase, Inc.;NetEase, Inc.;Netease, Fuxi AI Lab;Fuxi AI Lab in Netease", "aff_domain": "ntu.edu.sg;ntu.edu.sg;ntu.edu.sg;my.cityu.edu.hk;ntu.edu.sg;ntu.edu.sg;corp.netease.com;netease.com;corp.netease.com;corp.netease.com", "position": "PhD student;PhD student;PhD student;PhD student;Full Professor;Assistant Professor;Researcher;Researcher;Principal Researcher;Researcher", "bibtex": "@misc{\nwang2023towards,\ntitle={Towards Skilled Population Curriculum for {MARL}},\nauthor={Rundong Wang and Longtao Zheng and Wei Qiu and Bowei He and Bo An and Zinovi Rabinovich and Yujing Hu and Yingfeng Chen and Tangjie Lv and Changjie Fan},\nyear={2023},\nurl={https://openreview.net/forum?id=GbsvQSaJV-6}\n}", "github": "", "project": "", "reviewers": "qKoe;cbi7;WEWH;HPXF", "site": "https://openreview.net/forum?id=GbsvQSaJV-6", "pdf_size": 3524178, "recommendation": "6;6;6;6", "confidence": "4;3;4;3", "correctness": "3;3;2;3", "technical_novelty": "2;3;2;4", "empirical_novelty": "2;3;2;3", "wc_summary_paper": "62;91;77;76", "wc_strength_and_weaknesses": "333;125;354;155", "wc_clarity_quality_novelty_and_reproducibility": "53;168;40;45", "wc_summary_review": "77;42;24;25", "wc_review": "525;426;495;301", "wc_reply_reviewers": "0;5;67;0", "wc_reply_authors": "285;297;503;211", "reply_reviewers": "0;1;1;0", "reply_authors": "1;2;2;1", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 76.5, 10.259142264341596 ], "wc_strength_and_weaknesses_avg": [ 241.75, 102.5704026510572 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 76.5, 53.0306515140065 ], "wc_summary_review_avg": [ 42.0, 21.435951110226018 ], "wc_review_avg": [ 436.75, 86.20433573782701 ], "wc_reply_reviewers_avg": [ 18.0, 28.36370920736567 ], "wc_reply_authors_avg": [ 324.0, 108.4665847162157 ], "reply_reviewers_avg": [ 0.5, 0.5 ], 
"reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 10, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:8axx3-suVloJ:scholar.google.com/&scioq=Towards+Skilled+Population+Curriculum+for+MARL&hl=en&as_sdt=0,14", "gs_version_total": 2, "aff_unique_index": "0;0;0;1;0;0;2;2;3;3", "aff_unique_norm": "Nanyang Technological University;City University of Hong Kong;NetEase, Inc.;Netease", "aff_unique_dep": ";;;Fuxi AI Lab", "aff_unique_url": "https://www.ntu.edu.sg;https://www.cityu.edu.hk;https://www.163.com;https://www.netease.com", "aff_unique_abbr": "NTU;CityU;NetEase;Netease", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;1;0;0;1;1;1;1", "aff_country_unique": "Singapore;China" }, { "title": "AutoGT: Automated Graph Transformer Architecture Search", "status": "Top-5%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11970", "id": "GcM7qfl5zY", "poster": "/media/PosterPDFs/ICLR%202023/11970.png?t=1682757188.786314", "openreview": "https://openreview.net/forum?id=GcM7qfl5zY", "slides": "https://iclr.cc/virtual/2023/poster/11970", "video": "https://iclr.cc/virtual/2023/poster/11970", "author_site": "Zizhao Zhang, Xin Wang, Chaoyu Guan, Ziwei Zhang, Haoyang Li, Wenwu Zhu", "tldr": "", "abstract": "Although Transformer architectures have been successfully applied to graph data with the advent of Graph Transformer, current design of Graph Transformer still heavily relies on human labor and expertise knowledge to decide proper neural architectures and suitable graph encoding strategies at each Transformer layer. In literature, there have been some works on automated design of Transformers focusing on non-graph data such as texts and images without considering graph encoding strategies, which fail to handle the non-euclidean graph data. In this paper, we study the problem of automated graph Transformer, for the first time. However, solving these problems poses the following challenges: i) how can we design a unified search space for graph Transformer, and ii) how to deal with the coupling relations between Transformer architectures and the graph encodings of each Transformer layer. To address these challenges, we propose Automated Graph Transformer (AutoGT), a neural architecture search framework that can automatically discover the optimal graph Transformer architectures by joint optimization of Transformer architecture and graph encoding strategies. Specifically, we first propose a unified graph Transformer formulation that can represent most of state-of-the-art graph Transformer architectures. Based upon the unified formulation, we further design the graph Transformer search space that includes both candidate architectures and various graph encodings. To handle the coupling relations, we propose a novel encoding-aware performance estimation strategy by gradually training and splitting the supernets according to the correlations between graph encodings and architectures. The proposed strategy can provide a more consistent and fine-grained performance prediction when evaluating the jointly optimized graph encodings and architectures. 
Extensive experiments and ablation studies show that our proposed AutoGT gains sufficient improvement over state-of-the-art hand-crafted baselines on all datasets, demonstrating its effectiveness and wide applicability.\n", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Zizhao Zhang;Xin Wang;Chaoyu Guan;Ziwei Zhang;Haoyang Li;Wenwu Zhu", "authorids": "~Zizhao_Zhang4;~Xin_Wang17;~Chaoyu_Guan1;~Ziwei_Zhang1;~Haoyang_Li1;~Wenwu_Zhu1", "gender": ";M;M;;M;M", "homepage": ";http://mn.cs.tsinghua.edu.cn/xinwang/;;;https://haoyang.li;http://media.cs.tsinghua.edu.cn/en/zww", "dblp": ";10/5630-19;242/3864;;118/0004-1.html;97/6308-1.html", "google_scholar": ";YPOBHYUAAAAJ;;;86RE16gAAAAJ;https://scholar.google.com.tw/citations?user=7t2jzpgAAAAJ", "orcid": ";0000-0002-0351-2939;;;0000-0003-3544-5563;0000-0003-2236-9290", "linkedin": ";;;;;", "or_profile": "~Zizhao_Zhang4;~Xin_Wang17;~Chaoyu_Guan1;~Ziwei_Zhang1;~Haoyang_Li1;~Wenwu_Zhu1", "aff": ";Tsinghua University;Tsinghua University;;Tsinghua University;Tsinghua University", "aff_domain": ";cs.tsinghua.edu.cn;tsinghua.edu.cn;;tsinghua.edu.cn;tsinghua.edu.cn", "position": ";Assistant Professor;MS student;;PhD student;Full Professor", "bibtex": "@inproceedings{\nzhang2023autogt,\ntitle={Auto{GT}: Automated Graph Transformer Architecture Search},\nauthor={Zizhao Zhang and Xin Wang and Chaoyu Guan and Ziwei Zhang and Haoyang Li and Wenwu Zhu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=GcM7qfl5zY}\n}", "github": "", "project": "", "reviewers": "iKrv;ueHs;C87i", "pdf_size": 1064746, "recommendation": "8;8;8", "confidence": "5;5;3", "correctness": "4;4;4", "technical_novelty": "3;3;3", "empirical_novelty": "3;3;4", "wc_summary_paper": "42;62;75", "wc_strength_and_weaknesses": "115;125;208", "wc_clarity_quality_novelty_and_reproducibility": "22;28;176", "wc_summary_review": "96;60;108", "wc_review": "275;275;567", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "360;178;1001", "reply_reviewers": "0;0;0", "reply_authors": "1;1;3", "recommendation_avg": [ 8.0, 0.0 ], "confidence_avg": [ 4.333333333333333, 0.9428090415820634 ], "correctness_avg": [ 4.0, 0.0 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 59.666666666666664, 13.572848714334887 ], "wc_strength_and_weaknesses_avg": [ 149.33333333333334, 41.6839963961657 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 75.33333333333333, 71.2242156073964 ], "wc_summary_review_avg": [ 88.0, 20.396078054371138 ], "wc_review_avg": [ 372.3333333333333, 137.65012007098125 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 513.0, 352.9768642087845 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6065238137628057158&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=GcM7qfl5zY", "email": ";cs.tsinghua.edu.cn;tsinghua.edu.cn;;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", 
"aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "GdimRqV_S7", "title": "Homotopy Learning of Parametric Solutions to Constrained Optimization Problems", "track": "main", "status": "Reject", "tldr": "", "abstract": "Building deep learning (DL) alternatives to constrained optimization problems has been proposed as a cheaper solution approach than classical constrained optimization solvers. However, these approximate learning-based solutions still suffer from constraint violations. From this perspective, reaching a reliable convergence remains an open challenge to DL models even with state-of-the-art methods to impose constraints, especially when facing a large set of nonlinear constraints forming a non-convex feasible set. In this paper, we propose the use of homotopy meta-optimization heuristics which creates a continuous transformation of the objective and constraints during training, to promote a more reliable convergence where the solution feasibility can be further improved. The method developed in this work includes 1) general-purpose homotopy heuristics based on the relaxation of objectives and constraint bounds to enlarge the basin of attraction and 2) physics-informed transformation of domain problem leading to trivial starting points lying within the basin of attraction. Experimentally, we demonstrate the efficacy of the proposed method on a set of abstract constrained optimization problems and real-world power grid optimal power flow problems with increasing complexity. Results show that constrained deep learning models with homotopy heuristics can improve the feasibility of the resulting solutions while achieving near-optimal objective values when compared with non-homotopy counterparts.", "keywords": "homotopy;deep learning;constrained optimization;nonlinear programming;constrained deep learning;differentiable parametric programming", "primary_area": "", "supplementary_material": "", "author": "Shimiao Li;Jan Drgona;Aaron R Tuor;Larry Pileggi;Draguna L Vrabie", "authorids": "~Shimiao_Li2;~Jan_Drgona1;~Aaron_R_Tuor1;pileggi@andrew.cmu.edu;~Draguna_L_Vrabie1", "gender": "F;;M;;F", "homepage": ";https://drgona.github.io/;https://studentweb.cs.wwu.edu/~tuora/aarontuor/;;https://www.pnnl.gov/people/draguna-l-vrabie", "dblp": ";;https://dblp.uni-trier.de/pers/hd/t/Tuor:Aaron;;", "google_scholar": "YMOWdAQAAAAJ;A-EA2KsAAAAJ;S9Qg6qcAAAAJ;;3rRxzNIAAAAJ", "orcid": ";0000-0003-1223-208X;0000-0001-6951-1923;;0000-0002-1547-2049", "linkedin": ";drgona/;aarontuor/;;draguna-vrabie-a7870012", "or_profile": "~Shimiao_Li2;~Jan_Drgona1;~Aaron_R_Tuor1;pileggi@andrew.cmu.edu;~Draguna_L_Vrabie1", "aff": "Carnegie Mellon University;Pacific Northwest National Laboratory;Pacific Northwest National Laboratory;;Pacific Northwest National Laboratory", "aff_domain": "cmu.edu;pnnl.gov;pnnl.gov;;pnnl.gov", "position": "PhD student;Researcher;Data Scientist;;Principal Researcher", "bibtex": "@misc{\nli2023homotopy,\ntitle={Homotopy Learning of Parametric Solutions to Constrained Optimization Problems},\nauthor={Shimiao Li and Jan Drgona and Aaron R Tuor and Larry Pileggi and Draguna L Vrabie},\nyear={2023},\nurl={https://openreview.net/forum?id=GdimRqV_S7}\n}", "github": "", "project": "", "reviewers": "Rnhn;H3TT;JJRB", "site": "https://openreview.net/forum?id=GdimRqV_S7", "pdf_size": 583480, "recommendation": "3;3;5", "confidence": "3;4;3", "correctness": "4;2;3", "technical_novelty": "1;2;2", "empirical_novelty": "2;2;2", "wc_summary_paper": 
"155;128;69", "wc_strength_and_weaknesses": "339;833;94", "wc_clarity_quality_novelty_and_reproducibility": "130;86;172", "wc_summary_review": "25;96;72", "wc_review": "649;1143;407", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 117.33333333333333, 35.91038228083288 ], "wc_strength_and_weaknesses_avg": [ 422.0, 307.35104793487636 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 129.33333333333334, 35.11251755270318 ], "wc_summary_review_avg": [ 64.33333333333333, 29.48822740612863 ], "wc_review_avg": [ 733.0, 306.2852700778584 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.5, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=966395356042006595&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Carnegie Mellon University;Pacific Northwest National Laboratory", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://www.pnnl.gov", "aff_unique_abbr": "CMU;PNNL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "Gg5PaJRQbRw", "title": "On Incremental Learning with Long Short Term Strategy", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Incremental learning aims at mitigating the forgetting during the sequential learning of deep neural networks. In the process, a procedure (including distillation, replaying, etc.) is usually adopted to help model accumulate knowledge. However, we discover the tuning of such procedure could face the ``long short term dilemma'' that the optimal procedure of short term learning is not necessarily equivalent to that of long term learning due to their need of different plasticity/stability balances. The existing methods have to take the trade-off to achieve better overall performance along the incremental tasks. In this paper, we propose a novel LongShortTerm strategy that circumvents limitations of widely-used pipeline with single branch and brings model capability in both short and long term into full play. To further control the plasticity/stability balance in LongShortTerm strategy, we discover that for ViT backbone, magnitude of memory augmentation is critical to retention of model and propose Margin-based Data Augmentation to meet different balances in long short term learning. Extensive experiments on two complex CIL benchmarks: ImageNet-100 and ImageNet-1K demonstrate the effectiveness of our LongShortTerm strategy with improvements of 0.59\\%-3.10\\% over state-of-the-art solution. 
", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Yichen Lu;Weihua Chen;Xiuyu Sun", "authorids": "~Yichen_Lu2;~Weihua_Chen1;~Xiuyu_Sun1", "gender": ";M;M", "homepage": "https://github.com/eddielyc;https://cwhgn.github.io;https://sites.google.com/view/sunxiuyu/home", "dblp": ";;40/8845", "google_scholar": ";KWVlYaMAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";0000-0003-4141-7833;0000-0002-7208-8078", "linkedin": ";;", "or_profile": "~Yichen_Lu2;~Weihua_Chen1;~Xiuyu_Sun1", "aff": "Beijing University of Posts and Telecommunications;Alibaba Group;Alibaba Group", "aff_domain": "bupt.edu.cn;alibaba-inc.com;alibaba-inc.com", "position": "MS student;Algorithm Engineer;Staff Algorithm Engineer", "bibtex": "@misc{\nlu2023on,\ntitle={On Incremental Learning with Long Short Term Strategy},\nauthor={Yichen Lu and Weihua Chen and Xiuyu Sun},\nyear={2023},\nurl={https://openreview.net/forum?id=Gg5PaJRQbRw}\n}", "github": "", "project": "", "reviewers": "rJe1;VqYF;XZfc;DvKB", "site": "https://openreview.net/forum?id=Gg5PaJRQbRw", "pdf_size": 1335554, "recommendation": "3;5;5;5", "confidence": "4;4;2;5", "correctness": "1;4;2;2", "technical_novelty": "1;4;3;2", "empirical_novelty": "1;3;3;2", "wc_summary_paper": "65;44;61;123", "wc_strength_and_weaknesses": "111;127;122;104", "wc_clarity_quality_novelty_and_reproducibility": "30;28;23;155", "wc_summary_review": "24;23;73;47", "wc_review": "230;222;279;429", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "correctness_avg": [ 2.25, 1.0897247358851685 ], "technical_novelty_avg": [ 2.5, 1.118033988749895 ], "empirical_novelty_avg": [ 2.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 73.25, 29.785692874264313 ], "wc_strength_and_weaknesses_avg": [ 116.0, 9.027735042633894 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 59.0, 55.4842319943243 ], "wc_summary_review_avg": [ 41.75, 20.437404434027332 ], "wc_review_avg": [ 290.0, 83.16549765377466 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.13245323570650439, "corr_recommendation_correctness": 0.6622661785325219, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:fUg2LTmfvO0J:scholar.google.com/&scioq=On+Incremental+Learning+with+Long+Short+Term+Strategy&hl=en&as_sdt=0,47", "gs_version_total": 0, "aff_unique_index": "0;1;1", "aff_unique_norm": "Beijing University of Posts and Telecommunications;Alibaba Group", "aff_unique_dep": ";", "aff_unique_url": "http://www.bupt.edu.cn/;https://www.alibaba.com", "aff_unique_abbr": "BUPT;Alibaba", "aff_campus_unique_index": "0", "aff_campus_unique": "Beijing;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "Gid_Z_oUV5q", "title": "SARNET: SARCASM VS TRUE-HATE DETECTION NETWORK", "track": "main", "status": "Reject", "tldr": "This research paper focuses on quasi-ternary classification of hate and sarcasm in a tweet using game theory, Nash Equilibrium and deep learning.", "abstract": "At times hate speech detection classifiers miss the context of a sentence and flag a sarcastic tweet incorrectly. To tackle this problem by emphasising on the context of a tweet we propose SarNet. 
SarNet is a two-fold deep learning based model which follows a quasi-ternary labelling strategy and contextually classifies a tweet as hate, sarcastic or neither. The first module of SarNet is an ANN-BiLSTM based Pyramid Network used to calculate the hate and sarcastic probabilities of a sentence. The second module of the SarNet is the Nash Equalizer which stems from the concept of game theory and prisoner\u2019s dilemma. It treats hate and sarcasm as two prisoners. A payoff matrix is constructed to calculate the true hate of the tweet. True hate considers the hate part of a tweet excluding the sarcastic part of the tweet. Thus, this gives a true estimate of the hate content in a tweet thereby decreasing the number of sarcastic tweets being falsely flagged as hate. Our proposed model is trained on state-of-the-art hate speech and sarcasm datasets in the English language. The precision, recall and F1 score of our proposed model is 0.93, 0.84 and 0.88 respectively. Comparison with state-of-the-art architectures demonstrated better performance of SarNet by a significant margin.", "keywords": "Game Theory;Hate Speech;Sarcasm;Nash Equilibrium;Prisoner's Dilemma", "primary_area": "", "supplementary_material": "", "author": "Harsh Mittal;Kartikeya Singh Chauhan;Anil Singh Parihar;Kavinder Singh;Ashutosh Pandey", "authorids": "24mithar@gmail.com;ksc13dec@gmail.com;parihar.anil@gmail.com;kavinder85@gmail.com;ashutoshpndy@gmail.com", "gender": ";;;;", "homepage": ";;;;", "dblp": ";;;;", "google_scholar": ";;;;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": ";;;;", "aff": ";;;;", "aff_domain": ";;;;", "position": ";;;;", "bibtex": "@misc{\nmittal2023sarnet,\ntitle={{SARNET}: {SARCASM} {VS} {TRUE}-{HATE} {DETECTION} {NETWORK}},\nauthor={Harsh Mittal and Kartikeya Singh Chauhan and Anil Singh Parihar and Kavinder Singh and Ashutosh Pandey},\nyear={2023},\nurl={https://openreview.net/forum?id=Gid_Z_oUV5q}\n}", "github": "", "project": "", "reviewers": "ioZ4;jETC;cP5A;xKDB", "site": "https://openreview.net/forum?id=Gid_Z_oUV5q", "pdf_size": 393345, "recommendation": "3;5;5;5", "confidence": "5;3;3;3", "correctness": "3;3;3;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;1;2;2", "wc_summary_paper": "42;72;99;45", "wc_strength_and_weaknesses": "147;206;112;110", "wc_clarity_quality_novelty_and_reproducibility": "11;82;89;15", "wc_summary_review": "7;26;58;34", "wc_review": "207;386;358;204", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 64.5, 23.092206477510977 ], "wc_strength_and_weaknesses_avg": [ 143.75, 38.8353897881816 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 49.25, 36.36189626518397 ], "wc_summary_review_avg": [ 31.25, 18.2944663764757 ], "wc_review_avg": [ 288.75, 83.84323168867002 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:wVWXecMjXyUJ:scholar.google.com/&scioq=SARNET:+SARCASM+VS+TRUE-HATE+DETECTION+NETWORK&hl=en&as_sdt=0,5", "gs_version_total": 0 }, { "id": "Gkbxt7ThQxU", "title": "Explicitly Maintaining Diverse Playing Styles in Self-Play", "track": "main", "status": "Reject", "tldr": "", "abstract": "Self-play has proven to be an effective training schema to obtain a high-level agent in complex games through iteratively playing against an opponent from its historical versions. However, its training process may prevent it from generating a well-generalised policy since the trained agent rarely encounters diversely-behaving opponents along its own historical path. In this paper, we aim to improve the generalisation of the policy by maintaining a population of agents with diverse playing styles and high skill levels throughout the training process. Specifically, we propose a bi-objective optimisation model to simultaneously optimise the agents' skill level and playing style. A feature of this model is that we do not regard the skill level and playing style as two objectives to maximise directly since they are not equally important (i.e., agents with diverse playing styles but low skill levels are meaningless). Instead, we create a meta bi-objective model to enable high-level agents with diverse playing styles more likely to be incomparable (i.e. Pareto non-dominated), thereby playing against each other through the training process. We then present an evolutionary algorithm working with the proposed model. Experiments in a classic table tennis game Pong and a commercial role-playing game Justice Online show that our algorithm can learn a well generalised policy and at the same time is able to provide a set of high-level policies with various playing styles.", "keywords": "Reinforcement learning;evolutionary algorithm;self-play;diverse playing styles;high skill levels", "primary_area": "", "supplementary_material": "", "author": "Yuan Liu;Ruimin Shen;Miqing Li;Yingfeng Chen;Juan Zou;Changjie Fan", "authorids": "~Yuan_Liu12;ruimin0shen@gmail.com;~Miqing_Li1;~Yingfeng_Chen1;zoujuan@xtu.edu.cn;~Changjie_Fan1", "gender": ";;M;;;M", "homepage": ";;https://sites.google.com/view/miqing-li;;;", "dblp": ";;05/3393;;;71/882", "google_scholar": "Shf3oT8AAAAJ;;h8UksmEAAAAJ;;;", "orcid": ";;;;;0000-0001-5420-0516", "linkedin": ";;;;;", "or_profile": "~Yuan_Liu12;ruimin0shen@gmail.com;~Miqing_Li1;~Yingfeng_Chen1;zoujuan@xtu.edu.cn;~Changjie_Fan1", "aff": "Xiangtan University;;University of Birmingham;;;Netease, Fuxi AI Lab", "aff_domain": "xtu.edu.cn;;bham.ac.uk;;;corp.netease.com", "position": "Lecturer;;Associate Professor;;;Principal Researcher", "bibtex": "@misc{\nliu2023explicitly,\ntitle={Explicitly Maintaining Diverse Playing Styles in Self-Play},\nauthor={Yuan Liu and Ruimin Shen and Miqing Li and Yingfeng Chen and Juan Zou and Changjie Fan},\nyear={2023},\nurl={https://openreview.net/forum?id=Gkbxt7ThQxU}\n}", "github": "", "project": "", "reviewers": "FdvX;mGvT;1h9y", "site": "https://openreview.net/forum?id=Gkbxt7ThQxU", "pdf_size": 4964845, "recommendation": "3;3;6", "confidence": "3;4;4", "correctness": "3;2;4", "technical_novelty": "2;2;3", "empirical_novelty": "2;2;3", "wc_summary_paper": "96;90;104", "wc_strength_and_weaknesses": "679;408;82", "wc_clarity_quality_novelty_and_reproducibility": "91;59;508", "wc_summary_review": "135;45;15", "wc_review": "1001;602;709", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", 
"reply_authors": "0;0;0", "recommendation_avg": [ 4.0, 1.4142135623730951 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 96.66666666666667, 5.734883511361751 ], "wc_strength_and_weaknesses_avg": [ 389.6666666666667, 244.06875169827777 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 219.33333333333334, 204.5357887727448 ], "wc_summary_review_avg": [ 65.0, 50.99019513592785 ], "wc_review_avg": [ 770.6666666666666, 168.6264774253702 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.5, "corr_recommendation_correctness": 0.8660254037844387, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:heQH-cn_D78J:scholar.google.com/&scioq=Explicitly+Maintaining+Diverse+Playing+Styles+in+Self-Play&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;1;2", "aff_unique_norm": "Xiangtan University;University of Birmingham;Netease", "aff_unique_dep": ";;Fuxi AI Lab", "aff_unique_url": "http://www.xtu.edu.cn/;https://www.birmingham.ac.uk;https://www.netease.com", "aff_unique_abbr": "XTU;Birmingham;Netease", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "China;United Kingdom" }, { "id": "GmjwnzduXzf", "title": "Grassmannian Class Representation in Deep Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "We generalize the class representative vector found in deep classification networks to linear subspaces and show that the new formulation enables the simultaneous enhancement of the inter-class discrimination and intra-class feature variation. Traditionally, the logit is computed by the inner product between a feature and the class vector. In our modeling, classes are subspaces and the logit is defined as the norm of the projection from a feature onto the subspace. Since the set of subspaces forms Grassmann manifolds, finding the optimal subspace representation for classes is to optimize the loss on a Grassmannian. We integrate the Riemannian SGD into existing deep learning frameworks such that the class subspaces in a Grassmannian are jointly optimized with other model parameters in Euclidean. Compared to the vector form, subspaces have two appealing properties: they can be multi-dimensional and they are scaleless. Empirically, we reveal that these distinct characteristics improve various tasks. (1) Image classification. The new formulation brings the top-1 accuracy of ResNet50-D on ImageNet-1K from 78.04% to 79.37% using the standard augmentation in 100 training epochs. This confirms that the representative capability of subspaces is more powerful than vectors. (2) Feature transfer. Subspaces provide freedom for features to vary and we observed that the intra-class variability of features increases when the subspace dimensions are larger. Consequently, the quality of features is better for downstream tasks. The average transfer accuracy across 6 datasets improves from 77.98% to 80.12% compared to the strong baseline of vanilla softmax. (3) Long-tail classification. 
The scaleless property of subspaces benefits classification in the long-tail scenario and improves the accuracy of ImageNet-LT from 46.83% to 48.94% compared to the standard formulation. With these encouraging results, we believe that more applications could benefit from the Grassmannian class representation. Codes will be released.", "keywords": "Grassmannian;geometric optimization;classification;feature transfer;long-tail", "primary_area": "", "supplementary_material": "", "author": "Haoqi Wang;Zhizhong Li;Wayne Zhang", "authorids": "~Haoqi_Wang1;~Zhizhong_Li2;~Wayne_Zhang2", "gender": "F;M;M", "homepage": "https://scholar.google.com/citations?user=70_DgI8AAAAJ&hl=en;https://zhizhong.li/;http://www.statfe.com", "dblp": ";42/4133-2;239/6045", "google_scholar": "70_DgI8AAAAJ;https://scholar.google.com.hk/citations?user=YIaflDkAAAAJ;5GtyVooAAAAJ", "orcid": "0000-0002-5760-4097;0000-0003-0574-2487;0000-0002-8415-1062", "linkedin": "haoqi-wang-curious-king;%E6%B2%BB%E4%B8%AD-%E6%9D%8E-b14134a5/;", "or_profile": "~Haoqi_Wang1;~Zhizhong_Li2;~Wei_Zhang5", "aff": "SenseTime;SenseTime;SenseTime Research", "aff_domain": "sensetime.com;sensetime.com;sensetime.com", "position": "Researcher;Researcher;Research Director", "bibtex": "@misc{\nwang2023grassmannian,\ntitle={Grassmannian Class Representation in Deep Learning},\nauthor={Haoqi Wang and Zhizhong Li and Wayne Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=GmjwnzduXzf}\n}", "github": "", "project": "", "reviewers": "Acxf;Ut4X;ShD7;iWx6;K7X9", "site": "https://openreview.net/forum?id=GmjwnzduXzf", "pdf_size": 2402727, "recommendation": "5;5;6;6;6", "confidence": "5;5;3;3;4", "correctness": "2;4;4;3;3", "technical_novelty": "2;2;4;2;3", "empirical_novelty": "2;3;0;3;3", "wc_summary_paper": "107;103;69;102;80", "wc_strength_and_weaknesses": "665;256;258;139;554", "wc_clarity_quality_novelty_and_reproducibility": "32;51;8;62;12", "wc_summary_review": "34;28;92;60;68", "wc_review": "838;438;427;363;714", "wc_reply_reviewers": "222;0;0;0;0", "wc_reply_authors": "2901;1040;1028;426;1833", "reply_reviewers": "1;0;0;0;0", "reply_authors": "4;2;2;1;3", "recommendation_avg": [ 5.6, 0.48989794855663565 ], "confidence_avg": [ 4.0, 0.8944271909999159 ], "correctness_avg": [ 3.2, 0.7483314773547882 ], "technical_novelty_avg": [ 2.6, 0.8 ], "empirical_novelty_avg": [ 2.2, 1.16619037896906 ], "wc_summary_paper_avg": [ 92.2, 14.958609561052123 ], "wc_strength_and_weaknesses_avg": [ 374.4, 199.84253801430765 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 33.0, 21.12818023399081 ], "wc_summary_review_avg": [ 56.4, 23.337523433303716 ], "wc_review_avg": [ 556.0, 185.6351259864361 ], "wc_reply_reviewers_avg": [ 44.4, 88.8 ], "wc_reply_authors_avg": [ 1445.6, 854.0202807896309 ], "reply_reviewers_avg": [ 0.2, 0.4000000000000001 ], "reply_authors_avg": [ 2.4, 1.019803902718557 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.9128709291752768, "corr_recommendation_correctness": 0.21821789023599236, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:TRQFx3dUdeYJ:scholar.google.com/&scioq=Grassmannian+Class+Representation+in+Deep+Learning&hl=en&as_sdt=0,11", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "SenseTime", "aff_unique_dep": "", "aff_unique_url": "https://www.sensetime.com", "aff_unique_abbr": "SenseTime", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": 
"Gp91Et4LeRf", "title": "Auditing Fairness Online through Interactive Refinement", "track": "main", "status": "Reject", "tldr": "A visual inference-based optimization framework that facilitates the specification and auditing of fairness on blackbox ML models efficiently.", "abstract": "Machine learning algorithms are increasingly being deployed for high-stakes scenarios. A sizeable proportion of currently deployed models make their decisions in a black box manner. Such decision-making procedures are susceptible to intrinsic biases, which has led to a call for accountability in deployed decision systems. In this work, we focus on user-specified accountability of decision-making processes of black box systems. Previous work has formulated this problem as run time fairness monitoring over decision functions. However, formulating appropriate specifications for situation-appropriate fairness metrics is challenging. We construct AVOIR, an automated inference-based optimization system that improves bounds for and generalizes prior work across a wide range of fairness metrics. AVOIR offers an interactive and iterative process for exploring fairness violations aligned with governance and regulatory requirements. Our bounds improve over previous probabilistic guarantees for such fairness grammars in online settings. We also construct a novel visualization mechanism that can be used to investigate the context of reported fairness violations and guide users towards meaningful and compliant fairness specifications. We then conduct case studies with fairness metrics on three different datasets and demonstrate how the visualization and improved optimization can detect fairness violations more efficiently and ameliorate the issues with faulty fairness metric design. 
", "keywords": "fairness;metrics;verification;inference;online;monitoring", "primary_area": "", "supplementary_material": "/attachment/aad9c9b8b61eef5dd94cf4243ae346d3be3c467a.zip", "author": "Pranav Maneriker;Codi Jay Burley;srinivasan parthasarathy", "authorids": "~Pranav_Maneriker1;~Codi_Jay_Burley1;~srinivasan_parthasarathy1", "gender": "M;M;", "homepage": "https://pranavmaneriker.github.io;;https://web.cse.ohio-state.edu/~parthasarathy.2/", "dblp": "188/7008;233/6226.html;p/SParathasarathy.html", "google_scholar": "9t8gbkkAAAAJ;hZkcNTMAAAAJ;2mjUsP8AAAAJ", "orcid": "0000-0003-1333-4424;;0000-0002-6062-6449", "linkedin": "pranav-maneriker/;;srinivasan-parthasarathy-5703761/", "or_profile": "~Pranav_Maneriker1;~Codi_Jay_Burley1;~srinivasan_parthasarathy1", "aff": "Ohio State University;;Ohio State University", "aff_domain": "osu.edu;;osu.edu", "position": "PhD student;;Professor", "bibtex": "@misc{\nmaneriker2023auditing,\ntitle={Auditing Fairness Online through Interactive Refinement},\nauthor={Pranav Maneriker and Codi Jay Burley and srinivasan parthasarathy},\nyear={2023},\nurl={https://openreview.net/forum?id=Gp91Et4LeRf}\n}", "github": "", "project": "", "reviewers": "Uuem;nDjC;Gatp;Guzx;1wit", "site": "https://openreview.net/forum?id=Gp91Et4LeRf", "pdf_size": 793749, "recommendation": "3;3;3;5;5", "confidence": "4;4;2;2;2", "correctness": "2;2;3;4;4", "technical_novelty": "2;3;3;2;2", "empirical_novelty": "3;2;2;2;2", "wc_summary_paper": "70;34;93;35;150", "wc_strength_and_weaknesses": "233;875;316;220;166", "wc_clarity_quality_novelty_and_reproducibility": "29;176;39;59;270", "wc_summary_review": "73;170;49;13;80", "wc_review": "405;1255;497;327;666", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "425;506;493;195;274", "reply_reviewers": "0;0;0;0;0", "reply_authors": "1;1;1;1;1", "recommendation_avg": [ 3.8, 0.9797958971132712 ], "confidence_avg": [ 2.8, 0.9797958971132712 ], "correctness_avg": [ 3.0, 0.8944271909999159 ], "technical_novelty_avg": [ 2.4, 0.4898979485566356 ], "empirical_novelty_avg": [ 2.2, 0.39999999999999997 ], "wc_summary_paper_avg": [ 76.4, 43.000465113763596 ], "wc_strength_and_weaknesses_avg": [ 362.0, 260.96206620886494 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 114.6, 93.86500945506798 ], "wc_summary_review_avg": [ 77.0, 52.065343559799935 ], "wc_review_avg": [ 630.0, 332.27819669668366 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 378.6, 123.38654707868277 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.6666666666666667, "corr_recommendation_correctness": 0.9128709291752769, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:gi6SqP8yN6oJ:scholar.google.com/&scioq=Auditing+Fairness+Online+through+Interactive+Refinement&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Ohio State University", "aff_unique_dep": "", "aff_unique_url": "https://www.osu.edu", "aff_unique_abbr": "OSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "GpW327gxLTF", "title": "Univariate vs Multivariate Time Series Forecasting with Transformers", "track": "main", "status": "Reject", "tldr": "We achieve SOTA results via a simple method of producing multivariate forecasts in a univariate manner which points to flaws in current architectures.", "abstract": "Multivariate 
time series forecasting is a challenging problem and a number of Transformer-based long-term time series forecasting models have been developed to tackle it. These models, however, are impeded by the additional information available in multivariate forecasting. In this paper, we propose a simple univariate setting as an alternative method for producing multivariate forecasts. The univariate model is trained on each individual dimension of the time series. This single model is then used to forecast each dimension of the multivariate series in turn. A comparative study shows that our setting outperforms state-of-the-art Transformers in the multivariate setting on benchmark datasets. To investigate why, we formulate three hypotheses and verify them via an empirical study, which leads to a criterion for when our univariate setting is likely to lead to better performance and reveals flaws in the current multivariate Transformers for long-term time series forecasting.", "keywords": "forecasting;time series;transformers;univariate;multivariate", "primary_area": "", "supplementary_material": "", "author": "William Michael John Murphy;Ke Chen", "authorids": "~William_Michael_John_Murphy1;~Ke_Chen12", "gender": "M;", "homepage": ";", "dblp": ";", "google_scholar": ";", "orcid": ";", "linkedin": "william-murphy-87724710b/;", "or_profile": "~William_Michael_John_Murphy1;~Ke_Chen12", "aff": "University of Manchester;", "aff_domain": "cs.manchester.ac.uk;", "position": "PhD student;", "bibtex": "@misc{\nmurphy2023univariate,\ntitle={Univariate vs Multivariate Time Series Forecasting with Transformers},\nauthor={William Michael John Murphy and Ke Chen},\nyear={2023},\nurl={https://openreview.net/forum?id=GpW327gxLTF}\n}", "github": "", "project": "", "reviewers": "qsvN;tf8M;rqx4", "site": "https://openreview.net/forum?id=GpW327gxLTF", "pdf_size": 4887292, "recommendation": "5;5;6", "confidence": "2;3;4", "correctness": "3;3;3", "technical_novelty": "1;2;3", "empirical_novelty": "2;2;3", "wc_summary_paper": "146;49;24", "wc_strength_and_weaknesses": "117;23;29", "wc_clarity_quality_novelty_and_reproducibility": "6;11;38", "wc_summary_review": "32;63;149", "wc_review": "301;146;240", "wc_reply_reviewers": "202;0;0", "wc_reply_authors": "588;128;242", "reply_reviewers": "1;0;0", "reply_authors": "3;1;1", "recommendation_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.0, 0.816496580927726 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 73.0, 52.61812108643434 ], "wc_strength_and_weaknesses_avg": [ 56.333333333333336, 42.96768811819205 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 18.333333333333332, 14.055445761538678 ], "wc_summary_review_avg": [ 81.33333333333333, 49.492984912566705 ], "wc_review_avg": [ 229.0, 63.75473838599502 ], "wc_reply_reviewers_avg": [ 67.33333333333333, 95.2237131997884 ], "wc_reply_authors_avg": [ 319.3333333333333, 195.5936831518055 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.8660254037844385, "corr_recommendation_correctness": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15346815428740726689&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "University
of Manchester", "aff_unique_dep": "", "aff_unique_url": "https://www.manchester.ac.uk", "aff_unique_abbr": "UoM", "aff_country_unique_index": "0", "aff_country_unique": "United Kingdom" }, { "id": "GpicUyuSdTr", "title": "SynMotor: A Benchmark Suite for Object Attribute Regression and Multi-task Learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "In this paper, we develop a novel benchmark suite including both a 2D synthetic image dataset and a 3D synthetic point cloud dataset. Our work is a sub-task in the framework of a remanufacturing project, in which small electric motors are used as fundamental objects. Apart from the given detection, classification, and segmentation annotations, the key objects also have multiple learnable attributes with ground truth provided. This benchmark can be used for computer vision tasks including 2D/3D detection, classification, segmentation, and multi-attribute learning. It is worth mentioning that most attributes of the motors are quantified as continuously variable rather than binary, which makes our benchmark well suited for the less explored regression tasks. In addition, appropriate evaluation metrics are adopted or developed for each task and promising baseline results are provided. We hope this benchmark can stimulate more research efforts on the sub-domain of object attribute learning and multi-task learning in the future.", "keywords": "synthetic dataset;benchmark development;3D point cloud;object attribute regression;multi-task learning", "primary_area": "", "supplementary_material": "", "author": "Chengzhi Wu;Linxi Qiu;Kanran Zhou;Julius Pfrommer;J\u00fcrgen Beyerer", "authorids": "~Chengzhi_Wu1;uznba@student.kit.edu;kanran.zhou@student.kit.edu;julius.pfrommer@iosb.fraunhofer.de;juergen.beyerer@iosb.fraunhofer.de", "gender": "M;;;;", "homepage": "https://ies.anthropomatik.kit.edu/1473_1524.php;;;;", "dblp": ";;;;", "google_scholar": ";;;;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Chengzhi_Wu1;uznba@student.kit.edu;kanran.zhou@student.kit.edu;julius.pfrommer@iosb.fraunhofer.de;juergen.beyerer@iosb.fraunhofer.de", "aff": "Karlsruhe Institute of Technology;;;;", "aff_domain": "kit.edu;;;;", "position": "PhD student;;;;", "bibtex": "@misc{\nwu2023synmotor,\ntitle={SynMotor: A Benchmark Suite for Object Attribute Regression and Multi-task Learning},\nauthor={Chengzhi Wu and Linxi Qiu and Kanran Zhou and Julius Pfrommer and J{\\\"u}rgen Beyerer},\nyear={2023},\nurl={https://openreview.net/forum?id=GpicUyuSdTr}\n}", "github": "", "project": "", "reviewers": "hrgN;6QXt;A5wW", "site": "https://openreview.net/forum?id=GpicUyuSdTr", "pdf_size": 13707537, "recommendation": "1;3;5", "confidence": "4;4;4", "correctness": "2;3;3", "technical_novelty": "1;2;2", "empirical_novelty": "1;1;2", "wc_summary_paper": "91;45;51", "wc_strength_and_weaknesses": "327;95;126", "wc_clarity_quality_novelty_and_reproducibility": "51;47;40", "wc_summary_review": "46;44;20", "wc_review": "515;231;237", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.0, 1.632993161855452 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.3333333333333333, 0.4714045207910317 ], "wc_summary_paper_avg": [ 62.333333333333336, 20.417857108151406 ], "wc_strength_and_weaknesses_avg": [ 182.66666666666666, 102.84076148211963 ], 
"wc_clarity_quality_novelty_and_reproducibility_avg": [ 46.0, 4.546060565661952 ], "wc_summary_review_avg": [ 36.666666666666664, 11.8133634311129 ], "wc_review_avg": [ 327.6666666666667, 132.48731595473163 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.8660254037844385, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2176350055185183066&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0", "aff_unique_norm": "Karlsruhe Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kit.edu", "aff_unique_abbr": "KIT", "aff_country_unique_index": "0", "aff_country_unique": "Germany" }, { "title": "Improving the imputation of missing data with Markov Blanket discovery", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11189", "id": "GrpU6dxFmMN", "poster": "/media/PosterPDFs/ICLR%202023/11189.png?t=1680701768.0584912", "openreview": "https://openreview.net/forum?id=GrpU6dxFmMN", "slides": "https://iclr.cc/virtual/2023/poster/11189", "video": "https://iclr.cc/virtual/2023/poster/11189", "author_site": "Yang Liu, Anthony Constantinou", "tldr": "", "abstract": "The process of imputation of missing data typically relies on generative and regression models. These approaches often operate on the unrealistic assumption that all of the data features are directly related with one another, and use all of the available features to impute missing values. In this paper, we propose a novel Markov Blanket discovery approach to determine the optimal feature set for a given variable by considering both observed variables and missingness of partially observed variables to account for systematic missingness. We then incorporate this method to the learning process of the state-of-the-art MissForest imputation algorithm, such that it informs MissForest which features to consider to impute missing values, depending on the variable the missing value belongs to. 
Experiments across different case studies and multiple imputation algorithms show that the proposed solution improves imputation accuracy, both under random and systematic missingness.", "keywords": "Feature selection;Imputation;Markov Blanket discovery", "primary_area": "", "supplementary_material": "", "author": "Yang Liu;Anthony Constantinou", "authorids": "~Yang_Liu77;~Anthony_Constantinou1", "gender": "M;M", "homepage": ";http://www.constantinou.info/", "dblp": ";121/8140", "google_scholar": "GMeLKiQAAAAJ;https://scholar.google.co.uk/citations?user=lAbimKMAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Yang_Liu77;~Anthony_Constantinou1", "aff": "Queen Mary University of London;Queen Mary University of London", "aff_domain": "qmul.ac.uk;qmul.ac.uk", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nliu2023improving,\ntitle={Improving the imputation of missing data with Markov Blanket discovery},\nauthor={Yang Liu and Anthony Constantinou},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=GrpU6dxFmMN}\n}", "github": "", "project": "", "reviewers": "p56x;14vZ;X4h7;HRJ6", "pdf_size": 294903, "recommendation": "5;8;8;8", "confidence": "3;4;4;3", "correctness": "4;4;3;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;2;3;2", "wc_summary_paper": "29;58;78;105", "wc_strength_and_weaknesses": "120;2;182;136", "wc_clarity_quality_novelty_and_reproducibility": "10;2;20;7", "wc_summary_review": "48;234;36;28", "wc_review": "207;296;316;276", "wc_reply_reviewers": "0;0;164;16", "wc_reply_authors": "221;112;662;385", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;2;1", "recommendation_avg": [ 7.25, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 67.5, 27.789386463180506 ], "wc_strength_and_weaknesses_avg": [ 110.0, 66.37770710110436 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 9.75, 6.5717197140474575 ], "wc_summary_review_avg": [ 86.5, 85.45612909557745 ], "wc_review_avg": [ 273.75, 41.051035309721485 ], "wc_reply_reviewers_avg": [ 45.0, 69.01449123191448 ], "wc_reply_authors_avg": [ 345.0, 207.21607080533113 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": -0.5773502691896257, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13582902979903083920&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 0, "pdf": "https://openreview.net/pdf?id=GrpU6dxFmMN", "email": "qmul.ac.uk;qmul.ac.uk", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Queen Mary University of London", "aff_unique_dep": "", "aff_unique_url": "https://www.qmul.ac.uk", "aff_unique_abbr": "QMUL", "aff_campus_unique_index": "0;0", "aff_campus_unique": "London", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "id": "GvMuB-YsiK6", "title": "Explaining Patterns in Data with Language Models via Interpretable Autoprompting", "track": "main", "status": "Reject", "tldr": "We introduce interpretable autoprompting, a simple approach to *understand a dataset* by finding a semantically meaningful prompt for a large language model.", "abstract": "Large language models (LLMs) have 
displayed an impressive ability to harness natural language to perform complex tasks. In this work, we explore whether we can leverage this learned ability to find and explain patterns in data. Specifically, given a pre-trained LLM and data examples, we introduce interpretable autoprompting (iPrompt), an algorithm that generates a natural-language string explaining the data. iPrompt iteratively alternates between generating explanations with an LLM and reranking them based on their performance when used as a prompt. Experiments on a wide range of datasets, from synthetic mathematics to natural-language understanding, show that iPrompt can yield meaningful insights by accurately finding groundtruth dataset descriptions. Moreover, the prompts produced by iPrompt are simultaneously human-interpretable and highly effective for generalization: on real-world sentiment classification datasets, iPrompt produces prompts that match or even improve upon human-written prompts for GPT-3. Finally, experiments with an fMRI dataset show the potential for iPrompt to aid in scientific discovery.", "keywords": "Interpretability;explainability;XAI;AI for science", "primary_area": "", "supplementary_material": "/attachment/643b3538f28468f89809d524fbc7fd245a780564.zip", "author": "Chandan Singh;John Xavier Morris;Jyoti Aneja;Alexander M Rush;Jianfeng Gao", "authorids": "~Chandan_Singh1;~John_Xavier_Morris1;~Jyoti_Aneja2;~Alexander_M_Rush1;~Jianfeng_Gao1", "gender": "M;M;;M;M", "homepage": "https://csinva.io/;http://jxmo.io;http://jyotianeja.com/;http://rush.seas.harvard.edu/;https://www.microsoft.com/en-us/research/people/jfgao/", "dblp": "38/2317;263/9958.html;;http://dblp.uni-trier.de/pers/hd/r/Rush:Alexander_M=;92/5339", "google_scholar": "https://scholar.google.com/citations?hl=en;Utsbve4AAAAJ;FYB92lkAAAAJ;LIjnUGgAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0003-0318-2340;;;0000-0002-9900-1606;", "linkedin": "csinva/;;;sasha-rush-a69b6917/;", "or_profile": "~Chandan_Singh1;~John_Xavier_Morris1;~Jyoti_Aneja2;~Alexander_M_Rush1;~Jianfeng_Gao1", "aff": "Microsoft Research;Cornell University;;School of Engineering and Applied Sciences, Harvard University;Microsoft Research", "aff_domain": "microsoft.com;cornell.edu;;seas.harvard.edu;microsoft.com", "position": "Researcher;PhD student;;Assistant Professor;Principal Researcher", "bibtex": "@misc{\nsingh2023explaining,\ntitle={Explaining Patterns in Data with Language Models via Interpretable Autoprompting},\nauthor={Chandan Singh and John Xavier Morris and Jyoti Aneja and Alexander M Rush and Jianfeng Gao},\nyear={2023},\nurl={https://openreview.net/forum?id=GvMuB-YsiK6}\n}", "github": "", "project": "", "reviewers": "a8A9;5C3P;R7d2", "site": "https://openreview.net/forum?id=GvMuB-YsiK6", "pdf_size": 2200320, "recommendation": "3;3;5", "confidence": "4;4;4", "correctness": "2;2;3", "technical_novelty": "3;2;3", "empirical_novelty": "3;2;2", "wc_summary_paper": "75;37;54", "wc_strength_and_weaknesses": "474;240;211", "wc_clarity_quality_novelty_and_reproducibility": "37;6;48", "wc_summary_review": "34;24;39", "wc_review": "620;307;352", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "318;277;228", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 
0.4714045207910317 ], "wc_summary_paper_avg": [ 55.333333333333336, 15.542057635833023 ], "wc_strength_and_weaknesses_avg": [ 308.3333333333333, 117.74076986706385 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 30.333333333333332, 17.78263822446552 ], "wc_summary_review_avg": [ 32.333333333333336, 6.236095644623236 ], "wc_review_avg": [ 426.3333333333333, 138.16978283578828 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 274.3333333333333, 36.790699307780976 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 1.0, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8639644104242486840&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Microsoft;Cornell University;Harvard University", "aff_unique_dep": "Microsoft Research;;School of Engineering and Applied Sciences", "aff_unique_url": "https://www.microsoft.com/en-us/research;https://www.cornell.edu;https://www.harvard.edu", "aff_unique_abbr": "MSR;Cornell;Harvard", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "Gy8vD-zGQqH", "title": "FoveaTer: Foveated Transformer for Image Classification", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Many animals and humans process the visual field with varying spatial resolution (foveated vision) and use peripheral processing to make eye movements and point the fovea to acquire high-resolution information about objects of interest. This architecture results in computationally efficient rapid scene exploration. Recent progress in self-attention-based vision Transformers, an alternative to the traditionally convolution-reliant computer vision systems, allows global interactions between feature locations and increases robustness to adversarial attacks. However, the Transformer models do not explicitly model the foveated properties of the visual system or the interaction between eye movements and the classification task. We propose the Foveated Transformer (FoveaTer) model, which uses pooling regions and eye movements to perform object classification tasks with a Vision Transformer architecture. Using square pooling regions or biologically-inspired radial-polar pooling regions, our proposed model pools the image features from the convolution backbone and uses the pooled features as input to transformer layers. It decides on the subsequent fixation location based on the attention assigned by the Transformer to various locations from past and present fixations. The model uses a confidence threshold to stop scene exploration. It dynamically allocates more fixation/computational resources to more challenging images before making the final image category decision. We construct a Foveated model using our proposed approach and compare it against a Baseline model, which does not contain any pooling. Using five ablation studies, we evaluate the contribution of different components of the Foveated model. We perform a psychophysics scene categorization task and use the experimental data to find a suitable radial-polar pooling region combination. We also show that the Foveated model better explains the human decisions in a scene categorization task than a Baseline model.
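The fixation loop described above can be sketched as follows; `pool_at`, `transformer`, `classifier`, and the grid of candidate fixation `centers` are all assumed interfaces, so this is an illustration of the control flow rather than the authors' implementation:

```python
import torch

@torch.no_grad()
def foveated_classify(feats, pool_at, transformer, classifier, centers,
                      max_fixations=5, conf=0.9):
    """Sketch of a FoveaTer-style fixation loop with dynamic stop.

    `pool_at(feats, fix)` returns foveated pooled tokens centred at `fix`;
    `transformer` returns a class embedding plus attention over `centers`.
    """
    fix = centers[len(centers) // 2]  # start near the image centre
    tokens = []
    for _ in range(max_fixations):
        tokens.append(pool_at(feats, fix))
        cls_emb, region_attn = transformer(torch.cat(tokens, dim=1))
        probs = classifier(cls_emb).softmax(-1)
        if probs.max() > conf:        # dynamic stop: confident enough
            break
        fix = centers[region_attn.argmax().item()]  # most-attended region next
    return probs
```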
On the ImageNet dataset, the Foveated model with Dynamic-stop achieves accuracy $8\\%$ below the Baseline model with a throughput gain of $76\\%$. An ensemble of the Foveated model with Dynamic-stop and the Baseline model achieves accuracy $0.7\\%$ below the Baseline at the same throughput. We demonstrate our model's robustness against PGD adversarial attacks with both types of pooling regions, where we see the Foveated model outperform the Baseline model.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/dcbac4061e0fb54010e3643284529753fd84bf9a.zip", "author": "Aditya Jonnalagadda;William Yang Wang;B.S. Manjunath;Miguel Eckstein", "authorids": "~Aditya_Jonnalagadda1;~William_Yang_Wang2;~B.S._Manjunath1;~Miguel_Eckstein1", "gender": "M;;;M", "homepage": "https://viu.psych.ucsb.edu/people/aditya-jonnalagadda;;;https://psych.ucsb.edu/people/faculty/miguel-eckstein", "dblp": ";;;56/975", "google_scholar": ";;;G5dQztgAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Aditya_Jonnalagadda1;~William_Yang_Wang2;~B.S._Manjunath1;~Miguel_Eckstein1", "aff": ";;;", "aff_domain": ";;;", "position": ";;;", "bibtex": "@misc{\njonnalagadda2023foveater,\ntitle={FoveaTer: Foveated Transformer for Image Classification},\nauthor={Aditya Jonnalagadda and William Yang Wang and B.S. Manjunath and Miguel Eckstein},\nyear={2023},\nurl={https://openreview.net/forum?id=Gy8vD-zGQqH}\n}", "github": "", "project": "", "reviewers": "Ma99;nWwN;K2yr;ytiB", "site": "https://openreview.net/forum?id=Gy8vD-zGQqH", "pdf_size": 7122869, "recommendation": "3;3;5;6", "confidence": "4;4;4;3", "correctness": "2;2;3;3", "technical_novelty": "1;2;3;3", "empirical_novelty": "1;2;2;4", "wc_summary_paper": "130;55;107;195", "wc_strength_and_weaknesses": "231;259;193;150", "wc_clarity_quality_novelty_and_reproducibility": "93;48;25;733", "wc_summary_review": "97;15;72;93", "wc_review": "551;377;397;1171", "wc_reply_reviewers": "11;0;0;0", "wc_reply_authors": "10;10;0;0", "reply_reviewers": "1;0;0;0", "reply_authors": "1;1;0;0", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.25, 0.82915619758885 ], "empirical_novelty_avg": [ 2.25, 1.0897247358851685 ], "wc_summary_paper_avg": [ 121.75, 50.26616655365714 ], "wc_strength_and_weaknesses_avg": [ 208.25, 40.9839907768875 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 224.75, 294.4557479486519 ], "wc_summary_review_avg": [ 69.25, 32.72900090134131 ], "wc_review_avg": [ 624.0, 322.9071073853903 ], "wc_reply_reviewers_avg": [ 2.75, 4.763139720814412 ], "wc_reply_authors_avg": [ 5.0, 5.0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 0.5, 0.5 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.7777777777777777, "corr_recommendation_correctness": 0.9622504486493761, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9670164795000389792&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4 }, { "id": "Gzmyu-Baq0", "title": "Self-Guided Diffusion Models", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Diffusion models have demonstrated remarkable progress in image generation quality, especially when guidance is used to control the generative process. However, guidance requires a large number of image-annotation pairs for training and is thus dependent on their availability, correctness and unbiasedness.
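The record goes on to propose replacing label-based guidance with self-supervised signals. As a hedged sketch of the simplest, image-level variant (an illustrative assumption, not the authors' pipeline), cluster assignments from k-means over self-supervised features can stand in for class labels when training a guided diffusion model:

```python
import torch

def self_annotate(features: torch.Tensor, k: int = 100, n_iter: int = 10) -> torch.Tensor:
    """Toy self-annotation function: k-means over self-supervised features.

    `features` is an (N, D) tensor from a pretrained self-supervised encoder;
    the returned cluster assignments act as pseudo-labels for guidance.
    """
    centers = features[torch.randperm(features.size(0))[:k]].clone()
    for _ in range(n_iter):
        assign = torch.cdist(features, centers).argmin(dim=1)
        for c in range(k):
            members = features[assign == c]
            if members.numel() > 0:
                centers[c] = members.mean(dim=0)
    return assign  # pseudo-labels used as the guidance signal
```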
In this paper, we eliminate the need for such annotation by instead leveraging the flexibility of self-supervision signals to design a framework for $\\textit{self-guided}$ diffusion models. By leveraging a feature extraction function and a self-annotation function, our method provides guidance signals at various image granularities: from the level of holistic images to object boxes and even segmentation masks. Our experiments on single-label and multi-label image datasets demonstrate that self-labeled guidance always outperforms diffusion models without guidance and may even surpass guidance based on ground-truth labels, especially on unbalanced data. When equipped with self-supervised box or mask proposals, our method further generates visually diverse yet semantically consistent images, without the need for any class, box, or segment label annotation. Self-guided diffusion is simple, flexible and expected to profit from deployment at scale.\n", "keywords": "diffusion model;self-supervised learning;unsupervised learning", "primary_area": "", "supplementary_material": "", "author": "Vincent Tao Hu;David W Zhang;Yuki M Asano;Gertjan J. Burghouts;Cees G. M. Snoek", "authorids": "~Vincent_Tao_Hu1;~David_W_Zhang1;~Yuki_M_Asano1;~Gertjan_J._Burghouts1;~Cees_G._M._Snoek1", "gender": "M;M;M;M;M", "homepage": "http://taohu.me;https://davzha.netlify.app/;https://gertjanburghouts.github.io/;http://www.ceessnoek.info;https://yukimasano.github.io/", "dblp": "272/5410;119/0960;84/2061;s/CeesSnoek;239/8823", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.nl/citations?user=MG3oLzUAAAAJ;zN6afwwAAAAJ;https://scholar.google.nl/citations?user=0uKdbscAAAAJ;CdpLhlgAAAAJ", "orcid": ";0000-0002-2137-1738;0000-0001-6265-7276;0000-0001-9092-1556;", "linkedin": "taohu620/;david-zhang-1b86b314a;gertjanburghouts/;cgmsnoek/;", "or_profile": "~Vincent_Tao_Hu1;~David_W_Zhang1;~Gertjan_J._Burghouts1;~Cees_Snoek1;~Yuki_Asano1", "aff": "Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;University of Amsterdam;TNO;University of Amsterdam;University of Amsterdam", "aff_domain": "lmu.de;uva.nl;tno.nl;uva.nl;uva.nl", "position": "Postdoc;PhD student;Researcher;Full Professor;Assistant Professor", "bibtex": "@misc{\nhu2023selfguided,\ntitle={Self-Guided Diffusion Models},\nauthor={Vincent Tao Hu and David W Zhang and Yuki M Asano and Gertjan J. Burghouts and Cees G. M. 
Snoek},\nyear={2023},\nurl={https://openreview.net/forum?id=Gzmyu-Baq0}\n}", "github": "", "project": "", "reviewers": "fSRy;JweP;nhBm;Kbxh;rnRJ", "site": "https://openreview.net/forum?id=Gzmyu-Baq0", "pdf_size": 32922163, "recommendation": "3;5;5;5;6", "confidence": "3;3;3;5;5", "correctness": "3;3;3;3;4", "technical_novelty": "3;3;2;3;3", "empirical_novelty": "0;2;2;2;3", "wc_summary_paper": "80;54;97;107;115", "wc_strength_and_weaknesses": "317;266;365;193;74", "wc_clarity_quality_novelty_and_reproducibility": "11;201;80;35;49", "wc_summary_review": "54;26;41;26;89", "wc_review": "462;547;583;361;327", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;0;0", "reply_authors": "0;0;0;0;0", "recommendation_avg": [ 4.8, 0.9797958971132712 ], "confidence_avg": [ 3.8, 0.9797958971132712 ], "correctness_avg": [ 3.2, 0.39999999999999997 ], "technical_novelty_avg": [ 2.8, 0.39999999999999997 ], "empirical_novelty_avg": [ 1.8, 0.9797958971132713 ], "wc_summary_paper_avg": [ 90.6, 21.712669112755343 ], "wc_strength_and_weaknesses_avg": [ 243.0, 101.93134944657605 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 75.2, 66.74249021425557 ], "wc_summary_review_avg": [ 47.2, 23.37006632425334 ], "wc_review_avg": [ 456.0, 100.11193735014821 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.5833333333333334, "corr_recommendation_correctness": 0.6123724356957946, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10707988697092359063&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff_unique_index": "0;1;2;1;1", "aff_unique_norm": "Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;University of Amsterdam;TNO", "aff_unique_dep": ";;", "aff_unique_url": "https://www.lmu.de;https://www.uva.nl;https://www.tno.nl", "aff_unique_abbr": "LMU;UvA;TNO", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1", "aff_country_unique": "Germany;Netherlands" }, { "title": "CLIPSep: Learning Text-queried Sound Separation with Noisy Unlabeled Videos", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11346", "id": "H-T3F0dMbyj", "poster": "/media/PosterPDFs/ICLR%202023/11346.png?t=1681695900.262564", "openreview": "https://openreview.net/forum?id=H-T3F0dMbyj", "slides": "https://iclr.cc/virtual/2023/poster/11346", "video": "https://iclr.cc/virtual/2023/poster/11346", "author_site": "Hao-Wen Dong, Naoya Takahashi, Yuki Mitsufuji, Julian McAuley, Taylor Berg-Kirkpatrick", "tldr": "A new method the leverages the pretrained CLIP model and noise invariant training for learning text-queried sound separation with only noisy unlabeled videos", "abstract": "Recent years have seen progress beyond domain-specific sound separation for speech or music towards universal sound separation for arbitrary sounds. Prior work on universal sound separation has investigated separating a target sound out of an audio mixture given a text query. Such text-queried sound separation systems provide a natural and scalable interface for specifying arbitrary target sounds. However, supervised text-queried sound separation systems require costly labeled audio-text pairs for training. 
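The abstract goes on to describe encoding the query with CLIP and conditioning a separation model on the resulting vector; below is a minimal sketch of one way such conditioning could look (the FiLM-style modulation and all module names are assumptions, not the released architecture):

```python
import torch
import torch.nn as nn

class QueryConditionedSeparator(nn.Module):
    """Toy text/image-queried separator in the spirit of CLIPSep.

    A query vector from a frozen CLIP encoder (image at training time,
    text at test time, thanks to the shared embedding space) modulates a
    spectrogram U-Net that predicts a soft separation mask.
    """

    def __init__(self, unet: nn.Module, clip_dim: int = 512, feat_dim: int = 64):
        super().__init__()
        self.unet = unet                        # maps (B, 1, F, T) -> (B, feat_dim, F, T)
        self.film = nn.Linear(clip_dim, 2 * feat_dim)
        self.head = nn.Conv2d(feat_dim, 1, kernel_size=1)

    def forward(self, mixture_spec, query_vec):
        feats = self.unet(mixture_spec)
        gamma, beta = self.film(query_vec).chunk(2, dim=-1)
        feats = feats * gamma[..., None, None] + beta[..., None, None]
        mask = torch.sigmoid(self.head(feats))  # per-T-F soft mask
        return mask * mixture_spec              # separated source estimate
```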
Moreover, the audio provided in existing datasets is often recorded in a controlled environment, causing a considerable generalization gap to noisy audio in the wild. In this work, we aim to approach text-queried universal sound separation by using only unlabeled data. We propose to leverage the visual modality as a bridge to learn the desired audio-textual correspondence. The proposed CLIPSep model first encodes the input query into a query vector using the contrastive language-image pretraining (CLIP) model, and the query vector is then used to condition an audio separation model to separate out the target sound. While the model is trained on image-audio pairs extracted from unlabeled videos, at test time we can instead query the model with text inputs in a zero-shot setting, thanks to the joint language-image embedding learned by the CLIP model. Further, videos in the wild often contain off-screen sounds and background noise that may hinder the model from learning the desired audio-textual correspondence. To address this problem, we further propose an approach called noise invariant training for training a query-based sound separation model on noisy data. Experimental results show that the proposed models successfully learn text-queried universal sound separation using only noisy unlabeled videos, even achieving competitive performance against a supervised model in some settings.", "keywords": "universal sound separation;source separation;contrastive language-image pre-training;multi-modal learning;self-supervised learning", "primary_area": "", "supplementary_material": "", "author": "Hao-Wen Dong;Naoya Takahashi;Yuki Mitsufuji;Julian McAuley;Taylor Berg-Kirkpatrick", "authorids": "~Hao-Wen_Dong1;~Naoya_Takahashi1;~Yuki_Mitsufuji1;~Julian_McAuley1;~Taylor_Berg-Kirkpatrick1", "gender": "M;M;M;M;M", "homepage": "https://hermandong.com;;https://www.yukimitsufuji.com/;http://cseweb.ucsd.edu/~jmcauley/;https://cseweb.ucsd.edu/~tberg/", "dblp": "206/7187;19/8442;136/5043;29/3483;22/8160", "google_scholar": "tEOa3O4AAAAJ;https://scholar.google.co.jp/citations?user=JbtYJMoAAAAJ;https://scholar.google.com/citations?hl=en;icbo4M0AAAAJ;mN6_BKAAAAAJ", "orcid": "0000-0002-5765-7594;;0000-0002-6806-6140;0000-0003-0955-7588;", "linkedin": "hwdong;naoyatakahashi/;mittu1204;;", "or_profile": "~Hao-Wen_Dong1;~Naoya_Takahashi1;~Yuki_Mitsufuji1;~Julian_McAuley1;~Taylor_Berg-Kirkpatrick1", "aff": "Amazon;Sony Group Corporation;Tokyo Institute of Technology, Tokyo Institute of Technology;University of California, San Diego, University of California, San Diego;University of California, San Diego", "aff_domain": "amazon.com;sony.com;titech.ac.jp;eng.ucsd.edu;ucsd.edu", "position": "Intern;Researcher;Associate Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\ndong2023clipsep,\ntitle={{CLIPS}ep: Learning Text-queried Sound Separation with Noisy Unlabeled Videos},\nauthor={Hao-Wen Dong and Naoya Takahashi and Yuki Mitsufuji and Julian McAuley and Taylor Berg-Kirkpatrick},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=H-T3F0dMbyj}\n}", "github": "", "project": "", "reviewers": "LZWB;CpEt;8zRs;dcmL;vU71", "pdf_size": 12554075, "recommendation": "6;8;8;8;8", "confidence": "4;3;4;3;4", "correctness": "4;4;3;3;3", "technical_novelty": "3;3;3;3;3", "empirical_novelty": "3;3;3;0;3", "wc_summary_paper": "299;26;63;202;178", "wc_strength_and_weaknesses": "601;240;156;187;109", 
"wc_clarity_quality_novelty_and_reproducibility": "70;216;37;294;429", "wc_summary_review": "25;93;49;71;60", "wc_review": "995;575;305;754;776", "wc_reply_reviewers": "0;117;110;7;181", "wc_reply_authors": "1137;1033;789;627;1082", "reply_reviewers": "0;1;2;1;1", "reply_authors": "2;3;2;1;2", "recommendation_avg": [ 7.6, 0.7999999999999999 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "correctness_avg": [ 3.4, 0.4898979485566356 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.4, 1.2 ], "wc_summary_paper_avg": [ 153.6, 98.55881492794036 ], "wc_strength_and_weaknesses_avg": [ 258.6, 176.41836639080412 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 209.2, 144.62281977613353 ], "wc_summary_review_avg": [ 59.6, 22.60619384151167 ], "wc_review_avg": [ 681.0, 230.4612765737446 ], "wc_reply_reviewers_avg": [ 83.0, 69.50395672190182 ], "wc_reply_authors_avg": [ 933.6, 194.00371130470674 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.408248290463863, "corr_recommendation_correctness": -0.6123724356957948, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7281563466116640426&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 7, "pdf": "https://openreview.net/pdf?id=H-T3F0dMbyj", "email": "amazon.com;sony.com;titech.ac.jp;eng.ucsd.edu;ucsd.edu", "author_num": 5, "aff_unique_index": "0;1;2;3;3", "aff_unique_norm": "Amazon;Sony Group Corporation;Tokyo Institute of Technology;University of California, San Diego", "aff_unique_dep": "Amazon.com, Inc.;;;", "aff_unique_url": "https://www.amazon.com;https://www.sony.com;https://www.titech.ac.jp;https://www.ucsd.edu", "aff_unique_abbr": "Amazon;Sony;Titech;UCSD", "aff_campus_unique_index": "1;2;2", "aff_campus_unique": ";Tokyo;San Diego", "aff_country_unique_index": "0;1;1;0;0", "aff_country_unique": "United States;Japan" }, { "id": "H-VlwsYvVi", "title": "Speculative Decoding: Lossless Speedup of Autoregressive Translation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Different from some previous work accelerating autoregressive translation (AT) at the sacrifice of quality, we propose Speculative Decoding (SpecDec) -- a novel decoding paradigm inspired by speculative execution in computer architecture, which combines respective advantages of AT and non-autoregressive translation (NAT) for lossless speedup of translation. At each decoding step, SpecDec first speculatively drafts (i.e. decodes) next $k$ tokens with an NAT model and then verifies them with an AT model, where only the drafted tokens passing the verification are accepted as decoded tokens for guaranteeing its translation result is exactly the same as AT. The collaboration of NAT drafting and AT verification leads to a much higher decoding speed without quality loss due to parallel computing enabled by speculative decoding.\n\nWe conduct experiments in 4 standard WMT translation benchmarks and confirm the vanilla SpecDec yields exactly the same results as AT greedy decoding with an around $3\\times$ speedup, and that its variant (SpecDec++) with an advanced verification strategy not only outperforms AT greedy decoding, but also further improves the decoding speed, resulting in an around $5\\times$ speedup over AT. 
Moreover, SpecDec can be easily generalized for speeding up other seq2seq tasks like Abstractive Summarization, and benefit more from stronger computing devices, demonstrating its potential to become a de facto decoding standard in the future for efficient and lossless seq2seq generation.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/db05531c3d5f744fa5dc717606925513c0a6321f.zip", "author": "Heming Xia;Tao Ge;Si-Qing Chen;Furu Wei;Zhifang Sui", "authorids": "~Heming_Xia1;~Tao_Ge1;~Si-Qing_Chen1;~Furu_Wei1;~Zhifang_Sui1", "gender": "M;M;F;M;F", "homepage": "https://hemingkx.github.io/;https://getao.github.io/;;https://www.microsoft.com/en-us/research/people/fuwei/;http://eecs.pku.edu.cn/EN/People/Faculty/Detail/?ID=6024", "dblp": "278/2940;136/7923;;72/5870;", "google_scholar": "6r2ESKkAAAAJ;LYbs7Q8AAAAJ;;G-V1VpwAAAAJ;", "orcid": "0000-0001-5074-3441;;0000-0002-6945-4540;;", "linkedin": ";;si-qing-chen-seattle/;;", "or_profile": "~Heming_Xia1;~Tao_Ge1;~Si-Qing_Chen1;~Furu_Wei1;~Zhifang_Sui1", "aff": "Peking University;Microsoft Research;Microsoft;Microsoft Research;Peking University", "aff_domain": "pku.edu.cn;microsoft.com;microsoft.com;microsoft.com;pku.edu.cn", "position": "MS student;Principal Researcher;Partner Applied Science Manager;Distinguished Scientist;Full Professor", "bibtex": "@misc{\nxia2023speculative,\ntitle={Speculative Decoding: Lossless Speedup of Autoregressive Translation},\nauthor={Heming Xia and Tao Ge and Si-Qing Chen and Furu Wei and Zhifang Sui},\nyear={2023},\nurl={https://openreview.net/forum?id=H-VlwsYvVi}\n}", "github": "", "project": "", "reviewers": "KQwP;GbVT;RP8T;cfnL", "site": "https://openreview.net/forum?id=H-VlwsYvVi", "pdf_size": 745876, "recommendation": "5;5;5;6", "confidence": "4;4;4;4", "correctness": "3;3;3;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "90;89;88;184", "wc_strength_and_weaknesses": "203;400;125;252", "wc_clarity_quality_novelty_and_reproducibility": "59;21;262;72", "wc_summary_review": "123;23;50;34", "wc_review": "475;533;525;542", "wc_reply_reviewers": "79;0;0;0", "wc_reply_authors": "709;1156;1279;602", "reply_reviewers": "1;0;0;0", "reply_authors": "2;2;2;1", "recommendation_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 112.75, 41.14228360215315 ], "wc_strength_and_weaknesses_avg": [ 245.0, 100.29705878040492 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 103.5, 93.40904667107999 ], "wc_summary_review_avg": [ 57.5, 39.01602234979881 ], "wc_review_avg": [ 518.75, 25.96512083545925 ], "wc_reply_reviewers_avg": [ 19.75, 34.208003449485325 ], "wc_reply_authors_avg": [ 936.5, 286.85057085527995 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9566311943061639712&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;1;1;0", "aff_unique_norm": "Peking University;Microsoft", "aff_unique_dep": ";Microsoft Research", "aff_unique_url": "http://www.pku.edu.cn;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "Peking U;MSR", "aff_campus_unique_index": "", "aff_campus_unique": "", 
"aff_country_unique_index": "0;1;1;1;0", "aff_country_unique": "China;United States" }, { "title": "MOAT: Alternating Mobile Convolution and Attention Brings Strong Vision Models", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11503", "id": "H0HGljkxQFN", "poster": "", "openreview": "https://openreview.net/forum?id=H0HGljkxQFN", "slides": "https://iclr.cc/virtual/2023/poster/11503", "video": "https://iclr.cc/virtual/2023/poster/11503", "author_site": "Chenglin Yang, Siyuan Qiao, Qihang Yu, Xiaoding Yuan, Yukun Zhu, Alan Yuille, Hartwig Adam, Liang-Chieh Chen", "tldr": "", "abstract": "This paper presents MOAT, a family of neural networks that build on top of MObile convolution (i.e., inverted residual blocks) and ATtention. Unlike the current works that stack separate mobile convolution and transformer blocks, we effectively merge them into a MOAT block. Starting with a standard Transformer block, we replace its multi-layer perceptron with a mobile convolution block, and further reorder it before the self-attention operation. The mobile convolution block not only enhances the network representation capacity, but also produces better downsampled features. Our conceptually simple MOAT networks are surprisingly effective, achieving 89.1% / 81.5% top-1 accuracy on ImageNet-1K / ImageNet-1K-V2 with ImageNet-22K pretraining. Additionally, MOAT can be seamlessly applied to downstream tasks that require large resolution inputs by simply converting the global attention to window attention. Thanks to the mobile convolution that effectively exchanges local information between pixels (and thus cross-windows), MOAT does not need the extra window-shifting mechanism. As a result, on COCO object detection, MOAT achieves 59.2% AP$^{\\text{box}}$ with 227M model parameters (single-scale inference, and hard NMS), and on ADE20K semantic segmentation, MOAT attains 57.6% mIoU with 496M model parameters (single-scale inference). Finally, the tiny-MOAT family, obtained by simply reducing the channel sizes, also surprisingly outperforms several mobile-specific transformer-based models on ImageNet. The tiny-MOAT family is also benchmarked on downstream tasks, serving as a baseline for the community. We hope our simple yet effective MOAT will inspire more seamless integration of convolution and self-attention. 
Code is publicly available.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/1bbf96b475a0872946b5e4f02a2fd8b6048f3381.zip", "author": "Chenglin Yang;Siyuan Qiao;Qihang Yu;Xiaoding Yuan;Yukun Zhu;Alan Yuille;Hartwig Adam;Liang-Chieh Chen", "authorids": "~Chenglin_Yang1;~Siyuan_Qiao1;~Qihang_Yu1;~Xiaoding_Yuan1;~Yukun_Zhu1;~Alan_Yuille1;~Hartwig_Adam1;~Liang-Chieh_Chen1", "gender": "M;M;;;M;M;He/him;", "homepage": "https://www.chenglinyang.com/;https://www.cs.jhu.edu/~syqiao/;;https://www.xiaodingyuan.com/;;;https://research.google/people/author37870/;http://liangchiehchen.com/", "dblp": ";43/7562;;275/2126;18/10777;y/AlanLYuille;75/948;138/2443", "google_scholar": "DsumNkgAAAAJ;6Hfk-90AAAAJ;7zZdZxsAAAAJ;p7QTY-cAAAAJ;;;fWd88tEAAAAJ;ACjYGPUAAAAJ", "orcid": ";;;;;;0000-0003-1258-4341;", "linkedin": ";;;;;;hartwig-adam-1873392/;", "or_profile": "~Chenglin_Yang1;~Siyuan_Qiao1;~Qihang_Yu1;~Xiaoding_Yuan1;~Yukun_Zhu1;~Alan_Yuille1;~Hartwig_Adam1;~Liang-Chieh_Chen1", "aff": "Johns Hopkins University;Google;Johns Hopkins University;Johns Hopkins University;Google;Johns Hopkins University;Google Research;Google", "aff_domain": "jhu.edu;google.com;jhu.edu;jhu.edu;google.com;johnshopkins.edu;google.com;google.com", "position": "PhD student;Research Scientist;PhD student;PhD student;SWE;Full Professor;Principal Researcher;Research Scientist", "bibtex": "@inproceedings{\nyang2023moat,\ntitle={{MOAT}: Alternating Mobile Convolution and Attention Brings Strong Vision Models},\nauthor={Chenglin Yang and Siyuan Qiao and Qihang Yu and Xiaoding Yuan and Yukun Zhu and Alan Yuille and Hartwig Adam and Liang-Chieh Chen},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=H0HGljkxQFN}\n}", "github": "", "project": "", "reviewers": "BAq9;W4v5;bFXs;GsU5", "pdf_size": 560299, "recommendation": "6;6;6;6", "confidence": "4;5;4;3", "correctness": "3;4;4;3", "technical_novelty": "2;2;2;2", "empirical_novelty": "3;2;3;3", "wc_summary_paper": "31;151;28;42", "wc_strength_and_weaknesses": "360;237;58;35", "wc_clarity_quality_novelty_and_reproducibility": "36;78;73;50", "wc_summary_review": "88;155;87;5", "wc_review": "515;621;246;132", "wc_reply_reviewers": "389;107;11;0", "wc_reply_authors": "3086;1287;756;747", "reply_reviewers": "2;1;1;0", "reply_authors": "6;3;2;1", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 63.0, 51.07347648241698 ], "wc_strength_and_weaknesses_avg": [ 172.5, 133.54119214684283 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 59.25, 17.07886120325357 ], "wc_summary_review_avg": [ 83.75, 53.166601358371594 ], "wc_review_avg": [ 378.5, 197.33030684616085 ], "wc_reply_reviewers_avg": [ 126.75, 157.02607267584577 ], "wc_reply_authors_avg": [ 1469.0, 958.8360130908726 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.0, 1.8708286933869707 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 78, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=297116116872847031&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=H0HGljkxQFN", "email": "jhu.edu;google.com;jhu.edu;jhu.edu;google.com;johnshopkins.edu;google.com;google.com", 
"author_num": 8, "aff_unique_index": "0;1;0;0;1;0;1;1", "aff_unique_norm": "Johns Hopkins University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.jhu.edu;https://www.google.com", "aff_unique_abbr": "JHU;Google", "aff_campus_unique_index": "1;1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Diffusion Adversarial Representation Learning for Self-supervised Vessel Segmentation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11481", "id": "H0gdPxSwkPb", "poster": "", "openreview": "https://openreview.net/forum?id=H0gdPxSwkPb", "slides": "https://iclr.cc/virtual/2023/poster/11481", "video": "https://iclr.cc/virtual/2023/poster/11481", "author_site": "Boah Kim, Yujin Oh, Jong Ye", "tldr": "", "abstract": "Vessel segmentation in medical images is one of the important tasks in the diagnosis of vascular diseases and therapy planning. Although learning-based segmentation approaches have been extensively studied, a large amount of ground-truth labels are required in supervised methods and confusing background structures make neural networks hard to segment vessels in an unsupervised manner. To address this, here we introduce a novel diffusion adversarial representation learning (DARL) model that leverages a denoising diffusion probabilistic model with adversarial learning, and apply it to vessel segmentation. In particular, for self-supervised vessel segmentation, DARL learns the background signal using a diffusion module, which lets a generation module effectively provide vessel representations. Also, by adversarial learning based on the proposed switchable spatially-adaptive denormalization, our model estimates synthetic fake vessel images as well as vessel segmentation masks, which further makes the model capture vessel-relevant semantic information. Once the proposed model is trained, the model generates segmentation masks in a single step and can be applied to general vascular structure segmentation of coronary angiography and retinal images. 
Experimental results on various datasets show that our method significantly outperforms existing unsupervised and self-supervised vessel segmentation methods.", "keywords": "Diffusion model;Adversarial learning;Self-supervised learning;Vessel segmentation", "primary_area": "", "supplementary_material": "/attachment/5672aa9ea2e1ee812adbd9e4725582173eea2177.zip", "author": "Boah Kim;Yujin Oh;Jong Chul Ye", "authorids": "~Boah_Kim1;~Yujin_Oh1;~Jong_Chul_Ye1", "gender": "F;F;M", "homepage": ";;https://bispl.weebly.com/", "dblp": "239/4299;;15/5613", "google_scholar": "1IkNuooAAAAJ;4nnw088AAAAJ;HNMjoNEAAAAJ", "orcid": ";0000-0003-4319-8435;", "linkedin": ";;", "or_profile": "~Boah_Kim1;~Yujin_Oh1;~Jong_Chul_Ye1", "aff": "Korea Advanced Institute of Science & Technology;KAIST;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nkim2023diffusion,\ntitle={Diffusion Adversarial Representation Learning for Self-supervised Vessel Segmentation},\nauthor={Boah Kim and Yujin Oh and Jong Chul Ye},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=H0gdPxSwkPb}\n}", "github": "", "project": "", "reviewers": "yNNg;xqnu;qGAg;dTPf", "pdf_size": 10566030, "recommendation": "6;6;6;6", "confidence": "4;4;4;4", "correctness": "2;3;4;3", "technical_novelty": "3;3;4;3", "empirical_novelty": "3;3;4;3", "wc_summary_paper": "50;169;84;74", "wc_strength_and_weaknesses": "165;402;423;162", "wc_clarity_quality_novelty_and_reproducibility": "10;44;123;75", "wc_summary_review": "17;31;31;29", "wc_review": "242;646;661;340", "wc_reply_reviewers": "0;0;83;0", "wc_reply_authors": "757;1913;1701;644", "reply_reviewers": "0;0;2;0", "reply_authors": "2;5;5;2", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 94.25, 44.890839822841365 ], "wc_strength_and_weaknesses_avg": [ 288.0, 124.72569903592444 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 63.0, 41.57523301197481 ], "wc_summary_review_avg": [ 27.0, 5.830951894845301 ], "wc_review_avg": [ 472.25, 184.6081999803909 ], "wc_reply_reviewers_avg": [ 20.75, 35.94005425705421 ], "wc_reply_authors_avg": [ 1253.75, 559.7317996147798 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 3.5, 1.5 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 85, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10420010746445712898&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=H0gdPxSwkPb", "email": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "Time Will Tell: New Outlooks and A Baseline for Temporal Multi-View 3D Object Detection", "status": "Top-5%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10890", "id": "H3HcEJA2Um", "poster": "", 
"openreview": "https://openreview.net/forum?id=H3HcEJA2Um", "slides": "https://iclr.cc/virtual/2023/poster/10890", "video": "https://iclr.cc/virtual/2023/poster/10890", "author_site": "Jinhyung Park, Chenfeng Xu, Shijia Yang, Kurt Keutzer, Kris Kitani, Masayoshi Tomizuka, Wei Zhan", "tldr": "We leverage complementary coarse, long-term and fine-grained, short-term multi-view stereo for camera-only 3D object detection.", "abstract": "While recent camera-only 3D detection methods leverage multiple timesteps, the limited history they use significantly hampers the extent to which temporal fusion can improve object perception. Observing that existing works' fusion of multi-frame images are instances of temporal stereo matching, we find that performance is hindered by the interplay between 1) the low granularity of matching resolution and 2) the sub-optimal multi-view setup produced by limited history usage. Our theoretical and empirical analysis demonstrates that the optimal temporal difference between views varies significantly for different pixels and depths, making it necessary to fuse many timesteps over long-term history. Building on our investigation, we propose to generate a cost volume from a long history of image observations, compensating for the coarse but efficient matching resolution with a more optimal multi-view matching setup. Further, we augment the per-frame monocular depth predictions used for long-term, coarse matching with short-term, fine-grained matching and find that long and short term temporal fusion are highly complementary. While maintaining high efficiency, our framework sets new state-of-the-art on nuScenes, achieving first place on the test set and outperforming previous best art by 5.2% mAP and 3.7% NDS on the validation set. Code will be released here: https://github.com/Divadi/SOLOFusion.", "keywords": "Computer Vision;3D Object Detection;Stereo Matching", "primary_area": "", "supplementary_material": "", "author": "Jinhyung Park;Chenfeng Xu;Shijia Yang;Kurt Keutzer;Kris M. Kitani;Masayoshi Tomizuka;Wei Zhan", "authorids": "~Jinhyung_Park1;~Chenfeng_Xu1;~Shijia_Yang1;~Kurt_Keutzer1;~Kris_M._Kitani1;~Masayoshi_Tomizuka1;~Wei_Zhan2", "gender": "M;M;F;M;M;M;", "homepage": "http://jindapark.github.io/;;;https://people.eecs.berkeley.edu/~keutzer/;http://www.cs.cmu.edu/~kkitani/;https://me.berkeley.edu/people/masayoshi-tomizuka/;", "dblp": "213/8792.html;65/1881;;k/KurtKeutzer.html;42/163;10/4434;", "google_scholar": "L3Ea5NIAAAAJ;RpqvaTUAAAAJ;;ID9QePIAAAAJ;yv3sH74AAAAJ;;", "orcid": ";0000-0002-4941-6985;;0000-0003-3868-8501;0000-0002-9389-4060;;", "linkedin": "jinhyun1/;;bronya-shijia-yang-762927193/;kurtkeutzer/;;;", "or_profile": "~Jinhyung_Park1;~Chenfeng_Xu1;~Shijia_Yang1;~Kurt_Keutzer1;~Kris_M._Kitani1;~Masayoshi_Tomizuka1;~Wei_Zhan2", "aff": "Meta;University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;Carnegie Mellon University;University of California, Berkeley;", "aff_domain": "meta.com;berkeley.edu;berkeley.edu;berkeley.edu;cmu.edu;berkeley.edu;", "position": "Intern;PhD student;Undergrad student;Full Professor;Associate Professor;Full Professor;", "bibtex": "@inproceedings{\npark2023time,\ntitle={Time Will Tell: New Outlooks and A Baseline for Temporal Multi-View 3D Object Detection},\nauthor={Jinhyung Park and Chenfeng Xu and Shijia Yang and Kurt Keutzer and Kris M. 
Kitani and Masayoshi Tomizuka and Wei Zhan},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=H3HcEJA2Um}\n}", "github": "", "project": "", "reviewers": "FyEa;U6zw;Bu5e", "pdf_size": 6084996, "recommendation": "8;8;8", "confidence": "5;4;3", "correctness": "3;4;4", "technical_novelty": "3;3;4", "empirical_novelty": "4;3;4", "wc_summary_paper": "104;150;81", "wc_strength_and_weaknesses": "413;215;87", "wc_clarity_quality_novelty_and_reproducibility": "45;56;48", "wc_summary_review": "132;47;33", "wc_review": "694;468;249", "wc_reply_reviewers": "42;0;0", "wc_reply_authors": "834;346;137", "reply_reviewers": "1;0;0", "reply_authors": "2;1;1", "recommendation_avg": [ 8.0, 0.0 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 111.66666666666667, 28.686039965266882 ], "wc_strength_and_weaknesses_avg": [ 238.33333333333334, 134.10775103956107 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 49.666666666666664, 4.642796092394707 ], "wc_summary_review_avg": [ 70.66666666666667, 43.74420596553966 ], "wc_review_avg": [ 470.3333333333333, 181.67798129902505 ], "wc_reply_reviewers_avg": [ 14.0, 19.79898987322333 ], "wc_reply_authors_avg": [ 439.0, 292.04908263281135 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 192, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2198577936319456732&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=H3HcEJA2Um", "email": "meta.com;berkeley.edu;berkeley.edu;berkeley.edu;cmu.edu;berkeley.edu;", "author_num": 7, "aff_unique_index": "0;1;1;1;2;1", "aff_unique_norm": "Meta;University of California, Berkeley;Carnegie Mellon University", "aff_unique_dep": "Meta Platforms, Inc.;;", "aff_unique_url": "https://meta.com;https://www.berkeley.edu;https://www.cmu.edu", "aff_unique_abbr": "Meta;UC Berkeley;CMU", "aff_campus_unique_index": "1;1;1;1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Diminishing Return of Value Expansion Methods in Model-Based Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11586", "id": "H4Ncs5jhTCu", "poster": "/media/PosterPDFs/ICLR%202023/11586.png?t=1682597943.5917115", "openreview": "https://openreview.net/forum?id=H4Ncs5jhTCu", "slides": "https://iclr.cc/virtual/2023/poster/11586", "video": "https://iclr.cc/virtual/2023/poster/11586", "author_site": "Daniel Palenicek, Michael Lutter, Joao Carvalho, Jan Peters", "tldr": "", "abstract": "Model-based reinforcement learning is one approach to increase sample efficiency. However, the accuracy of the dynamics model and the resulting compounding error over modelled trajectories are commonly regarded as key limitations. A natural question to ask is: How much more sample efficiency can be gained by improving the learned dynamics models? Our paper empirically answers this question for the class of model-based value expansion methods in continuous control problems. 
Value expansion methods should benefit from increased model accuracy by enabling longer rollout horizons and better value function approximations. Our empirical study, which leverages oracle dynamics models to avoid compounding model errors, shows that (1) longer horizons increase sample efficiency, but the gain in improvement decreases with each additional expansion step, and (2) the increased model accuracy only marginally increases the sample efficiency compared to learned models with identical horizons. Therefore, longer horizons and increased model accuracy yield diminishing returns in terms of sample efficiency. These improvements in sample efficiency are particularly disappointing when compared to model-free value expansion methods. Even though they introduce no computational overhead, we find their performance to be on par with model-based value expansion methods. Therefore, we conclude that the limitation of model-based value expansion methods is not the model accuracy of the learned models. While higher model accuracy is beneficial, our experiments show that even a perfect model will not provide unrivaled sample efficiency but that the bottleneck lies elsewhere.", "keywords": "Model-based Reinforcement Learning;Value Expansion", "primary_area": "", "supplementary_material": "/attachment/fa66d1e0accfaa6cc51a34b84780a345758380f9.zip", "author": "Daniel Palenicek;Michael Lutter;Joao Carvalho;Jan Peters", "authorids": "~Daniel_Palenicek1;~Michael_Lutter1;~Joao_Carvalho1;~Jan_Peters3", "gender": "M;M;M;M", "homepage": ";http://mlutter.eu;https://www.ias.informatik.tu-darmstadt.de/Team/JoaoCarvalho;https://www.jan-peters.net", "dblp": "267/9480;;;p/JanPeters1", "google_scholar": "AtX0UHsAAAAJ;https://scholar.google.de/citations?user=Wvdo5bYAAAAJ;xYUxMF0AAAAJ;https://scholar.google.de/citations?user=-kIVAcAAAAAJ", "orcid": "0000-0002-8292-1318;;;0000-0002-5266-8091", "linkedin": "danielpalenicek/;;;janrpeters/", "or_profile": "~Daniel_Palenicek1;~Michael_Lutter1;~Joao_Carvalho1;~Jan_Peters3", "aff": "Technische Universit\u00e4t Darmstadt;Boston Dynamics;TU Darmstadt;TU Darmstadt", "aff_domain": "tu-darmstadt.de;bostondynamics.com;tu-darmstadt.de;tu-darmstadt.de", "position": "PhD student;Researcher;PhD student;Full Professor", "bibtex": "@inproceedings{\npalenicek2023diminishing,\ntitle={Diminishing Return of Value Expansion Methods in Model-Based Reinforcement Learning},\nauthor={Daniel Palenicek and Michael Lutter and Joao Carvalho and Jan Peters},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=H4Ncs5jhTCu}\n}", "github": "", "project": "", "reviewers": "kpea;vr7K;Q2WM;5jBe", "pdf_size": 1618912, "recommendation": "6;6;6;8", "confidence": "4;2;4;4", "correctness": "2;3;3;4", "technical_novelty": "3;2;3;3", "empirical_novelty": "3;2;3;4", "wc_summary_paper": "98;105;490;118", "wc_strength_and_weaknesses": "588;217;1021;166", "wc_clarity_quality_novelty_and_reproducibility": "135;18;241;176", "wc_summary_review": "74;10;93;32", "wc_review": "895;350;1845;492", "wc_reply_reviewers": "0;124;199;0", "wc_reply_authors": "346;414;1118;40", "reply_reviewers": "0;1;1;0", "reply_authors": "1;2;2;1", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 202.75, 165.99905873227112 ],
"wc_strength_and_weaknesses_avg": [ 498.0, 343.0794368655749 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 142.5, 81.21114455541185 ], "wc_summary_review_avg": [ 52.25, 32.89661836724255 ], "wc_review_avg": [ 895.5, 583.5094258021887 ], "wc_reply_reviewers_avg": [ 80.75, 84.9922790611006 ], "wc_reply_authors_avg": [ 479.5, 394.6374918833739 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 0.816496580927726, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3150099774999492207&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=H4Ncs5jhTCu", "email": "tu-darmstadt.de;bostondynamics.com;tu-darmstadt.de;tu-darmstadt.de", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Technische Universit\u00e4t Darmstadt;Boston Dynamics", "aff_unique_dep": ";", "aff_unique_url": "https://www.tu-darmstadt.de;https://www.bostondynamics.com", "aff_unique_abbr": "TUD;BD", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Darmstadt", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "Germany;United States" }, { "id": "H4xO3doonl-", "title": "Optimising Event-Driven Spiking Neural Network with Regularisation and Cutoff", "track": "main", "status": "Withdraw", "tldr": "Two novel optimisation techniques are presented to consider anytime optimal inference SNNs, AOI-SNNs: a regularisation and a cutoff.", "abstract": "Spiking neural networks (SNNs), a variant of artificial neural networks (ANNs) with the benefit of energy efficiency, have achieved the accuracy close to its ANN counterparts, on benchmark datasets such as CIFAR10/100 and ImageNet. However, comparing with frame-based input (e.g., images), event-based inputs from e.g., Dynamic Vision Sensor (DVS) can make a better use of SNNs thanks to the SNNs' asynchronous working mechanism. In this paper, we strengthen the marriage between SNNs and event-based inputs with a proposal to consider anytime optimal inference SNNs, or AOI-SNNs, which can terminate anytime during the inference to achieve optimal inference result. Two novel optimisation techniques are presented to achieve AOI-SNNs: a regularisation and a cutoff. The regularisation enables the training and construction of SNNs with optimised performance, and the cutoff technique optimises the inference of SNNs on event-driven inputs. We conduct an extensive set of experiments on multiple benchmark event-based datasets, including CIFAR10-DVS, N-Caltech101 and DVS128 Gesture. The experimental results demonstrate that our techniques are superior to the state-of-the-art with respect to the accuracy and latency. 
", "keywords": "Spiking Neural Network;Event-driven Neural Network;ANN-to-SNN Conversion", "primary_area": "", "supplementary_material": "/attachment/b40f9bd6bb5b2fb0d04f21c7b337e0659175f25b.zip", "author": "Dengyu Wu;Gaojie Jin;Han Yu;Xinping Yi;Xiaowei Huang", "authorids": "~Dengyu_Wu1;~Gaojie_Jin1;~Han_Yu4;~Xinping_Yi1;~Xiaowei_Huang1", "gender": ";M;F;M;M", "homepage": ";https://alexkael.github.io/;;https://sites.google.com/site/xinpingyi00/;https://cgi.csc.liv.ac.uk/~xiaowei/", "dblp": ";276/5476;;95/10043.html;60/5414-1.html", "google_scholar": ";n_cu7jwAAAAJ;;wAcbI5kAAAAJ;https://scholar.google.co.uk/citations?user=X4fLCCIAAAAJ", "orcid": ";;0000-0002-8489-2266;;", "linkedin": ";;;;", "or_profile": "~Dengyu_Wu1;~Gaojie_Jin1;~Han_Yu4;~Xinping_Yi1;~Xiaowei_Huang1", "aff": ";University of Liverpool;University of Liverpool;University of Liverpool;University of Liverpool", "aff_domain": ";liverpool.ac.uk;liverpool.ac.uk;liverpool.ac.uk;liverpool.ac.uk", "position": ";PhD student;PhD student;Assistant Professor;Full Professor", "bibtex": "@misc{\nwu2023optimising,\ntitle={Optimising Event-Driven Spiking Neural Network with Regularisation and Cutoff},\nauthor={Dengyu Wu and Gaojie Jin and Han Yu and Xinping Yi and Xiaowei Huang},\nyear={2023},\nurl={https://openreview.net/forum?id=H4xO3doonl-}\n}", "github": "", "project": "", "reviewers": "ozZz;Tbtq;zEep;5NBM;uret", "site": "https://openreview.net/forum?id=H4xO3doonl-", "pdf_size": 2258935, "recommendation": "3;5;6;6;6", "confidence": "4;5;2;4;4", "correctness": "2;2;3;3;3", "technical_novelty": "1;2;3;3;3", "empirical_novelty": "2;2;3;3;3", "wc_summary_paper": "44;51;112;40;97", "wc_strength_and_weaknesses": "188;183;81;257;168", "wc_clarity_quality_novelty_and_reproducibility": "17;51;45;15;15", "wc_summary_review": "32;21;50;10;41", "wc_review": "281;306;288;322;321", "wc_reply_reviewers": "344;102;0;244;0", "wc_reply_authors": "1702;1044;433;1215;247", "reply_reviewers": "1;1;0;2;0", "reply_authors": "4;2;1;3;1", "recommendation_avg": [ 5.2, 1.16619037896906 ], "confidence_avg": [ 3.8, 0.9797958971132712 ], "correctness_avg": [ 2.6, 0.4898979485566356 ], "technical_novelty_avg": [ 2.4, 0.8 ], "empirical_novelty_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_paper_avg": [ 68.8, 29.741553422778708 ], "wc_strength_and_weaknesses_avg": [ 175.4, 56.28712108466732 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 28.6, 15.969971822141703 ], "wc_summary_review_avg": [ 30.8, 14.161920773680384 ], "wc_review_avg": [ 303.6, 16.74037036627326 ], "wc_reply_reviewers_avg": [ 138.0, 136.42287198267013 ], "wc_reply_authors_avg": [ 928.2, 529.8465438218881 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 2.2, 1.16619037896906 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.31506301890630223, "corr_recommendation_correctness": 0.840168050416806, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Liverpool", "aff_unique_dep": "", "aff_unique_url": "https://www.liverpool.ac.uk", "aff_unique_abbr": "Liv Uni", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "H6LVUiHzYDE", "title": "MEGAN: Multi Explanation Graph Attention Network", "track": "main", "status": "Reject", "tldr": "Novel, self-explaining graph attention network features multiple explanation channels independent of task specifications to 
improve interpretability of graph regression and classification problems", "abstract": "Explainable artificial intelligence (XAI) methods are expected to improve trust during human-AI interactions, provide tools for model analysis and extend human understanding of complex problems. Attention-based models are an important subclass of XAI methods, partly due to their full differentiability and the potential to improve explanations by means of explanation-supervised training. We propose the novel multi-explanation graph attention network (MEGAN). Our graph regression and classification model features multiple explanation channels, which can be chosen independently of the task specifications. We first validate our model on a synthetic graph regression dataset, where our model produces single-channel explanations with quality similar to GNNExplainer. Furthermore, we demonstrate the advantages of multi-channel explanations on one synthetic and two real-world datasets: The prediction of water solubility of molecular graphs and sentiment classification of movie reviews. We find that our model produces explanations consistent with human intuition, opening the way to learning from our model in less well-understood tasks.", "keywords": "explainable artificial intelligence;interpretable machine learning;graph neural networks;attention network;graph regression;graph classification", "primary_area": "", "supplementary_material": "", "author": "Jonas Teufel;Luca Torresi;Patrick Nicholas Reiser;Pascal Friederich", "authorids": "~Jonas_Teufel1;~Luca_Torresi1;~Patrick_Nicholas_Reiser1;~Pascal_Friederich1", "gender": "M;M;M;M", "homepage": "https://blog.electronic-heart.com/home/;;;https://aimat.science", "dblp": ";327/3364;284/9584;182/0165", "google_scholar": "https://scholar.google.com/citations?hl=en;8hKjRuQAAAAJ;by0iT6IAAAAJ;3B5h6u0AAAAJ", "orcid": ";0000-0003-2205-6753;;0000-0003-4465-1465", "linkedin": "jonas-teufel/;luca7520b616b;;pascal-friederich-6088b9117/", "or_profile": "~Jonas_Teufel1;~Luca_Torresi1;~Patrick_Nicholas_Reiser1;~Pascal_Friederich1", "aff": ";Karlsruher Institut f\u00fcr Technologie;Karlsruher Institut f\u00fcr Technologie;Karlsruher Institut f\u00fcr Technologie", "aff_domain": ";kit.edu;kit.edu;kit.edu", "position": ";PhD student;Postdoc;Assistant Professor", "bibtex": "@misc{\nteufel2023megan,\ntitle={{MEGAN}: Multi Explanation Graph Attention Network},\nauthor={Jonas Teufel and Luca Torresi and Patrick Nicholas Reiser and Pascal Friederich},\nyear={2023},\nurl={https://openreview.net/forum?id=H6LVUiHzYDE}\n}", "github": "", "project": "", "reviewers": "8m6Y;Bi9b;NxCN;EFXj", "site": "https://openreview.net/forum?id=H6LVUiHzYDE", "pdf_size": 5053332, "recommendation": "3;3;3;6", "confidence": "3;3;3;3", "correctness": "3;3;3;3", "technical_novelty": "2;2;2;4", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "122;37;163;108", "wc_strength_and_weaknesses": "299;152;83;365", "wc_clarity_quality_novelty_and_reproducibility": "61;9;43;41", "wc_summary_review": "37;152;48;76", "wc_review": "519;350;337;590", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "854;1207;778;708", "reply_reviewers": "0;0;0;0", "reply_authors": "3;3;2;2", "recommendation_avg": [ 3.75, 1.299038105676658 ], "confidence_avg": [ 3.0, 0.0 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.5, 0.8660254037844386 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 107.5, 45.445021729558015 ], "wc_strength_and_weaknesses_avg": [ 224.75, 112.43748262923712 ], 
"wc_clarity_quality_novelty_and_reproducibility_avg": [ 38.5, 18.728320800328042 ], "wc_summary_review_avg": [ 78.25, 44.890839822841365 ], "wc_review_avg": [ 449.0, 108.54261835795191 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 886.75, 191.97053810415807 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6255112479552764610&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0;0", "aff_unique_norm": "Karlsruher Institut f\u00fcr Technologie", "aff_unique_dep": "", "aff_unique_url": "https://www.kit.edu", "aff_unique_abbr": "KIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "id": "H6T7AAoTUsR", "title": "Towards Realtime Distributed Virtual Flow Meter via Compressed Continual Learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "A robust-accurate estimation of fluid flow is the main building block of a distributed virtual flow meter. Unfortunately, a big leap in algorithm development would be required for this objective to come to fruition, mainly due to the inability of current machine learning algorithms to make predictions outside the training data distribution. To improve predictions outside the training distribution, we explore the Continual Learning (CL) paradigm for accurately estimating the characteristics of fluid flow in pipelines. A significant challenge facing CL is the concept of catastrophic forgetting. In this paper, we provide a novel approach of how to address the forgetting problem via compressing the distributed sensor data to increase the capacity of CL memory bank using a compressive learning algorithm. Through extensive experiments, we show that our approach provides around 8% accuracy improvement compared to other CL algorithms in the real-field distributed sensor dataset. 
Noticeable accuracy improvement is also achieved when using our proposed approach with the CL-benchmark datasets, achieving state-of-the-art accuracies of 94.95% and 77.27% for the MNIST and CIFAR-10 datasets, respectively.", "keywords": "continual learning;distributed sensor;compressed learning", "primary_area": "", "supplementary_material": "", "author": "Hasan Asy'ari Arief;Peter J Thomas;Kevin Constable;Aggelos Katsaggelos", "authorids": "~Hasan_Asy'ari_Arief1;~Peter_J_Thomas1;keco@equinor.com;~Aggelos_Katsaggelos1", "gender": "M;M;;M", "homepage": "https://www.linkedin.com/in/hasanasyariarief/;https://www.researchgate.net/profile/Peter_Thomas50;;http://www.mccormick.northwestern.edu/research-faculty/directory/profiles/katsaggelos-aggelos.html", "dblp": ";;;k/AggelosKAggelos", "google_scholar": ";;;https://scholar.google.com.tw/citations?user=aucB85kAAAAJ", "orcid": ";;;0000-0003-4554-0070", "linkedin": ";;;", "or_profile": "~Hasan_Asy'ari_Arief1;~Peter_J_Thomas1;keco@equinor.com;~Aggelos_Katsaggelos1", "aff": "NORCE Research Centre AS;NORCE;;Northwestern University", "aff_domain": "norceresearch.no;norceresearch.no;;northwestern.edu", "position": "Postdoc;Principal Researcher;;Full Professor", "bibtex": "@misc{\narief2023towards,\ntitle={Towards Realtime Distributed Virtual Flow Meter via Compressed Continual Learning},\nauthor={Hasan Asy'ari Arief and Peter J Thomas and Kevin Constable and Aggelos Katsaggelos},\nyear={2023},\nurl={https://openreview.net/forum?id=H6T7AAoTUsR}\n}", "github": "", "project": "", "reviewers": "K9gH;Am5c;2L1q", "site": "https://openreview.net/forum?id=H6T7AAoTUsR", "pdf_size": 3693060, "recommendation": "3;3;5", "confidence": "3;3;3", "correctness": "2;3;2", "technical_novelty": "1;2;2", "empirical_novelty": "2;2;2", "wc_summary_paper": "108;33;106", "wc_strength_and_weaknesses": "140;128;169", "wc_clarity_quality_novelty_and_reproducibility": "36;46;90", "wc_summary_review": "37;56;320", "wc_review": "321;263;685", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.0 ], "correctness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 82.33333333333333, 34.89348872720462 ], "wc_strength_and_weaknesses_avg": [ 145.66666666666666, 17.21110752456745 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 57.333333333333336, 23.456816114345575 ], "wc_summary_review_avg": [ 137.66666666666666, 129.16225798927832 ], "wc_review_avg": [ 423.0, 186.76901955802697 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": -0.5, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:DTPmiOxrjQIJ:scholar.google.com/&scioq=Towards+Realtime+Distributed+Virtual+Flow+Meter+via+Compressed+Continual+Learning&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;1;2", "aff_unique_norm": "NORCE Research Centre;NORCE Norwegian Research Centre;Northwestern University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.norce.no;https://www.norce.no;https://www.northwestern.edu", "aff_unique_abbr": "NORCE;NORCE;NU", "aff_campus_unique_index": "", 
"aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Norway;United States" }, { "id": "H71l8_zALJ", "title": "ProSampler: Improving Contrastive Learning by Better Mini-batch Sampling", "track": "main", "status": "Reject", "tldr": "", "abstract": "In-batch contrastive learning has emerged as a state-of-the-art self-supervised learning solution, with the philosophy of bringing semantically similar instances closer while pushing dissimilar instances apart within a mini-batch. However, the in-batch negative sharing strategy is limited by the batch size and falls short of prioritizing the informative negatives (i.e., hard negatives) globally. In this paper, we propose to sample mini-batches with hard negatives on a proximity graph in which the instances (nodes) are connected according to the similarity measurement. Sampling on the proximity graph can better exploit the hard negatives globally by bridging in similar instances from the entire dataset. The proposed method can flexibly explore the negatives by modulating two parameters, and we show that such flexibility is the key to better exploit hard negative globally. We evaluate the proposed method on three representative contrastive learning algorithms, each of which corresponds to one modality: image, text, and graph. Besides, we also apply it to the variants of the InfoNCE objective to verify its generality. The results show that our method can consistently boost the performance of contrastive methods, with a relative improvement of 2.5% for SimCLR on ImageNet-100, 1.4% for SimCSE on the standard STS task, and 1.2% for GraphCL on the COLLAB dataset.", "keywords": "global hard negative sampling;contrastive learning", "primary_area": "", "supplementary_material": "/attachment/d4149aed6df9ed8088b6ffa5d5a42e7d83c8808f.zip", "author": "Zhen Yang;Tinglin Huang;Ming Ding;Zhitao Ying;Yukuo Cen;Yangliao Geng;Yuxiao Dong;Jie Tang", "authorids": "~Zhen_Yang9;~Tinglin_Huang1;~Ming_Ding1;~Zhitao_Ying1;~Yukuo_Cen1;~Yangliao_Geng1;~Yuxiao_Dong1;~Jie_Tang1", "gender": "F;M;M;M;M;M;M;", "homepage": "https://zyang-16.github.io/zhenyang.github.io/;https://huangtinglin.github.io/;;https://www.cs.yale.edu/homes/ying-rex;https://www.cenyk1230.top/;;https://keg.cs.tsinghua.edu.cn/yuxiao/;", "dblp": ";;48/3462-4;209/4936;241/4994;190/7083.html;17/9267;", "google_scholar": "zPVItDgAAAAJ;izW2ygYAAAAJ;Va50YzkAAAAJ;6fqNXooAAAAJ;LCjW058AAAAJ;https://scholar.google.com.hk/citations?user=gA0xam0AAAAJ;https://scholar.google.com.hk/citations?hl=en;", "orcid": ";0009-0005-5644-4879;;;0000-0001-5682-2810;0000-0002-0084-5164;0000-0002-6092-2002;", "linkedin": ";;;rex-ying-92770148/;;;;", "or_profile": "~Zhen_Yang9;~Tinglin_Huang1;~Ming_Ding1;~Zhitao_Ying1;~Yukuo_Cen1;~Yangliao_Geng1;~Yuxiao_Dong1;~Jie_Tang1", "aff": "Tsinghua University;Yale University;Tsinghua University;Yale University;Tsinghua University;Tsinghua University;Tsinghua University;", "aff_domain": "tsinghua.edu.cn;yale.edu;tsinghua.edu.cn;yale.edu;mails.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;", "position": "PhD student;PhD student;PhD student;Assistant Professor;PhD student;Postdoc;Associate Professor;", "bibtex": "@misc{\nyang2023prosampler,\ntitle={ProSampler: Improving Contrastive Learning by Better Mini-batch Sampling},\nauthor={Zhen Yang and Tinglin Huang and Ming Ding and Zhitao Ying and Yukuo Cen and Yangliao Geng and Yuxiao Dong and Jie Tang},\nyear={2023},\nurl={https://openreview.net/forum?id=H71l8_zALJ}\n}", "github": "", "project": "", "reviewers": 
"4epo;FNA5;nXV4;uJHq", "site": "https://openreview.net/forum?id=H71l8_zALJ", "pdf_size": 20323891, "recommendation": "3;3;6;8", "confidence": "4;5;4;5", "correctness": "3;3;3;4", "technical_novelty": "2;2;3;3", "empirical_novelty": "3;2;3;3", "wc_summary_paper": "83;58;74;130", "wc_strength_and_weaknesses": "121;305;166;204", "wc_clarity_quality_novelty_and_reproducibility": "30;7;28;200", "wc_summary_review": "37;31;61;254", "wc_review": "271;401;329;788", "wc_reply_reviewers": "101;96;0;0", "wc_reply_authors": "971;1480;584;410", "reply_reviewers": "1;1;0;0", "reply_authors": "3;4;1;2", "recommendation_avg": [ 5.0, 2.1213203435596424 ], "confidence_avg": [ 4.5, 0.5 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 86.25, 26.799020504488592 ], "wc_strength_and_weaknesses_avg": [ 199.0, 67.88593374182902 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 66.25, 77.74437278671685 ], "wc_summary_review_avg": [ 95.75, 92.05263440010829 ], "wc_review_avg": [ 447.25, 202.04996288047172 ], "wc_reply_reviewers_avg": [ 49.25, 49.281715676303314 ], "wc_reply_authors_avg": [ 861.25, 410.9108023646981 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": 0.23570226039551587, "corr_recommendation_correctness": 0.8164965809277261, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:zo1bwTh-_tAJ:scholar.google.com/&scioq=ProSampler:+Improving+Contrastive+Learning+by+Better+Mini-batch+Sampling&hl=en&as_sdt=0,38", "gs_version_total": 0, "aff_unique_index": "0;1;0;1;0;0;0", "aff_unique_norm": "Tsinghua University;Yale University", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.yale.edu", "aff_unique_abbr": "THU;Yale", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1;0;0;0", "aff_country_unique": "China;United States" }, { "id": "H73xwqPfW2f", "title": "Multitask Reinforcement Learning by Optimizing Neural Pathways", "track": "main", "status": "Reject", "tldr": "Proposing a novel multitask learning framework using task-specific neural pathways for online and offline reinforcement learning.", "abstract": "Reinforcement learning (RL) algorithms have achieved great success in learning specific tasks, as evidenced by examples such as AlphaGo or fusion control. However, it is still difficult for an RL agent to learn how to solve multiple tasks. In this paper, we propose a novel multitask learning framework, in which multiple specialized pathways through a single network are trained simultaneously, with each pathway focusing on a single task. We show that this approach achieves competitive performance with existing multitask RL methods, while using only 5% of the number of neurons per task. 
We demonstrate empirically the success of our approach on several continuous control tasks, in both online and offline training.", "keywords": "Multitask Learning;Online Reinforcement Learning;Offline Reinforcement Learning;Neural Pathways", "primary_area": "", "supplementary_material": "", "author": "Samin Yeasar Arnob;Riyasat Ohib;Amy Zhang;Sergey Plis;Doina Precup", "authorids": "~Samin_Yeasar_Arnob1;~Riyasat_Ohib1;~Amy_Zhang1;~Sergey_Plis1;~Doina_Precup1", "gender": "M;M;M;F;F", "homepage": "https://www.linkedin.com/in/samin-yeasar-arnob/;https://www.riyasatohib.com/;;http://cs.mcgill.ca/~dprecup/;", "dblp": ";;07/227;p/DoinaPrecup;43/2754", "google_scholar": "RMPv4RQAAAAJ;;https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=j54VcVEAAAAJ;", "orcid": ";;0000-0003-0040-0365;;", "linkedin": ";;sergeyplis/;;", "or_profile": "~Samin_Yeasar_Arnob1;~Riyasat_Ohib1;~Sergey_Plis1;~Doina_Precup1;~Amy_Zhang2", "aff": "McGill University;Georgia Institute of Technology;Georgia State University;McGill University;Meta Facebook", "aff_domain": "mcgill.ca;gatech.edu;gsu.edu;mcgill.ca;facebook.com", "position": "PhD student;PhD student;Associate Professor;Associate Professor;Research Scientist", "bibtex": "@misc{\narnob2023multitask,\ntitle={Multitask Reinforcement Learning by Optimizing Neural Pathways},\nauthor={Samin Yeasar Arnob and Riyasat Ohib and Amy Zhang and Sergey Plis and Doina Precup},\nyear={2023},\nurl={https://openreview.net/forum?id=H73xwqPfW2f}\n}", "github": "", "project": "", "reviewers": "rc1X;Q8xY;6eEZ;jX9C", "site": "https://openreview.net/forum?id=H73xwqPfW2f", "pdf_size": 2570568, "recommendation": "3;3;5;6", "confidence": "4;3;2;4", "correctness": "3;2;2;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;2;2;4", "wc_summary_paper": "69;52;64;109", "wc_strength_and_weaknesses": "239;227;180;264", "wc_clarity_quality_novelty_and_reproducibility": "70;58;119;388", "wc_summary_review": "31;39;28;68", "wc_review": "409;376;391;829", "wc_reply_reviewers": "0;0;0;343", "wc_reply_authors": "1129;920;1374;1520", "reply_reviewers": "0;0;0;1", "reply_authors": "2;2;2;3", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 73.5, 21.406774628607646 ], "wc_strength_and_weaknesses_avg": [ 227.5, 30.5 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 158.75, 134.31562641777762 ], "wc_summary_review_avg": [ 41.5, 15.819292019556375 ], "wc_review_avg": [ 501.25, 189.58688641359137 ], "wc_reply_reviewers_avg": [ 85.75, 148.52335674903122 ], "wc_reply_authors_avg": [ 1235.75, 229.67626673211143 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.058025885318565944, "corr_recommendation_correctness": 0.19245008972987526, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Hda3tu4MXdwJ:scholar.google.com/&scioq=Multitask+Reinforcement+Learning+by+Optimizing+Neural+Pathways&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2;0;3", "aff_unique_norm": "McGill University;Georgia Institute of Technology;Georgia State University;Meta", "aff_unique_dep": ";;;Meta Platforms, Inc.", "aff_unique_url": 
"https://www.mcgill.ca;https://www.gatech.edu;https://www.gsu.edu;https://meta.com", "aff_unique_abbr": "McGill;Georgia Tech;GSU;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;1", "aff_country_unique": "Canada;United States" }, { "title": "Progressive Mix-Up for Few-Shot Supervised Multi-Source Domain Transfer", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10931", "id": "H7M_5K5qKJV", "poster": "/media/PosterPDFs/ICLR%202023/10931.png?t=1682930697.0070555", "openreview": "https://openreview.net/forum?id=H7M_5K5qKJV", "slides": "https://iclr.cc/virtual/2023/poster/10931", "video": "https://iclr.cc/virtual/2023/poster/10931", "author_site": "Ronghang Zhu, Ronghang Zhu, Xiang Yu, Sheng Li", "tldr": "", "abstract": "This paper targets at a new and challenging setting of knowledge transfer from multiple source domains to a single target domain, where target data is few shot or even one shot with label. Traditional domain generalization or adaptation methods cannot directly work since there is no sufficient target domain distribution serving as the transfer object. The multi-source setting further prevents the transfer task as excessive domain gap introduced from all the source domains. To tackle this problem, we newly propose a progressive mix-up (P-Mixup) mechanism to introduce an intermediate mix-up domain, pushing both the source domains and the few-shot target domain aligned to this mix-up domain. Further by enforcing the mix-up domain to progressively move towards the source domains, we achieve the domain transfer from multi-source domains to the single one-shot target domain. Our P-Mixup is different from traditional mix-up that ours is with a progressive and adaptive mix-up ratio, following the curriculum learning spirit to better align the source and target domains. Moreover, our P-Mixup combines both pixel-level and feature-level mix-up to better enrich the data diversity. 
Experiments on two benchmarks show that our P-Mixup significantly outperforms the state-of-the-art methods, i.e., 6.0\\% and 6.8\\% improvements on Office-Home and DomainNet.", "keywords": "Representation Learning;Domain Adaptation", "primary_area": "", "supplementary_material": "", "author": "Ronghang Zhu;Ronghang Zhu;Xiang Yu;Sheng Li", "authorids": "~Ronghang_Zhu2;ronghangzhu@foxmail.com;~Xiang_Yu1;~Sheng_Li3", "gender": ";;M;M", "homepage": ";;https://sites.google.com/site/xiangyurutgers/;http://sheng-li.org", "dblp": ";;19/2453-2.html;23/3439-1", "google_scholar": ";;QJbtEKMAAAAJ;DEncVcYAAAAJ", "orcid": ";;;0000-0003-1205-8632", "linkedin": ";;;sheng-li-15a70022/", "or_profile": "~Ronghang_Zhu2;ronghangzhu@foxmail.com;~Xiang_Yu1;~Sheng_Li3", "aff": ";;Amazon;University of Virginia, Charlottesville", "aff_domain": ";;amazon.com;virginia.edu", "position": ";;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nzhu2023progressive,\ntitle={Progressive Mix-Up for Few-Shot Supervised Multi-Source Domain Transfer},\nauthor={Ronghang Zhu and Ronghang Zhu and Xiang Yu and Sheng Li},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=H7M_5K5qKJV}\n}", "github": "", "project": "", "reviewers": "dGEs;Fa3w;6uuP;DEHW", "pdf_size": 1174919, "recommendation": "6;6;6;8", "confidence": "3;3;4;4", "correctness": "3;3;3;3", "technical_novelty": "3;2;3;3", "empirical_novelty": "3;3;2;3", "wc_summary_paper": "52;54;94;61", "wc_strength_and_weaknesses": "154;211;293;226", "wc_clarity_quality_novelty_and_reproducibility": "4;24;35;38", "wc_summary_review": "20;41;27;31", "wc_review": "230;330;449;356", "wc_reply_reviewers": "51;38;483;149", "wc_reply_authors": "882;968;2314;949", "reply_reviewers": "1;1;3;1", "reply_authors": "3;3;5;3", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 65.25, 16.931848688197046 ], "wc_strength_and_weaknesses_avg": [ 221.0, 49.49242366261729 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 25.25, 13.329947486768281 ], "wc_summary_review_avg": [ 29.75, 7.595228765481656 ], "wc_review_avg": [ 341.25, 77.99158608465403 ], "wc_reply_reviewers_avg": [ 180.25, 179.982464423621 ], "wc_reply_authors_avg": [ 1278.25, 598.8432077764596 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 3.5, 0.8660254037844386 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13477924275390242382&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "pdf": "https://openreview.net/pdf?id=H7M_5K5qKJV", "email": ";;amazon.com;virginia.edu", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Amazon;University of Virginia", "aff_unique_dep": "Amazon.com, Inc.;", "aff_unique_url": "https://www.amazon.com;https://www.virginia.edu", "aff_unique_abbr": "Amazon;UVA", "aff_campus_unique_index": "1", "aff_campus_unique": ";Charlottesville", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "H8XpqEkbua_", "title": "Differentially Private Dataset Condensation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Recent work in ICML'22 builds a theoretical 
connection between dataset condensation (DC) and differential privacy (DP) and claims that DC can provide privacy protection for free. However, the connection is problematic because of two controversial assumptions. In this paper, we revisit the ICML'22 work and elucidate the issues in the two controversial assumptions. To correctly connect DC and DP, we propose two differentially private dataset condensation (DPDC) algorithms---LDPDC and NDPDC. Through extensive evaluations on multiple datasets, we demonstrate that LDPDC has comparable performance to recent DP generative methods despite its simplicity. NDPDC provides acceptable DP guarantees with a mild utility loss, compared to the state-of-the-art DC method. Additionally, NDPDC allows a flexible trade-off between the synthetic data utility and DP budget.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/1d1c1bb082e77b047af6ed3e466179f14c96588f.zip", "author": "Tianhang Zheng;Baochun Li", "authorids": "~Tianhang_Zheng2;~Baochun_Li1", "gender": "M;M", "homepage": ";http://iqua.ece.toronto.edu/bli/", "dblp": "212/1269.html;l/BaochunLi", "google_scholar": ";https://scholar.google.com.tw/citations?user=rkb3_FgAAAAJ", "orcid": ";0000-0003-2404-0974", "linkedin": ";https://linkedin.com/in/baochun", "or_profile": "~Tianhang_Zheng2;~Baochun_Li1", "aff": "University of Toronto;University of Toronto", "aff_domain": "utoronto.ca;toronto.edu", "position": "Student;Full Professor", "bibtex": "@misc{\nzheng2023differentially,\ntitle={Differentially Private Dataset Condensation},\nauthor={Tianhang Zheng and Baochun Li},\nyear={2023},\nurl={https://openreview.net/forum?id=H8XpqEkbua_}\n}", "github": "", "project": "", "reviewers": "Spfc;kpgg;gV1f", "site": "https://openreview.net/forum?id=H8XpqEkbua_", "pdf_size": 2122128, "recommendation": "6;6;6", "confidence": "4;3;2", "correctness": "4;3;4", "technical_novelty": "2;3;3", "empirical_novelty": "2;3;2", "wc_summary_paper": "51;43;25", "wc_strength_and_weaknesses": "306;100;20", "wc_clarity_quality_novelty_and_reproducibility": "5;5;171", "wc_summary_review": "32;20;19", "wc_review": "394;168;235", "wc_reply_reviewers": "151;0;83", "wc_reply_authors": "2938;446;1287", "reply_reviewers": "1;0;1", "reply_authors": "8;1;4", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 39.666666666666664, 10.873004286866726 ], "wc_strength_and_weaknesses_avg": [ 142.0, 120.47683041426126 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 60.333333333333336, 78.25315045131126 ], "wc_summary_review_avg": [ 23.666666666666668, 5.90668171555645 ], "wc_review_avg": [ 265.6666666666667, 94.77810342525792 ], "wc_reply_reviewers_avg": [ 78.0, 61.746794788609606 ], "wc_reply_authors_avg": [ 1557.0, 1035.1138423703292 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 4.333333333333333, 2.8674417556808756 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4765801274568469822&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0", "aff_unique_norm": "University of Toronto", "aff_unique_dep": "", "aff_unique_url": 
"https://www.utoronto.ca", "aff_unique_abbr": "U of T", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { "id": "H9LxwdiXlh", "title": "Additive Poisson Process: Learning Intensity of Higher-Order Interaction in Poisson Processes", "track": "main", "status": "Reject", "tldr": "An efficient technique that uses a log-linear model on a partial order structure to approximate a high-dimensional intensity functions in a Poisson Process.", "abstract": "We present the Additive Poisson Process (APP), a novel framework that can model the higher-order interaction effects of the intensity functions in Poisson processes using projections into lower-dimensional space. Our model combines the techniques in information geometry to model higher-order interactions on a statistical manifold and in generalized additive models to use lower-dimensional projections to overcome the effects from the curse of dimensionality. Our approach solves a convex optimization problem by minimizing the KL divergence from a sample distribution in lower-dimensional projections to the distribution modeled by an intensity function in the Poisson process. Our empirical results show that our model is able to use samples observed in the lower dimensional space to estimate the higher-order intensity function with extremely sparse observations.", "keywords": "Poisson Process;Log-Linear Model;Energy-Based Model;Generalized Additive Models;Information Geometry", "primary_area": "", "supplementary_material": "/attachment/0042eee12987c73f1d28d6dc96e833080de7ed6a.zip", "author": "Simon Luo;Feng Zhou;lamiae azizi;Mahito Sugiyama", "authorids": "~Simon_Luo1;~Feng_Zhou9;~lamiae_azizi1;~Mahito_Sugiyama1", "gender": ";;;M", "homepage": ";;https://www.maths.usyd.edu.au/u/CMIG/;https://mahito.nii.ac.jp/", "dblp": "199/2628;;;05/8421", "google_scholar": ";;https://scholar.google.fr/citations?user=fThAv-cAAAAJ;qLlRvTkAAAAJ", "orcid": ";;0000-0001-9894-2618;0000-0001-5907-9831", "linkedin": ";;;", "or_profile": "~Simon_Luo1;~Feng_Zhou9;~lamiae_azizi1;~Mahito_Sugiyama1", "aff": ";;;National Institute of Informatics", "aff_domain": ";;;nii.ac.jp", "position": ";;;Associate Professor", "bibtex": "@misc{\nluo2023additive,\ntitle={Additive Poisson Process: Learning Intensity of Higher-Order Interaction in Poisson Processes},\nauthor={Simon Luo and Feng Zhou and lamiae azizi and Mahito Sugiyama},\nyear={2023},\nurl={https://openreview.net/forum?id=H9LxwdiXlh}\n}", "github": "", "project": "", "reviewers": "x4an;8qDw;Cwen;2omy", "site": "https://openreview.net/forum?id=H9LxwdiXlh", "pdf_size": 5013173, "recommendation": "3;3;3;6", "confidence": "4;3;4;3", "correctness": "4;2;3;4", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "54;97;86;70", "wc_strength_and_weaknesses": "93;738;346;220", "wc_clarity_quality_novelty_and_reproducibility": "224;341;9;52", "wc_summary_review": "25;18;22;38", "wc_review": "396;1194;463;380", "wc_reply_reviewers": "0;0;17;0", "wc_reply_authors": "508;1278;685;500", "reply_reviewers": "0;0;1;0", "reply_authors": "1;2;2;1", "recommendation_avg": [ 3.75, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 76.75, 16.269219403523945 ], "wc_strength_and_weaknesses_avg": [ 349.25, 241.6126807516526 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 
156.5, 133.48501788590357 ], "wc_summary_review_avg": [ 25.75, 7.495832175282475 ], "wc_review_avg": [ 608.25, 339.6132911121118 ], "wc_reply_reviewers_avg": [ 4.25, 7.361215932167728 ], "wc_reply_authors_avg": [ 742.75, 317.7509834760547 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.5222329678670935, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9347708017325503881&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 2, "aff_unique_index": "0", "aff_unique_norm": "National Institute of Informatics", "aff_unique_dep": "", "aff_unique_url": "https://www.nii.ac.jp/", "aff_unique_abbr": "NII", "aff_country_unique_index": "0", "aff_country_unique": "Japan" }, { "id": "HB2HBIQKhp-", "title": "Illusory Adversarial Attacks on Sequential Decision-Makers and Countermeasures", "track": "main", "status": "Reject", "tldr": "We present illusory attacks on sequential decision-makers, which are undetectable.", "abstract": "Autonomous decision-making agents deployed in the real world need to be robust against possible adversarial attacks on sensory inputs. Existing work on adversarial attacks focuses on the notion of perceptual invariance popular in computer vision. We observe that such attacks can often be detected by victim agents, since they result in action-observation sequences that are not consistent with the dynamics of the environment. Furthermore, real-world agents, such as physical robots, commonly operate under human supervisors who are not susceptible to such attacks. We propose to instead focus on attacks that are statistically undetectable. Specifically, we propose illusory attacks, a novel class of adversarial attack that is consistent with the environment dynamics. We introduce a novel algorithm that can learn illusory attacks end-to-end. We empirically verify that our algorithm generates attacks that, in contrast to current methods, are undetectable both to AI\nagents with an environment dynamics model and to humans. Furthermore, we show that existing robustification approaches are relatively ineffective against illusory attacks. Our findings highlight the need to ensure that real-world AI and human-AI systems are designed to make it difficult to corrupt sensory observations in ways that are consistent with the environment dynamics.", "keywords": "reinforcement learning;adversarial attacks", "primary_area": "", "supplementary_material": "/attachment/0262026cd0a29e33e508bc65ec2d33c0d889ad70.zip", "author": "Tim Franzmeyer;Stephen Marcus McAleer;Joao F.
Henriques;Philip Torr;Jakob Nicolaus Foerster;Adel Bibi;Christian Schroeder de Witt", "authorids": "~Tim_Franzmeyer1;~Stephen_Marcus_McAleer1;~Joao_F._Henriques1;~Philip_Torr1;~Jakob_Nicolaus_Foerster1;~Adel_Bibi1;~Christian_Schroeder_de_Witt1", "gender": ";M;M;;M;M;M", "homepage": "https://www.robots.ox.ac.uk/~frtim/;https://www.andrew.cmu.edu/user/smcaleer/;http://www.robots.ox.ac.uk/~joao/;http://www.robots.ox.ac.uk/~tvg/;https://www.jakobfoerster.com;http://adelbibi.com;https://www.schroederdewitt.com", "dblp": "298/1117;;31/8617.html;;176/5095;176/0964;", "google_scholar": "Jvv1rkkAAAAJ;iEFL4-YAAAAJ;aCQjyp0AAAAJ;;6z4lQzMAAAAJ;Q4j2laYAAAAJ;DE60h_0AAAAJ", "orcid": ";;;;;0000-0002-6169-3918;", "linkedin": "tim-franzmeyer-370257110/;stephen-mcaleer/;;;;adel-bibi-ba3671ab/;", "or_profile": "~Tim_Franzmeyer1;~Stephen_Marcus_McAleer1;~Joao_F._Henriques1;~Philip_Torr1;~Jakob_Nicolaus_Foerster1;~Adel_Bibi1;~Christian_Schroeder_de_Witt1", "aff": "University of Oxford;Carnegie Mellon University;University of Oxford;University of Oxford;University of Oxford, University of Oxford;University of Oxford;University of Oxford", "aff_domain": "ox.ac.uk;cmu.edu;ox.ac.uk;ox.ac.uk;eng.ox.ac.uk;ox.ac.uk;oxford.ac.uk", "position": "PhD student;Postdoc;Principal Researcher;Full Professor;Associate Professor;Senior Research Associate;Postdoc", "bibtex": "@misc{\nfranzmeyer2023illusory,\ntitle={Illusory Adversarial Attacks on Sequential Decision-Makers and Countermeasures},\nauthor={Tim Franzmeyer and Stephen Marcus McAleer and Joao F. Henriques and Philip Torr and Jakob Nicolaus Foerster and Adel Bibi and Christian Schroeder de Witt},\nyear={2023},\nurl={https://openreview.net/forum?id=HB2HBIQKhp-}\n}", "github": "", "project": "", "reviewers": "G6kZ;58cU;YTMM;pRsS", "site": "https://openreview.net/forum?id=HB2HBIQKhp-", "pdf_size": 767650, "recommendation": "5;5;5;6", "confidence": "4;4;4;4", "correctness": "2;3;2;3", "technical_novelty": "3;2;3;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "86;239;67;115", "wc_strength_and_weaknesses": "357;422;688;501", "wc_clarity_quality_novelty_and_reproducibility": "18;77;97;186", "wc_summary_review": "56;50;115;60", "wc_review": "517;788;967;862", "wc_reply_reviewers": "586;0;0;20", "wc_reply_authors": "1399;1178;1140;1066", "reply_reviewers": "2;0;0;1", "reply_authors": "5;4;3;2", "recommendation_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 126.75, 67.02378309227255 ], "wc_strength_and_weaknesses_avg": [ 492.0, 124.11889461318933 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 94.5, 60.28474102125678 ], "wc_summary_review_avg": [ 70.25, 26.080404521402652 ], "wc_review_avg": [ 783.5, 166.49099074724734 ], "wc_reply_reviewers_avg": [ 151.5, 250.99153372175724 ], "wc_reply_authors_avg": [ 1195.75, 124.0652550071937 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 3.5, 1.118033988749895 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:2LZOWGo1AU8J:scholar.google.com/&scioq=Illusory+Adversarial+Attacks+on+Sequential+Decision-Makers+and+Countermeasures&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;0;0;0;0;0", "aff_unique_norm": "University of 
Oxford;Carnegie Mellon University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ox.ac.uk;https://www.cmu.edu", "aff_unique_abbr": "Oxford;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;0;0", "aff_country_unique": "United Kingdom;United States" }, { "id": "HBLr-G1Zpn", "title": "Deep Causal Generative Modeling for Tabular Data Imputation and Intervention", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Tabular data synthesis could overcome the tabular data incompleteness and data availability issue. In most prior works, deep generative models are basically constructed following standard architecture designs. However, these works do not consider the inter-relationships among the features, or the latent variables. To fully leverage these inter-relationships, we develop a novel causal-aware asymmetric variational autoencoder architecture (CAT) for tabular data generation, imputation, and intervention. The developed model, called CAT-MIWAE, learns exogenous causal representation with a pre-defined causal graph in incomplete data context. It provides interpretability for partially observed features and could efficiently address missing value imputation problem. Besides, CAT-MIWAE can sample data from distributions under arbitrary conditions and interventions. This merit enables us to actively generate counterfactuals or debiased fair data samples for any subpopulation of interest. To validate the effectiveness of the proposed causally aware models, we conduct extensive experiments on real-world tabular datasets. Experiments show that the proposed models outperform the state of the art models. Moreover, we perform CATE estimations to show that CAT-MIWAE model could appropriately extrapolate any conditional or interventional distributions from the original observed data distribution.", "keywords": "tabular data;generative models;missing value imputation;causal knowledge", "primary_area": "", "supplementary_material": "", "author": "Xiaohan Jiang;Sen Liu;Hongbin Zhu", "authorids": "~Xiaohan_Jiang1;senliu@fudan.edu.cn;zhuhb@fudan.edu.cn", "gender": ";;", "homepage": ";;", "dblp": ";;", "google_scholar": ";;", "orcid": "0000-0002-2440-8196;;", "linkedin": ";;", "or_profile": "~Xiaohan_Jiang1;senliu@fudan.edu.cn;zhuhb@fudan.edu.cn", "aff": "Fudan University;;", "aff_domain": "fudan.edu.cn;;", "position": "MS student;;", "bibtex": "@misc{\njiang2023deep,\ntitle={Deep Causal Generative Modeling for Tabular Data Imputation and Intervention},\nauthor={Xiaohan Jiang and Sen Liu and Hongbin Zhu},\nyear={2023},\nurl={https://openreview.net/forum?id=HBLr-G1Zpn}\n}", "github": "", "project": "", "reviewers": "sVss;sUpg;91vS", "site": "https://openreview.net/forum?id=HBLr-G1Zpn", "pdf_size": 722289, "recommendation": "3;5;5", "confidence": "4;4;4", "correctness": "2;3;3", "technical_novelty": "1;3;3", "empirical_novelty": "0;3;3", "wc_summary_paper": "101;50;68", "wc_strength_and_weaknesses": "413;343;301", "wc_clarity_quality_novelty_and_reproducibility": "355;2;28", "wc_summary_review": "57;25;25", "wc_review": "926;420;422", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.9428090415820634 ], "empirical_novelty_avg": [ 2.0, 1.4142135623730951 ], 
"wc_summary_paper_avg": [ 73.0, 21.118712081942874 ], "wc_strength_and_weaknesses_avg": [ 352.3333333333333, 46.197643037521104 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 128.33333333333334, 160.6286262020427 ], "wc_summary_review_avg": [ 35.666666666666664, 15.084944665313014 ], "wc_review_avg": [ 589.3333333333334, 238.06068320680106 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.9999999999999998, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:bov56tRHlvwJ:scholar.google.com/&scioq=Deep+Causal+Generative+Modeling+for+Tabular+Data+Imputation+and+Intervention&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Fudan University", "aff_unique_dep": "", "aff_unique_url": "https://www.fudan.edu.cn", "aff_unique_abbr": "Fudan", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "title": "Truncated Diffusion Probabilistic Models and Diffusion-based Adversarial Auto-Encoders", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12106", "id": "HDxgaKk956l", "poster": "/media/PosterPDFs/ICLR%202023/12106.png?t=1680734976.1244237", "openreview": "https://openreview.net/forum?id=HDxgaKk956l", "slides": "https://iclr.cc/virtual/2023/poster/12106", "video": "https://iclr.cc/virtual/2023/poster/12106", "author_site": "Huangjie Zheng, Pengcheng He, Weizhu Chen, Mingyuan Zhou", "tldr": "We propose truncated diffusion probabilistic models, which models an implicit prior to truncate the diffusion chain and requires significantly fewer reverse steps to generate high-quality samples.", "abstract": "Employing a forward diffusion chain to gradually map the data to a noise distribution, diffusion-based generative models learn how to generate the data by inferring a reverse diffusion chain. However, this approach is slow and costly because it needs many forward and reverse steps. We propose a faster and cheaper approach that adds noise not until the data become pure random noise, but until they reach a hidden noisy data distribution that we can confidently learn. Then, we use fewer reverse steps to generate data by starting from this hidden distribution that is made similar to the noisy data. We reveal that the proposed model can be cast as an adversarial auto-encoder empowered by both the diffusion process and a learnable implicit prior. 
Experimental results show that even with a significantly smaller number of reverse diffusion steps, the proposed truncated diffusion probabilistic models can provide consistent improvements over the non-truncated ones in terms of performance in both unconditional and text-guided image generation.", "keywords": "Diffusion model;adversarial autoencoder;implicit prior", "primary_area": "", "supplementary_material": "/attachment/050b6a11ff07877432f3414fbbe7c653e0a1d82e.zip", "author": "Huangjie Zheng;Pengcheng He;Weizhu Chen;Mingyuan Zhou", "authorids": "~Huangjie_Zheng1;~Pengcheng_He2;~Weizhu_Chen1;~Mingyuan_Zhou1", "gender": "M;M;M;M", "homepage": ";;https://www.microsoft.com/en-us/research/people/wzchen/;http://mingyuanzhou.github.io", "dblp": "192/2170;116/8665;79/2536;", "google_scholar": "Vl5wCXsAAAAJ;https://scholar.google.com/citations?hl=en;LG_E-4EAAAAJ;LXwCIisAAAAJ", "orcid": "0000-0003-0508-5034;;;", "linkedin": ";;;", "or_profile": "~Huangjie_Zheng1;~Pengcheng_He2;~Weizhu_Chen1;~Mingyuan_Zhou1", "aff": "University of Texas, Austin;Microsoft;Microsoft GenAI;Google", "aff_domain": "utexas.edu;microsoft.com;microsoft.com;google.com", "position": "PhD student;Principal Researcher;Vice President;Researcher", "bibtex": "@inproceedings{\nzheng2023truncated,\ntitle={Truncated Diffusion Probabilistic Models and Diffusion-based Adversarial Auto-Encoders},\nauthor={Huangjie Zheng and Pengcheng He and Weizhu Chen and Mingyuan Zhou},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=HDxgaKk956l}\n}", "github": "", "project": "", "reviewers": "vJWX;6UUu;MGCM;YWr1", "pdf_size": 35545565, "recommendation": "5;6;8;8", "confidence": "4;4;5;3", "correctness": "3;4;4;4", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "80;108;81;129", "wc_strength_and_weaknesses": "341;72;297;77", "wc_clarity_quality_novelty_and_reproducibility": "93;117;27;154", "wc_summary_review": "40;46;33;137", "wc_review": "554;343;438;497", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "726;409;235;304", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 99.5, 20.402205763103165 ], "wc_strength_and_weaknesses_avg": [ 196.75, 123.2484786924366 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 97.75, 46.267564232408 ], "wc_summary_review_avg": [ 64.0, 42.39693385140015 ], "wc_review_avg": [ 458.0, 78.04165554369025 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 418.5, 188.03523605962792 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.7777777777777777, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "pdf": "https://openreview.net/pdf?id=HDxgaKk956l", "email": "utexas.edu;microsoft.com;microsoft.com;google.com", "author_num": 4, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "University of Texas at Austin;Microsoft;Google", "aff_unique_dep": ";Microsoft Corporation;Google", "aff_unique_url": "https://www.utexas.edu;https://www.microsoft.com;https://www.google.com", "aff_unique_abbr": "UT Austin;Microsoft;Google", "aff_campus_unique_index": "0;2",
"aff_campus_unique": "Austin;;Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "StrucTexTv2: Masked Visual-Textual Prediction for Document Image Pre-training", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12020", "id": "HE_75XY5Ljh", "poster": "/media/PosterPDFs/ICLR%202023/12020.png?t=1682475373.3377533", "openreview": "https://openreview.net/forum?id=HE_75XY5Ljh", "slides": "https://iclr.cc/virtual/2023/poster/12020", "video": "https://iclr.cc/virtual/2023/poster/12020", "author_site": "Yuechen Yu, yulin li, Chengquan Zhang, Xiaoqiang Zhang, Zengyuan Guo, Xiameng Qin, Kun Yao, Junyu Han, Errui Ding, Jingdong Wang", "tldr": "", "abstract": "In this paper, we present StrucTexTv2, an effective document image pre-training framework, by performing masked visual-textual prediction. It consists of two self-supervised pre-training tasks: masked image modeling and masked language modeling, based on text region-level image masking. The proposed method randomly masks some image regions according to the bounding box coordinates of text words. The objectives of our pre-training tasks are reconstructing the pixels of masked image regions and the corresponding masked tokens simultaneously. Hence the pre-trained encoder can capture more textual semantics in comparison to the masked image modeling that usually predicts the masked image patches. Compared to the masked multi-modal modeling methods for document image understanding that rely on both the image and text modalities, StrucTexTv2 models image-only input and potentially deals with more application scenarios free from OCR pre-processing. Extensive experiments on mainstream benchmarks of document image understanding demonstrate the effectiveness of StrucTexTv2. 
It achieves competitive or even new state-of-the-art performance in various downstream tasks such as image classification, layout analysis, table structure recognition, document OCR, and information extraction under the end-to-end scenario.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/f4c1bb4dc46c4df0af26c8f202bf0e26cae0f302.zip", "author": "Yuechen Yu;Yulin Li;Chengquan Zhang;Xiaoqiang Zhang;Zengyuan Guo;Xiameng Qin;Kun Yao;Junyu Han;Errui Ding;Jingdong Wang", "authorids": "~Yuechen_Yu1;liyulin03@baidu.com;~Chengquan_Zhang2;~Xiaoqiang_Zhang3;~Zengyuan_Guo1;qinxiameng@baidu.com;~Kun_Yao1;~Junyu_Han1;~Errui_Ding2;~Jingdong_Wang1", "gender": ";;M;M;M;;M;;M;M", "homepage": ";;;;;;https://github.com/kk12333;;;https://jingdongwang2017.github.io/", "dblp": ";;;;https://dblp.uni-trier.de/pers/hd/g/Guo:Zengyuan;;03/6550;;180/5531;49/3441", "google_scholar": ";;koZQ_NgAAAAJ;;;;;;1wzEtxcAAAAJ;z5SPCmgAAAAJ", "orcid": ";;0000-0001-8254-5773;;;;0000-0001-7155-4076;;;0000-0002-4888-4445", "linkedin": ";;;zhangxiaoqiang;;;;;;", "or_profile": "~Yuechen_Yu1;liyulin03@baidu.com;~Chengquan_Zhang2;~Xiaoqiang_Zhang3;~Zengyuan_Guo1;qinxiameng@baidu.com;~Kun_Yao1;~Junyu_Han1;~Errui_Ding2;~Jingdong_Wang1", "aff": ";;Baidu;;;;Baidu;;Baidu;Baidu", "aff_domain": ";;baidu.com;;;;baidu.com;;baidu.com;baidu.com", "position": ";;Staff Software Engineer;;;;Manager;;Director;Chief Scientist for Computer Vision", "bibtex": "@inproceedings{\nyu2023structextv,\ntitle={StrucTexTv2: Masked Visual-Textual Prediction for Document Image Pre-training},\nauthor={Yuechen Yu and Yulin Li and Chengquan Zhang and Xiaoqiang Zhang and Zengyuan Guo and Xiameng Qin and Kun Yao and Junyu Han and Errui Ding and Jingdong Wang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=HE_75XY5Ljh}\n}", "github": "", "project": "", "reviewers": "HUEm;vxyY;oo5U;XqWz", "pdf_size": 3307331, "recommendation": "5;6;8;8", "confidence": "5;4;4;4", "correctness": "3;3;4;3", "technical_novelty": "2;4;3;3", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "30;175;112;94", "wc_strength_and_weaknesses": "279;521;144;420", "wc_clarity_quality_novelty_and_reproducibility": "34;42;57;64", "wc_summary_review": "32;44;47;70", "wc_review": "375;782;360;648", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 102.75, 51.65934087848973 ], "wc_strength_and_weaknesses_avg": [ 341.0, 142.56051346708878 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 49.25, 11.861176164276458 ], "wc_summary_review_avg": [ 48.25, 13.754544703478919 ], "wc_review_avg": [ 541.25, 180.17127268241182 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 10, 0 ], "corr_recommendation_confidence": -0.7777777777777777, "corr_recommendation_correctness": 0.5555555555555555, "gs_citation": 55, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13756551931237594507&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=HE_75XY5Ljh", "email": 
";;baidu.com;;;;baidu.com;;baidu.com;baidu.com", "author_num": 10, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Baidu", "aff_unique_dep": "Baidu, Inc.", "aff_unique_url": "https://www.baidu.com", "aff_unique_abbr": "Baidu", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "HG0SwOmlaEo", "title": "Clustering Structure Identification With Ordering Graph", "track": "main", "status": "Reject", "tldr": "", "abstract": "In machine learning, data is often presented in the form of a graph or similarity (or distance) values between samples. Graph-based clustering methods such as spectral clustering are defined for general weighted graphs to identify the clustering structure. Graph construction research has developed significantly for decades, but the graph-based partition study still requires more attention because of its poor performance. For example, spectral clustering needs a post-processing (e.g., K-Means) step to uncover the clustering indicators. Yet, K-Means is sensitive to the initial center setting and easily falls into a local optimum. In this paper, we investigate a new type of graph-based clustering approach. Firstly, we introduce a new algorithm for the purpose of cluster analysis which does not explicitly produce a clustering of a dataset but instead creates an augmented graph representing its density-based ordered clustering structure. This ordered graph contains information equivalent to density-based clustering corresponding to a broad range of parameter settings. Secondly, we found that the graph matrix is shown in a block-diagonal form because of the nature of ordering. We propose a partition method to learn the graph matrix's block-diagonal structure and identify the clustering directly. The global optimality is guaranteed theoretically. We test the proposed method on synthetic datasets and five high-dimensional datasets. Experimental results show that the proposed method outperforms state-of-the-art graph-based clustering methods and improves their performance by roughly 10%-50%. 
", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Zheng Xing;Weibing Zhao", "authorids": "~Zheng_Xing1;~Weibing_Zhao1", "gender": "M;F", "homepage": "http://zhengxing.tech/;https://sse.cuhk.edu.cn/zh-hans/content/8492", "dblp": ";239/7036.html", "google_scholar": "https://scholar.google.com.hk/citations?hl=zh-CN;9Ul77ZoAAAAJ", "orcid": "0000-0002-9710-082X;0000-0002-2819-990X", "linkedin": ";weibing-zhao-971662183", "or_profile": "~Zheng_Xing1;~Weibing_Zhao1", "aff": "The Chinese University of Hong Kong, Shenzhen;The Chinese University of Hong Kong, Shenzhen", "aff_domain": "cuhk.edu.cn;link.cuhk.edu.cn", "position": "PhD student;PhD student", "bibtex": "@misc{\nxing2023clustering,\ntitle={Clustering Structure Identification With Ordering Graph},\nauthor={Zheng Xing and Weibing Zhao},\nyear={2023},\nurl={https://openreview.net/forum?id=HG0SwOmlaEo}\n}", "github": "", "project": "", "reviewers": "eHSt;vLzH;ops2;Fmso", "site": "https://openreview.net/forum?id=HG0SwOmlaEo", "pdf_size": 4496906, "recommendation": "5;6;6;8", "confidence": "4;2;3;2", "correctness": "2;3;4;4", "technical_novelty": "3;3;3;3", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "96;14;153;63", "wc_strength_and_weaknesses": "188;17;389;51", "wc_clarity_quality_novelty_and_reproducibility": "151;37;190;25", "wc_summary_review": "122;179;67;17", "wc_review": "557;247;799;156", "wc_reply_reviewers": "625;0;0;0", "wc_reply_authors": "2068;132;1135;129", "reply_reviewers": "3;0;0;0", "reply_authors": "5;1;2;1", "recommendation_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 81.5, 50.54948070949889 ], "wc_strength_and_weaknesses_avg": [ 161.25, 146.24358960310022 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 100.75, 71.22631185173074 ], "wc_summary_review_avg": [ 96.25, 60.51187899908579 ], "wc_review_avg": [ 439.75, 255.1836348592911 ], "wc_reply_reviewers_avg": [ 156.25, 270.6329386826371 ], "wc_reply_authors_avg": [ 866.0, 806.0846729717666 ], "reply_reviewers_avg": [ 0.75, 1.299038105676658 ], "reply_authors_avg": [ 2.25, 1.6393596310755 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.7608859102526822, "corr_recommendation_correctness": 0.7608859102526822, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7658567641840872852&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Chinese University of Hong Kong", "aff_unique_dep": "", "aff_unique_url": "https://www.cuhk.edu.cn", "aff_unique_abbr": "CUHK", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Shenzhen", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "HGsoe1wmRW5", "title": "Pocket-specific 3D Molecule Generation by Fragment-based Autoregressive Diffusion Models", "track": "main", "status": "Reject", "tldr": "Using fragment-based autoregressive diffusion model to generate 3D molecules for protein binding pockets", "abstract": "Autoregressive model is widely adopted to generate 3D molecules which can fit any protein binding pocket. Current autoregressive model suffers from two major drawbacks. First, it is hard to capture local geometric patterns as only one atom is generated at each step. 
Second, most of the autoregressive models generate atoms and chemical bonds in two separate processes, which causes a number of problems such as incorrect counts of rings, a biased distribution of bond lengths, and inaccurate 3D molecular structures. To tackle these problems, we designed a model, named FragDiff, to generate 3D molecules fragment-by-fragment for pockets. In each generation step, FragDiff places a molecular fragment around the pocket by using E(3)-equivariant diffusion generative models to simultaneously predict the atom types, atom coordinates and the chemical bonds of the fragment. Extensive experimental results confirm our assumption that unifying atom and bond generation can significantly improve the quality of the sampled 3D molecules in terms of more accurate distributions of 2D subgraphs and 3D substructures.", "keywords": "3D molecule generation;drug design;protein binding pocket;generative model;diffusion model", "primary_area": "", "supplementary_material": "", "author": "Xingang Peng;Jiaqi Guan;Jian Peng;Jianzhu Ma", "authorids": "~Xingang_Peng1;~Jiaqi_Guan1;~Jian_Peng1;~Jianzhu_Ma2", "gender": ";M;M;M", "homepage": "https://github.com/pengxingang;http://jiaqi.web.illinois.edu/;http://jianpeng.web.engr.illinois.edu/;https://majianzhu.com/", "dblp": "223/2200;207/7593;29/4181-1;24/9080.html", "google_scholar": "6yMuAlgAAAAJ;On-ONT4AAAAJ;https://scholar.google.com.tw/citations?user=4wcAVXAAAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Xingang_Peng1;~Jiaqi_Guan1;~Jian_Peng1;~Jianzhu_Ma2", "aff": "Peking University;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;Tsinghua University", "aff_domain": "pku.edu.cn;illinois.edu;illinois.edu;tsinghua.edu.cn", "position": "PhD student;PhD student;Assistant Professor;Associate Professor", "bibtex": "@misc{\npeng2023pocketspecific,\ntitle={Pocket-specific 3D Molecule Generation by Fragment-based Autoregressive Diffusion Models},\nauthor={Xingang Peng and Jiaqi Guan and Jian Peng and Jianzhu Ma},\nyear={2023},\nurl={https://openreview.net/forum?id=HGsoe1wmRW5}\n}", "github": "", "project": "", "reviewers": "nh1t;ia3Y;5r3H", "site": "https://openreview.net/forum?id=HGsoe1wmRW5", "pdf_size": 2265657, "recommendation": "3;3;5", "confidence": "4;3;4", "correctness": "3;3;3", "technical_novelty": "2;2;2", "empirical_novelty": "2;2;2", "wc_summary_paper": "70;48;113", "wc_strength_and_weaknesses": "328;213;533", "wc_clarity_quality_novelty_and_reproducibility": "68;37;140", "wc_summary_review": "34;33;14", "wc_review": "500;331;800", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 77.0, 26.993826454703797 ], "wc_strength_and_weaknesses_avg": [ 358.0, 132.35054464061216 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 81.66666666666667, 43.14574782705192 ], "wc_summary_review_avg": [ 27.0, 9.201449161228174 ], "wc_review_avg": [ 543.6666666666666, 193.9421448668534 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.5, "corr_recommendation_correctness": 0.0, "gs_citation": 6,
"gs_cited_by_link": "https://scholar.google.com/scholar?cites=3383731160481982745&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Peking University;University of Illinois Urbana-Champaign;Tsinghua University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.pku.edu.cn;https://illinois.edu;https://www.tsinghua.edu.cn", "aff_unique_abbr": "Peking U;UIUC;THU", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "China;United States" }, { "id": "HHPEkUi5POw", "title": "Less is More: Identifying the Cherry on the Cake for Dynamic Networks", "track": "main", "status": "Withdraw", "tldr": "We reveal the contradiction between the human brain and dynamic networks, then propose and validate the Cherry Hypothesis to show that a partially dynamic network (PAD-Net) could advance the performance in dynamic networks.", "abstract": "Dynamic networks, e.g., Dynamic Convolution (DY-Conv) and the Mixture of Experts (MoE), have been extensively explored as they can considerably improve the model's representation power with acceptable computational cost. The common practice in implementing dynamic networks is to convert given static layers into fully dynamic ones where all parameters are dynamic (at least within a single layer) and vary with the input. Recent studies empirically show the trend that the more dynamic layers contribute to ever-increasing performance. However, such a fully dynamic setting 1) may cause redundant parameters and high deployment costs, limiting the applicability of dynamic networks to a broader range of tasks and models, and more importantly, 2) contradicts the previous discovery in the human brain that \\textit{when human brains process an attention-demanding task, only partial neurons in the task-specific areas are activated by the input, while the rest neurons leave in a baseline state.} Critically, there is no effort to understand and resolve the above contradictory finding, leaving the primal question -- to make the computational parameters fully dynamic or not? -- unanswered. The main contributions of our work are challenging the basic commonsense in dynamic networks, and, proposing and validating the \\textsc{cherry hypothesis} -- \\textit{A fully dynamic network contains a subset of dynamic parameters that when transforming other dynamic parameters into static ones, can maintain or even exceed the performance of the original network.} Technically, we propose a brain-inspired partially dynamic network, namely PAD-Net, to transform the redundant dynamic parameters into static ones. Also, we further design Iterative Mode Partition to partition the dynamic- and static-subnet, which alleviates the redundancy in traditional fully dynamic networks. Our hypothesis and method are comprehensively supported by large-scale experiments with two typical advanced dynamic methods, i.e., DY-Conv and MoE, on both image classification and GLUE benchmarks. Encouragingly, we surpass the fully dynamic networks by $+0.7\\%$ top-1 acc with only $30\\%$ dynamic parameters for ResNet-50 and $+1.9\\%$ average score in language understanding tasks with only $50\\%$ dynamic parameters for BERT-base. 
", "keywords": "Dynamic Networks;Cherry Hypothesis;Efficient Architecture Designation.", "primary_area": "", "supplementary_material": "/attachment/d7e10b7556f3cf701e0909c6f9e74b174f88e1a3.zip", "author": "Shwai He;Liang Ding;Daize Dong;Boan Liu;Fuqiang Yu;Dacheng Tao", "authorids": "~Shwai_He1;~Liang_Ding3;~Daize_Dong1;~Boan_Liu1;~Fuqiang_Yu1;~Dacheng_Tao1", "gender": ";M;;M;M;", "homepage": ";http://liamding.cc/;;;https://scholar.google.com/citations?user=DvuEjzEAAAAJ&hl=zh-CN;", "dblp": ";88/3340-6.html;;;;", "google_scholar": ";lFCLvOAAAAAJ;;;DvuEjzEAAAAJ;", "orcid": ";;;0000-0002-5877-9999;0000-0002-7117-5524;", "linkedin": ";;;;;", "or_profile": "~Shwai_He1;~Liang_Ding3;~Daize_Dong1;~Boan_Liu1;~Fuqiang_Yu1;~Dacheng_Tao1", "aff": ";JD Explore Academy, JD.com Inc.;;Wuhan University;Qilu University of Technology (Shandong Academy of Sciences);", "aff_domain": ";jd.com;;whu.edu.cn;qlu.edu.cn;", "position": ";Research Scientist;;MS student;Associate Professor;", "bibtex": "@misc{\nhe2023less,\ntitle={Less is More: Identifying the Cherry on the Cake for Dynamic Networks},\nauthor={Shwai He and Liang Ding and Daize Dong and Boan Liu and Fuqiang Yu and Dacheng Tao},\nyear={2023},\nurl={https://openreview.net/forum?id=HHPEkUi5POw}\n}", "github": "", "project": "", "reviewers": "WP9M;4CJk;LUwb;z6B9", "site": "https://openreview.net/forum?id=HHPEkUi5POw", "pdf_size": 7562694, "recommendation": "5;5;5;5", "confidence": "4;3;4;2", "correctness": "3;3;2;3", "technical_novelty": "2;3;2;3", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "69;90;81;130", "wc_strength_and_weaknesses": "452;435;451;88", "wc_clarity_quality_novelty_and_reproducibility": "2;62;36;469", "wc_summary_review": "2;93;61;22", "wc_review": "525;680;629;709", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 92.5, 22.89650628371062 ], "wc_strength_and_weaknesses_avg": [ 356.5, 155.1652345082493 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 142.25, 189.84516717578038 ], "wc_summary_review_avg": [ 44.5, 35.131894341182345 ], "wc_review_avg": [ 635.75, 70.06202609117153 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Xxmtlmd7CL0J:scholar.google.com/&scioq=Less+is+More:+Identifying+the+Cherry+on+the+Cake+for+Dynamic+Networks&hl=en&as_sdt=0,10", "gs_version_total": 0, "aff_unique_index": "0;1;2", "aff_unique_norm": "JD.com Inc.;Wuhan University;Qilu University of Technology", "aff_unique_dep": "JD Explore Academy;;", "aff_unique_url": "https://www.jd.com;http://www.whu.edu.cn/;http://www.qilu.edu.cn/", "aff_unique_abbr": "JD.com;WHU;QUT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "HHcl-5chhkt", "title": "IT-NAS: Integrating Lite-Transformer into NAS for Architecture Seletion", "track": "main", "status": "Reject", "tldr": "This paper proposes to integrate Lite-Transformer into NAS for architecture selection, and 
introduces an additional indicator token (IT) to reflect the importance of each candidate operation.", "abstract": "Neural Architecture Search (NAS) aims to search for the best network in the pre-defined search space. However, much work focuses on the search strategy but little on the architecture selection process. Although weight-sharing based NAS has improved search efficiency, we notice that the architecture selection process is quite unstable or circuitous. For instance, differentiable NAS may derive a suboptimal architecture due to the performance collapse caused by bi-level optimization, while One-shot NAS requires sampling and evaluating a large number of candidate structures. Recently, the self-attention mechanism has achieved better performance in terms of its long-range modeling capabilities. Considering that different operations are widely distributed in the search space, we suggest leveraging the self-attention mechanism to extract the relationship among them and to determine which operation is superior to others. Therefore, we integrate Lite-Transformer into NAS for architecture selection. Specifically, we regard the feature maps of the candidate operations as distinct patches and feed them into the Lite-Transformer module along with an additional Indicator Token (called IT). The cross attention among various operations can be extracted by the self-attention mechanism, and the importance of each candidate operation is then shown by the softmax result between the query of the indicator token (IT) and the values of the operation tokens. We experimentally demonstrate that our framework can select the truly representative architecture in different search spaces, achieving 2.39% test error on CIFAR-10 in the DARTS search space and 24.1% test error on ImageNet in the ProxylessNAS search space, as well as stable and better performance in the NAS-Bench-201 and S1-S4 search spaces, outperforming state-of-the-art NAS methods.", "keywords": "Neural Architecture Search;Transformer;Self-Attention", "primary_area": "", "supplementary_material": "", "author": "Zihao Sun;Yu Hu;Longxing Yang;Shun Lu;Jilin Mei;Yinhe Han", "authorids": "~Zihao_Sun1;~Yu_Hu5;~Longxing_Yang1;~Shun_Lu1;~Jilin_Mei1;~Yinhe_Han1", "gender": "M;;M;M;M;M", "homepage": ";;;https://shunlu91.github.io/;;http://www.ict.cas.cn/sourcedb_2018_ict_cas/cn/jssrck/201610/t20161010_4674169.html", "dblp": ";;309/0621;;212/1446.html;32/2695.html", "google_scholar": "oSmC9pMAAAAJ;;FhdrIgcAAAAJ;-zX83WMAAAAJ;;", "orcid": "0000-0003-0412-9760;;;;;", "linkedin": ";;;;;", "or_profile": "~Zihao_Sun1;~Yu_Hu5;~Longxing_Yang1;~Shun_Lu1;~Jilin_Mei1;~Yinhe_Han1", "aff": "Institute of Computing Technology, Chinese Academy of Sciences;;Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences ;Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences", "aff_domain": "ict.ac.cn;;ict.ac.cn;ucas.ac.cn;ict.ac.cn;ict.ac.cn", "position": "PhD student;;PhD student;PhD student;Assistant Professor;Full Professor", "bibtex": "@misc{\nsun2023itnas,\ntitle={{IT}-{NAS}: Integrating Lite-Transformer into {NAS} for Architecture Selection},\nauthor={Zihao Sun and Yu Hu and Longxing Yang and Shun Lu and Jilin Mei and Yinhe Han},\nyear={2023},\nurl={https://openreview.net/forum?id=HHcl-5chhkt}\n}", "github": "", "project": "", "reviewers": "CZtJ;3FDx;2Hwu;o9WQ", "site":
"https://openreview.net/forum?id=HHcl-5chhkt", "pdf_size": 1815248, "recommendation": "3;6;6;6", "confidence": "4;3;4;5", "correctness": "3;3;3;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "3;3;0;3", "wc_summary_paper": "34;68;95;51", "wc_strength_and_weaknesses": "250;166;126;219", "wc_clarity_quality_novelty_and_reproducibility": "44;11;334;54", "wc_summary_review": "398;20;37;25", "wc_review": "726;265;592;349", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 1.299038105676658 ], "wc_summary_paper_avg": [ 62.0, 22.52776065213762 ], "wc_strength_and_weaknesses_avg": [ 190.25, 47.730362454102526 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 110.75, 129.8718117991737 ], "wc_summary_review_avg": [ 120.0, 160.62222760253326 ], "wc_review_avg": [ 483.0, 184.6686221316442 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:wNtfyg02zVEJ:scholar.google.com/&scioq=IT-NAS:+Integrating+Lite-Transformer+into+NAS+for+Architecture+Seletion&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Chinese Academy of Sciences", "aff_unique_dep": "Institute of Computing Technology", "aff_unique_url": "http://www.ict.ac.cn", "aff_unique_abbr": "CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "HHd2OVBoF_-", "title": "Federated Learning in Non-IID Settings Aided by Differentially Private Synthetic Data", "track": "main", "status": "Withdraw", "tldr": "A novel federated learning framework utilizing data augmentation to improve global accuracy among data-heterogeneous clients", "abstract": "Federated learning (FL) is a privacy-promoting framework that enables potentially large number of clients to collaboratively train machine learning models. In an FL system, a server coordinates the collaboration by collecting and aggregating clients' model updates while the clients' data remains local and private. A major challenge in federated learning arises when the local data is non-iid -- the setting in which performance of the learned global model may deteriorate significantly compared to the scenario where the data is identically distributed across the clients. In this paper we propose FedDPMS (Federated Differentially Private Means Sharing), an FL algorithm in which clients augment local datasets with data synthesized using differentially private information collected and communicated by a trusted server. In particular, the server matches the pairs of clients having complementary local datasets and facilitates differentially-private sharing of the means of latent data representations; the clients then deploy variational auto-encoders to enrich their datasets and thus ameliorate the effects of non-iid data distribution. 
Our experiments on deep image classification tasks demonstrate that FedDPMS outperforms competing state-of-the-art FL methods specifically developed to address the challenge of federated learning on non-iid data.", "keywords": "Federated Learning;Representation Learning;Differential Privacy", "primary_area": "", "supplementary_material": "/attachment/69d118d745fd4bfb4a097529b7275b6cdee60992.zip", "author": "Huancheng Chen;Haris Vikalo", "authorids": "~Huancheng_Chen1;~Haris_Vikalo1", "gender": "M;", "homepage": "https://citychan.github.io/;", "dblp": "302/4540;", "google_scholar": "https://scholar.google.com.tw/citations?hl=zh-TW;", "orcid": ";", "linkedin": ";", "or_profile": "~Huancheng_Chen1;~Haris_Vikalo1", "aff": "University of Texas, Austin;", "aff_domain": "utexas.edu;", "position": "PhD student;", "bibtex": "@misc{\nchen2023federated,\ntitle={Federated Learning in Non-{IID} Settings Aided by Differentially Private Synthetic Data},\nauthor={Huancheng Chen and Haris Vikalo},\nyear={2023},\nurl={https://openreview.net/forum?id=HHd2OVBoF_-}\n}", "github": "", "project": "", "reviewers": "oPpa;bHsu;iW3f;Cw7K", "site": "https://openreview.net/forum?id=HHd2OVBoF_-", "pdf_size": 2644562, "recommendation": "3;5;5;5", "confidence": "3;5;3;3", "correctness": "2;2;3;3", "technical_novelty": "1;3;3;2", "empirical_novelty": "2;3;3;2", "wc_summary_paper": "97;72;39;56", "wc_strength_and_weaknesses": "435;215;275;321", "wc_clarity_quality_novelty_and_reproducibility": "51;7;11;67", "wc_summary_review": "82;37;32;80", "wc_review": "665;331;357;524", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.25, 0.82915619758885 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 66.0, 21.365860619221497 ], "wc_strength_and_weaknesses_avg": [ 311.5, 80.60241931852914 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 34.0, 25.67099530598687 ], "wc_summary_review_avg": [ 57.75, 23.327826731180938 ], "wc_review_avg": [ 469.25, 135.11916037335342 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14106905539109740197&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff_unique_index": "0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Provable Robustness against Wasserstein Distribution Shifts via Input Randomization", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10911", "id": "HJFVrpCaGE", "poster": "/media/PosterPDFs/ICLR%202023/10911.png?t=1681757791.493748", "openreview": "https://openreview.net/forum?id=HJFVrpCaGE", "slides": "https://iclr.cc/virtual/2023/poster/10911", "video": "https://iclr.cc/virtual/2023/poster/10911", "author_site": "Aounon Kumar, Alexander Levine, Tom Goldstein, Soheil Feizi", "tldr": "We present provable robustness 
guarantees on the accuracy of a model under Wasserstein shifts of the input distribution.", "abstract": "Certified robustness in machine learning has primarily focused on adversarial perturbations with a fixed attack budget for each sample in the input distribution. In this work, we present provable robustness guarantees on the accuracy of a model under bounded Wasserstein shifts of the data distribution. We show that a simple procedure that randomizes the input of the model within a transformation space is provably robust to distributional shifts under that transformation. Our framework allows the datum-specific perturbation size to vary across different points in the input distribution and is general enough to include fixed-sized perturbations as well. Our certificates produce guaranteed lower bounds on the performance of the model for any shift (natural or adversarial) of the input distribution within a Wasserstein ball around the original distribution. We apply our technique to certify robustness against natural (non-adversarial) transformations of images such as color shifts, hue shifts, and changes in brightness and saturation. We obtain strong performance guarantees for the robust model under clearly visible shifts in the input images. Our experiments establish the non-vacuousness of our certificates by showing that the certified lower bound on a robust model's accuracy is higher than the empirical accuracy of an undefended model under a distribution shift. Moreover, our results also imply guaranteed lower bounds (hardness result) on the performance of models trained on so-called \"unlearnable\" datasets that have been poisoned to interfere with model training. We show that the performance of a robust model is guaranteed to remain above a certain threshold on the test distribution even when the base model is trained on the poisoned dataset.", "keywords": "Distributional Robustness;Wasserstein Distance;Certified Robustness", "primary_area": "", "supplementary_material": "/attachment/fdefb1e75639ac21bb43fa1a1c63ea0a0672f629.zip", "author": "Aounon Kumar;Alexander Levine;Tom Goldstein;Soheil Feizi", "authorids": "~Aounon_Kumar1;~Alexander_Levine2;~Tom_Goldstein1;~Soheil_Feizi2", "gender": "M;;M;M", "homepage": "https://aounon.github.io;;https://www.cs.umd.edu/~tomg/;https://www.cs.umd.edu/~sfeizi/", "dblp": "191/8334;;25/8184;57/2132", "google_scholar": "NjhpUykAAAAJ;;KmSuVtgAAAAJ;lptAmrMAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Aounon_Kumar1;~Alexander_Levine2;~Tom_Goldstein1;~Soheil_Feizi2", "aff": "University of Maryland, College Park;;University of Maryland, College Park;University of Maryland, College Park", "aff_domain": "umd.edu;;umd.edu;umd.edu", "position": "PhD student;;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nkumar2023provable,\ntitle={Provable Robustness against Wasserstein Distribution Shifts via Input Randomization},\nauthor={Aounon Kumar and Alexander Levine and Tom Goldstein and Soheil Feizi},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=HJFVrpCaGE}\n}", "github": "", "project": "", "reviewers": "fyNV;ait3;Ne1j", "pdf_size": 4618437, "recommendation": "6;6;6", "confidence": "3;3;3", "correctness": "3;3;4", "technical_novelty": "2;3;3", "empirical_novelty": "2;3;0", "wc_summary_paper": "78;85;20", "wc_strength_and_weaknesses": "218;180;203", "wc_clarity_quality_novelty_and_reproducibility": "37;120;30", "wc_summary_review": "31;29;30", 
"wc_review": "364;414;283", "wc_reply_reviewers": "50;0;12", "wc_reply_authors": "834;679;729", "reply_reviewers": "1;0;1", "reply_authors": "3;1;2", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.0, 0.0 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.6666666666666667, 1.247219128924647 ], "wc_summary_paper_avg": [ 61.0, 29.13188402192118 ], "wc_strength_and_weaknesses_avg": [ 200.33333333333334, 15.627610892974724 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 62.333333333333336, 40.876507787345155 ], "wc_summary_review_avg": [ 30.0, 0.816496580927726 ], "wc_review_avg": [ 353.6666666666667, 53.977361509762176 ], "wc_reply_reviewers_avg": [ 20.666666666666668, 21.312489817527705 ], "wc_reply_authors_avg": [ 747.3333333333334, 64.59274125025368 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12305610365015633550&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=HJFVrpCaGE", "email": "umd.edu;;umd.edu;umd.edu", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Maryland", "aff_unique_dep": "", "aff_unique_url": "https://www/umd.edu", "aff_unique_abbr": "UMD", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "College Park", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "HLQyRgRnoXo", "title": "Distributed Inference and Fine-tuning of Large Language Models Over The Internet", "track": "main", "status": "Reject", "tldr": "We propose a practical algorithm for running large language models by pooling together weak geographically distributed devices. Our system can inference BLOOM-176B over the Internet more than 10x faster compared to RAM offloading.", "abstract": "Large language models (LLMs) are useful in many NLP tasks and become more capable with size, scaling to over 100 billion parameters. With the release of BLOOM-176B and OPT-175B, everyone can download pretrained models of this scale. Still, using a pre-trained 100B+ model requires high-end hardware, making it inaccessible to most researchers. Recent studies in memory-efficient training (e.g. offloading) could alleviate these costs, but they do not cover important use cases of LLMs, such as autoregressive inference. In this work, we investigate methods for cost-efficient inference of large language models, comparing local and distributed strategies. We observe that a large enough model (100B+) could run efficiently on geodistributed devices in a consumer-grade network, for example by connecting existing compute resources of multiple research groups or pooling under-utilized compute from multiple cloud regions. To run LLMs in this unconventional setting, we develop a fault-tolerant algorithm for inferencing language models. We propose Petals - a decentralized system for running LLMs - and show that it can run BLOOM-176B over the Internet over $10\\times$ faster than offloading for sequential generation. We evaluate the performance of our system in both simulated conditions and an actual distributed system spanning two continents. 
The design of Petals allows participants to run inference, fine-tune, or run inference on fine-tuned models simultaneously without affecting each other's results.", "keywords": "volunteer computing;distributed deep learning;distributed inference;efficient inference;large language models;gpt-3", "primary_area": "", "supplementary_material": "", "author": "Alexander Borzunov;Dmitry Baranchuk;Tim Dettmers;Max Ryabinin;Younes Belkada;Artem Chumachenko;Pavel Samygin;Colin Raffel", "authorids": "~Alexander_Borzunov1;~Dmitry_Baranchuk2;~Tim_Dettmers2;~Max_Ryabinin1;~Younes_Belkada1;~Artem_Chumachenko1;~Pavel_Samygin1;~Colin_Raffel1", "gender": "M;M;M;Not Specified;M;M;M;", "homepage": "https://github.com/borzunov;;https://timdettmers.com/;https://mryab.github.io/;https://younesbelkada.github.io/;;;http://colinraffel.com", "dblp": "295/8854;215/3712;172/1045;276/0192;;;;149/0082", "google_scholar": "https://scholar.google.ru/citations?user=HdwzsCMAAAAJ;NiPmk8oAAAAJ;lHI3w5kAAAAJ;930PERsAAAAJ;;brCfhZkAAAAJ;;I66ZBYwAAAAJ", "orcid": ";0000-0001-7660-3666;;;;;;", "linkedin": ";;;;;;pavel-samygin-535b151a6/;", "or_profile": "~Alexander_Borzunov1;~Dmitry_Baranchuk2;~Tim_Dettmers2;~Max_Ryabinin1;~Younes_Belkada1;~Artem_Chumachenko1;~Pavel_Samygin1;~Colin_Raffel1", "aff": "HSE University;Higher School of Economics;University of Washington;Yandex;;;;University of North Carolina, Chapel Hill", "aff_domain": "hse.ru;hse.ru;cs.washington.edu;yandex-team.ru;;;;unc.edu", "position": "Instructor;PhD student;PhD student;Research Scientist;;;;Assistant Professor", "bibtex": "@misc{\nborzunov2023distributed,\ntitle={Distributed Inference and Fine-tuning of Large Language Models Over The Internet},\nauthor={Alexander Borzunov and Dmitry Baranchuk and Tim Dettmers and Max Ryabinin and Younes Belkada and Artem Chumachenko and Pavel Samygin and Colin Raffel},\nyear={2023},\nurl={https://openreview.net/forum?id=HLQyRgRnoXo}\n}", "github": "", "project": "", "reviewers": "s5Ah;X2vi;JnBu;3oHu", "site": "https://openreview.net/forum?id=HLQyRgRnoXo", "pdf_size": 636357, "recommendation": "5;5;5;6", "confidence": "3;4;4;4", "correctness": "3;2;2;2", "technical_novelty": "3;3;2;3", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "83;76;114;109", "wc_strength_and_weaknesses": "172;332;182;160", "wc_clarity_quality_novelty_and_reproducibility": "26;28;203;641", "wc_summary_review": "29;55;23;109", "wc_review": "310;491;522;1019", "wc_reply_reviewers": "0;0;0;267", "wc_reply_authors": "394;946;945;1556", "reply_reviewers": "0;0;0;1", "reply_authors": "1;2;2;2", "recommendation_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 95.5, 16.28649747490233 ], "wc_strength_and_weaknesses_avg": [ 211.5, 70.00535693788012 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 224.5, 250.97260806709565 ], "wc_summary_review_avg": [ 54.0, 33.95585369269929 ], "wc_review_avg": [ 585.5, 263.05180098224 ], "wc_reply_reviewers_avg": [ 66.75, 115.61439140522256 ], "wc_reply_authors_avg": [ 960.25, 411.0938913435713 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": -0.3333333333333333, "gs_citation": 56, "gs_cited_by_link":
"https://scholar.google.com/scholar?cites=11981493985293993587&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;1;2;3", "aff_unique_norm": "Higher School of Economics;University of Washington;Yandex;University of North Carolina", "aff_unique_dep": ";;;", "aff_unique_url": "https://hse.ru;https://www.washington.edu;https://yandex.com;https://www.unc.edu", "aff_unique_abbr": "HSE;UW;Yandex;UNC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Chapel Hill", "aff_country_unique_index": "0;0;1;0;1", "aff_country_unique": "Russian Federation;United States" }, { "id": "HN0ehX-ov5Q", "title": "A Fast, Well-Founded Approximation to the Empirical Neural Tangent Kernel", "track": "main", "status": "Reject", "tldr": "A fast and provable approximation to the empirical Neural Tangent Kernel", "abstract": "Empirical neural tangent kernels (eNTKs) can provide a good understanding of a given network's representation: they are often far less expensive to compute and applicable more broadly than infinite-width NTKs. For networks with $O$ output units (e.g. an $O$-class classifier), however, the eNTK on $N$ inputs is of size $NO \\times NO$, taking $\\mathcal{O}\\big( (N O)^2\\big)$ memory and up to $\\mathcal{O}\\big( (N O)^3 \\big)$ computation. Most existing applications have therefore used one of a handful of approximations yielding $N \\times N$ kernel matrices, saving orders of magnitude of computation, but with limited to no justification. We prove that one such approximation, which we call ``sum of logits,'' converges to the true eNTK at initialization. Our experiments demonstrate the quality of this approximation for various uses across a range of settings.", "keywords": "neural tangent kernels;deep learning theory", "primary_area": "", "supplementary_material": "/attachment/9d431587a4a91e9ea85369252d3f19670a65ac8f.zip", "author": "Mohamad Amin Mohamadi;Danica J. Sutherland", "authorids": "~Mohamad_Amin_Mohamadi1;~Danica_J._Sutherland1", "gender": "M;F", "homepage": ";http://www.djsutherland.ml", "dblp": "323/6299;92/10966", "google_scholar": ";https://scholar.google.co.uk/citations?user=uO_NqicAAAAJ", "orcid": ";0000-0002-1525-3532", "linkedin": "mohamad-amin-mohamadi-b4196b89/;", "or_profile": "~Mohamad_Amin_Mohamadi1;~Danica_J._Sutherland2", "aff": "University of British Columbia;University of British Columbia", "aff_domain": "ubc.ca;cs.ubc.ca", "position": "MS student;Assistant Professor", "bibtex": "@misc{\nmohamadi2023a,\ntitle={A Fast, Well-Founded Approximation to the Empirical Neural Tangent Kernel},\nauthor={Mohamad Amin Mohamadi and Danica J. 
Sutherland},\nyear={2023},\nurl={https://openreview.net/forum?id=HN0ehX-ov5Q}\n}", "github": "", "project": "", "reviewers": "wjkj;u749;bBN3;JUEa", "site": "https://openreview.net/forum?id=HN0ehX-ov5Q", "pdf_size": 998644, "recommendation": "5;5;5;6", "confidence": "4;3;3;3", "correctness": "4;3;3;3", "technical_novelty": "2;2;3;2", "empirical_novelty": "3;2;2;3", "wc_summary_paper": "109;30;249;148", "wc_strength_and_weaknesses": "202;162;423;109", "wc_clarity_quality_novelty_and_reproducibility": "33;27;709;81", "wc_summary_review": "20;21;308;303", "wc_review": "364;240;1689;641", "wc_reply_reviewers": "94;0;754;0", "wc_reply_authors": "1098;735;2671;769", "reply_reviewers": "1;0;2;0", "reply_authors": "5;3;5;4", "recommendation_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 134.0, 78.83844239963142 ], "wc_strength_and_weaknesses_avg": [ 224.0, 119.53451384432866 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 212.5, 287.4173794327685 ], "wc_summary_review_avg": [ 163.0, 142.5114030525277 ], "wc_review_avg": [ 733.5, 570.4404000419325 ], "wc_reply_reviewers_avg": [ 212.0, 315.2681398428963 ], "wc_reply_authors_avg": [ 1318.25, 793.772440627665 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 4.25, 0.82915619758885 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": -0.3333333333333333, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4580998787359405017&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0", "aff_unique_norm": "University of British Columbia", "aff_unique_dep": "", "aff_unique_url": "https://www.ubc.ca", "aff_unique_abbr": "UBC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { "id": "HNcqEt0zuMo", "title": "On the Role of Self-supervision in Deep Multi-view Clustering", "track": "main", "status": "Withdraw", "tldr": "We investigate self-supervision in deep multi-view clustering, and present several new models and novel findings.", "abstract": "Self-supervised learning is a central component in many recent approaches to deep multi-view clustering (MVC). However, we find large variations in the motivation and design of self-supervision-based methods for deep MVC. To address this, we present DeepMVC, a new, unified framework for deep MVC. Crucially, we show that many recent methods can be regarded as instances of our framework -- allowing us to implement recent methods in a unified and consistent manner. We make key observations about the effect of self-supervision, and in particular, drawbacks of representation alignment. Motivated by these insights, we develop several new DeepMVC instances, with new forms of self-supervision. We conduct extensive experiments, and find that (i) the popular contrastive alignment degrades performance when the number of views becomes large; (ii) all methods benefit from some form of self-supervision; and (iii) our new instances outperform previous methods on several datasets. Based on our findings, we suggest several promising directions for future research. 
To enhance the openness of the field, we provide an open-source implementation of DeepMVC, including recent models and our new instances. Our implementation includes a consistent evaluation protocol, facilitating fair and accurate evaluation of methods and components.\n", "keywords": "deep learning;multi-view clustering;self-supervised learning", "primary_area": "", "supplementary_material": "/attachment/b2a3fabaaddd8f8b3a6a9702913db5bc78a30022.zip", "author": "Daniel J. Trosten;Sigurd L\u00f8kse;Robert Jenssen;Michael Kampffmeyer", "authorids": "~Daniel_J._Trosten1;~Sigurd_L\u00f8kse1;~Robert_Jenssen1;~Michael_Kampffmeyer1", "gender": "M;M;M;M", "homepage": ";;https://uit.no/ansatte/robert.jenssen;https://sites.google.com/view/michaelkampffmeyer", "dblp": "230/8356;163/4540;45/5813;191/9382", "google_scholar": ";7nKP_jYAAAAJ;HiviXjIAAAAJ;https://scholar.google.no/citations?user=9lDh2UgAAAAJ", "orcid": "0000-0003-0708-3059;0000-0002-1953-4315;0000-0002-7496-8474;", "linkedin": "danieltrosten/;;robert-jenssen-10b79318/?originalSubdomain=no;", "or_profile": "~Daniel_J._Trosten1;~Sigurd_L\u00f8kse1;~Robert_Jenssen1;~Michael_Kampffmeyer1", "aff": "University of Troms\u00f8;UiT The Arctic University of Troms\u00f8;UiT The Arctic University of Norway;UiT The Arctic University of Norway", "aff_domain": "uit.no;uit.no;uit.no;uit.no", "position": "PhD student;Postdoc;Full Professor;Associate Professor", "bibtex": "@misc{\ntrosten2023on,\ntitle={On the Role of Self-supervision in Deep Multi-view Clustering},\nauthor={Daniel J. Trosten and Sigurd L{\\o}kse and Robert Jenssen and Michael Kampffmeyer},\nyear={2023},\nurl={https://openreview.net/forum?id=HNcqEt0zuMo}\n}", "github": "", "project": "", "reviewers": "pFxi;U7aB;D9qQ;kGHQ", "site": "https://openreview.net/forum?id=HNcqEt0zuMo", "pdf_size": 394359, "recommendation": "3;3;5;8", "confidence": "4;5;2;5", "correctness": "4;3;3;3", "technical_novelty": "1;2;2;3", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "38;49;43;75", "wc_strength_and_weaknesses": "98;308;175;137", "wc_clarity_quality_novelty_and_reproducibility": "49;24;17;26", "wc_summary_review": "38;32;26;29", "wc_review": "223;413;261;267", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.75, 2.0463381929681126 ], "confidence_avg": [ 4.0, 1.224744871391589 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 51.25, 14.254385290148432 ], "wc_strength_and_weaknesses_avg": [ 179.5, 79.02689415635668 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 29.0, 12.020815280171307 ], "wc_summary_review_avg": [ 31.25, 4.437059837324712 ], "wc_review_avg": [ 291.0, 72.42927584892728 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.09975093361076329, "corr_recommendation_correctness": -0.49374193110101877, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14001118447384370569&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "University of Troms\u00f8;Arctic University of Troms\u00f8;Arctic University of Norway", "aff_unique_dep": ";;", "aff_unique_url": 
"https://uit.no;https://uit.no;https://www.uit.no", "aff_unique_abbr": "UIT;UiT;UiT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Troms\u00f8", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Norway" }, { "id": "HO2q49XYRC", "title": "SaMoE: Parameter Efficient MoE Language Models via Self-Adaptive Expert Combination", "track": "main", "status": "Reject", "tldr": "SaMoE is a parameter efficient MoE architecture design that enables parameter savings on MoE while achieving comparable or better accuracy.", "abstract": "Recently, Mixture-of-Experts (MoE) has demonstrated success in scaling models to have large amounts of parameters without significant increases in computational cost. However, MoEs have been also reported to be parameter inefficient such that larger models do not always lead to better performance. \nIn this work, we study how to build parameter-efficient MoE models. Our analysis identifies that MoE layers exhibit poor gradient flow as the number of experts increases, leading to insufficient training of experts. To overcome this issue, we propose a new MoE architecture design (SaMoE), which improves the parameter efficiency of MoE models by learning a soft combination of a global set of expert layers for each MoE layer. Such a scheme enables substantial parameter savings on MoE while achieving comparable or better accuracy than the standard MoE training baseline. Extensive experiments on billion-scale GPT-3 style autoregressive MoE language models demonstrate that SaMoE significantly improves the parameter efficiency of MoE models by reducing up to 5.2X total parameters while obtaining superior pre-training and zero-shot generalization results as compared to baseline. ", "keywords": "Mixture-of-Expert;Autoregressive language model;Parameter efficiency.", "primary_area": "", "supplementary_material": "", "author": "Minjia Zhang;Conglong Li;Xiaoxia Wu;Zhewei Yao;Yuxiong He", "authorids": "~Minjia_Zhang1;~Conglong_Li1;~Xiaoxia_Wu1;~Zhewei_Yao1;~Yuxiong_He1", "gender": "M;;F;M;", "homepage": "https://minjiazhang.github.io/;;https://sites.google.com/view/xwu/home;;", "dblp": "58/9033;158/7995;63/1016;195/2887;https://dblp.org/pers/hd/h/He:Yuxiong", "google_scholar": "https://scholar.google.com/citations?hl=en;;Ry0Bdt8AAAAJ;gpSeMjYAAAAJ;SB3_eb0AAAAJ", "orcid": "0000-0002-8165-166X;;;;", "linkedin": "minjia-zhang-05857226/;;;;", "or_profile": "~Minjia_Zhang1;~Conglong_Li1;~Xiaoxia_Wu1;~Zhewei_Yao1;~Yuxiong_He1", "aff": "Microsoft ;Microsoft;Microsoft;Microsoft;Microsoft", "aff_domain": "microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com", "position": "Principle Researcher;Researcher;Researcher;Researcher;Researcher", "bibtex": "@misc{\nzhang2023samoe,\ntitle={SaMoE: Parameter Efficient MoE Language Models via Self-Adaptive Expert Combination},\nauthor={Minjia Zhang and Conglong Li and Xiaoxia Wu and Zhewei Yao and Yuxiong He},\nyear={2023},\nurl={https://openreview.net/forum?id=HO2q49XYRC}\n}", "github": "", "project": "", "reviewers": "9vyj;WUCK;VcJV", "site": "https://openreview.net/forum?id=HO2q49XYRC", "pdf_size": 793851, "recommendation": "3;3;6", "confidence": "5;4;5", "correctness": "2;3;3", "technical_novelty": "2;2;2", "empirical_novelty": "2;2;3", "wc_summary_paper": "27;64;36", "wc_strength_and_weaknesses": "487;261;217", "wc_clarity_quality_novelty_and_reproducibility": "14;33;49", "wc_summary_review": "123;136;105", "wc_review": "651;494;407", "wc_reply_reviewers": "21;83;0", "wc_reply_authors": "1156;992;313", "reply_reviewers": 
"1;1;0", "reply_authors": "2;2;1", "recommendation_avg": [ 4.0, 1.4142135623730951 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 42.333333333333336, 15.755069730795299 ], "wc_strength_and_weaknesses_avg": [ 321.6666666666667, 118.28026979265063 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 32.0, 14.30617582258329 ], "wc_summary_review_avg": [ 121.33333333333333, 12.710450643291745 ], "wc_review_avg": [ 517.3333333333334, 100.96974244242128 ], "wc_reply_reviewers_avg": [ 34.666666666666664, 35.23571420527127 ], "wc_reply_authors_avg": [ 820.3333333333334, 364.9331750090997 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.5, "corr_recommendation_correctness": 0.5, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16749576413132334364&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Microsoft", "aff_unique_dep": "Microsoft Corporation", "aff_unique_url": "https://www.microsoft.com", "aff_unique_abbr": "Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "HOF3CTk2WH6", "title": "LEARNING THE SPECTROGRAM TEMPORAL RESOLUTION FOR AUDIO CLASSIFICATION", "track": "main", "status": "Reject", "tldr": "This paper proposes DiffRes, which enables differentiable temporal resolution learning on audio spectrogram (as opposed to common fixed hop size approaches) to improve the performance of audio classification models. ", "abstract": "The audio spectrogram is a time-frequency representation that has been widely used for audio classification. The temporal resolution of a spectrogram depends on hop size. Previous works generally assume the hop size should be a constant value such as ten milliseconds. However, a fixed hop size or resolution is not always optimal for different types of sound. This paper proposes a novel method, DiffRes, that enables differentiable temporal resolution learning to improve the performance of audio classification models. Given a spectrogram calculated with a fixed hop size, DiffRes merges non-essential time frames while preserving important frames. DiffRes acts as a \"drop-in\" module between an audio spectrogram and a classifier, and can be end-to-end optimized. We evaluate DiffRes on the mel-spectrogram, followed by state-of-the-art classifier backbones, and apply it to five different subtasks. Compared with using the fixed-resolution mel-spectrogram, the DiffRes-based method can achieve the same or better classification accuracy with at least 25% fewer temporal dimensions on the feature level, which alleviates the computational cost at the same time. Starting from a high-temporal-resolution spectrogram such as one-millisecond hop size, we show that DiffRes can improve classification accuracy with the same computational complexity. 
", "keywords": "audio classification;differentiable temporal resolution;feature dimension reduction", "primary_area": "", "supplementary_material": "/attachment/46c2d1c519bc641f4f1fef3a23854d6c15b3edbb.zip", "author": "Haohe Liu;Xubo Liu;Qiuqiang Kong;Wenwu Wang;Mark D Plumbley", "authorids": "~Haohe_Liu1;~Xubo_Liu1;~Qiuqiang_Kong1;~Wenwu_Wang1;~Mark_D_Plumbley1", "gender": "M;M;M;;M", "homepage": "https://liuxubo717.github.io/;https://qiuqiangkong.github.io/;http://personal.ee.surrey.ac.uk/Personal/W.Wang/;https://www.surrey.ac.uk/people/mark-plumbley;https://haoheliu.github.io/", "dblp": "235/1970/;;https://dblp.org/pers/hd/w/Wang:Wenwu;84/1168;272/5570", "google_scholar": "-OlNYSgAAAAJ;;https://scholar.google.co.uk/citations?user=JQFnV5IAAAAJ;28TCymYAAAAJ;g3O4lJMAAAAJ", "orcid": ";;;0000-0002-9708-1075;0000-0003-1036-7888", "linkedin": ";;https://uk.linkedin.com/in/wenwu;;haohe-liu-4483a71a4/", "or_profile": "~Xubo_Liu1;~Qiuqiang_Kong1;~Wenwu_Wang1;~Mark_D_Plumbley1;~Haohe_Liu2", "aff": "University of Surrey;ByteDance;University of Surrey;University of Surrey;University of Surrey", "aff_domain": "surrey.ac.uk;bytedance.com;surrey.ac.uk;surrey.ac.uk;surrey.ac.uk", "position": "PhD student;Researcher;Full Professor;Full Professor;PhD student", "bibtex": "@misc{\nliu2023learning,\ntitle={{LEARNING} {THE} {SPECTROGRAM} {TEMPORAL} {RESOLUTION} {FOR} {AUDIO} {CLASSIFICATION}},\nauthor={Haohe Liu and Xubo Liu and Qiuqiang Kong and Wenwu Wang and Mark D Plumbley},\nyear={2023},\nurl={https://openreview.net/forum?id=HOF3CTk2WH6}\n}", "github": "", "project": "", "reviewers": "Uoii;wxoQ;qgYk", "site": "https://openreview.net/forum?id=HOF3CTk2WH6", "pdf_size": 7219112, "recommendation": "3;6;6", "confidence": "4;4;4", "correctness": "3;4;2", "technical_novelty": "2;4;2", "empirical_novelty": "2;4;3", "wc_summary_paper": "65;110;126", "wc_strength_and_weaknesses": "268;137;301", "wc_clarity_quality_novelty_and_reproducibility": "62;149;81", "wc_summary_review": "59;102;183", "wc_review": "454;498;691", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "770;763;671", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 2.6666666666666665, 0.9428090415820634 ], "empirical_novelty_avg": [ 3.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 100.33333333333333, 25.82419193099542 ], "wc_strength_and_weaknesses_avg": [ 235.33333333333334, 70.82529366139065 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 97.33333333333333, 37.3482113211448 ], "wc_summary_review_avg": [ 114.66666666666667, 51.40903509003927 ], "wc_review_avg": [ 547.6666666666666, 102.93147666719295 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 734.6666666666666, 45.10974272691975 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8761620590119799190&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "University of Surrey;ByteDance", "aff_unique_dep": ";", "aff_unique_url": "https://www.surrey.ac.uk;https://www.bytedance.com", "aff_unique_abbr": "Surrey;ByteDance", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0", 
"aff_country_unique": "United Kingdom;China" }, { "title": "Revisiting adapters with adversarial training", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11142", "id": "HPdxC1THU8T", "poster": "/media/PosterPDFs/ICLR%202023/11142.png?t=1681315474.6506383", "openreview": "https://openreview.net/forum?id=HPdxC1THU8T", "slides": "https://iclr.cc/virtual/2023/poster/11142", "video": "https://iclr.cc/virtual/2023/poster/11142", "author_site": "Sylvestre-Alvise Rebuffi, francesco croce, Sven Gowal", "tldr": "", "abstract": "While adversarial training is generally used as a defense mechanism, recent works show that it can also act as a regularizer. By co-training a neural network on clean and adversarial inputs, it is possible to improve classification accuracy on the clean, non-adversarial inputs. We demonstrate that, contrary to previous findings, it is not necessary to separate batch statistics when co-training on clean and adversarial inputs, and that it is sufficient to use adapters with few domain-specific parameters for each type of input. We establish that using the classification token of a Vision Transformer (ViT) as an adapter is enough to match the classification performance of dual normalization layers, while using significantly less additional parameters. First, we improve upon the top-1 accuracy of a non-adversarially trained ViT-B16 model by +1.12% on ImageNet (reaching 83.76% top-1 accuracy). Second, and more importantly, we show that training with adapters enables model soups through linear combinations of the clean and adversarial tokens. These model soups, which we call adversarial model soups, allow us to trade-off between clean and robust accuracy without sacrificing efficiency. Finally, we show that we can easily adapt the resulting models in the face of distribution shifts. 
Our ViT-B16 obtains top-1 accuracies on ImageNet variants that are on average +4.00% better than those obtained with Masked Autoencoders.", "keywords": "adapters;adversarial;robustness;soup", "primary_area": "", "supplementary_material": "", "author": "Sylvestre-Alvise Rebuffi;Francesco Croce;Sven Gowal", "authorids": "~Sylvestre-Alvise_Rebuffi1;~Francesco_Croce1;~Sven_Gowal2", "gender": "M;M;M", "homepage": ";;", "dblp": "190/7811;52/4288;75/8368", "google_scholar": "swP3h24AAAAJ;https://scholar.google.de/citations?view_op=list_works;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Sylvestre-Alvise_Rebuffi1;~Francesco_Croce1;~Sven_Gowal1", "aff": "Google DeepMind;University of Tuebingen;Google DeepMind", "aff_domain": "deepmind.com;uni-tuebingen.de;google.com", "position": "Researcher;PhD student;Research Engineer", "bibtex": "@inproceedings{\nrebuffi2023revisiting,\ntitle={Revisiting adapters with adversarial training},\nauthor={Sylvestre-Alvise Rebuffi and Francesco Croce and Sven Gowal},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=HPdxC1THU8T}\n}", "github": "", "project": "", "reviewers": "Wvkr;VYKg;rT7Z;NrHB", "pdf_size": 1041483, "recommendation": "6;6;6;8", "confidence": "3;4;4;4", "correctness": "3;3;3;4", "technical_novelty": "3;2;2;4", "empirical_novelty": "3;2;3;4", "wc_summary_paper": "70;50;89;52", "wc_strength_and_weaknesses": "126;150;564;272", "wc_clarity_quality_novelty_and_reproducibility": "154;21;20;4", "wc_summary_review": "29;30;52;16", "wc_review": "379;251;725;344", "wc_reply_reviewers": "0;58;61;41", "wc_reply_authors": "595;679;657;264", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 65.25, 15.769828787910159 ], "wc_strength_and_weaknesses_avg": [ 278.0, 174.1551032843999 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 49.75, 60.565563648000506 ], "wc_summary_review_avg": [ 31.75, 12.93010054098575 ], "wc_review_avg": [ 424.75, 179.54995822890075 ], "wc_reply_reviewers_avg": [ 40.0, 24.320773014030618 ], "wc_reply_authors_avg": [ 548.75, 167.26083671917942 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 1.0, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13196003782460604883&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=HPdxC1THU8T", "email": "deepmind.com;uni-tuebingen.de;google.com", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Google;University of Tuebingen", "aff_unique_dep": "Google DeepMind;", "aff_unique_url": "https://deepmind.com;https://www.uni-tuebingen.de/", "aff_unique_abbr": "DeepMind;Uni T\u00fcbingen", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United Kingdom;Germany" }, { "title": "Perfectly Secure Steganography Using Minimum Entropy Coupling", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11490", "id": "HQ67mj5rJdR", "poster": "", "openreview": 
"https://openreview.net/forum?id=HQ67mj5rJdR", "slides": "https://iclr.cc/virtual/2023/poster/11490", "video": "https://iclr.cc/virtual/2023/poster/11490", "author_site": "Christian Schroeder de Witt, Samuel Sokota, Zico Kolter, Jakob Foerster, Martin Strohmeier", "tldr": "A scalable, perfect security approach to information-theoretic steganography based on minimum entropy coupling ", "abstract": "Steganography is the practice of encoding secret information into innocuous content in such a manner that an adversarial third party would not realize that there is hidden meaning. While this problem has classically been studied in security literature, recent advances in generative models have led to a shared interest among security and machine learning researchers in developing scalable steganography techniques. In this work, we show that a steganography procedure is perfectly secure under Cachin (1998)'s information theoretic-model of steganography if and only if it is induced by a coupling. Furthermore, we show that, among perfectly secure procedures, a procedure is maximally efficient if and only if it is induced by a minimum entropy coupling. These insights yield what are, to the best of our knowledge, the first steganography algorithms to achieve perfect security guarantees with non-trivial efficiency; additionally, these algorithms are highly scalable. To provide empirical validation, we compare a minimum entropy coupling-based approach to three modern baselines---arithmetic coding, Meteor, and adaptive dynamic grouping---using GPT-2, WaveRNN, and Image Transformer as communication channels. We find that the minimum entropy coupling-based approach achieves superior encoding efficiency, despite its stronger security constraints. In aggregate, these results suggest that it may be natural to view information-theoretic steganography through the lens of minimum entropy coupling.", "keywords": "Information-Theoretic Steganography;Minimum Entropy Coupling", "primary_area": "", "supplementary_material": "/attachment/9e8916422b9bf239c1a036ca31fbbb45932d285e.zip", "author": "Christian Schroeder de Witt;Samuel Sokota;J Zico Kolter;Jakob Nicolaus Foerster;Martin Strohmeier", "authorids": "~Christian_Schroeder_de_Witt1;~Samuel_Sokota1;~J_Zico_Kolter1;~Jakob_Nicolaus_Foerster1;~Martin_Strohmeier1", "gender": "M;M;M;;M", "homepage": "https://www.schroederdewitt.com;https://ssokota.github.io/;https://www.jakobfoerster.com;https://www.cs.ox.ac.uk/people/martin.strohmeier/;http://www.zicokolter.com", "dblp": ";243/5881;176/5095;117/8959;67/2526", "google_scholar": "DE60h_0AAAAJ;;6z4lQzMAAAAJ;https://scholar.google.co.uk/citations?user=QUNoQIYAAAAJ;UXh1I6UAAAAJ", "orcid": ";;;;", "linkedin": ";samuel-sokota-87a153149/;;;", "or_profile": "~Christian_Schroeder_de_Witt1;~Samuel_Sokota1;~Jakob_Nicolaus_Foerster1;~Martin_Strohmeier1;~Zico_Kolter1", "aff": "University of Oxford;Carnegie Mellon University;University of Oxford, University of Oxford;University of Oxford;Carnegie Mellon University", "aff_domain": "oxford.ac.uk;cmu.edu;eng.ox.ac.uk;ox.ac.uk;cmu.edu", "position": "Postdoc;PhD student;Associate Professor;PhD / PostDoc / Fellow;Full Professor", "bibtex": "@inproceedings{\nwitt2023perfectly,\ntitle={Perfectly Secure Steganography Using Minimum Entropy Coupling},\nauthor={Christian Schroeder de Witt and Samuel Sokota and J Zico Kolter and Jakob Nicolaus Foerster and Martin Strohmeier},\nbooktitle={The Eleventh International Conference on Learning Representations 
},\nyear={2023},\nurl={https://openreview.net/forum?id=HQ67mj5rJdR}\n}", "github": "", "project": "", "reviewers": "tJp6;3EY8;x2x4;yanH", "pdf_size": 461850, "recommendation": "1;6;6;8", "confidence": "5;3;2;3", "correctness": "2;3;3;4", "technical_novelty": "1;3;3;4", "empirical_novelty": "1;3;3;3", "wc_summary_paper": "56;52;54;67", "wc_strength_and_weaknesses": "283;235;139;125", "wc_clarity_quality_novelty_and_reproducibility": "14;49;31;24", "wc_summary_review": "260;60;19;29", "wc_review": "613;396;243;245", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "391;386;24;357", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 5.25, 2.5860201081971503 ], "confidence_avg": [ 3.25, 1.0897247358851685 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.75, 1.0897247358851685 ], "empirical_novelty_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 57.25, 5.80409338312195 ], "wc_strength_and_weaknesses_avg": [ 195.5, 65.9147176281595 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 29.5, 12.776932339180638 ], "wc_summary_review_avg": [ 92.0, 98.16567628249703 ], "wc_review_avg": [ 374.25, 151.1677462291477 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 289.5, 153.83513902876678 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.8206028760472865, "corr_recommendation_correctness": 0.9570202978345285, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11951425499575479643&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=HQ67mj5rJdR", "email": "oxford.ac.uk;cmu.edu;eng.ox.ac.uk;ox.ac.uk;cmu.edu", "author_num": 5, "aff_unique_index": "0;1;0;0;1", "aff_unique_norm": "University of Oxford;Carnegie Mellon University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ox.ac.uk;https://www.cmu.edu", "aff_unique_abbr": "Oxford;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Accelerated Single-Call Methods for Constrained Min-Max Optimization", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11037", "id": "HRwN7IQLUKA", "poster": "/media/PosterPDFs/ICLR%202023/11037.png?t=1680887805.4153726", "openreview": "https://openreview.net/forum?id=HRwN7IQLUKA", "slides": "https://iclr.cc/virtual/2023/poster/11037", "video": "https://iclr.cc/virtual/2023/poster/11037", "author_site": "Yang Cai, Weiqiang Zheng", "tldr": "We propose the first single-call single-projection algorithms with optimal convergence rate for constrained min-max optimization problems in the nonconvex-nonconcave setting.", "abstract": "We study first-order methods for constrained min-max optimization. Existing methods either require two gradient calls or two projections in each iteration, which may be costly in some applications. In this paper, we first show that a variant of the \emph{Optimistic Gradient (OG)} method, a \emph{single-call single-projection} algorithm, has $O(\frac{1}{\sqrt{T}})$ best-iterate convergence rate for inclusion problems with operators that satisfy the weak Minty variational inequality (MVI). 
Our second result is the first single-call single-projection algorithm -- the \\emph{Accelerated Reflected Gradient (ARG)} method that achieves the \\emph{optimal $O(\\frac{1}{T})$} last-iterate convergence rate for inclusion problems that satisfy negative comonotonicity. Both the weak MVI and negative comonotonicity are well-studied assumptions and capture a rich set of non-convex non-concave min-max optimization problems. Finally, we show that the \\emph{Reflected Gradient (RG)} method, another \\emph{single-call single-projection} algorithm, has $O(\\frac{1}{\\sqrt{T}})$ last-iterate convergence rate for constrained convex-concave min-max optimization, answering an open problem of [Hsieh et al., 2019]. Our convergence rates hold for standard measures such as the tangent residual and the natural residual. ", "keywords": "min-max optimization;nonconvex-nonconcave;variational inequalities;saddle point problem;first-order method", "primary_area": "", "supplementary_material": "/attachment/7efc6d177cd96f5a1cd9a8704271b1b1addea519.zip", "author": "Yang Cai;Weiqiang Zheng", "authorids": "~Yang_Cai1;~Weiqiang_Zheng1", "gender": ";M", "homepage": ";https://weiqiang-zheng.com/", "dblp": ";277/5088", "google_scholar": ";YrfhnIwAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Yang_Cai1;~Weiqiang_Zheng1", "aff": ";Yale University", "aff_domain": ";yale.edu", "position": ";PhD student", "bibtex": "@inproceedings{\ncai2023accelerated,\ntitle={Accelerated Single-Call Methods for Constrained Min-Max Optimization},\nauthor={Yang Cai and Weiqiang Zheng},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=HRwN7IQLUKA}\n}", "github": "", "project": "", "reviewers": "Sm3u;GPXn;qU6A", "pdf_size": 339442, "recommendation": "3;5;8", "confidence": "3;4;4", "correctness": "4;4;4", "technical_novelty": "2;2;4", "empirical_novelty": "0;0;0", "wc_summary_paper": "68;151;223", "wc_strength_and_weaknesses": "315;230;125", "wc_clarity_quality_novelty_and_reproducibility": "46;426;13", "wc_summary_review": "58;129;40", "wc_review": "487;936;401", "wc_reply_reviewers": "100;0;0", "wc_reply_authors": "1283;1213;8", "reply_reviewers": "1;0;0", "reply_authors": "3;2;1", "recommendation_avg": [ 5.333333333333333, 2.0548046676563256 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 4.0, 0.0 ], "technical_novelty_avg": [ 2.6666666666666665, 0.9428090415820634 ], "empirical_novelty_avg": [ 0, 0 ], "wc_summary_paper_avg": [ 147.33333333333334, 63.331578923068776 ], "wc_strength_and_weaknesses_avg": [ 223.33333333333334, 77.7102881791651 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 161.66666666666666, 187.39678640669257 ], "wc_summary_review_avg": [ 75.66666666666667, 38.42163742245016 ], "wc_review_avg": [ 608.0, 234.5733716061281 ], "wc_reply_reviewers_avg": [ 33.333333333333336, 47.14045207910317 ], "wc_reply_authors_avg": [ 834.6666666666666, 585.2397419481657 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.8029550685469661, "corr_recommendation_correctness": 0.0, "gs_citation": 42, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6762197051257296944&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "pdf": "https://openreview.net/pdf?id=HRwN7IQLUKA", "email": ";yale.edu", "author_num": 2, "aff_unique_index": "0", 
"aff_unique_norm": "Yale University", "aff_unique_dep": "", "aff_unique_url": "https://www.yale.edu", "aff_unique_abbr": "Yale", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Softened Symbol Grounding for Neuro-symbolic Systems", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10685", "id": "HTJE5Krui0g", "poster": "/media/PosterPDFs/ICLR%202023/10685.png?t=1682852219.4783795", "openreview": "https://openreview.net/forum?id=HTJE5Krui0g", "slides": "https://iclr.cc/virtual/2023/poster/10685", "video": "https://iclr.cc/virtual/2023/poster/10685", "author_site": "Zenan Li, Yuan Yao, Taolue Chen, Jingwei Xu, Chun Cao, Xiaoxing Ma, Jian Lu", "tldr": "", "abstract": "Neuro-symbolic learning generally consists of two separated worlds, i.e., neural network training and symbolic constraint solving, \nwhose success hinges on symbol grounding, a fundamental problem in AI. This paper presents a novel, softened symbol grounding process, bridging the gap between the two worlds, and resulting in an effective and efficient neuro-symbolic learning framework. Technically, the framework features (1) modeling of symbol solution states as a Boltzmann distribution, which avoids expensive state searching and facilitates mutually beneficial interactions between network training and symbolic reasoning; (2) a new MCMC technique leveraging projection and SMT solvers, which efficiently samples from disconnected symbol solution spaces; (3) an annealing mechanism that can escape from sub-optimal symbol groundings. Experiments with three representative neuro-symbolic learning tasks demonstrate that, owing to its superior symbol grounding capability, our framework successfully solves problems well beyond the frontier of the existing proposals. 
", "keywords": "neuro-symbolic learning;symbol grounding problem;projection-based sampling", "primary_area": "", "supplementary_material": "", "author": "Zenan Li;Yuan Yao;Taolue Chen;Jingwei Xu;Chun Cao;Xiaoxing Ma;Jian L\\\"{u}", "authorids": "~Zenan_Li3;~Yuan_Yao7;~Taolue_Chen2;~Jingwei_Xu3;~Chun_Cao1;~Xiaoxing_Ma1;lj@nju.edu.cn", "gender": "M;M;;M;M;;", "homepage": "https://lizn-zn.github.io/;;;http://ics.nju.edu.cn/people/jingweixu/;https://ccao.cc;;", "dblp": "242/2285;25/4120-1;;148/9997-1;;;", "google_scholar": "eu4eqTcAAAAJ;;;15maGTwAAAAJ;;;", "orcid": ";;;;;;", "linkedin": ";;;;;;", "or_profile": "~Zenan_Li3;~Yuan_Yao7;~Taolue_Chen2;~Jingwei_Xu3;~Chun_Cao1;~Xiaoxing_Ma1;lj@nju.edu.cn", "aff": "Microsoft Research;Nanjing University;;Nanjing University;Nanjing University;;", "aff_domain": "research.microsoft.com;nju.edu.cn;;nju.edu.cn;nju.edu.cn;;", "position": "Intern;Associate Professor;;Assistant Professor;Full Professor;;", "bibtex": "@inproceedings{\nli2023softened,\ntitle={Softened Symbol Grounding for Neuro-symbolic Systems},\nauthor={Zenan Li and Yuan Yao and Taolue Chen and Jingwei Xu and Chun Cao and Xiaoxing Ma and Jian L{\\textbackslash}''{\\{}u{\\}}},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=HTJE5Krui0g}\n}", "github": "", "project": "", "reviewers": "ypT3;78tc;qkDm;262g", "pdf_size": 1257199, "recommendation": "5;6;8;10", "confidence": "2;3;3;5", "correctness": "2;4;3;4", "technical_novelty": "2;3;3;4", "empirical_novelty": "1;0;3;3", "wc_summary_paper": "123;116;102;97", "wc_strength_and_weaknesses": "178;695;255;164", "wc_clarity_quality_novelty_and_reproducibility": "478;6;16;69", "wc_summary_review": "66;57;12;34", "wc_review": "845;874;385;364", "wc_reply_reviewers": "112;87;27;0", "wc_reply_authors": "139;555;477;293", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 7.25, 1.920286436967152 ], "confidence_avg": [ 3.25, 1.0897247358851685 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 1.75, 1.299038105676658 ], "wc_summary_paper_avg": [ 109.5, 10.452272480183437 ], "wc_strength_and_weaknesses_avg": [ 323.0, 217.55114341230203 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 142.25, 195.3181699177012 ], "wc_summary_review_avg": [ 42.25, 21.00446381129497 ], "wc_review_avg": [ 617.0, 242.83018757971587 ], "wc_reply_reviewers_avg": [ 56.5, 44.924937395615814 ], "wc_reply_authors_avg": [ 366.0, 161.94134740701648 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.9258889211887232, "corr_recommendation_correctness": 0.5888015039841447, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17640728868212937833&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=HTJE5Krui0g", "email": "research.microsoft.com;nju.edu.cn;;nju.edu.cn;nju.edu.cn;;", "author_num": 7, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Microsoft;Nanjing University", "aff_unique_dep": "Microsoft Research;", "aff_unique_url": "https://www.microsoft.com/en-us/research;https://www.nju.edu.cn", "aff_unique_abbr": "MSR;Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "United States;China" }, { "id": "HTKSDFhGYhQ", 
"title": "Identifiability of Label Noise Transition Matrix", "track": "main", "status": "Reject", "tldr": "This paper provides understandings for when a label noise transition matrix is identifiable, and what factors contribute to its identifiability. ", "abstract": "The noise transition matrix plays a central role in the problem of learning with noisy labels. Among many other reasons, a large number of existing solutions rely on access to it. Identifying and estimating the transition matrix without ground truth labels is a critical and challenging task. When label noise transition depends on each instance, the problem of identifying the instance-dependent noise transition matrix becomes substantially more challenging. Despite recent works proposing solutions for learning from instance-dependent noisy labels, the field lacks a unified understanding of when such a problem remains identifiable. The goal of this paper is to characterize the identifiability of the label noise transition matrix. Building on Kruskal's identifiability results, we show the necessity of multiple noisy labels in identifying the noise transition matrix for the generic case at the instance level. We further instantiate the results to \nrelate to the successes of the state-of-the-art solutions and how additional assumptions alleviated the requirement of multiple noisy labels. Our result also reveals that disentangled features are helpful in the above identification task and we provide empirical evidence. ", "keywords": "identifiability;label noise transition matrix;noisy labels", "primary_area": "", "supplementary_material": "/attachment/f7c5b9bfab8bfc02fd39cc850eacd6ab3cffd8aa.zip", "author": "Yang Liu;Hao Cheng;Kun Zhang", "authorids": "~Yang_Liu3;~Hao_Cheng5;~Kun_Zhang1", "gender": "M;M;M", "homepage": "http://www.yliuu.com;https://haochenglouis.github.io;http://www.andrew.cmu.edu/user/kunz1/", "dblp": "51/3710-18;;96/3115-1", "google_scholar": "jKrIVCIAAAAJ;ftlVqVIAAAAJ;RGoypN4AAAAJ", "orcid": "0000-0001-8420-6011;0000-0001-8864-7818;", "linkedin": ";;", "or_profile": "~Yang_Liu3;~Hao_Cheng5;~Kun_Zhang1", "aff": "University of California, Santa Cruz;University of California, Santa Cruz;Carnegie Mellon University", "aff_domain": "ucsc.edu;ucsc.edu;cmu.edu", "position": "Assistant Professor;PhD student;Associate Professor", "bibtex": "@misc{\nliu2023identifiability,\ntitle={Identifiability of Label Noise Transition Matrix },\nauthor={Yang Liu and Hao Cheng and Kun Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=HTKSDFhGYhQ}\n}", "github": "", "project": "", "reviewers": "14GL;4T3g;G5Yr;GkBx", "site": "https://openreview.net/forum?id=HTKSDFhGYhQ", "pdf_size": 474901, "recommendation": "3;5;5;6", "confidence": "3;4;3;3", "correctness": "2;3;3;4", "technical_novelty": "3;3;2;3", "empirical_novelty": "3;3;2;3", "wc_summary_paper": "85;67;57;126", "wc_strength_and_weaknesses": "611;261;122;155", "wc_clarity_quality_novelty_and_reproducibility": "56;104;9;29", "wc_summary_review": "148;44;64;32", "wc_review": "900;476;252;342", "wc_reply_reviewers": "0;88;114;0", "wc_reply_authors": "622;1159;1111;657", "reply_reviewers": "0;1;1;0", "reply_authors": "1;4;4;1", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 83.75, 26.37588861062315 ], "wc_strength_and_weaknesses_avg": [ 287.25, 
193.84320338871828 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 49.5, 35.61249780624774 ], "wc_summary_review_avg": [ 72.0, 45.34313619501854 ], "wc_review_avg": [ 492.5, 248.40440817344606 ], "wc_reply_reviewers_avg": [ 50.5, 51.32981589680602 ], "wc_reply_authors_avg": [ 887.25, 248.63866855338492 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.5, 1.5 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.13245323570650439, "corr_recommendation_correctness": 0.9733285267845754, "gs_citation": 47, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15938507645581117140&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 9, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of California, Santa Cruz;Carnegie Mellon University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucsc.edu;https://www.cmu.edu", "aff_unique_abbr": "UCSC;CMU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Santa Cruz;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "HTbp9Y7g9P", "title": "Hard Regularization to Prevent Collapse in Online Deep Clustering without Data Augmentation", "track": "main", "status": "Reject", "tldr": "regularizing hard cluster assignments with a Bayesian optimization problem to prevent collapse in online deep clustering without data augmentation", "abstract": "Online deep clustering refers to the joint use of a feature extraction network and a clustering model to assign cluster labels to each new data point or batch as it is processed. While faster and more versatile than offline methods, online clustering can easily reach the collapsed solution where the encoder maps all inputs to the same point and all are put into a single cluster. Successful existing models have employed various techniques to avoid this problem, most of which require data augmentation or which aim to make the average soft assignment across the dataset the same for each cluster. We propose a method that does not require data augmentation, and that, different from existing methods, regularizes the hard assignments. Using a Bayesian framework, we derive an intuitive optimization objective that can be straightforwardly included in the training of the encoder network. Tested on four image datasets, we show that it consistently avoids collapse more robustly than other methods and that it leads to more accurate clustering. 
We also conduct further experiments and analysis justifying our choice to regularize the hard cluster assignments.", "keywords": "deep learning;clustering;online", "primary_area": "", "supplementary_material": "/attachment/7db9859fd010e7e2ae435f33226991c756361815.zip", "author": "Louis Mahon;Thomas Lukasiewicz", "authorids": "~Louis_Mahon1;~Thomas_Lukasiewicz2", "gender": ";", "homepage": ";https://www.cs.ox.ac.uk/people/thomas.lukasiewicz/", "dblp": ";l/ThomasLukasiewicz", "google_scholar": "https://scholar.google.co.uk/citations?hl=en;arjucpEAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Louis_Mahon1;~Thomas_Lukasiewicz2", "aff": "School of Informatics, University of Edinburgh;Department of Computer Science, University of Oxford", "aff_domain": "inf.ed.ac.uk;cs.ox.ac.uk", "position": "Postdoc;Full Professor", "bibtex": "@misc{\nmahon2023hard,\ntitle={Hard Regularization to Prevent Collapse in Online Deep Clustering without Data Augmentation},\nauthor={Louis Mahon and Thomas Lukasiewicz},\nyear={2023},\nurl={https://openreview.net/forum?id=HTbp9Y7g9P}\n}", "github": "", "project": "", "reviewers": "VqHu;Zch2;pkxS;uDkk", "site": "https://openreview.net/forum?id=HTbp9Y7g9P", "pdf_size": 288155, "recommendation": "3;3;3;5", "confidence": "4;4;4;3", "correctness": "2;2;3;4", "technical_novelty": "2;1;2;3", "empirical_novelty": "1;2;1;2", "wc_summary_paper": "37;55;55;44", "wc_strength_and_weaknesses": "271;87;76;250", "wc_clarity_quality_novelty_and_reproducibility": "69;93;53;39", "wc_summary_review": "77;67;354;64", "wc_review": "454;302;538;397", "wc_reply_reviewers": "0;0;130;0", "wc_reply_authors": "520;522;683;166", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.82915619758885 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 1.5, 0.5 ], "wc_summary_paper_avg": [ 47.75, 7.660776723022281 ], "wc_strength_and_weaknesses_avg": [ 171.0, 89.89160138744887 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 63.5, 20.068632240389476 ], "wc_summary_review_avg": [ 140.5, 123.35821821021898 ], "wc_review_avg": [ 422.75, 85.88182287306202 ], "wc_reply_reviewers_avg": [ 32.5, 56.29165124598851 ], "wc_reply_authors_avg": [ 472.75, 189.04943136650795 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": 0.8703882797784891, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2960577907251248803&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "University of Edinburgh;University of Oxford", "aff_unique_dep": "School of Informatics;Department of Computer Science", "aff_unique_url": "https://www.ed.ac.uk;https://www.ox.ac.uk", "aff_unique_abbr": "Edinburgh;Oxford", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Edinburgh;Oxford", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "id": "HUCgU5EQluN", "title": "Effective Self-Supervised Transformers For Sparse Time Series Data", "track": "main", "status": "Reject", "tldr": "We propose a Transformer based model for sparse time series that utilizes an input binning scheme to aggregate the time series inputs.", "abstract": "Electronic health records (EHRs) typically contain a wide range of time 
series data that is characterized by high sparsity and irregular observations. Self-supervised Transformer architectures have shown outstanding performance in a variety of structured tasks in natural language processing and computer vision. However, their use in modelling sparse irregular time series with tabular data has not been widely explored. One of the major challenges is the quadratic scaling of self-attention layers that can significantly limit the input sequence length. In this work, we introduce TESS, Transformers for EHR data with Self Supervised learning, a self-supervised Transformer-based architecture designed to extract robust representations from EHR data. We propose an input binning scheme that aggregates the time series inputs and sparsity information into a regular sequence with fixed length, enabling the training of larger and deeper Transformers. We demonstrate that significant compression of EHR input data is possible without sacrificing useful information, likely due to the highly correlated nature of observations in small time bins. We then introduce self-supervised prediction tasks that provide rich and informative signals for model pre-training. TESS outperforms state-of-the-art deep learning models on multiple downstream tasks from the MIMIC-IV and PhysioNet-2012 EHR datasets.", "keywords": "Representation learning;Transformers;Sparse Time Series", "primary_area": "", "supplementary_material": "/attachment/a6e566cf5cd9d27362244d897d67aadaefedcf93.zip", "author": "Alex Labach;Aslesha Pokhrel;Seung Eun Yi;Saba Zuberi;Maksims Volkovs;Rahul G Krishnan", "authorids": "~Alex_Labach1;~Aslesha_Pokhrel1;~Seung_Eun_Yi1;~Saba_Zuberi1;~Maksims_Volkovs3;~Rahul_G_Krishnan1", "gender": ";F;F;;M;M", "homepage": ";;;;http://www.cs.toronto.edu/~rahulgk/index.html;https://www.cs.toronto.edu/~mvolkovs", "dblp": "239/8468;;;;172/0880;22/1815", "google_scholar": "HxQdunIAAAAJ;;;https://scholar.google.com/citations?hl=en;ilJgXHkAAAAJ;https://scholar.google.ca/citations?user=m9I8jgcAAAAJ", "orcid": "0000-0002-4746-2718;;;;;", "linkedin": "alex-labach-a72aa68b/;aslesha-pokhrel/;seungeunyi/;;rahulgk/;", "or_profile": "~Alex_Labach1;~Aslesha_Pokhrel1;~Seung_Eun_Yi1;~Saba_Zuberi1;~Rahul_G_Krishnan1;~Maksims_Volkovs1", "aff": "Layer 6 AI;;Meta;Layer 6 AI;Department of Computer Science, University of Toronto;Layer6 AI", "aff_domain": "layer6.ai;;meta.com;layer6.ai;cs.toronto.edu;layer6.ai", "position": "Researcher;;Research Engineer;Researcher;Assistant Professor;Principal Researcher", "bibtex": "@misc{\nlabach2023effective,\ntitle={Effective Self-Supervised Transformers For Sparse Time Series Data},\nauthor={Alex Labach and Aslesha Pokhrel and Seung Eun Yi and Saba Zuberi and Maksims Volkovs and Rahul G Krishnan},\nyear={2023},\nurl={https://openreview.net/forum?id=HUCgU5EQluN}\n}", "github": "", "project": "", "reviewers": "fWW7;HAwr;6eHs;EBR4", "site": "https://openreview.net/forum?id=HUCgU5EQluN", "pdf_size": 980301, "recommendation": "3;5;5;6", "confidence": "5;5;5;3", "correctness": "2;2;2;3", "technical_novelty": "2;3;2;3", "empirical_novelty": "2;2;0;2", "wc_summary_paper": "62;58;91;80", "wc_strength_and_weaknesses": "548;350;359;421", "wc_clarity_quality_novelty_and_reproducibility": "40;38;125;57", "wc_summary_review": "39;441;29;70", "wc_review": "689;887;604;628", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "570;1015;895;743", "reply_reviewers": "0;0;0;0", "reply_authors": "1;2;2;1", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 4.5, 
0.8660254037844386 ], "correctness_avg": [ 2.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 1.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 72.75, 13.40475661845451 ], "wc_strength_and_weaknesses_avg": [ 419.5, 79.06484680311472 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 65.0, 35.41892149684968 ], "wc_summary_review_avg": [ 144.75, 171.70669031811195 ], "wc_review_avg": [ 702.0, 111.21375814169755 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 805.75, 166.78335498484253 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.6622661785325219, "corr_recommendation_correctness": 0.6622661785325219, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:zF6IygOcVoAJ:scholar.google.com/&scioq=Effective+Self-Supervised+Transformers+For+Sparse+Time+Series+Data&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;0;2;3", "aff_unique_norm": "Layer 6 AI;Meta;University of Toronto;Layer6 AI", "aff_unique_dep": ";Meta Platforms, Inc.;Department of Computer Science;", "aff_unique_url": "https://layer6.ai;https://meta.com;https://www.utoronto.ca;https://layer6.ai", "aff_unique_abbr": "Layer 6 AI;Meta;U of T;Layer6", "aff_campus_unique_index": "1", "aff_campus_unique": ";Toronto", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "Canada;United States" }, { "id": "HUsh1c7p0gc", "title": "T2D: Spatiotemporal Feature Learning Based on Triple 2D Decomposition", "track": "main", "status": "Reject", "tldr": "A new spatiotemporal feature learning method based on triple 2D decomposition.", "abstract": "In this paper, we propose triple 2D decomposition (T2D) of a 3D vision Transformer (ViT) for efficient spatiotemporal feature learning. The idea is to divide the input 3D video data into three 2D data planes and use three 2D filters, implemented by 2D ViT, to extract spatial and motion features. Such a design not only effectively reduces the computational complexity of a 3D ViT, but also guides the network to focus on learning correlations among more relevant tokens. Compared with other decomposition methods, the proposed T2D is shown to be more powerful at a similar computational complexity. The CLIP-initialized T2D-B model achieves state-of-the-art top-1 accuracy of 85.0% and 70.5% on Kinetics-400 and Something-Something-v2 datasets, respectively. It also outperforms other methods by a large margin on FineGym (+17.9%) and Diving-48 (+1.3%) datasets. Under the zero-shot setting, the T2D model obtains a 2.5% top-1 accuracy gain over X-CLIP on HMDB-51 dataset. In addition, T2D is a general decomposition method that can be plugged into any ViT structure of any model size. We demonstrate this by building a tiny-size T2D model based on a hierarchical ViT structure named DaViT. The resulting DaViT-T2D-T model achieves 82.0\% and 71.3\% top-1 accuracy with only 91 GFLOPs on Kinetics-400 and Something-Something-v2 datasets, respectively. Source code will be made publicly available. 
", "keywords": "spatiotemporal feature learning;video recognition;action recognition;video Transformer", "primary_area": "", "supplementary_material": "", "author": "Yucheng Zhao;Chong Luo;Chuanxin Tang;Dongdong Chen;Noel C Codella;Lu Yuan;Zheng-Jun Zha", "authorids": "~Yucheng_Zhao1;~Chong_Luo1;~Chuanxin_Tang1;~Dongdong_Chen1;~Noel_C_Codella1;~Lu_Yuan1;~Zheng-Jun_Zha2", "gender": "M;F;M;M;M;M;M", "homepage": ";https://www.microsoft.com/en-us/research/people/cluo/;https://www.microsoft.com/en-us/research/people/chutan/;http://www.dongdongchen.bid/;http://www.noelcodella.com/;https://www.microsoft.com/en-us/research/people/luyuan/;", "dblp": "177/8576;79/3712;159/3894;92/1489-1;;;23/1818", "google_scholar": "QWemjjQAAAAJ;01iBf38AAAAJ;3ZC8B7MAAAAJ;https://scholar.google.com.sg/citations?user=sYKpKqEAAAAJ;8BnjC-4AAAAJ;k9TsUVsAAAAJ;", "orcid": ";0000-0003-0939-474X;;;;;", "linkedin": ";;;;noel-c-f-codella-ph-d-1b1b1723/;;", "or_profile": "~Yucheng_Zhao1;~Chong_Luo1;~Chuanxin_Tang1;~Dongdong_Chen1;~Noel_C_Codella1;~Lu_Yuan1;~Zheng-Jun_Zha2", "aff": "Microsoft;Microsoft Research Asia;Microsoft Research Asia;Microsoft Research;Microsoft;Microsoft;University of Science and Technology of China", "aff_domain": "microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com;ustc.edu.cn", "position": "Intern;Principal Researcher;Researcher;Principal Researcher;Principal Researcher;Principal Research Manager;Full Professor", "bibtex": "@misc{\nzhao2023td,\ntitle={T2D: Spatiotemporal Feature Learning Based on Triple 2D Decomposition},\nauthor={Yucheng Zhao and Chong Luo and Chuanxin Tang and Dongdong Chen and Noel C Codella and Lu Yuan and Zheng-Jun Zha},\nyear={2023},\nurl={https://openreview.net/forum?id=HUsh1c7p0gc}\n}", "github": "", "project": "", "reviewers": "LYJp;LefE;Z88y;NtS5", "site": "https://openreview.net/forum?id=HUsh1c7p0gc", "pdf_size": 31180823, "recommendation": "3;5;6;8", "confidence": "4;2;3;3", "correctness": "3;3;4;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "70;197;97;77", "wc_strength_and_weaknesses": "143;3;95;143", "wc_clarity_quality_novelty_and_reproducibility": "14;3;14;24", "wc_summary_review": "14;3;29;54", "wc_review": "241;206;235;298", "wc_reply_reviewers": "0;0;28;0", "wc_reply_authors": "504;941;296;275", "reply_reviewers": "0;0;1;0", "reply_authors": "1;2;1;1", "recommendation_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 110.25, 51.055729355283916 ], "wc_strength_and_weaknesses_avg": [ 96.0, 57.15767664977295 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 13.75, 7.428828979051813 ], "wc_summary_review_avg": [ 25.0, 19.11805429430516 ], "wc_review_avg": [ 245.0, 33.3391661563393 ], "wc_reply_reviewers_avg": [ 7.0, 12.12435565298214 ], "wc_reply_authors_avg": [ 504.0, 267.7097308653535 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.39223227027636803, "corr_recommendation_correctness": 0.16012815380508713, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:GNphm0hgkesJ:scholar.google.com/&scioq=T2D:+Spatiotemporal+Feature+Learning+Based+on+Triple+2D+Decomposition&hl=en&as_sdt=0,11", "gs_version_total": 0, 
"aff_unique_index": "0;0;0;0;0;0;1", "aff_unique_norm": "Microsoft;University of Science and Technology of China", "aff_unique_dep": "Microsoft Corporation;", "aff_unique_url": "https://www.microsoft.com;http://www.ustc.edu.cn", "aff_unique_abbr": "Microsoft;USTC", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;1;1;0;0;0;1", "aff_country_unique": "United States;China" }, { "id": "HVVDVaegjaW", "title": "MonoFlow: A Unified Generative Modeling Framework for GAN Variants", "track": "main", "status": "Reject", "tldr": "", "abstract": "Generative adversarial networks (GANs) play a minmax two-player game via adversarial training. The conventional understanding of adversarial training is that the discriminator is trained to estimate a divergence and the generator learns to minimize this divergence. We argue that despite the fact that many variants of GANs are developed following this paradigm, the existing theoretical understanding of GANs and the practical algorithms are inconsistent. In order to gain deeper theoretical insights and algorithmic inspiration for these GAN variants, we leverage Wasserstein gradient flows which characterize the evolution of particles in the sample space. Based on this, we introduce a unified generative modeling framework \u2013 MonoFlow: the particle evolution is rescaled via an arbitrary monotonically increasing mapping. Under our framework, adversarial training can be viewed as a procedure first obtaining MonoFlow's vector field via the discriminator and then the generator learns to parameterize the flow defined by the corresponding vector field. We also reveal the fundamental difference between variational divergence minimization and adversarial training. These analysis help us to identify what types of generator loss functions can lead to the successful training of GANs and suggest that GANs may have more loss designs beyond those developed in the literature, e.g., non-saturated loss, as long as they realize MonoFlow. 
Consistent empirical studies are also included to validate the effectiveness of our framework.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Mingxuan Yi;Zhanxing Zhu;Song Liu", "authorids": "~Mingxuan_Yi1;~Zhanxing_Zhu1;~Song_Liu1", "gender": "M;M;M", "homepage": "https://mingxuan-yi.github.io/;https://zhanxingzhu.github.io/;http://allmodelsarewrong.net", "dblp": "https://dblp.uni-trier.de/pid/259/3016;87/7756.html;80/1141-2", "google_scholar": "l0xKeZcAAAAJ;a2sHceIAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Mingxuan_Yi1;~Zhanxing_Zhu1;~Song_Liu1", "aff": "University of Bristol;University of Southampton;University of Bristol, UK", "aff_domain": "bristol.ac.uk;soton.ac.uk;bristol.ac.uk", "position": "PhD student;Associate Professor;Lecturer", "bibtex": "@misc{\nyi2023monoflow,\ntitle={MonoFlow: A Unified Generative Modeling Framework for {GAN} Variants},\nauthor={Mingxuan Yi and Zhanxing Zhu and Song Liu},\nyear={2023},\nurl={https://openreview.net/forum?id=HVVDVaegjaW}\n}", "github": "", "project": "", "reviewers": "4htw;Wf4w;wjv3", "site": "https://openreview.net/forum?id=HVVDVaegjaW", "pdf_size": 2097116, "recommendation": "3;6;6", "confidence": "4;2;4", "correctness": "2;3;3", "technical_novelty": "3;4;3", "empirical_novelty": "2;4;3", "wc_summary_paper": "95;106;85", "wc_strength_and_weaknesses": "829;258;293", "wc_clarity_quality_novelty_and_reproducibility": "434;40;58", "wc_summary_review": "121;72;133", "wc_review": "1479;476;569", "wc_reply_reviewers": "1027;0;0", "wc_reply_authors": "3117;0;1001", "reply_reviewers": "4;0;0", "reply_authors": "7;0;2", "recommendation_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 95.33333333333333, 8.576453553512405 ], "wc_strength_and_weaknesses_avg": [ 460.0, 261.3133495760725 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 177.33333333333334, 181.63944750215708 ], "wc_summary_review_avg": [ 108.66666666666667, 26.386023236217735 ], "wc_review_avg": [ 841.3333333333334, 452.49407608154263 ], "wc_reply_reviewers_avg": [ 342.3333333333333, 484.13244285238954 ], "wc_reply_authors_avg": [ 1372.6666666666667, 1299.3650586173062 ], "reply_reviewers_avg": [ 1.3333333333333333, 1.8856180831641267 ], "reply_authors_avg": [ 3.0, 2.943920288775949 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.5000000000000001, "corr_recommendation_correctness": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:-0iTqLNVeiUJ:scholar.google.com/&scioq=MonoFlow:+A+Unified+Generative+Modeling+Framework+for+GAN+Variants&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Bristol;University of Southampton", "aff_unique_dep": ";", "aff_unique_url": "https://www.bristol.ac.uk;https://www.southampton.ac.uk", "aff_unique_abbr": "Bristol;Southampton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Can Neural Networks Learn Implicit Logic from Physical Reasoning?", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10947", "id": "HVoJCRLByVk", "poster": 
"/media/PosterPDFs/ICLR%202023/10947.png?t=1682956380.4204295", "openreview": "https://openreview.net/forum?id=HVoJCRLByVk", "slides": "https://iclr.cc/virtual/2023/poster/10947", "video": "https://iclr.cc/virtual/2023/poster/10947", "author_site": "Aaron Traylor, Roman Feiman, Ellie Pavlick", "tldr": "", "abstract": "Despite the success of neural network models in a range of domains, it remains an open question whether they can learn to represent abstract logical operators such as negation and disjunction. We test the hypothesis that neural networks without inherent inductive biases for logical reasoning can acquire an implicit representation of negation and disjunction. Here, implicit refers to limited, domain-specific forms of these operators, and work in psychology suggests these operators may be a precursor (developmentally and evolutionarily) to the type of abstract, domain-general logic that is characteristic of adult humans. To test neural networks, we adapt a test designed to diagnose the presence of negation and disjunction in animals and pre-verbal children, which requires inferring the location of a hidden object using constraints of the physical environment as well as implicit logic: if a ball is hidden in A or B, and shown not to be in A, can the subject infer that it is in B? Our results show that, despite the neural networks learning to track objects behind occlusion, they are unable to generalize to a task that requires implicit logic. We further show that models are unable to generalize to the test task even when they are trained directly on a logically identical (though visually dissimilar) task. However, experiments using transfer learning reveal that the models do recognize structural similarity between tasks which invoke the same logical reasoning pattern, suggesting that some desirable abstractions are learned, even if they are not yet sufficient to pass established tests of logical reasoning.", "keywords": "logic;logical operators;logical reasoning;intuitive physics;physical reasoning;representation learning;developmental psychology;cognitive science", "primary_area": "", "supplementary_material": "/attachment/eec763b5fb4873838b76345b9c35ea4c82029efd.zip", "author": "Aaron Traylor;Roman Feiman;Ellie Pavlick", "authorids": "~Aaron_Traylor1;roman_feiman@brown.edu;~Ellie_Pavlick1", "gender": "M;;F", "homepage": "https://attraylor.github.io;;http://cs.brown.edu/people/epavlick/", "dblp": ";;141/4059", "google_scholar": ";;sFyrSa8AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Aaron_Traylor1;roman_feiman@brown.edu;~Ellie_Pavlick1", "aff": "Brown University;;Brown University", "aff_domain": "brown.edu;;brown.edu", "position": "PhD student;;Assistant Professor", "bibtex": "@inproceedings{\ntraylor2023can,\ntitle={Can Neural Networks Learn Implicit Logic from Physical Reasoning?},\nauthor={Aaron Traylor and Roman Feiman and Ellie Pavlick},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=HVoJCRLByVk}\n}", "github": "", "project": "", "reviewers": "9VQw;mWJf;1KAw;7YcZ;ggkT", "pdf_size": 322658, "recommendation": "6;6;6;8;8", "confidence": "4;3;3;4;3", "correctness": "3;3;4;4;3", "technical_novelty": "2;3;3;3;3", "empirical_novelty": "3;3;3;3;3", "wc_summary_paper": "71;101;86;123;139", "wc_strength_and_weaknesses": "267;88;137;595;180", "wc_clarity_quality_novelty_and_reproducibility": "38;86;14;227;97", "wc_summary_review": "39;54;53;223;74", "wc_review": "415;329;290;1168;490", 
"wc_reply_reviewers": "0;0;0;457;0", "wc_reply_authors": "271;20;162;633;245", "reply_reviewers": "0;0;0;2;0", "reply_authors": "1;1;2;2;1", "recommendation_avg": [ 6.8, 0.9797958971132712 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "correctness_avg": [ 3.4, 0.4898979485566356 ], "technical_novelty_avg": [ 2.8, 0.39999999999999997 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 104.0, 24.52753554680943 ], "wc_strength_and_weaknesses_avg": [ 253.4, 180.64838775920478 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 92.4, 73.87178080972463 ], "wc_summary_review_avg": [ 88.6, 68.11930710158464 ], "wc_review_avg": [ 538.4, 322.3405652411747 ], "wc_reply_reviewers_avg": [ 91.4, 182.8 ], "wc_reply_authors_avg": [ 266.2, 203.2175189298403 ], "reply_reviewers_avg": [ 0.4, 0.8 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.16666666666666663, "corr_recommendation_correctness": 0.16666666666666663, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7814359676490446031&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 0, "pdf": "https://openreview.net/pdf?id=HVoJCRLByVk", "email": "brown.edu;;brown.edu", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Brown University", "aff_unique_dep": "", "aff_unique_url": "https://www.brown.edu", "aff_unique_abbr": "Brown", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "HVvqbDQdhW2", "title": "DeepDFA: Dataflow Analysis-Guided Efficient Graph Learning for Vulnerability Detection", "track": "main", "status": "Reject", "tldr": "We present DeepDFA, a dataflow analysis-guided graph learning framework and embedding technique for vulnerability detection.", "abstract": "Deep learning-based vulnerability detection models have recently been shown to be effective and, in some cases, outperform static analysis tools. However, the highest-performing approaches use token-based transformer models, which do not leverage domain knowledge. Classical program analysis techniques such as dataflow analysis can detect many types of bugs and are the most commonly used methods in practice. Motivated by the causal relationship between bugs and dataflow analysis, we present DeepDFA, a dataflow analysis-guided graph learning framework and embedding that use program semantic features for vulnerability detection. We show that DeepDFA is performant and efficient. DeepDFA ranked first in recall, first in generalizing over unseen projects, and second in F1 among all the state-of-the-art models we experimented with. It is also the smallest model in terms of the number of parameters, and was trained in 9 minutes, 69x faster than the highest-performing baseline. DeepDFA can be used with other models. 
By integrating LineVul and DeepDFA, we achieved the best vulnerability detection performance of 96.4 F1 score, 98.69 precision, and 94.22 recall.", "keywords": "deep learning;vulnerability detection;dataflow analysis;program analysis", "primary_area": "", "supplementary_material": "", "author": "Benjamin Steenhoek;Wei Le;Hongyang Gao", "authorids": "~Benjamin_Steenhoek1;~Wei_Le1;~Hongyang_Gao1", "gender": "M;F;M", "homepage": "https://benjijang.com;https://weile.work;https://faculty.sites.iastate.edu/hygao/", "dblp": ";;200/7985", "google_scholar": "dzXbaEMAAAAJ;;jGmq0aEAAAAJ", "orcid": "0000-0001-6175-105X;;0000-0002-9020-9080", "linkedin": ";;hongyang-gao-74924690/", "or_profile": "~Benjamin_Steenhoek1;~Wei_Le1;~Hongyang_Gao1", "aff": "Iowa State University;Iowa State University;Iowa State University", "aff_domain": "iastate.edu;iastate.edu;iastate.edu", "position": "PhD student;Associate Professor;Assistant Professor", "bibtex": "@misc{\nsteenhoek2023deepdfa,\ntitle={Deep{DFA}: Dataflow Analysis-Guided Efficient Graph Learning for Vulnerability Detection},\nauthor={Benjamin Steenhoek and Wei Le and Hongyang Gao},\nyear={2023},\nurl={https://openreview.net/forum?id=HVvqbDQdhW2}\n}", "github": "", "project": "", "reviewers": "ePbc;EyDV;x4r7;FYuz", "site": "https://openreview.net/forum?id=HVvqbDQdhW2", "pdf_size": 477799, "recommendation": "3;3;3;3", "confidence": "4;4;4;4", "correctness": "3;3;3;3", "technical_novelty": "2;2;2;2", "empirical_novelty": "2;2;0;2", "wc_summary_paper": "90;82;50;82", "wc_strength_and_weaknesses": "207;75;403;136", "wc_clarity_quality_novelty_and_reproducibility": "51;109;42;176", "wc_summary_review": "28;429;84;31", "wc_review": "376;695;579;425", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 1.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 76.0, 15.362291495737216 ], "wc_strength_and_weaknesses_avg": [ 205.25, 123.35796488269413 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 94.5, 53.621357685161236 ], "wc_summary_review_avg": [ 143.0, 166.6178261771531 ], "wc_review_avg": [ 518.75, 126.35342298489583 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13702586889793734783&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0;0", "aff_unique_norm": "Iowa State University", "aff_unique_dep": "", "aff_unique_url": "https://www.iastate.edu", "aff_unique_abbr": "ISU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "On the Trade-Off between Actionable Explanations and the Right to be Forgotten", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11145", "id": "HWt4BBZjVW", "poster": "", "openreview": "https://openreview.net/forum?id=HWt4BBZjVW", "slides": "https://iclr.cc/virtual/2023/poster/11145", "video": "https://iclr.cc/virtual/2023/poster/11145", "author_site": "Martin Pawelczyk, Tobias Leemann, Asia Biega, Gjergji Kasneci", "tldr": "We analyze the tradeoff 
between actionable explanations and the right to be forgotten, and provide algorithms to find a critical subset of training data points, which, when removed, would lead to a maximum invalidation of recourses.", "abstract": "As machine learning (ML) models are increasingly being deployed in high-stakes applications, policymakers have suggested tighter data protection regulations (e.g., GDPR, CCPA). One key principle is the \u201cright to be forgotten\u201d which gives users the right to have their data deleted. Another key principle is the right to an actionable explanation, also known as algorithmic recourse, allowing users to reverse unfavorable decisions. To date, it is unknown whether these two principles can be operationalized simultaneously. Therefore, we introduce and study the problem of recourse invalidation in the context of data deletion requests. More specifically, we theoretically and empirically analyze the behavior of popular state-of-the-art algorithms and demonstrate that the recourses generated by these algorithms are likely to be invalidated if a small number of data deletion requests (e.g., 1 or 2) warrant updates of the predictive model. For the setting of differentiable models, we suggest a framework to identify a minimal subset of critical training points which, when removed, maximize the fraction of invalidated recourses. Using our framework, we empirically show that the removal of as few as 2 data instances from the training set can invalidate up to 95 percent of all recourses output by popular state-of-the-art algorithms. Thus, our work raises fundamental questions about the compatibility of ``the right to an actionable explanation'' in the context of the ``right to be forgotten'', while also providing constructive insights on the determining factors of recourse robustness.", "keywords": "Counterfactual Explanations;Algorithmic Recourse;Explainability;Interpretability;Transparency", "primary_area": "", "supplementary_material": "/attachment/b6e592870517c13731f0f2e39c9c9e1fa910e287.zip", "author": "Martin Pawelczyk;Tobias Leemann;Asia Biega;Gjergji Kasneci", "authorids": "~Martin_Pawelczyk1;~Tobias_Leemann1;~Asia_Biega1;~Gjergji_Kasneci2", "gender": "M;M;F;M", "homepage": "https://sites.google.com/view/martinpawelczyk/;https://uni-tuebingen.de/en/209071;https://asiabiega.github.io/;https://www.gov.sot.tum.de/rds/prof-dr-gjergji-kasneci/", "dblp": "251/3229;303/4480;130/0373.html;69/3216", "google_scholar": "oYAf_hgAAAAJ;VsNjvo0AAAAJ;Whr_kkwAAAAJ;Zbc8GK4AAAAJ", "orcid": ";0000-0001-9333-228X;;0000-0002-3123-7268", "linkedin": ";tobias-leemann/;;", "or_profile": "~Martin_Pawelczyk1;~Tobias_Leemann1;~Asia_Biega1;~Gjergji_Kasneci2", "aff": "University of Tuebingen;University of Tuebingen;Max Planck Institute for Security and Privacy;University of Tuebingen", "aff_domain": "uni-tuebingen.de;uni-tuebingen.de;mpi-sp.org;uni-tuebingen.de", "position": "PhD student;PhD student;Tenure-Track Faculty;Professor", "bibtex": "@inproceedings{\npawelczyk2023on,\ntitle={On the Trade-Off between Actionable Explanations and the Right to be Forgotten},\nauthor={Martin Pawelczyk and Tobias Leemann and Asia Biega and Gjergji Kasneci},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=HWt4BBZjVW}\n}", "github": "", "project": "", "reviewers": "CkVr;2k5p;8JMg;9bdZ", "pdf_size": 682578, "recommendation": "6;6;6;8", "confidence": "3;3;4;4", "correctness": "4;4;3;4", "technical_novelty": "4;3;3;4",
"empirical_novelty": "3;3;3;3", "wc_summary_paper": "88;199;111;72", "wc_strength_and_weaknesses": "365;364;110;162", "wc_clarity_quality_novelty_and_reproducibility": "74;65;367;233", "wc_summary_review": "47;81;68;44", "wc_review": "574;709;656;511", "wc_reply_reviewers": "132;64;58;0", "wc_reply_authors": "516;920;1190;790", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;3;1", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 3.5, 0.5 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 117.5, 49.053542175871456 ], "wc_strength_and_weaknesses_avg": [ 250.25, 115.72029856511777 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 184.75, 124.64825510210723 ], "wc_summary_review_avg": [ 60.0, 15.247950681976906 ], "wc_review_avg": [ 612.5, 75.81061930890685 ], "wc_reply_reviewers_avg": [ 63.5, 46.78407848830626 ], "wc_reply_authors_avg": [ 854.0, 242.68910152703603 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14212576463146093615&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=HWt4BBZjVW", "email": "uni-tuebingen.de;uni-tuebingen.de;mpi-sp.org;uni-tuebingen.de", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Tuebingen;Max Planck Institute for Security and Privacy", "aff_unique_dep": ";", "aff_unique_url": "https://www.uni-tuebingen.de/;https://www.mpi-sps.org", "aff_unique_abbr": "Uni T\u00fcbingen;MPI-SPS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "title": "ImageNet-X: Understanding Model Mistakes with Factor of Variation Annotations", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11092", "id": "HXz7Vcm3VgM", "poster": "", "openreview": "https://openreview.net/forum?id=HXz7Vcm3VgM", "slides": "https://iclr.cc/virtual/2023/poster/11092", "video": "https://iclr.cc/virtual/2023/poster/11092", "author_site": "Badr Youbi Idrissi, Diane Bouchacourt, Randall Balestriero, Ivan Evtimov, Caner Hazirbas, Nicolas Ballas, Pascal Vincent, Michal Drozdzal, David Lopez-Paz, Mark Ibrahim", "tldr": "we annotate ImageNet images with factor labels to explain model mistakes", "abstract": "Deep learning vision systems are widely deployed across applications where reliability is critical. However, even today's best models can fail to recognize an object when its pose, lighting, or background varies. While existing benchmarks surface examples challenging for models, they do not explain why such mistakes arise. To address this need, we introduce ImageNet-X\u2014a set of sixteen human annotations of factors such as pose, background, or lighting the entire ImageNet-1k validation set as well as a random subset of 12k training images. Equipped with ImageNet-X, we investigate 2,200 current recognition models and study the types of mistakes as a function of model\u2019s (1) architecture, e.g. transformer vs. convolutional, (2) learning paradigm, e.g. supervised vs. self-supervised, and (3) training procedures, e.g., data augmentation. 
Regardless of these choices, we find that models have consistent failure modes across ImageNet-X categories. We also find that while data augmentation can improve robustness to certain factors, it induces spill-over effects on other factors. For example, color-jitter augmentation improves robustness to color and brightness, but surprisingly hurts robustness to pose. Together, these insights suggest that, to advance the robustness of modern vision models, future research should focus on collecting additional data and understanding data augmentation schemes. Along with these insights, we release a toolkit based on ImageNet-X to spur further study into the mistakes image recognition systems make.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/f4ccd4ac9d0e4e4e00d846f33eca00d2809fb317.zip", "author": "Badr Youbi Idrissi;Diane Bouchacourt;Randall Balestriero;Ivan Evtimov;Caner Hazirbas;Nicolas Ballas;Pascal Vincent;Michal Drozdzal;David Lopez-Paz;Mark Ibrahim", "authorids": "~Badr_Youbi_Idrissi1;~Diane_Bouchacourt3;~Randall_Balestriero1;~Ivan_Evtimov2;~Caner_Hazirbas2;~Nicolas_Ballas1;~Pascal_Vincent1;~Michal_Drozdzal1;~David_Lopez-Paz2;~Mark_Ibrahim1", "gender": "M;M;;M;;M;M;;;F", "homepage": ";https://randallbalestriero.github.io/;https://ivanevtimov.eu;https://hazirbas.com;;http://www.iro.umontreal.ca/~vincentp;;http://lopezpaz.org;https://markibrahim.me/;https://dianebouchacourt.github.io/", "dblp": "265/5811.html;175/5364;203/8980;161/7891;120/9066;43/861;24/9794;74/10481;180/5660;176/1498", "google_scholar": ";S1x_xqcAAAAJ;F0yiLRIAAAAJ;JEiXKpcAAAAJ;euUV4iUAAAAJ;WBCKQMsAAAAJ;https://scholar.google.ca/citations?user=XK_ktwQAAAAJ;;AqYyoCMAAAAJ;", "orcid": ";;;;;;;;;", "linkedin": ";;;;;;;;;", "or_profile": "~Badr_Youbi_Idrissi1;~Randall_Balestriero1;~Ivan_Evtimov2;~Caner_Hazirbas2;~Nicolas_Ballas1;~Pascal_Vincent1;~Michal_Drozdzal1;~David_Lopez-Paz2;~Mark_Ibrahim1;~Diane_Nicole_Bouchacourt1", "aff": ";Meta Facebook;Meta Facebook;Meta AI;Meta;Facebook A.I.
Research;Meta;Meta Facebook;Facebook AI Research (FAIR) Meta;Meta AI Research", "aff_domain": ";facebook.com;facebook.com;meta.com;meta.com;fb.com;fb.com;fb.com;ai.facebook.com;meta.com", "position": ";Postdoc;Research Scientist;Researcher;Researcher;Research Scientist;Research Scientst;Research Scientist;Researcher;Researcher", "bibtex": "@inproceedings{\nidrissi2023imagenetx,\ntitle={ImageNet-X: Understanding Model Mistakes with Factor of Variation Annotations},\nauthor={Badr Youbi Idrissi and Diane Bouchacourt and Randall Balestriero and Ivan Evtimov and Caner Hazirbas and Nicolas Ballas and Pascal Vincent and Michal Drozdzal and David Lopez-Paz and Mark Ibrahim},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=HXz7Vcm3VgM}\n}", "github": "", "project": "", "reviewers": "8YJy;HVUi;cwD7", "pdf_size": 18362555, "recommendation": "8;8;8", "confidence": "4;5;4", "correctness": "3;4;4", "technical_novelty": "3;4;3", "empirical_novelty": "3;4;3", "wc_summary_paper": "72;82;48", "wc_strength_and_weaknesses": "175;345;272", "wc_clarity_quality_novelty_and_reproducibility": "71;39;206", "wc_summary_review": "106;95;185", "wc_review": "424;561;711", "wc_reply_reviewers": "0;141;0", "wc_reply_authors": "703;1015;624", "reply_reviewers": "0;1;0", "reply_authors": "1;2;1", "recommendation_avg": [ 8.0, 0.0 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 67.33333333333333, 14.2672897060218 ], "wc_strength_and_weaknesses_avg": [ 264.0, 69.6323679524592 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 105.33333333333333, 72.37095795659717 ], "wc_summary_review_avg": [ 128.66666666666666, 40.086018621071474 ], "wc_review_avg": [ 565.3333333333334, 117.20731869450626 ], "wc_reply_reviewers_avg": [ 47.0, 66.46803743153546 ], "wc_reply_authors_avg": [ 780.6666666666666, 168.80824097840195 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 10, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 50, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7649876297714062863&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 2, "pdf": "https://openreview.net/pdf?id=HXz7Vcm3VgM", "email": ";facebook.com;facebook.com;meta.com;meta.com;fb.com;fb.com;fb.com;ai.facebook.com;meta.com", "author_num": 10, "aff_unique_index": "0;0;0;0;0;0;0;0;0", "aff_unique_norm": "Meta", "aff_unique_dep": "Meta Platforms, Inc.", "aff_unique_url": "https://meta.com", "aff_unique_abbr": "Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "HYfD5CoCjDX", "title": "N\u00dcWA-LIP: Language-guided Image Inpainting with Defect-free VQGAN", "track": "main", "status": "Withdraw", "tldr": "This paper proposes N\u00dcWA-LIP by incorporating DF-VQGAN with MP-S2S to address receptive spreading or information loss in language-guided image inpainting.", "abstract": "Language-guided image inpainting aims to fill in the defective regions of an image under the guidance of text while keeping non-defective regions unchanged. 
However, the encoding process of existing models suffers from either receptive spreading of defective regions or information loss of non-defective regions, giving rise to visually unappealing inpainting results. To address the above issues, this paper proposes N\u00dcWA-LIP by incorporating defect-free VQGAN (DF-VQGAN) with multi-perspective sequence to sequence (MP-S2S). In particular, DF-VQGAN introduces relative estimation to control receptive spreading and adopts symmetrical connections to protect information. MP-S2S further enhances visual information from complementary perspectives, including both low-level pixels and high-level tokens. Experiments show that DF-VQGAN performs much more robustly than VQGAN. To evaluate language-guided inpainting, we build three open-domain benchmarks, on which N\u00dcWA-LIP is also superior to recent strong baselines.", "keywords": "Language-guided image inpainting;Vector quantization;Visual synthesis;Generative model", "primary_area": "", "supplementary_material": "/attachment/13b8336fa8b95d1bedfb849eece0aa93017fab14.zip", "author": "Minheng Ni;Wangmeng Zuo", "authorids": "~Minheng_Ni1;~Wangmeng_Zuo3", "gender": "M;M", "homepage": "https://kodenii.github.io;", "dblp": "263/9969;93/2671", "google_scholar": "-ybr4_cAAAAJ;rUOpCEYAAAAJ", "orcid": ";0000-0002-3330-783X", "linkedin": "https://linkedin.com/in/minheng-ni-7b8a99146;", "or_profile": "~Minheng_Ni1;~Wangmeng_Zuo3", "aff": "Microsoft;Harbin Institute of Technology", "aff_domain": "microsoft.com;hit.edu.cn", "position": "Research Intern;Full Professor", "bibtex": "@misc{\nni2023nwalip,\ntitle={N\\\"U{WA}-{LIP}: Language-guided Image Inpainting with Defect-free {VQGAN}},\nauthor={Minheng Ni and Wangmeng Zuo},\nyear={2023},\nurl={https://openreview.net/forum?id=HYfD5CoCjDX}\n}", "github": "", "project": "", "reviewers": "WF5V;Bq9N;3Nc3;uw2i", "site": "https://openreview.net/forum?id=HYfD5CoCjDX", "pdf_size": 5060269, "recommendation": "3;5;5;6", "confidence": "5;5;4;3", "correctness": "2;4;3;4", "technical_novelty": "2;3;3;4", "empirical_novelty": "2;2;3;4", "wc_summary_paper": "66;40;87;48", "wc_strength_and_weaknesses": "307;277;296;196", "wc_clarity_quality_novelty_and_reproducibility": "7;21;98;4", "wc_summary_review": "16;50;39;37", "wc_review": "396;388;520;285", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 60.25, 18.08832496390973 ], "wc_strength_and_weaknesses_avg": [ 269.0, 43.49137845596527 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 32.5, 38.356876828021335 ], "wc_summary_review_avg": [ 35.5, 12.298373876248844 ], "wc_review_avg": [ 397.25, 83.2987845049374 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.7608859102526822, "corr_recommendation_correctness": 0.899228803025897, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6181300095058872690&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1", "aff_unique_norm": "Microsoft;Harbin Institute of Technology", "aff_unique_dep": "Microsoft
Corporation;", "aff_unique_url": "https://www.microsoft.com;http://www.hit.edu.cn/", "aff_unique_abbr": "Microsoft;HIT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Harbin", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;China" }, { "id": "HZJje06x6IO", "title": "Global Context Vision Transformers", "track": "main", "status": "Reject", "tldr": "We introduce general computer vision backbone to effectively learn both short and long-range spatial information.", "abstract": "We propose global context vision transformer (GC ViT), a novel architecture that enhances parameter and compute utilization for computer vision tasks. The core of the novel model are global context self-attention modules, joint with standard local self-attention, to effectively yet efficiently model both long and short-range spatial interactions, as an alternative to complex operations such as an attention masks or local windows shifting. While the local self-attention modules are responsible for modeling short-range information, the global query tokens are shared across all global self-attention modules to interact with local key and values. In addition, we address the lack of inductive bias in ViTs and improve the modeling of inter-channel dependencies by proposing a novel downsampler which leverages a parameter-efficient fused inverted residual block. The proposed GC ViT achieves new state-of-the-art performance across image classification, object detection and semantic segmentation tasks. On ImageNet-1K dataset for classification, the tiny, small and base variants of GC ViT with 28M, 51M and 90M parameters achieve 83.4%, 83.9% and 84.4% Top-1 accuracy, respectively, surpassing comparably-sized prior art such as CNN-based ConvNeXt and ViT-based Swin Transformer. 
Pre-trained GC ViT backbones in downstream tasks of object detection, instance segmentation, and semantic segmentation on MS COCO and ADE20K datasets outperform prior work consistently, sometimes by large margins.", "keywords": "Vision Transformers;Classification;Detection;Instance Segmentation;Semantic Segmentation", "primary_area": "", "supplementary_material": "/attachment/7f4d45dbbc9079f4f60926a40e49a83979da7762.zip", "author": "Ali Hatamizadeh;Hongxu Yin;Jan Kautz;Pavlo Molchanov", "authorids": "~Ali_Hatamizadeh1;~Hongxu_Yin2;~Jan_Kautz1;~Pavlo_Molchanov1", "gender": ";;;M", "homepage": ";;http://jankautz.com;", "dblp": ";;48/6214;165/8169.html", "google_scholar": ";;P9FclNEAAAAJ;J9PoyoIAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Ali_Hatamizadeh1;~Hongxu_Yin2;~Jan_Kautz1;~Pavlo_Molchanov1", "aff": ";;NVIDIA;NVIDIA Research", "aff_domain": ";;nvidia.com;nvidia.com", "position": ";;VP Research;Research Scientist", "bibtex": "@misc{\nhatamizadeh2023global,\ntitle={Global Context Vision Transformers},\nauthor={Ali Hatamizadeh and Hongxu Yin and Jan Kautz and Pavlo Molchanov},\nyear={2023},\nurl={https://openreview.net/forum?id=HZJje06x6IO}\n}", "github": "", "project": "", "reviewers": "t1Sn;V9pP;Fuf9;YBSt", "site": "https://openreview.net/forum?id=HZJje06x6IO", "pdf_size": 892774, "recommendation": "1;6;6;6", "confidence": "5;5;3;4", "correctness": "1;3;3;4", "technical_novelty": "1;2;2;2", "empirical_novelty": "1;2;3;2", "wc_summary_paper": "159;62;46;68", "wc_strength_and_weaknesses": "424;251;84;84", "wc_clarity_quality_novelty_and_reproducibility": "30;15;32;22", "wc_summary_review": "235;61;33;5", "wc_review": "848;389;195;179", "wc_reply_reviewers": "370;0;0;0", "wc_reply_authors": "3107;738;281;391", "reply_reviewers": "2;0;0;0", "reply_authors": "6;1;1;1", "recommendation_avg": [ 4.75, 2.165063509461097 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "correctness_avg": [ 2.75, 1.0897247358851685 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 83.75, 44.18356594934365 ], "wc_strength_and_weaknesses_avg": [ 210.75, 140.7362337850491 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 24.75, 6.7592529172978875 ], "wc_summary_review_avg": [ 83.5, 89.68138045324682 ], "wc_review_avg": [ 402.75, 270.0281235353088 ], "wc_reply_reviewers_avg": [ 92.5, 160.21469970012114 ], "wc_reply_authors_avg": [ 1129.25, 1154.2435564039333 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.25, 2.165063509461097 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.5222329678670935, "corr_recommendation_correctness": 0.9271726499455306, "gs_citation": 172, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1624877256766018839&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff_unique_index": "0;0", "aff_unique_norm": "NVIDIA", "aff_unique_dep": "NVIDIA Corporation", "aff_unique_url": "https://www.nvidia.com", "aff_unique_abbr": "NVIDIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Diffusion-GAN: Training GANs with Diffusion", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10834", "id": "HZf7UbpWHuA", "poster": "/media/PosterPDFs/ICLR%202023/10834.png?t=1680759297.9427428", "openreview": "https://openreview.net/forum?id=HZf7UbpWHuA", "slides": 
"https://iclr.cc/virtual/2023/poster/10834", "video": "https://iclr.cc/virtual/2023/poster/10834", "author_site": "Zhendong Wang, Huangjie Zheng, Pengcheng He, Weizhu Chen, Mingyuan Zhou", "tldr": "", "abstract": "Generative adversarial networks (GANs) are challenging to train stably, and a promising remedy of injecting instance noise into the discriminator input has not been very effective in practice. In this paper, we propose Diffusion-GAN, a novel GAN framework that leverages a forward diffusion chain to generate Gaussian-mixture distributed instance noise. Diffusion-GAN consists of three components, including an adaptive diffusion process, a diffusion timestep-dependent discriminator, and a generator. Both the observed and generated data are diffused by the adaptive diffusion process via different noise-to-data ratios at each timestep. The timestep-dependent discriminator learns to distinguish the diffused real data from the diffused generated data at each diffusion timestep. The generator learns from the discriminator's feedback by backpropagating through the forward diffusion chain, whose length is adaptively adjusted to balance the noise and data levels. We theoretically show that the discriminator's timestep-dependent strategy gives consistent and helpful guidance to the generator, enabling it to match the true data distribution. We demonstrate the advantages of Diffusion-GAN over strong GAN baselines on various datasets, showing that it can produce more realistic images with higher stability and data efficiency than state-of-the-art GANs.", "keywords": "deep generative models;diffusion models;data-efficient stable GAN training;adaptive data augmentation", "primary_area": "", "supplementary_material": "/attachment/db4815530092ec6ce137cbb793488fbb2926042d.zip", "author": "Zhendong Wang;Huangjie Zheng;Pengcheng He;Weizhu Chen;Mingyuan Zhou", "authorids": "~Zhendong_Wang1;~Huangjie_Zheng1;~Pengcheng_He2;~Weizhu_Chen1;~Mingyuan_Zhou1", "gender": "M;M;M;M;M", "homepage": "https://zhendong-wang.github.io/;;;https://www.microsoft.com/en-us/research/people/wzchen/;http://mingyuanzhou.github.io", "dblp": ";192/2170;116/8665;79/2536;", "google_scholar": "lRiIjhcAAAAJ;Vl5wCXsAAAAJ;https://scholar.google.com/citations?hl=en;LG_E-4EAAAAJ;LXwCIisAAAAJ", "orcid": ";0000-0003-0508-5034;;;", "linkedin": ";;;;", "or_profile": "~Zhendong_Wang1;~Huangjie_Zheng1;~Pengcheng_He2;~Weizhu_Chen1;~Mingyuan_Zhou1", "aff": "University of Texas at Austin;University of Texas, Austin;Microsoft;Microsoft GenAI;Google", "aff_domain": "utexas.edu;utexas.edu;microsoft.com;microsoft.com;google.com", "position": "PhD student;PhD student;Principal Researcher;Vice President;Researcher", "bibtex": "@inproceedings{\nwang2023diffusiongan,\ntitle={Diffusion-{GAN}: Training {GAN}s with Diffusion},\nauthor={Zhendong Wang and Huangjie Zheng and Pengcheng He and Weizhu Chen and Mingyuan Zhou},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=HZf7UbpWHuA}\n}", "github": "", "project": "", "reviewers": "xwap;dVvW;YnnY;jaon", "pdf_size": 13586059, "recommendation": "6;6;8;8", "confidence": "4;3;4;3", "correctness": "4;3;3;4", "technical_novelty": "3;3;4;3", "empirical_novelty": "3;3;3;4", "wc_summary_paper": "37;60;89;18", "wc_strength_and_weaknesses": "67;133;68;135", "wc_clarity_quality_novelty_and_reproducibility": "17;46;22;17", "wc_summary_review": "44;30;37;13", "wc_review": "165;269;216;183", "wc_reply_reviewers": "0;69;0;20", 
"wc_reply_authors": "289;332;155;299", "reply_reviewers": "0;1;0;1", "reply_authors": "1;1;1;1", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 51.0, 26.504716561397142 ], "wc_strength_and_weaknesses_avg": [ 100.75, 33.25939716831921 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 25.5, 12.010412149464313 ], "wc_summary_review_avg": [ 31.0, 11.510864433221338 ], "wc_review_avg": [ 208.25, 39.55613100392909 ], "wc_reply_reviewers_avg": [ 22.25, 28.199069133572475 ], "wc_reply_authors_avg": [ 268.75, 67.57357101707738 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 290, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14117814646038893480&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=HZf7UbpWHuA", "email": "utexas.edu;utexas.edu;microsoft.com;microsoft.com;google.com", "author_num": 5, "aff_unique_index": "0;0;1;1;2", "aff_unique_norm": "University of Texas at Austin;Microsoft;Google", "aff_unique_dep": ";Microsoft Corporation;Google", "aff_unique_url": "https://www.utexas.edu;https://www.microsoft.com;https://www.google.com", "aff_unique_abbr": "UT Austin;Microsoft;Google", "aff_campus_unique_index": "0;0;2", "aff_campus_unique": "Austin;;Mountain View", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Causal Estimation for Text Data with (Apparent) Overlap Violations", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11330", "id": "Ha2MnQM9Ph", "poster": "/media/PosterPDFs/ICLR%202023/11330.png?t=1680988566.729097", "openreview": "https://openreview.net/forum?id=Ha2MnQM9Ph", "slides": "https://iclr.cc/virtual/2023/poster/11330", "video": "https://iclr.cc/virtual/2023/poster/11330", "author_site": "Lin Gui, Victor Veitch", "tldr": "", "abstract": "Consider the problem of estimating the causal effect of some attribute of a text document; for example: what effect does writing a polite vs. rude email have on response time? To estimate a causal effect from observational data, we need to adjust for confounding aspects of the text that affect both the treatment and outcome---e.g., the topic or writing level of the text. These confounding aspects are unknown a priori, so it seems natural to adjust for the entirety of the text (e.g., using a transformer). However, causal identification and estimation procedures rely on the assumption of overlap: for all levels of the adjustment variables, there is randomness leftover so that every unit could have (not) received treatment. Since the treatment here is itself an attribute of the text, it is perfectly determined, and overlap is apparently violated. The purpose of this paper is to show how to handle causal identification and obtain robust causal estimation in the presence of apparent overlap violations. In brief, the idea is to use supervised representation learning to produce a data representation that preserves confounding information while eliminating information that is only predictive of the treatment. This representation then suffices for adjustment and satisfies overlap. 
Adapting results on non-parametric estimation, we show that this procedure is robust to conditional outcome misestimation and yields a low-bias estimator that admits valid uncertainty quantification under weak conditions. Empirical results show reductions in bias and strong improvements in uncertainty quantification relative to the natural (transformer-based) baseline.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/b8a27ba4c568e51b4d4b735082876c95c71d5d70.zip", "author": "Lin Gui;Victor Veitch", "authorids": "~Lin_Gui5;~Victor_Veitch1", "gender": "F;", "homepage": ";http://victorveitch.com", "dblp": ";167/5650", "google_scholar": "88eaL8UAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";", "linkedin": ";", "or_profile": "~Lin_Gui5;~Victor_Veitch1", "aff": "University of Chicago;Google", "aff_domain": "uchicago.edu;google.com", "position": "PhD student;Research Scientist", "bibtex": "@inproceedings{\ngui2023causal,\ntitle={Causal Estimation for Text Data with (Apparent) Overlap Violations},\nauthor={Lin Gui and Victor Veitch},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=Ha2MnQM9Ph}\n}", "github": "", "project": "", "reviewers": "yJuA;HJbF;SeHw;m2tm", "pdf_size": 1800886, "recommendation": "6;6;6;6", "confidence": "4;4;2;4", "correctness": "4;4;3;4", "technical_novelty": "3;3;3;3", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "71;74;105;91", "wc_strength_and_weaknesses": "603;207;232;85", "wc_clarity_quality_novelty_and_reproducibility": "37;28;26;7", "wc_summary_review": "101;74;42;229", "wc_review": "812;383;405;412", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1317;476;452;638", "reply_reviewers": "0;0;0;0", "reply_authors": "3;2;2;2", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 85.25, 13.718144918318949 ], "wc_strength_and_weaknesses_avg": [ 281.75, 193.63286781948977 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 24.5, 10.920164833920778 ], "wc_summary_review_avg": [ 111.5, 70.98063116090192 ], "wc_review_avg": [ 503.0, 178.72185093043325 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 720.75, 351.60018131394645 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9492115081907061213&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=Ha2MnQM9Ph", "email": "uchicago.edu;google.com", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "University of Chicago;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.uchicago.edu;https://www.google.com", "aff_unique_abbr": "UChicago;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Sound Randomized Smoothing in Floating-Point Arithmetic", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12062", "id": "HaHCoGcpV9", "poster": "/media/PosterPDFs/ICLR%202023/12062.png?t=1682703209.7674222", "openreview":
"https://openreview.net/forum?id=HaHCoGcpV9", "slides": "https://iclr.cc/virtual/2023/poster/12062", "video": "https://iclr.cc/virtual/2023/poster/12062", "author_site": "Vaclav Voracek, Matthias Hein", "tldr": "We construct classifiers producing wrong randomized smoothing certificates on images and propose a method to overcome this at a negligible cost.", "abstract": "Randomized smoothing is sound when using infinite precision. However, we show that randomized smoothing is no longer sound for limited floating-point precision. We present a simple example where randomized smoothing certifies a radius of $1.26$ around a point, even though there is an adversarial example in the distance $0.8$ and show how this can be abused to give false certificates for CIFAR10. We discuss the implicit assumptions of randomized smoothing and show that they do not apply to generic image classification models whose smoothed versions are commonly certified. In order to overcome this problem, we propose a sound approach to randomized smoothing when using floating-point precision with essentially equal speed for quantized input. It yields sound certificates or image classifiers which for the ones tested so far are very similar to the unsound practice of randomized smoothing. Our only assumption is that we have access to a fair coin.", "keywords": "Randomized smoothing;floating-point arithmetic;adversarial robustness;formal methods", "primary_area": "", "supplementary_material": "/attachment/15c82021437cb7116ba1fc6766d52f341c7e2ef8.zip", "author": "Vaclav Voracek;Matthias Hein", "authorids": "~Vaclav_Voracek1;~Matthias_Hein2", "gender": "M;M", "homepage": ";https://uni-tuebingen.de/de/164260", "dblp": "292/8831.html;97/1213-1", "google_scholar": "Db13d44AAAAJ;0ZAb3tsAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Vaclav_Voracek1;~Matthias_Hein2", "aff": "University of Tuebingen;University of T\u00fcbingen", "aff_domain": "uni-tuebingen.de;uni-tuebingen.de", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nvoracek2023sound,\ntitle={Sound Randomized Smoothing in Floating-Point Arithmetic},\nauthor={Vaclav Voracek and Matthias Hein},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=HaHCoGcpV9}\n}", "github": "", "project": "", "reviewers": "9zy2;rQaT;YCXh;88Dn", "pdf_size": 419510, "recommendation": "5;6;6;8", "confidence": "3;2;3;4", "correctness": "4;4;4;4", "technical_novelty": "2;2;3;3", "empirical_novelty": "0;3;3;3", "wc_summary_paper": "72;59;15;84", "wc_strength_and_weaknesses": "148;141;56;62", "wc_clarity_quality_novelty_and_reproducibility": "26;9;27;140", "wc_summary_review": "60;22;17;28", "wc_review": "306;231;115;314", "wc_reply_reviewers": "0;0;0;12", "wc_reply_authors": "375;341;147;279", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;1", "recommendation_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 4.0, 0.0 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.25, 1.299038105676658 ], "wc_summary_paper_avg": [ 57.5, 26.081602711489953 ], "wc_strength_and_weaknesses_avg": [ 101.75, 42.87408891160254 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 50.5, 52.16560169306973 ], "wc_summary_review_avg": [ 31.75, 16.768646337734005 ], "wc_review_avg": [ 241.5, 79.8889854735933 ], "wc_reply_reviewers_avg": [ 3.0, 5.196152422706632 ], "wc_reply_authors_avg": [ 285.5, 87.05601644918058 ], 
"reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.6488856845230502, "corr_recommendation_correctness": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10736197227836934794&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=HaHCoGcpV9", "email": "uni-tuebingen.de;uni-tuebingen.de", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "University of Tuebingen;University of T\u00fcbingen", "aff_unique_dep": ";", "aff_unique_url": "https://www.uni-tuebingen.de/;https://www.uni-tuebingen.de/", "aff_unique_abbr": "Uni T\u00fcbingen;Uni T\u00fcbingen", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "id": "HcGb9QnNAew", "title": "Global Nash Equilibrium in a Class of Nonconvex N-player Games", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "We consider seeking the global Nash equilibrium (NE) in a class of nonconvex N-player games. The structured nonconvex payoffs are composited with canonical functions and quadratic operators, which are broadly investigated in various tasks such as robust network training and sensor network communication. However, the full-fledged development of nonconvex min-max games may not provide available help due to the interference of multiple players\u2019 coupled stationary conditions, and the existing results on convex games may also perform unsatisfactorily since they may be stuck in local NE or Nash stationary points, rather than the global NE. Here, we first make efforts to take a canonical conjugate transformation of the nonconvex N-player game, and cast the complementary problem into a variational inequality (VI) problem for the derivation of the global NE. Then we design a conjugate-based ordinary differential equation (ODE) for the solvable VI problem, and present the equilibrium equivalence and guaranteed convergence within the ODE. Furthermore, we provide a discretized algorithm based on the ODE, and discuss step-size settings and convergence rates in two typical nonconvex N-player games. 
At last, we conduct experiments in practical tasks to illustrate the effectiveness of our approach.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/9bcc250c8485ddfb49cd4e846d18ef5c934eb114.zip", "author": "Guanpu Chen;Gehui Xu;Yiguang Hong", "authorids": "~Guanpu_Chen1;~Gehui_Xu1;~Yiguang_Hong1", "gender": "M;;M", "homepage": ";;https://see.tongji.edu.cn/info/1377/10305.htm", "dblp": ";;", "google_scholar": "YMuK8eEAAAAJ;https://scholar.google.com/citations?view_op=list_works;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Guanpu_Chen1;~Gehui_Xu1;~Yiguang_Hong1", "aff": ";Academy of Mathematics and Systems Science, Chinese Academy of Sciences, Chinese Academy of Sciences;Tongji University", "aff_domain": ";amss.ac.cn;tongji.edu.cn", "position": ";PhD student;Full Professor", "bibtex": "@misc{\nchen2023global,\ntitle={Global Nash Equilibrium in a Class of Nonconvex N-player Games},\nauthor={Guanpu Chen and Gehui Xu and Yiguang Hong},\nyear={2023},\nurl={https://openreview.net/forum?id=HcGb9QnNAew}\n}", "github": "", "project": "", "reviewers": "tuXp;xq37;fVHZ;xeBd", "site": "https://openreview.net/forum?id=HcGb9QnNAew", "pdf_size": 1434411, "recommendation": "5;5;5;5", "confidence": "3;3;3;4", "correctness": "3;4;3;3", "technical_novelty": "3;3;3;2", "empirical_novelty": "3;0;0;2", "wc_summary_paper": "79;211;189;109", "wc_strength_and_weaknesses": "163;109;517;623", "wc_clarity_quality_novelty_and_reproducibility": "12;21;323;2", "wc_summary_review": "58;18;104;69", "wc_review": "312;359;1133;803", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.25, 1.299038105676658 ], "wc_summary_paper_avg": [ 147.0, 54.607691765904185 ], "wc_strength_and_weaknesses_avg": [ 353.0, 221.03845819223406 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 89.5, 134.97870202368964 ], "wc_summary_review_avg": [ 62.25, 30.678779310787448 ], "wc_review_avg": [ 651.75, 337.494722180955 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:AMMSkylFWEQJ:scholar.google.com/&scioq=Global+Nash+Equilibrium+in+a+Class+of+Nonconvex+N-player+Games&hl=en&as_sdt=0,34", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Chinese Academy of Sciences;Tongji University", "aff_unique_dep": "Academy of Mathematics and Systems Science;", "aff_unique_url": "http://www.cas.cn;https://www.tongji.edu.cn", "aff_unique_abbr": "CAS;Tongji", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "A System for Morphology-Task Generalization via Unified Representation and Behavior Distillation", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11363", "id": "HcUf-QwZeFh", "poster": "/media/PosterPDFs/ICLR%202023/11363.png?t=1681204202.672557", "openreview": "https://openreview.net/forum?id=HcUf-QwZeFh", "slides": "https://iclr.cc/virtual/2023/poster/11363", "video": 
"https://iclr.cc/virtual/2023/poster/11363", "author_site": "Hiroki Furuta, Yusuke Iwasawa, Yutaka Matsuo, Shixiang Gu", "tldr": "We explore a method for learning a single policy that manipulates various forms of agents to various goal positions by distilling a large amount of proficient behavioral data.", "abstract": "The rise of generalist large-scale models in natural language and vision has made us expect that a massive data-driven approach could achieve broader generalization in other domains such as continuous control. In this work, we explore a method for learning a single policy that manipulates various forms of agents to solve various tasks by distilling a large amount of proficient behavioral data. In order to align input-output (IO) interface among multiple tasks and diverse agent morphologies while preserving essential 3D geometric relations, we introduce morphology-task graph, which treats observations, actions and goals/task in a unified graph representation. We also develop MxT-Bench for fast large-scale behavior generation, which supports procedural generation of diverse morphology-task combinations with a minimal blueprint and hardware-accelerated simulator. Through efficient representation and architecture selection on MxT-Bench, we find out that a morphology-task graph representation coupled with Transformer architecture improves the multi-task performances compared to other baselines including recent discrete tokenization, and provides better prior knowledge for zero-shot transfer or sample efficiency in downstream multi-task imitation learning. Our work suggests large diverse offline datasets, unified IO representation, and policy representation and architecture selection through supervised learning form a promising approach for studying and advancing morphology-task generalization.", "keywords": "Morphology-Task Generalization;Behavior Distillation;Supervised RL;Reinforcement Learning", "primary_area": "", "supplementary_material": "", "author": "Hiroki Furuta;Yusuke Iwasawa;Yutaka Matsuo;Shixiang Shane Gu", "authorids": "~Hiroki_Furuta1;~Yusuke_Iwasawa1;~Yutaka_Matsuo1;~Shixiang_Shane_Gu1", "gender": "M;M;M;M", "homepage": "https://github.com/frt03;;http://ymatsuo.com;https://sites.google.com/view/gugurus/home", "dblp": "267/2065;117/7377;m/YMatsuo.html;121/0550", "google_scholar": "M0OhM1UAAAAJ;https://scholar.google.co.jp/citations?user=pvvZgj0AAAAJ;Dy8iau4AAAAJ;B8wslVsAAAAJ", "orcid": ";0000-0002-1321-2622;;", "linkedin": ";;;", "or_profile": "~Hiroki_Furuta1;~Yusuke_Iwasawa1;~Yutaka_Matsuo1;~Shixiang_Gu1", "aff": "Google DeepMind;The University of Tokyo, The University of Tokyo;The University of Tokyo;OpenAI", "aff_domain": "google.com;weblab.t.u-tokyo.ac.jp;u-tokyo.ac.jp;openai.com", "position": "Intern;Lecturer;Associate Professor;Researcher", "bibtex": "@inproceedings{\nfuruta2023a,\ntitle={A System for Morphology-Task Generalization via Unified Representation and Behavior Distillation},\nauthor={Hiroki Furuta and Yusuke Iwasawa and Yutaka Matsuo and Shixiang Shane Gu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=HcUf-QwZeFh}\n}", "github": "", "project": "", "reviewers": "UDt3;feBA;zDig;JKD7", "pdf_size": 4145620, "recommendation": "5;8;8;8", "confidence": "4;4;5;2", "correctness": "3;3;4;3", "technical_novelty": "3;3;3;3", "empirical_novelty": "3;2;4;3", "wc_summary_paper": "50;65;116;43", "wc_strength_and_weaknesses": "249;661;185;106", 
"wc_clarity_quality_novelty_and_reproducibility": "54;160;70;4", "wc_summary_review": "83;85;31;37", "wc_review": "436;971;402;190", "wc_reply_reviewers": "49;974;16;0", "wc_reply_authors": "678;2532;139;207", "reply_reviewers": "1;4;1;0", "reply_authors": "1;6;1;1", "recommendation_avg": [ 7.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 68.5, 28.552583070538468 ], "wc_strength_and_weaknesses_avg": [ 300.25, 214.3494518304164 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 72.0, 56.3382640840131 ], "wc_summary_review_avg": [ 59.0, 25.099800796022265 ], "wc_review_avg": [ 499.75, 287.94129175927515 ], "wc_reply_reviewers_avg": [ 259.75, 412.75075711620445 ], "wc_reply_authors_avg": [ 889.0, 971.0296081994617 ], "reply_reviewers_avg": [ 1.5, 1.5 ], "reply_authors_avg": [ 2.25, 2.165063509461097 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.13245323570650439, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13603270088420750551&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=HcUf-QwZeFh", "email": "google.com;weblab.t.u-tokyo.ac.jp;u-tokyo.ac.jp;openai.com", "author_num": 4, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Google;University of Tokyo;OpenAI", "aff_unique_dep": "Google DeepMind;;", "aff_unique_url": "https://deepmind.com;https://www.u-tokyo.ac.jp;https://openai.com", "aff_unique_abbr": "DeepMind;UTokyo;OpenAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;2", "aff_country_unique": "United Kingdom;Japan;United States" }, { "id": "Hcq7zGgcsOg", "title": "Curriculum-inspired Training for Selective Neural Networks", "track": "main", "status": "Reject", "tldr": "We propose a curriculum-inspired method to train selective neural network models by leveraging example difficulty scores.", "abstract": "We consider the problem of training neural network models for selective classification, where the models have the reject option to abstain from predicting certain examples as needed. Recent advances in curriculum learning have demonstrated the benefit of leveraging the example difficulty scores in training deep neural networks for typical classification settings. Example difficulty scores are even more important in selective classification as a lower prediction error rate can be achieved by rejecting hard examples and accepting easy ones. In this paper, we propose a curriculum-inspired method to train selective neural network models by leveraging example difficulty scores. Our method tailors the curriculum idea to selective neural network training by calibrating the ratio of easy and hard examples in each mini-batch, and exploiting difficulty ordering at the mini-batch level. 
Our experimental results demonstrate that our method outperforms both the state-of-the-art and alternative methods using vanilla curriculum techniques for training selective neural network models.", "keywords": "curriculum learning;selective classification", "primary_area": "", "supplementary_material": "/attachment/b7dd9d85baee2f7a71044a9987ed2df33d0e36d7.zip", "author": "Rui Liu;Reza Soroushmehr;Barzan Mozafari", "authorids": "~Rui_Liu6;~Reza_Soroushmehr1;~Barzan_Mozafari1", "gender": ";M;M", "homepage": ";https://najarianlab.ccmb.med.umich.edu/home;https://web.eecs.umich.edu/~mozafari/", "dblp": "42/469-13;;", "google_scholar": "hMR1iP4AAAAJ;;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Rui_Liu6;~Reza_Soroushmehr1;~Barzan_Mozafari1", "aff": "Meta;;University of Michigan", "aff_domain": "meta.com;;umich.edu", "position": "Researcher;;Associate Professor", "bibtex": "@misc{\nliu2023curriculuminspired,\ntitle={Curriculum-inspired Training for Selective Neural Networks},\nauthor={Rui Liu and Reza Soroushmehr and Barzan Mozafari},\nyear={2023},\nurl={https://openreview.net/forum?id=Hcq7zGgcsOg}\n}", "github": "", "project": "", "reviewers": "bj8f;QZ16;oBun;xFiA;F3no", "site": "https://openreview.net/forum?id=Hcq7zGgcsOg", "pdf_size": 449491, "recommendation": "3;3;5;5;6", "confidence": "3;4;5;4;3", "correctness": "3;3;2;4;4", "technical_novelty": "2;2;2;2;2", "empirical_novelty": "2;2;2;2;3", "wc_summary_paper": "68;96;62;184;115", "wc_strength_and_weaknesses": "184;228;234;359;165", "wc_clarity_quality_novelty_and_reproducibility": "23;160;9;46;4", "wc_summary_review": "56;43;18;96;32", "wc_review": "331;527;323;685;316", "wc_reply_reviewers": "102;0;0;0;0", "wc_reply_authors": "436;670;567;797;354", "reply_reviewers": "1;0;0;0;0", "reply_authors": "1;1;1;1;1", "recommendation_avg": [ 4.4, 1.2 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "correctness_avg": [ 3.2, 0.7483314773547882 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.2, 0.39999999999999997 ], "wc_summary_paper_avg": [ 105.0, 43.9089968002003 ], "wc_strength_and_weaknesses_avg": [ 234.0, 67.70819743576105 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 48.4, 57.670096237131425 ], "wc_summary_review_avg": [ 49.0, 26.623298067669978 ], "wc_review_avg": [ 436.4, 147.29236232744725 ], "wc_reply_reviewers_avg": [ 20.4, 40.8 ], "wc_reply_authors_avg": [ 564.8, 158.7544015137848 ], "reply_reviewers_avg": [ 0.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.08908708063747486, "corr_recommendation_correctness": 0.3563483225498991, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:BPPya8BotE0J:scholar.google.com/&scioq=Curriculum-inspired+Training+for+Selective+Neural+Networks&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Meta;University of Michigan", "aff_unique_dep": "Meta Platforms, Inc.;", "aff_unique_url": "https://meta.com;https://www.umich.edu", "aff_unique_abbr": "Meta;UM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "HdYxZ_OVZG", "title": "ThinkSum: Probabilistic reasoning over sets using large language models", "track": "main", "status": "Reject", "tldr": "A wise System 2 for large language models: Think (parallel model call) + Sum (aggregate results to make a prediction).", "abstract": "Large language models (LLMs) 
have a substantial capacity for high-level analogical reasoning: reproducing patterns in linear text that occur in their training data (zero-shot evaluation) or in the provided context (few-shot in-context learning). However, recent studies show that even the largest LLMs fail in scenarios that require reasoning over multiple objects or facts or making sequences of logical deductions. We propose a two-stage probabilistic inference paradigm, ThinkSum, that reasons over sets of objects or facts in a structured manner. In the first stage (Think -- 'fast' retrieval of associations), an LLM is queried in parallel over a set of phrases extracted from the prompt or an auxiliary model call. In the second stage (Sum -- 'slow' probabilistic inference or reasoning), the results of these queries are aggregated to make the final prediction. We demonstrate the advantages of ThinkSum on the BIG-bench suite of evaluation tasks, achieving improvements over the state of the art using GPT-family models on ten difficult tasks, often with far smaller model variants. We compare and contrast ThinkSum with other proposed modifications to direct prompting of LLMs, such as variants of chain-of-thought prompting. We argue that because the probabilistic inference in ThinkSum is performed outside of calls to the LLM, ThinkSum is less sensitive to prompt design, yields more interpretable predictions, and can be flexibly combined with latent variable models to extract structured knowledge from LLMs.", "keywords": "NLP;language models;prompting;zero-shot learning", "primary_area": "", "supplementary_material": "", "author": "Batu Ozturkler;Nikolay Malkin;Zhen Wang;Nebojsa Jojic", "authorids": "~Batu_Ozturkler1;~Nikolay_Malkin1;~Zhen_Wang6;~Nebojsa_Jojic1", "gender": ";;M;", "homepage": "https://batuozt.github.io;;https://zhenwang9102.github.io;www.research.microsoft.com/~jojic", "dblp": "281/6970;;78/6727;20/1944", "google_scholar": "O_tiFfoAAAAJ;;asBaytUAAAAJ;", "orcid": ";;0000-0001-7407-5118;", "linkedin": ";;zhenwang9102/;", "or_profile": "~Batu_Ozturkler1;~Nikolay_Malkin1;~Zhen_Wang6;~Nebojsa_Jojic1", "aff": "NVIDIA;;University of California, San Diego;Microsoft Research", "aff_domain": "nvidia.com;;ucsd.edu; ", "position": "Intern;;Postdoc;Researcher", "bibtex": "@misc{\nozturkler2023thinksum,\ntitle={ThinkSum: Probabilistic reasoning over sets using large language models},\nauthor={Batu Ozturkler and Nikolay Malkin and Zhen Wang and Nebojsa Jojic},\nyear={2023},\nurl={https://openreview.net/forum?id=HdYxZ_OVZG}\n}", "github": "", "project": "", "reviewers": "Xf9p;KxRo;nmJR;2TCH", "site": "https://openreview.net/forum?id=HdYxZ_OVZG", "pdf_size": 1337194, "recommendation": "3;3;3;8", "confidence": "3;2;3;3", "correctness": "2;2;2;4", "technical_novelty": "1;2;2;2", "empirical_novelty": "1;2;2;3", "wc_summary_paper": "66;34;31;170", "wc_strength_and_weaknesses": "145;142;163;97", "wc_clarity_quality_novelty_and_reproducibility": "20;17;17;70", "wc_summary_review": "62;41;15;39", "wc_review": "293;234;226;376", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "402;1030;891;189", "reply_reviewers": "0;0;0;0", "reply_authors": "1;2;2;1", "recommendation_avg": [ 4.25, 2.165063509461097 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "correctness_avg": [ 2.5, 0.8660254037844386 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 75.25, 56.39758416811841 ], "wc_strength_and_weaknesses_avg": [ 136.75, 24.31434761617099 ],
"wc_clarity_quality_novelty_and_reproducibility_avg": [ 31.0, 22.54994456755936 ], "wc_summary_review_avg": [ 39.25, 16.64894891577243 ], "wc_review_avg": [ 282.25, 59.99322878458868 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 628.0, 344.44520609234786 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 1.0, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14644750323994262280&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;2", "aff_unique_norm": "NVIDIA;University of California, San Diego;Microsoft", "aff_unique_dep": "NVIDIA Corporation;;Microsoft Research", "aff_unique_url": "https://www.nvidia.com;https://www.ucsd.edu;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "NVIDIA;UCSD;MSR", "aff_campus_unique_index": "1", "aff_campus_unique": ";San Diego", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "He7UIpiEq_O", "title": "Linkless Link Prediction via Relational Distillation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Graph Neural Networks (GNNs) have been widely used on graph data and have shown exceptional performance in the task of link prediction. Despite their effectiveness, GNNs often suffer from high latency due to non-trivial neighborhood data dependency in practical deployments. To address this issue, researchers have proposed methods based on knowledge distillation (KD) to transfer the knowledge from teacher GNNs to student MLPs, which are known to be efficient even with industrial scale data, and have shown promising results on node classification. Nonetheless, using KD to accelerate link prediction is still unexplored. In this work, we start with exploring two direct analogs of traditional KD for link prediction, i.e., predicted logit-based matching and node representation-based matching. Upon observing direct KD analogs do not perform well for link prediction, we propose a relational KD framework, Linkless Link Prediction (LLP). Unlike simple KD methods that match independent link logits or node representations, LLP distills relational knowledge that is centered around each (anchor) node to the student MLP. Specifically, we propose two matching strategies that complement each other: rank-based matching and distribution-based matching. Extensive experiments demonstrate that LLP boosts the link prediction performance of MLPs with significant margins, and even outperforms the teacher GNNs on 6 out of 9 benchmarks. LLP also achieves a 776.37x speedup in link prediction inference compared to GNNs on the large scale Citation2 dataset. 
", "keywords": "link prediction;knowledge distillation", "primary_area": "", "supplementary_material": "/attachment/d8c5ee2bc77c850e15376501795801b597d18036.zip", "author": "Zhichun Guo;William Shiao;Shichang Zhang;Yozen Liu;Nitesh Chawla;Neil Shah;Tong Zhao", "authorids": "~Zhichun_Guo1;~William_Shiao1;~Shichang_Zhang2;~Yozen_Liu1;~Nitesh_Chawla1;~Neil_Shah2;~Tong_Zhao3", "gender": ";M;M;;M;M;M", "homepage": ";https://shiao.me;https://shichangzh.github.io/;https://www.linkedin.com/in/yozen-liu-531a67130/;http://niteshchawla.nd.edu;http://nshah.net;https://tzhao.io/", "dblp": ";304/3898;234/4118;242/8056.html;c/NiteshVChawla.html;71/7771;94/6503-3", "google_scholar": ";TIq-P5AAAAAJ;TYqG0x4AAAAJ;i3U2JjEAAAAJ;hDLBEhkAAAAJ;Qut69OgAAAAJ;05cRc-MAAAAJ", "orcid": ";0000-0001-5813-2266;0000-0003-0954-5018;;;0000-0003-3261-8430;0000-0001-7660-1732", "linkedin": ";will-shiao;shichang-zhang-4430a4106/;;;;", "or_profile": "~Zhichun_Guo1;~William_Shiao1;~Shichang_Zhang2;~Yozen_Liu1;~Nitesh_Chawla1;~Neil_Shah2;~Tong_Zhao3", "aff": ";University of California, Riverside;University of California, Los Angeles;Snap Inc.;University of Notre Dame;Snap Inc.;Snap Inc.", "aff_domain": ";ucr.edu;cs.ucla.edu;snapchat.com;nd.edu;snap.com;snap.com", "position": ";PhD student;PhD student;Researcher;Full Professor;Research Scientist;Researcher", "bibtex": "@misc{\nguo2023linkless,\ntitle={Linkless Link Prediction via Relational Distillation},\nauthor={Zhichun Guo and William Shiao and Shichang Zhang and Yozen Liu and Nitesh Chawla and Neil Shah and Tong Zhao},\nyear={2023},\nurl={https://openreview.net/forum?id=He7UIpiEq_O}\n}", "github": "", "project": "", "reviewers": "Z2kr;Wrjf;4BGr", "site": "https://openreview.net/forum?id=He7UIpiEq_O", "pdf_size": 751370, "recommendation": "5;5;8", "confidence": "4;3;4", "correctness": "4;3;3", "technical_novelty": "2;2;2", "empirical_novelty": "3;2;3", "wc_summary_paper": "72;127;73", "wc_strength_and_weaknesses": "195;38;264", "wc_clarity_quality_novelty_and_reproducibility": "30;78;13", "wc_summary_review": "32;264;407", "wc_review": "329;507;757", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "2023;2314;2104", "reply_reviewers": "0;0;0", "reply_authors": "6;6;4", "recommendation_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 90.66666666666667, 25.69478978746902 ], "wc_strength_and_weaknesses_avg": [ 165.66666666666666, 94.56684878375133 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 40.333333333333336, 27.523727137790686 ], "wc_summary_review_avg": [ 234.33333333333334, 154.52364076592147 ], "wc_review_avg": [ 531.0, 175.5524612948126 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 2147.0, 122.62952336203546 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 5.333333333333333, 0.9428090415820634 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.5, "corr_recommendation_correctness": -0.5, "gs_citation": 64, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15293153908655955337&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 8, "aff_unique_index": "0;1;2;3;2;2", "aff_unique_norm": "University of California, Riverside;University of California, Los Angeles;Snap Inc.;University of Notre Dame", "aff_unique_dep": ";;;", "aff_unique_url": 
"https://www.ucr.edu;https://www.ucla.edu;https://www.snapinc.com;https://www.nd.edu", "aff_unique_abbr": "UCR;UCLA;Snap;Notre Dame", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Riverside;Los Angeles;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "HeEqRvCtN2-", "title": "Consistent Targets Provide Better Supervision in Semi-supervised Object Detection", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "In this study, we dive deep into the inconsistency of pseudo targets in semi-supervised object detection (SSOD). Our core observation is that the oscillating pseudo targets undermine the training of an accurate semi-supervised detector. It not only inject noise into student training but also lead to severe overfitting on the classification task. Therefore, we propose a systematic solution, termed Consistent-Teacher, to reduce the inconsistency. First, adaptive anchor assignment~(ASA) substitutes the static IoU-based strategy, which enables the student network to be resistant to noisy pseudo bounding boxes; Then we calibrate the subtask predictions by designing a 3D feature alignment module~(FAM-3D). It allows each classification feature to adaptively query the optimal feature vector for the regression task at arbitrary scales and locations. Lastly, a Gaussian Mixture Model (GMM) dynamically revises the score threshold of the pseudo-bboxes, which stabilizes the number of ground-truths at an early stage and remedies the unreliable supervision signal during training. Consistent-Teacher provides strong results on a large range of SSOD evaluations. It achieves 40.0 mAP with ResNet-50 backbone given only 10\\% of annotated MS-COCO data, which surpasses previous baselines using pseudo labels by around 3 mAP. When trained on fully annotated MS-COCO with additional unlabeled data, the performance further increases to 47.2 mAP. 
Our code will be open-sourced soon.", "keywords": "Semi-supervised Learning;Object Detection", "primary_area": "", "supplementary_material": "/attachment/f48c811ede7e245501eed0b7f16f9ac8957c8bc4.zip", "author": "Xinjiang Wang;Xingyi Yang;Shilong Zhang;Yijiang Li;Litong Feng;Shijie Fang;Chengqi Lyu;Kai Chen;Wayne Zhang", "authorids": "~Xinjiang_Wang1;~Xingyi_Yang1;~Shilong_Zhang1;~Yijiang_Li1;~Litong_Feng1;~Shijie_Fang1;lvchengqi@pjlab.org.cn;~Kai_Chen4;~Wayne_Zhang2", "gender": "M;M;M;Not Specified;M;M;;M;", "homepage": ";https://adamdad.github.io/;https://jshilong.github.io/;https://williamium3000.github.io/;;;;https://chenkai.site/;", "dblp": "215/3546;;;;133/4032.html;;;181/2839-26;", "google_scholar": "https://scholar.google.com/citations?hl=zh-TW;1n2OPtwAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.hk/citations?user=Dx3z0m8AAAAJ;PnNAAasAAAAJ;;;https://scholar.google.com.hk/citations?user=eGD0b7IAAAAJ;", "orcid": ";;0009-0005-4336-4941;;;;;0000-0002-6820-2325;", "linkedin": ";;;;litong-feng-9579747b/;%E4%BB%95%E6%9D%B0-%E6%96%B9-35259817a/;;;", "or_profile": "~Xinjiang_Wang1;~Xingyi_Yang1;~Shilong_Zhang1;~Yijiang_Li1;~Litong_Feng1;~Shijie_Fang1;lvchengqi@pjlab.org.cn;~Kai_Chen4;~Wayne_Zhang2", "aff": "SenseTime Group;National University of Singapore;Alibaba Group;Johns Hopkins University;SenseTime Research;Peking University;;Shanghai AI Laboratory;", "aff_domain": "sensetime.com;nus.edu;alibaba.com;jh.edu;sensetime.com;pku.edu.cn;;pjlab.org.cn;", "position": "Researcher;PhD student;Intern;MS student;Associate Research Director;MS student;;Researcher;", "bibtex": "@misc{\nwang2023consistent,\ntitle={Consistent Targets Provide Better Supervision in Semi-supervised Object Detection},\nauthor={Xinjiang Wang and Xingyi Yang and Shilong Zhang and Yijiang Li and Litong Feng and Shijie Fang and Chengqi Lyu and Kai Chen and Wayne Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=HeEqRvCtN2-}\n}", "github": "", "project": "", "reviewers": "tBiG;6uS6;4adR;eai7", "site": "https://openreview.net/forum?id=HeEqRvCtN2-", "pdf_size": 7692316, "recommendation": "3;5;6;6", "confidence": "3;4;3;5", "correctness": "3;4;3;4", "technical_novelty": "2;2;3;2", "empirical_novelty": "2;0;4;0", "wc_summary_paper": "79;74;181;62", "wc_strength_and_weaknesses": "194;148;183;70", "wc_clarity_quality_novelty_and_reproducibility": "43;39;32;59", "wc_summary_review": "46;29;33;39", "wc_review": "362;290;429;230", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "591;463;442;400", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.5, 1.6583123951777 ], "wc_summary_paper_avg": [ 99.0, 47.74410958432464 ], "wc_strength_and_weaknesses_avg": [ 148.75, 48.53542520674976 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 43.25, 9.908960591303208 ], "wc_summary_review_avg": [ 36.75, 6.417748826496718 ], "wc_review_avg": [ 327.75, 74.84108163301757 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 474.0, 71.25657864365928 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 9, 0 ], "corr_recommendation_confidence": 0.4923659639173309, "corr_recommendation_correctness": 0.40824829046386296, "gs_citation": 4, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=6898594770373886727&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;4;5;6", "aff_unique_norm": "SenseTime Group;National University of Singapore;Alibaba Group;Johns Hopkins University;SenseTime;Peking University;Shanghai AI Laboratory", "aff_unique_dep": ";;;;SenseTime Research;;", "aff_unique_url": "https://www.sensetime.com;https://www.nus.edu.sg;https://www.alibaba.com;https://www.jhu.edu;https://www.sensetime.com;http://www.pku.edu.cn;https://www.shanghai-ai-lab.com", "aff_unique_abbr": "SenseTime;NUS;Alibaba;JHU;SenseTime;Peking U;SAIL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;2;0;0;0", "aff_country_unique": "China;Singapore;United States" }, { "title": "Text Summarization with Oracle Expectation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11474", "id": "HehQobsr0S", "poster": "", "openreview": "https://openreview.net/forum?id=HehQobsr0S", "slides": "https://iclr.cc/virtual/2023/poster/11474", "video": "https://iclr.cc/virtual/2023/poster/11474", "author_site": "Yumo Xu, Mirella Lapata", "tldr": "", "abstract": "Extractive summarization produces summaries by identifying and concatenating the most important sentences in a document. Since most summarization datasets do not come with gold labels indicating whether document sentences are summary-worthy, different labeling algorithms have been proposed to extrapolate oracle extracts for model training. In this work, we identify two flaws with the widely used greedy labeling approach: it delivers suboptimal and deterministic oracles. To alleviate both issues, we propose a simple yet effective labeling algorithm that creates soft, expectation-based sentence labels. We define a new learning objective for extractive summarization which incorporates learning signals from multiple oracle summaries and prove it is equivalent to estimating the oracle expectation for each document sentence. 
Without any architectural modifications, the proposed labeling scheme achieves superior performance on a variety of summarization benchmarks across domains and languages, in both supervised and zero-shot settings.", "keywords": "Text Summarization;NLP", "primary_area": "", "supplementary_material": "/attachment/da5863b669f263027a246c55a777acf6db73f173.zip", "author": "Yumo Xu;Mirella Lapata", "authorids": "~Yumo_Xu1;~Mirella_Lapata1", "gender": "M;F", "homepage": ";https://homepages.inf.ed.ac.uk/mlap/", "dblp": "222/9446;59/6701", "google_scholar": ";j67B9Q4AAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Yumo_Xu1;~Mirella_Lapata1", "aff": "University of Edinburgh, University of Edinburgh;Edinburgh University, University of Edinburgh", "aff_domain": "ed.ac.uk;inf.ed.ac.uk", "position": "Research Associate ;Full Professor", "bibtex": "@inproceedings{\nxu2023text,\ntitle={Text Summarization with Oracle Expectation},\nauthor={Yumo Xu and Mirella Lapata},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=HehQobsr0S}\n}", "github": "", "project": "", "reviewers": "mPYc;perC;Sdw2", "pdf_size": 401229, "recommendation": "6;6;8", "confidence": "3;4;3", "correctness": "4;3;4", "technical_novelty": "3;2;4", "empirical_novelty": "2;3;3", "wc_summary_paper": "88;136;65", "wc_strength_and_weaknesses": "120;368;92", "wc_clarity_quality_novelty_and_reproducibility": "45;45;263", "wc_summary_review": "51;58;25", "wc_review": "304;607;445", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "472;1273;755", "reply_reviewers": "0;0;0", "reply_authors": "1;2;1", "recommendation_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.816496580927726 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 96.33333333333333, 29.578520735305354 ], "wc_strength_and_weaknesses_avg": [ 193.33333333333334, 124.03583711528249 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 117.66666666666667, 102.76618553244491 ], "wc_summary_review_avg": [ 44.666666666666664, 14.197026292697903 ], "wc_review_avg": [ 452.0, 123.79822292747178 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 833.3333333333334, 331.6648241154849 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.49999999999999983, "corr_recommendation_correctness": 0.49999999999999983, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7727248729176644379&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=HehQobsr0S", "email": "ed.ac.uk;inf.ed.ac.uk", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Edinburgh", "aff_unique_dep": "", "aff_unique_url": "https://www.ed.ac.uk", "aff_unique_abbr": "Edinburgh", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "id": "HehY2ZX2Cz", "title": "Sorted eigenvalue comparison $d_{\\mathsf{Eig}}$: A simple alternative to $d_{\\mathsf{FID}}$", "track": "main", "status": "Reject", "tldr": "We propose to compare sorted eigenvalues as a simple alternative to FID score.", "abstract": "For $i = 1, 2$, let $\\mathbf{S}_i$ 
be the sample covariance of $\\mathbf{Z}_i$ with $n_i$ $p$-dimensional vectors. First, we theoretically justify an improved Fr\u00e9chet Inception Distance ($d_{\\mathsf{FID}}$) algorithm that replaces np.trace(sqrtm($\\mathbf{S}_1 \\mathbf{S}_2$)) with np.sqrt(eigvals($\\mathbf{S}_1 \\mathbf{S}_2$)).sum(). With the appearance of unsorted eigenvalues in the improved $d_{\\mathsf{FID}}$, we are then motivated to propose sorted eigenvalue comparison ($d_{\\mathsf{Eig}}$) as a simple alternative: $d_{\\mathsf{Eig}}(\\mathbf{S}_1, \\mathbf{S}_2)^2=\\sum_{j=1}^p (\\sqrt{\\lambda_j^1} - \\sqrt{\\lambda_j^2})^2$, and $\\lambda_j^i$ is the $j$-th largest eigenvalue of $\\mathbf{S}_i$. Second, we present two main takeaways for the improved $d_{\\mathsf{FID}}$ and proposed $d_{\\mathsf{Eig}}$ . (i) $d_{\\mathsf{FID}}$: The error bound for computing non-negative eigenvalues of diagonalizable $\\mathbf S_1 \\mathbf S_2$ is reduced to $\\mathcal{O}(\\varepsilon) \\|\\mathbf S_1 \\| \\|\\mathbf S_1 \\mathbf S_2 \\|$, along with reducing the run time by $\\sim25\\%$. (ii) $d_{\\mathsf{Eig}}$: The error bound for computing non-negative eigenvalues of sample covariance $\\mathbf S_i$ is further tightened to $\\mathcal{O}(\\varepsilon) \\|\\mathbf S_i \\|$, with reducing $\\sim90\\%$ run time. Taking a statistical viewpoint (random matrix theory) on $\\mathsf{S}_i$, we illustrate the asymptotic stability of its largest eigenvalues, i.e., rigidity estimates of $\\mathcal{O}(n_i^{-\\frac{1}{2}+\\alpha})$. Last, we discuss limitations and future work for $d_{\\mathsf{Eig}}$.", "keywords": "Distribution shift;FID;eigenvalue comparison;random matrix theory", "primary_area": "", "supplementary_material": "", "author": "Jiqing Wu;Viktor Koelzer", "authorids": "~Jiqing_Wu1;~Viktor_Koelzer1", "gender": "M;M", "homepage": "https://musikisomorphie.github.io/;https://www.uzh.ch/de.html", "dblp": "05/7556;", "google_scholar": "BCKKfUEAAAAJ;", "orcid": "0000-0002-6898-8698;", "linkedin": ";", "or_profile": "~Jiqing_Wu1;~Viktor_Koelzer1", "aff": "University Hospital Zurich;", "aff_domain": "usz.ch;", "position": "Postdoc;", "bibtex": "@misc{\nwu2023sorted,\ntitle={Sorted eigenvalue comparison \\$d\\_\\{{\\textbackslash}mathsf\\{Eig\\}\\}\\$: A simple alternative to \\$d\\_\\{{\\textbackslash}mathsf\\{{FID}\\}\\}\\$},\nauthor={Jiqing Wu and Viktor Koelzer},\nyear={2023},\nurl={https://openreview.net/forum?id=HehY2ZX2Cz}\n}", "github": "", "project": "", "reviewers": "6Rmw;RbUs;7tys;28R5", "site": "https://openreview.net/forum?id=HehY2ZX2Cz", "pdf_size": 1380274, "recommendation": "3;3;5;5", "confidence": "4;4;4;3", "correctness": "2;3;4;3", "technical_novelty": "2;1;2;2", "empirical_novelty": "1;2;3;2", "wc_summary_paper": "101;64;38;90", "wc_strength_and_weaknesses": "429;42;295;34", "wc_clarity_quality_novelty_and_reproducibility": "56;15;2;46", "wc_summary_review": "76;172;7;202", "wc_review": "662;293;342;372", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 73.25, 24.38621536852326 ], "wc_strength_and_weaknesses_avg": [ 200.0, 168.80906373770338 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 29.75, 22.02697210240209 ], "wc_summary_review_avg": [ 114.25, 77.46087722198865 
], "wc_review_avg": [ 417.25, 144.09263513448562 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.7071067811865475, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "aff_unique_index": "0", "aff_unique_norm": "University Hospital Zurich", "aff_unique_dep": "", "aff_unique_url": "https://www.uhz.ch", "aff_unique_abbr": "UHZ", "aff_country_unique_index": "0", "aff_country_unique": "Switzerland" }, { "id": "HgJ3HYIP3pY", "title": "DCT-DiffStride: Differentiable Strides with Real-Valued Data", "track": "main", "status": "Reject", "tldr": "We propose DCT-DiffStride, a differentiable method to learn strides leveraging the energy compaction properties of the discrete cosine transform.", "abstract": "Reducing the size of intermediate feature maps within various neural network architectures is critical for generalization performance, and memory and computational complexity. Until recently, most methods required downsampling rates (i.e., decimation) to be predefined and static during training, with optimal downsampling rates requiring a vast hyper-parameter search. Recent work has proposed a novel and differentiable method for learning strides named DiffStride which uses the discrete Fourier transform (DFT) to learn strides for decimation. However, in many cases the DFT does not capture signal properties as efficiently as the discrete cosine transform (DCT). Therefore, we propose an alternative method for learning decimation strides, DCT-DiffStride, as well as new regularization methods to reduce model complexity. Our work employs the DCT and its inverse as a low-pass filter in the frequency domain to reduce feature map dimensionality. Leveraging the well-known energy compaction properties of the DCT for natural signals, we evaluate DCT-DiffStride with its competitors on image and audio datasets demonstrating a favorable tradeoff in model performance and model complexity compared to competing methods. 
Additionally, we show DCT-DiffStride and DiffStride can be applied to data outside the natural signal domain, increasing the general applications of such methods.", "keywords": "strides;decimation;deep learning;discrete cosine transform", "primary_area": "", "supplementary_material": "", "author": "Clayton Harper;Mitchell Thornton;Eric Larson", "authorids": "~Clayton_Harper1;~Mitchell_Thornton1;~Eric_Larson1", "gender": ";M;M", "homepage": ";http://lyle.smu.edu/~mitch/;http://www.eclarson.com/", "dblp": ";t/MitchellAThornton.html;22/4287", "google_scholar": "cZxMJEEAAAAJ;qIY28F8AAAAJ;https://scholar.google.no/citations?user=vThE9GIAAAAJ", "orcid": "0000-0003-3185-7210;0000-0003-3559-9511;0000-0001-6040-868X", "linkedin": "clay-harper-6a2b94141/;mitchthornton/;", "or_profile": "~Clayton_Harper1;~Mitchell_Thornton1;~Eric_Larson1", "aff": "Southern Methodist University, Southern Methodist University;Southern Methodist University, Southern Methodist University;Southern Methodist University, Southern Methodist University", "aff_domain": "smu.edu;smu.edu;smu.edu", "position": "PhD student;Full Professor;Associate Professor", "bibtex": "@misc{\nharper2023dctdiffstride,\ntitle={{DCT}-DiffStride: Differentiable Strides with Real-Valued Data},\nauthor={Clayton Harper and Mitchell Thornton and Eric Larson},\nyear={2023},\nurl={https://openreview.net/forum?id=HgJ3HYIP3pY}\n}", "github": "", "project": "", "reviewers": "yZ5T;NRJF;LwJS;Kc3t", "site": "https://openreview.net/forum?id=HgJ3HYIP3pY", "pdf_size": 1583668, "recommendation": "3;5;5;6", "confidence": "4;3;3;3", "correctness": "3;3;3;3", "technical_novelty": "2;2;3;2", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "17;228;70;29", "wc_strength_and_weaknesses": "174;325;151;105", "wc_clarity_quality_novelty_and_reproducibility": "73;191;17;16", "wc_summary_review": "59;25;28;50", "wc_review": "323;769;266;200", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "764;939;818;508", "reply_reviewers": "0;0;0;0", "reply_authors": "1;2;2;1", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 86.0, 84.30599029724993 ], "wc_strength_and_weaknesses_avg": [ 188.75, 82.49356035473315 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 74.25, 71.24385938451117 ], "wc_summary_review_avg": [ 40.5, 14.396180048887969 ], "wc_review_avg": [ 389.5, 223.38587690362164 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 757.25, 157.23767837258345 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.9271726499455306, "corr_recommendation_correctness": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10186728515562158074&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Southern Methodist University", "aff_unique_dep": "", "aff_unique_url": "https://www.smu.edu", "aff_unique_abbr": "SMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Write and Paint: Generative Vision-Language Models are Unified Modal Learners", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11724", "id": "HgQR0mXQ1_a", "poster": 
"/media/PosterPDFs/ICLR%202023/11724.png?t=1682440151.0294664", "openreview": "https://openreview.net/forum?id=HgQR0mXQ1_a", "slides": "https://iclr.cc/virtual/2023/poster/11724", "video": "https://iclr.cc/virtual/2023/poster/11724", "author_site": "Shizhe Diao, Wangchunshu Zhou, Xinsong Zhang, Jiawei Wang", "tldr": "The paper proposes a simple, scalable, and versatile seq2seq foundation model, which is capable of vision-language understanding, image-to-text generation, and text-to-image generation with a single unified architecture", "abstract": "Recent advances in vision-language pre-training have pushed the state-of-the-art on various vision-language tasks, making machines more capable of multi-modal writing (image-to-text generation) and painting (text-to-image generation). However, few studies investigate if these two essential capabilities can be learned together and boost each other, making a versatile and powerful multi-modal foundation model. In this work, we disclose the potential of symmetric generative vision-language pre-training in learning to write and paint concurrently, and propose a new unified modal model, named DaVinci, trained with prefix language modeling and prefix image modeling, a simple generative self-supervised objective on image-text pairs. Thanks to the proposed prefix multi-modal modeling framework, DaVinci is simple to train, scalable to huge data, adaptable to both writing and painting tasks, and also strong on other vision, text, and multi-modal understanding tasks. DaVinci achieves competitive performance on a wide range of 27 generation/understanding tasks and demonstrates the superiority of combining vision/language generative pre-training. Furthermore, we carefully benchmark the performance of different vision-language pre-training objectives on different scales of pre-training datasets on a heterogeneous and broad distribution coverage. Our results demonstrate the potential of exploiting self-supervision in both language and vision inputs, and establish new, stronger baselines for future comparisons at different data scales. 
The code and pre-trained models are available at https://github.com/shizhediao/DaVinci.", "keywords": "Foundation model;Multi-modal learning;Vision-language pre-training", "primary_area": "", "supplementary_material": "", "author": "Shizhe Diao;Wangchunshu Zhou;Xinsong Zhang;Jiawei Wang", "authorids": "~Shizhe_Diao2;~Wangchunshu_Zhou1;~Xinsong_Zhang1;~Jiawei_Wang5", "gender": "M;M;M;M", "homepage": "https://michaelzhouwang.github.io;;https://bcmi.sjtu.edu.cn/home/wangjiawei/;https://shizhediao.github.io/", "dblp": "245/8640.html;04/2640;;221/3896", "google_scholar": "UebIjuQAAAAJ;BnSQUocAAAAJ;https://scholar.google.com.hk/citations?hl=zh-CN;NDFQrLQAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Wangchunshu_Zhou1;~Xinsong_Zhang1;~Jiawei_Wang5;~SHIZHE_DIAO1", "aff": "Department of Computer Science, ETHZ - ETH Zurich;Bytedance AI Lab;Shanghai Jiaotong University;Hong Kong University of Science and Technology", "aff_domain": "inf.ethz.ch;bytedance.com;sjtu.edu.cn;ust.hk", "position": "PhD student;research fellow;MS student;PhD student", "bibtex": "@inproceedings{\ndiao2023write,\ntitle={Write and Paint: Generative Vision-Language Models are Unified Modal Learners},\nauthor={Shizhe Diao and Wangchunshu Zhou and Xinsong Zhang and Jiawei Wang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=HgQR0mXQ1_a}\n}", "github": "", "project": "", "reviewers": "qYLj;9zCh;wium;NJt6", "pdf_size": 1635143, "recommendation": "6;6;8;8", "confidence": "4;3;3;4", "correctness": "3;3;3;3", "technical_novelty": "3;3;2;3", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "61;52;101;62", "wc_strength_and_weaknesses": "343;120;321;290", "wc_clarity_quality_novelty_and_reproducibility": "22;35;32;210", "wc_summary_review": "54;18;41;29", "wc_review": "480;225;495;591", "wc_reply_reviewers": "19;0;64;174", "wc_reply_authors": "1229;616;1051;1278", "reply_reviewers": "1;0;1;2", "reply_authors": "3;1;3;4", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 69.0, 18.881207588499205 ], "wc_strength_and_weaknesses_avg": [ 268.5, 87.77955342789117 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 74.75, 78.23482280928359 ], "wc_summary_review_avg": [ 35.5, 13.425721582097552 ], "wc_review_avg": [ 447.75, 135.4720912217716 ], "wc_reply_reviewers_avg": [ 64.25, 67.49212917074108 ], "wc_reply_authors_avg": [ 1043.5, 260.8701784413082 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.75, 1.0897247358851685 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12439397764083500705&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=HgQR0mXQ1_a", "email": "inf.ethz.ch;bytedance.com;sjtu.edu.cn;ust.hk", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "ETH Zurich;ByteDance;Shanghai Jiao Tong University;Hong Kong University of Science and Technology", "aff_unique_dep": "Department of Computer Science;AI Lab;;", "aff_unique_url": "https://www.ethz.ch;https://www.bytedance.com;https://www.sjtu.edu.cn;https://www.ust.hk", "aff_unique_abbr": "ETHZ;Bytedance AI Lab;SJTU;HKUST", "aff_campus_unique_index": "0;2", 
"aff_campus_unique": "Zurich;;Hong Kong SAR", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "Switzerland;China" }, { "id": "Hh0BdBf6Ls", "title": "UNREAL: Unlabeled Nodes Retrieval and Labeling for Heavily-imbalanced Node Classification", "track": "main", "status": "Reject", "tldr": "A method for retrieving unlabeled node information to handle heavily-imbalanced node classification", "abstract": "Extremely skewed label distributions are common in real-world node classification tasks. If not dealt with appropriately, it significantly hurts the performance of GNNs on minority classes. Due to the practical importance, there have been a series of recent researches devoted to this challenge. Existing over-sampling techniques smooth the label distribution by generating ''fake'' minority nodes and synthesize their features and local topology, which largely ignore the rich information of unlabeled nodes on graphs. Recent methods based on loss function modification re-weight different samples or change classification margins, which achieve good performance. However, representative methods need label information to estimate the distance of each node to its class center, which is unavailable on unlabeled nodes. In this paper, we propose UNREAL, which is an iterative over-sampling method. The first key difference is that we only add unlabeled nodes instead of synthetic nodes, which eliminates the challenge of feature and neighborhood generation. To select which unlabeled nodes to add, we propose geometric ranking, which ranks unlabeled nodes based on unsupervised learning results in the node embedding space. Finally, we identify the issue of geometric imbalance in the embedding space and provide a simple metric to filter out geometrically imbalanced nodes. 
Extensive experiments on real-world benchmark datasets are conducted, and the empirical results show that our method significantly outperforms current state-of-the-art methods consistently on different datasets with different imbalance ratios.", "keywords": "Node Classification;Heavily-imbalanced Representation Learning;Graph Neural Networks", "primary_area": "", "supplementary_material": "/attachment/32984ec18cdd3e1826436a3b34bc1136c0714d84.zip", "author": "Divin Yan;Shengzhong Zhang;Bisheng Li;min zhou;Zengfeng Huang", "authorids": "~Divin_Yan1;~Shengzhong_Zhang1;~Bisheng_Li1;~min_zhou1;~Zengfeng_Huang1", "gender": "M;M;M;F;M", "homepage": "https://divinyan.com/;https://szzhang17.github.io/;;;https://zengfenghuang.github.io/", "dblp": "359/6307.html;255/8703;;10/2513-6;97/9726", "google_scholar": "-Vv6hJsAAAAJ;bWD48lgAAAAJ;63eD24EAAAAJ;P8WYyYIAAAAJ;https://scholar.google.com.hk/citations?user=FwNBuXUAAAAJ", "orcid": "0009-0009-2880-3124;0000-0003-1783-6835;;0000-0002-4088-1266;0000-0003-2671-7483", "linkedin": ";;;min-zhou-48661893/;", "or_profile": "~Divin_Yan1;~Shengzhong_Zhang1;~Bisheng_Li1;~min_zhou1;~Zengfeng_Huang1", "aff": "ISTBI & School of Data Science, Fudan University;Fudan University;Fudan University;Huawei Technologies Ltd.;Fudan University", "aff_domain": "fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;huawei.com;fudan.edu", "position": "Applied Mathematics Research Master Student;PhD student;MS student;Principal Researcher;Full Professor", "bibtex": "@misc{\nyan2023unreal,\ntitle={{UNREAL}: Unlabeled Nodes Retrieval and Labeling for Heavily-imbalanced Node Classification},\nauthor={Divin Yan and Shengzhong Zhang and Bisheng Li and min zhou and Zengfeng Huang},\nyear={2023},\nurl={https://openreview.net/forum?id=Hh0BdBf6Ls}\n}", "github": "", "project": "", "reviewers": "fiXK;9Evz;D8cW", "site": "https://openreview.net/forum?id=Hh0BdBf6Ls", "pdf_size": 1158159, "recommendation": "5;6;6", "confidence": "4;4;5", "correctness": "3;3;3", "technical_novelty": "2;3;3", "empirical_novelty": "2;3;3", "wc_summary_paper": "41;63;76", "wc_strength_and_weaknesses": "284;406;120", "wc_clarity_quality_novelty_and_reproducibility": "23;54;52", "wc_summary_review": "36;37;37", "wc_review": "384;560;285", "wc_reply_reviewers": "0;193;0", "wc_reply_authors": "1635;2332;1606", "reply_reviewers": "0;3;0", "reply_authors": "3;6;3", "recommendation_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 60.0, 14.445299120013633 ], "wc_strength_and_weaknesses_avg": [ 270.0, 117.1779273868021 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 43.0, 14.165686240583852 ], "wc_summary_review_avg": [ 36.666666666666664, 0.4714045207910317 ], "wc_review_avg": [ 409.6666666666667, 113.72579107465269 ], "wc_reply_reviewers_avg": [ 64.33333333333333, 90.98107251266912 ], "wc_reply_authors_avg": [ 1857.6666666666667, 335.6132032894746 ], "reply_reviewers_avg": [ 1.0, 1.4142135623730951 ], "reply_authors_avg": [ 4.0, 1.4142135623730951 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.4999999999999999, "corr_recommendation_correctness": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10272864740181135122&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index":
"0;0;0;1;0", "aff_unique_norm": "Fudan University;Huawei", "aff_unique_dep": "School of Data Science;Huawei Technologies", "aff_unique_url": "https://www.fudan.edu.cn;https://www.huawei.com", "aff_unique_abbr": "Fudan;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "HiuupcGa-0g", "title": "Continual Learning via Adaptive Neuron Selection", "track": "main", "status": "Withdraw", "tldr": "This paper presents a novel continual learning solution with adaptive neuron selection.", "abstract": "Continual learning (CL) aims at learning a sequence of tasks without losing previously acquired knowledge. Early efforts have achieved promising results in overcoming the catastrophic forgetting problem. As a consequence, contemporary studies turn to investigate whether learning a sequence of tasks can be facilitated from the perspective of knowledge consolidation. However, existing solutions either still confront severe forgetting issues or share narrow knowledge between the new and previous tasks. This paper presents a novel Continual Learning solution with Adaptive Neuron Selection (CLANS), which treats the used neurons in earlier tasks as a knowledge pool and makes it scalable via reinforcement learning with a small margin. Subsequently, the adaptive neuron selection enables knowledge consolidation for both old and new tasks in addition to overcoming the CF problem. The experimental results conducted on four datasets widely used in CL evaluations demonstrate that CLANS outperforms the state-of-the-art baselines. ", "keywords": "continual learning;knowledge transfer;neural network;neuron selection;deep learning", "primary_area": "", "supplementary_material": "/attachment/5e75a8bd3378a38174c354405d49bcedc2fce9bd.zip", "author": "Qiang Gao;Siqi Yang;Xiaojun Shan;Fan Zhou;Goce Trajcevski", "authorids": "~Qiang_Gao1;~Siqi_Yang3;~Xiaojun_Shan1;~Fan_Zhou11;~Goce_Trajcevski2", "gender": "M;M;M;M;M", "homepage": "https://qianggao.xyz/;https://github.com/YesQ11;https://sxj1215.github.io/shanxiaojun.github.io/;https://sise.uestc.edu.cn/info/1035/9375.htm;", "dblp": "43/5917-3;;127/8709;63/3122-2;66/974", "google_scholar": "3KPOGeAAAAAJ;;;https://scholar.google.com.hk/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-9621-5414;0009-0000-3057-1160;;0000-0002-8038-8150;", "linkedin": ";;;;", "or_profile": "~Qiang_Gao1;~Siqi_Yang3;~Xiaojun_Shan1;~Fan_Zhou11;~Goce_Trajcevski2", "aff": "Southwestern University of Finance and Economics;;University of Electronic Science and Technology of China;University of Electronic Science and Technology of China;Iowa State University", "aff_domain": "swufe.edu.cn;;uestc.edu.cn;uestc.edu.cn;iastate.edu", "position": "Associate Professor;;Undergrad student;Full Professor;Associate Professor", "bibtex": "@misc{\ngao2023continual,\ntitle={Continual Learning via Adaptive Neuron Selection},\nauthor={Qiang Gao and Siqi Yang and Xiaojun Shan and Fan Zhou and Goce Trajcevski},\nyear={2023},\nurl={https://openreview.net/forum?id=HiuupcGa-0g}\n}", "github": "", "project": "", "reviewers": "bfaJ;apt6;qFwi;mFmB", "site": "https://openreview.net/forum?id=HiuupcGa-0g", "pdf_size": 938437, "recommendation": "3;3;3;8", "confidence": "5;4;4;4", "correctness": "3;2;3;4", "technical_novelty": "2;2;3;3", "empirical_novelty": "1;2;2;2", "wc_summary_paper": "101;11;91;48", "wc_strength_and_weaknesses": "488;387;312;169", "wc_clarity_quality_novelty_and_reproducibility": 
"118;36;80;39", "wc_summary_review": "64;31;104;40", "wc_review": "771;465;587;296", "wc_reply_reviewers": "410;0;175;0", "wc_reply_authors": "579;588;323;600", "reply_reviewers": "1;0;1;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.25, 2.165063509461097 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 62.75, 35.90525727522364 ], "wc_strength_and_weaknesses_avg": [ 339.0, 116.33357211054769 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 68.25, 33.573613150806395 ], "wc_summary_review_avg": [ 59.75, 28.252212302756046 ], "wc_review_avg": [ 529.75, 173.4292002518607 ], "wc_reply_reviewers_avg": [ 146.25, 168.20281656381383 ], "wc_reply_authors_avg": [ 522.5, 115.42205161926381 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": 0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:kWMqGi0sK80J:scholar.google.com/&scioq=Continual+Learning+via+Adaptive+Neuron+Selection&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Southwestern University of Finance and Economics;University of Electronic Science and Technology of China;Iowa State University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.swufe.edu.cn;https://www.uestc.edu.cn;https://www.iastate.edu", "aff_unique_abbr": "SWUFE;UESTC;ISU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "China;United States" }, { "title": "Learning Rationalizable Equilibria in Multiplayer Games", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11138", "id": "HjOo2k8lhFl", "poster": "/media/PosterPDFs/ICLR%202023/11138.png?t=1682922312.9268348", "openreview": "https://openreview.net/forum?id=HjOo2k8lhFl", "slides": "https://iclr.cc/virtual/2023/poster/11138", "video": "https://iclr.cc/virtual/2023/poster/11138", "author_site": "Yuanhao Wang, Dingwen Kong, Yu Bai, Chi Jin", "tldr": "We develop provably efficient algorithms for finding approximate CE and CCE that are also rationalizable.", "abstract": "A natural goal in multi-agent learning is to learn \\emph{rationalizable} behavior, where players learn to avoid any Iteratively Dominated Action (IDA). However, standard no-regret based equilibria-finding algorithms could take exponential samples to find such rationalizable strategies. In this paper, we first propose a simple yet sample-efficient algorithm for finding a rationalizable action profile in multi-player general-sum games under bandit feedback, which substantially improves over the results of Wu et al. We further develop algorithms with the first efficient guarantees for learning rationalizable Coarse Correlated Equilibria (CCE) and Correlated Equilibria (CE). Our algorithms incorporate several novel techniques to guarantee the elimination of IDA and no (swap-)regret simultaneously, including a correlated exploration scheme and adaptive learning rates, which may be of independent interest. 
We complement our results with a sample complexity lower bound showing the sharpness of our guarantees.", "keywords": "Game Theory;Online Learning;Rationalizability", "primary_area": "", "supplementary_material": "", "author": "Yuanhao Wang;Dingwen Kong;Yu Bai;Chi Jin", "authorids": "~Yuanhao_Wang1;~Dingwen_Kong1;~Yu_Bai1;~Chi_Jin1", "gender": ";;;M", "homepage": ";;https://yubai.org;https://sites.google.com/view/cjin/home", "dblp": ";;03/6325-17.html;126/1802-1", "google_scholar": "yj2b7pgAAAAJ;;owqhKD8AAAAJ;GINhGvwAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Yuanhao_Wang1;~Dingwen_Kong1;~Yu_Bai1;~Chi_Jin1", "aff": "Princeton University;;Salesforce Research;Princeton University", "aff_domain": "princeton.edu;;salesforce.com;princeton.edu", "position": "PhD student;;Research Scientist;Assistant Professor", "bibtex": "@inproceedings{\nwang2023learning,\ntitle={Learning Rationalizable Equilibria in Multiplayer Games},\nauthor={Yuanhao Wang and Dingwen Kong and Yu Bai and Chi Jin},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=HjOo2k8lhFl}\n}", "github": "", "project": "", "reviewers": "TWWs;xdmb;eq1u;ta5R", "pdf_size": 419668, "recommendation": "6;8;8;8", "confidence": "4;2;3;4", "correctness": "4;3;4;4", "technical_novelty": "2;3;3;3", "empirical_novelty": "0;3;2;0", "wc_summary_paper": "71;179;51;160", "wc_strength_and_weaknesses": "166;414;127;240", "wc_clarity_quality_novelty_and_reproducibility": "31;26;24;72", "wc_summary_review": "18;24;52;29", "wc_review": "286;643;254;501", "wc_reply_reviewers": "21;0;0;0", "wc_reply_authors": "591;568;114;243", "reply_reviewers": "1;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.25, 1.299038105676658 ], "wc_summary_paper_avg": [ 115.25, 55.11975598639747 ], "wc_strength_and_weaknesses_avg": [ 236.75, 110.08945226496496 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 38.25, 19.651653874419832 ], "wc_summary_review_avg": [ 30.75, 12.871965661856 ], "wc_review_avg": [ 421.0, 159.52899422988912 ], "wc_reply_reviewers_avg": [ 5.25, 9.093266739736606 ], "wc_reply_authors_avg": [ 379.0, 205.78265233007374 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.5222329678670935, "corr_recommendation_correctness": -0.3333333333333333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5528922073496310128&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=HjOo2k8lhFl", "email": "princeton.edu;;salesforce.com;princeton.edu", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Princeton University;Salesforce", "aff_unique_dep": ";Salesforce Research", "aff_unique_url": "https://www.princeton.edu;https://research.salesforce.com", "aff_unique_abbr": "Princeton;Salesforce", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "HjzWIMEWipV", "title": "Actionable Recourse Guided by User Preference", "track": "main", "status": "Reject", "tldr": "Capturing user preference and suggesting actionable recourse for adversely affected individuals by a 
machine learning model.", "abstract": "The growing popularity of machine learning models has led to their increased application in domains directly impacting human lives. In critical fields such as healthcare, banking, and criminal justice, tools that ensure trust and transparency are vital for the responsible adoption of these models. One such tool is \\emph{actionable recourse} (AR) for negatively impacted users. AR describes recommendations of cost-efficient changes to a user's \\emph{actionable} features to help them obtain favorable outcomes. Existing approaches for providing recourse optimize for properties such as proximity, sparsity, validity, and distance-based costs. However, an often-overlooked but crucial requirement for actionability is a consideration of \\emph{User Preference} to guide the recourse generation process. Moreover, existing works considering a user's preferences require users to precisely specify their costs for taking actions. This requirement raises questions about the practicality of the corresponding solutions due to the high cognitive loads imposed. In this work, we attempt to capture user preferences via soft constraints in three simple forms: \\textit{i) scoring continuous features, ii) bounding feature values} and \\textit{iii) ranking categorical features}. We propose an optimization framework that is sensitive to {user preference} and a gradient-based approach to identify \\emph{User Preferred Actionable Recourse (UP-AR)}. We empirically demonstrate the proposed approach's superiority in adhering to user preference while maintaining competitive performance in traditional metrics with extensive experiments.", "keywords": "Actionable recourse", "primary_area": "", "supplementary_material": "/attachment/a045ec4a19f41f864e51eee85a7dd881e1bce482.zip", "author": "Jayanth Yetukuri;Ian Hardy;Yang Liu", "authorids": "~Jayanth_Yetukuri1;~Ian_Hardy1;~Yang_Liu3", "gender": "M;M;M", "homepage": ";;http://www.yliuu.com", "dblp": ";;51/3710-18", "google_scholar": ";;jKrIVCIAAAAJ", "orcid": ";;0000-0001-8420-6011", "linkedin": "jayanth-yetukuri-87052647/;ianstclairhardy/;", "or_profile": "~Jayanth_Yetukuri1;~Ian_Hardy1;~Yang_Liu3", "aff": "University of California, Santa Cruz;University of California, Santa Cruz;University of California, Santa Cruz", "aff_domain": "ucsc.edu;ucsc.edu;ucsc.edu", "position": "PhD student;MS student;Assistant Professor", "bibtex": "@misc{\nyetukuri2023actionable,\ntitle={Actionable Recourse Guided by User Preference},\nauthor={Jayanth Yetukuri and Ian Hardy and Yang Liu},\nyear={2023},\nurl={https://openreview.net/forum?id=HjzWIMEWipV}\n}", "github": "", "project": "", "reviewers": "uWp4;zS7o;7UMN", "site": "https://openreview.net/forum?id=HjzWIMEWipV", "pdf_size": 768076, "recommendation": "3;6;6", "confidence": "4;3;3", "correctness": "2;3;3", "technical_novelty": "2;3;3", "empirical_novelty": "2;2;3", "wc_summary_paper": "161;97;55", "wc_strength_and_weaknesses": "363;188;149", "wc_clarity_quality_novelty_and_reproducibility": "39;50;27", "wc_summary_review": "35;59;53", "wc_review": "598;394;284", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1242;635;486", "reply_reviewers": "0;0;0", "reply_authors": "4;1;2", "recommendation_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], 
"wc_summary_paper_avg": [ 104.33333333333333, 43.58389100981641 ], "wc_strength_and_weaknesses_avg": [ 233.33333333333334, 93.06031496949826 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 38.666666666666664, 9.392668535736913 ], "wc_summary_review_avg": [ 49.0, 10.198039027185569 ], "wc_review_avg": [ 425.3333333333333, 130.09056674315613 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 787.6666666666666, 326.97026708181824 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.3333333333333335, 1.247219128924647 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ERBYDF2g_NEJ:scholar.google.com/&scioq=Actionable+Recourse+Guided+by+User+Preference&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, Santa Cruz", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsc.edu", "aff_unique_abbr": "UCSC", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Santa Cruz", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "HkQ7Ompkpqe", "title": "Smart Multi-tenant Federated Learning", "track": "main", "status": "Reject", "tldr": "We propose a smart multi-tenant federated learning system, MuFL, to efficiently coordinate and execute simultaneous training activities under resource constraints by considering both synergies and differences among training activities.", "abstract": "Federated learning (FL) is an emerging distributed machine learning method that empowers in-situ model training on decentralized edge devices. However, multiple simultaneous training activities could overload resource-constrained devices. In this work, we propose a smart multi-tenant FL system, MuFL, to effectively coordinate and execute simultaneous training activities. We first formalize the problem of multi-tenant FL, define multi-tenant FL scenarios, and introduce a vanilla multi-tenant FL system that trains activities sequentially to form baselines. Then, we propose two approaches to optimize multi-tenant FL: 1) activity consolidation merges training activities into one activity with a multi-task architecture; 2) after training it for rounds, activity splitting divides it into groups by employing affinities among activities such that activities within a group have better synergy. Extensive experiments demonstrate that MuFL outperforms other methods while consuming 40% less energy. 
We hope this work will inspire the community to further study and optimize multi-tenant FL.", "keywords": "federated learning;multi-tenant federated learning", "primary_area": "", "supplementary_material": "", "author": "Weiming Zhuang;Yonggang Wen;Shuai Zhang", "authorids": "~Weiming_Zhuang1;~Yonggang_Wen1;~Shuai_Zhang14", "gender": ";M;Not Specified", "homepage": "https://weiming.me/;https://personal.ntu.edu.sg/ygwen/;", "dblp": "274/0724;;", "google_scholar": "lLuLAzEAAAAJ;https://scholar.google.com.tw/citations?user=byeygOkAAAAJ;RsZcMZcAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Weiming_Zhuang1;~Yonggang_Wen1;~Shuai_Zhang14", "aff": "Sony Research;Nanyang Technological University;sensetime", "aff_domain": "sony.com;ntu.edu.sg;sensetime.com", "position": "Researcher;Full Professor;Researcher", "bibtex": "@misc{\nzhuang2023smart,\ntitle={Smart Multi-tenant Federated Learning},\nauthor={Weiming Zhuang and Yonggang Wen and Shuai Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=HkQ7Ompkpqe}\n}", "github": "", "project": "", "reviewers": "icP7;NZnW;CNLB;hdrV", "site": "https://openreview.net/forum?id=HkQ7Ompkpqe", "pdf_size": 3280209, "recommendation": "3;3;3;5", "confidence": "3;4;4;3", "correctness": "3;3;3;4", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "60;77;62;121", "wc_strength_and_weaknesses": "208;134;292;117", "wc_clarity_quality_novelty_and_reproducibility": "20;34;91;20", "wc_summary_review": "17;23;52;17", "wc_review": "305;268;497;275", "wc_reply_reviewers": "43;0;147;113", "wc_reply_authors": "637;518;846;154", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;2;1", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 80.0, 24.566236993076494 ], "wc_strength_and_weaknesses_avg": [ 187.75, 69.23284986189721 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 41.25, 29.286302258905955 ], "wc_summary_review_avg": [ 27.25, 14.49784466739798 ], "wc_review_avg": [ 336.25, 93.84395292185853 ], "wc_reply_reviewers_avg": [ 75.75, 57.60805065266486 ], "wc_reply_authors_avg": [ 538.75, 251.2562188285098 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 1.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7802444036420469822&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Sony;Nanyang Technological University;SenseTime", "aff_unique_dep": "Research;;", "aff_unique_url": "https://www.sony.com;https://www.ntu.edu.sg;https://www.sensetime.com", "aff_unique_abbr": "Sony;NTU;SenseTime", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2", "aff_country_unique": "Japan;Singapore;China" }, { "id": "HlRfoQDDj-V", "title": "Proximal Validation Protocol", "track": "main", "status": "Reject", "tldr": "", "abstract": "Modern machine learning algorithms are generally built upon a train/validation/test split protocol. In particular, in the absence of an accessible testing set in real-world ML development, how to split out a validation set becomes crucial for reliable model evaluation, selection, etc. 
Concretely, under a randomized splitting setup, the split ratio of the validation set generally acts as a vital meta-parameter; that is, with more data picked and used for validation, it would cost model performance due to less training data, and vice versa. Unfortunately, this implies a vexing trade-off between performance enhancement and trustworthy model evaluation. However, to date, research along this line remains scarce. We reason this could be due to a workflow gap between academia and ML production, which we may attribute to a form of technical debt of ML. In this article, we propose a novel scheme --- dubbed Proximal Validation Protocol (PVP) --- which is targeted at resolving this problem of validation set construction. Core to PVP is to assemble a \\emph{proximal set} as a substitute for the traditional validation set while avoiding wasting valuable data that could otherwise be used for training. The construction of the proximal validation set is established with dense data augmentation followed by a novel distributional-consistent sampling algorithm. With extensive empirical findings, we show that PVP works (much) better than all the other existing validation protocols on three data modalities (images, text, and tabular data), demonstrating its feasibility towards ML production.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/8e515b396208780d480c5ad7da833eb30a753297.zip", "author": "MingFeng Ou;Yiming Zhang;Sai Wu;Gang Chen;Junbo Zhao", "authorids": "~MingFeng_Ou1;~Yiming_Zhang3;~Sai_Wu2;~Gang_Chen6;~Junbo_Zhao1", "gender": "M;;M;M;M", "homepage": "https://github.com/omf2333;;https://person.zju.edu.cn/0011057;;http://jakezhao.net/", "dblp": ";;30/1186.html;67/6383-1;191/6665", "google_scholar": ";;RMaqDKAAAAAJ;;8ipao8MAAAAJ", "orcid": ";;;0000-0002-7483-0045;", "linkedin": ";;;;", "or_profile": "~MingFeng_Ou1;~Yiming_Zhang3;~Sai_Wu2;~Gang_Chen6;~Junbo_Zhao1", "aff": "Zhejiang University;;Zhejiang University;College of Computer Science and Technology, Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;;zju.edu.cn;cs.zju.edu.cn;zju.edu.cn", "position": "MS student;;Full Professor;Full Professor;Assistant Professor", "bibtex": "@misc{\nou2023proximal,\ntitle={Proximal Validation Protocol},\nauthor={MingFeng Ou and Yiming Zhang and Sai Wu and Gang Chen and Junbo Zhao},\nyear={2023},\nurl={https://openreview.net/forum?id=HlRfoQDDj-V}\n}", "github": "", "project": "", "reviewers": "cmsD;wMBi;sKMx;iGVu", "site": "https://openreview.net/forum?id=HlRfoQDDj-V", "pdf_size": 1008190, "recommendation": "3;3;5;5", "confidence": "4;3;4;3", "correctness": "1;2;3;2", "technical_novelty": "2;3;3;2", "empirical_novelty": "1;2;3;2", "wc_summary_paper": "131;48;104;170", "wc_strength_and_weaknesses": "63;119;466;512", "wc_clarity_quality_novelty_and_reproducibility": "88;13;56;83", "wc_summary_review": "35;19;43;43", "wc_review": "317;199;669;808", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 113.25, 44.38116154406056 ], "wc_strength_and_weaknesses_avg": [ 290.0, 200.64271728622498 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 60.0, 29.740544715926102 ], "wc_summary_review_avg": [ 35.0, 9.797958971132712 ], "wc_review_avg": [ 
498.25, 248.7482411998123 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.7071067811865475, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:IWefflcAYGwJ:scholar.google.com/&scioq=Proximal+Validation+Protocol&hl=en&as_sdt=0,14", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Zhejiang University", "aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", "aff_unique_abbr": "ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "ResAct: Reinforcing Long-term Engagement in Sequential Recommendation with Residual Actor", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12018", "id": "HmPOzJQhbwg", "poster": "/media/PosterPDFs/ICLR%202023/12018.png?t=1680945808.125206", "openreview": "https://openreview.net/forum?id=HmPOzJQhbwg", "slides": "https://iclr.cc/virtual/2023/poster/12018", "video": "https://iclr.cc/virtual/2023/poster/12018", "author_site": "Wanqi Xue, Qingpeng Cai, Ruohan Zhan, Dong Zheng, Peng Jiang, Kun Gai, Bo An", "tldr": "We propose a novel paradigm to reinforce long-term engagement in sequential recommendation.", "abstract": "Long-term engagement is preferred over immediate engagement in sequential recommendation as it directly affects product operational metrics such as daily active users (DAUs) and dwell time. Meanwhile, reinforcement learning (RL) is widely regarded as a promising framework for optimizing long-term engagement in sequential recommendation. However, due to expensive online interactions, it is very difficult for RL algorithms to perform state-action value estimation, exploration and feature extraction when optimizing long-term engagement. In this paper, we propose ResAct which seeks a policy that is close to, but better than, the online-serving policy. In this way, we can collect sufficient data near the learned policy so that state-action values can be properly estimated, and there is no need to perform online exploration. ResAct optimizes the policy by first reconstructing the online behaviors and then improving it via a Residual Actor. To extract long-term information, ResAct utilizes two information-theoretical regularizers to confirm the expressiveness and conciseness of features. We conduct experiments on a benchmark dataset and a large-scale industrial dataset which consists of tens of millions of recommendation requests. 
Experimental results show that our method significantly outperforms the state-of-the-art baselines in various long-term engagement optimization tasks.", "keywords": "Sequential Recommendation;Long-term Engagement;Reinforcement Learning", "primary_area": "", "supplementary_material": "/attachment/1dc0913dc1fd8aece12b41aee46e24a850c3d445.zip", "author": "Wanqi Xue;Qingpeng Cai;Ruohan Zhan;Dong Zheng;Peng Jiang;Kun Gai;Bo An", "authorids": "~Wanqi_Xue2;~Qingpeng_Cai2;~Ruohan_Zhan1;~Dong_Zheng1;~Peng_Jiang6;~Kun_Gai1;~Bo_An2", "gender": "M;F;M;M;M;M;", "homepage": "https://qingpengcai.github.io/;https://ruohanzhan.github.io;https://scholar.google.com/citations?user=KI7sbM4AAAAJ;;;https://personal.ntu.edu.sg/boan/;", "dblp": "183/0940-1;;;;59/2902;42/6178-1.html;", "google_scholar": "uU6s1tYAAAAJ;;KI7sbM4AAAAJ;https://scholar.google.com/citations?hl=en;PXO4ygEAAAAJ;PEEpuNwAAAAJ;dG1nIS8AAAAJ", "orcid": "0000-0001-6451-9299;;0000-0003-0424-9658;0000-0002-9266-0780;;0000-0002-7064-7438;", "linkedin": ";;;;;;", "or_profile": "~Qingpeng_Cai2;~Ruohan_Zhan1;~Dong_Zheng1;~Peng_Jiang6;~Kun_Gai1;~Bo_An2;~wanqi_xue1", "aff": "Kuaishou;Hong Kong University of Science and Technology;Kuaishou Technology;Kuaishou Technology;Kuaishou- \u5feb\u624b\u79d1\u6280;Nanyang Technological University;Nanyang Technological University", "aff_domain": "kuaishou.com;ust.hk;kuaishou.com;kuaishou.com;kuaishou.com;ntu.edu.sg;ntu.edu.sg", "position": "Senior Staff Algorithm Engineer;Assistant Professor;Researcher;Vice President;Instructor;Full Professor;PhD student", "bibtex": "@inproceedings{\nxue2023resact,\ntitle={ResAct: Reinforcing Long-term Engagement in Sequential Recommendation with Residual Actor},\nauthor={Wanqi Xue and Qingpeng Cai and Ruohan Zhan and Dong Zheng and Peng Jiang and Kun Gai and Bo An},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=HmPOzJQhbwg}\n}", "github": "", "project": "", "reviewers": "pqv6;VU33;2DQo;EPzq", "pdf_size": 584338, "recommendation": "5;8;8;8", "confidence": "4;4;3;4", "correctness": "2;4;3;4", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "138;98;152;134", "wc_strength_and_weaknesses": "269;165;129;72", "wc_clarity_quality_novelty_and_reproducibility": "52;15;55;54", "wc_summary_review": "71;26;109;17", "wc_review": "530;304;445;277", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "732;61;123;18", "reply_reviewers": "0;0;0;0", "reply_authors": "2;1;1;1", "recommendation_avg": [ 7.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 130.5, 19.91858428704209 ], "wc_strength_and_weaknesses_avg": [ 158.75, 71.7717736996934 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 44.0, 16.777961735562517 ], "wc_summary_review_avg": [ 55.75, 36.92813967694555 ], "wc_review_avg": [ 389.0, 103.4238850556292 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 233.5, 290.21931362333555 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": 0.8703882797784892, "gs_citation": 30, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=6874639801899214692&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=HmPOzJQhbwg", "email": "kuaishou.com;ust.hk;kuaishou.com;kuaishou.com;kuaishou.com;ntu.edu.sg;ntu.edu.sg", "author_num": 7, "aff_unique_index": "0;1;0;0;0;2;2", "aff_unique_norm": "Kuaishou Technology;Hong Kong University of Science and Technology;Nanyang Technological University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.kuaishou.com;https://www.ust.hk;https://www.ntu.edu.sg", "aff_unique_abbr": "Kuaishou;HKUST;NTU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;1;1", "aff_country_unique": "China;Singapore" }, { "id": "HmdOxc8zIWx", "title": "Can we achieve robustness from data alone?", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "In robust machine learning, there is a widespread belief that samples can be decomposed into robust features (parts of the data that withstand small perturbations) and non-robust ones, and it is the role of the robust algorithm (i.e. adversarial training) to amplify the former and erase the latter. In this work, we challenge this view and try to position adversarial robustness as a more model-dependent property: many approaches that assume this simplistic distinction in the features, optimizing the data directly, only give rise to superficial adversarial robustness. We revisit prior approaches in the literature that were believed to be robust, and proceed to devise a principled meta-learning algorithm, that optimizes the dataset for robustness. Our method can be thought as a non-parametric version of adversarial training, and it is of independent interest and potentially wider applicability. Specifically, we cast the bi-level optimization as a min-max procedure on kernel regression, with a class of kernels that describe infinitely wide neural nets (Neural Tangent Kernels). 
Through extensive experiments we analyse the properties of the models trained on the optimized datasets and identify their shortcomings - all of them come in a similar flavor.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Nikolaos Tsilivis;Jingtong Su;Julia Kempe", "authorids": "~Nikolaos_Tsilivis1;~Jingtong_Su1;~Julia_Kempe1", "gender": ";M;", "homepage": "https://tsili42.github.io;https://cims.nyu.edu/~js12196/;", "dblp": "312/6719;275/3776;", "google_scholar": "uQ83NcQAAAAJ;i0OY_LAAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Nikolaos_Tsilivis1;~Jingtong_Su1;~Julia_Kempe1", "aff": "Harvard University, Harvard University;New York University;", "aff_domain": "g.harvard.edu;nyu.edu;", "position": "Intern;PhD student;", "bibtex": "@misc{\ntsilivis2023can,\ntitle={Can we achieve robustness from data alone?},\nauthor={Nikolaos Tsilivis and Jingtong Su and Julia Kempe},\nyear={2023},\nurl={https://openreview.net/forum?id=HmdOxc8zIWx}\n}", "github": "", "project": "", "reviewers": "gGjN;saBf;QrtA", "site": "https://openreview.net/forum?id=HmdOxc8zIWx", "pdf_size": 1560962, "recommendation": "3;3;5", "confidence": "4;4;3", "correctness": "3;3;2", "technical_novelty": "2;2;2", "empirical_novelty": "2;2;3", "wc_summary_paper": "76;121;58", "wc_strength_and_weaknesses": "363;179;153", "wc_clarity_quality_novelty_and_reproducibility": "31;46;21", "wc_summary_review": "22;68;32", "wc_review": "492;414;264", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 85.0, 26.49528259898354 ], "wc_strength_and_weaknesses_avg": [ 231.66666666666666, 93.47132656001459 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 32.666666666666664, 10.274023338281626 ], "wc_summary_review_avg": [ 40.666666666666664, 19.754043186705406 ], "wc_review_avg": [ 390.0, 94.61500938011896 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": -1.0, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5133330818978220817&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Harvard University;New York University", "aff_unique_dep": ";", "aff_unique_url": "https://www.harvard.edu;https://www.nyu.edu", "aff_unique_abbr": "Harvard;NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "HnLFY8F9uS", "title": "Robust Policy Optimization in Deep Reinforcement Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Entropy can play an essential role in policy optimization by selecting the stochastic policy, which eventually helps better explore the environment in reinforcement learning (RL). A proper balance between exploration and exploitation is challenging and might depend on the particular RL task. However, the stochasticity often reduces as the training progresses; thus, the policy becomes less exploratory. 
Therefore, in many cases, the policy can converge to a sub-optimal solution due to a lack of representative data during training. Moreover, this issue can even be severe in high-dimensional environments. This paper investigates whether keeping a certain entropy threshold throughout training can lead to better policy learning. In particular, we propose an algorithm, Robust Policy Optimization (RPO), which leverages a perturbed Gaussian distribution to encourage high-entropy actions. We evaluated our methods on various continuous control tasks from DeepMind Control, OpenAI Gym, Pybullet, and IsaacGym. We observed that in many settings, RPO increases the policy entropy early in training and then maintains a certain level of entropy throughout the training period. Eventually, our agent RPO shows consistently improved performance compared to PPO and other techniques such as data augmentation and entropy regularization. Furthermore, in several settings, our method stays robust in performance, while other baseline mechanisms fail to improve and even worsen the performance.", "keywords": "Deep Reinforcement Learning;Policy Optimization", "primary_area": "", "supplementary_material": "", "author": "Md Masudur Rahman;Yexiang Xue", "authorids": "~Md_Masudur_Rahman2;~Yexiang_Xue1", "gender": "M;M", "homepage": "https://mmasudurrah.github.io/;https://www.cs.purdue.edu/people/faculty/yexiang/", "dblp": "08/2425-1;117/4903", "google_scholar": "0nUv7b0AAAAJ;", "orcid": "0000-0002-3633-0621;", "linkedin": "masud99r/;", "or_profile": "~Md_Masudur_Rahman2;~Yexiang_Xue1", "aff": "Purdue University;Purdue University", "aff_domain": "purdue.edu;purdue.edu", "position": "PhD student;Assistant Professor", "bibtex": "@misc{\nrahman2023robust,\ntitle={Robust Policy Optimization in Deep Reinforcement Learning},\nauthor={Md Masudur Rahman and Yexiang Xue},\nyear={2023},\nurl={https://openreview.net/forum?id=HnLFY8F9uS}\n}", "github": "", "project": "", "reviewers": "fgod;NT5y;JmVS;JnzH", "site": "https://openreview.net/forum?id=HnLFY8F9uS", "pdf_size": 2624490, "recommendation": "3;3;3;3", "confidence": "3;3;3;4", "correctness": "2;3;3;2", "technical_novelty": "1;2;3;2", "empirical_novelty": "1;2;2;2", "wc_summary_paper": "58;88;81;59", "wc_strength_and_weaknesses": "212;145;146;174", "wc_clarity_quality_novelty_and_reproducibility": "32;27;89;19", "wc_summary_review": "33;84;105;51", "wc_review": "335;344;421;303", "wc_reply_reviewers": "47;11;0;0", "wc_reply_authors": "289;110;129;94", "reply_reviewers": "1;1;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 71.5, 13.238202294873727 ], "wc_strength_and_weaknesses_avg": [ 169.25, 27.288962970402523 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 41.75, 27.67105888830422 ], "wc_summary_review_avg": [ 68.25, 28.01227409547465 ], "wc_review_avg": [ 350.75, 43.3265219005634 ], "wc_reply_reviewers_avg": [ 14.5, 19.29378138157474 ], "wc_reply_authors_avg": [ 155.5, 78.06567747736517 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=862281577649507119&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, 
"aff_unique_index": "0;0", "aff_unique_norm": "Purdue University", "aff_unique_dep": "", "aff_unique_url": "https://www.purdue.edu", "aff_unique_abbr": "Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "RPM: Generalizable Multi-Agent Policies for Multi-Agent Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11728", "id": "HnSceSzlfrY", "poster": "/media/PosterPDFs/ICLR%202023/11728.png?t=1681909303.2781029", "openreview": "https://openreview.net/forum?id=HnSceSzlfrY", "slides": "https://iclr.cc/virtual/2023/poster/11728", "video": "https://iclr.cc/virtual/2023/poster/11728", "author_site": "WEI QIU, Xiao Ma, Bo An, Svetlana Obraztsova, shuicheng YAN, Zhongwen Xu", "tldr": "", "abstract": "Despite the recent advancement in multi-agent reinforcement learning (MARL), the MARL agents easily overfit the training environment and perform poorly in evaluation scenarios where other agents behave differently. Obtaining generalizable policies for MARL agents is thus necessary but challenging mainly due to complex multi-agent interactions. In this work, we model the MARL problem with Markov Games and propose a simple yet effective method, called ranked policy memory (RPM), i.e., to maintain a look-up memory of policies to achieve good generalizability. The main idea of RPM is to train MARL policies via gathering massive multi-agent interaction data. In particular, we first rank each agent\u2019s policies by its training episode return, i.e., the episode return of each agent in the training environment; we then save the ranked policies in the memory; when an episode starts, each agent can randomly select a policy from the RPM as the behavior policy. Each agent uses the behavior policy to gather multi-agent interaction data for MARL training. This innovative self-play framework guarantees the diversity of multi-agent interaction in the training data. Experimental results on Melting Pot demonstrate that RPM enables MARL agents to interact with unseen agents in multi-agent generalization evaluation scenarios and complete given tasks. 
It significantly boosts performance by up to 818% on average.", "keywords": "multi-agent system;multi-agent reinforcement learning", "primary_area": "", "supplementary_material": "/attachment/146a63012a6fc7b817480d298a524d24d9e91aeb.zip", "author": "Wei Qiu;Xiao Ma;Bo An;Svetlana Obraztsova;Shuicheng YAN;Zhongwen Xu", "authorids": "~Wei_Qiu3;~Xiao_Ma2;~Bo_An2;~Svetlana_Obraztsova1;~Shuicheng_YAN3;~Zhongwen_Xu1", "gender": "M;M;M;F;M;M", "homepage": ";https://yusufma03.github.io;https://personal.ntu.edu.sg/boan/;https://sites.google.com/site/svobraztsova/;https://yanshuicheng.ai/;https://zhongwen.one/", "dblp": "11/5166-1;35/573-6;42/6178-1.html;;y/ShuichengYan;130/5077", "google_scholar": "gszGlZIAAAAJ;hR4G6hoAAAAJ;PEEpuNwAAAAJ;https://scholar.google.com.tw/citations?user=aorQUi0AAAAJ;https://scholar.google.com.hk/citations?user=DNuiPHwAAAAJ;https://scholar.google.co.uk/citations?user=T4xuHn8AAAAJ", "orcid": ";;0000-0002-7064-7438;;;", "linkedin": ";;;;;", "or_profile": "~Wei_Qiu3;~Xiao_Ma2;~Bo_An2;~Svetlana_Obraztsova1;~Shuicheng_YAN3;~Zhongwen_Xu1", "aff": "Nanyang Technological University;SEA AI Lab;Nanyang Technological University;Nanyang Technological University;sea Group;Sea AI Lab", "aff_domain": "ntu.edu.sg;sea.com;ntu.edu.sg;ntu.edu.sg;sea.com;sea.com", "position": "PhD student;Research Scientist;Full Professor;Assistant Professor;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nqiu2023rpm,\ntitle={{RPM}: Generalizable Multi-Agent Policies for Multi-Agent Reinforcement Learning},\nauthor={Wei Qiu and Xiao Ma and Bo An and Svetlana Obraztsova and Shuicheng YAN and Zhongwen Xu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=HnSceSzlfrY}\n}", "github": "", "project": "", "reviewers": "7r99;8Vwu;3kor;GwoH", "pdf_size": 3311319, "recommendation": "5;5;6;6", "confidence": "3;5;4;4", "correctness": "3;4;2;3", "technical_novelty": "2;3;3;2", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "99;115;124;70", "wc_strength_and_weaknesses": "274;121;921;259", "wc_clarity_quality_novelty_and_reproducibility": "96;121;75;44", "wc_summary_review": "32;6;35;26", "wc_review": "501;363;1155;399", "wc_reply_reviewers": "52;0;181;18", "wc_reply_authors": "355;901;1053;673", "reply_reviewers": "1;0;2;1", "reply_authors": "2;3;3;3", "recommendation_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 102.0, 20.530465167647808 ], "wc_strength_and_weaknesses_avg": [ 393.75, 310.1945961811714 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 84.0, 28.257742301889582 ], "wc_summary_review_avg": [ 24.75, 11.299889379989523 ], "wc_review_avg": [ 604.5, 321.836526826897 ], "wc_reply_reviewers_avg": [ 62.75, 70.77914594002954 ], "wc_reply_authors_avg": [ 745.5, 262.90825395943733 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": -0.7071067811865475, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13039904081479752800&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "pdf": "https://openreview.net/pdf?id=HnSceSzlfrY", "email": "ntu.edu.sg;sea.com;ntu.edu.sg;ntu.edu.sg;sea.com;sea.com", "author_num": 6, "aff_unique_index": 
"0;1;0;0;2;1", "aff_unique_norm": "Nanyang Technological University;Sea AI Lab;Sea Group", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ntu.edu.sg;;", "aff_unique_abbr": "NTU;;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "Singapore;Unknown;" }, { "title": "Multimodal Federated Learning via Contrastive Representation Ensemble", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11896", "id": "Hnk1WRMAYqg", "poster": "", "openreview": "https://openreview.net/forum?id=Hnk1WRMAYqg", "slides": "https://iclr.cc/virtual/2023/poster/11896", "video": "https://iclr.cc/virtual/2023/poster/11896", "author_site": "Qiying Yu, Yang Liu, Yimu Wang, Ke Xu, Jingjing Liu", "tldr": "CreamFL, a multimodal FL framework using contrastive representation-level ensemble to learn a larger server model from heterogeneous clients across multi-modalities.", "abstract": "With the increasing amount of multimedia data on modern mobile systems and IoT infrastructures, harnessing these rich multimodal data without breaching user privacy becomes a critical issue. Federated learning (FL) serves as a privacy-conscious alternative to centralized machine learning. However, existing FL methods extended to multimodal data all rely on model aggregation on single modality level, which restrains the server and clients to have identical model architecture for each modality. This limits the global model in terms of both model complexity and data capacity, not to mention task diversity. In this work, we propose \\textit{Contrastive Representation Ensemble and Aggregation for Multimodal FL (CreamFL)}, a multimodal federated learning framework that enables training larger server models from clients with heterogeneous model architectures and data modalities, while only communicating knowledge on public dataset. To achieve better multimodal representation fusion, we design a global-local cross-modal ensemble strategy to aggregate client representations. To mitigate local model drift caused by two unprecedented heterogeneous factors stemming from multimodal discrepancy (\\textit{modality gap} and \\textit{task gap}), we further propose two inter-modal and intra-modal contrasts to regularize local training, which complements information of the absent modality for uni-modal clients and regularizes local clients to head towards global consensus. 
Thorough evaluations and ablation studies on image-text retrieval and visual question answering tasks showcase the superiority of CreamFL over state-of-the-art FL methods and its practical value.", "keywords": "Federated Learning;Multi-modal Learning;Representation-level Ensemble Knowledge Transfer", "primary_area": "", "supplementary_material": "", "author": "Qiying Yu;Yang Liu;Yimu Wang;Ke Xu;Jingjing Liu", "authorids": "~Qiying_Yu1;~Yang_Liu59;~Yimu_Wang1;~Ke_Xu7;~Jingjing_Liu2", "gender": ";F;M;M;", "homepage": "https://yqy2001.github.io;;https://yimuwangcs.github.io;;https://air.tsinghua.edu.cn/en/info/1046/1194.htm#:~:text=Jingjing%20Liu%20is%20Professor%2C%20Principal,CVPR%2C%20ACL%2C%20etc.)", "dblp": "324/5612;;140/7766;181/2626;30/3008-1", "google_scholar": "eFFssJYAAAAJ;JEieoFsAAAAJ;TV2vnN8AAAAJ;;BzJ_GboAAAAJ", "orcid": ";;;;", "linkedin": ";;yimu-wang-854743151/;;jingjing-liu-65703431/", "or_profile": "~Qiying_Yu1;~Yang_Liu59;~Yimu_Wang1;~Ke_Xu7;~Jingjing_Liu2", "aff": "Tsinghua University;Tsinghua University;University of Waterloo;Carnegie Mellon University;Tsinghua University", "aff_domain": "mails.tsinghua.edu.cn;tsinghua.edu.cn;uwaterloo.ca;cmu.edu;tsinghua.edu.cn", "position": "PhD student;Associate Professor;PhD student;MS student;Full Professor", "bibtex": "@inproceedings{\nyu2023multimodal,\ntitle={Multimodal Federated Learning via Contrastive Representation Ensemble},\nauthor={Qiying Yu and Yang Liu and Yimu Wang and Ke Xu and Jingjing Liu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=Hnk1WRMAYqg}\n}", "github": "", "project": "", "reviewers": "KYph;U8Yh;LDTA;FnzN", "pdf_size": 3498266, "recommendation": "5;6;6;8", "confidence": "4;4;4;5", "correctness": "3;3;4;3", "technical_novelty": "3;4;4;3", "empirical_novelty": "0;3;3;3", "wc_summary_paper": "78;49;69;98", "wc_strength_and_weaknesses": "267;500;167;146", "wc_clarity_quality_novelty_and_reproducibility": "84;49;32;23", "wc_summary_review": "90;36;6;104", "wc_review": "519;634;274;371", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1104;2024;771;650", "reply_reviewers": "0;0;0;0", "reply_authors": "3;4;2;2", "recommendation_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.5, 0.5 ], "empirical_novelty_avg": [ 2.25, 1.299038105676658 ], "wc_summary_paper_avg": [ 73.5, 17.613914953808536 ], "wc_strength_and_weaknesses_avg": [ 270.0, 140.44037880894513 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 47.0, 23.313086453749534 ], "wc_summary_review_avg": [ 59.0, 39.761790704142086 ], "wc_review_avg": [ 449.5, 137.6889610680537 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1137.25, 538.2803056958336 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.9271726499455306, "corr_recommendation_correctness": -0.13245323570650439, "gs_citation": 106, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5937005223190385373&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=Hnk1WRMAYqg", "email": "mails.tsinghua.edu.cn;tsinghua.edu.cn;uwaterloo.ca;cmu.edu;tsinghua.edu.cn", "author_num": 5, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Tsinghua University;University of Waterloo;Carnegie Mellon University", 
"aff_unique_dep": ";;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://uwaterloo.ca;https://www.cmu.edu", "aff_unique_abbr": "THU;UW;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;2;0", "aff_country_unique": "China;Canada;United States" }, { "title": "Transformer Meets Boundary Value Inverse Problems", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11788", "id": "HnlCZATopvr", "poster": "/media/PosterPDFs/ICLR%202023/11788.png?t=1681742430.895722", "openreview": "https://openreview.net/forum?id=HnlCZATopvr", "slides": "https://iclr.cc/virtual/2023/poster/11788", "video": "https://iclr.cc/virtual/2023/poster/11788", "author_site": "Ruchi Guo, Shuhao Cao, Long Chen", "tldr": "We argue that, from both theoretical and experimental perspective, the attention mechanism is a structure-conforming neural architecture for learning the PDE-based boundary value inverse problems.", "abstract": "A Transformer-based deep direct sampling method is proposed for electrical impedance tomography, a well-known severely ill-posed nonlinear boundary value inverse problem. A real-time reconstruction is achieved by evaluating the learned inverse operator between carefully designed data and the reconstructed images. An effort is made to give a specific example to a fundamental question: whether and how one can benefit from the theoretical structure of a mathematical problem to develop task-oriented and structure-conforming deep neural networks? Specifically, inspired by direct sampling methods for inverse problems, the 1D boundary data in different frequencies are preprocessed by a partial differential equation-based feature map to yield 2D harmonic extensions as different input channels. Then, by introducing learnable non-local kernels, the direct sampling is recast to a modified attention mechanism. The new method achieves superior accuracy over its predecessors and contemporary operator learners and shows robustness to noises in benchmarks. \nThis research shall strengthen the insights that, despite being invented for natural language processing tasks, the attention mechanism offers great flexibility to be modified in conformity with the a priori mathematical knowledge, which ultimately leads to the design of more physics-compatible neural architectures. 
", "keywords": "inverse problems;attention;operator learning;Transformer;partial differential equations", "primary_area": "", "supplementary_material": "/attachment/e1a060836030a2eb5149bb9b3f7867e7f2d2a5ac.zip", "author": "Ruchi Guo;Shuhao Cao;Long Chen", "authorids": "~Ruchi_Guo1;~Shuhao_Cao2;~Long_Chen11", "gender": "M;M;M", "homepage": "https://faculty.sites.uci.edu/ruchiguo/;https://scaomath.github.io;https://www.math.uci.edu/~chenlong/", "dblp": ";235/2667;64/5725-2", "google_scholar": "eY4X2UYAAAAJ;XMNDlgwAAAAJ;LhKNyYAAAAAJ", "orcid": ";0000-0001-6555-706X;0000-0002-7345-5116", "linkedin": ";;", "or_profile": "~Ruchi_Guo1;~Shuhao_Cao2;~Long_Chen11", "aff": ";University of Missouri - Kansas City;University of California, Irvine", "aff_domain": ";umkc.edu;uci.edu", "position": ";Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nguo2023transformer,\ntitle={Transformer Meets Boundary Value Inverse Problems},\nauthor={Ruchi Guo and Shuhao Cao and Long Chen},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=HnlCZATopvr}\n}", "github": "", "project": "", "reviewers": "CBcr;ne3Z;5fLN;4qwm", "pdf_size": 5017590, "recommendation": "5;8;8;8", "confidence": "3;4;3;3", "correctness": "3;3;3;3", "technical_novelty": "3;3;2;3", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "77;71;82;93", "wc_strength_and_weaknesses": "291;160;654;692", "wc_clarity_quality_novelty_and_reproducibility": "17;10;45;18", "wc_summary_review": "51;36;47;76", "wc_review": "436;277;828;879", "wc_reply_reviewers": "0;56;32;41", "wc_reply_authors": "890;1349;2033;2096", "reply_reviewers": "0;1;1;1", "reply_authors": "2;3;3;3", "recommendation_avg": [ 7.25, 1.299038105676658 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 80.75, 8.073877630977572 ], "wc_strength_and_weaknesses_avg": [ 449.25, 228.88793655411374 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 22.5, 13.35102992281869 ], "wc_summary_review_avg": [ 52.5, 14.637281168304447 ], "wc_review_avg": [ 605.0, 255.41632680782175 ], "wc_reply_reviewers_avg": [ 32.25, 20.498475553074673 ], "wc_reply_authors_avg": [ 1592.0, 500.0874923450896 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 0.0, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13920447344831673507&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=HnlCZATopvr", "email": ";umkc.edu;uci.edu", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "University of Missouri-Kansas City;University of California, Irvine", "aff_unique_dep": ";", "aff_unique_url": "https://www.umkc.edu;https://www.uci.edu", "aff_unique_abbr": "UMKC;UCI", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Kansas City;Irvine", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "Ho7W1yr8tV", "title": "Handling Covariate Shifts in Federated Learning with Generalization Guarantees", "track": "main", "status": "Reject", "tldr": "We optimize a global FL model focusing on the overall generalization performance under both intra-client and inter-client covariate 
shifts.", "abstract": "Covariate shift across clients is a major challenge for federated learning (FL). This work studies the generalization properties of FL under intra-client and inter-client covariate shifts. To this end, we propose Federated Importance-weighteD Empirical risk Minimization (FIDEM) to optimize a global FL model, along with new variants of density ratio matching methods, aiming to handle covariate shifts. These methods trade off some level of privacy for improving the overall generalization performance. We theoretically show that FIDEM achieves smaller generalization error than classical empirical risk minimization under some certain settings. Experimental results demonstrate the superiority of FIDEM over federated averaging (McMahan et al., 2017) and other baselines, which would open the door to study FL under distribution shifts more systematically.\n", "keywords": "Federate Learning;Generalization;Covariate Shift;Importance Weighting", "primary_area": "", "supplementary_material": "", "author": "Ali Ramezani-Kebrya;Fanghui Liu;Thomas Pethick;Grigorios Chrysos;Volkan Cevher", "authorids": "~Ali_Ramezani-Kebrya1;~Fanghui_Liu1;~Thomas_Pethick1;~Grigorios_Chrysos1;~Volkan_Cevher1", "gender": ";M;M;M;M", "homepage": "https://alirk.github.io/;http://www.lfhsgre.org;https://pethick.dk;https://grigorisg9gr.github.io/;http://lions.epfl.ch", "dblp": "129/4841;119/1038;305/4521;75/6117-2;70/5301", "google_scholar": "qZ8KukkAAAAJ;AKxBgssAAAAJ;;1bU041kAAAAJ;https://scholar.google.ch/citations?user=hlWhzU8AAAAJ", "orcid": ";0000-0003-4133-7921;;;", "linkedin": ";;;;", "or_profile": "~Ali_Ramezani-Kebrya1;~Fanghui_Liu1;~Thomas_Pethick1;~Grigorios_Chrysos1;~Volkan_Cevher1", "aff": "University of Oslo;\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne (EPFL);Swiss Federal Institute of Technology Lausanne;Swiss Federal Institute of Technology Lausanne;Amazon Development Center Germany", "aff_domain": "uio.no;epfl.ch;epfl.ch;epfl.ch;amazon.de", "position": "Associate Professor;Postdoc;PhD student;Postdoc;Amazon Scholar", "bibtex": "@misc{\nramezani-kebrya2023handling,\ntitle={Handling Covariate Shifts in Federated Learning with Generalization Guarantees},\nauthor={Ali Ramezani-Kebrya and Fanghui Liu and Thomas Pethick and Grigorios Chrysos and Volkan Cevher},\nyear={2023},\nurl={https://openreview.net/forum?id=Ho7W1yr8tV}\n}", "github": "", "project": "", "reviewers": "S6qR;Yvya;Vgm5;rE23", "site": "https://openreview.net/forum?id=Ho7W1yr8tV", "pdf_size": 999977, "recommendation": "3;5;5;6", "confidence": "3;3;4;4", "correctness": "2;3;4;4", "technical_novelty": "2;2;3;2", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "129;59;88;71", "wc_strength_and_weaknesses": "193;258;271;471", "wc_clarity_quality_novelty_and_reproducibility": "13;58;29;19", "wc_summary_review": "28;39;53;67", "wc_review": "363;414;441;628", "wc_reply_reviewers": "169;45;0;367", "wc_reply_authors": "576;822;609;746", "reply_reviewers": "1;1;0;2", "reply_authors": "4;5;4;3", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 86.75, 26.479945241635225 ], "wc_strength_and_weaknesses_avg": [ 298.25, 104.02253361651984 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 29.75, 17.282577932704367 ], "wc_summary_review_avg": [ 46.75, 14.669270602180601 ], "wc_review_avg": [ 461.5, 100.12617040514432 ], 
"wc_reply_reviewers_avg": [ 145.25, 142.20122186535528 ], "wc_reply_authors_avg": [ 688.25, 100.13085188891584 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 4.0, 0.7071067811865476 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.6882472016116854, "corr_recommendation_correctness": 0.899228803025897, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:C-i4PNpfVykJ:scholar.google.com/&scioq=Handling+Covariate+Shifts+in+Federated+Learning+with+Generalization+Guarantees&hl=en&as_sdt=0,30", "gs_version_total": 0, "aff_unique_index": "0;1;2;2;3", "aff_unique_norm": "University of Oslo;EPFL;Swiss Federal Institute of Technology Lausanne;Amazon", "aff_unique_dep": ";;;Development Center", "aff_unique_url": "https://www.uio.no;https://www.epfl.ch;https://www.epfl.ch;https://www.amazon.de", "aff_unique_abbr": "UiO;EPFL;EPFL;Amazon", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Lausanne", "aff_country_unique_index": "0;1;1;1;2", "aff_country_unique": "Norway;Switzerland;Germany" }, { "id": "HpEfFkzHUgt", "title": "Auto-Encoding Adversarial Imitation Learning", "track": "main", "status": "Reject", "tldr": "this paper presents a new adversarial imitation learning method based on auto-encoding", "abstract": "Reinforcement learning (RL) provides a powerful framework for decision-making, but its application in practice often requires a carefully designed reward function. Adversarial Imitation Learning (AIL) sheds light on automatic policy acquisition without access to the reward signal from the environment. In this work, we propose Auto-Encoding Adversarial Imitation Learning (AEAIL), a robust and scalable AIL framework. To induce expert policies from demonstrations, AEAIL utilizes the reconstruction error of an auto-encoder as a reward signal, which provides more information for optimizing policies than the prior discriminator-based ones. Subsequently, we use the derived objective functions to train the auto-encoder and the agent policy. Experiments show that our AEAIL performs superior compared to state-of-the-art methods in the MuJoCo environments. More importantly, AEAIL shows much better robustness when the expert demonstrations are noisy. Specifically, our method achieves $11\\%$ and $50.7\\%$ relative improvement overall compared to the best baseline GAIL and PWIL on clean and noisy expert data, respectively. Video results, open-source code and dataset are available in supplementary materials. 
", "keywords": "imitation learning;reinforcement learning;auto-encoders", "primary_area": "", "supplementary_material": "/attachment/4cdaaedfa5f325b02928ed25e37d4a59c789f6f6.zip", "author": "Kaifeng Zhang;Rui Zhao;Ziming Zhang;Yang Gao", "authorids": "~Kaifeng_Zhang1;~Rui_Zhao1;~Ziming_Zhang4;~Yang_Gao1", "gender": "M;M;M;M", "homepage": "https://alexkfzhang.github.io;https://ruizhaogit.github.io;http://yang-gao.weebly.com;https://zimingzhang.wordpress.com/", "dblp": ";26/2578-11;89/4402-29;", "google_scholar": ";N1yNDnQAAAAJ;https://scholar.google.com/citations?hl=en;2yqx3oIAAAAJ", "orcid": ";;;", "linkedin": ";rui-zhao-profile/;yang-gao-45245348/;", "or_profile": "~Kaifeng_Zhang1;~Rui_Zhao1;~Yang_Gao1;~Ziming_Zhang1", "aff": "Shanghai Qi Zhi Institute;Tencent AI Lab;Tsinghua University;Worcester Polytechnic Institute", "aff_domain": "sqz.ac.cn;tencent.com;tsinghua.edu.cn;wpi.edu", "position": "Researcher;Researcher;Assistant Professor;Assistant Professor", "bibtex": "@misc{\nzhang2023autoencoding,\ntitle={Auto-Encoding Adversarial Imitation Learning},\nauthor={Kaifeng Zhang and Rui Zhao and Ziming Zhang and Yang Gao},\nyear={2023},\nurl={https://openreview.net/forum?id=HpEfFkzHUgt}\n}", "github": "", "project": "", "reviewers": "EB8r;9Nok;ymvo;vULj", "site": "https://openreview.net/forum?id=HpEfFkzHUgt", "pdf_size": 2880009, "recommendation": "3;5;5;5", "confidence": "4;5;3;4", "correctness": "2;2;3;2", "technical_novelty": "1;3;2;2", "empirical_novelty": "1;2;2;2", "wc_summary_paper": "35;51;49;102", "wc_strength_and_weaknesses": "141;381;810;28", "wc_clarity_quality_novelty_and_reproducibility": "25;100;2;223", "wc_summary_review": "49;42;36;85", "wc_review": "250;574;897;438", "wc_reply_reviewers": "0;442;0;0", "wc_reply_authors": "525;1672;615;478", "reply_reviewers": "0;2;0;0", "reply_authors": "2;4;2;2", "recommendation_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 2.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 59.25, 25.439880109780393 ], "wc_strength_and_weaknesses_avg": [ 340.0, 299.80243494674954 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 87.5, 86.21629776324195 ], "wc_summary_review_avg": [ 53.0, 19.03943276465977 ], "wc_review_avg": [ 539.75, 236.17194477752855 ], "wc_reply_reviewers_avg": [ 110.5, 191.39161423636094 ], "wc_reply_authors_avg": [ 822.5, 492.9231684552878 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5223704989343576944&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Shanghai Qi Zhi Institute;Tencent;Tsinghua University;Worcester Polytechnic Institute", "aff_unique_dep": ";Tencent AI Lab;;", "aff_unique_url": "https://www.qz.io;https://ai.tencent.com;https://www.tsinghua.edu.cn;https://www.wpi.edu", "aff_unique_abbr": ";Tencent AI Lab;THU;WPI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "China;United States" }, { "id": "Hq16Jk2bVlp", "title": "AutoFHE: Automated Adaption of CNNs for Efficient Evaluation over FHE", "track": "main", "status": "Reject", "tldr": 
"Automated adaption of CNNs to the RNS-CKKS FHE scheme by jointly evolving polynomial activations (EvoReLUs) and searching for placement of bootstrapping operations.", "abstract": "Secure inference of deep convolutional neural networks (CNNs) was recently demonstrated under the RNS-CKKS fully homomorphic encryption (FHE) scheme. The state-of-the-art solution uses a high-order composite polynomial to approximate non-arithmetic ReLUs and refreshes zero-level ciphertext through bootstrapping. However, this solution suffers from prohibitively high latency, both due to the number of levels consumed by the polynomials ($47\\%$) and the inference time consumed by bootstrapping operations ($70\\%$). Furthermore, it requires a hand-crafted architecture for homomorphically evaluating CNNs by placing a bootstrapping operation after every Conv-BN layer. To accelerate CNNs on FHE and automatically design a homomorphic evaluation architecture, we propose AutoFHE: Automated adaption of CNNs for evaluation over FHE. AutoFHE exploits the varying sensitivity of approximate activations across different layers in a network and jointly evolves polynomial activations (EvoReLUs) and searches for placement of bootstrapping operations for evaluation under RNS-CKKS. The salient features of AutoFHE include: i) a multi-objective co-evolutionary (MOCoEv) search algorithm to maximize validation accuracy and minimize the number of bootstrapping operations, ii) a gradient-free search algorithm, R-CCDE, to optimize EvoReLU coefficients, and iii) polynomial-aware training (PAT) to fine-tune polynomial-only CNNs for one epoch to adapt trainable weights to EvoReLUs. We demonstrate the efficacy of AutoFHE through the evaluation of ResNets on CIFAR-10 and CIFAR-100 under RNS-CKKS. Experimental results on CIFAR-10 indicate that in comparison to the state-of-the-art solution, AutoFHE reduces inference time (50 images on 50 threads) by 1,000 seconds and amortized inference time (per image) by $28\\%$ and $17\\%$ for ResNet-20 and ResNet-32, respectively.", "keywords": "Fully Homomorphic Encryption;Multi-Objective Co-Evolutionary Search;RNS-CKKS", "primary_area": "", "supplementary_material": "", "author": "Wei Ao;Vishnu Boddeti", "authorids": "~Wei_Ao1;~Vishnu_Boddeti1", "gender": "M;M", "homepage": "https://wei-ao.github.io/;https://hal.cse.msu.edu", "dblp": ";55/6988", "google_scholar": "jEIMATUAAAAJ;JKcrO9IAAAAJ", "orcid": "0000-0003-1449-936X;", "linkedin": "wei-ao/;", "or_profile": "~Wei_Ao1;~Vishnu_Boddeti1", "aff": "Michigan State University;Michigan State University", "aff_domain": "msu.edu;msu.edu", "position": "PhD student;Assistant Professor", "bibtex": "@misc{\nao2023autofhe,\ntitle={Auto{FHE}: Automated Adaption of {CNN}s for Efficient Evaluation over {FHE}},\nauthor={Wei Ao and Vishnu Boddeti},\nyear={2023},\nurl={https://openreview.net/forum?id=Hq16Jk2bVlp}\n}", "github": "", "project": "", "reviewers": "b7YQ;Btk1;Lx4T", "site": "https://openreview.net/forum?id=Hq16Jk2bVlp", "pdf_size": 1347825, "recommendation": "5;5;6", "confidence": "4;4;4", "correctness": "3;2;4", "technical_novelty": "2;2;4", "empirical_novelty": "2;2;4", "wc_summary_paper": "89;64;72", "wc_strength_and_weaknesses": "444;210;98", "wc_clarity_quality_novelty_and_reproducibility": "45;40;11", "wc_summary_review": "64;192;14", "wc_review": "642;506;195", "wc_reply_reviewers": "226;0;0", "wc_reply_authors": "3715;992;403", "reply_reviewers": "1;0;0", "reply_authors": "9;4;3", "recommendation_avg": [ 5.333333333333333, 0.4714045207910317 ], 
"confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 2.6666666666666665, 0.9428090415820634 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.9428090415820634 ], "wc_summary_paper_avg": [ 75.0, 10.424330514074594 ], "wc_strength_and_weaknesses_avg": [ 250.66666666666666, 144.15115523489763 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 32.0, 14.98888477061141 ], "wc_summary_review_avg": [ 90.0, 74.95776588630872 ], "wc_review_avg": [ 447.6666666666667, 187.0905900597058 ], "wc_reply_reviewers_avg": [ 75.33333333333333, 106.53742169877317 ], "wc_reply_authors_avg": [ 1703.3333333333333, 1442.6439462166522 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 5.333333333333333, 2.6246692913372702 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.8660254037844385, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4387805981143712291&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 12, "aff_unique_index": "0;0", "aff_unique_norm": "Michigan State University", "aff_unique_dep": "", "aff_unique_url": "https://www.msu.edu", "aff_unique_abbr": "MSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "HqVp0rNC8jn", "title": "Learning Geometric Representations of Interactive Objects", "track": "main", "status": "Reject", "tldr": "We propose a representation learning framework that extracts from observations the geometric state of both an agent and an object the agent interacts with.", "abstract": "We address the problem of learning geometric representations from observations perceived by an agent operating within an environment and interacting with an external object. To this end, we propose a representation learning framework that extracts the state of both the agent and the object from unstructured observations of arbitrary nature (e.g., images). Supervision comes from the performed actions alone, while the dynamics of the object is assumed to be unknown. We provide a theoretical foundation and formally prove that an ideal learner is guaranteed to infer an isometric representation, disentangling the agent from the object. Finally, we investigate empirically our framework on a variety of scenarios. Results show that our model reliably infers the correct representation and outperforms vision-based approaches such as a state-of-the-art keypoint extractor. 
\n", "keywords": "Representation Learning;Interaction;Equivariance", "primary_area": "", "supplementary_material": "/attachment/83be4b09270558f509dcd88c1b374f2d35167351.zip", "author": "Alfredo Reichlin;Giovanni Luca Marchetti;Hang Yin;Anastasia Varava;Danica Kragic", "authorids": "~Alfredo_Reichlin1;~Giovanni_Luca_Marchetti1;~Hang_Yin1;~Anastasia_Varava1;~Danica_Kragic1", "gender": "M;M;M;F;F", "homepage": ";https://www.kth.se/profile/glma;;;http://www.csc.kth.se/~danik", "dblp": "324/2394;310/4949;;;82/1211", "google_scholar": "PE0RmDUAAAAJ;ePYa2qAAAAAJ;https://scholar.google.pt/citations?hl=en;https://scholar.google.ru/citations?user=zbl1MvgAAAAJ;", "orcid": "0000-0001-8938-9363;;;;", "linkedin": ";;;;", "or_profile": "~Alfredo_Reichlin1;~Giovanni_Luca_Marchetti1;~Hang_Yin1;~Anastasia_Varava1;~Danica_Kragic1", "aff": "KTH Royal Institute of Technology;KTH Royal Institute of Technology, Stockholm, Sweden;University of Copenhagen;;KTH", "aff_domain": "kth.se;kth.se;ku.dk;;kth.se", "position": "PhD student;PhD student;Assistant Professor;;Professor", "bibtex": "@misc{\nreichlin2023learning,\ntitle={Learning Geometric Representations of~Interactive Objects},\nauthor={Alfredo Reichlin and Giovanni Luca Marchetti and Hang Yin and Anastasia Varava and Danica Kragic},\nyear={2023},\nurl={https://openreview.net/forum?id=HqVp0rNC8jn}\n}", "github": "", "project": "", "reviewers": "a3YM;tBxf;ySVW;M4Jm", "site": "https://openreview.net/forum?id=HqVp0rNC8jn", "pdf_size": 1378775, "recommendation": "3;5;6;8", "confidence": "3;3;3;2", "correctness": "3;3;3;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "1;2;3;3", "wc_summary_paper": "84;53;81;64", "wc_strength_and_weaknesses": "205;218;443;594", "wc_clarity_quality_novelty_and_reproducibility": "52;109;21;24", "wc_summary_review": "49;28;68;46", "wc_review": "390;408;613;728", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "633;917;1120;1307", "reply_reviewers": "0;0;0;0", "reply_authors": "1;2;2;2", "recommendation_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 70.5, 12.658988901172163 ], "wc_strength_and_weaknesses_avg": [ 365.0, 162.58382453368478 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 51.5, 35.33058165385903 ], "wc_summary_review_avg": [ 47.75, 14.184057952504283 ], "wc_review_avg": [ 534.75, 141.8509340822259 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 994.25, 250.04737051206916 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.8006407690254357, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1041164099495592638&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "KTH Royal Institute of Technology;University of Copenhagen", "aff_unique_dep": ";", "aff_unique_url": "https://www.kth.se;https://www.ku.dk", "aff_unique_abbr": "KTH;UCPH", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stockholm", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Sweden;Denmark" }, { "id": "Hrj3MhDO_a", "title": "A Simple Nadaraya-Watson Head for Explainable and Calibrated Classification", "track": "main", "status": "Withdraw", "tldr": "We present a simple, 
nonparametric replacement for the fully-connected head in the image classification setting based on the Nadaraya-Watson (NW) estimator, which can be shown to be interpretable and well-calibrated.", "abstract": "We propose a simple, non-learnable, and nonparametric prediction head to be used with any neural network architecture. The proposed head can be viewed as a classic Nadaraya-Watson (NW) model, where the prediction is a weighted average of labels from a support set. \nThe weights are computed from distances between the query feature and support features. This is in contrast to the dominant approach of using a learnable classification head (e.g., a fully-connected layer) on the features, which can be challenging to interpret and can yield poorly calibrated predictions. Our empirical results on an array of computer vision tasks demonstrate that the NW head can yield better calibration than its parametric counterpart, while having comparable accuracy and minimal computational overhead. To further increase inference-time efficiency, we propose a simple approach that involves a clustering step run on the training set to create a relatively small distilled support set. In addition to using the weights as a means of interpreting model predictions, we further present an easy-to-compute ``support influence function,'' which quantifies the influence of a support element on the prediction for a given query. As we demonstrate in our experiments, the influence function can allow the user to debug a trained model. We believe that the NW head is a flexible, interpretable, and highly useful building block that can be used in a range of applications.", "keywords": "Image Classification;Nonparametric;Interpretability;Explainability;Calibration", "primary_area": "", "supplementary_material": "", "author": "Alan Q. Wang;Mert R. Sabuncu", "authorids": "~Alan_Q._Wang1;~Mert_R._Sabuncu1", "gender": "M;M", "homepage": "http://sabuncu.engineering.cornell.edu;https://alanqrwang.github.io", "dblp": "36/4898;271/4734", "google_scholar": ";P7nRvlIAAAAJ", "orcid": ";0000-0003-0149-6055", "linkedin": ";", "or_profile": "~Mert_R._Sabuncu1;~Alan_Wang2", "aff": "Cornell Tech;Cornell University", "aff_domain": "cornell.edu;cornell.edu", "position": "Full Professor;PhD student", "bibtex": "@misc{\nwang2023a,\ntitle={A Simple Nadaraya-Watson Head for Explainable and Calibrated Classification},\nauthor={Alan Q. Wang and Mert R. 
Sabuncu},\nyear={2023},\nurl={https://openreview.net/forum?id=Hrj3MhDO_a}\n}", "github": "", "project": "", "reviewers": "GHtA;Trz4;dLVq;Y7mf", "site": "https://openreview.net/forum?id=Hrj3MhDO_a", "pdf_size": 33593086, "recommendation": "3;3;5;6", "confidence": "3;5;5;3", "correctness": "3;3;2;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;2;2;4", "wc_summary_paper": "67;88;90;154", "wc_strength_and_weaknesses": "79;129;640;230", "wc_clarity_quality_novelty_and_reproducibility": "279;46;191;152", "wc_summary_review": "42;12;60;66", "wc_review": "467;275;981;602", "wc_reply_reviewers": "79;0;438;73", "wc_reply_authors": "1095;364;593;648", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;1;1", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 4.0, 1.0 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 99.75, 32.591218142315576 ], "wc_strength_and_weaknesses_avg": [ 269.5, 220.71531437578136 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 167.0, 83.64508353752778 ], "wc_summary_review_avg": [ 45.0, 21.0 ], "wc_review_avg": [ 581.25, 258.39540920844547 ], "wc_reply_reviewers_avg": [ 147.5, 170.57916050913136 ], "wc_reply_authors_avg": [ 675.0, 264.8461817734966 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.19245008972987526, "corr_recommendation_correctness": -0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:KgxDDMkEXBQJ:scholar.google.com/&scioq=A+Simple+Nadaraya-Watson+Head+for+Explainable+and+Calibrated+Classification&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Cornell University", "aff_unique_dep": "", "aff_unique_url": "https://tech.cornell.edu", "aff_unique_abbr": "Cornell Tech", "aff_campus_unique_index": "0", "aff_campus_unique": "New York City;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "MA-BERT: Towards Matrix Arithmetic-only BERT Inference by Eliminating Complex Non-Linear Functions", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10746", "id": "HtAfbHa7LAL", "poster": "/media/PosterPDFs/ICLR%202023/10746.png?t=1681028161.0359814", "openreview": "https://openreview.net/forum?id=HtAfbHa7LAL", "slides": "https://iclr.cc/virtual/2023/poster/10746", "video": "https://iclr.cc/virtual/2023/poster/10746", "author_site": "Wei Ming Neo, Zhehui Wang, Cheng Liu, Rick Goh, Tao Luo", "tldr": "MA-BERT completely eliminates the complex non-linear functions in BERT and achieves matrix arithmetic-only operation with trivial ReLU, which could benefit inference on both general computing units and accelerator designs for edge applications", "abstract": "Due to their superior results, Transformer-based models such as BERT have become de facto standards in many Natural Language Processing (NLP) applications. However, the intensive use of complex non-linear functions within the Transformer architecture impairs its computing efficiency and complicates corresponding accelerator designs, because non-linear functions are generally computation-intensive and require special hardware support. 
In light of this, we propose MA-BERT, which allows matrix arithmetic-only operations in Transformer-based NLP models and achieves efficient inference with negligible accuracy loss. Specifically, we propose four correlated techniques that include approximating softmax with a two-layer neural network, replacing GELU with ReLU, fusing normalization layers with adjacent linear layers, and leveraging knowledge transfer from baseline models. Through these techniques, we are able to eliminate the major non-linear functions in Transformer-based models and obtain MA-BERT with only matrix arithmetic and trivial ReLU operations without compromising on accuracy. With mainly regular matrix arithmetic operations, MA-BERT enables hardware-friendly processing on various computing engines, including CPUs and GPUs. Our experimental results show that MA-BERT achieves up to 27% and 41% reduction in inference time on CPU and GPU, respectively, with comparable accuracy on many downstream tasks compared to the baseline BERT models. ", "keywords": "BERT;Efficient inference;Matrix arithmetic-only;Eliminate non-linear functions", "primary_area": "", "supplementary_material": "", "author": "Neo Wei Ming;Zhehui Wang;Cheng Liu;Rick Siow Mong Goh;Tao Luo", "authorids": "~Neo_Wei_Ming1;~Zhehui_Wang2;liucheng@ict.ac.cn;~Rick_Siow_Mong_Goh1;~Tao_Luo2", "gender": "M;;;;M", "homepage": ";;;https://sites.google.com/view/rickgoh/home;", "dblp": ";;;https://dblp.uni-trier.de/pers/g/Goh:Rick_Siow_Mong;43/4720-14", "google_scholar": "8DWm85kAAAAJ;;;https://scholar.google.com.sg/citations?user=fBsBJjoAAAAJ;d4KZI8MAAAAJ", "orcid": ";;;0000-0001-9116-1595;0000-0002-3415-3676", "linkedin": "neoweiming/;;;rickgoh/;", "or_profile": "~Neo_Wei_Ming1;~Zhehui_Wang2;liucheng@ict.ac.cn;~Rick_Siow_Mong_Goh1;~Tao_Luo2", "aff": "Nanyang Technological University;;;Institute of High Performance Computing, Singapore, A*STAR;Institute of High Performance Computing, Singapore, A*STAR", "aff_domain": "ntu.edu;;;ihpc.a-star.edu.sg;ihpc.a-star.edu.sg", "position": "Undergrad student;;;Director;Researcher", "bibtex": "@inproceedings{\nming2023mabert,\ntitle={{MA}-{BERT}: Towards Matrix Arithmetic-only {BERT} Inference by Eliminating Complex Non-Linear Functions},\nauthor={Neo Wei Ming and Zhehui Wang and Cheng Liu and Rick Siow Mong Goh and Tao Luo},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=HtAfbHa7LAL}\n}", "github": "", "project": "", "reviewers": "RFc3;K662;9tBM", "pdf_size": 1394710, "recommendation": "5;5;6", "confidence": "4;3;4", "correctness": "3;3;3", "technical_novelty": "3;2;3", "empirical_novelty": "3;2;3", "wc_summary_paper": "45;56;80", "wc_strength_and_weaknesses": "91;58;203", "wc_clarity_quality_novelty_and_reproducibility": "8;44;35", "wc_summary_review": "8;138;97", "wc_review": "152;296;415", "wc_reply_reviewers": "17;0;24", "wc_reply_authors": "1236;762;1332", "reply_reviewers": "1;0;1", "reply_authors": "3;1;2", "recommendation_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 60.333333333333336, 14.613540144521982 ], "wc_strength_and_weaknesses_avg": [ 117.33333333333333, 62.055530687352025 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 29.0, 15.297058540778355 ], 
"wc_summary_review_avg": [ 81.0, 54.264782932088345 ], "wc_review_avg": [ 287.6666666666667, 107.53087411943088 ], "wc_reply_reviewers_avg": [ 13.666666666666666, 10.077477638553983 ], "wc_reply_authors_avg": [ 1110.0, 249.17463755366435 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.4999999999999999, "corr_recommendation_correctness": 0.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15067565297963718509&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 0, "pdf": "https://openreview.net/pdf?id=HtAfbHa7LAL", "email": "ntu.edu;;;ihpc.a-star.edu.sg;ihpc.a-star.edu.sg", "author_num": 5, "aff_unique_index": "0;1;1", "aff_unique_norm": "Nanyang Technological University;Institute of High Performance Computing", "aff_unique_dep": ";", "aff_unique_url": "https://www.ntu.edu.sg;https://www.ihpc.a-star.edu.sg", "aff_unique_abbr": "NTU;IHPC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Singapore" }, { "title": "Novel View Synthesis with Diffusion Models", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11809", "id": "HtoA0oT30jC", "poster": "", "openreview": "https://openreview.net/forum?id=HtoA0oT30jC", "slides": "https://iclr.cc/virtual/2023/poster/11809", "video": "https://iclr.cc/virtual/2023/poster/11809", "author_site": "Daniel Watson, William Chan, Ricardo Martin Brualla, Jonathan Ho, Andrea Tagliasacchi, Mohammad Norouzi", "tldr": "Novel View Synthesis with diffusion models from as few a single image", "abstract": "We present 3DiM (pronounced \"three-dim\"), a diffusion model for 3D novel view synthesis from as few as a single image. The core of 3DiM is an image-to-image diffusion model -- 3DiM takes a single reference view and their poses as inputs, and generates a novel view via diffusion. 3DiM can then generate a full 3D consistent scene following our novel stochastic conditioning sampler: the output frames of the scene are generated autoregressively, and during the reverse diffusion process of each individual frame, we select a random conditioning frame from the set of previous frames at each denoising step. We demonstrate that stochastic conditioning yields much more 3D consistent results compared to the naive sampling process which only conditions on a single previous frame. We compare 3DiMs to prior work on the SRN ShapeNet dataset, demonstrating that 3DiM's generated videos from a single view achieve much higher fidelity while being approximately 3D consistent. We also introduce a new evaluation methodology, 3D consistency scoring, to measure the 3D consistency of a generated object by training a neural field on the model's output views. 
3DiMs are geometry free, do not rely on hyper-networks or test-time optimization for novel view synthesis, and allow a single model to easily scale to a large number of scenes.", "keywords": "3D;diffusion;ddpm;novel;view;synthesis;generative;models", "primary_area": "", "supplementary_material": "/attachment/84fb9108922dba109dfb021256ea6aefc394b322.zip", "author": "Daniel Watson;William Chan;Ricardo Martin Brualla;Jonathan Ho;Andrea Tagliasacchi;Mohammad Norouzi", "authorids": "~Daniel_Watson1;~William_Chan1;~Ricardo_Martin_Brualla1;~Jonathan_Ho1;~Andrea_Tagliasacchi2;~Mohammad_Norouzi1", "gender": ";;;;M;M", "homepage": ";http://williamchan.ca;http://ricardomartinbrualla.com;;http://taiya.github.io;https://norouzi.github.io/", "dblp": ";58/2301;16/7968;80/8677;46/5514;https://dblp.org/pers/hd/n/Norouzi_0002:Mohammad", "google_scholar": "_pKKv2QAAAAJ;Nla9qfUAAAAJ;9F59OCYAAAAJ;iVLAQysAAAAJ;1RmD-YsAAAAJ;Lncr-VoAAAAJ", "orcid": ";;0000-0003-3247-9522;;;", "linkedin": ";;;;;", "or_profile": "~Daniel_Watson1;~William_Chan1;~Ricardo_Martin_Brualla1;~Jonathan_Ho1;~Andrea_Tagliasacchi2;~Mohammad_Norouzi1", "aff": "Google;Google Brain;Google;Google;Google DeepMind;Google Brain", "aff_domain": "google.com;google.com;google.com;google.com;google.com;google.com", "position": "Researcher;Research Scientist;Researcher;Researcher;Researcher;Research Scientist", "bibtex": "@inproceedings{\nwatson2023novel,\ntitle={Novel View Synthesis with Diffusion Models},\nauthor={Daniel Watson and William Chan and Ricardo Martin Brualla and Jonathan Ho and Andrea Tagliasacchi and Mohammad Norouzi},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=HtoA0oT30jC}\n}", "github": "", "project": "", "reviewers": "Zv2q;wGc3;Am8T;o2Dj", "pdf_size": 1276338, "recommendation": "5;6;6;8", "confidence": "4;4;3;4", "correctness": "3;3;3;4", "technical_novelty": "2;3;4;3", "empirical_novelty": "2;3;4;3", "wc_summary_paper": "58;174;64;46", "wc_strength_and_weaknesses": "923;343;194;283", "wc_clarity_quality_novelty_and_reproducibility": "17;102;35;54", "wc_summary_review": "55;92;67;159", "wc_review": "1053;711;360;542", "wc_reply_reviewers": "391;0;0;114", "wc_reply_authors": "1262;529;336;594", "reply_reviewers": "1;0;0;1", "reply_authors": "2;1;1;1", "recommendation_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 85.5, 51.504854140168185 ], "wc_strength_and_weaknesses_avg": [ 435.75, 286.26506510575126 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 52.0, 31.693847983480957 ], "wc_summary_review_avg": [ 93.25, 40.23912896671597 ], "wc_review_avg": [ 666.5, 255.34535437324877 ], "wc_reply_reviewers_avg": [ 126.25, 159.78168699822893 ], "wc_reply_authors_avg": [ 680.25, 349.01889275510575 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.13245323570650439, "corr_recommendation_correctness": 0.9271726499455306, "gs_citation": 277, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10588026954342974040&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=HtoA0oT30jC", "email": "google.com;google.com;google.com;google.com;google.com;google.com", 
"author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;0;1;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Weakly Supervised Explainable Phrasal Reasoning with Neural Fuzzy Logic", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11400", "id": "Hu4r-dedqR0", "poster": "/media/PosterPDFs/ICLR%202023/11400.png?t=1682364273.9600189", "openreview": "https://openreview.net/forum?id=Hu4r-dedqR0", "slides": "https://iclr.cc/virtual/2023/poster/11400", "video": "https://iclr.cc/virtual/2023/poster/11400", "author_site": "Zijun Wu, Zi Xuan Zhang, Atharva Naik, Zhijian Mei, Mauajama Firdaus, Lili Mou", "tldr": "", "abstract": "Natural language inference (NLI) aims to determine the logical relationship between two sentences, such as Entailment, Contradiction, and Neutral. In recent years, deep learning models have become a prevailing approach to NLI, but they lack interpretability and explainability. In this work, we address the explainability of NLI by weakly supervised logical reasoning, and propose an Explainable Phrasal Reasoning (EPR) approach. Our model first detects phrases as the semantic unit and aligns corresponding phrases in the two sentences. Then, the model predicts the NLI label for the aligned phrases, and induces the sentence label by fuzzy logic formulas. Our EPR is almost everywhere differentiable and thus the system can be trained end to end. In this way, we are able to provide explicit explanations of phrasal logical relationships in a weakly supervised manner. 
We further show that such reasoning results help textual explanation generation.", "keywords": "Neural Fuzzy Logic;Weakly Supervised Reasoning;Natural Language Inference;Explainability and Interpretability", "primary_area": "", "supplementary_material": "", "author": "Zijun Wu;Zi Xuan Zhang;Atharva Naik;Zhijian Mei;Mauajama Firdaus;Lili Mou", "authorids": "~Zijun_Wu2;~Zi_Xuan_Zhang1;~Atharva_Naik1;zmei1@ualberta.ca;~Mauajama_Firdaus1;~Lili_Mou1", "gender": "M;;M;;F;M", "homepage": ";;https://atharva-naik.github.io;;;https://lili-mou.github.io/", "dblp": ";;272/8869.html;;223/8272;", "google_scholar": "rmET9UUAAAAJ;;wTTF4yYAAAAJ;;https://scholar.google.co.in/citations?user=nVmB914AAAAJ;https://scholar.google.com.hk/schhp?hl=en", "orcid": ";;0009-0007-1759-2259;;0000-0001-7485-5974;", "linkedin": "zijun-wu-844474195/?originalSubdomain=ca;zixuan-z-86b181a3/;;;mauajama-firdaus-9b577a16a/;", "or_profile": "~Zijun_Wu2;~Zi_Xuan_Zhang1;~Atharva_Naik1;zmei1@ualberta.ca;~Mauajama_Firdaus1;~Lili_Mou1", "aff": "University of Alberta;University of Alberta;Carnegie Mellon University;;University of Alberta;University of Alberta", "aff_domain": "ualberta.ca;ualberta.ca;andrew.cmu.edu;;ualberta.ca;ualberta.ca", "position": "PhD student;MS student;MS student;;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nwu2023weakly,\ntitle={Weakly Supervised Explainable Phrasal Reasoning with Neural Fuzzy Logic},\nauthor={Zijun Wu and Zi Xuan Zhang and Atharva Naik and Zhijian Mei and Mauajama Firdaus and Lili Mou},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=Hu4r-dedqR0}\n}", "github": "", "project": "", "reviewers": "vyjc;K5wq;VdSJ;72Kh", "pdf_size": 1300682, "recommendation": "6;6;6;8", "confidence": "3;4;3;4", "correctness": "3;3;3;4", "technical_novelty": "2;3;3;3", "empirical_novelty": "3;3;2;4", "wc_summary_paper": "29;85;112;173", "wc_strength_and_weaknesses": "217;294;232;206", "wc_clarity_quality_novelty_and_reproducibility": "30;51;105;164", "wc_summary_review": "35;36;53;16", "wc_review": "311;466;502;559", "wc_reply_reviewers": "60;13;550;462", "wc_reply_authors": "557;652;1143;830", "reply_reviewers": "1;1;1;2", "reply_authors": "3;4;3;3", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 99.75, 51.81397012389612 ], "wc_strength_and_weaknesses_avg": [ 237.25, 34.03949911499874 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 87.5, 51.95430684745972 ], "wc_summary_review_avg": [ 35.0, 13.095800853708795 ], "wc_review_avg": [ 459.5, 91.92524136492653 ], "wc_reply_reviewers_avg": [ 271.25, 237.38510378707423 ], "wc_reply_authors_avg": [ 795.5, 223.28289231376417 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.25, 0.4330127018922193 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 1.0, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12382630938089254952&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=Hu4r-dedqR0", "email": "ualberta.ca;ualberta.ca;andrew.cmu.edu;;ualberta.ca;ualberta.ca", "author_num": 6, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "University of Alberta;Carnegie 
Mellon University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ualberta.ca;https://www.cmu.edu", "aff_unique_abbr": "UAlberta;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "Canada;United States" }, { "id": "HumfPzF2yeI", "title": "Learning Rewards and Skills to Follow Commands with a Data Efficient Visual-Audio Representation", "track": "main", "status": "Reject", "tldr": "We learn a representation to generate a reward function to train command-following robots with reinforcement learning", "abstract": "Based on the recent advancements in representation learning, we propose a novel framework for command-following robots with raw sensor inputs. Previous RL-based methods are either difficult to continuously improve after the deployment or require a large number of new labels during the fine-tuning. Motivated by (self-)supervised contrastive learning literature, we propose a novel representation, named VAR++, that generates an intrinsic reward function for command-following robot tasks by associating images with sound commands. After the robot is deployed in a new domain, the representation can be updated intuitively and data-efficiently by non-expert, and the robots are able to fulfill sound commands without any hand-crafted reward functions. We demonstrate our approach to various sound types and robotic tasks, including navigation and manipulation with raw sensor inputs. In the simulated experiments, we show that our system can continually self-improve in previously unseen scenarios given fewer new labeled data, yet achieves better performance, compared with previous methods.\n\n", "keywords": "Robotics;Representation Learning;Reinforcement Learning", "primary_area": "", "supplementary_material": "/attachment/467b92738397e9034f47491e42498a0f1f694e6d.zip", "author": "Peixin Chang;Shuijing Liu;Tianchen Ji;Neeloy Chakraborty;D Livingston McPherson;Katherine Rose Driggs-Campbell", "authorids": "~Peixin_Chang1;~Shuijing_Liu1;~Tianchen_Ji1;~Neeloy_Chakraborty1;~D_Livingston_McPherson1;~Katherine_Rose_Driggs-Campbell1", "gender": "M;F;M;M;Non-Binary;", "homepage": ";https://shuijing725.github.io;https://tianchenji.github.io/;https://theneeloy.github.io/;https://people.eecs.berkeley.edu/~david.mcpherson/;", "dblp": ";211/7210;;278/2404;;", "google_scholar": "0AloliwAAAAJ;I4k7ukgAAAAJ;9XgufxkAAAAJ;Fwc4xyEAAAAJ;83a8tp0AAAAJ;", "orcid": ";;;0000-0001-7132-6671;;", "linkedin": ";shuijing-liu-4089b3123;;neeloy-chakraborty/;david-mcpherson-4bb96595/;", "or_profile": "~Peixin_Chang1;~Shuijing_Liu1;~Tianchen_Ji1;~Neeloy_Chakraborty1;~D_Livingston_McPherson1;~Katherine_Rose_Driggs-Campbell1", "aff": "University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;Department of Computer Science;", "aff_domain": "uiuc.edu;uiuc.edu;uiuc.edu;uiuc.edu;cs.illinois.edu;", "position": "PhD student;PhD student;PhD student;PhD student;Postdoc;", "bibtex": "@misc{\nchang2023learning,\ntitle={Learning Rewards and Skills to Follow Commands with a Data Efficient Visual-Audio Representation},\nauthor={Peixin Chang and Shuijing Liu and Tianchen Ji and Neeloy Chakraborty and D Livingston McPherson and Katherine Rose Driggs-Campbell},\nyear={2023},\nurl={https://openreview.net/forum?id=HumfPzF2yeI}\n}", "github": "", "project": "", "reviewers": "hTKz;Fs4r;dfAL", "site": "https://openreview.net/forum?id=HumfPzF2yeI", "pdf_size": 17591623, 
"recommendation": "5;6;6", "confidence": "4;3;3", "correctness": "4;3;3", "technical_novelty": "2;2;3", "empirical_novelty": "2;2;3", "wc_summary_paper": "23;56;99", "wc_strength_and_weaknesses": "133;216;163", "wc_clarity_quality_novelty_and_reproducibility": "24;80;1", "wc_summary_review": "25;56;97", "wc_review": "205;408;360", "wc_reply_reviewers": "24;52;44", "wc_reply_authors": "619;610;755", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "recommendation_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 59.333333333333336, 31.116269413639905 ], "wc_strength_and_weaknesses_avg": [ 170.66666666666666, 34.31552936434983 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 35.0, 33.1762967593833 ], "wc_summary_review_avg": [ 59.333333333333336, 29.48822740612863 ], "wc_review_avg": [ 324.3333333333333, 86.62691395993639 ], "wc_reply_reviewers_avg": [ 40.0, 11.775681155103795 ], "wc_reply_authors_avg": [ 661.3333333333334, 66.33417084898419 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.9999999999999997, "corr_recommendation_correctness": -0.9999999999999997, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:H5x07PvC-4gJ:scholar.google.com/&scioq=Learning+Rewards+and+Skills+to+Follow+Commands+with+a+Data+Efficient+Visual-Audio+Representation&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "University of Illinois Urbana-Champaign;Unknown Institution", "aff_unique_dep": ";Department of Computer Science", "aff_unique_url": "https://illinois.edu;", "aff_unique_abbr": "UIUC;", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Urbana-Champaign;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States;" }, { "id": "Hv57u3WQ0WZ", "title": "Contrastive Hierarchical Clustering", "track": "main", "status": "Reject", "tldr": "Hierarchical clustering model based on deep neural networks, which has been applied to large-scale image data", "abstract": "Deep clustering has been dominated by flat clustering models, which split a dataset into a predefined number of groups. Although recent methods achieve extremely high similarity with the ground truth on popular benchmarks, the information contained in the flat partition is limited. In this paper, we introduce CoHiClust, a Contrastive Hierarchical Clustering model based on deep neural networks, which can be applied to large-scale image data. By employing a self-supervised learning approach, CoHiClust distills the base network into a binary tree without access to any labeled data. The hierarchical clustering structure can be used to analyze the relationship between clusters as well as to measure the similarity between data points. Experiments performed on typical image benchmarks demonstrate that CoHiClust generates a reasonable structure of clusters, which is consistent with our intuition and image semantics. 
Moreover, by applying the proposed pruning strategy, we can restrict the hierarchy to the requested number of clusters (leaf nodes) and obtain clustering accuracy comparable to the state-of-the-art flat clustering baselines.\n", "keywords": "clustering;hierarchical clustering;contrastive learning;soft decision trees", "primary_area": "", "supplementary_material": "/attachment/8e3eff03dfee31bc45d2721108b80370a7c1acf5.zip", "author": "Micha\u0142 Znale\u017aniak;Przemys\u0142aw Rola;Patryk Kaszuba;Jacek Tabor;Marek \u015amieja", "authorids": "michalznalezniak@gmail.com;rp.przemek@gmail.com;patrykkaszuba@protonmail.com;~Jacek_Tabor1;~Marek_\u015amieja1", "gender": ";;;M;M", "homepage": ";;;;https://mareksmieja.github.io/", "dblp": ";;;31/5172;81/10360", "google_scholar": ";;;https://scholar.google.pl/citations?user=zSKYziUAAAAJ;https://scholar.google.pl/citations?user=MOmnpZcAAAAJ", "orcid": ";;;0000-0001-6652-7727;0000-0003-2027-4132", "linkedin": ";;;;", "or_profile": "michalznalezniak@gmail.com;rp.przemek@gmail.com;patrykkaszuba@protonmail.com;~Jacek_Tabor1;~Marek_\u015amieja1", "aff": ";;;Jagiellonian University;Jagiellonian University", "aff_domain": ";;;uj.edu.pl;uj.edu.pl", "position": ";;;Full Professor;Associate Professor", "bibtex": "@misc{\nznale{\\'z}niak2023contrastive,\ntitle={Contrastive Hierarchical Clustering},\nauthor={Micha{\\l} Znale{\\'z}niak and Przemys{\\l}aw Rola and Patryk Kaszuba and Jacek Tabor and Marek {\\'S}mieja},\nyear={2023},\nurl={https://openreview.net/forum?id=Hv57u3WQ0WZ}\n}", "github": "", "project": "", "reviewers": "8rQe;4K5k;Xeet;PDcK", "site": "https://openreview.net/forum?id=Hv57u3WQ0WZ", "pdf_size": 27749250, "recommendation": "3;5;5;6", "confidence": "5;4;3;3", "correctness": "4;3;2;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;3;2;2", "wc_summary_paper": "71;7;63;77", "wc_strength_and_weaknesses": "202;7;160;316", "wc_clarity_quality_novelty_and_reproducibility": "9;7;75;41", "wc_summary_review": "26;404;41;47", "wc_review": "308;425;339;481", "wc_reply_reviewers": "0;0;0;249", "wc_reply_authors": "975;1016;786;1462", "reply_reviewers": "0;0;0;2", "reply_authors": "2;2;2;3", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 54.5, 27.870235018743564 ], "wc_strength_and_weaknesses_avg": [ 171.25, 110.68282387073434 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 33.0, 27.748873851023216 ], "wc_summary_review_avg": [ 129.5, 158.66710434113304 ], "wc_review_avg": [ 388.25, 68.59072459159475 ], "wc_reply_reviewers_avg": [ 62.25, 107.82016277116261 ], "wc_reply_authors_avg": [ 1059.75, 247.91165261036036 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.899228803025897, "corr_recommendation_correctness": -0.6488856845230502, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15329298827837505958&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff_unique_index": "0;0", "aff_unique_norm": "Jagiellonian University", "aff_unique_dep": "", "aff_unique_url": "https://www.uj.edu.pl", "aff_unique_abbr": "UJ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": 
"Poland" }, { "id": "Hvcmr6FSIX8", "title": "When is Offline Hyperparameter Selection Feasible for Reinforcement Learning?", "track": "main", "status": "Reject", "tldr": "", "abstract": "Hyperparameter selection is a critical procedure before deploying reinforcement learning algorithms in real-world applications. However, hyperparameter selection prior to deployment requires selecting policies offline without online execution, which is a significant challenge known as offline policy selection. As yet, there is little understanding about the fundamental limitations of the offline policy selection problem. To contribute to our understanding of this problem, in this paper, we investigate when sample efficient offline policy selection is possible. As off-policy policy evaluation (OPE) is a natural approach for policy selection, the sample complexity of offline policy selection is therefore upper-bounded by the number of samples needed to perform OPE. In addition, we prove that the sample complexity of offline policy selection is also lower-bounded by the sample complexity of OPE. These results imply not only that offline policy selection is effective when OPE is effective, but also that sample efficient policy selection is not possible without additional assumptions that make OPE effective. Moreover, we theoretically study the conditions under which offline policy selection using Fitted Q evaluation (FQE) and the Bellman error is sample efficient. We conclude with an empirical study comparing FQE and Bellman errors for offline policy selection.", "keywords": "Offline policy selection;offline reinforcement learning;off-policy policy evaluation", "primary_area": "", "supplementary_material": "", "author": "Vincent Liu;Prabhat Nagarajan;Andrew Patterson;Martha White", "authorids": "~Vincent_Liu3;~Prabhat_Nagarajan1;~Andrew_Patterson1;~Martha_White1", "gender": ";M;M;F", "homepage": ";http://prabhatnagarajan.com;https://andnp.github.io;http://marthawhite.ca", "dblp": ";227/3287;41/467;60/7057", "google_scholar": "https://scholar.google.ca/citations?hl=en;Gjjj8IQAAAAJ;jd2nCqYAAAAJ;t5zdD_IAAAAJ", "orcid": ";;;0000-0002-5356-2950", "linkedin": ";prabhatnagarajan;;", "or_profile": "~Vincent_Liu3;~Prabhat_Nagarajan1;~Andrew_Patterson1;~Martha_White1", "aff": "University of Alberta;University of Alberta;University of Alberta;University of Alberta", "aff_domain": "ualberta.ca;ualberta.ca;ualberta.ca;ualberta.ca", "position": "PhD student;PhD student;PhD student;Associate Professor", "bibtex": "@misc{\nliu2023when,\ntitle={When is Offline Hyperparameter Selection Feasible for Reinforcement Learning?},\nauthor={Vincent Liu and Prabhat Nagarajan and Andrew Patterson and Martha White},\nyear={2023},\nurl={https://openreview.net/forum?id=Hvcmr6FSIX8}\n}", "github": "", "project": "", "reviewers": "fS4W;vxfJ;DgEQ;H7wj", "site": "https://openreview.net/forum?id=Hvcmr6FSIX8", "pdf_size": 431499, "recommendation": "5;5;6;6", "confidence": "4;4;3;3", "correctness": "4;3;3;4", "technical_novelty": "2;2;3;2", "empirical_novelty": "3;2;2;2", "wc_summary_paper": "79;100;88;80", "wc_strength_and_weaknesses": "370;243;235;449", "wc_clarity_quality_novelty_and_reproducibility": "54;20;471;73", "wc_summary_review": "26;229;21;54", "wc_review": "529;592;815;656", "wc_reply_reviewers": "27;0;109;0", "wc_reply_authors": "690;494;1359;316", "reply_reviewers": "1;0;2;0", "reply_authors": "1;1;3;1", "recommendation_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 
2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 86.75, 8.407585860400118 ], "wc_strength_and_weaknesses_avg": [ 324.25, 89.75348182661216 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 154.5, 183.7151327463255 ], "wc_summary_review_avg": [ 82.5, 85.51169510657591 ], "wc_review_avg": [ 648.0, 106.36023693091323 ], "wc_reply_reviewers_avg": [ 34.0, 44.68221122549778 ], "wc_reply_authors_avg": [ 714.75, 394.7792896036974 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:5D1C1wM9mgUJ:scholar.google.com/&scioq=When+is+Offline+Hyperparameter+Selection+Feasible+for+Reinforcement+Learning%3F&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Alberta", "aff_unique_dep": "", "aff_unique_url": "https://www.ualberta.ca", "aff_unique_abbr": "UAlberta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Canada" }, { "id": "HwbEioBGLo3", "title": "Rethinking Knowledge Distillation with Raw Features for Semantic Segmentation", "track": "main", "status": "Withdraw", "tldr": "In-depth analysis of the raw feature distillation and the design of an effective feature distillation method for semantic segmentation.", "abstract": "Most existing knowledge distillation methods for semantic segmentation focus on extracting various complex forms of knowledge from raw features. However, such knowledge is usually manually designed and relies on prior knowledge as in traditional feature engineering. In this paper, in order to seek a more simple and effective way to perform feature distillation, we analyze the naive feature distillation method with raw features and reveal that it actually attempts to make the student learn both the magnitude and angular information from the teacher features simultaneously. We further find experimentally that the angular information is more effective than the magnitude information for feature distillation. Based on this finding, we propose a simple and effective feature distillation method for semantic segmentation, which eliminates the need to manually design distillation knowledge. Experimental results on three popular benchmark datasets show that our method achieves state-of-the-art distillation performance for semantic segmentation. 
The code will be available.", "keywords": "Knowledge Distillation;Semantic Segmentation;Raw Feature Learning", "primary_area": "", "supplementary_material": "", "author": "Tao Liu;Chenshu Chen;Xi Yang;Wenming Tan", "authorids": "~Tao_Liu13;~Chenshu_Chen1;~Xi_Yang12;~Wenming_Tan1", "gender": "M;;;M", "homepage": "https://github.com/LTnanana;;;", "dblp": "43/656;313/8923;https://dblp.org/rec/journals/corr/abs-2207-05256;224/0172", "google_scholar": ";;;https://scholar.google.com/citations?hl=en", "orcid": ";;;0000-0003-1338-4536", "linkedin": ";;;", "or_profile": "~Tao_Liu13;~Chenshu_Chen1;~Xi_Yang12;~Wenming_Tan1", "aff": "Hikvision Research Institute;Hikvision Research Institute;Hikvision Research Institute;Hikvision Research Institute", "aff_domain": "hikvision.com;hikvision.com;hikvision.com;hikvision.com", "position": "Researcher;Researcher;Researcher;Researcher", "bibtex": "@misc{\nliu2023rethinking,\ntitle={Rethinking Knowledge Distillation with Raw Features for Semantic Segmentation},\nauthor={Tao Liu and Chenshu Chen and Xi Yang and Wenming Tan},\nyear={2023},\nurl={https://openreview.net/forum?id=HwbEioBGLo3}\n}", "github": "", "project": "", "reviewers": "4CRW;JgPM;CcKX;jn8m;vaVH", "site": "https://openreview.net/forum?id=HwbEioBGLo3", "pdf_size": 8521623, "recommendation": "1;5;5;5;6", "confidence": "5;5;4;4;4", "correctness": "3;2;3;3;4", "technical_novelty": "1;3;2;2;4", "empirical_novelty": "2;4;2;2;4", "wc_summary_paper": "92;48;62;27;45", "wc_strength_and_weaknesses": "39;191;164;123;386", "wc_clarity_quality_novelty_and_reproducibility": "45;48;24;8;18", "wc_summary_review": "33;100;33;7;37", "wc_review": "209;387;283;165;486", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;0;0", "reply_authors": "0;0;0;0;0", "recommendation_avg": [ 4.4, 1.7435595774162693 ], "confidence_avg": [ 4.4, 0.48989794855663565 ], "correctness_avg": [ 3.0, 0.6324555320336759 ], "technical_novelty_avg": [ 2.4, 1.019803902718557 ], "empirical_novelty_avg": [ 2.8, 0.9797958971132712 ], "wc_summary_paper_avg": [ 54.8, 21.683173199511184 ], "wc_strength_and_weaknesses_avg": [ 180.6, 114.84006269590765 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 28.6, 15.512575543732254 ], "wc_summary_review_avg": [ 42.0, 30.906310035330975 ], "wc_review_avg": [ 306.0, 117.285975291166 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.6556100681071858, "corr_recommendation_correctness": 0.18136906252750293, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8210548890562331092&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Hikvision Research Institute", "aff_unique_dep": "", "aff_unique_url": "https://www.hikvision.com/cn/", "aff_unique_abbr": "Hikvision", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "HwcEuhLtCJr", "title": "Cold Posteriors through PAC-Bayes", "track": "main", "status": "Reject", "tldr": "PAC-Bayes objectives naturally contain a temperature parameter; we investigate its relation to the cold posterior effect.", "abstract": "We investigate the cold posterior effect through the lens of PAC-Bayes generalization bounds. 
We argue that in the non-asymptotic setting, when the number of training samples is (relatively) small, discussions of the cold posterior effect should take into account that approximate Bayesian inference does not readily provide guarantees of performance on out-of-sample data. Instead, out-of-sample error is better described through a generalization bound. In this context, we explore the connections of the ELBO objective from variational inference and the PAC-Bayes objectives. We note that, while the ELBO and PAC-Bayes objectives are similar, the latter objectives naturally contain a temperature parameter $\\lambda$ which is not restricted to be $\\lambda=1$. For both simplified regression and realistic classification tasks, in the case of Laplace approximations to the posterior, we show how this PAC-Bayesian interpretation of the temperature parameter captures important aspects of the cold posterior effect.", "keywords": "cold posteriors;Bayesian;Bayesian Neural Networks;PAC-Bayes;Laplace approximation", "primary_area": "", "supplementary_material": "/attachment/8724f53a9f870892e793722819b3371fca1a0662.zip", "author": "Konstantinos Pitas;Julyan Arbel", "authorids": "~Konstantinos_Pitas1;~Julyan_Arbel1", "gender": "M;M", "homepage": "https://team.inria.fr/statify/members/;http://www.julyanarbel.com/", "dblp": ";172/8198", "google_scholar": "https://scholar.google.ch/citations?user=SAOHTb0AAAAJ;Q7P4K3wAAAAJ", "orcid": ";0000-0002-2525-4416", "linkedin": ";julyanarbel/", "or_profile": "~Konstantinos_Pitas1;~Julyan_Arbel1", "aff": "INRIA;Inria", "aff_domain": "inria.fr;inria.fr", "position": "Postdoc;Researcher", "bibtex": "@misc{\npitas2023cold,\ntitle={Cold Posteriors through {PAC}-Bayes},\nauthor={Konstantinos Pitas and Julyan Arbel},\nyear={2023},\nurl={https://openreview.net/forum?id=HwcEuhLtCJr}\n}", "github": "", "project": "", "reviewers": "qVvM;jFkt;oDKT;Pea3", "site": "https://openreview.net/forum?id=HwcEuhLtCJr", "pdf_size": 621161, "recommendation": "3;5;5;5", "confidence": "4;3;3;4", "correctness": "2;4;3;4", "technical_novelty": "2;2;3;2", "empirical_novelty": "2;2;3;2", "wc_summary_paper": "76;121;107;80", "wc_strength_and_weaknesses": "854;53;99;97", "wc_clarity_quality_novelty_and_reproducibility": "308;9;438;85", "wc_summary_review": "35;53;151;34", "wc_review": "1273;236;795;296", "wc_reply_reviewers": "644;20;228;0", "wc_reply_authors": "2035;270;909;454", "reply_reviewers": "1;1;1;0", "reply_authors": "4;1;2;1", "recommendation_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 96.0, 18.721645226849056 ], "wc_strength_and_weaknesses_avg": [ 275.75, 334.35862109417786 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 210.0, 171.47448789834598 ], "wc_summary_review_avg": [ 68.25, 48.37031631072925 ], "wc_review_avg": [ 650.0, 420.0791592069285 ], "wc_reply_reviewers_avg": [ 223.0, 258.942078465436 ], "wc_reply_authors_avg": [ 917.0, 686.1060413667847 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.8703882797784891, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17826342666741851257&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, 
"aff_unique_index": "0;0", "aff_unique_norm": "INRIA", "aff_unique_dep": "", "aff_unique_url": "https://www.inria.fr", "aff_unique_abbr": "INRIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "France" }, { "id": "HyIY8u5LVDr", "title": "Discovering the Representation Bottleneck of Graph Neural Networks from Multi-order Interactions", "track": "main", "status": "Desk Reject", "tldr": "", "abstract": "Most graph neural networks (GNNs) rely on the message passing paradigm to propagate node features and build interactions. Recent studies point out that different graph learning tasks require different ranges of interactions between nodes. In this work, we explore the capacity of GNNs to capture multi-order interactions between nodes, and the order represents the complexity of the context where interactions take place. We study two standard graph construction methods, namely, \\emph{K-nearest neighbor} (KNN) graphs and \\emph{fully-connected} (FC) graphs, and concentrate on scientific problems in the 3D Euclidean space. \nWe demonstrate that the inductive bias introduced by KNN-graphs and FC-graphs prevents GNNs from learning interactions of the most appropriate complexity. We found that such a phenomenon is broadly shared by several GNNs for diverse graph learning tasks, so we name it a \\emph{representation bottleneck}. To overcome that, we propose a novel graph rewiring approach based on interaction strengths of various orders to adjust the receptive fields of each node dynamically. \nExtensive experiments in molecular property prediction and dynamic system forecast prove the superiority of our method over state-of-the-art graph rewiring baselines. This paper provides a reasonable explanation of why subgraphs play a vital role in determining graph properties. The code is available at \\url{https://github.com/smiles724/bottleneck}", "keywords": "GNN bottleneck;graph rewiring;representation bottleneck;multi-order interactions", "primary_area": "", "supplementary_material": "", "author": "Fang Wu;Siyuan Li;Lirong Wu;Dragomir Radev;Qiang Zhang;Stan Z. Li", "authorids": "~Fang_Wu1;~Siyuan_Li6;~Lirong_Wu1;~Dragomir_Radev2;~Qiang_Zhang6;~Stan_Z._Li2", "gender": ";M;;;;", "homepage": ";https://lupin1998.github.io/;;;https://qiangairesearcher.github.io;", "dblp": ";63/9705-2;15/10330;;72/3527-26;", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;Tk7TrCoAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;", "orcid": ";0000-0001-6806-2468;;;;", "linkedin": ";https://www.linkedin.cn/incareer/in/siyuan-li-lupin1998/;;;;", "or_profile": "~Fang_Wu1;~Siyuan_Li6;~Lirong_Wu1;~Dragomir_Radev2;~Qiang_Zhang6;~Stan_Z._Li2", "aff": ";Alibaba Group;Westlake University;;Zhejiang University;", "aff_domain": ";alibaba-inc.com;westlake.edu.cn;;zju.edu.cn;", "position": ";Intern;PhD student;;Principal Researcher;", "bibtex": "@misc{\nwu2023discovering,\ntitle={Discovering the Representation Bottleneck of Graph Neural Networks from Multi-order Interactions},\nauthor={Fang Wu and Siyuan Li and Lirong Wu and Dragomir Radev and Qiang Zhang and Stan Z. 
Li},\nyear={2023},\nurl={https://openreview.net/forum?id=HyIY8u5LVDr}\n}", "github": "", "project": "", "reviewers": "", "site": "https://openreview.net/forum?id=HyIY8u5LVDr", "pdf_size": 4602124, "recommendation": "", "confidence": "", "correctness": "", "technical_novelty": "", "empirical_novelty": "", "wc_summary_paper": "", "wc_strength_and_weaknesses": "", "wc_clarity_quality_novelty_and_reproducibility": "", "wc_summary_review": "", "wc_review": "", "wc_reply_reviewers": "", "wc_reply_authors": "", "reply_reviewers": "", "reply_authors": "", "recommendation_avg": [ 0, 0 ], "confidence_avg": [ 0, 0 ], "correctness_avg": [ 0, 0 ], "technical_novelty_avg": [ 0, 0 ], "empirical_novelty_avg": [ 0, 0 ], "wc_summary_paper_avg": [ 0, 0 ], "wc_strength_and_weaknesses_avg": [ 0, 0 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 0, 0 ], "wc_summary_review_avg": [ 0, 0 ], "wc_review_avg": [ 0, 0 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 1, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0, "corr_recommendation_correctness": 0, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "aff_unique_index": "0;1;2", "aff_unique_norm": "Alibaba Group;Westlake University;Zhejiang University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.alibaba.com;https://www.westlake.edu.cn;https://www.zju.edu.cn", "aff_unique_abbr": "Alibaba;WU;ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "Hyan74saltV", "title": "SkillS: Adaptive Skill Sequencing for Efficient Temporally-Extended Exploration", "track": "main", "status": "Reject", "tldr": "", "abstract": "The ability to effectively reuse prior knowledge is a key requirement when building general and flexible Reinforcement Learning (RL) agents. \nSkill reuse is one of the most common approaches, but current methods have considerable limitations. For example, fine-tuning an existing policy frequently fails, as the policy can degrade rapidly early in training, particularly in sparse reward tasks. In a similar vein, distillation of expert behavior can lead to poor results when given sub-optimal experts. \nWe compare several common approaches for skill transfer on multiple domains and in several different transfer settings, including under changes in task and system dynamics. We identify how existing methods can fail and introduce an alternative approach which sidesteps some of these problems. \nOur approach learns to sequence existing temporally-abstract skills for exploration but learns the final policy directly from the raw experience. This conceptual split enables rapid adaptation and thus efficient data collection but without constraining the final solution. 
Our approach significantly outperforms many classical methods across a suite of evaluation tasks and we use a broad set of ablations to highlight the importance of different components of our method.", "keywords": "Reinforcement Learning;Control;Skills;Priors;Hierarchical Reinforcement Learning", "primary_area": "", "supplementary_material": "", "author": "Giulia Vezzani;Dhruva Tirumala;Markus Wulfmeier;Dushyant Rao;Abbas Abdolmaleki;Ben Moran;Tuomas Haarnoja;Jan Humplik;Roland Hafner;Michael Neunert;Claudio Fantacci;Tim Hertweck;Thomas Lampe;Fereshteh Sadeghi;Nicolas Heess;Martin Riedmiller", "authorids": "~Giulia_Vezzani1;~Dhruva_Tirumala1;~Markus_Wulfmeier1;~Dushyant_Rao1;~Abbas_Abdolmaleki3;benmoran@deepmind.com;~Tuomas_Haarnoja1;~Jan_Humplik1;~Roland_Hafner1;~Michael_Neunert1;cfantacci@deepmind.com;thertweck@deepmind.com;~Thomas_Lampe1;~Fereshteh_Sadeghi3;~Nicolas_Heess1;~Martin_Riedmiller1", "gender": "F;;M;M;;;M;M;Not Specified;M;;;;F;;M", "homepage": ";;;;;;;;;;;;;http://homes.cs.washington.edu/~fsadeghi/;;https://www.riedmiller.me/", "dblp": ";;166/1552;;;;80/9963;215/9213;19/765;153/7715;;;139/5934;;76/9181;", "google_scholar": "https://scholar.google.it/citations?user=Zlpuln8AAAAJ;;;;;;VT7peyEAAAAJ;YE9w2BsAAAAJ;;;;;;vS8b6GwAAAAJ;79k7bGEAAAAJ;1gVfqpcAAAAJ", "orcid": ";;;;;;;;;;;;;;;", "linkedin": ";;;;;;tuomas-haarnoja;;;;;;;;;", "or_profile": "~Giulia_Vezzani1;~Dhruva_Tirumala1;~Markus_Wulfmeier1;~Dushyant_Rao1;~Abbas_Abdolmaleki3;benmoran@deepmind.com;~Tuomas_Haarnoja1;~Jan_Humplik1;~Roland_Hafner1;~Michael_Neunert1;cfantacci@deepmind.com;thertweck@deepmind.com;~Thomas_Lampe1;~Fereshteh_Sadeghi3;~Nicolas_Heess1;~Martin_Riedmiller1", "aff": "Google DeepMind;;Google DeepMind;Google DeepMind;Google;;Google DeepMind;Google DeepMind;Google DeepMind;;;;Google DeepMind;Google DeepMind;Google DeepMind;", "aff_domain": "deepmind.com;;deepmind.com;google.com;google.com;;deepmind.com;google.com;deepmind.com;;;;deepmind.com;deepmind.com;google.com;", "position": "Researcher;;Research Scientist;Research Scientist;research scientist;;Research Scientist;Research scientist;Researcher;;;;Researcher;Researcher;Research Scientist;", "bibtex": "@misc{\nvezzani2023skills,\ntitle={SkillS: Adaptive Skill Sequencing for Efficient Temporally-Extended Exploration},\nauthor={Giulia Vezzani and Dhruva Tirumala and Markus Wulfmeier and Dushyant Rao and Abbas Abdolmaleki and Ben Moran and Tuomas Haarnoja and Jan Humplik and Roland Hafner and Michael Neunert and Claudio Fantacci and Tim Hertweck and Thomas Lampe and Fereshteh Sadeghi and Nicolas Heess and Martin Riedmiller},\nyear={2023},\nurl={https://openreview.net/forum?id=Hyan74saltV}\n}", "github": "", "project": "", "reviewers": "E1qM;Qd5u;C7sy;XQxL", "site": "https://openreview.net/forum?id=Hyan74saltV", "pdf_size": 5529459, "recommendation": "3;5;6;8", "confidence": "4;4;3;4", "correctness": "2;2;3;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;2;3;4", "wc_summary_paper": "90;55;127;224", "wc_strength_and_weaknesses": "104;151;123;420", "wc_clarity_quality_novelty_and_reproducibility": "118;120;11;79", "wc_summary_review": "53;142;23;57", "wc_review": "365;468;284;780", "wc_reply_reviewers": "0;171;0;0", "wc_reply_authors": "444;1897;304;788", "reply_reviewers": "0;1;0;0", "reply_authors": "1;4;1;1", "recommendation_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], 
"wc_summary_paper_avg": [ 124.0, 63.09912836165013 ], "wc_strength_and_weaknesses_avg": [ 199.5, 128.39879282921626 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 82.0, 44.130488327232456 ], "wc_summary_review_avg": [ 68.75, 44.285296657016985 ], "wc_review_avg": [ 474.25, 188.1839193448792 ], "wc_reply_reviewers_avg": [ 42.75, 74.0451720235695 ], "wc_reply_authors_avg": [ 858.25, 625.0465482666071 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 16, 0 ], "corr_recommendation_confidence": -0.16012815380508713, "corr_recommendation_correctness": 0.8320502943378437, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10914552444848446816&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google DeepMind", "aff_unique_url": "https://deepmind.com", "aff_unique_abbr": "DeepMind", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;1;0;0;0;0;0;0", "aff_country_unique": "United Kingdom;United States" }, { "id": "I1Mdyc2Bg5x", "title": "Pre-train Graph Neural Networks for Brain Network Analysis", "track": "main", "status": "Reject", "tldr": "", "abstract": "Human brains, controlling behaviors and cognition, are at the center of complex neurobiological systems. Recent studies in neuroscience and neuroimaging analysis have reached a consensus that interactions among brain regions of interest (ROIs) are driving factors for neural development and disorders. Graph neural networks as a powerful tool for analyzing graph-structured data are naturally applied to the analysis of brain networks. However, training of deep learning models including GNNs often requires a significant amount of labeled data. Due to the complicated data acquisition process and restrictions on data sharing, brain network datasets are still small compared to other domains (e.g., molecules, proteins). Moreover, real clinical tasks (e.g., mental disorder analysis) are often conducted on local datasets with even smaller scales and larger noises. To this end, we propose to leverage pre-training to capture the intrinsic brain network structures regardless of specific clinical outcomes. Specifically, we characterize the contributions in this work from two perspectives: (1) We design brain-network-oriented unsupervised pre-training techniques to utilize large-scale brain imaging studies without highly relevant task labels. (2) To facilitate effective knowledge transfer across studies with different ROI systems, we propose to develop a data-driven parcellation atlas mapping pipeline. The proposed pre-training techniques are validated with various GNN models. 
Extensive experiments demonstrate consistent improvement in performance as well as robustness.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Yi Yang;Hejie Cui;Carl Yang", "authorids": "~Yi_Yang28;~Hejie_Cui1;~Carl_Yang1", "gender": ";F;M", "homepage": ";https://hejiecui.com/;https://cs.emory.edu/~jyang71/", "dblp": ";221/7865;305/0254", "google_scholar": "https://scholar.google.com/citations?hl=en;r0Vh6GEAAAAJ;mOINlwcAAAAJ", "orcid": ";0000-0001-6388-2619;0000-0001-9145-4531", "linkedin": ";hejie-cui-b1071b13b/;", "or_profile": "~Yi_Yang28;~Hejie_Cui1;~Carl_Yang1", "aff": "Emory University;Emory University;Emory University", "aff_domain": "emory.edu;emory.edu;emory.edu", "position": "Undergrad student;PhD student;Assistant Professor", "bibtex": "@misc{\nyang2023pretrain,\ntitle={Pre-train Graph Neural Networks for Brain Network Analysis},\nauthor={Yi Yang and Hejie Cui and Carl Yang},\nyear={2023},\nurl={https://openreview.net/forum?id=I1Mdyc2Bg5x}\n}", "github": "", "project": "", "reviewers": "XzZd;TtZF;khKm;Y7QB", "site": "https://openreview.net/forum?id=I1Mdyc2Bg5x", "pdf_size": 6198078, "recommendation": "3;3;5;5", "confidence": "5;5;4;3", "correctness": "3;1;3;3", "technical_novelty": "2;1;3;2", "empirical_novelty": "2;0;3;2", "wc_summary_paper": "73;56;90;104", "wc_strength_and_weaknesses": "373;177;360;570", "wc_clarity_quality_novelty_and_reproducibility": "63;185;96;222", "wc_summary_review": "47;17;53;52", "wc_review": "556;435;599;948", "wc_reply_reviewers": "0;0;321;0", "wc_reply_authors": "828;996;1614;1441", "reply_reviewers": "0;0;3;0", "reply_authors": "2;2;5;2", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "correctness_avg": [ 2.5, 0.8660254037844386 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 1.75, 1.0897247358851685 ], "wc_summary_paper_avg": [ 80.75, 18.019087102292392 ], "wc_strength_and_weaknesses_avg": [ 370.0, 139.0665308404578 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 141.5, 64.43019478474359 ], "wc_summary_review_avg": [ 42.25, 14.7542366796795 ], "wc_review_avg": [ 634.5, 190.72558821511078 ], "wc_reply_reviewers_avg": [ 80.25, 138.9970773074024 ], "wc_reply_authors_avg": [ 1219.75, 319.3418035585069 ], "reply_reviewers_avg": [ 0.75, 1.299038105676658 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.9045340337332909, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17425780485253539109&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "Emory University", "aff_unique_dep": "", "aff_unique_url": "https://www.emory.edu", "aff_unique_abbr": "Emory", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Robust Algorithms on Adaptive Inputs from Bounded Adversaries", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10795", "id": "I29Kt0RwChs", "poster": "", "openreview": "https://openreview.net/forum?id=I29Kt0RwChs", "slides": "https://iclr.cc/virtual/2023/poster/10795", "video": "https://iclr.cc/virtual/2023/poster/10795", "author_site": "Yeshwanth Cherapanamjeri, Sandeep Silwal, David Woodruff, Fred Zhang, Qiuyi Zhang, Samson Zhou", "tldr": "We give algorithms robust to adaptive input from 
adversaries with bounded capabilities and a general framework for achieving it.", "abstract": "We study dynamic algorithms robust to adaptive input generated from sources with bounded capabilities, such as sparsity or limited interaction. For example, we consider robust linear algebraic algorithms when the updates to the input are sparse but given by an adversary with access to a query oracle. We also study robust algorithms in the standard centralized setting, where an adversary queries an algorithm in an adaptive manner, but the number of interactions between the adversary and the algorithm is bounded. We first recall a unified framework of (Hassidim et al., 2020; Beimel et al., 2022; Attias et al., 2023) for answering $Q$ adaptive queries that incurs $\\widetilde{\\mathcal{O}}(\\sqrt{Q})$ overhead in space, which is roughly a quadratic improvement over the na\\\"{i}ve implementation, and only incurs a logarithmic overhead in query time. Although the general framework has diverse applications in machine learning and data science, such as adaptive distance estimation, kernel density estimation, linear regression, range queries, point queries, and serves as a preliminary benchmark, we demonstrate even better algorithmic improvements for (1) reducing the pre-processing time for adaptive distance estimation and (2) permitting an unlimited number of adaptive queries for kernel density estimation. Finally, we complement our theoretical results with additional empirical evaluations. ", "keywords": "streaming algorithms;adversarial robustness;sketching;kernel density estimation", "primary_area": "", "supplementary_material": "/attachment/5d1a2550ad36bb0a27327efee30fb4d84726a51c.zip", "author": "Yeshwanth Cherapanamjeri;Sandeep Silwal;David Woodruff;Fred Zhang;Qiuyi Zhang;Samson Zhou", "authorids": "~Yeshwanth_Cherapanamjeri1;~Sandeep_Silwal1;~David_Woodruff1;~Fred_Zhang1;~Qiuyi_Zhang1;~Samson_Zhou1", "gender": "M;M;M;M;M;", "homepage": "http://yeshwanth94.github.io;https://sandeepsilwal.com;http://www.cs.cmu.edu/~dwoodruf/;http://fredzhang.me/;https://qiuyiz.github.io;https://samsonzhou.github.io/", "dblp": "182/2247;225/4637;w/DPWoodruff;232/9071;133/8559;179/2683", "google_scholar": ";MnDnUvcAAAAJ;https://scholar.google.com.tw/citations?user=0G2t-6sAAAAJ;guJ_kBQAAAAJ;mE11hO8AAAAJ;NpjsgocAAAAJ", "orcid": ";;;;;", "linkedin": ";;;fred-zhang-0/;;", "or_profile": "~Yeshwanth_Cherapanamjeri1;~Sandeep_Silwal1;~David_Woodruff1;~Fred_Zhang1;~Qiuyi_Zhang1;~Samson_Zhou1", "aff": "University of California, Berkeley;Massachusetts Institute of Technology;Carnegie Mellon University;University of California, Berkeley;Google;University of California, Berkeley", "aff_domain": "berkeley.edu;mit.edu;cmu.edu;berkeley.edu;google.com;berkeley.edu", "position": "PhD student;PhD student;Full Professor;PhD student;Researcher;Postdoc", "bibtex": "@inproceedings{\ncherapanamjeri2023robust,\ntitle={Robust Algorithms on Adaptive Inputs from Bounded Adversaries},\nauthor={Yeshwanth Cherapanamjeri and Sandeep Silwal and David Woodruff and Fred Zhang and Qiuyi Zhang and Samson Zhou},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=I29Kt0RwChs}\n}", "github": "", "project": "", "reviewers": "Kvip;n2fk;iYm7;w9hf", "pdf_size": 589183, "recommendation": "6;6;8;8", "confidence": "2;1;3;3", "correctness": "3;2;4;4", "technical_novelty": "3;2;3;3", "empirical_novelty": "0;0;0;0", "wc_summary_paper": "115;41;117;100", "wc_strength_and_weaknesses": 
"67;87;42;130", "wc_clarity_quality_novelty_and_reproducibility": "300;41;18;1", "wc_summary_review": "70;32;9;11", "wc_review": "552;201;186;242", "wc_reply_reviewers": "0;0;12;0", "wc_reply_authors": "739;451;84;323", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 2.25, 0.82915619758885 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 0, 0 ], "wc_summary_paper_avg": [ 93.25, 30.873734791890662 ], "wc_strength_and_weaknesses_avg": [ 81.5, 32.22188697143605 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 90.0, 122.0717002421118 ], "wc_summary_review_avg": [ 30.5, 24.520399670478458 ], "wc_review_avg": [ 295.25, 149.64520540264562 ], "wc_reply_reviewers_avg": [ 3.0, 5.196152422706632 ], "wc_reply_authors_avg": [ 399.25, 236.27565998214882 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.9045340337332909, "corr_recommendation_correctness": 0.9045340337332909, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10528271751578324057&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=I29Kt0RwChs", "email": "berkeley.edu;mit.edu;cmu.edu;berkeley.edu;google.com;berkeley.edu", "author_num": 6, "aff_unique_index": "0;1;2;0;3;0", "aff_unique_norm": "University of California, Berkeley;Massachusetts Institute of Technology;Carnegie Mellon University;Google", "aff_unique_dep": ";;;Google", "aff_unique_url": "https://www.berkeley.edu;https://web.mit.edu;https://www.cmu.edu;https://www.google.com", "aff_unique_abbr": "UC Berkeley;MIT;CMU;Google", "aff_campus_unique_index": "0;0;2;0", "aff_campus_unique": "Berkeley;;Mountain View", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Finding Actual Descent Directions for Adversarial Training", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10683", "id": "I3HCE7Ro78H", "poster": "", "openreview": "https://openreview.net/forum?id=I3HCE7Ro78H", "slides": "https://iclr.cc/virtual/2023/poster/10683", "video": "https://iclr.cc/virtual/2023/poster/10683", "author_site": "Fabian Latorre, Igor Krawczuk, Leello Dadi, Thomas Pethick, Volkan Cevher", "tldr": "There is a subtle bug in the theory behind PGD. We show how to correct it and that it matters in practice", "abstract": "Adversarial Training using a strong first-order adversary (PGD) is the gold standard for training Deep Neural Networks that are robust to adversarial examples. We show that, contrary to the general understanding of the method, the gradient at an optimal adversarial example may increase, rather than decrease, the adversarially robust loss. This holds independently of the learning rate. More precisely, we provide a counterexample to a corollary of Danskin's Theorem presented in the seminal paper of Madry et al. (2018) which states that a solution of the inner maximization problem can yield a descent direction for the adversarially robust loss. Based on a correct interpretation of Danskin's Theorem, we propose Danskin's Descent Direction (DDi) and we verify experimentally that it provides better directions than those obtained by a PGD adversary. 
Using the CIFAR10 dataset, we further provide a real-world example showing that our method achieves a steeper increase in robustness levels in the early stages of training, and is more stable than the PGD baseline. As a limitation, PGD training of ReLU+BatchNorm networks still performs better, but current theory is unable to explain this.\n", "keywords": "Adversarial Training;Adversarial Examples;non-convex optimization;robustness", "primary_area": "", "supplementary_material": "/attachment/bc629f5e4b7ebaa5d367d9de7a8bbef38106840f.zip", "author": "Fabian Latorre;Igor Krawczuk;Leello Tadesse Dadi;Thomas Pethick;Volkan Cevher", "authorids": "~Fabian_Latorre1;~Igor_Krawczuk1;~Leello_Tadesse_Dadi1;~Thomas_Pethick1;~Volkan_Cevher1", "gender": "M;Unspecified;M;M;M", "homepage": "https://fabianlatorre.com;https://krawczuk.eu;;https://pethick.dk;http://lions.epfl.ch", "dblp": "244/9638;244/7380.html;314/6241;305/4521;70/5301", "google_scholar": "B46S5NwAAAAJ;https://scholar.google.ch/citations?user=rLQIkUsAAAAJ;bhAxvCIAAAAJ;;https://scholar.google.ch/citations?user=hlWhzU8AAAAJ", "orcid": ";0000-0002-5281-8926;0000-0003-2580-4913;;", "linkedin": ";https://linkedin.com/in/igorkrawczuk;;;", "or_profile": "~Fabian_Latorre1;~Igor_Krawczuk1;~Leello_Tadesse_Dadi1;~Thomas_Pethick1;~Volkan_Cevher1", "aff": "Swiss Federal Institute of Technology Lausanne;Swiss Federal Institute of Technology Lausanne;EPFL;Swiss Federal Institute of Technology Lausanne;Amazon Development Center Germany", "aff_domain": "epfl.ch;epfl.ch;epfl.ch;epfl.ch;amazon.de", "position": "PhD student;PhD student;PhD student;PhD student;Amazon Scholar", "bibtex": "@inproceedings{\nlatorre2023finding,\ntitle={Finding Actual Descent Directions for Adversarial Training},\nauthor={Fabian Latorre and Igor Krawczuk and Leello Tadesse Dadi and Thomas Pethick and Volkan Cevher},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=I3HCE7Ro78H}\n}", "github": "", "project": "", "reviewers": "UVpf;gFkW;wzJU;FpN3", "pdf_size": 584371, "recommendation": "6;6;8;10", "confidence": "2;4;4;4", "correctness": "3;4;3;4", "technical_novelty": "3;3;4;4", "empirical_novelty": "3;3;3;4", "wc_summary_paper": "43;61;87;111", "wc_strength_and_weaknesses": "144;171;324;836", "wc_clarity_quality_novelty_and_reproducibility": "10;73;37;48", "wc_summary_review": "39;117;113;87", "wc_review": "236;422;561;1082", "wc_reply_reviewers": "0;63;128;74", "wc_reply_authors": "792;769;908;1325", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;2;2", "recommendation_avg": [ 7.5, 1.6583123951777 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.5, 0.5 ], "empirical_novelty_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 75.5, 25.782746168707476 ], "wc_strength_and_weaknesses_avg": [ 368.75, 278.36251094570906 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 42.0, 22.616365755797283 ], "wc_summary_review_avg": [ 89.0, 31.080540535840107 ], "wc_review_avg": [ 575.25, 314.47366741907024 ], "wc_reply_reviewers_avg": [ 66.25, 45.477329517024195 ], "wc_reply_authors_avg": [ 948.5, 223.66548683245702 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.5222329678670935, "corr_recommendation_correctness": 0.30151134457776363, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1,
"pdf": "https://openreview.net/pdf?id=I3HCE7Ro78H", "email": "epfl.ch;epfl.ch;epfl.ch;epfl.ch;amazon.de", "author_num": 5, "aff_unique_index": "0;0;1;0;2", "aff_unique_norm": "Swiss Federal Institute of Technology Lausanne;EPFL;Amazon", "aff_unique_dep": ";;Development Center", "aff_unique_url": "https://www.epfl.ch;https://www.epfl.ch;https://www.amazon.de", "aff_unique_abbr": "EPFL;EPFL;Amazon", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Lausanne;", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "Switzerland;Germany" }, { "id": "I7Mvqi0p9Xj", "title": "VC Theoretical Explanation of Double Descent", "track": "main", "status": "Reject", "tldr": "", "abstract": "There has been growing interest in generalization performance of large multilayer neural networks that can be trained to achieve zero training error, while generalizing well on test data. This regime is known as \u2018second descent\u2019 and it appears to contradict the conventional view that optimal model complexity should reflect an optimal balance between underfitting and overfitting, i.e., the bias-variance tradeoff.\nThis paper presents a VC-theoretical analysis of double descent and shows that it can be fully explained by classical VC-generalization bounds. We illustrate an application of analytic VC-bounds for modeling double descent for classification problems, using empirical results for several learning methods, such as SVM, Least Squares, and Multilayer Perceptron classifiers. In addition, we discuss several reasons for the misinterpretation of VC-theoretical results in Deep Learning community.", "keywords": "Double Descent;Deep Learning;SVM;Least Squares;VC Dimension;VC Generalization Bounds;Structural Risk Minimization", "primary_area": "", "supplementary_material": "", "author": "Eng Hock Lee;Vladimir Cherkassky", "authorids": "~Eng_Hock_Lee1;~Vladimir_Cherkassky2", "gender": "M;", "homepage": ";http://www.ece.umn.edu/users/cherkass/predictive_learning/", "dblp": ";", "google_scholar": ";", "orcid": "0000-0002-6858-1513;", "linkedin": ";", "or_profile": "~Eng_Hock_Lee1;~Vladimir_Cherkassky2", "aff": "University of Minnesota - Twin Cities;", "aff_domain": "umn.edu;", "position": "PhD student;", "bibtex": "@misc{\nlee2023vc,\ntitle={{VC} Theoretical Explanation of Double Descent},\nauthor={Eng Hock Lee and Vladimir Cherkassky},\nyear={2023},\nurl={https://openreview.net/forum?id=I7Mvqi0p9Xj}\n}", "github": "", "project": "", "reviewers": "Uf6C;7rdX;J8jr;4NbT", "site": "https://openreview.net/forum?id=I7Mvqi0p9Xj", "pdf_size": 4246677, "recommendation": "3;3;3;6", "confidence": "4;4;3;3", "correctness": "1;4;2;3", "technical_novelty": "3;1;2;3", "empirical_novelty": "3;1;2;3", "wc_summary_paper": "54;49;89;69", "wc_strength_and_weaknesses": "259;571;488;297", "wc_clarity_quality_novelty_and_reproducibility": "48;17;14;63", "wc_summary_review": "43;56;56;52", "wc_review": "404;693;647;481", "wc_reply_reviewers": "234;0;556;0", "wc_reply_authors": "986;605;1821;303", "reply_reviewers": "1;0;2;0", "reply_authors": "2;1;3;1", "recommendation_avg": [ 3.75, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.5, 1.118033988749895 ], "technical_novelty_avg": [ 2.25, 0.82915619758885 ], "empirical_novelty_avg": [ 2.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 65.25, 15.562374497485916 ], "wc_strength_and_weaknesses_avg": [ 403.75, 129.82560417729624 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 35.5, 20.71834935510066 ], "wc_summary_review_avg": [ 51.75, 
5.3091901453988255 ], "wc_review_avg": [ 556.25, 118.08762636279891 ], "wc_reply_reviewers_avg": [ 197.5, 227.96216791388872 ], "wc_reply_authors_avg": [ 928.75, 569.1583149704483 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.2581988897471611, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13343631184045752397&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0", "aff_unique_norm": "University of Minnesota", "aff_unique_dep": "", "aff_unique_url": "https://www.minnesota.edu", "aff_unique_abbr": "UMN", "aff_campus_unique_index": "0", "aff_campus_unique": "Twin Cities", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "I7triE0okW3", "title": "Critical Learning Periods Augmented Model Poisoning Attacks to Byzantine-Robust Federated Learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Existing attacks in federated learning (FL) control a set of malicious clients and share a fixed number of malicious gradients with the central server in each training round, to achieve a desired tradeoff between attack impact and resilience against defenses. In this paper, we show that such a tradeoff is not fundamental and an adaptive attack budget not only improves the impact of attack $\\mathcal{A}$ but also makes it more resilient to defenses. Inspired by recent findings on critical learning periods (CLP), where small gradient errors have an irrecoverable impact on model accuracy, we advocate CLP augmented model poisoning attacks $\\mathcal{A}$-CLP, which merely augment attack $\\mathcal{A}$ with an adaptive attack budget scheme. $\\mathcal{A}$-CLP inspects the changes in federated gradient norms to identify CLP and adaptively adjusts the number of malicious clients that share their malicious gradients with the central server in each round, leading to dramatically improved attack impact compared to $\\mathcal{A}$ itself by up to 6.85$\\times$, with a smaller attack budget and hence improved resilience of $\\mathcal{A}$ by up to 2$\\times$. Based on our understanding of $\\mathcal{A}$-CLP, we further relax the inner attack subroutine $\\mathcal{A}$ in $\\mathcal{A}$-CLP, and propose SimAttack-CLP, a lightweight CLP augmented similarity-based attack, which is more flexible and impactful.
", "keywords": "Critical Learning Periods;Byzantine-Robust Federated Learning;Model Poisoning Attacks", "primary_area": "", "supplementary_material": "/attachment/6453fa25e00e3508e6e389d6bb63bd9eeb44ea2d.zip", "author": "Gang Yan;Hao Wang;Xu Yuan;Jian Li", "authorids": "~Gang_Yan1;~Hao_Wang29;~Xu_Yuan1;~Jian_Li14", "gender": "M;M;M;M", "homepage": "https://www.gyan23.com/;https://www.haow.us;https://yuanxuyx.github.io/;https://sites.google.com/stonybrook.edu/jianli", "dblp": "203/8629;w/HaoWang-22;24/6114-1;33/5448-8", "google_scholar": "wyHzGcgAAAAJ;r-Ik__gAAAAJ;R3XkwA8AAAAJ;h039Yq4AAAAJ", "orcid": "0000-0002-7734-1589;0000-0002-1444-2657;;", "linkedin": "gang-yan-4b7622212/;haowanguoft/;;", "or_profile": "~Gang_Yan1;~Hao_Wang29;~Xu_Yuan1;~Jian_Li14", "aff": "State University of New York at Binghamton;Louisiana State University;University of Louisiana at Lafeyette;State University of New York, Binghamton", "aff_domain": "binghamton.edu;lsu.edu;louisiana.edu;binghamton.edu", "position": "PhD student;Assistant Professor;Assistant Professor;Assistant Professor", "bibtex": "@misc{\nyan2023critical,\ntitle={Critical Learning Periods Augmented Model Poisoning Attacks to Byzantine-Robust Federated Learning},\nauthor={Gang Yan and Hao Wang and Xu Yuan and Jian Li},\nyear={2023},\nurl={https://openreview.net/forum?id=I7triE0okW3}\n}", "github": "", "project": "", "reviewers": "sHTK;Rx2e;137d;xBrr", "site": "https://openreview.net/forum?id=I7triE0okW3", "pdf_size": 3688836, "recommendation": "3;3;5;5", "confidence": "5;3;3;4", "correctness": "1;2;3;2", "technical_novelty": "2;2;2;2", "empirical_novelty": "1;2;3;2", "wc_summary_paper": "80;20;77;123", "wc_strength_and_weaknesses": "420;107;165;159", "wc_clarity_quality_novelty_and_reproducibility": "35;17;33;9", "wc_summary_review": "34;20;47;53", "wc_review": "569;164;322;344", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "844;343;367;387", "reply_reviewers": "0;0;0;0", "reply_authors": "2;1;1;1", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 2.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 75.0, 36.5991803186902 ], "wc_strength_and_weaknesses_avg": [ 212.75, 121.76283299923668 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 23.5, 10.897247358851684 ], "wc_summary_review_avg": [ 38.5, 12.698425099200294 ], "wc_review_avg": [ 349.75, 144.37516233757108 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 485.25, 207.7093823109587 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.30151134457776363, "corr_recommendation_correctness": 0.7071067811865475, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5817719193815732092&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "State University of New York at Binghamton;Louisiana State University;University of Louisiana at Lafayette", "aff_unique_dep": ";;", "aff_unique_url": "https://www.binghamton.edu;https://www.lsu.edu;https://www.louisiana.edu", "aff_unique_abbr": "SUNY Binghamton;LSU;UL Lafayette", "aff_campus_unique_index": "0;2;0", "aff_campus_unique": "Binghamton;;Lafayette", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "I89hkzP0U4y", "title": "Analyzing the Effects of 
Classifier Lipschitzness on Explainers", "track": "main", "status": "Reject", "tldr": "Theoretical work in support of the intuition that robust classifiers lend themselves to robust explainers", "abstract": "Machine learning methods are getting increasingly better at making predictions, but at the same time they are also becoming more complicated and less transparent. As a result, explainers are often relied on to provide interpretability to these \\textit{black-box} prediction models. As crucial diagnostic tools, these explainers must themselves be reliable. In this paper, we focus on one particular aspect of reliability, namely that an explainer should give similar explanations for similar data inputs. We formalize this notion by introducing and defining \\textit{explainer astuteness}, analogous to astuteness of classifiers. Our formalism is inspired by the concept of \\textit{probabilistic Lipschitzness}, which captures the probability of local smoothness of a function. For a variety of explainers (e.g., SHAP, RISE, CXPlain), we provide lower bound guarantees on the astuteness of these explainers given the Lipschitzness of the prediction function. These theoretical results imply that locally smooth prediction functions lend themselves to locally robust explanations. We evaluate these results empirically on simulated as well as real datasets.", "keywords": "Explainers;Explanation;Robustness;Astuteness;Lipschitz;Blackbox;Classifiers", "primary_area": "", "supplementary_material": "/attachment/6877bffbaf9eee969ca5990d67ced53dc0ee9e05.zip", "author": "Zulqarnain Khan;Aria Masoomi;Davin Hill;Jennifer Dy", "authorids": "~Zulqarnain_Khan1;~Aria_Masoomi1;~Davin_Hill1;~Jennifer_Dy1", "gender": ";M;;", "homepage": ";;;https://mllabneu.github.io/", "dblp": ";242/9324;;24/6000", "google_scholar": ";KXcX8coAAAAJ;;6h7b0fAAAAAJ", "orcid": ";;;", "linkedin": ";aria-masoomi-779a02232;;", "or_profile": "~Zulqarnain_Khan1;~Aria_Masoomi1;~Davin_Hill1;~Jennifer_Dy1", "aff": ";Northeastern University;;Northeastern University", "aff_domain": ";northeastern.edu;;northeastern.edu", "position": ";PhD student;;Full Professor", "bibtex": "@misc{\nkhan2023analyzing,\ntitle={Analyzing the Effects of Classifier Lipschitzness on Explainers},\nauthor={Zulqarnain Khan and Aria Masoomi and Davin Hill and Jennifer Dy},\nyear={2023},\nurl={https://openreview.net/forum?id=I89hkzP0U4y}\n}", "github": "", "project": "", "reviewers": "bjVt;Hif5;tAqb", "site": "https://openreview.net/forum?id=I89hkzP0U4y", "pdf_size": 1469434, "recommendation": "3;5;6", "confidence": "4;4;3", "correctness": "3;3;4", "technical_novelty": "2;3;3", "empirical_novelty": "2;2;3", "wc_summary_paper": "57;124;144", "wc_strength_and_weaknesses": "235;272;192", "wc_clarity_quality_novelty_and_reproducibility": "61;42;53", "wc_summary_review": "101;43;68", "wc_review": "454;481;457", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "587;496;108", "reply_reviewers": "0;0;0", "reply_authors": "2;1;1", "recommendation_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 108.33333333333333, 37.205137040766594 ], "wc_strength_and_weaknesses_avg": [ 233.0, 32.69046751985457 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 52.0, 7.788880963698615 ],
"wc_summary_review_avg": [ 70.66666666666667, 23.753362335093158 ], "wc_review_avg": [ 464.0, 12.083045973594572 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 397.0, 207.70331404834798 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.7559289460184545, "corr_recommendation_correctness": 0.7559289460184545, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16162534865333599608&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Northeastern University", "aff_unique_dep": "", "aff_unique_url": "https://www.northeastern.edu", "aff_unique_abbr": "NEU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "I8ly64E5Nt", "title": "Branch-Train-Merge: Embarrassingly Parallel Training of Expert Language Models", "track": "main", "status": "Reject", "tldr": "We develop a new class of large language models that is embarrassingly parallel: different parts of the model are independently trained on different subsets of the data, with no need for multi-node training or inference.", "abstract": "We present Branch-Train-Merge (BTM), a communication-efficient algorithm for embarrassingly parallel training of large language models (LLMs). We show it is possible to independently train subparts of a new class of LLMs on different subsets of the data, eliminating the massive multi-node synchronization currently required to train LLMs. BTM learns a set of independent Expert LMs (ELMs), each specialized to a different textual domain, such as scientific or legal text. These ELMs can be added and removed to update data coverage, ensembled to generalize to new domains, or averaged to collapse back to a single LM for efficient inference. New ELMs are learned by branching from (mixtures of) ELMs in the current set, further training on new domains, and then merging the resulting models back into the set for future use. Experiments show that BTM improves in- and out-of-domain perplexities as compared to GPT-style Transformer LMs, when controlling for training cost. Through extensive analysis, we show that these results are robust to different ELM initialization schemes, but require expert domain specialization; ensembles with random data splits do not perform well. Our results suggest that aggressive parallelism could be used to efficiently scale larger LMs in future work.", "keywords": "sparsity;language model;efficient", "primary_area": "", "supplementary_material": "", "author": "Margaret Li;Suchin Gururangan;Tim Dettmers;Mike Lewis;Tim Althoff;Noah A. 
Smith;Luke Zettlemoyer", "authorids": "~Margaret_Li1;~Suchin_Gururangan1;~Tim_Dettmers2;~Mike_Lewis1;~Tim_Althoff2;~Noah_A._Smith2;~Luke_Zettlemoyer1", "gender": "F;M;M;M;M;M;M", "homepage": "https://margs.li;https://suchin.io;https://timdettmers.com/;;https://althoff.cs.uw.edu/;https://www.cs.washington.edu/people/faculty/lsz/;https://homes.cs.washington.edu/~nasmith/", "dblp": "230/3760;217/1570;172/1045;19/6214;119/1352;21/6793;90/5204.html", "google_scholar": "cUSS3fYAAAAJ;CJIKhNIAAAAJ;lHI3w5kAAAAJ;SnQnQicAAAAJ;yc4nBNgAAAAJ;https://scholar.google.com.tw/citations?user=UjpbO6IAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;;0000-0003-4793-2289;;0000-0002-2310-6380", "linkedin": ";;;;timalthoff/;luke-zettlemoyer-a0109b226/;", "or_profile": "~Margaret_Li1;~Suchin_Gururangan1;~Tim_Dettmers2;~Mike_Lewis1;~Tim_Althoff2;~Luke_Zettlemoyer1;~Noah_Smith1", "aff": "Meta;University of Washington, Seattle;University of Washington;Facebook AI Research;Department of Computer Science, University of Washington;Meta;Allen Institute for Artificial Intelligence", "aff_domain": "meta.com;uw.edu;cs.washington.edu;fb.com;cs.washington.edu;meta.com;allenai.org", "position": "Researcher;PhD student;PhD student;Research Scientist;Assistant Professor;Researcher;Senior Director of NLP Research", "bibtex": "@misc{\nli2023branchtrainmerge,\ntitle={Branch-Train-Merge: Embarrassingly Parallel Training of Expert Language Models},\nauthor={Margaret Li and Suchin Gururangan and Tim Dettmers and Mike Lewis and Tim Althoff and Noah A. Smith and Luke Zettlemoyer},\nyear={2023},\nurl={https://openreview.net/forum?id=I8ly64E5Nt}\n}", "github": "", "project": "", "reviewers": "B2er;Rw95;kH7N;uT5X", "site": "https://openreview.net/forum?id=I8ly64E5Nt", "pdf_size": 887048, "recommendation": "5;5;5;6", "confidence": "4;2;3;4", "correctness": "4;3;3;3", "technical_novelty": "3;2;2;3", "empirical_novelty": "3;2;2;3", "wc_summary_paper": "76;64;58;92", "wc_strength_and_weaknesses": "174;87;207;111", "wc_clarity_quality_novelty_and_reproducibility": "71;34;34;83", "wc_summary_review": "62;45;17;67", "wc_review": "383;230;316;353", "wc_reply_reviewers": "88;0;0;0", "wc_reply_authors": "520;295;552;353", "reply_reviewers": "1;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 72.5, 12.99038105676658 ], "wc_strength_and_weaknesses_avg": [ 144.75, 47.97069417884215 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 55.5, 21.914607000811127 ], "wc_summary_review_avg": [ 47.75, 19.536824204563032 ], "wc_review_avg": [ 320.5, 57.38684518249805 ], "wc_reply_reviewers_avg": [ 22.0, 38.1051177665153 ], "wc_reply_authors_avg": [ 430.0, 108.55643693489576 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.5222329678670935, "corr_recommendation_correctness": -0.3333333333333333, "gs_citation": 173, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11112995280041029220&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;1;0;1;0;2", "aff_unique_norm": "Meta;University of Washington;Allen Institute for Artificial Intelligence", "aff_unique_dep": "Meta Platforms, Inc.;;", "aff_unique_url": 
"https://meta.com;https://www.washington.edu;https://allenai.org", "aff_unique_abbr": "Meta;UW;AI2", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Seattle", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "I9J8gIyqRE", "title": "Lmser-pix2seq: Learning Stable Sketch Representations For Sketch Healing", "track": "main", "status": "Reject", "tldr": "", "abstract": "Sketch healing aims to recreate a complete sketch from the corrupted one. The sparse and abstract nature of the sketch makes it challenging due to the difficulty in learning. The features extracted from the corrupted sketch may be inconsistent with the ones from the corresponding full sketch. In this paper, we present Lmser-pix2seq to learn stable sketch representations against the missing information by employing a Least mean square error reconstruction (Lmser) block, which falls into encoder-decoder paradigm. Taking as input a corrupted sketch, the Lmser encoder computes the embeddings of structural patterns of the input, while the decoder reconstructs the complete sketch from the embeddings. We build bi-directional skip connections between the encoder and the decoder in our Lmser block. The feedback connections enable recurrent paths to receive more information about the reconstructed sketch produced by the decoder, which helps the encoder extract stable sketch features. The features captured by the Lmser block are eventually fed into a recurrent neural network decoder to recreate the sketches. Experimental results show that our Lmser-pix2seq outperforms the state-of-the-art methods in sketch healing, especially when the sketches are heavily masked or corrupted.", "keywords": "sketch healing;Lmser;stable representations;bi-directional connections", "primary_area": "", "supplementary_material": "/attachment/3488af446f7f5b20dbc1e2baae687cca9bc89781.zip", "author": "Tengjie Li;Sicong Zang;Shikui Tu;Lei Xu", "authorids": "~Tengjie_Li2;~Sicong_Zang1;~Shikui_Tu1;~Lei_Xu7", "gender": ";M;M;M", "homepage": "https://sczang.github.io;http://www.cs.sjtu.edu.cn/~tushikui;http://www.cse.cuhk.edu.hk/~lxu/;https://github.com/TjieLee", "dblp": ";04/115;19/360-1;", "google_scholar": "https://scholar.google,com/citations?user=0R8uikQAAAAJ;Yewd61kAAAAJ;rN2ny9kAAAAJ;", "orcid": "0000-0002-0548-5356;0000-0001-6270-0449;0000-0002-2752-1573;", "linkedin": ";;;", "or_profile": "~Sicong_Zang1;~Shikui_Tu1;~Lei_Xu7;~tengjie_li1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "position": "PhD student;Associate Professor;Full Professor;PhD student", "bibtex": "@misc{\nli2023lmserpixseq,\ntitle={Lmser-pix2seq: Learning Stable Sketch Representations For Sketch Healing},\nauthor={Tengjie Li and Sicong Zang and Shikui Tu and Lei Xu},\nyear={2023},\nurl={https://openreview.net/forum?id=I9J8gIyqRE}\n}", "github": "", "project": "", "reviewers": "oNd8;hWp5;rRmX;po7R", "site": "https://openreview.net/forum?id=I9J8gIyqRE", "pdf_size": 3153120, "recommendation": "3;5;5;8", "confidence": "3;2;3;5", "correctness": "3;3;4;3", "technical_novelty": "1;2;2;4", "empirical_novelty": "1;3;1;2", "wc_summary_paper": "97;25;106;111", "wc_strength_and_weaknesses": "68;150;129;150", "wc_clarity_quality_novelty_and_reproducibility": "16;57;19;388", "wc_summary_review": "14;14;154;84", "wc_review": "195;246;408;733", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": 
"387;511;336;1025", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;2", "recommendation_avg": [ 5.25, 1.7853571071357126 ], "confidence_avg": [ 3.25, 1.0897247358851685 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 1.0897247358851685 ], "empirical_novelty_avg": [ 1.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 84.75, 34.859539583878615 ], "wc_strength_and_weaknesses_avg": [ 124.25, 33.58850249713434 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 120.0, 155.57152695785948 ], "wc_summary_review_avg": [ 66.5, 58.0409338312195 ], "wc_review_avg": [ 395.5, 210.12674746447678 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 564.75, 273.24016450734325 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.7388664511337208, "corr_recommendation_correctness": -0.08084520834544431, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17094490031718378005&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "IA96Pn7A08h", "title": "Learning to Split for Automatic Bias Detection", "track": "main", "status": "Reject", "tldr": "We propose ls, an algorithm that learns to split the given dataset so that predictors trained on the training split cannot generalize to the testing split.", "abstract": "Classifiers are biased when trained on biased datasets. As a remedy, we propose Learning to Split (ls), an algorithm for automatic bias detection. Given a dataset with input-label pairs, ls learns to split this dataset so that predictors trained on the training split cannot generalize to the testing split. This performance gap suggests that the testing split is under-represented in the dataset, which is a signal of potential bias. Identifying non-generalizable splits is challenging since we have no annotations about the bias. In this work, we show that the prediction correctness of each example in the testing split can be used as a source of weak supervision: generalization performance will drop if we move examples that are predicted correctly away from the testing split, leaving only those that are mispredicted. ls is task-agnostic and can be applied to any supervised learning problem, ranging from natural language understanding and image classification to molecular property prediction. Empirical results show that ls is able to generate astonishingly challenging splits that correlate with human-identified biases. Moreover, we demonstrate that combining robust learning algorithms (such as group DRO) with splits identified by ls enables automatic de-biasing. Compared to previous state-of-the-art, we substantially improve the worst-group performance (23.4% on average) when the source of biases is unknown during training and validation. 
Our code is included in the supplemental materials and will be publicly available.", "keywords": "bias;robustness;spurious correlation", "primary_area": "", "supplementary_material": "/attachment/b3bf4acba809e8e72fbfa1d9768ab5c4e85696ff.zip", "author": "Yujia Bao;Regina Barzilay", "authorids": "~Yujia_Bao1;~Regina_Barzilay1", "gender": "M;female", "homepage": "https://people.csail.mit.edu/yujia/;https://www.regina.csail.mit.edu/", "dblp": "214/4122;b/ReginaBarzilay", "google_scholar": "https://scholar.google.com/citations?authorid=Ee4Peu4AAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Yujia_Bao1;~Regina_Barzilay1", "aff": "Insitro;Massachusetts Institute of Technology", "aff_domain": "insitro.com;mit.edu", "position": "Researcher;Professor", "bibtex": "@misc{\nbao2023learning,\ntitle={Learning to Split for Automatic Bias Detection},\nauthor={Yujia Bao and Regina Barzilay},\nyear={2023},\nurl={https://openreview.net/forum?id=IA96Pn7A08h}\n}", "github": "", "project": "", "reviewers": "Jd9v;GE6C;pwSX;Fe8j", "site": "https://openreview.net/forum?id=IA96Pn7A08h", "pdf_size": 1602842, "recommendation": "5;5;6;8", "confidence": "4;4;4;4", "correctness": "1;3;4;3", "technical_novelty": "2;3;4;4", "empirical_novelty": "2;3;4;3", "wc_summary_paper": "134;89;138;123", "wc_strength_and_weaknesses": "633;97;234;148", "wc_clarity_quality_novelty_and_reproducibility": "191;413;32;15", "wc_summary_review": "133;22;39;13", "wc_review": "1091;621;443;299", "wc_reply_reviewers": "818;117;33;20", "wc_reply_authors": "2756;845;433;295", "reply_reviewers": "3;2;1;1", "reply_authors": "6;2;1;2", "recommendation_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.75, 1.0897247358851685 ], "technical_novelty_avg": [ 3.25, 0.82915619758885 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 121.0, 19.27433526739638 ], "wc_strength_and_weaknesses_avg": [ 278.0, 210.72612557535433 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 162.75, 159.95995592647554 ], "wc_summary_review_avg": [ 51.75, 47.82977629050757 ], "wc_review_avg": [ 613.5, 298.34669430043965 ], "wc_reply_reviewers_avg": [ 247.0, 331.7627164103887 ], "wc_reply_authors_avg": [ 1082.25, 987.2961498456276 ], "reply_reviewers_avg": [ 1.75, 0.82915619758885 ], "reply_authors_avg": [ 2.75, 1.920286436967152 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.37463432463267754, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9452139112172120611&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Insitro;Massachusetts Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.insitro.com;https://web.mit.edu", "aff_unique_abbr": ";MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "IAIrNRktVWR", "title": "Multimedia Generative Script Learning for Task Planning", "track": "main", "status": "Withdraw", "tldr": "We introduce a new multimedia generative script learning task with a new benchmark; novel visually trackable, inductive, diverse script learning methods; and a new multimodal-retrieval-based metric.", "abstract": "Goal-oriented generative script learning aims to generate subsequent steps to reach a particular goal, which is an essential task to assist robots or humans in performing stereotypical 
activities in daily life. However, an important aspect of this process is the ability to capture historical states visually, which provides detailed information that is not covered by text and will guide subsequent steps. Therefore, we propose a new task, Multimedia Generative Script Learning, to generate subsequent steps by tracking historical states in both text and vision modalities, as well as presenting the first benchmark containing 5,652 tasks and 79,089 steps with descriptive images. This task is challenging in three aspects: the multimedia challenge of capturing the visual states in images, the induction challenge of performing unseen tasks, and the diversity challenge of covering different information in individual steps. We propose to encode visual state changes through a selective multimedia encoder to address the multimedia challenge, transfer knowledge from previously observed tasks using a retrieval-augmented decoder to overcome the induction challenge, and further present distinct information at each step by optimizing a diversity-oriented contrastive learning objective. We define metrics to evaluate both generation quality and inductive quality. Experiment results demonstrate that our approach significantly outperforms strong baselines.", "keywords": "multimedia generative script learning;contrastive learning;retrieval-augmented generation;selective multimedia encoding;procedure planning", "primary_area": "", "supplementary_material": "/attachment/8cc7ff84082d66df08446aa20a56f130f5ef581b.zip", "author": "Qingyun Wang;Manling Li;Hou Pong Chan;Lifu Huang;Julia Hockenmaier;Girish Chowdhary;Heng Ji", "authorids": "~Qingyun_Wang1;~Manling_Li1;~Hou_Pong_Chan2;~Lifu_Huang1;~Julia_Hockenmaier1;~Girish_Chowdhary1;~Heng_Ji3", "gender": "M;F;M;M;F;M;F", "homepage": "https://eaglew.github.io/;https://limanling.github.io/;https://kenchan0226.github.io;https://wilburone.github.io/;https://cs.illinois.edu/directory/profile/juliahmr;http://www.daslab.illinois.edu;http://blender.cs.illinois.edu/hengji.html", "dblp": "53/3310-5;178/3620;178/3691.html;127/0072;64/2448;09/5775;", "google_scholar": "HQcZOHMAAAAJ;6U4SXnUAAAAJ;HCljxf0AAAAJ;76IEGtYAAAAJ;https://scholar.google.com.tw/citations?user=iIiVrrQAAAAJ;pf2zAXkAAAAJ;z7GCqT4AAAAJ", "orcid": "0000-0002-2659-6100;;0000-0001-9207-4178;;;;", "linkedin": "qingyunwang/;;;;;girishchowdhary/;", "or_profile": "~Qingyun_Wang1;~Manling_Li1;~Hou_Pong_Chan2;~Lifu_Huang1;~Julia_Hockenmaier1;~Girish_Chowdhary1;~Heng_Ji3", "aff": "University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;University of Macau;Virginia Tech;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;University of Illinois, Urbana-Champaign", "aff_domain": "illinois.edu;illinois.edu;umac.mo;vt.edu;illinois.edu;illinois.edu;uiuc.edu", "position": "PhD student;PhD student;Lecturer;Assistant Professor;Full Professor;Associate Professor;Full Professor", "bibtex": "@misc{\nwang2023multimedia,\ntitle={Multimedia Generative Script Learning for Task Planning},\nauthor={Qingyun Wang and Manling Li and Hou Pong Chan and Lifu Huang and Julia Hockenmaier and Girish Chowdhary and Heng Ji},\nyear={2023},\nurl={https://openreview.net/forum?id=IAIrNRktVWR}\n}", "github": "", "project": "", "reviewers": "oNPU;8Xq1;F6rc", "site": "https://openreview.net/forum?id=IAIrNRktVWR", "pdf_size": 4223541, "recommendation": "3;5;8", "confidence": "5;2;5", "correctness": "2;3;4", "technical_novelty": "2;2;4", "empirical_novelty": "2;3;4", "wc_summary_paper": 
"74;78;72", "wc_strength_and_weaknesses": "105;383;92", "wc_clarity_quality_novelty_and_reproducibility": "785;79;46", "wc_summary_review": "55;39;83", "wc_review": "1019;579;293", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1431;820;104", "reply_reviewers": "0;0;0", "reply_authors": "2;2;1", "recommendation_avg": [ 5.333333333333333, 2.0548046676563256 ], "confidence_avg": [ 4.0, 1.4142135623730951 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 2.6666666666666665, 0.9428090415820634 ], "empirical_novelty_avg": [ 3.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 74.66666666666667, 2.494438257849294 ], "wc_strength_and_weaknesses_avg": [ 193.33333333333334, 134.21955479321517 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 303.3333333333333, 340.8561117082821 ], "wc_summary_review_avg": [ 59.0, 18.184242262647807 ], "wc_review_avg": [ 630.3333333333334, 298.60267171536753 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 785.0, 542.3104891726387 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.1147078669352809, "corr_recommendation_correctness": 0.9933992677987828, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7137499096242098484&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff_unique_index": "0;0;1;2;0;0;3", "aff_unique_norm": "University of Illinois Urbana-Champaign;University of Macau;Virginia Tech;University of Illinois", "aff_unique_dep": ";;;", "aff_unique_url": "https://illinois.edu;https://www.um.edu.mo;https://www.vt.edu;https://illinois.edu", "aff_unique_abbr": "UIUC;UM;VT;UIUC", "aff_campus_unique_index": "0;0;1;0;0;0", "aff_campus_unique": "Urbana-Champaign;Macau SAR;", "aff_country_unique_index": "0;0;1;0;0;0;0", "aff_country_unique": "United States;China" }, { "id": "IAy-lKeb3z", "title": "Sharpness-aware Quantization for Deep Neural Networks", "track": "main", "status": "Withdraw", "tldr": "We propose a novel method, dubbed Sharpness-Aware Quantization (SAQ), to smooth the loss landscape and improve the generalization performance of the quantized models.", "abstract": "Network quantization has gained increasing attention since it can significantly reduce the model size and computational overhead. However, due to the discrete nature of quantization, a small change in full-precision weights might incur large change in quantized weights, which leads to severe loss fluctuations and thus results in sharp loss landscape. The fluctuating loss makes the gradients unstable during training, resulting in considerable performance degradation. Recently, Sharpness-Aware Minimization (SAM) has been proposed to smooth the loss landscape and improve the generalization performance of the models. Nevertheless, how to customize SAM to the quantized models is non-trivial due to the effect of the clipping and discretization in quantization. In this paper, we propose a novel method, dubbed Sharpness-Aware Quantization (SAQ), to smooth the loss landscape and improve the generalization performance of the quantized models, which explores the effect of SAM in model compression, particularly quantization for the first time. Specifically, we first propose a unified view for quantization and SAM, where we consider them as introducing quantization noises and adversarial perturbations to the model weights. 
According to whether the quantization noises and adversarial perturbations depend on each other, SAQ can be divided into three cases. We then analyze and compare different cases comprehensively. Extensive experiments on both convolutional neural networks and Transformers show that SAQ improves the generalization performance of the quantized models, yielding the SOTA results in uniform quantization. For example, on ImageNet, our SAQ outperforms the model trained with the conventional optimization procedure (i.e., SGD) by 1.1% on the Top-1 accuracy on 4-bit ResNet-50. Our 4-bit ResNet-34 surpasses the previous SOTA quantization method by 1.0% on the Top-1 accuracy.", "keywords": "Sharpness-aware Minimization;Quantization;CNNs;Transformers", "primary_area": "", "supplementary_material": "/attachment/40065f39b75b41bc972f96810b9f951b38c31b4b.zip", "author": "Jing Liu;Jianfei Cai;Bohan Zhuang", "authorids": "~Jing_Liu8;~Jianfei_Cai1;~Bohan_Zhuang1", "gender": "M;M;M", "homepage": "https://www.jing-liu.com/;https://jianfei-cai.github.io/;https://bohanzhuang.github.io/", "dblp": "72/2590-48;83/6096;145/1096", "google_scholar": "-lHaZH4AAAAJ;https://scholar.google.com.tw/citations?user=N6czCoUAAAAJ;https://scholar.google.com.au/citations?user=DFuDBBwAAAAJ", "orcid": "0000-0002-6745-3050;;", "linkedin": "jing-liu-619688133/;;bohan-zhuang/", "or_profile": "~Jing_Liu8;~Jianfei_Cai1;~Bohan_Zhuang1", "aff": "Monash University;Monash University;Monash University", "aff_domain": "monash.edu.au;monash.edu;monash.edu", "position": "PhD student;Full Professor;Assistant Professor", "bibtex": "@misc{\nliu2023sharpnessaware,\ntitle={Sharpness-aware Quantization for Deep Neural Networks},\nauthor={Jing Liu and Jianfei Cai and Bohan Zhuang},\nyear={2023},\nurl={https://openreview.net/forum?id=IAy-lKeb3z}\n}", "github": "", "project": "", "reviewers": "mZxn;qbwr;Nx5X;CNY9", "site": "https://openreview.net/forum?id=IAy-lKeb3z", "pdf_size": 538829, "recommendation": "3;3;3;5", "confidence": "4;5;4;4", "correctness": "3;3;3;2", "technical_novelty": "2;3;1;2", "empirical_novelty": "2;2;1;3", "wc_summary_paper": "63;50;34;61", "wc_strength_and_weaknesses": "268;257;107;566", "wc_clarity_quality_novelty_and_reproducibility": "40;71;31;107", "wc_summary_review": "63;83;49;82", "wc_review": "434;461;221;816", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 52.0, 11.510864433221338 ], "wc_strength_and_weaknesses_avg": [ 299.5, 166.49099074724734 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 62.25, 29.794084983432533 ], "wc_summary_review_avg": [ 69.25, 14.148763196831021 ], "wc_review_avg": [ 483.0, 213.55210137107056 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": -1.0, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1516183312905242173&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Monash University", "aff_unique_dep": "", "aff_unique_url": 
"https://www.monash.edu", "aff_unique_abbr": "Monash", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Australia" }, { "id": "IB5Njg_ztYB", "title": "GraphVF: Controllable Protein-Specific 3D Molecule Generation with Variational Flow", "track": "main", "status": "Reject", "tldr": "", "abstract": "Designing molecules that bind to specific target proteins is a fundamental task in drug discovery. Recent generative models leveraging geometrical constraints imposed by proteins and molecules have shown great potential in generating protein-specific 3D molecules. Nevertheless, these existing methods fail to generate 3D molecules with 2D skeletal curtailments, which encode pharmacophoric patterns essential to drug potency. To cope with this challenge, we propose GraphVF, which seamlessly integrates geometrical and skeletal restraints into a variational flow framework, where the former is captured through a flow transformation and the latter is encoded by an amortized factorized Gaussian. We empirically verify that our method achieves state-of-the-art performance on protein-specific 3D molecule generation in terms of binding affinity and some other drug properties. In particular, it represents the first controllable geometry-aware, protein-specific molecule generation method, which enables creating 3D molecules with specified chemical sub-structures or drug properties.\n", "keywords": "Controllable Molecular Generation;Pocket-based Drug Design;Variational Flow", "primary_area": "", "supplementary_material": "/attachment/9843e9a191126b9df3d8d50803c95b2a52cd19e4.zip", "author": "Fang Sun;Zhihao Zhan;Hongyu Guo;Ming Zhang;Jian Tang", "authorids": "~Fang_Sun2;~Zhihao_Zhan1;~Hongyu_Guo1;~Ming_Zhang5;~Jian_Tang1", "gender": ";M;M;F;", "homepage": ";;https://hongyuharryguo.github.io/;https://cs.pku.edu.cn/info/1080/1371.htm;http://www.jian-tang.com", "dblp": ";;;73/1844-4;181/2667-5", "google_scholar": ";;https://scholar.google.ca/citations?user=bZUqlakAAAAJ;LbzoQBsAAAAJ;https://scholar.google.ca/citations?user=1ir6WUEAAAAJ", "orcid": "0000-0001-5693-0379;;;0000-0002-9809-3430;", "linkedin": ";%E8%87%B4%E8%B1%AA-%E8%A9%B9-648427245/;harry-h-y-guo-a582087/;;", "or_profile": "~Fang_Sun2;~Zhihao_Zhan1;~Hongyu_Guo1;~Ming_Zhang5;~Jian_Tang1", "aff": "Peking University;Peking University;National Research Council Canada;Peking University;Mila, HEC Montreal", "aff_domain": "pku.edu.cn;pku.edu.cn;nrc-cnrc.gc.ca;pku.edu.cn;hec.ca", "position": "Undergrad student;Undergrad student;Senior Research Officer;Full Professor;Assistant Professor", "bibtex": "@misc{\nsun2023graphvf,\ntitle={Graph{VF}: Controllable Protein-Specific 3D Molecule Generation with Variational Flow},\nauthor={Fang Sun and Zhihao Zhan and Hongyu Guo and Ming Zhang and Jian Tang},\nyear={2023},\nurl={https://openreview.net/forum?id=IB5Njg_ztYB}\n}", "github": "", "project": "", "reviewers": "Ruu5;uWFf;6f2G;Mkbw", "site": "https://openreview.net/forum?id=IB5Njg_ztYB", "pdf_size": 2402447, "recommendation": "1;3;3;3", "confidence": "5;3;5;3", "correctness": "1;2;2;2", "technical_novelty": "2;2;2;2", "empirical_novelty": "2;1;3;2", "wc_summary_paper": "25;39;55;30", "wc_strength_and_weaknesses": "86;68;490;209", "wc_clarity_quality_novelty_and_reproducibility": "4;132;49;43", "wc_summary_review": "38;21;48;51", "wc_review": "153;260;642;333", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "622;519;800;778", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;2;2", 
"recommendation_avg": [ 2.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 1.0 ], "correctness_avg": [ 1.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 37.25, 11.409973707244026 ], "wc_strength_and_weaknesses_avg": [ 213.25, 168.74444435299196 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 57.0, 46.62081080376016 ], "wc_summary_review_avg": [ 39.5, 11.715374513859981 ], "wc_review_avg": [ 347.0, 181.95191672527113 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 679.75, 115.42178087345559 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 1.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=84599578183916793&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;1;0;2", "aff_unique_norm": "Peking University;National Research Council Canada;HEC Montreal", "aff_unique_dep": ";;HEC Business School", "aff_unique_url": "http://www.pku.edu.cn;https://www.nrc-cnrc.gc.ca;https://www.hec.ca", "aff_unique_abbr": "Peking U;NRC-CNRC;HEC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Montreal", "aff_country_unique_index": "0;0;1;0;1", "aff_country_unique": "China;Canada" }, { "id": "IC8LwiOLKFr", "title": "Some Practical Concerns and Solutions for Using Pretrained Representation in Industrial Systems", "track": "main", "status": "Reject", "tldr": "We investigate some practical concerns and solutions for using pretrained representation in industrial systems.", "abstract": "Deep learning has dramatically changed the way data scientists and engineers craft features -- the once tedious process of measuring and constructing can now be achieved by training learnable representations. Recent work shows pretraining can endow representations with relevant signals, and in practice they are often used as feature vectors in downstream models. In real-world production, however, we have encountered key problems that cannot be justified by existing knowledge. They raise concerns that the naive use of pretrained representation as feature vector could lead to unwarranted and suboptimal solution.\nOur investigation reveals critical insights into the gap of uniform convergence for analyzing pretrained representations, their stochastic nature under gradient descent optimization, what does model convergence means to them, and how they might interact with downstream tasks. Inspired by our analysis, we explore a simple yet powerful approach that can refine pretrained representation in multiple ways, which we call \"Featurizing Pretrained Representations\". Our work balances practicality and rigor, and contributes to both applied and theoretical research of representation learning. 
", "keywords": "Representation Learning;Stability;Generalization;Convergence;Predictability;Industry Application", "primary_area": "", "supplementary_material": "/attachment/df62e6775f25e527f6af9f7539b59af7ace461e0.zip", "author": "Da Xu", "authorids": "~Da_Xu2", "gender": "M", "homepage": "", "dblp": "", "google_scholar": "-jl6A84AAAAJ", "orcid": "", "linkedin": "", "or_profile": "~Da_Xu2", "aff": "University of California, Berkeley", "aff_domain": "berkeley.edu", "position": "PhD student", "bibtex": "@misc{\nxu2023some,\ntitle={Some Practical Concerns and Solutions for Using Pretrained Representation in Industrial Systems},\nauthor={Da Xu},\nyear={2023},\nurl={https://openreview.net/forum?id=IC8LwiOLKFr}\n}", "github": "", "project": "", "reviewers": "VPk5;BWrq;vPVs", "site": "https://openreview.net/forum?id=IC8LwiOLKFr", "pdf_size": 1778346, "recommendation": "3;6;6", "confidence": "4;3;2", "correctness": "2;3;3", "technical_novelty": "2;3;3", "empirical_novelty": "2;3;3", "wc_summary_paper": "92;90;53", "wc_strength_and_weaknesses": "596;359;67", "wc_clarity_quality_novelty_and_reproducibility": "75;4;9", "wc_summary_review": "310;40;89", "wc_review": "1073;493;218", "wc_reply_reviewers": "325;22;43", "wc_reply_authors": "1780;737;762", "reply_reviewers": "2;1;1", "reply_authors": "4;1;1", "recommendation_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 78.33333333333333, 17.93197020841702 ], "wc_strength_and_weaknesses_avg": [ 340.6666666666667, 216.35207931106697 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 29.333333333333332, 32.355662392986005 ], "wc_summary_review_avg": [ 146.33333333333334, 117.44596866455466 ], "wc_review_avg": [ 594.6666666666666, 356.3783881712744 ], "wc_reply_reviewers_avg": [ 130.0, 138.15209010362457 ], "wc_reply_authors_avg": [ 1093.0, 485.8895622121005 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 1.4142135623730951 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 1, 0 ], "corr_recommendation_confidence": -0.8660254037844387, "corr_recommendation_correctness": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:VxbO8A1d_n4J:scholar.google.com/&scioq=Some+Practical+Concerns+and+Solutions+for+Using+Pretrained+Representation+in+Industrial+Systems&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Predicting Cellular Responses with Variational Causal Inference and Refined Relational Information", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12017", "id": "ICYasJBlZNs", "poster": "/media/PosterPDFs/ICLR%202023/12017.png?t=1681343396.1488395", "openreview": "https://openreview.net/forum?id=ICYasJBlZNs", "slides": "https://iclr.cc/virtual/2023/poster/12017", "video": "https://iclr.cc/virtual/2023/poster/12017", "author_site": "Yulun Wu, Rob Barton, Zichen Wang, Vassilis N. 
Ioannidis, Carlo De Donno, Layne Price, Luis Voloch, George Karypis", "tldr": "We predict single-cell perturbation responses using a graph variational Bayesian causal inference framework with distilled gene regulatory networks.", "abstract": "Predicting the responses of a cell under perturbations may bring important benefits to drug discovery and personalized therapeutics. In this work, we propose a novel graph variational Bayesian causal inference framework to predict a cell's gene expressions under counterfactual perturbations (perturbations that this cell did not factually receive), leveraging information representing biological knowledge in the form of gene regulatory networks (GRNs) to aid individualized cellular response predictions. Aiming at a data-adaptive GRN, we also developed an adjacency matrix updating technique for graph convolutional networks and used it to refine GRNs during pre-training, which generated more insights on gene relations and enhanced model performance. Additionally, we propose a robust estimator within our framework for the asymptotically efficient estimation of marginal perturbation effect, which is yet to be carried out in previous works. With extensive experiments, we exhibited the advantage of our approach over state-of-the-art deep learning models for individual response prediction.", "keywords": "graph neural network;causal inference;variational bayes;asymptotic statistics;single-cell perturbation", "primary_area": "", "supplementary_material": "/attachment/b74704fcaedf67bb17e69a4165c7460204098c87.zip", "author": "Yulun Wu;Rob Barton;Zichen Wang;Vassilis N. Ioannidis;Carlo De Donno;Layne C Price;Luis F. Voloch;George Karypis", "authorids": "~Yulun_Wu1;~Rob_Barton1;~Zichen_Wang3;~Vassilis_N._Ioannidis1;~Carlo_De_Donno1;~Layne_C_Price1;luis@immunai.com;~George_Karypis1", "gender": ";M;;;M;;;M", "homepage": "https://github.com/yulun-rayn;;https://wangz10.github.io/;https://scholar.google.com/citations?hl=en&user=mjmiI4sAAAAJ&view_op=list_works&authuser=1;;;;", "dblp": ";292/7971;118/3574;;;;;", "google_scholar": "5QJJxS4AAAAJ;uIGfO6oAAAAJ;bwLMCp4AAAAJ;;https://scholar.google.de/citations?user=Zc5hDnUAAAAJ;JpHkhWAAAAAJ;;ElqwScwAAAAJ", "orcid": ";;0000-0002-1415-1286;0000-0002-8367-0733;;;;", "linkedin": "yu-lun-wu/;;;;;;;", "or_profile": "~Yulun_Wu1;~Rob_Barton1;~Zichen_Wang3;~Vassilis_N._Ioannidis1;~Carlo_De_Donno1;~Layne_C_Price1;luis@immunai.com;~George_Karypis1", "aff": "University of California, Berkeley;Amazon;Amazon;Amazon Web Services;HMGU;;;University of Minnesota, Minneapolis", "aff_domain": "berkeley.edu;amazon.com;amazon.com;amazon.com;helmholtz-muenchen.de;;;umn.edu", "position": "PhD student;Researcher;Researcher;Applied Scientist II;PhD student;;;Full Professor", "bibtex": "@inproceedings{\nwu2023predicting,\ntitle={Predicting Cellular Responses with Variational Causal Inference and Refined Relational Information},\nauthor={Yulun Wu and Rob Barton and Zichen Wang and Vassilis N. Ioannidis and Carlo De Donno and Layne C Price and Luis F. 
Voloch and George Karypis},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=ICYasJBlZNs}\n}", "github": "", "project": "", "reviewers": "TW7F;qD32;kcXz;FrCx", "pdf_size": 733304, "recommendation": "5;6;6;8", "confidence": "4;3;3;3", "correctness": "3;3;3;4", "technical_novelty": "3;2;3;3", "empirical_novelty": "3;3;3;4", "wc_summary_paper": "55;64;83;110", "wc_strength_and_weaknesses": "52;167;131;262", "wc_clarity_quality_novelty_and_reproducibility": "2;23;43;2", "wc_summary_review": "46;37;71;2", "wc_review": "155;291;328;376", "wc_reply_reviewers": "0;0;0;9", "wc_reply_authors": "631;670;727;556", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;1", "recommendation_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 78.0, 21.059439688652688 ], "wc_strength_and_weaknesses_avg": [ 153.0, 75.43540282917564 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 17.5, 17.03672503740082 ], "wc_summary_review_avg": [ 39.0, 24.728526037756478 ], "wc_review_avg": [ 287.5, 82.22073947612002 ], "wc_reply_reviewers_avg": [ 2.25, 3.897114317029974 ], "wc_reply_authors_avg": [ 646.0, 62.17314532818812 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": -0.6622661785325219, "corr_recommendation_correctness": 0.9271726499455306, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4307995622549511536&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=ICYasJBlZNs", "email": "berkeley.edu;amazon.com;amazon.com;amazon.com;helmholtz-muenchen.de;;;umn.edu", "author_num": 8, "aff_unique_index": "0;1;1;1;2;3", "aff_unique_norm": "University of California, Berkeley;Amazon;Heidelberg University;University of Minnesota", "aff_unique_dep": ";Amazon.com, Inc.;;", "aff_unique_url": "https://www.berkeley.edu;https://www.amazon.com;https://www.uni-heidelberg.de/;https://www.minnesota.edu", "aff_unique_abbr": "UC Berkeley;Amazon;HMGU;UMN", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Berkeley;;Minneapolis", "aff_country_unique_index": "0;0;0;0;1;0", "aff_country_unique": "United States;Germany" }, { "title": "SQA3D: Situated Question Answering in 3D Scenes", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12163", "id": "IDJx97BC38", "poster": "", "openreview": "https://openreview.net/forum?id=IDJx97BC38", "slides": "https://iclr.cc/virtual/2023/poster/12163", "video": "https://iclr.cc/virtual/2023/poster/12163", "author_site": "Xiaojian Ma, Silong Yong, Zilong Zheng, Qing Li, Yitao Liang, Song-Chun Zhu, Siyuan Huang", "tldr": "We introduce a grand challenge for embodied agents to understand situations and reason about 3D scenes accordingly.", "abstract": "We propose a new task to benchmark scene understanding of embodied agents: Situated Question Answering in 3D Scenes (SQA3D). Given a scene context (e.g., 3D scan), SQA3D requires the tested agent to first understand its situation (position, orientation, etc.) in the 3D scene as described by text, then reason about its surrounding environment and answer a question under that situation. 
Based upon 650 scenes from ScanNet, we provide a dataset centered around 6.8k unique situations, along with 20.4k descriptions and 33.4k diverse reasoning questions for these situations. These questions examine a wide spectrum of reasoning capabilities for an intelligent agent, ranging from spatial relation comprehension to commonsense understanding, navigation, and multi-hop reasoning. SQA3D poses a significant challenge to current multi-modal models, especially 3D reasoning models. We evaluate various state-of-the-art approaches and find that the best one only achieves an overall score of 47.20%, while amateur human participants can reach 90.06%. We believe SQA3D could facilitate future embodied AI research with stronger situation understanding and reasoning capability.", "keywords": "3D vision;scene understanding;visual question answering;embodied AI", "primary_area": "", "supplementary_material": "", "author": "Xiaojian Ma;Silong Yong;Zilong Zheng;Qing Li;Yitao Liang;Song-Chun Zhu;Siyuan Huang", "authorids": "~Xiaojian_Ma1;~Silong_Yong1;~Zilong_Zheng1;~Qing_Li1;~Yitao_Liang1;~Song-Chun_Zhu1;~Siyuan_Huang2", "gender": ";M;M;M;M;M;M", "homepage": ";https://github.com/SilongYong;http://zilongzheng.github.io;http://liqing-ustc.github.io/;https://web.cs.ucla.edu/~yliang/;https://zhusongchun.net/;https://siyuanhuang.com/", "dblp": ";;218/5234;181/2689-3;173/4969;10/10313;62/885-1", "google_scholar": ";EitVAcwAAAAJ;9sDx70IAAAAJ;iwdFZBEAAAAJ;KVzR1XEAAAAJ;https://scholar.google.com.tw/citations?user=Al8dyb4AAAAJ;1NN7Ee8AAAAJ", "orcid": ";;;;;;", "linkedin": ";;;;;;", "or_profile": "~Xiaojian_Ma1;~Silong_Yong1;~Zilong_Zheng1;~Qing_Li1;~Yitao_Liang1;~Song-Chun_Zhu1;~Siyuan_Huang2", "aff": ";Tsinghua University;Beijing Institute for General Artificial Intelligence;Beijing Institute for General Artificial Intelligence (BIGAI);Peking University;Peking University;Beijing Institute for General Artificial Intelligence", "aff_domain": ";mails.tsinghua.edu.cn;bigai.ai;bigai.ai;pku.edu.cn;pku.edu.cn;bigai.ai", "position": ";Undergrad student;Researcher;Researcher;Assistant Professor;Full Professor;Researcher", "bibtex": "@inproceedings{\nma2023sqad,\ntitle={{SQA}3D: Situated Question Answering in 3D Scenes},\nauthor={Xiaojian Ma and Silong Yong and Zilong Zheng and Qing Li and Yitao Liang and Song-Chun Zhu and Siyuan Huang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=IDJx97BC38}\n}", "github": "", "project": "", "reviewers": "ZAer;cywf;neRZ;4eyF", "pdf_size": 23925630, "recommendation": "6;6;6;8", "confidence": "3;5;4;3", "correctness": "3;3;3;3", "technical_novelty": "3;3;2;2", "empirical_novelty": "3;2;4;2", "wc_summary_paper": "76;121;142;74", "wc_strength_and_weaknesses": "187;175;322;343", "wc_clarity_quality_novelty_and_reproducibility": "100;148;135;6", "wc_summary_review": "55;11;77;56", "wc_review": "418;455;676;479", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 103.25, 29.217931138258233 ], "wc_strength_and_weaknesses_avg": [ 256.75, 76.23114520981565 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 97.25, 55.53095983323177 ], "wc_summary_review_avg": [ 49.75, 24.0351305384431 ],
"wc_review_avg": [ 507.0, 99.96249296611204 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.5222329678670935, "corr_recommendation_correctness": 0.0, "gs_citation": 142, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3635422699359377641&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=IDJx97BC38", "email": ";mails.tsinghua.edu.cn;bigai.ai;bigai.ai;pku.edu.cn;pku.edu.cn;bigai.ai", "author_num": 7, "aff_unique_index": "0;1;1;2;2;1", "aff_unique_norm": "Tsinghua University;Beijing Institute for General Artificial Intelligence;Peking University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tsinghua.edu.cn;http://www.bigaiai.org/;http://www.pku.edu.cn", "aff_unique_abbr": "THU;BIGAI;Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "IDSXUFQeZO5", "title": "NeuralPCG: Learning Preconditioner for Solving Partial Differential Equations with Graph Neural Network", "track": "main", "status": "Reject", "tldr": "", "abstract": "Fast and accurate partial differential equation (PDE) solvers empower scientific and engineering research. Classic numerical solvers provide unparalleled accuracy but often require extensive computation time. Machine learning solvers are significantly faster but lack convergence and accuracy guarantees. We present Neural-Network-Preconditioned Conjugate Gradient, or NeuralPCG, a novel linear second-order PDE solver that combines the benefits of classic iterative solvers and machine learning approaches. Our key observation is that both neural-network PDE solvers and classic preconditioners excel at obtaining fast but inexact solutions. NeuralPCG proposes to use neural network models to \\emph{precondition} PDE systems in classic iterative solvers. Compared with neural-network PDE solvers, NeuralPCG achieves converging and accurate solutions (e.g.,1e-12 precision) by construction. Compared with classic solvers, NeuralPCG is faster via data-driven preconditioners. 
We demonstrate the efficacy and generalizability of NeuralPCG by conducting extensive experiments on various 2D and 3D linear second-order PDEs.", "keywords": "Physics Simulation;Graph Neural Network;Applied Mathematics", "primary_area": "", "supplementary_material": "", "author": "Yichen Li;Tao Du;Peter Yichen Chen;Wojciech Matusik", "authorids": "~Yichen_Li2;~Tao_Du1;~Peter_Yichen_Chen1;~Wojciech_Matusik2", "gender": "F;;M;M", "homepage": ";https://people.iiis.tsinghua.edu.cn/~taodu/;https://peterchencyc.com;https://cdfg.mit.edu/wojciech", "dblp": ";51/3026-1;230/7889;", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;9TX3RmEAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-5659-8748;0000-0001-7337-7667;;0000-0003-0212-5643", "linkedin": ";;;wojciech-matusik-67238126/", "or_profile": "~Yichen_Li2;~Tao_Du1;~Peter_Yichen_Chen1;~Wojciech_Matusik2", "aff": "Massachusetts Institute of Technology;Shanghai Qi Zhi Institute;MIT;Massachusetts Institute of Technology", "aff_domain": "mit.edu;sqz.ac.cn;csail.mit.edu;mit.edu", "position": "PhD student;Principal investigator;Postdoc;Full Professor", "bibtex": "@misc{\nli2023neuralpcg,\ntitle={Neural{PCG}: Learning Preconditioner for Solving Partial Differential Equations with Graph Neural Network},\nauthor={Yichen Li and Tao Du and Peter Yichen Chen and Wojciech Matusik},\nyear={2023},\nurl={https://openreview.net/forum?id=IDSXUFQeZO5}\n}", "github": "", "project": "", "reviewers": "G2sH;94zp;DNx8", "site": "https://openreview.net/forum?id=IDSXUFQeZO5", "pdf_size": 2527416, "recommendation": "3;5;5", "confidence": "3;3;1", "correctness": "3;3;3", "technical_novelty": "1;3;3", "empirical_novelty": "1;3;3", "wc_summary_paper": "45;83;31", "wc_strength_and_weaknesses": "650;62;53", "wc_clarity_quality_novelty_and_reproducibility": "19;79;15", "wc_summary_review": "49;135;9", "wc_review": "763;359;108", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1174;353;35", "reply_reviewers": "0;0;0", "reply_authors": "2;1;1", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 2.3333333333333335, 0.9428090415820634 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.3333333333333335, 0.9428090415820634 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.9428090415820634 ], "wc_summary_paper_avg": [ 53.0, 21.96967607104544 ], "wc_strength_and_weaknesses_avg": [ 255.0, 279.3313444638822 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 37.666666666666664, 29.272664533466862 ], "wc_summary_review_avg": [ 64.33333333333333, 52.56953067657687 ], "wc_review_avg": [ 410.0, 269.8233990347514 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 520.6666666666666, 479.8710474932013 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.5, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ww6xMcwDUwgJ:scholar.google.com/&scioq=NeuralPCG:+Learning+Preconditioner+for+Solving+Partial+Differential+Equations+with+Graph+Neural+Network&hl=en&as_sdt=0,11", "gs_version_total": 0, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Massachusetts Institute of Technology;Shanghai Qi Zhi Institute", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.qz.io", "aff_unique_abbr": "MIT;", "aff_campus_unique_index": "", 
"aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;China" }, { "id": "IERSU0La-Nt", "title": "FedPD: Defying data heterogeneity through privacy distillation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Model performance of federated learning (FL) typically suffers from data heterogeneity, i.e., data distribution varies with clients. Advanced works have already shown great potential for sharing client information to mitigate data heterogeneity. Yet, some literature shows a dilemma in preserving strong privacy and promoting model performance simultaneously. Revisiting the purpose of sharing information motivates us to raise the fundamental questions: Which part of the data is more critical for model generalization? Which part of the data is more privacy-sensitive? Can we solve this dilemma by sharing useful (for generalization) features and maintaining more sensitive data locally? Our work sheds light on data-dominated sharing and training, in a way that we decouple original training data into sensitive features and generalizable features. To be specific, we propose a \\textbf{Fed}erated \\textbf{P}rivacy \\textbf{D}istillation framework named FedPD to alleviate the privacy-performance dilemma. Namely, FedPD keeps the distilled sensitive features locally and constructs a global dataset using shared generalizable features in a differentially private manner. Accordingly, clients can perform local training on both the local and securely shared data for acquiring high model performance and avoiding the leakage of not distilled privacy. Theoretically, we demonstrate the superiority of the sharing-only useful feature strategy over sharing raw data. Empirically, we show the efficacy of FedPD in promoting performance with comprehensive experiments.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Zhiqin Brian Yang;Yonggang Zhang;Yu Zheng;Zhenheng TANG;Xiaowen Chu;Hao Peng;Bo Han", "authorids": "~Zhiqin_Brian_Yang1;~Yonggang_Zhang1;~Yu_Zheng10;~Zhenheng_TANG1;~Xiaowen_Chu1;~Hao_Peng7;~Bo_Han1", "gender": "M;M;F;M;M;M;M", "homepage": "https://visitworld123.github.io/;https://yonggangzhangben.github.io/index.html;https://yuzhengcuhk.github.io;;https://penghao-bdsc.github.io/;https://bhanml.github.io/;https://facultyprofiles.hkust-gz.edu.cn/faculty-personal-page/CHU-Xiaowen/xwchu", "dblp": "251/6782;27/6859-3;;234/7546;69/7742-1;241/0472-3;24/2536", "google_scholar": "DSjGPu0AAAAJ;XSbEr98AAAAJ;fH3uUgYAAAAJ;FlYcrEcAAAAJ;R25rbyQAAAAJ;nTNjqHwAAAAJ;https://scholar.google.com.hk/citations?user=v4rX24EAAAAJ", "orcid": ";0000-0002-4080-7592;;0000-0001-8769-9974;0000-0003-0458-5977;;0000-0001-9745-4372", "linkedin": ";;;;;;", "or_profile": "~Zhiqin_Brian_Yang1;~Yonggang_Zhang1;~Yu_Zheng10;~Zhenheng_TANG1;~Hao_Peng7;~bo_han2;~Xiaowen_Chu2", "aff": "Beihang University;Hong Kong Baptist University;Chinese University of Hong Kong;Hong Kong Baptist University;Beijing University;RIKEN;Hong Kong University of Science and Technology (Guangzhou)", "aff_domain": "buaa.edu.cn;hkbu.edu.hk;cuhk.hk;hkbu.edu.hk;buaa.edu.cn;riken.jp;ust.hk", "position": "MS student;Postdoc;PhD student;PhD student;Associate Professor;Adjunct Scientist;Full Professor", "bibtex": "@misc{\nyang2023fedpd,\ntitle={Fed{PD}: Defying data heterogeneity through privacy distillation},\nauthor={Zhiqin Brian Yang and Yonggang Zhang and Yu Zheng and Zhenheng TANG and Xiaowen Chu and Hao Peng and Bo 
Han},\nyear={2023},\nurl={https://openreview.net/forum?id=IERSU0La-Nt}\n}", "github": "", "project": "", "reviewers": "52C4;Lg1w;Yumu;Kf6q", "site": "https://openreview.net/forum?id=IERSU0La-Nt", "pdf_size": 2093277, "recommendation": "1;3;3;3", "confidence": "4;4;3;4", "correctness": "1;2;3;2", "technical_novelty": "1;2;3;3", "empirical_novelty": "1;3;3;2", "wc_summary_paper": "111;54;16;60", "wc_strength_and_weaknesses": "431;177;162;138", "wc_clarity_quality_novelty_and_reproducibility": "27;18;207;31", "wc_summary_review": "50;72;12;21", "wc_review": "619;321;397;250", "wc_reply_reviewers": "0;59;0;47", "wc_reply_authors": "375;254;154;246", "reply_reviewers": "0;1;0;1", "reply_authors": "1;1;1;1", "recommendation_avg": [ 2.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.25, 0.82915619758885 ], "empirical_novelty_avg": [ 2.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 60.25, 33.81105588413352 ], "wc_strength_and_weaknesses_avg": [ 227.0, 118.59806069240761 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 70.75, 78.8047428775705 ], "wc_summary_review_avg": [ 38.75, 23.7841859225831 ], "wc_review_avg": [ 396.75, 138.4456120648105 ], "wc_reply_reviewers_avg": [ 26.5, 26.837473800639284 ], "wc_reply_authors_avg": [ 257.25, 78.52189185189057 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": 0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:atfSORxCkiAJ:scholar.google.com/&scioq=FedPD:+Defying+data+heterogeneity+through+privacy+distillation&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;2;1;3;4;5", "aff_unique_norm": "Beihang University;Hong Kong Baptist University;Chinese University of Hong Kong;Peking University;RIKEN;Hong Kong University of Science and Technology", "aff_unique_dep": ";;;;;", "aff_unique_url": "http://www.buaa.edu.cn/;https://www.hkbu.edu.hk;https://www.cuhk.edu.hk;http://www.pku.edu.cn;https://www.riken.jp;https://www.ust.hk", "aff_unique_abbr": "BUAA;HKBU;CUHK;PKU;RIKEN;HKUST", "aff_campus_unique_index": "1;1;1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;1;0", "aff_country_unique": "China;Japan" }, { "id": "IHGnybgLo1Z", "title": "A Critical Analysis of Out-of-Distribution Detection for Document Understanding", "track": "main", "status": "Reject", "tldr": "This work investigates the OOD robustness of pretrained models and presents a benchmark for various document understanding tasks.", "abstract": "Large-scale pretraining is widely used in recent document understanding models. During deployment, one may expect that large-scale pretrained models should trigger a conservative fallback policy when encountering out-of-distribution (OOD) samples, which suggests the importance of OOD detection. However, most existing OOD detection methods focus on single-modal inputs such as images or texts. While documents are multi-modal in nature, it is underexplored if and how multi-modal information in documents can be exploited for OOD detection. In this work, we first provide a systematic and in-depth analysis on OOD detection for document understanding models. We study the effects of model modality, pretraining, and finetuning across various types of OOD inputs. 
In particular, we find that spatial information is critical for document OOD detection. To better exploit spatial information, we propose a simple yet effective spatial-aware adapter, which serves as an add-on module to adapt transformer-based language models to the document domain. Extensive experiments show that our method consistently improves ID accuracy and OOD detection performance compared to baselines. We hope our findings can help inspire future works on understanding OOD robustness for documents.", "keywords": "Document Understanding;Pretraining;Out-of-Distribution;Document intelligence;Robustness", "primary_area": "", "supplementary_material": "", "author": "Jiuxiang Gu;Yifei Ming;Yi Zhou;Jason Kuen;Vlad I Morariu;Handong Zhao;Ruiyi Zhang;Nikolaos Barmpalios;Anqi Liu;Yixuan Li;Tong Sun;Ani Nenkova", "authorids": "~Jiuxiang_Gu2;~Yifei_Ming1;~Yi_Zhou20;~Jason_Kuen1;~Vlad_I_Morariu1;~Handong_Zhao3;~Ruiyi_Zhang3;~Nikolaos_Barmpalios1;~Anqi_Liu2;~Yixuan_Li1;~Tong_Sun1;~Ani_Nenkova1", "gender": "M;M;;M;M;;;M;F;F;F;", "homepage": "http://gujiuxiang.com;https://alvinmingsf.github.io/;;http://jasonkuen.com/;https://research.adobe.com/person/vlad-morariu/;;;;https://anqiliu-ai.github.io/;http://pages.cs.wisc.edu/~sharonli/;https://research.adobe.com/person/tong-sun/;", "dblp": "173/4935.html;277/4125;;165/1403;27/6671;;;;;144/6087-1;;", "google_scholar": "https://scholar.google.com.sg/citations?user=zPxKV9EAAAAJ;Dh_4cyQAAAAJ;;e6u7GlQAAAAJ;oyWpVa8AAAAJ;;;Yp4dul4AAAAJ;Q8yp6zQAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;", "orcid": ";;;;;;;;0000-0002-0468-5698;;;", "linkedin": ";;;;;;;;;liyixuan;tong-sun/?trk=hb_tab_pro_top;", "or_profile": "~Jiuxiang_Gu2;~Yifei_Ming1;~Yi_Zhou20;~Jason_Kuen1;~Vlad_I_Morariu1;~Handong_Zhao3;~Ruiyi_Zhang3;~Nikolaos_Barmpalios1;~Anqi_Liu2;~Yixuan_Li1;~Tong_Sun1;~Ani_Nenkova1", "aff": "Adobe Systems;University of Wisconsin - Madison;;Adobe Research;Adobe;;;Adobe Systems;University of Illinois, Chicago;Cornell University;Adobe Systems;", "aff_domain": "adobe.com;wisc.edu;;adobe.com;adobe.com;;;adobe.com;uic.edu;cornell.edu;adobe.com;", "position": "Researcher;PhD student;;Researcher;Senior Research Scientist;;;Senior Machine Learning Scientist;PhD student;Graduate Student;Director, Document Intelligence Lab;", "bibtex": "@misc{\ngu2023a,\ntitle={A Critical Analysis of Out-of-Distribution Detection for Document Understanding},\nauthor={Jiuxiang Gu and Yifei Ming and Yi Zhou and Jason Kuen and Vlad I Morariu and Handong Zhao and Ruiyi Zhang and Nikolaos Barmpalios and Anqi Liu and Yixuan Li and Tong Sun and Ani Nenkova},\nyear={2023},\nurl={https://openreview.net/forum?id=IHGnybgLo1Z}\n}", "github": "", "project": "", "reviewers": "Dbcz;oNdB;4z2u", "site": "https://openreview.net/forum?id=IHGnybgLo1Z", "pdf_size": 15069191, "recommendation": "3;5;6", "confidence": "4;5;4", "correctness": "4;3;3", "technical_novelty": "2;2;3", "empirical_novelty": "2;3;3", "wc_summary_paper": "72;47;88", "wc_strength_and_weaknesses": "352;145;78", "wc_clarity_quality_novelty_and_reproducibility": "61;43;48", "wc_summary_review": "13;49;60", "wc_review": "498;284;274", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ],
"empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 69.0, 16.87206764645835 ], "wc_strength_and_weaknesses_avg": [ 191.66666666666666, 116.62570709562947 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 50.666666666666664, 7.586537784494028 ], "wc_summary_review_avg": [ 40.666666666666664, 20.07209228976613 ], "wc_review_avg": [ 352.0, 103.31827847320466 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 12, 0 ], "corr_recommendation_confidence": 0.18898223650461357, "corr_recommendation_correctness": -0.9449111825230683, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15040286298890587962&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;0;0;0;2;3;0", "aff_unique_norm": "Adobe;University of Wisconsin-Madison;University of Illinois at Chicago;Cornell University", "aff_unique_dep": "Adobe Systems Incorporated;;;", "aff_unique_url": "https://www.adobe.com;https://www.wisc.edu;https://www.uic.edu;https://www.cornell.edu", "aff_unique_abbr": "Adobe;UW-Madison;UIC;Cornell", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Madison;Chicago", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "IIxe8wlXwb0", "title": "Generalization bounds and algorithms for estimating the effect of multiple treatments and dosage", "track": "main", "status": "Reject", "tldr": "We propose generalization bounds for the counterfactual error in treatment effect estimation in the context of multiple treatments and dosage parameters, and regularization techniques for training prediction models inspired by these bounds.", "abstract": "Estimating conditional treatment effects has been a longstanding challenge for fields of study such as epidemiology or economics that require a treatment-dosage pair to make decisions, but may not be able to run randomized trials to precisely quantify their effect. In the context of representation learning, there is an extensive literature relating model architectures with regularization techniques to solve this problem using observational data. However, theoretically motivated loss functions and bounds on generalization errors only exist in select circumstances, such as in the presence of binary treatments. In this paper, we introduce new bounds on the counterfactual generalization error in the context of multiple treatments and continuous dosage parameters, which subsume existing results. This result, in a principled manner, guides the definition of new learning objectives that can be used to train representation learning algorithms. 
We show empirically new state-of-the-art performance results across several benchmark datasets for this problem, including in comparison to doubly-robust estimation methods.", "keywords": "Treatment effect estimation", "primary_area": "", "supplementary_material": "", "author": "Alexis Bellot;Anish Dhir;Giulia Prando", "authorids": "~Alexis_Bellot1;~Anish_Dhir1;~Giulia_Prando1", "gender": "M;M;F", "homepage": ";;", "dblp": "217/4339;251/9010;", "google_scholar": ";nuA78i0AAAAJ;vd6ovb0AAAAJ", "orcid": ";;", "linkedin": ";;giulia-prando-23178a59/", "or_profile": "~Alexis_Bellot1;~Anish_Dhir1;~Giulia_Prando1", "aff": "Google DeepMind;Imperial College London, Imperial College London;babylon health", "aff_domain": "deepmind.com;imperial.ac.uk;babylonhealth.com", "position": "Researcher;PhD student;Researcher", "bibtex": "@misc{\nbellot2023generalization,\ntitle={Generalization bounds and algorithms for estimating the effect of multiple treatments and dosage},\nauthor={Alexis Bellot and Anish Dhir and Giulia Prando},\nyear={2023},\nurl={https://openreview.net/forum?id=IIxe8wlXwb0}\n}", "github": "", "project": "", "reviewers": "BdyF;zcLw;RujR;Z4Tv", "site": "https://openreview.net/forum?id=IIxe8wlXwb0", "pdf_size": 443269, "recommendation": "5;5;5;6", "confidence": "4;4;2;3", "correctness": "3;3;4;3", "technical_novelty": "2;2;2;2", "empirical_novelty": "2;2;3;2", "wc_summary_paper": "39;53;122;29", "wc_strength_and_weaknesses": "325;117;78;44", "wc_clarity_quality_novelty_and_reproducibility": "40;24;62;424", "wc_summary_review": "33;61;37;18", "wc_review": "437;255;299;515", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 60.75, 36.375644324190326 ], "wc_strength_and_weaknesses_avg": [ 141.0, 109.3274896812325 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 137.5, 165.96008556276416 ], "wc_summary_review_avg": [ 37.25, 15.433324334050653 ], "wc_review_avg": [ 376.5, 104.41623436994843 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.17407765595569782, "corr_recommendation_correctness": -0.3333333333333333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8100030964028835104&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;2", "aff_unique_norm": "Google;Imperial College London;Babylon Health", "aff_unique_dep": "Google DeepMind;;", "aff_unique_url": "https://deepmind.com;https://www.imperial.ac.uk;https://www.babylonhealth.com", "aff_unique_abbr": "DeepMind;ICL;Babylon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "IIyox3dwad0", "title": "Fast-PINN for Complex Geometry: Solving PDEs with Boundary Connectivity Loss", "track": "main", "status": "Reject", "tldr": "We present a fast-PINN method based on the incorporation of boundary connectivity constraints into training loss, which can efficiently produce accurate solutions with order of magnitude fewer training samples, across multiple fluid dynamic problems.", 
"abstract": "We present a novel loss formulation for efficient learning of complex dynamics from governing physics, typically described by partial differential equations (PDEs), using physics-informed neural networks (PINNs). In our experiments, existing versions of PINNs are seen to learn poorly in many problems, especially for complex geometries, as it becomes increasingly difficult to establish appropriate sampling strategy at the near boundary region. Overly dense sampling can adversely impede training convergence if the local gradient behaviors are too complex to be adequately modelled by PINNs. On the other hand, if the samples are too sparse, PINNs may over-fit the near boundary region, leading to incorrect solution. To prevent such issues, we propose a new Boundary Connectivity (BCXN) loss function which provides local structure approximation at the boundary. Our BCXN-loss can implicitly or explicitly impose such approximations during training, thus facilitating fast physics-informed learning across entire problem domains with order of magnitude fewer training samples. This method shows a few orders of magnitude smaller errors than existing methods in terms of the standard L2-norm metric, while using dramatically fewer training samples and iterations. Our proposed Fast-PINN method does not pose any requirement on the differentiable property of the networks, and we demonstrate its benefits and ease of implementation on both multi-layer perceptron and convolutional neural network versions as commonly used in current physics-informed neural network literature.", "keywords": "Physics-informed neural networks;physics-informed loss formulation;multi-layer perceptron;convolutional neural network;fluid dynamics", "primary_area": "", "supplementary_material": "", "author": "Jian Cheng Wong;Pao-Hsiung Chiu;Chin Chun Ooi;My Ha Dao;Yew-Soon Ong", "authorids": "~Jian_Cheng_Wong1;chiuph@ihpc.a-star.edu.sg;~Chin_Chun_Ooi1;daomh@ihpc.a-star.edu.sg;~Yew-Soon_Ong1", "gender": "M;;M;;", "homepage": ";;;;", "dblp": ";;;;", "google_scholar": ";;https://scholar.google.com.sg/citations?user=7IDYKK8AAAAJ;;", "orcid": "0000-0002-3215-1888;;;;", "linkedin": ";;;;", "or_profile": "~Jian_Cheng_Wong1;chiuph@ihpc.a-star.edu.sg;~Chin_Chun_Ooi1;daomh@ihpc.a-star.edu.sg;~Yew-Soon_Ong1", "aff": "Institute of High Performance Computing, Singapore, A*STAR;;Institute of High Performance Computing, Singapore, A*STAR;;", "aff_domain": "ihpc.a-star.edu.sg;;ihpc.a-star.edu.sg;;", "position": "Researcher;;Researcher;;", "bibtex": "@misc{\nwong2023fastpinn,\ntitle={Fast-{PINN} for Complex Geometry: Solving {PDE}s with Boundary Connectivity Loss},\nauthor={Jian Cheng Wong and Pao-Hsiung Chiu and Chin Chun Ooi and My Ha Dao and Yew-Soon Ong},\nyear={2023},\nurl={https://openreview.net/forum?id=IIyox3dwad0}\n}", "github": "", "project": "", "reviewers": "Dt9j;DAxW;KS1E;3A2P", "site": "https://openreview.net/forum?id=IIyox3dwad0", "pdf_size": 17662080, "recommendation": "5;5;6;6", "confidence": "5;4;4;3", "correctness": "2;4;4;4", "technical_novelty": "2;3;3;3", "empirical_novelty": "0;2;3;3", "wc_summary_paper": "73;70;126;83", "wc_strength_and_weaknesses": "314;559;146;132", "wc_clarity_quality_novelty_and_reproducibility": "4;73;21;28", "wc_summary_review": "21;26;42;46", "wc_review": "412;728;335;289", "wc_reply_reviewers": "11;0;20;44", "wc_reply_authors": "1620;2076;248;578", "reply_reviewers": "1;0;1;1", "reply_authors": "3;3;1;2", "recommendation_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], 
"correctness_avg": [ 3.5, 0.8660254037844386 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 1.224744871391589 ], "wc_summary_paper_avg": [ 88.0, 22.4610774452162 ], "wc_strength_and_weaknesses_avg": [ 287.75, 172.2039125571774 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 31.5, 25.5 ], "wc_summary_review_avg": [ 33.75, 10.497023387608508 ], "wc_review_avg": [ 441.0, 171.42782737933769 ], "wc_reply_reviewers_avg": [ 18.75, 16.20763708873073 ], "wc_reply_authors_avg": [ 1130.5, 744.5876375551773 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.7071067811865475, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6483136880376945215&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Institute of High Performance Computing", "aff_unique_dep": "", "aff_unique_url": "https://www.ihpc.a-star.edu.sg", "aff_unique_abbr": "IHPC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Singapore" }, { "id": "IJV0augCyk", "title": "Logit Clipping for Robust Learning against Label Noise", "track": "main", "status": "Withdraw", "tldr": "We propose to clamp the norm of the logit output, which can enhance the noise-robostness of existing loss functions with theoretical guarantee.", "abstract": "In the presence of noisy labels, designing robust loss functions is critical for securing the generalization performance of deep neural networks. Cross Entropy (CE) loss has been shown to be not robust to noisy labels due to its unboundedness. To alleviate this issue, existing works typically design specialized robust losses with the symmetric condition, which usually lead to the underfitting issue. In this paper, our key idea is to induce a loss bound at the logit level, thus universally enhancing the noise robustness of existing losses. Specifically, we propose logit clipping (LogitClip), which clamps the norm of the logit vector to ensure that it is upper bounded by a constant. In this manner, CE loss equipped with our LogitClip method is effectively bounded, mitigating the overfitting to examples with noisy labels. Moreover, we present theoretical analyses to certify the noise-tolerant ability of LogitClip. 
Extensive experiments show that LogitClip not only significantly improves the noise robustness of CE loss, but also broadly enhances the generalization performance of popular robust losses.", "keywords": "noisy labels;robust loss functions;logit clipping;overfitting", "primary_area": "", "supplementary_material": "/attachment/9699684ea297e5863f51606bc85fcf4296aa897c.zip", "author": "Hongxin Wei;HUIPING ZHUANG;RENCHUNZI XIE;Lei Feng;Gang Niu;Bo An;Yixuan Li", "authorids": "~Hongxin_Wei1;~HUIPING_ZHUANG1;~RENCHUNZI_XIE1;~Lei_Feng1;~Gang_Niu1;~Bo_An2;~Yixuan_Li1", "gender": "M;M;;M;M;M;F", "homepage": "https://hongxin001.github.io/;https://zhuanghp.github.io/;;https://lfeng1995.github.io/;https://niug1984.github.io;https://personal.ntu.edu.sg/boan/;http://pages.cs.wisc.edu/~sharonli/", "dblp": "150/6350;194/5829;;76/847-6;26/3367-1;42/6178-1.html;144/6087-1", "google_scholar": "cABH034AAAAJ;https://scholar.google.com.sg/citations?user=vCXxuLkAAAAJ;;https://scholar.google.com.sg/citations?user=KomQOFkAAAAJ;https://scholar.google.co.jp/citations?user=HOkcy00AAAAJ;PEEpuNwAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0002-4612-5445;;0000-0003-2839-5799;;0000-0002-7064-7438;", "linkedin": ";;;;;;liyixuan", "or_profile": "~Hongxin_Wei1;~HUIPING_ZHUANG1;~RENCHUNZI_XIE1;~Lei_Feng1;~Gang_Niu1;~Bo_An2;~Yixuan_Li1", "aff": "Southern University of Science and Technology;South China University of Technology;;Nanyang Technological University;RIKEN;Nanyang Technological University;Cornell University", "aff_domain": "sustech.edu.cn;scut.edu.cn;;ntu.edu.sg;riken.jp;ntu.edu.sg;cornell.edu", "position": "Assistant Professor;Associate Professor;;Visiting Professor;Research Scientist (tenured);Full Professor;Graduate Student", "bibtex": "@misc{\nwei2023logit,\ntitle={Logit Clipping for Robust Learning against Label Noise},\nauthor={Hongxin Wei and HUIPING ZHUANG and RENCHUNZI XIE and Lei Feng and Gang Niu and Bo An and Yixuan Li},\nyear={2023},\nurl={https://openreview.net/forum?id=IJV0augCyk}\n}", "github": "", "project": "", "reviewers": "iAA1;mAac;9jkk;8C9K", "site": "https://openreview.net/forum?id=IJV0augCyk", "pdf_size": 434648, "recommendation": "3;3;6;8", "confidence": "4;5;3;4", "correctness": "3;3;4;3", "technical_novelty": "1;2;3;3", "empirical_novelty": "3;2;3;4", "wc_summary_paper": "38;95;93;131", "wc_strength_and_weaknesses": "228;211;275;262", "wc_clarity_quality_novelty_and_reproducibility": "30;67;28;90", "wc_summary_review": "97;31;17;147", "wc_review": "393;404;413;630", "wc_reply_reviewers": "424;0;0;0", "wc_reply_authors": "1234;1317;563;935", "reply_reviewers": "3;0;0;0", "reply_authors": "5;2;1;2", "recommendation_avg": [ 5.0, 2.1213203435596424 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.82915619758885 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 89.25, 33.22931687531358 ], "wc_strength_and_weaknesses_avg": [ 244.0, 25.64176280991617 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 53.75, 26.06122598804592 ], "wc_summary_review_avg": [ 73.0, 52.32590180780452 ], "wc_review_avg": [ 460.0, 98.40477630684397 ], "wc_reply_reviewers_avg": [ 106.0, 183.597385602301 ], "wc_reply_authors_avg": [ 1012.25, 295.73584074305234 ], "reply_reviewers_avg": [ 0.75, 1.299038105676658 ], "reply_authors_avg": [ 2.5, 1.5 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.5000000000000001, 
"corr_recommendation_correctness": 0.2721655269759087, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11983533752833351399&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;2;4", "aff_unique_norm": "Southern University of Science and Technology;South China University of Technology;Nanyang Technological University;RIKEN;Cornell University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.sustech.edu.cn;https://www.scut.edu.cn;https://www.ntu.edu.sg;https://www.riken.jp;https://www.cornell.edu", "aff_unique_abbr": "SUSTech;SCUT;NTU;RIKEN;Cornell", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;2;1;3", "aff_country_unique": "China;Singapore;Japan;United States" }, { "id": "IJn-rxhkZsN", "title": "VISION TRANSFORMER FOR MULTIVARIATE TIME- SERIES CLASSIFICATION (VITMTSC)", "track": "main", "status": "Reject", "tldr": "A Vision Transformer based Multivariate Time-Series Classification model that significantly outperforms current SOTA on commercial datasets.", "abstract": "Multivariate Time-Series Classification (MTSC) is an important issue in many disciplines because of the proliferation of disparate data sources and sensors (economics, retail, health, etc.). Nonetheless, it remains difficult due to the high-dimensionality and richness of data that is regularly updated. We present a Vision Transformer for Multivariate Time-Series Classification (VitMTSC) model that learns latent features from raw time-series data for classification tasks and is applicable to large-scale time-series data with millions of data samples of variable lengths. According to our knowledge, this is the first implementation of the Vision Transformer (ViT) for MTSC. We demonstrate that our approach works on datasets ranging from a few thousand to millions of samples and achieves close to the state-of-the-art (SOTA) results on open datasets. Using click-stream data from a major retail website, we demonstrate that our model can scale to millions of samples and vastly outperform previous neural net-based MTSC models in real-world applications. 
Our source code is publicly accessible at https://github.com/mtsc-research/vitmtsc to facilitate further research.\n", "keywords": "time-series classification;vision-transformer;transformer", "primary_area": "", "supplementary_material": "/attachment/cf0376affb8d9828f3727923cd28b24f42fb8e1b.zip", "author": "Prem Shankar Kumar;Ashutosh Joshi;Srinivas Adavi", "authorids": "~Prem_Shankar_Kumar1;~Ashutosh_Joshi1;~Srinivas_Adavi1", "gender": "M;M;", "homepage": ";;", "dblp": ";;", "google_scholar": ";9lLGM4QAAAAJ;", "orcid": ";0009-0009-5945-2312;", "linkedin": "premshankarkumar;aujoshi/;srinivasadavi/", "or_profile": "~Prem_Shankar_Kumar1;~Ashutosh_Joshi1;~Srinivas_Adavi1", "aff": "Amazon;Amazon;", "aff_domain": "amazon.com;amazon.com;", "position": "Researcher;Researcher;", "bibtex": "@misc{\nkumar2023vision,\ntitle={{VISION} {TRANSFORMER} {FOR} {MULTIVARIATE} {TIME}- {SERIES} {CLASSIFICATION} ({VITMTSC})},\nauthor={Prem Shankar Kumar and Ashutosh Joshi and Srinivas Adavi},\nyear={2023},\nurl={https://openreview.net/forum?id=IJn-rxhkZsN}\n}", "github": "", "project": "", "reviewers": "fdnF;YY3h;ivET", "site": "https://openreview.net/forum?id=IJn-rxhkZsN", "pdf_size": 1706345, "recommendation": "3;3;5", "confidence": "3;4;4", "correctness": "2;2;3", "technical_novelty": "1;2;2", "empirical_novelty": "2;2;2", "wc_summary_paper": "85;14;57", "wc_strength_and_weaknesses": "445;36;128", "wc_clarity_quality_novelty_and_reproducibility": "180;20;66", "wc_summary_review": "90;69;40", "wc_review": "800;139;291", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "455;90;226", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 52.0, 29.20045661743437 ], "wc_strength_and_weaknesses_avg": [ 203.0, 175.1932266575014 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 88.66666666666667, 67.25738290742181 ], "wc_summary_review_avg": [ 66.33333333333333, 20.49932248202906 ], "wc_review_avg": [ 410.0, 282.6670597481544 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 257.0, 150.61429768340943 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.5, "corr_recommendation_correctness": 1.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10668348662460845983&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Amazon", "aff_unique_dep": "Amazon.com, Inc.", "aff_unique_url": "https://www.amazon.com", "aff_unique_abbr": "Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "IJwhRE510b", "title": "ELODI: Ensemble Logit Difference Inhibition for Positive-Congruent Training", "track": "main", "status": "Reject", "tldr": "", "abstract": "Negative flips are errors introduced in a classification system when a legacy model is updated. Existing methods to reduce the negative flip rate (NFR) either do so at the expense of overall accuracy by forcing a new model to imitate the old models, or use ensembles, which multiply inference cost prohibitively. 
We analyze the role of ensembles in reducing NFR and observe that they remove negative flips that are typically not close to the decision boundary, but often exhibit large deviations in the distance among their logits. Based on this observation, we present a method, called Ensemble Logit Difference Inhibition (ELODI), to train a classification system that achieves paragon performance in both error rate and NFR, at the inference cost of a single model. The method distills a homogeneous ensemble to a single student model which is used to update the classification system. ELODI also introduces a generalized distillation objective, Logit Difference Inhibition (LDI), which penalizes changes in the logits between the reference ensemble and the single student model.\nOn multiple image classification benchmarks, model updates with ELODI demonstrate superior accuracy retention and NFR reduction. ", "keywords": "positive-congruent training;negative flip;ensemble learning", "primary_area": "", "supplementary_material": "", "author": "Yue Zhao;Yantao Shen;Yuanjun Xiong;Shuo Yang;Wei Xia;Zhuowen Tu;Bernt Schiele;Stefano Soatto", "authorids": "~Yue_Zhao4;~Yantao_Shen2;~Yuanjun_Xiong3;~Shuo_Yang2;~Wei_Xia6;~Zhuowen_Tu1;~Bernt_Schiele1;~Stefano_Soatto1", "gender": "M;M;M;M;M;;M;", "homepage": "https://zhaoyue-zephyrus.github.io/;https://scholar.google.com.hk/citations?user=bEctTN0AAAAJ&hl=zh-CN;http://yjxiong.me/;http://shuoyang1213.me/;;;http://www.mpi-inf.mpg.de/~schiele;", "dblp": "48/76-6;86/3372;142/2644;;;;s/BerntSchiele;", "google_scholar": "https://scholar.google.com.hk/citations?user=6_U35tAAAAAJ;https://scholar.google.com.hk/citations?user=bEctTN0AAAAJ;ojKsx6AAAAAJ;U3KDpBUAAAAJ;OCdJxC8AAAAJ;;https://scholar.google.de/citations?user=z76PBfYAAAAJ;", "orcid": "0000-0003-2753-5921;;;;0009-0004-1073-1533;;0000-0001-9683-5237;", "linkedin": ";;;;wei-xia/;;;", "or_profile": "~Yue_Zhao4;~Yantao_Shen2;~Yuanjun_Xiong3;~Shuo_Yang2;~Wei_Xia6;~Zhuowen_Tu1;~Bernt_Schiele1;~Stefano_Soatto1", "aff": "University of Texas, Austin;Amazon;Moore Threads Inc.;;;;Amazon;", "aff_domain": "utexas.edu;amazon.com;moorethreads.com;;;;amazon.com;", "position": "PhD student;Researcher;Principal Researcher;;;;Principal Researcher;", "bibtex": "@misc{\nzhao2023elodi,\ntitle={{ELODI}: Ensemble Logit Difference Inhibition for Positive-Congruent Training},\nauthor={Yue Zhao and Yantao Shen and Yuanjun Xiong and Shuo Yang and Wei Xia and Zhuowen Tu and Bernt Schiele and Stefano Soatto},\nyear={2023},\nurl={https://openreview.net/forum?id=IJwhRE510b}\n}", "github": "", "project": "", "reviewers": "38Tg;asve;uaPa;V2YQ", "site": "https://openreview.net/forum?id=IJwhRE510b", "pdf_size": 1817942, "recommendation": "5;5;6;6", "confidence": "2;4;4;3", "correctness": "3;3;3;4", "technical_novelty": "2;4;3;3", "empirical_novelty": "2;4;4;3", "wc_summary_paper": "55;89;100;164", "wc_strength_and_weaknesses": "185;536;849;120", "wc_clarity_quality_novelty_and_reproducibility": "76;17;254;128", "wc_summary_review": "130;69;134;52", "wc_review": "446;711;1337;464", "wc_reply_reviewers": "22;0;132;19", "wc_reply_authors": "340;788;542;357", "reply_reviewers": "1;0;2;1", "reply_authors": "2;3;3;2", "recommendation_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 3.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 102.0, 39.45250308915773 ], "wc_strength_and_weaknesses_avg": [ 422.5, 292.70163989974503 ],
"wc_clarity_quality_novelty_and_reproducibility_avg": [ 118.75, 87.40530590301712 ], "wc_summary_review_avg": [ 96.25, 36.27929850479472 ], "wc_review_avg": [ 739.5, 360.50693474606004 ], "wc_reply_reviewers_avg": [ 43.25, 51.929639898616664 ], "wc_reply_authors_avg": [ 506.75, 180.6756416897419 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": 0.30151134457776363, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10693361824052907641&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "University of Texas at Austin;Amazon;Moore Threads Inc.", "aff_unique_dep": ";Amazon.com, Inc.;", "aff_unique_url": "https://www.utexas.edu;https://www.amazon.com;https://www.moorethreads.com", "aff_unique_abbr": "UT Austin;Amazon;MTI", "aff_campus_unique_index": "0", "aff_campus_unique": "Austin;", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;China" }, { "id": "IJwkJCFQ9EJ", "title": "Fast 6D Object Pose Refinement via Implicit Surface Representation Driven Optimization", "track": "main", "status": "Withdraw", "tldr": "In this paper, we propose a simple yet efficient self-supervised point cloud aligenment method via implicit neural network, which can serve as an alternative of ICP to achieve fast and accurate pose refinement.", "abstract": "Pose refinement after the initial pose estimator has been demonstrated to be effective for 6D object pose estimation. The iterative closest point (ICP) is the most popular refinement strategy, which however suffers from slow convergence due to the nature of iterative nonlinear optimization. In this paper, we propose a simple yet efficient self-supervised point cloud aligenment method via implicit neural network, which can serve as an alternative of ICP to achieve fast and accurate pose refinement. Our key idea is to encode the surface of target point cloud into a signed distance function (SDF); the optimal rigid transformation then can be derived by addressing a minimization problem over the SDF. The workflow of our method does not require any pose annotations. Experimental results show our method can achieve 6.4\\%, 16.2\\%, and 3.9\\% performance improvement over the prior art OVE6D (w/o ICP) on LINEMOD, Occluded LINEMOD and T-LESS datasets respectively, and is comparable with other SOTA methods even the supervised ones. Compared with point-to-plane ICP, our method has the obvious advantage on computation speed, due to the merit of full play to \nthe high parallel characteristics of deep learning based on GPU acceleration. 
", "keywords": "Signed Distance Field;6D pose refinement;Implicit neural network;ICP", "primary_area": "", "supplementary_material": "", "author": "Bo Pang;Ge Gao;Zhihao Cui;Xiaolin Hu;Xiangyang Ji;Xianming Liu", "authorids": "~Bo_Pang5;gao.ge@mech-mind.net;cui.zhihao@mech-mind.net;~Xiaolin_Hu1;~Xiangyang_Ji1;~Xianming_Liu5", "gender": "M;;;M;;M", "homepage": "https://dl.acm.org/profile/99659585376;;;http://www.xlhu.cn/;;http://homepage.hit.edu.cn/xmliu", "dblp": ";;;60/6028-1;;89/58201.html", "google_scholar": ";;;PksdgoUAAAAJ;;", "orcid": ";;;0000-0002-4907-7354;;0000-0002-8857-1785", "linkedin": ";;;;;", "or_profile": "~Bo_Pang5;gao.ge@mech-mind.net;cui.zhihao@mech-mind.net;~Xiaolin_Hu1;~Xiangyang_Ji1;~Xianming_Liu5", "aff": "Harbin Institute of Technology;;;Tsinghua University;;Harbin Institute of Technology", "aff_domain": "hit.edu.cn;;;tsinghua.edu.cn;;hit.edu.cn", "position": "PhD student;;;Associate Professor;;Full Professor", "bibtex": "@misc{\npang2023fast,\ntitle={Fast 6D Object Pose Refinement via Implicit Surface Representation Driven Optimization},\nauthor={Bo Pang and Ge Gao and Zhihao Cui and Xiaolin Hu and Xiangyang Ji and Xianming Liu},\nyear={2023},\nurl={https://openreview.net/forum?id=IJwkJCFQ9EJ}\n}", "github": "", "project": "", "reviewers": "ud2J;M1Qp;qciX;nRM6", "site": "https://openreview.net/forum?id=IJwkJCFQ9EJ", "pdf_size": 1811587, "recommendation": "3;3;3;6", "confidence": "2;4;3;3", "correctness": "2;2;3;2", "technical_novelty": "2;3;2;2", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "75;64;41;62", "wc_strength_and_weaknesses": "422;134;162;144", "wc_clarity_quality_novelty_and_reproducibility": "91;40;50;46", "wc_summary_review": "63;102;53;17", "wc_review": "651;340;306;269", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.75, 1.299038105676658 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 2.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 60.5, 12.298373876248844 ], "wc_strength_and_weaknesses_avg": [ 215.5, 119.6442643840481 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 56.75, 20.09197601033806 ], "wc_summary_review_avg": [ 58.75, 30.26858932953434 ], "wc_review_avg": [ 391.5, 151.9119810943166 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": -0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:fvQXbksHzE4J:scholar.google.com/&scioq=Fast+6D+Object+Pose+Refinement+via+Implicit+Surface+Representation+Driven+Optimization&hl=en&as_sdt=0,11", "gs_version_total": 0, "aff_unique_index": "0;1;0", "aff_unique_norm": "Harbin Institute of Technology;Tsinghua University", "aff_unique_dep": ";", "aff_unique_url": "http://www.hit.edu.cn/;https://www.tsinghua.edu.cn", "aff_unique_abbr": "HIT;THU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Harbin;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "LDMIC: Learning-based Distributed Multi-view Image Coding", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12061", "id": "ILQVw4cA5F9", "poster": 
"/media/PosterPDFs/ICLR%202023/12061.png?t=1681139051.6411693", "openreview": "https://openreview.net/forum?id=ILQVw4cA5F9", "slides": "https://iclr.cc/virtual/2023/poster/12061", "video": "https://iclr.cc/virtual/2023/poster/12061", "author_site": "Xinjie Zhang, Jiawei Shao, Jun Zhang", "tldr": "We design a multi-view image compression framework based on symmetric distributed source coding paradigm, which achieves higher compression performance than previous multi-view image compression methods.", "abstract": "Multi-view image compression plays a critical role in 3D-related applications. Existing methods adopt a predictive coding architecture, which requires joint encoding to compress the corresponding disparity as well as residual information. This demands collaboration among cameras and enforces the epipolar geometric constraint between different views, which makes it challenging to deploy these methods in distributed camera systems with randomly overlapping fields of view. Meanwhile, distributed source coding theory indicates that efficient data compression of correlated sources can be achieved by independent encoding and joint decoding, which motivates us to design a learning-based distributed multi-view image coding (LDMIC) framework. With independent encoders, LDMIC introduces a simple yet effective joint context transfer module based on the cross-attention mechanism at the decoder to effectively capture the global inter-view correlations, which is insensitive to the geometric relationships between images. Experimental results show that LDMIC significantly outperforms both traditional and learning-based MIC methods while enjoying fast encoding speed. Code is released at https://github.com/Xinjie-Q/LDMIC.", "keywords": "Deep multi-view image compression;distributed source coding;cross-attention mechanism", "primary_area": "", "supplementary_material": "", "author": "Xinjie Zhang;Jiawei Shao;Jun Zhang", "authorids": "~Xinjie_Zhang2;~Jiawei_Shao1;~Jun_Zhang25", "gender": "M;;", "homepage": "https://xinjie-q.github.io/;https://shaojiawei07.github.io/;https://eejzhang.people.ust.hk/", "dblp": ";251/9479;z/JunZhang4", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;p26zthIAAAAJ;1Is687QAAAAJ", "orcid": "0000-0002-3194-7518;0000-0001-8836-1430;0000-0002-5222-1898", "linkedin": ";;", "or_profile": "~Xinjie_Zhang2;~Jiawei_Shao1;~Jun_Zhang25", "aff": "SenseTime;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology", "aff_domain": "sensetime.com;ust.hk;ust.hk", "position": "Intern;PhD student;Associate Professor", "bibtex": "@inproceedings{\nzhang2023ldmic,\ntitle={{LDMIC}: Learning-based Distributed Multi-view Image Coding},\nauthor={Xinjie Zhang and Jiawei Shao and Jun Zhang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=ILQVw4cA5F9}\n}", "github": "", "project": "", "reviewers": "WX5p;8AGJ;XGBn;11ke", "pdf_size": 2191430, "recommendation": "6;6;6;8", "confidence": "3;3;2;3", "correctness": "3;3;3;4", "technical_novelty": "2;3;2;3", "empirical_novelty": "2;3;2;3", "wc_summary_paper": "62;132;44;59", "wc_strength_and_weaknesses": "147;248;136;46", "wc_clarity_quality_novelty_and_reproducibility": "9;195;60;9", "wc_summary_review": "61;49;55;41", "wc_review": "279;624;295;155", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "834;742;947;172", "reply_reviewers": "0;0;0;0", "reply_authors": "3;2;3;2", "recommendation_avg": [ 6.5, 0.8660254037844386 
], "confidence_avg": [ 2.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 74.25, 34.03215391361528 ], "wc_strength_and_weaknesses_avg": [ 144.25, 71.57644514782778 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 68.25, 76.08342460746624 ], "wc_summary_review_avg": [ 51.5, 7.399324293474371 ], "wc_review_avg": [ 338.25, 173.64817159993365 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 673.75, 298.6455884489172 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 1.0, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16725402706416422695&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=ILQVw4cA5F9", "email": "sensetime.com;ust.hk;ust.hk", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "SenseTime;Hong Kong University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.sensetime.com;https://www.ust.hk", "aff_unique_abbr": "SenseTime;HKUST", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "IM4Iwo58T4M", "title": "TOWARDS AN OBJECTIVE EVALUATION OF THE TRUSTWORTHINESS OF CLASSIFIERS", "track": "main", "status": "Reject", "tldr": "", "abstract": "With the widespread deployment of AI models in applications that impact human lives, research on model trustworthiness has become increasingly important, as a result of which model effectiveness alone (measured, e.g., with accuracy, F1, etc.) should not be the only criteria to evaluate predictive models; additionally the trustworthiness of these models should also be factored in. It has been argued that the features deemed important by a black-box model should be aligned with the human perception of the data, which in turn, should contribute to increasing the trustworthiness of a model. Existing research in XAI evaluates such alignments with user studies - the limitations being that these studies are subjective, difficult to reproduce, and consumes a large amount of time to conduct. We propose an evaluation framework, which provides a quantitative measure for trustworthiness of a black-box model, and hence, we are able to provide a fair comparison between a number of different black-box models. 
Our framework is applicable to both text and images, and our experimental results show that a model with a higher accuracy does not necessarily exhibit better trustworthiness.", "keywords": "Model trustworthiness;Explainable AI", "primary_area": "", "supplementary_material": "/attachment/bce8b6cce9d5735b906c5c5ed23b697b49865fee.zip", "author": "Manish Chandra;Debasis Ganguly", "authorids": "~Manish_Chandra1;~Debasis_Ganguly2", "gender": "M;M", "homepage": ";https://gdebasis.github.io/", "dblp": ";41/7272", "google_scholar": ";FhQENQgAAAAJ", "orcid": ";0000-0003-0050-7138", "linkedin": "m4manishchandra/;deb4it/", "or_profile": "~Manish_Chandra1;~Debasis_Ganguly2", "aff": "University of Glasgow;University of Glasgow", "aff_domain": "glasgow.ac.uk;glasgow.ac.uk", "position": "PhD student;Assistant Professor", "bibtex": "@misc{\nchandra2023towards,\ntitle={{TOWARDS} {AN} {OBJECTIVE} {EVALUATION} {OF} {THE} {TRUSTWORTHINESS} {OF} {CLASSIFIERS}},\nauthor={Manish Chandra and Debasis Ganguly},\nyear={2023},\nurl={https://openreview.net/forum?id=IM4Iwo58T4M}\n}", "github": "", "project": "", "reviewers": "KrGW;G8bv;8Lv7;isth", "site": "https://openreview.net/forum?id=IM4Iwo58T4M", "pdf_size": 1061146, "recommendation": "1;3;5;8", "confidence": "4;4;4;2", "correctness": "1;2;2;4", "technical_novelty": "2;1;2;3", "empirical_novelty": "1;1;2;3", "wc_summary_paper": "36;58;66;35", "wc_strength_and_weaknesses": "80;245;153;49", "wc_clarity_quality_novelty_and_reproducibility": "453;29;60;6", "wc_summary_review": "19;24;24;21", "wc_review": "588;356;303;111", "wc_reply_reviewers": "0;0;68;0", "wc_reply_authors": "321;272;207;25", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.25, 2.5860201081971503 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "correctness_avg": [ 2.25, 1.0897247358851685 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 1.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 48.75, 13.5531361684298 ], "wc_strength_and_weaknesses_avg": [ 131.75, 75.50289729540185 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 137.0, 183.44617739271646 ], "wc_summary_review_avg": [ 22.0, 2.1213203435596424 ], "wc_review_avg": [ 339.5, 169.97720435399566 ], "wc_reply_reviewers_avg": [ 17.0, 29.444863728670914 ], "wc_reply_authors_avg": [ 206.25, 112.18595054640309 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.8372183582789214, "corr_recommendation_correctness": 0.9536736127036032, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:gtipbcHUvYgJ:scholar.google.com/&scioq=TOWARDS+AN+OBJECTIVE+EVALUATION+OF+THE+TRUSTWORTHINESS+OF+CLASSIFIERS&hl=en&as_sdt=0,10", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "University of Glasgow", "aff_unique_dep": "", "aff_unique_url": "https://www.gla.ac.uk", "aff_unique_abbr": "Glasgow", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "title": "The Asymmetric Maximum Margin Bias of Quasi-Homogeneous Neural Networks", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10967", "id": "IM4xp7kGI5V", "poster": "/media/PosterPDFs/ICLR%202023/10967.png?t=1682950589.343779", "openreview": "https://openreview.net/forum?id=IM4xp7kGI5V", "slides": "https://iclr.cc/virtual/2023/poster/10967", "video":
"https://iclr.cc/virtual/2023/poster/10967", "author_site": "Daniel Kunin, Atsushi Yamamura, Chao Ma, Surya Ganguli", "tldr": "We generalize implicit max-margin bias to a class of models which describes nearly all networks, identifying a competition between maximizing margin and minimizing an asymmetric parameter norm, which can degrade robustness and explain Neural Collapse", "abstract": "In this work, we explore the maximum-margin bias of quasi-homogeneous neural networks trained with gradient flow on an exponential loss and past a point of separability. We introduce the class of quasi-homogeneous models, which is expressive enough to describe nearly all neural networks with homogeneous activations, even those with biases, residual connections, and normalization layers, while structured enough to enable geometric analysis of its gradient dynamics. Using this analysis, we generalize the existing results of maximum-margin bias for homogeneous networks to this richer class of models. We find that gradient flow implicitly favors a subset of the parameters, unlike in the case of a homogeneous model where all parameters are treated equally. We demonstrate through simple examples how this strong favoritism toward minimizing an asymmetric norm can degrade the robustness of quasi-homogeneous models. On the other hand, we conjecture that this norm-minimization discards, when possible, unnecessary higher-order parameters, reducing the model to a sparser parameterization. Lastly, by applying our theorem to sufficiently expressive neural networks with normalization layers, we reveal a universal mechanism behind the empirical phenomenon of Neural Collapse.", "keywords": "margin;maximum-margin;implicit regularization;neural networks;neural collapse;gradient flow;implicit bias;robustness;homogeneous;symmetry;classification", "primary_area": "", "supplementary_material": "/attachment/3925e5817c49a6672c09f716a394f166eb62ed96.zip", "author": "Daniel Kunin;Atsushi Yamamura;Chao Ma;Surya Ganguli", "authorids": "~Daniel_Kunin1;~Atsushi_Yamamura1;~Chao_Ma8;~Surya_Ganguli1", "gender": ";;M;M", "homepage": "https://daniel-kunin.com/;;;http://ganguli-gang.stanford.edu/surya.html", "dblp": "234/8632;;;56/10453", "google_scholar": "qilW2ZMAAAAJ;;n2BTRgUAAAAJ;", "orcid": ";;;", "linkedin": ";atsushi-yamamura-6981a6149/;chao-ma-9b593a129/;", "or_profile": "~Daniel_Kunin1;~Atsushi_Yamamura1;~Chao_Ma8;~Surya_Ganguli1", "aff": ";Stanford University;Stanford University;Stanford University", "aff_domain": ";stanford.edu;stanford.edu;@stanford.edu", "position": ";PhD student;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nkunin2023the,\ntitle={The Asymmetric Maximum Margin Bias of Quasi-Homogeneous Neural Networks},\nauthor={Daniel Kunin and Atsushi Yamamura and Chao Ma and Surya Ganguli},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=IM4xp7kGI5V}\n}", "github": "", "project": "", "reviewers": "UHdE;CPzm;bFVN;i1KS", "pdf_size": 2092036, "recommendation": "8;8;8;10", "confidence": "3;3;3;4", "correctness": "4;2;3;4", "technical_novelty": "4;4;4;4", "empirical_novelty": "0;3;0;3", "wc_summary_paper": "114;92;207;125", "wc_strength_and_weaknesses": "152;232;476;503", "wc_clarity_quality_novelty_and_reproducibility": "6;37;75;13", "wc_summary_review": "20;51;119;28", "wc_review": "292;412;877;669", "wc_reply_reviewers": "0;30;57;136", "wc_reply_authors": "230;379;530;560", "reply_reviewers": "0;1;1;2", "reply_authors": "1;1;1;2", 
"recommendation_avg": [ 8.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 4.0, 0.0 ], "empirical_novelty_avg": [ 1.5, 1.5 ], "wc_summary_paper_avg": [ 134.5, 43.51149273467873 ], "wc_strength_and_weaknesses_avg": [ 340.75, 151.71581163477984 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 32.75, 26.966414296305693 ], "wc_summary_review_avg": [ 54.5, 38.939054944875075 ], "wc_review_avg": [ 562.5, 226.97632035082427 ], "wc_reply_reviewers_avg": [ 55.75, 50.529075788104414 ], "wc_reply_authors_avg": [ 424.75, 131.71062030071835 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 1.0, "corr_recommendation_correctness": 0.5222329678670935, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=395188813776400884&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=IM4xp7kGI5V", "email": ";stanford.edu;stanford.edu;@stanford.edu", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "IN499pgOOEl", "title": "Dimensionless instance segmentation by learning graph representations of point clouds", "track": "main", "status": "Reject", "tldr": "Novel method for learning point cloud graph representation for instance segmentation", "abstract": "Point clouds are an increasingly common spatial data modality, being produced by sensors used in robotics and self-driving cars, and as natural intermediate representations of objects in microscopy and other bioimaging domains (e.g., cell locations over time, or filaments, membranes, or organelle boundaries in cryo-electron micrographs or tomograms). However, semantic and instance segmentation of this data remains challenging due to the complex nature of objects in point clouds. Especially in bioimaging domains where objects are often large and can be intersecting or overlapping. Furthermore, methods for operating on point clouds should not be sensitive to the specific orientation or translation of the point cloud, which is often arbitrary. Here, we frame the point cloud instance segmentation problem as a graph learning problem in which we seek to learn a function that accepts the point cloud as input and outputs a probability distribution over neighbor graphs in which connected components of the graph correspond to individual object instances. We introduce the Dimensionless Instance Segmentation Transformer (DIST), a deep neural network for spatially invariant instance segmentation of point clouds to solve this point cloud-to-graph problem. DIST uses an SO(n) invariant transformer layer architecture to operate on point clouds of arbitrary dimension and outputs, for each pair of points, the probability that an edge exists between them in the instance graph. We then decode the most likely set of instances using a graph cut. We demonstrate the power of DIST for the segmentation of biomolecules in cryo-electron micrographs and tomograms, far surpassing existing methods for membrane and filament segmentation in empirical evaluation. 
DIST also applies to scene and object understanding, performing competitively on the ScanNetV2 3D instance segmentation challenge. We anticipate that DIST will underpin a new generation of methods for point cloud segmentation in bioimaging and that our general model and approach will provide useful insights for point cloud segmentation methods in other domains.", "keywords": "instance segmentation;graph representation;point cloud segmentation", "primary_area": "", "supplementary_material": "", "author": "Robert Kiewisz;Tristan Bepler", "authorids": "~Robert_Kiewisz1;~Tristan_Bepler1", "gender": "M;M", "homepage": "https://github.com/RRobert92;", "dblp": ";217/3335", "google_scholar": "https://scholar.google.com/citations?hl=en;Roxjki8AAAAJ", "orcid": "0000-0003-2733-4978;0000-0001-5595-9954", "linkedin": "robert-kiewisz/;", "or_profile": "~Robert_Kiewisz1;~Tristan_Bepler1", "aff": "Spanish National Research Council;New York Structural Biology Center", "aff_domain": "csic.es;nysbc.org", "position": "Postdoc;Group Leader", "bibtex": "@misc{\nkiewisz2023dimensionless,\ntitle={Dimensionless instance segmentation by learning graph representations of point clouds},\nauthor={Robert Kiewisz and Tristan Bepler},\nyear={2023},\nurl={https://openreview.net/forum?id=IN499pgOOEl}\n}", "github": "", "project": "", "reviewers": "VdTC;zSCn;FSWQ;N2kb", "site": "https://openreview.net/forum?id=IN499pgOOEl", "pdf_size": 31093507, "recommendation": "3;3;3;8", "confidence": "5;3;4;4", "correctness": "2;2;2;4", "technical_novelty": "2;2;1;3", "empirical_novelty": "2;2;2;0", "wc_summary_paper": "77;141;50;56", "wc_strength_and_weaknesses": "185;189;146;100", "wc_clarity_quality_novelty_and_reproducibility": "102;149;29;94", "wc_summary_review": "28;86;61;43", "wc_review": "392;565;286;293", "wc_reply_reviewers": "0;330;44;0", "wc_reply_authors": "414;1394;592;278", "reply_reviewers": "0;3;1;0", "reply_authors": "2;5;2;1", "recommendation_avg": [ 4.25, 2.165063509461097 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 2.5, 0.8660254037844386 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 1.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 81.0, 36.062445840513924 ], "wc_strength_and_weaknesses_avg": [ 155.0, 35.92352989337211 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 93.5, 42.758040179596634 ], "wc_summary_review_avg": [ 54.5, 21.615966321217286 ], "wc_review_avg": [ 384.0, 112.59440483434335 ], "wc_reply_reviewers_avg": [ 93.5, 137.71982428103806 ], "wc_reply_authors_avg": [ 669.5, 432.85650047099904 ], "reply_reviewers_avg": [ 1.0, 1.224744871391589 ], "reply_authors_avg": [ 2.5, 1.5 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:cGZb478GuwwJ:scholar.google.com/&scioq=Dimensionless+instance+segmentation+by+learning+graph+representations+of+point+clouds&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Spanish National Research Council;New York Structural Biology Center", "aff_unique_dep": ";", "aff_unique_url": "https://www.csic.es;https://www.nysbc.org", "aff_unique_abbr": "CSIC;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Spain;United States" }, { "title": "FedExP: Speeding Up Federated Averaging via Extrapolation", "status": "Top-25%", "track": "main", 
"site": "https://iclr.cc/virtual/2023/poster/10924", "id": "IPrzNbddXV", "poster": "/media/PosterPDFs/ICLR%202023/10924.png?t=1683045029.3188086", "openreview": "https://openreview.net/forum?id=IPrzNbddXV", "slides": "https://iclr.cc/virtual/2023/poster/10924", "video": "https://iclr.cc/virtual/2023/poster/10924", "author_site": "Divyansh Jhunjhunwala, Shiqiang Wang, Gauri Joshi", "tldr": "We propose FedExP, a method to adaptively determine the server step size in FedAvg for faster convergence. ", "abstract": "Federated Averaging (FedAvg) remains the most popular algorithm for Federated Learning (FL) optimization due to its simple implementation, stateless nature, and privacy guarantees combined with secure aggregation. Recent work has sought to generalize the vanilla averaging in FedAvg to a generalized gradient descent step by treating client updates as pseudo-gradients and using a server step size. While the use of a server step size has been shown to provide performance improvement theoretically, the practical benefit of the server step size has not been seen in most existing works. In this work, we present FedExP, a method to adaptively determine the server step size in FL based on dynamically varying pseudo-gradients throughout the FL process. We begin by considering the overparameterized convex regime, where we reveal an interesting similarity between FedAvg and the Projection Onto Convex Sets (POCS) algorithm. We then show how FedExP can be motivated as a novel extension to the extrapolation mechanism that is used to speed up POCS. Our theoretical analysis later also discusses the implications of FedExP in underparameterized and non-convex settings. Experimental results show that FedExP consistently converges faster than FedAvg and competing baselines on a range of realistic FL datasets.\u00a0", "keywords": "Federated Learning;Optimization;Step Size", "primary_area": "", "supplementary_material": "", "author": "Divyansh Jhunjhunwala;Shiqiang Wang;Gauri Joshi", "authorids": "~Divyansh_Jhunjhunwala1;~Shiqiang_Wang1;~Gauri_Joshi1", "gender": "M;M;", "homepage": "https://divyansh03.github.io/;https://shiqiang.wang;", "dblp": "285/4656;87/5094-1;", "google_scholar": "0E54wbUAAAAJ;kA_vmOcAAAAJ;", "orcid": ";;", "linkedin": "divyansh-jhunjhunwala-166691148/;;", "or_profile": "~Divyansh_Jhunjhunwala1;~Shiqiang_Wang1;~Gauri_Joshi1", "aff": "Carnegie Mellon University;IBM, International Business Machines;", "aff_domain": "cmu.edu;us.ibm.com;", "position": "PhD student;Research Staff Member;", "bibtex": "@inproceedings{\njhunjhunwala2023fedexp,\ntitle={FedExP: Speeding Up Federated Averaging via Extrapolation},\nauthor={Divyansh Jhunjhunwala and Shiqiang Wang and Gauri Joshi},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=IPrzNbddXV}\n}", "github": "", "project": "", "reviewers": "Qfnz;AfRg;LWDX", "pdf_size": 2143944, "recommendation": "8;8;8", "confidence": "3;4;4", "correctness": "4;4;4", "technical_novelty": "3;3;3", "empirical_novelty": "3;3;3", "wc_summary_paper": "85;141;232", "wc_strength_and_weaknesses": "133;149;127", "wc_clarity_quality_novelty_and_reproducibility": "389;103;139", "wc_summary_review": "44;39;90", "wc_review": "651;432;588", "wc_reply_reviewers": "0;36;0", "wc_reply_authors": "1360;465;571", "reply_reviewers": "0;1;0", "reply_authors": "2;2;1", "recommendation_avg": [ 8.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 4.0, 0.0 ], 
"technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 152.66666666666666, 60.57685660015346 ], "wc_strength_and_weaknesses_avg": [ 136.33333333333334, 9.285592184789412 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 210.33333333333334, 127.18839919147064 ], "wc_summary_review_avg": [ 57.666666666666664, 22.954060400915758 ], "wc_review_avg": [ 557.0, 92.05433178292046 ], "wc_reply_reviewers_avg": [ 12.0, 16.97056274847714 ], "wc_reply_authors_avg": [ 798.6666666666666, 399.27462005770195 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 72, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6512799339028319484&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=IPrzNbddXV", "email": "cmu.edu;us.ibm.com;", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Carnegie Mellon University;International Business Machines", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://www.ibm.com", "aff_unique_abbr": "CMU;IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "IQM-3_Tzldw", "title": "Learning to aggregate: A parameterized aggregator to debias aggregation for cross-device federated learning", "track": "main", "status": "Reject", "tldr": "Our idea is to learn an aggregator to debias aggregation to calibrate and control the direction of aggregated parameters to deal with both client drift and period drift.", "abstract": "Federated learning (FL) emerged as a novel machine learning setting that enables collaboratively training deep models on decentralized private data. Due to the heterogeneity (non-iidness) of the decentralized data, FL methods (e.g. FedAvg) suffers from unstable and slow convergence. Recent works explain the non-iid problem in FL as the client drift, and deal with it by enforcing regularization at local updates. However, these works neglect the heterogeneity among different communication rounds: the data of sampled candidates at different communication rounds are also of non-iid distribution, and we term it as period drift, which as well as client drift can lead to aggregation bias that degrade convergence. To deal with it, we propose a novel aggregation strategy, named FedPA, that uses a Parameterized Aggregator, as an alternative of averaging. We frame FedPA within a meta-learning setting, and formulates the aggregator as a meta-learner, to learn to aggregate the model parameters of clients. FedPA can directly learn the aggregation bias and well calibrate and control the direction of aggregated parameters to a better direction towards the optimum. 
Experiments show that FedPA can achieve competitive performances compared with conventional baselines.", "keywords": "Federated learning", "primary_area": "", "supplementary_material": "", "author": "Tao Shen;Kun Kuang;Yaliang Li;Feng Wang;Zheqi Lv;Hongxia Yang;Chao Wu;Fei Wu", "authorids": "~Tao_Shen4;~Kun_Kuang1;~Yaliang_Li1;~Feng_Wang10;~Zheqi_Lv1;~Hongxia_Yang2;~Chao_Wu1;~Fei_Wu1", "gender": ";M;M;M;;F;M;M", "homepage": ";http://kunkuang.github.io;https://sites.google.com/site/yaliangli/;;;https://www4.comp.polyu.edu.hk/~hongxyang/;;https://person.zju.edu.cn/wufei", "dblp": ";194/4245;https://dblp.org/pers/hd/l/Li:Yaliang;;;;45/3158-1;84/3254-1", "google_scholar": ";https://scholar.google.com.hk/citations?user=FOsNiMQAAAAJ;CCPBcdYAAAAJ;;;iJlC5mMAAAAJ;gpTPt58AAAAJ;XJLn4MYAAAAJ", "orcid": ";0009-0000-7528-8131;0000-0002-4204-6096;;;;0000-0003-0885-6869;", "linkedin": ";;;https://www.linkedin.cn/incareer/in/feng-wang-20462aa1;;;;", "or_profile": "~Tao_Shen4;~Kun_Kuang1;~Yaliang_Li1;~Feng_Wang10;~Zheqi_Lv1;~Hongxia_Yang2;~Chao_Wu1;~Fei_Wu1", "aff": ";Zhejiang University;Alibaba Group;;;ByteDance Inc.;Zhejiang University;Zhejiang University", "aff_domain": ";zju.edu.cn;alibaba-inc.com;;;bytedance.com;zju.edu.cn;zju.edu.cn", "position": ";Associate Professor;Staff Engineer;;;Principal Researcher;Associate Professor;Full Professor", "bibtex": "@misc{\nshen2023learning,\ntitle={Learning to aggregate: A parameterized aggregator to debias aggregation for cross-device federated learning},\nauthor={Tao Shen and Kun Kuang and Yaliang Li and Feng Wang and Zheqi Lv and Hongxia Yang and Chao Wu and Fei Wu},\nyear={2023},\nurl={https://openreview.net/forum?id=IQM-3_Tzldw}\n}", "github": "", "project": "", "reviewers": "o7iB;7sGj;L63q;5zVx", "site": "https://openreview.net/forum?id=IQM-3_Tzldw", "pdf_size": 1520828, "recommendation": "3;3;5;6", "confidence": "4;4;4;3", "correctness": "3;2;3;3", "technical_novelty": "3;1;2;3", "empirical_novelty": "2;2;3;2", "wc_summary_paper": "72;72;105;84", "wc_strength_and_weaknesses": "218;184;258;109", "wc_clarity_quality_novelty_and_reproducibility": "162;51;34;146", "wc_summary_review": "19;74;32;30", "wc_review": "471;381;429;369", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.82915619758885 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 83.25, 13.47915056670857 ], "wc_strength_and_weaknesses_avg": [ 192.25, 54.73744148204225 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 98.25, 56.35767472137224 ], "wc_summary_review_avg": [ 38.75, 20.94486810653149 ], "wc_review_avg": [ 412.5, 40.55551750378732 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": -0.7777777777777777, "corr_recommendation_correctness": 0.5555555555555555, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:SYYUP_SE7-gJ:scholar.google.com/&scioq=Learning+to+aggregate:+A+parameterized+aggregator+to+debias+aggregation+for+cross-device+federated+learning&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "Zhejiang University;Alibaba Group;ByteDance", 
"aff_unique_dep": ";;", "aff_unique_url": "https://www.zju.edu.cn;https://www.alibaba.com;https://www.bytedance.com", "aff_unique_abbr": "ZJU;Alibaba;ByteDance", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "IUGwUr5_9wY", "title": "DISCO-DANCE: Learning to Discover Skills with Guidance", "track": "main", "status": "Reject", "tldr": "This paper proposes a novel unsupervised skill learning algorithm GSD, which attempts to provide direct guidance in order to accelerate the learning process of diverse skills by encouraging further exploration.", "abstract": "Unsupervised skill discovery (USD) allows agents to learn diverse and discriminable skills without access to pre-defined rewards,\nby maximizing the mutual information (MI) between skills and states reached by each skill.\nThe most common problem of MI-based skill discovery is insufficient exploration, because each skill is heavily penalized when it deviates from its initial settlement. Recent works introduced an auxiliary reward to encourage the exploration of the agent via maximizing the state's epistemic uncertainty or entropy. \nHowever, we have discovered that the performance of these auxiliary rewards decreases as the environment becomes more challenging. Therefore, we introduce a new unsupervised skill discovery algorithm, skill discovery with guidance (DISCO-DANCE), which (1) selects the guide skill which has the highest potential to reach the unexplored states, (2) guide other skills to follow the guide skill, then (3) the guided skills are diffused to maximize their discriminability in the unexplored states. Empirically, DISCO-DANCE substantially outperforms other USD baselines on challenging environments including two navigation benchmarks and a continuous control benchmark.", "keywords": "Unsupervised skill discovery;Reinforcement Learning", "primary_area": "", "supplementary_material": "", "author": "Hyunseung Kim;Byungkun Lee;Sejik Park;Hojoon Lee;Dongyoon Hwang;Kyushik Min;Jaegul Choo", "authorids": "~Hyunseung_Kim1;~Byungkun_Lee1;~Sejik_Park1;~Hojoon_Lee1;~Dongyoon_Hwang1;~Kyushik_Min1;~Jaegul_Choo1", "gender": "M;M;;M;M;M;M", "homepage": ";;;https://joonleesky.github.io/;;https://github.com/Kyushik;https://sites.google.com/site/jaegulchoo/", "dblp": "244/0949;;254/3093;;;228/4620;07/2074", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;RFjZjzkAAAAJ;;;https://scholar.google.com/citations?view_op=list_works;https://scholar.google.co.kr/citations?user=dz8VK3IAAAAJ;GHJYsLEAAAAJ", "orcid": ";;;;;;", "linkedin": ";;;;;ks-min/;", "or_profile": "~Hyunseung_Kim1;~Byungkun_Lee1;~Sejik_Park1;~Hojoon_Lee1;~Dongyoon_Hwang1;~Kyushik_Min1;~Jaegul_Choo1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Kakao;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kakaocorp.com;kaist.ac.kr", "position": "PhD student;PhD student;MS student;PhD student;PhD student;Researcher;Associate Professor", "bibtex": "@misc{\nkim2023discodance,\ntitle={{DISCO}-{DANCE}: Learning to Discover Skills with Guidance},\nauthor={Hyunseung Kim and Byungkun Lee and Sejik Park and Hojoon Lee and Dongyoon Hwang and Kyushik Min and Jaegul 
Choo},\nyear={2023},\nurl={https://openreview.net/forum?id=IUGwUr5_9wY}\n}", "github": "", "project": "", "reviewers": "RjRU;DqoP;xuUS", "site": "https://openreview.net/forum?id=IUGwUr5_9wY", "pdf_size": 10481140, "recommendation": "3;5;5", "confidence": "4;4;3", "correctness": "3;4;3", "technical_novelty": "2;3;2", "empirical_novelty": "2;0;2", "wc_summary_paper": "77;94;53", "wc_strength_and_weaknesses": "271;378;174", "wc_clarity_quality_novelty_and_reproducibility": "270;49;33", "wc_summary_review": "46;109;41", "wc_review": "664;630;301", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1135;813;384", "reply_reviewers": "0;0;0", "reply_authors": "3;2;2", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.3333333333333333, 0.9428090415820634 ], "wc_summary_paper_avg": [ 74.66666666666667, 16.81930108205715 ], "wc_strength_and_weaknesses_avg": [ 274.3333333333333, 83.31599819695828 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 117.33333333333333, 108.14907407011039 ], "wc_summary_review_avg": [ 65.33333333333333, 30.944394574067136 ], "wc_review_avg": [ 531.6666666666666, 163.6955168055076 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 777.3333333333334, 307.6300086503627 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.49999999999999983, "corr_recommendation_correctness": 0.49999999999999983, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:HHejOVnr8CoJ:scholar.google.com/&scioq=DISCO-DANCE:+Learning+to+Discover+Skills+with+Guidance&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0;1;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Kakao Corp.", "aff_unique_dep": ";", "aff_unique_url": "https://www.kaist.ac.kr;https://www.kakao.com", "aff_unique_abbr": "KAIST;Kakao", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "IUXAo-N9AGh", "title": "Progressive Transformation Learning For Leveraging Virtual Images in Training", "track": "main", "status": "Withdraw", "tldr": "We introduce progressive transformation learning (PTL) that progressively expands the training set with realistically transformed virtual images while addressing the large domain gap in training a transformation generator.", "abstract": "To effectively interrogate UAV-based images for detecting objects of interest, such as humans, it is essential to acquire large-scale UAV-based datasets that include human instances with various poses captured from widely varying viewing angles. As a viable alternative to laborious and costly data curation, we introduce Progressive Transformation Learning (PTL), which gradually augments a training dataset by adding transformed virtual images with enhanced realism. Generally, a virtual2real transformation generator in the conditional GAN framework suffers from quality degradation when a large domain gap exists between real and virtual images. 
To deal with the domain gap, PTL takes a novel approach that progressively iterates the following three steps: 1) select a subset from a pool of virtual images according to the domain gap, 2) transform the selected virtual images to enhance realism, and 3) add the transformed virtual images to the training set while removing them from the pool. In PTL, accurately quantifying the domain gap is critical. To do that, we theoretically demonstrate that the feature representation space of a given object detector can be modeled as a multivariate Gaussian distribution from which the Mahalanobis distance between a virtual object and the Gaussian distribution of each object category in the representation space can be readily computed. Experiments show that PTL results in a substantial performance increase over the baseline, especially in the small data and the cross-domain regime.", "keywords": "progressive learning;virtual image;synthetic image;low-shot learning;cross-domain detection;UAV-based human detection", "primary_area": "", "supplementary_material": "", "author": "Yi-Ting Shen;Hyungtae Lee;Heesung Kwon;Shuvra Shikhar Bhattacharyya", "authorids": "~Yi-Ting_Shen1;~Hyungtae_Lee3;~Heesung_Kwon2;~Shuvra_Shikhar_Bhattacharyya1", "gender": "M;M;M;M", "homepage": "https://dennisshen.github.io/;;http://www.ece.umd.edu/~ssb;https://htlee79.github.io/", "dblp": "211/5395;10/5946.html;;25/9990", "google_scholar": "PzjeN7MAAAAJ;ayph-mwAAAAJ;rNpUIKAAAAAJ;w9aeUnsAAAAJ", "orcid": "0000-0002-1167-5535;0009-0003-8335-7223;0000-0001-7719-1106;0000-0002-0631-9894", "linkedin": "yi-ting-shen-864867124/;heesung-kwon-44a1362b/;shuvra-bhattacharyya-3111904/;hyungtae-lee-03651647/", "or_profile": "~Yi-Ting_Shen1;~Heesung_Kwon2;~Shuvra_Shikhar_Bhattacharyya1;~Hyungtae_Lee1", "aff": "University of Maryland, College Park;DEVCOM Army Research Laboratory;University of Maryland, College Park;Army Research Laboratory", "aff_domain": "umd.edu;army.mil;umd.edu;army.mil", "position": "PhD student;Principal Researcher;Full Professor;Research Scientist", "bibtex": "@misc{\nshen2023progressive,\ntitle={Progressive Transformation Learning For Leveraging Virtual Images in Training},\nauthor={Yi-Ting Shen and Hyungtae Lee and Heesung Kwon and Shuvra Shikhar Bhattacharyya},\nyear={2023},\nurl={https://openreview.net/forum?id=IUXAo-N9AGh}\n}", "github": "", "project": "", "reviewers": "XXZr;4xW3;ZA5S", "site": "https://openreview.net/forum?id=IUXAo-N9AGh", "pdf_size": 1348443, "recommendation": "3;5;5", "confidence": "3;3;3", "correctness": "2;3;4", "technical_novelty": "2;2;2", "empirical_novelty": "2;0;2", "wc_summary_paper": "101;79;224", "wc_strength_and_weaknesses": "250;80;112", "wc_clarity_quality_novelty_and_reproducibility": "58;46;7", "wc_summary_review": "35;30;22", "wc_review": "444;235;365", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.0 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 1.3333333333333333, 0.9428090415820634 ], "wc_summary_paper_avg": [ 134.66666666666666, 63.80351784101633 ], "wc_strength_and_weaknesses_avg": [ 147.33333333333334, 73.76238125825988 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 37.0, 21.77154105707724 ], "wc_summary_review_avg": [ 29.0, 5.354126134736337 ], "wc_review_avg": [ 348.0, 86.16650548018451 ], "wc_reply_reviewers_avg": [ 0, 0 ], 
"wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.8660254037844385, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3323121764317049069&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "University of Maryland;United States Army Research Laboratory;Army Research Laboratory", "aff_unique_dep": ";Army Research Laboratory;", "aff_unique_url": "https://www/umd.edu;https://www.arl.army.mil;https://www.arl.army.mil", "aff_unique_abbr": "UMD;ARL;ARL", "aff_campus_unique_index": "0;0", "aff_campus_unique": "College Park;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "IVE5g1af87", "title": "Switching One-Versus-the-Rest Loss to Increase Logit Margins for Adversarial Robustness", "track": "main", "status": "Reject", "tldr": "We prove that one-versus-rest loss (OVR) increases logit margins two times greater than cross-entropy and propose switching between cross-entropy and OVR by the criterion of logit margins to improve adversarial robustness.", "abstract": "Adversarial training is a promising method to improve the robustness against adversarial attacks. To enhance its performance, recent methods impose high weights on the cross-entropy loss for important data points near the decision boundary. However, these importance-aware methods are vulnerable to sophisticated attacks, e.g., Auto-Attack. In this paper, we experimentally investigate the cause of their vulnerability via margins between logits for the true label and the other labels because they should be large enough to prevent the largest logit from being flipped by the attacks. Our experiments reveal that the histogram of the logit margins of naive adversarial training has two peaks. Thus, the levels of difficulty in increasing logit margins are roughly divided into two: difficult samples (small logit margins) and easy samples (large logit margins). On the other hand, only one peak near zero appears in the histogram of importance-aware methods, i.e., they reduce the logit margins of easy samples. To increase logit margins of difficult samples without reducing those of easy samples, we propose switching one-versus-the-rest loss (SOVR), which switches from cross-entropy to one-versus-the-rest loss (OVR) for difficult samples. We derive trajectories of logit margins for a simple problem and prove that OVR increases logit margins two times larger than the weighted cross-entropy loss. Thus, SOVR increases logit margins of difficult samples, unlike existing methods. 
We experimentally show that SOVR achieves better robustness against Auto-Attack than importance-aware methods.", "keywords": "Adversarial examples;Deep learning;Loss function;Adversarial training", "primary_area": "", "supplementary_material": "", "author": "Sekitoshi Kanai;Shin'ya Yamaguchi;Masanori Yamada;Hiroshi Takahashi;Kentaro Ohno;Yasutoshi Ida", "authorids": "~Sekitoshi_Kanai1;~Shin'ya_Yamaguchi1;~Masanori_Yamada1;~Hiroshi_Takahashi1;~Kentaro_Ohno2;~Yasutoshi_Ida1", "gender": "M;M;M;M;M;M", "homepage": "https://sekitoshi.github.io/;https://yshinya6.github.io/;;https://takahashihiroshi.github.io/;https://sites.google.com/view/kentaro-ohno/;http://yasutoshi.github.io/", "dblp": "209/4874;https://dblp.uni-trier.de/pers/y/Yamaguchi:Shin=ya;21/5452;54/2994;;120/6855", "google_scholar": "qa2i5_IAAAAJ;_xJYVD0AAAAJ;https://scholar.google.co.jp/citations?user=XtbHpWkAAAAJ;https://scholar.google.co.jp/citations?user=ncTryO4AAAAJ;https://scholar.google.co.jp/citations?user=UDPzwEgAAAAJ;https://scholar.google.co.jp/citations?user=HFLzlEgAAAAJ", "orcid": "0000-0003-4383-4454;0000-0001-9113-7405;;0000-0001-5102-2830;;0000-0003-4279-9503", "linkedin": ";shin-ya-yamaguchi-32183a154/;;;;", "or_profile": "~Sekitoshi_Kanai1;~Shin'ya_Yamaguchi1;~Masanori_Yamada1;~Hiroshi_Takahashi1;~Kentaro_Ohno2;~Yasutoshi_Ida1", "aff": "NTT;NTT;NTT;Kyoto University;NTT;NTT", "aff_domain": "ntt.co.jp;ntt.co.jp;ntt.co.jp;kyoto-u.ac.jp;ntt.co.jp;ntt.co.jp", "position": "Researcher;Researcher;Researcher;PhD student;Researcher;Researcher", "bibtex": "@misc{\nkanai2023switching,\ntitle={Switching One-Versus-the-Rest Loss to Increase Logit Margins for Adversarial Robustness},\nauthor={Sekitoshi Kanai and Shin'ya Yamaguchi and Masanori Yamada and Hiroshi Takahashi and Kentaro Ohno and Yasutoshi Ida},\nyear={2023},\nurl={https://openreview.net/forum?id=IVE5g1af87}\n}", "github": "", "project": "", "reviewers": "aV6p;G3JG;Fx8Y;HRBR", "site": "https://openreview.net/forum?id=IVE5g1af87", "pdf_size": 846706, "recommendation": "5;5;6;6", "confidence": "3;5;5;3", "correctness": "3;4;4;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;2;4;3", "wc_summary_paper": "48;27;105;73", "wc_strength_and_weaknesses": "215;91;214;252", "wc_clarity_quality_novelty_and_reproducibility": "6;31;35;22", "wc_summary_review": "8;50;32;18", "wc_review": "277;199;386;365", "wc_reply_reviewers": "20;147;105;0", "wc_reply_authors": "1319;979;1066;1363", "reply_reviewers": "1;1;1;0", "reply_authors": "3;3;3;2", "recommendation_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.0, 1.0 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 63.25, 29.089302157322372 ], "wc_strength_and_weaknesses_avg": [ 193.0, 60.848171706305195 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 23.5, 11.146748404803978 ], "wc_summary_review_avg": [ 27.0, 15.7797338380595 ], "wc_review_avg": [ 306.75, 74.44586959664048 ], "wc_reply_reviewers_avg": [ 68.0, 60.28681447878964 ], "wc_reply_authors_avg": [ 1181.75, 162.93767980427364 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:5IeI8OHpNCkJ:scholar.google.com/&scioq=Switching+One-Versus-the-Rest+Loss+to+Increase+Logit+Margins+for+Adversarial+Robustness&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "NTT Corporation;Kyoto University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ntt.co.jp;https://www.kyoto-u.ac.jp", "aff_unique_abbr": "NTT;Kyoto U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "Japan" }, { "title": "A Simple Yet Powerful Deep Active Learning With Snapshots Ensembles", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11592", "id": "IVESH65r0Ar", "poster": "/media/PosterPDFs/ICLR%202023/11592.png?t=1681914856.7894576", "openreview": "https://openreview.net/forum?id=IVESH65r0Ar", "slides": "https://iclr.cc/virtual/2023/poster/11592", "video": "https://iclr.cc/virtual/2023/poster/11592", "author_site": "Seohyeon Jung, Sanghyun Kim, Juho Lee", "tldr": "", "abstract": "Given an unlabeled pool of data and the experts who can label them, active learning aims to build an agent that can effectively acquire data to be queried to the experts, maximizing the gain in performance when trained with them. While there are several principles for active learning, a prevailing approach is to estimate uncertainties of predictions for unlabeled samples and use them to define acquisition functions. Active learning with the uncertainty principle works well for deep learning, especially for large-scale image classification tasks with deep neural networks. Still, it is often overlooked how the uncertainty of predictions is estimated, despite the common findings on the difficulty of accurately estimating uncertainties of deep neural networks. In this paper, we highlight the effectiveness of snapshot ensembles for deep active learning. Compared to the previous approaches based on Monte-Carlo dropout or deep ensembles, we show that a simple acquisition strategy based on uncertainties estimated from parameter snapshots gathered from a single optimization path significantly improves the quality of the acquired samples. Based on this observation, we further propose an efficient active learning algorithm that maintains a single learning trajectory throughout the entire active learning episodes, unlike the existing algorithms training models from scratch for every active learning episode. 
Through the extensive empirical comparison, we demonstrate the effectiveness of snapshot ensembles for deep active learning.", "keywords": "Active learning;Snapshot ensemble;Uncertainty estimation", "primary_area": "", "supplementary_material": "/attachment/d9d9d9391d4f301b8e6f6cb919f966dbb81e620d.zip", "author": "Seohyeon Jung;Sanghyun Kim;Juho Lee", "authorids": "~Seohyeon_Jung1;~Sanghyun_Kim2;~Juho_Lee2", "gender": "F;M;M", "homepage": ";https://nannullna.github.io;https://juho.lee.github.io", "dblp": "350/4069;;55/3410-1", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;BBQXZhkAAAAJ;Py4URJUAAAAJ", "orcid": ";0009-0008-9163-168X;", "linkedin": ";;", "or_profile": "~Seohyeon_Jung1;~Sanghyun_Kim2;~Juho_Lee2", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.edu;kaist.edu;kaist.ac.kr", "position": "MS student;MS student;Assistant Professor", "bibtex": "@inproceedings{\njung2023a,\ntitle={A Simple Yet Powerful Deep Active Learning With Snapshots Ensembles},\nauthor={Seohyeon Jung and Sanghyun Kim and Juho Lee},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=IVESH65r0Ar}\n}", "github": "", "project": "", "reviewers": "WKSq;s1VV;qYVA;qtq1", "pdf_size": 1446434, "recommendation": "3;6;8;8", "confidence": "4;4;4;4", "correctness": "2;4;3;4", "technical_novelty": "1;3;3;4", "empirical_novelty": "3;3;4;3", "wc_summary_paper": "100;39;72;65", "wc_strength_and_weaknesses": "249;141;124;227", "wc_clarity_quality_novelty_and_reproducibility": "23;359;61;33", "wc_summary_review": "46;31;22;33", "wc_review": "418;570;279;358", "wc_reply_reviewers": "0;0;585;0", "wc_reply_authors": "1157;655;698;182", "reply_reviewers": "0;0;3;0", "reply_authors": "2;1;4;1", "recommendation_avg": [ 6.25, 2.0463381929681126 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.75, 1.0897247358851685 ], "empirical_novelty_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 69.0, 21.714050750608465 ], "wc_strength_and_weaknesses_avg": [ 185.25, 53.65806090421084 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 119.0, 139.26234236145822 ], "wc_summary_review_avg": [ 33.0, 8.573214099741124 ], "wc_review_avg": [ 406.25, 106.6217027626177 ], "wc_reply_reviewers_avg": [ 146.25, 253.3124306069483 ], "wc_reply_authors_avg": [ 673.0, 345.0673847236218 ], "reply_reviewers_avg": [ 0.75, 1.299038105676658 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.6998739952495694, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1695061569582474417&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=IVESH65r0Ar", "email": "kaist.edu;kaist.edu;kaist.ac.kr", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "id": "IW3vvB8uggX", "title": "Understanding the Complexity Gains of Contextual Multi-task RL with Curricula", "track": "main", 
"status": "Reject", "tldr": "", "abstract": "Reinforcement learning (RL) problems can be challenging without well-shaped rewards. Prior work on provably efficient RL methods generally proposes to address this issue with dedicated exploration strategies, such as novelty-based bonuses. However, another way to tackle this challenge is to reformulate it as a multi-task RL problem, where the task space contains not only the challenging task of interest but also easier tasks that implicitly function as a curriculum. Such a reformulation opens up the possibility of running existing multi-task RL methods as a more efficient alternative to solving a single challenging task from scratch. In this work, we provide a theoretical framework that reformulates a single-task RL problem as a multi-task RL problem defined by a curriculum. Under mild regularity conditions on the curriculum, we show that sequentially solving each task in the multi-task RL problem is more computationally efficient than solving the original single-task problem, without any explicit exploration bonuses or other exploration strategies. We also show that our theoretical insights can be translated into an effective practical learning algorithm that can accelerate curriculum learning on simulated robotic goal-reaching tasks.", "keywords": "policy gradient methods;multi-task RL", "primary_area": "", "supplementary_material": "", "author": "Qiyang Li;Yuexiang Zhai;Yi Ma;Sergey Levine", "authorids": "~Qiyang_Li1;~Yuexiang_Zhai1;~Yi_Ma4;~Sergey_Levine1", "gender": "M;;M;M", "homepage": "https://colinqiyangli.github.io/;;http://people.eecs.berkeley.edu/~yima/;https://people.eecs.berkeley.edu/~svlevine/", "dblp": ";241/6124.html;;80/7594", "google_scholar": "qlwwdfEAAAAJ;78WTKm4AAAAJ;https://scholar.google.com.hk/citations?user=XqLiBQMAAAAJ;8R35rCwAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Qiyang_Li1;~Yuexiang_Zhai1;~Yi_Ma4;~Sergey_Levine1", "aff": "University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;Google", "aff_domain": "berkeley.edu;berkeley.edu;berkeley.edu;google.com", "position": "PhD student;PhD student;Full Professor;Research Scientist", "bibtex": "@misc{\nli2023understanding,\ntitle={Understanding the Complexity Gains of Contextual Multi-task {RL} with Curricula},\nauthor={Qiyang Li and Yuexiang Zhai and Yi Ma and Sergey Levine},\nyear={2023},\nurl={https://openreview.net/forum?id=IW3vvB8uggX}\n}", "github": "", "project": "", "reviewers": "5mDQ;haUz;cM8j", "site": "https://openreview.net/forum?id=IW3vvB8uggX", "pdf_size": 1379609, "recommendation": "5;6;6", "confidence": "2;3;4", "correctness": "3;3;3", "technical_novelty": "3;3;3", "empirical_novelty": "0;3;3", "wc_summary_paper": "59;71;51", "wc_strength_and_weaknesses": "156;231;86", "wc_clarity_quality_novelty_and_reproducibility": "2;135;13", "wc_summary_review": "46;29;45", "wc_review": "263;466;195", "wc_reply_reviewers": "0;360;71", "wc_reply_authors": "1020;2469;840", "reply_reviewers": "0;4;1", "reply_authors": "4;6;4", "recommendation_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 1.4142135623730951 ], "wc_summary_paper_avg": [ 60.333333333333336, 8.219218670625303 ], "wc_strength_and_weaknesses_avg": [ 157.66666666666666, 59.207732227772034 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 50.0, 60.27160746708741 ], "wc_summary_review_avg": [ 
40.0, 7.788880963698615 ], "wc_review_avg": [ 308.0, 115.1202270092735 ], "wc_reply_reviewers_avg": [ 143.66666666666666, 155.6927173063089 ], "wc_reply_authors_avg": [ 1443.0, 729.2036752512977 ], "reply_reviewers_avg": [ 1.6666666666666667, 1.699673171197595 ], "reply_authors_avg": [ 4.666666666666667, 0.9428090415820634 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.8660254037844385, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ql4ztg4JGcUJ:scholar.google.com/&scioq=Understanding+the+Complexity+Gains+of+Contextual+Multi-task+RL+with+Curricula&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University of California, Berkeley;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.berkeley.edu;https://www.google.com", "aff_unique_abbr": "UC Berkeley;Google", "aff_campus_unique_index": "0;0;0;1", "aff_campus_unique": "Berkeley;Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "IWoHx6bY4Zm", "title": "Lightweight Equivariant Graph Representation Learning for Protein Engineering", "track": "main", "status": "Reject", "tldr": "We design a lightweight pre-training model for multi-task protein representation learning from its 3D structure and sequence. ", "abstract": "This work tackles a key issue of directed evolution in computational protein design: making accurate predictions of the function of a protein mutant. We design a lightweight pre-training graph neural network model for multi-task protein representation learning from its 3D structure. Rather than reconstructing and optimizing the protein structure, the trained model recovers the amino acid types and key properties of the central residues from a given noisy three-dimensional local environment.
On the prediction task for higher-order mutants, where many amino acid sites of the protein are mutated, the proposed training strategy achieves remarkably higher performance, with a 20% improvement, while requiring less than 1% of the computational resources needed by popular transformer-based state-of-the-art deep learning models for protein design.", "keywords": "graph neural networks", "primary_area": "", "supplementary_material": "/attachment/1aa53422650980d40b1e396b8f40087eadc4ad43.zip", "author": "Bingxin Zhou;Outongyi Lv;Kai Yi;Xinye Xiong;Pan Tan;Liang Hong;Yu Guang Wang", "authorids": "~Bingxin_Zhou1;harry_lv@sjtu.edu.cn;~Kai_Yi2;~Xinye_Xiong2;tpan1039@alumni.sjtu.edu.cn;hongl3liang@sjtu.edu.cn;~Yu_Guang_Wang1", "gender": "F;;M;F;;;M", "homepage": ";;;https://sjtu.edu.cn;;;https://yuguangwang.github.io/", "dblp": ";;;;;;03/10023-1", "google_scholar": "OsvArmcAAAAJ;;A_YCRFwAAAAJ;;;;cMSEByAAAAAJ", "orcid": ";;;;;;", "linkedin": ";;;;;;", "or_profile": "~Bingxin_Zhou1;harry_lv@sjtu.edu.cn;~Kai_Yi2;~Xinye_Xiong2;tpan1039@alumni.sjtu.edu.cn;hongl3liang@sjtu.edu.cn;~Yu_Guang_Wang1", "aff": "Shanghai Jiaotong University;;University of New South Wales;Shanghai Jiaotong University;;;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;;unsw.edu.au;sjtu.edu.cn;;;sjtu.edu.cn", "position": "Researcher;;PhD student;MS student;;;Associate Professor", "bibtex": "@misc{\nzhou2023lightweight,\ntitle={Lightweight Equivariant Graph Representation Learning for Protein Engineering},\nauthor={Bingxin Zhou and Outongyi Lv and Kai Yi and Xinye Xiong and Pan Tan and Liang Hong and Yu Guang Wang},\nyear={2023},\nurl={https://openreview.net/forum?id=IWoHx6bY4Zm}\n}", "github": "", "project": "", "reviewers": "G8po;LEve;cBgt", "site": "https://openreview.net/forum?id=IWoHx6bY4Zm", "pdf_size": 2896501, "recommendation": "3;5;5", "confidence": "5;4;5", "correctness": "2;1;2", "technical_novelty": "2;2;3", "empirical_novelty": "2;1;2", "wc_summary_paper": "76;84;118", "wc_strength_and_weaknesses": "356;1002;215", "wc_clarity_quality_novelty_and_reproducibility": "59;48;489", "wc_summary_review": "87;74;117", "wc_review": "578;1208;939", "wc_reply_reviewers": "0;0;1219", "wc_reply_authors": "1125;1462;1909", "reply_reviewers": "0;0;2", "reply_authors": "2;2;3", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "correctness_avg": [ 1.6666666666666667, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "wc_summary_paper_avg": [ 92.66666666666667, 18.208667044996883 ], "wc_strength_and_weaknesses_avg": [ 524.3333333333334, 342.6313211342802 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 198.66666666666666, 205.34577884361028 ], "wc_summary_review_avg": [ 92.66666666666667, 18.00617178142601 ], "wc_review_avg": [ 908.3333333333334, 258.10893479734915 ], "wc_reply_reviewers_avg": [ 406.3333333333333, 574.6421108442677 ], "wc_reply_authors_avg": [ 1498.6666666666667, 321.1150711020722 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.4999999999999999, "corr_recommendation_correctness": -0.5, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11288399274233294389&as_sdt=80000005&sciodt=0,23&hl=en",
"gs_version_total": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Shanghai Jiao Tong University;University of New South Wales", "aff_unique_dep": ";", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.unsw.edu.au", "aff_unique_abbr": "SJTU;UNSW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "China;Australia" }, { "id": "IXBC0sG3cN", "title": "Tree-structure segmentation for logistic regression", "track": "main", "status": "Reject", "tldr": "Practitioners, in particular in the banking industry, often perform clustering to obtain \"client segments\" on which they fit separate supervised models. We perform both by learning \"logistic regression trees\".", "abstract": "The decision for a financial institution to accept or deny a loan is based on the probability of a client paying back their debt in time. This probability is given by a model such as a logistic regression, and estimated based on, e.g., the clients\u2019 characteristics, their credit history, the repayment performance. Historically, different models have been developed on different markets and/or credit products and/or addressed population. We show that this amounts to modelling default as a mixture model composed of a decision tree and logistic regression on its leaves (thereafter \u201clogistic regression tree\u201d). We seek to optimise this practice by considering the population to which a client belongs as a latent variable, which we will estimate. After exposing the context, the notations and the problem formalisation, we will conduct estimation using a Stochastic-Expectation-Maximisation (SEM) algorithm. We will finally show the performance on simulated data, and on real retail credit data from [COMPANY], as well as real open-source data.", "keywords": "logistic regression;decision tree;credit scoring;segmentation", "primary_area": "", "supplementary_material": "", "author": "Adrien Ehrhardt", "authorids": "~Adrien_Ehrhardt1", "gender": "M", "homepage": "https://adimajo.github.io/", "dblp": "", "google_scholar": "https://scholar.google.fr/citations?user=ISAbU0cAAAAJ", "orcid": "", "linkedin": "adrien-ehrhardt/", "or_profile": "~Adrien_Ehrhardt1", "aff": "Cr\u00e9dit Agricole S.A.", "aff_domain": "credit-agricole-sa.fr", "position": "Researcher", "bibtex": "@misc{\nehrhardt2023treestructure,\ntitle={Tree-structure segmentation for logistic regression},\nauthor={Adrien Ehrhardt},\nyear={2023},\nurl={https://openreview.net/forum?id=IXBC0sG3cN}\n}", "github": "", "project": "", "reviewers": "tW1b;hAgH;heWb", "site": "https://openreview.net/forum?id=IXBC0sG3cN", "pdf_size": 834049, "recommendation": "3;3;3", "confidence": "3;3;3", "correctness": "2;3;1", "technical_novelty": "2;2;2", "empirical_novelty": "0;2;2", "wc_summary_paper": "84;44;97", "wc_strength_and_weaknesses": "276;392;223", "wc_clarity_quality_novelty_and_reproducibility": "128;25;21", "wc_summary_review": "39;40;40", "wc_review": "527;501;381", "wc_reply_reviewers": "0;0;17", "wc_reply_authors": "381;350;493", "reply_reviewers": "0;0;1", "reply_authors": "1;1;1", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 3.0, 0.0 ], "correctness_avg": [ 2.0, 0.816496580927726 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 1.3333333333333333, 0.9428090415820634 ], "wc_summary_paper_avg": [ 75.0, 22.55363976538303 ], "wc_strength_and_weaknesses_avg": [ 297.0, 70.57383840111481 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 58.0, 49.52440475832765 ], 
"wc_summary_review_avg": [ 39.666666666666664, 0.4714045207910317 ], "wc_review_avg": [ 469.6666666666667, 63.58895781152225 ], "wc_reply_reviewers_avg": [ 5.666666666666667, 8.013876853447538 ], "wc_reply_authors_avg": [ 408.0, 61.4220373047546 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 1, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:UxP7hPrLUrAJ:scholar.google.com/&scioq=Tree-structure+segmentation+for+logistic+regression&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Cr\u00e9dit Agricole S.A.", "aff_unique_dep": "", "aff_unique_url": "https://www.credit-agricole.com", "aff_unique_abbr": "Cr\u00e9dit Agricole", "aff_country_unique_index": "0", "aff_country_unique": "France" }, { "id": "IXsI73NDuqN", "title": "Asymmetric Certified Robustness via Feature-Convex Neural Networks", "track": "main", "status": "Withdraw", "tldr": "We propose a novel, convexity-based learning architecture which enables closed-form adversarial robustness certificates for all norm balls in an asymmetric robustness setting.", "abstract": "Recent works have introduced input-convex neural networks (ICNNs) as learning models with advantageous training, inference, and generalization properties linked to their convex structure. In this paper, we propose a novel feature-convex neural network (FCNN) architecture as the composition of an ICNN with a Lipschitz feature map in order to achieve adversarial robustness. We consider the asymmetric binary classification setting with one \"sensitive\" class, and for this class we prove deterministic, closed-form, and easily-computable certified robust radii for arbitrary $\\ell_p$-norms. We theoretically justify the use of these models by characterizing their decision region geometry, extending the universal approximation theorem for ICNN regression to the classification setting, and proving a lower bound on the probability that such models perfectly fit even unstructured uniformly distributed data in sufficiently high dimensions. Experiments on Malimg malware classification as well as subsets of MNIST, CIFAR-10, and ImageNet-scale datasets show that FCNNs can attain orders of magnitude larger certified $\\ell_1$-radii than competing methods while maintaining substantial $\\ell_2$- and $\\ell_{\\infty}$-radii.", "keywords": "robustness;certification;convex;machine learning", "primary_area": "", "supplementary_material": "/attachment/cace2ee58bda2b58587fe4be53681bb72cb61a97.zip", "author": "Samuel Pfrommer;Brendon G. 
Anderson;Julien Piet;Somayeh Sojoudi", "authorids": "~Samuel_Pfrommer1;~Brendon_G._Anderson1;piet@berkeley.edu;~Somayeh_Sojoudi1", "gender": ";;;F", "homepage": "https://sam.pfrommer.us/;https://brendon-anderson.github.io/;;https://eecs.berkeley.edu/~sojoudi/", "dblp": ";225/6104;;06/7000", "google_scholar": "ysS4V1UAAAAJ;kNA83jQAAAAJ;;kNH8zcgAAAAJ", "orcid": ";;;", "linkedin": "sampfrommer/;;;", "or_profile": "~Samuel_Pfrommer1;~Brendon_G._Anderson1;piet@berkeley.edu;~Somayeh_Sojoudi1", "aff": "University of California, Berkeley;University of California, Berkeley;;University of California, Berkeley", "aff_domain": "berkeley.edu;berkeley.edu;;berkeley.edu", "position": "PhD student;PhD student;;Assistant Professor", "bibtex": "@misc{\npfrommer2023asymmetric,\ntitle={Asymmetric Certified Robustness via Feature-Convex Neural Networks},\nauthor={Samuel Pfrommer and Brendon G. Anderson and Julien Piet and Somayeh Sojoudi},\nyear={2023},\nurl={https://openreview.net/forum?id=IXsI73NDuqN}\n}", "github": "", "project": "", "reviewers": "Ngmj;Uj8h;2ZAQ;R47y", "site": "https://openreview.net/forum?id=IXsI73NDuqN", "pdf_size": 574397, "recommendation": "3;5;6;6", "confidence": "4;5;4;4", "correctness": "3;3;4;4", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;3;2;3", "wc_summary_paper": "69;74;67;44", "wc_strength_and_weaknesses": "170;95;230;272", "wc_clarity_quality_novelty_and_reproducibility": "519;288;76;11", "wc_summary_review": "108;55;69;47", "wc_review": "866;512;442;374", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "762;455;348;167", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 63.5, 11.543396380615196 ], "wc_strength_and_weaknesses_avg": [ 191.75, 66.58969514872402 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 223.5, 198.99309033230276 ], "wc_summary_review_avg": [ 69.75, 23.44541533008106 ], "wc_review_avg": [ 548.5, 189.6911964219742 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 433.0, 216.04744849222357 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.8164965809277259, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2791609541535311514&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "I_HxBH2SeW", "title": "Controllable Evaluation and Generation of Physical Adversarial Patch on Face Recognition", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Recent studies have revealed the vulnerability of face recognition models against physical adversarial patches, which raises security concerns about the deployed face recognition systems. However, it is still challenging to ensure the reproducibility for most attack algorithms under complex physical conditions, which leads to the lack of a systematic evaluation of the existing methods. 
It is therefore imperative to develop a framework that can readily and fairly evaluate the vulnerability of face recognition in the physical world. To this end, we propose to simulate the complex transformations of faces in the physical world via 3D face modeling, which serves as a digital counterpart of physical faces. The generic framework allows us to control different face variations and physical conditions to conduct reproducible evaluations conveniently. With this digital simulator, we further propose a Face3DAdv method considering the 3D face transformations and realistic physical variations. Extensive experiments validate that Face3DAdv can significantly improve the effectiveness of diverse physically realizable adversarial patches in both simulated and physical environments, against various white-box and black-box face recognition models.", "keywords": "Physical adversarial attacks;face recogntion;robustness evaluation", "primary_area": "", "supplementary_material": "/attachment/57ca0de33ace1a518f113b2f42f7de300fa8fde8.zip", "author": "Xiao Yang;Yinpeng Dong;Tianyu Pang;Chang Liu;Zihao Xiao;Hang Su;Jun Zhu", "authorids": "~Xiao_Yang4;~Yinpeng_Dong2;~Tianyu_Pang1;~Chang_Liu22;~Zihao_Xiao1;~Hang_Su3;~Jun_Zhu2", "gender": "M;M;M;F;M;M;M", "homepage": "https://ml.cs.tsinghua.edu.cn/~xiaoyang/;https://dongyp13.github.io;https://p2333.github.io/;https://elenacliu.github.io/;;http://ml.cs.tsinghua.edu.cn/~jun;", "dblp": "57/33851;183/0980;202/2550;;207/2005;50/2644-1;26/5371-6", "google_scholar": "bwkwp0MAAAAJ;6_4ad84AAAAJ;wYDbtFsAAAAJ;;fOfxmrcAAAAJ;axsP38wAAAAJ;dxN1_X0AAAAJ", "orcid": "0000-0001-9502-9962;;0000-0003-0639-6176;;;;", "linkedin": ";;%E5%A4%A9%E5%AE%87-%E5%BA%9E-b3999017a/;;;;", "or_profile": "~Xiao_Yang4;~Yinpeng_Dong2;~Tianyu_Pang1;~Chang_Liu22;~Zihao_Xiao1;~Jun_Zhu2;~Hang_Su2", "aff": "Tsinghua University;Tsinghua University;Sea AI Lab;Peking University;RealAI;Tsinghua University;Tsinghua University", "aff_domain": "mail.tsinghua.edu.cn;tsinghua.edu.cn;sea.com;pku.edu.cn;realai.ai;mail.tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;Postdoc;Research Scientist;MS student;Researcher;Professor;Associate Professor", "bibtex": "@misc{\nyang2023controllable,\ntitle={Controllable Evaluation and Generation of Physical Adversarial Patch on Face Recognition},\nauthor={Xiao Yang and Yinpeng Dong and Tianyu Pang and Chang Liu and Zihao Xiao and Hang Su and Jun Zhu},\nyear={2023},\nurl={https://openreview.net/forum?id=I_HxBH2SeW}\n}", "github": "", "project": "", "reviewers": "KyPX;gWVm;q7sp;HXJq", "site": "https://openreview.net/forum?id=I_HxBH2SeW", "pdf_size": 6819387, "recommendation": "5;5;5;8", "confidence": "3;5;4;4", "correctness": "4;3;2;4", "technical_novelty": "1;2;2;4", "empirical_novelty": "3;2;2;4", "wc_summary_paper": "45;78;28;68", "wc_strength_and_weaknesses": "98;225;39;154", "wc_clarity_quality_novelty_and_reproducibility": "39;48;13;42", "wc_summary_review": "22;20;5;41", "wc_review": "204;371;85;305", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.25, 1.0897247358851685 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 54.75, 19.536824204563032 ], "wc_strength_and_weaknesses_avg": [ 129.0, 68.74227229296396 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 35.5, 
13.388427838995884 ], "wc_summary_review_avg": [ 22.0, 12.786711852544421 ], "wc_review_avg": [ 241.25, 108.05178156791308 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5222329678670935, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11614292954793165363&as_sdt=5,28&sciodt=0,28&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;1;2;3;0;0", "aff_unique_norm": "Tsinghua University;Sea AI Lab;Peking University;RealAI", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.tsinghua.edu.cn;;http://www.pku.edu.cn;https://www.realai.co", "aff_unique_abbr": "THU;;Peking U;RealAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;2;0;0", "aff_country_unique": "China;;United States" }, { "id": "I_IJf5oDRo", "title": "Explainable Recommender with Geometric Information Bottleneck", "track": "main", "status": "Reject", "tldr": "To consider user-item interactions for an interpretable recommender system, we propose to incorporate the geometric regularisation derived from user-item interaction graphs to learn the latent factors of review text in a variational network.", "abstract": "Explainable recommender systems have attracted much interest in recent years as they can explain their recommendation decisions, enhancing user trust in the systems. Most explainable recommender systems rely on human-generated rationales or annotated aspect features from user reviews to train models for rational generation or extraction. The rationales produced are often confined to a single review. To avoid the expensive human annotation process and to generate explanations beyond individual reviews, we propose an explainable recommender system trained on user reviews by developing a transferable Geometric Information Bottleneck (GIANT), which leverages the prior knowledge acquired through clustering on a user-item graph built on user-item rating interactions, since graph nodes in the same cluster tend to share common characteristics or preferences. We then feed user reviews and item reviews into a variational network to learn latent topic distributions which are regularised by the distributions of user/item estimated based on their distances to various cluster centroids of the user-item graph. By iteratively refining the instance-level review latent topics with GIANT, our method learns a robust latent space from the text for rating prediction and explanation generation. Experimental results on three e-commerce datasets show that our model significantly improves the interpretability of a variational recommender using a standard Gaussian prior, in terms of coherence, diversity and faithfulness, while achieving performance comparable to existing content-based recommender systems in terms of rating prediction accuracy. 
", "keywords": "Interpretability;Recommender System;Information Extraction", "primary_area": "", "supplementary_material": "", "author": "Hanqi Yan;Lin Gui;Yulan He", "authorids": "~Hanqi_Yan2;~Lin_Gui3;~Yulan_He1", "gender": "M;F;F", "homepage": ";https://www.kcl.ac.uk/people/yulan-he;https://hanqi-qi.github.io/homepage/", "dblp": "34/8605-3;75/5430;https://dblp.uni-trier.de/pid/254/8174", "google_scholar": "https://scholar.google.com.ph/citations?user=1b3Eyx4AAAAJ;https://scholar.google.co.uk/citations?user=SP9r32UAAAAJ;YmWi1lgAAAAJ", "orcid": ";0000-0003-3948-5845;", "linkedin": ";yulan-he-277234a/?originalSubdomain=uk;", "or_profile": "~Lin_Gui3;~Yulan_He1;~hanqi_yan1", "aff": "King's College London, University of London;King's College London, University of London;The University of Warwick", "aff_domain": "kcl.ac.uk;kcl.ac.uk;warwick.ac.uk", "position": "Lecturer;Full Professor;PhD student", "bibtex": "@misc{\nyan2023explainable,\ntitle={Explainable Recommender with Geometric Information Bottleneck},\nauthor={Hanqi Yan and Lin Gui and Yulan He},\nyear={2023},\nurl={https://openreview.net/forum?id=I_IJf5oDRo}\n}", "github": "", "project": "", "reviewers": "fUWF;Scqk;JQQm", "site": "https://openreview.net/forum?id=I_IJf5oDRo", "pdf_size": 754450, "recommendation": "5;5;5", "confidence": "3;3;5", "correctness": "4;2;3", "technical_novelty": "3;3;2", "empirical_novelty": "3;0;2", "wc_summary_paper": "224;149;117", "wc_strength_and_weaknesses": "328;525;307", "wc_clarity_quality_novelty_and_reproducibility": "87;18;9", "wc_summary_review": "73;67;9", "wc_review": "712;759;442", "wc_reply_reviewers": "64;760;176", "wc_reply_authors": "588;4001;1737", "reply_reviewers": "1;5;1", "reply_authors": "2;8;5", "recommendation_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.6666666666666667, 1.247219128924647 ], "wc_summary_paper_avg": [ 163.33333333333334, 44.842935774644474 ], "wc_strength_and_weaknesses_avg": [ 386.6666666666667, 98.19142302439433 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 38.0, 34.84250278036869 ], "wc_summary_review_avg": [ 49.666666666666664, 28.85981442921782 ], "wc_review_avg": [ 637.6666666666666, 139.68138347284827 ], "wc_reply_reviewers_avg": [ 333.3333333333333, 305.14404613049373 ], "wc_reply_authors_avg": [ 2108.6666666666665, 1417.9196811604747 ], "reply_reviewers_avg": [ 2.3333333333333335, 1.8856180831641267 ], "reply_authors_avg": [ 5.0, 2.449489742783178 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1588108551748267516&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff_unique_index": "0;0;1", "aff_unique_norm": "King's College London;University of Warwick", "aff_unique_dep": ";", "aff_unique_url": "https://www.kcl.ac.uk;https://warwick.ac.uk", "aff_unique_abbr": "KCL;Warwick", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "DualAfford: Learning Collaborative Visual Affordance for Dual-gripper Manipulation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12233", "id": "I_YZANaz5X", "poster": "/media/PosterPDFs/ICLR%202023/12233.png?t=1680791387.9935675", "openreview": 
"https://openreview.net/forum?id=I_YZANaz5X", "slides": "https://iclr.cc/virtual/2023/poster/12233", "video": "https://iclr.cc/virtual/2023/poster/12233", "author_site": "Yan Zhao, Ruihai Wu, Zhehuan Chen, Yourong Zhang, Qingnan Fan, Kaichun Mo, Hao Dong", "tldr": "We propose a novel learning framework to learn collaborative affordance for dual-gripper manipulation tasks.", "abstract": "It is essential yet challenging for future home-assistant robots to understand and manipulate diverse 3D objects in daily human environments. Towards building scalable systems that can perform diverse manipulation tasks over various 3D shapes, recent works have advocated and demonstrated promising results learning visual actionable affordance, which labels every point over the input 3D geometry with an action likelihood of accomplishing the downstream task (e.g., pushing or picking-up). However, these works only studied single-gripper manipulation tasks, yet many real-world tasks require two hands to achieve collaboratively. In this work, we propose a novel learning framework, DualAfford, to learn collaborative affordance for dual-gripper manipulation tasks. The core design of the approach is to reduce the quadratic problem for two grippers into two disentangled yet interconnected subtasks for efficient learning. Using the large-scale PartNet-Mobility and ShapeNet datasets, we set up four benchmark tasks for dual-gripper manipulation. Experiments prove the effectiveness and superiority of our method over three baselines. We will release code and data upon acceptance. ", "keywords": "Visual Actionable Representation for Robotics;Visual Understanding of 3D Shapes", "primary_area": "", "supplementary_material": "", "author": "Yan Zhao;Ruihai Wu;Zhehuan Chen;Yourong Zhang;Qingnan Fan;Kaichun Mo;Hao Dong", "authorids": "~Yan_Zhao5;~Ruihai_Wu1;~Zhehuan_Chen1;~Yourong_Zhang1;~Qingnan_Fan2;~Kaichun_Mo1;~Hao_Dong3", "gender": "F;M;M;M;M;M;M", "homepage": "https://sxy7147.github.io;https://warshallrho.github.io/;https://www.cnblogs.com/ACMLCZH;;https://fqnchina.github.io/;https://cs.stanford.edu/~kaichun/;https://zsdonghao.github.io", "dblp": "88/5320-35;248/8028.html;;;;172/1283;14/1525-3.html", "google_scholar": "iIs4TDMAAAAJ;https://scholar.google.com/citations?hl=en;LvNUzlEAAAAJ;;;pL7JsOsAAAAJ;xLFL4sMAAAAJ", "orcid": ";;;;;;0000-0003-2261-9122", "linkedin": ";;;yourong-zhang-2b1aab23a/;;;", "or_profile": "~Yan_Zhao5;~Ruihai_Wu1;~Zhehuan_Chen1;~Yourong_Zhang1;~Qingnan_Fan2;~Kaichun_Mo1;~Hao_Dong3", "aff": "Peking University;Peking University;Peking University;Peking University;Tencent AI Lab;NVIDIA;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn;tencent.com;nvidia.com;pku.edu.cn", "position": "PhD student;PhD student;Undergrad student;Undergrad student;Senior Researcher;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nzhao2023dualafford,\ntitle={DualAfford: Learning Collaborative Visual Affordance for Dual-gripper Manipulation},\nauthor={Yan Zhao and Ruihai Wu and Zhehuan Chen and Yourong Zhang and Qingnan Fan and Kaichun Mo and Hao Dong},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=I_YZANaz5X}\n}", "github": "", "project": "", "reviewers": "RF6q;VYuj;yumu", "pdf_size": 33253292, "recommendation": "6;8;8", "confidence": "3;4;4", "correctness": "3;3;4", "technical_novelty": "2;2;3", "empirical_novelty": "3;2;4", "wc_summary_paper": "147;169;30", "wc_strength_and_weaknesses": 
"261;623;200", "wc_clarity_quality_novelty_and_reproducibility": "113;35;49", "wc_summary_review": "298;65;22", "wc_review": "819;892;301", "wc_reply_reviewers": "11;0;15", "wc_reply_authors": "1559;3104;391", "reply_reviewers": "1;0;1", "reply_authors": "3;7;2", "recommendation_avg": [ 7.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 115.33333333333333, 61.00455356410336 ], "wc_strength_and_weaknesses_avg": [ 361.3333333333333, 186.6946407610269 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 65.66666666666667, 33.95421754199158 ], "wc_summary_review_avg": [ 128.33333333333334, 121.24997136311231 ], "wc_review_avg": [ 670.6666666666666, 263.08722676878267 ], "wc_reply_reviewers_avg": [ 8.666666666666666, 6.342099196813483 ], "wc_reply_authors_avg": [ 1684.6666666666667, 1111.1364552665088 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 4.0, 2.160246899469287 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.9999999999999998, "corr_recommendation_correctness": 0.49999999999999983, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=304392083953863626&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=I_YZANaz5X", "email": "pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn;tencent.com;nvidia.com;pku.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;0;1;2;0", "aff_unique_norm": "Peking University;Tencent;NVIDIA", "aff_unique_dep": ";Tencent AI Lab;NVIDIA Corporation", "aff_unique_url": "http://www.pku.edu.cn;https://ai.tencent.com;https://www.nvidia.com", "aff_unique_abbr": "Peking U;Tencent AI Lab;NVIDIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;1;0", "aff_country_unique": "China;United States" }, { "id": "IajGRJuM7D3", "title": "Stable, Efficient, and Flexible Monotone Operator Implicit Graph Neural Networks", "track": "main", "status": "Reject", "tldr": "We propose stable, efficient, and flexible implicit graph neural networks leveraging monotone operator theory", "abstract": "Implicit graph neural networks (IGNNs) that solve a fixed-point equilibrium equation for representation learning can learn the long-range dependencies (LRD) in the underlying graphs and show remarkable performance for various graph learning tasks. However, the expressivity of IGNNs is limited by the constraints for their well-posedness guarantee. Moreover, when IGNNs become effective for learning LRD, their eigenvalues converge to the value that slows down the convergence, and their performance is unstable across different tasks. In this paper, we provide a new well-posedness condition of IGNNs leveraging monotone operator theory. The new well-posedness characterization informs us to design effective parameterizations to improve the accuracy, efficiency, and stability of IGNNs. 
Leveraging accelerated operator splitting schemes and graph diffusion convolution, we design efficient and flexible implementations of monotone operator IGNNs that are significantly faster and more accurate than existing IGNNs.", "keywords": "implicit graph neural networks;monotone operator;accelerated operator splitting;orthogonal parameterization", "primary_area": "", "supplementary_material": "/attachment/4ef3b0e89100df55cb5de7c0db98ddd5a37b9f85.zip", "author": "Justin Baker;Qingsong Wang;Bao Wang", "authorids": "~Justin_Baker1;~Qingsong_Wang1;~Bao_Wang1", "gender": ";;M", "homepage": ";;https://www.math.utah.edu/~bwang/index.html", "dblp": ";;", "google_scholar": ";;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Justin_Baker1;~Qingsong_Wang1;~Bao_Wang1", "aff": ";;University of Utah", "aff_domain": ";;utah.edu", "position": ";;Assistant Professor", "bibtex": "@misc{\nbaker2023stable,\ntitle={Stable, Efficient, and Flexible Monotone Operator Implicit Graph Neural Networks},\nauthor={Justin Baker and Qingsong Wang and Bao Wang},\nyear={2023},\nurl={https://openreview.net/forum?id=IajGRJuM7D3}\n}", "github": "", "project": "", "reviewers": "FLVV;Y8Ag;Ti7k;h7JA", "site": "https://openreview.net/forum?id=IajGRJuM7D3", "pdf_size": 1119285, "recommendation": "5;5;6;6", "confidence": "4;2;4;4", "correctness": "3;3;3;2", "technical_novelty": "3;2;3;2", "empirical_novelty": "2;3;2;2", "wc_summary_paper": "58;120;63;55", "wc_strength_and_weaknesses": "451;60;412;404", "wc_clarity_quality_novelty_and_reproducibility": "51;550;71;16", "wc_summary_review": "45;63;40;48", "wc_review": "605;793;586;523", "wc_reply_reviewers": "73;217;221;23", "wc_reply_authors": "2689;2038;2170;2124", "reply_reviewers": "1;1;2;1", "reply_authors": "5;5;5;6", "recommendation_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 74.0, 26.711420778386163 ], "wc_strength_and_weaknesses_avg": [ 331.75, 157.89929543858008 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 172.0, 219.12439389533972 ], "wc_summary_review_avg": [ 49.0, 8.573214099741124 ], "wc_review_avg": [ 626.75, 100.6687016902473 ], "wc_reply_reviewers_avg": [ 133.5, 87.31981447529536 ], "wc_reply_authors_avg": [ 2255.25, 254.86798053109771 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 5.25, 0.4330127018922193 ], "replies_avg": [ 33, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:uBM2DT9q5lEJ:scholar.google.com/&scioq=Stable,+Efficient,+and+Flexible+Monotone+Operator+Implicit+Graph+Neural+Networks&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "University of Utah", "aff_unique_dep": "", "aff_unique_url": "https://www.utah.edu", "aff_unique_abbr": "Utah", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Diffusion Models for Causal Discovery via Topological Ordering", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11398", "id": "Idusfje4-Wq", "poster": "/media/PosterPDFs/ICLR%202023/11398.png?t=1680861855.0333543", "openreview": "https://openreview.net/forum?id=Idusfje4-Wq", "slides": "https://iclr.cc/virtual/2023/poster/11398", "video": 
"https://iclr.cc/virtual/2023/poster/11398", "author_site": "Pedro Sanchez, Xiao Liu, Alison O'Neil, Sotirios Tsaftaris", "tldr": "We use diffusion models for causal discovery by iteratively finding and removing leaves in causal graph, resulting in a efficient topological ordering algorithm for high-dimensional graphs.", "abstract": "Discovering causal relations from observational data becomes possible with additional assumptions such as considering the functional relations to be constrained as nonlinear with additive noise (ANM). Even with strong assumptions, causal discovery involves an expensive search problem over the space of directed acyclic graphs (DAGs). \\emph{Topological ordering} approaches reduce the optimisation space of causal discovery by searching over a permutation rather than graph space.\nFor ANMs, the \\emph{Hessian} of the data log-likelihood can be used for finding leaf nodes in a causal graph, allowing its topological ordering. However, existing computational methods for obtaining the Hessian still do not scale as the number of variables and the number of samples are increased. Therefore, inspired by recent innovations in diffusion probabilistic models (DPMs), we propose \\emph{DiffAN}, a topological ordering algorithm that leverages DPMs for learning a Hessian function. We introduce theory for updating the learned Hessian without re-training the neural network, and we show that computing with a subset of samples gives an accurate approximation of the ordering, which allows scaling to datasets with more samples and variables. We show empirically that our method scales exceptionally well to datasets with up to $500$ nodes and up to $10^5$ samples while still performing on par over small datasets with state-of-the-art causal discovery methods.\nImplementation is available at \\url{https://github.com/vios-s/DiffAN} .", "keywords": "Diffusion Models;Causal Discovery;Topological Ordering;Score-based Methods", "primary_area": "", "supplementary_material": "", "author": "Pedro Sanchez;Xiao Liu;Alison Q O'Neil;Sotirios A. Tsaftaris", "authorids": "~Pedro_Sanchez1;~Xiao_Liu13;~Alison_Q_O'Neil1;~Sotirios_A._Tsaftaris1", "gender": "M;M;;F", "homepage": "https://vios.science/team/sanchez;https://www.eng.ed.ac.uk/about/people/mr-xiao-liu;https://vios.science/;", "dblp": "14/8283;;14/613;167/9700", "google_scholar": "KPchGe4AAAAJ;Cxyloc4AAAAJ;jC1uFnYAAAAJ;", "orcid": "0000-0003-2435-3049;;;", "linkedin": "https://linkedin.com/in/pedro-sanches-ppsg;xiao-liu-020b59158/;;", "or_profile": "~Pedro_Sanchez1;~Xiao_Liu13;~Sotirios_A._Tsaftaris1;~Alison_O'Neil1", "aff": "University of Edinburgh, University of Edinburgh;University of Edinburgh;University of Edinburgh;Canon Medical Research Europe", "aff_domain": "ed.ac.uk;ed.ac.uk;ed.ac.uk;medical.canon", "position": "PhD student;PhD student;Professor in machine learning and computer vision;Scientist", "bibtex": "@inproceedings{\nsanchez2023diffusion,\ntitle={Diffusion Models for Causal Discovery via Topological Ordering},\nauthor={Pedro Sanchez and Xiao Liu and Alison Q O'Neil and Sotirios A. 
Tsaftaris},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=Idusfje4-Wq}\n}", "github": "", "project": "", "reviewers": "YaE5;1RL4;vVJW;F1bu", "pdf_size": 799934, "recommendation": "5;5;6;8", "confidence": "4;4;3;3", "correctness": "3;2;4;3", "technical_novelty": "4;2;3;3", "empirical_novelty": "2;2;0;3", "wc_summary_paper": "53;94;79;57", "wc_strength_and_weaknesses": "276;295;64;60", "wc_clarity_quality_novelty_and_reproducibility": "83;54;155;25", "wc_summary_review": "104;39;69;21", "wc_review": "516;482;367;163", "wc_reply_reviewers": "270;294;0;0", "wc_reply_authors": "1026;1104;389;133", "reply_reviewers": "2;1;0;0", "reply_authors": "3;2;1;1", "recommendation_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 1.75, 1.0897247358851685 ], "wc_summary_paper_avg": [ 70.75, 16.67895380412093 ], "wc_strength_and_weaknesses_avg": [ 173.75, 111.96065157009403 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 79.25, 48.30307961196677 ], "wc_summary_review_avg": [ 58.25, 31.491070162825526 ], "wc_review_avg": [ 382.0, 137.96919946132905 ], "wc_reply_reviewers_avg": [ 141.0, 141.25508840392263 ], "wc_reply_authors_avg": [ 663.0, 412.98486655082166 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.8164965809277259, "corr_recommendation_correctness": 0.28867513459481287, "gs_citation": 54, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9889748450746301237&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=Idusfje4-Wq", "email": "ed.ac.uk;ed.ac.uk;ed.ac.uk;medical.canon", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University of Edinburgh;Canon Medical Research Europe", "aff_unique_dep": ";", "aff_unique_url": "https://www.ed.ac.uk;https://www.canon-europe.com/en/research-development/canon-medical-research-europe", "aff_unique_abbr": "Edinburgh;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "Iewi8zwGsZr", "title": "Promoting Semantic Connectivity: Dual Nearest Neighbors Contrastive Learning for Unsupervised Domain Generalization", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Domain Generalization (DG) has achieved great success in generalizing knowledge from source domains to unseen target domains. However, current DG methods rely heavily on labeled source data, which are usually costly or even unavailable. Thus, we study a more practical unsupervised domain generalization (UDG) problem. Learning invariant visual representations from different views, i.e., contrastive learning, promises good semantic features for in-domain unsupervised learning. However, it fails in cross-domain scenarios. In this paper, we first delve into the failure of vanilla contrastive learning and point out that semantic connectivity is the key to UDG. Specifically, suppressing the intra-domain connectivity and encouraging the intra-class connectivity help to learn domain-invariant semantic information.
Then, we propose a novel unsupervised domain generalization approach, namely Dual Nearest Neighbors contrastive learning with strong Augmentation (DN$^2$A). DN$^2$A leverages strong augmentations to suppress the intra-domain connectivity and proposes a novel dual nearest neighbors search strategy to find trustworthy cross domain neighbors along with in-domain neighbors to encourage intra-class connectivity. Experimental results demonstrate that our DN$^2$A outperforms the state-of-the-art by a large margin, e.g., 12.01% and 13.11% accuracy gain with only 1% labels for linear evaluation on PACS and DomainNet, respectively. ", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Yuchen Liu;Yaoming Wang;Yabo Chen;Wenrui Dai;Chenglin Li;Junni Zou;Hongkai Xiong", "authorids": "~Yuchen_Liu4;~Yaoming_Wang1;~Yabo_Chen1;~Wenrui_Dai1;~Chenglin_Li2;~Junni_Zou1;~Hongkai_Xiong1", "gender": ";;M;;M;F;M", "homepage": "https://min.sjtu.edu.cn/;;;;https://min.sjtu.edu.cn/En/FacultyShow/4?Vid=17;http://www.cs.sjtu.edu.cn/~zou-jn;http://min.sjtu.edu.cn", "dblp": "69/10440-6;;96/8624.html;16/5135.html;;91/4613;21/3569", "google_scholar": "https://scholar.google.com.hk/citations?user=GRcH3nAAAAAJ;;6aHx1rgAAAAJ;Xg8MhyAAAAAJ;ltW2JMcAAAAJ;https://scholar.google.com/citations?hl=zh-CN;bB16iN4AAAAJ", "orcid": "0000-0002-3096-448X;;;;;;0000-0003-4552-0029", "linkedin": ";;;;;;", "or_profile": "~Yuchen_Liu4;~Yaoming_Wang1;~Yabo_Chen1;~Wenrui_Dai1;~Chenglin_Li2;~Junni_Zou1;~Hongkai_Xiong1", "aff": "Shanghai Jiaotong University;;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "position": "PhD student;;PhD student;Associate Professor;Full Professor;Full Professor;Full Professor", "bibtex": "@misc{\nliu2023promoting,\ntitle={Promoting Semantic Connectivity: Dual Nearest Neighbors Contrastive Learning for Unsupervised Domain Generalization},\nauthor={Yuchen Liu and Yaoming Wang and Yabo Chen and Wenrui Dai and Chenglin Li and Junni Zou and Hongkai Xiong},\nyear={2023},\nurl={https://openreview.net/forum?id=Iewi8zwGsZr}\n}", "github": "", "project": "", "reviewers": "fWJs;nXCD;2DB3;RWUy", "site": "https://openreview.net/forum?id=Iewi8zwGsZr", "pdf_size": 14638282, "recommendation": "3;5;6;6", "confidence": "5;3;4;3", "correctness": "2;3;4;3", "technical_novelty": "2;3;3;4", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "104;97;79;45", "wc_strength_and_weaknesses": "296;261;286;274", "wc_clarity_quality_novelty_and_reproducibility": "54;40;25;3", "wc_summary_review": "15;42;6;47", "wc_review": "469;440;396;369", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "820;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "2;0;0;0", "recommendation_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 81.25, 22.829531313629722 ], "wc_strength_and_weaknesses_avg": [ 279.25, 13.102957681378658 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 30.5, 18.9010581714358 ], "wc_summary_review_avg": [ 27.5, 17.38533865071371 ], "wc_review_avg": [ 418.5, 38.62965182343739 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 205.0, 355.0704155516198 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 
0.5, 0.8660254037844386 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.7385489458759963, "corr_recommendation_correctness": 0.8660254037844386, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3246019407872400609&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "IfxsiXMZoNX", "title": "Debiasing the Pre-trained Language Model Through Fine-tuning the Downstream Tasks", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Recent studies have revealed that the widely-used pre-trained language models propagate societal biases from the large unmoderated pre-training corpora. Existing solutions mostly focused on debiasing the pre-training corpora or embedding models. Thus, these approaches need a separate pre-training process and extra training datasets, which are resource-intensive and costly. Indeed, studies showed that these approaches hurt the models' performance on downstream tasks. In this study, we focus on gender debiasing and propose Gender-tuning, which comprises two training processes: gender-word perturbation and fine-tuning. This combination aims to interrupt gender-word association with other words in training examples and classifies the perturbed example according to the ground-truth label. Gender-tuning uses a joint loss for training both the perturbation model and fine-tuning. Comprehensive experiments show that Gender-tuning effectively reduces gender bias scores in pre-trained language models and, at the same time, improves performance on downstream tasks. Gender-tuning is applicable as a plug-and-play debiasing tool for pre-trained language models. 
The source\ncode and pre-trained models will be available on the author\u2019s GitHub page.", "keywords": "NLP;Debiasing pre-trained language model;Social biases;Robustness", "primary_area": "", "supplementary_material": "", "author": "Somayeh Ghanbarzadeh;Yan Huang;Hamid Palangi;Radames Cruz Moreno;Hamed Khanpour", "authorids": "~Somayeh_Ghanbarzadeh1;~Yan_Huang5;~Hamid_Palangi1;~Radames_Cruz_Moreno1;~Hamed_Khanpour2", "gender": "F;F;M;M;M", "homepage": ";http://www.cs.unt.edu;https://www.hamidpalangi.com/;;", "dblp": ";75/6434-2.html;01/963;;191/6085", "google_scholar": ";dRc-rTAAAAAJ;https://scholar.google.ca/citations?user=B1lAghgAAAAJ;;", "orcid": ";;;;", "linkedin": "somayeh-ghanbarzadeh-07aa178b/;;;radamescruz;", "or_profile": "~Somayeh_Ghanbarzadeh1;~Yan_Huang5;~Hamid_Palangi1;~Radames_Cruz_Moreno1;~Hamed_Khanpour2", "aff": ";, University of North Texas;Google;Microsoft;", "aff_domain": ";cse.unt.edu;google.com;microsoft.com;", "position": ";Full Professor;Staff Research Scientist;Software Engineer;", "bibtex": "@misc{\nghanbarzadeh2023,\ntitle={ Debiasing the Pre-trained Language Model Through Fine-tuning the Downstream Tasks},\nauthor={Somayeh Ghanbarzadeh and Yan Huang and Hamid Palangi and Radames Cruz Moreno and Hamed Khanpour},\nyear={2023},\nurl={https://openreview.net/forum?id=IfxsiXMZoNX}\n}", "github": "", "project": "", "reviewers": "", "site": "https://openreview.net/forum?id=IfxsiXMZoNX", "pdf_size": 281303, "recommendation": "", "confidence": "", "correctness": "", "technical_novelty": "", "empirical_novelty": "", "wc_summary_paper": "", "wc_strength_and_weaknesses": "", "wc_clarity_quality_novelty_and_reproducibility": "", "wc_summary_review": "", "wc_review": "", "wc_reply_reviewers": "", "wc_reply_authors": "", "reply_reviewers": "", "reply_authors": "", "recommendation_avg": [ 0, 0 ], "confidence_avg": [ 0, 0 ], "correctness_avg": [ 0, 0 ], "technical_novelty_avg": [ 0, 0 ], "empirical_novelty_avg": [ 0, 0 ], "wc_summary_paper_avg": [ 0, 0 ], "wc_strength_and_weaknesses_avg": [ 0, 0 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 0, 0 ], "wc_summary_review_avg": [ 0, 0 ], "wc_review_avg": [ 0, 0 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 1, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0, "corr_recommendation_correctness": 0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5997859667342405273&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of North Texas;Google;Microsoft", "aff_unique_dep": ";Google;Microsoft Corporation", "aff_unique_url": "https://www.unt.edu;https://www.google.com;https://www.microsoft.com", "aff_unique_abbr": "UNT;Google;Microsoft", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "Ih0fKoIUyEh", "title": "Wide Graph Neural Network", "track": "main", "status": "Reject", "tldr": "This paper proposes a unified view to understand GNNs, and it motivates a new model called wide graph neural network.", "abstract": "Usually, graph neural networks (GNNs) suffer from several problems, e.g., over-smoothing (in the spatial domain), poor flexibility (in the spectral domain), and low performance on heterophily (in both domains). 
In this paper, we provide a new GNN framework, called Wide Graph Neural Networks (WGNN) to solve these problems. It is motivated by our proposed unified view of GNNs from the perspective of dictionary learning. In light of this view, we formulate the graph learning in GNNs as learning representations from the dictionaries, where the fixed graph information is regarded as the dictionary and the trainable parameters are representations. Then, the dictionaries of spatial GNNs encode the adjacency matrix multiplication, while spectral ones sum its polynomials. Differently, WGNN directly concatenates all polynomials as the dictionary, where each polynomial is a sub-dictionary. Beyond polynomials, WGNN allows sub-dictionaries with an arbitrary size, for instance, the principal components of the adjacency matrix. This wide concatenation structure enjoys the great capability of avoiding over-smoothing and promoting flexibility, while the supplement of principal components can significantly improve the representation of heterophilic graphs. We provide a detailed theoretical analysis and conduct extensive experiments on eight datasets to demonstrate the superiority of the proposed WGNN. ", "keywords": "Graph neural networks;representation learning;dictionary learning", "primary_area": "", "supplementary_material": "/attachment/e74f872aca0c6075fd99816f291d74b400df0f3a.zip", "author": "Jiaqi Sun;Lin Zhang;Guangyi Chen;Kun Zhang;Peng XU;Yujiu Yang", "authorids": "~Jiaqi_Sun1;~Lin_Zhang9;~Guangyi_Chen1;~Kun_Zhang1;~Peng_XU10;~Yujiu_Yang2", "gender": ";;M;M;;M", "homepage": ";;https://chengy12.github.io/;http://www.andrew.cmu.edu/user/kunz1/;;https://sites.google.com/view/iigroup-thu", "dblp": ";;c/GuangyiChen-2;96/3115-1;;30/3847", "google_scholar": ";;https://scholar.google.com/citations?hl=zh-CN;RGoypN4AAAAJ;;4gH3sxsAAAAJ", "orcid": ";;;;;0000-0002-6427-1024", "linkedin": ";;;;;", "or_profile": "~Jiaqi_Sun1;~Lin_Zhang9;~Guangyi_Chen1;~Kun_Zhang1;~Peng_XU10;~Yujiu_Yang2", "aff": ";;Carnegie Mellon University;Carnegie Mellon University;;Tsinghua University", "aff_domain": ";;cmu.edu;cmu.edu;;tsinghua.edu.cn", "position": ";;Postdoc;Associate Professor;;Associate Professor", "bibtex": "@misc{\nsun2023wide,\ntitle={Wide Graph Neural Network},\nauthor={Jiaqi Sun and Lin Zhang and Guangyi Chen and Kun Zhang and Peng XU and Yujiu Yang},\nyear={2023},\nurl={https://openreview.net/forum?id=Ih0fKoIUyEh}\n}", "github": "", "project": "", "reviewers": "KH5p;HybF;Hwom;Uh1P", "site": "https://openreview.net/forum?id=Ih0fKoIUyEh", "pdf_size": 3016078, "recommendation": "1;3;6;6", "confidence": "4;4;4;3", "correctness": "1;3;4;3", "technical_novelty": "1;2;3;3", "empirical_novelty": "1;2;4;3", "wc_summary_paper": "62;31;41;73", "wc_strength_and_weaknesses": "497;293;372;292", "wc_clarity_quality_novelty_and_reproducibility": "67;47;43;24", "wc_summary_review": "32;17;79;69", "wc_review": "658;388;535;458", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "57;55;57;57", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.0, 2.1213203435596424 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.75, 1.0897247358851685 ], "technical_novelty_avg": [ 2.25, 0.82915619758885 ], "empirical_novelty_avg": [ 2.5, 1.118033988749895 ], "wc_summary_paper_avg": [ 51.75, 16.60383991732033 ], "wc_strength_and_weaknesses_avg": [ 363.5, 83.63163277133839 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 45.25, 15.270478054075452 ], "wc_summary_review_avg": [ 49.25, 
25.557533136044253 ], "wc_review_avg": [ 509.75, 100.1458311663546 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 56.5, 0.8660254037844386 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.5443310539518174, "corr_recommendation_correctness": 0.8651809126974003, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16011321519748040522&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;1", "aff_unique_norm": "Carnegie Mellon University;Tsinghua University", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://www.tsinghua.edu.cn", "aff_unique_abbr": "CMU;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;China" }, { "id": "IiDeZZZ18zi", "title": "ChemSpacE: Interpretable and Interactive Chemical Space Exploration", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Discovering meaningful molecules in the vast combinatorial chemical space has been a long-standing challenge in many fields from materials science to drug discovery. Recent advances in machine learning, especially generative models, have made remarkable progress and demonstrate considerable promise for automated molecule design. Nevertheless, most molecule generative models remain black-box systems, whose utility is limited by a lack of interpretability and human participation in the generation process. In this work we propose \\textbf{Chem}ical \\textbf{Spac}e \\textbf{E}xplorer (ChemSpacE), a simple yet effective method for exploring the chemical space with pre-trained deep generative models. It enables users to interact with existing generative models and inform the molecule generation process. \nWe demonstrate the efficacy of ChemSpacE on the molecule optimization task and the molecule manipulation task in single property and multi-property settings. On the molecule optimization task, the performance of ChemSpacE is on par with previous black-box optimization methods yet is considerably faster and more sample efficient. 
Furthermore, the interface from ChemSpacE facilitates human-in-the-loop chemical space exploration and interactive molecule design.", "keywords": "Molecule Generation;Molecule Manipulation;Human-in-the-loop Molecule Design;Chemical Space Exploration", "primary_area": "", "supplementary_material": "/attachment/36837bb2d7a60968e069cea560a8d7e8019cf003.zip", "author": "Yuanqi Du;Xian Liu;Nilay Mahesh Shah;Shengchao Liu;Jieyu Zhang;Bolei Zhou", "authorids": "~Yuanqi_Du1;~Xian_Liu1;~Nilay_Mahesh_Shah1;~Shengchao_Liu1;~Jieyu_Zhang1;~Bolei_Zhou5", "gender": "M;M;M;M;M;M", "homepage": "https://yuanqidu.github.io/;https://alvinliu0.github.io/;;https://chao1224.github.io/;https://jieyuz2.github.io/;https://boleizhou.github.io/", "dblp": "266/2837;;;;;46/8066", "google_scholar": "fAc_zZMAAAAJ;https://scholar.google.com/citations?hl=en-us;;F1ws3XUAAAAJ;T_INUHUAAAAJ;9D4aG8AAAAAJ", "orcid": ";0000-0001-9817-7418;;0000-0003-2030-2367;0000-0002-1846-2436;", "linkedin": ";xian-liu-9840b52a3/;nilaymshah/;;jieyu-zhang-3baaa8154/;", "or_profile": "~Yuanqi_Du1;~Xian_Liu1;~Nilay_Mahesh_Shah1;~Shengchao_Liu1;~Jieyu_Zhang1;~Bolei_Zhou5", "aff": "Cornell University;The Chinese University of Hong Kong;;MILA-UdeM;University of Washington;University of California, Los Angeles", "aff_domain": "cornell.edu;cuhk.edu.hk;;mila.quebec;cs.washington.edu;ucla.edu", "position": "PhD student;PhD student;;PhD student;PhD student;Assistant Professor", "bibtex": "@misc{\ndu2023chemspace,\ntitle={ChemSpacE: Interpretable and Interactive Chemical Space Exploration},\nauthor={Yuanqi Du and Xian Liu and Nilay Mahesh Shah and Shengchao Liu and Jieyu Zhang and Bolei Zhou},\nyear={2023},\nurl={https://openreview.net/forum?id=IiDeZZZ18zi}\n}", "github": "", "project": "", "reviewers": "jFQ4;mVMw;ohmp;89EC", "site": "https://openreview.net/forum?id=IiDeZZZ18zi", "pdf_size": 3253086, "recommendation": "3;3;5;5", "confidence": "3;3;3;2", "correctness": "2;4;2;4", "technical_novelty": "2;2;3;2", "empirical_novelty": "2;2;3;2", "wc_summary_paper": "97;84;58;67", "wc_strength_and_weaknesses": "213;254;284;80", "wc_clarity_quality_novelty_and_reproducibility": "661;50;34;24", "wc_summary_review": "121;30;48;207", "wc_review": "1092;418;424;378", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "correctness_avg": [ 3.0, 1.0 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 76.5, 15.074813431681335 ], "wc_strength_and_weaknesses_avg": [ 207.75, 77.94348914437947 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 192.25, 270.79177886339164 ], "wc_summary_review_avg": [ 101.5, 69.79434074479104 ], "wc_review_avg": [ 578.0, 297.28437564056406 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.0, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17502106062879348873&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Cornell University;Chinese University of Hong Kong;Mila;University of Washington;University of California, Los Angeles", "aff_unique_dep": ";;Montreal Institute for Learning 
Algorithms;;", "aff_unique_url": "https://www.cornell.edu;https://www.cuhk.edu.hk;https://mila.quebec;https://www.washington.edu;https://www.ucla.edu", "aff_unique_abbr": "Cornell;CUHK;MILA;UW;UCLA", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Hong Kong SAR;Los Angeles", "aff_country_unique_index": "0;1;2;0;0", "aff_country_unique": "United States;China;Canada" }, { "id": "Iki4ufHeEGN", "title": "Hybrid and Collaborative Passage Reranking", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "In an information retrieval system, the initial passage retrieval results may be unsatisfactory, which can be refined by a reranking scheme. Existing solutions to passage reranking focus on enriching the interaction between query and each passage separately, neglecting the context among the top-ranked passages in the initial retrieval list. To tackle this problem, we propose a Hybrid and Collaborative Passage Reranking (HybRank) method, which leverages the substantial similarity measurements of upstream retrievers for passage collaboration and incorporates the lexical and semantic properties of sparse and dense retrievers for reranking. Besides, built on off-the-shelf retriever features, the flexible plug-in HybRank is capable of enhancing an arbitrary passage list. Extensive experiments demonstrate stable performance improvements over prevalent retrieval methods, and verify the effectiveness of the core components in HybRank.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Zongmeng Zhang;Wengang Zhou;Jiaxin Shi;Houqiang Li", "authorids": "~Zongmeng_Zhang1;~Wengang_Zhou1;~Jiaxin_Shi3;~Houqiang_Li1", "gender": "M;M;M;M", "homepage": "https://zmzhang2000.github.io/;http://staff.ustc.edu.cn/~zhwg/index.html;;https://staff.ustc.edu.cn/~lihq/", "dblp": "303/1427;22/4544-1;;59/7017.html", "google_scholar": "yKVZMKMAAAAJ;8s1JF8YAAAAJ;8XcQHUEAAAAJ;7sFMIKoAAAAJ", "orcid": "0000-0003-3880-8913;0000-0003-1690-9836;;0000-0003-2188-3028", "linkedin": ";;;", "or_profile": "~Zongmeng_Zhang1;~Wengang_Zhou1;~Jiaxin_Shi3;~Houqiang_Li1", "aff": "University of Science and Technology of China;University of Science and Technology of China;Huawei Technologies Ltd.;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;ustc.edu.cn;huawei.com;ustc.edu.cn", "position": "PhD student;Full Professor;Researcher;Professor", "bibtex": "@misc{\nzhang2023hybrid,\ntitle={Hybrid and Collaborative Passage Reranking},\nauthor={Zongmeng Zhang and Wengang Zhou and Jiaxin Shi and Houqiang Li},\nyear={2023},\nurl={https://openreview.net/forum?id=Iki4ufHeEGN}\n}", "github": "", "project": "", "reviewers": "r4pr;aG4A;2aeu", "site": "https://openreview.net/forum?id=Iki4ufHeEGN", "pdf_size": 377487, "recommendation": "3;5;5", "confidence": "4;3;4", "correctness": "3;3;4", "technical_novelty": "2;3;2", "empirical_novelty": "2;2;3", "wc_summary_paper": "90;58;166", "wc_strength_and_weaknesses": "224;337;45", "wc_clarity_quality_novelty_and_reproducibility": "42;118;7", "wc_summary_review": "41;68;38", "wc_review": "397;581;256", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1162;1371;357", "reply_reviewers": "0;0;0", "reply_authors": "2;3;1", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], 
"wc_summary_paper_avg": [ 104.66666666666667, 45.29410067056808 ], "wc_strength_and_weaknesses_avg": [ 202.0, 120.21924416110204 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 55.666666666666664, 46.33453235858639 ], "wc_summary_review_avg": [ 49.0, 13.490737563232042 ], "wc_review_avg": [ 411.3333333333333, 133.06723446772648 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 963.3333333333334, 437.1501140594867 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.49999999999999983, "corr_recommendation_correctness": 0.49999999999999983, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4135178818011643921&as_sdt=805&sciodt=0,3&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Science and Technology of China;Huawei", "aff_unique_dep": ";Huawei Technologies", "aff_unique_url": "http://www.ustc.edu.cn;https://www.huawei.com", "aff_unique_abbr": "USTC;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "IlkQffBxiC7", "title": "Breaking Large Language Model-based Code Generation", "track": "main", "status": "Withdraw", "tldr": "We present BreaC, a novel method for breaking large language model-based code generators such that they excessively generate erroneous code.", "abstract": "We propose BreaC, a new method for attacking large language models (LLMs) to excessively generate erroneous code. BreaC works by training a class-conditional language model (CCLM) that conditions code generation on a binary attribute specifying whether the output code should contain errors. The CCLM is not only able to generate erroneous programs but can also control other, much larger LLMs to do so without access to their weights. The training of the CCLM leverages unlikelihood training, as well as reinforcement learning that treats the two generation branches of the CCLM as adversaries. We instantiate BreaC on the task of generating code with compilation and parsing errors. Our extensive evaluation demonstrates that BreaC is effective in both adversarial and benign scenarios. For the adversarial scenario, BreaC greatly reduces the compilation rate of various LLMs while maintaining the perplexity of generated programs. For the benign scenario, BreaC is able to produce realistic erroneous programs from correct programs, enabling one to construct parallel training datasets. 
We demonstrate the high utility of these datasets by training neural bug fixers that significantly surpass the state-of-the-art.", "keywords": "large language models;code generation;controlled generation;attacks;reliability;reinforcement learning", "primary_area": "", "supplementary_material": "", "author": "Jingxuan He;Jiacheng Shen;Martin Vechev", "authorids": "~Jingxuan_He1;~Jiacheng_Shen1;~Martin_Vechev1", "gender": "M;M;M", "homepage": "https://www.sri.inf.ethz.ch/people/jingxuan;https://jachinshen.github.io/;https://www.sri.inf.ethz.ch/people/martin", "dblp": ";;93/2189.html", "google_scholar": "ylHZY58AAAAJ;;https://scholar.google.ch/citations?user=aZ1Rh50AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Jingxuan_He1;~Jiacheng_Shen1;~Martin_Vechev1", "aff": "ETHZ - ETH Zurich;;Swiss Federal Institute of Technology", "aff_domain": "ethz.ch;;ethz.ch", "position": "PhD student;;Full Professor", "bibtex": "@misc{\nhe2023breaking,\ntitle={Breaking Large Language Model-based Code Generation},\nauthor={Jingxuan He and Jiacheng Shen and Martin Vechev},\nyear={2023},\nurl={https://openreview.net/forum?id=IlkQffBxiC7}\n}", "github": "", "project": "", "reviewers": "yVCQ;3XcL;gHAT", "site": "https://openreview.net/forum?id=IlkQffBxiC7", "pdf_size": 315514, "recommendation": "3;3;6", "confidence": "4;3;3", "correctness": "2;2;4", "technical_novelty": "2;2;3", "empirical_novelty": "2;2;3", "wc_summary_paper": "47;140;122", "wc_strength_and_weaknesses": "676;247;84", "wc_clarity_quality_novelty_and_reproducibility": "59;51;63", "wc_summary_review": "38;43;27", "wc_review": "820;481;296", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "205;90;0", "reply_reviewers": "0;0;0", "reply_authors": "1;1;0", "recommendation_avg": [ 4.0, 1.4142135623730951 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.9428090415820634 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 103.0, 40.27406113120453 ], "wc_strength_and_weaknesses_avg": [ 335.6666666666667, 249.68291001899902 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 57.666666666666664, 4.988876515698588 ], "wc_summary_review_avg": [ 36.0, 6.683312551921141 ], "wc_review_avg": [ 532.3333333333334, 216.9797737629529 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 98.33333333333333, 83.8980863243548 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0.6666666666666666, 0.4714045207910317 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.5, "corr_recommendation_correctness": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:TVbiT-b_om4J:scholar.google.com/&scioq=Breaking+Large+Language+Model-based+Code+Generation&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "ETH Zurich;Swiss Federal Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.ethz.ch;https://www.ethz.ch", "aff_unique_abbr": "ETHZ;ETH Zurich", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Switzerland" }, { "title": "Accurate Image Restoration with Attention Retractable Transformer", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12005", "id": "IloMJ5rqfnt", "poster": "/media/PosterPDFs/ICLR%202023/12005.png?t=1680759972.84462", 
"openreview": "https://openreview.net/forum?id=IloMJ5rqfnt", "slides": "https://iclr.cc/virtual/2023/poster/12005", "video": "https://iclr.cc/virtual/2023/poster/12005", "author_site": "Jiale Zhang, Yulun Zhang, Jinjin Gu, Yongbing Zhang, Linghe Kong, Xin Yuan", "tldr": "A new SOTA image restoration method attention retractable Transformer.", "abstract": "Recently, Transformer-based image restoration networks have achieved promising improvements over convolutional neural networks due to parameter-independent global interactions. To lower computational cost, existing works generally limit self-attention computation within non-overlapping windows. However, each group of tokens are always from a dense area of the image. This is considered as a dense attention strategy since the interactions of tokens are restrained in dense regions. Obviously, this strategy could result in restricted receptive fields. To address this issue, we propose \\textbf{A}ttention \\textbf{R}etractable \\textbf{T}ransformer (ART) for image restoration, which presents both dense and sparse attention modules in the network. The sparse attention module allows tokens from sparse areas to interact and thus provides a wider receptive field. Furthermore, the alternating application of dense and sparse attention modules greatly enhances representation ability of Transformer while providing retractable attention on the input image.We conduct extensive experiments on image super-resolution, denoising, and JPEG compression artifact reduction tasks. Experimental results validate that our proposed ART outperforms state-of-the-art methods on various benchmark datasets both quantitatively and visually. We also provide code and models at~\\url{https://github.com/gladzhang/ART}.", "keywords": "Image restoration;Dense and sparse attention", "primary_area": "", "supplementary_material": "/attachment/77fe90ca1af98ee50e60942c3068310e2c0a3ee8.zip", "author": "Jiale Zhang;Yulun Zhang;Jinjin Gu;Yongbing Zhang;Linghe Kong;Xin Yuan", "authorids": "~Jiale_Zhang3;~Yulun_Zhang1;~Jinjin_Gu1;~Yongbing_Zhang1;~Linghe_Kong1;~Xin_Yuan4", "gender": "M;M;M;M;M;M", "homepage": ";http://yulunzhang.com/;http://www.jasongt.com;;https://www.cs.sjtu.edu.cn/~linghe.kong/;https://en.westlake.edu.cn/faculty/xin-yuan.html", "dblp": ";166/2763-1.html;209/5709;95/5329;23/7909;78/713-2", "google_scholar": ";ORmLjWoAAAAJ;uMQ-G-QAAAAJ;;https://scholar.google.com.tw/citations?user=-wm2X-8AAAAJ;cS9CbWkAAAAJ", "orcid": ";0000-0002-2288-5079;0000-0002-4389-6236;;0000-0001-9266-3044;0000-0002-8311-7524", "linkedin": "zhangjiale01801423b;yulun-zhang-1116b5b9/;jinjingu;;;xin-yuan-0024bb31/", "or_profile": "~Jiale_Zhang3;~Yulun_Zhang1;~Jinjin_Gu1;~Yongbing_Zhang1;~Linghe_Kong1;~Xin_Yuan4", "aff": "Shanghai Jiaotong University;Swiss Federal Institute of Technology;University of Sydney;Harbin Institute of Technology;Shanghai Jiaotong University;Westlake University", "aff_domain": "sjtu.edu.cn;ethz.ch;sydney.edu.au;hit.edu.cn;sjtu.edu.cn;westlake.edu.cn", "position": "MS student;Postdoc;PhD student;Full Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nzhang2023accurate,\ntitle={Accurate Image Restoration with Attention Retractable Transformer},\nauthor={Jiale Zhang and Yulun Zhang and Jinjin Gu and Yongbing Zhang and Linghe Kong and Xin Yuan},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=IloMJ5rqfnt}\n}", "github": "", "project": "", "reviewers": 
"dTRi;f4x2;wRmT;eLG6", "pdf_size": 2149270, "recommendation": "6;8;8;8", "confidence": "4;5;5;5", "correctness": "3;4;4;3", "technical_novelty": "3;3;4;3", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "78;38;57;38", "wc_strength_and_weaknesses": "209;284;403;271", "wc_clarity_quality_novelty_and_reproducibility": "38;22;38;33", "wc_summary_review": "52;8;30;39", "wc_review": "377;352;528;381", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1884;323;700;671", "reply_reviewers": "0;0;0;0", "reply_authors": "3;1;1;1", "recommendation_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 52.75, 16.513252253871737 ], "wc_strength_and_weaknesses_avg": [ 291.75, 70.20461167188378 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 32.75, 6.53356717268599 ], "wc_summary_review_avg": [ 32.25, 16.037066439969625 ], "wc_review_avg": [ 409.5, 69.31269724949391 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 894.5, 590.234063740818 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 1.0, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 134, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17330891137498389312&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=IloMJ5rqfnt", "email": "sjtu.edu.cn;ethz.ch;sydney.edu.au;hit.edu.cn;sjtu.edu.cn;westlake.edu.cn", "author_num": 6, "aff_unique_index": "0;1;2;3;0;4", "aff_unique_norm": "Shanghai Jiao Tong University;Swiss Federal Institute of Technology;University of Sydney;Harbin Institute of Technology;Westlake University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.ethz.ch;https://www.sydney.edu.au;http://www.hit.edu.cn/;https://www.westlake.edu.cn", "aff_unique_abbr": "SJTU;ETH Zurich;USYD;HIT;WU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Harbin", "aff_country_unique_index": "0;1;2;0;0;0", "aff_country_unique": "China;Switzerland;Australia" }, { "id": "Io0mSpdqnHJ", "title": "Contextual Subspace Approximation with Neural Householder Transforms", "track": "main", "status": "Reject", "tldr": "We propose a method that trains a neural network to compute a context-dependent basis for high dimensional actuation commands. ", "abstract": "Choosing an appropriate action representation is an integral part of solving robotic manipulation problems. Published approaches include latent action models which compress the control space into a low dimensional manifold. These involve training a conditional autoencoder, where the current observation and a low-dimensional action are passed through a neural network decoder to compute high dimensional actuation commands. Such models can have a large number of parameters, and can be difficult to interpret from a user perspective. In this work, we propose that similar performance gains in robotics tasks can be achieved by restructuring the neural network to map observations to a basis for a context-dependent linear actuation subspace. This results in an action interface wherein a user\u2019s actions determine a linear combination of a state-conditioned actuation basis. We introduce the Neural Householder Transform (NHT) as a method for computing this basis. 
Our results show that reinforcement learning agents trained with NHT in kinematic manipulation and locomotion environments tend to be more robust to hyperparameter choice and achieve higher final success rates compared to agents trained with alternative action representations. NHT agents outperformed agents trained with joint velocity/torque actions, agents trained with an SVD actuation basis, and agents trained with a LASER action interface in the WAMWipe, WAMGrasp, and HalfCheetah environments.", "keywords": "robotics;RL;representation learning", "primary_area": "", "supplementary_material": "", "author": "Kerrick Johnstonbaugh;Michael Przystupa;Jacob Keller;Martin Jagersand", "authorids": "~Kerrick_Johnstonbaugh1;~Michael_Przystupa1;jjkeller@ucsd.edu;~Martin_Jagersand1", "gender": "M;M;;M", "homepage": ";https://github.com/gamerDecathlete;;http://www.ualberta.ca/science/about-us/contact-us/faculty-directory/martin-jagersand", "dblp": ";;;", "google_scholar": "EvxlxPgAAAAJ;;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Kerrick_Johnstonbaugh1;~Michael_Przystupa1;jjkeller@ucsd.edu;~Martin_Jagersand1", "aff": "Huawei Technologies Ltd.;University of Alberta;;University of Alberta", "aff_domain": "huawei.com;ualberta.ca;;", "position": "Associate Researcher;PhD student;;Full Professor", "bibtex": "@misc{\njohnstonbaugh2023contextual,\ntitle={Contextual Subspace Approximation with Neural Householder Transforms},\nauthor={Kerrick Johnstonbaugh and Michael Przystupa and Jacob Keller and Martin Jagersand},\nyear={2023},\nurl={https://openreview.net/forum?id=Io0mSpdqnHJ}\n}", "github": "", "project": "", "reviewers": "2zLC;1kbF;XySg", "site": "https://openreview.net/forum?id=Io0mSpdqnHJ", "pdf_size": 6447734, "recommendation": "5;5;5", "confidence": "4;4;4", "correctness": "4;3;3", "technical_novelty": "3;3;2", "empirical_novelty": "3;3;2", "wc_summary_paper": "75;149;93", "wc_strength_and_weaknesses": "160;175;61", "wc_clarity_quality_novelty_and_reproducibility": "123;1;93", "wc_summary_review": "170;59;38", "wc_review": "528;384;285", "wc_reply_reviewers": "66;86;0", "wc_reply_authors": "1459;547;1032", "reply_reviewers": "1;1;0", "reply_authors": "9;4;5", "recommendation_avg": [ 5.0, 0.0 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 105.66666666666667, 31.510139461590597 ], "wc_strength_and_weaknesses_avg": [ 132.0, 50.57667446560717 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 72.33333333333333, 51.90589775952461 ], "wc_summary_review_avg": [ 89.0, 57.91372894228103 ], "wc_review_avg": [ 399.0, 99.76973488989535 ], "wc_reply_reviewers_avg": [ 50.666666666666664, 36.745370078721784 ], "wc_reply_authors_avg": [ 1012.6666666666666, 372.57333357191425 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 6.0, 2.160246899469287 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:J1BmaaNm3JUJ:scholar.google.com/&scioq=Contextual+Subspace+Approximation+with+Neural+Householder+Transforms&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;1", "aff_unique_norm": "Huawei;University of Alberta", "aff_unique_dep": "Huawei Technologies;", 
"aff_unique_url": "https://www.huawei.com;https://www.ualberta.ca", "aff_unique_abbr": "Huawei;UAlberta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "China;Canada" }, { "title": "GPViT: A High Resolution Non-Hierarchical Vision Transformer with Group Propagation", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11986", "id": "IowKt5rYWsK", "poster": "/media/PosterPDFs/ICLR%202023/11986.png?t=1681934700.63099", "openreview": "https://openreview.net/forum?id=IowKt5rYWsK", "slides": "https://iclr.cc/virtual/2023/poster/11986", "video": "https://iclr.cc/virtual/2023/poster/11986", "author_site": "Chenhongyi Yang, Jiarui Xu, Shalini De Mello, Elliot J Crowley, Xiaolong Wang", "tldr": "A high-resolution vision transformer architecture based on a new efficient global information exchange mechanism for general visual recognition.", "abstract": "We present the Group Propagation Vision Transformer (GPViT): a novel non- hierarchical (i.e. non-pyramidal) transformer model designed for general visual recognition with high-resolution features. High-resolution features (or tokens) are a natural fit for tasks that involve perceiving fine-grained details such as detection and segmentation, but exchanging global information between these features is expensive in memory and computation because of the way self-attention scales. We provide a highly efficient alternative Group Propagation Block (GP Block) to exchange global information. In each GP Block, features are first grouped to- gether by a fixed number of learnable group tokens; we then perform Group Propagation where global information is exchanged between the grouped fea- tures; finally, global information in the updated grouped features is returned back to the image features through a transformer decoder. We evaluate GPViT on a variety of visual recognition tasks including image classification, semantic seg- mentation, object detection, and instance segmentation. Our method achieves significant performance gains over previous works across all tasks, especially on tasks that require high-resolution outputs, for example, our GPViT-L3 out- performs Swin Transformer-B by 2.0 mIoU on ADE20K semantic segmentation with only half as many parameters. Code and pre-trained models are available at https://github.com/ChenhongyiYang/GPViT.", "keywords": "Visual Recognition;Vision transformer architecture", "primary_area": "", "supplementary_material": "", "author": "Chenhongyi Yang;Jiarui Xu;Shalini De Mello;Elliot J. 
Crowley;Xiaolong Wang", "authorids": "~Chenhongyi_Yang3;~Jiarui_Xu1;~Shalini_De_Mello1;~Elliot_J._Crowley1;~Xiaolong_Wang3", "gender": "M;M;Not Specified;M;M", "homepage": "https://chenhongyiyang.com;https://jerryxu.net/;https://research.nvidia.com/person/shalini-de-mello;https://elliotjcrowley.github.io;https://xiaolonw.github.io/", "dblp": "255/5498;;206/7364;157/3601;91/952-4", "google_scholar": "5i2hUToAAAAJ;2GKLw94AAAAJ;xQM4BlMAAAAJ;https://scholar.google.co.uk/citations?user=RyKtqiQAAAAJ;Y8O9N_0AAAAJ", "orcid": ";;;;", "linkedin": "chenhongyi-yang-9914571a1;;shalini-de-mello-02b8251/;;", "or_profile": "~Chenhongyi_Yang3;~Jiarui_Xu1;~Shalini_De_Mello1;~Elliot_J._Crowley1;~Xiaolong_Wang3", "aff": "University of Edinburgh, University of Edinburgh;University of California, San Diego;NVIDIA;University of Edinburgh;University of California, San Diego", "aff_domain": "ed.ac.uk;ucsd.edu;nvidia.com;ed.ac.uk;ucsd.edu", "position": "PhD student;PhD student;Principal Researcher;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nyang2023gpvit,\ntitle={{GPV}iT: A High Resolution Non-Hierarchical Vision Transformer with Group Propagation},\nauthor={Chenhongyi Yang and Jiarui Xu and Shalini De Mello and Elliot J. Crowley and Xiaolong Wang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=IowKt5rYWsK}\n}", "github": "", "project": "", "reviewers": "oVeK;vWHX;6Qyc", "pdf_size": 14390016, "recommendation": "5;8;10", "confidence": "3;3;4", "correctness": "3;3;4", "technical_novelty": "2;3;4", "empirical_novelty": "2;3;4", "wc_summary_paper": "140;74;129", "wc_strength_and_weaknesses": "511;132;112", "wc_clarity_quality_novelty_and_reproducibility": "125;71;2", "wc_summary_review": "69;36;22", "wc_review": "845;313;265", "wc_reply_reviewers": "0;0;21", "wc_reply_authors": "1124;302;182", "reply_reviewers": "0;0;1", "reply_authors": "3;1;1", "recommendation_avg": [ 7.666666666666667, 2.0548046676563256 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.816496580927726 ], "empirical_novelty_avg": [ 3.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 114.33333333333333, 28.871362204709975 ], "wc_strength_and_weaknesses_avg": [ 251.66666666666666, 183.55804410473425 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 66.0, 50.33885179461288 ], "wc_summary_review_avg": [ 42.333333333333336, 19.70335560817553 ], "wc_review_avg": [ 474.3333333333333, 262.83243500163536 ], "wc_reply_reviewers_avg": [ 7.0, 9.899494936611665 ], "wc_reply_authors_avg": [ 536.0, 418.6549892214352 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.8029550685469661, "corr_recommendation_correctness": 0.8029550685469661, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12492273528184038952&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=IowKt5rYWsK", "email": "ed.ac.uk;ucsd.edu;nvidia.com;ed.ac.uk;ucsd.edu", "author_num": 5, "aff_unique_index": "0;1;2;0;1", "aff_unique_norm": "University of Edinburgh;University of California, San Diego;NVIDIA", "aff_unique_dep": ";;NVIDIA Corporation", "aff_unique_url": "https://www.ed.ac.uk;https://www.ucsd.edu;https://www.nvidia.com", 
"aff_unique_abbr": "Edinburgh;UCSD;NVIDIA", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";San Diego", "aff_country_unique_index": "0;1;1;0;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Voint Cloud: Multi-View Point Cloud Representation for 3D Understanding", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11854", "id": "IpGgfpMucHj", "poster": "/media/PosterPDFs/ICLR%202023/11854.png?t=1680793014.7794576", "openreview": "https://openreview.net/forum?id=IpGgfpMucHj", "slides": "https://iclr.cc/virtual/2023/poster/11854", "video": "https://iclr.cc/virtual/2023/poster/11854", "author_site": "Abdullah Hamdi, Silvio Giancola, Bernard Ghanem", "tldr": "We propose voint cloud, a novel 3D data structure, that combines multi-view and point clouds for robust 3D understanding tasks.", "abstract": "Multi-view projection methods have demonstrated promising performance on 3D understanding tasks like 3D classification and segmentation. However, it remains unclear how to combine such multi-view methods with the widely available 3D point clouds. Previous methods use unlearned heuristics to combine features at the point level. To this end, we introduce the concept of the multi-view point cloud (Voint cloud), representing each 3D point as a set of features extracted from several view-points. This novel 3D Voint cloud representation combines the compactness of 3D point cloud representation with the natural view-awareness of multi-view representation. Naturally, we can equip this new representation with convolutional and pooling operations. We deploy a Voint neural network (VointNet) to learn representations in the Voint space. Our novel representation achieves state-of-the-art performance on 3D classification, shape retrieval, and robust 3D part segmentation on standard benchmarks ( ScanObjectNN, ShapeNet Core55, and ShapeNet Parts). 
Further analysis shows that VointNet improves the robustness to occlusion compared to other methods.", "keywords": "multi-view;point cloud;3D understanding", "primary_area": "", "supplementary_material": "/attachment/6257674ec65bbe0c67df50d3836201606a6d5876.zip", "author": "Abdullah Hamdi;Silvio Giancola;Bernard Ghanem", "authorids": "~Abdullah_Hamdi1;~Silvio_Giancola1;~Bernard_Ghanem1", "gender": "M;M;M", "homepage": "https://silviogiancola.com/;https://ivul.kaust.edu.sa;https://abdullahamdi.com/", "dblp": "173/3571;37/2516;205/2625", "google_scholar": "2kq5Zl0AAAAJ;rVsGTeEAAAAJ;tQkWPKAAAAAJ", "orcid": "0000-0002-3937-9834;0000-0002-5534-587X;0000-0003-3989-7540", "linkedin": "silvio-giancola/;bernardghanem/;ajhamdi/", "or_profile": "~Silvio_Giancola1;~Bernard_Ghanem1;~Abdullah_Jamal_Hamdi1", "aff": "KAUST;King Abdullah University of Science and Technology;KAUST", "aff_domain": "kaust.edu.sa;kaust.edu.sa;kaust.edu.sa", "position": "Research Scientist;Full Professor;PhD student", "bibtex": "@inproceedings{\nhamdi2023voint,\ntitle={Voint Cloud: Multi-View Point Cloud Representation for 3D Understanding },\nauthor={Abdullah Hamdi and Silvio Giancola and Bernard Ghanem},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=IpGgfpMucHj}\n}", "github": "", "project": "", "reviewers": "14aj;LsWo;KkFB;aJdJ", "pdf_size": 32829097, "recommendation": "6;6;6;8", "confidence": "4;3;5;5", "correctness": "4;4;4;4", "technical_novelty": "3;3;4;2", "empirical_novelty": "3;3;0;2", "wc_summary_paper": "73;58;158;44", "wc_strength_and_weaknesses": "211;76;407;183", "wc_clarity_quality_novelty_and_reproducibility": "24;11;44;31", "wc_summary_review": "28;27;31;35", "wc_review": "336;172;640;293", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "415;219;567;410", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "correctness_avg": [ 4.0, 0.0 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.0, 1.224744871391589 ], "wc_summary_paper_avg": [ 83.25, 44.358623738795146 ], "wc_strength_and_weaknesses_avg": [ 219.25, 119.5332066833313 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 27.5, 11.926860441876563 ], "wc_summary_review_avg": [ 30.25, 3.112474899497183 ], "wc_review_avg": [ 360.25, 172.34322586049038 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 402.75, 123.43495250535805 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.5222329678670935, "corr_recommendation_correctness": 0.0, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3172617976355376545&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "pdf": "https://openreview.net/pdf?id=IpGgfpMucHj", "email": "kaust.edu.sa;kaust.edu.sa;kaust.edu.sa", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "King Abdullah University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaust.edu.sa", "aff_unique_abbr": "KAUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Saudi Arabia" }, { "id": "IqN5SgOmxp", "title": "Reinforcement Learning using a Molecular Fragment Based Approach for Reaction Discovery", "track": "main", "status": "Withdraw", "tldr": "A 
multi-pronged deep learning approach using a fragment-based method is applied to chemical reaction discovery", "abstract": "Deep learning methods have recently been applied to both predictive and generative tasks in the molecular space. While molecular generation and prediction of an associated property are now reasonably common, studies on reaction outcome due to the generated molecules remain less explored. Chemical reactions present a complex scenario as they involve multiple molecules and the breaking/forming of bonds. In reaction discovery, one aims to maximise yield and/or selectivity, which depends on a multitude of factors, including partner reactants and reaction conditions. We propose a multi-pronged approach that combines policy gradient reinforcement learning with a recurrent neural network-based deep generative model to identify prospective new reactants, whose yield/selectivity is estimated by a pre-trained regressor. Using SMILES (simplified molecular-input line-entry system) as the raw representation, our approach involves attaching a user-defined core fragment to the generated molecules for reaction-specific learning. On three distinct reaction types (alcohol deoxyfluorination, imine-thiol coupling, asymmetric hydrogenation of imines and alkenes), we obtain notable improvements in yield and enantioselectivity. The generated molecules are diverse, while remaining synthetically accessible.", "keywords": "reinforcement learning;transfer learning;reaction discovery;deep generative model", "primary_area": "", "supplementary_material": "/attachment/a82eb726bdbcd70666d37941e98ed7fbeeb41a4a.zip", "author": "Ajnabiul Hoque;Mihir Jitendra Surve;Shivaram Kalyanakrishnan;Raghavan B Sunoj", "authorids": "~Ajnabiul_Hoque1;~Mihir_Jitendra_Surve1;~Shivaram_Kalyanakrishnan1;~Raghavan_B_Sunoj1", "gender": "M;M;M;M", "homepage": ";;https://www.cse.iitb.ac.in/~shivaram/;https://www.chem.iitb.ac.in/facultyuserview/r-b-sunoj", "dblp": ";;16/4410;", "google_scholar": ";;https://scholar.google.com.tw/citations?user=YZkeEqAAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0001-9807-3061;;;0000-0002-6484-2878", "linkedin": ";mihir-surve-3a9840171/;;", "or_profile": "~Ajnabiul_Hoque1;~Mihir_Jitendra_Surve1;~Shivaram_Kalyanakrishnan1;~Raghavan_B_Sunoj1", "aff": "Indian Institute of Technology, Bombay;Indian Institute of Technology, Bombay;Indian Institute of Technology Bombay, Indian Institute of Technology Bombay;Department of Chemistry and Centre for Machine Intelligence and Data Science, Indian Institute of Technology, Bombay", "aff_domain": "iitb.ac.in;iitb.ac.in;cse.iitb.ac.in;iitb.ac.in", "position": "PhD student;MS student;Associate Professor;Full Professor", "bibtex": "@misc{\nhoque2023reinforcement,\ntitle={Reinforcement Learning using a Molecular Fragment Based Approach for Reaction Discovery},\nauthor={Ajnabiul Hoque and Mihir Jitendra Surve and Shivaram Kalyanakrishnan and Raghavan B Sunoj},\nyear={2023},\nurl={https://openreview.net/forum?id=IqN5SgOmxp}\n}", "github": "", "project": "", "reviewers": "Vswi;xGhN;vM46;iHCh;rUiL", "site": "https://openreview.net/forum?id=IqN5SgOmxp", "pdf_size": 4832292, "recommendation": "3;3;3;5;6", "confidence": "4;5;4;3;5", "correctness": "2;2;3;3;4", "technical_novelty": "2;2;1;2;3", "empirical_novelty": "2;2;2;0;3", "wc_summary_paper": "274;92;18;164;30", "wc_strength_and_weaknesses": "447;448;187;228;94", "wc_clarity_quality_novelty_and_reproducibility": "357;30;22;214;49", "wc_summary_review": "233;52;28;77;11", "wc_review": 
"1311;622;255;683;184", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;0;0", "reply_authors": "0;0;0;0;0", "recommendation_avg": [ 4.0, 1.2649110640673518 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "correctness_avg": [ 2.8, 0.7483314773547882 ], "technical_novelty_avg": [ 2.0, 0.6324555320336759 ], "empirical_novelty_avg": [ 1.8, 0.9797958971132713 ], "wc_summary_paper_avg": [ 115.6, 94.70290386255323 ], "wc_strength_and_weaknesses_avg": [ 280.8, 142.86973087396785 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 134.4, 131.69145758172778 ], "wc_summary_review_avg": [ 80.2, 79.58241011680911 ], "wc_review_avg": [ 611.0, 401.09101211570425 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.8451542547285165, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Gs7BUw91IAUJ:scholar.google.com/&scioq=Reinforcement+Learning+using+a+Molecular+Fragment+Based+Approach+for+Reaction+Discovery&hl=en&as_sdt=0,11", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Indian Institute of Technology Bombay", "aff_unique_dep": "", "aff_unique_url": "https://www.iitb.ac.in", "aff_unique_abbr": "IIT Bombay", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Bombay", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "India" }, { "id": "IrUFsuTxVfY", "title": "A Data-Based Perspective on Transfer Learning", "track": "main", "status": "Withdraw", "tldr": "In this work, we present a framework for probing the impact of the source dataset on transfer learning performance.", "abstract": "It is commonly believed that more pre-training data leads to better transfer learning performance. However, recent evidence suggests that removing data from the source dataset can actually help too. In this work, we present a framework for probing the impact of the source dataset's composition on transfer learning performance. Our framework facilitates new capabilities such as identifying transfer learning brittleness and detecting pathologies such as data-leakage and the presence of misleading examples in the source dataset. 
In particular, we demonstrate that removing detrimental datapoints identified by our framework improves transfer performance from ImageNet on a variety of transfer tasks.", "keywords": "transfer learning;datasets;subpopulations", "primary_area": "", "supplementary_material": "/attachment/b2a22b2d286f293caa72dec462ea2540b9260664.zip", "author": "Saachi Jain;Hadi Salman;Alaa Khaddaj;Eric Wong;Sung Min Park;Aleksander Madry", "authorids": "~Saachi_Jain1;~Hadi_Salman1;~Alaa_Khaddaj1;~Eric_Wong1;~Sung_Min_Park2;~Aleksander_Madry1", "gender": "F;M;;M;;M", "homepage": "http://people.csail.mit.edu/saachij/;https://hadisalman.com/;;http://riceric22.github.io/;https://sungminpark.com;https://people.csail.mit.edu/madry/", "dblp": "227/2617;192/3204;;64/1811-1.html;28/157;67/2454", "google_scholar": "6hsn3EYAAAAJ;Kr8JjF0AAAAJ;BA1kFjMAAAAJ;pWnTMRkAAAAJ;;SupjsEUAAAAJ", "orcid": ";;;;;", "linkedin": ";;alaa-khaddaj;;;", "or_profile": "~Saachi_Jain1;~Hadi_Salman1;~Alaa_Khaddaj1;~Eric_Wong1;~Sung_Min_Park2;~Aleksander_Madry1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;University of Pennsylvania;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;mit.edu;upenn.edu;mit.edu;mit.edu", "position": "PhD student;PhD Student;PhD student;Assistant Professor;PhD student;Professor", "bibtex": "@misc{\njain2023a,\ntitle={A Data-Based Perspective on Transfer Learning},\nauthor={Saachi Jain and Hadi Salman and Alaa Khaddaj and Eric Wong and Sung Min Park and Aleksander Madry},\nyear={2023},\nurl={https://openreview.net/forum?id=IrUFsuTxVfY}\n}", "github": "", "project": "", "reviewers": "Uvvt;zDA7;hpFP;Qmsi", "site": "https://openreview.net/forum?id=IrUFsuTxVfY", "pdf_size": 5653950, "recommendation": "3;3;5;6", "confidence": "5;3;4;4", "correctness": "3;2;3;3", "technical_novelty": "1;2;2;2", "empirical_novelty": "1;2;2;3", "wc_summary_paper": "129;63;86;139", "wc_strength_and_weaknesses": "71;341;297;267", "wc_clarity_quality_novelty_and_reproducibility": "28;15;33;60", "wc_summary_review": "56;35;20;44", "wc_review": "284;454;436;510", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 104.25, 31.04331651096577 ], "wc_strength_and_weaknesses_avg": [ 244.0, 103.29085148259743 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 34.0, 16.38596960817394 ], "wc_summary_review_avg": [ 38.75, 13.141061600951424 ], "wc_review_avg": [ 421.0, 83.6719785830358 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5555555555555555, "gs_citation": 48, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10694989536565640048&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "Massachusetts Institute of Technology;University of Pennsylvania", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.upenn.edu", "aff_unique_abbr": "MIT;UPenn", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "AGRO: Adversarial discovery of error-prone Groups for Robust Optimization", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10942", "id": "IrzkT99fDJH", "poster": "/media/PosterPDFs/ICLR%202023/10942.png?t=1681147762.0531924", "openreview": "https://openreview.net/forum?id=IrzkT99fDJH", "slides": "https://iclr.cc/virtual/2023/poster/10942", "video": "https://iclr.cc/virtual/2023/poster/10942", "author_site": "Bhargavi Paranjape, Pradeep Dasigi, Vivek Srikumar, Luke Zettlemoyer, Hannaneh Hajishirzi", "tldr": "AGRO is an end-to-end robust optimization technique that discovers error-prone groups and optimizes for their accuracy, resulting in improved robustness to test-time distributional shifts.", "abstract": "Models trained via empirical risk minimization (ERM) are known to rely on spurious correlations between labels and task-independent input features, resulting in poor generalization to distributional shifts. Group distributionally robust optimization (G-DRO) can alleviate this problem by minimizing the worst-case loss over a set of pre-defined groups over training data. G-DRO successfully improves performance of the worst group, where the correlation does not hold. However, G-DRO assumes that the spurious correlations and associated worst groups are known in advance, making it challenging to apply them to new tasks with potentially multiple unknown correlations. We propose AGRO---Adversarial Group discovery for Distributionally Robust Optimization---an end-to-end approach that jointly identifies error-prone groups and improves accuracy on them. AGRO equips G-DRO with an adversarial slicing model to find a group assignment for training examples which maximizes worst-case loss over the discovered groups. On the WILDS benchmark, AGRO results in 8\\% higher model performance on average on known worst-groups, compared to prior group discovery approaches used with G-DRO. AGRO also improves out-of-distribution performance on SST2, QQP, and MS-COCO---datasets where potential spurious correlations are as yet uncharacterized. 
Human evaluation of AGRO groups shows that they contain well-defined, yet previously unstudied spurious correlations that lead to model errors.", "keywords": "robust optimization;distributionally robust;slice discovery;error analysis;adversarial learning", "primary_area": "", "supplementary_material": "", "author": "Bhargavi Paranjape;Pradeep Dasigi;Vivek Srikumar;Luke Zettlemoyer;Hannaneh Hajishirzi", "authorids": "~Bhargavi_Paranjape1;~Pradeep_Dasigi1;~Vivek_Srikumar1;~Luke_Zettlemoyer1;~Hannaneh_Hajishirzi1", "gender": "Unspecified;M;;M;F", "homepage": "https://bhargaviparanjape.github.io/;https://pdasigi.github.io/;https://svivek.com;https://www.cs.washington.edu/people/faculty/lsz/;https://homes.cs.washington.edu/~hannaneh/", "dblp": "188/5984;27/7184;37/44;21/6793;52/1296", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?authorid=Bpd76vcAAAAJ;TsTUfOIAAAAJ;https://scholar.google.com.tw/citations?user=UjpbO6IAAAAJ;LOV6_WIAAAAJ", "orcid": ";0000-0001-7127-1316;;;", "linkedin": ";;;luke-zettlemoyer-a0109b226/;", "or_profile": "~Bhargavi_Paranjape1;~Pradeep_Dasigi1;~Vivek_Srikumar1;~Luke_Zettlemoyer1;~Hannaneh_Hajishirzi1", "aff": "University of Washington;Allen Institute for Artificial Intelligence;University of Utah;Meta;University of Washington", "aff_domain": "cs.washington.edu;allenai.org;utah.edu;meta.com;uw.edu", "position": "PhD student;Research Scientist;Associate Professor;Researcher;Associate Professor", "bibtex": "@inproceedings{\nparanjape2023agro,\ntitle={{AGRO}: Adversarial discovery of error-prone Groups for Robust Optimization},\nauthor={Bhargavi Paranjape and Pradeep Dasigi and Vivek Srikumar and Luke Zettlemoyer and Hannaneh Hajishirzi},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=IrzkT99fDJH}\n}", "github": "", "project": "", "reviewers": "YtmE;UiKr;eLL5;uGUw", "pdf_size": 3657373, "recommendation": "5;5;6;8", "confidence": "3;3;3;4", "correctness": "3;3;3;4", "technical_novelty": "2;2;3;3", "empirical_novelty": "3;3;3;4", "wc_summary_paper": "131;102;73;58", "wc_strength_and_weaknesses": "614;252;207;168", "wc_clarity_quality_novelty_and_reproducibility": "82;20;194;45", "wc_summary_review": "30;39;37;35", "wc_review": "857;413;511;306", "wc_reply_reviewers": "0;0;58;0", "wc_reply_authors": "711;739;779;245", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 91.0, 27.99107000455681 ], "wc_strength_and_weaknesses_avg": [ 310.25, 177.87126665091245 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 85.25, 66.54838465357368 ], "wc_summary_review_avg": [ 35.25, 3.344772040064913 ], "wc_review_avg": [ 521.75, 206.68983405092763 ], "wc_reply_reviewers_avg": [ 14.5, 25.11473670974872 ], "wc_reply_authors_avg": [ 618.5, 216.99020715230446 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.9428090415820632, "corr_recommendation_correctness": 0.9428090415820632, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2230020840324816866&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 3, "pdf":
"https://openreview.net/pdf?id=IrzkT99fDJH", "email": "cs.washington.edu;allenai.org;utah.edu;meta.com;uw.edu", "author_num": 5, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "University of Washington;Allen Institute for Artificial Intelligence;University of Utah;Meta", "aff_unique_dep": ";;;Meta Platforms, Inc.", "aff_unique_url": "https://www.washington.edu;https://allenai.org;https://www.utah.edu;https://meta.com", "aff_unique_abbr": "UW;AI2;Utah;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "IsCg7qoy8i9", "title": "Benchmarking Algorithms for Domain Generalization in Federated Learning", "track": "main", "status": "Reject", "tldr": "Benchmarking algorithms for domain generalization in federated learning on multiple realistic datasets.", "abstract": "In this paper, we present a unified platform to study domain generalization in the federated learning (FL) context and conduct extensive empirical evaluations of the current state-of-the-art domain generalization algorithms adapted to FL. In particular, we perform a fair comparison of nine existing algorithms in solving domain generalization {either centralized domain generalization algorithms adapted to the FL context or existing FL domain generalization algorithms } to comprehensively explore the challenges introduced by FL. These challenges include statistical heterogeneity among clients, the number of clients, the number of communication rounds, etc. The evaluations are conducted on three diverse datasets including PACS (image dataset covering photo, sketch, cartoon, and painting domains), iWildCam (image dataset with 323 domains), and Py150 (natural language processing dataset with 8421 domains). The experiments show that the challenges brought by federated learning stay unsolved in the realistic experiment setting. Furthermore, the code base supports fair and reproducible new algorithm evaluation with easy implementation.", "keywords": "Domain Generalization;Federated Learning;Benchmark.", "primary_area": "", "supplementary_material": "", "author": "Ruqi Bai;Saurabh Bagchi;David I. Inouye", "authorids": "~Ruqi_Bai1;~Saurabh_Bagchi1;~David_I._Inouye1", "gender": "M;M;M", "homepage": "https://ruqibai.netlify.app/;https://saurabhbagchi.us;http://davidinouye.com", "dblp": ";57/95.html;76/10817", "google_scholar": ";https://scholar.google.com.tw/citations?user=3EfsOvYAAAAJ;SVMQ_g4AAAAJ", "orcid": ";;", "linkedin": "ruqi-bai/;;", "or_profile": "~Ruqi_Bai1;~Saurabh_Bagchi1;~David_I_Inouye1", "aff": "Purdue University;Purdue University;Purdue University", "aff_domain": "purdue.edu;purdue.edu;purdue.edu", "position": "PhD student;Full Professor;Assistant Professor", "bibtex": "@misc{\nbai2023benchmarking,\ntitle={Benchmarking Algorithms for Domain Generalization in Federated Learning},\nauthor={Ruqi Bai and Saurabh Bagchi and David I. 
Inouye},\nyear={2023},\nurl={https://openreview.net/forum?id=IsCg7qoy8i9}\n}", "github": "", "project": "", "reviewers": "oUmB;qSGW;fDMj;UVoT", "site": "https://openreview.net/forum?id=IsCg7qoy8i9", "pdf_size": 555900, "recommendation": "5;6;6;6", "confidence": "3;5;3;4", "correctness": "3;3;3;4", "technical_novelty": "2;2;2;2", "empirical_novelty": "3;3;2;3", "wc_summary_paper": "98;52;47;27", "wc_strength_and_weaknesses": "307;113;172;223", "wc_clarity_quality_novelty_and_reproducibility": "37;34;34;57", "wc_summary_review": "56;92;22;90", "wc_review": "498;291;275;397", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1844;897;1102;1775", "reply_reviewers": "0;0;0;0", "reply_authors": "5;3;4;5", "recommendation_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 56.0, 25.990382836734053 ], "wc_strength_and_weaknesses_avg": [ 203.75, 71.19471539377062 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 40.5, 9.604686356149273 ], "wc_summary_review_avg": [ 65.0, 28.653097563788805 ], "wc_review_avg": [ 365.25, 89.84535324656473 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1404.5, 412.1568269481897 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 4.25, 0.82915619758885 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.5222329678670935, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9523881431315312455&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Purdue University", "aff_unique_dep": "", "aff_unique_url": "https://www.purdue.edu", "aff_unique_abbr": "Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "IskSBCo0-0", "title": "Recycling Scraps: Improving Private Learning by Leveraging Intermediate Checkpoints", "track": "main", "status": "Reject", "tldr": "DP-ML benchmarks and deployments typically use only the final model to make predictions. In this work, for the first time, we comprehensively explore various methods that aggregate intermediate checkpoints to improve the utility of DP training.", "abstract": "All state-of-the-art (SOTA) differentially private machine learning (DP ML) methods are iterative in nature, and their privacy analyses allow publicly releasing the intermediate training checkpoints. However, DP ML benchmarks, and even practical deployments, typically use only the final training checkpoint to make predictions. In this work, for the first time, we comprehensively explore various methods that aggregate intermediate checkpoints to improve the utility of DP training. Empirically, we demonstrate that checkpoint aggregations provide significant gains in the prediction accuracy over the existing SOTA for CIFAR10 and StackOverflow datasets, and that these gains get magnified in settings with periodically varying training data distributions. For instance, we improve SOTA StackOverflow accuracies to 22.7\\% (+0.43\\% absolute) for $\\epsilon=8.2$, and 23.84\\% (+0.43\\%) for $\\epsilon=18.9$. Theoretically, we show that uniform tail averaging of checkpoints improves the empirical risk minimization bound compared to the last checkpoint of DP-SGD. 
Lastly, we initiate an exploration into estimating the uncertainty that DP noise adds to the predictions of DP ML models. We prove that, under standard assumptions on the loss function, the sample variance from the last few checkpoints provides a good approximation of the variance of the final model of a DP run. Empirically, we show that the last few checkpoints can provide a reasonable lower bound for the variance of a converged DP model.", "keywords": "Differential privacy;training checkpoints;confidence intervals;uncertainty", "primary_area": "", "supplementary_material": "", "author": "Virat Shejwalkar;Arun Ganesh;Rajiv Mathews;Om Thakkar;Abhradeep Guha Thakurta", "authorids": "~Virat_Shejwalkar1;~Arun_Ganesh1;~Rajiv_Mathews1;~Om_Thakkar1;~Abhradeep_Guha_Thakurta1", "gender": "M;M;M;M;M", "homepage": "https://people.cs.umass.edu/~vshejwalkar/;https://people.eecs.berkeley.edu/~arunganesh/;;https://athakurta.squarespace.com/;http://www.omthakkar.com/", "dblp": "243/3113.html;201/4732;;31/8315;https://dblp.uni-trier.de/pid/166/1707", "google_scholar": "M6GAEdUAAAAJ;fmwchbsAAAAJ;xFBrVYgAAAAJ;1rV69hMAAAAJ;j5N3bKYAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Virat_Shejwalkar1;~Arun_Ganesh1;~Rajiv_Mathews1;~Abhradeep_Guha_Thakurta1;~Om_Dipakbhai_Thakkar1", "aff": "University of Massachusetts at Amherst;Google;Google;Google;Google", "aff_domain": "cs.umass.edu;google.com;google.com;google.com;google.com", "position": "PhD student;Researcher;Senior Staff Software Engineer;Senior Research Scientist;Researcher", "bibtex": "@misc{\nshejwalkar2023recycling,\ntitle={Recycling Scraps: Improving Private Learning by Leveraging Intermediate Checkpoints},\nauthor={Virat Shejwalkar and Arun Ganesh and Rajiv Mathews and Om Thakkar and Abhradeep Guha Thakurta},\nyear={2023},\nurl={https://openreview.net/forum?id=IskSBCo0-0}\n}", "github": "", "project": "", "reviewers": "374t;J6hu;PDAE", "site": "https://openreview.net/forum?id=IskSBCo0-0", "pdf_size": 555478, "recommendation": "5;5;6", "confidence": "3;3;4", "correctness": "4;3;4", "technical_novelty": "2;3;3", "empirical_novelty": "2;2;3", "wc_summary_paper": "229;86;31", "wc_strength_and_weaknesses": "78;279;159", "wc_clarity_quality_novelty_and_reproducibility": "10;233;142", "wc_summary_review": "27;473;19", "wc_review": "344;1071;351", "wc_reply_reviewers": "0;161;0", "wc_reply_authors": "359;1514;331", "reply_reviewers": "0;1;0", "reply_authors": "1;3;1", "recommendation_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 115.33333333333333, 83.45191562943431 ], "wc_strength_and_weaknesses_avg": [ 172.0, 82.57118141337206 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 128.33333333333334, 91.55083590855715 ], "wc_summary_review_avg": [ 173.0, 212.1571744407119 ], "wc_review_avg": [ 588.6666666666666, 341.07314301904347 ], "wc_reply_reviewers_avg": [ 53.666666666666664, 75.8961278473561 ], "wc_reply_authors_avg": [ 734.6666666666666, 551.1904288799733 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.9999999999999997, "corr_recommendation_correctness": 0.4999999999999999,
"gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13430734973658303888&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;1;1;1", "aff_unique_norm": "University of Massachusetts Amherst;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.umass.edu;https://www.google.com", "aff_unique_abbr": "UMass Amherst;Google", "aff_campus_unique_index": "0;1;1;1;1", "aff_campus_unique": "Amherst;Mountain View", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "ItUvrU0dQpC", "title": "Theoretical generalization bounds for improving the efficiency of deep online training", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "In the era of data explosion, online machine learning in which learning models are updated in real-time has become essential due to the growth of data in practice. In particular, it is more challenging to collect and annotate new massive data accurately and timely compared to traditional offline supervised training settings. Although this online training framework has been shown to be practically beneficial, there has been a lack of theoretical guarantees for the learning performance, especially for the case with noisy labels. This paper aims to investigate a learning theory for both original deep online training and online training with noisy labels. We first introduce a theoretical bound of the gaps of empirical risks and gaps of generalization risks in micro-batch online training when learning with both clean and noisy labels. Those bounds will efficiently help guide the online training scheme when receiving new data. We next analyze the impact of micro-batch size on the learning performance of models with noisy labels through our experimental results on CIFAR10, and CIFAR100 datasets using different noise, which consistently demonstrates the merit of the bounds above in the online training setting.", "keywords": "online training;generalization risk;noisy label", "primary_area": "", "supplementary_material": "/attachment/604843ed8a1f28254caa236ff43cdb706f6cfedc.zip", "author": "Nguyen Dinh Quoc;Tuan-Duy Hien Nguyen;Toan Tran", "authorids": "~Nguyen_Dinh_Quoc1;~Tuan-Duy_Hien_Nguyen1;~Toan_Tran1", "gender": "M;Not Specified;M", "homepage": ";https://duynht.github.io/;", "dblp": ";265/4122;207/8479-3", "google_scholar": "https://scholar.google.com.vn/citations?user=nGMSVVAAAAAJ;A4cqJA8AAAAJ;https://scholar.google.com.au/citations?user=PnwSuNMAAAAJ", "orcid": ";0000-0001-8201-1843;0000-0001-7182-7548", "linkedin": "dinh-quoc-nguyen-7ab95a146/;itstuanduy/;", "or_profile": "~Nguyen_Dinh_Quoc1;~Tuan-Duy_Hien_Nguyen1;~Toan_Tran1", "aff": "VinAI;VinAI (acquired by Qualcomm AI Research);Hanoi University of Science and Technology", "aff_domain": "vinai.io;vinai.io;hust.edu.vn", "position": "AI Resident;AI Resident (Engineering + Research);Lecturer", "bibtex": "@misc{\nquoc2023theoretical,\ntitle={Theoretical generalization bounds for improving the efficiency of deep online training},\nauthor={Nguyen Dinh Quoc and Tuan-Duy Hien Nguyen and Toan Tran},\nyear={2023},\nurl={https://openreview.net/forum?id=ItUvrU0dQpC}\n}", "github": "", "project": "", "reviewers": "5Rep;vq2B;CsR6;iZpV", "site": "https://openreview.net/forum?id=ItUvrU0dQpC", "pdf_size": 4322985, "recommendation": "3;3;3;3", "confidence": "4;4;4;3", "correctness": "1;1;1;1", "technical_novelty": "2;1;2;2", "empirical_novelty": "1;1;2;2", "wc_summary_paper": "93;59;50;11", "wc_strength_and_weaknesses": 
"375;179;27;266", "wc_clarity_quality_novelty_and_reproducibility": "17;25;21;99", "wc_summary_review": "54;19;183;26", "wc_review": "539;282;281;402", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 1.0, 0.0 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.5, 0.5 ], "wc_summary_paper_avg": [ 53.25, 29.192250684042847 ], "wc_strength_and_weaknesses_avg": [ 211.75, 127.27799299171872 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 40.5, 33.89321466016465 ], "wc_summary_review_avg": [ 70.5, 66.25896165802781 ], "wc_review_avg": [ 376.0, 106.19086589721358 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:GYST6Nn6BcYJ:scholar.google.com/&scioq=Theoretical+generalization+bounds+for+improving+the+efficiency+of+deep+online+training&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;1", "aff_unique_norm": "VinAI;Hanoi University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.vinai.co;https://www.hust.edu.vn", "aff_unique_abbr": "VinAI;HUST", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hanoi", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Vietnam" }, { "id": "Itn7dH7muI", "title": "Getting away with more network pruning: From sparsity to geometry and linear regions", "track": "main", "status": "Withdraw", "tldr": "If we prune with the maximum number of linear regions in mind, we can improve accuracy considerably", "abstract": "One surprising trait of neural networks is the extent to which their connections can be pruned with little to no effect on accuracy. But when we cross a critical level of parameter sparsity, pruning any further leads to a sudden drop in accuracy. What could explain such a drop? In this work, we explore how sparsity may affect the geometry of the linear regions defined by a neural network and consequently reduce its expected maximum number of linear regions. We observe that sparsity affects accuracy in pruned neural networks similarly to how it affects the number of linear regions as well as - and more so - our proposed upper bound on that number. Conversely, we find out that selecting the sparsity on each layer to maximize the bound very often improves accuracy in comparison to using the same sparsity across all layers, thereby providing us guidance on where to prune. 
", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/2ca47b92fa74227b4c346a5754031a1de4906fec.zip", "author": "Junyang Cai;Khai-Nguyen Nguyen;Nishant Shrestha;Aidan Good;Ruisen Tu;Xin Yu;Thiago Serra", "authorids": "jc092@bucknell.edu;~Khai-Nguyen_Nguyen1;ns037@bucknell.edu;~Aidan_Good1;ruisent2@illinois.edu;~Xin_Yu4;~Thiago_Serra1", "gender": ";M;;;;F;M", "homepage": ";;;;;https://www.cs.utah.edu/~xiyu;https://thiagoserra.com/", "dblp": ";;;;;;119/6438", "google_scholar": ";;;;;tWAfvQsAAAAJ;Wyk2Q9sAAAAJ", "orcid": ";;;;;;", "linkedin": ";nkn002/;;aidan-good/;;;", "or_profile": "jc092@bucknell.edu;~Khai-Nguyen_Nguyen1;ns037@bucknell.edu;~Aidan_Good1;ruisent2@illinois.edu;~Xin_Yu4;~Thiago_Serra1", "aff": ";Bucknell University;;Bucknell University;;University of Utah;Bucknell University", "aff_domain": ";bucknell.edu;;bucknell.edu;;cs.utah.edu;bucknell.edu", "position": ";Undergrad student;;Undergrad student;;PhD student;Assistant Professor", "bibtex": "@misc{\ncai2023getting,\ntitle={Getting away with more network pruning: From sparsity to geometry and linear regions},\nauthor={Junyang Cai and Khai-Nguyen Nguyen and Nishant Shrestha and Aidan Good and Ruisen Tu and Xin Yu and Thiago Serra},\nyear={2023},\nurl={https://openreview.net/forum?id=Itn7dH7muI}\n}", "github": "", "project": "", "reviewers": "NCHx;836N;nY49;EJzy", "site": "https://openreview.net/forum?id=Itn7dH7muI", "pdf_size": 477657, "recommendation": "1;3;5;8", "confidence": "3;4;3;3", "correctness": "1;3;3;4", "technical_novelty": "3;3;3;3", "empirical_novelty": "0;2;2;3", "wc_summary_paper": "75;78;118;119", "wc_strength_and_weaknesses": "592;191;199;62", "wc_clarity_quality_novelty_and_reproducibility": "204;12;7;55", "wc_summary_review": "90;24;20;44", "wc_review": "961;305;344;280", "wc_reply_reviewers": "544;0;32;49", "wc_reply_authors": "2513;326;436;494", "reply_reviewers": "3;0;1;1", "reply_authors": "5;2;2;2", "recommendation_avg": [ 4.25, 2.5860201081971503 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 2.75, 1.0897247358851685 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 1.75, 1.0897247358851685 ], "wc_summary_paper_avg": [ 97.5, 21.02974084481309 ], "wc_strength_and_weaknesses_avg": [ 261.0, 198.68693968149995 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 69.5, 79.86394680955857 ], "wc_summary_review_avg": [ 44.5, 27.798381247835277 ], "wc_review_avg": [ 472.5, 282.956268705961 ], "wc_reply_reviewers_avg": [ 156.25, 224.557759830294 ], "wc_reply_authors_avg": [ 942.25, 908.8779827347563 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.27907278609297376, "corr_recommendation_correctness": 0.909316700484831, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17619003740445698195&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Bucknell University;University of Utah", "aff_unique_dep": ";", "aff_unique_url": "https://www.bucknell.edu;https://www.utah.edu", "aff_unique_abbr": "Bucknell;Utah", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "A framework for benchmarking Class-out-of-distribution detection and its application to ImageNet", "status": "Top-25%", "track": "main", "site": 
"https://iclr.cc/virtual/2023/poster/11778", "id": "Iuubb9W6Jtk", "poster": "", "openreview": "https://openreview.net/forum?id=Iuubb9W6Jtk", "slides": "https://iclr.cc/virtual/2023/poster/11778", "video": "https://iclr.cc/virtual/2023/poster/11778", "author_site": "Ido Galil, Mohammed Dabbah, Ran El-Yaniv", "tldr": "We present a framework for benchmarking the performance of image classifiers in detecting OOD. We apply it to benchmark 525 pretrained ImageNet classifiers, and analyze their performance resulting in interesting conclusions", "abstract": "When deployed for risk-sensitive tasks, deep neural networks must be able to detect instances with labels from outside the distribution for which they were trained.\nIn this paper we present a novel framework to benchmark the ability of image classifiers to detect class-out-of-distribution instances\n(i.e., instances whose true labels do not appear in the training distribution) at various levels of detection difficulty.\nWe apply this technique to ImageNet, and benchmark 525 pretrained, publicly available, ImageNet-1k classifiers. \nThe code for generating a benchmark for any ImageNet-1k classifier, along with the benchmarks prepared for the above-mentioned 525 models is available at https://github.com/mdabbah/COOD_benchmarking.\n\nThe usefulness of the proposed framework and its advantage over alternative existing benchmarks is demonstrated by analyzing the results obtained for these models, which reveals numerous novel observations including:\n(1) knowledge distillation consistently improves class-out-of-distribution (C-OOD) detection performance; (2) a subset of ViTs performs better C-OOD detection than any other model; (3) the language\u2013-vision CLIP model achieves good zero-shot detection performance, with its best instance outperforming 96% of all other models evaluated; (4) accuracy and in-distribution ranking are positively correlated to C-OOD detection; and \n(5) we compare various confidence functions for C-OOD detection.\nOur companion paper, also published in ICLR 2023 (What Can We Learn From The Selective Prediction And Uncertainty Estimation Performance Of 523 Imagenet Classifiers), examines the uncertainty estimation performance (ranking, calibration, and selective prediction performance) of these classifiers in an in-distribution setting.", "keywords": "benchmarking;out of distribution;class out of distribution;OOD;OOD detection", "primary_area": "", "supplementary_material": "", "author": "Ido Galil;Mohammed Dabbah;Ran El-Yaniv", "authorids": "~Ido_Galil1;~Mohammed_Dabbah1;~Ran_El-Yaniv1", "gender": "M;M;M", "homepage": "https://idogalil.github.io/;https://csngx.cs.technion.ac.il/people/mdabbah/;http://www.cs.technion.ac.il/~rani/", "dblp": ";;04/1896", "google_scholar": ";;https://scholar.google.com.tw/citations?user=D9eVSd8AAAAJ", "orcid": ";;", "linkedin": "ido-galil/;mohammed-dabbah-0b375a143/;", "or_profile": "~Ido_Galil1;~Mohammed_Dabbah1;~Ran_El-Yaniv1", "aff": "Computer Science Departmen, Technion-Israel Institute of Technology;;Deci", "aff_domain": "cs.technion.ac.il;;deci.ai", "position": "PhD student;;Chief Scientist", "bibtex": "@inproceedings{\ngalil2023a,\ntitle={A framework for benchmarking Class-out-of-distribution detection and its application to ImageNet},\nauthor={Ido Galil and Mohammed Dabbah and Ran El-Yaniv},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=Iuubb9W6Jtk}\n}", "github": "", "project": "", "reviewers": 
"3d28;gv9a;CQ4c", "pdf_size": 3683824, "recommendation": "8;8;8", "confidence": "4;3;4", "correctness": "4;3;3", "technical_novelty": "3;3;4", "empirical_novelty": "3;4;4", "wc_summary_paper": "51;87;54", "wc_strength_and_weaknesses": "106;304;590", "wc_clarity_quality_novelty_and_reproducibility": "17;58;56", "wc_summary_review": "24;62;46", "wc_review": "198;511;746", "wc_reply_reviewers": "0;0;260", "wc_reply_authors": "41;533;1705", "reply_reviewers": "0;0;2", "reply_authors": "1;1;3", "recommendation_avg": [ 8.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 64.0, 16.30950643030009 ], "wc_strength_and_weaknesses_avg": [ 333.3333333333333, 198.67785203411296 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 43.666666666666664, 18.87385022252275 ], "wc_summary_review_avg": [ 44.0, 15.57776192739723 ], "wc_review_avg": [ 485.0, 224.47420044777232 ], "wc_reply_reviewers_avg": [ 86.66666666666667, 122.56517540566823 ], "wc_reply_authors_avg": [ 759.6666666666666, 697.9767586070152 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 41, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8907739233332783530&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=Iuubb9W6Jtk", "email": "cs.technion.ac.il;;deci.ai", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Technion-Israel Institute of Technology;Deci", "aff_unique_dep": "Computer Science Department;", "aff_unique_url": "https://www.technion.ac.il;https://www.deci.ai", "aff_unique_abbr": "Technion;Deci", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Israel" }, { "id": "Ivkh2_UdL9O", "title": "Efficient Hyperparameter Optimization Through Tensor Completion", "track": "main", "status": "Reject", "tldr": "An approach for hyperparameter optimization based on tensor completion methods.", "abstract": "Hyperparameter optimization is a prerequisite for state-of-the-art performance in machine learning, with current strategies including Bayesian optimisation, Hyperband, and evolutionary methods. Whereas such methods have been shown to improve performance, none of these is designed to explicitly take advantage of the underlying data structure. To this end, we introduce a completely different approach for hyperaparameter optimization, based on low-rank tensor completion. This is achieved by first forming a multi-dimensional tensor which comprises performance scores for different combinations of hyperparameters. Based on the realistic underlying assumption that the so-formed tensor has a low rank structure, this then allows for reliable estimates of the unobserved validation scores of combinations of hyperparameters to be obtained through tensor completion, and from only a fraction of known elements. Through extensive experimentation on various datasets and learning models, the proposed method is shown to exhibit competitive or superior performance to the state-of-the-art hyperparameter optimization strategies. 
Distinctive advantages of the proposed method include its ability to simultaneously handle any hyperparameter type (e.g., kind of optimizer, number of neurons, number of layers, etc.), its relative simplicity compared to the competing methods, as well as the ability to suggest multiple optimal combinations of hyperparameters.\n\n", "keywords": "hyperparameter optimization;tensor completion", "primary_area": "", "supplementary_material": "/attachment/4be28107a74748044800760ac3631c23db0c423e.zip", "author": "Aaman Rebello;Kriton Konstantinidis;Yao Lei Xu;Danilo Mandic", "authorids": "aaman.rebello18@imperial.ac.uk;~Kriton_Konstantinidis1;~Yao_Lei_Xu1;~Danilo_Mandic1", "gender": ";M;M;", "homepage": ";;;http://www.commsp.ee.ic.ac.uk/~mandic", "dblp": ";;274/6931;", "google_scholar": ";;KBJsa6QAAAAJ;https://scholar.google.co.uk/citations?user=hcxWZkcAAAAJ", "orcid": ";;;", "linkedin": ";kritonkonstantinidis/;;", "or_profile": "aaman.rebello18@imperial.ac.uk;~Kriton_Konstantinidis1;~Yao_Lei_Xu1;~Danilo_Mandic1", "aff": ";;Imperial College London;Imperial College London", "aff_domain": ";;imperial.ac.uk;imperial.ac.uk", "position": ";;PhD student;Full Professor", "bibtex": "@misc{\nrebello2023efficient,\ntitle={Efficient Hyperparameter Optimization Through Tensor Completion},\nauthor={Aaman Rebello and Kriton Konstantinidis and Yao Lei Xu and Danilo Mandic},\nyear={2023},\nurl={https://openreview.net/forum?id=Ivkh2_UdL9O}\n}", "github": "", "project": "", "reviewers": "C2WJ;PJTn;BwZ7;1YoG", "site": "https://openreview.net/forum?id=Ivkh2_UdL9O", "pdf_size": 397143, "recommendation": "3;3;5;5", "confidence": "4;2;4;3", "correctness": "1;3;3;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "1;3;3;2", "wc_summary_paper": "62;80;71;56", "wc_strength_and_weaknesses": "433;200;197;108", "wc_clarity_quality_novelty_and_reproducibility": "60;24;30;260", "wc_summary_review": "17;110;82;50", "wc_review": "572;414;380;474", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1694;462;688;578", "reply_reviewers": "0;0;0;0", "reply_authors": "4;2;2;2", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 2.5, 0.8660254037844386 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 67.25, 9.093266739736606 ], "wc_strength_and_weaknesses_avg": [ 234.5, 120.41698385194674 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 93.5, 97.09145173494936 ], "wc_summary_review_avg": [ 64.75, 34.79493497622894 ], "wc_review_avg": [ 460.0, 72.89718787443039 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 855.5, 490.6595051560705 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.30151134457776363, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9309135294546165920&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Imperial College London", "aff_unique_dep": "", "aff_unique_url": "https://www.imperial.ac.uk", "aff_unique_abbr": "ICL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "id": "Iwq3HPz96O", "title": "Multi-Level Contrastive Learning for Dense Prediction Task", "track": "main", "status": "Withdraw", "tldr":
"Multi-Level Contrastive Learning is an efficient self-supervised method to learn region-level feature representation for dense prediction tasks.", "abstract": "In this work, we present Multi-Level Contrastive Learning for Dense Prediction Task (MCL), an efficient self-supervised method to learn region-level feature representation for dense prediction tasks. This approach is motivated by the three key factors in detection: localization, scale consistency and recognition. Considering the above factors, we design a novel pretext task, which explicitly encodes absolute position and scale information simultaneously by assembling multi-scale images in a montage manner to mimic multi-object scenario. Unlike the existing image-level self-supervised methods, our method constructs a multi-level contrastive loss by considering each sub-region of the montage image as a singleton to learn a regional semantic representation for translation and scale consistency, while reducing the pre-training epochs to the same as supervised pre-training. Extensive experiments show that MCL consistently outperforms the recent state-of-the-art methods on various datasets with significant margins. In particular, MCL obtains 42.5 AP^bb and 38.3 AP^mk on COCO with the 1x schedule and surpasses MoCo by 4.0 AP^bb and 3.1 AP^mk, when using Mask R-CNN with an R50-FPN backbone pre-trained with 100 epochs. In addition, we further explore the alignment between pretext task and downstream tasks. We extend our pretext task to supervised pre-training, which achieves a similar performance with self-supervised learning, demonstrating the importance of the alignment between pretext task and downstream tasks. ", "keywords": "Self-supervised learning;Detection;Segmentation", "primary_area": "", "supplementary_material": "", "author": "Qiushan Guo;Yizhou Yu;Jiannan Wu;Yi Jiang;Dongdong Yu;Zehuan Yuan;Ping Luo", "authorids": "~Qiushan_Guo1;~Yizhou_Yu1;~Jiannan_Wu2;~Yi_Jiang2;~Dongdong_Yu1;~Zehuan_Yuan1;~Ping_Luo2", "gender": "M;M;M;M;M;M;", "homepage": "https://guoqiushan.github.io/;;;https://enjoyyi.github.io/;;https://shallowyuan.github.io/;http://luoping.me/", "dblp": "231/1814;90/6896.html;277/0616;;156/2054;227/3298;54/4989-2.html", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;e38fTZQAAAAJ;1euA66EAAAAJ;https://scholar.google.com.hk/citations?user=6dikuoYAAAAJ;B2RmjSYAAAAJ;;https://scholar.google.com.hk/citations?hl=en", "orcid": ";;;0000-0002-2133-8719;;;0000-0002-6685-7950", "linkedin": ";;;;;;", "or_profile": "~Qiushan_Guo1;~Yizhou_Yu1;~Jiannan_Wu2;~Yi_Jiang2;~Dongdong_Yu1;~Zehuan_Yuan1;~Luo_Ping2", "aff": "The University of Hong Kong;The University of Hong Kong;University of Hong Kong;Bytedance;Institute of Automation, Chinese Academy of Sciences;ByteDance Inc.;The University of Hong Kong", "aff_domain": "hku.hk;hku.hk;hku.hk;bytedance.com;ia.ac.cn;bytedance.com;hku.hk", "position": "PhD student;Full Professor;PhD student;Researcher;PhD student;Researcher;Assistant Professor", "bibtex": "@misc{\nguo2023multilevel,\ntitle={Multi-Level Contrastive Learning for Dense Prediction Task},\nauthor={Qiushan Guo and Yizhou Yu and Jiannan Wu and Yi Jiang and Dongdong Yu and Zehuan Yuan and Ping Luo},\nyear={2023},\nurl={https://openreview.net/forum?id=Iwq3HPz96O}\n}", "github": "", "project": "", "reviewers": "Wp5Z;Un22;WiYV;FKwJ", "site": "https://openreview.net/forum?id=Iwq3HPz96O", "pdf_size": 1445638, "recommendation": "3;5;5;6", "confidence": "5;5;5;5", "correctness": "4;3;3;3", "technical_novelty": "2;3;2;3", 
"empirical_novelty": "1;2;2;3", "wc_summary_paper": "92;42;65;46", "wc_strength_and_weaknesses": "156;239;198;151", "wc_clarity_quality_novelty_and_reproducibility": "597;34;33;15", "wc_summary_review": "112;39;24;12", "wc_review": "957;354;320;224", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 5.0, 0.0 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 61.25, 19.76581645164196 ], "wc_strength_and_weaknesses_avg": [ 186.0, 35.63004350263974 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 169.75, 246.78875075659343 ], "wc_summary_review_avg": [ 46.75, 38.86756359742658 ], "wc_review_avg": [ 463.75, 288.74069249068447 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": -0.9271726499455306, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13845144214757865837&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;1;2;1;0", "aff_unique_norm": "University of Hong Kong;ByteDance;Chinese Academy of Sciences", "aff_unique_dep": ";;Institute of Automation", "aff_unique_url": "https://www.hku.hk;https://www.bytedance.com;http://www.ia.cas.cn", "aff_unique_abbr": "HKU;Bytedance;CAS", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "Ix4Ytiwor4U", "title": "DITTO: Offline Imitation Learning with World Models", "track": "main", "status": "Reject", "tldr": "Completely offline imitation learning with world models, using RL on a latent matching objective in the model.", "abstract": "We propose DITTO, a fully offline approach to imitation learning which addresses the problem of covariate shift without access to an oracle or any additional online interactions. By unrolling agent policies in the latent space of a learned world model and penalizing drift from expert demonstrations, we can use online reinforcement learning algorithms to learn policies which solve the imitation objective, without access to the underlying environment or reward function. Decoupling policy and world model learning lets us leverage datasets of any quality to learn latent representations which provide a natural reward signal for imitation learning, avoiding the need for complex adversarial or sparse imitation-inducing rewards. 
Compared to competitive baselines, our method achieves state-of-the-art performance in a variety of challenging environments from pixel observations alone.", "keywords": "world models;imitation learning;reinforcement learning", "primary_area": "", "supplementary_material": "/attachment/a3db35d0050faf664dcdfb5e428f24a2a2c10e37.zip", "author": "Branton DeMoss;Paul Duckworth;Nick Hawes;Ingmar Posner", "authorids": "~Branton_DeMoss1;~Paul_Duckworth1;~Nick_Hawes1;~Ingmar_Posner1", "gender": ";M;M;", "homepage": "https://www.robots.ox.ac.uk/~bdemoss/;http://www.robots.ox.ac.uk/~scpd/;https://www.robots.ox.ac.uk/~nickh/;", "dblp": "339/7295;179/2160;35/1190;59/542", "google_scholar": "RtSDseMAAAAJ;I64MZDoAAAAJ;bRsi4zoAAAAJ;dPk-iwsAAAAJ", "orcid": "0000-0001-6828-6787;0000-0001-9052-6919;0000-0002-7556-6098;0000-0001-6270-700X", "linkedin": ";;;ingmar-posner-20b49a", "or_profile": "~Branton_DeMoss1;~Paul_Duckworth1;~Nick_Hawes1;~Ingmar_Posner1", "aff": "University of Oxford;InstaDeep;University of Oxford;University of Oxford", "aff_domain": "robots.ox.ac.uk;instadeep.com;ox.ac.uk;ox.ac.uk", "position": "PhD student;Principal Researcher;Associate Professor;Full Professor", "bibtex": "@misc{\ndemoss2023ditto,\ntitle={{DITTO}: Offline Imitation Learning with World Models},\nauthor={Branton DeMoss and Paul Duckworth and Nick Hawes and Ingmar Posner},\nyear={2023},\nurl={https://openreview.net/forum?id=Ix4Ytiwor4U}\n}", "github": "", "project": "", "reviewers": "RnpT;Nmum;pKRJ;ZmXh", "site": "https://openreview.net/forum?id=Ix4Ytiwor4U", "pdf_size": 609224, "recommendation": "5;5;6;6", "confidence": "4;3;4;4", "correctness": "2;3;4;4", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "181;60;77;53", "wc_strength_and_weaknesses": "184;238;296;252", "wc_clarity_quality_novelty_and_reproducibility": "464;16;33;70", "wc_summary_review": "33;27;46;111", "wc_review": "862;341;452;486", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "487;147;439;507", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 92.75, 51.69320554966581 ], "wc_strength_and_weaknesses_avg": [ 242.5, 39.98437194704951 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 145.75, 184.77604687837652 ], "wc_summary_review_avg": [ 54.25, 33.47667098144617 ], "wc_review_avg": [ 535.25, 196.1216140561769 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 395.0, 145.29969029560937 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.9045340337332909, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6201823162655395865&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Oxford;InstaDeep", "aff_unique_dep": ";", "aff_unique_url": "https://www.ox.ac.uk;https://www.instadeep.com", "aff_unique_abbr": "Oxford;InstaDeep", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Toeplitz Neural Network for Sequence Modeling", "status": "Top-25%", "track": "main", "site": 
"https://iclr.cc/virtual/2023/poster/11844", "id": "IxmWsm4xrua", "poster": "", "openreview": "https://openreview.net/forum?id=IxmWsm4xrua", "slides": "https://iclr.cc/virtual/2023/poster/11844", "video": "https://iclr.cc/virtual/2023/poster/11844", "author_site": "Qin Zhen, Xiaodong Han, Weixuan Sun, Bowen He, Dong Li, Dongxu Li, Yuchao Dai, Lingpeng Kong, Yiran Zhong", "tldr": "An efficient method that uses Toeplitz matrices to model sequences.", "abstract": "Sequence modeling has important applications in natural language processing and computer vision. Recently, the transformer-based models have shown strong performance on various sequence modeling tasks, which rely on attention to capture pairwise token relations, and position embedding to inject positional information. While showing good performance, the transformer models are inefficient to scale to long input sequences, mainly due to the quadratic space-time complexity of attention. To overcome this inefficiency, we propose to model sequences with a relative position encoded Toeplitz matrix and use a Toeplitz matrix-vector production trick to reduce the space-time complexity of the sequence modeling to log linear. A lightweight sub-network called relative position encoder is proposed to generate relative position coefficients with a fixed budget of parameters, enabling the proposed Toeplitz neural network to deal with varying sequence lengths. In addition, despite being trained on 512-token sequences, our model can extrapolate input sequence length up to 14K tokens in inference with consistent performance. Extensive experiments on autoregressive and bidirectional language modeling, image modeling, and the challenging Long-range Arena Benchmark show that our method achieves better performance than its competitors in most downstream tasks while being significantly faster.", "keywords": "Toeplitz Matrix;Sequence Modeling;Relative position", "primary_area": "", "supplementary_material": "", "author": "Zhen Qin;Xiaodong Han;Weixuan Sun;Bowen He;Dong Li;Dongxu Li;Yuchao Dai;Lingpeng Kong;Yiran Zhong", "authorids": "~Zhen_Qin6;~Xiaodong_Han3;~Weixuan_Sun1;~Bowen_He3;~Dong_Li11;~Dongxu_Li2;~Yuchao_Dai1;~Lingpeng_Kong1;~Yiran_Zhong1", "gender": ";M;F;M;M;M;M;M;M", "homepage": "https://github.com/Doraemonzzz;https://weixuansun.github.io/weixuansun-github.io/;https://www.linkedin.cn/incareer/;;http://npu-cvr.cn/;https://ikekonglp.github.io/;;https://sites.google.com/view/dongxu-li/home;https://github.com/hannnnnxd", "dblp": ";186/6724;;;65/7804;144/7656;158/9624;;", "google_scholar": "https://scholar.google.com.sg/citations?user=IcBRtycAAAAJ;vIS56AoAAAAJ;;bxmsqZIAAAAJ;https://scholar.google.com.tw/citations?user=fddAbqsAAAAJ;f1hBi5wAAAAJ;https://scholar.google.com.sg/citations?user=E9NVOBUAAAAJ;https://scholar.google.com/citations?view_op=list_works;", "orcid": ";;;;0000-0002-4432-7406;;;;", "linkedin": ";;;;;;;;", "or_profile": "~Zhen_Qin6;~Weixuan_Sun1;~Bowen_He3;~Dong_Li11;~Yuchao_Dai1;~Lingpeng_Kong1;~Yiran_Zhong1;~Dongxu_Li1;~xiaodong_han1", "aff": "Sensetime;Australian National University;;Shanghai AI Lab;Northwestern Polytechnical University;Department of Computer Science, The University of Hong Kong;Shanghai AI Lab;SalesForce.com;sensetime", "aff_domain": "sensetime.com;anu.edu.au;;org.cn;nwpu.edu.cn;cs.hku.hk;pjlab.org.cn;salesforce.com;sensetime.com", "position": "Researcher;PhD student;;Researcher;Professor;Assistant Professor;PI;Researcher;Researcher", "bibtex": "@inproceedings{\nqin2023toeplitz,\ntitle={Toeplitz Neural Network 
for Sequence Modeling},\nauthor={Zhen Qin and Xiaodong Han and Weixuan Sun and Bowen He and Dong Li and Dongxu Li and Yuchao Dai and Lingpeng Kong and Yiran Zhong},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=IxmWsm4xrua}\n}", "github": "", "project": "", "reviewers": "kNVV;gJML;k2X2;4U54", "pdf_size": 542283, "recommendation": "6;6;8;8", "confidence": "3;4;4;4", "correctness": "2;3;4;4", "technical_novelty": "3;3;3;4", "empirical_novelty": "2;3;3;4", "wc_summary_paper": "102;139;92;96", "wc_strength_and_weaknesses": "156;408;112;200", "wc_clarity_quality_novelty_and_reproducibility": "33;71;17;23", "wc_summary_review": "19;80;59;31", "wc_review": "310;698;280;350", "wc_reply_reviewers": "24;28;0;0", "wc_reply_authors": "553;1573;349;451", "reply_reviewers": "1;1;0;0", "reply_authors": "4;3;1;1", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 107.25, 18.673175948402566 ], "wc_strength_and_weaknesses_avg": [ 219.0, 113.46805717910217 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 36.0, 21.0 ], "wc_summary_review_avg": [ 47.25, 23.836683913665507 ], "wc_review_avg": [ 409.5, 168.40650224976469 ], "wc_reply_reviewers_avg": [ 13.0, 13.076696830622021 ], "wc_reply_authors_avg": [ 731.5, 491.1646872485847 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.25, 1.299038105676658 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 9, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.9045340337332909, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=599923718621829643&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=IxmWsm4xrua", "email": "sensetime.com;anu.edu.au;;org.cn;nwpu.edu.cn;cs.hku.hk;pjlab.org.cn;salesforce.com;sensetime.com", "author_num": 9, "aff_unique_index": "0;1;2;3;4;2;5;0", "aff_unique_norm": "SenseTime;Australian National University;Shanghai AI Lab;Northwestern Polytechnical University;University of Hong Kong;Salesforce", "aff_unique_dep": ";;;;Department of Computer Science;", "aff_unique_url": "https://www.sensetime.com;https://www.anu.edu.au;https://www.shanghaiailab.com;https://www.nwpu.edu.cn;https://www.hku.hk;https://www.salesforce.com", "aff_unique_abbr": "SenseTime;ANU;SAIL;NWPU;HKU;Salesforce", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;0;0;0;0;2;0", "aff_country_unique": "China;Australia;United States" }, { "id": "Iyi7eb9VIW", "title": "A Quasi-Bayesian Nonparametric Density Estimator via Autoregressive Predictive Updates", "track": "main", "status": "Withdraw", "tldr": "We introduce a Quasi-Bayesian nonparametric density estimator for moderate-sized data sets that is inspired by an autoregressive Dirichlet Process Mixture Model.", "abstract": "Bayesian methods are a popular choice for statistical inference in small-data regimes due to the regularization effect induced by the prior. In the context of density estimation, the standard nonparametric Bayesian approach is to target the posterior predictive of the Dirichlet process mixture model. 
In general, direct estimation of the posterior predictive is intractable and so methods typically resort to approximating the posterior distribution as an intermediate step. The recent development of quasi-Bayesian predictive copula updates, however, has made it possible to perform tractable predictive density estimation without the need for posterior approximation. Although these estimators are computationally appealing, they tend to struggle on non-smooth data distributions. This is due to the comparatively restrictive form of the likelihood models from which the proposed copula updates were derived. To address this shortcoming, we consider a Bayesian nonparametric model with an autoregressive likelihood decomposition and a Gaussian process prior. While the predictive update of such a model is typically intractable, we derive a quasi-Bayesian predictive update that achieves state-of-the-art results on moderate-sized examples.", "keywords": "Bayesian nonparametrics;Dirichlet Process Mixture Models;Quasi-Bayes", "primary_area": "", "supplementary_material": "/attachment/74a31c0fedc1a49d96b335d5160e08e47a171c3f.zip", "author": "Sahra Ghalebikesabi;Christopher C. Holmes;Edwin Fong;Brieuc Lehmann", "authorids": "~Sahra_Ghalebikesabi1;~Christopher_C._Holmes1;~Edwin_Fong1;~Brieuc_Lehmann1", "gender": ";M;M;M", "homepage": ";;https://edfong.github.io/;https://brieuclehmann.github.io", "dblp": ";08/6129;236/6357;315/0352", "google_scholar": ";;eT4pY6QAAAAJ;gqVlOygAAAAJ", "orcid": ";;;0000-0002-7302-4391", "linkedin": ";;;brieuc-lehmann/", "or_profile": "~Sahra_Ghalebikesabi1;~Christopher_C._Holmes1;~Edwin_Fong1;~Brieuc_Lehmann1", "aff": ";University of Oxford;Novo Nordisk;University College London, University of London", "aff_domain": ";ox.ac.uk;novonordisk.com;ucl.ac.uk", "position": ";Full Professor;Data scientist;Assistant Professor", "bibtex": "@misc{\nghalebikesabi2023a,\ntitle={A Quasi-Bayesian Nonparametric Density Estimator via Autoregressive Predictive Updates},\nauthor={Sahra Ghalebikesabi and Christopher C. 
Holmes and Edwin Fong and Brieuc Lehmann},\nyear={2023},\nurl={https://openreview.net/forum?id=Iyi7eb9VIW}\n}", "github": "", "project": "", "reviewers": "hYYH;x2ue;m6PL", "site": "https://openreview.net/forum?id=Iyi7eb9VIW", "pdf_size": 3375033, "recommendation": "3;6;6", "confidence": "3;3;4", "correctness": "3;3;3", "technical_novelty": "3;3;3", "empirical_novelty": "1;2;2", "wc_summary_paper": "39;72;198", "wc_strength_and_weaknesses": "363;73;252", "wc_clarity_quality_novelty_and_reproducibility": "537;88;74", "wc_summary_review": "30;137;74", "wc_review": "969;370;598", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "wc_summary_paper_avg": [ 103.0, 68.51277253184256 ], "wc_strength_and_weaknesses_avg": [ 229.33333333333334, 119.47198648869767 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 233.0, 215.03643102197046 ], "wc_summary_review_avg": [ 80.33333333333333, 43.91152721350309 ], "wc_review_avg": [ 645.6666666666666, 246.8526326013604 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.5, "corr_recommendation_correctness": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3578716147099646118&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Oxford;Novo Nordisk;University College London", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ox.ac.uk;https://www.novonordisk.com;https://www.ucl.ac.uk", "aff_unique_abbr": "Oxford;NN;UCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United Kingdom;Denmark" }, { "id": "IzI055GrvG", "title": "Object Tracking by Hierarchical Part-Whole Attention", "track": "main", "status": "Reject", "tldr": "", "abstract": "We show in this paper that hierarchical representations of objects can provide an informative and low-noise proxy to associate objects of interest in multi-object tracking. This is aligned with our intuition that we usually only need to compare a small region of the body of target objects to distinguish them from other objects. We build the hierarchical representation at the levels of (1) target body parts, (2) the whole target body, and (3) the union area of the target and other overlapping objects. Furthermore, with the spatio-temporal attention mechanism of a transformer, we can solve the tracking in a global fashion while keeping the process online. We design our method by combining the representation with the transformer and name it Hierarchical Part-Whole Attention, or HiPWA for short. Experiments on multiple datasets demonstrate its effectiveness. Moreover, previous methods mostly focus on leveraging transformers to exploit long temporal context during association, which requires heavy computational resources. HiPWA instead focuses on a more informative representation of objects in every single frame, so it is more robust to the length of the temporal context and more computationally economical.
", "keywords": "multi-object tracking;transformer;visual representation", "primary_area": "", "supplementary_material": "/attachment/06aa49973741c8f8df860c7ebe18f951452309da.zip", "author": "Jinkun Cao;Jiangmiao Pang;Xinshuo Weng;Rawal Khirodkar;Kris M. Kitani", "authorids": "~Jinkun_Cao1;~Jiangmiao_Pang1;~Xinshuo_Weng3;~Rawal_Khirodkar1;~Kris_M._Kitani1", "gender": "M;M;F;M;M", "homepage": "https://www.jinkuncao.com;https://oceanpang.github.io/;http://www.xinshuoweng.com;https://rawalkhirodkar.github.io/;http://www.cs.cmu.edu/~kkitani/", "dblp": "224/0126;231/7630;192/1952.html;230/4108;42/163", "google_scholar": "xDtTbmQAAAAJ;https://scholar.google.com/citations?authuser=0;dthSEsoAAAAJ;https://scholar.google.com/citations?view_op=list_works;yv3sH74AAAAJ", "orcid": ";0000-0002-6711-9319;0000-0002-7894-4381;;0000-0002-9389-4060", "linkedin": ";;xinshuoweng;rawalkhirodkar/;", "or_profile": "~Jinkun_Cao1;~Jiangmiao_Pang1;~Xinshuo_Weng3;~Rawal_Khirodkar1;~Kris_M._Kitani1", "aff": "Carnegie Mellon University;Shanghai AI Laboratory ;NVIDIA;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "andrew.cmu.edu;pjlab.org.cn;nvidia.com;cmu.edu;cmu.edu", "position": "PhD student;Research Scientist;Researcher;PhD student;Associate Professor", "bibtex": "@misc{\ncao2023object,\ntitle={Object Tracking by Hierarchical Part-Whole Attention},\nauthor={Jinkun Cao and Jiangmiao Pang and Xinshuo Weng and Rawal Khirodkar and Kris M. Kitani},\nyear={2023},\nurl={https://openreview.net/forum?id=IzI055GrvG}\n}", "github": "", "project": "", "reviewers": "rS4y;YxoG;hWAz", "site": "https://openreview.net/forum?id=IzI055GrvG", "pdf_size": 6420671, "recommendation": "6;6;8", "confidence": "5;3;3", "correctness": "4;3;4", "technical_novelty": "2;2;3", "empirical_novelty": "2;2;3", "wc_summary_paper": "71;36;79", "wc_strength_and_weaknesses": "90;221;91", "wc_clarity_quality_novelty_and_reproducibility": "77;17;14", "wc_summary_review": "52;20;30", "wc_review": "290;294;214", "wc_reply_reviewers": "0;0;21", "wc_reply_authors": "781;397;207", "reply_reviewers": "0;0;1", "reply_authors": "1;1;1", "recommendation_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 62.0, 18.672618098881223 ], "wc_strength_and_weaknesses_avg": [ 134.0, 61.51964455900787 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 36.0, 29.017236257093817 ], "wc_summary_review_avg": [ 34.0, 13.366625103842281 ], "wc_review_avg": [ 266.0, 36.8057966449127 ], "wc_reply_reviewers_avg": [ 7.0, 9.899494936611665 ], "wc_reply_authors_avg": [ 461.6666666666667, 238.75417390185152 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.5, "corr_recommendation_correctness": 0.49999999999999983, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:SPqsWOB9n_AJ:scholar.google.com/&scioq=Object+Tracking+by+Hierarchical+Part-Whole+Attention&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "Carnegie Mellon University;Shanghai AI Laboratory;NVIDIA", "aff_unique_dep": ";;NVIDIA Corporation", "aff_unique_url": 
"https://www.cmu.edu;https://www.shanghai-ai-lab.com;https://www.nvidia.com", "aff_unique_abbr": "CMU;SAIL;NVIDIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "United States;China" }, { "id": "J0IhgZ8ziv-", "title": "ENHANCING THE PRIVACY OF FEDERATED LEARNING THROUGH DATA SYNTHESIS", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Federated Learning (FL) is a distributed machine learning architecture where edge devices collaboratively learn the shared model, while the training data is securely held at the edge devices. FL promises a way forward to preserving data privacy by sending model updates in the form of gradients or the weights themselves. However, these updates still contain the essence of the original training data and can be\nreconstructed using gradient-based attacks. To overcome this, we propose a novel Privacy-Preserving Federated Learning algorithm (PPFed) wherein we generate a condensed dataset from the original training data at each edge device. The client\u2019s then train their local models on the condensed dataset which is then broadcasted to the server, followed by regular federated averaging. Our method provides privacy by being robust against gradient-based attacks, which holds across different benchmark datasets and CNN based architectures", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/4a5466a3e617e5b6585b4db81f890b60f2d4271c.zip", "author": "M Yashwanth;Sai Anuroop Kesanapalli;Gaurav Kumar Nayak;Anirban Chakraborty;Yogesh Simmhan", "authorids": "~M_Yashwanth2;~Sai_Anuroop_Kesanapalli1;~Gaurav_Kumar_Nayak2;~Anirban_Chakraborty1;~Yogesh_Simmhan2", "gender": "M;M;M;M;M", "homepage": ";https://ksanu1998.github.io;https://sites.google.com/view/gauravnayak;https://anirbanchakraborty.github.io/;http://cds.iisc.ac.in/faculty/simmhan/", "dblp": "348/7177.html;296/4529;241/6244;73/2286-1;", "google_scholar": ";d9_YakoAAAAJ;https://scholar.google.co.in/citations?user=cLCeKTkAAAAJ;https://scholar.google.co.in/citations?user=NtAsZK-2HjcC;https://scholar.google.co.in/citations?user=HIOx9E0AAAAJ", "orcid": ";;0000-0002-6406-6178;0000-0002-6946-9152;0000-0003-4140-7774", "linkedin": "yashwanth-mandula-aba700a5/;https://linkedin.com/in/ksanu1998/;;;", "or_profile": "~M_Yashwanth2;~Sai_Anuroop_Kesanapalli1;~Gaurav_Kumar_Nayak2;~Anirban_Chakraborty1;~Yogesh_Simmhan2", "aff": "Indian Institute of Science;University of Southern California;University of Central Florida;Indian Institute of Science;Indian Institute of Science, Bangalore", "aff_domain": "iisc.ac.in;usc.edu;ucf.edu;iisc.ac.in;iisc.ac.in", "position": "PhD student;MS student;Postdoc;Assistant Professor;Associate Professor", "bibtex": "@misc{\nyashwanth2023enhancing,\ntitle={{ENHANCING} {THE} {PRIVACY} {OF} {FEDERATED} {LEARNING} {THROUGH} {DATA} {SYNTHESIS}},\nauthor={M Yashwanth and Sai Anuroop Kesanapalli and Gaurav Kumar Nayak and Anirban Chakraborty and Yogesh Simmhan},\nyear={2023},\nurl={https://openreview.net/forum?id=J0IhgZ8ziv-}\n}", "github": "", "project": "", "reviewers": "XtoY;WJHr;D2Da;SNzE", "site": "https://openreview.net/forum?id=J0IhgZ8ziv-", "pdf_size": 618053, "recommendation": "1;3;3;5", "confidence": "4;4;3;3", "correctness": "1;1;2;2", "technical_novelty": "1;1;2;2", "empirical_novelty": "1;1;0;2", "wc_summary_paper": "98;57;73;48", "wc_strength_and_weaknesses": "477;243;181;284", "wc_clarity_quality_novelty_and_reproducibility": "64;11;27;97", "wc_summary_review": "12;28;11;58", 
"wc_review": "651;339;292;487", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.0, 1.4142135623730951 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 1.5, 0.5 ], "technical_novelty_avg": [ 1.5, 0.5 ], "empirical_novelty_avg": [ 1.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 69.0, 18.986837546047525 ], "wc_strength_and_weaknesses_avg": [ 296.25, 110.61052165142338 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 49.75, 33.37195679009548 ], "wc_summary_review_avg": [ 27.25, 18.9917745353087 ], "wc_review_avg": [ 442.25, 140.36982403636475 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.7071067811865476, "corr_recommendation_correctness": 0.7071067811865476, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:MGW9_SBbe3cJ:scholar.google.com/&scioq=ENHANCING+THE+PRIVACY+OF+FEDERATED+LEARNING+THROUGH+DATA+SYNTHESIS&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "Indian Institute of Science;University of Southern California;University of Central Florida", "aff_unique_dep": ";;", "aff_unique_url": "https://www.iisc.ac.in;https://www.usc.edu;https://www.ucf.edu", "aff_unique_abbr": "IISc;USC;UCF", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Los Angeles;Bangalore", "aff_country_unique_index": "0;1;1;0;0", "aff_country_unique": "India;United States" }, { "id": "J13x0dErg1", "title": "Rethinking Knowledge Distillation via Cross-Entropy", "track": "main", "status": "Withdraw", "tldr": "A new teacher-based knowledge distillation method and a new teacher-free knowledge distillation method", "abstract": "Knowledge Distillation (KD) has developed extensively and boosted various tasks. The classical KD method adds the KD loss to the original cross-entropy (CE) loss. We try to decompose the KD loss to explore its relation with the CE loss. Surprisingly, we find it can be regarded as a combination of the CE loss and an extra loss which has the identical form as the CE loss. However, we notice the extra loss forces the student's relative probability to learn the teacher's absolute probability. Moreover, the sum of the two probabilities is different, making it hard to optimize. To address this issue, we revise the formulation and propose a distributed loss. In addition, we utilize teachers' target output as the soft target, proposing the soft loss. Combining the soft loss and the distributed loss, we propose a new KD loss (NKD). Furthermore, we smooth students' target output to treat it as the soft target for training without teachers and propose a teacher-free new KD loss (tf-NKD). Our method achieves state-of-the-art performance on CIFAR-100 and ImageNet. For example, with ResNet-34 as the teacher, we boost the ImageNet Top-1 accuracy of ResNet18 from 69.90% to 71.96%. 
In training without teachers, MobileNet, ResNet-18 and SwinTransformer-Tiny achieve 70.04%, 70.76%, and 81.48%, which are 0.83%, 0.86%, and 0.30% higher than the baseline, respectively.", "keywords": "Knowledge Distillation;Image Classification", "primary_area": "", "supplementary_material": "/attachment/8bc5e67be74ed593a64d33f3d60fda2ef702e2d4.zip", "author": "Zhendong Yang;Zhe Li;Yuan Gong;Tianke Zhang;Shanshan Lao;Chun Yuan;Yu Li", "authorids": "~Zhendong_Yang2;~Zhe_Li11;~Yuan_Gong2;~Tianke_Zhang1;~Shanshan_Lao1;~Chun_Yuan1;~Yu_Li4", "gender": "M;M;F;M;;M;M", "homepage": ";;;https://scholar.google.com/citations?hl=zh-CN&user=nsKcpUEAAAAJ&view_op=list_works&sortby=pubdate;;https://www.sigs.tsinghua.edu.cn/fg3/105064.jhtml;https://yu-li.github.io/", "dblp": "14/1820;;;281/6616.html;;;34/2997-3", "google_scholar": "M9qKrogAAAAJ;mmTJPJ4AAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=zh-CN;;https://scholar.google.com.hk/citations?user=fYdxi2sAAAAJ;j9lwU7kAAAAJ", "orcid": ";;;;;;", "linkedin": ";;https://www.linkedin.cn/injobs/in/%E6%B8%8A-%E9%BE%9A-546219216;;https://www.linkedin.cn/incareer/in/%E7%8F%8A%E7%8F%8A-%E5%8A%B3-87b50723a;;", "or_profile": "~Zhendong_Yang2;~Zhe_Li11;~Yuan_Gong2;~Tianke_Zhang1;~Shanshan_Lao1;~Chun_Yuan1;~Yu_Li4", "aff": " Tsinghua University;AMD;Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University;International Digital Economy Academy", "aff_domain": "mails.tsinghua.edu.cn;amd.com;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;idea.edu.cn", "position": "MS student;Researcher;MS student;MS student;MS student;Full Professor;Principal Researcher", "bibtex": "@misc{\nyang2023rethinking,\ntitle={Rethinking Knowledge Distillation via Cross-Entropy},\nauthor={Zhendong Yang and Zhe Li and Yuan Gong and Tianke Zhang and Shanshan Lao and Chun Yuan and Yu Li},\nyear={2023},\nurl={https://openreview.net/forum?id=J13x0dErg1}\n}", "github": "", "project": "", "reviewers": "HpFC;QvdC;3xCr;ZkzL", "site": "https://openreview.net/forum?id=J13x0dErg1", "pdf_size": 2494169, "recommendation": "3;3;3;5", "confidence": "4;4;4;4", "correctness": "3;3;4;2", "technical_novelty": "2;2;2;2", "empirical_novelty": "2;3;2;2", "wc_summary_paper": "63;81;162;107", "wc_strength_and_weaknesses": "161;625;259;117", "wc_clarity_quality_novelty_and_reproducibility": "28;32;110;10", "wc_summary_review": "21;70;81;5", "wc_review": "273;808;612;239", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "235;339;164;192", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 103.25, 37.35220877003126 ], "wc_strength_and_weaknesses_avg": [ 290.5, 199.8468163369134 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 45.0, 38.43175770115127 ], "wc_summary_review_avg": [ 44.25, 31.995116814914116 ], "wc_review_avg": [ 483.0, 237.64574475466628 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 232.5, 66.48496070541066 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": -0.816496580927726, "gs_citation": 22, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=2587833531216742078&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;0;0;0;0;2", "aff_unique_norm": "Tsinghua University;Advanced Micro Devices, Inc.;International Digital Economy Academy", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.amd.com;", "aff_unique_abbr": "THU;AMD;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;0", "aff_country_unique": "China;United States;" }, { "id": "J1fysSeRdk", "title": "Shape Analysis by Shadow Synthesis", "track": "main", "status": "Withdraw", "tldr": "We propose a method to reconstruct a 3D object from just its shadow by inverting an implicit 3D generative model", "abstract": "3D reconstruction is a fundamental problem in computer vision, and the task is especially challenging when the object to reconstruct is partially or fully occluded. We introduce a method that uses the shadows cast by an unobserved object in order to infer the possible 3D volumes under occlusion. We create a differentiable image formation model that allows us to jointly infer the 3D shape of an object, its pose, and the position of a light source. Since the approach is end-to-end differentiable, we are able to integrate learned priors of object geometry in order to generate realistic 3D shapes of different object categories. Experiments and visualizations show that the method is able to generate multiple possible solutions that are consistent with the observation of the shadow. Our approach works even when the position of the light source and object pose are both unknown. Our approach is also robust to real-world images where ground-truth shadow mask is unknown.", "keywords": "3D Reconstruction;Shadow;Differentiable Rendering;Neural Fields", "primary_area": "", "supplementary_material": "", "author": "Ruoshi Liu;Sachit Menon;Chengzhi Mao;Dennis Park;Simon Stent;Carl Vondrick", "authorids": "~Ruoshi_Liu2;~Sachit_Menon1;~Chengzhi_Mao2;~Dennis_Park1;~Simon_Stent1;~Carl_Vondrick2", "gender": "M;;M;;M;M", "homepage": "https://ruoshiliu.github.io/;http://sachit-menon.github.io/;http://www.cs.columbia.edu/~mcz/;;;http://www.cs.columbia.edu/~vondrick/", "dblp": "283/4797;220/3331;;92/8610;146/2461;26/8610", "google_scholar": "suAawHYAAAAJ;https://scholar.google.com/citations?pli=1;pTTEiHUAAAAJ;;f3aij5UAAAAJ;3MzhkFIAAAAJ", "orcid": ";;;;;", "linkedin": "ruoshi-liu-a5046aa0/;;;;;", "or_profile": "~Ruoshi_Liu2;~Sachit_Menon1;~Chengzhi_Mao2;~Dennis_Park1;~Simon_Stent1;~Carl_Vondrick2", "aff": "Columbia University;Meta Facebook;Columbia University;Toyota Research Institute;Woven Planet;Columbia University", "aff_domain": "columbia.edu;meta.com;columbia.edu;tri.global;woven-planet.global;columbia.edu", "position": "PhD student;Intern;PhD student;Sr. 
Research Scientist;Research Manager;Assistant Professor", "bibtex": "@misc{\nliu2023shape,\ntitle={Shape Analysis by Shadow Synthesis},\nauthor={Ruoshi Liu and Sachit Menon and Chengzhi Mao and Dennis Park and Simon Stent and Carl Vondrick},\nyear={2023},\nurl={https://openreview.net/forum?id=J1fysSeRdk}\n}", "github": "", "project": "", "reviewers": "7VBb;6FYg;7ZFh", "site": "https://openreview.net/forum?id=J1fysSeRdk", "pdf_size": 7268394, "recommendation": "3;5;6", "confidence": "3;4;4", "correctness": "2;3;3", "technical_novelty": "2;3;3", "empirical_novelty": "2;3;0", "wc_summary_paper": "175;86;55", "wc_strength_and_weaknesses": "170;131;150", "wc_clarity_quality_novelty_and_reproducibility": "138;55;64", "wc_summary_review": "130;26;8", "wc_review": "613;298;277", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.6666666666666667, 1.247219128924647 ], "wc_summary_paper_avg": [ 105.33333333333333, 50.86146762421321 ], "wc_strength_and_weaknesses_avg": [ 150.33333333333334, 15.92342788332825 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 85.66666666666667, 37.1872140511882 ], "wc_summary_review_avg": [ 54.666666666666664, 53.773186213535425 ], "wc_review_avg": [ 396.0, 153.68148880070103 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.9449111825230683, "corr_recommendation_correctness": 0.9449111825230683, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:7gc_npQGpwEJ:scholar.google.com/&scioq=Shape+Analysis+by+Shadow+Synthesis&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;0;2;3;0", "aff_unique_norm": "Columbia University;Meta;Toyota Research Institute;Woven Planet", "aff_unique_dep": ";Meta Platforms, Inc.;;", "aff_unique_url": "https://www.columbia.edu;https://meta.com;https://www.tri.global;https://www.woven-planet.com", "aff_unique_abbr": "Columbia;Meta;TRI;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;0", "aff_country_unique": "United States;Japan" }, { "title": "Anti-Symmetric DGN: a stable architecture for Deep Graph Networks", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11227", "id": "J3Y7cgZOOS", "poster": "/media/PosterPDFs/ICLR%202023/11227.png?t=1681370324.3135233", "openreview": "https://openreview.net/forum?id=J3Y7cgZOOS", "slides": "https://iclr.cc/virtual/2023/poster/11227", "video": "https://iclr.cc/virtual/2023/poster/11227", "author_site": "Alessio Gravina, Davide Bacciu, Claudio Gallicchio", "tldr": "", "abstract": "Deep Graph Networks (DGNs) currently dominate the research landscape of learning from graphs, due to their efficiency and ability to implement an adaptive message-passing scheme between the nodes. However, DGNs are typically limited in their ability to propagate and preserve long-term dependencies between nodes, i.e., they suffer from the over-squashing phenomenon. 
As a result, \nwe can expect them to under-perform, since different problems require capturing interactions at different (and possibly large) radii in order to be effectively solved. In this work, we present Anti-Symmetric Deep Graph Networks (A-DGNs), a framework for stable and non-dissipative DGN design, conceived through the lens of ordinary differential equations. We give theoretical proof that our method is stable and non-dissipative, leading to two key results: long-range information between nodes is preserved, and no gradient vanishing or explosion occurs in training. We empirically validate the proposed approach on several graph benchmarks, showing that A-DGN yields improved performance and enables effective learning even when dozens of layers are used.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Alessio Gravina;Davide Bacciu;Claudio Gallicchio", "authorids": "~Alessio_Gravina1;~Davide_Bacciu1;~Claudio_Gallicchio1", "gender": ";M;M", "homepage": "http://pages.di.unipi.it/gravina/;http://pages.di.unipi.it/bacciu/;https://sites.google.com/site/cgallicch/", "dblp": ";07/6626;41/9473", "google_scholar": "oAzxkbYAAAAJ;https://scholar.google.it/citations?user=1d5n2WkAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0001-5526-2479;0000-0001-5213-2468;", "linkedin": "alessio-gravina/;bacciu/;claudio-gallicchio-05a47038/", "or_profile": "~Alessio_Gravina1;~Davide_Bacciu1;~Claudio_Gallicchio1", "aff": "University of Pisa;University of Pisa;University of Pisa", "aff_domain": "unipi.it;unipi.it;unipi.it", "position": "PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\ngravina2023antisymmetric,\ntitle={Anti-Symmetric {DGN}: a stable architecture for Deep Graph Networks},\nauthor={Alessio Gravina and Davide Bacciu and Claudio Gallicchio},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=J3Y7cgZOOS}\n}", "github": "", "project": "", "reviewers": "Cm1J;Hf3x;fEsH;29BN", "pdf_size": 1089066, "recommendation": "5;5;6;8", "confidence": "4;4;3;4", "correctness": "2;4;3;4", "technical_novelty": "2;3;3;3", "empirical_novelty": "1;2;2;3", "wc_summary_paper": "71;61;80;69", "wc_strength_and_weaknesses": "130;377;81;189", "wc_clarity_quality_novelty_and_reproducibility": "322;55;301;142", "wc_summary_review": "27;23;60;36", "wc_review": "550;516;522;436", "wc_reply_reviewers": "438;616;505;0", "wc_reply_authors": "1401;2207;814;546", "reply_reviewers": "2;4;2;0", "reply_authors": "6;6;3;1", "recommendation_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 70.25, 6.7592529172978875 ], "wc_strength_and_weaknesses_avg": [ 194.25, 112.22605535257844 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 205.0, 111.10130512284724 ], "wc_summary_review_avg": [ 36.5, 14.361406616345072 ], "wc_review_avg": [ 506.0, 42.40283009422838 ], "wc_reply_reviewers_avg": [ 389.75, 233.82939828002807 ], "wc_reply_authors_avg": [ 1242.0, 637.2020872533298 ], "reply_reviewers_avg": [ 2.0, 1.4142135623730951 ], "reply_authors_avg": [ 4.0, 2.1213203435596424 ], "replies_avg": [ 32, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.4923659639173309, "gs_citation": 74, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=3715279376268257006&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=J3Y7cgZOOS", "email": "unipi.it;unipi.it;unipi.it", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Pisa", "aff_unique_dep": "", "aff_unique_url": "https://www.unipi.it", "aff_unique_abbr": "UNIP", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Italy" }, { "id": "J3_WcZW3oV1", "title": "Dataset Projection: Finding Target-aligned Subsets of Auxiliary Data", "track": "main", "status": "Withdraw", "tldr": "We project datasets to find subsets of auxiliary datasets that are most aligned with a target dataset.", "abstract": "To obtain more training data for a target task, one can draw upon related but distinct datasets, or auxiliary datasets. We put forth the problem of dataset projection---finding subsets of auxiliary datasets that are most aligned with a target dataset. These so-called projected datasets can be used as training data to improve performance on target tasks while being substantially smaller than the auxiliary dataset. We then develop a framework for solving such dataset projection problems and demonstrate in a variety of vision and language settings that the resulting projected datasets, when compared to the original auxiliary datasets, (1) are closer approximations of target datasets and (2) can be used to improve test performance or provide analysis for the target datasets. ", "keywords": "datasets;auxiliary data;dataset projection", "primary_area": "", "supplementary_material": "/attachment/7d6e517f313a9530a8581574d76d003eb1eee171.zip", "author": "Eric Wong;Kai Yuanqing Xiao;Aleksander Madry", "authorids": "~Eric_Wong1;~Kai_Yuanqing_Xiao1;~Aleksander_Madry1", "gender": "M;;M", "homepage": "http://riceric22.github.io/;https://kaixiao.github.io/;https://people.csail.mit.edu/madry/", "dblp": "64/1811-1.html;;67/2454", "google_scholar": "pWnTMRkAAAAJ;xblGvQgAAAAJ;SupjsEUAAAAJ", "orcid": ";0000-0002-9496-3072;", "linkedin": ";kaixiao/;", "or_profile": "~Eric_Wong1;~Kai_Yuanqing_Xiao1;~Aleksander_Madry1", "aff": "University of Pennsylvania;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "upenn.edu;mit.edu;mit.edu", "position": "Assistant Professor;PhD student;Professor", "bibtex": "@misc{\nwong2023dataset,\ntitle={Dataset Projection: Finding Target-aligned Subsets of Auxiliary Data},\nauthor={Eric Wong and Kai Yuanqing Xiao and Aleksander Madry},\nyear={2023},\nurl={https://openreview.net/forum?id=J3_WcZW3oV1}\n}", "github": "", "project": "", "reviewers": "6rWN;kxek;PTjh", "site": "https://openreview.net/forum?id=J3_WcZW3oV1", "pdf_size": 11045142, "recommendation": "3;3;5", "confidence": "5;4;4", "correctness": "3;3;3", "technical_novelty": "2;2;2", "empirical_novelty": "2;2;2", "wc_summary_paper": "47;71;123", "wc_strength_and_weaknesses": "582;447;200", "wc_clarity_quality_novelty_and_reproducibility": "22;66;22", "wc_summary_review": "80;35;73", "wc_review": "731;619;418", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 80.33333333333333, 31.72100600898752 ], 
"wc_strength_and_weaknesses_avg": [ 409.6666666666667, 158.1693888069229 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 36.666666666666664, 20.741798914805393 ], "wc_summary_review_avg": [ 62.666666666666664, 19.770910168449223 ], "wc_review_avg": [ 589.3333333333334, 129.4921705054874 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.4999999999999999, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12095389600328976160&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;1", "aff_unique_norm": "University of Pennsylvania;Massachusetts Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.upenn.edu;https://web.mit.edu", "aff_unique_abbr": "UPenn;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "J41IW8Z7mE", "title": "Membership Inference Attacks Against Text-to-image Generation Models", "track": "main", "status": "Withdraw", "tldr": "We perform the first privacy analysis of text-to-image generation models through the lens of membership inference.", "abstract": "Text-to-image generation models have recently attracted unprecedented attention as they unlatch imaginative applications in all areas of life. However, developing such models requires huge amounts of data that might contain privacy-sensitive information, e.g., face identity. While privacy risks have been extensively demonstrated in the image classification and GAN generation domains, privacy risks in the text-to-image generation domain are largely unexplored. In this paper, we perform the first privacy analysis of text-to-image generation models through the lens of membership inference. Specifically, we propose three key intuitions about membership information and design four attack methodologies accordingly. We conduct comprehensive evaluations on two mainstream text-to-image generation models including sequence-to-sequence modeling and diffusion-based modeling. The empirical results show that all of the proposed attacks can achieve significant performance, in some cases even close to an accuracy of 1, and thus the corresponding risk is much more severe than that shown by existing membership inference attacks. We further conduct an extensive ablation study to analyze the factors that may affect the attack performance, which can guide developers and researchers to be alert to vulnerabilities in text-to-image generation models. 
All these findings indicate that our proposed attacks pose a realistic privacy threat to the text-to-image generation models.", "keywords": "Text-to-image Generation Model;Membership inference attacks", "primary_area": "", "supplementary_material": "", "author": "Yixin Wu;Ning Yu;Zheng Li;Michael Backes;Yang Zhang", "authorids": "~Yixin_Wu1;~Ning_Yu2;~Zheng_Li17;director@cispa.de;~Yang_Zhang15", "gender": "F;;M;;M", "homepage": "https://yxoh.github.io/;;https://zhenglisec.github.io/;;https://yangzhangalmo.github.io/", "dblp": "47/2819-1;;10/1143-23;;06/6785-16", "google_scholar": "NNnpxzAAAAAJ;;xEAaaGsAAAAJ;;Xeb2888AAAAJ", "orcid": "0000-0002-3000-9423;;0000-0002-4466-7523;;0000-0003-3612-7348", "linkedin": ";;;;", "or_profile": "~Yixin_Wu1;~Ning_Yu2;~Zheng_Li17;director@cispa.de;~Yang_Zhang15", "aff": "CISPA Helmholtz Center for Information Security;;CISPA Helmholtz Center for Information Security;;CISPA Helmholtz Center for Information Security", "aff_domain": "cispa.de;;cispa.de;;cispa.de", "position": "PhD student;;PhD student;;Assistant Professor", "bibtex": "@misc{\nwu2023membership,\ntitle={Membership Inference Attacks Against Text-to-image Generation Models},\nauthor={Yixin Wu and Ning Yu and Zheng Li and Michael Backes and Yang Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=J41IW8Z7mE}\n}", "github": "", "project": "", "reviewers": "2Evr;iKhU;UvRe;ERvw", "site": "https://openreview.net/forum?id=J41IW8Z7mE", "pdf_size": 2140102, "recommendation": "3;3;5;6", "confidence": "3;3;3;3", "correctness": "2;2;2;3", "technical_novelty": "2;2;3;2", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "44;43;62;45", "wc_strength_and_weaknesses": "399;42;331;177", "wc_clarity_quality_novelty_and_reproducibility": "61;435;31;47", "wc_summary_review": "27;34;18;35", "wc_review": "531;554;442;304", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.0, 0.0 ], "correctness_avg": [ 2.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 48.5, 7.826237921249264 ], "wc_strength_and_weaknesses_avg": [ 237.25, 138.4781119888627 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 143.5, 168.63199577778826 ], "wc_summary_review_avg": [ 28.5, 6.800735254367722 ], "wc_review_avg": [ 457.75, 98.1284235071572 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.7777777777777777, "gs_citation": 68, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1774053045308276137&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "CISPA Helmholtz Center for Information Security", "aff_unique_dep": "", "aff_unique_url": "https://www.cispa.de/", "aff_unique_abbr": "CISPA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "title": "Fooling SHAP with Stealthily Biased Sampling", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11807", "id": "J4mJjotSauh", "poster": "/media/PosterPDFs/ICLR%202023/11807.png?t=1680794620.8997662", "openreview": 
"https://openreview.net/forum?id=J4mJjotSauh", "slides": "https://iclr.cc/virtual/2023/poster/11807", "video": "https://iclr.cc/virtual/2023/poster/11807", "author_site": "Gabriel Laberge, Ulrich A\u00efvodji, Satoshi Hara, Mario Marchand, Foutse Khomh", "tldr": "We show that Shapley-based explanation techniques commonly used in ML can be manipulated to show false compliance (e.g., during an algorithmic fairness audit) and that this type of attack can be hard to detect.", "abstract": "SHAP explanations aim at identifying which features contribute the most to the difference in model prediction at a specific input versus \na background distribution. Recent studies have shown that they can be manipulated by malicious adversaries to produce arbitrary desired \nexplanations. However, existing attacks focus solely on altering the black-box model itself. In this paper, we propose a complementary family \nof attacks that leave the model intact and manipulate SHAP explanations using stealthily biased sampling of the data points used to approximate expectations w.r.t the background distribution. In the context of fairness audit, we show that our attack can reduce the importance of a sensitive feature when explaining the difference in outcomes between groups while remaining undetected. More precisely, experiments performed on real-world datasets showed that our attack could yield up to a 90\\% relative decrease in amplitude of the sensitive feature attribution. These results highlight the manipulability of SHAP explanations and encourage auditors to treat them with skepticism.", "keywords": "Explainability;Robustness;SHAP;Stealthily Sampling", "primary_area": "", "supplementary_material": "/attachment/3e4202cf3ff2265ab6d657635b51f56284c2c6e6.zip", "author": "gabriel laberge;Ulrich A\u00efvodji;Satoshi Hara;Mario Marchand;Foutse Khomh", "authorids": "~gabriel_laberge1;~Ulrich_A\u00efvodji1;~Satoshi_Hara1;~Mario_Marchand1;~Foutse_Khomh1", "gender": "M;M;M;;M", "homepage": "https://gablabc.github.io/;https://aivodji.github.io/;https://sites.google.com/site/sato9hara/;http://www2.ift.ulaval.ca/~mmarchand/;", "dblp": "248/8241;217/4301;08/778-1;01/4590;", "google_scholar": "ixX6rmwAAAAJ;47kuuqIAAAAJ;https://scholar.google.co.jp/citations?user=ELhfkiMAAAAJ;https://scholar.google.ca/citations?user=M792u2sAAAAJ;https://scholar.google.ca/citations?user=YYXb3KIAAAAJ", "orcid": ";0000-0003-4247-1444;;;", "linkedin": "gabriel-laberge/;umaivodji/;;;", "or_profile": "~gabriel_laberge1;~Ulrich_A\u00efvodji1;~Satoshi_Hara1;~Mario_Marchand1;~Foutse_Khomh1", "aff": "\u00c9cole Polytechnique de Montr\u00e9al;\u00c9cole de technologie sup\u00e9rieure, Universit\u00e9 du Qu\u00e9bec;Osaka University;Laval university;\u00c9cole Polytechnique de Montr\u00e9al, Universit\u00e9 de Montr\u00e9al", "aff_domain": "polymtl.ca;etsmtl.ca;osaka-u.ac.jp;ulaval.ca;polymtl.ca", "position": "PhD student;Assistant Professor;Associate Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nlaberge2023fooling,\ntitle={Fooling {SHAP} with Stealthily Biased Sampling},\nauthor={gabriel laberge and Ulrich A{\\\"\\i}vodji and Satoshi Hara and Mario Marchand and Foutse Khomh},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=J4mJjotSauh}\n}", "github": "", "project": "", "reviewers": "fFZy;L3wE;yVbY", "pdf_size": 1086528, "recommendation": "6;6;8", "confidence": "4;4;4", "correctness": "4;4;3", "technical_novelty": "2;2;4", "empirical_novelty": 
"2;2;4", "wc_summary_paper": "90;238;110", "wc_strength_and_weaknesses": "70;163;196", "wc_clarity_quality_novelty_and_reproducibility": "428;91;276", "wc_summary_review": "40;28;16", "wc_review": "628;520;598", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.9428090415820634 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.9428090415820634 ], "wc_summary_paper_avg": [ 146.0, 65.56421788343599 ], "wc_strength_and_weaknesses_avg": [ 143.0, 53.34791467339656 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 265.0, 137.79937106774713 ], "wc_summary_review_avg": [ 28.0, 9.797958971132712 ], "wc_review_avg": [ 582.0, 45.51922670696417 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": -0.9999999999999998, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12829599577065546796&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 0, "pdf": "https://openreview.net/pdf?id=J4mJjotSauh", "email": "polymtl.ca;etsmtl.ca;osaka-u.ac.jp;ulaval.ca;polymtl.ca", "author_num": 5, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "\u00c9cole Polytechnique de Montr\u00e9al;Universit\u00e9 du Qu\u00e9bec;Osaka University;Laval University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.polymtl.ca;https://www.etsmtl.ca;https://www.osaka-u.ac.jp;https://www.laval.ca", "aff_unique_abbr": "Polytechnique Montr\u00e9al;ETS;Osaka U;Laval", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Montr\u00e9al;\u00c9cole de technologie sup\u00e9rieure;", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "Canada;Japan" }, { "title": "Sparsity May Cry: Let Us Fail (Current) Sparse Neural Networks Together!", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11532", "id": "J6F3lLg4Kdp", "poster": "/media/PosterPDFs/ICLR%202023/11532.png?t=1682470337.6564953", "openreview": "https://openreview.net/forum?id=J6F3lLg4Kdp", "slides": "https://iclr.cc/virtual/2023/poster/11532", "video": "https://iclr.cc/virtual/2023/poster/11532", "author_site": "Shiwei Liu, Tianlong Chen, Zhenyu Zhang, Xuxi Chen, Tianjin Huang, AJAY JAISWAL, Zhangyang Wang", "tldr": "In this work, we assemble a large-scale, difficult and diverse benchmark for sparse neural networks, on which current SOTA sparse networks are actually prone to significant performance degradation, sometimes even at trivial sparsity levels, e.g., 5%.", "abstract": "Sparse Neural Networks (SNNs) have received voluminous attention predominantly due to growing computational and memory footprints of consistently exploding parameter count in large-scale models. Similar to their dense counterparts, recent SNNs generalize just as well and are equipped with numerous favorable benefits (e.g., low complexity, high scalability, and robustness), sometimes even better than the original dense networks. As research effort is focused on developing increasingly sophisticated sparse algorithms, it is startling that a comprehensive benchmark to evaluate the effectiveness of these algorithms has been highly overlooked. 
In the absence of a carefully crafted evaluation benchmark, most, if not all, sparse algorithms are evaluated against fairly simple and naive tasks (e.g., CIFAR-10/100, ImageNet, GLUE), which can potentially camouflage many advantages as well as unexpected predicaments of SNNs. In pursuit of a more general evaluation and unveiling the true potential of sparse algorithms, we introduce the “Sparsity May Cry” Benchmark (SMC-Bench), a collection of 4 carefully curated, diverse tasks with 10 datasets that capture a wide range of domain-specific and sophisticated knowledge. Our systematic evaluation of the most representative sparse algorithms reveals an important obscured observation: the state-of-the-art magnitude- and/or gradient-based sparse algorithms seemingly fail to perform on SMC-Bench when applied out-of-the-box, sometimes at trivial sparsity levels as low as 5%. These observations call for the immediate attention of the sparsity research community to reconsider the highly proclaimed benefits of SNNs. We further conduct a thorough investigation into the reasons for the failure of common SNNs. Our analysis points out that such failure is intimately related to the “lazy regime” of large model training, which points us toward stronger pruning recipes that alleviate the failure on SMC-Bench (though performance still suffers to some extent). By incorporating these well-thought-out and diverse tasks, SMC-Bench is designed to favor and encourage the development of more scalable and generalizable sparse algorithms. We open-source SMC-Bench to assist researchers in building next-generation sparse algorithms that scale and generalize: https://github.com/VITA-Group/SMC-Bench.", "keywords": "Sparse Neural Networks;Benchmark;Sparsity;Neural Network Pruning", "primary_area": "", "supplementary_material": "", "author": "Shiwei Liu;Tianlong Chen;Zhenyu Zhang;Xuxi Chen;Tianjin Huang;AJAY KUMAR JAISWAL;Zhangyang Wang", "authorids": "~Shiwei_Liu2;~Tianlong_Chen1;~Zhenyu_Zhang4;~Xuxi_Chen1;~Tianjin_Huang1;~AJAY_KUMAR_JAISWAL1;~Zhangyang_Wang1", "gender": "M;M;M;Unspecified;M;M;M", "homepage": "https://shiweiliuiiiiiii.github.io/;https://tianlong-chen.github.io;https://zhenyu.gallery;;https://research.tue.nl/nl/persons/tianjin-huang;https://ajay1994.github.io/;https://vita-group.github.io", "dblp": "234/8697-3.html;;01/1844-15;267/9662;189/3972;30/9707;119/4026", "google_scholar": "73IbXtsAAAAJ;LE3ctn0AAAAJ;ZLyJRxoAAAAJ;afsDlKYAAAAJ;https://scholar.google.co.uk/citations?user=yFLmPsoAAAAJ;I783HxYAAAAJ;pxFyKAIAAAAJ", "orcid": ";0000-0001-7774-8197;;;;;", "linkedin": ";tianlong-chen-783862167/;zhenyu-allen-zhang-a9b1391a3/;;;;", "or_profile": "~Shiwei_Liu2;~Tianlong_Chen1;~Zhenyu_Zhang4;~Xuxi_Chen1;~Tianjin_Huang1;~AJAY_KUMAR_JAISWAL1;~Zhangyang_Wang1", "aff": "University of Texas at Austin;University of Texas, Austin;University of Texas at Austin;University of Texas at Austin;;Amazon;University of Texas, Austin", "aff_domain": "utexas.edu;utexas.edu;utexas.edu;utexas.edu;;amazon.com;utexas.edu", "position": "Postdoc;PhD student;PhD student;PhD student;;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nliu2023sparsity,\ntitle={Sparsity May Cry: Let Us Fail (Current) Sparse Neural Networks Together!},\nauthor={Shiwei Liu and Tianlong Chen and Zhenyu Zhang and Xuxi Chen and Tianjin Huang and AJAY KUMAR JAISWAL and Zhangyang Wang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=J6F3lLg4Kdp}\n}", "github": "", 
"project": "", "reviewers": "11zQ;yFvQ;wzLQ;JUeu", "pdf_size": 778570, "recommendation": "6;8;8;8", "confidence": "5;4;4;4", "correctness": "3;3;3;4", "technical_novelty": "1;3;4;2", "empirical_novelty": "3;4;4;3", "wc_summary_paper": "90;39;41;94", "wc_strength_and_weaknesses": "399;199;418;109", "wc_clarity_quality_novelty_and_reproducibility": "7;18;20;47", "wc_summary_review": "28;20;17;3", "wc_review": "524;276;496;253", "wc_reply_reviewers": "374;11;0;0", "wc_reply_authors": "2427;1081;972;350", "reply_reviewers": "4;1;0;0", "reply_authors": "9;3;2;1", "recommendation_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 1.118033988749895 ], "empirical_novelty_avg": [ 3.5, 0.5 ], "wc_summary_paper_avg": [ 66.0, 26.04803255526221 ], "wc_strength_and_weaknesses_avg": [ 281.25, 131.3399691639982 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 23.0, 14.713938969562161 ], "wc_summary_review_avg": [ 17.0, 9.027735042633894 ], "wc_review_avg": [ 387.25, 123.41672293494103 ], "wc_reply_reviewers_avg": [ 96.25, 160.42190467638764 ], "wc_reply_authors_avg": [ 1207.5, 757.2894096710979 ], "reply_reviewers_avg": [ 1.25, 1.6393596310755 ], "reply_authors_avg": [ 3.75, 3.112474899497183 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17982370768467625&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=J6F3lLg4Kdp", "email": "utexas.edu;utexas.edu;utexas.edu;utexas.edu;;amazon.com;utexas.edu", "author_num": 7, "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "University of Texas at Austin;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.utexas.edu;https://www.amazon.com", "aff_unique_abbr": "UT Austin;Amazon", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Austin;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "J7CTp-jNyJ", "title": "SYNG4ME: Model Evaluation using Synthetic Test Data", "track": "main", "status": "Reject", "tldr": "Evaluating ML supervised models by generating synthetic test data", "abstract": "Model evaluation is a crucial step in ensuring reliable machine learning systems. Currently, predictive models are evaluated on held-out test data, quantifying aggregate model performance. Limitations of available test data make it challenging to evaluate model performance on small subgroups or when the environment changes. Synthetic test data provides a unique opportunity to address this challenge; instead of evaluating predictive models on real data, we propose to use synthetic data. This brings two advantages. First, supplementing and increasing the amount of evaluation data can lower the variance of model performance estimates compared to evaluation on the original test data. This is especially true for local performance evaluation in low-density regions, e.g. minority or intersectional groups. Second, generative models can be conditioned as to induce a shift in the synthetic data distribution, allowing us to evaluate how supervised models could perform in different target settings. In this work, we propose SYNG4ME: an automated suite of synthetic data generators for model evaluation. 
By generating smart synthetic data sets, data practitioners have a new tool for exploring how supervised models may perform on subgroups of the data, and how robust methods are to distributional shifts. We show experimentally that SYNG4ME achieves more accurate performance estimates compared to using the test data alone.", "keywords": "Model Evaluation;Synthetic data", "primary_area": "", "supplementary_material": "/attachment/553c5a2d2e7d13df2f135c350e6661a0d228fba2.zip", "author": "Boris van Breugel;Nabeel Seedat;Fergus Imrie;Mihaela van der Schaar", "authorids": "~Boris_van_Breugel2;~Nabeel_Seedat1;~Fergus_Imrie1;~Mihaela_van_der_Schaar2", "gender": ";;;F", "homepage": ";;;https://www.vanderschaar-lab.com", "dblp": "284/0835;227/8368;281/4466;", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;4qCGgpsAAAAJ;DZ3S--MAAAAJ", "orcid": ";;0000-0002-6241-0123;", "linkedin": ";nabeel-seedat/;;", "or_profile": "~Boris_van_Breugel2;~Nabeel_Seedat1;~Fergus_Imrie1;~Mihaela_van_der_Schaar2", "aff": "University of Cambridge;University of Cambridge;University of California, Los Angeles;University of California, Los Angeles", "aff_domain": "cam.ac.uk;cam.ac.uk;ucla.edu;ucla.edu", "position": "PhD student;PhD student;Postdoc;Full Professor", "bibtex": "@misc{\nbreugel2023syngme,\ntitle={{SYNG}4{ME}: Model Evaluation using Synthetic Test Data},\nauthor={Boris van Breugel and Nabeel Seedat and Fergus Imrie and Mihaela van der Schaar},\nyear={2023},\nurl={https://openreview.net/forum?id=J7CTp-jNyJ}\n}", "github": "", "project": "", "reviewers": "DdMu;L3gg;NW99;kuUj", "site": "https://openreview.net/forum?id=J7CTp-jNyJ", "pdf_size": 2417238, "recommendation": "5;5;6;6", "confidence": "4;3;4;2", "correctness": "3;4;3;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;2;3;4", "wc_summary_paper": "32;65;115;85", "wc_strength_and_weaknesses": "173;140;75;215", "wc_clarity_quality_novelty_and_reproducibility": "11;64;180;65", "wc_summary_review": "36;28;46;40", "wc_review": "252;297;416;405", "wc_reply_reviewers": "0;0;0;50", "wc_reply_authors": "3729;3211;1411;1457", "reply_reviewers": "0;0;0;1", "reply_authors": "8;9;5;4", "recommendation_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 74.25, 30.194163343268844 ], "wc_strength_and_weaknesses_avg": [ 150.75, 51.17799820235254 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 80.0, 61.72924752497798 ], "wc_summary_review_avg": [ 37.5, 6.5383484153110105 ], "wc_review_avg": [ 342.5, 69.94462095114963 ], "wc_reply_reviewers_avg": [ 12.5, 21.650635094610966 ], "wc_reply_authors_avg": [ 2452.0, 1034.4703959031403 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 6.5, 2.0615528128088303 ], "replies_avg": [ 35, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.30151134457776363, "corr_recommendation_correctness": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:63UfH7kcUVMJ:scholar.google.com/&scioq=SYNG4ME:+Model+Evaluation+using+Synthetic+Test+Data&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;1;1", "aff_unique_norm": "University of Cambridge;University of California, Los Angeles", "aff_unique_dep": ";", "aff_unique_url": "https://www.cam.ac.uk;https://www.ucla.edu", 
"aff_unique_abbr": "Cambridge;UCLA", "aff_campus_unique_index": "0;0;1;1", "aff_campus_unique": "Cambridge;Los Angeles", "aff_country_unique_index": "0;0;1;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Learning rigid dynamics with face interaction graph networks", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10983", "id": "J7Uh781A05p", "poster": "", "openreview": "https://openreview.net/forum?id=J7Uh781A05p", "slides": "https://iclr.cc/virtual/2023/poster/10983", "video": "https://iclr.cc/virtual/2023/poster/10983", "author_site": "Kelsey Allen, Yulia Rubanova, Tatiana Lopez-Guevara, William Whitney, Alvaro Sanchez Gonzalez, Peter Battaglia, Tobias Pfaff", "tldr": "Face to face, multi-index collisions improve accuracy and efficiency of graph network models for rigid body dynamics", "abstract": "Simulating rigid collisions among arbitrary shapes is notoriously difficult due to complex geometry and the strong non-linearity of the interactions. While graph neural network (GNN)-based models are effective at learning to simulate complex physical dynamics, such as fluids, cloth and articulated bodies, they have been less effective and efficient on rigid-body physics, except with very simple shapes. Existing methods that model collisions through the meshes' nodes are often inaccurate because they struggle when collisions occur on faces far from nodes. Alternative approaches that represent the geometry densely with many particles are prohibitively expensive for complex shapes. Here we introduce the ``Face Interaction Graph Network'' (FIGNet) which extends beyond GNN-based methods, and computes interactions between mesh faces, rather than nodes. Compared to learned node- and particle-based methods, FIGNet is around 4x more accurate in simulating complex shape interactions, while also 8x more computationally efficient on sparse, rigid meshes. Moreover, FIGNet can learn frictional dynamics directly from real-world data, and can be more accurate than analytical solvers given modest amounts of training data. 
FIGNet represents a key step forward in one of the few remaining physical domains which have seen little competition from learned simulators, and offers allied fields such as robotics, graphics and mechanical design a new tool for simulation and model-based planning.", "keywords": "graph networks;rigid body dynamics;physics", "primary_area": "", "supplementary_material": "/attachment/462f8b4ff1957cd17d5c3be1f255932873d86438.zip", "author": "Kelsey R Allen;Yulia Rubanova;Tatiana Lopez-Guevara;William F Whitney;Alvaro Sanchez-Gonzalez;Peter Battaglia;Tobias Pfaff", "authorids": "~Kelsey_R_Allen1;~Yulia_Rubanova2;~Tatiana_Lopez-Guevara1;~William_F_Whitney1;~Alvaro_Sanchez-Gonzalez1;~Peter_Battaglia1;~Tobias_Pfaff1", "gender": "F;F;;M;M;M;F", "homepage": ";;http://willwhitney.com;;;http://tobiaspfaff.com;https://yuliarubanova.github.io/", "dblp": "153/9528;208/0996;160/8671;222/1889;41/3400;67/7591;222/3085", "google_scholar": "kpcjFekAAAAJ;Op4nexcAAAAJ;aQcYWDMAAAAJ;https://scholar.google.co.uk/citations?user=d1oQ8NcAAAAJ;https://scholar.google.co.uk/citations?user=nQ7Ij30AAAAJ;3oUgDKQAAAAJ;u_HzE9wAAAAJ", "orcid": ";;;;;;", "linkedin": ";;;;;;https://linkedin.com/in/yulia-rubanova-031702100", "or_profile": "~Kelsey_R_Allen1;~Tatiana_Lopez-Guevara1;~William_F_Whitney1;~Alvaro_Sanchez-Gonzalez1;~Peter_Battaglia1;~Tobias_Pfaff1;~Yulia_Rubanova1", "aff": "Google;Google;Google DeepMind;Google DeepMind;Google DeepMind;Deepmind;Google DeepMind", "aff_domain": "deepmind.com;google.com;deepmind.com;google.com;google.com;google.com;deepmind.com", "position": "Research Scientist;Researcher;Researcher;Senior Research Engineer;Researcher;Research scientist;Research Scientist", "bibtex": "@inproceedings{\nallen2023learning,\ntitle={Learning rigid dynamics with face interaction graph networks},\nauthor={Kelsey R Allen and Yulia Rubanova and Tatiana Lopez-Guevara and William F Whitney and Alvaro Sanchez-Gonzalez and Peter Battaglia and Tobias Pfaff},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=J7Uh781A05p}\n}", "github": "", "project": "", "reviewers": "edAB;dbQg;sivF;zfn2", "pdf_size": 27446267, "recommendation": "6;8;10;10", "confidence": "4;4;4;4", "correctness": "3;3;4;4", "technical_novelty": "3;3;4;4", "empirical_novelty": "3;2;4;4", "wc_summary_paper": "53;72;36;168", "wc_strength_and_weaknesses": "54;166;445;261", "wc_clarity_quality_novelty_and_reproducibility": "12;32;37;100", "wc_summary_review": "11;33;130;35", "wc_review": "130;303;648;564", "wc_reply_reviewers": "0;26;199;65", "wc_reply_authors": "179;764;1496;253", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;2;1", "recommendation_avg": [ 8.5, 1.6583123951777 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.5, 0.5 ], "empirical_novelty_avg": [ 3.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 82.25, 51.119345653089105 ], "wc_strength_and_weaknesses_avg": [ 231.5, 143.3954322842956 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 45.25, 32.96494350063413 ], "wc_summary_review_avg": [ 52.25, 45.86597322634722 ], "wc_review_avg": [ 411.25, 206.27817989307545 ], "wc_reply_reviewers_avg": [ 72.5, 76.61103053738411 ], "wc_reply_authors_avg": [ 673.0, 525.8436079292017 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 
0.9045340337332909, "gs_citation": 34, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15791481098405653146&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=J7Uh781A05p", "email": "deepmind.com;google.com;deepmind.com;google.com;google.com;google.com;deepmind.com", "author_num": 7, "aff_unique_index": "0;0;0;0;0;1;0", "aff_unique_norm": "Google;DeepMind", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://deepmind.com", "aff_unique_abbr": "Google;DeepMind", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;1;1;1;1;1", "aff_country_unique": "United States;United Kingdom" }, { "id": "J923QzIz8Sh", "title": "Intra-Instance VICReg: Bag of Self-Supervised Image Patch Embedding Explains the Performance", "track": "main", "status": "Withdraw", "tldr": "We show that Siamese-network-based SSL methods essentially learn a distributed representation of image patches and aggregate them to form the instance representation.", "abstract": "Recently, self-supervised learning (SSL) has achieved tremendous empirical advancements in learning image representation. However, our understanding and knowledge of the representation are still limited. This work shows that the success of the SOTA Siamese-network-based SSL approaches is primarily based on learning a distributed representation of image patches. In particular, we show that when we learn a representation only for fixed-scale image patches and aggregate different patch representations for an image (instance), it can achieve on par or even better results than the baseline methods on several benchmarks. Further, we show that the patch representation aggregation can also improve various SOTA baseline methods by a large margin. We also establish a formal connection between the Siamese-network-based SSL objective and the image patches co-occurrence statistics modeling, which supplements the prevailing invariance perspective. By visualizing the nearest neighbors of different image patches in the embedding space and projection space, we show that while the projection has more invariance, the embedding space tends to preserve more equivariance and locality. 
While it is important to push the SOTA engineering frontier, we show that simplifying SOTA methods to build better understanding is also a promising direction.", "keywords": "self-supervised learning;explainable machine learning;co-occurrence statistics modeling", "primary_area": "", "supplementary_material": "", "author": "Yubei Chen;Adrien Bardes;ZENGYI LI;Yann LeCun", "authorids": "~Yubei_Chen1;~Adrien_Bardes1;~ZENGYI_LI1;~Yann_LeCun1", "gender": "M;M;M;M", "homepage": "https://redwood.berkeley.edu/people/yubei-chen/;;;http://yann.lecun.com", "dblp": "30/10064;292/3848.html;;l/YannLeCun", "google_scholar": "WeyLqFUAAAAJ;SvRU8F8AAAAJ;rstPxpcAAAAJ;WLN3QrAAAAAJ", "orcid": ";;;", "linkedin": "yubei-chen-05998a39/;adrien-bardes-48a080129/;;", "or_profile": "~Yubei_Chen1;~Adrien_Bardes1;~ZENGYI_LI1;~Yann_LeCun1", "aff": "New York University;INRIA;;New York University", "aff_domain": "nyu.edu;inria.fr;;nyu.edu", "position": "Postdoctoral Associate;PhD student;;Full Professor", "bibtex": "@misc{\nchen2023intrainstance,\ntitle={Intra-Instance {VICR}eg: Bag of Self-Supervised Image Patch Embedding Explains the Performance},\nauthor={Yubei Chen and Adrien Bardes and ZENGYI LI and Yann LeCun},\nyear={2023},\nurl={https://openreview.net/forum?id=J923QzIz8Sh}\n}", "github": "", "project": "", "reviewers": "3x3W;Jrgg;ZprC", "site": "https://openreview.net/forum?id=J923QzIz8Sh", "pdf_size": 16371920, "recommendation": "3;5;5", "confidence": "4;3;4", "correctness": "2;3;3", "technical_novelty": "2;3;3", "empirical_novelty": "2;3;2", "wc_summary_paper": "53;105;51", "wc_strength_and_weaknesses": "483;188;421", "wc_clarity_quality_novelty_and_reproducibility": "161;57;14", "wc_summary_review": "180;93;34", "wc_review": "877;443;520", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 69.66666666666667, 24.997777679003566 ], "wc_strength_and_weaknesses_avg": [ 364.0, 126.99868765726151 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 77.33333333333333, 61.71079502179681 ], "wc_summary_review_avg": [ 102.33333333333333, 59.9685102551516 ], "wc_review_avg": [ 613.3333333333334, 189.07200274557368 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.49999999999999983, "corr_recommendation_correctness": 0.9999999999999998, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:gj40DBQdZaQJ:scholar.google.com/&scioq=Intra-Instance+VICReg:+Bag+of+Self-Supervised+Image+Patch+Embedding+Explains+the+Performance&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;0", "aff_unique_norm": "New York University;INRIA", "aff_unique_dep": ";", "aff_unique_url": "https://www.nyu.edu;https://www.inria.fr", "aff_unique_abbr": "NYU;INRIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;France" }, { "id": "J9Z3MlnPU_f", "title": "Compound Tokens: Channel Fusion for Vision-Language Representation Learning", 
"track": "main", "status": "Withdraw", "tldr": "We provide a new multi-modal fusion method that concatenates tokens along the channel dimension. ", "abstract": "We present an effective method for fusing visual-and-language representations for several question answering tasks including visual question answering and visual entailment.\nIn contrast to prior works that concatenate unimodal representations or use only cross-attention, we compose multimodal representations via channel fusion. By fusing on the channels, the model is able to more effectively align the tokens compared to standard methods. These multimodal representations, which we call compound tokens are generated with cross-attention transformer layers. First, vision tokens are used as queries to retrieve compatible text tokens through cross-attention. We then chain the vision tokens and the queried text tokens along the channel dimension. We call the resulting representations compound tokens. A second group of compound tokens are generated using an analogous process where the text tokens serve as queries to the cross-attention layer. We concatenate all the compound tokens for further processing with multimodal encoder. We demonstrate the effectiveness of compound tokens using an encoder-decoder vision-language model trained end-to-end in the open-vocabulary setting. Compound Tokens achieve highly competitive performance across a range of question answering tasks including GQA, VQA2.0, and SNLI-VE.\nWe plan to make the code public. ", "keywords": "question answering tasks;multi-modal fusion;vision-language model;representation learning", "primary_area": "", "supplementary_material": "", "author": "Maxwell Mbabilla Aladago;AJ Piergiovanni", "authorids": "~Maxwell_Mbabilla_Aladago1;~AJ_Piergiovanni1", "gender": "M;", "homepage": "https://www.linkedin.com/in/maladago/;http://homes.sice.indiana.edu/ajpiergi/", "dblp": "283/5383;175/9876", "google_scholar": "ekf53bkAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";", "linkedin": "maladago/;", "or_profile": "~Maxwell_Mbabilla_Aladago1;~AJ_Piergiovanni1", "aff": "Google;Google", "aff_domain": "google.com;google.com", "position": "Intern;Research Scientist", "bibtex": "@misc{\naladago2023compound,\ntitle={Compound Tokens: Channel Fusion for Vision-Language Representation Learning},\nauthor={Maxwell Mbabilla Aladago and AJ Piergiovanni},\nyear={2023},\nurl={https://openreview.net/forum?id=J9Z3MlnPU_f}\n}", "github": "", "project": "", "reviewers": "FCec;8gZ6;zQ9A;g8Y7", "site": "https://openreview.net/forum?id=J9Z3MlnPU_f", "pdf_size": 1424720, "recommendation": "3;3;3;5", "confidence": "2;4;4;4", "correctness": "4;3;3;3", "technical_novelty": "2;2;2;2", "empirical_novelty": "2;2;2;0", "wc_summary_paper": "71;29;56;96", "wc_strength_and_weaknesses": "93;121;13;90", "wc_clarity_quality_novelty_and_reproducibility": "105;25;113;26", "wc_summary_review": "58;23;24;63", "wc_review": "327;198;206;275", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 1.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 63.0, 24.279621084357967 ], "wc_strength_and_weaknesses_avg": [ 79.25, 40.114679358060435 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 67.25, 41.8471922594575 ], "wc_summary_review_avg": [ 
42.0, 18.587630295441105 ], "wc_review_avg": [ 251.5, 52.87958017987662 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": -0.3333333333333333, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11398922098695252195&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "J9p5s5jwna", "title": "Conditional Policy Similarity: An Overlooked Factor in Zero-Shot Coordination", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Multi-Agent Reinforcement Learning (MARL) in cooperative tasks usually follows the self-play setting, where agents are trained by playing with a fixed group of agents. However, in the face of Zero-Shot Coordination (ZSC), where an agent must coordinate with unseen partners, self-play agents may fail. ZSC performance is traditionally measured by cross-play, where individually trained agents are required to play with each other. However, the cross-play score varies widely across different combinations of agents, so evaluating a model's ZSC performance only by its cross-play score averaged over several models is not reliable. We think the reason for this phenomenon may be that the cross-play score is highly related to the similarity between an agent's training partner and ZSC partner, and this similarity varies widely. Therefore, we define the Conditional Policy Similarity between an agent's Training partner and Testing partner (CPSTT) and conduct extensive experiments to confirm a strong linear correlation between CPSTT and cross-play score. Based on this, we propose a new criterion to evaluate ZSC performance: a model is considered better if it has a higher cross-play score compared to another model given the same CPSTT. Furthermore, we put forward a Similarity-Based Robust Training (SBRT) scheme that improves agents' ZSC performance by perturbing their partners' actions during training according to a pre-defined CPSTT value.
We apply SBRT to four MARL frameworks, and their ZSC performance improves whether measured by the traditional criterion or by ours.", "keywords": "multi-agent reinforcement learning;zero-shot coordination;conditional policy similarity", "primary_area": "", "supplementary_material": "", "author": "Lebin Yu;Yunbo Qiu;quanming yao;Xu-Dong Zhang;Jian Wang", "authorids": "~Lebin_Yu1;~Yunbo_Qiu1;~quanming_yao1;~Xu-Dong_Zhang1;~Jian_Wang21", "gender": ";;M;M;M", "homepage": ";;https://lars-group.github.io/;https://ieeexplore.ieee.org/author/37291966300;http://web.ee.tsinghua.edu.cn/wangjian1/zh_CN/index.htm", "dblp": ";;158/1014;03/6760;39/449-30", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;https://scholar.google.com/schhp?hl=en;a-9TTg4AAAAJ;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Lebin_Yu1;~Yunbo_Qiu1;~quanming_yao1;~Xu-Dong_Zhang1;~Jian_Wang21", "aff": "Tsinghua University;;Department of Electronic Engineering;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;;Assistant Professor;Full Professor;Full Professor", "bibtex": "@misc{\nyu2023conditional,\ntitle={Conditional Policy Similarity: An Overlooked Factor in Zero-Shot Coordination},\nauthor={Lebin Yu and Yunbo Qiu and quanming yao and Xu-Dong Zhang and Jian Wang},\nyear={2023},\nurl={https://openreview.net/forum?id=J9p5s5jwna}\n}", "github": "", "project": "", "reviewers": "9zxG;Ggdv;GT4b;32HP", "site": "https://openreview.net/forum?id=J9p5s5jwna", "pdf_size": 766788, "recommendation": "3;5;5;6", "confidence": "5;4;2;3", "correctness": "3;3;4;4", "technical_novelty": "1;2;3;3", "empirical_novelty": "1;2;2;3", "wc_summary_paper": "36;98;74;144", "wc_strength_and_weaknesses": "630;352;81;186", "wc_clarity_quality_novelty_and_reproducibility": "35;130;50;90", "wc_summary_review": "43;12;35;44", "wc_review": "744;592;240;464", "wc_reply_reviewers": "145;0;0;0", "wc_reply_authors": "1383;1155;490;396", "reply_reviewers": "1;0;0;0", "reply_authors": "2;2;1;1", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.25, 0.82915619758885 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 88.0, 39.16631205513228 ], "wc_strength_and_weaknesses_avg": [ 312.25, 207.34075214486901 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 76.25, 36.9754986443726 ], "wc_summary_review_avg": [ 33.5, 12.893796958227627 ], "wc_review_avg": [ 510.0, 184.72682533947255 ], "wc_reply_reviewers_avg": [ 36.25, 62.7868417743718 ], "wc_reply_authors_avg": [ 856.0, 422.1036602542082 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.7181848464596079, "corr_recommendation_correctness": 0.6882472016116854, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:hz2suX9C06oJ:scholar.google.com/&scioq=Conditional+Policy+Similarity:+An+Overlooked+Factor+in+Zero-Shot+Coordination&hl=en&as_sdt=0,14", "gs_version_total": 0, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Tsinghua University;Institution Name Not Provided", "aff_unique_dep": ";Department of Electronic Engineering", "aff_unique_url": "https://www.tsinghua.edu.cn;", "aff_unique_abbr": "THU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0",
"aff_country_unique": "China;" }, { "title": "Mosaic Representation Learning for Self-supervised Visual Pre-training", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11303", "id": "JAezPMehaUu", "poster": "/media/PosterPDFs/ICLR%202023/11303.png?t=1680937713.3387203", "openreview": "https://openreview.net/forum?id=JAezPMehaUu", "slides": "https://iclr.cc/virtual/2023/poster/11303", "video": "https://iclr.cc/virtual/2023/poster/11303", "author_site": "Zhaoqing Wang, Ziyu Chen, Yaqian Li, Yandong Guo, Jun Yu, Mingming Gong, Tongliang Liu", "tldr": "We propose a simple and effective mosaic representation learning framework consisting of a new data augmentation strategy, which aims to adequately learn discriminative feature representations.", "abstract": "Self-supervised learning has achieved significant success in learning visual representations without the need for manual annotation. To obtain generalizable representations, a meticulously designed data augmentation strategy is one of the most crucial parts. Recently, multi-crop strategies utilizing a set of small crops as positive samples have been shown to learn spatially structured features. However, it overlooks the diverse contextual backgrounds, which reduces the variance of the input views and degenerates the performance. To address this problem, we propose a mosaic representation learning framework (MosRep), consisting of a new data augmentation strategy that enriches the backgrounds of each small crop and improves the quality of visual representations. Specifically, we randomly sample numbers of small crops from different input images and compose them into a mosaic view, which is equivalent to introducing different background information for each small crop. Additionally, we further jitter the mosaic view to prevent memorizing the spatial locations of each crop. Along with optimization, our MosRep gradually extracts more discriminative features. Extensive experimental results demonstrate that our method improves the performance far greater than the multi-crop strategy on a series of downstream tasks, e.g., +7.4% and +4.9% than the multi-crop strategy on ImageNet-1K with 1% label and 10% label, respectively. 
Code is available at https://github.com/DerrickWang005/MosRep.git.\n", "keywords": "self-supervised learning;computer vision", "primary_area": "", "supplementary_material": "", "author": "Zhaoqing Wang;Ziyu Chen;Yaqian Li;Yandong Guo;Jun Yu;Mingming Gong;Tongliang Liu", "authorids": "~Zhaoqing_Wang1;~Ziyu_Chen5;~Yaqian_Li1;~Yandong_Guo2;~Jun_Yu3;~Mingming_Gong1;~Tongliang_Liu1", "gender": "M;M;M;M;M;M;M", "homepage": "https://derrickwang005.github.io/;;;;https://faculty.ustc.edu.cn/yujun_AI/en/index.htm;https://mingming-gong.github.io/;https://tongliang-liu.github.io/", "dblp": ";;154/1961.html;28/4272;50/5754-1.html;98/8479;150/6667", "google_scholar": "ZqOjPKQAAAAJ;;;fWDoWsQAAAAJ;efZyqyQAAAAJ;https://scholar.google.com.au/citations?user=6BmiCJIAAAAJ;https://scholar.google.com.au/citations?user=EiLdZ_YAAAAJ", "orcid": ";;;;0000-0002-3197-8103;0000-0001-7147-5589;", "linkedin": "%E5%85%86%E5%8D%BF-%E7%8E%8B-ba58221b7/;zailchen17/;;;;;", "or_profile": "~Zhaoqing_Wang1;~Ziyu_Chen5;~Yaqian_Li1;~Yandong_Guo2;~Jun_Yu3;~Mingming_Gong1;~Tongliang_Liu1", "aff": "The University of Sydney;Chiplego;;AI^2 Robotics;University of Science and Technology of China;University of Melbourne;University of Sydney", "aff_domain": "uni.sydney.edu.au;chiplego.com;;ai2robotics.com;ustc.edu.cn;unimelb.edu.au;sydney.edu.au", "position": "PhD student;Researcher;;Chief Scientist;Associate Professor;Assistant Professor;Lecturer", "bibtex": "@inproceedings{\nwang2023mosaic,\ntitle={Mosaic Representation Learning for Self-supervised Visual Pre-training},\nauthor={Zhaoqing Wang and Ziyu Chen and Yaqian Li and Yandong Guo and Jun Yu and Mingming Gong and Tongliang Liu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=JAezPMehaUu}\n}", "github": "", "project": "", "reviewers": "rkci;hAGo;ed1B", "pdf_size": 1910128, "recommendation": "5;8;8", "confidence": "3;3;4", "correctness": "3;3;4", "technical_novelty": "2;3;2", "empirical_novelty": "3;0;2", "wc_summary_paper": "62;109;119", "wc_strength_and_weaknesses": "58;332;94", "wc_clarity_quality_novelty_and_reproducibility": "38;22;53", "wc_summary_review": "49;47;31", "wc_review": "207;510;297", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "592;222;621", "reply_reviewers": "0;0;0", "reply_authors": "3;2;1", "recommendation_avg": [ 7.0, 1.4142135623730951 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.6666666666666667, 1.247219128924647 ], "wc_summary_paper_avg": [ 96.66666666666667, 24.850665092821068 ], "wc_strength_and_weaknesses_avg": [ 161.33333333333334, 121.57119541879794 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 37.666666666666664, 12.657891697365017 ], "wc_summary_review_avg": [ 42.333333333333336, 8.055363982396381 ], "wc_review_avg": [ 338.0, 127.05117079350352 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 478.3333333333333, 181.64128263023127 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.5, "corr_recommendation_correctness": 0.5, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12129960301970534591&as_sdt=8005&sciodt=0,7&hl=en", "gs_version_total": 2, "pdf": "https://openreview.net/pdf?id=JAezPMehaUu", "email": 
"uni.sydney.edu.au;chiplego.com;;ai2robotics.com;ustc.edu.cn;unimelb.edu.au;sydney.edu.au", "author_num": 7, "aff_unique_index": "0;1;2;3;4;0", "aff_unique_norm": "University of Sydney;Chiplego;AI^2 Robotics;University of Science and Technology of China;University of Melbourne", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.sydney.edu.au;;http://ai2robotics.org/;http://www.ustc.edu.cn;https://www.unimelb.edu.au", "aff_unique_abbr": "USYD;;AI^2 Robotics;USTC;UniMelb", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;2;3;0;0", "aff_country_unique": "Australia;;United States;China" }, { "id": "JDuEddUsSb", "title": "Efficient Discovery of Dynamical Laws in Symbolic Form", "track": "main", "status": "Reject", "tldr": "Given a time series that is governed by an ordinary differential equation (ODE), our model infers the mathematical expression of the ODE.", "abstract": "We propose a transformer-based sequence-to-sequence model that recovers scalar ordinary differential equations (ODEs) in symbolic form from time-series data of a single observed solution trajectory of the ODE. Our method is efficiently scalable: after one-time pretraining on a large set of ODEs, we can infer the governing laws of a new observed solution in a few forward passes of the model. First, we generate and make available a large dataset of more than 3M ODEs together with more than 63M numerical solutions for different initial conditions that may serve as a useful benchmark for future work on machine learning for dynamical systems. Then we show that our model performs better or on par with existing methods in various test cases in terms of accurate symbolic recovery of the ODE, especially for more complex expressions. Reliably recovering the symbolic form of dynamical laws is important as it allows for further dissemination of the inferred dynamics as well as meaningful modifications for predictions under interventions.", "keywords": "Symbolic;ODE;Transformer", "primary_area": "", "supplementary_material": "", "author": "S\u00f6ren Becker;Michal Klein;Alexander Neitz;Giambattista Parascandolo;Niki Kilbertus", "authorids": "~S\u00f6ren_Becker2;~Michal_Klein1;~Alexander_Neitz1;~Giambattista_Parascandolo1;~Niki_Kilbertus1", "gender": ";M;;;", "homepage": ";https://github.com/michalk8;;;", "dblp": ";332/4607;180/8340;;202/1966", "google_scholar": ";zByzdzcAAAAJ;;;uQZjTq4AAAAJ", "orcid": ";0000-0002-2433-6380;;;", "linkedin": ";michal-klein-148697165/;;;", "or_profile": "~S\u00f6ren_Becker2;~Michal_Klein1;~Alexander_Neitz1;~Giambattista_Parascandolo1;~Niki_Kilbertus1", "aff": ";Apple;Google DeepMind;;Helmholtz AI", "aff_domain": ";apple.com;deepmind.com;;helmholtz-muenchen.de", "position": ";Researcher;Researcher;;Group Leader", "bibtex": "@misc{\nbecker2023efficient,\ntitle={Efficient Discovery of Dynamical Laws in Symbolic Form},\nauthor={S{\\\"o}ren Becker and Michal Klein and Alexander Neitz and Giambattista Parascandolo and Niki Kilbertus},\nyear={2023},\nurl={https://openreview.net/forum?id=JDuEddUsSb}\n}", "github": "", "project": "", "reviewers": "UR3m;qT9N;iM1D;LsD3", "site": "https://openreview.net/forum?id=JDuEddUsSb", "pdf_size": 1000028, "recommendation": "3;3;5;8", "confidence": "4;4;3;5", "correctness": "3;3;4;4", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;2;3;4", "wc_summary_paper": "97;72;36;289", "wc_strength_and_weaknesses": "54;50;138;562", "wc_clarity_quality_novelty_and_reproducibility": "606;33;7;131", "wc_summary_review": "89;324;30;50", 
"wc_review": "846;479;211;1032", "wc_reply_reviewers": "688;361;0;0", "wc_reply_authors": "1022;1757;383;587", "reply_reviewers": "2;1;0;0", "reply_authors": "3;3;2;1", "recommendation_avg": [ 4.75, 2.0463381929681126 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 123.5, 97.9808654789291 ], "wc_strength_and_weaknesses_avg": [ 201.0, 211.36461387848252 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 194.25, 242.17904017482604 ], "wc_summary_review_avg": [ 123.25, 117.82906050716012 ], "wc_review_avg": [ 642.0, 318.6086941688817 ], "wc_reply_reviewers_avg": [ 262.25, 286.60283930903404 ], "wc_reply_authors_avg": [ 937.25, 526.5550184928446 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.5183210553488161, "corr_recommendation_correctness": 0.8551861104941366, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:l36SHYD_e9wJ:scholar.google.com/&scioq=Efficient+Discovery+of+Dynamical+Laws+in+Symbolic+Form&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;1;2", "aff_unique_norm": "Apple;Google;Helmholtz Association of German Research Centres", "aff_unique_dep": "Apple Inc.;Google DeepMind;Helmholtz AI", "aff_unique_url": "https://www.apple.com;https://deepmind.com;https://www.helmholtz-ai.de", "aff_unique_abbr": "Apple;DeepMind;Helmholtz AI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2", "aff_country_unique": "United States;United Kingdom;Germany" }, { "id": "JFf-bPQu5RB", "title": "Leveraged Asymmetric Loss with Disambiguation for Multi-label Recognition with One-Positive Annotations", "track": "main", "status": "Reject", "tldr": "", "abstract": "In the problem of multi-label learning from single positive labels (SPL), we learn the potential multiple labels from one observable single positive annotation. Despite many efforts to solve this problem, an effective algorithm with sound theoretical understanding is still in need. In this paper, we propose a novel loss function for the SPL problem, called leveraged asymmetric loss with disambiguation (LASD), where we introduce a pair of leverage parameters to address the severe negative-positive imbalance. From the theoretical perspective, we analyze the SPL problem, for the first time, from the perspective of risk consistency, which links the SPL loss with losses for ordinary multi-label classification. We prove the consistency of our proposed LASD loss to the cost-sensitive Hamming loss, which provides guidance to the empirical choice of our proposed leverage parameters. 
In experiments, we demonstrate the effectiveness of our proposed LASD loss function over other state-of-the-art methods and empirically verify our theoretical results.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Jingyi Cui;Tao Huang;Hanyuan Hang;Yisen Wang;James Kwok", "authorids": "~Jingyi_Cui1;~Tao_Huang2;~Hanyuan_Hang1;~Yisen_Wang1;~James_Kwok1", "gender": "F;;M;M;", "homepage": "https://zero-lab-pku.github.io/personwise/cuijingyi/;;;https://yisenwang.github.io/;", "dblp": "216/3282;;180/5385;172/1346-1;", "google_scholar": ";;;uMWPDboAAAAJ;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Jingyi_Cui1;~Tao_Huang2;~Hanyuan_Hang1;~Yisen_Wang1;~James_Kwok1", "aff": "Peking University;;University of Twente;Peking University;", "aff_domain": "pku.edu.cn;;utwente.nl;pku.edu.cn;", "position": "PhD student;;Assistant Professor;Assistant Professor;", "bibtex": "@misc{\ncui2023leveraged,\ntitle={Leveraged Asymmetric Loss with Disambiguation for Multi-label Recognition with One-Positive Annotations},\nauthor={Jingyi Cui and Tao Huang and Hanyuan Hang and Yisen Wang and James Kwok},\nyear={2023},\nurl={https://openreview.net/forum?id=JFf-bPQu5RB}\n}", "github": "", "project": "", "reviewers": "HXy7;PKX1;k1k2;k9E5", "site": "https://openreview.net/forum?id=JFf-bPQu5RB", "pdf_size": 398734, "recommendation": "3;3;5;6", "confidence": "5;5;4;2", "correctness": "2;2;3;3", "technical_novelty": "3;2;3;2", "empirical_novelty": "3;2;2;4", "wc_summary_paper": "95;60;78;103", "wc_strength_and_weaknesses": "289;191;115;73", "wc_clarity_quality_novelty_and_reproducibility": "125;36;34;218", "wc_summary_review": "57;11;50;46", "wc_review": "566;298;277;440", "wc_reply_reviewers": "25;189;0;0", "wc_reply_authors": "769;700;389;939", "reply_reviewers": "1;1;0;0", "reply_authors": "2;1;1;2", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 4.0, 1.224744871391589 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 84.0, 16.537835408541227 ], "wc_strength_and_weaknesses_avg": [ 167.0, 82.15838362577492 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 103.25, 75.76072531331785 ], "wc_summary_review_avg": [ 41.0, 17.76231966833161 ], "wc_review_avg": [ 395.25, 116.83187707128565 ], "wc_reply_reviewers_avg": [ 53.5, 78.89391611524934 ], "wc_reply_authors_avg": [ 699.25, 199.12354833118056 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.9428090415820632, "corr_recommendation_correctness": 0.9622504486493761, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:JS-gYj9bmH4J:scholar.google.com/&scioq=Leveraged+Asymmetric+Loss+with+Disambiguation+for+Multi-label+Recognition+with+One-Positive+Annotations&hl=en&as_sdt=0,11", "gs_version_total": 0, "aff_unique_index": "0;1;0", "aff_unique_norm": "Peking University;University of Twente", "aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;https://www.utwente.nl", "aff_unique_abbr": "Peking U;UT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "China;Netherlands" }, { "id": "JFtHy-Ve7e", "title": "Batch Normalization Explained", "track": "main", "status": "Reject", "tldr": "Batch normalization adapts the geometry of the deep network to the data manifold and serves as a smart 
initialization and a margin maximization method", "abstract": "A critically important, ubiquitous, and yet poorly understood ingredient in modern deep networks (DNs) is batch normalization (BN), which centers and normalizes the feature maps. To date, only limited progress has been made understanding why BN boosts DN learning and inference performance; work has focused exclusively on showing that BN smooths a DN's loss landscape. In this paper, we study BN theoretically from the perspective of function approximation; we exploit the fact that most of today's state-of-the-art DNs are continuous piecewise affine (CPA) splines that fit a predictor to the training data via affine mappings defined over a partition of the input space (the so-called ``linear regions''). We demonstrate that BN is an unsupervised learning technique that -- independent of the DN's weights or gradient-based learning -- adapts the geometry of a DN's spline partition to match the data. BN provides a ``smart initialization'' that boosts the performance of DN learning, because it adapts even a DN initialized with random weights to align its spline partition with the data. We also show that the variation of BN statistics between mini-batches introduces a dropout-like random perturbation to the partition boundaries and hence the decision boundary for classification problems. This per mini-batch perturbation reduces overfitting and improves generalization by increasing the margin between the training samples and the decision boundary. ", "keywords": "batch normalization;continuous piecewise linear networks;unsupervised learning", "primary_area": "", "supplementary_material": "", "author": "Randall Balestriero;Richard Baraniuk", "authorids": "~Randall_Balestriero1;~Richard_Baraniuk1", "gender": "M;", "homepage": "https://randallbalestriero.github.io/;http://richb.rice.edu/", "dblp": "175/5364;32/2804", "google_scholar": "S1x_xqcAAAAJ;https://scholar.google.com.tw/citations?user=N-BBA20AAAAJ", "orcid": ";", "linkedin": "randallbalestriero/;richard-baraniuk", "or_profile": "~Randall_Balestriero1;~Richard_Baraniuk1", "aff": "Meta Facebook;William Marsh Rice University", "aff_domain": "facebook.com;rice.edu", "position": "Postdoc;C. 
Sidney Burrus Professor", "bibtex": "@misc{\nbalestriero2023batch,\ntitle={Batch Normalization Explained},\nauthor={Randall Balestriero and Richard Baraniuk},\nyear={2023},\nurl={https://openreview.net/forum?id=JFtHy-Ve7e}\n}", "github": "", "project": "", "reviewers": "3myR;991a;maYa;jK7q", "site": "https://openreview.net/forum?id=JFtHy-Ve7e", "pdf_size": 3123328, "recommendation": "3;3;3;6", "confidence": "4;4;5;3", "correctness": "3;1;2;4", "technical_novelty": "2;2;2;3", "empirical_novelty": "0;3;2;3", "wc_summary_paper": "83;76;113;103", "wc_strength_and_weaknesses": "98;335;387;49", "wc_clarity_quality_novelty_and_reproducibility": "771;39;125;41", "wc_summary_review": "82;19;56;421", "wc_review": "1034;469;681;614", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.75, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 2.5, 1.118033988749895 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 1.224744871391589 ], "wc_summary_paper_avg": [ 93.75, 14.889173919328098 ], "wc_strength_and_weaknesses_avg": [ 217.25, 145.95268925237383 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 244.0, 306.2368364517894 ], "wc_summary_review_avg": [ 144.5, 161.1994106688979 ], "wc_review_avg": [ 699.5, 207.76970424005518 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.816496580927726, "corr_recommendation_correctness": 0.7745966692414834, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17777423017711524869&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Meta;Rice University", "aff_unique_dep": "Meta Platforms, Inc.;", "aff_unique_url": "https://meta.com;https://www.rice.edu", "aff_unique_abbr": "Meta;Rice", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Learning to Generate Columns with Application to Vertex Coloring", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12012", "id": "JHW30A4DXtO", "poster": "", "openreview": "https://openreview.net/forum?id=JHW30A4DXtO", "slides": "https://iclr.cc/virtual/2023/poster/12012", "video": "https://iclr.cc/virtual/2023/poster/12012", "author_site": "Yuan Sun, Andreas Ernst, Xiaodong Li, Jake Weiner", "tldr": "", "abstract": "We present a new column generation approach based on Machine Learning (ML) for solving combinatorial optimization problems. The aim of our method is to generate high-quality columns that belong to an optimal integer solution, in contrast to the traditional approach that aims at solving linear programming relaxations. To achieve this aim, we design novel features to characterize a column, and develop an effective ML model to predict whether a column belongs to an optimal integer solution. We then use the ML model as a filter to select high-quality columns generated from a sampling method and use the selected columns to construct an integer solution. Our method is computationally fast compared to the traditional methods that generate columns by repeatedly solving a pricing problem. 
We demonstrate the efficacy of our method on the vertex coloring problem, by empirically showing that the columns selected by our ML model are significantly better, in terms of the integer solution that can be constructed from them, than those selected randomly or based only on their reduced cost. Further, we show that the columns generated by our method can be used as a warm start to boost the performance of a column generation-based heuristic.", "keywords": "Machine learning;combinatorial optimization;column generation", "primary_area": "", "supplementary_material": "/attachment/efce8ee923bb8e6fa56643aa864bba79ab81ed87.zip", "author": "Yuan Sun;Andreas T Ernst;Xiaodong Li;Jake Weiner", "authorids": "~Yuan_Sun1;~Andreas_T_Ernst1;~Xiaodong_Li5;~Jake_Weiner1", "gender": "M;M;M;M", "homepage": "https://scholars.latrobe.edu.au/y6sun;https://research.monash.edu/en/persons/andreas-ernst;https://titan.csit.rmit.edu.au/~e46507/;", "dblp": "75/5247-3;;;", "google_scholar": "B49vHtUAAAAJ;https://scholar.google.com.au/citations?user=hSDlhWUAAAAJ;https://scholar.google.com.au/citations?user=AQewL04AAAAJ;ibO_QnAAAAAJ", "orcid": "0000-0003-2911-0070;0000-0002-1101-8359;;", "linkedin": ";;;", "or_profile": "~Yuan_Sun1;~Andreas_T_Ernst1;~Xiaodong_Li5;~Jake_Weiner1", "aff": "La Trobe University;Monash University;Royal Melbourne Institute of Technology;", "aff_domain": "latrobe.edu.au;monash.edu;rmit.edu.au;", "position": "Assistant Professor;Full Professor;Full Professor;", "bibtex": "@inproceedings{\nsun2023learning,\ntitle={Learning to Generate Columns with Application to Vertex Coloring},\nauthor={Yuan Sun and Andreas T Ernst and Xiaodong Li and Jake Weiner},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=JHW30A4DXtO}\n}", "github": "", "project": "", "reviewers": "igoj;795f;DZnS", "pdf_size": 491102, "recommendation": "6;6;8", "confidence": "4;4;5", "correctness": "4;4;4", "technical_novelty": "2;2;3", "empirical_novelty": "3;2;3", "wc_summary_paper": "87;144;146", "wc_strength_and_weaknesses": "111;107;295", "wc_clarity_quality_novelty_and_reproducibility": "10;28;20", "wc_summary_review": "16;62;85", "wc_review": "224;341;546", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "360;448;795", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 4.0, 0.0 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 125.66666666666667, 27.35365098523819 ], "wc_strength_and_weaknesses_avg": [ 171.0, 87.69644614616186 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 19.333333333333332, 7.363574011458175 ], "wc_summary_review_avg": [ 54.333333333333336, 28.686039965266886 ], "wc_review_avg": [ 370.3333333333333, 133.08226361498697 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 534.3333333333334, 187.7877051945154 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.9999999999999997, "corr_recommendation_correctness": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11976626682701565369&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 2, "pdf": "https://openreview.net/pdf?id=JHW30A4DXtO", "email": 
"latrobe.edu.au;monash.edu;rmit.edu.au;", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "La Trobe University;Monash University;Royal Melbourne Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.latrobe.edu.au;https://www.monash.edu;https://www.rmit.edu.au", "aff_unique_abbr": "LTU;Monash;RMIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Australia" }, { "title": "Non-parametric Outlier Synthesis", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11574", "id": "JHklpEZqduQ", "poster": "", "openreview": "https://openreview.net/forum?id=JHklpEZqduQ", "slides": "https://iclr.cc/virtual/2023/poster/11574", "video": "https://iclr.cc/virtual/2023/poster/11574", "author_site": "Leitian Tao, Xuefeng Du, Xiaojin Zhu, Yixuan Li", "tldr": "", "abstract": "Out-of-distribution (OOD) detection is indispensable for safely deploying machine learning models in the wild. One of the key challenges is that models lack supervision signals from unknown data, and as a result, can produce overconfident predictions on OOD data. Recent work on outlier synthesis modeled the feature space as parametric Gaussian distribution, a strong and restrictive assumption that might not hold in reality. In this paper, we propose a novel framework, non-parametric outlier synthesis (NPOS), which generates artificial OOD training data and facilitates learning a reliable decision boundary between ID and OOD data. Importantly, our proposed synthesis approach does not make any distributional assumption on the ID embeddings, thereby offering strong flexibility and generality. We show that our synthesis approach can be mathematically interpreted as a rejection sampling framework. Extensive experiments show that NPOS can achieve superior OOD detection performance, outperforming the competitive rivals by a significant margin. 
Code is publicly available at https://github.com/deeplearning-wisc/npos.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Leitian Tao;Xuefeng Du;Jerry Zhu;Yixuan Li", "authorids": "~Leitian_Tao1;~Xuefeng_Du1;~Jerry_Zhu1;~Yixuan_Li1", "gender": "M;M;F;M", "homepage": "https://taoleitian.github.io/;https://d12306.github.io/;http://pages.cs.wisc.edu/~sharonli/;http://pages.cs.wisc.edu/~jerryzhu/", "dblp": "296/3739;34/3557;144/6087-1;z/XiaojinZhu", "google_scholar": "F5S6V6sAAAAJ;GE_aEh4AAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=hqTu-QcAAAAJ", "orcid": ";;;", "linkedin": ";xuefeng-du-094723192/;liyixuan;", "or_profile": "~Leitian_Tao1;~Xuefeng_Du1;~Yixuan_Li1;~Xiaojin_Zhu1", "aff": "Wuhan University;University of Wisconsin, Madison;Cornell University;University of Wisconsin, Madison", "aff_domain": "whu.edu.cn;wisc.edu;cornell.edu;wisc.edu", "position": "Undergrad student;PhD student;Graduate Student;Associate Professor", "bibtex": "@inproceedings{\ntao2023nonparametric,\ntitle={Non-parametric Outlier Synthesis},\nauthor={Leitian Tao and Xuefeng Du and Jerry Zhu and Yixuan Li},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=JHklpEZqduQ}\n}", "github": "", "project": "", "reviewers": "M7jy;8Tpk;fjV3", "pdf_size": 801825, "recommendation": "6;6;6", "confidence": "4;3;3", "correctness": "3;4;4", "technical_novelty": "2;2;3", "empirical_novelty": "2;3;4", "wc_summary_paper": "68;118;190", "wc_strength_and_weaknesses": "567;163;131", "wc_clarity_quality_novelty_and_reproducibility": "52;25;15", "wc_summary_review": "61;9;31", "wc_review": "748;315;367", "wc_reply_reviewers": "137;0;0", "wc_reply_authors": "2333;357;296", "reply_reviewers": "1;0;0", "reply_authors": "5;1;1", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 125.33333333333333, 50.075498555237125 ], "wc_strength_and_weaknesses_avg": [ 287.0, 198.42042905574684 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 30.666666666666668, 15.627610892974724 ], "wc_summary_review_avg": [ 33.666666666666664, 21.31248981752771 ], "wc_review_avg": [ 476.6666666666667, 193.0325246054549 ], "wc_reply_reviewers_avg": [ 45.666666666666664, 64.58241934837135 ], "wc_reply_authors_avg": [ 995.3333333333334, 946.2009417783423 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 1.8856180831641267 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 121, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14406867948073960262&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=JHklpEZqduQ", "email": "whu.edu.cn;wisc.edu;cornell.edu;wisc.edu", "author_num": 4, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Wuhan University;University of Wisconsin;Cornell University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.whu.edu.cn/;https://www.wisc.edu;https://www.cornell.edu", "aff_unique_abbr": "WHU;UW;Cornell", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Madison", "aff_country_unique_index": "0;1;1;1", 
"aff_country_unique": "China;United States" }, { "id": "JIl_kij_aov", "title": "Early Stopping for Deep Image Prior", "track": "main", "status": "Reject", "tldr": "", "abstract": "Deep image prior (DIP) and its variants have shown remarkable potential for solving inverse problems in computational imaging (CI), needing no separate training data. Practical DIP models are often substantially overparameterized. During the learning process, these models learn the desired visual content first and then pick up the potential modeling and observational noise, i.e., overfitting. Thus, the practicality of DIP hinges on early stopping (ES) that captures the transition period. In this regard, the majority of prior DIP works for CI tasks only demonstrate the potential of the models---reporting the peak performance against the groundtruth but providing no clue about how to operationally obtain near-peak performance without access to the groundtruth. In this paper, we set to break this practicality barrier of DIP, and propose an efficient ES strategy that consistently detects near-peak performance across several CI tasks and DIP variants. Simply based on the running variance of DIP intermediate reconstructions, our ES method not only outpaces the existing ones---which only work in very narrow regimes, but also remains effective when combined with methods that try to mitigate overfitting.", "keywords": "early stopping;deep image prior;deep generative models;overparametrization;overfitting", "primary_area": "", "supplementary_material": "/attachment/f6fbfa26b1939d01f085c8cf9d71c5476bd9b9d8.zip", "author": "Hengkang Wang;Taihui Li;Zhong Zhuang;Tiancong Chen;Hengyue Liang;Ju Sun", "authorids": "~Hengkang_Wang1;~Taihui_Li1;~Zhong_Zhuang1;~Tiancong_Chen1;~Hengyue_Liang1;~Ju_Sun2", "gender": "M;M;M;M;M;", "homepage": ";https://taihui.github.io/;;https://sites.google.com/view/tiancong-chen;https://www.linkedin.com/in/lianghengyue/;http://www.sunju.org", "dblp": "175/7774;174/3814.html;;242/8507;248/8130;31/6843.html", "google_scholar": "APqDZvUAAAAJ;1zrHEeYAAAAJ;;Y97x5I8AAAAJ;aWVo5AEAAAAJ;V6FaD-UAAAAJ", "orcid": ";0000-0002-3758-8923;0000-0002-4135-1988;;0000-0001-9498-6402;0000-0002-2017-5903", "linkedin": "hk-wang/;;;;lianghengyue/;", "or_profile": "~Hengkang_Wang1;~Taihui_Li1;~Zhong_Zhuang1;~Tiancong_Chen1;~Hengyue_Liang1;~Ju_Sun1", "aff": "University of Minnesota, Twin Cities;University of Minnesota, Minneapolis;University of California, Los Angeles;University of Minnesota, Minneapolis;University of Minnesota, Minneapolis;University of Minnesota, Twin Cities", "aff_domain": "umn.edu;umn.edu;ucla.edu;umn.edu;umn.edu;umn.edu", "position": "PhD student;PhD student;Postdoc;PhD student;PhD student;Assistant Professor", "bibtex": "@misc{\nwang2023early,\ntitle={Early Stopping for Deep Image Prior},\nauthor={Hengkang Wang and Taihui Li and Zhong Zhuang and Tiancong Chen and Hengyue Liang and Ju Sun},\nyear={2023},\nurl={https://openreview.net/forum?id=JIl_kij_aov}\n}", "github": "", "project": "", "reviewers": "hubG;9Jvw;kTJ2;sEH9;cx5R", "site": "https://openreview.net/forum?id=JIl_kij_aov", "pdf_size": 21918390, "recommendation": "5;5;6;6;6", "confidence": "3;4;4;4;3", "correctness": "3;3;3;3;3", "technical_novelty": "3;3;2;3;3", "empirical_novelty": "3;0;3;3;3", "wc_summary_paper": "41;62;75;68;55", "wc_strength_and_weaknesses": "197;138;269;188;109", "wc_clarity_quality_novelty_and_reproducibility": "32;28;113;189;17", "wc_summary_review": "40;104;113;144;44", "wc_review": "310;332;570;589;225", 
"wc_reply_reviewers": "0;0;92;229;0", "wc_reply_authors": "800;817;1137;894;464", "reply_reviewers": "0;0;1;1;0", "reply_authors": "3;4;5;4;3", "recommendation_avg": [ 5.6, 0.48989794855663565 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.8, 0.39999999999999997 ], "empirical_novelty_avg": [ 2.4, 1.2000000000000002 ], "wc_summary_paper_avg": [ 60.2, 11.651609330903607 ], "wc_strength_and_weaknesses_avg": [ 180.2, 54.915935756390425 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 75.8, 66.11928614254694 ], "wc_summary_review_avg": [ 89.0, 40.625115384451526 ], "wc_review_avg": [ 405.2, 146.85693718718227 ], "wc_reply_reviewers_avg": [ 64.2, 89.77393831173946 ], "wc_reply_authors_avg": [ 822.4, 215.90794334623266 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 3.8, 0.7483314773547882 ], "replies_avg": [ 32, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.16666666666666669, "corr_recommendation_correctness": 0.0, "gs_citation": 80, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14519631950437438728&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;1;0;0;0", "aff_unique_norm": "University of Minnesota;University of California, Los Angeles", "aff_unique_dep": ";", "aff_unique_url": "https://www.minnesota.edu;https://www.ucla.edu", "aff_unique_abbr": "UMN;UCLA", "aff_campus_unique_index": "0;1;2;1;1;0", "aff_campus_unique": "Twin Cities;Minneapolis;Los Angeles", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "JInmhyuvn6", "title": "Data Pricing Mechanism Based on Property Rights Compensation Distribution", "track": "main", "status": "Reject", "tldr": "This paper proposes the first data valuation mechanism based on modern property rights theory. We integrate ownership to clearify ownership and estimate its value while using the core instead of Shapley value to assign compensation.", "abstract": "While machine learning (ML) benefits from data, it also faces the challenges of ambiguous data ownership, including privacy violations and increased costs of using data. Yet existing approaches to data valuation may focus on preventing privacy breaches, but do not truly protect data ownership. This is because a data trading marketplace that protects data ownership should achieve this goal: once data is traded, its ownership does not transfer to a new owner but merely enlarges its coverage. Considering that the transfer of property rights in the process of data trading makes compensation necessary, this paper proposes the first data valuation mechanism based on modern property rights theory. Specifically, we propose the integration of property rights to improve the final revenue of the entire workflow called the \u201cdata chain\u201d while compensating process executors who lost ownership after integration. Then, we consider the expectations of both the integrator and the integrated party during the compensation allocation. For the former, we apply compound interest to assess a total compensation equivalent to the time value for the Data chain. For the latter, we respect and meet their expectations as much as possible. To achieve this, we provide the framework based on Least-core to assign the compensation and prove that our framework can also work compared to existing algorithms. 
Finally, to cope with more complex situations, we adjust the traditional Least-core and demonstrate theoretically and experimentally that the compensation mechanism is feasible and effective in solving the data pricing problem.", "keywords": "data valuation;game theory;data ownership;modern property rights theory", "primary_area": "", "supplementary_material": "", "author": "shiqian Liu;Peizheng Wang;Chao Wu", "authorids": "~shiqian_Liu1;~Peizheng_Wang1;~Chao_Wu1", "gender": ";M;M", "homepage": ";https://github.com/peizhengwang;", "dblp": ";346/0183;45/3158-1", "google_scholar": ";;gpTPt58AAAAJ", "orcid": "0000-0002-9343-9121;0000-0001-8426-1897;0000-0003-0885-6869", "linkedin": ";;", "or_profile": "~shiqian_Liu1;~Peizheng_Wang1;~Chao_Wu1", "aff": "Zhejiang University;Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn;zju.edu.cn", "position": "PhD student;MS student;Associate Professor", "bibtex": "@misc{\nliu2023data,\ntitle={Data Pricing Mechanism Based on Property Rights Compensation Distribution},\nauthor={shiqian Liu and Peizheng Wang and Chao Wu},\nyear={2023},\nurl={https://openreview.net/forum?id=JInmhyuvn6}\n}", "github": "", "project": "", "reviewers": "ELSX;WvJ5;V9Lr", "site": "https://openreview.net/forum?id=JInmhyuvn6", "pdf_size": 406102, "recommendation": "5;6;8", "confidence": "3;3;3", "correctness": "3;4;3", "technical_novelty": "4;3;3", "empirical_novelty": "3;4;2", "wc_summary_paper": "12;92;85", "wc_strength_and_weaknesses": "259;160;110", "wc_clarity_quality_novelty_and_reproducibility": "24;43;608", "wc_summary_review": "28;46;73", "wc_review": "323;341;876", "wc_reply_reviewers": "202;187;324", "wc_reply_authors": "784;922;1282", "reply_reviewers": "1;2;1", "reply_authors": "1;2;3", "recommendation_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.0, 0.0 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 63.0, 36.175498153676706 ], "wc_strength_and_weaknesses_avg": [ 176.33333333333334, 61.9157133170212 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 225.0, 270.9329560364827 ], "wc_summary_review_avg": [ 49.0, 18.49324200890693 ], "wc_review_avg": [ 513.3333333333334, 256.54932382595666 ], "wc_reply_reviewers_avg": [ 237.66666666666666, 61.35325763333372 ], "wc_reply_authors_avg": [ 996.0, 209.93332274796205 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": -0.18898223650461363, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:_07IFKkfl8wJ:scholar.google.com/&scioq=Data+Pricing+Mechanism+Based+on+Property+Rights+Compensation+Distribution&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Zhejiang University", "aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", "aff_unique_abbr": "ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "JIptuwnqwn", "title": "Quantized Disentangled Representations for Object-Centric Visual Tasks", "track": "main", "status": "Reject", "tldr": "We propose quantized disentangled representations that demonstrate state-of-the-art performance in set prediction
tasks among a class of object-centric methods.", "abstract": "Recently, the pre-quantization of image features into discrete latent variables has helped to achieve remarkable results in image modeling. In this paper, we propose a method to learn discrete latent variables applied to object-centric tasks. In our approach, each object is assigned a slot which is represented as a vector generated by sampling from non-overlapping sets of low-dimensional discrete variables.\nWe empirically demonstrate that embeddings from the learned discrete latent spaces have the disentanglement property. The model is trained with set prediction and object discovery as downstream tasks. It achieves state-of-the-art results on the CLEVR dataset among a class of object-centric methods for the set prediction task. We also demonstrate manipulation of individual objects in a scene with controllable image generation in the object discovery setting.", "keywords": "quantised representation;disentangled representation;object-centric task", "primary_area": "", "supplementary_material": "/attachment/a93a2cf6a7c0f9acb854c4aa1157d6610b89b781.zip", "author": "Daniil Kirilenko;Alexandr Korchemnyi;Alexey Kovalev;Aleksandr Panov", "authorids": "~Daniil_Kirilenko1;~Alexandr_Korchemnyi1;~Alexey_Kovalev3;~Aleksandr_Panov1", "gender": "M;M;M;M", "homepage": ";;;http://grafft.github.io", "dblp": "304/3977;;245/7675;177/9975", "google_scholar": ";_gTWrpMAAAAJ;https://scholar.google.com/citations?view_op=list_works;https://scholar.google.ru/citations?hl=ru", "orcid": "0000-0002-4835-9413;;0000-0003-2180-0990;0000-0002-9747-3837", "linkedin": ";;alexey-kovalev-831433286/;", "or_profile": "~Daniil_Kirilenko1;~Alexandr_Korchemnyi1;~Alexey_Kovalev3;~Aleksandr_Panov1", "aff": "Universita della Svizzera Italiana;Moscow Institute of Physics and Technology;Federal Research Center \u00abComputer Science and Control\u00bb of Russian Academy of Sciences;Federal Research Center \u00abComputer Science and Control\u00bb of Russian Academy of Sciences", "aff_domain": "usi.ch;mipt.edu;frccsc.ru;frccsc.ru", "position": "PhD student;MS student;Researcher;Principal Researcher", "bibtex": "@misc{\nkirilenko2023quantized,\ntitle={Quantized Disentangled Representations for Object-Centric Visual Tasks},\nauthor={Daniil Kirilenko and Alexandr Korchemnyi and Alexey Kovalev and Aleksandr Panov},\nyear={2023},\nurl={https://openreview.net/forum?id=JIptuwnqwn}\n}", "github": "", "project": "", "reviewers": "1Nh6;88V8;Eih1;vXo7", "site": "https://openreview.net/forum?id=JIptuwnqwn", "pdf_size": 1205201, "recommendation": "1;3;3;3", "confidence": "5;4;4;5", "correctness": "2;2;3;3", "technical_novelty": "2;2;2;2", "empirical_novelty": "1;2;2;2", "wc_summary_paper": "41;65;23;92", "wc_strength_and_weaknesses": "114;191;376;162", "wc_clarity_quality_novelty_and_reproducibility": "198;63;64;3", "wc_summary_review": "240;68;97;28", "wc_review": "593;387;560;285", "wc_reply_reviewers": "0;49;0;0", "wc_reply_authors": "581;309;752;203", "reply_reviewers": "0;1;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 2.5, 0.8660254037844386 ], "confidence_avg": [ 4.5, 0.5 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 55.25, 25.926579026165406 ], "wc_strength_and_weaknesses_avg": [ 210.75, 99.29092355296127 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 82.0, 71.3827710305505 ], "wc_summary_review_avg": [ 108.25, 79.91362524626197 ],
"wc_review_avg": [ 456.25, 126.08206652811494 ], "wc_reply_reviewers_avg": [ 12.25, 21.21762239271875 ], "wc_reply_authors_avg": [ 461.25, 217.22612066692164 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13798308001996004208&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "Universita della Svizzera Italiana;Moscow Institute of Physics and Technology;Russian Academy of Sciences", "aff_unique_dep": ";;Computer Science and Control", "aff_unique_url": "https://www.usi.ch;https://www.mipt.ru/en;https://www.ras.ru", "aff_unique_abbr": "USI;MIPT;RAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "Switzerland;Russian Federation" }, { "title": "LAVA: Data Valuation without Pre-Specified Learning Algorithms", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12066", "id": "JJuP86nBl4q", "poster": "/media/PosterPDFs/ICLR%202023/12066.png?t=1680813420.9174254", "openreview": "https://openreview.net/forum?id=JJuP86nBl4q", "slides": "https://iclr.cc/virtual/2023/poster/12066", "video": "https://iclr.cc/virtual/2023/poster/12066", "author_site": "Hoang Anh Just, Feiyang Kang, Tianhao Wang, Yi Zeng, Myeongseob Ko, Ming Jin, Ruoxi Jia", "tldr": "We propose LAVA: a novel model-agnostic approach to data valuation using a non-conventional, class-wise Wasserstein discrepancy.", "abstract": "Traditionally, data valuation is posed as a problem of equitably splitting the validation performance of a learning algorithm among the training data. As a result, the calculated data values depend on many design choices of the underlying learning algorithm. However, this dependence is undesirable for many use cases of data valuation, such as setting priorities over different data sources in a data acquisition process and informing pricing mechanisms in a data marketplace. In these scenarios, data needs to be valued before the actual analysis and the choice of the learning algorithm is still undetermined then. Another side-effect of the dependence is that to assess the value of individual points, one needs to re-run the learning algorithm with and without a point, which incurs a large computation burden. \n\nThis work leapfrogs over the current limits of data valuation methods by introducing a new framework that can value training data in a way that is oblivious to the downstream learning algorithm. Our main results are as follows. $\\textbf{(1)}$ We develop a proxy for the validation performance associated with a training set based on a non-conventional $\\textit{class-wise}$ $\\textit{Wasserstein distance}$ between the training and the validation set. We show that the distance characterizes the upper bound of the validation performance for any given model under certain Lipschitz conditions. $\\textbf{(2)}$ We develop a novel method to value individual data based on the sensitivity analysis of the $\\textit{class-wise}$ Wasserstein distance. Importantly, these values can be directly obtained $\\textit{for free}$ from the output of off-the-shelf optimization solvers once the Wasserstein distance is computed. 
$\\textbf{(3)}$ We evaluate our new data valuation framework over various use cases related to detecting low-quality data\nand show that, surprisingly, the learning-agnostic feature of our framework enables a $\\textit{significant improvement}$ over the state-of-the-art performance while being $\\textit{orders of magnitude faster}$. ", "keywords": "data valuation;optimal transport;model agnostic;data-driven", "primary_area": "", "supplementary_material": "", "author": "Hoang Anh Just;Feiyang Kang;Tianhao Wang;Yi Zeng;Myeongseob Ko;Ming Jin;Ruoxi Jia", "authorids": "~Hoang_Anh_Just1;~Feiyang_Kang1;~Tianhao_Wang2;~Yi_Zeng3;~Myeongseob_Ko1;~Ming_Jin2;~Ruoxi_Jia1", "gender": ";M;M;M;M;M;", "homepage": "https://justhoanganh.com;;https://tianhaowang.netlify.app/;https://yizeng623.github.io/;;http://www.jinming.tech/;https://ruoxijia.info/", "dblp": "307/2901;218/1175;274/2144;75/148;234/1494;;147/5355-1", "google_scholar": "XcBDQhAAAAAJ;_6mV_iEAAAAJ;nvQOtgkAAAAJ;slUNmHQAAAAJ;https://scholar.google.com/citations?hl=en;YdxdTtkAAAAJ;JCrug-YAAAAJ", "orcid": ";;;0000-0002-6901-9194;;;", "linkedin": ";;tian-hao-wang/;chnyizeng/;;;", "or_profile": "~Hoang_Anh_Just1;~Feiyang_Kang1;~Tianhao_Wang2;~Yi_Zeng3;~Myeongseob_Ko1;~Ming_Jin2;~Ruoxi_Jia1", "aff": "Virginia Polytechnic Institute and State University;Virginia Tech;Princeton University;Virginia Tech;Virginia Polytechnic Institute and State University;Virginia Tech;Virginia Tech", "aff_domain": "vt.edu;vt.edu;princeton.edu;vt.edu;vt.edu;vt.edu;vt.edu", "position": "PhD student;PhD student;PhD student;PhD student;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\njust2023lava,\ntitle={{LAVA}: Data Valuation without Pre-Specified Learning Algorithms},\nauthor={Hoang Anh Just and Feiyang Kang and Tianhao Wang and Yi Zeng and Myeongseob Ko and Ming Jin and Ruoxi Jia},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=JJuP86nBl4q}\n}", "github": "", "project": "", "reviewers": "5BsF;BSJ2;uRtv;7VZM", "pdf_size": 2672755, "recommendation": "8;8;8;8", "confidence": "2;4;3;3", "correctness": "2;3;4;4", "technical_novelty": "3;3;3;3", "empirical_novelty": "2;4;3;4", "wc_summary_paper": "120;131;111;99", "wc_strength_and_weaknesses": "413;151;374;78", "wc_clarity_quality_novelty_and_reproducibility": "166;608;67;16", "wc_summary_review": "55;77;43;33", "wc_review": "754;967;595;226", "wc_reply_reviewers": "328;42;42;0", "wc_reply_authors": "4754;3405;1243;10", "reply_reviewers": "4;1;1;0", "reply_authors": "14;14;6;1", "recommendation_avg": [ 8.0, 0.0 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 115.25, 11.755317945508747 ], "wc_strength_and_weaknesses_avg": [ 254.0, 142.53596037491732 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 214.25, 233.64115112710775 ], "wc_summary_review_avg": [ 52.0, 16.401219466856727 ], "wc_review_avg": [ 635.5, 270.76973612278016 ], "wc_reply_reviewers_avg": [ 103.0, 131.03053079339944 ], "wc_reply_authors_avg": [ 2353.0, 1843.4515182125078 ], "reply_reviewers_avg": [ 1.5, 1.5 ], "reply_authors_avg": [ 8.75, 5.539629951540085 ], "replies_avg": [ 48, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 69, "gs_cited_by_link":
"https://scholar.google.com/scholar?cites=7335609014330510177&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=JJuP86nBl4q", "email": "vt.edu;vt.edu;princeton.edu;vt.edu;vt.edu;vt.edu;vt.edu", "author_num": 7, "aff_unique_index": "0;0;1;0;0;0;0", "aff_unique_norm": "Virginia Tech;Princeton University", "aff_unique_dep": ";", "aff_unique_url": "https://www.vt.edu;https://www.princeton.edu", "aff_unique_abbr": "VT;Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "JKFSUPa70W6M", "title": "Don\u2019t Bet on Sparsity: Designing Brain-inspired Distance-preserving Encoder", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Multi-headed self-attention-based Transformers have been a central area of research for quite some time. Albeit showing a significant improvement in understanding short-term and long-term contexts from sequences, encoders of Transformer and its variants fail to preserve layer-wise contextual information. Further, text representations learned by Transformer-based encoders are usually of low entropy with low variance, which contradicts typical human brain functions. In this work, we propose TransJect, an encoder model that guarantees a theoretical bound for layer-wise distance preservation between any pair of tokens. We propose a simple alternative to dot product attention to ensure Lipschitz continuity that allows TransJect to learn injective mappings to transform token representations to different manifolds and preserve Euclidean distance between every pair of tokens in subsequent layers. Our evaluation on several benchmark short- and long-sequence classification tasks shows a remarkable improvement of 3.1% and 11%, on average, respectively. Furthermore, empirical results suggest that TransJect is layer-agnostic; in fact, it prefers shallower architectures than deeper ones and prevents layer-wise incremental learning beyond a threshold. Our empirical analyses also show the generalization capabilities of TransJect and the robustness under different hyperparameter configurations. We conduct detailed statistical analysis to confirm the necessity of high-entropic representations to achieve human-like cognition. 
", "keywords": "Orthogonal attention;Lipschitz;Entropic Transformer", "primary_area": "", "supplementary_material": "/attachment/3b56f19c62832c0480acb5ce813df8967f272720.zip", "author": "Ayan Sengupta;Md Shad Akhtar;Tanmoy Chakraborty", "authorids": "~Ayan_Sengupta1;~Md_Shad_Akhtar1;~Tanmoy_Chakraborty2", "gender": "M;;M", "homepage": "https://victor7246.github.io/;;http://tanmoychak.com", "dblp": ";184/8579.html;65/2136-2.html", "google_scholar": "90EGfboAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.co.in/citations?user=C5S9JnIAAAAJ", "orcid": ";;0000-0002-0210-0369", "linkedin": ";;tanmoy-chakraborty-89553324/", "or_profile": "~Ayan_Sengupta1;~Md_Shad_Akhtar1;~Tanmoy_Chakraborty2", "aff": "Indian Institute of Technology, Delhi;Indraprastha Institute of Information Technology, Delhi;Indian Institute of Technology, Delhi", "aff_domain": "iitd.ac.in;iiitd.ac.in;iitd.ac.in", "position": "PhD student;Assistant Professor;Associate Professor", "bibtex": "@misc{\nsengupta2023dont,\ntitle={Don{\\textquoteright}t Bet on Sparsity: Designing Brain-inspired Distance-preserving Encoder},\nauthor={Ayan Sengupta and Md Shad Akhtar and Tanmoy Chakraborty},\nyear={2023},\nurl={https://openreview.net/forum?id=JKFSUPa70W6M}\n}", "github": "", "project": "", "reviewers": "BNub;NRyH;VtMJ;oBzy", "site": "https://openreview.net/forum?id=JKFSUPa70W6M", "pdf_size": 1509836, "recommendation": "3;3;3;5", "confidence": "4;4;3;4", "correctness": "3;2;3;3", "technical_novelty": "2;2;2;2", "empirical_novelty": "2;2;3;2", "wc_summary_paper": "63;48;94;44", "wc_strength_and_weaknesses": "164;391;218;141", "wc_clarity_quality_novelty_and_reproducibility": "11;50;24;67", "wc_summary_review": "8;73;17;31", "wc_review": "246;562;353;283", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 62.25, 19.651653874419832 ], "wc_strength_and_weaknesses_avg": [ 228.5, 97.89407540806543 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 38.0, 21.85177338341216 ], "wc_summary_review_avg": [ 32.25, 24.913600703230355 ], "wc_review_avg": [ 361.0, 122.24360923991078 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:PlMGg3R0w0IJ:scholar.google.com/&scioq=Don%E2%80%99t+Bet+on+Sparsity:+Designing+Brain-inspired+Distance-preserving+Encoder&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;0", "aff_unique_norm": "Indian Institute of Technology Delhi;Indraprastha Institute of Information Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.iitdelhi.ac.in;http://www.iiitd.ac.in", "aff_unique_abbr": "IIT Delhi;IIIT-D", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Delhi", "aff_country_unique_index": "0;0;0", "aff_country_unique": "India" }, { "id": "JKuBOuzntQ", "title": "Removing Backdoors in Pre-trained Models by Regularized Continual Pre-training", "track": "main", "status": "Withdraw", "tldr": "", "abstract": 
"Large-scale pre-trained models (PTMs) have become the cornerstones of deep learning. Trained on massive data, general-purpose PTMs allow quick adaptation to a broad range of downstream tasks with superior performance. However, recent researches reveal that PTMs are vulnerable to backdoor attacks even before being fine-tuned on downstream tasks. By associating specific triggers with pre-defined embeddings, the attackers are capable of implanting transferable task-agnostic backdoors in PTMs, and controlling model outputs on any downstream task at inference time. As a result, all downstream applications can be highly risky after the backdoored PTMs are released and deployed. Given such an emergent threat, it is essential to defend PTMs against backdoor attacks and thus build reliable AI systems. Although there are a series of works aiming to erase backdoors on downstream models, as far as we know, no defenses against PTMs have been proposed. Worse still, existing backdoor-repairing defenses require task-specific knowledge (i.e., some clean downstream data), making them unsuitable for backdoored PTMs. To this end, we propose the first task-irrelevant backdoor removal method for PTMs. Motivated by the sparse activation phenomenon, we design a simple and effective backdoor eraser by continually pre-training the backdoored PTMs with a regularization term, guiding the models to \"forget'' backdoors. Our method only needs a few auxiliary task-irrelevant data, e.g., unlabelled plain texts, and thus is practical in typical applications. We conduct extensive experiments across modalities (vision and language) and architectures (CNNs and Transformers) on pre-trained VGG, ViT, BERT and CLIP models. The results show that our method can effectively remove backdoors and preserve benign functionalities in PTMs.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/27c258880daa94fa2e48bf46a5947e02d059c2a4.zip", "author": "Biru Zhu;Ganqu Cui;Yangyi Chen;Yujia Qin;Lifan Yuan;Chong Fu;Yangdong Deng;Zhiyuan Liu;Maosong Sun;Ming Gu", "authorids": "~Biru_Zhu1;~Ganqu_Cui1;~Yangyi_Chen1;~Yujia_Qin1;~Lifan_Yuan1;~Chong_Fu2;~Yangdong_Deng1;~Zhiyuan_Liu1;~Maosong_Sun1;~Ming_Gu2", "gender": "F;M;M;M;;;M;M;M;F", "homepage": ";https://cgq15.github.io/;https://yangyi-chen.github.io/;https://yujia-qin.github.io/;;;http://www.thss.tsinghua.edu.cn/publish/soften/3131/2014/20140115102144786540201/20140115102144786540201_.html;http://nlp.csai.tsinghua.edu.cn/~lzy;https://www.cs.tsinghua.edu.cn/csen/info/1312/4394.htm;", "dblp": "286/7971.html;232/3064;05/10083;126/2333;;;90/5987;53/3245-1;95/3291-1;76/2502-1", "google_scholar": ";3IVSzZgAAAAJ;https://scholar.google.com/citations?hl=en;;;;;dT0v5u0AAAAJ;https://scholar.google.com.tw/citations?user=zIgT0HMAAAAJ;", "orcid": ";;;;;;;0000-0002-7709-2543;;", "linkedin": ";;yangyi-chen-4006a11b2/;yujia-qin-672595181/;;;;;;", "or_profile": "~Biru_Zhu1;~Ganqu_Cui1;~Yangyi_Chen1;~Yujia_Qin1;~Lifan_Yuan1;~Chong_Fu2;~Yangdong_Deng1;~Zhiyuan_Liu1;~Maosong_Sun1;~Ming_Gu2", "aff": "Tsinghua University;Tsinghua University;Department of Computer Science, University of Illinois at Urbana-Champaign;Tsinghua University;;;Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;cs.illinois.edu;tsinghua.edu.cn;;;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;PhD student;PhD student;PhD student;;;Associate Professor;Associate Professor;Full Professor;Full 
Professor", "bibtex": "@misc{\nzhu2023removing,\ntitle={Removing Backdoors in Pre-trained Models by Regularized Continual Pre-training},\nauthor={Biru Zhu and Ganqu Cui and Yangyi Chen and Yujia Qin and Lifan Yuan and Chong Fu and Yangdong Deng and Zhiyuan Liu and Maosong Sun and Ming Gu},\nyear={2023},\nurl={https://openreview.net/forum?id=JKuBOuzntQ}\n}", "github": "", "project": "", "reviewers": "KEZm;U24e;m2cs;ST1r", "site": "https://openreview.net/forum?id=JKuBOuzntQ", "pdf_size": 501741, "recommendation": "3;3;5;6", "confidence": "4;3;4;4", "correctness": "2;2;2;4", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;0;1;3", "wc_summary_paper": "54;34;87;53", "wc_strength_and_weaknesses": "31;823;482;225", "wc_clarity_quality_novelty_and_reproducibility": "307;3;16;18", "wc_summary_review": "22;25;153;21", "wc_review": "414;885;738;317", "wc_reply_reviewers": "0;146;0;0", "wc_reply_authors": "2028;4514;2460;1331", "reply_reviewers": "0;1;0;0", "reply_authors": "4;7;4;3", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.5, 0.8660254037844386 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.5, 1.118033988749895 ], "wc_summary_paper_avg": [ 57.0, 19.06567596493762 ], "wc_strength_and_weaknesses_avg": [ 390.25, 296.6726942271567 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 86.0, 127.72431248591633 ], "wc_summary_review_avg": [ 55.25, 56.45518133882842 ], "wc_review_avg": [ 588.5, 231.53023560649697 ], "wc_reply_reviewers_avg": [ 36.5, 63.21985447626402 ], "wc_reply_authors_avg": [ 2583.25, 1185.265661149432 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 4.5, 1.5 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 10, 0 ], "corr_recommendation_confidence": 0.5555555555555555, "corr_recommendation_correctness": 0.7777777777777777, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11857636645209296432&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;1;0;0;0;0;0", "aff_unique_norm": "Tsinghua University;University of Illinois Urbana-Champaign", "aff_unique_dep": ";Department of Computer Science", "aff_unique_url": "https://www.tsinghua.edu.cn;https://illinois.edu", "aff_unique_abbr": "THU;UIUC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "0;0;1;0;0;0;0;0", "aff_country_unique": "China;United States" }, { "title": "The Lie Derivative for Measuring Learned Equivariance", "status": "Top-5%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11044", "id": "JL7Va5Vy15J", "poster": "/media/PosterPDFs/ICLR%202023/11044.png?t=1681143611.669246", "openreview": "https://openreview.net/forum?id=JL7Va5Vy15J", "slides": "https://iclr.cc/virtual/2023/poster/11044", "video": "https://iclr.cc/virtual/2023/poster/11044", "author_site": "Nate Gruver, Marc A Finzi, Micah Goldblum, Andrew Wilson", "tldr": "", "abstract": "Equivariance guarantees that a model's predictions capture key symmetries in data. When an image is translated or rotated, an equivariant model's representation of that image will translate or rotate accordingly. The success of convolutional neural networks has historically been tied to translation equivariance directly encoded in their architecture. 
The rising success of vision transformers, which have no explicit architectural bias towards equivariance, challenges this narrative and suggests that augmentations and training data might also play a significant role in their performance. In order to better understand the role of equivariance in recent vision models, we apply the Lie derivative, a method for measuring equivariance with strong mathematical foundations and minimal hyperparameters. Using the Lie derivative, we study the equivariance properties of hundreds of pretrained models, spanning CNNs, transformers, and Mixer architectures. The scale of our analysis allows us to separate the impact of architecture from other factors like model size or training method. Surprisingly, we find that many violations of equivariance can be linked to spatial aliasing in ubiquitous network layers, such as pointwise non-linearities, and that as models get larger and more accurate they tend to display more equivariance, regardless of architecture. For example, transformers can be more equivariant than convolutional neural networks after training.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/5261b69b57661a9fd7cb61b3c1445fab10de7754.zip", "author": "Nate Gruver;Marc Anton Finzi;Micah Goldblum;Andrew Gordon Wilson", "authorids": "~Nate_Gruver1;~Marc_Anton_Finzi1;~Micah_Goldblum1;~Andrew_Gordon_Wilson1", "gender": "M;M;;Not Specified", "homepage": "https://ngruver.github.io/;https://mfinzi.github.io;;https://cims.nyu.edu/~andrewgw", "dblp": "223/5568;222/3062;241/7231;65/10453", "google_scholar": "R5QNdhcAAAAJ;ysMAhlwAAAAJ;pGDKzuUAAAAJ;https://scholar.google.com.tw/citations?user=twWX2LIAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Nate_Gruver1;~Marc_Anton_Finzi1;~Micah_Goldblum1;~Andrew_Gordon_Wilson1", "aff": "New York University;New York University;New York University;New York University", "aff_domain": "nyu.edu;nyu.edu;nyu.edu;nyu.edu", "position": "PhD student;PhD student;Postdoc;Associate Professor", "bibtex": "@inproceedings{\ngruver2023the,\ntitle={The Lie Derivative for Measuring Learned Equivariance},\nauthor={Nate Gruver and Marc Anton Finzi and Micah Goldblum and Andrew Gordon Wilson},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=JL7Va5Vy15J}\n}", "github": "", "project": "", "reviewers": "46iy;TkYs;rDjb", "pdf_size": 3026116, "recommendation": "8;8;8", "confidence": "4;4;5", "correctness": "4;3;3", "technical_novelty": "4;3;3", "empirical_novelty": "4;4;3", "wc_summary_paper": "112;228;74", "wc_strength_and_weaknesses": "83;1096;518", "wc_clarity_quality_novelty_and_reproducibility": "278;30;15", "wc_summary_review": "64;141;23", "wc_review": "537;1495;630", "wc_reply_reviewers": "18;15;0", "wc_reply_authors": "41;787;634", "reply_reviewers": "1;1;0", "reply_authors": "1;1;1", "recommendation_avg": [ 8.0, 0.0 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 138.0, 65.50318058435535 ], "wc_strength_and_weaknesses_avg": [ 565.6666666666666, 414.9267673002336 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 107.66666666666667, 120.59942878066305 ], "wc_summary_review_avg": [ 76.0, 48.91489207456832 ], "wc_review_avg": [ 887.3333333333334, 431.3593500654517 ], 
"wc_reply_reviewers_avg": [ 11.0, 7.874007874011811 ], "wc_reply_authors_avg": [ 487.3333333333333, 321.72693735872076 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 44, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7051067605864303772&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=JL7Va5Vy15J", "email": "nyu.edu;nyu.edu;nyu.edu;nyu.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "New York University", "aff_unique_dep": "", "aff_unique_url": "https://www.nyu.edu", "aff_unique_abbr": "NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "The Onset of Variance-Limited Behavior for Networks in the Lazy and Rich Regimes", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10929", "id": "JLINxPOVTh7", "poster": "/media/PosterPDFs/ICLR%202023/10929.png?t=1680908888.9342453", "openreview": "https://openreview.net/forum?id=JLINxPOVTh7", "slides": "https://iclr.cc/virtual/2023/poster/10929", "video": "https://iclr.cc/virtual/2023/poster/10929", "author_site": "Alexander Atanasov, Blake Bordelon, Sabarish Sainathan, Cengiz Pehlevan", "tldr": "Empirical study of neural networks in the overparameterized regime shows how finite-width effects are brought on by initialization variance as sample size grows.", "abstract": "For small training set sizes $P$, the generalization error of wide neural networks is well-approximated by the error of an infinite width neural network (NN), either in the kernel or mean-field/feature-learning regime. However, after a critical sample size $P^*$, we empirically find the finite-width network generalization becomes worse than that of the infinite width network. In this work, we empirically study the transition from infinite-width behavior to this \\textit{variance-limited} regime as a function of sample size $P$ and network width $N$. We find that finite-size effects can become relevant for very small dataset sizes on the order of $P^* \\sim \\sqrt{N}$ for polynomial regression with ReLU networks. We discuss the source of these effects using an argument based on the variance of the NN's final neural tangent kernel (NTK). This transition can be pushed to larger $P$ by enhancing feature learning or by ensemble averaging the networks. We find that the learning curve for regression with the final NTK is an accurate approximation of the NN learning curve. Using this, we provide a toy model which also exhibits $P^* \\sim \\sqrt{N}$ scaling and has $P$-dependent benefits from feature learning. 
", "keywords": "Feature Learning;Neural Tangent Kernel;Scaling Laws;Deep Ensembles", "primary_area": "", "supplementary_material": "/attachment/489cd95ba97af4ebf082a26b985f5ba0c6f19bf8.zip", "author": "Alexander Atanasov;Blake Bordelon;Sabarish Sainathan;Cengiz Pehlevan", "authorids": "~Alexander_Atanasov1;~Blake_Bordelon1;~Sabarish_Sainathan1;~Cengiz_Pehlevan2", "gender": "M;M;M;", "homepage": "http://abatanasov.com/;https://blakebordelon.github.io/;https://pehlevan.seas.harvard.edu/people/sabarish-sainathan;https://pehlevan.seas.harvard.edu/", "dblp": "305/3785.html;228/6993;;145/3480", "google_scholar": "abMQRYIAAAAJ;yeQ8_pgAAAAJ;;veDLTPEAAAAJ", "orcid": "0000-0002-3338-0324;0000-0003-0455-9445;;0000-0001-9767-6063", "linkedin": "alexatanasov/;;;", "or_profile": "~Alexander_Atanasov1;~Blake_Bordelon1;~Sabarish_Sainathan1;~Cengiz_Pehlevan2", "aff": "Harvard University;Harvard University;Harvard University;School of Engineering and Applied Sciences, Harvard University", "aff_domain": "harvard.edu;harvard.edu;harvard.edu;seas.harvard.edu", "position": "PhD student;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\natanasov2023the,\ntitle={The Onset of Variance-Limited Behavior for Networks in the Lazy and Rich Regimes},\nauthor={Alexander Atanasov and Blake Bordelon and Sabarish Sainathan and Cengiz Pehlevan},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=JLINxPOVTh7}\n}", "github": "", "project": "", "reviewers": "6LDR;JK78;4Na8;PWNi", "pdf_size": 6568592, "recommendation": "6;8;8;8", "confidence": "2;4;2;4", "correctness": "4;4;4;4", "technical_novelty": "2;3;3;4", "empirical_novelty": "2;3;3;0", "wc_summary_paper": "229;174;172;256", "wc_strength_and_weaknesses": "217;102;192;80", "wc_clarity_quality_novelty_and_reproducibility": "372;59;265;2", "wc_summary_review": "71;64;93;29", "wc_review": "889;399;722;367", "wc_reply_reviewers": "17;0;22;72", "wc_reply_authors": "1675;227;681;83", "reply_reviewers": "1;0;1;1", "reply_authors": "3;1;1;1", "recommendation_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 3.0, 1.0 ], "correctness_avg": [ 4.0, 0.0 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.0, 1.224744871391589 ], "wc_summary_paper_avg": [ 207.75, 36.044243645830605 ], "wc_strength_and_weaknesses_avg": [ 147.75, 57.95849808267982 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 174.5, 150.24396826495231 ], "wc_summary_review_avg": [ 64.25, 22.993205518152532 ], "wc_review_avg": [ 594.25, 219.63762769616685 ], "wc_reply_reviewers_avg": [ 27.75, 26.81767141270845 ], "wc_reply_authors_avg": [ 666.5, 622.6786892129841 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.0, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14865548101787304929&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=JLINxPOVTh7", "email": "harvard.edu;harvard.edu;harvard.edu;seas.harvard.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Harvard University", "aff_unique_dep": "", "aff_unique_url": "https://www.harvard.edu", "aff_unique_abbr": "Harvard", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;0;0;0", 
"aff_country_unique": "United States" }, { "title": "Wasserstein Auto-encoded MDPs: Formal Verification of Efficiently Distilled RL Policies with Many-sided Guarantees", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11247", "id": "JLLTtEdh1ZY", "poster": "/media/PosterPDFs/ICLR%202023/11247.png?t=1682428650.6880739", "openreview": "https://openreview.net/forum?id=JLLTtEdh1ZY", "slides": "https://iclr.cc/virtual/2023/poster/11247", "video": "https://iclr.cc/virtual/2023/poster/11247", "author_site": "Florent Delgrange, Ann Nowe, Guillermo Perez", "tldr": "Formal Verification of Efficiently Distilled RL Policies with Many-sided Guarantees", "abstract": "Although deep reinforcement learning (DRL) has many success stories, the large-scale deployment of policies learned through these advanced techniques in safety-critical scenarios is hindered by their lack of formal guarantees. Variational Markov Decision Processes (VAE-MDPs) are discrete latent space models that provide a reliable framework for distilling formally verifiable controllers from any RL policy. While the related guarantees address relevant practical aspects such as the satisfaction of performance and safety properties, the VAE approach suffers from several learning flaws (posterior collapse, slow learning speed, poor dynamics estimates), primarily due to the absence of abstraction and representation guarantees to support latent optimization. We introduce the Wasserstein auto-encoded MDP (WAE-MDP), a latent space model that fixes those issues by minimizing a penalized form of the optimal transport between the behaviors of the agent executing the original policy and the distilled policy, for which the formal guarantees apply. Our approach yields bisimulation guarantees while learning the distilled policy, allowing concrete optimization of the abstraction and representation model quality. Our experiments show that, besides distilling policies up to 10 times faster, the latent model quality is indeed better in general. Moreover, we present experiments from a simple time-to-failure verification algorithm on the latent space. 
The fact that our approach enables such simple verification techniques highlights its applicability.", "keywords": "Reinforcement learning;Formal Verification;Representation Learning", "primary_area": "", "supplementary_material": "/attachment/6414335526cf827e94743950dd11580f8ef15ba7.zip", "author": "Florent Delgrange;Ann Nowe;Guillermo Perez", "authorids": "~Florent_Delgrange1;~Ann_Nowe1;~Guillermo_Perez1", "gender": "M;F;M", "homepage": "https://delgrange.me;https://ai.vub.ac.be/team/ann-nowe/?utm_source=www.google.com&utm_medium=organic&utm_campaign=Google&referrer-analytics=1;https://www.uantwerpen.be/en/staff/guillermoalberto-perez/", "dblp": "234/7693;95/232.html;135/6266.html", "google_scholar": "https://scholar.google.com.hk/citations?hl=fr;https://scholar.google.be/citations?user=LH5QKbgAAAAJ;https://scholar.google.nl/citations?user=MP0yUsgAAAAJ", "orcid": "0000-0003-2254-0596;;0000-0002-1200-4952", "linkedin": ";;", "or_profile": "~Florent_Delgrange1;~Ann_Nowe1;~Guillermo_Perez1", "aff": "Vrije Universiteit Brussel & Universiteit Antwerpen;Vrije Universiteit Brussel;University of Antwerp", "aff_domain": "vub.ac.be;vub.be;uantwerpen.be", "position": "PhD student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\ndelgrange2023wasserstein,\ntitle={Wasserstein Auto-encoded {MDP}s: Formal Verification of Efficiently Distilled {RL} Policies with Many-sided Guarantees},\nauthor={Florent Delgrange and Ann Nowe and Guillermo Perez},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=JLLTtEdh1ZY}\n}", "github": "", "project": "", "reviewers": "MegB;67Kn;JqQE;cw5C", "pdf_size": 1956183, "recommendation": "5;6;8;8", "confidence": "2;3;4;2", "correctness": "3;3;4;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "82;67;75;31", "wc_strength_and_weaknesses": "186;119;441;57", "wc_clarity_quality_novelty_and_reproducibility": "64;50;36;49", "wc_summary_review": "39;73;33;15", "wc_review": "371;309;585;152", "wc_reply_reviewers": "0;16;12;50", "wc_reply_authors": "452;865;756;618", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;1;2", "recommendation_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 63.75, 19.638928178492836 ], "wc_strength_and_weaknesses_avg": [ 200.75, 146.01776433023483 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 49.75, 9.908960591303208 ], "wc_summary_review_avg": [ 40.0, 21.0 ], "wc_review_avg": [ 354.25, 155.30514318592284 ], "wc_reply_reviewers_avg": [ 19.5, 18.567444627627143 ], "wc_reply_authors_avg": [ 672.75, 154.61140805257548 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.4061811972299616, "corr_recommendation_correctness": 0.5555555555555555, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15824382751575363253&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=JLLTtEdh1ZY", "email": "vub.ac.be;vub.be;uantwerpen.be", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Vrije Universiteit Brussel;University of Antwerp", "aff_unique_dep": ";", "aff_unique_url": 
"https://www.vub.be;https://www.uantwerp.be", "aff_unique_abbr": "VUB;UA", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Brussels;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Belgium" }, { "title": "Latent Graph Inference using Product Manifolds", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11161", "id": "JLR_B7n_Wqr", "poster": "/media/PosterPDFs/ICLR%202023/11161.png?t=1682285057.5455005", "openreview": "https://openreview.net/forum?id=JLR_B7n_Wqr", "slides": "https://iclr.cc/virtual/2023/poster/11161", "video": "https://iclr.cc/virtual/2023/poster/11161", "author_site": "Haitz S\u00e1ez de Oc\u00e1riz Borde, Anees Kazi, Federico Barbero, Pietro Lio", "tldr": "", "abstract": "Graph Neural Networks usually rely on the assumption that the graph topology is available to the network as well as optimal for the downstream task. Latent graph inference allows models to dynamically learn the intrinsic graph structure of problems where the connectivity patterns of data may not be directly accessible. In this work, we generalize the discrete Differentiable Graph Module (dDGM) for latent graph learning. The original dDGM architecture used the Euclidean plane to encode latent features based on which the latent graphs were generated. By incorporating Riemannian geometry into the model and generating more complex embedding spaces, we can improve the performance of the latent graph inference system. In particular, we propose a computationally tractable approach to produce product manifolds of constant curvature model spaces that can encode latent features of varying structure. The latent representations mapped onto the inferred product manifold are used to compute richer similarity measures that are leveraged by the latent graph learning model to obtain optimized latent graphs. Moreover, the curvature of the product manifold is learned during training alongside the rest of the network parameters and based on the downstream task, rather than it being a static embedding space. 
Our novel approach is tested on a wide range of datasets, and outperforms the original dDGM model.", "keywords": "Latent Graph Inference;Product Manifolds;Graph Neural Networks", "primary_area": "", "supplementary_material": "", "author": "Haitz S\u00e1ez de Oc\u00e1riz Borde;Anees Kazi;Federico Barbero;Pietro Lio", "authorids": "~Haitz_S\u00e1ez_de_Oc\u00e1riz_Borde1;anees.kazi77@gmail.com;fb548@cam.ac.uk;~Pietro_Lio1", "gender": "M;;;M", "homepage": "https://www.linkedin.com/in/haitz-s%C3%A1ez-de-oc%C3%A1riz-borde-0933a9199/;;;https://www.cst.cam.ac.uk/people/pl219", "dblp": ";;;l/PietroLio.html", "google_scholar": "aP0OakUAAAAJ;;;https://scholar.google.co.uk/citations?user=3YrWf7EAAAAJ", "orcid": ";;;0000-0002-0540-5053", "linkedin": ";;;", "or_profile": "~Haitz_S\u00e1ez_de_Oc\u00e1riz_Borde1;anees.kazi77@gmail.com;fb548@cam.ac.uk;~Pietro_Lio1", "aff": "University of Oxford;;;University of Cambridge", "aff_domain": "ox.ac.uk;;;cam.ac.uk", "position": "PhD student;;;Full Professor", "bibtex": "@inproceedings{\nborde2023latent,\ntitle={Latent Graph Inference using Product Manifolds},\nauthor={Haitz S{\\'a}ez de Oc{\\'a}riz Borde and Anees Kazi and Federico Barbero and Pietro Lio},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=JLR_B7n_Wqr}\n}", "github": "", "project": "", "reviewers": "R63Y;DMwU;vGZV", "pdf_size": 6167583, "recommendation": "5;6;8", "confidence": "4;2;3", "correctness": "3;3;4", "technical_novelty": "2;3;4", "empirical_novelty": "1;3;3", "wc_summary_paper": "78;70;56", "wc_strength_and_weaknesses": "475;52;77", "wc_clarity_quality_novelty_and_reproducibility": "28;13;17", "wc_summary_review": "77;67;22", "wc_review": "658;202;172", "wc_reply_reviewers": "406;25;0", "wc_reply_authors": "1479;678;356", "reply_reviewers": "4;1;0", "reply_authors": "5;2;1", "recommendation_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.816496580927726 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.9428090415820634 ], "wc_summary_paper_avg": [ 68.0, 9.092121131323903 ], "wc_strength_and_weaknesses_avg": [ 201.33333333333334, 193.7805173098908 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 19.333333333333332, 6.342099196813483 ], "wc_summary_review_avg": [ 55.333333333333336, 23.921166824012207 ], "wc_review_avg": [ 344.0, 222.36906259639628 ], "wc_reply_reviewers_avg": [ 143.66666666666666, 185.77824295529217 ], "wc_reply_authors_avg": [ 837.6666666666666, 472.1598128694234 ], "reply_reviewers_avg": [ 1.6666666666666667, 1.699673171197595 ], "reply_authors_avg": [ 2.6666666666666665, 1.699673171197595 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.3273268353539886, "corr_recommendation_correctness": 0.944911182523068, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4094050890425141340&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "pdf": "https://openreview.net/pdf?id=JLR_B7n_Wqr", "email": "ox.ac.uk;;;cam.ac.uk", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "University of Oxford;University of Cambridge", "aff_unique_dep": ";", "aff_unique_url": "https://www.ox.ac.uk;https://www.cam.ac.uk", "aff_unique_abbr": "Oxford;Cambridge", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;0", 
"aff_country_unique": "United Kingdom" }, { "title": "(Certified!!) Adversarial Robustness for Free!", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11081", "id": "JLg5aHHv7j", "poster": "", "openreview": "https://openreview.net/forum?id=JLg5aHHv7j", "slides": "https://iclr.cc/virtual/2023/poster/11081", "video": "https://iclr.cc/virtual/2023/poster/11081", "author_site": "Nicholas Carlini, Florian Tramer, Krishnamurthy Dvijotham, Leslie Rice, Mingjie Sun, Zico Kolter", "tldr": "Using an off-the-shelf diffusion model as a denoiser gives state-of-the-art certified adversarial robustness.", "abstract": "In this paper we show how to achieve state-of-the-art certified adversarial robustness to 2-norm bounded perturbations by relying exclusively on off-the-shelf pretrained models. To do so, we instantiate the denoised smoothing approach of Salman et al. by combining a pretrained denoising diffusion probabilistic model and a standard high-accuracy classifier. This allows us to certify 71% accuracy on ImageNet under adversarial perturbations constrained to be within a 2-norm of 0.5, an improvement of 14 percentage points over the prior certified SoTA using any approach, or an improvement of 30 percentage points over denoised smoothing. We obtain these results using only pretrained diffusion models and image classifiers, without requiring any fine tuning or retraining of model parameters.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Nicholas Carlini;Florian Tramer;Krishnamurthy Dj Dvijotham;Leslie Rice;Mingjie Sun;J Zico Kolter", "authorids": "~Nicholas_Carlini1;~Florian_Tramer1;~Krishnamurthy_Dj_Dvijotham1;~Leslie_Rice1;~Mingjie_Sun1;~J_Zico_Kolter1", "gender": ";M;F;M;M;M", "homepage": "http://nicholas.carlini.com;http://floriantramer.com;https://leslierice1.github.io/;https://eric-mingjie.github.io/;http://www.zicokolter.com;http://dvij.github.io", "dblp": "145/1806;158/7224;;54/3913;67/2526;16/8758", "google_scholar": ";https://scholar.google.ch/citations?user=ijH0-a8AAAAJ;HpT4p-UAAAAJ;wCZbouUAAAAJ;UXh1I6UAAAAJ;BUtloecAAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Nicholas_Carlini1;~Florian_Tramer1;~Leslie_Rice1;~Mingjie_Sun1;~Zico_Kolter1;~Krishnamurthy_Dvijotham2", "aff": "Google;ETHZ - ETH Zurich;Carnegie Mellon University;Computer Science Department, Carnegie Mellon University;Carnegie Mellon University;Google Brain", "aff_domain": "google.com;ethz.ch;cmu.edu;cs.cmu.edu;cmu.edu;google.com", "position": "Researcher;Assistant Professor;PhD student;PhD student;Full Professor;research scientist ", "bibtex": "@inproceedings{\ncarlini2023certified,\ntitle={(Certified!!) 
Adversarial Robustness for Free!},\nauthor={Nicholas Carlini and Florian Tramer and Krishnamurthy Dj Dvijotham and Leslie Rice and Mingjie Sun and J Zico Kolter},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=JLg5aHHv7j}\n}", "github": "", "project": "", "reviewers": "VkUH;kRsg;Kvhz;PB4j", "pdf_size": 3164239, "recommendation": "6;6;8;8", "confidence": "4;4;4;4", "correctness": "3;3;3;4", "technical_novelty": "3;2;2;3", "empirical_novelty": "0;4;4;3", "wc_summary_paper": "39;82;28;109", "wc_strength_and_weaknesses": "68;181;298;149", "wc_clarity_quality_novelty_and_reproducibility": "45;29;71;40", "wc_summary_review": "91;31;36;17", "wc_review": "243;323;433;315", "wc_reply_reviewers": "0;0;80;0", "wc_reply_authors": "96;160;252;179", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.75, 1.6393596310755 ], "wc_summary_paper_avg": [ 64.5, 32.668792447839266 ], "wc_strength_and_weaknesses_avg": [ 174.0, 82.59237252918697 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 46.25, 15.417117110536587 ], "wc_summary_review_avg": [ 43.75, 28.154706533721853 ], "wc_review_avg": [ 328.5, 67.9025036357276 ], "wc_reply_reviewers_avg": [ 20.0, 34.64101615137755 ], "wc_reply_authors_avg": [ 171.75, 55.60744104883806 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10102578473051301550&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=JLg5aHHv7j", "email": "google.com;ethz.ch;cmu.edu;cs.cmu.edu;cmu.edu;google.com", "author_num": 6, "aff_unique_index": "0;1;2;2;2;0", "aff_unique_norm": "Google;ETH Zurich;Carnegie Mellon University", "aff_unique_dep": "Google;;", "aff_unique_url": "https://www.google.com;https://www.ethz.ch;https://www.cmu.edu", "aff_unique_abbr": "Google;ETHZ;CMU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;1;0;0;0;0", "aff_country_unique": "United States;Switzerland" }, { "id": "JOix_wb4AeM", "title": "In-distribution and Out-of-distribution Generalization for Graph Neural Networks", "track": "main", "status": "Reject", "tldr": "", "abstract": "Graph neural networks (GNNs) are models that allow learning with structured data of varying size. Despite their popularity, theoretical understanding of the generalization of GNNs is an under-explored topic. In this work, we expand the theoretical understanding of both in-distribution and out-of-distribution generalization of GNNs. Firstly, we improve upon the state-of-the-art PAC-Bayes (in-distribution) generalization bound primarily by reducing an exponential dependency on the node degree to a linear dependency. Secondly, utilizing tools from spectral graph theory, we prove some rigorous guarantees about the out-of-distribution (OOD) size generalization of GNNs, where graphs in the training set have different numbers of nodes and edges from those in the test set. To empirically verify our theoretical findings, we conduct experiments on both synthetic and real-world graph datasets. 
Our computed generalization gaps for the in-distribution case significantly improve the state-of-the-art PAC-Bayes results. For the OOD case, experiments on community classification tasks in large social networks show that GNNs achieve strong size generalization performance in cases guaranteed by our theory.", "keywords": "Graph Neural Networks;Generalization Bounds;Out-of-distribution generalization;Learning theory", "primary_area": "", "supplementary_material": "", "author": "Emmanuel Sales;Renjie Liao;Nick Harvey", "authorids": "~Emmanuel_Sales1;~Renjie_Liao1;~Nick_Harvey1", "gender": "M;M;M", "homepage": "https://emsal.me/about;https://lrjconan.github.io/;https://www.cs.ubc.ca/~nickhar/", "dblp": ";08/8180;93/4141", "google_scholar": ";2wrS35MAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Emmanuel_Sales1;~Renjie_Liao1;~Nick_Harvey1", "aff": ";Department of Electrical and Computer Engineering, The University of British Columbia;University of British Columbia", "aff_domain": ";ece.ubc.ca;ubc.ca", "position": ";Assistant Professor;Full Professor", "bibtex": "@misc{\nsales2023indistribution,\ntitle={In-distribution and Out-of-distribution Generalization for Graph Neural Networks},\nauthor={Emmanuel Sales and Renjie Liao and Nick Harvey},\nyear={2023},\nurl={https://openreview.net/forum?id=JOix_wb4AeM}\n}", "github": "", "project": "", "reviewers": "MGXu;y9pP;KCgB;h6gY;Lvuo", "site": "https://openreview.net/forum?id=JOix_wb4AeM", "pdf_size": 497388, "recommendation": "3;5;6;6;6", "confidence": "4;3;3;3;3", "correctness": "2;3;4;3;3", "technical_novelty": "2;2;3;3;3", "empirical_novelty": "2;2;2;2;3", "wc_summary_paper": "72;46;230;406;55", "wc_strength_and_weaknesses": "588;261;316;318;341", "wc_clarity_quality_novelty_and_reproducibility": "28;45;103;54;40", "wc_summary_review": "28;67;124;97;33", "wc_review": "716;419;773;875;469", "wc_reply_reviewers": "351;225;49;0;0", "wc_reply_authors": "859;226;167;567;422", "reply_reviewers": "1;1;1;0;0", "reply_authors": "3;1;1;1;1", "recommendation_avg": [ 5.2, 1.16619037896906 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "correctness_avg": [ 3.0, 0.6324555320336759 ], "technical_novelty_avg": [ 2.6, 0.4898979485566356 ], "empirical_novelty_avg": [ 2.2, 0.39999999999999997 ], "wc_summary_paper_avg": [ 161.8, 139.4021520637325 ], "wc_strength_and_weaknesses_avg": [ 364.8, 114.65670499364614 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 54.0, 25.899806949087477 ], "wc_summary_review_avg": [ 69.8, 36.8423669163641 ], "wc_review_avg": [ 650.4, 176.76606009073123 ], "wc_reply_reviewers_avg": [ 125.0, 140.07283819499054 ], "wc_reply_authors_avg": [ 448.2, 249.80104083049773 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 0.8000000000000002 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.9432422182837988, "corr_recommendation_correctness": 0.8134892168199606, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=546301435478004968&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "University of British Columbia", "aff_unique_dep": "Department of Electrical and Computer Engineering", "aff_unique_url": "https://www.ubc.ca", "aff_unique_abbr": "UBC", "aff_campus_unique_index": "0", "aff_campus_unique": "Vancouver;", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { "id": "JQK0BsKpE8", "title": "NetBooster: Empowering Tiny Deep Learning By Standing on the 
Shoulders of Deep Giants", "track": "main", "status": "Withdraw", "tldr": "We propose an expansion-then-contraction training strategy on both width and depth dimension to fully unleash tiny neural network's potential on large scale datasets and downstream tasks.", "abstract": "Tiny deep learning has attracted increasingly growing interest driven by the substantial demand for deep learning solutions in numerous Internet-of-Things (IoT) applications. Nevertheless, due to the under-fitting issue, it is still a challenge to unleash tiny deep learning\u2019s full potential on large-scale datasets. Consequently, tiny neural networks\u2019 (TNNs\u2019) downstream task performance is limited due to the inferior learned representations during pretraining. To this end, we propose a framework dubbed NetBooster which empowers tiny deep learning from a novel perspective by augmenting the architecture of TNNs via an expansion-then-contraction strategy. Specifically, during training, our proposed NetBooster first expands each/some layer(s) of a given TNN into multi-layer blocks, favoring the learning of more complex features to generate an expanded counterpart model (i.e., deep giant), and then contracts the expanded layers by gradually removing the non-linear layers from the expanded ones to recover efficiency. NetBooster\u2019s expansion-then-contraction training empowers its trained TNNs to benefit from the superior performance of their expanded counterparts while preserving the TNNs\u2019 original complexity and thus inference efficiency. Extensive experiments and ablation studies on two tasks, seven datasets, and six networks validate that NetBooster consistently leads to a nontrivial accuracy boost (e.g., 1.3% \u223c 2.5%) on top of state-of-the-art TNNs on ImageNet and as much as 4.7% higher accuracy on various downstream datasets, while maintaining their inference complexity/efficiency.", "keywords": "Network Training;Transfer Learning", "primary_area": "", "supplementary_material": "", "author": "Zhongzhi Yu;Yonggan Fu;Jiayi Yuan;Haoran You;Yingyan Lin", "authorids": "~Zhongzhi_Yu1;~Yonggan_Fu1;~Jiayi_Yuan1;~Haoran_You1;~Yingyan_Lin1", "gender": "M;M;;M;F", "homepage": ";https://www.yongganfu.com/;https://jy-yuan.github.io/;http://haoranyou.com/;https://eiclab.scs.gatech.edu/", "dblp": "198/8338;244/8166;251/4029-1.html;230/4247;120/6981", "google_scholar": "KjvcaBQAAAAJ;https://scholar.google.com/citations?hl=en;XMrlrV8AAAAJ;z5Eku1sAAAAJ;dio8IesAAAAJ", "orcid": ";;;0000-0002-2873-2153;", "linkedin": "zhongzhi-yu/;yonggan-fu-b211831b0;;haoran-you-b4b958165/;yingyan-celine-lin-a281211a/", "or_profile": "~Zhongzhi_Yu1;~Yonggan_Fu1;~Jiayi_Yuan1;~Haoran_You1;~Yingyan_Lin1", "aff": "Georgia Institute of Technology;Rice University;Rice University;Georgia Institute of Technology;Georgia Institute of Technology", "aff_domain": "gatech.edu;rice.edu;rice.edu;gatech.edu;gatech.edu", "position": "PhD student;PhD student;PhD student;PhD student;Associate Professor", "bibtex": "@misc{\nyu2023netbooster,\ntitle={NetBooster: Empowering Tiny Deep Learning By Standing on the Shoulders of Deep Giants},\nauthor={Zhongzhi Yu and Yonggan Fu and Jiayi Yuan and Haoran You and Yingyan Lin},\nyear={2023},\nurl={https://openreview.net/forum?id=JQK0BsKpE8}\n}", "github": "", "project": "", "reviewers": "fUr1;mp5Z;ANVE", "site": "https://openreview.net/forum?id=JQK0BsKpE8", "pdf_size": 1486972, "recommendation": "3;3;5", "confidence": "3;5;3", "correctness": "3;2;3", "technical_novelty": "2;2;2", "empirical_novelty": 
"2;2;2", "wc_summary_paper": "113;27;88", "wc_strength_and_weaknesses": "296;539;111", "wc_clarity_quality_novelty_and_reproducibility": "56;31;41", "wc_summary_review": "19;36;28", "wc_review": "484;633;268", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 76.0, 36.12016980395672 ], "wc_strength_and_weaknesses_avg": [ 315.3333333333333, 175.26424494332994 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 42.666666666666664, 10.274023338281628 ], "wc_summary_review_avg": [ 27.666666666666668, 6.944222218666553 ], "wc_review_avg": [ 461.6666666666667, 149.84510521053252 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.5000000000000001, "corr_recommendation_correctness": 0.5, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12906186936837683647&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;1;0;0", "aff_unique_norm": "Georgia Institute of Technology;Rice University", "aff_unique_dep": ";", "aff_unique_url": "https://www.gatech.edu;https://www.rice.edu", "aff_unique_abbr": "Georgia Tech;Rice", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Interaction-Based Disentanglement of Entities for Object-Centric World Models", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10829", "id": "JQc2VowqCzz", "poster": "/media/PosterPDFs/ICLR%202023/10829.png?t=1682614301.42934", "openreview": "https://openreview.net/forum?id=JQc2VowqCzz", "slides": "https://iclr.cc/virtual/2023/poster/10829", "video": "https://iclr.cc/virtual/2023/poster/10829", "author_site": "Akihiro Nakano, Masahiro Suzuki, Yutaka Matsuo", "tldr": "We present a structured, action-conditioned probabilistic model that learns to disentangle object representations based on interactions and demonstrate its ability to solve downstream tasks.", "abstract": "Perceiving the world compositionally in terms of space and time is essential to understanding object dynamics and solving downstream tasks. Object-centric learning using generative models has improved in its ability to learn distinct representations of individual objects and predict their interactions, and how to utilize the learned representations to solve untrained, downstream tasks is a focal question. However, as models struggle to predict object interactions and track the objects accurately, especially for unseen configurations, using object-centric representations in downstream tasks is still a challenge. This paper proposes STEDIE, a new model that disentangles object representations, based on interactions, into interaction-relevant relational features and interaction-irrelevant global features without supervision. Empirical evaluation shows that the proposed model factorizes global features, unaffected by interactions from relational features that are necessary to predict outcome of interactions. 
We also show that STEDIE achieves better performance in planning tasks and understanding causal relationships. In both tasks, our model not only achieves better performance in terms of reconstruction ability but also utilizes the disentangled representations to solve the tasks in a structured manner.", "keywords": "object-centric;object-oriented;world models;self-supervised learning;probabilistic deep learning;structured models;video prediction;physics prediction;planning;variational autoencoders;model-based reinforcement learning;VAEs;unsupervised", "primary_area": "", "supplementary_material": "", "author": "Akihiro Nakano;Masahiro Suzuki;Yutaka Matsuo", "authorids": "~Akihiro_Nakano1;~Masahiro_Suzuki1;~Yutaka_Matsuo1", "gender": "M;M;M", "homepage": ";;http://ymatsuo.com", "dblp": "307/5510;;m/YMatsuo.html", "google_scholar": "RyIUFfkAAAAJ;r2nt5kUAAAAJ;Dy8iau4AAAAJ", "orcid": ";;", "linkedin": "akihiro-nakano-871195189/;;", "or_profile": "~Akihiro_Nakano1;~Masahiro_Suzuki1;~Yutaka_Matsuo1", "aff": "The University of Tokyo, The University of Tokyo;The University of Tokyo, Tokyo Institute of Technology;The University of Tokyo", "aff_domain": "weblab.t.u-tokyo.ac.jp;u-tokyo.ac.jp;u-tokyo.ac.jp", "position": "MS student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nnakano2023interactionbased,\ntitle={Interaction-Based Disentanglement of Entities for Object-Centric World Models},\nauthor={Akihiro Nakano and Masahiro Suzuki and Yutaka Matsuo},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=JQc2VowqCzz}\n}", "github": "", "project": "", "reviewers": "qwR6;sHVk;Hvkp;wuf5", "pdf_size": 3493679, "recommendation": "5;6;6;6", "confidence": "3;4;2;4", "correctness": "3;3;3;3", "technical_novelty": "3;3;3;4", "empirical_novelty": "3;3;3;4", "wc_summary_paper": "67;77;34;73", "wc_strength_and_weaknesses": "158;94;210;288", "wc_clarity_quality_novelty_and_reproducibility": "80;32;17;57", "wc_summary_review": "25;44;6;62", "wc_review": "330;247;267;480", "wc_reply_reviewers": "125;149;0;0", "wc_reply_authors": "539;1084;556;450", "reply_reviewers": "1;2;0;0", "reply_authors": "1;2;1;1", "recommendation_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 62.75, 16.97608612136496 ], "wc_strength_and_weaknesses_avg": [ 187.5, 71.09676504595691 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 46.5, 24.046829312822098 ], "wc_summary_review_avg": [ 34.25, 20.90902915010642 ], "wc_review_avg": [ 331.0, 91.31538753134654 ], "wc_reply_reviewers_avg": [ 68.5, 69.02354670690285 ], "wc_reply_authors_avg": [ 657.25, 249.65113158165337 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.17407765595569782, "corr_recommendation_correctness": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5323289051041149271&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "pdf": "https://openreview.net/pdf?id=JQc2VowqCzz", "email": "weblab.t.u-tokyo.ac.jp;u-tokyo.ac.jp;u-tokyo.ac.jp", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Tokyo", "aff_unique_dep": "", "aff_unique_url": "https://www.u-tokyo.ac.jp", 
"aff_unique_abbr": "UTokyo", "aff_campus_unique_index": "1", "aff_campus_unique": ";Tokyo", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Japan" }, { "id": "JRFSLFyYAII", "title": "Population-Based Reinforcement Learning for Combinatorial Optimization Problems", "track": "main", "status": "Reject", "tldr": "We present a population-based RL method for CO problems: the training procedure makes the agents complementary to maximize the population's performance.", "abstract": "Applying reinforcement learning to combinatorial optimization problems is attractive as it obviates the need for expert knowledge or pre-solved instances. However, it is unrealistic to expect an agent to solve these (often NP-)hard problems in a single shot at inference due to their inherent complexity, thus leading approaches are often augmented with additional search strategies, from stochastic sampling and beam-search to explicit fine-tuning.\nIn this paper, we argue for the benefits of learning a population of complementary agents, which can be simultaneously rolled out at inference. To this end, we introduce Poppy, a simple theoretically grounded training procedure for populations. Instead of relying on a predefined or hand-crafted notion of diversity, Poppy induces an unsupervised specialization targeted solely at maximizing the performance of the whole population. We show that Poppy leads to a set of complementary heuristics, and obtain state-of-the-art results on three popular NP-hard problems: the traveling salesman (TSP), the capacitated vehicle routing (CVRP), and 0-1 knapsack (KP). On TSP specifically, Poppy divides by 5 the optimality gap while reducing the inference time by more than 10 compared to previous state-of-the-art reinforcement learning approaches.", "keywords": "reinforcement learning;combinatorial optimization;population", "primary_area": "", "supplementary_material": "", "author": "Nathan Grinsztajn;Daniel Furelos-Blanco;Thomas D Barrett", "authorids": "~Nathan_Grinsztajn1;~Daniel_Furelos-Blanco1;~Thomas_D_Barrett1", "gender": "M;;M", "homepage": "https://nathangrinsztajn.github.io/;https://www.danielfurelos.com;", "dblp": ";223/0191;248/8263", "google_scholar": "yVHIYEYAAAAJ;https://scholar.google.co.uk/citations?user=IfMKjBgAAAAJ;nJa1KGIAAAAJ", "orcid": "0000-0001-6817-5972;;0000-0001-6241-3028", "linkedin": "nathan-grinsztajn-960379139/?locale=en_US;;tom-barrett-62b180a2/", "or_profile": "~Nathan_Grinsztajn1;~Daniel_Furelos-Blanco1;~Thomas_D_Barrett1", "aff": "InstaDeep;Imperial College London, Imperial College London;InstaDeep", "aff_domain": "instadeep.com;imperial.ac.uk;instadeep.com", "position": "Researcher;PhD student;Researcher", "bibtex": "@misc{\ngrinsztajn2023populationbased,\ntitle={Population-Based Reinforcement Learning for Combinatorial Optimization Problems},\nauthor={Nathan Grinsztajn and Daniel Furelos-Blanco and Thomas D Barrett},\nyear={2023},\nurl={https://openreview.net/forum?id=JRFSLFyYAII}\n}", "github": "", "project": "", "reviewers": "rRMu;5SRy;rpzd", "site": "https://openreview.net/forum?id=JRFSLFyYAII", "pdf_size": 1227247, "recommendation": "5;5;6", "confidence": "3;4;4", "correctness": "3;3;3", "technical_novelty": "2;3;3", "empirical_novelty": "2;3;2", "wc_summary_paper": "97;77;87", "wc_strength_and_weaknesses": "245;342;483", "wc_clarity_quality_novelty_and_reproducibility": "78;95;222", "wc_summary_review": "53;35;97", "wc_review": "473;549;889", "wc_reply_reviewers": "0;25;0", "wc_reply_authors": "1251;1538;1361", "reply_reviewers": "0;1;0", 
"reply_authors": "2;2;2", "recommendation_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 87.0, 8.16496580927726 ], "wc_strength_and_weaknesses_avg": [ 356.6666666666667, 97.71500510270785 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 131.66666666666666, 64.25124296246901 ], "wc_summary_review_avg": [ 61.666666666666664, 26.042699979499478 ], "wc_review_avg": [ 637.0, 180.87196207999366 ], "wc_reply_reviewers_avg": [ 8.333333333333334, 11.785113019775793 ], "wc_reply_authors_avg": [ 1383.3333333333333, 118.22671252959525 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.4999999999999999, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10376339695107766517&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;0", "aff_unique_norm": "InstaDeep;Imperial College London", "aff_unique_dep": ";", "aff_unique_url": "https://www.instadeep.com;https://www.imperial.ac.uk", "aff_unique_abbr": "InstaDeep;ICL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "JSZvTDggUvz", "title": "Understanding Masked Image Modeling via Learning Occlusion Invariant Feature", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Recently, Masked Image Modeling (MIM) achieves great success in self-supervised visual recognition. However, as a reconstruction-based framework, it is still an open question to understand how MIM works, since MIM appears very different from previous well-studied siamese approaches such as contrastive learning. In this paper, we propose a new viewpoint: MIM implicitly learns occlusion-invariant features, which is analogous to other siamese methods while the latter learns other invariance. By relaxing MIM formulation into an equivalent siamese form, MIM methods can be interpreted in a unified framework with conventional methods, among which only a) data transformations, i.e. what invariance to learn, and b) similarity measurements are different. Furthermore, taking MAE (He et al., 2021) as a representative example of MIM, we empirically find the success of MIM models relates a little to the choice of similarity functions, but the learned occlusion invariant feature introduced by masked image \u2013 it turns out to be a favored initialization for vision transformers, even though the learned feature could be less semantic. 
We hope our findings could inspire researchers to develop more powerful self-supervised methods in the computer vision community.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Xiangwen Kong;Xiangyu Zhang", "authorids": "~Xiangwen_Kong1;~Xiangyu_Zhang1", "gender": ";M", "homepage": ";", "dblp": ";95/3760-5.html", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;yuB-cfoAAAAJ", "orcid": ";0000-0003-2138-4608", "linkedin": ";", "or_profile": "~Xiangwen_Kong1;~Xiangyu_Zhang1", "aff": "MEGVII Technology;MEGVII Technology", "aff_domain": "megvii.com;megvii.com", "position": "Researcher;Principal Researcher", "bibtex": "@misc{\nkong2023understanding,\ntitle={Understanding Masked Image Modeling via Learning Occlusion Invariant Feature},\nauthor={Xiangwen Kong and Xiangyu Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=JSZvTDggUvz}\n}", "github": "", "project": "", "reviewers": "mC5H;uDY6;pxhX;UhyS", "site": "https://openreview.net/forum?id=JSZvTDggUvz", "pdf_size": 491036, "recommendation": "1;1;5;8", "confidence": "4;4;3;4", "correctness": "3;1;3;4", "technical_novelty": "1;1;2;4", "empirical_novelty": "1;2;2;3", "wc_summary_paper": "80;22;129;53", "wc_strength_and_weaknesses": "138;504;267;156", "wc_clarity_quality_novelty_and_reproducibility": "29;11;273;44", "wc_summary_review": "16;21;58;24", "wc_review": "263;558;727;277", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.75, 2.947456530637899 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.75, 1.0897247358851685 ], "technical_novelty_avg": [ 2.0, 1.224744871391589 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 71.0, 39.274673773310965 ], "wc_strength_and_weaknesses_avg": [ 266.25, 145.88415781022968 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 89.25, 106.72950622953336 ], "wc_summary_review_avg": [ 29.75, 16.55860803328589 ], "wc_review_avg": [ 456.25, 195.66217697858724 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.24485105343719588, "corr_recommendation_correctness": 0.7588927544773257, "gs_citation": 56, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5962552400166695847&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0", "aff_unique_norm": "Megvii Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.megvii.com", "aff_unique_abbr": "", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Rarity Score : A New Metric to Evaluate the Uncommonness of Synthesized Images", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12261", "id": "JTGimap_-F", "poster": "/media/PosterPDFs/ICLR%202023/12261.png?t=1682568279.7975142", "openreview": "https://openreview.net/forum?id=JTGimap_-F", "slides": "https://iclr.cc/virtual/2023/poster/12261", "video": "https://iclr.cc/virtual/2023/poster/12261", "author_site": "Jiyeon Han, Hwanil Choi, Yunjey Choi, Junho Kim, Jung-Woo Ha, Jaesik Choi", "tldr": "", "abstract": "Evaluation metrics in image synthesis play a key role in measuring the performance of generative models. However, most metrics mainly focus on image fidelity.
Existing diversity metrics are derived by comparing distributions, and thus they cannot quantify the diversity or rarity degree of each generated image. In this work, we propose a new evaluation metric, called `rarity score', to measure both image-wise uncommonness and model-wise diversified generation performance. \nWe first show empirical observation that typical samples are close to each other and distinctive samples are far from each other in nearest-neighbor distances on latent spaces represented by feature extractor networks such as VGG16. We then show that one can effectively filter typical or distinctive samples with the proposed metric. We also use our metric to demonstrate that the extent to which different generative models produce rare images can be effectively compared. Further, our metric can be used to compare rarities between datasets that share the same concept such as CelebA-HQ and FFHQ. Finally, we analyze the use of metrics in different designs of feature extractors to better understand the relationship between feature spaces and resulting high-rarity images. Code will be publicly available for the research community.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Jiyeon Han;Hwanil Choi;Yunjey Choi;Junho Kim;Jung-Woo Ha;Jaesik Choi", "authorids": "~Jiyeon_Han1;~Hwanil_Choi1;~Yunjey_Choi3;~Junho_Kim3;~Jung-Woo_Ha1;~Jaesik_Choi1", "gender": ";M;M;M;M;M", "homepage": ";;http://bit.ly/jhkim_resume;https://aidljwha.wordpress.com/;https://sailab.kaist.ac.kr/jaesik;https://yunjey.github.io/", "dblp": ";;;66/867-1;13/1402;210/0980", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;QrF3kNwAAAAJ;WtjDugkAAAAJ;https://scholar.google.co.kr/citations?user=eGj3ay4AAAAJ;RqMLVzUAAAAJ;v_4lOaAAAAAJ", "orcid": ";;0000-0003-3712-8510;0000-0002-7400-7681;;", "linkedin": ";hwanil-choi-19808314a/;taki0112/;jung-woo-ha-b2782862?trk=hp-identity-name;;", "or_profile": "~Jiyeon_Han1;~Hwanil_Choi1;~Junho_Kim3;~Jung-Woo_Ha1;~Jaesik_Choi1;~yunjey_choi1", "aff": "Korea Advanced Institute of Science & Technology;LG AI Research;NAVER;NAVER AI Lab;Korea Advanced Institute of Science & Technology;NAVER", "aff_domain": "kaist.ac.kr;lgresearch.ai;navercorp.com;navercorp.com;kaist.ac.kr;navercorp.com", "position": "PhD student;Researcher;Research Scientist;Head (Executive Director);Associate Professor;Research Scientist", "bibtex": "@inproceedings{\nhan2023rarity,\ntitle={Rarity Score : A New Metric to Evaluate the Uncommonness of Synthesized Images},\nauthor={Jiyeon Han and Hwanil Choi and Yunjey Choi and Junho Kim and Jung-Woo Ha and Jaesik Choi},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=JTGimap_-F}\n}", "github": "", "project": "", "reviewers": "vLSP;8Kcg;tiDM;3DoB", "pdf_size": 24850070, "recommendation": "6;6;8;8", "confidence": "3;4;4;3", "correctness": "3;3;3;4", "technical_novelty": "3;3;3;3", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "62;71;172;77", "wc_strength_and_weaknesses": "342;196;92;101", "wc_clarity_quality_novelty_and_reproducibility": "17;14;848;29", "wc_summary_review": "48;5;37;21", "wc_review": "469;286;1149;228", "wc_reply_reviewers": "25;13;226;0", "wc_reply_authors": "781;588;1305;342", "reply_reviewers": "1;1;2;0", "reply_authors": "4;3;5;2", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 
3.0, 0.0 ], "wc_summary_paper_avg": [ 95.5, 44.48876262608346 ], "wc_strength_and_weaknesses_avg": [ 182.75, 100.56683101301343 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 227.0, 358.57844330076506 ], "wc_summary_review_avg": [ 27.75, 16.269219403523945 ], "wc_review_avg": [ 533.0, 366.6012820490403 ], "wc_reply_reviewers_avg": [ 66.0, 92.79816808536685 ], "wc_reply_authors_avg": [ 754.0, 354.1292137059579 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.5, 1.118033988749895 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1584172437029963149&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=JTGimap_-F", "email": "kaist.ac.kr;lgresearch.ai;navercorp.com;navercorp.com;kaist.ac.kr;navercorp.com", "author_num": 6, "aff_unique_index": "0;1;2;2;0;2", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;LG;NAVER Corporation", "aff_unique_dep": ";LG AI Research;", "aff_unique_url": "https://www.kaist.ac.kr;https://www.lgaires.com;https://www.naver.com", "aff_unique_abbr": "KAIST;LG AI;NAVER", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "JUNKYmGGuEw", "title": "Neural multi-event forecasting on spatio-temporal point processes using probabilistically enriched transformers", "track": "main", "status": "Reject", "tldr": " In this work, we introduce a novel neural network that is capable of simultaneous multi-event forecasting of spatio-temporal distributions associated with stochastic discrete events.", "abstract": "Predicting discrete events in time and space has many scientific applications, such as predicting hazardous earthquakes and outbreaks of infectious diseases. History-dependent spatio-temporal Hawkes processes are often used to mathematically model these point events. However, previous approaches have faced numerous challenges, particularly when attempting to forecast multiple future events. In this work, we propose a new neural architecture for multi-event forecasting of spatio-temporal point processes, utilizing transformers, augmented with normalizing flows and probabilistic layers. Our network makes batched predictions of complex history-dependent spatio-temporal distributions of future discrete events, achieving state-of-the-art performance on a variety of benchmark datasets including the South California Earthquakes, Citibike, Covid19, and Hawkes synthetic Pinwheel datasets. More generally, we illustrate how our network can be applied to any dataset of discrete events with associated markers, even when no underlying physics is known.", "keywords": "Stochastic Point Processes;Multi-event Prediction;Transformers;Normalizing Flows;Hawkes Process;Deep Learning;Generative Models", "primary_area": "", "supplementary_material": "/attachment/248f8c7651fd2214f450a2790ac124492704e812.zip", "author": "Negar Erfanian;Santiago Segarra;Maarten V. 
de Hoop", "authorids": "~Negar_Erfanian1;~Santiago_Segarra1;~Maarten_V._de_Hoop2", "gender": "F;M;", "homepage": ";http://segarra.rice.edu/;http://maartendehoop.rice.edu/", "dblp": ";125/2340;60/4525", "google_scholar": ";O1aSMXQAAAAJ;", "orcid": ";;", "linkedin": "negar-erfanian93/;;", "or_profile": "~Negar_Erfanian1;~Santiago_Segarra1;~Maarten_v._de_Hoop1", "aff": "Rice University;Rice University;Rice University", "aff_domain": "rice.edu;rice.edu;rice.edu", "position": "PhD student;Assistant Professor;Full Professor", "bibtex": "@misc{\nerfanian2023neural,\ntitle={Neural multi-event forecasting on spatio-temporal point processes using probabilistically enriched transformers},\nauthor={Negar Erfanian and Santiago Segarra and Maarten V. de Hoop},\nyear={2023},\nurl={https://openreview.net/forum?id=JUNKYmGGuEw}\n}", "github": "", "project": "", "reviewers": "boMt;yeaA;mvAP;q56W", "site": "https://openreview.net/forum?id=JUNKYmGGuEw", "pdf_size": 3552789, "recommendation": "3;5;5;8", "confidence": "4;4;3;4", "correctness": "2;2;3;1", "technical_novelty": "2;3;1;3", "empirical_novelty": "2;3;2;3", "wc_summary_paper": "45;69;41;27", "wc_strength_and_weaknesses": "692;128;100;101", "wc_clarity_quality_novelty_and_reproducibility": "3;55;128;42", "wc_summary_review": "36;103;73;52", "wc_review": "776;355;342;222", "wc_reply_reviewers": "294;33;0;0", "wc_reply_authors": "4108;790;1315;567", "reply_reviewers": "1;1;0;0", "reply_authors": "7;2;3;1", "recommendation_avg": [ 5.25, 1.7853571071357126 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.25, 0.82915619758885 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 45.5, 15.124483462254174 ], "wc_strength_and_weaknesses_avg": [ 255.25, 252.40778018912175 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 57.0, 45.238258145070084 ], "wc_summary_review_avg": [ 66.0, 25.06990227344335 ], "wc_review_avg": [ 423.75, 209.87660064904807 ], "wc_reply_reviewers_avg": [ 81.75, 123.28092918209207 ], "wc_reply_authors_avg": [ 1695.0, 1419.3641181881412 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 3.25, 2.277608394786075 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.08084520834544431, "corr_recommendation_correctness": -0.5940885257860046, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2620732971033321658&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Rice University", "aff_unique_dep": "", "aff_unique_url": "https://www.rice.edu", "aff_unique_abbr": "Rice", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Understanding Train-Validation Split in Meta-Learning with Neural Networks", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11061", "id": "JVlyfHEEm0k", "poster": "", "openreview": "https://openreview.net/forum?id=JVlyfHEEm0k", "slides": "https://iclr.cc/virtual/2023/poster/11061", "video": "https://iclr.cc/virtual/2023/poster/11061", "author_site": "Xinzhe Zuo, Zixiang Chen, Huaxiu Yao, Yuan Cao, Quanquan Gu", "tldr": "", "abstract": "The goal of meta-learning is to learn a good prior model from a collection of tasks such that the learned prior is able to adapt quickly to new tasks without accessing many data from the new tasks. 
A common practice in meta-learning is to perform a train-validation split on each task, where the training set is used for adapting the model parameter to that specific task and the validation set is used for learning a prior model that is shared across all tasks. Despite its success and popularity in multitask learning and few-shot learning, the understanding of the train-validation split is still limited, especially when neural network models are used. In this paper, we study the benefit of the train-validation split for classification problems with neural network models trained by gradient descent. We prove that the train-validation split is necessary to learn a good prior model when the noise in the training sample is large, while the train-train method fails. We validate our theory by conducting experiments on both synthetic and real datasets. To the best of our knowledge, this is the first work towards the theoretical understanding of the train-validation split in meta-learning with neural networks.", "keywords": "meta-learning;neural networks;deep learning;train-validation split;convolutional neural network", "primary_area": "", "supplementary_material": "", "author": "Xinzhe Zuo;Zixiang Chen;Huaxiu Yao;Yuan Cao;Quanquan Gu", "authorids": "~Xinzhe_Zuo1;~Zixiang_Chen1;~Huaxiu_Yao1;~Yuan_Cao1;~Quanquan_Gu1", "gender": "M;M;M;M;M", "homepage": ";https://sites.google.com/view/zxchen;http://huaxiuyao.mystrikingly.com;https://yuancaohku.github.io/;http://web.cs.ucla.edu/~qgu/", "dblp": ";137/3624;197/1635;;50/4597", "google_scholar": ";6nrCHr0AAAAJ;A20BZnQAAAAJ;-VGnHI4AAAAJ;GU9HgNAAAAAJ", "orcid": ";;;;", "linkedin": "xinzhe-zuo-08b7b2194/;;huaxiuyao/;;", "or_profile": "~Xinzhe_Zuo1;~Zixiang_Chen1;~Huaxiu_Yao1;~Yuan_Cao1;~Quanquan_Gu1", "aff": "University of California, Los Angeles; University of California, Los Angeles;Computer Science Department, Stanford University;University of Hong Kong;University of California, Los Angeles", "aff_domain": "math.ucla.edu;cs.ucla.edu;cs.stanford.edu;hku.hk;cs.ucla.edu", "position": "PhD student;PhD student;Postdoc;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nzuo2023understanding,\ntitle={Understanding Train-Validation Split in Meta-Learning with Neural Networks},\nauthor={Xinzhe Zuo and Zixiang Chen and Huaxiu Yao and Yuan Cao and Quanquan Gu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=JVlyfHEEm0k}\n}", "github": "", "project": "", "reviewers": "7Hwe;YDGh;SqBj;kJcp", "pdf_size": 554806, "recommendation": "3;5;6;6", "confidence": "3;5;2;2", "correctness": "2;3;4;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "1;3;2;2", "wc_summary_paper": "96;66;56;40", "wc_strength_and_weaknesses": "374;36;105;207", "wc_clarity_quality_novelty_and_reproducibility": "142;38;14;24", "wc_summary_review": "91;33;32;17", "wc_review": "703;173;207;288", "wc_reply_reviewers": "348;0;0;0", "wc_reply_authors": "1537;80;548;254", "reply_reviewers": "2;0;0;0", "reply_authors": "6;2;2;2", "recommendation_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.0, 1.224744871391589 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 64.5, 20.414455662593603 ], "wc_strength_and_weaknesses_avg": [ 180.5, 127.20554233208551 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 54.5, 51.232314021523564 ], "wc_summary_review_avg": [ 43.25,
28.287585616308792 ], "wc_review_avg": [ 342.75, 212.14426105836566 ], "wc_reply_reviewers_avg": [ 87.0, 150.68842025849233 ], "wc_reply_authors_avg": [ 604.75, 563.6263722538185 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 3.0, 1.7320508075688772 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.33333333333333326, "corr_recommendation_correctness": 0.8660254037844386, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2870862042708026533&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=JVlyfHEEm0k", "email": "math.ucla.edu;cs.ucla.edu;cs.stanford.edu;hku.hk;cs.ucla.edu", "author_num": 5, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "University of California, Los Angeles;Stanford University;University of Hong Kong", "aff_unique_dep": ";Computer Science Department;", "aff_unique_url": "https://www.ucla.edu;https://www.stanford.edu;https://www.hku.hk", "aff_unique_abbr": "UCLA;Stanford;HKU", "aff_campus_unique_index": "0;0;1;2;0", "aff_campus_unique": "Los Angeles;Stanford;Hong Kong SAR", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "United States;China" }, { "id": "JX1OCjfABRj", "title": "Self-Adaptive Perturbation Radii for Adversarial Training", "track": "main", "status": "Reject", "tldr": "", "abstract": "Adversarial training has been shown to be the most popular and effective technique to protect models from imperceptible adversarial samples. Despite its success, it is also accompanied by significant performance degradation on clean data. To achieve good performance on both clean and adversarial samples, the main effort is searching for an adaptive perturbation radius for each training sample, which essentially suffers from a conflict between exact searching and computational overhead. To address this conflict, in this paper, we first show, intuitively and theoretically, the superiority of adaptive perturbation radii regarding accuracy and robustness, respectively. Then we propose our novel self-adaptive adjustment framework for perturbation radii without tedious searching. We also discuss this framework on both deep neural networks (DNNs) and kernel support vector machines (SVMs). Finally, extensive experimental results show that our framework can improve not only natural generalization performance but also adversarial robustness.
It is also competitive with existing searching strategies in terms of running time.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/43ee9828a52e1d814bd418c1206771128a264d2d.zip", "author": "Huimin Wu;Zhang Chenkang;Bin Gu", "authorids": "~Huimin_Wu1;~Zhang_Chenkang1;~Bin_Gu1", "gender": ";M;M", "homepage": "https://www.researchgate.net/profile/Huimin-Wu-7;https://github.com/paperimpl;https://mbzuai.ac.ae/study/faculty/bin-gu/", "dblp": ";;29/1758-1", "google_scholar": ";;Vo8OgCgAAAAJ", "orcid": ";;0000-0001-6049-1815", "linkedin": ";;", "or_profile": "~Huimin_Wu1;~Zhang_Chenkang1;~Bin_Gu1", "aff": "NUIST;;Mohamed bin Zayed University of Artificial Intelligence", "aff_domain": "nuist.edu.cn;;mbzuai.ac.ae", "position": "MS student;;Assistant Professor", "bibtex": "@misc{\nwu2023selfadaptive,\ntitle={Self-Adaptive Perturbation Radii for Adversarial Training},\nauthor={Huimin Wu and Zhang Chenkang and Bin Gu},\nyear={2023},\nurl={https://openreview.net/forum?id=JX1OCjfABRj}\n}", "github": "", "project": "", "reviewers": "CG7K;qfVT;LAk2", "site": "https://openreview.net/forum?id=JX1OCjfABRj", "pdf_size": 501631, "recommendation": "3;5;6", "confidence": "4;4;5", "correctness": "3;2;2", "technical_novelty": "2;2;2", "empirical_novelty": "3;2;2", "wc_summary_paper": "30;35;25", "wc_strength_and_weaknesses": "254;255;91", "wc_clarity_quality_novelty_and_reproducibility": "12;10;3", "wc_summary_review": "29;13;21", "wc_review": "325;313;140", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 30.0, 4.08248290463863 ], "wc_strength_and_weaknesses_avg": [ 200.0, 77.07572034477957 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 8.333333333333334, 3.858612300930075 ], "wc_summary_review_avg": [ 21.0, 6.531972647421808 ], "wc_review_avg": [ 259.3333333333333, 84.52350100547316 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.7559289460184544, "corr_recommendation_correctness": -0.9449111825230683, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:IjG4b8avuYIJ:scholar.google.com/&scioq=Self-Adaptive+Perturbation+Radii+for+Adversarial+Training&hl=en&as_sdt=0,44", "gs_version_total": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Nanjing University of Information Science & Technology;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "http://www.nuist.edu.cn/;https://mbzuai.ac.ae", "aff_unique_abbr": "NUIST;MBZUAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "China;United Arab Emirates" }, { "id": "JXkz3zm8gJ", "title": "Learning to Learn with Generative Models of Neural Network Checkpoints", "track": "main", "status": "Reject", "tldr": "We construct a dataset of neural network checkpoints and train a loss-conditional generative model on the parameters. 
The generative model can train neural networks with unseen initializations in one step.", "abstract": "We explore a data-driven approach for learning to optimize neural networks. We construct a dataset of neural network checkpoints and train a generative model on the parameters. In particular, our model is a conditional diffusion transformer that, given an initial input parameter vector and a prompted loss, error, or return, predicts the distribution over parameter updates that achieve the desired metric. At test time, it can optimize neural networks with unseen parameters for downstream tasks in just one update. We find that our approach successfully generates parameters for a wide range of loss prompts. Moreover, it can sample multimodal parameter solutions and has favorable scaling properties. We apply our method to different neural network architectures and tasks in supervised and reinforcement learning.\n", "keywords": "diffusion;DDPMs;learning to learn;generative models;transformers", "primary_area": "", "supplementary_material": "/attachment/642c48247ee5b7a9962b01f2621ff17464442881.zip", "author": "William Peebles;Ilija Radosavovic;Tim Brooks;Alexei A Efros;Jitendra Malik", "authorids": "~William_Peebles1;~Ilija_Radosavovic1;~Tim_Brooks1;~Alexei_A_Efros1;~Jitendra_Malik2", "gender": ";M;;M;M", "homepage": "https://www.wpeebles.com/;https://people.eecs.berkeley.edu/~ilija;https://timothybrooks.com;https://people.eecs.berkeley.edu/~malik/;http://www.eecs.berkeley.edu/~efros/", "dblp": ";211/6740;15/2138;58/2944;40/6158", "google_scholar": "b_RBE3EAAAAJ;UKpinl8AAAAJ;sonlKXIAAAAJ;oY9R5YQAAAAJ;https://scholar.google.com.tw/citations?user=d97bGd8AAAAJ", "orcid": ";;;0000-0003-3695-1580;0000-0001-5720-8070", "linkedin": ";;;;alexei-efros-890736a3/", "or_profile": "~William_Peebles1;~Ilija_Radosavovic1;~Tim_Brooks1;~Jitendra_Malik2;~Alyosha_Efros1", "aff": "University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;University of California, Berkeley", "aff_domain": "berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu", "position": "PhD student;PhD student;PhD student;Full Professor;Professor", "bibtex": "@misc{\npeebles2023learning,\ntitle={Learning to Learn with Generative Models of Neural Network Checkpoints},\nauthor={William Peebles and Ilija Radosavovic and Tim Brooks and Alexei A Efros and Jitendra Malik},\nyear={2023},\nurl={https://openreview.net/forum?id=JXkz3zm8gJ}\n}", "github": "", "project": "", "reviewers": "rREn;bACh;Wayk;iYN2", "site": "https://openreview.net/forum?id=JXkz3zm8gJ", "pdf_size": 9853742, "recommendation": "5;5;5;8", "confidence": "3;3;3;4", "correctness": "3;3;3;4", "technical_novelty": "3;4;3;3", "empirical_novelty": "2;4;2;3", "wc_summary_paper": "120;106;36;103", "wc_strength_and_weaknesses": "374;149;144;293", "wc_clarity_quality_novelty_and_reproducibility": "38;34;10;133", "wc_summary_review": "18;37;31;119", "wc_review": "550;326;221;648", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "168;127;114;90", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 91.25, 32.53747839031168 ], "wc_strength_and_weaknesses_avg": [ 240.0, 97.80337417492302 ], 
"wc_clarity_quality_novelty_and_reproducibility_avg": [ 53.75, 46.991355588022785 ], "wc_summary_review_avg": [ 51.25, 39.71382001268576 ], "wc_review_avg": [ 436.25, 170.4880860940142 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 124.75, 28.27874643614883 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 1.0, "corr_recommendation_correctness": 1.0, "gs_citation": 75, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15204824897270517069&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Backpropagation through Combinatorial Algorithms: Identity with Projection Works", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10752", "id": "JZMR727O29", "poster": "/media/PosterPDFs/ICLR%202023/10752.png?t=1682766043.2593095", "openreview": "https://openreview.net/forum?id=JZMR727O29", "slides": "https://iclr.cc/virtual/2023/poster/10752", "video": "https://iclr.cc/virtual/2023/poster/10752", "author_site": "Subham Sahoo, Anselm Paulus, Marin Vlastelica Pogan\u010di\u0107, V\u00edt Musil, Volodymyr Kuleshov, Georg Martius", "tldr": "We propose a simple alternative for differentiating through combinatorial solvers with linear objectives, that is on par with SoTA, has no hyperparameters, and is more robust to perturbations.", "abstract": "Embedding discrete solvers as differentiable layers has given modern deep learning architectures combinatorial expressivity and discrete reasoning capabilities. The derivative of these solvers is zero or undefined, therefore a meaningful replacement is crucial for effective gradient-based learning. Prior works rely on smoothing the solver with input perturbations, relaxing the solver to continuous problems, or interpolating the loss landscape with techniques that typically require additional solver calls, introduce extra hyper-parameters, or compromise performance. We propose a principled approach to exploit the geometry of the discrete solution space to treat the solver as a negative identity on the backward pass and further provide a theoretical justification. Our experiments demonstrate that such a straightforward hyper-parameter-free approach is able to compete with previous more complex methods on numerous experiments such as backpropagation through discrete samplers, deep graph matching, and image retrieval. 
Furthermore, we substitute the previously proposed problem-specific and label-dependent margin with a generic regularization procedure that prevents cost collapse and increases robustness.", "keywords": "combinatorial optimization;deep learning;representation learning;gradient descent;backpropagation;argmin differentiation;deep graph matching;retrieval", "primary_area": "", "supplementary_material": "/attachment/f58bb43cbfc0fa7d91544abb709ae58698448eb2.zip", "author": "Subham Sekhar Sahoo;Anselm Paulus;Marin Vlastelica;V\u00edt Musil;Volodymyr Kuleshov;Georg Martius", "authorids": "~Subham_Sekhar_Sahoo1;~Anselm_Paulus1;~Marin_Vlastelica1;~V\u00edt_Musil1;~Volodymyr_Kuleshov1;~Georg_Martius1", "gender": "M;;M;;M;M", "homepage": ";;http://vejtek.matfyz.cz/;https://www.cs.cornell.edu/~kuleshov/;https://uni-tuebingen.de/de/264672;https://jimimvp.github.io/", "dblp": ";255/5245;255/4994;81/8612;47/2706;226/9727", "google_scholar": "Z7DoDbAAAAAJ;njZL5CQAAAAJ;https://scholar.google.cz/citations?user=hA1rlU4AAAAJ;RY_t8XAAAAAJ;https://scholar.google.de/citations?user=b-JF-UIAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0001-6083-227X;;;0000-0002-2959-4119", "linkedin": "shakeh3r/;;;;;mvlastelica/", "or_profile": "~Subham_Sekhar_Sahoo1;~Anselm_Paulus1;~V\u00edt_Musil1;~Volodymyr_Kuleshov1;~Georg_Martius1;~Marin_Vlastelica_Pogan\u010di\u01071", "aff": "Department of Computer Science, Cornell University;Facebook AI Research;Masaryk University, Brno;Cornell University;Max Planck Institute for Intelligent Systems;Max Planck Institute for Intelligent Systems, Max-Planck Institute", "aff_domain": "cs.cornell.edu;meta.com;muni.cz;cornell.edu;tuebingen.mpg.de;tuebingen.mpg.de", "position": "PhD student;Intern;Researcher;Assistant Professor;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nsahoo2023backpropagation,\ntitle={Backpropagation through Combinatorial Algorithms: Identity with Projection Works},\nauthor={Subham Sekhar Sahoo and Anselm Paulus and Marin Vlastelica and V{\\'\\i}t Musil and Volodymyr Kuleshov and Georg Martius},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=JZMR727O29}\n}", "github": "", "project": "", "reviewers": "NUvG;MfHU;bT3d;cnD6", "pdf_size": 813640, "recommendation": "3;6;6;8", "confidence": "3;4;3;4", "correctness": "4;4;3;4", "technical_novelty": "2;4;3;3", "empirical_novelty": "2;0;3;3", "wc_summary_paper": "133;27;101;86", "wc_strength_and_weaknesses": "140;219;391;205", "wc_clarity_quality_novelty_and_reproducibility": "239;38;17;39", "wc_summary_review": "263;48;23;20", "wc_review": "775;332;532;350", "wc_reply_reviewers": "0;14;32;34", "wc_reply_authors": "1194;773;529;376", "reply_reviewers": "0;1;1;1", "reply_authors": "3;2;1;1", "recommendation_avg": [ 5.75, 1.7853571071357126 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.0, 1.224744871391589 ], "wc_summary_paper_avg": [ 86.75, 38.447204059593204 ], "wc_strength_and_weaknesses_avg": [ 238.75, 92.81803434677983 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 83.25, 90.35035971151416 ], "wc_summary_review_avg": [ 88.5, 101.33237389896676 ], "wc_review_avg": [ 497.25, 178.42557972443302 ], "wc_reply_reviewers_avg": [ 20.0, 13.92838827718412 ], "wc_reply_authors_avg": [ 718.0, 309.14640544570466 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 
1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.7001400420140049, "corr_recommendation_correctness": -0.08084520834544431, "gs_citation": 34, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3635316784193466403&as_sdt=5,40&sciodt=0,40&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=JZMR727O29", "email": "cs.cornell.edu;meta.com;muni.cz;cornell.edu;tuebingen.mpg.de;tuebingen.mpg.de", "author_num": 6, "aff_unique_index": "0;1;2;0;3;3", "aff_unique_norm": "Cornell University;Meta;Masaryk University;Max Planck Institute for Intelligent Systems", "aff_unique_dep": "Department of Computer Science;Facebook AI Research;;Intelligent Systems", "aff_unique_url": "https://www.cornell.edu;https://research.facebook.com;https://www.muni.cz;https://www.mpi-is.mpg.de", "aff_unique_abbr": "Cornell;FAIR;MU;MPI-IS", "aff_campus_unique_index": "1", "aff_campus_unique": ";Brno", "aff_country_unique_index": "0;0;1;0;2;2", "aff_country_unique": "United States;Czech Republic;Germany" }, { "id": "JZRBSoJv7lb", "title": "Similarity of Neural Architectures Based on Input Gradient Transferability", "track": "main", "status": "Reject", "tldr": "We propose a similarity score between neural networks. We provide analyses on 69 neural architectures using the proposed score.", "abstract": "In this paper, we aim to design a quantitative similarity function between two neural architectures. Specifically, we define a model similarity using input gradient transferability. We generate adversarial samples of two networks and measure the average accuracy of the networks on adversarial samples of each other. If two networks are highly correlated, then the attack transferability will be high, resulting in high similarity. Using the similarity score, we investigate two topics: (1) Which network component contributes to the model diversity? (2) How does model diversity affect practical scenarios? We answer the first question by providing feature importance analysis and clustering analysis. We validate the second question in two different scenarios: model ensemble and knowledge distillation. Our findings show that model diversity plays a key role when interacting with different neural architectures. For example, we found that more diversity leads to better ensemble performance. We also observe that the relationship between teacher and student networks and distillation performance depends on the choice of the base architecture of the teacher and student networks.
We expect our analysis tool to provide a high-level understanding of the differences between various neural architectures, as well as practical guidance when using multiple architectures.", "keywords": "neural architecture similarity;model similarity;model diversity;model ensemble;knowledge distillation", "primary_area": "", "supplementary_material": "", "author": "Jaehui Hwang;Dongyoon Han;Byeongho Heo;Song Park;Sanghyuk Chun;Jong-Seok Lee", "authorids": "~Jaehui_Hwang1;~Dongyoon_Han1;~Byeongho_Heo1;~Song_Park1;~Sanghyuk_Chun1;~Jong-Seok_Lee1", "gender": "F;M;M;F;M;", "homepage": "https://j-h-hwang.github.io/;https://dongyoonhan.github.io/;https://sites.google.com/view/byeongho-heo/home;https://8uos.github.io;https://sanghyukchun.github.io/home/;http://mcml.yonsei.ac.kr", "dblp": "234/1038;151/8876;142/2705;;213/1095.html;70/1152", "google_scholar": "https://scholar.google.co.kr/citations?hl=ko;jcP7m1QAAAAJ;https://scholar.google.co.kr/citations?user=4_7rLDIAAAAJ;https://scholar.google.co.kr/citations?user=VR1c0H8AAAAJ;https://scholar.google.co.kr/citations?user=4_uj0xcAAAAJ;YGwwt6cAAAAJ", "orcid": ";0000-0002-9130-8195;;;0000-0002-4533-2610;", "linkedin": "jaehui-hwang-1b2a51194/;https://linkedin.com/in/dongyoon-han-04961a120/en;byeongho-heo-1a7756122/;;https://kr.linkedin.com/in/sanghyukchun/en;", "or_profile": "~Jaehui_Hwang1;~Dongyoon_Han1;~Byeongho_Heo1;~Song_Park1;~Sanghyuk_Chun1;~Jong-Seok_Lee1", "aff": "Yonsei University;NAVER;NAVER AI Lab;NAVER;NAVER AI Lab;Yonsei University", "aff_domain": "yonsei.ac.kr;navercorp.com;navercorp.com;navercorp.com;navercorp.com;yonsei.ac.kr", "position": "PhD student;Research Scientist;Researcher;Researcher;Lead research scientist;Full Professor", "bibtex": "@misc{\nhwang2023similarity,\ntitle={Similarity of Neural Architectures Based on Input Gradient Transferability},\nauthor={Jaehui Hwang and Dongyoon Han and Byeongho Heo and Song Park and Sanghyuk Chun and Jong-Seok Lee},\nyear={2023},\nurl={https://openreview.net/forum?id=JZRBSoJv7lb}\n}", "github": "", "project": "", "reviewers": "Bk5e;yYKW;kjGm;UduE;ynKk", "site": "https://openreview.net/forum?id=JZRBSoJv7lb", "pdf_size": 1409288, "recommendation": "1;3;5;6;8", "confidence": "4;4;4;4;4", "correctness": "1;1;3;3;4", "technical_novelty": "1;2;2;3;3", "empirical_novelty": "1;3;3;3;3", "wc_summary_paper": "50;89;48;56;70", "wc_strength_and_weaknesses": "87;214;431;135;101", "wc_clarity_quality_novelty_and_reproducibility": "72;99;35;16;79", "wc_summary_review": "9;80;50;44;36", "wc_review": "218;482;564;251;286", "wc_reply_reviewers": "0;0;206;0;84", "wc_reply_authors": "1221;1030;3900;987;689", "reply_reviewers": "0;0;1;0;1", "reply_authors": "4;4;8;4;3", "recommendation_avg": [ 4.6, 2.4166091947189146 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.4, 1.2 ], "technical_novelty_avg": [ 2.2, 0.7483314773547882 ], "empirical_novelty_avg": [ 2.6, 0.8000000000000002 ], "wc_summary_paper_avg": [ 62.6, 15.278743403827423 ], "wc_strength_and_weaknesses_avg": [ 193.6, 126.60742474278513 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 60.2, 30.287951399855356 ], "wc_summary_review_avg": [ 43.8, 22.894540834006698 ], "wc_review_avg": [ 360.2, 137.12826112804026 ], "wc_reply_reviewers_avg": [ 58.0, 80.83563570604241 ], "wc_reply_authors_avg": [ 1565.4, 1179.6817536946141 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 4.6, 1.7435595774162693 ], "replies_avg": [ 32, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0,
"corr_recommendation_correctness": 0.9517467718927232, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14387170132209722262&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;1;1;1;0", "aff_unique_norm": "Yonsei University;NAVER Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://www.yonsei.ac.kr;https://www.naver.com", "aff_unique_abbr": "Yonsei;NAVER", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Consolidator: Mergable Adapter with Group Connections for Visual Adaptation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11225", "id": "J_Cja7cpgW", "poster": "/media/PosterPDFs/ICLR%202023/11225.png?t=1683048404.2523856", "openreview": "https://openreview.net/forum?id=J_Cja7cpgW", "slides": "https://iclr.cc/virtual/2023/poster/11225", "video": "https://iclr.cc/virtual/2023/poster/11225", "author_site": "Tianxiang Hao, Hui Chen, Yuchen Guo, Guiguang Ding", "tldr": "We propose a module named consolidator to achieve both parameter- and inference-efficient transfer learning for vision transformers", "abstract": "Recently, transformers have shown strong ability as visual feature extractors, surpassing traditional convolution-based models in various scenarios. However, the success of vision transformers largely owes to their capacity to accommodate numerous parameters. As a result, new challenges for adapting a well-trained transformer to downstream tasks arise. On the one hand, classic fine-tuning tunes all parameters in a huge model for every downstream task and thus easily falls into an overfitting situation, leading to inferior performance. On the other hand, on resource-limited devices, fine-tuning stores a full copy of all parameters and thus is usually impracticable for the shortage of storage space. However, few works have focused on how to efficiently and effectively transfer knowledge in a vision transformer. Existing methods did not dive into the properties of visual features, leading to inferior performance. Moreover, some of them bring heavy inference cost though benefiting storage. To tackle these problems, we propose consolidator to achieve efficient transfer learning for large vision models. Our consolidator modifies the pre-trained model with the addition of a small set of tunable parameters to temporarily store the task-specific knowledge while freezing the backbone model during adaptation. Motivated by the success of group-wise convolution, we adopt grouped connections across the features extracted by fully connected layers to construct tunable parts in a consolidator. To further enhance the model's capacity to transfer knowledge under a constrained storage budget and keep inference efficient, we consolidate the parameters in two stages: 1. between adaptation and storage, and 2. between loading and inference. On a series of downstream visual tasks, our consolidator can reach up to 7.56 better accuracy than full fine-tuning with merely 0.35% parameters, and outperform state-of-the-art parameter-efficient tuning methods by a clear margin. 
Code is available at github.", "keywords": "Efficient Transfer Learning;Groups Connections;Vision Transformer", "primary_area": "", "supplementary_material": "", "author": "Tianxiang Hao;Hui Chen;Yuchen Guo;Guiguang Ding", "authorids": "~Tianxiang_Hao1;~Hui_Chen7;~Yuchen_Guo1;~Guiguang_Ding1", "gender": "M;M;M;M", "homepage": ";https://huichen24.github.io/;;http://ise.thss.tsinghua.edu.cn/MIG/dgg.html", "dblp": "270/0611;;;51/740", "google_scholar": "36f-FRkAAAAJ;erpvWcIAAAAJ;PNMUgAoAAAAJ;https://scholar.google.com.tw/citations?user=B7F3yt4AAAAJ", "orcid": ";0000-0003-4180-5801;;0000-0003-0137-9975", "linkedin": ";;;", "or_profile": "~Tianxiang_Hao1;~Hui_Chen7;~Yuchen_Guo1;~Guiguang_Ding1", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;mail.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;Postdoc;Researcher;Full Professor", "bibtex": "@inproceedings{\nhao2023consolidator,\ntitle={Consolidator: Mergable Adapter with Group Connections for Visual Adaptation},\nauthor={Tianxiang Hao and Hui Chen and Yuchen Guo and Guiguang Ding},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=J_Cja7cpgW}\n}", "github": "", "project": "", "reviewers": "2eph;zLNL;HhUN;ULMu", "pdf_size": 333295, "recommendation": "5;5;5;8", "confidence": "4;4;4;4", "correctness": "4;4;3;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "3;3;2;3", "wc_summary_paper": "38;53;41;70", "wc_strength_and_weaknesses": "183;170;130;331", "wc_clarity_quality_novelty_and_reproducibility": "30;4;11;35", "wc_summary_review": "2;4;20;45", "wc_review": "253;231;202;481", "wc_reply_reviewers": "0;0;0;36", "wc_reply_authors": "809;827;738;1243", "reply_reviewers": "0;0;0;1", "reply_authors": "2;2;2;3", "recommendation_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 50.5, 12.579745625409124 ], "wc_strength_and_weaknesses_avg": [ 203.5, 76.15937237136346 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 20.0, 12.864680330268607 ], "wc_summary_review_avg": [ 17.75, 17.210098779495716 ], "wc_review_avg": [ 291.75, 110.75056433264798 ], "wc_reply_reviewers_avg": [ 9.0, 15.588457268119896 ], "wc_reply_authors_avg": [ 904.25, 198.3877201340849 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": -0.5773502691896257, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=421015673026970114&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=J_Cja7cpgW", "email": "tsinghua.edu.cn;mail.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "J_kUIC1DNHJ", "title": "Dynamic Loss for Learning with Label Noise", "track": "main", "status": "Withdraw", "tldr": "To handle the mismatch between the statics of robust loss 
functions and the dynamics of DNNs learning with label noise, we propose a dynamic loss function which improves robustness gradually.", "abstract": "Label noise has been verified to be seriously harmful to deep neural networks (DNNs). A simple and scalable strategy to handle this problem is to design robust loss functions, which improve generalization in the presence of label noise by reconciling fitting ability with robustness. However, the widely-used static trade-off between the two contradicts the dynamics of DNNs learning with label noise, leading to inferior performance. Therefore, in this paper, we propose a dynamic loss function to solve this problem. Specifically, DNNs tend to first learn generalized patterns, then gradually overfit label noise. In light of this, we make the fitting ability stronger initially, then gradually increase the weight of robustness. Moreover, we let DNNs put more emphasis on easy examples than hard ones at the later stage since the former are correctly labeled with a higher probability, further reducing the negative impact of label noise. Extensive experimental results on various benchmark datasets demonstrate the state-of-the-art performance of our method. We will open-source our code very soon.", "keywords": "label noise;robust loss function;dynamic", "primary_area": "", "supplementary_material": "", "author": "Xiu-Chuan Li;Xiaobo Xia;Fei Zhu;Tongliang Liu;Xu-yao Zhang;Cheng-lin Liu", "authorids": "~Xiu-Chuan_Li1;~Xiaobo_Xia1;~Fei_Zhu1;~Tongliang_Liu1;~Xu-yao_Zhang1;~Cheng-lin_Liu1", "gender": ";M;M;M;;M", "homepage": "https://xiuchuanli.github.io/;https://xiaoboxia.github.io/;http://www.nlpr.ia.ac.cn/pal/People/ZhuFei.html;https://tongliang-liu.github.io/;;http://www.nlpr.ia.ac.cn/liucl/", "dblp": "291/8244.html;242/8072;;150/6667;;24/3006-1.html", "google_scholar": ";jRsugY0AAAAJ;fjZ1CBwAAAAJ;https://scholar.google.com.au/citations?user=EiLdZ_YAAAAJ;;8r3y8IMAAAAJ", "orcid": ";;;;;0000-0002-6743-4175", "linkedin": ";;;;;", "or_profile": "~Xiu-Chuan_Li1;~Xiaobo_Xia1;~Fei_Zhu1;~Tongliang_Liu1;~Xu-yao_Zhang1;~Cheng-lin_Liu1", "aff": "Institute of Automation, Chinese Academy of Sciences;The University of Sydney;Institute of Automation, Chinese Academy of Sciences;University of Sydney;;Institute of Automation, Chinese Academy of Sciences", "aff_domain": "ia.ac.cn;sydney.edu.au;ia.ac.cn;sydney.edu.au;;ia.ac.cn", "position": "MS student;PhD student;PhD student;Lecturer;;Full Professor", "bibtex": "@misc{\nli2023dynamic,\ntitle={Dynamic Loss for Learning with Label Noise},\nauthor={Xiu-Chuan Li and Xiaobo Xia and Fei Zhu and Tongliang Liu and Xu-yao Zhang and Cheng-lin Liu},\nyear={2023},\nurl={https://openreview.net/forum?id=J_kUIC1DNHJ}\n}", "github": "", "project": "", "reviewers": "6KEi;Prtn;VADj;NyiR", "site": "https://openreview.net/forum?id=J_kUIC1DNHJ", "pdf_size": 410645, "recommendation": "1;3;3;6", "confidence": "5;4;4;5", "correctness": "2;2;3;3", "technical_novelty": "1;2;2;2", "empirical_novelty": "1;2;2;3", "wc_summary_paper": "23;38;124;59", "wc_strength_and_weaknesses": "315;341;311;90", "wc_clarity_quality_novelty_and_reproducibility": "31;26;4;281", "wc_summary_review": "20;16;45;55", "wc_review": "389;421;484;485", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.25, 1.7853571071357126 ], "confidence_avg": [ 4.5, 0.5 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0,
0.7071067811865476 ], "wc_summary_paper_avg": [ 61.0, 38.555155297314 ], "wc_strength_and_weaknesses_avg": [ 264.25, 101.26049328341236 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 85.5, 113.32806360297523 ], "wc_summary_review_avg": [ 34.0, 16.446884203398525 ], "wc_review_avg": [ 444.75, 41.33022501753408 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.14002800840280097, "corr_recommendation_correctness": 0.7001400420140049, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2512518862101204295&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;0;1;0", "aff_unique_norm": "Chinese Academy of Sciences;University of Sydney", "aff_unique_dep": "Institute of Automation;", "aff_unique_url": "http://www.ia.cas.cn;https://www.sydney.edu.au", "aff_unique_abbr": "CAS;USYD", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1;0", "aff_country_unique": "China;Australia" }, { "title": "A Time Series is Worth 64 Words: Long-term Forecasting with Transformers", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10876", "id": "Jbdc0vTOcol", "poster": "/media/PosterPDFs/ICLR%202023/10876.png?t=1681610030.284921", "openreview": "https://openreview.net/forum?id=Jbdc0vTOcol", "slides": "https://iclr.cc/virtual/2023/poster/10876", "video": "https://iclr.cc/virtual/2023/poster/10876", "author_site": "Yuqi Nie, Nam Nguyen, Phanwadee Sinthong, Jayant Kalagnanam", "tldr": "Channel-independent patch time series transformer works very well for long-term forecasting and representation learning.", "abstract": "We propose an efficient design of Transformer-based models for multivariate time series forecasting and self-supervised representation learning. It is based on two key components: (i) segmentation of time series into subseries-level patches which are served as input tokens to Transformer; (ii) channel-independence where each channel contains a single univariate time series that shares the same embedding and Transformer weights across all the series. Patching design naturally has three-fold benefit: local semantic information is retained in the embedding; computation and memory usage of the attention maps are quadratically reduced given the same look-back window; and the model can attend longer history. Our channel-independent patch time series Transformer (PatchTST) can improve the long-term forecasting accuracy significantly when compared with that of SOTA Transformer-based models. We also apply our model to self-supervised pre-training tasks and attain excellent fine-tuning performance, which outperforms supervised training on large datasets. 
Transferring masked pre-training from one dataset to other datasets also produces SOTA forecasting accuracy.", "keywords": "time series;transformer;forecasting;channel-independence;self-supervised learning;representation learning", "primary_area": "", "supplementary_material": "", "author": "Yuqi Nie;Nam H Nguyen;Phanwadee Sinthong;Jayant Kalagnanam", "authorids": "~Yuqi_Nie1;~Nam_H_Nguyen1;~Phanwadee_Sinthong1;~Jayant_Kalagnanam1", "gender": ";M;;M", "homepage": ";;;https://researcher.watson.ibm.com/researcher/view.php?person=us-jayant", "dblp": ";76/2975;217/4824.html;05/4351", "google_scholar": "GBS7CdkAAAAJ;zzBcUpEAAAAJ;;dlytHK4AAAAJ", "orcid": ";;;", "linkedin": "yuqi-nie-890aa01ba/;;;jay-ant-kalagnanam-4b74913/", "or_profile": "~Yuqi_Nie1;~Nam_H_Nguyen1;~Phanwadee_Sinthong1;~Jayant_Kalagnanam1", "aff": "Princeton University;International Business Machines;IBM, International Business Machines;IBM TJ Watson Research Center", "aff_domain": "princeton.edu;ibm.com;us.ibm.com;researcher.watson.ibm.com", "position": "PhD student;Senior Research Scientist;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nnie2023a,\ntitle={A Time Series is Worth 64 Words: Long-term Forecasting with Transformers},\nauthor={Yuqi Nie and Nam H Nguyen and Phanwadee Sinthong and Jayant Kalagnanam},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=Jbdc0vTOcol}\n}", "github": "", "project": "", "reviewers": "3Fzy;zBqz;FEPh;2c5h", "pdf_size": 3945903, "recommendation": "5;5;6;8", "confidence": "4;4;3;4", "correctness": "3;3;3;4", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "71;63;82;52", "wc_strength_and_weaknesses": "109;287;87;222", "wc_clarity_quality_novelty_and_reproducibility": "12;8;101;67", "wc_summary_review": "38;23;230;62", "wc_review": "230;381;500;403", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "828;1565;1149;786", "reply_reviewers": "0;0;0;0", "reply_authors": "2;3;2;2", "recommendation_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 67.0, 10.977249200050075 ], "wc_strength_and_weaknesses_avg": [ 176.25, 81.92488938045629 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 47.0, 38.92942331964346 ], "wc_summary_review_avg": [ 88.25, 83.01317666491266 ], "wc_review_avg": [ 378.5, 96.72254132310627 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1082.0, 312.2138690064873 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.9428090415820632, "gs_citation": 1749, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7311177819928402035&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=Jbdc0vTOcol", "email": "princeton.edu;ibm.com;us.ibm.com;researcher.watson.ibm.com", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Princeton University;International Business Machines Corporation;International Business Machines;IBM", "aff_unique_dep": ";;;Research Center", "aff_unique_url": "https://www.princeton.edu;https://www.ibm.com;https://www.ibm.com;https://www.ibm.com/research", "aff_unique_abbr": 
"Princeton;IBM;IBM;IBM", "aff_campus_unique_index": "1", "aff_campus_unique": ";TJ Watson", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "Jbfd7BpQaa-", "title": "Balance is Essence: Accelerating Sparse Training via Adaptive Gradient Correction", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Despite impressive performance on a wide variety of tasks, deep neural networks require significant memory and computation costs, which prohibits their application in resource-constrained scenarios. Sparse training is one of the most common techniques to reduce these costs, however, the sparsity constraints add difficulty to the optimization, resulting in an increase in training time and instability. In this work, we aim to overcome this problem and achieve space-time co-efficiency. To accelerate and stabilize the convergence of sparse training, we analyze the gradient changes and develop an adaptive gradient correction method. Specifically, we approximate the correlation between the current and previous gradients, which is used to balance the two gradients to obtain a corrected gradient. Our method can be used with most popular sparse training pipelines under both standard and adversarial setups. Theoretically, we prove that our method can accelerate the convergence rate of sparse training. Extensive experiments on multiple datasets, model architectures, and sparsities demonstrate that our method outperforms leading sparse training methods by up to \\textbf{5.0\\%} in accuracy given the same number of training epochs, and reduces the number of training epochs by up to \\textbf{52.1\\%} to achieve the same accuracy.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/4bda6b5c78d95d838b0cfe812ab91c19c4e8cdce.zip", "author": "Bowen Lei;Dongkuan Xu;Ruqi Zhang;Shuren He;Bani Mallick", "authorids": "~Bowen_Lei1;~Dongkuan_Xu2;~Ruqi_Zhang1;dri.tea@tamu.edu;~Bani_Mallick1", "gender": "M;M;F;;M", "homepage": "https://stevenboys.github.io/;https://dongkuanx27.github.io/;https://ruqizhang.github.io/;;https://artsci.tamu.edu/statistics/contact/profiles/bani-mallick.html", "dblp": "334/7726.html;142/8139;;;", "google_scholar": "xF9ZTgYAAAAJ;https://scholar.google.com/citations?hl=en;4ojpmc8AAAAJ;;T8grPNsAAAAJ", "orcid": "0000-0001-7141-7485;0000-0002-1456-9658;;;", "linkedin": "bowen-lei-9ba238192/;dongkuan-dk-xu-%F0%9F%87%BA%F0%9F%87%A6-05038087/;;;", "or_profile": "~Bowen_Lei1;~Dongkuan_Xu2;~Ruqi_Zhang1;dri.tea@tamu.edu;~Bani_Mallick1", "aff": "Texas A&M University - College Station;North Carolina State University;Purdue University;;Texas A&M", "aff_domain": "tamu.edu;ncsu.edu;purdue.edu;;stat.tamu.edu", "position": "PhD student;Assistant Professor;Assistant Professor;;Full Professor", "bibtex": "@misc{\nlei2023balance,\ntitle={Balance is Essence: Accelerating Sparse Training via Adaptive Gradient Correction},\nauthor={Bowen Lei and Dongkuan Xu and Ruqi Zhang and Shuren He and Bani Mallick},\nyear={2023},\nurl={https://openreview.net/forum?id=Jbfd7BpQaa-}\n}", "github": "", "project": "", "reviewers": "UoMt;vxdp;TmTX;d7Lx", "site": "https://openreview.net/forum?id=Jbfd7BpQaa-", "pdf_size": 1303210, "recommendation": "3;5;6;6", "confidence": "4;3;4;3", "correctness": "2;2;3;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "176;84;28;31", "wc_strength_and_weaknesses": "801;350;207;263", "wc_clarity_quality_novelty_and_reproducibility": "96;113;4;19", "wc_summary_review": "129;36;34;54", 
"wc_review": "1202;583;273;367", "wc_reply_reviewers": "596;0;244;60", "wc_reply_authors": "2065;814;1840;593", "reply_reviewers": "3;0;1;2", "reply_authors": "4;2;3;3", "recommendation_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 79.75, 59.868084151741485 ], "wc_strength_and_weaknesses_avg": [ 405.25, 234.0986704362073 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 58.0, 47.185802949616104 ], "wc_summary_review_avg": [ 63.25, 38.75161286966002 ], "wc_review_avg": [ 606.25, 361.85451150980555 ], "wc_reply_reviewers_avg": [ 225.0, 232.29937580630732 ], "wc_reply_authors_avg": [ 1328.0, 634.3764655155486 ], "reply_reviewers_avg": [ 1.5, 1.118033988749895 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.40824829046386296, "corr_recommendation_correctness": 0.8164965809277259, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8998522336797744903&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Texas A&M University;North Carolina State University;Purdue University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tamu.edu;https://www.ncsu.edu;https://www.purdue.edu", "aff_unique_abbr": "TAMU;NCSU;Purdue", "aff_campus_unique_index": "0", "aff_campus_unique": "College Station;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "MoDem: Accelerating Visual Model-Based Reinforcement Learning with Demonstrations", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11332", "id": "JdTnc9gjVfJ", "poster": "/media/PosterPDFs/ICLR%202023/11332.png?t=1681154066.6673968", "openreview": "https://openreview.net/forum?id=JdTnc9gjVfJ", "slides": "https://iclr.cc/virtual/2023/poster/11332", "video": "https://iclr.cc/virtual/2023/poster/11332", "author_site": "Nicklas Hansen, Yixin Lin, Hao Su, Xiaolong Wang, Vikash Kumar, Aravind Rajeswaran", "tldr": "We find that leveraging just a handful of demonstrations can dramatically improve the sample-efficiency of model-based RL, but requires three key ingredients: policy pretraining, targeted exploration, and oversampling of demonstration data.", "abstract": "Poor sample efficiency continues to be the primary challenge for deployment of deep Reinforcement Learning (RL) algorithms for real-world applications, and in particular for visuo-motor control. Model-based RL has the potential to be highly sample efficient by concurrently learning a world model and using synthetic rollouts for planning and policy improvement. However, in practice, sample-efficient learning with model-based RL is bottlenecked by the exploration challenge. In this work, we find that leveraging just a handful of demonstrations can dramatically improve the sample-efficiency of model-based RL. Simply appending demonstrations to the interaction dataset, however, does not suffice. We identify key ingredients for leveraging demonstrations in model learning -- policy pretraining, targeted exploration, and oversampling of demonstration data -- which forms the three phases of our model-based RL framework. 
We empirically study three complex visuo-motor control domains and find that our method is 160%-250% more successful in completing sparse reward tasks compared to prior approaches in the low data regime (100k interaction steps, 5 demonstrations). Code and videos are available at https://nicklashansen.github.io/modemrl.", "keywords": "model-based reinforcement learning;visual reinforcement learning;learning from demonstrations", "primary_area": "", "supplementary_material": "", "author": "Nicklas Hansen;Yixin Lin;Hao Su;Xiaolong Wang;Vikash Kumar;Aravind Rajeswaran", "authorids": "~Nicklas_Hansen1;~Yixin_Lin1;~Hao_Su1;~Xiaolong_Wang3;~Vikash_Kumar2;~Aravind_Rajeswaran1", "gender": "Non-Binary;M;M;M;M;M", "homepage": "https://nicklashansen.github.io;https://yixinlin.net;http://ai.ucsd.edu/~haosu;https://xiaolonw.github.io/;http://vikashplus.github.io/;http://aravindr93.github.io/", "dblp": "258/0744.html;236/9891;09/4945-1;91/952-4;82/7475;164/5778", "google_scholar": "OFtDgzwAAAAJ;;1P8Zu04AAAAJ;Y8O9N_0AAAAJ;nu3W--sAAAAJ;_EJrRVAAAAAJ", "orcid": "0000-0001-9897-4003;;;;;", "linkedin": "ncklas;;;;;", "or_profile": "~Nicklas_Hansen1;~Yixin_Lin1;~Hao_Su1;~Xiaolong_Wang3;~Vikash_Kumar2;~Aravind_Rajeswaran1", "aff": "University of California, San Diego;Facebook AI Research;University of California, San Diego;University of California, San Diego;Meta Facebook;Meta Facebook", "aff_domain": "ucsd.edu;facebook.com;ucsd.edu;ucsd.edu;facebook.com;meta.com", "position": "PhD student;Research engineer;Assistant Professor;Assistant Professor;Researcher;Research Scientist", "bibtex": "@inproceedings{\nhansen2023modem,\ntitle={MoDem: Accelerating Visual Model-Based Reinforcement Learning with Demonstrations},\nauthor={Nicklas Hansen and Yixin Lin and Hao Su and Xiaolong Wang and Vikash Kumar and Aravind Rajeswaran},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=JdTnc9gjVfJ}\n}", "github": "", "project": "", "reviewers": "FmiB;9fcC;6dVv;Ycvo", "pdf_size": 2937243, "recommendation": "6;6;6;8", "confidence": "4;3;4;3", "correctness": "4;3;4;4", "technical_novelty": "2;1;2;3", "empirical_novelty": "3;2;3;4", "wc_summary_paper": "116;57;120;46", "wc_strength_and_weaknesses": "190;338;83;93", "wc_clarity_quality_novelty_and_reproducibility": "130;66;47;15", "wc_summary_review": "64;41;16;27", "wc_review": "500;502;266;181", "wc_reply_reviewers": "4;46;0;0", "wc_reply_authors": "398;618;331;252", "reply_reviewers": "1;1;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 84.75, 33.50652921446804 ], "wc_strength_and_weaknesses_avg": [ 176.0, 102.44266689226711 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 64.5, 41.97916149710473 ], "wc_summary_review_avg": [ 37.0, 17.930421077041107 ], "wc_review_avg": [ 362.25, 141.96896667934158 ], "wc_reply_reviewers_avg": [ 12.5, 19.41004894378167 ], "wc_reply_authors_avg": [ 399.75, 136.19173065939063 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 49, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=665293981515921733&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=JdTnc9gjVfJ", "email": "ucsd.edu;facebook.com;ucsd.edu;ucsd.edu;facebook.com;meta.com", "author_num": 6, "aff_unique_index": "0;1;0;0;1;1", "aff_unique_norm": "University of California, San Diego;Meta", "aff_unique_dep": ";Facebook AI Research", "aff_unique_url": "https://www.ucsd.edu;https://research.facebook.com", "aff_unique_abbr": "UCSD;FAIR", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "San Diego;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Logical Entity Representation in Knowledge-Graphs for Differentiable Rule Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12221", "id": "JdgO-ht1uTN", "poster": "/media/PosterPDFs/ICLR%202023/12221.png?t=1680921570.822693", "openreview": "https://openreview.net/forum?id=JdgO-ht1uTN", "slides": "https://iclr.cc/virtual/2023/poster/12221", "video": "https://iclr.cc/virtual/2023/poster/12221", "author_site": "Chi Han, Qizheng He, Charles Yu, Xinya Du, Hanghang Tong, Heng Ji", "tldr": "We propose logical entity representation (LERP) to incorporate contextual information of entities into logical rule learning.", "abstract": "Probabilistic logical rule learning has shown great strength in logical rule mining and knowledge graph completion. It learns logical rules to predict missing edges by reasoning on existing edges in the knowledge graph. However, previous efforts have largely been limited to only modeling chain-like Horn clauses such as R1(x; z) ^ R2(z; y) ) H(x; y). This formulation overlooks additional contextual information from neighboring sub-graphs of entity variables x, y and z. Intuitively, there is a large gap here, as local sub-graphs have been found to provide important information for knowledge graph completion. Inspired by these observations, we propose Logical Entity RePresentation (LERP) to encode contextual information of entities in the knowledge graph. A LERP is designed as a vector of probabilistic logical functions on the entity\u2019s neighboring sub-graph. It is an interpretable representation while allowing for differentiable optimization. We can then incorporate LERP into probabilistic logical rule learning to learn more expressive rules. Empirical results demonstrate that with LERP, our model outperforms other rule learning methods in knowledge graph completion and is comparable or even superior to state-of-the-art black-box methods. Moreover, we find that our model can discover a more expressive family of logical rules. 
LERP can also be further combined with embedding learning methods like TransE to make it more interpretable.", "keywords": "Probabilistic Logical Rule Learning;Knowledge Graph Completion;Logical Representation Learning", "primary_area": "", "supplementary_material": "/attachment/b3b03f5aebd95011424905631af65b1576f8a3f4.zip", "author": "Chi Han;Qizheng He;Charles Yu;Xinya Du;Hanghang Tong;Heng Ji", "authorids": "~Chi_Han1;~Qizheng_He1;~Charles_Yu1;~Xinya_Du1;~Hanghang_Tong3;~Heng_Ji3", "gender": "M;;;M;;F", "homepage": "https://glaciohound.github.io;;;https://xinyadu.github.io;http://tonghanghang.org;http://blender.cs.illinois.edu/hengji.html", "dblp": "255/6993;;265/1258.html;200/8114;58/1757;", "google_scholar": "https://scholar.google.com.sg/citations?user=DcSvbuAAAAAJ;s61PLLkAAAAJ;;R-lKQqkAAAAJ;RaINcuUAAAAJ;z7GCqT4AAAAJ", "orcid": "0000-0001-6235-5841;;;;0000-0003-4405-3887;", "linkedin": "chi-han-b01a93141/;;charles-t-yu/;;htong/;", "or_profile": "~Chi_Han1;~Qizheng_He1;~Charles_Yu1;~Xinya_Du1;~Hanghang_Tong3;~Heng_Ji3", "aff": "University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;University of Texas at Dallas;University of Illinois, Urbana Champaign;University of Illinois, Urbana-Champaign", "aff_domain": "illinois.edu;illinois.edu;illinois.edu;utdallas.edu;illinois.edu;uiuc.edu", "position": "PhD student;PhD student;PhD student;Assistant Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nhan2023logical,\ntitle={Logical Entity Representation in Knowledge-Graphs for Differentiable Rule Learning},\nauthor={Chi Han and Qizheng He and Charles Yu and Xinya Du and Hanghang Tong and Heng Ji},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=JdgO-ht1uTN}\n}", "github": "", "project": "", "reviewers": "F1BJ;dYXp;N1P8;i7dT", "pdf_size": 3045663, "recommendation": "5;5;6;8", "confidence": "4;4;3;3", "correctness": "2;2;3;4", "technical_novelty": "3;3;2;3", "empirical_novelty": "2;3;2;3", "wc_summary_paper": "37;67;35;45", "wc_strength_and_weaknesses": "159;66;103;240", "wc_clarity_quality_novelty_and_reproducibility": "11;443;8;47", "wc_summary_review": "23;55;233;46", "wc_review": "230;631;379;378", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "951;1331;282;504", "reply_reviewers": "0;0;0;0", "reply_authors": "2;2;1;1", "recommendation_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.75, 0.82915619758885 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 46.0, 12.68857754044952 ], "wc_strength_and_weaknesses_avg": [ 142.0, 65.55532015023647 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 127.25, 182.94312640818185 ], "wc_summary_review_avg": [ 89.25, 83.81042596240637 ], "wc_review_avg": [ 404.5, 144.13968919072914 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 767.0, 405.0759188102892 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.8164965809277259, "corr_recommendation_correctness": 0.9847319278346618, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7725341232882404725&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=JdgO-ht1uTN", "email": 
"illinois.edu;illinois.edu;illinois.edu;utdallas.edu;illinois.edu;uiuc.edu", "author_num": 6, "aff_unique_index": "0;0;0;1;0;2", "aff_unique_norm": "University of Illinois Urbana-Champaign;University of Texas at Dallas;University of Illinois", "aff_unique_dep": ";;", "aff_unique_url": "https://illinois.edu;https://www.utdallas.edu;https://illinois.edu", "aff_unique_abbr": "UIUC;UT Dallas;UIUC", "aff_campus_unique_index": "0;0;0;1;0;0", "aff_campus_unique": "Urbana-Champaign;Dallas", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "Jdj0fZhswJC", "title": "Convergence is Not Enough: Average-Case Performance of No-Regret Learning Dynamics", "track": "main", "status": "Reject", "tldr": "Beyond convergence, average case metrics rely on regions of attraction to compare the performance of different dynamics in multi-agent games. ", "abstract": "Learning in games involves two main challenges, even in settings in which agents seek to coordinate: convergence to equilibria and selection of good equilibria. Unfortunately, solving the issue of convergence, which is the focus of state-of-the-art models, conveys little information about the quality of the equilibria that are eventually reached, often none at all. In this paper, we study a class of games in which q-replicator (QRD), a widely-studied class of no-regret learning dynamics that include gradient descent, \u201cstandard\u201d replicator, and log-barrier dynamics as special cases, can be shown to converge pointwise to Nash equilibria. This is the starting point for our main task, which is the mathematically challenging problem of performance. In our main contribution, we quantify both conceptually and experimentally the outcome of optimal learning dynamics via average performance metrics, i.e., metrics that couple the regions of attraction with the quality of each attracting point. We provide an exhaustive comparison between gradient descent and \u201cstandard\u201d replicator in a class of games with severe equilibrium selection problems and empirically extend our results to all dynamics in the QRD class. 
Our results combine tools from machine learning, game theory, and dynamical systems and provide a framework to initiate the systematic comparison of different optimal learning dynamics in arbitrary games.", "keywords": "q-replicator dynamics;potential games;average price of anarchy;learning", "primary_area": "", "supplementary_material": "/attachment/a02c02af7e54873170002ab2726a0d1e864ce557.zip", "author": "Iosif Sakos;Stefanos Leonardos;William Overman;Stelios Andrew Stavroulakis;Ioannis Panageas;Georgios Piliouras", "authorids": "~Iosif_Sakos1;~Stefanos_Leonardos1;~William_Overman1;~Stelios_Andrew_Stavroulakis1;~Ioannis_Panageas1;~Georgios_Piliouras1", "gender": "M;M;;M;;M", "homepage": ";https://stefanosleonardos.com/;https://steliostavroulakis.github.io/;https://panageas.github.io;;https://willoverman.github.io/", "dblp": "271/1082;192/1237;315/4382;139/3829;62/1236;294/4924", "google_scholar": "https://scholar.google.gr/citations?user=69xvSfQAAAAJ;PtiGrVsAAAAJ;PPCdElkAAAAJ;5NiFWuwAAAAJ;;B2XPxEkAAAAJ", "orcid": "0000-0002-1871-9078;;0000-0002-2059-3152;;;", "linkedin": "joseph-sakos-3b3a6a200?lipi=urn%3Ali%3Apage%3Ad_flagship3_profile_view_base_contact_details%3BP9xevRgnRfKhbYYoPyDf3Q%3D%3D;stefanos-leonardos/;https://linkedin.com/in/steliostavroulakis;;;", "or_profile": "~Iosif_Sakos1;~Stefanos_Leonardos1;~Stelios_Andrew_Stavroulakis1;~Ioannis_Panageas1;~Georgios_Piliouras1;~Will_Overman1", "aff": "Singapore University of Technology and Design;King's College London, University of London;University of California, Irvine;Donald Bren School of Information and Computer Sciences, University of California, Irvine;Singapore University of Technology and Design;Stanford University", "aff_domain": "sutd.edu.sg;kcl.ac.uk;uci.edu;ics.uci.edu;sutd.edu.sg;stanford.edu", "position": "PhD student;Lecturer;PhD student;Assistant Professor;Associate Professor;PhD student", "bibtex": "@misc{\nsakos2023convergence,\ntitle={Convergence is Not Enough: Average-Case Performance of No-Regret Learning Dynamics},\nauthor={Iosif Sakos and Stefanos Leonardos and William Overman and Stelios Andrew Stavroulakis and Ioannis Panageas and Georgios Piliouras},\nyear={2023},\nurl={https://openreview.net/forum?id=Jdj0fZhswJC}\n}", "github": "", "project": "", "reviewers": "S7pu;jma7;jh9A", "site": "https://openreview.net/forum?id=Jdj0fZhswJC", "pdf_size": 2872922, "recommendation": "5;5;8", "confidence": "3;2;4", "correctness": "4;4;4", "technical_novelty": "2;2;4", "empirical_novelty": "2;0;2", "wc_summary_paper": "35;21;97", "wc_strength_and_weaknesses": "208;183;220", "wc_clarity_quality_novelty_and_reproducibility": "158;18;4", "wc_summary_review": "38;33;8", "wc_review": "439;255;329", "wc_reply_reviewers": "250;0;0", "wc_reply_authors": "622;344;126", "reply_reviewers": "1;0;0", "reply_authors": "2;1;1", "recommendation_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "correctness_avg": [ 4.0, 0.0 ], "technical_novelty_avg": [ 2.6666666666666665, 0.9428090415820634 ], "empirical_novelty_avg": [ 1.3333333333333333, 0.9428090415820634 ], "wc_summary_paper_avg": [ 51.0, 33.0252428706689 ], "wc_strength_and_weaknesses_avg": [ 203.66666666666666, 15.412837362262524 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 60.0, 69.53176732017292 ], "wc_summary_review_avg": [ 26.333333333333332, 13.123346456686352 ], "wc_review_avg": [ 341.0, 75.59541432300419 ], "wc_reply_reviewers_avg": [ 83.33333333333333, 117.85113019775793 ], "wc_reply_authors_avg": [ 364.0, 202.98440005741 ], 
"reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.8660254037844387, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16133583257887683427&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;2;2;0;3", "aff_unique_norm": "Singapore University of Technology and Design;King's College London;University of California, Irvine;Stanford University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.sutd.edu.sg;https://www.kcl.ac.uk;https://www.uci.edu;https://www.stanford.edu", "aff_unique_abbr": "SUTD;KCL;UCI;Stanford", "aff_campus_unique_index": "1;1;2", "aff_campus_unique": ";Irvine;Stanford", "aff_country_unique_index": "0;1;2;2;0;2", "aff_country_unique": "Singapore;United Kingdom;United States" }, { "id": "Jg-oXkENo2p", "title": "Re-calibrated Wasserstein GAN for large-scale imputation with informative missing", "track": "main", "status": "Withdraw", "tldr": "We develop a novel method for imputing missing data in large scale health records using a Wasserstein GAN whose loss function is reweighted by missingness probability estimates", "abstract": "Missing data are pervasive in electronic health records (EHR) and oftentimes the missingness is informative (i.e. Missing Not At Random). Presently available imputation methods typically do not account for this informative missingness or are computationally infeasible to handle the scale of EHR data. We develop a deep learning imputation method based on \\textit{recalibrating} a Wasserstein Generative Adversarial Network (WGAN) to account for informative missingness in high-dimensional quantitative medical data. We propose a new quantile re-weighting technique to ensure distributional equivariance under informative missingness and integrate it with WGAN to enable efficient imputations in large-scale observational data in presence of informative missingness and covariate imbalance. 
Results from our proposed algorithm show better recovery compared to present methods in both synthetic and real-world data from the Reactions to Acute Hospitalization (REACH) and laboratory test results of COVID-19 patients in the New York Metropolitan area from the INSIGHT dataset.\n", "keywords": "deep learning;data imputation;missing data;neural networks;Wasserstein GAN;quantile regression", "primary_area": "", "supplementary_material": "/attachment/e012c907b2c1d4a568bc8fad59787497ef58ce9d.zip", "author": "Mark He;Yiming Li;Katharina Schultebraucks;Ian Kronish;Jianhua Hu;Ying Wei", "authorids": "~Mark_He1;yl4925@cumc.columbia.edu;ks3796@cumc.columbia.edu;ik2293@cumc.columbia.edu;jh3992@cumc.columbia.edu;yw2148@cumc.columbia.edu", "gender": "M;;;;;", "homepage": "https://sites.google.com/view/markhe;;;;;", "dblp": ";;;;;", "google_scholar": "CvbDaZUAAAAJ;;;;;", "orcid": ";;;;;", "linkedin": "mark-he-80b828131/;;;;;", "or_profile": "~Mark_He1;yl4925@cumc.columbia.edu;ks3796@cumc.columbia.edu;ik2293@cumc.columbia.edu;jh3992@cumc.columbia.edu;yw2148@cumc.columbia.edu", "aff": "Columbia University;;;;;", "aff_domain": "columbia.edu;;;;;", "position": "Postdoc;;;;;", "bibtex": "@misc{\nhe2023recalibrated,\ntitle={Re-calibrated Wasserstein {GAN} for large-scale imputation with informative missing},\nauthor={Mark He and Yiming Li and Katharina Schultebraucks and Ian Kronish and Jianhua Hu and Ying Wei},\nyear={2023},\nurl={https://openreview.net/forum?id=Jg-oXkENo2p}\n}", "github": "", "project": "", "reviewers": "rqcD;chDB;vJjM;d4wE", "site": "https://openreview.net/forum?id=Jg-oXkENo2p", "pdf_size": 824183, "recommendation": "3;3;3;5", "confidence": "4;3;3;2", "correctness": "3;2;3;3", "technical_novelty": "2;3;2;3", "empirical_novelty": "2;3;3;2", "wc_summary_paper": "78;94;44;61", "wc_strength_and_weaknesses": "303;273;138;65", "wc_clarity_quality_novelty_and_reproducibility": "109;65;24;324", "wc_summary_review": "56;37;46;29", "wc_review": "546;469;252;479", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 69.25, 18.673175948402566 ], "wc_strength_and_weaknesses_avg": [ 194.75, 97.33543804802031 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 130.5, 115.69031938757884 ], "wc_summary_review_avg": [ 42.0, 10.074720839804943 ], "wc_review_avg": [ 436.5, 110.55880788069307 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.816496580927726, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:RmctjCGbPdcJ:scholar.google.com/&scioq=Re-calibrated+Wasserstein+GAN+for+large-scale+imputation+with+informative+missing&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Columbia University", "aff_unique_dep": "", "aff_unique_url": "https://www.columbia.edu", "aff_unique_abbr": "Columbia", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "JgwnZxlxA46", "title": "On Gradient Descent Convergence beyond the Edge of Stability", "track": "main", "status": 
"Reject", "tldr": "We prove convergence results of Gradient Descent beyond Edge of Stability in several nonlinear and high-dimensional problems.", "abstract": "Gradient Descent (GD) is a powerful workhorse of modern machine learning thanks to its scalability and efficiency in high-dimensional spaces. Its ability to find local minimisers is only guaranteed for losses with Lipschitz gradients, where it can be seen as a `bona-fide' discretisation of an underlying gradient flow. Yet, many ML setups involving overparametrised models do not fall into this problem class, which has motivated research beyond the so-called ``Edge of Stability'' (EoS), where the step-size crosses the admissibility threshold inversely proportional to the Lipschitz constant above. Perhaps surprisingly, GD has been empirically observed to still converge regardless of local instability and oscillatory behavior.\n\nThe incipient theoretical analysis of this phenomena has mainly focused in the overparametrised regime, where the effect of choosing a large learning rate may be associated to a `Sharpness-Minimisation' implicit regularisation within the manifold of minimisers, under appropriate asymptotic limits. In contrast, in this work we directly examine the conditions for such unstable convergence, focusing on simple, yet representative, learning problems. Specifically, we characterize a local condition involving third-order derivatives that stabilizes oscillations of GD above the EoS, and leverage such property in a teacher-student setting, under population loss. Finally, focusing on Matrix Factorization, we establish a non-asymptotic `Local Implicit Bias' of GD above the EoS, whereby quasi-symmetric initializations converge to symmetric solutions --- where sharpness is minimum amongst all minimisers. 
", "keywords": "gradient descent;edge of stability", "primary_area": "", "supplementary_material": "", "author": "Lei Chen;Joan Bruna", "authorids": "~Lei_Chen4;~Joan_Bruna1", "gender": "M;M", "homepage": "https://leichen2018.github.io;http://cims.nyu.edu/~bruna", "dblp": "09/3666-62;44/8776", "google_scholar": "lqVlvEMAAAAJ;L4bNmsMAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Lei_Chen4;~Joan_Bruna1", "aff": "New York University;New York University", "aff_domain": "nyu.edu;nyu.edu", "position": "PhD student;Associate Professor", "bibtex": "@misc{\nchen2023on,\ntitle={On Gradient Descent Convergence beyond the Edge of Stability},\nauthor={Lei Chen and Joan Bruna},\nyear={2023},\nurl={https://openreview.net/forum?id=JgwnZxlxA46}\n}", "github": "", "project": "", "reviewers": "RMoF;WSV5;Hb8V;zoGM", "site": "https://openreview.net/forum?id=JgwnZxlxA46", "pdf_size": 966918, "recommendation": "5;5;5;6", "confidence": "3;4;3;3", "correctness": "3;3;2;2", "technical_novelty": "2;2;4;2", "empirical_novelty": "2;0;0;2", "wc_summary_paper": "160;106;51;87", "wc_strength_and_weaknesses": "247;98;326;371", "wc_clarity_quality_novelty_and_reproducibility": "75;377;25;21", "wc_summary_review": "131;60;43;37", "wc_review": "613;641;445;516", "wc_reply_reviewers": "0;24;144;68", "wc_reply_authors": "748;1013;734;1204", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;1;2", "recommendation_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.5, 0.8660254037844386 ], "empirical_novelty_avg": [ 1.0, 1.0 ], "wc_summary_paper_avg": [ 101.0, 39.37638886439436 ], "wc_strength_and_weaknesses_avg": [ 260.5, 103.78945033094645 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 124.5, 147.3253202949174 ], "wc_summary_review_avg": [ 67.75, 37.47916087641237 ], "wc_review_avg": [ 553.75, 78.062074658569 ], "wc_reply_reviewers_avg": [ 59.0, 54.79963503528103 ], "wc_reply_authors_avg": [ 924.75, 195.82820915281843 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": -0.5773502691896257, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14222779157904082432&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0", "aff_unique_norm": "New York University", "aff_unique_dep": "", "aff_unique_url": "https://www.nyu.edu", "aff_unique_abbr": "NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "JhsVJoK13u", "title": "Active Learning at the ImageNet Scale", "track": "main", "status": "Withdraw", "tldr": "We identify sampling-imbalance as a major failure mode in large-scale active learning, and we propose Balanced Selection, a simple, scalable AL algorithm to remedy it.", "abstract": "Active learning (AL) algorithms aim to identify an optimal subset of data for annotation, such that deep neural networks (DNN) can achieve better performance when trained on this labeled subset. AL is especially impactful in industrial scale settings where data labeling costs are high and practitioners use every tool at their disposal to improve model performance. The recent success of self-supervised pretraining (SSP) highlights the importance of harnessing abundant unlabeled data to boost model performance. 
By combining AL with SSP, we can make use of unlabeled data while simultaneously labeling and training on particularly informative samples.\nIn this work, we study a combination of AL and SSP on ImageNet. We find that performance on small toy datasets \u2013 the typical benchmark setting in the literature \u2013 is not representative of performance on ImageNet due to the class imbalanced samples selected by an active learner. Among the existing baselines we test, popular AL algorithms across a variety of small and large scale settings fail to outperform random sampling. To remedy the class-imbalance problem, we propose Balanced Selection (BASE), a simple, scalable AL algorithm that outperforms random sampling consistently by selecting more balanced samples for annotation than existing methods.", "keywords": "active learning;large-scale active learning", "primary_area": "", "supplementary_material": "", "author": "Hong-Min Chu;Ping-yeh Chiang;Zeyad Emam;Wojciech Czaja;Richard Leapman;Micah Goldblum;Tom Goldstein", "authorids": "~Hong-Min_Chu1;~Ping-yeh_Chiang1;~Zeyad_Emam1;~Wojciech_Czaja1;leapmanr@mail.nih.gov;~Micah_Goldblum1;~Tom_Goldstein1", "gender": ";;M;;;;M", "homepage": ";;https://www.linkedin.com/in/zeyademam/;;;;https://www.cs.umd.edu/~tomg/", "dblp": "185/0720;236/4288;220/1358;;;241/7231;25/8184", "google_scholar": ";WUoMq1IAAAAJ;;;;pGDKzuUAAAAJ;KmSuVtgAAAAJ", "orcid": ";;;;;;", "linkedin": ";;zeyademam/;;;;", "or_profile": "~Hong-Min_Chu1;~Ping-yeh_Chiang1;~Zeyad_Emam1;~Wojciech_Czaja1;leapmanr@mail.nih.gov;~Micah_Goldblum1;~Tom_Goldstein1", "aff": "Department of Computer Science, University of Maryland, College Park;University of Maryland, College Park;University of Maryland Institute for Advanced Computer Studies, University of Maryland, College Park;;;New York University;University of Maryland, College Park", "aff_domain": "cs.umd.edu;umd.edu;umiacs.umd.edu;;;nyu.edu;umd.edu", "position": "PhD student;PhD student;PhD student;;;Postdoc;Full Professor", "bibtex": "@misc{\nchu2023active,\ntitle={Active Learning at the ImageNet Scale},\nauthor={Hong-Min Chu and Ping-yeh Chiang and Zeyad Emam and Wojciech Czaja and Richard Leapman and Micah Goldblum and Tom Goldstein},\nyear={2023},\nurl={https://openreview.net/forum?id=JhsVJoK13u}\n}", "github": "", "project": "", "reviewers": "fdA8;7EyJ;UWqu", "site": "https://openreview.net/forum?id=JhsVJoK13u", "pdf_size": 7325388, "recommendation": "3;3;5", "confidence": "3;4;4", "correctness": "3;2;4", "technical_novelty": "2;2;2", "empirical_novelty": "2;2;2", "wc_summary_paper": "44;73;78", "wc_strength_and_weaknesses": "500;337;195", "wc_clarity_quality_novelty_and_reproducibility": "40;7;36", "wc_summary_review": "38;12;38", "wc_review": "622;429;347", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 65.0, 14.98888477061141 ], "wc_strength_and_weaknesses_avg": [ 344.0, 124.61407090159067 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 27.666666666666668, 14.70449666674185 ], "wc_summary_review_avg": [ 29.333333333333332, 12.256517540566824 ], "wc_review_avg": [ 466.0, 115.27647924302106 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 
], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.5, "corr_recommendation_correctness": 0.8660254037844387, "gs_citation": 38, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8703093568101232507&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;1;2;1", "aff_unique_norm": "University of Maryland, College Park;University of Maryland;New York University", "aff_unique_dep": "Department of Computer Science;;", "aff_unique_url": "https://www/umd.edu;https://www/umd.edu;https://www.nyu.edu", "aff_unique_abbr": "UMD;UMD;NYU", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "College Park;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "Ji1_32XWMxK", "title": "Optimistic Exploration in Reinforcement Learning Using Symbolic Model Estimates", "track": "main", "status": "Reject", "tldr": "", "abstract": "There has been increasing interest in using symbolic models along with reinforcement learning (RL) problems, where these coarser abstract models are used as a way to provide higher level guidance to the RL agent. However, most of these works are limited by their assumption that they have access to a symbolic approximation of the underlying problem. To address this problem, we introduce a new method for learning optimistic symbolic approximations of the underlying world model. We will see how these representations, coupled with fast diverse planners developed from the automated planning community, provides us with a new paradigm for optimistic exploration in sparse reward settings. We also investigate how we could speed up the learning process by generalizing learned model dynamics across similar actions with minimal human input. 
We evaluate the method by testing it on multiple benchmark domains and comparing it with other RL strategies for sparse reward settings, including hierarchical RL and intrinsic-reward-based exploration.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/23d758b0fca7e0dbc3eacd36231c64bbdf03ac1f.zip", "author": "Sarath Sreedharan;Michael Katz", "authorids": "~Sarath_Sreedharan1;~Michael_Katz1", "gender": ";", "homepage": ";https://researcher.watson.ibm.com/researcher/view.php?person=ibm-Michael.Katz1", "dblp": "162/5110;75/1295-1", "google_scholar": ";pltkfcMAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Sarath_Sreedharan1;~Michael_Katz1", "aff": "Colorado State University;International Business Machines", "aff_domain": "colostate.edu;ibm.com", "position": "Assistant Professor;Principal Researcher", "bibtex": "@misc{\nsreedharan2023optimistic,\ntitle={Optimistic Exploration in Reinforcement Learning Using Symbolic Model Estimates},\nauthor={Sarath Sreedharan and Michael Katz},\nyear={2023},\nurl={https://openreview.net/forum?id=Ji1_32XWMxK}\n}", "github": "", "project": "", "reviewers": "dzDP;mjER;m6BR;DcwG", "site": "https://openreview.net/forum?id=Ji1_32XWMxK", "pdf_size": 417476, "recommendation": "3;3;6;6", "confidence": "4;3;2;3", "correctness": "2;3;3;4", "technical_novelty": "2;2;4;3", "empirical_novelty": "2;2;4;3", "wc_summary_paper": "93;67;71;83", "wc_strength_and_weaknesses": "313;195;90;127", "wc_clarity_quality_novelty_and_reproducibility": "52;138;202;27", "wc_summary_review": "30;29;118;62", "wc_review": "488;429;481;299", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "564;684;258;768", "reply_reviewers": "0;0;0;0", "reply_authors": "2;2;2;2", "recommendation_avg": [ 4.5, 1.5 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 78.5, 10.23474474522936 ], "wc_strength_and_weaknesses_avg": [ 181.25, 84.87748523607424 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 104.75, 69.62533662396183 ], "wc_summary_review_avg": [ 59.75, 36.15504805694496 ], "wc_review_avg": [ 424.25, 75.82009957788237 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 568.5, 193.37205072088366 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.7071067811865476, "corr_recommendation_correctness": 0.7071067811865476, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18445748508864238637&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1", "aff_unique_norm": "Colorado State University;International Business Machines Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://www.colostate.edu;https://www.ibm.com", "aff_unique_abbr": "CSU;IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Theoretical Characterization of the Generalization Performance of Overfitted Meta-Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11209", "id": "Jifob4dSh99", "poster": "", "openreview": "https://openreview.net/forum?id=Jifob4dSh99", "slides": "https://iclr.cc/virtual/2023/poster/11209", "video": "https://iclr.cc/virtual/2023/poster/11209", "author_site": "Peizhong Ju, 
Yingbin Liang, Ness Shroff", "tldr": "", "abstract": "Meta-learning has arisen as a successful method for improving training performance by training over many similar tasks, especially with deep neural networks (DNNs). However, the theoretical understanding of when and why overparameterized models such as DNNs can generalize well in meta-learning is still limited. As an initial step towards addressing this challenge, this paper studies the generalization performance of overfitted meta-learning under a linear regression model with Gaussian features. In contrast to a few recent studies along the same line, our framework allows the number of model parameters to be arbitrarily larger than the number of features in the ground truth signal, and hence naturally captures the overparameterized regime in practical deep meta-learning. We show that the overfitted min $\\ell_2$-norm solution of model-agnostic meta-learning (MAML) can be beneficial, which is similar to the recent remarkable findings on \"benign overfitting\" and \"double descent\" phenomenon in the classical (single-task) linear regression. However, due to the uniqueness of meta-learning such as task-specific gradient descent inner training and the diversity/fluctuation of the ground-truth signals among training tasks, we find new and interesting properties that do not exist in single-task linear regression. We first provide a high-probability upper bound (under reasonable tightness) on the generalization error, where certain terms decrease when the number of features increases. Our analysis suggests that benign overfitting is more significant and easier to observe when the noise and the diversity/fluctuation of the ground truth of each training task are large. Under this circumstance, we show that the overfitted min $\\ell_2$-norm solution can achieve an even lower generalization error than the underparameterized solution.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/1f61d67f46cffd8192416131a6f911d1d17cf476.zip", "author": "Peizhong Ju;Yingbin Liang;Ness Shroff", "authorids": "~Peizhong_Ju1;~Yingbin_Liang1;~Ness_Shroff1", "gender": "M;F;M", "homepage": ";https://sites.google.com/view/yingbinliang/home;http://newslab.ece.ohio-state.edu/", "dblp": "167/9021;51/332;67/1991", "google_scholar": "VDzpfOYAAAAJ;lGgLAiIAAAAJ;https://scholar.google.com.tw/citations?user=5kL-ZrAAAAAJ", "orcid": "0000-0002-4569-3539;;0000-0002-4606-6879", "linkedin": ";;nessshroff/", "or_profile": "~Peizhong_Ju1;~Yingbin_Liang1;~Ness_Shroff1", "aff": "Ohio State University, Columbus;The Ohio State University;Ohio State University, Columbus", "aff_domain": "osu.edu;osu.edu;osu.edu", "position": "Postdoc;Professor;Full Professor", "bibtex": "@inproceedings{\nju2023theoretical,\ntitle={Theoretical Characterization of the Generalization Performance of Overfitted Meta-Learning},\nauthor={Peizhong Ju and Yingbin Liang and Ness Shroff},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=Jifob4dSh99}\n}", "github": "", "project": "", "reviewers": "uCi7;LapN;Akh7;3kzv", "pdf_size": 674824, "recommendation": "6;6;8;8", "confidence": "4;2;3;2", "correctness": "4;3;4;3", "technical_novelty": "4;3;3;4", "empirical_novelty": "4;0;0;3", "wc_summary_paper": "47;60;58;54", "wc_strength_and_weaknesses": "216;325;194;43", "wc_clarity_quality_novelty_and_reproducibility": "139;39;2;26", "wc_summary_review": "69;21;35;134", "wc_review": "471;445;289;257", 
"wc_reply_reviewers": "625;26;0;15", "wc_reply_authors": "2117;942;550;765", "reply_reviewers": "3;1;0;1", "reply_authors": "5;3;1;2", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.5, 0.5 ], "empirical_novelty_avg": [ 1.75, 1.7853571071357126 ], "wc_summary_paper_avg": [ 54.75, 4.968651728587948 ], "wc_strength_and_weaknesses_avg": [ 194.5, 100.55471147589256 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 51.5, 52.23265262266507 ], "wc_summary_review_avg": [ 64.75, 43.62553724597555 ], "wc_review_avg": [ 365.5, 93.64160400164022 ], "wc_reply_reviewers_avg": [ 166.5, 264.87591434481163 ], "wc_reply_authors_avg": [ 1093.5, 607.0026770945907 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.75, 1.479019945774904 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.30151134457776363, "corr_recommendation_correctness": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14040879153679951922&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=Jifob4dSh99", "email": "osu.edu;osu.edu;osu.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Ohio State University", "aff_unique_dep": "", "aff_unique_url": "https://www.osu.edu", "aff_unique_abbr": "OSU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Columbus;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Auto-Encoding Goodness of Fit", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10785", "id": "JjCAdMUlu9v", "poster": "", "openreview": "https://openreview.net/forum?id=JjCAdMUlu9v", "slides": "https://iclr.cc/virtual/2023/poster/10785", "video": "https://iclr.cc/virtual/2023/poster/10785", "author_site": "Aaron Palmer, Zhiyi Chi, Derek Aguiar, Jinbo Bi", "tldr": "", "abstract": "For generative autoencoders to learn a meaningful latent representation for data generation, a careful balance must be achieved between reconstruction error and how close the distribution in the latent space is to the prior. However, this balance is challenging to achieve due to a lack of criteria that work both at the mini-batch (local) and aggregated posterior (global) level. In this work, we develop the Goodness of Fit Autoencoder (GoFAE), which incorporates hypothesis tests at two levels. At the mini-batch level, it uses GoF test statistics as regularization objectives. At a more global level, it selects a regularization coefficient based on higher criticism, i.e., a test on the uniformity of the local GoF p-values. We justify the use of GoF tests by providing a relaxed $L_2$-Wasserstein bound on the distance between the latent distribution and target prior. We propose to use GoF tests and prove that optimization based on these tests can be done with stochastic gradient (SGD) descent on a compact Riemannian manifold. Empirically, we show that our higher criticism parameter selection procedure balances reconstruction and generation using mutual information and uniformity of p-values respectively. 
Finally, we show that GoFAE achieves comparable FID scores and mean squared errors with competing deep generative models while retaining statistical indistinguishability from Gaussian in the latent space based on a variety of hypothesis tests.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Aaron Palmer;Zhiyi Chi;Derek Aguiar;Jinbo Bi", "authorids": "~Aaron_Palmer1;~Zhiyi_Chi1;~Derek_Aguiar1;~Jinbo_Bi1", "gender": ";M;Not Specified;F", "homepage": ";https://stat.uconn.edu/zhiyi-chi/;https://www.derekaguiar.com;https://jinbo-bi.uconn.edu/", "dblp": ";;31/8064.html;26/3430", "google_scholar": ";;kXRSW2QAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0001-9166-8783;0000-0001-6996-4092", "linkedin": ";;derekaguiar/;", "or_profile": "~Aaron_Palmer1;~Zhiyi_Chi1;~Derek_Aguiar1;~Jinbo_Bi1", "aff": "University of Connecticut;University of Connecticut;University of Connecticut;University of Connecticut", "aff_domain": "uconn.edu;uconn.edu;uconn.edu;uconn.edu", "position": "PhD student;Full Professor;Assistant Professor;Professor", "bibtex": "@inproceedings{\npalmer2023autoencoding,\ntitle={Auto-Encoding Goodness of Fit},\nauthor={Aaron Palmer and Zhiyi Chi and Derek Aguiar and Jinbo Bi},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=JjCAdMUlu9v}\n}", "github": "", "project": "", "reviewers": "ySEm;5naA;i5br;Fj2Y", "pdf_size": 29730214, "recommendation": "5;6;6;6", "confidence": "2;2;4;3", "correctness": "2;3;3;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;3;3;2", "wc_summary_paper": "121;88;213;70", "wc_strength_and_weaknesses": "277;284;345;201", "wc_clarity_quality_novelty_and_reproducibility": "274;88;96;44", "wc_summary_review": "18;54;87;37", "wc_review": "690;514;741;352", "wc_reply_reviewers": "420;0;13;125", "wc_reply_authors": "1972;735;505;367", "reply_reviewers": "1;0;1;1", "reply_authors": "4;1;1;2", "recommendation_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 123.0, 55.08629593646681 ], "wc_strength_and_weaknesses_avg": [ 276.75, 51.109563684304725 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 125.5, 87.99289744064575 ], "wc_summary_review_avg": [ 49.0, 25.367301787931645 ], "wc_review_avg": [ 574.25, 153.48350888613408 ], "wc_reply_reviewers_avg": [ 139.5, 169.08060208078277 ], "wc_reply_authors_avg": [ 894.75, 635.6911101313278 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.5222329678670935, "corr_recommendation_correctness": 1.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9263166495678232749&as_sdt=4000005&sciodt=0,18&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=JjCAdMUlu9v", "email": "uconn.edu;uconn.edu;uconn.edu;uconn.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Connecticut", "aff_unique_dep": "", "aff_unique_url": "https://www.uconn.edu", "aff_unique_abbr": "UConn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "JjEtPDn0eRb", "title": "MATS: Memory Attention for Time-Series 
forecasting", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Long-term time series forecasting (LTSF) is still very challenging in many real-world applications. A fundamental difficulty is in efficiently modeling both the short-term temporal patterns and long-term dependencies. in this paper, we introduce a novel two-stage attention-based LTSF model called Memory Attention for Time-Series forecasting (MATS). In stage I, short-term temporal patterns are extracted to a memory bank such that the input time series is represented by a much shorter sequence of memory attentions. In stage II, a sequence-to-sequence predictor is trained to discover long-term dependencies in the memory attention sequence, and forecast memory attentions corresponding to the time series in the future. The use of attention allows a flexible representation, and its shorter sequence length enables the model to more easily learn long-term dependencies. Extensive experiments on a number of multivariate and univariate benchmark datasets demonstrate that MATS outperforms SOTA LTSF methods almost all the time.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Jincheng YU;Lifeng Shen;Han Shi;James Kwok", "authorids": "~Jincheng_YU1;~Lifeng_Shen1;~Han_Shi1;~James_Kwok1", "gender": "M;M;M;", "homepage": "https://lovesykun.cn;https://www.lshenae.cn/;https://han-shi.github.io/;", "dblp": ";65/9544;;", "google_scholar": ";https://scholar.google.com.hk/citations?user=MW_qCGoAAAAJ;https://scholar.google.com.hk/citations?user=Johp_14AAAAJ;", "orcid": ";0000-0003-0787-3835;;", "linkedin": ";;;", "or_profile": "~Jincheng_YU1;~Lifeng_Shen1;~Han_Shi1;~James_Kwok1", "aff": "Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Huawei Technologies Ltd.;", "aff_domain": "ust.hk;ust.hk;huawei.com;", "position": "MS student;PhD student;Principal Researcher;", "bibtex": "@misc{\nyu2023mats,\ntitle={{MATS}: Memory Attention for Time-Series forecasting},\nauthor={Jincheng YU and Lifeng Shen and Han Shi and James Kwok},\nyear={2023},\nurl={https://openreview.net/forum?id=JjEtPDn0eRb}\n}", "github": "", "project": "", "reviewers": "dCxR;zPyW;V16Y", "site": "https://openreview.net/forum?id=JjEtPDn0eRb", "pdf_size": 783141, "recommendation": "5;6;8", "confidence": "3;2;4", "correctness": "3;4;4", "technical_novelty": "2;3;3", "empirical_novelty": "2;0;3", "wc_summary_paper": "34;94;184", "wc_strength_and_weaknesses": "64;122;282", "wc_clarity_quality_novelty_and_reproducibility": "31;6;133", "wc_summary_review": "54;29;56", "wc_review": "183;251;655", "wc_reply_reviewers": "0;0;87", "wc_reply_authors": "598;790;1531", "reply_reviewers": "0;0;1", "reply_authors": "2;2;3", "recommendation_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.6666666666666667, 1.247219128924647 ], "wc_summary_paper_avg": [ 104.0, 61.644140029689765 ], "wc_strength_and_weaknesses_avg": [ 156.0, 92.18821327407677 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 56.666666666666664, 54.932281543328926 ], "wc_summary_review_avg": [ 46.333333333333336, 12.283683848458853 ], "wc_review_avg": [ 363.0, 208.333066666496 ], "wc_reply_reviewers_avg": [ 29.0, 41.012193308819754 ], "wc_reply_authors_avg": [ 973.0, 402.2760246397988 ], "reply_reviewers_avg": [ 0.3333333333333333, 
0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.6546536707079772, "corr_recommendation_correctness": 0.7559289460184545, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:7chOugRLly0J:scholar.google.com/&scioq=MATS:+Memory+Attention+for+Time-Series+forecasting&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;1", "aff_unique_norm": "Hong Kong University of Science and Technology;Huawei", "aff_unique_dep": ";Huawei Technologies", "aff_unique_url": "https://www.ust.hk;https://www.huawei.com", "aff_unique_abbr": "HKUST;Huawei", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Sparse Distributed Memory is a Continual Learner", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10761", "id": "JknGeelZJpHP", "poster": "", "openreview": "https://openreview.net/forum?id=JknGeelZJpHP", "slides": "https://iclr.cc/virtual/2023/poster/10761", "video": "https://iclr.cc/virtual/2023/poster/10761", "author_site": "Trenton Bricken, Xander Davies, Deepak Singh, Dmitry Krotov, Gabriel Kreiman", "tldr": "Improving Sparse Distributed Memory via additional neurobiology results in a deep learning model with strong, organic continual learning and insights into sparse models more broadly.", "abstract": "Continual learning is a problem for artificial neural networks that their biological counterparts are adept at solving. Building on work using Sparse Distributed Memory (SDM) to connect a core neural circuit with the powerful Transformer model, we create a modified Multi-Layered Perceptron (MLP) that is a strong continual learner. We find that every component of our MLP variant translated from biology is necessary for continual learning. 
Our solution is also free from any memory replay or task information, and introduces novel methods to train sparse networks that may be broadly applicable.", "keywords": "Sparse Distributed Memory;Sparsity;Top-K Activation;Continual Learning;Biologically Inspired", "primary_area": "", "supplementary_material": "", "author": "Trenton Bricken;Xander Davies;Deepak Singh;Dmitry Krotov;Gabriel Kreiman", "authorids": "~Trenton_Bricken1;~Xander_Davies1;~Deepak_Singh1;~Dmitry_Krotov2;~Gabriel_Kreiman1", "gender": ";M;;M;M", "homepage": "https://trentbrick.github.io/;;https://mitibmwatsonailab.mit.edu/people/dmitry-krotov/;http://klab.tch.harvard.edu;https://xanderdavies.com/", "dblp": ";;182/2341;12/1367;", "google_scholar": "CP6aLusAAAAJ;;WeD9ll0AAAAJ;WxZ_6nsAAAAJ;", "orcid": ";;;0000-0003-3505-8475;", "linkedin": ";deepak-t-singh/;krotovdmitry;kreiman/;alexander-l-davies", "or_profile": "~Trenton_Bricken1;~Deepak_Singh1;~Dmitry_Krotov2;~Gabriel_Kreiman1;~Alexander_Davies1", "aff": "Harvard University;Harvard University;Massachusetts Institute of Technology;Harvard Medical School;Harvard University", "aff_domain": "harvard.edu;harvard.edu;mit.edu;harvard.edu;harvard.edu", "position": "PhD student;Undergrad student;Researcher;Full Professor;Undergrad student", "bibtex": "@inproceedings{\nbricken2023sparse,\ntitle={Sparse Distributed Memory is a Continual Learner},\nauthor={Trenton Bricken and Xander Davies and Deepak Singh and Dmitry Krotov and Gabriel Kreiman},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=JknGeelZJpHP}\n}", "github": "", "project": "", "reviewers": "HvB9;1XFe;krw5;2ptq", "pdf_size": 13945368, "recommendation": "5;6;8;8", "confidence": "3;4;3;4", "correctness": "3;3;4;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;2;4;3", "wc_summary_paper": "143;74;206;60", "wc_strength_and_weaknesses": "441;318;282;87", "wc_clarity_quality_novelty_and_reproducibility": "182;71;191;73", "wc_summary_review": "70;28;55;48", "wc_review": "836;491;734;268", "wc_reply_reviewers": "241;171;12;479", "wc_reply_authors": "1616;2413;515;2157", "reply_reviewers": "1;2;1;2", "reply_authors": "4;5;1;6", "recommendation_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 120.75, 58.39252948794049 ], "wc_strength_and_weaknesses_avg": [ 282.0, 127.0846174798508 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 129.25, 57.34271967739235 ], "wc_summary_review_avg": [ 50.25, 15.105876340020794 ], "wc_review_avg": [ 582.25, 220.5089283906663 ], "wc_reply_reviewers_avg": [ 225.75, 168.11807606560336 ], "wc_reply_authors_avg": [ 1675.25, 729.0488238108611 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 4.0, 1.8708286933869707 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.19245008972987526, "corr_recommendation_correctness": 0.5555555555555555, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6419799684933930936&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=JknGeelZJpHP", "email": "harvard.edu;harvard.edu;mit.edu;harvard.edu;harvard.edu", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Harvard University;Massachusetts Institute of Technology", 
"aff_unique_dep": ";", "aff_unique_url": "https://www.harvard.edu;https://web.mit.edu", "aff_unique_abbr": "Harvard;MIT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Boston", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "Jm-MaqTF6om", "title": "End-to-end Invariance Learning with Relational Inductive Biases in Multi-Object Robotic Manipulation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Although reinforcement learning has seen remarkable progress over the last years, solving robust dexterous object-manipulation tasks in multi-object settings remains a challenge. In this paper, we focus on models that can learn manipulation tasks in fixed multi-object settings \\emph{and} extrapolate this skill zero-shot without any drop in performance when the number of objects changes. We consider the generic task of moving a single cube out of a set to a goal position. We find that previous approaches, which primarily leverage attention and graph neural network-based architectures, do not exhibit this invariance when the number of input objects changes while scaling as $K^2$. We analyse effects on generalization of different relational inductive biases and then propose an efficient plug-and-play module that overcomes these limitations. Besides exceeding performances in their training environment, we show that our approach, which scales linearly in $K$, allows agents to extrapolate and generalize zero-shot to any new object number.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Davide Mambelli;Frederik Tr\u00e4uble;Stefan Bauer;Bernhard Sch\u00f6lkopf;Francesco Locatello", "authorids": "~Davide_Mambelli1;~Frederik_Tr\u00e4uble1;~Stefan_Bauer1;~Bernhard_Sch\u00f6lkopf1;~Francesco_Locatello1", "gender": ";M;;;M", "homepage": "https://dmambelli.github.io;https://ei.is.tuebingen.mpg.de/person/ftraeuble;https://cifar.ca/bios/stefan-bauer/;;https://twitter.com/FrancescoLocat8", "dblp": ";;;;195/6074", "google_scholar": ";https://scholar.google.de/citations?user=oc2OOyMAAAAJ;O-oICE8AAAAJ;;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Davide_Mambelli1;~Frederik_Tr\u00e4uble1;~Stefan_Bauer1;~Bernhard_Sch\u00f6lkopf1;~Francesco_Locatello1", "aff": "Delft University of Technology;Max Planck Institute for Intelligent Systems;KTH Royal Institute of Technology;;Amazon", "aff_domain": "tudelft.nl;is.tuebingen.mpg.de;kth.se;;amazon.com", "position": "PhD student;PhD student;Assistant Professor;;Senior Applied Scientist", "bibtex": "@misc{\nmambelli2023endtoend,\ntitle={End-to-end Invariance Learning with Relational Inductive Biases in Multi-Object Robotic Manipulation},\nauthor={Davide Mambelli and Frederik Tr{\\\"a}uble and Stefan Bauer and Bernhard Sch{\\\"o}lkopf and Francesco Locatello},\nyear={2023},\nurl={https://openreview.net/forum?id=Jm-MaqTF6om}\n}", "github": "", "project": "", "reviewers": "T4pA;Pmgk;4jkK;E3aJ;vRaR", "site": "https://openreview.net/forum?id=Jm-MaqTF6om", "pdf_size": 3795946, "recommendation": "3;3;3;5;6", "confidence": "4;4;4;4;2", "correctness": "3;3;2;4;2", "technical_novelty": "2;2;2;3;2", "empirical_novelty": "3;2;2;2;2", "wc_summary_paper": "57;103;84;49;65", "wc_strength_and_weaknesses": "497;110;54;118;48", "wc_clarity_quality_novelty_and_reproducibility": "50;24;47;37;5", "wc_summary_review": "130;84;455;30;16", "wc_review": "734;321;640;234;134", "wc_reply_reviewers": "85;0;354;0;0", "wc_reply_authors": "389;149;602;127;44", "reply_reviewers": "1;0;1;0;0", 
"reply_authors": "1;1;1;1;1", "recommendation_avg": [ 4.0, 1.2649110640673518 ], "confidence_avg": [ 3.6, 0.8 ], "correctness_avg": [ 2.8, 0.7483314773547882 ], "technical_novelty_avg": [ 2.2, 0.39999999999999997 ], "empirical_novelty_avg": [ 2.2, 0.39999999999999997 ], "wc_summary_paper_avg": [ 71.6, 19.530488985173925 ], "wc_strength_and_weaknesses_avg": [ 165.4, 168.20653970639788 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 32.6, 16.523922052587878 ], "wc_summary_review_avg": [ 143.0, 161.17816229253887 ], "wc_review_avg": [ 412.6, 233.63013504254968 ], "wc_reply_reviewers_avg": [ 87.8, 137.11075814829408 ], "wc_reply_authors_avg": [ 262.2, 205.0594060266439 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.7905694150420948, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:O6G1_ZPaLIsJ:scholar.google.com/&scioq=End-to-end+Invariance+Learning+with+Relational+Inductive+Biases+in+Multi-Object+Robotic+Manipulation&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Delft University of Technology;Max Planck Institute for Intelligent Systems;KTH Royal Institute of Technology;Amazon", "aff_unique_dep": ";Intelligent Systems;;Amazon.com, Inc.", "aff_unique_url": "https://www.tudelft.nl;https://www.mpi-is.mpg.de;https://www.kth.se;https://www.amazon.com", "aff_unique_abbr": "TU Delft;MPI-IS;KTH;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;3", "aff_country_unique": "Netherlands;Germany;Sweden;United States" }, { "title": "Individual Privacy Accounting with Gaussian Differential Privacy", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10727", "id": "JmC_Tld3v-f", "poster": "/media/PosterPDFs/ICLR%202023/10727.png?t=1682581463.4801142", "openreview": "https://openreview.net/forum?id=JmC_Tld3v-f", "slides": "https://iclr.cc/virtual/2023/poster/10727", "video": "https://iclr.cc/virtual/2023/poster/10727", "author_site": "Antti Koskela, Marlon Tobaben, Antti Honkela", "tldr": "Accurate privacy analysis of fully adaptive compositions using Gaussian differential privacy", "abstract": "Individual privacy accounting enables bounding differential privacy (DP) loss individually for each participant involved in the analysis. This can be informative as often the individual privacy losses are considerably smaller than those indicated by the DP bounds that are based on considering worst-case bounds at each data access. In order to account for the individual losses in a principled manner, we need a privacy accountant for adaptive compositions of mechanisms, where the loss incurred at a given data access is allowed to be smaller than the worst-case loss. This kind of analysis has been carried out for the R\u00e9nyi differential privacy by Feldman and Zrnic (2021), however not yet for the so-called optimal privacy accountants. We make first steps in this direction by providing a careful analysis using the Gaussian differential privacy which gives optimal bounds for the Gaussian mechanism, one of the most versatile DP mechanisms. This approach is based on determining a certain supermartingale for the hockey-stick divergence and on extending the R\u00e9nyi divergence-based fully adaptive composition results by Feldman and Zrnic (2021). 
We also consider measuring the individual $(\\varepsilon,\\delta)$-privacy losses using the so-called privacy loss distributions. Using the Blackwell theorem, we can then use the results of Feldman and Zrnic (2021) to construct an approximate individual $(\\varepsilon,\\delta)$-accountant. We also show how to speed up the FFT-based individual DP accounting using the Plancherel theorem.", "keywords": "differential privacy;gaussian differential privacy;fully adaptive compositions;privacy accounting;individual privacy loss", "primary_area": "", "supplementary_material": "/attachment/00682f302c9a714c5589e3637d2eb35e56f2bf5e.zip", "author": "Antti Koskela;Marlon Tobaben;Antti Honkela", "authorids": "~Antti_Koskela1;~Marlon_Tobaben1;~Antti_Honkela1", "gender": "M;;M", "homepage": ";https://www.helsinki.fi/en/about-us/people/people-finder/marlon-tobaben-9428638;https://www.cs.helsinki.fi/u/ahonkela/", "dblp": "124/9273;330/3364;h/AnttiHonkela", "google_scholar": "https://scholar.google.fi/citations?hl=fi;pgyBA6YAAAAJ;XsyLs6AAAAAJ", "orcid": ";0000-0002-9778-0853;0000-0001-9193-8093", "linkedin": ";marlon-tobaben/;", "or_profile": "~Antti_Koskela1;~Marlon_Tobaben1;~Antti_Honkela1", "aff": "Nokia Bell Labs;University of Helsinki;University of Helsinki", "aff_domain": "nokia-bell-labs.com;helsinki.fi;helsinki.fi", "position": "Researcher;PhD student;Full Professor", "bibtex": "@inproceedings{\nkoskela2023individual,\ntitle={Individual Privacy Accounting with Gaussian Differential Privacy},\nauthor={Antti Koskela and Marlon Tobaben and Antti Honkela},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=JmC_Tld3v-f}\n}", "github": "", "project": "", "reviewers": "ERD6;6fJh;MJHk;wyyZ", "pdf_size": 536104, "recommendation": "5;6;6;6", "confidence": "3;3;4;3", "correctness": "4;4;3;4", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;3;2;2", "wc_summary_paper": "38;195;62;24", "wc_strength_and_weaknesses": "102;386;472;172", "wc_clarity_quality_novelty_and_reproducibility": "24;66;47;21", "wc_summary_review": "25;36;57;17", "wc_review": "189;683;638;234", "wc_reply_reviewers": "0;0;94;0", "wc_reply_authors": "368;558;732;454", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;2;1", "recommendation_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 79.75, 67.91308784026832 ], "wc_strength_and_weaknesses_avg": [ 283.0, 151.17208737065187 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 39.5, 18.309833423600555 ], "wc_summary_review_avg": [ 33.75, 15.022899187573616 ], "wc_review_avg": [ 436.0, 225.62468836543576 ], "wc_reply_reviewers_avg": [ 23.5, 40.703193977868615 ], "wc_reply_authors_avg": [ 528.0, 135.639227364358 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": -0.3333333333333333, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6998653020606236082&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "pdf": "https://openreview.net/pdf?id=JmC_Tld3v-f", "email": "nokia-bell-labs.com;helsinki.fi;helsinki.fi", "author_num": 3, "aff_unique_index": "0;1;1",
"aff_unique_norm": "Nokia Bell Labs;University of Helsinki", "aff_unique_dep": ";", "aff_unique_url": "https://www.nokialabs.com;https://www.helsinki.fi", "aff_unique_abbr": "Nokia Bell Labs;UH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United States;Finland" }, { "title": "Over-Training with Mixup May Hurt Generalization", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10804", "id": "JmkjrlVE-DG", "poster": "", "openreview": "https://openreview.net/forum?id=JmkjrlVE-DG", "slides": "https://iclr.cc/virtual/2023/poster/10804", "video": "https://iclr.cc/virtual/2023/poster/10804", "author_site": "Zixuan Liu, Ziqiao Wang, Hongyu Guo, Yongyi Mao", "tldr": "We empirically discovered a U-shaped generalization curve of Mixup training.", "abstract": "Mixup, which creates synthetic training instances by linearly interpolating random sample pairs, is a simple and yet effective regularization technique to boost the performance of deep models trained with SGD. In this work, we report a previously unobserved phenomenon in Mixup raining: on a number of standard datasets, the performance of Mixup-trained models starts to decay after training for a large number of epochs, giving rise to a U-shaped generalization curve. This behavior is further aggravated when the size of original dataset is reduced. To help understand such a behavior of Mixup, we show theoretically that Mixup training may introduce undesired data-dependent label noises to the synthesized data. Via analyzing a least-square regression problem with a random feature model, we explain why noisy labels may cause the U-shaped curve to occur: Mixup improves generalization through fitting the clean patterns at the early training stage, but as training progresses, Mixup becomes over-fitting to the noise in the synthetic data. 
Extensive experiments are performed on a variety of benchmark datasets, validating this explanation.", "keywords": "Mixup;Generalization;Overfitting;Regularization", "primary_area": "", "supplementary_material": "/attachment/a44da85e3e9bb6259713faf4aada353a89165607.zip", "author": "Zixuan Liu;Ziqiao Wang;Hongyu Guo;Yongyi Mao", "authorids": "~Zixuan_Liu3;~Ziqiao_Wang1;~Hongyu_Guo1;~Yongyi_Mao2", "gender": "M;M;M;M", "homepage": ";https://ziqiaowanggeothe.github.io;https://hongyuharryguo.github.io/;http://www.eecs.uottawa.ca/~yymao", "dblp": "254/3346;222/9220;;86/2933", "google_scholar": ";iBL7APIAAAAJ;https://scholar.google.ca/citations?user=bZUqlakAAAAJ;https://scholar.google.ca/citations?user=jM5l70wAAAAJ", "orcid": ";0000-0003-0504-4830;;0000-0001-5298-5778", "linkedin": "zixuan-liu-181b4610a/;ziqiao-wang-987565155/?locale=en_US;harry-h-y-guo-a582087/;", "or_profile": "~Zixuan_Liu3;~Ziqiao_Wang1;~Hongyu_Guo1;~Yongyi_Mao1", "aff": "University of Ottawa;University of Ottawa;National Research Council Canada;University of Ottawa", "aff_domain": "uottawa.ca;uottawa.ca;nrc-cnrc.gc.ca;eecs.uottawa.ca", "position": "PhD student;PhD student;Senior Research Officer;Full Professor", "bibtex": "@inproceedings{\nliu2023overtraining,\ntitle={Over-Training with Mixup May Hurt Generalization},\nauthor={Zixuan Liu and Ziqiao Wang and Hongyu Guo and Yongyi Mao},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=JmkjrlVE-DG}\n}", "github": "", "project": "", "reviewers": "gGCe;Zo8A;VPXk;9xtA", "pdf_size": 4802972, "recommendation": "5;5;6;8", "confidence": "4;4;3;3", "correctness": "3;3;3;3", "technical_novelty": "3;3;2;3", "empirical_novelty": "4;3;2;3", "wc_summary_paper": "110;128;76;190", "wc_strength_and_weaknesses": "596;457;114;132", "wc_clarity_quality_novelty_and_reproducibility": "62;32;9;107", "wc_summary_review": "41;53;333;66", "wc_review": "809;670;532;495", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1209;913;981;218", "reply_reviewers": "0;0;0;0", "reply_authors": "2;2;2;1", "recommendation_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 126.0, 41.400483088968905 ], "wc_strength_and_weaknesses_avg": [ 324.75, 207.74669070769815 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 52.5, 36.65037516861185 ], "wc_summary_review_avg": [ 123.25, 121.42152815707765 ], "wc_review_avg": [ 626.5, 123.91630239802994 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 830.25, 370.09280930599016 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.8164965809277259, "corr_recommendation_correctness": 0.0, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12457266095421215168&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=JmkjrlVE-DG", "email": "uottawa.ca;uottawa.ca;nrc-cnrc.gc.ca;eecs.uottawa.ca", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Ottawa;National Research Council Canada", "aff_unique_dep": ";", "aff_unique_url": "https://www.uottawa.ca;https://www.nrc-cnrc.gc.ca", "aff_unique_abbr": "U Ottawa;NRC-CNRC", "aff_campus_unique_index": "", "aff_campus_unique": "", 
"aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Canada" }, { "id": "Jp7NLnL3n_1", "title": "Spurious Features in Continual Learning", "track": "main", "status": "Reject", "tldr": "This paper show that catastrophic forgetting is partially due to spurious features.", "abstract": "Continual Learning (CL) is the research field addressing learning without forgetting when the data distribution is not static. \nThis paper studies spurious features' influence on continual learning algorithms.\nWe show that continual learning algorithms solve tasks by selecting features that are not generalizable. \nOur experiments highlight that continual learning algorithms face two related problems: (1) spurious features (SP) and (2) local spurious features (LSP). The first one is due to a covariate shift between training and testing data, while the second is due to the limited access to data at each training step.\nWe study (1) through a consistent set of continual learning experiments varying spurious correlation amount and data distribution support.\nWe show that (2) is a major cause of performance decrease in continual learning along with catastrophic forgetting. \nThis paper presents a different way of understanding performance decrease in continual learning by highlighting the influence of (local) spurious features in algorithms capabilities.", "keywords": "Spurious Features;Continual Learning;Plasticity", "primary_area": "", "supplementary_material": "", "author": "Timothee LESORT", "authorids": "~Timothee_LESORT1", "gender": "M", "homepage": "", "dblp": "", "google_scholar": "5NttkuoAAAAJ", "orcid": "", "linkedin": "https://fr.linkedin.com/in/timoth\u00e9e-lesort-128039aa", "or_profile": "~Timothee_LESORT1", "aff": "Montreal Institute for Learning Algorithms, University of Montreal, University of Montreal", "aff_domain": "mila.umontreal.ca", "position": "Postdoc", "bibtex": "@misc{\nlesort2023spurious,\ntitle={Spurious Features in Continual Learning},\nauthor={Timothee LESORT},\nyear={2023},\nurl={https://openreview.net/forum?id=Jp7NLnL3n_1}\n}", "github": "", "project": "", "reviewers": "g5vD;9EtC;22hc;mcta", "site": "https://openreview.net/forum?id=Jp7NLnL3n_1", "pdf_size": 1606960, "recommendation": "3;3;3;5", "confidence": "5;3;4;4", "correctness": "2;2;2;3", "technical_novelty": "2;1;2;2", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "19;75;56;104", "wc_strength_and_weaknesses": "652;407;358;621", "wc_clarity_quality_novelty_and_reproducibility": "44;31;24;66", "wc_summary_review": "45;48;25;68", "wc_review": "760;561;463;859", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 2.25, 0.4330127018922193 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 63.5, 30.858548248418945 ], "wc_strength_and_weaknesses_avg": [ 509.5, 128.6438883118821 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 41.25, 15.990231392947383 ], "wc_summary_review_avg": [ 46.5, 15.239750654128171 ], "wc_review_avg": [ 660.75, 156.69137659743754 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 1, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 1.0, "gs_citation": 4, 
"gs_cited_by_link": "https://scholar.google.com/scholar?cites=1809026710689622971&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 4, "aff_unique_index": "0", "aff_unique_norm": "University of Montreal", "aff_unique_dep": "Montreal Institute for Learning Algorithms", "aff_unique_url": "https://www.umontreal.ca", "aff_unique_abbr": "UM", "aff_campus_unique_index": "0", "aff_campus_unique": "Montreal", "aff_country_unique_index": "0", "aff_country_unique": "Canada" }, { "title": "Gradient Gating for Deep Multi-Rate Learning on Graphs", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11249", "id": "JpRExTbl1-", "poster": "", "openreview": "https://openreview.net/forum?id=JpRExTbl1-", "slides": "https://iclr.cc/virtual/2023/poster/11249", "video": "https://iclr.cc/virtual/2023/poster/11249", "author_site": "T. Konstantin Rusch, Benjamin Chamberlain, Michael W Mahoney, Michael Bronstein, Siddhartha Mishra", "tldr": "", "abstract": "We present Gradient Gating (G$^2$), a novel framework for improving the performance of Graph Neural Networks (GNNs). Our framework is based on gating the output of GNN layers with a mechanism for multi-rate flow of message passing information across nodes of the underlying graph. Local gradients are harnessed to further modulate message passing updates. Our framework flexibly allows one to use any basic GNN layer as a wrapper around which the multi-rate gradient gating mechanism is built. We rigorously prove that G$^2$ alleviates the oversmoothing problem and allows the design of deep GNNs. Empirical results are presented to demonstrate that the proposed framework achieves state-of-the-art performance on a variety of graph learning tasks, including on large-scale heterophilic graphs.", "keywords": "GNNs;message-passing;oversmoothing;heterophilic graphs;multi-rate learning;gating;large graphs", "primary_area": "", "supplementary_material": "/attachment/0740061a414b8cbe27a41e3a7133735903b2d923.zip", "author": "T. Konstantin Rusch;Benjamin Paul Chamberlain;Michael W. Mahoney;Michael M. Bronstein;Siddhartha Mishra", "authorids": "~T._Konstantin_Rusch1;~Benjamin_Paul_Chamberlain1;~Michael_W._Mahoney1;~Michael_M._Bronstein1;~Siddhartha_Mishra1", "gender": ";M;;M;M", "homepage": "https://konstantinrusch.com;;;http://www.inf.usi.ch/bronstein/;http://www.sam.math.ethz.ch/", "dblp": "266/1519;;;07/2668;07/2856.html", "google_scholar": "9LajlSsAAAAJ;https://scholar.google.co.uk/citations?user=Tr8LSOEAAAAJ;;UU3N6-UAAAAJ;FmEqyNcAAAAJ", "orcid": ";;;;", "linkedin": ";;;mbronstein/;", "or_profile": "~T._Konstantin_Rusch1;~Benjamin_Paul_Chamberlain1;~Michael_W._Mahoney1;~Michael_M._Bronstein1;~Siddhartha_Mishra1", "aff": "Swiss Federal Institute of Technology;Twitter;;Twitter;Swiss Federal Institute of Technology", "aff_domain": "ethz.ch;twitter.com;;twitter.com;ethz.ch", "position": "PhD student;ML Researcher;;Head of Graph ML;Full Professor", "bibtex": "@inproceedings{\nrusch2023gradient,\ntitle={Gradient Gating for Deep Multi-Rate Learning on Graphs},\nauthor={T. Konstantin Rusch and Benjamin Paul Chamberlain and Michael W. Mahoney and Michael M. 
Bronstein and Siddhartha Mishra},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=JpRExTbl1-}\n}", "github": "", "project": "", "reviewers": "f9D8;WoCb;8MQJ;uYz6;xxu8", "pdf_size": 569799, "recommendation": "3;6;6;8;8", "confidence": "3;4;4;4;3", "correctness": "2;3;3;3;3", "technical_novelty": "3;3;3;3;4", "empirical_novelty": "2;2;3;3;4", "wc_summary_paper": "23;63;71;64;33", "wc_strength_and_weaknesses": "152;371;346;90;93", "wc_clarity_quality_novelty_and_reproducibility": "26;22;106;452;27", "wc_summary_review": "34;21;47;40;33", "wc_review": "235;477;570;646;186", "wc_reply_reviewers": "0;0;32;39;97", "wc_reply_authors": "922;975;759;1166;464", "reply_reviewers": "0;0;1;1;1", "reply_authors": "2;2;2;3;2", "recommendation_avg": [ 6.2, 1.8330302779823362 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "correctness_avg": [ 2.8, 0.39999999999999997 ], "technical_novelty_avg": [ 3.2, 0.39999999999999997 ], "empirical_novelty_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_paper_avg": [ 50.8, 19.0829767070025 ], "wc_strength_and_weaknesses_avg": [ 210.4, 123.18214156280935 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 126.6, 165.70528054349987 ], "wc_summary_review_avg": [ 35.0, 8.602325267042627 ], "wc_review_avg": [ 422.8, 182.0806414751442 ], "wc_reply_reviewers_avg": [ 33.6, 35.52238730716166 ], "wc_reply_authors_avg": [ 857.2, 235.66196129201677 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.31180478223116176, "corr_recommendation_correctness": 0.8728715609439697, "gs_citation": 68, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13290851817379077939&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "pdf": "https://openreview.net/pdf?id=JpRExTbl1-", "email": "ethz.ch;twitter.com;;twitter.com;ethz.ch", "author_num": 5, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Swiss Federal Institute of Technology;Twitter, Inc.", "aff_unique_dep": ";", "aff_unique_url": "https://www.ethz.ch;https://twitter.com", "aff_unique_abbr": "ETH Zurich;Twitter", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "Switzerland;United States" }, { "title": "Implicit Bias in Leaky ReLU Networks Trained on High-Dimensional Data", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10729", "id": "JpbLyEI5EwW", "poster": "/media/PosterPDFs/ICLR%202023/10729.png?t=1682699253.925712", "openreview": "https://openreview.net/forum?id=JpbLyEI5EwW", "slides": "https://iclr.cc/virtual/2023/poster/10729", "video": "https://iclr.cc/virtual/2023/poster/10729", "author_site": "Spencer Frei, Gal Vardi, Peter Bartlett, Nathan Srebro, Wei Hu", "tldr": "", "abstract": "The implicit biases of gradient-based optimization algorithms are conjectured to be a major factor in the success of modern deep learning. In this work, we investigate the implicit bias of gradient flow and gradient descent in two-layer fully-connected neural networks with leaky ReLU activations when the training data are nearly-orthogonal, a common property of high-dimensional data. For gradient flow, we leverage recent work on the implicit bias for homogeneous neural networks to show that asymptotically, gradient flow produces a neural network with rank at most two. 
Moreover, this network is an $\\ell_2$-max-margin solution (in parameter space), and has a linear decision boundary that corresponds to an approximate-max-margin linear predictor. For gradient descent, provided the random initialization variance is small enough, we show that a single step of gradient descent suffices to drastically reduce the rank of the network, and that the rank remains small throughout training. We provide experiments which suggest that a small initialization scale is important for finding low-rank neural networks with gradient descent. ", "keywords": "implicit bias;gradient descent;gradient flow;neural networks", "primary_area": "", "supplementary_material": "", "author": "Spencer Frei;Gal Vardi;Peter Bartlett;Nathan Srebro;Wei Hu", "authorids": "~Spencer_Frei1;~Gal_Vardi1;~Peter_Bartlett1;~Nathan_Srebro1;~Wei_Hu1", "gender": "M;M;M;M;M", "homepage": "http://spencerfrei.github.io/;https://sites.google.com/view/galvardi/home;https://www.stat.berkeley.edu/~bartlett/;http://ttic.uchicago.edu/~nati/;https://weihu.me", "dblp": "250/2714;https://dblp.uni-trier.de/pid/167/9638.html;https://dblp.org/pers/hd/b/Bartlett:Peter_L=;50/3633;", "google_scholar": "c7N8SoEAAAAJ;https://scholar.google.co.il/citations?hl=en;yQNhFGUAAAAJ;https://scholar.google.com.tw/citations?user=ZnT-QpMAAAAJ;ZybgAqkAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Spencer_Frei1;~Gal_Vardi1;~Peter_Bartlett1;~Nathan_Srebro1;~Wei_Hu1", "aff": "University of California, Berkeley;Toyota Technological Institute at Chicago;University of California, Berkeley;University of Chicago;Google", "aff_domain": "berkeley.edu;ttic.edu;berkeley;uchicago.edu;google.com", "position": "Postdoc;Postdoc;Professor;Full Professor;Visiting researcher", "bibtex": "@inproceedings{\nfrei2023implicit,\ntitle={Implicit Bias in Leaky Re{LU} Networks Trained on High-Dimensional Data },\nauthor={Spencer Frei and Gal Vardi and Peter Bartlett and Nathan Srebro and Wei Hu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=JpbLyEI5EwW}\n}", "github": "", "project": "", "reviewers": "kRaJ;ewuA;MUCK;BzfX", "pdf_size": 578875, "recommendation": "6;8;8;10", "confidence": "4;4;4;4", "correctness": "4;4;4;4", "technical_novelty": "3;2;3;4", "empirical_novelty": "0;3;3;2", "wc_summary_paper": "97;99;104;132", "wc_strength_and_weaknesses": "148;601;94;181", "wc_clarity_quality_novelty_and_reproducibility": "76;56;4;16", "wc_summary_review": "49;123;22;17", "wc_review": "370;879;224;346", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "111;750;195;275", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 8.0, 1.4142135623730951 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 4.0, 0.0 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.0, 1.224744871391589 ], "wc_summary_paper_avg": [ 108.0, 14.089002803605371 ], "wc_strength_and_weaknesses_avg": [ 256.0, 201.59241057143 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 38.0, 29.189039038652847 ], "wc_summary_review_avg": [ 52.75, 42.346044679521135 ], "wc_review_avg": [ 454.75, 251.11887125423291 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 332.75, 247.78052284229284 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 61, 
"gs_cited_by_link": "https://scholar.google.com/scholar?cites=5199896990708518685&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=JpbLyEI5EwW", "email": "berkeley.edu;ttic.edu;berkeley;uchicago.edu;google.com", "author_num": 5, "aff_unique_index": "0;1;0;2;3", "aff_unique_norm": "University of California, Berkeley;Toyota Technological Institute at Chicago;University of Chicago;Google", "aff_unique_dep": ";;;Google", "aff_unique_url": "https://www.berkeley.edu;https://www.tti-chicago.org;https://www.uchicago.edu;https://www.google.com", "aff_unique_abbr": "UC Berkeley;TTI Chicago;UChicago;Google", "aff_campus_unique_index": "0;1;0;3", "aff_campus_unique": "Berkeley;Chicago;;Mountain View", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "Jpctg2jSnMA", "title": "A Scalable Training Strategy for Blind Multi-Distribution Noise Removal", "track": "main", "status": "Withdraw", "tldr": "A Scalable Training Strategy for Blind Multi-Distribution Noise Removal", "abstract": "Despite recent advances, developing general-purpose universal denoising and artifact-removal networks remains largely an open problem: Given fixed network weights, one inherently trades-off specialization at one task (e.g., removing Poisson noise) for performance at another (e.g., removing speckle noise). In addition, training such a network is challenging due to the curse of dimensionality: As one increases the dimensions of the specification-space (i.e., the number of parameters needed to describe the noise distribution) the number of unique specifications one needs to train for grows exponentially. Uniformly sampling this space will result in a network that does well at very challenging problem specifications but poorly at easy problem specifications, where even large errors will have a small effect on the overall mean-squared-error. \nIn this work we propose training denoising networks using an adaptive-sampling strategy. Our work improves upon a recent universal denoiser training strategy by extending the results to higher dimensions and by incorporating a polynomial approximation of the true specification-loss landscape. 
We test our method on joint Poisson-Gaussian-speckle noise and demonstrate that, with our training strategy, a single trained generalist denoiser network can achieve mean-squared-errors within a relatively uniform bound of specialized denoiser networks across a large range of operating conditions.", "keywords": "denoising;image restoration;curriculum learning", "primary_area": "", "supplementary_material": "", "author": "Kevin Zhang;Sakshum Kulshrestha;Christopher Metzler", "authorids": "~Kevin_Zhang3;sakshum2001@gmail.com;~Christopher_Metzler1", "gender": "M;;M", "homepage": "https://kevinwzhang.com;;https://www.cs.umd.edu/~metzler/", "dblp": ";;147/4828", "google_scholar": "Sm16yZ4AAAAJ;;on7GFpYAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Kevin_Zhang3;sakshum2001@gmail.com;~Christopher_Metzler1", "aff": "University of Maryland, College Park;;University of Maryland, College Park", "aff_domain": "umd.edu;;umd.edu", "position": "PhD student;;Assistant Professor", "bibtex": "@misc{\nzhang2023a,\ntitle={A Scalable Training Strategy for Blind Multi-Distribution Noise Removal},\nauthor={Kevin Zhang and Sakshum Kulshrestha and Christopher Metzler},\nyear={2023},\nurl={https://openreview.net/forum?id=Jpctg2jSnMA}\n}", "github": "", "project": "", "reviewers": "b5LB;SyNz;pjxY;tV2R", "site": "https://openreview.net/forum?id=Jpctg2jSnMA", "pdf_size": 4820507, "recommendation": "3;3;5;5", "confidence": "4;3;1;3", "correctness": "3;2;3;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "83;44;17;50", "wc_strength_and_weaknesses": "318;192;127;104", "wc_clarity_quality_novelty_and_reproducibility": "11;25;12;49", "wc_summary_review": "11;36;32;23", "wc_review": "423;297;188;226", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "619;380;177;142", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 2.75, 1.0897247358851685 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 48.5, 23.47871376374779 ], "wc_strength_and_weaknesses_avg": [ 185.25, 83.16061267210577 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 24.25, 15.31951369985353 ], "wc_summary_review_avg": [ 25.5, 9.604686356149273 ], "wc_review_avg": [ 283.5, 89.53909760545949 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 329.5, 190.2452364712452 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.6882472016116854, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8284620068000394297&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0", "aff_unique_norm": "University of Maryland", "aff_unique_dep": "", "aff_unique_url": "https://www/umd.edu", "aff_unique_abbr": "UMD", "aff_campus_unique_index": "0;0", "aff_campus_unique": "College Park", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "Jqas82UP428", "title": "Lower Bounds for Differentially Private ERM: Unconstrained and Non-Euclidean", "track": "main", "status": "Reject", "tldr": "", "abstract": "We consider the lower bounds of differentially private empirical risk minimization (DP-ERM) for convex functions in both constrained and unconstrained cases concerning the general $\\ell_p$ norm beyond the 
$\\ell_2$ norm considered by most previous works.\n\nWe provide a simple black-box reduction approach that can generalize lower bounds from the constrained to the unconstrained case.\nMoreover, for $(\\epsilon,\\delta)$-DP, we achieve the optimal $\\Omega(\\frac{\\sqrt{d \\log(1/\\delta)}}{\\epsilon n})$ lower bounds for both constrained and unconstrained cases and any $\\ell_p$ geometry where $p\\geq 1$ by considering $\\ell_1$ loss over the $\\ell_{\\infty}$ ball.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Zhou Lu;Daogao Liu", "authorids": "~Zhou_Lu1;~Daogao_Liu1", "gender": ";M", "homepage": "https://leozoroaster.github.io/;https://daogaoliu.github.io/", "dblp": "68/11524;245/4078", "google_scholar": "17_nX_kAAAAJ;auA3AaQAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Zhou_Lu1;~Daogao_Liu1", "aff": "Princeton University;University of Washington, Seattle", "aff_domain": "princeton.edu;uw.edu", "position": "PhD student;PhD student", "bibtex": "@misc{\nlu2023lower,\ntitle={Lower Bounds for Differentially Private {ERM}: Unconstrained and Non-Euclidean},\nauthor={Zhou Lu and Daogao Liu},\nyear={2023},\nurl={https://openreview.net/forum?id=Jqas82UP428}\n}", "github": "", "project": "", "reviewers": "hdk6;kgZa;9oeX", "site": "https://openreview.net/forum?id=Jqas82UP428", "pdf_size": 362780, "recommendation": "5;5;5", "confidence": "4;2;4", "correctness": "3;3;4", "technical_novelty": "2;3;2", "empirical_novelty": "0;0;0", "wc_summary_paper": "92;77;64", "wc_strength_and_weaknesses": "165;76;210", "wc_clarity_quality_novelty_and_reproducibility": "92;77;2", "wc_summary_review": "30;2;47", "wc_review": "379;232;323", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "94;278;368", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 0, 0 ], "wc_summary_paper_avg": [ 77.66666666666667, 11.440668201153676 ], "wc_strength_and_weaknesses_avg": [ 150.33333333333334, 55.67963920700476 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 57.0, 39.370039370059054 ], "wc_summary_review_avg": [ 26.333333333333332, 18.55322673343433 ], "wc_review_avg": [ 311.3333333333333, 60.576856600153455 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 246.66666666666666, 114.03313358649562 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13514524214868719901&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Princeton University;University of Washington", "aff_unique_dep": ";", "aff_unique_url": "https://www.princeton.edu;https://www.washington.edu", "aff_unique_abbr": "Princeton;UW", "aff_campus_unique_index": "1", "aff_campus_unique": ";Seattle", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "JrVIWD81Z0u", "title": "FedProp: Cross-client Label Propagation for Federated Semi-supervised Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Federated learning (FL) allows multiple clients to jointly train a machine learning model in such a way that no client has to share
their data with any other participating party. In the supervised setting, where all client data is fully labeled, FL has been widely adopted for learning tasks that require data privacy. However, it remains an open research question how best to perform federated learning in a semi-supervised setting, where the clients possess data that is only partially labeled or even completely unlabeled. In this work, we propose a new method, FedProp, that follows a manifold-based approach to semi-supervised learning (SSL). It estimates the data manifold jointly from the data of multiple clients and computes pseudo-labels using cross-client label propagation. To ensure that clients do not have to share their data with anyone, FedProp employs two cryptographically secure yet highly efficient protocols: multi-party Hamming distance computation and secure aggregation. Experiments on three standard benchmarks show that FedProp achieves higher classification accuracy than previous federated SSL methods. Furthermore, as a pseudo-label-based technique, FedProp is complementary to other federated SSL approaches, in particular consistency-based ones. We demonstrate experimentally that further accuracy gains are possible by combining both.", "keywords": "federated learning;semi-supervised learning;label propagation;cryptographically secure computation", "primary_area": "", "supplementary_material": "", "author": "Jonathan Scott;Michelle Yeo;Christoph H Lampert", "authorids": "~Jonathan_Scott1;~Michelle_Yeo1;~Christoph_H_Lampert1", "gender": ";F;M", "homepage": ";;http://cvml.ist.ac.at/", "dblp": "35/4737;205/2556.html;67/2136", "google_scholar": ";;https://scholar.google.at/citations?user=iCf3SwgAAAAJ", "orcid": ";;0000-0001-8622-7887", "linkedin": "jonny-scott-71b245103/;;", "or_profile": "~Jonathan_Scott1;~Michelle_Yeo1;~Christoph_H_Lampert1", "aff": "Institute of Science and Technology;Institute of Science and Technology;Institute of Science and Technology Austria", "aff_domain": "ist.ac.at;ist.ac.at;ist.ac.at", "position": "PhD student;PhD student;Professor", "bibtex": "@misc{\nscott2023fedprop,\ntitle={FedProp: Cross-client Label Propagation for Federated Semi-supervised Learning},\nauthor={Jonathan Scott and Michelle Yeo and Christoph H Lampert},\nyear={2023},\nurl={https://openreview.net/forum?id=JrVIWD81Z0u}\n}", "github": "", "project": "", "reviewers": "8bDJ;ZBdp;G4t3;jCXS", "site": "https://openreview.net/forum?id=JrVIWD81Z0u", "pdf_size": 348300, "recommendation": "3;3;5;6", "confidence": "5;3;4;4", "correctness": "2;3;3;3", "technical_novelty": "2;2;2;2", "empirical_novelty": "0;3;2;2", "wc_summary_paper": "83;75;224;36", "wc_strength_and_weaknesses": "33;235;145;269", "wc_clarity_quality_novelty_and_reproducibility": "27;20;66;61", "wc_summary_review": "352;19;83;34", "wc_review": "495;349;518;400", "wc_reply_reviewers": "0;0;0;115", "wc_reply_authors": "1024;650;861;1048", "reply_reviewers": "0;0;0;1", "reply_authors": "2;1;2;2", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 1.75, 1.0897247358851685 ], "wc_summary_paper_avg": [ 104.5, 71.24780698379425 ], "wc_strength_and_weaknesses_avg": [ 170.5, 91.404321560854 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 43.5, 20.22992832414391 ], "wc_summary_review_avg": [ 122.0, 134.88328287819806 ], "wc_review_avg": [ 440.5, 68.90029027515051 ], "wc_reply_reviewers_avg": [ 28.75,
49.79646071760522 ], "wc_reply_authors_avg": [ 895.75, 159.08232931410075 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5555555555555555, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14634029018078505180&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;1", "aff_unique_norm": "Institute of Science and Technology;Institute of Science and Technology Austria", "aff_unique_dep": ";", "aff_unique_url": ";https://www.ist.ac.at", "aff_unique_abbr": ";IST Austria", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1", "aff_country_unique": ";Austria" }, { "title": "Token Merging: Your ViT But Faster", "status": "Top-5%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12173", "id": "JroZRaRw7Eu", "poster": "", "openreview": "https://openreview.net/forum?id=JroZRaRw7Eu", "slides": "https://iclr.cc/virtual/2023/poster/12173", "video": "https://iclr.cc/virtual/2023/poster/12173", "author_site": "Daniel Bolya, Cheng-Yang Fu, Xiaoliang Dai, Peizhao Zhang, Christoph Feichtenhofer, Judy Hoffman", "tldr": "We merge tokens in a ViT at runtime using a fast custom matching algorithm. Our method, ToMe, can increase training and inference speed, lower training memory, and can be applied with and without training.", "abstract": "We introduce Token Merging (ToMe), a simple method to increase the throughput of existing ViT models without needing to train. ToMe gradually combines similar tokens in a transformer using a general and light-weight matching algorithm that is as fast as pruning while being more accurate. Off-the-shelf, ToMe can 2x the throughput of state-of-the-art ViT-L @ 512 and ViT-H @ 518 models on images and 2.2x the throughput of ViT-L on video with only a 0.2-0.3% accuracy drop in each case. ToMe can also easily be applied during training, improving in practice training speed up to 2x for MAE fine-tuning on video. Training with ToMe further minimizes accuracy drop, leading to 2x the throughput of ViT-B on audio for only a 0.4% mAP drop. Qualitatively, we find that ToMe merges object parts into one token, even over multiple frames of video. 
Overall, ToMe\u2019s accuracy and speed are competitive with state-of-the-art on images, video, and audio.", "keywords": "token merging;token pruning;inference speed;training speed;throughput;off-the-shelf;fine tuning", "primary_area": "", "supplementary_material": "", "author": "Daniel Bolya;Cheng-Yang Fu;Xiaoliang Dai;Peizhao Zhang;Christoph Feichtenhofer;Judy Hoffman", "authorids": "~Daniel_Bolya1;~Cheng-Yang_Fu1;~Xiaoliang_Dai1;~Peizhao_Zhang1;~Christoph_Feichtenhofer4;~Judy_Hoffman1", "gender": "M;M;M;M;M;F", "homepage": "https://dbolya.github.io;http://www.cs.unc.edu/~cyfu/;;;http://feichtenhofer.github.io/;https://www.cc.gatech.edu/~judy/", "dblp": "239/4186.html;22/7453;192/3904;23/8011.html;127/1937;45/10336", "google_scholar": "K3ht_ZUAAAAJ;IYDJuOAAAAAJ;u4olrOcAAAAJ;eqQQkM4AAAAJ;UxuqG1EAAAAJ;mqpjAt4AAAAJ", "orcid": ";;;;;", "linkedin": "daniel-bolya-060398130/;;;;christoph-feichtenhofer-549433a1;", "or_profile": "~Daniel_Bolya1;~Cheng-Yang_Fu1;~Xiaoliang_Dai1;~Peizhao_Zhang1;~Christoph_Feichtenhofer4;~Judy_Hoffman1", "aff": "Georgia Institute of Technology;Meta Facebook;Meta Facebook;Meta;Meta FAIR;Georgia Institute of Technology", "aff_domain": "gatech.edu;fb.com;fb.com;meta.com;meta.com;gatech.edu", "position": "PhD student;Research Scientist;Research Scientist;Research Scientist;Principal Researcher;Assistant Professor", "bibtex": "@inproceedings{\nbolya2023token,\ntitle={Token Merging: Your ViT But Faster},\nauthor={Daniel Bolya and Cheng-Yang Fu and Xiaoliang Dai and Peizhao Zhang and Christoph Feichtenhofer and Judy Hoffman},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=JroZRaRw7Eu}\n}", "github": "", "project": "", "reviewers": "Gshq;eArF;MRoC;vh1A", "pdf_size": 9106572, "recommendation": "6;8;8;10", "confidence": "4;4;4;4", "correctness": "3;3;4;4", "technical_novelty": "3;3;3;4", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "114;64;50;80", "wc_strength_and_weaknesses": "315;164;131;192", "wc_clarity_quality_novelty_and_reproducibility": "15;76;37;45", "wc_summary_review": "30;26;51;86", "wc_review": "474;330;269;403", "wc_reply_reviewers": "0;0;20;55", "wc_reply_authors": "569;364;429;549", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;1;1", "recommendation_avg": [ 8.0, 1.4142135623730951 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 77.0, 23.853720883753127 ], "wc_strength_and_weaknesses_avg": [ 200.5, 69.54315207121402 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 43.25, 21.867498713844707 ], "wc_summary_review_avg": [ 48.25, 23.7736724129866 ], "wc_review_avg": [ 369.0, 76.97726937219844 ], "wc_reply_reviewers_avg": [ 18.75, 22.465250944514285 ], "wc_reply_authors_avg": [ 477.75, 84.73303665041162 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.7071067811865476, "gs_citation": 505, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1693652227952946120&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=JroZRaRw7Eu", "email": "gatech.edu;fb.com;fb.com;meta.com;meta.com;gatech.edu", "author_num": 6, "aff_unique_index": "0;1;1;1;1;0", "aff_unique_norm": "Georgia Institute of Technology;Meta", "aff_unique_dep": 
";Meta Platforms, Inc.", "aff_unique_url": "https://www.gatech.edu;https://meta.com", "aff_unique_abbr": "Georgia Tech;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "Js4vqB4bWVh", "title": "Decentralized Online Bandit Optimization on Directed Graphs with Regret Bounds", "track": "main", "status": "Reject", "tldr": "", "abstract": "We consider a decentralized multiplayer game, played over $T$ rounds, with a leader-follower hierarchy described by a directed acyclic graph. For each round, the graph structure dictates the order of the players and how players observe the actions of one another. By the end of each round, all players receive a joint bandit-reward based on their joint action that is used to update the player strategies towards the goal of minimizing the joint pseudo-regret. We present a learning algorithm inspired by the single-player multi-armed bandit problem and show that it achieves sub-linear joint pseudo-regret in the number of rounds for both adversarial and stochastic bandit rewards. Furthermore, we quantify the cost incurred due to the decentralized nature of our problem compared to the centralized setting. ", "keywords": "Bandit optimization;multi-agent learning;decentralized learning;joint bandit-rewards", "primary_area": "", "supplementary_material": "", "author": "Johan \u00d6stman;Ather Gattami;Daniel Gillblad", "authorids": "~Johan_\u00d6stman1;~Ather_Gattami1;~Daniel_Gillblad1", "gender": "M;M;", "homepage": ";https://www.ai.se;", "dblp": "119/7992;;48/5973", "google_scholar": "sIJo_uAAAAAJ;https://scholar.google.se/citations?hl=sv;", "orcid": "0000-0003-4138-0508;;", "linkedin": "joestman/;ather-gattami-7935012/;", "or_profile": "~Johan_\u00d6stman1;~Ather_Gattami1;~Daniel_Gillblad1", "aff": "AI Sweden;AI Sweden;AI Sweden", "aff_domain": "ai.se;ai.se;ai.se", "position": "Researcher;Principal Researcher;co-Director", "bibtex": "@misc{\n{\\\"o}stman2023decentralized,\ntitle={Decentralized Online Bandit Optimization on Directed Graphs with Regret Bounds},\nauthor={Johan {\\\"O}stman and Ather Gattami and Daniel Gillblad},\nyear={2023},\nurl={https://openreview.net/forum?id=Js4vqB4bWVh}\n}", "github": "", "project": "", "reviewers": "fbii;8PKf;uQZP;ibWi", "site": "https://openreview.net/forum?id=Js4vqB4bWVh", "pdf_size": 393589, "recommendation": "3;3;6;8", "confidence": "2;3;3;4", "correctness": "3;3;4;4", "technical_novelty": "2;2;4;4", "empirical_novelty": "0;2;2;3", "wc_summary_paper": "80;43;39;191", "wc_strength_and_weaknesses": "160;206;59;158", "wc_clarity_quality_novelty_and_reproducibility": "253;29;10;58", "wc_summary_review": "44;25;61;38", "wc_review": "537;303;169;445", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.0, 2.1213203435596424 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.0, 1.0 ], "empirical_novelty_avg": [ 1.75, 1.0897247358851685 ], "wc_summary_paper_avg": [ 88.25, 61.43848549565655 ], "wc_strength_and_weaknesses_avg": [ 145.75, 53.63942113781617 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 87.5, 97.0682749408889 ], "wc_summary_review_avg": [ 42.0, 12.942179105544785 ], "wc_review_avg": [ 363.5, 139.85260097688567 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], 
"replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.8333333333333334, "corr_recommendation_correctness": 0.9428090415820635, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ClfIWUDTFewJ:scholar.google.com/&scioq=Decentralized+Online+Bandit+Optimization+on+Directed+Graphs+with+Regret+Bounds&hl=en&as_sdt=0,33", "gs_version_total": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "AI Sweden", "aff_unique_dep": "", "aff_unique_url": "https://www.aisweden.org", "aff_unique_abbr": "AI Sweden", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Sweden" }, { "id": "JsrvkgM8gO2", "title": "Large Learning Rate Matters for Non-Convex Optimization", "track": "main", "status": "Reject", "tldr": "", "abstract": "When training neural networks, it has been widely observed that a large step size is essential in stochastic gradient descent (SGD) for obtaining superior models. However, the effect of large step sizes on the success of SGD is not well understood theoretically. \nSeveral previous works have attributed this success to the stochastic noise present in SGD. However, we show through a novel set of experiments that the stochastic noise is not sufficient to explain good non-convex training, and that instead the effect of a large learning rate itself is essential for obtaining best performance.\nWe demonstrate the same effects also in the noise-less case, i.e. for full-batch GD. We formally prove that GD with large step size---on certain non-convex function classes---follows a different trajectory than GD with a small step size, which can lead to convergence to a global minimum instead of a local one. \nFinally, we also demonstrate the difference in trajectories for small and large learning rates for real neural networks, again observing that large learning rates allow escaping from a local minimum, confirming this behavior is indeed relevant in practice.", "keywords": "large learning rates;GD;SGD;non-convex optimization", "primary_area": "", "supplementary_material": "/attachment/07e76f215ca4bf2fdf91ad8e5a80607d0a5d8bfb.zip", "author": "Amirkeivan Mohtashami;Martin Jaggi;Sebastian U Stich", "authorids": "~Amirkeivan_Mohtashami1;~Martin_Jaggi1;~Sebastian_U_Stich1", "gender": "M;M;M", "homepage": ";https://mlo.epfl.ch;https://www.sstich.ch", "dblp": "271/7873;17/4402;04/10549", "google_scholar": "YT1udC0AAAAJ;https://scholar.google.ch/citations?user=r1TJBr8AAAAJ;https://scholar.google.ch/citations?user=8l-mDfQAAAAJ", "orcid": ";0000-0003-1579-5558;", "linkedin": ";;", "or_profile": "~Amirkeivan_Mohtashami1;~Martin_Jaggi1;~Sebastian_U_Stich1", "aff": "Swiss Federal Institute of Technology Lausanne;EPFL;CISPA Helmholtz Center for Information Security", "aff_domain": "epfl.ch;epfl.ch;cispa.de", "position": "PhD student;Associate Professor;Tenure Track Faculty", "bibtex": "@misc{\nmohtashami2023large,\ntitle={Large Learning Rate Matters for Non-Convex Optimization},\nauthor={Amirkeivan Mohtashami and Martin Jaggi and Sebastian U Stich},\nyear={2023},\nurl={https://openreview.net/forum?id=JsrvkgM8gO2}\n}", "github": "", "project": "", "reviewers": "juk2;hE2g;opfE", "site": "https://openreview.net/forum?id=JsrvkgM8gO2", "pdf_size": 3953344, "recommendation": "3;5;6", "confidence": "4;4;2", "correctness": "3;3;3", "technical_novelty": "2;2;2", "empirical_novelty": "1;3;2", "wc_summary_paper": "79;115;69", "wc_strength_and_weaknesses": "257;522;66", 
"wc_clarity_quality_novelty_and_reproducibility": "91;12;23", "wc_summary_review": "95;25;69", "wc_review": "522;674;227", "wc_reply_reviewers": "38;149;0", "wc_reply_authors": "941;1523;160", "reply_reviewers": "1;1;0", "reply_authors": "3;4;1", "recommendation_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 87.66666666666667, 19.754043186705406 ], "wc_strength_and_weaknesses_avg": [ 281.6666666666667, 186.976528532921 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 42.0, 34.93804039534368 ], "wc_summary_review_avg": [ 63.0, 28.890598240027266 ], "wc_review_avg": [ 474.3333333333333, 185.5735852850711 ], "wc_reply_reviewers_avg": [ 62.333333333333336, 63.21568019267231 ], "wc_reply_authors_avg": [ 874.6666666666666, 558.415814803111 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 1.247219128924647 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.7559289460184546, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:z0TC7-CZ8a8J:scholar.google.com/&scioq=Large+Learning+Rate+Matters+for+Non-Convex+Optimization&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2", "aff_unique_norm": "Swiss Federal Institute of Technology Lausanne;EPFL;CISPA Helmholtz Center for Information Security", "aff_unique_dep": ";;", "aff_unique_url": "https://www.epfl.ch;https://www.epfl.ch;https://www.cispa.de/", "aff_unique_abbr": "EPFL;EPFL;CISPA", "aff_campus_unique_index": "0", "aff_campus_unique": "Lausanne;", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Switzerland;Germany" }, { "title": "Human-level Atari 200x faster", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12259", "id": "JtC6yOHRoJJ", "poster": "", "openreview": "https://openreview.net/forum?id=JtC6yOHRoJJ", "slides": "https://iclr.cc/virtual/2023/poster/12259", "video": "https://iclr.cc/virtual/2023/poster/12259", "author_site": "Steven Kapturowski, V\u00edctor Campos, Ray Jiang, Nemanja Rakicevic, Hado van Hasselt, Charles Blundell, Adria Puigdomenech Badia", "tldr": "We propose an RL agent 'MEME' that achieves human-level performance on all 57 Atari games within 390M environment frames, only 1/200 of the experience required by Agent57.", "abstract": "The task of building general agents that perform well over a wide range of tasks has been an important goal in reinforcement learning since its inception. The problem has been subject of research of a large body of work, with performance frequently measured by observing scores over the wide range of environments contained in the Atari 57 benchmark. Agent57 was the first agent to surpass the human benchmark on all 57 games, but this came at the cost of poor data-efficiency, requiring nearly 80 billion frames of experience to achieve. Taking Agent57 as a starting point, we employ a diverse set of strategies to achieve a 200-fold reduction of experience needed to outperform the human baseline, within our novel agent MEME. We investigate a range of instabilities and bottlenecks we encountered while reducing the data regime, and propose effective solutions to build a more robust and efficient agent. 
We also demonstrate competitive performance with high-performing methods such as Muesli and MuZero. Our contributions aim to achieve faster propagation of learning signals related to rare events, stabilize learning under differing value scales, improve the neural network architecture, and make updates more robust under a rapidly-changing policy.", "keywords": "Reinforcement Learning;Data-efficiency;Exploration;Off-policy", "primary_area": "", "supplementary_material": "", "author": "Steven Kapturowski;V\u00edctor Campos;Ray Jiang;Nemanja Rakicevic;Hado van Hasselt;Charles Blundell;Adria Puigdomenech Badia", "authorids": "~Steven_Kapturowski1;~V\u00edctor_Campos1;~Ray_Jiang1;~Nemanja_Rakicevic1;~Hado_van_Hasselt1;~Charles_Blundell1;~Adria_Puigdomenech_Badia2", "gender": ";M;;M;M;;Unspecified", "homepage": ";https://imatge.upc.edu/web/people/victor-campos;;http://nemanja-rakicevic.github.io/;http://hadovanhasselt.com;http://www.gatsby.ucl.ac.uk/~ucgtcbl/;https://scholar.google.co.uk/citations?user=DcWRJW4AAAAJ", "dblp": ";98/8044;217/3543;172/7923;https://dblp.uni-trier.de/pers/h/Hasselt:Hado_van.html;35/8396;", "google_scholar": ";8fzVqSkAAAAJ;;5Jnn-YoAAAAJ;;https://scholar.google.co.uk/citations?user=f31mvPsAAAAJ;", "orcid": ";http://orcid.org/0000-0001-5260-869X;;0000-0003-3323-2193;;;", "linkedin": "stevenkapturowski/;;;;;;", "or_profile": "~Steven_Kapturowski1;~V\u00edctor_Campos1;~Ray_Jiang1;~Nemanja_Rakicevic1;~Hado_van_Hasselt1;~Charles_Blundell1;~Adria_Puigdomenech_Badia1", "aff": "Google DeepMind;Google DeepMind;Google;Google DeepMind;Google DeepMind;Google DeepMind;Google DeepMind", "aff_domain": "deepmind.com;deepmind.com;google.com;google.com;google.com;google.com;deepmind.com", "position": "Staff Research Engineer;Researcher;Research Scientist;Researcher;Research scientist;Research Scientist;Research Engineer", "bibtex": "@inproceedings{\nkapturowski2023humanlevel,\ntitle={Human-level Atari 200x faster},\nauthor={Steven Kapturowski and V{\\'\\i}ctor Campos and Ray Jiang and Nemanja Rakicevic and Hado van Hasselt and Charles Blundell and Adria Puigdomenech Badia},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=JtC6yOHRoJJ}\n}", "github": "", "project": "", "reviewers": "SNik;tLk9;zSus", "pdf_size": 9787578, "recommendation": "3;8;8", "confidence": "5;4;3", "correctness": "2;4;4", "technical_novelty": "1;3;3", "empirical_novelty": "2;3;4", "wc_summary_paper": "73;54;62", "wc_strength_and_weaknesses": "152;243;126", "wc_clarity_quality_novelty_and_reproducibility": "24;65;22", "wc_summary_review": "46;22;37", "wc_review": "295;384;247", "wc_reply_reviewers": "64;0;0", "wc_reply_authors": "388;404;41", "reply_reviewers": "1;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 6.333333333333333, 2.357022603955158 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "correctness_avg": [ 3.3333333333333335, 0.9428090415820634 ], "technical_novelty_avg": [ 2.3333333333333335, 0.9428090415820634 ], "empirical_novelty_avg": [ 3.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 63.0, 7.788880963698615 ], "wc_strength_and_weaknesses_avg": [ 173.66666666666666, 50.16195991209098 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 37.0, 19.8158185969358 ], "wc_summary_review_avg": [ 35.0, 9.899494936611665 ], "wc_review_avg": [ 308.6666666666667, 56.75874871379351 ], "wc_reply_reviewers_avg": [ 21.333333333333332, 30.169889330626027 ], "wc_reply_authors_avg": [ 277.6666666666667, 167.47603476982079 ], 
"reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.8660254037844387, "corr_recommendation_correctness": 1.0, "gs_citation": 47, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2432824289873812298&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=JtC6yOHRoJJ", "email": "deepmind.com;deepmind.com;google.com;google.com;google.com;google.com;deepmind.com", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google DeepMind", "aff_unique_url": "https://deepmind.com", "aff_unique_abbr": "DeepMind", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;1;0;0;0;0", "aff_country_unique": "United Kingdom;United States" }, { "id": "JunUr1y3Wa6", "title": "Pruning by Active Attention Manipulation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Structured pruning of a CNN is typically achieved by applying discrete masks on the CNN's filter weights or activation maps, post-training. Here, we present a new filter-importance-scoring concept named pruning by active attention manipulation (PAAM), that sparsifies the CNN's set of filters through a particular attention mechanism, during-training. PAAM learns continuous filter scores from the filter weights by optimizing a cost function regularized by an additive term in the scores. As the filters are not independent, we use attention to dynamically learn their correlations. Moreover, by training the pruning scores of all layers simultaneously, PAAM can account for layer inter-dependencies, which is essential to finding a performant sparse sub-network. PAAM can also train and generate a pruned network from scratch in a straightforward, one-stage training process without requiring a pre-trained network. Finally, PAAM does not need layer-specific hyperparameters and pre-defined layer budgets, since it can implicitly determine the appropriate number of filters in each layer. Our experimental results on different network architectures suggest that PAAM outperforms state-of-the-art structured-pruning methods (SOTA). On CIFAR-10 dataset, without requiring a pre-trained baseline network, we obtain 1.02% and 1.19% accuracy gain and 52.3% and 54% parameters reduction, on ResNet56 and ResNet110, respectively. Similarly, on the ImageNet dataset, PAAM achieves 1.06% accuracy gain while pruning 51.1% of the parameters on ResNet50. 
For Cifar-10, this is better than the SOTA with a margin of 9.5% and 6.6%, respectively, and on ImageNet with a margin of 11%.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/17db58a0f9810c00f7d70f750630a3fd55600902.zip", "author": "Zahra Babaiee;Lucas Liebenwein;Ramin Hasani;Daniela Rus;Radu Grosu", "authorids": "~Zahra_Babaiee1;~Lucas_Liebenwein1;~Ramin_Hasani1;~Daniela_Rus1;~Radu_Grosu1", "gender": "F;M;F;M;M", "homepage": "https://informatics.tuwien.ac.at/people/zahra-babaiee;http://lucasliebenwein.com;https://www.csail.mit.edu/person/daniela-rus;https://ti.tuwien.ac.at/cps/people/grosu;http://www.raminhasani.com", "dblp": ";205/2692;r/DanielaRus;94/5421;190/3168", "google_scholar": ";e7ab8u4AAAAJ;https://scholar.google.com/citations?hl=en;1g_muAgAAAAJ;https://scholar.google.at/citations?user=YarJF3QAAAAJ", "orcid": ";0000-0002-3229-6665;;0000-0001-5715-2142;0000-0002-9889-5222", "linkedin": "zahra-babaiee-5b4ba314b;lucas-liebenwein/;;;raminhasani/", "or_profile": "~Zahra_Babaiee1;~Lucas_Liebenwein1;~Daniela_Rus1;~Radu_Grosu1;~Ramin_M._Hasani1", "aff": "TU Wien Vienna University of Technology;NVIDIA;Massachusetts Institute of Technology;TU Wien Vienna University of Technology;Massachusetts Institute of Technology", "aff_domain": "tuwien.ac.at;nvidia.com;mit.edu;tuwien.ac.at;mit.edu", "position": "PhD student;Researcher;Full Professor;Full Professor;Researcher", "bibtex": "@misc{\nbabaiee2023pruning,\ntitle={Pruning by Active Attention Manipulation},\nauthor={Zahra Babaiee and Lucas Liebenwein and Ramin Hasani and Daniela Rus and Radu Grosu},\nyear={2023},\nurl={https://openreview.net/forum?id=JunUr1y3Wa6}\n}", "github": "", "project": "", "reviewers": "12a5;wYHx;rXzi", "site": "https://openreview.net/forum?id=JunUr1y3Wa6", "pdf_size": 609876, "recommendation": "3;6;8", "confidence": "4;3;4", "correctness": "2;3;3", "technical_novelty": "2;2;2", "empirical_novelty": "2;3;3", "wc_summary_paper": "32;72;79", "wc_strength_and_weaknesses": "123;353;280", "wc_clarity_quality_novelty_and_reproducibility": "26;72;66", "wc_summary_review": "39;50;97", "wc_review": "220;547;522", "wc_reply_reviewers": "0;0;550", "wc_reply_authors": "385;672;2464", "reply_reviewers": "0;0;6", "reply_authors": "1;1;10", "recommendation_avg": [ 5.666666666666667, 2.0548046676563256 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 61.0, 20.704266871026046 ], "wc_strength_and_weaknesses_avg": [ 252.0, 95.96179795453327 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 54.666666666666664, 20.417857108151406 ], "wc_summary_review_avg": [ 62.0, 25.152865973217974 ], "wc_review_avg": [ 429.6666666666667, 148.6076115891182 ], "wc_reply_reviewers_avg": [ 183.33333333333334, 259.2724864350674 ], "wc_reply_authors_avg": [ 1173.6666666666667, 919.8957670422352 ], "reply_reviewers_avg": [ 2.0, 2.8284271247461903 ], "reply_authors_avg": [ 4.0, 4.242640687119285 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.1147078669352809, "corr_recommendation_correctness": 0.9176629354822472, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:SJaHGTdDMacJ:scholar.google.com/&scioq=Pruning+by+Active+Attention+Manipulation&hl=en&as_sdt=0,33", "gs_version_total": 3, "aff_unique_index": "0;1;2;0;2", "aff_unique_norm": 
"Vienna University of Technology;NVIDIA;Massachusetts Institute of Technology", "aff_unique_dep": ";NVIDIA Corporation;", "aff_unique_url": "https://www.tuwien.ac.at;https://www.nvidia.com;https://web.mit.edu", "aff_unique_abbr": "TU Wien;NVIDIA;MIT", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Vienna;", "aff_country_unique_index": "0;1;1;0;1", "aff_country_unique": "Austria;United States" }, { "id": "Jw5ivmKS2C", "title": "Posthoc Privacy guarantees for neural network queries", "track": "main", "status": "Reject", "tldr": "We present a framework for achieving formal privacy guarantees in adversarially trained ML models", "abstract": "Cloud based machine learning inference is an emerging paradigm where users share their data with a service provider. Due to increased concerns over data privacy, recent works have proposed using Adversarial Representation Learning (ARL) to learn a privacy-preserving encoding of sensitive user data before it is shared with an untrusted service provider. Traditionally, the privacy of these encodings is evaluated empirically as they lack formal guarantees. In this work, we develop a new framework that provides formal privacy guarantees for an arbitrarily trained neural network by linking its local Lipschitz constant with its local sensitivity. To utilize local sensitivity for guaranteeing privacy, we extend the Propose-Test-Release(PTR) framework to make it tractable for neural network based queries. We verify the efficacy of our framework experimentally on real-world datasets and elucidate the role of ARL in improving the privacy-utility tradeoff.", "keywords": "data privacy;differential privacy;privacy preserving machine learning;adversarial learning", "primary_area": "", "supplementary_material": "", "author": "Abhishek Singh;Praneeth Vepakomma;Vivek Sharma;Ramesh Raskar", "authorids": "~Abhishek_Singh5;~Praneeth_Vepakomma2;~Vivek_Sharma1;~Ramesh_Raskar2", "gender": "M;;M;M", "homepage": "https://tremblerz.github.io/;https://praneeth.mit.edu/;https://vivoutlaw.github.io/;https://www.media.mit.edu/people/raskar/overview/", "dblp": "27/2328-5;131/6694;;r/RameshRaskar", "google_scholar": "https://scholar.google.co.in/citations?user=3QygpzAAAAAJ;T_mPgZIAAAAJ;fNbVXwQAAAAJ;", "orcid": "0000-0003-0217-9801;;;0000-0002-3254-3224", "linkedin": "tremblerz/;;vivoutlaw/;", "or_profile": "~Abhishek_Singh5;~Praneeth_Vepakomma2;~Vivek_Sharma1;~Ramesh_Raskar1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Sony Research;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;sony.com;mit.edu", "position": "PhD student;PhD student;Senior Research Scientist;Associate Professor", "bibtex": "@misc{\nsingh2023posthoc,\ntitle={Posthoc Privacy guarantees for neural network queries},\nauthor={Abhishek Singh and Praneeth Vepakomma and Vivek Sharma and Ramesh Raskar},\nyear={2023},\nurl={https://openreview.net/forum?id=Jw5ivmKS2C}\n}", "github": "", "project": "", "reviewers": "yTGh;KPwN;82bi", "site": "https://openreview.net/forum?id=Jw5ivmKS2C", "pdf_size": 686350, "recommendation": "3;6;6", "confidence": "3;4;4", "correctness": "3;3;3", "technical_novelty": "2;2;3", "empirical_novelty": "2;3;2", "wc_summary_paper": "58;49;86", "wc_strength_and_weaknesses": "78;250;100", "wc_clarity_quality_novelty_and_reproducibility": "109;13;23", "wc_summary_review": "57;64;10", "wc_review": "302;376;219", "wc_reply_reviewers": "316;0;0", "wc_reply_authors": "233;1013;286", "reply_reviewers": "1;0;0", "reply_authors": "2;3;2", 
"recommendation_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 64.33333333333333, 15.755069730795297 ], "wc_strength_and_weaknesses_avg": [ 142.66666666666666, 76.42570829824798 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 48.333333333333336, 43.091633629846164 ], "wc_summary_review_avg": [ 43.666666666666664, 23.976840677805924 ], "wc_review_avg": [ 299.0, 64.13007614736371 ], "wc_reply_reviewers_avg": [ 105.33333333333333, 148.963828569966 ], "wc_reply_authors_avg": [ 510.6666666666667, 355.86170847051744 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 1.0, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16418050976150516067&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Massachusetts Institute of Technology;Sony", "aff_unique_dep": ";Research", "aff_unique_url": "https://web.mit.edu;https://www.sony.com", "aff_unique_abbr": "MIT;Sony", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;Japan" }, { "id": "JwUmXwqXhr", "title": "Chasing Better Deep Image Priors Between Over- and Under-parameterization", "track": "main", "status": "Reject", "tldr": "", "abstract": "Deep Neural Networks (DNNs) are well-known to act as over-parameterized deep image priors (DIP) that regularize various image inverse problems. Meanwhile, researchers also proposed extremely compact, under-parameterized image priors (e.g., deep decoder) that are strikingly competent for image restoration too, despite a loss of accuracy. These two extremes push us to think whether there exists a better solution in the middle: between over- and under-parameterized image priors, can one identify \"intermediate\" parameterized image priors that achieve better trade-offs between performance, efficiency, and even preserving strong transferability? Drawing inspirations from the lottery ticket hypothesis (LTH), we conjecture and study a novel \"lottery image prior\" (LIP) by exploiting DNN inherent sparsity, stated as: given an over-parameterized DNN-based image prior, it will contain a sparse subnetwork that can be trained in isolation, to match the original DNN's performance when being applied as a prior to various image inverse problems}. Our results validate the superiority of LIPs: we can successfully locate the LIP subnetworks from over-parameterized DIPs at substantial sparsity ranges. Those LIP subnetworks significantly outperform deep decoders under comparably compact model sizes (by often fully preserving the effectiveness of their over-parameterized counterparts), and they also possess high transferability across different images as well as restoration task types. Besides, we also extend LIP to compressive sensing image reconstruction, where a pre-trained GAN generator is used as the prior (in contrast to untrained DIP or deep decoder), and confirm its validity in this setting too. 
To our best knowledge, this is the first time that LTH is demonstrated to be relevant in the context of inverse problems or image priors. Codes will be publicly available upon acceptance.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Qiming Wu;Xiaohan Chen;Yifan Jiang;Zhangyang Wang", "authorids": "~Qiming_Wu1;~Xiaohan_Chen1;~Yifan_Jiang2;~Zhangyang_Wang1", "gender": ";M;M;M", "homepage": ";http://xiaohanchen.com;https://yifanjiang19.github.io/;https://vita-group.github.io", "dblp": ";94/3802;81/7246-1;119/4026", "google_scholar": ";https://scholar.google.com/citations?authuser=1;PMeFEOIAAAAJ;pxFyKAIAAAAJ", "orcid": ";0000-0002-0360-0402;;", "linkedin": ";xiaohan-chen-400b00147/;;", "or_profile": "~Qiming_Wu1;~Xiaohan_Chen1;~Yifan_Jiang2;~Zhangyang_Wang1", "aff": ";Alibaba Group;University of Texas, Austin;University of Texas, Austin", "aff_domain": ";alibaba-inc.com;utexas.edu;utexas.edu", "position": ";Researcher;PhD student;Assistant Professor", "bibtex": "@misc{\nwu2023chasing,\ntitle={Chasing Better Deep Image Priors Between Over- and Under-parameterization},\nauthor={Qiming Wu and Xiaohan Chen and Yifan Jiang and Zhangyang Wang},\nyear={2023},\nurl={https://openreview.net/forum?id=JwUmXwqXhr}\n}", "github": "", "project": "", "reviewers": "fDwJ;syh7;bXEi;eUHy", "site": "https://openreview.net/forum?id=JwUmXwqXhr", "pdf_size": 4163459, "recommendation": "5;5;5;5", "confidence": "4;4;4;4", "correctness": "3;2;2;3", "technical_novelty": "3;2;1;2", "empirical_novelty": "0;3;2;2", "wc_summary_paper": "73;85;47;34", "wc_strength_and_weaknesses": "294;117;464;274", "wc_clarity_quality_novelty_and_reproducibility": "126;113;32;97", "wc_summary_review": "94;20;31;67", "wc_review": "587;335;574;472", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "791;658;764;1482", "reply_reviewers": "0;0;0;0", "reply_authors": "2;2;2;3", "recommendation_avg": [ 5.0, 0.0 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 1.75, 1.0897247358851685 ], "wc_summary_paper_avg": [ 59.75, 20.24073862288627 ], "wc_strength_and_weaknesses_avg": [ 287.25, 122.92960383894516 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 92.0, 36.13170353027933 ], "wc_summary_review_avg": [ 53.0, 29.368350311176826 ], "wc_review_avg": [ 492.0, 100.9925739844272 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 923.75, 326.1168310590547 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1817311133794245188&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;1", "aff_unique_norm": "Alibaba Group;University of Texas at Austin", "aff_unique_dep": ";", "aff_unique_url": "https://www.alibaba.com;https://www.utexas.edu", "aff_unique_abbr": "Alibaba;UT Austin", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Austin", "aff_country_unique_index": "0;1;1", "aff_country_unique": "China;United States" }, { "title": "Scaling Forward Gradient With Local Losses", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11318", "id": "JxpBP1JM15-", "poster": "", "openreview": "https://openreview.net/forum?id=JxpBP1JM15-", "slides": "https://iclr.cc/virtual/2023/poster/11318", "video": 
"https://iclr.cc/virtual/2023/poster/11318", "author_site": "Mengye Ren, Simon Kornblith, Renjie Liao, Geoffrey E Hinton", "tldr": "", "abstract": "Forward gradient learning computes a noisy directional gradient and is a biologically plausible alternative to backprop for learning deep neural networks. The standard forward gradient algorithm suffers from the curse of dimensionality in the number of parameters. In this paper, we propose to scale forward gradient by adding a large number of local greedy loss functions. We consider block-wise, patch-wise, and channel group-wise local losses, and show that activity perturbation reduces variance compared to weight perturbation. Inspired by MLPMixer, we also propose a new architecture, LocalMixer, that is more suitable for local learning. We find local learning can work well with both supervised classification and self-supervised contrastive learning. Empirically, it can match backprop on MNIST and CIFAR-10 and significantly outperform backprop-free algorithms on ImageNet.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Mengye Ren;Simon Kornblith;Renjie Liao;Geoffrey Hinton", "authorids": "~Mengye_Ren1;~Simon_Kornblith1;~Renjie_Liao1;~Geoffrey_Hinton1", "gender": ";M;M;M", "homepage": "http://www.cs.toronto.edu/~mren;;https://lrjconan.github.io/;https://www.cs.toronto.edu/~hinton/bio.html", "dblp": "163/1952;220/4059;08/8180;10/3248", "google_scholar": "XcQ9WqMAAAAJ;1O3RPmsAAAAJ;2wrS35MAAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Mengye_Ren1;~Simon_Kornblith1;~Renjie_Liao1;~Geoffrey_Hinton1", "aff": "New York University;Google;Department of Electrical and Computer Engineering, The University of British Columbia;University of Toronto", "aff_domain": "nyu.edu;google.com;ece.ubc.ca;utoronto.ca", "position": "Assistant Professor;Research Scientist;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nren2023scaling,\ntitle={Scaling Forward Gradient With Local Losses},\nauthor={Mengye Ren and Simon Kornblith and Renjie Liao and Geoffrey Hinton},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=JxpBP1JM15-}\n}", "github": "", "project": "", "reviewers": "RQfz;g2TA;jdBC", "pdf_size": 2311123, "recommendation": "8;8;8", "confidence": "3;3;5", "correctness": "4;4;3", "technical_novelty": "3;3;2", "empirical_novelty": "3;3;3", "wc_summary_paper": "36;56;48", "wc_strength_and_weaknesses": "323;488;114", "wc_clarity_quality_novelty_and_reproducibility": "163;41;49", "wc_summary_review": "44;73;30", "wc_review": "566;658;241", "wc_reply_reviewers": "0;366;0", "wc_reply_authors": "972;1216;46", "reply_reviewers": "0;4;0", "reply_authors": "2;4;1", "recommendation_avg": [ 8.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 46.666666666666664, 8.219218670625303 ], "wc_strength_and_weaknesses_avg": [ 308.3333333333333, 153.03666953453418 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 84.33333333333333, 55.7215298505783 ], "wc_summary_review_avg": [ 49.0, 17.90716802475106 ], "wc_review_avg": [ 488.3333333333333, 178.8786056395665 ], "wc_reply_reviewers_avg": [ 122.0, 172.5340546095176 ], "wc_reply_authors_avg": [ 744.6666666666666, 503.9744261589295 ], "reply_reviewers_avg": [ 1.3333333333333333, 
1.8856180831641267 ], "reply_authors_avg": [ 2.3333333333333335, 1.247219128924647 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 60, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10071267733783487147&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "pdf": "https://openreview.net/pdf?id=JxpBP1JM15-", "email": "nyu.edu;google.com;ece.ubc.ca;utoronto.ca", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "New York University;Google;University of British Columbia;University of Toronto", "aff_unique_dep": ";Google;Department of Electrical and Computer Engineering;", "aff_unique_url": "https://www.nyu.edu;https://www.google.com;https://www.ubc.ca;https://www.utoronto.ca", "aff_unique_abbr": "NYU;Google;UBC;U of T", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Mountain View;Vancouver", "aff_country_unique_index": "0;0;1;1", "aff_country_unique": "United States;Canada" }, { "id": "JyD-NobfNL_", "title": "Distinguishing Feature Model for Ranking From Pairwise Comparisons", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "We consider the problem of ranking a set of items from pairwise comparisons among them when the underlying preferences are intransitive in nature. Intransitivity is a common occurrence in real world data sets and we introduce a flexible and natural parametric model for pairwise comparisons that we call the \emph{Distinguishing Feature Model} (DF) to capture this. Under this model, the items have an unknown but fixed embedding and the pairwise comparison between a pair of items depends probabilistically on the feature in the embedding that can best distinguish the items. We study several theoretical properties including how it generalizes the popular transitive Bradley-Terry-Luce model. With just an embedding dimension $d = 3$, we show that the proposed model can capture arbitrarily long cyclic dependencies. Furthermore, we explicitly show the type of preference relations that cannot be modelled under the DF model for $d=3$. On the algorithmic side, we propose a Siamese-type neural network based algorithm which can learn to predict well under the DF model while at the same time being interpretable in the sense that the embeddings learnt can be extracted directly from the learnt model.
Our experimental results show that the model outperforms standard baselines in both synthetic and real world ranking datasets.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/f8022a4280827bae3b6e70f481284f8cd985ff65.zip", "author": "Elisha Parhi;Arun Rajkumar", "authorids": "~Elisha_Parhi1;~Arun_Rajkumar4", "gender": ";M", "homepage": ";", "dblp": ";32/11350", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Elisha_Parhi1;~Arun_Rajkumar4", "aff": ";Indian Institute of Technology Madras", "aff_domain": ";iitm.ac.in", "position": ";Assistant Professor", "bibtex": "@misc{\nparhi2023distinguishing,\ntitle={Distinguishing Feature Model for Ranking From Pairwise Comparisons},\nauthor={Elisha Parhi and Arun Rajkumar},\nyear={2023},\nurl={https://openreview.net/forum?id=JyD-NobfNL_}\n}", "github": "", "project": "", "reviewers": "34Xj;gg5y;KFhi;DJcP", "site": "https://openreview.net/forum?id=JyD-NobfNL_", "pdf_size": 648699, "recommendation": "1;3;5;5", "confidence": "3;3;3;3", "correctness": "2;3;3;4", "technical_novelty": "2;2;3;2", "empirical_novelty": "1;2;3;2", "wc_summary_paper": "90;47;240;110", "wc_strength_and_weaknesses": "37;45;283;235", "wc_clarity_quality_novelty_and_reproducibility": "474;788;257;128", "wc_summary_review": "47;38;64;40", "wc_review": "648;918;844;513", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.5, 1.6583123951777 ], "confidence_avg": [ 3.0, 0.0 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 121.75, 71.96657210121933 ], "wc_strength_and_weaknesses_avg": [ 150.0, 110.34944494649713 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 411.75, 249.95037007374083 ], "wc_summary_review_avg": [ 47.25, 10.231690964840562 ], "wc_review_avg": [ 730.75, 159.80515479796014 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.8528028654224417, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:wfAFxdR7M_YJ:scholar.google.com/&scioq=Distinguishing+Feature+Model+for+Ranking+From+Pairwise+Comparisons&hl=en&as_sdt=0,48", "gs_version_total": 2, "aff_unique_index": "0", "aff_unique_norm": "Indian Institute of Technology Madras", "aff_unique_dep": "", "aff_unique_url": "https://www.iitm.ac.in", "aff_unique_abbr": "IIT Madras", "aff_campus_unique_index": "0", "aff_campus_unique": "Madras", "aff_country_unique_index": "0", "aff_country_unique": "India" }, { "id": "Jzliv-bxZla", "title": "Mitigating Propagation Failures in PINNs using Evolutionary Sampling", "track": "main", "status": "Reject", "tldr": "", "abstract": "Despite the success of physics-informed neural networks (PINNs) in approximating partial differential equations (PDEs), it is known that PINNs can sometimes fail to converge to the correct solution in problems involving complicated PDEs. This is reflected in several recent studies on characterizing and mitigating the ``failure modes'' of PINNs. 
While most of these studies have focused on balancing loss functions or adaptively tuning PDE coefficients, what is missing is a thorough understanding of the connection between failure modes of PINNs and sampling strategies used for training PINNs. In this paper, we provide a novel perspective on failure modes of PINNs by hypothesizing that the training of PINNs relies on successful ``propagation'' of the solution from initial and/or boundary condition points to interior points. We show that PINNs with poor sampling strategies can get stuck at trivial solutions if there are propagation failures. We additionally demonstrate that propagation failures are characterized by highly imbalanced PDE residual fields where very high residuals are observed over very narrow regions. To mitigate propagation failures, we propose a novel evolutionary sampling (Evo) method that can incrementally accumulate collocation points in regions of high PDE residuals with little to no computational overhead. We provide an extension of Evo to respect the principle of causality while solving time-dependent PDEs. We theoretically analyze the behavior of Evo and empirically demonstrate its efficacy and efficiency in comparison with baselines on a variety of PDE problems.", "keywords": "Physics-informed Neural Networks;Adaptive Sampling;Failure Modes of PINNs.", "primary_area": "", "supplementary_material": "/attachment/af9fcd12986bdf5382c18f0c4f37bc796965c92f.zip", "author": "Arka Daw;Jie Bu;Sifan Wang;Paris Perdikaris;Anuj Karpatne", "authorids": "~Arka_Daw1;~Jie_Bu1;~Sifan_Wang1;~Paris_Perdikaris1;~Anuj_Karpatne1", "gender": "M;;M;M;", "homepage": "https://people.cs.vt.edu/darka/;;;https://directory.seas.upenn.edu/paris-perdikaris/;http://people.cs.vt.edu/karpatne/", "dblp": "252/5645;;;180/9141;09/9720", "google_scholar": "pz2Nm8AAAAAJ;;cVbvfXsAAAAJ;h_zkt1oAAAAJ;", "orcid": ";;;0000-0002-2816-3229;", "linkedin": "arka-daw-1207a41a3/;;;paris-perdikaris-093068102/;", "or_profile": "~Arka_Daw1;~Jie_Bu1;~Sifan_Wang1;~Paris_Perdikaris1;~Anuj_Karpatne1", "aff": "Virginia Tech;;University of Pennsylvania;University of Pennsylvania;Virginia Polytechnic Institute and State University", "aff_domain": "vt.edu;;upenn.edu;upenn.edu;vt.edu", "position": "PhD student;;PhD student;Associate Professor;Assistant Professor", "bibtex": "@misc{\ndaw2023mitigating,\ntitle={Mitigating Propagation Failures in {PINN}s using Evolutionary Sampling},\nauthor={Arka Daw and Jie Bu and Sifan Wang and Paris Perdikaris and Anuj Karpatne},\nyear={2023},\nurl={https://openreview.net/forum?id=Jzliv-bxZla}\n}", "github": "", "project": "", "reviewers": "9MDS;3fsB;rJ3m", "site": "https://openreview.net/forum?id=Jzliv-bxZla", "pdf_size": 29595930, "recommendation": "3;6;8", "confidence": "4;3;4", "correctness": "3;3;3", "technical_novelty": "2;3;3", "empirical_novelty": "3;3;3", "wc_summary_paper": "78;72;63", "wc_strength_and_weaknesses": "315;149;448", "wc_clarity_quality_novelty_and_reproducibility": "47;66;52", "wc_summary_review": "24;61;43", "wc_review": "464;348;606", "wc_reply_reviewers": "0;0;71", "wc_reply_authors": "3918;540;3290", "reply_reviewers": "0;0;1", "reply_authors": "7;1;7", "recommendation_avg": [ 5.666666666666667, 2.0548046676563256 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 71.0, 6.164414002968976 ], "wc_strength_and_weaknesses_avg": [ 304.0,
122.3138040724213 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 55.0, 8.04155872120988 ], "wc_summary_review_avg": [ 42.666666666666664, 15.107025591499546 ], "wc_review_avg": [ 472.6666666666667, 105.50618728565428 ], "wc_reply_reviewers_avg": [ 23.666666666666668, 33.469720976163245 ], "wc_reply_authors_avg": [ 2582.6666666666665, 1466.9608795813958 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 5.0, 2.8284271247461903 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.1147078669352809, "corr_recommendation_correctness": 0.0, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8962340077435314868&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Virginia Tech;University of Pennsylvania", "aff_unique_dep": ";", "aff_unique_url": "https://www.vt.edu;https://www.upenn.edu", "aff_unique_abbr": "VT;UPenn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "JzrPpPnTUhk", "title": "Unleash Model Capacity for Universal Dense Retrieval by Task Specialty Optimization", "track": "main", "status": "Reject", "tldr": "", "abstract": "Universal dense retrieval, with one unified representation space to empower various retrieval scenarios, has many appealing advantages in simplicity, efficiency, and potential to break echo chambers with cross-scenario information access. However, standard multi-task trained dense retrievers often fail to meet the accuracy of scenario-specific models. In this paper, we analyze multi-task learning in universal retrieval and show that the model capacity is not the main bottleneck. Rather, the optimization fails to fully utilize the network parameters to capture task-specific signals. This motivated our development of TACO-DR, which conducts multi-task learning for universal retrieval with TAsk speCialty Optimization. TACO-DR dynamically adjusts the learning rate for each parameter regarding each task based on its task-specific sensitivity, to encourage parameters to better capture task-specific signals. On the KILT benchmark, TACO-DR outperforms various multi-task learning methods and achieves better overall accuracy than single-task models. Our analysis shows that TACO-DR better utilizes the model capacity with more task-specific parameters.
Our code and model checkpoints will be open-sourced.", "keywords": "Dense Retrieval;Multi-task;Parameter sensitivity", "primary_area": "", "supplementary_material": "", "author": "Wenzheng Zhang;Chenyan Xiong;Karl Stratos;Arnold Overwijk", "authorids": "~Wenzheng_Zhang1;~Chenyan_Xiong1;~Karl_Stratos2;~Arnold_Overwijk1", "gender": "M;M;M;M", "homepage": "https://wenzhengzhang.github.io/;https://www.cs.cmu.edu/~cx/;;http://karlstratos.com/", "dblp": ";18/10886;16/7404;07/11293", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;E9BaEBYAAAAJ;zKiMGDgAAAAJ;Fx8-1JMAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Wenzheng_Zhang1;~Chenyan_Xiong1;~Arnold_Overwijk1;~Karl_Stratos1", "aff": "Rutgers University;Microsoft Research;Meta;Rutgers University", "aff_domain": "cs.rutgers.edu;research.microsoft.com;meta.com;rutgers.edu", "position": "PhD student;Principal Researcher;Engineering Manager;Assistant Professor", "bibtex": "@misc{\nzhang2023unleash,\ntitle={Unleash Model Capacity for Universal Dense Retrieval by Task Specialty Optimization},\nauthor={Wenzheng Zhang and Chenyan Xiong and Karl Stratos and Arnold Overwijk},\nyear={2023},\nurl={https://openreview.net/forum?id=JzrPpPnTUhk}\n}", "github": "", "project": "", "reviewers": "6W5P;UpYV;KDMG;ZTG9", "site": "https://openreview.net/forum?id=JzrPpPnTUhk", "pdf_size": 1286687, "recommendation": "5;5;5;6", "confidence": "4;3;3;2", "correctness": "3;4;3;4", "technical_novelty": "2;3;2;3", "empirical_novelty": "2;3;2;4", "wc_summary_paper": "109;42;70;58", "wc_strength_and_weaknesses": "83;203;131;135", "wc_clarity_quality_novelty_and_reproducibility": "206;33;97;19", "wc_summary_review": "93;62;69;22", "wc_review": "491;340;367;234", "wc_reply_reviewers": "0;0;104;0", "wc_reply_authors": "378;211;458;204", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 69.75, 24.742423082632794 ], "wc_strength_and_weaknesses_avg": [ 138.0, 42.74342054632502 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 88.75, 73.80506418939015 ], "wc_summary_review_avg": [ 61.5, 25.53918557824427 ], "wc_review_avg": [ 358.0, 91.47404003322472 ], "wc_reply_reviewers_avg": [ 26.0, 45.033320996790806 ], "wc_reply_authors_avg": [ 312.75, 109.01232728457823 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.816496580927726, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:WuqwWoTXBlMJ:scholar.google.com/&scioq=Unleash+Model+Capacity+for+Universal+Dense+Retrieval+by+Task+Specialty+Optimization&hl=en&as_sdt=0,11", "gs_version_total": 0, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Rutgers University;Microsoft;Meta", "aff_unique_dep": ";Microsoft Research;Meta Platforms, Inc.", "aff_unique_url": "https://www.rutgers.edu;https://www.microsoft.com/en-us/research;https://meta.com", "aff_unique_abbr": "Rutgers;MSR;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "K-3Qq-CC78", "title": "BYPASSING THE STABILITY-PLASTICITY TRADEOFF TO REDUCE PREDICTIVE CHURN", "track": "main", 
"status": "Withdraw", "tldr": "", "abstract": "The impact of an ML model is largely a function of how much trust users have in its predictions. As more data is gathered over time, the model can be updated to take advantage of a larger sample size and have improved performance. Even when model updates improve aggregate metrics such as accuracy, this can lead to errors on samples the previous model got right causing apparent regressions in performance known as predictive churn. Such prediction flips erode user trust thereby reducing the effectiveness of the human-AI team as a whole.\nCurrent approaches for reducing predictive churn fall mainly into two categories: ensembles and distillation. While ensembles are the most effective, they comes at the cost of having to train and use multiple models for inference. Distillation is much more efficient both in terms of training and inference, but is far less effective at reducing churn. We propose a missing middle-ground solution called StackMem based on accumulating models over time which achieves comparable performance to ensembles without any training time increases or changes to training procedures. Additionally, StackMem can be applied to models which are already deployed, unlike ensembles. We demonstrate the effectiveness of StackMem on several computer vision benchmark datasets comparing against STOTA churn reduction methods.", "keywords": "Preditive churn;Stability;Distillation;Ensembles", "primary_area": "", "supplementary_material": "", "author": "George Alexandru Adam;Benjamin Haibe-Kains;Anna Goldenberg", "authorids": "~George_Alexandru_Adam1;~Benjamin_Haibe-Kains1;~Anna_Goldenberg1", "gender": "M;M;F", "homepage": ";http://bhklab.ca/;http://goldenberglab.ca/", "dblp": "15/7361;18/3992;06/3543", "google_scholar": ";https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=cEepZOEAAAAJ", "orcid": ";0000-0002-7684-0079;0000-0002-2416-833X", "linkedin": ";;", "or_profile": "~George_Alexandru_Adam1;~Benjamin_Haibe-Kains1;~Anna_Goldenberg1", "aff": "Toronto University;Toronto University;University of Toronto", "aff_domain": "utoronto.ca;utoronto.ca;utoronto.ca", "position": "MS student;Associate Professor;Full Professor", "bibtex": "@misc{\nadam2023bypassing,\ntitle={{BYPASSING} {THE} {STABILITY}-{PLASTICITY} {TRADEOFF} {TO} {REDUCE} {PREDICTIVE} {CHURN}},\nauthor={George Alexandru Adam and Benjamin Haibe-Kains and Anna Goldenberg},\nyear={2023},\nurl={https://openreview.net/forum?id=K-3Qq-CC78}\n}", "github": "", "project": "", "reviewers": "VAE1;bFoz;wW3E;mXtq;tZq9", "site": "https://openreview.net/forum?id=K-3Qq-CC78", "pdf_size": 590447, "recommendation": "3;5;5;5;8", "confidence": "3;3;4;3;4", "correctness": "3;3;4;3;4", "technical_novelty": "2;2;2;2;3", "empirical_novelty": "2;1;2;0;3", "wc_summary_paper": "116;34;131;75;125", "wc_strength_and_weaknesses": "166;23;53;264;93", "wc_clarity_quality_novelty_and_reproducibility": "190;212;20;119;11", "wc_summary_review": "19;27;241;52;39", "wc_review": "491;296;445;510;268", "wc_reply_reviewers": "0;26;12;0;0", "wc_reply_authors": "700;293;451;490;24", "reply_reviewers": "0;1;1;0;0", "reply_authors": "1;1;1;1;1", "recommendation_avg": [ 5.2, 1.6 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "correctness_avg": [ 3.4, 0.4898979485566356 ], "technical_novelty_avg": [ 2.2, 0.39999999999999997 ], "empirical_novelty_avg": [ 1.6, 1.019803902718557 ], "wc_summary_paper_avg": [ 96.2, 36.74452340145399 ], "wc_strength_and_weaknesses_avg": [ 119.8, 86.57805726626117 
], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 110.4, 83.40887242973615 ], "wc_summary_review_avg": [ 75.6, 83.44962552342581 ], "wc_review_avg": [ 402.0, 100.62405279057289 ], "wc_reply_reviewers_avg": [ 7.6, 10.3072789813801 ], "wc_reply_authors_avg": [ 391.6, 225.02142120251574 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.6634034720037775, "corr_recommendation_correctness": 0.6634034720037775, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:vThoryJuPoQJ:scholar.google.com/&scioq=BYPASSING+THE+STABILITY-PLASTICITY+TRADEOFF+TO+REDUCE+PREDICTIVE+CHURN&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Toronto", "aff_unique_dep": "", "aff_unique_url": "https://www.utoronto.ca", "aff_unique_abbr": "U of T", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "id": "K1CNgCJtLLr", "title": "CrystalBox: Efficient Model-Agnostic Explanations for Deep RL Controllers", "track": "main", "status": "Reject", "tldr": "", "abstract": "Practical adoption of Reinforcement Learning (RL) controllers is hindered by a lack of explainability. Particularly, in input-driven environments such as computer systems where the state dynamics are affected by external processes, explainability can serve as a key towards increased real-world deployment of RL controllers. In this work, we propose a novel framework, CrystalBox, for generating black-box post-hoc explanations for RL controllers in input-driven environments. CrystalBox is built on the principle of separation between policy learning and explanation computation. As the explanations are generated completely outside the training loop, CrystalBox is generalizable to a large family of input-driven RL controllers.To generate explanations, CrystalBox combines the natural decomposability of reward functions in systems environments with the explanatory power of decomposed returns. CrystalBox predicts these decomposed future returns using on policy Q-function approximations. Our design leverages two complementary approaches for this computation: sampling- and learning-based methods. 
We evaluate CrystalBox with RL controllers in real-world settings and demonstrate that it generates high-fidelity explanations.\n", "keywords": "explainability;reinforcement learning", "primary_area": "", "supplementary_material": "", "author": "Sagar Patel;Sangeetha Abdu Jyothi;Nina Narodytska", "authorids": "sagar.patel@uci.edu;~Sangeetha_Abdu_Jyothi1;~Nina_Narodytska1", "gender": ";F;F", "homepage": ";https://www.ics.uci.edu/~sabdujyo;", "dblp": ";;87/3366", "google_scholar": ";https://scholar.google.com/citations?hl=en;", "orcid": ";;", "linkedin": ";;", "or_profile": "sagar.patel@uci.edu;~Sangeetha_Abdu_Jyothi1;~Nina_Narodytska1", "aff": ";University of California, Irvine;VMware", "aff_domain": ";uci.edu;vmware.com", "position": ";Assistant Professor;Researcher", "bibtex": "@misc{\npatel2023crystalbox,\ntitle={CrystalBox: Efficient Model-Agnostic Explanations for Deep {RL} Controllers},\nauthor={Sagar Patel and Sangeetha Abdu Jyothi and Nina Narodytska},\nyear={2023},\nurl={https://openreview.net/forum?id=K1CNgCJtLLr}\n}", "github": "", "project": "", "reviewers": "24j8;iPCV;NU4N", "site": "https://openreview.net/forum?id=K1CNgCJtLLr", "pdf_size": 1759133, "recommendation": "3;3;5", "confidence": "4;4;4", "correctness": "2;3;4", "technical_novelty": "2;2;2", "empirical_novelty": "2;2;2", "wc_summary_paper": "32;86;81", "wc_strength_and_weaknesses": "356;479;408", "wc_clarity_quality_novelty_and_reproducibility": "343;50;148", "wc_summary_review": "93;23;102", "wc_review": "824;638;739", "wc_reply_reviewers": "0;471;0", "wc_reply_authors": "447;756;214", "reply_reviewers": "0;2;0", "reply_authors": "1;2;1", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 66.33333333333333, 24.36299561949547 ], "wc_strength_and_weaknesses_avg": [ 414.3333333333333, 50.41384289612879 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 180.33333333333334, 121.78213698604935 ], "wc_summary_review_avg": [ 72.66666666666667, 35.31131389355102 ], "wc_review_avg": [ 733.6666666666666, 76.02777270328404 ], "wc_reply_reviewers_avg": [ 157.0, 222.03152929257593 ], "wc_reply_authors_avg": [ 472.3333333333333, 221.9944944262257 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.8660254037844387, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10909083160104938445&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "University of California, Irvine;VMware, Inc.", "aff_unique_dep": ";", "aff_unique_url": "https://www.uci.edu;https://www.vmware.com", "aff_unique_abbr": "UCI;VMware", "aff_campus_unique_index": "0", "aff_campus_unique": "Irvine;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "K1DdnjL6p7", "title": "A simple Training-Free Method for Rejection Option", "track": "main", "status": "Withdraw", "tldr": "We present a simple yet effective method to implement the rejection option for a pre-trained classifier. ", "abstract": "We present a simple yet effective method to implement the rejection option for a pre-trained classifier. 
Our method is based on a sound mathematical framework, enjoys good properties, and is hyperparameter-free. It is lightweight, since it does not require any re-training of the network, and it is flexible, since it can be used with any model that outputs soft probabilities. We compare our solution to state-of-the-art methods on popular benchmarks (CIFAR-10, CIFAR-100, SVHN) and various models (VGG-16, DenseNet-121, ResNet-34). At evaluation time, our method, which is applied post-training to any classification model, achieves similar or better results than its competitors, which usually require further training and/or tuning of the models.", "keywords": "rejection option;safety AI;deep learning", "primary_area": "", "supplementary_material": "/attachment/a49b186f5be074f8911d10de338bb5410e56bde1.zip", "author": "Eduardo Dadalto C\u00e2mara Gomes;Marco Romanelli;Federica Granese;Pablo Piantanida", "authorids": "~Eduardo_Dadalto_C\u00e2mara_Gomes1;~Marco_Romanelli1;~Federica_Granese1;~Pablo_Piantanida2", "gender": ";;F;M", "homepage": ";;https://fgranese.github.io/;https://www.pablo-piantanida.org", "dblp": ";;251/6090;44/1416", "google_scholar": ";;https://scholar.google.ca/citations?hl=it;https://scholar.google.fr/citations?user=QyBEFv0AAAAJ", "orcid": ";;0000-0002-0084-521X;", "linkedin": ";;federica-granese-201b311a0/;pablo-piantanida-60a51bb5/?locale=en_US", "or_profile": "~Eduardo_Dadalto_C\u00e2mara_Gomes1;~Marco_Romanelli1;~Federica_Granese1;~Pablo_Piantanida2", "aff": ";;\u00c9cole Polytechnique;Mila - Quebec AI Institute ", "aff_domain": ";;polytechnique.edu;mila.quebec", "position": ";;PhD student;Full Professor", "bibtex": "@misc{\ngomes2023a,\ntitle={A simple Training-Free Method for Rejection Option},\nauthor={Eduardo Dadalto C{\\^a}mara Gomes and Marco Romanelli and Federica Granese and Pablo Piantanida},\nyear={2023},\nurl={https://openreview.net/forum?id=K1DdnjL6p7}\n}", "github": "", "project": "", "reviewers": "CYfN;FUcJ;kw8Q", "site": "https://openreview.net/forum?id=K1DdnjL6p7", "pdf_size": 3801548, "recommendation": "3;3;5", "confidence": "3;5;4", "correctness": "2;2;3", "technical_novelty": "2;2;2", "empirical_novelty": "2;3;2", "wc_summary_paper": "55;98;82", "wc_strength_and_weaknesses": "354;593;203", "wc_clarity_quality_novelty_and_reproducibility": "63;200;64", "wc_summary_review": "22;59;78", "wc_review": "494;950;427", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "correctness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 78.33333333333333, 17.745108872274887 ], "wc_strength_and_weaknesses_avg": [ 383.3333333333333, 160.56220670575695 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 109.0, 64.34801214230838 ], "wc_summary_review_avg": [ 53.0, 23.25224003546038 ], "wc_review_avg": [ 623.6666666666666, 232.36800315208825 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 1.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15279866671326667493&as_sdt=2005&sciodt=0,5&hl=en",
"gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Ecole Polytechnique;Quebec AI Institute", "aff_unique_dep": ";AI Institute", "aff_unique_url": "https://www.polytechnique.edu;https://mila.quebec", "aff_unique_abbr": "X;Mila", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "France;Canada" }, { "id": "K1KJ0NbFu1h", "title": "Learning Dictionaries over Datasets through Wasserstein Barycenters", "track": "main", "status": "Reject", "tldr": "We apply Wasserstein Dictionary Learning to datasets understood as empirical distributions.", "abstract": "Dictionary learning consists of trying to represent objects in terms of basic elements (atoms) weighted by an importance factor (representation). Non-linear dictionary learning using optimal transport as a metric has been previously studied for normalized non-negative data on a fixed grid. We propose a new framework by using Wasserstein Dictionary Learning on datasets understood as empirical distributions. We leverage Wasserstein barycenters for learning a dictionary of virtual datasets and embeddings in a simplex. We apply our method to unsupervised domain adaptation, improving the state of the art by 1.96% and 2.70%, respectively, and to manifold learning of Gaussian distributions and color histograms.", "keywords": "Dictionary Learning;Optimal Transport;Domain Adaptation;Manifold Learning", "primary_area": "", "supplementary_material": "", "author": "Eduardo Fernandes Montesuma;Fred Maurice Ngole Mboula;Antoine Souloumiac", "authorids": "~Eduardo_Fernandes_Montesuma1;fred-maurice.ngole-mboula@cea.fr;antoine.souloumiac@cea.fr", "gender": "M;;", "homepage": "https://eddardd.github.io/my-personal-blog/;;", "dblp": "251/3328;;", "google_scholar": "https://scholar.google.com/citations?hl=pt-PT;;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Eduardo_Fernandes_Montesuma1;fred-maurice.ngole-mboula@cea.fr;antoine.souloumiac@cea.fr", "aff": "CEA;;", "aff_domain": "cea.fr;;", "position": "PhD student;;", "bibtex": "@misc{\nmontesuma2023learning,\ntitle={Learning Dictionaries over Datasets through Wasserstein Barycenters},\nauthor={Eduardo Fernandes Montesuma and Fred Maurice Ngole Mboula and Antoine Souloumiac},\nyear={2023},\nurl={https://openreview.net/forum?id=K1KJ0NbFu1h}\n}", "github": "", "project": "", "reviewers": "6SsD;LdfT;fvrm", "site": "https://openreview.net/forum?id=K1KJ0NbFu1h", "pdf_size": 3572138, "recommendation": "3;3;5", "confidence": "3;4;2", "correctness": "1;3;3", "technical_novelty": "1;2;3", "empirical_novelty": "2;1;3", "wc_summary_paper": "98;43;141", "wc_strength_and_weaknesses": "221;356;393", "wc_clarity_quality_novelty_and_reproducibility": "213;44;48", "wc_summary_review": "76;27;89", "wc_review": "608;470;671", "wc_reply_reviewers": "202;0;52", "wc_reply_authors": "563;889;1158", "reply_reviewers": "1;0;1", "reply_authors": "1;2;2", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "correctness_avg": [ 2.3333333333333335, 0.9428090415820634 ], "technical_novelty_avg": [ 2.0, 0.816496580927726 ], "empirical_novelty_avg": [ 2.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 94.0, 40.10818702792071 ], "wc_strength_and_weaknesses_avg": [ 323.3333333333333, 73.92037758441323 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 101.66666666666667, 78.74148983999619 ], "wc_summary_review_avg": [ 64.0, 26.695817400234567 ], "wc_review_avg": [ 583.0, 83.94045508573325 ],
"wc_reply_reviewers_avg": [ 84.66666666666667, 85.64007369346562 ], "wc_reply_authors_avg": [ 870.0, 243.2789893654334 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.8660254037844387, "corr_recommendation_correctness": 0.5000000000000001, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:7yR-LEg0JmYJ:scholar.google.com/&scioq=Learning+Dictionaries+over+Datasets+through+Wasserstein+Barycenters&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Commissariat \u00e0 l'\u00c9nergie Atomique et aux \u00c9nergies Alternatives", "aff_unique_dep": "", "aff_unique_url": "https://www.cea.fr", "aff_unique_abbr": "CEA", "aff_country_unique_index": "0", "aff_country_unique": "France" }, { "id": "K1NKDaNM9i", "title": "Counterfactual Vision-Language Data Synthesis with Intra-Sample Contrast Learning", "track": "main", "status": "Reject", "tldr": "Counterfactual Vision-Language Data Synthesis with Intra-Sample Contrast Learning for Visual Commonsense Reasoning", "abstract": "Existing Vision-Language (VL) benchmarks often contain exploitative biases. Most prior works only attempted to mitigate biases in semantically low-level, conventional visual-question-answering-style datasets like VQA and GQA. However, these methods cannot generalize to recently emerging highly semantic VL datasets like VCR and are also difficult to scale due to many severe problems like high labor costs, drastically disrupting the data distribution, etc. To resolve those problems and also address other unique biases on VCR-like datasets, we first conduct an in-depth analysis and identify important biases in the VCR dataset. We further propose a generalized solution that synthesizes counterfactual image and text data based on the original query's semantic focus while producing less distortion to the data distribution. To utilize our synthesized data, we also design an innovative intra-sample contrastive training strategy to assist QA learning in Visual Commonsense Reasoning (VCR). Moreover, our synthesized VL data also serve as a highly semantic debiased benchmark for evaluating future VL models' robustness.
Extensive experiments show that our proposed synthesized data and training strategy improve existing VL models' performances on both the original VCR dataset and our proposed debiased benchmark.", "keywords": "counterfactual;data augmentation;vision language;knowledge distillation;vcr;vqa;visual question answering;commonsense reasoning;multimodal;robust;domain-shift;debiased", "primary_area": "", "supplementary_material": "", "author": "Zhecan Wang;Yicheng He;Wenhao Li;Haoxuan You;Long Chen;Noel C Codella;Yulei Niu;Kai-Wei Chang;Shih-Fu Chang", "authorids": "~Zhecan_Wang2;yh3330@columbia.edu;wl2750@columbia.edu;~Haoxuan_You1;~Long_Chen8;~Noel_C_Codella1;~Yulei_Niu1;~Kai-Wei_Chang1;~Shih-Fu_Chang3", "gender": "M;;;M;M;M;M;M;M", "homepage": "https://www.zhecanwang.com/;;;https://hxyou.github.io/;https://zjuchenlong.github.io/;http://www.noelcodella.com/;https://yuleiniu.github.io;http://kwchang.net;http://www.ee.columbia.edu/~sfchang/", "dblp": "167/4251;;;210/2628;64/5725-16;;165/2982;18/2428;c/ShihFuChang", "google_scholar": "uqHPnmgAAAAJ;;;BhysChMAAAAJ;https://scholar.google.com.sg/citations?user=-gtmMpIAAAAJ;8BnjC-4AAAAJ;WXd3dDwAAAAJ;fqDBtzYAAAAJ;OMVTRscAAAAJ", "orcid": "0009-0003-7785-4637;;;;0000-0001-6148-9709;;;0000-0001-5365-0072;", "linkedin": "jameszhecanwang/;;;;;noel-c-f-codella-ph-d-1b1b1723/;;kai-wei-chang-41239040;", "or_profile": "~Zhecan_Wang2;yh3330@columbia.edu;wl2750@columbia.edu;~Haoxuan_You1;~Long_Chen8;~Noel_C_Codella1;~Yulei_Niu1;~Kai-Wei_Chang1;~Shih-Fu_Chang3", "aff": "Columbia University;;;Columbia University;Columbia University;Microsoft;Columbia University;Amazon;Columbia University", "aff_domain": "columbia.edu;;;columbia.edu;columbia.edu;microsoft.com;columbia.edu;amazon.com;ee.columbia.edu", "position": "PhD student;;;PhD student;Postdoc;Principal Researcher;Postdoc;Researcher;Full Professor", "bibtex": "@misc{\nwang2023counterfactual,\ntitle={Counterfactual Vision-Language Data Synthesis with Intra-Sample Contrast Learning},\nauthor={Zhecan Wang and Yicheng He and Wenhao Li and Haoxuan You and Long Chen and Noel C Codella and Yulei Niu and Kai-Wei Chang and Shih-Fu Chang},\nyear={2023},\nurl={https://openreview.net/forum?id=K1NKDaNM9i}\n}", "github": "", "project": "", "reviewers": "rKpc;BL7G;nbLD;EKkA", "site": "https://openreview.net/forum?id=K1NKDaNM9i", "pdf_size": 15669603, "recommendation": "1;1;3;3", "confidence": "3;1;4;4", "correctness": "2;4;2;2", "technical_novelty": "3;4;2;2", "empirical_novelty": "3;4;3;2", "wc_summary_paper": "58;29;121;73", "wc_strength_and_weaknesses": "252;29;179;181", "wc_clarity_quality_novelty_and_reproducibility": "1;29;58;181", "wc_summary_review": "6;29;19;30", "wc_review": "317;116;377;465", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 2.0, 1.0 ], "confidence_avg": [ 3.0, 1.224744871391589 ], "correctness_avg": [ 2.5, 0.8660254037844386 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 70.25, 33.29695932063467 ], "wc_strength_and_weaknesses_avg": [ 160.25, 81.28153234283911 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 67.25, 68.69634269741003 ], "wc_summary_review_avg": [ 21.0, 9.669539802906858 ], "wc_review_avg": [ 318.75, 128.34791583816232 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [
9, 0 ], "corr_recommendation_confidence": 0.8164965809277259, "corr_recommendation_correctness": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:UhBpUsdD5gcJ:scholar.google.com/&scioq=Counterfactual+Vision-Language+Data+Synthesis+with+Intra-Sample+Contrast+Learning&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0;1;0;2;0", "aff_unique_norm": "Columbia University;Microsoft;Amazon", "aff_unique_dep": ";Microsoft Corporation;Amazon.com, Inc.", "aff_unique_url": "https://www.columbia.edu;https://www.microsoft.com;https://www.amazon.com", "aff_unique_abbr": "Columbia;Microsoft;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "K1Z-P0Le0DT", "title": "Recurrent Real-valued Neural Autoregressive Density Estimator for Online Density Estimation and Classification of Streaming Data", "track": "main", "status": "Reject", "tldr": "", "abstract": "In contrast with the traditional offline learning, where complete data accessibility is assumed, many modern applications involve processing data in a streaming fashion. This online learning setting raises various challenges, including concept drift, hardware memory constraints, etc. In this paper, we propose the Recurrent Real-valued Neural Autoregressive Density Estimator (RRNADE), a flexible density-based model for online classification and density estimation. RRNADE combines a neural Gaussian mixture density module with a recurrent module. This combination allows RRNADE to exploit possible sequential correlations in the streaming task, which are often ignored in the classical streaming setting where each input is assumed to be independent from the previous ones. We showcase the ability of RRNADE to adapt to concept drifts on synthetic density estimation tasks. We also apply RRNADE to online classification tasks on both real world and synthetic datasets and compare it with multiple density based as well as nondensity based online classification methods. In almost all of these tasks, RRNADE outperforms the other methods. 
Lastly, we conduct an ablation study demonstrating the complementary benefits of the density and the recurrent modules.", "keywords": "density estimation;online learning;streaming data;classification", "primary_area": "", "supplementary_material": "", "author": "Tianyu Li;Bogdan Mazoure;Guillaume Rabusseau", "authorids": "~Tianyu_Li3;~Bogdan_Mazoure1;~Guillaume_Rabusseau1", "gender": "M;M;M", "homepage": "http://rl.cs.mcgill.ca/;https://bmazoure.github.io;https://www-labs.iro.umontreal.ca/~grabus/", "dblp": ";;143/7327", "google_scholar": ";https://scholar.google.ca/citations?user=NaxShlcAAAAJ;https://scholar.google.fr/citations?user=t2i4V4EAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Tianyu_Li3;~Bogdan_Mazoure1;~Guillaume_Rabusseau1", "aff": "Samsung;Apple;Universit\u00e9 de Montr\u00e9al", "aff_domain": "samsung.com;apple.com;umontreal.ca", "position": "Researcher;Research Scientist;Associate Professor", "bibtex": "@misc{\nli2023recurrent,\ntitle={Recurrent Real-valued Neural Autoregressive Density Estimator for Online Density Estimation and Classification of Streaming Data},\nauthor={Tianyu Li and Bogdan Mazoure and Guillaume Rabusseau},\nyear={2023},\nurl={https://openreview.net/forum?id=K1Z-P0Le0DT}\n}", "github": "", "project": "", "reviewers": "qnzA;BvnL;jzmZ;7rkR;ybf6;fRiQ", "site": "https://openreview.net/forum?id=K1Z-P0Le0DT", "pdf_size": 821047, "recommendation": "3;3;3;3;5;5", "confidence": "3;4;4;3;3;4", "correctness": "2;4;3;3;3;4", "technical_novelty": "2;2;2;2;3;3", "empirical_novelty": "1;0;1;2;3;3", "wc_summary_paper": "97;56;83;69;99;168", "wc_strength_and_weaknesses": "134;249;219;609;252;208", "wc_clarity_quality_novelty_and_reproducibility": "60;35;11;58;141;283", "wc_summary_review": "52;18;56;42;54;57", "wc_review": "343;358;369;778;546;716", "wc_reply_reviewers": "0;0;0;115;94;256", "wc_reply_authors": "0;0;206;391;229;217", "reply_reviewers": "0;0;0;1;1;1", "reply_authors": "0;0;1;1;1;1", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.1666666666666665, 0.6871842709362768 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.6666666666666667, 1.1055415967851334 ], "wc_summary_paper_avg": [ 95.33333333333333, 35.798820588890294 ], "wc_strength_and_weaknesses_avg": [ 278.5, 152.85804961902835 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 98.0, 91.88398482144027 ], "wc_summary_review_avg": [ 46.5, 13.65955099310857 ], "wc_review_avg": [ 518.3333333333334, 176.07447919054658 ], "wc_reply_reviewers_avg": [ 77.5, 92.69978425001862 ], "wc_reply_authors_avg": [ 173.83333333333334, 137.56140043227566 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 0.6666666666666666, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.34299717028501764, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:sTRKKJCiVI4J:scholar.google.com/&scioq=Recurrent+Real-valued+Neural+Autoregressive+Density+Estimator+for+Online+Density+Estimation+and+Classification+of+Streaming+Data&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2", "aff_unique_norm": "Samsung;Apple;Universit\u00e9 de Montr\u00e9al", "aff_unique_dep": "Samsung;Apple Inc.;", "aff_unique_url": "https://www.samsung.com;https://www.apple.com;https://www.umontreal.ca", "aff_unique_abbr": "Samsung;Apple;UdeM", "aff_campus_unique_index": 
"", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2", "aff_country_unique": "South Korea;United States;Canada" }, { "id": "K2OixmPDou3", "title": "Unleashing Mask: Explore the Intrinsic Out-of-distribution Detection Capability", "track": "main", "status": "Reject", "tldr": "", "abstract": "Out-of-distribution (OOD) detection is an important aspect for safely deploying machine learning models in real-world applications. Previous approaches either design better scoring functions or utilize the knowledge from unknown outliers to equip the well-trained models with the ability of OOD detection. However, few of them explore to excavate the intrinsic OOD detection capability of a given model. In this work, we discover the existence of an intermediate stage of a model trained on in-distribution data having higher OOD detection performance than that of its final stage across different settings, and further identify the critical attribution to be learning with atypical samples. Based on such empirical insights, we propose a new method, Unleashing Mask (UM), to reveal the once-covered detection capability of a given model. To be specific, we utilize the mask to figure out the memorized atypical samples and fine-tune the model to forget them. Extensive experiments have been conducted to characterize and verify the effectiveness of our method.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Jianing Zhu;Hengzhuang Li;Jiangchao Yao;Tongliang Liu;Jianliang Xu;Bo Han", "authorids": "~Jianing_Zhu2;~Hengzhuang_Li1;~Jiangchao_Yao1;~Tongliang_Liu1;~Jianliang_Xu1;~Bo_Han1", "gender": "M;M;M;M;M;M", "homepage": "https://zfancy.github.io/;;https://sunarker.github.io/;https://tongliang-liu.github.io/;http://www.comp.hkbu.edu.hk/~xujl;https://bhanml.github.io/", "dblp": "129/6807;;166/5900;150/6667;x/JianliangXu;241/0472-3", "google_scholar": "82uNA3MAAAAJ;https://scholar.google.com/citations?hl=en;w8oDh9QAAAAJ;https://scholar.google.com.au/citations?user=EiLdZ_YAAAAJ;https://scholar.google.com.tw/citations?user=LJNsBeoAAAAJ;nTNjqHwAAAAJ", "orcid": ";;;;0000-0001-9404-5848;", "linkedin": ";https://www.linkedin.cn/in/%E6%81%92%E5%BA%84-%E6%9D%8E-49b056251;;;;", "or_profile": "~Jianing_Zhu2;~Hengzhuang_Li1;~Jiangchao_Yao1;~Tongliang_Liu1;~Jianliang_Xu1;~bo_han2", "aff": "Hong Kong Baptist University;Huazhong University of Science and Technology;Shanghai Artificial Intelligence Laboratory;University of Sydney;Hong Kong Baptist University;RIKEN", "aff_domain": "hkbu.edu.hk;hust.edu.cn;pjlab.org.cn;sydney.edu.au;hkbu.edu.hk;riken.jp", "position": "PhD student;Undergrad student;Researcher;Lecturer;Full Professor;Adjunct Scientist", "bibtex": "@misc{\nzhu2023unleashing,\ntitle={Unleashing Mask: Explore the Intrinsic Out-of-distribution Detection Capability},\nauthor={Jianing Zhu and Hengzhuang Li and Jiangchao Yao and Tongliang Liu and Jianliang Xu and Bo Han},\nyear={2023},\nurl={https://openreview.net/forum?id=K2OixmPDou3}\n}", "github": "", "project": "", "reviewers": "Ga94;PAPe;3ovB;miMq", "site": "https://openreview.net/forum?id=K2OixmPDou3", "pdf_size": 2541491, "recommendation": "3;5;6;8", "confidence": "4;3;3;4", "correctness": "3;2;3;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "1;3;3;3", "wc_summary_paper": "92;115;63;76", "wc_strength_and_weaknesses": "291;185;411;241", "wc_clarity_quality_novelty_and_reproducibility": "92;269;33;4", "wc_summary_review": "31;73;46;59", "wc_review": "506;642;553;380", "wc_reply_reviewers": "0;0;30;0", "wc_reply_authors": 
"3501;2778;3117;2591", "reply_reviewers": "0;0;1;0", "reply_authors": "14;15;6;5", "recommendation_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 86.5, 19.397164741270824 ], "wc_strength_and_weaknesses_avg": [ 282.0, 83.38465086573187 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 99.5, 102.87006367257678 ], "wc_summary_review_avg": [ 52.25, 15.54630181104175 ], "wc_review_avg": [ 520.25, 94.5631402820359 ], "wc_reply_reviewers_avg": [ 7.5, 12.99038105676658 ], "wc_reply_authors_avg": [ 2996.75, 346.84749890982346 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 10.0, 4.527692569068709 ], "replies_avg": [ 47, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.16012815380508713, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1232404013033496443&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff_unique_index": "0;1;2;3;0;4", "aff_unique_norm": "Hong Kong Baptist University;Huazhong University of Science and Technology;Shanghai Artificial Intelligence Laboratory;University of Sydney;RIKEN", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.hkbu.edu.hk;http://www.hust.edu.cn;http://www.shailab.org/;https://www.sydney.edu.au;https://www.riken.jp", "aff_unique_abbr": "HKBU;HUST;Shanghai AI Lab;USYD;RIKEN", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;1;0;2", "aff_country_unique": "China;Australia;Japan" }, { "id": "K2d8p6cjSe5", "title": "Less is More: Rethinking Few-Shot Learning and Recurrent Neural Nets", "track": "main", "status": "Reject", "tldr": "", "abstract": "The statistical supervised learning framework assumes an input-output set with a joint probability distribution that is reliably represented by the training dataset. The learner is then required to output a prediction rule learned from the training dataset's input-output pairs. In this work, we provide meaningful insights into the asymptotic equipartition property (AEP) \\citep{Shannon:1948} in the context of machine learning, and illuminate some of its potential ramifications for few-shot learning. We provide theoretical guarantees for reliable learning under the information-theoretic AEP, and for the generalization error with respect to the sample size. We then focus on a highly efficient recurrent neural net (RNN) framework and propose a reduced-entropy algorithm for few-shot learning. We also propose a mathematical intuition for the RNN as an approximation of a sparse coding solver. We verify the applicability, robustness, and computational efficiency of the proposed approach with image deblurring and optical coherence tomography (OCT) speckle suppression. Our experimental results demonstrate significant potential for improving learning models' sample efficiency, generalization, and time complexity, that can therefore be leveraged for practical real-time applications. 
", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Deborah Pereg;Martin Villiger;Brett Bouma;Polina Golland", "authorids": "~Deborah_Pereg1;~Martin_Villiger1;~Brett_Bouma1;~Polina_Golland1", "gender": "F;M;M;", "homepage": ";https://octresearch.org;;https://people.csail.mit.edu/polina", "dblp": ";;;g/PolinaGolland", "google_scholar": ";hgZsMGgAAAAJ;6hq8iLUAAAAJ;", "orcid": "0000-0002-2453-6577;;0000-0002-4531-2206;", "linkedin": ";;;", "or_profile": "~Deborah_Pereg1;~Martin_Villiger1;~Brett_Bouma1;~Polina_Golland1", "aff": "Massachusetts General Hospital, Harvard University;Massachusetts General Hospital, Harvard University;Massachusetts General Hospital, Harvard University;Massachusetts Institute of Technology", "aff_domain": "mgh.harvard.edu;mgh.harvard.edu;mgh.harvard.edu;mit.edu", "position": "Postdoc;Assistant Professor;Full Professor;Full Professor", "bibtex": "@misc{\npereg2023less,\ntitle={Less is More: Rethinking Few-Shot Learning and Recurrent Neural Nets},\nauthor={Deborah Pereg and Martin Villiger and Brett Bouma and Polina Golland},\nyear={2023},\nurl={https://openreview.net/forum?id=K2d8p6cjSe5}\n}", "github": "", "project": "", "reviewers": "WHvp;uFRZ;WSnj;GAhp", "site": "https://openreview.net/forum?id=K2d8p6cjSe5", "pdf_size": 2752300, "recommendation": "3;3;3;5", "confidence": "3;3;3;4", "correctness": "2;1;2;3", "technical_novelty": "3;2;2;3", "empirical_novelty": "3;2;1;3", "wc_summary_paper": "73;64;61;145", "wc_strength_and_weaknesses": "434;50;94;265", "wc_clarity_quality_novelty_and_reproducibility": "18;172;34;21", "wc_summary_review": "28;16;38;42", "wc_review": "553;302;227;473", "wc_reply_reviewers": "1035;0;0;0", "wc_reply_authors": "3373;1519;1703;1581", "reply_reviewers": "8;0;0;0", "reply_authors": "8;3;3;3", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 2.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 85.75, 34.491846862700754 ], "wc_strength_and_weaknesses_avg": [ 210.75, 151.86733519753352 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 61.25, 64.22373003804746 ], "wc_summary_review_avg": [ 31.0, 10.04987562112089 ], "wc_review_avg": [ 388.75, 130.15831706041686 ], "wc_reply_reviewers_avg": [ 258.75, 448.168146458447 ], "wc_reply_authors_avg": [ 2044.0, 770.1486869429824 ], "reply_reviewers_avg": [ 2.0, 3.4641016151377544 ], "reply_authors_avg": [ 4.25, 2.165063509461097 ], "replies_avg": [ 32, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 1.0, "corr_recommendation_correctness": 0.816496580927726, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6466905136947684990&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Harvard University;Massachusetts Institute of Technology", "aff_unique_dep": "Massachusetts General Hospital;", "aff_unique_url": "https://www.harvard.edu;https://web.mit.edu", "aff_unique_abbr": "Harvard;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "K2spEiswXVf", "title": "MALIBO: Meta-Learning for Likelihood-free Bayesian Optimization", "track": "main", "status": "Reject", "tldr": "A Meta-learning method for likelihood-free Bayesian optimization, scalable and robust to different scales across datasets.", "abstract": "Bayesian 
Optimization (BO) is a popular method to optimize expensive black-box functions. Typically, BO only uses observations from the current task. Recently proposed methods try to warm-start BO by exploiting knowledge from related tasks, yet suffer from scalability issues and sensitivity to heterogeneous scale across multiple datasets. We propose a novel approach to solve these problems by combining a meta-learning technique and a likelihood-free acquisition function. The meta-learning model simultaneously learns the underlying (task-agnostic) data distribution and a latent feature representation for individual tasks. The likelihood-free BO technique has less stringent assumptions about the problems and works with any classification algorithm, making it computationally efficient and robust to different scales across tasks. Finally, gradient boosting is used as a residual model on top to adapt to distribution drifts between new and prior tasks, which might otherwise weaken the usefulness of the meta-learned features. Experiments show that the meta-model learns an effective prior for warm-starting optimization algorithms, while being cheap to evaluate and invariant to changes of scale across different datasets.", "keywords": "Bayesian Optimization;Meta-learning", "primary_area": "", "supplementary_material": "", "author": "Jiarong Pan;Stefan Falkner;Felix Berkenkamp;Joaquin Vanschoren", "authorids": "~Jiarong_Pan2;~Stefan_Falkner1;~Felix_Berkenkamp1;~Joaquin_Vanschoren1", "gender": "M;M;M;M", "homepage": ";https://berkenkamp.me;http://www.win.tue.nl/~jvanscho/;", "dblp": "168/1232;168/8558;85/5045;296/3036", "google_scholar": "https://scholar.google.de/citations?user=r7FWJEkAAAAJ;https://scholar.google.ch/citations?user=N_tCEl8AAAAJ;HhDsD9UAAAAJ;3KjbaRUAAAAJ", "orcid": ";;0000-0001-7044-9805;", "linkedin": "stefan-falkner-b4142771;berkenkamp/;;gary-pan/", "or_profile": "~Stefan_Falkner1;~Felix_Berkenkamp1;~Joaquin_Vanschoren1;~Jiarong_Pan1", "aff": "Robert Bosch GmbH;Bosch;Eindhoven University of Technology;Eindhoven University of Technology", "aff_domain": "de.bosch.de;bosch.com;tue.nl;tue.nl", "position": "Research Scientist;Research Scientist;Associate Professor;PhD student", "bibtex": "@misc{\npan2023malibo,\ntitle={{MALIBO}: Meta-Learning for Likelihood-free Bayesian Optimization},\nauthor={Jiarong Pan and Stefan Falkner and Felix Berkenkamp and Joaquin Vanschoren},\nyear={2023},\nurl={https://openreview.net/forum?id=K2spEiswXVf}\n}", "github": "", "project": "", "reviewers": "YYiE;Xo11;Xog3;YSQy", "site": "https://openreview.net/forum?id=K2spEiswXVf", "pdf_size": 7692730, "recommendation": "3;5;6;6", "confidence": "3;3;4;4", "correctness": "3;3;3;3", "technical_novelty": "2;2;4;3", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "56;54;136;80", "wc_strength_and_weaknesses": "337;56;430;116", "wc_clarity_quality_novelty_and_reproducibility": "78;65;61;182", "wc_summary_review": "27;299;184;6", "wc_review": "498;474;811;384", "wc_reply_reviewers": "404;195;118;86", "wc_reply_authors": "1518;994;815;436", "reply_reviewers": "1;1;1;2", "reply_authors": "3;2;2;2", "recommendation_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 81.5, 33.087006513131406 ], "wc_strength_and_weaknesses_avg": [ 234.75, 153.8105571799283 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 96.5, 49.76193324218825 ], "wc_summary_review_avg": [
129.0, 119.85199205686988 ], "wc_review_avg": [ 541.75, 161.15578643039782 ], "wc_reply_reviewers_avg": [ 200.75, 123.8534920783423 ], "wc_reply_authors_avg": [ 940.75, 389.43508765903465 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.8164965809277259, "corr_recommendation_correctness": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17421855921906059459&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "aff_unique_index": "0;0;1;1", "aff_unique_norm": "Robert Bosch GmbH;Eindhoven University of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.bosch.com;https://www.tue.nl", "aff_unique_abbr": "Bosch;TU/e", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1", "aff_country_unique": "Germany;Netherlands" }, { "id": "K5UfKyHIBS", "title": "Imitation Improvement Learning for Large-scale Capacitated Vehicle Routing Problems", "track": "main", "status": "Withdraw", "tldr": "We propose an imitation learning and a clockwise clustering framework to efficiently solve large-scale capacitated vehicle routing problems", "abstract": "Recent works using deep reinforcement learning (RL) to solve routing problems such as the capacitated vehicle routing problem (CVRP) have focused on improvement learning-based methods, which involve improving a given solution until it becomes near-optimal. Although adequate solutions can be achieved for small problem instances, their efficiency degrades for large-scale ones. In this work, we propose a new improvement learning-based framework based on imitation learning where classical heuristics serve as experts to encourage the policy model to mimic and produce similar and better solutions. Moreover, to improve scalability, we propose Clockwise Clustering, a novel augmented framework for decomposing large-scale CVRP into subproblems by sequentially clustering nodes in clockwise order, and then learning to solve them simultaneously. Our approaches enhance state-of-the-art CVRP solvers while attaining competitive solution quality on several well-known datasets, including real-world instances with sizes up to 30,000 nodes. Our best methods are able to achieve new state-of-the-art solutions for several large instances and generalize to a wide range of CVRP variants and solvers.
We also contribute new datasets and results to test the generalizability of our deep RL algorithms.", "keywords": "capacitated vehicle routing;deep reinforcement learning;imitation learning;clockwise clustering", "primary_area": "", "supplementary_material": "/attachment/5602c35abea434b1429d366210323a686b50d967.zip", "author": "Viet The Bui;Tien Anh Mai", "authorids": "~Viet_The_Bui1;~Tien_Anh_Mai1", "gender": "M;M", "homepage": ";https://sites.google.com/view/tien-mai/", "dblp": ";229/2286.html", "google_scholar": "rpPDGm4AAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Viet_The_Bui1;~Tien_Anh_Mai1", "aff": "Singapore Management University;Singapore Management University", "aff_domain": "smu.edu.sg;smu.edu.sg", "position": "Researcher;Assistant Professor", "bibtex": "@misc{\nbui2023imitation,\ntitle={Imitation Improvement Learning for Large-scale Capacitated Vehicle Routing Problems},\nauthor={Viet The Bui and Tien Anh Mai},\nyear={2023},\nurl={https://openreview.net/forum?id=K5UfKyHIBS}\n}", "github": "", "project": "", "reviewers": "BxLj;ezkU;yxcG;fi4j", "site": "https://openreview.net/forum?id=K5UfKyHIBS", "pdf_size": 1022210, "recommendation": "3;3;5;5", "confidence": "5;4;3;4", "correctness": "2;2;3;4", "technical_novelty": "2;1;2;3", "empirical_novelty": "2;1;2;3", "wc_summary_paper": "55;148;74;78", "wc_strength_and_weaknesses": "125;70;100;69", "wc_clarity_quality_novelty_and_reproducibility": "45;437;228;24", "wc_summary_review": "25;19;39;269", "wc_review": "250;674;441;440", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "301;465;106;275", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 2.75, 0.82915619758885 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 88.75, 35.29429840639986 ], "wc_strength_and_weaknesses_avg": [ 91.0, 23.24865587512534 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 183.5, 166.4819810069546 ], "wc_summary_review_avg": [ 88.0, 104.75208828467335 ], "wc_review_avg": [ 451.25, 150.29200743885218 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 286.75, 127.2642428178473 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.7071067811865475, "corr_recommendation_correctness": 0.9045340337332909, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:8Mg_1x_34UAJ:scholar.google.com/&scioq=Imitation+Improvement+Learning+for+Large-scale+Capacitated+Vehicle+Routing+Problems&hl=en&as_sdt=0,33", "gs_version_total": 6, "aff_unique_index": "0;0", "aff_unique_norm": "Singapore Management University", "aff_unique_dep": "", "aff_unique_url": "https://www.smu.edu.sg", "aff_unique_abbr": "SMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Singapore" }, { "id": "K5qR1F14qPE", "title": "Motion-inductive Self-supervised Object Discovery in Videos", "track": "main", "status": "Reject", "tldr": "We propose a motion-inductive model through directly processing consecutive RGB frames to segment the foreground objects and train it by flow reconstruction between pairwise frames, i.e. without any mask annotations.", "abstract": "In this paper, we consider the task of unsupervised object discovery in videos. 
Previous works have shown promising results via processing optical flows to segment objects. However, taking flow as input brings about two drawbacks. First, flow cannot capture sufficient cues when objects remain static or partially occluded. Second, it is challenging to establish temporal coherency from flow-only input, due to the missing texture information. To tackle these limitations, we propose a model for directly processing consecutive RGB frames, and infer the optical flow between any pair of frames using a layered representation, with the opacity channels being treated as the segmentation. Additionally, to enforce object permanence, we apply temporal consistency loss on the inferred masks from randomly-paired frames, which refer to the motions at different paces, and encourage the model to segment the objects even if they may not move at the current time point. Experimentally, we demonstrate superior performance over previous state-of-the-art methods on three public video segmentation datasets (DAVIS2016, SegTrackv2, and FBMS-59), while being computationally efficient by avoiding the overhead of computing optical flow as input.", "keywords": "Video Object Segmentation;Motion Segmentation;Object Discovery", "primary_area": "", "supplementary_material": "/attachment/0f49e2cef00daddd9fc5ce424ddda5cd9265acd1.zip", "author": "Shuangrui Ding;Weidi Xie;Yabo Chen;Rui Qian;XIAOPENG ZHANG;Hongkai Xiong;Qi Tian", "authorids": "~Shuangrui_Ding1;~Weidi_Xie3;~Yabo_Chen1;~Rui_Qian2;~XIAOPENG_ZHANG7;~Hongkai_Xiong1;~Qi_Tian3", "gender": "M;M;M;M;M;M;M", "homepage": "https://mark12ding.github.io;https://weidixie.github.io;;https://github.com/shvdiwnkozbw;https://sites.google.com/site/zxphistory/;http://min.sjtu.edu.cn;https://www.qitian1987.com/index.html", "dblp": "267/1780;199/1718;96/8624.html;;;21/3569;78/1467-1.html", "google_scholar": "RZOIVhYAAAAJ;https://scholar.google.co.uk/citations?user=Vtrqj4gAAAAJ;6aHx1rgAAAAJ;QehSWiQAAAAJ;Ud6aBAcAAAAJ;bB16iN4AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;;;0000-0003-4552-0029;0000-0002-7252-5047", "linkedin": ";;;;;;", "or_profile": "~Shuangrui_Ding1;~Weidi_Xie3;~Yabo_Chen1;~Rui_Qian2;~XIAOPENG_ZHANG7;~Hongkai_Xiong1;~Qi_Tian3", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;The Chinese University of Hong Kong;Huawei Technologies Ltd.;Shanghai Jiaotong University;Huawei Technologies Ltd.", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;cuhk.edu.hk;huawei.com;sjtu.edu.cn;huawei.com", "position": "MS student;Associate Professor;PhD student;PhD student;Principal Researcher;Full Professor;Principal Researcher", "bibtex": "@misc{\nding2023motioninductive,\ntitle={Motion-inductive Self-supervised Object Discovery in Videos},\nauthor={Shuangrui Ding and Weidi Xie and Yabo Chen and Rui Qian and XIAOPENG ZHANG and Hongkai Xiong and Qi Tian},\nyear={2023},\nurl={https://openreview.net/forum?id=K5qR1F14qPE}\n}", "github": "", "project": "", "reviewers": "HWKn;fGvA;NSCm;7qqQ", "site": "https://openreview.net/forum?id=K5qR1F14qPE", "pdf_size": 9708238, "recommendation": "3;5;5;8", "confidence": "4;4;3;4", "correctness": "2;3;3;4", "technical_novelty": "2;2;2;4", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "84;134;88;93", "wc_strength_and_weaknesses": "261;297;436;251", "wc_clarity_quality_novelty_and_reproducibility": "8;185;60;37", "wc_summary_review": "12;33;54;38", "wc_review": "365;649;638;419", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "187;421;489;366", 
"reply_reviewers": "0;0;0;0", "reply_authors": "2;2;2;2", "recommendation_avg": [ 5.25, 1.7853571071357126 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.5, 0.8660254037844386 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 99.75, 20.029665498954294 ], "wc_strength_and_weaknesses_avg": [ 311.25, 74.02828851189253 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 72.5, 67.51481318940311 ], "wc_summary_review_avg": [ 34.25, 15.006248698458919 ], "wc_review_avg": [ 517.75, 127.25049115818767 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 365.75, 112.02092438468806 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.08084520834544431, "corr_recommendation_correctness": 0.9901475429766743, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1022444508670503683&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0;1;2;0;2", "aff_unique_norm": "Shanghai Jiao Tong University;Chinese University of Hong Kong;Huawei", "aff_unique_dep": ";;Huawei Technologies", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.cuhk.edu.hk;https://www.huawei.com", "aff_unique_abbr": "SJTU;CUHK;Huawei", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "K5si8PjaSy", "title": "Explainable Artificial Intelligence: Reaping the Fruits of Decision Trees", "track": "main", "status": "Reject", "tldr": "This work assessed node weight patterns toward explaining artificial intelligence systems.", "abstract": "The recent push for explainable artificial intelligence (XAI) has given rise to extensive work toward understanding the inner workings of neural networks. Much of that work, however, has focused on manipulating input data feeding the network to assess their effect on network output. It is shown in this study that XAI can benefit from investigating the network node, the most fundamental unit of neural networks. Whereas studies on XAI have mostly benefited from a focus on manipulating input data, assessing patterns in node weights may prove equally beneficial, if not more significant, especially when realizing that weight values may not be as random as previously thought. A manipulated, a contrived, and a real dataset were used in this study. Datasets were run on convolutional and deep neural network models. Node rank stability was the central construct to investigate neuronal patterns in this study. Rank stability was defined as the number of epochs wherein nodes held their rank in terms of weight value compared to their rank at the last epoch, when the model reached convergence, or stability (defined in this study as accuracy $\\geq$ 0.90). Findings indicated that neural networks behaved like a decision tree, in that rank stability increased as weight absolute values increased. 
Decision tree behavior may assist in more efficient pruning algorithms, which may produce distilled models simpler to explain to technical and non-technical audiences.", "keywords": "Explainable artificial intelligence;XAI;decision trees;explainability;neural networks;pruning", "primary_area": "", "supplementary_material": "", "author": "Ralf Peter Riedel;Aviv Segev", "authorids": "~Ralf_Peter_Riedel1;segev@southalabama.edu", "gender": "M;", "homepage": "https://www.southalabama.edu/colleges/soc/;", "dblp": ";", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Ralf_Peter_Riedel1;segev@southalabama.edu", "aff": "University of South Alabama;", "aff_domain": "southalabama.edu;", "position": "PhD student;", "bibtex": "@misc{\nriedel2023explainable,\ntitle={Explainable Artificial Intelligence: Reaping the Fruits of Decision Trees},\nauthor={Ralf Peter Riedel and Aviv Segev},\nyear={2023},\nurl={https://openreview.net/forum?id=K5si8PjaSy}\n}", "github": "", "project": "", "reviewers": "cBc4;b5kq;ZHzr;5BfE", "site": "https://openreview.net/forum?id=K5si8PjaSy", "pdf_size": 259395, "recommendation": "1;3;3;5", "confidence": "5;3;3;3", "correctness": "2;2;2;4", "technical_novelty": "1;2;1;2", "empirical_novelty": "2;2;3;0", "wc_summary_paper": "33;92;130;44", "wc_strength_and_weaknesses": "99;933;427;57", "wc_clarity_quality_novelty_and_reproducibility": "42;129;54;30", "wc_summary_review": "141;75;86;35", "wc_review": "315;1229;697;166", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.0, 1.4142135623730951 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "correctness_avg": [ 2.5, 0.8660254037844386 ], "technical_novelty_avg": [ 1.5, 0.5 ], "empirical_novelty_avg": [ 1.75, 1.0897247358851685 ], "wc_summary_paper_avg": [ 74.75, 38.854697270729055 ], "wc_strength_and_weaknesses_avg": [ 379.0, 350.4654048547445 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 63.75, 38.615896985568 ], "wc_summary_review_avg": [ 84.25, 37.86406607853942 ], "wc_review_avg": [ 601.75, 410.6758910625263 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.816496580927726, "corr_recommendation_correctness": 0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:XhzYOufBJxQJ:scholar.google.com/&scioq=Explainable+Artificial+Intelligence:+Reaping+the+Fruits+of+Decision+Trees&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "University of South Alabama", "aff_unique_dep": "", "aff_unique_url": "https://www.southalabama.edu", "aff_unique_abbr": "USA", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "K75z1mX4VTo", "title": "An Empirical Study on the Efficacy of Deep Active Learning Techniques", "track": "main", "status": "Reject", "tldr": "Our paper provides a comprehensive empirical study of existing deep active learning methods.", "abstract": "Deep Active Learning (DAL) has been advocated as a promising method to reduce labeling costs in supervised learning. However, existing evaluations of DAL methods are based on different settings, and their results are controversial. 
To tackle this issue, this paper comprehensively evaluates 19 existing DAL methods in a uniform setting, including traditional fully-\\underline{s}upervised \\underline{a}ctive \\underline{l}earning (SAL) strategies and emerging \\underline{s}emi-\\underline{s}upervised \\underline{a}ctive \\underline{l}earning (SSAL) techniques. We have several non-trivial findings. First, most SAL methods cannot achieve higher accuracy than random selection. Second, semi-supervised training brings significant performance improvement compared to pure SAL methods. Third, performing data selection in the SSAL setting can achieve a significant and consistent performance improvement, especially with abundant unlabeled data. Our findings produce the following guidance for practitioners: one should (i) apply SSAL as early as possible and (ii) collect more unlabeled data whenever possible, for better model performance. We will release our code upon acceptance.", "keywords": "deep neural networks;active learning;semi-supervised learning", "primary_area": "", "supplementary_material": "", "author": "YU LI;Muxi Chen;Yannan Liu;Daojing He;Qiang Xu", "authorids": "~YU_LI10;~Muxi_Chen1;~Yannan_Liu1;~Daojing_He1;~Qiang_Xu1", "gender": "Not Specified;M;M;M;M", "homepage": "http://liyu.one;https://github.com/mixiancmx;https://liuyannan.github.io/;http://faculty.hitsz.edu.cn/hedaojing;https://github.com/cure-lab", "dblp": "34/2997-7;316/2877;120/1767;60/7270;43/1230-1", "google_scholar": "M0zhrM8AAAAJ;;;;https://scholar.google.com.tw/citations?user=eSiKPqUAAAAJ", "orcid": ";;;0000-0002-3820-8128;", "linkedin": ";;;;", "or_profile": "~YU_LI10;~Muxi_Chen1;~Yannan_Liu1;~Daojing_He1;~Qiang_Xu1", "aff": "Harbin Institute of Technology (Shen Zhen);The Chinese University of Hong Kong;ByteDance;Harbin Institute of Technology;The Chinese University of Hong Kong", "aff_domain": "hit.edu.cn;cse.cuhk.edu.hk;bytedance.com;hit.edu.cn;cuhk.edu.hk", "position": "Assistant Professor;PhD student;Researcher;Full Professor;Full Professor", "bibtex": "@misc{\nli2023an,\ntitle={An Empirical Study on the Efficacy of Deep Active Learning Techniques},\nauthor={YU LI and Muxi Chen and Yannan Liu and Daojing He and Qiang Xu},\nyear={2023},\nurl={https://openreview.net/forum?id=K75z1mX4VTo}\n}", "github": "", "project": "", "reviewers": "aA7n;9BF2;5Vo7;ysmb", "site": "https://openreview.net/forum?id=K75z1mX4VTo", "pdf_size": 1285026, "recommendation": "3;5;5;6", "confidence": "4;3;3;4", "correctness": "3;3;3;3", "technical_novelty": "2;3;2;2", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "33;62;90;255", "wc_strength_and_weaknesses": "495;466;117;301", "wc_clarity_quality_novelty_and_reproducibility": "55;50;61;130", "wc_summary_review": "33;89;74;86", "wc_review": "616;667;342;772", "wc_reply_reviewers": "14;0;0;0", "wc_reply_authors": "780;811;329;461", "reply_reviewers": "1;0;0;0", "reply_authors": "2;1;1;1", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 110.0, 86.10749096333025 ], "wc_strength_and_weaknesses_avg": [ 344.75, 150.88136896250643 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 74.0, 32.56531897586756 ], "wc_summary_review_avg": [ 70.5, 22.36626924634504 ], "wc_review_avg": [ 599.25, 158.81652149571846 ], "wc_reply_reviewers_avg": [ 3.5, 6.06217782649107 ], "wc_reply_authors_avg": [ 595.25, 205.90820163364063 ], 
"reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.2294157338705618, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:3Ims3uu0zEMJ:scholar.google.com/&scioq=An+Empirical+Study+on+the+Efficacy+of+Deep+Active+Learning+Techniques&hl=en&as_sdt=0,7", "gs_version_total": 0, "aff_unique_index": "0;1;2;0;1", "aff_unique_norm": "Harbin Institute of Technology;Chinese University of Hong Kong;ByteDance", "aff_unique_dep": ";;", "aff_unique_url": "http://www.hit.edu.cn/;https://www.cuhk.edu.hk;https://www.bytedance.com", "aff_unique_abbr": "HIT;CUHK;ByteDance", "aff_campus_unique_index": "0;1;3;1", "aff_campus_unique": "Shenzhen;Hong Kong SAR;;Harbin", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Agree to Disagree: Diversity through Disagreement for Better Transferability", "status": "Top-5%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11047", "id": "K7CbYQbyYhY", "poster": "", "openreview": "https://openreview.net/forum?id=K7CbYQbyYhY", "slides": "https://iclr.cc/virtual/2023/poster/11047", "video": "https://iclr.cc/virtual/2023/poster/11047", "author_site": "Matteo Pagliardini, Martin Jaggi, Fran\u00e7ois Fleuret, Sai Karimireddy", "tldr": "", "abstract": "Gradient-based learning algorithms have an implicit \\emph{simplicity bias} which in effect can limit the diversity of predictors being sampled by the learning procedure. This behavior can hinder the transferability of trained models by (i) favoring the learning of simpler but spurious features --- present in the training data but absent from the test data --- and (ii) by only leveraging a small subset of predictive features. Such an effect is especially magnified when the test distribution does not exactly match the train distribution---referred to as the Out of Distribution (OOD) generalization problem. However, given only the training data, it is not always possible to apriori assess if a given feature is spurious or transferable. Instead, we advocate for learning an ensemble of models which capture a diverse set of predictive features. Towards this, we propose a new algorithm D-BAT (Diversity-By-disAgreement Training), which enforces agreement among the models on the training data, but disagreement on the OOD data. 
We show how D-BAT naturally emerges from the notion of generalized discrepancy, as well as demonstrate in multiple experiments how the proposed method can mitigate shortcut-learning, enhance uncertainty and OOD detection, as well as improve transferability.", "keywords": "OOD generalization;Diversity;Ensemble", "primary_area": "", "supplementary_material": "", "author": "Matteo Pagliardini;Martin Jaggi;Fran\u00e7ois Fleuret;Sai Praneeth Karimireddy", "authorids": "~Matteo_Pagliardini1;~Martin_Jaggi1;~Fran\u00e7ois_Fleuret2;~Sai_Praneeth_Karimireddy1", "gender": "M;M;M;M", "homepage": ";https://mlo.epfl.ch;https://spkreddy.org;https://fleuret.org/francois/", "dblp": "140/7789;17/4402;217/3342;90/5265", "google_scholar": "https://scholar.google.ch/citations?user=FXacC3oAAAAJ;https://scholar.google.ch/citations?user=r1TJBr8AAAAJ;wKJeOQoAAAAJ;https://scholar.google.ch/citations?user=Bj1tRlsAAAAJ", "orcid": ";0000-0003-1579-5558;;0000-0001-9457-7393", "linkedin": ";;;francois-fleuret/", "or_profile": "~Matteo_Pagliardini1;~Martin_Jaggi1;~Sai_Praneeth_Karimireddy1;~Francois_Fleuret1", "aff": "Swiss Federal Institute of Technology Lausanne;EPFL;University of California, Berkeley;University of Geneva", "aff_domain": "epfl.ch;epfl.ch;berkeley.edu;unige.ch", "position": "PhD student;Associate Professor;Postdoc;Full Professor", "bibtex": "@inproceedings{\npagliardini2023agree,\ntitle={Agree to Disagree: Diversity through Disagreement for Better Transferability},\nauthor={Matteo Pagliardini and Martin Jaggi and Fran{\\c{c}}ois Fleuret and Sai Praneeth Karimireddy},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=K7CbYQbyYhY}\n}", "github": "", "project": "", "reviewers": "3kp3;Yjzs;nyu8;Xnkg", "pdf_size": 1999415, "recommendation": "8;8;8;8", "confidence": "4;4;3;4", "correctness": "3;3;3;3", "technical_novelty": "3;4;3;3", "empirical_novelty": "4;4;3;3", "wc_summary_paper": "94;58;43;41", "wc_strength_and_weaknesses": "59;155;107;112", "wc_clarity_quality_novelty_and_reproducibility": "44;20;50;496", "wc_summary_review": "365;32;24;90", "wc_review": "562;265;224;739", "wc_reply_reviewers": "7;5;0;8", "wc_reply_authors": "516;93;160;602", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "recommendation_avg": [ 8.0, 0.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.5, 0.5 ], "wc_summary_paper_avg": [ 59.0, 21.24852936087578 ], "wc_strength_and_weaknesses_avg": [ 108.25, 34.0101087913579 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 152.5, 198.63723215953246 ], "wc_summary_review_avg": [ 127.75, 139.32403776807504 ], "wc_review_avg": [ 447.5, 212.92075990846922 ], "wc_reply_reviewers_avg": [ 5.0, 3.082207001484488 ], "wc_reply_authors_avg": [ 342.75, 219.65811503333993 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 81, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13294421345472937989&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=K7CbYQbyYhY", "email": "epfl.ch;epfl.ch;berkeley.edu;unige.ch", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Swiss Federal Institute of Technology Lausanne;EPFL;University of 
California, Berkeley;University of Geneva", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.epfl.ch;https://www.epfl.ch;https://www.berkeley.edu;https://www.unige.ch", "aff_unique_abbr": "EPFL;EPFL;UC Berkeley;UNIGE", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Lausanne;;Berkeley", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Switzerland;United States" }, { "id": "K7YxdCYmd6w", "title": "End-to-End Speech Synthesis Based on Deep Conditional Schr\u00f6dinger Bridges", "track": "main", "status": "Reject", "tldr": "", "abstract": " Speech synthesis plays an important role in human-computer interaction. Existing methods mainly employ a traditional two-stage pipeline, e.g. text-to-speech and vocoder. In this paper, we propose a system called Schr\\\"on, which can generate speech waves in an end-to-end manner by solving Schr\\\"odinger bridge problems (SBP). In order to make SBP suitable for speech synthesis, we generalize SBP in two aspects. The first generalization makes it possible to accept condition variables, which are used to control the generated speech, and the second generalization allows it to handle variable-size input. Besides these two generalizations, we propose two techniques to fill the large information gap between text and speech waveforms for generating high-quality voice. The first technique is to use a text-mel joint representation as the conditional input of the conditional SBP. The second one is to use a branch network for the generation of mel scores as a regularization, so that the text features will not degenerate. Experimental results show that Schr\\\"on achieves a state-of-the-art MOS of 4.52 on the public dataset LJSpeech. Audio samples are available at https://schron.github.io/.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Shoule Wu;Ziqiang Shi", "authorids": "~Shoule_Wu1;~Ziqiang_Shi1", "gender": "M;M", "homepage": ";https://shiziqiang.github.io/", "dblp": ";91/8676", "google_scholar": ";s_sc6dQAAAAJ", "orcid": ";0000-0002-3105-6213", "linkedin": "shoule-wu-119868212/;ziqiang-shi-0394508a/", "or_profile": "~Shoule_Wu1;~Ziqiang_Shi1", "aff": "Yangzhou University;FUJITSU RESEARCH & DEVELOPMENT CENTER CO.,LTD.", "aff_domain": "yzu.edu.cn;fujitsu.com", "position": "Lecturer;Researcher", "bibtex": "@misc{\nwu2023endtoend,\ntitle={End-to-End Speech Synthesis Based on Deep Conditional Schr\\\"odinger Bridges},\nauthor={Shoule Wu and Ziqiang Shi},\nyear={2023},\nurl={https://openreview.net/forum?id=K7YxdCYmd6w}\n}", "github": "", "project": "", "reviewers": "E7xt;8t3V;HSkn;yCEE", "site": "https://openreview.net/forum?id=K7YxdCYmd6w", "pdf_size": 1803882, "recommendation": "1;3;3;5", "confidence": "2;3;4;2", "correctness": "2;1;2;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "3;1;2;2", "wc_summary_paper": "74;65;115;46", "wc_strength_and_weaknesses": "81;394;338;110", "wc_clarity_quality_novelty_and_reproducibility": "197;24;17;79", "wc_summary_review": "38;23;21;280", "wc_review": "390;506;491;515", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.0, 1.4142135623730951 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "correctness_avg": [ 2.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 75.0, 25.20912533191106 ], "wc_strength_and_weaknesses_avg": [ 230.75, 137.07548103143756 ],
"wc_clarity_quality_novelty_and_reproducibility_avg": [ 79.25, 72.09845698764988 ], "wc_summary_review_avg": [ 90.5, 109.60497251493656 ], "wc_review_avg": [ 475.5, 50.102395152327794 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:C7cTQsyKVt4J:scholar.google.com/&scioq=End-to-End+Speech+Synthesis+Based+on+Deep+Conditional+Schr%C3%B6dinger+Bridges&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Yangzhou University;Fujitsu Research & Development Center", "aff_unique_dep": ";", "aff_unique_url": "https://www.yzu.edu.cn;https://www.fujitsu.com/global/", "aff_unique_abbr": "YZU;FRDC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "China;Japan" }, { "id": "K8oz8DyuJD", "title": "Supernet Training for Federated Image Classification Under System Heterogeneity", "track": "main", "status": "Reject", "tldr": "We propose a novel framework to tackle issues of data-heterogeneity and model-heterogeneity simultaneously by referring to supernet training.", "abstract": "Efficient deployment of deep neural networks across many devices and resource constraints, particularly on edge devices, is one of the most challenging problems in the presence of data-privacy preservation issues. Conventional approaches have evolved to either improve a single global model while keeping each local heterogeneous training data decentralized (i.e. data heterogeneity; Federated Learning (FL)) or to train an overarching network that supports diverse architectural settings to address heterogeneous systems equipped with different computational capabilities (i.e. system heterogeneity; Neural Architecture Search). However, few studies have considered both directions simultaneously. This paper proposes the federation of supernet training (FedSup) framework to consider both scenarios simultaneously, i.e., where clients send and receive a supernet that contains all possible architectures sampled from itself. The approach is inspired by observing that averaging parameters during model aggregation for FL is similar to weight-sharing in supernet training. Thus, the proposed FedSup framework combines a weight-sharing approach widely used for training single shot models with FL averaging (FedAvg). Furthermore, we develop an efficient algorithm (E-FedSup) by sending the sub-model to clients on the broadcast stage to reduce communication costs and training overhead, including several strategies to enhance supernet training in the FL environment. We verify the proposed approach with extensive empirical evaluations. 
The resulting framework also ensures data and model heterogeneity robustness on several standard benchmarks.", "keywords": "Federated Learning;Image Classification;Supernet Training;System Heterogeneity", "primary_area": "", "supplementary_material": "/attachment/f18913afcd14b120f2c1d29430c88cd00133e345.zip", "author": "Taehyeon Kim;Se-Young Yun", "authorids": "~Taehyeon_Kim1;~Se-Young_Yun1", "gender": "M;M", "homepage": "https://taehyeon.oopy.io/;https://fbsqkd.github.io", "dblp": ";23/8862", "google_scholar": "https://scholar.google.co.kr/citations?user=wDEaSpwAAAAJ;X_IAjb8AAAAJ", "orcid": ";", "linkedin": "taehyeon-k-6a1239207/;seyoung-yun-395130ab/", "or_profile": "~Taehyeon_Kim1;~Se-Young_Yun1", "aff": "Dynamo AI;KAIST", "aff_domain": "dynamo.ai;kaist.ac.kr", "position": "PhD Intern;Assistant Professor", "bibtex": "@misc{\nkim2023supernet,\ntitle={Supernet Training for Federated Image Classification Under System Heterogeneity},\nauthor={Taehyeon Kim and Se-Young Yun},\nyear={2023},\nurl={https://openreview.net/forum?id=K8oz8DyuJD}\n}", "github": "", "project": "", "reviewers": "Zkr8;zfru;jbFv", "site": "https://openreview.net/forum?id=K8oz8DyuJD", "pdf_size": 1555691, "recommendation": "5;6;6", "confidence": "2;3;2", "correctness": "2;3;4", "technical_novelty": "2;2;2", "empirical_novelty": "2;2;2", "wc_summary_paper": "29;38;71", "wc_strength_and_weaknesses": "239;232;82", "wc_clarity_quality_novelty_and_reproducibility": "55;58;8", "wc_summary_review": "78;27;33", "wc_review": "401;355;194", "wc_reply_reviewers": "0;218;0", "wc_reply_authors": "1211;1270;371", "reply_reviewers": "0;1;0", "reply_authors": "4;5;2", "recommendation_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 2.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 46.0, 18.05547008526779 ], "wc_strength_and_weaknesses_avg": [ 184.33333333333334, 72.41700230071818 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 40.333333333333336, 22.895899681432528 ], "wc_summary_review_avg": [ 46.0, 22.759613353482084 ], "wc_review_avg": [ 316.6666666666667, 88.74808292139173 ], "wc_reply_reviewers_avg": [ 72.66666666666667, 102.76618553244491 ], "wc_reply_authors_avg": [ 950.6666666666666, 410.5933376089886 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 3.6666666666666665, 1.247219128924647 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.4999999999999999, "corr_recommendation_correctness": 0.8660254037844385, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4813321151447243253&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1", "aff_unique_norm": "Dynamo AI;Korea Advanced Institute of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.dynamo.ai;https://www.kaist.ac.kr", "aff_unique_abbr": ";KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;South Korea" }, { "title": "Symmetric Pruning in Quantum Neural Networks", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11285", "id": "K96AogLDT2K", "poster": "", "openreview": "https://openreview.net/forum?id=K96AogLDT2K", "slides": "https://iclr.cc/virtual/2023/poster/11285", "video": "https://iclr.cc/virtual/2023/poster/11285", 
"author_site": "Xinbiao Wang, Junyu Liu, Tongliang Liu, Yong Luo, Yuxuan Du, Dacheng Tao", "tldr": "We prove how the symmetry enhances the training performance of QNNs and then devise an efficient symmetric pruning scheme to distill a symmetric ansatz from an over-parameterized and asymmetric ansatz.", "abstract": "Many fundamental properties of a quantum system are captured by its Hamiltonian and ground state. Despite the significance, ground states preparation (GSP) is classically intractable for large-scale Hamiltonians. Quantum neural networks (QNNs), which exert the power of modern quantum machines, have emerged as a leading protocol to conquer this issue. As such, the performance enhancement of QNNs becomes the core in GSP. Empirical evidence showed that QNNs with handcraft symmetric ans\\\"atze generally experience better trainability than those with asymmetric ans\\\"atze, while theoretical explanations remain vague. To fill this knowledge gap, here we propose the effective quantum neural tangent kernel (EQNTK) and connect this concept with over-parameterization theory to quantify the convergence of QNNs towards the global optima. We uncover that the advance of symmetric ans\\\"atze attributes to their large EQNTK value with low effective dimension, which requests few parameters and quantum circuit depth to reach the over-parameterization regime permitting a benign loss landscape and fast convergence. Guided by EQNTK, we further devise a symmetric pruning (SP) scheme to automatically tailor a symmetric ansatz from an over-parameterized and asymmetric one to greatly improve the performance of QNNs when the explicit symmetry information of Hamiltonian is unavailable. Extensive numerical simulations are conducted to validate the analytical results of EQNTK and the effectiveness of SP. 
", "keywords": "quantum neural networks;symmetry;pruning;quantum neural tangent kernel;effective dimension", "primary_area": "", "supplementary_material": "", "author": "Xinbiao Wang;Junyu Liu;Tongliang Liu;Yong Luo;Yuxuan Du;Dacheng Tao", "authorids": "~Xinbiao_Wang1;~Junyu_Liu4;~Tongliang_Liu1;~Yong_Luo2;~Yuxuan_Du2;~Dacheng_Tao1", "gender": "M;M;M;M;M;", "homepage": ";https://sites.google.com/view/junyuliu/main;https://tongliang-liu.github.io/;;https://github.com/yuxuan-du/Yuxuan-Du.github.io;", "dblp": ";;150/6667;57/5272-2.html;;", "google_scholar": "YR3JifsAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.au/citations?user=EiLdZ_YAAAAJ;zb1oVGIAAAAJ;https://scholar.google.com.au/citations?user=50sFkzIAAAAJ;", "orcid": "0000-0002-9898-820X;;;;0000-0002-1193-9756;", "linkedin": ";liujunyu123456;;;;", "or_profile": "~Xinbiao_Wang1;~Junyu_Liu4;~Tongliang_Liu1;~Yong_Luo2;~Yuxuan_Du2;~Dacheng_Tao1", "aff": "Wuhan University;University of Chicago;University of Sydney;Wuhan University;JD.com;", "aff_domain": "whu.edu.cn;uchicago.edu;sydney.edu.au;whu.edu.cn;jd.com;", "position": "PhD student;Postdoc;Lecturer;Professor;Researcher;", "bibtex": "@inproceedings{\nwang2023symmetric,\ntitle={Symmetric Pruning in Quantum Neural Networks},\nauthor={Xinbiao Wang and Junyu Liu and Tongliang Liu and Yong Luo and Yuxuan Du and Dacheng Tao},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=K96AogLDT2K}\n}", "github": "", "project": "", "reviewers": "WF6w;1zgw;feix", "pdf_size": 1783456, "recommendation": "8;8;8", "confidence": "4;4;2", "correctness": "4;4;3", "technical_novelty": "3;3;3", "empirical_novelty": "3;3;3", "wc_summary_paper": "129;87;62", "wc_strength_and_weaknesses": "102;294;115", "wc_clarity_quality_novelty_and_reproducibility": "30;9;53", "wc_summary_review": "29;25;60", "wc_review": "290;415;290", "wc_reply_reviewers": "0;0;11", "wc_reply_authors": "128;209;799", "reply_reviewers": "0;0;1", "reply_authors": "1;1;1", "recommendation_avg": [ 8.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 92.66666666666667, 27.644569488820444 ], "wc_strength_and_weaknesses_avg": [ 170.33333333333334, 87.60644319277486 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 30.666666666666668, 17.96910929592474 ], "wc_summary_review_avg": [ 38.0, 15.641824275533422 ], "wc_review_avg": [ 331.6666666666667, 58.92556509887896 ], "wc_reply_reviewers_avg": [ 3.6666666666666665, 5.185449728701348 ], "wc_reply_authors_avg": [ 378.6666666666667, 299.05443577308057 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1525356550939804957&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=K96AogLDT2K", "email": "whu.edu.cn;uchicago.edu;sydney.edu.au;whu.edu.cn;jd.com;", "author_num": 6, "aff_unique_index": "0;1;2;0;3", "aff_unique_norm": "Wuhan University;University of Chicago;University of Sydney;JD.com", "aff_unique_dep": ";;;", "aff_unique_url": 
"http://www.whu.edu.cn/;https://www.uchicago.edu;https://www.sydney.edu.au;https://www.jd.com", "aff_unique_abbr": "WHU;UChicago;USYD;JD", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;0;0", "aff_country_unique": "China;United States;Australia" }, { "id": "K9DghwWLWF3", "title": "Improve the Adaptation Process by Reasoning From Failed and Successful Cases", "track": "main", "status": "Withdraw", "tldr": "This work presents a new approach to the adaptation process in the case-based reasoning paradigm", "abstract": "Usually, existing works on adaptation in reasoning-based systems assume that the case base holds only successful cases, i.e., cases having solutions believed to be appropriate for the corresponding problems. However, in practice, the case base could hold failed cases, resulting from an earlier adaptation process but discarded by the revision process. Not considering failed cases would be missing an interesting opportunity to learn more knowledge for improving the adaptation process.\nThis paper proposes a novel approach to the adaptation process in the case-based reasoning paradigm, based on an improved barycentric approach by considering the failed cases.The experiment performed on real data demonstrates the benefit of the method considering the failed cases in the adaptation process compared to the classical ones that ignore them, thus, improving the performance of the case-based reasoning system.", "keywords": "case-based reasoning;adaptation;failed cases;artificial potential field", "primary_area": "", "supplementary_material": "", "author": "Boulmaiz Fateh;Reignier Patrick Reignier;Ploix Stephane", "authorids": "~Boulmaiz_Fateh1;patrick.reignier@univ-grenoble-alpes.fr;stephane.ploix@grenoble-inp.fr", "gender": "M;;", "homepage": ";;", "dblp": ";;", "google_scholar": "dUwyqbUAAAAJ;;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Boulmaiz_Fateh1;patrick.reignier@univ-grenoble-alpes.fr;stephane.ploix@grenoble-inp.fr", "aff": "St. 
Johns River State College;;", "aff_domain": "sjrstate.edu;;", "position": "PhD student;;", "bibtex": "@misc{\nfateh2023improve,\ntitle={Improve the Adaptation Process by Reasoning From Failed and Successful Cases},\nauthor={Boulmaiz Fateh and Reignier Patrick Reignier and Ploix Stephane},\nyear={2023},\nurl={https://openreview.net/forum?id=K9DghwWLWF3}\n}", "github": "", "project": "", "reviewers": "Jp3g;pkR2;exCo;MeGq", "site": "https://openreview.net/forum?id=K9DghwWLWF3", "pdf_size": 1938278, "recommendation": "3;5;5;5", "confidence": "2;2;2;3", "correctness": "2;3;3;2", "technical_novelty": "2;2;2;2", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "48;63;16;62", "wc_strength_and_weaknesses": "66;58;190;348", "wc_clarity_quality_novelty_and_reproducibility": "50;141;15;42", "wc_summary_review": "46;26;42;49", "wc_review": "210;288;263;501", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 2.25, 0.4330127018922193 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 47.25, 18.9917745353087 ], "wc_strength_and_weaknesses_avg": [ 165.5, 117.64671691126786 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 62.0, 47.41835087811469 ], "wc_summary_review_avg": [ 40.75, 8.870597499605086 ], "wc_review_avg": [ 315.5, 110.73955932727925 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:7YmUi-7OT74J:scholar.google.com/&scioq=Improve+the+Adaptation+Process+by+Reasoning+From+Failed+and+Successful+Cases&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "St. Johns River State College", "aff_unique_dep": "", "aff_unique_url": "https://www.sjrstate.edu/", "aff_unique_abbr": "SJRSC", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Active Image Indexing", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11510", "id": "K9RHxPpjn2", "poster": "/media/PosterPDFs/ICLR%202023/11510.png?t=1681915695.852958", "openreview": "https://openreview.net/forum?id=K9RHxPpjn2", "slides": "https://iclr.cc/virtual/2023/poster/11510", "video": "https://iclr.cc/virtual/2023/poster/11510", "author_site": "Pierre Fernandez, Matthijs Douze, Herve Jegou, Teddy Furon", "tldr": "In the context of image tracing, instead of watermarking an image with an ID, we slightly modify it to make its representation more indexing-friendly, which makes plain content-based indexing much more robust (62% \u2192 100% accuracy for some settings).", "abstract": "Image copy detection and retrieval from large databases leverage two components. First, a neural network maps an image to a vector representation, that is relatively robust to various transformations of the image. Second, an efficient but approximate similarity search algorithm trades scalability (size and speed) against quality of the search, thereby introducing a source of error. \nThis paper improves the robustness of image copy detection with active indexing, that optimizes the interplay of these two components. 
We reduce the quantization loss of a given image representation by making imperceptible changes to the image before its release. The loss is back-propagated through the deep neural network back to the image, under perceptual constraints. These modifications make the image more retrievable. \nOur experiments show that the retrieval and copy detection of activated images are significantly improved. For instance, activation improves the Recall1@1 by $+40\\%$ on various image transformations, and for several popular indexing structures based on product quantization and locality-sensitive hashing.", "keywords": "Indexing;Copy detection;Image similarity search;Watermarking", "primary_area": "", "supplementary_material": "", "author": "Pierre Fernandez;Matthijs Douze;Herve Jegou;Teddy Furon", "authorids": "~Pierre_Fernandez1;~Matthijs_Douze1;~Herve_Jegou1;~Teddy_Furon1", "gender": "M;;Unspecified;M", "homepage": "https://pierrefdz.github.io/;https://research.facebook.com/people/douze-matthijs/;;http://people.rennes.inria.fr/Teddy.Furon", "dblp": "309/5729;64/5801;19/2115;00/3862", "google_scholar": "osCX1YQAAAAJ;yZmnFbkAAAAJ;1lcY2z4AAAAJ;https://scholar.google.com/citations?hl=fr", "orcid": "0000-0003-3890-2248;;;0000-0002-1565-765X", "linkedin": "pierrefdz/;;;", "or_profile": "~Pierre_Fernandez1;~Matthijs_Douze1;~Herve_Jegou1;~Teddy_Furon1", "aff": "Universit\u00e9 Rennes 1;Meta;Meta;INRIA", "aff_domain": "univ-rennes1.fr;meta.com;fb.com;inria.fr", "position": "PhD student;researcher;Researcher;Researcher", "bibtex": "@inproceedings{\nfernandez2023active,\ntitle={Active Image Indexing},\nauthor={Pierre Fernandez and Matthijs Douze and Herve Jegou and Teddy Furon},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=K9RHxPpjn2}\n}", "github": "", "project": "", "reviewers": "pXd8;DYH1;gK1Y", "pdf_size": 9853716, "recommendation": "6;6;8", "confidence": "4;3;4", "correctness": "4;3;4", "technical_novelty": "2;2;3", "empirical_novelty": "2;2;3", "wc_summary_paper": "68;82;123", "wc_strength_and_weaknesses": "115;168;146", "wc_clarity_quality_novelty_and_reproducibility": "200;25;31", "wc_summary_review": "74;67;62", "wc_review": "457;342;362", "wc_reply_reviewers": "0;0;98", "wc_reply_authors": "609;472;212", "reply_reviewers": "0;0;1", "reply_authors": "1;1;1", "recommendation_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 91.0, 23.338094752285727 ], "wc_strength_and_weaknesses_avg": [ 143.0, 21.740898478827106 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 85.33333333333333, 81.11856891133017 ], "wc_summary_review_avg": [ 67.66666666666667, 4.9216076867444665 ], "wc_review_avg": [ 387.0, 50.16638981097471 ], "wc_reply_reviewers_avg": [ 32.666666666666664, 46.197643037521104 ], "wc_reply_authors_avg": [ 431.0, 164.64709735269147 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.49999999999999983, "corr_recommendation_correctness": 0.49999999999999983, "gs_citation": 17, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=10473374000482346363&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "pdf": "https://openreview.net/pdf?id=K9RHxPpjn2", "email": "univ-rennes1.fr;meta.com;fb.com;inria.fr", "author_num": 4, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Universit\u00e9 Rennes 1;Meta;INRIA", "aff_unique_dep": ";Meta Platforms, Inc.;", "aff_unique_url": "https://www.univ-rennes1.fr;https://meta.com;https://www.inria.fr", "aff_unique_abbr": "UR1;Meta;INRIA", "aff_campus_unique_index": "0", "aff_campus_unique": "Rennes;", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "France;United States" }, { "id": "KAB29urre4C", "title": "Style Spectroscope: Improve Interpretability and Controllability through Fourier Analysis", "track": "main", "status": "Reject", "tldr": "", "abstract": "Universal style transfer (UST) infuses styles from arbitrary reference images into content images. Existing methods, while enjoying many practical successes, are unable of explaining experimental observations, including different performances of UST algorithms in preserving the spatial structure of content images. In addition, methods are limited to cumbersome global controls on stylization, so that they require additional spatial masks for desired stylization. In this work, we provide a systematic Fourier analysis on a general framework for UST. We present an equivalent form of the framework in the frequency domain. The form implies that existing algorithms treat all frequency components and pixels of feature maps equally, except for the zero-frequency component. We connect Fourier amplitude and phase with Gram matrices and a content reconstruction loss in style transfer, respectively. Based on such equivalence and connections, we can thus interpret different structure preservation behaviors between algorithms with Fourier phase. Given the interpretations we have, we propose two manipulations in practice for structure preservation and desired stylization. Both qualitative and quantitative experiments demonstrate the competitive performance of our method against the state-of-the-art methods. 
We also conduct experiments to demonstrate (1) the abovementioned equivalence, (2) the interpretability based on Fourier amplitude and phase and (3) the controllability associated with frequency components.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Zhiyu Jin;Xuli Shen;Bin Li;Xiangyang Xue", "authorids": "~Zhiyu_Jin2;~Xuli_Shen1;~Bin_Li4;~Xiangyang_Xue2", "gender": "M;M;M;M", "homepage": "https://github.com/Dimlife;;https://aimpressionist.github.io/publications;http://homepage.fudan.edu.cn//xyxue", "dblp": ";277/5252;89/6764-15;84/3791", "google_scholar": ";OHUOaj4AAAAJ;8t97oL8AAAAJ;", "orcid": ";;0000-0002-9633-0033;0000-0002-4897-9209", "linkedin": ";%E6%97%AD%E7%AB%8B-%E6%B2%88-2074b2124/;;", "or_profile": "~Zhiyu_Jin2;~Xuli_Shen1;~Bin_Li4;~Xiangyang_Xue2", "aff": "Fudan University;Fudan University;Fudan University;Fudan University", "aff_domain": "fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn", "position": "MS student;PhD student;Associate Professor;Full Professor", "bibtex": "@misc{\njin2023style,\ntitle={Style Spectroscope: Improve Interpretability and Controllability through Fourier Analysis},\nauthor={Zhiyu Jin and Xuli Shen and Bin Li and Xiangyang Xue},\nyear={2023},\nurl={https://openreview.net/forum?id=KAB29urre4C}\n}", "github": "", "project": "", "reviewers": "niHJ;vcHP;pCyN;eNXa", "site": "https://openreview.net/forum?id=KAB29urre4C", "pdf_size": 33758953, "recommendation": "3;3;5;8", "confidence": "4;5;3;4", "correctness": "2;3;3;3", "technical_novelty": "1;2;2;4", "empirical_novelty": "1;2;2;3", "wc_summary_paper": "78;76;71;53", "wc_strength_and_weaknesses": "706;295;178;202", "wc_clarity_quality_novelty_and_reproducibility": "7;31;29;42", "wc_summary_review": "123;13;51;44", "wc_review": "914;415;329;341", "wc_reply_reviewers": "0;0;23;0", "wc_reply_authors": "289;504;30;31", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.75, 2.0463381929681126 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 1.0897247358851685 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 69.5, 9.86154146165801 ], "wc_strength_and_weaknesses_avg": [ 345.25, 212.8137389831775 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 27.25, 12.695963925594622 ], "wc_summary_review_avg": [ 57.75, 40.295005894031085 ], "wc_review_avg": [ 499.75, 241.42428937453664 ], "wc_reply_reviewers_avg": [ 5.75, 9.959292143521045 ], "wc_reply_authors_avg": [ 213.5, 198.15965785194524 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.34554737023254406, "corr_recommendation_correctness": 0.49374193110101877, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5411608112583092646&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Fudan University", "aff_unique_dep": "", "aff_unique_url": "https://www.fudan.edu.cn", "aff_unique_abbr": "Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "On the Feasibility of Cross-Task Transfer with Model-Based Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11590", "id": "KB1sc5pNKFv", "poster": 
"/media/PosterPDFs/ICLR%202023/11590.png?t=1681662621.4250262", "openreview": "https://openreview.net/forum?id=KB1sc5pNKFv", "slides": "https://iclr.cc/virtual/2023/poster/11590", "video": "https://iclr.cc/virtual/2023/poster/11590", "author_site": "Yifan Xu, Nicklas Hansen, Zirui Wang, Yung-Chieh Chan, Hao Su, Zhuowen Tu", "tldr": "We investigate the feasibility of pretraining and cross-task transfer in model-based RL, and improve sample-efficiency substantially over baselines on the Atari100k benchmark.", "abstract": "Reinforcement Learning (RL) algorithms can solve challenging control problems directly from image observations, but they often require millions of environment interactions to do so. Recently, model-based RL algorithms have greatly improved sample-efficiency by concurrently learning an internal model of the world, and supplementing real environment interactions with imagined rollouts for policy improvement. However, learning an effective model of the world from scratch is challenging, and in stark contrast to humans that rely heavily on world understanding and visual cues for learning new skills. In this work, we investigate whether internal models learned by modern model-based RL algorithms can be leveraged to solve new, distinctly different tasks faster. We propose Model-Based Cross-Task Transfer (XTRA), a framework for sample-efficient online RL with scalable pretraining and finetuning of learned world models. By offline multi-task pretraining and online cross-task finetuning, we achieve substantial improvements over a baseline trained from scratch; we improve mean performance of model-based algorithm EfficientZero by 23%, and by as much as 71% in some instances. Project page: https://nicklashansen.github.io/xtra", "keywords": "model-based reinforcement learning;visual reinforcement learning", "primary_area": "", "supplementary_material": "", "author": "Yifan Xu;Nicklas Hansen;Zirui Wang;Yung-Chieh Chan;Hao Su;Zhuowen Tu", "authorids": "~Yifan_Xu3;~Nicklas_Hansen1;~Zirui_Wang5;~Yung-Chieh_Chan1;~Hao_Su1;~Zhuowen_Tu1", "gender": ";Non-Binary;M;M;M;", "homepage": ";https://nicklashansen.github.io;https://zwcolin.github.io;https://github.com/JerryYC;http://ai.ucsd.edu/~haosu;", "dblp": ";258/0744.html;;;09/4945-1;", "google_scholar": ";OFtDgzwAAAAJ;https://scholar.google.com/citations?hl=en;;1P8Zu04AAAAJ;", "orcid": ";0000-0001-9897-4003;0009-0005-1329-5607;;;", "linkedin": ";ncklas;zwcolin/;;;", "or_profile": "~Yifan_Xu3;~Nicklas_Hansen1;~Zirui_Wang5;~Yung-Chieh_Chan1;~Hao_Su1;~Zhuowen_Tu1", "aff": ";University of California, San Diego;Princeton University;Stanford University;University of California, San Diego;", "aff_domain": ";ucsd.edu;princeton.edu;stanford.edu;ucsd.edu;", "position": ";PhD student;MS student;MS student;Assistant Professor;", "bibtex": "@inproceedings{\nxu2023on,\ntitle={On the Feasibility of Cross-Task Transfer with Model-Based Reinforcement Learning},\nauthor={Yifan Xu and Nicklas Hansen and Zirui Wang and Yung-Chieh Chan and Hao Su and Zhuowen Tu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=KB1sc5pNKFv}\n}", "github": "", "project": "", "reviewers": "k2wV;bd3b;MLsj;8xCu", "pdf_size": 5374247, "recommendation": "6;6;6;6", "confidence": "3;4;4;3", "correctness": "4;3;4;3", "technical_novelty": "3;3;3;3", "empirical_novelty": "3;4;4;2", "wc_summary_paper": "100;150;214;78", "wc_strength_and_weaknesses": "332;561;423;142", 
"wc_clarity_quality_novelty_and_reproducibility": "125;88;129;9", "wc_summary_review": "105;64;44;32", "wc_review": "662;863;810;261", "wc_reply_reviewers": "47;0;0;23", "wc_reply_authors": "597;900;984;768", "reply_reviewers": "1;0;0;1", "reply_authors": "1;2;2;1", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 135.5, 52.29483722127836 ], "wc_strength_and_weaknesses_avg": [ 364.5, 152.14877587414236 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 87.75, 48.194268331410534 ], "wc_summary_review_avg": [ 61.25, 27.725214156071004 ], "wc_review_avg": [ 649.0, 235.81242545718408 ], "wc_reply_reviewers_avg": [ 17.5, 19.448650338776723 ], "wc_reply_authors_avg": [ 812.25, 146.19229630866326 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13825009109200629311&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=KB1sc5pNKFv", "email": ";ucsd.edu;princeton.edu;stanford.edu;ucsd.edu;", "author_num": 6, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of California, San Diego;Princeton University;Stanford University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ucsd.edu;https://www.princeton.edu;https://www.stanford.edu", "aff_unique_abbr": "UCSD;Princeton;Stanford", "aff_campus_unique_index": "0;2;0", "aff_campus_unique": "San Diego;;Stanford", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "KDTaSChivXd", "title": "Learning from student's mistakes: Improving mean teacher for end-to-end semi-supervised video action detection", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "In this work, we focus on semi-supervised learning for video action detection. We present Enhanced Mean Teacher, a simple end-to-end student-teacher based framework which rely on pseudo-labels to learn from unlabeled samples. Limited amount of data make the teacher prone to unreliable boundaries while detecting the spatio-temporal actions. We propose a novel auxiliary module, which learns from student\u2019s mistakes on labeled samples and improve the spatio-temporal pseudo-labels generated by the teacher on unlabeled set. The proposed framework utilize spatial and temporal augmentations to generate pseudo-labels where both classification as well as spatio-temporal consistencies are used to train the model. We evaluate our approach on two action detection benchmark datasets, UCF101-24, and JHMDB-21. On UCF101-24, our approach outperforms the supervised baseline by an approximate margin of 19% on f-mAP@0.5 and 25% on v-mAP@0.5. Using merely 10-15% of the annotations in UCF-101-24, the proposed approach provides a competitive performance compared to the supervised baseline trained on 100% annotations. 
We also evaluate the effectiveness of Enhanced Mean Teacher for video object segmentation, demonstrating its generalization capability to other tasks in the video domain.", "keywords": "semi-supervised;activity detection;student-teacher;video understanding", "primary_area": "", "supplementary_material": "", "author": "Akash Kumar;Yogesh S Rawat", "authorids": "~Akash_Kumar3;~Yogesh_S_Rawat1", "gender": "M;M", "homepage": ";https://www.crcv.ucf.edu/person/rawat/", "dblp": "239/0195;148/2258", "google_scholar": "https://scholar.google.co.in/citations?user=gsHhV5kAAAAJ;D_JvEcwAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Akash_Kumar3;~Yogesh_S_Rawat1", "aff": "University of Central Florida;University of Central Florida", "aff_domain": "ucf.edu;ucf.edu", "position": "PhD student;Assistant Professor", "bibtex": "@misc{\nkumar2023learning,\ntitle={Learning from student's mistakes: Improving mean teacher for end-to-end semi-supervised video action detection},\nauthor={Akash Kumar and Yogesh S Rawat},\nyear={2023},\nurl={https://openreview.net/forum?id=KDTaSChivXd}\n}", "github": "", "project": "", "reviewers": "26bW;wMms;dhvY;U1oD", "site": "https://openreview.net/forum?id=KDTaSChivXd", "pdf_size": 2394941, "recommendation": "3;3;5;6", "confidence": "3;5;4;3", "correctness": "3;2;3;4", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "100;134;93;86", "wc_strength_and_weaknesses": "279;440;267;185", "wc_clarity_quality_novelty_and_reproducibility": "21;88;47;73", "wc_summary_review": "38;25;25;30", "wc_review": "438;687;432;374", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 103.25, 18.430613120566555 ], "wc_strength_and_weaknesses_avg": [ 292.75, 92.39149040901981 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 57.25, 25.557533136044253 ], "wc_summary_review_avg": [ 29.5, 5.315072906367325 ], "wc_review_avg": [ 482.75, 120.54330134851956 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.4061811972299616, "corr_recommendation_correctness": 0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:yBzl5dm-dVwJ:scholar.google.com/&scioq=Learning+from+student%27s+mistakes:+Improving+mean+teacher+for+end-to-end+semi-supervised+video+action+detection&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "University of Central Florida", "aff_unique_dep": "", "aff_unique_url": "https://www.ucf.edu", "aff_unique_abbr": "UCF", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Faster Gradient-Free Methods for Escaping Saddle Points", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11628", "id": "KDhFkA6MQsW", "poster": "/media/PosterPDFs/ICLR%202023/11628.png?t=1680841972.8909416", "openreview": "https://openreview.net/forum?id=KDhFkA6MQsW", "slides": "https://iclr.cc/virtual/2023/poster/11628", "video": "https://iclr.cc/virtual/2023/poster/11628", 
"author_site": "Hualin Zhang, Bin Gu", "tldr": "", "abstract": "Escaping from saddle points has become an important research topic in non-convex optimization. In this paper, we study the case when calculations of explicit gradients are expensive or even infeasible, and only function values are accessible. \nCurrently, there have two types of gradient-free (zeroth-order) methods based on random perturbation and negative curvature finding proposed to escape saddle points efficiently and converge to an $\\epsilon$-approximate second-order stationary point. \nNesterov's accelerated gradient descent (AGD) method can escape saddle points faster than gradient descent (GD) which have been verified in first-order algorithms. However, whether AGD could accelerate the gradient-free methods is still unstudied. To unfold this mystery, in this paper, we propose two accelerated variants for the two types of gradient-free methods of escaping saddle points. We show that our algorithms can find an $\\epsilon$-approximate second-order stationary point with $\\tilde{\\mathcal{O}}(1/\\epsilon^{1.75})$ iteration complexity and $\\tilde{\\mathcal{O}}(d/\\epsilon^{1.75})$ oracle complexity, where $d$ is the problem dimension. Thus, our methods achieve a comparable convergence rate to their first-order counterparts and have fewer oracle complexity compared to prior derivative-free methods for finding second-order stationary points.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/6605c218da865de303c0bd714b53282fef6827fd.zip", "author": "Hualin Zhang;Bin Gu", "authorids": "~Hualin_Zhang1;~Bin_Gu1", "gender": "M;M", "homepage": "https://github.com/zhanghualin0;https://mbzuai.ac.ae/study/faculty/bin-gu/", "dblp": "303/7916;29/1758-1", "google_scholar": ";Vo8OgCgAAAAJ", "orcid": ";0000-0001-6049-1815", "linkedin": ";", "or_profile": "~Hualin_Zhang1;~Bin_Gu1", "aff": "NUIST;Mohamed bin Zayed University of Artificial Intelligence", "aff_domain": "nuist.edu.cn;mbzuai.ac.ae", "position": "MS student;Assistant Professor", "bibtex": "@inproceedings{\nzhang2023faster,\ntitle={Faster Gradient-Free Methods for Escaping Saddle Points},\nauthor={Hualin Zhang and Bin Gu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=KDhFkA6MQsW}\n}", "github": "", "project": "", "reviewers": "85d1;kuK4;sdTo;Y99G", "pdf_size": 773463, "recommendation": "6;8;8;8", "confidence": "4;5;4;3", "correctness": "4;4;3;3", "technical_novelty": "3;4;3;3", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "85;73;44;59", "wc_strength_and_weaknesses": "87;598;45;243", "wc_clarity_quality_novelty_and_reproducibility": "22;76;647;15", "wc_summary_review": "3;39;33;12", "wc_review": "197;786;769;329", "wc_reply_reviewers": "0;0;21;32", "wc_reply_authors": "303;446;608;410", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;1;1", "recommendation_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 65.25, 15.335824073065002 ], "wc_strength_and_weaknesses_avg": [ 243.25, 217.695171053471 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 190.0, 264.9028123671019 ], "wc_summary_review_avg": [ 21.75, 14.7542366796795 ], "wc_review_avg": [ 520.25, 261.5180443105217 ], "wc_reply_reviewers_avg": [ 13.25, 13.808964479641476 ], "wc_reply_authors_avg": [ 441.75, 
109.44947464469621 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": -0.5773502691896257, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10483742106275719012&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "pdf": "https://openreview.net/pdf?id=KDhFkA6MQsW", "email": "nuist.edu.cn;mbzuai.ac.ae", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Nanjing University of Information Science & Technology;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "http://www.nuist.edu.cn/;https://mbzuai.ac.ae", "aff_unique_abbr": "NUIST;MBZUAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "China;United Arab Emirates" }, { "title": "Modeling Multimodal Aleatoric Uncertainty in Segmentation with Mixture of Stochastic Experts", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11924", "id": "KE_wJD2RK4", "poster": "/media/PosterPDFs/ICLR%202023/11924.png?t=1682522648.3528109", "openreview": "https://openreview.net/forum?id=KE_wJD2RK4", "slides": "https://iclr.cc/virtual/2023/poster/11924", "video": "https://iclr.cc/virtual/2023/poster/11924", "author_site": "Zhitong Gao, Yucong Chen, Chuyu Zhang, Xuming He", "tldr": "We propose a novel mixture of stochastic experts (MoSE) model training with a Wasserstein-like loss, which produces an efficient two-level representation for the multi-modal aleatoric uncertainty in semantic segmentation.", "abstract": "Equipping predicted segmentation with calibrated uncertainty is essential for safety-critical applications. In this work, we focus on capturing the data-inherent uncertainty (aka aleatoric uncertainty) in segmentation, typically when ambiguities exist in input images. Due to the high-dimensional output space and potential multiple modes in segmenting ambiguous images, it remains challenging to predict well-calibrated uncertainty for segmentation. To tackle this problem, we propose a novel mixture of stochastic experts (MoSE) model, where each expert network estimates a distinct mode of the aleatoric uncertainty and a gating network predicts the probabilities of an input image being segmented in those modes. This yields an efficient two-level uncertainty representation. To learn the model, we develop a Wasserstein-like loss that directly minimizes the distribution distance between the MoSE and ground truth annotations. The loss can easily integrate traditional segmentation quality measures and be efficiently optimized via constraint relaxation. We validate our method on the LIDC-IDRI dataset and a modified multimodal Cityscapes dataset. 
Results demonstrate that our method achieves the state-of-the-art or competitive performance on all metrics.", "keywords": "Semantic Segmentation;Aleatoric Uncertainty;Stochastic Segmentation;Multiple Annotations", "primary_area": "", "supplementary_material": "/attachment/3dbbc7d31bc0910fa95eb36a42a920f5106d87e7.zip", "author": "Zhitong Gao;Yucong Chen;Chuyu Zhang;Xuming He", "authorids": "~Zhitong_Gao1;~Yucong_Chen1;~Chuyu_Zhang1;~Xuming_He3", "gender": "F;M;M;M", "homepage": "https://gaozhitong.github.io/;https://ethan-chin.github.io;;https://faculty.sist.shanghaitech.edu.cn/faculty/hexm/index.html", "dblp": ";;270/8658;03/4230", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;YIG2lwoAAAAJ;V7IktkcAAAAJ;0KyeZ2QAAAAJ", "orcid": "0000-0002-3707-4850;;;", "linkedin": ";;;", "or_profile": "~Zhitong_Gao1;~Yucong_Chen1;~Chuyu_Zhang1;~Xuming_He3", "aff": "ShanghaiTech University;ShanghaiTech University;ShanghaiTech University;ShanghaiTech University", "aff_domain": "shanghaitech.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cn", "position": "MS student;Undergrad student;PhD student;Associate Professor", "bibtex": "@inproceedings{\ngao2023modeling,\ntitle={Modeling Multimodal Aleatoric Uncertainty in Segmentation with Mixture of Stochastic Experts},\nauthor={Zhitong Gao and Yucong Chen and Chuyu Zhang and Xuming He},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=KE_wJD2RK4}\n}", "github": "", "project": "", "reviewers": "bd5h;LSxs;VqFH", "pdf_size": 2805788, "recommendation": "6;6;6", "confidence": "4;3;4", "correctness": "3;3;3", "technical_novelty": "2;3;3", "empirical_novelty": "2;3;3", "wc_summary_paper": "101;56;146", "wc_strength_and_weaknesses": "737;160;546", "wc_clarity_quality_novelty_and_reproducibility": "56;17;106", "wc_summary_review": "69;24;52", "wc_review": "963;257;850", "wc_reply_reviewers": "88;39;65", "wc_reply_authors": "2371;815;3507", "reply_reviewers": "1;1;1", "reply_authors": "6;4;8", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 101.0, 36.742346141747674 ], "wc_strength_and_weaknesses_avg": [ 481.0, 240.00138888487012 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 59.666666666666664, 36.42648609032841 ], "wc_summary_review_avg": [ 48.333333333333336, 18.553226733434325 ], "wc_review_avg": [ 690.0, 309.633116230591 ], "wc_reply_reviewers_avg": [ 64.0, 20.016659728003237 ], "wc_reply_authors_avg": [ 2231.0, 1103.4539712496696 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 6.0, 1.632993161855452 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15942258204460019601&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=KE_wJD2RK4", "email": "shanghaitech.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "ShanghaiTech University", "aff_unique_dep": "", "aff_unique_url": "https://www.shanghaitech.edu.cn", "aff_unique_abbr": "ShanghaiTech", "aff_campus_unique_index": "", "aff_campus_unique": "", 
"aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Not All Tasks Are Born Equal: Understanding Zero-Shot Generalization", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11281", "id": "KGV-GBh8fb", "poster": "/media/PosterPDFs/ICLR%202023/11281.png?t=1681753429.3888807", "openreview": "https://openreview.net/forum?id=KGV-GBh8fb", "slides": "https://iclr.cc/virtual/2023/poster/11281", "video": "https://iclr.cc/virtual/2023/poster/11281", "author_site": "Jing Zhou, Zongyu Lin, Yanan Zheng, Jian Li, Zhilin Yang", "tldr": "", "abstract": "Recent work has achieved remarkable zero-shot performance with multi-task prompted pretraining, but little has been understood. For the first time, we show that training on a small number of key tasks beats using all the training tasks, while removing these key tasks substantially hurts performance. We also find that these key tasks are mostly question answering (QA) tasks. These novel findings combined deepen our understanding about zero-shot generalization\u2014training on certain tasks such as QA encodes general knowledge transferable to a wide range of tasks. In addition, to automate this procedure, we devise a method that (1) identifies key training tasks without observing the test tasks by examining the pairwise generalization results and (2) resamples training tasks for better data distribution. Empirically, our approach achieves improved results across various model scales and tasks.", "keywords": "Zero-Shot Learning;Multi-Task Learning;Transfer Learning", "primary_area": "", "supplementary_material": "", "author": "Jing Zhou;Zongyu Lin;Yanan Zheng;Jian Li;Zhilin Yang", "authorids": "~Jing_Zhou4;~Zongyu_Lin1;~Yanan_Zheng1;~Jian_Li2;~Zhilin_Yang2", "gender": "F;M;F;M;M", "homepage": "https://github.com/sesshousama/zhoujing.github.io/blob/gh-pages/index.md;;https://zheng-yanan.githun.io;http://iiis.tsinghua.edu.cn/~jianli;http://kimiyoung.github.io/", "dblp": ";273/7646;93/7107;33/5448-15;54/6349", "google_scholar": ";4ahRAd4AAAAJ;0DqJ8eIAAAAJ;zX7i1EkAAAAJ;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Jing_Zhou4;~Zongyu_Lin1;~Yanan_Zheng1;~Jian_Li2;~Zhilin_Yang1", "aff": "Tsinghua University;Moonshot AI;Moonshot AI;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;msh.team;moonshot.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;Researcher;Researcher;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhou2023not,\ntitle={Not All Tasks Are Born Equal: Understanding Zero-Shot Generalization},\nauthor={Jing Zhou and Zongyu Lin and Yanan Zheng and Jian Li and Zhilin Yang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=KGV-GBh8fb}\n}", "github": "", "project": "", "reviewers": "nkcG;wgeL;ioK2;DTcG", "pdf_size": 746645, "recommendation": "5;6;8;8", "confidence": "3;4;4;4", "correctness": "3;3;3;4", "technical_novelty": "2;2;3;4", "empirical_novelty": "3;2;3;3", "wc_summary_paper": "97;48;113;62", "wc_strength_and_weaknesses": "339;354;519;276", "wc_clarity_quality_novelty_and_reproducibility": "35;8;527;20", "wc_summary_review": "21;19;70;34", "wc_review": "492;429;1229;392", "wc_reply_reviewers": "0;0;259;0", "wc_reply_authors": "378;464;683;235", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;2;1", "recommendation_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 
0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 80.0, 26.105554964413226 ], "wc_strength_and_weaknesses_avg": [ 372.0, 89.77471804467001 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 147.5, 219.31313230173882 ], "wc_summary_review_avg": [ 36.0, 20.457272545478784 ], "wc_review_avg": [ 635.5, 344.51741610548515 ], "wc_reply_reviewers_avg": [ 64.75, 112.1502897900848 ], "wc_reply_authors_avg": [ 440.0, 162.39919950541628 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.7777777777777777, "corr_recommendation_correctness": 0.5555555555555555, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8401332418913673262&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "pdf": "https://openreview.net/pdf?id=KGV-GBh8fb", "email": "tsinghua.edu.cn;msh.team;moonshot.cn;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 5, "aff_unique_index": "0;1;1;0;0", "aff_unique_norm": "Tsinghua University;Moonshot AI", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;https://moonshot.ai", "aff_unique_abbr": "THU;Moonshot AI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;0", "aff_country_unique": "China;United States" }, { "id": "KICUSNslb7Q", "title": "Union Subgraph Neural Networks", "track": "main", "status": "Withdraw", "tldr": "We propose a Union Subgraph Network that introduces local structural information by a shortest-path-based descriptor.", "abstract": "Graph Neural Networks (GNNs) are widely used for graph representation learning in many application domains. The expressiveness of GNNs is upper-bounded by the 1-dimensional Weisfeiler-Lehman (1-WL) test as they operate on rooted subtrees in message passing. In this paper, we empower GNNs by injecting neighbor-connectivity information extracted from a new type of substructure. We first investigate different kinds of connectivities existing in a local neighborhood and identify a substructure called union subgraph, which is able to capture the complete picture of the neighborhood. We then design a shortest-path-based substructure descriptor that possesses three nice properties and can effectively encode the high-order connectivities in union subgraphs. By infusing the encoded neighbor connectivities, we propose a novel model, namely Union Subgraph Neural Network (UnionSNN), which is proven to be strictly more powerful than 1-WL in distinguishing non-isomorphic graphs. 
Our extensive experiments on both graph-level and node-level classification tasks demonstrate that UnionSNN outperforms state-of-the-art baseline models, with competitive computational efficiency.\n", "keywords": "Graph Neural Network;Representation Learning", "primary_area": "", "supplementary_material": "/attachment/3dd18345807274333d09c86e333ea1aeffe57bee.zip", "author": "Jiaxing Xu;Aihu Zhang;Qingtian Bian;Yiping Ke", "authorids": "~Jiaxing_Xu2;zhan0547@e.ntu.edu.sg;bian0027@e.ntu.edu.sg;~Yiping_Ke1", "gender": ";;;F", "homepage": ";;;https://keyiping.wixsite.com/index", "dblp": ";;;07/3111", "google_scholar": ";;;https://scholar.google.com.tw/citations?user=30Fp0YYAAAAJ", "orcid": ";;;0000-0001-9473-3202", "linkedin": ";;;", "or_profile": "~Jiaxing_Xu2;zhan0547@e.ntu.edu.sg;bian0027@e.ntu.edu.sg;~Yiping_Ke1", "aff": ";;;Nanyang Technological University", "aff_domain": ";;;ntu.edu.sg", "position": ";;;Associate Professor", "bibtex": "@misc{\nxu2023union,\ntitle={Union Subgraph Neural Networks},\nauthor={Jiaxing Xu and Aihu Zhang and Qingtian Bian and Yiping Ke},\nyear={2023},\nurl={https://openreview.net/forum?id=KICUSNslb7Q}\n}", "github": "", "project": "", "reviewers": "5gGi;TUPm;6BpF;S6Hp", "site": "https://openreview.net/forum?id=KICUSNslb7Q", "pdf_size": 606765, "recommendation": "3;5;5;6", "confidence": "4;3;3;4", "correctness": "3;3;1;4", "technical_novelty": "1;3;2;2", "empirical_novelty": "2;2;3;2", "wc_summary_paper": "89;27;23;66", "wc_strength_and_weaknesses": "185;251;225;561", "wc_clarity_quality_novelty_and_reproducibility": "84;7;58;52", "wc_summary_review": "40;32;35;58", "wc_review": "398;317;341;737", "wc_reply_reviewers": "0;0;0;51", "wc_reply_authors": "472;1142;810;753", "reply_reviewers": "0;0;0;1", "reply_authors": "1;2;2;1", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.75, 1.0897247358851685 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 51.25, 27.517040175135115 ], "wc_strength_and_weaknesses_avg": [ 305.5, 149.37452928796128 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 50.25, 27.716195626384224 ], "wc_summary_review_avg": [ 41.25, 10.084022015049353 ], "wc_review_avg": [ 448.25, 169.28581600358606 ], "wc_reply_reviewers_avg": [ 12.75, 22.083647796503186 ], "wc_reply_authors_avg": [ 794.25, 238.07811218169553 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.2294157338705618, "corr_recommendation_correctness": 0.15789473684210528, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18035380518613897339&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0", "aff_unique_norm": "Nanyang Technological University", "aff_unique_dep": "", "aff_unique_url": "https://www.ntu.edu.sg", "aff_unique_abbr": "NTU", "aff_country_unique_index": "0", "aff_country_unique": "Singapore" }, { "id": "KJ8iuccbPB", "title": "Representing Latent Dimensions Using Compressed Number Lines", "track": "main", "status": "Reject", "tldr": "", "abstract": "Humans use log-compressed number lines to represent different quantities, including elapsed time, traveled distance, numerosity, sound frequency, etc. 
Inspired by recent cognitive science and computational neuroscience work, we developed a neural network that learns to construct log-compressed number lines. The network computes a discrete approximation of a real-domain Laplace transform using an RNN with analytically derived weights, giving rise to a log-compressed timeline of the past. The network learns to extract latent variables from the input and uses them for global modulation of the recurrent weights, turning a timeline into a number line over relevant dimensions. The number line representation greatly simplifies learning on a set of problems that require learning associations in different spaces - problems that humans can typically solve easily. This approach illustrates how combining deep learning with cognitive models can result in systems that learn to represent latent variables in a brain-like manner and exhibit human-like behavior manifested through the Weber-Fechner law.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/f52a6bd854663abcccab1843aea5e31baa06f3c6.zip", "author": "Sahaj Singh Maini;James Mochizuki-Freeman;Chirag Shankar Indi;Brandon G Jacques;Per B Sederberg;Marc Howard;Zoran Tiganj", "authorids": "~Sahaj_Singh_Maini1;~James_Mochizuki-Freeman1;~Chirag_Shankar_Indi1;~Brandon_G_Jacques1;~Per_B_Sederberg1;~Marc_Howard2;~Zoran_Tiganj1", "gender": "M;M;M;M;M;;M", "homepage": ";https://homes.luddy.indiana.edu/jmochizu/;;https://psychology.as.virginia.edu/people/profile/bgj5hk;http://sites.bu.edu/tcn/;https://homes.luddy.indiana.edu/ztiganj/;https://compmem.org", "dblp": ";;;;57/3087;61/2669;", "google_scholar": "https://scholar.google.co.in/citations?user=LDQQEX8AAAAJ;UzFeRLYAAAAJ;;;qu3oPf0AAAAJ;BrWVCo8AAAAJ;V87qgTwAAAAJ", "orcid": ";0009-0005-6721-2344;;;;0000-0001-5581-9636;", "linkedin": ";;chirag-indi/;;;;", "or_profile": "~Sahaj_Singh_Maini1;~James_Mochizuki-Freeman1;~Chirag_Shankar_Indi1;~Brandon_G_Jacques1;~Marc_Howard2;~Zoran_Tiganj1;~Per_Sederberg1", "aff": "Indiana University, Bloomington;Indiana University;;;Boston University;Indiana University, Bloomington;University of Virginia", "aff_domain": "iu.edu;indiana.edu;;;bu.edu;iu.edu;virginia.edu", "position": "PhD student;PhD student;;;Full Professor;Assistant Professor;Associate Professor", "bibtex": "@misc{\nmaini2023representing,\ntitle={Representing Latent Dimensions Using Compressed Number Lines},\nauthor={Sahaj Singh Maini and James Mochizuki-Freeman and Chirag Shankar Indi and Brandon G Jacques and Per B Sederberg and Marc Howard and Zoran Tiganj},\nyear={2023},\nurl={https://openreview.net/forum?id=KJ8iuccbPB}\n}", "github": "", "project": "", "reviewers": "GVDk;8ytJ;p2xG;ZzkM", "site": "https://openreview.net/forum?id=KJ8iuccbPB", "pdf_size": 5689253, "recommendation": "1;3;5;6", "confidence": "4;4;3;2", "correctness": "3;3;3;4", "technical_novelty": "2;2;3;4", "empirical_novelty": "2;2;4;0", "wc_summary_paper": "66;54;183;44", "wc_strength_and_weaknesses": "133;555;469;144", "wc_clarity_quality_novelty_and_reproducibility": "341;67;63;58", "wc_summary_review": "42;28;94;55", "wc_review": "582;704;809;301", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.75, 1.920286436967152 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 2.0, 1.4142135623730951 ], "wc_summary_paper_avg": [ 86.75, 56.11316690403421 ], 
"wc_strength_and_weaknesses_avg": [ 325.25, 189.24900924443435 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 132.25, 120.56403900002687 ], "wc_summary_review_avg": [ 54.75, 24.5903944661325 ], "wc_review_avg": [ 599.0, 189.8802254053855 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.9028289727756884, "corr_recommendation_correctness": 0.676481425202546, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3371700607576244117&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0;1;0;2", "aff_unique_norm": "Indiana University;Boston University;University of Virginia", "aff_unique_dep": ";;", "aff_unique_url": "https://www.indiana.edu;https://www.bu.edu;https://www.virginia.edu", "aff_unique_abbr": "IU;BU;UVA", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Bloomington;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Alternating Differentiation for Optimization Layers", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12178", "id": "KKBMz-EL4tD", "poster": "", "openreview": "https://openreview.net/forum?id=KKBMz-EL4tD", "slides": "https://iclr.cc/virtual/2023/poster/12178", "video": "https://iclr.cc/virtual/2023/poster/12178", "author_site": "Haixiang Sun, Ye Shi, Jingya Wang, Hoang Tuan, H. Vincent Poor, Dacheng Tao", "tldr": "We propose a new implicit differentiation framework (Alt-Diff) that decouples optimization layers in an alternating way to increase the computational speed. We also prove the convergence of Alt-Diff and show the upper bound of truncated error.", "abstract": "The idea of embedding optimization problems into deep neural networks as optimization layers to encode constraints and inductive priors has taken hold in recent years. Most existing methods focus on implicitly differentiating Karush\u2013Kuhn\u2013Tucker (KKT) conditions in a way that requires expensive computations on the Jacobian matrix, which can be slow and memory-intensive. In this paper, we developed a new framework, named Alternating Differentiation (Alt-Diff), that differentiates optimization problems (here, specifically in the form of convex optimization problems with polyhedral constraints) in a fast and recursive way. Alt-Diff decouples the differentiation procedure into a primal update and a dual update in an alternating way. Accordingly, Alt-Diff substantially decreases the dimensions of the Jacobian matrix especially for optimization with large-scale constraints and thus increases the computational speed of implicit differentiation. We show that the gradients obtained by Alt-Diff are consistent with those obtained by differentiating KKT conditions. In addition, we propose to truncate Alt-Diff to further accelerate the computational speed. Under some standard assumptions, we show that the truncation error of gradients is upper bounded by the same order of variables' estimation error. Therefore, Alt-Diff can be truncated to further increase computational speed without sacrificing much accuracy. A series of comprehensive experiments validate the superiority of Alt-Diff. ", "keywords": "Alternating differentiation;optimization layers;unrolling;implicit models", "primary_area": "", "supplementary_material": "", "author": "Haixiang Sun;Ye Shi;Jingya Wang;Hoang Duong Tuan;H. 
Vincent Poor;Dacheng Tao", "authorids": "~Haixiang_Sun1;~Ye_Shi1;~Jingya_Wang3;~Hoang_Duong_Tuan1;~H._Vincent_Poor1;~Dacheng_Tao1", "gender": "M;M;F;M;M;", "homepage": ";http://faculty.sist.shanghaitech.edu.cn/faculty/shiye;https://faculty.sist.shanghaitech.edu.cn/faculty/wangjingya/;https://profiles.uts.edu.au/Tuan.Hoang;http://ee.princeton.edu/people/faculty/h-vincent-poor;", "dblp": "185/9693;34/11191-1;;;p/HVincentPoor;", "google_scholar": ";gMqbZPUAAAAJ;https://scholar.google.com.au/citations?user=vmvJV_IAAAAJ;;Dq93mOUAAAAJ;", "orcid": ";;;;;", "linkedin": "haixiang-sun-582451323/;;;;vince-poor-974a3/;", "or_profile": "~Haixiang_Sun1;~Ye_Shi1;~Jingya_Wang3;~Hoang_Duong_Tuan1;~H._Vincent_Poor1;~Dacheng_Tao1", "aff": "ShanghaiTech University;ShanghaiTech University;ShanghaiTech University;University of Technology Sydney;Princeton University;", "aff_domain": "shanghaitech.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cn;uts.edu.au;princeton.edu;", "position": "MS student;Assistant Professor;Assistant Professor;Full Professor;Full Professor;", "bibtex": "@inproceedings{\nsun2023alternating,\ntitle={Alternating Differentiation for Optimization Layers},\nauthor={Haixiang Sun and Ye Shi and Jingya Wang and Hoang Duong Tuan and H. Vincent Poor and Dacheng Tao},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=KKBMz-EL4tD}\n}", "github": "", "project": "", "reviewers": "apdv;kA5n;mj5g", "pdf_size": 752009, "recommendation": "6;6;8", "confidence": "4;4;4", "correctness": "2;2;4", "technical_novelty": "3;4;3", "empirical_novelty": "3;2;3", "wc_summary_paper": "238;151;121", "wc_strength_and_weaknesses": "568;476;190", "wc_clarity_quality_novelty_and_reproducibility": "166;67;49", "wc_summary_review": "82;37;60", "wc_review": "1054;731;420", "wc_reply_reviewers": "47;522;0", "wc_reply_authors": "1281;1327;265", "reply_reviewers": "1;2;0", "reply_authors": "2;3;1", "recommendation_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.6666666666666665, 0.9428090415820634 ], "technical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 170.0, 49.61854492022111 ], "wc_strength_and_weaknesses_avg": [ 411.3333333333333, 160.94995771633148 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 94.0, 51.43928459844674 ], "wc_summary_review_avg": [ 59.666666666666664, 18.372685039360892 ], "wc_review_avg": [ 735.0, 258.8448698867078 ], "wc_reply_reviewers_avg": [ 189.66666666666666, 235.77720180053223 ], "wc_reply_authors_avg": [ 957.6666666666666, 490.14918363924903 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 1.0, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6148254735506307957&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=KKBMz-EL4tD", "email": "shanghaitech.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cn;uts.edu.au;princeton.edu;", "author_num": 6, "aff_unique_index": "0;0;0;1;2", "aff_unique_norm": "ShanghaiTech University;University of Technology Sydney;Princeton University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.shanghaitech.edu.cn;https://www.uts.edu.au;https://www.princeton.edu", 
"aff_unique_abbr": "ShanghaiTech;UTS;Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;2", "aff_country_unique": "China;Australia;United States" }, { "id": "KL6i1IdwQ6z", "title": "xTrimoDock: Cross-Modal Transformer for Multi-Chain Protein Docking", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "The structure of a protein\u2013protein complex plays a critical role in understanding the dynamics of binding, delineating biological mechanisms, and developing intervention strategies. Rigid protein-protein docking, assuming no conformational change within proteins, predicts the 3D structure of protein complexes from unbound chains. According to the number of chains, rigid docking is divided into binary complex setting that contains only two chains, and more ubiquitous multi-chain complex setting. Most existing docking methods are tailored for binary complexes, and are computationally expensive or not guaranteed to find accurate complex structures. In this paper, we propose a novel model xTrimoDock for the docking of multi-chain complexes, which can simultaneously employ information from both sequence modality and structure modality of involved protein chains. Specifically, xTrimoDock leverages a cross-modal transformer to integrate representations from protein sequences and structures, and conducts a multi-step prediction of rotations and translations to accomplish the multi-chain docking. Extensive experiments reflect the promising results of the proposed model in the harder multi-chain complex setting.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Ruijia Wang;Shaochuan Li;Xiao Wang;Zhiyuan Lu;Bing Yang;Hui Li;Chuan Shi;Le Song", "authorids": "~Ruijia_Wang2;~Shaochuan_Li1;~Xiao_Wang2;~Zhiyuan_Lu1;~Bing_Yang3;~Hui_Li2;~Chuan_Shi1;~Le_Song1", "gender": "F;;M;;M;;M;M", "homepage": ";;https://wangxiaocs.github.io/;;;;http://www.shichuan.org/;http://www.cc.gatech.edu/~lsong", "dblp": ";;49/67-17;;;;64/3041-1;94/3481", "google_scholar": "https://scholar.google.ca/citations?user=DpsuBrsAAAAJ;;MnzarAQAAAAJ;YHjKBWQAAAAJ;;;tUq_v90AAAAJ;Xl4E0CsAAAAJ", "orcid": ";;0000-0002-4444-7811;;0000-0003-1983-3988;;0000-0002-3734-0266;", "linkedin": ";;;;;;;", "or_profile": "~Ruijia_Wang2;~Shaochuan_Li1;~Xiao_Wang2;~Zhiyuan_Lu1;~Bing_Yang3;~Hui_Li2;~Chuan_Shi1;~Le_Song1", "aff": "Beijing University of Posts and Telecommunications;;Beihang University;None;;;Beijing University of Post and Telecommunication;College of Computing, Georgia Institute of Technology", "aff_domain": "bupt.edu.cn;;buaa.edu.cn;none.com;;;bupt.edu.cn;cc.gatech.edu", "position": "PhD student;;Associate Professor;None;;;Full Professor;Associate Professor", "bibtex": "@misc{\nwang2023xtrimodock,\ntitle={xTrimoDock: Cross-Modal Transformer for Multi-Chain Protein Docking},\nauthor={Ruijia Wang and Shaochuan Li and Xiao Wang and Zhiyuan Lu and Bing Yang and Hui Li and Chuan Shi and Le Song},\nyear={2023},\nurl={https://openreview.net/forum?id=KL6i1IdwQ6z}\n}", "github": "", "project": "", "reviewers": "s6ax;o9EB;Xwqf", "site": "https://openreview.net/forum?id=KL6i1IdwQ6z", "pdf_size": 2867654, "recommendation": "5;5;8", "confidence": "5;4;3", "correctness": "2;3;1", "technical_novelty": "2;3;1", "empirical_novelty": "2;2;1", "wc_summary_paper": "232;57;75", "wc_strength_and_weaknesses": "236;126;127", "wc_clarity_quality_novelty_and_reproducibility": "51;45;34", "wc_summary_review": "70;44;33", "wc_review": "589;272;269", "wc_reply_reviewers": 
"0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "correctness_avg": [ 2.0, 0.816496580927726 ], "technical_novelty_avg": [ 2.0, 0.816496580927726 ], "empirical_novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "wc_summary_paper_avg": [ 121.33333333333333, 78.59742715608161 ], "wc_strength_and_weaknesses_avg": [ 163.0, 51.62040940041707 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 43.333333333333336, 7.039570693980959 ], "wc_summary_review_avg": [ 49.0, 15.513435037626794 ], "wc_review_avg": [ 376.6666666666667, 150.1473350486855 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": -0.8660254037844387, "corr_recommendation_correctness": -0.8660254037844387, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:3O5HVzFaCEsJ:scholar.google.com/&scioq=xTrimoDock:+Cross-Modal+Transformer+for+Multi-Chain+Protein+Docking&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;2;0;3", "aff_unique_norm": "Beijing University of Posts and Telecommunications;Beihang University;Google;Georgia Institute of Technology", "aff_unique_dep": ";;Google AI;College of Computing", "aff_unique_url": "http://www.bupt.edu.cn/;http://www.buaa.edu.cn/;https://ai.google;https://www.gatech.edu", "aff_unique_abbr": "BUPT;BUAA;Google AI;Georgia Tech", "aff_campus_unique_index": "0;2;0;3", "aff_campus_unique": "Beijing;;Mountain View;Atlanta", "aff_country_unique_index": "0;0;1;0;1", "aff_country_unique": "China;United States" }, { "title": "Video Scene Graph Generation from Single-Frame Weak Supervision", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11638", "id": "KLrGlNoxzb4", "poster": "/media/PosterPDFs/ICLR%202023/11638.png?t=1682675022.9867225", "openreview": "https://openreview.net/forum?id=KLrGlNoxzb4", "slides": "https://iclr.cc/virtual/2023/poster/11638", "video": "https://iclr.cc/virtual/2023/poster/11638", "author_site": "Siqi Chen, Jun Xiao, Long Chen", "tldr": "We propose a novel method for weakly-supervised VidSGG task with only single-frame weak supervision.", "abstract": "Video scene graph generation (VidSGG) aims to generate a sequence of graph-structure representations for the given video. However, all existing VidSGG methods are fully-supervised, i.e., they need dense and costly manual annotations. In this paper, we propose the first weakly-supervised VidSGG task with only single-frame weak supervision: SF-VidSGG. By ``weakly-supervised\", we mean that SF-VidSGG relaxes the training supervision from two different levels: 1) It only provides single-frame annotations instead of all-frame annotations. 2) The single-frame ground-truth annotation is still a weak image SGG annotation, i.e., an unlocalized scene graph. To solve this new task, we also propose a novel Pseudo Label Assignment based method, dubbed as PLA. PLA is a two-stage method, which generates pseudo visual relation annotations for the given video at the first stage, and then trains a fully-supervised VidSGG model with these pseudo labels. Specifically, PLA consists of three modules: an object PLA module, a predicate PLA module, and a future predicate prediction (FPP) module. 
Firstly, in the object PLA, we localize all objects in every frame. Then, in the predicate PLA, we design two different teachers to assign pseudo predicate labels. Lastly, in the FPP module, we fuse these two predicate pseudo labels based on the regularity of relation transitions in videos. Extensive ablations and results on the benchmark Action Genome have demonstrated the effectiveness of our PLA.", "keywords": "computer vision;video scene graph generation;weakly-supervised learning", "primary_area": "", "supplementary_material": "", "author": "Siqi Chen;Jun Xiao;Long Chen", "authorids": "~Siqi_Chen3;~Jun_Xiao1;~Long_Chen8", "gender": "M;M;M", "homepage": ";;https://zjuchenlong.github.io/", "dblp": ";71/2308-1;64/5725-16", "google_scholar": ";fqOwFhQAAAAJ;https://scholar.google.com.sg/citations?user=-gtmMpIAAAAJ", "orcid": ";;0000-0001-6148-9709", "linkedin": "%E9%99%88-%E6%80%9D%E5%90%AF-7b8b91192/;;", "or_profile": "~Siqi_Chen3;~Jun_Xiao1;~Long_Chen8", "aff": "Zhejiang University;Zhejiang University;Columbia University", "aff_domain": "zju.edu.cn;zju.edu.cn;columbia.edu", "position": "MS student;Full Professor;Postdoc", "bibtex": "@inproceedings{\nchen2023video,\ntitle={Video Scene Graph Generation from Single-Frame Weak Supervision},\nauthor={Siqi Chen and Jun Xiao and Long Chen},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=KLrGlNoxzb4}\n}", "github": "", "project": "", "reviewers": "hgye;XWwS;spBp;Ca8D", "pdf_size": 1594320, "recommendation": "6;6;6;8", "confidence": "3;3;4;4", "correctness": "2;3;3;3", "technical_novelty": "3;2;3;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "43;58;95;86", "wc_strength_and_weaknesses": "304;173;378;110", "wc_clarity_quality_novelty_and_reproducibility": "43;39;46;30", "wc_summary_review": "30;20;65;29", "wc_review": "420;290;584;255", "wc_reply_reviewers": "139;256;163;66", "wc_reply_authors": "1964;2537;1421;425", "reply_reviewers": "2;3;1;1", "reply_authors": "5;6;4;2", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 70.5, 20.93442141545832 ], "wc_strength_and_weaknesses_avg": [ 241.25, 105.50207343934052 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 39.5, 6.020797289396148 ], "wc_summary_review_avg": [ 36.0, 17.190113437671084 ], "wc_review_avg": [ 387.25, 129.16147839042412 ], "wc_reply_reviewers_avg": [ 156.0, 67.89329863837814 ], "wc_reply_authors_avg": [ 1586.75, 778.2076763306823 ], "reply_reviewers_avg": [ 1.75, 0.82915619758885 ], "reply_authors_avg": [ 4.25, 1.479019945774904 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18348772269616651386&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "pdf": "https://openreview.net/pdf?id=KLrGlNoxzb4", "email": "zju.edu.cn;zju.edu.cn;columbia.edu", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Zhejiang University;Columbia University", "aff_unique_dep": ";", "aff_unique_url": "https://www.zju.edu.cn;https://www.columbia.edu", "aff_unique_abbr": "ZJU;Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", 
"aff_country_unique": "China;United States" }, { "id": "KNL8KSH7b_F", "title": "UPop: Unified and Progressive Pruning for Compressing Vision-Language Transformers", "track": "main", "status": "Withdraw", "tldr": "For the first time, we propose a multimodal compression approach UPop for vision-language Transformers from the perspective of pruning.", "abstract": "Data from the real world contains a vast amount of multimodal information, among which vision and language are the two most representative modalities. On the other hand, researchers have spent much effort on model compression to reduce the huge memory and computational consumption of increasingly large models. However, how to compress multimodal models, especially vison-language Transformers, is still under-explored. This paper proposes the Unified and Progressive Pruning (UPop) that compresses vison-language Transformers via pruning. UPop incorporates 1) unifiedly searching countless multimodal subnetworks in a continuous optimization space from the uncompressed model; 2) progressively and simultaneously retraining the subnetwork. The subnetworks are learned in multiple components, including the self-attention modules, MLPs in both vision and language branches, and cross-attention modules. To ease the progress of pruning, we design \\textit{Unified Pruning} to automatically assign the optimal pruning ratio to each compressiable component, instead of manually assigning each component a pruning ratio. To explore the limitation of compression ratio, we propose \\textit{Progressive Pruning} to maintain convergence between search and retrain. In addition, UPop enables zero-cost subnetwork selection after searching countless multimodal subnetworks, and the searched subnetwork can be used without any retraining. Experiments on multiple discriminative and generative vision-lanuage tasks demonstrate the versatility of the proposed UPop. 
For example, we achieve \\textbf{2$\\times $} compression and \\textbf{1.66$\\times$} FLOPs reduction on the COCO image captioning dataset with a \\textbf{0.8} SPICE drop, and \\textbf{4$\\times $} compression and \\textbf{2.96$\\times$} FLOPs reduction with a \\textbf{2.1} SPICE drop.", "keywords": "Multimodal Model;Model Compression;Vision-Language Transformers", "primary_area": "", "supplementary_material": "", "author": "Dachuan Shi;Chaofan Tao;Ying Jin;Zhendong Yang;Chun Yuan;Jiaqi Wang", "authorids": "~Dachuan_Shi2;~Chaofan_Tao1;~Ying_Jin1;~Zhendong_Yang2;~Chun_Yuan1;~Jiaqi_Wang1", "gender": "M;M;F;M;M;M", "homepage": "https://www.dachuanshi.com;;https://jin-ying.github.io/;;https://www.sigs.tsinghua.edu.cn/fg3/105064.jhtml;https://myownskyw7.github.io/", "dblp": "283/0549;239/5831;46/176/;14/1820;;44/740-3", "google_scholar": "https://scholar.google.com/citations?hl=en;gjmfLroAAAAJ;RSqGfysAAAAJ;M9qKrogAAAAJ;https://scholar.google.com.hk/citations?user=fYdxi2sAAAAJ;https://scholar.google.com.hk/citations?user=GDvt570AAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Dachuan_Shi2;~Chaofan_Tao1;~Ying_Jin1;~Zhendong_Yang2;~Chun_Yuan1;~Jiaqi_Wang1", "aff": "Tsinghua University;The University of Hong Kong;The Chinese University of Hong Kong; Tsinghua University;Tsinghua University;Shanghai AI Laboratory", "aff_domain": "tsinghua.edu.cn;hku.hk;ie.cuhk.edu;mails.tsinghua.edu.cn;tsinghua.edu.cn;pjlab.org.cn", "position": "MS student;PhD Student;PhD student;MS student;Full Professor;Research Scientist", "bibtex": "@misc{\nshi2023upop,\ntitle={{UP}op: Unified and Progressive Pruning for Compressing Vision-Language Transformers},\nauthor={Dachuan Shi and Chaofan Tao and Ying Jin and Zhendong Yang and Chun Yuan and Jiaqi Wang},\nyear={2023},\nurl={https://openreview.net/forum?id=KNL8KSH7b_F}\n}", "github": "", "project": "", "reviewers": "2dys;vBgg;3JnZ", "site": "https://openreview.net/forum?id=KNL8KSH7b_F", "pdf_size": 1297081, "recommendation": "6;6;6", "confidence": "3;2;4", "correctness": "3;3;3", "technical_novelty": "3;2;3", "empirical_novelty": "3;2;3", "wc_summary_paper": "65;63;141", "wc_strength_and_weaknesses": "188;68;72", "wc_clarity_quality_novelty_and_reproducibility": "8;10;22", "wc_summary_review": "13;56;24", "wc_review": "274;197;259", "wc_reply_reviewers": "0;0;100", "wc_reply_authors": "713;764;538", "reply_reviewers": "0;0;1", "reply_authors": "1;1;2", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 89.66666666666667, 36.307330144506935 ], "wc_strength_and_weaknesses_avg": [ 109.33333333333333, 55.649698012557884 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 13.333333333333334, 6.182412330330469 ], "wc_summary_review_avg": [ 31.0, 18.239152027072603 ], "wc_review_avg": [ 243.33333333333334, 33.32999983331666 ], "wc_reply_reviewers_avg": [ 33.333333333333336, 47.14045207910317 ], "wc_reply_authors_avg": [ 671.6666666666666, 96.7826889938944 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 40, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7066950519917910506&as_sdt=5,44&sciodt=0,44&hl=en", 
"gs_version_total": 7, "aff_unique_index": "0;1;2;0;0;3", "aff_unique_norm": "Tsinghua University;University of Hong Kong;Chinese University of Hong Kong;Shanghai AI Laboratory", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.hku.hk;https://www.cuhk.edu.hk;https://www.shanghai-ai-lab.com", "aff_unique_abbr": "THU;HKU;CUHK;SAIL", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "KNSRDB-clPX", "title": "Improving Protein Interaction Prediction using Pretrained Structure Embedding", "track": "main", "status": "Reject", "tldr": "", "abstract": "The prediction of protein-protein interactions (PPIs) is a critical problem because the knowledge of PPIs unravels the cellular behavior and its functionality. So far most previous works on PPI predictions mainly focused on sequence and network information and ignored the structural information of protein physical binding. We design a novel method, called xxx, which can leverage pretrained structure embedding and can be transferred to new ppi predictions. Experimental results on PPi predictions show that our pretrained structure embedding leads to significant improvement in PPI prediction comparing to sequence and network based methods. Furthermore, we show that embeddings pretrained based on ppi from different species can be transferred to improve the prediction for human proteins. ", "keywords": "pretrianing;protein;PPI", "primary_area": "", "supplementary_material": "", "author": "Chunchen Wang;YiWu Sun;Bing Yang;Shaochuan Li;Cheng Yang;Hui Li;Chuan Shi;Le Song", "authorids": "~Chunchen_Wang1;~YiWu_Sun1;~Bing_Yang3;~Shaochuan_Li1;~Cheng_Yang6;~Hui_Li2;~Chuan_Shi1;~Le_Song1", "gender": "M;M;M;;M;;M;M", "homepage": "http://shichuan.org;https://github.com/SYW23;;;https://albertyang33.github.io/;;http://www.shichuan.org/;http://www.cc.gatech.edu/~lsong", "dblp": ";;;;49/1457-2;;64/3041-1;94/3481", "google_scholar": ";;;;OlLjVUcAAAAJ;;tUq_v90AAAAJ;Xl4E0CsAAAAJ", "orcid": ";0000-0002-0061-0779;0000-0003-1983-3988;;0000-0001-7821-0030;;0000-0002-3734-0266;", "linkedin": ";;;;;;;", "or_profile": "~Chunchen_Wang1;~YiWu_Sun1;~Bing_Yang3;~Shaochuan_Li1;~Cheng_Yang6;~Hui_Li2;~Chuan_Shi1;~Le_Song1", "aff": "Beijing University of Posts and Telecommunications;;;;Beijing University of Posts and Telecommunications;;Beijing University of Post and Telecommunication;College of Computing, Georgia Institute of Technology", "aff_domain": "bupt.edu.cn;;;;bupt.edu.cn;;bupt.edu.cn;cc.gatech.edu", "position": "PhD student;;;;Associate Professor;;Full Professor;Associate Professor", "bibtex": "@misc{\nwang2023improving,\ntitle={Improving Protein Interaction Prediction using Pretrained Structure Embedding},\nauthor={Chunchen Wang and YiWu Sun and Bing Yang and Shaochuan Li and Cheng Yang and Hui Li and Chuan Shi and Le Song},\nyear={2023},\nurl={https://openreview.net/forum?id=KNSRDB-clPX}\n}", "github": "", "project": "", "reviewers": "3nJt;AAa4;p7hw;23GN", "site": "https://openreview.net/forum?id=KNSRDB-clPX", "pdf_size": 488938, "recommendation": "3;3;3;3", "confidence": "4;4;4;4", "correctness": "1;2;4;3", "technical_novelty": "2;2;1;2", "empirical_novelty": "1;2;1;2", "wc_summary_paper": "98;13;28;60", "wc_strength_and_weaknesses": "109;102;78;263", "wc_clarity_quality_novelty_and_reproducibility": "8;137;2;57", "wc_summary_review": "254;17;21;44", "wc_review": "469;269;129;424", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": 
"0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.5, 1.118033988749895 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.5, 0.5 ], "wc_summary_paper_avg": [ 49.75, 32.62188682464581 ], "wc_strength_and_weaknesses_avg": [ 138.0, 73.07872467414849 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 51.0, 54.04165060395547 ], "wc_summary_review_avg": [ 84.0, 98.68890515149107 ], "wc_review_avg": [ 322.75, 134.22811739721303 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:YTdYIogRk4YJ:scholar.google.com/&scioq=Improving+Protein+Interaction+Prediction+using+Pretrained+Structure+Embedding&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Beijing University of Posts and Telecommunications;Georgia Institute of Technology", "aff_unique_dep": ";College of Computing", "aff_unique_url": "http://www.bupt.edu.cn/;https://www.gatech.edu", "aff_unique_abbr": "BUPT;Georgia Tech", "aff_campus_unique_index": "0;0;0;1", "aff_campus_unique": "Beijing;Atlanta", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "China;United States" }, { "id": "KPyHNVpear1", "title": "Conditional Execution Of Cascaded Models Improves The Accuracy-Efficiency Trade-Off", "track": "main", "status": "Reject", "tldr": "We show how to combine pairs of pretrained models to improve the entire ImageNet accuracy-compute Pareto front.", "abstract": "The compute effort required to perform inference on state-of-the-art deep learning models is ever growing. Practical applications are commonly limited to a certain cost per inference. Cascades of pretrained models with conditional execution address these requirements based on the intuition that some inputs are easy enough that they can be processed correctly by a small model allowing for an early exit. If the small model is not sufficiently confident in its prediction, the input is passed on to a larger model. The selection of the confidence threshold allows to trade off compute effort against accuracy. In this work, we explore the effective design of model cascades, and thoroughly evaluate the impact on the accuracy-compute trade-off. We find that they not only interpolate favorably between pretrained models, but that this trade-off curve commonly outperforms single models. This allows us to redefine most of the ImageNet Pareto front already with 2-model cascades, achieving an average reduction in compute effort at equal accuracy of almost 3.1x above 86% and more than 1.9x between 80% and 86% top-1 accuracy. We confirm the wide applicability and effectiveness of the method on the GLUE benchmark. 
We release the code to reproduce our experiments in the supplementary material and use only publicly available models and datasets.", "keywords": "inference;efficiency;cascades;pretrained", "primary_area": "", "supplementary_material": "/attachment/6d44dbb5e875ed9f3df9117d7c8f77ee91a93528.zip", "author": "Luzian Lebovitz;Lukas Cavigelli;Michele Magno;Lorenz K Muller", "authorids": "~Luzian_Lebovitz1;~Lukas_Cavigelli1;~Michele_Magno1;~Lorenz_K_Muller1", "gender": ";M;M;M", "homepage": "https://pbl.ee.ethz.ch/people/person-detail.MTg4MjAw.TGlzdC8zNzIyLDEwOTgwMDQ2ODc=.html;;https://ee.ethz.ch/the-department/people-a-z/person-detail.michele-magno.html;", "dblp": ";137/9406;;139/1372", "google_scholar": ";https://scholar.google.ch/citations?user=15o2H4cAAAAJ;ytj7UUcAAAAJ;https://scholar.google.ch/citations?user=DxppwfcAAAAJ", "orcid": ";0000-0003-1767-7715;;", "linkedin": ";lcavigelli/;;", "or_profile": "~Luzian_Lebovitz1;~Lukas_Cavigelli1;~Michele_Magno1;~Lorenz_K_Muller1", "aff": ";Huawei Technologies;ETHZ - ETH Zurich;Huawei Technologies Ltd.", "aff_domain": ";huawei.com;ethz.ch;huawei.com", "position": ";Principal Researcher;Principal Researcher;Researcher", "bibtex": "@misc{\nlebovitz2023conditional,\ntitle={Conditional Execution Of Cascaded Models Improves The Accuracy-Efficiency Trade-Off},\nauthor={Luzian Lebovitz and Lukas Cavigelli and Michele Magno and Lorenz K Muller},\nyear={2023},\nurl={https://openreview.net/forum?id=KPyHNVpear1}\n}", "github": "", "project": "", "reviewers": "Bbfc;J6Fm;Q4Xh;CNbJ", "site": "https://openreview.net/forum?id=KPyHNVpear1", "pdf_size": 1108391, "recommendation": "3;3;3;8", "confidence": "3;4;4;4", "correctness": "4;3;3;4", "technical_novelty": "1;1;1;4", "empirical_novelty": "2;1;1;4", "wc_summary_paper": "62;64;78;52", "wc_strength_and_weaknesses": "68;125;260;111", "wc_clarity_quality_novelty_and_reproducibility": "15;48;30;44", "wc_summary_review": "83;56;103;26", "wc_review": "228;293;471;233", "wc_reply_reviewers": "0;127;0;0", "wc_reply_authors": "273;635;735;300", "reply_reviewers": "0;1;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.25, 2.165063509461097 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 1.75, 1.299038105676658 ], "empirical_novelty_avg": [ 2.0, 1.224744871391589 ], "wc_summary_paper_avg": [ 64.0, 9.273618495495704 ], "wc_strength_and_weaknesses_avg": [ 141.0, 71.84358008896828 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 34.25, 12.968712349342937 ], "wc_summary_review_avg": [ 67.0, 28.956864471140516 ], "wc_review_avg": [ 306.25, 98.49714462866423 ], "wc_reply_reviewers_avg": [ 31.75, 54.99261314031185 ], "wc_reply_authors_avg": [ 485.75, 202.58748110384315 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:wd3MyADL6M0J:scholar.google.com/&scioq=Conditional+Execution+Of+Cascaded+Models+Improves+The+Accuracy-Efficiency+Trade-Off&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;0", "aff_unique_norm": "Huawei;ETH Zurich", "aff_unique_dep": "Huawei Technologies;", "aff_unique_url": "https://www.huawei.com;https://www.ethz.ch", "aff_unique_abbr": "Huawei;ETHZ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": 
"0;1;0", "aff_country_unique": "China;Switzerland" }, { "id": "KQ-ipHOmBc", "title": "Few-Shot Text Classification with Dual Contrastive Consistency Training", "track": "main", "status": "Reject", "tldr": "", "abstract": "In this paper, we explore how to utilize pre-trained language model to perform few-shot text classification where only a few annotated examples are given for each class. Since using traditional cross-entropy loss to fine-tune language model under this scenario causes serious overfitting and leads to sub-optimal generalization of model, we adopt supervised contrastive learning on few labeled data and consistency-regularization on vast unlabeled data. Moreover, we propose a novel contrastive consistency to further boost model performance and refine sentence representation. After conducting extensive experiments on four datasets, we demonstrate that our model (FTCC) can outperform state-of-the-art methods and has better robustness. ", "keywords": "Few-Shot Learning;Contrastive Learning;Consistency Training", "primary_area": "", "supplementary_material": "", "author": "Liwen Sun", "authorids": "~Liwen_Sun2", "gender": "M", "homepage": "", "dblp": "31/7993", "google_scholar": "SvhUG8wAAAAJ", "orcid": "", "linkedin": "dominic-liwen-sun/", "or_profile": "~Liwen_Sun2", "aff": "University of Illinois at Urbana-Champagne ", "aff_domain": "cs.illinois.edu", "position": "Undergrad student", "bibtex": "@misc{\nsun2023fewshot,\ntitle={Few-Shot Text Classification with Dual Contrastive Consistency Training},\nauthor={Liwen Sun},\nyear={2023},\nurl={https://openreview.net/forum?id=KQ-ipHOmBc}\n}", "github": "", "project": "", "reviewers": "CtgC;6ZcP;3uH4;auWg", "site": "https://openreview.net/forum?id=KQ-ipHOmBc", "pdf_size": 1059717, "recommendation": "3;3;3;5", "confidence": "4;4;3;4", "correctness": "2;2;4;3", "technical_novelty": "1;2;2;2", "empirical_novelty": "1;2;0;2", "wc_summary_paper": "39;57;79;144", "wc_strength_and_weaknesses": "131;308;189;171", "wc_clarity_quality_novelty_and_reproducibility": "45;32;92;88", "wc_summary_review": "44;6;53;12", "wc_review": "259;403;413;415", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.82915619758885 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 79.75, 39.707524475847144 ], "wc_strength_and_weaknesses_avg": [ 199.75, 65.92941301118948 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 64.25, 26.1951808545007 ], "wc_summary_review_avg": [ 28.75, 20.116846174288852 ], "wc_review_avg": [ 372.5, 65.68675665611752 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 1, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 0.17407765595569782, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6039141297595942980&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "University of Illinois Urbana-Champaign", "aff_unique_dep": "", "aff_unique_url": "https://illinois.edu", "aff_unique_abbr": "UIUC", "aff_campus_unique_index": "0", "aff_campus_unique": "Urbana-Champaign", "aff_country_unique_index": "0", 
"aff_country_unique": "United States" }, { "title": "When and Why Vision-Language Models Behave like Bags-Of-Words, and What to Do About It?", "status": "Top-5%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10875", "id": "KRLUvxh8uaX", "poster": "", "openreview": "https://openreview.net/forum?id=KRLUvxh8uaX", "slides": "https://iclr.cc/virtual/2023/poster/10875", "video": "https://iclr.cc/virtual/2023/poster/10875", "author_site": "Mert Yuksekgonul, Federico Bianchi, Ria Kalluri, Dan Jurafsky, James Y Zou", "tldr": "", "abstract": "Despite the success of large vision and language models (VLMs) in many downstream applications, it is unclear how well they encode the compositional relationships between objects and attributes. Here, we create the Attribution, Relation, and Order (ARO) benchmark to systematically evaluate the ability of VLMs to understand different types of relationships, attributes, and order information. ARO consists of \\emph{Visual Genome Attribution}, to test the understanding of objects' properties; \\emph{Visual Genome Relation}, to test for relational understanding; and \\emph{COCO-Order \\& Flickr30k-Order}, to test for order sensitivity in VLMs. ARO is orders of magnitude larger than previous benchmarks of compositionality, with more than 50,000 test cases. We present the settings where state-of-the-art VLMs behave like bags-of-words---i.e. when they have poor relational understanding, can blunder when linking objects to their attributes, and demonstrate a severe lack of order sensitivity. VLMs are predominantly trained and evaluated on large scale datasets with rich compositional structure in the images and captions. Yet, training on these datasets has not been enough to address the lack of compositional understanding, and evaluating on these datasets has failed to surface this deficiency. To understand why these limitations emerge and are not represented in the standard tests, we zoom into the evaluation and training procedures. We demonstrate that it is possible to perform well on image-text retrieval over existing datasets without using the composition and order information. This further motivates the value of using ARO to benchmark VLMs. Given that contrastive pretraining optimizes for retrieval on large datasets with similar shortcuts, we hypothesize that this can explain why the models do not need to learn to represent compositional information. This finding suggests a natural solution: composition-aware hard negative mining. We show that a simple-to-implement modification of contrastive learning significantly improves the performance on tasks requiring understanding of order and compositionality. 
", "keywords": "vision-language models;clip;contrastive learning;retrieval;vision-language pretraining;multimodal representation learning", "primary_area": "", "supplementary_material": "", "author": "Mert Yuksekgonul;Federico Bianchi;Pratyusha Kalluri;Dan Jurafsky;James Zou", "authorids": "~Mert_Yuksekgonul1;~Federico_Bianchi1;~Pratyusha_Kalluri1;~Dan_Jurafsky1;~James_Zou1", "gender": "M;;Unspecified;M;", "homepage": "https://cs.stanford.edu/~merty;https://federicobianchi.io;https://pkalluri.github.io;http://web.stanford.edu/~jurafsky/;", "dblp": "249/5558;122/8815-1;https://dblp.org/pers/hd/k/Kalluri:Pratyusha;31/985;", "google_scholar": "https://scholar.google.com/citations?hl=en;1okGjb8AAAAJ;;uZg9l58AAAAJ;23ZXZvEAAAAJ", "orcid": ";0000-0003-0776-361X;;;", "linkedin": ";federico-bianchi-3b7998121/;;;", "or_profile": "~Mert_Yuksekgonul1;~Federico_Bianchi1;~Pratyusha_Kalluri1;~Dan_Jurafsky1;~James_Zou1", "aff": "Microsoft;Stanford University;Stanford University;Stanford University;Stanford University", "aff_domain": "microsoft.com;stanford.edu;stanford.edu;stanford.edu;stanford.edu", "position": "Intern;Postdoc;PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nyuksekgonul2023when,\ntitle={When and Why Vision-Language Models Behave like Bags-Of-Words, and What to Do About It?},\nauthor={Mert Yuksekgonul and Federico Bianchi and Pratyusha Kalluri and Dan Jurafsky and James Zou},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=KRLUvxh8uaX}\n}", "github": "", "project": "", "reviewers": "y4fM;LKvk;U4Mo;J2a5", "pdf_size": 1598975, "recommendation": "6;6;8;8", "confidence": "4;4;4;4", "correctness": "4;3;3;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "95;53;73;240", "wc_strength_and_weaknesses": "103;139;303;228", "wc_clarity_quality_novelty_and_reproducibility": "13;2;161;8", "wc_summary_review": "103;54;84;43", "wc_review": "314;248;621;519", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "313;555;661;350", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 115.25, 73.54038006428848 ], "wc_strength_and_weaknesses_avg": [ 193.25, 78.00761180807935 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 46.0, 66.5093978321861 ], "wc_summary_review_avg": [ 71.0, 23.80126047082381 ], "wc_review_avg": [ 425.5, 150.74896351219135 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 469.75, 143.83562667155866 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": -0.5773502691896257, "gs_citation": 430, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6739303225910004251&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=KRLUvxh8uaX", "email": "microsoft.com;stanford.edu;stanford.edu;stanford.edu;stanford.edu", "author_num": 5, "aff_unique_index": "0;1;1;1;1", "aff_unique_norm": "Microsoft;Stanford University", "aff_unique_dep": "Microsoft Corporation;", "aff_unique_url": "https://www.microsoft.com;https://www.stanford.edu", "aff_unique_abbr": "Microsoft;Stanford", "aff_campus_unique_index": 
"1;1;1;1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "KUP3ic8jdGo", "title": "Limitations of the NTK for Understanding Generalization in Deep Learning", "track": "main", "status": "Reject", "tldr": "1. Neural networks have significantly better scaling than neural tangent kernels. 2. The empirical NTK continues to evolve throughout the training, in contrast with prior work which suggests that it stabilizes after a few epochs of training.", "abstract": "The \u201cNeural Tangent Kernel\u201d (NTK) (Jacot et al., 2018), and its empirical variants have been proposed as a proxy to capture certain behaviors of real neural networks. In this work, we study NTKs through the lens of scaling laws, and demonstrate that they fall short of explaining important aspects of neural network generalization. In particular, we demonstrate realistic settings where finite-width neural networks have significantly better data scaling exponents as compared to their corresponding empirical and infinite NTKs at initialization. This reveals a more fundamental difference between the real networks and NTKs, beyond just a few percentage points of test accuracy. Further, we show that even if the empirical NTK is allowed to be pre-trained on a constant number of samples, the kernel scaling does not catch up to the neural network scaling. Finally, we show that the empirical NTK continues to evolve throughout most of the training, in contrast with prior work which suggests that it stabilizes after a few epochs of training. Altogether, our work establishes concrete limitations of the NTK approach in understanding generalization of real networks on natural datasets.", "keywords": "scaling laws;ntk;time dynamics", "primary_area": "", "supplementary_material": "/attachment/4b025a9f00e8e17481e2686146b4668efd9307d7.zip", "author": "Nikhil Vyas;Yamini Bansal;Preetum Nakkiran", "authorids": "~Nikhil_Vyas1;~Yamini_Bansal1;~Preetum_Nakkiran1", "gender": "M;F;", "homepage": "https://nikhilvyas.github.io/;;http://preetum.nakkiran.org", "dblp": "176/1074;;151/6343", "google_scholar": ";uj1OljkAAAAJ;zithBbUAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Nikhil_Vyas1;~Yamini_Bansal1;~Preetum_Nakkiran1", "aff": "Harvard University;Google;Apple", "aff_domain": "harvard.edu;google.com;apple.com", "position": "Postdoc;Researcher;Principal Researcher", "bibtex": "@misc{\nvyas2023limitations,\ntitle={Limitations of the {NTK} for Understanding Generalization in Deep Learning},\nauthor={Nikhil Vyas and Yamini Bansal and Preetum Nakkiran},\nyear={2023},\nurl={https://openreview.net/forum?id=KUP3ic8jdGo}\n}", "github": "", "project": "", "reviewers": "fuCi;PzLb;8CKY;mmPh", "site": "https://openreview.net/forum?id=KUP3ic8jdGo", "pdf_size": 1129821, "recommendation": "3;5;6;8", "confidence": "4;3;4;4", "correctness": "3;2;4;4", "technical_novelty": "1;2;2;2", "empirical_novelty": "2;2;3;4", "wc_summary_paper": "64;60;147;107", "wc_strength_and_weaknesses": "119;128;314;30", "wc_clarity_quality_novelty_and_reproducibility": "136;138;234;45", "wc_summary_review": "61;2;101;153", "wc_review": "380;328;796;335", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 
0.82915619758885 ], "wc_summary_paper_avg": [ 94.5, 35.471819801075895 ], "wc_strength_and_weaknesses_avg": [ 147.75, 103.3449926218005 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 138.25, 66.83702192647425 ], "wc_summary_review_avg": [ 79.25, 55.2556558191105 ], "wc_review_avg": [ 459.75, 195.15682796151407 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.16012815380508713, "corr_recommendation_correctness": 0.5853694070049635, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16622472325034208462&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Harvard University;Google;Apple", "aff_unique_dep": ";Google;Apple Inc.", "aff_unique_url": "https://www.harvard.edu;https://www.google.com;https://www.apple.com", "aff_unique_abbr": "Harvard;Google;Apple", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "KVljrqehulG", "title": "Efficient Automatic Machine Learning via Design Graphs", "track": "main", "status": "Reject", "tldr": "We propose FALCON, an efficient AutoML method that searches for the optimal model design on design graphs.", "abstract": "Despite the success of automated machine learning (AutoML), which aims to find the best design, including the architecture of deep networks and hyper-parameters, conventional AutoML methods are computationally expensive and hardly provide insights into the relations of different model design choices. To tackle the challenges, we propose FALCON, an efficient sample-based method to search for the optimal model design. Our key insight is to model the design space of possible model designs as a design graph, where the nodes represent design choices, and the edges denote design similarities. FALCON features 1) a task-agnostic module, which performs message passing on the design graph via a Graph Neural Network (GNN), and 2) a task-specific module, which conducts label propagation of the known model performance information on the design graph. Both modules are combined to predict the design performances in the design space, navigating the search direction. We conduct extensive experiments on 27 node and graph classification tasks from various application domains, and an image classification task on the CIFAR-10 dataset. We empirically show that FALCON can efficiently obtain the well-performing designs for each task using only 30 explored nodes. 
Specifically, FALCON has a comparable time cost with the one-shot approaches while achieving an average improvement of 3.3% compared with the best baselines.", "keywords": "Automated Machine Learning;Sample efficiency;Design graph;Graph Neural Networks", "primary_area": "", "supplementary_material": "", "author": "Ying-Xin Wu;Jiaxuan You;Jure Leskovec;Zhitao Ying", "authorids": "~Ying-Xin_Wu1;~Jiaxuan_You2;~Jure_Leskovec1;~Zhitao_Ying1", "gender": ";M;M;F", "homepage": "http://cs.stanford.edu/~jure/;https://www.cs.yale.edu/homes/ying-rex;https://cs.stanford.edu/~jiaxuan/;https://cs.stanford.edu/~shirwu", "dblp": "l/JureLeskovec;209/4936;192/4727;79/4173-2", "google_scholar": "Q_kKkIUAAAAJ;6fqNXooAAAAJ;NDbMl7oAAAAJ;r2cVEucAAAAJ", "orcid": "0000-0002-5411-923X;;;", "linkedin": "leskovec/;rex-ying-92770148/;jiaxuan-you-5859b37b/;", "or_profile": "~Jure_Leskovec1;~Zhitao_Ying1;~Jiaxuan_You1;~Yingxin_Wu1", "aff": "Kumo.AI;Yale University;Computer Science Department, Stanford University;Computer Science Department, Stanford University", "aff_domain": "kumo.ai;yale.edu;cs.stanford.edu;cs.stanford.edu", "position": "Chief Scientist;Assistant Professor;Lecturer;PhD student", "bibtex": "@misc{\nwu2023efficient,\ntitle={Efficient Automatic Machine Learning via Design Graphs},\nauthor={Ying-Xin Wu and Jiaxuan You and Jure Leskovec and Zhitao Ying},\nyear={2023},\nurl={https://openreview.net/forum?id=KVljrqehulG}\n}", "github": "", "project": "", "reviewers": "1F3M;ZhyY;Wvmw;xdfg", "site": "https://openreview.net/forum?id=KVljrqehulG", "pdf_size": 4534457, "recommendation": "3;5;5;8", "confidence": "2;4;4;4", "correctness": "1;2;3;3", "technical_novelty": "2;3;2;3", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "81;98;135;83", "wc_strength_and_weaknesses": "224;423;217;230", "wc_clarity_quality_novelty_and_reproducibility": "138;43;96;14", "wc_summary_review": "26;36;27;178", "wc_review": "469;600;475;505", "wc_reply_reviewers": "0;0;125;0", "wc_reply_authors": "1210;1138;977;360", "reply_reviewers": "0;0;1;0", "reply_authors": "3;3;3;1", "recommendation_avg": [ 5.25, 1.7853571071357126 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "correctness_avg": [ 2.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 99.25, 21.660736367907717 ], "wc_strength_and_weaknesses_avg": [ 273.5, 86.43639279840407 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 72.75, 47.7879430400598 ], "wc_summary_review_avg": [ 66.75, 64.34817402226733 ], "wc_review_avg": [ 512.25, 52.466060458166666 ], "wc_reply_reviewers_avg": [ 31.25, 54.12658773652741 ], "wc_reply_authors_avg": [ 921.25, 334.8383005272844 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.7276068751089989, "corr_recommendation_correctness": 0.8021806287494232, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17250341728262144386&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "Kumo.AI;Yale University;Stanford University", "aff_unique_dep": ";;Computer Science Department", "aff_unique_url": "https://www.kumo.ai;https://www.yale.edu;https://www.stanford.edu", "aff_unique_abbr": "Kumo.AI;Yale;Stanford", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;0;0", 
"aff_country_unique": "United States" }, { "id": "KWSPJ1tuYX", "title": "Correcting the Sub-optimal Bit Allocation", "track": "main", "status": "Withdraw", "tldr": "Correcting the bit allocation in neural video compression by extending semi-amortized variational inference to non-factorized latent.", "abstract": "In this paper, we investigate the problem of bit allocation in Neural Video Compression (NVC). First, we reveal that a recent bit allocation approach claimed to be optimal is, in fact, sub-optimal due to its implementation. Specifically, we find that its sub-optimality lies in the improper application of semi-amortized variational inference (SAVI) on latent with non-factorized variational posterior. Then, we show that the corrected version of SAVI on non-factorized latent requires recursively applying back-propagating through gradient ascent, based on which we derive the corrected optimal bit allocation algorithm. Due to the computational in-feasibility of the corrected bit allocation, we design an efficient approximation to make it practical. Empirical results show that our proposed correction significantly improves the incorrect bit allocation in terms of R-D performance and bitrate error, and outperforms all other bit allocation methods by a large margin. The source code is provided in the supplementary material.", "keywords": "neural video compression;semi-amortized variational auto-encoder", "primary_area": "", "supplementary_material": "/attachment/9456768a3351d978b1452885f1b45767bf31e004.zip", "author": "Tongda Xu;Han Gao;Yuanyuan Wang;Hongwei Qin;Yan Wang;Jingjing Liu;Ya-Qin Zhang", "authorids": "~Tongda_Xu1;~Han_Gao4;~Yuanyuan_Wang3;~Hongwei_Qin2;~Yan_Wang12;~Jingjing_Liu2;~Ya-Qin_Zhang1", "gender": "Non-Binary;;M;M;;;M", "homepage": "https://tongdaxu.github.io/;;https://www.linkedin.com/in/wang-yuan-4440265a/;http://qinhongwei.com/academic;http://researchgate.net/profile/Yan_Wang154?ev=hdr_xprf;https://air.tsinghua.edu.cn/en/info/1046/1194.htm#:~:text=Jingjing%20Liu%20is%20Professor%2C%20Principal,CVPR%2C%20ACL%2C%20etc.);https://air.tsinghua.edu.cn/en/info/1046/1188.htm", "dblp": "227/8096;;;161/1819;59/2227-80;30/3008-1;09/2187", "google_scholar": "LO8GS7sAAAAJ;https://scholar.google.com.hk/citations?hl=zh-CN;;ZGM7HfgAAAAJ;QOZnsYYAAAAJ;BzJ_GboAAAAJ;mDOMfxIAAAAJ", "orcid": ";0000-0002-9577-7394;;;;;", "linkedin": ";;;;;jingjing-liu-65703431/;", "or_profile": "~Tongda_Xu1;~Han_Gao4;~Yuanyuan_Wang3;~Hongwei_Qin2;~Yan_Wang12;~Jingjing_Liu2;~Ya-Qin_Zhang1", "aff": "Tsinghua University;University of Electronic Science and Technology of China;Sensetime;SenseTime Co.;Tsinghua University;Tsinghua University;AIR, Tsinghua University", "aff_domain": "air.tsinghua.edu.cn;uestc.edu;sensetime.com;sensetime.com;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "Researcher;MS student;Researcher;Researcher;Assistant Professor;Full Professor;Full Professor", "bibtex": "@misc{\nxu2023correcting,\ntitle={Correcting the Sub-optimal Bit Allocation},\nauthor={Tongda Xu and Han Gao and Yuanyuan Wang and Hongwei Qin and Yan Wang and Jingjing Liu and Ya-Qin Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=KWSPJ1tuYX}\n}", "github": "", "project": "", "reviewers": "rNPE;s8sJ;w52F;bAfb", "site": "https://openreview.net/forum?id=KWSPJ1tuYX", "pdf_size": 5313551, "recommendation": "1;3;6;8", "confidence": "4;5;3;2", "correctness": "3;2;3;4", "technical_novelty": "3;2;3;4", "empirical_novelty": "2;2;3;4", "wc_summary_paper": "55;47;89;152", "wc_strength_and_weaknesses": 
"176;171;132;195", "wc_clarity_quality_novelty_and_reproducibility": "8;78;18;98", "wc_summary_review": "9;82;29;18", "wc_review": "248;378;268;463", "wc_reply_reviewers": "0;0;11;0", "wc_reply_authors": "182;1195;520;585", "reply_reviewers": "0;0;1;0", "reply_authors": "1;3;1;2", "recommendation_avg": [ 4.5, 2.692582403567252 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 85.75, 41.372545244400904 ], "wc_strength_and_weaknesses_avg": [ 168.5, 22.89650628371062 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 50.5, 38.32427429188973 ], "wc_summary_review_avg": [ 34.5, 28.324018076537094 ], "wc_review_avg": [ 339.25, 86.91770533096235 ], "wc_reply_reviewers_avg": [ 2.75, 4.763139720814412 ], "wc_reply_authors_avg": [ 620.5, 365.2714743858327 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.8304547985373998, "corr_recommendation_correctness": 0.6565321642986128, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1417224540508984302&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2;2;0;0;0", "aff_unique_norm": "Tsinghua University;University of Electronic Science and Technology of China;SenseTime", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.uestc.edu.cn;https://www.sensetime.com", "aff_unique_abbr": "THU;UESTC;SenseTime", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Meta-learning Adaptive Deep Kernel Gaussian Processes for Molecular Property Prediction", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12044", "id": "KXRSh0sdVTP", "poster": "/media/PosterPDFs/ICLR%202023/12044.png?t=1682775473.6841102", "openreview": "https://openreview.net/forum?id=KXRSh0sdVTP", "slides": "https://iclr.cc/virtual/2023/poster/12044", "video": "https://iclr.cc/virtual/2023/poster/12044", "author_site": "Wenlin Chen, Austin Tripp, Jos\u00e9 Miguel Hern\u00e1ndez Lobato", "tldr": "This paper proposes a meta-learning approach for fitting deep kernel GPs via implicit differentiation, which outperforms previous SOTA methods on a variety of real-world chemical tasks.", "abstract": "We propose Adaptive Deep Kernel Fitting with Implicit Function Theorem (ADKF-IFT), a novel framework for learning deep kernel Gaussian processes (GPs) by interpolating between meta-learning and conventional deep kernel learning. Our approach employs a bilevel optimization objective where we meta-learn generally useful feature representations across tasks, in the sense that task-specific GP models estimated on top of such features achieve the lowest possible predictive loss on average. We solve the resulting nested optimization problem using the implicit function theorem (IFT). We show that our ADKF-IFT framework contains previously proposed Deep Kernel Learning (DKL) and Deep Kernel Transfer (DKT) as special cases. 
Although ADKF-IFT is a completely general method, we argue that it is especially well-suited for drug discovery problems and demonstrate that it significantly outperforms previous state-of-the-art methods on a variety of real-world few-shot molecular property prediction tasks and out-of-domain molecular property prediction and optimization tasks.", "keywords": "meta-learning;few-shot learning;Gaussian processes;deep kernel learning;bilevel optimization;chemistry;molecules;drug discovery", "primary_area": "", "supplementary_material": "", "author": "Wenlin Chen;Austin Tripp;Jos\u00e9 Miguel Hern\u00e1ndez-Lobato", "authorids": "~Wenlin_Chen2;~Austin_Tripp1;~Jos\u00e9_Miguel_Hern\u00e1ndez-Lobato1", "gender": ";M;", "homepage": "https://wenlin-chen.github.io/;https://www.austintripp.ca/;http://jmhl.org", "dblp": ";267/5455;40/6058", "google_scholar": "https://scholar.google.com/citations?hl=en;WAvRaxMAAAAJ;BEBccCQAAAAJ", "orcid": ";0000-0002-0138-7740;0000-0001-7610-949X", "linkedin": ";;", "or_profile": "~Wenlin_Chen2;~Austin_Tripp1;~Jose_Miguel_Hernandez_Lobato1", "aff": "Max Planck Institute for Intelligent Systems;University of Cambridge;University of Cambridge", "aff_domain": "tuebingen.mpg.de;cam.ac.uk;cam.ac.uk", "position": "Doctoral Researcher;PhD student;Associate Professor", "bibtex": "@inproceedings{\nchen2023metalearning,\ntitle={Meta-learning Adaptive Deep Kernel Gaussian Processes for Molecular Property Prediction},\nauthor={Wenlin Chen and Austin Tripp and Jos{\\'e} Miguel Hern{\\'a}ndez-Lobato},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=KXRSh0sdVTP}\n}", "github": "", "project": "", "reviewers": "BY35;fnkh;sNf8", "pdf_size": 830491, "recommendation": "6;8;8", "confidence": "3;3;4", "correctness": "3;3;4", "technical_novelty": "3;2;3", "empirical_novelty": "0;3;3", "wc_summary_paper": "119;124;181", "wc_strength_and_weaknesses": "133;738;122", "wc_clarity_quality_novelty_and_reproducibility": "86;75;48", "wc_summary_review": "48;74;44", "wc_review": "386;1011;395", "wc_reply_reviewers": "0;41;0", "wc_reply_authors": "485;1034;508", "reply_reviewers": "0;1;0", "reply_authors": "1;2;1", "recommendation_avg": [ 7.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 1.4142135623730951 ], "wc_summary_paper_avg": [ 141.33333333333334, 28.122746823325937 ], "wc_strength_and_weaknesses_avg": [ 331.0, 287.8274946329253 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 69.66666666666667, 15.965240019770729 ], "wc_summary_review_avg": [ 55.333333333333336, 13.299958228840001 ], "wc_review_avg": [ 597.3333333333334, 292.5295806504969 ], "wc_reply_reviewers_avg": [ 13.666666666666666, 19.3275853524323 ], "wc_reply_authors_avg": [ 675.6666666666666, 253.55385139168277 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.49999999999999983, "corr_recommendation_correctness": 0.49999999999999983, "gs_citation": 40, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18050493687647013836&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 7, "pdf": "https://openreview.net/pdf?id=KXRSh0sdVTP", "email": 
"tuebingen.mpg.de;cam.ac.uk;cam.ac.uk", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Max Planck Institute for Intelligent Systems;University of Cambridge", "aff_unique_dep": "Intelligent Systems;", "aff_unique_url": "https://www.mpi-is.mpg.de;https://www.cam.ac.uk", "aff_unique_abbr": "MPI-IS;Cambridge", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Germany;United Kingdom" }, { "id": "KZzvKrfKt7K", "title": "CEREAL: Few-Sample Clustering Evaluation", "track": "main", "status": "Reject", "tldr": "We introduce CEREAL, a comprehensive framework for few-sample clustering evaluation based on active pseudo-labeling.", "abstract": "Evaluating clustering quality with reliable evaluation metrics like normalized mutual information (NMI) requires labeled data that can be expensive to annotate. We focus on the underexplored problem of estimating clustering quality with limited labels. We adapt existing approaches from the few-sample model evaluation literature to actively sub-sample, with a learned surrogate model, the most informative data points for annotation to estimate the evaluation metric. However, we find that their estimation can be biased and only relies on the labeled data. To that end, we introduce CEREAL, a comprehensive framework for few-sample clustering evaluation that extends active sampling approaches in three key ways. First, we propose novel NMI-based acquisition functions that account for the distinctive properties of clustering and uncertainties from a learned surrogate model. Next, we use ideas from semi-supervised learning and train the surrogate model with both the labeled and unlabeled data. Finally, we pseudo-label the unlabeled data with the surrogate model. We run experiments to estimate NMI in an active sampling pipeline on three datasets across vision and language. Our results show that CEREAL reduces the area under the absolute error curve by up to 57% compared to the best sampling baseline. We perform an extensive ablation study to show that our framework is agnostic to the choice of clustering algorithm and evaluation metric. We also extend CEREAL from clusterwise annotations to pairwise annotations. Overall, CEREAL can efficiently evaluate clustering with limited human annotations. ", "keywords": "evaluation metrics;clustering;surrogate functions;active learning;semi-supervised learning", "primary_area": "", "supplementary_material": "", "author": "Nihal V. Nayak;Ethan R. Elenberg;Clemens Rosenbaum", "authorids": "~Nihal_V._Nayak1;~Ethan_R._Elenberg2;~Clemens_Rosenbaum1", "gender": ";M;", "homepage": "https://nihalnayak.github.io/;https://people.cs.umass.edu/~cgbr/;http://eelenberg.github.io/", "dblp": "203/9278;182/2594;150/5501", "google_scholar": "Bx497RMAAAAJ;JkHX5H8AAAAJ;Kh-DC4IAAAAJ", "orcid": "0000-0002-3150-1997;;", "linkedin": ";;", "or_profile": "~Nihal_V._Nayak1;~Clemens_Rosenbaum1;~Ethan_R_Elenberg1", "aff": "Brown University;ASAPP;ASAPP", "aff_domain": "brown.edu;asapp.com;asapp.com", "position": "PhD student;Researcher;Researcher", "bibtex": "@misc{\nnayak2023cereal,\ntitle={{CEREAL}: Few-Sample Clustering Evaluation},\nauthor={Nihal V. Nayak and Ethan R. 
Elenberg and Clemens Rosenbaum},\nyear={2023},\nurl={https://openreview.net/forum?id=KZzvKrfKt7K}\n}", "github": "", "project": "", "reviewers": "W5ci;6PbC;Z3Vb;cxVs", "site": "https://openreview.net/forum?id=KZzvKrfKt7K", "pdf_size": 426758, "recommendation": "3;3;5;6", "confidence": "4;2;4;3", "correctness": "4;3;4;3", "technical_novelty": "1;3;3;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "22;27;88;86", "wc_strength_and_weaknesses": "143;97;294;594", "wc_clarity_quality_novelty_and_reproducibility": "29;49;32;64", "wc_summary_review": "4;23;26;70", "wc_review": "198;196;440;814", "wc_reply_reviewers": "0;0;358;818", "wc_reply_authors": "263;362;1189;2056", "reply_reviewers": "0;0;1;2", "reply_authors": "1;1;3;4", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.5, 0.8660254037844386 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 55.75, 31.307946275666183 ], "wc_strength_and_weaknesses_avg": [ 282.0, 194.31546515910668 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 43.5, 14.080127840328723 ], "wc_summary_review_avg": [ 30.75, 24.180312239505923 ], "wc_review_avg": [ 412.0, 252.4083992263332 ], "wc_reply_reviewers_avg": [ 294.0, 335.98511871807654 ], "wc_reply_authors_avg": [ 967.5, 724.0243435133932 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.25, 1.299038105676658 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.17407765595569782, "corr_recommendation_correctness": -0.19245008972987526, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:bR85pxqs7E8J:scholar.google.com/&scioq=CEREAL:+Few-Sample+Clustering+Evaluation&hl=en&as_sdt=0,47", "gs_version_total": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Brown University;ASAPP", "aff_unique_dep": ";", "aff_unique_url": "https://www.brown.edu;https://www.asapp.com", "aff_unique_abbr": "Brown;ASAPP", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "KaKXygtEGK", "title": "Efficient Approximations of Complete Interatomic Potentials for Crystal Property Prediction", "track": "main", "status": "Reject", "tldr": "We propose to directly model complete interactions for crystals with potential summations", "abstract": "We study the problem of crystal material property prediction. A crystal structure consists of a minimal unit cell that is repeated infinitely in 3D space. How to accurately represent such repetitive structures in machine learning models remains unresolved. Current methods construct graphs by establishing edges only between nearby nodes, thereby failing to faithfully capture infinite repeating patterns and distant interatomic interactions. In this work, we propose several innovations to overcome these limitations. First, we propose to model physics-principled interatomic potentials directly instead of only using distances as in existing methods. These potentials include the Coulomb potential, London dispersion potential, and Pauli repulsion potential. Second, we propose to model the complete set of potentials among all atoms, instead of only between nearby atoms as in prior methods. This is enabled by our approximations of infinite potential summations with provable error bounds. We further develop efficient algorithms to compute the approximations. 
Finally, we propose to incorporate our computations of complete interatomic potentials into message passing neural networks for representation learning. We perform experiments on the JARVIS and Materials Project benchmarks for evaluation. Results show that the use of complete interatomic potentials leads to consistent performance improvements with reasonable computational costs.", "keywords": "graph neural network;material property prediction;crystal property prediction;crystal structure modeling;interatomic potential", "primary_area": "", "supplementary_material": "", "author": "Yuchao Lin;Keqiang Yan;Youzhi Luo;Yi Liu;Xiaoning Qian;Shuiwang Ji", "authorids": "~Yuchao_Lin1;~Keqiang_Yan2;~Youzhi_Luo1;~Yi_Liu12;~Xiaoning_Qian2;~Shuiwang_Ji1", "gender": "M;M;M;;M;M", "homepage": "https://kruskallin.github.io/;;https://lyzustc.github.io/;;https://www.ece.tamu.edu/~xqian;http://people.tamu.edu/~sji", "dblp": "322/5499;272/6760;280/0590;;62/4504;84/6405", "google_scholar": ";cv52C8oAAAAJ;3lqQFIoAAAAJ;;dXGlddgAAAAJ;BZGj6sAAAAAJ", "orcid": ";;0000-0002-3763-0239;;0000-0002-4347-2476;0000-0002-4205-4563", "linkedin": ";;youzhi-luo-139981172/;;;shuiwang-ji-9a040715/", "or_profile": "~Yuchao_Lin1;~Keqiang_Yan2;~Youzhi_Luo1;~Yi_Liu12;~Xiaoning_Qian2;~Shuiwang_Ji1", "aff": "Texas A&M;Texas A&M University;Texas A&M University;;Texas A&M;Texas A&M University", "aff_domain": "tamu.edu;tamu.edu;tamu.edu;;tamu.edu;tamu.edu", "position": "PhD student;PhD student;PhD student;;Full Professor;Professor", "bibtex": "@misc{\nlin2023efficient,\ntitle={Efficient Approximations of Complete Interatomic Potentials for Crystal Property Prediction},\nauthor={Yuchao Lin and Keqiang Yan and Youzhi Luo and Yi Liu and Xiaoning Qian and Shuiwang Ji},\nyear={2023},\nurl={https://openreview.net/forum?id=KaKXygtEGK}\n}", "github": "", "project": "", "reviewers": "FZse;q5TT;FEhN", "site": "https://openreview.net/forum?id=KaKXygtEGK", "pdf_size": 730363, "recommendation": "3;6;6", "confidence": "5;3;4", "correctness": "1;3;3", "technical_novelty": "3;3;3", "empirical_novelty": "0;3;3", "wc_summary_paper": "55;50;99", "wc_strength_and_weaknesses": "172;216;85", "wc_clarity_quality_novelty_and_reproducibility": "24;29;124", "wc_summary_review": "39;23;83", "wc_review": "290;318;391", "wc_reply_reviewers": "157;59;41", "wc_reply_authors": "782;1670;826", "reply_reviewers": "1;1;1", "reply_authors": "2;3;1", "recommendation_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "correctness_avg": [ 2.3333333333333335, 0.9428090415820634 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 1.4142135623730951 ], "wc_summary_paper_avg": [ 68.0, 22.015146301277824 ], "wc_strength_and_weaknesses_avg": [ 157.66666666666666, 54.43242497711166 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 59.0, 46.007245806140865 ], "wc_summary_review_avg": [ 48.333333333333336, 25.368396787253932 ], "wc_review_avg": [ 333.0, 42.57542327055207 ], "wc_reply_reviewers_avg": [ 85.66666666666667, 50.97275960964074 ], "wc_reply_authors_avg": [ 1092.6666666666667, 408.6313198416826 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.8660254037844387, "corr_recommendation_correctness": 1.0, "gs_citation": 46, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3795682291681810561&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff_unique_index": "0;0;0;0;0", 
"aff_unique_norm": "Texas A&M University", "aff_unique_dep": "", "aff_unique_url": "https://www.tamu.edu", "aff_unique_abbr": "TAMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "KaeYRGTaODt", "title": "Multi-Agent Policy Transfer via Task Relationship Modeling", "track": "main", "status": "Reject", "tldr": "We propose to model task relationships by learning effect-based task representations for more efficient multi-agent policy transfer.", "abstract": "Team adaptation to new cooperative tasks is a hallmark of human intelligence, which has yet to be fully realized in learning agents. Previous works on multi-agent transfer learning accommodate teams of different sizes, but heavily rely on the generalization ability of neural networks for adapting to unseen tasks. We posit that the relationship among tasks provides the key information for policy adaptation. To utilize such relationship for efficient transfer, we try to discover and exploit the knowledge among tasks from different teams, propose to learn effect-based task representations as a common latent space among tasks, and use it to build an alternatively fixed training scheme. We demonstrate that the task representation can capture the relationship among teams and generalize to unseen tasks. As a result, the proposed method can help transfer learned cooperation knowledge to new tasks after training on a few source tasks, and the learned transferred policies can also help solve tasks that are hard to learn from scratch.", "keywords": "Multi-agent reinforcement learning;cooperative transfer learning", "primary_area": "", "supplementary_material": "/attachment/97d3e3f6372e7064fb5765eaa869754eba1cf145.zip", "author": "Rong-Jun Qin;Feng Chen;Tonghan Wang;Lei Yuan;Xiaoran Wu;Yipeng Kang;Zongzhang Zhang;Chongjie Zhang;Yang Yu", "authorids": "~Rong-Jun_Qin1;~Feng_Chen12;~Tonghan_Wang1;~Lei_Yuan2;~Xiaoran_Wu1;~Yipeng_Kang1;~Zongzhang_Zhang1;~Chongjie_Zhang1;~Yang_Yu5", "gender": "M;M;M;M;F;M;M;;M", "homepage": "http://www.lamda.nju.edu.cn/qinrj/;;https://tonghanwang.github.io/;http://www.lamda.nju.edu.cn/yuanl/;https://www.xiaoranwu.com/;;http://www.lamda.nju.edu.cn/zhangzz;;http://www.lamda.nju.edu.cn/yuy", "dblp": ";21/3047-42;175/6039-1.html;23/6750-1;;267/2079;90/8724;29/6693;46/2181-1", "google_scholar": ";QgorT8QAAAAJ;-AR1yc4AAAAJ;https://scholar.google.com/citations?hl=zh-CN;;OcQEZngAAAAJ;sG7WEAgAAAAJ;LjxqXycAAAAJ;PG2lDSwAAAAJ", "orcid": ";;;;;;;;", "linkedin": ";;;;;;;;", "or_profile": "~Rong-Jun_Qin1;~Feng_Chen12;~Tonghan_Wang1;~Lei_Yuan2;~Xiaoran_Wu1;~Yipeng_Kang1;~Zongzhang_Zhang1;~Chongjie_Zhang1;~Yang_Yu2", "aff": "Nanjing University;Nanjing University;Tsinghua University;Nanjing University;;Tsinghua University;Nanjing University;Tsinghua University;Nanjing University", "aff_domain": "nju.edu.cn;lamda.nju.edu.cn;tsinghua.edu.cn;nju.edu.cn;;mails.tsinghua.edu.cn;nju.edu.cn;tsinghua.edu.cn;nju.edu.cn", "position": "PhD student;MS student;MS student;PhD student;;PhD student;Associate Professor;Assistant Professor;Professor", "bibtex": "@misc{\nqin2023multiagent,\ntitle={Multi-Agent Policy Transfer via Task Relationship Modeling},\nauthor={Rong-Jun Qin and Feng Chen and Tonghan Wang and Lei Yuan and Xiaoran Wu and Yipeng Kang and Zongzhang Zhang and Chongjie Zhang and Yang Yu},\nyear={2023},\nurl={https://openreview.net/forum?id=KaeYRGTaODt}\n}", "github": "", "project": "", "reviewers": "dhhQ;oPef;AfTn;U9UT", "site": 
"https://openreview.net/forum?id=KaeYRGTaODt", "pdf_size": 1632919, "recommendation": "3;6;6;6", "confidence": "4;4;2;4", "correctness": "2;2;3;4", "technical_novelty": "2;3;3;2", "empirical_novelty": "1;0;3;3", "wc_summary_paper": "91;95;76;87", "wc_strength_and_weaknesses": "539;227;202;266", "wc_clarity_quality_novelty_and_reproducibility": "51;115;6;62", "wc_summary_review": "66;86;37;55", "wc_review": "747;523;321;470", "wc_reply_reviewers": "0;17;0;67", "wc_reply_authors": "911;1571;555;1052", "reply_reviewers": "0;1;0;1", "reply_authors": "3;4;2;3", "recommendation_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "correctness_avg": [ 2.75, 0.82915619758885 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 1.75, 1.299038105676658 ], "wc_summary_paper_avg": [ 87.25, 7.084313657652377 ], "wc_strength_and_weaknesses_avg": [ 308.5, 135.01944304432604 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 58.5, 38.78466191679386 ], "wc_summary_review_avg": [ 61.0, 17.76231966833161 ], "wc_review_avg": [ 515.25, 152.928700707225 ], "wc_reply_reviewers_avg": [ 21.0, 27.44995446262161 ], "wc_reply_authors_avg": [ 1022.25, 364.93518260096545 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 9, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": 0.5222329678670935, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15909057787118372574&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;1;0;1;0;1;0", "aff_unique_norm": "Nanjing University;Tsinghua University", "aff_unique_dep": ";", "aff_unique_url": "https://www.nju.edu.cn;https://www.tsinghua.edu.cn", "aff_unique_abbr": "Nanjing U;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "KajSampr4_", "title": "Communication-Optimal Distributed Graph Clustering under Duplication Models", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "We consider the problem of clustering graph nodes over large-scale distributed graphs, when graph edges with possibly edge duplicates are observed distributively. Although edge duplicates across different sites appear to be beneficial at the first glance, in fact they could make the clustering task more complicated since potentially their processing would need extra computations and communications. We propose the first communication-optimal algorithms for two well-established communication models namely the message passing and the blackboard models. Specifically, given a graph on $n$ nodes with edges observed at $s$ sites, our algorithms achieve communication costs $\\tilde{O}(ns)$ and $\\tilde{O}(n+s)$ ($\\tilde{O}$ hides a polylogarithmic factor), which almost match their lower bounds, $\\Omega(ns)$ and $\\Omega(n+s)$, in the message passing and the blackboard models respectively. The communication costs are asymptotically the same as those under non-duplication models, under a mild assumption on edge distribution. 
Our algorithms can also guarantee clustering quality nearly as good as that of centralizing all edges and then applying any standard clustering algorithm.", "keywords": "Graph Clustering;Distributed Computation;Communication Complexity;Duplication Models", "primary_area": "", "supplementary_material": "", "author": "Chunjiang Zhu", "authorids": "~Chunjiang_Zhu1", "gender": "", "homepage": "https://chunjiangzhu.github.io/", "dblp": "123/7707", "google_scholar": "0IOSvAQAAAAJ", "orcid": "", "linkedin": "", "or_profile": "~Chunjiang_Zhu1", "aff": "University of North Carolina Greensboro", "aff_domain": "uncg.edu", "position": "Assistant Professor", "bibtex": "@misc{\nzhu2023communicationoptimal,\ntitle={Communication-Optimal Distributed Graph Clustering under Duplication Models},\nauthor={Chunjiang Zhu},\nyear={2023},\nurl={https://openreview.net/forum?id=KajSampr4_}\n}", "github": "", "project": "", "reviewers": "jTGD;qPC4;m6rY", "site": "https://openreview.net/forum?id=KajSampr4_", "pdf_size": 422550, "recommendation": "1;3;5", "confidence": "3;4;4", "correctness": "4;3;4", "technical_novelty": "1;2;3", "empirical_novelty": "0;0;0", "wc_summary_paper": "30;202;351", "wc_strength_and_weaknesses": "52;165;123", "wc_clarity_quality_novelty_and_reproducibility": "9;133;97", "wc_summary_review": "3;46;233", "wc_review": "94;546;804", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.0, 1.632993161855452 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.0, 0.816496580927726 ], "empirical_novelty_avg": [ 0, 0 ], "wc_summary_paper_avg": [ 194.33333333333334, 131.15978380924884 ], "wc_strength_and_weaknesses_avg": [ 113.33333333333333, 46.635704014080126 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 79.66666666666667, 52.08539995899896 ], "wc_summary_review_avg": [ 94.0, 99.84321041846894 ], "wc_review_avg": [ 481.3333333333333, 293.44088937221335 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 1, 0 ], "corr_recommendation_confidence": 0.8660254037844385, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:R5g39QraJAIJ:scholar.google.com/&scioq=Communication-Optimal+Distributed+Graph+Clustering+under+Duplication+Models&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "University of North Carolina at Greensboro", "aff_unique_dep": "", "aff_unique_url": "https://www.uncg.edu", "aff_unique_abbr": "UNCG", "aff_campus_unique_index": "0", "aff_campus_unique": "Greensboro", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "PEER: A Collaborative Language Model", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12165", "id": "KbYevcLjnc", "poster": "", "openreview": "https://openreview.net/forum?id=KbYevcLjnc", "slides": "https://iclr.cc/virtual/2023/poster/12165", "video": "https://iclr.cc/virtual/2023/poster/12165", "author_site": "Timo Schick, Jane Dwivedi-Yu, Zhengbao Jiang, Fabio Petroni, Patrick Lewis, Gautier Izacard, Qingfei You, Christoforos Nalmpantis, Edouard Grave, Sebastian Riedel", "tldr": "We introduce PEER, a language model trained to mimic the collaborative editing process by 
which humans often write text.", "abstract": "Textual content is often the output of a collaborative writing process: We start with an initial draft, ask for suggestions, and repeatedly make changes.\nAgnostic of this process, today\u2019s language models are trained to generate only the final result. As a consequence, they lack several abilities crucial for collaborative writing: They are unable to update existing texts, difficult to control and incapable of verbally planning or explaining their actions.\nTo address these shortcomings, we introduce PEER, a collaborative language model that is trained to imitate the entire writing process itself. PEER can write drafts, add suggestions, propose edits and provide explanations for its actions. Crucially, we train multiple instances of PEER able to infill various parts of the writing process, enabling the use of self-training techniques for increasing the quality, amount and diversity of training data. This unlocks PEER's full potential by making it applicable in domains for which no edit histories are available and improving its ability to follow instructions, to write useful comments, and to explain its actions. We show that PEER achieves strong performance across various domains and editing tasks.", "keywords": "Language Models;Controllability;Prompting;Zero-Shot Learning;Editing", "primary_area": "", "supplementary_material": "", "author": "Timo Schick;Jane A. Yu;Zhengbao Jiang;Fabio Petroni;Patrick Lewis;Gautier Izacard;Qingfei You;Christoforos Nalmpantis;Edouard Grave;Sebastian Riedel", "authorids": "~Timo_Schick1;~Jane_A._Yu1;~Zhengbao_Jiang2;~Fabio_Petroni2;~Patrick_Lewis2;~Gautier_Izacard1;qingfeiyou@fb.com;~Christoforos_Nalmpantis1;~Edouard_Grave1;~Sebastian_Riedel1", "gender": ";;M;M;M;Unspecified;;;;M", "homepage": "http://timoschick.com;;;http://www.fabiopetroni.com/;https://patricklewis.io;;;;;https://www.riedelcastro.org/", "dblp": "203/9176;;;118/5349;227/3197;222/3621;;222/6212;50/10261;18/3348-1.html", "google_scholar": ";;;https://scholar.google.it/citations?user=vxQc2L4AAAAJ;JN7Zg-kAAAAJ;https://scholar.google.com/citations?view_op=list_works;;https://scholar.google.co.uk/citations?user=1Z4PmxIAAAAJ;7UV4ET4AAAAJ;https://scholar.google.com.tw/citations?user=AcCtcrsAAAAJ", "orcid": ";;;;0000-0002-2192-9543;;;0000-0002-7398-5862;;", "linkedin": ";;;petronifabio/;patrick-s-h-lewis/;;;christoforos-nalmpantis/;edouard-grave-63099823/;", "or_profile": "~Timo_Schick1;~Jane_A._Yu1;~Zhengbao_Jiang2;~Fabio_Petroni2;~Patrick_Lewis2;~Gautier_Izacard1;qingfeiyou@fb.com;~Christoforos_Nalmpantis1;~Edouard_Grave1;~Sebastian_Riedel1", "aff": "Meta Facebook;;School of Computer Science, Carnegie Mellon University;Samaya AI;Cohere;Meta Facebook;;Meta Facebook;Meta Facebook;University College London", "aff_domain": "fb.com;;cs.cmu.edu;samaya.ai;cohere.ai;fb.com;;facebook.com;fb.com;ucl.ac.uk", "position": "Researcher;;PhD student;Researcher;Research Scientist;PhD student;;Postdoc;Research Scientist;Full Professor", "bibtex": "@inproceedings{\nschick2023peer,\ntitle={{PEER}: A Collaborative Language Model},\nauthor={Timo Schick and Jane A. 
Yu and Zhengbao Jiang and Fabio Petroni and Patrick Lewis and Gautier Izacard and Qingfei You and Christoforos Nalmpantis and Edouard Grave and Sebastian Riedel},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=KbYevcLjnc}\n}", "github": "", "project": "", "reviewers": "WDUk;fvQS;tmW2;eo6j", "pdf_size": 548854, "recommendation": "6;8;8;8", "confidence": "4;4;5;4", "correctness": "4;3;4;4", "technical_novelty": "3;3;4;3", "empirical_novelty": "4;3;4;4", "wc_summary_paper": "105;109;70;85", "wc_strength_and_weaknesses": "199;136;88;332", "wc_clarity_quality_novelty_and_reproducibility": "144;148;38;34", "wc_summary_review": "69;83;18;37", "wc_review": "517;476;214;488", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "376;321;147;554", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 92.25, 15.738090735537142 ], "wc_strength_and_weaknesses_avg": [ 188.75, 91.59523732159877 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 91.0, 55.036351623268054 ], "wc_summary_review_avg": [ 51.75, 25.645418694183956 ], "wc_review_avg": [ 423.75, 122.01306282525654 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 349.5, 145.20760999341599 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 10, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": -0.3333333333333333, "gs_citation": 129, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12527373189959778249&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=KbYevcLjnc", "email": "fb.com;;cs.cmu.edu;samaya.ai;cohere.ai;fb.com;;facebook.com;fb.com;ucl.ac.uk", "author_num": 10, "aff_unique_index": "0;1;2;3;0;0;0;4", "aff_unique_norm": "Meta;Carnegie Mellon University;Samaya AI;Cohere;University College London", "aff_unique_dep": "Meta Platforms, Inc.;School of Computer Science;;;", "aff_unique_url": "https://meta.com;https://www.cmu.edu;;https://cohere.ai;https://www.ucl.ac.uk", "aff_unique_abbr": "Meta;CMU;;;UCL", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pittsburgh", "aff_country_unique_index": "0;0;0;0;0;0;2", "aff_country_unique": "United States;;United Kingdom" }, { "id": "KdAxKVwAmP", "title": "STViT: Semantic Tokens for Efficient Global and Local Vision Transformers", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "The quadratic computational complexity with respect to the number of tokens limits the practical applications of Vision Transformers (ViTs). Several works propose to prune redundant tokens to achieve efficient ViTs. However, these methods generally suffer from (i) dramatic accuracy drops, (ii) application difficulty in the local vision transformer, and (iii) non-general-purpose networks for downstream tasks. In this work, we propose a novel Semantic Token ViT (STViT) for efficient global and local vision transformers, which can also be revised to serve as a backbone for downstream tasks.
The semantic tokens represent cluster centers, and they are initialized by pooling image tokens in space and recovered by attention, which can adaptively represent global or local semantic information. Due to the cluster properties, a few semantic tokens can attain the same effect as vast image tokens, for both global and local vision transformers. For instance, only 16 semantic tokens on DeiT-(Tiny,Small,Base) can achieve the same accuracy with more than 100% inference speed improvement and nearly 60% FLOPs reduction; on Swin-(Tiny,Small,Base), we can employ 16 semantic tokens in each window to further speed it up by around 20% with a slight accuracy increase. Besides great success in image classification, we also extend our method to video recognition. In addition, we design an STViT-R(ecover) network to restore the detailed spatial information based on the STViT, making it work for downstream tasks, a capability that previous token sparsification methods lack. Experiments demonstrate that our method can achieve competitive results compared to the original networks in object detection and instance segmentation, with over 30% FLOPs reduction for the backbone.", "keywords": "token reduction algorithm;efficient vision transformer;global and local vision transformer;downstream tasks", "primary_area": "", "supplementary_material": "", "author": "Shuning Chang;Pichao WANG;Ming Lin;Fan Wang;David Junhao Zhang;Rong Jin;Mike Zheng Shou", "authorids": "~Shuning_Chang1;~Pichao_WANG3;~Ming_Lin4;~Fan_Wang6;~David_Junhao_Zhang1;~Rong_Jin1;~Mike_Zheng_Shou1", "gender": "M;M;M;F;M;;M", "homepage": "https://www.ece.nus.edu.sg/lv/people_student.html;https://wangpichao.github.io/;https://minglin-home.github.io/;;https://scholar.google.com/citations?user=6dCcnNEAAAAJ&hl=en;http://www.columbia.edu/~zs2262/;https://www.cse.msu.edu/~rongjin/", "dblp": ";;;;307/3295;284/0807;j/RongJin", "google_scholar": ";;https://scholar.google.com/citations?hl=en;WCRGTHsAAAAJ;6dCcnNEAAAAJ;h1-3lSoAAAAJ;", "orcid": ";;;0000-0001-7320-1119;;;", "linkedin": ";;;;;;", "or_profile": "~Shuning_Chang1;~Pichao_WANG3;~Ming_Lin4;~Fan_Wang6;~Junhao_Zhang1;~Zheng_Shou1;~Rong_Jin3", "aff": "National University of Singapore;Amazon;Amazon;Alibaba Group;National University of Singapore;National University of Singapore;Twitter", "aff_domain": "u.nus.edu;amazon.com;amazon.com;alibaba-inc.com;nus.edu;nus.edu.sg;twitter.com", "position": "PhD student;Researcher;Researcher;Senior Staff Algorithm Engineer;PhD student;Assistant Professor;Researcher", "bibtex": "@misc{\nchang2023stvit,\ntitle={{STV}iT: Semantic Tokens for Efficient Global and Local Vision Transformers},\nauthor={Shuning Chang and Pichao WANG and Ming Lin and Fan Wang and David Junhao Zhang and Rong Jin and Mike Zheng Shou},\nyear={2023},\nurl={https://openreview.net/forum?id=KdAxKVwAmP}\n}", "github": "", "project": "", "reviewers": "rc2E;zzRD;RURD;icbc", "site": "https://openreview.net/forum?id=KdAxKVwAmP", "pdf_size": 6653945, "recommendation": "3;5;6;6", "confidence": "4;4;5;4", "correctness": "4;3;3;3", "technical_novelty": "3;2;2;2", "empirical_novelty": "3;2;2;3", "wc_summary_paper": "57;52;69;57", "wc_strength_and_weaknesses": "125;154;350;100", "wc_clarity_quality_novelty_and_reproducibility": "29;15;24;49", "wc_summary_review": "57;17;75;8", "wc_review": "268;238;518;214", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 4.25,
0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 58.75, 6.2599920127744575 ], "wc_strength_and_weaknesses_avg": [ 182.25, 98.71771624181751 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 29.25, 12.457427503300993 ], "wc_summary_review_avg": [ 39.25, 27.680092124124155 ], "wc_review_avg": [ 309.5, 121.88826850849921 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.4714045207910316, "corr_recommendation_correctness": -0.9428090415820632, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:IpqxeQtjnT4J:scholar.google.com/&scioq=STViT:+Semantic+Tokens+for+Efficient+Global+and+Local+Vision+Transformers&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;1;2;0;0;3", "aff_unique_norm": "National University of Singapore;Amazon;Alibaba Group;Twitter, Inc.", "aff_unique_dep": ";Amazon.com, Inc.;;", "aff_unique_url": "https://www.nus.edu.sg;https://www.amazon.com;https://www.alibaba.com;https://twitter.com", "aff_unique_abbr": "NUS;Amazon;Alibaba;Twitter", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;2;0;0;1", "aff_country_unique": "Singapore;United States;China" }, { "title": "Calibrating the Rigged Lottery: Making All Tickets Reliable", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10965", "id": "KdwnGErdT6", "poster": "/media/PosterPDFs/ICLR%202023/10965.png?t=1682475581.2306485", "openreview": "https://openreview.net/forum?id=KdwnGErdT6", "slides": "https://iclr.cc/virtual/2023/poster/10965", "video": "https://iclr.cc/virtual/2023/poster/10965", "author_site": "Bowen Lei, Ruqi Zhang, Dongkuan Xu, Bani Mallick", "tldr": "", "abstract": "Although sparse training has been successfully used in various deep learning tasks to save memory and reduce inference time, the reliability of the produced sparse models remains unexplored. Previous research has shown that deep neural networks tend to be over-confident, and we find that sparse training exacerbates this problem. Therefore, calibrating the sparse models is crucial for reliable prediction and decision making. In this paper, we propose a new sparse training method to produce sparse models with improved confidence calibration. In contrast to previous research that uses only one mask to control the sparse topology, our method utilizes two masks, including a deterministic mask and a random mask. The former efficiently searches and activates important weights by exploiting the magnitude of weights and gradients, while the latter brings better exploration and finds more appropriate weight values by random updates. Theoretically, we prove that our method can be viewed as a hierarchical variational approximation of a probabilistic deep Gaussian process.
Extensive experiments on multiple datasets, model architectures, and sparsities show that our method can reduce ECE values by up to 47.8\\% and simultaneously maintain or even improve accuracy with only a slight increase in computational and storage burden.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/3c36578a4a623bf3a9baeae01a3d60011ef56c4a.zip", "author": "Bowen Lei;Ruqi Zhang;Dongkuan Xu;Bani Mallick", "authorids": "~Bowen_Lei1;~Ruqi_Zhang1;~Dongkuan_Xu2;~Bani_Mallick1", "gender": "M;F;M;M", "homepage": "https://stevenboys.github.io/;https://ruqizhang.github.io/;https://dongkuanx27.github.io/;https://artsci.tamu.edu/statistics/contact/profiles/bani-mallick.html", "dblp": "334/7726.html;;142/8139;", "google_scholar": "xF9ZTgYAAAAJ;4ojpmc8AAAAJ;https://scholar.google.com/citations?hl=en;T8grPNsAAAAJ", "orcid": "0000-0001-7141-7485;;0000-0002-1456-9658;", "linkedin": "bowen-lei-9ba238192/;;dongkuan-dk-xu-%F0%9F%87%BA%F0%9F%87%A6-05038087/;", "or_profile": "~Bowen_Lei1;~Ruqi_Zhang1;~Dongkuan_Xu2;~Bani_Mallick1", "aff": "Texas A&M University - College Station;Purdue University;North Carolina State University;Texas A&M", "aff_domain": "tamu.edu;purdue.edu;ncsu.edu;stat.tamu.edu", "position": "PhD student;Assistant Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nlei2023calibrating,\ntitle={Calibrating the Rigged Lottery: Making All Tickets Reliable},\nauthor={Bowen Lei and Ruqi Zhang and Dongkuan Xu and Bani Mallick},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=KdwnGErdT6}\n}", "github": "", "project": "", "reviewers": "HnVS;3t4S;db8U;JN9S", "pdf_size": 598314, "recommendation": "6;6;8;8", "confidence": "4;4;4;5", "correctness": "3;2;2;3", "technical_novelty": "4;3;4;2", "empirical_novelty": "4;2;4;2", "wc_summary_paper": "64;36;151;96", "wc_strength_and_weaknesses": "244;327;641;434", "wc_clarity_quality_novelty_and_reproducibility": "55;22;261;22", "wc_summary_review": "26;96;115;101", "wc_review": "389;481;1168;653", "wc_reply_reviewers": "53;154;153;440", "wc_reply_authors": "505;2187;2100;3456", "reply_reviewers": "1;2;2;1", "reply_authors": "3;5;4;7", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 3.25, 0.82915619758885 ], "empirical_novelty_avg": [ 3.0, 1.0 ], "wc_summary_paper_avg": [ 86.75, 42.73976485662971 ], "wc_strength_and_weaknesses_avg": [ 411.5, 148.63798303260174 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 90.0, 99.64185867395288 ], "wc_summary_review_avg": [ 84.5, 34.48550420104076 ], "wc_review_avg": [ 672.75, 301.22448024687503 ], "wc_reply_reviewers_avg": [ 200.0, 144.51124523717868 ], "wc_reply_authors_avg": [ 2062.0, 1046.9663318368935 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 4.75, 1.479019945774904 ], "replies_avg": [ 33, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.0, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8318340475202521830&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=KdwnGErdT6", "email": "tamu.edu;purdue.edu;ncsu.edu;stat.tamu.edu", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Texas A&M University;Purdue University;North Carolina State University", "aff_unique_dep": ";;", "aff_unique_url": 
"https://www.tamu.edu;https://www.purdue.edu;https://www.ncsu.edu", "aff_unique_abbr": "TAMU;Purdue;NCSU", "aff_campus_unique_index": "0", "aff_campus_unique": "College Station;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "Ke2uzCpFcP0", "title": "High-Precision Regressors for Particle Physics", "track": "main", "status": "Reject", "tldr": "We design and build high-precision regressors that speed up Monte Carlo simulations in particle physics by a thousand to a million times", "abstract": "Monte Carlo simulations of physics processes at particle colliders like the Large Hadron Collider at CERN take up a major fraction of the computational budget. For some simulations, a single data point takes seconds, minutes, or even hours to compute from first principles. Since the necessary number of data points per simulation is on the order of $10^9$ -- $10^{12}$, machine learning regressors can be used in place of physics simulators to significantly reduce this computational burden. However, this task requires high precision regressors that can deliver data with relative errors less than 1\\% or even 0.1\\% over the entire domain of the function. In this paper, we develop optimal training strategies and tune various machine learning regressors to satisfy the high-precision requirement. We leverage symmetry arguments from particle physics to optimize the performance of the regressors. Inspired by ResNets, we design a Deep Neural Network with skip connections that outperform fully connected Deep Neural Networks. We find that at lower dimensions, boosted decision trees far outperform neural networks while at higher dimensions neural networks perform better. We show that these regressors can speed up simulations by a factor of $10^3$ -- $10^6$ over the first-principles computations currently used in Monte Carlo simulations. Additionally, using symmetry arguments derived from particle physics, we reduce the number of regressors necessary for each simulation by an order of magnitude. 
Our work can significantly reduce the training and storage burden of Monte Carlo simulations at current and future collider experiments.", "keywords": "Boosted Decision Trees;Skip Connections;Deep Neural Networks;Particle Physics;Monte Carlo Simulation;Symmetry", "primary_area": "", "supplementary_material": "", "author": "Fady Bishara;Ayan Paul;Jennifer Dy", "authorids": "~Fady_Bishara1;~Ayan_Paul1;~Jennifer_Dy1", "gender": "M;M;", "homepage": ";https://www1.coe.neu.edu/~apaul2/;https://mllabneu.github.io/", "dblp": ";;24/6000", "google_scholar": ";https://scholar.google.it/citations?user=CWtp0rQAAAAJ;6h7b0fAAAAAJ", "orcid": "0000-0002-2426-7776;0000-0002-2156-4062;", "linkedin": ";ayan--paul/;", "or_profile": "~Fady_Bishara1;~Ayan_Paul1;~Jennifer_Dy1", "aff": "Deutsches Elektronen-Synchrotron DESY;Northeastern University;Northeastern University", "aff_domain": "desy.de;neu.edu;northeastern.edu", "position": "Researcher;Postdoc;Full Professor", "bibtex": "@misc{\nbishara2023highprecision,\ntitle={High-Precision Regressors for Particle Physics},\nauthor={Fady Bishara and Ayan Paul and Jennifer Dy},\nyear={2023},\nurl={https://openreview.net/forum?id=Ke2uzCpFcP0}\n}", "github": "", "project": "", "reviewers": "XmXm;P5Fn;D2vG;XeYb", "site": "https://openreview.net/forum?id=Ke2uzCpFcP0", "pdf_size": 1274911, "recommendation": "1;3;5;6", "confidence": "5;5;4;3", "correctness": "3;1;4;4", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;1;0;2", "wc_summary_paper": "46;64;44;78", "wc_strength_and_weaknesses": "192;30;214;274", "wc_clarity_quality_novelty_and_reproducibility": "18;63;174;20", "wc_summary_review": "20;378;104;38", "wc_review": "276;535;536;410", "wc_reply_reviewers": "0;0;0;196", "wc_reply_authors": "807;1547;849;647", "reply_reviewers": "0;0;0;3", "reply_authors": "1;2;1;4", "recommendation_avg": [ 3.75, 1.920286436967152 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "correctness_avg": [ 3.0, 1.224744871391589 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 1.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 58.0, 13.92838827718412 ], "wc_strength_and_weaknesses_avg": [ 177.5, 90.29257998307502 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 68.75, 63.369452419916016 ], "wc_summary_review_avg": [ 135.0, 143.73934743138358 ], "wc_review_avg": [ 439.25, 107.2785509782827 ], "wc_reply_reviewers_avg": [ 49.0, 84.870489570875 ], "wc_reply_authors_avg": [ 962.5, 345.7755775065671 ], "reply_reviewers_avg": [ 0.75, 1.299038105676658 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.9028289727756884, "corr_recommendation_correctness": 0.5314940034527338, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13358987522770081300&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 14, "aff_unique_index": "0;1;1", "aff_unique_norm": "Deutsches Elektronen-Synchrotron;Northeastern University", "aff_unique_dep": ";", "aff_unique_url": "https://www.desy.de;https://www.northeastern.edu", "aff_unique_abbr": "DESY;NEU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Germany;United States" }, { "title": "Statistical Inference for Fisher Market Equilibrium", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12125", "id": "KemSBwOYJC", "poster": "", "openreview": "https://openreview.net/forum?id=KemSBwOYJC", "slides": 
"https://iclr.cc/virtual/2023/poster/12125", "video": "https://iclr.cc/virtual/2023/poster/12125", "author_site": "Luofeng Liao, Yuan Gao, Christian Kroer", "tldr": "We propose a statistical inference framework for Fisher market equilibrium.", "abstract": "Statistical inference under market equilibrium effects has attracted increasing attention recently. In this paper we focus on the specific case of linear Fisher markets. They have been widely use in fair resource allocation of food/blood donations and budget management in large-scale Internet ad auctions. In resource allocation, it is crucial to quantify the variability of the resource received by the agents (such as blood banks and food banks) in addition to fairness and efficiency properties of the systems. For ad auction markets, it is important to establish statistical properties of the platform's revenues in addition to their expected values. To this end, we propose a statistical framework based on the concept of infinite-dimensional Fisher markets. In our framework, we observe a market formed by a finite number of items sampled from an underlying distribution (the ``observed market'') and aim to infer several important equilibrium quantities of the underlying long-run market. These equilibrium quantities include individual utilities, social welfare, and pacing multipliers. Through the lens of sample average approximation (SSA), we derive a collection of statistical results and show that the observed market provides useful statistical information of the long-run market. In other words, the equilibrium quantities of the observed market converge to the true ones of the long-run market with strong statistical guarantees. These include consistency, finite sample bounds, asymptotics, and confidence. 
As an extension, we discuss revenue inference in quasilinear Fisher markets.", "keywords": "Fisher market equilibrium;first-price auction;statistical inference under interference;revenue management", "primary_area": "", "supplementary_material": "/attachment/1eab01e734c6b38a3f331cdc27f5a8c32fa0f12c.zip", "author": "Luofeng Liao;Yuan Gao;Christian Kroer", "authorids": "~Luofeng_Liao1;~Yuan_Gao10;~Christian_Kroer1", "gender": "M;M;M", "homepage": ";http://gaoyuancolumbia.weebly.com/;http://www.columbia.edu/~ck2945/", "dblp": ";76/2452.html;64/10660", "google_scholar": "2kVrHEUAAAAJ;OUwPugkAAAAJ;https://scholar.google.ch/citations?user=ckHwjPAAAAAJ", "orcid": ";;0000-0002-9009-8683", "linkedin": "luofeng-liao-7a1027181/;gaoyuan-richard;", "or_profile": "~Luofeng_Liao1;~Yuan_Gao10;~Christian_Kroer1", "aff": "Columbia University;Microsoft;Columbia University", "aff_domain": "columbia.edu;microsoft.com;columbia.edu", "position": "PhD student;Data Scientist;Assistant Professor", "bibtex": "@inproceedings{\nliao2023statistical,\ntitle={Statistical Inference for Fisher Market Equilibrium},\nauthor={Luofeng Liao and Yuan Gao and Christian Kroer},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=KemSBwOYJC}\n}", "github": "", "project": "", "reviewers": "PJb7;CZ5S;8p3H", "pdf_size": 375638, "recommendation": "6;8;8", "confidence": "2;2;3", "correctness": "3;4;4", "technical_novelty": "2;4;3", "empirical_novelty": "2;0;0", "wc_summary_paper": "51;50;178", "wc_strength_and_weaknesses": "44;543;155", "wc_clarity_quality_novelty_and_reproducibility": "40;29;74", "wc_summary_review": "51;63;60", "wc_review": "186;685;467", "wc_reply_reviewers": "0;9;29", "wc_reply_authors": "446;1428;798", "reply_reviewers": "0;1;1", "reply_authors": "1;2;1", "recommendation_avg": [ 7.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 2.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.816496580927726 ], "empirical_novelty_avg": [ 0.6666666666666666, 0.9428090415820634 ], "wc_summary_paper_avg": [ 93.0, 60.10546286874985 ], "wc_strength_and_weaknesses_avg": [ 247.33333333333334, 213.9226236022943 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 47.666666666666664, 19.154343864744856 ], "wc_summary_review_avg": [ 58.0, 5.0990195135927845 ], "wc_review_avg": [ 446.0, 204.25637484951764 ], "wc_reply_reviewers_avg": [ 12.666666666666666, 12.119772641798562 ], "wc_reply_authors_avg": [ 890.6666666666666, 406.2194261999569 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.49999999999999983, "corr_recommendation_correctness": 0.9999999999999998, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3310001713456209425&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=KemSBwOYJC", "email": "columbia.edu;microsoft.com;columbia.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Columbia University;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.columbia.edu;https://www.microsoft.com", "aff_unique_abbr": "Columbia;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United 
States" }, { "title": "CANIFE: Crafting Canaries for Empirical Privacy Measurement in Federated Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11157", "id": "Kf7Yyf4O0u", "poster": "/media/PosterPDFs/ICLR%202023/11157.png?t=1681920046.666213", "openreview": "https://openreview.net/forum?id=Kf7Yyf4O0u", "slides": "https://iclr.cc/virtual/2023/poster/11157", "video": "https://iclr.cc/virtual/2023/poster/11157", "author_site": "Samuel Maddock, Alexandre Sablayrolles, Pierre Stock", "tldr": "Crafting canaries to measure empirical privacy of DP-FL training under a realistic threat model", "abstract": "Federated Learning (FL) is a setting for training machine learning models in distributed environments where the clients do not share their raw data but instead send model updates to a server. However, model updates can be subject to attacks and leak private information. Differential Privacy (DP) is a leading mitigation strategy which involves adding noise to clipped model updates, trading off performance for strong theoretical privacy guarantees. Previous work has shown that the threat model of DP is conservative and that the obtained guarantees may be vacuous or may overestimate information leakage in practice. In this paper, we aim to achieve a tighter measurement of the model exposure by considering a realistic threat model. We propose a novel method, CANIFE, that uses canaries - carefully crafted samples by a strong adversary to evaluate the empirical privacy of a training round. We apply this attack to vision models trained on CIFAR-10 and CelebA and to language models trained on Sent140 and Shakespeare. In particular, in realistic FL scenarios, we demonstrate that the empirical per-round epsilon obtained with CANIFE is 4 -- 5$\\times$ lower than the theoretical bound.", "keywords": "Federated Learning;Differential Privacy;Empirical Privacy;Model Auditing;Membership Inference Attack", "primary_area": "", "supplementary_material": "", "author": "Samuel Maddock;Alexandre Sablayrolles;Pierre Stock", "authorids": "~Samuel_Maddock1;~Alexandre_Sablayrolles1;~Pierre_Stock1", "gender": "M;;M", "homepage": "https://warwick.ac.uk/fac/sci/dcs/people/u1714078/;;https://research.fb.com/people/stock-pierre/", "dblp": "289/1670;186/7749;210/2208", "google_scholar": "ohQy__cAAAAJ;Wy8wM-cAAAAJ;https://scholar.google.fr/citations?user=3e2-59cAAAAJ", "orcid": ";;", "linkedin": "samuel-maddock/;;", "or_profile": "~Samuel_Maddock1;~Alexandre_Sablayrolles1;~Pierre_Stock1", "aff": "University of Warwick;Meta Facebook;Meta Facebook", "aff_domain": "warwick.ac.uk;fb.com;fb.com", "position": "PhD student;Researcher;Research Scientist", "bibtex": "@inproceedings{\nmaddock2023canife,\ntitle={{CANIFE}: Crafting Canaries for Empirical Privacy Measurement in Federated Learning},\nauthor={Samuel Maddock and Alexandre Sablayrolles and Pierre Stock},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=Kf7Yyf4O0u}\n}", "github": "", "project": "", "reviewers": "FybW;rAJ2;K314;hrNj", "pdf_size": 751821, "recommendation": "5;5;8;8", "confidence": "3;3;4;3", "correctness": "3;3;4;4", "technical_novelty": "2;2;3;4", "empirical_novelty": "4;2;3;3", "wc_summary_paper": "353;48;92;134", "wc_strength_and_weaknesses": "605;82;300;207", "wc_clarity_quality_novelty_and_reproducibility": "100;80;58;119", "wc_summary_review": "175;29;13;43", "wc_review": "1233;239;463;503", "wc_reply_reviewers": "0;0;0;0", 
"wc_reply_authors": "1238;907;804;981", "reply_reviewers": "0;0;0;0", "reply_authors": "2;2;1;2", "recommendation_avg": [ 6.5, 1.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 156.75, 117.31448120330244 ], "wc_strength_and_weaknesses_avg": [ 298.5, 193.12495954692133 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 89.25, 22.708753818736948 ], "wc_summary_review_avg": [ 65.0, 64.389440128021 ], "wc_review_avg": [ 609.5, 373.7736614583751 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 982.5, 160.34727936575663 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.5773502691896258, "corr_recommendation_correctness": 1.0, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15128961897831343945&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=Kf7Yyf4O0u", "email": "warwick.ac.uk;fb.com;fb.com", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "University of Warwick;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://www.warwick.ac.uk;https://meta.com", "aff_unique_abbr": "Warwick;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Automating Nearest Neighbor Search Configuration with Constrained Optimization", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12105", "id": "KfptQCEKVW4", "poster": "/media/PosterPDFs/ICLR%202023/12105.png?t=1682462289.4509897", "openreview": "https://openreview.net/forum?id=KfptQCEKVW4", "slides": "https://iclr.cc/virtual/2023/poster/12105", "video": "https://iclr.cc/virtual/2023/poster/12105", "author_site": "Philip Sun, Ruiqi Guo, Sanjiv Kumar", "tldr": "", "abstract": "The approximate nearest neighbor (ANN) search problem is fundamental to efficiently serving many real-world machine learning applications. A number of techniques have been developed for ANN search that are efficient, accurate, and scalable. However, such techniques typically have a number of parameters that affect the speed-recall tradeoff, and exhibit poor performance when such parameters aren't properly set. Tuning these parameters has traditionally been a manual process, demanding in-depth knowledge of the underlying search algorithm. This is becoming an increasingly unrealistic demand as ANN search grows in popularity. To tackle this obstacle to ANN adoption, this work proposes a constrained optimization-based approach to tuning quantization-based ANN algorithms. 
Our technique takes just a desired search cost or recall as input, and then generates tunings that, empirically, are very close to the speed-recall Pareto frontier and give leading performance on standard benchmarks.", "keywords": "AutoML;Convex Optimization;Vector Retrieval;Hyperparameter Search;ANN", "primary_area": "", "supplementary_material": "/attachment/2a23bfe0f53f7fc4992b3493bb2fafabf20cbcfe.zip", "author": "Philip Sun;Ruiqi Guo;Sanjiv Kumar", "authorids": "~Philip_Sun1;~Ruiqi_Guo3;~Sanjiv_Kumar1", "gender": ";M;", "homepage": ";http://aqua.cs.uiuc.edu/site/;http://www.sanjivk.com/", "dblp": "280/1666;78/7198;", "google_scholar": "K-GJnwIAAAAJ;Cgb68qkAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;", "linkedin": ";;", "or_profile": "~Philip_Sun1;~Ruiqi_Guo3;~Sanjiv_Kumar1", "aff": "Google;Google;Google", "aff_domain": "google.com;google.com;google.com", "position": "Researcher;Researcher;Research Scientist", "bibtex": "@inproceedings{\nsun2023automating,\ntitle={Automating Nearest Neighbor Search Configuration with Constrained Optimization},\nauthor={Philip Sun and Ruiqi Guo and Sanjiv Kumar},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=KfptQCEKVW4}\n}", "github": "", "project": "", "reviewers": "vMnt;F6KH;DrxR;cFiP", "pdf_size": 454027, "recommendation": "5;6;8;8", "confidence": "4;4;4;3", "correctness": "4;4;4;3", "technical_novelty": "3;3;4;4", "empirical_novelty": "3;3;4;3", "wc_summary_paper": "41;107;50;67", "wc_strength_and_weaknesses": "336;64;83;228", "wc_clarity_quality_novelty_and_reproducibility": "17;336;70;40", "wc_summary_review": "27;63;45;29", "wc_review": "421;570;248;364", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "390;766;29;163", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 3.5, 0.5 ], "empirical_novelty_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 66.25, 25.31180554602931 ], "wc_strength_and_weaknesses_avg": [ 177.75, 111.22584007324916 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 115.75, 128.54255132056466 ], "wc_summary_review_avg": [ 41.0, 14.491376746189438 ], "wc_review_avg": [ 400.75, 115.90809937187306 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 337.0, 279.2803251215524 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.5555555555555555, "corr_recommendation_correctness": -0.5555555555555555, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3644141297895357289&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=KfptQCEKVW4", "email": "google.com;google.com;google.com", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "Khh7jHEJJFX", "title": "Uncertainty-Driven Active Vision for Implicit Scene Reconstruction", "track": "main", "status": "Withdraw", "tldr": "We use neural rendering to approximate the observable uncertainty of an occupancy based 
scene reconstruction model, which we use to select camera parameters for a next-best-view task.", "abstract": "Multi-view implicit scene reconstruction methods have become increasingly popular due to their ability to represent complex scene details. Recent efforts have been devoted to improving the representation of input information and to reducing the number of views required to obtain high quality reconstructions. Yet, perhaps surprisingly, the study of which views to select to maximally improve scene understanding remains largely unexplored. We propose an uncertainty-driven active vision approach for implicit scene reconstruction, which leverages occupancy uncertainty accumulated across the scene using volume rendering to select the next view to acquire. To this end, we develop an occupancy-based reconstruction method which accurately represents scenes using either 2D or 3D supervision. We evaluate our proposed approach on the ABC dataset and the in-the-wild CO3D dataset, and show that: (1) we are able to obtain high quality state-of-the-art occupancy reconstructions; (2) our perspective conditioned uncertainty definition is effective to drive improvements in next best view selection and outperforms strong baseline approaches; and (3) we can further improve shape understanding by performing a gradient-based search on the view selection candidates. Overall, our results highlight the importance of view selection for implicit scene reconstruction, making it a promising avenue to explore further.", "keywords": "Neural Rendering;3D Reconstruction;Scene Reconstruction;Next Best View;Uncertainty Estimation", "primary_area": "", "supplementary_material": "/attachment/66c2c51f2c5ecd81790ed13c0bfdaa5676755feb.zip", "author": "Edward J. Smith;Michal Drozdzal;Derek Nowrouzezahrai;David Meger;Adriana Romero-Soriano", "authorids": "~Edward_J._Smith1;~Michal_Drozdzal1;~Derek_Nowrouzezahrai1;~David_Meger2;~Adriana_Romero-Soriano1", "gender": "M;M;Not Specified;M;F", "homepage": "https://edwardsmith1884.github.io/;;https://www.cim.mcgill.ca/~derek/;http://www.cim.mcgill.ca/~dmeger/;https://sites.google.com/site/adriromsor/home", "dblp": "45/6211;24/9794;30/4225;51/3415.html;54/10771", "google_scholar": "FUUlY5wAAAAJ;https://scholar.google.ca/citations?user=XK_ktwQAAAAJ;https://scholar.google.ca/citations?user=nCZ2PMcAAAAJ;https://scholar.google.com.tw/citations?user=gFwEytkAAAAJ;https://scholar.google.ca/citations?user=Sm15FXIAAAAJ", "orcid": ";;;;", "linkedin": "edward-james-smith-721754b2/;;;;https://ca.linkedin.com/in/adriana-romero-a6415123", "or_profile": "~Edward_J._Smith1;~Michal_Drozdzal1;~Derek_Nowrouzezahrai1;~David_Meger2;~Adriana_Romero1", "aff": "McGill University;Meta;McGill University;McGill University;Meta", "aff_domain": "mcgill.ca;fb.com;mcgill.ca;mcgill.ca;meta.com", "position": "PhD student;Research Scientist;Full Professor;Associate Professor;Research Scientist", "bibtex": "@misc{\nsmith2023uncertaintydriven,\ntitle={Uncertainty-Driven Active Vision for Implicit Scene Reconstruction},\nauthor={Edward J.
Smith and Michal Drozdzal and Derek Nowrouzezahrai and David Meger and Adriana Romero-Soriano},\nyear={2023},\nurl={https://openreview.net/forum?id=Khh7jHEJJFX}\n}", "github": "", "project": "", "reviewers": "jS2x;PPin;D7AG;ZqDZ", "site": "https://openreview.net/forum?id=Khh7jHEJJFX", "pdf_size": 6312150, "recommendation": "3;3;5;5", "confidence": "4;2;3;4", "correctness": "1;2;3;3", "technical_novelty": "2;1;2;2", "empirical_novelty": "1;0;2;2", "wc_summary_paper": "65;44;60;63", "wc_strength_and_weaknesses": "217;238;273;186", "wc_clarity_quality_novelty_and_reproducibility": "10;33;88;43", "wc_summary_review": "30;13;67;29", "wc_review": "322;328;488;321", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 2.25, 0.82915619758885 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 58.0, 8.276472678623424 ], "wc_strength_and_weaknesses_avg": [ 228.5, 31.65833223655346 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 43.5, 28.341665441536776 ], "wc_summary_review_avg": [ 34.75, 19.803724397193573 ], "wc_review_avg": [ 364.75, 71.20875999482087 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.30151134457776363, "corr_recommendation_correctness": 0.9045340337332909, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13505769987190600745&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;0;0;1", "aff_unique_norm": "McGill University;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://www.mcgill.ca;https://meta.com", "aff_unique_abbr": "McGill;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;1", "aff_country_unique": "Canada;United States" }, { "title": "Scaling Pareto-Efficient Decision Making via Offline Multi-Objective RL", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11257", "id": "Ki4ocDm364", "poster": "/media/PosterPDFs/ICLR%202023/11257.png?t=1680814838.1065722", "openreview": "https://openreview.net/forum?id=Ki4ocDm364", "slides": "https://iclr.cc/virtual/2023/poster/11257", "video": "https://iclr.cc/virtual/2023/poster/11257", "author_site": "Baiting Zhu, Meihua Dang, Aditya Grover", "tldr": "We introduce new dataset & benchmarks and propose new algorithms for offline Multi-Objective Reinforcement Learning (MORL)", "abstract": "The goal of multi-objective reinforcement learning (MORL) is to learn policies that simultaneously optimize multiple competing objectives. In practice, an agent's preferences over the objectives may not be known a priori, and hence, we require policies that can generalize to arbitrary preferences at test time. In this work, we propose a new data-driven setup for offline MORL, where we wish to learn a preference-agnostic policy agent using only a finite dataset of offline demonstrations of other agents and their preferences. The key contributions of this work are two-fold. First, we introduce D4MORL, (D)atasets for MORL that are specifically designed for offline settings.
It contains 1.8 million annotated demonstrations obtained by rolling out reference policies that optimize for randomly sampled preferences on 6 MuJoCo environments with 2-3 objectives each. Second, we propose Pareto-Efficient Decision Agents (PEDA), a family of offline MORL algorithms that builds and extends Decision Transformers via a novel preference-and-return-conditioned policy. Empirically, we show that PEDA closely approximates the behavioral policy on the D4MORL benchmark and provides an excellent approximation of the Pareto-front with appropriate conditioning, as measured by the hypervolume and sparsity metrics. ", "keywords": "Reinforcement Learning;Offline Reinforcement Learning;Multi-Objective Reinforcement Learning;Decision Transformer;Sequential Decision Making", "primary_area": "", "supplementary_material": "/attachment/f4b45a45477de686d5416f2591c1e51ab2ccde3f.zip", "author": "Baiting Zhu;Meihua Dang;Aditya Grover", "authorids": "~Baiting_Zhu1;~Meihua_Dang1;~Aditya_Grover1", "gender": "M;F;M", "homepage": "https://baitingzbt.github.io/;https://cs.stanford.edu/~mhdang/;https://aditya-grover.github.io", "dblp": ";270/9145;162/5052", "google_scholar": "SDAr2FEAAAAJ;TiZrG7IAAAAJ;oOhnPUgAAAAJ", "orcid": ";;", "linkedin": "baitingzbt/;;", "or_profile": "~Baiting_Zhu1;~Meihua_Dang1;~Aditya_Grover1", "aff": "University of California, Los Angeles;University of California, Los Angeles;University of California, Los Angeles", "aff_domain": "ucla.edu;ucla.edu;ucla.edu", "position": "Undergrad student;MS student;Assistant Professor", "bibtex": "@inproceedings{\nzhu2023scaling,\ntitle={Scaling Pareto-Efficient Decision Making via Offline Multi-Objective {RL}},\nauthor={Baiting Zhu and Meihua Dang and Aditya Grover},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=Ki4ocDm364}\n}", "github": "", "project": "", "reviewers": "wtvp;6PPS;oRVV;gXRB", "pdf_size": 1772351, "recommendation": "5;6;6;8", "confidence": "4;4;4;3", "correctness": "3;3;4;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;2;0;4", "wc_summary_paper": "108;94;112;85", "wc_strength_and_weaknesses": "85;116;196;445", "wc_clarity_quality_novelty_and_reproducibility": "60;38;21;11", "wc_summary_review": "87;10;31;68", "wc_review": "340;258;360;609", "wc_reply_reviewers": "0;0;28;0", "wc_reply_authors": "322;412;437;656", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.0, 1.4142135623730951 ], "wc_summary_paper_avg": [ 99.75, 10.825317547305483 ], "wc_strength_and_weaknesses_avg": [ 210.5, 141.31613495988347 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 32.5, 18.580904176062045 ], "wc_summary_review_avg": [ 49.0, 30.20761493398643 ], "wc_review_avg": [ 391.75, 131.12279550101118 ], "wc_reply_reviewers_avg": [ 7.0, 12.12435565298214 ], "wc_reply_authors_avg": [ 456.75, 122.73014095974958 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.9271726499455306, "corr_recommendation_correctness": -0.13245323570650439, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3253066489565592188&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": 
"https://openreview.net/pdf?id=Ki4ocDm364", "email": "ucla.edu;ucla.edu;ucla.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, Los Angeles", "aff_unique_dep": "", "aff_unique_url": "https://www.ucla.edu", "aff_unique_abbr": "UCLA", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "KiT3-iN8wHJ", "title": "Uncertainty and Traffic Light Aware Pedestrian Crossing Intention Prediction", "track": "main", "status": "Reject", "tldr": "We improve pedestrian crossing intention model performance and robustness using traffic light status and predicting uncertainty estimation.", "abstract": "Predicting Vulnerable Road User (VRU) crossing intention is one of the major challenges in automated driving. Crossing intention prediction systems trained only on pedestrian features underperform in situations that are most obvious to humans, as the latter take additional context features into consideration. Moreover, such systems tend to be over-confident for out-of-distribution samples, therefore making them less reliable to be used by downstream tasks like sensor fusion and trajectory planning for automated vehicles. In this work, we demonstrate that the results of crossing intention prediction systems can be improved by incorporating traffic light status as an additional input. Further, we make the model robust and interpretable by estimating uncertainty. Experiments on the PIE dataset show that the F1-score improved from 0.77 to 0.82 and above for three different baseline systems when considering traffic-light context. By adding uncertainty, we show increased uncertainty values for out-of-distribution samples, therefore leading to interpretable and reliable predictions of crossing intention.", "keywords": "deep learning;computer vision;recurrent neural networks;uncertainty estimation;intention prediction;attention mechanism;autonomous driving", "primary_area": "", "supplementary_material": "", "author": "Minali Upreti;Jayanth Ramesh;Chandan R Kumar;Bodhisattwa Chakraborty;VIKRAM BALISAVIRA;Phillip Czech;Vitali Kaiser;Markus Roth", "authorids": "~Minali_Upreti1;~Jayanth_Ramesh1;~Chandan_R_Kumar1;~Bodhisattwa_Chakraborty1;~VIKRAM_BALISAVIRA1;~Phillip_Czech1;~Vitali_Kaiser1;~Markus_Roth1", "gender": "F;M;M;M;M;M;M;M", "homepage": ";;;;;;;https://intelligent-vehicles.org/people/", "dblp": ";;;;;;;", "google_scholar": "https://scholar.google.nl/citations?user=wmhqmx8AAAAJ;;9QibE1YAAAAJ;lZPyLqoAAAAJ;;https://scholar.google.com/citations?hl=de;;https://scholar.google.de/citations?user=mByY0v4AAAAJ", "orcid": ";;;0000-0003-2230-178X;;;;0000-0002-9041-1414", "linkedin": ";jayanth-ramesh-vasisht-18b5674b/;;bodhisattwa;vikram-balisavira-12605722;;vitalikaiser/;", "or_profile": "~Minali_Upreti1;~Jayanth_Ramesh1;~Chandan_R_Kumar1;~Bodhisattwa_Chakraborty1;~VIKRAM_BALISAVIRA1;~Phillip_Czech1;~Vitali_Kaiser1;~Markus_Roth1", "aff": "Mercedes Benz Research & Development;;Mercedes Benz Research & Development;Mercedes Benz Research & Development;;Universit\u00e4t Stuttgart;Mercedes-Benz AG;Delft University of Technology", "aff_domain": "daimler.com;;daimler.com;daimler.com;;uni-stuttgart.de;mercedes-benz.com;tudelft.nl", "position": "Researcher;;Researcher;Researcher;;PhD student;Researcher;PhD student", "bibtex": "@misc{\nupreti2023uncertainty,\ntitle={Uncertainty and Traffic Light Aware Pedestrian Crossing Intention Prediction},\nauthor={Minali Upreti and Jayanth Ramesh 
and Chandan R Kumar and Bodhisattwa Chakraborty and VIKRAM BALISAVIRA and Phillip Czech and Vitali Kaiser and Markus Roth},\nyear={2023},\nurl={https://openreview.net/forum?id=KiT3-iN8wHJ}\n}", "github": "", "project": "", "reviewers": "MM9u;ZTh8;DUfQ", "site": "https://openreview.net/forum?id=KiT3-iN8wHJ", "pdf_size": 2704645, "recommendation": "3;3;5", "confidence": "5;3;4", "correctness": "3;3;3", "technical_novelty": "1;2;2", "empirical_novelty": "2;2;2", "wc_summary_paper": "63;25;85", "wc_strength_and_weaknesses": "31;124;239", "wc_clarity_quality_novelty_and_reproducibility": "9;8;44", "wc_summary_review": "381;15;16", "wc_review": "484;172;384", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 57.666666666666664, 24.78350706058814 ], "wc_strength_and_weaknesses_avg": [ 131.33333333333334, 85.0738241502964 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 20.333333333333332, 16.73983937265296 ], "wc_summary_review_avg": [ 137.33333333333334, 172.29883600561232 ], "wc_review_avg": [ 346.6666666666667, 130.08031706945093 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7635758647745881800&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0;1;2;3", "aff_unique_norm": "Mercedes-Benz Research & Development;University of Stuttgart;Mercedes-Benz AG;Delft University of Technology", "aff_unique_dep": "Research & Development;;;", "aff_unique_url": "https://www.mercedes-benz.com;https://www.uni-stuttgart.de;https://www.mercedes-benz.com;https://www.tudelft.nl", "aff_unique_abbr": "MB R&D;Uni Stuttgart;MBAG;TU Delft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "Germany;Netherlands" }, { "id": "Ki_26lfEmey", "title": "Joint Attention-Driven Domain Fusion and Noise-Tolerant Learning for Multi-Source Domain Adaptation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Multi-source Unsupervised Domain Adaptation (MUDA) transfers knowledge from multiple source domains with labeled data to an unlabeled target domain.\nRecently, endeavours have been made in establishing connections among different domains to enable feature interaction. However, these approaches essentially enhance category information and thus lack the transfer of the domain-specific information. Moreover, little research has explored the connection between pseudo-label generation and the framework\u2019s learning capabilities, crucial for ensuring robust MUDA. In this paper, we propose a novel framework, which significantly reduces the domain discrepancy and demonstrates new state-of-the-art performance. In particular, we first propose a Contrary Attention-based Domain Merge (CADM) module to enable the interaction among the features so as to achieve the mixture of domain-specific information instead of focusing on the category information.
Secondly, to enable the network to correct the pseudo labels during training, we propose an adaptive and reverse cross-entropy loss, which can adaptively impose constraints on the pseudo-label generation process. We conduct experiments on four benchmark datasets, showing that our approach can efficiently fuse all domains for MUDA while showing much better performance than the prior methods.", "keywords": "Multi-source Unsupervised Domain Adaptation;Attention Mechanism;Noisy Label Learning", "primary_area": "", "supplementary_material": "/attachment/0d64ab522490251b45806d4b5c45e477903850ec.zip", "author": "Tong Xu;Lin Wang;Wu Ning;Chunyan Lyu;Kejun Wang", "authorids": "~Tong_Xu4;~Lin_Wang2;~Wu_Ning1;~Chunyan_Lyu1;~Kejun_Wang1", "gender": "M;M;M;;M", "homepage": ";https://dr.ntu.edu.sg/cris/rp/rp02550;https://github.com/tomFoxxxx;http://www.hrbeu.edu.cn/;http://www.hrbeu.edu.cn/", "dblp": ";;;;", "google_scholar": ";SReb2csAAAAJ;;;", "orcid": "0000-0002-4382-4988;0000-0002-7485-4493;;;", "linkedin": ";;https://www.linkedin.cn/incareer/in/ACoAADMYRq4BCfbNkbFXrZzClx3PejuQoo41Ab0;;", "or_profile": "~Tong_Xu4;~Lin_Wang2;~Wu_Ning1;~Chunyan_Lyu1;~Kejun_Wang1", "aff": "Harbin Engineering University;Hong Kong University of Science and Technology;Huawei Technologies Ltd.;;Harbin Engineering University", "aff_domain": "hrbeu.edu.cn;ust.hk;huawei.com;;hrbeu.edu.cn", "position": "MS student;Assistant Professor;Researcher;;Full Professor", "bibtex": "@misc{\nxu2023joint,\ntitle={Joint Attention-Driven Domain Fusion and Noise-Tolerant Learning for Multi-Source Domain Adaptation},\nauthor={Tong Xu and Lin Wang and Wu Ning and Chunyan Lyu and Kejun Wang},\nyear={2023},\nurl={https://openreview.net/forum?id=Ki_26lfEmey}\n}", "github": "", "project": "", "reviewers": "buhi;eMFp;javN;2VJ6", "site": "https://openreview.net/forum?id=Ki_26lfEmey", "pdf_size": 2388543, "recommendation": "3;5;5;8", "confidence": "4;3;3;3", "correctness": "3;2;4;4", "technical_novelty": "2;3;2;3", "empirical_novelty": "2;3;2;2", "wc_summary_paper": "81;104;102;59", "wc_strength_and_weaknesses": "245;397;262;182", "wc_clarity_quality_novelty_and_reproducibility": "58;45;20;21", "wc_summary_review": "22;55;9;37", "wc_review": "406;601;393;299", "wc_reply_reviewers": "0;0;0;49", "wc_reply_authors": "880;1164;1114;886", "reply_reviewers": "0;0;0;1", "reply_authors": "2;3;2;2", "recommendation_avg": [ 5.25, 1.7853571071357126 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 86.5, 18.255136263528684 ], "wc_strength_and_weaknesses_avg": [ 271.5, 78.346984626085 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 36.0, 16.170961628796228 ], "wc_summary_review_avg": [ 30.75, 17.151894939043906 ], "wc_review_avg": [ 424.75, 109.8143319426021 ], "wc_reply_reviewers_avg": [ 12.25, 21.21762239271875 ], "wc_reply_authors_avg": [ 1011.0, 129.23234889144436 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.7276068751089989, "corr_recommendation_correctness": 0.46442036401282394, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7246392883936366148&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Harbin Engineering University;Hong Kong University of 
Science and Technology;Huawei", "aff_unique_dep": ";;Huawei Technologies", "aff_unique_url": "http://www.heu.edu.cn;https://www.ust.hk;https://www.huawei.com", "aff_unique_abbr": "HEU;HKUST;Huawei", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "KjKZaJ5Gbv", "title": "Efficient Multi-Task Reinforcement Learning via Selective Behavior Sharing", "track": "main", "status": "Withdraw", "tldr": "Sharing behaviors between tasks to improve exploration for multitask reinforcement learning.", "abstract": "The ability to leverage shared behaviors between tasks is critical for sample efficient multi-task reinforcement learning (MTRL). Prior approaches based on parameter sharing or policy distillation share behaviors uniformly across tasks and states or focus on learning one optimal policy. Therefore, they are fundamentally limited when tasks have conflicting behaviors because no one optimal policy exists. Our key insight is that we can instead share exploratory behavior, which can be helpful even when the optimal behaviors differ. Furthermore, as we learn each task, we can guide the exploration by sharing behaviors in a task and state dependent way. To this end, we propose a novel MTRL method, Q-switch Mixture of policies (QMP), that learns to selectively share exploratory behavior between tasks by using a mixture of policies based on estimated discounted returns to gather training data. Experimental results in manipulation and locomotion tasks demonstrate that our method outperforms prior behavior sharing methods, highlighting the importance of task and state dependent sharing. ", "keywords": "Reinforcement Learning;Multitask Reinforcement Learning", "primary_area": "", "supplementary_material": "", "author": "Grace Zhang;Ayush Jain;Injune Hwang;Shao-Hua Sun;Joseph J Lim", "authorids": "~Grace_Zhang1;~Ayush_Jain2;~Injune_Hwang1;~Shao-Hua_Sun1;~Joseph_J_Lim1", "gender": "F;;M;M;M", "homepage": "https://gracehzhang.github.io/;https://ayushj240.github.io/;;http://shaohua0116.github.io;http://people.csail.mit.edu/lim/", "dblp": "13/2999;131/6283-3.html;260/3346;158/9680;08/3086", "google_scholar": ";-zEc_sAAAAAJ;haW9gXcAAAAJ;uXsfnaQAAAAJ;jTnQTBoAAAAJ", "orcid": ";;;0000-0001-7579-6734;", "linkedin": ";;;shaohua0116/;", "or_profile": "~Grace_Zhang1;~Ayush_Jain2;~Injune_Hwang1;~Shao-Hua_Sun1;~Joseph_J_Lim1", "aff": "University of Southern California;University of Southern California;Korea Advanced Institute of Science & Technology;National Taiwan University;Korea Advanced Institute of Science & Technology", "aff_domain": "usc.edu;usc.edu;kaist.edu;ntu.edu.tw;kaist.ac.kr", "position": "PhD student;PhD student;PhD student;Assistant Professor;Associate Professor", "bibtex": "@misc{\nzhang2023efficient,\ntitle={Efficient Multi-Task Reinforcement Learning via Selective Behavior Sharing},\nauthor={Grace Zhang and Ayush Jain and Injune Hwang and Shao-Hua Sun and Joseph J Lim},\nyear={2023},\nurl={https://openreview.net/forum?id=KjKZaJ5Gbv}\n}", "github": "", "project": "", "reviewers": "WkeY;BK7Q;AKUS", "site": "https://openreview.net/forum?id=KjKZaJ5Gbv", "pdf_size": 5518446, "recommendation": "3;3;5", "confidence": "4;4;5", "correctness": "2;2;3", "technical_novelty": "2;2;3", "empirical_novelty": "2;1;2", "wc_summary_paper": "75;40;105", "wc_strength_and_weaknesses": "669;84;421", "wc_clarity_quality_novelty_and_reproducibility": "30;42;37", "wc_summary_review": "38;21;27", "wc_review": "812;187;590",
"wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "wc_summary_paper_avg": [ 73.33333333333333, 26.562295750848715 ], "wc_strength_and_weaknesses_avg": [ 391.3333333333333, 239.7447716959758 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 36.333333333333336, 4.921607686744467 ], "wc_summary_review_avg": [ 28.666666666666668, 7.039570693980958 ], "wc_review_avg": [ 529.6666666666666, 258.6971631507045 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 1.0, "corr_recommendation_correctness": 1.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14235104083531205494&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;1;2;1", "aff_unique_norm": "University of Southern California;Korea Advanced Institute of Science and Technology;National Taiwan University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.usc.edu;https://www.kaist.ac.kr;https://www.ntu.edu.tw", "aff_unique_abbr": "USC;KAIST;NTU", "aff_campus_unique_index": "0;0;2", "aff_campus_unique": "Los Angeles;;Taiwan", "aff_country_unique_index": "0;0;1;2;1", "aff_country_unique": "United States;South Korea;China" }, { "id": "KjzZrBsORz", "title": "Towards Generalized Combinatorial Solvers via Reward Adjustment Policy Optimization", "track": "main", "status": "Withdraw", "tldr": "Towards Generalized Combinatorial Solvers via Reward Adjustment Policy Optimization", "abstract": "Recent reinforcement learning approaches have achieved impressive success in solving combinatorial optimization (CO) problems. However, most existing works focus on evaluating their solvers under a prevalent fixed-size protocol, ignoring generalization to differentRecent reinforcement learning approaches have achieved impressive success in solving combinatorial optimization (CO) problems. However, most existing works focus on evaluating their solvers under a prevalent fixed-size protocol, ignoring generalization to different-size instances. When the solver is confronted with instances of the size it has not been trained on, the performance drops dramatically. In practice, these approaches that lack size-insensitive generalization capacities are unacceptable since an additional training period is repeated for each new instance size. We observe the main obstacle preventing us from training a generalized combinatorial solver is oscillating reward signals. Reward oscillation mainly includes two sides: 1) The conventional reward fails to depict the actual performance of solvers for different instance sizes. 2) The inherent difficulties varying across different sizes worsen training stability. Thus, we present Reward Adjustment Policy Optimization (RAPO), an end-to-end approach to building combinatorial solvers for a wide range of CO problems. RAPO contains a reward adjustment method across instances with variable sizes to address the first side of reward oscillation, along with a promising curriculum strategy to alleviate another side. 
We conduct experiments on three popular CO problems, namely, the traveling salesman problem (TSP), the capacitated vehicle routing problem (CVRP), and the 0-1 knapsack problem (KP). RAPO exhibits significant improvement in generalization to instances with variable sizes consistently on all benchmarks. Remarkably, RAPO even outperforms its fixed-size counterparts in its well-trained size by a clear margin. size instances. When the solver is confronted with instances of the size it has not been trained on, the performance drops dramatically. In practice, these approaches that lack size-insensitive generalization capacities are unacceptable since an additional training period is repeated for each new instance size. We observe the main obstacle preventing us from training a generalized combinatorial solver is oscillating reward signals. Reward oscillation mainly includes two sides: 1) The conventional reward fails to depict the actual performance of solvers for different instance sizes. 2) The inherent difficulties varying across different sizes worsen training stability. Thus, we present Reward Adjustment Policy Optimization (RAPO), an end-to-end approach to building combinatorial solvers for a wide range of CO problems. RAPO contains a reward adjustment method across instances with variable sizes to address the first side of reward oscillation, along with a promising curriculum strategy to alleviate another side. We conduct experiments on three popular CO problems, namely, the traveling salesman problem (TSP), the capacitated vehicle routing problem (CVRP), and the 0-1 knapsack problem (KP). RAPO exhibits significant improvement in generalization to instances with variable sizes consistently on all benchmarks. Remarkably, RAPO even outperforms its fixed-size counterparts in its well-trained size by a clear margin. 
", "keywords": "combinatorial optimization;reinforcement learning;traveling salesman problem;vehicle routing problem", "primary_area": "", "supplementary_material": "", "author": "Jincheng Zhong;Haoyu Ma;Jianmin Wang;Mingsheng Long", "authorids": "~Jincheng_Zhong1;~Haoyu_Ma3;~Jianmin_Wang1;~Mingsheng_Long5", "gender": "M;;M;", "homepage": ";;https://www.thss.tsinghua.edu.cn/en/faculty/jianminwang.htm;", "dblp": "257/2831;;06/3456-1.html;", "google_scholar": ";;https://scholar.google.com.tw/citations?user=MiovcboAAAAJ;", "orcid": ";;0000-0001-6841-7943;", "linkedin": ";;;", "or_profile": "~Jincheng_Zhong1;~Haoyu_Ma3;~Jianmin_Wang1;~Mingsheng_Long5", "aff": "Tsinghua University;;Tsinghua University;", "aff_domain": "tsinghua.edu.cn;;tsinghua.edu.cn;", "position": "PhD student;;Full Professor;", "bibtex": "@misc{\nzhong2023towards,\ntitle={Towards Generalized Combinatorial Solvers via Reward Adjustment Policy Optimization},\nauthor={Jincheng Zhong and Haoyu Ma and Jianmin Wang and Mingsheng Long},\nyear={2023},\nurl={https://openreview.net/forum?id=KjzZrBsORz}\n}", "github": "", "project": "", "reviewers": "VKLv;d5gn;5XbG;GdMy", "site": "https://openreview.net/forum?id=KjzZrBsORz", "pdf_size": 1895869, "recommendation": "1;3;5;5", "confidence": "4;5;3;4", "correctness": "2;2;3;3", "technical_novelty": "2;2;3;2", "empirical_novelty": "1;1;2;3", "wc_summary_paper": "150;89;292;198", "wc_strength_and_weaknesses": "362;292;699;121", "wc_clarity_quality_novelty_and_reproducibility": "312;55;114;774", "wc_summary_review": "40;11;100;143", "wc_review": "864;447;1205;1236", "wc_reply_reviewers": "0;116;66;53", "wc_reply_authors": "475;358;293;483", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "recommendation_avg": [ 3.5, 1.6583123951777 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 182.25, 74.2104271649207 ], "wc_strength_and_weaknesses_avg": [ 368.5, 209.9886901716376 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 313.75, 282.260850101462 ], "wc_summary_review_avg": [ 73.5, 51.383363066268835 ], "wc_review_avg": [ 938.0, 318.84557390686797 ], "wc_reply_reviewers_avg": [ 58.75, 41.27574953892418 ], "wc_reply_authors_avg": [ 402.25, 80.16662335411165 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.42640143271122083, "corr_recommendation_correctness": 0.9045340337332909, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:SeDvi5PfTbQJ:scholar.google.com/&scioq=Towards+Generalized+Combinatorial+Solvers+via+Reward+Adjustment+Policy+Optimization&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "Kk-kJl9fmm", "title": "The Power of Feel-Good Thompson Sampling: A Unified Framework for Linear Bandits", "track": "main", "status": "Reject", "tldr": "", "abstract": "Linear contextual bandit is one of the most popular models in online decision-making with bandit feedback. 
Prior work has studied different variants of this model, e.g., misspecified, non-stationary, and multi-task/life-long linear contextual bandits. However, there is no single framework that can unify the algorithm design and analysis for these variants. In this paper, we propose a unified framework for linear contextual bandits based on feel-good Thompson sampling (Zhang, 2021). The algorithm derived from our framework achieves nearly minimax optimal regret in various settings and resolves the respective open problem in each setting. Specifically, let $d$ be the dimension of the context and $T$ be the length of the horizon, our algorithm achieves an $\\widetilde{\\mathcal{O}}(d\\sqrt{ST})$ regret bound for non-stationary linear bandits with at most $S$ switches, $\\widetilde{\\mathcal{O}}(d^{\\frac{5}{6}} T^{\\frac{2}{3}} P^{\\frac{1}{3}})$ regret for non-stationary linear bandits with bounded path length $P$, and $\\widetilde{\\mathcal{O}}(d\\sqrt{kT} + \\sqrt{dkMT})$ regret for (generalized) lifelong linear bandits over $M$ tasks that share an unknown representation of dimension $k$. We believe our framework will shed light on the design and analysis of other linear contextual bandit variants.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/d85ebf2e79ab3c63d7738841e51245405f228470.zip", "author": "Zhiyuan Fan;Quanquan Gu", "authorids": "~Zhiyuan_Fan1;~Quanquan_Gu1", "gender": "M;M", "homepage": "https://fan-zhiyuan.org/;http://web.cs.ucla.edu/~qgu/", "dblp": ";50/4597", "google_scholar": ";GU9HgNAAAAAJ", "orcid": "0000-0001-7468-0895;", "linkedin": ";", "or_profile": "~Zhiyuan_Fan1;~Quanquan_Gu1", "aff": "Tsinghua University;University of California, Los Angeles", "aff_domain": "tsinghua.edu.cn;cs.ucla.edu", "position": "Undergrad student;Associate Professor", "bibtex": "@misc{\nfan2023the,\ntitle={The Power of Feel-Good Thompson Sampling: A Unified Framework for Linear Bandits},\nauthor={Zhiyuan Fan and Quanquan Gu},\nyear={2023},\nurl={https://openreview.net/forum?id=Kk-kJl9fmm}\n}", "github": "", "project": "", "reviewers": "oQHX;vpXj;iGav", "site": "https://openreview.net/forum?id=Kk-kJl9fmm", "pdf_size": 1310778, "recommendation": "5;5;6", "confidence": "3;3;4", "correctness": "4;3;4", "technical_novelty": "3;2;3", "empirical_novelty": "0;0;0", "wc_summary_paper": "70;44;52", "wc_strength_and_weaknesses": "79;345;133", "wc_clarity_quality_novelty_and_reproducibility": "21;30;22", "wc_summary_review": "16;23;50", "wc_review": "186;442;257", "wc_reply_reviewers": "0;35;0", "wc_reply_authors": "103;750;234", "reply_reviewers": "0;1;0", "reply_authors": "2;3;1", "recommendation_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 0, 0 ], "wc_summary_paper_avg": [ 55.333333333333336, 10.873004286866728 ], "wc_strength_and_weaknesses_avg": [ 185.66666666666666, 114.80224542906622 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 24.333333333333332, 4.027681991198191 ], "wc_summary_review_avg": [ 29.666666666666668, 14.65908895153068 ], "wc_review_avg": [ 295.0, 107.91045670678382 ], "wc_reply_reviewers_avg": [ 11.666666666666666, 16.49915822768611 ], "wc_reply_authors_avg": [ 362.3333333333333, 279.28997276824833 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], 
"replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.9999999999999997, "corr_recommendation_correctness": 0.4999999999999999, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1378859802690424952&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Tsinghua University;University of California, Los Angeles", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.ucla.edu", "aff_unique_abbr": "THU;UCLA", "aff_campus_unique_index": "1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;1", "aff_country_unique": "China;United States" }, { "id": "KkI8sjKqtnV", "title": "Fair Federated Learning via Bounded Group Loss", "track": "main", "status": "Reject", "tldr": "", "abstract": "In federated learning, fair prediction across protected groups is an important constraint for many applications. Unfortunately, prior work studying group fair federated learning lacks formal convergence or fairness guarantees. In this work we propose a general framework for provably fair federated learning. In particular, we explore and extend the notion of Bounded Group Loss as a theoretically-grounded approach for group fairness. Using this setup, we propose a scalable federated optimization method that optimizes the empirical risk under a number of group fairness constraints. We provide convergence guarantees for the method as well as fairness guarantees for the resulting solution. Empirically, we evaluate our method across common benchmarks from fair ML and federated learning, showing that it can provide both fairer and more accurate predictions than baseline approaches.", "keywords": "Federated Learning;Group Fairness", "primary_area": "", "supplementary_material": "/attachment/1cf4d6dd341924b8f841547d12410294e7d9d9bb.zip", "author": "Shengyuan Hu;Steven Wu;Virginia Smith", "authorids": "~Shengyuan_Hu2;~Steven_Wu1;~Virginia_Smith1", "gender": ";F;M", "homepage": ";;https://zstevenwu.com/", "dblp": "226/6584-1;120/0921;137/8350", "google_scholar": "m_ZHHToAAAAJ;;MbF6rTEAAAAJ", "orcid": ";;", "linkedin": ";;zstevenwu/", "or_profile": "~Shengyuan_Hu2;~Virginia_Smith1;~Zhiwei_Steven_Wu1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cmu.edu;cmu.edu;cmu.edu", "position": "PhD student;Associate Professor;Assistant Professor", "bibtex": "@misc{\nhu2023fair,\ntitle={Fair Federated Learning via Bounded Group Loss},\nauthor={Shengyuan Hu and Steven Wu and Virginia Smith},\nyear={2023},\nurl={https://openreview.net/forum?id=KkI8sjKqtnV}\n}", "github": "", "project": "", "reviewers": "KBz5;SyJQ;juWF;ScJT", "site": "https://openreview.net/forum?id=KkI8sjKqtnV", "pdf_size": 674326, "recommendation": "3;3;3;6", "confidence": "3;4;4;4", "correctness": "2;2;3;4", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "45;12;100;61", "wc_strength_and_weaknesses": "360;216;183;133", "wc_clarity_quality_novelty_and_reproducibility": "67;5;8;31", "wc_summary_review": "30;4;24;45", "wc_review": "502;237;315;270", "wc_reply_reviewers": "0;0;0;120", "wc_reply_authors": "490;514;245;627", "reply_reviewers": "0;0;0;1", "reply_authors": "2;2;2;2", "recommendation_avg": [ 3.75, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.82915619758885 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 
], "wc_summary_paper_avg": [ 54.5, 31.65833223655346 ], "wc_strength_and_weaknesses_avg": [ 223.0, 84.43636657270373 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 27.75, 24.79289212657531 ], "wc_summary_review_avg": [ 25.75, 14.703315952532613 ], "wc_review_avg": [ 331.0, 102.53535975457442 ], "wc_reply_reviewers_avg": [ 30.0, 51.96152422706632 ], "wc_reply_authors_avg": [ 469.0, 139.2892673539494 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 0.8703882797784892, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10014171068747541779&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Out-of-Distribution Detection based on In-Distribution Data Patterns Memorization with Modern Hopfield Energy", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11256", "id": "KkazG4lgKL", "poster": "/media/PosterPDFs/ICLR%202023/11256.png?t=1681202269.2315004", "openreview": "https://openreview.net/forum?id=KkazG4lgKL", "slides": "https://iclr.cc/virtual/2023/poster/11256", "video": "https://iclr.cc/virtual/2023/poster/11256", "author_site": "Jinsong Zhang, Qiang Fu, Xu Chen, Lun Du, Zelin Li, Gang Wang, Xiaoguang Liu, Shi Han, Dongmei Zhang", "tldr": "We propose a novel out-of-distribution detection method motivated by Modern Hopfield Energy, and futhur derive a simplified version that is effective, efficient and hyperparameter-free.", "abstract": "Out-of-Distribution (OOD) detection is essential for safety-critical applications of deep neural networks. OOD detection is challenging since DNN models may produce very high logits value even for OOD samples. Hence, it is of great difficulty to discriminate OOD data by directly adopting Softmax on output logits as the confidence score. Differently, we detect the OOD sample with Hopfield energy in a store-then-compare paradigm. In more detail, penultimate layer outputs on the training set are considered as the representations of in-distribution (ID) data. Thus they can be transformed into stored patterns that serve as anchors to measure the discrepancy of unseen data for OOD detection. Starting from the energy function defined in Modern Hopfield Network for the discrepancy score calculation, we derive a simplified version SHE with theoretical analysis. In SHE, we utilize only one stored pattern to present each class, and these patterns can be obtained by simply averaging the penultimate layer outputs of training samples within this class. SHE has the advantages of hyperparameterfree\nand high computational efficiency. The evaluations of nine widely-used OOD datasets show the promising performance of such a simple yet effective approach and its superiority over State-of-the-Art models. 
Code is available at https://github.com/zjs975584714/SHE_ood_detection.", "keywords": "Out-of-Distribution detection;Hopfield Energy;Hyperparameter-Free", "primary_area": "", "supplementary_material": "/attachment/dba009416f8215c570d1710796737eebbcd8f821.zip", "author": "Jinsong Zhang;Qiang Fu;Xu Chen;Lun Du;Zelin Li;Gang Wang;xiaoguang Liu;Shi Han;Dongmei Zhang", "authorids": "~Jinsong_Zhang5;~Qiang_Fu7;~Xu_Chen18;~Lun_Du1;lizlcycy@gmail.com;~Gang_Wang8;~xiaoguang_Liu3;~Shi_Han1;~Dongmei_Zhang2", "gender": "M;M;;M;;M;;M;", "homepage": ";;;https://www.microsoft.com/en-us/research/people/ludu/;;https://cc.nankai.edu.cn/2021/0323/c13619a490377/page.htm;;https://www.microsoft.com/en-us/research/people/shihan/;https://www.microsoft.com/en-us/research/people/dongmeiz/", "dblp": ";;;213/3199;;71/4292-1;;23/3395;87/461-1", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;bwTLZSIAAAAJ;;3XUANDAAAAAJ;;;;wLabxmYAAAAJ;jLlBBl4AAAAJ", "orcid": ";0000-0002-5821-7267;;;;0000-0003-0387-2501;;0000-0002-0360-6089;0000-0002-9230-2799", "linkedin": ";qiang-fu-08301285/;;;;;;shi-han-86888526/;dongmei-zhang-38a86317/", "or_profile": "~Jinsong_Zhang5;~Qiang_Fu7;~Xu_Chen18;~Lun_Du1;lizlcycy@gmail.com;~Gang_Wang8;~xiaoguang_Liu3;~Shi_Han1;~Dongmei_Zhang2", "aff": "Nankai University;Microsoft;;Microsoft Research Asia;;Nankai University;;Microsoft;Microsoft", "aff_domain": "nankai.edu.cn;microsoft.com;;microsoft.com;;nankai.edu.cn;;microsoft.com;microsoft.com", "position": "MS student;Researcher;;Researcher;;Full Professor;;Researcher;Assistant Managing Director, Microsoft Research Asia", "bibtex": "@inproceedings{\nzhang2023outofdistribution,\ntitle={Out-of-Distribution Detection based on In-Distribution Data Patterns Memorization with Modern Hopfield Energy},\nauthor={Jinsong Zhang and Qiang Fu and Xu Chen and Lun Du and Zelin Li and Gang Wang and xiaoguang Liu and Shi Han and Dongmei Zhang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=KkazG4lgKL}\n}", "github": "", "project": "", "reviewers": "ANA9;bCQs;boCN;sLwk", "pdf_size": 5067369, "recommendation": "6;6;6;6", "confidence": "3;3;4;4", "correctness": "3;3;3;3", "technical_novelty": "3;2;3;3", "empirical_novelty": "3;2;3;3", "wc_summary_paper": "29;111;16;118", "wc_strength_and_weaknesses": "256;358;354;521", "wc_clarity_quality_novelty_and_reproducibility": "24;52;13;8", "wc_summary_review": "44;43;30;128", "wc_review": "353;564;413;775", "wc_reply_reviewers": "30;18;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 68.5, 46.295248136282844 ], "wc_strength_and_weaknesses_avg": [ 372.25, 95.10093322360197 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 24.25, 17.03489066592445 ], "wc_summary_review_avg": [ 61.25, 38.931831449342326 ], "wc_review_avg": [ 526.25, 162.8977823667345 ], "wc_reply_reviewers_avg": [ 12.0, 12.727922061357855 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 9, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 67, "gs_cited_by_link":
"https://scholar.google.com/scholar?cites=18426007330389610440&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "pdf": "https://openreview.net/pdf?id=KkazG4lgKL", "email": "nankai.edu.cn;microsoft.com;;microsoft.com;;nankai.edu.cn;;microsoft.com;microsoft.com", "author_num": 9, "aff_unique_index": "0;1;1;0;1;1", "aff_unique_norm": "Nankai University;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "http://www.nankai.edu.cn;https://www.microsoft.com", "aff_unique_abbr": "NKU;Microsoft", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;1;0;0;1;1", "aff_country_unique": "China;United States" }, { "title": "Cross-Level Distillation and Feature Denoising for Cross-Domain Few-Shot Classification", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12203", "id": "Kn-HA8DFik", "poster": "/media/PosterPDFs/ICLR%202023/12203.png?t=1682262713.5737023", "openreview": "https://openreview.net/forum?id=Kn-HA8DFik", "slides": "https://iclr.cc/virtual/2023/poster/12203", "video": "https://iclr.cc/virtual/2023/poster/12203", "author_site": "Hao ZHENG, Runqi Wang, Jianzhuang Liu, Asako Kanezaki", "tldr": "We design a cross-level distillation and a feature denoising operation for handling cross-domain few-shot classification. Our approach can surpass the SOTA method by 5.44% on 1-shot and 1.37% on 5-shot classification tasks in the BSCD-FSL benchmark.", "abstract": "The conventional few-shot classification aims at learning a model on a large labeled base dataset and rapidly adapting to a target dataset that is from the same distribution as the base dataset. However, in practice, the base and the target datasets of few-shot classification are usually from different domains, which is the problem of cross-domain few-shot classification. We tackle this problem by making a small proportion of unlabeled images in the target domain accessible in the training stage. In this setup, even though the base data are sufficient and labeled, the large domain shift still makes transferring the knowledge from the base dataset difficult. We meticulously design a cross-level knowledge distillation method, which can strengthen the ability of the model to extract more discriminative features in the target dataset by guiding the network's shallow layers to learn higher-level information. Furthermore, in order to alleviate the overfitting in the evaluation stage, we propose a feature denoising operation which can reduce the feature redundancy and mitigate overfitting. Our approach can surpass the previous state-of-the-art method, Dynamic-Distillation, by 5.44% on 1-shot and 1.37% on 5-shot classification tasks on average in the BSCD-FSL benchmark. 
The implementation code will be available at https://gitee.com/mindspore/models/tree/master/research/cv/CLDFD.", "keywords": "cross-domain few-shot classification;cross-level distillation;feature denoising", "primary_area": "", "supplementary_material": "/attachment/c83d67940aaa85c20f6df12996bac404532c1789.zip", "author": "Hao ZHENG;Runqi Wang;Jianzhuang Liu;Asako Kanezaki", "authorids": "~Hao_ZHENG4;~Runqi_Wang2;~Jianzhuang_Liu3;~Asako_Kanezaki1", "gender": "M;M;M;F", "homepage": "https://github.com/jarucezh;;;https://kanezaki.github.io/", "dblp": ";https://dblp.uni-trier.de/pid/266/9915.html;l/JianzhuangLiu;37/7634", "google_scholar": "w7IvRBUAAAAJ;;sKauaAwAAAAJ;4lLAESYAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Hao_ZHENG4;~Runqi_Wang2;~Jianzhuang_Liu3;~Asako_Kanezaki1", "aff": "Tokyo Institute of Technology, Tokyo Institute of Technology;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Tokyo Institute of Technology", "aff_domain": "titech.ac.jp;huawei.com;huawei.com;titech.ac.jp", "position": "PhD student;Researcher;Principal Researcher;Associate Professor", "bibtex": "@inproceedings{\nzheng2023crosslevel,\ntitle={Cross-Level Distillation and Feature Denoising for Cross-Domain Few-Shot Classification},\nauthor={Hao ZHENG and Runqi Wang and Jianzhuang Liu and Asako Kanezaki},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=Kn-HA8DFik}\n}", "github": "", "project": "", "reviewers": "am4M;8cJ8;yRW4", "pdf_size": 29618060, "recommendation": "3;6;8", "confidence": "4;5;4", "correctness": "3;3;4", "technical_novelty": "2;3;3", "empirical_novelty": "2;3;3", "wc_summary_paper": "376;75;80", "wc_strength_and_weaknesses": "653;98;62", "wc_clarity_quality_novelty_and_reproducibility": "98;49;35", "wc_summary_review": "50;70;22", "wc_review": "1177;292;199", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1191;665;32", "reply_reviewers": "0;0;0", "reply_authors": "2;1;1", "recommendation_avg": [ 5.666666666666667, 2.0548046676563256 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 177.0, 140.72905409568654 ], "wc_strength_and_weaknesses_avg": [ 271.0, 270.51432494417 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 60.666666666666664, 27.010286106510527 ], "wc_summary_review_avg": [ 47.333333333333336, 19.68643074697787 ], "wc_review_avg": [ 556.0, 440.7516307400348 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 629.3333333333334, 473.8314280651108 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.11470786693528084, "corr_recommendation_correctness": 0.8029550685469661, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1010206948872859377&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=Kn-HA8DFik", "email": "titech.ac.jp;huawei.com;huawei.com;titech.ac.jp", "author_num": 4, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Tokyo Institute of Technology;Huawei", "aff_unique_dep": ";Huawei Technologies", "aff_unique_url": "https://www.titech.ac.jp;https://www.huawei.com", "aff_unique_abbr": "Titech;Huawei", 
"aff_campus_unique_index": "0", "aff_campus_unique": "Tokyo;", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "Japan;China" }, { "id": "Kn43SKplAn", "title": "3D Surface Reconstruction in the Wild by Deforming Shape Priors from Synthetic Data", "track": "main", "status": "Reject", "tldr": "A method for single view 3D reconstruction without camera pose supervision", "abstract": "We present a new method for category-specific 3D reconstruction from a single image. A limitation of current color image-based 3D reconstruction models is that they do not generalize across datasets, due to domain shift. In contrast, we show that one can learn to reconstruct objects across datasets by shape priors learned from synthetic 3D data and a point cloud pose canonicalization method. Given a single depth image at test time, we first place this partial point cloud in a canonical pose. Then, we use a neural deformation field in the canonical coordinate frame to reconstruct the 3D surface of the object. Finally, we jointly optimize object pose and 3D shape to fit the partial depth observation. Our approach achieves state-of-the-art reconstruction performance across several real-world datasets, even when trained without ground truth camera poses (which are required by some of the state-of-the-art methods). We further show that our method generalizes to different input modalities, from dense depth images to sparse and noisy LIDAR scans. ", "keywords": "3D reconstruction;pose estimation;shape deformation", "primary_area": "", "supplementary_material": "/attachment/0c9fda12519f79fdbfc17d9cfd0343e1c4fc78d7.zip", "author": "Nicolai Haeni;Jun-Jee Chao;Volkan Isler", "authorids": "~Nicolai_Haeni1;~Jun-Jee_Chao1;~Volkan_Isler1", "gender": "M;M;M", "homepage": "https://nicolaihaeni.github.io/;;https://www.cs.utexas.edu/~isler/", "dblp": "200/8534;271/4521;42/3703", "google_scholar": "jn0K8-sAAAAJ;MXbgcw8AAAAJ;Q5KT-hEAAAAJ", "orcid": "0000-0003-4042-3318;;0000-0002-0868-5441", "linkedin": "nicolai-haeni/;jun-jee-chao-2b210915b;volkan-isler", "or_profile": "~Nicolai_Haeni1;~Jun-Jee_Chao1;~Volkan_Isler1", "aff": "University of Minnesota, Minneapolis;Samsung;Samsung", "aff_domain": "umn.edu;samsung.com;samsung.com", "position": "PhD student;Intern;Principal Researcher", "bibtex": "@misc{\nhaeni2023d,\ntitle={3D Surface Reconstruction in the Wild by Deforming Shape Priors from Synthetic Data},\nauthor={Nicolai Haeni and Jun-Jee Chao and Volkan Isler},\nyear={2023},\nurl={https://openreview.net/forum?id=Kn43SKplAn}\n}", "github": "", "project": "", "reviewers": "aneS;Xiit;qugy", "site": "https://openreview.net/forum?id=Kn43SKplAn", "pdf_size": 34094495, "recommendation": "3;5;5", "confidence": "3;2;5", "correctness": "2;3;4", "technical_novelty": "1;2;2", "empirical_novelty": "1;2;2", "wc_summary_paper": "49;86;82", "wc_strength_and_weaknesses": "138;328;263", "wc_clarity_quality_novelty_and_reproducibility": "18;58;28", "wc_summary_review": "27;34;21", "wc_review": "232;506;394", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "115;318;166", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 1.247219128924647 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "wc_summary_paper_avg": [ 72.33333333333333, 16.579773487261185 ], "wc_strength_and_weaknesses_avg": [ 243.0, 
78.84584115009915 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 34.666666666666664, 16.99673171197595 ], "wc_summary_review_avg": [ 27.333333333333332, 5.312459150169742 ], "wc_review_avg": [ 377.3333333333333, 112.47913386737807 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 199.66666666666666, 86.22580175845795 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.18898223650461363, "corr_recommendation_correctness": 0.8660254037844385, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17014356343382082523&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "University of Minnesota;Samsung", "aff_unique_dep": ";Samsung", "aff_unique_url": "https://www.minnesota.edu;https://www.samsung.com", "aff_unique_abbr": "UMN;Samsung", "aff_campus_unique_index": "0", "aff_campus_unique": "Minneapolis;", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United States;South Korea" }, { "title": "DropIT: Dropping Intermediate Tensors for Memory-Efficient DNN Training", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12216", "id": "Kn6i2BZW69w", "poster": "/media/PosterPDFs/ICLR%202023/12216.png?t=1681352263.3328426", "openreview": "https://openreview.net/forum?id=Kn6i2BZW69w", "slides": "https://iclr.cc/virtual/2023/poster/12216", "video": "https://iclr.cc/virtual/2023/poster/12216", "author_site": "Joya Chen, Kai Xu, Yuhui Wang, Yifei Cheng, Angela Yao", "tldr": "DropIT can save memory & improve accuracy, providing a new perspective on activation compressed training: dropping rather than quantization.", "abstract": "A standard hardware bottleneck when training deep neural networks is GPU memory. The bulk of memory is occupied by caching intermediate tensors for gradient computation in the backward pass. We propose a novel method to reduce this footprint - Dropping Intermediate Tensors (DropIT). DropIT drops min-k elements of the intermediate tensors and approximates gradients from the sparsified tensors in the backward pass. Theoretically, DropIT reduces noise in estimated gradients and therefore has a higher rate of convergence than vanilla-SGD. Experiments show that we can drop up to 90\% of the intermediate tensor elements in fully-connected and convolutional layers while achieving higher testing accuracy for Visual Transformers and Convolutional Neural Networks on various tasks (e.g., classification, object detection, instance segmentation).
Our code and models are available at https://github.com/chenjoya/dropit.", "keywords": "dropping intermediate tensors;dropping activations;activation compressed training;top-k;vision transformer;cnn", "primary_area": "", "supplementary_material": "", "author": "Joya Chen;Kai Xu;Yuhui Wang;Yifei Cheng;Angela Yao", "authorids": "~Joya_Chen1;~Kai_Xu7;~Yuhui_Wang2;~Yifei_Cheng1;~Angela_Yao1", "gender": "M;M;M;;", "homepage": "https://chenjoya.github.io/;https://kai422.github.io/;https://maxt123.github.io/;;http://www.angelayao.com", "dblp": "247/9518;;;;64/8484", "google_scholar": "https://scholar.google.com.ph/citations?user=IIx9dc8AAAAJ;https://scholar.google.com/citations?hl=en;;;https://scholar.google.ch/citations?user=-LJCZMMAAAAJ", "orcid": ";;;0000-0003-3859-2921;", "linkedin": ";;;;", "or_profile": "~Joya_Chen1;~Kai_Xu7;~Yuhui_Wang2;~Yifei_Cheng1;~Angela_Yao1", "aff": "National University of Singapore;National University of Singapore;;University of Science and Technology of China,;National University of Singapore", "aff_domain": "u.nus.edu;nus.edu.sg;;ustc.edu.cn;nus.edu.sg", "position": "PhD student;PhD student;;PhD student;Associate Professor", "bibtex": "@inproceedings{\nchen2023dropit,\ntitle={Drop{IT}: Dropping Intermediate Tensors for Memory-Efficient {DNN} Training},\nauthor={Joya Chen and Kai Xu and Yuhui Wang and Yifei Cheng and Angela Yao},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=Kn6i2BZW69w}\n}", "github": "", "project": "", "reviewers": "Ltqg;zGSK;xcyW;GQ3x", "pdf_size": 711692, "recommendation": "5;6;6;6", "confidence": "5;3;4;4", "correctness": "3;3;2;3", "technical_novelty": "3;3;2;2", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "101;123;59;140", "wc_strength_and_weaknesses": "120;368;359;210", "wc_clarity_quality_novelty_and_reproducibility": "28;70;199;89", "wc_summary_review": "151;35;28;75", "wc_review": "400;596;645;514", "wc_reply_reviewers": "131;0;85;46", "wc_reply_authors": "403;722;1233;828", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;3;2", "recommendation_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 105.75, 30.326349928733592 ], "wc_strength_and_weaknesses_avg": [ 264.25, 104.27457743860677 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 96.5, 63.1605098142819 ], "wc_summary_review_avg": [ 72.25, 48.874200760728556 ], "wc_review_avg": [ 538.75, 92.77762391870144 ], "wc_reply_reviewers_avg": [ 65.5, 48.32442446630896 ], "wc_reply_authors_avg": [ 796.5, 296.61296330403366 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.816496580927726, "corr_recommendation_correctness": -0.3333333333333333, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4510687556087074627&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=Kn6i2BZW69w", "email": "u.nus.edu;nus.edu.sg;;ustc.edu.cn;nus.edu.sg", "author_num": 5, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "National University of Singapore;University of Science and Technology of China", "aff_unique_dep": ";", "aff_unique_url": "https://www.nus.edu.sg;http://www.ustc.edu.cn", "aff_unique_abbr": 
"NUS;USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Singapore;China" }, { "id": "KnqaT58PV7", "title": "Federated Semi-supervised Learning with Dual Regulator", "track": "main", "status": "Reject", "tldr": "", "abstract": "Federated learning emerges as a powerful method to learn from decentralized heterogeneous data while protecting data privacy. Federated semi-supervised learning (FSSL) is even more practical and challenging, where only a fraction of data can be labeled due to high annotation cost. Existing FSSL methods, however, assume independent and identically distributed (IID) labeled data across clients and consistent class distribution between labeled and unlabeled data within a client. In this work, we propose a novel FSSL framework with dual regulator, FedDure, to optimize and customize model training according to specific data distributions of clients. FedDure lifts the previous assumption with a coarse-grained regulator (C-reg) and a fine-grained regulator (F-reg): C-reg regularizes the updating of local model by tracking the learning effect on labeled data distribution; F-reg learns an adaptive weighting scheme tailored for unlabeled instances in each client. We further formulate the client model training as bi-level optimization that adaptively optimize the model in the client with two regulators. Theoretically, we show the convergence guarantee of dual regulator. Empirically, we demonstrate that FedDure is superior to the existing methods across wide range of settings, notably by more than 12% on CIFAR-10 and CINIC-10 datasets.", "keywords": "federated learning;semi-supervised learning;dual regulator;class imbalance", "primary_area": "", "supplementary_material": "", "author": "Sikai Bai;Shuaicheng Li;Weiming Zhuang;Kunlin Yang;Jun Hou;Shuai Yi;Shuai Zhang;Junyu Gao;Song Guo", "authorids": "~Sikai_Bai1;~Shuaicheng_Li2;~Weiming_Zhuang1;~Kunlin_Yang1;~Jun_Hou2;~Shuai_Yi3;~Shuai_Zhang14;~Junyu_Gao2;~Song_Guo5", "gender": "M;M;;M;M;Not Specified;M;M;F", "homepage": "https://white1973.github.io/;;https://weiming.me/;https://github.com/Youngkl0726;https://scholar.google.com/citations?hl=zh-CN&pli=1&user=afbbNmwAAAAJ;;http://gjy3035.github.io/;https://cse.hkust.edu.hk/~songguo/;https://github.com/", "dblp": "https://dblp.uni-trier.de/search?q=MFI%3A%20Multi-range%20Feature%20Interchange%20for%20Video%20Action%20Recognition;232/6936;274/0724;;150/6633;;153/4522-1.html;01/267-1;", "google_scholar": ";P_Gi41MAAAAJ;lLuLAzEAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;RsZcMZcAAAAJ;RiDA3o4AAAAJ;https://scholar.google.com/citations?hl=en;TTTKWlgAAAAJ", "orcid": ";;;;;;;;", "linkedin": ";;;;;;;;", "or_profile": "~Sikai_Bai1;~Shuaicheng_Li2;~Weiming_Zhuang1;~Kunlin_Yang1;~Shuai_Yi3;~Shuai_Zhang14;~Junyu_Gao2;~Song_Guo5;~JUN_HOU1", "aff": "The Hong Kong Polytechnic University, Hong Kong Polytechnic University;Sensetime Group Limited;Sony Research;;SenseTime Group Limited;sensetime;Xidian University;The Hong Kong Polytechnic University;Sensetime", "aff_domain": "comp.polyu.edu.hk;sensetime.com;sony.com;;sensetime.com;sensetime.com;xidian.edu.cn;polyu.edu.hk;sensetime.com", "position": "RA;Researcher;Researcher;;Researcher;Researcher;Postdoc;Full Professor;Researcher", "bibtex": "@misc{\nbai2023federated,\ntitle={Federated Semi-supervised Learning with Dual Regulator},\nauthor={Sikai Bai and Shuaicheng Li and Weiming Zhuang and Kunlin Yang and Jun Hou and Shuai Yi and Shuai Zhang and Junyu Gao and Song 
Guo},\nyear={2023},\nurl={https://openreview.net/forum?id=KnqaT58PV7}\n}", "github": "", "project": "", "reviewers": "JdwN;yeCB;DPn5", "site": "https://openreview.net/forum?id=KnqaT58PV7", "pdf_size": 582086, "recommendation": "5;6;6", "confidence": "3;2;3", "correctness": "2;3;3", "technical_novelty": "2;3;3", "empirical_novelty": "2;3;3", "wc_summary_paper": "245;75;50", "wc_strength_and_weaknesses": "67;175;142", "wc_clarity_quality_novelty_and_reproducibility": "470;6;15", "wc_summary_review": "38;42;32", "wc_review": "820;298;239", "wc_reply_reviewers": "83;0;33", "wc_reply_authors": "1013;392;541", "reply_reviewers": "1;0;1", "reply_authors": "4;2;2", "recommendation_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 123.33333333333333, 86.63460945578018 ], "wc_strength_and_weaknesses_avg": [ 128.0, 45.18849411078001 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 163.66666666666666, 216.64153700423094 ], "wc_summary_review_avg": [ 37.333333333333336, 4.109609335312651 ], "wc_review_avg": [ 452.3333333333333, 261.09300173607784 ], "wc_reply_reviewers_avg": [ 38.666666666666664, 34.120700787384514 ], "wc_reply_authors_avg": [ 648.6666666666666, 264.70654611390995 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.9428090415820634 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 9, 0 ], "corr_recommendation_confidence": -0.4999999999999999, "corr_recommendation_correctness": 0.9999999999999997, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:0wxGUOjrSwMJ:scholar.google.com/&scioq=Federated+Semi-supervised+Learning+with+Dual+Regulator&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;4;5;0;4", "aff_unique_norm": "Hong Kong Polytechnic University;SenseTime Group;Sony;SenseTime Group Limited;SenseTime;Xidian University", "aff_unique_dep": ";;Research;;;", "aff_unique_url": "https://www.polyu.edu.hk;https://www.sensetime.com;https://www.sony.com;https://www.sensetime.com;https://www.sensetime.com;http://www.xidian.edu.cn/", "aff_unique_abbr": "PolyU;SenseTime;Sony;SenseTime;SenseTime;Xidian", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;1;0;0;0;0;0", "aff_country_unique": "China;Japan" }, { "id": "KoEa6h1o6D1", "title": "Interventional Rationalization", "track": "main", "status": "Reject", "tldr": "We propose a causal intervention method to remove spurious correlations in selective rationalization.", "abstract": "Selective rationalizations improve the explainability of neural networks by selecting a subsequence of the input (i.e., rationales) to explain the prediction results. Although existing methods have achieved promising results, they still suffer from adopting the spurious correlations in data (aka., shortcuts) to compose rationales and make predictions. Inspired by the causal theory, in this paper, we develop an interventional rationalization (Inter-RAT) to discover the causal rationales. Specifically, we first analyse the causalities among the input, rationales and results with a structural causal model. 
Then, we discover spurious correlations between input and rationales, and between rationales and results, respectively, by identifying the confounder in the causalities. Next, based on the backdoor adjustment, we propose a causal intervention method to remove the spurious correlations in input and rationales. Further, we discuss reasons why spurious correlations between the selected rationales and results exist by analysing the limitations of the sparsity constraint in the rationalization, and employ the causal intervention method to remove these correlations. Extensive experimental results on three real-world datasets clearly validate the effectiveness of our proposed method.\n", "keywords": "rationalization;causal intervention", "primary_area": "", "supplementary_material": "/attachment/9b19e8437f98c5e373a5b1462cf5a2fc4ee33340.zip", "author": "Linan Yue;Qi Liu;Li Wang;Yanqing An;Yichao Du;Zhenya Huang", "authorids": "~Linan_Yue1;~Qi_Liu3;~Li_Wang18;~Yanqing_An1;~Yichao_Du1;~Zhenya_Huang2", "gender": "M;M;F;M;M;M", "homepage": "https://yuelinan.github.io/;http://staff.ustc.edu.cn/~qiliuql/;;http://home.ustc.edu.cn/~anyq/;http://staff.ustc.edu.cn/~huangzhy/;", "dblp": "297/1080;95/2446-3;;296/9937;178/8690;271/6727", "google_scholar": "https://scholar.google.com.hk/citations?user=XDaNgG4AAAAJ;5EoHAFwAAAAJ;poE7k1wAAAAJ;gjj3AZ4AAAAJ;dVZuU90AAAAJ;UC4wSP0AAAAJ", "orcid": "0000-0002-5980-6098;0000-0001-6956-5550;;0000-0001-7977-775X;0000-0003-1661-0420;", "linkedin": ";;;;;", "or_profile": "~Linan_Yue1;~Qi_Liu3;~Li_Wang18;~Yanqing_An1;~Zhenya_Huang2;~Du_Yichao1", "aff": "University of Science and Technology of China;University of Science and Technology of China;;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;ustc.edu.cn;;mail.ustc.edu.cn;ustc.edu.cn;ustc.edu.cn", "position": "PhD student;Full Professor;;MS student;Associate Professor;PhD student", "bibtex": "@misc{\nyue2023interventional,\ntitle={Interventional Rationalization},\nauthor={Linan Yue and Qi Liu and Li Wang and Yanqing An and Yichao Du and Zhenya Huang},\nyear={2023},\nurl={https://openreview.net/forum?id=KoEa6h1o6D1}\n}", "github": "", "project": "", "reviewers": "DUoA;vjRf;qL6H;78ZX", "site": "https://openreview.net/forum?id=KoEa6h1o6D1", "pdf_size": 3147331, "recommendation": "3;3;5;5", "confidence": "3;3;3;4", "correctness": "2;3;2;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "73;84;84;58", "wc_strength_and_weaknesses": "493;128;391;377", "wc_clarity_quality_novelty_and_reproducibility": "42;26;83;33", "wc_summary_review": "11;4;16;46", "wc_review": "619;242;574;514", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "615;410;328;412", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 74.75, 10.662434056068061 ], "wc_strength_and_weaknesses_avg": [ 347.25, 134.2690861665484 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 46.0, 22.102036105300343 ], "wc_summary_review_avg": [ 19.25, 16.021469970012117 ], "wc_review_avg": [ 487.25, 146.41272997932933 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 441.25, 105.88525629189363 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], 
"replies_avg": [ 9, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17154124882416195825&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of Science and Technology of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ustc.edu.cn", "aff_unique_abbr": "USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "Kot3IIgXGbb", "title": "Learning Globally Smooth Functions on Manifolds", "track": "main", "status": "Reject", "tldr": "We present a constrained learning approach to learn smooth functions over manifold data. ", "abstract": "Smoothness and low dimensional structures play central roles in improving generalization and stability in learning and statistics. The combination of these properties has led to many advances in semi-supervised learning, generative modeling, and control of dynamical systems. However, learning smooth functions is generally challenging, except in simple cases such as learning linear or kernel models. Typical methods are either too conservative, relying on crude upper bounds such as spectral normalization, too lax, penalizing smoothness on average, or too computationally intensive, requiring the solution of large-scale semi-definite programs. These issues are only exacerbated when trying to simultaneously exploit low dimensionality using, e.g., manifolds. This work proposes to overcome these obstacles by combining techniques from semi-infinite constrained learning and manifold regularization. To do so, it shows that, under typical conditions, the problem of learning a Lipschitz continuous function on a manifold is equivalent to a dynamically weighted manifold regularization problem. This observation leads to a practical algorithm based on a weighted Laplacian penalty whose weights are adapted using stochastic gradient techniques. We prove that, under mild conditions, this method estimates the Lipschitz constant of the solution, learning a globally smooth solution as a byproduct. Numerical examples illustrate the advantages of using this method to impose global smoothness on manifolds as opposed to imposing smoothness on average.", "keywords": "Lipschitz functions;Manifolds;Machine Learning", "primary_area": "", "supplementary_material": "/attachment/3d337ac64c401fbd59984c9578f67b652629ed04.zip", "author": "Juan Cervino;Luiz F. O. 
Chamon;Benjamin David Haeffele;Rene Vidal;Alejandro Ribeiro", "authorids": "~Juan_Cervino1;~Luiz_F._O._Chamon1;~Benjamin_David_Haeffele1;~Rene_Vidal1;~Alejandro_Ribeiro1", "gender": "M;M;;;M", "homepage": "https://juancervino.github.io/;https://www.luizchamon.com;;http://www.vision.jhu.edu;https://alelab.seas.upenn.edu", "dblp": ";120/6982;;v/ReneVidal;32/15", "google_scholar": "lbyYN_sAAAAJ;https://scholar.google.ca/citations?user=FIm-l-sAAAAJ;;https://scholar.google.com/citations?hl=en;7mrPM4kAAAAJ", "orcid": ";0000-0001-7731-6650;;;0000-0003-4230-9906", "linkedin": ";luiz-chamon-abb07a18;;rene-vidal-74844928/;", "or_profile": "~Juan_Cervino1;~Luiz_F._O._Chamon1;~Benjamin_David_Haeffele1;~Rene_Vidal1;~Alejandro_Ribeiro1", "aff": "University of Pennsylvania;Universit\u00e4t Stuttgart;;Amazon;University of Pennsylvania", "aff_domain": "upenn.edu;uni-stuttgart.de;;amazon.com;upenn.edu", "position": "PhD student;Principal Researcher;;Principal Researcher;Full Professor", "bibtex": "@misc{\ncervino2023learning,\ntitle={Learning Globally Smooth Functions on Manifolds},\nauthor={Juan Cervino and Luiz F. O. Chamon and Benjamin David Haeffele and Rene Vidal and Alejandro Ribeiro},\nyear={2023},\nurl={https://openreview.net/forum?id=Kot3IIgXGbb}\n}", "github": "", "project": "", "reviewers": "FBLh;2RoL;HRYo", "site": "https://openreview.net/forum?id=Kot3IIgXGbb", "pdf_size": 1405481, "recommendation": "5;6;6", "confidence": "2;3;3", "correctness": "3;4;4", "technical_novelty": "2;3;3", "empirical_novelty": "2;2;2", "wc_summary_paper": "73;66;56", "wc_strength_and_weaknesses": "101;94;121", "wc_clarity_quality_novelty_and_reproducibility": "36;48;97", "wc_summary_review": "39;180;57", "wc_review": "249;388;331", "wc_reply_reviewers": "42;0;11", "wc_reply_authors": "562;296;495", "reply_reviewers": "1;0;1", "reply_authors": "2;1;2", "recommendation_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 65.0, 6.97614984548545 ], "wc_strength_and_weaknesses_avg": [ 105.33333333333333, 11.440668201153676 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 60.333333333333336, 26.386023236217735 ], "wc_summary_review_avg": [ 92.0, 62.65780079128216 ], "wc_review_avg": [ 322.6666666666667, 57.05163353392161 ], "wc_reply_reviewers_avg": [ 17.666666666666668, 17.78263822446552 ], "wc_reply_authors_avg": [ 451.0, 112.96312082563347 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.9999999999999997, "corr_recommendation_correctness": 0.9999999999999997, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1588214914814302179&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Pennsylvania;University of Stuttgart;Amazon", "aff_unique_dep": ";;Amazon.com, Inc.", "aff_unique_url": "https://www.upenn.edu;https://www.uni-stuttgart.de;https://www.amazon.com", "aff_unique_abbr": "UPenn;Uni Stuttgart;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;Germany" }, { "title": "Reparameterization through Spatial 
Gradient Scaling", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10836", "id": "Kpdewuy7RU6", "poster": "", "openreview": "https://openreview.net/forum?id=Kpdewuy7RU6", "slides": "https://iclr.cc/virtual/2023/poster/10836", "video": "https://iclr.cc/virtual/2023/poster/10836", "author_site": "Alexander Detkov, Mohammad Salameh, Muhammad Fetrat Qharabagh, Jialin Zhang, Wei Lu, SHANGLING JUI, Di Niu", "tldr": "", "abstract": "Reparameterization aims to improve the generalization of deep neural networks by transforming a convolution operation into equivalent multi-branched structures during training. However, there exists a gap in understanding how reparameterization may change and benefit learning processes for neural networks. In this paper, we present a novel spatial gradient scaling method to redistribute learning focus among weights in convolutional neural networks. We prove that spatial gradient scaling achieves the same learning dynamics as a branched reparameterization yet without introducing structural changes into the network. We further propose an analytical approach that dynamically learns scalings for each convolutional layer based on the spatial characteristics of its input feature map gauged by mutual information. Experiments on CIFAR-10, CIFAR-100, and ImageNet show that without searching for reparameterized structures, our proposed scaling method outperforms the state-of-the-art reparameterization methods at a lower computational cost.", "keywords": "reparameterization;deep learning;convolutional neural networks;neural architectures", "primary_area": "", "supplementary_material": "", "author": "Alexander Detkov;Mohammad Salameh;Muhammad Fetrat;Jialin Zhang;Robin Luwei;SHANGLING JUI;Di Niu", "authorids": "~Alexander_Detkov1;~Mohammad_Salameh1;fetratqh@ualberta.ca;~Jialin_Zhang2;robin.luwei@hisilicon.com;~SHANGLING_JUI1;~Di_Niu1", "gender": "M;M;;M;;M;M", "homepage": "http://alexanderdetkov.github.io/;;;https://github.com/ColinJLZhang;;;https://www.ualberta.ca/~dniu", "dblp": ";91/9402;;;;;82/4953", "google_scholar": "https://scholar.google.ca/citations?user=6-Qb6W4AAAAJ;https://scholar.google.ca/citations?hl=en;;;;;https://scholar.google.ca/citations?user=3kC5OogAAAAJ", "orcid": ";;;;;0000-0002-1047-4264;0000-0002-5250-7327", "linkedin": "alexander-d-detkov;mohammadsalameh;;;;;", "or_profile": "~Alexander_Detkov1;~Mohammad_Salameh1;fetratqh@ualberta.ca;~Jialin_Zhang2;robin.luwei@hisilicon.com;~SHANGLING_JUI1;~Di_Niu1", "aff": "University of Alberta;Huawei Technologies Ltd.;;Huawei Technologies Ltd.;;Huawei Technologies Ltd.;University of Alberta", "aff_domain": "ualberta.ca;huawei.com;;huawei.com;;huawei.com;ualberta.ca", "position": "Undergrad student;Principal Researcher;;Researcher;;Principal Researcher;Associate Professor", "bibtex": "@inproceedings{\ndetkov2023reparameterization,\ntitle={Reparameterization through Spatial Gradient Scaling},\nauthor={Alexander Detkov and Mohammad Salameh and Muhammad Fetrat and Jialin Zhang and Robin Luwei and SHANGLING JUI and Di Niu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=Kpdewuy7RU6}\n}", "github": "", "project": "", "reviewers": "xBEc;ahFw;v3Lx;SeDu", "pdf_size": 3847716, "recommendation": "6;6;8;8", "confidence": "3;4;4;3", "correctness": "4;4;3;3", "technical_novelty": "3;3;4;3", "empirical_novelty": "2;3;4;4", "wc_summary_paper": "82;15;51;20", "wc_strength_and_weaknesses": "244;70;149;125", 
"wc_clarity_quality_novelty_and_reproducibility": "223;21;31;19", "wc_summary_review": "102;14;66;17", "wc_review": "651;120;297;181", "wc_reply_reviewers": "93;0;0;0", "wc_reply_authors": "1219;431;799;144", "reply_reviewers": "1;0;0;0", "reply_authors": "4;1;2;1", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 42.0, 26.89795531262553 ], "wc_strength_and_weaknesses_avg": [ 147.0, 62.90071541723512 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 73.5, 86.43350044976773 ], "wc_summary_review_avg": [ 49.75, 36.55389856089224 ], "wc_review_avg": [ 312.25, 205.65185994782541 ], "wc_reply_reviewers_avg": [ 23.25, 40.2701812759764 ], "wc_reply_authors_avg": [ 648.25, 403.09637495269044 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": -1.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1756676414418056470&as_sdt=5,39&sciodt=0,39&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=Kpdewuy7RU6", "email": "ualberta.ca;huawei.com;;huawei.com;;huawei.com;ualberta.ca", "author_num": 7, "aff_unique_index": "0;1;1;1;0", "aff_unique_norm": "University of Alberta;Huawei", "aff_unique_dep": ";Huawei Technologies", "aff_unique_url": "https://www.ualberta.ca;https://www.huawei.com", "aff_unique_abbr": "UAlberta;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;0", "aff_country_unique": "Canada;China" }, { "id": "KrGgylZ0tw_", "title": "Test-time recalibration of conformal predictors under distribution shift based on unlabeled examples", "track": "main", "status": "Reject", "tldr": "We propose a novel test-time recalibration method for conformal prediction based on unlabeled examples that provides excellent uncertainty estimates under natural distribution shifts.", "abstract": "Modern image classifiers achieve high predictive accuracy, but the predictions typically come without reliable uncertainty estimates. Conformal prediction algorithms provide uncertainty estimates by predicting a set of classes based on the probability estimates of the classifier (for example, the softmax scores). To provide such sets, conformal prediction algorithms often rely on estimating a cutoff threshold for the probability estimates, and this threshold is chosen based on a calibration set. Conformal prediction methods guarantee reliability only when the calibration set is from the same distribution as the test set. Therefore, the methods need to be recalibrated for new distributions. However, in practice, labeled data from new distributions is rarely available, making calibration infeasible. In this work, we consider the problem of predicting the cutoff threshold for a new distribution only based on unlabeled examples. 
While it is impossible in general to guarantee reliability when calibrating based on unlabeled examples, we show that our method provides excellent uncertainty estimates under natural distribution shifts.", "keywords": "classification;uncertainty estimation;conformal prediction", "primary_area": "", "supplementary_material": "/attachment/e916a08b324d1e58c07b0972729e751669b7d699.zip", "author": "Fatih Furkan Yilmaz;Reinhard Heckel", "authorids": "~Fatih_Furkan_Yilmaz1;~Reinhard_Heckel1", "gender": "M;M", "homepage": ";", "dblp": "251/3198;81/9668", "google_scholar": ";ZWV0I7cAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Fatih_Furkan_Yilmaz1;~Reinhard_Heckel1", "aff": "Rice University;Technical University Munich", "aff_domain": "rice.edu;tum.de", "position": "PhD student;Assistant Professor", "bibtex": "@misc{\nyilmaz2023testtime,\ntitle={Test-time recalibration of conformal predictors under distribution shift based on unlabeled examples},\nauthor={Fatih Furkan Yilmaz and Reinhard Heckel},\nyear={2023},\nurl={https://openreview.net/forum?id=KrGgylZ0tw_}\n}", "github": "", "project": "", "reviewers": "h3g1;9GdY;4zCm;gDoT", "site": "https://openreview.net/forum?id=KrGgylZ0tw_", "pdf_size": 445057, "recommendation": "3;3;5;5", "confidence": "3;4;4;3", "correctness": "3;2;3;4", "technical_novelty": "2;2;3;2", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "68;67;61;108", "wc_strength_and_weaknesses": "288;109;602;223", "wc_clarity_quality_novelty_and_reproducibility": "10;4;2;75", "wc_summary_review": "33;3;35;36", "wc_review": "399;183;700;442", "wc_reply_reviewers": "0;0;0;110", "wc_reply_authors": "490;223;667;518", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 76.0, 18.66815470259447 ], "wc_strength_and_weaknesses_avg": [ 305.5, 182.78197394710452 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 22.75, 30.309858132297485 ], "wc_summary_review_avg": [ 26.75, 13.754544703478919 ], "wc_review_avg": [ 431.0, 183.71853472091487 ], "wc_reply_reviewers_avg": [ 27.5, 47.63139720814412 ], "wc_reply_authors_avg": [ 474.5, 160.03202804438865 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.7071067811865475, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11104901666161996369&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Rice University;Technical University of Munich", "aff_unique_dep": ";", "aff_unique_url": "https://www.rice.edu;https://www.tum.de", "aff_unique_abbr": "Rice;TUM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;Germany" }, { "id": "Krk0Gnft2Zc", "title": "Discrete State-Action Abstraction via the Successor Representation", "track": "main", "status": "Reject", "tldr": "We give a max-entropy regularized model for clustering states based on their successor representation, then train options to navigate between clusters.", "abstract": "While the difficulty of reinforcement learning problems is typically related to the complexity of their state spaces, abstraction proposes that solutions often
lie in simpler underlying latent spaces. Prior works have focused on learning either a continuous or dense abstraction, or require a human to provide one. Information-dense representations capture features irrelevant for solving tasks, and continuous spaces can struggle to represent discrete objects. In this work we automatically learn a sparse discrete abstraction of the underlying environment. We do so using a simple end-to-end trainable model based on the successor representation and max-entropy regularization. We describe an algorithm to apply our model, named Discrete State-Action Abstraction (DSAA), which computes an action abstraction in the form of temporally extended actions, i.e., Options, to transition between discrete abstract states. Empirically, we demonstrate the effects of different exploration schemes on our resulting abstraction, and show that it is efficient for solving downstream tasks.", "keywords": "reinforcement learning;abstraction;successor representation;options;discrete;sparse reward;representation learning;intrinsic motivation", "primary_area": "", "supplementary_material": "/attachment/33539f9877acbe0f8ac760d71f9afc83cddc0472.zip", "author": "Amnon Attali;Pedro Cisneros-Velarde;Marco Morales;Nancy Amato", "authorids": "~Amnon_Attali1;~Pedro_Cisneros-Velarde1;~Marco_Morales1;~Nancy_Amato1", "gender": ";;F;M", "homepage": "https://parasollab.web.illinois.edu/people/aattali2/;https://sites.google.com/view/pcisnerosv;https://parasollab.web.illinois.edu/people/amato/;https://parasollab.web.illinois.edu/people/moralesa/", "dblp": ";188/3800;;63/6094", "google_scholar": ";VvVRo5oAAAAJ;AmaB9c4AAAAJ;UcID0wkAAAAJ", "orcid": ";;;0000-0003-1824-2350", "linkedin": ";;;", "or_profile": "~Amnon_Attali1;~Pedro_Cisneros-Velarde1;~Nancy_Amato1;~Marco_Antonio_Morales_Aguirre1", "aff": "University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;Instituto Tecnol\u00f3gico Auton\u00f3mo de M\u00e9xico", "aff_domain": "illinois.edu;illinois.edu;illinois.edu;itam.mx", "position": "PhD student;Postdoc;Full Professor;Associate Professor", "bibtex": "@misc{\nattali2023discrete,\ntitle={Discrete State-Action Abstraction via the Successor Representation},\nauthor={Amnon Attali and Pedro Cisneros-Velarde and Marco Morales and Nancy Amato},\nyear={2023},\nurl={https://openreview.net/forum?id=Krk0Gnft2Zc}\n}", "github": "", "project": "", "reviewers": "TM9s;icxB;vZz4;C2ny", "site": "https://openreview.net/forum?id=Krk0Gnft2Zc", "pdf_size": 1267863, "recommendation": "3;3;5;8", "confidence": "3;4;4;4", "correctness": "3;3;3;4", "technical_novelty": "3;2;2;4", "empirical_novelty": "1;2;2;4", "wc_summary_paper": "57;89;99;70", "wc_strength_and_weaknesses": "130;308;475;85", "wc_clarity_quality_novelty_and_reproducibility": "73;117;10;41", "wc_summary_review": "120;71;55;19", "wc_review": "380;585;639;215", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "306;327;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;0;0", "recommendation_avg": [ 4.75, 2.0463381929681126 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 2.25, 1.0897247358851685 ], "wc_summary_paper_avg": [ 78.75, 16.315253599009733 ], "wc_strength_and_weaknesses_avg": [ 249.5, 154.6067592312833 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 60.25, 39.619281921811755 ], "wc_summary_review_avg": [ 66.25, 36.29996556472196 
], "wc_review_avg": [ 454.75, 168.80517616471363 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 158.25, 158.42407487500125 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.49374193110101877, "corr_recommendation_correctness": 0.9169493006161777, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17488319375208419145&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University of Illinois Urbana-Champaign;Instituto Tecnol\u00f3gico Aut\u00f3nomo de M\u00e9xico", "aff_unique_dep": ";", "aff_unique_url": "https://illinois.edu;https://www.itam.mx", "aff_unique_abbr": "UIUC;ITAM", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Urbana-Champaign;", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "United States;Mexico" }, { "title": "Equiformer: Equivariant Graph Attention Transformer for 3D Atomistic Graphs", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12104", "id": "KwmPfARgOTD", "poster": "/media/PosterPDFs/ICLR%202023/12104.png?t=1682359093.9226995", "openreview": "https://openreview.net/forum?id=KwmPfARgOTD", "slides": "https://iclr.cc/virtual/2023/poster/12104", "video": "https://iclr.cc/virtual/2023/poster/12104", "author_site": "Yi-Lun Liao, Tess Smidt", "tldr": "We propose an equivariant graph neural network based on Transformer networks and propose a novel attention mechanism, which improves upon self-attention in typical Transformers.", "abstract": "Despite their widespread success in various domains, Transformer networks have yet to perform well across datasets in the domain of 3D atomistic graphs such as molecules even when 3D-related inductive biases like translational invariance and rotational equivariance are considered. In this paper, we demonstrate that Transformers can generalize well to 3D atomistic graphs and present Equiformer, a graph neural network leveraging the strength of Transformer architectures and incorporating SE(3)/E(3)-equivariant features based on irreducible representations (irreps). First, we propose a simple and effective architecture by only replacing original operations in Transformers with their equivariant counterparts and including tensor products. Using equivariant operations enables encoding equivariant information in channels of irreps features without complicating graph structures. With minimal modifications to Transformers, this architecture has already achieved strong empirical results. Second, we propose a novel attention mechanism called equivariant graph attention, which improves upon typical attention in Transformers through replacing dot product attention with multi-layer perceptron attention and including non-linear message passing. 
With these two innovations, Equiformer achieves results competitive with previous models on QM9, MD17 and OC20 datasets.", "keywords": "equivariant neural networks;graph neural networks;computational physics;transformer networks", "primary_area": "", "supplementary_material": "", "author": "Yi-Lun Liao;Tess Smidt", "authorids": "~Yi-Lun_Liao1;~Tess_Smidt1", "gender": "M;F", "homepage": ";https://blondegeek.github.io/", "dblp": "225/6644.html;215/4978.html", "google_scholar": ";", "orcid": ";0000-0001-5581-5344", "linkedin": "yilunliao/;", "or_profile": "~Yi-Lun_Liao1;~Tess_Smidt1", "aff": "Meta Facebook;Massachusetts Institute of Technology", "aff_domain": "meta.com;mit.edu", "position": "Intern;Assistant Professor", "bibtex": "@inproceedings{\nliao2023equiformer,\ntitle={Equiformer: Equivariant Graph Attention Transformer for 3D Atomistic Graphs},\nauthor={Yi-Lun Liao and Tess Smidt},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=KwmPfARgOTD}\n}", "github": "", "project": "", "reviewers": "8jGX;1yMo;d6yC;sfAh", "pdf_size": 1590630, "recommendation": "5;6;6;8", "confidence": "2;4;4;4", "correctness": "3;3;3;4", "technical_novelty": "3;2;2;2", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "57;27;77;56", "wc_strength_and_weaknesses": "89;89;233;94", "wc_clarity_quality_novelty_and_reproducibility": "194;34;66;38", "wc_summary_review": "50;43;43;224", "wc_review": "390;193;419;412", "wc_reply_reviewers": "38;109;96;0", "wc_reply_authors": "1439;1229;1777;978", "reply_reviewers": "1;3;1;0", "reply_authors": "5;6;4;3", "recommendation_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 54.25, 17.82379028153103 ], "wc_strength_and_weaknesses_avg": [ 126.25, 61.66593468034033 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 83.0, 65.26101439603893 ], "wc_summary_review_avg": [ 90.0, 77.41769823496433 ], "wc_review_avg": [ 353.5, 93.280490993562 ], "wc_reply_reviewers_avg": [ 60.75, 44.09861108923953 ], "wc_reply_authors_avg": [ 1355.75, 292.89193826392693 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 4.5, 1.118033988749895 ], "replies_avg": [ 34, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.6622661785325219, "corr_recommendation_correctness": 0.9271726499455306, "gs_citation": 262, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15539620048638480723&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=KwmPfARgOTD", "email": "meta.com;mit.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Meta;Massachusetts Institute of Technology", "aff_unique_dep": "Meta Platforms, Inc.;", "aff_unique_url": "https://meta.com;https://web.mit.edu", "aff_unique_abbr": "Meta;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Efficient Certified Training and Robustness Verification of Neural ODEs", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10747", "id": "KyoVpYvWWnK", "poster": "", "openreview": "https://openreview.net/forum?id=KyoVpYvWWnK", "slides": "https://iclr.cc/virtual/2023/poster/10747", "video":
"https://iclr.cc/virtual/2023/poster/10747", "author_site": "Mustafa Zeqiri, Mark N M\u00fcller, Marc Fischer, Martin Vechev", "tldr": "We enable certified training and scalable robustness verification of neural ODEs.", "abstract": "Neural Ordinary Differential Equations (NODEs) are a novel neural architecture, built around initial value problems with learned dynamics which are solved during inference. Thought to be inherently more robust against adversarial perturbations, they were recently shown to be vulnerable to strong adversarial attacks, highlighting the need for formal guarantees. However, despite significant progress in robustness verification for standard feed-forward architectures, the verification of high dimensional NODEs remains an open problem. In this work we address this challenge and propose GAINS, an analysis framework for NODEs combining three key ideas: (i) a novel class of ODE solvers, based on variable but discrete time steps, (ii) an efficient graph representation of solver trajectories, and (iii) a novel abstraction algorithm operating on this graph representation. Together, these advances enable the efficient analysis and certified training of high-dimensional NODEs, by reducing the runtime from an intractable $\\mathcal{O}(\\exp(d)+\\exp(T))$ to $\\mathcal{O}(d+T^2\\log^2T)$ in the dimensionality $d$ and integration time $T$. In an extensive evaluation on computer vision (MNIST and Fashion-MNIST) and time-series forecasting (Physio-Net) problems, we demonstrate the effectiveness of both our certified training and verification methods.", "keywords": "Neural ODEs;Adversarial Robustness;Certified Robustness;Robustness Verification;Certified Training", "primary_area": "", "supplementary_material": "/attachment/408f3b313ca5d6d3606122210675062949336be0.zip", "author": "Mustafa Zeqiri;Mark Niklas Mueller;Marc Fischer;Martin Vechev", "authorids": "~Mustafa_Zeqiri1;~Mark_Niklas_Mueller2;~Marc_Fischer1;~Martin_Vechev1", "gender": "M;M;M;M", "homepage": ";https://www.sri.inf.ethz.ch/people/mark;;https://www.sri.inf.ethz.ch/people/martin", "dblp": ";287/4254;37/9373-2;93/2189.html", "google_scholar": ";RBpmcCAAAAAJ;;https://scholar.google.ch/citations?user=aZ1Rh50AAAAJ", "orcid": " 0000-0001-5379-6792;0000-0002-2496-6542;;", "linkedin": ";mark-m%C3%BCller-8bb4b1140/;;", "or_profile": "~Mustafa_Zeqiri1;~Mark_Niklas_Mueller2;~Marc_Fischer1;~Martin_Vechev1", "aff": "ETHZ - ETH Zurich;Swiss Federal Institute of Technology;Swiss Federal Institute of Technology;Swiss Federal Institute of Technology", "aff_domain": "ethz.ch;ethz.ch;ethz.ch;ethz.ch", "position": "MS student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nzeqiri2023efficient,\ntitle={Efficient Certified Training and Robustness Verification of Neural {ODE}s},\nauthor={Mustafa Zeqiri and Mark Niklas Mueller and Marc Fischer and Martin Vechev},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=KyoVpYvWWnK}\n}", "github": "", "project": "", "reviewers": "CQh5;bH4g;LBBD;yJLc", "pdf_size": 418670, "recommendation": "6;6;8;8", "confidence": "4;3;3;4", "correctness": "4;4;3;4", "technical_novelty": "3;3;3;3", "empirical_novelty": "2;3;3;0", "wc_summary_paper": "322;63;74;115", "wc_strength_and_weaknesses": "395;56;293;281", "wc_clarity_quality_novelty_and_reproducibility": "147;68;66;23", "wc_summary_review": "67;27;60;66", "wc_review": "931;214;493;485", "wc_reply_reviewers": "0;0;18;31", "wc_reply_authors": 
"482;227;315;662", "reply_reviewers": "0;0;1;1", "reply_authors": "1;2;1;1", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 1.224744871391589 ], "wc_summary_paper_avg": [ 143.5, 104.86300586956298 ], "wc_strength_and_weaknesses_avg": [ 256.25, 123.80907680780113 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 76.0, 44.760473634670134 ], "wc_summary_review_avg": [ 55.0, 16.38596960817394 ], "wc_review_avg": [ 530.75, 256.92837036808527 ], "wc_reply_reviewers_avg": [ 12.25, 13.083864108129525 ], "wc_reply_authors_avg": [ 421.5, 166.3377587921636 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": -0.5773502691896257, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18163787597971319428&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=KyoVpYvWWnK", "email": "ethz.ch;ethz.ch;ethz.ch;ethz.ch", "author_num": 4, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "ETH Zurich;Swiss Federal Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.ethz.ch;https://www.ethz.ch", "aff_unique_abbr": "ETHZ;ETH Zurich", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Switzerland" }, { "id": "KyxJ9Yfxo2", "title": "Hidden Schema Networks", "track": "main", "status": "Reject", "tldr": "A neural language model that discovers networks of symbols (schemata) from text datasets via a VAE framework with pretrained BERT and GPT-2 as encoder and decoder, respectively.", "abstract": "Most modern language models infer representations that, albeit powerful, lack both compositionality and semantic interpretability. Starting from the assumption that a large proportion of semantic content is necessarily relational, we introduce a neural language model that discovers networks of symbols (schemata) from text datasets. Using a variational autoencoder (VAE) framework, our model encodes sentences into sequences of symbols (composed representation), which correspond to the nodes visited by biased random walkers on a global latent graph. We first demonstrate that the model is able to uncover ground-truth graphs from artificially generated datasets of random token sequences. Next we leverage pretrained BERT and GPT-2 language models as encoder and decoder, respectively, to train our model on language modelling and commonsense knowledge generation tasks. Qualitatively, the model is able to infer schema networks whose nodes (symbols) can be interpreted as encoding different aspects of natural language (as e.g. topics, sentiments). Quantitatively, our results show that the model successfully interprets the encoded symbol sequences, as it achieves state-of-the-art scores on VAE language modeling benchmarks. 
Source code to reproduce all experiments is provided with the supplementary material.", "keywords": "Discrete representation learning;Unsupervised knowledge graph learning;Relational inductive biases;Semantic representation;Pretrained language models;Discrete VAE;Neuro-symbolic AI;Language modelling", "primary_area": "", "supplementary_material": "/attachment/88ff8bfcf0cf8682c73257716bfd65eeda469589.zip", "author": "Ramses J Sanchez;Lukas Alexander Conrads;Pascal Welke;Kostadin Cvejoski;Cesar Ojeda", "authorids": "~Ramses_J_Sanchez1;~Lukas_Alexander_Conrads2;~Pascal_Welke1;~Kostadin_Cvejoski1;~Cesar_Ojeda1", "gender": "M;M;M;M;M", "homepage": ";;https://pwelke.de;;https://dblp.org/pid/167/6031.html", "dblp": "243/6057;;174/0119;185/2496;", "google_scholar": "https://scholar.google.de/citations?user=32AoF2wAAAAJ;;https://scholar.google.de/citations?user=hgwvC6gAAAAJ;k1WSOfoAAAAJ;_-oU1lEAAAAJ", "orcid": ";;0000-0002-2123-3781;;", "linkedin": ";lukas-alexander-conrads-a755b423a/;;;", "or_profile": "~Ramses_J_Sanchez1;~Lukas_Alexander_Conrads2;~Pascal_Welke1;~Kostadin_Cvejoski1;~Cesar_Ojeda1", "aff": "Rheinische Friedrich-Wilhelms Universit\u00e4t Bonn;Rheinische Friedrich-Wilhelms Universit\u00e4t Bonn;University of Bonn;Fraunhofer IAIS;Universit\u00e4t Potsdam", "aff_domain": "bit.uni-bonn.de;uni-bonn.de;uni-bonn.de;iais.fraunhofer.de;uni-potsdam.de", "position": "Postdoc;MS student;Postdoc;Researcher;Postdoc", "bibtex": "@misc{\nsanchez2023hidden,\ntitle={Hidden Schema Networks},\nauthor={Ramses J Sanchez and Lukas Alexander Conrads and Pascal Welke and Kostadin Cvejoski and Cesar Ojeda},\nyear={2023},\nurl={https://openreview.net/forum?id=KyxJ9Yfxo2}\n}", "github": "", "project": "", "reviewers": "Dats;nMiy;5yft;tUks", "site": "https://openreview.net/forum?id=KyxJ9Yfxo2", "pdf_size": 1948618, "recommendation": "3;3;8;8", "confidence": "5;4;3;3", "correctness": "2;2;3;3", "technical_novelty": "3;2;3;4", "empirical_novelty": "3;2;3;4", "wc_summary_paper": "59;41;146;38", "wc_strength_and_weaknesses": "229;477;259;214", "wc_clarity_quality_novelty_and_reproducibility": "60;23;139;21", "wc_summary_review": "71;180;75;89", "wc_review": "419;721;619;362", "wc_reply_reviewers": "398;0;0;304", "wc_reply_authors": "1452;832;740;1240", "reply_reviewers": "1;0;0;2", "reply_authors": "3;2;1;3", "recommendation_avg": [ 5.5, 2.5 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 71.0, 44.039754767709596 ], "wc_strength_and_weaknesses_avg": [ 294.75, 106.462141158254 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 60.75, 47.77224612680463 ], "wc_summary_review_avg": [ 103.75, 44.52737921773524 ], "wc_review_avg": [ 530.25, 145.72812871920095 ], "wc_reply_reviewers_avg": [ 175.5, 178.6190079470827 ], "wc_reply_authors_avg": [ 1066.0, 291.6779045454078 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.9045340337332909, "corr_recommendation_correctness": 1.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4929522955020407608&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff_unique_index": "0;0;1;2;3", "aff_unique_norm": "Rheinische Friedrich-Wilhelms Universit\u00e4t Bonn;University of Bonn;Fraunhofer Institute for Applied Information 
Technology;University of Potsdam", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.uni-bonn.de/;https://www.uni-bonn.de/;https://www.iais.fraunhofer.de/;https://www.uni-potsdam.de", "aff_unique_abbr": "Uni Bonn;UBonn;Fraunhofer IAIS;UP", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Germany" }, { "id": "Kyz1SaAcnd", "title": "Adversarial Policies Beat Professional-Level Go AIs", "track": "main", "status": "Reject", "tldr": "", "abstract": "We attack the state-of-the-art Go-playing AI system, KataGo, by training an adversarial policy that plays against a frozen KataGo victim. Our attack achieves a >99% win-rate against KataGo without search, and a >80% win-rate when KataGo uses enough search to be near-superhuman. To the best of our knowledge, this is the first successful end-to-end attack against a Go AI playing at the level of a top human professional. Notably, the adversary does not win by learning to play Go better than KataGo---in fact, the adversary is easily beaten by human amateurs. Instead, the adversary wins by tricking KataGo into ending the game prematurely at a point that is favorable to the adversary. Our results demonstrate that even professional-level AI systems may harbor surprising failure modes.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Tony Tong Wang;Adam Gleave;Nora Belrose;Tom Tseng;Michael D Dennis;Yawen Duan;Viktor Pogrebniak;Sergey Levine;Stuart Russell", "authorids": "~Tony_Tong_Wang1;~Adam_Gleave1;belrose.nora@gmail.com;~Tom_Tseng1;~Michael_D_Dennis1;~Yawen_Duan1;~Viktor_Pogrebniak1;~Sergey_Levine1;~Stuart_Russell1", "gender": "M;M;;M;M;M;M;M;M", "homepage": "https://tonytwang.net;https://gleave.me;;https://tomhmtseng.com/;;;;https://people.eecs.berkeley.edu/~svlevine/;https://people.eecs.berkeley.edu/~russell/", "dblp": ";189/0008.html;;;;;;80/7594;", "google_scholar": "YWiob00AAAAJ;lBunDH0AAAAJ;;okAQFcwAAAAJ;WXXu26AAAAAJ;IJQlPvYAAAAJ;;8R35rCwAAAAJ;https://scholar.google.com.tw/citations?user=KJGrjCAAAAAJ", "orcid": ";0000-0002-3467-528X;;0000-0002-6422-288X;;;;;", "linkedin": ";adamgleave/;;https://linkedin.com/in/tomtseng/;;yawen-duan/;avtomaton;;", "or_profile": "~Tony_Tong_Wang1;~Adam_Gleave1;belrose.nora@gmail.com;~Tom_Tseng1;~Michael_D_Dennis1;~Yawen_Duan1;~Viktor_Pogrebniak1;~Sergey_Levine1;~Stuart_Russell1", "aff": "Massachusetts Institute of Technology;FAR.AI;;FAR AI;University of California, Berkeley;University of Cambridge;;Google;University of California, Berkeley", "aff_domain": "mit.edu;far.ai;;far.ai;berkeley.edu;cam.ac.uk;;google.com;berkeley.edu", "position": "PhD student;Principal Researcher;;Researcher;PhD student;MS student;;Research Scientist;Full Professor", "bibtex": "@misc{\nwang2023adversarial,\ntitle={Adversarial Policies Beat Professional-Level Go {AI}s},\nauthor={Tony Tong Wang and Adam Gleave and Nora Belrose and Tom Tseng and Michael D Dennis and Yawen Duan and Viktor Pogrebniak and Sergey Levine and Stuart Russell},\nyear={2023},\nurl={https://openreview.net/forum?id=Kyz1SaAcnd}\n}", "github": "", "project": "", "reviewers": "6zA8;t1v2;7dnb", "site": "https://openreview.net/forum?id=Kyz1SaAcnd", "pdf_size": 624898, "recommendation": "3;5;6", "confidence": "5;4;5", "correctness": "1;3;4", "technical_novelty": "2;2;4", "empirical_novelty": "1;2;4", "wc_summary_paper": "53;105;106", "wc_strength_and_weaknesses": "414;430;151", "wc_clarity_quality_novelty_and_reproducibility": "46;164;7", "wc_summary_review": "21;57;5", 
"wc_review": "534;756;269", "wc_reply_reviewers": "480;64;0", "wc_reply_authors": "2398;1027;135", "reply_reviewers": "2;1;0", "reply_authors": "5;2;1", "recommendation_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 1.247219128924647 ], "technical_novelty_avg": [ 2.6666666666666665, 0.9428090415820634 ], "empirical_novelty_avg": [ 2.3333333333333335, 1.247219128924647 ], "wc_summary_paper_avg": [ 88.0, 24.752104287649296 ], "wc_strength_and_weaknesses_avg": [ 331.6666666666667, 127.91750814055474 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 72.33333333333333, 66.74495403316185 ], "wc_summary_review_avg": [ 27.666666666666668, 21.74600857373345 ], "wc_review_avg": [ 519.6666666666666, 199.0750835461055 ], "wc_reply_reviewers_avg": [ 181.33333333333334, 212.79933166144 ], "wc_reply_authors_avg": [ 1186.6666666666667, 930.7388940454186 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 2.6666666666666665, 1.699673171197595 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 9, 0 ], "corr_recommendation_confidence": -0.18898223650461357, "corr_recommendation_correctness": 1.0, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10723606396802185050&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1;2;3;4;5;3", "aff_unique_norm": "Massachusetts Institute of Technology;FAR.AI;FAR AI;University of California, Berkeley;University of Cambridge;Google", "aff_unique_dep": ";;;;;Google", "aff_unique_url": "https://web.mit.edu;https://www.far.ai;https://www.far.ai;https://www.berkeley.edu;https://www.cam.ac.uk;https://www.google.com", "aff_unique_abbr": "MIT;FAR.AI;FAR AI;UC Berkeley;Cambridge;Google", "aff_campus_unique_index": "1;2;3;1", "aff_campus_unique": ";Berkeley;Cambridge;Mountain View", "aff_country_unique_index": "0;0;0;0;1;0;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "KzfhxLoh6s0", "title": "Robust Constrained Reinforcement Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Constrained reinforcement learning is to maximize the reward subject to constraints on utilities/costs. However, in practice it is often the case that the training environment is not the same as the test one, due to, e.g., modeling error, adversarial attack, non-stationarity, resulting in severe performance degradation and more importantly constraint violation in the test environment. To address this challenge, we formulate the framework of robust constrained reinforcement learning under model uncertainty, where the MDP is not fixed but lies in some uncertainty set. The goal is two fold: 1) to guarantee that constraints on utilities/costs are satisfied for all MDPs in the uncertainty set, and 2) to maximize the worst-case reward performance over the uncertainty set. We design a robust primal-dual approach, and further develop theoretical guarantee on its convergence, complexity and robust feasibility. We then investigate a concrete example of $\\delta$-contamination uncertainty set, design an online and model-free algorithm and theoretically characterize its sample complexity. 
", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/5d069961a62d1325edf48db800f84a346b2c9def.zip", "author": "Yue Wang;Fei Miao;Shaofeng Zou", "authorids": "~Yue_Wang16;~Fei_Miao1;~Shaofeng_Zou1", "gender": ";F;", "homepage": "https://sites.google.com/view/ywangub;http://www.feimiao.org;", "dblp": "33/4822-68;143/6002;", "google_scholar": "ndMi_z8AAAAJ;fH2YF6YAAAAJ;", "orcid": "0009-0001-9786-052X;0000-0003-0066-4379;", "linkedin": ";fei-miao-76964727/;", "or_profile": "~Yue_Wang16;~Fei_Miao1;~Shaofeng_Zou1", "aff": "State University of New York, Buffalo;University of Connecticut;", "aff_domain": "buffalo.edu;uconn.edu;", "position": "PhD student;Assistant Professor;", "bibtex": "@misc{\nwang2023,\ntitle={ Robust Constrained Reinforcement Learning},\nauthor={Yue Wang and Fei Miao and Shaofeng Zou},\nyear={2023},\nurl={https://openreview.net/forum?id=KzfhxLoh6s0}\n}", "github": "", "project": "", "reviewers": "yY2u;TPvy;gZnw", "site": "https://openreview.net/forum?id=KzfhxLoh6s0", "pdf_size": 1704885, "recommendation": "3;5;6", "confidence": "5;4;3", "correctness": "3;2;4", "technical_novelty": "2;3;3", "empirical_novelty": "2;0;2", "wc_summary_paper": "132;84;146", "wc_strength_and_weaknesses": "378;341;150", "wc_clarity_quality_novelty_and_reproducibility": "30;33;184", "wc_summary_review": "33;46;72", "wc_review": "573;504;552", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.3333333333333333, 0.9428090415820634 ], "wc_summary_paper_avg": [ 120.66666666666667, 26.5497436689865 ], "wc_strength_and_weaknesses_avg": [ 289.6666666666667, 99.90773521382394 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 82.33333333333333, 71.89962138673302 ], "wc_summary_review_avg": [ 50.333333333333336, 16.21384867602041 ], "wc_review_avg": [ 543.0, 28.879058156387302 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.9819805060619659, "corr_recommendation_correctness": 0.3273268353539886, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8978600799566283071&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1", "aff_unique_norm": "State University of New York at Buffalo;University of Connecticut", "aff_unique_dep": ";", "aff_unique_url": "https://www.buffalo.edu;https://www.uconn.edu", "aff_unique_abbr": "SUNY Buffalo;UConn", "aff_campus_unique_index": "0", "aff_campus_unique": "Buffalo;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Training language models to summarize narratives improves brain alignment", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11210", "id": "KzkLAE49H9b", "poster": "/media/PosterPDFs/ICLR%202023/11210.png?t=1681984950.3446853", "openreview": "https://openreview.net/forum?id=KzkLAE49H9b", "slides": "https://iclr.cc/virtual/2023/poster/11210", "video": "https://iclr.cc/virtual/2023/poster/11210", "author_site": "Khai Loong Aw, Mariya Toneva", "tldr": "We show that training language models for deeper narrative understanding 
(characters, emotions, relationships) results in richer representations that have improved alignment to human brain activity.", "abstract": "Building systems that achieve a deeper understanding of language is one of the central goals of natural language processing (NLP). Towards this goal, recent works have begun to train language models on narrative datasets which require extracting the most critical information by integrating across long contexts. However, it is still an open question whether these models are learning a deeper understanding of the text, or if the models are simply learning a heuristic to complete the task. This work investigates this further by turning to the one language processing system that truly understands complex language: the human brain. We show that training language models for deeper narrative understanding results in richer representations that have improved alignment to human brain activity. We further find that the improvements in brain alignment are larger for character names than for other discourse features, which indicates that these models are learning important narrative elements. Taken together, these results suggest that this type of training can indeed lead to deeper language understanding. These findings have consequences both for cognitive neuroscience by revealing some of the significant factors behind brain-NLP alignment, and for NLP by highlighting that understanding of long-range context can be improved beyond language modeling.", "keywords": "language;nlp;neuroscience;fMRI;interpretability", "primary_area": "", "supplementary_material": "", "author": "Khai Loong Aw;Mariya Toneva", "authorids": "~Khai_Loong_Aw1;~Mariya_Toneva1", "gender": "M;F", "homepage": "https://awwkl.github.io/;https://mtoneva.com", "dblp": "313/9975;160/4677", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.ca/citations?user=a61sk-4AAAAJ", "orcid": ";0000-0002-2407-9871", "linkedin": ";", "or_profile": "~Khai_Loong_Aw1;~Mariya_K_Toneva1", "aff": "Singapore Management University;Max Planck Institute for Software Systems", "aff_domain": "smu.edu.sg;mpi-sws.org", "position": "Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\naw2023training,\ntitle={Training language models to summarize narratives improves brain alignment},\nauthor={Khai Loong Aw and Mariya Toneva},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=KzkLAE49H9b}\n}", "github": "", "project": "", "reviewers": "m5W3;ajVU;nyjm;Z82i", "pdf_size": 12374408, "recommendation": "5;6;8;8", "confidence": "4;4;3;4", "correctness": "3;3;3;3", "technical_novelty": "4;3;4;2", "empirical_novelty": "4;3;3;4", "wc_summary_paper": "66;52;49;99", "wc_strength_and_weaknesses": "417;366;33;215", "wc_clarity_quality_novelty_and_reproducibility": "220;119;83;182", "wc_summary_review": "53;102;68;65", "wc_review": "756;639;233;561", "wc_reply_reviewers": "0;179;0;0", "wc_reply_authors": "790;792;420;764", "reply_reviewers": "0;1;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 3.25, 0.82915619758885 ], "empirical_novelty_avg": [ 3.5, 0.5 ], "wc_summary_paper_avg": [ 66.5, 19.83053201505194 ], "wc_strength_and_weaknesses_avg": [ 257.75, 149.5148404005435 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 151.0, 53.3151010502653 ], 
"wc_summary_review_avg": [ 72.0, 18.207141456033124 ], "wc_review_avg": [ 547.25, 194.25289573131207 ], "wc_reply_reviewers_avg": [ 44.75, 77.50927363870726 ], "wc_reply_authors_avg": [ 691.5, 157.13926943956434 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.5555555555555555, "corr_recommendation_correctness": 0.0, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8209084847839902675&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=KzkLAE49H9b", "email": "smu.edu.sg;mpi-sws.org", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Singapore Management University;Max Planck Institute for Software Systems", "aff_unique_dep": ";", "aff_unique_url": "https://www.smu.edu.sg;https://www.mpi-sws.org", "aff_unique_abbr": "SMU;MPI-SWS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Singapore;Germany" }, { "title": "CoRTX: Contrastive Framework for Real-time Explanation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12109", "id": "L2MUOUp0beo", "poster": "", "openreview": "https://openreview.net/forum?id=L2MUOUp0beo", "slides": "https://iclr.cc/virtual/2023/poster/12109", "video": "https://iclr.cc/virtual/2023/poster/12109", "author_site": "Yu-Neng Chuang, Guanchu Wang, Fan Yang, Quan Zhou, Pushkar Tripathi, Xuanting Cai, Xia Hu", "tldr": "Learning real-time model explainer with limited explanation labels.", "abstract": "Recent advancements in explainable machine learning provide effective and faithful solutions for interpreting model behaviors. However, many explanation methods encounter efficiency issues, which largely limit their deployments in practical scenarios. Real-time explainer (RTX) frameworks have thus been proposed to accelerate the model explanation process by learning an one-feed-forward explainer. Existing RTX frameworks typically build the explainer under the supervised learning paradigm, which requires large amounts of explanation labels as the ground truth. Considering that accurate explanation labels are usually hard to obtain, due to constrained computational resources and limited human efforts, effective explainer training is still challenging in practice. In this work, we propose a COntrastive Real-Time eXplanation (CoRTX) framework to learn the explanation-oriented representation and relieve the intensive dependence of explainer training on explanation labels. Specifically, we design a synthetic strategy to select positive and negative instances for explanation representation learning. Theoretical analysis show that our selection strategy can benefit the contrastive learning process on explanation tasks. 
Experimental results on three real-world datasets further demonstrate the efficiency and efficacy of our proposed CoRTX framework.", "keywords": "Interpretability;explainability;real-time explanation;feature attribution;feature importance ranking", "primary_area": "", "supplementary_material": "", "author": "Yu-Neng Chuang;Guanchu Wang;Fan Yang;Quan Zhou;Pushkar Tripathi;Xuanting Cai;Xia Hu", "authorids": "~Yu-Neng_Chuang1;~Guanchu_Wang1;~Fan_Yang27;~Quan_Zhou5;~Pushkar_Tripathi2;~Xuanting_Cai1;~Xia_Hu4", "gender": "M;M;M;M;;M;M", "homepage": ";https://guanchuwang.github.io/home;https://yangfan.sites.wfu.edu/;;;;https://cs.rice.edu/~xh37/index.html", "dblp": "207/7875;213/0985;;;;;256/9406.html", "google_scholar": ";_QL5218AAAAJ;RXFeW-8AAAAJ;;;https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=pcCS60IAAAAJ", "orcid": ";;0000-0003-3442-754X;;;;", "linkedin": "ync/;;;quan-zhou-quantum/;pushkar-tripathi-88391442/;xuanting-c-093b983a/;", "or_profile": "~Yu-Neng_Chuang1;~Guanchu_Wang1;~Fan_Yang27;~Quan_Zhou5;~Pushkar_Tripathi2;~Xuanting_Cai1;~Xia_Hu2", "aff": "Rice University;Rice University;Rice University;;;Meta Facebook;Rice University", "aff_domain": "rice.edu;rice.edu;rice.edu;;;facebook.com;rice.edu", "position": "PhD student;PhD student;PhD student;;;Principal Researcher;Associate Professor", "bibtex": "@inproceedings{\nchuang2023cortx,\ntitle={Co{RTX}: Contrastive Framework for Real-time Explanation},\nauthor={Yu-Neng Chuang and Guanchu Wang and Fan Yang and Quan Zhou and Pushkar Tripathi and Xuanting Cai and Xia Hu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=L2MUOUp0beo}\n}", "github": "", "project": "", "reviewers": "bBUe;A6qm;MLHw;ZCbd", "pdf_size": 18115616, "recommendation": "6;6;6;8", "confidence": "4;3;3;3", "correctness": "2;3;2;3", "technical_novelty": "2;3;2;3", "empirical_novelty": "2;0;1;3", "wc_summary_paper": "163;126;273;72", "wc_strength_and_weaknesses": "327;214;365;427", "wc_clarity_quality_novelty_and_reproducibility": "152;56;77;4", "wc_summary_review": "22;53;165;58", "wc_review": "664;449;880;561", "wc_reply_reviewers": "24;18;256;8", "wc_reply_authors": "1794;1040;2743;609", "reply_reviewers": "1;1;2;1", "reply_authors": "4;3;7;2", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 1.5, 1.118033988749895 ], "wc_summary_paper_avg": [ 158.5, 73.60197008232863 ], "wc_strength_and_weaknesses_avg": [ 333.25, 77.55119276967956 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 72.25, 53.16189895028205 ], "wc_summary_review_avg": [ 74.5, 54.03933752369657 ], "wc_review_avg": [ 638.5, 158.81514411415557 ], "wc_reply_reviewers_avg": [ 76.5, 103.79185902564805 ], "wc_reply_authors_avg": [ 1546.5, 810.6042499271763 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 4.0, 1.8708286933869707 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15402735822845392479&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=L2MUOUp0beo", "email": "rice.edu;rice.edu;rice.edu;;;facebook.com;rice.edu", "author_num": 7, "aff_unique_index": "0;0;0;1;0", 
"aff_unique_norm": "Rice University;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://www.rice.edu;https://meta.com", "aff_unique_abbr": "Rice;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "L3zKVQKyV4F", "title": "Visual Timing For Sound Source Depth Estimation in the Wild", "track": "main", "status": "Reject", "tldr": "We propose a passive audiovisual depth estimation scheme based on the difference of propagation velocity between light and sound.", "abstract": "Depth estimation enables a wide variety of 3D applications, such as robotics and autonomous driving. Despite significant work on various depth sensors, it is challenging to develop an all-in-one method to meet multiple basic criteria. In this paper, we propose a novel audio-visual learning scheme by integrating semantic features with physical spatial cues to boost monocular depth with only one microphone. Inspired by the flash-to-bang theory, we develop FBDepth, the first passive audio-visual depth estimation framework. It is based on the difference between the time-of-flight (ToF) of the light and the sound. We formulate sound source depth estimation as an audio-visual event localization task for collision events. To approach decimeter-level depth accuracy, we design a coarse-to-fine pipeline to push the temporary localization accuracy from event-level to millisecond-level by aligning audio-visual correspondence and manipulating optical flow. FBDepth feeds the estimated visual timestamp together with the audio clip and object visual features to regress the source depth. We use a mobile phone to collect 3.6K+ video clips with 24 different objects at up to 60m. 
FBDepth shows superior performance compared to monocular and stereo methods, especially at long range.", "keywords": "Sparse Depth Estimation;Audio-Visual", "primary_area": "", "supplementary_material": "", "author": "Wei Sun;Lili Qiu", "authorids": "~Wei_Sun10;~Lili_Qiu1", "gender": "M;F", "homepage": "https://www.cs.utexas.edu/~weisun/;https://www.cs.utexas.edu/~lili/", "dblp": "09/5042.html;", "google_scholar": "odTy4-YAAAAJ;https://scholar.google.com.tw/citations?user=16posrQAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Wei_Sun10;~Lili_Qiu1", "aff": "Samsung Research America;University of Texas at Austin", "aff_domain": "samsung.com;cs.utexas.edu", "position": "Researcher;Full Professor", "bibtex": "@misc{\nsun2023visual,\ntitle={Visual Timing For Sound Source Depth Estimation in the Wild},\nauthor={Wei Sun and Lili Qiu},\nyear={2023},\nurl={https://openreview.net/forum?id=L3zKVQKyV4F}\n}", "github": "", "project": "", "reviewers": "RxUQ;SYPH;Febd;6qDM", "site": "https://openreview.net/forum?id=L3zKVQKyV4F", "pdf_size": 13549610, "recommendation": "3;6;6;6", "confidence": "3;3;3;3", "correctness": "2;4;3;3", "technical_novelty": "2;3;3;2", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "100;17;84;102", "wc_strength_and_weaknesses": "246;180;189;173", "wc_clarity_quality_novelty_and_reproducibility": "44;89;46;113", "wc_summary_review": "23;38;48;102", "wc_review": "413;324;367;490", "wc_reply_reviewers": "0;0;80;57", "wc_reply_authors": "586;798;457;985", "reply_reviewers": "0;0;1;1", "reply_authors": "2;2;3;3", "recommendation_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 3.0, 0.0 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 75.75, 34.629286738250904 ], "wc_strength_and_weaknesses_avg": [ 197.0, 28.853076092507017 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 73.0, 29.266021253323792 ], "wc_summary_review_avg": [ 52.75, 29.794084983432533 ], "wc_review_avg": [ 398.5, 61.491869381244214 ], "wc_reply_reviewers_avg": [ 34.25, 35.202095108104004 ], "wc_reply_authors_avg": [ 706.5, 201.68353923907623 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.816496580927726, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4566434885114301956&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Samsung;University of Texas at Austin", "aff_unique_dep": "Samsung Research America;", "aff_unique_url": "https://www.samsung.com/us/careers/research/;https://www.utexas.edu", "aff_unique_abbr": "SRA;UT Austin", "aff_campus_unique_index": "1", "aff_campus_unique": ";Austin", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "L5pRidCQlRc", "title": "The Final Ascent: When Bigger Models Generalize Worse on Noisy-Labeled Data", "track": "main", "status": "Reject", "tldr": "When the noise-to-sample-size ratio is sufficiently large, increasing the width or density of the model beyond a certain point only hurts the generalization performance.", "abstract": "Increasing the size of overparameterized neural networks has been shown to improve their generalization performance. 
However, real-world datasets often contain a significant fraction of noisy labels, which can drastically harm the performance of the models trained on them. In this work, we study how neural networks' test loss changes with model size when the training set contains noisy labels. We show that under a sufficiently large noise-to-sample-size ratio, generalization error eventually increases with model size. First, we provide a theoretical analysis of random feature regression and show that this phenomenon occurs as the variance of the generalization loss experiences a second ascent under a large noise-to-sample-size ratio. Then, we present extensive empirical evidence confirming that our theoretical results hold for neural networks. Furthermore, we empirically observe that the adverse effect of network size is more pronounced when robust training methods are employed to learn from noisy-labeled data. Our results have important practical implications: First, larger models should be employed with extra care, particularly when trained on smaller datasets or using robust learning methods. Second, a large sample size can alleviate the effect of noisy labels and allow larger models to achieve superior performance even under noise. ", "keywords": "supervised learning;generalization;overfitting;memorization", "primary_area": "", "supplementary_material": "", "author": "Yihao Xue;Kyle Whitecross;Baharan Mirzasoleiman", "authorids": "~Yihao_Xue1;~Kyle_Whitecross1;~Baharan_Mirzasoleiman1", "gender": ";M;F", "homepage": ";;http://web.cs.ucla.edu/~baharan/", "dblp": "271/2194;312/6436;52/10075", "google_scholar": "vMHVm8MAAAAJ;;x63j7HEAAAAJ", "orcid": ";;", "linkedin": ";kyle-whitecross-705a24189/;", "or_profile": "~Yihao_Xue1;~Kyle_Whitecross1;~Baharan_Mirzasoleiman1", "aff": "University of California, Los Angeles;University of California, Los Angeles;University of California, Los Angeles", "aff_domain": "ucla.edu;ucla.edu;ucla.edu", "position": "PhD student;Undergrad student;Assistant Professor", "bibtex": "@misc{\nxue2023the,\ntitle={The Final Ascent: When Bigger Models Generalize Worse on Noisy-Labeled Data},\nauthor={Yihao Xue and Kyle Whitecross and Baharan Mirzasoleiman},\nyear={2023},\nurl={https://openreview.net/forum?id=L5pRidCQlRc}\n}", "github": "", "project": "", "reviewers": "fFmQ;RtYb;romc;gtPU", "site": "https://openreview.net/forum?id=L5pRidCQlRc", "pdf_size": 1375630, "recommendation": "3;5;6;8", "confidence": "3;4;5;3", "correctness": "3;4;3;4", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "61;139;99;135", "wc_strength_and_weaknesses": "72;335;115;162", "wc_clarity_quality_novelty_and_reproducibility": "89;24;17;27", "wc_summary_review": "35;139;17;33", "wc_review": "257;637;248;357", "wc_reply_reviewers": "181;0;10;0", "wc_reply_authors": "1446;692;61;483", "reply_reviewers": "1;0;1;0", "reply_authors": "3;2;1;1", "recommendation_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 108.5, 31.539657575820318 ], "wc_strength_and_weaknesses_avg": [ 171.0, 99.89244215655157 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 39.25, 28.951468011138918 ], "wc_summary_review_avg": [ 56.0, 48.425200051213004 ], "wc_review_avg": [ 374.75, 157.33781331898572 ], "wc_reply_reviewers_avg": [ 47.75, 77.04016809431299 ], "wc_reply_authors_avg": [ 670.5, 502.1227439580884 ], 
"reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.08362420100070908, "corr_recommendation_correctness": 0.5547001962252291, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4158511330811864505&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, Los Angeles", "aff_unique_dep": "", "aff_unique_url": "https://www.ucla.edu", "aff_unique_abbr": "UCLA", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "L5yBcwO1yKH", "title": "MixMask: Revisiting Masked Siamese Self-supervised Learning in Asymmetric Distance", "track": "main", "status": "Withdraw", "tldr": "A New Masking Strategy for Masked Siamese Self-supervised Learning", "abstract": "Recent advances in self-supervised learning integrate masked modeling and Siamese Networks into a single framework to fully reap the advantages of both the two techniques. However, these approaches simply inherit the default loss design from previous siamese networks and ignore the distance change after employing masking operation in the frameworks. In this paper, we propose a filling-based masking strategy called MixMask to prevent information loss in vanilla masking method due to the randomly erased areas in an image. We further introduce a dynamic loss function design with soft distance to adapt the integrated architecture and avoid mismatches between transformed input and objective in Masked Siamese ConvNets (MSCN). The dynamic loss distance is calculated according to the mix-masking scheme. Extensive experiments are conducted on various datasets of CIFAR-100, Tiny-ImageNet and ImangeNet-1K. The results demonstrate that the proposed framework can achieve better accuracy on linear evaluation and semi-supervised learning, which outperforms the state-of-the-art MSCN by a significant margin. We also show the superiority on downstream tasks of object detection and segmentation. 
Our source code will be publicly available.", "keywords": "MixMask;Masked Siamese Networks;Self-supervised Learning", "primary_area": "", "supplementary_material": "", "author": "Kirill Vishniakov;Eric Xing;Zhiqiang Shen", "authorids": "~Kirill_Vishniakov1;~Eric_Xing1;~Zhiqiang_Shen1", "gender": "M;M;", "homepage": "https://kirill-vish.github.io/;http://www.cs.cmu.edu/~epxing/;", "dblp": ";36/3855;", "google_scholar": "H02tLFMAAAAJ;https://scholar.google.com.tw/citations?user=5pKTRxEAAAAJ;", "orcid": ";;", "linkedin": "https://linkedin.com/in/kirill-vishniakov-605aa0142;;", "or_profile": "~Kirill_Vishniakov1;~Eric_Xing1;~Zhiqiang_Shen1", "aff": "Mohamed bin Zayed University of Artificial Intelligence;School of Computer Science, Carnegie Mellon University;", "aff_domain": "mbzuai.ac.ae;cs.cmu.edu;", "position": "MS student;Full Professor;", "bibtex": "@misc{\nvishniakov2023mixmask,\ntitle={MixMask: Revisiting Masked Siamese Self-supervised Learning in Asymmetric Distance},\nauthor={Kirill Vishniakov and Eric Xing and Zhiqiang Shen},\nyear={2023},\nurl={https://openreview.net/forum?id=L5yBcwO1yKH}\n}", "github": "", "project": "", "reviewers": "uHpf;ryV2;Zj5c;TTof;QtWs", "site": "https://openreview.net/forum?id=L5yBcwO1yKH", "pdf_size": 1183239, "recommendation": "3;3;5;5;6", "confidence": "3;4;4;3;3", "correctness": "2;2;3;3;3", "technical_novelty": "3;2;2;2;3", "empirical_novelty": "0;2;2;2;3", "wc_summary_paper": "151;67;25;98;42", "wc_strength_and_weaknesses": "196;335;227;239;167", "wc_clarity_quality_novelty_and_reproducibility": "264;20;53;44;26", "wc_summary_review": "58;16;26;34;39", "wc_review": "669;438;331;415;274", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;0;0", "reply_authors": "0;0;0;0;0", "recommendation_avg": [ 4.4, 1.2 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "correctness_avg": [ 2.6, 0.4898979485566356 ], "technical_novelty_avg": [ 2.4, 0.4898979485566356 ], "empirical_novelty_avg": [ 1.8, 0.9797958971132713 ], "wc_summary_paper_avg": [ 76.6, 44.598654688230226 ], "wc_strength_and_weaknesses_avg": [ 232.8, 56.92240332241779 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 81.4, 92.07301450479396 ], "wc_summary_review_avg": [ 34.6, 14.051334456200237 ], "wc_review_avg": [ 425.4, 135.23401938861392 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.2721655269759087, "corr_recommendation_correctness": 0.9525793444156803, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8226435677880555591&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Mohamed bin Zayed University of Artificial Intelligence;Carnegie Mellon University", "aff_unique_dep": ";School of Computer Science", "aff_unique_url": "https://mbzuai.ac.ae;https://www.cmu.edu", "aff_unique_abbr": "MBZUAI;CMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pittsburgh", "aff_country_unique_index": "0;1", "aff_country_unique": "United Arab Emirates;United States" }, { "id": "L64Bs1OSNjZ", "title": "Learning DAGs from Fourier-Sparse Data", "track": "main", "status": "Withdraw", "tldr": "We leverage recent causal Fourier analysis to pose the novel problem of learning DAGs from data with a sparse spectrum and propose a solution that outperforms existing DAG learning methods",
"abstract": "We present a novel perspective on learning directed acyclic graphs (DAGs) from data, leveraging a recent novel form of causal Fourier analysis on DAGs. We build on prior work that learned DAGs from data generated by a structural equation model (SEM). First, we show that data generated by linear SEMs can be characterized in the frequency domain as having dense spectra with random coefficients. Then we propose the new problem of learning DAGs from approximately Fourier-sparse data, which we solve by minimizing the $L^1$ norm of the spectrum. We provide a motivation for this problem and compare our method to prior DAG learning methods, showing superior performance.", "keywords": "directed acyclic graph;DAG learning;causal Fourier analysis;structural equation models;additive noise;Fourier-sparse", "primary_area": "", "supplementary_material": "/attachment/262da4e121e20f35eba63b4b777350ee081c9caa.zip", "author": "Panagiotis Misiakos;Chris Wendler;Markus P\u00fcschel", "authorids": "~Panagiotis_Misiakos1;~Chris_Wendler1;~Markus_P\u00fcschel1", "gender": "M;M;M", "homepage": "https://acl.inf.ethz.ch/people/panosm/;https://wendlerc.github.io/;https://acl.inf.ethz.ch/", "dblp": "270/4194;248/7764;37/6355", "google_scholar": "PlqKbB4AAAAJ;https://scholar.google.com/citations?hl=en;az9ZryAAAAAJ", "orcid": ";;0000-0001-8834-8551", "linkedin": ";;", "or_profile": "~Panagiotis_Misiakos1;~Chris_Wendler1;~Markus_P\u00fcschel1", "aff": "Department of Computer Science, ETHZ - ETH Zurich;Swiss Federal Institute of Technology;Department of Computer Science, ETHZ - ETH Zurich", "aff_domain": "inf.ethz.ch;ethz.ch;inf.ethz.ch", "position": "PhD student;PhD student;Full Professor", "bibtex": "@misc{\nmisiakos2023learning,\ntitle={Learning {DAG}s from Fourier-Sparse Data},\nauthor={Panagiotis Misiakos and Chris Wendler and Markus P{\\\"u}schel},\nyear={2023},\nurl={https://openreview.net/forum?id=L64Bs1OSNjZ}\n}", "github": "", "project": "", "reviewers": "cT2j;uHH8;R97q;o8xF", "site": "https://openreview.net/forum?id=L64Bs1OSNjZ", "pdf_size": 844382, "recommendation": "3;3;5;5", "confidence": "4;4;3;2", "correctness": "2;4;2;3", "technical_novelty": "2;2;3;2", "empirical_novelty": "2;2;3;2", "wc_summary_paper": "158;48;64;40", "wc_strength_and_weaknesses": "821;336;344;83", "wc_clarity_quality_novelty_and_reproducibility": "154;64;24;28", "wc_summary_review": "133;11;25;62", "wc_review": "1266;459;457;213", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "502;483;423;206", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 2.75, 0.82915619758885 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 77.5, 47.27314248069405 ], "wc_strength_and_weaknesses_avg": [ 396.0, 266.87918614983823 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 67.5, 52.313956072925706 ], "wc_summary_review_avg": [ 57.75, 47.27248142418589 ], "wc_review_avg": [ 598.75, 398.0102856711118 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 403.5, 117.69558190518453 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.9045340337332909, "corr_recommendation_correctness": -0.30151134457776363, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:KI8cHWmk3dAJ:scholar.google.com/&scioq=Learning+DAGs+from+Fourier-Sparse+Data&hl=en&as_sdt=0,11", "gs_version_total": 0, "aff_unique_index": "0;1;0", "aff_unique_norm": "ETH Zurich;Swiss Federal Institute of Technology", "aff_unique_dep": "Department of Computer Science;", "aff_unique_url": "https://www.ethz.ch;https://www.ethz.ch", "aff_unique_abbr": "ETHZ;ETH Zurich", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Zurich;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Switzerland" }, { "id": "L6CKiPH3hI", "title": "Enriching Online Knowledge Distillation with Specialist Ensemble", "track": "main", "status": "Reject", "tldr": "Online knowledge distillation with an ensemble of specialized teachers that are explicitly estimated for each imbalanced label prior.", "abstract": "Online Knowledge Distillation (KD) has an advantage over traditional KD works in that it removes the necessity for a pre-trained teacher. Indeed, an ensemble of small teachers has become typical guidance for a student's learning trajectory. Previous works emphasized diversity to create helpful ensemble knowledge and further argued that the size of diversity should be significant to prevent homogenization. This paper proposes a well-founded online KD framework with naturally derived specialists. In supervised learning, the parameters of a classifier are optimized by stochastic gradient descent based on a training dataset distribution. If the training dataset is shifted, the optimal point and corresponding parameters change accordingly, which is natural and explicit.\nWe first introduce a label prior shift to induce evident diversity among the same teachers, which assigns a skewed label distribution to each teacher and simultaneously specializes them through importance sampling. Compared to previous works, our specialization achieves the highest level of diversity and maintains it throughout training. Second, we propose a new aggregation that uses post-compensation in specialist outputs and conventional model averaging. The aggregation empirically exhibits the advantage of ensemble calibration even if applied to previous diversity-eliciting methods. 
Finally, through extensive experiments, we demonstrate the efficacy of our framework on top-1 error rate, negative log-likelihood, and notably expected calibration error.", "keywords": "Online knowledge distillation;Label prior shift;Ensemble learning", "primary_area": "", "supplementary_material": "/attachment/36a95ef6e9dd3b04257c19859c458b5f758c60ca.zip", "author": "Mincheol Park;Woojeong Kim;Junsik Bang;Won Woo Ro;Suhyun Kim", "authorids": "~Mincheol_Park1;~Woojeong_Kim1;~Junsik_Bang1;~Won_Woo_Ro1;~Suhyun_Kim1", "gender": "M;F;M;M;", "homepage": ";https://sites.google.com/view/woojeongkim/;https://github.com/junsik1998;http://escal.yonsei.ac.kr/;https://kdst.tistory.com/", "dblp": "270/1814;243/0064;;r/WonWooRo;45/6898-1", "google_scholar": "kSIW-XAAAAAJ;fGCEQQgAAAAJ;;GVfD5LQAAAAJ;", "orcid": ";;;0000-0001-5390-6445;", "linkedin": "mincheol-park-66b166186;woojeong-kim-072ab4160/;;;", "or_profile": "~Mincheol_Park1;~Woojeong_Kim1;~Junsik_Bang1;~Won_Woo_Ro1;~Suhyun_Kim1", "aff": "Korea Institute of Science and Technology;Cornell University;Konkuk University;Yonsei University;Korea Institute of Science and Technology", "aff_domain": "kist.re.kr;cornell.edu;konkuk.ac.kr;yonsei.ac.kr;kist.re.kr", "position": "Research assistant;PhD student;Undergrad student;Full Professor;Principal Researcher", "bibtex": "@misc{\npark2023enriching,\ntitle={Enriching Online Knowledge Distillation with Specialist Ensemble},\nauthor={Mincheol Park and Woojeong Kim and Junsik Bang and Won Woo Ro and Suhyun Kim},\nyear={2023},\nurl={https://openreview.net/forum?id=L6CKiPH3hI}\n}", "github": "", "project": "", "reviewers": "HD42;uYCq;Xcq2;D2AU", "site": "https://openreview.net/forum?id=L6CKiPH3hI", "pdf_size": 1187742, "recommendation": "3;3;6;6", "confidence": "4;3;3;4", "correctness": "1;3;3;4", "technical_novelty": "2;2;2;2", "empirical_novelty": "2;2;0;3", "wc_summary_paper": "44;145;58;86", "wc_strength_and_weaknesses": "211;263;182;89", "wc_clarity_quality_novelty_and_reproducibility": "7;89;11;63", "wc_summary_review": "19;30;35;29", "wc_review": "281;527;286;267", "wc_reply_reviewers": "109;0;0;0", "wc_reply_authors": "1150;785;942;130", "reply_reviewers": "1;0;0;0", "reply_authors": "3;1;2;1", "recommendation_avg": [ 4.5, 1.5 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.75, 1.0897247358851685 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 1.75, 1.0897247358851685 ], "wc_summary_paper_avg": [ 83.25, 38.72579889427718 ], "wc_strength_and_weaknesses_avg": [ 186.25, 63.20354024894492 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 42.5, 34.767082132384935 ], "wc_summary_review_avg": [ 28.25, 5.80409338312195 ], "wc_review_avg": [ 340.25, 108.04484022849032 ], "wc_reply_reviewers_avg": [ 27.25, 47.198384506251905 ], "wc_reply_authors_avg": [ 751.75, 381.600822195131 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.6882472016116854, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:L4DcIIeJWgYJ:scholar.google.com/&scioq=Enriching+Online+Knowledge+Distillation+with+Specialist+Ensemble&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "Korea Institute of Science and Technology;Cornell University;Konkuk University;Yonsei University", "aff_unique_dep": ";;;", "aff_unique_url": 
"https://www.kist.re.kr;https://www.cornell.edu;http://www.konkuk.edu;https://www.yonsei.ac.kr", "aff_unique_abbr": "KIST;Cornell;KU;Yonsei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "South Korea;United States" }, { "title": "Deja Vu: Continual Model Generalization for Unseen Domains", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12153", "id": "L8iZdgeKmI6", "poster": "/media/PosterPDFs/ICLR%202023/12153.png?t=1681052219.554597", "openreview": "https://openreview.net/forum?id=L8iZdgeKmI6", "slides": "https://iclr.cc/virtual/2023/poster/12153", "video": "https://iclr.cc/virtual/2023/poster/12153", "author_site": "Chenxi Liu, Lixu Wang, Lingjuan Lyu, Chen Sun, Xiao Wang, Qi Zhu", "tldr": "To tackle continual domain shift in real-world applications, this work proposes a novel framework for achieving target domain generalization, target domain adaptation, and forgetting compensation at the same time.", "abstract": "In real-world applications, deep learning models often run in non-stationary environments where the target data distribution continually shifts over time. There have been numerous domain adaptation (DA) methods in both online and offline modes to improve cross-domain adaptation ability. However, these DA methods typically only provide good performance after a long period of adaptation, and perform poorly on new domains before and during adaptation \u2013 in what we call the \u201cUnfamiliar Period\u201d, especially when domain shifts happen suddenly and significantly. On the other hand, domain generalization (DG) methods have been proposed to improve the model generalization ability on unadapted domains. However, existing DG works are ineffective for continually changing domains due to severe catastrophic forgetting of learned knowledge. To overcome these limitations of DA and DG in handling the Unfamiliar Period during continual domain shift, we propose RaTP, a framework that focuses on improving models\u2019 target domain generalization (TDG) capability, while also achieving effective target domain adaptation (TDA) capability right after training on certain domains and forgetting alleviation (FA) capability on past domains. RaTP includes a training-free data augmentation module to prepare data for TDG, a novel pseudo-labeling mechanism to provide reliable supervision for TDA, and a prototype contrastive alignment algorithm to align different domains for achieving TDG, TDA and FA. 
Extensive experiments on Digits, PACS, and DomainNet demonstrate that RaTP significantly outperforms state-of-the-art works from Continual DA, Source-Free DA, Test-Time/Online DA, Single DG, Multiple DG and Unified DA&DG in TDG, and achieves comparable TDA and FA capabilities.", "keywords": "Domain Generalization;Domain Adaptation", "primary_area": "", "supplementary_material": "/attachment/2c60cb61a7f70fb4cec5e6ac7aded6b60035b0f6.zip", "author": "Chenxi Liu;Lixu Wang;Lingjuan Lyu;Chen Sun;Xiao Wang;Qi Zhu", "authorids": "~Chenxi_Liu2;~Lixu_Wang1;~Lingjuan_Lyu1;chen.sun@sony.com;~Xiao_Wang11;~Qi_Zhu2", "gender": "M;;F;;M;", "homepage": "https://dawnliu35.github.io/;;https://sites.google.com/view/lingjuan-lyu;;https://wangxiao1254.github.io/;http://zhulab.ece.northwestern.edu/", "dblp": ";;178/9876;;150/9413;66/5923-2.html", "google_scholar": ";;;;QbWLR8QAAAAJ;TN09YMcAAAAJ", "orcid": ";;;;;", "linkedin": "chenxi-liu-8b9719211/;;;;;", "or_profile": "~Chenxi_Liu2;~Lixu_Wang1;~Lingjuan_Lyu1;chen.sun@sony.com;~Xiao_Wang11;~Qi_Zhu2", "aff": "Northwestern University;;Sony;;Northwestern University;Northwestern University", "aff_domain": "northwestern.edu;;sony.com;;northwestern.edu;northwestern.edu", "position": "MS student;;scientist;;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nliu2023deja,\ntitle={Deja Vu: Continual Model Generalization for Unseen Domains},\nauthor={Chenxi Liu and Lixu Wang and Lingjuan Lyu and Chen Sun and Xiao Wang and Qi Zhu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=L8iZdgeKmI6}\n}", "github": "", "project": "", "reviewers": "sJyy;jYsN;CHgo;Nus3", "pdf_size": 866042, "recommendation": "5;5;8;8", "confidence": "3;4;5;3", "correctness": "2;3;4;4", "technical_novelty": "2;2;4;3", "empirical_novelty": "0;3;4;3", "wc_summary_paper": "99;269;169;71", "wc_strength_and_weaknesses": "142;510;345;255", "wc_clarity_quality_novelty_and_reproducibility": "12;456;37;30", "wc_summary_review": "29;365;23;19", "wc_review": "282;1600;574;375", "wc_reply_reviewers": "0;932;11;13", "wc_reply_authors": "884;5644;867;635", "reply_reviewers": "0;3;1;1", "reply_authors": "4;11;3;2", "recommendation_avg": [ 6.5, 1.5 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 2.5, 1.5 ], "wc_summary_paper_avg": [ 152.0, 76.40026177965623 ], "wc_strength_and_weaknesses_avg": [ 313.0, 134.57154231114393 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 133.75, 186.27449503353915 ], "wc_summary_review_avg": [ 109.0, 147.844512918133 ], "wc_review_avg": [ 707.75, 525.829047790249 ], "wc_reply_reviewers_avg": [ 239.0, 400.13435243677844 ], "wc_reply_authors_avg": [ 2007.5, 2101.8373509860367 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 5.0, 3.5355339059327378 ], "replies_avg": [ 32, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.3015113445777637, "corr_recommendation_correctness": 0.9045340337332909, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=229945822446457518&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "pdf": "https://openreview.net/pdf?id=L8iZdgeKmI6", "email": "northwestern.edu;;sony.com;;northwestern.edu;northwestern.edu", "author_num": 6, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Northwestern University;Sony Corporation", "aff_unique_dep": ";", 
"aff_unique_url": "https://www.northwestern.edu;https://www.sony.com", "aff_unique_abbr": "NU;Sony", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;Japan" }, { "id": "L8qKBr_bht", "title": "Transformers with Multiresolution Attention Heads", "track": "main", "status": "Reject", "tldr": "We propose the Transformer with Multiresolution-head Attention (MrsFormer), a class of efficient transformers inspired by the multiresolution approximation (MRA) for approximating a signal f using wavelet bases", "abstract": "We propose the Transformer with Multiresolution-head Attention (MrsFormer), a class of efficient transformers inspired by the multiresolution approximation (MRA) for approximating a signal f using wavelet bases. MRA decomposes a signal into components that lie on orthogonal subspaces at different scales. Similarly, MrsFormer decomposes the attention heads in the multi-head attention into fine-scale and coarse-scale heads, modeling the attention patterns between tokens and between groups of tokens. Computing the attention heads in MrsFormer requires significantly less computation and memory footprint compared to the standard softmax transformer with multi-head attention. We analyze and validate the advantage of MrsFormer over the standard transformers on a wide range of applications including image and time series classification.", "keywords": "transformer;multiresolution analysis;attention heads", "primary_area": "", "supplementary_material": "/attachment/9589252511b6c315f85b61cbe2a9926be38b6408.zip", "author": "Tan Minh Nguyen;Tho Tran Huu;Tam Minh Nguyen;Minh Pham;Nhat Ho;Stanley Osher", "authorids": "~Tan_Minh_Nguyen1;~Tho_Tran_Huu1;~Tam_Minh_Nguyen1;~Minh_Pham1;~Nhat_Ho1;~Stanley_Osher1", "gender": "M;M;F;M;M;M", "homepage": "https://tanmnguyen89.github.io/;;;;https://nhatptnk8912.github.io/;https://www.math.ucla.edu/~sjo/", "dblp": "255/4725;337/2038;251/1464;34/3955;203/4479;", "google_scholar": "OizOh88AAAAJ;fG3mIYEAAAAJ;;;https://scholar.google.ca/citations?user=Xs7cKMwAAAAJ;", "orcid": ";;;;;", "linkedin": ";;tam-nguyen-6a3935132/;;nhat-pham-minh-ho-267b8164/;", "or_profile": "~Tan_Minh_Nguyen1;~Tho_Tran_Huu1;~Tam_Minh_Nguyen1;~Minh_Pham1;~Nhat_Ho1;~Stanley_Osher1", "aff": "University of California, Los Angeles;;FPT Software;University of California, Los Angeles;University of Texas, Austin;University of California, Los Angeles", "aff_domain": "ucla.edu;;fsoft.com.vn;ucla.edu;utexas.edu;ucla.edu", "position": "Postdoc;;FPT AI Residency;Postdoc;Assistant Professor;Full Professor", "bibtex": "@misc{\nnguyen2023transformers,\ntitle={Transformers with Multiresolution Attention Heads},\nauthor={Tan Minh Nguyen and Tho Tran Huu and Tam Minh Nguyen and Minh Pham and Nhat Ho and Stanley Osher},\nyear={2023},\nurl={https://openreview.net/forum?id=L8qKBr_bht}\n}", "github": "", "project": "", "reviewers": "sjo4;fFTA;VX4u", "site": "https://openreview.net/forum?id=L8qKBr_bht", "pdf_size": 2573494, "recommendation": "3;3;6", "confidence": "4;3;4", "correctness": "4;3;3", "technical_novelty": "2;2;2", "empirical_novelty": "2;2;2", "wc_summary_paper": "74;26;52", "wc_strength_and_weaknesses": "38;43;53", "wc_clarity_quality_novelty_and_reproducibility": "353;22;32", "wc_summary_review": "21;97;54", "wc_review": "486;188;191", "wc_reply_reviewers": "185;0;0", "wc_reply_authors": "1875;1398;846", "reply_reviewers": "1;0;0", "reply_authors": "5;4;3", "recommendation_avg": [ 4.0, 1.4142135623730951 ], 
"confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 50.666666666666664, 19.61858529274955 ], "wc_strength_and_weaknesses_avg": [ 44.666666666666664, 6.236095644623236 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 135.66666666666666, 153.7320901944534 ], "wc_summary_review_avg": [ 57.333333333333336, 31.116269413639905 ], "wc_review_avg": [ 288.3333333333333, 139.77680621460613 ], "wc_reply_reviewers_avg": [ 61.666666666666664, 87.20983634634086 ], "wc_reply_authors_avg": [ 1373.0, 420.4592727006981 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 4.0, 0.816496580927726 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.5, "corr_recommendation_correctness": -0.5, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:AOrrJaFft20J:scholar.google.com/&scioq=Transformers+with+Multiresolution+Attention+Heads&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "University of California, Los Angeles;FPT Corporation;University of Texas at Austin", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ucla.edu;https://www.fpt-software.com;https://www.utexas.edu", "aff_unique_abbr": "UCLA;FPT;UT Austin", "aff_campus_unique_index": "0;0;2;0", "aff_campus_unique": "Los Angeles;;Austin", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "United States;Vietnam" }, { "id": "L97ftsVhiUi", "title": "CAKE: CAusal and collaborative proxy-tasKs lEarning for Semi-Supervised Domain Adaptation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Semi-supervised domain adaptation (SSDA) adapts a learner to a new domain by effectively utilizing source domain data and a few labeled target samples. It is a practical yet under-investigated research topic. In this paper, we analyze the SSDA problem from two perspectives that have previously been overlooked, and correspondingly decompose it into two \\emph{key subproblems}: \\emph{robust domain adaptation (DA) learning} and \\emph{maximal cross-domain data utilization}. \\textbf{(i)} From a causal theoretical view, a robust DA model should distinguish the invariant ``concept'' (key clue to image label) from the nuisance of confounding factors across domains. To achieve this goal, we propose to generate \\emph{concept-invariant samples} to enable the model to classify the samples through causal intervention, yielding improved generalization guarantees; \\textbf{(ii)} Based on the robust DA theory, we aim to exploit the maximal utilization of rich source domain data and a few labeled target samples to boost SSDA further. Consequently, we propose a collaboratively debiasing learning framework that utilizes two complementary semi-supervised learning (SSL) classifiers to mutually exchange their unbiased knowledge, which helps unleash the potential of source and target domain training data, thereby producing more convincing pseudo-labels. Such obtained labels facilitate cross-domain feature alignment and duly improve the invariant concept learning. 
In our experimental study, we show that the proposed model significantly outperforms SOTA methods in terms of effectiveness and generalisability on SSDA datasets.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Wenqiao Zhang;CHANGSHUO LIU;Can Cui;Beng Chin Ooi", "authorids": "~Wenqiao_Zhang1;~CHANGSHUO_LIU1;cuican@comp.nus.edu.sg;~Beng_Chin_Ooi1", "gender": "M;M;;M", "homepage": ";;;http://www.comp.nus.edu.sg/~ooibc/", "dblp": "250/4486.html;;;o/BengChinOoi", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;;https://scholar.google.com.tw/citations?user=9560QjYAAAAJ", "orcid": "0000-0002-5988-7609;;;0000-0003-4446-1100", "linkedin": ";changshuo-liu-9413711b3/;;beng-chin-ooi-34b0634/", "or_profile": "~Wenqiao_Zhang1;~CHANGSHUO_LIU1;cuican@comp.nus.edu.sg;~Beng_Chin_Ooi1", "aff": "National University of Singapore;National University of Singapore;;National University of Singapore", "aff_domain": "nus.edu.sg;u.nus.edu;;comp.nus.edu.sg", "position": "Postdoc;PhD student;;Full Professor", "bibtex": "@misc{\nzhang2023cake,\ntitle={{CAKE}: {CA}usal and collaborative proxy-tasKs lEarning for Semi-Supervised Domain Adaptation},\nauthor={Wenqiao Zhang and CHANGSHUO LIU and Can Cui and Beng Chin Ooi},\nyear={2023},\nurl={https://openreview.net/forum?id=L97ftsVhiUi}\n}", "github": "", "project": "", "reviewers": "sxkD;HFEB;dEun", "site": "https://openreview.net/forum?id=L97ftsVhiUi", "pdf_size": 3738616, "recommendation": "3;5;6", "confidence": "5;4;4", "correctness": "2;2;3", "technical_novelty": "2;3;3", "empirical_novelty": "1;3;3", "wc_summary_paper": "53;43;43", "wc_strength_and_weaknesses": "594;354;60", "wc_clarity_quality_novelty_and_reproducibility": "19;21;19", "wc_summary_review": "34;6;34", "wc_review": "700;424;156", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.9428090415820634 ], "wc_summary_paper_avg": [ 46.333333333333336, 4.714045207910317 ], "wc_strength_and_weaknesses_avg": [ 336.0, 218.3758228376026 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 19.666666666666668, 0.9428090415820634 ], "wc_summary_review_avg": [ 24.666666666666668, 13.199326582148888 ], "wc_review_avg": [ 426.6666666666667, 222.09507473652408 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.944911182523068, "corr_recommendation_correctness": 0.7559289460184545, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2046347455647738493&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0;0", "aff_unique_norm": "National University of Singapore", "aff_unique_dep": "", "aff_unique_url": "https://www.nus.edu.sg", "aff_unique_abbr": "NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Singapore" }, { "id": "L9RXJBTQaDf", "title": "A new photoreceptor-inspired CNN layer enables deep learning models of retina to generalize across lighting conditions", "track": "main", "status": "Reject", "tldr": "A new 
bio-inspired deep learning model that enables generalization in dynamic lighting conditions", "abstract": "As we move our eyes, and as lighting changes in our environment, the light intensity reaching our retinas changes dramatically and on multiple timescales. Despite these changing conditions, our retinas effortlessly extract visual information that allows downstream brain areas to make sense of the visual world. Such processing capabilities are desirable in many settings, including computer vision systems that operate in dynamic lighting environments like in self-driving cars, and in algorithms that translate visual inputs into neural signals for use in vision-restoring prosthetics. To mimic retinal processing, we first require models that can predict retinal ganglion cell (RGC) responses reliably. While existing state-of-the-art deep learning models can accurately predict RGC responses to visual scenes under steady-state lighting conditions, these models fail under dynamic lighting conditions. This is because changes in lighting markedly alter RGC responses: adaptation mechanisms dynamically tune RGC receptive fields on multiple timescales. Because current deep learning models of the retina have no in-built notion of light level or these adaptive mechanisms, they are unable to accurately predict RGC responses under lighting conditions that they were not trained on. We present here a new deep learning model of the retina that can predict RGC responses to visual scenes at different light levels without requiring training at each light level. Our model combines a fully trainable biophysical front end capturing the fast and slow adaptation mechanisms in the photoreceptors with convolutional neural networks (CNNs) capturing downstream retinal processing. We tested our model\u2019s generalization performance across light levels using monkey and rat retinal data. Whereas conventional CNN models without the photoreceptor layer failed to predict RGC responses when the lighting conditions changed, our model with the photoreceptor layer as a front end fared much better in this challenge. 
Overall, our work demonstrates a new hybrid approach that equips deep learning models with biological vision mechanisms enabling them to adapt to dynamic environments.", "keywords": "retina model;photoreceptor model;bio-inspired artificial vision;retina predictor;dynamic environments", "primary_area": "", "supplementary_material": "", "author": "Saad Idrees;Greg D Field;Frederick Rieke;Joel Zylberberg", "authorids": "~Saad_Idrees1;~Greg_D_Field1;~Frederick_Rieke1;joelzy@yorku.ca", "gender": "M;M;M;", "homepage": ";https://www.neuro.duke.edu/research/faculty-labs/field-lab;https://depts.washington.edu/riekelab/;", "dblp": ";;;", "google_scholar": "3HNPrOsAAAAJ;DGTvaW0AAAAJ;;", "orcid": "0000-0002-8514-1237;;;", "linkedin": "saad-idrees-7a023562/;;;", "or_profile": "~Saad_Idrees1;~Greg_D_Field1;~Frederick_Rieke1;joelzy@yorku.ca", "aff": "York University;;University of Washington;", "aff_domain": "yorku.ca;;u.washington.edu;", "position": "Postdoc;;Researcher;", "bibtex": "@misc{\nidrees2023a,\ntitle={A new photoreceptor-inspired {CNN} layer enables deep learning models of retina to generalize across lighting conditions},\nauthor={Saad Idrees and Greg D Field and Frederick Rieke and Joel Zylberberg},\nyear={2023},\nurl={https://openreview.net/forum?id=L9RXJBTQaDf}\n}", "github": "", "project": "", "reviewers": "Rcws;TU1Q;b3Ty", "site": "https://openreview.net/forum?id=L9RXJBTQaDf", "pdf_size": 1851526, "recommendation": "3;6;6", "confidence": "3;4;3", "correctness": "2;4;3", "technical_novelty": "3;3;3", "empirical_novelty": "2;4;3", "wc_summary_paper": "105;38;91", "wc_strength_and_weaknesses": "484;64;297", "wc_clarity_quality_novelty_and_reproducibility": "34;2;272", "wc_summary_review": "77;72;177", "wc_review": "700;176;837", "wc_reply_reviewers": "211;0;0", "wc_reply_authors": "1416;265;1602", "reply_reviewers": "2;0;0", "reply_authors": "2;1;3", "recommendation_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 78.0, 28.855964143772194 ], "wc_strength_and_weaknesses_avg": [ 281.6666666666667, 171.80673897014506 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 102.66666666666667, 120.44731443895108 ], "wc_summary_review_avg": [ 108.66666666666667, 48.36206042848969 ], "wc_review_avg": [ 571.0, 284.85200835989673 ], "wc_reply_reviewers_avg": [ 70.33333333333333, 99.46635388690768 ], "wc_reply_authors_avg": [ 1094.3333333333333, 591.3229987822974 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.5, "corr_recommendation_correctness": 0.8660254037844387, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:n-nOdnwzyWIJ:scholar.google.com/&scioq=A+new+photoreceptor-inspired+CNN+layer+enables+deep+learning+models+of+retina+to+generalize+across+lighting+conditions&hl=en&as_sdt=0,11", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "York University;University of Washington", "aff_unique_dep": ";", "aff_unique_url": "https://www.yorku.ca;https://www.washington.edu", "aff_unique_abbr": "York U;UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Canada;United States" }, { "id": "L9pW5fknjO", "title": "Resource 
Efficient Self-Supervised Learning for Speech Recognition", "track": "main", "status": "Reject", "tldr": "", "abstract": "Representation learning from sequential data using self-supervised learning (SSL) has proven to be a powerful technique and improved state-of-the-art (SOTA) results when fine-tuned for various downstream tasks, including Automatic Speech Recognition (ASR). So far, the success of SSL frameworks, e.g., Wav2Vec-2.0, for sequence-to-sequence (seq2seq) modeling has primarily been achieved by masking intermediate features and then solving a contrastive task in an end-to-end manner. Although very successful, the overall training time (for example, days or weeks) and demanding resource requirements for achieving SOTA performance remain a significant barrier to further improving ASR solutions using such approaches. In this work, we show that non-contrastive learning, such as an extension of the Barlow\u2013Twins methodology, when applied to seq2seq SSL modeling improves convergence, while reducing training time. Our results show that Wav2Vec-2.0 architecture pre-training with a non-contrastive SSL approach reduces the GPU training hours by 2.3 times, compared to masking-based SSL approaches, while achieving a significant improvement (i.e., up to 6% relative WER decrease) in the model performance for the ASR task. We further demonstrate that a combination of both masking-based SSL and non-contrastive SSL improves the ASR performance, e.g., up to 12% relative WER decrease, for all splits of the LibriSpeech evaluation dataset.\n", "keywords": "SSL;ASR", "primary_area": "", "supplementary_material": "", "author": "Abhinav Mehrotra;Alberto Gil Couto Pimentel Ramos;Nicholas Donald Lane;Sourav Bhattacharya", "authorids": "~Abhinav_Mehrotra1;~Alberto_Gil_Couto_Pimentel_Ramos1;~Nicholas_Donald_Lane1;~Sourav_Bhattacharya1", "gender": "M;M;M;M", "homepage": "https://abhinavmehrotra.github.io/;;;http://niclane.org", "dblp": "154/4273;;69/3637;03/2663.html", "google_scholar": "https://scholar.google.co.uk/citations?user=AbeyFKwAAAAJ;;EU-ESvsAAAAJ;https://scholar.google.co.uk/citations?hl=en", "orcid": ";;;0000-0002-2728-8273", "linkedin": ";albertogilramos;;niclane", "or_profile": "~Abhinav_Mehrotra1;~Alberto_Gil_Couto_Pimentel_Ramos1;~Sourav_Bhattacharya1;~Nic_Lane2", "aff": "Samsung AI Center;Samsung;Samsung AI Center;Samsung", "aff_domain": "samsung.com;samsung.com;samsung.com;samsung.com", "position": "Researcher;Researcher;Principal Researcher;Laboratory Director", "bibtex": "@misc{\nmehrotra2023resource,\ntitle={Resource Efficient Self-Supervised Learning for Speech Recognition},\nauthor={Abhinav Mehrotra and Alberto Gil Couto Pimentel Ramos and Nicholas Donald Lane and Sourav Bhattacharya},\nyear={2023},\nurl={https://openreview.net/forum?id=L9pW5fknjO}\n}", "github": "", "project": "", "reviewers": "5kcU;5Dwq;hCQK;t7qA", "site": "https://openreview.net/forum?id=L9pW5fknjO", "pdf_size": 244312, "recommendation": "3;5;5;6", "confidence": "4;4;5;4", "correctness": "2;3;2;3", "technical_novelty": "3;3;3;3", "empirical_novelty": "2;3;2;3", "wc_summary_paper": "161;71;186;101", "wc_strength_and_weaknesses": "618;416;532;286", "wc_clarity_quality_novelty_and_reproducibility": "27;22;1005;128", "wc_summary_review": "49;58;166;24", "wc_review": "855;567;1889;539", "wc_reply_reviewers": "245;179;758;0", "wc_reply_authors": "2025;1827;3520;1393", "reply_reviewers": "1;1;3;0", "reply_authors": "4;4;7;3", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 4.25, 0.4330127018922193 ],
"correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 129.75, 45.87687325875642 ], "wc_strength_and_weaknesses_avg": [ 463.0, 124.82387592123551 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 295.5, 411.80729716701234 ], "wc_summary_review_avg": [ 74.25, 54.41679428264771 ], "wc_review_avg": [ 962.5, 549.0289154498149 ], "wc_reply_reviewers_avg": [ 295.5, 281.6686883556637 ], "wc_reply_authors_avg": [ 2191.25, 800.4837209462788 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 4.5, 1.5 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.13245323570650439, "corr_recommendation_correctness": 0.6882472016116854, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14783914207395859219&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Samsung", "aff_unique_dep": "AI Center", "aff_unique_url": "https://www.samsung.com/global/careers/ai-center/", "aff_unique_abbr": "Samsung AI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "LB6KMRUqng2", "title": "Transferability Between Regression Tasks", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "We consider the problem of estimating how well deep neural network regression models would transfer from source to target tasks. We focus on regression tasks, which received little previous attention, and develop novel transferability estimation methods that are simple, computationally efficient, yet effective and theoretically grounded. We propose two families of transferability estimators, both of which utilize the mean squared error of a regularized linear regression model to estimate the transferability. We prove novel theoretical bounds connecting our methods with the expected risk of the optimal target models obtained from the actual transfer learning process. We test our methods extensively in various challenging, practical scenarios and show they significantly outperform existing state-of-the-art regression task transferability estimators, in both accuracy and efficiency.", "keywords": "Transferability estimation;Transfer learning", "primary_area": "", "supplementary_material": "", "author": "Cuong Ngoc Nguyen;Phong Tran The;Lam Si Tung Ho;Vu C. 
Dinh;Anh Tuan Tran;Tal Hassner;Cuong V Nguyen", "authorids": "~Cuong_Ngoc_Nguyen1;~Phong_Tran_The1;~Lam_Si_Tung_Ho1;~Vu_C._Dinh1;~Anh_Tuan_Tran2;~Tal_Hassner2;~Cuong_V_Nguyen1", "gender": "M;M;M;M;M;M;M", "homepage": "https://cuongnn218.github.io/;https://p0lyfish.github.io/;https://sites.google.com/site/lamho86;http://vucdinh.github.io;https://sites.google.com/site/anhttranusc/;https://talhassner.github.io/home/;https://nvcuong.github.io/", "dblp": "329/5158;;;125/5383;150/5269-1;62/6;36/9125", "google_scholar": "MntXp5MAAAAJ;-BPaFHcAAAAJ;https://scholar.google.com.vn/citations?hl=en;n5niSOoAAAAJ;FYZ5ODQAAAAJ;ehe5pyIAAAAJ;CG9yOXoAAAAJ", "orcid": ";;;;0000-0002-3120-4036;0000-0003-2275-1406;", "linkedin": "cuongnn218/;phong-t-28b077114/;;;https://linkedin.com/in/anh-tran-97814b19;talhassner/;cuong-nguyen-0b582736", "or_profile": "~Cuong_Ngoc_Nguyen1;~Phong_Tran_The1;~Lam_Si_Tung_Ho1;~Vu_C._Dinh1;~Anh_Tuan_Tran2;~Tal_Hassner2;~Cuong_V_Nguyen1", "aff": "Florida International University;Mohamed bin Zayed University of Artificial Intelligence;Dalhousie University;University of Delaware;VinAI Research;Meta inc.;Florida International University", "aff_domain": "fiu.edu;mbzuai.ac.ae;dal.ca;udel.edu;vinai.io;meta.com;fiu.edu", "position": "MS student;PhD student;Associate Professor;Assistant Professor;Research Scientist;Researcher & Research Manager;Assistant Professor", "bibtex": "@misc{\nnguyen2023transferability,\ntitle={Transferability Between Regression Tasks},\nauthor={Cuong Ngoc Nguyen and Phong Tran The and Lam Si Tung Ho and Vu C. Dinh and Anh Tuan Tran and Tal Hassner and Cuong V Nguyen},\nyear={2023},\nurl={https://openreview.net/forum?id=LB6KMRUqng2}\n}", "github": "", "project": "", "reviewers": "zwfU;jFNa;gvjb;RXhe;bgL4", "site": "https://openreview.net/forum?id=LB6KMRUqng2", "pdf_size": 1573331, "recommendation": "3;3;3;5;5", "confidence": "4;4;4;5;3", "correctness": "3;3;2;3;3", "technical_novelty": "2;1;2;3;3", "empirical_novelty": "2;1;2;3;2", "wc_summary_paper": "86;107;61;53;22", "wc_strength_and_weaknesses": "698;290;156;210;288", "wc_clarity_quality_novelty_and_reproducibility": "71;19;6;42;15", "wc_summary_review": "21;35;17;42;35", "wc_review": "876;451;240;347;360", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;0;0", "reply_authors": "0;0;0;0;0", "recommendation_avg": [ 3.8, 0.9797958971132712 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "correctness_avg": [ 2.8, 0.39999999999999997 ], "technical_novelty_avg": [ 2.2, 0.7483314773547882 ], "empirical_novelty_avg": [ 2.0, 0.6324555320336759 ], "wc_summary_paper_avg": [ 65.8, 29.019993108200424 ], "wc_strength_and_weaknesses_avg": [ 328.4, 191.54696552020863 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 30.6, 23.431602591372194 ], "wc_summary_review_avg": [ 30.0, 9.423375191511797 ], "wc_review_avg": [ 454.8, 220.98452434503193 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.408248290463863, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4934605411050849411&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1;2;3;4;5;0", "aff_unique_norm": "Florida International University;Mohamed bin Zayed University of Artificial Intelligence;Dalhousie University;University of Delaware;VinAI 
Research;Meta", "aff_unique_dep": ";;;;;Meta Platforms, Inc.", "aff_unique_url": "https://www.fiu.edu;https://mbzuai.ac.ae;https://www.dal.ca;https://www.udel.edu;https://www.vinai.io/;https://www.meta.com", "aff_unique_abbr": "FIU;MBZUAI;Dal;UD;VinAI;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;0;3;0;0", "aff_country_unique": "United States;United Arab Emirates;Canada;Vietnam" }, { "title": "Disentangling the Mechanisms Behind Implicit Regularization in SGD", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11378", "id": "LE5LxBgjB4V", "poster": "/media/PosterPDFs/ICLR%202023/11378.png?t=1681684172.2655332", "openreview": "https://openreview.net/forum?id=LE5LxBgjB4V", "slides": "https://iclr.cc/virtual/2023/poster/11378", "video": "https://iclr.cc/virtual/2023/poster/11378", "author_site": "Zachary Novack, Simran Kaur, Tanya Marwah, Saurabh Garg, Zachary Lipton", "tldr": "", "abstract": "A number of competing hypotheses have been proposed to explain why small-batch Stochastic Gradient Descent (SGD) leads to improved generalization over the full-batch regime, with recent work crediting the implicit regularization of various quantities throughout training. However, to date, empirical evidence assessing the explanatory power of these hypotheses is lacking. In this paper, we conduct an extensive empirical evaluation, focusing on the ability of various theorized mechanisms to close the small-to-large batch generalization gap. Additionally, we characterize how the quantities that SGD has been claimed to (implicitly) regularize change over the course of training. By using micro-batches, i.e. disjoint smaller subsets of each mini-batch, we empirically show that explicitly penalizing the gradient norm or the Fisher Information Matrix trace, averaged over micro-batches, in the large-batch regime recovers small-batch SGD generalization, whereas Jacobian-based regularizations fail to do so. This generalization performance is shown to often be correlated with how well the regularized model\u2019s gradient norms resemble those of small-batch SGD. We additionally show that this behavior breaks down as the micro-batch size approaches the batch size. 
Finally, we note that in this line of inquiry, positive experimental findings on CIFAR10 are often reversed on other datasets like CIFAR100, highlighting the need to test hypotheses on a wider collection of datasets.", "keywords": "deep learning;generalization;implicit regularization;sgd", "primary_area": "", "supplementary_material": "", "author": "Zachary Novack;Simran Kaur;Tanya Marwah;Saurabh Garg;Zachary Chase Lipton", "authorids": "~Zachary_Novack1;~Simran_Kaur1;~Tanya_Marwah1;~Saurabh_Garg3;~Zachary_Chase_Lipton1", "gender": "M;F;F;M;Unspecified", "homepage": "https://zacharynovack.github.io/;;https://tm157.github.io/;http://saurabhgarg1996.github.io/;http://zacklipton.com", "dblp": "334/7662;211/3465;190/7486;80/208;", "google_scholar": "fZKJdb0AAAAJ;AMHNjTIAAAAJ;_Y_XvN4AAAAJ;SAnJ1hIAAAAJ;MN9Kfg8AAAAJ", "orcid": ";;;;", "linkedin": "zachary-novack/;;;saurabh-garg-b680b5b8/;", "or_profile": "~Zachary_Novack1;~Simran_Kaur1;~Tanya_Marwah1;~Saurabh_Garg3;~Zachary_Chase_Lipton1", "aff": "University of California, San Diego;Princeton University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "ucsd.edu;princeton.edu;cmu.edu;cmu.edu;cmu.edu", "position": "PhD student;PhD student;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nnovack2023disentangling,\ntitle={Disentangling the Mechanisms Behind Implicit Regularization in {SGD}},\nauthor={Zachary Novack and Simran Kaur and Tanya Marwah and Saurabh Garg and Zachary Chase Lipton},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=LE5LxBgjB4V}\n}", "github": "", "project": "", "reviewers": "rQEx;i18E;fWma;QhHW", "pdf_size": 1399570, "recommendation": "5;6;6;6", "confidence": "3;3;4;4", "correctness": "3;4;4;3", "technical_novelty": "2;3;2;2", "empirical_novelty": "2;0;3;3", "wc_summary_paper": "51;108;287;116", "wc_strength_and_weaknesses": "207;215;279;167", "wc_clarity_quality_novelty_and_reproducibility": "69;24;30;110", "wc_summary_review": "39;7;54;69", "wc_review": "366;354;650;462", "wc_reply_reviewers": "8;0;0;217", "wc_reply_authors": "968;318;924;587", "reply_reviewers": "1;0;0;1", "reply_authors": "2;1;2;1", "recommendation_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 1.224744871391589 ], "wc_summary_paper_avg": [ 140.5, 88.21706184180019 ], "wc_strength_and_weaknesses_avg": [ 217.0, 40.149719799769464 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 58.25, 34.513584282134474 ], "wc_summary_review_avg": [ 42.25, 22.949673200287624 ], "wc_review_avg": [ 458.0, 118.490505948789 ], "wc_reply_reviewers_avg": [ 56.25, 92.86650364905529 ], "wc_reply_authors_avg": [ 699.25, 264.90127878136036 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13753621829567743236&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=LE5LxBgjB4V", "email": "ucsd.edu;princeton.edu;cmu.edu;cmu.edu;cmu.edu", "author_num": 5, "aff_unique_index": "0;1;2;2;2", "aff_unique_norm": "University of California, San Diego;Princeton University;Carnegie Mellon 
University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ucsd.edu;https://www.princeton.edu;https://www.cmu.edu", "aff_unique_abbr": "UCSD;Princeton;CMU", "aff_campus_unique_index": "0", "aff_campus_unique": "San Diego;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Conditional Antibody Design as 3D Equivariant Graph Translation", "status": "Top-5%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11821", "id": "LFHFQbjxIiP", "poster": "/media/PosterPDFs/ICLR%202023/11821.png?t=1681548076.867594", "openreview": "https://openreview.net/forum?id=LFHFQbjxIiP", "slides": "https://iclr.cc/virtual/2023/poster/11821", "video": "https://iclr.cc/virtual/2023/poster/11821", "author_site": "Xiangzhe Kong, Wenbing Huang, Yang Liu", "tldr": "", "abstract": "Antibody design is valuable for therapeutic usage and biological research. Existing deep-learning-based methods encounter several key issues: 1) incomplete context for Complementarity-Determining Regions (CDRs) generation; 2) incapability of capturing the entire 3D geometry of the input structure; 3) inefficient prediction of the CDR sequences in an autoregressive manner. In this paper, we propose Multi-channel Equivariant Attention Network (MEAN) to co-design 1D sequences and 3D structures of CDRs. To be specific, MEAN formulates antibody design as a conditional graph translation problem by importing extra components including the target antigen and the light chain of the antibody. Then, MEAN resorts to E(3)-equivariant message passing along with a proposed attention mechanism to better capture the geometrical correlation between different components. Finally, it outputs both the 1D sequences and 3D structure via a multi-round progressive full-shot scheme, which enjoys more efficiency and precision against previous autoregressive approaches. Our method significantly surpasses state-of-the-art models in sequence and structure modeling, antigen-binding CDR design, and binding affinity optimization. 
Specifically, the relative improvement to baselines is about 23\\% in antigen-binding CDR design and 34\\% for affinity optimization.", "keywords": "conditional antibody generation;equivariant;multi-channel attention", "primary_area": "", "supplementary_material": "", "author": "Xiangzhe Kong;Wenbing Huang;Yang Liu", "authorids": "~Xiangzhe_Kong1;~Wenbing_Huang1;~Yang_Liu19", "gender": "M;M;M", "homepage": "https://kxz18.github.io/;https://gsai.ruc.edu.cn/english/wenbing_huang;http://nlp.csai.tsinghua.edu.cn/~ly/", "dblp": "293/7526;155/3181-1.html;51/3710-5", "google_scholar": "0oSFYmkAAAAJ;0yNkmO4AAAAJ;https://scholar.google.com.hk/citations?user=lVhoKNcAAAAJ", "orcid": ";;0000-0002-3087-242X", "linkedin": ";;", "or_profile": "~Xiangzhe_Kong1;~Wenbing_Huang1;~Yang_Liu19", "aff": "Tsinghua University;Renmin University of China;Tsinghua University", "aff_domain": "tsinghua.edu.cn;ruc.edu.cn;tsinghua.edu.cn", "position": "PhD student;Associate Professor;Professor", "bibtex": "@inproceedings{\nkong2023conditional,\ntitle={Conditional Antibody Design as 3D Equivariant Graph Translation},\nauthor={Xiangzhe Kong and Wenbing Huang and Yang Liu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=LFHFQbjxIiP}\n}", "github": "", "project": "", "reviewers": "tzFw;n2oD;JzYy;HYV9", "pdf_size": 15552203, "recommendation": "8;8;8;8", "confidence": "4;4;3;3", "correctness": "4;3;4;4", "technical_novelty": "3;4;4;3", "empirical_novelty": "4;3;4;3", "wc_summary_paper": "64;396;67;117", "wc_strength_and_weaknesses": "378;630;169;56", "wc_clarity_quality_novelty_and_reproducibility": "95;286;40;221", "wc_summary_review": "36;159;32;34", "wc_review": "573;1471;308;428", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "703;1453;341;166", "reply_reviewers": "0;0;0;0", "reply_authors": "1;3;1;1", "recommendation_avg": [ 8.0, 0.0 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 3.5, 0.5 ], "empirical_novelty_avg": [ 3.5, 0.5 ], "wc_summary_paper_avg": [ 161.0, 137.30076474659563 ], "wc_strength_and_weaknesses_avg": [ 308.25, 218.75142856676388 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 160.5, 97.7509590745789 ], "wc_summary_review_avg": [ 65.25, 54.14505979311501 ], "wc_review_avg": [ 695.0, 457.7439240448747 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 665.75, 494.0553486199699 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 86, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6145965657482055104&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=LFHFQbjxIiP", "email": "tsinghua.edu.cn;ruc.edu.cn;tsinghua.edu.cn", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Tsinghua University;Renmin University of China", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;http://www.ruc.edu.cn", "aff_unique_abbr": "THU;RUC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "LGbzYw_pnsc", "title": "Nearing or Surpassing: Overall Evaluation of Human-Machine Dynamic Vision Ability", "track": "main", "status": "Reject", "tldr": "", "abstract": "Dynamic visual ability (DVA), a fundamental 
function of the human visual system, has been successfully modeled by many computer vision tasks in recent decades. However, these prosperous developments have mainly concentrated on using deep neural networks (DNNs) to simulate the human DVA system, but evaluation systems still simply compare performance between machines, making it tough to determine how large the gap between humans and machines is in dynamic vision tasks. In fact, neglecting this issue not only makes it hard to determine the correctness of current research routes, but also prevents truly measuring the DVA intelligence of machines. To answer this question, this work designs a comprehensive evaluation system based on the 3E paradigm -- we carefully pick 87 videos from various dimensions to construct the environment, confirming it can cover both perceptual and cognitive components of DVA; select 20 representative machines and 15 human subjects to form the task executors, ensuring that different model structures can help us observe the effectiveness of research development; and finally quantify their DVA with a strict evaluation process. Based on detailed experimental analyses, we first determine that the current algorithm research route has effectively shortened the gap. Besides, we further summarize the weaknesses of different executors, and design a human-machine cooperation mechanism with superhuman performance. In summary, the contributions include: (1) quantifying the DVA of humans and machines, (2) proposing a new view to evaluate DVA intelligence based on the human-machine comparison, and (3) providing a possibility of human-machine cooperation. The datasets, toolkits, codes, and evaluation metrics will be open-sourced to help researchers develop intelligent research on dynamic vision tasks.", "keywords": "Dynamic Visual Ability;Machine Intelligence Evaluation;Single Object Tracking", "primary_area": "", "supplementary_material": "/attachment/7f68350971d9ec5dcf1917e32df5a294933d570b.zip", "author": "Shiyu Hu;Xin Zhao;Yipei Wang;Yanhu Shan;Kaiqi Huang", "authorids": "~Shiyu_Hu1;~Xin_Zhao4;~Yipei_Wang2;~Yanhu_Shan3;~Kaiqi_Huang1", "gender": "F;M;;M;M", "homepage": "https://huuuuusy.github.io/;https://www.xinzhaoai.com/;;https://sites.google.com/site/yanhushan/;https://people.ucas.ac.cn/~huangkaiqi?language=en", "dblp": ";68/2766-12;;https://dblp.org/pers/s/Shan:Yanhu.html;89/7026", "google_scholar": "49W-Rx4AAAAJ;Emz6Cbv7LqEC;;_nc83HsAAAAJ;caQ-OmYAAAAJ", "orcid": "0000-0002-5872-7566;0000-0002-7660-9897;;;", "linkedin": "hushiyu1995/;;;;", "or_profile": "~Shiyu_Hu1;~Xin_Zhao4;~Yipei_Wang2;~Yanhu_Shan3;~Kaiqi_Huang1", "aff": "Chinese academy of science;Institute of Automation, Chinese Academy of Sciences;;;Institute of automation, Chinese academy of science", "aff_domain": "ia.ac.cn;ia.ac.cn;;;nlpr.ia.ac.cn", "position": "PhD student;Associate Professor;;;Professor", "bibtex": "@misc{\nhu2023nearing,\ntitle={Nearing or Surpassing: Overall Evaluation of Human-Machine Dynamic Vision Ability},\nauthor={Shiyu Hu and Xin Zhao and Yipei Wang and Yanhu Shan and Kaiqi Huang},\nyear={2023},\nurl={https://openreview.net/forum?id=LGbzYw_pnsc}\n}", "github": "", "project": "", "reviewers": "RdAz;zwYU;co2u", "site": "https://openreview.net/forum?id=LGbzYw_pnsc", "pdf_size": 52214423, "recommendation": "3;3;6", "confidence": "3;3;5", "correctness": "3;3;4", "technical_novelty": "3;1;2", "empirical_novelty": "3;2;3", "wc_summary_paper": "63;78;94", "wc_strength_and_weaknesses": "158;198;144", "wc_clarity_quality_novelty_and_reproducibility":
"13;100;26", "wc_summary_review": "32;29;24", "wc_review": "266;405;288", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "478;765;331", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 4.0, 1.4142135623730951 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.0, 0.816496580927726 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 78.33333333333333, 12.657891697365017 ], "wc_strength_and_weaknesses_avg": [ 166.66666666666666, 22.88133640230735 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 46.333333333333336, 38.31738798799081 ], "wc_summary_review_avg": [ 28.333333333333332, 3.299831645537222 ], "wc_review_avg": [ 319.6666666666667, 61.004553564103354 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 524.6666666666666, 180.22640082839018 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 1.0, "corr_recommendation_correctness": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:o3gqEa9CdT4J:scholar.google.com/&scioq=Nearing+or+Surpassing:+Overall+Evaluation+of+Human-Machine+Dynamic+Vision+Ability&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Chinese Academy of Sciences", "aff_unique_dep": "", "aff_unique_url": "http://www.cas.cn", "aff_unique_abbr": "CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Distributional Meta-Gradient Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11653", "id": "LGkmUauBUL", "poster": "", "openreview": "https://openreview.net/forum?id=LGkmUauBUL", "slides": "https://iclr.cc/virtual/2023/poster/11653", "video": "https://iclr.cc/virtual/2023/poster/11653", "author_site": "Haiyan Yin, shuicheng YAN, Zhongwen Xu", "tldr": "A model-free meta gradient RL algorithm with distributional return", "abstract": "Meta-gradient reinforcement learning (RL) algorithms have substantially boosted the performance of RL agents by learning an adaptive return. All the existing algorithms adhere to the same reward learning principle, where the adaptive return is simply formulated in the form of expected cumulative rewards, upon which the policy and critic update rules are specified under well-adopted distance metrics. In this paper, we present a novel algorithm that builds on the success of meta-gradient RL algorithms and effectively improves such algorithms by following a simple recipe, i.e., going beyond the expected return to formulate and learn the return in a more expressive form, value distributions. To this end, we first formulate a distributional return that could effectively capture bootstrapping and discounting behaviors over distributions, to form an informative distributional return target in value update. Then we derive an efficient meta update rule to learn the adaptive distributional return with meta-gradients. 
For empirical evaluation, we first present an illustrative example on a toy two-color grid-world domain, which validates the benefit of learning distributional return over expectation; then we conduct extensive comparisons on a large-scale RL benchmark Atari 2600, where we confirm that our proposed method with distributional return works seamlessly well with the actor-critic framework and leads to state-of-the-art median human normalized score among meta-gradient RL literature.", "keywords": "Reinforcement Learning;Meta Learning", "primary_area": "", "supplementary_material": "/attachment/6b615a9f179c2e7c5a8e0e3dfcf256395300ed5c.zip", "author": "Haiyan Yin;Shuicheng YAN;Zhongwen Xu", "authorids": "~Haiyan_Yin1;~Shuicheng_YAN3;~Zhongwen_Xu1", "gender": ";M;M", "homepage": ";https://yanshuicheng.ai/;https://zhongwen.one/", "dblp": ";y/ShuichengYan;130/5077", "google_scholar": ";https://scholar.google.com.hk/citations?user=DNuiPHwAAAAJ;https://scholar.google.co.uk/citations?user=T4xuHn8AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Haiyan_Yin1;~Shuicheng_YAN3;~Zhongwen_Xu1", "aff": ";sea Group;Sea AI Lab", "aff_domain": ";sea.com;sea.com", "position": ";Researcher;Principal Researcher", "bibtex": "@inproceedings{\nyin2023distributional,\ntitle={Distributional Meta-Gradient Reinforcement Learning},\nauthor={Haiyan Yin and Shuicheng YAN and Zhongwen Xu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=LGkmUauBUL}\n}", "github": "", "project": "", "reviewers": "LNTi;ApMf;s6vn;K6pA", "pdf_size": 2648327, "recommendation": "6;6;6;8", "confidence": "4;4;4;4", "correctness": "4;3;3;4", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "119;89;60;91", "wc_strength_and_weaknesses": "183;197;396;79", "wc_clarity_quality_novelty_and_reproducibility": "668;103;93;48", "wc_summary_review": "156;43;146;23", "wc_review": "1126;432;695;241", "wc_reply_reviewers": "122;0;154;0", "wc_reply_authors": "1803;714;1765;313", "reply_reviewers": "1;0;2;0", "reply_authors": "4;1;4;1", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 89.75, 20.873128658636684 ], "wc_strength_and_weaknesses_avg": [ 213.75, 114.67208683895136 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 228.0, 254.87742151865865 ], "wc_summary_review_avg": [ 92.0, 59.5273046592906 ], "wc_review_avg": [ 623.5, 331.88740560617845 ], "wc_reply_reviewers_avg": [ 69.0, 69.92138442565336 ], "wc_reply_authors_avg": [ 1148.75, 651.0170408675951 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.5, 1.5 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8708060085349141186&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "pdf": "https://openreview.net/pdf?id=LGkmUauBUL", "email": ";sea.com;sea.com", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Sea Group;Sea AI Lab", "aff_unique_dep": ";", "aff_unique_url": ";", "aff_unique_abbr": ";", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "", "aff_country_unique": "" }, { "id": "LHBiPX5BOwZ", "title": "A Robustly and 
Effectively Optimized Pretraining Approach for Masked Autoencoder", "track": "main", "status": "Reject", "tldr": "", "abstract": "Recently, Masked Image Modeling (MIM) has increasingly reshaped the status quo of self-supervised visual pre-training. This paper does not describe a novel MIM method; instead, it unravels several fundamental ingredients for robustly and effectively pre-training a Masked AutoEncoder (MAE), with improved downstream performance as a byproduct. We highlight the great significance of encouraging high-variance interactions across different tokens throughout the whole autoencoder, while simultaneously smoothing the inter-patch variances of the reconstructed target. First, at the decoding phase, we apply standard dropout to the attention probabilities as noise to randomly mask out the edge connections across different tokens. Otherwise, their shortcut interactions might hinder the emergence of meaningful contextual representations. Second, we point out that the per-patch normalization will fail unless the patch pixels rely on some population statistics to reduce inter-patch variance and then smooth the reconstruction. Third, we show that autoencoders with different capacities encounter this issue to varying degrees, and that learnable masked tokens can be employed to manipulate the variance depending on their inserted position and ratio in the model. The techniques proposed here are simple and effective: they stably benefit the pre-training of a masked autoencoder and obtain superior performance across different downstream tasks. ", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Ruijia Xu;Yixiao Ge;Kun Yi;XUYUAN XU;Yexin Wang;Ying-Cong Chen;Hao Chen;Ying Shan", "authorids": "~Ruijia_Xu3;~Yixiao_Ge2;~Kun_Yi1;~XUYUAN_XU1;~Yexin_Wang3;~Ying-Cong_Chen1;~Hao_Chen1;~Ying_Shan2", "gender": "M;F;M;M;M;M;M;M", "homepage": ";https://geyixiao.com/;http://www.lamda.nju.edu.cn/yik/;;;https://www.yingcong.me/;https://cse.hkust.edu.hk/~jhc/;", "dblp": "209/9703;228/6649;202/8470;;51/2047.html;137/6578;86/475-11;68/5910", "google_scholar": "https://scholar.google.co.jp/citations?user=-1scaLMAAAAJ;TtU74NAAAAAJ;6xtzo4AAAAAJ;https://scholar.google.com.hk/citations?hl=zh-CN;;https://scholar.google.com.hk/citations?user=n7j4bJUAAAAJ;https://scholar.google.com.hk/citations?user=Z_t5DjwAAAAJ;4oXBp9UAAAAJ", "orcid": ";;;;;;0000-0002-8400-3780;0000-0001-7673-8325", "linkedin": ";;;;yexin-wang-73304332/;;;YingShanProfile/", "or_profile": "~Ruijia_Xu3;~Yixiao_Ge2;~Kun_Yi1;~XUYUAN_XU1;~Yexin_Wang3;~Ying-Cong_Chen1;~Hao_Chen1;~Ying_Shan2", "aff": "Hong Kong University of Science and Technology;Tencent;Tencent ARC Lab;PCG AI Technology Center;Tencent;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Tencent PCG ARC Lab", "aff_domain": "ust.hk;tencent.com;tencent.com;tencent.com;tencent.com;hkust-gz.edu.cn;ust.hk;arc.tencent.com", "position": "PhD student;Researcher;Researcher;expert engineer;Researcher;Assistant Professor;Assistant Professor;Director", "bibtex": "@misc{\nxu2023a,\ntitle={A Robustly and Effectively Optimized Pretraining Approach for Masked Autoencoder},\nauthor={Ruijia Xu and Yixiao Ge and Kun Yi and XUYUAN XU and Yexin Wang and Ying-Cong Chen and Hao Chen and Ying Shan},\nyear={2023},\nurl={https://openreview.net/forum?id=LHBiPX5BOwZ}\n}", "github": "", "project": "", "reviewers": "6ZYw;GC97;sWps;Ukam", "site": "https://openreview.net/forum?id=LHBiPX5BOwZ", "pdf_size": 564628, "recommendation": "1;3;3;3", "confidence": "4;4;4;5",
"correctness": "2;4;3;1", "technical_novelty": "1;1;2;2", "empirical_novelty": "1;1;2;2", "wc_summary_paper": "26;25;76;46", "wc_strength_and_weaknesses": "65;44;168;283", "wc_clarity_quality_novelty_and_reproducibility": "11;29;61;200", "wc_summary_review": "17;22;22;94", "wc_review": "119;120;327;623", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 2.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 2.5, 1.118033988749895 ], "technical_novelty_avg": [ 1.5, 0.5 ], "empirical_novelty_avg": [ 1.5, 0.5 ], "wc_summary_paper_avg": [ 43.25, 20.680606857633556 ], "wc_strength_and_weaknesses_avg": [ 140.0, 94.96578331167495 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 75.25, 74.21716445674815 ], "wc_summary_review_avg": [ 38.75, 31.963846764743444 ], "wc_review_avg": [ 297.25, 206.26969602925195 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 0.2581988897471611, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:G-rFU5ak4GAJ:scholar.google.com/&scioq=A+Robustly+and+Effectively+Optimized+Pretraining+Approach+for+Masked+Autoencoder&hl=en&as_sdt=0,18", "gs_version_total": 0, "aff_unique_index": "0;1;1;2;1;0;0;1", "aff_unique_norm": "Hong Kong University of Science and Technology;Tencent;PCG AI Technology Center", "aff_unique_dep": ";Tencent Holdings Limited;", "aff_unique_url": "https://www.ust.hk;https://www.tencent.com;", "aff_unique_abbr": "HKUST;Tencent;", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China;" }, { "id": "LI4mXhTg23M", "title": "Learning Disentanglement in Autoencoders through Euler Encoding", "track": "main", "status": "Reject", "tldr": "We propose the first deterministic model that is aiming to achieve disentanglement based on autoencoders without a pair of images or labels by explicitly introducing inductive biases into a model architecture through Euler encoding.", "abstract": "Noting the importance of factorizing (or disentangling) the latent space, we propose a novel, non-probabilistic disentangling framework for autoencoders, based on the principles of symmetry transformations that are independent of one another. To the best of our knowledge, this is the first deterministic model that is aiming to achieve disentanglement based on autoencoders without pairs of images or labels, by explicitly introducing inductive biases into a model architecture through Euler encoding. The proposed model is then compared with a number of state-of-the-art models, relevant to disentanglement, including symmetry-based and generative models based on autoencoders. Our evaluation using six different disentanglement metrics, including the unsupervised disentanglement metric we propose here in this paper, shows that the proposed model can offer better disentanglement, especially when variances of the features are different, where other methods may struggle. 
We believe that this model opens several opportunities for linear disentangled representation learning based on deterministic autoencoders.", "keywords": "disentanglement;disentangling;linear disentangled representations;autoencoder;latent space;factorizing;latent-space factorization;latent-space regularization", "primary_area": "", "supplementary_material": "/attachment/0097014705f030a9338edca4e7c68ee89fc177d5.zip", "author": "Jaehoon Cha;Jeyan Thiyagalingam", "authorids": "~Jaehoon_Cha1;~Jeyan_Thiyagalingam1", "gender": ";", "homepage": ";", "dblp": ";", "google_scholar": "https://scholar.google.com/citations?hl=en;", "orcid": ";", "linkedin": ";", "or_profile": "~Jaehoon_Cha1;~Jeyan_Thiyagalingam1", "aff": "Science and Technology Facilities Council;", "aff_domain": "stfc.ac.uk;", "position": "Researcher;", "bibtex": "@misc{\ncha2023learning,\ntitle={Learning Disentanglement in Autoencoders through Euler Encoding},\nauthor={Jaehoon Cha and Jeyan Thiyagalingam},\nyear={2023},\nurl={https://openreview.net/forum?id=LI4mXhTg23M}\n}", "github": "", "project": "", "reviewers": "LTGT;T3ur;9cKh;JNBF", "site": "https://openreview.net/forum?id=LI4mXhTg23M", "pdf_size": 15788372, "recommendation": "3;5;6;6", "confidence": "4;4;2;4", "correctness": "1;3;3;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "33;76;25;73", "wc_strength_and_weaknesses": "110;304;92;189", "wc_clarity_quality_novelty_and_reproducibility": "794;51;12;38", "wc_summary_review": "48;59;44;38", "wc_review": "985;490;173;338", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "961;521;201;379", "reply_reviewers": "0;0;0;0", "reply_authors": "2;1;1;1", "recommendation_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "correctness_avg": [ 2.5, 0.8660254037844386 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 51.75, 22.949673200287624 ], "wc_strength_and_weaknesses_avg": [ 173.75, 83.58341641737313 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 223.75, 329.53328739294307 ], "wc_summary_review_avg": [ 47.25, 7.660776723022281 ], "wc_review_avg": [ 496.5, 303.5 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 515.5, 281.08850919238944 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.4714045207910316, "corr_recommendation_correctness": 0.9428090415820632, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:1xMIWytOmZkJ:scholar.google.com/&scioq=Learning+Disentanglement+in+Autoencoders+through+Euler+Encoding&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Science and Technology Facilities Council", "aff_unique_dep": "", "aff_unique_url": "https://www.stfc.ac.uk", "aff_unique_abbr": "STFC", "aff_country_unique_index": "0", "aff_country_unique": "United Kingdom" }, { "title": "DexDeform: Dexterous Deformable Object Manipulation with Human Demonstrations and Differentiable Physics", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11043", "id": "LIV7-_7pYPl", "poster": "", "openreview": "https://openreview.net/forum?id=LIV7-_7pYPl", "slides": "https://iclr.cc/virtual/2023/poster/11043", "video": "https://iclr.cc/virtual/2023/poster/11043", "author_site": "Sizhe Li, Zhiao Huang, Tao Chen, Tao Du, Hao Su, Joshua B Tenenbaum, 
Chuang Gan", "tldr": "We investigate the problem of learning dexterous manipulation of deformable objects using multi-fingered hands.", "abstract": "In this work, we aim to learn dexterous manipulation of deformable objects using multi-fingered hands. Reinforcement learning approaches for dexterous rigid object manipulation would struggle in this setting due to the complexity of physics interaction with deformable objects. At the same time, previous trajectory optimization approaches with differentiable physics for deformable manipulation would suffer from local optima caused by the explosion of contact modes from hand-object interactions. To address these challenges, we propose DexDeform, a principled framework that abstracts dexterous manipulation skills from human demonstration, and refines the learned skills with differentiable physics. Concretely, we first collect a small set of human demonstrations using teleoperation. And we then train a skill model using demonstrations for planning over action abstractions in imagination. To explore the goal space, we further apply augmentations to the existing deformable shapes in demonstrations and use a gradient optimizer to refine the actions planned by the skill model. Finally, we adopt the refined trajectories as new demonstrations for finetuning the skill model. To evaluate the effectiveness of our approach, we introduce a suite of six challenging dexterous deformable object manipulation tasks. Compared with baselines, DexDeform is able to better explore and generalize across novel goals unseen in the initial human demonstrations. Additional materials can be found at our project website: https://sites.google.com/view/dexdeform.", "keywords": "Deformable Object Manipulation;Dexterous Manipulation;Differentiable Physics", "primary_area": "", "supplementary_material": "", "author": "Sizhe Li;Zhiao Huang;Tao Chen;Tao Du;Hao Su;Joshua B. Tenenbaum;Chuang Gan", "authorids": "~Sizhe_Li1;~Zhiao_Huang1;~Tao_Chen1;~Tao_Du1;~Hao_Su1;~Joshua_B._Tenenbaum1;~Chuang_Gan1", "gender": ";M;M;;M;;M", "homepage": "https://sizhe-li.github.io/;;https://taochenshh.github.io;https://people.iiis.tsinghua.edu.cn/~taodu/;http://ai.ucsd.edu/~haosu;;http://people.csail.mit.edu/ganchuang/", "dblp": ";172/1410;;51/3026-1;09/4945-1;t/JoshuaBTenenbaum;139/6993", "google_scholar": ";;gdUv1PIAAAAJ;https://scholar.google.com/citations?hl=en;1P8Zu04AAAAJ;;PTeSCbIAAAAJ", "orcid": ";;;0000-0001-7337-7667;;;", "linkedin": ";;;;;;", "or_profile": "~Sizhe_Li1;~Zhiao_Huang1;~Tao_Chen1;~Tao_Du1;~Hao_Su1;~Joshua_B._Tenenbaum1;~Chuang_Gan1", "aff": "Massachusetts Institute of Technology;University of California, San Diego, University of California, San Diego;Massachusetts Institute of Technology;Shanghai Qi Zhi Institute;University of California, San Diego;Massachusetts Institute of Technology;MIT-IBM Watson AI Lab", "aff_domain": "mit.edu;eng.ucsd.edu;mit.edu;sqz.ac.cn;ucsd.edu;mit.edu;ibm.com", "position": "PhD student;PhD student;PhD student;Principal investigator;Assistant Professor;Professor;PhD student", "bibtex": "@inproceedings{\nli2023dexdeform,\ntitle={DexDeform: Dexterous Deformable Object Manipulation with Human Demonstrations and Differentiable Physics},\nauthor={Sizhe Li and Zhiao Huang and Tao Chen and Tao Du and Hao Su and Joshua B. 
Tenenbaum and Chuang Gan},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=LIV7-_7pYPl}\n}", "github": "", "project": "", "reviewers": "WWJM;whUB;uqMy", "pdf_size": 1951080, "recommendation": "6;8;8", "confidence": "4;3;4", "correctness": "3;3;4", "technical_novelty": "2;3;3", "empirical_novelty": "3;3;3", "wc_summary_paper": "114;107;95", "wc_strength_and_weaknesses": "361;314;165", "wc_clarity_quality_novelty_and_reproducibility": "100;26;44", "wc_summary_review": "53;88;41", "wc_review": "628;535;345", "wc_reply_reviewers": "21;264;35", "wc_reply_authors": "1746;1221;670", "reply_reviewers": "1;2;1", "reply_authors": "5;6;2", "recommendation_avg": [ 7.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 105.33333333333333, 7.845734863959881 ], "wc_strength_and_weaknesses_avg": [ 280.0, 83.55038400071341 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 56.666666666666664, 31.510139461590597 ], "wc_summary_review_avg": [ 60.666666666666664, 19.93879523831757 ], "wc_review_avg": [ 502.6666666666667, 117.77473790626277 ], "wc_reply_reviewers_avg": [ 106.66666666666667, 111.39818470493832 ], "wc_reply_authors_avg": [ 1212.3333333333333, 439.3179056471774 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 4.333333333333333, 1.699673171197595 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.49999999999999983, "corr_recommendation_correctness": 0.49999999999999983, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11110197538039139217&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=LIV7-_7pYPl", "email": "mit.edu;eng.ucsd.edu;mit.edu;sqz.ac.cn;ucsd.edu;mit.edu;ibm.com", "author_num": 7, "aff_unique_index": "0;1;0;2;1;0;0", "aff_unique_norm": "Massachusetts Institute of Technology;University of California, San Diego;Shanghai Qi Zhi Institute", "aff_unique_dep": ";;", "aff_unique_url": "https://web.mit.edu;https://www.ucsd.edu;https://www.qz.io", "aff_unique_abbr": "MIT;UCSD;", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";San Diego", "aff_country_unique_index": "0;0;0;1;0;0;0", "aff_country_unique": "United States;China" }, { "id": "LKXAKOxu-T", "title": "Multi-stationary point losses for robust model", "track": "main", "status": "Withdraw", "tldr": "We propose a family of multi-stationary point losses, which improve robustness.", "abstract": "We identify that cross-entropy (CE) loss does not guarantee a robust boundary for neural networks. The reason is that CE loss has only one asymptotic stationary point. It stops pushing the boundary forward as long as the sample is correctly classified, which leaves the boundary right next to the samples. A robust boundary should be kept in the middle of samples from different classes, thus maximizing the margins from the boundary to the samples. In this paper, we propose a family of new losses, called multi-stationary point (MS) losses, which introduce additional stationary points beyond the asymptotic stationary point. We prove that a robust boundary can be guaranteed by MS loss without losing much accuracy.
With MS loss, bigger perturbations are required to generate adversarial examples. We demonstrate that robustness is improved under a variety of adversarial attacks by applying MS loss. Moreover, the robust boundary learned by MS loss also performs well on imbalanced datasets. Finally, we modify other losses into two-stationary-point forms and observe improved model robustness.", "keywords": "Robustness;MS loss;Cross-entropy loss;Multi-stationary point losses;Adversarial attack", "primary_area": "", "supplementary_material": "/attachment/1d6f49bf89980be6120cabf72bdb8df6ebaca010.zip", "author": "Weiwei Gao;Yao Li;Junqi Gao;Zhichang Guo;Dazhi Zhang", "authorids": "~Weiwei_Gao2;yaoli0508@hit.edu.cn;21s112090@stu.hit.edu.cn;mathgzc@hit.edu.cn;zhangdazhi@hit.edu.cn", "gender": ";;;;", "homepage": "https://b0ydugpag1.feishu.cn/docx/doxcnTme2Q5VAJTzZ47RNsjkYdS;;;;", "dblp": ";;;;", "google_scholar": ";;;;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Weiwei_Gao2;yaoli0508@hit.edu.cn;21s112090@stu.hit.edu.cn;mathgzc@hit.edu.cn;zhangdazhi@hit.edu.cn", "aff": "Harbin Institute of Technology;;;;", "aff_domain": "hit.edu.cn;;;;", "position": "PhD student;;;;", "bibtex": "@misc{\ngao2023multistationary,\ntitle={Multi-stationary point losses for robust model},\nauthor={Weiwei Gao and Yao Li and Junqi Gao and Zhichang Guo and Dazhi Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=LKXAKOxu-T}\n}", "github": "", "project": "", "reviewers": "inuP;MGsf;BVto", "site": "https://openreview.net/forum?id=LKXAKOxu-T", "pdf_size": 685698, "recommendation": "1;3;8", "confidence": "4;3;4", "correctness": "2;2;4", "technical_novelty": "2;2;4", "empirical_novelty": "2;2;4", "wc_summary_paper": "22;27;85", "wc_strength_and_weaknesses": "172;270;76", "wc_clarity_quality_novelty_and_reproducibility": "11;22;8", "wc_summary_review": "24;14;2", "wc_review": "229;333;171", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.0, 2.943920288775949 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.9428090415820634 ], "technical_novelty_avg": [ 2.6666666666666665, 0.9428090415820634 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.9428090415820634 ], "wc_summary_paper_avg": [ 44.666666666666664, 28.592928418676454 ], "wc_strength_and_weaknesses_avg": [ 172.66666666666666, 79.20157125265186 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 13.666666666666666, 6.018490028422597 ], "wc_summary_review_avg": [ 13.333333333333334, 8.993825042154695 ], "wc_review_avg": [ 244.33333333333334, 67.01906859659836 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.24019223070763068, "corr_recommendation_correctness": 0.960768922830523, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:-wTPSfv9_QYJ:scholar.google.com/&scioq=Multi-stationary+point+losses+for+robust+model&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Harbin Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.hit.edu.cn/", "aff_unique_abbr": "HIT", "aff_campus_unique_index": "0", "aff_campus_unique": "Harbin", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "id": "LLy2vm_p35C", "title": "Few-Shot Transferable
Robust Representation Learning via Bilevel Attacks", "track": "main", "status": "Reject", "tldr": "", "abstract": "Existing adversarial learning methods for enhancing the robustness of deep neural networks assume the availability of a large amount of data from which we can generate adversarial examples. However, in an adversarial meta-learning setting, the model needs to train with only a few adversarial examples to learn a robust model for unseen tasks, which is a very difficult goal to achieve. Further, learning transferable robust representations for unseen domains is a difficult problem even with a large amount of data. To tackle such a challenge, we propose a novel adversarial self-supervised meta-learning framework with bilevel attacks which aims to learn robust representations that can generalize across tasks and domains. Specifically, in the inner loop, we update the parameters of the given encoder by taking inner gradient steps using two different sets of augmented samples, and generate adversarial examples for each view by maximizing the instance classification loss. Then, in the outer loop, we meta-learn the encoder parameter to maximize the agreement between the two adversarial examples, which enables it to learn robust representations. We experimentally validate the effectiveness of our approach on unseen domain adaptation tasks, on which it achieves impressive performance. Specifically, our method significantly outperforms the state-of-the-art meta-adversarial learning methods on few-shot learning tasks, as well as self-supervised learning baselines in standard learning settings with large-scale datasets.", "keywords": "robust meta-learning;unseen domain;self-supervised learning;robustness", "primary_area": "", "supplementary_material": "", "author": "Hyeonjeong Ha;Minseon Kim;Sung Ju Hwang", "authorids": "~Hyeonjeong_Ha1;~Minseon_Kim1;~Sung_Ju_Hwang1", "gender": ";;", "homepage": "https://hyeonjeongha.github.io/;https://kim-minseon.github.io/;", "dblp": "331/5333;247/5952;", "google_scholar": "https://scholar.google.com/citations?hl=ko;ZwObZNwAAAAJ;", "orcid": ";;", "linkedin": "hyeonjeong-ha-bb93b0285/;minseon-kim-707a84174;", "or_profile": "~Hyeonjeong_Ha1;~Minseon_Kim1;~Sung_Ju_Hwang1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;", "aff_domain": "kaist.edu;kaist.ac.kr;", "position": "MS student;PhD student;", "bibtex": "@misc{\nha2023fewshot,\ntitle={Few-Shot Transferable Robust Representation Learning via Bilevel Attacks},\nauthor={Hyeonjeong Ha and Minseon Kim and Sung Ju Hwang},\nyear={2023},\nurl={https://openreview.net/forum?id=LLy2vm_p35C}\n}", "github": "", "project": "", "reviewers": "KTHv;WWPz;Xygn;NDZi", "site": "https://openreview.net/forum?id=LLy2vm_p35C", "pdf_size": 9990544, "recommendation": "5;6;6;6", "confidence": "3;3;3;4", "correctness": "3;3;3;3", "technical_novelty": "1;3;2;3", "empirical_novelty": "2;3;3;2", "wc_summary_paper": "83;33;39;49", "wc_strength_and_weaknesses": "187;177;180;376", "wc_clarity_quality_novelty_and_reproducibility": "26;19;50;12", "wc_summary_review": "39;77;49;23", "wc_review": "335;306;318;460", "wc_reply_reviewers": "76;155;0;0", "wc_reply_authors": "1206;989;570;1606", "reply_reviewers": "1;1;0;0", "reply_authors": "4;4;2;3", "recommendation_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.25, 0.82915619758885 ], "empirical_novelty_avg": [ 2.5, 0.5 ], 
"wc_summary_paper_avg": [ 51.0, 19.339079605813716 ], "wc_strength_and_weaknesses_avg": [ 230.0, 84.3712036182962 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 26.75, 14.306903927824496 ], "wc_summary_review_avg": [ 47.0, 19.6468827043885 ], "wc_review_avg": [ 354.75, 61.633493329520114 ], "wc_reply_reviewers_avg": [ 57.75, 64.14972720129057 ], "wc_reply_authors_avg": [ 1092.75, 374.2601869021069 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 3.25, 0.82915619758885 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5054489874663429477&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "id": "LMuVjYmHNh4", "title": "Ensemble Homomorphic Encrypted Data Classification", "track": "main", "status": "Reject", "tldr": "", "abstract": "Homomorphic encryption (HE) is encryption that permits users to perform computations on encrypted data without first decrypting it. HE can be used for privacy-preserving outsourced computation and analysis, allowing data to be encrypted and outsourced to commercial cloud environments for processing while encrypted or sensitive data. HE enables new services by removing privacy barriers inhibiting data sharing or increasing the security of existing services. A convolution neural network (CNN) with shallow architecture can be homomorphically evaluated using addition and multiplication by replacing the activation function, such as ReLU, with a low polynomial degree. To achieve the same performance as the ReLU activation function, we study the impact of applying the ensemble techniques to solve the accuracy problem. Our experimental results empirically show that the ensemble approach can reduce bias, and variance, increasing accuracy to achieve the same ReLU performance with parallel and sequential techniques. 
We demonstrate the effectiveness and robustness of our method using three datasets: MNIST, FMNIST, and CIFAR-10.", "keywords": "Machine Learning Privacy;Homomorphic Encrypted Data Classification;Ensemble Learning", "primary_area": "", "supplementary_material": "/attachment/40c7654144cf3a0337e0fbf644546ea277c5d0ec.zip", "author": "Dana R Alsagheer;Hadi Mansouifar;lei Xu;Qian Lou;Weidong Shi;lin chen", "authorids": "~Dana_R_Alsagheer1;farhadiman@gmail.com;xuleimath@gmail.com;~Qian_Lou1;~Weidong_Shi1;lin.chen@ttu.edu", "gender": "F;;;M;;", "homepage": ";;;https://qlou.org;;", "dblp": ";;;207/3962.html;;", "google_scholar": "DEStBFQAAAAJ;;;SBYgXLoAAAAJ;;", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Dana_R_Alsagheer1;farhadiman@gmail.com;xuleimath@gmail.com;~Qian_Lou1;~Weidong_Shi1;lin.chen@ttu.edu", "aff": "University of Houston;;;University of Central Florida;;", "aff_domain": "uh.edu;;;ucf.edu;;", "position": "PhD student;;;Assistant Professor;;", "bibtex": "@misc{\nalsagheer2023ensemble,\ntitle={Ensemble Homomorphic Encrypted Data Classification},\nauthor={Dana R Alsagheer and Hadi Mansouifar and lei Xu and Qian Lou and Weidong Shi and lin chen},\nyear={2023},\nurl={https://openreview.net/forum?id=LMuVjYmHNh4}\n}", "github": "", "project": "", "reviewers": "Kc3a;WXCk;pCgq;kymY", "site": "https://openreview.net/forum?id=LMuVjYmHNh4", "pdf_size": 565994, "recommendation": "1;1;1;3", "confidence": "4;5;4;2", "correctness": "3;2;4;3", "technical_novelty": "1;1;1;2", "empirical_novelty": "1;1;1;1", "wc_summary_paper": "67;17;48;72", "wc_strength_and_weaknesses": "252;109;85;70", "wc_clarity_quality_novelty_and_reproducibility": "56;41;13;47", "wc_summary_review": "40;29;41;41", "wc_review": "415;196;187;230", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 1.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 1.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.0, 0.0 ], "wc_summary_paper_avg": [ 51.0, 21.575449010391416 ], "wc_strength_and_weaknesses_avg": [ 129.0, 72.3636649154809 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 39.25, 16.068213964221414 ], "wc_summary_review_avg": [ 37.75, 5.0682837331783235 ], "wc_review_avg": [ 257.0, 92.62019218291441 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.9271726499455306, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:bg0Q0kODvGcJ:scholar.google.com/&scioq=Ensemble+Homomorphic+Encrypted+Data+Classification&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "University of Houston;University of Central Florida", "aff_unique_dep": ";", "aff_unique_url": "https://www.uh.edu;https://www.ucf.edu", "aff_unique_abbr": "UH;UCF", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Boosting Causal Discovery via Adaptive Sample Reweighting", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10711", "id": "LNpMtk15AS4", "poster": "/media/PosterPDFs/ICLR%202023/10711.png?t=1682255632.6860917", "openreview":
"https://openreview.net/forum?id=LNpMtk15AS4", "slides": "https://iclr.cc/virtual/2023/poster/10711", "video": "https://iclr.cc/virtual/2023/poster/10711", "author_site": "An Zhang, fangfu Liu, Wenchang Ma, Zhibo Cai, Xiang Wang, Tat-Seng Chua", "tldr": "Automatically learn the adaptive weights for each observation to boost score-based causal discovery performance. ", "abstract": "Under stringent model type and variable distribution assumptions, score-based causal discovery methods learn the directed acyclic graph (DAG) from observational data by evaluating candidate graphs over an averaged score function. Despite the great success in low-dimensional linear systems, it has been observed that these approaches overly exploits easier-to-fit samples, thus inevitably learning spurious edges. Worse still, the common homogeneity assumption of most causal discovery methods can be easily violated due to the widespread existence of heterogeneous data in the real world, resulting in performance vulnerability when noise distributions vary. We propose a simple yet effective model-agnostic framework to boost causal discovery performance by dynamically learning the adaptive weights for the Reweighted Score function, ReScore for short, where the learned weights tailors quantitatively to the important degree of each samples. Intuitively, we leverage the bilevel optimization scheme to alternatively train a standard DAG learner first, then upweight the samples that the DAG learner fails to fit well and downweight the samples that the DAG learner easily extracts the causation information from. Extensive experiments on both synthetic and real-world datasets are carried out to validate the effectiveness of ReScore. We observe consistent and significant boosts in structure learning performance. We further visualize that ReScore concurrently mitigates the influence of spurious edges and generalizes to heterogeneous data. Finally, we perform theoretical analysis to guarantee the structure identifiability and the weight adaptive properties of ReScore. 
Our codes are available at https://github.com/anzhang314/ReScore.", "keywords": "Causal Structure Learning;Score-based Causal Discovery;Adaptive Sample Reweighting", "primary_area": "", "supplementary_material": "", "author": "An Zhang;Fangfu Liu;Wenchang Ma;Zhibo Cai;Xiang Wang;Tat-Seng Chua", "authorids": "~An_Zhang2;~Fangfu_Liu2;~Wenchang_Ma1;~Zhibo_Cai1;~Xiang_Wang6;~Tat-Seng_Chua2", "gender": "M;M;M;M;F;M", "homepage": "https://liuff19.github.io/;;https://github.com/caizhibo2;https://github.com/xiangwang1223;https://github.com/anzhang314;http://www.comp.nus.edu.sg/~chuats/", "dblp": "342/1749;277/0953;;31/2864-10;78/5581-3;", "google_scholar": ";;;https://scholar.google.com.sg/citations?user=HdhaQB0AAAAJ;https://scholar.google.com.sg/citations?user=BcX7GJcAAAAJ;https://scholar.google.com.tw/citations?user=Z9DWCBEAAAAJ", "orcid": ";;;0000-0002-6148-6329;;0000-0001-6097-7807", "linkedin": "%E8%8A%B3%E7%94%AB-%E5%88%98-482856229/;;;;;", "or_profile": "~Fangfu_Liu2;~Wenchang_Ma1;~Zhibo_Cai1;~Xiang_Wang6;~AN_ZHANG1;~Tat-seng_Chua1", "aff": "Department of Electronic Engineering, Tsinghua University;;Renmin University of China;University of Science and Technology of China;National University of Singapore;National University of Singapore", "aff_domain": "tsinghua.edu.cn;;ruc.edu.cn;ustc.edu.cn;nus.edu.sg;nus.edu.sg", "position": "PhD student;;Lecturer;Full Professor;Postdoc;Full Professor", "bibtex": "@inproceedings{\nzhang2023boosting,\ntitle={Boosting Causal Discovery via Adaptive Sample Reweighting},\nauthor={An Zhang and Fangfu Liu and Wenchang Ma and Zhibo Cai and Xiang Wang and Tat-Seng Chua},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=LNpMtk15AS4}\n}", "github": "", "project": "", "reviewers": "x5dL;ueyq;m1MC;Q3NX", "pdf_size": 975596, "recommendation": "6;6;8;8", "confidence": "3;4;4;4", "correctness": "3;4;3;4", "technical_novelty": "3;3;4;3", "empirical_novelty": "2;2;3;4", "wc_summary_paper": "53;58;32;97", "wc_strength_and_weaknesses": "107;203;64;615", "wc_clarity_quality_novelty_and_reproducibility": "35;36;44;46", "wc_summary_review": "36;61;383;46", "wc_review": "231;358;523;804", "wc_reply_reviewers": "0;0;0;44", "wc_reply_authors": "631;744;1134;2014", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;2;3", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 60.0, 23.484037131634757 ], "wc_strength_and_weaknesses_avg": [ 247.25, 218.20217116243367 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 40.25, 4.815340071064556 ], "wc_summary_review_avg": [ 131.5, 145.47594302839215 ], "wc_review_avg": [ 479.0, 214.30468963604133 ], "wc_reply_reviewers_avg": [ 11.0, 19.05255888325765 ], "wc_reply_authors_avg": [ 1130.75, 543.0162865881649 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.0, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "pdf": "https://openreview.net/pdf?id=LNpMtk15AS4", "email": "tsinghua.edu.cn;;ruc.edu.cn;ustc.edu.cn;nus.edu.sg;nus.edu.sg", "author_num": 6, "aff_unique_index": "0;1;2;3;3", "aff_unique_norm": "Tsinghua University;Renmin University of 
China;University of Science and Technology of China;National University of Singapore", "aff_unique_dep": "Department of Electronic Engineering;;;", "aff_unique_url": "https://www.tsinghua.edu.cn;http://www.ruc.edu.cn;http://www.ustc.edu.cn;https://www.nus.edu.sg", "aff_unique_abbr": "THU;RUC;USTC;NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;1", "aff_country_unique": "China;Singapore" }, { "id": "LOMA7vSa2Y", "title": "MetaMD: Principled Optimiser Meta-Learning for Deep Learning", "track": "main", "status": "Reject", "tldr": "We proposed a meta-learning based algorithm, learning optimisers under the mirror descent framework.", "abstract": "Optimiser design influences learning speed and generalisation in training machine learning models. Several studies have attempted to learn more effective gradient-descent optimisers via solving a bi-level optimisation problem where generalisation error is minimised with respect to optimiser parameters. However, most existing neural network oriented optimiser learning methods are intuitively motivated, without clear theoretical support, and focus on learning implicit biases that improve generalisation, rather than speed of convergence. We take a different perspective starting from mirror descent rather than gradient descent, and meta-learning the corresponding Bregman divergence. Within this paradigm, we formalise a novel meta-learning objective of optimising the rate of convergence. The resulting framework, termed Meta Mirror Descent (MetaMD), learns to accelerate optimisation speed. Unlike many meta-learned neural network optimisers, it also supports convergence guarantees and uniquely does so without requiring validation data. We empirically evaluate our framework on a variety of tasks and architectures in terms of convergence rate and generalisation error and demonstrate strong performance.", "keywords": "Meta-learning;Optimiser Learning", "primary_area": "", "supplementary_material": "/attachment/36feaf6bb4b125f14f778cd42dedc5ce41ac68fc.zip", "author": "Boyan Gao;Henry Gouk;Jan Stuehmer;Massimiliano Pontil;Timothy Hospedales", "authorids": "~Boyan_Gao1;~Henry_Gouk1;~Jan_Stuehmer1;~Massimiliano_Pontil4;~Timothy_Hospedales1", "gender": ";M;M;Not Specified;M", "homepage": "https://www.researchgate.net/profile/Boyan_Gao2;https://www.henrygouk.com;;https://www.iit.it/web/computational-statistics-and-machine-learning;http://homepages.inf.ed.ac.uk/thospeda/", "dblp": "251/3330;172/0943;91/8483;;32/3545", "google_scholar": "WIuM3SIAAAAJ;https://scholar.google.co.nz/citations?user=i1bzlyAAAAAJ;pGukv5YAAAAJ;lcOacs8AAAAJ;https://scholar.google.fr/citations?user=nHhtvqkAAAAJ", "orcid": ";;0009-0002-0122-5482;0000-0001-9415-098X;0000-0003-4867-7486", "linkedin": ";;;;timothyhospedales/", "or_profile": "~Boyan_Gao1;~Henry_Gouk1;~Jan_Stuehmer1;~Massimiliano_Pontil4;~Timothy_Hospedales1", "aff": "University of Oxford;University of Edinburgh;Karlsruhe Institute of Technology;University College London, University of London;Samsung AI Research Centre", "aff_domain": "eng.ox.ac.uk;ed.ac.uk;kit.edu;ucl.ac.uk;samsung.com", "position": "Postdoc;RAEng Research Fellow;Assistant Professor;Full Professor;Principal Researcher", "bibtex": "@misc{\ngao2023metamd,\ntitle={Meta{MD}: Principled Optimiser Meta-Learning for Deep Learning},\nauthor={Boyan Gao and Henry Gouk and Jan Stuehmer and Massimiliano Pontil and Timothy Hospedales},\nyear={2023},\nurl={https://openreview.net/forum?id=LOMA7vSa2Y}\n}", "github": "", "project": 
"", "reviewers": "Ced8;bxcN;f8bx;c6CM", "site": "https://openreview.net/forum?id=LOMA7vSa2Y", "pdf_size": 1681242, "recommendation": "3;5;6;8", "confidence": "4;2;4;2", "correctness": "2;4;4;3", "technical_novelty": "3;3;3;3", "empirical_novelty": "2;3;2;3", "wc_summary_paper": "176;52;196;53", "wc_strength_and_weaknesses": "520;267;279;137", "wc_clarity_quality_novelty_and_reproducibility": "113;18;70;269", "wc_summary_review": "73;190;45;54", "wc_review": "882;527;590;513", "wc_reply_reviewers": "576;0;0;0", "wc_reply_authors": "2069;713;206;792", "reply_reviewers": "1;0;0;0", "reply_authors": "5;2;1;2", "recommendation_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 3.0, 1.0 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 119.25, 67.12441806079215 ], "wc_strength_and_weaknesses_avg": [ 300.75, 138.29022922824302 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 117.5, 93.71365962334413 ], "wc_summary_review_avg": [ 90.5, 58.3288093483829 ], "wc_review_avg": [ 628.0, 149.48745766785922 ], "wc_reply_reviewers_avg": [ 144.0, 249.41531628991834 ], "wc_reply_authors_avg": [ 945.0, 686.7914530627183 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 1.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.5547001962252291, "corr_recommendation_correctness": 0.4181210050035454, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:TswMeBLXOREJ:scholar.google.com/&scioq=MetaMD:+Principled+Optimiser+Meta-Learning+for+Deep+Learning&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "University of Oxford;University of Edinburgh;Karlsruhe Institute of Technology;University College London;Samsung", "aff_unique_dep": ";;;;AI Research", "aff_unique_url": "https://www.ox.ac.uk;https://www.ed.ac.uk;https://www.kit.edu;https://www.ucl.ac.uk;https://www.samsung.com/global/researchers/samsung-ai-research-centre/", "aff_unique_abbr": "Oxford;Edinburgh;KIT;UCL;SARC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;2", "aff_country_unique": "United Kingdom;Germany;South Korea" }, { "id": "LOTGOB5_Xh2", "title": "Architecture-Agnostic Masked Image Modeling -- From ViT back to CNN", "track": "main", "status": "Reject", "tldr": "We delve deep into masked image modeling (MIM) working mechanism and propose a generic pre-training framework (A$^2$MIM) for Transformers and CNNs.", "abstract": "Masked image modeling (MIM), an emerging self-supervised pre-training method, has shown impressive success across numerous downstream vision tasks with Vision transformers (ViTs). Its underlying idea is simple: a portion of the input image is randomly masked out and then reconstructed via the pre-text task. However, the working principle behind MIM is not well explained, and previous studies insist that MIM primarily works for the Transformer family but is incompatible with CNNs. In this paper, we first study interactions among patches to understand what knowledge is learned and how it is acquired via the MIM task. We observe that MIM essentially teaches the model to learn better middle-order interactions among patches and extract more generalized features. 
Based on this fact, we propose an Architecture-Agnostic Masked Image Modeling framework (A$^2$MIM), which is compatible with both Transformers and CNNs in a unified way. Extensive experiments on popular benchmarks show that our A$^2$MIM learns better representations without explicit design and endows the backbone model with a stronger capability to transfer to various downstream tasks for both Transformers and CNNs.", "keywords": "Self-supervised Learning;Vision Transformer;Representation Learning;Unsupervised Learning", "primary_area": "", "supplementary_material": "/attachment/314ece02d4f259e3603cd221e24ae7c1101d6976.zip", "author": "Siyuan Li;Di Wu;Fang Wu;Zelin Zang;Lei Shang;Baigui Sun;Xuansong Xie;Stan Z. Li", "authorids": "~Siyuan_Li6;~Di_Wu10;~Fang_Wu1;~Zelin_Zang2;~Lei_Shang1;~Baigui_Sun1;~Xuansong_Xie1;~Stan_Z._Li2", "gender": "M;M;;M;;M;M;", "homepage": "https://lupin1998.github.io/;;;;;;;", "dblp": "63/9705-2;;;226/7615;;186/8016;234/8028;", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en;;foERjnQAAAAJ;WO1eMcIAAAAJ;ZNhTHywAAAAJ;M0Ei1zkAAAAJ;", "orcid": "0000-0001-6806-2468;;;;;0000-0001-7722-4748;;", "linkedin": "https://www.linkedin.cn/incareer/in/siyuan-li-lupin1998/;;;;;;;", "or_profile": "~Siyuan_Li6;~Di_Wu10;~Fang_Wu1;~Zelin_Zang2;~Lei_Shang1;~Baigui_Sun1;~Xuansong_Xie1;~Stan_Z._Li2", "aff": "Alibaba Group;Westlake University;;Westlake University, Zhejiang University, National University of Singapore;Alibaba Group;Alibaba Group;Alibaba Group;", "aff_domain": "alibaba-inc.com;westlake.edu.cn;;westlake.edu.cn;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;", "position": "Intern;PhD student;;PhD student;Researcher;Researcher;Researcher;", "bibtex": "@misc{\nli2023architectureagnostic,\ntitle={Architecture-Agnostic Masked Image Modeling -- From ViT back to {CNN}},\nauthor={Siyuan Li and Di Wu and Fang Wu and Zelin Zang and Lei Shang and Baigui Sun and Xuansong Xie and Stan Z.
Li},\nyear={2023},\nurl={https://openreview.net/forum?id=LOTGOB5_Xh2}\n}", "github": "", "project": "", "reviewers": "dEJJ;nEcw;RicF;yVqw", "site": "https://openreview.net/forum?id=LOTGOB5_Xh2", "pdf_size": 0, "recommendation": "3;5;5;8", "confidence": "5;4;4;4", "correctness": "2;2;3;3", "technical_novelty": "1;3;2;3", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "93;57;112;53", "wc_strength_and_weaknesses": "494;242;480;157", "wc_clarity_quality_novelty_and_reproducibility": "14;50;158;7", "wc_summary_review": "48;35;58;10", "wc_review": "649;384;808;227", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1601;861;956;488", "reply_reviewers": "0;0;0;0", "reply_authors": "3;2;2;1", "recommendation_avg": [ 5.25, 1.7853571071357126 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.25, 0.82915619758885 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 78.75, 24.722206616724165 ], "wc_strength_and_weaknesses_avg": [ 343.25, 146.94110214640423 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 57.25, 60.41264354421184 ], "wc_summary_review_avg": [ 37.75, 17.977416388346796 ], "wc_review_avg": [ 517.0, 225.77311620297044 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 976.5, 400.74711477439234 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": -0.7276068751089989, "corr_recommendation_correctness": 0.7001400420140049, "gs_citation": 47, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6873439072132775305&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 9, "aff_unique_index": "0;1;1;0;0;0", "aff_unique_norm": "Alibaba Group;Westlake University", "aff_unique_dep": ";", "aff_unique_url": "https://www.alibaba.com;https://www.westlake.edu.cn", "aff_unique_abbr": "Alibaba;WU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "LPcxnvN9vLw", "title": "Memory Learning of Multivariate Asynchronous Time Series", "track": "main", "status": "Withdraw", "tldr": "Modeling Multivariate Asynchronous Time Series", "abstract": "Sequential observations from complex systems are usually collected irregularly and asynchronously across variables. Besides, they are typically both serially and cross-sectionally dependent. Recurrent networks are commonly used to model such sequential data, trying to simultaneously capture marginal dynamics and dependence dynamics with one shared memory. This leads to two problems. First, some heterogeneous marginal information is difficult to preserve in the shared memory. Second, in an asynchronous setting, missing values across variables will introduce bias in the shared memory. To solve these problems, this paper designs a new architecture that seamlessly integrates continuous-time ODE solvers with a set of memory-aware GRU blocks. It learns memory profiles separately and addresses the issue of asynchronous observations.
Numerical results confirm that this new architecture outperforms a variety of state-of-the-art baseline models on datasets from various fields.", "keywords": "Multivariate Asynchronous Time Series;Gated Recurrent Unit;Sequential Models", "primary_area": "", "supplementary_material": "/attachment/cc31108f9dc236423817225061abde9063256499.zip", "author": "YIJUN LI;Cheuk Hang LEUNG;Chaoqun Wang;Yiyan HUANG;Qi WU;Dongdong WANG;Zhixiang Huang", "authorids": "~YIJUN_LI6;~Cheuk_Hang_LEUNG2;~Chaoqun_Wang5;~Yiyan_HUANG2;~Qi_WU5;~Dongdong_WANG2;~Zhixiang_Huang1", "gender": "M;;M;;M;M;M", "homepage": ";;;;;http://www.cityu.edu.hk/stfprofile/qiwu55.htm;", "dblp": ";;;;;;", "google_scholar": "lfJisoYAAAAJ;;;;;60AO2VAAAAAJ;", "orcid": "0000-0001-7237-1378;0000-0001-9138-828X;;0000-0002-3911-9055;0000-0001-9956-6861;0000-0002-4028-981X;0000-0003-1268-2208", "linkedin": ";;huangzhixiang-b7210ab8/;;;qi-wu-27802817;", "or_profile": "~YIJUN_LI6;~Chaoqun_Wang5;~Zhixiang_Huang1;~Cheuk_Hang_Leung1;~WANG_Dongdong1;~Qi_Wu2;~Yiyan_Huang1", "aff": "City University Hong Kong ;City University of Hong Kong;;City University of Hong Kong;Jingdong Technology;City University of Hong Kong;City University of Hong Kong", "aff_domain": "cityu.edu;cityu.edu.hk;;cityu.edu.hk;jd.com;cityu.edu.hk;my.cityu.edu.hk", "position": "PhD student;PhD student;;Research Assistant;Researcher;Associate Professor, SDSc;PhD student", "bibtex": "@misc{\nli2023memory,\ntitle={Memory Learning of Multivariate Asynchronous Time Series},\nauthor={YIJUN LI and Cheuk Hang LEUNG and Chaoqun Wang and Yiyan HUANG and Qi WU and Dongdong WANG and Zhixiang Huang},\nyear={2023},\nurl={https://openreview.net/forum?id=LPcxnvN9vLw}\n}", "github": "", "project": "", "reviewers": "cF7q;znQn;kU5n;LetJ", "site": "https://openreview.net/forum?id=LPcxnvN9vLw", "pdf_size": 519313, "recommendation": "3;3;5;6", "confidence": "3;4;3;5", "correctness": "3;2;4;3", "technical_novelty": "2;1;2;3", "empirical_novelty": "2;2;3;0", "wc_summary_paper": "44;144;63;144", "wc_strength_and_weaknesses": "135;326;105;354", "wc_clarity_quality_novelty_and_reproducibility": "293;24;1;152", "wc_summary_review": "60;94;12;40", "wc_review": "532;588;181;690", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 1.75, 1.0897247358851685 ], "wc_summary_paper_avg": [ 98.75, 45.745901455758855 ], "wc_strength_and_weaknesses_avg": [ 230.0, 110.95269262167548 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 117.5, 116.51716611727218 ], "wc_summary_review_avg": [ 51.5, 29.878922336657325 ], "wc_review_avg": [ 497.75, 191.44761032721198 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.5222329678670935, "corr_recommendation_correctness": 0.5443310539518174, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:dGdKL-9uyiQJ:scholar.google.com/&scioq=Memory+Learning+of+Multivariate+Asynchronous+Time+Series&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "City University of Hong Kong;Jingdong Technology", "aff_unique_dep": ";", "aff_unique_url": 
"https://www.cityu.edu.hk;https://jd.com", "aff_unique_abbr": "CityU;JD Tech", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "LPwlqyrnwg", "title": "On Stability and Generalization of Bilevel Optimization Problems", "track": "main", "status": "Reject", "tldr": "Generalization bounds in different settings for single-timescale gradient-based method ", "abstract": "(Stochastic) bilevel optimization is a frequently encountered problem in machine learning with a wide range of applications such as meta-learning, hyper-parameter optimization, and reinforcement learning. Most of the existing studies on this problem only focused on analyzing the convergence or improving the convergence rate, while little effort has been devoted to understanding its generalization behaviors. In this paper, we conduct a thorough analysis on the generalization of first-order (gradient-based) methods for the bilevel optimization problem. We first establish a fundamental connection between algorithmic stability and generalization error in different forms and give a high probability generalization bound which improves the previous best one from $O(\\sqrt{n})$ to $O(\\log n)$, where $n$ is the sample size. We then provide the first stability bounds for the general case where both inner and outer level parameters are subject to continuous update, while existing work allows only the outer level parameter to be updated. Our analysis can be applied in various standard settings such as strongly-convex-strongly-convex (SC-SC), convex-convex (C-C), and nonconvex-nonconvex (NC-NC). Our analysis for the NC-NC setting can also be extended to a particular nonconvex-strongly-convex (NC-SC) setting that is commonly encountered in practice. 
Finally, we corroborate our theoretical analysis and demonstrate how iterations can affect the generalization error by experiments on meta-learning and hyper-parameter optimization.", "keywords": "bilevel optimization;generalization;stability", "primary_area": "", "supplementary_material": "/attachment/76caa72d5d08ed9b0bfbbae84a8bc5f919c55b41.zip", "author": "Meng Ding;Mingxi Lei;Yunwen Lei;Di Wang;Jinhui Xu", "authorids": "~Meng_Ding3;~Mingxi_Lei1;~Yunwen_Lei1;~Di_Wang1;~Jinhui_Xu1", "gender": "F;;M;;M", "homepage": ";https://mingxilei.github.io;https://leiyw.github.io/;;https://www.cse.buffalo.edu/~jinhui/", "dblp": ";;https://dblp.org/pers/l/Lei:Yunwen;;24/6437-1.html", "google_scholar": "Ipwvf8oAAAAJ;xWNNQ_IAAAAJ;https://scholar.google.com.hk/citations?user=g3dg0rsAAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Meng_Ding3;~Mingxi_Lei1;~Yunwen_Lei1;~Di_Wang1;~Jinhui_Xu1", "aff": "State University of New York at Buffalo;State University of New York at Buffalo;University of Hong Kong;;University at Buffalo, State University of New York", "aff_domain": "buffalo.edu;buffalo.edu;hku.hk;;buffalo.edu", "position": "PhD student;PhD student;Assistant Professor;;Full Professor", "bibtex": "@misc{\nding2023on,\ntitle={On Stability and Generalization of Bilevel Optimization Problems},\nauthor={Meng Ding and Mingxi Lei and Yunwen Lei and Di Wang and Jinhui Xu},\nyear={2023},\nurl={https://openreview.net/forum?id=LPwlqyrnwg}\n}", "github": "", "project": "", "reviewers": "Wyf6;GoE2;rxeE;jxJW", "site": "https://openreview.net/forum?id=LPwlqyrnwg", "pdf_size": 4130794, "recommendation": "1;3;5;6", "confidence": "5;4;4;2", "correctness": "2;2;3;4", "technical_novelty": "2;2;3;4", "empirical_novelty": "2;0;2;3", "wc_summary_paper": "75;68;98;39", "wc_strength_and_weaknesses": "737;591;298;188", "wc_clarity_quality_novelty_and_reproducibility": "26;76;9;6", "wc_summary_review": "69;99;29;23", "wc_review": "907;834;434;256", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "596;675;76;251", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 3.75, 1.920286436967152 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "correctness_avg": [ 2.75, 0.82915619758885 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 1.75, 1.0897247358851685 ], "wc_summary_paper_avg": [ 70.0, 21.059439688652688 ], "wc_strength_and_weaknesses_avg": [ 453.5, 220.19820616889683 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 29.25, 28.047950014216724 ], "wc_summary_review_avg": [ 55.0, 30.95157508108432 ], "wc_review_avg": [ 607.75, 271.41147267571426 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 399.5, 245.56923667267446 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.8661541520797733, "corr_recommendation_correctness": 0.9028289727756884, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:yf_QthyvxQwJ:scholar.google.com/&scioq=On+Stability+and+Generalization+of+Bilevel+Optimization+Problems&hl=en&as_sdt=0,5", "gs_version_total": 3, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "State University of New York at Buffalo;University of Hong Kong;University at Buffalo", "aff_unique_dep": ";;", "aff_unique_url": "https://www.buffalo.edu;https://www.hku.hk;https://www.buffalo.edu", "aff_unique_abbr": "SUNY Buffalo;HKU;UB", 
"aff_campus_unique_index": "0;0;1;0", "aff_campus_unique": "Buffalo;Hong Kong SAR", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;China" }, { "title": "The Role of Coverage in Online Reinforcement Learning", "status": "Top-5%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10809", "id": "LQIjzPdDt3q", "poster": "", "openreview": "https://openreview.net/forum?id=LQIjzPdDt3q", "slides": "https://iclr.cc/virtual/2023/poster/10809", "video": "https://iclr.cc/virtual/2023/poster/10809", "author_site": "Tengyang Xie, Dylan Foster, Yu Bai, Nan Jiang, Sham Kakade", "tldr": "This paper shows surprising connections between online and offline learnability, in particular, how coverage in offline RL enables exploration in online RL.", "abstract": "Coverage conditions---which assert that the data logging distribution adequately covers the state space---play a fundamental role in determining the sample complexity of offline reinforcement learning. While such conditions might seem irrelevant to online reinforcement learning at first glance, we establish a new connection by showing---somewhat surprisingly---that the mere existence of a data distribution with good coverage can enable sample-efficient online RL. Concretely, we show that coverability---that is, existence of a data distribution that satisfies a ubiquitous coverage condition called concentrability---can be viewed as a structural property of the underlying MDP, and can be exploited by standard algorithms for sample-efficient exploration, even when the agent does not know said distribution. We complement this result by proving that several weaker notions of coverage, despite being sufficient for offline RL, are insufficient for online RL. We also show that existing complexity measures for online RL, including Bellman rank and Bellman-Eluder dimension, fail to optimally capture coverability, and propose a new complexity measure, the self-normalized coefficient, to provide a unification.", "keywords": "reinforcement learning theory;online RL;offline RL;learnability;general function approximation", "primary_area": "", "supplementary_material": "", "author": "Tengyang Xie;Dylan J Foster;Yu Bai;Nan Jiang;Sham M. Kakade", "authorids": "~Tengyang_Xie1;~Dylan_J_Foster1;~Yu_Bai1;~Nan_Jiang2;~Sham_M._Kakade1", "gender": ";;;M;M", "homepage": "https://tengyangxie.github.io/;http://dylanfoster.net;https://yubai.org;http://nanjiang.cs.illinois.edu;https://shamulent.github.io", "dblp": "227/3335;167/4271;03/6325-17.html;06/4489-8;s/SMKakade", "google_scholar": "rlmROVsAAAAJ;RqwU8xsAAAAJ;owqhKD8AAAAJ;nUlanA8AAAAJ;https://scholar.google.com.tw/citations?user=wb-DKCIAAAAJ", "orcid": ";;;;", "linkedin": ";;;nan-jiang-28139937/;", "or_profile": "~Tengyang_Xie1;~Dylan_J_Foster1;~Yu_Bai1;~Nan_Jiang2;~Sham_M._Kakade1", "aff": "Department of Computer Science, University of Illinois, Urbana Champaign;Microsoft Research;Salesforce Research;University of Illinois, Urbana Champaign;Harvard University", "aff_domain": "cs.illinois.edu;microsoft.com;salesforce.com;illinois.edu;harvard.edu", "position": "PhD student;Principal Researcher;Research Scientist;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nxie2023the,\ntitle={The Role of Coverage in Online Reinforcement Learning},\nauthor={Tengyang Xie and Dylan J Foster and Yu Bai and Nan Jiang and Sham M. 
Kakade},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=LQIjzPdDt3q}\n}", "github": "", "project": "", "reviewers": "p7d7;PRRf;LEoz", "pdf_size": 649764, "recommendation": "5;8;8", "confidence": "3;4;3", "correctness": "3;4;4", "technical_novelty": "2;3;3", "empirical_novelty": "0;3;0", "wc_summary_paper": "120;36;77", "wc_strength_and_weaknesses": "159;111;175", "wc_clarity_quality_novelty_and_reproducibility": "15;13;80", "wc_summary_review": "30;1;20", "wc_review": "324;161;352", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "385;761;142", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 7.0, 1.4142135623730951 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.0, 1.4142135623730951 ], "wc_summary_paper_avg": [ 77.66666666666667, 34.29609631171195 ], "wc_strength_and_weaknesses_avg": [ 148.33333333333334, 27.19477073916152 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 36.0, 31.123410267299864 ], "wc_summary_review_avg": [ 17.0, 12.027745701779143 ], "wc_review_avg": [ 279.0, 84.21797116213776 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 429.3333333333333, 254.6426690264004 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.5, "corr_recommendation_correctness": 1.0, "gs_citation": 89, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15999970943068032040&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=LQIjzPdDt3q", "email": "cs.illinois.edu;microsoft.com;salesforce.com;illinois.edu;harvard.edu", "author_num": 5, "aff_unique_index": "0;1;2;0;3", "aff_unique_norm": "University of Illinois Urbana-Champaign;Microsoft;Salesforce;Harvard University", "aff_unique_dep": "Department of Computer Science;Microsoft Research;Salesforce Research;", "aff_unique_url": "https://illinois.edu;https://www.microsoft.com/en-us/research;https://research.salesforce.com;https://www.harvard.edu", "aff_unique_abbr": "UIUC;MSR;Salesforce;Harvard", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Urbana-Champaign;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "LR_KWiUgS8F", "title": "AD-NEGF: An End-to-End Differentiable Quantum Transport Simulator for Sensitivity Analysis and Inverse Problems", "track": "main", "status": "Reject", "tldr": "We provide, to the best of our knowledge, the first end-to-end differentiable quantum transport simulator, which can compute differential quantities and perform atomic-level device optimization.", "abstract": "Quantum transport theory describes transport phenomena from first principles, which is essential for domains such as semiconductor fabrication. As a representative, the Non-Equilibrium Green Function (NEGF) method achieves superior numerical accuracy. However, its tremendous computational cost makes it prohibitive for high-throughput simulation tasks such as sensitivity analysis and inverse design. In this work, we propose AD-NEGF, to the best of our knowledge the first Automatic Differentiation (AD) based quantum transport simulator.
AD-NEGF calculates gradient information efficiently by utilizing automatic differentiation and implicit layer techniques, while guaranteeing the correctness of the forward simulation. Such gradient information enables accurate and efficient calculation of differential physical quantities and the solution of inverse problems that are intractable for traditional optimization methods.", "keywords": "Quantum Transport;Non-Equilibrium Green Function;Automatic Differentiation;Differentiable Programming;Deep Learning;Sensitivity Analysis;Inverse Design", "primary_area": "", "supplementary_material": "/attachment/8037ca268ccd4f2153909b547ecf3f6f54bbb74a.zip", "author": "Zhanghao Zhouyin;Xiang Chen;Peng Zhang;Jun Wang;Lei Wang;Hong Guo", "authorids": "~Zhanghao_Zhouyin1;~Xiang_Chen8;~Peng_Zhang17;~Jun_Wang2;~Lei_Wang2;hong.guo@mcgill.ca", "gender": "M;;M;M;;", "homepage": ";;http://cic.tju.edu.cn/faculty/zhangpeng/index.html;http://www0.cs.ucl.ac.uk/staff/jun.wang/;;", "dblp": ";;21/1048-2%20;w/JunWang12;;", "google_scholar": "vSMmfvUAAAAJ;2cj3OTIAAAAJ;tvDb5_cAAAAJ;https://scholar.google.co.uk/citations?user=wIE1tY4AAAAJ;;", "orcid": ";;0000-0003-0228-9330;;;", "linkedin": "%E5%BC%A0%E7%9A%93-%E5%91%A8%E5%AF%85-4aaa471a5/?locale=en_US;;;;;", "or_profile": "~Zhanghao_Zhouyin1;~Xiang_Chen8;~Peng_Zhang17;~Jun_Wang2;~Lei_Wang2;hong.guo@mcgill.ca", "aff": "Tianjin University;Huawei Technologies Ltd.;Tianjin University;University College London;;", "aff_domain": "tju.edu.cn;huawei.com;tju.edu.cn;ucl.ac.uk;;", "position": "MS student;Researcher;Full Professor;Professor;;", "bibtex": "@misc{\nzhouyin2023adnegf,\ntitle={{AD}-{NEGF}: An End-to-End Differentiable Quantum Transport Simulator for Sensitivity Analysis and Inverse Problems},\nauthor={Zhanghao Zhouyin and Xiang Chen and Peng Zhang and Jun Wang and Lei Wang and Hong Guo},\nyear={2023},\nurl={https://openreview.net/forum?id=LR_KWiUgS8F}\n}", "github": "", "project": "", "reviewers": "5rbi;uefF;4fBj;FLBL", "site": "https://openreview.net/forum?id=LR_KWiUgS8F", "pdf_size": 1589808, "recommendation": "3;5;6;6", "confidence": "3;2;2;3", "correctness": "2;3;3;4", "technical_novelty": "2;3;3;2", "empirical_novelty": "0;3;0;3", "wc_summary_paper": "72;148;46;28", "wc_strength_and_weaknesses": "436;209;59;67", "wc_clarity_quality_novelty_and_reproducibility": "116;110;37;9", "wc_summary_review": "92;65;60;35", "wc_review": "716;532;202;139", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "611;721;146;479", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 2.5, 0.5 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 1.5, 1.5 ], "wc_summary_paper_avg": [ 73.5, 45.76843890717707 ], "wc_strength_and_weaknesses_avg": [ 192.75, 152.5915708681184 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 68.0, 46.12483062299525 ], "wc_summary_review_avg": [ 63.0, 20.23610634484806 ], "wc_review_avg": [ 397.25, 236.9465920835326 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 489.25, 215.90319011075312 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.40824829046386296, "corr_recommendation_correctness": 0.8660254037844386, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10186073947960714354&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 4, "aff_unique_index":
"0;1;0;2", "aff_unique_norm": "Tianjin University;Huawei;University College London", "aff_unique_dep": ";Huawei Technologies;", "aff_unique_url": "http://www.tju.edu.cn;https://www.huawei.com;https://www.ucl.ac.uk", "aff_unique_abbr": "TJU;Huawei;UCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "China;United Kingdom" }, { "title": "Fuzzy Alignments in Directed Acyclic Graph for Non-Autoregressive Machine Translation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11822", "id": "LSz-gQyd0zE", "poster": "/media/PosterPDFs/ICLR%202023/11822.png?t=1682325319.925473", "openreview": "https://openreview.net/forum?id=LSz-gQyd0zE", "slides": "https://iclr.cc/virtual/2023/poster/11822", "video": "https://iclr.cc/virtual/2023/poster/11822", "author_site": "Zhengrui Ma, Chenze Shao, Shangtong Gui, Min Zhang, Yang Feng", "tldr": "We introduce a fuzzy alignment objective in Directed Acyclic Graph for NAT, setting a new state of the art for NAT on the raw training data.", "abstract": "Non-autoregressive translation (NAT) reduces the decoding latency but suffers from performance degradation due to the multi-modality problem. Recently, the structure of directed acyclic graph has achieved great success in NAT, which tackles the multi-modality problem by introducing dependency between vertices. However, training it with negative log-likelihood loss implicitly requires a strict alignment between reference tokens and vertices, weakening its ability to handle multiple translation modalities. In this paper, we hold the view that all paths in the graph are fuzzily aligned with the reference sentence. We do not require the exact alignment but train the model to maximize a fuzzy alignment score between the graph and reference, which takes captured translations in all modalities into account. 
Extensive experiments on major WMT benchmarks show that our method substantially improves translation performance and increases prediction confidence, setting a new state of the art for NAT on the raw training data.", "keywords": "Machine translation;Non-autoregressive generation;Fuzzy alignment", "primary_area": "", "supplementary_material": "", "author": "Zhengrui Ma;Chenze Shao;Shangtong Gui;Min Zhang;Yang Feng", "authorids": "~Zhengrui_Ma1;~Chenze_Shao1;~Shangtong_Gui1;~Min_Zhang9;~Yang_Feng4", "gender": "M;M;M;M;", "homepage": "http://nlp.ict.ac.cn/~mazhengrui;;;https://zhangmin-nlp-ai.github.io/;http://people.ucas.edu.cn/~yangfeng?language=en", "dblp": "276/3133;227/3123;342/3895.html;83/5342-5;07/6095-4.html", "google_scholar": "dUgq6tEAAAAJ;LH_rZf8AAAAJ;OZ0ZTxUAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en", "orcid": ";;;;", "linkedin": ";;%E5%B0%9A%E5%BD%A4-%E6%A1%82-9598a6199/;;", "or_profile": "~Zhengrui_Ma1;~Chenze_Shao1;~Shangtong_Gui1;~Min_Zhang9;~Yang_Feng4", "aff": "Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;Harbin Institute of Technology, Shenzhen;Institute of Computing Technology, Chinese Academy of Sciences", "aff_domain": "ict.ac.cn;ict.ac.cn;ict.ac.cn;hit.edu.cn;ict.ac.cn", "position": "PhD student;PhD student;MS student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nma2023fuzzy,\ntitle={Fuzzy Alignments in Directed Acyclic Graph for Non-Autoregressive Machine Translation},\nauthor={Zhengrui Ma and Chenze Shao and Shangtong Gui and Min Zhang and Yang Feng},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=LSz-gQyd0zE}\n}", "github": "", "project": "", "reviewers": "Qie9;zAdY;5ZNc", "pdf_size": 532358, "recommendation": "5;6;8", "confidence": "4;3;5", "correctness": "4;3;3", "technical_novelty": "3;3;3", "empirical_novelty": "3;3;3", "wc_summary_paper": "60;78;76", "wc_strength_and_weaknesses": "62;142;251", "wc_clarity_quality_novelty_and_reproducibility": "25;50;14", "wc_summary_review": "20;52;16", "wc_review": "167;322;357", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "268;436;617", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 71.33333333333333, 8.055363982396383 ], "wc_strength_and_weaknesses_avg": [ 151.66666666666666, 77.46110134914312 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 29.666666666666668, 15.062831370260005 ], "wc_summary_review_avg": [ 29.333333333333332, 16.110727964792762 ], "wc_review_avg": [ 282.0, 82.56310717667223 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 440.3333333333333, 142.5115979685708 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.6546536707079772, "corr_recommendation_correctness": -0.7559289460184545, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5428556331216850153&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=LSz-gQyd0zE", "email": 
"ict.ac.cn;ict.ac.cn;ict.ac.cn;hit.edu.cn;ict.ac.cn", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Chinese Academy of Sciences;Harbin Institute of Technology", "aff_unique_dep": "Institute of Computing Technology;", "aff_unique_url": "http://www.ict.ac.cn;http://en.hhit.edu.cn/", "aff_unique_abbr": "CAS;HIT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "LTvSyvRaJO", "title": "Improving Vision Attention with Random Walk Graph Kernel", "track": "main", "status": "Withdraw", "tldr": "We approach a novel linear attention mechanism based on random walk graph kernel, can be widely used in vision transformer with long sequence inputs", "abstract": "Vision transformers, which propose to tokenize an image and introduce attention mechanism to learn cross-token relationship, have advanced many computer vision tasks.However, the attention module owns a quadratic computational complexity and hence suffers from slow computing speed and high memory cost, hindering it from handling long sequences of tokens.Some attempts optimize the quadratic attention with linear approximation yet observe undesired performance drop.This work balances the trade-off between modeling efficiency and capacity of vision attention.We notice that, by treating queries and keys as nodes in a graph, existing algorithms are akin to modeling one-step interaction between nodes.To strengthen the cross-node connection for a more representative attention, we introduce multi-step interaction, which is equivalent to solving an inverse matrix as in random walk graph kernel.We then come up with a new strategy to construct queries and keys, with the help of bipartite graph, to ease the calculation of matrix inversion.The effectiveness of our approach is verified on various visual tasks. We also make it possible to learn a vision transformer with extremely long sequences of tokens.We achieved the competitive results on the semantic segmentation task with 15% fewer parameters and 10-25% less computation. In addition, the vision transformer based quantization method can be applied to 512x512 or even 1024x1024 resolution images. 
Code will be made publicly available.", "keywords": "vision transformer;long sequence modeling", "primary_area": "", "supplementary_material": "", "author": "Yifei Zhang;Kecheng Zheng;Yujun Shen;Yu Liu;Lianghua Huang;Zhantao Yang;Han Zhang;Deli Zhao;Fan Cheng", "authorids": "~Yifei_Zhang4;~Kecheng_Zheng2;~Yujun_Shen1;~Yu_Liu23;~Lianghua_Huang2;~Zhantao_Yang1;~Han_Zhang16;~Deli_Zhao1;~Fan_Cheng1", "gender": ";M;;M;M;M;M;M;M", "homepage": ";https://zkcys001.github.io/;;https://github.com/liuyuyuil;;;https://github.com/bibona;https://zhaodeli.github.io;http://www.cs.sjtu.edu.cn/~chengfan", "dblp": ";228/1362;;97/2274-63;166/6155;285/8489.html;26/4189-10;77/1992;", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;hMDQifQAAAAJ;;8zksQb4AAAAJ;JYVCn3AAAAAJ;Fz3X5FwAAAAJ;;https://scholar.google.com/citations?hl=en;https://scholar.google.com.hk/citations?user=sFfkf94AAAAJ", "orcid": "0009-0005-4831-883X;;;;0000-0002-9686-9354;0000-0003-2765-295X;;0000-0002-8838-578X;0000-0002-4307-6334", "linkedin": "zhang-yf-2bb8a61a1;;;;;;;;", "or_profile": "~Yifei_Zhang4;~Kecheng_Zheng2;~Yujun_Shen1;~Yu_Liu23;~Lianghua_Huang2;~Zhantao_Yang1;~Han_Zhang16;~Deli_Zhao1;~Fan_Cheng1", "aff": "Shanghai Jiaotong University;Zhejiang University;;Alibaba Group;Alibaba Group;Shanghai Jiaotong University;Shanghai Jiaotong University;Alibaba Group;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;zju.edu.cn;;alibaba-inc.com;aliabba-inc.com;sjtu.edu.cn;sjtu.edu.cn;alibaba-inc.com;sjtu.edu.cn", "position": "PhD student;Postdoc;;Researcher;Researcher;PhD student;PhD student;Director;Associate Professor", "bibtex": "@misc{\nzhang2023improving,\ntitle={Improving Vision Attention with Random Walk Graph Kernel},\nauthor={Yifei Zhang and Kecheng Zheng and Yujun Shen and Yu Liu and Lianghua Huang and Zhantao Yang and Han Zhang and Deli Zhao and Fan Cheng},\nyear={2023},\nurl={https://openreview.net/forum?id=LTvSyvRaJO}\n}", "github": "", "project": "", "reviewers": "gYvo;7tZq;rXiy;u4B9;9bsS", "site": "https://openreview.net/forum?id=LTvSyvRaJO", "pdf_size": 803441, "recommendation": "3;3;5;5;5", "confidence": "4;5;3;4;3", "correctness": "3;3;3;4;4", "technical_novelty": "2;3;3;3;4", "empirical_novelty": "2;2;2;2;4", "wc_summary_paper": "84;42;61;67;79", "wc_strength_and_weaknesses": "89;288;175;256;212", "wc_clarity_quality_novelty_and_reproducibility": "57;13;45;42;34", "wc_summary_review": "214;17;41;54;29", "wc_review": "444;360;322;419;354", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;0;0", "reply_authors": "0;0;0;0;0", "recommendation_avg": [ 4.2, 0.9797958971132712 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "correctness_avg": [ 3.4, 0.4898979485566356 ], "technical_novelty_avg": [ 3.0, 0.6324555320336759 ], "empirical_novelty_avg": [ 2.4, 0.8 ], "wc_summary_paper_avg": [ 66.6, 14.786480311419618 ], "wc_strength_and_weaknesses_avg": [ 204.0, 69.12307863514182 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 38.2, 14.606847709208173 ], "wc_summary_review_avg": [ 71.0, 72.55067194726732 ], "wc_review_avg": [ 379.8, 44.84818836920841 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 9, 0 ], "corr_recommendation_confidence": -0.7637626158259733, "corr_recommendation_correctness": 0.6666666666666667, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:CIUDeatgmP0J:scholar.google.com/&scioq=Improving+Vision+Attention+with+Random+Walk+Graph+Kernel&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2;2;0;0;2;0", "aff_unique_norm": "Shanghai Jiao Tong University;Zhejiang University;Alibaba Group", "aff_unique_dep": ";;", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.zju.edu.cn;https://www.alibaba.com", "aff_unique_abbr": "SJTU;ZJU;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "LUOSN8opID1", "title": "Constrained Hierarchical Deep Reinforcement Learning with Differentiable Formal Specifications", "track": "main", "status": "Reject", "tldr": "This paper uses differentiable formal specifications to constrain the policy updates in hierarchical deep reinforcement learning. ", "abstract": "Formal logic specifications are a useful tool to describe desired agent behavior and have been explored as a means to shape rewards in Deep Reinforcement Learning (DRL) systems over a variety of problems and domains. Prior work, however, has failed to consider the possibility of making these specifications differentiable, which would yield a more informative signal of the objective via the specification gradient. This paper examines precisely such an approach by exploring a Lagrangian method to constrain policy updates using a differentiable style of temporal logic specifications that associates logic formulae with real-valued quantitative semantics. This constrained learning mechanism is then used in a hierarchical setting where a high-level specification-guided neural network path planner works with a low-level control policy to navigate through planned waypoints. The effectiveness of our approach is demonstrated over four robot dynamics with five different types of Linear Temporal Logic (LTL) specifications. Our demo videos are collected at https://sites.google.com/view/schrl.", "keywords": "Deep Reinforcement Learning;Differentiable Formal Specification Language;Robot Navigation;Robot Planning and Control", "primary_area": "", "supplementary_material": "", "author": "Zikang Xiong;Joe Eappen;Ahmed H Qureshi;Suresh Jagannathan", "authorids": "~Zikang_Xiong1;~Joe_Eappen2;~Ahmed_H_Qureshi1;~Suresh_Jagannathan1", "gender": "M;M;M;M", "homepage": "https://xiong.zikang.me;https://jeappen.github.io/;http://www.cs.purdue.edu/homes/suresh;https://qureshiahmed.github.io/", "dblp": "https://dblp.uni-trier.de/pid/242/4529.html;267/5377;j/SJagannathan.html;222/2796", "google_scholar": "H-EoAgYAAAAJ;98R6dEQAAAAJ;https://scholar.google.com/scholar?hl=en;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0001-9386-5545;0000-0001-6871-2424;", "linkedin": ";jeappen/;;", "or_profile": "~Zikang_Xiong1;~Joe_Eappen2;~Suresh_Jagannathan1;~Ahmed_Qureshi1", "aff": "Purdue University;J.P. 
Morgan Chase; Purdue University;Purdue University", "aff_domain": "purdue.edu;jpmorgan.com;cs.purdue.edu;purdue.edu", "position": "PhD student;Intern;Full Professor;Assistant Professor", "bibtex": "@misc{\nxiong2023constrained,\ntitle={Constrained Hierarchical Deep Reinforcement Learning with Differentiable Formal Specifications},\nauthor={Zikang Xiong and Joe Eappen and Ahmed H Qureshi and Suresh Jagannathan},\nyear={2023},\nurl={https://openreview.net/forum?id=LUOSN8opID1}\n}", "github": "", "project": "", "reviewers": "Pa7M;Ea5U;KU3V;urxX", "site": "https://openreview.net/forum?id=LUOSN8opID1", "pdf_size": 2364036, "recommendation": "3;5;6;8", "confidence": "5;3;4;4", "correctness": "2;4;4;4", "technical_novelty": "1;2;3;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "53;49;114;64", "wc_strength_and_weaknesses": "676;243;59;255", "wc_clarity_quality_novelty_and_reproducibility": "113;30;228;110", "wc_summary_review": "123;44;8;28", "wc_review": "965;366;409;457", "wc_reply_reviewers": "682;85;19;0", "wc_reply_authors": "3442;1371;377;181", "reply_reviewers": "2;1;1;0", "reply_authors": "7;4;1;1", "recommendation_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.5, 0.8660254037844386 ], "technical_novelty_avg": [ 2.25, 0.82915619758885 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 70.0, 25.990382836734053 ], "wc_strength_and_weaknesses_avg": [ 308.25, 226.0855756124216 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 120.25, 70.55627186862979 ], "wc_summary_review_avg": [ 50.75, 43.61980628109208 ], "wc_review_avg": [ 549.25, 242.18213703739588 ], "wc_reply_reviewers_avg": [ 196.5, 282.07312881591537 ], "wc_reply_authors_avg": [ 1342.75, 1293.2502416392583 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.25, 2.48746859276655 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.39223227027636803, "corr_recommendation_correctness": 0.8006407690254357, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17998645987101003638&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Purdue University;JPMorgan Chase & Co.", "aff_unique_dep": ";", "aff_unique_url": "https://www.purdue.edu;https://www.jpmorganchase.com", "aff_unique_abbr": "Purdue;JPM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "LUQ2Csy_LUm", "title": "INSPIRE: A Framework for Integrating Individual User Preferences in Recourse", "track": "main", "status": "Reject", "tldr": "", "abstract": "Most recourse generation approaches optimize for indirect distance-based metrics like diversity, proximity, and sparsity, or a shared cost function across all users to generate recourse. The latter is an unrealistic assumption because users can have diverse feature preferences which they might be willing to act upon and any changes to any undesirable feature might lead to an impractical recourse. In this work, we propose a novel framework to incorporate the individuality of users in both recourse generation and evaluation procedure by focusing on the cost incurred by a user when opting for a recourse. 
To achieve this, we first propose an objective function, Expected Minimum Cost (EMC) that is based on two key ideas: (1) the user should be comfortable adopting at least one solution when presented with multiple options, and (2) we can approximately optimize for users' satisfaction even when their true cost functions (i.e., costs associated with feature changes) are unknown. EMC samples multiple plausible cost functions based on diverse feature preferences in the population and then finds a recourse set with one good solution for each category of user preferences. We optimize EMC with a novel discrete optimization algorithm, Cost-Optimized Local Search (COLS), that is guaranteed to improve the quality of the recourse set over iterations. Our evaluation framework computes the fraction of satisfied users by simulating each user's cost function and then computing the incurred cost for the provided recourse set. Experimental evaluation on popular real-world datasets demonstrates that our method satisfies up to 25.9% more users compared to strong baselines. Moreover, the human evaluation shows that our recourses are preferred more than twice as often as the strongest baseline.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/92cf05a3bc50bbc35e1e71364895e4cf6d6b6654.zip", "author": "Prateek Yadav;Peter Hase;Mohit Bansal", "authorids": "~Prateek_Yadav1;~Peter_Hase1;~Mohit_Bansal2", "gender": "M;;M", "homepage": "http://prateek-yadav.github.io;;https://www.cs.unc.edu/~mbansal/", "dblp": "220/5741;;32/5243.html", "google_scholar": "1lXhc0kAAAAJ;;DN8QtscAAAAJ", "orcid": ";;", "linkedin": "prateek-yadav-40bb34a8;;", "or_profile": "~Prateek_Yadav1;~Peter_Hase1;~Mohit_Bansal2", "aff": "Department of Computer Science, University of North Carolina, Chapel Hill;;University of North Carolina at Chapel Hill", "aff_domain": "cs.unc.edu;;unc.edu", "position": "Graduate Student;;Full Professor", "bibtex": "@misc{\nyadav2023inspire,\ntitle={{INSPIRE}: A Framework for Integrating Individual User Preferences in Recourse},\nauthor={Prateek Yadav and Peter Hase and Mohit Bansal},\nyear={2023},\nurl={https://openreview.net/forum?id=LUQ2Csy_LUm}\n}", "github": "", "project": "", "reviewers": "Zf2f;93yL;7m29;guRt;LeNz", "site": "https://openreview.net/forum?id=LUQ2Csy_LUm", "pdf_size": 1284511, "recommendation": "5;5;6;6;8", "confidence": "4;4;2;4;2", "correctness": "2;4;3;3;4", "technical_novelty": "3;2;3;3;3", "empirical_novelty": "2;2;3;3;2", "wc_summary_paper": "144;92;44;108;106", "wc_strength_and_weaknesses": "577;318;165;313;519", "wc_clarity_quality_novelty_and_reproducibility": "74;49;133;375;70", "wc_summary_review": "52;50;41;97;66", "wc_review": "847;509;383;893;761", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "1045;590;815;698;580", "reply_reviewers": "0;0;0;0;0", "reply_authors": "3;2;2;2;2", "recommendation_avg": [ 6.0, 1.0954451150103321 ], "confidence_avg": [ 3.2, 0.9797958971132712 ], "correctness_avg": [ 3.2, 0.7483314773547882 ], "technical_novelty_avg": [ 2.8, 0.39999999999999997 ], "empirical_novelty_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_paper_avg": [ 98.8, 32.33821269025238 ], "wc_strength_and_weaknesses_avg": [ 378.4, 150.11675456124144 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 140.2, 120.67377511290512 ], "wc_summary_review_avg": [ 61.2, 19.610201426808448 ], "wc_review_avg": [ 678.6, 198.62487256131848 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 745.6, 172.25399850221186 ], "reply_reviewers_avg": [ 0, 0 ], 
"reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.74535599249993, "corr_recommendation_correctness": 0.48795003647426666, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:MjT9ba8hUsEJ:scholar.google.com/&scioq=INSPIRE:+A+Framework+for+Integrating+Individual+User+Preferences+in+Recourse&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "University of North Carolina", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.unc.edu", "aff_unique_abbr": "UNC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Chapel Hill", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "LUdVQkS2CK", "title": "Gamma Sampling: Fine-grained Controlling Language Models without Training", "track": "main", "status": "Reject", "tldr": "We propose a new simple guided decoding method which does not require any training data to achieve fine-grained controllable text generation while maintaining a fast generation speed.", "abstract": "The dominant approaches for controlling language models achieve prominence in controlling high-level attributes (e.g. topic and sentiment). However, these methods often require condition-specific data or are computationally expensive. We propose a new simple guided decoding method, Gamma Sampling, which does not require any training data to achieve fine-grained controllable text generation while maintaining a fast generation speed. Gamma Sampling introduces attribute-related information (provided by humans or language models themselves) into the sampling process to guide language models to generate texts with desired attributes. Since no training is involved, Gamma Sampling can be easily applied to any language model for controllable text generation. 
Through experiments, we show that Gamma Sampling-steered GPT2-small (117M) outperforms baselines such as PPLM (345M) and CTRL (1.6B) in diversity, attribute relevance, and overall quality of generated samples.", "keywords": "guided-decoding;fine-grained control;data-free;fast generation speed", "primary_area": "", "supplementary_material": "/attachment/3f476439ad3386a2c88f11ab9dc9977cdfc8f2a8.zip", "author": "Shangda Wu;Maosong Sun", "authorids": "~Shangda_Wu1;~Maosong_Sun1", "gender": ";M", "homepage": "https://github.com/sanderwood;https://www.cs.tsinghua.edu.cn/csen/info/1312/4394.htm", "dblp": "309/6600;95/3291-1", "google_scholar": ";https://scholar.google.com.tw/citations?user=zIgT0HMAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Shangda_Wu1;~Maosong_Sun1", "aff": "Central Conservatory of Music;Tsinghua University", "aff_domain": "ccom.edu.cn;tsinghua.edu.cn", "position": "PhD student;Full Professor", "bibtex": "@misc{\nwu2023gamma,\ntitle={Gamma Sampling: Fine-grained Controlling Language Models without Training},\nauthor={Shangda Wu and Maosong Sun},\nyear={2023},\nurl={https://openreview.net/forum?id=LUdVQkS2CK}\n}", "github": "", "project": "", "reviewers": "aPW9;VRfW;UMzU;Yejv", "site": "https://openreview.net/forum?id=LUdVQkS2CK", "pdf_size": 3106760, "recommendation": "5;5;5;6", "confidence": "3;5;5;4", "correctness": "3;4;2;2", "technical_novelty": "3;3;2;3", "empirical_novelty": "2;3;2;2", "wc_summary_paper": "109;120;64;165", "wc_strength_and_weaknesses": "166;486;613;198", "wc_clarity_quality_novelty_and_reproducibility": "41;5;6;127", "wc_summary_review": "52;33;39;53", "wc_review": "368;644;722;543", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "258;401;766;358", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "correctness_avg": [ 2.75, 0.82915619758885 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 114.5, 35.92005011132362 ], "wc_strength_and_weaknesses_avg": [ 365.75, 189.49455796935172 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 44.75, 49.650654577759596 ], "wc_summary_review_avg": [ 44.25, 8.525696452489967 ], "wc_review_avg": [ 569.25, 132.3921731070232 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 445.75, 192.03694306044346 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.17407765595569782, "corr_recommendation_correctness": -0.5222329678670935, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:r6CXR3Mp2zAJ:scholar.google.com/&scioq=Gamma+Sampling:+Fine-grained+Controlling+Language+Models+without+Training&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Central Conservatory of Music;Tsinghua University", "aff_unique_dep": ";", "aff_unique_url": "http://www.ccom.edu.cn;https://www.tsinghua.edu.cn", "aff_unique_abbr": "CCOM;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "LUql3ZOFwFD", "title": "Differentially Private Conditional Text Generation For Synthetic Data Production", "track": "main", "status": "Reject", "tldr": "synthesis of private text classification datasets via conditional text generation through GPT-2 fine-tuned with DP-SGD", 
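The TL;DR above names GPT-2 fine-tuned with DP-SGD; as a minimal sketch of one DP-SGD step (per-example gradient clipping plus Gaussian noise), assuming a toy linear model in place of distilGPT2 and a hand-rolled loop in place of a vetted privacy library. The function name dp_sgd_step and all hyperparameter values are illustrative.

import torch

def dp_sgd_step(model, loss_fn, xs, ys, lr=0.1, clip=1.0, noise_mult=1.1):
    # One DP-SGD update: clip each example's gradient to norm <= clip,
    # sum, add Gaussian noise scaled by noise_mult * clip, then average.
    params = [p for p in model.parameters() if p.requires_grad]
    accum = [torch.zeros_like(p) for p in params]
    for x, y in zip(xs, ys):                            # per-example gradients
        model.zero_grad()
        loss_fn(model(x.unsqueeze(0)), y.unsqueeze(0)).backward()
        norm = torch.sqrt(sum(p.grad.norm() ** 2 for p in params))
        scale = min(1.0, clip / (norm + 1e-12))         # clip this example's gradient
        for a, p in zip(accum, params):
            a += p.grad * scale
    with torch.no_grad():
        for a, p in zip(accum, params):
            noise = torch.randn_like(a) * noise_mult * clip
            p -= lr * (a + noise) / len(xs)             # noisy averaged update

model = torch.nn.Linear(4, 2)
xs, ys = torch.randn(8, 4), torch.randint(0, 2, (8,))
dp_sgd_step(model, torch.nn.functional.cross_entropy, xs, ys)

Real pipelines would track the privacy budget accountant-side; that bookkeeping is omitted here.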
"abstract": "Companies have faced increasing pressure in recent years to anonymize user collected data when sharing internally or to third parties. Text data in particular contains copious amounts of personally identifiable information that has proven to be difficult to de-identify while remain useful for the party of interest. Previous works have suggested that synthetic text generation could provide a promising avenue to curate high performant and private datasets. In this paper, we introduce an approach to synthesize high utility text classification datasets by performing conditional generation through a large language model, distilGPT2, while providing measurable guarantees via differential privacy. We show that naive approaches suffer heavily from utility loss by entangling task-relevant factors in the transformer embedding space, making controlled generation more difficult. We analyze how incorporating a secondary learning objective can improve the performance of the generative model, improving utility of the generated data.", "keywords": "differential privacy;conditional text generation;NLP", "primary_area": "", "supplementary_material": "", "author": "Pranav Putta;Ander Steele;Joseph W Ferrara", "authorids": "~Pranav_Putta1;~Ander_Steele1;~Joseph_W_Ferrara1", "gender": "M;;M", "homepage": "https://github.com/pranav-putta;;", "dblp": ";;", "google_scholar": ";;", "orcid": ";;", "linkedin": ";ander-steele-6747569b/;joewferrara/", "or_profile": "~Pranav_Putta1;~Ander_Steele1;~Joseph_W_Ferrara1", "aff": "Georgia Institute of Technology;;", "aff_domain": "gatech.edu;;", "position": "Undergrad student;;", "bibtex": "@misc{\nputta2023differentially,\ntitle={Differentially Private Conditional Text Generation For Synthetic Data Production},\nauthor={Pranav Putta and Ander Steele and Joseph W Ferrara},\nyear={2023},\nurl={https://openreview.net/forum?id=LUql3ZOFwFD}\n}", "github": "", "project": "", "reviewers": "EbEe;fPMV;Hw7g;hB6f", "site": "https://openreview.net/forum?id=LUql3ZOFwFD", "pdf_size": 2153248, "recommendation": "3;3;3;5", "confidence": "3;4;4;4", "correctness": "3;2;3;4", "technical_novelty": "2;3;2;2", "empirical_novelty": "2;1;2;3", "wc_summary_paper": "38;231;26;62", "wc_strength_and_weaknesses": "123;612;222;117", "wc_clarity_quality_novelty_and_reproducibility": "17;60;26;45", "wc_summary_review": "31;54;9;24", "wc_review": "209;957;283;248", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 89.25, 82.85944424143817 ], "wc_strength_and_weaknesses_avg": [ 268.5, 202.65549585441792 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 37.0, 16.688319268278637 ], "wc_summary_review_avg": [ 29.5, 16.224980739587952 ], "wc_review_avg": [ 424.25, 308.69513682596295 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 0.816496580927726, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4377348598873047636&as_sdt=805&sciodt=0,3&hl=en", "gs_version_total": 0, "aff_unique_index": 
"0", "aff_unique_norm": "Georgia Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.gatech.edu", "aff_unique_abbr": "Georgia Tech", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "LV8OmADmoOe", "title": "Improving the Transferability of Adversarial Attacks through Experienced Precise Nesterov Momentum", "track": "main", "status": "Reject", "tldr": "Our proposed EPN is more effective than traditional momentum in improving transferability, and extensive experiments show that EPN-based attacks are more transferable than SOTA.", "abstract": "Deep Neural Networks are vulnerable to adversarial attacks, which makes adversarial attacks serve as a method to evaluate the robustness of DNNs. However, adversarial attacks have high white-box attack success rates but poor transferability, making black-box attacks impracticable in the real world. Momentum-based attacks were proposed to accelerate optimization to improve transferability. Nevertheless, conventional momentum-based attacks accelerate optimization inefficiently during early iterations since the initial value of momentum is zero, which leads to unsatisfactory transferability. Therefore, we propose Experienced Momentum (EM), which is the pre-trained momentum. Initializing the momentum to EM can help accelerate optimization during the early iterations. Moreover, the pre-update of conventional Nesterov momentum based attacks is rough, prompting us to propose Precise Nesterov momentum (PN). PN refines the pre-update by considering the gradient of the current data point. Finally, we integrate EM with PN as Experienced Precise Nesterov momentum (EPN) to further improve transferability. Extensive experiments against normally trained and defense models demonstrate that our EPN is more effective than conventional momentum in the improvement of transferability. 
Specifically, the attack success rates of our EPN-based attacks are $\\sim$11.9% and $\\sim$13.1% higher than conventional momentum-based attacks on average against normally trained and defense models, respectively.", "keywords": "adversarial attacks;transferability;black-box;momentum", "primary_area": "", "supplementary_material": "/attachment/98fe9a3feedfd5d72f12bb16be614672d24844d5.zip", "author": "Hao Wu;Jinwei Wang;Jiawei Zhang;Bin Ma;Xiangyang Luo;wang yu", "authorids": "~Hao_Wu35;~Jinwei_Wang1;~Jiawei_Zhang10;~Bin_Ma5;~Xiangyang_Luo2;~wang_yu3", "gender": "M;M;M;;M;M", "homepage": "https://github.com/tlemangen;https://cc.nankai.edu.cn/2021/0323/c13619a559015/page.htm;;;https://ieeexplore.ieee.org/author/37399489600;https://github.com/assassin-wy", "dblp": ";;;;;", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;I6eXZ9AAAAAJ;;;", "orcid": "0000-0003-2324-2152;;0000-0003-1745-7763;;0000-0003-3225-4649;", "linkedin": ";;;;;", "or_profile": "~Hao_Wu35;~Jinwei_Wang1;~Jiawei_Zhang10;~Bin_Ma5;~Xiangyang_Luo2;~wang_yu3", "aff": "Nanjing University of Information Science and Technology;NUIST;Nanjing University of Information Science and Technology;;Information Engineering University;Hangzhou Normal University", "aff_domain": "nuist.edu.cn;nuist.edu.cn;nuist.edu.cn;;zhaosheng.plaieu.edu.cn;stu.hznu.edu.cn", "position": "MS student;Full Professor;PhD student;;Full Professor;MS student", "bibtex": "@misc{\nwu2023improving,\ntitle={Improving the Transferability of Adversarial Attacks through Experienced Precise Nesterov Momentum},\nauthor={Hao Wu and Jinwei Wang and Jiawei Zhang and Bin Ma and Xiangyang Luo and wang yu},\nyear={2023},\nurl={https://openreview.net/forum?id=LV8OmADmoOe}\n}", "github": "", "project": "", "reviewers": "shJD;WpNN;u7X7;aDJy", "site": "https://openreview.net/forum?id=LV8OmADmoOe", "pdf_size": 949934, "recommendation": "3;3;5;6", "confidence": "5;5;4;4", "correctness": "2;2;3;2", "technical_novelty": "2;1;2;3", "empirical_novelty": "1;0;0;3", "wc_summary_paper": "116;197;75;19", "wc_strength_and_weaknesses": "315;523;93;117", "wc_clarity_quality_novelty_and_reproducibility": "20;26;33;61", "wc_summary_review": "15;44;11;45", "wc_review": "466;790;212;242", "wc_reply_reviewers": "63;115;82;177", "wc_reply_authors": "1458;939;906;413", "reply_reviewers": "1;1;1;1", "reply_authors": "4;3;2;1", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 4.5, 0.5 ], "correctness_avg": [ 2.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 1.0, 1.224744871391589 ], "wc_summary_paper_avg": [ 101.75, 64.88210462061168 ], "wc_strength_and_weaknesses_avg": [ 262.0, 173.5770722186545 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 35.0, 15.700318468107582 ], "wc_summary_review_avg": [ 28.75, 15.81731646013318 ], "wc_review_avg": [ 427.5, 231.15957691603435 ], "wc_reply_reviewers_avg": [ 109.25, 43.31498008772485 ], "wc_reply_authors_avg": [ 929.0, 369.7046118186788 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.9622504486493761, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9262852986479718296&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1;0;2;3", "aff_unique_norm": "Nanjing University of Information Science and Technology;Nanjing 
University of Information Science & Technology;Information Engineering University;Hangzhou Normal University", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.nuist.edu.cn;http://www.nuist.edu.cn/;;https://www.hghu.edu.cn", "aff_unique_abbr": ";NUIST;;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Betty: An Automatic Differentiation Library for Multilevel Optimization", "status": "Top-5%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12030", "id": "LV_MeMS38Q9", "poster": "", "openreview": "https://openreview.net/forum?id=LV_MeMS38Q9", "slides": "https://iclr.cc/virtual/2023/poster/12030", "video": "https://iclr.cc/virtual/2023/poster/12030", "author_site": "Sang Choe, Willie Neiswanger, Pengtao Xie, Eric Xing", "tldr": "We develop a scalable, user-friendly, and modular automatic differentiation library for multilevel optimization based on a novel interpretation of multilevel optimization as a dataflow graph.", "abstract": "Gradient-based multilevel optimization (MLO) has gained attention as a framework for studying numerous problems, ranging from hyperparameter optimization and meta-learning to neural architecture search and reinforcement learning. However, gradients in MLO, which are obtained by composing best-response Jacobians via the chain rule, are notoriously difficult to implement and memory/compute intensive. We take an initial step towards closing this gap by introducing Betty, a software library for large-scale MLO. At its core, we devise a novel dataflow graph for MLO, which allows us to (1) develop efficient automatic differentiation for MLO that reduces the computational complexity from $\\mathcal{O}(d^3)$ to $\\mathcal{O}(d^2)$, (2) incorporate systems support such as mixed-precision and data-parallel training for scalability, and (3) facilitate implementation of MLO programs of arbitrary complexity while allowing a modular interface for diverse algorithmic and systems design choices. We empirically demonstrate that Betty can be used to implement an array of MLO programs, while also observing up to 11% increase in test accuracy, 14% decrease in GPU memory usage, and 20% decrease in training wall time over existing implementations on multiple benchmarks. We also showcase that Betty enables scaling MLO to models with hundreds of millions of parameters. 
We open-source the code at https://github.com/leopard-ai/betty.", "keywords": "Multilevel Optimization;Automatic Differentiation;Bilevel Optimization;Meta Learning;Software Library", "primary_area": "", "supplementary_material": "/attachment/50604f54232b54885bc61c52e0d6de2f1634a8d4.zip", "author": "Sang Keun Choe;Willie Neiswanger;Pengtao Xie;Eric Xing", "authorids": "~Sang_Keun_Choe1;~Willie_Neiswanger2;~Pengtao_Xie3;~Eric_Xing1", "gender": ";M;M;M", "homepage": ";https://willieneis.github.io/;https://pengtaoxie.github.io/;http://www.cs.cmu.edu/~epxing/", "dblp": ";120/7593.html;133/1998;36/3855", "google_scholar": ";QwKHApEAAAAJ;cnncomYAAAAJ;https://scholar.google.com.tw/citations?user=5pKTRxEAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Sang_Keun_Choe1;~Willie_Neiswanger2;~Pengtao_Xie3;~Eric_Xing1", "aff": ";Stanford University;Carnegie Mellon University;School of Computer Science, Carnegie Mellon University", "aff_domain": ";stanford.edu; ;cs.cmu.edu", "position": ";Postdoc;Graduate Student;Full Professor", "bibtex": "@inproceedings{\nchoe2023betty,\ntitle={Betty: An Automatic Differentiation Library for Multilevel Optimization},\nauthor={Sang Keun Choe and Willie Neiswanger and Pengtao Xie and Eric Xing},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=LV_MeMS38Q9}\n}", "github": "", "project": "", "reviewers": "5V8s;G2gf;gnFX;MAHC", "pdf_size": 586591, "recommendation": "6;8;8;10", "confidence": "3;3;3;4", "correctness": "4;4;4;3", "technical_novelty": "3;4;3;3", "empirical_novelty": "3;2;3;4", "wc_summary_paper": "86;31;96;116", "wc_strength_and_weaknesses": "152;10;96;88", "wc_clarity_quality_novelty_and_reproducibility": "42;19;26;474", "wc_summary_review": "52;37;67;94", "wc_review": "332;97;285;772", "wc_reply_reviewers": "0;5;0;0", "wc_reply_authors": "771;190;185;871", "reply_reviewers": "0;1;0;0", "reply_authors": "2;1;1;2", "recommendation_avg": [ 8.0, 1.4142135623730951 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 82.25, 31.499007920885383 ], "wc_strength_and_weaknesses_avg": [ 86.5, 50.584088407324295 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 140.25, 192.87090890022787 ], "wc_summary_review_avg": [ 62.5, 21.0535032714273 ], "wc_review_avg": [ 371.5, 247.3828005339094 ], "wc_reply_reviewers_avg": [ 1.25, 2.165063509461097 ], "wc_reply_authors_avg": [ 504.25, 318.72195955095407 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.816496580927726, "corr_recommendation_correctness": -0.816496580927726, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3976157010235069531&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=LV_MeMS38Q9", "email": ";stanford.edu; ;cs.cmu.edu", "author_num": 4, "aff_unique_index": "0;1;1", "aff_unique_norm": "Stanford University;Carnegie Mellon University", "aff_unique_dep": ";", "aff_unique_url": "https://www.stanford.edu;https://www.cmu.edu", "aff_unique_abbr": "Stanford;CMU", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Stanford;;Pittsburgh", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "LVujM8Yxsi", 
"title": "DoE2Vec: Representation Learning for Exploratory Landscape Analysis", "track": "main", "status": "Reject", "tldr": "We propose DoE2Vec, a variational autoencoder (VAE)-based methodology to learn optimization landscape characteristics for downstream meta-learning tasks.", "abstract": "We propose DoE2Vec, a variational autoencoder (VAE)-based methodology to learn optimization landscape characteristics for downstream meta-learning tasks, e.g., automated selection of optimization algorithms. Principally, using large training data sets generated with a random function generator, DoE2Vec self-learns an informative latent representation for any design of experiments (DoE).\nUnlike the classical exploratory landscape analysis (ELA) method, our approach does not require any feature engineering and is easily applicable for high dimensional search spaces. For validation, we inspect the quality of latent reconstructions and analyze the latent representations using different experiments.\nThe latent representations not only show promising potentials in identifying similar (cheap-to-evaluate) surrogate functions, but also can boost performances when being used complementary to the ELA features in classification tasks.", "keywords": "autoencoder;optimization;exploratory landscape analysis;representation learning", "primary_area": "", "supplementary_material": "/attachment/c04642e6ea055bdaf318c6b669e2ee48ddea66ba.zip", "author": "Bas Van Stein;Fu Xing Long;Moritz A. Frenzel;Peter Krause;Markus Gitterle;Thomas B\u00e4ck", "authorids": "~Bas_Van_Stein1;~Fu_Xing_Long1;~Moritz_A._Frenzel1;~Peter_Krause1;~Markus_Gitterle1;~Thomas_B\u00e4ck1", "gender": "F;M;;;M;", "homepage": "https://nikivanstein.nl;;;;;", "dblp": "169/3047;;;;;", "google_scholar": "https://scholar.google.nl/citations?user=DST6voAAAAAJ;;;https://scholar.google.de/citations?user=UPdM-SUAAAAJ;;", "orcid": "0000-0002-0013-7969;0000-0003-4550-5777;0000-0002-4025-8773;;0000-0001-8760-1682;", "linkedin": ";;;peter-krause-9349b0203/;;", "or_profile": "~Bas_Van_Stein1;~Fu_Xing_Long1;~Moritz_A._Frenzel1;~Peter_Krause1;~Markus_Gitterle1;~Thomas_B\u00e4ck1", "aff": "Leiden University, Leiden University;Leiden University, Leiden University;Leiden University, Leiden University;divis intelligent solutions GmbH;Hochschule M\u00fcnchen;", "aff_domain": "liacs.leidenuniv.nl;liacs.leidenuniv.nl;liacs.leidenuniv.nl;divis-gmbh.de;hm.edu;", "position": "Assistant Professor;PhD student;Researcher;Head of R&D;Full Professor;", "bibtex": "@misc{\nstein2023doevec,\ntitle={DoE2Vec: Representation Learning for Exploratory Landscape Analysis},\nauthor={Bas Van Stein and Fu Xing Long and Moritz A. 
Frenzel and Peter Krause and Markus Gitterle and Thomas B{\\\"a}ck},\nyear={2023},\nurl={https://openreview.net/forum?id=LVujM8Yxsi}\n}", "github": "", "project": "", "reviewers": "wn3f;7ZSu;jSUv;KUFH", "site": "https://openreview.net/forum?id=LVujM8Yxsi", "pdf_size": 7938737, "recommendation": "3;3;3;5", "confidence": "3;2;2;3", "correctness": "2;2;3;4", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;1;2;3", "wc_summary_paper": "69;84;51;21", "wc_strength_and_weaknesses": "300;232;71;187", "wc_clarity_quality_novelty_and_reproducibility": "81;32;32;14", "wc_summary_review": "92;61;27;38", "wc_review": "542;409;181;260", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "580;355;201;430", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 2.5, 0.5 ], "correctness_avg": [ 2.75, 0.82915619758885 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 56.25, 23.466731770743024 ], "wc_strength_and_weaknesses_avg": [ 197.5, 83.38015351389082 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 39.75, 24.923633362734254 ], "wc_summary_review_avg": [ 54.5, 24.88473427625861 ], "wc_review_avg": [ 348.0, 138.73535958795796 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 391.5, 136.59886529543354 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.8703882797784891, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16902676106072186895&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0;1;2", "aff_unique_norm": "Leiden University;divis intelligent solutions;Hochschule M\u00fcnchen", "aff_unique_dep": ";;", "aff_unique_url": "https://www.universiteitleiden.nl;;https://www.munich-university.de", "aff_unique_abbr": "LU;;HM", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Leiden;", "aff_country_unique_index": "0;0;0;1;1", "aff_country_unique": "Netherlands;Germany" }, { "id": "LVum7knUA7g", "title": "Stationary Deep Reinforcement Learning with Quantum K-spin Hamiltonian Equation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Instability is a major issue of deep reinforcement learning (DRL) algorithms --- high variance of cumulative rewards over multiple runs. The instability is mainly caused by the existence of \\textit{many local minima} and worsened by the \\textit{multiple fixed points} issue of Bellman's optimality equation. As a fix, we propose a quantum K-spin Hamiltonian regularization term (called \\textit{H-term}) to help a policy network converge to a high-quality local minimum. First, we take a quantum perspective by modeling a policy as a \\textit{K-spin Ising model} and employing a Hamiltonian equation to measure the \\textit{energy} of a policy. Then, we derive a novel Hamiltonian policy gradient theorem and design a generic actor-critic algorithm that utilizes the H-term to regularize the policy network.
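For readers unfamiliar with the K-spin terminology, a minimal numpy sketch of the energy of a K-spin Ising configuration, H(s) = -sum over K-tuples (i1..iK) of J_{i1..iK} * s_{i1} * ... * s_{iK}, with K=3 and random couplings. How the paper maps a policy onto spins and turns this energy into the H-term regularizer is its own construction and is not reproduced here.

import numpy as np
from itertools import combinations

def k_spin_energy(spins, couplings, K=3):
    # Energy of a K-spin Ising configuration: negative sum of coupling-weighted
    # products of spins over all K-tuples of sites.
    return -sum(couplings[idx] * np.prod(spins[list(idx)])
                for idx in combinations(range(len(spins)), K))

rng = np.random.default_rng(0)
n = 8
spins = rng.choice([-1, 1], size=n)                              # a configuration
couplings = {idx: rng.normal() for idx in combinations(range(n), 3)}
print(k_spin_energy(spins, couplings))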
Finally, the proposed method significantly reduces the variance of cumulative rewards by $65.2\\% \\sim 85.6\\%$ on six MuJoCo tasks; achieves an approximation ratio $\\leq 1.05$ over $90\\%$ test cases and reduces its variance by $60.16\\% \\sim 94.52\\%$ on two combinatorial optimization tasks and two non-convex optimization tasks, compared with those of existing algorithms over $20$ runs, respectively.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/b87b1eba7e1f765f6ccaec8e4b00f3f029b609b6.zip", "author": "Xiao-Yang Liu;Zechu Li;Shixun Wu;Xiaodong Wang", "authorids": "~Xiao-Yang_Liu1;~Zechu_Li1;~Shixun_Wu1;~Xiaodong_Wang1", "gender": "M;M;M;", "homepage": "http://www.tensorlet.org/publications/;;http://rum.tifa123.tk;http://ee.columbia.edu/~wangx", "dblp": "125/9849;;;", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Xiao-Yang_Liu1;~Zechu_Li1;~Shixun_Wu1;~Xiaodong_Wang1", "aff": "Columbia University;Technische Universit\u00e4t Darmstadt;;Columbia University", "aff_domain": "columbia.edu;tu-darmstadt.de;;ee.columbia.edu", "position": "PhD student;MS student;;Full Professor", "bibtex": "@misc{\nliu2023stationary,\ntitle={Stationary Deep Reinforcement Learning with Quantum K-spin Hamiltonian Equation},\nauthor={Xiao-Yang Liu and Zechu Li and Shixun Wu and Xiaodong Wang},\nyear={2023},\nurl={https://openreview.net/forum?id=LVum7knUA7g}\n}", "github": "", "project": "", "reviewers": "947x;arYu;6uJh", "site": "https://openreview.net/forum?id=LVum7knUA7g", "pdf_size": 1260092, "recommendation": "3;3;5", "confidence": "3;4;3", "correctness": "2;2;2", "technical_novelty": "2;1;2", "empirical_novelty": "3;2;3", "wc_summary_paper": "75;19;43", "wc_strength_and_weaknesses": "233;233;260", "wc_clarity_quality_novelty_and_reproducibility": "57;20;117", "wc_summary_review": "61;45;68", "wc_review": "426;317;488", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1606;1016;1627", "reply_reviewers": "0;0;0", "reply_authors": "3;2;3", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 2.0, 0.0 ], "technical_novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 45.666666666666664, 22.939534045447004 ], "wc_strength_and_weaknesses_avg": [ 242.0, 12.727922061357855 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 64.66666666666667, 39.96943276499625 ], "wc_summary_review_avg": [ 58.0, 9.626352718795768 ], "wc_review_avg": [ 410.3333333333333, 70.68396014812853 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1416.3333333333333, 283.2082076651185 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.5, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:-0WBpsAqpIQJ:scholar.google.com/&scioq=Stationary+Deep+Reinforcement+Learning+with+Quantum+K-spin+Hamiltonian+Equation&hl=en&as_sdt=0,33", "gs_version_total": 2, "aff_unique_index": "0;1;0", "aff_unique_norm": "Columbia University;Technische Universit\u00e4t Darmstadt", "aff_unique_dep": ";", "aff_unique_url": "https://www.columbia.edu;https://www.tu-darmstadt.de", "aff_unique_abbr": 
"Columbia;TUD", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;Germany" }, { "id": "Lb8ZnWW_In6", "title": "LEARNING DYNAMIC ABSTRACT REPRESENTATIONS FOR SAMPLE-EFFICIENT REINFORCEMENT LEARNING", "track": "main", "status": "Reject", "tldr": "", "abstract": "In many real-world problems, the learning agent needs to learn a problem\u2019s abstractions and solution simultaneously. However, most such abstractions need to be designed and refined by hand for different problems and domains of application. This paper presents a novel top-down approach for constructing state abstractions while carrying out reinforcement learning. Starting with state variables and a simulator, it presents a novel domain-independent approach for dynamically computing an abstraction based on the dispersion of Q-values in abstract states as the agent continues acting and learning. Extensive empirical evaluation on multiple domains and problems shows that this approach automatically learns abstractions that are finely-tuned to the problem, yield powerful sample efficiency, and result in the RL agent significantly outperforming existing approaches.", "keywords": "Sequential Decision-Making;Reinforcement Learning;Learning Abstract Representations", "primary_area": "", "supplementary_material": "/attachment/20175b90c5a68cc7664772673325a896a4e6328e.zip", "author": "Mehdi Dadvar;Rashmeet Kaur Nayyar;Siddharth Srivastava", "authorids": "~Mehdi_Dadvar1;~Rashmeet_Kaur_Nayyar1;~Siddharth_Srivastava2", "gender": "M;F;", "homepage": "https://www.mdadvar.net/;https://www.rashmeetnayyar.com/;", "dblp": ";317/0477;", "google_scholar": "-vWmCLwAAAAJ;N2ZUg4YAAAAJ;", "orcid": ";0000-0002-6790-4448;", "linkedin": ";https://www.linkedin.com/authwall?trk=gf&trkInfo=AQHyzhJFPKin4AAAAX_136TIGCeBBQjeIxu0m21YrGI_m4qDo5H-2SEK5jXDtqsRuQFQ1LpfLGnpgGky3y6TKZ5ZhW5pzo9JlRUq5ZtRRTOPeGcnxN-LIMFyG_Jjhbxle_bVmyA=&originalReferer=https://www.rashmeetnayyar.com/&sessionRedirect=https%3A%2F%2Fwww.linkedin.com%2Fin%2Frashmeetnayyar%2F;", "or_profile": "~Mehdi_Dadvar1;~Rashmeet_Kaur_Nayyar1;~Siddharth_Srivastava2", "aff": "Arizona State University;Arizona State University;", "aff_domain": "asu.edu;asu.edu;", "position": "PhD student;PhD student;", "bibtex": "@misc{\ndadvar2023learning,\ntitle={{LEARNING} {DYNAMIC} {ABSTRACT} {REPRESENTATIONS} {FOR} {SAMPLE}-{EFFICIENT} {REINFORCEMENT} {LEARNING}},\nauthor={Mehdi Dadvar and Rashmeet Kaur Nayyar and Siddharth Srivastava},\nyear={2023},\nurl={https://openreview.net/forum?id=Lb8ZnWW_In6}\n}", "github": "", "project": "", "reviewers": "6Ztx;3y7Q;4Xkd", "site": "https://openreview.net/forum?id=Lb8ZnWW_In6", "pdf_size": 2025041, "recommendation": "3;5;5", "confidence": "4;4;3", "correctness": "2;2;2", "technical_novelty": "2;2;3", "empirical_novelty": "1;2;0", "wc_summary_paper": "64;43;28", "wc_strength_and_weaknesses": "180;56;758", "wc_clarity_quality_novelty_and_reproducibility": "266;331;24", "wc_summary_review": "58;47;26", "wc_review": "568;477;836", "wc_reply_reviewers": "0;141;595", "wc_reply_authors": "765;693;1551", "reply_reviewers": "0;1;2", "reply_authors": "1;2;3", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 2.0, 0.0 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 45.0, 14.7648230602334 ], 
"wc_strength_and_weaknesses_avg": [ 331.3333333333333, 305.9164737128239 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 207.0, 132.0934012987275 ], "wc_summary_review_avg": [ 43.666666666666664, 13.27487183449325 ], "wc_review_avg": [ 627.0, 152.3832886725663 ], "wc_reply_reviewers_avg": [ 245.33333333333334, 253.86391805234726 ], "wc_reply_authors_avg": [ 1003.0, 388.6077714096824 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.49999999999999983, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2854708534140286081&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Arizona State University", "aff_unique_dep": "", "aff_unique_url": "https://www.asu.edu", "aff_unique_abbr": "ASU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "LcQ3aRCEuKK", "title": "Holographic-(V)AE: an end-to-end SO(3)-Equivariant (Variational) Autoencoder in Fourier Space", "track": "main", "status": "Reject", "tldr": "", "abstract": "Group-equivariant neural networks have emerged as a data-efficient approach to solve classification and regression tasks, while respecting the relevant symmetries of the data. However, little work has been done to extend this paradigm to the unsupervised and generative domains. Here, we present Holographic-(V)AE (H-(V)AE), a fully end-to-end SO(3)-equivariant (variational) autoencoder in Fourier space, suitable for unsupervised learning and generation of data distributed around a specified origin. H-(V)AE is trained to reconstruct the spherical Fourier encoding of data, learning in the process a latent space with a maximally informative invariant embedding alongside an equivariant frame describing the orientation of the data. We extensively test the performance of H-(V)AE on diverse datasets and show that its latent space efficiently encodes the categorical features of spherical images and structural features of protein atomic environments. 
Our work can further be seen as a case study for equivariant modeling of a data distribution by reconstructing its Fourier encoding.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/1af4e050beb14b104b41a55c5c903e63a7f397b7.zip", "author": "Gian Marco Visani;Michael Neal Pun;Armita Nourmohammad", "authorids": "~Gian_Marco_Visani1;~Michael_Neal_Pun1;~Armita_Nourmohammad1", "gender": ";M;", "homepage": ";https://sites.google.com/view/michaelpun/home;https://sites.google.com/uw.edu/statphysevol/welcome", "dblp": ";;", "google_scholar": "Tn0o66IAAAAJ;;", "orcid": "0000-0003-0888-0922;;", "linkedin": "gianmarcovisani/;;", "or_profile": "~Gian_Marco_Visani1;~Michael_Neal_Pun1;~Armita_Nourmohammad1", "aff": "Department of Computer Science and Engineering, University of Washington;University of Washington;University of Washington, Seattle", "aff_domain": "cs.washington.edu;u.washington.edu;uw.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@misc{\nvisani2023holographicvae,\ntitle={Holographic-(V){AE}: an end-to-end {SO}(3)-Equivariant (Variational) Autoencoder in Fourier Space},\nauthor={Gian Marco Visani and Michael Neal Pun and Armita Nourmohammad},\nyear={2023},\nurl={https://openreview.net/forum?id=LcQ3aRCEuKK}\n}", "github": "", "project": "", "reviewers": "Wgbf;FEZB;nuL1", "site": "https://openreview.net/forum?id=LcQ3aRCEuKK", "pdf_size": 8562025, "recommendation": "3;5;5", "confidence": "2;3;3", "correctness": "3;3;2", "technical_novelty": "2;3;2", "empirical_novelty": "1;2;2", "wc_summary_paper": "36;177;77", "wc_strength_and_weaknesses": "125;703;330", "wc_clarity_quality_novelty_and_reproducibility": "226;142;21", "wc_summary_review": "52;31;16", "wc_review": "439;1053;444", "wc_reply_reviewers": "0;123;48", "wc_reply_authors": "790;787;818", "reply_reviewers": "0;1;1", "reply_authors": "1;1;1", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "wc_summary_paper_avg": [ 96.66666666666667, 59.21899094791205 ], "wc_strength_and_weaknesses_avg": [ 386.0, 239.26693600802153 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 129.66666666666666, 84.14405636895705 ], "wc_summary_review_avg": [ 33.0, 14.7648230602334 ], "wc_review_avg": [ 645.3333333333334, 288.27109154790776 ], "wc_reply_reviewers_avg": [ 57.0, 50.61620293937506 ], "wc_reply_authors_avg": [ 798.3333333333334, 13.960261060914616 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.9999999999999998, "corr_recommendation_correctness": -0.49999999999999983, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3593758938444069107&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 10, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Washington", "aff_unique_dep": "Department of Computer Science and Engineering", "aff_unique_url": "https://www.washington.edu", "aff_unique_abbr": "UW", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Seattle;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "LdQUvGLk7yU", "title": "Tree Structure LSTM for Chinese Named Entity Recognition", 
"track": "main", "status": "Withdraw", "tldr": "", "abstract": "In this paper, we remodel the bi-directional LSTM (Bi-LSTM) network for Chinese Named Entity Recognition (NER). We convert LSTM from chain-like structure into tree structure which is fixed to the dependency parsing tree of the sentence. The new structure model can fully leverage the syntax information of the sentence and has the capability of capturing long-range dependencies. In addition, we use dependency parsing label embedding to improve the performance of our approach. Experimental studies on four benchmarking Chinese NER datasets have verified the effectiveness of our approach.", "keywords": "LSTM;NER;dependency parsing;tree structure;Chinese", "primary_area": "", "supplementary_material": "/attachment/bd1fd51d9f0f7c34083bef5c768b4e04a20da891.zip", "author": "YUAN GAO;Hui Wang;Hongcheng Li", "authorids": "~YUAN_GAO17;~Hui_Wang14;~Hongcheng_Li2", "gender": "M;M;", "homepage": "https://yhoon.github.io/;https://github.com/bingren84;", "dblp": ";;", "google_scholar": ";;", "orcid": ";;", "linkedin": ";;", "or_profile": "~YUAN_GAO17;~Hui_Wang14;~Hongcheng_Li2", "aff": ";Tsinghua University;", "aff_domain": ";tsinghua.edu.cn;", "position": ";PhD student;", "bibtex": "@misc{\ngao2023tree,\ntitle={Tree Structure {LSTM} for Chinese Named Entity Recognition},\nauthor={YUAN GAO and Hui Wang and Hongcheng Li},\nyear={2023},\nurl={https://openreview.net/forum?id=LdQUvGLk7yU}\n}", "github": "", "project": "", "reviewers": "tCYn;kEEc;4btm;nBaH", "site": "https://openreview.net/forum?id=LdQUvGLk7yU", "pdf_size": 515648, "recommendation": "1;1;3;3", "confidence": "5;4;4;5", "correctness": "3;2;3;3", "technical_novelty": "1;3;2;2", "empirical_novelty": "1;1;2;3", "wc_summary_paper": "48;51;39;16", "wc_strength_and_weaknesses": "230;109;93;54", "wc_clarity_quality_novelty_and_reproducibility": "5;31;205;26", "wc_summary_review": "26;81;17;10", "wc_review": "309;272;354;106", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "15;0;15;15", "reply_reviewers": "0;0;0;0", "reply_authors": "1;0;1;1", "recommendation_avg": [ 2.0, 1.0 ], "confidence_avg": [ 4.5, 0.5 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 1.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 38.5, 13.720422734012244 ], "wc_strength_and_weaknesses_avg": [ 121.5, 65.75902979819577 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 66.75, 80.41260784230293 ], "wc_summary_review_avg": [ 33.5, 28.00446392988089 ], "wc_review_avg": [ 260.25, 93.67063307141679 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 11.25, 6.49519052838329 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0.75, 0.4330127018922193 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:_Q98IIMVBOwJ:scholar.google.com/&scioq=Tree+Structure+LSTM+for+Chinese+Named+Entity+Recognition&hl=en&as_sdt=0,10", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "id": "LdUByi1hN3", "title": "Label-Free Synthetic Pretraining of Object Detectors", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "We propose a new 
approach, Synthetic Optimized Layout with Instance Detection (SOLID), to pretrain object detectors with synthetic images. Our ``SOLID'' approach consists of two main components: (1) generating synthetic images using a collection of unlabelled 3D models with optimized scene arrangement; (2) pretraining an object detector on \"instance detection\" task - given a query image depicting an object, detecting all instances of the exact same object in a target image. Our approach does not need any semantic labels for pretraining and allows the use of arbitrary, diverse 3D models. Experiments on COCO show that with optimized data generation and a proper pretraining task, synthetic data can be highly effective data for pretraining object detectors. In particular, pretraining on rendered images achieves performance competitive with pretraining on real images while using significantly less computing resources.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/0e276125ba1d575f043655cf94fe392df20c38ed.zip", "author": "Hei Law;Jia Deng", "authorids": "~Hei_Law2;~Jia_Deng1", "gender": ";M", "homepage": ";", "dblp": "151/6539;07/6526-1.html", "google_scholar": ";U3Eub-EAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Hei_Law2;~Jia_Deng1", "aff": ";Princeton University", "aff_domain": ";princeton.edu", "position": ";Assistant Professor", "bibtex": "@misc{\nlaw2023labelfree,\ntitle={Label-Free Synthetic Pretraining of Object Detectors},\nauthor={Hei Law and Jia Deng},\nyear={2023},\nurl={https://openreview.net/forum?id=LdUByi1hN3}\n}", "github": "", "project": "", "reviewers": "aPyn;GMZg;WGhk;Tb34", "site": "https://openreview.net/forum?id=LdUByi1hN3", "pdf_size": 7940538, "recommendation": "3;3;5;5", "confidence": "5;4;3;4", "correctness": "2;2;3;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;2;2;0", "wc_summary_paper": "95;64;49;71", "wc_strength_and_weaknesses": "308;325;93;153", "wc_clarity_quality_novelty_and_reproducibility": "62;249;15;27", "wc_summary_review": "41;28;33;37", "wc_review": "506;666;190;288", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 69.75, 16.60383991732033 ], "wc_strength_and_weaknesses_avg": [ 219.75, 99.2304766692169 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 88.25, 94.40173462389343 ], "wc_summary_review_avg": [ 34.75, 4.815340071064556 ], "wc_review_avg": [ 412.5, 185.74915881370768 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.7071067811865475, "corr_recommendation_correctness": 1.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7858675554503844837&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff_unique_index": "0", "aff_unique_norm": "Princeton University", "aff_unique_dep": "", "aff_unique_url": "https://www.princeton.edu", "aff_unique_abbr": "Princeton", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "LeZ39Gkwbi0", "title": "ProtoGNN: Prototype-Assisted Message Passing Framework for Non-Homophilous Graphs", "track": "main", "status": 
"Reject", "tldr": "Class prototype-assisted message passing framework for improving node representation learning on non-homophilous graphs", "abstract": "Many well-known Graph Neural Network (GNN) models assume the underlying graphs are homophilous, where nodes share similar features and labels with their neighbours. They rely on message passing that iteratively aggregates neighbour's features and often suffer performance degradation on non-homophilous graphs where useful information is hardly available in the local neighbourhood. In addition, earlier studies show that in some cases GNNs are even outperformed by Multi-Layer Perceptron, indicating insufficient exploitation of node feature information. Motivated by the two limitations, we propose ProtoGNN, a novel message passing framework that augments existing GNNs by effectively combining node features with structural information. ProtoGNN learns multiple class prototypes for each class from raw node features with the slot-attention mechanism. These prototype representations are then transferred onto the structural node features with explicit message passing to all non-training nodes irrespective of distance. This form of message passing, from training nodes to class prototypes to non-training nodes, also serves as a shortcut that bypasses local graph neighbourhoods and captures global information. ProtoGNN is a generic framework which can be applied onto any of the existing GNN backbones to improve node representations when node features are strong and local graph information is scarce. We demonstrate through extensive experiments that ProtoGNN brings performance improvement to various GNN backbones and achieves state-of-the-art on several non-homophilous datasets.", "keywords": "Graph Neural Networks;Graph representation learning;Non-homophilous Graph;Heterophily;Non-homophily;Node Classification", "primary_area": "", "supplementary_material": "", "author": "Yanfei Dong;Mohammed Haroon Dupty;Lambert Deng;Yong Liang Goh;Wee Sun Lee", "authorids": "~Yanfei_Dong1;~Mohammed_Haroon_Dupty1;~Lambert_Deng1;~Yong_Liang_Goh1;~Wee_Sun_Lee1", "gender": "F;;M;M;M", "homepage": ";https://dmharoon.github.io;;;http://www.comp.nus.edu.sg/~leews/", "dblp": ";186/7914;;276/5015;86/1498", "google_scholar": ";https://scholar.google.com/citations?hl=en;;qbOcjgIAAAAJ;https://scholar.google.com.sg/citations?user=8PCrLgwAAAAJ", "orcid": ";;;;", "linkedin": ";;deng-yuan-lambert-1411014b;;", "or_profile": "~Yanfei_Dong1;~Mohammed_Haroon_Dupty1;~Lambert_Deng1;~Yong_Liang_Goh1;~Wee_Sun_Lee1", "aff": "National University of Singapore;National University of Singapore;Tsinghua University;National University of Singapore;National University of Singapore", "aff_domain": "u.nus.edu;nus.edu;tsinghua.edu.cn;u.nus.edu;nus.edu.sg", "position": "PhD student;PhD student;Intern;PhD student;Full Professor", "bibtex": "@misc{\ndong2023protognn,\ntitle={Proto{GNN}: Prototype-Assisted Message Passing Framework for Non-Homophilous Graphs},\nauthor={Yanfei Dong and Mohammed Haroon Dupty and Lambert Deng and Yong Liang Goh and Wee Sun Lee},\nyear={2023},\nurl={https://openreview.net/forum?id=LeZ39Gkwbi0}\n}", "github": "", "project": "", "reviewers": "A8KJ;mEwF;v2yj;Fqiu", "site": "https://openreview.net/forum?id=LeZ39Gkwbi0", "pdf_size": 1785409, "recommendation": "5;5;5;6", "confidence": "4;2;4;3", "correctness": "3;4;2;3", "technical_novelty": "2;1;3;2", "empirical_novelty": "2;0;2;2", "wc_summary_paper": "65;36;47;74", "wc_strength_and_weaknesses": "89;62;270;271", 
"wc_clarity_quality_novelty_and_reproducibility": "48;14;14;35", "wc_summary_review": "46;29;25;43", "wc_review": "248;141;356;423", "wc_reply_reviewers": "0;0;0;25", "wc_reply_authors": "642;570;1310;966", "reply_reviewers": "0;0;0;1", "reply_authors": "2;2;3;3", "recommendation_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 1.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 55.5, 14.874474780643517 ], "wc_strength_and_weaknesses_avg": [ 173.0, 97.96683112155868 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 27.75, 14.49784466739798 ], "wc_summary_review_avg": [ 35.75, 8.926785535678562 ], "wc_review_avg": [ 292.0, 107.23105893350116 ], "wc_reply_reviewers_avg": [ 6.25, 10.825317547305483 ], "wc_reply_authors_avg": [ 872.0, 293.5915530120034 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.17407765595569782, "corr_recommendation_correctness": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13929752839052679723&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "National University of Singapore;Tsinghua University", "aff_unique_dep": ";", "aff_unique_url": "https://www.nus.edu.sg;https://www.tsinghua.edu.cn", "aff_unique_abbr": "NUS;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "Singapore;China" }, { "id": "LemVOgJ4yP", "title": "Learning to Cooperate and Communicate Over Imperfect Channels", "track": "main", "status": "Reject", "tldr": "We investigate communication in multi-agent reinforcement learning and propose an adaptive message size selection that enables agents to use an imperfect communication channel more efficiently.", "abstract": "Information exchange in multi-agent systems improves the cooperation among agents, especially in partially observable settings. This can be seen as part of the problem in which the agents learn how to communicate and to solve a shared task simultaneously. In the real world, communication is often carried out over imperfect channels and this requires the agents to deal with uncertainty due to potential information loss. In this paper, we consider a cooperative multi-agent system where the agents act and exchange information in a decentralized manner using a limited and unreliable channel. To cope with such channel constraints, we propose a novel communication approach based on independent Q-learning. Our method allows agents to dynamically adapt how much information to share by sending messages of different size, depending on their local observations and the channel properties. In addition to this message size selection, agents learn to encode and decode messages to improve their policies. 
We show that our approach outperforms approaches without adaptive capabilities and discuss its limitations in different environments.", "keywords": "multi-agent systems;deep reinforcement learning;emergent communication;imperfect communication channels", "primary_area": "", "supplementary_material": "/attachment/7f7aee00888eb7527e72eb704553a68753a6287b.zip", "author": "Jannis Weil;Gizem Ekinci;Heinz Koeppl;Tobias Meuser", "authorids": "~Jannis_Weil1;~Gizem_Ekinci1;~Heinz_Koeppl1;~Tobias_Meuser1", "gender": "M;F;M;M", "homepage": "https://www.kom.tu-darmstadt.de/kom-multimedia-communications-lab/people/staff/jannis-weil/;https://www.bcs.tu-darmstadt.de/team_sos/ekincigizem_sos.en.jsp;;https://www.kom.tu-darmstadt.de/kom-multimedia-communications-lab/people/staff/tobias-meuser", "dblp": ";;41/6084;192/0792", "google_scholar": "https://scholar.google.de/citations?user=owmjp8sAAAAJ;T-wSNhQAAAAJ;https://scholar.google.de/citations?user=WaPW80kAAAAJ;https://scholar.google.de/citations?user=fmYxg1oAAAAJ", "orcid": ";0000-0002-3526-9697;;", "linkedin": ";https://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=&cad=rja&uact=8&ved=2ahUKEwjz0rzWqNX3AhUaPuwKHay2A3wQFnoECAYQAQ&url=https%3A%2F%2Fde.linkedin.com%2Fin%2Fgizemekinci&usg=AOvVaw0382-RkWD3msNz0vdd_gA1;;", "or_profile": "~Jannis_Weil1;~Gizem_Ekinci1;~Heinz_Koeppl1;~Tobias_Meuser1", "aff": "Technische Universit\u00e4t Darmstadt;Technische Universit\u00e4t Darmstadt;TU Darmstadt;Technische Universit\u00e4t Darmstadt", "aff_domain": "tu-darmstadt.de;tu-darmstadt.de;tu-darmstadt.de;tu-darmstadt.de", "position": "PhD student;PhD student;Full Professor;Postdoc", "bibtex": "@misc{\nweil2023learning,\ntitle={Learning to Cooperate and Communicate Over Imperfect Channels},\nauthor={Jannis Weil and Gizem Ekinci and Heinz Koeppl and Tobias Meuser},\nyear={2023},\nurl={https://openreview.net/forum?id=LemVOgJ4yP}\n}", "github": "", "project": "", "reviewers": "rgRs;JWRp;hSti", "site": "https://openreview.net/forum?id=LemVOgJ4yP", "pdf_size": 1221220, "recommendation": "3;5;5", "confidence": "4;3;3", "correctness": "3;3;3", "technical_novelty": "2;3;2", "empirical_novelty": "2;2;2", "wc_summary_paper": "70;90;54", "wc_strength_and_weaknesses": "598;95;169", "wc_clarity_quality_novelty_and_reproducibility": "41;9;30", "wc_summary_review": "41;55;46", "wc_review": "750;249;299", "wc_reply_reviewers": "30;0;0", "wc_reply_authors": "306;44;165", "reply_reviewers": "1;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 71.33333333333333, 14.72714802291635 ], "wc_strength_and_weaknesses_avg": [ 287.3333333333333, 221.74209243072357 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 26.666666666666668, 13.27487183449325 ], "wc_summary_review_avg": [ 47.333333333333336, 5.792715732327589 ], "wc_review_avg": [ 432.6666666666667, 225.31508801873184 ], "wc_reply_reviewers_avg": [ 10.0, 14.142135623730951 ], "wc_reply_authors_avg": [ 171.66666666666666, 107.06488167876938 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.9999999999999998, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:x8xJQr7ZHJwJ:scholar.google.com/&scioq=Learning+to+Cooperate+and+Communicate+Over+Imperfect+Channels&hl=en&as_sdt=0,10", "gs_version_total": 3, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Technische Universit\u00e4t Darmstadt", "aff_unique_dep": "", "aff_unique_url": "https://www.tu-darmstadt.de", "aff_unique_abbr": "TUD", "aff_campus_unique_index": "1", "aff_campus_unique": ";Darmstadt", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "title": "Adversarial Training of Self-supervised Monocular Depth Estimation against Physical-World Attacks", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11551", "id": "LfdEuhjR5GV", "poster": "/media/PosterPDFs/ICLR%202023/11551.png?t=1681802147.985461", "openreview": "https://openreview.net/forum?id=LfdEuhjR5GV", "slides": "https://iclr.cc/virtual/2023/poster/11551", "video": "https://iclr.cc/virtual/2023/poster/11551", "author_site": "Zhiyuan Cheng, James Liang, Guanhong Tao, Dongfang Liu, Xiangyu Zhang", "tldr": "Use self-supervised adversarial training to harden monocular depth estimation models against physical-world adversarial attacks.", "abstract": "Monocular Depth Estimation (MDE) is a critical component in applications such as autonomous driving. There are various attacks against MDE networks. These attacks, especially the physical ones, pose a great threat to the security of such systems. Traditional adversarial training method requires ground-truth labels and hence cannot be directly applied to self-supervised MDE that does not have depth ground truth. Some self-supervised model hardening technique (e.g., contrastive learning) ignores the domain knowledge of MDE and can hardly achieve optimal performance. In this work, we propose a novel adversarial training method for self-supervised MDE models based on view synthesis without using the depth ground truth. We improve adversarial robustness against physical-world attacks using $L_0$-norm-bounded perturbation in training. We compare our method with supervised learning-based and contrastive learning-based methods that are tailored for MDE. 
Results on two representative MDE networks show that we achieve better robustness against various adversarial attacks with nearly no benign performance degradation.", "keywords": "Adversarial Training;Monocular Depth Estimation;Adversarial Attack;Self-supervised Learning.", "primary_area": "", "supplementary_material": "/attachment/8a8b0a0b1d26ff1f3e8db33b4688f49b73cb7124.zip", "author": "Zhiyuan Cheng;James Chenhao Liang;Guanhong Tao;Dongfang Liu;Xiangyu Zhang", "authorids": "~Zhiyuan_Cheng2;~James_Chenhao_Liang1;~Guanhong_Tao1;~Dongfang_Liu1;~Xiangyu_Zhang3", "gender": "M;M;;;M", "homepage": "https://bob-cheng.github.io;https://jamesliang819.github.io/;;https://www.rit.edu/directory/dxleec-dongfang-liu;https://www.cs.purdue.edu/homes/xyzhang", "dblp": "324/1963;323/3403;;;", "google_scholar": "dVchB-gAAAAJ;cR8m4CcAAAAJ;;uICY0vEAAAAJ;PXbu1wIAAAAJ", "orcid": "0000-0001-7280-6079;;;;", "linkedin": "bobchengzy/;;;;", "or_profile": "~Zhiyuan_Cheng2;~James_Chenhao_Liang1;~Guanhong_Tao1;~Dongfang_Liu1;~Xiangyu_Zhang3", "aff": "Purdue University;Rochester Institute of Technology;;Rochester Institute of Technology;Purdue University", "aff_domain": "purdue.edu;rit.edu;;rit.edu;cs.purdue.edu", "position": "PhD student;PhD student;;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\ncheng2023adversarial,\ntitle={Adversarial Training of Self-supervised Monocular Depth Estimation against Physical-World Attacks},\nauthor={Zhiyuan Cheng and James Chenhao Liang and Guanhong Tao and Dongfang Liu and Xiangyu Zhang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=LfdEuhjR5GV}\n}", "github": "", "project": "", "reviewers": "9eWp;jvhn;fV94;QLeq", "pdf_size": 9364769, "recommendation": "6;6;8;8", "confidence": "4;2;4;4", "correctness": "3;3;4;3", "technical_novelty": "2;2;4;3", "empirical_novelty": "2;2;3;4", "wc_summary_paper": "58;83;177;76", "wc_strength_and_weaknesses": "207;17;81;244", "wc_clarity_quality_novelty_and_reproducibility": "41;14;99;50", "wc_summary_review": "20;159;39;57", "wc_review": "326;273;396;427", "wc_reply_reviewers": "0;0;21;130", "wc_reply_authors": "1308;583;178;2487", "reply_reviewers": "0;0;1;2", "reply_authors": "2;2;1;8", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 98.5, 46.230401252855245 ], "wc_strength_and_weaknesses_avg": [ 137.25, 92.03905421069906 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 51.0, 30.71644510681534 ], "wc_summary_review_avg": [ 68.75, 53.72324915713866 ], "wc_review_avg": [ 355.5, 60.06038627914409 ], "wc_reply_reviewers_avg": [ 37.75, 53.94615370904584 ], "wc_reply_authors_avg": [ 1139.0, 877.2573738647056 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 3.25, 2.7726341266023544 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16591657468404586826&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=LfdEuhjR5GV", "email": "purdue.edu;rit.edu;;rit.edu;cs.purdue.edu", "author_num": 5, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Purdue University;Rochester Institute 
of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.purdue.edu;https://www.rit.edu", "aff_unique_abbr": "Purdue;RIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "Lgp4Y2Tor34", "title": "Smooth image-to-image translations with latent space interpolations", "track": "main", "status": "Withdraw", "tldr": "We are regularizing the latent spaces to have super smooth image-to-image translations. We also created a metric to quantitatively measure how smooth the translations.", "abstract": "Multi-domain image-to-image (I2I) translations can transform a source image according to the style of a target domain. One important, desired characteristic of these transformations, is their graduality, which corresponds to a smooth change between the source and the target image when their respective latent-space representations are linearly interpolated. However, state-of-the-art methods usually perform poorly when evaluated using inter-domain interpolations, often producing abrupt changes in the appearance or non-realistic intermediate images. In this paper, we argue that one of the main reasons behind this problem is the lack of sufficient inter-domain training data and we propose two different regularization methods to alleviate this issue: a new shrinkage loss, which compacts the latent space, and a Mixup data-augmentation strategy, which flattens the style representations between domains. We also propose a new metric to quantitatively evaluate the degree of the interpolation smoothness, an aspect which is not sufficiently covered by the existing I2I translation metrics. Using both our proposed metric and standard evaluation protocols, we show that our regularization techniques can improve the state-of-the-art multi-domain I2I translations by a large margin. 
Our code will be made publicly available upon the acceptance of this article.", "keywords": "image-to-image translation;GANs;mixup;latent spaces interpolation", "primary_area": "", "supplementary_material": "/attachment/8dc930ed45eecb7bdb377c3c46c4823100de37c2.zip", "author": "Yahui Liu;Enver Sangineto;Yajing Chen;Linchao Bao;Haoxian Zhang;Nicu Sebe;Bruno Lepri;Marco De Nadai", "authorids": "~Yahui_Liu1;~Enver_Sangineto1;~Yajing_Chen1;~Linchao_Bao1;~Haoxian_Zhang1;~Nicu_Sebe1;~Bruno_Lepri1;~Marco_De_Nadai1", "gender": "M;;;M;;M;M;M", "homepage": "https://yhlleo.github.io/;;;http://linchaobao.github.io/;;http://disi.unitn.it/~sebe/;;http://www.marcodena.it", "dblp": ";http://dblp.uni-trier.de/pers/hd/s/Sangineto:Enver;140/2315;126/1018;;20/3519;99/6489;177/8843", "google_scholar": "P8qd0rEAAAAJ;https://scholar.google.it/citations?user=eJZlvlAAAAAJ;;xQZMbkUAAAAJ;https://scholar.google.com.hk/citations?user=r_aOHlQAAAAJ;https://scholar.google.it/citations?user=stFCYOAAAAAJ;JfcopG0AAAAJ;FJ-QYSYAAAAJ", "orcid": ";;;0000-0001-9543-3754;0000-0001-7078-868X;0000-0002-6597-7248;0000-0003-1275-2333;0000-0001-8466-3933", "linkedin": ";;;;;;brunolepri/?originalSubdomain=it;", "or_profile": "~Yahui_Liu1;~Enver_Sangineto1;~Yajing_Chen1;~Linchao_Bao1;~Haoxian_Zhang1;~Nicu_Sebe1;~Bruno_Lepri1;~Marco_De_Nadai1", "aff": "Huawei Technologies Ltd.;University of Trento;;Tencent AI Lab;Tencent AI Lab;University of Trento;Fondazione Bruno Kessler;Spotify", "aff_domain": "huawei.com;unitn.it;;tencent.com;tencent.com;unitn.it;fbk.eu;spotify.com", "position": "Principal Researcher;Postdoc;;Researcher;Researcher;Full Professor;Principal Researcher;Researcher", "bibtex": "@misc{\nliu2023smooth,\ntitle={Smooth image-to-image translations with latent space interpolations},\nauthor={Yahui Liu and Enver Sangineto and Yajing Chen and Linchao Bao and Haoxian Zhang and Nicu Sebe and Bruno Lepri and Marco De Nadai},\nyear={2023},\nurl={https://openreview.net/forum?id=Lgp4Y2Tor34}\n}", "github": "", "project": "", "reviewers": "CkwW;9JZd;EDpm;MbDj", "site": "https://openreview.net/forum?id=Lgp4Y2Tor34", "pdf_size": 36849548, "recommendation": "3;3;5;6", "confidence": "4;4;4;2", "correctness": "3;3;3;3", "technical_novelty": "2;2;2;4", "empirical_novelty": "2;2;2;0", "wc_summary_paper": "40;17;54;44", "wc_strength_and_weaknesses": "195;138;390;134", "wc_clarity_quality_novelty_and_reproducibility": "22;15;12;38", "wc_summary_review": "16;15;22;19", "wc_review": "273;185;478;235", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.5, 0.8660254037844386 ], "empirical_novelty_avg": [ 1.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 38.75, 13.5531361684298 ], "wc_strength_and_weaknesses_avg": [ 214.25, 104.29854984610284 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 21.75, 10.059199769365355 ], "wc_summary_review_avg": [ 18.0, 2.7386127875258306 ], "wc_review_avg": [ 292.75, 111.41448514443712 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": -0.7777777777777777, "corr_recommendation_correctness": 0.0, "gs_citation": 4, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=3683562161720813216&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2;2;1;3;4", "aff_unique_norm": "Huawei;University of Trento;Tencent;Fondazione Bruno Kessler;Spotify", "aff_unique_dep": "Huawei Technologies;;Tencent AI Lab;;", "aff_unique_url": "https://www.huawei.com;https://www.unitn.it;https://ai.tencent.com;https://www.fbk.eu;https://www.spotify.com", "aff_unique_abbr": "Huawei;UniTN;Tencent AI Lab;FBK;Spotify", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;1;1;2", "aff_country_unique": "China;Italy;Sweden" }, { "id": "LiWGbK8_iOB", "title": "G-Censor: Graph Contrastive Learning with Task-Oriented Counterfactual Views", "track": "main", "status": "Reject", "tldr": "Graph Contrastive learning with task-oriented counterfactual positive/negative views, a model-agnostic framework designed for node property prediction tasks.", "abstract": "Graph Contrastive learning (GCL) has achieved great success in learning representations from unlabeled graph-structure data. However, how to automatically obtain the optimal contrastive views w.r.t specific downstream tasks is little studied. Theoretically, a downstream task can be causally correlated to particular sub-structures in graphs. The existing GCL methods may fail to enhance model performance on a given task when the task-related semantics are incomplete/preserved in the positive/negative views. To address this problem, we propose G-CENSOR, i.e., Graph Contrastive lEarniNg with taSk-oriented cOunteRfactual views, a model-agnostic framework designed for node property prediction tasks. G-CENSOR can simultaneously generate the optimal task-oriented counterfactual positive/negative views for raw ego-graphs and train graph neural networks (GNNs) with a contrastive objective between the raw ego-graphs and their corresponding counterfac-tual views. Extensive experiments on eight real-world datasets demonstrate that G-CENSOR can consistently outperform existing state-of-the-art GCL methods to improve the task performance and generalizability of a series of typical GNNs. To the best of our knowledge, this is a pioneer investigation to explore task-oriented graph contrastive learning from a counterfactual perspective in node property pre- diction tasks. 
We will release the source code after the review process.", "keywords": "graph contrastive learning;node property prediction;task-oriented counterfactual views", "primary_area": "", "supplementary_material": "", "author": "tianqianjin lin;Yangyang Kang;Zhuoren Jiang;Xurui Li;Changlong Sun;cui huang;Xiaozhong Liu", "authorids": "~tianqianjin_lin1;~Yangyang_Kang1;~Zhuoren_Jiang2;~Xurui_Li1;~Changlong_Sun2;huangcui@zju.edu.cn;~Xiaozhong_Liu2", "gender": "M;M;M;;M;;M", "homepage": ";;https://person.zju.edu.cn/en/0020027;;;;https://www.wpi.edu/people/faculty/xliu14", "dblp": ";162/0109;132/5378.html;;https://dblp.uni-trier.de/pers/hd/s/Sun:Changlong;;11/6389.html", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;GNkq4n8AAAAJ;;https://scholar.google.com/citations?;;1BUByMcAAAAJ", "orcid": ";;0000-0001-8562-8347;;;;", "linkedin": "tianqianjin-lin-2a2900223/;;;;;;", "or_profile": "~tianqianjin_lin1;~Yangyang_Kang1;~Zhuoren_Jiang2;~Xurui_Li1;~Changlong_Sun2;huangcui@zju.edu.cn;~Xiaozhong_Liu2", "aff": "Alibaba Group;Alibaba Group;Zhejiang University;;Alibaba Group;;Worcester Polytechnic Institute", "aff_domain": "alibaba-inc.com;alibaba.com;zju.edu.cn;;alibaba-inc.com;;wpi.edu", "position": "Intern;Staff Algorithm Engineer;Assistant Professor;;Researcher;;Associate Professor", "bibtex": "@misc{\nlin2023gcensor,\ntitle={G-Censor: Graph Contrastive Learning with Task-Oriented Counterfactual Views},\nauthor={tianqianjin lin and Yangyang Kang and Zhuoren Jiang and Xurui Li and Changlong Sun and cui huang and Xiaozhong Liu},\nyear={2023},\nurl={https://openreview.net/forum?id=LiWGbK8_iOB}\n}", "github": "", "project": "", "reviewers": "tYMD;EBjy;VhaN;DcTY;iyAN", "site": "https://openreview.net/forum?id=LiWGbK8_iOB", "pdf_size": 3140081, "recommendation": "3;3;3;5;5", "confidence": "5;4;5;4;4", "correctness": "3;2;3;3;3", "technical_novelty": "2;2;3;3;3", "empirical_novelty": "2;0;2;3;2", "wc_summary_paper": "108;115;43;82;77", "wc_strength_and_weaknesses": "159;265;333;198;155", "wc_clarity_quality_novelty_and_reproducibility": "187;67;34;25;52", "wc_summary_review": "49;82;52;57;53", "wc_review": "503;529;462;362;337", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;0;0", "reply_authors": "0;0;0;0;0", "recommendation_avg": [ 3.8, 0.9797958971132712 ], "confidence_avg": [ 4.4, 0.48989794855663565 ], "correctness_avg": [ 2.8, 0.39999999999999997 ], "technical_novelty_avg": [ 2.6, 0.4898979485566356 ], "empirical_novelty_avg": [ 1.8, 0.9797958971132712 ], "wc_summary_paper_avg": [ 85.0, 25.55777768116782 ], "wc_strength_and_weaknesses_avg": [ 222.0, 68.12341741281041 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 73.0, 58.81836447913185 ], "wc_summary_review_avg": [ 58.6, 11.976643937263896 ], "wc_review_avg": [ 438.6, 76.23280133905615 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.6666666666666665, "corr_recommendation_correctness": 0.40824829046386296, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Q3ZOBAmIhCsJ:scholar.google.com/&scioq=G-Censor:+Graph+Contrastive+Learning+with+Task-Oriented+Counterfactual+Views&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;1;0;2", "aff_unique_norm": "Alibaba Group;Zhejiang University;Worcester Polytechnic Institute", "aff_unique_dep": ";;", "aff_unique_url": 
"https://www.alibaba.com;https://www.zju.edu.cn;https://www.wpi.edu", "aff_unique_abbr": "Alibaba;ZJU;WPI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "China;United States" }, { "title": "How robust is unsupervised representation learning to distribution shift?", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11153", "id": "LiXDW7CF94J", "poster": "/media/PosterPDFs/ICLR%202023/11153.png?t=1680731278.616672", "openreview": "https://openreview.net/forum?id=LiXDW7CF94J", "slides": "https://iclr.cc/virtual/2023/poster/11153", "video": "https://iclr.cc/virtual/2023/poster/11153", "author_site": "Yuge Shi, Imant Daunhawer, Julia E Vogt, Philip Torr, Amartya Sanyal", "tldr": "Representations learned from self-supervised learning and auto-encoder based algorithms are surprisingly robust to distribution shift.", "abstract": "The robustness of machine learning algorithms to distributions shift is primarily discussed in the context of supervised learning (SL). As such, there is a lack of insight on the robustness of the representations learned from unsupervised methods, such as self-supervised learning (SSL) and auto-encoder based algorithms (AE), to distribution shift. We posit that the input-driven objectives of unsupervised algorithms lead to representations that are more robust to distribution shift than the target-driven objective of SL. We verify this by extensively evaluating the performance of SSL and AE on both synthetic and realistic distribution shift datasets. Following observations that the linear layer used for classification itself can be susceptible to spurious correlations, we evaluate the representations using a linear\nhead trained on a small amount of out-of-distribution (OOD) data, to isolate the robustness of the learned representations from that of the linear head. We also develop \u201ccontrollable\u201d versions of existing realistic domain generalisation datasets with adjustable degrees of distribution shifts. This allows us to study the robustness of different learning algorithms under versatile yet realistic distribution shift\nconditions. 
Our experiments show that representations learned from unsupervised learning algorithms generalise better than SL under a wide variety of extreme as well as realistic distribution shifts.", "keywords": "distribution shift;OOD generalisation;spurious correlation;simplicity bias;SSL;unsupervised learning;auto-encoder", "primary_area": "", "supplementary_material": "", "author": "Yuge Shi;Imant Daunhawer;Julia E Vogt;Philip Torr;Amartya Sanyal", "authorids": "~Yuge_Shi1;~Imant_Daunhawer2;~Julia_E_Vogt1;~Philip_Torr1;~Amartya_Sanyal1", "gender": "F;;F;;M", "homepage": "https://yugeten.github.io/;https://mds.inf.ethz.ch/team/detail/imant-daunhawer/;http://mds.inf.ethz.ch;http://www.robots.ox.ac.uk/~tvg/;https://amartya18x.github.io", "dblp": "227/4684;259/0541;13/8412;;203/8807", "google_scholar": "https://scholar.google.co.uk/citations?user=t6B_Z7MAAAAJ;;UoeV-8kAAAAJ;;", "orcid": ";;;;0000-0002-4190-0449", "linkedin": ";;julia-vogt-50b53895;;", "or_profile": "~Yuge_Shi1;~Imant_Daunhawer2;~Julia_E_Vogt1;~Philip_Torr1;~Amartya_Sanyal1", "aff": "University of Oxford;Swiss Federal Institute of Technology;Swiss Federal Institute of Technology;University of Oxford;Swiss Federal Institute of Technology", "aff_domain": "ox.ac.uk;ethz.ch;ethz.ch;ox.ac.uk;ethz.ch", "position": "PhD student;PhD student;Assistant Professor;Full Professor;Postdoc", "bibtex": "@inproceedings{\nshi2023how,\ntitle={How robust is unsupervised representation learning to distribution shift?},\nauthor={Yuge Shi and Imant Daunhawer and Julia E Vogt and Philip Torr and Amartya Sanyal},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=LiXDW7CF94J}\n}", "github": "", "project": "", "reviewers": "ekJ8;JB4R;Bhk8;xV8K", "pdf_size": 2949884, "recommendation": "5;5;6;8", "confidence": "4;3;3;3", "correctness": "3;3;4;3", "technical_novelty": "2;3;2;3", "empirical_novelty": "2;3;2;3", "wc_summary_paper": "31;62;67;84", "wc_strength_and_weaknesses": "411;213;130;293", "wc_clarity_quality_novelty_and_reproducibility": "55;52;11;102", "wc_summary_review": "16;62;46;32", "wc_review": "513;389;254;511", "wc_reply_reviewers": "26;0;51;65", "wc_reply_authors": "1029;1163;481;547", "reply_reviewers": "1;0;1;1", "reply_authors": "4;2;1;1", "recommendation_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 61.0, 19.144189719076646 ], "wc_strength_and_weaknesses_avg": [ 261.75, 103.66623124238674 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 55.0, 32.225766088644036 ], "wc_summary_review_avg": [ 39.0, 17.0 ], "wc_review_avg": [ 416.75, 106.54195183119182 ], "wc_reply_reviewers_avg": [ 35.5, 24.804233509624925 ], "wc_reply_authors_avg": [ 805.0, 295.7532755524442 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.4714045207910316, "corr_recommendation_correctness": 0.0, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7120036066433446355&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "pdf": "https://openreview.net/pdf?id=LiXDW7CF94J", "email": "ox.ac.uk;ethz.ch;ethz.ch;ox.ac.uk;ethz.ch", "author_num": 5, "aff_unique_index": "0;1;1;0;1", "aff_unique_norm": "University of Oxford;Swiss Federal 
Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.ox.ac.uk;https://www.ethz.ch", "aff_unique_abbr": "Oxford;ETH Zurich", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;1", "aff_country_unique": "United Kingdom;Switzerland" }, { "id": "Liuo-Bk-beq", "title": "The Biased Artist: Exploiting Cultural Biases via Homoglyphs in Text-Guided Image Generation Models", "track": "main", "status": "Reject", "tldr": "Inducing cultural biases into text-conditional image generation models by replacing single characters in the text prompts with homoglyphs.", "abstract": "Text-guided image generation models, such as DALL-E2 and Stable Diffusion, have recently received much attention from academia and the general public. Provided with textual descriptions, these models are capable of generating high-quality images depicting various concepts and styles. However, such models are trained on large amounts of public data and implicitly learn relationships from their training data that are not immediately apparent. We demonstrate that common multimodal models implicitly learned cultural biases that can be triggered and injected into the generated images by simply replacing single characters in the textual description with visually similar non-Latin characters. These so-called homoglyph replacements enable malicious users or service providers to induce biases into the generated images and even render the whole generation process useless. We practically illustrate such attacks on DALL-E2 and Stable Diffusion as text-guided image generation models and further show that CLIP also behaves similarly. Our results further indicate that text encoders trained on multilingual data provide a way to mitigate the effects of homoglyph replacements.", "keywords": "Text-Guided Image Generation Models;Bias;DALL-E 2;Security", "primary_area": "", "supplementary_material": "", "author": "Lukas Struppek;Dominik Hintersdorf;Kristian Kersting", "authorids": "~Lukas_Struppek1;~Dominik_Hintersdorf1;~Kristian_Kersting1", "gender": "M;M;M", "homepage": "https://lukasstruppek.github.io/;https://d0mih.github.io/;http://www.ml.informatik.tu-darmstadt.de/", "dblp": "306/1485;306/1325;40/3793", "google_scholar": "tU8K5qsAAAAJ;DKITUfsAAAAJ;QY-earAAAAAJ", "orcid": "0000-0003-0626-3672;0000-0003-4976-6894;0000-0002-2873-9152", "linkedin": "lukas-struppek/;;", "or_profile": "~Lukas_Struppek1;~Dominik_Hintersdorf1;~Kristian_Kersting1", "aff": "Technische Universit\u00e4t Darmstadt;CS Department, TU Darmstadt, Technische Universit\u00e4t Darmstadt;TU Darmstadt", "aff_domain": "tu-darmstadt.de;cs.tu-darmstadt.de;tu-darmstadt.de", "position": "PhD student;PhD student;Full Professor", "bibtex": "@misc{\nstruppek2023the,\ntitle={The Biased Artist: Exploiting Cultural Biases via Homoglyphs in Text-Guided Image Generation Models},\nauthor={Lukas Struppek and Dominik Hintersdorf and Kristian Kersting},\nyear={2023},\nurl={https://openreview.net/forum?id=Liuo-Bk-beq}\n}", "github": "", "project": "", "reviewers": "wkRc;k5Mu;GVzw;4KN7", "site": "https://openreview.net/forum?id=Liuo-Bk-beq", "pdf_size": 10844919, "recommendation": "1;3;5;6", "confidence": "5;4;3;4", "correctness": "1;4;3;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;2;3;4", "wc_summary_paper": "68;147;92;86", "wc_strength_and_weaknesses": "208;311;89;466", "wc_clarity_quality_novelty_and_reproducibility": "7;18;51;188", "wc_summary_review": "35;50;49;123", "wc_review": "318;526;281;863", 
"wc_reply_reviewers": "343;0;0;26", "wc_reply_authors": "1364;475;444;830", "reply_reviewers": "1;0;0;1", "reply_authors": "3;2;2;2", "recommendation_avg": [ 3.75, 1.920286436967152 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 2.75, 1.0897247358851685 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 98.25, 29.498940658945703 ], "wc_strength_and_weaknesses_avg": [ 268.5, 138.4675052133171 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 66.0, 72.27378501227122 ], "wc_summary_review_avg": [ 64.25, 34.43381332353418 ], "wc_review_avg": [ 497.0, 231.02705469273508 ], "wc_reply_reviewers_avg": [ 92.25, 145.15917986817092 ], "wc_reply_authors_avg": [ 778.25, 370.6294476967528 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.7364596943186587, "corr_recommendation_correctness": 0.5674803065350239, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17803278504330617461&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Technische Universit\u00e4t Darmstadt", "aff_unique_dep": "", "aff_unique_url": "https://www.tu-darmstadt.de", "aff_unique_abbr": "TUD", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Darmstadt", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "id": "Ll21hhIJ_oG", "title": "KeyCLD: Learning Constrained Lagrangian Dynamics in Keypoint Coordinates from Images", "track": "main", "status": "Reject", "tldr": "We learn unsupervised keypoint representations as state, jointly with constrained Lagrangian dynamics, based on videos of dynamical systems.", "abstract": "We present KeyCLD, a framework to learn Lagrangian dynamics from images. Learned keypoint representations derived from images are directly used as positional state vector for jointly learning constrained Lagrangian dynamics. KeyCLD is trained unsupervised end-to-end on sequences of images. Our method explicitly models the mass matrix, potential energy and the input matrix, thus allowing energy based control. We demonstrate learning of Lagrangian dynamics from images on the dm_control pendulum, cartpole and acrobot environments, wether they are unactuated, underactuated or fully actuated. Trained models are able to produce long-term video predictions, showing that the dynamics are accurately learned. Our method strongly outperforms recent works on learning Lagrangian or Hamiltonian dynamics from images. 
The benefits of including a Lagrangian prior and prior knowledge of a constraint function are further investigated and empirically evaluated.", "keywords": "Lagrangian;dynamics;keypoints;images;unsupervised", "primary_area": "", "supplementary_material": "/attachment/733cc3bc6197f44fe96f2264cb34d9594d86d611.zip", "author": "Rembert Daems;Jeroen Taets;Francis wyffels;Guillaume Crevecoeur", "authorids": "~Rembert_Daems1;~Jeroen_Taets1;~Francis_wyffels1;~Guillaume_Crevecoeur1", "gender": ";M;;", "homepage": "https://rdaems.github.io/;;;https://users.ugent.be/~gcreveco/", "dblp": "322/9167;322/8993.html;;", "google_scholar": "-X_5BNcAAAAJ;;;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-5225-4884;;;0000-0001-7630-8579", "linkedin": "rembert-daems/;;;guillaume-crevecoeur-a301b328/", "or_profile": "~Rembert_Daems1;~Jeroen_Taets1;~Francis_wyffels1;~Guillaume_Crevecoeur1", "aff": "Ghent University;Universiteit Gent;;", "aff_domain": "ugent.be;ugent.be;;", "position": "PhD student;PhD student;;", "bibtex": "@misc{\ndaems2023keycld,\ntitle={Key{CLD}: Learning Constrained Lagrangian Dynamics in Keypoint Coordinates from Images},\nauthor={Rembert Daems and Jeroen Taets and Francis wyffels and Guillaume Crevecoeur},\nyear={2023},\nurl={https://openreview.net/forum?id=Ll21hhIJ_oG}\n}", "github": "", "project": "", "reviewers": "NWGA;U2jo;ooc4", "site": "https://openreview.net/forum?id=Ll21hhIJ_oG", "pdf_size": 2334393, "recommendation": "5;5;6", "confidence": "4;5;5", "correctness": "2;3;3", "technical_novelty": "2;2;3", "empirical_novelty": "2;2;2", "wc_summary_paper": "51;91;89", "wc_strength_and_weaknesses": "694;584;339", "wc_clarity_quality_novelty_and_reproducibility": "41;2;61", "wc_summary_review": "78;38;21", "wc_review": "864;715;510", "wc_reply_reviewers": "319;0;0", "wc_reply_authors": "733;324;209", "reply_reviewers": "1;0;0", "reply_authors": "2;1;1", "recommendation_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 77.0, 18.40289832245635 ], "wc_strength_and_weaknesses_avg": [ 539.0, 148.38014242703323 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 34.666666666666664, 24.499433100017278 ], "wc_summary_review_avg": [ 45.666666666666664, 23.893281249943232 ], "wc_review_avg": [ 696.3333333333334, 145.12140511386397 ], "wc_reply_reviewers_avg": [ 106.33333333333333, 150.37804213233912 ], "wc_reply_authors_avg": [ 422.0, 224.86588595575512 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.4999999999999999, "corr_recommendation_correctness": 0.4999999999999999, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6717959950822143509&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 10, "aff_unique_index": "0;1", "aff_unique_norm": "Ghent University;University of Ghent", "aff_unique_dep": ";", "aff_unique_url": "https://www.ugent.be/en;https://www.ugent.be/en", "aff_unique_abbr": "UGent;UGent", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Belgium" }, { "id": "LlOOSDGLD24", "title": "Flexible Relation Preserving for Adversarial Training", "track":
"main", "status": "Withdraw", "tldr": "", "abstract": "In this study, we revisit the representation learning problem for adversarial training from the perspective of relation preservation. Typical adversarial training methods tend to pull clean and adversarial samples closer to improve robustness. However, our experimental analysis reveals that such operation would lead to cluttered feature representations thus decreasing the accuracy for both clean and adversarial samples. To alleviate the problem, we build a robust discriminative feature space for both clean and adversarial samples by taking into account a relational prior which preserves the relationship between features of clean samples. A flexible relationship preserving adversarial training (FRPAT) strategy is proposed to transfer the well-generalized relational structure of the standard training model into the adversarial training model. Moreover, it acts as an extra regularization term mathematically, making it easy to be combined with various popular adversarial training algorithms in a plug-and-play way to achieve the best of both worlds. Extensive experiments on CIFAR10 and CIFAR100 demonstrate the superiority of our algorithm. Without additional data, it improves clean generalizability up to $\\textbf{8.78\\%}$ and robust generalizability up to $\\textbf{3.04\\%}$ on these datasets.", "keywords": "adversarial training;adversarial robustness;relationship knowledge distillation", "primary_area": "", "supplementary_material": "/attachment/7b566a72be13e2016b35ae5d3d3ea650cb416435.zip", "author": "Xiaoyue Mi;Fan Tang;Yepeng Weng;Danding Wang;Peng Li;Yang Liu;Zonghan Yang;Sheng Tang;Juan Cao", "authorids": "~Xiaoyue_Mi1;~Fan_Tang1;~Yepeng_Weng1;~Danding_Wang1;~Peng_Li2;~Yang_Liu19;~Zonghan_Yang1;~Sheng_Tang1;~Juan_Cao1", "gender": "F;M;M;;M;M;M;M;F", "homepage": ";http://ivc.ia.ac.cn:8001/zh/papers;https://github.com/Monoclinic/monoclinic.github.io;;http://www.lpeng.net/;http://nlp.csai.tsinghua.edu.cn/~ly/;https://minicheshire.github.io/;http://www.ict.cas.cn/sourcedb_2018_ict_cas/cn/jssrck/200909/t20090917_2496726.html;https://www.ict.ac.cn/sourcedb/cn/jssrck/201011/t20101123_3028158.html", "dblp": ";46/6804;289/1270;;83/6353-30;51/3710-5;222/7860;https://dblp.uni-trier.de/pid/62/1647;75/2820-1.html", "google_scholar": "9eDMXxMAAAAJ;PdKElfwAAAAJ;https://scholar.google.com/citations?hl=en;;hgYzkOQAAAAJ;https://scholar.google.com.hk/citations?user=lVhoKNcAAAAJ;rt9HOIUAAAAJ;https://scholar.google.com/citations?hl=zh-CN;fSBdNg0AAAAJ", "orcid": ";;;;0000-0003-1374-5979;0000-0002-3087-242X;;;0000-0002-7857-1546", "linkedin": ";;;;;;;;", "or_profile": "~Xiaoyue_Mi1;~Fan_Tang1;~Yepeng_Weng1;~Danding_Wang1;~Peng_Li2;~Yang_Liu19;~Zonghan_Yang1;~Sheng_Tang1;~Juan_Cao1", "aff": "University of Chinese Academy of Sciences;Institute of Computing Technology, CAS;Lenovo Group Limited;;Tsinghua University;Tsinghua University;Department of Computer Science and Technology, Tsinghua University;Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences", "aff_domain": "ucas.ac.cn;ict.cas.cn;lenovo.com;;tsinghua.edu.cn;tsinghua.edu.cn;cs.tsinghua.edu.cn;ict.ac.cn;ict.ac.cn", "position": "PhD student;Associate Professor;Researcher;;Associate Professor;Professor;PhD student;Full Professor;Full Professor", "bibtex": "@misc{\nmi2023flexible,\ntitle={Flexible Relation Preserving for Adversarial Training},\nauthor={Xiaoyue Mi and Fan Tang and Yepeng Weng and Danding Wang and Peng Li and Yang 
Liu and Zonghan Yang and Sheng Tang and Juan Cao},\nyear={2023},\nurl={https://openreview.net/forum?id=LlOOSDGLD24}\n}", "github": "", "project": "", "reviewers": "BM3U;dCrC;q8J7", "site": "https://openreview.net/forum?id=LlOOSDGLD24", "pdf_size": 2917421, "recommendation": "1;3;5", "confidence": "5;4;4", "correctness": "1;3;2", "technical_novelty": "1;3;2", "empirical_novelty": "2;2;2", "wc_summary_paper": "76;86;128", "wc_strength_and_weaknesses": "663;316;494", "wc_clarity_quality_novelty_and_reproducibility": "63;116;34", "wc_summary_review": "31;25;35", "wc_review": "833;543;691", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "395;455;441", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 3.0, 1.632993161855452 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 2.0, 0.816496580927726 ], "technical_novelty_avg": [ 2.0, 0.816496580927726 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 96.66666666666667, 22.5289936649544 ], "wc_strength_and_weaknesses_avg": [ 491.0, 141.6780387592469 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 71.0, 33.95094500402996 ], "wc_summary_review_avg": [ 30.333333333333332, 4.109609335312651 ], "wc_review_avg": [ 689.0, 118.40045044959359 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 430.3333333333333, 25.629843715654783 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 9, 0 ], "corr_recommendation_confidence": -0.8660254037844385, "corr_recommendation_correctness": 0.5, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Q-1INP-mM80J:scholar.google.com/&scioq=Flexible+Relation+Preserving+for+Adversarial+Training&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;3;3;1;1", "aff_unique_norm": "University of Chinese Academy of Sciences;Chinese Academy of Sciences;Lenovo Group Limited;Tsinghua University", "aff_unique_dep": ";Institute of Computing Technology;;", "aff_unique_url": "http://www.ucas.ac.cn;http://www.ict.ac.cn;https://www.lenovo.com;https://www.tsinghua.edu.cn", "aff_unique_abbr": "UCAS;CAS;Lenovo;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "LlWfawcSpf", "title": "MABA-Net: Masked Additive Binary Activation Network", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Despite significant reduction in memory footprint and computational cost, binary neural networks suffer from noticeable accuracy degradation compared to real-valued counterparts. A few works have attempted to narrow the accuracy gap by increasing the representation bit-width or the network width/depth, but they come at the expense of increased memory and/or compute. In this work, we find that the imbalanced ratio of activations to weights may be the main cause of degraded performance and increased memory overhead. We propose Masked Additive Binary Activation Network (MABA-Net) to reduce approximation errors and the activation bit-width, with minimum increase in the activation size. MABA-Net balances the ratio of the activation size to the weight size, leading to significant memory saving on large CNNs. We demonstrate MABA-Net's superior performance on the ImageNet dataset under various network configurations. 
Experimental results show that MABA-Net achieves competitive accuracy without increase of computational cost, while reducing memory usage compared to state-of-the-art. We will release the codes upon acceptance.", "keywords": "Binary Neural Networks;Quantization;Binarization", "primary_area": "", "supplementary_material": "", "author": "Xue Geng;Jiawei Hu;Jie Lin;Zhe Wang;Shaohua Li;Min Wu;Mohamed M. Sabry Aly", "authorids": "~Xue_Geng1;jiaweihu@uchicago.edu;~Jie_Lin1;~Zhe_Wang12;~Shaohua_Li2;~Min_Wu2;~Mohamed_M._Sabry_Aly1", "gender": "F;;M;M;;M;M", "homepage": ";;;https://www.linkedin.com/in/wangzhemark/?originalSubdomain=sg;;https://sites.google.com/site/wumincf/;", "dblp": "149/3281;;88/6731;;;16/0-8;", "google_scholar": "ZYVZ1bgAAAAJ;;;Xqu6fAkAAAAJ;;https://scholar.google.com.sg/citations?user=Hji1uWQAAAAJ;eCo7XWkAAAAJ", "orcid": ";;;;;0000-0003-0977-3600;", "linkedin": "xue-geng-9963b95a/;;;;;;mohamed-m-sabry-aly/", "or_profile": "~Xue_Geng1;jiaweihu@uchicago.edu;~Jie_Lin1;~Zhe_Wang12;~Shaohua_Li2;~Min_Wu2;~Mohamed_M._Sabry_Aly1", "aff": "Institute for Infocomm Research, A*STAR;;;, A*STAR;;Institute for Infocomm Research (I2R), A*STAR;Nanyang Technological University", "aff_domain": "i2r.a-star.edu.sg;;;i2r.a-star.edu.sg;;i2r.a-star.edu.sg;ntu.edu.sg", "position": "Research Scientist;;;Researcher;;Researcher;Assistant Professor", "bibtex": "@misc{\ngeng2023mabanet,\ntitle={{MABA}-Net: Masked Additive Binary Activation Network},\nauthor={Xue Geng and Jiawei Hu and Jie Lin and Zhe Wang and Shaohua Li and Min Wu and Mohamed M. Sabry Aly},\nyear={2023},\nurl={https://openreview.net/forum?id=LlWfawcSpf}\n}", "github": "", "project": "", "reviewers": "D6ip;h8Kp;X9gr", "site": "https://openreview.net/forum?id=LlWfawcSpf", "pdf_size": 1004144, "recommendation": "3;5;6", "confidence": "5;4;4", "correctness": "2;3;4", "technical_novelty": "2;2;3", "empirical_novelty": "2;2;3", "wc_summary_paper": "42;63;40", "wc_strength_and_weaknesses": "308;120;107", "wc_clarity_quality_novelty_and_reproducibility": "17;77;60", "wc_summary_review": "20;82;24", "wc_review": "387;342;231", "wc_reply_reviewers": "0;168;0", "wc_reply_authors": "733;803;76", "reply_reviewers": "0;1;0", "reply_authors": "1;2;1", "recommendation_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 48.333333333333336, 10.402991022884823 ], "wc_strength_and_weaknesses_avg": [ 178.33333333333334, 91.84165116595459 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 51.333333333333336, 25.249862485874168 ], "wc_summary_review_avg": [ 42.0, 28.331372481167705 ], "wc_review_avg": [ 320.0, 65.55913361233506 ], "wc_reply_reviewers_avg": [ 56.0, 79.19595949289332 ], "wc_reply_authors_avg": [ 537.3333333333334, 327.4612784572993 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.944911182523068, "corr_recommendation_correctness": 0.9819805060619659, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Fe8ty7sDi9UJ:scholar.google.com/&scioq=MABA-Net:+Masked+Additive+Binary+Activation+Network&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;0;2", 
"aff_unique_norm": "Institute for Infocomm Research;A*STAR;Nanyang Technological University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.i2r.a-star.edu.sg;https://www.a-star.edu.sg;https://www.ntu.edu.sg", "aff_unique_abbr": "I2R;A*STAR;NTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Singapore" }, { "id": "LmNckrTpTBo", "title": "Training A Multi-stage Deep Classifier with Feedback Signals", "track": "main", "status": "Reject", "tldr": "", "abstract": "Multi-Stage Classifier (MSC) - several classifiers working sequentially in an arranged order and classification decision is partially made at each step - is widely used in industrial applications for various resource limitation reasons. The classifiers of a multi-stage process are usually Neural Network (NN) models trained independently or in their inference order without considering the signals from the latter stages. Aimed at two-stage binary classification process, the most common type of MSC, we propose a novel training framework, named Feedback Training. The classifiers are trained in an order reverse to their actual working order, and the classifier at the later stage is used to guide the training of initial-stage classifier via a sample weighting method. We experimentally show the efficacy of our proposed approach, and its great superiority under the scenario of few-shot training. ", "keywords": "multi-stage classification;training framework", "primary_area": "", "supplementary_material": "", "author": "Chao Xu;Yu Yang;Rongzhao Wang;Guan Wang;Bojia Lin", "authorids": "xuchao@microsoft.com;~Yu_Yang17;~Rongzhao_Wang2;guwang@microsoft.com;bojial@microsoft.com", "gender": ";M;;;", "homepage": ";https://github.com/sweetiemelody;;;", "dblp": ";;;;", "google_scholar": ";;;;", "orcid": ";;;;", "linkedin": ";;%E8%8D%A3%E9%92%8A-%E7%8E%8B-16ab4a111/;;", "or_profile": "xuchao@microsoft.com;~Yu_Yang17;~Rongzhao_Wang2;guwang@microsoft.com;bojial@microsoft.com", "aff": ";;;;", "aff_domain": ";;;;", "position": ";;;;", "bibtex": "@misc{\nxu2023training,\ntitle={Training A Multi-stage Deep Classifier with Feedback Signals},\nauthor={Chao Xu and Yu Yang and Rongzhao Wang and Guan Wang and Bojia Lin},\nyear={2023},\nurl={https://openreview.net/forum?id=LmNckrTpTBo}\n}", "github": "", "project": "", "reviewers": "7ycA;ND4V;WfBE", "site": "https://openreview.net/forum?id=LmNckrTpTBo", "pdf_size": 1265920, "recommendation": "1;3;5", "confidence": "2;4;4", "correctness": "3;3;3", "technical_novelty": "2;2;2", "empirical_novelty": "2;2;2", "wc_summary_paper": "39;65;35", "wc_strength_and_weaknesses": "178;69;433", "wc_clarity_quality_novelty_and_reproducibility": "63;24;8", "wc_summary_review": "17;11;11", "wc_review": "297;169;487", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.0, 1.632993161855452 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 46.333333333333336, 13.299958228840001 ], "wc_strength_and_weaknesses_avg": [ 226.66666666666666, 152.5348776145603 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 31.666666666666668, 23.098821518760552 ], "wc_summary_review_avg": [ 13.0, 2.8284271247461903 ], "wc_review_avg": [ 317.6666666666667, 130.64285497322675 ], "wc_reply_reviewers_avg": [ 0, 0 ], 
"wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.8660254037844387, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:zbAajxy4F2MJ:scholar.google.com/&scioq=Training+A+Multi-stage+Deep+Classifier+with+Feedback+Signals&hl=en&as_sdt=0,33", "gs_version_total": 3 }, { "id": "Lmff9URfo5", "title": "Cooperation or Competition: Avoiding Player Domination for Multi-target Robustness by Adaptive Budgets", "track": "main", "status": "Withdraw", "tldr": "For multi-target adversarial training, we identify a phenomenon named player domination which leads to the non-convergence of previous algorithms and further design a novel adaptive budget method to achieve better robustness.", "abstract": "Despite incredible advances, deep learning has been shown to be susceptible to adversarial attacks. Numerous approaches were proposed to train robust networks both empirically and certifiably. However, most of them defend against only a single type of attack, while recent work steps forward at defending against multiple attacks. In this paper, to understand multi-target robustness, we view this problem as a bargaining game in which different players (adversaries) negotiate to reach an agreement on a joint direction of parameter updating. We identify a phenomenon named \\emph{player domination} in the bargaining game, and show that with this phenomenon, some of the existing max-based approaches such as MAX and MSD do not converge. Based on our theoretical results, we design a novel framework that adjusts the budgets of different adversaries to avoid player domination. Experiments on two benchmarks show that employing the proposed framework to the existing approaches significantly advances multi-target robustness.", "keywords": "Multi-target Adversarial Training;Bargaining Game", "primary_area": "", "supplementary_material": "", "author": "Yimu Wang;Dinghuai Zhang;Yihan Wu;Heng Huang;Hongyang Zhang", "authorids": "~Yimu_Wang1;~Dinghuai_Zhang1;~Yihan_Wu1;~Heng_Huang1;~Hongyang_Zhang1", "gender": "M;;M;M;M", "homepage": "https://yimuwangcs.github.io;;https://yihwu.github.io/;https://www.cs.umd.edu/~heng/;https://hongyanz.github.io/", "dblp": "140/7766;;;03/281;23/10537-1", "google_scholar": "TV2vnN8AAAAJ;;cajTg_wAAAAJ;4OqLaDwAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;;", "linkedin": "yimu-wang-854743151/;;;;", "or_profile": "~Yimu_Wang1;~Dinghuai_Zhang1;~Yihan_Wu1;~Heng_Huang1;~Hongyang_Zhang1", "aff": "University of Waterloo;;University of Pittsburgh;University of Pittsburgh;School of Computer Science, University of Waterloo", "aff_domain": "uwaterloo.ca;;pitt.edu;pitt.edu;uwaterloo.ca", "position": "PhD student;;PhD student;Full Professor;Assistant Professor", "bibtex": "@misc{\nwang2023cooperation,\ntitle={Cooperation or Competition: Avoiding Player Domination for Multi-target Robustness by Adaptive Budgets},\nauthor={Yimu Wang and Dinghuai Zhang and Yihan Wu and Heng Huang and Hongyang Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=Lmff9URfo5}\n}", "github": "", "project": "", "reviewers": "D2Kt;L8ZN;C3Wu;hg7g", "site": "https://openreview.net/forum?id=Lmff9URfo5", "pdf_size": 659588, "recommendation": "3;3;5;6", "confidence": "3;4;4;2", "correctness": "3;2;3;3", "technical_novelty": "3;3;2;3", "empirical_novelty": "2;3;2;2", "wc_summary_paper": "100;65;136;93", 
"wc_strength_and_weaknesses": "240;148;265;191", "wc_clarity_quality_novelty_and_reproducibility": "2;12;163;45", "wc_summary_review": "66;42;43;47", "wc_review": "408;267;607;376", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 98.5, 25.30316185775999 ], "wc_strength_and_weaknesses_avg": [ 211.0, 45.07216435894775 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 55.5, 64.07222487162437 ], "wc_summary_review_avg": [ 49.5, 9.7082439194738 ], "wc_review_avg": [ 414.5, 122.81795471347013 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.5222329678670935, "corr_recommendation_correctness": 0.5555555555555555, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14010637677588581756&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "University of Waterloo;University of Pittsburgh", "aff_unique_dep": ";", "aff_unique_url": "https://uwaterloo.ca;https://www.pitt.edu", "aff_unique_abbr": "UW;Pitt", "aff_campus_unique_index": "1", "aff_campus_unique": ";Waterloo", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "Canada;United States" }, { "id": "LnQn5-rN-LR", "title": "HIVE: HIerarchical Volume Encoding for Neural Implicit Surface Reconstruction", "track": "main", "status": "Reject", "tldr": "", "abstract": "Neural implicit surface reconstruction has become a new trend in reconstructing a detailed 3D shape from images. In previous methods, however, the 3D scene is only encoded by the MLPs which do not have an explicit 3D structure. To better represent 3D shapes, we introduce a volume encoding to explicitly encode the spatial information. We further design hierarchical volumes to encode the scene structures in multiple scales. The high-resolution volumes capture the high-frequency geometry details since spatially varying features could be learned from different 3D points, while the low-resolution volumes enforce the spatial consistency to keep the shape smooth since adjacent locations possess the same low-resolution feature. In addition, we adopt a sparse structure to reduce the memory consumption at high-resolution volumes, and two regularization terms to enhance results smoothness. This hierarchical volume encoding could be appended to any implicit surface reconstruction method as a plug-and-play module, and can generate a smooth and clean reconstruction with more details. Superior performance is demonstrated in DTU, EPFL, and BlendedMVS datasets with significant improvement on the standard metrics. 
The code of our method will be made public.", "keywords": "neural implicit surface reconstruction;multi-view surface reconstruction;hierarchical volume encoding", "primary_area": "", "supplementary_material": "", "author": "Xiaodong Gu;Weihao Yuan;Heng Li;Zilong Dong;Ping Tan", "authorids": "~Xiaodong_Gu3;~Weihao_Yuan1;~Heng_Li6;~Zilong_Dong2;~Ping_Tan2", "gender": "M;M;M;M;M", "homepage": ";https://www.weihao-yuan.com;http://hengli.me;http://www.cs.sfu.ca/~pingtan/;https://baike.baidu.com/item/%E8%91%A3%E5%AD%90%E9%BE%99/62931048", "dblp": "71/4467-4;217/2047-1;02/3672-9;;81/1423", "google_scholar": "aJPO514AAAAJ;m3tqxRQAAAAJ;tjbbehcAAAAJ;XhyKVFMAAAAJ;GHOQKCwAAAAJ", "orcid": "0000-0003-2623-7973;;0000-0001-5143-5061;0000-0002-4506-6973;0000-0002-6833-9102", "linkedin": ";;;;", "or_profile": "~Xiaodong_Gu3;~Weihao_Yuan1;~Heng_Li6;~Ping_Tan2;~Zlong_Dong1", "aff": "Alibaba Group;Alibaba Group;Simon Fraser University;Hong Kong University of Science and Technology;Alibaba Group", "aff_domain": "alibaba-inc.com;alibaba-inc.com;sfu.ca;ust.hk;alibaba-inc.com", "position": "Researcher;Researcher;PhD student;Full Professor;Researcher", "bibtex": "@misc{\ngu2023hive,\ntitle={{HIVE}: {HI}erarchical Volume Encoding for Neural Implicit Surface Reconstruction},\nauthor={Xiaodong Gu and Weihao Yuan and Heng Li and Zilong Dong and Ping Tan},\nyear={2023},\nurl={https://openreview.net/forum?id=LnQn5-rN-LR}\n}", "github": "", "project": "", "reviewers": "wTJb;hdwP;ZvSi;frLo", "site": "https://openreview.net/forum?id=LnQn5-rN-LR", "pdf_size": 26950303, "recommendation": "3;5;6;6", "confidence": "4;5;5;3", "correctness": "3;4;3;4", "technical_novelty": "2;2;3;2", "empirical_novelty": "3;2;4;3", "wc_summary_paper": "116;92;113;71", "wc_strength_and_weaknesses": "392;461;225;337", "wc_clarity_quality_novelty_and_reproducibility": "181;69;33;84", "wc_summary_review": "68;79;107;64", "wc_review": "757;701;478;556", "wc_reply_reviewers": "0;0;0;65", "wc_reply_authors": "1058;683;356;275", "reply_reviewers": "0;0;0;1", "reply_authors": "2;1;1;1", "recommendation_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 98.0, 18.12456896039186 ], "wc_strength_and_weaknesses_avg": [ 353.75, 86.34632302536107 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 91.75, 54.76027300881543 ], "wc_summary_review_avg": [ 79.5, 16.80029761641144 ], "wc_review_avg": [ 623.0, 111.30363875453489 ], "wc_reply_reviewers_avg": [ 16.25, 28.145825622994256 ], "wc_reply_authors_avg": [ 593.0, 308.8761887876759 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.40824829046386296, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:aCcrdArdHkcJ:scholar.google.com/&scioq=HIVE:+HIerarchical+Volume+Encoding+for+Neural+Implicit+Surface+Reconstruction&hl=en&as_sdt=0,48", "gs_version_total": 3, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Alibaba Group;Simon Fraser University;Hong Kong University of Science and Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.alibaba.com;https://www.sfu.ca;https://www.ust.hk", "aff_unique_abbr": "Alibaba;SFU;HKUST", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong 
Kong SAR", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "China;Canada" }, { "title": "Empowering Graph Representation Learning with Test-Time Graph Transformation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10910", "id": "Lnxl5pr018", "poster": "/media/PosterPDFs/ICLR%202023/10910.png?t=1682917792.922745", "openreview": "https://openreview.net/forum?id=Lnxl5pr018", "slides": "https://iclr.cc/virtual/2023/poster/10910", "video": "https://iclr.cc/virtual/2023/poster/10910", "author_site": "Wei Jin, Tong Zhao, Jiayuan Ding, Yozen Liu, Jiliang Tang, Neil Shah", "tldr": "Transforming the test graph data can enhance the generalization and robustness of graph neural networks.", "abstract": "As powerful tools for representation learning on graphs, graph neural networks (GNNs) have facilitated various applications from drug discovery to recommender systems. Nevertheless, the effectiveness of GNNs is immensely challenged by issues related to data quality, such as distribution shift, abnormal features and adversarial attacks. Recent efforts have been made on tackling these issues from a modeling perspective which requires additional cost of changing model architectures or re-training model parameters. In this work, we provide a data-centric view to tackle these issues and propose a graph transformation framework named GTrans which adapts and refines graph data at test time to achieve better performance. We provide theoretical analysis on the design of the framework and discuss why adapting graph data works better than adapting the model. Extensive experiments have demonstrated the effectiveness of GTrans on three distinct scenarios for eight benchmark datasets where suboptimal data is presented. Remarkably, GTrans performs the best in most cases with improvements up to 2.8%, 8.2% and 3.8% over the best baselines on three experimental settings. 
", "keywords": "graph neural networks;out-of-distribution generalization;adversarial robustness", "primary_area": "", "supplementary_material": "/attachment/2a3d1c234348f39b3038a440ca87fd513e949edb.zip", "author": "Wei Jin;Tong Zhao;Jiayuan Ding;Yozen Liu;Jiliang Tang;Neil Shah", "authorids": "~Wei_Jin4;~Tong_Zhao3;~Jiayuan_Ding1;~Yozen_Liu1;~Jiliang_Tang1;~Neil_Shah2", "gender": ";M;M;;M;M", "homepage": "http://www.cs.emory.edu/~wjin30/;https://tzhao.io/;;https://www.linkedin.com/in/yozen-liu-531a67130/;https://www.cse.msu.edu/~tangjili/;http://nshah.net", "dblp": "66/2173-9;94/6503-3;197/1055;242/8056.html;64/10812;71/7771", "google_scholar": "eWow24EAAAAJ;05cRc-MAAAAJ;7lwkXGEAAAAJ;i3U2JjEAAAAJ;WtzKMWAAAAAJ;Qut69OgAAAAJ", "orcid": ";0000-0001-7660-1732;;;0000-0001-7125-3898;0000-0003-3261-8430", "linkedin": ";;jiayuand/;;;", "or_profile": "~Wei_Jin4;~Tong_Zhao3;~Jiayuan_Ding1;~Yozen_Liu1;~Jiliang_Tang1;~Neil_Shah2", "aff": "Michigan State University;Snap Inc.;Michigan State University;Snap Inc.;Michigan State University;Snap Inc.", "aff_domain": "msu.edu;snap.com;msu.edu;snapchat.com;msu.edu;snap.com", "position": "PhD student;Researcher;PhD student;Researcher;Full Professor;Research Scientist", "bibtex": "@inproceedings{\njin2023empowering,\ntitle={Empowering Graph Representation Learning with Test-Time Graph Transformation},\nauthor={Wei Jin and Tong Zhao and Jiayuan Ding and Yozen Liu and Jiliang Tang and Neil Shah},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=Lnxl5pr018}\n}", "github": "", "project": "", "reviewers": "2PUQ;WrUj;aTyX;7RMb;3Tit", "pdf_size": 830135, "recommendation": "3;6;6;8;8", "confidence": "4;4;4;5;3", "correctness": "3;3;3;3;4", "technical_novelty": "2;2;2;3;3", "empirical_novelty": "3;3;2;2;3", "wc_summary_paper": "52;93;59;87;82", "wc_strength_and_weaknesses": "290;484;287;302;30", "wc_clarity_quality_novelty_and_reproducibility": "28;35;54;80;95", "wc_summary_review": "39;50;52;18;126", "wc_review": "409;662;452;487;333", "wc_reply_reviewers": "0;211;0;18;0", "wc_reply_authors": "1853;2618;1917;2236;127", "reply_reviewers": "0;1;0;1;0", "reply_authors": "3;4;3;3;1", "recommendation_avg": [ 6.2, 1.8330302779823362 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "correctness_avg": [ 3.2, 0.39999999999999997 ], "technical_novelty_avg": [ 2.4, 0.4898979485566356 ], "empirical_novelty_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_paper_avg": [ 74.6, 16.131955864060625 ], "wc_strength_and_weaknesses_avg": [ 278.6, 144.7336864727766 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 58.4, 25.67956385922471 ], "wc_summary_review_avg": [ 57.0, 36.55133376499413 ], "wc_review_avg": [ 468.6, 109.50543365513877 ], "wc_reply_reviewers_avg": [ 45.8, 82.89366682684509 ], "wc_reply_authors_avg": [ 1750.2, 855.8325537159708 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 2.8, 0.9797958971132712 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.4909902530309828, "gs_citation": 79, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12479531068894109978&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=Lnxl5pr018", "email": "msu.edu;snap.com;msu.edu;snapchat.com;msu.edu;snap.com", "author_num": 6, "aff_unique_index": "0;1;0;1;0;1", "aff_unique_norm": "Michigan State University;Snap Inc.", "aff_unique_dep": ";", 
"aff_unique_url": "https://www.msu.edu;https://www.snapinc.com", "aff_unique_abbr": "MSU;Snap", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "LoJ6oXzc_P3", "title": "Stealing and Defending Transformer-based Encoders", "track": "main", "status": "Reject", "tldr": "We perform attacks against transformer-based encoders and propose a new defense against extraction of vision transformers that combines watermarking with dataset inference.", "abstract": "Self-supervised learning (SSL) has become the predominant approach to training on large amounts of unlabeled data. New real-world APIs offer services to generate high-dimensional representations for given inputs based on SSL encoders with transformer architectures. Recent efforts highlight that it is possible to steal high-quality SSL encoders trained on convolutional neural networks. In this work, we are the first to extend this line of work to stealing and defending transformer-based encoders in both language and vision domains. We show that it is possible to steal transformer-based sentence embedding models solely using their returned representations and with 40x fewer queries than the number of victim's training data points. We also decrease the number of required stealing queries for the vision encoders by leveraging semi-supervised learning. Finally, to defend vision transformers against stealing attacks, we propose a defense technique that combines watermarking with dataset inference. Our method creates a unique encoder signature based on a private data subset that acts as a secret seed during training. By applying dataset inference on the seed, we can then successfully identify stolen transformers.", "keywords": "model stealing;model extraction;defenses against model extraction;transformers;encoders;self-supervised learning", "primary_area": "", "supplementary_material": "/attachment/4c66662a6c93b3ec83c893212f2b6ab478094780.zip", "author": "Adam Dziedzic;Franziska Boenisch;Mingjian Jiang;Haonan Duan;Nicolas Papernot", "authorids": "~Adam_Dziedzic1;~Franziska_Boenisch2;~Mingjian_Jiang1;~Haonan_Duan2;~Nicolas_Papernot1", "gender": ";;M;M;M", "homepage": ";;https://www.cs.toronto.edu/~mjjiang/;https://www.cs.toronto.edu/~haonand/;https://www.papernot.fr", "dblp": ";;;273/7767;162/1405", "google_scholar": ";;;5WVNRqoAAAAJ;cGxq0cMAAAAJ", "orcid": ";;;;", "linkedin": ";;mingjian-jiang-279318201/;;nicolaspapernot", "or_profile": "~Adam_Dziedzic1;~Franziska_Boenisch2;~Mingjian_Jiang1;~Haonan_Duan2;~Nicolas_Papernot1", "aff": ";;University of Toronto;Department of Computer Science, University of Toronto;Google", "aff_domain": ";;cs.toronto.edu;cs.toronto.edu;google.com", "position": ";;Undergrad student;PhD student;Research Scientist", "bibtex": "@misc{\ndziedzic2023stealing,\ntitle={Stealing and Defending Transformer-based Encoders},\nauthor={Adam Dziedzic and Franziska Boenisch and Mingjian Jiang and Haonan Duan and Nicolas Papernot},\nyear={2023},\nurl={https://openreview.net/forum?id=LoJ6oXzc_P3}\n}", "github": "", "project": "", "reviewers": "e9r9;rUX6;zM28;MeVg", "site": "https://openreview.net/forum?id=LoJ6oXzc_P3", "pdf_size": 430623, "recommendation": "3;5;5;6", "confidence": "2;3;3;4", "correctness": "2;3;3;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "130;46;113;73", "wc_strength_and_weaknesses": "809;203;170;130", "wc_clarity_quality_novelty_and_reproducibility": "64;73;27;31", 
"wc_summary_review": "18;28;69;28", "wc_review": "1021;350;379;262", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "2064;520;556;373", "reply_reviewers": "0;0;0;0", "reply_authors": "5;3;3;2", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 90.5, 32.98863440641337 ], "wc_strength_and_weaknesses_avg": [ 328.0, 278.90589810902173 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 48.75, 20.054612935681405 ], "wc_summary_review_avg": [ 35.75, 19.62619423117992 ], "wc_review_avg": [ 503.0, 302.15476167024076 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 878.25, 688.0168511744462 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 3.25, 1.0897247358851685 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.9733285267845754, "corr_recommendation_correctness": 0.9271726499455306, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:vt3TAL2PwkwJ:scholar.google.com/&scioq=Stealing+and+Defending+Transformer-based+Encoders&hl=en&as_sdt=0,23", "gs_version_total": 0, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Toronto;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.utoronto.ca;https://www.google.com", "aff_unique_abbr": "U of T;Google", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Toronto;Mountain View", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Canada;United States" }, { "id": "LoOd40EaGA8", "title": "Challenging Common Assumptions about Catastrophic Forgetting", "track": "main", "status": "Reject", "tldr": "We propose a framework SCoLe (Scaling Continual Learning) to study knowledge accumulation in continual learning with SGD training.", "abstract": "Standard gradient descent algorithms applied to sequences of tasks are known to induce catastrophic forgetting in deep neural networks. When trained on a new task, the model's parameters are updated in a way that degrades performance on past tasks. \nThis article explores continual learning (CL) on long sequences of tasks sampled from a finite environment.\n\\textbf{We show that in this setting, learning with stochastic gradient descent (SGD) results in knowledge retention and accumulation without specific memorization mechanisms.} This is in contrast to the current notion of forgetting from the CL literature, which shows that training on new tasks with such an approach results in forgetting previous tasks, especially in class-incremental settings.\nTo study this phenomenon, we propose an experimental framework, \\Scole{} (Scaling Continual Learning), which allows to generate arbitrarily long task sequences. 
Our experiments show that the previous results obtained on relatively short task sequences may not reveal certain phenomena that emerge in longer ones.", "keywords": "Continual Learning;Knowledge Accumulation;Scaling", "primary_area": "", "supplementary_material": "", "author": "Timothee LESORT;Oleksiy Ostapenko;Pau Rodriguez;Md Rifat Arefin;Diganta Misra;Laurent Charlin;Irina Rish", "authorids": "~Timothee_LESORT1;~Oleksiy_Ostapenko1;~Pau_Rodriguez2;~Md_Rifat_Arefin1;~Diganta_Misra1;~Laurent_Charlin1;~Irina_Rish1", "gender": "M;M;;;;M;F", "homepage": ";;;;;http://www.cs.toronto.edu/~lcharlin/;http://irina-rish.com", "dblp": ";;;;;48/5717;", "google_scholar": "5NttkuoAAAAJ;mqLVUGgAAAAJ;;;;Cul0g2YAAAAJ;Avse5gIAAAAJ", "orcid": ";;;;;0000-0002-6545-9459;", "linkedin": "https://fr.linkedin.com/in/timoth\u00e9e-lesort-128039aa;;;;;;irina-rish-8b2162", "or_profile": "~Timothee_LESORT1;~Oleksiy_Ostapenko1;~Pau_Rodriguez2;~Md_Rifat_Arefin1;~Diganta_Misra1;~Laurent_Charlin1;~Irina_Rish1", "aff": "Montreal Institute for Learning Algorithms, University of Montreal, University of Montreal;University of Montreal;;;;Mila - Quebec Artificial Intelligence Institute;University of Montreal", "aff_domain": "mila.umontreal.ca;umontreal.ca;;;;mila.quebec;mila.quebec", "position": "Postdoc;PhD student;;;;Principal Researcher;Professor", "bibtex": "@misc{\nlesort2023challenging,\ntitle={Challenging Common Assumptions about Catastrophic Forgetting},\nauthor={Timothee LESORT and Oleksiy Ostapenko and Pau Rodriguez and Md Rifat Arefin and Diganta Misra and Laurent Charlin and Irina Rish},\nyear={2023},\nurl={https://openreview.net/forum?id=LoOd40EaGA8}\n}", "github": "", "project": "", "reviewers": "3eiy;Pyx6;E4AE;DHkU", "site": "https://openreview.net/forum?id=LoOd40EaGA8", "pdf_size": 942506, "recommendation": "3;3;5;6", "confidence": "4;5;4;4", "correctness": "4;2;2;3", "technical_novelty": "1;1;2;3", "empirical_novelty": "1;1;2;3", "wc_summary_paper": "66;129;70;102", "wc_strength_and_weaknesses": "290;487;449;162", "wc_clarity_quality_novelty_and_reproducibility": "125;21;74;56", "wc_summary_review": "12;11;54;49", "wc_review": "493;648;647;369", "wc_reply_reviewers": "0;114;234;0", "wc_reply_authors": "306;844;577;462", "reply_reviewers": "0;1;2;0", "reply_authors": "1;2;3;1", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.82915619758885 ], "technical_novelty_avg": [ 1.75, 0.82915619758885 ], "empirical_novelty_avg": [ 1.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 91.75, 25.635668510885374 ], "wc_strength_and_weaknesses_avg": [ 347.0, 129.88263933259134 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 69.0, 37.52998800959041 ], "wc_summary_review_avg": [ 31.5, 20.081085628023203 ], "wc_review_avg": [ 539.25, 116.79121328250683 ], "wc_reply_reviewers_avg": [ 87.0, 96.79359482941007 ], "wc_reply_authors_avg": [ 547.25, 196.4782112601802 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.5555555555555555, "corr_recommendation_correctness": -0.17407765595569782, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16381541815462330384&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Montreal;Quebec Artificial Intelligence Institute", "aff_unique_dep": "Montreal Institute for Learning 
Algorithms;Artificial Intelligence", "aff_unique_url": "https://www.umontreal.ca;https://mila.quebec", "aff_unique_abbr": "UM;Mila", "aff_campus_unique_index": "0", "aff_campus_unique": "Montreal;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Canada" }, { "title": "Fast Sampling of Diffusion Models with Exponential Integrator", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10904", "id": "Loek7hfb46P", "poster": "", "openreview": "https://openreview.net/forum?id=Loek7hfb46P", "slides": "https://iclr.cc/virtual/2023/poster/10904", "video": "https://iclr.cc/virtual/2023/poster/10904", "author_site": "Qinsheng Zhang, Yongxin Chen", "tldr": "Training-free acceleration for diffusion model, 4.17 FID with 10 NFEs on CIFAR10", "abstract": "The past few years have witnessed the great success of Diffusion models~(DMs) in generating high-fidelity samples in generative modeling tasks. A major limitation of the DM is its notoriously slow sampling procedure, which normally requires hundreds to thousands of time discretization steps of the learned diffusion process to reach the desired accuracy. Our goal is to develop a fast sampling method for DMs with far fewer steps while retaining high sample quality. To this end, we systematically analyze the sampling procedure in DMs and identify key factors that affect the sample quality, among which the method of discretization is most crucial. By carefully examining the learned diffusion process, we propose the Diffusion Exponential Integrator Sampler~(DEIS). It is based on the Exponential Integrator designed for discretizing ordinary differential equations (ODEs) and leverages a semilinear structure of the learned diffusion process to reduce the discretization error. The proposed method can be applied to any DM and can generate high-fidelity samples in as few as 10 steps. 
Moreover, by directly using pre-trained DMs, we achieve state-of-the-art sampling performance when the number of score function evaluations~(NFE) is limited, e.g., 4.17 FID with 10 NFEs, 2.86 FID with only 20 NFEs on CIFAR10.", "keywords": "Fast diffusion model;generative model", "primary_area": "", "supplementary_material": "/attachment/9fc043477b28bec771c25d67491bae8c1050e0ae.zip", "author": "Qinsheng Zhang;Yongxin Chen", "authorids": "~Qinsheng_Zhang1;~Yongxin_Chen1", "gender": "M;M", "homepage": "https://qsh-zh.github.io/;https://yongxin.ae.gatech.edu/", "dblp": ";", "google_scholar": ";X8BYiV4AAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Qinsheng_Zhang1;~Yongxin_Chen1", "aff": "Georgia Institute of Technology;Georgia Institute of Technology", "aff_domain": "gatech.edu;gatech.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nzhang2023fast,\ntitle={Fast Sampling of Diffusion Models with Exponential Integrator},\nauthor={Qinsheng Zhang and Yongxin Chen},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=Loek7hfb46P}\n}", "github": "", "project": "", "reviewers": "5JHW;HoiQ;xzme;xTdk", "pdf_size": 35184163, "recommendation": "5;6;6;8", "confidence": "2;3;3;4", "correctness": "3;3;3;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "17;115;107;37", "wc_strength_and_weaknesses": "76;191;568;332", "wc_clarity_quality_novelty_and_reproducibility": "36;45;56;371", "wc_summary_review": "29;79;44;103", "wc_review": "158;430;775;843", "wc_reply_reviewers": "0;0;68;0", "wc_reply_authors": "480;363;1318;819", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;2;2", "recommendation_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 69.0, 42.68489194082609 ], "wc_strength_and_weaknesses_avg": [ 291.75, 183.4616785598562 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 127.0, 141.051409067758 ], "wc_summary_review_avg": [ 63.75, 29.029080247227952 ], "wc_review_avg": [ 551.5, 275.9207313704427 ], "wc_reply_reviewers_avg": [ 17.0, 29.444863728670914 ], "wc_reply_authors_avg": [ 745.0, 370.7944174337041 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.9733285267845754, "corr_recommendation_correctness": 0.0, "gs_citation": 425, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11407986920537847246&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=Loek7hfb46P", "email": "gatech.edu;gatech.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Georgia Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.gatech.edu", "aff_unique_abbr": "Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "LofRPZeXNNk", "title": "Preserving Semantics in Textual Adversarial Attacks", "track": "main", "status": "Reject", "tldr": "We propose a novel sentence encoder that improves the quality of textual adversarial attacks.", "abstract": "Adversarial attacks in NLP challenge the way we look at language models. 
The goal of this kind of adversarial attack is to modify the input text to fool a classifier while maintaining the original meaning of the text. Although most existing adversarial attacks claim to fulfill the constraint of semantics preservation, careful scrutiny shows otherwise. We show that the problem lies in the text encoders used to determine the similarity of adversarial examples, specifically in the way they are trained. Unsupervised training methods make these encoders more susceptible to problems with antonym recognition. To overcome this, we introduce a simple, fully supervised sentence embedding technique called Semantics-Preserving-Encoder (SPE). The results show that our solution minimizes the variation in the meaning of the adversarial examples generated. It also significantly improves the overall quality of adversarial examples, as confirmed by human evaluators. Furthermore, it can be used as a component in any existing attack to speed up its execution while maintaining similar attack success.", "keywords": "NLP;Adversarial Attacks;Sentence Encoders;Semantics Similarity", "primary_area": "", "supplementary_material": "", "author": "David Herel;Hugo Cisneros;Tomas Mikolov", "authorids": "~David_Herel1;~Hugo_Cisneros1;~Tomas_Mikolov2", "gender": "M;M;", "homepage": "https://davidherel.com;https://hugocisneros.com;", "dblp": ";https://dblp.uni-trier.de/pid/238/0166;", "google_scholar": "l8jIktAAAAAJ;_uSIl-0AAAAJ;https://scholar.google.cz/citations?user=oBu8kMMAAAAJ", "orcid": ";0000-0003-3439-4565;", "linkedin": ";;", "or_profile": "~David_Herel1;~Hugo_Cisneros1;~Tomas_Mikolov2", "aff": "CIIRC, Czech Technical University, Czech Technical University of Prague;INRIA;CIIRC, Czech Technical University, Czech Technical University of Prague", "aff_domain": "ciirc.cvut.cz;inria.fr;ciirc.cvut.cz", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@misc{\nherel2023preserving,\ntitle={Preserving Semantics in Textual Adversarial Attacks},\nauthor={David Herel and Hugo Cisneros and Tomas Mikolov},\nyear={2023},\nurl={https://openreview.net/forum?id=LofRPZeXNNk}\n}", "github": "", "project": "", "reviewers": "6oWy;dmKS;QUPU", "site": "https://openreview.net/forum?id=LofRPZeXNNk", "pdf_size": 334858, "recommendation": "3;3;5", "confidence": "4;3;4", "correctness": "3;2;2", "technical_novelty": "2;2;2", "empirical_novelty": "2;2;2", "wc_summary_paper": "63;66;97", "wc_strength_and_weaknesses": "275;170;399", "wc_clarity_quality_novelty_and_reproducibility": "8;22;83", "wc_summary_review": "32;36;70", "wc_review": "378;294;649", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "400;361;336", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 75.33333333333333, 15.369522511198005 ], "wc_strength_and_weaknesses_avg": [ 281.3333333333333, 93.59605879641633 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 37.666666666666664, 32.561053764001905 ], "wc_summary_review_avg": [ 46.0, 17.048949136725895 ], "wc_review_avg": [ 440.3333333333333, 151.4823055306754 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 365.6666666666667, 26.335442953471574 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 3, 0 
], "corr_recommendation_confidence": 0.5, "corr_recommendation_correctness": -0.5, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7809180558108203357&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;0", "aff_unique_norm": "Czech Technical University;INRIA", "aff_unique_dep": "CIIRC;", "aff_unique_url": "https://www.cvut.cz;https://www.inria.fr", "aff_unique_abbr": "CTU;INRIA", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Prague;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Czech Republic;France" }, { "id": "Lq2rsxfNt8", "title": "Fusion of Deep Transfer Learning with Mixed convolution network", "track": "main", "status": "Withdraw", "tldr": "Fusion of Deep Transfer Learning", "abstract": "Global thirst in computer vision for image classification is performance improvement and parameter optimization. The evolution of deep learning raised the boundaries of the model size to hundreds of millions of parameters. This obliquely influences the training time of the model. Thus, contemporary research has diverted to parameter optimization par with performance. In this paper, a fusion-based deep transfer learning approach has been furbished with a mixed convolution block. The proposed mixed convolution block has been designed using two convolution paths including residual and separable convolutions. The residual convolution avoids vanishing gradient while separable convolution includes depthwise features. The experiments on the popular Fashion-MNIST bench mark dataset have proved that the proposed mixed convolution enticed the pre-trained models. It has been observed that there is a clear improvement of 1\\% than the base models. Further, the proposed fusion model exhibits a competing performance of 96.04\\% with existing models.", "keywords": "Deep Transfer Learning;Fusion model;Mixed Convolution Network;Feature Enhancement Network", "primary_area": "", "supplementary_material": "", "author": "Isunuri BV", "authorids": "~Isunuri_BV1", "gender": "M", "homepage": "", "dblp": "", "google_scholar": "2nux5iMAAAAJ", "orcid": "", "linkedin": "", "or_profile": "~Isunuri_BV1", "aff": "IIITDM Kancheepuram", "aff_domain": "iiitdm.ac.in", "position": "PhD student", "bibtex": "@misc{\nbv2023fusion,\ntitle={Fusion of Deep Transfer Learning with Mixed convolution network},\nauthor={Isunuri BV},\nyear={2023},\nurl={https://openreview.net/forum?id=Lq2rsxfNt8}\n}", "github": "", "project": "", "reviewers": "n7XU;zEhz;kVQC;BG17", "site": "https://openreview.net/forum?id=Lq2rsxfNt8", "pdf_size": 236921, "recommendation": "1;1;1;3", "confidence": "4;4;5;3", "correctness": "2;3;1;3", "technical_novelty": "1;1;2;1", "empirical_novelty": "1;1;0;1", "wc_summary_paper": "14;46;25;61", "wc_strength_and_weaknesses": "147;101;178;121", "wc_clarity_quality_novelty_and_reproducibility": "2;90;38;18", "wc_summary_review": "31;93;18;48", "wc_review": "194;330;259;248", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 1.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 2.25, 0.82915619758885 ], "technical_novelty_avg": [ 1.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 0.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 36.5, 18.227726133558185 ], "wc_strength_and_weaknesses_avg": [ 136.75, 28.86498744153546 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 37.0, 33.15116890850155 ], 
"wc_summary_review_avg": [ 47.5, 28.341665441536776 ], "wc_review_avg": [ 257.75, 48.427135987997474 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 1, 0 ], "corr_recommendation_confidence": -0.816496580927726, "corr_recommendation_correctness": 0.5222329678670935, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:IkWSYieAVjQJ:scholar.google.com/&scioq=Fusion+of+Deep+Transfer+Learning+with+Mixed+convolution+network&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Indian Institute of Information Technology Design and Manufacturing Kancheepuram", "aff_unique_dep": "", "aff_unique_url": "https://www.iiitdm.ac.in", "aff_unique_abbr": "IIITDM Kancheepuram", "aff_campus_unique_index": "0", "aff_campus_unique": "Kancheepuram", "aff_country_unique_index": "0", "aff_country_unique": "India" }, { "title": "Planning with Large Language Models for Code Generation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11072", "id": "Lr8cOOtYbfL", "poster": "", "openreview": "https://openreview.net/forum?id=Lr8cOOtYbfL", "slides": "https://iclr.cc/virtual/2023/poster/11072", "video": "https://iclr.cc/virtual/2023/poster/11072", "author_site": "Shun Zhang, Zhenfang Chen, Yikang Shen, Mingyu Ding, Joshua B Tenenbaum, Chuang Gan", "tldr": "We provide a novel framework for code generation by combining the advantages of a large language model and a planning algorithm.", "abstract": "Existing large language model-based code generation pipelines typically use beam search or sampling algorithms during the decoding process. Although the programs they generate achieve high token-matching-based scores, they often fail to compile or generate incorrect outputs. The main reason is that conventional Transformer decoding algorithms may not be the best choice for code generation. In this work, we propose a novel Transformer decoding algorithm, Planning-Guided Transformer Decoding (PG-TD), that uses a planning algorithm to do lookahead search and guide the Transformer to generate better programs. Specifically, instead of simply optimizing the likelihood of the generated sequences, the Transformer makes use of a planner that generates candidate programs and tests them on public test cases. The Transformer can therefore make more informed decisions and generate tokens that will eventually lead to higher-quality programs. We also design a mechanism that shares information between the Transformer and the planner to make our algorithm computationally efficient. We empirically evaluate our framework with several large language models as backbones on public coding challenge benchmarks, showing that 1) it can generate programs that consistently achieve higher performance compared with competing baseline methods; 2) it enables controllable code generation, such as concise codes and highly-commented codes by optimizing modified objective.", "keywords": "Large Language Model;Code Generation;Planning", "primary_area": "", "supplementary_material": "", "author": "Shun Zhang;Zhenfang Chen;Yikang Shen;Mingyu Ding;Joshua B. 
Tenenbaum;Chuang Gan", "authorids": "~Shun_Zhang6;~Zhenfang_Chen1;~Yikang_Shen1;~Mingyu_Ding1;~Joshua_B._Tenenbaum1;~Chuang_Gan1", "gender": ";M;M;M;;M", "homepage": "https://shunzh.github.io/;https://zfchenunique.github.io;;https://dingmyu.github.io/;;http://people.csail.mit.edu/ganchuang/", "dblp": ";207/5321;152/8226;188/5243;t/JoshuaBTenenbaum;139/6993", "google_scholar": ";QSRdIzAAAAAJ;qff5rRYAAAAJ;w4yTWwoAAAAJ;;PTeSCbIAAAAJ", "orcid": ";;;0000-0001-6556-8359;;", "linkedin": ";\u632f\u65b9-\u9648-512011bb/;;dingmyu/;;", "or_profile": "~Shun_Zhang6;~Zhenfang_Chen1;~Yikang_Shen1;~Mingyu_Ding1;~Joshua_B._Tenenbaum1;~Chuang_Gan1", "aff": "MIT-IBM Watson AI Lab;MIT-IBM Watson AI lab;International Business Machines;University of California, Berkeley;Massachusetts Institute of Technology;MIT-IBM Watson AI Lab", "aff_domain": "ibm.com;ibm.com;ibm.com;berkeley.edu;mit.edu;ibm.com", "position": "Researcher;Researcher;Researcher;Postdoc;Professor;PhD student", "bibtex": "@inproceedings{\nzhang2023planning,\ntitle={Planning with Large Language Models for Code Generation},\nauthor={Shun Zhang and Zhenfang Chen and Yikang Shen and Mingyu Ding and Joshua B. Tenenbaum and Chuang Gan},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=Lr8cOOtYbfL}\n}", "github": "", "project": "", "reviewers": "fkta;FcyD;kmKW;Xzif", "pdf_size": 912011, "recommendation": "3;6;8;8", "confidence": "4;4;4;3", "correctness": "3;3;3;4", "technical_novelty": "2;3;2;4", "empirical_novelty": "2;3;3;0", "wc_summary_paper": "82;110;223;59", "wc_strength_and_weaknesses": "522;283;442;66", "wc_clarity_quality_novelty_and_reproducibility": "14;416;574;86", "wc_summary_review": "129;183;269;31", "wc_review": "747;992;1508;242", "wc_reply_reviewers": "0;465;1038;0", "wc_reply_authors": "1475;2863;2645;224", "reply_reviewers": "0;4;3;0", "reply_authors": "4;8;5;1", "recommendation_avg": [ 6.25, 2.0463381929681126 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 2.0, 1.224744871391589 ], "wc_summary_paper_avg": [ 118.5, 62.978170821325065 ], "wc_strength_and_weaknesses_avg": [ 328.25, 174.14128602947665 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 272.5, 230.8132361889153 ], "wc_summary_review_avg": [ 153.0, 86.3365507765975 ], "wc_review_avg": [ 872.25, 455.9113811915645 ], "wc_reply_reviewers_avg": [ 375.75, 426.8831075364777 ], "wc_reply_authors_avg": [ 1801.75, 1052.7799805752388 ], "reply_reviewers_avg": [ 1.75, 1.7853571071357126 ], "reply_authors_avg": [ 4.5, 2.5 ], "replies_avg": [ 32, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.49374193110101877, "corr_recommendation_correctness": 0.49374193110101877, "gs_citation": 177, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12393959577946554002&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=Lr8cOOtYbfL", "email": "ibm.com;ibm.com;ibm.com;berkeley.edu;mit.edu;ibm.com", "author_num": 6, "aff_unique_index": "0;0;1;2;0;0", "aff_unique_norm": "Massachusetts Institute of Technology;International Business Machines Corporation;University of California, Berkeley", "aff_unique_dep": "IBM Watson AI Lab;;", "aff_unique_url": "https://www.mitibmwatsonailab.org;https://www.ibm.com;https://www.berkeley.edu", "aff_unique_abbr": "MIT-IBM AI Lab;IBM;UC Berkeley", "aff_campus_unique_index": "1", 
"aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "Lrxaf7IPVT", "title": "On the Importance of Diversity in Data-free Model Stealing", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Machine learning as a Service (MLaaS) allows users to query the machine learning model in an API manner, which provides an opportunity for users to enjoy the benefits brought by the high-performance model trained on valuable data. This interface boosts the flourish of machine learning based applications, while on the other hand, introduces the attack surface for model stealing attacks. Existing model stealing attacks have relaxed their attack assumptions to the data-free setting, while keeping the effectiveness. However, these methods are complex and consist of several components, which obscure the core on which the attack really depends. In this paper, we revisit the model stealing problem from a diversity perspective and demonstrate that keeping the generated data samples more diverse across all the classes is the critical point for improving the attack performance. Based on this conjecture, we provide a simplified attack framework. We empirically signify our conjecture by evaluating the effectiveness of our attack, and experimental results show that our approach is able to achieve comparable or even better performance compared with the state-of-the-art method. Furthermore, benefiting from the absence of redundant components, our method demonstrates its advantages in attack efficiency and query budget.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Yiyong Liu;Rui Wen;Michael Backes;Yang Zhang", "authorids": "~Yiyong_Liu1;~Rui_Wen3;director@cispa.de;~Yang_Zhang15", "gender": "M;M;;M", "homepage": "https://liu199604.github.io/;https://ruiwen-ai.github.io/;;https://yangzhangalmo.github.io/", "dblp": "284/7130;63/10765-2;;06/6785-16", "google_scholar": ";https://scholar.google.com/citations?hl=en;;Xeb2888AAAAJ", "orcid": ";0009-0009-0691-7569;;0000-0003-3612-7348", "linkedin": ";;;", "or_profile": "~Yiyong_Liu1;~Rui_Wen3;director@cispa.de;~Yang_Zhang15", "aff": "CISPA, saarland university, saarland informatics campus;CISPA Helmholtz Center for Information Security;;CISPA Helmholtz Center for Information Security", "aff_domain": "cispa.saarland;cispa.de;;cispa.de", "position": "PhD student;PhD student;;Assistant Professor", "bibtex": "@misc{\nliu2023on,\ntitle={On the Importance of Diversity in Data-free Model Stealing},\nauthor={Yiyong Liu and Rui Wen and Michael Backes and Yang Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=Lrxaf7IPVT}\n}", "github": "", "project": "", "reviewers": "rt7N;WeXM;ZT6s;h1zp", "site": "https://openreview.net/forum?id=Lrxaf7IPVT", "pdf_size": 2249470, "recommendation": "3;3;3;5", "confidence": "4;4;5;4", "correctness": "2;2;4;3", "technical_novelty": "2;2;1;2", "empirical_novelty": "2;2;1;2", "wc_summary_paper": "63;127;158;134", "wc_strength_and_weaknesses": "243;518;168;272", "wc_clarity_quality_novelty_and_reproducibility": "157;135;32;21", "wc_summary_review": "28;53;167;34", "wc_review": "491;833;525;461", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.82915619758885 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], 
"empirical_novelty_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 120.5, 35.131894341182345 ], "wc_strength_and_weaknesses_avg": [ 300.25, 131.3209332132543 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 86.25, 60.37952881565076 ], "wc_summary_review_avg": [ 70.5, 56.47344508704954 ], "wc_review_avg": [ 577.5, 149.24057759202086 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": 0.17407765595569782, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:GVjcre4P5fwJ:scholar.google.com/&scioq=On+the+Importance+of+Diversity+in+Data-free+Model+Stealing&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;1", "aff_unique_norm": "Saarland University;CISPA Helmholtz Center for Information Security", "aff_unique_dep": "CISPA;", "aff_unique_url": "https://www.uni-saarland.de;https://www.cispa.de/", "aff_unique_abbr": "Saarland U;CISPA", "aff_campus_unique_index": "0", "aff_campus_unique": "Saarland Informatics Campus;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "title": "DeCap: Decoding CLIP Latents for Zero-Shot Captioning via Text-Only Training", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11202", "id": "Lt8bMlhiwx2", "poster": "", "openreview": "https://openreview.net/forum?id=Lt8bMlhiwx2", "slides": "https://iclr.cc/virtual/2023/poster/11202", "video": "https://iclr.cc/virtual/2023/poster/11202", "author_site": "wei li, Linchao Zhu, Longyin Wen, Yi Yang", "tldr": "We propose a simple framework with a lightweight visual-aware language decoder for zero-shot captioning.", "abstract": "Large-scale pre-trained multi-modal models (e.g., CLIP) demonstrate strong zero-shot transfer capability in many discriminative tasks, e.g., image classification. Their adaptation to zero-shot image-conditioned text generation tasks has drawn increasing interest. Prior arts approach to zero-shot captioning by either utilizing the existing large language models (e.g., GPT-2) or pre-training the encoder-decoder network in an end-to-end manner. However, the large language models may not generate sensible descriptions due to the task discrepancy between captioning and language modeling, while the end-to-end pre-training requires paired data and extensive computational resources. In this work, we propose a simple framework, named DeCap, for zero-shot captioning. We introduce a lightweight visual-aware language decoder. This decoder is both data-efficient and computation-efficient: 1) it only requires the \\textit{text} data for training, easing the burden on the collection of paired data. 2) it does not require end-to-end training. When trained with text-only data, the decoder takes the text embedding extracted from the off-the-shelf CLIP encoder as a prefix embedding. The challenge is that the decoder is trained on the text corpus but at the inference stage, it needs to generate captions based on visual inputs. Though the CLIP text embedding and the visual embedding are correlated, the \\textit{modality gap} issue is widely observed in multi-modal contrastive models that prevents us from directly taking the visual embedding as the prefix embedding. We propose a training-free mechanism to reduce the modality gap. 
We project the visual embedding into the CLIP text embedding space, while the projected embedding retains the information of the visual input. Taking the projected embedding as the prefix embedding, the decoder generates high-quality descriptions that match the visual input. The experiments show that DeCap outperforms other zero-shot captioning methods and unpaired captioning methods by a large margin on the typical image captioning benchmarks, i.e., MSCOCO and NoCaps. We apply DeCap to video captioning and achieve state-of-the-art zero-shot performance on MSR-VTT and ActivityNet-Captions. The code is available at https://github.com/dhg-wei/DeCap.", "keywords": "Zero-shot captioning;Decoder training;Multi-modal learning", "primary_area": "", "supplementary_material": "", "author": "Wei Li;Linchao Zhu;Longyin Wen;Yi Yang", "authorids": "~Wei_Li55;~Linchao_Zhu1;~Longyin_Wen1;~Yi_Yang22", "gender": "M;M;M;M", "homepage": "https://github.com/lw-2018;http://ffmpbgrnn.github.io/;;https://person.zju.edu.cn/yiyang", "dblp": ";172/1383.html;119/1468;33/4854-1.html", "google_scholar": "hDubMJwAAAAJ;9ZukE28AAAAJ;5HDWtHsAAAAJ;RMSuNFwAAAAJ", "orcid": ";;0000-0001-5525-492X;", "linkedin": ";;longyin-wen-16934689/;", "or_profile": "~Wei_Li55;~Linchao_Zhu1;~Longyin_Wen1;~Yi_Yang22", "aff": "National University of Singapore;Zhejiang University;Bytedance Inc.;Zhejiang University", "aff_domain": "nus.edu;zju.edu.cn;bytedance.com;zju.edu.cn", "position": "Intern;Assistant Professor;Research Manager;Full Professor", "bibtex": "@inproceedings{\nli2023decap,\ntitle={DeCap: Decoding {CLIP} Latents for Zero-Shot Captioning via Text-Only Training},\nauthor={Wei Li and Linchao Zhu and Longyin Wen and Yi Yang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=Lt8bMlhiwx2}\n}", "github": "", "project": "", "reviewers": "CANm;1suB;Sz68;oCpD;MHPt;9VQ6", "pdf_size": 4608030, "recommendation": "6;6;6;6;6;8", "confidence": "4;4;5;4;4;4", "correctness": "3;2;3;3;3;3", "technical_novelty": "3;2;3;3;2;3", "empirical_novelty": "3;2;3;3;3;3", "wc_summary_paper": "111;86;62;84;130;54", "wc_strength_and_weaknesses": "292;200;227;337;330;155", "wc_clarity_quality_novelty_and_reproducibility": "65;16;4;14;97;9", "wc_summary_review": "54;41;129;167;40;47", "wc_review": "522;343;422;602;597;265", "wc_reply_reviewers": "19;0;24;274;0;22", "wc_reply_authors": "1401;771;1081;2547;1403;809", "reply_reviewers": "1;0;1;2;0;1", "reply_authors": "4;1;3;6;3;2", "recommendation_avg": [ 6.333333333333333, 0.7453559924999298 ], "confidence_avg": [ 4.166666666666667, 0.372677996249965 ], "correctness_avg": [ 2.8333333333333335, 0.3726779962499649 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.8333333333333335, 0.3726779962499649 ], "wc_summary_paper_avg": [ 87.83333333333333, 26.28318008832941 ], "wc_strength_and_weaknesses_avg": [ 256.8333333333333, 67.70873076806433 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 34.166666666666664, 34.59005958685562 ], "wc_summary_review_avg": [ 79.66666666666667, 49.75830472281877 ], "wc_review_avg": [ 458.5, 126.43937941427373 ], "wc_reply_reviewers_avg": [ 56.5, 97.76118861797866 ], "wc_reply_authors_avg": [ 1335.3333333333333, 596.8265149010127 ], "reply_reviewers_avg": [ 0.8333333333333334, 0.6871842709362768 ], "reply_authors_avg": [ 3.1666666666666665, 1.5723301886761007 ], "replies_avg": [ 32, 0 ], "authors#_avg": [ 4, 0 ], 
"corr_recommendation_confidence": -0.2, "corr_recommendation_correctness": 0.19999999999999998, "gs_citation": 115, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16101070388819196729&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=Lt8bMlhiwx2", "email": "nus.edu;zju.edu.cn;bytedance.com;zju.edu.cn", "author_num": 4, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "National University of Singapore;Zhejiang University;Bytedance Inc.", "aff_unique_dep": ";;", "aff_unique_url": "https://www.nus.edu.sg;https://www.zju.edu.cn;https://www.bytedance.com", "aff_unique_abbr": "NUS;ZJU;Bytedance", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "Singapore;China" }, { "id": "Lv3MfAEgvVv", "title": "Hyperbolic Binary Neural Network", "track": "main", "status": "Withdraw", "tldr": "We propose a Hyperbolic Binary Neural Network that updates the parameters in hyperbolic space.", "abstract": "Binary Neural Network (BNN) converts the full-precision weights and activations to the extreme 1-bit counterparts, which is especially suitable to be deployed on lightweight mobile devices. Neural network binarization is usually formulated as a constrained optimization problem, which restricts its optimized potential. In this paper, we introduce the dynamic exponential map that converts a constrained problem in the Riemannian manifold into an unconstrained one in the Euclidean space. Specifically, we propose a Hyperbolic Binary Neural Network (HBNN) by representing the parameter vector in the Euclidean space as the one in the hyperbolic space, which would enable us to optimize the parameter in an unconstrained space. By analyzing the parameterized representation, we present that the dynamic exponential map is a diffeomorphism in the Poincar\u00e9 ball. Theoretically, this property will not create extra saddle points or local minima in the Poincar\u00e9 ball, which also explains the good performance of the HBNN. 
Experiments on CIFAR10, CIFAR100, and ImageNet classification datasets with VGGsmall, ResNet18, and ResNet34 demonstrate the superiorities of our HBNN over existing state-of-the-art methods.", "keywords": "Neural network quantization;Hyperbolic geometry;Riemannian manifold", "primary_area": "", "supplementary_material": "", "author": "Jun Chen;Jingyang Xiang;Tianxin Huang;Xiangrui Zhao;Yong Liu", "authorids": "~Jun_Chen9;~Jingyang_Xiang2;~Tianxin_Huang1;~Xiangrui_Zhao1;~Yong_Liu11", "gender": "M;;M;M;M", "homepage": ";;https://tianxinhuang.github.io/;http://www.zju.edu.cn;https://person.zju.edu.cn/en/yongliu", "dblp": ";;251/3784;;29/4867-7", "google_scholar": "YKc2O78AAAAJ;;https://scholar.google.com.hk/citations?user=Fg7WYfcAAAAJ;;https://scholar.google.com.hk/citations?user=qYcgBbEAAAAJ", "orcid": "0000-0001-6568-8801;;;;0000-0003-4822-8939", "linkedin": ";;;;", "or_profile": "~Jun_Chen9;~Jingyang_Xiang2;~Tianxin_Huang1;~Xiangrui_Zhao1;~Yong_Liu11", "aff": "Zhejiang University;;Zhejiang University;Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;;zju.edu.cn;zju.edu.cn;zju.edu.cn", "position": "PhD student;;PhD student;PhD student;Full Professor", "bibtex": "@misc{\nchen2023hyperbolic,\ntitle={Hyperbolic Binary Neural Network},\nauthor={Jun Chen and Jingyang Xiang and Tianxin Huang and Xiangrui Zhao and Yong Liu},\nyear={2023},\nurl={https://openreview.net/forum?id=Lv3MfAEgvVv}\n}", "github": "", "project": "", "reviewers": "Visi;nZ7A;y8sU;r9yP", "site": "https://openreview.net/forum?id=Lv3MfAEgvVv", "pdf_size": 580988, "recommendation": "1;3;5;6", "confidence": "5;5;3;3", "correctness": "1;3;3;3", "technical_novelty": "1;2;3;3", "empirical_novelty": "1;2;2;3", "wc_summary_paper": "47;101;35;68", "wc_strength_and_weaknesses": "297;136;80;56", "wc_clarity_quality_novelty_and_reproducibility": "13;66;12;401", "wc_summary_review": "5;210;52;58", "wc_review": "362;513;179;583", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.75, 1.920286436967152 ], "confidence_avg": [ 4.0, 1.0 ], "correctness_avg": [ 2.5, 0.8660254037844386 ], "technical_novelty_avg": [ 2.25, 0.82915619758885 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 62.75, 25.043711785596 ], "wc_strength_and_weaknesses_avg": [ 142.25, 93.94246909678284 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 123.0, 161.9830238018787 ], "wc_summary_review_avg": [ 81.25, 77.11476836508037 ], "wc_review_avg": [ 409.25, 155.0812287157927 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.911322376865767, "corr_recommendation_correctness": 0.8268106308031117, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:KI15thtlGFIJ:scholar.google.com/&scioq=Hyperbolic+Binary+Neural+Network&hl=en&as_sdt=0,33", "gs_version_total": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Zhejiang University", "aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", "aff_unique_abbr": "ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "LvwOdSbB9Ic", "title": "Minibatch Stochastic Three Points Method for Unconstrained Smooth Minimization", "track": "main", "status": "Reject", "tldr": "", "abstract": 
"In this paper, we propose a new zero order optimization method called minibatch stochastic three points (MiSTP) method to solve an unconstrained minimization problem in a setting where only an approximation of the objective function evaluation is possible. It is based on the recently proposed stochastic three points (STP) method (Bergou et al., 2020). At each iteration, MiSTP generates a random search direction in a similar manner to STP, but chooses the next iterate based solely on the approximation of the objective function rather than its exact evaluations. We also analyze our method\u2019s complexity in the nonconvex and convex cases and evaluate its performance on multiple machine learning tasks.", "keywords": "Zero-order optimization;Machine Learning.", "primary_area": "", "supplementary_material": "/attachment/c0940d703e53b7b220ed1d879c373db6f6a3bcda.zip", "author": "Soumia Boucherouite;Grigory Malinovsky;Peter Richt\u00e1rik;El houcine Bergou", "authorids": "~Soumia_Boucherouite1;~Grigory_Malinovsky1;~Peter_Richt\u00e1rik1;~El_houcine_Bergou1", "gender": "F;M;M;M", "homepage": ";https://grigory-malinovsky.github.io;https://ecrc.kaust.edu.sa/Pages/Bergou.aspx;https://richtarik.org", "dblp": ";262/3277.html;https://dblp.uni-trier.de/pers/b/Bergou:El_Houcine.html;62/8001", "google_scholar": ";4w2W9KQAAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": ";;;0000-0003-4380-5848", "linkedin": "soumia-boucherouite;;ehbergou/;richtarik/", "or_profile": "~Soumia_Boucherouite1;~Grigory_Malinovsky1;~El_houcine_Bergou1;~Peter_Richtarik1", "aff": "College of Computing - Mohammed VI Polytechnic University;King Abdullah University of Science and Technology;;King Abdullah University of Science and Technology (KAUST)", "aff_domain": "um6p.ma;kaust.edu.sa;;kaust.edu.sa", "position": "PhD student;PhD student;;Full Professor", "bibtex": "@misc{\nboucherouite2023minibatch,\ntitle={Minibatch Stochastic Three Points Method for Unconstrained Smooth Minimization},\nauthor={Soumia Boucherouite and Grigory Malinovsky and Peter Richt{\\'a}rik and El houcine Bergou},\nyear={2023},\nurl={https://openreview.net/forum?id=LvwOdSbB9Ic}\n}", "github": "", "project": "", "reviewers": "o1C6;KsoD;jzrD;dcdP", "site": "https://openreview.net/forum?id=LvwOdSbB9Ic", "pdf_size": 1243550, "recommendation": "3;5;5;5", "confidence": "5;4;3;3", "correctness": "3;2;3;4", "technical_novelty": "1;2;3;2", "empirical_novelty": "1;2;2;2", "wc_summary_paper": "73;42;40;83", "wc_strength_and_weaknesses": "108;239;146;142", "wc_clarity_quality_novelty_and_reproducibility": "21;38;23;69", "wc_summary_review": "12;31;45;58", "wc_review": "214;350;254;352", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "373;468;331;322", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 59.5, 18.848076824970764 ], "wc_strength_and_weaknesses_avg": [ 158.75, 48.6280526034099 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 37.75, 19.201236939322424 ], "wc_summary_review_avg": [ 36.5, 17.066048165876012 ], "wc_review_avg": [ 292.5, 60.18928476066151 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 373.5, 57.85542325486868 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 
4, 0 ], "corr_recommendation_confidence": -0.8703882797784891, "corr_recommendation_correctness": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2446787015516382234&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff_unique_index": "0;1;1", "aff_unique_norm": "Mohammed VI Polytechnic University;King Abdullah University of Science and Technology", "aff_unique_dep": "College of Computing;", "aff_unique_url": "https://www.um6p.ma;https://www.kast.kau.edu.sa", "aff_unique_abbr": "UM6P;KAUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Morocco;Saudi Arabia" }, { "id": "LxEfHeknf4z", "title": "CompletionFormer: Depth Completion with Convolutions and Vision Transformers", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "This paper proposes a joint convolutional attention and Transformer block, which deeply couples the convolutional layer and Vision Transformer into one block, as the basic unit to construct our depth completion model in a pyramidal structure. This hybrid structure naturally benefits both the local connectivity of convolutions and the global context of the Transformer in one single model. As a result, our CompletionFormer outperforms state-of-the-art CNNs-based methods on the outdoor KITTI Depth Completion benchmark and indoor NYUv2 dataset, achieving significantly higher efficiency (near 1/3 FLOPs) compared to pure Transformer-based methods. Especially when the captured depth is highly sparse, the performance gap with other methods gets much larger.", "keywords": "Depth;Depth Completion;Vision Transformer;Computer Vision", "primary_area": "", "supplementary_material": "", "author": "Youmin Zhang;xianda guo;Matteo Poggi;Zheng Zhu;Guan Huang;Stefano Mattoccia", "authorids": "~Youmin_Zhang1;~xianda_guo1;~Matteo_Poggi1;~Zheng_Zhu1;~Guan_Huang1;~Stefano_Mattoccia3", "gender": "M;M;;M;M;M", "homepage": "https://youmi-zym.github.io;https://xiandaguo.net/;https://vision.disi.unibo.it/~mpoggi/;http://www.zhengzhu.net/;;https://stefanomattoccia.github.io/", "dblp": "119/5917-8;315/4454;167/0722;29/4319.html/;93/10768.html/;05/6147", "google_scholar": "qLiVWVwAAAAJ;https://scholar.google.com.hk/citations?hl=zh-CN;https://scholar.google.it/citations?user=bve0VwgAAAAJ;https://scholar.google.com.hk/citations?user=NmwjI0AAAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0003-2822-4690;0000-0002-3337-2236;;;0000-0002-3681-7704", "linkedin": ";;;;;stefano-mattoccia-4a17901b?lipi=urn%3Ali%3Apage%3Ad_flagship3_profile_view_base_contact_details%3BfHUJAhFCT7mrHOeaOvsyXw%3D%3D", "or_profile": "~Youmin_Zhang1;~xianda_guo1;~Matteo_Poggi1;~Zheng_Zhu1;~Guan_Huang1;~Stefano_Mattoccia3", "aff": "University of Bologna;PhiGent Robotics;University di Bologna;PhiGent Robotics;Xforward AI Technology;University of Bologna", "aff_domain": "unibo.it;phigent.ai;unibo.it;phigent.ai;xforwardai.com;unibo.it", "position": "PhD student;Researcher;Assistant Professor;Researcher;Researcher;Associate Professor", "bibtex": "@misc{\nzhang2023completionformer,\ntitle={CompletionFormer: Depth Completion with Convolutions and Vision Transformers},\nauthor={Youmin Zhang and xianda guo and Matteo Poggi and Zheng Zhu and Guan Huang and Stefano Mattoccia},\nyear={2023},\nurl={https://openreview.net/forum?id=LxEfHeknf4z}\n}", "github": "", "project": "", "reviewers": "MwL9;ku5N;94wz;rvn1", "site": "https://openreview.net/forum?id=LxEfHeknf4z", "pdf_size": 3510948, "recommendation": 
"1;3;6;6", "confidence": "4;4;5;3", "correctness": "2;3;3;3", "technical_novelty": "2;2;3;2", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "70;51;53;108", "wc_strength_and_weaknesses": "233;209;274;149", "wc_clarity_quality_novelty_and_reproducibility": "55;107;26;352", "wc_summary_review": "15;34;14;66", "wc_review": "373;401;367;675", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.0, 2.1213203435596424 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 70.5, 22.874658467395747 ], "wc_strength_and_weaknesses_avg": [ 216.25, 45.25138119439008 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 135.0, 128.60209951629872 ], "wc_summary_review_avg": [ 32.25, 21.05201890555868 ], "wc_review_avg": [ 454.0, 128.2380598730346 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.8164965809277261, "gs_citation": 119, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3935870154155135926&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;0;1;2;0", "aff_unique_norm": "University of Bologna;PhiGent Robotics;Xforward AI Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.unibo.it;;", "aff_unique_abbr": "Unibo;;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;2;0", "aff_country_unique": "Italy;;China" }, { "id": "LzPN-BHiJuc", "title": "Linearly Constrained Bilevel Optimization: A Smoothed Implicit Gradient Approach", "track": "main", "status": "Reject", "tldr": "", "abstract": "This work develops analysis and algorithms for solving a class of bilevel optimization problems where the lower-level (LL) problems have linear constraints. Most of the existing approaches for constrained bilevel problems rely on value function based approximate reformulations, which suffer from issues such as non-convex and non-differentiable constraints. In contrast, in this work, we develop an implicit gradient-based approach, which is easy to implement, and is suitable for machine learning applications. We first provide in-depth understanding of the problem, by showing that the implicit objective for such problems is in general non-differentiable. However, if we add some small (linear) perturbation to the LL objective, the resulting problem becomes differentiable almost surely. This key observation opens the door for developing (deterministic and stochastic) gradient-based algorithms similar to the state-of-the-art ones for unconstrained bi-level problems. We show that when the implicit function is assumed to be strongly-convex, convex and non-convex, the resulting algorithms converge with guaranteed rate. Finally, we experimentally corroborate the theoretical findings and evaluate the performance of the proposed framework on numerical and adversarial learning problems. 
To our knowledge, this is the first time that (implicit) gradient-based methods have been developed and analyzed for the considered class of bilevel problems.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Prashant Khanduri;Ioannis Tsaknakis;Yihua Zhang;Jia Liu;Sijia Liu;Jiawei Zhang;Mingyi Hong", "authorids": "~Prashant_Khanduri1;~Ioannis_Tsaknakis1;~Yihua_Zhang1;~Jia_Liu1;~Sijia_Liu1;~Jiawei_Zhang6;~Mingyi_Hong1", "gender": "M;;M;M;M;M;M", "homepage": "https://sites.google.com/view/khanduri-prashant/home?authuser=0;;https://yihua-zhang.com;https://kevinliu-osu.github.io/index.html;https://lsjxjtu.github.io/;https://www.cuhk.edu.cn/;http://people.ece.umn.edu/~mhong/mingyi.html", "dblp": "158/4888;;;;128/6972-1;;57/8053", "google_scholar": ";;https://scholar.google.com/citations?hl=zh-CN;Ofx3dScAAAAJ;C7dO_UgAAAAJ;;qRnP-p0AAAAJ", "orcid": ";;;;;0000-0002-9420-384X;", "linkedin": "prashant-khanduri-0497894b/;;zhangyihua/;;;;", "or_profile": "~Prashant_Khanduri1;~Ioannis_Tsaknakis1;~Yihua_Zhang1;~Jia_Liu1;~Sijia_Liu1;~Jiawei_Zhang6;~Mingyi_Hong1", "aff": "Wayne State University;;Michigan State University;The Ohio State University;Michigan State University;Massachusetts Institute of Technology;University of Minnesota, Minneapolis", "aff_domain": "wayne.edu;;msu.edu;osu.edu;msu.edu;mit.edu;umn.edu", "position": "Assistant Professor;;PhD student;Assistant Professor;Assistant Professor;Postdoc;Associate Professor", "bibtex": "@misc{\nkhanduri2023linearly,\ntitle={Linearly Constrained Bilevel Optimization: A Smoothed Implicit Gradient Approach},\nauthor={Prashant Khanduri and Ioannis Tsaknakis and Yihua Zhang and Jia Liu and Sijia Liu and Jiawei Zhang and Mingyi Hong},\nyear={2023},\nurl={https://openreview.net/forum?id=LzPN-BHiJuc}\n}", "github": "", "project": "", "reviewers": "gXua;eqw3;YhfE;1APN", "site": "https://openreview.net/forum?id=LzPN-BHiJuc", "pdf_size": 692213, "recommendation": "5;6;8;8", "confidence": "3;2;4;4", "correctness": "2;4;3;4", "technical_novelty": "2;4;3;3", "empirical_novelty": "4;3;3;0", "wc_summary_paper": "131;88;30;85", "wc_strength_and_weaknesses": "574;335;652;465", "wc_clarity_quality_novelty_and_reproducibility": "2;2;15;30", "wc_summary_review": "74;18;57;80", "wc_review": "781;443;754;660", "wc_reply_reviewers": "175;0;12;108", "wc_reply_authors": "1418;1350;2357;605", "reply_reviewers": "1;0;1;1", "reply_authors": "3;3;4;2", "recommendation_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.5, 1.5 ], "wc_summary_paper_avg": [ 83.5, 35.8503835404867 ], "wc_strength_and_weaknesses_avg": [ 506.5, 119.22772328615522 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 12.25, 11.54068888758379 ], "wc_summary_review_avg": [ 57.25, 24.180312239505923 ], "wc_review_avg": [ 659.5, 132.82036741403783 ], "wc_reply_reviewers_avg": [ 73.75, 71.89706183148238 ], "wc_reply_authors_avg": [ 1432.5, 621.7863379007293 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.7543365091413573, "corr_recommendation_correctness": 0.5222329678670935, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13545323192705514398&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff_unique_index": "0;1;2;1;3;4", 
"aff_unique_norm": "Wayne State University;Michigan State University;Ohio State University;Massachusetts Institute of Technology;University of Minnesota", "aff_unique_dep": ";;;;", "aff_unique_url": "https://wayne.edu;https://www.msu.edu;https://www.osu.edu;https://web.mit.edu;https://www.minnesota.edu", "aff_unique_abbr": "WSU;MSU;OSU;MIT;UMN", "aff_campus_unique_index": "1", "aff_campus_unique": ";Minneapolis", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Better Teacher Better Student: Dynamic Prior Knowledge for Knowledge Distillation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11265", "id": "M0_sUuEyHs", "poster": "", "openreview": "https://openreview.net/forum?id=M0_sUuEyHs", "slides": "https://iclr.cc/virtual/2023/poster/11265", "video": "https://iclr.cc/virtual/2023/poster/11265", "author_site": "Martin Zong, Zengyu Qiu, Xinzhu Ma, Kunlin Yang, Chunya Liu, Jun Hou, Shuai Yi, Wanli Ouyang", "tldr": "The proposed method dynamically introduces part of teacher's features to student as prior knowledge before applying knowledge distillation. ", "abstract": "Knowledge distillation (KD) has shown very promising capabilities in transferring learning representations from large models (teachers) to small models (students). However, as the capacity gap between students and teachers becomes larger, existing KD methods fail to achieve better results. Our work shows that the 'prior knowledge' is vital to KD, especially when applying large teachers. Particularly, we propose the dynamic prior knowledge (DPK), which integrates part of teacher's features as the prior knowledge before the feature distillation. This means that our method also takes the teacher's feature as `input', not just `target'. Besides, we dynamically adjust the ratio of the prior knowledge during the training phase according to the feature gap, thus guiding the student in an appropriate difficulty. To evaluate the proposed method, we conduct extensive experiments on two image classification benchmarks (i.e. CIFAR100 and ImageNet) and an object detection benchmark (\\i.e. MS COCO). The results demonstrate the superiority of our method in performance under varying settings. Besides, our DPK makes the performance of the student model positively correlated with that of the teacher model, which means that we can further boost the accuracy of students by applying larger teachers. More importantly, DPK provides a fast solution in teacher model selection for any given model. 
Our codes will be publicly available for reproducibility.", "keywords": "Knowledge Distillation", "primary_area": "", "supplementary_material": "", "author": "Martin Zong;Zengyu Qiu;Xinzhu Ma;Kunlin Yang;Chunya Liu;Jun Hou;Shuai Yi;Wanli Ouyang", "authorids": "~Martin_Zong1;~Zengyu_Qiu1;~Xinzhu_Ma1;~Kunlin_Yang1;~Chunya_Liu1;~Jun_Hou2;~Shuai_Yi2;~Wanli_Ouyang1", "gender": "M;F;M;M;M;;;", "homepage": "https://test.github.io;https://github.com/Cuibaby;https://github.com/xinzhuma;https://github.com/Youngkl0726;;;;", "dblp": ";;191/3902;;;;;", "google_scholar": ";;8PuKa_8AAAAJ;;;;;", "orcid": ";0000-0001-5481-8826;;;;;;", "linkedin": ";;;;https://www.linkedin.cn/incareer/in/%E6%98%A5%E4%BA%9A-%E5%88%98-022800111;;;", "or_profile": "~Martin_Zong1;~Zengyu_Qiu1;~Xinzhu_Ma1;~Kunlin_Yang1;~Chunya_Liu1;~Jun_Hou2;~Shuai_Yi2;~Wanli_Ouyang1", "aff": ";East China Normal University;University of Sydney;;Sensetime;;;", "aff_domain": ";ecnu.edu.cn;sydney.edu.au;;sensetime.com;;;", "position": ";MS student;PhD student;;Researcher;;;", "bibtex": "@inproceedings{\nzong2023better,\ntitle={Better Teacher Better Student: Dynamic Prior Knowledge for Knowledge Distillation},\nauthor={Martin Zong and Zengyu Qiu and Xinzhu Ma and Kunlin Yang and Chunya Liu and Jun Hou and Shuai Yi and Wanli Ouyang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=M0_sUuEyHs}\n}", "github": "", "project": "", "reviewers": "cqrC;ERUi;aN37", "pdf_size": 4338426, "recommendation": "5;6;8", "confidence": "5;3;4", "correctness": "2;4;4", "technical_novelty": "2;2;3", "empirical_novelty": "3;3;3", "wc_summary_paper": "68;61;78", "wc_strength_and_weaknesses": "217;280;186", "wc_clarity_quality_novelty_and_reproducibility": "47;4;46", "wc_summary_review": "37;70;50", "wc_review": "369;415;360", "wc_reply_reviewers": "0;22;0", "wc_reply_authors": "1085;1071;541", "reply_reviewers": "0;1;0", "reply_authors": "2;2;1", "recommendation_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "correctness_avg": [ 3.3333333333333335, 0.9428090415820634 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 69.0, 6.97614984548545 ], "wc_strength_and_weaknesses_avg": [ 227.66666666666666, 39.10953279643667 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 32.333333333333336, 20.038851153585515 ], "wc_summary_review_avg": [ 52.333333333333336, 13.572848714334887 ], "wc_review_avg": [ 381.3333333333333, 24.087802353519553 ], "wc_reply_reviewers_avg": [ 7.333333333333333, 10.370899457402697 ], "wc_reply_authors_avg": [ 899.0, 253.20874129197566 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": -0.3273268353539886, "corr_recommendation_correctness": 0.7559289460184546, "gs_citation": 60, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4530847307876513430&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=M0_sUuEyHs", "email": ";ecnu.edu.cn;sydney.edu.au;;sensetime.com;;;", "author_num": 8, "aff_unique_index": "0;1;2", "aff_unique_norm": "East China Normal University;University of Sydney;SenseTime", "aff_unique_dep": ";;", "aff_unique_url": "http://www.ecnu.edu.cn;https://www.sydney.edu.au;https://www.sensetime.com", 
"aff_unique_abbr": "ECNU;USYD;SenseTime", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "China;Australia" }, { "id": "M1BrqvlID5J", "title": "Accurate and Efficient Soma Reconstruction in a Full Adult Fly Brain", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Neuron reconstruction in a full adult fly brain from high-resolution electron microscopy (EM) data is regarded as a cornerstone for neuroscientists to explore how neurons inspire intelligence. As the central part of neurons, somas in the full brain indicate the origin of neurogenesis and neural functions. However, due to the absence of EM datasets specifically annotated for somas, existing deep learning-based neuron reconstruction methods cannot directly provide accurate soma distribution and morphology. Moreover, full brain neuron reconstruction remains extremely time-consuming due to the unprecedentedly large size of EM data. In this paper, we develop an efficient soma reconstruction method for obtaining accurate soma distribution and morphology information in a full adult fly brain. To this end, we first make a high-resolution EM dataset with fine-grained 3D manual annotations on somas. Relying on this dataset, we propose an efficient, two-stage deep learning algorithm for predicting accurate locations and boundaries of 3D soma instances. Further, we deploy a parallelized, high-throughput data processing pipeline for executing the above algorithm on the full brain. Finally, we provide quantitative and qualitative results to validate the superiority of the proposed method, as well as comprehensive statistics of the reconstructed somas in the full adult fly brain from the biological perspective.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/56b716be244b09ab662506d412fb9b43bf81ac45.zip", "author": "Xiaoyu Liu;Bo Hu;Mingxing Li;Wei Huang;Yueyi Zhang;Zhiwei Xiong", "authorids": "~Xiaoyu_Liu6;~Bo_Hu5;~Mingxing_Li1;~Wei_Huang5;~Yueyi_Zhang2;~Zhiwei_Xiong1", "gender": "M;M;M;M;;M", "homepage": ";;https://scholar.google.com/citations?hl=en&user=-pfkprkAAAAJ&view_op=list_works&sortby=pubdate;https://github.com/weih527;;", "dblp": "78/6196;;;81/6685-36;;54/6827", "google_scholar": "https://scholar.google.nl/citations?user=0qyOmX8AAAJ;;https://scholar.google.com/citations?hl=en;C4zmoy4AAAAJ;LatWlFAAAAAJ;Snl0HPEAAAAJ", "orcid": ";my-orcid?orcid=0000-0002-6540-5119;;0000-0001-7513-3105;;", "linkedin": ";;;;;", "or_profile": "~Xiaoyu_Liu6;~Bo_Hu5;~Mingxing_Li1;~Wei_Huang5;~Yueyi_Zhang2;~Zhiwei_Xiong1", "aff": "University of Science and Technology of China;University of Science and Technology of China;Alibaba Group;University of Science and Technology of China;University of Science and Technology of China;USTC", "aff_domain": "ustc.edu.cn;ustc.edu.cn;alibaba-inc.com;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn", "position": "PhD student;PhD student;Researcher;PhD student;Associate Researcher;Professor", "bibtex": "@misc{\nliu2023accurate,\ntitle={Accurate and Efficient Soma Reconstruction in a Full Adult Fly Brain},\nauthor={Xiaoyu Liu and Bo Hu and Mingxing Li and Wei Huang and Yueyi Zhang and Zhiwei Xiong},\nyear={2023},\nurl={https://openreview.net/forum?id=M1BrqvlID5J}\n}", "github": "", "project": "", "reviewers": "mFfm;5EYE;6PPL", "site": "https://openreview.net/forum?id=M1BrqvlID5J", "pdf_size": 20497152, "recommendation": "1;3;5", "confidence": "4;5;3", "correctness": "3;2;4", "technical_novelty": "1;1;2", "empirical_novelty": 
"1;1;3", "wc_summary_paper": "44;59;39", "wc_strength_and_weaknesses": "29;56;226", "wc_clarity_quality_novelty_and_reproducibility": "175;474;127", "wc_summary_review": "65;132;54", "wc_review": "313;721;446", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.0, 1.632993161855452 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 1.3333333333333333, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.6666666666666667, 0.9428090415820634 ], "wc_summary_paper_avg": [ 47.333333333333336, 8.498365855987974 ], "wc_strength_and_weaknesses_avg": [ 103.66666666666667, 87.20219161364136 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 258.6666666666667, 153.51945226003843 ], "wc_summary_review_avg": [ 83.66666666666667, 34.4705993887867 ], "wc_review_avg": [ 493.3333333333333, 169.89473865373884 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.5, "corr_recommendation_correctness": 0.5, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:umwwamQVpFoJ:scholar.google.com/&scioq=Accurate+and+Efficient+Soma+Reconstruction+in+a+Full+Adult+Fly+Brain&hl=en&as_sdt=0,48", "gs_version_total": 0, "aff_unique_index": "0;0;1;0;0;0", "aff_unique_norm": "University of Science and Technology of China;Alibaba Group", "aff_unique_dep": ";", "aff_unique_url": "http://www.ustc.edu.cn;https://www.alibaba.com", "aff_unique_abbr": "USTC;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Learning with Logical Constraints but without Shortcut Satisfaction", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10738", "id": "M2unceRvqhh", "poster": "", "openreview": "https://openreview.net/forum?id=M2unceRvqhh", "slides": "https://iclr.cc/virtual/2023/poster/10738", "video": "https://iclr.cc/virtual/2023/poster/10738", "author_site": "Zenan Li, Zehua Liu, Yuan Yao, Jingwei Xu, Taolue Chen, Xiaoxing Ma, Jian Lu", "tldr": "", "abstract": "Recent studies have started to explore the integration of logical knowledge into deep learning via encoding logical constraints as an additional loss function. However, existing approaches tend to vacuously satisfy logical constraints through shortcuts, failing to fully exploit the knowledge. In this paper, we present a new framework for learning with logical constraints. Specifically, we address the shortcut satisfaction issue by introducing dual variables for logical connectives, encoding how the constraint is satisfied. We further propose a variational framework where the encoded logical constraint is expressed as a distributional loss that is compatible with the model's original training loss. 
The theoretical analysis shows that the proposed approach enjoys several desirable properties, and the experimental evaluations demonstrate its superior performance in both model generalizability and constraint satisfaction.", "keywords": "training with logical constraints;logical formula encoding;variational learning;stochastic gradient descent ascent", "primary_area": "", "supplementary_material": "", "author": "Zenan Li;Zehua Liu;Yuan Yao;Jingwei Xu;Taolue Chen;Xiaoxing Ma;Jian L\\"{u}", "authorids": "~Zenan_Li3;~Zehua_Liu3;~Yuan_Yao7;~Jingwei_Xu3;~Taolue_Chen2;~Xiaoxing_Ma1;lj@nju.edu.cn", "gender": "M;;M;M;;;", "homepage": "https://lizn-zn.github.io/;;;http://ics.nju.edu.cn/people/jingweixu/;;;", "dblp": "242/2285;;25/4120-1;148/9997-1;;;", "google_scholar": "eu4eqTcAAAAJ;;;15maGTwAAAAJ;;;", "orcid": ";;;;;;", "linkedin": ";;;;;;", "or_profile": "~Zenan_Li3;~Zehua_Liu3;~Yuan_Yao7;~Jingwei_Xu3;~Taolue_Chen2;~Xiaoxing_Ma1;lj@nju.edu.cn", "aff": "Microsoft Research;;Nanjing University;Nanjing University;;;", "aff_domain": "research.microsoft.com;;nju.edu.cn;nju.edu.cn;;;", "position": "Intern;;Associate Professor;Assistant Professor;;;", "bibtex": "@inproceedings{\nli2023learning,\ntitle={Learning with Logical Constraints but without Shortcut Satisfaction},\nauthor={Zenan Li and Zehua Liu and Yuan Yao and Jingwei Xu and Taolue Chen and Xiaoxing Ma and Jian L\\"{u}},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=M2unceRvqhh}\n}", "github": "", "project": "", "reviewers": "LWuq;X3bi;C2ss;UtYh", "pdf_size": 1211897, "recommendation": "6;6;8;8", "confidence": "2;3;4;3", "correctness": "3;3;4;4", "technical_novelty": "4;3;3;4", "empirical_novelty": "4;3;3;3", "wc_summary_paper": "120;81;132;130", "wc_strength_and_weaknesses": "413;38;87;93", "wc_clarity_quality_novelty_and_reproducibility": "29;11;128;255", "wc_summary_review": "13;124;22;11", "wc_review": "575;254;369;489", "wc_reply_reviewers": "337;0;0;7", "wc_reply_authors": "611;193;28;53", "reply_reviewers": "1;0;0;1", "reply_authors": "2;1;1;1", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.5, 0.5 ], "empirical_novelty_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 115.75, 20.571521577170707 ], "wc_strength_and_weaknesses_avg": [ 157.75, 148.90496130082437 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 105.75, 97.0035437497002 ], "wc_summary_review_avg": [ 42.5, 47.23610906922796 ], "wc_review_avg": [ 421.75, 121.37828265385863 ], "wc_reply_reviewers_avg": [ 86.0, 144.94309228107423 ], "wc_reply_authors_avg": [ 221.25, 233.64329115127615 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.7071067811865475, "corr_recommendation_correctness": 1.0, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8772623186491454962&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=M2unceRvqhh", "email": "research.microsoft.com;;nju.edu.cn;nju.edu.cn;;;", "author_num": 7, "aff_unique_index": "0;1;1", "aff_unique_norm": "Microsoft;Nanjing University", "aff_unique_dep": "Microsoft Research;", "aff_unique_url": "https://www.microsoft.com/en-us/research;https://www.nju.edu.cn", "aff_unique_abbr": "MSR;Nanjing U",
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United States;China" }, { "id": "M3GzgrA7U4", "title": "Graph Neural Networks as Gradient Flows: understanding graph convolutions via energy", "track": "main", "status": "Reject", "tldr": "We apply the gradient flow formalism to GNNs to both develop new frameworks and provide a better theoretical understanding of existing ones.", "abstract": "Gradient flows are differential equations that minimize an energy functional and constitute the main descriptors of physical systems. We apply this formalism to Graph Neural Networks (GNNs) to develop new frameworks for learning on graphs as well as provide a better theoretical understanding of existing ones. We derive GNNs as a gradient flow equation of a parametric energy that provides a physics-inspired interpretation of GNNs as learning particle dynamics in the feature space. In particular, we show that in graph convolutional models (GCN), the positive/negative eigenvalues of the channel mixing matrix correspond to attractive/repulsive forces between adjacent features. We rigorously prove how the channel-mixing can learn to steer the dynamics towards low or high frequencies, which allows to deal with heterophilic graphs. We show that the same class of energies is decreasing along a larger family of GNNs; albeit not gradient flows, they retain their inductive bias. We experimentally evaluate an instance of the gradient flow framework that is principled, more efficient than GCN, and achieves competitive performance on graph datasets of varying homophily often outperforming recent baselines specifically designed to target heterophily.", "keywords": "Graph Neural Networks;Gradient flows;Energy functionals;Spectral theory", "primary_area": "", "supplementary_material": "", "author": "Francesco Di Giovanni;James Rowbottom;Benjamin Paul Chamberlain;Thomas Markovich;Michael M. Bronstein", "authorids": "~Francesco_Di_Giovanni1;~James_Rowbottom1;~Benjamin_Paul_Chamberlain1;~Thomas_Markovich1;~Michael_M._Bronstein1", "gender": "M;;M;;M", "homepage": "https://francescodgv.github.io/;;;http://thomasmarkovich.com;http://www.inf.usi.ch/bronstein/", "dblp": ";295/8782;;;07/2668", "google_scholar": "yzjjeqsAAAAJ;;https://scholar.google.co.uk/citations?user=Tr8LSOEAAAAJ;;UU3N6-UAAAAJ", "orcid": ";;;;", "linkedin": ";https://linkedin.com/in/jamesrowbottom;;;mbronstein/", "or_profile": "~Francesco_Di_Giovanni1;~James_Rowbottom1;~Benjamin_Paul_Chamberlain1;~Thomas_Markovich1;~Michael_M._Bronstein1", "aff": "University of Cambridge;University of Cambridge;Twitter;Twitter;Twitter", "aff_domain": "cam.ac.uk;cam.ac.uk;twitter.com;twitter.com;twitter.com", "position": "Postdoc;PhD student;ML Researcher;Researcher;Head of Graph ML", "bibtex": "@misc{\ngiovanni2023graph,\ntitle={Graph Neural Networks as Gradient Flows: understanding graph convolutions via energy},\nauthor={Francesco Di Giovanni and James Rowbottom and Benjamin Paul Chamberlain and Thomas Markovich and Michael M. 
Bronstein},\nyear={2023},\nurl={https://openreview.net/forum?id=M3GzgrA7U4}\n}", "github": "", "project": "", "reviewers": "Hwvc;zvZU;DfeD;Sead", "site": "https://openreview.net/forum?id=M3GzgrA7U4", "pdf_size": 638305, "recommendation": "3;3;5;6", "confidence": "4;2;4;3", "correctness": "3;2;2;4", "technical_novelty": "2;2;3;4", "empirical_novelty": "2;2;3;2", "wc_summary_paper": "43;114;68;88", "wc_strength_and_weaknesses": "436;83;140;203", "wc_clarity_quality_novelty_and_reproducibility": "82;380;74;32", "wc_summary_review": "38;91;887;134", "wc_review": "599;668;1169;457", "wc_reply_reviewers": "293;0;0;0", "wc_reply_authors": "2336;1617;1912;781", "reply_reviewers": "1;0;0;0", "reply_authors": "3;2;3;1", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 2.75, 0.82915619758885 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 78.25, 26.080404521402652 ], "wc_strength_and_weaknesses_avg": [ 215.5, 134.19482106251343 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 142.0, 138.7155362603627 ], "wc_summary_review_avg": [ 287.5, 347.7876507295795 ], "wc_review_avg": [ 723.25, 268.36204556531465 ], "wc_reply_reviewers_avg": [ 73.25, 126.87272165442026 ], "wc_reply_authors_avg": [ 1661.5, 568.9817659644287 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.17407765595569782, "corr_recommendation_correctness": 0.5222329678670935, "gs_citation": 58, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12418793278039517524&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;1;1;1", "aff_unique_norm": "University of Cambridge;Twitter, Inc.", "aff_unique_dep": ";", "aff_unique_url": "https://www.cam.ac.uk;https://twitter.com", "aff_unique_abbr": "Cambridge;Twitter", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;0;1;1;1", "aff_country_unique": "United Kingdom;United States" }, { "id": "M4UxoupR3az", "title": "The Reward Hypothesis is False", "track": "main", "status": "Reject", "tldr": "We argue that the reward hypothesis is false, by providing several counterexamples. We also provide necessary and sufficient conditions for when a MORL problem can be reduced to ordinary RL, and describe a new way to express tasks for RL agents.", "abstract": "The reward hypothesis is the hypothesis that \"all of what we mean by goals and purposes can be well thought of as the maximisation of the expected value of the cumulative sum of a received scalar signal\". In this paper, we will argue that this hypothesis is false. We will look at three natural classes of reinforcement learning tasks (multi-objective reinforcement learning, risk-averse reinforcement learning, and modal reinforcement learning), and then prove mathematically that these tasks cannot be expressed using any scalar, Markovian reward function. We thus disprove the reward hypothesis by providing many examples of tasks which are both natural and intuitive to describe, but which are nonetheless impossible to express using reward functions. In the process, we provide necessary and sufficient conditions for when a multi-objective reinforcement learning problem can be reduced to ordinary, scalar reward reinforcement learning. 
We also call attention to a new class of reinforcement learning problems (namely those we call \"modal\" problems), which have so far not been given any systematic treatment in the reinforcement learning literature.", "keywords": "the reward hypothesis;reward functions;multi-objective reinforcement learning;MORL", "primary_area": "", "supplementary_material": "/attachment/044e2ccd82d3a95e0d6189dbd9a82c0d46a5d872.zip", "author": "Joar Max Viktor Skalse;Alessandro Abate", "authorids": "~Joar_Max_Viktor_Skalse1;~Alessandro_Abate1", "gender": "M;M", "homepage": ";https://www.cs.ox.ac.uk/people/alessandro.abate/", "dblp": "242/8125;19/3904", "google_scholar": "GuzLUmQAAAAJ;https://scholar.google.co.uk/citations?hl=en", "orcid": ";0000-0002-5627-9093", "linkedin": ";", "or_profile": "~Joar_Max_Viktor_Skalse1;~Alessandro_Abate1", "aff": "University of Oxford;University of Oxford", "aff_domain": "ox.ac.uk;ox.ac.uk", "position": "PhD student;Full Professor", "bibtex": "@misc{\nskalse2023the,\ntitle={The Reward Hypothesis is False},\nauthor={Joar Max Viktor Skalse and Alessandro Abate},\nyear={2023},\nurl={https://openreview.net/forum?id=M4UxoupR3az}\n}", "github": "", "project": "", "reviewers": "abfw;UBdX;Y3kp;UtAh;p6jG;dSf2", "site": "https://openreview.net/forum?id=M4UxoupR3az", "pdf_size": 308328, "recommendation": "3;5;5;6;6;8", "confidence": "4;3;2;3;3;4", "correctness": "3;3;3;3;3;3", "technical_novelty": "2;2;2;3;3;3", "empirical_novelty": "0;0;0;0;0;0", "wc_summary_paper": "71;120;67;55;39;270", "wc_strength_and_weaknesses": "683;786;201;473;265;1163", "wc_clarity_quality_novelty_and_reproducibility": "69;27;32;28;145;72", "wc_summary_review": "61;125;80;67;43;22", "wc_review": "884;1058;380;623;492;1527", "wc_reply_reviewers": "730;126;0;115;65;94", "wc_reply_authors": "1528;662;236;382;89;582", "reply_reviewers": "1;1;0;1;1;1", "reply_authors": "3;1;1;1;1;1", "recommendation_avg": [ 5.5, 1.5 ], "confidence_avg": [ 3.1666666666666665, 0.6871842709362768 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 0, 0 ], "wc_summary_paper_avg": [ 103.66666666666667, 78.4169766200038 ], "wc_strength_and_weaknesses_avg": [ 595.1666666666666, 328.09267830226804 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 62.166666666666664, 41.47053840445716 ], "wc_summary_review_avg": [ 66.33333333333333, 32.060706306768864 ], "wc_review_avg": [ 827.3333333333334, 387.35413025063025 ], "wc_reply_reviewers_avg": [ 188.33333333333334, 245.7116648069892 ], "wc_reply_authors_avg": [ 579.8333333333334, 466.19967705789855 ], "reply_reviewers_avg": [ 0.8333333333333334, 0.372677996249965 ], "reply_authors_avg": [ 1.3333333333333333, 0.74535599249993 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.08084520834544429, "corr_recommendation_correctness": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8181638727236501981&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Oxford", "aff_unique_dep": "", "aff_unique_url": "https://www.ox.ac.uk", "aff_unique_abbr": "Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "id": "M8rwWdaGa6x", "title": "Optimal Scalarizations for Provable Multiobjective Optimization", "track": "main", "status": "Withdraw", "tldr": "Don't linearly combine your objectives: Hypervolume scalarizations provide 
provable and more optimal multiobjective optimization.", "abstract": "Linear scalarization is a simple and widely-used technique that can be deployed in any multiobjective setting to combine diverse objectives into one reward function, but such heuristics are not theoretically understood. To that end, we perform a case study of the multiobjective stochastic linear bandits framework with $k$ objectives and our goal is to provably scalarize and explore a diverse set of optimal actions on the Pareto frontier, as measured by the dominated hypervolume. Even in this elementary convex setting, the choice of scalarizations and weight distribution surprisingly affects performance, and the natural use of linear scalarization with uniform weights is suboptimal due to a non-uniform Pareto curvature. Instead, we suggest the usage of the theoretically-inspired hypervolume scalarizations with non-adaptive uniform weights, showing that it comes with novel hypervolume regret bounds of $\\tilde{O}( d T^{-1/2} + T^{-1/k})$, with optimal matching lower bounds of $\\Omega(T^{-1/k})$. We support our theory with strong empirical performance of the hypervolume scalarization that consistently outperforms both the linear and Chebyshev scalarizations in high dimensions.", "keywords": "multiobjective optimization;scalarization;linear bandits", "primary_area": "", "supplementary_material": "/attachment/f4434bdd819df2a89ca2ab9a872dcb0c06b64e77.zip", "author": "Qiuyi Zhang", "authorids": "~Qiuyi_Zhang1", "gender": "M", "homepage": "https://qiuyiz.github.io", "dblp": "133/8559", "google_scholar": "mE11hO8AAAAJ", "orcid": "", "linkedin": "", "or_profile": "~Qiuyi_Zhang1", "aff": "Google", "aff_domain": "google.com", "position": "Researcher", "bibtex": "@misc{\nzhang2023optimal,\ntitle={Optimal Scalarizations for Provable Multiobjective Optimization},\nauthor={Qiuyi Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=M8rwWdaGa6x}\n}", "github": "", "project": "", "reviewers": "Dt6D;ptQr;jHqz", "site": "https://openreview.net/forum?id=M8rwWdaGa6x", "pdf_size": 649732, "recommendation": "3;5;6", "confidence": "4;3;3", "correctness": "2;4;4", "technical_novelty": "3;3;2", "empirical_novelty": "2;3;3", "wc_summary_paper": "111;67;58", "wc_strength_and_weaknesses": "344;261;154", "wc_clarity_quality_novelty_and_reproducibility": "83;117;39", "wc_summary_review": "89;116;22", "wc_review": "627;561;273", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.9428090415820634 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 78.66666666666667, 23.156472577277878 ], "wc_strength_and_weaknesses_avg": [ 253.0, 77.77317446695015 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 79.66666666666667, 31.930480039541457 ], "wc_summary_review_avg": [ 75.66666666666667, 39.51652256405611 ], "wc_review_avg": [ 487.0, 153.7010084547268 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 1, 0 ], "corr_recommendation_confidence": -0.9449111825230683, "corr_recommendation_correctness": 0.944911182523068, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:G8rBLvTzOq8J:scholar.google.com/&scioq=Optimal+Scalarizations+for+Provable+Multiobjective+Optimization&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Addressing Parameter Choice Issues in Unsupervised Domain Adaptation by Aggregation", "status": "Top-5%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11626", "id": "M95oDwJXayG", "poster": "/media/PosterPDFs/ICLR%202023/11626.png?t=1682531721.5392406", "openreview": "https://openreview.net/forum?id=M95oDwJXayG", "slides": "https://iclr.cc/virtual/2023/poster/11626", "video": "https://iclr.cc/virtual/2023/poster/11626", "author_site": "Marius-Constantin Dinu, Markus Holzleitner, Maximilian Beck, Hoan Nguyen, Andrea Huber, Hamid Eghbalzadeh, Bernhard A. Moser, Sergei Pereverzyev, Sepp Hochreiter, Werner Zellinger", "tldr": "A method for addressing the issue of hyper-parameter selection in unsupervised domain adaptation.", "abstract": "We study the problem of choosing algorithm hyper-parameters in unsupervised domain adaptation, i.e., with labeled data in a source domain and unlabeled data in a target domain, drawn from a different input distribution. We follow the strategy to compute several models using different hyper-parameters, and, to subsequently compute a linear aggregation of the models. While several heuristics exist that follow this strategy, methods are still missing that rely on thorough theories for bounding the target error. In this turn, we propose a method that extends weighted least squares to vector-valued functions, e.g., deep neural networks. We show that the target error of the proposed algorithm is asymptotically not worse than twice the error of the unknown optimal aggregation. We also perform a large scale empirical comparative study on several datasets, including text, images, electroencephalogram, body sensor signals and signals from mobile phones. Our method outperforms deep embedded validation (DEV) and importance weighted validation (IWV) on all datasets, setting a new state-of-the-art performance for solving parameter choice issues in unsupervised domain adaptation with theoretical error guarantees. We further study several competitive heuristics, all outperforming IWV and DEV on at least five datasets. However, our method outperforms each heuristic on at least five of seven datasets.", "keywords": "Domain adaptation;parameter choice;model selection;aggregation;importance weighting", "primary_area": "", "supplementary_material": "", "author": "Marius-Constantin Dinu;Markus Holzleitner;Maximilian Beck;Hoan Duc Nguyen;Andrea Huber;Hamid Eghbal-zadeh;Bernhard A. 
Moser;Sergei Pereverzyev;Sepp Hochreiter;Werner Zellinger", "authorids": "~Marius-Constantin_Dinu1;~Markus_Holzleitner1;~Maximilian_Beck1;~Hoan_Duc_Nguyen1;~Andrea_Huber1;~Hamid_Eghbal-zadeh1;~Bernhard_A._Moser1;~Sergei_Pereverzyev1;~Sepp_Hochreiter1;~Werner_Zellinger1", "gender": ";;M;M;F;M;;M;M;", "homepage": ";;http://maxbeck.ai;https://hoannguyen92.github.io/;;http://eghbalz.github.io/;https://www.scch.at/team/bernhard.moser;https://www.ricam.oeaw.ac.at/people/member/?firstname=Sergei&lastname=Pereverzyev;https://www.jku.at/en/institute-for-machine-learning/about-us/team/sepp-hochreiter/;", "dblp": ";271/0626;;354/9387;;https://dblp.uni-trier.de/pid/170/1944.html;74/4251;;h/SeppHochreiter.html;", "google_scholar": ";518MXv8AAAAJ;_YcZWcYAAAAJ;RbGyFLIAAAAJ;;-yGxzA4AAAAJ;https://scholar.google.at/citations?user=_DuPpPEAAAAJ;;https://scholar.google.at/citations?user=tvUH3WMAAAAJ;", "orcid": ";;;0009-0009-5297-787X;;;0000-0001-8373-7523;0000-0001-5980-7026;0000-0001-7449-2528;", "linkedin": ";;maximilianmbeck/;hoan-nguyen-3944b610b/;andrea-huber-69a1b023b/;;bernhard-moser-00794988/;;https://linkedin.com/in/sepp-hochreiter-41514846;", "or_profile": "~Marius-Constantin_Dinu1;~Markus_Holzleitner1;~Maximilian_Beck1;~Hoan_Duc_Nguyen1;~Andrea_Huber1;~Hamid_Eghbal-zadeh1;~Bernhard_A._Moser1;~Sergei_Pereverzyev1;~Sepp_Hochreiter1;~Werner_Zellinger1", "aff": ";Johannes Kepler University Linz;Johannes Kepler University Linz;The Johann Radon Institute for Computational and Applied Mathematics;Johannes Kepler Universit\u00e4t Linz;Meta;Software Competence Center Hagenberg;;Johannes Kepler University Linz;", "aff_domain": ";jku.at;jku.at;ricam.oeaw.ac.at;jku.at;meta.com;scch.at;;jku.at;", "position": ";Postdoc;PhD student;PhD student;MS student;Researcher;Principal Researcher;;Full Professor;", "bibtex": "@inproceedings{\ndinu2023addressing,\ntitle={Addressing Parameter Choice Issues in Unsupervised Domain Adaptation by Aggregation},\nauthor={Marius-Constantin Dinu and Markus Holzleitner and Maximilian Beck and Hoan Duc Nguyen and Andrea Huber and Hamid Eghbal-zadeh and Bernhard A. 
Moser and Sergei Pereverzyev and Sepp Hochreiter and Werner Zellinger},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=M95oDwJXayG}\n}", "github": "", "project": "", "reviewers": "WzwU;V758;2X6X;AZPJ", "pdf_size": 749742, "recommendation": "6;6;6;8", "confidence": "4;3;3;3", "correctness": "3;3;3;4", "technical_novelty": "3;3;2;3", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "53;69;62;108", "wc_strength_and_weaknesses": "247;74;122;170", "wc_clarity_quality_novelty_and_reproducibility": "28;9;46;163", "wc_summary_review": "287;25;48;23", "wc_review": "615;177;278;464", "wc_reply_reviewers": "0;0;0;33", "wc_reply_authors": "276;119;118;221", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;1", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 73.0, 20.988091861815356 ], "wc_strength_and_weaknesses_avg": [ 153.25, 63.888085743744114 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 61.5, 60.043734061099165 ], "wc_summary_review_avg": [ 95.75, 110.85435264345735 ], "wc_review_avg": [ 383.5, 168.70462352881736 ], "wc_reply_reviewers_avg": [ 8.25, 14.289419162443238 ], "wc_reply_authors_avg": [ 183.5, 67.84725491867744 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 10, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": 1.0, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2131049954628286374&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=M95oDwJXayG", "email": ";jku.at;jku.at;ricam.oeaw.ac.at;jku.at;meta.com;scch.at;;jku.at;", "author_num": 10, "aff_unique_index": "0;0;1;2;3;4;0", "aff_unique_norm": "Johannes Kepler University;Johann Radon Institute for Computational and Applied Mathematics;Johannes Kepler University Linz;Meta;Software Competence Center Hagenberg", "aff_unique_dep": ";Institute for Computational and Applied Mathematics;;Meta Platforms, Inc.;", "aff_unique_url": "https://www.jku.at;https://www.ricam.oeaw.ac.at/;https://www.jku.at;https://meta.com;https://www.scc-hagenberg.at", "aff_unique_abbr": "JKU;RICAM;JKU;Meta;", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Linz;", "aff_country_unique_index": "0;0;0;0;1;0;0", "aff_country_unique": "Austria;United States" }, { "title": "Deep Generative Modeling on Limited Data with Regularization by Nontransferable Pre-trained Models", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11892", "id": "M9u_ctqFUlg", "poster": "/media/PosterPDFs/ICLR%202023/11892.png?t=1680963667.1265914", "openreview": "https://openreview.net/forum?id=M9u_ctqFUlg", "slides": "https://iclr.cc/virtual/2023/poster/11892", "video": "https://iclr.cc/virtual/2023/poster/11892", "author_site": "Yong Zhong, Hongtao Liu, Xiaodong Liu, Fan Bao, Weiran Shen, Chongxuan Li", "tldr": "", "abstract": "Deep generative models (DGMs) are data-eager because learning a complex model on limited data suffers from a large variance and easily overfits. 
Inspired by the classical perspective of the bias-variance tradeoff, we propose regularized deep generative model (Reg-DGM), which leverages a nontransferable pre-trained model to reduce the variance of generative modeling with limited data. Formally, Reg-DGM optimizes a weighted sum of a certain divergence and the expectation of an energy function, where the divergence is between the data and the model distributions, and the energy function is defined by the pre-trained model w.r.t. the model distribution. We analyze a simple yet representative Gaussian-fitting case to demonstrate how the weighting hyperparameter trades off the bias and the variance. Theoretically, we characterize the existence and the uniqueness of the global minimum of Reg-DGM in a non-parametric setting and prove its convergence with neural networks trained by gradient-based methods. Empirically, with various pre-trained feature extractors and a data-dependent energy function, Reg-DGM consistently improves the generation performance of strong DGMs with limited data and achieves competitive results to the state-of-the-art methods. Our implementation is available at https://github.com/ML-GSAI/Reg-ADA-APA.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/15c1389934d2c590a8b4e0912c4849372ea9b278.zip", "author": "Yong Zhong;Hongtao Liu;Xiaodong Liu;Fan Bao;Weiran Shen;Chongxuan Li", "authorids": "~Yong_Zhong2;~Hongtao_Liu2;~Xiaodong_Liu2;~Fan_Bao1;~Weiran_Shen1;~Chongxuan_Li1", "gender": ";M;M;M;M;M", "homepage": ";https://www.ht6.info;;https://baofff.github.io/;https://www.weiran-shen.info/;http://ml.cs.tsinghua.edu.cn/~chongxuan", "dblp": ";;;71/3877;159/2147;161/9965", "google_scholar": ";;;;-lXgERkAAAAJ;UKMcQn4AAAAJ", "orcid": ";;;;0000-0003-4366-9276;0000-0002-0912-9076", "linkedin": ";;%E6%99%93%E5%86%AC-%E5%88%98-036a75115/;;;", "or_profile": "~Yong_Zhong2;~Hongtao_Liu2;~Xiaodong_Liu2;~Fan_Bao1;~Weiran_Shen1;~Chongxuan_Li1", "aff": ";Renmin University of China;Renmin University of China;Tsinghua University;Renmin University of China;Renmin University of China", "aff_domain": ";ruc.edu.cn;ruc.edu.cn;tsinghua.edu.cn;ruc.edu.cn;ruc.edu.cn", "position": ";PhD student;MS student;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhong2023deep,\ntitle={Deep Generative Modeling on Limited Data with Regularization by Nontransferable Pre-trained Models},\nauthor={Yong Zhong and Hongtao Liu and Xiaodong Liu and Fan Bao and Weiran Shen and Chongxuan Li},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=M9u_ctqFUlg}\n}", "github": "", "project": "", "reviewers": "rg8D;ZXuR;rAFU;5xKN", "pdf_size": 51561409, "recommendation": "6;6;6;8", "confidence": "3;4;3;3", "correctness": "4;2;3;4", "technical_novelty": "3;3;2;3", "empirical_novelty": "3;2;2;3", "wc_summary_paper": "77;69;78;196", "wc_strength_and_weaknesses": "225;216;192;102", "wc_clarity_quality_novelty_and_reproducibility": "78;33;4;36", "wc_summary_review": "61;158;35;31", "wc_review": "441;476;309;365", "wc_reply_reviewers": "0;27;0;0", "wc_reply_authors": "250;1136;521;138", "reply_reviewers": "0;1;0;0", "reply_authors": "1;6;1;1", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 105.0, 52.65453446760307 ], 
"wc_strength_and_weaknesses_avg": [ 183.75, 48.715372317164935 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 37.75, 26.38536526182649 ], "wc_summary_review_avg": [ 71.25, 51.39248485916983 ], "wc_review_avg": [ 397.75, 65.08215961382966 ], "wc_reply_reviewers_avg": [ 6.75, 11.691342951089922 ], "wc_reply_authors_avg": [ 511.25, 386.6441354786078 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 2.165063509461097 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": 0.5222329678670935, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10261120306683810008&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=M9u_ctqFUlg", "email": ";ruc.edu.cn;ruc.edu.cn;tsinghua.edu.cn;ruc.edu.cn;ruc.edu.cn", "author_num": 6, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Renmin University of China;Tsinghua University", "aff_unique_dep": ";", "aff_unique_url": "http://www.ruc.edu.cn;https://www.tsinghua.edu.cn", "aff_unique_abbr": "RUC;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "MB_O268uCY", "title": "The ethical ambiguity of AI data enrichment: Measuring gaps in research ethics norms and practices", "track": "main", "status": "Reject", "tldr": "This paper shows how AI researchers engage with research ethics when employing crowdworkers. The work finds research ethics disclosures are infrequent in AI papers, inconsistently following venue publication policies.", "abstract": "The technical progression of artificial intelligence (AI) research has been built on breakthroughs in fields such as computer science, statistics, and mathematics. However, in the past decade AI researchers have increasingly looked to the social sciences, turning to human interactions to solve the challenges of model development. Paying crowdsourcing workers to generate or curate data, or \u2018data enrichment\u2019, has become indispensable for many areas of AI research, from natural language processing to inverse reinforcement learning. Other fields that routinely interact with crowdsourcing workers, such as Psychology, have developed common governance requirements and norms to ensure research is undertaken ethically. This study explores how, and to what extent, comparable research ethics requirements and norms have developed for AI research and data enrichment. We focus on the approach taken by two leading AI conferences: ICLR and NeurIPS. In a longitudinal study of accepted papers, and a comparison with Springer journal articles and Psychology papers, this work finds that ICLR and NeurIPS have established protocols for human data collection which are inconsistently followed by authors. Whilst Psychology papers engaging with crowdsourcing workers frequently disclose ethics reviews, payment data, demographic data and other information, such disclosures are far less common in leading AI conferences despite similar guidance. 
The work concludes with hypotheses to explain these gaps in research ethics practices and considerations of their implications.", "keywords": "ethics;disclosures;crowdsourcing;data enrichment", "primary_area": "", "supplementary_material": "", "author": "Will Hawkins;Brent Mittelstadt", "authorids": "~Will_Hawkins1;brent.mittelstadt@oii.ox.ac.uk", "gender": ";", "homepage": ";", "dblp": ";", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;", "orcid": ";", "linkedin": "wthawkins/;", "or_profile": "~Will_Hawkins1;brent.mittelstadt@oii.ox.ac.uk", "aff": ";", "aff_domain": ";", "position": ";", "bibtex": "@misc{\nhawkins2023the,\ntitle={The ethical ambiguity of {AI} data enrichment: Measuring gaps in research ethics norms and practices},\nauthor={Will Hawkins and Brent Mittelstadt},\nyear={2023},\nurl={https://openreview.net/forum?id=MB_O268uCY}\n}", "github": "", "project": "", "reviewers": "8fXg;QRKz;aCRE;rXNM", "site": "https://openreview.net/forum?id=MB_O268uCY", "pdf_size": 149216, "recommendation": "3;3;5;10", "confidence": "3;3;4;5", "correctness": "2;2;3;3", "technical_novelty": "2;2;2;4", "empirical_novelty": "0;2;2;4", "wc_summary_paper": "130;58;72;68", "wc_strength_and_weaknesses": "428;96;266;560", "wc_clarity_quality_novelty_and_reproducibility": "60;39;26;56", "wc_summary_review": "40;49;109;38", "wc_review": "658;242;473;722", "wc_reply_reviewers": "162;0;0;0", "wc_reply_authors": "1386;665;790;388", "reply_reviewers": "1;0;0;0", "reply_authors": "2;1;1;1", "recommendation_avg": [ 5.25, 2.8613807855648994 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.5, 0.8660254037844386 ], "empirical_novelty_avg": [ 2.0, 1.4142135623730951 ], "wc_summary_paper_avg": [ 82.0, 28.178005607210743 ], "wc_strength_and_weaknesses_avg": [ 337.5, 174.01939547073482 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 45.25, 13.626720074911644 ], "wc_summary_review_avg": [ 59.0, 29.16333314283537 ], "wc_review_avg": [ 523.75, 186.60436088151852 ], "wc_reply_reviewers_avg": [ 40.5, 70.14805770653953 ], "wc_reply_authors_avg": [ 807.25, 364.4361226607483 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.9746972340815895, "corr_recommendation_correctness": 0.7863336509949341, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10091218249082437972&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5 }, { "id": "MCe881WzBr0", "title": "Variational Classification", "track": "main", "status": "Reject", "tldr": "We show how we can view a classifier as a latent variable model and impose class-conditional priors on this latent space that render the classifier more robust to OOD and adversarial data", "abstract": "Classification tasks, ubiquitous across machine learning, are commonly tackled by a suitably designed neural network with a softmax output layer, mapping each data point to a categorical distribution over class labels. \nWe extend this familiar model from a latent variable perspective to variational classification (VC), analogous to how the variational auto-encoder relates to its deterministic counterpart.
We derive a training objective based on the ELBO together with an \\textit{adversarial} approach for optimising it.\n\nWithin this framework, we identify design choices made implicitly in off-the-shelf softmax functions and can instead include domain-specific assumptions, such as class-conditional latent priors. We demonstrate the benefits of the VC model in image classification. We show on several standard datasets that treating inputs to the softmax layer as latent variables under a mixture-of-Gaussians prior improves several desirable aspects of a classifier, such as prediction accuracy, calibration, out-of-domain calibration and adversarial robustness.", "keywords": "Latent priors;classification", "primary_area": "", "supplementary_material": "", "author": "Shehzaad Zuzar Dhuliawala;Mrinmaya Sachan;Carl Allen", "authorids": "~Shehzaad_Zuzar_Dhuliawala3;~Mrinmaya_Sachan3;~Carl_Allen1", "gender": "M;M;M", "homepage": "https://carl-allen.github.io/;https://sites.google.com/site/mrinsachan/;https://shehzaadzd.github.io", "dblp": "220/5654;86/10440.html;184/8733", "google_scholar": "https://scholar.google.co.uk/citations?user=wRcURR8AAAAJ;Tpp9ZjoAAAAJ;7O33ij4AAAAJ", "orcid": "0000-0002-1536-657X;;", "linkedin": ";;", "or_profile": "~Carl_Allen1;~MRINMAYA_SACHAN2;~Shehzaad_Zuzar_Dhuliawala1", "aff": "ETHZ - ETH Zurich;Swiss Federal Institute of Technology;Swiss Federal Institute of Technology", "aff_domain": "ethz.ch;ethz.ch;ethz.ch", "position": "Postdoc;Assistant Professor;PhD student", "bibtex": "@misc{\ndhuliawala2023variational,\ntitle={Variational Classification},\nauthor={Shehzaad Zuzar Dhuliawala and Mrinmaya Sachan and Carl Allen},\nyear={2023},\nurl={https://openreview.net/forum?id=MCe881WzBr0}\n}", "github": "", "project": "", "reviewers": "aooA;qvVd;Lvkv", "site": "https://openreview.net/forum?id=MCe881WzBr0", "pdf_size": 851388, "recommendation": "5;5;5", "confidence": "4;3;4", "correctness": "3;3;3", "technical_novelty": "2;2;3", "empirical_novelty": "2;3;2", "wc_summary_paper": "79;65;114", "wc_strength_and_weaknesses": "131;326;367", "wc_clarity_quality_novelty_and_reproducibility": "30;53;13", "wc_summary_review": "29;37;41", "wc_review": "269;481;535", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "130;239;555", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 86.0, 20.607442021431645 ], "wc_strength_and_weaknesses_avg": [ 274.6666666666667, 102.95738061072758 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 32.0, 16.391054470858997 ], "wc_summary_review_avg": [ 35.666666666666664, 4.988876515698588 ], "wc_review_avg": [ 428.3333333333333, 114.80224542906622 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 308.0, 180.235031740965 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10869855821842194241&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;1", "aff_unique_norm": "ETH Zurich;Swiss Federal Institute of Technology", "aff_unique_dep": ";", "aff_unique_url":
"https://www.ethz.ch;https://www.ethz.ch", "aff_unique_abbr": "ETHZ;ETH Zurich", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Switzerland" }, { "id": "MEdZ-7BOsKM", "title": "Human Pose Estimation in the Dark", "track": "main", "status": "Withdraw", "tldr": "We for the first time tackle human pose estimation under extremely low-light conditions, and introduce a new training strategy and new datasets for the challenging task.", "abstract": "We study human pose estimation in extremely low-light images. This task is challenging due to the difficulty of collecting real low-light images with accurate labels, and severely corrupted inputs that degrade prediction quality significantly. To address the first issue, we develop a dedicated camera system and build a new dataset of real low-light images with accurate pose labels. Thanks to our camera system, each low-light image in our dataset is coupled with a near-perfectly aligned well-lit image, which enables accurate pose labeling and is used as privileged information during training. We also propose a new model that fully exploits the privileged information to learn representation insensitive to lighting conditions. Our model demonstrates outstanding performance on real extremely low-light images, and extensive analyses validate that both of our dataset and model contribute to the success.", "keywords": "Low-light image understanding;Robustness;Learning using privileged information;Human pose estimation", "primary_area": "", "supplementary_material": "", "author": "Sohyun Lee;Jaesung Rim;Boseung Jeong;Geonu Kim;ByungJu Woo;Haechan Lee;Sunghyun Cho;Suha Kwak", "authorids": "~Sohyun_Lee1;~Jaesung_Rim1;~Boseung_Jeong1;~Geonu_Kim2;~ByungJu_Woo1;~Haechan_Lee1;~Sunghyun_Cho3;~Suha_Kwak3", "gender": ";M;M;M;M;M;M;M", "homepage": "https://sohyun-l.github.io/;;;http://cg.postech.ac.kr;https://github.com/ByungJu-Woo;https://cg.postech.ac.kr;http://scho.pe.kr;https://suhakwak.github.io/", "dblp": "317/6799;279/1980;277/5731;194/2645;;;22/6163.html;65/6173", "google_scholar": "https://scholar.google.com/citations?hl=ko;Bsvwoq8m0e8C;CiQLGVMAAAAJ;;;;JcmBvtUAAAAJ;-gscDIEAAAAJ", "orcid": ";;;;;;0000-0001-7627-3513;", "linkedin": "sohyun-lee-858616233/;jsrim-1b36231a3/;;;;;sunghyun-cho-63a96099/;", "or_profile": "~Sohyun_Lee1;~Jaesung_Rim1;~Boseung_Jeong1;~Geonu_Kim2;~ByungJu_Woo1;~Haechan_Lee1;~Sunghyun_Cho3;~Suha_Kwak3", "aff": "POSTECH;POSTECH;POSTECH;Pohang University of Science and Technology;Pohang University of Science and Technology;Pohang University of Science and Technology;POSTECH;POSTECH", "aff_domain": "postech.ac.kr;postech.ac.kr;postech.ac.kr;postech.ac.kr;postech.edu;postech.edu;postech.ac.kr;postech.ac.kr", "position": "PhD student;PhD student;PhD student;MS student;Undergrad student;PhD student;Associate Professor;Associate Professor", "bibtex": "@misc{\nlee2023human,\ntitle={Human Pose Estimation in the Dark},\nauthor={Sohyun Lee and Jaesung Rim and Boseung Jeong and Geonu Kim and ByungJu Woo and Haechan Lee and Sunghyun Cho and Suha Kwak},\nyear={2023},\nurl={https://openreview.net/forum?id=MEdZ-7BOsKM}\n}", "github": "", "project": "", "reviewers": "zyXf;iC86;HxkV;5n7X", "site": "https://openreview.net/forum?id=MEdZ-7BOsKM", "pdf_size": 0, "recommendation": "3;5;5;6", "confidence": "4;3;3;4", "correctness": "2;3;3;3", "technical_novelty": "2;1;3;3", "empirical_novelty": "3;2;3;3", "wc_summary_paper": "103;105;39;143", "wc_strength_and_weaknesses": "279;312;63;305", 
"wc_clarity_quality_novelty_and_reproducibility": "27;53;61;253", "wc_summary_review": "56;21;30;99", "wc_review": "465;491;193;800", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.82915619758885 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 97.5, 37.34635189680513 ], "wc_strength_and_weaknesses_avg": [ 239.75, 102.78466568511084 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 98.5, 90.08190717341635 ], "wc_summary_review_avg": [ 51.5, 30.286135441815617 ], "wc_review_avg": [ 487.25, 215.00276161017095 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": -0.2294157338705618, "corr_recommendation_correctness": 0.9271726499455306, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:DvKiNqlc2f8J:scholar.google.com/&scioq=Human+Pose+Estimation+in+the+Dark&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0;0;0;0", "aff_unique_norm": "Pohang University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.postech.ac.kr", "aff_unique_abbr": "POSTECH", "aff_campus_unique_index": "0;0;0;0;0;0;0;0", "aff_campus_unique": "Pohang", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "MFD2b2cwr5d", "title": "Learning from Others: Similarity-based Regularization for Mitigating Artifacts", "track": "main", "status": "Reject", "tldr": "Similarity regularization reduces intrinsic and extrinsic bias in NLU models", "abstract": "Common methods for mitigating spurious correlations in natural language understanding (NLU) usually operate in the output space, encouraging a main model to behave differently from a bias model by down-weighing examples where the bias model is confident.\nWhile improving out of distribution (OOD) performance, it was recently observed that the internal representations of the presumably debiased models are actually more, rather than less biased. \nWe propose SimgReg, a new method for debiasing internal model components via similarity-based regularization, in representation space: We encourage the model to learn representations that are either similar to an unbiased model or different from a biased model. We experiment with three NLU tasks and different kinds of biases.\nWe find that SimReg improves OOD performance, with little in-distribution degradation. 
Moreover, the representations learned by SimReg are less biased than in other methods.\n", "keywords": "NLP;robustness;spurious correlations;Dataset bias;natural language understanding;shortcut learning", "primary_area": "", "supplementary_material": "", "author": "Reda Igbaria;Yonatan Belinkov", "authorids": "~Reda_Igbaria1;~Yonatan_Belinkov1", "gender": "M;M", "homepage": ";https://www.belinkov.com", "dblp": ";136/8705", "google_scholar": ";https://scholar.google.com/citations?authorid=K-6ujU4AAAAJ", "orcid": ";", "linkedin": "redaig/;", "or_profile": "~Reda_Igbaria1;~Yonatan_Belinkov1", "aff": ";Technion, Technion", "aff_domain": ";technion.ac.il", "position": ";Assistant Professor", "bibtex": "@misc{\nigbaria2023,\ntitle={ Learning from Others: Similarity-based Regularization for Mitigating Artifacts},\nauthor={Reda Igbaria and Yonatan Belinkov},\nyear={2023},\nurl={https://openreview.net/forum?id=MFD2b2cwr5d}\n}", "github": "", "project": "", "reviewers": "qJJe;73u6;UAbR;2WEz", "site": "https://openreview.net/forum?id=MFD2b2cwr5d", "pdf_size": 605982, "recommendation": "3;3;5;5", "confidence": "5;4;4;3", "correctness": "2;2;3;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "98;86;91;93", "wc_strength_and_weaknesses": "479;769;183;158", "wc_clarity_quality_novelty_and_reproducibility": "95;54;34;8", "wc_summary_review": "69;129;35;29", "wc_review": "741;1038;343;288", "wc_reply_reviewers": "0;0;61;0", "wc_reply_authors": "435;634;330;198", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;2;1", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 92.0, 4.301162633521313 ], "wc_strength_and_weaknesses_avg": [ 397.25, 249.01041644879035 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 47.75, 31.78344694963087 ], "wc_summary_review_avg": [ 65.5, 39.70831147253683 ], "wc_review_avg": [ 602.5, 306.2241825852426 ], "wc_reply_reviewers_avg": [ 15.25, 26.413774815425377 ], "wc_reply_authors_avg": [ 399.25, 159.43866375506286 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.7071067811865475, "corr_recommendation_correctness": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:sK5j22N1jyIJ:scholar.google.com/&scioq=Learning+from+Others:+Similarity-based+Regularization+for+Mitigating+Artifacts&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Technion - Israel Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.technion.ac.il/en/", "aff_unique_abbr": "Technion", "aff_country_unique_index": "0", "aff_country_unique": "Israel" }, { "id": "MGnPyYQ2QAA", "title": "On a Benefit of Masked Language Model Pretraining: Robustness to Simplicity Bias", "track": "main", "status": "Reject", "tldr": "We theoretically and empirically show that MLM pretraining makes models robust to lexicon-level spurious features.", "abstract": "Despite the success of pretrained masked language models (MLM), why MLM pretraining is useful is still a question not fully answered. In this work we theoretically and empirically show that MLM pretraining makes models robust to lexicon-level spurious features, partly answering the question. 
Our explanation is that MLM pretraining may alleviate problems brought by simplicity bias (Shah et al., 2020), which refers to the phenomenon that a deep model tends to rely excessively on simple features. In NLP tasks, those simple features could be token-level features whose spurious association with the label can be learned easily. We show that MLM pretraining makes learning from the context easier. Thus, pretrained models are less likely to rely excessively on a single token. We also explore the theoretical explanations of MLM\u2019s efficacy in causal settings. Compared with Wei et al. (2021), we achieve similar results with milder assumptions. Finally, we close the gap between our theories and real-world practice by conducting experiments on real-world tasks.", "keywords": "language model;robustness;pretraining", "primary_area": "", "supplementary_material": "/attachment/a80c4089c4fc0c81494762ec0b6c6df43b43ce4d.zip", "author": "Ting-Rui Chiang", "authorids": "~Ting-Rui_Chiang1", "gender": "Not Specified", "homepage": "https://ctinray.github.io/", "dblp": "230/3609", "google_scholar": "aIgoIxwAAAAJ", "orcid": "", "linkedin": "", "or_profile": "~Ting-Rui_Chiang1", "aff": "University of Southern California", "aff_domain": "usc.edu", "position": "PhD student", "bibtex": "@misc{\nchiang2023on,\ntitle={On a Benefit of Masked Language Model Pretraining: Robustness to Simplicity Bias},\nauthor={Ting-Rui Chiang},\nyear={2023},\nurl={https://openreview.net/forum?id=MGnPyYQ2QAA}\n}", "github": "", "project": "", "reviewers": "vBVm;RC11;QRgo", "site": "https://openreview.net/forum?id=MGnPyYQ2QAA", "pdf_size": 675469, "recommendation": "3;3;5", "confidence": "3;4;2", "correctness": "2;2;3", "technical_novelty": "1;3;3", "empirical_novelty": "1;3;3", "wc_summary_paper": "179;88;125", "wc_strength_and_weaknesses": "538;277;460", "wc_clarity_quality_novelty_and_reproducibility": "217;23;94", "wc_summary_review": "40;36;90", "wc_review": "974;424;769", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "244;58;227", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "correctness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.9428090415820634 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.9428090415820634 ], "wc_summary_paper_avg": [ 130.66666666666666, 37.366057086910075 ], "wc_strength_and_weaknesses_avg": [ 425.0, 109.38921336219582 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 111.33333333333333, 80.14292787819079 ], "wc_summary_review_avg": [ 55.333333333333336, 24.567367696917707 ], "wc_review_avg": [ 722.3333333333334, 226.94835437948333 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 176.33333333333334, 83.96163144886809 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 1, 0 ], "corr_recommendation_confidence": -0.8660254037844387, "corr_recommendation_correctness": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:hEKq8hrcSDYJ:scholar.google.com/&scioq=On+a+Benefit+of+Masked+Language+Model+Pretraining:+Robustness+to+Simplicity+Bias&hl=en&as_sdt=0,44", "gs_version_total": 2, "aff_unique_index": "0", "aff_unique_norm": "University of Southern California", "aff_unique_dep": "", "aff_unique_url": "https://www.usc.edu", "aff_unique_abbr": "USC", "aff_campus_unique_index": "0", "aff_campus_unique":
"Los Angeles", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "MHXO5xRCSXh", "title": "PET-NeuS: Positional Encoding Triplanes for Neural Surfaces", "track": "main", "status": "Withdraw", "tldr": "We improve NeuS by introducing Tri-planes, modulated positional encoding, and learned self-attention convolutions.", "abstract": "The signed distance function (SDF) represented by an MLP network is commonly used for multi-view neural surface reconstruction. We build on the successful recent method NeuS to extend it by three new components. The first component is to borrow the Tri-plane representation from EG3D and represent signed distance fields as a mixture of tri-planes and MLPs instead of representing it with MLPs only. Discretizing the scene space with Tri-planes leads to a more expressive data structure but involving tri-planes will introduce noise due to discrete discontinuities. The second component is to use a new type of positional encoding with learnable weights to combat noise in the reconstruction process. We divide the features in the tri-plane into multiple frequency bands and modulate them with sin and cos functions of different frequency. The third component is to use learnable convolution operations on the tri-plane features using self-attention convolution to produce features with different frequency. The experiments show that PET-NeuS achieves high-fidelity surface reconstruction on standard datasets. Following previous work and using the Chamfer metric as the most important way to measure surface reconstruction quality, we are able to improve upon the NeuS baseline by 25\\% on Nerf-synthetic (0.84 compared to 1.12) and by 14\\% on DTU (0.75 compared to 0.87). The qualitative evaluation reveals how our method can better control the interference of high-frequency noise.", "keywords": "Multi-view Surface Reconstruction;Neural Radiance Fields;Signed Distance Functions", "primary_area": "", "supplementary_material": "/attachment/026aed070472f84ae93900f90e25eddcd9077a30.zip", "author": "Yiqun Wang;Ivan Skorokhodov;Peter Wonka", "authorids": "~Yiqun_Wang1;~Ivan_Skorokhodov1;~Peter_Wonka1", "gender": "M;M;M", "homepage": ";https://universome.github.io/;http://peterwonka.net", "dblp": "71/2818-1;223/0010;98/5522", "google_scholar": "g55eWKgAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=0EKXSXgAAAAJ", "orcid": ";0000-0002-7611-9310;0000-0003-0627-9746", "linkedin": ";ivan-skorokhodov;", "or_profile": "~Yiqun_Wang1;~Ivan_Skorokhodov1;~Peter_Wonka1", "aff": "Chongqing University;KAUST;KAUST", "aff_domain": "cqu.edu.cn;kaust.edu.sa;kaust.edu.sa", "position": "Associate Professor;PhD student;Full Professor", "bibtex": "@misc{\nwang2023petneus,\ntitle={{PET}-NeuS: Positional Encoding Triplanes for Neural Surfaces},\nauthor={Yiqun Wang and Ivan Skorokhodov and Peter Wonka},\nyear={2023},\nurl={https://openreview.net/forum?id=MHXO5xRCSXh}\n}", "github": "", "project": "", "reviewers": "JSuc;xyVs;haKG;Ma2m", "site": "https://openreview.net/forum?id=MHXO5xRCSXh", "pdf_size": 21092628, "recommendation": "3;3;5;8", "confidence": "5;5;5;5", "correctness": "2;3;3;4", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "78;77;127;121", "wc_strength_and_weaknesses": "438;335;331;171", "wc_clarity_quality_novelty_and_reproducibility": "77;526;55;22", "wc_summary_review": "39;87;68;25", "wc_review": "632;1025;581;339", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", 
"reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.75, 2.0463381929681126 ], "confidence_avg": [ 5.0, 0.0 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 100.75, 23.34925052330374 ], "wc_strength_and_weaknesses_avg": [ 318.75, 95.47872799739217 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 170.0, 206.46670433752752 ], "wc_summary_review_avg": [ 54.75, 24.23195204683271 ], "wc_review_avg": [ 644.25, 246.1192546307582 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.8638684255813602, "gs_citation": 48, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14276326967624506299&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 9, "aff_unique_index": "0;1;1", "aff_unique_norm": "Chongqing University;King Abdullah University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.cqu.edu.cn;https://www.kaust.edu.sa", "aff_unique_abbr": "CQU;KAUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "China;Saudi Arabia" }, { "id": "MHgYMtHpKsC", "title": "Learning Shareable Bases for Personalized Federated Image Classification", "track": "main", "status": "Reject", "tldr": "", "abstract": "Personalized federated learning (PFL) aims to leverage the collective wisdom of clients' data while constructing customized models that are tailored to individual client's data distributions. The existing work of PFL mostly aims to personalize for participating clients. In this paper, we focus on a less studied but practically important scenario---generating a personalized model for a new client efficiently. Different from most previous approaches that learn a whole or partial network for each client, we explicitly model the clients' overall meta distribution and embed each client into a low dimension space. We propose FedBasis, a novel PFL algorithm that learns a set of few, shareable basis models, upon which each client only needs to learn the coefficients for combining them into a personalized network. FedBasis is parameter-efficient, robust, and more accurate compared to other competitive PFL baselines, especially in a low data regime, without increasing the inference cost. 
To demonstrate its applicability, we further present a PFL evaluation protocol for image classification, featuring larger data discrepancies across clients in both the image and label spaces as well as more faithful training and test splits.", "keywords": "Federated learning;Computer vision", "primary_area": "", "supplementary_material": "", "author": "Hong-You Chen;Jike zhong;Mingda Zhang;Xuhui Jia;Hang Qi;Boqing Gong;Wei-Lun Chao;Li Zhang", "authorids": "~Hong-You_Chen1;zhong.523@osu.edu;~Mingda_Zhang1;~Xuhui_Jia1;~Hang_Qi1;~Boqing_Gong1;~Wei-Lun_Chao1;~Li_Zhang28", "gender": ";;M;M;;M;M;M", "homepage": "https://sites.google.com/view/hongyouc/%E9%A6%96%E9%A0%81;;https://people.cs.pitt.edu/~mzhang/;https://scholar.google.com/citations?view_op=search_authors&mauthors=xuhui+jia&hl=en&oi=ao;;http://boqinggong.info;https://sites.google.com/view/wei-lun-harry-chao;https://research.google/people/105588/", "dblp": "228/5569;;25/10133;116/8360;96/1046-1;29/7457;64/8842;", "google_scholar": "uxlU7J8AAAAJ;;4aIwj4QAAAAJ;https://scholar.google.com/citations?view_op=search_authors;72jdrSUAAAAJ;lv9ZeVUAAAAJ;PGKakWwAAAAJ;", "orcid": ";;;;;;0000-0003-1269-7231;", "linkedin": ";;;;;boqing-gong-46aa5821/;;", "or_profile": "~Hong-You_Chen1;zhong.523@osu.edu;~Mingda_Zhang1;~Xuhui_Jia1;~Hang_Qi1;~Boqing_Gong1;~Wei-Lun_Chao1;~Li_Zhang28", "aff": ";;Google DeepMind;Google;Google;Google;Ohio State University;Google", "aff_domain": ";;google.com;google.com;google.com;google.com;osu.edu;google.com", "position": ";;Software Engineer;Researcher;Researcher;Research Scientist;Assistant Professor;Software engineer", "bibtex": "@misc{\nchen2023learning,\ntitle={Learning Shareable Bases for Personalized Federated Image Classification},\nauthor={Hong-You Chen and Jike zhong and Mingda Zhang and Xuhui Jia and Hang Qi and Boqing Gong and Wei-Lun Chao and Li Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=MHgYMtHpKsC}\n}", "github": "", "project": "", "reviewers": "2JAA;ES1X;5bfz", "site": "https://openreview.net/forum?id=MHgYMtHpKsC", "pdf_size": 6360412, "recommendation": "5;5;8", "confidence": "4;5;3", "correctness": "3;3;3", "technical_novelty": "2;2;3", "empirical_novelty": "3;2;2", "wc_summary_paper": "56;67;104", "wc_strength_and_weaknesses": "242;29;346", "wc_clarity_quality_novelty_and_reproducibility": "16;16;103", "wc_summary_review": "25;184;93", "wc_review": "339;296;646", "wc_reply_reviewers": "0;0;31", "wc_reply_authors": "124;124;86", "reply_reviewers": "0;0;1", "reply_authors": "2;2;2", "recommendation_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 75.66666666666667, 20.531818125912658 ], "wc_strength_and_weaknesses_avg": [ 205.66666666666666, 131.9402221546645 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 45.0, 41.012193308819754 ], "wc_summary_review_avg": [ 100.66666666666667, 65.13746148637425 ], "wc_review_avg": [ 427.0, 155.84821675805813 ], "wc_reply_reviewers_avg": [ 10.333333333333334, 14.613540144521982 ], "wc_reply_authors_avg": [ 111.33333333333333, 17.9133717900592 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": -0.8660254037844387, "corr_recommendation_correctness": 0.0, "gs_citation": 0, 
"gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Dlp2OV7arPgJ:scholar.google.com/&scioq=Learning+Shareable+Bases+for+Personalized+Federated+Image+Classification&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "Google;Ohio State University", "aff_unique_dep": "Google DeepMind;", "aff_unique_url": "https://deepmind.com;https://www.osu.edu", "aff_unique_abbr": "DeepMind;OSU", "aff_campus_unique_index": "1;1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;1;1;1;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Open-Vocabulary Object Detection upon Frozen Vision and Language Models", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11429", "id": "MIMwy4kh9lf", "poster": "/media/PosterPDFs/ICLR%202023/11429.png?t=1682115050.3578255", "openreview": "https://openreview.net/forum?id=MIMwy4kh9lf", "slides": "https://iclr.cc/virtual/2023/poster/11429", "video": "https://iclr.cc/virtual/2023/poster/11429", "author_site": "Weicheng Kuo, Yin Cui, Xiuye Gu, AJ Piergiovanni, Anelia Angelova", "tldr": "We propose a novel open-vocabulary detection approach by building upon frozen vision and language models.", "abstract": "We present F-VLM, a simple open-vocabulary object detection method built uponFrozenVision andLanguageModels. F-VLM simplifies the current multi-stage training pipeline by eliminating the need for knowledge distillation or detection-tailored pretraining. Surprisingly, we observe that a frozen VLM: 1) retains the locality-sensitive features necessary for detection, and 2) is a strong region classifier. We finetune only the detector head and combine the detector and VLM outputs for each region at inference time. F-VLM shows compelling scaling behavior and achieves +6.5 mask AP improvement over the previous state of theart on novel categories of LVIS open-vocabulary detection benchmark. In addition, we demonstrate very competitive results on COCO open-vocabulary detection benchmark and cross-dataset transfer detection, in addition to significant training speed-up and compute savings. 
Code will be released.\n", "keywords": "open-vocabulary recognition;object detection;vision and language", "primary_area": "", "supplementary_material": "", "author": "Weicheng Kuo;Yin Cui;Xiuye Gu;AJ Piergiovanni;Anelia Angelova", "authorids": "~Weicheng_Kuo1;~Yin_Cui1;~Xiuye_Gu1;~AJ_Piergiovanni1;~Anelia_Angelova1", "gender": "M;M;F;;", "homepage": "https://weichengkuo.github.io/;https://ycui.me/;https://laoreja.github.io/;http://homes.sice.indiana.edu/ajpiergi/;https://research.google/people/aneliaangelova/", "dblp": "163/2203;47/8023.html;199/1920;175/9876;46/3065", "google_scholar": ";iP5m52IAAAAJ;qCrypnoAAAAJ;https://scholar.google.com/citations?hl=en;nkmDOPgAAAAJ", "orcid": ";0000-0003-2882-2033;;;", "linkedin": ";;xiuyegu/;;", "or_profile": "~Weicheng_Kuo1;~Yin_Cui1;~Xiuye_Gu1;~AJ_Piergiovanni1;~Anelia_Angelova1", "aff": "Google Deepmind;Google;Google;Google;California Institute of Technology", "aff_domain": "google.com;google.com;google.com;google.com;caltech.edu", "position": "Research Scientist;Research Scientist;Researcher;Research Scientist;PhD student", "bibtex": "@inproceedings{\nkuo2023openvocabulary,\ntitle={Open-Vocabulary Object Detection upon Frozen Vision and Language Models},\nauthor={Weicheng Kuo and Yin Cui and Xiuye Gu and AJ Piergiovanni and Anelia Angelova},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=MIMwy4kh9lf}\n}", "github": "", "project": "", "reviewers": "e3Lv;x64o;kxyx", "pdf_size": 8465108, "recommendation": "8;8;8", "confidence": "5;5;4", "correctness": "3;3;3", "technical_novelty": "3;3;3", "empirical_novelty": "4;2;3", "wc_summary_paper": "140;81;63", "wc_strength_and_weaknesses": "514;313;182", "wc_clarity_quality_novelty_and_reproducibility": "167;119;19", "wc_summary_review": "35;46;244", "wc_review": "856;559;508", "wc_reply_reviewers": "17;28;0", "wc_reply_authors": "793;944;568", "reply_reviewers": "1;1;0", "reply_authors": "2;2;1", "recommendation_avg": [ 8.0, 0.0 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 94.66666666666667, 32.8870119584549 ], "wc_strength_and_weaknesses_avg": [ 336.3333333333333, 136.538964727615 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 101.66666666666667, 61.65134944905009 ], "wc_summary_review_avg": [ 108.33333333333333, 96.03587292719784 ], "wc_review_avg": [ 641.0, 153.44705927452634 ], "wc_reply_reviewers_avg": [ 15.0, 11.51810169544733 ], "wc_reply_authors_avg": [ 768.3333333333334, 154.48912223483208 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 228, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8055277125890470910&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=MIMwy4kh9lf", "email": "google.com;google.com;google.com;google.com;caltech.edu", "author_num": 5, "aff_unique_index": "0;1;1;1;2", "aff_unique_norm": "DeepMind;Google;California Institute of Technology", "aff_unique_dep": "DeepMind;Google;", "aff_unique_url": "https://deepmind.com;https://www.google.com;https://www.caltech.edu", "aff_unique_abbr": "DeepMind;Google;Caltech", 
"aff_campus_unique_index": "1;1;1;2", "aff_campus_unique": ";Mountain View;Pasadena", "aff_country_unique_index": "0;1;1;1;1", "aff_country_unique": "United Kingdom;United States" }, { "id": "MIy9IfYlecR", "title": "Learning Test Time Augmentation with Cascade Loss Prediction", "track": "main", "status": "Reject", "tldr": "", "abstract": "Data augmentation has been a successful common practice for improving the performance of deep neural network during training stage. In recent years, studies on test time augmentation (TTA) have also been promising due to its effectiveness on improving the robustness against out-of-distribution data at inference. Instead of simply adopting pre-defined handcrafted geometric operations such as croping and flipping, recent TTA methods learn predictive transformations which are supposed to provide the best performance gain on each test sample. However, the desired iteration number of transformation is proportional to the inference time of the predictor, and the gain by ensembling multiple augmented inputs still requires additional forward pass of the target model. In this paper, we propose a cascade method for test time augmentation prediction. It only requires a single forward pass of the transformation predictor, while can output multiple desirable transformations iteratively. These transformations will then be adopted sequentially on the test sample at once before the target model inference. The experimental results show that our method provides a better trade-off between computational cost and overall performance at test time, and shows significant improvement compared to existing methods.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Siyang Pan;Jiaqian Yu;Dongwook Lee;Qiang Wang;ChangBeom Park;ByungIn Yoo", "authorids": "~Siyang_Pan1;~Jiaqian_Yu1;~Dongwook_Lee1;~Qiang_Wang1;~ChangBeom_Park2;~ByungIn_Yoo1", "gender": "M;F;M;M;M;M", "homepage": ";;;;http://www.sait.samsung.co.kr;https://sites.google.com/site/architectmind/", "dblp": "250/5753;164/7325;25/6543-5;64/5630-23;;26/1322", "google_scholar": ";8f7l1dIAAAAJ;https://scholar.google.co.kr/citations?user=E_QibGEAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;https://scholar.google.co.kr/citations?user=JNgRAf8AAAAJ", "orcid": ";;0000-0003-1373-505X;0000-0001-5632-4408;;0000-0002-4065-7512", "linkedin": ";;;;;", "or_profile": "~Siyang_Pan1;~Jiaqian_Yu1;~Dongwook_Lee1;~Qiang_Wang1;~ChangBeom_Park2;~ByungIn_Yoo1", "aff": "Samsung;Samsung R&D Institute China - Beijing;Samsung Advanced Institute of Technology;Samsung;Sait;Samsung Advanced Institute of Technology", "aff_domain": "samsung.com;samsung.com;samsung.com;samsung.com;sait.samsung.co.kt;samsung.com", "position": "Researcher;Researcher;Researcher;Researcher;Principal Researcher;Research Master", "bibtex": "@misc{\npan2023learning,\ntitle={Learning Test Time Augmentation with Cascade Loss Prediction},\nauthor={Siyang Pan and Jiaqian Yu and Dongwook Lee and Qiang Wang and ChangBeom Park and ByungIn Yoo},\nyear={2023},\nurl={https://openreview.net/forum?id=MIy9IfYlecR}\n}", "github": "", "project": "", "reviewers": "N36J;yeBc;2Tn9;6hqy", "site": "https://openreview.net/forum?id=MIy9IfYlecR", "pdf_size": 761933, "recommendation": "3;3;3;3", "confidence": "3;4;4;3", "correctness": "3;3;3;3", "technical_novelty": "2;2;1;2", "empirical_novelty": "2;2;2;1", "wc_summary_paper": "40;75;50;90", "wc_strength_and_weaknesses": "297;96;58;128", "wc_clarity_quality_novelty_and_reproducibility": "27;8;274;35", "wc_summary_review": 
"57;28;66;31", "wc_review": "421;207;448;284", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "177;23;195;118", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 63.75, 19.803724397193573 ], "wc_strength_and_weaknesses_avg": [ 144.75, 91.3273644643269 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 86.0, 108.9839437715483 ], "wc_summary_review_avg": [ 45.5, 16.347782724271816 ], "wc_review_avg": [ 340.0, 98.80536422684752 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 128.25, 67.10951869891484 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:2k_lw7q70OUJ:scholar.google.com/&scioq=Learning+Test+Time+Augmentation+with+Cascade+Loss+Prediction&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "Samsung;SAIT Polytechnic", "aff_unique_dep": "Samsung;", "aff_unique_url": "https://www.samsung.com;https://www.sait.ca", "aff_unique_abbr": "Samsung;SAIT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Beijing", "aff_country_unique_index": "0;1;0;0;2;0", "aff_country_unique": "South Korea;China;Canada" }, { "id": "MJSIkA72S4k", "title": "On the Implicit Bias Towards Depth Minimization in Deep Neural Networks", "track": "main", "status": "Reject", "tldr": "", "abstract": "Recent results in the literature suggest that the penultimate (second-to-last) layer representations of neural networks that are trained for classification exhibit a clustering property called neural collapse (NC). We study the implicit bias of stochastic gradient descent (SGD) in favor of low-depth solutions when training deep neural networks. We characterize a notion of effective depth that measures the first layer for which sample embeddings are separable using the nearest-class center classifier. Furthermore, we hypothesize and empirically show that SGD implicitly selects neural networks of small effective depths. \n\nSecondly, while neural collapse emerges even when generalization should be impossible - we argue that the \\emph{degree of separability} in the intermediate layers is related to generalization. We derive a generalization bound based on comparing the effective depth of the network with the minimal depth required to fit the same dataset with partially corrupted labels. Remarkably, this bound provides non-trivial estimations of the test performance. 
Finally, we empirically show that the effective depth of a trained neural network monotonically increases when increasing the number of random labels in data.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/66783fea7f2daf1017561f7cf932d3efb00c1d41.zip", "author": "Tomer Galanti;Liane Galanti;Ido Ben-Shaul", "authorids": "~Tomer_Galanti1;~Liane_Galanti1;~Ido_Ben-Shaul1", "gender": "M;;M", "homepage": "https://tomergalanti.github.io;;https://www.idobenshaul.com", "dblp": "198/1490;;270/8226", "google_scholar": ";;ArjvABYAAAAJ", "orcid": ";;", "linkedin": "tomer-galanti-5880b1104/;;ido-ben-shaul-482449147/", "or_profile": "~Tomer_Galanti1;~Liane_Galanti1;~Ido_Ben-Shaul1", "aff": "Massachusetts Institute of Technology;;eBay", "aff_domain": "mit.edu;;ebay.com", "position": "Postdoc;;Researcher", "bibtex": "@misc{\ngalanti2023on,\ntitle={On the Implicit Bias Towards Depth Minimization in Deep Neural Networks},\nauthor={Tomer Galanti and Liane Galanti and Ido Ben-Shaul},\nyear={2023},\nurl={https://openreview.net/forum?id=MJSIkA72S4k}\n}", "github": "", "project": "", "reviewers": "JHSQ;VKrE;nZgi", "site": "https://openreview.net/forum?id=MJSIkA72S4k", "pdf_size": 5659116, "recommendation": "3;5;6", "confidence": "3;2;4", "correctness": "2;3;4", "technical_novelty": "3;3;3", "empirical_novelty": "3;3;2", "wc_summary_paper": "16;63;75", "wc_strength_and_weaknesses": "162;106;208", "wc_clarity_quality_novelty_and_reproducibility": "42;13;13", "wc_summary_review": "87;65;56", "wc_review": "307;247;352", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "556;508;343", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 51.333333333333336, 25.460208605237746 ], "wc_strength_and_weaknesses_avg": [ 158.66666666666666, 41.70797951897881 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 22.666666666666668, 13.670731102939918 ], "wc_summary_review_avg": [ 69.33333333333333, 13.021349989749739 ], "wc_review_avg": [ 302.0, 43.01162633521314 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 469.0, 91.22499657440389 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.3273268353539886, "corr_recommendation_correctness": 0.9819805060619659, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:mZI_dIeqs3gJ:scholar.google.com/&scioq=On+the+Implicit+Bias+Towards+Depth+Minimization+in+Deep+Neural+Networks&hl=en&as_sdt=0,33", "gs_version_total": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Massachusetts Institute of Technology;eBay Inc.", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.ebay.com", "aff_unique_abbr": "MIT;eBay", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "A Multi-Grained Self-Interpretable Symbolic-Neural Model For Single/Multi-Labeled Text Classification", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12223", "id": "MLJ5TF5FtXH", "poster": "", "openreview": "https://openreview.net/forum?id=MLJ5TF5FtXH", "slides": "https://iclr.cc/virtual/2023/poster/12223", 
"video": "https://iclr.cc/virtual/2023/poster/12223", "author_site": "Xiang Hu, XinYu KONG, Kewei Tu", "tldr": "An inherently interpretable model architecture with explicit unsupervised label to constituent alignments.", "abstract": "Deep neural networks based on layer-stacking architectures have historically suffered from poor inherent interpretability. Meanwhile, symbolic probabilistic models function with clear interpretability, but how to combine them with neural networks to enhance their performance remains to be explored. In this paper, we try to marry these two systems for text classification via a structured language model. We propose a Symbolic-Neural model that can learn to explicitly predict class labels of text spans from a constituency tree without requiring any access to span-level gold labels. As the structured language model learns to predict constituency trees in a self-supervised manner, only raw texts and sentence-level labels are required as training data, which makes it essentially a general constituent-level self-interpretable classification model. Our experiments demonstrate that our approach could achieve good prediction accuracy in downstream tasks. Meanwhile, the predicted span labels are consistent with human rationales to a certain degree.", "keywords": "Interpretability;natural language processing;text classification;unsupervised learning;structured language model;multiple instance learning;recursive neural network", "primary_area": "", "supplementary_material": "/attachment/a5b8f9da5ac2bb1ffb016d227d86e09533069857.zip", "author": "Xiang Hu;XinYu KONG;Kewei Tu", "authorids": "~Xiang_Hu2;~XinYu_KONG1;~Kewei_Tu1", "gender": "M;M;M", "homepage": ";https://faculty.sist.shanghaitech.edu.cn/faculty/tukw/;https://imhuim982.github.io/", "dblp": ";22/918;", "google_scholar": ";5gi3Pm0AAAAJ;D4hnUZkAAAAJ", "orcid": "0000-0003-1251-0617;;0000-0001-7994-3121", "linkedin": ";;", "or_profile": "~XinYu_KONG1;~Kewei_Tu1;~Hu_Xiang1", "aff": "Alibaba Group;ShanghaiTech University;Alibaba Group", "aff_domain": "antgroup.com;shanghaitech.edu.cn;alibaba-inc.com", "position": "Researcher;Associate Professor;Researcher", "bibtex": "@inproceedings{\nhu2023a,\ntitle={A Multi-Grained Self-Interpretable Symbolic-Neural Model For Single/Multi-Labeled Text Classification},\nauthor={Xiang Hu and XinYu KONG and Kewei Tu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=MLJ5TF5FtXH}\n}", "github": "", "project": "", "reviewers": "AUue;1uUN;Vws4", "pdf_size": 2210670, "recommendation": "5;6;6", "confidence": "3;3;3", "correctness": "3;4;2", "technical_novelty": "3;3;3", "empirical_novelty": "2;4;3", "wc_summary_paper": "271;145;324", "wc_strength_and_weaknesses": "323;150;603", "wc_clarity_quality_novelty_and_reproducibility": "59;37;418", "wc_summary_review": "89;34;121", "wc_review": "742;366;1466", "wc_reply_reviewers": "25;0;379", "wc_reply_authors": "593;201;1180", "reply_reviewers": "1;0;1", "reply_authors": "2;1;2", "recommendation_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.0 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 246.66666666666666, 75.07477753694793 ], "wc_strength_and_weaknesses_avg": [ 358.6666666666667, 186.64821337359635 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 171.33333333333334, 174.6507626347188 ], 
"wc_summary_review_avg": [ 81.33333333333333, 35.92894221778438 ], "wc_review_avg": [ 858.0, 456.5026469437682 ], "wc_reply_reviewers_avg": [ 134.66666666666666, 173.0709552627348 ], "wc_reply_authors_avg": [ 658.0, 402.30916801219763 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6979470057347987379&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=MLJ5TF5FtXH", "email": "antgroup.com;shanghaitech.edu.cn;alibaba-inc.com", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Alibaba Group;ShanghaiTech University", "aff_unique_dep": ";", "aff_unique_url": "https://www.alibaba.com;https://www.shanghaitech.edu.cn", "aff_unique_abbr": "Alibaba;ShanghaiTech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "MLStcoDEhqi", "title": "DREAM: Domain-free Reverse Engineering Attributes of Black-box Model", "track": "main", "status": "Reject", "tldr": "", "abstract": "Deep learning models are usually black boxes when deployed on machine learning platforms. Prior works have shown that the attributes (e.g., the number of convolutional layers) of a target black-box neural network can be exposed through a sequence of queries. There is a crucial limitation that these works assume the dataset used for training the target model to be known beforehand, and leverage this dataset for model attribute attack. However, it is difficult to access the training dataset of the target black-box model in reality. Therefore, whether the attributes of a target black-box model could be still revealed in this case is doubtful. In this paper, we investigate a new problem of Domain-free Reverse Engineering the Attributes of a black-box target Model, called DREAM, without requiring the model's training dataset available, and put forward a general and principled framework by casting this problem as an out of distribution (OOD) generalization problem. At the heart of our framework, we devise a multi-discriminator generative adversarial network (MDGAN) to learn domain invariant features. Based on these features, we can learn a domain-free model to inversely infer the attributes of a target black-box model with unknown training data. This makes our method one of the kinds that can gracefully apply to an arbitrary domain for model attribute reverse engineering with good generalization ability. 
Extensive experimental studies are conducted and the results validate the superiority of our proposed method over the baselines.", "keywords": "Model Attribute inference;domain-free method", "primary_area": "", "supplementary_material": "", "author": "Rongqing Li;Jiaqi Yu;Changsheng Li;Wenhan Luo;Ye Yuan;Guoren Wang", "authorids": "~Rongqing_Li1;~Jiaqi_Yu1;~Changsheng_Li4;~Wenhan_Luo1;~Ye_Yuan15;~Guoren_Wang2", "gender": ";M;M;M;;M", "homepage": ";https://github.com/Jacky980825;;https://whluo.github.io/;;https://guorenwang.github.io/", "dblp": ";;;64/9877;;", "google_scholar": ";;FfJnUioAAAAJ;g20Q12MAAAAJ;;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";;0000-0001-9789-7632;0000-0002-5697-4168;;", "linkedin": ";;;wenhan-luo-a1843480/;;", "or_profile": "~Rongqing_Li1;~Jiaqi_Yu1;~Changsheng_Li4;~Wenhan_Luo1;~Ye_Yuan15;~Guoren_Wang2", "aff": ";Beijing Institute of Technology;Beijing Institute of Technology;Sun Yat-sen University;;Beijing Institute of Technology", "aff_domain": ";bit.edu.cn;bit.edu.cn;sysu.edu.cn;;bit.edu.cn", "position": ";MS student;Full Professor;Associate Professor;;Full Professor", "bibtex": "@misc{\nli2023dream,\ntitle={{DREAM}: Domain-free Reverse Engineering Attributes of Black-box Model},\nauthor={Rongqing Li and Jiaqi Yu and Changsheng Li and Wenhan Luo and Ye Yuan and Guoren Wang},\nyear={2023},\nurl={https://openreview.net/forum?id=MLStcoDEhqi}\n}", "github": "", "project": "", "reviewers": "iGeV;W21i;p3er;sNFK", "site": "https://openreview.net/forum?id=MLStcoDEhqi", "pdf_size": 1025038, "recommendation": "3;6;6;6", "confidence": "4;4;4;3", "correctness": "2;4;4;4", "technical_novelty": "1;3;3;2", "empirical_novelty": "2;3;2;3", "wc_summary_paper": "90;73;99;62", "wc_strength_and_weaknesses": "338;107;441;206", "wc_clarity_quality_novelty_and_reproducibility": "36;39;113;21", "wc_summary_review": "44;63;68;75", "wc_review": "508;282;721;364", "wc_reply_reviewers": "283;0;0;0", "wc_reply_authors": "1629;325;1276;843", "reply_reviewers": "1;0;0;0", "reply_authors": "4;2;3;2", "recommendation_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.5, 0.8660254037844386 ], "technical_novelty_avg": [ 2.25, 0.82915619758885 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 81.0, 14.404860290887934 ], "wc_strength_and_weaknesses_avg": [ 273.0, 126.97834461040985 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 52.25, 35.73076405564258 ], "wc_summary_review_avg": [ 62.5, 11.5 ], "wc_review_avg": [ 468.75, 166.5973814320021 ], "wc_reply_reviewers_avg": [ 70.75, 122.54259463549806 ], "wc_reply_authors_avg": [ 1018.25, 487.53429366558413 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:fXfV-V4jNFwJ:scholar.google.com/&scioq=DREAM:+Domain-free+Reverse+Engineering+Attributes+of+Black-box+Model&hl=en&as_sdt=0,5", "gs_version_total": 3, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Beijing Institute of Technology;Sun Yat-sen University", "aff_unique_dep": ";", "aff_unique_url": "http://www.bit.edu.cn/;http://www.sysu.edu.cn/", "aff_unique_abbr": "BIT;SYSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "MMBILyoRKQ", 
"title": "Iterative Relaxing Gradient Projection for Continual Learning", "track": "main", "status": "Reject", "tldr": "We propose a novel gradient projection approach to facilitate forward knowledge transfer within a fixed network capacity by iterative searching and relaxing the critical subspace of the frozen space.", "abstract": "A critical capability for intelligent systems is to continually learn given a sequence of tasks. An ideal continual learner should be able to avoid catastrophic forgetting and effectively leverage past learned experiences to master new knowledge. Among different continual learning algorithms, gradient projection approaches impose hard constraints on the optimization space for new tasks to minimize task interference, yet hinder forward knowledge transfer at the same time. Recent methods use expansion-based techniques to relax the constraints, but a growing network can be computationally expensive. Therefore, it remains a challenge whether we can improve forward knowledge transfer for gradient projection approaches \\textit{using a fixed network architecture}. In this work, we propose the Iterative Relaxing Gradient Projection (IRGP) framework. The basic idea is to iteratively search for the parameter subspaces most related to the current task and relax these parameters, then reuse the frozen spaces to facilitate forward knowledge transfer while consolidating previous knowledge. Our framework requires neither memory buffers nor extra parameters. Extensive experiments have demonstrated the superiority of our framework over several strong baselines. We also provide theoretical guarantees for our iterative relaxing strategies. ", "keywords": "continual learning;gradient projection methods", "primary_area": "", "supplementary_material": "/attachment/cb1af10b6242c389191fced251670ce8ffb9389f.zip", "author": "Zeyuan Yang;Zonghan Yang;Peng Li;Yang Liu", "authorids": "~Zeyuan_Yang3;~Zonghan_Yang1;~Peng_Li2;~Yang_Liu19", "gender": "M;M;M;M", "homepage": "https://miicheyang.github.io/;https://minicheshire.github.io/;http://www.lpeng.net/;http://nlp.csai.tsinghua.edu.cn/~ly/", "dblp": "260/6331-2.html;222/7860;83/6353-30;51/3710-5", "google_scholar": "k_qpTh4AAAAJ;rt9HOIUAAAAJ;hgYzkOQAAAAJ;https://scholar.google.com.hk/citations?user=lVhoKNcAAAAJ", "orcid": ";;0000-0003-1374-5979;0000-0002-3087-242X", "linkedin": ";;;", "or_profile": "~Zeyuan_Yang3;~Zonghan_Yang1;~Peng_Li2;~Yang_Liu19", "aff": ", Tsinghua University;Department of Computer Science and Technology, Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "cs.tsinghua.edu.cn;cs.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "MS student;PhD student;Associate Professor;Professor", "bibtex": "@misc{\nyang2023iterative,\ntitle={Iterative Relaxing Gradient Projection for Continual Learning},\nauthor={Zeyuan Yang and Zonghan Yang and Peng Li and Yang Liu},\nyear={2023},\nurl={https://openreview.net/forum?id=MMBILyoRKQ}\n}", "github": "", "project": "", "reviewers": "AinZ;tNh3;Vp6m", "site": "https://openreview.net/forum?id=MMBILyoRKQ", "pdf_size": 973278, "recommendation": "5;6;6", "confidence": "4;4;3", "correctness": "2;2;3", "technical_novelty": "2;3;3", "empirical_novelty": "2;2;1", "wc_summary_paper": "21;117;139", "wc_strength_and_weaknesses": "128;437;523", "wc_clarity_quality_novelty_and_reproducibility": "35;96;112", "wc_summary_review": "56;82;67", "wc_review": "240;732;841", "wc_reply_reviewers": "0;118;124", "wc_reply_authors": "716;715;664", "reply_reviewers": "0;1;1", 
"reply_authors": "2;2;2", "recommendation_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "wc_summary_paper_avg": [ 92.33333333333333, 51.23366948490893 ], "wc_strength_and_weaknesses_avg": [ 362.6666666666667, 169.60804488257298 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 81.0, 33.1762967593833 ], "wc_summary_review_avg": [ 68.33333333333333, 10.656244908763854 ], "wc_review_avg": [ 604.3333333333334, 261.43747924801363 ], "wc_reply_reviewers_avg": [ 80.66666666666667, 57.09251750935105 ], "wc_reply_authors_avg": [ 698.3333333333334, 24.280765135299085 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.4999999999999999, "corr_recommendation_correctness": 0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:zFh6ihNt160J:scholar.google.com/&scioq=Iterative+Relaxing+Gradient+Projection+for+Continual+Learning&hl=en&as_sdt=0,48", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "MMKqOJgRiw4", "title": "Pyramidal Denoising Diffusion Probabilistic Models", "track": "main", "status": "Reject", "tldr": "", "abstract": "Recently, diffusion model have demonstrated impressive image generation performances, and have been extensively studied in various computer vision tasks. Unfortunately, training and evaluating diffusion models consume a lot of time and computational resources. To address this problem, here we present a novel pyramidal diffusion model that can generate high resolution images starting from much coarser resolution images using a {\\em single} score function trained with a positional embedding. This enables a neural network to be much lighter and also enables time-efficient image generation without compromising its performances. 
Furthermore, we show that the proposed approach can also be used efficiently for the multi-scale super-resolution problem using a single score function.", "keywords": "Diffusion Model;Image Generation;Super Resolution", "primary_area": "", "supplementary_material": "", "author": "Dohoon Ryu;Jong Chul Ye", "authorids": "~Dohoon_Ryu1;~Jong_Chul_Ye1", "gender": ";M", "homepage": ";https://bispl.weebly.com/", "dblp": ";15/5613", "google_scholar": ";HNMjoNEAAAAJ", "orcid": ";", "linkedin": "dohoon-ryu-856b24199;", "or_profile": "~Dohoon_Ryu1;~Jong_Chul_Ye1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr", "position": "MS student;Full Professor", "bibtex": "@misc{\nryu2023pyramidal,\ntitle={Pyramidal Denoising Diffusion Probabilistic Models},\nauthor={Dohoon Ryu and Jong Chul Ye},\nyear={2023},\nurl={https://openreview.net/forum?id=MMKqOJgRiw4}\n}", "github": "", "project": "", "reviewers": "4NKS;VnnC;HWig;REZk", "site": "https://openreview.net/forum?id=MMKqOJgRiw4", "pdf_size": 9586353, "recommendation": "3;5;5;6", "confidence": "4;4;4;3", "correctness": "2;2;3;3", "technical_novelty": "3;3;3;3", "empirical_novelty": "0;2;2;0", "wc_summary_paper": "58;86;79;85", "wc_strength_and_weaknesses": "458;340;319;206", "wc_clarity_quality_novelty_and_reproducibility": "7;130;18;25", "wc_summary_review": "53;23;31;61", "wc_review": "576;579;447;377", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "733;670;716;430", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 1.0, 1.0 ], "wc_summary_paper_avg": [ 77.0, 11.291589790636214 ], "wc_strength_and_weaknesses_avg": [ 330.75, 89.41301639023258 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 45.0, 49.49242366261729 ], "wc_summary_review_avg": [ 42.0, 15.524174696260024 ], "wc_review_avg": [ 494.75, 86.3781656438709 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 637.25, 121.85519069781148 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.6622661785325219, "corr_recommendation_correctness": 0.6882472016116854, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4487576247986640167&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "id": "MMiaF8KppTZ", "title": "Logical view on fairness of a binary classification task", "track": "main", "status": "Reject", "tldr": "The fairness of a binary classifier is a logical phenomenon since its loss is not expressible in the first-order logic of a suitable model.", "abstract": "Ethical, Interpretable/Explainable, and Responsible AI are active areas of research and an important social initiative. \n\nWe prove that, with no regard to the data, fairness and trustworthiness are algorithmically undecidable for a basic machine learning task: binary classification. 
Therefore, even the approach based on not only improving but fully solving the three usually assumed issues -- the insufficient quality of measurements, the complex consequences of (mis)measurements, and the limits of existing social theories -- is only a heuristic. We show that, effectively, the fairness of a classifier is not even a (version of the bias-variance) trade-off, since it is a logical phenomenon. \nNamely, we reveal a language $L$ and an $L$-theory $T$ for the binary classification task such that the very notion of loss is not expressible by a first-order formula in $L$. ", "keywords": "binary classification;fairness;first-order logic;decidability", "primary_area": "", "supplementary_material": "", "author": "Serge Berger", "authorids": "~Serge_Berger1", "gender": "M", "homepage": "", "dblp": "263/4744", "google_scholar": "tqhqtokAAAAJ", "orcid": "0000-0002-5044-5361", "linkedin": "serge-berger-0933bb25/", "or_profile": "~Serge_Berger1", "aff": "AIDIL", "aff_domain": "aidillic.com", "position": "Researcher", "bibtex": "@misc{\nberger2023logical,\ntitle={Logical view on fairness of a binary classification task},\nauthor={Serge Berger},\nyear={2023},\nurl={https://openreview.net/forum?id=MMiaF8KppTZ}\n}", "github": "", "project": "", "reviewers": "ExFJ;isj2;ZpSj", "site": "https://openreview.net/forum?id=MMiaF8KppTZ", "pdf_size": 227584, "recommendation": "1;3;5", "confidence": "1;3;2", "correctness": "1;2;4", "technical_novelty": "1;1;3", "empirical_novelty": "0;0;0", "wc_summary_paper": "7;28;254", "wc_strength_and_weaknesses": "1;152;89", "wc_clarity_quality_novelty_and_reproducibility": "1;27;143", "wc_summary_review": "1;47;38", "wc_review": "10;254;524", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.0, 1.632993161855452 ], "confidence_avg": [ 2.0, 0.816496580927726 ], "correctness_avg": [ 2.3333333333333335, 1.247219128924647 ], "technical_novelty_avg": [ 1.6666666666666667, 0.9428090415820634 ], "empirical_novelty_avg": [ 0, 0 ], "wc_summary_paper_avg": [ 96.33333333333333, 111.81631763248551 ], "wc_strength_and_weaknesses_avg": [ 80.66666666666667, 61.92647970689832 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 57.0, 61.73059749157355 ], "wc_summary_review_avg": [ 28.666666666666668, 19.90533150244482 ], "wc_review_avg": [ 262.6666666666667, 209.9290885566415 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 1, 0 ], "corr_recommendation_confidence": 0.5, "corr_recommendation_correctness": 0.9819805060619659, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:5j81-p75jzAJ:scholar.google.com/&scioq=Logical+view+on+fairness+of+a+binary+classification+task&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "AIDIL", "aff_unique_dep": "", "aff_unique_url": "", "aff_unique_abbr": "" }, { "id": "MND1kmmNy0O", "title": "Solving Math Word Problems with Process-based and Outcome-based Feedback", "track": "main", "status": "Reject", "tldr": "Both process- and outcome-based feedback with all the tricks achieve similar final-answer error rates and SOTA results, but generating accurate reasoning steps requires either process-based supervision, or a reward model that emulates it.", "abstract": "Recent work has shown that prompting language models to generate reasoning steps improves 
performance on many reasoning tasks. When moving beyond prompting, this raises the question of how we should supervise the finetuning of such models: outcome-based approaches which supervise the final result, or process-based approaches which supervise the reasoning process itself? Differences between these approaches might naturally be expected not just in final-answer errors but also in reasoning errors, which can be difficult to detect and are problematic in many real-world domains such as education. We run the first comprehensive comparison between process- and outcome-based approaches trained on a natural language task, GSM8K. We find that pure outcome-based supervision produces similar final-answer error rates with less label supervision. However, for correct reasoning steps we find it necessary to use process-based supervision or supervision from learned reward models that emulate process-based feedback. In total, we improve the previous best results from 16.8% $\\rightarrow$ 12.7% final-answer error and 14.0% $\\rightarrow$ 3.4% reasoning error among final-answer-correct solutions.", "keywords": "language models;reasoning;reward models", "primary_area": "", "supplementary_material": "", "author": "Jonathan Uesato;Nate Kushman;Ramana Kumar;H. Francis Song;Noah Yamamoto Siegel;Lisa Wang;Antonia Creswell;Geoffrey Irving;Irina Higgins", "authorids": "~Jonathan_Uesato1;~Nate_Kushman1;~Ramana_Kumar1;~H._Francis_Song1;~Noah_Yamamoto_Siegel1;~Lisa_Wang3;~Antonia_Creswell2;~Geoffrey_Irving2;~Irina_Higgins1", "gender": ";M;;;;F;F;M;F", "homepage": ";http://www.kushman.org;;;;;;https://naml.us;https://scholar.google.com/citations?user=YWVuCKUAAAAJ&hl=en", "dblp": "198/1298;;;150/6469;259/1484;;183/6675;95/4978;155/7461", "google_scholar": ";https://scholar.google.co.uk/citations?user=I_YIc0YAAAAJ;OyX1-qYAAAAJ;oVF9D6EAAAAJ;l2E0LR4AAAAJ;5KmYPkIAAAAJ;;TrdtzgwAAAAJ;YWVuCKUAAAAJ", "orcid": ";;;;0000-0002-5746-117X;;;;0000-0002-1890-2091", "linkedin": ";nate-kushman-2304502;;;noah-y-siegel-8751925b;;;geoffreyirving;https://uk.linkedin.com/in/irina-higgins-74455235", "or_profile": "~Jonathan_Uesato1;~Nate_Kushman1;~Ramana_Kumar1;~H._Francis_Song1;~Noah_Yamamoto_Siegel1;~Lisa_Wang3;~Antonia_Creswell2;~Geoffrey_Irving2;~Irina_Higgins1", "aff": "Google DeepMind;Google DeepMind;Google DeepMind;;Google DeepMind;Google DeepMind;Google DeepMind;Google DeepMind;Google DeepMind", "aff_domain": "google.com;google.com;deepmind.com;;deepmind.com;deepmind.com;google.com;deepmind.com;google.com", "position": "Researcher;Researcher;Researcher;;Researcher;Researcher;Researcher;Safety Researcher;Staff Research Scientist", "bibtex": "@misc{\nuesato2023solving,\ntitle={Solving Math Word Problems with Process-based and Outcome-based Feedback},\nauthor={Jonathan Uesato and Nate Kushman and Ramana Kumar and H. 
Francis Song and Noah Yamamoto Siegel and Lisa Wang and Antonia Creswell and Geoffrey Irving and Irina Higgins},\nyear={2023},\nurl={https://openreview.net/forum?id=MND1kmmNy0O}\n}", "github": "", "project": "", "reviewers": "GLbN;pkRc;ytw5", "site": "https://openreview.net/forum?id=MND1kmmNy0O", "pdf_size": 511014, "recommendation": "3;3;5", "confidence": "4;3;4", "correctness": "3;3;3", "technical_novelty": "2;2;2", "empirical_novelty": "2;2;0", "wc_summary_paper": "98;73;88", "wc_strength_and_weaknesses": "255;189;225", "wc_clarity_quality_novelty_and_reproducibility": "7;53;22", "wc_summary_review": "83;31;39", "wc_review": "443;346;374", "wc_reply_reviewers": "0;35;0", "wc_reply_authors": "556;339;223", "reply_reviewers": "0;1;0", "reply_authors": "1;1;1", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 1.3333333333333333, 0.9428090415820634 ], "wc_summary_paper_avg": [ 86.33333333333333, 10.274023338281628 ], "wc_strength_and_weaknesses_avg": [ 223.0, 26.981475126464083 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 27.333333333333332, 19.154343864744856 ], "wc_summary_review_avg": [ 51.0, 22.861904265976328 ], "wc_review_avg": [ 387.6666666666667, 40.762182909598394 ], "wc_reply_reviewers_avg": [ 11.666666666666666, 16.49915822768611 ], "wc_reply_authors_avg": [ 372.6666666666667, 138.01529705877616 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 9, 0 ], "corr_recommendation_confidence": 0.5, "corr_recommendation_correctness": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8269799122311213503&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0;0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google DeepMind", "aff_unique_url": "https://deepmind.com", "aff_unique_abbr": "DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "MQ2IvNeZJD", "title": "Can you Trust your Disentanglement?", "track": "main", "status": "Reject", "tldr": "by exposing problems in disentanglement metrics, and introducing new metrics and a new task, we make the case that existing disentangled models actually produce representations that are largely entangled", "abstract": "There has been growing interest, in recent years, in learning disentangled representations of data. These are representations in which distinct features, such as size or shape, are represented by distinct neurons. Measuring disentanglement, i.e., quantifying the extent to which a given representation is disentangled, is not straightforward. Multiple metrics have been proposed. In this paper, we identify two failings of existing metrics, and show how they can assign a high score to a model which is still entangled. We then propose two new metrics which redress these problems. Additionally, we introduce the task of recognizing novel combinations of familiar features (NCFF), which we argue is doable if and only if the model is disentangled. As well as being desirable in itself, NCFF provides a tangible downstream task that can help focus the field of disentanglement research, in contrast to the set of bespoke metrics that are currently used.
We then show empirically that existing methods perform poorly on our proposed metrics and fail at recognizing NCFF and so, we argue, are not disentangled.", "keywords": "deep learning;disentanglement", "primary_area": "", "supplementary_material": "/attachment/b894aa9ff6c92122b269f8a3c6fe5365ca21ae34.zip", "author": "Louis Mahon;Lei Sha;Thomas Lukasiewicz", "authorids": "~Louis_Mahon1;~Lei_Sha1;~Thomas_Lukasiewicz2", "gender": ";M;", "homepage": ";https://shalei120.github.io;https://www.cs.ox.ac.uk/people/thomas.lukasiewicz/", "dblp": ";93/3906;l/ThomasLukasiewicz", "google_scholar": "https://scholar.google.co.uk/citations?hl=en;https://scholar.google.com.hk/citations?user=EbZ_P6gAAAAJ;arjucpEAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Louis_Mahon1;~Lei_Sha1;~Thomas_Lukasiewicz2", "aff": "School of Informatics, University of Edinburgh;Beihang University;Department of Computer Science, University of Oxford", "aff_domain": "inf.ed.ac.uk;buaa.edu.cn;cs.ox.ac.uk", "position": "Postdoc;Full Professor;Full Professor", "bibtex": "@misc{\nmahon2023can,\ntitle={Can you Trust your Disentanglement?},\nauthor={Louis Mahon and Lei Sha and Thomas Lukasiewicz},\nyear={2023},\nurl={https://openreview.net/forum?id=MQ2IvNeZJD}\n}", "github": "", "project": "", "reviewers": "DhWb;kR7C;JbiH;HoHs", "site": "https://openreview.net/forum?id=MQ2IvNeZJD", "pdf_size": 180725, "recommendation": "1;3;6;8", "confidence": "3;4;4;3", "correctness": "2;2;4;4", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "38;97;60;104", "wc_strength_and_weaknesses": "74;665;143;77", "wc_clarity_quality_novelty_and_reproducibility": "410;101;34;48", "wc_summary_review": "51;42;59;67", "wc_review": "573;905;296;296", "wc_reply_reviewers": "0;218;35;0", "wc_reply_authors": "1080;1973;459;54", "reply_reviewers": "0;1;1;0", "reply_authors": "2;3;2;1", "recommendation_avg": [ 4.5, 2.692582403567252 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 1.0 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 74.75, 27.012728481217888 ], "wc_strength_and_weaknesses_avg": [ 239.75, 247.062112635669 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 148.25, 153.17371674017707 ], "wc_summary_review_avg": [ 54.75, 9.283722313813572 ], "wc_review_avg": [ 517.5, 250.6795763519637 ], "wc_reply_reviewers_avg": [ 63.25, 90.48031553879551 ], "wc_reply_authors_avg": [ 891.5, 723.4702827345433 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.9284766908852594, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Yyct8JH0uNgJ:scholar.google.com/&scioq=Can+you+Trust+your+Disentanglement%3F&hl=en&as_sdt=0,14", "gs_version_total": 0, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Edinburgh;Beihang University;University of Oxford", "aff_unique_dep": "School of Informatics;;Department of Computer Science", "aff_unique_url": "https://www.ed.ac.uk;http://www.buaa.edu.cn/;https://www.ox.ac.uk", "aff_unique_abbr": "Edinburgh;BUAA;Oxford", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Edinburgh;;Oxford", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United Kingdom;China" }, { "title": "Simplifying Model-based RL: Learning Representations, Latent-space Models, and Policies with One Objective", "status": "Poster", 
"track": "main", "site": "https://iclr.cc/virtual/2023/poster/11780", "id": "MQcmfgRxf7a", "poster": "/media/PosterPDFs/ICLR%202023/11780.png?t=1680795211.0348854", "openreview": "https://openreview.net/forum?id=MQcmfgRxf7a", "slides": "https://iclr.cc/virtual/2023/poster/11780", "video": "https://iclr.cc/virtual/2023/poster/11780", "author_site": "Raj Ghugare, Homanga Bharadhwaj, Benjamin Eysenbach, Sergey Levine, Russ Salakhutdinov", "tldr": "We present a joint objective for latent space model based RL which lower bounds the RL objective. Maximising this bound jointly with the encoder, model and the policy matches the performance of SOTA methods, while being 6-10 times faster. ", "abstract": "While reinforcement learning (RL) methods that learn an internal model of the environment have the potential to be more sample efficient than their model-free counterparts, learning to model raw observations from high dimensional sensors can be challenging.\nPrior work has addressed this challenge by learning low-dimensional representation of observations through auxiliary objectives, such as reconstruction or value prediction. However, the alignment between these auxiliary objectives and the RL objective is often unclear.\nIn this work, we propose a single objective which jointly optimizes a latent-space model and policy to achieve high returns while remaining self-consistent. This objective is a lower bound on expected returns. Unlike prior bounds for model-based RL on policy exploration or model guarantees, our bound is directly on the overall RL objective. We demonstrate that the resulting algorithm matches or improves the sample-efficiency of the best prior model-based and model-free RL methods. While sample efficient methods typically are computationally demanding, our method attains the performance of SAC in about 50\\% less wall-clock time.\n", "keywords": "Latent-space models;objective mismatch;model based RL", "primary_area": "", "supplementary_material": "/attachment/6d53cd85ef5c7ec3515a591ebfe84bf144da501d.zip", "author": "Raj Ghugare;Homanga Bharadhwaj;Benjamin Eysenbach;Sergey Levine;Russ Salakhutdinov", "authorids": "~Raj_Ghugare1;~Homanga_Bharadhwaj1;~Benjamin_Eysenbach1;~Sergey_Levine1;~Russ_Salakhutdinov1", "gender": "M;M;M;M;M", "homepage": "https://github.com/RajGhugare19;https://homangab.github.io/;https://ben-eysenbach.github.io/;https://people.eecs.berkeley.edu/~svlevine/;https://www.cs.cmu.edu/~rsalakhu/", "dblp": ";223/5842;192/1863;80/7594;", "google_scholar": "hzxdkrIAAAAJ;https://scholar.google.ca/citations?user=wwW4HRQAAAAJ;DRnOvU8AAAAJ;8R35rCwAAAAJ;", "orcid": ";;0009-0000-7136-6307;;", "linkedin": "raj-ghugare-917137169/;;benjamin-eysenbach-a7235775/;;", "or_profile": "~Raj_Ghugare1;~Homanga_Bharadhwaj1;~Benjamin_Eysenbach1;~Sergey_Levine1;~Russ_Salakhutdinov1", "aff": "Montreal Institute for Learning Algorithms, University of Montreal, Universit\u00e9 de Montr\u00e9al;Meta Facebook;Carnegie Mellon University;Google;School of Computer Science, Carnegie Mellon University", "aff_domain": "mila.umontreal.ca;facebook.com;cmu.edu;google.com;cs.cmu.edu", "position": "Intern;Visiting Researcher;PhD student;Research Scientist;Full Professor", "bibtex": "@inproceedings{\nghugare2023simplifying,\ntitle={Simplifying Model-based {RL}: Learning Representations, Latent-space Models, and Policies with One Objective},\nauthor={Raj Ghugare and Homanga Bharadhwaj and Benjamin Eysenbach and Sergey Levine and Russ Salakhutdinov},\nbooktitle={The Eleventh International Conference on 
Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=MQcmfgRxf7a}\n}", "github": "", "project": "", "reviewers": "km6q;PJmH;JaMR;RXsd;11kg", "pdf_size": 1564120, "recommendation": "6;6;6;6;8", "confidence": "2;4;3;3;5", "correctness": "3;4;3;3;4", "technical_novelty": "3;3;3;3;3", "empirical_novelty": "3;3;3;3;3", "wc_summary_paper": "40;48;92;69;70", "wc_strength_and_weaknesses": "356;692;67;111;108", "wc_clarity_quality_novelty_and_reproducibility": "2;33;320;121;64", "wc_summary_review": "29;33;55;375;51", "wc_review": "427;806;534;676;293", "wc_reply_reviewers": "108;632;0;373;0", "wc_reply_authors": "1064;1470;715;219;327", "reply_reviewers": "1;4;0;1;0", "reply_authors": "3;5;2;1;1", "recommendation_avg": [ 6.4, 0.7999999999999999 ], "confidence_avg": [ 3.4, 1.019803902718557 ], "correctness_avg": [ 3.4, 0.4898979485566356 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 63.8, 18.312837027615355 ], "wc_strength_and_weaknesses_avg": [ 266.8, 235.86555492483424 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 108.0, 113.05750749065716 ], "wc_summary_review_avg": [ 108.6, 133.5748479317869 ], "wc_review_avg": [ 547.2, 180.44766554322612 ], "wc_reply_reviewers_avg": [ 222.6, 245.95251574236843 ], "wc_reply_authors_avg": [ 759.0, 464.4794936270061 ], "reply_reviewers_avg": [ 1.2, 1.4696938456699067 ], "reply_authors_avg": [ 2.4, 1.4966629547095764 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.7844645405527362, "corr_recommendation_correctness": 0.6123724356957947, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7394169693844525047&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=MQcmfgRxf7a", "email": "mila.umontreal.ca;facebook.com;cmu.edu;google.com;cs.cmu.edu", "author_num": 5, "aff_unique_index": "0;1;2;3;2", "aff_unique_norm": "University of Montreal;Meta;Carnegie Mellon University;Google", "aff_unique_dep": "Montreal Institute for Learning Algorithms;Meta Platforms, Inc.;;Google", "aff_unique_url": "https://www.mila.quebec;https://meta.com;https://www.cmu.edu;https://www.google.com", "aff_unique_abbr": "MILA;Meta;CMU;Google", "aff_campus_unique_index": "0;2;3", "aff_campus_unique": "Montreal;;Mountain View;Pittsburgh", "aff_country_unique_index": "0;1;1;1;1", "aff_country_unique": "Canada;United States" }, { "id": "MR8pqi9R7xP", "title": "Watch What You Pretrain For: Targeted, Transferable Adversarial Examples on Self-Supervised Speech Recognition models", "track": "main", "status": "Reject", "tldr": "We show that recent self-supervised ASR models are uniquely vulnerable to adversarial attacks requiring no model access", "abstract": "A targeted adversarial attack produces audio samples that can force an Automatic Speech Recognition (ASR) system to output attacker-chosen text. To exploit ASR models in real-world, black-box settings, an adversary can leverage the transferability property, i.e., that an adversarial sample produced for a proxy ASR can also fool a different remote ASR. However, recent work has shown that transferability against large ASR models is very difficult. In this work, we show that modern ASR architectures, specifically ones based on Self-Supervised Learning, are in fact vulnerable to transferable attacks. We successfully demonstrate this phenomenon by evaluating state-of-the-art self-supervised ASR models like Wav2Vec2, HuBERT, Data2Vec and WavLM.
We show that with low-level additive noise at a 30 dB signal-to-noise ratio, we can achieve targeted transferability with up to 80\\% accuracy. Next, we 1) use an ablation study to show that self-supervised learning is the main cause of this phenomenon and 2) provide an explanation for it. Through this, we show that modern ASR architectures are uniquely vulnerable to adversarial security threats.", "keywords": "Speech recognition;adversarial attacks;self-supervised learning", "primary_area": "", "supplementary_material": "/attachment/4e01ceb33b51e805ebe081f889cab938c2d53b51.zip", "author": "Raphael Olivier;Hadi Abdullah;Bhiksha Raj", "authorids": "~Raphael_Olivier1;~Hadi_Abdullah1;~Bhiksha_Raj1", "gender": "M;;M", "homepage": ";https://hadiabdullah.github.io/;https://www.cs.cmu.edu/directory/bhikshar/", "dblp": "225/7698;205/2013;60/3996", "google_scholar": ";;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Raphael_Olivier1;~Hadi_Abdullah1;~Bhiksha_Raj1", "aff": "Carnegie Mellon University;VISA;Mohamed bin Zayed University of Artificial Intelligence", "aff_domain": "cmu.edu;visa.com;mbzuai.ac.ae", "position": "PhD student;Researcher;Full Professor", "bibtex": "@misc{\nolivier2023watch,\ntitle={Watch What You Pretrain For: Targeted, Transferable Adversarial Examples on Self-Supervised Speech Recognition models},\nauthor={Raphael Olivier and Hadi Abdullah and Bhiksha Raj},\nyear={2023},\nurl={https://openreview.net/forum?id=MR8pqi9R7xP}\n}", "github": "", "project": "", "reviewers": "xki8;6Mf8;fvn3;5s8w", "site": "https://openreview.net/forum?id=MR8pqi9R7xP", "pdf_size": 444708, "recommendation": "3;6;6;6", "confidence": "3;4;4;4", "correctness": "2;3;3;3", "technical_novelty": "2;4;3;2", "empirical_novelty": "2;4;3;2", "wc_summary_paper": "75;28;130;68", "wc_strength_and_weaknesses": "220;268;258;259", "wc_clarity_quality_novelty_and_reproducibility": "25;8;24;38", "wc_summary_review": "49;43;27;53", "wc_review": "369;347;439;418", "wc_reply_reviewers": "0;398;14;102", "wc_reply_authors": "557;992;383;990", "reply_reviewers": "0;2;1;1", "reply_authors": "1;2;1;2", "recommendation_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 75.25, 36.34126442489309 ], "wc_strength_and_weaknesses_avg": [ 251.25, 18.45772196128222 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 23.75, 10.638961415476606 ], "wc_summary_review_avg": [ 43.0, 9.899494936611665 ], "wc_review_avg": [ 393.25, 36.85359548266627 ], "wc_reply_reviewers_avg": [ 128.5, 160.4330078256965 ], "wc_reply_authors_avg": [ 730.5, 267.6663034451666 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 1.0, "corr_recommendation_correctness": 1.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6249725177255003418&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Carnegie Mellon University;VISA;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cmu.edu;https://www.visa.com;https://mbzuai.ac.ae", "aff_unique_abbr": "CMU;VISA;MBZUAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1",
"aff_country_unique": "United States;United Arab Emirates" }, { "id": "MRfbe7VAoqu", "title": "Sequential Brick Assembly with Efficient Constraint Satisfaction", "track": "main", "status": "Reject", "tldr": "We address the problem of generating a sequence of LEGO brick assembly with high-fidelity structures, satisfying physical constraints between bricks.", "abstract": "We address the problem of generating a sequence of LEGO brick assembly with high-fidelity structures, satisfying physical constraints between bricks. The assembly problem is challenging since the number of possible structures increases exponentially with the number of available bricks, complicating the physical constraints that must be satisfied across bricks. To tackle this problem, our method performs a brick structure assessment to predict the next brick position and its confidence by employing a U-shaped sparse 3D convolutional network. The convolution filter efficiently validates physical constraints in a parallelizable and scalable manner, allowing it to process different brick types. To generate a novel structure, we devise a sampling strategy to determine the next brick position by considering attachable positions under physical constraints. Instead of using handcrafted brick assembly datasets, our model is trained with a large number of 3D objects that allow it to create new high-fidelity structures. We demonstrate that our method successfully generates diverse brick structures while handling two different brick types and outperforms existing methods based on Bayesian optimization, graph generative model, and reinforcement learning, all of which are limited to a single brick type.", "keywords": "combinatorial problem;brick assembly", "primary_area": "", "supplementary_material": "", "author": "Seokjun Ahn;Jungtaek Kim;Minsu Cho;Jaesik Park", "authorids": "~Seokjun_Ahn1;~Jungtaek_Kim1;~Minsu_Cho1;~Jaesik_Park3", "gender": "Not Specified;M;M;M", "homepage": ";https://jungtaekkim.github.io;http://cvlab.postech.ac.kr/~mcho/;http://jaesik.info", "dblp": "300/8777;31/3193-1;;00/10336", "google_scholar": "JDEvLgwAAAAJ;KXNUYWgAAAAJ;5TyoF5QAAAAJ;_3q6KBIAAAAJ", "orcid": ";0000-0002-1905-1399;;", "linkedin": ";jungtaekkim;minsu-cho-062b3750/;", "or_profile": "~Seokjun_Ahn1;~Jungtaek_Kim1;~Minsu_Cho1;~Jaesik_Park3", "aff": "POSTECH;University of Pittsburgh;POSTECH;Pohang University of Science and Technology", "aff_domain": "postech.ac.kr;pitt.edu;postech.ac.kr;postech.edu", "position": "MS student;Postdoc;Associate Professor;Associate Professor", "bibtex": "@misc{\nahn2023sequential,\ntitle={Sequential Brick Assembly with Efficient Constraint Satisfaction},\nauthor={Seokjun Ahn and Jungtaek Kim and Minsu Cho and Jaesik Park},\nyear={2023},\nurl={https://openreview.net/forum?id=MRfbe7VAoqu}\n}", "github": "", "project": "", "reviewers": "LQmJ;khxJ;sD14;3PGp", "site": "https://openreview.net/forum?id=MRfbe7VAoqu", "pdf_size": 2705253, "recommendation": "3;5;5;6", "confidence": "4;3;3;4", "correctness": "2;3;3;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "75;84;72;57", "wc_strength_and_weaknesses": "352;337;46;259", "wc_clarity_quality_novelty_and_reproducibility": "107;43;893;41", "wc_summary_review": "48;33;69;59", "wc_review": "582;497;1080;416", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "781;657;795;415", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.75,
0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 72.0, 9.72111104761179 ], "wc_strength_and_weaknesses_avg": [ 248.5, 122.12800661600926 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 271.0, 360.0916549991127 ], "wc_summary_review_avg": [ 52.25, 13.36740438529485 ], "wc_review_avg": [ 643.75, 258.617840645227 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 662.0, 152.38438240187213 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.2294157338705618, "corr_recommendation_correctness": 0.9271726499455306, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16537475375620203850&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Pohang University of Science and Technology;University of Pittsburgh", "aff_unique_dep": ";", "aff_unique_url": "https://www.postech.ac.kr;https://www.pitt.edu", "aff_unique_abbr": "POSTECH;Pitt", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Pohang;", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "South Korea;United States" }, { "id": "MT1Pcdo8sGG", "title": "Automatically Answering and Generating Machine Learning Final Exams", "track": "main", "status": "Reject", "tldr": "", "abstract": "Can a machine learn machine learning? We propose to answer this question using the same criteria we use to answer a similar question: can a human learn machine learning? We automatically answer final exams in MIT's recent large machine learning course and generate new questions at a human level. Recently, program synthesis and few-shot learning solved university-level problem set questions in mathematics and STEM courses at a human level. In this work, we solve questions from final exams that differ from problem sets in several ways: the questions are longer, have multiple parts, are more complicated, and span a broader set of topics. We provide a new dataset and benchmark of questions from machine learning final exams and code for automatically answering these questions and generating new questions. To make our dataset a reproducible benchmark, we use automatic checkers for multiple-choice questions, questions with numeric answers, and questions with expression answers, and evaluate a large free language model, Meta\u2019s OPT, and compare the results with OpenAI\u2019s GPT-3 and Codex. A student survey comparing the quality, appropriateness, and difficulty of machine-generated questions with human-written questions shows that across multiple aspects, machine-generated questions are indistinguishable from human-generated questions and are suitable for final exams. We perform ablation studies comparing zero-shot learning with few-shot learning, chain-of-thought prompting, GPT-3 and OPT pre-trained on text and Codex fine-tuned on code on a range of machine learning topics and find that few-shot learning methods perform best.
We make our data and code publicly available for the machine learning community.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/d5be8e192f2165d4fea2460c059f41badd334181.zip", "author": "Sarah Zhang;Reece S Shuttleworth;Zad Chin;Pedro Lantigua;Saisamrit Surbehera;Gregory Hunter;Derek Austin;Yann Hicke;Leonard Tang;Sathwik Karnik;Darnell Granberry;Iddo Drori", "authorids": "~Sarah_Zhang1;~Reece_S_Shuttleworth1;zadchin@college.harvard.edu;lantigua@mit.edu;ss6365@columbia.edu;geh2129@columbia.edu;~Derek_Austin1;~Yann_Hicke1;~Leonard_Tang1;~Sathwik_Karnik1;darnellg@mit.edu;~Iddo_Drori1", "gender": "F;M;;;;;M;;M;;;M", "homepage": ";https://reeceshuttle.me;;;;;;;http://leonardtang.me/;;;https://www.cs.columbia.edu/~idrori", "dblp": ";;;;;;;;306/7940;;;86/2557", "google_scholar": ";J1d4PXYAAAAJ;;;;;;;18ZQFjEAAAAJ;;;https://scholar.google.com/citations?hl=en", "orcid": ";;;;;;;;;;;0000-0001-9797-3885", "linkedin": "sarahzzhang/;reece-shuttleworth-8ab69a220;;;;;derekaustin22/;yannhicke;leonard-tang/;sathwik-karnik-a72768172/;;iddodrori", "or_profile": "~Sarah_Zhang1;~Reece_S_Shuttleworth1;zadchin@college.harvard.edu;lantigua@mit.edu;ss6365@columbia.edu;geh2129@columbia.edu;~Derek_Austin1;~Yann_Hicke1;~Leonard_Tang1;~Sathwik_Karnik1;darnellg@mit.edu;~Iddo_Drori1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;;;;;Columbia University;Cornell University;Harvard University;Computer Science and Artificial Intelligence Laboratory, Electrical Engineering & Computer Science;;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;;;;;columbia.edu;cornell.edu;harvard.edu;csail.mit.edu;;mit.edu", "position": "Undergrad student;Undergrad student;;;;;MS student;MS student;Undergrad student;MS student;;Visiting Associate Professor", "bibtex": "@misc{\nzhang2023automatically,\ntitle={Automatically Answering and Generating Machine Learning Final Exams},\nauthor={Sarah Zhang and Reece S Shuttleworth and Zad Chin and Pedro Lantigua and Saisamrit Surbehera and Gregory Hunter and Derek Austin and Yann Hicke and Leonard Tang and Sathwik Karnik and Darnell Granberry and Iddo Drori},\nyear={2023},\nurl={https://openreview.net/forum?id=MT1Pcdo8sGG}\n}", "github": "", "project": "", "reviewers": "ffgb;FkVg;bhJL", "site": "https://openreview.net/forum?id=MT1Pcdo8sGG", "pdf_size": 248098, "recommendation": "3;5;8", "confidence": "4;4;4", "correctness": "3;4;3", "technical_novelty": "2;4;3", "empirical_novelty": "2;4;3", "wc_summary_paper": "44;176;45", "wc_strength_and_weaknesses": "55;108;61", "wc_clarity_quality_novelty_and_reproducibility": "11;13;46", "wc_summary_review": "27;41;32", "wc_review": "137;338;184", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 5.333333333333333, 2.0548046676563256 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.816496580927726 ], "empirical_novelty_avg": [ 3.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 88.33333333333333, 61.99103877891456 ], "wc_strength_and_weaknesses_avg": [ 74.66666666666667, 23.697163449568293 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 23.333333333333332, 16.048537489614297 ], "wc_summary_review_avg": [ 33.333333333333336, 5.792715732327589 ], "wc_review_avg": [ 219.66666666666666, 85.84611555309625 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 
0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 12, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": -0.1147078669352809, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18092835230883229230&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0;1;2;3;0;0", "aff_unique_norm": "Massachusetts Institute of Technology;Columbia University;Cornell University;Harvard University", "aff_unique_dep": ";;;", "aff_unique_url": "https://web.mit.edu;https://www.columbia.edu;https://www.cornell.edu;https://www.harvard.edu", "aff_unique_abbr": "MIT;Columbia;Cornell;Harvard", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "MT2l4ziaxeE", "title": "Know Your Boundaries: The Advantage of Explicit Behavior Cloning in Offline RL", "track": "main", "status": "Reject", "tldr": "", "abstract": "We introduce an offline reinforcement learning (RL) algorithm that explicitly clones a behavior policy to constrain value learning. In offline RL, it is often important to prevent a policy from selecting unobserved actions, since the consequence of these actions cannot be presumed without additional information about the environment. One straightforward way to implement such a constraint is to explicitly model a given data distribution via behavior cloning and directly force a policy not to select uncertain actions. However, many offline RL methods instantiate the constraint indirectly---for example, pessimistic value estimation---due to a concern about errors when modeling a potentially complex behavior policy. In this work, we argue that it is not only viable but beneficial to explicitly model the behavior policy for offline RL because the constraint can be realized in a stable way with the trained model. We first suggest a theoretical framework that allows us to incorporate behavior-cloned models into value-based offline RL methods, enjoying the strength of both explicit behavior cloning and value learning. Then, we propose a practical method utilizing a score-based generative model for behavior cloning. 
With the proposed method, we show state-of-the-art performance on several datasets within the D4RL and Robomimic benchmarks and achieve competitive performance across all datasets tested.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/0ea0f6f5c83c35a9278debb7916e2ad95408abf4.zip", "author": "Wonjoon Goo;Scott Niekum", "authorids": "~Wonjoon_Goo1;~Scott_Niekum1", "gender": ";M", "homepage": "http://dev.wonjoon.me;https://people.cs.umass.edu/~sniekum/index.php", "dblp": "185/7860;62/8399", "google_scholar": "AmBlcsMAAAAJ;4wXYfSUAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Wonjoon_Goo1;~Scott_Niekum1", "aff": "University of Texas, Austin;University of Massachusetts at Amherst", "aff_domain": "cs.utexas.edu;umass.edu", "position": "PhD student;Associate Professor", "bibtex": "@misc{\ngoo2023know,\ntitle={Know Your Boundaries: The Advantage of Explicit Behavior Cloning in Offline {RL}},\nauthor={Wonjoon Goo and Scott Niekum},\nyear={2023},\nurl={https://openreview.net/forum?id=MT2l4ziaxeE}\n}", "github": "", "project": "", "reviewers": "8gEu;8jQf;piV5;CL7N", "site": "https://openreview.net/forum?id=MT2l4ziaxeE", "pdf_size": 331647, "recommendation": "3;6;6;8", "confidence": "3;4;4;3", "correctness": "2;3;3;4", "technical_novelty": "3;3;3;3", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "74;76;72;111", "wc_strength_and_weaknesses": "910;226;125;23", "wc_clarity_quality_novelty_and_reproducibility": "196;59;154;38", "wc_summary_review": "53;32;73;18", "wc_review": "1233;393;424;190", "wc_reply_reviewers": "638;0;0;0", "wc_reply_authors": "4696;1058;686;0", "reply_reviewers": "6;0;0;0", "reply_authors": "9;2;1;0", "recommendation_avg": [ 5.75, 1.7853571071357126 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 83.25, 16.08376510646683 ], "wc_strength_and_weaknesses_avg": [ 321.0, 347.55071572361925 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 111.75, 65.39256456203564 ], "wc_summary_review_avg": [ 44.0, 20.868636754709208 ], "wc_review_avg": [ 560.0, 398.8151200744525 ], "wc_reply_reviewers_avg": [ 159.5, 276.2621038072359 ], "wc_reply_authors_avg": [ 1610.0, 1821.673406513912 ], "reply_reviewers_avg": [ 1.5, 2.598076211353316 ], "reply_authors_avg": [ 3.0, 3.5355339059327378 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.14002800840280097, "corr_recommendation_correctness": 0.9901475429766743, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:jm_uLbdapcMJ:scholar.google.com/&scioq=Know+Your+Boundaries:+The+Advantage+of+Explicit+Behavior+Cloning+in+Offline+RL&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "University of Texas at Austin;University of Massachusetts Amherst", "aff_unique_dep": ";", "aff_unique_url": "https://www.utexas.edu;https://www.umass.edu", "aff_unique_abbr": "UT Austin;UMass Amherst", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Austin;Amherst", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "The Provable Benefit of Unsupervised Data Sharing for Offline Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11875", "id": "MTTPLcwvqTt", "poster": "", "openreview": "https://openreview.net/forum?id=MTTPLcwvqTt", "slides": "https://iclr.cc/virtual/2023/poster/11875",
"video": "https://iclr.cc/virtual/2023/poster/11875", "author_site": "Hao Hu, Yiqin Yang, Qianchuan Zhao, Chongjie Zhang", "tldr": "We propose a principled way to leverage unlabeled offline RL data with guarantees in linear MDPs, and it outperforms previous methods.", "abstract": "Self-supervised methods have become crucial for advancing deep learning by leveraging data itself to reduce the need for expensive annotations. However, the question of how to conduct self-supervised offline reinforcement learning (RL) in a principled way remains unclear.\nIn this paper, we address this issue by investigating the theoretical benefits of utilizing reward-free data in linear Markov Decision Processes (MDPs) within a semi-supervised setting. Further, we propose a novel Provable Data Sharing algorithm (PDS) to utilize such reward-free data for offline RL. PDS uses additional penalties on the reward function learned from labeled data to prevent overestimation, ensuring a conservative algorithm. Our results on various offline RL tasks demonstrate that PDS significantly improves the performance of offline RL algorithms with reward-free data. Overall, our work provides a promising approach to leveraging the benefits of unlabeled data in offline RL while maintaining theoretical guarantees. We believe our findings will contribute to developing more robust self-supervised RL methods.\n", "keywords": "offline reinforcement learning;unsupervised learning;data sharing", "primary_area": "", "supplementary_material": "/attachment/94993f64f73aff53249593c4838ac169f493cca6.zip", "author": "Hao Hu;Yiqin Yang;Qianchuan Zhao;Chongjie Zhang", "authorids": "~Hao_Hu3;~Yiqin_Yang1;~Qianchuan_Zhao1;~Chongjie_Zhang1", "gender": "M;M;M;", "homepage": "https://mousehu.github.io;https://www.researchgate.net/profile/Yiqin-Yang-2;;", "dblp": "67/6924-6;180/7725;82/3427;29/6693", "google_scholar": "https://scholar.google.com/citations?hl=en;aHTi5IEAAAAJ;;LjxqXycAAAAJ", "orcid": ";;0000-0002-7952-5621;", "linkedin": "hao-hu-tsinghua;;;", "or_profile": "~Hao_Hu3;~Yiqin_Yang1;~Qianchuan_Zhao1;~Chongjie_Zhang1", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nhu2023the,\ntitle={The Provable Benefit of Unsupervised Data Sharing for Offline Reinforcement Learning},\nauthor={Hao Hu and Yiqin Yang and Qianchuan Zhao and Chongjie Zhang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=MTTPLcwvqTt}\n}", "github": "", "project": "", "reviewers": "ZyTC;5Qve;dc2J", "pdf_size": 1664071, "recommendation": "6;6;8", "confidence": "4;3;4", "correctness": "3;3;3", "technical_novelty": "3;2;3", "empirical_novelty": "3;2;3", "wc_summary_paper": "74;59;88", "wc_strength_and_weaknesses": "1067;288;507", "wc_clarity_quality_novelty_and_reproducibility": "92;123;42", "wc_summary_review": "52;64;87", "wc_review": "1285;534;724", "wc_reply_reviewers": "186;0;49", "wc_reply_authors": "2381;1142;897", "reply_reviewers": "3;0;1", "reply_authors": "5;4;3", "recommendation_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ],
"wc_summary_paper_avg": [ 73.66666666666667, 11.841546445554409 ], "wc_strength_and_weaknesses_avg": [ 620.6666666666666, 328.0247280651601 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 85.66666666666667, 33.369979855486214 ], "wc_summary_review_avg": [ 67.66666666666667, 14.522013940527977 ], "wc_review_avg": [ 847.6666666666666, 318.8210922898435 ], "wc_reply_reviewers_avg": [ 78.33333333333333, 78.7160861719015 ], "wc_reply_authors_avg": [ 1473.3333333333333, 649.5641273619992 ], "reply_reviewers_avg": [ 1.3333333333333333, 1.247219128924647 ], "reply_authors_avg": [ 4.0, 0.816496580927726 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.49999999999999983, "corr_recommendation_correctness": 0.0, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8053085275239257811&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=MTTPLcwvqTt", "email": "tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "MW0hjtzYRkW", "title": "RISC-V MICROARCHITECTURE EXPLORATION VIA REINFORCEMENT LEARNING", "track": "main", "status": "Withdraw", "tldr": "Microarchitecture design space exploration via reinforcement learning for RISC-V processors", "abstract": "Microarchitecture determines a processor's detailed structure, affecting the processor's performance, power, and area (PPA).\nDeciding on a microarchitecture to achieve a good balance between the PPA values is a non-trivial problem.\nPrior approaches mainly require expert knowledge.\nSuch solutions become inefficient as modern processors grow increasingly complicated.\nMachine learning has solved problems automatically with high-quality results while requiring less domain knowledge.\nIn this paper, we formulate the problem as a Markov decision process and propose an end-to-end solution framework via reinforcement learning.\nFirstly, a dynamically-weighted reward design is proposed to accommodate the optimization of multiple negatively-correlated objectives.\nSecondly, local heuristic search is adopted in the action design with prior knowledge of microarchitectures.\nThirdly, lightweight calibrated PPA models are incorporated to accelerate the learning process.\nExperiments with electronic design automation (EDA) tools on well-known RISC-V processors demonstrate that our methodology can learn from experience and outperform human implementations and prior solutions in PPA and overall running time.", "keywords": "Design Space Exploration;Reinforcement Learning", "primary_area": "", "supplementary_material": "", "author": "Chen Bai;Jianwang Zhai;Yuzhe Ma;Wenlong Lyu;Zhitang Chen;Bei Yu;Martin D.
Wong", "authorids": "~Chen_Bai1;~Jianwang_Zhai1;~Yuzhe_Ma2;~Wenlong_Lyu1;~Zhitang_Chen1;~Bei_Yu2;~Martin_D._Wong1", "gender": ";;M;M;M;M;M", "homepage": "https://baichen318.github.io/;;https://yuzhe-ma.com;;;http://www.cse.cuhk.edu.hk/~byu/index.html;https://www.ece.illinois.edu/directory/profile/mdfwong/", "dblp": ";;172/4863;219/4148;06/10875;28/4556-1.html;", "google_scholar": "eEQsWDIAAAAJ;;;;;tGneTm4AAAAJ;https://scholar.google.com.tw/citations?user=WPhoQiUAAAAJ", "orcid": "0000-0002-1742-0090;;;;;0000-0001-6406-4810;", "linkedin": "chen-bai-b48b95152/;;;;;yubei/;", "or_profile": "~Chen_Bai1;~Jianwang_Zhai1;~Yuzhe_Ma2;~Wenlong_Lyu1;~Zhitang_Chen1;~Bei_Yu2;~Martin_D._Wong1", "aff": "Department of Computer Science and Engineering, The Chinese University of Hong Kong;;The Hong Kong University of Science and Technology (Guangzhou);;Huawei Technologies Ltd.;Department of Computer Science and Engineering, The Chinese University of Hong Kong;University of Illinois, Urbana Champaign", "aff_domain": "cse.cuhk.edu.hk;;hkust-gz.edu.cn;;huawei.com;cse.cuhk.edu.hk;", "position": "PhD student;;Assistant Professor;;Researcher;Associate Professor;", "bibtex": "@misc{\nbai2023riscv,\ntitle={{RISC}-V {MICROARCHITECTURE} {EXPLORATION} {VIA} {REINFORCEMENT} {LEARNING}},\nauthor={Chen Bai and Jianwang Zhai and Yuzhe Ma and Wenlong Lyu and Zhitang Chen and Bei Yu and Martin D. Wong},\nyear={2023},\nurl={https://openreview.net/forum?id=MW0hjtzYRkW}\n}", "github": "", "project": "", "reviewers": "m1U6;av5H;cmZi;A5zz", "site": "https://openreview.net/forum?id=MW0hjtzYRkW", "pdf_size": 2230257, "recommendation": "3;3;3;5", "confidence": "4;4;3;3", "correctness": "2;2;2;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "132;92;202;30", "wc_strength_and_weaknesses": "253;109;109;93", "wc_clarity_quality_novelty_and_reproducibility": "44;324;123;120", "wc_summary_review": "57;31;158;19", "wc_review": "486;556;592;262", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 114.0, 62.465990746965666 ], "wc_strength_and_weaknesses_avg": [ 141.0, 64.99230723708769 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 152.75, 103.81564188502617 ], "wc_summary_review_avg": [ 66.25, 54.72373799367145 ], "wc_review_avg": [ 474.0, 128.195163715329 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Jf_T4LadFigJ:scholar.google.com/&scioq=RISC-V+MICROARCHITECTURE+EXPLORATION+VIA+REINFORCEMENT+LEARNING&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2;0;3", "aff_unique_norm": "Chinese University of Hong Kong;Hong Kong University of Science and Technology;Huawei;University of Illinois Urbana-Champaign", "aff_unique_dep": "Department of Computer Science and Engineering;;Huawei Technologies;", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.ust.hk;https://www.huawei.com;https://illinois.edu", "aff_unique_abbr": "CUHK;HKUST;Huawei;UIUC", 
"aff_campus_unique_index": "0;1;0;3", "aff_campus_unique": "Hong Kong SAR;Guangzhou;;Urbana-Champaign", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "China;United States" }, { "id": "MWGDhOQkr3", "title": "Towards Reliable Link Prediction with Robust Graph Information Bottleneck", "track": "main", "status": "Reject", "tldr": "We provide an information-theory-guided principle and its two instantiations for robust link prediction under inherent edge noise.", "abstract": "Link prediction on graphs has achieved great success with the rise of deep graph learning. However, the potential robustness under the edge noise is less investigated. We reveal that the inherent edge noise that naturally perturbs both input topology and target label leads to severe performance degradation and representation collapse. Here, we propose an information-theory guided principle, Robust Graph Information Bottleneck (RGIB), to extract reliable supervision signals and avoid representation collapse. Different from the general information bottleneck, RGIB decouples and balances the mutual dependence among graph topology, edge label, and representation, building a new learning objective for robust representation. We also provide two implementations, RGIB-SSL and RGIB-REP, that benefit from different methodologies, i.e., self-supervised learning and data reparametrization, for indirect and direct data denoising, respectively. Extensive experiments on six benchmarks with various scenarios verify the effectiveness of the proposed RGIB.", "keywords": "Robust link prediction;Inherent edge noise;Graph representation learning", "primary_area": "", "supplementary_material": "", "author": "Zhanke Zhou;Jiangchao Yao;Jiaxu Liu;Xiawei Guo;LI He;Shuo Yuan;quanming yao;Liang Wang;Bo Han", "authorids": "~Zhanke_Zhou1;~Jiangchao_Yao1;~Jiaxu_Liu1;~Xiawei_Guo2;~LI_He2;~Shuo_Yuan1;~quanming_yao1;~Liang_Wang15;~Bo_Han1", "gender": "M;M;M;M;M;M;M;M;M", "homepage": "https://andrewzhou924.github.io/;https://sunarker.github.io/;;;;;https://lars-group.github.io/;;https://bhanml.github.io/", "dblp": "285/5311;166/5900;;185/1356.html;;;158/1014;;241/0472-3", "google_scholar": "GVXErr0AAAAJ;w8oDh9QAAAAJ;;;YBcGfoIAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/schhp?hl=en;3hcLUEAAAAAJ;nTNjqHwAAAAJ", "orcid": ";;;;;;;;", "linkedin": ";;jiaxu-liu-984379aa/;;;;;;", "or_profile": "~Zhanke_Zhou1;~Jiangchao_Yao1;~Jiaxu_Liu1;~Xiawei_Guo2;~LI_He2;~Shuo_Yuan1;~quanming_yao1;~Liang_Wang15;~bo_han2", "aff": "Hong Kong Baptist University;Shanghai Artificial Intelligence Laboratory;Alibaba Group;;;;Department of Electronic Engineering;Alibaba Group;RIKEN", "aff_domain": "hkbu.edu.hk;pjlab.org.cn;alibaba-inc.com;;;;tsinghua.edu.cn;alibaba-inc.com;riken.jp", "position": "PhD student;Researcher;Researcher;;;;Assistant Professor;Senior Tech Expert;Adjunct Scientist", "bibtex": "@misc{\nzhou2023towards,\ntitle={Towards Reliable Link Prediction with Robust Graph Information Bottleneck},\nauthor={Zhanke Zhou and Jiangchao Yao and Jiaxu Liu and Xiawei Guo and LI He and Shuo Yuan and quanming yao and Liang Wang and Bo Han},\nyear={2023},\nurl={https://openreview.net/forum?id=MWGDhOQkr3}\n}", "github": "", "project": "", "reviewers": "sr13;m5JE;zAFv;gCKh", "site": "https://openreview.net/forum?id=MWGDhOQkr3", "pdf_size": 3602816, "recommendation": "5;5;6;6", "confidence": "4;4;3;3", "correctness": "3;2;3;3", "technical_novelty": "3;2;3;3", "empirical_novelty": "3;2;3;0", "wc_summary_paper": "35;38;57;69", 
"wc_strength_and_weaknesses": "373;114;130;221", "wc_clarity_quality_novelty_and_reproducibility": "8;435;51;65", "wc_summary_review": "16;279;24;17", "wc_review": "432;866;262;372", "wc_reply_reviewers": "0;347;0;196", "wc_reply_authors": "3458;3350;1245;1341", "reply_reviewers": "0;3;0;1", "reply_authors": "12;10;6;5", "recommendation_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 1.224744871391589 ], "wc_summary_paper_avg": [ 49.75, 13.953046262375826 ], "wc_strength_and_weaknesses_avg": [ 209.5, 102.84089653440405 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 139.75, 171.75181949545689 ], "wc_summary_review_avg": [ 84.0, 112.62548557054038 ], "wc_review_avg": [ 483.0, 229.37523841949462 ], "wc_reply_reviewers_avg": [ 135.75, 145.87044765818743 ], "wc_reply_authors_avg": [ 2348.5, 1056.7356575795102 ], "reply_reviewers_avg": [ 1.0, 1.224744871391589 ], "reply_authors_avg": [ 8.25, 2.8613807855648994 ], "replies_avg": [ 43, 0 ], "authors#_avg": [ 9, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17512038218823428276&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;2;4", "aff_unique_norm": "Hong Kong Baptist University;Shanghai Artificial Intelligence Laboratory;Alibaba Group;Institution Name Not Provided;RIKEN", "aff_unique_dep": ";;;Department of Electronic Engineering;", "aff_unique_url": "https://www.hkbu.edu.hk;http://www.shailab.org/;https://www.alibaba.com;;https://www.riken.jp", "aff_unique_abbr": "HKBU;Shanghai AI Lab;Alibaba;;RIKEN", "aff_campus_unique_index": "0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;2", "aff_country_unique": "China;;Japan" }, { "id": "MWoZh1gvbxA", "title": "Hidden Poison: Machine unlearning enables camouflaged poisoning attacks", "track": "main", "status": "Reject", "tldr": "We show that machine unlearning can be used to implement a new type of camouflaged data poisoning attack. ", "abstract": "We introduce camouflaged data poisoning attacks, a new attack vector that arises in the context of machine unlearning and other settings when model retraining may be induced. An adversary first adds a few carefully crafted points to the training dataset such that the impact on the model's predictions is minimal. The adversary subsequently triggers a request to remove a subset of the introduced points at which point the attack is unleashed and the model's predictions are negatively affected. In particular, we consider clean-label targeted attacks (in which the goal is to cause the model to misclassify a specific test point) on datasets including CIFAR-10, Imagenette, and Imagewoof. This attack is realized by constructing camouflage datapoints that mask the effect of a poisoned dataset.", "keywords": "Machine Unlearning;Poisoning Attack;Camouflaging Poisons", "primary_area": "", "supplementary_material": "/attachment/09fdc10491b5210f61fba033e8d52f8f33904153.zip", "author": "Jimmy Z. 
Di;Jack Douglas;Jayadev Acharya;Gautam Kamath;Ayush Sekhari", "authorids": "~Jimmy_Z._Di1;~Jack_Douglas2;~Jayadev_Acharya2;~Gautam_Kamath1;~Ayush_Sekhari1", "gender": "M;M;M;M;M", "homepage": ";;https://people.ece.cornell.edu/acharya/;http://www.gautamkamath.com/;https://ayush.sekhari.com/", "dblp": ";;74/5865;73/11140;203/8152", "google_scholar": ";;70vJVxcAAAAJ;MK6zHkYAAAAJ;jH9i188AAAAJ", "orcid": ";;;;", "linkedin": "jimmy-di-0319/;jack-douglas-910896150/;;;", "or_profile": "~Jimmy_Z._Di1;~Jack_Douglas2;~Jayadev_Acharya2;~Gautam_Kamath1;~Ayush_Sekhari1", "aff": "University of Waterloo;University of Waterloo;Cornell University;University of Waterloo;Massachusetts Institute of Technology", "aff_domain": "uwaterloo.ca;uwaterloo.ca;cornell.edu;uwaterloo.ca;mit.edu", "position": "MS student;Undergrad student;Assistant Professor;Assistant Professor;Postdoc", "bibtex": "@misc{\ndi2023hidden,\ntitle={Hidden Poison: Machine unlearning enables camouflaged poisoning attacks},\nauthor={Jimmy Z. Di and Jack Douglas and Jayadev Acharya and Gautam Kamath and Ayush Sekhari},\nyear={2023},\nurl={https://openreview.net/forum?id=MWoZh1gvbxA}\n}", "github": "", "project": "", "reviewers": "yC6u;XcYH;xqqh", "site": "https://openreview.net/forum?id=MWoZh1gvbxA", "pdf_size": 44404701, "recommendation": "5;6;6", "confidence": "4;5;2", "correctness": "3;3;3", "technical_novelty": "3;2;2", "empirical_novelty": "3;4;2", "wc_summary_paper": "116;95;63", "wc_strength_and_weaknesses": "500;384;38", "wc_clarity_quality_novelty_and_reproducibility": "1;214;15", "wc_summary_review": "4;46;13", "wc_review": "621;739;129", "wc_reply_reviewers": "0;207;0", "wc_reply_authors": "1547;1289;429", "reply_reviewers": "0;2;0", "reply_authors": "3;4;2", "recommendation_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 1.247219128924647 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 91.33333333333333, 21.791945504908202 ], "wc_strength_and_weaknesses_avg": [ 307.3333333333333, 196.2470098852181 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 76.66666666666667, 97.27738117820034 ], "wc_summary_review_avg": [ 21.0, 18.05547008526779 ], "wc_review_avg": [ 496.3333333333333, 264.173343764195 ], "wc_reply_reviewers_avg": [ 69.0, 97.58073580374356 ], "wc_reply_authors_avg": [ 1088.3333333333333, 477.9688506819479 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 3.0, 0.816496580927726 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.18898223650461363, "corr_recommendation_correctness": 0.0, "gs_citation": 48, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4576916714254473890&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;1;0;2", "aff_unique_norm": "University of Waterloo;Cornell University;Massachusetts Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://uwaterloo.ca;https://www.cornell.edu;https://web.mit.edu", "aff_unique_abbr": "UW;Cornell;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;1", "aff_country_unique": "Canada;United States" }, { "id": "MXoeggsH7yP", "title": "Improving the Latent Space of Image Style Transfer", "track": "main", "status": "Withdraw", "tldr": "We find a widespread problem in style transfer caused by the inappropriate 
pre-trained encoders used to provide supervision signals and design a training scheme to alleviate this problem. ", "abstract": "Existing neural style transfer studies utilize statistical information of features from a pre-trained encoder as representations of the style and achieve significant improvement in synthesizing artistic images. However, in some cases, the feature statistics from the pre-trained encoder may not be consistent with the visual style we perceive. The style distance between some images of different styles is smaller than that of the same style. In such an inappropriate latent space, the objective function of the existing methods will be optimized in the wrong direction, resulting in poor stylization results. In addition, the lack of content details in the features extracted by the pre-trained encoder also leads to the content leak problem. In order to solve these issues in the latent space used by style transfer, we propose two contrastive training schemes to get a refined encoder that is more suitable for this task. The style contrastive loss pulls the stylized result closer to images of the same visual style and pushes it away from the content image. The content contrastive loss enables the encoder to retain more available details. The training scheme can be directly added to existing style transfer methods and significantly improve their results. Extensive experimental results demonstrate the effectiveness and superiority of our methods. ", "keywords": "style transfer;contrastive learning", "primary_area": "", "supplementary_material": "", "author": "Yunpeng Bai;Cairong Wang;Chun Yuan", "authorids": "~Yunpeng_Bai1;~Cairong_Wang1;~Chun_Yuan1", "gender": "M;M;M", "homepage": ";https://www.sigs.tsinghua.edu.cn/fg3/105064.jhtml;https://github.com/branchCode", "dblp": ";;", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com.hk/citations?user=fYdxi2sAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Yunpeng_Bai1;~Chun_Yuan1;~Wang_Cairong1", "aff": "Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "MS student;Full Professor;MS student", "bibtex": "@misc{\nbai2023improving,\ntitle={Improving the Latent Space of Image Style Transfer},\nauthor={Yunpeng Bai and Cairong Wang and Chun Yuan},\nyear={2023},\nurl={https://openreview.net/forum?id=MXoeggsH7yP}\n}", "github": "", "project": "", "reviewers": "LFK7;7BRF;ChmK;w12z", "site": "https://openreview.net/forum?id=MXoeggsH7yP", "pdf_size": 5171019, "recommendation": "3;3;3;5", "confidence": "4;4;5;4", "correctness": "3;3;1;3", "technical_novelty": "3;2;2;2", "empirical_novelty": "2;3;2;2", "wc_summary_paper": "67;76;92;55", "wc_strength_and_weaknesses": "225;442;118;423", "wc_clarity_quality_novelty_and_reproducibility": "57;20;48;55", "wc_summary_review": "66;12;60;47", "wc_review": "415;550;318;580", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 2.5, 0.8660254037844386 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 72.5, 13.5 ], "wc_strength_and_weaknesses_avg": [ 302.0, 136.03859746410208 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 45.0, 14.815532390029054 ], "wc_summary_review_avg": [ 46.25,
20.932928605429293 ], "wc_review_avg": [ 465.75, 105.54234932007151 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:iretqhxUX2IJ:scholar.google.com/&scioq=Improving+the+Latent+Space+of+Image+Style+Transfer&hl=en&as_sdt=0,14", "gs_version_total": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Does Zero-Shot Reinforcement Learning Exist?", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11424", "id": "MYEap_OcQI", "poster": "/media/PosterPDFs/ICLR%202023/11424.png?t=1681920807.4594228", "openreview": "https://openreview.net/forum?id=MYEap_OcQI", "slides": "https://iclr.cc/virtual/2023/poster/11424", "video": "https://iclr.cc/virtual/2023/poster/11424", "author_site": "Ahmed Touati, J\u00e9r\u00e9my Rapin, Yann Ollivier", "tldr": "We revisit zero-shot RL based on successor representations: we introduce improved losses and new models, and evaluate them systematically on the unsupervised RL benchmark.", "abstract": "A zero-shot RL agent is an agent that can solve any RL task in a given environment, instantly with no additional planning or learning, after an initial reward-free learning phase. This marks a shift from the reward-centric RL paradigm towards controllable agents that can follow arbitrary instructions in an environment. Current RL agents can solve families of related tasks at best, or require planning anew for each task. Strategies for approximate zero-shot RL have been suggested using successor features (SFs) (Borsa et al., 2018) or forward-backward (FB) representations (Touati & Ollivier, 2021), but testing has been limited. \nAfter clarifying the relationships between these schemes, we introduce improved losses and new SF models, and test the viability of zero-shot RL schemes systematically on tasks from the Unsupervised RL benchmark (Laskin et al., 2021). To disentangle universal representation learning from exploration, we work in an offline setting and repeat the tests on several existing replay buffers.\nSFs appear to suffer from the choice of the elementary state features. SFs with Laplacian eigenfunctions do well, while SFs based on auto-encoders, inverse curiosity, transition models, low-rank transition matrix, contrastive learning, or diversity (APS), perform inconsistently. In contrast, FB representations jointly learn the elementary and successor features from a single, principled criterion.
They perform best and consistently across the board, reaching $85\\%$ of supervised RL performance with a good replay buffer, in a zero-shot manner.", "keywords": "controllable agents;zero-shot RL;self-supervised representation learning;successor representation;offline RL", "primary_area": "", "supplementary_material": "", "author": "Ahmed Touati;J\u00e9r\u00e9my Rapin;Yann Ollivier", "authorids": "~Ahmed_Touati1;~J\u00e9r\u00e9my_Rapin1;~Yann_Ollivier2", "gender": "M;M;M", "homepage": ";;http://www.yann-ollivier.org/rech/", "dblp": "147/5871;133/8584;63/343", "google_scholar": "https://scholar.google.fr/citations?user=D4LT5xAAAAAJ;tQ8DdN8AAAAJ;", "orcid": ";;", "linkedin": "ahmed-touati-4a132a76/;j%C3%A9r%C3%A9my-rapin-13851613/;", "or_profile": "~Ahmed_Touati1;~J\u00e9r\u00e9my_Rapin1;~Yann_Ollivier2", "aff": "Meta Facebook;Meta Facebook;Meta Artificial Intelligence Research", "aff_domain": "fb.com;meta.com;meta.com", "position": "Researcher;Research Engineer;Research scientist", "bibtex": "@inproceedings{\ntouati2023does,\ntitle={Does Zero-Shot Reinforcement Learning Exist?},\nauthor={Ahmed Touati and J{\\'e}r{\\'e}my Rapin and Yann Ollivier},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=MYEap_OcQI}\n}", "github": "", "project": "", "reviewers": "zaXC;uRSG;UcNw;NRQ8", "pdf_size": 996535, "recommendation": "3;8;8;10", "confidence": "3;4;4;4", "correctness": "2;4;4;4", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;3;4;3", "wc_summary_paper": "59;95;74;55", "wc_strength_and_weaknesses": "56;463;1177;222", "wc_clarity_quality_novelty_and_reproducibility": "402;61;68;60", "wc_summary_review": "56;54;56;52", "wc_review": "573;673;1375;389", "wc_reply_reviewers": "0;861;0;0", "wc_reply_authors": "461;1111;799;79", "reply_reviewers": "0;3;0;0", "reply_authors": "1;3;1;1", "recommendation_avg": [ 7.25, 2.5860201081971503 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.5, 0.8660254037844386 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 70.75, 15.690363284513205 ], "wc_strength_and_weaknesses_avg": [ 479.5, 427.9126663233983 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 147.75, 146.8236612402783 ], "wc_summary_review_avg": [ 54.5, 1.6583123951777 ], "wc_review_avg": [ 752.5, 373.5568899110281 ], "wc_reply_reviewers_avg": [ 215.25, 372.8239363292008 ], "wc_reply_authors_avg": [ 612.5, 384.336766391143 ], "reply_reviewers_avg": [ 0.75, 1.299038105676658 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.9488474727161108, "corr_recommendation_correctness": 0.9488474727161108, "gs_citation": 50, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14054233643517801817&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=MYEap_OcQI", "email": "fb.com;meta.com;meta.com", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Meta", "aff_unique_dep": "Meta Platforms, Inc.", "aff_unique_url": "https://meta.com", "aff_unique_abbr": "Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "MZFDUB40NJ", "title": "Uncertainty-aware off policy learning", "track": "main", "status": "Reject", "tldr": "We consider the estimation uncertainty of logging policy, and 
propose a new estimator for improved off-policy learning by controlling the effect of inaccurate estimation of the logging policy.", "abstract": "Off-policy learning, referring to the procedure of policy optimization with access only to logged feedback data, has shown importance in various real-world applications, such as search engines, recommender systems, etc. While the ground-truth logging policy, which generates the logged data, is usually unknown, previous work directly takes its estimated value in off-policy learning, resulting in a biased estimator. This estimator has both high bias and variance on samples with small and inaccurately estimated logging probabilities. \nIn this work, we explicitly model the uncertainty in the estimated logging policy and propose a novel \underline{U}ncertainty-aware \underline{I}nverse \underline{P}ropensity \underline{S}core estimator (UIPS) for improved off-policy learning. Experimental results on synthetic and three real-world recommendation datasets demonstrate the advantageous sample efficiency of the proposed UIPS estimator.", "keywords": "off-policy learning;uncertainty", "primary_area": "", "supplementary_material": "/attachment/e9a65ffd3e287223aefe3f550880ebce45327b93.zip", "author": "Xiaoying Zhang;Junpu Chen;Hongning Wang;Hong Xie;Hang Li", "authorids": "~Xiaoying_Zhang3;~Junpu_Chen1;~Hongning_Wang1;~Hong_Xie2;~Hang_Li4", "gender": "F;M;M;M;M", "homepage": "https://github.com/Xiaoyinggit;https://sites.google.com/view/junpu-chen;http://www.cs.virginia.edu/~hw5x/;https://hongxie.github.io/;https://hangli-hl.github.io/", "dblp": "46/7725;72/3514;05/6545;39/3657-4;https://dblp.org/pers/hd/l/Li_0001:Hang", "google_scholar": "lwKg4C4AAAAJ;;qkdvKNoAAAAJ;https://scholar.google.com/citations?view_op=list_works;nTl5mSwAAAAJ", "orcid": ";;0000-0002-6524-9195;0000-0001-7935-7210;0000-0001-9628-3487", "linkedin": ";;;;hang-li-84aa6314/", "or_profile": "~Xiaoying_Zhang3;~Junpu_Chen1;~Hongning_Wang1;~Hong_Xie2;~Hang_Li4", "aff": "ByteDance AILab;ChongQing University;University of Virginia;Chongqing Institute of Green and Intelligent Technology, Chinese Academy of Sciences;ByteDance Technology", "aff_domain": "bytedance.com;cqu.edu.cn;virginia.edu;cigit.ac.cn;bytedance.com", "position": "Researcher;MS student;Associate Professor;Researcher;Head of Research", "bibtex": "@misc{\nzhang2023uncertaintyaware,\ntitle={Uncertainty-aware off policy learning},\nauthor={Xiaoying Zhang and Junpu Chen and Hongning Wang and Hong Xie and Hang Li},\nyear={2023},\nurl={https://openreview.net/forum?id=MZFDUB40NJ}\n}", "github": "", "project": "", "reviewers": "4K6H;NQXb;DgbC;FTLw", "site": "https://openreview.net/forum?id=MZFDUB40NJ", "pdf_size": 1031330, "recommendation": "3;5;6;8", "confidence": "4;4;3;3", "correctness": "4;3;4;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "3;2;3;3", "wc_summary_paper": "102;91;87;49", "wc_strength_and_weaknesses": "296;348;317;382", "wc_clarity_quality_novelty_and_reproducibility": "103;167;25;90", "wc_summary_review": "40;106;65;66", "wc_review": "541;712;494;587", "wc_reply_reviewers": "0;0;29;0", "wc_reply_authors": "590;984;696;691", "reply_reviewers": "0;0;1;0", "reply_authors": "1;2;2;1", "recommendation_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 82.25, 19.967160539245434 ], "wc_strength_and_weaknesses_avg": [ 335.75,
32.48364973336586 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 96.25, 50.415151492383714 ], "wc_summary_review_avg": [ 69.25, 23.636571240347024 ], "wc_review_avg": [ 583.5, 81.14955329513528 ], "wc_reply_reviewers_avg": [ 7.25, 12.55736835487436 ], "wc_reply_authors_avg": [ 740.25, 146.9462061436089 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.8320502943378437, "corr_recommendation_correctness": -0.5547001962252291, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:YMM8ziHNq_QJ:scholar.google.com/&scioq=Uncertainty-aware+off+policy+learning&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "ByteDance;Chongqing University;University of Virginia;Chinese Academy of Sciences", "aff_unique_dep": "AILab;;;Institute of Green and Intelligent Technology", "aff_unique_url": "https://ailab.bytedance.com/;https://www.cqu.edu.cn/;https://www.virginia.edu;http://www.cas.cn/", "aff_unique_abbr": "ByteDance AILab;CQU;UVA;CAS", "aff_campus_unique_index": "1", "aff_campus_unique": ";Chongqing", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "China;United States" }, { "title": "Real-time variational method for learning neural trajectory and its dynamics", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10849", "id": "M_MvkWgQSt", "poster": "", "openreview": "https://openreview.net/forum?id=M_MvkWgQSt", "slides": "https://iclr.cc/virtual/2023/poster/10849", "video": "https://iclr.cc/virtual/2023/poster/10849", "author_site": "Matthew Dowling, Yuan Zhao, Il Memming Park", "tldr": "A real-time variational Bayesian method aimed at uncovering latent neural trajectories and their dynamical systems.", "abstract": "Latent variable models have become instrumental in computational neuroscience for reasoning about neural computation. This has fostered the development of powerful offline algorithms for extracting latent neural trajectories from neural recordings. However, despite the potential of real-time alternatives to give immediate feedback to experimentalists, and enhance experimental design, they have received markedly less attention. In this work, we introduce the exponential family variational Kalman filter (eVKF), an online recursive Bayesian method aimed at inferring latent trajectories while simultaneously learning the dynamical system generating them. eVKF works for arbitrary likelihoods and utilizes the constant base measure exponential family to model the latent state stochasticity. We derive a closed-form variational analog to the predict step of the Kalman filter which leads to a provably tighter bound on the ELBO compared to another online variational method. 
We validate our method on synthetic and real-world data, and, notably, show that it achieves competitive performance.", "keywords": "neural dynamics;neural trajectory;online variational inference", "primary_area": "", "supplementary_material": "/attachment/7fafd529937b69307412f151ca86fa33c950131d.zip", "author": "Matthew Dowling;Yuan Zhao;Il Memming Park", "authorids": "~Matthew_Dowling2;~Yuan_Zhao1;~Il_Memming_Park1", "gender": "M;;M", "homepage": ";;http://catniplab.github.io/", "dblp": ";65/2105-4;00/4652-2", "google_scholar": "https://scholar.google.com/citations?hl=en;XLpD5N0AAAAJ;CsmltusAAAAJ", "orcid": ";0000-0002-6123-8579;0000-0002-4255-7750", "linkedin": ";;memming/", "or_profile": "~Matthew_Dowling2;~Yuan_Zhao1;~Il_Memming_Park1", "aff": "State University of New York, Stony Brook;National Institute of Mental Health;Stony Brook University", "aff_domain": "stonybrook.edu;nih.gov;stonybrook.edu", "position": "PhD student;Researcher;Associate Professor", "bibtex": "@inproceedings{\ndowling2023realtime,\ntitle={Real-time variational method for learning neural trajectory and its dynamics},\nauthor={Matthew Dowling and Yuan Zhao and Il Memming Park},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=M_MvkWgQSt}\n}", "github": "", "project": "", "reviewers": "b7sM;6sm4;8dja;Hbu9", "pdf_size": 7460167, "recommendation": "6;6;8;8", "confidence": "4;3;4;4", "correctness": "3;3;4;3", "technical_novelty": "2;3;3;4", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "76;99;114;95", "wc_strength_and_weaknesses": "205;114;126;225", "wc_clarity_quality_novelty_and_reproducibility": "27;64;363;37", "wc_summary_review": "29;13;48;51", "wc_review": "337;290;651;408", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 96.0, 13.546217184144066 ], "wc_strength_and_weaknesses_avg": [ 167.5, 48.21047603996459 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 122.75, 139.3670961884476 ], "wc_summary_review_avg": [ 35.25, 15.368392889303683 ], "wc_review_avg": [ 421.5, 139.00089927766655 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11549963396408876897&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "pdf": "https://openreview.net/pdf?id=M_MvkWgQSt", "email": "stonybrook.edu;nih.gov;stonybrook.edu", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "State University of New York;National Institute of Mental Health;Stony Brook University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.stonybrook.edu;https://www.nimh.nih.gov;https://www.stonybrook.edu", "aff_unique_abbr": "SUNY Stony Brook;NIMH;SBU", "aff_campus_unique_index": "0", "aff_campus_unique": "Stony Brook;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "M_c03_fU2cl", "title": "Domain-Unified Prompt Representations for Source-Free Domain 
Generalization", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Domain generalization (DG), aiming to make models work on unseen domains, is a surefire way toward general artificial intelligence. Because current DG datasets are limited in scale and diversity, it is difficult for existing methods to scale to diverse domains in open-world scenarios (e.g., science fiction and pixelate style). Therefore, the source-free domain generalization (SFDG) task is necessary and challenging. To address this challenge, we propose an approach based on large-scale vision-language pretraining models (e.g., CLIP), which exploits the extensive domain information embedded in them. The proposed scheme generates diverse prompts from a domain bank that contains many more diverse domains than existing DG datasets. Furthermore, our method yields domain-unified representations from these prompts, thus being able to cope with samples from open-world domains. Extensive experiments on mainstream DG datasets, namely PACS, VLCS, OfficeHome, and DomainNet, show that the proposed method achieves competitive performance compared to state-of-the-art DG methods that require source domain data for training.", "keywords": "Source-free domain generalization;vision-language pretraining model", "primary_area": "", "supplementary_material": "", "author": "Hongjing Niu;Hanting Li;Feng Zhao;Bin Li", "authorids": "~Hongjing_Niu1;~Hanting_Li1;~Feng_Zhao6;~Bin_Li8", "gender": "M;M;M;M", "homepage": ";;https://bivlab123.github.io/;http://staff.ustc.edu.cn/~binli", "dblp": "267/9397;276/2638.html;181/2734-4;89/6764-25", "google_scholar": "Y436boQAAAAJ;Zd9oQeMAAAAJ;https://scholar.google.co.uk/citations?hl=en;", "orcid": "0000-0002-9480-6464;;0000-0001-6767-8105;0000-0002-2332-3959", "linkedin": ";;;", "or_profile": "~Hongjing_Niu1;~Hanting_Li1;~Feng_Zhao6;~Bin_Li8", "aff": "University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn", "position": "PhD student;PhD student;Full Professor;Full Professor", "bibtex": "@misc{\nniu2023domainunified,\ntitle={Domain-Unified Prompt Representations for Source-Free Domain Generalization},\nauthor={Hongjing Niu and Hanting Li and Feng Zhao and Bin Li},\nyear={2023},\nurl={https://openreview.net/forum?id=M_c03_fU2cl}\n}", "github": "", "project": "", "reviewers": "UajX;eFx2;T6gD;ynap", "site": "https://openreview.net/forum?id=M_c03_fU2cl", "pdf_size": 931309, "recommendation": "3;5;5;6", "confidence": "5;3;4;4", "correctness": "2;3;3;3", "technical_novelty": "2;2;3;2", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "51;47;138;79", "wc_strength_and_weaknesses": "180;54;180;203", "wc_clarity_quality_novelty_and_reproducibility": "14;144;39;53", "wc_summary_review": "25;25;76;72", "wc_review": "270;270;433;407", "wc_reply_reviewers": "0;0;0;49", "wc_reply_authors": "398;529;317;236", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 78.75, 36.36189626518397 ], "wc_strength_and_weaknesses_avg": [ 154.25, 58.63605972437097 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 62.5, 49.08411148223017 ],
"wc_summary_review_avg": [ 49.5, 24.540782383616055 ], "wc_review_avg": [ 345.0, 75.56123344678804 ], "wc_reply_reviewers_avg": [ 12.25, 21.21762239271875 ], "wc_reply_authors_avg": [ 370.0, 108.20120147207238 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.6488856845230502, "corr_recommendation_correctness": 0.9271726499455306, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1738802216021936196&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Science and Technology of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ustc.edu.cn", "aff_unique_abbr": "USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "MbWntPvE5Tg", "title": "Planning Immediate Landmarks of Targets for Model-Free Skill Transfer across Agents", "track": "main", "status": "Reject", "tldr": "We propose PILoT, a learning framework for transferring multi-task skills across agents.", "abstract": "In reinforcement learning applications, agents usually need to deal with various input/output features when specified with different state and action spaces by their developers or physical restrictions, indicating re-training from scratch and considerable sample inefficiency, especially when agents follow similar solution steps to achieve tasks.\nIn this paper, we aim to transfer pre-trained skills to alleviate the above challenge. Specifically, we propose PILoT, i.e., Planning Immediate Landmarks of Targets. PILoT utilizes the universal decoupled policy optimization to learn a goal-conditioned state planner; then, we distill a goal-planner to plan immediate landmarks in a model-free style that can be shared among different agents. 
In our experiments, we show the power of PILoT on various transfer challenges, including few-shot transfer across action spaces and dynamics, from low-dimensional vector states to image inputs, and from simple robots to complicated morphologies; and we also illustrate that PILoT provides a zero-shot transfer solution from a simple 2D navigation task to the harder Ant-Maze task.", "keywords": "reinforcement learning;transfer learning", "primary_area": "", "supplementary_material": "", "author": "Minghuan Liu;Zhengbang Zhu;Menghui Zhu;Yuzheng Zhuang;Weinan Zhang;Jianye HAO", "authorids": "~Minghuan_Liu1;~Zhengbang_Zhu1;~Menghui_Zhu1;~Yuzheng_Zhuang1;~Weinan_Zhang1;~Jianye_HAO1", "gender": "M;M;F;M;M;M", "homepage": "http://minghuanliu.com;https://github.com/zbzhu99;;http://wnzhang.net;http://www.icdai.org/jianye.html;https://dblp.org/pid/257/2525.html", "dblp": "249/7554;277/0869;;28/10261-1;21/7664.html;257/2525.html", "google_scholar": ";;https://scholar.google.com/citations?hl=en;Qzss0GEAAAAJ;;", "orcid": ";;;0000-0002-0127-2425;0000-0002-0422-8235;0000-0002-8567-2185", "linkedin": ";;;;;", "or_profile": "~Minghuan_Liu1;~Zhengbang_Zhu1;~Yuzheng_Zhuang1;~Weinan_Zhang1;~Jianye_HAO1;~Menghui_Zhu2", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Huawei Technologies Ltd.;Shanghai Jiaotong University;Tianjin University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;huawei.com;sjtu.edu.cn;tju.edu.cn;sjtu.edu.cn", "position": "PhD student;PhD student;Research Engineer;Associate Professor;Associate Professor;MS student", "bibtex": "@misc{\nliu2023planning,\ntitle={Planning Immediate Landmarks of Targets for Model-Free Skill Transfer across Agents},\nauthor={Minghuan Liu and Zhengbang Zhu and Menghui Zhu and Yuzheng Zhuang and Weinan Zhang and Jianye HAO},\nyear={2023},\nurl={https://openreview.net/forum?id=MbWntPvE5Tg}\n}", "github": "", "project": "", "reviewers": "7PL4;7J1u;1XEZ;LhAd", "site": "https://openreview.net/forum?id=MbWntPvE5Tg", "pdf_size": 5168979, "recommendation": "3;3;5;5", "confidence": "3;4;3;4", "correctness": "3;3;3;2", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "209;90;299;132", "wc_strength_and_weaknesses": "366;415;541;881", "wc_clarity_quality_novelty_and_reproducibility": "44;27;100;90", "wc_summary_review": "59;54;73;114", "wc_review": "678;586;1013;1217", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;30", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;1", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 182.5, 79.65707752610561 ], "wc_strength_and_weaknesses_avg": [ 550.75, 201.07259261271787 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 65.25, 30.55630049596973 ], "wc_summary_review_avg": [ 75.0, 23.569047498785352 ], "wc_review_avg": [ 873.5, 254.13037992337712 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 7.5, 12.99038105676658 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0.25, 0.4330127018922193 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link":
"https://scholar.google.com/scholar?q=related:AiF07kUlaJcJ:scholar.google.com/&scioq=Planning+Immediate+Landmarks+of+Targets+for+Model-Free+Skill+Transfer+across+Agents&hl=en&as_sdt=0,33", "gs_version_total": 4, "aff_unique_index": "0;0;1;0;2;0", "aff_unique_norm": "Shanghai Jiao Tong University;Huawei;Tianjin University", "aff_unique_dep": ";Huawei Technologies;", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.huawei.com;http://www.tju.edu.cn", "aff_unique_abbr": "SJTU;Huawei;TJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "MdKAP5oHJ5l", "title": "In-Time Refining Optimization Trajectories Toward Improved Robust Generalization", "track": "main", "status": "Withdraw", "tldr": "We propose a new method named weighted optimization trajectories (WOT) that refines the optimization trajectories of adversarial training in time to improve robust generalization.", "abstract": "Despite the fact that adversarial training has become the de facto method for improving robustness of deep neural networks, it is well-known that vanilla adversarial training suffers from daunting robust overfitting, resulting in unsatisfactory robust generalization. A number of approaches have been proposed to address these drawbacks such as extra regularization, adversarial weights perturbation, and training with more data over the last few years. However, the robust generalization improvement is yet far from satisfactory. In this paper, we approach this challenge with a brand new perspective -- refining historical optimization trajectories. We propose a new method named \\textbf{Weighted Optimization Trajectories (WOT)} that leverages the optimization trajectories of adversarial training in time. We have conducted extensive experiments to demonstrate the effectiveness of WOT under various state-of-the-art adversarial attacks. Our results show that WOT integrates seamlessly with the existing adversarial training methods and consistently overcomes the robust overfitting issue, resulting in better adversarial robustness. For example, WOT boosts the robust accuracy of AT-PGD under AA-$L_{\\infty}$ attack by 1.53\\% $\\sim$ 6.11\\% and meanwhile increases the clean accuracy by 0.55\\%$\\sim$5.47\\% across SVHN, CIFAR-10, CIFAR-100, and Tiny-ImageNet datasets. 
Codes are included in the supplementary.", "keywords": "Adversarial Robustness;Optimization Trajectories;Robust overfitting", "primary_area": "", "supplementary_material": "/attachment/9eedebea7d90db219c0101d30bace6fe49ed65d9.zip", "author": "Tianjin Huang;Shiwei Liu;Tianlong Chen;Meng Fang;Li Shen;Vlado Menkovski;Lu Yin;Yulong Pei;Mykola Pechenizkiy", "authorids": "~Tianjin_Huang1;~Shiwei_Liu2;~Tianlong_Chen1;~Meng_Fang1;~Li_Shen1;~Vlado_Menkovski2;~Lu_Yin1;~Yulong_Pei1;~Mykola_Pechenizkiy1", "gender": "M;M;M;M;M;M;;;M", "homepage": "https://research.tue.nl/nl/persons/tianjin-huang;https://shiweiliuiiiiiii.github.io/;https://tianlong-chen.github.io;;https://sites.google.com/site/mathshenli/home;https://vlamen.github.io;https://luuyin.com/;;http://www.win.tue.nl/~mpechen/", "dblp": "189/3972;234/8697-3.html;;67/463;91/3680-8;06/726;87/2528-6;;37/4649", "google_scholar": "https://scholar.google.co.uk/citations?user=yFLmPsoAAAAJ;73IbXtsAAAAJ;LE3ctn0AAAAJ;IcNYP1oAAAAJ;yVhgENIAAAAJ;2s9HUEMAAAAJ;G4Xe1NkAAAAJ;;https://scholar.google.com.tw/citations?user=F0uFT_kAAAAJ", "orcid": ";;0000-0001-7774-8197;;;0000-0001-5262-0605;;;0000-0003-4955-0743", "linkedin": ";;tianlong-chen-783862167/;;;;;;mpechen/", "or_profile": "~Tianjin_Huang1;~Shiwei_Liu2;~Tianlong_Chen1;~Meng_Fang1;~Li_Shen1;~Vlado_Menkovski2;~Lu_Yin1;~Yulong_Pei1;~Mykola_Pechenizkiy1", "aff": ";University of Texas at Austin;University of Texas, Austin;Eindhoven University of Technology;JD Explore Academy;Eindhoven University of Technology;University of Aberdeen;;Eindhoven University of Technology", "aff_domain": ";utexas.edu;utexas.edu;tue.nl;jd.com;tue.nl;abdn.ac.uk;;tue.nl", "position": ";Postdoc;PhD student;Assistant Professor;Researcher;Assistant Professor;Assistant Professor;;Full Professor", "bibtex": "@misc{\nhuang2023intime,\ntitle={In-Time Refining Optimization Trajectories Toward Improved Robust Generalization},\nauthor={Tianjin Huang and Shiwei Liu and Tianlong Chen and Meng Fang and Li Shen and Vlado Menkovski and Lu Yin and Yulong Pei and Mykola Pechenizkiy},\nyear={2023},\nurl={https://openreview.net/forum?id=MdKAP5oHJ5l}\n}", "github": "", "project": "", "reviewers": "vEfj;7QYy;NLr8", "site": "https://openreview.net/forum?id=MdKAP5oHJ5l", "pdf_size": 10546590, "recommendation": "5;5;5", "confidence": "4;4;3", "correctness": "3;3;3", "technical_novelty": "2;2;2", "empirical_novelty": "2;2;3", "wc_summary_paper": "91;163;27", "wc_strength_and_weaknesses": "361;345;35", "wc_clarity_quality_novelty_and_reproducibility": "40;250;36", "wc_summary_review": "43;111;288", "wc_review": "535;869;386", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 93.66666666666667, 55.553777749332426 ], "wc_strength_and_weaknesses_avg": [ 247.0, 150.04888092440632 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 108.66666666666667, 99.95109915464772 ], "wc_summary_review_avg": [ 147.33333333333334, 103.26772110501047 ], "wc_review_avg": [ 596.6666666666666, 201.94773801379625 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 9, 0 ], "corr_recommendation_confidence": 0.0, 
"corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:5anOLmJw9bcJ:scholar.google.com/&scioq=In-Time+Refining+Optimization+Trajectories+Toward+Improved+Robust+Generalization&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;0;1;2;1;3;1", "aff_unique_norm": "University of Texas at Austin;Eindhoven University of Technology;JD;University of Aberdeen", "aff_unique_dep": ";;JD Explore Academy;", "aff_unique_url": "https://www.utexas.edu;https://www.tue.nl;;https://www.abdn.ac.uk", "aff_unique_abbr": "UT Austin;TU/e;;Aberdeen", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Austin;", "aff_country_unique_index": "0;0;1;1;3;1", "aff_country_unique": "United States;Netherlands;;United Kingdom" }, { "id": "MdSGM9PEQ7", "title": "Admeta: A Novel Double Exponential Moving Average to Adaptive and Non-adaptive Momentum Optimizers with Bidirectional Looking", "track": "main", "status": "Reject", "tldr": "We propose a bidirectional-looking framework, Admeta, in which a novel double exponential moving average mechanism is proposed to adaptive and non-adaptive momentum optimizers.", "abstract": "Optimizer is an essential component for the success of deep learning, which guides the neural network to update the parameters according to the loss on the training set. SGD and Adam are two classical and effective optimizers on which researchers have proposed many variants, such as SGDM and RAdam. In this paper, we innovatively combine the backward-looking and forward-looking aspects of the optimizer algorithm and propose a novel \\textsc{Admeta} (\\textbf{A} \\textbf{D}ouble exponential \\textbf{M}oving averag\\textbf{E} \\textbf{T}o \\textbf{A}daptive and non-adaptive momentum) optimizer framework. For backward-looking part, we propose a DEMA variant scheme, which is motivated by a metric in the stock market, to replace the common exponential moving average scheme. While in the forward-looking part, we present a dynamic lookahead strategy which asymptotically approaching a set value, maintaining its speed at early stage and high convergence performance at final stage. Based on this idea, we provide two optimizer implementations, \\textsc{AdmetaR} and \\textsc{AdmetaS}, the former based on RAdam and the latter based on SGDM. Through extensive experiments on diverse tasks, we find that the proposed \\textsc{Admeta} optimizer outperforms our base optimizers and shows advantages over recently proposed competitive optimizers. 
We also provide theoretical proof of these two algorithms, which verifies the convergence of our proposed \\textsc{Admeta}.", "keywords": "optimizer;double exponential moving average;bidirectional looking;Adam;SGD", "primary_area": "", "supplementary_material": "/attachment/a9ae112a62df69a6bbbfab1f4974415ab6784020.zip", "author": "Yineng Chen;Zuchao Li;Lefei Zhang;Bo Du;hai zhao", "authorids": "chernyn@whu.edu.cn;~Zuchao_Li1;~Lefei_Zhang1;~Bo_Du1;~hai_zhao1", "gender": ";M;M;M;M", "homepage": ";https://zcli-charlie.github.io/;;;http://bcmi.sjtu.edu.cn/~zhaohai/", "dblp": ";198/9339;28/10770;70/6443-1.html;25/1145-1.html", "google_scholar": ";PyzBf5oAAAAJ;BLKHwNwAAAAJ;Shy1gnMAAAAJ;https://scholar.google.com.tw/citations?user=4dU5KS0AAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "chernyn@whu.edu.cn;~Zuchao_Li1;~Lefei_Zhang1;~Bo_Du1;~hai_zhao1", "aff": ";Wuhan University;Wuhan University;Wuhan University;Shanghai Jiaotong University", "aff_domain": ";whu.edu.cn;whu.edu.cn;whu.edu.cn;sjtu.edu.cn", "position": ";Researcher;Full Professor;Full Professor;Full Professor", "bibtex": "@misc{\nchen2023admeta,\ntitle={Admeta: A Novel Double Exponential Moving Average to Adaptive and Non-adaptive Momentum Optimizers with Bidirectional Looking},\nauthor={Yineng Chen and Zuchao Li and Lefei Zhang and Bo Du and hai zhao},\nyear={2023},\nurl={https://openreview.net/forum?id=MdSGM9PEQ7}\n}", "github": "", "project": "", "reviewers": "kSFP;merB;dVGC;RS9p", "site": "https://openreview.net/forum?id=MdSGM9PEQ7", "pdf_size": 2788385, "recommendation": "6;6;6;6", "confidence": "3;5;4;4", "correctness": "2;3;3;4", "technical_novelty": "2;2;3;3", "empirical_novelty": "1;3;3;3", "wc_summary_paper": "117;84;123;61", "wc_strength_and_weaknesses": "196;205;339;335", "wc_clarity_quality_novelty_and_reproducibility": "18;27;30;19", "wc_summary_review": "88;45;60;22", "wc_review": "419;361;552;437", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "563;592;1002;969", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;2;2", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 96.25, 25.193004981542 ], "wc_strength_and_weaknesses_avg": [ 268.75, 68.33877010892134 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 23.5, 5.123475382979799 ], "wc_summary_review_avg": [ 53.75, 23.962209831315644 ], "wc_review_avg": [ 442.25, 69.3086394326133 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 781.5, 204.59044454714888 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10912555974560433742&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Wuhan University;Shanghai Jiao Tong University", "aff_unique_dep": ";", "aff_unique_url": "http://www.whu.edu.cn/;https://www.sjtu.edu.cn", "aff_unique_abbr": "WHU;SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "MdiVU9lMmVS", "title": "Very Large Scale Multi-Agent Reinforcement Learning with Graph Attention Mean Field", "track": "main", "status": "Reject", "tldr": "A multi-agent 
reinforcement learning method that solves very large scale problems via a mean-field technique combined with a graph attention mechanism.", "abstract": "With recent advances in reinforcement learning, we have witnessed countless successes of intelligent agents in various domains. In particular, multi-agent reinforcement learning (MARL) is suitable for many real-world scenarios and has vast potential applications. However, typical MARL methods can only handle tens of agents, leaving scenarios with up to hundreds or even thousands of agents almost unexplored. There exist two key challenges in scaling up the number of agents: (1) agent-agent interactions are critical in multi-agent systems while the number of interactions grows quadratically with the number of agents, causing high computational complexity and difficulty in strategy learning; (2) the strengths of interactions vary among agents and over time, making it difficult to precisely model such interactions. In this paper, we propose the Graph Attention Mean Field (GAT-MF) method, where we convert agent-agent interactions into interactions between each agent and a weighted mean field, greatly reducing the computational complexity. We mathematically prove the correctness of this conversion. We design a graph attention mechanism to automatically capture the different and time-varying strengths of interactions, ensuring the ability of our method to precisely model interactions among the agents. We conduct extensive experiments in both manual and real-world scenarios with more than 3000 agents, demonstrating that, compared with existing MARL methods, our method reaches superior performance and 9.4 times higher computational efficiency.", "keywords": "Multi-agent reinforcement learning;large-scale problems;graph attention;mean field", "primary_area": "", "supplementary_material": "/attachment/951dcf970a2cbf63ae9813086bccd3bfd2bc35a8.zip", "author": "Qianyue Hao", "authorids": "~Qianyue_Hao1", "gender": "M", "homepage": "https://scholar.google.com/citations?user=3qDk0OcAAAAJ", "dblp": "272/9909", "google_scholar": "3qDk0OcAAAAJ", "orcid": "0000-0002-7109-3588", "linkedin": "", "or_profile": "~Qianyue_Hao1", "aff": "Electronic Engineering, Tsinghua University, Tsinghua University", "aff_domain": "mails.tsinghua.edu.cn", "position": "PhD student", "bibtex": "@misc{\nhao2023very,\ntitle={Very Large Scale Multi-Agent Reinforcement Learning with Graph Attention Mean Field},\nauthor={Qianyue Hao},\nyear={2023},\nurl={https://openreview.net/forum?id=MdiVU9lMmVS}\n}", "github": "", "project": "", "reviewers": "cg8c;T6mP;vFJW", "site": "https://openreview.net/forum?id=MdiVU9lMmVS", "pdf_size": 28598641, "recommendation": "3;3;5", "confidence": "3;4;3", "correctness": "2;2;2", "technical_novelty": "3;2;2", "empirical_novelty": "2;2;2", "wc_summary_paper": "20;94;130", "wc_strength_and_weaknesses": "15;525;353", "wc_clarity_quality_novelty_and_reproducibility": "60;19;55", "wc_summary_review": "167;63;22", "wc_review": "262;701;560", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 2.0, 0.0 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 81.33333333333333, 45.791799362865056 ], "wc_strength_and_weaknesses_avg": [ 297.6666666666667, 211.85110074976924 ],
"wc_clarity_quality_novelty_and_reproducibility_avg": [ 44.666666666666664, 18.263503375736967 ], "wc_summary_review_avg": [ 84.0, 61.030047244506264 ], "wc_review_avg": [ 507.6666666666667, 183.00151790505882 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 1, 0 ], "corr_recommendation_confidence": -0.5, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15947393548018612375&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "Electronic Engineering", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "id": "Mf9fQ0OgMzo", "title": "Preventing Mode Collapse When Imitating Latent Policies from Observations", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Imitation from observations only (ILfO) is an extension of the classic imitation learning setting to cases where expert observations are easy to obtain but no expert actions are available. Most existing ILfO methods either require access to task-specific cost functions or large amounts of interactions with the target environment. Learning a forward dynamics model in combination with a latent policy has been shown to solve these issues. However, the limited supervision in the ILfO scenario can lead to a mode collapse in learning the generative forward model and the corresponding latent policy. In this paper, we analyse the mode collapse problem and show that it can occur whenever the expert is deterministic, and may also occur due to bad initialization of the models. Under the assumption of piecewise continuous system dynamics, we propose a method to prevent the mode collapse using clustering of expert transitions to pre-train the generative model and the latent policy. 
We show that the resulting method prevents mode collapse and improves performance in five different OpenAI Gym environments.", "keywords": "Imitation Learning;Imitation from Observations Only;Latent Policy Learning", "primary_area": "", "supplementary_material": "", "author": "Oliver Struckmeier;Ville Kyrki", "authorids": "~Oliver_Struckmeier1;~Ville_Kyrki1", "gender": "M;", "homepage": ";https://irobotics.aalto.fi", "dblp": ";07/2806", "google_scholar": "https://scholar.google.fi/citations?user=TSZpN5gAAAAJ;8OBnyXQAAAAJ", "orcid": "0000-0003-4536-3190;", "linkedin": "oliverstruckmeier/;", "or_profile": "~Oliver_Struckmeier1;~Ville_Kyrki1", "aff": "Aalto University;Aalto University", "aff_domain": "aalto.fi;aalto.fi", "position": "PhD student;Associate Professor", "bibtex": "@misc{\nstruckmeier2023preventing,\ntitle={Preventing Mode Collapse When Imitating Latent Policies from Observations},\nauthor={Oliver Struckmeier and Ville Kyrki},\nyear={2023},\nurl={https://openreview.net/forum?id=Mf9fQ0OgMzo}\n}", "github": "", "project": "", "reviewers": "tmHd;tYYy;vXYv;mhG8", "site": "https://openreview.net/forum?id=Mf9fQ0OgMzo", "pdf_size": 28036575, "recommendation": "3;3;3;5", "confidence": "4;4;4;4", "correctness": "3;3;3;3", "technical_novelty": "2;2;2;2", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "42;86;45;107", "wc_strength_and_weaknesses": "255;263;442;677", "wc_clarity_quality_novelty_and_reproducibility": "47;13;92;85", "wc_summary_review": "60;36;121;38", "wc_review": "404;398;700;907", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 70.0, 27.54087870784082 ], "wc_strength_and_weaknesses_avg": [ 409.25, 171.71542592324082 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 59.25, 31.72045869781835 ], "wc_summary_review_avg": [ 63.75, 34.368408458932166 ], "wc_review_avg": [ 602.25, 214.15458785652947 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:PWG3lWFYjFkJ:scholar.google.com/&scioq=Preventing+Mode+Collapse+When+Imitating+Latent+Policies+from+Observations&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Aalto University", "aff_unique_dep": "", "aff_unique_url": "https://www.aalto.fi", "aff_unique_abbr": "Aalto", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Finland" }, { "title": "SpeedyZero: Mastering Atari with Limited Data and Time", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11600", "id": "Mg5CLXZgvLJ", "poster": "/media/PosterPDFs/ICLR%202023/11600.png?t=1680965204.9891944", "openreview": "https://openreview.net/forum?id=Mg5CLXZgvLJ", "slides": "https://iclr.cc/virtual/2023/poster/11600", "video": "https://iclr.cc/virtual/2023/poster/11600", "author_site": "Yixuan Mei, Jiaxuan Gao, Weirui Ye, Shaohuai Liu, Yang Gao, Yi Wu", "tldr": "SpeedyZero is a distributed model-based RL training system based on EfficientZero, 
featuring fast training speed and high sample efficiency.", "abstract": "Many recent breakthroughs in deep reinforcement learning (RL) are mainly built upon large-scale distributed training of model-free methods using millions to billions of samples. On the other hand, state-of-the-art model-based RL methods can achieve human-level sample efficiency but often take a much longer overall training time than model-free methods. However, high sample efficiency and fast training time are both important to many real-world applications. We develop SpeedyZero, a distributed RL system built upon a state-of-the-art model-based RL method, EfficientZero, with a dedicated system design for fast distributed computation. We also develop two novel algorithmic techniques, Priority Refresh and Clipped LARS, to stabilize training with massive parallelization and large batch size. SpeedyZero maintains on-par sample efficiency compared with EfficientZero while achieving a 14.5X speedup in wall-clock time, leading to human-level performance on the Atari benchmark within 35 minutes using only 300k samples. In addition, we also present an in-depth analysis of the fundamental challenges in further scaling our system to bring insights to the community.", "keywords": "Reinforcement Learning System;Distributed Training;Model-Based Reinforcement Learning", "primary_area": "", "supplementary_material": "/attachment/8915c2a5f12660eea265bb312232559886c1e83f.zip", "author": "Yixuan Mei;Jiaxuan Gao;Weirui Ye;Shaohuai Liu;Yang Gao;Yi Wu", "authorids": "~Yixuan_Mei1;~Jiaxuan_Gao1;~Weirui_Ye1;~Shaohuai_Liu1;~Yang_Gao1;~Yi_Wu1", "gender": "M;M;M;M;M;M", "homepage": "https://antonymei.github.io/;https://github.com/samjia2000/;https://yewr.github.io/;https://liushaohuai5.github.io;http://yang-gao.weebly.com;https://jxwuyi.weebly.com", "dblp": ";304/2243;245/3595;https://dblp.org/rec/conf/nips/YeLKAG21;89/4402-29;", "google_scholar": ";;_GgST9AAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;dusV5HMAAAAJ", "orcid": ";;;;;", "linkedin": ";;;;yang-gao-45245348/;", "or_profile": "~Yixuan_Mei1;~Jiaxuan_Gao1;~Weirui_Ye1;~Shaohuai_Liu1;~Yang_Gao1;~Yi_Wu1", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn;edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "Undergrad student;Undergrad student;PhD student;MS student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nmei2023speedyzero,\ntitle={SpeedyZero: Mastering Atari with Limited Data and Time},\nauthor={Yixuan Mei and Jiaxuan Gao and Weirui Ye and Shaohuai Liu and Yang Gao and Yi Wu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=Mg5CLXZgvLJ}\n}", "github": "", "project": "", "reviewers": "Rw6q;Qh8r;xxoB", "pdf_size": 939675, "recommendation": "5;6;6", "confidence": "3;4;2", "correctness": "2;3;3", "technical_novelty": "2;3;3", "empirical_novelty": "2;2;3", "wc_summary_paper": "136;63;44", "wc_strength_and_weaknesses": "584;239;56", "wc_clarity_quality_novelty_and_reproducibility": "44;80;18", "wc_summary_review": "73;114;51", "wc_review": "837;496;169", "wc_reply_reviewers": "153;22;23", "wc_reply_authors": "772;371;178", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "recommendation_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.816496580927726 ],
"correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 81.0, 39.65686153324121 ], "wc_strength_and_weaknesses_avg": [ 293.0, 218.91094079556646 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 47.333333333333336, 25.42090128658349 ], "wc_summary_review_avg": [ 79.33333333333333, 26.10661899893503 ], "wc_review_avg": [ 500.6666666666667, 272.72982153691146 ], "wc_reply_reviewers_avg": [ 66.0, 61.51964455900787 ], "wc_reply_authors_avg": [ 440.3333333333333, 247.40564980524508 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.9999999999999997, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13052117715006181044&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "pdf": "https://openreview.net/pdf?id=Mg5CLXZgvLJ", "email": "tsinghua.edu.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn;edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Optimal Transport for Offline Imitation Learning", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10969", "id": "MhuFzFsrfvH", "poster": "/media/PosterPDFs/ICLR%202023/10969.png?t=1682714069.7764735", "openreview": "https://openreview.net/forum?id=MhuFzFsrfvH", "slides": "https://iclr.cc/virtual/2023/poster/10969", "video": "https://iclr.cc/virtual/2023/poster/10969", "author_site": "Yicheng Luo, Zhengyao Jiang, samuel cohen, Edward Grefenstette, Marc Deisenroth", "tldr": "We present an offline imitation learning based on optimal transport that demonstrates strong performance and sample efficiency", "abstract": "With the advent of large datasets, offline reinforcement learning is a promising framework for learning good decision-making policies without the need to interact with the real environment.\nHowever, offline RL requires the dataset to be reward-annotated, which presents practical challenges when reward engineering is difficult or when obtaining reward annotations is labor-intensive.\nIn this paper, we introduce Optimal Transport Relabeling (OTR), an imitation learning algorithm that can automatically relabel offline data of mixed and unknown quality with rewards from a few good demonstrations. OTR's key idea is to use optimal transport to compute an optimal alignment between an unlabeled trajectory in the dataset and an expert demonstration to obtain a similarity measure that can be interpreted as a reward, which can then be used by an offline RL algorithm to learn the policy. OTR is easy to implement and computationally efficient. 
On D4RL benchmarks, we demonstrate that OTR with a single demonstration can consistently match the performance of offline RL with ground-truth rewards.\n", "keywords": "offline reinforcement learning;optimal transport;imitation learning", "primary_area": "", "supplementary_material": "", "author": "Yicheng Luo;zhengyao jiang;Samuel Cohen;Edward Grefenstette;Marc Peter Deisenroth", "authorids": "~Yicheng_Luo1;~zhengyao_jiang2;~Samuel_Cohen1;~Edward_Grefenstette1;~Marc_Peter_Deisenroth1", "gender": "M;M;M;M;M", "homepage": "https://luoyicheng.net/;https://zhengyaojiang.github.io/;;http://egrefen.com/;https://deisenroth.cc", "dblp": ";;;http://dblp.uni-trier.de/pers/hd/g/Grefenstette:Edward;76/5043", "google_scholar": "635-7jQAAAAJ;https://scholar.google.co.jp/citations?user=J8pFrgwAAAAJ;CmdjfTsAAAAJ;https://scholar.google.co.uk/citations?user=ezllEwMAAAAJ;https://scholar.google.co.uk/citations?user=GDabimYAAAAJ", "orcid": "0000-0003-0547-411X;;;;", "linkedin": "yichengluo/;;;;", "or_profile": "~Yicheng_Luo1;~zhengyao_jiang2;~Samuel_Cohen1;~Edward_Grefenstette1;~Marc_Deisenroth1", "aff": "University College London, University of London;University College London;University College London;Cohere;University College London", "aff_domain": "ucl.ac.uk;ucl.ac.uk;ucl.ac.uk;cohere.com;ucl.ac.uk", "position": "PhD student;PhD student;PhD student;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nluo2023optimal,\ntitle={Optimal Transport for Offline Imitation Learning},\nauthor={Yicheng Luo and zhengyao jiang and Samuel Cohen and Edward Grefenstette and Marc Peter Deisenroth},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=MhuFzFsrfvH}\n}", "github": "", "project": "", "reviewers": "C2wE;eZXx;bBjs;heNC", "pdf_size": 867055, "recommendation": "5;5;6;6", "confidence": "4;2;3;4", "correctness": "3;3;4;3", "technical_novelty": "2;1;3;3", "empirical_novelty": "2;1;3;3", "wc_summary_paper": "95;61;108;149", "wc_strength_and_weaknesses": "184;79;213;114", "wc_clarity_quality_novelty_and_reproducibility": "146;83;195;319", "wc_summary_review": "23;16;84;56", "wc_review": "448;239;600;638", "wc_reply_reviewers": "272;0;0;0", "wc_reply_authors": "1515;791;427;519", "reply_reviewers": "1;0;0;0", "reply_authors": "6;3;3;4", "recommendation_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.82915619758885 ], "empirical_novelty_avg": [ 2.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 103.25, 31.499007920885383 ], "wc_strength_and_weaknesses_avg": [ 147.5, 53.47195526628889 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 185.75, 86.57186321201594 ], "wc_summary_review_avg": [ 44.75, 27.23394022171599 ], "wc_review_avg": [ 481.25, 156.89387336668057 ], "wc_reply_reviewers_avg": [ 68.0, 117.77945491468365 ], "wc_reply_authors_avg": [ 813.0, 426.82549127248717 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 4.0, 1.224744871391589 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.30151134457776363, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5467826538336699738&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=MhuFzFsrfvH", "email": "ucl.ac.uk;ucl.ac.uk;ucl.ac.uk;cohere.com;ucl.ac.uk", "author_num": 5, 
"aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "University College London;Cohere", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucl.ac.uk;https://cohere.ai", "aff_unique_abbr": "UCL;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "United Kingdom;United States" }, { "title": "UNICORN: A Unified Backdoor Trigger Inversion Framework", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11162", "id": "Mj7K4lglGyj", "poster": "", "openreview": "https://openreview.net/forum?id=Mj7K4lglGyj", "slides": "https://iclr.cc/virtual/2023/poster/11162", "video": "https://iclr.cc/virtual/2023/poster/11162", "author_site": "Zhenting Wang, Kai Mei, Juan Zhai, Shiqing Ma", "tldr": "", "abstract": "The backdoor attack, where the adversary uses inputs stamped with triggers (e.g., a patch) to activate pre-planted malicious behaviors, is a severe threat to Deep Neural Network (DNN) models. Trigger inversion is an effective way of identifying backdoor models and understanding embedded adversarial behaviors. A challenge of trigger inversion is that there are many ways of constructing the trigger. Existing methods cannot generalize to various types of triggers by making certain assumptions or attack-specific constraints. The fundamental reason is that existing work does not formally define the trigger and the inversion problem. This work formally defines and analyzes the trigger and the inversion problem. Then, it proposes a unified framework to invert backdoor triggers based on the formalization of triggers and the identified inner behaviors of backdoor models from our analysis. Our prototype UNICORN is general and effective in inverting backdoor triggers in DNNs. 
The code can be found at https://github.com/RU-System-Software-and-Security/UNICORN.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Zhenting Wang;Kai Mei;Juan Zhai;Shiqing Ma", "authorids": "~Zhenting_Wang1;~Kai_Mei1;~Juan_Zhai1;~Shiqing_Ma2", "gender": "M;;F;", "homepage": "https://zhentingwang.github.io/;;https://people.cs.umass.edu/~juanzhai/;https://people.cs.umass.edu/~shiqingma/", "dblp": "263/4521;224/4831;154/5678;172/8745", "google_scholar": "QSYVbj8AAAAJ;8slSsa8AAAAJ;sq0OCfwAAAAJ;X_mDnjkAAAAJ", "orcid": ";;0000-0001-5017-8016;0000-0003-1551-8948", "linkedin": ";;;shiqing-ma-6590b086", "or_profile": "~Zhenting_Wang1;~Kai_Mei1;~Juan_Zhai1;~Shiqing_Ma2", "aff": "Sony AI;Rutgers University, New Brunswick;Rutgers University;Rutgers University", "aff_domain": "sony.com;rutgers.edu;rutgers.edu;rutgers.edu", "position": "Intern;PhD student;Lecturer;Assistant Professor", "bibtex": "@inproceedings{\nwang2023unicorn,\ntitle={{UNICORN}: A Unified Backdoor Trigger Inversion Framework},\nauthor={Zhenting Wang and Kai Mei and Juan Zhai and Shiqing Ma},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=Mj7K4lglGyj}\n}", "github": "", "project": "", "reviewers": "8Hde;z7uV;kjQZ", "pdf_size": 970583, "recommendation": "6;6;6", "confidence": "4;3;3", "correctness": "3;3;3", "technical_novelty": "3;3;3", "empirical_novelty": "3;2;3", "wc_summary_paper": "104;47;74", "wc_strength_and_weaknesses": "862;348;39", "wc_clarity_quality_novelty_and_reproducibility": "153;31;26", "wc_summary_review": "47;34;135", "wc_review": "1166;460;274", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "2676;1354;1242", "reply_reviewers": "0;0;0", "reply_authors": "5;2;2", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 75.0, 23.280893453645632 ], "wc_strength_and_weaknesses_avg": [ 416.3333333333333, 339.4449718126472 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 70.0, 58.72534943843814 ], "wc_summary_review_avg": [ 72.0, 44.86275366789991 ], "wc_review_avg": [ 633.3333333333334, 384.2302550410221 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1757.3333333333333, 651.2026480972639 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 3.0, 1.4142135623730951 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 47, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6448430318822208814&as_sdt=5,39&sciodt=0,39&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=Mj7K4lglGyj", "email": "sony.com;rutgers.edu;rutgers.edu;rutgers.edu", "author_num": 4, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Sony;Rutgers University", "aff_unique_dep": "Sony AI;", "aff_unique_url": "https://www.sony.com;https://www.rutgers.edu", "aff_unique_abbr": "Sony AI;Rutgers", "aff_campus_unique_index": "1", "aff_campus_unique": ";New Brunswick", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "Japan;United States" }, { "id": "MjikLUwiB3M", "title": "Towards a Complete Theory of Neural Networks with Few Neurons", "track": "main", "status": "Reject", "tldr": "We analytically study the landscapes of neural networks with a few neurons, shedding light 
on how the neurons move following gradient flow. ", "abstract": "Deep learning has seen unprecedented progress thanks to the deployment of models with millions of parameters. \nOn the theoretical side, an immense amount of effort has gone to understanding the dynamics of overparameterized networks. \nAlthough now there is a well-developed theory of networks with infinitely many neurons, the classic problem of understanding how a neural network with a few neurons learns remains unsolved.\nTo attack this problem, we analytically study the landscapes of neural networks with few neurons. \nWe prove for the first time that a student network with one neuron has only one critical point --its global minimum-- when learning from a teacher network with arbitrarily many orthogonal neurons. \nIn addition, we prove how a neuron addition mechanism turns a minimum into a line of critical points with transitions from saddles to local minima via non-strict saddles. \nFinally, we discuss how the insights we get from our novel proof techniques may shed light on the dynamics of neural networks with few neurons.", "keywords": "theory of neural networks;non-convex landscapes;critical manifolds;gradient flow dynamics", "primary_area": "", "supplementary_material": "", "author": "Berfin Simsek;Valentin Schmutz;Wulfram Gerstner;Johanni Brea", "authorids": "~Berfin_Simsek1;~Valentin_Schmutz1;~Wulfram_Gerstner1;~Johanni_Brea1", "gender": "F;;;", "homepage": "https://www.bsimsek.com/;;https://lcnwww.epfl.ch/gerstner/;", "dblp": "244/2455;;g/WGerstner;", "google_scholar": "Ysi38KIAAAAJ;;https://scholar.google.ch/citations?user=vSd2RnEAAAAJ;", "orcid": ";;0000-0002-4344-2189;", "linkedin": ";;;", "or_profile": "~Berfin_Simsek1;~Valentin_Schmutz1;~Wulfram_Gerstner1;~Johanni_Brea1", "aff": "EPFL;;EPFL - EPF Lausanne;", "aff_domain": "epfl.ch;;epfl.ch;", "position": "PhD student;;Full Professor;", "bibtex": "@misc{\nsimsek2023towards,\ntitle={Towards a Complete Theory of Neural Networks with Few Neurons},\nauthor={Berfin Simsek and Valentin Schmutz and Wulfram Gerstner and Johanni Brea},\nyear={2023},\nurl={https://openreview.net/forum?id=MjikLUwiB3M}\n}", "github": "", "project": "", "reviewers": "rakv;Atsv;vz8N;P9i2", "site": "https://openreview.net/forum?id=MjikLUwiB3M", "pdf_size": 3925916, "recommendation": "3;3;5;6", "confidence": "3;4;5;2", "correctness": "2;4;3;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "0;0;2;0", "wc_summary_paper": "52;109;164;79", "wc_strength_and_weaknesses": "361;178;391;88", "wc_clarity_quality_novelty_and_reproducibility": "8;37;2;51", "wc_summary_review": "70;52;35;6", "wc_review": "491;376;592;224", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "388;208;406;191", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 0.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 101.0, 41.587257663856604 ], "wc_strength_and_weaknesses_avg": [ 254.5, 126.0446349512743 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 24.5, 20.22992832414391 ], "wc_summary_review_avg": [ 40.75, 23.573024837725004 ], "wc_review_avg": [ 420.75, 136.9075874449623 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 298.25, 99.13721551465927 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 
4, 0 ], "corr_recommendation_confidence": -0.2581988897471611, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Eg-ZTOOeTcUJ:scholar.google.com/&scioq=Towards+a+Complete+Theory+of+Neural+Networks+with+Few+Neurons&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "EPFL", "aff_unique_dep": "", "aff_unique_url": "https://www.epfl.ch", "aff_unique_abbr": "EPFL", "aff_campus_unique_index": "1", "aff_campus_unique": ";Lausanne", "aff_country_unique_index": "0;0", "aff_country_unique": "Switzerland" }, { "title": "What Is Missing in IRM Training and Evaluation? Challenges and Solutions", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11927", "id": "MjsDeTcDEy", "poster": "/media/PosterPDFs/ICLR%202023/11927.png?t=1683084102.0872707", "openreview": "https://openreview.net/forum?id=MjsDeTcDEy", "slides": "https://iclr.cc/virtual/2023/poster/11927", "video": "https://iclr.cc/virtual/2023/poster/11927", "author_site": "Yihua Zhang, Pranay Sharma, Parikshit Ram, Mingyi Hong, Kush Varshney, Sijia Liu", "tldr": "", "abstract": "Invariant risk minimization (IRM) has received increasing attention as a way to acquire environment-agnostic data representations and predictions, and also a principled solution for preventing spurious correlations from being learned and improving models\u2019 out-of-distribution generalization. Yet, recent works have found that the optimality of the originally-proposed IRM optimization (IRMV1) may be compromised in practice or could be impossible to achieve in some scenarios. Therefore, a series of advanced IRM algorithms have been developed that show practical improvement over IRMV1. In this work, we revisit these recent IRM advancements and identify and resolve three practical limitations in IRM training and evaluation. First, we find that the effect of batch size during training has been chronically overlooked in previous studies, leaving room for further improvement. We propose small-batch training and highlight the improvements over a set of large-batch optimization techniques. Second, we find that improper selection of evaluation environments could give a false sense of invariance for IRM. To alleviate this effect, we leverage diversified test-time environments to precisely characterize the invariance of IRM when applied in practice. Third, we revisit Ahuja et al. (2020)\u2019s proposal to convert IRM into an ensemble game and identify a limitation when a single invariant predictor is desired instead of an ensemble of individual predictors. We propose a new IRM variant to address this limitation based on a novel viewpoint of ensemble IRM games as consensus-constrained bi-level optimization. Lastly, we conduct extensive experiments (covering 7 existing IRM variants and 7 datasets) to justify the practical significance of revisiting IRM training and evaluation in a principled manner.", "keywords": "invariant risk minimization;bi-level optimization", "primary_area": "", "supplementary_material": "/attachment/b4fb2f3b5dd52e624bb844c978e02bc019e84127.zip", "author": "Yihua Zhang;Pranay Sharma;Parikshit Ram;Mingyi Hong;Kush R. 
Varshney;Sijia Liu", "authorids": "~Yihua_Zhang1;~Pranay_Sharma2;~Parikshit_Ram1;~Mingyi_Hong1;~Kush_R._Varshney1;~Sijia_Liu1", "gender": "M;M;M;M;M;M", "homepage": "https://yihua-zhang.com;https://rithram.github.io/;http://people.ece.umn.edu/~mhong/mingyi.html;http://krvarshney.github.io;https://lsjxjtu.github.io/;https://sites.google.com/view/pranay-sharma/home", "dblp": ";99/8314;57/8053;;128/6972-1;81/9976", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;JaXmmnkAAAAJ;qRnP-p0AAAAJ;hMZMhLoAAAAJ;C7dO_UgAAAAJ;QR-VKssAAAAJ", "orcid": ";0000-0002-9456-029X;;;;", "linkedin": "zhangyihua/;parikshit-ram-4861325/;;kushvarshney;;", "or_profile": "~Yihua_Zhang1;~Parikshit_Ram1;~Mingyi_Hong1;~Kush_R._Varshney1;~Sijia_Liu1;~PRANAY_SHARMA1", "aff": "Michigan State University;International Business Machines;University of Minnesota, Minneapolis;International Business Machines;Michigan State University;Carnegie Mellon University", "aff_domain": "msu.edu;ibm.com;umn.edu;ibm.com;msu.edu;cmu.edu", "position": "PhD student;Principal Researcher;Associate Professor;Research Staff Member;Assistant Professor;Postdoc", "bibtex": "@inproceedings{\nzhang2023what,\ntitle={What Is Missing in {IRM} Training and Evaluation? Challenges and Solutions},\nauthor={Yihua Zhang and Pranay Sharma and Parikshit Ram and Mingyi Hong and Kush R. Varshney and Sijia Liu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=MjsDeTcDEy}\n}", "github": "", "project": "", "reviewers": "3u9X;bEZc;U4vc", "pdf_size": 1298336, "recommendation": "6;6;8", "confidence": "3;3;4", "correctness": "3;3;4", "technical_novelty": "3;2;3", "empirical_novelty": "3;3;3", "wc_summary_paper": "72;65;82", "wc_strength_and_weaknesses": "345;110;244", "wc_clarity_quality_novelty_and_reproducibility": "71;20;24", "wc_summary_review": "28;15;32", "wc_review": "516;210;382", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 73.0, 6.97614984548545 ], "wc_strength_and_weaknesses_avg": [ 233.0, 96.2531384769695 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 38.333333333333336, 23.156472577277878 ], "wc_summary_review_avg": [ 25.0, 7.2571803523590805 ], "wc_review_avg": [ 369.3333333333333, 125.24464947542558 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.9999999999999998, "corr_recommendation_correctness": 0.9999999999999998, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6728201817877160832&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "pdf": "https://openreview.net/pdf?id=MjsDeTcDEy", "email": "msu.edu;ibm.com;umn.edu;ibm.com;msu.edu;cmu.edu", "author_num": 6, "aff_unique_index": "0;1;2;1;0;3", "aff_unique_norm": "Michigan State University;International Business Machines Corporation;University of Minnesota;Carnegie Mellon University", "aff_unique_dep": ";;;", "aff_unique_url": 
"https://www.msu.edu;https://www.ibm.com;https://www.minnesota.edu;https://www.cmu.edu", "aff_unique_abbr": "MSU;IBM;UMN;CMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Minneapolis", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Mass-Editing Memory in a Transformer", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11880", "id": "MkbcAHIYgyS", "poster": "", "openreview": "https://openreview.net/forum?id=MkbcAHIYgyS", "slides": "https://iclr.cc/virtual/2023/poster/11880", "video": "https://iclr.cc/virtual/2023/poster/11880", "author_site": "Kevin Meng, Arnab Sen Sharma, Alex J Andonian, Yonatan Belinkov, David Bau", "tldr": "An algorithm that can make tens of thousands of edits to an autoregressive transformer's memory.", "abstract": "Recent work has shown exciting promise in updating large language models with new memories, so as to replace obsolete information or add specialized knowledge. However, this line of work is predominantly limited to updating single associations. We develop MEMIT, a method for directly updating a language model with many memories, demonstrating experimentally that it can scale up to thousands of associations for GPT-J (6B) and GPT-NeoX (20B), exceeding prior work by an order of magnitude. Our code and data will be open-sourced upon publication.", "keywords": "language models;GPT;transformers;model editing;factual associations;memory", "primary_area": "", "supplementary_material": "/attachment/7163bba87b77b7773eaac5f74f70dcb09ed7097d.zip", "author": "Kevin Meng;Arnab Sen Sharma;Alex J Andonian;Yonatan Belinkov;David Bau", "authorids": "~Kevin_Meng1;~Arnab_Sen_Sharma1;~Alex_J_Andonian1;~Yonatan_Belinkov1;~David_Bau1", "gender": "M;M;M;M;M", "homepage": "https://mengk.me/;https://arnab-api.github.io/;;https://www.belinkov.com;https://baulab.info/", "dblp": "06/8478;254/2046;;136/8705;47/3614", "google_scholar": "UcZbFroAAAAJ;https://scholar.google.com/citations?view_op=list_works;;https://scholar.google.com/citations?authorid=K-6ujU4AAAAJ;CYI6cKgAAAAJ", "orcid": ";0000-0002-0407-6526;;;0000-0003-1744-6765", "linkedin": "kmeng01/;arnab-api/;;;david-bau-4b8130/", "or_profile": "~Kevin_Meng1;~Arnab_Sen_Sharma1;~Alex_J_Andonian1;~Yonatan_Belinkov1;~David_Bau1", "aff": "Northeastern University;Northeastern University;Massachusetts Institute of Technology;Technion, Technion;Northeastern University", "aff_domain": "neu.edu;northeasterd.edu;mit.edu;technion.ac.il;northeastern.edu", "position": "Researcher;PhD student;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nmeng2023massediting,\ntitle={Mass-Editing Memory in a Transformer},\nauthor={Kevin Meng and Arnab Sen Sharma and Alex J Andonian and Yonatan Belinkov and David Bau},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=MkbcAHIYgyS}\n}", "github": "", "project": "", "reviewers": "6w9b;J5Nr;NCki;55ot", "pdf_size": 2487403, "recommendation": "6;6;8;8", "confidence": "2;4;4;4", "correctness": "4;3;3;4", "technical_novelty": "2;3;2;3", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "44;23;70;338", "wc_strength_and_weaknesses": "88;117;527;189", "wc_clarity_quality_novelty_and_reproducibility": "36;61;135;109", "wc_summary_review": "19;89;104;120", "wc_review": "187;290;836;756", "wc_reply_reviewers": "0;0;29;0", "wc_reply_authors": "250;459;981;253", "reply_reviewers": "0;0;1;0", "reply_authors": 
"1;1;2;1", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 118.75, 127.67414577744391 ], "wc_strength_and_weaknesses_avg": [ 230.25, 175.23038406623436 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 85.25, 38.899710795840114 ], "wc_summary_review_avg": [ 83.0, 38.54218468120353 ], "wc_review_avg": [ 517.25, 282.5379399301977 ], "wc_reply_reviewers_avg": [ 7.25, 12.55736835487436 ], "wc_reply_authors_avg": [ 485.75, 298.2191937149586 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.0, "gs_citation": 533, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8809464855702381001&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 7, "pdf": "https://openreview.net/pdf?id=MkbcAHIYgyS", "email": "neu.edu;northeasterd.edu;mit.edu;technion.ac.il;northeastern.edu", "author_num": 5, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Northeastern University;Massachusetts Institute of Technology;Technion - Israel Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.northeastern.edu;https://web.mit.edu;https://www.technion.ac.il/en/", "aff_unique_abbr": "NEU;MIT;Technion", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "United States;Israel" }, { "id": "Mmgcp3MRp7q", "title": "Identifying Latent Causal Content for Multi-Source Domain Adaptation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Multi-source domain adaptation (MSDA) learns to predict the labels in target domain data, under the setting that data from multiple source domains are labelled and data from the target domain are unlabelled. Most methods for this task focus on learning invariant representations across domains. However, their success relies heavily on the assumption that the label distribution remains consistent across domains, which may not hold in general real-world problems. In this paper, we propose a new and more flexible assumption, termed \\textit{latent covariate shift}, where a latent content variable $\\mathbf{z}_c$ and a latent style variable $\\mathbf{z}_s$ are introduced in the generative process, with the marginal distribution of $\\mathbf{z}_c$ changing across domains and the conditional distribution of the label given $\\mathbf{z}_c$ remaining invariant across domains. We show that although (completely) identifying the proposed latent causal model is challenging, the latent content variable can be identified up to scaling by using its dependence with labels from source domains, together with the identifiability conditions of nonlinear ICA. This motivates us to propose a novel method for MSDA, which learns the invariant label distribution conditional on the latent content variable, instead of learning invariant representations. 
Empirical evaluation on simulation and real data demonstrates the effectiveness of the proposed method.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Yuhang Liu;Zhen Zhang;Dong Gong;Mingming Gong;Biwei Huang;Kun Zhang;Javen Qinfeng Shi", "authorids": "~Yuhang_Liu1;~Zhen_Zhang2;~Dong_Gong1;~Mingming_Gong1;~Biwei_Huang1;~Kun_Zhang1;~Javen_Qinfeng_Shi1", "gender": ";M;M;M;F;M;M", "homepage": "https://sites.google.com/view/yuhangliu/homepage;https://zzhang.org;https://donggong1.github.io;https://mingming-gong.github.io/;;http://www.andrew.cmu.edu/user/kunz1/;https://cs.adelaide.edu.au/~javen/", "dblp": ";19/5112-8;125/5032;98/8479;165/3288;96/3115-1;http://dblp.uni-trier.de/pers/hd/s/Shi:Qinfeng", "google_scholar": "5xZspvQAAAAJ;https://scholar.google.com.sg/citations?user=4X6Hqg0AAAAJ;https://scholar.google.com.au/citations?user=e2u6hRoAAAAJ;https://scholar.google.com.au/citations?user=6BmiCJIAAAAJ;;RGoypN4AAAAJ;https://scholar.google.com.au/citations?user=h6O9vYkAAAAJ", "orcid": ";0000-0003-2805-4396;0000-0002-2668-9630;0000-0001-7147-5589;;;0000-0002-9126-2107", "linkedin": ";;;;;;", "or_profile": "~Yuhang_Liu1;~Zhen_Zhang2;~Dong_Gong1;~Mingming_Gong1;~Biwei_Huang1;~Kun_Zhang1;~Javen_Shi1", "aff": "The University of Adelaide;The University of Adelaide;University of New South Wales;University of Melbourne;University of California, San Diego;Carnegie Mellon University;University of Adelaide", "aff_domain": "adelaide.edu.au;adelaide.edu.au;unsw.edu.au;unimelb.edu.au;ucsd.edu;cmu.edu;adelaide.edu.au", "position": "Postdoc;Postdoc;Assistant Professor;Assistant Professor;Assistant Professor;Associate Professor;Professor", "bibtex": "@misc{\nliu2023identifying,\ntitle={Identifying Latent Causal Content for Multi-Source Domain Adaptation},\nauthor={Yuhang Liu and Zhen Zhang and Dong Gong and Mingming Gong and Biwei Huang and Kun Zhang and Javen Qinfeng Shi},\nyear={2023},\nurl={https://openreview.net/forum?id=Mmgcp3MRp7q}\n}", "github": "", "project": "", "reviewers": "wpga;fg87;x2cf", "site": "https://openreview.net/forum?id=Mmgcp3MRp7q", "pdf_size": 1986627, "recommendation": "3;3;5", "confidence": "4;5;3", "correctness": "3;2;3", "technical_novelty": "2;3;3", "empirical_novelty": "2;2;2", "wc_summary_paper": "75;83;62", "wc_strength_and_weaknesses": "262;351;59", "wc_clarity_quality_novelty_and_reproducibility": "63;2;27", "wc_summary_review": "13;20;236", "wc_review": "413;456;384", "wc_reply_reviewers": "94;0;33", "wc_reply_authors": "748;694;781", "reply_reviewers": "1;0;1", "reply_authors": "2;2;2", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 73.33333333333333, 8.65383665716478 ], "wc_strength_and_weaknesses_avg": [ 224.0, 122.19929077808376 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 30.666666666666668, 25.037749277618563 ], "wc_summary_review_avg": [ 89.66666666666667, 103.51274747048735 ], "wc_review_avg": [ 417.6666666666667, 29.578520735305357 ], "wc_reply_reviewers_avg": [ 42.333333333333336, 38.93869826049944 ], "wc_reply_authors_avg": [ 741.0, 35.86084215408221 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.8660254037844387, 
"corr_recommendation_correctness": 0.5, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6928804652960203849&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;1;2;3;4;0", "aff_unique_norm": "University of Adelaide;University of New South Wales;University of Melbourne;University of California, San Diego;Carnegie Mellon University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.adelaide.edu.au;https://www.unsw.edu.au;https://www.unimelb.edu.au;https://www.ucsd.edu;https://www.cmu.edu", "aff_unique_abbr": "Adelaide;UNSW;UniMelb;UCSD;CMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";San Diego", "aff_country_unique_index": "0;0;0;0;1;1;0", "aff_country_unique": "Australia;United States" }, { "title": "Active Learning for Object Detection with Evidential Deep Learning and Hierarchical Uncertainty Aggregation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11241", "id": "MnEjsw-vj-X", "poster": "/media/PosterPDFs/ICLR%202023/11241.png?t=1682488729.5285342", "openreview": "https://openreview.net/forum?id=MnEjsw-vj-X", "slides": "https://iclr.cc/virtual/2023/poster/11241", "video": "https://iclr.cc/virtual/2023/poster/11241", "author_site": "Younghyun Park, Wonjeong Choi, Soyeong Kim, Dong-Jun Han, Jaekyun Moon", "tldr": "We propose an active learning method for object detection using evidential deep learning and novel uncertainty aggregation method.", "abstract": "Despite the huge success of object detection, the training process still requires an immense amount of labeled data. Although various active learning solutions for object detection have been proposed, most existing works do not take advantage of epistemic uncertainty, which is an important metric for capturing the usefulness of the sample. Also, previous works pay little attention to the attributes of each bounding box (e.g., nearest object, box size) when computing the informativeness of an image. In this paper, we propose a new active learning strategy for object detection that overcomes the shortcomings of prior works. To make use of epistemic uncertainty, we adopt evidential deep learning (EDL) and propose a new module termed model evidence head (MEH), that makes EDL highly compatible with object detection. Based on the computed epistemic uncertainty of each bounding box, we propose hierarchical uncertainty aggregation (HUA) for obtaining the informativeness of an image. HUA realigns all bounding boxes into multiple levels based on the attributes and aggregates uncertainties in a bottom-up order, to effectively capture the context within the image. 
Experimental results show that our method outperforms existing state-of-the-art methods by a considerable margin.", "keywords": "Active Learning;Object Detection;Uncertainty Estimation;Bayesian Learning", "primary_area": "", "supplementary_material": "", "author": "Younghyun Park;Wonjeong Choi;Soyeong Kim;Dong-Jun Han;Jaekyun Moon", "authorids": "~Younghyun_Park1;~Wonjeong_Choi1;~Soyeong_Kim1;~Dong-Jun_Han1;~Jaekyun_Moon2", "gender": "M;M;F;M;M", "homepage": "https://github.com/MoonLab-YH;;http://;https://sites.google.com/view/djhan930/home?authuser=0;http://comstolab.kaist.ac.kr/people.html", "dblp": "137/2568;327/3749;;201/0078;78/2744", "google_scholar": ";https://scholar.google.co.kr/citations?user=RXg2-oUAAAAJ;;https://scholar.google.co.kr/citations?user=-YR-GxUAAAAJ;", "orcid": ";;;;", "linkedin": ";wonjeong-choi-2503b71b1/;;;", "or_profile": "~Younghyun_Park1;~Wonjeong_Choi1;~Soyeong_Kim1;~Dong-Jun_Han1;~Jaekyun_Moon2", "aff": "Korea Advanced Institute of Science & Technology;KAIST;;Korea Advanced Institute of Science & Technology;KAIST", "aff_domain": "kaist.ac.kr;ee.kaist.ac.kr;;kaist.ac.kr;kaist.edu", "position": "PhD student;PhD student;;Postdoc;Full Professor", "bibtex": "@inproceedings{\npark2023active,\ntitle={Active Learning for Object Detection with Evidential Deep Learning and Hierarchical Uncertainty Aggregation},\nauthor={Younghyun Park and Wonjeong Choi and Soyeong Kim and Dong-Jun Han and Jaekyun Moon},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=MnEjsw-vj-X}\n}", "github": "", "project": "", "reviewers": "P2kB;tNAG;4FYw;HUy5;boP4", "pdf_size": 2002006, "recommendation": "6;6;6;6;6", "confidence": "3;3;4;4;5", "correctness": "4;3;3;3;4", "technical_novelty": "4;2;3;3;3", "empirical_novelty": "4;2;3;3;3", "wc_summary_paper": "174;74;75;49;43", "wc_strength_and_weaknesses": "202;329;283;179;199", "wc_clarity_quality_novelty_and_reproducibility": "290;12;57;55;42", "wc_summary_review": "56;57;21;41;47", "wc_review": "722;472;436;324;331", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "500;1484;801;1121;826", "reply_reviewers": "0;0;0;0;0", "reply_authors": "1;2;2;2;2", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "correctness_avg": [ 3.4, 0.4898979485566356 ], "technical_novelty_avg": [ 3.0, 0.6324555320336759 ], "empirical_novelty_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_paper_avg": [ 83.0, 47.290591030351905 ], "wc_strength_and_weaknesses_avg": [ 238.4, 57.624994577006255 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 91.2, 100.69240289118142 ], "wc_summary_review_avg": [ 44.4, 13.10877568653915 ], "wc_review_avg": [ 457.0, 144.5378843071947 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 946.4, 332.98924907570216 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.8, 0.4000000000000001 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 39, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12364884051837359530&as_sdt=4005&sciodt=0,6&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=MnEjsw-vj-X", "email": "kaist.ac.kr;ee.kaist.ac.kr;;kaist.ac.kr;kaist.edu", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": 
"KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "Mof47lISH6N", "title": "DifFace: Blind Face Restoration with Diffused Error Contraction", "track": "main", "status": "Reject", "tldr": "We propose a new blind face restoration method that consists of an error compressor and a Markov chain partially borrowed from a pre-trained diffusion model. ", "abstract": "While deep learning-based methods for blind face restoration have achieved unprecedented success, they still suffer from two major limitations. First, most of them deteriorate when facing complex degradations out of their training data. Second, these methods require multiple constraints, e.g., fidelity, perceptual, and adversarial losses, which requires laborious hyper-parameters tuning to stabilize and balance their influences. In this work, we propose a novel method named DifFace, being able to cope with unseen and complex degradations more gracefully without complicated loss designs. The key of our method is to establish a posterior distribution from the observed low-quality (LQ) image to its high-quality (HQ) counterpart. In particular, we design a transition distribution from the LQ image to the intermediate state of a pre-trained diffusion model and then gradually transmit from this intermediate state to the HQ target by recursively applying a pre-trained diffusion model. The transition distribution only relies on a restoration backbone that is trained with L2 loss on some synthetic data, which favorably avoids the cumbersome training process in existing methods. Moreover, the transition distribution is capable of contracting the error of the restoration backbone and thus makes our method more robust to unknown degradations. Comprehensive experiments show that DifFace is superior to current state-of-the-art methods, especially in cases with severe degradations. 
Code and model will be released.", "keywords": "Face Restoration;Diffusion Model;Super-resolution", "primary_area": "", "supplementary_material": "", "author": "Zongsheng Yue;Chen Change Loy", "authorids": "~Zongsheng_Yue1;~Chen_Change_Loy2", "gender": "M;M", "homepage": "https://zsyoaoa.github.io/;https://www.mmlab-ntu.com/person/ccloy/index.html", "dblp": "198/4455;01/5855", "google_scholar": "F554LkQAAAAJ;https://scholar.google.co.uk/citations?user=559LF80AAAAJ", "orcid": "0000-0002-9178-671X;0000-0001-5345-1591", "linkedin": ";", "or_profile": "~Zongsheng_Yue1;~Chen_Change_Loy2", "aff": "Nanyang Technological University;Nanyang Technological University", "aff_domain": "ntu.edu.sg;ntu.edu.sg", "position": "Postdoc;Full Professor", "bibtex": "@misc{\nyue2023difface,\ntitle={DifFace: Blind Face Restoration with Diffused Error Contraction},\nauthor={Zongsheng Yue and Chen Change Loy},\nyear={2023},\nurl={https://openreview.net/forum?id=Mof47lISH6N}\n}", "github": "", "project": "", "reviewers": "yge8;sp9a;FutU;SR7T", "site": "https://openreview.net/forum?id=Mof47lISH6N", "pdf_size": 16464260, "recommendation": "5;5;6;8", "confidence": "5;4;4;3", "correctness": "3;3;4;4", "technical_novelty": "2;2;3;4", "empirical_novelty": "2;3;4;4", "wc_summary_paper": "39;56;105;101", "wc_strength_and_weaknesses": "51;314;252;251", "wc_clarity_quality_novelty_and_reproducibility": "143;17;49;21", "wc_summary_review": "4;16;55;36", "wc_review": "237;403;461;409", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "248;1382;407;229", "reply_reviewers": "0;0;0;0", "reply_authors": "1;2;1;1", "recommendation_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 3.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 75.25, 28.428638729281428 ], "wc_strength_and_weaknesses_avg": [ 217.0, 99.17913086935174 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 57.5, 50.87976021956079 ], "wc_summary_review_avg": [ 27.75, 19.447043477094404 ], "wc_review_avg": [ 377.5, 84.19471479849551 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 566.5, 475.8752462568315 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.8660254037844386, "corr_recommendation_correctness": 0.8164965809277259, "gs_citation": 92, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11421542656954576766&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff_unique_index": "0;0", "aff_unique_norm": "Nanyang Technological University", "aff_unique_dep": "", "aff_unique_url": "https://www.ntu.edu.sg", "aff_unique_abbr": "NTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Singapore" }, { "title": "Pushing the Accuracy-Group Robustness Frontier with Introspective Self-play", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11297", "id": "MofT9KEF0kw", "poster": "/media/PosterPDFs/ICLR%202023/11297.png?t=1682833785.6302729", "openreview": "https://openreview.net/forum?id=MofT9KEF0kw", "slides": "https://iclr.cc/virtual/2023/poster/11297", "video": "https://iclr.cc/virtual/2023/poster/11297", "author_site": "Jeremiah Zhe Liu, Krishnamurthy Dvijotham, Jihyeon Lee, Quan Yuan, Balaji Lakshminarayanan, Deepak Ramachandran", "tldr": "Principled training method to 
improve deep model's uncertainty and active learning performance under dataset bias.", "abstract": "Standard empirical risk minimization (ERM) training can produce deep neural network (DNN) models that are accurate on average but under-perform in under-represented population subgroups, especially when there are imbalanced group distributions in the long-tailed training data. Therefore, approaches that improve the accuracy-group robustness trade-off frontier of a DNN model (i.e., improving worst-group accuracy without sacrificing average accuracy, or vice versa) are of crucial importance. Uncertainty-based active learning (AL) can potentially improve the frontier by preferentially sampling underrepresented subgroups to create a more balanced training dataset. However, the quality of uncertainty estimates from modern DNNs tends to degrade in the presence of spurious correlations and dataset bias, compromising the effectiveness of AL for sampling tail groups. In this work, we propose Introspective Self-play (ISP), a simple approach to improve the uncertainty estimation of a deep neural network under dataset bias, by adding an auxiliary introspection task requiring a model to predict the bias for each data point in addition to the label. We show that ISP provably improves the bias-awareness of the model representation and the resulting uncertainty estimates. On two real-world tabular and language tasks, ISP serves as a simple \u201cplug-in\u201d for AL model training, consistently improving both the tail-group sampling rate and the final accuracy-fairness trade-off frontier of popular AL methods.", "keywords": "Uncertainty Quantification;Spurious Correlation;Active Learning", "primary_area": "", "supplementary_material": "", "author": "Jeremiah Zhe Liu;Krishnamurthy Dj Dvijotham;Jihyeon Lee;Quan Yuan;Balaji Lakshminarayanan;Deepak Ramachandran", "authorids": "~Jeremiah_Zhe_Liu1;~Krishnamurthy_Dj_Dvijotham1;~Jihyeon_Lee2;~Quan_Yuan8;~Balaji_Lakshminarayanan1;~Deepak_Ramachandran2", "gender": "M;;;M;M;M", "homepage": ";https://jlee24.github.io;;https://github.com/yq911122;http://www.gatsby.ucl.ac.uk/~balaji/;;http://dvij.github.io", "dblp": "199/2301;;;71/8324;80/703;16/8758", "google_scholar": "9jrmcG4AAAAJ;Ssrt_TAAAAAJ;;QYn8RbgAAAAJ;WbM9EAIAAAAJ;BUtloecAAAAJ", "orcid": ";0000-0002-8894-5628;;;;", "linkedin": ";https://linkedin.com/in/jihyeonlee;;;;", "or_profile": "~Jeremiah_Zhe_Liu1;~Jihyeon_Lee2;~Quan_Yuan8;~Balaji_Lakshminarayanan1;~Deepak_Ramachandran2;~Krishnamurthy_Dvijotham2", "aff": "Google DeepMind;Google;Google;Google Brain;Google;Google Brain", "aff_domain": "google.com;google.com;google.com;google.com;google.com;google.com", "position": "Research Scientist;Researcher;Researcher;Research Scientist;Staff Researcher;research scientist ", "bibtex": "@inproceedings{\nliu2023pushing,\ntitle={Pushing the Accuracy-Group Robustness Frontier with Introspective Self-play},\nauthor={Jeremiah Zhe Liu and Krishnamurthy Dj Dvijotham and Jihyeon Lee and Quan Yuan and Balaji Lakshminarayanan and Deepak Ramachandran},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=MofT9KEF0kw}\n}", "github": "", "project": "", "reviewers": "zGVn;dJid;1jcC", "pdf_size": 16272038, "recommendation": "6;8;8", "confidence": "4;2;5", "correctness": "3;4;3", "technical_novelty": "3;3;3", "empirical_novelty": "3;3;3", "wc_summary_paper": "41;32;110", "wc_strength_and_weaknesses": "145;55;1087", 
"wc_clarity_quality_novelty_and_reproducibility": "22;106;15", "wc_summary_review": "10;36;54", "wc_review": "218;229;1266", "wc_reply_reviewers": "0;0;146", "wc_reply_authors": "541;599;2405", "reply_reviewers": "0;0;2", "reply_authors": "1;1;4", "recommendation_avg": [ 7.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 1.247219128924647 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 61.0, 34.84250278036869 ], "wc_strength_and_weaknesses_avg": [ 429.0, 466.72475828907983 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 47.666666666666664, 41.34677200889515 ], "wc_summary_review_avg": [ 33.333333333333336, 18.06162291219209 ], "wc_review_avg": [ 571.0, 491.4597304629003 ], "wc_reply_reviewers_avg": [ 48.666666666666664, 68.82506003549064 ], "wc_reply_authors_avg": [ 1181.6666666666667, 865.3513095205259 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 2.0, 1.4142135623730951 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.18898223650461365, "corr_recommendation_correctness": 0.49999999999999983, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=342609614386825809&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "pdf": "https://openreview.net/pdf?id=MofT9KEF0kw", "email": "google.com;google.com;google.com;google.com;google.com;google.com", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google DeepMind", "aff_unique_url": "https://deepmind.com", "aff_unique_abbr": "DeepMind", "aff_campus_unique_index": "1;1;1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;1;1;1;1", "aff_country_unique": "United Kingdom;United States" }, { "id": "MpGP-z07TmM", "title": "Learning Specialized Activation Functions for Physics-informed Neural Networks", "track": "main", "status": "Reject", "tldr": "", "abstract": "At the heart of network architectures lie the non-linear activation functions, the choice of which affects the model optimization and task performance. In computer vision and natural language processing, the Rectified Linear Unit is widely adopted across different tasks. However, there is no such default choice of activation functions in the context of physics-informed neural networks (PINNs). It is observed that PINNs exhibit high sensitivity to activation functions due to the various characteristics of each physics system, which makes the choice of the suitable activation function for PINNs a critical issue. Existing works usually choose activation functions in an inefficient trial-and-error manner. To address this problem, we propose to search automatically for the optimal activation function when solving different PDEs. This is achieved by learning an adaptive activation function as linear combinations of a set of candidate functions, whose coefficients can be directly optimized by gradient descent. In addition to its efficient optimization, the proposed method enables the discovery of novel activation function and the incorporation with prior knowledge about the PDE system. We can further enhance its search space with adaptive slope. 
The effectiveness of the proposed adaptive activation function is demonstrated on a series of benchmarks, including the Poisson's equation, Burgers' equation, Allen-Cahn equation, convection equation, Korteweg\u2013de Vries equation and Cahn-Hilliard equation. The performance gain of the proposed method is further interpreted from the neural tangent kernel perspective. Code will be released. ", "keywords": "Physics-informed neural network;adaptive activation functions", "primary_area": "", "supplementary_material": "", "author": "Honghui Wang;Lu Lu;Shiji Song;Gao Huang", "authorids": "~Honghui_Wang1;~Lu_Lu1;~Shiji_Song1;~Gao_Huang1", "gender": "M;M;M;M", "homepage": ";https://lu.seas.upenn.edu;;http://www.gaohuang.net", "dblp": ";01/2086-10;72/5351;", "google_scholar": "https://scholar.google.com.hk/citations?user=FzJ1aIsAAAAJ;wD_wsWUAAAAJ;;-P9LwcgAAAAJ", "orcid": ";0000-0002-5476-5768;;", "linkedin": ";;;", "or_profile": "~Honghui_Wang1;~Lu_Lu1;~Shiji_Song1;~Gao_Huang1", "aff": "Department of Automation, Tsinghua University;University of Pennsylvania;Tsinghua University;Tsinghua University", "aff_domain": "mails.tsinghua.edu.cn;seas.upenn.edu;mail.tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;Assistant Professor;Full Professor;Associate Professor", "bibtex": "@misc{\nwang2023learning,\ntitle={Learning Specialized Activation Functions for Physics-informed Neural Networks},\nauthor={Honghui Wang and Lu Lu and Shiji Song and Gao Huang},\nyear={2023},\nurl={https://openreview.net/forum?id=MpGP-z07TmM}\n}", "github": "", "project": "", "reviewers": "E27E;J89g;5qLY;qn6w", "site": "https://openreview.net/forum?id=MpGP-z07TmM", "pdf_size": 1036724, "recommendation": "3;6;6;8", "confidence": "5;3;4;5", "correctness": "1;3;4;4", "technical_novelty": "2;3;2;3", "empirical_novelty": "1;3;2;2", "wc_summary_paper": "81;56;69;71", "wc_strength_and_weaknesses": "172;71;137;100", "wc_clarity_quality_novelty_and_reproducibility": "34;141;13;28", "wc_summary_review": "39;19;45;93", "wc_review": "326;287;264;292", "wc_reply_reviewers": "505;0;30;15", "wc_reply_authors": "1887;341;829;643", "reply_reviewers": "3;0;1;1", "reply_authors": "5;1;3;2", "recommendation_avg": [ 5.75, 1.7853571071357126 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "correctness_avg": [ 3.0, 1.224744871391589 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 69.25, 8.898735865278843 ], "wc_strength_and_weaknesses_avg": [ 120.0, 38.05916446797013 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 54.0, 50.80846386184097 ], "wc_summary_review_avg": [ 49.0, 27.16615541441225 ], "wc_review_avg": [ 292.25, 22.16275028059469 ], "wc_reply_reviewers_avg": [ 137.5, 212.44116832666873 ], "wc_reply_authors_avg": [ 925.0, 582.0738784724839 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.75, 1.479019945774904 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.1266600992762247, "corr_recommendation_correctness": 0.914659120760047, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16337235856331689136&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Tsinghua University;University of Pennsylvania", "aff_unique_dep": "Department of Automation;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.upenn.edu", "aff_unique_abbr": "THU;UPenn", "aff_campus_unique_index": "", 
"aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "China;United States" }, { "title": "Where to Begin? On the Impact of Pre-Training and Initialization in Federated Learning", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11439", "id": "Mpa3tRJFBb", "poster": "", "openreview": "https://openreview.net/forum?id=Mpa3tRJFBb", "slides": "https://iclr.cc/virtual/2023/poster/11439", "video": "https://iclr.cc/virtual/2023/poster/11439", "author_site": "John Nguyen, Jianyu Wang, Kshitiz Malik, Maziar Sanjabi, Michael Rabbat", "tldr": "Stop worrying about heterogeneity and start from pre-trained weights.", "abstract": "An oft-cited challenge of federated learning is the presence of heterogeneity. \\emph{Data heterogeneity} refers to the fact that data from different clients may follow very different distributions. \\emph{System heterogeneity} refers to client devices having different system capabilities. A considerable number of federated optimization methods address this challenge. In the literature, empirical evaluations usually start federated training from random initialization. However, in many practical applications of federated learning, the server has access to proxy data for the training task that can be used to pre-train a model before starting federated training. Using four standard federated learning benchmark datasets, we empirically study the impact of starting from a pre-trained model in federated learning. Unsurprisingly, starting from a pre-trained model reduces the training time required to reach a target error rate and enables the training of more accurate models (up to 40\\%) than is possible when starting from random initialization. Surprisingly, we also find that starting federated learning from a pre-trained initialization reduces the effect of both data and system heterogeneity. We recommend future work proposing and evaluating federated optimization methods to evaluate the performance when starting from random and pre-trained initializations. This study raises several questions for further work on understanding the role of heterogeneity in federated optimization.", "keywords": "federated learning;optimization", "primary_area": "", "supplementary_material": "/attachment/a5ac0842a4260488fe734ff835266c7882c12307.zip", "author": "John Nguyen;Jianyu Wang;Kshitiz Malik;Maziar Sanjabi;Michael Rabbat", "authorids": "~John_Nguyen1;~Jianyu_Wang2;~Kshitiz_Malik2;~Maziar_Sanjabi1;~Michael_Rabbat1", "gender": ";M;M;M;M", "homepage": "https://johnlnguyen.github.io/;;https://sites.google.com/view/maziar;;", "dblp": ";;21/8577;47/1744;", "google_scholar": "3CTTUYgAAAAJ;5nrx1YwAAAAJ;bc_N2-oAAAAJ;https://scholar.google.ch/citations?user=cMPKe9UAAAAJ;pkAWLt8AAAAJ", "orcid": ";;;;", "linkedin": "qlk/;;;;", "or_profile": "~John_Nguyen1;~Jianyu_Wang2;~Maziar_Sanjabi1;~Michael_Rabbat1;~KSHITIZ_MALIK1", "aff": "Meta Facebook;Meta;Meta;Mila;", "aff_domain": "fb.com;meta.com;meta.com;mila.quebec;", "position": "Researcher;Researcher;Researcher;Associate Member;", "bibtex": "@inproceedings{\nnguyen2023where,\ntitle={Where to Begin? 
On the Impact of Pre-Training and Initialization in Federated Learning},\nauthor={John Nguyen and Jianyu Wang and Kshitiz Malik and Maziar Sanjabi and Michael Rabbat},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=Mpa3tRJFBb}\n}", "github": "", "project": "", "reviewers": "EYvh;Kusi;aMPP", "pdf_size": 732556, "recommendation": "6;8;8", "confidence": "3;4;4", "correctness": "3;4;3", "technical_novelty": "3;3;2", "empirical_novelty": "0;3;3", "wc_summary_paper": "46;63;59", "wc_strength_and_weaknesses": "110;153;382", "wc_clarity_quality_novelty_and_reproducibility": "68;25;149", "wc_summary_review": "32;51;41", "wc_review": "256;292;631", "wc_reply_reviewers": "10;0;122", "wc_reply_authors": "255;195;1088", "reply_reviewers": "1;0;1", "reply_authors": "2;2;5", "recommendation_avg": [ 7.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 1.4142135623730951 ], "wc_summary_paper_avg": [ 56.0, 7.2571803523590805 ], "wc_strength_and_weaknesses_avg": [ 215.0, 119.38453277818977 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 80.66666666666667, 51.40903509003927 ], "wc_summary_review_avg": [ 41.333333333333336, 7.760297817881877 ], "wc_review_avg": [ 393.0, 168.9319389576761 ], "wc_reply_reviewers_avg": [ 44.0, 55.3052137385497 ], "wc_reply_authors_avg": [ 512.6666666666666, 407.5588573718184 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 3.0, 1.4142135623730951 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.9999999999999998, "corr_recommendation_correctness": 0.49999999999999983, "gs_citation": 131, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2244603056474424018&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "pdf": "https://openreview.net/pdf?id=Mpa3tRJFBb", "email": "fb.com;meta.com;meta.com;mila.quebec;", "author_num": 5, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Meta;Mila", "aff_unique_dep": "Meta Platforms, Inc.;Quebec Artificial Intelligence Institute", "aff_unique_url": "https://meta.com;https://mila.quebec", "aff_unique_abbr": "Meta;Mila", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "United States;Canada" }, { "id": "MpikUXtGQCI", "title": "On the Convergence and Calibration of Deep Learning with Differential Privacy", "track": "main", "status": "Withdraw", "tldr": "We show that differentially private deep learning can be severely mis-calibrated due to gradient clipping, which can be alleviated by a new clipping method.", "abstract": "A differentially private (DP) neural network usually achieves privacy at the cost of slower convergence (and thus lower performance) than its non-private counterpart. To analyze the difficulty of DP training, this work gives the first convergence analysis through the lens of training dynamics and the neural tangent kernel (NTK). We successfully characterize the effects of two key components in the DP training: the per-sample gradient clipping (flat or layerwise) and the noise addition. 
Our analysis not only initiates a general principled framework for understanding DP deep learning with any network architecture and loss function, but also motivates a new clipping method -- \\textit{global clipping} -- that significantly improves convergence while preserving the same DP guarantee and computational efficiency as the existing method, which we term \\textit{local clipping}.\n\nTheoretically speaking, we precisely characterize the effect of per-sample clipping on the NTK matrix and show that the noise scale of DP optimizers does not affect the convergence in the \\textit{gradient flow} regime. In particular, we shed light on several behaviors that are only guaranteed by our global clipping. For example, the global clipping can preserve the positive semi-definiteness of NTK, which is almost certainly broken by the local clipping; DP gradient descent (GD) with global clipping converges monotonically to zero loss, while the convergence of local clipping can be non-monotone; the global clipping is surprisingly effective at learning \\textit{calibrated classifiers}, whereas existing DP classifiers are oftentimes over-confident and unreliable. Notably, our analysis framework easily extends to other optimizers, e.g., DP-Adam. We demonstrate through numerous experiments that DP optimizers equipped with global clipping perform strongly. Implementation-wise, the global clipping can be realized by inserting only one line of code into the Pytorch \\texttt{Opacus} library.", "keywords": "deep learning;differential privacy;calibration;convergence;neural tangent kernel", "primary_area": "", "supplementary_material": "/attachment/53290e32846a77e57efe942e378080c59ff02070.zip", "author": "Zhiqi Bu;Hua Wang;Qi Long", "authorids": "~Zhiqi_Bu1;~Hua_Wang7;~Qi_Long1", "gender": "M;M;M", "homepage": "https://sites.google.com/view/zhiqi-bu;https://statistics.wharton.upenn.edu/profile/wanghua/;https://www.med.upenn.edu/long-lab/", "dblp": "245/2573;;47/7320", "google_scholar": "MEvTLxIAAAAJ;;gfklepYAAAAJ", "orcid": ";;0000-0003-0660-5230", "linkedin": ";;qi-long-9652a0125/", "or_profile": "~Zhiqi_Bu1;~Hua_Wang7;~Qi_Long1", "aff": "Amazon;The Wharton School, University of Pennsylvania;University of Pennsylvania", "aff_domain": "amazon.com;wharton.upenn.edu;upenn.edu", "position": "Researcher;PhD student;Professor", "bibtex": "@misc{\nbu2023on,\ntitle={On the Convergence and Calibration of Deep Learning with Differential Privacy},\nauthor={Zhiqi Bu and Hua Wang and Qi Long},\nyear={2023},\nurl={https://openreview.net/forum?id=MpikUXtGQCI}\n}", "github": "", "project": "", "reviewers": "Tosw;2Z8K;PYzJ;uXWK;m1RY", "site": "https://openreview.net/forum?id=MpikUXtGQCI", "pdf_size": 870522, "recommendation": "3;3;3;5;6", "confidence": "4;3;3;4;2", "correctness": "3;2;2;3;4", "technical_novelty": "2;2;2;2;2", "empirical_novelty": "3;3;2;2;4", "wc_summary_paper": "85;53;44;49;99", "wc_strength_and_weaknesses": "914;401;57;141;309", "wc_clarity_quality_novelty_and_reproducibility": "43;173;315;12;41", "wc_summary_review": "37;31;29;32;82", "wc_review": "1079;658;445;234;531", "wc_reply_reviewers": "41;0;0;0;0", "wc_reply_authors": "504;650;289;115;78", "reply_reviewers": "1;0;0;0;0", "reply_authors": "1;1;1;1;1", "recommendation_avg": [ 4.0, 1.2649110640673518 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "correctness_avg": [ 2.8, 0.7483314773547882 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_paper_avg": [ 66.0, 21.872356983187704 
], "wc_strength_and_weaknesses_avg": [ 364.4, 300.29025958229147 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 116.8, 113.6809570684554 ], "wc_summary_review_avg": [ 42.2, 20.073863604199367 ], "wc_review_avg": [ 589.4, 281.0641207980841 ], "wc_reply_reviewers_avg": [ 8.2, 16.4 ], "wc_reply_authors_avg": [ 327.2, 220.92840469256097 ], "reply_reviewers_avg": [ 0.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.42257712736425823, "corr_recommendation_correctness": 0.8451542547285165, "gs_citation": 43, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3243055467196602428&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff_unique_index": "0;1;1", "aff_unique_norm": "Amazon;University of Pennsylvania", "aff_unique_dep": "Amazon.com, Inc.;The Wharton School", "aff_unique_url": "https://www.amazon.com;https://www.wharton.upenn.edu", "aff_unique_abbr": "Amazon;UPenn Wharton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "MpwWSMOlkc", "title": "Local Distance Preserving Auto-encoders using Continuous k-Nearest Neighbours Graphs", "track": "main", "status": "Reject", "tldr": "", "abstract": "Auto-encoder models that preserve similarities in the data are a popular tool in representation learning. In this paper we introduce several auto-encoder models that preserve local distances when mapping from the data space to the latent space. We use a local distance-preserving loss that is based on the continuous k-nearest neighbours graph which is known to capture topological features at all scales simultaneously. To improve training performance, we formulate learning as a constraint optimisation problem with local distance preservation as the main objective and reconstruction accuracy as a constraint. We generalise this approach to hierarchical variational auto-encoders thus learning generative models with geometrically consistent latent and data spaces. 
Our method provides state-of-the-art or comparable performance across several standard datasets and evaluation metrics.", "keywords": "manifold learning;representational learning;generative models", "primary_area": "", "supplementary_material": "", "author": "Nutan Chen;Patrick van der Smagt;Botond Cseke", "authorids": "~Nutan_Chen2;~Patrick_van_der_Smagt1;~Botond_Cseke2", "gender": "M;M;M", "homepage": "https://argmax.org;;https://argmax.ai/team/nutan-chen/", "dblp": "24/6573.html;85/1418;", "google_scholar": "https://scholar.google.de/citations?user=5ybzvbsAAAAJ;v23xgC0AAAAJ;HH3n9scAAAAJ", "orcid": "0000-0003-4418-4916;;", "linkedin": "smagt/;;", "or_profile": "~Patrick_van_der_Smagt1;~Botond_Cseke2;~Nutan_Chen1", "aff": "Machine Learning Research Lab; Volkswagen Group;;Machine Learning Research Lab, Volkswagen Group", "aff_domain": "volkswagen.de;;volkswagen.de", "position": "Full Professor;;Researcher", "bibtex": "@misc{\nchen2023local,\ntitle={Local Distance Preserving Auto-encoders using Continuous k-Nearest Neighbours Graphs},\nauthor={Nutan Chen and Patrick van der Smagt and Botond Cseke},\nyear={2023},\nurl={https://openreview.net/forum?id=MpwWSMOlkc}\n}", "github": "", "project": "", "reviewers": "bRM4;n97h;1cQZ", "site": "https://openreview.net/forum?id=MpwWSMOlkc", "pdf_size": 7207344, "recommendation": "3;3;5", "confidence": "4;4;3", "correctness": "3;2;4", "technical_novelty": "2;2;2", "empirical_novelty": "2;2;2", "wc_summary_paper": "83;92;53", "wc_strength_and_weaknesses": "134;46;518", "wc_clarity_quality_novelty_and_reproducibility": "65;9;35", "wc_summary_review": "22;15;31", "wc_review": "304;162;637", "wc_reply_reviewers": "0;0;166", "wc_reply_authors": "65;126;754", "reply_reviewers": "0;0;1", "reply_authors": "1;1;1", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 76.0, 16.673332000533065 ], "wc_strength_and_weaknesses_avg": [ 232.66666666666666, 204.93467793963575 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 36.333333333333336, 22.88133640230735 ], "wc_summary_review_avg": [ 22.666666666666668, 6.548960901462833 ], "wc_review_avg": [ 367.6666666666667, 199.0750835461055 ], "wc_reply_reviewers_avg": [ 55.333333333333336, 78.25315045131126 ], "wc_reply_authors_avg": [ 315.0, 311.4171907051161 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": 0.8660254037844387, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=622774513607929825&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;1", "aff_unique_norm": "Machine Learning Research Lab;Volkswagen Group", "aff_unique_dep": "Machine Learning Research;", "aff_unique_url": ";https://www.volkswagenag.com", "aff_unique_abbr": ";VW Group", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1;1", "aff_country_unique": ";Germany" }, { "id": "Mqul6rfHoq", "title": "Multi-agent Linear Contextual Bandits with Bounded O(1) Regret", "track": "main", "status": "Withdraw", "tldr": "A recurring linear contextual bandit problem with $O(1)$ regret policy is proposed.", "abstract": "Asymptotically unbounded regret of order 
$O(\\sqrt{T})$ has been proved to be the lowest possible regret order that can be achieved in typical linear contextual bandit settings. Here we present a linear contextual bandit setting with repetitive arrivals of a set of agents where bounded, i.e., $O(1)$, expected regret can be achieved for each agent. We provide a novel Counterfactual UCB (CFUCB) policy where agents benefit from the experiences of other agents. It is shown that sharing of information is a Subgame Perfect Nash Equilibrium for the agents with respect to the order of the regret, which results in each agent realizing bounded regret. Personalized recommender systems and adaptive experimentation are two important applications.", "keywords": "Bounded regret;Linear Contextual bandit;Recurring arrivals;Counterfactual;Upper Confidence Bound", "primary_area": "", "supplementary_material": "", "author": "Hyunwook Kang", "authorids": "~Hyunwook_Kang2", "gender": "M", "homepage": "https://sites.google.com/view/hyunwookkang", "dblp": "", "google_scholar": "UXvZAZMAAAAJ", "orcid": "0000-0002-9617-0893", "linkedin": "", "or_profile": "~HYUNWOOK_KANG1", "aff": "Texas A&M", "aff_domain": "tamu.edu", "position": "PhD student", "bibtex": "@misc{\nkang2023multiagent,\ntitle={Multi-agent Linear Contextual Bandits with Bounded O(1) Regret},\nauthor={Hyunwook Kang},\nyear={2023},\nurl={https://openreview.net/forum?id=Mqul6rfHoq}\n}", "github": "", "project": "", "reviewers": "", "site": "https://openreview.net/forum?id=Mqul6rfHoq", "pdf_size": 354729, "recommendation": "", "confidence": "", "correctness": "", "technical_novelty": "", "empirical_novelty": "", "wc_summary_paper": "", "wc_strength_and_weaknesses": "", "wc_clarity_quality_novelty_and_reproducibility": "", "wc_summary_review": "", "wc_review": "", "wc_reply_reviewers": "", "wc_reply_authors": "", "reply_reviewers": "", "reply_authors": "", "recommendation_avg": [ 0, 0 ], "confidence_avg": [ 0, 0 ], "correctness_avg": [ 0, 0 ], "technical_novelty_avg": [ 0, 0 ], "empirical_novelty_avg": [ 0, 0 ], "wc_summary_paper_avg": [ 0, 0 ], "wc_strength_and_weaknesses_avg": [ 0, 0 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 0, 0 ], "wc_summary_review_avg": [ 0, 0 ], "wc_review_avg": [ 0, 0 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 1, 0 ], "authors#_avg": [ 1, 0 ], "corr_recommendation_confidence": 0, "corr_recommendation_correctness": 0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:rkLkch6FvYwJ:scholar.google.com/&scioq=Multi-agent+Linear+Contextual+Bandits+with+Bounded+O(1)+Regret&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Texas A&M University", "aff_unique_dep": "", "aff_unique_url": "https://www.tamu.edu", "aff_unique_abbr": "TAMU", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "Ms1Zs8s7rg", "title": "Demystifying Approximate RL with $\\epsilon$-greedy Exploration: A Differential Inclusion View", "track": "main", "status": "Reject", "tldr": "We provide the first framework for analyzing value-based RL methods with function approximation and $\\epsilon$-greedy exploration, answering a long standing open question.", "abstract": "Q-learning and SARSA(0) with $\\epsilon$-greedy exploration are leading reinforcement learning methods, and their tabular forms converge to the optimal Q-function under reasonable conditions. 
However, with function approximation, they exhibit unexpected behaviors, such as i.) policy oscillation and chattering, ii.) multiple attractors, and iii.) convergence to different attractors (possibly even the worst policy) on different runs, apart from the textbook instability. Accordingly, a theory to explain these phenomena has been a long-standing open problem, even for basic linear function approximation (Sutton, 1999). Our work uses differential inclusion theory to provide the first framework for resolving this problem. We further illustrate via numerical examples how this framework helps explain these algorithms' asymptotic behaviors.", "keywords": "differential inclusion;epsilon-greedy exploration;function approximation;value-based RL;Q-learning;SARSA;policy oscillation;chattering;discontinuous policies;stability", "primary_area": "", "supplementary_material": "/attachment/dc8468b8e2a0f83ffae005ff519ac563ec265271.zip", "author": "Aditya Gopalan;Gugan Thoppe", "authorids": "~Aditya_Gopalan1;~Gugan_Thoppe1", "gender": "M;M", "homepage": "https://ece.iisc.ac.in/~aditya/;", "dblp": "90/9826;117/3710", "google_scholar": "dM5_1NsAAAAJ;https://scholar.google.co.in/citations?user=X5zV3s8AAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Aditya_Gopalan1;~Gugan_Thoppe1", "aff": "Indian Institute of Science;Indian Institute of Science", "aff_domain": "iisc.ac.in;iisc.ac.in", "position": "Associate Professor;Assistant Professor", "bibtex": "@misc{\ngopalan2023demystifying,\ntitle={Demystifying Approximate {RL} with \\${\\textbackslash}epsilon\\$-greedy Exploration: A Differential Inclusion View},\nauthor={Aditya Gopalan and Gugan Thoppe},\nyear={2023},\nurl={https://openreview.net/forum?id=Ms1Zs8s7rg}\n}", "github": "", "project": "", "reviewers": "ieqE;A5T2;JuQi;svEP", "site": "https://openreview.net/forum?id=Ms1Zs8s7rg", "pdf_size": 1093784, "recommendation": "5;5;5;8", "confidence": "3;3;2;2", "correctness": "3;3;3;4", "technical_novelty": "2;3;3;4", "empirical_novelty": "2;0;2;4", "wc_summary_paper": "47;73;40;76", "wc_strength_and_weaknesses": "135;143;126;31", "wc_clarity_quality_novelty_and_reproducibility": "91;173;31;71", "wc_summary_review": "501;57;28;51", "wc_review": "774;446;225;229", "wc_reply_reviewers": "0;0;45;0", "wc_reply_authors": "1114;533;1319;22", "reply_reviewers": "0;0;1;0", "reply_authors": "3;2;3;1", "recommendation_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 2.5, 0.5 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.0, 1.4142135623730951 ], "wc_summary_paper_avg": [ 59.0, 15.732132722552274 ], "wc_strength_and_weaknesses_avg": [ 108.75, 45.29003753586433 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 91.5, 51.7759596724194 ], "wc_summary_review_avg": [ 159.25, 197.60614236404697 ], "wc_review_avg": [ 418.5, 223.8799901733069 ], "wc_reply_reviewers_avg": [ 11.25, 19.48557158514987 ], "wc_reply_authors_avg": [ 747.0, 508.25534920943034 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:SCjhLzE3jQQJ:scholar.google.com/&scioq=Demystifying+Approximate+RL+with+%24%5Cepsilon%24-greedy+Exploration:+A+Differential+Inclusion+View&hl=en&as_sdt=0,5", 
"gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Indian Institute of Science", "aff_unique_dep": "", "aff_unique_url": "https://www.iisc.ac.in", "aff_unique_abbr": "IISc", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "India" }, { "id": "Ms4S3XC3vtW", "title": "Concealing Sensitive Samples for Enhanced Privacy in Federated Learning", "track": "main", "status": "Withdraw", "tldr": "A method for improving privacy in federated learning by obfuscating sensitive data with adaptively synthesized concealed samples.", "abstract": "Federated Learning (FL) is a distributed learning paradigm that promises to protect users\u2019 privacy by not requiring the clients to share their raw and private data with the server. Despite the success, recent studies reveal the vulnerability of FL to model inversion attacks by showing that they can reconstruct users\u2019 private data via eavesdropping on the shared gradient information. Most existing defence methods to preserve privacy in FL are formulated to protect all data samples equally, which in turn proven brittle against attacks and compromising the FL performance. In this paper, we argue that data containing sensitive information should take precedence. We present a simple, yet effective defence strategy that obfuscates the gradients of the sensitive data with concealed samples. In doing so, we propose to synthesize concealed samples to simulate the sensitive data at the gradient level. Furthermore, we employ a gradient projection technique to obscure sensitive data without compromising the quality of the shared gradients, hence enabling FL to retain its performance. Compared to the previous art, our empirical evaluations suggest that the proposed technique provides the strongest protection while simultaneously maintaining the FL performance. 
We also provide examples of how the proposed method can be combined with other defences to boost the privacy-performance trade-off even further.", "keywords": "Privacy preserving;Model inversion attack;Federated Learning", "primary_area": "", "supplementary_material": "/attachment/dee4ec5f94f29604ac033f3507b26adc8a0b7faa.zip", "author": "Jing Wu;Munawar Hayat;Mingyi Zhou;Mehrtash Harandi", "authorids": "~Jing_Wu6;~Munawar_Hayat2;~Mingyi_Zhou2;~Mehrtash_Harandi2", "gender": "F;;M;", "homepage": "https://jingwu321.github.io/;;https://zhoumingyi.github.io/;", "dblp": "88/3604-21.html;;238/4774;", "google_scholar": "https://scholar.google.com.au/citations?user=wRWAmm4AAAAJ;;2emq9AoAAAAJ;", "orcid": "0009-0004-7049-5480;;0000-0003-3514-0372;", "linkedin": ";;mingyi-zhou-920417208/;", "or_profile": "~Jing_Wu6;~Munawar_Hayat2;~Mingyi_Zhou2;~Mehrtash_Harandi2", "aff": "Monash University;;Monash University;", "aff_domain": "monash.edu;;monash.edu;", "position": "PhD student;;PhD student;", "bibtex": "@misc{\nwu2023concealing,\ntitle={Concealing Sensitive Samples for Enhanced Privacy in Federated Learning},\nauthor={Jing Wu and Munawar Hayat and Mingyi Zhou and Mehrtash Harandi},\nyear={2023},\nurl={https://openreview.net/forum?id=Ms4S3XC3vtW}\n}", "github": "", "project": "", "reviewers": "evNy;3x3J;456T;ve34", "site": "https://openreview.net/forum?id=Ms4S3XC3vtW", "pdf_size": 865360, "recommendation": "3;5;5;8", "confidence": "4;3;4;4", "correctness": "2;3;2;4", "technical_novelty": "2;3;2;4", "empirical_novelty": "2;3;2;0", "wc_summary_paper": "112;103;53;69", "wc_strength_and_weaknesses": "494;322;35;93", "wc_clarity_quality_novelty_and_reproducibility": "84;95;11;93", "wc_summary_review": "20;41;215;16", "wc_review": "710;561;314;271", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.25, 1.7853571071357126 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.82915619758885 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 1.75, 1.0897247358851685 ], "wc_summary_paper_avg": [ 84.25, 24.138920854089562 ], "wc_strength_and_weaknesses_avg": [ 236.0, 183.58240656446358 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 70.75, 34.744603897583865 ], "wc_summary_review_avg": [ 73.0, 82.5318120484459 ], "wc_review_avg": [ 464.0, 180.05138155537713 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.08084520834544431, "corr_recommendation_correctness": 0.8866206949335731, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:kdTDECje1yUJ:scholar.google.com/&scioq=Concealing+Sensitive+Samples+for+Enhanced+Privacy+in+Federated+Learning&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Monash University", "aff_unique_dep": "", "aff_unique_url": "https://www.monash.edu", "aff_unique_abbr": "Monash", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Australia" }, { "id": "MsjB2ohCJO1", "title": "How to Do a Vocab Swap? 
A Study of Embedding Replacement for Pre-trained Transformers", "track": "main", "status": "Withdraw", "tldr": "We investigate strategies for swapping the vocabularies of transformer encoders using smart initializations.", "abstract": "There are a wide range of different tokenizers and vocabularies that have been used to train language models, and training a language model on just one of these can be prohibitively expensive. The ability to swap the vocabulary of a model after it has been trained enables models to be adapted to different tokenizers, and even different languages, without the computational or data cost of from-scratch training. In this paper, we ask when such swaps are possible, and how to perform them effectively? The major challenge of performing a vocab swap is re-learning the parameters of the embedding layer for the vocabulary. We observe that it is possible to re-learn the embedding for a vocabulary using a naive initialization, and we investigate strong initialization strategies that enable learning of new embeddings for swapped vocabularies, even when those vocabularies come from a different source language than the original language model.", "keywords": "transfer learning;transformers;language models", "primary_area": "", "supplementary_material": "", "author": "Neel Jain;John Kirchenbauer;Jonas Geiping;Tom Goldstein", "authorids": "~Neel_Jain1;~John_Kirchenbauer1;~Jonas_Geiping1;~Tom_Goldstein1", "gender": ";M;M;M", "homepage": ";https://jwkirchenbauer.notion.site/;https://jonasgeiping.github.io/;https://www.cs.umd.edu/~tomg/", "dblp": ";321/0678;190/7229;25/8184", "google_scholar": "https://scholar.google.com/citations?hl=en;48GJrbsAAAAJ;https://scholar.google.de/citations?user=206vNCEAAAAJ;KmSuVtgAAAAJ", "orcid": ";;;", "linkedin": "neel-jain-0a6a239/;johnkirchenbauer/;;", "or_profile": "~Neel_Jain1;~John_Kirchenbauer1;~Jonas_Geiping1;~Tom_Goldstein1", "aff": "University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park", "aff_domain": "umd.edu;umd.edu;umd.edu;umd.edu", "position": "PhD student;PhD student;Postdoc;Full Professor", "bibtex": "@misc{\njain2023how,\ntitle={How to Do a Vocab Swap? 
A Study of Embedding Replacement for Pre-trained Transformers},\nauthor={Neel Jain and John Kirchenbauer and Jonas Geiping and Tom Goldstein},\nyear={2023},\nurl={https://openreview.net/forum?id=MsjB2ohCJO1}\n}", "github": "", "project": "", "reviewers": "myJw;TbWT;U9gg;aFYD", "site": "https://openreview.net/forum?id=MsjB2ohCJO1", "pdf_size": 750164, "recommendation": "3;3;3;6", "confidence": "3;5;4;4", "correctness": "2;3;2;3", "technical_novelty": "2;1;2;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "55;82;81;189", "wc_strength_and_weaknesses": "118;130;154;408", "wc_clarity_quality_novelty_and_reproducibility": "78;533;253;149", "wc_summary_review": "76;72;53;41", "wc_review": "327;817;541;787", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "214;314;536;207", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 3.75, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 101.75, 51.52365961381237 ], "wc_strength_and_weaknesses_avg": [ 202.5, 119.35137200719562 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 253.25, 173.09011381358556 ], "wc_summary_review_avg": [ 60.5, 14.221462653327892 ], "wc_review_avg": [ 618.0, 199.23102168086174 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 317.75, 132.9254960494788 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=31050797906834442&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Maryland", "aff_unique_dep": "", "aff_unique_url": "https://www.umd.edu", "aff_unique_abbr": "UMD", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "College Park", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "MtGmCCPJD-", "title": "Repository-Level Prompt Generation for Large Language Models of Code", "track": "main", "status": "Reject", "tldr": "", "abstract": "With the success of large language models (LLMs) of code and their use as code assistants (e.g.\\ Codex used in GitHub Copilot), techniques for introducing domain-specific knowledge in the prompt design process become important. In this work, we propose a framework called Repo-Level Prompt Generator that learns to generate example-specific prompts using prompt proposals. The prompt proposals take context from the entire repository, thereby incorporating both the structure of the repository and the context from other relevant files (e.g.\\ imports, parent class files). Our technique doesn't require any access to the weights of the LLM, making it applicable in cases where we only have black-box access to the LLM. We conduct experiments on the task of single-line code-autocompletion using code repositories taken from Google Code archives. We demonstrate that an oracle constructed from our prompt proposals gives a remarkably high relative improvement of 36\\% over Codex, showing the quality of these proposals. 
Further, we show that when we train a model to select the best prompt proposal, we can achieve significant performance gains over Codex and other baselines.", "keywords": "prompt generation;codex;large language models of code;code-autocompletion;source code;LLM;retrieval", "primary_area": "", "supplementary_material": "/attachment/8ddd555978a0e2525836555b76dcc440a3152623.zip", "author": "Disha Shrivastava;Hugo Larochelle;Daniel Tarlow", "authorids": "~Disha_Shrivastava1;~Hugo_Larochelle1;~Daniel_Tarlow1", "gender": "F;M;", "homepage": "https://shrivastavadisha.github.io/;https://mila.quebec/en/directory/hugo-larochelle;", "dblp": "203/9100;86/3862.html;", "google_scholar": "https://scholar.google.co.in/citations?user=7R8dnlUAAAAJ;https://scholar.google.ca/citations?user=U89FHq4AAAAJ;", "orcid": ";;", "linkedin": "disha-shrivastava-8398a212/;;", "or_profile": "~Disha_Shrivastava1;~Hugo_Larochelle1;~Daniel_Tarlow1", "aff": "Montreal Institute for Learning Algorithms, University of Montreal, University of Montreal;Google;", "aff_domain": "mila.umontreal.ca;google.com;", "position": "PhD student;Research Scientist;", "bibtex": "@misc{\nshrivastava2023repositorylevel,\ntitle={Repository-Level Prompt Generation for Large Language Models of Code},\nauthor={Disha Shrivastava and Hugo Larochelle and Daniel Tarlow},\nyear={2023},\nurl={https://openreview.net/forum?id=MtGmCCPJD-}\n}", "github": "", "project": "", "reviewers": "xrrm;oQyE;VAEL;9is8", "site": "https://openreview.net/forum?id=MtGmCCPJD-", "pdf_size": 509561, "recommendation": "3;5;6;8", "confidence": "4;4;3;3", "correctness": "2;4;3;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;3;3;2", "wc_summary_paper": "198;62;56;29", "wc_strength_and_weaknesses": "232;228;52;101", "wc_clarity_quality_novelty_and_reproducibility": "46;26;24;54", "wc_summary_review": "64;37;234;38", "wc_review": "540;353;366;222", "wc_reply_reviewers": "0;0;0;8", "wc_reply_authors": "894;294;805;206", "reply_reviewers": "0;0;0;1", "reply_authors": "2;1;2;1", "recommendation_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 86.25, 65.70530800475711 ], "wc_strength_and_weaknesses_avg": [ 153.25, 78.69363061900245 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 37.5, 12.835497652993435 ], "wc_summary_review_avg": [ 93.25, 81.97979934105719 ], "wc_review_avg": [ 370.25, 113.03622207062655 ], "wc_reply_reviewers_avg": [ 2.0, 3.4641016151377544 ], "wc_reply_authors_avg": [ 549.75, 302.99865923795767 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.8320502943378437, "corr_recommendation_correctness": 0.39223227027636803, "gs_citation": 136, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12758269585405420635&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff_unique_index": "0;1", "aff_unique_norm": "University of Montreal;Google", "aff_unique_dep": "Montreal Institute for Learning Algorithms;Google", "aff_unique_url": "https://www.umontreal.ca;https://www.google.com", "aff_unique_abbr": "UM;Google", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Montreal;Mountain View", "aff_country_unique_index": "0;1", "aff_country_unique": "Canada;United States" }, { "id": "MuWgF-FVzON", "title": "The Impact of 
Approximation Errors on Warm-Start Reinforcement Learning: A Finite-time Analysis", "track": "main", "status": "Reject", "tldr": "", "abstract": "Warm-Start reinforcement learning (RL), aided by a prior policy obtained from offline training, is emerging as a promising RL approach for practical applications. Recent empirical studies have demonstrated that the performance of Warm-Start RL can be improved \\textit{quickly} in some cases but become \\textit{stagnant} in other cases, calling for a fundamental understanding, especially when function approximation is used. To fill this void, we take a finite-time analysis approach to quantify the impact of approximation errors on the learning performance of Warm-Start RL. Specifically, we consider the widely used Actor-Critic (A-C) method with a prior policy. We first quantify the approximation errors in the Actor update and the Critic update, respectively. Next, we cast the Warm-Start A-C algorithm as Newton's method with perturbation, and study the impact of the approximation errors on the finite-time learning performance with inaccurate Actor/Critic updates. Under some general technical conditions, we obtain lower bounds on the sub-optimality gap of the Warm-Start A-C algorithm to quantify the impact of the bias and error propagation. We also derive the upper bounds, which provide insights on achieving the desired finite-time learning performance in the Warm-Start A-C algorithm.", "keywords": "Reinforcement Learning;Finite-time Analysis;Approximation Error;Warm Start", "primary_area": "", "supplementary_material": "/attachment/24cacce1f122895b17bc9f186b8f76612e183266.zip", "author": "Hang Wang;Sen Lin;Junshan Zhang", "authorids": "~Hang_Wang2;~Sen_Lin1;~Junshan_Zhang1", "gender": "M;;M", "homepage": "https://ustcmike.github.io/;https://slin70.github.io/;https://faculty.engineering.ucdavis.edu/jzhang/", "dblp": ";70/9499-1.html;59/1232.html", "google_scholar": "Xdb3u_q3RKwC;94-TbUsAAAAJ;UtAdFs8AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Hang_Wang2;~Sen_Lin1;~Junshan_Zhang1", "aff": "University of California, Davis;Ohio State University, Columbus;University of California, Davis", "aff_domain": "ucdavis.edu;osu.edu;ucdavis.edu", "position": "PhD student;Postdoc;Full Professor", "bibtex": "@misc{\nwang2023the,\ntitle={The Impact of Approximation Errors on Warm-Start Reinforcement Learning: A Finite-time Analysis},\nauthor={Hang Wang and Sen Lin and Junshan Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=MuWgF-FVzON}\n}", "github": "", "project": "", "reviewers": "7KE9;uKPa;gGbf;528u", "site": "https://openreview.net/forum?id=MuWgF-FVzON", "pdf_size": 5826280, "recommendation": "3;5;6;6", "confidence": "4;2;2;2", "correctness": "2;4;4;3", "technical_novelty": "2;3;3;2", "empirical_novelty": "0;0;0;2", "wc_summary_paper": "65;115;30;82", "wc_strength_and_weaknesses": "508;444;119;140", "wc_clarity_quality_novelty_and_reproducibility": "23;69;27;26", "wc_summary_review": "98;105;240;32", "wc_review": "694;733;416;280", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 2.5, 0.8660254037844386 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 0.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 73.0, 30.651264247988205 ], "wc_strength_and_weaknesses_avg": [ 302.75, 174.8790653566058 ], 
"wc_clarity_quality_novelty_and_reproducibility_avg": [ 36.25, 18.965429075030176 ], "wc_summary_review_avg": [ 118.75, 75.57570707575286 ], "wc_review_avg": [ 530.75, 189.47212855721023 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.9428090415820632, "corr_recommendation_correctness": 0.7385489458759963, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Hycgs-yA3kUJ:scholar.google.com/&scioq=The+Impact+of+Approximation+Errors+on+Warm-Start+Reinforcement+Learning:+A+Finite-time+Analysis&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of California, Davis;Ohio State University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucdavis.edu;https://www.osu.edu", "aff_unique_abbr": "UC Davis;OSU", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Davis;Columbus", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "MuoduaZpQxE", "title": "MetaP: How to Transfer Your Knowledge on Learning Hidden Physics", "track": "main", "status": "Reject", "tldr": "Meta-learning method to transfer hidden physics", "abstract": "Gradient-based meta-learning methods have primarily focused on classical machine learning tasks such as image classification and function regression, where they were found to perform well by recovering the underlying common representation among a set of given tasks. Recently, PDE-solving deep learning methods, such as neural operators, are starting to make an important impact on learning and predicting the response of a complex physical system directly from observational data. Since the data acquisition in this context is commonly challenging and costly, the call of utilization and transfer of existing knowledge to new and unseen physical systems is even more acute.\n\nHerein, we propose a novel meta-learnt approach for transfer-learning knowledge between neural operators, which can be seen as transferring the knowledge of solution operators between governing (unknown) PDEs with varying parameter fields. With the key theoretical observation that the underlying parameter field can be captured in the first layer of the neural operator model, in contrast to typical final-layer transfer in existing meta-learning methods, our approach is a provably universal solution operator for multiple PDE solving tasks. 
As applications, we demonstrate the efficacy of our proposed approach on heterogeneous material modeling tasks, which shows that our method can handle complex and nonlinear physical response learning tasks while greatly improving the sampling efficiency in new and unseen materials.", "keywords": "meta-learning;neural operator;parametric PDEs", "primary_area": "", "supplementary_material": "/attachment/b5a3eea6d6afa76f0372ab306b1a4a5550b0fa79.zip", "author": "Lu Zhang;Huaiqian You;Tian Gao;Mo Yu;Chung-Hao Lee;Yue Yu", "authorids": "luz319@lehigh.edu;~Huaiqian_You1;~Tian_Gao1;~Mo_Yu1;ch.lee@ou.edu;~Yue_Yu3", "gender": ";M;;M;;", "homepage": ";;https://sites.google.com/view/tiangao/home;http://researcher.ibm.com/researcher/view.php?person=us-yum;;", "dblp": ";;;32/7445.html;;", "google_scholar": ";;5rweipAAAAAJ;vC8DssQAAAAJ;;", "orcid": ";0000-0002-8273-5742;0000-0002-0337-6682;;;", "linkedin": ";;;;;", "or_profile": "luz319@lehigh.edu;~Huaiqian_You1;~Tian_Gao1;~Mo_Yu1;ch.lee@ou.edu;~Yue_Yu3", "aff": ";;Rensselaer Polytechnic Institute;WeChat AI, Tencent;;", "aff_domain": ";;rpi.edu;tencent.com;;", "position": ";;PhD student;Principal Researcher;;", "bibtex": "@misc{\nzhang2023metap,\ntitle={MetaP: How to Transfer Your Knowledge on Learning Hidden Physics},\nauthor={Lu Zhang and Huaiqian You and Tian Gao and Mo Yu and Chung-Hao Lee and Yue Yu},\nyear={2023},\nurl={https://openreview.net/forum?id=MuoduaZpQxE}\n}", "github": "", "project": "", "reviewers": "evBd;GCvR;dEyR;koC6", "site": "https://openreview.net/forum?id=MuoduaZpQxE", "pdf_size": 1851532, "recommendation": "5;5;5;6", "confidence": "2;3;4;3", "correctness": "3;3;3;4", "technical_novelty": "2;2;2;3", "empirical_novelty": "3;3;2;3", "wc_summary_paper": "76;62;104;54", "wc_strength_and_weaknesses": "95;598;567;200", "wc_clarity_quality_novelty_and_reproducibility": "383;31;103;34", "wc_summary_review": "81;79;59;31", "wc_review": "635;770;833;319", "wc_reply_reviewers": "0;0;352;26", "wc_reply_authors": "201;510;1518;752", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;3;2", "recommendation_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 74.0, 19.026297590440446 ], "wc_strength_and_weaknesses_avg": [ 365.0, 220.91740538038192 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 137.75, 144.49459332445628 ], "wc_summary_review_avg": [ 62.5, 20.11839953873071 ], "wc_review_avg": [ 639.25, 198.2503152582613 ], "wc_reply_reviewers_avg": [ 94.5, 149.04613379755946 ], "wc_reply_authors_avg": [ 745.25, 487.01610599650604 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 1.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9882939074432564636&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "aff_unique_index": "0;1", "aff_unique_norm": "Rensselaer Polytechnic Institute;Tencent", "aff_unique_dep": ";WeChat AI", "aff_unique_url": "https://www.rpi.edu;https://www.tencent.com", "aff_unique_abbr": "RPI;Tencent", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;China" }, { "title": "A Laplace-inspired Distribution on SO(3) for Probabilistic 
Rotation Estimation", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11894", "id": "Mvetq8DO05O", "poster": "/media/PosterPDFs/ICLR%202023/11894.png?t=1682824634.18222", "openreview": "https://openreview.net/forum?id=Mvetq8DO05O", "slides": "https://iclr.cc/virtual/2023/poster/11894", "video": "https://iclr.cc/virtual/2023/poster/11894", "author_site": "Yingda Yin, Yang Wang, He Wang, Baoquan Chen", "tldr": "", "abstract": "Estimating the 3DoF rotation from a single RGB image is an important yet challenging problem. Probabilistic rotation regression has raised more and more attention with the benefit of expressing uncertainty information along with the prediction. Though modeling noise using Gaussian-resembling Bingham distribution and matrix Fisher distribution is natural, they are shown to be sensitive to outliers for the nature of quadratic punishment to deviations. In this paper, we draw inspiration from multivariate Laplace distribution and propose a novel Rotation Laplace distribution on SO(3). Rotation Laplace distribution is robust to the disturbance of outliers and enforces much gradient to the low-error region, resulting in a better convergence. Our extensive experiments show that our proposed distribution achieves state-of-the-art performance for rotation regression tasks over both probabilistic and non-probabilistic baselines. Our project page is at pku-epic.github.io/RotationLaplace.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Yingda Yin;Yang Wang;He Wang;Baoquan Chen", "authorids": "~Yingda_Yin1;~Yang_Wang34;~He_Wang5;~Baoquan_Chen1", "gender": "M;M;M;M", "homepage": "https://yd-yin.github.io/;https://github.com/2000010767;https://hughw19.github.io;https://baoquanchen.info", "dblp": "255/4832;;01/6368-10;23/4197", "google_scholar": ";;roCAWkoAAAAJ;iHWtrEAAAAAJ", "orcid": ";;;", "linkedin": ";;;baoquan/", "or_profile": "~Yingda_Yin1;~Yang_Wang34;~He_Wang5;~Baoquan_Chen1", "aff": "Peking University;Peking University;Peking University;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn", "position": "PhD student;Undergrad student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nyin2023a,\ntitle={A Laplace-inspired Distribution on {SO}(3) for Probabilistic Rotation Estimation},\nauthor={Yingda Yin and Yang Wang and He Wang and Baoquan Chen},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=Mvetq8DO05O}\n}", "github": "", "project": "", "reviewers": "z3mk;biJ1;eVhd", "pdf_size": 11256262, "recommendation": "6;8;8", "confidence": "4;4;5", "correctness": "2;4;3", "technical_novelty": "4;3;4", "empirical_novelty": "3;4;4", "wc_summary_paper": "69;200;58", "wc_strength_and_weaknesses": "82;155;134", "wc_clarity_quality_novelty_and_reproducibility": "219;161;24", "wc_summary_review": "22;26;109", "wc_review": "392;542;325", "wc_reply_reviewers": "0;338;0", "wc_reply_authors": "1165;1034;649", "reply_reviewers": "0;2;0", "reply_authors": "4;3;1", "recommendation_avg": [ 7.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 3.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 109.0, 64.50322989329035 ], "wc_strength_and_weaknesses_avg": [ 123.66666666666667, 30.684777260973487 ], 
"wc_clarity_quality_novelty_and_reproducibility_avg": [ 134.66666666666666, 81.75709279458403 ], "wc_summary_review_avg": [ 52.333333333333336, 40.102646075068684 ], "wc_review_avg": [ 419.6666666666667, 90.72424642227065 ], "wc_reply_reviewers_avg": [ 112.66666666666667, 159.3347280273687 ], "wc_reply_authors_avg": [ 949.3333333333334, 218.99822424444957 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 2.6666666666666665, 1.247219128924647 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.4999999999999999, "corr_recommendation_correctness": 0.8660254037844385, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2117213795046271985&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=Mvetq8DO05O", "email": "pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "Mwpw3weZrK8", "title": "GAIN: Enhancing Byzantine Robustness in Federated Learning with Gradient Decomposition", "track": "main", "status": "Reject", "tldr": "", "abstract": "Federated learning provides a privacy-aware learning framework by enabling participants to jointly train models without exposing their private data. However, federated learning has exhibited vulnerabilities to Byzantine attacks, where the adversary aims to destroy the convergence and performance of the global model. Meanwhile, we observe that most existing robust AGgregation Rules (AGRs) fail to stop the aggregated gradient deviating from the optimal gradient (the average of honest gradients) in the non-IID setting. We attribute the reason of the failure of these AGRs to two newly proposed concepts: identification failure and integrity failure. The identification failure mainly comes from the exacerbated curse of dimensionality in the non-IID setting. The integrity failure is a combined result of conservative filtering strategy and gradient heterogeneity. In order to address both failures, we propose GAIN, a gradient decomposition scheme that can help adapt existing robust algorithms to heterogeneous datasets. We theoretically show that integrating exisiting robust AGRs into our GAIN can mitigate the deviation of aggregated gradient, thus improve the performance. 
Experiments on various real-world datasets verify the efficacy of our proposed GAIN.", "keywords": "Federated Learning;Byzantine Robustness.", "primary_area": "", "supplementary_material": "/attachment/e731973b4b8cbda89ba46adb72fa0d5123bd629c.zip", "author": "Yuchen Liu;Chen Chen;Lingjuan Lyu;Fangzhao Wu;Tianlei Hu;Sai Wu;Gang Chen", "authorids": "~Yuchen_Liu8;~Chen_Chen20;~Lingjuan_Lyu1;~Fangzhao_Wu1;~Tianlei_Hu1;~Sai_Wu2;~Gang_Chen6", "gender": ";M;F;;M;M;M", "homepage": ";https://cc233.github.io/;https://sites.google.com/view/lingjuan-lyu;;;https://person.zju.edu.cn/0011057;", "dblp": ";65/4423-43;178/9876;;02/3803;30/1186.html;67/6383-1", "google_scholar": ";;;;;RMaqDKAAAAAJ;", "orcid": "0000-0002-3629-128X;0000-0001-7359-8515;;;;;0000-0002-7483-0045", "linkedin": ";;;;;;", "or_profile": "~Yuchen_Liu8;~Chen_Chen20;~Lingjuan_Lyu1;~Fangzhao_Wu1;~Tianlei_Hu1;~Sai_Wu2;~Gang_Chen6", "aff": "Zhejiang University;Zhejiang University;Sony;;Zhejiang University;Zhejiang University;College of Computer Science and Technology, Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn;sony.com;;zju.edu.cn;zju.edu.cn;cs.zju.edu.cn", "position": "PhD student;PhD student;scientist;;Associate Professor;Full Professor;Full Professor", "bibtex": "@misc{\nliu2023gain,\ntitle={{GAIN}: Enhancing Byzantine Robustness in Federated Learning with Gradient Decomposition},\nauthor={Yuchen Liu and Chen Chen and Lingjuan Lyu and Fangzhao Wu and Tianlei Hu and Sai Wu and Gang Chen},\nyear={2023},\nurl={https://openreview.net/forum?id=Mwpw3weZrK8}\n}", "github": "", "project": "", "reviewers": "Bimn;Fwja;TG9e;bm6A", "site": "https://openreview.net/forum?id=Mwpw3weZrK8", "pdf_size": 516241, "recommendation": "3;6;8;8", "confidence": "5;4;4;4", "correctness": "3;4;4;4", "technical_novelty": "1;4;3;4", "empirical_novelty": "1;4;3;3", "wc_summary_paper": "76;90;92;79", "wc_strength_and_weaknesses": "37;145;158;167", "wc_clarity_quality_novelty_and_reproducibility": "5;19;22;101", "wc_summary_review": "9;52;30;67", "wc_review": "127;306;302;414", "wc_reply_reviewers": "60;17;24;0", "wc_reply_authors": "852;709;303;309", "reply_reviewers": "1;1;1;0", "reply_authors": "2;1;1;1", "recommendation_avg": [ 6.25, 2.0463381929681126 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 1.224744871391589 ], "empirical_novelty_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_paper_avg": [ 84.25, 6.869315832017043 ], "wc_strength_and_weaknesses_avg": [ 126.75, 52.40407903970835 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 36.75, 37.64555086593899 ], "wc_summary_review_avg": [ 39.5, 21.982947936980608 ], "wc_review_avg": [ 287.25, 102.85274668184609 ], "wc_reply_reviewers_avg": [ 25.25, 21.878928218722233 ], "wc_reply_authors_avg": [ 543.25, 242.58645366136997 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.9169493006161777, "corr_recommendation_correctness": 0.9169493006161777, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11992335526276696807&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;1;0;0;0", "aff_unique_norm": "Zhejiang University;Sony Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://www.zju.edu.cn;https://www.sony.com", "aff_unique_abbr": "ZJU;Sony", "aff_campus_unique_index": "", "aff_campus_unique": "", 
"aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "China;Japan" }, { "title": "On The Inadequacy of Optimizing Alignment and Uniformity in Contrastive Learning of Sentence Representations", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12185", "id": "MxvHVNukama", "poster": "/media/PosterPDFs/ICLR%202023/12185.png?t=1682166548.729861", "openreview": "https://openreview.net/forum?id=MxvHVNukama", "slides": "https://iclr.cc/virtual/2023/poster/12185", "video": "https://iclr.cc/virtual/2023/poster/12185", "author_site": "Zhijie Nie, Richong Zhang, Yongyi Mao", "tldr": "", "abstract": "Contrastive learning is widely used in areas such as visual representation learning (VRL) and sentence representation learning (SRL). Considering the differences between VRL and SRL in terms of negative sample size and evaluation focus, we believe that the solid findings obtained in VRL may not be entirely carried over to SRL. In this work, we consider the suitability of the decoupled form of contrastive loss, i.e., alignment and uniformity, in SRL. We find a performance gap between sentence representations obtained by jointly optimizing alignment and uniformity on the STS task and those obtained using contrastive loss. Further, we find that the joint optimization of alignment and uniformity during training is prone to overfitting, which does not occur on the contrastive loss. Analyzing them based on the variation of the gradient norms, we find that there is a property of ``gradient dissipation'' in contrastive loss and believe that it is the key to preventing overfitting. We simulate similar \"gradient dissipation\" of contrastive loss on four optimization objectives of two forms, and achieve the same or even better performance than contrastive loss on the STS tasks, confirming our hypothesis.", "keywords": "Sentence representation learning;Contrastive learning;Alignment;Uniformity", "primary_area": "", "supplementary_material": "", "author": "Zhijie Nie;Richong Zhang;Yongyi Mao", "authorids": "~Zhijie_Nie1;~Richong_Zhang1;~Yongyi_Mao2", "gender": "M;M;M", "homepage": "https://arthurizijar.github.io/;http://act.buaa.edu.cn/zhangrc;http://www.eecs.uottawa.ca/~yymao", "dblp": "73/873;61/1229;86/2933", "google_scholar": "eIrizC8AAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.ca/citations?user=jM5l70wAAAAJ", "orcid": "0009-0004-3933-0522;0000-0002-1207-0300;0000-0001-5298-5778", "linkedin": ";;", "or_profile": "~Zhijie_Nie1;~Richong_Zhang1;~Yongyi_Mao1", "aff": "Beihang University;Beihang University;University of Ottawa", "aff_domain": "act.buaa.edu.cn;buaa.edu.cn;eecs.uottawa.ca", "position": "PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nnie2023on,\ntitle={On The Inadequacy of Optimizing Alignment and Uniformity in Contrastive Learning of Sentence Representations},\nauthor={Zhijie Nie and Richong Zhang and Yongyi Mao},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=MxvHVNukama}\n}", "github": "", "project": "", "reviewers": "Rqm3;DG2P;gozg;qhtV", "pdf_size": 1023651, "recommendation": "5;6;6;6", "confidence": "2;3;3;3", "correctness": "3;4;2;3", "technical_novelty": "2;3;2;2", "empirical_novelty": "2;3;3;4", "wc_summary_paper": "80;75;152;173", "wc_strength_and_weaknesses": "228;83;304;249", "wc_clarity_quality_novelty_and_reproducibility": "2;137;33;25", "wc_summary_review": "31;24;66;53", "wc_review": "341;319;555;500", 
"wc_reply_reviewers": "278;111;108;0", "wc_reply_authors": "534;898;515;656", "reply_reviewers": "1;1;1;0", "reply_authors": "3;3;3;2", "recommendation_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 120.0, 43.17985641476822 ], "wc_strength_and_weaknesses_avg": [ 216.0, 81.64863746566749 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 49.25, 51.924825469133744 ], "wc_summary_review_avg": [ 43.5, 16.830032679706836 ], "wc_review_avg": [ 428.75, 100.94645858077439 ], "wc_reply_reviewers_avg": [ 124.25, 99.39410193769045 ], "wc_reply_authors_avg": [ 650.75, 152.65872886933127 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 1.0, "corr_recommendation_correctness": 0.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16497669497212104003&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "pdf": "https://openreview.net/pdf?id=MxvHVNukama", "email": "act.buaa.edu.cn;buaa.edu.cn;eecs.uottawa.ca", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Beihang University;University of Ottawa", "aff_unique_dep": ";", "aff_unique_url": "http://www.buaa.edu.cn/;https://www.uottawa.ca", "aff_unique_abbr": "BUAA;U Ottawa", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "China;Canada" }, { "title": "Bort: Towards Explainable Neural Networks with Bounded Orthogonal Constraint", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12080", "id": "My57qBufZWs", "poster": "/media/PosterPDFs/ICLR%202023/12080.png?t=1681226743.9030478", "openreview": "https://openreview.net/forum?id=My57qBufZWs", "slides": "https://iclr.cc/virtual/2023/poster/12080", "video": "https://iclr.cc/virtual/2023/poster/12080", "author_site": "Borui Zhang, Wenzhao Zheng, Jie Zhou, Jiwen Lu", "tldr": "We propose an optimizer, Bort, for training explainable neural networks with boundedness and orthogonality constraints.", "abstract": "Deep learning has revolutionized human society, yet the black-box nature of deep neural networks hinders further application to reliability-demanded industries. In the attempt to unpack them, many works observe or impact internal variables to improve the comprehensibility and invertibility of the black-box models. However, existing methods rely on intuitive assumptions and lack mathematical guarantees. To bridge this gap, we introduce Bort, an optimizer for improving model explainability with boundedness and orthogonality constraints on model parameters, derived from the sufficient conditions of model comprehensibility and invertibility. We perform reconstruction and backtracking on the model representations optimized by Bort and observe a clear improvement in model explainability. Based on Bort, we are able to synthesize explainable adversarial samples without additional parameters and training. Surprisingly, we find Bort constantly improves the classification accuracy of various architectures including ResNet and DeiT on MNIST, CIFAR-10, and ImageNet. 
Code: https://github.com/zbr17/Bort.", "keywords": "Neural network;explainable AI;optimizer.", "primary_area": "", "supplementary_material": "/attachment/2e9e6aa1a26d98b368ddec31ca1dd559caea09ec.zip", "author": "Borui Zhang;Wenzhao Zheng;Jie Zhou;Jiwen Lu", "authorids": "~Borui_Zhang1;~Wenzhao_Zheng1;~Jie_Zhou3;~Jiwen_Lu1", "gender": "M;;M;M", "homepage": "http://boruizhang.site/;https://wzzheng.net;https://www.tsinghua.edu.cn/publish/auen/1713/2011/20110506105532098625469/20110506105532098625469_.html;http://ivg.au.tsinghua.edu.cn/Jiwen_Lu/", "dblp": "230/7918;230/1277;00/5012-1;http://dblp.uni-trier.de/pers/hd/l/Lu:Jiwen", "google_scholar": "MUN3ZNgAAAAJ;LdK9scgAAAAJ;;TN8uDQoAAAAJ", "orcid": "0000-0001-7237-7454;;;0000-0002-6121-5529", "linkedin": "BoruiZhang-THU;;;", "or_profile": "~Borui_Zhang1;~Wenzhao_Zheng1;~Jie_Zhou3;~Jiwen_Lu1", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;PhD student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nzhang2023bort,\ntitle={Bort: Towards Explainable Neural Networks with Bounded Orthogonal Constraint},\nauthor={Borui Zhang and Wenzhao Zheng and Jie Zhou and Jiwen Lu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=My57qBufZWs}\n}", "github": "", "project": "", "reviewers": "dhB8;t9ra;eZA8", "pdf_size": 16329067, "recommendation": "6;6;8", "confidence": "4;4;4", "correctness": "3;3;4", "technical_novelty": "3;3;3", "empirical_novelty": "2;3;2", "wc_summary_paper": "23;97;61", "wc_strength_and_weaknesses": "236;105;266", "wc_clarity_quality_novelty_and_reproducibility": "65;1126;17", "wc_summary_review": "27;19;48", "wc_review": "351;1347;392", "wc_reply_reviewers": "0;208;0", "wc_reply_authors": "638;801;756", "reply_reviewers": "0;1;0", "reply_authors": "2;2;2", "recommendation_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 60.333333333333336, 30.214051182999096 ], "wc_strength_and_weaknesses_avg": [ 202.33333333333334, 69.90628647617386 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 402.6666666666667, 511.8491531257578 ], "wc_summary_review_avg": [ 31.333333333333332, 12.229290885229428 ], "wc_review_avg": [ 696.6666666666666, 460.1596341367731 ], "wc_reply_reviewers_avg": [ 69.33333333333333, 98.05214032453459 ], "wc_reply_authors_avg": [ 731.6666666666666, 68.7329776906415 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.9999999999999998, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7529918036681121246&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=My57qBufZWs", "email": "tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", 
"aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "MzQEMwIzlL", "title": "Cross-Domain Few-Shot Relation Extraction via Representation Learning and Domain Adaptation", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Few-shot relation extraction aims to recognize novel relations with few labeled sentences in each relation. Previous metric-based few-shot relation extraction methods classify by comparing the embeddings of query sentence embedding with those prototypes generated by the few labeled sentences embedding using a learned metric function. However, the generalization ability of these methods on unseen relations in different domains is limited, since these domains always have significant discrepancies from those in the training dataset. Because the prototype is essential for extracting relations between entities in the latent space. To extract new relations in various domains more effectively, we propose to learn more interpretable and robust prototypes by learning from prior knowledge and intrinsic semantics of relations. We improve the prototype representation of relations more efficiently by using prior knowledge to explore the connections between relations. The geometric interpretability of the prototype is improved by making the classification margins between sentence embedding clearer through contrastive learning. Besides, for better-extracting relations in different domains, using a cross-domain approach makes the generation process of the prototype take into account the gap between other domains, which makes the prototype more robust. The experimental results on the benchmark FewRel dataset demonstrate the advantages of the proposed method over some state-of-the-art methods.", "keywords": "few-shot;domain adaptation;relation extraction", "primary_area": "", "supplementary_material": "", "author": "Zhongju Yuan", "authorids": "~Zhongju_Yuan2", "gender": "F", "homepage": "https://www.zhongjuyuan.site/", "dblp": "239/3442", "google_scholar": "KuT_a6cAAAAJ", "orcid": "0000-0001-7914-7862", "linkedin": "", "or_profile": "~Zhongju_Yuan2", "aff": "Universiteit Gent", "aff_domain": "ugent.be", "position": "PhD student", "bibtex": "@misc{\nyuan2023crossdomain,\ntitle={Cross-Domain Few-Shot Relation Extraction via Representation Learning and Domain Adaptation},\nauthor={Zhongju Yuan},\nyear={2023},\nurl={https://openreview.net/forum?id=MzQEMwIzlL}\n}", "github": "", "project": "", "reviewers": "4EFD;TwgH;2Uqt;GWaW", "site": "https://openreview.net/forum?id=MzQEMwIzlL", "pdf_size": 352250, "recommendation": "3;5;5;5", "confidence": "2;3;4;3", "correctness": "3;4;2;4", "technical_novelty": "1;2;3;2", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "131;71;91;104", "wc_strength_and_weaknesses": "97;102;139;301", "wc_clarity_quality_novelty_and_reproducibility": "7;92;178;102", "wc_summary_review": "9;30;93;27", "wc_review": "244;295;501;534", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 99.25, 21.775846711436962 ], "wc_strength_and_weaknesses_avg": [ 159.75, 83.14858687915267 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 94.75, 60.602702084973075 ], "wc_summary_review_avg": [ 39.75, 
31.77558024647229 ], "wc_review_avg": [ 393.5, 125.84613621402923 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 1, 0 ], "corr_recommendation_confidence": 0.816496580927726, "corr_recommendation_correctness": 0.17407765595569782, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1776895954663499373&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "aff_unique_index": "0", "aff_unique_norm": "University of Ghent", "aff_unique_dep": "", "aff_unique_url": "https://www.ugent.be/en", "aff_unique_abbr": "UGent", "aff_country_unique_index": "0", "aff_country_unique": "Belgium" }, { "id": "N-S6pJrlkK", "title": "Improving Language Model Pretraining with Text Structure Information", "track": "main", "status": "Reject", "tldr": "A pretraining task that distinguishes text structure relationships between sentences can improve general-purpose language model pretraining.", "abstract": "Inter-sentence pretraining tasks learn from sentence relationships and facilitate high-level language understanding that cannot be directly learned in word-level pretraining tasks. However, we have found experimentally that existing inter-sentence methods for general-purpose language pretraining improve performance only at a relatively small scale but not at larger scales. As an alternative, we propose Text Structure Prediction (TSP), a more sophisticated inter-sentence task that uses text structure to provide more abundant self-supervised learning signals to pretraining models at larger scales. TSP classifies sentence pairs over six designed text structure relationships, and it can be seen as an implicit form of learning high-level language understanding by identifying key concepts and relationships in texts. Experiments show that TSP provides improved performance on language understanding tasks for models at various scales.
Our approach thus serves as an initial attempt to demonstrate that the exploitation of text structure can facilitate language understanding.", "keywords": "Language Model Pretraining;Representation Learning", "primary_area": "", "supplementary_material": "/attachment/43a1f59272782c0fff4628eab1eaa3ed22d3440b.zip", "author": "Yi-Siang Wang;Ryohei Sasano;Koichi Takeda", "authorids": "~Yi-Siang_Wang1;~Ryohei_Sasano2;~Koichi_Takeda1", "gender": ";M;", "homepage": "https://richarddwang.github.io/;http://cr.fvcrc.i.nagoya-u.ac.jp/~sasano/index-e.html;https://researchmap.jp/takedasu?lang=en", "dblp": ";17/757;24/4299-3.html", "google_scholar": ";g9mNQ9MAAAAJ;IaZThNIAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Yi-Siang_Wang1;~Ryohei_Sasano2;~Koichi_Takeda1", "aff": ";RIKEN;Nagoya University", "aff_domain": ";riken.jp;ac.jp", "position": ";Researcher;Professor", "bibtex": "@misc{\nwang2023improving,\ntitle={Improving Language Model Pretraining with Text Structure Information},\nauthor={Yi-Siang Wang and Ryohei Sasano and Koichi Takeda},\nyear={2023},\nurl={https://openreview.net/forum?id=N-S6pJrlkK}\n}", "github": "", "project": "", "reviewers": "SQUJ;t9DH;VZCD;LMAd", "site": "https://openreview.net/forum?id=N-S6pJrlkK", "pdf_size": 365754, "recommendation": "3;5;6;8", "confidence": "4;4;4;4", "correctness": "2;3;4;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;2;3;2", "wc_summary_paper": "183;43;72;64", "wc_strength_and_weaknesses": "1005;98;62;269", "wc_clarity_quality_novelty_and_reproducibility": "51;14;22;46", "wc_summary_review": "53;26;20;18", "wc_review": "1292;181;176;397", "wc_reply_reviewers": "40;0;8;0", "wc_reply_authors": "1408;841;213;442", "reply_reviewers": "1;0;1;0", "reply_authors": "3;3;1;1", "recommendation_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 90.5, 54.44492630172255 ], "wc_strength_and_weaknesses_avg": [ 358.5, 381.3610494006959 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 33.25, 15.610493265749165 ], "wc_summary_review_avg": [ 29.25, 14.02453207775575 ], "wc_review_avg": [ 511.5, 459.369404727829 ], "wc_reply_reviewers_avg": [ 12.0, 16.492422502470642 ], "wc_reply_authors_avg": [ 726.0, 453.3690549651575 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 1.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5883484054145521, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Une2D65sdlQJ:scholar.google.com/&scioq=Improving+Language+Model+Pretraining+with+Text+Structure+Information&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "RIKEN;Nagoya University", "aff_unique_dep": ";", "aff_unique_url": "https://www.riken.jp;https://www.nagoya-u.ac.jp", "aff_unique_abbr": "RIKEN;Nagoya U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Japan" }, { "id": "N-eul1pdagX", "title": "Exact Representation of Sparse Networks with Symmetric Nonnegative Embeddings", "track": "main", "status": "Reject", "tldr": "We expand on previous bounds for exact factorization of undirected graphs and extend them to our proposed model, which is more interpretable.", "abstract": "Many models for undirected graphs are based on factorizing the 
graph's adjacency matrix; these models find a vector representation of each node such that the predicted probability of a link between two nodes increases with the similarity (dot product) of their associated vectors. Recent work has shown that these models are unable to capture key structures in real-world graphs, particularly heterophilous structures, wherein links occur between dissimilar nodes. In contrast, a factorization with two vectors per node, based on logistic principal components analysis (LPCA), has been proven not only to represent such structures, but also to provide exact low-rank factorization of any graph with bounded max degree. However, this bound has limited applicability to real-world networks, which often have power law degree distributions with high max degree. Further, the LPCA model lacks interpretability since its asymmetric factorization does not reflect the undirectedness of the graph. We address the above issues in two ways. First, we prove a new bound for the LPCA model in terms of arboricity rather than max degree; this greatly increases the bound's applicability to many sparse real-world networks. Second, we propose an alternative graph model whose factorization is symmetric and nonnegative, which allows for link predictions to be interpreted in terms of node clusters. We show that the bounds for exact representation in the LPCA model extend to our new model. On the empirical side, our model is optimized effectively on real-world graphs with gradient descent on a cross-entropy loss. We demonstrate its effectiveness on a variety of foundational tasks, such as community detection and link prediction.", "keywords": "graph;network;embeddings;arboricity;factorization;model;community;nonnegative", "primary_area": "", "supplementary_material": "/attachment/5c5bef7cf8b97d84cfe7797caae9e54bce22233d.zip", "author": "Sudhanshu Chanpuriya;Ryan A. Rossi;Anup Rao;Tung Mai;Nedim Lipka;Zhao Song;Cameron N Musco", "authorids": "~Sudhanshu_Chanpuriya1;~Ryan_A._Rossi2;~Anup_Rao1;~Tung_Mai1;~Nedim_Lipka1;~Zhao_Song6;~Cameron_N_Musco1", "gender": ";;;M;;;M", "homepage": ";;;;;;https://people.cs.umass.edu/~cmusco/", "dblp": ";;63/6846;177/8902.html;;;149/2327", "google_scholar": ";;pkwXPU0AAAAJ;eUt8nlIAAAAJ;;;EeYGZCwAAAAJ", "orcid": ";;;;;;", "linkedin": ";;;;;;", "or_profile": "~Sudhanshu_Chanpuriya1;~Ryan_A._Rossi2;~Anup_Rao1;~Tung_Mai1;~Nedim_Lipka1;~Zhao_Song6;~Cameron_N_Musco1", "aff": ";;Adobe Systems;Adobe;;;University of Massachusetts, Amherst", "aff_domain": ";;adobe.com;adobe.com;;;umass.edu", "position": ";;Researcher;Research Scientist;;;Assistant Professor", "bibtex": "@misc{\nchanpuriya2023exact,\ntitle={Exact Representation of Sparse Networks with Symmetric Nonnegative Embeddings},\nauthor={Sudhanshu Chanpuriya and Ryan A. 
Rossi and Anup Rao and Tung Mai and Nedim Lipka and Zhao Song and Cameron N Musco},\nyear={2023},\nurl={https://openreview.net/forum?id=N-eul1pdagX}\n}", "github": "", "project": "", "reviewers": "pv1p;N53t;s4Ez", "site": "https://openreview.net/forum?id=N-eul1pdagX", "pdf_size": 464076, "recommendation": "5;5;6", "confidence": "3;4;4", "correctness": "3;3;4", "technical_novelty": "3;3;3", "empirical_novelty": "2;0;3", "wc_summary_paper": "84;53;109", "wc_strength_and_weaknesses": "290;139;266", "wc_clarity_quality_novelty_and_reproducibility": "74;49;413", "wc_summary_review": "55;94;273", "wc_review": "503;335;1061", "wc_reply_reviewers": "0;46;109", "wc_reply_authors": "285;642;1890", "reply_reviewers": "0;1;2", "reply_authors": "1;2;5", "recommendation_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 1.6666666666666667, 1.247219128924647 ], "wc_summary_paper_avg": [ 82.0, 22.90560339014597 ], "wc_strength_and_weaknesses_avg": [ 231.66666666666666, 66.25372106950337 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 178.66666666666666, 166.0127170496954 ], "wc_summary_review_avg": [ 140.66666666666666, 94.91867864417179 ], "wc_review_avg": [ 633.0, 310.3159680068043 ], "wc_reply_reviewers_avg": [ 51.666666666666664, 44.67910274638718 ], "wc_reply_authors_avg": [ 939.0, 688.0712172442617 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 2.6666666666666665, 1.699673171197595 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.4999999999999999, "corr_recommendation_correctness": 0.9999999999999997, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1162379834652537135&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;1", "aff_unique_norm": "Adobe;University of Massachusetts Amherst", "aff_unique_dep": "Adobe Systems Incorporated;", "aff_unique_url": "https://www.adobe.com;https://www.umass.edu", "aff_unique_abbr": "Adobe;UMass Amherst", "aff_campus_unique_index": "1", "aff_campus_unique": ";Amherst", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "N3FlFslv_J", "title": "Multi-Hypothesis 3D human pose estimation metrics favor miscalibrated distributions", "track": "main", "status": "Reject", "tldr": "Pose estimation metrics favor overconfident models; we propose cGNF, a model capable of maximizing likelihood and thus estimating accurate and well-calibrated distributions of 3D poses.", "abstract": "Due to depth ambiguities and occlusions, lifting 2D poses to 3D is a highly ill-posed problem. Well-calibrated distributions of possible poses can make these ambiguities explicit and preserve the resulting uncertainty for downstream tasks. This study shows that previous attempts, which account for these ambiguities via multiple hypotheses generation, produce miscalibrated distributions. We identify that miscalibration can be attributed to the use of sample-based metrics such as $\\operatorname{minMPJPE}$. In a series of simulations, we show that minimizing $\\operatorname{minMPJPE}$, as commonly done, should converge to the correct mean prediction. However, it fails to correctly capture the uncertainty, thus resulting in a miscalibrated distribution. 
To mitigate this problem, we propose an accurate and well-calibrated model called Conditional Graph Normalizing Flow (cGNFs). Our model is structured such that a single cGNF can estimate both conditional and marginal densities within the same model - effectively solving a zero-shot density estimation problem. We evaluate cGNF on the Human 3.6M dataset and show that cGNF provides a well-calibrated distribution estimate while being close to state-of-the-art in terms of overall $\\operatorname{minMPJPE}$. Furthermore, cGNF outperforms previous methods on occluded joints while it remains well-calibrated.", "keywords": "Pose estimation;calibration;metrics;graph neural networks", "primary_area": "", "supplementary_material": "/attachment/61aec38f70ae420b99a2bba9af7fcd2a71ff3cfa.zip", "author": "Pawe\u0142 A. Pierzchlewicz;R. James Cotton;Mohammad Bashiri;Fabian H. Sinz", "authorids": "~Pawe\u0142_A._Pierzchlewicz1;~R._James_Cotton1;~Mohammad_Bashiri3;~Fabian_H._Sinz1", "gender": ";;M;M", "homepage": ";;https://mohammadbashiri.github.io/;https://sinzlab.org", "dblp": ";;229/0971;53/5834", "google_scholar": ";;https://scholar.google.com/citations?view_op=list_works;https://scholar.google.com/citations?hl=de", "orcid": ";;;0000-0002-1348-9736", "linkedin": ";;;", "or_profile": "~Pawe\u0142_A._Pierzchlewicz1;~R._James_Cotton1;~Mohammad_Bashiri3;~Fabian_H._Sinz1", "aff": ";;University of Tuebingen;Baylor College of Medicine", "aff_domain": ";;uni-tuebingen.de;bcm.edu", "position": ";;PhD student;Assistant Professor", "bibtex": "@misc{\npierzchlewicz2023multihypothesis,\ntitle={Multi-Hypothesis 3D human pose estimation metrics favor miscalibrated distributions},\nauthor={Pawe{\\l} A. Pierzchlewicz and R. James Cotton and Mohammad Bashiri and Fabian H. Sinz},\nyear={2023},\nurl={https://openreview.net/forum?id=N3FlFslv_J}\n}", "github": "", "project": "", "reviewers": "Sgm5;8uoB;F59S;AasC", "site": "https://openreview.net/forum?id=N3FlFslv_J", "pdf_size": 13637384, "recommendation": "3;3;3;6", "confidence": "3;4;3;4", "correctness": "3;3;3;3", "technical_novelty": "3;2;2;3", "empirical_novelty": "3;2;2;3", "wc_summary_paper": "113;50;130;67", "wc_strength_and_weaknesses": "124;339;401;686", "wc_clarity_quality_novelty_and_reproducibility": "59;36;60;33", "wc_summary_review": "45;23;68;43", "wc_review": "341;448;659;829", "wc_reply_reviewers": "0;423;421;227", "wc_reply_authors": "349;1265;704;1410", "reply_reviewers": "0;1;1;1", "reply_authors": "2;3;2;3", "recommendation_avg": [ 3.75, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 90.0, 32.626676202150904 ], "wc_strength_and_weaknesses_avg": [ 387.5, 200.66701273502827 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 47.0, 12.549900398011133 ], "wc_summary_review_avg": [ 44.75, 15.943258763502524 ], "wc_review_avg": [ 569.25, 188.6297630280015 ], "wc_reply_reviewers_avg": [ 267.75, 173.88124539466583 ], "wc_reply_authors_avg": [ 932.0, 427.5646149998851 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18202189264378668274&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1", "aff_unique_norm": "University of 
Tuebingen;Baylor College of Medicine", "aff_unique_dep": ";", "aff_unique_url": "https://www.uni-tuebingen.de/;https://www.bcm.edu", "aff_unique_abbr": "Uni T\u00fcbingen;BCM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Germany;United States" }, { "id": "N3fc0aKFB-0", "title": "Reward-free Policy Learning through Active Human Involvement", "track": "main", "status": "Withdraw", "tldr": "We propose a reward-free policy learning method called Proxy Value Propagation that conveys human intents explicitly to the learning policy through active human involvement", "abstract": "Despite the success of reinforcement learning (RL) in many control tasks, the behaviors of the learned agents are largely limited by the hand-crafted reward function in the environment, which might not truthfully reflect human intents and preferences. This work proposes a reward-free policy learning method called Proxy Value Propagation that conveys human intents explicitly to the learning policy through active involvement. We adopt an interactive learning setting where human subjects can actively intervene and demonstrate to the agent. Our key insight is that a latent value function can be learned from active human involvement, which in turn guides the learning policy to emulate human behaviors. The proposed method first relabels and propagates the proxy values of human demonstrations to other states, and then optimizes the policies to comply with the human intents expressed through the proxy value function. The proposed method can be incorporated into many existing RL algorithms with minimum modifications. Experiments on various tasks and human control devices demonstrate the generality and efficiency of our method. A theoretical guarantee on learning safety is also provided. Demo video and code are available in the supplementary material.
", "keywords": "Human-in-the-loop Reinforcement Learning;Safety;Sample Efficiency;Reward-free", "primary_area": "", "supplementary_material": "/attachment/1d9674912e86b1184b5800c2c99d66f4aa353a6f.zip", "author": "Zhenghao Peng;Wenjie Mo;Chenda Duan;Quanyi Li;Bolei Zhou", "authorids": "~Zhenghao_Peng1;~Wenjie_Mo1;~Chenda_Duan1;~Quanyi_Li1;~Bolei_Zhou5", "gender": "M;M;M;M;M", "homepage": "https://pengzhenghao.github.io;https://wenjie-mo.github.io/;https://chendaduan.com/;https://quanyili.github.io;https://boleizhou.github.io/", "dblp": "220/3963;344/2097;349/8266;270/7691;46/8066", "google_scholar": "JZ8ws6IAAAAJ;;DooYOyoAAAAJ;Ty49X3UAAAAJ;9D4aG8AAAAAJ", "orcid": ";;0009-0003-8652-3960;;", "linkedin": ";wenjie-mo;chenda-d/;https://www.linkedin.com/mwlite/in/quanyi-li-2b7985183;", "or_profile": "~Zhenghao_Peng1;~Wenjie_Mo1;~Chenda_Duan1;~Quanyi_Li1;~Bolei_Zhou5", "aff": "University of California, Los Angeles;University of California, Los Angeles;University of California, Los Angeles;Shanghai Artificial Intelligence Laboratory;University of California, Los Angeles", "aff_domain": "cs.ucla.edu;ucla.edu;ucla.edu;pjlab.org.cn;ucla.edu", "position": "PhD student;Undergrad student;MS student;Researcher;Assistant Professor", "bibtex": "@misc{\npeng2023rewardfree,\ntitle={Reward-free Policy Learning through Active Human Involvement},\nauthor={Zhenghao Peng and Wenjie Mo and Chenda Duan and Quanyi Li and Bolei Zhou},\nyear={2023},\nurl={https://openreview.net/forum?id=N3fc0aKFB-0}\n}", "github": "", "project": "", "reviewers": "hHb7;6qMp;uDKA;RMcE", "site": "https://openreview.net/forum?id=N3fc0aKFB-0", "pdf_size": 5694000, "recommendation": "3;5;5;6", "confidence": "4;4;4;3", "correctness": "3;3;3;3", "technical_novelty": "2;3;3;2", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "164;93;212;75", "wc_strength_and_weaknesses": "952;487;444;340", "wc_clarity_quality_novelty_and_reproducibility": "51;88;291;136", "wc_summary_review": "43;74;80;66", "wc_review": "1210;742;1027;617", "wc_reply_reviewers": "290;0;0;48", "wc_reply_authors": "700;618;1063;996", "reply_reviewers": "1;0;0;1", "reply_authors": "2;1;2;2", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 136.0, 55.06813960903346 ], "wc_strength_and_weaknesses_avg": [ 555.75, 234.9344323423027 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 141.5, 91.42346525919919 ], "wc_summary_review_avg": [ 65.75, 14.042346669983617 ], "wc_review_avg": [ 899.0, 233.0654414536827 ], "wc_reply_reviewers_avg": [ 84.5, 120.25285859388126 ], "wc_reply_authors_avg": [ 844.25, 188.9952049656287 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.6622661785325219, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:fGoYFK9_GAAJ:scholar.google.com/&scioq=Reward-free+Policy+Learning+through+Active+Human+Involvement&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "University of California, Los Angeles;Shanghai Artificial Intelligence Laboratory", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucla.edu;http://www.shailab.org/", "aff_unique_abbr": "UCLA;Shanghai AI Lab", "aff_campus_unique_index": "0;0;0;0", 
"aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "United States;China" }, { "id": "N3kGYG3ZcTi", "title": "Suppression helps: Lateral Inhibition-inspired Convolutional Neural Network for Image Classification", "track": "main", "status": "Reject", "tldr": "Improving feature learning with lateral inhibition", "abstract": "Convolutional neural networks (CNNs) have become powerful and popular tools since deep learning emerged for image classification in the computer vision field. For better recognition, the dimensions of depth and width have been explored, leading to convolutional neural networks with more layers and more channels. In addition to these factors, neurobiology also suggests the widely existing lateral inhibition (e.g., Mach band effect), which increases the contrast of nearby neuron excitation in the lateral direction, to help recognition. However, such an important mechanism has not been well explored in modern convolutional neural networks. In this paper, we explicitly explore the filter dimension in the lateral direction and propose our lateral inhibition-inspired (LI) design. Our naive design incorporates the low-pass filter, while eliminating the central weight to mimic the inhibition strength decay. The inhibition value is computed from the filtering result of the input, with a simple learnable weight parameter per channel for multiplication to decide the strength. Then the inhibition value is subtracted from the input as suppression, which could increase the contrast to help recognition. We also suggest an alternative using depthwise convolution, as a general form. Our design could work on both the plain convolution and the convolutional block with residual connection, while being compatible with existing modules. 
Even without any channel attention along the channel dimension, our preliminary results demonstrate absolute improvements of 3.68\\% and 0.69\\% over AlexNet and ResNet-18, respectively, on the ImageNet dataset, with little increase in parameters, indicating the merits of our design in helping feature learning for image classification.", "keywords": "Lateral Inhibition;Convolutional Neural Networks", "primary_area": "", "supplementary_material": "", "author": "Chengyuan Zhuang;Xiaohui Yuan;XUAN GUO", "authorids": "~Chengyuan_Zhuang1;~Xiaohui_Yuan1;~XUAN_GUO2", "gender": "M;;", "homepage": ";;", "dblp": "291/9281;;", "google_scholar": "__kvkTYAAAAJ;4F2la7sAAAAJ;4tqv2FYAAAAJ", "orcid": "0000-0002-1479-2358;;", "linkedin": ";;", "or_profile": "~Chengyuan_Zhuang1;~Xiaohui_Yuan1;~XUAN_GUO2", "aff": "University of North Texas;University of North Texas, Denton;University of North Texas", "aff_domain": "unt.edu;unt.edu;unt.edu", "position": "PhD student;professor;Assistant Professor", "bibtex": "@misc{\nzhuang2023suppression,\ntitle={Suppression helps: Lateral Inhibition-inspired Convolutional Neural Network for Image Classification},\nauthor={Chengyuan Zhuang and Xiaohui Yuan and XUAN GUO},\nyear={2023},\nurl={https://openreview.net/forum?id=N3kGYG3ZcTi}\n}", "github": "", "project": "", "reviewers": "KkWZ;GVUz;Ct44;edpQ", "site": "https://openreview.net/forum?id=N3kGYG3ZcTi", "pdf_size": 199010, "recommendation": "1;3;3;6", "confidence": "5;5;5;5", "correctness": "2;2;2;3", "technical_novelty": "1;1;1;3", "empirical_novelty": "0;1;1;2", "wc_summary_paper": "112;80;51;65", "wc_strength_and_weaknesses": "119;249;194;47", "wc_clarity_quality_novelty_and_reproducibility": "27;8;27;176", "wc_summary_review": "46;25;61;72", "wc_review": "304;362;333;360", "wc_reply_reviewers": "123;103;295;0", "wc_reply_authors": "483;734;1086;748", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;2;1", "recommendation_avg": [ 3.25, 1.7853571071357126 ], "confidence_avg": [ 5.0, 0.0 ], "correctness_avg": [ 2.25, 0.4330127018922193 ], "technical_novelty_avg": [ 1.5, 0.8660254037844386 ], "empirical_novelty_avg": [ 1.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 77.0, 22.66053838724932 ], "wc_strength_and_weaknesses_avg": [ 152.25, 76.29998361729837 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 59.5, 67.70708973216911 ], "wc_summary_review_avg": [ 51.0, 17.621010186706094 ], "wc_review_avg": [ 339.75, 23.60481942316018 ], "wc_reply_reviewers_avg": [ 130.25, 105.95134496550763 ], "wc_reply_authors_avg": [ 762.75, 214.35644963471475 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.8892972917998875, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:59A7mIgxUyIJ:scholar.google.com/&scioq=Suppression+helps:+Lateral+Inhibition-inspired+Convolutional+Neural+Network+for+Image+Classification&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of North Texas", "aff_unique_dep": "", "aff_unique_url": "https://www.unt.edu", "aff_unique_abbr": "UNT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Denton", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Scaffolding a Student to Instill Knowledge", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11234", "id":
"N4K5ck-BTT", "poster": "/media/PosterPDFs/ICLR%202023/11234.png?t=1682447078.858065", "openreview": "https://openreview.net/forum?id=N4K5ck-BTT", "slides": "https://iclr.cc/virtual/2023/poster/11234", "video": "https://iclr.cc/virtual/2023/poster/11234", "author_site": "Anil Kag, Durmus Alp Emre Acar, Aditya Gangrade, Venkatesh Saligrama", "tldr": "We develop a novel KD scheme where the teacher scaffolds the student's prediction on hard-to-learn examples. It smoothens student's loss landscape so that the student encounters fewer local minima. As a result it has good generalization properties.", "abstract": "We propose a novel knowledge distillation (KD) method to selectively instill teacher knowledge into a student model motivated by situations where the student's capacity is significantly smaller than that of the teachers. In vanilla KD, the teacher primarily sets a predictive target for the student to follow, and we posit that this target is overly optimistic due to the student's lack of capacity. We develop a novel scaffolding scheme where the teacher, in addition to setting a predictive target, also scaffolds the student's prediction by censoring hard-to-learn examples. Scaffolding utilizes the same information as the teacher's soft-max predictions as inputs, and in this sense, our proposal can be viewed as a natural variant of vanilla KD. We show on synthetic examples that censoring hard-examples leads to smoothening the student's loss landscape so that the student encounters fewer local minima. As a result, it has good generalization properties. Against vanilla KD, we achieve improved performance and are comparable to more intrusive techniques that leverage feature matching on benchmark datasets.\n", "keywords": "knowledge distillation;tiny capacity student;large capacity teacher;budget constrained learning", "primary_area": "", "supplementary_material": "/attachment/30e8c92f83414b651fa24bf6b6811e4341c8c1fd.zip", "author": "Anil Kag;Durmus Alp Emre Acar;Aditya Gangrade;Venkatesh Saligrama", "authorids": "~Anil_Kag1;~Durmus_Alp_Emre_Acar1;~Aditya_Gangrade1;~Venkatesh_Saligrama1", "gender": "M;;;", "homepage": "https://anilkagak2.github.io/;;;https://venkatesh-saligrama.github.io/", "dblp": "213/9132;;;67/4721", "google_scholar": "bZdVsMkAAAAJ;https://scholar.google.com/citations?hl=en;;S4z3uzMAAAAJ", "orcid": ";;;0000-0002-0675-2268", "linkedin": ";;;venkatesh-saligrama-91175a16/", "or_profile": "~Anil_Kag1;~Durmus_Alp_Emre_Acar1;~Aditya_Gangrade1;~Venkatesh_Saligrama1", "aff": "Boston University;Boston University;;Boston University", "aff_domain": "bu.edu;bu.edu;;bu.edu", "position": "PhD student;PhD student;;Full Professor", "bibtex": "@inproceedings{\nkag2023scaffolding,\ntitle={Scaffolding a Student to Instill Knowledge},\nauthor={Anil Kag and Durmus Alp Emre Acar and Aditya Gangrade and Venkatesh Saligrama},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=N4K5ck-BTT}\n}", "github": "", "project": "", "reviewers": "DdZ6;sPvE;XDpR", "pdf_size": 5237933, "recommendation": "6;6;8", "confidence": "4;2;4", "correctness": "4;3;3", "technical_novelty": "3;3;3", "empirical_novelty": "3;3;3", "wc_summary_paper": "119;65;43", "wc_strength_and_weaknesses": "128;132;79", "wc_clarity_quality_novelty_and_reproducibility": "357;25;26", "wc_summary_review": "21;59;63", "wc_review": "625;281;211", "wc_reply_reviewers": "13;0;0", "wc_reply_authors": "466;458;332", "reply_reviewers": "1;0;0", "reply_authors": 
"1;1;1", "recommendation_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 75.66666666666667, 31.930480039541457 ], "wc_strength_and_weaknesses_avg": [ 113.0, 24.097026095903757 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 136.0, 156.27113190434972 ], "wc_summary_review_avg": [ 47.666666666666664, 18.926759422104517 ], "wc_review_avg": [ 372.3333333333333, 180.93338246130503 ], "wc_reply_reviewers_avg": [ 4.333333333333333, 6.128258770283412 ], "wc_reply_authors_avg": [ 418.6666666666667, 61.369554521946974 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.5, "corr_recommendation_correctness": -0.49999999999999983, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8212357541849917636&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=N4K5ck-BTT", "email": "bu.edu;bu.edu;;bu.edu", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Boston University", "aff_unique_dep": "", "aff_unique_url": "https://www.bu.edu", "aff_unique_abbr": "BU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "N4k3klHNzQj", "title": "Graph MLP-Mixer", "track": "main", "status": "Reject", "tldr": "", "abstract": "Graph Neural Networks (GNNs) have shown great potential in the field of graph representation learning. Standard GNNs define a local message-passing mechanism which propagates information over the whole graph domain by stacking multiple layers. This paradigm suffers from two major limitations, over-squashing and poor long-range dependencies, that can be solved using global attention but significantly increases the computational cost to quadratic complexity. In this work, we consider an alternative approach to overcome these structural limitations while keeping a low complexity cost. Motivated by the recent MLP-Mixer architecture introduced in computer vision, we propose to generalize this network to graphs. This GNN model, namely Graph MLP-Mixer, can make long-range connections without over-squashing or high complexity due to the mixer layer applied to the graph patches extracted from the original graph. As a result, this architecture exhibits promising results when comparing standard GNNs vs. 
Graph MLP-Mixers on benchmark graph datasets.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Xiaoxin He;Bryan Hooi;Thomas Laurent;Adam Perold;Yann LeCun;Xavier Bresson", "authorids": "~Xiaoxin_He1;~Bryan_Hooi1;~Thomas_Laurent1;~Adam_Perold1;~Yann_LeCun1;~Xavier_Bresson6", "gender": "F;;M;M;M;M", "homepage": "https://xiaoxinhe.github.io/;http://bhooi.github.io;http://thomaslaurent.lmu.build/homepage.html;http://yann.lecun.com;https://www.comp.nus.edu.sg/cs/people/xaviercs/;", "dblp": "72/5872;169/9975;47/8889-1;l/YannLeCun;95/378;", "google_scholar": "icT6GMsAAAAJ;;_Ag_9uAAAAAJ;WLN3QrAAAAAJ;https://scholar.google.com.sg/citations?hl=en;", "orcid": ";0000-0002-5645-1754;;;;", "linkedin": "he-xiaoxin-a130601b4/;;;;;adam-perold-50a21251/", "or_profile": "~Xiaoxin_He1;~Bryan_Hooi1;~Thomas_Laurent1;~Yann_LeCun1;~Xavier_Bresson6;~Adam_Perold2", "aff": "National University of Singapore;National University of Singapore;Loyola Marymount University;New York University;National University of Singapore;", "aff_domain": "nus.edu;nus.edu.sg;lmu.edu;nyu.edu;nus.edu.sg;", "position": "PhD student;Assistant Professor;Full Professor;Full Professor;Associate Professor;", "bibtex": "@misc{\nhe2023graph,\ntitle={Graph {MLP}-Mixer},\nauthor={Xiaoxin He and Bryan Hooi and Thomas Laurent and Adam Perold and Yann LeCun and Xavier Bresson},\nyear={2023},\nurl={https://openreview.net/forum?id=N4k3klHNzQj}\n}", "github": "", "project": "", "reviewers": "Fxzu;oFvZ;BAFm;jedP", "site": "https://openreview.net/forum?id=N4k3klHNzQj", "pdf_size": 817190, "recommendation": "5;5;5;6", "confidence": "3;3;4;4", "correctness": "3;2;3;2", "technical_novelty": "3;2;2;3", "empirical_novelty": "3;2;3;2", "wc_summary_paper": "28;114;65;95", "wc_strength_and_weaknesses": "150;71;243;133", "wc_clarity_quality_novelty_and_reproducibility": "13;10;57;208", "wc_summary_review": "24;16;43;43", "wc_review": "215;211;408;479", "wc_reply_reviewers": "0;0;0;140", "wc_reply_authors": "1005;1171;1594;1841", "reply_reviewers": "0;0;0;1", "reply_authors": "3;3;4;3", "recommendation_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 75.5, 32.515380975778214 ], "wc_strength_and_weaknesses_avg": [ 149.25, 61.596976386832495 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 72.0, 80.6938659378766 ], "wc_summary_review_avg": [ 31.5, 11.84271928232701 ], "wc_review_avg": [ 328.25, 117.9605336542693 ], "wc_reply_reviewers_avg": [ 35.0, 60.6217782649107 ], "wc_reply_authors_avg": [ 1402.75, 331.87073914402276 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.25, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:sGQRllOY7usJ:scholar.google.com/&scioq=Graph+MLP-Mixer&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "National University of Singapore;Loyola Marymount University;New York University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.nus.edu.sg;https://www.lmu.edu;https://www.nyu.edu", "aff_unique_abbr": "NUS;LMU;NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1;0", "aff_country_unique": "Singapore;United 
States" }, { "id": "N5fNFLO_MyD", "title": "N-Student Learning: An Approach to Model Uncertainty and Combat Overfitting", "track": "main", "status": "Reject", "tldr": "A pseudo-label based multi-network training setup to help combat the problem of overfitting.", "abstract": "This work presents N-Student Learning, a pseudo-label based multi-network training setup that can be applied to nearly any supervised learning architecture in order to help combat the problem of overfitting and control the way in which a network models uncertainty in the data. The effectiveness of N-Student Learning relies on the idea that a network's predictions on unseen data are largely independent of any instance-dependent noise in the labels. In N-Student Learning, each student network is assigned a subset of the training dataset such that no data point is in every student's training subset. Unbiased pseudo-labels can thus be generated for every data point in the training set by taking the predictions of appropriate student networks. Training on these unbiased pseudo-labels minimizes the extent to which each network overfits to instance-dependent noise in the data. Furthermore, based on prior knowledge of the domain, we can control how the networks learn to model uncertainty that is present in the dataset by adjusting the way that pseudo-labels are generated. While this method is largely inspired by the general problem of overfitting, a natural application is found in the problem of classification with noisy labels \u2014 a domain where overfitting is a significant concern. After developing intuition through a toy classification task, we proceed to demonstrate that N-Student Learning performs favorably on benchmark datasets when compared to state-of-the-art methods in the problem of classification with noisy labels.", "keywords": "Noisy Labels;Pseudo-Labels;Overfitting;Supervised Learning", "primary_area": "", "supplementary_material": "", "author": "Ryan Y. Xu;Solomon E Garber;Antonella Di Lillo;James Storer", "authorids": "~Ryan_Y._Xu1;~Solomon_E_Garber1;antonella.dilillo@tufts.edu;~James_Storer1", "gender": "M;;;M", "homepage": "https://ryanxu.net/;;;http://www.cs.brandeis.edu/~storer/", "dblp": ";;;", "google_scholar": ";PiefKtQAAAAJ;;", "orcid": ";;;", "linkedin": "ryxu/;;;", "or_profile": "~Ryan_Y._Xu1;~Solomon_E_Garber1;antonella.dilillo@tufts.edu;~James_Storer1", "aff": ";Brandeis University;;", "aff_domain": ";brandeis.edu;;", "position": ";PhD student;;", "bibtex": "@misc{\nxu2023nstudent,\ntitle={N-Student Learning: An Approach to Model Uncertainty and Combat Overfitting},\nauthor={Ryan Y. 
Xu and Solomon E Garber and Antonella Di Lillo and James Storer},\nyear={2023},\nurl={https://openreview.net/forum?id=N5fNFLO_MyD}\n}", "github": "", "project": "", "reviewers": "X44G;Riap;JspX", "site": "https://openreview.net/forum?id=N5fNFLO_MyD", "pdf_size": 3201104, "recommendation": "3;3;3", "confidence": "4;4;4", "correctness": "3;3;2", "technical_novelty": "1;2;2", "empirical_novelty": "2;2;2", "wc_summary_paper": "46;79;77", "wc_strength_and_weaknesses": "262;194;172", "wc_clarity_quality_novelty_and_reproducibility": "50;21;114", "wc_summary_review": "76;16;27", "wc_review": "434;310;390", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 67.33333333333333, 15.107025591499548 ], "wc_strength_and_weaknesses_avg": [ 209.33333333333334, 38.30868772948971 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 61.666666666666664, 38.852999312222416 ], "wc_summary_review_avg": [ 39.666666666666664, 26.08107018935807 ], "wc_review_avg": [ 378.0, 51.329004146453755 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:1nvtvi4clyIJ:scholar.google.com/&scioq=N-Student+Learning:+An+Approach+to+Model+Uncertainty+and+Combat+Overfitting&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Brandeis University", "aff_unique_dep": "", "aff_unique_url": "https://www.brandeis.edu", "aff_unique_abbr": "Brandeis", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "N5gn1KjCWW", "title": "Supervised Metric Learning for Retrieval via Contextual Similarity Optimization", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Existing deep metric learning approaches fall into three general categories: contrastive learning, average precision (AP) maximization, and classification. We propose a novel alternative approach, contextual similarity optimization, inspired by work in unsupervised metric learning. Contextual similarity is a discrete similarity measure based on relationships between neighborhood sets, and is widely used in the unsupervised setting as pseudo-supervision. Inspired by this success, we propose a framework which optimizes a combination of contextual and cosine similarities. Contextual similarity calculation involves several non-differentiable operations, including the heaviside function and intersection of sets. We show how to circumvent non-differentiability to explicitly optimize contextual similarity, and we further incorporate appropriate similarity regularization to yield our novel metric learning loss. 
The resulting loss function achieves state-of-the-art Recall @ 1 accuracy on standard supervised image retrieval benchmarks when combined with the standard contrastive loss.", "keywords": "Image Retrieval;Metric Learning;Contextual Similarity", "primary_area": "", "supplementary_material": "/attachment/666cd2cf0264b0630bdad90fffe196243a70ab0c.zip", "author": "Christopher Liao;Theodoros Tsiligkaridis;Brian Kulis", "authorids": "~Christopher_Liao1;~Theodoros_Tsiligkaridis1;~Brian_Kulis1", "gender": "M;M;", "homepage": ";https://sites.google.com/view/theo-t;http://people.bu.edu/bkulis/", "dblp": ";64/10412;43/3732", "google_scholar": ";hVUVOTIAAAAJ;okcbLqoAAAAJ", "orcid": ";;", "linkedin": "christopher-liao-524996a1;;", "or_profile": "~Christopher_Liao1;~Theodoros_Tsiligkaridis1;~Brian_Kulis1", "aff": "Boston University;MIT Lincoln Laboratory, Massachusetts Institute of Technology;Amazon", "aff_domain": "bu.edu;ll.mit.edu;amazon.com", "position": "PhD student;Senior AI Research Scientist;Amazon Scholar", "bibtex": "@misc{\nliao2023supervised,\ntitle={Supervised Metric Learning for Retrieval via Contextual Similarity Optimization},\nauthor={Christopher Liao and Theodoros Tsiligkaridis and Brian Kulis},\nyear={2023},\nurl={https://openreview.net/forum?id=N5gn1KjCWW}\n}", "github": "", "project": "", "reviewers": "BCPY;UL4h;HeWT;iZCj", "site": "https://openreview.net/forum?id=N5gn1KjCWW", "pdf_size": 1173126, "recommendation": "3;3;5;8", "confidence": "4;5;4;4", "correctness": "3;3;3;4", "technical_novelty": "3;2;2;3", "empirical_novelty": "3;2;2;3", "wc_summary_paper": "58;34;40;65", "wc_strength_and_weaknesses": "307;65;287;148", "wc_clarity_quality_novelty_and_reproducibility": "21;30;18;44", "wc_summary_review": "24;30;34;43", "wc_review": "410;159;379;300", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.75, 2.0463381929681126 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 49.25, 12.676257334087218 ], "wc_strength_and_weaknesses_avg": [ 201.75, 99.91840421063579 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 28.25, 10.108783309577865 ], "wc_summary_review_avg": [ 32.75, 6.905613658466566 ], "wc_review_avg": [ 312.0, 97.01288574204975 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.49374193110101877, "corr_recommendation_correctness": 0.9169493006161777, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17628876135823477807&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;2", "aff_unique_norm": "Boston University;Massachusetts Institute of Technology;Amazon", "aff_unique_dep": ";Lincoln Laboratory;Amazon.com, Inc.", "aff_unique_url": "https://www.bu.edu;https://web.mit.edu;https://www.amazon.com", "aff_unique_abbr": "BU;MIT;Amazon", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "N6NO4o_b5r", "title": "Group-wise Verifiable Distributed Computing for Machine Learning under Adversarial Attacks", "track": "main", "status": "Reject", "tldr": "This paper tackles adversarial attack and straggler 
effect in distributed computing by proposing Group-wise Verifiable Coded Computing. ", "abstract": "Distributed computing has been a promising solution in machine learning to accelerate the training procedure on large-scale datasets by utilizing multiple workers in parallel. However, two major issues remain to be addressed: i) adversarial attacks from malicious workers, and ii) the effect of slow workers known as stragglers. In this paper, we tackle both problems simultaneously by proposing Group-wise Verifiable Coded Computing (GVCC), which leverages coding techniques and group-wise verification to provide robustness to adversarial attacks and resiliency to straggler effects in distributed computing. The key idea of GVCC is to verify a group of computation results from workers at a time, while providing resilience to stragglers through encoding tasks assigned to workers with Group-wise Verifiable Codes. Experimental results show that GVCC outperforms the existing methods in terms of overall processing time and verification time for executing matrix multiplication, which is a key computational component in machine learning and deep learning. ", "keywords": "Adversarial attack;Verifiable computing;Distributed Computing;Coded computing", "primary_area": "", "supplementary_material": "/attachment/a85df679a0f6bb917825a866a7923f97a6989b19.zip", "author": "Sangwoo Hong;Heecheol Yang;Youngseok Yoon;Jungwoo Lee", "authorids": "~Sangwoo_Hong1;~Heecheol_Yang1;~Youngseok_Yoon2;~Jungwoo_Lee1", "gender": "M;M;M;M", "homepage": ";http://sites.google.com/view/dncl;https://youngseok-yoon.github.io/;https://cml.snu.ac.kr", "dblp": "48/334;;;34/516-1", "google_scholar": "5HqydTsAAAAJ;;b3MwZEwAAAAJ;j98IWfoAAAAJ", "orcid": "0000-0002-0270-2781;;0000-0002-7499-3055;0000-0002-6804-980X", "linkedin": "sangwoohong1995;;youngseok-yoon-65508516a/;", "or_profile": "~Sangwoo_Hong1;~Heecheol_Yang1;~Youngseok_Yoon2;~Jungwoo_Lee1", "aff": "Seoul National University;;Seoul National University;Seoul National University", "aff_domain": "snu.ac.kr;;snu.ac.kr;snu.ac.kr", "position": "PhD student;;MS student;Full Professor", "bibtex": "@misc{\nhong2023groupwise,\ntitle={Group-wise Verifiable Distributed Computing for Machine Learning under Adversarial Attacks},\nauthor={Sangwoo Hong and Heecheol Yang and Youngseok Yoon and Jungwoo Lee},\nyear={2023},\nurl={https://openreview.net/forum?id=N6NO4o_b5r}\n}", "github": "", "project": "", "reviewers": "4Emk;Z7en;d7sM;Ma5n", "site": "https://openreview.net/forum?id=N6NO4o_b5r", "pdf_size": 734050, "recommendation": "3;3;6;8", "confidence": "3;3;2;4", "correctness": "3;3;3;4", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;1;3;0", "wc_summary_paper": "41;37;38;76", "wc_strength_and_weaknesses": "135;206;49;140", "wc_clarity_quality_novelty_and_reproducibility": "29;99;2;17", "wc_summary_review": "34;52;23;20", "wc_review": "239;394;112;253", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.0, 2.1213203435596424 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 1.5, 1.118033988749895 ], "wc_summary_paper_avg": [ 48.0, 16.232683080747925 ], "wc_strength_and_weaknesses_avg": [ 132.5, 55.76064920712455 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 36.75, 37.19122880465231 ], "wc_summary_review_avg": [ 32.25, 12.537443918119834 ],
"wc_review_avg": [ 249.5, 99.88618523099179 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 0.8164965809277261, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:6Ybqz-MA9aEJ:scholar.google.com/&scioq=Group-wise+Verifiable+Distributed+Computing+for+Machine+Learning+under+Adversarial+Attacks&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Seoul National University", "aff_unique_dep": "", "aff_unique_url": "https://www.snu.ac.kr", "aff_unique_abbr": "SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "id": "N6iz-EQkuar", "title": "Network Controllability Perspectives on Graph Representation", "track": "main", "status": "Withdraw", "tldr": "We develop a novel graph representation method using network control properties and demonstrate its theoretical merits. ", "abstract": "Graph representations in fixed dimensional feature space are vital in applying learning tools and data mining algorithms to perform graph analytics. Such representations must encode the graph's topological and structural information at the local and global scales without posing significant computation overhead. This paper employs a unique approach grounded in networked control system theory to obtain expressive graph representations with desired properties. We consider graphs as networked dynamical systems and study their controllability properties to explore the underlying graph structure. The controllability of a networked dynamical system profoundly depends on the underlying network topology, and we exploit this relationship to design novel graph representations using controllability Gramian and related metrics. We discuss the merits of this new approach in terms of the desired properties (for instance, permutation and scale invariance) of the proposed representations. Our evaluation of various benchmark datasets in the graph classification framework demonstrates that the proposed representations either outperform (sometimes by more than 6%), or give similar results to the state-of-the-art embeddings. ", "keywords": "Graph Representation;Network Controllability;Graph Classification", "primary_area": "", "supplementary_material": "/attachment/fc25fbc8127b0dc338ad64a0afbf461d3b7ce4a7.zip", "author": "Anwar Said;Obaid Ullah Ahmad;Waseem Abbas;Mudassir Shabbir;Xenofon D. 
Koutsoukos", "authorids": "~Anwar_Said1;~Obaid_Ullah_Ahmad1;~Waseem_Abbas2;~Mudassir_Shabbir1;~Xenofon_D._Koutsoukos1", "gender": "M;M;;;M", "homepage": "https://anwar-said.github.io/Representation-Learning-Blogs/index.html;;http://www.wabbas.com/;;http://engineering.vanderbilt.edu/bio/xenofon-koutsoukos", "dblp": "212/4751;;79/11046;78/7323;11/5453", "google_scholar": "eSEelAgAAAAJ;EwYiUJQAAAAJ;U9oBOmMAAAAJ;https://scholar.google.com.pk/citations?user=bRKvwRYAAAAJ;https://scholar.google.com.tw/citations?user=NHZdlVkAAAAJ", "orcid": "0000-0002-6715-0068;;;;", "linkedin": "anwar-said-9bb596b4;obaid-ullah-ahmad-2b92a4121?original_referer=https%3A%2F%2Fwww.google.com%2F;;;", "or_profile": "~Anwar_Said1;~Obaid_Ullah_Ahmad1;~Waseem_Abbas2;~Mudassir_Shabbir1;~Xenofon_D._Koutsoukos1", "aff": "Vanderbilt University;University of Texas at Dallas;The University of Texas at Dallas;ITU of Punjab Lahore, Pakistan;Vanderbilt University", "aff_domain": "vanderbilt.edu;utdallas.edu;utdallas.edu;itu.edu.pk;vanderbilt.edu", "position": "Postdoc;PhD student;Assistant Professor;Associate Professor;Full Professor", "bibtex": "@misc{\nsaid2023network,\ntitle={Network Controllability Perspectives on Graph Representation},\nauthor={Anwar Said and Obaid Ullah Ahmad and Waseem Abbas and Mudassir Shabbir and Xenofon D. Koutsoukos},\nyear={2023},\nurl={https://openreview.net/forum?id=N6iz-EQkuar}\n}", "github": "", "project": "", "reviewers": "JJwQ;N76E;K54D", "site": "https://openreview.net/forum?id=N6iz-EQkuar", "pdf_size": 598152, "recommendation": "3;5;6", "confidence": "5;3;3", "correctness": "2;3;3", "technical_novelty": "2;3;3", "empirical_novelty": "0;2;3", "wc_summary_paper": "110;81;44", "wc_strength_and_weaknesses": "324;503;84", "wc_clarity_quality_novelty_and_reproducibility": "91;72;38", "wc_summary_review": "109;76;21", "wc_review": "634;732;187", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.6666666666666667, 1.247219128924647 ], "wc_summary_paper_avg": [ 78.33333333333333, 27.010286106510527 ], "wc_strength_and_weaknesses_avg": [ 303.6666666666667, 171.6592231395939 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 67.0, 21.924111536540465 ], "wc_summary_review_avg": [ 68.66666666666667, 36.298148100909444 ], "wc_review_avg": [ 517.6666666666666, 237.21485806940697 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.944911182523068, "corr_recommendation_correctness": 0.9449111825230683, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7718574137880342488&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;1;2;0", "aff_unique_norm": "Vanderbilt University;University of Texas at Dallas;ITU of Punjab", "aff_unique_dep": ";;", "aff_unique_url": "https://www.vanderbilt.edu;https://www.utdallas.edu;", "aff_unique_abbr": "Vanderbilt;UT Dallas;", "aff_campus_unique_index": "1;1;2", "aff_campus_unique": ";Dallas;Lahore", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "United States;Pakistan" }, { "id": 
"N7Tv4aZ4Cyx", "title": "SGD and Weight Decay Provably Induce a Low-Rank Bias in Neural Networks", "track": "main", "status": "Reject", "tldr": "", "abstract": "We analyze deep ReLU neural networks trained with mini-batch Stochastic Gradient Descent (SGD) and weight decay. We show, both theoretically and empirically, that when training a neural network using SGD with weight decay and small batch size, the resulting weight matrices tend to be of small rank. Our analysis relies on a minimal set of assumptions; the neural networks may be arbitrarily wide or deep and may include residual connections, as well as convolutional layers. \nThe same analysis implies the inherent presence of SGD ``noise'', defined as the inability of SGD to converge to a stationary point. In particular, we prove that SGD noise must always be present, even asymptotically, as long as we incorporate weight decay and the batch size is smaller than the total number of training samples. ", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Tomer Galanti;Zachary S Siegel;Aparna Gupte;Tomaso Poggio", "authorids": "~Tomer_Galanti1;~Zachary_S_Siegel1;~Aparna_Gupte1;~Tomaso_Poggio1", "gender": "M;M;M;", "homepage": "https://tomergalanti.github.io;https://zacharysiegel.org/;https://cbmm.mit.edu/about/people/poggio;", "dblp": "198/1490;;12/5544;", "google_scholar": ";EOPTuH0AAAAJ;WgAGy7wAAAAJ;", "orcid": ";;;", "linkedin": "tomer-galanti-5880b1104/;;;aparna-gupte-216247142/", "or_profile": "~Tomer_Galanti1;~Zachary_S_Siegel1;~Tomaso_Poggio1;~Aparna_Ajit_Gupte1", "aff": "Massachusetts Institute of Technology;Department of Computer Science, Princeton University;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;cs.princeton.edu;mit.edu;mit.edu", "position": "Postdoc;Undergrad student;Full Professor;Undergrad student", "bibtex": "@misc{\ngalanti2023sgd,\ntitle={{SGD} and Weight Decay Provably Induce a Low-Rank Bias in Neural Networks},\nauthor={Tomer Galanti and Zachary S Siegel and Aparna Gupte and Tomaso Poggio},\nyear={2023},\nurl={https://openreview.net/forum?id=N7Tv4aZ4Cyx}\n}", "github": "", "project": "", "reviewers": "g7pt;897C;g7fW;c3Es", "site": "https://openreview.net/forum?id=N7Tv4aZ4Cyx", "pdf_size": 2476355, "recommendation": "1;3;3;3", "confidence": "4;3;3;4", "correctness": "1;2;3;2", "technical_novelty": "1;2;2;2", "empirical_novelty": "3;2;2;2", "wc_summary_paper": "21;127;110;123", "wc_strength_and_weaknesses": "215;506;248;474", "wc_clarity_quality_novelty_and_reproducibility": "6;22;23;44", "wc_summary_review": "42;20;28;75", "wc_review": "284;675;409;716", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 2.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.0, 0.7071067811865476 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 95.25, 43.3265219005634 ], "wc_strength_and_weaknesses_avg": [ 360.75, 130.26775310874137 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 23.75, 13.497684986693088 ], "wc_summary_review_avg": [ 41.25, 21.016362672927016 ], "wc_review_avg": [ 521.0, 180.5920817754754 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 
-0.5773502691896257, "corr_recommendation_correctness": 0.816496580927726, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9877620938322414754&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Massachusetts Institute of Technology;Princeton University", "aff_unique_dep": ";Department of Computer Science", "aff_unique_url": "https://web.mit.edu;https://www.princeton.edu", "aff_unique_abbr": "MIT;Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "N7ts-GTfuy", "title": "3D-Aware Video Generation", "track": "main", "status": "Reject", "tldr": "3D-Aware Video Generation", "abstract": "Generative models have emerged as an essential building block for many image synthesis and editing tasks. Recent advances in this field have also enabled high-quality 3D or video content to be generated that exhibits either multi-view or temporal consistency. With our work, we explore 4D generative adversarial networks (GANs) that learn unconditional generation of 3D-aware videos. By combining neural implicit representations with a time-aware discriminator, we develop a GAN framework that synthesizes 3D video supervised only with monocular videos. We show that our method learns a rich embedding of decomposable 3D structures and motions that enables new visual effects of spatio-temporal renderings while producing imagery with quality comparable to that of existing 3D or video GANs.", "keywords": "video generation;3D;generative model;3D-aware image synthesis", "primary_area": "", "supplementary_material": "/attachment/31d539770a4a1b37c89673bbbfd6546dccfdb043.zip", "author": "Sherwin Bahmani;Jeong Joon Park;Despoina Paschalidou;Hao Tang;Gordon Wetzstein;Leonidas Guibas;Luc Van Gool;Radu Timofte", "authorids": "~Sherwin_Bahmani1;~Jeong_Joon_Park2;~Despoina_Paschalidou1;~Hao_Tang6;~Gordon_Wetzstein3;~Leonidas_Guibas1;~Luc_Van_Gool1;~Radu_Timofte1", "gender": "M;Not Specified;F;M;M;M;;M", "homepage": "https://sherwinbahmani.github.io;https://jjparkcv.github.io/;https://paschalidoud.github.io/;https://ha0tang.github.io/;http://web.stanford.edu/~gordonwz/;http://geometry.stanford.edu/;;https://www.informatik.uni-wuerzburg.de/computervision/", "dblp": "307/5156;227/2840;188/1138;07/5751-5;13/4660;g/LeonidasJGuibas;61/5017;24/8616", "google_scholar": "XPD09yEAAAAJ;aD5pXLoAAAAJ;https://scholar.google.de/citations?user=zxFlR6sAAAAJ;9zJkeEMAAAAJ;VOf45S0AAAAJ;https://scholar.google.com.tw/citations?user=5JlEyTAAAAAJ;https://scholar.google.be/citations?user=TwMib_QAAAAJ;https://scholar.google.ch/citations?user=u3MwH5kAAAAJ", "orcid": ";;;0000-0002-2077-1246;0000-0002-9243-6885;;;0000-0002-1478-0402", "linkedin": "sherwin-bahmani-a2b5691a9/;;;hao-tang-887475138/;gordon-wetzstein-2406723/;;;https://ch.linkedin.com/in/radutimofte", "or_profile": "~Sherwin_Bahmani1;~Jeong_Joon_Park2;~Despoina_Paschalidou1;~Hao_Tang6;~Gordon_Wetzstein3;~Leonidas_Guibas1;~Luc_Van_Gool1;~Radu_Timofte1", "aff": "Simon Fraser University;Stanford University;Stanford University;ETH Zurich;Stanford University;Stanford University;KU Leuven;Bayerische Julius-Maximilians-Universit\u00e4t W\u00fcrzburg", "aff_domain": "sfu.ca;stanford.edu;stanford.edu;vision.ee.ethz.ch;stanford.edu;stanford.edu;kuleuven.be;uni-wuerzburg.de", "position": "Intern;Postdoc;Postdoc;Postdoc;Associate Professor;Full Professor;Emeritus;Full Professor", "bibtex":
"@misc{\nbahmani2023daware,\ntitle={3D-Aware Video Generation},\nauthor={Sherwin Bahmani and Jeong Joon Park and Despoina Paschalidou and Hao Tang and Gordon Wetzstein and Leonidas Guibas and Luc Van Gool and Radu Timofte},\nyear={2023},\nurl={https://openreview.net/forum?id=N7ts-GTfuy}\n}", "github": "", "project": "", "reviewers": "PyKL;rreh;dFag;jBEi", "site": "https://openreview.net/forum?id=N7ts-GTfuy", "pdf_size": 16336129, "recommendation": "3;5;5;8", "confidence": "4;3;4;5", "correctness": "3;3;3;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;0;3;4", "wc_summary_paper": "62;50;79;82", "wc_strength_and_weaknesses": "388;78;310;184", "wc_clarity_quality_novelty_and_reproducibility": "88;29;15;75", "wc_summary_review": "53;7;17;34", "wc_review": "591;164;421;375", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1188;450;447;73", "reply_reviewers": "0;0;0;0", "reply_authors": "3;2;2;2", "recommendation_avg": [ 5.25, 1.7853571071357126 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.25, 1.479019945774904 ], "wc_summary_paper_avg": [ 68.25, 13.007209539328564 ], "wc_strength_and_weaknesses_avg": [ 240.0, 118.51582172857766 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 51.75, 30.50717128807586 ], "wc_summary_review_avg": [ 27.75, 17.483921184905864 ], "wc_review_avg": [ 387.75, 152.18635779858852 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 539.5, 404.5803381282882 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": 0.5940885257860046, "corr_recommendation_correctness": 0.0, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15190731611578120402&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;1;2;1;1;3;4", "aff_unique_norm": "Simon Fraser University;Stanford University;ETH Zurich;Katholieke Universiteit Leuven;University of W\u00fcrzburg", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.sfu.ca;https://www.stanford.edu;https://www.ethz.ch;https://www.kuleuven.be;https://www.uni-wuerzburg.de", "aff_unique_abbr": "SFU;Stanford;ETHZ;KU Leuven;JMU", "aff_campus_unique_index": "1;1;1;1;2", "aff_campus_unique": ";Stanford;W\u00fcrzburg", "aff_country_unique_index": "0;1;1;2;1;1;3;4", "aff_country_unique": "Canada;United States;Switzerland;Belgium;Germany" }, { "id": "N8N2VMkWdVf", "title": "Triplet Similarity Learning on Concordance Constraint", "track": "main", "status": "Reject", "tldr": "A simple and elegant loss function is proposed to exploit the concordance constraint of triplet similarity for deep metric learning.", "abstract": "Triplet-based loss functions have been the paradigm of choice for robust deep metric learning (DML). However, conventional triplet-based losses require carefully tuning a decision boundary, i.e., violation margin. When performing online triplet mining on each mini-batch, choosing a good global and constant prior value for violation margin is challenging and irrational. To circumvent this issue, we propose a novel yet efficient concordance-induced triplet (CIT) loss as an objective function to train DML models. We formulate the similarity of triplet samples as a concordance constraint problem, then directly optimize concordance during DML model learning. 
Triplet concordance refers to the predicted ordering of intra-class and inter-class similarities being correct, which is invariant to any monotone transformation of the decision boundary of triplet samples. Hence, our CIT loss is free from the plague of adopting the violation margin as a prior constraint. In addition, due to the high training complexity of triplet-based losses, we introduce a partial likelihood term for CIT loss to impose additional penalties on hard triplet samples, thus enforcing fast convergence. We extensively experiment on a variety of DML tasks to demonstrate the elegance and simplicity of our CIT loss against its counterparts. In particular, on face recognition, person re-identification, as well as image retrieval datasets, our method can achieve performance comparable with state-of-the-art methods without laboriously tuning any hyper-parameters.", "keywords": "Metric Learning;Triplet Loss;Concordance;Hard samples", "primary_area": "", "supplementary_material": "", "author": "Jiansheng Fang;Jiajian Li;Jiang Liu", "authorids": "~Jiansheng_Fang1;~Jiajian_Li1;~Jiang_Liu5", "gender": "M;;M", "homepage": ";https://github.com/genhao3;https://faculty.sustech.edu.cn/liuj/", "dblp": "246/7515.html;;23/108-1.html", "google_scholar": "t2qaSOUAAAAJ;;NHt3fUcAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Jiansheng_Fang1;~Jiajian_Li1;~Jiang_Liu5", "aff": "Guangzhou Guangri Stock Co., Ltd.;;Southern University of Science and Technology", "aff_domain": "gri.com;;sustech.edu.cn", "position": "Researcher;;Full Professor", "bibtex": "@misc{\nfang2023triplet,\ntitle={Triplet Similarity Learning on Concordance Constraint},\nauthor={Jiansheng Fang and Jiajian Li and Jiang Liu},\nyear={2023},\nurl={https://openreview.net/forum?id=N8N2VMkWdVf}\n}", "github": "", "project": "", "reviewers": "nK2W;mdeV;FXz5;TC7M", "site": "https://openreview.net/forum?id=N8N2VMkWdVf", "pdf_size": 5049512, "recommendation": "3;3;5;6", "confidence": "5;4;4;1", "correctness": "3;3;4;4", "technical_novelty": "2;3;3;2", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "75;105;76;20", "wc_strength_and_weaknesses": "305;379;199;52", "wc_clarity_quality_novelty_and_reproducibility": "20;115;44;116", "wc_summary_review": "29;55;47;37", "wc_review": "429;654;366;225", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "639;1165;658;182", "reply_reviewers": "0;0;0;0", "reply_authors": "1;2;1;1", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.5, 1.5 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 69.0, 30.748983723043597 ], "wc_strength_and_weaknesses_avg": [ 233.75, 122.8970605832377 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 73.75, 42.60501731017135 ], "wc_summary_review_avg": [ 42.0, 9.848857801796104 ], "wc_review_avg": [ 418.5, 154.7328342660342 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 661.0, 347.8325746677559 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.8339503888294595, "corr_recommendation_correctness": 0.9622504486493761, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16581921803720754469&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Guangzhou Guangri Stock Co., Ltd.;Southern University of Science and Technology", "aff_unique_dep":
";", "aff_unique_url": ";https://www.sustech.edu.cn", "aff_unique_abbr": ";SUSTech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "MECTA: Memory-Economic Continual Test-Time Model Adaptation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12141", "id": "N92hjSf5NNh", "poster": "/media/PosterPDFs/ICLR%202023/12141.png?t=1682111977.3214848", "openreview": "https://openreview.net/forum?id=N92hjSf5NNh", "slides": "https://iclr.cc/virtual/2023/poster/12141", "video": "https://iclr.cc/virtual/2023/poster/12141", "author_site": "Junyuan Hong, Lingjuan Lyu, Jiayu Zhou, Michael Spranger", "tldr": "", "abstract": "Continual Test-time Adaptation (CTA) is a promising art to secure accuracy gains in continually-changing environments. The state-of-the-art adaptations improve out-of-distribution model accuracy via computation-efficient online test-time gradient descents but meanwhile cost about times of memory versus the inference, even if only a small portion of parameters are updated. Such high memory consumption of CTA substantially impedes wide applications of advanced CTA on memory-constrained devices. In this paper, we provide a novel solution, dubbed MECTA, to drastically improve the memory efficiency of gradient-based CTA. Our profiling shows that the major memory overhead comes from the intermediate cache for back-propagation, which scales by the batch size, channel, and layer number. Therefore, we propose to reduce batch sizes, adopt an adaptive normalization layer to maintain stable and accurate predictions, and stop the back-propagation caching heuristically. On the other hand, we prune the networks to reduce the computation and memory overheads in optimization and recover the parameters afterward to avoid forgetting. The proposed MECTA is efficient and can be seamlessly plugged into state-of-the-art CTA algorithms at negligible overhead on computation and memory. On three datasets, CIFAR10, CIFAR100, and ImageNet, MECTA improves the accuracy by at least 6% with constrained memory and significantly reduces the memory costs of ResNet50 on ImageNet by at least 70% with comparable accuracy. 
Our codes can be accessed at https://github.com/SonyAI/MECTA.", "keywords": "continual test-time adaptation;memory efficiency", "primary_area": "", "supplementary_material": "/attachment/11ae0198cc3c381b6115f79b63f54f10c61f4d0e.zip", "author": "Junyuan Hong;Lingjuan Lyu;Jiayu Zhou;Michael Spranger", "authorids": "~Junyuan_Hong1;~Lingjuan_Lyu1;~Jiayu_Zhou1;~Michael_Spranger2", "gender": "M;F;M;", "homepage": "https://jyhong.gitlab.io/;https://sites.google.com/view/lingjuan-lyu;http://jiayuzhou.github.io/;", "dblp": "185/1316;178/9876;73/1353;", "google_scholar": "7Cbv6doAAAAJ;;https://scholar.google.com.tw/citations?user=yQKlLTQAAAAJ;", "orcid": "0000-0002-5718-5187;;0000-0003-4336-6777;", "linkedin": ";;jiayuzhou/;", "or_profile": "~Junyuan_Hong1;~Lingjuan_Lyu1;~Jiayu_Zhou1;~Michael_Spranger2", "aff": "Michigan State University;Sony;Michigan State University;", "aff_domain": "msu.edu;sony.com;msu.edu;", "position": "PhD student;scientist;Associate Professor;", "bibtex": "@inproceedings{\nhong2023mecta,\ntitle={{MECTA}: Memory-Economic Continual Test-Time Model Adaptation},\nauthor={Junyuan Hong and Lingjuan Lyu and Jiayu Zhou and Michael Spranger},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=N92hjSf5NNh}\n}", "github": "", "project": "", "reviewers": "QbXv;wXpt;pnAa;veMt", "pdf_size": 1336473, "recommendation": "6;8;8;8", "confidence": "4;4;4;5", "correctness": "3;4;4;4", "technical_novelty": "2;3;4;4", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "250;149;124;89", "wc_strength_and_weaknesses": "579;144;194;365", "wc_clarity_quality_novelty_and_reproducibility": "422;72;173;308", "wc_summary_review": "405;40;23;39", "wc_review": "1656;405;514;801", "wc_reply_reviewers": "337;12;0;0", "wc_reply_authors": "1117;477;315;159", "reply_reviewers": "1;1;0;0", "reply_authors": "4;2;1;1", "recommendation_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 3.25, 0.82915619758885 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 153.0, 59.92078103629825 ], "wc_strength_and_weaknesses_avg": [ 320.5, 170.2622976468954 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 243.75, 132.66946709774635 ], "wc_summary_review_avg": [ 126.75, 160.78926425604416 ], "wc_review_avg": [ 844.0, 490.6154298429677 ], "wc_reply_reviewers_avg": [ 87.25, 144.27642738853774 ], "wc_reply_authors_avg": [ 517.0, 364.20049423360206 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 1.0, "gs_citation": 41, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4283353743074610447&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=N92hjSf5NNh", "email": "msu.edu;sony.com;msu.edu;", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Michigan State University;Sony Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://www.msu.edu;https://www.sony.com", "aff_unique_abbr": "MSU;Sony", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;Japan" }, { "title": "Towards Open Temporal Graph Neural Networks", "status": "Top-5%", "track": "main", "site": 
"https://iclr.cc/virtual/2023/poster/11595", "id": "N9Pk5iSCzAn", "poster": "", "openreview": "https://openreview.net/forum?id=N9Pk5iSCzAn", "slides": "https://iclr.cc/virtual/2023/poster/11595", "video": "https://iclr.cc/virtual/2023/poster/11595", "author_site": "Kaituo Feng, Changsheng Li, Xiaolu Zhang, JUN ZHOU", "tldr": "In this paper, we propose a general and principled learning approach for open temporal graphs where the class set for nodes is open.", "abstract": "Graph neural networks (GNNs) for temporal graphs have recently attracted increasing attentions, where a common assumption is that the class set for nodes is closed. However, in real-world scenarios, it often faces the open set problem with the dynamically increased class set as the time passes by. This will bring two big challenges to the existing dynamic GNN methods: (i) How to dynamically propagate appropriate information in an open temporal graph, where new class nodes are often linked to old class nodes. This case will lead to a sharp contradiction. This is because typical GNNs are prone to make the embeddings of connected nodes become similar, while we expect the embeddings of these two interactive nodes to be distinguishable since they belong to different classes. (ii) How to avoid catastrophic knowledge forgetting over old classes when learning new classes occurred in temporal graphs. In this paper, we propose a general and principled learning approach for open temporal graphs, called OTGNet, with the goal of addressing the above two challenges. We assume the knowledge of a node can be disentangled into class-relevant and class-agnostic one, and thus explore a new message passing mechanism by extending the information bottleneck principle to only propagate class-agnostic knowledge between nodes of different classes, avoiding aggregating conflictive information. Moreover, we devise a strategy to select both important and diverse triad sub-graph structures for effective class-incremental learning. 
Extensive experiments on three real-world datasets of different domains demonstrate the superiority of our method, compared to the baselines.", "keywords": "Temporal Graph Neural Networks;Open Temporal Graphs;Class-Incremental Learning", "primary_area": "", "supplementary_material": "", "author": "Kaituo Feng;Changsheng Li;Xiaolu Zhang;JUN ZHOU", "authorids": "~Kaituo_Feng1;~Changsheng_Li4;~Xiaolu_Zhang2;~JUN_ZHOU6", "gender": "M;M;F;M", "homepage": "https://github.com/tulerfeng;;https://scholar.google.com/citations?user=cAz9PToAAAAJ;https://scholar.google.com/citations?user=mCVvloEAAAAJ&hl=en", "dblp": "322/6044;;48/5176;99/3847-11", "google_scholar": "m1iCh00AAAAJ;FfJnUioAAAAJ;;mCVvloEAAAAJ", "orcid": ";0000-0001-9789-7632;0000-0001-8055-0245;0000-0001-6033-6102", "linkedin": ";;;", "or_profile": "~Kaituo_Feng1;~Changsheng_Li4;~Xiaolu_Zhang2;~JUN_ZHOU6", "aff": "Beijing Institute of Technology;Beijing Institute of Technology;Ant Group;Ant Group", "aff_domain": "bit.edu.cn;bit.edu.cn;antfin.com;antgroup.com", "position": "MS student;Full Professor;Researcher;Researcher", "bibtex": "@inproceedings{\nfeng2023towards,\ntitle={Towards Open Temporal Graph Neural Networks},\nauthor={Kaituo Feng and Changsheng Li and Xiaolu Zhang and JUN ZHOU},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=N9Pk5iSCzAn}\n}", "github": "", "project": "", "reviewers": "FhGE;Riiz;KnTc;WeDg", "pdf_size": 2259456, "recommendation": "6;6;6;8", "confidence": "3;3;2;4", "correctness": "3;3;2;4", "technical_novelty": "3;2;4;4", "empirical_novelty": "3;3;4;4", "wc_summary_paper": "98;126;40;107", "wc_strength_and_weaknesses": "565;355;251;165", "wc_clarity_quality_novelty_and_reproducibility": "106;78;10;31", "wc_summary_review": "24;91;32;33", "wc_review": "793;650;333;336", "wc_reply_reviewers": "34;0;0;15", "wc_reply_authors": "942;978;903;239", "reply_reviewers": "1;0;0;1", "reply_authors": "2;2;2;1", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 3.25, 0.82915619758885 ], "empirical_novelty_avg": [ 3.5, 0.5 ], "wc_summary_paper_avg": [ 92.75, 32.08874413248359 ], "wc_strength_and_weaknesses_avg": [ 334.0, 149.37536610833797 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 56.25, 37.8310388437854 ], "wc_summary_review_avg": [ 45.0, 26.78619047195775 ], "wc_review_avg": [ 528.0, 199.99874999609372 ], "wc_reply_reviewers_avg": [ 12.25, 13.970952007647869 ], "wc_reply_authors_avg": [ 765.5, 305.12989037457476 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.816496580927726, "corr_recommendation_correctness": 0.816496580927726, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4492146497430655743&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=N9Pk5iSCzAn", "email": "bit.edu.cn;bit.edu.cn;antfin.com;antgroup.com", "author_num": 4, "aff_unique_index": "0;0;1;1", "aff_unique_norm": "Beijing Institute of Technology;Ant Group", "aff_unique_dep": ";", "aff_unique_url": "http://www.bit.edu.cn/;https://www.antgroup.com", "aff_unique_abbr": "BIT;Ant Group", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "An Image is 
Worth One Word: Personalizing Text-to-Image Generation using Textual Inversion", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12191", "id": "NAQvF08TcyG", "poster": "", "openreview": "https://openreview.net/forum?id=NAQvF08TcyG", "slides": "https://iclr.cc/virtual/2023/poster/12191", "video": "https://iclr.cc/virtual/2023/poster/12191", "author_site": "Rinon Gal, Yuval Alaluf, Yuval Atzmon, Or Patashnik, Amit Bermano, Gal Chechik, Daniel Cohen-Or", "tldr": "We present the task of personalized text-to-image generation, and introduce an inversion-based method that allows us to synthesize novel scenes of user-provided visual concepts, guided by natural language instructions.", "abstract": "Text-to-image models offer unprecedented freedom to guide creation through natural language. Yet, it is unclear how such freedom can be exercised to generate images of specific unique concepts, modify their appearance, or compose them in new roles and novel scenes.\nIn other words, we ask: how can we use language-guided models to turn *our* cat into a painting, or imagine a new product based on *our* favorite toy? \nHere we present a simple approach that allows such creative freedom. Using only $3$-$5$ images of a user-provided concept, like an object or a style, we learn to represent it through new ``words\" in the embedding space of a frozen text-to-image model.\nThese ``words\" can be composed into natural language sentences, guiding *personalized* creation in an intuitive way.\nNotably, we find evidence that a *single* word embedding is sufficient for capturing unique and varied concepts. \nWe compare our approach to a wide range of baselines, and demonstrate that it can more faithfully portray the concepts across a range of applications and tasks. 
Our code, data and new words will be available.", "keywords": "Personalized generation;text-to-image;inversion", "primary_area": "", "supplementary_material": "", "author": "Rinon Gal;Yuval Alaluf;Yuval Atzmon;Or Patashnik;Amit Haim Bermano;Gal Chechik;Daniel Cohen-or", "authorids": "~Rinon_Gal1;~Yuval_Alaluf1;~Yuval_Atzmon1;~Or_Patashnik1;~Amit_Haim_Bermano2;~Gal_Chechik1;~Daniel_Cohen-or2", "gender": ";M;;F;M;;", "homepage": ";https://yuval-alaluf.github.io;;https://orpatashnik.github.io/;https://www.cs.tau.ac.il/~amberman/;https://chechiklab.biu.ac.il/~gal/;", "dblp": ";271/8139;182/5849;271/8264;97/10458;c/GalChechik;", "google_scholar": ";uvaPP80AAAAJ;VJZj2MsAAAAJ;-SlS0mgAAAAJ;https://scholar.google.co.il/citations?user=EPO5_f4AAAAJ;Wk2gAZUAAAAJ;", "orcid": ";;;;;0000-0001-9164-5303;", "linkedin": ";;;;;;", "or_profile": "~Rinon_Gal1;~Yuval_Alaluf1;~Yuval_Atzmon1;~Or_Patashnik1;~Amit_Haim_Bermano2;~Gal_Chechik1;~Daniel_Cohen-or2", "aff": ";Tel Aviv University;NVIDIA;Tel Aviv University;Tel Aviv University;NVIDIA;", "aff_domain": ";tau.ac.il;nvidia.com;tau.post.ac.il;tau.ac.il;nvidia.com;", "position": ";MS student;Researcher;PhD student;Associate Professor;Principal Researcher;", "bibtex": "@inproceedings{\ngal2023an,\ntitle={An Image is Worth One Word: Personalizing Text-to-Image Generation using Textual Inversion},\nauthor={Rinon Gal and Yuval Alaluf and Yuval Atzmon and Or Patashnik and Amit Haim Bermano and Gal Chechik and Daniel Cohen-or},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=NAQvF08TcyG}\n}", "github": "", "project": "", "reviewers": "xRCj;ah2h;W72Q;a4CY", "pdf_size": 34315899, "recommendation": "6;6;8;8", "confidence": "4;4;3;4", "correctness": "3;3;3;3", "technical_novelty": "3;3;4;3", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "88;35;72;91", "wc_strength_and_weaknesses": "287;159;149;222", "wc_clarity_quality_novelty_and_reproducibility": "8;13;41;41", "wc_summary_review": "68;14;26;55", "wc_review": "451;221;288;409", "wc_reply_reviewers": "0;59;0;60", "wc_reply_authors": "1150;538;129;238", "reply_reviewers": "0;1;0;1", "reply_authors": "3;1;1;1", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 71.5, 22.276669409945463 ], "wc_strength_and_weaknesses_avg": [ 204.25, 55.36865087754983 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 25.75, 15.35211711784404 ], "wc_summary_review_avg": [ 40.75, 21.672274915199836 ], "wc_review_avg": [ 342.25, 92.0960775494809 ], "wc_reply_reviewers_avg": [ 29.75, 29.752100766164396 ], "wc_reply_authors_avg": [ 513.75, 396.69659375900875 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.0, "gs_citation": 1810, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9604081622631184742&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=NAQvF08TcyG", "email": ";tau.ac.il;nvidia.com;tau.post.ac.il;tau.ac.il;nvidia.com;", "author_num": 7, "aff_unique_index": "0;1;0;0;1", "aff_unique_norm": "Tel Aviv University;NVIDIA", "aff_unique_dep": ";NVIDIA Corporation", "aff_unique_url": "https://www.tau.ac.il;https://www.nvidia.com", 
"aff_unique_abbr": "TAU;NVIDIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;1", "aff_country_unique": "Israel;United States" }, { "id": "NAuVe6pQ7Jb", "title": "Efficient, probabilistic analysis of combinatorial neural codes", "track": "main", "status": "Withdraw", "tldr": "We improve the computational complexity of previous methods and introduce a hypothesis-checking procedure to study algebraic, geometric, and topological features of neural codes.", "abstract": "Artificial and biological neural networks (ANNs and BNNs) can encode inputs in the form of combinations of individual neurons' activities. These combinatorial neural codes present a computational challenge for direct and efficient analysis due to their high dimensionality and often large volumes of data. Here we improve the computational complexity -- from factorial to quadratic time -- of direct algebraic methods previously applied to small examples and apply them to large neural codes generated by experiments. These methods provide a novel and efficient way of probing algebraic, geometric, and topological characteristics of combinatorial neural codes and provide insights into how such characteristics are related to learning and experience in neural networks. We introduce a procedure to perform hypothesis testing on the intrinsic features of neural codes using information geometry. We then apply these methods to neural activities from an ANN for image classification and a BNN for 2D navigation to, without observing any inputs or outputs, estimate the structure and dimensionality of the stimulus or task space. Additionally, we demonstrate how an ANN varies its internal representations across network depth and during learning.", "keywords": "neural code;topology;algebra;information geometry", "primary_area": "", "supplementary_material": "", "author": "Thomas F Burns;Irwansyah Irwansyah", "authorids": "~Thomas_F_Burns1;~Irwansyah_Irwansyah1", "gender": "M;M", "homepage": "https://tfburns.com/;", "dblp": "311/5096;200/8053", "google_scholar": "xifCmHAAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-1123-2929;0000-0003-1531-2317", "linkedin": "tfburns/;irwansyah-36420493", "or_profile": "~Thomas_F_Burns1;~Irwansyah_Irwansyah1", "aff": "Okinawa Institute of Science and Technology (OIST);University of Mataram", "aff_domain": "oist.jp;unram.ac.id", "position": "PhD student;Lecturer", "bibtex": "@misc{\nburns2023efficient,\ntitle={Efficient, probabilistic analysis of combinatorial neural codes},\nauthor={Thomas F Burns and Irwansyah Irwansyah},\nyear={2023},\nurl={https://openreview.net/forum?id=NAuVe6pQ7Jb}\n}", "github": "", "project": "", "reviewers": "akHJ;z6A5;V2Gp", "site": "https://openreview.net/forum?id=NAuVe6pQ7Jb", "pdf_size": 541621, "recommendation": "3;3;6", "confidence": "2;2;2", "correctness": "3;2;4", "technical_novelty": "2;2;4", "empirical_novelty": "0;2;2", "wc_summary_paper": "261;73;44", "wc_strength_and_weaknesses": "61;423;32", "wc_clarity_quality_novelty_and_reproducibility": "30;48;40", "wc_summary_review": "32;30;4", "wc_review": "384;574;120", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.0, 1.4142135623730951 ], "confidence_avg": [ 2.0, 0.0 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 2.6666666666666665, 0.9428090415820634 ], "empirical_novelty_avg": [ 1.3333333333333333, 0.9428090415820634 ], 
"wc_summary_paper_avg": [ 126.0, 96.19078264920536 ], "wc_strength_and_weaknesses_avg": [ 172.0, 177.8782355058276 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 39.333333333333336, 7.363574011458174 ], "wc_summary_review_avg": [ 22.0, 12.754084313139327 ], "wc_review_avg": [ 359.3333333333333, 186.16360785311636 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.8660254037844387, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:y3ZkoqS8gf4J:scholar.google.com/&scioq=Efficient,+probabilistic+analysis+of+combinatorial+neural+codes&hl=en&as_sdt=0,5", "gs_version_total": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Okinawa Institute of Science and Technology;University of Mataram", "aff_unique_dep": ";", "aff_unique_url": "https://www.oist.jp;https://unram.ac.id", "aff_unique_abbr": "OIST;Unram", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Japan;Indonesia" }, { "id": "NB69ih1tiA1", "title": "Revisiting Over-smoothing in Graph Neural Networks", "track": "main", "status": "Reject", "tldr": "", "abstract": "Shallow graph neural networks (GNNs) are state-of-the-art models for relational data. However, it is known that deep GNNs suffer from over-smoothing where, as the number of layers increases, node representations become nearly indistinguishable and model performance on the downstream task degrades significantly. Despite multiple approaches being proposed to address this problem, it is unclear when any of these methods (or their combination) works best and how they perform when evaluated under exactly the same experimental setting. In this paper, we systematically and carefully evaluate different methods for alleviating over-smoothing in GNNs. Furthermore, inspired by standard deeply supervised nets, we propose a general architecture that helps alleviate over-smoothing based on the idea of layer-wise supervision. We term this architecture deeply supervised GNNs (or DSGNNs for short). Our experiments show that deeper GNNs can indeed provide better performance when trained on a combination of different approaches and that DSGNNs are robust under various conditions and can provide the best performance in missing-feature scenarios. ", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Pantelis Elinas;Edwin V. Bonilla", "authorids": "~Pantelis_Elinas1;~Edwin_V._Bonilla1", "gender": ";M", "homepage": "https://www.thejournal.club/c/profile/2/;http://ebonilla.github.io/", "dblp": "95/3273;23/1754", "google_scholar": "Y_mqs0kAAAAJ;https://scholar.google.com.au/citations?user=uDLRZQMAAAAJ", "orcid": "0000-0003-2175-8518;0000-0002-9904-2408", "linkedin": ";", "or_profile": "~Pantelis_Elinas1;~Edwin_V_Bonilla1", "aff": "CSIRO;CSIRO's Data61", "aff_domain": "data61.csiro.au;data61.csiro.au", "position": "Researcher;Principal Research Scientist", "bibtex": "@misc{\nelinas2023revisiting,\ntitle={Revisiting Over-smoothing in Graph Neural Networks},\nauthor={Pantelis Elinas and Edwin V. 
Bonilla},\nyear={2023},\nurl={https://openreview.net/forum?id=NB69ih1tiA1}\n}", "github": "", "project": "", "reviewers": "QdpM;K9Pi;FUiX;NBPk", "site": "https://openreview.net/forum?id=NB69ih1tiA1", "pdf_size": 379082, "recommendation": "3;3;3;3", "confidence": "4;4;5;4", "correctness": "3;2;3;3", "technical_novelty": "2;2;1;1", "empirical_novelty": "2;2;1;2", "wc_summary_paper": "66;99;78;72", "wc_strength_and_weaknesses": "159;276;116;285", "wc_clarity_quality_novelty_and_reproducibility": "37;88;44;58", "wc_summary_review": "63;25;11;77", "wc_review": "325;488;249;492", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 1.5, 0.5 ], "empirical_novelty_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 78.75, 12.43734296383275 ], "wc_strength_and_weaknesses_avg": [ 209.0, 73.16761578731399 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 56.75, 19.562400159489634 ], "wc_summary_review_avg": [ 44.0, 26.92582403567252 ], "wc_review_avg": [ 388.5, 105.00595221224367 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9618893817387378487&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Commonwealth Scientific and Industrial Research Organisation;CSIRO", "aff_unique_dep": ";Data61", "aff_unique_url": "https://www.csiro.au;https://www.csiro.au", "aff_unique_abbr": "CSIRO;CSIRO", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Australia" }, { "id": "NBES8BZ5wnZ", "title": "SKTformer: A Skeleton Transformer for Long Sequence Data", "track": "main", "status": "Reject", "tldr": "We design an efficient Transformer model for long sequence data", "abstract": "Transformers have become a preferred tool for modeling sequential data. Many studies of using Transformers for long sequence modeling focus on reducing computational complexity. They usually exploit the low-rank structure of data and approximate a long sequence by a sub-sequence. One challenge with such approaches is how to make an appropriate tradeoff between information preserving and noise reduction: the longer the sub-sequence used to approximate the long sequence, the better the information is preserved but at a price of introducing more noise into the model and of course more computational costs. We propose skeleton transformer, SKTformer for short, an efficient transformer architecture that effectively addresses the tradeoff. It introduces two mechanisms to effectively reduce the impact of noise while still keeping the computation linear to the sequence length: a smoothing block to mix information over long sequences and a matrix sketch method that simultaneously selects columns and rows from the input matrix. We verify the effectiveness of SKTformer both theoretically and empirically. 
Extensive studies on both the Long Range Arena (LRA) datasets and six time-series forecasting tasks show that SKTformer significantly outperforms both the vanilla Transformer and other state-of-the-art variants of Transformer. Code is available at\nhttps://anonymous.4open.science/r/SKTFormer-B33B/", "keywords": "Efficient Transformer;Long Sequence Data;CUR decomposition;Robustness;matrix sketching", "primary_area": "", "supplementary_material": "/attachment/b500570f82909afde4dd08572ab4664638a943b8.zip", "author": "xue wang;Tian Zhou;Jianqing Zhu;Jialin Liu;Kun Yuan;Tao Yao;Wotao Yin;Rong Jin;HanQin Cai", "authorids": "~xue_wang1;~Tian_Zhou2;~Jianqing_Zhu2;~Jialin_Liu1;~Kun_Yuan4;~Tao_Yao2;~Wotao_Yin1;~Rong_Jin3;~HanQin_Cai1", "gender": "M;M;;M;;;M;M;M", "homepage": "https://www.linkedin.com/in/xue-wang-98739572/;https://scholar.google.com/citations?user=9o5r8bUAAAAJ&hl=en;;https://liujl11git.github.io/;;;http://wotaoyin.com;https://www.cse.msu.edu/~rongjin/;https://hqcai.org", "dblp": ";31/4578-4.html;;;;;76/2265;j/RongJin;209/9975", "google_scholar": ";9o5r8bUAAAAJ;;QS6Lj5sAAAAJ;;oaqXSegAAAAJ;kpQGGFUAAAAJ;;TqWjx18AAAAJ", "orcid": ";0000-0003-1789-5413;;;;0000-0002-2124-5678;0000-0001-6697-9731;;0000-0002-2937-1986", "linkedin": ";;;;;;;;", "or_profile": "~xue_wang1;~Tian_Zhou2;~Jianqing_Zhu2;~Jialin_Liu1;~Kun_Yuan4;~Tao_Yao2;~Wotao_Yin1;~Rong_Jin3;~HanQin_Cai1", "aff": "Alibaba Group US;Alibaba Group;;Alibaba Group US;;Shanghai Jiaotong University;Alibaba Group US;Twitter;University of Central Florida", "aff_domain": "alibaba-inc.com;alibaba-inc.com;;alibaba-inc.com;;sjtu.edu.cn;alibaba-inc.com;twitter.com;ucf.edu", "position": "Researcher;Researcher;;Researcher;;Full Professor;Principal Researcher;Researcher;Assistant Professor", "bibtex": "@misc{\nwang2023sktformer,\ntitle={{SKT}former: A Skeleton Transformer for Long Sequence Data},\nauthor={xue wang and Tian Zhou and Jianqing Zhu and Jialin Liu and Kun Yuan and Tao Yao and Wotao Yin and Rong Jin and HanQin Cai},\nyear={2023},\nurl={https://openreview.net/forum?id=NBES8BZ5wnZ}\n}", "github": "", "project": "", "reviewers": "6UUx;9G8D;ELnS;FKSC", "site": "https://openreview.net/forum?id=NBES8BZ5wnZ", "pdf_size": 929787, "recommendation": "6;6;6;6", "confidence": "3;4;3;3", "correctness": "3;2;3;3", "technical_novelty": "3;2;3;3", "empirical_novelty": "3;2;3;3", "wc_summary_paper": "74;88;63;62", "wc_strength_and_weaknesses": "327;197;63;139", "wc_clarity_quality_novelty_and_reproducibility": "44;12;27;504", "wc_summary_review": "58;11;43;300", "wc_review": "503;308;196;1005", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 71.75, 10.497023387608508 ], "wc_strength_and_weaknesses_avg": [ 181.5, 96.51295249861543 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 146.75, 206.5688444562732 ], "wc_summary_review_avg": [ 103.0, 114.99782606640875 ], "wc_review_avg": [ 503.0, 309.95080254775917 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 9, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link":
"https://scholar.google.com/scholar?cites=9069899446950112851&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0;1;0;2;3", "aff_unique_norm": "Alibaba Group;Shanghai Jiao Tong University;Twitter, Inc.;University of Central Florida", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.alibaba.com;https://www.sjtu.edu.cn;https://twitter.com;https://www.ucf.edu", "aff_unique_abbr": "Alibaba;SJTU;Twitter;UCF", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1;0;0;0", "aff_country_unique": "United States;China" }, { "id": "NCNT1r62-UV", "title": "Projected Latent Distillation for Data-Agnostic Consolidation in Multi-Agent Continual Learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Many real-world applications are characterized by non-stationary distributions. In this setting, independent expert models trained on subsets of the data can benefit from each other and improve their generalization and forward transfer by sharing knowledge.\nIn this paper, we formalize this problem as a multi-agent continual learning scenario, where agents are trained independently but they can communicate by sharing the model parameters after each learning experience. We split the learning problem into two phases: adaptation and consolidation. Adaptation is a learning phase that optimizes the current task, while consolidation prevents forgetting by combining expert models together, enabling knowledge sharing. We propose Data-Agnostic Consolidation (DAC), a novel double knowledge distillation method. The method performs distillation in the latent space via a novel Projected Latent Distillation (PLD) loss. Experimental results show state-of-the-art accuracy on SplitCIFAR100 even when a single out-of-distribution image is used as the only source of data during consolidation.", "keywords": "continual learning;knowledge distillation", "primary_area": "", "supplementary_material": "/attachment/3429d885be8691096992feda505af1611dc3fd18.zip", "author": "Antonio Carta;Andrea Cossu;Vincenzo Lomonaco;Joost van de Weijer;Davide Bacciu", "authorids": "~Antonio_Carta1;~Andrea_Cossu1;~Vincenzo_Lomonaco1;~Joost_van_de_Weijer5;~Davide_Bacciu1", "gender": ";M;M;M;M", "homepage": "http://pages.di.unipi.it/carta/;https://www.andreacossu.com/;https://vincenzolomonaco.com;http://pages.di.unipi.it/bacciu/;http://lamp.cvc.uab.es/", "dblp": "178/6658;262/6262;157/5127;07/6626;67/3379", "google_scholar": ";0Kst5iEAAAAJ;https://scholar.google.it/citations?user=rQLINtQAAAAJ;https://scholar.google.it/citations?user=1d5n2WkAAAAJ;https://scholar.google.es/citations?user=Gsw2iUEAAAAJ", "orcid": ";;;0000-0001-5213-2468;0000-0002-9656-9706", "linkedin": ";;;bacciu/;", "or_profile": "~Antonio_Carta1;~Andrea_Cossu1;~Vincenzo_Lomonaco1;~Davide_Bacciu1;~Joost_van_de_Weijer1", "aff": "University of Pisa;Scuola Normale Superiore;University of Pisa;University of Pisa;Computer Vision Center, Universitat Aut\u00f3noma de Barcelona", "aff_domain": "unipi.it;sns.it;unipi.it;unipi.it;cvc.uab.es", "position": "Assistant Professor;PhD student;Assistant Professor;Full Professor;Researcher", "bibtex": "@misc{\ncarta2023projected,\ntitle={Projected Latent Distillation for Data-Agnostic Consolidation in Multi-Agent Continual Learning},\nauthor={Antonio Carta and Andrea Cossu and Vincenzo Lomonaco and Joost van de Weijer and Davide Bacciu},\nyear={2023},\nurl={https://openreview.net/forum?id=NCNT1r62-UV}\n}", "github": "", "project": "", "reviewers": 
"Peod;CmJC;A6Ga;kNci", "site": "https://openreview.net/forum?id=NCNT1r62-UV", "pdf_size": 13032656, "recommendation": "3;3;3;6", "confidence": "4;3;3;2", "correctness": "3;1;3;4", "technical_novelty": "2;3;3;3", "empirical_novelty": "0;2;1;3", "wc_summary_paper": "48;66;47;57", "wc_strength_and_weaknesses": "203;203;316;80", "wc_clarity_quality_novelty_and_reproducibility": "28;597;71;90", "wc_summary_review": "17;56;49;80", "wc_review": "296;922;483;307", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "486;806;330;149", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 3.75, 1.299038105676658 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 2.75, 1.0897247358851685 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.5, 1.118033988749895 ], "wc_summary_paper_avg": [ 54.5, 7.697402159170326 ], "wc_strength_and_weaknesses_avg": [ 200.5, 83.47604446785915 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 196.5, 232.31713238588324 ], "wc_summary_review_avg": [ 50.5, 22.5 ], "wc_review_avg": [ 502.0, 253.585291371562 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 442.75, 241.25854907132307 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.816496580927726, "corr_recommendation_correctness": 0.662266178532522, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:H2Z3LEsAZfAJ:scholar.google.com/&scioq=Projected+Latent+Distillation+for+Data-Agnostic+Consolidation+in+Multi-Agent+Continual+Learning&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;0;0;2", "aff_unique_norm": "University of Pisa;Scuola Normale Superiore;Universitat Aut\u00f3noma de Barcelona", "aff_unique_dep": ";;Computer Vision Center", "aff_unique_url": "https://www.unipi.it;https://www.sns.it;https://www.uab.cat", "aff_unique_abbr": "UNIP;SNS;UAB", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "Italy;Spain" }, { "title": "Learning Achievement Structure for Structured Exploration in Domains with Sparse Reward", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10811", "id": "NDWl9qcUpvy", "poster": "", "openreview": "https://openreview.net/forum?id=NDWl9qcUpvy", "slides": "https://iclr.cc/virtual/2023/poster/10811", "video": "https://iclr.cc/virtual/2023/poster/10811", "author_site": "Zihan Zhou, Animesh Garg", "tldr": "", "abstract": "We propose Structured Exploration with Achievements (SEA), a multi-stage reinforcement learning algorithm designed for achievement-based environments, a particular type of environment with an internal achievement set. SEA first uses offline data to learn a representation of the known achievements with a determinant loss function, then recovers the dependency graph of the learned achievements with a heuristic algorithm, and finally interacts with the environment online to learn policies that master known achievements and explore new ones with a controller built with the recovered dependency graph. 
We empirically demonstrate that SEA can recover the achievement structure accurately and improve exploration in hard domains such as Crafter that are procedurally generated with high-dimensional observations like images.", "keywords": "deep reinforcement learning;structured exploration", "primary_area": "", "supplementary_material": "/attachment/56e21b2fb4ad4ba27420b3f61e69f611fa6d5496.zip", "author": "Zihan Zhou;Animesh Garg", "authorids": "~Zihan_Zhou1;~Animesh_Garg1", "gender": "M;M", "homepage": ";http://animesh.garg.tech", "dblp": "00/6525-2;123/5728", "google_scholar": ";zp8V7ZMAAAAJ", "orcid": ";0000-0003-0482-4296", "linkedin": ";animeshgarg/", "or_profile": "~Zihan_Zhou1;~Animesh_Garg1", "aff": "Nuro, Inc;University of Toronto", "aff_domain": "nuro.ai;toronto.edu", "position": "Intern;Assistant Professor", "bibtex": "@inproceedings{\nzhou2023learning,\ntitle={Learning Achievement Structure for Structured Exploration in Domains with Sparse Reward},\nauthor={Zihan Zhou and Animesh Garg},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=NDWl9qcUpvy}\n}", "github": "", "project": "", "reviewers": "Jys6;wFTY;kuVW;uoYd", "pdf_size": 1085133, "recommendation": "5;5;8;8", "confidence": "4;4;3;4", "correctness": "3;3;3;4", "technical_novelty": "3;2;3;4", "empirical_novelty": "2;2;3;4", "wc_summary_paper": "117;80;95;46", "wc_strength_and_weaknesses": "663;343;430;105", "wc_clarity_quality_novelty_and_reproducibility": "64;31;166;160", "wc_summary_review": "22;43;59;29", "wc_review": "866;497;750;340", "wc_reply_reviewers": "206;428;0;17", "wc_reply_authors": "1123;2143;303;597", "reply_reviewers": "1;1;0;1", "reply_authors": "2;4;1;1", "recommendation_avg": [ 6.5, 1.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 84.5, 25.83118270617898 ], "wc_strength_and_weaknesses_avg": [ 385.25, 199.67019682466383 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 105.25, 58.9549616232595 ], "wc_summary_review_avg": [ 38.25, 14.16642156650719 ], "wc_review_avg": [ 613.25, 206.61724879593186 ], "wc_reply_reviewers_avg": [ 162.75, 173.17530857486582 ], "wc_reply_authors_avg": [ 1041.5, 700.5189148053034 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.5773502691896258, "corr_recommendation_correctness": 0.5773502691896258, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9210936205554332837&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=NDWl9qcUpvy", "email": "nuro.ai;toronto.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Nuro, Inc;University of Toronto", "aff_unique_dep": ";", "aff_unique_url": "https://www.nuro.ai;https://www.utoronto.ca", "aff_unique_abbr": "Nuro;U of T", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;Canada" }, { "title": "CktGNN: Circuit Graph Neural Network for Electronic Design Automation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10780", "id": "NE2911Kq1sp", "poster": "/media/PosterPDFs/ICLR%202023/10780.png?t=1681116241.8352418", "openreview": 
"https://openreview.net/forum?id=NE2911Kq1sp", "slides": "https://iclr.cc/virtual/2023/poster/10780", "video": "https://iclr.cc/virtual/2023/poster/10780", "author_site": "Zehao Dong, Weidong Cao, Muhan Zhang, Dacheng Tao, Yixin Chen, Xuan Zhang", "tldr": "", "abstract": "The electronic design automation of analog circuits has been a longstanding challenge in the integrated circuit field due to the huge design space and complex design trade-offs among circuit specifications. In the past decades, intensive research efforts have only been paid to automate the transistor sizing with a given circuit topology. By recognizing the graph nature of circuits, this paper presents a Circuit Graph Neural Network (CktGNN) that simultaneously automates the circuit topology generation and device sizing based on the encoder-dependent optimization subroutines. Particularly, CktGNN encodes circuit graphs using a two-level GNN framework (of nested GNN) where circuits are represented as combinations of subgraphs in a known subgraph basis. In this way, it significantly improves efficiency by reducing the number of subgraphs to perform message passing.\n\nNonetheless, another critical roadblock to advancing learning-assisted circuit design automation is a lack of public benchmarks to perform canonical assessment and reproducible research. To tackle the challenge, we introduce Open Circuit Benchmark (OCB), an open-sourced dataset that contains $10$K distinct operational amplifiers with carefully-extracted circuit specifications from physical implementations. OCB also equips with communicative circuit generation and evaluation capabilities such that it can be used to generalize the applicability of CktGNN to design various analog circuits by efficiently producing corresponding datasets. Experiments on OCB show the extraordinary advantages of CktGNN through representation-based optimization frameworks over other recent powerful GNN baselines and manual design from human experts. Our work paves the way toward a learning-based open-sourced design automation flow for analog circuits.", "keywords": "Graph Neural Networks;Electronic Design Automation;Benchmark Graph Dataset", "primary_area": "", "supplementary_material": "/attachment/dc70f5623639cf5f02562d1cc85ec02297369514.zip", "author": "Zehao Dong;Weidong Cao;Muhan Zhang;Dacheng Tao;Yixin Chen;Xuan Zhang", "authorids": "~Zehao_Dong1;~Weidong_Cao2;~Muhan_Zhang1;~Dacheng_Tao1;~Yixin_Chen1;~Xuan_Zhang1", "gender": "M;;M;;M;", "homepage": "https://www.zehaodong.com;;https://muhanzhang.github.io/;;https://www.cse.wustl.edu/~yixin.chen/;", "dblp": "292/7480;;157/5518;;59/983;", "google_scholar": ";;https://scholar.google.com.hk/citations?user=OBBqkosAAAAJ;;NByrsK0AAAAJ;", "orcid": ";;0000-0002-7680-6401;;;", "linkedin": ";;jerry-muhan-zhang-a33a1777/;;;", "or_profile": "~Zehao_Dong1;~Weidong_Cao2;~Muhan_Zhang1;~Dacheng_Tao1;~Yixin_Chen1;~Xuan_Zhang1", "aff": "Washington University, St. 
Louis;;Peking University;;Washington University, Saint Louis;", "aff_domain": "wustl.edu;;pku.edu.cn;;wustl.edu;", "position": "PhD student;;Assistant Professor;;Full Professor;", "bibtex": "@inproceedings{\ndong2023cktgnn,\ntitle={Ckt{GNN}: Circuit Graph Neural Network for Electronic Design Automation},\nauthor={Zehao Dong and Weidong Cao and Muhan Zhang and Dacheng Tao and Yixin Chen and Xuan Zhang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=NE2911Kq1sp}\n}", "github": "", "project": "", "reviewers": "uYkn;F1jP;nGNe;ESXP", "pdf_size": 744892, "recommendation": "6;6;6;8", "confidence": "3;5;2;4", "correctness": "3;4;3;3", "technical_novelty": "2;3;4;3", "empirical_novelty": "4;3;4;3", "wc_summary_paper": "47;57;79;113", "wc_strength_and_weaknesses": "70;120;174;150", "wc_clarity_quality_novelty_and_reproducibility": "78;25;5;65", "wc_summary_review": "27;56;25;52", "wc_review": "222;258;283;380", "wc_reply_reviewers": "24;0;0;0", "wc_reply_authors": "1446;788;989;1252", "reply_reviewers": "2;0;0;0", "reply_authors": "3;2;2;2", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 3.5, 0.5 ], "wc_summary_paper_avg": [ 74.0, 25.317977802344327 ], "wc_strength_and_weaknesses_avg": [ 128.5, 38.81687777243296 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 43.25, 29.4819860253681 ], "wc_summary_review_avg": [ 40.0, 14.089002803605371 ], "wc_review_avg": [ 285.75, 58.576339079870806 ], "wc_reply_reviewers_avg": [ 6.0, 10.392304845413264 ], "wc_reply_authors_avg": [ 1118.75, 250.5387944011865 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.2581988897471611, "corr_recommendation_correctness": -0.3333333333333333, "gs_citation": 43, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13709401719737825136&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=NE2911Kq1sp", "email": "wustl.edu;;pku.edu.cn;;wustl.edu;", "author_num": 6, "aff_unique_index": "0;1;0", "aff_unique_norm": "Washington University in St. Louis;Peking University", "aff_unique_dep": ";", "aff_unique_url": "https://wustl.edu;http://www.pku.edu.cn", "aff_unique_abbr": "WUSTL;Peking U", "aff_campus_unique_index": "0;2", "aff_campus_unique": "St. Louis;;Saint Louis", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;China" }, { "id": "NE5P2sEK4Z5", "title": "Closed-loop Transcription via Convolutional Sparse Coding", "track": "main", "status": "Reject", "tldr": "This paper combines the recent closed-loop transcription framework with convolutional sparse coding layers and demonstrates superior generative autoencoding performance.", "abstract": "Autoencoding has been a popular and effective framework for learning generative models for images, with much empirical success. Autoencoders often use generic deep networks as the encoder and decoder, which are difficult to interpret, and the learned representations lack clear structure. In this work, we replace the encoder and decoder with standard convolutional sparse coding and decoding layers, obtained from unrolling an optimization algorithm for solving a (convexified) sparse coding program. 
Furthermore, to avoid computational difficulties in minimizing distributional distance between the real and generated images, we utilize the recent closed-loop transcription (CTRL) framework that maximizes the rate reduction of the learned sparse representations. We show that such a simple framework demonstrates surprisingly competitive performance on large datasets, such as ImageNet-1K, compared to existing autoencoding and generative methods under fair conditions. Even with simpler networks and less computational resources, our method demonstrates splendid visual quality in regenerated images with striking sample-wise consistency. More surprisingly, the learned autoencoder generalizes to unseen datasets. Our method enjoys several side benefits, including more structured and interpretable representations, more stable convergence, scalability to large datasets -- indeed, our method is the first sparse coding generative method to scale up to ImageNet -- and trainability with smaller batch sizes.", "keywords": "Convolutional Sparse Coding;Inverse Models;Rate Reduction", "primary_area": "", "supplementary_material": "", "author": "Xili Dai;Ke Chen;Shengbang Tong;Jingyuan Zhang;Xingjian Gao;Yuexiang Zhai;Mingyang Li;Xiaojun Yuan;Heung-Yeung Shum;Lionel Ni;Yi Ma", "authorids": "~Xili_Dai2;~Ke_Chen14;~Shengbang_Tong1;~Jingyuan_Zhang2;~Xingjian_Gao1;~Yuexiang_Zhai1;~Mingyang_Li3;~Xiaojun_Yuan1;~Heung-Yeung_Shum1;~Lionel_Ni1;~Yi_Ma4", "gender": "M;M;M;;M;;M;M;M;M;M", "homepage": "https://delay-xili.github.io/;https://www.google.com;https://tsb0601.github.io/petertongsb/;;;;;;https://www.microsoft.com/en-us/research/people/hshum/;http://repository.ust.hk/ir/AuthorProfile/ni-lionel;http://people.eecs.berkeley.edu/~yima/", "dblp": "170/8561;;306/1406;;;241/6124.html;;;;n/LionelMNi;", "google_scholar": "CtRMD1UAAAAJ;;https://scholar.google.com/citations?hl=en;;;78WTKm4AAAAJ;;https://scholar.google.com.hk/citations?user=o6W_m00AAAAJ;;https://scholar.google.com.tw/citations?user=OzMYwDIAAAAJ;https://scholar.google.com.hk/citations?user=XqLiBQMAAAAJ", "orcid": ";;;;;;;;;;", "linkedin": "xili-daley-dai-b87030179/;;;;xgao0613;;http://www.linkedin.com/in/limy;;;;", "or_profile": "~Xili_Dai2;~Ke_Chen14;~Shengbang_Tong1;~Jingyuan_Zhang2;~Xingjian_Gao1;~Yuexiang_Zhai1;~Mingyang_Li3;~Xiaojun_Yuan1;~Heung-Yeung_Shum1;~Lionel_Ni1;~Yi_Ma4", "aff": "Hong Kong University of Science and Technology (Guangzhou);Tsinghua University;University of California, Berkeley;;;University of California, Berkeley;Tsinghua University;University of Electronic Science and Technology of China, Tsinghua University;;Hong Kong University of Science and Technology;University of California, Berkeley", "aff_domain": "hkust.edu;tsinghua.edu.cn;berkeley.edu;;;berkeley.edu;tsinghua.edu.cn;uestc.edu.cn;;ust.hk;berkeley.edu", "position": "PhD student;MS student;Undergrad student;;;PhD student;PhD student;Full Professor;;Full Professor;Full Professor", "bibtex": "@misc{\ndai2023closedloop,\ntitle={Closed-loop Transcription via Convolutional Sparse Coding},\nauthor={Xili Dai and Ke Chen and Shengbang Tong and Jingyuan Zhang and Xingjian Gao and Yuexiang Zhai and Mingyang Li and Xiaojun Yuan and Heung-Yeung Shum and Lionel Ni and Yi Ma},\nyear={2023},\nurl={https://openreview.net/forum?id=NE5P2sEK4Z5}\n}", "github": "", "project": "", "reviewers": "b6Q7;qbVZ;Ti8A;uge7", "site": "https://openreview.net/forum?id=NE5P2sEK4Z5", "pdf_size": 36196224, "recommendation": "3;6;6;6", "confidence": "3;4;4;4", "correctness": "2;3;3;3", "technical_novelty": 
"2;3;1;3", "empirical_novelty": "1;2;2;3", "wc_summary_paper": "40;94;244;201", "wc_strength_and_weaknesses": "328;727;1158;361", "wc_clarity_quality_novelty_and_reproducibility": "21;4;32;45", "wc_summary_review": "23;14;83;75", "wc_review": "412;839;1517;682", "wc_reply_reviewers": "0;254;1673;237", "wc_reply_authors": "850;1358;5077;642", "reply_reviewers": "0;1;7;2", "reply_authors": "3;4;10;2", "recommendation_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.82915619758885 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 144.75, 81.49041354662522 ], "wc_strength_and_weaknesses_avg": [ 643.5, 335.7934633074325 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 25.5, 15.041608956491324 ], "wc_summary_review_avg": [ 48.75, 30.548117781624452 ], "wc_review_avg": [ 862.5, 407.5699326496006 ], "wc_reply_reviewers_avg": [ 541.0, 661.2280242095006 ], "wc_reply_authors_avg": [ 1981.75, 1805.9225308689186 ], "reply_reviewers_avg": [ 2.5, 2.692582403567252 ], "reply_authors_avg": [ 4.75, 3.112474899497183 ], "replies_avg": [ 34, 0 ], "authors#_avg": [ 11, 0 ], "corr_recommendation_confidence": 1.0, "corr_recommendation_correctness": 1.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4133966857352073597&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;2;2;1;3;0;2", "aff_unique_norm": "Hong Kong University of Science and Technology;Tsinghua University;University of California, Berkeley;University of Electronic Science and Technology of China", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.ust.hk;https://www.tsinghua.edu.cn;https://www.berkeley.edu;https://www.uestc.edu.cn", "aff_unique_abbr": "HKUST;THU;UC Berkeley;UESTC", "aff_campus_unique_index": "0;2;2;0;2", "aff_campus_unique": "Hong Kong SAR;;Berkeley", "aff_country_unique_index": "0;0;1;1;0;0;0;1", "aff_country_unique": "China;United States" }, { "id": "NEEtm5laNK1", "title": "CHiLS: Zero-Shot Image Classification with Hierarchical Label Sets", "track": "main", "status": "Reject", "tldr": "", "abstract": "Open vocabulary models (e.g. CLIP) have shown strong performance on zeroshot classification through their ability generate embeddings for each class based on their (natural language) names. Prior work has focused on improving the accuracy of these models through prompt engineering or by incorporating a small amount of labeled downstream data (via finetuning). In this paper, we propose Classification with Hierarchical Label Sets (or CHiLS), an alternative strategy that proceeds in three steps: (i) for each class, produce a set of subclasses, using either existing label hierarchies or by querying GPT-3; (ii) perform the standard zero-shot CLIP procedure as though these subclasses were the labels of interest; (iii) map the predicted subclass back to its parent to produce the final prediction. Across numerous datasets, CHiLS leads to improved accuracy yielding gains of over 30% in situations where known hierarchies are available and more modest gains when they are not. 
CHiLS is simple to implement within existing CLIP pipelines and requires no additional training cost.", "keywords": "open vocabulary models;CLIP;zero-shot learning;zero-shot image classification", "primary_area": "", "supplementary_material": "", "author": "Zachary Novack;Saurabh Garg;Zachary Chase Lipton", "authorids": "~Zachary_Novack1;~Saurabh_Garg3;~Zachary_Chase_Lipton1", "gender": "M;M;Unspecified", "homepage": "https://zacharynovack.github.io/;http://saurabhgarg1996.github.io/;http://zacklipton.com", "dblp": "334/7662;80/208;", "google_scholar": "fZKJdb0AAAAJ;SAnJ1hIAAAAJ;MN9Kfg8AAAAJ", "orcid": ";;", "linkedin": "zachary-novack/;saurabh-garg-b680b5b8/;", "or_profile": "~Zachary_Novack1;~Saurabh_Garg3;~Zachary_Chase_Lipton1", "aff": "University of California, San Diego;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "ucsd.edu;cmu.edu;cmu.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@misc{\nnovack2023chils,\ntitle={{CH}i{LS}: Zero-Shot Image Classification with Hierarchical Label Sets},\nauthor={Zachary Novack and Saurabh Garg and Zachary Chase Lipton},\nyear={2023},\nurl={https://openreview.net/forum?id=NEEtm5laNK1}\n}", "github": "", "project": "", "reviewers": "oLxZ;msk3;VtPj;ohpn", "site": "https://openreview.net/forum?id=NEEtm5laNK1", "pdf_size": 2756789, "recommendation": "3;5;5;6", "confidence": "3;5;3;4", "correctness": "3;3;3;4", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;3;3;2", "wc_summary_paper": "61;82;69;81", "wc_strength_and_weaknesses": "92;939;49;328", "wc_clarity_quality_novelty_and_reproducibility": "46;159;26;29", "wc_summary_review": "40;115;97;114", "wc_review": "239;1295;241;552", "wc_reply_reviewers": "0;184;0;0", "wc_reply_authors": "763;2368;371;797", "reply_reviewers": "0;1;0;0", "reply_authors": "1;4;1;1", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 73.25, 8.728545125048045 ], "wc_strength_and_weaknesses_avg": [ 352.0, 355.1598231782418 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 65.0, 54.80419691957907 ], "wc_summary_review_avg": [ 91.5, 30.581857366746057 ], "wc_review_avg": [ 581.75, 431.04487875394136 ], "wc_reply_reviewers_avg": [ 46.0, 79.67433714816836 ], "wc_reply_authors_avg": [ 1074.75, 765.1948689712967 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.48420012470625223, "corr_recommendation_correctness": 0.6622661785325219, "gs_citation": 99, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16585154469770363135&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff_unique_index": "0;1;1", "aff_unique_norm": "University of California, San Diego;Carnegie Mellon University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucsd.edu;https://www.cmu.edu", "aff_unique_abbr": "UCSD;CMU", "aff_campus_unique_index": "0", "aff_campus_unique": "San Diego;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Learning Simultaneous Navigation and Construction in Grid Worlds", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11511", "id": "NEtep2C7yD", "poster": "", "openreview": "https://openreview.net/forum?id=NEtep2C7yD", 
"slides": "https://iclr.cc/virtual/2023/poster/11511", "video": "https://iclr.cc/virtual/2023/poster/11511", "author_site": "Wenyu Han, Haoran Wu, Eisuke Hirota, Alexander Gao, Lerrel Pinto, Ludovic Righetti, Chen Feng", "tldr": "Position-related representation learning improves DRL consistently when addressing the localization-planning interdependence challenge in the proposed mobile construction tasks.", "abstract": "We propose to study a new learning task, mobile construction, to enable an agent to build designed structures in 1/2/3D grid worlds while navigating in the same evolving environments. Unlike existing robot learning tasks such as visual navigation and object manipulation, this task is challenging because of the interdependence between accurate localization and strategic construction planning. In pursuit of generic and adaptive solutions to this partially observable Markov decision process (POMDP) based on deep reinforcement learning (RL), we design\na Deep Recurrent Q-Network (DRQN) with explicit recurrent position estimation in this dynamic grid world. Our extensive experiments show that pre-training this position estimation module before Q-learning can significantly improve the construction performance measured by the intersection-over-union score, achieving the best results in our benchmark of various baselines including model-free and model-based RL, a handcrafted SLAM-based policy, and human players. Our code is available at: https://ai4ce.github.io/SNAC/.", "keywords": "Navigation;Localization;Construction;Deep reinforcement learning;Representation learning", "primary_area": "", "supplementary_material": "/attachment/edfa517faedfae0f3fdd076b62bce58ae75ebd6b.zip", "author": "Wenyu Han;Haoran Wu;Eisuke Hirota;Alexander Gao;Lerrel Pinto;Ludovic Righetti;Chen Feng", "authorids": "~Wenyu_Han1;~Haoran_Wu2;eh3019@nyu.edu;~Alexander_Gao1;~Lerrel_Pinto1;~Ludovic_Righetti1;~Chen_Feng2", "gender": "M;M;;M;M;M;M", "homepage": "https://ai4ce.github.io/;;;https://gaoalexander.github.io;https://www.lerrelpinto.com/;https://engineering.nyu.edu/faculty/ludovic-righetti;https://ai4ce.github.io/", "dblp": ";;;289/1694.html;168/8304;;01/161-2", "google_scholar": ";;;uoNPrRUAAAAJ;pmVPj94AAAAJ;LuA1j4oAAAAJ;YeG8ZM0AAAAJ", "orcid": ";;;;;0000-0002-6458-9112;0000-0003-3211-1576", "linkedin": ";haoran-lucas-ng-4053471a0/;;;;;simbaforrest/", "or_profile": "~Wenyu_Han1;~Haoran_Wu2;eh3019@nyu.edu;~Alexander_Gao1;~Lerrel_Pinto1;~Ludovic_Righetti1;~Chen_Feng2", "aff": "New York University;New York University;;University of Maryland, College Park;New York University;Max-Planck Institute;New York University", "aff_domain": "nyu.edu;nyu.edu;;umd.edu;cs.nyu.edu;mpg.de;nyu.edu", "position": "PhD student;MS student;;PhD student;Assistant Professor;Research Group Leader;Assistant Professor", "bibtex": "@inproceedings{\nhan2023learning,\ntitle={Learning Simultaneous Navigation and Construction in Grid Worlds },\nauthor={Wenyu Han and Haoran Wu and Eisuke Hirota and Alexander Gao and Lerrel Pinto and Ludovic Righetti and Chen Feng},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=NEtep2C7yD}\n}", "github": "", "project": "", "reviewers": "cAFg;FZE1;SRjN;fEQb", "pdf_size": 8665320, "recommendation": "6;6;8;8", "confidence": "3;4;3;4", "correctness": "4;4;4;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "81;120;88;125", "wc_strength_and_weaknesses": "224;123;234;204", 
"wc_clarity_quality_novelty_and_reproducibility": "13;46;478;20", "wc_summary_review": "128;44;114;152", "wc_review": "446;333;914;501", "wc_reply_reviewers": "0;0;261;110", "wc_reply_authors": "457;252;698;423", "reply_reviewers": "0;0;1;1", "reply_authors": "2;2;2;1", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 103.5, 19.241881404893856 ], "wc_strength_and_weaknesses_avg": [ 196.25, 43.64845358085439 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 139.25, 195.96348511903946 ], "wc_summary_review_avg": [ 109.5, 40.18395202067612 ], "wc_review_avg": [ 548.5, 219.54099844903686 ], "wc_reply_reviewers_avg": [ 92.75, 107.01722992116737 ], "wc_reply_authors_avg": [ 457.5, 159.10766794846816 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": -0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3849816121626176312&as_sdt=10005&sciodt=0,8&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=NEtep2C7yD", "email": "nyu.edu;nyu.edu;;umd.edu;cs.nyu.edu;mpg.de;nyu.edu", "author_num": 7, "aff_unique_index": "0;0;1;0;2;0", "aff_unique_norm": "New York University;University of Maryland;Max-Planck-Gesellschaft zur F\u00f6rderung der Wissenschaften e.V.", "aff_unique_dep": ";;", "aff_unique_url": "https://www.nyu.edu;https://www/umd.edu;https://www.mpg.de", "aff_unique_abbr": "NYU;UMD;MPG", "aff_campus_unique_index": "1", "aff_campus_unique": ";College Park", "aff_country_unique_index": "0;0;0;0;1;0", "aff_country_unique": "United States;Germany" }, { "id": "NFcRC4aYSWf", "title": "Highway Reinforcement Learning", "track": "main", "status": "Reject", "tldr": "a novel adaptive multi-step Bellman Optimality Equation for efficient credit assignment that converges to the optimal value function with better contraction rate under mild assumptions", "abstract": "Traditional Dynamic Programming (DP) approaches suffer from slow backward credit-assignment (CA): only a one-step search is performed at each update. A popular solution for multi-step CA is to use multi-step Bellman operators. Unfortunately, in the control settings, existing methods typically suffer from the large variance of multi-step off-policy corrections or are biased, preventing convergence. To overcome these problems, we introduce a novel multi-step Bellman optimality equation with adaptive lookahead steps. We first derive a new multi-step Value Iteration (VI) method that converges to the optimal Value Function (VF) with an exponential contraction rate but linear computational complexity. Given some trial, our so-called Highway RL performs rapid CA, by picking a policy and a possible lookahead (up to the trial end) that maximize the near-term reward during lookahead plus a DP-based estimate of the cumulative reward for the remaining part of the trial. Highway RL does not require off-policy corrections. Under mild assumptions, it achieves better convergence rates than the traditional one-step Bellman Optimality Operator. We then derive Highway Q-Learning, a convergent multi-step off-policy variant of Q-learning. We show that our Highway algorithms significantly outperform DP approaches on toy tasks. 
Finally, we propose a deep function approximation variant called Highway DQN. We evaluate it on visual MinAtar Games, outperforming similar multi-step methods.", "keywords": "reinforcement learning;off-policy learning;credit assignment;Bellman Equation", "primary_area": "", "supplementary_material": "", "author": "Yuhui Wang;Haozhe Liu;Miroslav Strupl;Francesco Faccio;Qingyuan Wu;Xiaoyang Tan;J\u00fcrgen Schmidhuber", "authorids": "~Yuhui_Wang1;~Haozhe_Liu1;~Miroslav_Strupl1;~Francesco_Faccio1;~Qingyuan_Wu1;~Xiaoyang_Tan2;~J\u00fcrgen_Schmidhuber1", "gender": "M;M;M;M;M;M;M", "homepage": "https://wangyuhuix.github.io/;https://haozheliu-st.github.io/;;;;http://parnec.nuaa.edu.cn/xtan;http://people.idsia.ch/~juergen/", "dblp": ";201/5238;298/0579;227/3214;;79/768;s/JurgenSchmidhuber", "google_scholar": "https://scholar.google.com.tw/citations?hl=zh-CN;QX51P54AAAAJ;;0z3DkrkAAAAJ;CYfMzb8AAAAJ;rHMtSOYAAAAJ;https://scholar.google.ch/citations?user=gLnCTgIAAAAJ", "orcid": ";;;;;;", "linkedin": ";;miroslav-%C5%A1trupl-4b3643173/;;;;", "or_profile": "~Yuhui_Wang1;~Haozhe_Liu1;~Miroslav_Strupl1;~Francesco_Faccio1;~Qingyuan_Wu1;~Xiaoyang_Tan2;~J\u00fcrgen_Schmidhuber1", "aff": "King Abdullah University of Science and Technology;King Abdullah University of Science and Technology;IDSIA USI-SUPSI Istituto Dalle Molle di studi sull'intelligenza artificiale;The Swiss AI Lab IDSIA - USI - SUPSI;University of Liverpool;Nanjing University of Aeronautics and Astronautics;IDSIA", "aff_domain": "kaust.edu.sa;kaust.edu.sa;idsia.ch;idsia.ch;liverpool.ac.uk;nuaa.edu.cn;idsia.ch", "position": "Postdoc;PhD student;Postdoc;PhD student;PhD student;Full Professor;Scientific Director", "bibtex": "@misc{\nwang2023highway,\ntitle={Highway Reinforcement Learning},\nauthor={Yuhui Wang and Haozhe Liu and Miroslav Strupl and Francesco Faccio and Qingyuan Wu and Xiaoyang Tan and J{\\\"u}rgen Schmidhuber},\nyear={2023},\nurl={https://openreview.net/forum?id=NFcRC4aYSWf}\n}", "github": "", "project": "", "reviewers": "NWV2;ftfH;dbJn;7nDd", "site": "https://openreview.net/forum?id=NFcRC4aYSWf", "pdf_size": 2151059, "recommendation": "5;5;6;6", "confidence": "4;4;4;3", "correctness": "3;2;4;3", "technical_novelty": "3;2;3;3", "empirical_novelty": "1;3;2;3", "wc_summary_paper": "88;75;130;53", "wc_strength_and_weaknesses": "578;185;318;142", "wc_clarity_quality_novelty_and_reproducibility": "22;41;38;26", "wc_summary_review": "95;33;42;33", "wc_review": "783;334;528;254", "wc_reply_reviewers": "0;104;0;0", "wc_reply_authors": "1420;1299;471;318", "reply_reviewers": "0;1;0;0", "reply_authors": "2;3;1;1", "recommendation_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 86.5, 28.0579756931964 ], "wc_strength_and_weaknesses_avg": [ 305.75, 170.04760362910147 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 31.75, 7.949056547792323 ], "wc_summary_review_avg": [ 50.75, 25.810608284191986 ], "wc_review_avg": [ 474.75, 203.95756298799023 ], "wc_reply_reviewers_avg": [ 26.0, 45.033320996790806 ], "wc_reply_authors_avg": [ 877.0, 487.4038366693475 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.7071067811865475, "gs_citation": 3, 
"gs_cited_by_link": "https://scholar.google.com/scholar?cites=17380631037900460355&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;1;2;3;4;5", "aff_unique_norm": "King Abdullah University of Science and Technology;Istituto dalle Molle di Studi sull'Intelligenza Artificiale;Swiss AI Lab IDSIA;University of Liverpool;Nanjing University of Aeronautics and Astronautics;Institute of Digital Technologies", "aff_unique_dep": ";;AI Lab;;;", "aff_unique_url": "https://www.kast.kau.edu.sa;https://www.idsia.ch/;https://www.idsia.ch/;https://www.liverpool.ac.uk;http://www.nuaa.edu.cn;https://www.idsia.ch", "aff_unique_abbr": "KAUST;IDSIA;IDSIA;Liv Uni;NUAA;IDSIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1;2;3;1", "aff_country_unique": "Saudi Arabia;Switzerland;United Kingdom;China" }, { "id": "NFzHAognkpQ", "title": "Steerable Equivariant Representation Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Pre-trained deep image representations are useful for post-training tasks such as classification through transfer learning, image retrieval, and object detection. Data augmentations are a crucial aspect of pre-training robust representations in both supervised and self-supervised settings. Data augmentations explicitly or implicitly promote \\emph{invariance} in the embedding space to the input image transformations. This invariance reduces generalization to those downstream tasks which rely on sensitivity to these particular data augmentations. In this paper, we propose a method of learning representations that are instead \\emph{equivariant} to data augmentations. We achieve this equivariance through the use of \\emph{steerable} representations. Our representations can be manipulated directly in embedding space via learned linear maps. We demonstrate that our resulting steerable and equivariant representations lead to better performance on transfer learning and robustness: e.g. we improve linear probe top-1 accuracy by between 1\\% to 3\\% for transfer; and ImageNet-C accuracy by upto 3.4\\%. 
We further show that the steerability of our representations provides significant speedup (nearly $50\\times$) for test-time augmentations; by applying a large number of augmentations for out-of-distribution detection, we significantly improve OOD AUC on the ImageNet-C dataset over an invariant representation.", "keywords": "representation;visual;equivariance;equivariant", "primary_area": "", "supplementary_material": "/attachment/ecaae3a79b0fbe3ff1eaf58a88ad7490e7249781.zip", "author": "Sangnie Bhardwaj;Willie McClinton;Tongzhou Wang;Guillaume Lajoie;Chen Sun;Phillip Isola;Dilip Krishnan", "authorids": "~Sangnie_Bhardwaj1;~Willie_McClinton1;~Tongzhou_Wang1;~Guillaume_Lajoie1;~Chen_Sun1;~Phillip_Isola1;~Dilip_Krishnan1", "gender": "F;;M;M;M;M;M", "homepage": "https://sangnie.github.io/;https://wmcclinton.github.io/;https://www.tongzhouwang.info/;https://dms.umontreal.ca/~lajoie/;https://chensun.me;http://web.mit.edu/phillipi/;http://dilipkay.wordpress.com", "dblp": ";;201/8645;31/10384;01/6072-2;36/9988;08/2316", "google_scholar": "0IWgVz4AAAAJ;nwefjOEAAAAJ;14HASnUAAAAJ;;vQa7heEAAAAJ;ROILf3EAAAAJ;_MEuWIMAAAAJ", "orcid": ";;;;;0000-0002-1411-6704;", "linkedin": ";;;;;phillip-isola-a9955b20/;", "or_profile": "~Sangnie_Bhardwaj1;~Willie_McClinton1;~Tongzhou_Wang1;~Guillaume_Lajoie1;~Chen_Sun1;~Phillip_Isola1;~Dilip_Krishnan1", "aff": "Google;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Mila - Quebec Artificial Intelligence Institute;Google;Massachusetts Institute of Technology;Google", "aff_domain": "google.com;mit.edu;mit.edu;mila.quebec;google.com;mit.edu;google.com", "position": "Researcher;PhD student;PhD student;Associate Professor;Research Scientist;Associate Professor;Research Scientist", "bibtex": "@misc{\nbhardwaj2023steerable,\ntitle={Steerable Equivariant Representation Learning},\nauthor={Sangnie Bhardwaj and Willie McClinton and Tongzhou Wang and Guillaume Lajoie and Chen Sun and Phillip Isola and Dilip Krishnan},\nyear={2023},\nurl={https://openreview.net/forum?id=NFzHAognkpQ}\n}", "github": "", "project": "", "reviewers": "nTJZ;EeNj;D95j;k3c6", "site": "https://openreview.net/forum?id=NFzHAognkpQ", "pdf_size": 16051809, "recommendation": "3;5;5;6", "confidence": "3;3;4;4", "correctness": "2;3;3;2", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;2;3;2", "wc_summary_paper": "50;59;43;129", "wc_strength_and_weaknesses": "358;250;193;387", "wc_clarity_quality_novelty_and_reproducibility": "19;36;24;55", "wc_summary_review": "31;181;33;41", "wc_review": "458;526;293;612", "wc_reply_reviewers": "32;0;79;28", "wc_reply_authors": "636;280;392;609", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 70.25, 34.390223901568305 ], "wc_strength_and_weaknesses_avg": [ 297.0, 78.81306998207847 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 33.5, 13.865424623862047 ], "wc_summary_review_avg": [ 71.5, 63.330482391972986 ], "wc_review_avg": [ 472.25, 116.99652772625349 ], "wc_reply_reviewers_avg": [ 34.75, 28.367014294775544 ], "wc_reply_authors_avg": [ 479.25, 148.92846437132158 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.6882472016116854, 
"corr_recommendation_correctness": 0.2294157338705618, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11423115877223606222&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;1;2;0;1;0", "aff_unique_norm": "Google;Massachusetts Institute of Technology;Quebec Artificial Intelligence Institute", "aff_unique_dep": "Google;;Artificial Intelligence", "aff_unique_url": "https://www.google.com;https://web.mit.edu;https://mila.quebec", "aff_unique_abbr": "Google;MIT;Mila", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;1;0;0;0", "aff_country_unique": "United States;Canada" }, { "id": "NGIFt6BNvLe", "title": "Double Wins: Boosting Accuracy and Efficiency of Graph Neural Networks by Reliable Knowledge Distillation", "track": "main", "status": "Reject", "tldr": "", "abstract": "The recent breakthrough achieved by graph neural networks (GNNs) with few labeled data accelerates the pace of deploying GNNs on real-world applications. While several efforts have been made to scale GNNs training for large-scale graphs, GNNs still suffer from the scalability challenge of model inference, due to the graph dependency issue incurred by the message passing mechanism, therefore hindering its deployment in resource-constrained applications. A recent study~\\citep{zhang2021graph} revealed that GNNs can be compressed to inference-friendly multi-layer perceptrons (MLPs), by training MLPs using the soft labels of labeled and unlabeled nodes from the teacher. However, blindly leveraging the soft labels of all unlabeled nodes may be suboptimal, since the teacher model would inevitably make wrong predictions. This intriguing observation motivates us to ask: \\textit{Is it possible to train a stronger MLP student by making better use of the unlabeled data?} \n\nThis paper studies cross-model knowledge distillation - from GNN teacher to MLP student in a semi-supervised setting, showing their strong promise in achieving a ``sweet point'' in co-optimizing model accuracy and efficiency. Our proposed solution, dubbed \\textit{Reliable Knowledge Distillation for MLP optimization} (\\textbf{RKD-MLP}), is the first noise-aware knowledge distillation framework for GNNs distillation. Its core idea is to use a meta-policy to filter out those unreliable soft labels. To train the meta-policy, we design a reward-driven objective based on a meta-set and adopt policy gradient to optimize the expected reward. Then we apply the meta-policy to the unlabeled nodes and select the most reliable soft labels for distillation. Extensive experiments across various GNN backbones, on 7 small graphs and 2 large-scale datasets from the challenging Open Graph Benchmark, demonstrate the superiority of our proposal. Moreover, our RKD-MLP model shows good robustness w.r.t. graph topology and node feature noises. 
The code is available at \\url{https://anonymous.4open.science/r/RKD-MLP-F2A6/}.", "keywords": "Graph Neural Networks;Reliable Knowledge Distillation;Model Inference Acceleration", "primary_area": "", "supplementary_material": "", "author": "Qiaoyu Tan;Daochen Zha;Soo-Hyun Choi;Li Li;Rui Chen;Xia Hu", "authorids": "~Qiaoyu_Tan2;~Daochen_Zha1;~Soo-Hyun_Choi1;~Li_Li11;~Rui_Chen4;~Xia_Hu4", "gender": "M;;M;M;;", "homepage": "https://qiaoyu-tan.github.io/;http://dczha.com/;;;;", "dblp": "197/5465.html;167/0903;185/1826;53/2189-35;;", "google_scholar": "V9bOnV4AAAAJ;jK0NgMcAAAAJ;Lm1BDEoAAAAJ;FPcI7HkAAAAJ;;", "orcid": "0000-0001-8999-968X;0000-0002-6677-7504;0000-0001-5768-9978;0000-0002-3365-8904;;", "linkedin": ";daochen-zha;soo-hyun-choi-706b5297/;li-li-b8a08664/;;", "or_profile": "~Qiaoyu_Tan2;~Daochen_Zha1;~Soo-Hyun_Choi1;~Li_Li11;~Rui_Chen4;~Xia_Hu4", "aff": "Texas A&M;Rice University;Samsung Electronics America;Samsung;;", "aff_domain": "tamu.edu;rice.edu;samsung.com;samsung.com;;", "position": "PhD student;PhD student;Principal Researcher;Researcher;;", "bibtex": "@misc{\ntan2023double,\ntitle={Double Wins: Boosting Accuracy and Efficiency of Graph Neural Networks by Reliable Knowledge Distillation},\nauthor={Qiaoyu Tan and Daochen Zha and Soo-Hyun Choi and Li Li and Rui Chen and Xia Hu},\nyear={2023},\nurl={https://openreview.net/forum?id=NGIFt6BNvLe}\n}", "github": "", "project": "", "reviewers": "aYE5;ZL6r;GWUx;asz6", "site": "https://openreview.net/forum?id=NGIFt6BNvLe", "pdf_size": 789086, "recommendation": "3;3;6;6", "confidence": "4;5;4;4", "correctness": "3;2;3;3", "technical_novelty": "2;1;3;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "86;38;171;57", "wc_strength_and_weaknesses": "197;760;70;171", "wc_clarity_quality_novelty_and_reproducibility": "152;17;15;54", "wc_summary_review": "20;65;151;38", "wc_review": "455;880;407;320", "wc_reply_reviewers": "0;0;0;9", "wc_reply_authors": "1039;2180;348;1130", "reply_reviewers": "0;0;0;1", "reply_authors": "2;4;1;2", "recommendation_avg": [ 4.5, 1.5 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.82915619758885 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 88.0, 50.87730338766 ], "wc_strength_and_weaknesses_avg": [ 299.5, 270.0689726717973 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 59.5, 55.616993805850385 ], "wc_summary_review_avg": [ 68.5, 50.25186563700894 ], "wc_review_avg": [ 515.5, 215.93575433447793 ], "wc_reply_reviewers_avg": [ 2.25, 3.897114317029974 ], "wc_reply_authors_avg": [ 1174.25, 654.6893824555275 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.5773502691896258, "corr_recommendation_correctness": 0.5773502691896258, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12281433538237410503&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "Texas A&M University;Rice University;Samsung", "aff_unique_dep": ";;Samsung Electronics America", "aff_unique_url": "https://www.tamu.edu;https://www.rice.edu;https://www.samsung.com/us/", "aff_unique_abbr": "TAMU;Rice;SEA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "United States;South Korea" }, { "id": "NGMAKE75_N7", "title": "Solving and Learning 
non-Markovian Stochastic Control problems in continuous-time with Neural RDEs", "track": "main", "status": "Reject", "tldr": "We propose a novel framework for solving non-Markovian stochastic control problems in continuous-time using Neural RDEs", "abstract": "We propose a novel framework for solving continuous-time, non-Markovian stochastic control problems with the use of neural rough differential equations (Neural RDEs). By parameterising the control process as the solution of a Neural RDE driven by the state process, we show that the control-state joint dynamics are governed by an uncontrolled RDE with structured vector fields, allowing for efficient trajectories simulation, Monte-Carlo estimation of the value function and backpropagation. To deal with input paths of infinite 1-variation, we refine the existing universal approximation result to a probabilistic density result for Neural RDEs driven by random rough paths. Experiments on various non-Markovian problems indicate how the proposed framework is time-resolution-invariant and capable of learning optimal solutions with higher accuracy than traditional RNN-based approaches. Finally, we discuss possible extensions of this framework to the setting of non-Markovian continuous-time reinforcement learning and provide promising empirical evidence in this direction.", "keywords": "stochastic control;neural RDEs;rough paths;reinforcement learning", "primary_area": "", "supplementary_material": "", "author": "Melker H\u00f6glund;Emilio Ferrucci;Camilo Hern\u00e1ndez;Aitor Muguruza Gonzalez;Cristopher Salvi;Leandro S\u00e1nchez-Betancourt;Yufei Zhang", "authorids": "~Melker_H\u00f6glund1;~Emilio_Ferrucci1;~Camilo_Hern\u00e1ndez1;~Aitor_Muguruza_Gonzalez1;~Cristopher_Salvi1;~Leandro_S\u00e1nchez-Betancourt1;~Yufei_Zhang2", "gender": "M;Not Specified;;M;M;M;", "homepage": ";https://people.maths.ox.ac.uk/rossiferrucc/;https://sites.google.com/view/camilohernandez/home;;https://www.maths.ox.ac.uk/people/cristopher.salvi;https://leandro-sbetancourt.github.io;https://yufei-zhang.github.io/", "dblp": ";;;;;;", "google_scholar": ";;;;FVxJ4iIAAAAJ;https://scholar.google.co.uk/citations?user=RLQF_UMAAAAJ;", "orcid": ";;;0000-0002-1289-7076;;0000-0001-6447-7105;", "linkedin": "melker-h%C3%B6glund;emilioferrucci/;;amuguruza/;cristopher-salvi/;leandro-sanchez-betancourt/;", "or_profile": "~Melker_H\u00f6glund1;~Emilio_Ferrucci1;~Camilo_Hern\u00e1ndez1;~Aitor_Muguruza_Gonzalez1;~Cristopher_Salvi1;~Leandro_S\u00e1nchez-Betancourt1;~Yufei_Zhang2", "aff": ";University of Oxford;Princeton University;Imperial College London;Imperial College London;King's College London, University of London;", "aff_domain": ";ox.ac.uk;princeton.edu;ic.ac.uk;ic.ac.uk;kcl.ac.uk;", "position": ";Postdoc;Postdoc;Lecturer;Assistant Professor;Lecturer;", "bibtex": "@misc{\nh{\\\"o}glund2023solving,\ntitle={Solving and Learning non-Markovian Stochastic Control problems in continuous-time with Neural {RDE}s},\nauthor={Melker H{\\\"o}glund and Emilio Ferrucci and Camilo Hern{\\'a}ndez and Aitor Muguruza Gonzalez and Cristopher Salvi and Leandro S{\\'a}nchez-Betancourt and Yufei Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=NGMAKE75_N7}\n}", "github": "", "project": "", "reviewers": "8dWF;kQGa;v9Ub", "site": "https://openreview.net/forum?id=NGMAKE75_N7", "pdf_size": 481077, "recommendation": "5;5;5", "confidence": "4;3;2", "correctness": "3;3;3", "technical_novelty": "3;3;3", "empirical_novelty": "3;2;2", "wc_summary_paper": "139;63;34", "wc_strength_and_weaknesses": 
"159;267;213", "wc_clarity_quality_novelty_and_reproducibility": "16;46;89", "wc_summary_review": "77;32;75", "wc_review": "391;408;411", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "210;814;578", "reply_reviewers": "0;0;0", "reply_authors": "1;2;1", "recommendation_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 78.66666666666667, 44.274396915398206 ], "wc_strength_and_weaknesses_avg": [ 213.0, 44.090815370097204 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 50.333333333333336, 29.95923155816176 ], "wc_summary_review_avg": [ 61.333333333333336, 20.75786330258702 ], "wc_review_avg": [ 403.3333333333333, 8.806563209081936 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 534.0, 248.53705290492738 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:PBPyy6jcsewJ:scholar.google.com/&scioq=Solving+and+Learning+non-Markovian+Stochastic+Control+problems+in+continuous-time+with+Neural+RDEs&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2;2;3", "aff_unique_norm": "University of Oxford;Princeton University;Imperial College London;King's College London", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.ox.ac.uk;https://www.princeton.edu;https://www.imperial.ac.uk;https://www.kcl.ac.uk", "aff_unique_abbr": "Oxford;Princeton;ICL;KCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "United Kingdom;United States" }, { "id": "NGv_ui-1wz", "title": "Fair Graph Message Passing with Transparency", "track": "main", "status": "Reject", "tldr": "We aim to achieve fair message passsing with transparency to explictly use sensitive attributes in forward progagation instead of backward propagation..", "abstract": "Recent advanced works achieve fair representations and predictions through regularization, adversarial debiasing, and contrastive learning in graph neural networks (GNNs). These methods \\textit{implicitly} encode the sensitive attribute information in the well-trained model weight via \\textit{backward propagation}. In practice, we not only pursue a fair machine learning model but also lend such fairness perception to the public. For current fairness methods,\nhow the sensitive attribute information usage makes the model achieve fair prediction still remains a black box. In this work, we first propose the concept \\textit{transparency} to describe \\textit{whether} the model embraces the ability of lending fairness perception to the public \\textit{or not}. Motivated by the fact that current fairness models lack of transparency, we aim to pursue a fair machine learning model with transparency via \\textit{explicitly} rendering sensitive attribute usage for fair prediction in \\textit{forward propagation} . Specifically, we develop an effective and transparent \\textsf{F}air \\textsf{M}essage \\textsf{P}assing (FMP) scheme adopting sensitive attribute information in forward propagation. In this way, FMP explicitly uncovers how sensitive attributes influence final prediction. 
Additionally, FMP scheme can aggregate useful information from neighbors and mitigate bias in a unified framework to simultaneously achieve graph smoothness and fairness objectives. An acceleration approach is also adopted to improve the efficiency of FMP. Experiments on node classification tasks demonstrate that the proposed FMP outperforms the state-of-the-art baselines in terms of fairness and accuracy on three real-world datasets. The code is available in {\\color{blue}\\url{https://anonymous.4open.science/r/FMP-AD84}}.", "keywords": "Fairness;Transparency;Graph Neural Networks", "primary_area": "", "supplementary_material": "", "author": "Zhimeng Jiang;Xiaotian Han;Chao Fan;Zirui Liu;Na Zou;Ali Mostafavi;Xia Hu", "authorids": "~Zhimeng_Jiang1;~Xiaotian_Han1;~Chao_Fan2;~Zirui_Liu1;~Na_Zou2;~Ali_Mostafavi2;~Xia_Hu4", "gender": "M;M;;M;F;M;M", "homepage": "http://www.zhimengjiang.com/;https://ahxt.github.io/;https://fanchaolab.com;https://zirui-ray-liu.github.io/;https://nzou1.github.io/;;https://cs.rice.edu/~xh37/index.html", "dblp": "217/3235;;;196/8629-1.html;152/0090-1.html;;256/9406.html", "google_scholar": "5Es3Yk4AAAAJ;Uromx98AAAAJ;3k_B_zUAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en;DFNvQPYAAAAJ;https://scholar.google.com.tw/citations?user=pcCS60IAAAAJ", "orcid": "0000-0001-6933-3952;;;;0000-0003-1984-795X;;", "linkedin": ";;;;na-zou-a1721535/;;", "or_profile": "~Zhimeng_Jiang1;~Xiaotian_Han1;~Chao_Fan2;~Zirui_Liu1;~Na_Zou2;~Ali_Mostafavi2;~Xia_Hu2", "aff": "Texas A&M University;Texas A&M University;Clemson University;Rice University;Texas A&M University - College Station;Texas A&M;Rice University", "aff_domain": "tamu.edu;tamu.edu;clemson.edu;rice.edu;tamu.edu;tamu.edu;rice.edu", "position": "PhD student;PhD student;Assistant Professor;PhD student;Assistant Professor;Associate Professor;Associate Professor", "bibtex": "@misc{\njiang2023fair,\ntitle={Fair Graph Message Passing with Transparency},\nauthor={Zhimeng Jiang and Xiaotian Han and Chao Fan and Zirui Liu and Na Zou and Ali Mostafavi and Xia Hu},\nyear={2023},\nurl={https://openreview.net/forum?id=NGv_ui-1wz}\n}", "github": "", "project": "", "reviewers": "QgmT;RsB6;Sogu;hAa7", "site": "https://openreview.net/forum?id=NGv_ui-1wz", "pdf_size": 1411370, "recommendation": "3;5;5;6", "confidence": "3;4;3;4", "correctness": "3;4;3;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;3;2;3", "wc_summary_paper": "73;66;132;245", "wc_strength_and_weaknesses": "435;162;1058;197", "wc_clarity_quality_novelty_and_reproducibility": "32;61;67;58", "wc_summary_review": "36;14;48;20", "wc_review": "576;303;1305;520", "wc_reply_reviewers": "0;0;653;0", "wc_reply_authors": "896;688;2876;275", "reply_reviewers": "0;0;2;0", "reply_authors": "3;2;7;1", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 129.0, 71.71122645722913 ], "wc_strength_and_weaknesses_avg": [ 463.0, 359.2234680529656 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 54.5, 13.388427838995884 ], "wc_summary_review_avg": [ 29.5, 13.369741957120938 ], "wc_review_avg": [ 676.0, 377.1955726145258 ], "wc_reply_reviewers_avg": [ 163.25, 282.75729433561924 ], "wc_reply_authors_avg": [ 1183.75, 1002.2605387323198 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 3.25, 2.277608394786075 ], 
"replies_avg": [ 20, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.6882472016116854, "corr_recommendation_correctness": 0.13245323570650439, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9759174719249598734&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;1;2;0;0;2", "aff_unique_norm": "Texas A&M University;Clemson University;Rice University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tamu.edu;https://www.clemson.edu;https://www.rice.edu", "aff_unique_abbr": "TAMU;Clemson;Rice", "aff_campus_unique_index": "1", "aff_campus_unique": ";College Station", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "NHfSJAWhKTw", "title": "A Closer Look at Self-supervised Lightweight Vision Transformers", "track": "main", "status": "Reject", "tldr": "", "abstract": "Self-supervised learning on large-scale Vision Transformers (ViTs) as pre-training methods has achieved promising downstream performance. Yet, how much these pre-training paradigms promote lightweight ViTs' performance is considerably less studied. In this work, we mainly develop and benchmark self-supervised pre-training methods, e.g., contrastive-learning-based MoCo-v3, masked-image-modeling-based MAE on image classification tasks, and some downstream dense prediction tasks. We surprisingly find that if proper pre-training is adopted, even vanilla lightweight ViTs show comparable performance on ImageNet to previous SOTA networks with delicate architecture design. We also point out some defects of such pre-training, \\eg, failing to benefit from large-scale pre-training data and showing inferior performance on data-insufficient downstream tasks. Furthermore, we analyze and clearly show the effect of such pre-training by analyzing the properties of the layer representation and attention maps for related models. 
Finally, based on the above analyses, a distillation strategy during pre-training is developed, which leads to further downstream performance improvement for MAE-based pre-training.", "keywords": "Self-supervised Learning;Vision Transformers;Lightweight Networks", "primary_area": "", "supplementary_material": "/attachment/17de5180bd96cdf218c90a9861708440cb303e89.zip", "author": "Shaoru Wang;Jin Gao;Zeming Li;Weiming Hu", "authorids": "~Shaoru_Wang1;~Jin_Gao1;~Zeming_Li2;~Weiming_Hu1", "gender": "M;M;;M", "homepage": ";https://people.ucas.edu.cn/~jgao?language=en;;http://weiminghu.people-ai.net/", "dblp": "255/5225;;;", "google_scholar": "Vl6LhukAAAAJ;W1o3B-0AAAAJ;;", "orcid": ";;;0000-0001-9237-8825", "linkedin": ";;;", "or_profile": "~Shaoru_Wang1;~Jin_Gao1;~Zeming_Li2;~Weiming_Hu1", "aff": "Institute of Automation, Chinese Academy of Sciences;Institute of automation, Chinese Academy of Sciences;;Institute of automation, Chinese academy of science", "aff_domain": "ia.ac.cn;ia.ac.cn;;nlpr.ia.ac.cn", "position": "PhD student;Associate Professor;;Full Professor", "bibtex": "@misc{\nwang2023a,\ntitle={A Closer Look at Self-supervised Lightweight Vision Transformers},\nauthor={Shaoru Wang and Jin Gao and Zeming Li and Weiming Hu},\nyear={2023},\nurl={https://openreview.net/forum?id=NHfSJAWhKTw}\n}", "github": "", "project": "", "reviewers": "PEwP;UM5C;nNkX", "site": "https://openreview.net/forum?id=NHfSJAWhKTw", "pdf_size": 1274963, "recommendation": "3;5;6", "confidence": "5;4;4", "correctness": "2;3;4", "technical_novelty": "1;2;2", "empirical_novelty": "1;3;2", "wc_summary_paper": "74;114;106", "wc_strength_and_weaknesses": "337;232;113", "wc_clarity_quality_novelty_and_reproducibility": "28;32;190", "wc_summary_review": "42;61;86", "wc_review": "481;439;495", "wc_reply_reviewers": "0;50;56", "wc_reply_authors": "629;402;352", "reply_reviewers": "0;1;1", "reply_authors": "1;2;2", "recommendation_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 98.0, 17.281975195754296 ], "wc_strength_and_weaknesses_avg": [ 227.33333333333334, 91.50713390526202 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 83.33333333333333, 75.44239892497822 ], "wc_summary_review_avg": [ 63.0, 18.01850900231944 ], "wc_review_avg": [ 471.6666666666667, 23.79542439676633 ], "wc_reply_reviewers_avg": [ 35.333333333333336, 25.104227178350307 ], "wc_reply_authors_avg": [ 461.0, 120.53491886862771 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.944911182523068, "corr_recommendation_correctness": 0.9819805060619659, "gs_citation": 41, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5304154925904695168&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0;0", "aff_unique_norm": "Chinese Academy of Sciences", "aff_unique_dep": "Institute of Automation", "aff_unique_url": "http://www.ia.cas.cn", "aff_unique_abbr": "CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Preserving Pre-trained Features Helps Calibrate Fine-tuned Language Models", "status": "Poster", 
"track": "main", "site": "https://iclr.cc/virtual/2023/poster/10734", "id": "NI7StoWHJPT", "poster": "", "openreview": "https://openreview.net/forum?id=NI7StoWHJPT", "slides": "https://iclr.cc/virtual/2023/poster/10734", "video": "https://iclr.cc/virtual/2023/poster/10734", "author_site": "Guande He, Jianfei Chen, Jun Zhu", "tldr": "", "abstract": "Large pre-trained language models (PLMs) have demonstrated strong performance on natural language understanding (NLU) tasks through fine-tuning. However, fine-tuned models still suffer from overconfident predictions, especially in out-of-domain settings. In this paper, we tackle the problem of calibrating fine-tuned language models. We demonstrate that the PLMs are well-calibrated on the masked language modeling task with robust predictive confidence under domain shift, yet the fine-tuned models fail to retain such property due to catastrophic forgetting, which impacts the calibration on the downstream classification task. In light of these observations, we evaluate the calibration of several methods that preserve pre-trained features and show that preserving pre-trained features can improve the calibration of fine-tuned language models. Among these methods, our proposed method that encourages the fine-tuned model to learn generative representations with auxiliary language modeling objective achieves competitive accuracy and the lowest expected calibration error compared to several strong baselines under both in-domain and out-of-domain settings on three downstream NLU tasks.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/5ebce949850200fff3de07db46dd9d685fb49593.zip", "author": "Guande He;Jianfei Chen;Jun Zhu", "authorids": "~Guande_He1;~Jianfei_Chen1;~Jun_Zhu2", "gender": ";M;M", "homepage": "https://guandehe.github.io/;http://ml.cs.tsinghua.edu.cn/~jianfei;http://ml.cs.tsinghua.edu.cn/~jun", "dblp": "348/7035.html;48/6809-1;50/2644-1", "google_scholar": "3rddMeMAAAAJ;di5RZ1MAAAAJ;axsP38wAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Guande_He1;~Jianfei_Chen1;~Jun_Zhu2", "aff": "Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "mails.tsinghua.edu.cn;tsinghua.edu.cn;mail.tsinghua.edu.cn", "position": "MS student;Assistant Professor;Professor", "bibtex": "@inproceedings{\nhe2023preserving,\ntitle={Preserving Pre-trained Features Helps Calibrate Fine-tuned Language Models},\nauthor={Guande He and Jianfei Chen and Jun Zhu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=NI7StoWHJPT}\n}", "github": "", "project": "", "reviewers": "eTyr;YFTC;Kwo1", "pdf_size": 528823, "recommendation": "6;6;6", "confidence": "3;4;3", "correctness": "3;4;3", "technical_novelty": "2;3;2", "empirical_novelty": "3;3;2", "wc_summary_paper": "103;67;128", "wc_strength_and_weaknesses": "316;281;349", "wc_clarity_quality_novelty_and_reproducibility": "101;35;65", "wc_summary_review": "36;54;45", "wc_review": "556;437;587", "wc_reply_reviewers": "39;62;56", "wc_reply_authors": "452;1206;1377", "reply_reviewers": "1;1;1", "reply_authors": "2;3;4", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 99.33333333333333, 25.037749277618563 ], 
"wc_strength_and_weaknesses_avg": [ 315.3333333333333, 27.7648858972784 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 67.0, 26.981475126464083 ], "wc_summary_review_avg": [ 45.0, 7.3484692283495345 ], "wc_review_avg": [ 526.6666666666666, 64.65463805654025 ], "wc_reply_reviewers_avg": [ 52.333333333333336, 9.741092797468305 ], "wc_reply_authors_avg": [ 1011.6666666666666, 401.8543130151633 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.0, 0.816496580927726 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14816888668466083555&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=NI7StoWHJPT", "email": "mails.tsinghua.edu.cn;tsinghua.edu.cn;mail.tsinghua.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "NIzeVwedJzB", "title": "Shuffle Gaussian Mechanism for Differential Privacy", "track": "main", "status": "Withdraw", "tldr": "We give a first non-trivial study of Gaussian mechanism in the shuffle model using R{\\'e}nyi differential privacy (RDP).", "abstract": "We study Gaussian mechanism in the shuffle model of differential privacy (DP).\nWe present the \\textit{first} non-trivial privacy guarantee of the mechanism by showing that its R{\\'e}nyi differential privacy (RDP) is of the form:\n$$\n\\epsilon(\\lambda) =\n% D_{\\lambda}(\\calM(D)||\\calM(D')) =\n\\frac{1}{\\lambda-1}\\log\\left(\\frac{e^{-\\lambda/2\\sigma^2}}{n^\\lambda}\\sum_{\\substack{k_1+\\dotsc+k_n=\\lambda;\\\\k_1,\\dotsc,k_n\\geq 0}}\\binom{\\lambda}{k_1,\\dotsc,k_n}e^{\\sum_{i=1}^nk_i^2/2\\sigma^2}\\right)\n$$\nWe further prove that the RDP is strictly upper-bounded by the Gaussian RDP without shuffling.\nThe shuffle Gaussian RDP is advantageous in composing multiple DP mechanisms, where we demonstrate its improvement over the state-of-the-art approximate DP composition theorems in privacy guarantees of the shuffle model.\nOur formalism also has immediate application in several problems studied in the literature, including learning with stochastic gradient descent and distributed/federated learning, of which an empirical study is presented to demonstrate the efficacy of learning privately while employing the shuffle Gaussian mechanism.", "keywords": "differential privacy;shuffle model;dp-sgd;federated learning", "primary_area": "", "supplementary_material": "/attachment/7d8218cbc0f06eb1743bb038bd55447bf4afc4af.zip", "author": "Seng Pei Liew;Tsubasa Takahashi", "authorids": "~Seng_Pei_Liew1;~Tsubasa_Takahashi1", "gender": "Not Specified;M", "homepage": "https://spliew.github.io/;https://sites.google.com/view/tsubasa-takahashi/", "dblp": "259/7221;85/5862-1", "google_scholar": "KQL8tB8AAAAJ;s-jrZ94AAAAJ", "orcid": "0000-0003-2419-2505;0000-0002-0646-0222", "linkedin": ";", "or_profile": "~Seng_Pei_Liew1;~Tsubasa_Takahashi1", "aff": "LINE Corporation;LINE Corporation", "aff_domain": "linecorp.com;linecorp.com", "position": "Researcher;Senior Researcher", "bibtex": "@misc{\nliew2023shuffle,\ntitle={Shuffle Gaussian Mechanism for Differential Privacy},\nauthor={Seng Pei Liew and Tsubasa 
Takahashi},\nyear={2023},\nurl={https://openreview.net/forum?id=NIzeVwedJzB}\n}", "github": "", "project": "", "reviewers": "bDod;ZU6q;JY8F", "site": "https://openreview.net/forum?id=NIzeVwedJzB", "pdf_size": 362043, "recommendation": "3;3;6", "confidence": "3;4;3", "correctness": "4;3;4", "technical_novelty": "2;2;4", "empirical_novelty": "2;2;3", "wc_summary_paper": "65;65;202", "wc_strength_and_weaknesses": "240;473;449", "wc_clarity_quality_novelty_and_reproducibility": "31;21;206", "wc_summary_review": "3;44;73", "wc_review": "339;603;930", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.0, 1.4142135623730951 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.9428090415820634 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 110.66666666666667, 64.58241934837135 ], "wc_strength_and_weaknesses_avg": [ 387.3333333333333, 104.64012402303217 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 86.0, 84.9509662491644 ], "wc_summary_review_avg": [ 40.0, 28.717010057919794 ], "wc_review_avg": [ 624.0, 241.73125573661343 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.5, "corr_recommendation_correctness": 0.5, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6751474124347667142&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0", "aff_unique_norm": "LINE Corporation", "aff_unique_dep": "", "aff_unique_url": "https://www.linecorp.com", "aff_unique_abbr": "LINE", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Japan" }, { "title": "Empowering Networks With Scale and Rotation Equivariance Using A Similarity Convolution", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12166", "id": "NJENsJ37sQ", "poster": "", "openreview": "https://openreview.net/forum?id=NJENsJ37sQ", "slides": "https://iclr.cc/virtual/2023/poster/12166", "video": "https://iclr.cc/virtual/2023/poster/12166", "author_site": "ZIKAI SUN, Thierry Blu", "tldr": "", "abstract": "The translation-equivariant nature of Convolutional Neural Networks (CNNs) is a reason for their great success in computer vision. However, networks do not enjoy more general equivariance properties such as rotation or scaling, ultimately limiting their generalization performance. To address this limitation, we devise a method that endows CNNs with simultaneous equivariance with respect to translation, rotation, and scaling. Our approach defines a convolution-like operation and ensures equivariance based on our proposed scalable Fourier-Argand representation. The method maintains similar efficiency to a traditional network and hardly introduces any additional learnable parameters, since it does not face the computational issue that often occurs in group-convolution operators. 
We validate the efficacy of our approach in the image classification task, demonstrating its robustness and the generalization ability to both scaled and rotated inputs.", "keywords": "Representation Learning", "primary_area": "", "supplementary_material": "", "author": "Zikai Sun;Thierry Blu", "authorids": "~Zikai_Sun1;thierry.blu@m4x.org", "gender": "M;", "homepage": "https://zikaisun.github.io;", "dblp": ";", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Zikai_Sun1;thierry.blu@m4x.org", "aff": "The Chinese University of Hong Kong;", "aff_domain": "cuhk.edu.hk;", "position": "PhD student;", "bibtex": "@inproceedings{\nsun2023empowering,\ntitle={Empowering Networks With Scale and Rotation Equivariance Using A Similarity Convolution},\nauthor={Zikai Sun and Thierry Blu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=NJENsJ37sQ}\n}", "github": "", "project": "", "reviewers": "MrDN;RpmD;TG34;Ke2s", "pdf_size": 4281635, "recommendation": "6;8;8;8", "confidence": "4;4;4;4", "correctness": "3;4;4;3", "technical_novelty": "3;3;4;4", "empirical_novelty": "3;1;3;4", "wc_summary_paper": "62;52;84;97", "wc_strength_and_weaknesses": "839;345;234;255", "wc_clarity_quality_novelty_and_reproducibility": "79;48;178;39", "wc_summary_review": "28;22;79;83", "wc_review": "1008;467;575;474", "wc_reply_reviewers": "0;0;17;0", "wc_reply_authors": "1548;657;541;923", "reply_reviewers": "0;0;1;0", "reply_authors": "2;1;1;2", "recommendation_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.5, 0.5 ], "empirical_novelty_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_paper_avg": [ 73.75, 17.725334975678173 ], "wc_strength_and_weaknesses_avg": [ 418.25, 246.47248832273348 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 86.0, 55.14979601050216 ], "wc_summary_review_avg": [ 53.0, 28.115831839019098 ], "wc_review_avg": [ 631.0, 221.8163654918185 ], "wc_reply_reviewers_avg": [ 4.25, 7.361215932167728 ], "wc_reply_authors_avg": [ 917.25, 389.60645207696444 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16999679930155410665&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "pdf": "https://openreview.net/pdf?id=NJENsJ37sQ", "email": "cuhk.edu.hk;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Chinese University of Hong Kong", "aff_unique_dep": "", "aff_unique_url": "https://www.cuhk.edu.hk", "aff_unique_abbr": "CUHK", "aff_campus_unique_index": "0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "id": "NM1Lt3ZBhal", "title": "Pseudo-Edge: Semi-Supervised Link Prediction with Graph Neural Networks", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Pseudo-labeling is one of the powerful Semi-Supervised Learning (SSL) approaches, which generates confident pseudo-labels of unlabeled data and leverages them for training. Recently, pseudo-labeling has been further extended to Graph Neural networks (GNNs) to address the data sparsity problem due to the nature of graph-structured data. 
Despite their success in the graph domain, they have been mainly designed for node-level tasks by utilizing node-level algorithms (e.g., Label Propagation) for pseudo-labeling, which cannot be directly applied to the link prediction task. Besides, existing works for link prediction only use given edges as positively-labeled data, and there have been no attempts to leverage non-visible edges for training a model in a semi-supervised manner. To address these limitations, we revisit the link prediction task in a semi-supervised fashion and propose a novel pseudo-labeling framework, Pseudo-Edge, that generates qualified pseudo-labels in consideration of graph structures and harnesses them for link prediction. Specifically, our framework constructs distance-based potential edge candidates and carefully selects pseudo-labels through our relation-aware pseudo-label generation, which reflects the comparative superiority of each unlabeled edge over its local neighborhoods in graphs. Also, we propose uncertainty-aware pseudo-label generation that can effectively filter out over-confident samples when the model overfits to specific graph structures. Extensive experiments show that our method achieves remarkable performance across five link prediction benchmark datasets and GNN architectures, compared to state-of-the-art GNN-based semi/self-supervised models.", "keywords": "Graph Neural Networks;Link Prediction;Pseudo-labeling;Semi-supervised learning", "primary_area": "", "supplementary_material": "", "author": "Hyunjin Seo;Seongjun Yun;Buru Chang;Jaewoo Kang", "authorids": "~Hyunjin_Seo2;~Seongjun_Yun1;~Buru_Chang1;~Jaewoo_Kang1", "gender": "F;M;Not Specified;M", "homepage": "https://github.com/hyunjin72;https://www.linkedin.com/in/seongjun-yun-01475919b;https://sites.google.com/view/buru-chang;https://dmis.korea.ac.kr", "dblp": ";72/6305;221/3390;k/JaewooKang", "google_scholar": "MFDOhRUAAAAJ;8-MZ2RwAAAAJ;https://scholar.google.co.kr/citations?hl=ko;https://scholar.google.co.kr/citations?user=RaBZafQAAAAJ", "orcid": ";;0000-0002-7595-9035;0000-0001-6798-9106", "linkedin": "hyunjin-seo-97525629a/?originalSubdomain=kr;;;", "or_profile": "~Hyunjin_Seo2;~Seongjun_Yun1;~Buru_Chang1;~Jaewoo_Kang1", "aff": "Korea Advanced Institute of Science & Technology;Amazon;Hyperconnect;Korea University", "aff_domain": "kaist.ac.kr;amazon.com;hpcnt.com;korea.ac.kr", "position": "MS student;Researcher;Research Scientist;Full Professor", "bibtex": "@misc{\nseo2023pseudoedge,\ntitle={Pseudo-Edge: Semi-Supervised Link Prediction with Graph Neural Networks},\nauthor={Hyunjin Seo and Seongjun Yun and Buru Chang and Jaewoo Kang},\nyear={2023},\nurl={https://openreview.net/forum?id=NM1Lt3ZBhal}\n}", "github": "", "project": "", "reviewers": "bvW5;wH66;Bjsu;qiTa", "site": "https://openreview.net/forum?id=NM1Lt3ZBhal", "pdf_size": 1897638, "recommendation": "3;3;3;5", "confidence": "3;2;5;4", "correctness": "2;3;2;3", "technical_novelty": "2;3;2;3", "empirical_novelty": "1;3;2;2", "wc_summary_paper": "44;24;64;93", "wc_strength_and_weaknesses": "247;174;274;288", "wc_clarity_quality_novelty_and_reproducibility": "5;16;16;46", "wc_summary_review": "42;11;35;49", "wc_review": "338;225;389;476", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.0, 
0.7071067811865476 ], "wc_summary_paper_avg": [ 56.25, 25.498774480354932 ], "wc_strength_and_weaknesses_avg": [ 245.75, 43.968028156832325 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 20.75, 15.2540978100968 ], "wc_summary_review_avg": [ 34.25, 14.306903927824496 ], "wc_review_avg": [ 357.0, 90.78821509425109 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.2581988897471611, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13336910123908060238&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Amazon;Hyperconnect;Korea University", "aff_unique_dep": ";Amazon.com, Inc.;;", "aff_unique_url": "https://www.kaist.ac.kr;https://www.amazon.com;;https://www.korea.ac.kr", "aff_unique_abbr": "KAIST;Amazon;;KU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "South Korea;United States;" }, { "id": "NMoeVEwekzC", "title": "On Convergence of Average-Reward Off-Policy Control Algorithms in Weakly-Communicating MDPs", "track": "main", "status": "Reject", "tldr": "Showing average-reward off-policy control algorithms converge in weakly-communicating MDPs", "abstract": "We show that two average-reward off-policy control algorithms, Differential Q Learning (Wan, Naik, \\& Sutton 2021a) and RVI Q Learning (Abounadi, Bertsekas, \\& Borkar 2001), converge in weakly-communicating MDPs. Weakly-communicating MDPs are the most general class of MDPs for which a learning algorithm with a single stream of experience can be guaranteed to obtain a policy achieving the optimal reward rate. The original convergence proofs of the two algorithms require that all optimal policies induce unichains, which is not necessarily true for weakly-communicating MDPs. To the best of our knowledge, our results are the first to show that average-reward off-policy control algorithms converge in weakly-communicating MDPs. As a direct extension, we show that the average-reward options algorithms introduced by Wan, Naik, \\& Sutton (2021b) converge if the Semi-MDP induced by options is weakly-communicating. ", "keywords": "Reinforcement Learning;Average-Reward;Off-Policy;Convergence", "primary_area": "", "supplementary_material": "/attachment/acf305b3de91c5ecd9470e4674c5d84042ce9f9e.zip", "author": "Yi Wan;Richard S. Sutton", "authorids": "~Yi_Wan1;~Richard_S._Sutton1", "gender": "M;M", "homepage": "https://sites.google.com/view/yi-wan/;http://richsutton.com", "dblp": ";48/6070", "google_scholar": "zMVstroAAAAJ;https://scholar.google.ca/citations?user=6m4wv6gAAAAJ", "orcid": ";0000-0002-3679-3415", "linkedin": ";richard-sutton-0653545/", "or_profile": "~Yi_Wan1;~Richard_S_Sutton1", "aff": "University of Alberta;Google DeepMind", "aff_domain": "ualberta.ca;deepmind.com", "position": "PhD student;Research Scientist", "bibtex": "@misc{\nwan2023on,\ntitle={On Convergence of Average-Reward Off-Policy Control Algorithms in Weakly-Communicating {MDP}s},\nauthor={Yi Wan and Richard S. 
Sutton},\nyear={2023},\nurl={https://openreview.net/forum?id=NMoeVEwekzC}\n}", "github": "", "project": "", "reviewers": "8rbg;9oBN;fj9u", "site": "https://openreview.net/forum?id=NMoeVEwekzC", "pdf_size": 839059, "recommendation": "3;3;6", "confidence": "3;3;3", "correctness": "3;3;3", "technical_novelty": "2;2;3", "empirical_novelty": "2;0;0", "wc_summary_paper": "45;88;107", "wc_strength_and_weaknesses": "126;352;16", "wc_clarity_quality_novelty_and_reproducibility": "17;421;10", "wc_summary_review": "66;38;89", "wc_review": "254;899;222", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.0, 1.4142135623730951 ], "confidence_avg": [ 3.0, 0.0 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 0.6666666666666666, 0.9428090415820634 ], "wc_summary_paper_avg": [ 80.0, 25.93581821856921 ], "wc_strength_and_weaknesses_avg": [ 164.66666666666666, 139.8697807088992 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 149.33333333333334, 192.11859763061864 ], "wc_summary_review_avg": [ 64.33333333333333, 20.853989759489405 ], "wc_review_avg": [ 458.3333333333333, 311.8721247919124 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15391817329043470867&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1", "aff_unique_norm": "University of Alberta;Google", "aff_unique_dep": ";Google DeepMind", "aff_unique_url": "https://www.ualberta.ca;https://deepmind.com", "aff_unique_abbr": "UAlberta;DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Canada;United Kingdom" }, { "id": "NN1sraxIyZ", "title": "Global Counterfactual Explanations Are Reliable Or Efficient, But Not Both", "track": "main", "status": "Reject", "tldr": "", "abstract": "Counterfactual explanations have been widely studied in explainability, with a range of application dependent methods emerging in fairness, recourse and model understanding. The major shortcoming associated with these methods, however, is their inability to provide explanations beyond the local or instance-level. While many works touch upon the notion of a global explanation, typically suggesting to aggregate masses of local explanations in the hope of ascertaining global properties, few provide frameworks that are both reliable and computationally tractable. Meanwhile, practitioners are requesting more efficient and interactive explainability tools. We take this opportunity to investigate existing methods, improving the efficiency of Actionable Recourse Summaries (AReS), one of the only known global recourse frameworks, and proposing Global & Efficient Counterfactual Explanations (GLOBE-CE), a novel and flexible framework that tackles the scalability issues associated with current state-of-the-art, particularly on higher dimensional datasets and in the presence of continuous features. Furthermore, we provide a unique mathematical analysis of categorical feature translations, utilising it in our method. 
Experimental evaluation with real world datasets and user studies verify the speed, reliability and interpretability improvements of our framework.", "keywords": "Global;counterfactual;explanations;recourse;fairness;efficiency;reliability;black box", "primary_area": "", "supplementary_material": "", "author": "Dan Ley;Saumitra Mishra;Daniele Magazzeni", "authorids": "~Dan_Ley1;~Saumitra_Mishra1;~Daniele_Magazzeni1", "gender": "M;M;M", "homepage": "https://www.dan-ley.com/;https://sites.google.com/site/saumitramishrac4dm/;https://nms.kcl.ac.uk/daniele.magazzeni/", "dblp": "290/1369;208/1387;14/4672", "google_scholar": "zQ_f9AEAAAAJ;https://scholar.google.co.uk/citations?user=On6E6ogAAAAJ;", "orcid": ";;", "linkedin": "dan-ley/;;", "or_profile": "~Dan_Ley1;~Saumitra_Mishra1;~Daniele_Magazzeni1", "aff": "Harvard University, Harvard University;J.P. Morgan Chase;", "aff_domain": "g.harvard.edu;jpmorgan.com;", "position": "PhD student;Researcher;", "bibtex": "@misc{\nley2023global,\ntitle={Global Counterfactual Explanations Are Reliable Or Efficient, But Not Both},\nauthor={Dan Ley and Saumitra Mishra and Daniele Magazzeni},\nyear={2023},\nurl={https://openreview.net/forum?id=NN1sraxIyZ}\n}", "github": "", "project": "", "reviewers": "XGq4;T2G1;e9Jt;tDod;NXG7", "site": "https://openreview.net/forum?id=NN1sraxIyZ", "pdf_size": 4904826, "recommendation": "1;5;5;6;8", "confidence": "2;4;2;3;2", "correctness": "2;3;4;3;3", "technical_novelty": "2;3;4;3;4", "empirical_novelty": "0;3;2;3;3", "wc_summary_paper": "17;56;81;63;43", "wc_strength_and_weaknesses": "280;80;289;154;31", "wc_clarity_quality_novelty_and_reproducibility": "2;69;74;77;16", "wc_summary_review": "37;130;55;70;14", "wc_review": "336;335;499;364;104", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "297;764;511;398;341", "reply_reviewers": "0;0;0;0;0", "reply_authors": "1;1;1;1;1", "recommendation_avg": [ 5.0, 2.280350850198276 ], "confidence_avg": [ 2.6, 0.8 ], "correctness_avg": [ 3.0, 0.6324555320336759 ], "technical_novelty_avg": [ 3.2, 0.7483314773547882 ], "empirical_novelty_avg": [ 2.2, 1.16619037896906 ], "wc_summary_paper_avg": [ 52.0, 21.372880011828073 ], "wc_strength_and_weaknesses_avg": [ 166.8, 103.8140645577467 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 47.6, 31.928670501604042 ], "wc_summary_review_avg": [ 61.2, 39.14792459377636 ], "wc_review_avg": [ 327.6, 127.14181058959323 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 462.2, 167.07291821237814 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.10963225241337866, "corr_recommendation_correctness": 0.5547001962252291, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:mEZuovk_NTQJ:scholar.google.com/&scioq=Global+Counterfactual+Explanations+Are+Reliable+Or+Efficient,+But+Not+Both&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Harvard University;JPMorgan Chase & Co.", "aff_unique_dep": ";", "aff_unique_url": "https://www.harvard.edu;https://www.jpmorganchase.com", "aff_unique_abbr": "Harvard;JPM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "NERDS: A General Framework to Train Camera Denoisers from Raw-RGB Noisy Image Pairs", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11706", "id": "NO0ThzteQdI", "poster": "", 
"openreview": "https://openreview.net/forum?id=NO0ThzteQdI", "slides": "https://iclr.cc/virtual/2023/poster/11706", "video": "https://iclr.cc/virtual/2023/poster/11706", "author_site": "Heewon Kim, Kyoung Mu Lee", "tldr": "", "abstract": " We aim to train accurate denoising networks for smartphone/digital cameras from single noisy images. Downscaling is commonly used as a practical denoiser for low-resolution images. Based on this processing, we found that the pixel variance of the natural images is more robust to downscaling than the pixel variance of the camera noises. Intuitively, downscaling easily removes high-frequency noises than natural textures. To utilize this property, we can adopt noisy/clean image synthesis at low-resolution to train camera denoisers. On this basis, we propose a new solution pipeline -- NERDS that estimates camera noises and synthesizes noisy-clean image pairs from only noisy images. In particular, it first models the noise in raw-sensor images as a Poisson-Gaussian distribution, then estimates the noise parameters using the difference of pixel variances by downscaling. We formulate the noise estimation as a gradient-descent-based optimization problem through a reparametrization trick. We further introduce a new Image Signal Processor (ISP) estimation method that enables denoiser training in a human-readable RGB space by transforming the synthetic raw images to the style of a given RGB noisy image. The noise and ISP estimations utilize rich augmentation to synthesize image pairs for denoiser training. Experiments show that our NERDS can accurately train CNN-based denoisers (e.g., DnCNN, ResNet-style network) outperforming previous noise-synthesis-based and self-supervision-based denoisers in real datasets.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Heewon Kim;Kyoung Mu Lee", "authorids": "~Heewon_Kim2;~Kyoung_Mu_Lee2", "gender": "M;M", "homepage": "https://sites.google.com/view/kimheewon/;https://cv.snu.ac.kr/kmlee/", "dblp": ";17/4029", "google_scholar": "B1Yuz3gAAAAJ;Hofj9kAAAAAJ", "orcid": "0000-0001-7777-9823;", "linkedin": ";", "or_profile": "~Heewon_Kim2;~Kyoung_Mu_Lee1", "aff": "Seoul National University;Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nkim2023nerds,\ntitle={{NERDS}: A General Framework to Train Camera Denoisers from Raw-{RGB} Noisy Image Pairs},\nauthor={Heewon Kim and Kyoung Mu Lee},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=NO0ThzteQdI}\n}", "github": "", "project": "", "reviewers": "nCCv;2oSJ;aUMk;HZ4f", "pdf_size": 4092872, "recommendation": "3;8;8;8", "confidence": "5;4;4;3", "correctness": "2;3;3;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;4;3;2", "wc_summary_paper": "58;113;261;142", "wc_strength_and_weaknesses": "354;578;382;59", "wc_clarity_quality_novelty_and_reproducibility": "16;205;162;438", "wc_summary_review": "31;122;118;40", "wc_review": "459;1018;923;679", "wc_reply_reviewers": "0;79;27;30", "wc_reply_authors": "666;722;759;613", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;2", "recommendation_avg": [ 6.75, 2.165063509461097 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 143.5, 74.24452841792451 ], 
"wc_strength_and_weaknesses_avg": [ 343.25, 185.4202995898777 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 205.25, 151.5410422954785 ], "wc_summary_review_avg": [ 77.75, 42.39324828318774 ], "wc_review_avg": [ 769.75, 217.89604746300472 ], "wc_reply_reviewers_avg": [ 34.0, 28.48683906648823 ], "wc_reply_authors_avg": [ 690.0, 55.43013620766234 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.816496580927726, "corr_recommendation_correctness": 1.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18348552830849216531&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 2, "pdf": "https://openreview.net/pdf?id=NO0ThzteQdI", "email": "snu.ac.kr;snu.ac.kr", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Seoul National University", "aff_unique_dep": "", "aff_unique_url": "https://www.snu.ac.kr", "aff_unique_abbr": "SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "id": "NOKUQ9JMohJ", "title": "ONLINE RESTLESS BANDITS WITH UNOBSERVED STATES", "track": "main", "status": "Reject", "tldr": "We propose TSEETC to slove the restless bandits with unknown transition kernels,unknown reward functions and unobserved states.", "abstract": "We study the online restless bandit problem, where each arm evolves according to a Markov chain independently, and the reward of pulling an arm depends on both the current state of the corresponding Markov chain and the action. The agent (decision maker) does not know the transition kernels and reward functions, and cannot observe the states of arms all the time. The goal is to sequentially choose which arms to pull so as to maximize the expected cumulative rewards collected. In this paper, we propose TSEETC, a learning algorithm based on Thompson Sampling with Episodic Explore-Then-Commit. The algorithm proceeds in episodes of increasing length and each episode is divided into exploration and exploitation phases. In the exploration phase in each episode, action-reward samples are collected in a round-robin way and then used to update the posterior as a mixture of Dirichlet distributions. At the beginning of the exploitation phase, TSEETC generates a sample from the posterior distribution as true parameters. It then follows the optimal policy for the sampled model for the rest of the episode. We establish the Bayesian regret bound $\\tilde {\\mathcal{O}}(\\sqrt{T})$ for TSEETC, where $T$ is the time horizon. This is the first bound that is close to the lower bound of restless bandits, especially in an unobserved state setting. 
We show through simulations that TSEETC outperforms existing algorithms in regret.", "keywords": "Thompson Sampling;Explore-Then-Commit;online restless bandit", "primary_area": "", "supplementary_material": "/attachment/60b14ab31c454131cb8121bebdb2714dead588a5.zip", "author": "Bowen Jiang;Bo Jiang;Jian Li;TAO LIN;Xinbing Wang;Chenghu Zhou", "authorids": "~Bowen_Jiang3;~Bo_Jiang2;~Jian_Li14;~TAO_LIN3;~Xinbing_Wang1;~Chenghu_Zhou3", "gender": ";M;M;M;M;M", "homepage": ";https://jhc.sjtu.edu.cn/~bjiang/;https://sites.google.com/stonybrook.edu/jianli;;http://www.cs.sjtu.edu.cn/~wang-xb/;http://www.igsnrr.cas.cn/gkjj/ysfc/ysfc_zhouchenghu/", "dblp": ";34/2005-3.html;33/5448-8;64/4492-1.html;96/1149.html;85/1324.html", "google_scholar": ";WxAIZtMAAAAJ;h039Yq4AAAAJ;;https://scholar.google.com.tw/citations?user=CT5yZbwAAAAJ;", "orcid": ";;;0000-0003-1170-636X;0000-0002-0357-8356;", "linkedin": "https://www.linkedin.cn/incareer/in/jiang-bowen-034382191;;;;;", "or_profile": "~Bowen_Jiang3;~Bo_Jiang2;~Jian_Li14;~TAO_LIN3;~Xinbing_Wang1;~Chenghu_Zhou3", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;State University of New York, Binghamton;Communication University of China;Shanghai Jiaotong University;IGSNRR, Chinese Academy of Sciences, Beijing, China", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;binghamton.edu;cuc.edu.cn;cs.sjtu.edu.cn;lreis.ac.cn", "position": "PhD student;Associate Professor;Assistant Professor;Full Professor;Full Professor;Full Professor", "bibtex": "@misc{\njiang2023online,\ntitle={{ONLINE} {RESTLESS} {BANDITS} {WITH} {UNOBSERVED} {STATES}},\nauthor={Bowen Jiang and Bo Jiang and Jian Li and TAO LIN and Xinbing Wang and Chenghu Zhou},\nyear={2023},\nurl={https://openreview.net/forum?id=NOKUQ9JMohJ}\n}", "github": "", "project": "", "reviewers": "csbH;EDfN;WLZD;PrWt", "site": "https://openreview.net/forum?id=NOKUQ9JMohJ", "pdf_size": 446908, "recommendation": "5;6;6;6", "confidence": "3;4;3;4", "correctness": "4;4;4;3", "technical_novelty": "2;3;3;4", "empirical_novelty": "0;0;0;3", "wc_summary_paper": "81;135;114;65", "wc_strength_and_weaknesses": "181;279;282;695", "wc_clarity_quality_novelty_and_reproducibility": "248;60;20;76", "wc_summary_review": "126;57;13;67", "wc_review": "636;531;429;903", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "613;486;295;352", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 0.75, 1.299038105676658 ], "wc_summary_paper_avg": [ 98.75, 27.38955092731533 ], "wc_strength_and_weaknesses_avg": [ 359.25, 198.05854563739481 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 101.0, 87.28688332160795 ], "wc_summary_review_avg": [ 65.75, 40.28259549731124 ], "wc_review_avg": [ 624.75, 176.5338140413898 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 436.5, 123.25278901509694 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": -0.3333333333333333, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13654161328331049678&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;1;2;0;3", "aff_unique_norm": "Shanghai Jiao Tong University;State University of New York at 
Binghamton;Communication University of China;Chinese Academy of Sciences", "aff_unique_dep": ";;;IGSNRR", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.binghamton.edu;http://www.cuc.edu.cn/;http://www.cas.cn", "aff_unique_abbr": "SJTU;SUNY Binghamton;CUC;CAS", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Binghamton;Beijing", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "China;United States" }, { "title": "H2RBox: Horizontal Box Annotation is All You Need for Oriented Object Detection", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12226", "id": "NPfDKT9OUJ3", "poster": "/media/PosterPDFs/ICLR%202023/12226.png?t=1680843457.253407", "openreview": "https://openreview.net/forum?id=NPfDKT9OUJ3", "slides": "https://iclr.cc/virtual/2023/poster/12226", "video": "https://iclr.cc/virtual/2023/poster/12226", "author_site": "Xue Yang, Gefan Zhang, Wentong Li, Yue Zhou, Xuehui Wang, Junchi Yan", "tldr": "", "abstract": "Oriented object detection emerges in many applications from aerial images to autonomous driving, while many existing detection benchmarks are annotated with horizontal bounding boxes only, which are also less costly than fine-grained rotated boxes, leading to a gap between the readily available training corpus and the rising demand for oriented object detection. This paper proposes a simple yet effective oriented object detection approach called H2RBox merely using horizontal box annotation for weakly-supervised training, which closes the above gap and shows competitive performance even against those trained with rotated boxes. The core of our method is weakly- and self-supervised learning, which predicts the angle of the object by learning the consistency between two different views. To the best of our knowledge, H2RBox is the first horizontal box annotation-based oriented object detector. Compared to an alternative, i.e., horizontal box-supervised instance segmentation with our post-adaptation to oriented object detection, our approach is not susceptible to the prediction quality of masks and can perform more robustly in complex scenes containing a large number of dense objects and outliers. Experimental results show that H2RBox has significant performance and speed advantages over horizontal box-supervised instance segmentation methods, as well as lower memory requirements. Meanwhile, compared to rotated box-supervised oriented object detectors, our method shows very close performance and speed. 
The source code is available at PyTorch-based \\href{https://github.com/yangxue0827/h2rbox-mmrotate}{MMRotate} and Jittor-based \\href{https://github.com/yangxue0827/h2rbox-jittor}{JDet}.", "keywords": "Oriented Object Detection;Rotated Object Detection", "primary_area": "", "supplementary_material": "/attachment/4962586cea05f07de17b7faf16b00a8b9230a993.zip", "author": "Xue Yang;Gefan Zhang;Wentong Li;Yue Zhou;Xuehui Wang;Junchi Yan", "authorids": "~Xue_Yang2;~Gefan_Zhang1;~Wentong_Li2;~Yue_Zhou4;~Xuehui_Wang3;~Junchi_Yan2", "gender": "M;M;M;M;M;M", "homepage": "https://yangxue.site/;https://github.com/zhanggefan;https://cslwt.github.io;https://zytx121.github.io/;https://huiserwang.site;http://thinklab.sjtu.edu.cn/", "dblp": "13/1779-5;;;78/6191-5;78/6531;60/7949.html", "google_scholar": "2xTlvV0AAAAJ;;MJjM6BcAAAAJ;https://scholar.google.com.hk/citations?user=v-aQ8GsAAAAJ;JuRztWYAAAAJ;ga230VoAAAAJ", "orcid": "0000-0002-7084-9101;;;0000-0002-3080-6721;;0000-0001-9639-7679", "linkedin": ";;;;;", "or_profile": "~Xue_Yang2;~Gefan_Zhang1;~Wentong_Li2;~Yue_Zhou4;~Xuehui_Wang3;~Junchi_Yan1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Alibaba Group;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;antgroup.com;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "position": "PhD student;MS student;Intern;PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nyang2023hrbox,\ntitle={H2{RB}ox: Horizontal Box Annotation is All You Need for Oriented Object Detection},\nauthor={Xue Yang and Gefan Zhang and Wentong Li and Yue Zhou and Xuehui Wang and Junchi Yan},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=NPfDKT9OUJ3}\n}", "github": "", "project": "", "reviewers": "uvAp;o5T8;jpAe;upuU", "pdf_size": 25160212, "recommendation": "6;6;8;10", "confidence": "5;3;4;5", "correctness": "4;3;4;3", "technical_novelty": "3;2;3;4", "empirical_novelty": "3;3;3;4", "wc_summary_paper": "63;59;40;27", "wc_strength_and_weaknesses": "82;72;131;40", "wc_clarity_quality_novelty_and_reproducibility": "11;9;21;64", "wc_summary_review": "18;27;15;41", "wc_review": "174;167;207;172", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "510;149;617;292", "reply_reviewers": "0;0;0;0", "reply_authors": "2;1;2;1", "recommendation_avg": [ 7.5, 1.6583123951777 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 47.25, 14.566657131957214 ], "wc_strength_and_weaknesses_avg": [ 81.25, 32.64486942844159 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 26.25, 22.26404051379713 ], "wc_summary_review_avg": [ 25.25, 10.108783309577865 ], "wc_review_avg": [ 180.0, 15.795568998931314 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 392.0, 182.75530088071318 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.4545454545454545, "corr_recommendation_correctness": -0.30151134457776363, "gs_citation": 78, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5404865604859099103&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=NPfDKT9OUJ3", "email": 
"sjtu.edu.cn;sjtu.edu.cn;antgroup.com;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "author_num": 6, "aff_unique_index": "0;0;1;0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University;Alibaba Group", "aff_unique_dep": ";", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.alibaba.com", "aff_unique_abbr": "SJTU;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Deep Transformers without Shortcuts: Modifying Self-attention for Faithful Signal Propagation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11572", "id": "NPrsUQgMjKK", "poster": "/media/PosterPDFs/ICLR%202023/11572.png?t=1682417732.0508745", "openreview": "https://openreview.net/forum?id=NPrsUQgMjKK", "slides": "https://iclr.cc/virtual/2023/poster/11572", "video": "https://iclr.cc/virtual/2023/poster/11572", "author_site": "Bobby He, James Martens, Guodong Zhang, Aleksandar Botev, Andrew Brock, Samuel L Smith, Yee Whye Teh", "tldr": "Understanding and improving signal propagation in self-attention layers to train deep transformers without skip connections and/or normalisation.", "abstract": "Skip connections and normalisation layers form two standard architectural components that are ubiquitous for the training of Deep Neural Networks (DNNs), but whose precise roles are poorly understood. Recent approaches such as Deep Kernel Shaping have made progress towards reducing our reliance on them, using insights from wide NN kernel theory to improve signal propagation in vanilla DNNs (which we define as networks without skips or normalisation). However, these approaches are incompatible with the self-attention layers present in transformers, whose kernels are intrinsically more complicated to analyse and control. And so the question remains: \\emph{is it possible to train deep vanilla transformers?} We answer this question in the affirmative by designing several approaches that use combinations of parameter initialisations, bias matrices and location-dependent rescaling to achieve faithful signal propagation in vanilla transformers. Our methods address various intricacies specific to signal propagation in transformers, including the interaction with positional encoding and causal masking. 
In experiments on WikiText-103 and C4, our approaches enable deep transformers without normalisation to train at speeds matching their standard counterparts, and deep vanilla transformers to reach the same performance as standard ones after about 5 times more iterations.", "keywords": "signal propagation;neural networks and kernels;deep transformers;self-attention;residual connections;layer normalisation;rank collapse;positional encoding", "primary_area": "", "supplementary_material": "", "author": "Bobby He;James Martens;Guodong Zhang;Aleksandar Botev;Andrew Brock;Samuel L Smith;Yee Whye Teh", "authorids": "~Bobby_He1;~James_Martens1;~Guodong_Zhang1;~Aleksandar_Botev1;~Andrew_Brock1;~Samuel_L_Smith1;~Yee_Whye_Teh2", "gender": ";M;M;M;;M;M", "homepage": "http://csml.stats.ox.ac.uk/people/he/;http://www.cs.toronto.edu/~jmartens/;http://www.cs.toronto.edu/~gdzhang/;;https://www.github.com/ajbrock;https://www.samtalksml.net/;http://csml.stats.ox.ac.uk/people/teh/", "dblp": "270/3685;12/8412;28/4937;183/6489;;;88/2483", "google_scholar": ";;B_TZBtwAAAAJ;8k7RD8QAAAAJ;https://scholar.google.co.uk/citations?user=NIxD36wAAAAJ;https://scholar.google.co.uk/citations?user=fyEqU5oAAAAJ;https://scholar.google.co.uk/citations?user=y-nUzMwAAAAJ", "orcid": ";;;;;;", "linkedin": ";;;aleksandarbotev/;;;", "or_profile": "~Bobby_He1;~James_Martens1;~Guodong_Zhang1;~Aleksandar_Botev1;~Andrew_Brock1;~Samuel_L_Smith1;~Yee_Whye_Teh1", "aff": "University of Oxford;Google DeepMind;Department of Computer Science, University of Toronto;Google DeepMind;Google DeepMind;babylon health;University of Oxford", "aff_domain": "ox.ac.uk;google.com;cs.toronto.edu;deepmind.com;deepmind.com;babylonhealth.com;ox.ac.uk", "position": "PhD student;Research Scientist;PhD student;Researcher;Research Scientist;Data scientist;Full Professor", "bibtex": "@inproceedings{\nhe2023deep,\ntitle={Deep Transformers without Shortcuts: Modifying Self-attention for Faithful Signal Propagation},\nauthor={Bobby He and James Martens and Guodong Zhang and Aleksandar Botev and Andrew Brock and Samuel L Smith and Yee Whye Teh},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=NPrsUQgMjKK}\n}", "github": "", "project": "", "reviewers": "sotB;HAdV;QFbf;2AHB", "pdf_size": 3841381, "recommendation": "6;6;8;8", "confidence": "4;3;3;3", "correctness": "4;3;3;3", "technical_novelty": "3;4;2;4", "empirical_novelty": "4;0;3;2", "wc_summary_paper": "53;70;86;85", "wc_strength_and_weaknesses": "319;60;285;46", "wc_clarity_quality_novelty_and_reproducibility": "27;55;34;204", "wc_summary_review": "65;16;81;29", "wc_review": "464;201;486;364", "wc_reply_reviewers": "0;0;53;26", "wc_reply_authors": "1081;1133;1617;1352", "reply_reviewers": "0;0;1;1", "reply_authors": "3;3;4;3", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.25, 0.82915619758885 ], "empirical_novelty_avg": [ 2.25, 1.479019945774904 ], "wc_summary_paper_avg": [ 73.5, 13.425721582097552 ], "wc_strength_and_weaknesses_avg": [ 177.5, 125.176874861134 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 80.0, 72.32910893962402 ], "wc_summary_review_avg": [ 47.75, 26.280934153869037 ], "wc_review_avg": [ 378.75, 112.45304575688468 ], "wc_reply_reviewers_avg": [ 19.75, 21.935986415021322 ], "wc_reply_authors_avg": [ 1295.75, 211.52467350169815 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 
3.25, 0.4330127018922193 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": -0.5773502691896257, "gs_citation": 37, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13564688146150100065&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=NPrsUQgMjKK", "email": "ox.ac.uk;google.com;cs.toronto.edu;deepmind.com;deepmind.com;babylonhealth.com;ox.ac.uk", "author_num": 7, "aff_unique_index": "0;1;2;1;1;3;0", "aff_unique_norm": "University of Oxford;Google;University of Toronto;Babylon Health", "aff_unique_dep": ";Google DeepMind;Department of Computer Science;", "aff_unique_url": "https://www.ox.ac.uk;https://deepmind.com;https://www.utoronto.ca;https://www.babylonhealth.com", "aff_unique_abbr": "Oxford;DeepMind;U of T;Babylon", "aff_campus_unique_index": "1", "aff_campus_unique": ";Toronto", "aff_country_unique_index": "0;0;1;0;0;0;0", "aff_country_unique": "United Kingdom;Canada" }, { "id": "NQuCQoHqqSY", "title": "Temporally Consistent Video Transformer for Long-Term Video Prediction", "track": "main", "status": "Reject", "tldr": "An efficient temporally consistent video prediction model able to generate long videos referencing hundreds of frames of past context in complex 3D environments and Kinetics-600.", "abstract": "Generating long, temporally consistent video remains an open challenge in video generation. Primarily due to computational limitations, most prior methods limit themselves to training on a small subset of frames that are then extended to generate longer videos through a sliding window fashion. Although these techniques may produce sharp videos, they have difficulty retaining long-term temporal consistency due to their limited context length. In this work, we present Temporally Consistent Video Transformer (TECO), a vector-quantized latent dynamics video prediction model that learns compressed representations to efficiently condition on long videos of hundreds of frames during both training and generation. We use a MaskGit prior for dynamics prediction which enables both sharper and faster generations compared to prior work. Our experiments show that TECO outperforms SOTA baselines in a variety of video prediction benchmarks ranging from simple mazes in DMLab, large 3D worlds in Minecraft, and complex real-world videos from Kinetics-600. In addition, to better understand the capabilities of video prediction models in modeling temporal consistency, we introduce several challenging video prediction tasks consisting of agents randomly traversing 3D scenes of varying difficulty. This presents a challenging benchmark for video prediction in partially observable environments where a model must understand what parts of the scenes to re-create versus invent depending on its past observations or generations. 
An anonymized website with samples can be found at https://sites.google.com/view/iclr23-teco", "keywords": "video generation;video prediction;generative modeling;latent dynamics models", "primary_area": "", "supplementary_material": "/attachment/67113603f8aa62ef70737e6afbe386540e6f6977.zip", "author": "Wilson Yan;Danijar Hafner;Stephen James;Pieter Abbeel", "authorids": "~Wilson_Yan1;~Danijar_Hafner1;~Stephen_James1;~Pieter_Abbeel2", "gender": "M;;M;M", "homepage": "https://wilson1yan.github.io/;https://danijar.com;https://stepjam.github.io/;https://people.eecs.berkeley.edu/~pabbeel/", "dblp": ";184/8088;163/5669;", "google_scholar": "tR2Qw0YAAAAJ;VINmGpYAAAAJ;OXtG-isAAAAJ;https://scholar.google.com.tw/citations?user=vtwH6GkAAAAJ", "orcid": ";0000-0002-9534-7271;;", "linkedin": ";;;", "or_profile": "~Wilson_Yan1;~Danijar_Hafner1;~Stephen_James1;~Pieter_Abbeel2", "aff": "University of California, Berkeley;University of Toronto;Dyson;Covariant", "aff_domain": "berkeley.edu;cs.toronto;dyson.com;covariant.ai", "position": "PhD student;PhD student;Principal Researcher;Founder", "bibtex": "@misc{\nyan2023temporally,\ntitle={Temporally Consistent Video Transformer for Long-Term Video Prediction},\nauthor={Wilson Yan and Danijar Hafner and Stephen James and Pieter Abbeel},\nyear={2023},\nurl={https://openreview.net/forum?id=NQuCQoHqqSY}\n}", "github": "", "project": "", "reviewers": "zBcT;HpHv;7Nbj;JK8Y", "site": "https://openreview.net/forum?id=NQuCQoHqqSY", "pdf_size": 3928007, "recommendation": "5;5;6;6", "confidence": "4;3;3;5", "correctness": "3;4;3;4", "technical_novelty": "3;2;2;2", "empirical_novelty": "3;2;3;2", "wc_summary_paper": "63;102;89;84", "wc_strength_and_weaknesses": "407;59;461;351", "wc_clarity_quality_novelty_and_reproducibility": "33;134;22;88", "wc_summary_review": "62;36;48;69", "wc_review": "565;331;620;592", "wc_reply_reviewers": "34;166;50;0", "wc_reply_authors": "448;230;1090;333", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;2;1", "recommendation_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 84.5, 14.044571905188139 ], "wc_strength_and_weaknesses_avg": [ 319.5, 155.34719179953012 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 69.25, 44.97429821575874 ], "wc_summary_review_avg": [ 53.75, 12.735285626950029 ], "wc_review_avg": [ 527.0, 114.81942344394523 ], "wc_reply_reviewers_avg": [ 62.5, 62.42395373572552 ], "wc_reply_authors_avg": [ 525.25, 335.05326069149066 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.30151134457776363, "corr_recommendation_correctness": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1250394911470146315&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "University of California, Berkeley;University of Toronto;Dyson;Covariant", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.berkeley.edu;https://www.utoronto.ca;https://www.dyson.com;", "aff_unique_abbr": "UC Berkeley;U of T;;", "aff_campus_unique_index": "0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;1;2", "aff_country_unique": "United States;Canada;United Kingdom;" }, { "title": "Multimodal Analogical Reasoning over Knowledge Graphs", 
"status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12140", "id": "NRHajbzg8y0P", "poster": "/media/PosterPDFs/ICLR%202023/12140.png?t=1680761826.6286263", "openreview": "https://openreview.net/forum?id=NRHajbzg8y0P", "slides": "https://iclr.cc/virtual/2023/poster/12140", "video": "https://iclr.cc/virtual/2023/poster/12140", "author_site": "Ningyu Zhang, Lei Li, Xiang Chen, Xiaozhuan Liang, Shumin Deng, Huajun Chen", "tldr": "Multimodal analogical reasoning over knowledge graphs with a new dataset MARS and a new framework MarT.", "abstract": "Analogical reasoning is fundamental to human cognition and holds an important place in various fields. However, previous studies mainly focus on single-modal analogical reasoning and ignore taking advantage of structure knowledge. Notably, the research in cognitive psychology has demonstrated that information from multimodal sources always brings more powerful cognitive transfer than single modality sources. To this end, we introduce the new task of multimodal analogical reasoning over knowledge graphs, which requires multimodal reasoning ability with the help of background knowledge. Specifically, we construct a Multimodal Analogical Reasoning dataSet (MARS) and a multimodal knowledge graph MarKG. We evaluate with multimodal knowledge graph embedding and pre-trained Transformer baselines, illustrating the potential challenges of the proposed task. We further propose a novel model-agnostic Multimodal analogical reasoning framework with Transformer (MarT) motivated by the structure mapping theory, which can obtain better performance. We hope our work can deliver benefits and inspire future research. Code and datasets are available in https://github.com/zjunlp/MKG_Analogy.", "keywords": "knowledge graph;multimodal;analogical reasoning;prompt learning;pre-trained language model", "primary_area": "", "supplementary_material": "/attachment/e7f2a9783245d1f2dca6bd1a79fdf43c88a8c5b2.zip", "author": "Ningyu Zhang;Lei Li;Xiang Chen;Xiaozhuan Liang;Shumin Deng;Huajun Chen", "authorids": "~Ningyu_Zhang1;~Lei_Li18;~Xiang_Chen5;~Xiaozhuan_Liang1;~Shumin_Deng1;~Huajun_Chen1", "gender": "M;M;M;M;F;M", "homepage": "https://person.zju.edu.cn/en/ningyu;;https://faculty.nuaa.edu.cn/ChenXiang/zh_CN/index.htm;;https://231sm.github.io/;", "dblp": "139/4181-1.html;13/7007-40;64/3062-16;295/8804;213/1853;94/5089", "google_scholar": "xQDOPvsAAAAJ;ySUbqpEAAAAJ;pXivdn8AAAAJ;https://scholar.google.com.hk/citations?user=Rmt2jcYAAAAJ;3am3hL4AAAAJ;", "orcid": "0000-0002-1970-0678;;0000-0002-2594-0600;;;", "linkedin": "ningyuzhang/;;;;;", "or_profile": "~Ningyu_Zhang1;~Lei_Li18;~Xiang_Chen5;~Xiaozhuan_Liang1;~Shumin_Deng1;~Huajun_Chen1", "aff": "Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University;National University of Singapore;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;nus.edu.sg;zju.edu.cn", "position": "Associate Professor;MS student;PhD student;MS student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nzhang2023multimodal,\ntitle={Multimodal Analogical Reasoning over Knowledge Graphs},\nauthor={Ningyu Zhang and Lei Li and Xiang Chen and Xiaozhuan Liang and Shumin Deng and Huajun Chen},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=NRHajbzg8y0P}\n}", "github": "", "project": "", "reviewers": "R5x6;Hqif;JzuQ", "pdf_size": 9835643, "recommendation": "5;5;8", "confidence": "5;4;3", "correctness": 
"3;3;4", "technical_novelty": "2;3;3", "empirical_novelty": "3;2;3", "wc_summary_paper": "98;212;248", "wc_strength_and_weaknesses": "199;146;751", "wc_clarity_quality_novelty_and_reproducibility": "2;173;165", "wc_summary_review": "2;20;34", "wc_review": "301;551;1198", "wc_reply_reviewers": "115;0;143", "wc_reply_authors": "1209;1126;1680", "reply_reviewers": "1;0;1", "reply_authors": "3;2;3", "recommendation_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 186.0, 63.93746945258312 ], "wc_strength_and_weaknesses_avg": [ 365.3333333333333, 273.5645363630958 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 113.33333333333333, 78.79227260475625 ], "wc_summary_review_avg": [ 18.666666666666668, 13.097921802925667 ], "wc_review_avg": [ 683.3333333333334, 377.9650189575162 ], "wc_reply_reviewers_avg": [ 86.0, 61.87622052668267 ], "wc_reply_authors_avg": [ 1338.3333333333333, 243.95946840043374 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.8660254037844387, "corr_recommendation_correctness": 1.0, "gs_citation": 38, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14172844937615326394&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=NRHajbzg8y0P", "email": "zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;nus.edu.sg;zju.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "Zhejiang University;National University of Singapore", "aff_unique_dep": ";", "aff_unique_url": "https://www.zju.edu.cn;https://www.nus.edu.sg", "aff_unique_abbr": "ZJU;NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;0", "aff_country_unique": "China;Singapore" }, { "title": "Designing BERT for Convolutional Networks: Sparse and Hierarchical Masked Modeling", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12227", "id": "NRxydtWup1S", "poster": "/media/PosterPDFs/ICLR%202023/12227.png?t=1682947095.158487", "openreview": "https://openreview.net/forum?id=NRxydtWup1S", "slides": "https://iclr.cc/virtual/2023/poster/12227", "video": "https://iclr.cc/virtual/2023/poster/12227", "author_site": "Keyu Tian, Yi Jiang, qishuai diao, Chen Lin, Liwei Wang, Zehuan Yuan", "tldr": "This paper presents a simple yet powerful framework to pre-train convolutional network (convnet) with Sparse masKed modeling.", "abstract": "We identify and overcome two key obstacles in extending the success of BERT-style pre-training, or masked image modeling, to convolutional networks (convnets): (i) convolution operation cannot handle irregular, randomly masked input images; (ii) the single-scale nature of BERT pre-training is inconsistent with convnet\u2019s hierarchical structure. For (i), we treat unmasked pixels as sparse voxels of 3D point clouds and use sparse convolution to encode. This is the first use of sparse convolution for 2D masked modeling. For (ii), we develop a hierarchical decoder to reconstruct images from multi-scale encoded features. 
Our method, called Sparse masKed modeling (SparK), is general: it can be used directly on any convolutional model without backbone modifications. We validate it on both classical (ResNet) and modern (ConvNeXt) models: on three downstream tasks, it surpasses both state-of-the-art contrastive learning and transformer-based masked modeling by similarly large margins (around +1.0%). The improvements on object detection and instance segmentation are more significant (up to +3.5%), validating the strong transferability of features learned. We also find SparK\u2019s favorable scaling behavior by observing more gains on larger networks. All of these findings support the promising future of generative pre-training on convnets. Both codes and pre-trained models have been released at https://github.com/keyu-tian/SparK.", "keywords": "Self-Supervised Learning;Masked Autoencoding;Masked Pre-training;Masked Modeling;Convolutional Neural Networks", "primary_area": "", "supplementary_material": "", "author": "Keyu Tian;Yi Jiang;qishuai diao;Chen Lin;Liwei Wang;Zehuan Yuan", "authorids": "~Keyu_Tian1;~Yi_Jiang2;~qishuai_diao1;~Chen_Lin2;~Liwei_Wang1;~Zehuan_Yuan1", "gender": "M;M;M;M;M;M", "homepage": ";https://enjoyyi.github.io/;https://github.com/dqshuai;https://scholar.google.com/citations?hl=en&user=rObgGWIAAAAJ;http://www.liweiwang-pku.com/;https://shallowyuan.github.io/", "dblp": "265/5609;;;;;227/3298", "google_scholar": "6FdkbygAAAAJ;https://scholar.google.com.hk/citations?user=6dikuoYAAAAJ;;https://scholar.google.com/citations?hl=en;VZHxoh8AAAAJ;", "orcid": "0000-0001-5909-2091;0000-0002-2133-8719;;;;", "linkedin": "keyu-tian/;;;;;", "or_profile": "~Keyu_Tian1;~Yi_Jiang2;~qishuai_diao1;~Chen_Lin2;~Liwei_Wang1;~Zehuan_Yuan1", "aff": "Peking University;Bytedance;ByteDance Inc.;University of Oxford, University of Oxford;Peking University;ByteDance Inc.", "aff_domain": "pku.edu.cn;bytedance.com;bytedance.com;eng.ox.ac.uk;pku.edu.cn;bytedance.com", "position": "MS student;Researcher;Researcher;PhD student;Full Professor;Researcher", "bibtex": "@inproceedings{\ntian2023designing,\ntitle={Designing {BERT} for Convolutional Networks: Sparse and Hierarchical Masked Modeling},\nauthor={Keyu Tian and Yi Jiang and qishuai diao and Chen Lin and Liwei Wang and Zehuan Yuan},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=NRxydtWup1S}\n}", "github": "", "project": "", "reviewers": "Zhkz;71p6;tYbJ", "pdf_size": 4227524, "recommendation": "5;6;8", "confidence": "5;4;4", "correctness": "3;3;4", "technical_novelty": "2;3;3", "empirical_novelty": "2;3;3", "wc_summary_paper": "88;47;98", "wc_strength_and_weaknesses": "213;512;140", "wc_clarity_quality_novelty_and_reproducibility": "95;77;74", "wc_summary_review": "34;48;21", "wc_review": "430;684;333", "wc_reply_reviewers": "254;139;0", "wc_reply_authors": "1482;1344;602", "reply_reviewers": "1;1;0", "reply_authors": "3;4;2", "recommendation_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 77.66666666666667, 22.065558884580486 ], "wc_strength_and_weaknesses_avg": [ 288.3333333333333, 160.93960219770509 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 82.0, 9.273618495495704 ], 
"wc_summary_review_avg": [ 34.333333333333336, 11.025223605694151 ], "wc_review_avg": [ 482.3333333333333, 147.99624619864142 ], "wc_reply_reviewers_avg": [ 131.0, 103.84924971643592 ], "wc_reply_authors_avg": [ 1142.6666666666667, 386.43786333927244 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 3.0, 0.816496580927726 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.7559289460184544, "corr_recommendation_correctness": 0.944911182523068, "gs_citation": 138, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8072796476168839940&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=NRxydtWup1S", "email": "pku.edu.cn;bytedance.com;bytedance.com;eng.ox.ac.uk;pku.edu.cn;bytedance.com", "author_num": 6, "aff_unique_index": "0;1;1;2;0;1", "aff_unique_norm": "Peking University;ByteDance;University of Oxford", "aff_unique_dep": ";;", "aff_unique_url": "http://www.pku.edu.cn;https://www.bytedance.com;https://www.ox.ac.uk", "aff_unique_abbr": "Peking U;Bytedance;Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "China;United Kingdom" }, { "id": "NSMlX2F21C7", "title": "Contrastive Consistent Representation Distillation", "track": "main", "status": "Reject", "tldr": "We propose Contrastive Consistent Representation Distillation (CoCoRD) to provide consistent representations for efficient contrastive-learning-based distillation.", "abstract": "The combination of knowledge distillation with contrastive learning has great potential to distill structural knowledge. Most of the contrastive-learning-based distillation methods treat the entire training dataset as the memory bank and maintain two memory banks, one for the student and one for the teacher. Besides, the representations in the two memory banks are updated in a momentum manner, leading to representation inconsistency. In this work, we propose Contrastive Consistent Representation Distillation (CoCoRD) to provide consistent representations for efficient contrastive-learning-based distillation. Instead of momentum-updating the cached representations, CoCoRD updates the encoders in a momentum manner. Specifically, the teacher is equipped with a momentum-updated projection head to generate consistent representations. These teacher representations are cached in a fixed-size queue which serves as the only memory bank in CoCoRD and is significantly smaller than the entire training dataset. Additionally, a slow-moving student, implemented as a momentum-based moving average of the student, is built to facilitate contrastive learning. CoCoRD, which utilizes only one memory bank and much fewer negative keys, provides highly competitive results under typical teacher-student settings. On ImageNet, CoCoRD-distilled ResNet50 outperforms the teacher ResNet101 by 0.2% top-1 accuracy. 
Furthermore, in PASCAL VOC and COCO detection, the detectors whose backbones are initialized by CoCoRD-distilled models exhibit considerable performance improvements.", "keywords": "contrastive learning;knowledge distillation;model compression", "primary_area": "", "supplementary_material": "/attachment/7a5d98a411c8e1ed0dcdd4a86d6a3ade391b8bf0.zip", "author": "Shipeng Fu;Haoran Yang;Xiaomin Yang", "authorids": "~Shipeng_Fu1;~Haoran_Yang5;~Xiaomin_Yang1", "gender": "M;M;F", "homepage": ";;", "dblp": "258/0459;241/5752;127/4863", "google_scholar": ";;", "orcid": "0000-0002-5015-3860;0000-0003-0751-6491;", "linkedin": ";;", "or_profile": "~Shipeng_Fu1;~Haoran_Yang5;~Xiaomin_Yang1", "aff": "Sichuan University;Sichuan University;Sichuan University", "aff_domain": "scu.edu.cn;scu.edu.cn;scu.edu.cn", "position": "PhD student;PhD student;Full Professor", "bibtex": "@misc{\nfu2023contrastive,\ntitle={Contrastive Consistent Representation Distillation},\nauthor={Shipeng Fu and Haoran Yang and Xiaomin Yang},\nyear={2023},\nurl={https://openreview.net/forum?id=NSMlX2F21C7}\n}", "github": "", "project": "", "reviewers": "iNks;mvUm;gYcu;FUQY", "site": "https://openreview.net/forum?id=NSMlX2F21C7", "pdf_size": 1389851, "recommendation": "3;5;5;6", "confidence": "4;5;3;3", "correctness": "2;3;3;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "36;156;132;85", "wc_strength_and_weaknesses": "223;517;255;67", "wc_clarity_quality_novelty_and_reproducibility": "6;58;72;28", "wc_summary_review": "8;86;37;28", "wc_review": "273;817;496;208", "wc_reply_reviewers": "0;218;0;0", "wc_reply_authors": "1445;1967;799;485", "reply_reviewers": "0;1;0;0", "reply_authors": "4;5;2;3", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 102.25, 45.99116763031789 ], "wc_strength_and_weaknesses_avg": [ 265.5, 161.68719800899513 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 41.0, 25.709920264364882 ], "wc_summary_review_avg": [ 39.75, 28.69124430902222 ], "wc_review_avg": [ 448.5, 238.0593413416075 ], "wc_reply_reviewers_avg": [ 54.5, 94.39676901250381 ], "wc_reply_authors_avg": [ 1174.0, 573.9416346633166 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.5, 1.118033988749895 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.3458572319330373, "corr_recommendation_correctness": 0.9271726499455306, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17237706429032249477&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Sichuan University", "aff_unique_dep": "", "aff_unique_url": "https://www.scu.edu.cn", "aff_unique_abbr": "SCU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "NT51Ty0-Bfu", "title": "Offline Reinforcement Learning with Differential Privacy", "track": "main", "status": "Reject", "tldr": "We present the first provably efficient offline RL algorithms with differential privacy guarantees.", "abstract": "The offline reinforcement learning (RL) problem is often motivated by the need to learn data-driven decision policies in financial, legal and healthcare applications. 
However, the learned policy could retain sensitive information of individuals in the training data (e.g., treatment and outcome of patients), and is thus susceptible to various privacy risks. We design offline RL algorithms with differential privacy guarantees which provably prevent such risks. These algorithms also enjoy strong instance-dependent learning bounds under both tabular and linear Markov Decision Process (MDP) settings. Our theory and simulation suggest that the privacy guarantee comes at (almost) no drop in utility compared to the non-private counterpart for a medium-size dataset.", "keywords": "offline reinforcement learning;differential privacy", "primary_area": "", "supplementary_material": "/attachment/3cd66b5e1fc86fd2ae548be20ccb059ee6a63667.zip", "author": "Dan Qiao;Yu-Xiang Wang", "authorids": "~Dan_Qiao1;~Yu-Xiang_Wang1", "gender": "M;", "homepage": ";http://www.cs.ucsb.edu/~yuxiangw/publications.html", "dblp": ";62/1637-3.html", "google_scholar": "EyfAUuUAAAAJ;HGNZ1fkAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Dan_Qiao1;~Yu-Xiang_Wang1", "aff": ", University of California, Santa Barbara;UC Santa Barbara", "aff_domain": "cs.ucsb.edu;ucsb.edu", "position": "PhD student;Assistant Professor", "bibtex": "@misc{\nqiao2023offline,\ntitle={Offline Reinforcement Learning with Differential Privacy},\nauthor={Dan Qiao and Yu-Xiang Wang},\nyear={2023},\nurl={https://openreview.net/forum?id=NT51Ty0-Bfu}\n}", "github": "", "project": "", "reviewers": "g4wC;MbLZ;txBL", "site": "https://openreview.net/forum?id=NT51Ty0-Bfu", "pdf_size": 836924, "recommendation": "3;5;5", "confidence": "4;3;3", "correctness": "3;3;4", "technical_novelty": "2;4;3", "empirical_novelty": "0;1;2", "wc_summary_paper": "17;66;68", "wc_strength_and_weaknesses": "159;376;68", "wc_clarity_quality_novelty_and_reproducibility": "41;227;69", "wc_summary_review": "25;154;42", "wc_review": "242;823;247", "wc_reply_reviewers": "0;28;0", "wc_reply_authors": "320;859;294", "reply_reviewers": "0;1;0", "reply_authors": "1;2;1", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.816496580927726 ], "empirical_novelty_avg": [ 1.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 50.333333333333336, 23.584363935078308 ], "wc_strength_and_weaknesses_avg": [ 201.0, 129.2001031991332 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 112.33333333333333, 81.88338591490272 ], "wc_summary_review_avg": [ 73.66666666666667, 57.22664492077872 ], "wc_review_avg": [ 437.3333333333333, 272.7151546129323 ], "wc_reply_reviewers_avg": [ 9.333333333333334, 13.199326582148887 ], "wc_reply_authors_avg": [ 491.0, 260.431692899821 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.9999999999999998, "corr_recommendation_correctness": 0.49999999999999983, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8837374383297621855&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0", "aff_unique_norm": "University of California, Santa Barbara", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsb.edu", "aff_unique_abbr": "UCSB", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Santa Barbara", "aff_country_unique_index": "0;0",
"aff_country_unique": "United States" }, { "id": "NTCYXulK9qm", "title": "Co-Evolution As More Than a Scalable Alternative for Multi-Agent Reinforcement Learning", "track": "main", "status": "Reject", "tldr": "Evolutionary Algorithms can be competitively used for policy search in multi-agent reinforcement and can scale to a high number of agents.", "abstract": "In recent years, gradient based multi-agent reinforcement learning is growing in success. One contributing factor is the use of shared parameters for learning policy networks. While this approach scales well with the number of agents during execution it lacks this ambiguity for training as the number of produced samples grows linearly with the number of agents. For a very large number of agents, this could lead to an inefficient use of the circumstantial amount of produced samples. Moreover in single-agent reinforcement learning policy search with evolutionary algorithms showed viable success when sampling can be parallelized on a larger scale. The here proposed method does not only consider sampling in concurrent environments but further investigates sampling diverse parameters from the population in co-evolution in joint environments during training. This co-evolutionary policy search has shown to be capable of training a large number of agents. Beyond that, it has been shown to produce competitive results in smaller environments in comparison to gradient descent based methods. This surprising result make evolutionary algorithms a promising candidate for further research in the context of multi-agent reinforcement learning.", "keywords": "reinforcement learning;multi-agent reinforcement learning;policy search;co-evolution;evolutionary algorithm", "primary_area": "", "supplementary_material": "/attachment/8a051ec7ab62f7d2eeba9ad6c3e65ce59a31a77d.zip", "author": "Patrick Grzywok", "authorids": "~Patrick_Grzywok1", "gender": "", "homepage": "", "dblp": "", "google_scholar": "", "orcid": "", "linkedin": "grzywok/", "or_profile": "~Patrick_Grzywok1", "aff": "Technische Universit\u00e4t Berlin", "aff_domain": "tu-berlin.de", "position": "PhD student", "bibtex": "@misc{\ngrzywok2023coevolution,\ntitle={Co-Evolution As More Than a Scalable Alternative for Multi-Agent Reinforcement Learning},\nauthor={Patrick Grzywok},\nyear={2023},\nurl={https://openreview.net/forum?id=NTCYXulK9qm}\n}", "github": "", "project": "", "reviewers": "3o7H;9b7B;d1BT;da8i", "site": "https://openreview.net/forum?id=NTCYXulK9qm", "pdf_size": 2641295, "recommendation": "1;1;3;3", "confidence": "3;5;4;3", "correctness": "1;1;2;2", "technical_novelty": "1;1;2;2", "empirical_novelty": "1;1;2;1", "wc_summary_paper": "46;56;48;81", "wc_strength_and_weaknesses": "206;18;416;206", "wc_clarity_quality_novelty_and_reproducibility": "62;10;68;90", "wc_summary_review": "33;71;35;170", "wc_review": "347;155;567;547", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 2.0, 1.0 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 1.5, 0.5 ], "technical_novelty_avg": [ 1.5, 0.5 ], "empirical_novelty_avg": [ 1.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 57.75, 13.935117509371782 ], "wc_strength_and_weaknesses_avg": [ 211.5, 140.82169577163882 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 57.5, 29.338541204361203 ], "wc_summary_review_avg": [ 77.25, 55.64339583454626 ], "wc_review_avg": [ 404.0, 167.5320864789787 ], "wc_reply_reviewers_avg": [ 0, 0 ], 
"wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 1, 0 ], "corr_recommendation_confidence": -0.30151134457776363, "corr_recommendation_correctness": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ojLJAJBbpO0J:scholar.google.com/&scioq=Co-Evolution+As+More+Than+a+Scalable+Alternative+for+Multi-Agent+Reinforcement+Learning&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Technische Universit\u00e4t Berlin", "aff_unique_dep": "", "aff_unique_url": "https://www.tu-berlin.de", "aff_unique_abbr": "TU Berlin", "aff_country_unique_index": "0", "aff_country_unique": "Germany" }, { "id": "NTTc8wZktaT", "title": "Substructured Graph Convolution for Non-overlapping Graph Decomposition", "track": "main", "status": "Reject", "tldr": "A novel graph convolution for non-overlapping graph decomposition.", "abstract": "Graph convolutional networks have been widely used to solve the graph problems such as node classification, link prediction, and recommender systems. It is well known that large graphs require large amount of memory and time to train graph convolutional networks. To deal with large graphs, many methods are being done, such as graph sampling or decomposition. In particular, graph decomposition has the advantage of parallel computation, but information loss occurs in the interface part. In this paper, we propose a novel substructured graph convolution that reinforces the interface part lost by graph decomposition. Numerical results indicate that the proposed method is robust in the number of subgraphs compared to other methods.", "keywords": "Graph convolution;non-overlapping graph decomposition;parallel computation;substructuring method", "primary_area": "", "supplementary_material": "/attachment/d41de406f595aa34b925f3d374f8a91f62c97111.zip", "author": "Youngkyu Lee;Chang-Ock Lee", "authorids": "~Youngkyu_Lee1;colee@kaist.edu", "gender": "M;", "homepage": "https://sites.google.com/view/youngkyulee/home;", "dblp": "288/0290;", "google_scholar": "https://scholar.google.com/citations?hl=ko;", "orcid": "0000-0002-3669-2913;", "linkedin": ";", "or_profile": "~Youngkyu_Lee1;colee@kaist.edu", "aff": "KAIST;", "aff_domain": "kaist.ac.kr;", "position": "PhD student;", "bibtex": "@misc{\nlee2023substructured,\ntitle={Substructured Graph Convolution for Non-overlapping Graph Decomposition},\nauthor={Youngkyu Lee and Chang-Ock Lee},\nyear={2023},\nurl={https://openreview.net/forum?id=NTTc8wZktaT}\n}", "github": "", "project": "", "reviewers": "Tixk;zYgm;TgT5", "site": "https://openreview.net/forum?id=NTTc8wZktaT", "pdf_size": 1266793, "recommendation": "3;3;3", "confidence": "4;4;2", "correctness": "3;2;2", "technical_novelty": "2;2;2", "empirical_novelty": "2;2;2", "wc_summary_paper": "24;87;116", "wc_strength_and_weaknesses": "143;127;87", "wc_clarity_quality_novelty_and_reproducibility": "26;55;228", "wc_summary_review": "14;14;110", "wc_review": "207;283;541", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "correctness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 75.66666666666667, 38.4042821686448 ], "wc_strength_and_weaknesses_avg": [ 119.0, 23.55136231020759 ], 
"wc_clarity_quality_novelty_and_reproducibility_avg": [ 103.0, 89.1777251709566 ], "wc_summary_review_avg": [ 46.0, 45.254833995939045 ], "wc_review_avg": [ 343.6666666666667, 142.94365634364084 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:lXPma3sphiAJ:scholar.google.com/&scioq=Substructured+Graph+Convolution+for+Non-overlapping+Graph+Decomposition&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_country_unique_index": "0", "aff_country_unique": "South Korea" }, { "id": "NUBuJsAq1U", "title": "Determinant regularization for Deep Metric Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Distance Metric Learning (DML) aims to learn the distance metric that better reflects the semantical similarities in the data. Current \\textit{pair-based} and \\textit{proxy-based} methods on DML focus on reducing the distance between similar samples while expanding the distance of dissimilar ones. However, we reveal that shrinking the distance between similar samples may distort the feature space, increasing the distance between points of the same class region and, therefore, harming the generalization of the model. Traditional regularization terms (such as $L_2$-norm on weights) cannot be adopted to solve this issue as they are based on linear projection. To alleviate this issue, we adopt the structure of normalizing flow as the deep metric layer and calculate the determinant of the Jacobi Matrix as the regularization term. 
Finally, we conduct experiments on several \textit{pair-based} and \textit{proxy-based} algorithms that demonstrate the benefits of our method.\n\n", "keywords": "Deep Metric Learning;Generalization;Jacobian Matrix", "primary_area": "", "supplementary_material": "", "author": "Kun Song;Ruben Solozabal;Martin Tak\u00e1\u010d;Fakhri Karray", "authorids": "~Kun_Song2;~Ruben_Solozabal1;~Martin_Tak\u00e1\u010d1;~Fakhri_Karray1", "gender": "Not Specified;M;M;M", "homepage": ";https://mbzuai.ac.ae/study/faculty/professor-fakhreddine-fakhri-karray/;;http://mtakac.com", "dblp": "204/8320;k/FakhriKarray;;42/3759-1.html", "google_scholar": ";9_Hpd5kAAAAJ;https://scholar.google.com/citations?hl=zh-TW;qKQD-2cAAAAJ", "orcid": "0000-0001-5523-7492;0000-0002-6900-315X;;0000-0001-7455-2025", "linkedin": ";;;martintakac/", "or_profile": "~Ruben_Solozabal1;~Fakhri_Karray1;~Kun_Kun1;~Martin_Takac3", "aff": "Mohamed bin Zayed University of Artificial Intelligence;Mohamed bin Zayed University of Artificial Intelligence;Mohamed bin Zayed University of Artificial Intelligence;Mohamed bin Zayed University of Artificial Intelligence", "aff_domain": "mbzuai.ac.ae;mbzuai.ac.ae;mbzuai.ac.ae;mbzuai.ac.ae", "position": "Postdoc;Full Professor;Postdoc;Associate Professor", "bibtex": "@misc{\nsong2023determinant,\ntitle={Determinant regularization for Deep Metric Learning},\nauthor={Kun Song and Ruben Solozabal and Martin Tak{\\'a}{\\v{c}} and Fakhri Karray},\nyear={2023},\nurl={https://openreview.net/forum?id=NUBuJsAq1U}\n}", "github": "", "project": "", "reviewers": "obNj;VbA6;n2tf;F9Bh", "site": "https://openreview.net/forum?id=NUBuJsAq1U", "pdf_size": 435388, "recommendation": "1;3;3;5", "confidence": "4;2;3;5", "correctness": "3;2;1;3", "technical_novelty": "2;3;2;3", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "82;75;66;67", "wc_strength_and_weaknesses": "242;189;237;88", "wc_clarity_quality_novelty_and_reproducibility": "140;47;73;86", "wc_summary_review": "20;52;21;24", "wc_review": "484;363;397;265", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.0, 1.4142135623730951 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "correctness_avg": [ 2.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 72.5, 6.5 ], "wc_strength_and_weaknesses_avg": [ 189.0, 61.87487373724491 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 86.5, 33.93007515464709 ], "wc_summary_review_avg": [ 29.25, 13.216939887886303 ], "wc_review_avg": [ 377.25, 78.40400181113206 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.31622776601683794, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:APoEHKvi5VUJ:scholar.google.com/&scioq=Determinant+regularization+for+Deep+Metric+Learning&hl=en&as_sdt=0,31", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": "", "aff_unique_url": "https://mbzuai.ac.ae", "aff_unique_abbr": "MBZUAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Arab Emirates" }, { "id": "NUU2tFxUjRa", "title": "Consistent Data
Distribution Sampling for Large-scale Retrieval", "track": "main", "status": "Reject", "tldr": "A novel negative sampling strategy to tackle the training-inference inconsistency of data distribution for large-scale retrieval.", "abstract": "Retrieving candidate items with low latency and computational cost is important for large-scale advertising systems. Negative sampling is a general approach to model million-scale items with rich features in the retrieval. The training-inference inconsistency of data distribution introduced by sampling negatives is a key challenge. In this work, we propose a novel negative sampling strategy, Consistent Data Distribution Sampling (CDDS), to solve this issue. Specifically, we employ a relatively large scale of uniform training negatives and batch negatives to adequately train long-tail and hot items respectively, and employ high-divergence negatives to improve the learning convergence. To make the above training samples approximate the serving item data distribution, we introduce an auxiliary loss based on an asynchronous item embedding matrix over the entire item pool. Offline experiments on real datasets achieve SOTA performance. Online experiments with multiple advertising scenarios show that our method has achieved significant increases in GMV. The source code will be released in the future.", "keywords": "Retrieval;Neural Networks;Deep Learning;Recommender Systems;Information Systems", "primary_area": "", "supplementary_material": "", "author": "Hongyu Ou;Jun Yin;Huanqin Wu;ANAN LIU;Lin Zhao;Tao Chen;Yuekui Yang;TAO YANG", "authorids": "~Hongyu_Ou1;~Jun_Yin9;~Huanqin_Wu1;~ANAN_LIU4;~Lin_Zhao10;~Tao_Chen11;~Yuekui_Yang2;~TAO_YANG11", "gender": "M;;M;M;M;M;M;M", "homepage": "https://github.com/adam7643/;;;;;http://www.baidu.com;https://www.linkedin.cn/incareer/in/ACoAAD4mIxEBgyrpfvgy38VlbRpZtbDl6xW8f4M;", "dblp": ";;249/9193.html;;79/2690;;;", "google_scholar": ";https://scholar.google.com/citations?view_op=list_works;;https://scholar.google.com/citations?hl=zh-CN;wHTMZjAAAAAJ;;;https://scholar.google.com.hk/citations?user=39zL0eoAAAAJ", "orcid": ";;;;;;;", "linkedin": ";;;;;https://www.linkedin.com/;https://www.linkedin.cn/incareer/in/ACoAAD4mIxEBgyrpfvgy38VlbRpZtbDl6xW8f4M;", "or_profile": "~Hongyu_Ou1;~Jun_Yin9;~Huanqin_Wu1;~ANAN_LIU4;~Yuekui_Yang2;~TAO_YANG11;~Zhao_Lin2;~Vito_Chen1", "aff": "Tencent Group;Tencent;;Tencent;Tsinghua University;;;", "aff_domain": "tencent.com;tencent.com;;tencent.com;tsinghua.edu.cn;;;", "position": "Researcher;Researcher;;Researcher;PhD student;;;", "bibtex": "@misc{\nou2023consistent,\ntitle={Consistent Data Distribution Sampling for Large-scale Retrieval},\nauthor={Hongyu Ou and Jun Yin and Huanqin Wu and ANAN LIU and Lin Zhao and Tao Chen and Yuekui Yang and TAO YANG},\nyear={2023},\nurl={https://openreview.net/forum?id=NUU2tFxUjRa}\n}", "github": "", "project": "", "reviewers": "WYde;sTd5;RorQ;ExGn", "site": "https://openreview.net/forum?id=NUU2tFxUjRa", "pdf_size": 1038935, "recommendation": "3;3;5;5", "confidence": "3;4;4;3", "correctness": "2;3;3;3", "technical_novelty": "2;2;2;2", "empirical_novelty": "0;2;2;2", "wc_summary_paper": "60;123;102;64", "wc_strength_and_weaknesses": "90;175;121;162", "wc_clarity_quality_novelty_and_reproducibility": "25;47;35;181", "wc_summary_review": "38;41;57;43", "wc_review": "213;386;315;450", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ],
"correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 1.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 87.25, 26.356925086208367 ], "wc_strength_and_weaknesses_avg": [ 137.0, 33.66749173906485 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 72.0, 63.41135544995076 ], "wc_summary_review_avg": [ 44.75, 7.292976072907411 ], "wc_review_avg": [ 341.0, 87.98579430794496 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:HQqqWzcNOPYJ:scholar.google.com/&scioq=Consistent+Data+Distribution+Sampling+for+Large-scale+Retrieval&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Tencent;Tsinghua University", "aff_unique_dep": "Tencent;", "aff_unique_url": "https://www.tencent.com;https://www.tsinghua.edu.cn", "aff_unique_abbr": "Tencent;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Simple Emergent Action Representations from Multi-Task Policy Training", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10828", "id": "NUl0ylt7SM", "poster": "/media/PosterPDFs/ICLR%202023/10828.png?t=1682401025.0382986", "openreview": "https://openreview.net/forum?id=NUl0ylt7SM", "slides": "https://iclr.cc/virtual/2023/poster/10828", "video": "https://iclr.cc/virtual/2023/poster/10828", "author_site": "Pu Hua, Yubei Chen, Huazhe Xu", "tldr": "We discover emergent action representations from multi-task training and further use them to perform task generalization.", "abstract": "The low-level sensory and motor signals in deep reinforcement learning, which exist in high-dimensional spaces such as image observations or motor torques, are inherently challenging to understand or utilize directly for downstream tasks. While sensory representations have been extensively studied, the representations of motor actions are still an area of active exploration. Our work reveals that a space containing meaningful action representations emerges when a multi-task policy network takes as inputs both states and task embeddings. Moderate constraints are added to improve its representation ability. Therefore, interpolated or composed embeddings can function as a high-level interface within this space, providing instructions to the agent for executing meaningful action sequences. Empirical results demonstrate that the proposed action representations are effective for intra-action interpolation and inter-action composition with limited or no additional learning. Furthermore, our approach exhibits superior task adaptation ability compared to strong baselines in Mujoco locomotion tasks. Our work sheds light on the promising direction of learning action representations for efficient, adaptable, and composable RL, forming the basis of abstract action planning and the understanding of motor signal space. 
Project page: https://sites.google.com/view/emergent-action-representation/", "keywords": "action representation;reinforcement learning;representation learning", "primary_area": "", "supplementary_material": "", "author": "Pu Hua;Yubei Chen;Huazhe Xu", "authorids": "~Pu_Hua1;~Yubei_Chen1;~Huazhe_Xu1", "gender": "M;M;M", "homepage": "https://piao-0429.github.io/;https://redwood.berkeley.edu/people/yubei-chen/;http://hxu.rocks", "dblp": "331/5335;30/10064;164/9006", "google_scholar": "https://scholar.google.com/citations?hl=en;WeyLqFUAAAAJ;t9HPFawAAAAJ", "orcid": "0009-0008-1301-7131;;", "linkedin": "https://www.linkedin.cn/incareer/in/pu-hua-315462215;yubei-chen-05998a39/;", "or_profile": "~Pu_Hua1;~Yubei_Chen1;~Huazhe_Xu1", "aff": "Electronic Engineering, Tsinghua University;New York University;Tsinghua University", "aff_domain": "mails.tsinghua.edu.cn;nyu.edu;tsinghua.edu.cn", "position": "Undergrad student;Postdoctoral Associate;Assistant Professor", "bibtex": "@inproceedings{\nhua2023simple,\ntitle={Simple Emergent Action Representations from Multi-Task Policy Training},\nauthor={Pu Hua and Yubei Chen and Huazhe Xu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=NUl0ylt7SM}\n}", "github": "", "project": "", "reviewers": "VRfP;oCDb;YSuL;MYUq", "pdf_size": 6871087, "recommendation": "5;5;6;6", "confidence": "4;3;3;3", "correctness": "4;2;3;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "3;3;4;3", "wc_summary_paper": "111;112;129;61", "wc_strength_and_weaknesses": "142;554;491;131", "wc_clarity_quality_novelty_and_reproducibility": "33;46;60;54", "wc_summary_review": "61;107;21;27", "wc_review": "347;819;701;273", "wc_reply_reviewers": "0;329;0;0", "wc_reply_authors": "487;1209;1185;342", "reply_reviewers": "0;2;0;0", "reply_authors": "2;3;3;2", "recommendation_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 103.25, 25.420218331084413 ], "wc_strength_and_weaknesses_avg": [ 329.5, 194.31996809386317 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 48.25, 10.108783309577865 ], "wc_summary_review_avg": [ 54.0, 34.19064199455752 ], "wc_review_avg": [ 535.0, 230.32585612562042 ], "wc_reply_reviewers_avg": [ 82.25, 142.46117892254017 ], "wc_reply_authors_avg": [ 805.75, 394.6855552208619 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3309569235868270176&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=NUl0ylt7SM", "email": "mails.tsinghua.edu.cn;nyu.edu;tsinghua.edu.cn", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Tsinghua University;New York University", "aff_unique_dep": "Electronic Engineering;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.nyu.edu", "aff_unique_abbr": "THU;NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "China;United States" }, { "title": "LilNetX: Lightweight Networks with EXtreme Model Compression and Structured Sparsification", "status": "Poster", 
"track": "main", "site": "https://iclr.cc/virtual/2023/poster/11516", "id": "NVZvalzCLg", "poster": "/media/PosterPDFs/ICLR%202023/11516.png?t=1682373078.3479552", "openreview": "https://openreview.net/forum?id=NVZvalzCLg", "slides": "https://iclr.cc/virtual/2023/poster/11516", "video": "https://iclr.cc/virtual/2023/poster/11516", "author_site": "Sharath Girish, Kamal Gupta, Saurabh Singh, Abhinav Shrivastava", "tldr": "", "abstract": "We introduce LilNetX, an end-to-end trainable technique for neural networks that enables learning models with specified accuracy-rate-computation trade-off. Prior works approach these problems one at a time and often require post-processing or multistage training which become less practical and do not scale very well for large datasets or architectures. Our method constructs a joint training objective that penalizes the self information of network parameters in a latent representation space to encourage small model size while also introducing priors to increase structured sparsity in the parameter space to reduce computation. When compared with existing state-of-the-art model compression methods, we achieve up to 50% smaller model size and 98% model sparsity on ResNet-20 on the CIFAR-10 dataset as well as 37% smaller model size and 71% structured sparsity on ResNet-50 trained on ImageNet while retaining the same accuracy as those methods. We show that the resulting sparsity can improve the inference time of the models by almost 1.8 times the dense ResNet-50 baseline model. Code is available at https://github.com/Sharath-girish/LilNetX.\n", "keywords": "Quantization;Model Compression;Sparsity;Pruning", "primary_area": "", "supplementary_material": "/attachment/da813a4ada480792e8b1a1db8e63866f3d99607f.zip", "author": "Sharath Girish;Kamal Gupta;Saurabh Singh;Abhinav Shrivastava", "authorids": "~Sharath_Girish1;~Kamal_Gupta1;~Saurabh_Singh1;~Abhinav_Shrivastava2", "gender": ";;M;M", "homepage": "https://sharath-girish.github.io/;https://kampta.github.io;http://www.saurabhsingh.info;http://abhinavsh.info", "dblp": "232/3030;;75/5436-5;65/10572", "google_scholar": "KRB9iksAAAAJ;tC3td8cAAAAJ;L7fTK1MAAAAJ;mIF9BowAAAAJ", "orcid": "0000-0003-4364-0262;;;0000-0001-8928-8554", "linkedin": ";kamalgupta09;;", "or_profile": "~Sharath_Girish1;~Kamal_Gupta1;~Saurabh_Singh1;~Abhinav_Shrivastava2", "aff": "University of Maryland, College Park;University of Maryland, College Park;Google;Department of Computer Science, University of Maryland, College Park", "aff_domain": "umd.edu;umd.edu;google.com;cs.umd.edu", "position": "PhD student;PhD student;Research Scientist;Assistant Professor", "bibtex": "@inproceedings{\ngirish2023lilnetx,\ntitle={LilNetX: Lightweight Networks with {EX}treme Model Compression and Structured Sparsification},\nauthor={Sharath Girish and Kamal Gupta and Saurabh Singh and Abhinav Shrivastava},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=NVZvalzCLg}\n}", "github": "", "project": "", "reviewers": "1dzT;HHQc;TxzT;gqW8", "pdf_size": 1023077, "recommendation": "6;6;6;8", "confidence": "4;4;4;4", "correctness": "3;3;3;4", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;2;2;4", "wc_summary_paper": "39;83;45;34", "wc_strength_and_weaknesses": "160;250;73;203", "wc_clarity_quality_novelty_and_reproducibility": "6;82;1;55", "wc_summary_review": "11;68;1;23", "wc_review": "216;483;120;315", "wc_reply_reviewers": "0;57;0;17", "wc_reply_authors": "794;781;232;497", 
"reply_reviewers": "0;1;0;1", "reply_authors": "2;3;1;2", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 50.25, 19.30511590226798 ], "wc_strength_and_weaknesses_avg": [ 171.5, 65.17092910186258 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 36.0, 33.91902121229326 ], "wc_summary_review_avg": [ 25.75, 25.606395685453272 ], "wc_review_avg": [ 283.5, 134.23952473098228 ], "wc_reply_reviewers_avg": [ 18.5, 23.286262044390035 ], "wc_reply_authors_avg": [ 576.0, 231.3687532922283 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 1.0, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12963842915124125326&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=NVZvalzCLg", "email": "umd.edu;umd.edu;google.com;cs.umd.edu", "author_num": 4, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "University of Maryland;Google;University of Maryland, College Park", "aff_unique_dep": ";Google;Department of Computer Science", "aff_unique_url": "https://www/umd.edu;https://www.google.com;https://www/umd.edu", "aff_unique_abbr": "UMD;Google;UMD", "aff_campus_unique_index": "0;0;1;0", "aff_campus_unique": "College Park;Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "NWZOL5kZv6", "title": "Analyzing adversarial robustness of vision transformers against spatial and spectral attacks", "track": "main", "status": "Withdraw", "tldr": "We discover that Transformers are vulnerable to adversarial attacks perturbing phase information of images in the frequency domain.", "abstract": "Vision Transformers have emerged as a powerful architecture that can outperform convolutional neural networks (CNNs) in image classification tasks. Several attempts have been made to understand robustness of Transformers against adversarial attacks, but existing studies draw inconsistent results, i.e., some conclude that Transformers are more robust than CNNs, while some others find that they have similar degrees of robustness. In this paper, we address two issues unexplored in the existing studies examining adversarial robustness of Transformers. First, we argue that the image quality should be simultaneously considered in evaluating adversarial robustness. We find that the superiority of one architecture to another in terms of robustness can change depending on the attack strength expressed by the quality of the attacked images. Second, by noting that Transformers and CNNs rely on different types of information in images, we formulate an attack framework as a tool for implementing flexible attacks, where an image can be attacked in the spectral domain as well as in the spatial domain. This attack perturbs the magnitude and phase information of particular frequency components selectively. Through extensive experiments, we find that Transformers tend to rely more on phase information and low frequency information than CNNs, and thus sometimes they are even more vulnerable under frequency-selective attacks. 
It is our hope that this work provides new perspectives in understanding the properties and adversarial robustness of Transformers.", "keywords": "Transformers;adversarial attack;Fourier transform", "primary_area": "", "supplementary_material": "/attachment/daa56b624e6f3ad32ef8cedbe00395d97cd2cc96.zip", "author": "Gihyun Kim;Jong-Seok Lee", "authorids": "kkh9314@yonsei.ac.kr;~Jong-Seok_Lee1", "gender": ";", "homepage": ";http://mcml.yonsei.ac.kr", "dblp": ";70/1152", "google_scholar": ";YGwwt6cAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "kkh9314@yonsei.ac.kr;~Jong-Seok_Lee1", "aff": ";Yonsei University", "aff_domain": ";yonsei.ac.kr", "position": ";Full Professor", "bibtex": "@misc{\nkim2023analyzing,\ntitle={Analyzing adversarial robustness of vision transformers against spatial and spectral attacks},\nauthor={Gihyun Kim and Jong-Seok Lee},\nyear={2023},\nurl={https://openreview.net/forum?id=NWZOL5kZv6}\n}", "github": "", "project": "", "reviewers": "VUPr;nnZM;zagK", "site": "https://openreview.net/forum?id=NWZOL5kZv6", "pdf_size": 3849810, "recommendation": "3;3;5", "confidence": "4;4;4", "correctness": "3;2;3", "technical_novelty": "2;2;3", "empirical_novelty": "2;2;2", "wc_summary_paper": "95;100;56", "wc_strength_and_weaknesses": "304;498;174", "wc_clarity_quality_novelty_and_reproducibility": "183;17;15", "wc_summary_review": "69;48;34", "wc_review": "651;663;279", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 83.66666666666667, 19.669491322575904 ], "wc_strength_and_weaknesses_avg": [ 325.3333333333333, 133.12984472144313 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 71.66666666666667, 78.72878903058412 ], "wc_summary_review_avg": [ 50.333333333333336, 14.383632673594278 ], "wc_review_avg": [ 531.0, 178.25823964125755 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6971847816688210945&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Yonsei University", "aff_unique_dep": "", "aff_unique_url": "https://www.yonsei.ac.kr", "aff_unique_abbr": "Yonsei", "aff_country_unique_index": "0", "aff_country_unique": "South Korea" }, { "id": "NYjXrU_f20G", "title": "Adversarial Attack Detection Through Network Transport Dynamics", "track": "main", "status": "Reject", "tldr": "We propose a detector of adversarial attacks inspired by the dynamic viewpoint of neural networks and a regularization that improves detection of adversarial attacks and test accuracy.", "abstract": "Adversarial attacks are perturbations to the input that don't change its class for a human observer, but fool a neural network into changing its prediction. In this paper, we propose a detector of such attacks that is based on the view of residual networks as discrete dynamical systems. 
The detector tells clean inputs from abnormal ones by comparing the discrete vector fields they follow throughout the network's layers before the final classification layer. This detector compares favorably to other detectors on seen and unseen attacks. We also show that regularizing this vector field during training makes the network more regular on the data distribution's support, thus making the network's activations on clean samples more distinguishable from those of abnormal samples. This regularization of the network's dynamics improves the performance of any detection method that uses the internal embeddings as inputs, while also improving the network's test accuracy.", "keywords": "Adversarial Attacks;Deep Learning;Optimal Transport;Residual Networks;Regularization", "primary_area": "", "supplementary_material": "/attachment/57427f5afe6f5a48952e0620e1a3e543f766456f.zip", "author": "Skander Karkar;patrick gallinari;Alain Rakotomamonjy", "authorids": "~Skander_Karkar1;~patrick_gallinari1;~Alain_Rakotomamonjy1", "gender": "M;M;", "homepage": "https://github.com/skander-karkar;;", "dblp": "274/7334.html;g/PatrickGallinari;", "google_scholar": "QfeddpUAAAAJ;rFaxB20AAAAJ;", "orcid": ";;", "linkedin": "skander-karkar-8b11b2266/;;", "or_profile": "~Skander_Karkar1;~patrick_gallinari1;~Alain_Rakotomamonjy1", "aff": "Sorbonne University;Sorbonne Universite;", "aff_domain": "sorbonne-universite.fr;sorbonne-universite.fr;", "position": "PhD student;Full Professor;", "bibtex": "@misc{\nkarkar2023adversarial,\ntitle={Adversarial Attack Detection Through Network Transport Dynamics},\nauthor={Skander Karkar and patrick gallinari and Alain Rakotomamonjy},\nyear={2023},\nurl={https://openreview.net/forum?id=NYjXrU_f20G}\n}", "github": "", "project": "", "reviewers": "jGWF;J6gT;Xhhn", "site": "https://openreview.net/forum?id=NYjXrU_f20G", "pdf_size": 605400, "recommendation": "5;5;8", "confidence": "3;2;5", "correctness": "3;3;4", "technical_novelty": "3;3;3", "empirical_novelty": "3;2;3", "wc_summary_paper": "77;60;167", "wc_strength_and_weaknesses": "213;266;70", "wc_clarity_quality_novelty_and_reproducibility": "29;30;101", "wc_summary_review": "52;29;1242", "wc_review": "371;385;1580", "wc_reply_reviewers": "0;0;273", "wc_reply_authors": "1189;1663;1276", "reply_reviewers": "0;0;3", "reply_authors": "2;2;3", "recommendation_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.3333333333333335, 1.247219128924647 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 101.33333333333333, 46.949145063805176 ], "wc_strength_and_weaknesses_avg": [ 183.0, 82.7808351411525 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 53.333333333333336, 33.70789554721894 ], "wc_summary_review_avg": [ 441.0, 566.4703581535989 ], "wc_review_avg": [ 778.6666666666666, 566.6570587420774 ], "wc_reply_reviewers_avg": [ 91.0, 128.69343417595164 ], "wc_reply_authors_avg": [ 1376.0, 206.0242704149198 ], "reply_reviewers_avg": [ 1.0, 1.4142135623730951 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.9449111825230683, "corr_recommendation_correctness": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:YtYtLZrQC9wJ:scholar.google.com/&scioq=Adversarial+Attack+Detection+Through+Network+Transport+Dynamics&hl=en&as_sdt=0,5",
"gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Sorbonne University", "aff_unique_dep": "", "aff_unique_url": "https://www.sorbonne.universite.fr", "aff_unique_abbr": "Sorbonne", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "France" }, { "id": "NYtq-CsRP3H", "title": "Parameter Averaging for Feature Ranking", "track": "main", "status": "Reject", "tldr": "In this work, we introduce a novel method based on parameter averaging to estimate accurate and robust feature importance in tabular data setting, referred as XTab.", "abstract": "Neural Networks are known to be sensitive to initialisation. The methods that rely on neural networks for feature ranking are not robust since they can have variations in their ranking when the model is initialized and trained with different random seeds. In this work, we introduce a novel method based on parameter averaging to estimate accurate and robust feature importance in tabular data setting, referred as XTab. We first initialize and train multiple instances of a shallow network (referred as local masks) with \"different random seeds\" for a downstream task. We then obtain a global mask model by \"averaging the parameters\" of local masks. We show that although the parameter averaging might result in a global model with higher loss, it still leads to the discovery of the ground-truth feature importance more consistently than an individual model does. We conduct extensive experiments on a variety of synthetic and real-world data, demonstrating that the XTab can be used to obtain the global feature importance that is not sensitive to sub-optimal model initialisation.", "keywords": "Parameter averaging;feature ranking;feature importance;robustness;interpretability;tabular data", "primary_area": "", "supplementary_material": "/attachment/d08f41d61e5d997d9116b0651ed2677063794ed8.zip", "author": "Talip Ucar;Ehsan Hajiramezanali", "authorids": "~Talip_Ucar2;~Ehsan_Hajiramezanali1", "gender": ";M", "homepage": ";http://ehsanhajiramezanali.github.io/", "dblp": ";225/3486", "google_scholar": ";20I_DMoAAAAJ", "orcid": ";", "linkedin": ";ehsan-hajiramezanali-978a3b52/", "or_profile": "~Talip_Ucar2;~Ehsan_Hajiramezanali1", "aff": ";Genentech", "aff_domain": ";gene.come", "position": ";Principal Researcher", "bibtex": "@misc{\nucar2023parameter,\ntitle={Parameter Averaging for Feature Ranking},\nauthor={Talip Ucar and Ehsan Hajiramezanali},\nyear={2023},\nurl={https://openreview.net/forum?id=NYtq-CsRP3H}\n}", "github": "", "project": "", "reviewers": "ZHfL;iffa;brop;G9vP", "site": "https://openreview.net/forum?id=NYtq-CsRP3H", "pdf_size": 3704935, "recommendation": "3;5;5;5", "confidence": "3;4;3;3", "correctness": "2;2;4;3", "technical_novelty": "2;3;2;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "43;104;47;96", "wc_strength_and_weaknesses": "167;61;245;14", "wc_clarity_quality_novelty_and_reproducibility": "85;460;20;10", "wc_summary_review": "30;25;53;10", "wc_review": "325;650;365;130", "wc_reply_reviewers": "0;83;0;0", "wc_reply_authors": "692;1851;754;472", "reply_reviewers": "0;1;0;0", "reply_authors": "1;4;1;1", "recommendation_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.82915619758885 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 72.5, 27.681221071332818 ], "wc_strength_and_weaknesses_avg": [ 121.75, 90.19250246001604 ], 
"wc_clarity_quality_novelty_and_reproducibility_avg": [ 143.75, 184.84368396025872 ], "wc_summary_review_avg": [ 29.5, 15.435349040433131 ], "wc_review_avg": [ 367.5, 185.75857988259924 ], "wc_reply_reviewers_avg": [ 20.75, 35.94005425705421 ], "wc_reply_authors_avg": [ 942.25, 535.0291464023245 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 0.5222329678670935, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:6NBfz7Y479sJ:scholar.google.com/&scioq=Parameter+Averaging+for+Feature+Ranking&hl=en&as_sdt=0,5", "gs_version_total": 3, "aff_unique_index": "0", "aff_unique_norm": "Genentech", "aff_unique_dep": "", "aff_unique_url": "https://www.genentech.com", "aff_unique_abbr": "Genentech", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "NZ8Gb5GOrRu", "title": "Deep Power Laws for Hyperparameter Optimization", "track": "main", "status": "Reject", "tldr": "Multi-fidelity hyperparameter optimization with deep power laws that achieves state-of-the-art results across diverse benchmarks.", "abstract": "Hyperparameter optimization is an important subfield of machine learning that focuses on tuning the hyperparameters of a chosen algorithm to achieve peak performance. Recently, there has been a stream of methods that tackle the issue of hyperparameter optimization, however, most of the methods do not exploit the scaling law property of learning curves. In this work, we propose Deep Power Law (DPL), a neural network model conditioned to yield predictions that follow a power-law scaling pattern. Our model dynamically decides which configurations to pause and train incrementally by making use of multi-fidelity estimation. We compare our method against 7 state-of-the-art competitors on 3 benchmarks related to tabular, image, and NLP datasets covering 59 diverse search spaces. 
Our method achieves the best any-time results across all benchmarks, outperforming all competitors.", "keywords": "hyperparameter optimization;multi-fidelity optimization;power laws;deep neural networks;deep power laws.", "primary_area": "", "supplementary_material": "", "author": "Arlind Kadra;Maciej Janowski;Martin Wistuba;Josif Grabocka", "authorids": "~Arlind_Kadra1;~Maciej_Janowski1;~Martin_Wistuba1;~Josif_Grabocka1", "gender": "M;M;M;M", "homepage": ";https://github.com/worstseed;;https://www.utn.de/departments/department-engineering/machine-learning-lab/", "dblp": "252/5295;219/8260;https://dblp.uni-trier.de/pers/hd/w/Wistuba:Martin;117/4936", "google_scholar": "bMa0KUcAAAAJ;;https://scholar.google.co.uk/citations?user=pTULHVsAAAAJ;KRy27XcAAAAJ", "orcid": "0000-0001-9308-6576;;;", "linkedin": ";;https://linkedin.com/in/wistuba/;", "or_profile": "~Arlind_Kadra1;~Maciej_Janowski1;~Martin_Wistuba1;~Josif_Grabocka1", "aff": "Universit\u00e4t Freiburg;Albert-Ludwigs-Universit\u00e4t Freiburg;Amazon;Universit\u00e4t Freiburg", "aff_domain": "uni-freiburg.de;uni-freiburg.de;amazon.com;uni-freiburg.de", "position": "PhD student;PhD student;Researcher;Assistant Professor", "bibtex": "@misc{\nkadra2023deep,\ntitle={Deep Power Laws for Hyperparameter Optimization},\nauthor={Arlind Kadra and Maciej Janowski and Martin Wistuba and Josif Grabocka},\nyear={2023},\nurl={https://openreview.net/forum?id=NZ8Gb5GOrRu}\n}", "github": "", "project": "", "reviewers": "KFvH;vKZP;3aYj;KEEA", "site": "https://openreview.net/forum?id=NZ8Gb5GOrRu", "pdf_size": 573903, "recommendation": "3;5;5;6", "confidence": "4;4;3;1", "correctness": "3;2;2;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;3;2;3", "wc_summary_paper": "48;240;96;73", "wc_strength_and_weaknesses": "142;821;228;128", "wc_clarity_quality_novelty_and_reproducibility": "117;209;65;21", "wc_summary_review": "69;135;38;20", "wc_review": "376;1405;427;242", "wc_reply_reviewers": "119;228;98;0", "wc_reply_authors": "821;1543;856;181", "reply_reviewers": "1;1;1;0", "reply_authors": "2;3;3;1", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.0, 1.224744871391589 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 114.25, 74.55995909333642 ], "wc_strength_and_weaknesses_avg": [ 329.75, 286.19606478776046 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 103.0, 70.0 ], "wc_summary_review_avg": [ 65.5, 43.78641341786285 ], "wc_review_avg": [ 612.5, 462.51189173901247 ], "wc_reply_reviewers_avg": [ 111.25, 80.99807096468409 ], "wc_reply_authors_avg": [ 850.25, 481.8419735763998 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.7492686492653552, "corr_recommendation_correctness": -0.2294157338705618, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12594621753386084057&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Freiburg;Albert-Ludwigs-Universit\u00e4t Freiburg;Amazon", "aff_unique_dep": ";;Amazon.com, Inc.", "aff_unique_url": "https://www.uni-freiburg.de;https://www.uni-freiburg.de;https://www.amazon.com", "aff_unique_abbr": "Uni Freiburg;Albert-Ludwigs-Universit\u00e4t;Amazon", "aff_campus_unique_index": "1", "aff_campus_unique": ";Freiburg",
"aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Germany;United States" }, { "id": "NZZoABNZECq", "title": "Mechanistic Mode Connectivity", "track": "main", "status": "Reject", "tldr": "", "abstract": "With the rise of pretrained models, fine-tuning has become of central importance in deep learning. However, unlike retraining from scratch, fine-tuning can fail to qualitatively change the behavior of a pre-trained network. For instance, we find in practice that naive fine-tuning does not eliminate a model\u2019s sensitivity to spurious features. To understand and address this limitation, we study the geometry of neural network loss landscapes through the lens of mode-connectivity. Our work addresses two questions about mode-connectivity: 1) Are models trained on different data distributions mode-connected? 2) Can we fine tune a pre-trained model to switch modes? We define a notion of mechanistic mode-connectivity, and find that only models that already share the same invariances (which we call \u201cmechanistically similar\u201d) are mechanistically mode-connected. We hypothesize this property explains inability of naive fine-tuning methods to induce invariance to spurious features. Based on our analysis, we propose and validate a method of \u201cmechanistic fine-tuning\u201d called connectivity-based fine-tuning (CBFT)", "keywords": "Loss landscapes;Mechanisms;Mode Connectivity", "primary_area": "", "supplementary_material": "/attachment/36a3bcce238490edcc73ff967d3c6bb10d09dc86.zip", "author": "Ekdeep Singh Lubana;Eric J Bigelow;Robert P. Dick;David Krueger;Hidenori Tanaka", "authorids": "~Ekdeep_Singh_Lubana1;~Eric_J_Bigelow1;~Robert_P._Dick1;~David_Krueger1;~Hidenori_Tanaka1", "gender": "M;;M;M;", "homepage": "https://ekdeepslubana.github.io/;;http://robertdick.org/;https://mila.umontreal.ca/en/person/david-scott-krueger/;https://sites.google.com/view/htanaka/home", "dblp": "228/2683;;84/523.html;142/2741.html;", "google_scholar": "https://scholar.google.co.in/citations?user=OP7S3vsAAAAJ;;;https://scholar.google.ca/citations?user=5Uz70IoAAAAJ;f_pWOGIAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Ekdeep_Singh_Lubana1;~Eric_J_Bigelow1;~Robert_P._Dick1;~David_Krueger1;~Hidenori_Tanaka1", "aff": "University of Michigan;;University of Michigan;University of Cambridge;Physics & Informatics Lab, NTT Research, Inc.", "aff_domain": "umich.edu;;umich.edu;cam.ac.uk;ntt-research.com", "position": "PhD student;;Full Professor;Assistant Professor;Senior Research Scientist", "bibtex": "@misc{\nlubana2023mechanistic,\ntitle={Mechanistic Mode Connectivity},\nauthor={Ekdeep Singh Lubana and Eric J Bigelow and Robert P. 
Dick and David Krueger and Hidenori Tanaka},\nyear={2023},\nurl={https://openreview.net/forum?id=NZZoABNZECq}\n}", "github": "", "project": "", "reviewers": "X6v6;Y4FF;iiL3;bN6r;fq5i", "site": "https://openreview.net/forum?id=NZZoABNZECq", "pdf_size": 5743017, "recommendation": "5;6;6;6;6", "confidence": "3;3;4;2;3", "correctness": "2;3;3;3;3", "technical_novelty": "3;3;3;3;3", "empirical_novelty": "3;3;3;3;3", "wc_summary_paper": "63;54;100;47;100", "wc_strength_and_weaknesses": "528;223;430;98;187", "wc_clarity_quality_novelty_and_reproducibility": "89;39;43;14;91", "wc_summary_review": "58;33;79;20;80", "wc_review": "738;349;652;179;458", "wc_reply_reviewers": "178;11;0;0;0", "wc_reply_authors": "2501;492;1871;697;1091", "reply_reviewers": "1;1;0;0;0", "reply_authors": "5;1;4;2;3", "recommendation_avg": [ 5.8, 0.39999999999999997 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "correctness_avg": [ 2.8, 0.39999999999999997 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 72.8, 22.7806935803105 ], "wc_strength_and_weaknesses_avg": [ 293.2, 160.09672076591698 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 55.2, 30.109134826494103 ], "wc_summary_review_avg": [ 54.0, 24.14125100321025 ], "wc_review_avg": [ 475.2, 202.12807820785315 ], "wc_reply_reviewers_avg": [ 37.8, 70.22933859862273 ], "wc_reply_authors_avg": [ 1330.4, 751.5284691879609 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 3.0, 1.4142135623730951 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 1.0, "gs_citation": 54, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5083555174805166145&as_sdt=8005&sciodt=0,7&hl=en", "gs_version_total": 9, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "University of Michigan;University of Cambridge;NTT Research, Inc.", "aff_unique_dep": ";;Physics & Informatics Lab", "aff_unique_url": "https://www.umich.edu;https://www.cam.ac.uk;https://www.ntt-research.com", "aff_unique_abbr": "UM;Cambridge;NTT Research", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Human-Guided Fair Classification for Natural Language Processing", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10687", "id": "N_g8TT9Cy7f", "poster": "/media/PosterPDFs/ICLR%202023/10687.png?t=1682260389.2606275", "openreview": "https://openreview.net/forum?id=N_g8TT9Cy7f", "slides": "https://iclr.cc/virtual/2023/poster/10687", "video": "https://iclr.cc/virtual/2023/poster/10687", "author_site": "Florian Eddie Dorner, Momchil Peychev, Nikola Konstantinov, Naman Goel, Elliott Ash, Martin Vechev", "tldr": "We provide new methods for generating individual fairness specifications for NLP based on LLMs and validate them in a human study. ", "abstract": "Text classifiers have promising applications in high-stakes tasks such as resume screening and content moderation. These classifiers must be fair and avoid discriminatory decisions by being invariant to perturbations of sensitive attributes such as gender or ethnicity. However, there is a gap between human intuition about these perturbations and the formal similarity specifications capturing them. 
While existing research has started to address this gap, current methods are based on hardcoded word replacements, resulting in specifications with limited expressivity or ones that fail to fully align with human intuition (e.g., in cases of asymmetric counterfactuals). This work proposes novel methods for bridging this gap by discovering expressive and intuitive individual fairness specifications. We show how to leverage unsupervised style transfer and GPT-3's zero-shot capabilities to automatically generate expressive candidate pairs of semantically similar sentences that differ along sensitive attributes. We then validate the generated pairs via an extensive crowdsourcing study, which confirms that many of these pairs align with human intuition about fairness in the context of toxicity classification. Finally, we show how limited amounts of human feedback can be leveraged to learn a similarity specification that can be used to train downstream fairness-aware models. ", "keywords": "Individual Fairness;Style Transfer;NLP;Crowdsourcing;Human Evaluation", "primary_area": "", "supplementary_material": "", "author": "Florian E. Dorner;Momchil Peychev;Nikola Konstantinov;Naman Goel;Elliott Ash;Martin Vechev", "authorids": "~Florian_E._Dorner1;~Momchil_Peychev1;~Nikola_Konstantinov1;~Naman_Goel1;~Elliott_Ash1;~Martin_Vechev1", "gender": "M;M;M;M;;M", "homepage": "https://flodorner.github.io/;https://www.sri.inf.ethz.ch/people/momchil;https://nikolakon.github.io/;http://goelnaman.github.io;https://elliottash.com;https://www.sri.inf.ethz.ch/people/martin", "dblp": "285/5327;210/2351;217/1964;163/3862;271/7737;93/2189.html", "google_scholar": "aYHq31IAAAAJ;RuhLJ8oAAAAJ;https://scholar.google.at/citations?user=0_lvOo8AAAAJ;;o5uDfHMAAAAJ;https://scholar.google.ch/citations?user=aZ1Rh50AAAAJ", "orcid": ";0000-0003-0927-6356;;;0000-0002-6817-7529;", "linkedin": "florian-dorner-242b48172/;;;;;", "or_profile": "~Florian_E._Dorner1;~Momchil_Peychev1;~Nikola_Konstantinov1;~Naman_Goel1;~Elliott_Ash1;~Martin_Vechev1", "aff": "ETHZ - ETH Zurich;ETH Zurich;ETHZ - ETH Zurich;University of Oxford;Swiss Federal Institute of Technology;Swiss Federal Institute of Technology", "aff_domain": "ethz.ch;ethz.ch;ethz.ch;oxford.ac.uk;ethz.ch;ethz.ch", "position": "PhD student;PhD student;Postdoc;Researcher;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\ndorner2023humanguided,\ntitle={Human-Guided Fair Classification for Natural Language Processing},\nauthor={Florian E. 
Dorner and Momchil Peychev and Nikola Konstantinov and Naman Goel and Elliott Ash and Martin Vechev},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=N_g8TT9Cy7f}\n}", "github": "", "project": "", "reviewers": "Y9Ec;cacL;6aFV;Cvse", "pdf_size": 364213, "recommendation": "6;8;8;8", "confidence": "4;4;4;3", "correctness": "3;3;3;4", "technical_novelty": "3;3;3;3", "empirical_novelty": "2;3;2;3", "wc_summary_paper": "106;73;59;226", "wc_strength_and_weaknesses": "57;100;225;230", "wc_clarity_quality_novelty_and_reproducibility": "283;336;69;11", "wc_summary_review": "34;23;29;37", "wc_review": "480;532;382;504", "wc_reply_reviewers": "0;10;24;0", "wc_reply_authors": "1258;807;898;381", "reply_reviewers": "0;1;1;0", "reply_authors": "2;2;2;1", "recommendation_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 116.0, 65.76093065034893 ], "wc_strength_and_weaknesses_avg": [ 153.0, 76.05590049430748 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 174.75, 137.583383807784 ], "wc_summary_review_avg": [ 30.75, 5.3091901453988255 ], "wc_review_avg": [ 474.5, 56.48672410398748 ], "wc_reply_reviewers_avg": [ 8.5, 9.836157786453 ], "wc_reply_authors_avg": [ 836.0, 312.1674230280924 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10789049393003528377&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=N_g8TT9Cy7f", "email": "ethz.ch;ethz.ch;ethz.ch;oxford.ac.uk;ethz.ch;ethz.ch", "author_num": 6, "aff_unique_index": "0;0;0;1;2;2", "aff_unique_norm": "ETH Zurich;University of Oxford;Swiss Federal Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ethz.ch;https://www.ox.ac.uk;https://www.ethz.ch", "aff_unique_abbr": "ETHZ;Oxford;ETH Zurich", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "Switzerland;United Kingdom" }, { "id": "Nae2_YHH-24", "title": "Improving Translation Capabilities of Pre-Trained Multilingual Sequence-to-Sequence Models for Low-Resource Languages", "track": "main", "status": "Withdraw", "tldr": "Empirical experiments on data and techniques for leveraging Pre-Trained Multilingual Sequence-to-Sequence Models for Low-Resource language translation", "abstract": "Performance of Pre-trained Multilingual Sequence-to-Sequence (PMSS) models for translation heavily depends on the amount of monolingual data used in model pre-training. Thus, these models underperform for low-resource languages included in the model, and even more so for languages unseen by the model. In this paper, we focus on the domain-specific translation of low-resource language (LRL) pairs. For a given domain-specific translation task, we investigate the most effective way of utilizing parallel data from auxiliary domains, and the possibility of leveraging the available bitext to improve the translation capabilities of PMSS models for low-resource languages. 
We experiment with several Transfer Learning protocols, considering the domain divergence of the available data.", "keywords": "Pre-Trained Multilingual Sequence-to-Sequence Model;Machine Translation;Low Resource Languages;Empirical Experiments", "primary_area": "", "supplementary_material": "", "author": "En-Shiun Annie Lee", "authorids": "~En-Shiun_Annie_Lee1", "gender": "F", "homepage": "https://www.cs.toronto.edu/~ealee/public/", "dblp": "10/8510.html", "google_scholar": "H84vuJ0AAAAJ", "orcid": "0000-0003-4592-3522", "linkedin": "drannielee/", "or_profile": "~En-Shiun_Annie_Lee1", "aff": "University of Toronto", "aff_domain": "utoronto.ca", "position": "Assistant Professor", "bibtex": "@misc{\nlee2023improving,\ntitle={Improving Translation Capabilities of Pre-Trained Multilingual Sequence-to-Sequence Models for Low-Resource Languages},\nauthor={En-Shiun Annie Lee},\nyear={2023},\nurl={https://openreview.net/forum?id=Nae2_YHH-24}\n}", "github": "", "project": "", "reviewers": "", "site": "https://openreview.net/forum?id=Nae2_YHH-24", "pdf_size": 868426, "recommendation": "", "confidence": "", "correctness": "", "technical_novelty": "", "empirical_novelty": "", "wc_summary_paper": "", "wc_strength_and_weaknesses": "", "wc_clarity_quality_novelty_and_reproducibility": "", "wc_summary_review": "", "wc_review": "", "wc_reply_reviewers": "", "wc_reply_authors": "", "reply_reviewers": "", "reply_authors": "", "recommendation_avg": [ 0, 0 ], "confidence_avg": [ 0, 0 ], "correctness_avg": [ 0, 0 ], "technical_novelty_avg": [ 0, 0 ], "empirical_novelty_avg": [ 0, 0 ], "wc_summary_paper_avg": [ 0, 0 ], "wc_strength_and_weaknesses_avg": [ 0, 0 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 0, 0 ], "wc_summary_review_avg": [ 0, 0 ], "wc_review_avg": [ 0, 0 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 1, 0 ], "authors#_avg": [ 1, 0 ], "corr_recommendation_confidence": 0, "corr_recommendation_correctness": 0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:jUMqRYCECkQJ:scholar.google.com/&scioq=Improving+Translation+Capabilities+of+Pre-Trained+Multilingual+Sequence-to-Sequence+Models+for+Low-Resource+Languages&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "University of Toronto", "aff_unique_dep": "", "aff_unique_url": "https://www.utoronto.ca", "aff_unique_abbr": "U of T", "aff_country_unique_index": "0", "aff_country_unique": "Canada" }, { "title": "Diffusion-based Image Translation using disentangled style and content representation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10846", "id": "Nayau9fwXU", "poster": "/media/PosterPDFs/ICLR%202023/10846.png?t=1681019271.395157", "openreview": "https://openreview.net/forum?id=Nayau9fwXU", "slides": "https://iclr.cc/virtual/2023/poster/10846", "video": "https://iclr.cc/virtual/2023/poster/10846", "author_site": "Gihyun Kwon, Jong Ye", "tldr": "We propose a new method which enables image translation using Denoising Diffusion Probabilistic Model.", "abstract": "Diffusion-based 
image translation guided by semantic texts or a single target image has enabled flexible style transfer that is not limited to specific domains. \nUnfortunately, due to the stochastic nature of diffusion models, it is often difficult to maintain the original content of the image during the reverse diffusion.\nTo address this, here we present a novel diffusion-based unsupervised image translation method, dubbed DiffuseIT, using disentangled style and content representation.\n Specifically, inspired by the splicing Vision Transformer, we extract the intermediate keys of the multi-head self-attention layers of a ViT model and use them as the content preservation loss. Then, image-guided style transfer is performed by matching the [CLS] classification tokens of the denoised samples and the target image, while an additional CLIP loss is used for the text-driven style transfer.\n To further accelerate the semantic change during the reverse diffusion, we also propose a novel semantic divergence loss and resampling strategy. \n Our experimental results show that the proposed method outperforms state-of-the-art baseline models in both text-guided and image-guided translation tasks. ", "keywords": "DDPM;CLIP;Image Translation;ViT", "primary_area": "", "supplementary_material": "", "author": "Gihyun Kwon;Jong Chul Ye", "authorids": "~Gihyun_Kwon1;~Jong_Chul_Ye1", "gender": "M;M", "homepage": "https://sites.google.com/view/gihyunkwon;https://bispl.weebly.com/", "dblp": "241/7060;15/5613", "google_scholar": "yexbg8gAAAAJ;HNMjoNEAAAAJ", "orcid": ";", "linkedin": "gihyun-kwon-b4665a233/;", "or_profile": "~Gihyun_Kwon1;~Jong_Chul_Ye1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nkwon2023diffusionbased,\ntitle={Diffusion-based Image Translation using disentangled style and content representation},\nauthor={Gihyun Kwon and Jong Chul Ye},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=Nayau9fwXU}\n}", "github": "", "project": "", "reviewers": "9VMA;swjc;VSVg;CBU4", "pdf_size": 21490611, "recommendation": "6;6;6;8", "confidence": "2;4;5;3", "correctness": "4;4;3;4", "technical_novelty": "3;3;2;3", "empirical_novelty": "3;2;2;3", "wc_summary_paper": "61;73;128;47", "wc_strength_and_weaknesses": "85;208;394;111", "wc_clarity_quality_novelty_and_reproducibility": "31;44;52;14", "wc_summary_review": "59;24;24;30", "wc_review": "236;349;598;202", "wc_reply_reviewers": "0;63;0;17", "wc_reply_authors": "111;463;819;210", "reply_reviewers": "0;2;0;1", "reply_authors": "2;3;2;2", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 77.25, 30.711357833869865 ], "wc_strength_and_weaknesses_avg": [ 199.5, 121.28994187483148 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 35.25, 14.376630342329875 ], "wc_summary_review_avg": [ 34.25, 14.49784466739798 ], "wc_review_avg": [ 346.25, 155.2004751925715 ], "wc_reply_reviewers_avg": [ 20.0, 25.777897509300484 ], "wc_reply_authors_avg": [ 400.75, 273.47246205056916 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], 
"authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.2581988897471611, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 173, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15965160042866706564&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=Nayau9fwXU", "email": "kaist.ac.kr;kaist.ac.kr", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "title": "Near-optimal Coresets for Robust Clustering", "status": "Top-5%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11273", "id": "Nc1ZkRW8Vde", "poster": "", "openreview": "https://openreview.net/forum?id=Nc1ZkRW8Vde", "slides": "https://iclr.cc/virtual/2023/poster/11273", "video": "https://iclr.cc/virtual/2023/poster/11273", "author_site": "Lingxiao Huang, Shaofeng Jiang, Jianing Lou, Xuan Wu", "tldr": "We obtain an \\epsilon-coreset of near-optimal size for (k, z)-clustering (which includes k-median and k-means) with m outliers", "abstract": "We consider robust clustering problems in $\\mathbb{R}^d$, specifically $k$-clustering problems (e.g., $k$-Median and $k$-Means) with $m$ \\emph{outliers}, where the cost for a given center set $C \\subset \\mathbb{R}^d$ aggregates the distances from $C$ to all but the furthest $m$ data points, instead of all points as in classical clustering. We focus on the $\\epsilon$-coreset for robust clustering, a small proxy of the dataset that preserves the clustering cost within $\\epsilon$-relative error for all center sets. Our main result is an $\\epsilon$-coreset of size $O(m + \\mathrm{poly}(k \\epsilon^{-1}))$ that can be constructed in near-linear time. This significantly improves previous results, which either suffers an exponential dependence on $(m + k)$ [Feldman and Schulman, SODA'12], or has a weaker bi-criteria guarantee [Huang et al., FOCS'18]. Furthermore, we show this dependence in $m$ is nearly-optimal, and the fact that it is isolated from other factors may be crucial for dealing with large number of outliers. We construct our coresets by adapting to the outlier setting a recent framework [Braverman et al., FOCS'22] which was designed for capacity-constrained clustering, overcoming a new challenge that the participating terms in the cost, particularly the excluded $m$ outlier points, are dependent on the center set $C$. We validate our coresets on various datasets, and we observe a superior size-accuracy tradeoff compared with popular baselines including uniform sampling and sensitivity sampling. We also achieve a significant speedup of existing approximation algorithms for robust clustering using our coresets.", "keywords": "clustering;outlier;robustness;coreset", "primary_area": "", "supplementary_material": "/attachment/bf00513be771e6dd37ac023ada087c068d55b321.zip", "author": "Lingxiao Huang;Shaofeng H.-C. 
Jiang;Jianing Lou;Xuan Wu", "authorids": "~Lingxiao_Huang2;~Shaofeng_H.-C._Jiang1;~Jianing_Lou1;~Xuan_Wu2", "gender": "M;M;M;M", "homepage": "https://sites.google.com/site/lingxiaohuang1990;https://shaofengjiang.cn;https://jianinglou.github.io/;", "dblp": "119/4814.html;157/6062;304/2105;54/2088-2", "google_scholar": ";;;lmvViv8AAAAJ", "orcid": ";0000-0001-7972-827X;;", "linkedin": ";;;", "or_profile": "~Lingxiao_Huang2;~Shaofeng_H.-C._Jiang1;~Jianing_Lou1;~Xuan_Wu2", "aff": "Nanjing University;Peking University;School of EECS, Peking University;Huawei Technologies Ltd.", "aff_domain": "nju.edu.cn;pku.edu.cn;pku.edu.cn;huawei.com", "position": "Associate Professor;Assistant Professor;Undergrad student;Researcher", "bibtex": "@inproceedings{\nhuang2023nearoptimal,\ntitle={Near-optimal Coresets for Robust Clustering},\nauthor={Lingxiao Huang and Shaofeng H.-C. Jiang and Jianing Lou and Xuan Wu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=Nc1ZkRW8Vde}\n}", "github": "", "project": "", "reviewers": "qzTd;7BQh;1Ghq;A4Ec", "pdf_size": 585027, "recommendation": "8;8;8;8", "confidence": "3;4;3;4", "correctness": "4;4;4;4", "technical_novelty": "3;4;4;3", "empirical_novelty": "3;3;4;2", "wc_summary_paper": "68;188;253;75", "wc_strength_and_weaknesses": "91;55;197;176", "wc_clarity_quality_novelty_and_reproducibility": "32;10;38;280", "wc_summary_review": "44;17;30;60", "wc_review": "235;270;518;591", "wc_reply_reviewers": "0;0;0;36", "wc_reply_authors": "259;75;381;1293", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;3", "recommendation_avg": [ 8.0, 0.0 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 4.0, 0.0 ], "technical_novelty_avg": [ 3.5, 0.5 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 146.0, 78.00320506235626 ], "wc_strength_and_weaknesses_avg": [ 129.75, 58.63179598136151 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 90.0, 110.19074371289088 ], "wc_summary_review_avg": [ 37.75, 16.005858302509115 ], "wc_review_avg": [ 403.5, 153.68880896148553 ], "wc_reply_reviewers_avg": [ 9.0, 15.588457268119896 ], "wc_reply_authors_avg": [ 502.0, 469.4944089123959 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3430178802402567709&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=Nc1ZkRW8Vde", "email": "nju.edu.cn;pku.edu.cn;pku.edu.cn;huawei.com", "author_num": 4, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Nanjing University;Peking University;Huawei", "aff_unique_dep": ";;Huawei Technologies", "aff_unique_url": "https://www.nju.edu.cn;http://www.pku.edu.cn;https://www.huawei.com", "aff_unique_abbr": "Nanjing U;Peking U;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "NdFKHCFxXjS", "title": "CAN MACHINE TELL THE DISTORTION DIFFERENCE? A REVERSE ENGINEERING STUDY OF ADVERSARIAL ATTACKS", "track": "main", "status": "Desk Reject", "tldr": "", "abstract": "Deep neural networks have achieved remarkable performance in many areas, including image-related classification tasks. 
However, various studies have shown that they are vulnerable to adversarial examples \u2013 images that are carefully crafted to fool well-trained deep neural networks by introducing imperceptible perturbations to the original images. To better understand the inherent characteristics of adversarial attacks, we study the features of three common attack families: gradient-based, score-based, and decision-based. In this paper, we demonstrate that given adversarial examples, attacks from different families can be successfully identified with a simple model. To investigate the reason behind this, we further study the perturbation patterns of different attacks with carefully designed experiments. Experimental results on CIFAR10 and Tiny ImageNet confirm that attacks differ in their distortion patterns. ", "keywords": "adversarial learning;reverse engineering;deep learning;neural network", "primary_area": "", "supplementary_material": "", "author": "Xiawei Wang;Yao Li;Cho-Jui Hsieh;Thomas Chun Man Lee", "authorids": "~Xiawei_Wang1;~Yao_Li1;~Cho-Jui_Hsieh1;~Thomas_Chun_Man_Lee1", "gender": "F;F;M;", "homepage": ";https://liyao880.github.io/yaoli/;http://web.cs.ucla.edu/~chohsieh/index.html;", "dblp": ";;14/2770;", "google_scholar": ";bQ6YhCwAAAAJ;Wy89g4IAAAAJ;", "orcid": ";0000-0002-7195-5774;;", "linkedin": "xiawei-wang/;yao-li-b189574a/;;", "or_profile": "~Xiawei_Wang1;~Yao_Li1;~Cho-Jui_Hsieh1;~Thomas_Chun_Man_Lee1", "aff": "University of California, Davis;University of North Carolina, Chapel Hill;Amazon;", "aff_domain": "ucdavis.edu;unc.edu;amazon.com;", "position": "PhD student;Assistant Professor;visiting scholar;", "bibtex": "@misc{\nwang2023can,\ntitle={{CAN} {MACHINE} {TELL} {THE} {DISTORTION} {DIFFERENCE}? A {REVERSE} {ENGINEERING} {STUDY} {OF} {ADVERSARIAL} {ATTACKS}},\nauthor={Xiawei Wang and Yao Li and Cho-Jui Hsieh and Thomas Chun Man Lee},\nyear={2023},\nurl={https://openreview.net/forum?id=NdFKHCFxXjS}\n}", "github": "", "project": "", "reviewers": "", "site": "https://openreview.net/forum?id=NdFKHCFxXjS", "pdf_size": 1002328, "recommendation": "", "confidence": "", "correctness": "", "technical_novelty": "", "empirical_novelty": "", "wc_summary_paper": "", "wc_strength_and_weaknesses": "", "wc_clarity_quality_novelty_and_reproducibility": "", "wc_summary_review": "", "wc_review": "", "wc_reply_reviewers": "", "wc_reply_authors": "", "reply_reviewers": "", "reply_authors": "", "recommendation_avg": [ 0, 0 ], "confidence_avg": [ 0, 0 ], "correctness_avg": [ 0, 0 ], "technical_novelty_avg": [ 0, 0 ], "empirical_novelty_avg": [ 0, 0 ], "wc_summary_paper_avg": [ 0, 0 ], "wc_strength_and_weaknesses_avg": [ 0, 0 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 0, 0 ], "wc_summary_review_avg": [ 0, 0 ], "wc_review_avg": [ 0, 0 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 1, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0, "corr_recommendation_correctness": 0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=214918482763224633&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of California, Davis;University of North Carolina;Amazon", "aff_unique_dep": ";;Amazon.com, Inc.", "aff_unique_url": "https://www.ucdavis.edu;https://www.unc.edu;https://www.amazon.com", "aff_unique_abbr": "UC Davis;UNC;Amazon", "aff_campus_unique_index": "0;1", "aff_campus_unique": 
"Davis;Chapel Hill;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Learning What and Where: Disentangling Location and Identity Tracking Without Supervision", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11146", "id": "NeDc-Ak-H_", "poster": "", "openreview": "https://openreview.net/forum?id=NeDc-Ak-H_", "slides": "https://iclr.cc/virtual/2023/poster/11146", "video": "https://iclr.cc/virtual/2023/poster/11146", "author_site": "Manuel Traub, Sebastian Otte, Tobias Menge, Matthias Karlbauer, Jannik Thuemmel, Martin V. Butz", "tldr": "Loci: an unsupervised disentangled LOCation and Identity tracking system, which excels on the CATER and related object tracking challenges featuring emergent object permanence and stable entity disentanglement via fully unsupervised learning.", "abstract": "Our brain can almost effortlessly decompose visual data streams into background and salient objects. Moreover, it can anticipate object motion and interactions, which are crucial abilities for conceptual planning and reasoning. Recent object reasoning datasets, such as CATER, have revealed fundamental shortcomings of current vision-based AI systems, particularly when targeting explicit object representations, object permanence, and object reasoning. Here we introduce a self-supervised LOCation and Identity tracking system (Loci), which excels on the CATER tracking challenge. Inspired by the dorsal and ventral pathways in the brain, Loci tackles the binding problem by processing separate, slot-wise encodings of 'what' and 'where'. Loci's predictive coding-like processing encourages active error minimization, such that individual slots tend to encode individual objects. Interactions between objects and object dynamics are processed in the disentangled latent space. Truncated backpropagation through time combined with forward eligibility accumulation significantly speeds up learning and improves memory efficiency. Besides exhibiting superior performance in current benchmarks, Loci effectively extracts objects from video streams and separates them into location and Gestalt components. We believe that this separation offers a representation that will facilitate effective planning and reasoning on conceptual levels.", "keywords": "object permanence;CATER;unsupervised learning;binding problem", "primary_area": "", "supplementary_material": "/attachment/f61adfb6b668422424db3778120718242a277e06.zip", "author": "Manuel Traub;Sebastian Otte;Tobias Menge;Matthias Karlbauer;Jannik Thuemmel;Martin V. 
Butz", "authorids": "~Manuel_Traub1;~Sebastian_Otte1;~Tobias_Menge1;~Matthias_Karlbauer1;~Jannik_Thuemmel1;~Martin_V._Butz2", "gender": "M;;Not Specified;M;;M", "homepage": "https://manuel-traub.de/;;https://There_are_no_urls_of_mine.com;https://uni-tuebingen.de/fakultaeten/mathematisch-naturwissenschaftliche-fakultaet/fachbereiche/informatik/lehrstuehle/cognitive-modeling/staff/matthias-karlbauer/;;https://cm.inf.uni-tuebingen.de", "dblp": "189/1743;;;;;b/MartinVButz.html", "google_scholar": "wz7mZEMAAAAJ;;;;;https://scholar.google.de/citations?user=dIcpfzAAAAAJ", "orcid": "0000-0003-0897-1701;;;0000-0002-4509-7921;;0000-0002-8120-8537", "linkedin": ";;;;jannik-th%C3%BCmmel-40a45a17a/;martin-butz-85b971150/", "or_profile": "~Manuel_Traub1;~Sebastian_Otte1;~Tobias_Menge1;~Matthias_Karlbauer1;~Jannik_Thuemmel1;~Martin_V._Butz2", "aff": "Eberhard-Karls-Universit\u00e4t T\u00fcbingen;;Eberhard-Karls-Universit\u00e4t T\u00fcbingen;Uppsala University;Eberhard-Karls-Universit\u00e4t T\u00fcbingen;University of Tuebingen", "aff_domain": "uni-tuebingen.de;;uni-tuebingen.de;uu.se;uni-tuebingen.de;uni-tuebingen.de", "position": "PhD student;;PhD student;Intern;PhD student;Full Professor", "bibtex": "@inproceedings{\ntraub2023learning,\ntitle={Learning What and Where: Disentangling Location and Identity Tracking Without Supervision},\nauthor={Manuel Traub and Sebastian Otte and Tobias Menge and Matthias Karlbauer and Jannik Thuemmel and Martin V. Butz},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=NeDc-Ak-H_}\n}", "github": "", "project": "", "reviewers": "fFzL;Nkjj;BwtU;vHnv", "pdf_size": 2863979, "recommendation": "6;6;8;8", "confidence": "4;3;3;3", "correctness": "3;4;4;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "0;3;3;3", "wc_summary_paper": "60;62;65;55", "wc_strength_and_weaknesses": "149;211;159;102", "wc_clarity_quality_novelty_and_reproducibility": "337;100;37;38", "wc_summary_review": "82;67;257;35", "wc_review": "628;440;518;230", "wc_reply_reviewers": "104;121;0;29", "wc_reply_authors": "250;284;429;201", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 1.299038105676658 ], "wc_summary_paper_avg": [ 60.5, 3.640054944640259 ], "wc_strength_and_weaknesses_avg": [ 155.25, 38.71934271136327 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 128.0, 123.3349098998333 ], "wc_summary_review_avg": [ 110.25, 86.40999652817953 ], "wc_review_avg": [ 454.0, 145.55411364849843 ], "wc_reply_reviewers_avg": [ 63.5, 50.42072986381692 ], "wc_reply_authors_avg": [ 291.0, 84.9617561023782 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.0, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9443141080888006778&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=NeDc-Ak-H_", "email": "uni-tuebingen.de;;uni-tuebingen.de;uu.se;uni-tuebingen.de;uni-tuebingen.de", "author_num": 6, "aff_unique_index": "0;0;1;0;2", "aff_unique_norm": "Eberhard Karls University of T\u00fcbingen;Uppsala University;University of Tuebingen", "aff_unique_dep": ";;", 
"aff_unique_url": "https://www.uni-tuebingen.de/;https://www.uu.se;https://www.uni-tuebingen.de/", "aff_unique_abbr": "Uni T\u00fcbingen;UU;Uni T\u00fcbingen", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "T\u00fcbingen;", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "Germany;Sweden" }, { "id": "NeH20Y8mDvp", "title": "Edge Wasserstein Distance Loss for Oriented Object Detection", "track": "main", "status": "Withdraw", "tldr": "This paper proposes a novel orinted object regression loss", "abstract": "Regression loss design is an essential topic for oriented object detection. Due to the periodicity of the angle and the ambiguity of width and height definition, traditional L1-distance losses and its variants have been suffered from the metric discontinuity and the square-like problem. As a solution, the distribution based methods show significant advantage by representing oriented boxes as distributions. Differing from exploited the Gaussian distribution to get analytical form of distance measure, we propose a novel oriented regression loss, Wasserstein Distance(EWD) loss, to alleviate the square-like problem.Specifically, for the oriented box representation, we choose a specially-designed distribution whose probability density function is only nonzero over the edges. On this basis, we develop Wasserstein distance as the measure. Besides, based on the edge representation of oriented box, the EWD loss can be generalized to quadrilateral and polynomial regression scenery. Experiments on multiple popular datasets and different detectors show the effectiveness of the proposed method.", "keywords": "oriented object detection;regression loss design", "primary_area": "", "supplementary_material": "", "author": "Yuke Zhu;Yumeng Ruan;Zihua Xiong;Yue Zhang;Feng Wei;Bing Han;Sheng Guo", "authorids": "~Yuke_Zhu4;~Yumeng_Ruan1;~Zihua_Xiong1;~Yue_Zhang16;~Feng_Wei2;~Bing_Han3;~Sheng_Guo4", "gender": "M;M;M;F;M;F;M", "homepage": ";https://github.com/shulou1996;https://xiongzihua.git.io;;;;", "dblp": ";;321/1681;;;;32/319-5", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;;kXsKEncAAAAJ;https://scholar.google.com/citations?hl=en;C9yANEQAAAAJ;https://scholar.google.com.hk/citations?user=mbpgOmEAAAAJ", "orcid": ";;;;;;", "linkedin": ";;;;;;", "or_profile": "~Yuke_Zhu4;~Yumeng_Ruan1;~Zihua_Xiong1;~Yue_Zhang16;~Feng_Wei2;~Bing_Han3;~Sheng_Guo2", "aff": "Alibaba Group;;Alibaba Group;Alibaba Group;;mybank, antgroup;Ant Group", "aff_domain": "antgroup.com;;alibaba-inc.com;alibaba-inc.com;;mybank.cn;antgroup.com", "position": "Researcher;;Researcher;Researcher;;Researcher;Researcher", "bibtex": "@misc{\nzhu2023edge,\ntitle={Edge Wasserstein Distance Loss for Oriented Object Detection},\nauthor={Yuke Zhu and Yumeng Ruan and Zihua Xiong and Yue Zhang and Feng Wei and Bing Han and Sheng Guo},\nyear={2023},\nurl={https://openreview.net/forum?id=NeH20Y8mDvp}\n}", "github": "", "project": "", "reviewers": "sB8r;jpqG;aEZZ;rPs4", "site": "https://openreview.net/forum?id=NeH20Y8mDvp", "pdf_size": 2067082, "recommendation": "3;5;5;6", "confidence": "5;5;4;4", "correctness": "3;3;3;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "0;2;3;3", "wc_summary_paper": "49;67;48;59", "wc_strength_and_weaknesses": "277;170;181;46", "wc_clarity_quality_novelty_and_reproducibility": "54;103;61;30", "wc_summary_review": "25;32;7;20", "wc_review": "405;372;297;155", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": 
"0;0;0;0", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 4.5, 0.5 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.0, 1.224744871391589 ], "wc_summary_paper_avg": [ 55.75, 7.790218225441442 ], "wc_strength_and_weaknesses_avg": [ 168.5, 82.06247619953957 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 62.0, 26.315394733881533 ], "wc_summary_review_avg": [ 21.0, 9.137833441248533 ], "wc_review_avg": [ 307.25, 96.2194756793031 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.6882472016116854, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5277153921082761040&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;1;2", "aff_unique_norm": "Alibaba Group;MYbank;Ant Group", "aff_unique_dep": ";;", "aff_unique_url": "https://www.alibaba.com;;https://www.antgroup.com", "aff_unique_abbr": "Alibaba;;Ant Group", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China;" }, { "id": "NgEuFT-SIgI", "title": "ExtraMix: Extrapolatable Data Augmentation for Regression using Generative Models", "track": "main", "status": "Withdraw", "tldr": "We introduce a new data augmentation method of non-Euclidean data for regression tasks. This method exploits a mixup concept for generating extrapolated samples. Our method can not only generate reliable pseudo-labels, but also improve predictors.", "abstract": "The primary objective of material science is discovery of novel materials. Because an unseen region can have a high probability of target materials (molecules), high predictive accuracy in out-of-distribution and few-shot regions is essential. However, limited data are available in material science because of high labeling costs. To overcome these difficulties, numerous techniques have been proposed for image and text domains. However, applying these techniques to material data is difficult because the data consists of combinatorial (non-Euclidean) input and continuous labels. In particular, in mixup-based methods, mixed labels are clustered in the middle range of the training set, which renders structured samples invalid. In this study, a novel data augmentation method is proposed for non-Euclidean input with regression tasks. (1) A mixup technique capable of extrapolation is defined to broaden not only the structure but also the label distribution. In contrast to existing mixup-based methods, the proposed method minimizes label imbalance. (2) The proposed method optimizes pseudo-label from the mixup-based approaches using decoder's knowledge of generative models. We proved that the proposed method generates high-quality pseudo data for the ZINC database. Furthermore, the phosphorescent organic light-emitting diode was used to prove that the method is effective in real problems with large-sized and highly complex properties. 
Moreover, this method can improve property prediction models.", "keywords": "mixup;out-of-distribution;optimization;generative models;molecule", "primary_area": "", "supplementary_material": "", "author": "Kisoo Kwon;Kuhwan Jeong;Sanghyun Park;Sangha Park;Hoshik Lee;Seung-Yeon Kwak;Sungmin Kim;Kyunghyun Cho", "authorids": "~Kisoo_Kwon1;~Kuhwan_Jeong1;~Sanghyun_Park1;~Sangha_Park1;~Hoshik_Lee1;~Seung-Yeon_Kwak1;~Sungmin_Kim4;~Kyunghyun_Cho1", "gender": ";M;M;F;M;;M;M", "homepage": ";;https://www.insead.edu/phd/student-profiles/sanghyun-park;;;;;http://kyunghyuncho.me", "dblp": ";;;;;;;41/9736", "google_scholar": "https://scholar.google.com/citations?hl=ko;;;https://scholar.google.com/citations?hl=ko;https://scholar.google.co.kr/citations?user=wHuOXJEAAAAJ;https://scholar.google.co.kr/citations?user=V1jvvhgAAAAJ;FnGOYioAAAAJ;https://scholar.google.fi/citations?user=0RAmmIAAAAAJ", "orcid": ";;;;;0000-0001-8009-3128;;", "linkedin": ";kuhwan-jeong-30a6551b8;;;;;;", "or_profile": "~Kisoo_Kwon1;~Kuhwan_Jeong1;~Sanghyun_Park1;~Sangha_Park1;~Hoshik_Lee1;~Seung-Yeon_Kwak1;~Sungmin_Kim4;~Kyunghyun_Cho1", "aff": "Samsung;;INSEAD;Samsung;;Samsung;;New York University", "aff_domain": "samsung.com;;insead.edu;samsung.com;;samsung.com;;nyu.edu", "position": "Principal Researcher;;PhD student;Principal Researcher;;Principal Researcher;;Associate Professor", "bibtex": "@misc{\nkwon2023extramix,\ntitle={ExtraMix: Extrapolatable Data Augmentation for Regression using Generative Models},\nauthor={Kisoo Kwon and Kuhwan Jeong and Sanghyun Park and Sangha Park and Hoshik Lee and Seung-Yeon Kwak and Sungmin Kim and Kyunghyun Cho},\nyear={2023},\nurl={https://openreview.net/forum?id=NgEuFT-SIgI}\n}", "github": "", "project": "", "reviewers": "oq6L;wm8E;A9Rv;zBNS", "site": "https://openreview.net/forum?id=NgEuFT-SIgI", "pdf_size": 2881347, "recommendation": "3;3;5;5", "confidence": "5;3;4;3", "correctness": "2;3;3;3", "technical_novelty": "1;2;2;2", "empirical_novelty": "1;2;2;2", "wc_summary_paper": "96;108;37;76", "wc_strength_and_weaknesses": "179;227;435;145", "wc_clarity_quality_novelty_and_reproducibility": "5;30;42;49", "wc_summary_review": "11;34;37;23", "wc_review": "291;399;551;293", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 79.25, 26.93858756505248 ], "wc_strength_and_weaknesses_avg": [ 246.5, 112.66210543035311 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 31.5, 16.740669042783207 ], "wc_summary_review_avg": [ 26.25, 10.231690964840562 ], "wc_review_avg": [ 383.5, 106.11668106381767 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": -0.30151134457776363, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7073325783773277041&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;0;0;2", "aff_unique_norm": "Samsung;INSEAD;New York University", "aff_unique_dep": "Samsung;;", "aff_unique_url": "https://www.samsung.com;https://www.insead.edu;https://www.nyu.edu", "aff_unique_abbr": 
"Samsung;INSEAD;NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;2", "aff_country_unique": "South Korea;France;United States" }, { "id": "NhR0jUSuelq", "title": "Improving Model Consistency of Decentralized Federated Learning via Sharpness Aware Minimization and Multiple Gossip Approaches", "track": "main", "status": "Reject", "tldr": "", "abstract": "To mitigate the privacy leakages and reduce the communication burden of Federated Learning (FL), decentralized FL (DFL) discards the central server and each client only communicates with its neighbors in the decentralized communication network. However, existing DFL algorithms tend to feature high inconsistency among local models, which results in severe distribution shifts across clients and inferior performance compared with centralized FL (CFL), especially on heterogeneous data or with sparse connectivity of communication topology.\nTo alleviate this challenge, we propose two DFL algorithms named DFedSAM and DFedSAM-MGS to improve the performance.\nSpecifically, DFedSAM leverages gradient perturbation to generate local flatness models via Sharpness Aware Minimization (SAM), which searches for model parameters with uniformly low loss function values. \nIn addition, DFedSAM-MGS further boosts DFedSAM by adopting the technique of Multiple Gossip Steps (MGS) for a better model consistency, which accelerates the aggregation of local flatness models and better balances the communication complexity and learning performance.\nIn the theoretical perspective, we present the improved convergence rates $\\small \\mathcal{O}\\big(\\frac{1}{T}+\\frac{1}{T^2(1-\\lambda)^2}\\big)$ and $\\small \\mathcal{O}\\big(\\frac{1}{T}+\\frac{\\lambda^Q+1}{T^2(1-\\lambda^Q)^2}\\big)$ in the stochastic non-convex setting for DFedSAM and DFedSAM-MGS, respectively, where $1-\\lambda$ is the spectral gap of the gossip matrix $W$ and $Q$ is the gossip steps in MGS. Meanwhile, we empirically confirm that our methods can achieve competitive performance compared with CFL baselines and outperform existing DFL baselines. 
", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Yifan Shi;Li Shen;Kang Wei;Yan Sun;Bo Yuan;Xueqian Wang;Dacheng Tao", "authorids": "~Yifan_Shi1;~Li_Shen1;~Kang_Wei1;~Yan_Sun3;~Bo_Yuan5;~Xueqian_Wang1;~Dacheng_Tao1", "gender": ";M;Not Specified;M;M;M;", "homepage": ";https://sites.google.com/site/mathshenli/home;https://adamwei-boop.github.io/kang-wei.github.io/;;http://boyuan.global-optimization.com/;;", "dblp": ";91/3680-8;;;41/1662-3;43/3563-1;", "google_scholar": ";yVhgENIAAAAJ;CYqbLN8AAAAJ;_-hoDQkAAAAJ;FMiooBoAAAAJ;h9dN_ykAAAAJ;", "orcid": ";;0000-0001-8794-2153;0000-0003-2271-252X;0000-0003-2169-0007;0000-0003-3542-0593;", "linkedin": ";;;;;;", "or_profile": "~Yifan_Shi1;~Li_Shen1;~Kang_Wei1;~Yan_Sun3;~Bo_Yuan5;~Xueqian_Wang1;~Dacheng_Tao1", "aff": ";JD Explore Academy;Hong Kong Polytechnic University;University of Sydney;Research Institute of Tsinghua University in Shenzhen;Tsinghua University;", "aff_domain": ";jd.com;polyu.edu.hk;uni.sydney.edu.au;tsinghua-sz.org;tsinghua.edu.cn;", "position": ";Researcher;Postdoc;MS student;Researcher;Full Professor;", "bibtex": "@misc{\nshi2023improving,\ntitle={Improving Model Consistency of Decentralized Federated Learning via Sharpness Aware Minimization and Multiple Gossip Approaches},\nauthor={Yifan Shi and Li Shen and Kang Wei and Yan Sun and Bo Yuan and Xueqian Wang and Dacheng Tao},\nyear={2023},\nurl={https://openreview.net/forum?id=NhR0jUSuelq}\n}", "github": "", "project": "", "reviewers": "Yu1u;YEku;LjuW;YESX", "site": "https://openreview.net/forum?id=NhR0jUSuelq", "pdf_size": 2228206, "recommendation": "3;3;5;6", "confidence": "5;4;3;4", "correctness": "3;3;3;4", "technical_novelty": "2;2;3;2", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "38;70;38;72", "wc_strength_and_weaknesses": "125;358;119;11", "wc_clarity_quality_novelty_and_reproducibility": "17;51;49;28", "wc_summary_review": "14;93;36;146", "wc_review": "194;572;242;257", "wc_reply_reviewers": "156;0;0;0", "wc_reply_authors": "1256;694;448;522", "reply_reviewers": "2;0;0;0", "reply_authors": "4;2;2;2", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 54.5, 16.515144564913744 ], "wc_strength_and_weaknesses_avg": [ 153.25, 126.61827474736812 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 36.25, 14.306903927824496 ], "wc_summary_review_avg": [ 72.25, 51.421663722598474 ], "wc_review_avg": [ 316.25, 149.47972270512145 ], "wc_reply_reviewers_avg": [ 39.0, 67.54998149518622 ], "wc_reply_authors_avg": [ 730.0, 316.52803983217666 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.5443310539518174, "corr_recommendation_correctness": 0.7777777777777777, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:yFEtvSFwN-gJ:scholar.google.com/&scioq=Improving+Model+Consistency+of+Decentralized+Federated+Learning+via+Sharpness+Aware+Minimization+and+Multiple+Gossip+Approaches&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;3", "aff_unique_norm": "JD;Hong Kong Polytechnic University;University of Sydney;Tsinghua University", "aff_unique_dep": "JD Explore Academy;;;Research Institute", "aff_unique_url": 
";https://www.polyu.edu.hk;https://www.sydney.edu.au;http://www.tsinghua.edu.cn", "aff_unique_abbr": ";PolyU;USYD;Tsinghua", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Hong Kong SAR;Shenzhen", "aff_country_unique_index": "1;2;1;1", "aff_country_unique": ";China;Australia" }, { "id": "NiEtU7blzN", "title": "Large Language Models Can Self-improve", "track": "main", "status": "Reject", "tldr": "Improving Reasoning Ability of Large Language Models in An Unsupervised Fashion", "abstract": "Large Language Models (LLMs) have achieved excellent performances in various tasks. However, fine-tuning an LLM requires extensive supervision. Human, on the other hand, may improve their reasoning abilities by self-thinking without external inputs. In this work, we demonstrate that an LLM is also capable of self-improving with only unlabeled datasets. We use a pre-trained LLM to generate \u201chigh-confidence\u201d rationale-augmented answers for unlabeled questions using Chain-of-Thought prompting and self-consistency, and fine-tune the LLM using those self-generated solutions as target outputs. We show that our approach improves the general reasoning ability of a 540B-parameter LLM (74.4%\u219282.1% on GSM8K, 78.2%\u219283.0% on DROP, 90.0%\u219294.4% on OpenBookQA, and 63.4%\u219267.9% on ANLI-A3) and achieves state-of-the-art-level performance, without any ground truth label. We conduct ablation studies and show that finetuning on reasoning is critical for self-improvement.", "keywords": "natural language processing;unsupervised learning;chain of thought", "primary_area": "", "supplementary_material": "", "author": "Jiaxin Huang;Shixiang Shane Gu;Le Hou;Yuexin Wu;Xuezhi Wang;Hongkun Yu;Jiawei Han", "authorids": "~Jiaxin_Huang1;~Shixiang_Shane_Gu1;~Le_Hou1;~Yuexin_Wu1;~Xuezhi_Wang3;~Hongkun_Yu2;~Jiawei_Han1", "gender": "F;M;M;;M;M;M", "homepage": "https://teapot123.github.io/;http://vision.cs.stonybrook.edu/~lehhou/home/index.html;https://crickwu.github.io;https://research.google/people/105995/;;http://hanj.cs.illinois.edu/;https://sites.google.com/view/gugurus/home", "dblp": "187/2874-1;161/9892;09/1661;70/4090-2;;h/JiaweiHan.html;121/0550", "google_scholar": "DnxrVXgAAAAJ;kQ0HeQIAAAAJ;sd0nprMAAAAJ;ScLUQ-YAAAAJ;;https://scholar.google.com.tw/citations?user=Kv9AbjMAAAAJ;B8wslVsAAAAJ", "orcid": ";0000-0001-7323-5300;;;;0000-0002-3629-2696;", "linkedin": ";;;;;;", "or_profile": "~Jiaxin_Huang1;~Le_Hou1;~Yuexin_Wu1;~Xuezhi_Wang3;~Hongkun_Yu2;~Jiawei_Han1;~Shixiang_Gu1", "aff": "University of Illinois, Urbana Champaign;Google Research;Google;Google DeepMind;;University of Illinois at Urbana-Champaign (UIUC);OpenAI", "aff_domain": "illinois.edu;google.com;google.com;google.com;;illinois.edu;openai.com", "position": "PhD student;Software Engineer;Software Engineer;Research Scientist;;Full Professor;Researcher", "bibtex": "@misc{\nhuang2023large,\ntitle={Large Language Models Can Self-improve},\nauthor={Jiaxin Huang and Shixiang Shane Gu and Le Hou and Yuexin Wu and Xuezhi Wang and Hongkun Yu and Jiawei Han},\nyear={2023},\nurl={https://openreview.net/forum?id=NiEtU7blzN}\n}", "github": "", "project": "", "reviewers": "nphB;ombs;hq7M", "site": "https://openreview.net/forum?id=NiEtU7blzN", "pdf_size": 424845, "recommendation": "3;3;8", "confidence": "4;5;4", "correctness": "2;3;3", "technical_novelty": "2;2;2", "empirical_novelty": "2;2;3", "wc_summary_paper": "76;73;66", "wc_strength_and_weaknesses": "242;489;174", "wc_clarity_quality_novelty_and_reproducibility": "44;95;119", "wc_summary_review": 
"18;95;135", "wc_review": "380;752;494", "wc_reply_reviewers": "183;0;0", "wc_reply_authors": "1361;669;291", "reply_reviewers": "1;0;0", "reply_authors": "3;2;1", "recommendation_avg": [ 4.666666666666667, 2.357022603955158 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 71.66666666666667, 4.189935029992178 ], "wc_strength_and_weaknesses_avg": [ 301.6666666666667, 135.34236423070035 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 86.0, 31.272991542223778 ], "wc_summary_review_avg": [ 82.66666666666667, 48.55466564147626 ], "wc_review_avg": [ 542.0, 155.61490931141526 ], "wc_reply_reviewers_avg": [ 61.0, 86.2670273047588 ], "wc_reply_authors_avg": [ 773.6666666666666, 443.05103794283366 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.5, "corr_recommendation_correctness": 0.5000000000000001, "gs_citation": 572, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11002147151935925310&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 9, "aff_unique_index": "0;1;1;1;0;2", "aff_unique_norm": "University of Illinois Urbana-Champaign;Google;OpenAI", "aff_unique_dep": ";Google Research;", "aff_unique_url": "https://illinois.edu;https://research.google;https://openai.com", "aff_unique_abbr": "UIUC;Google Research;OpenAI", "aff_campus_unique_index": "0;1;1;0", "aff_campus_unique": "Urbana-Champaign;Mountain View;", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Multitask Prompt Tuning Enables Parameter-Efficient Transfer Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11659", "id": "Nk2pDtuhTq", "poster": "", "openreview": "https://openreview.net/forum?id=Nk2pDtuhTq", "slides": "https://iclr.cc/virtual/2023/poster/11659", "video": "https://iclr.cc/virtual/2023/poster/11659", "author_site": "Zhen Wang, Rameswar Panda, Leonid Karlinsky, Rogerio Feris, Huan Sun, Yoon Kim", "tldr": "We propose multitask prompt tuning which learns a single transferable prompt by decomposing and distilling knowledge from multiple task-specific source prompts.", "abstract": "Prompt tuning, in which a base pretrained model is adapted to each task via conditioning on learned prompt vectors, has emerged as a promising approach for efficiently adapting large language models to multiple downstream tasks. However, existing methods typically learn soft prompt vectors from scratch, and it has not been clear how to exploit the rich cross-task knowledge with prompt vectors in a multitask learning setting. We propose multitask prompt tuning (MPT), which first learns a single transferable prompt by distilling knowledge from multiple task-specific source prompts. We then learn multiplicative low rank updates to this shared prompt to efficiently adapt it to each downstream target task. 
Extensive experiments on 23 NLP datasets demonstrate that our proposed approach outperforms the state-of-the-art methods, including the full finetuning baseline in some cases, despite only tuning $0.035\\%$ as many task-specific parameters.", "keywords": "Prompt Tuning;Multitask Learning;Transfer Learning", "primary_area": "", "supplementary_material": "", "author": "Zhen Wang;Rameswar Panda;Leonid Karlinsky;Rogerio Feris;Huan Sun;Yoon Kim", "authorids": "~Zhen_Wang6;~Rameswar_Panda1;~Leonid_Karlinsky3;~Rogerio_Feris1;~Huan_Sun1;~Yoon_Kim1", "gender": "M;M;M;M;F;", "homepage": "https://zhenwang9102.github.io;https://rpand002.github.io/;;http://rogerioferis.com;https://u.osu.edu/ihudas/people/;https://people.csail.mit.edu/yoonkim/", "dblp": "78/6727;126/0986;05/4463;;33/2952-1.html;", "google_scholar": "asBaytUAAAAJ;_ySuu6gAAAAJ;https://scholar.google.co.il/citations?user=WbO7tjYAAAAJ;xt3XLjcAAAAJ;wIFkulcAAAAJ;n_ts4eYAAAAJ", "orcid": "0000-0001-7407-5118;;;;;", "linkedin": "zhenwang9102/;;;;huan-sun-81527924/?originalSubdomain=cn;", "or_profile": "~Zhen_Wang6;~Rameswar_Panda1;~Leonid_Karlinsky3;~Rogerio_Feris1;~Huan_Sun1;~Yoon_Kim1", "aff": "University of California, San Diego;MIT-IBM Watson AI Lab;International Business Machines;International Business Machines;The Ohio State University, Columbus;Massachusetts Institute of Technology", "aff_domain": "ucsd.edu;ibm.com;ibm.com;ibm.com;osu.edu;mit.edu", "position": "Postdoc;Research Scientist;Principal Researcher;Research Manager;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nwang2023multitask,\ntitle={Multitask Prompt Tuning Enables Parameter-Efficient Transfer Learning},\nauthor={Zhen Wang and Rameswar Panda and Leonid Karlinsky and Rogerio Feris and Huan Sun and Yoon Kim},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=Nk2pDtuhTq}\n}", "github": "", "project": "", "reviewers": "iVfY;6YdE;Tz5t;FfyU", "pdf_size": 1237773, "recommendation": "6;6;6;8", "confidence": "4;4;4;4", "correctness": "3;3;3;4", "technical_novelty": "3;3;3;3", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "97;45;109;163", "wc_strength_and_weaknesses": "158;336;201;124", "wc_clarity_quality_novelty_and_reproducibility": "5;118;34;34", "wc_summary_review": "36;2;50;37", "wc_review": "296;501;394;358", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "936;1362;1017;245", "reply_reviewers": "0;0;0;0", "reply_authors": "2;2;2;1", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 103.5, 41.93745342769396 ], "wc_strength_and_weaknesses_avg": [ 204.75, 80.53997454680501 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 47.75, 42.25147926404471 ], "wc_summary_review_avg": [ 31.25, 17.76759691123141 ], "wc_review_avg": [ 387.25, 74.4425113762291 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 890.0, 405.2943374882013 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 1.0, "gs_citation": 128, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14110457886088564537&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=Nk2pDtuhTq", "email": 
"ucsd.edu;ibm.com;ibm.com;ibm.com;osu.edu;mit.edu", "author_num": 6, "aff_unique_index": "0;1;2;2;3;1", "aff_unique_norm": "University of California, San Diego;Massachusetts Institute of Technology;International Business Machines Corporation;Ohio State University", "aff_unique_dep": ";IBM Watson AI Lab;;", "aff_unique_url": "https://www.ucsd.edu;https://www.mitibmwatsonailab.org;https://www.ibm.com;https://www.osu.edu", "aff_unique_abbr": "UCSD;MIT-IBM AI Lab;IBM;OSU", "aff_campus_unique_index": "0;2", "aff_campus_unique": "San Diego;;Columbus", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "DamoFD: Digging into Backbone Design on Face Detection", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12130", "id": "NkJOhtNKX91", "poster": "/media/PosterPDFs/ICLR%202023/12130.png?t=1680797343.8012927", "openreview": "https://openreview.net/forum?id=NkJOhtNKX91", "slides": "https://iclr.cc/virtual/2023/poster/12130", "video": "https://iclr.cc/virtual/2023/poster/12130", "author_site": "Yang Liu, Jiankang Deng, Fei Wang, Lei Shang, Xuansong Xie, Baigui Sun", "tldr": "We propose a novel DDSAR score to characterize stage-wise detection ability, based on which, we employ off-the-shelf NAS technology to search FD-friendly backbone architectures.", "abstract": "Face detection (FD) has achieved remarkable success over the past few years, yet,\nthese leaps often arrive when consuming enormous computation costs. Moreover,\nwhen considering a realistic situation, i.e., building a lightweight face detector\nunder a computation-scarce scenario, such heavy computation cost limits the application\nof the face detector. To remedy this, several pioneering works design\ntiny face detectors through off-the-shelf neural architecture search (NAS) technologies,\nwhich are usually applied to the classification task. Thus, the searched\narchitectures are sub-optimal for the face detection task since some design criteria\nbetween detection and classification task are different. As a representative, the\nface detection backbone design needs to guarantee the stage-level detection ability\nwhile it is not required for the classification backbone. Furthermore, the detection\nbackbone consumes a vast body of inference budgets in the whole detection framework.\nConsidering the intrinsic design requirement and the virtual importance role\nof the face detection backbone, we thus ask a critical question: How to employ\nNAS to search FD-friendly backbone architecture? To cope with this question,\nwe propose a distribution-dependent stage-aware ranking score (DDSAR-Score)\nto explicitly characterize the stage-level expressivity and identify the individual\nimportance of each stage, thus satisfying the aforementioned design criterion of\nthe FD backbone. Based on our proposed DDSAR-Score, we conduct comprehensive\nexperiments on the challenging Wider Face benchmark dataset and achieve\ndominant performance across a wide range of compute regimes. In particular,\ncompared to the tiniest face detector SCRFD-0.5GF, our method is +2.5 % better\nin Average Precision (AP) score when using the same amount of FLOPs. 
The\ncode is available at https://github.com/ly19965/FaceMaas/tree/master/face_project/face_detection/DamoFD.", "keywords": "Face Detection;Neural Architecture Search;Network Expressivity", "primary_area": "", "supplementary_material": "", "author": "Yang Liu;Jiankang Deng;Fei Wang;Lei Shang;Xuansong Xie;Baigui Sun", "authorids": "~Yang_Liu51;~Jiankang_Deng1;~Fei_Wang15;~Lei_Shang1;~Xuansong_Xie1;~Baigui_Sun1", "gender": "M;M;;M;M;M", "homepage": ";https://jiankangdeng.github.io/;;;;", "dblp": "27/3367-5;156/7808;;234/8028;186/8016;52/3194-15", "google_scholar": "t1emSE0AAAAJ;Z_UoQFsAAAAJ;WO1eMcIAAAAJ;M0Ei1zkAAAAJ;ZNhTHywAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";0000-0002-3709-6216;;;0000-0001-7722-4748;", "linkedin": ";jiankang-deng-b45b21b4/?originalSubdomain=uk;;;;", "or_profile": "~Yang_Liu51;~Jiankang_Deng1;~Lei_Shang1;~Xuansong_Xie1;~Baigui_Sun1;~Steven_Wang2", "aff": "Alibaba Group;;Alibaba Group;Alibaba Group;Alibaba Group;Alibaba Group", "aff_domain": "alibaba-inc.com;;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com", "position": "Researcher at Alibaba Group;;Researcher;Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\nliu2023damofd,\ntitle={Damo{FD}: Digging into Backbone Design on Face Detection},\nauthor={Yang Liu and Jiankang Deng and Fei Wang and Lei Shang and Xuansong Xie and Baigui Sun},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=NkJOhtNKX91}\n}", "github": "", "project": "", "reviewers": "9WwS;XsaM;g2gn;8dSd", "pdf_size": 289035, "recommendation": "6;6;6;8", "confidence": "4;4;4;4", "correctness": "3;3;3;4", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "146;69;64;66", "wc_strength_and_weaknesses": "146;148;98;158", "wc_clarity_quality_novelty_and_reproducibility": "44;24;16;24", "wc_summary_review": "48;34;24;9", "wc_review": "384;275;202;257", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "334;805;565;343", "reply_reviewers": "0;0;0;0", "reply_authors": "1;2;1;1", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 86.25, 34.54254622925183 ], "wc_strength_and_weaknesses_avg": [ 137.5, 23.25403190846697 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 27.0, 10.344080432788601 ], "wc_summary_review_avg": [ 28.75, 14.236836024903848 ], "wc_review_avg": [ 279.5, 66.05490140784407 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 511.75, 192.93959546967025 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 1.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17009808668384062302&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=NkJOhtNKX91", "email": "alibaba-inc.com;;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com", "author_num": 6, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Alibaba Group", "aff_unique_dep": "", "aff_unique_url": "https://www.alibaba.com", "aff_unique_abbr": "Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" },
{ "id": "Nkd7AS2USRd", "title": "Protein structure generation via folding diffusion", "track": "main", "status": "Reject", "tldr": "Inspired by the protein folding process, we introduce a new diffusion-based generative model that acts on the inter-residue angles in protein backbones and generates diverse, designable protein structures without needing equivariance mechanisms.", "abstract": "The ability to computationally generate novel yet physically foldable protein structures could lead to new biological discoveries and new treatments targeting yet incurable diseases. Despite recent advances in protein structure prediction, directly generating diverse, novel protein structures from neural networks remains difficult. In this work, we present a new diffusion-based generative model that designs protein backbone structures via a procedure that mirrors the native folding process. We describe protein backbone structure as a series of consecutive angles capturing the relative orientation of the constituent amino acid residues, and generate new structures by denoising from a random, unfolded state towards a stable folded structure. Not only does this mirror how proteins biologically twist into energetically favorable conformations, the inherent shift and rotational invariance of this representation crucially alleviates the need for complex equivariant networks. We train a denoising diffusion probabilistic model with a simple transformer backbone and demonstrate that our resulting model unconditionally generates highly realistic protein structures with complexity and structural patterns akin to those of naturally-occurring proteins. As a useful resource, we release the first open-source codebase and trained models for protein structure diffusion.", "keywords": "Generative modeling of protein backbone structures;structural biology;diffusion;diffusion modeling;generative modeling;proteins;internal coordinates", "primary_area": "", "supplementary_material": "/attachment/e578f88f2d5539c24a2a0721aad52d836e675d01.zip", "author": "Kevin Eric Wu;Kevin K Yang;Rianne van den Berg;James Zou;Alex Xijie Lu;Ava P Amini", "authorids": "~Kevin_Eric_Wu1;~Kevin_K_Yang1;~Rianne_van_den_Berg1;~James_Zou1;~Alex_Xijie_Lu1;~Ava_P_Amini1", "gender": "M;;F;;M;F", "homepage": "https://GitHub.com/wukevin;;https://research.google/people/RiannevandenBerg/;;http://alexluresearch.com/;https://avaamini.com/", "dblp": ";216/0400;198/1077;;;", "google_scholar": ";mq-Vzk8AAAAJ;KARgiboAAAAJ;23ZXZvEAAAAJ;https://scholar.google.ca/citations?user=gz7gLggAAAAJ;w_wosd4AAAAJ", "orcid": ";;0000-0001-5076-2802;;0000-0001-9568-3155;0000-0002-8601-6040", "linkedin": ";;;;;", "or_profile": "~Kevin_Eric_Wu1;~Kevin_K_Yang1;~Rianne_van_den_Berg1;~James_Zou1;~Alex_Xijie_Lu1;~Ava_Soleimany1", "aff": "Stanford University;;Microsoft;Stanford University;Microsoft Research;Microsoft", "aff_domain": "stanford.edu;;microsoft.com;stanford.edu;microsoft.com;microsoft.com", "position": "PhD student;;Researcher;Assistant Professor;Senior Researcher;Researcher", "bibtex": "@misc{\nwu2023protein,\ntitle={Protein structure generation via folding diffusion},\nauthor={Kevin Eric Wu and Kevin K Yang and Rianne van den Berg and James Zou and Alex Xijie Lu and Ava P Amini},\nyear={2023},\nurl={https://openreview.net/forum?id=Nkd7AS2USRd}\n}", "github": "", "project": "", "reviewers": "DFe6;t1c7;Rmta;84e9", "site": "https://openreview.net/forum?id=Nkd7AS2USRd", "pdf_size": 26469487, "recommendation": "3;5;6;8", "confidence": "3;4;3;4", "correctness": "3;1;4;4", 
"technical_novelty": "2;3;3;3", "empirical_novelty": "1;2;2;4", "wc_summary_paper": "80;31;96;57", "wc_strength_and_weaknesses": "196;105;347;130", "wc_clarity_quality_novelty_and_reproducibility": "34;774;65;31", "wc_summary_review": "49;91;37;34", "wc_review": "359;1001;545;252", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1307;1501;841;671", "reply_reviewers": "0;0;0;0", "reply_authors": "2;2;2;2", "recommendation_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 1.224744871391589 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 1.0897247358851685 ], "wc_summary_paper_avg": [ 66.0, 24.50510150968569 ], "wc_strength_and_weaknesses_avg": [ 194.5, 94.112964037905 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 226.0, 316.6678070154906 ], "wc_summary_review_avg": [ 52.75, 22.78568629644497 ], "wc_review_avg": [ 539.25, 286.4649847712631 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1080.0, 336.590255355083 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.5547001962252291, "corr_recommendation_correctness": 0.45291081365783825, "gs_citation": 209, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14313496208169821117&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 12, "aff_unique_index": "0;1;0;1;1", "aff_unique_norm": "Stanford University;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.stanford.edu;https://www.microsoft.com", "aff_unique_abbr": "Stanford;Microsoft", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Decision Transformer under Random Frame Dropping", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10978", "id": "NmZXv4467ai", "poster": "", "openreview": "https://openreview.net/forum?id=NmZXv4467ai", "slides": "https://iclr.cc/virtual/2023/poster/10978", "video": "https://iclr.cc/virtual/2023/poster/10978", "author_site": "Kaizhe Hu, Ray Chen Zheng, Yang Gao, Huazhe Xu", "tldr": "Learning to control against random frame dropping through three original modifications to the Decision Transformer.", "abstract": "Controlling agents remotely with deep reinforcement learning~(DRL) in the real world is yet to come. One crucial stepping stone is to devise RL algorithms that are robust in the face of dropped information from corrupted communication or malfunctioning sensors. Typical RL methods usually require considerable online interaction data that are costly and unsafe to collect in the real world. Furthermore, when applying to the frame dropping scenarios, they perform unsatisfactorily even with moderate drop rates. To address these issues, we propose Decision Transformer under Random Frame Dropping~(DeFog), an offline RL algorithm that enables agents to act robustly in frame dropping scenarios without online interaction. DeFog first randomly masks out data in the offline datasets and explicitly adds the time span of frame dropping as inputs. After that, a finetuning stage on the same offline dataset with a higher mask rate would further boost the performance. 
Empirical results show that DeFog outperforms strong baselines under severe frame drop rates like 90\\%, while maintaining similar returns under non-frame-dropping conditions in the regular MuJoCo control benchmarks and the Atari environments. Our approach offers a robust and deployable solution for controlling agents in real-world environments with limited or unreliable data.", "keywords": "Decision Transformer;Reinforcement Learning;Frame Dropping", "primary_area": "", "supplementary_material": "", "author": "Kaizhe Hu;Ray Chen Zheng;Yang Gao;Huazhe Xu", "authorids": "~Kaizhe_Hu1;~Ray_Chen_Zheng1;~Yang_Gao1;~Huazhe_Xu1", "gender": "M;M;M;M", "homepage": "https://hukz18.github.io/;https://zhengrc19.github.io/;http://yang-gao.weebly.com;http://hxu.rocks", "dblp": "330/4940;235/8101;89/4402-29;164/9006", "google_scholar": "mPpYLhcAAAAJ;gwUGHwsAAAAJ;https://scholar.google.com/citations?hl=en;t9HPFawAAAAJ", "orcid": ";;;", "linkedin": "%E5%BC%80%E5%93%B2-%E8%83%A1-40137718a/?miniProfileUrn=urn%3Ali%3Afs_miniProfile%3AACoAACyMbIEBJhMDJ4b7wLQyHotP_JGOnWDoEDU;ray-zheng-366053132/;yang-gao-45245348/;", "or_profile": "~Kaizhe_Hu1;~Ray_Chen_Zheng1;~Yang_Gao1;~Huazhe_Xu1", "aff": "Tsinghua University;Department of Computer Science and Technology, Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;cs.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;Undergrad student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nhu2023decision,\ntitle={Decision Transformer under Random Frame Dropping},\nauthor={Kaizhe Hu and Ray Chen Zheng and Yang Gao and Huazhe Xu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=NmZXv4467ai}\n}", "github": "", "project": "", "reviewers": "dgXg;qcaZ;4AHJ", "pdf_size": 2222461, "recommendation": "6;6;6", "confidence": "4;3;4", "correctness": "3;3;3", "technical_novelty": "2;2;2", "empirical_novelty": "3;3;3", "wc_summary_paper": "59;107;98", "wc_strength_and_weaknesses": "941;274;356", "wc_clarity_quality_novelty_and_reproducibility": "650;88;164", "wc_summary_review": "59;36;164", "wc_review": "1709;505;782", "wc_reply_reviewers": "125;43;0", "wc_reply_authors": "3050;1314;1250", "reply_reviewers": "1;1;0", "reply_authors": "5;3;3", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 88.0, 20.83266665599966 ], "wc_strength_and_weaknesses_avg": [ 523.6666666666666, 296.99195649414855 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 300.6666666666667, 248.95693514251727 ], "wc_summary_review_avg": [ 86.33333333333333, 55.71554740126155 ], "wc_review_avg": [ 998.6666666666666, 514.8542404301327 ], "wc_reply_reviewers_avg": [ 56.0, 51.85235449491823 ], "wc_reply_authors_avg": [ 1871.3333333333333, 833.8526381934773 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 3.6666666666666665, 0.9428090415820634 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9620642351999445447&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=NmZXv4467ai", "email": 
"tsinghua.edu.cn;cs.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "Nn-7OXvqmSW", "title": "Variance Covariance Regularization Enforces Pairwise Independence in Self-Supervised Representations", "track": "main", "status": "Reject", "tldr": "We study how SSL methods such as VICReg and Barlow Twins enforce pairwise independence of representations via their Variance Covariance regularization (VCReg), improve VICReg using our findings and show VCReg to be beneficial outside of SSL.", "abstract": "Self-Supervised Learning (SSL) methods such as VICReg, Barlow Twins or W-MSE avoid collapse of their joint embedding architectures by constraining or regularizing the covariance matrix of their projector\u2019s output. This study highlights important properties of such strategy, which we coin Variance-Covariance regularization (VCReg). More precisely, we show that VCReg enforces pairwise independence between the features of the learned representation. This result emerges by bridging VCReg applied on the projector\u2019s output to kernel independence criteria applied on the projector\u2019s input. This provides the first theoretical motivations and explanations of VCReg. We empirically validate our findings where (i) we put in evidence which projector\u2019s characteristics favor pairwise independence, (ii) we use these findings to obtain nontrivial performance gains for VICReg, (iii) we demonstrate that the scope of VCReg goes beyond SSL by using it to solve Independent Component Analysis. 
We hope that our findings will support the adoption of VCReg in SSL and beyond.", "keywords": "Self-supervised learning;VICReg;Barlow Twins;HSIC", "primary_area": "", "supplementary_material": "", "author": "Gr\u00e9goire Mialon;Randall Balestriero;Yann LeCun", "authorids": "~Gr\u00e9goire_Mialon1;~Randall_Balestriero1;~Yann_LeCun1", "gender": ";M;M", "homepage": ";https://randallbalestriero.github.io/;http://yann.lecun.com", "dblp": "228/9191;175/5364;l/YannLeCun", "google_scholar": ";S1x_xqcAAAAJ;WLN3QrAAAAAJ", "orcid": ";;", "linkedin": ";randallbalestriero/;", "or_profile": "~Gr\u00e9goire_Mialon1;~Randall_Balestriero1;~Yann_LeCun1", "aff": "Meta Facebook;Meta Facebook;New York University", "aff_domain": "fb.com;facebook.com;nyu.edu", "position": "Postdoc;Postdoc;Full Professor", "bibtex": "@misc{\nmialon2023variance,\ntitle={Variance Covariance Regularization Enforces Pairwise Independence in Self-Supervised Representations},\nauthor={Gr{\\'e}goire Mialon and Randall Balestriero and Yann LeCun},\nyear={2023},\nurl={https://openreview.net/forum?id=Nn-7OXvqmSW}\n}", "github": "", "project": "", "reviewers": "EpBg;MKGJ;UspR;NhDr;PbKY", "site": "https://openreview.net/forum?id=Nn-7OXvqmSW", "pdf_size": 4652371, "recommendation": "3;3;5;6;6", "confidence": "4;4;2;5;3", "correctness": "3;3;3;3;3", "technical_novelty": "2;2;3;3;2", "empirical_novelty": "2;0;2;3;2", "wc_summary_paper": "133;130;29;83;180", "wc_strength_and_weaknesses": "227;279;82;422;119", "wc_clarity_quality_novelty_and_reproducibility": "223;151;32;10;45", "wc_summary_review": "57;8;22;16;67", "wc_review": "640;568;165;531;411", "wc_reply_reviewers": "97;0;0;140;0", "wc_reply_authors": "738;548;253;469;160", "reply_reviewers": "1;0;0;1;0", "reply_authors": "2;1;1;2;1", "recommendation_avg": [ 4.6, 1.3564659966250536 ], "confidence_avg": [ 3.6, 1.019803902718557 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.4, 0.4898979485566356 ], "empirical_novelty_avg": [ 1.8, 0.9797958971132712 ], "wc_summary_paper_avg": [ 111.0, 51.21327952787246 ], "wc_strength_and_weaknesses_avg": [ 225.8, 121.16996327473241 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 92.2, 81.45526379553381 ], "wc_summary_review_avg": [ 34.0, 23.50319127267614 ], "wc_review_avg": [ 463.0, 166.4247577735957 ], "wc_reply_reviewers_avg": [ 47.4, 59.62415617851543 ], "wc_reply_authors_avg": [ 433.6, 207.11021220596535 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.11566298639324801, "corr_recommendation_correctness": 0.0, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18354171781656515248&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Meta;New York University", "aff_unique_dep": "Meta Platforms, Inc.;", "aff_unique_url": "https://meta.com;https://www.nyu.edu", "aff_unique_abbr": "Meta;NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "NnHz2rU0Hjp", "title": "Masked Siamese ConvNets: Towards an Effective Masking Strategy for General-purpose Siamese Networks", "track": "main", "status": "Reject", "tldr": "We propose a masking strategy for siamese networks with ConvNets.", "abstract": "Siamese Networks are a popular self-supervised learning framework that learns useful representation without human supervision 
by encouraging representations to be invariant to distortions. Existing methods heavily rely on hand-crafted augmentations, which are not easily adapted to new domains. To explore a general-purpose or domain-agnostic siamese network, we investigate using masking as augmentations in siamese networks. Recently, masking for siamese networks has only been shown useful with transformer architectures, e.g. MSN and data2vec. In this work, we identify the underlying problems of masking for siamese networks with arbitrary backbones, including ConvNets. We propose an effective and general-purpose masking strategy and demonstrate its effectiveness on various siamese network frameworks. Our method generally improves siamese networks' performances in the few-shot image classification, and object detection tasks.", "keywords": "self-supervised learning;siamese networks;masking;convNets", "primary_area": "", "supplementary_material": "/attachment/8057411f740a1ef1b6d0c6e752c5de2354c6f669.zip", "author": "Li Jing;Jiachen Zhu;Yann LeCun", "authorids": "~Li_Jing1;~Jiachen_Zhu1;~Yann_LeCun1", "gender": "M;M;M", "homepage": "http://jingli.io/;https://cs.nyu.edu/~jz3224/;http://yann.lecun.com", "dblp": "59/6222;250/0741-2;l/YannLeCun", "google_scholar": "VhxDLwcAAAAJ;https://scholar.google.com/citations?hl=en;WLN3QrAAAAAJ", "orcid": ";;", "linkedin": "li-jing-568b3765/;;", "or_profile": "~Li_Jing1;~Jiachen_Zhu1;~Yann_LeCun1", "aff": "OpenAI;New York University;New York University", "aff_domain": "openai.com;nyu.edu;nyu.edu", "position": "Researcher;PhD student;Full Professor", "bibtex": "@misc{\njing2023masked,\ntitle={Masked Siamese ConvNets: Towards an Effective Masking Strategy for General-purpose Siamese Networks },\nauthor={Li Jing and Jiachen Zhu and Yann LeCun},\nyear={2023},\nurl={https://openreview.net/forum?id=NnHz2rU0Hjp}\n}", "github": "", "project": "", "reviewers": "Kw3b;nzgE;29aC", "site": "https://openreview.net/forum?id=NnHz2rU0Hjp", "pdf_size": 3273864, "recommendation": "5;5;6", "confidence": "5;5;2", "correctness": "3;3;3", "technical_novelty": "3;3;2", "empirical_novelty": "0;3;3", "wc_summary_paper": "100;88;99", "wc_strength_and_weaknesses": "31;419;220", "wc_clarity_quality_novelty_and_reproducibility": "189;10;50", "wc_summary_review": "44;47;47", "wc_review": "364;564;416", "wc_reply_reviewers": "0;281;114", "wc_reply_authors": "672;828;367", "reply_reviewers": "0;1;2", "reply_authors": "2;2;3", "recommendation_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 1.4142135623730951 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 1.4142135623730951 ], "wc_summary_paper_avg": [ 95.66666666666667, 5.436502143433363 ], "wc_strength_and_weaknesses_avg": [ 223.33333333333334, 158.41787216795402 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 83.0, 76.71158104658427 ], "wc_summary_review_avg": [ 46.0, 1.4142135623730951 ], "wc_review_avg": [ 448.0, 84.72701261502536 ], "wc_reply_reviewers_avg": [ 131.66666666666666, 115.39593676651802 ], "wc_reply_authors_avg": [ 622.3333333333334, 191.45118321795653 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.9999999999999999, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:dfbZfmNjoroJ:scholar.google.com/&scioq=Masked+Siamese+ConvNets:+Towards+an+Effective+Masking+Strategy+for+General-purpose+Siamese+Networks&hl=en&as_sdt=0,5", "gs_version_total": 2, "aff_unique_index": "0;1;1", "aff_unique_norm": "OpenAI;New York University", "aff_unique_dep": ";", "aff_unique_url": "https://openai.com;https://www.nyu.edu", "aff_unique_abbr": "OpenAI;NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "GoBigger: A Scalable Platform for Cooperative-Competitive Multi-Agent Interactive Simulation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11881", "id": "NnOZT_CR26Z", "poster": "/media/PosterPDFs/ICLR%202023/11881.png?t=1681294340.5249333", "openreview": "https://openreview.net/forum?id=NnOZT_CR26Z", "slides": "https://iclr.cc/virtual/2023/poster/11881", "video": "https://iclr.cc/virtual/2023/poster/11881", "author_site": "Ming Zhang, Shenghan Zhang, Zhenjie Yang, Lekai Chen, Jinliang Zheng, yang chao, Chuming Li, Hang Zhou, Yazhe Niu, Yu Liu", "tldr": "", "abstract": "The emergence of various multi-agent environments has motivated powerful algorithms to explore agents' cooperation or competition. Even though this has greatly promoted the development of multi-agent reinforcement learning (MARL), it is still not enough to support further exploration on the behavior of swarm intelligence between multiple teams, and cooperation between multiple agents due to their limited scalability. To alleviate this, we introduce GoBigger, a scalable platform for cooperative-competition multi-agent interactive simulation. GoBigger is an enhanced environment for the Agar-like game, enabling the simulation of multiple scales of agent intra-team cooperation and inter-team competition. Compared with existing multi-agent simulation environments, our platform supports multi-team games with more than two teams simultaneously, which dramatically expands the diversity of agent cooperation and competition, and can more effectively simulate the swarm intelligent agent behavior. Besides, in GoBigger, the cooperation between the agents in a team can lead to much higher performance. We offer a diverse set of challenging scenarios, built-in bots, and visualization tools for best practices in benchmarking. We evaluate several state-of-the-art algorithms on GoBigger and demonstrate the potential of the environment. We believe this platform can inspire various emerging research directions in MARL, swarm intelligence, and large-scale agent interactive learning. Both GoBigger and its related benchmark are open-sourced. 
More information could be found at https://github.com/opendilab/GoBigger.", "keywords": "Reinforcement Learning;Environment;Cooperation;Competition;Scalable", "primary_area": "", "supplementary_material": "", "author": "Ming Zhang;Shenghan Zhang;Zhenjie Yang;Lekai Chen;Jinliang Zheng;Chao Yang;Chuming Li;Hang Zhou;Yazhe Niu;Yu Liu", "authorids": "~Ming_Zhang10;~Shenghan_Zhang1;~Zhenjie_Yang1;~Lekai_Chen1;~Jinliang_Zheng1;~Chao_Yang3;~Chuming_Li1;~Hang_Zhou9;~Yazhe_Niu1;~Yu_Liu2", "gender": "M;M;M;;M;;;M;M;M", "homepage": "https://scholar.google.com/citations?user=cPDARWYAAAAJ&hl=zh-CN;;https://github.com/jayyoung0802;;https://2toinf.github.io/;;;https://github.com/upia99;https://github.com/PaParaZz1;http://liuyu.us", "dblp": ";;;;156/3720.html;;;;252/5570.html;97/2274-15", "google_scholar": ";R_ExyUQAAAAJ;jVlRiUEAAAAJ;;3j5AHFsAAAAJ;;;;P3BUrBQAAAAJ;", "orcid": ";;;;0009-0000-0605-2969;;;;;", "linkedin": ";;;;;;;;;", "or_profile": "~Ming_Zhang10;~Shenghan_Zhang1;~Zhenjie_Yang1;~Lekai_Chen1;~Jinliang_Zheng1;~Chao_Yang3;~Chuming_Li1;~Hang_Zhou9;~Yazhe_Niu1;~Yu_Liu2", "aff": ";;Shanghai Jiaotong University;;Beijing University of Posts and Telecommunications;;;;The Chinese University of Hong Kong;SenseTime", "aff_domain": ";;sjtu.edu.cn;;bupt.edu.cn;;;;cuhk.edu.hk;sensetime.com", "position": ";;PhD student;;Undergrad student;;;;PhD student;Principal Researcher", "bibtex": "@inproceedings{\nzhang2023gobigger,\ntitle={GoBigger: A Scalable Platform for Cooperative-Competitive Multi-Agent Interactive Simulation},\nauthor={Ming Zhang and Shenghan Zhang and Zhenjie Yang and Lekai Chen and Jinliang Zheng and Chao Yang and Chuming Li and Hang Zhou and Yazhe Niu and Yu Liu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=NnOZT_CR26Z}\n}", "github": "", "project": "", "reviewers": "tWZb;DcyR;3xPp", "pdf_size": 4324212, "recommendation": "3;5;6", "confidence": "5;4;4", "correctness": "3;3;3", "technical_novelty": "1;2;3", "empirical_novelty": "2;2;3", "wc_summary_paper": "38;122;103", "wc_strength_and_weaknesses": "155;246;104", "wc_clarity_quality_novelty_and_reproducibility": "42;101;110", "wc_summary_review": "27;68;180", "wc_review": "262;537;497", "wc_reply_reviewers": "0;30;0", "wc_reply_authors": "775;610;325", "reply_reviewers": "0;1;0", "reply_authors": "1;1;1", "recommendation_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.0, 0.816496580927726 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 87.66666666666667, 35.96603335865043 ], "wc_strength_and_weaknesses_avg": [ 168.33333333333334, 58.73291713813946 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 84.33333333333333, 30.15883876338006 ], "wc_summary_review_avg": [ 91.66666666666667, 64.66494843078092 ], "wc_review_avg": [ 432.0, 121.31226923385229 ], "wc_reply_reviewers_avg": [ 10.0, 14.142135623730951 ], "wc_reply_authors_avg": [ 570.0, 185.87630295441105 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 10, 0 ], "corr_recommendation_confidence": -0.944911182523068, "corr_recommendation_correctness": 0.0, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4301292112437551166&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "pdf": 
"https://openreview.net/pdf?id=NnOZT_CR26Z", "email": ";;sjtu.edu.cn;;bupt.edu.cn;;;;cuhk.edu.hk;sensetime.com", "author_num": 10, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Shanghai Jiao Tong University;Beijing University of Posts and Telecommunications;Chinese University of Hong Kong;SenseTime", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.sjtu.edu.cn;http://www.bupt.edu.cn/;https://www.cuhk.edu.hk;https://www.sensetime.com", "aff_unique_abbr": "SJTU;BUPT;CUHK;SenseTime", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Beijing;Hong Kong SAR", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "No6QvMxdQMo", "title": "SELCOR: Self-Correction for Weakly Supervised Learning", "track": "main", "status": "Withdraw", "tldr": "We propose a self-training based method to reduce noise in weak labels for weakly supervised learning.", "abstract": "Powerful machine learning models often require training with large amounts of labeled data. Collecting precise labels from human supervision is expensive and time-consuming. Instead, it is much easier to obtain large-scale weak labels from multiple weak supervision sources such as rules or knowledge graphs, whereas weak labels could be noisy and make models prone to overfitting. We propose a self-training method for weakly supervised learning without using any true label. Our method learns a joint model with a corrector and a predictor, where the predictor generates pseudo clean labels and the corrector revises weak labels to reproduces the pseudo labels generated by the predictor. The joint model is trained by encouraging consistency between label generation and label correction, such that the predictor and corrector can iteratively improve each other to generate more reliable pseudo label for self-training. In this way, our method makes full use of weak labels and effectively suppresses label noise in weak labels and pseudo labels. 
Experiments on 8 benchmark datasets show that our method outperforms existing weakly supervised methods by large margins.", "keywords": "Weakly Supervised Learning;Label Noise;Self-Training", "primary_area": "", "supplementary_material": "/attachment/810e81483796ad930a2f812438202b9517236b2a.zip", "author": "Yang Zhou;Wee Sun Lee", "authorids": "~Yang_Zhou6;~Wee_Sun_Lee1", "gender": "M;M", "homepage": "https://yangzhou.netlify.app/;http://www.comp.nus.edu.sg/~leews/", "dblp": "07/4580-17.html;86/1498", "google_scholar": "_-cbldUAAAAJ;https://scholar.google.com.sg/citations?user=8PCrLgwAAAAJ", "orcid": "0000-0002-0873-619X;", "linkedin": ";", "or_profile": "~Yang_Zhou6;~Wee_Sun_Lee1", "aff": "Institute of High Performance Computing, Singapore, A*STAR;National University of Singapore", "aff_domain": "ihpc.a-star.edu.sg;nus.edu.sg", "position": "Researcher;Full Professor", "bibtex": "@misc{\nzhou2023selcor,\ntitle={{SELCOR}: Self-Correction for Weakly Supervised Learning},\nauthor={Yang Zhou and Wee Sun Lee},\nyear={2023},\nurl={https://openreview.net/forum?id=No6QvMxdQMo}\n}", "github": "", "project": "", "reviewers": "baMC;5zzy;LADp", "site": "https://openreview.net/forum?id=No6QvMxdQMo", "pdf_size": 1054360, "recommendation": "3;5;5", "confidence": "4;3;4", "correctness": "3;3;3", "technical_novelty": "3;2;2", "empirical_novelty": "2;2;1", "wc_summary_paper": "100;54;32", "wc_strength_and_weaknesses": "365;221;165", "wc_clarity_quality_novelty_and_reproducibility": "95;36;24", "wc_summary_review": "20;56;26", "wc_review": "580;367;247", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "wc_summary_paper_avg": [ 62.0, 28.331372481167705 ], "wc_strength_and_weaknesses_avg": [ 250.33333333333334, 84.243034660967 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 51.666666666666664, 31.030450993965413 ], "wc_summary_review_avg": [ 34.0, 15.748015748023622 ], "wc_review_avg": [ 398.0, 137.70257804413103 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.49999999999999983, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:oIjZ5jXEDPQJ:scholar.google.com/&scioq=SELCOR:+Self-Correction+for+Weakly+Supervised+Learning&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Institute of High Performance Computing;National University of Singapore", "aff_unique_dep": ";", "aff_unique_url": "https://www.ihpc.a-star.edu.sg;https://www.nus.edu.sg", "aff_unique_abbr": "IHPC;NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Singapore" }, { "id": "NpZ7TIs6ws", "title": "Defense against Backdoor Attacks via Identifying and Purifying Bad Neurons", "track": "main", "status": "Reject", "tldr": "We design a backdoor defense method that identifies and purifies the backdoored neurons of victim models with a novel yet effective metric called benign salience.", "abstract": "Recent studies reveal the 
vulnerability of neural networks to backdoor attacks. By embedding backdoors into the hidden neurons with poisoned training data, the backdoor attacker can override normal predictions of the victim model to the attacker-chosen ones whenever the backdoor pattern is present in a testing input. In this paper, to mitigate public concerns about the attack, we propose a novel backdoor defense via identifying and purifying the backdoored neurons of the victim neural network. Specifically, we first define a new metric, called benign salience. By combining the first-order gradient to retain the connections between neurons, benign salience can identify the backdoored neurons with high accuracy. Then, a new Adaptive Regularization (AR) mechanism is proposed to assist in purifying these identified bad neurons via fine-tuning. Due to the ability to adapt to different magnitudes of parameters, AR can provide faster and more stable convergence than the common regularization mechanisms in neuron purifying. Finally, we test the defense effect of our method on ten different backdoor attacks with three benchmark datasets. Experimental results show that our method can decrease the attack success rate by more than 95% on average, which is the best among six state-of-the-art defense methods.", "keywords": "backdoor defense;security;neuron importance evaluation", "primary_area": "", "supplementary_material": "/attachment/f2fbd5f662b2457c5088414e880728be99d4adab.zip", "author": "Mingyuan Fan;Yang Liu;Cen Chen;Ximeng Liu;Wenzhong Guo", "authorids": "~Mingyuan_Fan1;~Yang_Liu71;~Cen_Chen1;~Ximeng_Liu1;~Wenzhong_Guo1", "gender": ";M;F;M;M", "homepage": ";;https://sites.google.com/site/chencenpersonalwebsite/;http://2uu.org/;http://cmcs.fzu.edu.cn/website/f/teacherDetail?id=23", "dblp": ";;152/6215-1.html;134/3945;", "google_scholar": ";IheXzHkAAAAJ;https://scholar.google.com.sg/citations?user=3Mn4S9UAAAAJ;hWyGZU4AAAAJ;", "orcid": ";;0000-0003-0325-1705;;", "linkedin": ";;;;", "or_profile": "~Mingyuan_Fan1;~Yang_Liu71;~Cen_Chen1;~Ximeng_Liu1;~Wenzhong_Guo1", "aff": ";Xi'an University of Electronic Science and Technology;East China Normal University;Fuzhou University;Fuzhou University", "aff_domain": ";xidian.edu.cn;dase.ecnu.edu.cn;fzu.edu.cn;fzu.edu.cn", "position": ";Associate Professor;Associate Professor;Full Professor;Full Professor", "bibtex": "@misc{\nfan2023defense,\ntitle={Defense against Backdoor Attacks via Identifying and Purifying Bad Neurons},\nauthor={Mingyuan Fan and Yang Liu and Cen Chen and Ximeng Liu and Wenzhong Guo},\nyear={2023},\nurl={https://openreview.net/forum?id=NpZ7TIs6ws}\n}", "github": "", "project": "", "reviewers": "krJA;1xEU;etm6;pBHg", "site": "https://openreview.net/forum?id=NpZ7TIs6ws", "pdf_size": 4609696, "recommendation": "3;5;5;5", "confidence": "3;3;3;4", "correctness": "2;3;3;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;2;2;0", "wc_summary_paper": "35;138;38;59", "wc_strength_and_weaknesses": "297;4;76;113", "wc_clarity_quality_novelty_and_reproducibility": "16;18;200;7", "wc_summary_review": "38;208;10;58", "wc_review": "386;368;324;237", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "574;420;340;308", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 67.5, 
41.740268326880695 ], "wc_strength_and_weaknesses_avg": [ 122.5, 108.10296018148624 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 60.25, 80.79101125744126 ], "wc_summary_review_avg": [ 78.5, 76.68604827476769 ], "wc_review_avg": [ 328.75, 57.57332281534565 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 410.5, 102.83360345723571 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 1.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17098752366651509266&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "Xi'an University of Electronic Science and Technology;East China Normal University;Fuzhou University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.xidian.edu.cn/;http://www.ecnu.edu.cn;https://www.fznu.edu.cn", "aff_unique_abbr": "Xidian University;ECNU;FZU", "aff_campus_unique_index": "0", "aff_campus_unique": "Xi'an;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Interpretability in the Wild: a Circuit for Indirect Object Identification in GPT-2 Small", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11341", "id": "NpsVSN6o4ul", "poster": "/media/PosterPDFs/ICLR%202023/11341.png?t=1682885776.8460147", "openreview": "https://openreview.net/forum?id=NpsVSN6o4ul", "slides": "https://iclr.cc/virtual/2023/poster/11341", "video": "https://iclr.cc/virtual/2023/poster/11341", "author_site": "Kevin Wang, Alexandre Variengien, Arthur Conmy, Buck Shlegeris, Jacob Steinhardt", "tldr": "We find a large circuit for a natural language task in GPT-2 small and provide quantitative evaluation of our human-understandable explanation.", "abstract": "Research in mechanistic interpretability seeks to explain behaviors of ML models in terms of their internal components. However, most previous work either focuses on simple behaviors in small models, or describes complicated behaviors in larger models with broad strokes. In this work, we bridge this gap by presenting an explanation for how GPT-2 small performs a natural language task that requires logical reasoning: indirect object identification (IOI). Our explanation encompasses 28 attention heads grouped into 7 main classes, which we discovered using a combination of interpretability approaches including causal interventions and projections.\nTo our knowledge, this investigation is the largest end-to-end attempt at reverse-engineering a natural behavior \"in the wild\" in a language model. We evaluate the reliability of our explanation using three quantitative criteria - faithfulness, completeness and minimality. Though these criteria support our explanation, they also point to remaining gaps in our understanding. 
\nOur work provides evidence that a mechanistic understanding of large ML models is feasible, opening opportunities to scale our understanding to both larger models and more complex tasks.", "keywords": "Mechanistic Interpretability;Transformers;Language Models;Interpretability;Transparency;Science of ML", "primary_area": "", "supplementary_material": "", "author": "Kevin Ro Wang;Alexandre Variengien;Arthur Conmy;Buck Shlegeris;Jacob Steinhardt", "authorids": "~Kevin_Ro_Wang1;~Alexandre_Variengien1;~Arthur_Conmy1;buck@rdwrs.com;~Jacob_Steinhardt1", "gender": "M;M;M;;", "homepage": "https://kevinrowang.com/;https://avariengien.github.io/;https://arthurconmy.github.io/;;", "dblp": ";;;;35/10625", "google_scholar": ";BQOwWG8AAAAJ;;;", "orcid": "0000-0002-1113-7729;;;;", "linkedin": ";;;;", "or_profile": "~Kevin_Ro_Wang1;~Alexandre_Variengien1;~Arthur_Conmy1;buck@rdwrs.com;~Jacob_Steinhardt1", "aff": ";EPFL - EPF Lausanne;Redwood Research;;University of California, Berkeley", "aff_domain": ";epfl.ch;rdwrs.com;;berkeley.edu", "position": ";MS student;Researcher;;Assistant Professor", "bibtex": "@inproceedings{\nwang2023interpretability,\ntitle={Interpretability in the Wild: a Circuit for Indirect Object Identification in {GPT}-2 Small},\nauthor={Kevin Ro Wang and Alexandre Variengien and Arthur Conmy and Buck Shlegeris and Jacob Steinhardt},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=NpsVSN6o4ul}\n}", "github": "", "project": "", "reviewers": "jy1a;wPfU;sZ8X;iR9D", "pdf_size": 1147268, "recommendation": "3;3;8;8", "confidence": "4;4;3;4", "correctness": "2;2;4;4", "technical_novelty": "3;2;4;3", "empirical_novelty": "3;2;4;4", "wc_summary_paper": "184;72;78;275", "wc_strength_and_weaknesses": "1004;351;115;843", "wc_clarity_quality_novelty_and_reproducibility": "283;54;211;137", "wc_summary_review": "55;34;25;123", "wc_review": "1526;511;429;1378", "wc_reply_reviewers": "0;0;0;21", "wc_reply_authors": "1662;778;228;330", "reply_reviewers": "0;0;0;1", "reply_authors": "3;1;1;1", "recommendation_avg": [ 5.5, 2.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.0, 1.0 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 3.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 152.25, 83.70894516119529 ], "wc_strength_and_weaknesses_avg": [ 578.25, 359.7216806087729 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 171.25, 85.13041465892199 ], "wc_summary_review_avg": [ 59.25, 38.382124745771954 ], "wc_review_avg": [ 961.0, 494.6306702985572 ], "wc_reply_reviewers_avg": [ 5.25, 9.093266739736606 ], "wc_reply_authors_avg": [ 749.5, 565.9971289679834 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 1.0, "gs_citation": 472, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16774486063708325192&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=NpsVSN6o4ul", "email": ";epfl.ch;rdwrs.com;;berkeley.edu", "author_num": 5, "aff_unique_index": "0;1;2", "aff_unique_norm": "EPFL;Redwood Research;University of California, Berkeley", "aff_unique_dep": ";;", "aff_unique_url": "https://www.epfl.ch;https://www.redwoodresearch.org;https://www.berkeley.edu", "aff_unique_abbr": "EPFL;Redwood Research;UC Berkeley", 
"aff_campus_unique_index": "0;2", "aff_campus_unique": "Lausanne;;Berkeley", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Switzerland;United States" }, { "id": "NqaGPQXblk", "title": "Visual Transformation Telling", "track": "main", "status": "Reject", "tldr": "Visual Transformation Telling: a new task that requires to reason and describe transformations from a series of images.", "abstract": "In this paper, we propose a new visual reasoning task, called Visual Transformation Telling (VTT). Given a series of states (i.e.~images), a machine is required to describe what happened (i.e.~transformation) between every two adjacent states. Different from most existing visual reasoning tasks, which focus on state reasoning, VTT concentrates on transformation reasoning. Moreover, describing the transformation in the form of language is more natural and closer to the real application than the property change way in the previous TVR task. We collect 13,547 samples from two instructional video datasets, i.e.~CrossTask and COIN, and extract desired states and transformation descriptions to form a suitable VTT benchmark dataset. After that, we introduce an end-to-end learning model for VTT, named TTNet. TTNet consists of three components to mimic human's cognition process of reasoning transformation. First, an image encoder, e.g. CLIP, reads content from each image, then a context encoder links the image content together, and at last, a transformation decoder autoregressively generates transformation descriptions between every two adjacent images. This basic version of TTNet is difficult to meet the cognitive challenge of VTT, that is to identify abstract transformations from images with small visual differences, and the descriptive challenge, which asks to describe the transformation consistently. In response to these difficulties, we propose three strategies to improve TTNet. Specifically, TTNet leverages difference features to emphasize small visual gaps, masked transformation model to stress context by forcing attention to neighbor transformations, and auxiliary category and topic classification tasks to make transformations consistent by sharing underlying semantics among representations. We adapt some typical methods from visual storytelling and dense video captioning tasks, considering their similarity with VTT. Our experimental results show that TTNet achieves better performance on transformation reasoning. 
In addition, our empirical analysis demonstrates the soundness of each module in TTNet, and provides some insight into transformation reasoning.", "keywords": "visual reasoning;transformation;captioning", "primary_area": "", "supplementary_material": "/attachment/6b191feb564028df6d02482a5352762acf02b7c9.zip", "author": "Xin Hong;Yanyan Lan;Liang Pang;Jiafeng Guo;Xueqi Cheng", "authorids": "~Xin_Hong1;~Yanyan_Lan2;~Liang_Pang1;~Jiafeng_Guo1;~Xueqi_Cheng1", "gender": "M;;M;M;M", "homepage": "https://hongxin2019.github.io;;https://pl8787.github.io/;http://www.bigdatalab.ac.cn/gjf/;https://people.ucas.ac.cn/~cxq?language=en", "dblp": "54/1309;00/6040.html;37/11078;02/146;44/912", "google_scholar": "gW-9WOQAAAAJ;;1dgQHBkAAAAJ;https://scholar.google.com/citations?view_op=list_works;hY8aLqAAAAAJ", "orcid": "0000-0003-1524-9362;;0000-0003-1161-8546;;", "linkedin": ";;;;", "or_profile": "~Xin_Hong1;~Yanyan_Lan2;~Liang_Pang1;~Jiafeng_Guo1;~Xueqi_Cheng1", "aff": "Institute of Computing Technology, Chinese Academy of Sciences;Tsinghua University;Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technolgy, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy", "aff_domain": "ict.ac.cn;tsinghua.edu.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn", "position": "PhD student;Full Professor;Associate Professor;Researcher;Full Professor", "bibtex": "@misc{\nhong2023visual,\ntitle={Visual Transformation Telling},\nauthor={Xin Hong and Yanyan Lan and Liang Pang and Jiafeng Guo and Xueqi Cheng},\nyear={2023},\nurl={https://openreview.net/forum?id=NqaGPQXblk}\n}", "github": "", "project": "", "reviewers": "NhJc;ipHd;tWf2", "site": "https://openreview.net/forum?id=NqaGPQXblk", "pdf_size": 8712622, "recommendation": "5;5;6", "confidence": "4;4;3", "correctness": "3;3;3", "technical_novelty": "3;2;3", "empirical_novelty": "2;3;3", "wc_summary_paper": "64;42;132", "wc_strength_and_weaknesses": "514;222;890", "wc_clarity_quality_novelty_and_reproducibility": "78;44;67", "wc_summary_review": "91;59;143", "wc_review": "747;367;1232", "wc_reply_reviewers": "99;0;0", "wc_reply_authors": "1490;692;1308", "reply_reviewers": "1;0;0", "reply_authors": "4;3;3", "recommendation_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 79.33333333333333, 38.30868772948971 ], "wc_strength_and_weaknesses_avg": [ 542.0, 273.42762601219846 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 63.0, 14.165686240583852 ], "wc_summary_review_avg": [ 97.66666666666667, 34.61534662865912 ], "wc_review_avg": [ 782.0, 354.00094161833334 ], "wc_reply_reviewers_avg": [ 33.0, 46.66904755831214 ], "wc_reply_authors_avg": [ 1163.3333333333333, 341.4648184252987 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 3.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.9999999999999997, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8492190954247441662&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "Chinese Academy of Sciences;Tsinghua University", "aff_unique_dep": "Institute of Computing Technology;", 
"aff_unique_url": "http://www.ict.ac.cn;https://www.tsinghua.edu.cn", "aff_unique_abbr": "CAS;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "NrJ-x9KbdZ", "title": "Your Denoising Implicit Model is a Sub-optimal Ensemble of Denoising Predictions", "track": "main", "status": "Reject", "tldr": "", "abstract": "Denoising diffusion models construct a Markov denoising process to learn the transport from Gaussian noise distribution to the data distribution, however require thousands of denoising steps to achieve the SOTA generative performance. Denoising diffusion implicit models (DDIMs) introduce non-Markovian process to largely reduce the required steps, but its performance degenerates as the sampling steps further reducing. In this work, we show that DDIMs belong to our $\\textit{ensemble denoising implicit models}$ which heavily rely on the convex ensemble of obtained denoising predictions. We propose improved DDIM (iDDIM) to demonstrate DDIMs adopt sub-optimal ensemble coefficients. The iDDIM can largely improve on DDIMs, but still deteriorates in the case of a few sampling steps. Thus we further propose $\\textit{generalized denoising implicit model}$ (GDIM) that replace the ensemble prediction with a probabilistic inference conditioned on the obtained states. Then a specific instance $t$-GDIM that only depends on the latest state is parameterized by the conditional energy-based model (EBM) and variational sampler. The models are jointly trained with variational maximum likelihood. Extensive experiments show $t$-GDIM can reduces the sampling steps to only 4 and remains comparable generative quality to other generative models.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/d859279493c397c5512a6a2f5b44766b7f41ea78.zip", "author": "Ge Kan;Jinhu Lu;Tian Wang;Lei Huang;Baochang Zhang;Aichun Zhu;Hichem Snoussi", "authorids": "~Ge_Kan1;~Jinhu_Lu1;~Tian_Wang2;~Lei_Huang1;~Baochang_Zhang1;~Aichun_Zhu1;~Hichem_Snoussi1", "gender": "M;M;M;M;M;M;M", "homepage": ";http://www.futureforum.org.cn/cn/people/152.html;;https://huangleibuaa.github.io/;https://dblp.uni-trier.de/pid/80/3887-1.html;;https://recherche.utt.fr/research-directory/hichem-snoussi", "dblp": ";https://dblp.uni-trier.de/pid/25/6209;;18/1763-15;https://dblp.uni-trier.de/pid/80/3887-1.html;154/1314.html;11/2394", "google_scholar": ";mCjNN7kAAAAJ;IP_f-voAAAAJ;https://scholar.google.com.hk/citations?user=yTshbKkAAAAJ;;https://scholar.google.cz/citations?view_op=list_works;8opMdjMAAAAJ", "orcid": "0000-0002-7652-4017;;;;;;", "linkedin": ";;;;;;hichem-snoussi-utt/?locale=en_US", "or_profile": "~Ge_Kan1;~Jinhu_Lu1;~Tian_Wang2;~Lei_Huang1;~Baochang_Zhang1;~Aichun_Zhu1;~Hichem_Snoussi1", "aff": "Beihang University;Beihang University;Beihang University, China;Beihang University;Beihang University;Nanjing Tech University;Universit\u00e9 de Technologie de Troyes, France", "aff_domain": "buaa.edu.cn;buaa.edu.cn;buaa.edu.cn;buaa.edu.cn;buaa.edu.cn;njtech.edu.cn;utt.fr", "position": "MS student;Full Professor;Associate Professor;Associate Professor;Professor;Associate Professor;Full Professor", "bibtex": "@misc{\nkan2023your,\ntitle={Your Denoising Implicit Model is a Sub-optimal Ensemble of Denoising Predictions},\nauthor={Ge Kan and Jinhu Lu and Tian Wang and Lei Huang and Baochang Zhang and Aichun Zhu and Hichem Snoussi},\nyear={2023},\nurl={https://openreview.net/forum?id=NrJ-x9KbdZ}\n}", "github": "", "project": "", 
"reviewers": "fQjf;UwWS;sayW;CiYD", "site": "https://openreview.net/forum?id=NrJ-x9KbdZ", "pdf_size": 5223148, "recommendation": "5;5;5;6", "confidence": "4;3;4;4", "correctness": "3;4;3;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "0;2;2;3", "wc_summary_paper": "164;64;128;176", "wc_strength_and_weaknesses": "607;41;130;455", "wc_clarity_quality_novelty_and_reproducibility": "272;126;96;36", "wc_summary_review": "80;10;46;54", "wc_review": "1123;241;400;721", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1003;470;506;465", "reply_reviewers": "0;0;0;0", "reply_authors": "2;1;1;1", "recommendation_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.75, 1.0897247358851685 ], "wc_summary_paper_avg": [ 133.0, 43.57751713900185 ], "wc_strength_and_weaknesses_avg": [ 308.25, 231.29134765485716 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 132.5, 86.81445732134712 ], "wc_summary_review_avg": [ 47.5, 25.034975534240093 ], "wc_review_avg": [ 621.25, 337.35913727065406 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 611.0, 226.87331266590172 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": -0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:bd_s7EYXruUJ:scholar.google.com/&scioq=Your+Denoising+Implicit+Model+is+a+Sub-optimal+Ensemble+of+Denoising+Predictions&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0;1;2", "aff_unique_norm": "Beihang University;Nanjing Tech University;Universit\u00e9 de Technologie de Troyes", "aff_unique_dep": ";;", "aff_unique_url": "http://www.buaa.edu.cn/;https://www.njtech.edu.cn;https://wwwutt.fr", "aff_unique_abbr": "BUAA;Nanjing Tech;UTT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;1", "aff_country_unique": "China;France" }, { "id": "Nvlqsofsc6-", "title": "A Neural PDE Solver with Temporal Stencil Modeling", "track": "main", "status": "Reject", "tldr": "We propose a novel Temporal Stencil Modeling (TSM) method for solving time-dependent PDEs in conservation form.", "abstract": "Numerical simulation of non-linear partial differential equations plays a crucial role in modeling physical science and engineering phenomena, such as weather, climate, and aerodynamics. Recent Machine Learning (ML) models trained on low-resolution spatio-temporal signals have shown new promises in capturing important dynamics in high-resolution signals, under the condition that the models can effectively recover the missing details. However, this study shows that significant information is often lost in the low-resolution down-sampled features. To address such issues, we propose a new approach, namely Temporal Stencil Modeling (TSM), which combines the strengths of advanced time-series sequence modeling (with the HiPPO features) and state-of-the-art neural PDE solvers (with learnable stencil modeling). TSM aims to recover the lost information from the PDE trajectories and can be regarded as a temporal generalization of classic finite volume methods such as WENO. 
Our experimental results show that TSM achieves new state-of-the-art simulation accuracy for 2-D incompressible Navier-Stokes turbulent flows: it significantly outperforms the previously reported best results by 19.9% in terms of the highly-correlated duration time, and reduces the inference latency to 80%. We also show a strong generalization ability of the proposed method to various out-of-distribution turbulent flow settings.", "keywords": "neural PDE solver;Navier-Stokes equation;turbulent flow;Computational Fluid Dynamics;CFD", "primary_area": "", "supplementary_material": "", "author": "Zhiqing Sun;Yiming Yang;Shinjae Yoo", "authorids": "~Zhiqing_Sun1;~Yiming_Yang1;~Shinjae_Yoo1", "gender": "M;F;M", "homepage": "https://www.cs.cmu.edu/~zhiqings/;http://www.cs.cmu.edu/~yiming/;", "dblp": "211/7692;25/1666;69/1062", "google_scholar": "https://scholar.google.com/citations?hl=en;MlZq4XwAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0001-8322-607X;", "linkedin": "zhiqing-sun-5781b3100/;yiming-yang-24100924/;", "or_profile": "~Zhiqing_Sun1;~Yiming_Yang1;~Shinjae_Yoo1", "aff": "Carnegie Mellon University;School of Computer Science, Carnegie Mellon University;Brookhaven National Lab", "aff_domain": "cs.cmu.edu;cs.cmu.edu;bnl.gov", "position": "PhD student;Full Professor;Scientist", "bibtex": "@misc{\nsun2023a,\ntitle={A Neural {PDE} Solver with Temporal Stencil Modeling},\nauthor={Zhiqing Sun and Yiming Yang and Shinjae Yoo},\nyear={2023},\nurl={https://openreview.net/forum?id=Nvlqsofsc6-}\n}", "github": "", "project": "", "reviewers": "41X5;PBxt;EevF;ay4D", "site": "https://openreview.net/forum?id=Nvlqsofsc6-", "pdf_size": 3662109, "recommendation": "6;6;6;8", "confidence": "3;4;4;2", "correctness": "3;2;3;4", "technical_novelty": "2;4;2;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "42;171;47;46", "wc_strength_and_weaknesses": "129;303;185;106", "wc_clarity_quality_novelty_and_reproducibility": "8;212;17;13", "wc_summary_review": "41;133;151;29", "wc_review": "220;819;400;194", "wc_reply_reviewers": "21;0;0;0", "wc_reply_authors": "259;498;454;112", "reply_reviewers": "1;0;0;0", "reply_authors": "1;1;2;1", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 76.5, 54.59166603063145 ], "wc_strength_and_weaknesses_avg": [ 180.75, 76.20490469779487 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 62.5, 86.37273875477146 ], "wc_summary_review_avg": [ 88.5, 54.043963585214584 ], "wc_review_avg": [ 408.25, 250.06236722065958 ], "wc_reply_reviewers_avg": [ 5.25, 9.093266739736606 ], "wc_reply_authors_avg": [ 330.75, 155.05059658060011 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.8703882797784891, "corr_recommendation_correctness": 0.816496580927726, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2413546325457705006&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0;1", "aff_unique_norm": "Carnegie Mellon University;Brookhaven National Laboratory", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://www.bnl.gov", "aff_unique_abbr": "CMU;BNL", "aff_campus_unique_index": "1", "aff_campus_unique":
";Pittsburgh", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "NxPQ3QOGTWl", "title": "KALM: Knowledge-Aware Integration of Local, Document, and Global Contexts for Long Document Understanding", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "With the advent of pre-trained language models (LMs), increasing research efforts have been focusing on infusing commonsense and domain-specific knowledge to prepare LMs for downstream tasks. These works attempt to leverage knowledge graphs, the \\textit{de facto} standard of symbolic knowledge representation, along with pre-trained LMs. While existing approaches leverage external knowledge, it remains an open question how to jointly incorporate knowledge graphs represented in varying contexts --- from local (e.g., sentence), document-level, to global knowledge, to enable knowledge-rich and interpretable exchange across contexts. In addition, incorporating varying contexts can especially benefit long document understanding tasks that leverage pre-trained LMs, typically bounded by the input sequence length. In light of these challenges, we propose \\textbf{KALM}, a language model that jointly leverages knowledge in local, document-level, and global contexts for long document understanding. KALM firstly encodes long documents and knowledge graphs into the three knowledge-aware context representations. KALM then processes each context with context-specific layers. These context-specific layers are followed by a ContextFusion layer that facilitates knowledge exchange to derive an overarching document representation. Extensive experiments demonstrate that KALM achieves state-of-the-art performance on three long document understanding tasks across 6 datasets/settings. 
Further analyses reveal that the three knowledge-aware contexts are complementary and they all contribute to model performance, while the importance and information exchange patterns of different contexts vary on different tasks and datasets.", "keywords": "natural language processing;long document understanding;knowledge graphs", "primary_area": "", "supplementary_material": "/attachment/2491598f6fae41c164927125eb152e20ab07fdac.zip", "author": "Shangbin Feng;Zhaoxuan Tan;Wenqian Zhang;Zhenyu Lei;Yulia Tsvetkov", "authorids": "~Shangbin_Feng1;~Zhaoxuan_Tan1;~Wenqian_Zhang1;~Zhenyu_Lei1;~Yulia_Tsvetkov1", "gender": "M;M;M;M;F", "homepage": "https://bunsenfeng.github.io/;https://tamsiuhin.github.io/;;;https://homes.cs.washington.edu/~yuliats/", "dblp": "295/9571;301/7706;137/6026;229/5143-4;75/8157", "google_scholar": "Y3rLP9UAAAAJ;0KE2CZsAAAAJ;M2-UAYUAAAAJ;;SEDPkrsAAAAJ", "orcid": "0000-0002-4133-1987;0000-0001-8230-6238;;0000-0002-5606-3268;0000-0002-4634-7128", "linkedin": ";zhaoxuan-tan-927132213/;;zhenyu-lei-aaa386229/;", "or_profile": "~Shangbin_Feng1;~Zhaoxuan_Tan1;~Wenqian_Zhang1;~Zhenyu_Lei1;~Yulia_Tsvetkov1", "aff": "University of Washington;Xi'an Jiaotong University;Xi'an Jiaotong University;University of Virginia, Charlottesville;Department of Computer Science, University of Washington", "aff_domain": "cs.washington.edu;xjtu.edu.cn;xjtu.edu.cn;virginia.edu;cs.washington.edu", "position": "PhD student;Undergrad student;Undergrad student;PhD student;Assistant Professor", "bibtex": "@misc{\nfeng2023kalm,\ntitle={{KALM}: Knowledge-Aware Integration of Local, Document, and Global Contexts for Long Document Understanding},\nauthor={Shangbin Feng and Zhaoxuan Tan and Wenqian Zhang and Zhenyu Lei and Yulia Tsvetkov},\nyear={2023},\nurl={https://openreview.net/forum?id=NxPQ3QOGTWl}\n}", "github": "", "project": "", "reviewers": "Smxx;Lyf3;s7vY;XCQW;8cof", "site": "https://openreview.net/forum?id=NxPQ3QOGTWl", "pdf_size": 976659, "recommendation": "5;5;6;6;6", "confidence": "5;4;5;4;4", "correctness": "3;3;3;4;3", "technical_novelty": "2;3;2;3;2", "empirical_novelty": "2;3;2;3;2", "wc_summary_paper": "70;228;170;77;107", "wc_strength_and_weaknesses": "267;661;109;26;207", "wc_clarity_quality_novelty_and_reproducibility": "10;98;2;108;85", "wc_summary_review": "27;141;2;46;33", "wc_review": "374;1128;283;257;432", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "1921;3630;1273;1361;1624", "reply_reviewers": "0;0;0;0;0", "reply_authors": "6;10;5;4;6", "recommendation_avg": [ 5.6, 0.48989794855663565 ], "confidence_avg": [ 4.4, 0.48989794855663565 ], "correctness_avg": [ 3.2, 0.39999999999999997 ], "technical_novelty_avg": [ 2.4, 0.4898979485566356 ], "empirical_novelty_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_paper_avg": [ 130.4, 60.2348736198558 ], "wc_strength_and_weaknesses_avg": [ 254.0, 219.56138093936283 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 60.6, 45.2442261509687 ], "wc_summary_review_avg": [ 49.8, 47.788701593577535 ], "wc_review_avg": [ 494.8, 322.76517779958857 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1961.8, 864.1841007563146 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 6.2, 2.039607805437114 ], "replies_avg": [ 38, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.16666666666666663, "corr_recommendation_correctness": 0.408248290463863, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5871906016288780253&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, 
"aff_unique_index": "0;1;1;2;0", "aff_unique_norm": "University of Washington;Xi'an Jiao Tong University;University of Virginia", "aff_unique_dep": ";;", "aff_unique_url": "https://www.washington.edu;https://www.xjtu.edu.cn;https://www.virginia.edu", "aff_unique_abbr": "UW;XJTU;UVA", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Charlottesville;Seattle", "aff_country_unique_index": "0;1;1;0;0", "aff_country_unique": "United States;China" }, { "id": "NxnYzayR2CW", "title": "Personalized Semantics Excitation for Federated Image Classification", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Federated learning casts a light on the collaboration of distributed local clients with privacy protected to attain a more generic global model. However, significant distribution shift in input/label space across different clients makes it challenging to well generalize to all clients, which motivates personalized federated learning (PFL). Existing PFL methods typically customize the local model by fine-tuning with limited local supervision and the global model regularizer, which secures local specificity but risks ruining the global discriminative knowledge. In this paper, we propose a novel Personalized Semantics Excitation ($\\textbf{PSE}$) mechanism to breakthrough this limitation by exciting and fusing $\\textit{personalized}$ semantics from the global model during local model customization. Specifically, PSE explores channel-wise gradient differentiation across global and local models to identify important low-level semantics mostly from convolutional layers which are embedded into the client-specific training. In addition, PSE deploys the collaboration of global and local models to enrich high-level feature representations and facilitate the robustness of client classifier through a cross-model attention module. 
Extensive experiments and analysis on various image classification benchmarks demonstrate the effectiveness and advantage of our method over the state-of-the-art PFL methods.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/6d931965799bc15280409341579c595ce22b5331.zip", "author": "Haifeng Xia;Kai Li;Zhengming Ding", "authorids": "~Haifeng_Xia2;~Kai_Li11;~Zhengming_Ding5", "gender": "M;;M", "homepage": ";;http://www.cs.tulane.edu/~zding1/", "dblp": "191/6730.html;https://dblp.uni-trier.de/pers/hd/l/Li_0012:Kai;122/3547", "google_scholar": "41LFIbQAAAAJ;YsROc4UAAAAJ;TKbyRRsAAAAJ", "orcid": ";;0000-0002-6994-5278", "linkedin": ";;", "or_profile": "~Haifeng_Xia2;~Kai_Li11;~Zhengming_Ding5", "aff": "Southeast University;NEC-Labs;Tulane University", "aff_domain": "seu.edu.cn;nec-labs.com;tulane.edu", "position": "Associate Professor;NEC Labs, America;Assistant Professor", "bibtex": "@misc{\nxia2023personalized,\ntitle={Personalized Semantics Excitation for Federated Image Classification},\nauthor={Haifeng Xia and Kai Li and Zhengming Ding},\nyear={2023},\nurl={https://openreview.net/forum?id=NxnYzayR2CW}\n}", "github": "", "project": "", "reviewers": "ESVb;DVj1;9UF8;Ahnu", "site": "https://openreview.net/forum?id=NxnYzayR2CW", "pdf_size": 772267, "recommendation": "3;5;5;8", "confidence": "3;5;3;4", "correctness": "2;3;4;3", "technical_novelty": "3;2;3;3", "empirical_novelty": "3;0;2;3", "wc_summary_paper": "111;47;41;48", "wc_strength_and_weaknesses": "142;245;441;76", "wc_clarity_quality_novelty_and_reproducibility": "683;19;54;8", "wc_summary_review": "43;45;39;50", "wc_review": "979;356;575;182", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.25, 1.7853571071357126 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 1.224744871391589 ], "wc_summary_paper_avg": [ 61.75, 28.560243346302215 ], "wc_strength_and_weaknesses_avg": [ 226.0, 137.96919946132905 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 191.0, 284.56370112858735 ], "wc_summary_review_avg": [ 44.25, 3.960744879438715 ], "wc_review_avg": [ 523.0, 297.8296492963721 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.37998029782867415, "corr_recommendation_correctness": 0.39605901719066966, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5425251419602338540&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Southeast University;NEC Laboratories;Tulane University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.seu.edu.cn/;https://www.nec-labs.com;https://www.tulane.edu", "aff_unique_abbr": "SEU;NEC-Labs;Tulane", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "China;United States" }, { "id": "NxnqA1iKeT", "title": "From Distance to Dependency: A Paradigm Shift of Full-reference Image Quality Assessment", "track": "main", "status": "Withdraw", "tldr": "Beyond distance measure, we propose the first Deep Image Dependency (DID) based full-reference image quality assessment model to capture transformation-invariant texture perception.", 
"abstract": "Deep learning-based full-reference image quality assessment (FR-IQA) models typically rely on the feature distance between the reference and distorted images. However, the underlying assumption of these models that the distance in the deep feature domain could quantify the quality degradation does not scientifically align with the invariant texture perception, especially when the images are generated artificially by neural networks. In this paper, we bring a radical shift in inferring the quality with learned features and propose the Deep Image Dependency (DID) based FR-IQA model. The feature dependency facilitates the comparisons of deep learning features in a high-order manner with Brownian distance covariance, which is characterized by the joint distribution of the features from reference and test images, as well as their marginal distributions. This enables the quantification of the feature dependency against nonlinear transformation, which is far beyond the computation of the numerical errors in the feature space. Experiments on image quality prediction, texture image similarity, and geometric invariance validate the appealing performance of our proposed measure, and the implementation will be publicly available.", "keywords": "Image quality assessment;brownian distance covariance;distance dependency", "primary_area": "", "supplementary_material": "", "author": "Hanwei Zhu;Baoliang Chen;Lingyu Zhu;Zhaopeng Feng;Shiqi Wang", "authorids": "~Hanwei_Zhu1;~Baoliang_Chen2;lingyzhu-c@my.cityu.edu.hk;zhaopengfeng@stu.hit.edu.cn;~Shiqi_Wang1", "gender": "M;;;;M", "homepage": "https://h4nwei.github.io/;;;;https://www.cs.cityu.edu.hk/~shiqwang/", "dblp": "214/8898;;;;58/9145-1", "google_scholar": "https://scholar.google.com/citations?hl=en;;;;Pr7s2VUAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Hanwei_Zhu1;~Baoliang_Chen2;lingyzhu-c@my.cityu.edu.hk;zhaopengfeng@stu.hit.edu.cn;~Shiqi_Wang1", "aff": "City University of Hong Kong;;;;City University of Hong Kong", "aff_domain": "cityu.edu.hk;;;;cityu.edu.hk", "position": "PhD student;;;;Assistant Professor", "bibtex": "@misc{\nzhu2023from,\ntitle={From Distance to Dependency: A Paradigm Shift of Full-reference Image Quality Assessment},\nauthor={Hanwei Zhu and Baoliang Chen and Lingyu Zhu and Zhaopeng Feng and Shiqi Wang},\nyear={2023},\nurl={https://openreview.net/forum?id=NxnqA1iKeT}\n}", "github": "", "project": "", "reviewers": "9yRh;HeoU;LoKr", "site": "https://openreview.net/forum?id=NxnqA1iKeT", "pdf_size": 5423473, "recommendation": "3;5;6", "confidence": "4;4;4", "correctness": "3;2;3", "technical_novelty": "2;2;3", "empirical_novelty": "1;3;3", "wc_summary_paper": "43;95;108", "wc_strength_and_weaknesses": "128;532;170", "wc_clarity_quality_novelty_and_reproducibility": "18;118;41", "wc_summary_review": "10;97;29", "wc_review": "199;842;348", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.9428090415820634 ], "wc_summary_paper_avg": [ 82.0, 28.083209693100727 ], "wc_strength_and_weaknesses_avg": [ 276.6666666666667, 181.36029211366957 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 59.0, 42.7629122799964 ], "wc_summary_review_avg": [ 45.333333333333336, 
37.3482113211448 ], "wc_review_avg": [ 463.0, 274.81023755796775 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": -0.18898223650461363, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5520909016587284194&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0", "aff_unique_norm": "City University of Hong Kong", "aff_unique_dep": "", "aff_unique_url": "https://www.cityu.edu.hk", "aff_unique_abbr": "CityU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "NxpyLebsLAR", "title": "DELVING INTO THE HIERARCHICAL STRUCTURE FOR EFFICIENT LARGE-SCALE BI-LEVEL LEARNING", "track": "main", "status": "Reject", "tldr": "", "abstract": "Recent years have witnessed growing interest and emerging successes of bi-level learning in a wide range of applications, such as meta learning and hyper-parameter optimization. However, current bi-level learning approaches suffer from high memory and computation costs, especially in large-scale deep learning scenarios, due to the hierarchical optimization therein. {\\textit{It is therefore interesting to know whether the hierarchical structure can be untied for efficient learning}.} To answer this question, we introduce NSGame, which transforms the hierarchical bi-level learning problem into a parallel Nash game and incorporates the taste of hierarchy via a very small-scale Stackelberg game.\nWe prove that a strong differential Stackelberg equilibrium (SDSE) of the bi-level learning problem corresponds to a local Nash equilibrium of the NSGame. To obtain such an SDSE from NSGame, we introduce a two-time-scale stochastic gradient descent (TTS-SGD) method, and provide a theoretical guarantee that the local Nash equilibrium obtained by the TTS-SGD method is an SDSE of the bi-level learning problem.
We compare NSGame with representative bi-level learning models such as MWN and MLC; experimental results on class imbalance learning and noisy label learning verify that the proposed NSGame achieves comparable or even better results than the corresponding meta learning models, while being computationally more efficient.", "keywords": "Bi-level optimization;Meta learning;Nash game", "primary_area": "", "supplementary_material": "", "author": "Xixi Jia;Renzhen Wang;Deyu Meng;Xiangchu Feng", "authorids": "~Xixi_Jia2;~Renzhen_Wang1;~Deyu_Meng1;~Xiangchu_Feng1", "gender": "M;M;M;M", "homepage": ";;http://dymeng.gr.xjtu.edu.cn;", "dblp": "216/9686;242/6299;22/5614;80/4229", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.hk/citations?user=QZ1-nnwAAAAJ;an6w-64AAAAJ;https://scholar.google.com.hk/citations?user=SuG38OIAAAAJ", "orcid": ";;0000-0002-1294-8283;", "linkedin": ";;;", "or_profile": "~Xixi_Jia2;~Renzhen_Wang1;~Deyu_Meng1;~Xiangchu_Feng1", "aff": "Xidian University;Xi'an Jiaotong University;Xi'an Jiaotong University;", "aff_domain": "xidian.edu.cn;xjtu.edu.cn;xjtu.edu.cn;", "position": "Associate Professor;Assistant Professor;Full Professor;", "bibtex": "@misc{\njia2023delving,\ntitle={{DELVING} {INTO} {THE} {HIERARCHICAL} {STRUCTURE} {FOR} {EFFICIENT} {LARGE}-{SCALE} {BI}-{LEVEL} {LEARNING}},\nauthor={Xixi Jia and Renzhen Wang and Deyu Meng and Xiangchu Feng},\nyear={2023},\nurl={https://openreview.net/forum?id=NxpyLebsLAR}\n}", "github": "", "project": "", "reviewers": "j9bH;4hKq;gPRh", "site": "https://openreview.net/forum?id=NxpyLebsLAR", "pdf_size": 1183753, "recommendation": "3;5;5", "confidence": "3;2;3", "correctness": "2;3;3", "technical_novelty": "2;3;2", "empirical_novelty": "2;2;0", "wc_summary_paper": "67;200;137", "wc_strength_and_weaknesses": "265;345;247", "wc_clarity_quality_novelty_and_reproducibility": "218;61;186", "wc_summary_review": "25;21;48", "wc_review": "575;627;618", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.3333333333333333, 0.9428090415820634 ], "wc_summary_paper_avg": [ 134.66666666666666, 54.32208472517314 ], "wc_strength_and_weaknesses_avg": [ 285.6666666666667, 42.59368758656876 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 155.0, 67.73969786370964 ], "wc_summary_review_avg": [ 31.333333333333332, 11.897712198383164 ], "wc_review_avg": [ 606.6666666666666, 22.69116323349001 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.49999999999999983, "corr_recommendation_correctness": 0.9999999999999998, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7070212028498532866&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;1", "aff_unique_norm": "Xidian University;Xi'an Jiao Tong University", "aff_unique_dep": ";", "aff_unique_url": "http://www.xidian.edu.cn/;https://www.xjtu.edu.cn", "aff_unique_abbr": "Xidian;XJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0",
"aff_country_unique": "China" }, { "title": "FIGARO: Controllable Music Generation using Learned and Expert Features", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10959", "id": "NyR8OZFHw6i", "poster": "", "openreview": "https://openreview.net/forum?id=NyR8OZFHw6i", "slides": "https://iclr.cc/virtual/2023/poster/10959", "video": "https://iclr.cc/virtual/2023/poster/10959", "author_site": "Dimitri von R\u00fctte, Luca Biggio, Yannic Kilcher, Thomas Hofmann", "tldr": "We achieve state-of-the-art results in symbolic music style transfer by enabling human-interpretable control over the generation process while improving sample quality at the same time.", "abstract": "Recent symbolic music generative models have achieved significant improvements in the quality of the generated samples. Nevertheless, it remains hard for users to control the output in such a way that it matches their expectation. To address this limitation, high-level, human-interpretable conditioning is essential. In this work, we release FIGARO, a Transformer-based conditional model trained to generate symbolic music based on a sequence of high-level control codes. To this end, we propose description-to-sequence learning, which consists of automatically extracting fine-grained, human-interpretable features (the description) and training a sequence-to-sequence model to reconstruct the original sequence given only the description as input. FIGARO achieves state-of-the-art performance in multi-track symbolic music generation both in terms of style transfer and sample quality. We show that performance can be further improved by combining human-interpretable with learned features. Our extensive experimental evaluation shows that FIGARO is able to generate samples that closely adhere to the content of the input descriptions, even when they deviate significantly from the training distribution.", "keywords": "symbolic music;style transfer;music generation;controllable generation;human-interpretability;self-supervised learning", "primary_area": "", "supplementary_material": "/attachment/bd9381f3f9c72c27f1e6eb72cf79123daa5e9dc1.zip", "author": "Dimitri von R\u00fctte;Luca Biggio;Yannic Kilcher;Thomas Hofmann", "authorids": "~Dimitri_von_R\u00fctte1;~Luca_Biggio1;~Yannic_Kilcher1;~Thomas_Hofmann1", "gender": ";M;M;M", "homepage": ";;;http://www.da.inf.ethz.ch/", "dblp": ";279/2333;https://dblp.org/pers/k/Kilcher:Yannic.html;h/ThHofmann", "google_scholar": "wVQcUf8AAAAJ;6HtmuegAAAAJ;;T3hAyLkAAAAJ", "orcid": ";;;", "linkedin": "dimitri-von-r%C3%BCtte-890633215/;;;thomas-hofmann-1ab2402/", "or_profile": "~Dimitri_von_R\u00fctte1;~Luca_Biggio1;~Yannic_Kilcher1;~Thomas_Hofmann1", "aff": "ETH Zurich;Swiss Federal Institute of Technology;DeepJudge;Swiss Federal Institute of Technology", "aff_domain": "ethz.ch;ethz.ch;deepjudge.ai;ethz.ch", "position": "MS student;PhD student;CTO;Full Professor", "bibtex": "@inproceedings{\nr{\\\"u}tte2023figaro,\ntitle={{FIGARO}: Controllable Music Generation using Learned and Expert Features},\nauthor={Dimitri von R{\\\"u}tte and Luca Biggio and Yannic Kilcher and Thomas Hofmann},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=NyR8OZFHw6i}\n}", "github": "", "project": "", "reviewers": "g33b;GHu3;W9Fj;itPu", "pdf_size": 1003100, "recommendation": "5;6;6;8", "confidence": "4;4;3;2", "correctness": "3;4;4;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;3;3;3", "wc_summary_paper": 
"69;54;150;39", "wc_strength_and_weaknesses": "188;287;137;39", "wc_clarity_quality_novelty_and_reproducibility": "305;22;45;113", "wc_summary_review": "29;40;69;53", "wc_review": "591;403;401;244", "wc_reply_reviewers": "0;0;0;24", "wc_reply_authors": "1103;416;778;128", "reply_reviewers": "0;0;0;1", "reply_authors": "2;1;1;1", "recommendation_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 78.0, 42.90104893822527 ], "wc_strength_and_weaknesses_avg": [ 162.75, 89.51640911028548 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 121.25, 111.23932533056824 ], "wc_summary_review_avg": [ 47.75, 14.922717580923388 ], "wc_review_avg": [ 409.75, 122.92960383894516 ], "wc_reply_reviewers_avg": [ 6.0, 10.392304845413264 ], "wc_reply_authors_avg": [ 606.25, 367.8235820335613 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.899228803025897, "corr_recommendation_correctness": -0.2294157338705618, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15890934015448884903&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "pdf": "https://openreview.net/pdf?id=NyR8OZFHw6i", "email": "ethz.ch;ethz.ch;deepjudge.ai;ethz.ch", "author_num": 4, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "ETH Zurich;Swiss Federal Institute of Technology;DeepJudge", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ethz.ch;https://www.ethz.ch;", "aff_unique_abbr": "ETHZ;ETH Zurich;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Switzerland;" }, { "id": "NzcUQuhEGef", "title": "Graph Contrastive Learning Under Heterophily: Utilizing Graph Filters to Generate Graph Views", "track": "main", "status": "Reject", "tldr": "We proposed HLCL, a contrastive learning framework that leverages a high-pass graph filter as our augmentation method to generate meaningful representations for heterophily graphs.", "abstract": "Graph Neural Networks have achieved tremendous success in (semi-)supervised tasks for which task-specific node labels are available. However, obtaining labels is expensive in many domains, specially as the graphs grow larger in size. Hence, there has been a growing interest in the application of self-supervised techniques, in particular contrastive learning (CL), to graph data. In general, CL methods work by maximizing the agreement between encoded augmentations of the same example, and minimizing agreement between encoded augmentations of different examples. However, we show that existing graph CL methods perform very poorly on graphs with heterophily, in which connected nodes tend to belong to different classes. First, we show that this is attributed to the ineffectiveness of existing graph augmentation methods. Then, we leverage graph filters to directly generate augmented graph views for graph CL under heterophily. In particular, instead of explicitly augmenting the graph topology and encoding the augmentations, we use a high-pass filter in the encoder to generate node representations only based on high-frequency graph signals. 
Then, we contrast the high-pass filtered representations with their low-pass counterparts produced by the same encoder to generate the final node representations. Our experimental results confirm that our proposed method, HLCL, outperforms state-of-the-art CL methods on benchmark graphs with heterophily by up to 10%.", "keywords": "GNN;Contrastive learning;Heterophily;Graph Representation Learning", "primary_area": "", "supplementary_material": "", "author": "Wenhan Yang;Baharan Mirzasoleiman", "authorids": "~Wenhan_Yang5;~Baharan_Mirzasoleiman1", "gender": "M;F", "homepage": ";http://web.cs.ucla.edu/~baharan/", "dblp": ";52/10075", "google_scholar": ";x63j7HEAAAAJ", "orcid": ";", "linkedin": "wenhan-yang-6413981b4/;", "or_profile": "~Wenhan_Yang5;~Baharan_Mirzasoleiman1", "aff": "University of California, Los Angeles;University of California, Los Angeles", "aff_domain": "ucla.edu;ucla.edu", "position": "PhD student;Assistant Professor", "bibtex": "@misc{\nyang2023graph,\ntitle={Graph Contrastive Learning Under Heterophily: Utilizing Graph Filters to Generate Graph Views},\nauthor={Wenhan Yang and Baharan Mirzasoleiman},\nyear={2023},\nurl={https://openreview.net/forum?id=NzcUQuhEGef}\n}", "github": "", "project": "", "reviewers": "KBX6;32Ub;vWUj;TBxC", "site": "https://openreview.net/forum?id=NzcUQuhEGef", "pdf_size": 6114980, "recommendation": "3;3;5;8", "confidence": "4;4;4;4", "correctness": "3;2;4;4", "technical_novelty": "2;2;2;2", "empirical_novelty": "2;0;3;3", "wc_summary_paper": "49;81;103;194", "wc_strength_and_weaknesses": "110;389;274;27", "wc_clarity_quality_novelty_and_reproducibility": "34;98;97;37", "wc_summary_review": "90;33;59;54", "wc_review": "283;601;533;312", "wc_reply_reviewers": "0;0;374;0", "wc_reply_authors": "605;1664;2496;8", "reply_reviewers": "0;0;1;0", "reply_authors": "3;5;5;1", "recommendation_avg": [ 4.75, 2.0463381929681126 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 1.224744871391589 ], "wc_summary_paper_avg": [ 106.75, 53.9090669553833 ], "wc_strength_and_weaknesses_avg": [ 200.0, 140.7355676437197 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 66.5, 31.0201547384922 ], "wc_summary_review_avg": [ 59.0, 20.38381711063951 ], "wc_review_avg": [ 432.25, 137.2613838630516 ], "wc_reply_reviewers_avg": [ 93.5, 161.94675050769004 ], "wc_reply_authors_avg": [ 1193.25, 957.8124490212058 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.5, 1.6583123951777 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.7735449421179452, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3852004501016033918&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "University of California, Los Angeles", "aff_unique_dep": "", "aff_unique_url": "https://www.ucla.edu", "aff_unique_abbr": "UCLA", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "NzrpxT5hTY_", "title": "FADE: Enabling Large-Scale Federated Adversarial Training on Resource-Constrained Edge Devices", "track": "main", "status": "Reject", "tldr": "We propose a novel framework to enable large-scale federated adversarial training on resource-constrained edge devices.", "abstract": "Federated adversarial training can effectively
bring adversarial robustness into privacy-preserving federated learning systems. However, the high demand for memory capacity and computing power makes large-scale federated adversarial training infeasible on resource-constrained edge devices. Few previous studies in federated adversarial training have tried to tackle both memory and computational constraints at the same time. In this paper, we propose a new framework named Federated Adversarial Decoupled Learning (FADE) to enable adversarial training (AT) on resource-constrained edge devices. FADE decouples the entire model into small modules to fit the resource budget of each edge device, and each device only needs to perform AT on a single module in each communication round. We also propose an auxiliary weight decay to alleviate objective inconsistency and achieve a better accuracy-robustness balance in FADE. FADE offers a theoretical guarantee for convergence and adversarial robustness, and our experimental results show that FADE can significantly reduce the consumption of memory and computing power while maintaining accuracy and robustness.", "keywords": "Federated Learning;Adversarial Training", "primary_area": "", "supplementary_material": "", "author": "Minxue Tang;Jianyi Zhang;Mingyuan Ma;Louis DiValentin;Aolin Ding;Amin Hassanzadeh;Hai Li;Yiran Chen", "authorids": "~Minxue_Tang2;~Jianyi_Zhang1;~Mingyuan_Ma1;~Louis_DiValentin1;~Aolin_Ding1;~Amin_Hassanzadeh2;~Hai_Li1;~Yiran_Chen1", "gender": "M;;M;;;M;F;M", "homepage": ";https://jayzhang42.github.io/;https://cei.pratt.duke.edu;https://www.linkedin.com/in/louis-divalentin-991abb28/;;;https://ece.duke.edu/faculty/hai-helen-li;https://ece.duke.edu/people/yiran-chen/", "dblp": "250/9350;;;;;;30/5330-1;80/1641", "google_scholar": "LQ5jYxMAAAAJ;https://scholar.google.com/citations?hl=en;;;;PMLDZB0AAAAJ;E6Tpfq8AAAAJ;", "orcid": ";;;;;;0000-0003-3228-6544;0000-0002-1486-8412", "linkedin": ";;;louis-divalentin-991abb28/;;aminhassanzadeh/;;", "or_profile": "~Minxue_Tang2;~Jianyi_Zhang1;~Mingyuan_Ma1;~Louis_DiValentin1;~Aolin_Ding1;~Amin_Hassanzadeh2;~Hai_Li1;~Yiran_Chen1", "aff": "Duke University;Pittsburgh Supercomputing Center & Duke University;Duke University;;;Accenture;Duke University;Duke University", "aff_domain": "duke.edu;duke.edu;duke.edu;;;accenture.com;duke.edu;duke.edu", "position": "PhD student;Principal investigator;PhD student;;;Researcher;Professor;Professor", "bibtex": "@misc{\ntang2023fade,\ntitle={{FADE}: Enabling Large-Scale Federated Adversarial Training on Resource-Constrained Edge Devices},\nauthor={Minxue Tang and Jianyi Zhang and Mingyuan Ma and Louis DiValentin and Aolin Ding and Amin Hassanzadeh and Hai Li and Yiran Chen},\nyear={2023},\nurl={https://openreview.net/forum?id=NzrpxT5hTY_}\n}", "github": "", "project": "", "reviewers": "8e8c;iHWT;xYLU", "site": "https://openreview.net/forum?id=NzrpxT5hTY_", "pdf_size": 526601, "recommendation": "6;6;6", "confidence": "4;4;4", "correctness": "3;3;3", "technical_novelty": "3;3;3", "empirical_novelty": "3;3;2", "wc_summary_paper": "60;41;84", "wc_strength_and_weaknesses": "309;185;234", "wc_clarity_quality_novelty_and_reproducibility": "154;22;135", "wc_summary_review": "77;95;33", "wc_review": "600;343;486", "wc_reply_reviewers": "0;0;52", "wc_reply_authors": "767;533;783", "reply_reviewers": "0;0;1", "reply_authors": "1;1;1", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.6666666666666665,
0.4714045207910317 ], "wc_summary_paper_avg": [ 61.666666666666664, 17.594190960528863 ], "wc_strength_and_weaknesses_avg": [ 242.66666666666666, 50.99237415753675 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 103.66666666666667, 58.265675048770255 ], "wc_summary_review_avg": [ 68.33333333333333, 26.042699979499478 ], "wc_review_avg": [ 476.3333333333333, 105.14223170966503 ], "wc_reply_reviewers_avg": [ 17.333333333333332, 24.51303508113365 ], "wc_reply_authors_avg": [ 694.3333333333334, 114.26674445738308 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12950288469325911206&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;0;2;0;0", "aff_unique_norm": "Duke University;Pittsburgh Supercomputing Center;Accenture", "aff_unique_dep": ";;", "aff_unique_url": "https://www.duke.edu;https://www.psc.edu;https://www.accenture.com", "aff_unique_abbr": "Duke;PSC;Accenture", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Words are all you need? Language as an approximation for human similarity judgments", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11371", "id": "O-G91-4cMdv", "poster": "", "openreview": "https://openreview.net/forum?id=O-G91-4cMdv", "slides": "https://iclr.cc/virtual/2023/poster/11371", "video": "https://iclr.cc/virtual/2023/poster/11371", "author_site": "Raja Marjieh, Pol van Rijn, Ilia Sucholutsky, Theodore Sumers, Harin Lee, Thomas L. Griffiths, Nori Jacoby", "tldr": "We show that machine embeddings of text descriptions can predict human similarity judgments better than models trained from images, audio and video.", "abstract": "Human similarity judgments are a powerful supervision signal for machine learning applications based on techniques such as contrastive learning, information retrieval, and model alignment, but classical methods for collecting human similarity judgments are too expensive to be used at scale. Recent methods propose using pre-trained deep neural networks (DNNs) to approximate human similarity, but pre-trained DNNs may not be available for certain domains (e.g., medical images, low-resource languages) and their performance in approximating human similarity has not been extensively tested. We conducted an evaluation of 611 pre-trained models across three domains -- images, audio, video -- and found that there is a large gap in performance between human similarity judgments and pre-trained DNNs. To address this gap, we propose a new class of similarity approximation methods based on language. To collect the language data required by these new methods, we also developed and validated a novel adaptive tag collection pipeline. We find that our proposed language-based methods are significantly cheaper, in the number of human judgments, than classical methods, but still improve performance over the DNN-based methods. Finally, we also develop `stacked' methods that combine language embeddings with DNN embeddings, and find that these consistently provide the best approximations for human similarity across all three of our modalities. 
Based on the results of this comprehensive study, we provide a concise guide for researchers interested in collecting or approximating human similarity data. To accompany this guide, we also release all of the similarity and language data, a total of 206,339 human judgments, that we collected in our experiments, along with a detailed breakdown of all modeling results.", "keywords": "cognitive science;language;perception;representational similarity", "primary_area": "", "supplementary_material": "", "author": "Raja Marjieh;Pol Van Rijn;Ilia Sucholutsky;Theodore Sumers;Harin Lee;Thomas L. Griffiths;Nori Jacoby", "authorids": "~Raja_Marjieh1;~Pol_Van_Rijn1;~Ilia_Sucholutsky1;~Theodore_Sumers1;~Harin_Lee1;~Thomas_L._Griffiths1;~Nori_Jacoby1", "gender": "M;;M;M;M;;M", "homepage": ";https://pol.works;https://ilia10000.github.io/;https://www.tedsumers.info/;https://harinlee.info;http://cocosci.princeton.edu/tom/;http://www.norijacoby.com/", "dblp": "271/7867;https://dblp.uni-trier.de/pid/271/8093.html;239/5108;275/8880;;34/4472;", "google_scholar": "h-pwCMUAAAAJ;siv0KZEAAAAJ;https://scholar.google.ca/citations?user=6MfHyuMAAAAJ;xZal_nUAAAAJ;https://scholar.google.co.kr/citations?user=sNyJrSQAAAAJ;https://scholar.google.com/citations?hl=en;zyR_DF4AAAAJ", "orcid": ";0000-0002-4044-9123;0000-0003-4121-7479;0000-0002-6128-0291;0000-0001-5579-4547;;", "linkedin": "raja-marjieh-505b0781/?originalSubdomain=il;;iliasu/;;;;", "or_profile": "~Raja_Marjieh1;~Pol_Van_Rijn1;~Ilia_Sucholutsky1;~Theodore_Sumers1;~Harin_Lee1;~Thomas_L._Griffiths1;~Nori_Jacoby1", "aff": "Princeton University;Max-Planck Institute;Princeton University;Princeton University;Max Planck Institute for Empirical Aesthetics;Princeton University;", "aff_domain": "princeton.edu;mpg.de;princeton.edu;princeton.edu;ae.mpg.de;princeton.edu;", "position": "PhD student;PhD student;Postdoc;PhD student;Researcher;Professor;", "bibtex": "@inproceedings{\nmarjieh2023words,\ntitle={Words are all you need? Language as an approximation for human similarity judgments},\nauthor={Raja Marjieh and Pol Van Rijn and Ilia Sucholutsky and Theodore Sumers and Harin Lee and Thomas L. 
Griffiths and Nori Jacoby},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=O-G91-4cMdv}\n}", "github": "", "project": "", "reviewers": "hUBS;AJDz;kd7y;ZARH", "pdf_size": 5249399, "recommendation": "5;8;8;10", "confidence": "3;2;4;3", "correctness": "3;3;4;4", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;3;3;4", "wc_summary_paper": "74;75;83;64", "wc_strength_and_weaknesses": "55;463;182;267", "wc_clarity_quality_novelty_and_reproducibility": "26;29;61;58", "wc_summary_review": "32;150;50;30", "wc_review": "187;717;376;419", "wc_reply_reviewers": "0;47;0;0", "wc_reply_authors": "428;1396;155;732", "reply_reviewers": "0;1;0;0", "reply_authors": "2;2;1;1", "recommendation_avg": [ 7.75, 1.7853571071357126 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 74.0, 6.745368781616021 ], "wc_strength_and_weaknesses_avg": [ 241.75, 148.35325240789297 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 43.5, 16.070158679988197 ], "wc_summary_review_avg": [ 65.5, 49.40394721072396 ], "wc_review_avg": [ 424.75, 189.96364783821141 ], "wc_reply_reviewers_avg": [ 11.75, 20.351596988934308 ], "wc_reply_authors_avg": [ 677.75, 462.18739435428137 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.7001400420140049, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8762165114551121480&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=O-G91-4cMdv", "email": "princeton.edu;mpg.de;princeton.edu;princeton.edu;ae.mpg.de;princeton.edu;", "author_num": 7, "aff_unique_index": "0;1;0;0;2;0", "aff_unique_norm": "Princeton University;Max-Planck-Gesellschaft zur F\u00f6rderung der Wissenschaften e.V.;Max Planck Institute for Empirical Aesthetics", "aff_unique_dep": ";;", "aff_unique_url": "https://www.princeton.edu;https://www.mpg.de;https://www.ea.mpg.de", "aff_unique_abbr": "Princeton;MPG;MPIEA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;1;0", "aff_country_unique": "United States;Germany" }, { "id": "O0sS_cujvV0", "title": "Smoothed-SGDmax: A Stability-Inspired Algorithm to Improve Adversarial Generalization", "track": "main", "status": "Reject", "tldr": "", "abstract": "Unlike standard training, deep neural networks can suffer from serious overfitting problems in adversarial settings, a phenomenon studied extensively in empirical papers. Recent research (e.g., Xing et al. (2021); Xiao et al. (2022)) shows that SGDmax-based adversarial training algorithms with $1/s(T)$ training loss incur a stability-based generalization bound of $\\Theta(c+s(T)/n)$. Here $T$ is the number of iterations, $n$ is the number of samples, $s(T)\\rightarrow \\infty$ as $T\\rightarrow \\infty$, and $c$ is an $n$-independent term. This reveals that adversarial training can have nonvanishing generalization errors even if the sample size $n$ goes to infinity. A natural question arises: can we eliminate the nonvanishing term $c$ by designing a more generalizable algorithm? We give an affirmative answer in this paper.
First, by adapting an information-theoretic lower bound on the complexity of solving Lipschitz-convex problems using randomized algorithms, we show that a minimax lower bound for the adversarial generalization gap is $\\Omega(s(T)/n)$ given training loss $1/s(T)$. This implies that SGDmax does not achieve the lower bound. Next, by observing that the nonvanishing generalization error term for SGDmax comes from the non-smoothness of the adversarial loss function, we employ a smoothing technique to smooth the adversarial loss function. Based on the smoothed loss function, we design a smoothed SGDmax algorithm achieving a generalization bound of $\\mathcal{O}(s(T)/n)$, which matches the minimax lower bound. Experimentally, we show that our algorithm improves adversarial generalization on common datasets.", "keywords": "Adversarial Training;Robust Overfitting;Generalization Bound", "primary_area": "", "supplementary_material": "", "author": "Jiancong Xiao;Jiawei Zhang;Zhi-Quan Luo;Asuman E. Ozdaglar", "authorids": "~Jiancong_Xiao1;~Jiawei_Zhang6;~Zhi-Quan_Luo1;~Asuman_E._Ozdaglar1", "gender": "M;M;M;F", "homepage": "https://jiancongxiao.github.io;https://www.cuhk.edu.cn/;;https://asu.mit.edu/", "dblp": "330/4306;;;35/2875", "google_scholar": "_vGY3joAAAAJ;;dW3gcXoAAAAJ;https://scholar.google.com.tw/citations?user=nWnBSOsAAAAJ", "orcid": ";0000-0002-9420-384X;;", "linkedin": ";;;", "or_profile": "~Jiancong_Xiao1;~Jiawei_Zhang6;~Zhi-Quan_Luo1;~Asuman_E._Ozdaglar1", "aff": "The Chinese University of Hong Kong, Shenzhen;Massachusetts Institute of Technology;The Chinese University of Hong Kong, Shenzhen;Massachusetts Institute of Technology", "aff_domain": "cuhk.edu.cn;mit.edu;cuhk.edu.cn;mit.edu", "position": "PhD student;Postdoc;Full Professor;PhD student", "bibtex": "@misc{\nxiao2023smoothedsgdmax,\ntitle={Smoothed-{SGD}max: A Stability-Inspired Algorithm to Improve Adversarial Generalization},\nauthor={Jiancong Xiao and Jiawei Zhang and Zhi-Quan Luo and Asuman E.
Ozdaglar},\nyear={2023},\nurl={https://openreview.net/forum?id=O0sS_cujvV0}\n}", "github": "", "project": "", "reviewers": "rzkp;9aTN;Kcyd;xxrH", "site": "https://openreview.net/forum?id=O0sS_cujvV0", "pdf_size": 374663, "recommendation": "5;5;6;6", "confidence": "3;3;3;3", "correctness": "2;3;4;4", "technical_novelty": "2;2;3;3", "empirical_novelty": "3;2;3;2", "wc_summary_paper": "125;57;35;60", "wc_strength_and_weaknesses": "51;218;166;61", "wc_clarity_quality_novelty_and_reproducibility": "440;24;67;24", "wc_summary_review": "57;84;34;27", "wc_review": "673;383;302;172", "wc_reply_reviewers": "0;0;25;14", "wc_reply_authors": "1046;839;274;360", "reply_reviewers": "0;0;1;1", "reply_authors": "3;2;1;1", "recommendation_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.0, 0.0 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 69.25, 33.60338524613257 ], "wc_strength_and_weaknesses_avg": [ 124.0, 70.53013540324447 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 138.75, 174.81043304105165 ], "wc_summary_review_avg": [ 50.5, 22.299103120977758 ], "wc_review_avg": [ 382.5, 183.8348443576462 ], "wc_reply_reviewers_avg": [ 9.75, 10.497023387608508 ], "wc_reply_authors_avg": [ 629.75, 322.6347586668244 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.9045340337332909, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7184609666495437005&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "Chinese University of Hong Kong;Massachusetts Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.cuhk.edu.cn;https://web.mit.edu", "aff_unique_abbr": "CUHK;MIT", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Shenzhen;", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "China;United States" }, { "id": "O2cW5Q3bH_M", "title": "Gradient flow in the gaussian covariate model: exact solution of learning curves and multiple descent structures", "track": "main", "status": "Reject", "tldr": "", "abstract": "A recent line of work has shown remarkable behaviors of the generalization error curves in simple learning models. Even least-squares regression has shown atypical features such as the model-wise double descent, and further works have observed triple or multiple descents. Other important characteristics are the epoch-wise descent structures which emerge during training. The observations of model-wise and epoch-wise descents have been analytically derived in limited theoretical settings (such as the random feature model) and are otherwise experimental. In this work, we provide a full and unified analysis of the whole time-evolution of the generalization curve, in the asymptotic large-dimensional regime and under gradient-flow, within a wider theoretical setting stemming from a Gaussian covariate model. In particular, we cover most cases already disparately observed in the literature, and also provide examples of the existence of multiple descent structures as a function of a model parameter or time.
Furthermore, we show that our theoretical predictions adequately match the learning curves obtained by gradient descent over realistic datasets.\nTechnically, we compute averages of rational expressions involving random matrices using recent developments in random matrix theory based on "linear pencils". Another contribution, which is also of independent interest in random matrix theory, is a new derivation of related fixed point equations (and an extension thereof) using Dyson Brownian motions.", "keywords": "Gaussian Covariate Model;Gradient Flow;Gradient Descent;Double Descent;Epoch-wise Double Descent;Random Matrix;Linear Pencil;Cauchy Integrals;High-dimensional Limits;Stieltjes Transform;Random Feature Model", "primary_area": "", "supplementary_material": "", "author": "Antoine Bodin;Nicolas Macris", "authorids": "~Antoine_Bodin1;~Nicolas_Macris1", "gender": ";M", "homepage": "https://people.epfl.ch/antoine.bodin/?lang=en;", "dblp": ";47/5851", "google_scholar": ";", "orcid": ";0000-0003-2189-7411", "linkedin": ";", "or_profile": "~Antoine_Bodin1;~Nicolas_Macris1", "aff": "Swiss Federal Institute of Technology Lausanne;Ecole Polytechnique Federale Lausanne", "aff_domain": "epfl.ch;epfl.ch", "position": "PhD student;Associate Professor", "bibtex": "@misc{\nbodin2023gradient,\ntitle={Gradient flow in the gaussian covariate model: exact solution of learning curves and multiple descent structures},\nauthor={Antoine Bodin and Nicolas Macris},\nyear={2023},\nurl={https://openreview.net/forum?id=O2cW5Q3bH_M}\n}", "github": "", "project": "", "reviewers": "FVzp;amF8;utMN;RBfi", "site": "https://openreview.net/forum?id=O2cW5Q3bH_M", "pdf_size": 756326, "recommendation": "5;6;6;6", "confidence": "4;4;3;3", "correctness": "3;4;4;4", "technical_novelty": "3;3;2;3", "empirical_novelty": "0;3;3;0", "wc_summary_paper": "91;89;57;85", "wc_strength_and_weaknesses": "414;135;143;147", "wc_clarity_quality_novelty_and_reproducibility": "135;231;45;70", "wc_summary_review": "57;34;22;52", "wc_review": "697;489;267;354", "wc_reply_reviewers": "0;0;0;26", "wc_reply_authors": "530;376;358;360", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;1", "recommendation_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.5, 1.5 ], "wc_summary_paper_avg": [ 80.5, 13.738631664034086 ], "wc_strength_and_weaknesses_avg": [ 209.75, 118.00291309963497 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 120.25, 71.88662949394693 ], "wc_summary_review_avg": [ 41.25, 14.02453207775575 ], "wc_review_avg": [ 451.75, 162.19028176805168 ], "wc_reply_reviewers_avg": [ 6.5, 11.258330249197702 ], "wc_reply_authors_avg": [ 406.0, 71.93052203341777 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 1.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16821758322111776673&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Swiss Federal Institute of Technology Lausanne;Ecole Polytechnique Federale de Lausanne", "aff_unique_dep": ";", "aff_unique_url": "https://www.epfl.ch;https://www.epfl.ch", "aff_unique_abbr": "EPFL;EPFL", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Lausanne",
"aff_country_unique_index": "0;0", "aff_country_unique": "Switzerland" }, { "id": "O4fNuE8F51T", "title": "PMixUp: Simultaneous Utilization of Part-of-Speech Replacement and Feature Space Interpolation for Text Data Augmentation", "track": "main", "status": "Reject", "tldr": "We propose novel text augmentation method that accomplishes cutting-edge state-of-the-art performance in various benchmark settings.", "abstract": "Data augmentation has become a de facto technique in various NLP tasks to overcome the lack of a large-scale, qualified training set. The previous studies presented several data augmentation methods, such as replacing tokens with synonyms or interpolating feature space of given text input. While they are known to be convenient and promising, several limits exist. First, prior studies simply treated topic classification and sentiment analysis under the same category of text classification while we presume they have distinct characteristics. Second, previously-proposed replacement-based methods bear several improvement avenues as they utilize heuristics or statistical approaches for choosing synonyms. Lastly, while the feature space interpolation method achieved current state-of-the-art, prior studies have not comprehensively utilized it with replacement-based methods. To mitigate these drawbacks, we first analyzed which POS tags are important in each text classification task, and resulted that nouns are essential to topic classification, while sentiment analysis regards verbs and adjectives as important POS information. Contrary to the aforementioned analysis, we discover that augmenting verbs and adjective tokens commonly improves text classification performance regardless of its type. Lastly, we propose PMixUp, a novel data augmentation strategy that simultaneously utilizes replacement-based and feature space interpolation methods. We examine that they are new state-of-the-art in nine public benchmark settings, especially under the few training samples. 
", "keywords": "text augmentation;part-of-speech;feature space interpolation", "primary_area": "", "supplementary_material": "/attachment/6afda641e309a219e4ad5a0a57c19065c089c9bf.zip", "author": "Hyeon Soo Kim;Hyejin Won;Kyung Ho Park", "authorids": "~Hyeon_Soo_Kim1;~Hyejin_Won1;~Kyung_Ho_Park1", "gender": "M;;M", "homepage": ";https://blog.naver.com/wonwone567;https://alex-kpark.github.io/profile/", "dblp": ";;", "google_scholar": ";;8cXGGgkAAAAJ", "orcid": ";;", "linkedin": "shawnhyeonsoo/;;", "or_profile": "~Hyeon_Soo_Kim1;~Hyejin_Won1;~Kyung_Ho_Park1", "aff": "SOCAR;SOCAR;", "aff_domain": "socar.kr;socar.kr;", "position": "Researcher;Researcher;", "bibtex": "@misc{\nkim2023pmixup,\ntitle={{PM}ixUp: Simultaneous Utilization of Part-of-Speech Replacement and Feature Space Interpolation for Text Data Augmentation},\nauthor={Hyeon Soo Kim and Hyejin Won and Kyung Ho Park},\nyear={2023},\nurl={https://openreview.net/forum?id=O4fNuE8F51T}\n}", "github": "", "project": "", "reviewers": "8rka;Rbzx;mExN;iN19", "site": "https://openreview.net/forum?id=O4fNuE8F51T", "pdf_size": 468509, "recommendation": "3;5;6;8", "confidence": "4;3;4;3", "correctness": "3;3;3;4", "technical_novelty": "2;2;3;4", "empirical_novelty": "2;2;3;4", "wc_summary_paper": "55;68;61;104", "wc_strength_and_weaknesses": "301;111;49;137", "wc_clarity_quality_novelty_and_reproducibility": "64;49;23;53", "wc_summary_review": "113;39;15;23", "wc_review": "533;267;148;317", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "750;423;311;406", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 72.0, 19.03943276465977 ], "wc_strength_and_weaknesses_avg": [ 149.5, 93.12760063482791 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 47.25, 15.039531242695032 ], "wc_summary_review_avg": [ 47.5, 38.79110722833263 ], "wc_review_avg": [ 316.25, 139.38682685246837 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 472.5, 165.8018395555369 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.5547001962252291, "corr_recommendation_correctness": 0.8006407690254357, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10101354712262698752&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0", "aff_unique_norm": "SOCAR", "aff_unique_dep": "", "aff_unique_url": "https://www.socar.az", "aff_unique_abbr": "", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Azerbaijan" }, { "id": "O5PXo5Y0csVi", "title": "Relaxed Attention for Transformer Models", "track": "main", "status": "Reject", "tldr": "A simple smoothing in the attention function of transformer models contributes to improved regularization and internal language model suppression.", "abstract": "The powerful modeling capabilities of all-attention-based transformer architectures often cause overfitting and - for natural language processing tasks - lead to an implicitly learned internal language model in the autoregressive transformer decoder complicating the integration of external language models. 
In this paper, we explore relaxed attention, a simple and easy-to-implement smoothing of the attention weights, yielding a two-fold improvement to the general transformer architecture: First, relaxed attention provides regularization when applied to the self-attention layers in the encoder. Second, we show that it naturally supports the integration of an external language model as it suppresses the implicitly learned internal language model by relaxing the cross attention in the decoder. We demonstrate the benefit of relaxed attention across several tasks with clear improvement in combination with recent benchmark approaches. Specifically, we exceed the former state-of-the-art performance of 26.90% word error rate on the largest public lip-reading LRS3 benchmark with a word error rate of 26.31%, and achieve a top-performing BLEU score of 37.67 on the IWSLT14 (DE$\\rightarrow$EN) machine translation task without external language models and with virtually no additional model parameters. Code and models will be made publicly available.", "keywords": "transformer;attention;regularization;internal language model;relaxed attention", "primary_area": "", "supplementary_material": "/attachment/5729c6de669e8e183264010669a38ea3c8f010da.zip", "author": "Timo Lohrenz;Bj\u00f6rn M\u00f6ller;Zhengyang Li;Tim Fingscheidt", "authorids": "~Timo_Lohrenz1;~Bj\u00f6rn_M\u00f6ller1;~Zhengyang_Li1;~Tim_Fingscheidt1", "gender": "M;M;;M", "homepage": "https://www.tu-braunschweig.de/en/ifn/institute/team/sv/lohrenz;https://www.tu-braunschweig.de/ifn/institut/abt/sv/bjoern-moeller-dipl-wirt-inf;https://www.tu-braunschweig.de/ifn/institut/abt/sv/zhengyang-li-msc;https://www.ifn.ing.tu-bs.de/en/ifn/sp/fingscheidt/", "dblp": ";;;03/7820", "google_scholar": "https://scholar.google.de/citations?user=9u1wQE0AAAAJ;;;https://scholar.google.de/citations?hl=de", "orcid": ";;;0000-0002-8895-5041", "linkedin": "timo-lohrenz/;;zhengyang-li-33b414173/;tim-fingscheidt-3048951/de?originalSubdomain=de", "or_profile": "~Timo_Lohrenz1;~Bj\u00f6rn_M\u00f6ller1;~Zhengyang_Li1;~Tim_Fingscheidt1", "aff": "Technische Universit\u00e4t Carolo-Wilhelmina Braunschweig;Technische Universit\u00e4t Carolo-Wilhelmina Braunschweig;Institute for Communications Technology;Technische Universit\u00e4t Braunschweig", "aff_domain": "tu-bs.de;tu-bs.de;tu-braunschweig.de;tu-bs.de", "position": "PhD student;PhD student;PhD student;Full Professor", "bibtex": "@misc{\nlohrenz2023relaxed,\ntitle={Relaxed Attention for Transformer Models},\nauthor={Timo Lohrenz and Bj{\\\"o}rn M{\\\"o}ller and Zhengyang Li and Tim Fingscheidt},\nyear={2023},\nurl={https://openreview.net/forum?id=O5PXo5Y0csVi}\n}", "github": "", "project": "", "reviewers": "WiHX;4P4f;2CCe;ztHw", "site": "https://openreview.net/forum?id=O5PXo5Y0csVi", "pdf_size": 524294, "recommendation": "3;5;5;5", "confidence": "4;4;2;3", "correctness": "3;3;2;4", "technical_novelty": "2;2;2;2", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "88;56;60;128", "wc_strength_and_weaknesses": "249;260;87;49", "wc_clarity_quality_novelty_and_reproducibility": "33;18;55;36", "wc_summary_review": "56;14;49;16", "wc_review": "426;348;251;229", "wc_reply_reviewers": "30;0;0;0", "wc_reply_authors": "561;720;289;344", "reply_reviewers": "1;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ],
"wc_summary_paper_avg": [ 83.0, 28.75760768909681 ], "wc_strength_and_weaknesses_avg": [ 161.25, 94.29309359650897 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 35.5, 13.162446581088183 ], "wc_summary_review_avg": [ 33.75, 18.925842121290138 ], "wc_review_avg": [ 313.5, 78.88757823637381 ], "wc_reply_reviewers_avg": [ 7.5, 12.99038105676658 ], "wc_reply_authors_avg": [ 478.5, 172.57534586377048 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.5222329678670935, "corr_recommendation_correctness": 0.0, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9038503828046600181&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Technische Universit\u00e4t Braunschweig;Institute for Communications Technology", "aff_unique_dep": ";", "aff_unique_url": "https://tu-braunschweig.de;", "aff_unique_abbr": "TU Braunschweig;", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Braunschweig;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany;" }, { "title": "Guarded Policy Optimization with Imperfect Online Demonstrations", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10735", "id": "O5rKg7IRQIO", "poster": "", "openreview": "https://openreview.net/forum?id=O5rKg7IRQIO", "slides": "https://iclr.cc/virtual/2023/poster/10735", "video": "https://iclr.cc/virtual/2023/poster/10735", "author_site": "Zhenghai Xue, Zhenghao Peng, Quanyi Li, ZHIHAN LIU, Bolei Zhou", "tldr": "Introducing a new policy optimization method exploiting imperfect online demonstrations from a guardian policy.", "abstract": "The Teacher-Student Framework (TSF) is a reinforcement learning setting where a teacher agent guards the training of a student agent by intervening and providing online demonstrations. Assuming optimal, the teacher policy has the perfect timing and capability to intervene in the learning process of the student agent, providing safety guarantee and exploration guidance. Nevertheless, in many real-world settings it is expensive or even impossible to obtain a well-performing teacher policy. In this work, we relax the assumption of a well-performing teacher and develop a new method that can incorporate arbitrary teacher policies with modest or inferior performance. We instantiate an Off-Policy Reinforcement Learning algorithm, termed Teacher-Student Shared Control (TS2C), which incorporates teacher intervention based on trajectory-based value estimation. Theoretical analysis validates that the proposed TS2C algorithm attains efficient exploration and substantial safety guarantee without being affected by the teacher's own performance. Experiments on various continuous control tasks show that our method can exploit teacher policies at different performance levels while maintaining a low training cost. Moreover, the student policy surpasses the imperfect teacher policy in terms of higher accumulated reward in held-out testing environments. 
Code is available at https://metadriverse.github.io/TS2C.", "keywords": "reinforcement learning;guarded policy optimization;imperfect demonstrations;shared control;metadrive simulator", "primary_area": "", "supplementary_material": "/attachment/62129eec9c12aca7f6493309a41f209bc9dca059.zip", "author": "Zhenghai Xue;Zhenghao Peng;Quanyi Li;Zhihan Liu;Bolei Zhou", "authorids": "~Zhenghai_Xue1;~Zhenghao_Peng1;~Quanyi_Li1;~Zhihan_Liu1;~Bolei_Zhou5", "gender": ";M;M;M;M", "homepage": ";https://pengzhenghao.github.io;https://quanyili.github.io;;https://boleizhou.github.io/", "dblp": ";220/3963;270/7691;;46/8066", "google_scholar": ";JZ8ws6IAAAAJ;Ty49X3UAAAAJ;0VVg_R4AAAAJ;9D4aG8AAAAAJ", "orcid": ";;;;", "linkedin": ";;https://www.linkedin.com/mwlite/in/quanyi-li-2b7985183;;", "or_profile": "~Zhenghai_Xue1;~Zhenghao_Peng1;~Quanyi_Li1;~Zhihan_Liu1;~Bolei_Zhou5", "aff": ";University of California, Los Angeles;Shanghai Artificial Intelligence Laboratory;Northwestern University;University of California, Los Angeles", "aff_domain": ";cs.ucla.edu;pjlab.org.cn;northwestern.edu;ucla.edu", "position": ";PhD student;Researcher;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nxue2023guarded,\ntitle={Guarded Policy Optimization with Imperfect Online Demonstrations},\nauthor={Zhenghai Xue and Zhenghao Peng and Quanyi Li and Zhihan Liu and Bolei Zhou},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=O5rKg7IRQIO}\n}", "github": "", "project": "", "reviewers": "oviQ;wVZY;rkHD;L3ge", "pdf_size": 3952121, "recommendation": "5;6;8;8", "confidence": "3;3;4;4", "correctness": "3;4;4;4", "technical_novelty": "3;2;4;3", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "174;82;30;121", "wc_strength_and_weaknesses": "168;200;76;111", "wc_clarity_quality_novelty_and_reproducibility": "167;47;14;35", "wc_summary_review": "109;44;6;22", "wc_review": "618;373;126;289", "wc_reply_reviewers": "0;251;0;0", "wc_reply_authors": "714;1914;342;265", "reply_reviewers": "0;2;0;0", "reply_authors": "1;5;2;1", "recommendation_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 101.75, 52.74644537786409 ], "wc_strength_and_weaknesses_avg": [ 138.75, 48.25647624930772 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 65.75, 59.63797028739325 ], "wc_summary_review_avg": [ 45.25, 39.20060586266493 ], "wc_review_avg": [ 351.5, 177.65204755363783 ], "wc_reply_reviewers_avg": [ 62.75, 108.68618817494705 ], "wc_reply_authors_avg": [ 808.75, 660.3171113184937 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.25, 1.6393596310755 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.9622504486493761, "corr_recommendation_correctness": 0.7777777777777777, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12427354041606628093&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=O5rKg7IRQIO", "email": ";cs.ucla.edu;pjlab.org.cn;northwestern.edu;ucla.edu", "author_num": 5, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of California, Los Angeles;Shanghai Artificial Intelligence Laboratory;Northwestern University", "aff_unique_dep": ";;", "aff_unique_url": 
"https://www.ucla.edu;http://www.shailab.org/;https://www.northwestern.edu", "aff_unique_abbr": "UCLA;Shanghai AI Lab;NU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;China" }, { "id": "O7gAffL9a0", "title": "Confidence and Dispersity Speak: Characterising Prediction Matrix for Unsupervised Accuracy Estimation", "track": "main", "status": "Reject", "tldr": "This work proposes a simple but effective method (prediction diversity) to predict how well a model generalize to out-of-distribution datasets", "abstract": "This work focuses on estimating how well a model performs on out-of-distribution (OOD) datasets without using labels. Our intuition is that a well-performing model should give predictions with high confidence and high dispersity. While recent methods study the prediction confidence, this work newly finds dispersity is another informative cue. Confidence reflects whether the individual prediction is certain; dispersity indicates how the overall predictions are distributed across all categories. To achieve a more accurate estimation, we propose to jointly consider these two properties by using the nuclear norm of the prediction matrix. In our experiments, we extensively validate the effectiveness of nuclear norm for various models (e.g., ViT and ConvNeXt), different datasets (e.g., ImageNet and CUB-200), and diverse types of distribution shifts (e.g., style shift and reproduction shift). We show that the nuclear norm is more accurate and robust in predicting OOD accuracy than existing methods. Lastly, we study the limitation of the nuclear norm and discuss potential directions.", "keywords": "Out-of-distribution generalization;Unsupervised Accuracy Estimation;Prediction DIversity;Distribution Shift", "primary_area": "", "supplementary_material": "", "author": "Weijian Deng;Yumin Suh;Stephen Gould;Liang Zheng", "authorids": "~Weijian_Deng1;~Yumin_Suh1;~Stephen_Gould1;~Liang_Zheng4", "gender": "M;;M;M", "homepage": "http://weijiandeng.xyz;https://yuminsuh.github.io/;http://users.cecs.anu.edu.au/~sgould/;http://zheng-lab.cecs.anu.edu.au/", "dblp": "198/1517;119/1522;89/1569.html;61/7360-1", "google_scholar": "https://scholar.google.com.hk/citations?user=lReHnAEAAAAJ;a9k4nwQAAAAJ;YvdzeM8AAAAJ;https://scholar.google.com.au/citations?user=vNHqr3oAAAAJ", "orcid": ";;0000-0001-8929-7899;", "linkedin": ";;;liang-zheng-76341311a/", "or_profile": "~Weijian_Deng1;~Yumin_Suh1;~Stephen_Gould1;~Liang_Zheng4", "aff": "Australian National University;NEC-Labs;Australian National University;Australian National University", "aff_domain": "anu.edu.au;nec-labs.com;anu.edu.au;anu.edu.au", "position": "PhD student;Researcher;Full Professor;Senior Lecturer", "bibtex": "@misc{\ndeng2023confidence,\ntitle={Confidence and Dispersity Speak: Characterising Prediction Matrix for Unsupervised Accuracy Estimation},\nauthor={Weijian Deng and Yumin Suh and Stephen Gould and Liang Zheng},\nyear={2023},\nurl={https://openreview.net/forum?id=O7gAffL9a0}\n}", "github": "", "project": "", "reviewers": "JXCX;UZsk;eGpU", "site": "https://openreview.net/forum?id=O7gAffL9a0", "pdf_size": 2975036, "recommendation": "3;6;8", "confidence": "2;3;3", "correctness": "3;3;4", "technical_novelty": "3;2;3", "empirical_novelty": "1;3;4", "wc_summary_paper": "69;173;101", "wc_strength_and_weaknesses": "167;496;141", "wc_clarity_quality_novelty_and_reproducibility": "43;130;111", "wc_summary_review": "74;78;97", "wc_review": 
"353;877;450", "wc_reply_reviewers": "0;75;0", "wc_reply_authors": "773;2081;750", "reply_reviewers": "0;1;0", "reply_authors": "2;5;2", "recommendation_avg": [ 5.666666666666667, 2.0548046676563256 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 1.247219128924647 ], "wc_summary_paper_avg": [ 114.33333333333333, 43.4920171474669 ], "wc_strength_and_weaknesses_avg": [ 268.0, 161.56938653924098 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 94.66666666666667, 37.3482113211448 ], "wc_summary_review_avg": [ 83.0, 10.03327796219494 ], "wc_review_avg": [ 560.0, 227.6239589029825 ], "wc_reply_reviewers_avg": [ 25.0, 35.35533905932738 ], "wc_reply_authors_avg": [ 1201.3333333333333, 622.0891325918569 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 3.0, 1.4142135623730951 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.9176629354822472, "corr_recommendation_correctness": 0.8029550685469661, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10738745788947089836&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Australian National University;NEC Laboratories", "aff_unique_dep": ";", "aff_unique_url": "https://www.anu.edu.au;https://www.nec-labs.com", "aff_unique_abbr": "ANU;NEC-Labs", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "Australia;United States" }, { "id": "O7x_ldrlaO7", "title": "Structural Privacy in Graphs", "track": "main", "status": "Reject", "tldr": "Make the structure of the graph private in addition to the privacy of node features and labels", "abstract": "Graph Neural Networks (GNNs) gained popularity to address the tasks over the graph-structured data that best represent many real-world systems. The privacy of the participants of these systems is at risk if the GNNs are not carefully designed. Existing works in privacy-preserving GNNs primarily ensure the privacy of features and labels of a node. In order to ensure complete privacy related to graph data, its structure also needs to be privatized. We provide a method SPGraph to privatize the graph structure by adding noise to the neighborhood data of the node. Our method addresses two challenges in introducing structural privacy in graphs. Applying randomization on the set of actual neighbors to introduce noise leads to a reduction in the degree of a node, which is undesirable. To overcome this first challenge, we introduce $\\lambda$-selector that samples nodes to be added to the set of neighbors. The second challenge is to denoise the neighborhood so that the noise added in the neighborhood does not significantly impact the accuracy. In this view, we use $p$-hop neighborhood to compensate for the loss of actual neighbors in the randomization. We continue to use the node and label privacy as implemented in the previous methods for privacy in GNNs. We conduct extensive experiments over real-world datasets to show the impact of perturbation in the graph structure. 
", "keywords": "Privacy;Graph Neural Networks;Differential Privacy;Graph Structure", "primary_area": "", "supplementary_material": "", "author": "Rucha Bhalchandra Joshi;Subhankar Mishra", "authorids": "~Rucha_Bhalchandra_Joshi1;~Subhankar_Mishra1", "gender": "F;M", "homepage": "https://ruchajoshi.github.io/;https://www.niser.ac.in/~smishra/", "dblp": "266/5932;147/8391", "google_scholar": "Xfxu5jQAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";", "linkedin": ";", "or_profile": "~Rucha_Bhalchandra_Joshi1;~Subhankar_Mishra1", "aff": "National Institute of Science Education and Research, Bhubaneswar India ;National Institute of Science Education and Research", "aff_domain": "niser.ac.in;niser.ac.in", "position": "PhD student;Assistant Professor", "bibtex": "@misc{\njoshi2023structural,\ntitle={Structural Privacy in Graphs},\nauthor={Rucha Bhalchandra Joshi and Subhankar Mishra},\nyear={2023},\nurl={https://openreview.net/forum?id=O7x_ldrlaO7}\n}", "github": "", "project": "", "reviewers": "nodK;nZ2k;Q4eW", "site": "https://openreview.net/forum?id=O7x_ldrlaO7", "pdf_size": 1704345, "recommendation": "1;3;3", "confidence": "4;4;5", "correctness": "3;1;2", "technical_novelty": "2;2;1", "empirical_novelty": "2;1;1", "wc_summary_paper": "16;99;32", "wc_strength_and_weaknesses": "68;37;245", "wc_clarity_quality_novelty_and_reproducibility": "178;18;4", "wc_summary_review": "10;241;16", "wc_review": "272;395;297", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 2.3333333333333335, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 2.0, 0.816496580927726 ], "technical_novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.3333333333333333, 0.4714045207910317 ], "wc_summary_paper_avg": [ 49.0, 35.95367389665021 ], "wc_strength_and_weaknesses_avg": [ 116.66666666666667, 91.62362625921813 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 66.66666666666667, 78.93175674101155 ], "wc_summary_review_avg": [ 89.0, 107.50813922675809 ], "wc_review_avg": [ 321.3333333333333, 53.080651423617205 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.4999999999999999, "corr_recommendation_correctness": -0.8660254037844387, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "aff_unique_index": "0;0", "aff_unique_norm": "National Institute of Science Education and Research", "aff_unique_dep": "", "aff_unique_url": "https://www.niser.ac.in", "aff_unique_abbr": "NISER", "aff_campus_unique_index": "0", "aff_campus_unique": "Bhubaneswar;", "aff_country_unique_index": "0;0", "aff_country_unique": "India" }, { "id": "O8EK-eWjUm", "title": "Open Set Recognition by Mitigating Prompt Bias", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Existing open set recognition (OSR) methods are usually performed on relatively small datasets by training a visual model from scratch. OSR on large-scale datasets have been rarely studied for their great complexity and difficulty. Recently, vision-language (VL) pre-training has promoted closed-set image recognition with prompt engineering on datasets with various scales. 
However, prompts tuned on the training data often exhibit label bias towards known classes, leading to poor performance in recognizing unknown data in the open environment. In this paper, we aim at developing a new paradigm for OSR both on small and large-scale datasets by prompt engineering on VL models in a divide-and-conquer strategy. Firstly, the closed-set data is processed as a combination of one or more groups. Each group is devised with a group-specific prompt. Then, we propose the Group-specific Contrastive Tuning (GCTu), in which negative label words are introduced into tuning to mitigate the label bias of group-specific prompts. At inference, to achieve comprehensive predictions both on small and large-scale datasets, we propose the Group Combined Testing (GCTe). It determines the optimal prediction prompt among the multiple group-specific predictions by focusing on the group-wise closed-set probability distributions. Our method, namely GCT2, achieves excellent performance on both small and large-scale OSR benchmarks. The strong and wide applicability of our method is also verified in ablation studies.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Ning Liao;XIAOPENG ZHANG;Dongsheng Jiang;Junchi Yan;Qi Tian", "authorids": "~Ning_Liao1;~XIAOPENG_ZHANG7;~Dongsheng_Jiang2;~Junchi_Yan2;~Qi_Tian3", "gender": "M;M;M;M;M", "homepage": "https://scholar.google.com/citations?user=6aARLhMAAAAJ&hl=zh-CN;https://sites.google.com/site/zxphistory/;https://www.qitian1987.com/index.html;https://sites.google.com/site/dongshengjiangbme/;http://thinklab.sjtu.edu.cn/", "dblp": "44/1117;;78/1467-1.html;85/8729;60/7949.html", "google_scholar": "6aARLhMAAAAJ;Ud6aBAcAAAAJ;https://scholar.google.com/citations?hl=en;-eGIgsoAAAAJ;ga230VoAAAAJ", "orcid": "0000-0002-3764-2555;;0000-0002-7252-5047;;0000-0001-9639-7679", "linkedin": ";;;;", "or_profile": "~Ning_Liao1;~XIAOPENG_ZHANG7;~Qi_Tian3;~Dongsheng_Jiang1;~Junchi_Yan1", "aff": "Shanghai Jiaotong University;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;huawei.com;huawei.com;huawei.com;sjtu.edu.cn", "position": "PhD student;Principal Researcher;Principal Researcher;Principal Researcher;Associate Professor", "bibtex": "@misc{\nliao2023open,\ntitle={Open Set Recognition by Mitigating Prompt Bias},\nauthor={Ning Liao and XIAOPENG ZHANG and Dongsheng Jiang and Junchi Yan and Qi Tian},\nyear={2023},\nurl={https://openreview.net/forum?id=O8EK-eWjUm}\n}", "github": "", "project": "", "reviewers": "dbVk;B1qt;ZBZa;PRye", "site": "https://openreview.net/forum?id=O8EK-eWjUm", "pdf_size": 954732, "recommendation": "3;5;6;6", "confidence": "4;4;4;4", "correctness": "2;2;3;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "166;61;163;71", "wc_strength_and_weaknesses": "555;264;392;225", "wc_clarity_quality_novelty_and_reproducibility": "77;10;87;21", "wc_summary_review": "77;57;19;25", "wc_review": "875;392;661;342", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 115.25, 49.38813116529112 ], "wc_strength_and_weaknesses_avg": [ 359.0, 128.92439645001252 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 48.75,
33.662850443775554 ], "wc_summary_review_avg": [ 44.5, 23.680160472429236 ], "wc_review_avg": [ 567.5, 215.02848648493065 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.8164965809277259, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:POBXGLArJuEJ:scholar.google.com/&scioq=Open+Set+Recognition+by+Mitigating+Prompt+Bias&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;1;1;0", "aff_unique_norm": "Shanghai Jiao Tong University;Huawei", "aff_unique_dep": ";Huawei Technologies", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.huawei.com", "aff_unique_abbr": "SJTU;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Quasi-optimal Reinforcement Learning with Continuous Actions", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11527", "id": "O8Vc52xFSUR", "poster": "/media/PosterPDFs/ICLR%202023/11527.png?t=1681752939.9341629", "openreview": "https://openreview.net/forum?id=O8Vc52xFSUR", "slides": "https://iclr.cc/virtual/2023/poster/11527", "video": "https://iclr.cc/virtual/2023/poster/11527", "author_site": "Yuhan Li, Wenzhuo Zhou, Ruoqing Zhu", "tldr": "The paper proposes a novel learning algorithm for reliable continuous action allocations.", "abstract": "Many real-world applications of reinforcement learning (RL) require making decisions in continuous action environments. In particular, determining the optimal dose level plays a vital role in developing medical treatment regimes. One challenge in adapting existing RL algorithms to medical applications, however, is that the popular infinite support stochastic policies, e.g., Gaussian policy, may assign riskily high dosages and harm patients seriously. Hence, it is important to induce a policy class whose support only contains near-optimal actions, and shrink the action-searching area for effectiveness and reliability. To achieve this, we develop a novel quasi-optimal learning algorithm, which can be easily optimized in off-policy settings with guaranteed convergence under general function approximations. Theoretically, we analyze the consistency, sample complexity, adaptability, and convergence of the proposed algorithm. 
We evaluate our algorithm with comprehensive simulated experiments and a real-world dose suggestion application to the Ohio Type 1 diabetes dataset.", "keywords": "Continuous Treatments;Markov Decision Process;Safe Action Allocation", "primary_area": "", "supplementary_material": "", "author": "Yuhan Li;Wenzhuo Zhou;Ruoqing Zhu", "authorids": "~Yuhan_Li1;~Wenzhuo_Zhou1;~Ruoqing_Zhu1", "gender": "M;M;M", "homepage": "https://stat.illinois.edu/directory/profile/yuhanli8;https://sites.google.com/view/wenzhuozhou;https://sites.google.com/site/teazrq/", "dblp": "116/8661-5;281/9005;", "google_scholar": "-EmQgKYAAAAJ;;uyzMyb8AAAAJ", "orcid": "0009-0005-3535-6781;;", "linkedin": ";;", "or_profile": "~Yuhan_Li1;~Wenzhuo_Zhou1;~Ruoqing_Zhu1", "aff": "University of Illinois, Urbana Champaign;;University of Illinois, Urbana Champaign", "aff_domain": "illinois.edu;;uiuc.edu", "position": "PhD student;;Associate Professor", "bibtex": "@inproceedings{\nli2023quasioptimal,\ntitle={Quasi-optimal Reinforcement Learning with Continuous Actions},\nauthor={Yuhan Li and Wenzhuo Zhou and Ruoqing Zhu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=O8Vc52xFSUR}\n}", "github": "", "project": "", "reviewers": "xW9e;RoFn;Y35L", "pdf_size": 1386319, "recommendation": "6;6;8", "confidence": "5;3;4", "correctness": "3;3;3", "technical_novelty": "3;3;3", "empirical_novelty": "3;2;3", "wc_summary_paper": "64;94;66", "wc_strength_and_weaknesses": "881;195;461", "wc_clarity_quality_novelty_and_reproducibility": "163;62;28", "wc_summary_review": "142;41;57", "wc_review": "1250;392;612", "wc_reply_reviewers": "190;0;0", "wc_reply_authors": "5221;2342;1870", "reply_reviewers": "1;0;0", "reply_authors": "11;4;4", "recommendation_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 74.66666666666667, 13.695092389449425 ], "wc_strength_and_weaknesses_avg": [ 512.3333333333334, 282.40081838093573 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 84.33333333333333, 57.33139531608217 ], "wc_summary_review_avg": [ 80.0, 44.3245605355165 ], "wc_review_avg": [ 751.3333333333334, 363.86932941495485 ], "wc_reply_reviewers_avg": [ 63.333333333333336, 89.56685895029602 ], "wc_reply_authors_avg": [ 3144.3333333333335, 1481.0141420286604 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 6.333333333333333, 3.299831645537222 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9546174680358825785&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=O8Vc52xFSUR", "email": "illinois.edu;;uiuc.edu", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "University of Illinois Urbana-Champaign", "aff_unique_dep": "", "aff_unique_url": "https://illinois.edu", "aff_unique_abbr": "UIUC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Urbana-Champaign", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "OA-gbD-ANFt", "title": "Inverse Optimal Transport with Application to Contrastive Learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Previous works
in contrastive learning (CL) mainly focus on pairwise views to learn representations by attracting positive samples and repelling negative ones. In this work, we understand CL from a collective point-set matching view and solve this problem with the formulation of inverse optimal transport (IOT), which is a min-min optimization to learn the features. By varying the relaxation degree of constraints in the inner minimization of IOT, one can naturally get three different contrastive losses and reveal that InfoNCE is a special case of them, which provides a new and more general view of CL. Besides, with our soft matching view, a uniformity penalty is also proposed to improve the representation learning. Experimental results show the effectiveness of our methods.", "keywords": "Contrastive Learning;Inverse Optimal Transport", "primary_area": "", "supplementary_material": "", "author": "Liangliang Shi;Jingtao Fan;Junchi Yan", "authorids": "~Liangliang_Shi1;fjt0324@sjtu.edu.cn;~Junchi_Yan2", "gender": "M;;", "homepage": ";;", "dblp": "89/8730;;", "google_scholar": "Qf1k8lUAAAAJ;;", "orcid": "0000-0001-7033-4207;;", "linkedin": ";;", "or_profile": "~Liangliang_Shi1;fjt0324@sjtu.edu.cn;~Junchi_Yan2", "aff": "Shanghai Jiaotong University;;", "aff_domain": "sjtu.edu.cn;;", "position": "PhD student;;", "bibtex": "@misc{\nshi2023inverse,\ntitle={Inverse Optimal Transport with Application to Contrastive Learning},\nauthor={Liangliang Shi and Jingtao Fan and Junchi Yan},\nyear={2023},\nurl={https://openreview.net/forum?id=OA-gbD-ANFt}\n}", "github": "", "project": "", "reviewers": "FQ6D;rpVF;oAFN;p1CM", "site": "https://openreview.net/forum?id=OA-gbD-ANFt", "pdf_size": 4203475, "recommendation": "3;3;3;5", "confidence": "4;3;4;4", "correctness": "3;3;4;4", "technical_novelty": "2;2;2;2", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "15;54;148;113", "wc_strength_and_weaknesses": "142;241;100;318", "wc_clarity_quality_novelty_and_reproducibility": "63;32;14;40", "wc_summary_review": "33;37;27;52", "wc_review": "253;364;289;523", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 82.5, 51.451433410547466 ], "wc_strength_and_weaknesses_avg": [ 200.25, 85.10104288432663 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 37.25, 17.597940220378067 ], "wc_summary_review_avg": [ 37.25, 9.229707470987366 ], "wc_review_avg": [ 357.25, 103.73614365302 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:7vgdMbcmYi4J:scholar.google.com/&scioq=Inverse+Optimal+Transport+with+Application+to+Contrastive+Learning&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "id": "OA4o8yKW3q", "title": "Towards Robust Dataset Learning", "track":
"main", "status": "Withdraw", "tldr": "We study the problem of learning a robust dataset such that any classifier naturally trained on the dataset is adversarially robust. ", "abstract": "We study the problem of learning a robust dataset such that any classifier naturally trained on the dataset is adversarially robust. Such a dataset benefits the downstream tasks as natural training is much faster than adversarial training, and demonstrates that the desired property of robustness is transferable between models and data. In this work, we propose a principled, tri-level optimization to formulate the robust dataset learning problem. We show that, under an abstraction model that characterizes robust vs. non-robust features, the proposed method provably learns a robust dataset. Extensive experiments on MNIST, CIFAR10, and TinyImageNet demostrate the effectiveness of our algorithm with different network initializations and architectures.", "keywords": "robust dataset learning", "primary_area": "", "supplementary_material": "/attachment/34c5a77e03846e7b59174de9dddf3bd5c58ec54c.zip", "author": "Yihan Wu;Xinda Li;Florian Kerschbaum;Heng Huang;Hongyang Zhang", "authorids": "~Yihan_Wu1;~Xinda_Li1;~Florian_Kerschbaum1;~Heng_Huang1;~Hongyang_Zhang1", "gender": "M;M;;M;M", "homepage": "https://yihwu.github.io/;;;https://www.cs.umd.edu/~heng/;https://hongyanz.github.io/", "dblp": ";;;03/281;23/10537-1", "google_scholar": "cajTg_wAAAAJ;Y0R4jk4AAAAJ;;4OqLaDwAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;;", "linkedin": ";xinda-l-a85676167/;;;", "or_profile": "~Yihan_Wu1;~Xinda_Li1;~Florian_Kerschbaum1;~Heng_Huang1;~Hongyang_Zhang1", "aff": "University of Pittsburgh;;;University of Pittsburgh;School of Computer Science, University of Waterloo", "aff_domain": "pitt.edu;;;pitt.edu;uwaterloo.ca", "position": "PhD student;;;Full Professor;Assistant Professor", "bibtex": "@misc{\nwu2023towards,\ntitle={Towards Robust Dataset Learning},\nauthor={Yihan Wu and Xinda Li and Florian Kerschbaum and Heng Huang and Hongyang Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=OA4o8yKW3q}\n}", "github": "", "project": "", "reviewers": "AXQR;PQC7;cZpn;Tcxp", "site": "https://openreview.net/forum?id=OA4o8yKW3q", "pdf_size": 1575541, "recommendation": "3;3;5;6", "confidence": "3;4;4;3", "correctness": "3;2;3;4", "technical_novelty": "2;3;3;3", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "81;97;95;67", "wc_strength_and_weaknesses": "420;1200;539;340", "wc_clarity_quality_novelty_and_reproducibility": "101;102;82;31", "wc_summary_review": "75;56;69;30", "wc_review": "677;1455;785;468", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 85.0, 12.083045973594572 ], "wc_strength_and_weaknesses_avg": [ 624.75, 339.5845807748049 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 79.0, 28.83574171059243 ], "wc_summary_review_avg": [ 57.5, 17.298843892006193 ], "wc_review_avg": [ 846.25, 369.4748807429268 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.19245008972987526, 
"corr_recommendation_correctness": 0.816496580927726, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2859734081670314830&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Pittsburgh;University of Waterloo", "aff_unique_dep": ";School of Computer Science", "aff_unique_url": "https://www.pitt.edu;https://uwaterloo.ca", "aff_unique_abbr": "Pitt;UWaterloo", "aff_campus_unique_index": "1", "aff_campus_unique": ";Waterloo", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;Canada" }, { "title": "Autoregressive Conditional Neural Processes", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11116", "id": "OAsXFPBfTBh", "poster": "", "openreview": "https://openreview.net/forum?id=OAsXFPBfTBh", "slides": "https://iclr.cc/virtual/2023/poster/11116", "video": "https://iclr.cc/virtual/2023/poster/11116", "author_site": "Wessel Bruinsma, Stratis Markou, James Requeima, Andrew Y. K. Foong, Tom Andersson, Anna Vaughan, Anthony Buonomo, Scott Hosking, Richard E Turner", "tldr": "", "abstract": "Conditional neural processes (CNPs; Garnelo et al., 2018a) are attractive meta-learning models which produce well-calibrated predictions and are trainable via a simple maximum likelihood procedure. Although CNPs have many advantages, they are unable to model dependencies in their predictions. Various works propose solutions to this, but these come at the cost of either requiring approximate inference or being limited to Gaussian predictions. In this work, we instead propose to change how CNPs are deployed at test time, without any modifications to the model or training procedure. Instead of making predictions independently for every target point, we autoregressively define a joint predictive distribution using the chain rule of probability, taking inspiration from the neural autoregressive density estimator (NADE) literature. We show that this simple procedure allows factorised Gaussian CNPs to model highly dependent, non-Gaussian predictive distributions. Perhaps surprisingly, in an extensive range of tasks with synthetic and real data, we show that CNPs in autoregressive (AR) mode not only significantly outperform non-AR CNPs, but are also competitive with more sophisticated models that are significantly more computationally expensive and challenging to train. This performance is remarkable given that AR CNPs are not trained to model joint dependencies. Our work provides an example of how ideas from neural distribution estimation can benefit neural processes, and motivates research into the AR deployment of other neural process models.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Wessel Bruinsma;Stratis Markou;James Requeima;Andrew Y. K. 
Foong;Tom Andersson;Anna Vaughan;Anthony Buonomo;Scott Hosking;Richard E Turner", "authorids": "~Wessel_Bruinsma1;~Stratis_Markou1;~James_Requeima1;~Andrew_Y._K._Foong1;tomand@bas.ac.uk;~Anna_Vaughan1;ab2707@cam.ac.uk;jask@bas.ac.uk;~Richard_E_Turner1", "gender": ";M;M;M;;;;;M", "homepage": "https://wessel.ai;;http://jamesr.info;https://andrewfoongyk.github.io/;;https://github.com/annavaughan;;;https://rich-turner-group.github.io/", "dblp": "242/3348.html;300/3941;;243/7014;;;;;40/5352", "google_scholar": "QRQwz3cAAAAJ;;https://scholar.google.ca/citations?hl=en;2UOjgIUAAAAJ;;;;;https://scholar.google.co.uk/citations?user=DgLEyZgAAAAJ", "orcid": ";;;;;;;;", "linkedin": ";stratos-m-85884b94/;;;;;;;", "or_profile": "~Wessel_Bruinsma1;~Stratis_Markou1;~James_Requeima1;~Andrew_Y._K._Foong1;tomand@bas.ac.uk;~Anna_Vaughan1;ab2707@cam.ac.uk;jask@bas.ac.uk;~Richard_E_Turner1", "aff": "Microsoft Research;University of Cambridge;University of Toronto;Microsoft;;University of Cambridge;;;Microsoft Research", "aff_domain": "microsoft.com;cam.ac.uk;cs.toronto;microsoft.com;;cam.ac.uk;;;research.microsoft.com", "position": "Researcher;PhD student;Postdoc;Researcher;;PhD student;;;Researcher", "bibtex": "@inproceedings{\nbruinsma2023autoregressive,\ntitle={Autoregressive Conditional Neural Processes},\nauthor={Wessel Bruinsma and Stratis Markou and James Requeima and Andrew Y. K. Foong and Tom Andersson and Anna Vaughan and Anthony Buonomo and Scott Hosking and Richard E Turner},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=OAsXFPBfTBh}\n}", "github": "", "project": "", "reviewers": "K56J;AdCH;WrGZ", "pdf_size": 9100486, "recommendation": "5;6;8", "confidence": "3;2;3", "correctness": "4;3;3", "technical_novelty": "2;3;3", "empirical_novelty": "3;3;4", "wc_summary_paper": "116;66;91", "wc_strength_and_weaknesses": "125;296;47", "wc_clarity_quality_novelty_and_reproducibility": "18;181;405", "wc_summary_review": "98;81;89", "wc_review": "357;624;632", "wc_reply_reviewers": "0;73;72", "wc_reply_authors": "1710;2167;1673", "reply_reviewers": "0;1;1", "reply_authors": "5;6;5", "recommendation_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 91.0, 20.412414523193153 ], "wc_strength_and_weaknesses_avg": [ 156.0, 103.99038417084533 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 201.33333333333334, 158.64495649790516 ], "wc_summary_review_avg": [ 89.33333333333333, 6.944222218666553 ], "wc_review_avg": [ 537.6666666666666, 127.79236631696313 ], "wc_reply_reviewers_avg": [ 48.333333333333336, 34.179265969622904 ], "wc_reply_authors_avg": [ 1850.0, 224.66122644254096 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 5.333333333333333, 0.4714045207910317 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 9, 0 ], "corr_recommendation_confidence": 0.18898223650461363, "corr_recommendation_correctness": -0.7559289460184545, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8423181852288020352&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=OAsXFPBfTBh", "email": 
"microsoft.com;cam.ac.uk;cs.toronto;microsoft.com;;cam.ac.uk;;;research.microsoft.com", "author_num": 9, "aff_unique_index": "0;1;2;0;1;0", "aff_unique_norm": "Microsoft;University of Cambridge;University of Toronto", "aff_unique_dep": "Microsoft Research;;", "aff_unique_url": "https://www.microsoft.com/en-us/research;https://www.cam.ac.uk;https://www.utoronto.ca", "aff_unique_abbr": "MSR;Cambridge;U of T", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;1;2;0;1;0", "aff_country_unique": "United States;United Kingdom;Canada" }, { "title": "HyperDeepONet: learning operator with complex target function space using the limited resources via hypernetwork", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11155", "id": "OAw6V3ZAhSd", "poster": "/media/PosterPDFs/ICLR%202023/11155.png?t=1682648632.6946275", "openreview": "https://openreview.net/forum?id=OAw6V3ZAhSd", "slides": "https://iclr.cc/virtual/2023/poster/11155", "video": "https://iclr.cc/virtual/2023/poster/11155", "author_site": "Jae Yong Lee, Sung Woong Cho, Hyung Ju Hwang", "tldr": "", "abstract": "Fast and accurate predictions for complex physical dynamics are a big challenge across various applications. Real-time prediction on resource-constrained hardware is even more crucial in the real-world problems. The deep operator network (DeepONet) has recently been proposed as a framework for learning nonlinear mappings between function spaces. However, the DeepONet requires many parameters and has a high computational cost when learning operators, particularly those with complex (discontinuous or non-smooth) target functions. In this study, we propose HyperDeepONet, which uses the expressive power of the hypernetwork to enable learning of a complex operator with smaller set of parameters. The DeepONet and its variant models can be thought of as a method of injecting the input function information into the target function. From this perspective, these models can be viewed as a special case of HyperDeepONet. We analyze the complexity of DeepONet and conclude that HyperDeepONet needs relatively lower complexity to obtain the desired accuracy for operator learning. 
HyperDeepONet was successfully applied to various operator learning problems using low computational resources compared to other benchmarks.", "keywords": "Hypernetwork;Operator learning;Deep operator network;DeepONet", "primary_area": "", "supplementary_material": "/attachment/111c804865f470b22a85cc23f6f716baee4a84bd.zip", "author": "Jae Yong Lee;SungWoong CHO;Hyung Ju Hwang", "authorids": "~Jae_Yong_Lee2;~SungWoong_CHO1;~Hyung_Ju_Hwang1", "gender": "M;M;", "homepage": "http://www.jaeyong-lee.com/;;http://hjhwang.postech.ac.kr", "dblp": "72/6418;;", "google_scholar": "C1FGMygAAAAJ;cknw4HUAAAAJ;", "orcid": "0000-0003-0193-545X;;", "linkedin": "jae-yong-lee-31b675158/;;", "or_profile": "~Jae_Yong_Lee2;~SungWoong_CHO1;~Hyung_Ju_Hwang1", "aff": "Korea Institute for Advanced Study (KIAS);Pohang University of Science and Technology;POSTECH", "aff_domain": "kias.re.kr;postech.edu;postech.ac.kr", "position": "Researcher;PhD student;Full Professor", "bibtex": "@inproceedings{\nlee2023hyperdeeponet,\ntitle={HyperDeep{ON}et: learning operator with complex target function space using the limited resources via hypernetwork},\nauthor={Jae Yong Lee and SungWoong CHO and Hyung Ju Hwang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=OAw6V3ZAhSd}\n}", "github": "", "project": "", "reviewers": "eaJx;8Dyr;LzY8", "pdf_size": 5181946, "recommendation": "8;8;8", "confidence": "3;3;2", "correctness": "3;4;3", "technical_novelty": "3;3;3", "empirical_novelty": "2;3;2", "wc_summary_paper": "132;57;129", "wc_strength_and_weaknesses": "205;133;398", "wc_clarity_quality_novelty_and_reproducibility": "70;176;55", "wc_summary_review": "76;51;119", "wc_review": "483;417;701", "wc_reply_reviewers": "63;307;191", "wc_reply_authors": "1218;1373;2117", "reply_reviewers": "1;2;2", "reply_authors": "3;3;5", "recommendation_avg": [ 8.0, 0.0 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 106.0, 34.66987164671943 ], "wc_strength_and_weaknesses_avg": [ 245.33333333333334, 111.8818821595148 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 100.33333333333333, 53.85371131335539 ], "wc_summary_review_avg": [ 82.0, 28.083209693100727 ], "wc_review_avg": [ 533.6666666666666, 121.3516469695497 ], "wc_reply_reviewers_avg": [ 187.0, 99.65273035229224 ], "wc_reply_authors_avg": [ 1569.3333333333333, 392.3946426183155 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.4714045207910317 ], "reply_authors_avg": [ 3.6666666666666665, 0.9428090415820634 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10503021712486374697&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=OAw6V3ZAhSd", "email": "kias.re.kr;postech.edu;postech.ac.kr", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Korea Institute for Advanced Study;Pohang University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "http://www.kias.re.kr;https://www.postech.ac.kr", "aff_unique_abbr": "KIAS;POSTECH", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Pohang", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South 
Korea" }, { "id": "OE4uriQtuDJ", "title": "Multi-View Masked Autoencoders for Visual Control", "track": "main", "status": "Reject", "tldr": "We present a framework for multi-view representation learning via masked view reconstruction.", "abstract": "This paper investigates how to leverage data from multiple cameras to learn representations beneficial for visual control. To this end, we present the Multi-View Masked Autoencoder (MV-MAE), a simple and scalable framework for multi-view representation learning. Our main idea is to mask multiple viewpoints from video frames at random and train a video autoencoder to reconstruct pixels of both masked and unmasked viewpoints. This allows the model to learn representations that capture useful information of the current viewpoint but also the cross-view information from different viewpoints. We evaluate MV-MAE on challenging RLBench visual manipulation tasks by training a reinforcement learning agent on top of frozen representations. Our experiments demonstrate that MV-MAE significantly outperforms other multi-view representation learning approaches. Moreover, we show that the number of cameras can differ between the representation learning phase and the behavior learning phase. By training a single-view control agent on top of multi-view representations from MV-MAE, we achieve 62.3% success rate while the single-view representation learning baseline achieves 42.3%.", "keywords": "visual control;masked autoencoder;representation learning;world model", "primary_area": "", "supplementary_material": "", "author": "Younggyo Seo;Junsu Kim;Stephen James;Kimin Lee;Jinwoo Shin;Pieter Abbeel", "authorids": "~Younggyo_Seo1;~Junsu_Kim1;~Stephen_James1;~Kimin_Lee1;~Jinwoo_Shin1;~Pieter_Abbeel2", "gender": "M;M;M;M;M;M", "homepage": "https://younggyo.me/;https://sites.google.com/view/junsu-kim;https://stepjam.github.io/;https://sites.google.com/view/kiminlee;https://sites.google.com/site/mijirim/;https://people.eecs.berkeley.edu/~pabbeel/", "dblp": "265/5586;;163/5669;183/6849;31/7062;", "google_scholar": "tI1-YwIAAAAJ;1o9cS8UAAAAJ;OXtG-isAAAAJ;92M8xv4AAAAJ;https://scholar.google.com.tw/citations?user=m3eDp7kAAAAJ;https://scholar.google.com.tw/citations?user=vtwH6GkAAAAJ", "orcid": ";;;;;", "linkedin": ";junsu-kim-b170b3168/;;;;", "or_profile": "~Younggyo_Seo1;~Junsu_Kim1;~Stephen_James1;~Kimin_Lee1;~Jinwoo_Shin1;~Pieter_Abbeel2", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Dyson;Google;Korea Advanced Institute of Science & Technology;Covariant", "aff_domain": "kaist.ac.kr;kaist.ac.kr;dyson.com;google.com;kaist.ac.kr;covariant.ai", "position": "PhD student;Ph.D. 
student;Principal Researcher;Researcher;Full Professor;Founder", "bibtex": "@misc{\nseo2023multiview,\ntitle={Multi-View Masked Autoencoders for Visual Control},\nauthor={Younggyo Seo and Junsu Kim and Stephen James and Kimin Lee and Jinwoo Shin and Pieter Abbeel},\nyear={2023},\nurl={https://openreview.net/forum?id=OE4uriQtuDJ}\n}", "github": "", "project": "", "reviewers": "HaAG;ntdx;mrqQ;Zds9", "site": "https://openreview.net/forum?id=OE4uriQtuDJ", "pdf_size": 4622753, "recommendation": "5;5;5;6", "confidence": "4;4;3;3", "correctness": "4;4;4;4", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;0;2;3", "wc_summary_paper": "60;53;50;62", "wc_strength_and_weaknesses": "305;211;257;92", "wc_clarity_quality_novelty_and_reproducibility": "16;57;111;32", "wc_summary_review": "178;97;28;20", "wc_review": "559;418;446;206", "wc_reply_reviewers": "299;112;146;0", "wc_reply_authors": "644;602;789;230", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;1", "recommendation_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 4.0, 0.0 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.75, 1.0897247358851685 ], "wc_summary_paper_avg": [ 56.25, 4.9180788932265 ], "wc_strength_and_weaknesses_avg": [ 216.25, 79.06128951642516 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 54.0, 36.00694377477767 ], "wc_summary_review_avg": [ 80.75, 63.62929749730072 ], "wc_review_avg": [ 407.25, 127.61930692493202 ], "wc_reply_reviewers_avg": [ 139.25, 106.88632980882073 ], "wc_reply_authors_avg": [ 566.25, 206.1581613713122 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:BVynNZAFcfAJ:scholar.google.com/&scioq=Multi-View+Masked+Autoencoders+for+Visual+Control&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;1;2;0;3", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Dyson;Google;Covariant", "aff_unique_dep": ";;Google;", "aff_unique_url": "https://www.kaist.ac.kr;https://www.dyson.com;https://www.google.com;", "aff_unique_abbr": "KAIST;;Google;", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;1;2;0", "aff_country_unique": "South Korea;United Kingdom;United States;" }, { "id": "OIcMPYZXFPL", "title": "Mastering Spatial Graph Prediction of Road Networks", "track": "main", "status": "Reject", "tldr": "", "abstract": "Accurately predicting road networks from satellite images requires a global understanding of the network topology. We propose to capture such high-level information by introducing a graph-based framework that simulates the addition of sequences of graph edges using a reinforcement learning (RL) approach. In particular, given a partially generated graph associated with a satellite image, an RL agent nominates modifications that maximize a cumulative reward. As opposed to standard supervised techniques that tend to be more restricted to commonly used surrogate losses, these rewards can be based on various complex, potentially non-continuous, metrics of interest. This yields more power and flexibility to encode problem-dependent knowledge. 
Empirical results on several benchmark datasets demonstrate enhanced performance and increased high-level reasoning about the graph topology when using a tree-based search. We further highlight the superiority of our approach under substantial occlusions by introducing a new synthetic benchmark dataset for this task.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/76532e7f3130fa1f957f011e2441a6087bea97b0.zip", "author": "Sotiris Anagnostidis;Aurelien Lucchi;Thomas Hofmann", "authorids": "~Sotiris_Anagnostidis1;~Aurelien_Lucchi1;~Thomas_Hofmann1", "gender": "M;M;M", "homepage": ";http://people.inf.ethz.ch/alucchi/;http://www.da.inf.ethz.ch/", "dblp": "286/1763;14/5780;h/ThHofmann", "google_scholar": "qjzTKWUAAAAJ;https://scholar.google.ch/citations?user=V1ONSgIAAAAJ;T3hAyLkAAAAJ", "orcid": ";;", "linkedin": "sotiris-anagnostidis-b064a5129/;;thomas-hofmann-1ab2402/", "or_profile": "~Sotiris_Anagnostidis1;~Aurelien_Lucchi1;~Thomas_Hofmann1", "aff": "ETH Zurich;University of Basel;Swiss Federal Institute of Technology", "aff_domain": "inf.ethz.ch;unibas.ch;ethz.ch", "position": "PhD student;Assistant Professor;Full Professor", "bibtex": "@misc{\nanagnostidis2023mastering,\ntitle={Mastering Spatial Graph Prediction of Road Networks},\nauthor={Sotiris Anagnostidis and Aurelien Lucchi and Thomas Hofmann},\nyear={2023},\nurl={https://openreview.net/forum?id=OIcMPYZXFPL}\n}", "github": "", "project": "", "reviewers": "d6d1;zSe9;ro23;BpnF", "site": "https://openreview.net/forum?id=OIcMPYZXFPL", "pdf_size": 34306971, "recommendation": "3;5;6;8", "confidence": "4;3;3;4", "correctness": "3;2;4;4", "technical_novelty": "2;3;2;4", "empirical_novelty": "2;2;2;4", "wc_summary_paper": "95;79;77;27", "wc_strength_and_weaknesses": "216;314;107;104", "wc_clarity_quality_novelty_and_reproducibility": "470;140;40;29", "wc_summary_review": "151;121;193;17", "wc_review": "932;654;417;177", "wc_reply_reviewers": "0;94;0;0", "wc_reply_authors": "1273;1907;772;431", "reply_reviewers": "0;1;0;0", "reply_authors": "2;3;2;1", "recommendation_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 69.5, 25.509802037648196 ], "wc_strength_and_weaknesses_avg": [ 185.25, 86.95796398260484 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 169.75, 178.66221620700892 ], "wc_summary_review_avg": [ 120.5, 64.9980768946282 ], "wc_review_avg": [ 545.0, 279.9365999650635 ], "wc_reply_reviewers_avg": [ 23.5, 40.703193977868615 ], "wc_reply_authors_avg": [ 1095.75, 555.934067583558 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5853694070049635, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6856384871616901518&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;2", "aff_unique_norm": "ETH Zurich;University of Basel;Swiss Federal Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ethz.ch;https://www.unibas.ch;https://www.ethz.ch", "aff_unique_abbr": "ETHZ;UniBas;ETH Zurich", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Switzerland" }, { "title": "SMART: Sentences 
as Basic Units for Text Evaluation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11456", "id": "OIe3kpwl40D", "poster": "", "openreview": "https://openreview.net/forum?id=OIe3kpwl40D", "slides": "https://iclr.cc/virtual/2023/poster/11456", "video": "https://iclr.cc/virtual/2023/poster/11456", "author_site": "Reinald Kim Amplayo, Peter Liu, Yao Zhao, Shashi Narayan", "tldr": "", "abstract": "Widely used evaluation metrics for text generation either do not work well with longer texts or fail to evaluate all aspects of text quality. In this paper, we introduce a new metric called SMART to mitigate such limitations. Specifically, we treat sentences as basic units of matching instead of tokens, and use a sentence matching function to soft-match candidate and reference sentences. Candidate sentences are also compared to sentences in the source documents to allow grounding (e.g., factuality) evaluation. Our results show that system-level correlations of our proposed metric with a model-based matching function outperforms all competing metrics on the SummEval summarization meta-evaluation dataset, while the same metric with a string-based matching function is competitive with current model-based metrics. The latter does not use any neural model, which is useful during model development phases where resources can be limited and fast evaluation is required. SMART also outperforms all factuality evaluation metrics on the TRUE benchmark. Finally, we also conducted extensive analyses showing that our proposed metrics work well with longer summaries and are less biased towards specific models.", "keywords": "summarization;evaluation", "primary_area": "", "supplementary_material": "/attachment/b11c7ac3349b5678cec335884021300cbe87be0f.zip", "author": "Reinald Kim Amplayo;Peter J Liu;Yao Zhao;Shashi Narayan", "authorids": "~Reinald_Kim_Amplayo2;~Peter_J_Liu1;~Yao_Zhao5;~Shashi_Narayan1", "gender": "M;;;M", "homepage": "https://rktamplayo.github.io;http://www.peterjliu.com;;https://sites.google.com/corp/view/shashinarayan/", "dblp": ";190/7667;;74/8458", "google_scholar": ";;p7L3HrMAAAAJ;prEcE9IAAAAJ", "orcid": ";;;", "linkedin": ";p3t3rliu;;", "or_profile": "~Reinald_Kim_Amplayo2;~Peter_J_Liu1;~Yao_Zhao5;~Shashi_Narayan1", "aff": "Google;Google Brain;Google;Google", "aff_domain": "google.com;google.com;google.com;google.com", "position": "Researcher;Research Scientist;Researcher;Research Scientist", "bibtex": "@inproceedings{\namplayo2023smart,\ntitle={{SMART}: Sentences as Basic Units for Text Evaluation},\nauthor={Reinald Kim Amplayo and Peter J Liu and Yao Zhao and Shashi Narayan},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=OIe3kpwl40D}\n}", "github": "", "project": "", "reviewers": "TC5h;7UHv;k533;K7DY", "pdf_size": 336070, "recommendation": "5;6;6;8", "confidence": "4;4;4;4", "correctness": "2;3;4;4", "technical_novelty": "3;3;3;3", "empirical_novelty": "3;3;3;4", "wc_summary_paper": "118;189;201;67", "wc_strength_and_weaknesses": "310;461;272;125", "wc_clarity_quality_novelty_and_reproducibility": "146;72;156;82", "wc_summary_review": "53;76;137;40", "wc_review": "627;798;766;314", "wc_reply_reviewers": "194;191;0;0", "wc_reply_authors": "895;632;563;236", "reply_reviewers": "1;1;0;0", "reply_authors": "2;1;1;1", "recommendation_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 3.0, 
0.0 ], "empirical_novelty_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 143.75, 54.49483920519447 ], "wc_strength_and_weaknesses_avg": [ 292.0, 119.55542647659286 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 114.0, 37.33630940518894 ], "wc_summary_review_avg": [ 76.5, 37.23237838226293 ], "wc_review_avg": [ 626.25, 191.3953695887129 ], "wc_reply_reviewers_avg": [ 96.25, 96.25584397843073 ], "wc_reply_authors_avg": [ 581.5, 234.81109428644976 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.7608859102526822, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13982401478732172488&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=OIe3kpwl40D", "email": "google.com;google.com;google.com;google.com", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Multi-Rate VAE: Train Once, Get the Full Rate-Distortion Curve", "status": "Top-5%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11747", "id": "OJ8aSjCaMNK", "poster": "", "openreview": "https://openreview.net/forum?id=OJ8aSjCaMNK", "slides": "https://iclr.cc/virtual/2023/poster/11747", "video": "https://iclr.cc/virtual/2023/poster/11747", "author_site": "Juhan Bae, Michael Zhang, Michael Ruan, Duanyang Wang, So Hasegawa, Jimmy Ba, Roger Grosse", "tldr": "MR-VAEs can construct the rate-distortion curve in a single training run.", "abstract": "Variational autoencoders (VAEs) are powerful tools for learning latent representations of data used in a wide range of applications. In practice, VAEs usually require multiple training rounds to choose the amount of information the latent variable should retain. This trade-off between the reconstruction error (distortion) and the KL divergence (rate) is typically parameterized by a hyperparameter $\\beta$. In this paper, we introduce Multi-Rate VAE (MR-VAE), a computationally efficient framework for learning optimal parameters corresponding to various $\\beta$ in a single training run. The key idea is to explicitly formulate a response function using hypernetworks that maps $\\beta$ to the optimal parameters. MR-VAEs construct a compact response hypernetwork where the pre-activations are conditionally gated based on $\\beta$. We justify the proposed architecture by analyzing linear VAEs and showing that it can represent response functions exactly for linear VAEs. With the learned hypernetwork, MR-VAEs can construct the rate-distortion curve without additional training and can be deployed with significantly less hyperparameter tuning. Empirically, our approach is competitive and often exceeds the performance of multiple $\\beta$-VAEs training with minimal computation and memory overheads.", "keywords": "Variational Autoencoders;VAEs;Hypernetworks;Response Functions;Hyperparameter Tuning", "primary_area": "", "supplementary_material": "/attachment/856e9f14f87161c138e8dfb837592c25c1393b90.zip", "author": "Juhan Bae;Michael R. 
Zhang;Michael Ruan;Eric Wang;So Hasegawa;Jimmy Ba;Roger Baker Grosse", "authorids": "~Juhan_Bae2;~Michael_R._Zhang1;~Michael_Ruan2;ericdywang@gmail.com;hasegawa.sou@fujitsu.com;~Jimmy_Ba1;~Roger_Baker_Grosse1", "gender": "M;;;;;M;M", "homepage": "http://www.juhanbae.com/;;;;;http://jimmylba.github.io;http://www.cs.toronto.edu/~rgrosse/", "dblp": "158/9492;;;;;https://dblp.org/pers/b/Ba:Jimmy.html;26/7058", "google_scholar": "https://scholar.google.ca/citations?user=9RFr4usAAAAJ;;;;;https://scholar.google.ca/citations?user=ymzxRhAAAAAJ;xgQd1qgAAAAJ", "orcid": ";;0000-0003-0436-3995;;;;", "linkedin": ";;m-ruan/;;;;", "or_profile": "~Juhan_Bae2;~Michael_R._Zhang1;~Michael_Ruan2;ericdywang@gmail.com;hasegawa.sou@fujitsu.com;~Jimmy_Ba1;~Roger_Baker_Grosse1", "aff": "University of Toronto;;;;;Department of Computer Science, University of Toronto;Vector Institute", "aff_domain": "cs.toronto.edu;;;;;cs.toronto.edu;vectorinstitute.ai", "position": "PhD student;;;;;Assistant Professor;Faculty Member", "bibtex": "@inproceedings{\nbae2023multirate,\ntitle={Multi-Rate {VAE}: Train Once, Get the Full Rate-Distortion Curve},\nauthor={Juhan Bae and Michael R. Zhang and Michael Ruan and Eric Wang and So Hasegawa and Jimmy Ba and Roger Baker Grosse},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=OJ8aSjCaMNK}\n}", "github": "", "project": "", "reviewers": "MZus;47TW;fEL1", "pdf_size": 1569164, "recommendation": "8;8;8", "confidence": "4;4;4", "correctness": "3;4;4", "technical_novelty": "4;3;4", "empirical_novelty": "4;3;4", "wc_summary_paper": "70;88;96", "wc_strength_and_weaknesses": "292;74;123", "wc_clarity_quality_novelty_and_reproducibility": "38;54;10", "wc_summary_review": "96;61;40", "wc_review": "496;277;269", "wc_reply_reviewers": "16;0;0", "wc_reply_authors": "663;27;15", "reply_reviewers": "1;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 8.0, 0.0 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 84.66666666666667, 10.873004286866728 ], "wc_strength_and_weaknesses_avg": [ 163.0, 93.38450977901348 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 34.0, 18.184242262647807 ], "wc_summary_review_avg": [ 65.66666666666667, 23.098821518760552 ], "wc_review_avg": [ 347.3333333333333, 105.17393001859138 ], "wc_reply_reviewers_avg": [ 5.333333333333333, 7.542472332656507 ], "wc_reply_authors_avg": [ 235.0, 302.6813505982818 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1662014048399300874&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=OJ8aSjCaMNK", "email": "cs.toronto.edu;;;;;cs.toronto.edu;vectorinstitute.ai", "author_num": 7, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Toronto;Vector Institute", "aff_unique_dep": ";", "aff_unique_url": "https://www.utoronto.ca;https://vectorinstitute.ai/", "aff_unique_abbr": "U of T;Vector Institute", "aff_campus_unique_index": "1", "aff_campus_unique": ";Toronto", "aff_country_unique_index": "0;0;0", "aff_country_unique": 
"Canada" }, { "id": "OK6LV2q50l", "title": "FedPSE: Personalized Sparsification with Element-wise Aggregation for Federated Learning", "track": "main", "status": "Reject", "tldr": "We propose a federated learning framework to resolve the bidirectional communication challenge on Non-IID datasets.", "abstract": "Federated learning (FL) is a popular distributed machine learning framework in which clients aggregate models' parameters instead of sharing their individual data. In FL, clients communicate with the server under limited network bandwidth frequently, which arises the communication challenge. To resolve this challenge, multiple compression methods have been proposed to reduce the transmitted parameters. However, these techniques show that the federated performance degrades significantly with Non-IID (non-identically independently distributed) datasets. To address this issue, we propose an effective method, called FedPSE, which solves the efficiency challenge of FL with heterogeneous data. FedPSE compresses the local updates on clients using Top-K sparsification and aggregates these updates on the server by element-wise average. Then clients download the personalized sparse updates from the server to update their individual local models. We then theoretically analyze the convergence of FedPSE under the non-convex setting. Moreover, extensive experiments on four benchmark tasks demonstrate that our FedPSE outperforms the state-of-the-art methods on Non-IID datasets in terms of both efficiency and accuracy.", "keywords": "Federated Learning;Non-IID;Communication Efficiency", "primary_area": "", "supplementary_material": "", "author": "Longfei Zheng;Yingting Liu;Xiaolong Xu;Chaochao Chen;Weipeng Sun;Xiaolong Hu;Lei Wang;Li Wang", "authorids": "~Longfei_Zheng1;~Yingting_Liu1;~Xiaolong_Xu1;~Chaochao_Chen3;sunwp@jlu.edu.cn;~Xiaolong_Hu1;~Lei_Wang30;~Li_Wang24", "gender": "M;F;M;;;M;M;", "homepage": ";;;https://sites.google.com/site/ccchomepage/;;;;", "dblp": ";;10/137;26/1492-1;;;;", "google_scholar": "https://scholar.google.com/citations?hl=en;;https://scholar.google.com/citations?view_op=list_works;qZTMyzwAAAAJ;;;;", "orcid": "0000-0003-3604-2598;0000-0002-9579-8064;;0000-0003-1419-964X;;;;", "linkedin": ";;;ccchomepage/;;xiaolong-hu-a4a69b89/;%E7%A3%8A-%E7%8E%8B-b4994abb/;", "or_profile": "~Longfei_Zheng1;~Yingting_Liu1;~Xiaolong_Xu1;~Chaochao_Chen3;sunwp@jlu.edu.cn;~Xiaolong_Hu1;~Lei_Wang30;~Li_Wang24", "aff": ";;;Zhejiang University;;;Ant Group;", "aff_domain": ";;;zju.edu.cn;;;antgroup.com;", "position": ";;;Distinguished Research Fellow;;;Principal Researcher;", "bibtex": "@misc{\nzheng2023fedpse,\ntitle={Fed{PSE}: Personalized Sparsification with Element-wise Aggregation for Federated Learning},\nauthor={Longfei Zheng and Yingting Liu and Xiaolong Xu and Chaochao Chen and Weipeng Sun and Xiaolong Hu and Lei Wang and Li Wang},\nyear={2023},\nurl={https://openreview.net/forum?id=OK6LV2q50l}\n}", "github": "", "project": "", "reviewers": "7ra3;E2iq;DP2J", "site": "https://openreview.net/forum?id=OK6LV2q50l", "pdf_size": 1049373, "recommendation": "3;5;6", "confidence": "3;4;3", "correctness": "3;3;3", "technical_novelty": "2;3;2", "empirical_novelty": "2;3;2", "wc_summary_paper": "92;52;66", "wc_strength_and_weaknesses": "390;350;213", "wc_clarity_quality_novelty_and_reproducibility": "33;28;48", "wc_summary_review": "5;42;39", "wc_review": "520;472;366", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "613;485;438", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", 
"recommendation_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 70.0, 16.57307052620807 ], "wc_strength_and_weaknesses_avg": [ 317.6666666666667, 75.79064732684517 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 36.333333333333336, 8.498365855987975 ], "wc_summary_review_avg": [ 28.666666666666668, 16.779617264870957 ], "wc_review_avg": [ 452.6666666666667, 64.3393779543722 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 512.0, 73.95043385042895 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": 0.18898223650461363, "corr_recommendation_correctness": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2798805252685203590&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Zhejiang University;Ant Group", "aff_unique_dep": ";", "aff_unique_url": "https://www.zju.edu.cn;https://www.antgroup.com", "aff_unique_abbr": "ZJU;Ant Group", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Disentanglement of Correlated Factors via Hausdorff Factorized Support", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11102", "id": "OKcJhpQiGiX", "poster": "/media/PosterPDFs/ICLR%202023/11102.png?t=1680913752.0266614", "openreview": "https://openreview.net/forum?id=OKcJhpQiGiX", "slides": "https://iclr.cc/virtual/2023/poster/11102", "video": "https://iclr.cc/virtual/2023/poster/11102", "author_site": "Karsten Roth, Mark Ibrahim, Zeynep Akata, Pascal Vincent, Diane Bouchacourt", "tldr": "We develop a method that allows for disentangled representation learning not only under the assumption of independent factors of variation but instead fundamentally allows for much more realistic correlations during training.", "abstract": "A grand goal in deep learning research is to learn representations capable of generalizing across distribution shifts.\nDisentanglement is one promising direction aimed at aligning a model's representation with the underlying factors generating the data (e.g. color or background). Existing disentanglement methods, however, rely on an often unrealistic assumption: that factors are statistically independent. In reality, factors (like object color and shape) are correlated. To address this limitation, we consider the use of a relaxed disentanglement criterion -- the Hausdorff Factorized Support (HFS) criterion -- that encourages only pairwise factorized support, rather than a factorial distribution, by minimizing a Hausdorff distance. This allows for arbitrary distributions of the factors over their support, including correlations between them. We show that the use of HFS consistently facilitates disentanglement and recovery of ground-truth factors across a variety of correlation settings and benchmarks, even under severe training correlations and correlation shifts, with in parts over +60% in relative improvement over existing disentanglement methods. 
In addition, we find that leveraging HFS for representation learning can even facilitate transfer to downstream tasks such as classification under distribution shifts. We hope our original approach and positive empirical results inspire further progress on the open problem of robust generalization. Code available at https://github.com/facebookresearch/disentangling-correlated-factors.", "keywords": "disentanglement;representation learning;generalization", "primary_area": "", "supplementary_material": "", "author": "Karsten Roth;Mark Ibrahim;Zeynep Akata;Pascal Vincent;Diane Bouchacourt", "authorids": "~Karsten_Roth1;~Mark_Ibrahim1;~Zeynep_Akata1;~Pascal_Vincent1;~Diane_Bouchacourt3", "gender": "Not Specified;;F;M;F", "homepage": "https://karroth.com/;https://markibrahim.me/;https://eml-unitue.de/people/zeynep-akata;http://www.iro.umontreal.ca/~vincentp;https://dianebouchacourt.github.io/", "dblp": "234/7803;180/5660;117/4838;43/861;176/1498", "google_scholar": "93ZjIs0AAAAJ;AqYyoCMAAAAJ;jQl9RtkAAAAJ;WBCKQMsAAAAJ;", "orcid": ";;0000-0002-1432-7747;;", "linkedin": ";;zeynep-akata-36182045/?ppe=1;;", "or_profile": "~Karsten_Roth1;~Mark_Ibrahim1;~Zeynep_Akata1;~Pascal_Vincent1;~Diane_Nicole_Bouchacourt1", "aff": "University of Tuebingen;Facebook AI Research (FAIR) Meta;University of T\u00fcbingen;Facebook A.I. Research;Meta AI Research", "aff_domain": "uni-tuebingen.de;ai.facebook.com;uni-tuebingen.de;fb.com;meta.com", "position": "PhD student;Researcher;Full Professor;Research Scientist;Researcher", "bibtex": "@inproceedings{\nroth2023disentanglement,\ntitle={Disentanglement of Correlated Factors via Hausdorff Factorized Support},\nauthor={Karsten Roth and Mark Ibrahim and Zeynep Akata and Pascal Vincent and Diane Bouchacourt},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=OKcJhpQiGiX}\n}", "github": "", "project": "", "reviewers": "niK9;e6Ls;rYAe", "pdf_size": 14363980, "recommendation": "6;8;8", "confidence": "4;4;4", "correctness": "4;3;3", "technical_novelty": "3;3;4", "empirical_novelty": "3;3;3", "wc_summary_paper": "60;113;107", "wc_strength_and_weaknesses": "170;206;683", "wc_clarity_quality_novelty_and_reproducibility": "22;43;120", "wc_summary_review": "23;36;100", "wc_review": "275;398;1010", "wc_reply_reviewers": "0;0;24", "wc_reply_authors": "468;742;2238", "reply_reviewers": "0;0;1", "reply_authors": "1;1;4", "recommendation_avg": [ 7.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 93.33333333333333, 23.697163449568293 ], "wc_strength_and_weaknesses_avg": [ 353.0, 233.8076132207846 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 61.666666666666664, 42.12943019895817 ], "wc_summary_review_avg": [ 53.0, 33.65511352924941 ], "wc_review_avg": [ 561.0, 321.4373967042416 ], "wc_reply_reviewers_avg": [ 8.0, 11.313708498984761 ], "wc_reply_authors_avg": [ 1149.3333333333333, 777.8883096063483 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 1.4142135623730951 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": -0.9999999999999998, "gs_citation": 38, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=17261639157292822043&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=OKcJhpQiGiX", "email": "uni-tuebingen.de;ai.facebook.com;uni-tuebingen.de;fb.com;meta.com", "author_num": 5, "aff_unique_index": "0;1;2;1;1", "aff_unique_norm": "University of Tuebingen;Meta;University of T\u00fcbingen", "aff_unique_dep": ";Facebook AI Research;", "aff_unique_url": "https://www.uni-tuebingen.de/;https://www.meta.com;https://www.uni-tuebingen.de/", "aff_unique_abbr": "Uni T\u00fcbingen;Meta AI;Uni T\u00fcbingen", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1;1", "aff_country_unique": "Germany;United States" }, { "id": "OKfmDPNPwYF", "title": "Evaluating Fairness Without Sensitive Attributes: A Framework Using Only Auxiliary Models", "track": "main", "status": "Reject", "tldr": "To evaluate fairness without access to any sensitive attribute, we propose a general framework with only off-the-shelf auxiliary models.", "abstract": "Although the volume of literature and public attention on machine learning fairness has been growing significantly in recent years, in practice some tasks as basic as measuring fairness, which is the first step in studying and promoting fairness, can be challenging. This is because the sensitive attributes are often unavailable in a machine learning system due to privacy regulations. The straightforward solution is to use auxiliary models to predict the missing sensitive attributes. However, our theoretical analyses show that the estimation error of the directly measured fairness metrics is proportional to the error rates of auxiliary models' predictions. Existing works that attempt to reduce the estimation error often require strong assumptions, e.g. access to the ground-truth sensitive attributes in a subset of samples, auxiliary models' training data and the target data are i.i.d, or some form of conditional independence. In this paper, we drop those assumptions and propose a framework that uses only off-the-shelf auxiliary models. The main challenge is how to reduce the negative impact of imperfectly predicted sensitive attributes on the fairness metrics without knowing the ground-truth sensitive attribute values. Inspired by the noisy label learning literature, we first derive a closed-form relationship between the directly measured fairness metrics and their corresponding ground-truth metrics. And then we estimate some key statistics (most importantly transition matrix in the noisy label literature), which we use, together with the derived relationship, to calibrate the fairness metrics. Our framework can be applied to all popular group fairness definitions as well as multi-class classifiers and multi-category sensitive attributes. In addition, we theoretically prove the upper bound of the estimation error in our calibrated metrics and show our method can substantially decrease the estimation error especially when auxiliary models are inaccurate or the target model is highly biased. 
Experiments on COMPAS and CelebA validate our theoretical analyses and show our method can measure fairness significantly more accurately than baselines under favorable circumstances.", "keywords": "Fairness evaluation;noise transition matrix;sensitive attributes", "primary_area": "", "supplementary_material": "", "author": "Zhaowei Zhu;Yuanshun Yao;Jiankai Sun;Yang Liu;Hang Li", "authorids": "~Zhaowei_Zhu1;~Yuanshun_Yao2;~Jiankai_Sun5;~Yang_Liu3;~Hang_Li4", "gender": "M;M;M;M;M", "homepage": "https://www.zzw.ai;http://jiankai.me;http://www.yliuu.com;https://hangli-hl.github.io/;https://kevyao.com", "dblp": "202/1712;;51/3710-18;https://dblp.org/pers/hd/l/Li_0001:Hang;186/1486", "google_scholar": "YS8pSQoAAAAJ;GQ6xw-oAAAAJ;jKrIVCIAAAAJ;nTl5mSwAAAAJ;AG51Bv4AAAAJ", "orcid": "0000-0003-3894-5862;;0000-0001-8420-6011;0000-0001-9628-3487;", "linkedin": ";jiankaisun/;;hang-li-84aa6314/;", "or_profile": "~Zhaowei_Zhu1;~Jiankai_Sun5;~Yang_Liu3;~Hang_Li4;~Kevin_Yao1", "aff": "University of California, Santa Cruz;ByteDance Inc.;University of California, Santa Cruz;ByteDance Technology;ByteDance Research", "aff_domain": "ucsc.edu;bytedance.com;ucsc.edu;bytedance.com;bytedance.com", "position": "PhD student;Research Scientist;Assistant Professor;Head of Research;Researcher", "bibtex": "@misc{\nzhu2023evaluating,\ntitle={Evaluating Fairness Without Sensitive Attributes: A Framework Using Only Auxiliary Models},\nauthor={Zhaowei Zhu and Yuanshun Yao and Jiankai Sun and Yang Liu and Hang Li},\nyear={2023},\nurl={https://openreview.net/forum?id=OKfmDPNPwYF}\n}", "github": "", "project": "", "reviewers": "oWCf;vWhJ;pHkG;Za6Z", "site": "https://openreview.net/forum?id=OKfmDPNPwYF", "pdf_size": 788961, "recommendation": "3;5;6;6", "confidence": "3;2;2;4", "correctness": "4;2;4;4", "technical_novelty": "2;2;3;3", "empirical_novelty": "3;2;3;2", "wc_summary_paper": "91;34;49;89", "wc_strength_and_weaknesses": "297;17;104;235", "wc_clarity_quality_novelty_and_reproducibility": "112;15;14;25", "wc_summary_review": "24;35;61;56", "wc_review": "524;101;228;405", "wc_reply_reviewers": "0;0;16;102", "wc_reply_authors": "1401;161;486;1088", "reply_reviewers": "0;0;1;1", "reply_authors": "3;1;1;2", "recommendation_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "correctness_avg": [ 3.5, 0.8660254037844386 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 65.75, 24.833193511910626 ], "wc_strength_and_weaknesses_avg": [ 163.25, 109.47231385149398 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 41.5, 40.929817981515626 ], "wc_summary_review_avg": [ 44.0, 15.116216457830975 ], "wc_review_avg": [ 314.5, 162.13034879380234 ], "wc_reply_reviewers_avg": [ 29.5, 42.36448984704053 ], "wc_reply_authors_avg": [ 784.0, 487.349463937327 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18163376935146563913&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;0;1;1", "aff_unique_norm": "University of California, Santa Cruz;ByteDance", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucsc.edu;https://www.bytedance.com", "aff_unique_abbr": "UCSC;ByteDance", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Santa Cruz;", "aff_country_unique_index": 
"0;1;0;1;1", "aff_country_unique": "United States;China" }, { "title": "ILA-DA: Improving Transferability of Intermediate Level Attack with Data Augmentation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11711", "id": "OM7doLjQbOQ", "poster": "/media/PosterPDFs/ICLR%202023/11711.png?t=1680680203.7445347", "openreview": "https://openreview.net/forum?id=OM7doLjQbOQ", "slides": "https://iclr.cc/virtual/2023/poster/11711", "video": "https://iclr.cc/virtual/2023/poster/11711", "author_site": "Chiu Wai Yan, Tsz Him Cheung, Dit-Yan Yeung", "tldr": "We proposed ILA-DA, a method that employs 3 novel augmentation techniques to improve the transferability of adversarial attacks.", "abstract": "Adversarial attack aims to generate deceptive inputs to fool a machine learning model. In deep learning, an adversarial input created for a specific neural network can also trick other neural networks. This intriguing property is known as black-box transferability of adversarial examples. To improve black-box transferability, a previously proposed method called Intermediate Level Attack (ILA) fine-tunes an adversarial example by maximizing its perturbation on an intermediate layer of the source model. Meanwhile, it has been shown that simple image transformations can also enhance attack transferability. Based on these two observations, we propose ILA-DA, which employs three novel augmentation techniques to enhance ILA. Specifically, we propose (1) an automated way to apply effective image transformations, (2) an efficient reverse adversarial update technique, and (3) an attack interpolation method to create more transferable adversarial examples. Shown by extensive experiments, ILA-DA greatly outperforms ILA and other state-of-the-art attacks by a large margin. On ImageNet, we attain an average attack success rate of 84.5%, which is 19.5% better than ILA and 4.7% better than the previous state-of-the-art across nine undefended models. 
For defended models, ILA-DA also leads existing attacks and provides further gains when incorporated into more advanced attack methods.", "keywords": "adversarial examples;adversarial transferability;data augmentation", "primary_area": "", "supplementary_material": "/attachment/784c1f27b0adacea09bd6fbe606563a3cf476fc6.zip", "author": "Chiu Wai Yan;Tsz-Him Cheung;Dit-Yan Yeung", "authorids": "~Chiu_Wai_Yan1;~Tsz-Him_Cheung1;~Dit-Yan_Yeung2", "gender": "M;M;M", "homepage": ";;https://cse.hkust.edu.hk/faculty/dyyeung/", "dblp": "350/3858;295/5321;41/5668", "google_scholar": ";;nEsOOx8AAAAJ", "orcid": "0000-0002-7277-5580;0000-0002-3600-2927;0000-0003-3716-8125", "linkedin": ";;", "or_profile": "~Chiu_Wai_Yan1;~Tsz-Him_Cheung1;~Dit-Yan_Yeung2", "aff": "Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology", "aff_domain": "hkust.edu;ust.hk;ust.hk", "position": "Researcher;PhD student;Chair Professor", "bibtex": "@inproceedings{\nyan2023ilada,\ntitle={{ILA}-{DA}: Improving Transferability of Intermediate Level Attack with Data Augmentation},\nauthor={Chiu Wai Yan and Tsz-Him Cheung and Dit-Yan Yeung},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=OM7doLjQbOQ}\n}", "github": "", "project": "", "reviewers": "KZsc;weXS;vZBZ;CjSu", "pdf_size": 6640785, "recommendation": "5;5;6;8", "confidence": "4;3;3;5", "correctness": "3;3;3;4", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "57;60;43;32", "wc_strength_and_weaknesses": "132;240;245;134", "wc_clarity_quality_novelty_and_reproducibility": "42;40;77;32", "wc_summary_review": "14;37;45;29", "wc_review": "245;377;410;227", "wc_reply_reviewers": "82;101;32;107", "wc_reply_authors": "1080;958;709;297", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;1;1", "recommendation_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 48.0, 11.247221879201993 ], "wc_strength_and_weaknesses_avg": [ 187.75, 54.78309502027062 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 47.75, 17.297037318569906 ], "wc_summary_review_avg": [ 31.25, 11.453711188955307 ], "wc_review_avg": [ 314.75, 79.86355551814607 ], "wc_reply_reviewers_avg": [ 80.5, 29.483045975611137 ], "wc_reply_authors_avg": [ 761.0, 299.4035737929659 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.7385489458759963, "corr_recommendation_correctness": 0.9428090415820632, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10851351387540783209&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "pdf": "https://openreview.net/pdf?id=OM7doLjQbOQ", "email": "hkust.edu;ust.hk;ust.hk", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Hong Kong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.ust.hk", "aff_unique_abbr": "HKUST", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "OMwyBv1UBh", "title": "Towards Federated Learning of Deep Graph Neural Networks", "track": "main", "status": "Reject", "tldr": 
"We study the problem of graph representation learning under a federated setting and propose a novel framework for federated learning of deep graph neural networks via reconstructing neighborhood information of nodes.", "abstract": "Graph neural networks (GNNs) learn node representations by recursively aggregating neighborhood information on graph data. However, in the federated setting, data samples (nodes) located in different clients may be connected to each other, leading to huge information loss to the training method.\nExisting federated graph learning frameworks solve such a problem by generating missing neighbors or sending information across clients directly. None are suitable for training deep GNNs, which require a more expansive receptive field and higher communication costs.\nIn this work, we introduce a novel framework named $Fed^2GNN$ for federated graph learning of deep GNNs via reconstructing neighborhood information of nodes. Specifically, we design a graph structure named rooted tree. The node embedding obtained by encoding on the rooted tree is the same as that obtained by encoding on the induced subgraph surrounding the node, which allows us to reconstruct the neighborhood information by building the rooted tree of the node. An encoder-decoder framework is then proposed, wherein we first encode missing neighbor information and then decode it to build the rooted tree.\nExtensive experiments on real-world network datasets show the effectiveness of our framework for training deep GNNs while also achieving better performance for training shadow GNN models", "keywords": "federated learning;graph representation learning;deep graph neural networks", "primary_area": "", "supplementary_material": "", "author": "Zhihua Tian;Yuan Ding;Rui Zhang;Jian Liu;Kui Ren", "authorids": "~Zhihua_Tian1;~Yuan_Ding2;~Rui_Zhang17;~Jian_Liu6;~Kui_Ren4", "gender": ";M;M;M;M", "homepage": ";https://github.com/Dy1anT;;https://person.zju.edu.cn/en/jianliu;", "dblp": "278/0000.html;94/489;https://dblp.org/search?q=adversarial%20examples%20for%20proof%20of%20author%3ARui_Zhang%3A;35/295-12.html;20/6179-1.html", "google_scholar": ";;;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?view_op=list_works", "orcid": ";;;;0000-0003-3441-6277", "linkedin": ";;;;", "or_profile": "~Zhihua_Tian1;~Yuan_Ding2;~Rui_Zhang17;~Jian_Liu6;~Kui_Ren4", "aff": "Zhejiang University;Zhejiang University;Zhejiang University,;Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn", "position": "PhD student;MS student;PhD student;Assistant Professor;Full Professor", "bibtex": "@misc{\ntian2023towards,\ntitle={Towards Federated Learning of Deep Graph Neural Networks},\nauthor={Zhihua Tian and Yuan Ding and Rui Zhang and Jian Liu and Kui Ren},\nyear={2023},\nurl={https://openreview.net/forum?id=OMwyBv1UBh}\n}", "github": "", "project": "", "reviewers": "53S9;SQ7X;YSgt", "site": "https://openreview.net/forum?id=OMwyBv1UBh", "pdf_size": 2346724, "recommendation": "3;5;6", "confidence": "4;4;2", "correctness": "2;3;3", "technical_novelty": "3;3;3", "empirical_novelty": "2;3;3", "wc_summary_paper": "120;38;126", "wc_strength_and_weaknesses": "341;216;231", "wc_clarity_quality_novelty_and_reproducibility": "226;14;5", "wc_summary_review": "44;33;46", "wc_review": "731;301;408", "wc_reply_reviewers": "573;0;13", "wc_reply_authors": "2327;1192;989", "reply_reviewers": "2;0;1", "reply_authors": "4;3;3", "recommendation_avg": [ 4.666666666666667, 
1.247219128924647 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 94.66666666666667, 40.14418457953226 ], "wc_strength_and_weaknesses_avg": [ 262.6666666666667, 55.7275116576683 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 81.66666666666667, 102.12519549824887 ], "wc_summary_review_avg": [ 41.0, 5.715476066494082 ], "wc_review_avg": [ 480.0, 182.78037823209215 ], "wc_reply_reviewers_avg": [ 195.33333333333334, 267.1033923824672 ], "wc_reply_authors_avg": [ 1502.6666666666667, 588.753674204152 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 3.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.7559289460184546, "corr_recommendation_correctness": 0.9449111825230683, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13440140819790462641&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Zhejiang University", "aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", "aff_unique_abbr": "ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Quantized Compressed Sensing with Score-Based Generative Models", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11274", "id": "OOWLRfAI_V_", "poster": "/media/PosterPDFs/ICLR%202023/11274.png?t=1681033057.6024868", "openreview": "https://openreview.net/forum?id=OOWLRfAI_V_", "slides": "https://iclr.cc/virtual/2023/poster/11274", "video": "https://iclr.cc/virtual/2023/poster/11274", "author_site": "Xiangming Meng, Yoshiyuki Kabashima", "tldr": "Quantized Compressed Sensing with Score-Based Generative Models", "abstract": "We consider the general problem of recovering a high-dimensional signal from noisy quantized measurements. Quantization, especially coarse quantization such as 1-bit sign measurements, leads to severe information loss and thus a good prior knowledge of the unknown signal is helpful for accurate recovery. Motivated by the power of score-based generative models (SGM, also known as diffusion models) in capturing the rich structure of natural signals beyond simple sparsity, we propose an unsupervised data-driven approach called quantized compressed sensing with SGM (QCS-SGM), where the prior distribution is modeled by a pre-trained SGM. To perform posterior sampling, an annealed pseudo-likelihood score called ${\\textit{noise perturbed pseudo-likelihood score}}$ is introduced and combined with the prior score of SGM. The proposed QCS-SGM applies to an arbitrary number of quantization bits. Experiments on a variety of baseline datasets demonstrate that the proposed QCS-SGM significantly outperforms existing state-of-the-art algorithms by a large margin for both in-distribution and out-of-distribution samples. Moreover, as a posterior sampling method, QCS-SGM can be easily used to obtain confidence intervals or uncertainty estimates of the reconstructed results. 
$\\textit{The code is available at}$ https://github.com/mengxiangming/QCS-SGM.", "keywords": "generative models;compressed sensing;linear inverse problems;quantization", "primary_area": "", "supplementary_material": "/attachment/524d7f586a63a886d632e526f0e9df08b671491b.zip", "author": "Xiangming Meng;Yoshiyuki Kabashima", "authorids": "~Xiangming_Meng1;~Yoshiyuki_Kabashima1", "gender": "M;M", "homepage": "https://mengxiangming.github.io;https://researchmap.jp/yoshiyuki.kabashima?lang=en", "dblp": "157/5875;36/7039", "google_scholar": "https://scholar.google.co.jp/citations?user=oV70ZoQAAAAJ;NLBZuoEAAAAJ", "orcid": ";0000-0002-2949-7108", "linkedin": ";", "or_profile": "~Xiangming_Meng1;~Yoshiyuki_Kabashima1", "aff": "The University of Tokyo;The University of Tokyo", "aff_domain": "u-tokyo.ac.jp;u-tokyo.ac.jp", "position": "Project Assistant Professor ;Full Professor", "bibtex": "@inproceedings{\nmeng2023quantized,\ntitle={Quantized Compressed Sensing with Score-Based Generative Models},\nauthor={Xiangming Meng and Yoshiyuki Kabashima},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=OOWLRfAI_V_}\n}", "github": "", "project": "", "reviewers": "CEkX;DMn5;CmCq", "pdf_size": 14934672, "recommendation": "6;6;8", "confidence": "3;4;4", "correctness": "3;3;3", "technical_novelty": "3;3;3", "empirical_novelty": "2;3;4", "wc_summary_paper": "65;96;151", "wc_strength_and_weaknesses": "259;172;151", "wc_clarity_quality_novelty_and_reproducibility": "25;61;103", "wc_summary_review": "61;31;15", "wc_review": "410;360;420", "wc_reply_reviewers": "29;0;0", "wc_reply_authors": "1161;1338;614", "reply_reviewers": "1;0;0", "reply_authors": "3;2;1", "recommendation_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 104.0, 35.56215216584433 ], "wc_strength_and_weaknesses_avg": [ 194.0, 46.75467891024384 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 63.0, 31.874754901018456 ], "wc_summary_review_avg": [ 35.666666666666664, 19.067132861433457 ], "wc_review_avg": [ 396.6666666666667, 26.2466929133727 ], "wc_reply_reviewers_avg": [ 9.666666666666666, 13.67073110293992 ], "wc_reply_authors_avg": [ 1037.6666666666667, 308.169145474076 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.49999999999999983, "corr_recommendation_correctness": 0.0, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1434028757043347437&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=OOWLRfAI_V_", "email": "u-tokyo.ac.jp;u-tokyo.ac.jp", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Tokyo", "aff_unique_dep": "", "aff_unique_url": "https://www.u-tokyo.ac.jp", "aff_unique_abbr": "UTokyo", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Japan" }, { "title": "Rethinking Symbolic Regression: Morphology and Adaptability in the Context of Evolutionary Algorithms", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10765", "id": "OPGy07PojsZ", "poster": 
"/media/PosterPDFs/ICLR%202023/10765.png?t=1682738430.6817298", "openreview": "https://openreview.net/forum?id=OPGy07PojsZ", "slides": "https://iclr.cc/virtual/2023/poster/10765", "video": "https://iclr.cc/virtual/2023/poster/10765", "author_site": "Kei Sen Fong, Shelvia Wongso, Mehul Motani", "tldr": "", "abstract": "Symbolic Regression (SR) is the well-studied problem of finding closed-form analytical expressions that describe the relationship between variables in a measurement dataset. In this paper, we rethink SR from two perspectives: morphology and adaptability. Morphology: Current SR algorithms typically use several man-made heuristics to influence the morphology (or structure) of the expressions in the search space. These man-made heuristics may introduce unintentional bias and data leakage, especially with the relatively few equation-recovery benchmark problems available for evaluating SR approaches. To address this, we formulate a novel minimalistic approach, based on constructing a depth-aware mathematical language model trained on terminal walks of expression trees, as a replacement to these heuristics. Adaptability: Current SR algorithms tend to select expressions based on only a single fitness function (e.g., MSE on the training set). We promote the use of an adaptability framework in evolutionary SR which uses fitness functions that alternate across generations. This leads to robust expressions that perform well on the training set and are close to the true functional form. We demonstrate this by alternating fitness functions that quantify faithfulness to values (via MSE) and empirical derivatives (via a novel theoretically justified fitness metric coined MSEDI). Proof-of-concept: We combine these ideas into a minimalistic evolutionary SR algorithm that outperforms all benchmark and state of-the-art SR algorithms in problems with unknown constants added, which we claim are more reflective of SR performance for real-world applications. Our claim is then strengthened by reproducing the superior performance on real-world regression datasets from SRBench. 
For researchers interested in equation-recovery problems, we also propose a set of conventions that can be used to promote fairness in comparison across SR methods and to reduce unintentional bias.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Kei Sen Fong;Shelvia Wongso;Mehul Motani", "authorids": "~Kei_Sen_Fong1;~Shelvia_Wongso1;~Mehul_Motani1", "gender": "M;;M", "homepage": "https://www.linkedin.com/in/kei-sen-fong-4120b81b6/;;https://mehulmotani.github.io/", "dblp": "350/4391;;83/4035", "google_scholar": "ZZ8AURkAAAAJ;f27noUwAAAAJ;https://scholar.google.com.sg/citations?user=Bm9BwEQAAAAJ", "orcid": "0009-0000-4135-4858;0009-0004-8465-1640;", "linkedin": "kei-sen-fong-4120b81b6/;sw1039/;", "or_profile": "~Kei_Sen_Fong1;~Shelvia_Wongso1;~Mehul_Motani1", "aff": "National University of Singapore;National University of Singapore;National University of Singapore", "aff_domain": "nus.edu.sg;nus.edu.sg;nus.edu.sg", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nfong2023rethinking,\ntitle={Rethinking Symbolic Regression: Morphology and Adaptability in the Context of Evolutionary Algorithms},\nauthor={Kei Sen Fong and Shelvia Wongso and Mehul Motani},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=OPGy07PojsZ}\n}", "github": "", "project": "", "reviewers": "vhnA;yTKT;jSK3;Kqo2", "pdf_size": 412791, "recommendation": "3;6;6;8", "confidence": "3;4;3;4", "correctness": "2;3;4;3", "technical_novelty": "3;3;4;3", "empirical_novelty": "2;3;4;3", "wc_summary_paper": "76;111;47;106", "wc_strength_and_weaknesses": "214;36;410;296", "wc_clarity_quality_novelty_and_reproducibility": "73;54;27;65", "wc_summary_review": "27;465;40;79", "wc_review": "390;666;524;546", "wc_reply_reviewers": "0;102;13;0", "wc_reply_authors": "573;1183;376;252", "reply_reviewers": "0;1;1;0", "reply_authors": "1;3;1;1", "recommendation_avg": [ 5.75, 1.7853571071357126 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 85.0, 25.700194551792794 ], "wc_strength_and_weaknesses_avg": [ 239.0, 136.31214179228496 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 54.75, 17.383541066192468 ], "wc_summary_review_avg": [ 152.75, 181.29034033836442 ], "wc_review_avg": [ 531.5, 97.95279475339129 ], "wc_reply_reviewers_avg": [ 28.75, 42.62261723545376 ], "wc_reply_authors_avg": [ 596.0, 357.71287368502686 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.7001400420140049, "corr_recommendation_correctness": 0.5940885257860046, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3198892852737214836&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "pdf": "https://openreview.net/pdf?id=OPGy07PojsZ", "email": "nus.edu.sg;nus.edu.sg;nus.edu.sg", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "National University of Singapore", "aff_unique_dep": "", "aff_unique_url": "https://www.nus.edu.sg", "aff_unique_abbr": "NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Singapore" }, { "title": "Leveraging Unlabeled Data to Track Memorization", "status": "Poster", "track": "main", 
"site": "https://iclr.cc/virtual/2023/poster/11254", "id": "ORp91sAbzI", "poster": "/media/PosterPDFs/ICLR%202023/11254.png?t=1682933114.7446926", "openreview": "https://openreview.net/forum?id=ORp91sAbzI", "slides": "https://iclr.cc/virtual/2023/poster/11254", "video": "https://iclr.cc/virtual/2023/poster/11254", "author_site": "Mahsa Forouzesh, Hanie Sedghi, Patrick Thiran", "tldr": "We propose a practical metric to track memorization for neural networks, which together with the overall training accuracy can distinguish models with low label noise memorization on the training set and high generalization to unseen data.", "abstract": "Deep neural networks may easily memorize noisy labels present in real-world data, which degrades their ability to generalize. It is therefore important to track and evaluate the robustness of models against noisy label memorization. We propose a metric, called $\\textit{susceptibility}$, to gauge such memorization for neural networks. Susceptibility is simple and easy to compute during training. Moreover, it does not require access to ground-truth labels and it only uses unlabeled data. We empirically show the effectiveness of our metric in tracking memorization on various architectures and datasets and provide theoretical insights into the design of the susceptibility metric. Finally, we show through extensive experiments on datasets with synthetic and real-world label noise that one can utilize susceptibility and the overall training accuracy to distinguish models that maintain a low memorization on the training set and generalize well to unseen clean data. ", "keywords": "memorization;label noise;generalization;unlabeled data;deep learning", "primary_area": "", "supplementary_material": "/attachment/3c5316ed5eb5864894ecb9bcc7c1458f1d6d9891.zip", "author": "Mahsa Forouzesh;Hanie Sedghi;Patrick Thiran", "authorids": "~Mahsa_Forouzesh2;~Hanie_Sedghi1;~Patrick_Thiran1", "gender": "F;;F", "homepage": "https://haniesedghi.com/;https://people.epfl.ch/patrick.thiran;", "dblp": "66/8332;t/PThiran;271/4480", "google_scholar": "_9GX96fDWAMC;https://scholar.google.ch/citations?user=7Ek7pqgAAAAJ;", "orcid": ";;", "linkedin": "hanie-sedghi-71bb2582;;", "or_profile": "~Hanie_Sedghi1;~Patrick_Thiran1;~mahsa_forouzesh1", "aff": "Google Research, Brain team;EPFL;School of Computer and Communication Sciences, Swiss Federal Institute of Technology Lausanne", "aff_domain": "google.com;epfl.ch;ic.epfl.ch", "position": "Senior Research Scientist;Full Professor;PhD student", "bibtex": "@inproceedings{\nforouzesh2023leveraging,\ntitle={Leveraging Unlabeled Data to Track Memorization},\nauthor={Mahsa Forouzesh and Hanie Sedghi and Patrick Thiran},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=ORp91sAbzI}\n}", "github": "", "project": "", "reviewers": "ZDzu;QKAt;E6P4;QseH", "pdf_size": 13089737, "recommendation": "5;6;6;8", "confidence": "3;3;3;4", "correctness": "3;3;3;3", "technical_novelty": "2;3;4;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "53;73;70;71", "wc_strength_and_weaknesses": "141;147;163;401", "wc_clarity_quality_novelty_and_reproducibility": "28;27;40;20", "wc_summary_review": "21;27;43;76", "wc_review": "243;274;316;568", "wc_reply_reviewers": "0;76;0;287", "wc_reply_authors": "207;475;439;1315", "reply_reviewers": "0;1;0;3", "reply_authors": "1;1;1;4", "recommendation_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], 
"correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 66.75, 8.011710179481033 ], "wc_strength_and_weaknesses_avg": [ 213.0, 108.83933112620639 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 28.75, 7.189401922274203 ], "wc_summary_review_avg": [ 41.75, 21.34683817336891 ], "wc_review_avg": [ 350.25, 128.35960228981702 ], "wc_reply_reviewers_avg": [ 90.75, 117.4763274025878 ], "wc_reply_authors_avg": [ 609.0, 420.3855373344806 ], "reply_reviewers_avg": [ 1.0, 1.224744871391589 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.9271726499455306, "corr_recommendation_correctness": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13377023408390939389&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=ORp91sAbzI", "email": "google.com;epfl.ch;ic.epfl.ch", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Google;EPFL;Swiss Federal Institute of Technology Lausanne", "aff_unique_dep": "Google Research;;School of Computer and Communication Sciences", "aff_unique_url": "https://research.google;https://www.epfl.ch;https://www.epfl.ch", "aff_unique_abbr": "Google;EPFL;EPFL", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Mountain View;;Lausanne", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United States;Switzerland" }, { "id": "OSS-yWzE9Yu", "title": "HRBP: Hardware-friendly Regrouping towards Block-wise Pruning for Sparse Training", "track": "main", "status": "Reject", "tldr": "This paper proposes a novel block-wise pruning algorithm, which accelerates the sparse training of convolutional neural networks at both forward and backward pass. ", "abstract": "Recently, pruning at initialization and training a sparse network from scratch (sparse training) become increasingly popular. However, most sparse training literature addresses only the unstructured sparsity, which in practice brings little benefit to the training acceleration on GPU due to the irregularity of non-zero weights. In this paper, we work on sparse training with fine-grained structured sparsity, by extracting a few dense blocks from unstructured sparse weights. For Convolutional Neural networks (CNN), however, the extracted dense blocks will be broken in backpropagation due to the shape transformation of convolution filters implemented by GEMM. Thus, previous block-wise pruning methods can only be used to accelerate the forward pass of sparse CNN training. To this end, we propose the Hardware-friendly Regrouping towards Block-based Pruning (HRBP), where the grouping is conducted on the kernel-wise mask. With HRBP, extracted dense blocks are preserved in backpropagation. We further propose HRBP++ to reduce zero kernels by extracting common sparse kernel patterns on all kernels within one block. Extensive experiments on CIFAR-10, CIFAR-100, and ImageNet demonstrate that HRBP (HRBP++) can almost match the accuracy of unstructured sparse training methods while achieving a huge acceleration on hardware. 
", "keywords": "efficient training;sparse training;fine-grained structured sparsity;grouping algorithm", "primary_area": "", "supplementary_material": "/attachment/1d757723e13f44d650e201845aa91d05589ae135.zip", "author": "Haoyu Ma;Chengming Zhang;lizhi xiang;Xiaolong Ma;Geng Yuan;Wenkai Zhang;Shiwei Liu;Tianlong Chen;Dingwen Tao;Yanzhi Wang;Zhangyang Wang;Xiaohui Xie", "authorids": "~Haoyu_Ma1;~Chengming_Zhang1;~lizhi_xiang1;~Xiaolong_Ma2;~Geng_Yuan1;~Wenkai_Zhang1;~Shiwei_Liu2;~Tianlong_Chen1;~Dingwen_Tao1;~Yanzhi_Wang3;~Zhangyang_Wang1;~Xiaohui_Xie2", "gender": "M;M;M;M;M;M;M;M;M;M;M;", "homepage": "https://www.ics.uci.edu/~haoyum3/;https://chengmingzh8.github.io/;;https://xiaolongma2016.com;;;https://shiweiliuiiiiiii.github.io/;https://tianlong-chen.github.io;https://www.dingwentao.com/;https://web.northeastern.edu/yanzhiwang/;https://vita-group.github.io;https://www.ics.uci.edu/~xhx/", "dblp": "144/1634;78/5836-6;;;205/3007;;234/8697-3.html;;162/0802.html;;119/4026;", "google_scholar": "8jugwosAAAAJ;F4TtJZEAAAAJ;TTNhq6sAAAAJ;https://scholar.google.com/citations?hl=en;tBIAgtgAAAAJ;;73IbXtsAAAAJ;LE3ctn0AAAAJ;Ppjzn_EAAAAJ;https://scholar.google.com/citations?hl=en;pxFyKAIAAAAJ;1CR0meYAAAAJ", "orcid": "0000-0001-6646-2644;0000-0003-3008-9133;;0000-0003-3753-7648;0000-0001-9844-992X;;;0000-0001-7774-8197;0000-0001-5422-4497;;;", "linkedin": "haoyu-ma-53517915a/;chengming-zhang-71a3b1120/;;xiaolong-ma-66b98910b/;;www.linkedin.com/in/kevin-wenkai-zhang;;tianlong-chen-783862167/;dingwentao/;;;", "or_profile": "~Haoyu_Ma1;~Chengming_Zhang1;~lizhi_xiang1;~Xiaolong_Ma2;~Geng_Yuan1;~Wenkai_Zhang1;~Shiwei_Liu2;~Tianlong_Chen1;~Dingwen_Tao1;~Yanzhi_Wang3;~Zhangyang_Wang1;~Xiaohui_Xie2", "aff": "Meta Platforms, Inc;Indiana University;Washington State University;Clemson University;Northeastern University;;University of Texas at Austin;University of Texas, Austin;Indiana University;Northeastern University;University of Texas, Austin;University of California, Irvine", "aff_domain": "fb.com;iu.edu;wsu.edu;clemson.edu;northeastern.edu;;utexas.edu;utexas.edu;iu.edu;northeastern.edu;utexas.edu;uci.edu", "position": "Intern;PhD student;PhD student;Assistant Professor;PhD student;;Postdoc;PhD student;Associate Professor;Associate Professor;Assistant Professor;Full Professor", "bibtex": "@misc{\nma2023hrbp,\ntitle={{HRBP}: Hardware-friendly Regrouping towards Block-wise Pruning for Sparse Training},\nauthor={Haoyu Ma and Chengming Zhang and lizhi xiang and Xiaolong Ma and Geng Yuan and Wenkai Zhang and Shiwei Liu and Tianlong Chen and Dingwen Tao and Yanzhi Wang and Zhangyang Wang and Xiaohui Xie},\nyear={2023},\nurl={https://openreview.net/forum?id=OSS-yWzE9Yu}\n}", "github": "", "project": "", "reviewers": "5xYn;fvk3;oxCu;WyN2", "site": "https://openreview.net/forum?id=OSS-yWzE9Yu", "pdf_size": 1844564, "recommendation": "5;5;5;5", "confidence": "2;5;4;5", "correctness": "2;3;3;4", "technical_novelty": "3;2;2;2", "empirical_novelty": "3;2;2;2", "wc_summary_paper": "46;115;25;37", "wc_strength_and_weaknesses": "107;94;102;269", "wc_clarity_quality_novelty_and_reproducibility": "11;228;82;7", "wc_summary_review": "26;73;61;7", "wc_review": "190;510;270;320", "wc_reply_reviewers": "0;179;0;0", "wc_reply_authors": "300;262;383;547", "reply_reviewers": "0;2;0;0", "reply_authors": "2;3;4;4", "recommendation_avg": [ 5.0, 0.0 ], "confidence_avg": [ 4.0, 1.224744871391589 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 
0.4330127018922193 ], "wc_summary_paper_avg": [ 55.75, 35.00982005095142 ], "wc_strength_and_weaknesses_avg": [ 143.0, 72.89375830618147 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 82.0, 89.41755979672001 ], "wc_summary_review_avg": [ 41.75, 26.47050245084139 ], "wc_review_avg": [ 322.5, 117.76565713313877 ], "wc_reply_reviewers_avg": [ 44.75, 77.50927363870726 ], "wc_reply_authors_avg": [ 373.0, 109.57417578973615 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 3.25, 0.82915619758885 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 12, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7579977163783020170&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;4;5;5;1;4;5;6", "aff_unique_norm": "Meta;Indiana University;Washington State University;Clemson University;Northeastern University;University of Texas at Austin;University of California, Irvine", "aff_unique_dep": "Meta Platforms, Inc;;;;;;", "aff_unique_url": "https://www.meta.com;https://www.indiana.edu;https://wsu.edu;https://www.clemson.edu;https://www.northeastern.edu;https://www.utexas.edu;https://www.uci.edu", "aff_unique_abbr": "Meta;IU;WSU;Clemson;NEU;UT Austin;UCI", "aff_campus_unique_index": "1;1;1;2", "aff_campus_unique": ";Austin;Irvine", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "OT1xF6_56J", "title": "Rethinking Backdoor Data Poisoning Attacks in the Context of Semi-Supervised Learning", "track": "main", "status": "Reject", "tldr": "We investigate vulnerabilities of semi-supervised learning methods to backdoor data poisoning attacks in unlabeled data and identify characteristics necessary for attack success. ", "abstract": "Semi-supervised learning methods can train high-accuracy machine learning models with a fraction of the labeled training samples required for traditional supervised learning. Such methods do not typically involve close review of the unlabeled training samples, making them tempting targets for data poisoning attacks. In this paper we investigate the vulnerabilities of semi-supervised learning methods to backdoor data poisoning attacks on the unlabeled samples. We show that a simple poisoning attack using adversarially perturbed samples is highly effective - achieving an average attack success rate of 93.6%. 
We introduce a generalized attack framework targeting semi-supervised learning methods to better understand and exploit their limitations and to motivate future defense strategies.", "keywords": "data poisoning;backdoor attacks;semi-supervised learning", "primary_area": "", "supplementary_material": "/attachment/d81dfe357522b41b1aedb925e6e7b4fb24d13837.zip", "author": "Marissa Catherine Connor;Vincent Emanuele", "authorids": "~Marissa_Catherine_Connor1;~Vincent_Emanuele1", "gender": "F;M", "homepage": ";", "dblp": "255/7830;", "google_scholar": "xFUS92IAAAAJ;", "orcid": ";", "linkedin": ";vae2dc/", "or_profile": "~Marissa_Catherine_Connor1;~Vincent_Emanuele1", "aff": "Embedded Intelligence;Embedded Intelligence", "aff_domain": "embedintel.com;embedintel.com", "position": "Researcher;CTO", "bibtex": "@misc{\nconnor2023rethinking,\ntitle={Rethinking Backdoor Data Poisoning Attacks in the Context of Semi-Supervised Learning},\nauthor={Marissa Catherine Connor and Vincent Emanuele},\nyear={2023},\nurl={https://openreview.net/forum?id=OT1xF6_56J}\n}", "github": "", "project": "", "reviewers": "u9yW;zodC;nc1F", "site": "https://openreview.net/forum?id=OT1xF6_56J", "pdf_size": 1308857, "recommendation": "1;3;3", "confidence": "3;4;4", "correctness": "3;2;2", "technical_novelty": "1;2;3", "empirical_novelty": "1;2;3", "wc_summary_paper": "36;73;36", "wc_strength_and_weaknesses": "201;327;307", "wc_clarity_quality_novelty_and_reproducibility": "20;90;16", "wc_summary_review": "19;122;30", "wc_review": "276;612;389", "wc_reply_reviewers": "0;78;0", "wc_reply_authors": "163;554;204", "reply_reviewers": "0;1;0", "reply_authors": "1;1;1", "recommendation_avg": [ 2.3333333333333335, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.0, 0.816496580927726 ], "empirical_novelty_avg": [ 2.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 48.333333333333336, 17.441967269268172 ], "wc_strength_and_weaknesses_avg": [ 278.3333333333333, 55.28913897764089 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 42.0, 33.980386499665755 ], "wc_summary_review_avg": [ 57.0, 46.180804092898455 ], "wc_review_avg": [ 425.6666666666667, 139.60022285878424 ], "wc_reply_reviewers_avg": [ 26.0, 36.76955262170047 ], "wc_reply_authors_avg": [ 307.0, 175.45559742187385 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 1.0, "corr_recommendation_correctness": -1.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2061242836172926805&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Embedded Intelligence", "aff_unique_dep": "", "aff_unique_url": "", "aff_unique_abbr": "", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "", "aff_country_unique": "" }, { "id": "OTIhUlChVaT", "title": "Generative Multi-Flow Networks: Centralized, Independent and Conservation", "track": "main", "status": "Reject", "tldr": "Generative Multi-Flow Networks", "abstract": "Generative flow networks utilize the flow matching loss to learn a stochastic policy for generating objects from a sequence of actions, such that the probability of generating a pattern can be proportional to the corresponding given reward. 
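For reference, the flow matching condition and loss mentioned in the preceding sentence can be written in the standard single-network GFlowNet form as follows (the paper's multi-flow extension is not reproduced here):

% Flow matching: at every non-initial state s, inflow equals reward plus
% outflow, where R(s) = 0 for non-terminal states. A common squared-log loss:
\sum_{s':\, s' \to s} F(s' \to s) \;=\; R(s) + \sum_{s'':\, s \to s''} F(s \to s''),
\qquad
\mathcal{L}_{\mathrm{FM}}(s) = \Bigl( \log \frac{\epsilon + \sum_{s'} F_\theta(s' \to s)}{\epsilon + R(s) + \sum_{s''} F_\theta(s \to s'')} \Bigr)^{2}.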
However, existing works can only handle single flow model tasks and cannot directly generalize to multi-agent flow networks due to limitations such as flow estimation complexity and independent sampling. In this paper, we propose the framework of generative multi-flow networks (GMFlowNets) that can be applied to multiple agents to generate objects collaboratively through a series of joint actions. Then, the centralized flow network algorithm is proposed for centralized training GMFlowNets, while the independent flow network algorithm is proposed to achieve decentralized execution of GMFlowNets. Based on the independent global conservation condition, the flow conservation network algorithm is then proposed to realize centralized training with decentralized execution paradigm. Theoretical analysis proves that using the multi-flow matching loss function can train a unique Markovian flow, and the flow conservation network can ensure independent policies can generate samples with probability proportional to the reward function. Experimental results demonstrate the performance superiority of the proposed algorithms compared to reinforcement learning and MCMC-based methods.", "keywords": "GFlowNets;Multi-Flow", "primary_area": "", "supplementary_material": "/attachment/4067ddd344bad15303b5bf31a092a8ac534d0c92.zip", "author": "Yinchuan Li;Haozhi Wang;Shuang Luo;yunfeng shao;Jianye HAO", "authorids": "~Yinchuan_Li1;~Haozhi_Wang1;~Shuang_Luo1;~yunfeng_shao1;~Jianye_HAO1", "gender": "M;M;;M;M", "homepage": "https://yinchuanll.github.io/;;https://github.com/isluoshuang;https://www.researchgate.net/profile/Yunfeng-Shao;http://www.icdai.org/jianye.html", "dblp": "236/4930;;;;21/7664.html", "google_scholar": "M6YfuCTSaKsC;;;;", "orcid": "0000-0002-4263-5130;0000-0003-0732-8992;;;0000-0002-0422-8235", "linkedin": "yinchuan-li-835791189;;;;", "or_profile": "~Yinchuan_Li1;~Haozhi_Wang1;~Shuang_Luo1;~yunfeng_shao1;~Jianye_HAO1", "aff": "Huawei Noah's Ark Lab (AI Lab);Tianjin University;Zhejiang University;Huawei Technologies Ltd.;Tianjin University", "aff_domain": "huawei.com;edu.cn;zju.edu.cn;huawei.com;tju.edu.cn", "position": "Principal Researcher;PhD student;PhD student;Principal Researcher;Associate Professor", "bibtex": "@misc{\nli2023generative,\ntitle={Generative Multi-Flow Networks: Centralized, Independent and Conservation},\nauthor={Yinchuan Li and Haozhi Wang and Shuang Luo and yunfeng shao and Jianye HAO},\nyear={2023},\nurl={https://openreview.net/forum?id=OTIhUlChVaT}\n}", "github": "", "project": "", "reviewers": "4dkX;pmcj;YqtU;45Kq", "site": "https://openreview.net/forum?id=OTIhUlChVaT", "pdf_size": 1366168, "recommendation": "3;3;3;5", "confidence": "4;5;3;5", "correctness": "1;2;2;2", "technical_novelty": "2;2;2;2", "empirical_novelty": "2;2;2;1", "wc_summary_paper": "51;72;139;84", "wc_strength_and_weaknesses": "406;344;321;1570", "wc_clarity_quality_novelty_and_reproducibility": "230;19;24;48", "wc_summary_review": "46;83;45;58", "wc_review": "733;518;529;1760", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "correctness_avg": [ 1.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 86.5, 32.53075467922624 ], "wc_strength_and_weaknesses_avg": [ 660.25, 526.163650872996 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 
80.25, 87.15037291945457 ], "wc_summary_review_avg": [ 58.0, 15.313392831113555 ], "wc_review_avg": [ 885.0, 512.3851090732438 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.5222329678670935, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5577189490819570871&as_sdt=8000005&sciodt=0,19&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;2;0;1", "aff_unique_norm": "Huawei;Tianjin University;Zhejiang University", "aff_unique_dep": "Noah's Ark Lab (AI Lab);;", "aff_unique_url": "https://www.huawei.com;http://www.tju.edu.cn;https://www.zju.edu.cn", "aff_unique_abbr": "Huawei;TJU;ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Global Explainability of GNNs via Logic Combination of Learned Concepts", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11248", "id": "OTbRTIY4YS", "poster": "/media/PosterPDFs/ICLR%202023/11248.png?t=1682079488.7644234", "openreview": "https://openreview.net/forum?id=OTbRTIY4YS", "slides": "https://iclr.cc/virtual/2023/poster/11248", "video": "https://iclr.cc/virtual/2023/poster/11248", "author_site": "Steve Azzolin, Antonio Longa, Pietro Barbiero, Pietro Lio, Andrea Passerini", "tldr": "We propose GLGExplainer, the first Global Explainer for GNNs capable of generating explanations as arbitrary Boolean combinations of graphical concepts.", "abstract": "While instance-level explanation of GNN is a well-studied problem with plenty of approaches being developed, providing a global explanation for the behaviour of a GNN is much less explored, despite its potential in interpretability and debugging. Existing solutions either simply list local explanations for a given class, or generate a synthetic prototypical graph with maximal score for a given class, completely missing any combinatorial aspect that the GNN could have learned.\nIn this work, we propose GLGExplainer (Global Logic-based GNN Explainer), the first Global Explainer capable of generating explanations as arbitrary Boolean combinations of learned graphical concepts. GLGExplainer is a fully differentiable architecture that takes local explanations as inputs and combines them into a logic formula over graphical concepts, represented as clusters of local explanations. \nContrary to existing solutions, GLGExplainer provides accurate and human-interpretable global explanations that are perfectly aligned with ground-truth explanations (on synthetic data) or match existing domain knowledge (on real-world data). 
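A rough sketch of the pipeline just described, assuming local-explanation embeddings and model predictions are given: soft-assign each embedding to concept clusters, binarize the assignments, and read off which Boolean patterns of concepts co-occur with each class. This is a simplification for illustration, with hypothetical inputs (emb, labels), not the GLGExplainer architecture.

import numpy as np
from itertools import product

def concept_activations(emb, centers, tau=1.0):
    # Soft-assign each local-explanation embedding to concept clusters.
    d = ((emb[:, None, :] - centers[None, :, :]) ** 2).sum(-1)
    w = np.exp(-d / tau)
    return w / w.sum(1, keepdims=True)

rng = np.random.default_rng(0)
emb = rng.standard_normal((200, 16))              # local-explanation embeddings
centers = emb[rng.choice(200, 4, replace=False)]  # e.g., k-means centers
acts = concept_activations(emb, centers) > 0.5    # binarized concept vectors
labels = rng.integers(0, 2, 200)                  # model predictions to explain

# Read off a truth table: which concept patterns co-occur with class 1.
for pattern in product([False, True], repeat=4):
    idx = (acts == np.array(pattern)).all(1)
    if idx.any():
        print(pattern, "class-1 frequency:", labels[idx].mean())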
Extracted formulas are faithful to the model predictions, to the point of providing insights into some occasionally incorrect rules learned by the model, making GLGExplainer a promising diagnostic tool for learned GNNs.", "keywords": "Explainability;Graph Neural Networks;Concept Learning", "primary_area": "", "supplementary_material": "", "author": "Steve Azzolin;Antonio Longa;Pietro Barbiero;Pietro Lio;Andrea Passerini", "authorids": "~Steve_Azzolin2;~Antonio_Longa1;~Pietro_Barbiero1;~Pietro_Lio1;~Andrea_Passerini2", "gender": "M;M;M;M;M", "homepage": "https://steveazzolin.github.io/;https://antoniolonga.github.io/;http://www.pietrobarbiero.eu/;https://www.cst.cam.ac.uk/people/pl219;http://disi.unitn.it/~passerini/", "dblp": "321/0453;303/4796;238/7860;l/PietroLio.html;00/6186", "google_scholar": ";https://scholar.google.com/citations?hl=it;https://scholar.google.it/citations?user=4gbToQoAAAAJ;https://scholar.google.co.uk/citations?user=3YrWf7EAAAAJ;https://scholar.google.it/citations?user=IIXgkLoAAAAJ", "orcid": "0009-0005-3418-0585;0000-0003-0337-1838;0000-0003-3155-2564;0000-0002-0540-5053;0000-0002-2765-5395", "linkedin": "steve-azzolin/;;;;", "or_profile": "~Steve_Azzolin2;~Antonio_Longa1;~Pietro_Barbiero1;~Pietro_Lio1;~Andrea_Passerini2", "aff": "University of Trento;University of Trento;University of Cambridge;University of Cambridge;University of Trento", "aff_domain": "unitn.it;unitn.it;cam.ac.uk;cam.ac.uk;unitn.it", "position": "MS student;PhD student;PhD student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nazzolin2023global,\ntitle={Global Explainability of {GNN}s via Logic Combination of Learned Concepts},\nauthor={Steve Azzolin and Antonio Longa and Pietro Barbiero and Pietro Lio and Andrea Passerini},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=OTbRTIY4YS}\n}", "github": "", "project": "", "reviewers": "8iKd;gQT1;CpLz", "pdf_size": 872681, "recommendation": "5;5;8", "confidence": "2;4;3", "correctness": "3;3;3", "technical_novelty": "2;2;3", "empirical_novelty": "2;3;3", "wc_summary_paper": "94;53;188", "wc_strength_and_weaknesses": "351;249;152", "wc_clarity_quality_novelty_and_reproducibility": "64;74;24", "wc_summary_review": "58;51;26", "wc_review": "567;427;390", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 111.66666666666667, 56.51155240794183 ], "wc_strength_and_weaknesses_avg": [ 250.66666666666666, 81.24995726494602 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 54.0, 21.602468994692867 ], "wc_summary_review_avg": [ 45.0, 13.73559851869101 ], "wc_review_avg": [ 461.3333333333333, 76.2291865946954 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 69, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2576996178667137782&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 13, "pdf": "https://openreview.net/pdf?id=OTbRTIY4YS", "email": 
"unitn.it;unitn.it;cam.ac.uk;cam.ac.uk;unitn.it", "author_num": 5, "aff_unique_index": "0;0;1;1;0", "aff_unique_norm": "University of Trento;University of Cambridge", "aff_unique_dep": ";", "aff_unique_url": "https://www.unitn.it;https://www.cam.ac.uk", "aff_unique_abbr": "UniTN;Cambridge", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;0;1;1;0", "aff_country_unique": "Italy;United Kingdom" }, { "id": "OTiSSCBm1QD", "title": "Temporal Relevance Analysis for Video Action Models", "track": "main", "status": "Reject", "tldr": "The paper provides a deep analysis of the temporal modeling for action recognition.", "abstract": "In this paper, we provide a deep analysis of temporal modeling for action recognition, an important but underexplored problem in\nthe literature. We first propose a new approach to quantify the temporal relationships between frames captured by CNN-based action models based on layer-wise relevance propagation. We then conduct comprehensive experiments and in-depth analysis to provide a better understanding of how temporal modeling is affected by various factors such as dataset, network architecture, and input frames. With this, we further study some important questions for action recognition that lead to interesting findings. Our analysis shows that there is no strong correlation between temporal relevance and model performance; and action models tend to capture local temporal information, but less long-range dependencies.", "keywords": "Temporal analysis;Frame relevance;Video data;Action recognition", "primary_area": "", "supplementary_material": "", "author": "Quanfu Fan;Donghyun Kim;Chun-Fu Chen;Stan Sclaroff;Kate Saenko;Sarah Adel Bargal", "authorids": "~Quanfu_Fan1;~Donghyun_Kim2;~Chun-Fu_Chen1;~Stan_Sclaroff1;~Kate_Saenko1;~Sarah_Adel_Bargal1", "gender": "M;M;M;;F;F", "homepage": ";https://cs-people.bu.edu/donhk;;;http://ai.bu.edu;https://bargal.georgetown.domains/", "dblp": "66/3950;;48/915;;88/2754;166/4828", "google_scholar": "kCxHiwUAAAAJ;https://scholar.google.co.kr/citations?user=UsqNPH4AAAAJ;9gqd5cYAAAAJ;;https://scholar.google.com.tw/citations?user=9xDADY4AAAAJ;zjlFcrEAAAAJ", "orcid": ";;;;0000-0002-5704-7614;0000-0003-3157-0412", "linkedin": ";;;;;", "or_profile": "~Quanfu_Fan1;~Donghyun_Kim2;~Chun-Fu_Chen1;~Stan_Sclaroff1;~Kate_Saenko1;~Sarah_Adel_Bargal1", "aff": "MIT-IBM Watson AI Lab;MIT-IBM Watson AI Lab;JPMorganChase, GTAR;;Boston University, Boston University;Georgetown University", "aff_domain": "us.ibm.com;ibm.com;jpmchase.com;;bu.edu;georgetown.edu", "position": "Researcher;Researcher;Executive Director;;Full Professor;Assistant Professor", "bibtex": "@misc{\nfan2023temporal,\ntitle={Temporal Relevance Analysis for Video Action Models},\nauthor={Quanfu Fan and Donghyun Kim and Chun-Fu Chen and Stan Sclaroff and Kate Saenko and Sarah Adel Bargal},\nyear={2023},\nurl={https://openreview.net/forum?id=OTiSSCBm1QD}\n}", "github": "", "project": "", "reviewers": "4NU1;6sPt;GyKL", "site": "https://openreview.net/forum?id=OTiSSCBm1QD", "pdf_size": 4554455, "recommendation": "3;5;6", "confidence": "4;4;2", "correctness": "2;3;4", "technical_novelty": "2;2;3", "empirical_novelty": "1;3;3", "wc_summary_paper": "28;59;52", "wc_strength_and_weaknesses": "253;387;147", "wc_clarity_quality_novelty_and_reproducibility": "70;59;31", "wc_summary_review": "29;38;39", "wc_review": "380;543;269", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", 
"recommendation_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.9428090415820634 ], "wc_summary_paper_avg": [ 46.333333333333336, 13.27487183449325 ], "wc_strength_and_weaknesses_avg": [ 262.3333333333333, 98.20160668520427 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 53.333333333333336, 16.418147141366337 ], "wc_summary_review_avg": [ 35.333333333333336, 4.4969125210773475 ], "wc_review_avg": [ 397.3333333333333, 112.52950230445744 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.7559289460184546, "corr_recommendation_correctness": 0.9819805060619659, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:8bTzsAnU4LIJ:scholar.google.com/&scioq=Temporal+Relevance+Analysis+for+Video+Action+Models&hl=en&as_sdt=0,5", "gs_version_total": 3, "aff_unique_index": "0;0;1;2;3", "aff_unique_norm": "Massachusetts Institute of Technology;JPMorgan Chase;Boston University;Georgetown University", "aff_unique_dep": "IBM Watson AI Lab;Global Technology, Analytics, and Research (GTAR);;", "aff_unique_url": "https://www.mitibmwatsonailab.org;https://www.jpmorganchase.com;https://www.bu.edu;https://www.georgetown.edu", "aff_unique_abbr": "MIT-IBM AI Lab;JPM;BU;GU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Boston", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "OUMNXSAek8", "title": "Learning Implicit Scale Conditioned Memory Compensation for Talking Head Generation", "track": "main", "status": "Reject", "tldr": "We propose a novel implicit scale conditioned memory compensation network (MCNet) for high-fidelity talking head generation.", "abstract": "Talking head video generation aims to animate the pose and expression of a person in a target driving video using motion information contained in the video, while maintaining a person's identity in a given still source image. Highly dynamic and complex motions in the driving video cause ambiguous generation from the source image, because the still source image cannot provide sufficient appearance information for occluded regions or delicate expressions, which severely produces artifacts and significantly degrades the generation quality. However, existing works mainly focus on learning more accurate motion estimation and representation in 2D and 3D, and they ignore the facial structural prior in addressing the facial ambiguities. Therefore, effective handling of the ambiguities in the dramatic appearance changes of the source to largely improve facial details and completeness in generation still remains barely explored. To this end, we propose a novel implicit scale conditioned memory compensation network (MCNet) for high-fidelity talking head generation. Specifically, considering human faces are symmetric and structured, we aim to automatically learn a representative global facial memory bank from all training data as a prior to compensate for the facial generation features. Each face in the source image contains a scale that can be reflected in detected facial keypoints. 
To better query the learned global memory, we further propose to learn implicit scale representations from the discrete keypoints, which can be used to condition on the query of the global memory, to obtain scale-aware memory for the feature compensation. Extensive experiments from quantitative and qualitative perspectives demonstrate that MCNet can learn representative and complementary facial memory, and can clearly outperform previous state-of-the-art methods on VoxCeleb1 and CelebV datasets. ", "keywords": "Talking Head Generation", "primary_area": "", "supplementary_material": "/attachment/268ad7517cc1389f296c55cb5e96f9a2923df8f3.zip", "author": "Fa-Ting Hong;Dan Xu", "authorids": "~Fa-Ting_Hong2;~Dan_Xu4", "gender": "M;M", "homepage": "https://www.danxurgb.net;http://harlanhong.github.io", "dblp": "16/3823-2.html;239/4037", "google_scholar": "OuSPv-AAAAAJ;NBV1HVIAAAAJ", "orcid": "0000-0003-0136-9603;", "linkedin": ";", "or_profile": "~Dan_Xu4;~Fa-Ting_Hong1", "aff": "VGG, University of Oxford;Department of Computer Science and Engineering, Hong Kong University of Science and Technology", "aff_domain": "ox.ac.uk;cse.ust.hk", "position": "Postdoc;PhD student", "bibtex": "@misc{\nhong2023learning,\ntitle={Learning Implicit Scale Conditioned Memory Compensation for Talking Head Generation},\nauthor={Fa-Ting Hong and Dan Xu},\nyear={2023},\nurl={https://openreview.net/forum?id=OUMNXSAek8}\n}", "github": "", "project": "", "reviewers": "StKu;gGqi;dF7D", "site": "https://openreview.net/forum?id=OUMNXSAek8", "pdf_size": 4532666, "recommendation": "6;6;6", "confidence": "4;3;4", "correctness": "4;4;4", "technical_novelty": "3;3;3", "empirical_novelty": "0;3;3", "wc_summary_paper": "37;157;121", "wc_strength_and_weaknesses": "494;256;368", "wc_clarity_quality_novelty_and_reproducibility": "43;29;47", "wc_summary_review": "30;21;55", "wc_review": "604;463;591", "wc_reply_reviewers": "136;0;67", "wc_reply_authors": "721;743;686", "reply_reviewers": "1;0;1", "reply_authors": "1;1;1", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 4.0, 0.0 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 1.4142135623730951 ], "wc_summary_paper_avg": [ 105.0, 50.27922035990614 ], "wc_strength_and_weaknesses_avg": [ 372.6666666666667, 97.21911106133174 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 39.666666666666664, 7.717224601860151 ], "wc_summary_review_avg": [ 35.333333333333336, 14.383632673594278 ], "wc_review_avg": [ 552.6666666666666, 63.625641232306826 ], "wc_reply_reviewers_avg": [ 67.66666666666667, 55.52376868413103 ], "wc_reply_authors_avg": [ 716.6666666666666, 23.471022323045258 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:C1hhlHtcUuEJ:scholar.google.com/&scioq=Learning+Implicit+Scale+Conditioned+Memory+Compensation+for+Talking+Head+Generation&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "University of Oxford;Hong Kong University of Science and Technology", "aff_unique_dep": "VGG;Department of Computer Science and Engineering", "aff_unique_url": "https://www.ox.ac.uk;https://www.ust.hk", "aff_unique_abbr": "Oxford;HKUST", "aff_campus_unique_index": "0;1", "aff_campus_unique": 
"Oxford;Hong Kong SAR", "aff_country_unique_index": "0;1", "aff_country_unique": "United Kingdom;China" }, { "id": "OUV0Fh5Lgm2", "title": "Robust Quantity-Aware Aggregation for Federated Learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Federated learning (FL) enables multiple clients to collaboratively train models without sharing their local data, and becomes an important privacy-preserving machine learning framework. However, classical FL faces serious security and robustness problem, e.g., malicious clients can poison model updates and at the same time claim large quantities to amplify the impact of their model updates in the model aggregation. Existing defense methods for FL, while all handling malicious model updates, either treat all quantities benign or simply ignore/truncate the quantities of all clients. The former is vulnerable to quantity-enhanced attack, while the latter leads to sub-optimal performance since the local data on different clients is usually in significantly different sizes. In this paper, we propose a robust quantity-aware aggregation algorithm for federated learning, called FedRA, to perform the aggregation with awareness of local data quantities while being able to defend against quantity-enhanced attacks. More specifically, we propose a method to filter malicious clients by jointly considering the uploaded model updates and data quantities from different clients, and performing quantity-aware weighted averaging on model updates from remaining clients. Moreover, as the number of malicious clients participating in the federated learning may dynamically change in different rounds, we also propose a malicious client number estimator to predict how many suspicious clients should be filtered in each round. Experiments on four public datasets demonstrate the effectiveness of our FedRA method in defending FL against quantity-enhanced attacks. 
Our code is available at \\url{https://anonymous.4open.science/r/FedRA-4C1E}.\n", "keywords": "Federated Learning;Robustness;Defense", "primary_area": "", "supplementary_material": "", "author": "Jingwei Yi;Fangzhao Wu;Huishuai Zhang;Bin Benjamin Zhu;Tao Qi;Guangzhong Sun;Xing Xie", "authorids": "~Jingwei_Yi1;~Fangzhao_Wu1;~Huishuai_Zhang3;~Bin_Benjamin_Zhu1;~Tao_Qi1;~Guangzhong_Sun1;~Xing_Xie3", "gender": "F;;;M;M;M;M", "homepage": ";;;https://www.microsoft.com/en-us/research/people/binzhu/;https://taoqi98.github.io/;;http://research.microsoft.com/en-us/people/xingx/", "dblp": "290/2312;;;85/5693.html;;44/1372;08/6809-1", "google_scholar": "BPnONGoAAAAJ;;;zyXRIGgAAAAJ;iRr7c9wAAAAJ;;5EQfAFIAAAAJ", "orcid": "0009-0001-2786-6395;;;0000-0002-3571-7808;0000-0001-6544-2596;0000-0002-0794-7681;0000-0002-8608-8482", "linkedin": ";;;;;;xingx/", "or_profile": "~Jingwei_Yi1;~Fangzhao_Wu1;~Huishuai_Zhang3;~Bin_Benjamin_Zhu1;~Tao_Qi1;~Guangzhong_Sun1;~Xing_Xie3", "aff": "University of Science and Technology of China;;;Microsoft Research Asia;Tsinghua University;University of Science and Technology of China;Microsoft Research Asia", "aff_domain": "ustc.edu.cn;;;microsoft.com;tsinghua.edu.cn;ustc.edu.cn;microsoft.com", "position": "PhD student;;;Principal Researcher;PhD student;Full Professor;Senior Principal Researcher", "bibtex": "@misc{\nyi2023robust,\ntitle={Robust Quantity-Aware Aggregation for Federated Learning},\nauthor={Jingwei Yi and Fangzhao Wu and Huishuai Zhang and Bin Benjamin Zhu and Tao Qi and Guangzhong Sun and Xing Xie},\nyear={2023},\nurl={https://openreview.net/forum?id=OUV0Fh5Lgm2}\n}", "github": "", "project": "", "reviewers": "MrgL;GJR1;ZGtU", "site": "https://openreview.net/forum?id=OUV0Fh5Lgm2", "pdf_size": 569916, "recommendation": "3;5;5", "confidence": "5;4;3", "correctness": "2;3;3", "technical_novelty": "2;2;3", "empirical_novelty": "2;2;3", "wc_summary_paper": "104;48;100", "wc_strength_and_weaknesses": "520;187;247", "wc_clarity_quality_novelty_and_reproducibility": "18;15;70", "wc_summary_review": "39;31;10", "wc_review": "681;281;427", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 84.0, 25.508168626278653 ], "wc_strength_and_weaknesses_avg": [ 318.0, 144.9206679531943 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 34.333333333333336, 25.249862485874168 ], "wc_summary_review_avg": [ 26.666666666666668, 12.229290885229428 ], "wc_review_avg": [ 463.0, 165.2714938114455 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.8660254037844385, "corr_recommendation_correctness": 0.9999999999999998, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6294999154508762399&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2;0;1", "aff_unique_norm": "University of Science and Technology of China;Microsoft;Tsinghua University", "aff_unique_dep": ";Research;", "aff_unique_url": 
"http://www.ustc.edu.cn;https://www.microsoft.com/en-us/research/group/asia;https://www.tsinghua.edu.cn", "aff_unique_abbr": "USTC;MSR Asia;THU", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "OVbY-QCCjAh", "title": "SAGE: Semantic-Aware Global Explanations for Named Entity Recognition", "track": "main", "status": "Reject", "tldr": "", "abstract": "In the last decades, deep learning approaches achieved impressive results in many research fields, \nsuch as Computer Vision and Natural Language Processing (NLP). \nNLP in particular has greatly benefit from unsupervised methods that allow to learn distributed representation of language.\nOn the race for better performances Language Models have reached hundred of billions parameters nowadays. \nDespite the remarkable results, deep models are still far from being fully exploited in real world applications. \nIndeed, these approaches are black-boxes, i.e. they are not interpretable by design nor explainable, which is often crucial to make decisions in business. \nSeveral task-agnostic methods have been proposed in literature to explain models' decisions. \nMost techniques rely on the \"local\" assumption, i.e. explanations are made example-wise.\nIn this paper instead, we present a post-hoc method to produce highly interpretable global rules to explain NLP classifiers. \nRules are extracted with a data mining approach on a semantically enriched input representation, instead of using words/wordpieces solely. \nSemantic information yields more abstract and general rules that are both more explanatory and less complex, while being also better at reflecting the model behaviour.\nIn the experiments we focus on Named Entity Recognition, an NLP task where explainability is under-investigated. 
\nWe explain the predictions of BERT NER classifiers trained on two popular benchmarks, CoNLL03 and Ontonotes, and compare our model against LIME.", "keywords": "Explainable AI;Named Entity Recognition;Language Models;Natural Language Processing", "primary_area": "", "supplementary_material": "", "author": "Andrea Zugarini;Leonardo Rigutini", "authorids": "~Andrea_Zugarini1;~Leonardo_Rigutini1", "gender": "M;M", "homepage": ";", "dblp": "198/0918;05/345", "google_scholar": "leSVEswAAAAJ;https://scholar.google.it/citations?user=O6eBVrEAAAAJ", "orcid": ";0000-0002-6309-2542", "linkedin": "https://it.linkedin.com/in/andrea-zugarini-930a8898;rigutini/", "or_profile": "~Andrea_Zugarini1;~Leonardo_Rigutini1", "aff": "Expert.ai Srl;University of Siena", "aff_domain": "expert.ai;unisi.it", "position": "Researcher;Lecturer", "bibtex": "@misc{\nzugarini2023sage,\ntitle={{SAGE}: Semantic-Aware Global Explanations for Named Entity Recognition},\nauthor={Andrea Zugarini and Leonardo Rigutini},\nyear={2023},\nurl={https://openreview.net/forum?id=OVbY-QCCjAh}\n}", "github": "", "project": "", "reviewers": "rFXT;4Q6v;3Fgr;RoTP;WCcH", "site": "https://openreview.net/forum?id=OVbY-QCCjAh", "pdf_size": 501857, "recommendation": "3;3;3;5;6", "confidence": "3;2;4;4;4", "correctness": "1;2;3;4;4", "technical_novelty": "2;3;2;3;3", "empirical_novelty": "1;3;0;3;3", "wc_summary_paper": "30;26;139;175;68", "wc_strength_and_weaknesses": "69;245;171;263;202", "wc_clarity_quality_novelty_and_reproducibility": "77;158;114;79;40", "wc_summary_review": "33;59;46;42;56", "wc_review": "209;488;470;559;366", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "104;128;657;35;107", "reply_reviewers": "0;0;0;0;0", "reply_authors": "1;1;1;1;1", "recommendation_avg": [ 4.0, 1.2649110640673518 ], "confidence_avg": [ 3.4, 0.8 ], "correctness_avg": [ 2.8, 1.16619037896906 ], "technical_novelty_avg": [ 2.6, 0.4898979485566356 ], "empirical_novelty_avg": [ 2.0, 1.2649110640673518 ], "wc_summary_paper_avg": [ 87.6, 59.62751042933119 ], "wc_strength_and_weaknesses_avg": [ 190.0, 68.55654600401044 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 93.6, 39.81256083197864 ], "wc_summary_review_avg": [ 47.2, 9.453041838477178 ], "wc_review_avg": [ 418.4, 121.54768611536791 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 206.2, 227.56572676921274 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.5929270612815711, "corr_recommendation_correctness": 0.8134892168199606, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14741008841253618410&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Expert.ai;University of Siena", "aff_unique_dep": ";", "aff_unique_url": "https://www.expert.ai;https://www.unisi.it", "aff_unique_abbr": "Expert.ai;UniSi", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Italy" }, { "title": "Differentiable Gaussianization Layers for Inverse Problems Regularized by Deep Generative Models", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12088", "id": "OXP9Ns0gnIq", "poster": "", "openreview": "https://openreview.net/forum?id=OXP9Ns0gnIq", "slides": "https://iclr.cc/virtual/2023/poster/12088", "video": "https://iclr.cc/virtual/2023/poster/12088", "tldr": "", "abstract": "Deep generative models such as GANs, 
normalizing flows, and diffusion models are powerful regularizers for inverse problems. They exhibit great potential for helping reduce ill-posedness and attain high-quality results. However, the latent tensors of such deep generative models can fall out of the desired high-dimensional standard Gaussian distribution during inversion, particularly in the presence of data noise and inaccurate forward models, leading to low-fidelity solutions. To address this issue, we propose to reparameterize and Gaussianize the latent tensors using novel differentiable data-dependent layers wherein custom operators are defined by solving optimization problems. These proposed layers constrain inverse problems to obtain high-fidelity in-distribution solutions. We validate our technique on three inversion tasks: compressive-sensing MRI, image deblurring, and eikonal tomography (a nonlinear PDE-constrained inverse problem) using two representative deep generative models: StyleGAN2 and Glow. Our approach achieves state-of-the-art performance in terms of accuracy and consistency.", "keywords": "Deep generative models;inverse problems;Gaussianization", "primary_area": "", "supplementary_material": "", "author": "Dongzhuo Li", "authorids": "~Dongzhuo_Li1", "gender": "", "homepage": "", "dblp": "", "google_scholar": "https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-2341-1553", "linkedin": "dongzhuo-li-b06153ab", "or_profile": "~Dongzhuo_Li1", "aff": "Meta", "aff_domain": "meta.com", "position": "Machine Learning Engineer", "bibtex": "@inproceedings{\nli2023differentiable,\ntitle={Differentiable Gaussianization Layers for Inverse Problems Regularized by Deep Generative Models},\nauthor={Dongzhuo Li},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=OXP9Ns0gnIq}\n}", "github": "", "project": "", "reviewers": "k6hw;VNJA;NCs6;YtuR", "pdf_size": 8418457, "recommendation": "5;6;6;8", "confidence": "4;4;3;3", "correctness": "3;3;4;4", "technical_novelty": "3;3;3;3", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "40;130;142;116", "wc_strength_and_weaknesses": "234;648;184;123", "wc_clarity_quality_novelty_and_reproducibility": "170;121;45;19", "wc_summary_review": "102;32;74;14", "wc_review": "546;931;445;272", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1141;1557;312;94", "reply_reviewers": "0;0;0;0", "reply_authors": "2;4;1;1", "recommendation_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 107.0, 39.761790704142086 ], "wc_strength_and_weaknesses_avg": [ 297.25, 206.28545149864544 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 88.75, 60.04321360486962 ], "wc_summary_review_avg": [ 55.5, 34.565155865408734 ], "wc_review_avg": [ 548.5, 241.59728889207346 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 776.0, 596.5748067091 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 1, 0 ], "corr_recommendation_confidence": -0.6882472016116854, "corr_recommendation_correctness": 0.6882472016116854, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15429396934206956306&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=OXP9Ns0gnIq", "email": "meta.com", "author_num": 1, "aff_unique_index": 
"0", "aff_unique_norm": "Meta", "aff_unique_dep": "Meta Platforms, Inc.", "aff_unique_url": "https://meta.com", "aff_unique_abbr": "Meta", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "OYKIo3ySkxA", "title": "DIGEST: FAST AND COMMUNICATION EFFICIENT DECENTRALIZED LEARNING WITH LOCAL UPDATES", "track": "main", "status": "Reject", "tldr": "", "abstract": "Decentralized learning advocates the elimination of centralized parameter servers\n(aggregation points) for potentially better utilization of underlying resources, de-\nlay reduction, and resiliency against parameter server unavailability and catas-\ntrophic failures. Gossip based decentralized algorithms, where each node in a net-\nwork has its own locally kept model on which it effectuates the learning by talking\nto its neighbors, received a lot of attention recently. Despite their potential, Gossip\nalgorithms introduce huge communication costs. In this work, we show that nodes\ndo not need to communicate as frequently as in Gossip for fast convergence; in\nfact, a sporadic exchange of a digest of a trained model is sufficient. Thus, we\ndesign a fast and communication-efficient decentralized learning mechanism; DI-\nGEST by particularly focusing on stochastic gradient descent (SGD). DIGEST is\na decentralized algorithm building on local-SGD algorithms, which are originally\ndesigned for communication efficient centralized learning. We show through anal-\nysis and experiments that DIGEST significantly reduces the communication cost\nwithout hurting convergence time for both iid and non-iid data.", "keywords": "Decentralized Learning;Distributed Optimization;Communication Efficient Learning;Local SGD;Federated Learning", "primary_area": "", "supplementary_material": "", "author": "Peyman Gholami;Hulya Seferoglu", "authorids": "~Peyman_Gholami1;~Hulya_Seferoglu1", "gender": "M;F", "homepage": ";https://sites.google.com/uic.edu/nrl/home", "dblp": ";27/825", "google_scholar": "mjfYEY8AAAAJ;3hy76zkAAAAJ", "orcid": "0009-0005-0604-2038;", "linkedin": "peyman-gholami-373049204;", "or_profile": "~Peyman_Gholami1;~Hulya_Seferoglu1", "aff": "University of Illinois at Chicago;", "aff_domain": "uic.edu;", "position": "PhD student;", "bibtex": "@misc{\ngholami2023digest,\ntitle={{DIGEST}: {FAST} {AND} {COMMUNICATION} {EFFICIENT} {DECENTRALIZED} {LEARNING} {WITH} {LOCAL} {UPDATES}},\nauthor={Peyman Gholami and Hulya Seferoglu},\nyear={2023},\nurl={https://openreview.net/forum?id=OYKIo3ySkxA}\n}", "github": "", "project": "", "reviewers": "yt5V;cXkn;j1sH", "site": "https://openreview.net/forum?id=OYKIo3ySkxA", "pdf_size": 1272206, "recommendation": "3;5;5", "confidence": "3;3;3", "correctness": "2;4;3", "technical_novelty": "3;2;3", "empirical_novelty": "0;2;3", "wc_summary_paper": "210;28;47", "wc_strength_and_weaknesses": "539;38;138", "wc_clarity_quality_novelty_and_reproducibility": "57;10;75", "wc_summary_review": "86;39;25", "wc_review": "892;115;285", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1336;318;729", "reply_reviewers": "0;0;0", "reply_authors": "2;1;1", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.0 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.6666666666666667, 1.247219128924647 ], "wc_summary_paper_avg": [ 95.0, 81.68639217560454 ], "wc_strength_and_weaknesses_avg": [ 238.33333333333334, 216.48761832698165 ], 
"wc_clarity_quality_novelty_and_reproducibility_avg": [ 47.333333333333336, 27.402351886086148 ], "wc_summary_review_avg": [ 50.0, 26.08958923913266 ], "wc_review_avg": [ 430.6666666666667, 333.5129516059142 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 794.3333333333334, 418.1565363460063 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.8660254037844385, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5133090184804135753&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0", "aff_unique_norm": "University of Illinois at Chicago", "aff_unique_dep": "", "aff_unique_url": "https://www.uic.edu", "aff_unique_abbr": "UIC", "aff_campus_unique_index": "0", "aff_campus_unique": "Chicago", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "OZG9yDOz0b", "title": "Do Spiking Neural Networks Learn Similar Representation with Artificial Neural Networks? A Pilot Study on SNN Representation", "track": "main", "status": "Withdraw", "tldr": "Systematic study on the representation difference between ANNs and SNNs are conducted in this work. ", "abstract": "Spiking Neural Networks (SNNs) have recently driven much research interest owing to their bio-plausibility and energy efficiency. The biomimicry spatial-temporal communication and computation mechanisms are the key differences that set SNNs apart from current Artificial Neural Networks (ANNs). However, some essential questions exist pertaining to SNNs and yet are little studied: Do SNNs learn similar representation with ANN? Does the time dimension in spiking neurons provide additional information? In this paper, we aim to answer these questions by conducting a representation similarity analysis between SNNs and ANNs using Centered Kernel Alignment~(CKA). We start by analyzing the spatial dimension of the networks, including both the width and the depth. Furthermore, our analysis of residual connection shows that SNN learns a periodic pattern, which rectifies the representations in SNN to ANN-like. We additionally investigate the effect of the time dimension on SNN representation, finding that deeper layers encourage more dynamics along the time dimension. Other aspects like potential improvement in terms of accuracy, efficiency, and adversarial robustness are also analyzed using CKA. 
We hope this work will inspire future research to fully comprehend the representation of SNNs.", "keywords": "Spiking Neural Networks;Artificial Neural Network;Representation Similarity Analysis", "primary_area": "", "supplementary_material": "", "author": "Yuhang Li;Youngeun Kim;Hyoungseob Park;Priyadarshini Panda", "authorids": "~Yuhang_Li1;~Youngeun_Kim1;~Hyoungseob_Park1;~Priyadarshini_Panda1", "gender": "M;M;M;F", "homepage": ";https://youngryan1993.github.io/homepage/;;https://intelligentcomputinglab.yale.edu/", "dblp": ";58/2943;268/8125;168/8446", "google_scholar": "3UzXL-AAAAAJ;bh5Ve0EAAAAJ;A3c4pHkAAAAJ;qA5WsYUAAAAJ", "orcid": ";;0000-0003-0787-2082;", "linkedin": ";youngeun-kim-3b97b6179/;;", "or_profile": "~Yuhang_Li1;~Youngeun_Kim1;~Hyoungseob_Park1;~Priyadarshini_Panda1", "aff": "Yale University;Yale University;Yale University;Yale University", "aff_domain": "yale.edu;yale.edu;yale.edu;yale.edu", "position": "PhD student;PhD student;PhD student;Assistant Professor", "bibtex": "@misc{\nli2023do,\ntitle={Do Spiking Neural Networks Learn Similar Representation with Artificial Neural Networks? A Pilot Study on {SNN} Representation},\nauthor={Yuhang Li and Youngeun Kim and Hyoungseob Park and Priyadarshini Panda},\nyear={2023},\nurl={https://openreview.net/forum?id=OZG9yDOz0b}\n}", "github": "", "project": "", "reviewers": "6F57;XB5C;HUUX;GsS5", "site": "https://openreview.net/forum?id=OZG9yDOz0b", "pdf_size": 3022542, "recommendation": "3;3;3;6", "confidence": "4;4;3;5", "correctness": "2;2;3;3", "technical_novelty": "3;2;2;4", "empirical_novelty": "2;2;3;0", "wc_summary_paper": "37;51;82;41", "wc_strength_and_weaknesses": "413;83;189;172", "wc_clarity_quality_novelty_and_reproducibility": "32;34;65;9", "wc_summary_review": "44;12;18;19", "wc_review": "526;180;354;241", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.75, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 1.75, 1.0897247358851685 ], "wc_summary_paper_avg": [ 52.75, 17.640507362318125 ], "wc_strength_and_weaknesses_avg": [ 214.25, 121.60463601359942 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 35.0, 19.912307751739878 ], "wc_summary_review_avg": [ 23.25, 12.275483697190918 ], "wc_review_avg": [ 325.25, 131.64606906398686 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.816496580927726, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:LT93aiNJtQAJ:scholar.google.com/&scioq=Do+Spiking+Neural+Networks+Learn+Similar+Representation+with+Artificial+Neural+Networks%3F+A+Pilot+Study+on+SNN+Representation&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Yale University", "aff_unique_dep": "", "aff_unique_url": "https://www.yale.edu", "aff_unique_abbr": "Yale", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "O_er9uNktN", "title": "Cross-utterance Conditioned Coherent Speech Editing via Biased Training and Entire Inference", "track": "main", "status": "Withdraw", "tldr": "", "abstract": 
"Text-based speech editing systems are developed to enable users to select, cut, copy and paste speech based on the transcript. Existing state-of-art editing systems based on neural networks do partial inferences with no exception, that is, only generate new words that need to be replaced or inserted. This manner usually leads to the prosody of the edited part being inconsistent with the previous and subsequent speech and the failure to handle the alteration of intonation. To address these problems, we propose a cross-utterance conditioned coherent speech editing system, which first does the entire reasoning at inference time. Benefiting from a cross-utterance conditioned variational autoencoder, our proposed system can forge speech by utilizing speaker information, context and acoustic features, and the mel-spectrogram of unedited fragments from the original audio. Also, we apply biased training to concentrate more attention on the part that needs to be reconstructed throughout training. Experiments conducted on subjective and objective metrics demonstrate that our approach outperforms the partial inference method on various editing operations regarding naturalness and prosody consistency.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Cheng Yu;Yang Li;Weiqin Zu;Fanglei Sun;Zheng Tian;Jun Wang", "authorids": "~Cheng_Yu4;~Yang_Li40;~Weiqin_Zu1;~Fanglei_Sun1;~Zheng_Tian1;~Jun_Wang2", "gender": ";M;M;F;M;M", "homepage": ";https://liyang.page;https://github.com/ZUWQ;;;http://www0.cs.ucl.ac.uk/staff/jun.wang/", "dblp": ";;;;17/2752-2.html;w/JunWang12", "google_scholar": ";msAmwaoAAAAJ;l7Fo9XoAAAAJ;VugNoHkAAAAJ;;https://scholar.google.co.uk/citations?user=wIE1tY4AAAAJ", "orcid": ";;0009-0002-9745-4457;;;", "linkedin": ";;;;;", "or_profile": "~Cheng_Yu4;~Yang_Li40;~Weiqin_Zu1;~Fanglei_Sun1;~Zheng_Tian1;~Jun_Wang2", "aff": ";University of Manchester;ShanghaiTech University;ShanghaiTech;ShanghaiTech University;University College London", "aff_domain": ";cs.manchester.ac.uk;shanghaitech.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cn;ucl.ac.uk", "position": ";PhD student;MS student;Researcher;Assistant Professor;Professor", "bibtex": "@misc{\nyu2023crossutterance,\ntitle={Cross-utterance Conditioned Coherent Speech Editing via Biased Training and Entire Inference},\nauthor={Cheng Yu and Yang Li and Weiqin Zu and Fanglei Sun and Zheng Tian and Jun Wang},\nyear={2023},\nurl={https://openreview.net/forum?id=O_er9uNktN}\n}", "github": "", "project": "", "reviewers": "btJ9;nbvi;3AaJ;zx83", "site": "https://openreview.net/forum?id=O_er9uNktN", "pdf_size": 1160402, "recommendation": "3;5;6;8", "confidence": "3;3;4;4", "correctness": "4;3;4;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "30;161;22;52", "wc_strength_and_weaknesses": "51;144;42;46", "wc_clarity_quality_novelty_and_reproducibility": "38;217;20;32", "wc_summary_review": "134;83;30;48", "wc_review": "253;605;114;178", "wc_reply_reviewers": "396;0;0;0", "wc_reply_authors": "646;786;490;538", "reply_reviewers": "1;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 66.25, 55.7959451931769 ], "wc_strength_and_weaknesses_avg": [ 70.75, 42.41093608964556 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 76.75, 81.23230576562504 ], 
"wc_summary_review_avg": [ 73.75, 39.663427738913335 ], "wc_review_avg": [ 287.5, 189.79528445143202 ], "wc_reply_reviewers_avg": [ 99.0, 171.47302994931886 ], "wc_reply_authors_avg": [ 615.0, 113.74972527439353 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.8320502943378437, "corr_recommendation_correctness": -0.5547001962252291, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:7sws_mU-2CsJ:scholar.google.com/&scioq=Cross-utterance+Conditioned+Coherent+Speech+Editing+via+Biased+Training+and+Entire+Inference&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;1;1;2", "aff_unique_norm": "University of Manchester;ShanghaiTech University;University College London", "aff_unique_dep": ";;", "aff_unique_url": "https://www.manchester.ac.uk;https://www.shanghaitech.edu.cn;https://www.ucl.ac.uk", "aff_unique_abbr": "UoM;ShanghaiTech;UCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;0", "aff_country_unique": "United Kingdom;China" }, { "title": "Structure by Architecture: Structured Representations without Regularization", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11196", "id": "O_lFCPaF48t", "poster": "", "openreview": "https://openreview.net/forum?id=O_lFCPaF48t", "slides": "https://iclr.cc/virtual/2023/poster/11196", "video": "https://iclr.cc/virtual/2023/poster/11196", "author_site": "Felix Leeb, Giulia Lanzillotta, Yashas Annadani, michel besserve, Stefan Bauer, Bernhard Schoelkopf", "tldr": "A novel autoencoder architecture to structure the learned representation without regularizing the objective and improve sampling for generative modeling.", "abstract": "We study the problem of self-supervised structured representation learning using autoencoders for downstream tasks such as generative modeling. Unlike most methods which rely on matching an arbitrary, relatively unstructured, prior distribution for sampling, we propose a sampling technique that relies solely on the independence of latent variables, thereby avoiding the trade-off between reconstruction quality and generative performance typically observed in VAEs. We design a novel autoencoder architecture capable of learning a structured representation without the need for aggressive regularization. Our structural decoders learn a hierarchy of latent variables, thereby ordering the information without any additional regularization or supervision. 
We demonstrate how these models learn a representation that improves results in a variety of downstream tasks including generation, disentanglement, and extrapolation using several challenging and natural image datasets.", "keywords": "Autoencoder;Structure;Generative;Architecture;Disentanglement;Regularization;Hybridization", "primary_area": "", "supplementary_material": "/attachment/9ead92e3a55b64f801f7b886a53b4eb47c22e234.zip", "author": "Felix Leeb;Giulia Lanzillotta;Yashas Annadani;Michel Besserve;Stefan Bauer;Bernhard Sch\u00f6lkopf", "authorids": "~Felix_Leeb1;~Giulia_Lanzillotta1;~Yashas_Annadani1;~Michel_Besserve1;~Stefan_Bauer1;~Bernhard_Sch\u00f6lkopf1", "gender": ";F;;M;;", "homepage": "https://ei.is.mpg.de/person/fleeb;;https://yashasannadani.com;https://computational-homeostasis.com;https://cifar.ca/bios/stefan-bauer/;", "dblp": ";;190/7411;71/511;;", "google_scholar": ";eiB9OOkAAAAJ;ExgzcVMAAAAJ;https://scholar.google.de/citations?user=Nbq6kI0AAAAJ;O-oICE8AAAAJ;", "orcid": ";0009-0008-2047-8251;;;;", "linkedin": ";giulia-lanzillotta-0aab3186/;;;;", "or_profile": "~Felix_Leeb1;~Giulia_Lanzillotta1;~Yashas_Annadani1;~Michel_Besserve1;~Stefan_Bauer1;~Bernhard_Sch\u00f6lkopf1", "aff": "Max Planck Institute for Intelligent Systems, Max-Planck Institute;Swiss Federal Institute of Technology;KTH Royal Institute of Technology;MPI for Intelligent Systems;KTH Royal Institute of Technology;", "aff_domain": "tuebingen.mpg.de;ethz.ch;kth.se;tuebingen.mpg.de;kth.se;", "position": "PhD student;PhD student;PhD student;Senior research scientist;Assistant Professor;", "bibtex": "@inproceedings{\nleeb2023structure,\ntitle={Structure by Architecture: Structured Representations without Regularization},\nauthor={Felix Leeb and Giulia Lanzillotta and Yashas Annadani and Michel Besserve and Stefan Bauer and Bernhard Sch{\\\"o}lkopf},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=O_lFCPaF48t}\n}", "github": "", "project": "", "reviewers": "ECfH;hCAA;X8MV;HNCN", "pdf_size": 28387337, "recommendation": "6;6;6;8", "confidence": "4;3;3;3", "correctness": "3;3;2;4", "technical_novelty": "3;2;2;3", "empirical_novelty": "3;2;2;3", "wc_summary_paper": "213;59;114;60", "wc_strength_and_weaknesses": "346;133;55;128", "wc_clarity_quality_novelty_and_reproducibility": "38;23;52;48", "wc_summary_review": "66;36;126;19", "wc_review": "663;251;347;255", "wc_reply_reviewers": "22;8;119;0", "wc_reply_authors": "544;344;616;180", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;2;1", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 111.5, 62.68372994645421 ], "wc_strength_and_weaknesses_avg": [ 165.5, 108.68877586945213 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 40.25, 11.188722000300123 ], "wc_summary_review_avg": [ 61.75, 40.73312534044006 ], "wc_review_avg": [ 379.0, 168.40427548016706 ], "wc_reply_reviewers_avg": [ 37.25, 47.8506792010312 ], "wc_reply_authors_avg": [ 421.0, 171.1461363864227 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": 0.816496580927726, "gs_citation": 9, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=8105003920899412580&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=O_lFCPaF48t", "email": "tuebingen.mpg.de;ethz.ch;kth.se;tuebingen.mpg.de;kth.se;", "author_num": 6, "aff_unique_index": "0;1;2;0;2", "aff_unique_norm": "Max Planck Institute for Intelligent Systems;Swiss Federal Institute of Technology;KTH Royal Institute of Technology", "aff_unique_dep": "Intelligent Systems;;", "aff_unique_url": "https://www.mpi-is.mpg.de;https://www.ethz.ch;https://www.kth.se", "aff_unique_abbr": "MPI-IS;ETH Zurich;KTH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;0;2", "aff_country_unique": "Germany;Switzerland;Sweden" }, { "id": "O_m1c-A5w6w", "title": "StepGCN: Step-oriented Graph Convolutional Networks in Representation Learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Graph Convolutional Networks (GCNs) are employed to address a number of tasks in our society with their representation learning approach. Nonetheless, despite their effectiveness and usefulness, the majority of GCN-oriented approaches have an over-smoothing concern. Over-smoothing is the problem of node representations converging into a certain value, making the nodes indistinguishable. To effectively address the over-smoothing problem, we introduce StepGCN, a GCN model that integrates step learning techniques with graph residual connection networks. With our StepGCN, we achieved significant performance improvements in multiple representation learning benchmark datasets, and demonstrate that step learning can be expanded to other graph networks. ", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Juyeob Lee;Minyoung Lee;Eunil Park", "authorids": "jjuyeah.lee@gmail.com;eda2@naver.com;~Eunil_Park1", "gender": ";;M", "homepage": ";;http://eunilpark.com", "dblp": ";;56/9255", "google_scholar": ";;https://scholar.google.com/citations?hl=ko", "orcid": ";;", "linkedin": ";;", "or_profile": "jjuyeah.lee@gmail.com;eda2@naver.com;~Eunil_Park1", "aff": ";;Sungkyunkwan University", "aff_domain": ";;skku.edu", "position": ";;Associate Professor", "bibtex": "@misc{\nlee2023stepgcn,\ntitle={Step{GCN}: Step-oriented Graph Convolutional Networks in Representation Learning},\nauthor={Juyeob Lee and Minyoung Lee and Eunil Park},\nyear={2023},\nurl={https://openreview.net/forum?id=O_m1c-A5w6w}\n}", "github": "", "project": "", "reviewers": "uCMz;HaaT;uiE4", "site": "https://openreview.net/forum?id=O_m1c-A5w6w", "pdf_size": 1421778, "recommendation": "3;3;3", "confidence": "3;5;5", "correctness": "2;3;4", "technical_novelty": "3;4;2", "empirical_novelty": "0;4;2", "wc_summary_paper": "19;114;37", "wc_strength_and_weaknesses": "380;125;211", "wc_clarity_quality_novelty_and_reproducibility": "38;24;4", "wc_summary_review": "42;14;4", "wc_review": "479;277;256", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 4.333333333333333, 0.9428090415820634 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 3.0, 0.816496580927726 ], "empirical_novelty_avg": [ 2.0, 1.632993161855452 ], "wc_summary_paper_avg": [ 56.666666666666664, 41.20140234938072 ], "wc_strength_and_weaknesses_avg": [ 238.66666666666666, 105.92555037488462 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 22.0, 13.9522996909709 ], "wc_summary_review_avg": [ 20.0, 
16.08311744241976 ], "wc_review_avg": [ 337.3333333333333, 100.53965497365813 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:IsKXLmPcoKwJ:scholar.google.com/&scioq=StepGCN:+Step-oriented+Graph+Convolutional+Networks+in+Representation+Learning&hl=en&as_sdt=0,24", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Sungkyunkwan University", "aff_unique_dep": "", "aff_unique_url": "https://www.skku.edu", "aff_unique_abbr": "SKKU", "aff_country_unique_index": "0", "aff_country_unique": "South Korea" }, { "id": "ObWiIiKihBf", "title": "Boomerang: Local sampling on image manifolds using diffusion models", "track": "main", "status": "Reject", "tldr": "", "abstract": "Diffusion models can be viewed as mapping points in a high-dimensional latent space onto a low-dimensional learned manifold, typically an image manifold. The intermediate values between the latent space and image manifold can be interpreted as noisy images which are determined by the noise scheduling scheme employed during pre-training. We exploit this interpretation to introduce Boomerang, a local image manifold sampling approach using the dynamics of diffusion models. We call it Boomerang because we first add noise to an input image, moving it closer to the latent space, then bring it back to the image space through diffusion dynamics. We use this method to generate images which are similar, but nonidentical, to the original input images on the image manifold. We are able to set how close the generated image is to the original based on how much noise we add. Additionally, the generated images have a degree of stochasticity, allowing us to locally sample as many times as we want without repetition. We show three applications for which Boomerang can be used. First, we provide a framework for constructing privacy-preserving datasets having controllable degrees of anonymity. Second, we show how to use Boomerang for data augmentation while staying on the image manifold. Third, we introduce a framework for image super-resolution with 8x upsampling. Boomerang does not require any modification to the training of diffusion models and can be used with pretrained models on a single, inexpensive GPU.", "keywords": "Diffusion models;local sampling;image manifolds", "primary_area": "", "supplementary_material": "/attachment/d2258699a4e2ac95e4ade5a2a59778da5aa31436.zip", "author": "Lorenzo Luzi;Ali Siahkoohi;Paul M Mayer;Josue Casco-Rodriguez;Richard Baraniuk", "authorids": "~Lorenzo_Luzi1;~Ali_Siahkoohi1;~Paul_M_Mayer1;jc135@rice.edu;~Richard_Baraniuk1", "gender": "M;M;M;;", "homepage": ";https://alisiahkoohi.github.io;http://www.djmyr.com;;http://richb.rice.edu/", "dblp": ";00/10956;;;32/2804", "google_scholar": "https://scholar.google.com/citations?hl=en;sxRMqYIAAAAJ;;;https://scholar.google.com.tw/citations?user=N-BBA20AAAAJ", "orcid": ";0000-0001-8779-2247;;;", "linkedin": ";alisiahkoohi/;;;richard-baraniuk", "or_profile": "~Lorenzo_Luzi1;~Ali_Siahkoohi1;~Paul_M_Mayer1;jc135@rice.edu;~Richard_Baraniuk1", "aff": "Rice University;Rice University;;;William Marsh Rice University", "aff_domain": "rice.edu;rice.edu;;;rice.edu", "position": "PhD student;Postdoc;;;C. 
Sidney Burrus Professor", "bibtex": "@misc{\nluzi2023boomerang,\ntitle={Boomerang: Local sampling on image manifolds using diffusion models},\nauthor={Lorenzo Luzi and Ali Siahkoohi and Paul M Mayer and Josue Casco-Rodriguez and Richard Baraniuk},\nyear={2023},\nurl={https://openreview.net/forum?id=ObWiIiKihBf}\n}", "github": "", "project": "", "reviewers": "z9Wv;Wv4p;qWcF;ydwA", "site": "https://openreview.net/forum?id=ObWiIiKihBf", "pdf_size": 29723641, "recommendation": "3;3;3;8", "confidence": "4;4;4;3", "correctness": "3;3;2;4", "technical_novelty": "1;1;1;3", "empirical_novelty": "1;1;2;3", "wc_summary_paper": "48;42;64;109", "wc_strength_and_weaknesses": "74;166;222;67", "wc_clarity_quality_novelty_and_reproducibility": "52;13;36;49", "wc_summary_review": "63;24;23;32", "wc_review": "237;245;345;257", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "368;322;401;72", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.25, 2.165063509461097 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 1.5, 0.8660254037844386 ], "empirical_novelty_avg": [ 1.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 65.75, 26.233328038966004 ], "wc_strength_and_weaknesses_avg": [ 132.25, 64.89366301881871 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 37.5, 15.370426148939398 ], "wc_summary_review_avg": [ 35.5, 16.25576820700886 ], "wc_review_avg": [ 271.0, 43.31281565541543 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 290.75, 129.3742149734637 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": 0.816496580927726, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10360617629140579448&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "Rice University", "aff_unique_dep": "", "aff_unique_url": "https://www.rice.edu", "aff_unique_abbr": "Rice", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "OboQ71j1Bn", "title": "Reduce, Reuse, Recycle: Compositional Generation with Energy-Based Diffusion Models and MCMC", "track": "main", "status": "Reject", "tldr": "We show how diffusion models can be composed together to create new models and demonstrate how to make them perform well at this task.", "abstract": "Since their introduction, diffusion models have quickly become the prevailing approach to generative modeling in many domains. They can be interpreted as learning the gradients of a time-varying sequence of log-probability density functions. This interpretation has motivated classifier-based and classifier-free guidance as methods for post-hoc control of diffusion models. In this work, we build upon these ideas using the score-based interpretation of diffusion models, and explore alternative ways to condition, modify, and reuse diffusion models for tasks involving compositional generation and guidance. In particular, we investigate why certain types of composition fail using current techniques and present a number of solutions. We conclude that the sampler (not the model) is responsible for this failure and propose new samplers, inspired by MCMC, which enable successful compositional generation. 
Further, we propose an energy-based parameterization of diffusion models which enables the use of new compositional operators and more sophisticated, Metropolis-corrected samplers. Intriguingly we find these samplers lead to notable improvements in compositional generation across a wide variety of problems such as classifier-guided ImageNet modeling and compositional text-to-image generation.", "keywords": "Generative models;diffusion models;compositional generation", "primary_area": "", "supplementary_material": "", "author": "Yilun Du;Conor Durkan;Robin Strudel;Joshua B. Tenenbaum;Sander Dieleman;Rob Fergus;Jascha Sohl-Dickstein;Arnaud Doucet;Will Sussman Grathwohl", "authorids": "~Yilun_Du1;~Conor_Durkan1;~Robin_Strudel1;~Joshua_B._Tenenbaum1;~Sander_Dieleman1;~Rob_Fergus1;~Jascha_Sohl-Dickstein2;~Arnaud_Doucet2;~Will_Sussman_Grathwohl2", "gender": ";M;M;;M;M;;M;M", "homepage": "https://yilundu.github.io;https://conormdurkan.github.io;https://rstrudel.github.io;;http://benanne.github.io/;http://cs.nyu.edu/fergus/;https://www.stats.ox.ac.uk/~doucet/;http://www.cs.toronto.edu/~wgrathwohl/;http://sohldickstein.com", "dblp": "204/4379;230/4513;238/0225;t/JoshuaBTenenbaum;https://dblp.org/pers/d/Dieleman:Sander.html;77/3763;68/1628;192/1565;51/7117", "google_scholar": ";xSR1VNYAAAAJ;5d3RVLIAAAAJ;;https://scholar.google.co.uk/citations?user=yNNIKJsAAAAJ;https://scholar.google.com.tw/citations?user=GgQ9GEkAAAAJ;W4SZGV8AAAAJ;;-3zYIjQAAAAJ", "orcid": ";;;;;;0000-0002-7662-419X;;", "linkedin": ";;;;;;;will-grathwohl-b44a383b/;", "or_profile": "~Yilun_Du1;~Conor_Durkan1;~Robin_Strudel1;~Joshua_B._Tenenbaum1;~Sander_Dieleman1;~Rob_Fergus1;~Arnaud_Doucet2;~Will_Sussman_Grathwohl2;~Jascha_Sohl-Dickstein1", "aff": "Massachusetts Institute of Technology;Google DeepMind;;Massachusetts Institute of Technology;Google DeepMind;Google;University of Oxford;Google DeepMind;Google", "aff_domain": "mit.edu;deepmind.com;;mit.edu;deepmind.com;google.com;ox.ac.uk;deepmind.com;google.com", "position": "PhD student;Research Scientist;;Professor;Research Scientist;Research scientist;Full Professor;Senior Research Scientist;Research Scientist", "bibtex": "@misc{\ndu2023reduce,\ntitle={Reduce, Reuse, Recycle: Compositional Generation with Energy-Based Diffusion Models and {MCMC}},\nauthor={Yilun Du and Conor Durkan and Robin Strudel and Joshua B. 
Tenenbaum and Sander Dieleman and Rob Fergus and Jascha Sohl-Dickstein and Arnaud Doucet and Will Sussman Grathwohl},\nyear={2023},\nurl={https://openreview.net/forum?id=OboQ71j1Bn}\n}", "github": "", "project": "", "reviewers": "ndtR;P5qB;5cCF;8B2n", "site": "https://openreview.net/forum?id=OboQ71j1Bn", "pdf_size": 14829045, "recommendation": "5;5;6;6", "confidence": "3;3;4;3", "correctness": "3;3;3;4", "technical_novelty": "2;3;2;3", "empirical_novelty": "3;2;3;3", "wc_summary_paper": "86;92;76;70", "wc_strength_and_weaknesses": "315;228;229;56", "wc_clarity_quality_novelty_and_reproducibility": "27;90;76;59", "wc_summary_review": "50;26;46;31", "wc_review": "478;436;427;216", "wc_reply_reviewers": "194;0;15;0", "wc_reply_authors": "1390;589;276;304", "reply_reviewers": "2;0;1;0", "reply_authors": "5;2;1;2", "recommendation_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 81.0, 8.54400374531753 ], "wc_strength_and_weaknesses_avg": [ 207.0, 94.06115032254283 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 63.0, 23.50531854708632 ], "wc_summary_review_avg": [ 38.25, 10.0093706095838 ], "wc_review_avg": [ 389.25, 101.86111868617976 ], "wc_reply_reviewers_avg": [ 52.25, 82.06818811208153 ], "wc_reply_authors_avg": [ 639.75, 450.136854189923 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.5, 1.5 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 9, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 162, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2483186678951812118&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 8, "aff_unique_index": "0;1;0;1;1;2;1;1", "aff_unique_norm": "Massachusetts Institute of Technology;Google;University of Oxford", "aff_unique_dep": ";Google DeepMind;", "aff_unique_url": "https://web.mit.edu;https://deepmind.com;https://www.ox.ac.uk", "aff_unique_abbr": "MIT;DeepMind;Oxford", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;0;1;0;1;1;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Critic Sequential Monte Carlo", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11987", "id": "ObtGcyKmwna", "poster": "/media/PosterPDFs/ICLR%202023/11987.png?t=1680986170.7060003", "openreview": "https://openreview.net/forum?id=ObtGcyKmwna", "slides": "https://iclr.cc/virtual/2023/poster/11987", "video": "https://iclr.cc/virtual/2023/poster/11987", "author_site": "Vasileios Lioutas, Jonathan Lavington, Justice Sefas, Matthew Niedoba, Yunpeng Liu, Berend Zwartsenberg, Setareh Dabiri, Frank Wood, Adam Scibior", "tldr": "We present a novel method called CriticSMC capable of being deployed in model-predictive planning and model-free online control cases within environments with hard constraints taking advantage of informative prior policies.", "abstract": "We introduce CriticSMC, a new algorithm for planning as inference built from a composition of sequential Monte Carlo with learned Soft-Q function heuristic factors. 
These heuristic factors, obtained from parametric approximations of the marginal likelihood ahead, more effectively guide SMC towards the desired target distribution, which is particularly helpful for planning in environments with hard constraints placed sparsely in time. Compared with previous work, we modify the placement of such heuristic factors, which allows us to cheaply propose and evaluate large numbers of putative action particles, greatly increasing inference and planning efficiency. CriticSMC is compatible with informative priors, whose density function need not be known, and can be used as a model-free control algorithm. Our experiments on collision avoidance in a high-dimensional simulated driving task show that CriticSMC significantly reduces collision rates at a low computational cost while maintaining realism and diversity of driving behaviors across vehicles and environment scenarios.", "keywords": "sequential monte carlo;reinforcement learning as inference;soft Q-learning;heuristic factors;driving behavior models", "primary_area": "", "supplementary_material": "/attachment/40e58b68354e9f20fd591786296c42fe903ea0cc.zip", "author": "Vasileios Lioutas;Jonathan Wilder Lavington;Justice Sefas;Matthew Niedoba;Yunpeng Liu;Berend Zwartsenberg;Setareh Dabiri;Frank Wood;Adam Scibior", "authorids": "~Vasileios_Lioutas1;~Jonathan_Wilder_Lavington1;~Justice_Sefas1;~Matthew_Niedoba2;~Yunpeng_Liu1;~Berend_Zwartsenberg1;~Setareh_Dabiri1;~Frank_Wood2;~Adam_Scibior1", "gender": "M;M;M;M;M;M;;M;", "homepage": "http://www.vlioutas.com/;https://wilderlavington.github.io/;;;;https://bzwartsenberg.github.io/;;http://www.robots.ox.ac.uk/~fwood/;https://www.cs.ubc.ca/~ascibior/", "dblp": "224/6571;282/4019;321/4203;243/2863;02/8137-7.html;;;44/4750;167/6446", "google_scholar": "2jhOrwoAAAAJ;;https://scholar.google.com/citations?hl%3Den=;uSl2vYwAAAAJ;;;;d4yNzXIAAAAJ;https://scholar.google.co.uk/citations?user=Gpw8Z0cAAAAJ", "orcid": ";;;;;;;;", "linkedin": "vasileioslioutas/;;;;larry-liu-323b51126/;;;frank-wood-43529114?trk=hp-identity-name;", "or_profile": "~Vasileios_Lioutas1;~Jonathan_Wilder_Lavington1;~Justice_Sefas1;~Matthew_Niedoba2;~Yunpeng_Liu1;~Berend_Zwartsenberg1;~Setareh_Dabiri1;~Frank_Wood2;~Adam_Scibior1", "aff": "University of British Columbia;;Inverted AI;Inverted AI;University of British Columbia;Inverted AI;;University of British Columbia;Inverted AI", "aff_domain": "ubc.ca;;inverted.ai;inverted.ai;cs.ubc.ca;inverted.ai;;cs.ubc.ca;inverted.ai", "position": "PhD student;;Researcher;Researcher;PhD student;Researcher;;Full Professor;Researcher", "bibtex": "@inproceedings{\nlioutas2023critic,\ntitle={Critic Sequential Monte Carlo},\nauthor={Vasileios Lioutas and Jonathan Wilder Lavington and Justice Sefas and Matthew Niedoba and Yunpeng Liu and Berend Zwartsenberg and Setareh Dabiri and Frank Wood and Adam Scibior},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=ObtGcyKmwna}\n}", "github": "", "project": "", "reviewers": "toeN;9uTg;qbn3;ViDn", "pdf_size": 7609464, "recommendation": "1;6;6;6", "confidence": "3;4;3;3", "correctness": "2;4;3;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "1;3;3;3", "wc_summary_paper": "56;45;104;168", "wc_strength_and_weaknesses": "102;204;513;141", "wc_clarity_quality_novelty_and_reproducibility": "40;232;12;129", "wc_summary_review": "203;62;40;23", "wc_review": "401;543;669;461", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "2036;676;1329;574", 
"reply_reviewers": "0;0;0;0", "reply_authors": "3;1;2;1", "recommendation_avg": [ 4.75, 2.165063509461097 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 93.25, 48.52512235945418 ], "wc_strength_and_weaknesses_avg": [ 240.0, 161.76371657451494 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 103.25, 85.97492366963753 ], "wc_summary_review_avg": [ 82.0, 71.21446482281532 ], "wc_review_avg": [ 518.5, 100.45272519946883 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1153.75, 585.9677358865418 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 9, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 0.816496580927726, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8982933911500310814&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=ObtGcyKmwna", "email": "ubc.ca;;inverted.ai;inverted.ai;cs.ubc.ca;inverted.ai;;cs.ubc.ca;inverted.ai", "author_num": 9, "aff_unique_index": "0;1;1;0;1;0;1", "aff_unique_norm": "University of British Columbia;Inverted AI", "aff_unique_dep": ";", "aff_unique_url": "https://www.ubc.ca;https://www.inverted.ai", "aff_unique_abbr": "UBC;Inverted AI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;1;0;1", "aff_country_unique": "Canada;United States" }, { "title": "Reversible Column Networks", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11922", "id": "Oc2vlWU0jFY", "poster": "/media/PosterPDFs/ICLR%202023/11922.png?t=1682272724.128075", "openreview": "https://openreview.net/forum?id=Oc2vlWU0jFY", "slides": "https://iclr.cc/virtual/2023/poster/11922", "video": "https://iclr.cc/virtual/2023/poster/11922", "author_site": "Yuxuan Cai, Yizhuang Zhou, Qi Han, Jianjian Sun, Xiangwen Kong, Jun Li, Xiangyu Zhang", "tldr": "", "abstract": "We propose a new neural network design paradigm Reversible Column Network (RevCol). The main body of RevCol is composed of multiple copies of subnetworks, named columns respectively, between which multi-level reversible connections are employed. Such architectural scheme attributes RevCol very different behavior from conventional networks: during forward propagation, features in RevCol are learned to be gradually disentangled when passing through each column, whose total information is maintained rather than compressed or discarded as other network does. Our experiments suggest that CNN-style RevCol models can achieve very competitive performances on multiple computer vision tasks such as image classification, object detection and semantic segmentation, especially with large parameter budget and large dataset. For example, after ImageNet-22K pre-training, RevCol-XL obtains 88.2% ImageNet-1K accuracy. Given more pre-training data, our largest model RevCol-H reaches 90.0% on ImageNet-1K, 63.8% AP$_{box}$ on COCO detection minival set, 61.0% mIoU on ADE20k segmentation. To our knowledge, it is the best COCO detection and ADE20k segmentation result among pure (static) CNN models. 
Moreover, as a general macro architecture fashion, RevCol can also be introduced into transformers or other neural networks, which is demonstrated to improve the performances in both computer vision and NLP tasks. \nWe release code and models at https://github.com/megvii-research/RevCol", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Yuxuan Cai;Yizhuang Zhou;Qi Han;Jianjian Sun;Xiangwen Kong;Jun Li;Xiangyu Zhang", "authorids": "~Yuxuan_Cai1;~Yizhuang_Zhou1;~Qi_Han3;~Jianjian_Sun1;~Xiangwen_Kong1;lijun2021@ia.ac.cn;~Xiangyu_Zhang1", "gender": "M;M;M;M;;;M", "homepage": "https://nightsnack.github.io;;http://hanqer.github.io;;;;", "dblp": ";;76/5895;322/9274;;;95/3760-5.html", "google_scholar": "EzYiBeUAAAAJ;VRSGDDEAAAAJ;DuEUlAQAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?view_op=list_works;;yuB-cfoAAAAJ", "orcid": ";;;;;;0000-0003-2138-4608", "linkedin": ";;;;;;", "or_profile": "~Yuxuan_Cai1;~Yizhuang_Zhou1;~Qi_Han3;~Jianjian_Sun1;~Xiangwen_Kong1;lijun2021@ia.ac.cn;~Xiangyu_Zhang1", "aff": "Megvii Technology Inc.;Megvii Technology Inc.;Megvii Technology Inc.;Megvii Technology Inc.;MEGVII Technology;;MEGVII Technology", "aff_domain": "megvii.com;megvii.com;megvii.com;megvii.com;megvii.com;;megvii.com", "position": "Researcher;Researcher;Researcher;Researcher;Researcher;;Principal Researcher", "bibtex": "@inproceedings{\ncai2023reversible,\ntitle={Reversible Column Networks},\nauthor={Yuxuan Cai and Yizhuang Zhou and Qi Han and Jianjian Sun and Xiangwen Kong and Jun Li and Xiangyu Zhang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=Oc2vlWU0jFY}\n}", "github": "", "project": "", "reviewers": "cXh3;BtZK;AtgT", "pdf_size": 1778160, "recommendation": "6;6;6", "confidence": "3;4;3", "correctness": "3;3;3", "technical_novelty": "3;2;3", "empirical_novelty": "3;3;3", "wc_summary_paper": "94;55;55", "wc_strength_and_weaknesses": "184;66;231", "wc_clarity_quality_novelty_and_reproducibility": "79;13;54", "wc_summary_review": "55;22;23", "wc_review": "412;156;363", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1148;631;1166", "reply_reviewers": "0;0;0", "reply_authors": "2;1;2", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 68.0, 18.384776310850235 ], "wc_strength_and_weaknesses_avg": [ 160.33333333333334, 69.4086129781856 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 48.666666666666664, 27.207025236549146 ], "wc_summary_review_avg": [ 33.333333333333336, 15.326085243430198 ], "wc_review_avg": [ 310.3333333333333, 110.94843647188344 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 981.6666666666666, 248.06764310477 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 101, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5758858029948676340&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=Oc2vlWU0jFY", "email": "megvii.com;megvii.com;megvii.com;megvii.com;megvii.com;;megvii.com", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": 
"Megvii Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.megvii.com", "aff_unique_abbr": "Megvii", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "OcHQVmfLn2c", "title": "Prototypical Context-aware Dynamics Generalization for High-dimensional Model-based Reinforcement Learning", "track": "main", "status": "Withdraw", "tldr": "We present a prototypical Context-aware Dynamics (ProtoCAD) model to capture the local dynamics by time consistent latent context.", "abstract": "The latent world model provides a promising way to learn policies in a compact latent space for tasks with high-dimensional observations, however, its generalization across diverse environments with unseen dynamics remains challenging. Although the recurrent structure utilized in current advances helps to capture local dynamics, modeling only state transitions without an explicit understanding of environmental context limits the generalization ability of the dynamics model. To address this issue, we propose a Prototypical Context-aware Dynamics (ProtoCAD) model, which captures the local dynamics by time consistent latent context and enables dynamics generalization in high-dimensional control tasks. ProtoCAD extracts useful contextual information with the help of the prototypes clustered over batch and benefits model-based RL in two folds: 1) It utilizes a temporal consistent prototypical regularizer that encourages the prototype assignments produced for different time parts of the same latent trajectory to be temporal consistent instead of comparing the feature; 2) A context representation is designed which combines both the projection embedding of latent states and aggregated prototypes and can significantly improve the dynamics generalization ability. Extensive experiments show that ProtoCAD surpasses existing methods in terms of dynamics generalization. 
Compared with the recurrent-based model RSSM, ProtoCAD delivers 13.2% and 26.7% better mean and median performance across all dynamics generalization tasks.", "keywords": "model-based reinforcement learning;dynamics generalization;prototypical representation learning;latent world model", "primary_area": "", "supplementary_material": "", "author": "Junjie Wang;Yao Mu;Dong Li;qichao Zhang;Dongbin Zhao;Yuzheng Zhuang;Ping Luo;Bin Wang;Jianye HAO", "authorids": "~Junjie_Wang4;~Yao_Mu1;~Dong_Li10;~qichao_Zhang2;~Dongbin_Zhao1;~Yuzheng_Zhuang1;~Ping_Luo2;~Bin_Wang12;~Jianye_HAO1", "gender": "M;M;M;M;M;F;M;M;", "homepage": ";https://yaomarkmu.github.io/;;;http://people.ucas.ac.cn/~zhaodongbin?language=en;;http://binwang.top;http://www.icdai.org/jianye.html;http://luoping.me/", "dblp": ";260/0674;47/4826-16;;40/255;;13/1898-34;21/7664.html;54/4989-2.html", "google_scholar": "faRNMHoAAAAJ;;;snkECPAAAAAJ;;https://scholar.google.com/citations?hl=en;KWZG_YsAAAAJ;;https://scholar.google.com.hk/citations?hl=en", "orcid": ";;;0000-0001-9747-391X;0000-0001-8218-9633;;0000-0002-0267-3749;0000-0002-0422-8235;0000-0002-6685-7950", "linkedin": ";;;;;;;;", "or_profile": "~Junjie_Wang4;~Yao_Mu1;~Dong_Li10;~qichao_Zhang2;~Dongbin_Zhao1;~Yuzheng_Zhuang1;~Bin_Wang12;~Jianye_HAO1;~Luo_Ping2", "aff": "University of Chinese Academy of Sciences;The University of Hong Kong;Huawei Technologies Ltd.;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Huawei Technologies Ltd.;Huawei Noah's Ark Lab;Tianjin University;The University of Hong Kong", "aff_domain": "mails.ucas.edu.cn;hku.hk;huawei.com;ia.ac.cn;ia.ac.cn;huawei.com;huawei.com;tju.edu.cn;hku.hk", "position": "PhD student;PhD student;Principal Researcher;Associate Professor;Full Professor;Research Engineer;Senior Researcher;Associate Professor;Assistant Professor", "bibtex": "@misc{\nwang2023prototypical,\ntitle={Prototypical Context-aware Dynamics Generalization for High-dimensional Model-based Reinforcement Learning},\nauthor={Junjie Wang and Yao Mu and Dong Li and qichao Zhang and Dongbin Zhao and Yuzheng Zhuang and Ping Luo and Bin Wang and Jianye HAO},\nyear={2023},\nurl={https://openreview.net/forum?id=OcHQVmfLn2c}\n}", "github": "", "project": "", "reviewers": "NW8q;Z26D;degt;U5c9", "site": "https://openreview.net/forum?id=OcHQVmfLn2c", "pdf_size": 3811488, "recommendation": "3;3;5;5", "confidence": "4;4;3;5", "correctness": "2;3;3;3", "technical_novelty": "2;2;2;2", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "80;39;48;55", "wc_strength_and_weaknesses": "283;162;682;221", "wc_clarity_quality_novelty_and_reproducibility": "177;21;82;108", "wc_summary_review": "70;43;74;9", "wc_review": "610;265;886;393", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "279;368;331;410", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 55.5, 15.239750654128171 ], "wc_strength_and_weaknesses_avg": [ 337.0, 203.72898664647602 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 97.0, 55.95087130688851 ], "wc_summary_review_avg": [ 49.0, 25.990382836734053 ], "wc_review_avg": [ 538.5, 235.5 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 347.0, 48.19232304008596 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 
10, 0 ], "authors#_avg": [ 9, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10463891352480903636&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;2;3;3;2;2;4;1", "aff_unique_norm": "University of Chinese Academy of Sciences;University of Hong Kong;Huawei;Chinese Academy of Sciences;Tianjin University", "aff_unique_dep": ";;Huawei Technologies;Institute of Automation;", "aff_unique_url": "http://www.ucas.ac.cn;https://www.hku.hk;https://www.huawei.com;http://www.ia.cas.cn;http://www.tju.edu.cn", "aff_unique_abbr": "UCAS;HKU;Huawei;CAS;TJU", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "OdZcJYT5Z4k", "title": "Generalized structure-aware missing view completion network for incomplete multi-view clustering", "track": "main", "status": "Withdraw", "tldr": "A general incomplete multi-view clustering framework via missing view completion and recurrent graph constraint.", "abstract": "In recent years, incomplete multi-view clustering has been widely regarded as a challenging problem. The missing views inevitably damage the effective information of the multi-view data itself. To date, existing methods for incomplete multi-view clustering usually bypass invalid views according to prior missing information, which is considered as a second-best scheme based on evasion. Other methods that attempt to recover missing information are mostly applicable to specific two-view datasets. To handle these problems, we design a general structure-aware missing view completion network (SMVC) for incomplete multi-view clustering. Concretely, we build a two-stage autoencoder network with the self-attention structure to synchronously extract high-level semantic representations of multiple views and recover the missing data. In addition, we develop a recurrent graph reconstruction mechanism that cleverly leverages the restored views to promote the representation learning and the further data reconstruction. 
Sufficient experimental results confirm that our SMVC has obvious advantages over other top methods.", "keywords": "Incomplete multi-view clustering;Missing view imputation;Representation learning;Deep neural network", "primary_area": "", "supplementary_material": "", "author": "Chengliang Liu;Jie Wen;Zhihao Wu;Xiaoling Luo;Chao Huang;Yong Xu", "authorids": "~Chengliang_Liu1;~Jie_Wen1;~Zhihao_Wu1;~Xiaoling_Luo2;~Chao_Huang6;~Yong_Xu9", "gender": ";;M;F;;M", "homepage": ";;;;;https://www.yongxu.org", "dblp": ";;27/8792-2;;;", "google_scholar": ";;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.hk/citations?hl=zh-CN;;https://scholar.google.com.hk/citations?user=zOVgYQYAAAAJ", "orcid": ";;0000-0002-2704-0614;0000-0003-3678-3185;;", "linkedin": ";;;;;", "or_profile": "~Chengliang_Liu1;~Jie_Wen1;~Zhihao_Wu1;~Xiaoling_Luo2;~Chao_Huang6;~Yong_Xu9", "aff": ";;Harbin Institute of Technology, Shenzhen;Harbin Institute of Technology;;Harbin Institute of Technology", "aff_domain": ";;hit.edu.cn;hit.edu.cn;;hit.edu.cn", "position": ";;PhD student;PhD student;;Full Professor", "bibtex": "@misc{\nliu2023generalized,\ntitle={Generalized structure-aware missing view completion network for incomplete multi-view clustering},\nauthor={Chengliang Liu and Jie Wen and Zhihao Wu and Xiaoling Luo and Chao Huang and Yong Xu},\nyear={2023},\nurl={https://openreview.net/forum?id=OdZcJYT5Z4k}\n}", "github": "", "project": "", "reviewers": "3yps;Cu9o;4vaY;VN69", "site": "https://openreview.net/forum?id=OdZcJYT5Z4k", "pdf_size": 4798892, "recommendation": "6;8;8;8", "confidence": "4;4;5;5", "correctness": "3;4;4;3", "technical_novelty": "3;3;4;3", "empirical_novelty": "3;4;4;3", "wc_summary_paper": "64;59;72;50", "wc_strength_and_weaknesses": "166;113;241;209", "wc_clarity_quality_novelty_and_reproducibility": "33;15;40;38", "wc_summary_review": "25;46;41;30", "wc_review": "288;233;394;327", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 4.5, 0.5 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.5, 0.5 ], "wc_summary_paper_avg": [ 61.25, 7.980444849756184 ], "wc_strength_and_weaknesses_avg": [ 182.25, 48.027986632795674 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 31.5, 9.86154146165801 ], "wc_summary_review_avg": [ 35.5, 8.381527307120106 ], "wc_review_avg": [ 310.5, 58.645119148996535 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18271148260262610065&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Harbin Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "http://en.hhit.edu.cn/", "aff_unique_abbr": "HIT", "aff_campus_unique_index": "0;1;1", "aff_campus_unique": "Shenzhen;Harbin", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "OfaJyiYonBk", "title": "Iteratively Learning Novel Strategies with Diversity Measured in State Distances", "track": "main", "status": "Reject", "tldr": "We develop an iterative RL algorithm for discovering diverse 
high-reward strategies with provable convergence properties. ", "abstract": "In complex reinforcement learning (RL) problems, policies with similar rewards may have substantially different behaviors. Yet, to not only optimize rewards but also discover as many diverse strategies as possible remains a challenging problem. A natural approach to this task is constrained population-based training (PBT), which simultaneously learns a collection of policies subject to diversity constraints. However, due to the unaffordable computation cost of PBT, we adopt an alternative approach, iterative learning (IL), which repeatedly learns a single novel policy that is sufficiently different from previous ones. We first analyze these two frameworks and prove that, for any policy pool derived by PBT, we can always use IL to obtain another policy pool of the same rewards and competitive diversity scores. In addition, we also present a novel state-based diversity measure with two tractable realizations. Such a metric can impose a stronger and much smoother diversity constraint than existing action-based metrics. Combining IL and the state-based diversity measure, we develop a powerful diversity-driven RL algorithm, State-based Intrinsic-reward Policy Optimization (SIPO), with provable convergence properties. We empirically examine our algorithm in complex multi-agent environments including StarCraft Multi-Agent Challenge and Google Research Football. SIPO is able to consistently derive strategically diverse and human-interpretable policies that cannot be discovered by existing baselines.", "keywords": "diverse behavior;multi-agent reinforcement learning;deep reinforcement learning", "primary_area": "", "supplementary_material": "/attachment/a967ca8dddbed738130933a2d5257d504320e44c.zip", "author": "Wei Fu;Weihua Du;Jingwei Li;Sunli Chen;Jingzhao Zhang;Yi Wu", "authorids": "~Wei_Fu1;~Weihua_Du1;~Jingwei_Li2;~Sunli_Chen1;~Jingzhao_Zhang2;~Yi_Wu1", "gender": "M;M;;M;M;M", "homepage": "https://garrett4wade.github.io/;https://stiglidu.github.io/;;https://eeeeeerickkk.github.io/;https://sites.google.com/view/jingzhao/home;https://jxwuyi.weebly.com", "dblp": ";229/1269;;359/3746;220/5559;", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;;7LxAwLwAAAAJ;8NudxYsAAAAJ;dusV5HMAAAAJ", "orcid": ";0000-0002-8856-0277;;;;", "linkedin": ";;;;;", "or_profile": "~Wei_Fu1;~Weihua_Du1;~Jingwei_Li2;~Sunli_Chen1;~Jingzhao_Zhang2;~Yi_Wu1", "aff": "Institute for Interdisciplinary Information Sciences, Tsinghua University, Tsinghua University;Tsinghua University;;Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "mails.tsinghua.edu.cn;mails.tsinghua.edu.cn;;tsinghua.edu.cn;mail.tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;Undergrad student;;Undergrad student;Assistant Professor;Assistant Professor", "bibtex": "@misc{\nfu2023iteratively,\ntitle={Iteratively Learning Novel Strategies with Diversity Measured in State Distances},\nauthor={Wei Fu and Weihua Du and Jingwei Li and Sunli Chen and Jingzhao Zhang and Yi Wu},\nyear={2023},\nurl={https://openreview.net/forum?id=OfaJyiYonBk}\n}", "github": "", "project": "", "reviewers": "GEJD;aXw1;nUUy", "site": "https://openreview.net/forum?id=OfaJyiYonBk", "pdf_size": 4881727, "recommendation": "5;6;6", "confidence": "5;4;3", "correctness": "2;3;3", "technical_novelty": "2;3;3", "empirical_novelty": "2;3;2", "wc_summary_paper": "101;125;176", "wc_strength_and_weaknesses": "370;206;474", 
"wc_clarity_quality_novelty_and_reproducibility": "29;103;150", "wc_summary_review": "109;105;115", "wc_review": "609;539;915", "wc_reply_reviewers": "360;16;0", "wc_reply_authors": "2019;314;863", "reply_reviewers": "3;1;0", "reply_authors": "6;1;2", "recommendation_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 134.0, 31.272991542223778 ], "wc_strength_and_weaknesses_avg": [ 350.0, 110.32074449833388 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 94.0, 49.80629143659129 ], "wc_summary_review_avg": [ 109.66666666666667, 4.109609335312651 ], "wc_review_avg": [ 687.6666666666666, 163.26937523273887 ], "wc_reply_reviewers_avg": [ 125.33333333333333, 166.06290642069615 ], "wc_reply_authors_avg": [ 1065.3333333333333, 710.6149136878254 ], "reply_reviewers_avg": [ 1.3333333333333333, 1.247219128924647 ], "reply_authors_avg": [ 3.0, 2.160246899469287 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.8660254037844385, "corr_recommendation_correctness": 0.9999999999999997, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4756990887126282461&as_sdt=5,39&sciodt=0,39&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "Institute for Interdisciplinary Information Sciences", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "Tsinghua", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "TabCaps: A Capsule Neural Network for Tabular Data Classification with BoW Routing", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11829", "id": "OgbtSLESnI", "poster": "/media/PosterPDFs/ICLR%202023/11829.png?t=1682328298.2798023", "openreview": "https://openreview.net/forum?id=OgbtSLESnI", "slides": "https://iclr.cc/virtual/2023/poster/11829", "video": "https://iclr.cc/virtual/2023/poster/11829", "author_site": "Jintai Chen, KuanLun Liao, Yanwen Fang, Danny Chen, Jian Wu", "tldr": "We proposed a capsule neural network for tabular data classification.", "abstract": "Records in a table are represented by a collection of heterogeneous scalar features. Previous work often made predictions for records in a paradigm that processed each feature as an operating unit, which requires to well cope with the heterogeneity. In this paper, we propose to encapsulate all feature values of a record into vectorial features and process them collectively rather than have to deal with individual ones, which directly captures the representations at the data level and benefits robust performances. Specifically, we adopt the concept of \"capsules\" to organize features into vectorial features, and devise a novel capsule neural network called \"TabCaps\" to process the vectorial features for classification. In TabCaps, a record is encoded into several vectorial features by some optimizable multivariate Gaussian kernels in the primary capsule layer, where each vectorial feature represents a specific \"profile\" of the input record and is transformed into senior capsule layer under the guidance of a new straightforward routing algorithm. 
The design of routing algorithm is motivated by the Bag-of-Words (BoW) model, which performs capsule feature grouping straightforwardly and efficiently, in lieu of the computationally complex clustering of previous routing algorithms. Comprehensive experiments show that TabCaps achieves competitive and robust performances in tabular data classification tasks.", "keywords": "capsule neural network", "primary_area": "", "supplementary_material": "", "author": "Jintai Chen;KuanLun Liao;Yanwen Fang;Danny Chen;Jian Wu", "authorids": "~Jintai_Chen1;~KuanLun_Liao1;~Yanwen_Fang1;~Danny_Chen1;~Jian_Wu6", "gender": "M;M;F;Not Specified;M", "homepage": "https://whatashot.github.io/;;https://www.researchgate.net/profile/Yanwen-Fang-2;https://engineering.nd.edu/faculty/danny-chen/;https://scholar.google.com/citations?hl=zh-TW&user=VO9XIXYAAAAJ", "dblp": "249/3929;;;c/DannyZChen.html;96/2744-1", "google_scholar": "https://scholar.google.com/citations?hl=en;YN49RX4AAAAJ;https://scholar.google.com/citations?hl=zh-CN;tRerdSIAAAAJ;https://scholar.google.com/citations?hl=zh-TW", "orcid": "0000-0002-3199-2597;;;0000-0001-6565-2884;", "linkedin": "jintai-chen-3a09921b0/;;;;", "or_profile": "~Jintai_Chen1;~KuanLun_Liao1;~Yanwen_Fang1;~Danny_Chen1;~Jian_Wu6", "aff": "Zhejiang University;Zhejiang University;The University of Hong Kong;University of Notre Dame, USA;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn;hku.hk;nd.edu;zju.edu.cn", "position": "PhD student;MS student;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nchen2023tabcaps,\ntitle={TabCaps: A Capsule Neural Network for Tabular Data Classification with BoW Routing},\nauthor={Jintai Chen and KuanLun Liao and Yanwen Fang and Danny Chen and Jian Wu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=OgbtSLESnI}\n}", "github": "", "project": "", "reviewers": "NLd7;sBG9;Zv1Z;NQgm", "pdf_size": 2160157, "recommendation": "5;6;6;8", "confidence": "2;2;4;4", "correctness": "3;3;3;4", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "31;67;80;180", "wc_strength_and_weaknesses": "191;132;144;191", "wc_clarity_quality_novelty_and_reproducibility": "73;8;60;146", "wc_summary_review": "24;49;34;64", "wc_review": "319;256;318;581", "wc_reply_reviewers": "0;45;0;33", "wc_reply_authors": "754;657;904;1096", "reply_reviewers": "0;2;0;1", "reply_authors": "2;3;2;4", "recommendation_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.0, 1.0 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 89.5, 55.24717187331855 ], "wc_strength_and_weaknesses_avg": [ 164.5, 26.837473800639284 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 71.75, 49.286788290575394 ], "wc_summary_review_avg": [ 42.75, 15.155444566227676 ], "wc_review_avg": [ 368.5, 125.31260910219689 ], "wc_reply_reviewers_avg": [ 19.5, 19.956202043475106 ], "wc_reply_authors_avg": [ 852.75, 165.7307681150365 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.6882472016116854, "corr_recommendation_correctness": 0.9271726499455306, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5681120184288605007&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 2, "pdf": 
"https://openreview.net/pdf?id=OgbtSLESnI", "email": "zju.edu.cn;zju.edu.cn;hku.hk;nd.edu;zju.edu.cn", "author_num": 5, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Zhejiang University;University of Hong Kong;University of Notre Dame", "aff_unique_dep": ";;", "aff_unique_url": "https://www.zju.edu.cn;https://www.hku.hk;https://www.nd.edu", "aff_unique_abbr": "ZJU;HKU;Notre Dame", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "China;United States" }, { "id": "Ogh8umAChpo", "title": "Denoising MCMC for Accelerating Diffusion-Based Generative Models", "track": "main", "status": "Reject", "tldr": "We combine MCMC and diffusion models to accelerate score-based sampling.", "abstract": "Diffusion models are powerful generative models that simulate the reverse of diffusion processes using score functions to synthesize data from noise. The sampling process of diffusion models can be interpreted as solving the reverse stochastic differential equation (SDE) or the ordinary differential equation (ODE) of the diffusion process, which often requires up to thousands of discretization steps to generate a single image. This has sparked a great interest in developing efficient integration techniques for reverse-S/ODEs. Here, we propose an orthogonal approach to accelerating score-based sampling: Denoising MCMC (DMCMC). DMCMC first uses MCMC to produce samples in the product space of data and variance (or diffusion time). Then, a reverse-S/ODE integrator is used to denoise the MCMC samples. Since MCMC traverses close to the data manifold, the computation cost of producing a clean sample for DMCMC is much less than that of producing a clean sample from noise. To verify the proposed concept, we show that Denoising Langevin Gibbs (DLG), an instance of DMCMC, successfully accelerates all six reverse-S/ODE integrators considered in this work on the tasks of CIFAR10 and CelebA-HQ-256 image generation. Notably, combined with integrators of Karras et al. (2022) and pre-trained score models of Song et al. (2021b), DLG achieves state-of-the-art results. In the limited number of score function evaluation (NFE) settings on CIFAR10, we have $3.86$ FID with $\\approx 10$ NFE and $2.63$ FID with $\\approx 20$ NFE. On CelebA-HQ-256, we have $6.99$ FID with $\\approx 160$ NFE, which beats the current best record of Kim et al. 
(2022) among score-based models, $7.16$ FID with $4000$ NFE.", "keywords": "Markov Chain Monte Carlo;Diffusion Models;Score-Based Models", "primary_area": "", "supplementary_material": "", "author": "Beomsu Kim;Jong Chul Ye", "authorids": "~Beomsu_Kim1;~Jong_Chul_Ye1", "gender": "M;M", "homepage": ";https://bispl.weebly.com/", "dblp": ";15/5613", "google_scholar": "https://scholar.google.co.kr/citations?user=TofIFUgAAAAJ;HNMjoNEAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Beomsu_Kim1;~Jong_Chul_Ye1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr", "position": "MS student;Full Professor", "bibtex": "@misc{\nkim2023denoising,\ntitle={Denoising {MCMC} for Accelerating Diffusion-Based Generative Models},\nauthor={Beomsu Kim and Jong Chul Ye},\nyear={2023},\nurl={https://openreview.net/forum?id=Ogh8umAChpo}\n}", "github": "", "project": "", "reviewers": "uZQw;DAST;vzaX;7H8V", "site": "https://openreview.net/forum?id=Ogh8umAChpo", "pdf_size": 42399857, "recommendation": "5;6;6;6", "confidence": "4;4;3;3", "correctness": "3;3;3;3", "technical_novelty": "2;3;3;2", "empirical_novelty": "3;3;3;2", "wc_summary_paper": "237;49;102;125", "wc_strength_and_weaknesses": "570;219;222;221", "wc_clarity_quality_novelty_and_reproducibility": "76;19;65;37", "wc_summary_review": "101;19;45;52", "wc_review": "984;306;434;435", "wc_reply_reviewers": "0;0;146;23", "wc_reply_authors": "2806;1074;2324;1370", "reply_reviewers": "0;0;1;1", "reply_authors": "7;4;6;4", "recommendation_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 128.25, 68.56885225814999 ], "wc_strength_and_weaknesses_avg": [ 308.0, 151.2696268257445 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 49.25, 22.520823697191894 ], "wc_summary_review_avg": [ 54.25, 29.65952629426168 ], "wc_review_avg": [ 539.75, 261.7979898700523 ], "wc_reply_reviewers_avg": [ 42.25, 60.631571808753236 ], "wc_reply_authors_avg": [ 1893.5, 700.6459519614739 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 5.25, 1.299038105676658 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.0, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10022611850539518104&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff_unique_index": "0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "id": "Oh0cnNTn5Di", "title": "Lattice Convolutional Networks for Learning Ground States of Quantum Many-Body Systems", "track": "main", "status": "Reject", "tldr": "", "abstract": "Deep learning methods have been shown to be effective in representing ground-state wave functions of quantum many-body systems. Existing methods use convolutional neural networks (CNNs) for square lattices due to their image-like structures. For non-square lattices, an existing method uses a graph neural network (GNN), in which structure information is not precisely captured, thereby requiring additional hand-crafted sublattice encoding.
In this work, we propose lattice convolutions in which a set of proposed operations is used to convert non-square lattices into grid-like augmented lattices on which regular convolution can be applied. Based on the proposed lattice convolutions, we design lattice convolutional networks (LCN) that use self-gating and attention mechanisms. Experimental results show that our method achieves performance on par with or better than the GNN method on the spin-1/2 $J_1$-$J_2$ Heisenberg model over the square, honeycomb, triangular, and kagome lattices without using hand-crafted encoding.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Cong Fu;Xuan Zhang;Huixin Zhang;Hongyi Ling;Shenglong Xu;Shuiwang Ji", "authorids": "~Cong_Fu2;~Xuan_Zhang3;~Huixin_Zhang1;~Hongyi_Ling1;~Shenglong_Xu1;~Shuiwang_Ji1", "gender": "M;M;F;;M;M", "homepage": "https://congfu.github.io/;https://github.com/floatlazer;;;https://physics.tamu.edu/directory/slxu/;http://people.tamu.edu/~sji", "dblp": "45/3990-3;;;259/0934;295/8446;84/6405", "google_scholar": "https://scholar.google.is/citations?user=7fv5TGMAAAAJ;https://scholar.google.com/citations?view_op=list_works;;ei8O1BEAAAAJ;jgM3RZoAAAAJ;BZGj6sAAAAAJ", "orcid": ";;;;;0000-0002-4205-4563", "linkedin": ";;%E8%95%99%E5%BF%83-%E5%BC%A0-a2670a229/;;;shuiwang-ji-9a040715/", "or_profile": "~Cong_Fu2;~Xuan_Zhang3;~Huixin_Zhang1;~Hongyi_Ling1;~Shenglong_Xu1;~Shuiwang_Ji1", "aff": "Texas A&M;Texas A&M;Texas A&M University - College Station;Texas A&M University - College Station;Texas A&M University - College Station;Texas A&M University", "aff_domain": "tamu.edu;tamu.edu;tamu.edu;tamu.edu;tamu.edu;tamu.edu", "position": "PhD student;PhD student;PhD student;PhD student;Assistant Professor;Professor", "bibtex": "@misc{\nfu2023lattice,\ntitle={Lattice Convolutional Networks for Learning Ground States of Quantum Many-Body Systems},\nauthor={Cong Fu and Xuan Zhang and Huixin Zhang and Hongyi Ling and Shenglong Xu and Shuiwang Ji},\nyear={2023},\nurl={https://openreview.net/forum?id=Oh0cnNTn5Di}\n}", "github": "", "project": "", "reviewers": "aQ26;xp3P;maSg", "site": "https://openreview.net/forum?id=Oh0cnNTn5Di", "pdf_size": 629403, "recommendation": "3;3;8", "confidence": "4;3;5", "correctness": "2;4;3", "technical_novelty": "3;1;3", "empirical_novelty": "2;3;3", "wc_summary_paper": "132;38;69", "wc_strength_and_weaknesses": "532;67;114", "wc_clarity_quality_novelty_and_reproducibility": "183;28;67", "wc_summary_review": "70;53;406", "wc_review": "917;186;656", "wc_reply_reviewers": "0;0;470", "wc_reply_authors": "1949;500;1346", "reply_reviewers": "0;0;3", "reply_authors": "4;2;3", "recommendation_avg": [ 4.666666666666667, 2.357022603955158 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 2.3333333333333335, 0.9428090415820634 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 79.66666666666667, 39.109532796436675 ], "wc_strength_and_weaknesses_avg": [ 237.66666666666666, 209.00770852344712 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 92.66666666666667, 65.8297467377443 ], "wc_summary_review_avg": [ 176.33333333333334, 162.54708719492808 ], "wc_review_avg": [ 586.3333333333334, 302.46799647051733 ], "wc_reply_reviewers_avg": [ 156.66666666666666, 221.5601247717849 ], "wc_reply_authors_avg": [ 1265.0, 594.3180966452225 ], "reply_reviewers_avg": [ 1.0, 1.4142135623730951 ], "reply_authors_avg": [ 3.0, 0.816496580927726 ],
"replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.8660254037844387, "corr_recommendation_correctness": 0.0, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9730960868731318686&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Texas A&M University", "aff_unique_dep": "", "aff_unique_url": "https://www.tamu.edu", "aff_unique_abbr": "TAMU", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";College Station", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "Oh5nigv45PI", "title": "NAG-GS: semi-implicit, accelerated and robust stochastic optimizer.", "track": "main", "status": "Reject", "tldr": "", "abstract": "Classical machine learning models such as deep neural networks are usually trained by using Stochastic Gradient Descent-based (SGD) algorithms. The classical SGD can be interpreted as a discretization of the stochastic gradient flow. In this paper we propose a novel, robust and accelerated stochastic optimizer that relies on two key elements: (1) an accelerated Nesterov-like Stochastic Differential Equation (SDE) and (2) its semi-implicit Gauss-Seidel type discretization. The convergence and stability of the obtained method, referred to as NAG-GS, are first studied extensively in the case of the minimization of a quadratic function. This analysis allows us to come up with an optimal step size (or learning rate) in terms of rate of convergence while ensuring the stability of NAG-GS. This is achieved by the careful analysis of the spectral radius of the iteration matrix and the covariance matrix at stationarity with respect to all hyperparameters of our method. We show that NAG-GS is competitive with state-of-the-art methods such as momentum SGD with weight decay and AdamW for the training of machine learning models such as the logistic regression model, the residual networks models on standard computer vision datasets, and Transformers in the frame of the GLUE benchmark.", "keywords": "Accelerated gradient methods;stochastic optimization;stochastic differential equations;semi-implicit solver;convergence analysis;deep neural networks", "primary_area": "", "supplementary_material": "", "author": "Valentin Leplat;Daniil Merkulov;Aleksandr Katrutsa;Daniel Bershatsky;Ivan Oseledets", "authorids": "~Valentin_Leplat1;~Daniil_Merkulov1;~Aleksandr_Katrutsa1;~Daniel_Bershatsky1;~Ivan_Oseledets1", "gender": "M;;;M;M", "homepage": "https://sites.google.com/view/valentinleplat/;;;https://github.com/daskol;http://oseledets.github.io", "dblp": ";;;;56/7175", "google_scholar": ";;;XthC2z8AAAAJ;https://scholar.google.ru/citations?user=5kMqBQEAAAAJ", "orcid": "0000-0002-3313-1547;;;0000-0001-8917-8187;", "linkedin": ";;;;", "or_profile": "~Valentin_Leplat1;~Daniil_Merkulov1;~Aleksandr_Katrutsa1;~Daniel_Bershatsky1;~Ivan_Oseledets1", "aff": "Skoltech;;;Skoltech;Institute of Numerical Mathematics", "aff_domain": "skoltech.ru;;;skoltech.ru;inm.ras.ru", "position": "Postdoc;;;MS student;Researcher", "bibtex": "@misc{\nleplat2023naggs,\ntitle={{NAG}-{GS}: semi-implicit, accelerated and robust stochastic optimizer.},\nauthor={Valentin Leplat and Daniil Merkulov and Aleksandr Katrutsa and Daniel Bershatsky and Ivan Oseledets},\nyear={2023},\nurl={https://openreview.net/forum?id=Oh5nigv45PI}\n}", "github": "", "project": "", "reviewers": "ksyw;AnLK;Dog8;DiaF", "site": "https://openreview.net/forum?id=Oh5nigv45PI", "pdf_size": 1937435, 
"recommendation": "3;3;5;5", "confidence": "4;4;3;2", "correctness": "3;4;3;3", "technical_novelty": "2;2;3;2", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "68;49;58;42", "wc_strength_and_weaknesses": "432;190;250;144", "wc_clarity_quality_novelty_and_reproducibility": "24;9;21;16", "wc_summary_review": "57;27;21;77", "wc_review": "581;275;350;279", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 54.25, 9.756408150543928 ], "wc_strength_and_weaknesses_avg": [ 254.0, 109.42577392917995 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 17.5, 5.678908345800274 ], "wc_summary_review_avg": [ 45.5, 22.73213584333861 ], "wc_review_avg": [ 371.25, 124.72043737896368 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.9045340337332909, "corr_recommendation_correctness": -0.5773502691896257, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13436589729931682792&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;1", "aff_unique_norm": "Skolkovo Institute of Science and Technology;Institute of Numerical Mathematics", "aff_unique_dep": ";", "aff_unique_url": "https://www.skoltech.ru;", "aff_unique_abbr": "Skoltech;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Russian Federation;" }, { "title": "Harnessing Mixed Offline Reinforcement Learning Datasets via Trajectory Weighting", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11717", "id": "OhUAblg27z", "poster": "/media/PosterPDFs/ICLR%202023/11717.png?t=1682740104.1279774", "openreview": "https://openreview.net/forum?id=OhUAblg27z", "slides": "https://iclr.cc/virtual/2023/poster/11717", "video": "https://iclr.cc/virtual/2023/poster/11717", "author_site": "Zhang-Wei Hong, Pulkit Agrawal, Remi Tachet des Combes, Romain Laroche", "tldr": "We propose a sample selection strategy that enables offline reinforcement learning algorithms to learn a better policy in mixed datasets with sparse high-return trajectories.", "abstract": "Most offline reinforcement learning (RL) algorithms return a target policy maximizing a trade-off between (1) the expected performance gain over the behavior policy that collected the dataset, and (2) the risk stemming from the out-of-distribution-ness of the induced state-action occupancy. It follows that the performance of the target policy is strongly related to the performance of the behavior policy and, thus, the trajectory return distribution of the dataset. We show that in mixed datasets consisting of mostly low-return trajectories and minor high-return trajectories, state-of-the-art offline RL algorithms are overly restrained by low-return trajectories and fail to exploit high-performing trajectories to the fullest. To overcome this issue, we show that, in deterministic MDPs with stochastic initial states, the dataset sampling can be re-weighted to induce an artificial dataset whose behavior policy has a higher return. 
This re-weighted sampling strategy may be combined with any offline RL algorithm. We further show that the opportunity for performance improvement over the behavior policy correlates with the positive-sided variance of the returns of the trajectories in the dataset. We empirically show that while CQL, IQL, and TD3+BC achieve only a part of this potential policy improvement, these same algorithms combined with our re-weighted sampling strategy fully exploit the dataset. Furthermore, we empirically demonstrate that, despite its theoretical limitation, the approach may still be efficient in stochastic environments.", "keywords": "offline reinforcement learning;reinforcement learning;sampling;experience replay", "primary_area": "", "supplementary_material": "/attachment/34e8d3e176011fbdcfe5599473804d34c4e4e220.zip", "author": "Zhang-Wei Hong;Pulkit Agrawal;Remi Tachet des Combes;Romain Laroche", "authorids": "~Zhang-Wei_Hong1;~Pulkit_Agrawal1;~Remi_Tachet_des_Combes1;~Romain_Laroche1", "gender": "M;M;M;M", "homepage": ";https://people.eecs.berkeley.edu/~pulkitag/;;https://www.researchgate.net/profile/Romain_Laroche", "dblp": "198/0600;149/2672;146/0392;65/9019", "google_scholar": "GZkyN4cAAAAJ;UpZmJI0AAAAJ;1MZF70cAAAAJ;RiIOKJMAAAAJ", "orcid": ";;;", "linkedin": ";;;romain-laroche-6282397/?originalSubdomain=ca", "or_profile": "~Zhang-Wei_Hong1;~Pulkit_Agrawal1;~Remi_Tachet_des_Combes1;~Romain_Laroche1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Microsoft Research;Microsoft", "aff_domain": "mit.edu;mit.edu;microsoft.com;microsoft.com", "position": "PhD student;Assistant Professor;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nhong2023harnessing,\ntitle={Harnessing Mixed Offline Reinforcement Learning Datasets via Trajectory Weighting},\nauthor={Zhang-Wei Hong and Pulkit Agrawal and Remi Tachet des Combes and Romain Laroche},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=OhUAblg27z}\n}", "github": "", "project": "", "reviewers": "Z9su;BiNt;C84m;56VB", "pdf_size": 787435, "recommendation": "6;6;6;8", "confidence": "4;4;4;4", "correctness": "4;3;3;4", "technical_novelty": "3;2;2;3", "empirical_novelty": "3;2;2;4", "wc_summary_paper": "117;132;39;102", "wc_strength_and_weaknesses": "159;414;149;129", "wc_clarity_quality_novelty_and_reproducibility": "28;1;53;112", "wc_summary_review": "70;72;32;58", "wc_review": "374;619;273;401", "wc_reply_reviewers": "0;92;56;0", "wc_reply_authors": "581;2770;878;1176", "reply_reviewers": "0;1;1;0", "reply_authors": "1;6;2;2", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 97.5, 35.40127116361784 ], "wc_strength_and_weaknesses_avg": [ 212.75, 116.6927054275459 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 48.5, 41.015241069631664 ], "wc_summary_review_avg": [ 58.0, 15.937377450509228 ], "wc_review_avg": [ 416.75, 126.13955565166701 ], "wc_reply_reviewers_avg": [ 37.0, 39.1279950930277 ], "wc_reply_authors_avg": [ 1351.25, 845.6971606313929 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.75, 1.920286436967152 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 28, "gs_cited_by_link":
"https://scholar.google.com/scholar?cites=17776366118816086377&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=OhUAblg27z", "email": "mit.edu;mit.edu;microsoft.com;microsoft.com", "author_num": 4, "aff_unique_index": "0;0;1;1", "aff_unique_norm": "Massachusetts Institute of Technology;Microsoft", "aff_unique_dep": ";Microsoft Research", "aff_unique_url": "https://web.mit.edu;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "MIT;MSR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "OhdF1l90VoC", "title": "Coupling Semi-supervised Learning with Reinforcement Learning for Better Decision Making -- An application to Cryo-EM Data Collection", "track": "main", "status": "Withdraw", "tldr": "We proposed an iterative semi-supervised learning framework for dual-learning of RL and the perception model with applications to Cryo-EM.", "abstract": "We consider a semi-supervised Reinforcement Learning (RL) approach that takes inputs from a perception model. Performance of such an approach can be significantly limited by the quality of the perception model in the low labeled data regime. We propose a novel iterative framework that simultaneously couples and improves the training of both RL and the perception model. The perception model takes pseudo labels generated from the trajectories of a trained RL agent believing that the decision-model can correct errors made by the perception model. We apply the framework to cryo-electron microscopy (cryo-EM) data collection, whose goal is to find as many high-quality micrographs taken by cryo-electron microscopy as possible by navigating at different magnification levels. Our proposed method significantly outperforms various baseline methods in terms of both RL rewards and the accuracy of the perception model. We further provide some theoretical insights into the benefits of coupling the decision model and the perception model by showing that RL-generated pseudo labels are biased towards localization which aligns with the underlying data generating mechanism. Our iterative framework that couples both sides of the semi-supervised RL can be applied to a wide range of sequential decision-making tasks when the labeled data is \nlimited.", "keywords": "Reinforcement Learning;Semi-supervised Learning;Cryo-EM", "primary_area": "", "supplementary_material": "/attachment/984dbff1f2ca92408c47dca92ff1a691875ceab5.zip", "author": "Ziping Xu;Quanfu Fan;Yilai Li;Emma R Lee;John Maxwell Cohn;Ambuj Tewari;Seychelle M. 
Vos;Michael Cianfrocco", "authorids": "~Ziping_Xu1;~Quanfu_Fan1;yilai@umich.edu;~Emma_R_Lee1;~John_Maxwell_Cohn1;~Ambuj_Tewari1;seyvos@mit.edu;~Michael_Cianfrocco1", "gender": "M;M;;;M;M;;M", "homepage": "https://zipingxu.github.io;;;;http://johncohn.org;https://www.ambujtewari.com;;https://www.lsi.umich.edu/science/our-labs/michael-cianfrocco-lab", "dblp": "258/0573;66/3950;;;;24/567;;", "google_scholar": "V-VcaYIAAAAJ;kCxHiwUAAAAJ;;;;ttbl4FsAAAAJ;;X-PKF2AAAAAJ", "orcid": ";;;;;0000-0001-6969-7844;;0000-0002-2067-4999", "linkedin": ";;;emma-lee-ba08601a0/;;;;", "or_profile": "~Ziping_Xu1;~Quanfu_Fan1;yilai@umich.edu;~Emma_R_Lee1;~John_Maxwell_Cohn1;~Ambuj_Tewari1;seyvos@mit.edu;~Michael_Cianfrocco1", "aff": "University of Michigan;MIT-IBM Watson AI Lab;;Massachusetts Institute of Technology;;University of Michigan - Ann Arbor;;University of Michigan - Ann Arbor", "aff_domain": "umich.edu;us.ibm.com;;mit.edu;;umich.edu;;umich.edu", "position": "PhD student;Researcher;;Undergrad student;;Full Professor;;Assistant Professor", "bibtex": "@misc{\nxu2023coupling,\ntitle={Coupling Semi-supervised Learning with Reinforcement Learning for Better Decision Making -- An application to Cryo-{EM} Data Collection},\nauthor={Ziping Xu and Quanfu Fan and Yilai Li and Emma R Lee and John Maxwell Cohn and Ambuj Tewari and Seychelle M. Vos and Michael Cianfrocco},\nyear={2023},\nurl={https://openreview.net/forum?id=OhdF1l90VoC}\n}", "github": "", "project": "", "reviewers": "tTf5;yuYG;HRQq", "site": "https://openreview.net/forum?id=OhdF1l90VoC", "pdf_size": 4808709, "recommendation": "3;3;3", "confidence": "4;3;3", "correctness": "2;2;3", "technical_novelty": "2;2;2", "empirical_novelty": "2;3;2", "wc_summary_paper": "31;67;86", "wc_strength_and_weaknesses": "292;369;338", "wc_clarity_quality_novelty_and_reproducibility": "18;313;3", "wc_summary_review": "30;41;23", "wc_review": "371;790;450", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 61.333333333333336, 22.808380526074668 ], "wc_strength_and_weaknesses_avg": [ 333.0, 31.63331577098213 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 111.33333333333333, 142.73129377337762 ], "wc_summary_review_avg": [ 31.333333333333332, 7.408703590297623 ], "wc_review_avg": [ 537.0, 181.78192062652067 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ML65OjooP4sJ:scholar.google.com/&scioq=Coupling+Semi-supervised+Learning+with+Reinforcement+Learning+for+Better+Decision+Making+--+An+application+to+Cryo-EM+Data+Collection&hl=en&as_sdt=0,44", "gs_version_total": 2, "aff_unique_index": "0;1;1;0;0", "aff_unique_norm": "University of Michigan;Massachusetts Institute of Technology", "aff_unique_dep": ";IBM Watson AI Lab", "aff_unique_url": "https://www.umich.edu;https://www.mitibmwatsonailab.org", "aff_unique_abbr": "UM;MIT-IBM AI Lab", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Ann Arbor", 
"aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "OhjGzRE5N6o", "title": "Protein Sequence Design in a Latent Space via Model-based Reinforcement Learning", "track": "main", "status": "Reject", "tldr": "This study investigates why many model-based biological sequence design methods produce results that empirically fail and proposes a novel optimization process that can efficiently traverse a latent representation space instead of the sequence space.", "abstract": "Proteins are complex molecules responsible for different functions in the human body. Enhancing the functionality of a protein and/or cellular fitness can significantly impact various industries. However, their optimization remains challenging, and sequences generated by data-driven methods often fail in wet lab experiments. This study investigates the limitations of existing model-based sequence design methods and presents a novel optimization framework that can efficiently traverse the latent representation space instead of the protein sequence space. Our framework generates proteins with higher functionality and cellular fitness by modeling the sequence design task as a Markov decision process and applying model-based reinforcement learning. We discuss the results in a comprehensive evaluation of two distinct proteins, GPF and His3, along with the predicted structure of optimized sequences using deep learning-based structure prediction.", "keywords": "Biological sequence design;Model-based reinforcement learning;Protein design;Representation learning", "primary_area": "", "supplementary_material": "/attachment/1699e2a2ea2eb4e9f770eeef7bdc3397acf1c108.zip", "author": "Minji Lee;Luiz Felipe Vecchietti;Hyunkyu Jung;Hyunjoo Ro;Meeyoung Cha;Ho Min Kim", "authorids": "~Minji_Lee1;~Luiz_Felipe_Vecchietti1;~Hyunkyu_Jung1;nhj0229@ibs.re.kr;~Meeyoung_Cha2;~Ho_Min_Kim1", "gender": "F;M;M;;F;M", "homepage": "https://haewonc.github.io/;https://lfelipesv.github.io/;;;https://www.mpi-sp.org/cha;", "dblp": ";248/4211;;;57/4924;", "google_scholar": ";xPV0ONYAAAAJ;https://scholar.google.com/citations?hl=ko;;iFlnVCoAAAAJ;https://scholar.google.co.kr/citations?user=AhBBZ-AAAAAJ", "orcid": ";0000-0003-2862-6200;;;0000-0003-4085-9648;", "linkedin": ";;;;meeyoungcha/;", "or_profile": "~Minji_Lee1;~Luiz_Felipe_Vecchietti1;~Hyunkyu_Jung1;nhj0229@ibs.re.kr;~Meeyoung_Cha2;~Ho_Min_Kim1", "aff": "Korea Advanced Institute of Science & Technology;Institute for Basic Science;Korea Advanced Institute of Science & Technology;;Korea Advanced Institute of Science & Technology;", "aff_domain": "kaist.ac.kr;ibs.re.kr;kaist.ac.kr;;kaist.ac.kr;", "position": "Undergrad student;Researcher;MS student;;Full Professor;", "bibtex": "@misc{\nlee2023protein,\ntitle={Protein Sequence Design in a Latent Space via Model-based Reinforcement Learning},\nauthor={Minji Lee and Luiz Felipe Vecchietti and Hyunkyu Jung and Hyunjoo Ro and Meeyoung Cha and Ho Min Kim},\nyear={2023},\nurl={https://openreview.net/forum?id=OhjGzRE5N6o}\n}", "github": "", "project": "", "reviewers": "62rv;KJ1Y;f1s7;F4US", "site": "https://openreview.net/forum?id=OhjGzRE5N6o", "pdf_size": 5578239, "recommendation": "3;3;3;8", "confidence": "5;4;4;4", "correctness": "2;2;2;4", "technical_novelty": "3;2;3;3", "empirical_novelty": "3;3;2;3", "wc_summary_paper": "61;82;75;91", "wc_strength_and_weaknesses": "381;462;466;26", "wc_clarity_quality_novelty_and_reproducibility": "472;87;34;998", "wc_summary_review": "127;89;74;30", "wc_review": "1041;720;649;1145", 
"wc_reply_reviewers": "0;0;539;0", "wc_reply_authors": "3137;1906;1742;2616", "reply_reviewers": "0;0;2;0", "reply_authors": "6;4;4;5", "recommendation_avg": [ 4.25, 2.165063509461097 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 2.5, 0.8660254037844386 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 77.25, 10.96300597464035 ], "wc_strength_and_weaknesses_avg": [ 333.75, 180.8872231529911 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 397.75, 385.5816223577052 ], "wc_summary_review_avg": [ 80.0, 34.734708865916815 ], "wc_review_avg": [ 888.75, 209.04589807025633 ], "wc_reply_reviewers_avg": [ 134.75, 233.3938463199062 ], "wc_reply_authors_avg": [ 2350.25, 560.5632769812878 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 4.75, 0.82915619758885 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": 1.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6994667862798290956&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Institute for Basic Science", "aff_unique_dep": ";", "aff_unique_url": "https://www.kaist.ac.kr;https://www.ibs.re.kr", "aff_unique_abbr": "KAIST;IBS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "OiLPUTbiic5Y", "title": "Multi-User Reinforcement Learning with Low Rank Rewards", "track": "main", "status": "Reject", "tldr": "A statistically efficient method for learning policies collaboratively across multiple users with same state-space transitions but low-rank reward matrix. ", "abstract": " In this work, we consider the problem of collaborative multi-user reinforcement learning. In this setting there are multiple users with the same state-action space and transition probabilities but with different rewards. Under the assumption that the reward matrix of the $N$ users has a low-rank structure -- a standard and practically successful assumption in the offline collaborative filtering setting-- the question is can we design algorithms with significantly lower sample complexity compared to the ones that learn the MDP individually for each user. Our main contribution is an algorithm which explores rewards collaboratively with $N$ user-specific MDPs and can learn rewards efficiently in two key settings: tabular MDPs and linear MDPs. When $N$ is large and the rank is constant, the sample complexity per MDP depends logarithmically over the size of the state-space, which represents an exponential reduction (in the state-space size) when compared to the standard ``non-collaborative'' algorithms. 
", "keywords": "Low Rank Matrix Estimation;Collaborative Reinforcement Learning", "primary_area": "", "supplementary_material": "", "author": "Dheeraj Mysore Nagaraj;Suhas S Kowshik;Praneeth Netrapalli;Naman Agarwal;Prateek Jain", "authorids": "~Dheeraj_Mysore_Nagaraj1;~Suhas_S_Kowshik1;~Praneeth_Netrapalli1;~Naman_Agarwal1;~Prateek_Jain1", "gender": "M;M;M;M;M", "homepage": "https://dheerajmn.mit.edu;;http://praneethnetrapalli.org/;https://naman33k.github.io;http://prateekjain.org", "dblp": "215/5097;234/7703;http://dblp.uni-trier.de/pers/hd/n/Netrapalli:Praneeth;72/3910;https://dblp.uni-trier.de/pers/j/Jain_0002:Prateek.html", "google_scholar": "0g80b7sAAAAJ;gfqNwuMAAAAJ;https://scholar.google.co.in/citations?user=mim8FQkAAAAJ;sEMrGicAAAAJ;qYhRbJoAAAAJ", "orcid": ";0000-0001-5440-6186;;;", "linkedin": "dheeraj-m-nagaraj-01739792/;;;;", "or_profile": "~Dheeraj_Mysore_Nagaraj1;~Suhas_S_Kowshik1;~Praneeth_Netrapalli1;~Naman_Agarwal1;~Prateek_Jain1", "aff": "Google;Amazon;Google;Google;Google", "aff_domain": "google.com;amazon.com;google.com;google.com;google.com", "position": "Research Scientist;Researcher;Research Scientist;Researcher;Researcher", "bibtex": "@misc{\nnagaraj2023multiuser,\ntitle={Multi-User Reinforcement Learning with Low Rank Rewards},\nauthor={Dheeraj Mysore Nagaraj and Suhas S Kowshik and Praneeth Netrapalli and Naman Agarwal and Prateek Jain},\nyear={2023},\nurl={https://openreview.net/forum?id=OiLPUTbiic5Y}\n}", "github": "", "project": "", "reviewers": "iGHL;r7Gm;d781;HAK3;XKWe", "site": "https://openreview.net/forum?id=OiLPUTbiic5Y", "pdf_size": 491198, "recommendation": "5;6;6;6;6", "confidence": "2;3;4;3;2", "correctness": "3;3;3;3;4", "technical_novelty": "3;3;3;3;3", "empirical_novelty": "3;0;0;0;0", "wc_summary_paper": "231;73;230;79;99", "wc_strength_and_weaknesses": "322;420;592;207;85", "wc_clarity_quality_novelty_and_reproducibility": "21;52;66;160;20", "wc_summary_review": "31;43;132;103;17", "wc_review": "605;588;1020;549;221", "wc_reply_reviewers": "5;10;356;11;223", "wc_reply_authors": "449;355;848;528;229", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;1;2;2;2", "recommendation_avg": [ 5.8, 0.39999999999999997 ], "confidence_avg": [ 2.8, 0.7483314773547882 ], "correctness_avg": [ 3.2, 0.39999999999999997 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 0.6, 1.2000000000000002 ], "wc_summary_paper_avg": [ 142.4, 72.44749823147795 ], "wc_strength_and_weaknesses_avg": [ 325.2, 174.26806936441338 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 63.8, 51.28118563371951 ], "wc_summary_review_avg": [ 65.2, 44.44502221846671 ], "wc_review_avg": [ 596.6, 254.06817982580972 ], "wc_reply_reviewers_avg": [ 121.0, 143.8791159272255 ], "wc_reply_authors_avg": [ 481.8, 208.47004580994366 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.8, 0.4000000000000001 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.5345224838248488, "corr_recommendation_correctness": 0.2500000000000001, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17820018382290229266&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "Google;Amazon", "aff_unique_dep": "Google;Amazon.com, Inc.", "aff_unique_url": "https://www.google.com;https://www.amazon.com", "aff_unique_abbr": "Google;Amazon", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;0;0", 
"aff_country_unique": "United States" }, { "id": "Oj1ceY_qohC", "title": "Contrastive Adversarial Loss for Point Cloud Reconstruction", "track": "main", "status": "Withdraw", "tldr": "Learn a point cloud reconstruction loss by contrastive constraint and adversarial training", "abstract": "For point cloud reconstruction-related tasks, the reconstruction losses to evaluate the shape differences between reconstructed results and the ground truths are typically used to train the task networks. The Chamfer Distance (CD) and Earth Mover's Distance (EMD) are two widely-used reconstruction losses, which firstly use predefined strategies to match points in two point clouds and then apply the average distances from points to their matched neighbors as differentiable measurements of shape differences. However, the predefined matching rules may deviate from the real shape differences and cause defective reconstructed results. To solve the above problem, we propose a learning-based Contrastive adversarial Loss (CALoss) to train a reconstruction-related task network without the predefined matching rules. CALoss learns to evaluate shape differences by combining the contrastive constraint with the adversarial strategy. Specifically, we use the contrastive constraint to help CALoss learn shape similarity, while we introduce the adversarial strategy to help CALoss mine differences between reconstructed results and ground truths. According to experiments on reconstruction-related tasks, CALoss can help task networks improve reconstruction performances and learn more representative representations.", "keywords": "Point clouds;reconstruction loss;learning-based", "primary_area": "", "supplementary_material": "", "author": "Tianxin Huang;Zhonggan Ding;Jiangning Zhang;Ying Tai;Zhenyu Zhang;Mingang Chen;Chengjie Wang;Yong Liu", "authorids": "~Tianxin_Huang1;~Zhonggan_Ding1;~Jiangning_Zhang1;~Ying_Tai1;~Zhenyu_Zhang2;~Mingang_Chen1;~Chengjie_Wang1;~Yong_Liu11", "gender": "M;M;M;M;M;M;M;M", "homepage": "https://tianxinhuang.github.io/;;https://www.researchgate.net/profile/Jiangning_Zhang2;https://tyshiwo.github.io/;https://jessezhang92.github.io/;;;https://person.zju.edu.cn/en/yongliu", "dblp": "251/3784;320/6813;241/9593;158/1384;01/1844-5;22/1557.html;;29/4867-7", "google_scholar": "https://scholar.google.com.hk/citations?user=Fg7WYfcAAAAJ;ARJYdDoAAAAJ;https://scholar.google.com.hk/citations?user=2hA4X9wAAAAJ;NKaiUasAAAAJ;4daxK2AAAAAJ;;fqte5H4AAAAJ;https://scholar.google.com.hk/citations?user=qYcgBbEAAAAJ", "orcid": ";;;;;;0000-0003-4216-8090;0000-0003-4822-8939", "linkedin": ";;;;;;;", "or_profile": "~Tianxin_Huang1;~Zhonggan_Ding1;~Jiangning_Zhang1;~Ying_Tai1;~Zhenyu_Zhang2;~Mingang_Chen1;~Chengjie_Wang1;~Yong_Liu11", "aff": "Zhejiang University;Tencent AI Lab;Tencent Youtu Lab;Tencent Youtu Lab;Tencent Youtu Lab;Shanghai Development Center of Computer Software Technology;Tencent YouTu Lab;Zhejiang University", "aff_domain": "zju.edu.cn;tencent.com;tencent.com;tencent.com;tencent.com;sscenter.sh.cn;tencent.com;zju.edu.cn", "position": "PhD student;Researcher;Principal Researcher;Principal Researcher;Research Scientist;Principal Researcher;Researcher;Full Professor", "bibtex": "@misc{\nhuang2023contrastive,\ntitle={Contrastive Adversarial Loss for Point Cloud Reconstruction},\nauthor={Tianxin Huang and Zhonggan Ding and Jiangning Zhang and Ying Tai and Zhenyu Zhang and Mingang Chen and Chengjie Wang and Yong Liu},\nyear={2023},\nurl={https://openreview.net/forum?id=Oj1ceY_qohC}\n}", "github": "", "project": "", 
"reviewers": "qmDN;ATbx;JYSq;7o7C", "site": "https://openreview.net/forum?id=Oj1ceY_qohC", "pdf_size": 1720066, "recommendation": "3;5;5;6", "confidence": "4;5;3;3", "correctness": "3;3;4;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "112;31;82;93", "wc_strength_and_weaknesses": "384;65;66;85", "wc_clarity_quality_novelty_and_reproducibility": "95;174;74;42", "wc_summary_review": "32;32;41;281", "wc_review": "623;302;263;501", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 79.5, 29.9874973947477 ], "wc_strength_and_weaknesses_avg": [ 150.0, 135.33477010731573 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 96.25, 48.694840589121966 ], "wc_summary_review_avg": [ 96.5, 106.58447354094311 ], "wc_review_avg": [ 422.25, 146.9036674150785 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": -0.3458572319330373, "corr_recommendation_correctness": 0.13245323570650439, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3635818321333649508&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;1;1;1;2;1;0", "aff_unique_norm": "Zhejiang University;Tencent;Shanghai Development Center of Computer Software Technology", "aff_unique_dep": ";Tencent AI Lab;", "aff_unique_url": "https://www.zju.edu.cn;https://ai.tencent.com;", "aff_unique_abbr": "ZJU;Tencent AI Lab;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Blurring Diffusion Models", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11631", "id": "OjDkC57x5sz", "poster": "", "openreview": "https://openreview.net/forum?id=OjDkC57x5sz", "slides": "https://iclr.cc/virtual/2023/poster/11631", "video": "https://iclr.cc/virtual/2023/poster/11631", "author_site": "Emiel Hoogeboom, Tim Salimans", "tldr": "We show that blurring can equivalently be defined through a Gaussian diffusion process with non-isotropic noise, bridging the gap between inverse heat dissipation and denoising diffusion", "abstract": "Recently, Rissanen et al., (2022) have presented a new type of diffusion process for generative modeling based on heat dissipation, or blurring, as an alternative to isotropic Gaussian diffusion. Here, we show that blurring can equivalently be defined through a Gaussian diffusion process with non-isotropic noise. In making this connection, we bridge the gap between inverse heat dissipation and denoising diffusion, and we shed light on the inductive bias that results from this modeling choice. Finally, we propose a generalized class of diffusion models that offers the best of both standard Gaussian denoising diffusion and inverse heat dissipation, which we call Blurring Diffusion Models. 
", "keywords": "blurring;diffusion;generative model", "primary_area": "", "supplementary_material": "", "author": "Emiel Hoogeboom;Tim Salimans", "authorids": "~Emiel_Hoogeboom1;~Tim_Salimans1", "gender": ";M", "homepage": ";", "dblp": "217/1488;116/2791", "google_scholar": "https://scholar.google.nl/citations?user=nkTd_BIAAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Emiel_Hoogeboom1;~Tim_Salimans1", "aff": "Google;Google", "aff_domain": "google.com;google.com", "position": "Researcher;Research Scientist", "bibtex": "@inproceedings{\nhoogeboom2023blurring,\ntitle={Blurring Diffusion Models},\nauthor={Emiel Hoogeboom and Tim Salimans},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=OjDkC57x5sz}\n}", "github": "", "project": "", "reviewers": "HZtN;PRJZ;6MNZ;oaCX", "pdf_size": 3406196, "recommendation": "5;5;6;8", "confidence": "2;4;4;3", "correctness": "3;2;3;3", "technical_novelty": "2;3;3;4", "empirical_novelty": "1;3;3;3", "wc_summary_paper": "66;34;22;112", "wc_strength_and_weaknesses": "132;82;256;172", "wc_clarity_quality_novelty_and_reproducibility": "42;8;67;12", "wc_summary_review": "41;36;41;31", "wc_review": "281;160;386;327", "wc_reply_reviewers": "0;0;0;25", "wc_reply_authors": "213;133;198;182", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;2", "recommendation_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 58.5, 34.8245602987317 ], "wc_strength_and_weaknesses_avg": [ 160.5, 63.692621236686435 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 32.25, 23.98306694315804 ], "wc_summary_review_avg": [ 37.25, 4.14578098794425 ], "wc_review_avg": [ 288.5, 83.00150601043333 ], "wc_reply_reviewers_avg": [ 6.25, 10.825317547305483 ], "wc_reply_authors_avg": [ 181.5, 30.07074990750979 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.4714045207910316, "gs_citation": 81, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15994219771067326533&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=OjDkC57x5sz", "email": "google.com;google.com", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "StyleMorph: Disentangled 3D-Aware Image Synthesis with a 3D Morphable StyleGAN", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11636", "id": "Ojpb1y8jflw", "poster": "/media/PosterPDFs/ICLR%202023/11636.png?t=1680910407.0955768", "openreview": "https://openreview.net/forum?id=Ojpb1y8jflw", "slides": "https://iclr.cc/virtual/2023/poster/11636", "video": "https://iclr.cc/virtual/2023/poster/11636", "author_site": "Eric-Tuan Le, Edward Bartrum, Iasonas Kokkinos", "tldr": "A deformable 3D-aware photorealistic image generator", "abstract": "We introduce StyleMorph, a 3D-aware generative model that disentangles 3D shape, camera pose, object 
appearance, and background appearance for high-quality image synthesis. We account for shape variability by morphing a canonical 3D object template, effectively learning a 3D morphable model in an entirely unsupervised manner through backprop. We chain 3D morphable modelling with deferred neural rendering by performing an implicit surface rendering of \"Template Object Coordinates\" (TOCS), which can be understood as an unsupervised counterpart to UV maps. This provides a detailed 2D TOCS map signal that reflects the compounded geometric effects of non-rigid shape variation, camera pose, and perspective projection. We combine 2D TOCS maps with an independent appearance code to condition a StyleGAN-based deferred neural rendering (DNR) network for foreground image (object) synthesis; we use a separate code for background synthesis and do late fusion to deliver the final result. We show competitive synthesis results on 4 datasets (FFHQ faces, AFHQ Cats, Dogs, Wild), while achieving the joint disentanglement of shape, pose, object and background texture.", "keywords": "3D-aware GAN;Template-based;Morphable;Disentanglement;Photorealistic;Neural Radiance Field;StyleGAN", "primary_area": "", "supplementary_material": "/attachment/e33562667f2b8fe85a985f848a95242cbc52b50e.zip", "author": "Eric-Tuan Le;Edward Bartrum;Iasonas Kokkinos", "authorids": "~Eric-Tuan_Le1;~Edward_Bartrum1;~Iasonas_Kokkinos1", "gender": "M;M;M", "homepage": "http://erictuanle.com;;", "dblp": "244/2292;;21/4922", "google_scholar": "-UB3zm8AAAAJ;;ZO3Ek1gAAAAJ", "orcid": ";;", "linkedin": "erictuanle/;edbartrum/;", "or_profile": "~Eric-Tuan_Le1;~Edward_Bartrum1;~Iasonas_Kokkinos1", "aff": "University College London;Meta Reality Labs;Snap Inc.", "aff_domain": "ucl.ac.uk;meta.com;snapchat.com", "position": "PhD student;Intern;Principal Researcher", "bibtex": "@inproceedings{\nle2023stylemorph,\ntitle={StyleMorph: Disentangled 3D-Aware Image Synthesis with a 3D Morphable Style{GAN}},\nauthor={Eric-Tuan Le and Edward Bartrum and Iasonas Kokkinos},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=Ojpb1y8jflw}\n}", "github": "", "project": "", "reviewers": "cRkf;XLzz;J5vi;tGFV;2TB7", "pdf_size": 17379703, "recommendation": "3;6;6;8;8", "confidence": "4;4;2;3;3", "correctness": "2;3;4;4;4", "technical_novelty": "2;3;3;3;3", "empirical_novelty": "2;3;3;3;3", "wc_summary_paper": "48;74;72;85;61", "wc_strength_and_weaknesses": "489;110;222;488;100", "wc_clarity_quality_novelty_and_reproducibility": "108;83;82;43;25", "wc_summary_review": "22;44;33;40;20", "wc_review": "667;311;409;656;206", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "1766;266;589;504;210", "reply_reviewers": "0;0;0;0;0", "reply_authors": "3;1;1;1;1", "recommendation_avg": [ 6.2, 1.8330302779823362 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "correctness_avg": [ 3.4, 0.8 ], "technical_novelty_avg": [ 2.8, 0.39999999999999997 ], "empirical_novelty_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_paper_avg": [ 68.0, 12.569805089976535 ], "wc_strength_and_weaknesses_avg": [ 281.8, 174.12225590084685 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 68.2, 29.982661656363994 ], "wc_summary_review_avg": [ 31.8, 9.516301802696255 ], "wc_review_avg": [ 449.8, 184.42494408295207 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 667.0, 567.4687656602785 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.4, 0.8000000000000002 ],
"replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.46656947481584343, "corr_recommendation_correctness": 0.9001487972234685, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2785890746441003316&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "pdf": "https://openreview.net/pdf?id=Ojpb1y8jflw", "email": "ucl.ac.uk;meta.com;snapchat.com", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University College London;Meta;Snap Inc.", "aff_unique_dep": ";Meta Reality Labs;", "aff_unique_url": "https://www.ucl.ac.uk;https://www.meta.com;https://www.snapinc.com", "aff_unique_abbr": "UCL;MRL;Snap", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United Kingdom;United States" }, { "id": "OmGZ7ymnSno", "title": "On the Nonconvex Convergence of SGD", "track": "main", "status": "Withdraw", "tldr": "This paper shows that the $\\epsilon$-stationary point exists in the final iterates of SGDs in minimizing nonconvex objectives, not just anywhere in the entire range of iterates---A much stronger result than the existing one.", "abstract": "Stochastic gradient descent (SGD) and its variants are the main workhorses for solving large-scale optimization problems with nonconvex objective functions. Although the convergence of SGDs in the (strongly) convex case is well-understood, their convergence for nonconvex functions stands on weak mathematical foundations. Most existing studies on the nonconvex convergence of SGD show the complexity results based on either the minimum of the expected gradient norm or the functional sub-optimality gap (for functions with extra structural property) by searching over the entire range of iterates. Hence the last iterations of SGDs do not necessarily maintain the same complexity guarantee. This paper shows that the $\\epsilon$-stationary point exists in the final iterates of SGDs, not just anywhere in the entire range of iterates---A much stronger result than the existing one. Additionally, our analyses allow us to measure the \\emph{density of the $\\epsilon$-stationary points} in the final iterates of SGD, and we recover the classical $O(\\frac{1}{\\sqrt{T}})$ asymptotic rate under various existing assumptions on the regularity of the objective function and the bounds on the stochastic gradient. 
", "keywords": "Stochastic gradient descent;nonconvex optimization;nonsmooth optimization;random-reshuffling stochastic gradient descent", "primary_area": "", "supplementary_material": "/attachment/00c2b4f4dbf62e31cb603422e2829065ea3a79ce.zip", "author": "Aritra Dutta;El houcine Bergou;Soumia Boucherouite;Xin Li", "authorids": "~Aritra_Dutta1;~El_houcine_Bergou1;~Soumia_Boucherouite1;~Xin_Li39", "gender": "M;M;F;M", "homepage": "https://sciences.ucf.edu/math/person/aritra-dutta/;https://ecrc.kaust.edu.sa/Pages/Bergou.aspx;;https://sciences.ucf.edu/math/xli/", "dblp": "189/9262;https://dblp.uni-trier.de/pers/b/Bergou:El_Houcine.html;;", "google_scholar": "vquoiHsAAAAJ;;;", "orcid": "0000-0001-6994-1659;;;", "linkedin": "aritra-dutta-7b28052b/;ehbergou/;soumia-boucherouite;", "or_profile": "~Aritra_Dutta1;~El_houcine_Bergou1;~Soumia_Boucherouite1;~Xin_Li39", "aff": "University of Southern Denmark (SDU);;College of Computing - Mohammed VI Polytechnic University;University of Central Florida", "aff_domain": "sdu.dk;;um6p.ma;ucf.edu", "position": "Assistant Professor;;PhD student;Full Professor", "bibtex": "@misc{\ndutta2023on,\ntitle={On the Nonconvex Convergence of {SGD}},\nauthor={Aritra Dutta and El houcine Bergou and Soumia Boucherouite and Xin Li},\nyear={2023},\nurl={https://openreview.net/forum?id=OmGZ7ymnSno}\n}", "github": "", "project": "", "reviewers": "ezFt;X8GL;KCtW;GZv7", "site": "https://openreview.net/forum?id=OmGZ7ymnSno", "pdf_size": 995454, "recommendation": "1;3;3;3", "confidence": "4;4;5;4", "correctness": "1;3;1;3", "technical_novelty": "1;2;2;2", "empirical_novelty": "0;0;2;2", "wc_summary_paper": "27;82;107;63", "wc_strength_and_weaknesses": "25;229;148;70", "wc_clarity_quality_novelty_and_reproducibility": "10;110;19;51", "wc_summary_review": "143;35;58;98", "wc_review": "205;456;332;282", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 2.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 2.0, 1.0 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.0, 1.0 ], "wc_summary_paper_avg": [ 69.75, 29.200813344836817 ], "wc_strength_and_weaknesses_avg": [ 118.0, 77.73995111909963 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 47.5, 39.16950344336777 ], "wc_summary_review_avg": [ 83.5, 41.08831950810352 ], "wc_review_avg": [ 318.75, 91.24520535348692 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Southern Denmark;Mohammed VI Polytechnic University;University of Central Florida", "aff_unique_dep": ";College of Computing;", "aff_unique_url": "https://www.sdu.dk;https://www.um6p.ma;https://www.ucf.edu", "aff_unique_abbr": "SDU;UM6P;UCF", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2", "aff_country_unique": "Denmark;Morocco;United States" }, { "id": "Om_QvnjjBL2", "title": "D2Match: Leveraging Deep Learning and Degeneracy for Subgraph Matching", "track": "main", "status": "Reject", "tldr": "", "abstract": "Subgraph matching is a fundamental building block for many graph-based 
applications and is challenging due to its high-order combinatorial nature. However, previous methods usually tackle it by combinatorial optimization or representation learning and suffer from exponential computational cost or matching without theoretical guarantees. In this paper, we develop D2Match by leveraging the efficiency of Deep learning and Degeneracy for subgraph matching. More specifically, we prove that subgraph matching can degenerate to subtree matching, and is subsequently equivalent to finding a perfect matching on a bipartite graph. This matching procedure can be implemented by the built-in tree-structured aggregation mechanism on graph neural networks, which yields linear time complexity. Moreover, circle structures, abstracted as {\\em supernodes}, and node attributes can be easily incorporated into D2Match to boost the matching. Finally, we conduct extensive experiments to show the superior performance of our D2Match and confirm that D2Match indeed exploits the subtrees and differs from existing learning-based subgraph matching methods that depend on memorizing the data distribution divergence.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/d38f0fd575dbf325fcaf6cac368bdb1133a3ebe2.zip", "author": "Xuanzhou Liu;Lin Zhang;Jiaqi Sun;Yujiu Yang;Haiqin Yang", "authorids": "~Xuanzhou_Liu1;~Lin_Zhang9;~Jiaqi_Sun1;~Yujiu_Yang2;~Haiqin_Yang2", "gender": "M;;;M;", "homepage": "https://github.com/XuanzhouLiu;;;https://sites.google.com/view/iigroup-thu;", "dblp": ";;;30/3847;", "google_scholar": ";;;4gH3sxsAAAAJ;", "orcid": ";;;0000-0002-6427-1024;", "linkedin": ";;;;", "or_profile": "~Xuanzhou_Liu1;~Lin_Zhang9;~Jiaqi_Sun1;~Yujiu_Yang2;~Haiqin_Yang2", "aff": "Electronic Engineering, Tsinghua University, Tsinghua University;;;Tsinghua University;", "aff_domain": "mails.tsinghua.edu.cn;;;tsinghua.edu.cn;", "position": "MS student;;;Associate Professor;", "bibtex": "@misc{\nliu2023dmatch,\ntitle={D2Match: Leveraging Deep Learning and Degeneracy for Subgraph Matching},\nauthor={Xuanzhou Liu and Lin Zhang and Jiaqi Sun and Yujiu Yang and Haiqin Yang},\nyear={2023},\nurl={https://openreview.net/forum?id=Om_QvnjjBL2}\n}", "github": "", "project": "", "reviewers": "763b;6wEv;9bfF", "site": "https://openreview.net/forum?id=Om_QvnjjBL2", "pdf_size": 427383, "recommendation": "5;6;6", "confidence": "4;4;4", "correctness": "3;3;3", "technical_novelty": "3;3;3", "empirical_novelty": "3;3;4", "wc_summary_paper": "45;31;48", "wc_strength_and_weaknesses": "163;101;132", "wc_clarity_quality_novelty_and_reproducibility": "110;119;13", "wc_summary_review": "78;44;43", "wc_review": "396;295;236", "wc_reply_reviewers": "0;0;99", "wc_reply_authors": "1244;888;454", "reply_reviewers": "0;0;1", "reply_authors": "4;2;2", "recommendation_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 41.333333333333336, 7.408703590297623 ], "wc_strength_and_weaknesses_avg": [ 132.0, 25.311394008759507 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 80.66666666666667, 47.98842453018111 ], "wc_summary_review_avg": [ 55.0, 16.268579122549905 ], "wc_review_avg": [ 309.0, 66.06562394064456 ], "wc_reply_reviewers_avg": [ 33.0, 46.66904755831214 ], "wc_reply_authors_avg": [ 862.0, 323.0397292387837 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ],
"reply_authors_avg": [ 2.6666666666666665, 0.9428090415820634 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11576332594110729758&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff_unique_index": "0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "Electronic Engineering", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "OmpIgSvg7-Z", "title": "Prometheus: Endowing Low Sample and Communication Complexities to Constrained Decentralized Stochastic Bilevel Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "\tIn recent years, constrained decentralized stochastic bilevel optimization has become increasingly important due to its versatility in modeling a wide range of multi-agent learning problems, such as multi-agent reinforcement learning and multi-agent meta-learning with safety constraints. However, one under-explored and fundamental challenge in constrained decentralized stochastic bilevel optimization is how to achieve low sample and communication complexities, which, if not addressed appropriately, could affect the long-term prospect of many emerging multi-agent learning paradigms that use decentralized bilevel optimization as a bedrock. In this paper, we investigate a class of constrained decentralized bilevel optimization problems, where multiple agents collectively solve a nonconvex-strongly-convex bilevel problem with constraints in the upper-level variables. Such problems arise naturally in many multi-agent reinforcement learning and meta learning problems. In this paper, we propose an algorithm called Prometheus (proximal tracked stochastic recursive estimator) that achieves the first $\\mathcal{O}(\\epsilon^{-1})$ results in both sample and communication complexities for constrained decentralized bilevel optimization, where $\\epsilon>0$ is the desired stationarity error. Collectively, the results in this work contribute to a theoretical foundation for low sample- and communication-complexity constrained decentralized bilevel learning.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/2665ccd85060a80f9ec7cec5304060d62836abc6.zip", "author": "Zhuqing Liu;Xin Zhang;Prashant Khanduri;Songtao Lu;Jia Liu", "authorids": "~Zhuqing_Liu2;~Xin_Zhang16;~Prashant_Khanduri1;~Songtao_Lu1;~Jia_Liu1", "gender": "F;M;M;M;M", "homepage": "https://github.com/Zhuqing-Liu;https://xinzhang-nac.github.io/;https://sites.google.com/view/khanduri-prashant/home?authuser=0;https://songtaogithub.github.io/;https://kevinliu-osu.github.io/index.html", "dblp": "195/1161;76/1584-54.html;158/4888;05/2887;", "google_scholar": ";9u5Pa0gAAAAJ;;LRsjX7kAAAAJ;Ofx3dScAAAAJ", "orcid": "0000-0003-0146-5101;0000-0002-0784-2038;;;", "linkedin": ";;prashant-khanduri-0497894b/;;", "or_profile": "~Zhuqing_Liu2;~Xin_Zhang16;~Prashant_Khanduri1;~Songtao_Lu1;~Jia_Liu1", "aff": "Ohio State University;Meta Facebook;Wayne State University;IBM Thomas J. 
Watson Research Center;The Ohio State University", "aff_domain": "osu.edu;fb.com;wayne.edu;ibm.com;osu.edu", "position": "PhD student;Research Scientist;Assistant Professor;Researcher;Assistant Professor", "bibtex": "@misc{\nliu2023prometheus,\ntitle={Prometheus: Endowing Low Sample and Communication Complexities to Constrained Decentralized Stochastic Bilevel Learning},\nauthor={Zhuqing Liu and Xin Zhang and Prashant Khanduri and Songtao Lu and Jia Liu},\nyear={2023},\nurl={https://openreview.net/forum?id=OmpIgSvg7-Z}\n}", "github": "", "project": "", "reviewers": "Rqm7;gfGV;1rpW", "site": "https://openreview.net/forum?id=OmpIgSvg7-Z", "pdf_size": 917641, "recommendation": "5;6;6", "confidence": "3;3;3", "correctness": "3;4;4", "technical_novelty": "3;3;3", "empirical_novelty": "0;0;3", "wc_summary_paper": "215;60;109", "wc_strength_and_weaknesses": "370;253;219", "wc_clarity_quality_novelty_and_reproducibility": "169;9;39", "wc_summary_review": "174;21;26", "wc_review": "928;343;393", "wc_reply_reviewers": "0;12;730", "wc_reply_authors": "2533;870;2734", "reply_reviewers": "0;1;3", "reply_authors": "4;2;6", "recommendation_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.0 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 1.0, 1.4142135623730951 ], "wc_summary_paper_avg": [ 128.0, 64.68899958004195 ], "wc_strength_and_weaknesses_avg": [ 280.6666666666667, 64.6752571613665 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 72.33333333333333, 69.44222218666553 ], "wc_summary_review_avg": [ 73.66666666666667, 70.97573920400939 ], "wc_review_avg": [ 554.6666666666666, 264.774537211484 ], "wc_reply_reviewers_avg": [ 247.33333333333334, 341.33203124751645 ], "wc_reply_authors_avg": [ 2045.6666666666667, 835.3619308752079 ], "reply_reviewers_avg": [ 1.3333333333333333, 1.247219128924647 ], "reply_authors_avg": [ 4.0, 1.632993161855452 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.9999999999999997, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:SWQ32vjTVjkJ:scholar.google.com/&scioq=Prometheus:+Endowing+Low+Sample+and+Communication+Complexities+to+Constrained+Decentralized+Stochastic+Bilevel+Learning&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "Ohio State University;Meta;Wayne State University;IBM", "aff_unique_dep": ";Meta Platforms, Inc.;;Research", "aff_unique_url": "https://www.osu.edu;https://meta.com;https://wayne.edu;https://www.ibm.com/research", "aff_unique_abbr": "OSU;Meta;WSU;IBM", "aff_campus_unique_index": "1", "aff_campus_unique": ";Yorktown Heights", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Diffusion Posterior Sampling for General Noisy Inverse Problems", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11877", "id": "OnD9zGAGT0k", "poster": "", "openreview": "https://openreview.net/forum?id=OnD9zGAGT0k", "slides": "https://iclr.cc/virtual/2023/poster/11877", "video": "https://iclr.cc/virtual/2023/poster/11877", "author_site": "Hyungjin Chung, Jeongsol Kim, Michael McCann, Marc Klasky, Jong Ye", "tldr": "We propose a diffusion model-based general inverse problem solver that scales to nonlinear problems and different noise statistics.", "abstract": "Diffusion models have been recently studied as powerful 
generative inverse problem solvers, owing to their high quality reconstructions and the ease of combining existing iterative solvers. However, most works focus on solving simple linear inverse problems in noiseless settings, which significantly under-represents the complexity of real-world problems. In this work, we extend diffusion solvers to efficiently handle general noisy (non)linear inverse problems via the Laplace approximation of the posterior sampling. Interestingly, the resulting posterior sampling scheme is a blended version of diffusion sampling with the manifold constrained gradient without a strict measurement consistency projection step, yielding a more desirable generative path in noisy settings compared to the previous studies. Our method demonstrates that diffusion models can incorporate various measurement noise statistics such as Gaussian and Poisson, and also efficiently handle noisy nonlinear inverse problems such as Fourier phase retrieval and non-uniform deblurring.", "keywords": "Diffusion model;Inverse problem;Posterior sampling", "primary_area": "", "supplementary_material": "", "author": "Hyungjin Chung;Jeongsol Kim;Michael Thompson Mccann;Marc Louis Klasky;Jong Chul Ye", "authorids": "~Hyungjin_Chung1;~Jeongsol_Kim1;mccann@lanl.gov;mklasky@lanl.gov;~Jong_Chul_Ye1", "gender": "M;M;;;M", "homepage": "https://www.hj-chung.com/;https://bispl.weebly.com/;;;https://bispl.weebly.com/", "dblp": "262/0382;282/3103;;;15/5613", "google_scholar": "https://scholar.google.co.kr/citations?user=KdchEyoAAAAJ;ZaVNwcQAAAAJ;;;HNMjoNEAAAAJ", "orcid": "0000-0003-3202-0893;;;;", "linkedin": "hyungjin-chung-060b42148/;;;;", "or_profile": "~Hyungjin_Chung1;~Jeongsol_Kim1;mccann@lanl.gov;mklasky@lanl.gov;~Jong_Chul_Ye1", "aff": "NVIDIA;Korea Advanced Institute of Science & Technology;;;Korea Advanced Institute of Science & Technology", "aff_domain": "nvidia.com;kaist.ac.kr;;;kaist.ac.kr", "position": "Intern;PhD student;;;Full Professor", "bibtex": "@inproceedings{\nchung2023diffusion,\ntitle={Diffusion Posterior Sampling for General Noisy Inverse Problems},\nauthor={Hyungjin Chung and Jeongsol Kim and Michael Thompson Mccann and Marc Louis Klasky and Jong Chul Ye},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=OnD9zGAGT0k}\n}", "github": "", "project": "", "reviewers": "ooZZ;VKPQ;dAym;3uLY", "pdf_size": 26982747, "recommendation": "6;6;8;8", "confidence": "3;5;4;4", "correctness": "3;3;4;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "3;3;3;2", "wc_summary_paper": "65;91;77;155", "wc_strength_and_weaknesses": "139;375;91;233", "wc_clarity_quality_novelty_and_reproducibility": "61;16;73;27", "wc_summary_review": "50;44;35;32", "wc_review": "315;526;276;447", "wc_reply_reviewers": "76;0;18;6", "wc_reply_authors": "252;843;66;1200", "reply_reviewers": "1;0;1;1", "reply_authors": "1;2;1;2", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 97.0, 34.72751070837067 ], "wc_strength_and_weaknesses_avg": [ 209.5, 108.34551213594405 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 44.25, 23.466731770743024 ], "wc_summary_review_avg": [ 40.25, 7.1545440106270926 ], "wc_review_avg": [ 391.0, 100.45148082532184 ], "wc_reply_reviewers_avg": [ 25.0, 30.14962686336267 ], 
"wc_reply_authors_avg": [ 590.25, 454.1279417741216 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 784, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=696239910969416231&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=OnD9zGAGT0k", "email": "nvidia.com;kaist.ac.kr;;;kaist.ac.kr", "author_num": 5, "aff_unique_index": "0;1;1", "aff_unique_norm": "NVIDIA;Korea Advanced Institute of Science and Technology", "aff_unique_dep": "NVIDIA Corporation;", "aff_unique_url": "https://www.nvidia.com;https://www.kaist.ac.kr", "aff_unique_abbr": "NVIDIA;KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United States;South Korea" }, { "title": "Visual Imitation Learning with Patch Rewards", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12010", "id": "OnM3R47KIiU", "poster": "/media/PosterPDFs/ICLR%202023/12010.png?t=1681994435.8000426", "openreview": "https://openreview.net/forum?id=OnM3R47KIiU", "slides": "https://iclr.cc/virtual/2023/poster/12010", "video": "https://iclr.cc/virtual/2023/poster/12010", "author_site": "Minghuan Liu, Tairan He, Weinan Zhang, shuicheng YAN, Zhongwen Xu", "tldr": "We leverage to learn patch reward and present PatchAIL, an intuitive and principled learning framework for efficient visual imitation learning. ", "abstract": "Visual imitation learning enables reinforcement learning agents to learn to behave from expert visual demonstrations such as videos or image sequences, without explicit, well-defined rewards. \nPrevious reseaches either adopt supervised learning techniques or induce simple and coarse scalar rewards from pixels, neglecting the dense information contained in the image demonstrations.\nIn this work, we propose to measure the expertise of various local regions of image samples, or called patches, and recover multi-dimensional patch rewards accordingly. \nPatch reward is a more precise rewarding characterization that serves as fine-grained expertise measurement and visual explainability tool.\nSpecifically, we present Adversarial Imitation Learning with Patch Rewards (PatchAIL), which employs a patch-based discriminator to measure the expertise of different local parts from given images and provide patch rewards.\nThe patch-based knowledge is also used to regularize the aggregated reward and stabilize the training.\nWe evaluate our method on the standard pixel-based benchmark DeepMind Control Suite. \nThe experiment results have demonstrated that PatchAIL outperforms baseline methods and provides valuable interpretations for visual demonstrations. 
", "keywords": "imitation learning;reinforcement learning", "primary_area": "", "supplementary_material": "/attachment/c80c29346e12a6c7b708568ff4bd43c978f634d2.zip", "author": "Minghuan Liu;Tairan He;Weinan Zhang;Shuicheng YAN;Zhongwen Xu", "authorids": "~Minghuan_Liu1;~Tairan_He1;~Weinan_Zhang1;~Shuicheng_YAN3;~Zhongwen_Xu1", "gender": "M;M;M;M;M", "homepage": "http://minghuanliu.com;https://tairanhe.com;http://wnzhang.net;https://yanshuicheng.ai/;https://zhongwen.one/", "dblp": "249/7554;263/2891.html;28/10261-1;y/ShuichengYan;130/5077", "google_scholar": ";TVWH2U8AAAAJ;Qzss0GEAAAAJ;https://scholar.google.com.hk/citations?user=DNuiPHwAAAAJ;https://scholar.google.co.uk/citations?user=T4xuHn8AAAAJ", "orcid": ";;0000-0002-0127-2425;;", "linkedin": ";tairan-he-41a904294/;;;", "or_profile": "~Minghuan_Liu1;~Tairan_He1;~Weinan_Zhang1;~Shuicheng_YAN3;~Zhongwen_Xu1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;sea Group;Sea AI Lab", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sea.com;sea.com", "position": "PhD student;Undergrad student;Associate Professor;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nliu2023visual,\ntitle={Visual Imitation Learning with Patch Rewards},\nauthor={Minghuan Liu and Tairan He and Weinan Zhang and Shuicheng YAN and Zhongwen Xu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=OnM3R47KIiU}\n}", "github": "", "project": "", "reviewers": "AJEj;psvi;nR86;52Vk", "pdf_size": 6841361, "recommendation": "5;6;8;8", "confidence": "5;4;4;4", "correctness": "3;3;3;3", "technical_novelty": "2;2;3;4", "empirical_novelty": "3;3;3;4", "wc_summary_paper": "49;130;66;103", "wc_strength_and_weaknesses": "162;202;275;99", "wc_clarity_quality_novelty_and_reproducibility": "31;62;50;82", "wc_summary_review": "131;35;47;126", "wc_review": "373;429;438;410", "wc_reply_reviewers": "164;29;65;0", "wc_reply_authors": "707;579;579;361", "reply_reviewers": "1;1;1;0", "reply_authors": "2;1;2;1", "recommendation_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 87.0, 31.583223394707513 ], "wc_strength_and_weaknesses_avg": [ 184.5, 63.86117756509036 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 56.25, 18.525320510047862 ], "wc_summary_review_avg": [ 84.75, 43.9907660765302 ], "wc_review_avg": [ 412.5, 24.944939366532843 ], "wc_reply_reviewers_avg": [ 64.5, 61.88901356460612 ], "wc_reply_authors_avg": [ 556.5, 124.3814696809778 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.7777777777777777, "corr_recommendation_correctness": 0.0, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2338670851075992360&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=OnM3R47KIiU", "email": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sea.com;sea.com", "author_num": 5, "aff_unique_index": "0;0;0;1;2", "aff_unique_norm": "Shanghai Jiao Tong University;Sea Group;Sea AI Lab", "aff_unique_dep": ";;", "aff_unique_url": "https://www.sjtu.edu.cn;;", "aff_unique_abbr": "SJTU;;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": 
"0;0;0", "aff_country_unique": "China;" }, { "title": "Deep Learning on Implicit Neural Representations of Shapes", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11846", "id": "OoOIW-3uadi", "poster": "/media/PosterPDFs/ICLR%202023/11846.png?t=1680770237.756353", "openreview": "https://openreview.net/forum?id=OoOIW-3uadi", "slides": "https://iclr.cc/virtual/2023/poster/11846", "video": "https://iclr.cc/virtual/2023/poster/11846", "author_site": "Luca De Luigi, Adriano Cardace, Riccardo Spezialetti, Pierluigi Zama Ramirez, Samuele Salti, Luigi Di Stefano", "tldr": "", "abstract": "Implicit Neural Representations (INRs) have emerged in the last few years as a powerful tool to encode continuously a variety of different signals like images, videos, audio and 3D shapes. When applied to 3D shapes, INRs allow to overcome the fragmentation and shortcomings of the popular discrete representations used so far. Yet, considering that INRs consist in neural networks, it is not clear whether and how it may be possible to feed them into deep learning pipelines aimed at solving a downstream task. In this paper, we put forward this research problem and propose inr2vec, a framework that can compute a compact latent representation for an input INR in a single inference pass. We verify that inr2vec can embed effectively the 3D shapes represented by the input INRs and show how the produced embeddings can be fed into deep learning pipelines to solve several tasks by processing exclusively INRs.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Luca De Luigi;Adriano Cardace;Riccardo Spezialetti;Pierluigi Zama Ramirez;Samuele Salti;Luigi di Stefano", "authorids": "~Luca_De_Luigi1;~Adriano_Cardace1;~Riccardo_Spezialetti2;~Pierluigi_Zama_Ramirez1;~Samuele_Salti1;~Luigi_di_Stefano1", "gender": "M;M;M;M;M;M", "homepage": "https://www.unibo.it/sitoweb/luca.deluigi4;https://github.com/adricarda;;https://pierlui92.github.io/;https://www.unibo.it/sitoweb/samuele.salti/en;https://www.unibo.it/sitoweb/luigi.distefano/en", "dblp": "283/4438;;176/1487;228/7804;31/495.html;00/2029", "google_scholar": "PpHLOpQAAAAJ;0uhdTI0AAAAJ;DYADxJAAAAAJ;https://scholar.google.com/citations?hl=it;https://scholar.google.it/citations?user=1kcIJG0AAAAJ;https://scholar.google.it/citations?user=xZVTzyAAAAAJ", "orcid": "0000-0002-2654-7480;;;0000-0001-7734-5064;;0000-0001-6014-6421", "linkedin": ";adriano-cardace/;;pierluigi-zama-ramirez-b02770171/;;", "or_profile": "~Luca_De_Luigi1;~Adriano_Cardace1;~Riccardo_Spezialetti2;~Pierluigi_Zama_Ramirez1;~Samuele_Salti1;~Luigi_di_Stefano1", "aff": "Alma Mater Studiorium - Universit\u00e0 di Bologna;University of Bologna;Eyecan.ai;;University of Bologna;University of Bologna", "aff_domain": "unibo.it;unibo.it;eyecan.ai;;unibo.it;unibo.it", "position": "PhD student;PhD student;Researcher;;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nluigi2023deep,\ntitle={Deep Learning on Implicit Neural Representations of Shapes},\nauthor={Luca De Luigi and Adriano Cardace and Riccardo Spezialetti and Pierluigi Zama Ramirez and Samuele Salti and Luigi di Stefano},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=OoOIW-3uadi}\n}", "github": "", "project": "", "reviewers": "HTw1;shEg;rRwy;VZbJ", "pdf_size": 16016865, "recommendation": "6;6;8;8", "confidence": "4;4;4;4", "correctness": "3;3;4;3", "technical_novelty": "3;3;3;3", "empirical_novelty": 
"2;3;3;4", "wc_summary_paper": "102;68;133;74", "wc_strength_and_weaknesses": "330;140;645;424", "wc_clarity_quality_novelty_and_reproducibility": "32;20;124;48", "wc_summary_review": "90;30;59;44", "wc_review": "554;258;961;590", "wc_reply_reviewers": "133;47;117;72", "wc_reply_authors": "898;691;716;787", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 94.25, 25.791229129298976 ], "wc_strength_and_weaknesses_avg": [ 384.75, 181.77647675098117 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 56.0, 40.496913462633174 ], "wc_summary_review_avg": [ 55.75, 22.275266552838374 ], "wc_review_avg": [ 590.75, 249.57902055260976 ], "wc_reply_reviewers_avg": [ 92.25, 34.390223901568305 ], "wc_reply_authors_avg": [ 773.0, 80.30255288594503 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 56, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6951912100890865248&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=OoOIW-3uadi", "email": "unibo.it;unibo.it;eyecan.ai;;unibo.it;unibo.it", "author_num": 6, "aff_unique_index": "0;1;2;1;1", "aff_unique_norm": "Universit\u00e0 di Bologna;University of Bologna;Eyecan.ai", "aff_unique_dep": ";;", "aff_unique_url": "https://www.unibo.it;https://www.unibo.it;https://www.eyecan.ai", "aff_unique_abbr": "Unibo;Unibo;Eyecan.ai", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "Italy;South Korea" }, { "title": "Sample-Efficient Reinforcement Learning by Breaking the Replay Ratio Barrier", "status": "Top-5%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11457", "id": "OpC-9aBBVJe", "poster": "/media/PosterPDFs/ICLR%202023/11457.png?t=1682549355.6699984", "openreview": "https://openreview.net/forum?id=OpC-9aBBVJe", "slides": "https://iclr.cc/virtual/2023/poster/11457", "video": "https://iclr.cc/virtual/2023/poster/11457", "author_site": "Pierluca D'Oro, Max Schwarzer, Evgenii Nikishin, Pierre-Luc Bacon, Marc G Bellemare, Aaron Courville", "tldr": "The combination of a large number of updates and resets drastically improves the sample efficiency of deep RL algorithms.", "abstract": "Increasing the replay ratio, the number of updates of an agent's parameters per environment interaction, is an appealing strategy for improving the sample efficiency of deep reinforcement learning algorithms. In this work, we show that fully or partially resetting the parameters of deep reinforcement learning agents causes better replay ratio scaling capabilities to emerge. We push the limits of the sample efficiency of carefully-modified algorithms by training them using an order of magnitude more updates than usual, significantly improving their performance in the Atari 100k and DeepMind Control Suite benchmarks. 
We then provide an analysis of the design choices required for favorable replay ratio scaling to be possible and discuss inherent limits and tradeoffs.", "keywords": "reinforcement learning;sample efficiency;resets", "primary_area": "", "supplementary_material": "", "author": "Pierluca D'Oro;Max Schwarzer;Evgenii Nikishin;Pierre-Luc Bacon;Marc G Bellemare;Aaron Courville", "authorids": "~Pierluca_D'Oro1;~Max_Schwarzer1;~Evgenii_Nikishin1;~Pierre-Luc_Bacon1;~Marc_G_Bellemare1;~Aaron_Courville3", "gender": "M;;M;;M;", "homepage": "https://proceduralia.github.io;;http://evgenii-nikishin.github.io/;;http://www.marcgbellemare.info;", "dblp": "248/8326;;294/4770;;38/4525;56/1688", "google_scholar": "https://scholar.google.it/citations?user=AuVp7pkAAAAJ;YmWRSvgAAAAJ;ez9FSEAAAAAJ;;https://scholar.google.co.uk/citations?user=uyYPun0AAAAJ;https://scholar.google.ca/citations?user=km6CP8cAAAAJ", "orcid": ";;;;;", "linkedin": ";maxaschwarzer/;;;;", "or_profile": "~Pierluca_D'Oro1;~Max_Schwarzer1;~Evgenii_Nikishin1;~Pierre-Luc_Bacon1;~Marc_G_Bellemare1;~Aaron_Courville3", "aff": "Universit\u00e9 de Montr\u00e9al;University of Montreal;University of Montreal;;Google;Universit\u00e9 de Montr\u00e9al", "aff_domain": "umontreal.ca;umontreal.ca;umontreal.ca;;google.com; ", "position": "PhD student;PhD student;PhD student;;Research Scientist;Assistant Professor", "bibtex": "@inproceedings{\nd'oro2023sampleefficient,\ntitle={Sample-Efficient Reinforcement Learning by Breaking the Replay Ratio Barrier},\nauthor={Pierluca D'Oro and Max Schwarzer and Evgenii Nikishin and Pierre-Luc Bacon and Marc G Bellemare and Aaron Courville},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=OpC-9aBBVJe}\n}", "github": "", "project": "", "reviewers": "X4J7;MFRL;qf9b", "pdf_size": 3687899, "recommendation": "8;8;8", "confidence": "5;4;4", "correctness": "4;3;4", "technical_novelty": "3;3;3", "empirical_novelty": "3;3;4", "wc_summary_paper": "35;128;62", "wc_strength_and_weaknesses": "429;583;100", "wc_clarity_quality_novelty_and_reproducibility": "32;7;102", "wc_summary_review": "45;103;29", "wc_review": "541;821;293", "wc_reply_reviewers": "274;20;35", "wc_reply_authors": "595;382;379", "reply_reviewers": "2;1;1", "reply_authors": "2;1;2", "recommendation_avg": [ 8.0, 0.0 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 75.0, 39.06404996924922 ], "wc_strength_and_weaknesses_avg": [ 370.6666666666667, 201.45195181206086 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 47.0, 40.2077936060494 ], "wc_summary_review_avg": [ 59.0, 31.790984046843636 ], "wc_review_avg": [ 551.6666666666666, 215.68701604150607 ], "wc_reply_reviewers_avg": [ 109.66666666666667, 116.36246053698855 ], "wc_reply_authors_avg": [ 452.0, 101.1236866416568 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 116, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17016721433824749072&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 2, "pdf": "https://openreview.net/pdf?id=OpC-9aBBVJe", "email": 
"umontreal.ca;umontreal.ca;umontreal.ca;;google.com; ", "author_num": 6, "aff_unique_index": "0;1;1;2;0", "aff_unique_norm": "Universit\u00e9 de Montr\u00e9al;University of Montreal;Google", "aff_unique_dep": ";;Google", "aff_unique_url": "https://www.umontreal.ca;https://wwwumontreal.ca;https://www.google.com", "aff_unique_abbr": "UdeM;UM;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "Canada;United States" }, { "id": "Opcegzztjay", "title": "Causal Explanations of Structural Causal Models", "track": "main", "status": "Reject", "tldr": "As a step towards causal XIL, we propose a solution to the lack of truly causal explanations from existing methods.", "abstract": "In explanatory interactive learning (XIL) the user queries the learner, then the learner explains its answer to the user and finally the loop repeats. XIL is attractive for two reasons, (1) the learner becomes better and (2) the user's trust increases. For both reasons to hold, the learner's explanations must be useful to the user and the user must be allowed to ask useful questions. Ideally, both questions and explanations should be grounded in a causal model since they avoid spurious fallacies. Ultimately, we seem to seek a causal variant of XIL. The question part on the user's end we believe to be solved since the user's mental model can provide the causal model. But how would the learner provide causal explanations? In this work we show that existing explanation methods are not guaranteed to be causal even when provided with a Structural Causal Model (SCM). Specifically, we use the popular, proclaimed causal explanation method CXPlain to illustrate how the generated explanations leave open the question of truly causal explanations. Thus as a step towards causal XIL, we propose a solution to the lack of causal explanations. We solve this problem by deriving from first principles an explanation method that makes full use of a given SCM, which we refer to as SC$\\textbf{E}$ ($\\textbf{E}$ standing for explanation). Since SCEs make use of structural information, any causal graph learner can now provide human-readable explanations. We conduct several experiments including a user study with 22 participants to investigate the virtue of SCE as causal explanations of SCMs.", "keywords": "explanatory interactive learning;explainable artificial intelligence;causal explanations;structural causal models;user study", "primary_area": "", "supplementary_material": "", "author": "Matej Ze\u010devi\u0107;Devendra Singh Dhami;Constantin A. 
Rothkopf;Kristian Kersting", "authorids": "~Matej_Ze\u010devi\u01071;~Devendra_Singh_Dhami1;~Constantin_A._Rothkopf1;~Kristian_Kersting1", "gender": "M;M;M;M", "homepage": "https://sites.google.com/view/devendradhami;http://www.ml.informatik.tu-darmstadt.de/;https://www.pip.tu-darmstadt.de;https://www.matej-zecevic.de", "dblp": "201/2130;40/3793;71/5555;286/1847", "google_scholar": "aVlaHfkAAAAJ;QY-earAAAAAJ;https://scholar.google.com/citations?hl=en;gzJZcPUAAAAJ", "orcid": ";0000-0002-2873-9152;;", "linkedin": ";;;", "or_profile": "~Devendra_Singh_Dhami1;~Kristian_Kersting1;~Constantin_Rothkopf1;~Matej_Zecevic1", "aff": "CS Department, TU Darmstadt, TU Darmstadt;TU Darmstadt;Technische Universit\u00e4t Darmstadt;TU Darmstadt", "aff_domain": "cs.tu-darmstadt.de;tu-darmstadt.de;tu-darmstadt.de;tu-darmstadt.de", "position": "Postdoctoral researcher;Full Professor;Full Professor;PhD student", "bibtex": "@misc{\nze{\\v{c}}evi{\\'c}2023causal,\ntitle={Causal Explanations of Structural Causal Models},\nauthor={Matej Ze{\\v{c}}evi{\\'c} and Devendra Singh Dhami and Constantin A. Rothkopf and Kristian Kersting},\nyear={2023},\nurl={https://openreview.net/forum?id=Opcegzztjay}\n}", "github": "", "project": "", "reviewers": "9oUq;vYZo;Upiw;j3te", "site": "https://openreview.net/forum?id=Opcegzztjay", "pdf_size": 1427922, "recommendation": "3;3;6;8", "confidence": "4;4;3;4", "correctness": "1;2;3;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;1;2;3", "wc_summary_paper": "48;429;74;202", "wc_strength_and_weaknesses": "522;2062;138;481", "wc_clarity_quality_novelty_and_reproducibility": "19;281;37;161", "wc_summary_review": "24;94;29;48", "wc_review": "613;2866;278;892", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "503;3052;283;471", "reply_reviewers": "0;0;0;0", "reply_authors": "1;5;1;1", "recommendation_avg": [ 5.0, 2.1213203435596424 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 188.25, 150.7255369869353 ], "wc_strength_and_weaknesses_avg": [ 800.75, 743.2917916269491 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 124.5, 105.60658123431513 ], "wc_summary_review_avg": [ 48.75, 27.616797424755827 ], "wc_review_avg": [ 1162.25, 1007.3942562373483 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1077.25, 1143.216159569134 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.0, 1.7320508075688772 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.2721655269759087, "corr_recommendation_correctness": 0.8528028654224419, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9756157599610554250&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Technische Universit\u00e4t Darmstadt", "aff_unique_dep": "Computer Science Department", "aff_unique_url": "https://www.tu-darmstadt.de", "aff_unique_abbr": "TU Darmstadt", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Darmstadt", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "id": "OpzV3lp3IMC", "title": "Self-conditioned Embedding Diffusion for Text Generation", "track": "main", "status": "Reject", "tldr": "Our continuous diffusion framework operates on word embeddings, enabling flexible and scalable diffusion models for text generation.", "abstract": "Can continuous diffusion 
models bring the same performance breakthrough on natural language as they did for image generation? To circumvent the discrete nature of text data, we can simply project tokens into a continuous space of embeddings, as is standard in language modeling. We propose Self-conditioned Embedding Diffusion (SED), a continuous diffusion mechanism that operates on token embeddings and allows us to learn flexible and scalable diffusion models for both conditional and unconditional text generation. Through qualitative and quantitative evaluation, we show that our text diffusion models generate samples comparable with those produced by standard autoregressive language models \u2014 while being in theory more efficient on accelerator hardware at inference time. Our work paves the way for scaling up diffusion models for text, similarly to autoregressive models, and for improving performance with recent refinements to continuous diffusion.", "keywords": "language models;diffusion models;generative models", "primary_area": "", "supplementary_material": "", "author": "Robin Strudel;Corentin Tallec;Florent Altch\u00e9;Yilun Du;Yaroslav Ganin;Arthur Mensch;Will Sussman Grathwohl;Nikolay Savinov;Sander Dieleman;Laurent Sifre;R\u00e9mi Leblond", "authorids": "~Robin_Strudel1;~Corentin_Tallec2;~Florent_Altch\u00e91;~Yilun_Du1;~Yaroslav_Ganin1;~Arthur_Mensch1;~Will_Sussman_Grathwohl2;~Nikolay_Savinov1;~Sander_Dieleman1;~Laurent_Sifre1;~R\u00e9mi_Leblond1", "gender": "M;M;;;;M;M;M;M;M;M", "homepage": "https://rstrudel.github.io;;;https://yilundu.github.io;http://yaroslav.ganin.net;http://www.cs.toronto.edu/~wgrathwohl/;https://www.nsavinov.com;http://benanne.github.io/;http://www.cmap.polytechnique.fr/~sifre/;http://www.di.ens.fr/~rleblond/;https://amensch.fr", "dblp": "238/0225;;177/8921.html;204/4379;http://dblp.uni-trier.de/pers/hd/g/Ganin:Yaroslav;192/1565;151/8855;https://dblp.org/pers/d/Dieleman:Sander.html;http://dblp.uni-trier.de/pers/hd/s/Sifre:Laurent;182/2062;156/2229", "google_scholar": "5d3RVLIAAAAJ;OPKX4GgLCxIC;;;https://scholar.google.ca/citations?user=NxaTlNcAAAAJ;;https://scholar.google.ch/citations?user=qUIOyQYAAAAJ;https://scholar.google.co.uk/citations?user=yNNIKJsAAAAJ;https://scholar.google.co.uk/citations?user=0kVh58wAAAAJ;https://scholar.google.fr/citations?user=6UPPnIQAAAAJ;https://scholar.google.fr/citations?user=F8riAN8AAAAJ", "orcid": ";;;;;;;;;;", "linkedin": ";;;;;will-grathwohl-b44a383b/;;;sifre/;;", "or_profile": "~Robin_Strudel1;~Corentin_Tallec2;~Florent_Altch\u00e91;~Yilun_Du1;~Yaroslav_Ganin1;~Will_Sussman_Grathwohl2;~Nikolay_Savinov1;~Sander_Dieleman1;~Laurent_Sifre1;~R\u00e9mi_Leblond1;~Arthur_Mensch2", "aff": ";Google DeepMind;Google DeepMind;Massachusetts Institute of Technology;Google DeepMind;Google DeepMind;Google;Google DeepMind;;Google DeepMind;Google DeepMind", "aff_domain": ";deepmind.com;deepmind.com;mit.edu;google.com;deepmind.com;google.com;deepmind.com;;google.com;deepmind.com", "position": ";Research Scientist;Researcher;PhD student;Research Scientist;Senior Research Scientist;Research Scientist;Research Scientist;;Research scientist;Researcher", "bibtex": "@misc{\nstrudel2023selfconditioned,\ntitle={Self-conditioned Embedding Diffusion for Text Generation},\nauthor={Robin Strudel and Corentin Tallec and Florent Altch{\\'e} and Yilun Du and Yaroslav Ganin and Arthur Mensch and Will Sussman Grathwohl and Nikolay Savinov and Sander Dieleman and Laurent Sifre and R{\\'e}mi Leblond},\nyear={2023},\nurl={https://openreview.net/forum?id=OpzV3lp3IMC}\n}", "github": "",
"project": "", "reviewers": "gGpy;Sxgy;U4Mu;avZf", "site": "https://openreview.net/forum?id=OpzV3lp3IMC", "pdf_size": 964066, "recommendation": "5;5;5;6", "confidence": "5;4;4;3", "correctness": "4;4;4;3", "technical_novelty": "3;2;2;3", "empirical_novelty": "2;2;4;3", "wc_summary_paper": "90;161;40;149", "wc_strength_and_weaknesses": "330;215;277;245", "wc_clarity_quality_novelty_and_reproducibility": "76;20;66;17", "wc_summary_review": "29;96;58;53", "wc_review": "525;492;441;464", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 110.0, 48.53349358947901 ], "wc_strength_and_weaknesses_avg": [ 266.75, 42.59327998640161 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 44.75, 26.508253431714433 ], "wc_summary_review_avg": [ 59.0, 24.01041440708594 ], "wc_review_avg": [ 480.5, 31.40461749488441 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 11, 0 ], "corr_recommendation_confidence": -0.816496580927726, "corr_recommendation_correctness": -1.0, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17283009902737173288&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;1;0;0;0;0;0;0", "aff_unique_norm": "Google;Massachusetts Institute of Technology", "aff_unique_dep": "Google DeepMind;", "aff_unique_url": "https://deepmind.com;https://web.mit.edu", "aff_unique_abbr": "DeepMind;MIT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;1;0;0;1;0;0;0", "aff_country_unique": "United Kingdom;United States" }, { "id": "OqPD_6kukm", "title": "Self-supervised Speech Enhancement using Multi-Modal Data", "track": "main", "status": "Reject", "tldr": "Using clean low resolution IMU data to supervise the multimodal denoiser", "abstract": "Modern earphones come equipped with microphones and inertial measurement units (IMU). When a user wears the earphone, the IMU can serve as a second modality for detecting speech signals. Specifically, as humans speak to their earphones (e.g., during phone calls), the throat\u2019s vibrations propagate through the skull to ultimately induce a vibration in the IMU. The IMU data is heavily distorted\n(compared to the microphone\u2019s recordings), but IMUs offer a critical advantage \u2014 they are not interfered by ambient sounds. This presents an opportunity in multi-modal speech enhancement, i.e., can the distorted but uninterfered IMU signal enhance the user\u2019s speech when the microphone\u2019s signal suffers from strong ambient interference?\nWe combine the best of both modalities (microphone and IMU) by designing a cooperative and self-supervised network architecture that does not rely on clean speech data from the user. Instead, using only noisy speech recordings, the IMU learns to give hints on where the target speech is likely located. The microphone uses this hint to enrich the speech signal, which then trains the IMU to improve subsequent hints. This iterative approach yields promising results, comparable to a supervised denoiser trained on clean speech signals. 
When clean signals are also available to our architecture, we observe promising SI-SNR improvement. We believe this result can aid speech-related applications in earphones and hearing aids, and potentially generalize to others, like audio-visual denoising.", "keywords": "multi-modal;self-supervised;denoising;iterative algorithm;attention map;expectation maximization;IMU", "primary_area": "", "supplementary_material": "", "author": "Yu-Lin Wei;Bashima Islam;RAJALAXMI RAJAGOPALAN;romit choudhury", "authorids": "~Yu-Lin_Wei1;bislam@wpi.edu;~RAJALAXMI_RAJAGOPALAN1;~romit_choudhury1", "gender": "M;;F;M", "homepage": "https://yulinlw2.web.illinois.edu/;;https://rrajalaxmi.web.illinois.edu/;http://croy.web.engr.illinois.edu/", "dblp": ";;;19/6964", "google_scholar": "WTZ-8hwAAAAJ;;;https://scholar.google.com.tw/citations?user=dq2wG-AAAAAJ", "orcid": ";;;", "linkedin": ";;rajalaxmi-rajagopalan;", "or_profile": "~Yu-Lin_Wei1;bislam@wpi.edu;~RAJALAXMI_RAJAGOPALAN1;~romit_choudhury1", "aff": "University of Illinois, Urbana Champaign;;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign", "aff_domain": "illinois.edu;;illinois.edu;illinois.edu", "position": "PhD student;;PhD student;Full Professor", "bibtex": "@misc{\nwei2023selfsupervised,\ntitle={Self-supervised Speech Enhancement using Multi-Modal Data},\nauthor={Yu-Lin Wei and Bashima Islam and RAJALAXMI RAJAGOPALAN and romit choudhury},\nyear={2023},\nurl={https://openreview.net/forum?id=OqPD_6kukm}\n}", "github": "", "project": "", "reviewers": "uCfP;dV4m;euGt;L1Xs;NmkT", "site": "https://openreview.net/forum?id=OqPD_6kukm", "pdf_size": 1353160, "recommendation": "3;5;5;5;5", "confidence": "4;4;2;4;4", "correctness": "2;3;4;3;3", "technical_novelty": "3;3;3;1;3", "empirical_novelty": "3;3;3;2;3", "wc_summary_paper": "71;353;131;61;201", "wc_strength_and_weaknesses": "223;751;120;286;54", "wc_clarity_quality_novelty_and_reproducibility": "300;124;54;43;64", "wc_summary_review": "75;69;78;38;341", "wc_review": "669;1297;383;428;660", "wc_reply_reviewers": "33;256;37;0;236", "wc_reply_authors": "583;587;215;687;715", "reply_reviewers": "1;1;1;0;1", "reply_authors": "1;1;1;1;1", "recommendation_avg": [ 4.6, 0.7999999999999999 ], "confidence_avg": [ 3.6, 0.8 ], "correctness_avg": [ 3.0, 0.6324555320336759 ], "technical_novelty_avg": [ 2.6, 0.8 ], "empirical_novelty_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_paper_avg": [ 163.4, 107.17760960200596 ], "wc_strength_and_weaknesses_avg": [ 286.8, 245.5894134526161 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 117.0, 95.69952977940905 ], "wc_summary_review_avg": [ 120.2, 111.31468905764415 ], "wc_review_avg": [ 687.4, 326.38909295501895 ], "wc_reply_reviewers_avg": [ 112.4, 110.01927103921386 ], "wc_reply_authors_avg": [ 557.4, 179.11069203149208 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.25, "corr_recommendation_correctness": 0.790569415042095, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12168381357125251358&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Illinois Urbana-Champaign", "aff_unique_dep": "", "aff_unique_url": "https://illinois.edu", "aff_unique_abbr": "UIUC", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Urbana-Champaign", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id":
"Or8rcTLo7U", "title": "Maximal Correlation-Based Post-Nonlinear Learning for Bivariate Causal Discovery", "track": "main", "status": "Reject", "tldr": "", "abstract": "Bivariate causal discovery aims to determine the causal relationship between two random variables from passive observational data (as intervention is not affordable in many scientific fields), which is considered fundamental and challenging. Designing algorithms based on the post-nonlinear (PNL) model has aroused much attention for its generality. However, the state-of-the-art (SOTA) PNL-based algorithms involve highly non-convex objectives for neural network training, which are time-consuming and unable to produce meaningful solutions with finite samples. In this paper, we propose a novel method that incorporates maximal correlation into the PNL model learning (short as MC-PNL) such that the underlying nonlinearities can be accurately recovered. Owing to the benign structure of our objective function when modeling the nonlinearities with linear combinations of random Fourier features, the target optimization problem can be solved rather efficiently and rapidly via the block coordinate descent. We also compare the MC-PNL with SOTA methods on the downstream synthetic and real causal discovery tasks to show its superiority in time and accuracy. Our code is available at https://anonymous.4open.science/r/MC-PNL-E446/.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/6f66723e7ce51a8c7159e8d7d313741db092010e.zip", "author": "Tianjian Zhang;Feng Yin;Zhi-Quan Luo", "authorids": "~Tianjian_Zhang1;~Feng_Yin1;~Zhi-Quan_Luo1", "gender": "M;M;M", "homepage": "https://sse.cuhk.edu.cn/en/teacher/309;https://sse.cuhk.edu.cn/en/faculty/yinfeng;", "dblp": "190/6500;59/6917;", "google_scholar": ";4mW1N5oAAAAJ;dW3gcXoAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Tianjian_Zhang1;~Feng_Yin1;~Zhi-Quan_Luo1", "aff": "The Chinese University of Hong Kong, Shenzhen;The Chinese University of Hong Kong;The Chinese University of Hong Kong, Shenzhen", "aff_domain": "cuhk.edu.cn;cuhk.edu.cn;cuhk.edu.cn", "position": "PhD student;Assistant Professor;Full Professor", "bibtex": "@misc{\nzhang2023maximal,\ntitle={Maximal Correlation-Based Post-Nonlinear Learning for Bivariate Causal Discovery},\nauthor={Tianjian Zhang and Feng Yin and Zhi-Quan Luo},\nyear={2023},\nurl={https://openreview.net/forum?id=Or8rcTLo7U}\n}", "github": "", "project": "", "reviewers": "9qg4;7wPL;E1Jp;2uGF", "site": "https://openreview.net/forum?id=Or8rcTLo7U", "pdf_size": 2589355, "recommendation": "3;3;6;6", "confidence": "5;5;4;3", "correctness": "3;3;3;3", "technical_novelty": "2;3;3;2", "empirical_novelty": "2;2;3;2", "wc_summary_paper": "31;163;37;87", "wc_strength_and_weaknesses": "32;725;347;127", "wc_clarity_quality_novelty_and_reproducibility": "18;25;11;16", "wc_summary_review": "203;67;45;43", "wc_review": "284;980;440;273", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "450;766;484;180", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.5, 1.5 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 79.5, 52.88430769141258 ], "wc_strength_and_weaknesses_avg": [ 307.75, 266.62086846306687 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 17.5, 5.024937810560445 ], "wc_summary_review_avg": [ 89.5, 66.20234134832393 ], "wc_review_avg": [ 494.25, 
288.1200921490898 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 470.0, 207.55240302150202 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.9045340337332909, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:MvFfb4WsSiYJ:scholar.google.com/&scioq=Maximal+Correlation-Based+Post-Nonlinear+Learning+for+Bivariate+Causal+Discovery&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Chinese University of Hong Kong", "aff_unique_dep": "", "aff_unique_url": "https://www.cuhk.edu.cn", "aff_unique_abbr": "CUHK", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Shenzhen;Hong Kong SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "BC-IRL: Learning Generalizable Reward Functions from Demonstrations", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10896", "id": "Ovnwe_sDQW", "poster": "", "openreview": "https://openreview.net/forum?id=Ovnwe_sDQW", "slides": "https://iclr.cc/virtual/2023/poster/10896", "video": "https://iclr.cc/virtual/2023/poster/10896", "author_site": "Andrew Szot, Amy Zhang, Dhruv Batra, Zsolt Kira, Franziska Meier", "tldr": "", "abstract": "How well do reward functions learned with inverse reinforcement learning (IRL) generalize? We illustrate that state-of-the-art IRL algorithms, which maximize a maximum-entropy objective, learn rewards that overfit to the demonstrations. Such reward functions struggle to provide meaningful rewards for states not covered by the demonstrations, a major detriment when using the reward to learn policies in new situations. We introduce BC-IRL, a new inverse reinforcement learning method that learns reward functions that generalize better when compared to maximum-entropy IRL approaches. In contrast to the MaxEnt framework, which learns to maximize rewards around demonstrations, BC-IRL updates reward parameters such that the policy trained with the new reward matches the expert demonstrations better.
We show that BC-IRL learns rewards that generalize better on an illustrative simple task and two continuous robotic control tasks, achieving over twice the success rate of baselines in challenging generalization settings.", "keywords": "inverse reinforcement learning;reward learning;reinforcement learning;imitation learning", "primary_area": "", "supplementary_material": "/attachment/bd04a0d3d1c932ab9fcbdde395e15b935331669f.zip", "author": "Andrew Szot;Amy Zhang;Dhruv Batra;Zsolt Kira;Franziska Meier", "authorids": "~Andrew_Szot1;~Amy_Zhang1;~Dhruv_Batra1;~Zsolt_Kira1;~Franziska_Meier2", "gender": "M;;Not Specified;M;", "homepage": "https://www.andrewszot.com;;https://dhruvbatra.com;https://faculty.cc.gatech.edu/~zk15;", "dblp": ";;67/6586;36/4127;", "google_scholar": "IwIWKPYAAAAJ;;_bs7PqgAAAAJ;2a5XgNAAAAAJ;", "orcid": ";;;0000-0002-2626-2004;", "linkedin": ";;;;", "or_profile": "~Andrew_Szot1;~Amy_Zhang1;~Dhruv_Batra1;~Zsolt_Kira1;~Franziska_Meier2", "aff": "Georgia Institute of Technology;;Georgia Institute of Technology;Georgia Tech Research Institute;", "aff_domain": "gatech.edu;;gatech.edu;gtri.gatech.edu;", "position": "PhD student;;Associate Professor;Senior Research Scientist;", "bibtex": "@inproceedings{\nszot2023bcirl,\ntitle={{BC}-{IRL}: Learning Generalizable Reward Functions from Demonstrations},\nauthor={Andrew Szot and Amy Zhang and Dhruv Batra and Zsolt Kira and Franziska Meier},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=Ovnwe_sDQW}\n}", "github": "", "project": "", "reviewers": "pza4;9FVR;RnMY", "pdf_size": 3416179, "recommendation": "3;8;8", "confidence": "4;2;4", "correctness": "2;1;4", "technical_novelty": "2;4;4", "empirical_novelty": "2;3;4", "wc_summary_paper": "52;92;80", "wc_strength_and_weaknesses": "185;176;120", "wc_clarity_quality_novelty_and_reproducibility": "16;24;24", "wc_summary_review": "15;63;58", "wc_review": "268;355;282", "wc_reply_reviewers": "0;58;0", "wc_reply_authors": "485;406;113", "reply_reviewers": "0;1;0", "reply_authors": "2;2;1", "recommendation_avg": [ 6.333333333333333, 2.357022603955158 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "correctness_avg": [ 2.3333333333333335, 1.247219128924647 ], "technical_novelty_avg": [ 3.3333333333333335, 0.9428090415820634 ], "empirical_novelty_avg": [ 3.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 74.66666666666667, 16.75974011996871 ], "wc_strength_and_weaknesses_avg": [ 160.33333333333334, 28.75567576825293 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 21.333333333333332, 3.7712361663282534 ], "wc_summary_review_avg": [ 45.333333333333336, 21.545816814923082 ], "wc_review_avg": [ 301.6666666666667, 38.14300576631172 ], "wc_reply_reviewers_avg": [ 19.333333333333332, 27.34146220587984 ], "wc_reply_authors_avg": [ 334.6666666666667, 160.0256923816367 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.5000000000000001, "corr_recommendation_correctness": 0.18898223650461368, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9795488566665516522&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=Ovnwe_sDQW", "email": "gatech.edu;;gatech.edu;gtri.gatech.edu;", "author_num": 5, "aff_unique_index": "0;0;1", "aff_unique_norm": "Georgia 
Institute of Technology;Georgia Tech Research Institute", "aff_unique_dep": ";", "aff_unique_url": "https://www.gatech.edu;https://www.gtri.gatech.edu", "aff_unique_abbr": "Georgia Tech;GTRI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "Ox0ZtZKG9_-", "title": "Cross-Domain Autonomous Driving Perception using Contrastive Appearance Adaptation", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Addressing domain shifts for complex perception tasks in autonomous driving has long been a challenging problem. In this paper, we show that existing domain adaptation methods pay little attention to the \\textit{content mismatch} issue between source and target images, thereby weakening the domain adaptation performance and the decoupling of domain-invariant and domain-specific representations. To solve the aforementioned problems, we propose an image-level domain adaptation framework that aims at adapting source-domain images to the target domain with content-aligned image pairs. Our framework consists of three mutually beneficial modules in a cycle: a \\textit{cross-domain content alignment} module to generate source-target pairs with consistent content representations in a self-supervised manner, a \\textit{reference-guided image synthesis} module using the generated content-aligned source-target image pairs, and a \\textit{contrastive learning} module to self-supervise a domain-invariant feature extractor from the generated images. Our contrastive appearance adaptation is task-agnostic and robust to complex perception tasks in autonomous driving. Our proposed method demonstrates state-of-the-art results in cross-domain object detection, semantic segmentation, and depth estimation as well as better image synthesis ability qualitatively and quantitatively.", "keywords": "Domain adaptation;Object detection;Semantic segmentation;Depth estimation", "primary_area": "", "supplementary_material": "", "author": "Ziqiang Zheng;Yingshu Chen;Binh-Son Hua;Yang Wu;Sai-Kit Yeung", "authorids": "~Ziqiang_Zheng2;~Yingshu_Chen1;~Binh-Son_Hua1;~Yang_Wu1;~Sai-Kit_Yeung3", "gender": "F;M;M;M;M", "homepage": ";https://sonhua.github.io;;http://www.saikit.org/;https://zhengziqiang.github.io/", "dblp": "298/7733;44/8499;56/1428-1;144/7479;", "google_scholar": "t-tYhTMAAAAJ;sV_VjsAAAAAJ;https://scholar.google.com.hk/citations?user=vwOQ-UIAAAAJ;https://scholar.google.com.tw/citations?user=16iMMwwAAAAJ;bjvs9i0AAAAJ", "orcid": "0000-0001-5418-2813;0000-0002-5706-8634;;;", "linkedin": "yingshu-chen-a0550aa7/;binh-son-hua-40895b14/;;;", "or_profile": "~Yingshu_Chen1;~Binh-Son_Hua1;~Yang_Wu1;~Sai-kit_Yeung1;~Zheng_Ziqiang1", "aff": "Hong Kong University of Science and Technology;VinAI Research;Tencent AI Lab;;Hong Kong University of Science and Technology", "aff_domain": "ust.hk;vinai.io;tencent.com;;ust.hk", "position": "PhD student;Research Scientist;Principal Researcher;;PhD student", "bibtex": "@misc{\nzheng2023crossdomain,\ntitle={Cross-Domain Autonomous Driving Perception using Contrastive Appearance Adaptation},\nauthor={Ziqiang Zheng and Yingshu Chen and Binh-Son Hua and Yang Wu and Sai-Kit Yeung},\nyear={2023},\nurl={https://openreview.net/forum?id=Ox0ZtZKG9_-}\n}", "github": "", "project": "", "reviewers": "NfAp;9QXT;Pzi2;dioB", "site": "https://openreview.net/forum?id=Ox0ZtZKG9_-", "pdf_size": 51897943, "recommendation": "3;5;5;6", "confidence": "5;3;3;4", "correctness": "2;3;3;3", "technical_novelty": "2;3;2;3",
"empirical_novelty": "2;3;2;3", "wc_summary_paper": "76;50;95;245", "wc_strength_and_weaknesses": "904;58;121;473", "wc_clarity_quality_novelty_and_reproducibility": "65;9;136;84", "wc_summary_review": "90;67;61;61", "wc_review": "1135;184;413;863", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 116.5, 75.88972262434486 ], "wc_strength_and_weaknesses_avg": [ 389.0, 336.7736628657295 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 73.5, 45.41200281863816 ], "wc_summary_review_avg": [ 69.75, 11.94518731540029 ], "wc_review_avg": [ 648.75, 372.12657456838525 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.6225430174794673, "corr_recommendation_correctness": 0.9271726499455306, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18404023882794579912&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Hong Kong University of Science and Technology;VinAI Research;Tencent", "aff_unique_dep": ";;Tencent AI Lab", "aff_unique_url": "https://www.ust.hk;https://www.vinai.io/;https://ai.tencent.com", "aff_unique_abbr": "HKUST;VinAI;Tencent AI Lab", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "China;Vietnam" }, { "id": "OxBl7cSgo6_", "title": "Heterogeneous-Agent Mirror Learning", "track": "main", "status": "Reject", "tldr": "A general theoretical framework for development of multi-agent reinforcement learning algorithms.", "abstract": "The necessity for cooperation among intelligent machines has popularised cooperative multi-agent reinforcement learning (MARL) in the artificial intelligence (AI) research community. However, many research endeavours have been focused on developing practical MARL algorithms whose effectiveness has been studied only empirically, thereby lacking theoretical guarantees. As recent studies have revealed, MARL methods often achieve performance that is unstable in terms of reward monotonicity or suboptimal at convergence. To resolve these issues, in this paper, we introduce a novel framework named Heterogeneous-Agent Mirror Learning (HAML) that provides a general template for MARL algorithmic designs. We prove that algorithms derived from the HAML template satisfy the desired properties of the monotonic improvement of the joint reward and the convergence to Nash equilibrium. We verify the practicality of HAML by proving that the current state-of-the-art cooperative MARL algorithms, HATRPO and HAPPO, are in fact HAML instances. 
Next, as a natural outcome of our theory, we propose HAML extensions of two well-known RL algorithms, HAA2C (for A2C) and HADDPG (for DDPG), and demonstrate their effectiveness against strong baselines on StarCraftII and Multi-Agent MuJoCo tasks.", "keywords": "deep multi-agent reinforcement learning;multi-agent reinforcement learning theory", "primary_area": "", "supplementary_material": "", "author": "Jakub Grudzien Kuba;Xidong Feng;Shiyao Ding;Hao Dong;Yaodong Yang", "authorids": "~Jakub_Grudzien_Kuba1;~Xidong_Feng1;~Shiyao_Ding1;~Hao_Dong2;~Yaodong_Yang1", "gender": ";M;M;M;M", "homepage": "https://waterhorse1.github.io/;https://www.dingshiyao.com;https://www.yangyaodong.com;https://zsdonghao.github.io;", "dblp": ";241/5574.html;170/1496-1;14/1525-3.html;", "google_scholar": "JfOLNu8AAAAJ;;https://scholar.google.co.uk/citations?user=6yL0xw8AAAAJ;xLFL4sMAAAAJ;", "orcid": ";;0000-0001-8132-5613;0000-0003-2261-9122;", "linkedin": ";;yaodong-yang;;kuba-grudzie%C5%84-58039114b/", "or_profile": "~Xidong_Feng1;~Shiyao_Ding1;~Yaodong_Yang1;~Hao_Dong3;~Jakub_Grudzien1", "aff": "University College London;Kyoto University;Peking University;Peking University;University of California, Berkeley", "aff_domain": "ucl.ac.uk;kyoto-u.ac.jp;pku.edu.cn;pku.edu.cn;berkeley.edu", "position": "PhD student;Assistant Professor;Assistant Professor;Assistant Professor;PhD student", "bibtex": "@misc{\nkuba2023heterogeneousagent,\ntitle={Heterogeneous-Agent Mirror Learning},\nauthor={Jakub Grudzien Kuba and Xidong Feng and Shiyao Ding and Hao Dong and Yaodong Yang},\nyear={2023},\nurl={https://openreview.net/forum?id=OxBl7cSgo6_}\n}", "github": "", "project": "", "reviewers": "iCs6;Axzm;iVJ4;T26V", "site": "https://openreview.net/forum?id=OxBl7cSgo6_", "pdf_size": 669793, "recommendation": "3;6;6;8", "confidence": "4;4;3;2", "correctness": "3;4;3;3", "technical_novelty": "2;3;3;4", "empirical_novelty": "2;2;2;4", "wc_summary_paper": "125;58;37;50", "wc_strength_and_weaknesses": "650;253;92;134", "wc_clarity_quality_novelty_and_reproducibility": "328;144;14;14", "wc_summary_review": "10;50;52;45", "wc_review": "1113;505;195;243", "wc_reply_reviewers": "262;0;0;0", "wc_reply_authors": "965;516;394;190", "reply_reviewers": "2;0;0;0", "reply_authors": "3;1;1;1", "recommendation_avg": [ 5.75, 1.7853571071357126 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 67.5, 34.03307215048327 ], "wc_strength_and_weaknesses_avg": [ 282.25, 220.37964402367112 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 125.0, 128.65846260545786 ], "wc_summary_review_avg": [ 39.25, 17.07886120325357 ], "wc_review_avg": [ 514.0, 365.4052544778195 ], "wc_reply_reviewers_avg": [ 65.5, 113.44932789576146 ], "wc_reply_authors_avg": [ 516.25, 284.0601828838389 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.8021806287494232, "corr_recommendation_correctness": 0.08084520834544431, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:VQ_z9je3AnYJ:scholar.google.com/&scioq=Heterogeneous-Agent+Mirror+Learning&hl=en&as_sdt=0,24", "gs_version_total": 0, "aff_unique_index": "0;1;2;2;3", "aff_unique_norm": "University College London;Kyoto University;Peking University;University of California, 
Berkeley", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.ucl.ac.uk;https://www.kyoto-u.ac.jp;http://www.pku.edu.cn;https://www.berkeley.edu", "aff_unique_abbr": "UCL;Kyoto U;Peking U;UC Berkeley", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;1;2;2;3", "aff_country_unique": "United Kingdom;Japan;China;United States" }, { "title": "Boosting Multiagent Reinforcement Learning via Permutation Invariant and Permutation Equivariant Networks", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11697", "id": "OxNQXyZK-K8", "poster": "/media/PosterPDFs/ICLR%202023/11697.png?t=1681101257.1791873", "openreview": "https://openreview.net/forum?id=OxNQXyZK-K8", "slides": "https://iclr.cc/virtual/2023/poster/11697", "video": "https://iclr.cc/virtual/2023/poster/11697", "author_site": "Jianye HAO, Xiaotian Hao, Hangyu Mao, Weixun Wang, Yaodong Yang, Dong Li, YAN ZHENG, Zhen Wang", "tldr": "", "abstract": "The state space in Multiagent Reinforcement Learning (MARL) grows exponentially with the agent number. Such a curse of dimensionality results in poor scalability and low sample efficiency, inhibiting MARL for decades. To break this curse, we propose a unified agent permutation framework that exploits the permutation invariance (PI) and permutation equivariance (PE) inductive biases to reduce the multiagent state space. Our insight is that permuting the order of entities in the factored multiagent state space does not change the information. Specifically, we propose two novel implementations: a Dynamic Permutation Network (DPN) and a Hyper Policy Network (HPN). The core idea is to build separate entity-wise PI input and PE output network modules to connect the entity-factored state space and action space in an end-to-end way. DPN achieves such connections by two separate module selection networks, which consistently assign the same input module to the same input entity (guarantee PI) and assign the same output module to the same entity-related output (guarantee PE). To enhance the representation capability, HPN replaces the module selection networks of DPN with hypernetworks to directly generate the corresponding module weights. Extensive experiments in SMAC, Google Research Football and MPE validate that the proposed methods significantly boost the performance and the learning efficiency of existing MARL algorithms. 
Remarkably, in SMAC, we achieve 100% win rates in almost all hard and super-hard scenarios (never achieved before).", "keywords": "Multiagent Reinforcement Learning;Permutation Invariance;Permutation Equivariance", "primary_area": "", "supplementary_material": "/attachment/ccc3eb7f8d9dc92f73030a7227f56d5ced68c61b.zip", "author": "Jianye HAO;Xiaotian Hao;Hangyu Mao;Weixun Wang;Yaodong Yang;Dong Li;YAN ZHENG;Zhen Wang", "authorids": "~Jianye_HAO1;~Xiaotian_Hao1;~Hangyu_Mao2;~Weixun_Wang1;~Yaodong_Yang2;~Dong_Li10;~YAN_ZHENG1;~Zhen_Wang11", "gender": "M;M;;;M;M;M;M", "homepage": "http://www.icdai.org/jianye.html;;;http://n.musk.ndu.com;;;https://yanzzzzz.github.io;http://iopen.nwpu.edu.cn/info/1015/1351.htm?ivk_sa=1024320u", "dblp": "21/7664.html;144/3359;;84/998;170/1496-2;47/4826-16;10/2381-2;", "google_scholar": ";xgk9NPwAAAAJ;;;https://scholar.google.com/citations?hl=zh-CN;;https://scholar.google.com.hk/citations?user=tJuhd1kAAAAJ;https://scholar.google.co.uk/citations?hl=zh-CN", "orcid": "0000-0002-0422-8235;;;;;;;", "linkedin": ";;;;;;;", "or_profile": "~Jianye_HAO1;~Xiaotian_Hao1;~Hangyu_Mao2;~Weixun_Wang1;~Yaodong_Yang2;~Dong_Li10;~YAN_ZHENG1;~Zhen_Wang11", "aff": "Tianjin University;university of tianjin of china, Tianjin University;;Tianjin University;Department of Computer Science and Engineering, The Chinese University of Hong Kong;Huawei Technologies Ltd.;Tianjin University, China;Northwestern Polytechnical University", "aff_domain": "tju.edu.cn;tju.edu.cn;;tju.edu.cn;cse.cuhk.edu.hk;huawei.com;tju.edu.cn;nwpu.edu.cn", "position": "Associate Professor;PhD student;;PhD student;PhD student;Principal Researcher;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nhao2023boosting,\ntitle={Boosting Multiagent Reinforcement Learning via Permutation Invariant and Permutation Equivariant Networks},\nauthor={Jianye HAO and Xiaotian Hao and Hangyu Mao and Weixun Wang and Yaodong Yang and Dong Li and YAN ZHENG and Zhen Wang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=OxNQXyZK-K8}\n}", "github": "", "project": "", "reviewers": "JeYo;TTsq;RFWq;ZC3P", "pdf_size": 13542514, "recommendation": "6;6;6;8", "confidence": "4;3;4;3", "correctness": "3;2;3;4", "technical_novelty": "2;3;3;3", "empirical_novelty": "3;2;3;3", "wc_summary_paper": "84;84;92;103", "wc_strength_and_weaknesses": "369;331;241;118", "wc_clarity_quality_novelty_and_reproducibility": "64;34;74;36", "wc_summary_review": "31;35;61;173", "wc_review": "548;484;468;430", "wc_reply_reviewers": "24;79;0;0", "wc_reply_authors": "1210;2580;806;154", "reply_reviewers": "1;2;0;0", "reply_authors": "3;5;2;1", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 90.75, 7.790218225441442 ], "wc_strength_and_weaknesses_avg": [ 264.75, 96.63947174938406 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 52.0, 17.378147196982766 ], "wc_summary_review_avg": [ 75.0, 57.7408001330082 ], "wc_review_avg": [ 482.5, 42.59988262894629 ], "wc_reply_reviewers_avg": [ 25.75, 32.26743714644843 ], "wc_reply_authors_avg": [ 1187.5, 887.8664032386855 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.75, 1.479019945774904 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence":
-0.5773502691896257, "corr_recommendation_correctness": 0.816496580927726, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14776501105096551402&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "pdf": "https://openreview.net/pdf?id=OxNQXyZK-K8", "email": "tju.edu.cn;tju.edu.cn;;tju.edu.cn;cse.cuhk.edu.hk;huawei.com;tju.edu.cn;nwpu.edu.cn", "author_num": 8, "aff_unique_index": "0;0;0;1;2;0;3", "aff_unique_norm": "Tianjin University;Chinese University of Hong Kong;Huawei;Northwestern Polytechnical University", "aff_unique_dep": ";Department of Computer Science and Engineering;Huawei Technologies;", "aff_unique_url": "http://www.tju.edu.cn;https://www.cuhk.edu.hk;https://www.huawei.com;https://www.nwpu.edu.cn", "aff_unique_abbr": "TJU;CUHK;Huawei;NWPU", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Tianjin;Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "Oy-e1gcBzo", "title": "Dynamic-Aware GANs: Time-Series Generation with Handy Self-Supervision", "track": "main", "status": "Withdraw", "tldr": "This paper presents Dynamic-Aware GANs as a data-efficient self-supervised paradigm for time-series data generation.", "abstract": "This paper presents Dynamic-Aware GAN (DAGAN) as a data-efficient self-supervised paradigm for time-series data generation.\nTo support sequential generation with sufficient clues of temporal dynamics, we explicitly model the transition dynamics within the data sequence through differencing, thus refining the vanilla sequence into one with inter-correlated triplets to characterize each time-step.\nThis localized triplet-consistent structure contributes to a self-supervision mechanism, which can provide more aspects of supervision for the overall stepwise dependencies encoded within the training data. Such a handy self-supervision mechanism is simple but can be beneficial especially when a model is presented with limited training data. Based on this insight, we present DAGAN, which generalizes the locally regularized triplet consistency to the distributional level via dynamic encoding and joint distribution matching. \nExperiments on various synthetic and real-world datasets verify that our model achieves superior generation results with better quality and diversity compared with the state-of-the-art benchmarks, especially when the training data is scarce.
Moreover, benefiting from the dynamic-conditional and dynamic-consistent design, our DAGAN is capable of generating sequences that present specified dynamics.", "keywords": "Time-series modelling;Self-supervision;Deep Generative Models", "primary_area": "", "supplementary_material": "/attachment/6a41cadfdbcf6fa145806b57f25a4633dd2ca6c8.zip", "author": "Yaxin Shi;Ponhvoan Srey;Ivor Tsang", "authorids": "~Yaxin_Shi1;ponhvoan.srey@gmail.com;~Ivor_Tsang1", "gender": "F;;", "homepage": "https://www.scopus.com/authid/detail.uri?authorId=57218191395;;", "dblp": ";;", "google_scholar": ";;", "orcid": "0000-0002-7416-5620;;", "linkedin": ";;", "or_profile": "~Yaxin_Shi1;ponhvoan.srey@gmail.com;~Ivor_Tsang1", "aff": "Centre for Frontier AI Research (CFAR) ;;", "aff_domain": "astar.edu.sg;;", "position": "Postdoc;;", "bibtex": "@misc{\nshi2023dynamicaware,\ntitle={Dynamic-Aware {GAN}s: Time-Series Generation with Handy Self-Supervision},\nauthor={Yaxin Shi and Ponhvoan Srey and Ivor Tsang},\nyear={2023},\nurl={https://openreview.net/forum?id=Oy-e1gcBzo}\n}", "github": "", "project": "", "reviewers": "q7To;4WRR;ZqW7", "site": "https://openreview.net/forum?id=Oy-e1gcBzo", "pdf_size": 6412168, "recommendation": "3;3;5", "confidence": "3;3;3", "correctness": "3;2;3", "technical_novelty": "2;2;2", "empirical_novelty": "2;2;2", "wc_summary_paper": "79;26;59", "wc_strength_and_weaknesses": "343;139;275", "wc_clarity_quality_novelty_and_reproducibility": "69;58;14", "wc_summary_review": "60;35;48", "wc_review": "551;258;396", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.0 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 54.666666666666664, 21.853044537445015 ], "wc_strength_and_weaknesses_avg": [ 252.33333333333334, 84.810900766876 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 47.0, 23.762715894162152 ], "wc_summary_review_avg": [ 47.666666666666664, 10.208928554075703 ], "wc_review_avg": [ 401.6666666666667, 119.68384277847291 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ClfutcJj8vMJ:scholar.google.com/&scioq=Dynamic-Aware+GANs:+Time-Series+Generation+with+Handy+Self-Supervision&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Centre for Frontier AI Research", "aff_unique_dep": "AI Research", "aff_unique_url": "", "aff_unique_abbr": "CFAR" }, { "id": "Oys81jfesjQ", "title": "Simultaneously Learning Stochastic and Adversarial Markov Decision Process with Linear Function Approximation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Reinforcement learning (RL) has been commonly used in practice. To deal with the numerous states and actions in real applications, the function approximation method has been widely employed to improve the learning efficiency, among which the linear function approximation has attracted great interest both theoretically and empirically.
Previous works on the linear Markov Decision Process (MDP) mainly study two settings: the stochastic setting, where the reward is generated stochastically, and the adversarial setting, where the reward can be chosen arbitrarily by an adversary. All these works treat these two environments separately. However, learning agents often have no idea how rewards are generated, and a wrong reward type can severely disrupt the performance of those specially designed algorithms. So a natural question is whether an algorithm can be derived that can efficiently learn in both environments without knowing the reward type. In this paper, we first consider such a best-of-both-worlds problem for linear MDPs with known transitions. We propose an algorithm and prove it can simultaneously achieve $O(\\text{poly} \\log K)$ regret in the stochastic setting and $O(\\sqrt{K})$ regret in the adversarial setting where $K$ is the horizon. To the best of our knowledge, it is the first such result for linear MDPs.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Fang Kong;XiangCheng Zhang;Baoxiang Wang;Shuai Li", "authorids": "~Fang_Kong2;~XiangCheng_Zhang1;~Baoxiang_Wang1;~Shuai_Li3", "gender": "F;M;;F", "homepage": ";;;http://shuaili8.github.io", "dblp": "48/7676-2;;;57/2281-10", "google_scholar": "q1Z41BQAAAAJ;;;https://scholar.google.com.hk/citations?user=kMZgQxcAAAAJ", "orcid": ";;;", "linkedin": ";%E6%A9%A1%E6%88%90-%E7%AB%A0-019354242/;;", "or_profile": "~Fang_Kong2;~XiangCheng_Zhang1;~Baoxiang_Wang1;~Shuai_Li3", "aff": "Shanghai Jiaotong University; Tsinghua University, Tsinghua University;;John Hopcroft Center, Shanghai Jiao Tong University", "aff_domain": "sjtu.edu.cn;mails.tsinghua.edu.cn;;sjtu.edu.cn", "position": "PhD student;Undergrad student;;Assistant Professor", "bibtex": "@misc{\nkong2023simultaneously,\ntitle={Simultaneously Learning Stochastic and Adversarial Markov Decision Process with Linear Function Approximation},\nauthor={Fang Kong and XiangCheng Zhang and Baoxiang Wang and Shuai Li},\nyear={2023},\nurl={https://openreview.net/forum?id=Oys81jfesjQ}\n}", "github": "", "project": "", "reviewers": "GWBz;PB4v;WXEg", "site": "https://openreview.net/forum?id=Oys81jfesjQ", "pdf_size": 375803, "recommendation": "3;5;6", "confidence": "3;2;4", "correctness": "4;3;4", "technical_novelty": "2;2;3", "empirical_novelty": "0;0;0", "wc_summary_paper": "51;35;46", "wc_strength_and_weaknesses": "284;428;162", "wc_clarity_quality_novelty_and_reproducibility": "68;140;42", "wc_summary_review": "167;46;43", "wc_review": "570;649;293", "wc_reply_reviewers": "190;202;0", "wc_reply_authors": "707;711;413", "reply_reviewers": "1;1;0", "reply_authors": "1;1;1", "recommendation_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 0, 0 ], "wc_summary_paper_avg": [ 44.0, 6.683312551921141 ], "wc_strength_and_weaknesses_avg": [ 291.3333333333333, 108.71777939028904 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 83.33333333333333, 41.45144415122617 ], "wc_summary_review_avg": [ 85.33333333333333, 57.76004001229762 ], "wc_review_avg": [ 504.0, 152.64555894839086 ], "wc_reply_reviewers_avg": [ 130.66666666666666, 92.52507167729668 ], "wc_reply_authors_avg": [ 610.3333333333334, 139.54529332402757 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ],
"reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.3273268353539886, "corr_recommendation_correctness": -0.18898223650461363, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:b0jbDQVbJUYJ:scholar.google.com/&scioq=Simultaneously+Learning+Stochastic+and+Adversarial+Markov+Decision+Process+with+Linear+Function+Approximation&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;0", "aff_unique_norm": "Shanghai Jiao Tong University;Tsinghua University", "aff_unique_dep": ";", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.tsinghua.edu.cn", "aff_unique_abbr": "SJTU;THU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shanghai", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Graph Domain Adaptation via Theory-Grounded Spectral Regularization", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11960", "id": "OysfLgrk8mk", "poster": "/media/PosterPDFs/ICLR%202023/11960.png?t=1681264839.7158728", "openreview": "https://openreview.net/forum?id=OysfLgrk8mk", "slides": "https://iclr.cc/virtual/2023/poster/11960", "video": "https://iclr.cc/virtual/2023/poster/11960", "author_site": "Yuning You, Tianlong Chen, Zhangyang Wang, Yang Shen", "tldr": "", "abstract": "Transfer learning on graphs drawn from varied distributions (domains) is in great demand across many applications. Emerging methods attempt to learn domain-invariant representations using graph neural networks (GNNs), yet the empirical performances vary and the theoretical foundation is limited. This paper aims at designing theory-grounded algorithms for graph domain adaptation (GDA). (i) As the first attempt, we derive a model-based GDA bound closely related to two GNN spectral properties: spectral smoothness (SS) and maximum frequency response (MFR). This is achieved by cross-pollinating between the OT-based (optimal transport) DA and graph filter theories. (ii) Inspired by the theoretical results, we propose algorithms regularizing spectral properties of SS and MFR to improve GNN transferability. We further extend the GDA theory into the more challenging scenario of conditional shift, where spectral regularization still applies. (iii) More importantly, our analyses of the theory reveal which regularization would improve performance of what transfer learning scenario, (iv) with numerical agreement with extensive real-world experiments: SS and MFR regularizations bring more benefits to the scenarios of node transfer and link transfer, respectively. In a nutshell, our study paves the way toward explicitly constructing and training GNNs that can capture more transferable representations across graph domains. 
Codes are released at https://github.com/Shen-Lab/GDA-SpecReg.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/8715b83f775f775eb0b7353f74fb3611ed1b3cbc.zip", "author": "Yuning You;Tianlong Chen;Zhangyang Wang;Yang Shen", "authorids": "~Yuning_You1;~Tianlong_Chen1;~Zhangyang_Wang1;~Yang_Shen4", "gender": "M;M;M;", "homepage": "https://yyou1996.github.io/;https://tianlong-chen.github.io;https://vita-group.github.io;https://shen-lab.github.io/", "dblp": "240/8556;;119/4026;95/5308-1.html", "google_scholar": "Pv-V2igAAAAJ;LE3ctn0AAAAJ;pxFyKAIAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0001-7774-8197;;0000-0002-1703-7796", "linkedin": ";tianlong-chen-783862167/;;", "or_profile": "~Yuning_You1;~Tianlong_Chen1;~Zhangyang_Wang1;~Yang_Shen4", "aff": "Texas A&M University;University of Texas, Austin;University of Texas, Austin;Texas A&M University - College Station", "aff_domain": "tamu.edu;utexas.edu;utexas.edu;tamu.edu", "position": "PhD student;PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nyou2023graph,\ntitle={Graph Domain Adaptation via Theory-Grounded Spectral Regularization},\nauthor={Yuning You and Tianlong Chen and Zhangyang Wang and Yang Shen},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=OysfLgrk8mk}\n}", "github": "", "project": "", "reviewers": "no2m;xYtM;bQEk;1Hvj", "pdf_size": 542585, "recommendation": "5;6;6;6", "confidence": "3;3;3;4", "correctness": "3;4;3;4", "technical_novelty": "2;3;3;3", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "117;104;77;67", "wc_strength_and_weaknesses": "357;145;95;97", "wc_clarity_quality_novelty_and_reproducibility": "101;42;43;86", "wc_summary_review": "39;21;10;25", "wc_review": "614;312;225;275", "wc_reply_reviewers": "121;0;45;0", "wc_reply_authors": "1582;194;221;231", "reply_reviewers": "1;0;1;0", "reply_authors": "4;1;2;1", "recommendation_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 91.25, 20.104414938017968 ], "wc_strength_and_weaknesses_avg": [ 173.5, 107.8181339107666 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 68.0, 26.04803255526221 ], "wc_summary_review_avg": [ 23.75, 10.37725879025863 ], "wc_review_avg": [ 356.5, 151.83955347668802 ], "wc_reply_reviewers_avg": [ 41.5, 49.439356791932475 ], "wc_reply_authors_avg": [ 557.0, 591.9387637247623 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 60, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16388637746896626831&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=OysfLgrk8mk", "email": "tamu.edu;utexas.edu;utexas.edu;tamu.edu", "author_num": 4, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Texas A&M University;University of Texas at Austin", "aff_unique_dep": ";", "aff_unique_url": "https://www.tamu.edu;https://www.utexas.edu", "aff_unique_abbr": "TAMU;UT Austin", "aff_campus_unique_index": "1;1;2", "aff_campus_unique": ";Austin;College Station", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": 
"Oz0npxjLAsI", "title": "MeGraph: Graph Representation Learning on Connected Multi-scale Graphs", "track": "main", "status": "Reject", "tldr": "We present a novel graph network architechture learning on a mega graph derived by connecting multi-scale graphs. The architechture allows repeated information exchange across multiple scaled graphs.", "abstract": "We present MeGraph, a novel network architecture for graph-structured data. Given any input graph, we create multi-scale graphs using graph pooling. Then, we connect them into a mega graph by bridging inter-graph edges according to the graph pooling results. Instead of universally stacking graph convolutions over the mega graph, we apply general graph convolutions over intra-graph edges, while the convolutions over inter-graph edges follow a bidirectional pathway to deliver the information along the hierarchy for one turn. Graph convolution and graph pooling are two core elementary operations of MeGraph. In our implementation, we adopt the graph full network (GFuN) and propose the stridden edge contraction pooling (S-EdgePool) with adjustable pooling ratio, which are extended from conventional graph convolution and edge contraction pooling. The MeGraph model enables information exchange across multi-scale graphs, repeatedly, for deeper understanding of wide-range correlations in graphs. This distinguishes MeGraph from many recent hierarchical graph neural networks like Graph U-Nets. We conduct comprehensive empirical studies on tens of public datasets, in which we observe consistent performance gains comparing to baselines. Specifically, we establish 5 new graph theory benchmark tasks that require long-term inference and deduction to solve, where MeGraph demonstrates dominated performance compared with popular graph neural networks.", "keywords": "Hierachical Graph Learning;Multi-scale;Graph Pooling;Graph Neural Networks(GNNs)", "primary_area": "", "supplementary_material": "", "author": "Honghua Dong;Jiawei Xu;Yu Yang;Rui Zhao;Chun Yuan;Xiu Li;Chris J. 
Maddison;Lei Han", "authorids": "~Honghua_Dong1;~Jiawei_Xu1;~Yu_Yang18;~Rui_Zhao1;~Chun_Yuan1;~Xiu_Li1;~Chris_J._Maddison1;~Lei_Han1", "gender": "M;M;M;M;M;F;M;M", "homepage": "https://dhh1995.github.io/;https://github.com/jiawei415;http://google.com;https://ruizhaogit.github.io;https://www.sigs.tsinghua.edu.cn/fg3/105064.jhtml;https://thusigsiclab.github.io/thu.github.io/introduction.html;https://www.leihan.org;http://www.cs.toronto.edu/~cmaddis/", "dblp": "238/2646;;;26/2578-11;;13/1206-1;75/2307-1;139/1388", "google_scholar": "MrGN4oMAAAAJ;;;N1yNDnQAAAAJ;https://scholar.google.com.hk/citations?user=fYdxi2sAAAAJ;https://scholar.google.com/citations?hl=zh-CN;Tz4_zi8AAAAJ;https://scholar.google.ca/citations?user=WjCG3owAAAAJ", "orcid": ";;;;;0000-0003-0403-1923;;", "linkedin": ";;;rui-zhao-profile/;;;;", "or_profile": "~Honghua_Dong1;~Jiawei_Xu1;~Yu_Yang18;~Rui_Zhao1;~Chun_Yuan1;~Xiu_Li1;~Lei_Han1;~Chris_J_Maddison1", "aff": "Department of Computer Science, University of Toronto;Tsinghua University;Shenzhen International Graduate School, Tsinghua University, Tsinghua University;Tencent AI Lab;Tsinghua University;Tsinghua University;Tencent Robotics X;Google", "aff_domain": "cs.toronto.edu;tsinghua.edu.cn;mails.tsinghua.edu.cn;tencent.com;tsinghua.edu.cn;tsinghua.edu.cn;tencent.com;google.com", "position": "PhD student;MS student;MS student;Researcher;Full Professor;Professor;Principal Researcher;Researcher", "bibtex": "@misc{\ndong2023megraph,\ntitle={MeGraph: Graph Representation Learning on Connected Multi-scale Graphs},\nauthor={Honghua Dong and Jiawei Xu and Yu Yang and Rui Zhao and Chun Yuan and Xiu Li and Chris J. Maddison and Lei Han},\nyear={2023},\nurl={https://openreview.net/forum?id=Oz0npxjLAsI}\n}", "github": "", "project": "", "reviewers": "AA1h;M2DM;F7rn;nn8y", "site": "https://openreview.net/forum?id=Oz0npxjLAsI", "pdf_size": 829930, "recommendation": "3;5;8;8", "confidence": "3;4;4;3", "correctness": "2;3;4;4", "technical_novelty": "2;2;3;4", "empirical_novelty": "1;2;4;4", "wc_summary_paper": "70;40;164;94", "wc_strength_and_weaknesses": "308;385;298;133", "wc_clarity_quality_novelty_and_reproducibility": "45;34;108;28", "wc_summary_review": "91;114;53;52", "wc_review": "514;573;623;307", "wc_reply_reviewers": "331;228;12;0", "wc_reply_authors": "2303;2248;813;43", "reply_reviewers": "1;1;1;0", "reply_authors": "6;6;1;1", "recommendation_avg": [ 6.0, 2.1213203435596424 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 2.75, 1.299038105676658 ], "wc_summary_paper_avg": [ 92.0, 45.760244754590204 ], "wc_strength_and_weaknesses_avg": [ 281.0, 91.83953397094304 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 53.75, 31.90905044027478 ], "wc_summary_review_avg": [ 77.5, 26.291633650269812 ], "wc_review_avg": [ 504.25, 120.24012433459973 ], "wc_reply_reviewers_avg": [ 142.75, 141.57926225263358 ], "wc_reply_authors_avg": [ 1351.75, 963.2261870921077 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.5, 2.5 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": 0.23570226039551587, "corr_recommendation_correctness": 0.994936676326182, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:lhUh_kSvKYcJ:scholar.google.com/&scioq=MeGraph:+Graph+Representation+Learning+on+Connected+Multi-scale+Graphs&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": 
"0;1;1;2;1;1;2;3", "aff_unique_norm": "University of Toronto;Tsinghua University;Tencent;Google", "aff_unique_dep": "Department of Computer Science;;Tencent AI Lab;Google", "aff_unique_url": "https://www.utoronto.ca;https://www.tsinghua.edu.cn;https://ai.tencent.com;https://www.google.com", "aff_unique_abbr": "U of T;THU;Tencent AI Lab;Google", "aff_campus_unique_index": "0;2;3", "aff_campus_unique": "Toronto;;Shenzhen;Mountain View", "aff_country_unique_index": "0;1;1;1;1;1;1;2", "aff_country_unique": "Canada;China;United States" }, { "id": "OzHFdcvucgb", "title": "QCRS: Improve Randomized Smoothing using Quasi-Concave Optimization", "track": "main", "status": "Withdraw", "tldr": "Improve traditional randomized smoothing using Quasi-Concave Optimization", "abstract": "Randomized smoothing is currently the state-of-the-art method that provides certified robustness for neural networks. However, it often cannot achieve an adequate certified region on real-world datasets. One way to obtain a larger certified region is to use an input-specific algorithm instead of using a fixed Gaussian filter for all data points. Several methods based on this idea have been proposed, but they either suffer from high computational costs or gain marginal improvement in certified radius. In this work, we show that by exploiting the quasiconvex problem structure, we can find the optimal certified radii for most data points with slight computational overhead. This observation leads to an efficient and effective input-specific randomized smoothing algorithm. We conduct extensive experiments and empirical analysis on Cifar10 and ImageNet. The results show that the proposed method significantly enhances the certified radii with low computational overhead.", "keywords": "Randomized Smoothing;Robustness", "primary_area": "", "supplementary_material": "", "author": "Bo-Han Kung;Shang-Tse Chen", "authorids": "~Bo-Han_Kung2;~Shang-Tse_Chen1", "gender": "M;M", "homepage": "https://kungbohan.github.io/;https://www.csie.ntu.edu.tw/~stchen", "dblp": ";24/9381", "google_scholar": "https://scholar.google.com.tw/citations?user=RggZ0BIAAAAJ;TLfsJRwAAAAJ", "orcid": "0000-0002-2191-7913;", "linkedin": ";shang-tse-chen-5a908627/", "or_profile": "~Bo-Han_Kung2;~Shang-Tse_Chen1", "aff": "Department of computer science and informational engineering, National Taiwan University;National Taiwan University", "aff_domain": "csie.ntu.edu.tw;ntu.edu.tw", "position": "PhD student;Assistant Professor", "bibtex": "@misc{\nkung2023qcrs,\ntitle={{QCRS}: Improve Randomized Smoothing using Quasi-Concave Optimization},\nauthor={Bo-Han Kung and Shang-Tse Chen},\nyear={2023},\nurl={https://openreview.net/forum?id=OzHFdcvucgb}\n}", "github": "", "project": "", "reviewers": "n5tN;jrgh;pGBY;ousJ;96UE", "site": "https://openreview.net/forum?id=OzHFdcvucgb", "pdf_size": 3023952, "recommendation": "3;5;5;5;6", "confidence": "3;3;5;3;3", "correctness": "3;3;3;3;3", "technical_novelty": "2;2;2;3;3", "empirical_novelty": "2;2;2;3;3", "wc_summary_paper": "20;79;59;44;117", "wc_strength_and_weaknesses": "81;203;210;97;125", "wc_clarity_quality_novelty_and_reproducibility": "156;11;15;108;234", "wc_summary_review": "76;41;23;31;49", "wc_review": "333;334;307;280;525", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "368;341;252;341;457", "reply_reviewers": "0;0;0;0;0", "reply_authors": "1;1;1;1;1", "recommendation_avg": [ 4.8, 0.9797958971132712 ], "confidence_avg": [ 3.4, 0.8 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.4, 
0.4898979485566356 ], "empirical_novelty_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_paper_avg": [ 63.8, 32.84752654310519 ], "wc_strength_and_weaknesses_avg": [ 143.2, 53.61492329566462 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 104.8, 85.0726748139495 ], "wc_summary_review_avg": [ 44.0, 18.264720090929398 ], "wc_review_avg": [ 355.8, 86.89165667657626 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 351.8, 65.60914570393368 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.10206207261596574, "corr_recommendation_correctness": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17820363239621724760&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "National Taiwan University", "aff_unique_dep": "Department of Computer Science and Informational Engineering", "aff_unique_url": "https://www.ntu.edu.tw", "aff_unique_abbr": "NTU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Taiwan", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Effects of Graph Convolutions in Multi-layer Networks", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11413", "id": "P-73JPgRs0R", "poster": "", "openreview": "https://openreview.net/forum?id=P-73JPgRs0R", "slides": "https://iclr.cc/virtual/2023/poster/11413", "video": "https://iclr.cc/virtual/2023/poster/11413", "author_site": "Aseem Baranwal, Kimon Fountoulakis, Aukosh Jagannath", "tldr": "Theoretical and empirical insights into the performance of graph convolutions in multi-layer networks", "abstract": "Graph Convolutional Networks (GCNs) are one of the most popular architectures that are used to solve classification problems accompanied by graphical information. We present a rigorous theoretical understanding of the effects of graph convolutions in multi-layer networks. We study these effects through the node classification problem of a non-linearly separable Gaussian mixture model coupled with a stochastic block model. First, we show that a single graph convolution expands the regime of the distance between the means where multi-layer networks can classify the data by a factor of at least $1/\\sqrt[4]{\\rm deg}$, where ${\\rm deg}$ denotes the expected degree of a node. Second, we show that with a slightly stronger graph density, two graph convolutions improve this factor to at least $1/\\sqrt[4]{n}$, where $n$ is the number of nodes in the graph. Finally, we provide both theoretical and empirical insights into the performance of graph convolutions placed in different combinations among the layers of a neural network, concluding that the performance is mutually similar for all combinations of the placement. 
We present extensive experiments on both synthetic and real-world data that illustrate our results.", "keywords": "graph neural networks;node classification;classification threshold;contextual stochastic block model", "primary_area": "", "supplementary_material": "/attachment/97f161c63f4457d043ebfe08ecbc7c6d3abcda6b.zip", "author": "Aseem Baranwal;Kimon Fountoulakis;Aukosh Jagannath", "authorids": "~Aseem_Baranwal1;~Kimon_Fountoulakis1;~Aukosh_Jagannath1", "gender": "M;M;", "homepage": "https://aseemrb.me;https://opallab.ca;", "dblp": "285/5304;149/5799;", "google_scholar": "DPt626YAAAAJ;https://scholar.google.ca/citations?user=K-SafJUAAAAJ;", "orcid": "0000-0001-5318-6054;;", "linkedin": "aseemrb/;;", "or_profile": "~Aseem_Baranwal1;~Kimon_Fountoulakis1;~Aukosh_Jagannath1", "aff": "University of Waterloo;University of Waterloo;", "aff_domain": "uwaterloo.ca;uwaterloo.ca;", "position": "PhD student;Assistant Professor;", "bibtex": "@inproceedings{\nbaranwal2023effects,\ntitle={Effects of Graph Convolutions in Multi-layer Networks},\nauthor={Aseem Baranwal and Kimon Fountoulakis and Aukosh Jagannath},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=P-73JPgRs0R}\n}", "github": "", "project": "", "reviewers": "Wv5W;k2Qb;4qct;vTDa", "pdf_size": 3775524, "recommendation": "6;8;8;8", "confidence": "5;4;3;4", "correctness": "3;4;3;4", "technical_novelty": "3;4;3;3", "empirical_novelty": "2;4;3;2", "wc_summary_paper": "31;85;61;105", "wc_strength_and_weaknesses": "254;212;299;70", "wc_clarity_quality_novelty_and_reproducibility": "8;10;46;53", "wc_summary_review": "29;20;17;24", "wc_review": "322;327;423;252", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1456;673;936;113", "reply_reviewers": "0;0;0;0", "reply_authors": "3;1;2;1", "recommendation_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 70.5, 27.617928959282953 ], "wc_strength_and_weaknesses_avg": [ 208.75, 85.81193098864516 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 29.25, 20.41292482717751 ], "wc_summary_review_avg": [ 22.5, 4.5 ], "wc_review_avg": [ 331.0, 60.83173513882372 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 794.5, 483.9444699549732 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.816496580927726, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 41, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5408026382485317586&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=P-73JPgRs0R", "email": "uwaterloo.ca;uwaterloo.ca;", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "University of Waterloo", "aff_unique_dep": "", "aff_unique_url": "https://uwaterloo.ca", "aff_unique_abbr": "UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { "id": "P0bfBJaD4KP", "title": "Universal Graph Neural Networks without Message Passing", "track": "main", "status": "Reject", "tldr": "", "abstract": "Message-Passing Graph Neural Networks (MP-GNNs) have become the de facto paradigm for learning on graphs for years.
Nevertheless, recent works also obtain promising empirical results with other kinds of architectures like global self-attention and even MLPs. This raises an important theoretical question: what is the minimal prerequisite for an expressive graph model? In this work, we theoretically show that when equipped with proper position encodings, even a simple Bag-of-Nodes (BoN) model (node-wise MLP followed by global readout) can be universal on graphs. We name this model Universal Bag-of-Nodes (UBoN). Synthetic experiments on the EXP dataset show that UBoN indeed achieves expressive power beyond the 1-WL test. On real-world graph classification tasks, UBoN also obtains comparable performance to MP-GNNs while enjoying better training and inference efficiency (50% less training time compared to GCN). We believe that our theoretical and empirical results might inspire more research on simple and expressive GNN architectures.\n", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "George Ma;Yifei Wang;Yisen Wang", "authorids": "~George_Ma1;~Yifei_Wang1;~Yisen_Wang1", "gender": "M;M;M", "homepage": "https://github.com/GeorgeMLP;https://yifeiwang77.com;https://yisenwang.github.io/", "dblp": "86/8408;00/555-1;172/1346-1", "google_scholar": "kiYSRMkAAAAJ;-CLy6YsAAAAJ;uMWPDboAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~George_Ma1;~Yifei_Wang1;~Yisen_Wang1", "aff": "Peking University;Peking University;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;pku.edu.cn", "position": "Undergrad student;PhD student;Assistant Professor", "bibtex": "@misc{\nma2023universal,\ntitle={Universal Graph Neural Networks without Message Passing},\nauthor={George Ma and Yifei Wang and Yisen Wang},\nyear={2023},\nurl={https://openreview.net/forum?id=P0bfBJaD4KP}\n}", "github": "", "project": "", "reviewers": "sg6T;gwSj;8PYU;MeG5;ok4x", "site": "https://openreview.net/forum?id=P0bfBJaD4KP", "pdf_size": 401863, "recommendation": "1;1;1;5;6", "confidence": "5;5;4;3;4", "correctness": "1;2;1;4;4", "technical_novelty": "2;1;1;3;3", "empirical_novelty": "2;2;1;2;2", "wc_summary_paper": "172;69;33;109;40", "wc_strength_and_weaknesses": "269;320;51;695;97", "wc_clarity_quality_novelty_and_reproducibility": "49;24;5;60;19", "wc_summary_review": "82;22;13;75;20", "wc_review": "572;435;102;939;176", "wc_reply_reviewers": "728;0;81;0;0", "wc_reply_authors": "2153;930;897;1445;537", "reply_reviewers": "2;0;2;0;0", "reply_authors": "4;2;3;2;1", "recommendation_avg": [ 2.8, 2.227105745132009 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "correctness_avg": [ 2.4, 1.3564659966250536 ], "technical_novelty_avg": [ 2.0, 0.8944271909999159 ], "empirical_novelty_avg": [ 1.8, 0.4 ], "wc_summary_paper_avg": [ 84.6, 51.24295073471082 ], "wc_strength_and_weaknesses_avg": [ 286.4, 227.8908510669088 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 31.4, 20.16531675922796 ], "wc_summary_review_avg": [ 42.4, 29.70925781637771 ], "wc_review_avg": [ 444.8, 300.09158602000156 ], "wc_reply_reviewers_avg": [ 161.8, 284.8328632724813 ], "wc_reply_authors_avg": [ 1192.4, 560.7536357438978 ], "reply_reviewers_avg": [ 0.8, 0.9797958971132713 ], "reply_authors_avg": [ 2.4, 1.019803902718557 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.696022273069113, "corr_recommendation_correctness": 0.953328854398277, "gs_citation": 0, "gs_cited_by_link":
"https://scholar.google.com/scholar?q=related:lXYVkciVLqQJ:scholar.google.com/&scioq=Universal+Graph+Neural+Networks+without+Message+Passing&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "P17yA67o3VL", "title": "CAST: Concurrent Recognition and Segmentation with Adaptive Segment Tokens", "track": "main", "status": "Reject", "tldr": "A new ViT integrated with data-driven perceptual organization to simultaneously learn image segmentation for free while training the model for unsupervised recognition.", "abstract": "Recognizing an image and segmenting it into coherent regions are often treated as separate tasks. Human vision, however, has a general sense of segmentation hierarchy before recognition occurs. We are thus inspired to learn image recognition with hierarchical image segmentation based entirely on unlabeled images. Our insight is to learn fine-to-coarse features concurrently at superpixels, segments, and full image levels, enforcing consistency and goodness of feature induced segmentations while maximizing discrimination among image instances.\n\nOur model innovates vision transformers on three aspects. 1) We use adaptive segment tokens instead of fixed-shape patch tokens. 2) We create a token hierarchy by inserting graph pooling between transformer blocks, naturally producing consistent multi-scale segmentations while increasing the segment size and reducing the number of tokens. 3) We produce hierarchical image segmentation for free {\\it while} training for recognition by maximizing image-wise discrimination.\n\nOur work delivers the first concurrent recognition and hierarchical segmentation model without any supervision. 
Validated on ImageNet and PASCAL VOC, it achieves better recognition and segmentation with higher computational efficiency.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Tsung-Wei Ke;Jyh-Jing Hwang;Stella Yu", "authorids": "~Tsung-Wei_Ke2;~Jyh-Jing_Hwang1;~Stella_Yu2", "gender": ";M;F", "homepage": "https://twke18.github.io/;http://jyhjinghwang.github.io/;http://www.eecs.umich.edu/~stellayu", "dblp": "173/4984;156/0239;58/5089", "google_scholar": "WTEFsHMAAAAJ;ClTTUWkAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;", "linkedin": ";;", "or_profile": "~Tsung-Wei_Ke2;~Jyh-Jing_Hwang1;~Stella_Yu2", "aff": "Carnegie Mellon University;Waymo;University of California, Berkeley", "aff_domain": "andrew.cmu.edu;waymo.com;berkeley.edu", "position": "Postdoc;Researcher;Adjunct Professor", "bibtex": "@misc{\nke2023cast,\ntitle={{CAST}: Concurrent Recognition and Segmentation with Adaptive Segment Tokens},\nauthor={Tsung-Wei Ke and Jyh-Jing Hwang and Stella Yu},\nyear={2023},\nurl={https://openreview.net/forum?id=P17yA67o3VL}\n}", "github": "", "project": "", "reviewers": "Jmwj;5DAX;BmNe;kF9A", "site": "https://openreview.net/forum?id=P17yA67o3VL", "pdf_size": 13612618, "recommendation": "5;6;6;6", "confidence": "3;4;3;3", "correctness": "3;3;3;3", "technical_novelty": "3;3;3;3", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "138;35;23;64", "wc_strength_and_weaknesses": "700;164;71;337", "wc_clarity_quality_novelty_and_reproducibility": "91;10;13;37", "wc_summary_review": "91;34;26;28", "wc_review": "1020;243;133;466", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "706;413;187;236", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 65.0, 44.704585894514224 ], "wc_strength_and_weaknesses_avg": [ 318.0, 240.31749832253163 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 37.75, 32.47595264191645 ], "wc_summary_review_avg": [ 44.75, 26.864242032858474 ], "wc_review_avg": [ 465.5, 341.8819240615099 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 385.5, 203.241851005151 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=337886538241213516&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1;2", "aff_unique_norm": "Carnegie Mellon University;Waymo;University of California, Berkeley", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cmu.edu;https://www.waymo.com;https://www.berkeley.edu", "aff_unique_abbr": "CMU;Waymo;UC Berkeley", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "P1MaSJlwdT4", "title": "Go-Explore with a guide: Speeding up search in sparse reward settings with goal-directed intrinsic rewards", "track": "main", "status": "Reject", "tldr": "Speeding up search in sparse reward settings with goal-directed intrinsic rewards", "abstract": "Reinforcement Learning (RL) agents have traditionally been very sample-intensive to train, especially in environments with sparse rewards. 
Drawing inspiration from neuroscience experiments of rats learning the structure of a maze without needing extrinsic rewards, we seek to incorporate additional intrinsic rewards to guide behavior. We propose a potential-based goal-directed intrinsic reward (GDIR), which provides a reward signal regardless of whether the task is achieved, and ensures that learning can always take place. While GDIR may be similar to approaches such as reward shaping in incorporating goal-based rewards, we highlight that GDIR is innate to the agent and hence applicable across a wide range of environments without needing to rely on a properly shaped environment reward. We also note that GDIR is different from curiosity-based intrinsic motivation, which can diminish over time and lead to inefficient exploration. Go-Explore is a well-known state-of-the-art algorithm for sparse reward domains, and we demonstrate that by incorporating GDIR in the ``Go\" and ``Explore\" phases, we can improve Go-Explore's performance and enable it to learn faster across multiple environments, for both discrete (2D grid maze environments, Towers of Hanoi, Game of Nim) and continuous (Cart Pole and Mountain Car) state spaces. Furthermore, to consolidate learnt trajectories better, our method also incorporates a novel approach of hippocampal replay to update the values of GDIR and reset state visit and selection counts of states along the successful trajectory. As a benchmark, we also show that our proposed approaches learn significantly faster than traditional extrinsic-reward-based RL algorithms such as Proximal Policy Optimization, TD-learning, and Q-learning.", "keywords": "reinforcement learning;intrinsic motivation;goal-directed rewards;hippocampal replay;hard-exploration;sparse rewards", "primary_area": "", "supplementary_material": "/attachment/4bebf57658a895ba585dc531830c8c284b7f1472.zip", "author": "Chong Min John Tan;Mehul Motani", "authorids": "~Chong_Min_John_Tan2;~Mehul_Motani1", "gender": "M;M", "homepage": "https://delvingintotech.wordpress.com/;https://mehulmotani.github.io/", "dblp": ";83/4035", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;https://scholar.google.com.sg/citations?user=Bm9BwEQAAAAJ", "orcid": ";", "linkedin": "chong-min-tan-94652288/;", "or_profile": "~Chong_Min_John_Tan2;~Mehul_Motani1", "aff": "National University of Singapore;National University of Singapore", "aff_domain": "u.nus.edu;nus.edu.sg", "position": "PhD student;Associate Professor", "bibtex": "@misc{\ntan2023goexplore,\ntitle={Go-Explore with a guide: Speeding up search in sparse reward settings with goal-directed intrinsic rewards},\nauthor={Chong Min John Tan and Mehul Motani},\nyear={2023},\nurl={https://openreview.net/forum?id=P1MaSJlwdT4}\n}", "github": "", "project": "", "reviewers": "eHGy;5gPZ;j1xa;JKcR", "site": "https://openreview.net/forum?id=P1MaSJlwdT4", "pdf_size": 560608, "recommendation": "1;3;3;3", "confidence": "5;2;4;3", "correctness": "1;2;3;3", "technical_novelty": "1;2;2;2", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "43;74;101;47", "wc_strength_and_weaknesses": "812;1225;153;98", "wc_clarity_quality_novelty_and_reproducibility": "169;131;272;5", "wc_summary_review": "88;161;52;7", "wc_review": "1112;1591;578;157", "wc_reply_reviewers": "98;0;103;0", "wc_reply_authors": "748;732;350;121", "reply_reviewers": "1;0;1;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 2.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "correctness_avg": [ 2.25,
0.82915619758885 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 66.25, 23.33854108550918 ], "wc_strength_and_weaknesses_avg": [ 572.0, 470.1717771198097 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 144.25, 95.52323015895139 ], "wc_summary_review_avg": [ 77.0, 56.35157495580758 ], "wc_review_avg": [ 859.5, 541.2016722073205 ], "wc_reply_reviewers_avg": [ 50.25, 50.28108491271842 ], "wc_reply_authors_avg": [ 487.75, 264.9852590239691 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.7745966692414834, "corr_recommendation_correctness": 0.8703882797784891, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:-7teR4xfbbcJ:scholar.google.com/&scioq=Go-Explore+with+a+guide:+Speeding+up+search+in+sparse+reward+settings+with+goal-directed+intrinsic+rewards&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "National University of Singapore", "aff_unique_dep": "", "aff_unique_url": "https://www.nus.edu.sg", "aff_unique_abbr": "NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Singapore" }, { "title": "Learning with Stochastic Orders", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11451", "id": "P3PJokAqGW", "poster": "/media/PosterPDFs/ICLR%202023/11451.png?t=1682521606.28382", "openreview": "https://openreview.net/forum?id=P3PJokAqGW", "slides": "https://iclr.cc/virtual/2023/poster/11451", "video": "https://iclr.cc/virtual/2023/poster/11451", "author_site": "Carles Domingo i Enrich, Yair Schiff, Youssef Mroueh", "tldr": "We propose and study discrepancies and distances between probability measures that arise from the convex or Choquet order, which capture dominance constraints and are useful in applications like image generation.", "abstract": "Learning high-dimensional distributions is often done with explicit likelihood modeling or implicit modeling via minimizing integral probability metrics (IPMs). In this paper, we expand this learning paradigm to stochastic orders, namely, the convex or Choquet order between probability measures. Towards this end, exploiting the relation between convex orders and optimal transport, we introduce the Choquet-Toland distance between probability measures, which can be used as a drop-in replacement for IPMs. We also introduce the Variational Dominance Criterion (VDC) to learn probability measures with dominance constraints, which encode the desired stochastic order between the learned measure and a known baseline. We analyze both quantities and show that they suffer from the curse of dimensionality and propose surrogates via input convex maxout networks (ICMNs), which enjoy parametric rates. We provide a min-max framework for learning with stochastic orders and validate it experimentally on synthetic and high-dimensional image generation, with promising results. Finally, our ICMNs class of convex functions and its derived Rademacher Complexity are of independent interest beyond their application in convex orders.
Code to reproduce experimental results is available at https://github.com/yair-schiff/stochastic-orders-ICMN.", "keywords": "optimal transport;stochastic order;Choquet order;convex function;input convex neural network;integral probability metric;image generation;statistical rates", "primary_area": "", "supplementary_material": "/attachment/981c2a5e7614e0fe73e671c24e61a7b452789845.zip", "author": "Carles Domingo-Enrich;Yair Schiff;Youssef Mroueh", "authorids": "~Carles_Domingo-Enrich1;~Yair_Schiff1;~Youssef_Mroueh1", "gender": "M;M;", "homepage": "https://cdenrich.github.io;https://github.com/yair-schiff;", "dblp": "216/7444.html;;http://dblp.uni-trier.de/pers/hd/m/Mroueh:Youssef", "google_scholar": "1ZHcGwIAAAAJ;GhFrOdQAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;", "linkedin": ";yair-schiff;", "or_profile": "~Carles_Domingo-Enrich1;~Yair_Schiff1;~Youssef_Mroueh1", "aff": "New York University;Department of Computer Science, Cornell University;IBM", "aff_domain": "nyu.edu;cs.cornell.edu;us.ibm.com", "position": "PhD student;PhD student;Research Staff member", "bibtex": "@inproceedings{\ndomingo-enrich2023learning,\ntitle={Learning with Stochastic Orders},\nauthor={Carles Domingo-Enrich and Yair Schiff and Youssef Mroueh},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=P3PJokAqGW}\n}", "github": "", "project": "", "reviewers": "K5E1;K4TL;6Q2H;QEba", "pdf_size": 1440939, "recommendation": "5;6;8;8", "confidence": "3;2;3;3", "correctness": "2;4;4;3", "technical_novelty": "2;2;4;4", "empirical_novelty": "2;2;3;4", "wc_summary_paper": "57;73;130;101", "wc_strength_and_weaknesses": "182;53;109;173", "wc_clarity_quality_novelty_and_reproducibility": "20;34;25;2", "wc_summary_review": "42;42;23;11", "wc_review": "301;202;287;287", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 3.0, 1.0 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 90.25, 27.83320858255476 ], "wc_strength_and_weaknesses_avg": [ 129.25, 52.25119615855698 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 20.25, 11.669940016983807 ], "wc_summary_review_avg": [ 29.5, 13.200378782444085 ], "wc_review_avg": [ 269.25, 39.245222639195205 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 0.5222329678670935, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5802653200563458129&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=P3PJokAqGW", "email": "nyu.edu;cs.cornell.edu;us.ibm.com", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "New York University;Cornell University;International Business Machines Corporation", "aff_unique_dep": ";Department of Computer Science;", "aff_unique_url": "https://www.nyu.edu;https://www.cornell.edu;https://www.ibm.com", "aff_unique_abbr": "NYU;Cornell;IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "LMSeg: 
Language-guided Multi-dataset Segmentation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11679", "id": "P44WPn1_aJV", "poster": "/media/PosterPDFs/ICLR%202023/11679.png?t=1680756100.163476", "openreview": "https://openreview.net/forum?id=P44WPn1_aJV", "slides": "https://iclr.cc/virtual/2023/poster/11679", "video": "https://iclr.cc/virtual/2023/poster/11679", "author_site": "Qiang Zhou, Yuang Liu, Chaohui Yu, Jingliang Li, Zhibin Wang, Fan Wang", "tldr": "", "abstract": "It\u2019s a meaningful and attractive topic to build a general and inclusive segmentation model that can recognize more categories in various scenarios. A straightforward way is to combine the existing fragmented segmentation datasets and train a multi-dataset network. However, there are two major issues with multi-dataset segmentation: (i) the inconsistent taxonomy demands manual reconciliation to construct a unified taxonomy; (ii) the inflexible one-hot common taxonomy causes time-consuming model retraining and defective supervision of unlabeled categories. In this paper, we investigate the multi-dataset segmentation and propose a scalable Language-guided Multi-dataset Segmentation framework, dubbed LMSeg, which supports both semantic and panoptic segmentation. Specifically, we introduce a pretrained text encoder to map the category names to a text embedding space as a unified taxonomy, instead of using inflexible one-hot label. The model dynamically aligns the segment queries with the category embeddings. Instead of relabeling each dataset with the unified taxonomy, a category-guided decoding module is designed to dynamically guide predictions to each dataset\u2019s taxonomy. Furthermore, we adopt a dataset-aware augmentation strategy that assigns each dataset a specific image augmentation pipeline, which can suit the proper-\nties of images from different datasets. 
Extensive experiments demonstrate that our method achieves significant improvements on four segmentation datasets and three panoptic datasets, while the ablation study evaluates the effectiveness of each component.\n", "keywords": "Segmentation;Multi-dataset;Vision-language", "primary_area": "", "supplementary_material": "/attachment/c1f4f983dc2d836a791a65b0900f00ffe4693b70.zip", "author": "Qiang Zhou;Yuang Liu;Chaohui Yu;Jingliang Li;Zhibin Wang;Fan Wang", "authorids": "~Qiang_Zhou8;~Yuang_Liu1;~Chaohui_Yu1;~Jingliang_Li1;~Zhibin_Wang1;~Fan_Wang6", "gender": "M;M;M;M;M;F", "homepage": "https://mightyzau.github.io/;;https://richardych.github.io/;;;", "dblp": ";166/6324;14/10377;41/9968;;", "google_scholar": ";;b1Q-k20AAAAJ;;YHzKee8AAAAJ;WCRGTHsAAAAJ", "orcid": "0000-0003-3697-9348;0000-0002-7338-4696;0000-0002-7852-4491;;0000-0001-7618-7973;0000-0001-7320-1119", "linkedin": ";;;;;", "or_profile": "~Qiang_Zhou8;~Yuang_Liu1;~Chaohui_Yu1;~Jingliang_Li1;~Zhibin_Wang1;~Fan_Wang6", "aff": "Alibaba Group;East China Normal University;Alibaba Group;University of Chinese Academy of Sciences;Alibaba Group;Alibaba Group", "aff_domain": "alibaba-inc.com;ecnu.edu.cn;alibaba-inc.com;ucas.ac.cn;alibaba-inc.com;alibaba-inc.com", "position": "Researcher;PhD student;Researcher;PhD student;Researcher;Senior Staff Algorithm Engineer", "bibtex": "@inproceedings{\nzhou2023lmseg,\ntitle={{LMS}eg: Language-guided Multi-dataset Segmentation},\nauthor={Qiang Zhou and Yuang Liu and Chaohui Yu and Jingliang Li and Zhibin Wang and Fan Wang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=P44WPn1_aJV}\n}", "github": "", "project": "", "reviewers": "DHCs;G1t4;wkpT;6V2p", "pdf_size": 991357, "recommendation": "3;5;6;6", "confidence": "4;4;3;5", "correctness": "4;3;3;4", "technical_novelty": "2;3;2;3", "empirical_novelty": "2;3;2;3", "wc_summary_paper": "60;65;75;126", "wc_strength_and_weaknesses": "302;149;110;221", "wc_clarity_quality_novelty_and_reproducibility": "58;1;13;23", "wc_summary_review": "35;46;41;42", "wc_review": "455;261;239;412", "wc_reply_reviewers": "384;0;0;0", "wc_reply_authors": "846;490;385;766", "reply_reviewers": "1;0;0;0", "reply_authors": "2;1;1;1", "recommendation_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 81.5, 26.253571185650152 ], "wc_strength_and_weaknesses_avg": [ 195.5, 73.25469268244868 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 23.75, 21.25294097295713 ], "wc_summary_review_avg": [ 41.0, 3.9370039370059056 ], "wc_review_avg": [ 341.75, 93.32570653362342 ], "wc_reply_reviewers_avg": [ 96.0, 166.27687752661222 ], "wc_reply_authors_avg": [ 621.75, 190.06890198030817 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": -0.40824829046386296, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17383932270641504401&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=P44WPn1_aJV", "email": "alibaba-inc.com;ecnu.edu.cn;alibaba-inc.com;ucas.ac.cn;alibaba-inc.com;alibaba-inc.com", "author_num": 6, "aff_unique_index": "0;1;0;2;0;0", "aff_unique_norm": "Alibaba Group;East China 
Normal University;University of Chinese Academy of Sciences", "aff_unique_dep": ";;", "aff_unique_url": "https://www.alibaba.com;http://www.ecnu.edu.cn;http://www.ucas.ac.cn", "aff_unique_abbr": "Alibaba;ECNU;UCAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "P45P8xfL_n", "title": "A MULTI-SCALE STRUCTURE-PRESERVING HETEROLOGOUS IMAGE TRANSFORMATION ALGORITHM BASED ON CONDITIONAL ADVERSARIAL NETWORK LEARNING", "track": "main", "status": "Reject", "tldr": "Proposed new model structure and two loss functions reduce distortion and blur in generated heterogenous images", "abstract": "Image transformation model learning is a basic technology for image enhancement, image super-resolution, image generation, multimodal image fusion, etc. which uses deep convolutional networks as a representation model for arbitrary functions, and uses fitting optimization with paired image training sets to solve the transformation model between images in the different sets. Affected by the complex and diverse changes of the 3D shape of the actual scene and the pixel-level optical properties of materials, the solution of the heterologous image conversion model is an ill-posed problem. In recent years, most of the proposed conditional adversarial learning methods for image transformation networks only consider the overall consistency loss constraint of the image, and the generated images often contain some pseudo-features or local structural deformations. In order to solve this problem, using the idea of multi-scale image coding and perception, this paper proposes a multi-scale structure-preserving heterologous image transformation method based on conditional adversarial network learning. First, using the idea of multi-scale coding and reconstruction, a multi-scale, step by step generator lightweight network structure is designed. Then, two image multi-scale structure loss functions are proposed, and combined with the existing overall consistency loss, a loss function for generative adversarial learning is designed. Finally, test experiments are performed on the KAIST-MPD-set1 dataset. 
The experimental results show that, compared with the state-of-the-art algorithms, the proposed algorithm can better suppress the local structural distortion, and has significant advantages in evaluation indicators such as RMSE, LPIPS, PSNR, and SSIM.", "keywords": "Heterologous Image Transformation;Multi-scale feature encoding;Generative Adversarial Networks", "primary_area": "", "supplementary_material": "", "author": "Rui Xiang;Guo-yo Wang;Pan Yu", "authorids": "~Rui_Xiang1;gywang@hust.edu.cn;yup@hust.edu.cn", "gender": "M;;", "homepage": ";;", "dblp": ";;", "google_scholar": ";;", "orcid": "0000-0003-2394-4568;;", "linkedin": ";;", "or_profile": "~Rui_Xiang1;gywang@hust.edu.cn;yup@hust.edu.cn", "aff": "Huazhong University of Science and Technology;;", "aff_domain": "hust.edu.cn;;", "position": "MS student;;", "bibtex": "@misc{\nxiang2023a,\ntitle={A {MULTI}-{SCALE} {STRUCTURE}-{PRESERVING} {HETEROLOGOUS} {IMAGE} {TRANSFORMATION} {ALGORITHM} {BASED} {ON} {CONDITIONAL} {ADVERSARIAL} {NETWORK} {LEARNING}},\nauthor={Rui Xiang and Guo-yo Wang and Pan Yu},\nyear={2023},\nurl={https://openreview.net/forum?id=P45P8xfL_n}\n}", "github": "", "project": "", "reviewers": "o1H8;5dFs;KAPg", "site": "https://openreview.net/forum?id=P45P8xfL_n", "pdf_size": 5440146, "recommendation": "3;3;3", "confidence": "5;2;3", "correctness": "2;2;3", "technical_novelty": "2;2;2", "empirical_novelty": "2;1;2", "wc_summary_paper": "47;49;17", "wc_strength_and_weaknesses": "222;362;17", "wc_clarity_quality_novelty_and_reproducibility": "57;7;33", "wc_summary_review": "3;21;10", "wc_review": "329;439;77", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "210;343;209", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 1.247219128924647 ], "correctness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "wc_summary_paper_avg": [ 37.666666666666664, 14.636332266733433 ], "wc_strength_and_weaknesses_avg": [ 200.33333333333334, 141.67647024902274 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 32.333333333333336, 20.417857108151406 ], "wc_summary_review_avg": [ 11.333333333333334, 7.408703590297622 ], "wc_review_avg": [ 281.6666666666667, 151.52850850215904 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 254.0, 62.93382768167424 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:iCyIx_D8skkJ:scholar.google.com/&scioq=A+MULTI-SCALE+STRUCTURE-PRESERVING+HETEROLOGOUS+IMAGE+TRANSFORMATION+ALGORITHM+BASED+ON+CONDITIONAL+ADVERSARIAL+NETWORK+LEARNING&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Huazhong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.hust.edu.cn", "aff_unique_abbr": "HUST", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "title": "The Surprising Effectiveness of Equivariant Models in Domains with Latent Symmetry", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11040", "id": "P4MUGRM4Acu", "poster": "/media/PosterPDFs/ICLR%202023/11040.png?t=1681828371.1750443", "openreview": "https://openreview.net/forum?id=P4MUGRM4Acu", "slides": 
"https://iclr.cc/virtual/2023/poster/11040", "video": "https://iclr.cc/virtual/2023/poster/11040", "author_site": "Dian Wang, Jung Yeon Park, Neel Sortur, Lawson Wong, Robin Walters, Robert Platt", "tldr": "This paper discovers that equivariant models are surprisingly effective in domains with latent or partial symmetries. ", "abstract": "Extensive work has demonstrated that equivariant neural networks can significantly improve sample efficiency and generalization by enforcing an inductive bias in the network architecture. These applications typically assume that the domain symmetry is fully described by explicit transformations of the model inputs and outputs. However, many real-life applications contain only latent or partial symmetries which cannot be easily described by simple transformations of the input. In these cases, it is necessary to learn symmetry in the environment instead of imposing it mathematically on the network architecture. We discover, surprisingly, that imposing equivariance constraints that do not exactly match the domain symmetry is very helpful in learning the true symmetry in the environment. We differentiate between extrinsic and incorrect symmetry constraints and show that while imposing incorrect symmetry can impede the model's performance, imposing extrinsic symmetry can actually improve performance. We demonstrate that an equivariant model can significantly outperform non-equivariant methods on domains with latent symmetries both in supervised learning and in reinforcement learning for robotic manipulation and control problems.", "keywords": "Equivariant Learning;Reinforcement Learning;Robotics", "primary_area": "", "supplementary_material": "/attachment/c0cb7b1efe7121012642d1d53dc6d13a7df7a3ef.zip", "author": "Dian Wang;Jung Yeon Park;Neel Sortur;Lawson L.S. Wong;Robin Walters;Robert Platt", "authorids": "~Dian_Wang1;~Jung_Yeon_Park1;sortur.n@northeastern.edu;~Lawson_L.S._Wong2;~Robin_Walters1;~Robert_Platt1", "gender": "M;M;;;M;", "homepage": "https://pointw.github.io/;;;;http://www.robinwalters.com;http://www.ccs.neu.edu/home/rplatt/", "dblp": "191/1369-1;240/2704;;;258/3416;39/5434", "google_scholar": "CckjtfQAAAAJ;LZSRm9sAAAAJ;;;fnprJmUAAAAJ;Z4Y5S2oAAAAJ", "orcid": ";;;;;", "linkedin": "dianwang1007;;;;;", "or_profile": "~Dian_Wang1;~Jung_Yeon_Park1;sortur.n@northeastern.edu;~Lawson_L.S._Wong2;~Robin_Walters1;~Robert_Platt1", "aff": "Boston Dynamics AI Institute;Northeastern University;;;Northeastern University ;Northeastern University", "aff_domain": "theaiinstitute.com;northeastern.edu;;;northeastern.edu;neu.edu", "position": "Intern;PhD student;;;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nwang2023the,\ntitle={The Surprising Effectiveness of Equivariant Models in Domains with Latent Symmetry},\nauthor={Dian Wang and Jung Yeon Park and Neel Sortur and Lawson L.S. 
Wong and Robin Walters and Robert Platt},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=P4MUGRM4Acu}\n}", "github": "", "project": "", "reviewers": "a4PJ;bQBQ;6UwC;ZXUL", "pdf_size": 13735852, "recommendation": "8;8;8;8", "confidence": "4;3;3;2", "correctness": "4;4;4;4", "technical_novelty": "3;4;3;3", "empirical_novelty": "3;4;2;3", "wc_summary_paper": "91;45;99;115", "wc_strength_and_weaknesses": "179;99;102;141", "wc_clarity_quality_novelty_and_reproducibility": "83;20;70;20", "wc_summary_review": "40;26;18;20", "wc_review": "393;190;289;296", "wc_reply_reviewers": "0;0;18;65", "wc_reply_authors": "285;291;215;287", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;1;1", "recommendation_avg": [ 8.0, 0.0 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 4.0, 0.0 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 87.5, 26.014419078657127 ], "wc_strength_and_weaknesses_avg": [ 130.25, 32.66018217952864 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 48.25, 28.621451745150875 ], "wc_summary_review_avg": [ 26.0, 8.602325267042627 ], "wc_review_avg": [ 292.0, 71.81573643707902 ], "wc_reply_reviewers_avg": [ 20.75, 26.58359456506964 ], "wc_reply_authors_avg": [ 269.5, 31.539657575820318 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 34, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1904879926100325801&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=P4MUGRM4Acu", "email": "theaiinstitute.com;northeastern.edu;;;northeastern.edu;neu.edu", "author_num": 6, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Boston Dynamics AI Institute;Northeastern University", "aff_unique_dep": "AI Institute;", "aff_unique_url": "https://www.bostondynamics.com/;https://www.northeastern.edu", "aff_unique_abbr": "BD AI;NEU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Understanding Zero-shot Adversarial Robustness for Large-Scale Models", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11946", "id": "P4bXCawRi5J", "poster": "/media/PosterPDFs/ICLR%202023/11946.png?t=1682970719.4643154", "openreview": "https://openreview.net/forum?id=P4bXCawRi5J", "slides": "https://iclr.cc/virtual/2023/poster/11946", "video": "https://iclr.cc/virtual/2023/poster/11946", "author_site": "Chengzhi Mao, Scott Geng, Junfeng Yang, Xin Wang, Carl Vondrick", "tldr": "", "abstract": "Pretrained large-scale vision-language models like CLIP have exhibited strong generalization over unseen tasks. Yet imperceptible adversarial perturbations can significantly reduce CLIP's performance on new tasks. In this work, we identify and explore the problem of adapting large-scale models for zero-shot adversarial robustness. We first identify two key factors during model adaption--training losses and adaptation methods--that affect the model's zero-shot adversarial robustness. We then propose a text-guided contrastive adversarial training loss, which aligns the text embeddings and the adversarial visual features with contrastive learning on a small set of training data. 
We apply this training loss to two adaptation methods, model finetuning and visual prompt tuning. We find that visual prompt tuning is more effective in the absence of texts, while finetuning wins in the presence of text guidance. Overall, our approach significantly improves the zero-shot adversarial robustness over CLIP, seeing an average improvement of 31 points over ImageNet and 15 zero-shot datasets. We hope this work can shed light on understanding the zero-shot adversarial robustness of large-scale models. ", "keywords": "Adversarial Robustness;Zero-Shot Recognition", "primary_area": "", "supplementary_material": "/attachment/a3ffc447f2a037ced3c9565a6021d5075ddc5ba2.zip", "author": "Chengzhi Mao;Scott Geng;Junfeng Yang;Xin Wang;Carl Vondrick", "authorids": "~Chengzhi_Mao2;~Scott_Geng1;~Junfeng_Yang1;~Xin_Wang1;~Carl_Vondrick2", "gender": "M;;M;F;M", "homepage": "http://www.cs.columbia.edu/~mcz/;https://www.scottgeng.com/;https://www.cs.columbia.edu/~junfeng/;https://people.eecs.berkeley.edu/~xinw/;http://www.cs.columbia.edu/~vondrick/", "dblp": ";330/4056.html;71/3724.html;;26/8610", "google_scholar": "pTTEiHUAAAAJ;jCg1gRoAAAAJ;JJ9AvbAAAAAJ;e9gUdKwAAAAJ;3MzhkFIAAAAJ", "orcid": ";;0009-0000-2277-6545;;", "linkedin": ";;;xin-wang-aa83a577;", "or_profile": "~Chengzhi_Mao2;~Scott_Geng1;~Junfeng_Yang1;~Xin_Wang1;~Carl_Vondrick2", "aff": "Columbia University;Columbia University;, Columbia University;Microsoft;Columbia University", "aff_domain": "columbia.edu;columbia.edu;cs.columbia.edu;microsoft.com;columbia.edu", "position": "PhD student;Undergrad student;Full Professor;Senior Researcher;Assistant Professor", "bibtex": "@inproceedings{\nmao2023understanding,\ntitle={Understanding Zero-shot Adversarial Robustness for Large-Scale Models},\nauthor={Chengzhi Mao and Scott Geng and Junfeng Yang and Xin Wang and Carl Vondrick},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=P4bXCawRi5J}\n}", "github": "", "project": "", "reviewers": "J4qy;eUU6;8Zoa;Uucq", "pdf_size": 776407, "recommendation": "3;6;8;8", "confidence": "4;2;4;3", "correctness": "3;3;4;4", "technical_novelty": "2;1;3;3", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "74;70;106;232", "wc_strength_and_weaknesses": "294;136;128;81", "wc_clarity_quality_novelty_and_reproducibility": "36;26;124;35", "wc_summary_review": "47;50;54;36", "wc_review": "451;282;412;384", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "772;205;88;143", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 6.25, 2.0463381929681126 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.25, 0.82915619758885 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 120.5, 65.86918854821273 ], "wc_strength_and_weaknesses_avg": [ 159.75, 80.30683345768279 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 55.25, 39.88342387508876 ], "wc_summary_review_avg": [ 46.75, 6.684870978560469 ], "wc_review_avg": [ 382.25, 62.57944950221278 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 302.0, 274.49316931391934 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.18417736717093933, "corr_recommendation_correctness": 0.8551861104941366, "gs_citation": 83, "gs_cited_by_link":
"https://scholar.google.com/scholar?cites=5126691104432110904&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=P4bXCawRi5J", "email": "columbia.edu;columbia.edu;cs.columbia.edu;microsoft.com;columbia.edu", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Columbia University;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.columbia.edu;https://www.microsoft.com", "aff_unique_abbr": "Columbia;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Continuous-Discrete Convolution for Geometry-Sequence Modeling in Proteins", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12241", "id": "P5Z-Zl9XJ7", "poster": "/media/PosterPDFs/ICLR%202023/12241.png?t=1680929505.2999585", "openreview": "https://openreview.net/forum?id=P5Z-Zl9XJ7", "slides": "https://iclr.cc/virtual/2023/poster/12241", "video": "https://iclr.cc/virtual/2023/poster/12241", "author_site": "Hehe Fan, Zhangyang Wang, Yi Yang, Mohan Kankanhalli", "tldr": "This paper proposes a Continuous-Discrete Convolution (CDConv) for the (3+1)D geometry-sequence strutuere modeling in proteins.", "abstract": "The structure of proteins involves 3D geometry of amino acid coordinates and 1D sequence of peptide chains. The 3D structure exhibits irregularity because amino acids are distributed unevenly in Euclidean space and their coordinates are continuous variables. In contrast, the 1D structure is regular because amino acids are arranged uniformly in the chains and their sequential positions (orders) are discrete variables. Moreover, geometric coordinates and sequential orders are in two types of spaces and their units of length are incompatible. These inconsistencies make it challenging to capture the 3D and 1D structures while avoiding the impact of sequence and geometry modeling on each other. This paper proposes a Continuous-Discrete Convolution (CDConv) that uses irregular and regular approaches to model the geometry and sequence structures, respectively. Specifically, CDConv employs independent learnable weights for different regular sequential displacements but directly encodes geometric displacements due to their irregularity. In this way, CDConv significantly improves protein modeling by reducing the impact of geometric irregularity on sequence modeling. Extensive experiments on a range of tasks, including protein fold classification, enzyme reaction classification, gene ontology term prediction and enzyme commission number prediction, demonstrate the effectiveness of the proposed CDConv. 
", "keywords": "Protein representation learning;3D geometry modeling;1D sequence modeling;continuous convolution;discrete convolution.", "primary_area": "", "supplementary_material": "", "author": "Hehe Fan;Zhangyang Wang;Yi Yang;Mohan Kankanhalli", "authorids": "~Hehe_Fan1;~Zhangyang_Wang1;~Yi_Yang22;~Mohan_Kankanhalli1", "gender": "M;M;M;M", "homepage": "https://hehefan.github.io;https://vita-group.github.io;https://person.zju.edu.cn/yiyang;https://www.comp.nus.edu.sg/~mohan", "dblp": "184/5722.html;119/4026;33/4854-1.html;09/3613.html", "google_scholar": "hVuflMQAAAAJ;pxFyKAIAAAAJ;RMSuNFwAAAAJ;6Lx_eowAAAAJ", "orcid": "0000-0001-9572-2345;;;0000-0002-4846-2015", "linkedin": ";;;mohan-kankanhalli-583417221", "or_profile": "~Hehe_Fan1;~Zhangyang_Wang1;~Yi_Yang22;~Mohan_Kankanhalli1", "aff": "National University of Singapore;University of Texas, Austin;Zhejiang University;National University of Singapore", "aff_domain": "nus.edu.sg;utexas.edu;zju.edu.cn;nus.edu.sg", "position": "Postdoc;Assistant Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nfan2023continuousdiscrete,\ntitle={Continuous-Discrete Convolution for Geometry-Sequence Modeling in Proteins},\nauthor={Hehe Fan and Zhangyang Wang and Yi Yang and Mohan Kankanhalli},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=P5Z-Zl9XJ7}\n}", "github": "", "project": "", "reviewers": "JFLd;hLLm;qyBv;nEe2", "pdf_size": 2947587, "recommendation": "6;6;6;6", "confidence": "4;4;3;2", "correctness": "3;4;3;3", "technical_novelty": "3;3;2;3", "empirical_novelty": "3;3;2;3", "wc_summary_paper": "66;109;50;73", "wc_strength_and_weaknesses": "304;171;77;168", "wc_clarity_quality_novelty_and_reproducibility": "42;7;34;43", "wc_summary_review": "79;19;136;17", "wc_review": "491;306;297;301", "wc_reply_reviewers": "0;238;0;0", "wc_reply_authors": "960;1710;628;1045", "reply_reviewers": "0;2;0;0", "reply_authors": "2;4;1;2", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 74.5, 21.592822881689184 ], "wc_strength_and_weaknesses_avg": [ 180.0, 80.94751385929032 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 31.5, 14.568802284333465 ], "wc_summary_review_avg": [ 62.75, 49.08347481586853 ], "wc_review_avg": [ 348.75, 82.18994768218313 ], "wc_reply_reviewers_avg": [ 59.5, 103.05702305034819 ], "wc_reply_authors_avg": [ 1085.75, 392.65021011072946 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 50, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8110561702329380100&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 2, "pdf": "https://openreview.net/pdf?id=P5Z-Zl9XJ7", "email": "nus.edu.sg;utexas.edu;zju.edu.cn;nus.edu.sg", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "National University of Singapore;University of Texas at Austin;Zhejiang University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.nus.edu.sg;https://www.utexas.edu;https://www.zju.edu.cn", "aff_unique_abbr": "NUS;UT Austin;ZJU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Austin", "aff_country_unique_index": 
"0;1;2;0", "aff_country_unique": "Singapore;United States;China" }, { "id": "P5ZTXA7zy6", "title": "When Neural ODEs meet Neural Operators", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Differential equation-based neural networks perform well in a variety of deep learning fields. Among those many methods, neural ordinary differential equations (NODEs) are one of the most fundamental work. NODEs have been applied to general downstream tasks such as image classification, time series classification, and image generation. The ODE function of NODEs can be understood as a special type of differential operators, which had been overlooked before. In this paper, therefore, we study the feasibility of modeling NODEs (or the ODE function of NODEs) as neural operators. Our neural operator-based methods are more rigorous than existing approaches when it comes to learning the differential operator (or the ODE function). To this end, we design a new neural operator structure called branched Fourier neural operator (BFNO), which is suitable for modeling the ODE function. It shows improved performance for several general machine learning tasks, as compared to existing various NODE models.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/86ca4db47e1f1a989847c07f1932fbe77ed2dec1.zip", "author": "Woojin Cho;Seunghyeon Cho;Hyundong Jin;Jinsung Jeon;Kookjin Lee;Sanghyun Hong;Dongeun Lee;Jonghyun Choi;Noseong Park", "authorids": "~Woojin_Cho1;~Seunghyeon_Cho1;~Hyundong_Jin1;~Jinsung_Jeon1;~Kookjin_Lee1;~Sanghyun_Hong1;~Dongeun_Lee1;~Jonghyun_Choi1;~Noseong_Park1", "gender": "M;;;;M;M;M;M;", "homepage": "https://woojin-cho.github.io/;;;https://sites.google.com/view/npark/home?authuser=0;https://scholar.google.com/citations?hl=en&user=KL89hVQAAAAJ&view_op=list_works;http://www.sanghyun-hong.com;;https://ppolon.github.io/;", "dblp": ";299/7779;;294/0098;122/5103;135/8991;62/688;21/11103;", "google_scholar": "cqIj5tQAAAAJ;93p6-YAAAAAJ;;0R6W6lsAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;;uiGWnm4AAAAJ;", "orcid": ";0000-0002-2582-751X;;0000-0002-9693-2739;;;;0000-0002-7934-8434;", "linkedin": "woojin-cho-02b905264/;;;jinsung-jeon-994942289/;;;;jonghyun-choi-459bb615/;", "or_profile": "~Woojin_Cho1;~Seunghyeon_Cho1;~Hyundong_Jin1;~Jinsung_Jeon1;~Kookjin_Lee1;~Sanghyun_Hong1;~Dongeun_Lee1;~Jonghyun_Choi1;~Noseong_Park1", "aff": "Yonsei University;Yonsei University;;Yonsei University;Arizona State University;Oregon State University;East Texas A&M University;Yonsei University;", "aff_domain": "yonsei.ac.kr;yonsei.ac.kr;;yonsei.ac.kr;asu.edu;oregonstate.edu;tamuc.edu;yonsei.ac.kr;", "position": "MS student;MS student;;PhD student;Assistant Professor;Assistant Professor;Associate Professor;Associate Professor;", "bibtex": "@misc{\ncho2023when,\ntitle={When Neural {ODE}s meet Neural Operators},\nauthor={Woojin Cho and Seunghyeon Cho and Hyundong Jin and Jinsung Jeon and Kookjin Lee and Sanghyun Hong and Dongeun Lee and Jonghyun Choi and Noseong Park},\nyear={2023},\nurl={https://openreview.net/forum?id=P5ZTXA7zy6}\n}", "github": "", "project": "", "reviewers": "1DD3;4xZK;E9AY;4r3Q", "site": "https://openreview.net/forum?id=P5ZTXA7zy6", "pdf_size": 4217946, "recommendation": "3;3;3;5", "confidence": "3;3;4;4", "correctness": "1;3;3;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;0;2;2", "wc_summary_paper": "54;48;36;80", "wc_strength_and_weaknesses": "216;112;205;88", "wc_clarity_quality_novelty_and_reproducibility": 
"19;5;54;6", "wc_summary_review": "31;20;98;33", "wc_review": "320;185;393;207", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.5, 0.8660254037844386 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 54.5, 16.08570794214541 ], "wc_strength_and_weaknesses_avg": [ 155.25, 56.03291443428586 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 21.0, 19.836834424877374 ], "wc_summary_review_avg": [ 45.5, 30.712375355872428 ], "wc_review_avg": [ 276.25, 84.65629037466738 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 9, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14420410472478891371&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0;1;2;3;0", "aff_unique_norm": "Yonsei University;Arizona State University;Oregon State University;East Texas A&M University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.yonsei.ac.kr;https://www.asu.edu;https://oregonstate.edu;https://www.etam.edu", "aff_unique_abbr": "Yonsei;ASU;OSU;ETAMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;1;1;0", "aff_country_unique": "South Korea;United States" }, { "id": "P63GxgD7LIl", "title": "TransFool: An Adversarial Attack against Neural Machine Translation Models", "track": "main", "status": "Reject", "tldr": "We propose TransFool to build adversarial attacks against neural machine translation systems, which are fluent sentences and semantically similar to the original sentence, but highly degrade the translation quality. ", "abstract": "Deep neural networks have been shown to be vulnerable to small perturbations of their inputs known as adversarial attacks. In this paper, we consider the particular task of Neural Machine Translation (NMT), where security is often critical. We investigate the vulnerability of NMT models to adversarial attacks and propose a new attack algorithm called TransFool. It builds on a multi-term optimization problem and a gradient projection step to compute adversarial examples that fool NMT models. By integrating the embedding representation of a language model in the proposed attack, we generate fluent adversarial examples in the source language that maintain a high level of semantic similarity with the clean samples and render the attack largely undetectable. Experimental results demonstrate that, for multiple translation tasks and different NMT architectures, our white-box attack can severely degrade the translation quality for more than 60% of the sentences while the semantic similarity between the original sentence and the adversarial example stays very high. Moreover, we show that the proposed attack is transferable to unknown target models and can fool those quite easily. Finally, our method leads to improvement in terms of success rate, semantic similarity, and fluency compared to the existing attack strategies both in white-box and black-box settings. 
Hence, TransFool permits a better characterization of the vulnerability of NMT systems and outlines the necessity of designing strong defense mechanisms and more robust NMT systems for real-life applications.", "keywords": "Adversarial attack;deep neural network;language model;natural language processing;neural machine translation;robustness.", "primary_area": "", "supplementary_material": "", "author": "Sahar Sadrizadeh;Pascal Frossard;Ljiljana Dolamic", "authorids": "~Sahar_Sadrizadeh1;~Pascal_Frossard1;~Ljiljana_Dolamic1", "gender": "F;;F", "homepage": ";;", "dblp": "203/8867;;82/5397.html", "google_scholar": ";;", "orcid": ";;0000-0002-0656-5315", "linkedin": "sahar-sadrizadeh/;;", "or_profile": "~Sahar_Sadrizadeh1;~Pascal_Frossard1;~Ljiljana_Dolamic1", "aff": "EPFL - EPF Lausanne;;armasuisse", "aff_domain": "epfl.ch;;ar.admin.ch", "position": "Postdoc;;Researcher", "bibtex": "@misc{\nsadrizadeh2023transfool,\ntitle={TransFool: An Adversarial Attack against Neural Machine Translation Models},\nauthor={Sahar Sadrizadeh and Pascal Frossard and Ljiljana Dolamic},\nyear={2023},\nurl={https://openreview.net/forum?id=P63GxgD7LIl}\n}", "github": "", "project": "", "reviewers": "8Cx6;6HVS;ZrSf;Jg82", "site": "https://openreview.net/forum?id=P63GxgD7LIl", "pdf_size": 2069662, "recommendation": "3;5;6;6", "confidence": "5;4;4;4", "correctness": "3;3;4;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;3;3;2", "wc_summary_paper": "151;45;129;63", "wc_strength_and_weaknesses": "411;171;128;129", "wc_clarity_quality_novelty_and_reproducibility": "69;11;67;63", "wc_summary_review": "41;39;66;31", "wc_review": "672;266;390;286", "wc_reply_reviewers": "0;34;0;0", "wc_reply_authors": "1549;761;604;732", "reply_reviewers": "0;1;0;0", "reply_authors": "2;1;1;1", "recommendation_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 97.0, 44.15880433163923 ], "wc_strength_and_weaknesses_avg": [ 209.75, 117.480583502126 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 52.5, 24.057223447438815 ], "wc_summary_review_avg": [ 44.25, 13.102957681378658 ], "wc_review_avg": [ 403.5, 162.0084874319861 ], "wc_reply_reviewers_avg": [ 8.5, 14.722431864335457 ], "wc_reply_authors_avg": [ 911.5, 372.7710423302754 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.9428090415820632, "corr_recommendation_correctness": 0.4714045207910316, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11290590471768658923&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1", "aff_unique_norm": "EPFL;armasuisse", "aff_unique_dep": ";", "aff_unique_url": "https://www.epfl.ch;https://www.armasuisse.ch", "aff_unique_abbr": "EPFL;armasuisse", "aff_campus_unique_index": "0", "aff_campus_unique": "Lausanne;", "aff_country_unique_index": "0;0", "aff_country_unique": "Switzerland" }, { "id": "P7h7UT9uDzb", "title": "CBP-QSNN: Spiking Neural Networks Quantized Using Constrained Backpropagation", "track": "main", "status": "Reject", "tldr": "We propose a method to quantize FP32 weights in spiking neural networks using constrained backpropagation.", "abstract": "Spiking Neural Networks (SNNs) support sparse event-based data processing at high power efficiency
when implemented in event-based neuromorphic processors. However, the limited on-chip memory capacity of neuromorphic processors strictly limits the depth and width of the SNNs that can be implemented. A direct solution is the use of quantized SNNs (QSNNs) in place of SNNs with FP32 weights. To this end, we propose a method to quantize the weights using constrained backpropagation (CBP) with the Lagrangian function (conventional loss function plus well-defined weight-constraint functions) as an objective function. This work utilizes CBP as a post-training algorithm for deep SNNs pre-trained using various state-of-the-art methods including direct training (TSSL-BP, STBP, and surrogate gradient) and DNN-to-SNN conversion (SNN-Calibration), validating CBP as a general framework for QSNNs. CBP-QSNNs achieve high accuracy: the worst-case accuracy degradation on CIFAR-10, DVS128 Gesture, and CIFAR10-DVS is less than 1\\%. Notably, CBP-QSNNs for SNN-Calibration-pretrained SNNs on CIFAR-100 show an unexpectedly large accuracy increase of 3.72\\% while using little weight memory (3.5\\% of the FP32 case).", "keywords": "Quantized spiking neural network;Constrained backpropagation;Binary weight;Lagrange multiplier method;Weight constraint", "primary_area": "", "supplementary_material": "/attachment/73049b977d7dfd5683282c935e67e5f0780640f9.zip", "author": "Donghyung Yoo;Doo Seok Jeong", "authorids": "~Donghyung_Yoo1;~Doo_Seok_Jeong1", "gender": ";M", "homepage": ";https://sites.google.com/site/dsjeonglab1/", "dblp": ";190/7424", "google_scholar": ";https://scholar.google.co.kr/citations?user=RgGVhWwAAAAJ", "orcid": ";0000-0001-7954-2213", "linkedin": "yoo-dong-hyung-b03735170/;", "or_profile": "~Donghyung_Yoo1;~Doo_Seok_Jeong1", "aff": "Hanyang University;Hanyang University", "aff_domain": "hanyang.ac.kr;hanyang.ac.kr", "position": "PhD student;Associate Professor", "bibtex": "@misc{\nyoo2023cbpqsnn,\ntitle={{CBP}-{QSNN}: Spiking Neural Networks Quantized Using Constrained Backpropagation},\nauthor={Donghyung Yoo and Doo Seok Jeong},\nyear={2023},\nurl={https://openreview.net/forum?id=P7h7UT9uDzb}\n}", "github": "", "project": "", "reviewers": "6mKQ;k52R;4gmV", "site": "https://openreview.net/forum?id=P7h7UT9uDzb", "pdf_size": 5375150, "recommendation": "3;3;3", "confidence": "2;4;4", "correctness": "3;3;3", "technical_novelty": "3;2;1", "empirical_novelty": "4;3;2", "wc_summary_paper": "60;51;160", "wc_strength_and_weaknesses": "56;330;350", "wc_clarity_quality_novelty_and_reproducibility": "53;1;126", "wc_summary_review": "78;2;113", "wc_review": "247;384;749", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.0, 0.816496580927726 ], "empirical_novelty_avg": [ 3.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 90.33333333333333, 49.398605468395786 ], "wc_strength_and_weaknesses_avg": [ 245.33333333333334, 134.12763407375164 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 60.0, 51.27052434554055 ], "wc_summary_review_avg": [ 64.33333333333333, 46.33453235858639 ], "wc_review_avg": [ 460.0, 211.86945666298072 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0,
"corr_recommendation_correctness": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16446119008981589333&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Hanyang University", "aff_unique_dep": "", "aff_unique_url": "https://www.hanyang.ac.kr", "aff_unique_abbr": "HYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "id": "P880C39xAvM", "title": "Turning a Curse Into a Blessing: Enabling Data-Free Backdoor Unlearning via Stabilized Model Inversion", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Effectiveness of many existing backdoor removal techniques crucially rely on access to clean in-distribution data. However, as model is often trained on sensitive or proprietary datasets, it might not be practical to assume the availability of in-distribution samples. To address this problem, we propose a novel approach to reconstruct samples from a backdoored model and then use the reconstructed samples as a proxy for clean in-distribution data needed by the defenses.\nWe observe an interesting phenomenon that ensuring perceptual similarity between the synthesized samples and the clean training data is \\emph{not} adequate to enable effective defenses. We show that the model predictions at such synthesized samples can be unstable to small input perturbations, which misleads downstream backdoor removal techniques to remove these perturbations instead of underlying backdoor triggers. Moreover, unlike clean samples, the predictions at the synthesized samples can also be unstable to small model parameter changes. To tackle these issues, we design an optimization-based data reconstruction technique that ensures visual quality while promoting the stability to perturbations in both data and parameter space. We also observe that while reconstructed from a backdoored model, the synthesized samples do not contain backdoors, and further provide a theoretical analysis that sheds light on this observation. 
Our evaluation shows that our data synthesis technique can lead to state-of-the-art backdoor removal performance without clean in-distribution data access and the performance is on par with or sometimes even better than using the same amount of clean samples.", "keywords": "Backdoor Defenses", "primary_area": "", "supplementary_material": "", "author": "Si Chen;Yi Zeng;Won Park;Tianhao Wang;Xun Chen;Lingjuan Lyu;Zhuoqing Mao;Ruoxi Jia", "authorids": "~Si_Chen5;~Yi_Zeng3;~Won_Park1;~Tianhao_Wang2;~Xun_Chen1;~Lingjuan_Lyu1;~Zhuoqing_Mao1;~Ruoxi_Jia1", "gender": ";M;M;M;;F;F;", "homepage": ";https://yizeng623.github.io/;https://wonpark.io/;https://tianhaowang.netlify.app/;;https://sites.google.com/view/lingjuan-lyu;https://web.eecs.umich.edu/~zmao/;https://ruoxijia.info/", "dblp": ";75/148;;274/2144;;178/9876;;147/5355-1", "google_scholar": ";slUNmHQAAAAJ;;nvQOtgkAAAAJ;;;Ba_Ci9UAAAAJ;JCrug-YAAAAJ", "orcid": ";0000-0002-6901-9194;;;;;;", "linkedin": ";chnyizeng/;;tian-hao-wang/;;;;", "or_profile": "~Si_Chen5;~Yi_Zeng3;~Won_Park1;~Tianhao_Wang2;~Xun_Chen1;~Lingjuan_Lyu1;~Zhuoqing_Mao1;~Ruoxi_Jia1", "aff": ";Virginia Tech;University of Michigan;Princeton University;;Sony;University of Michigan;Virginia Tech", "aff_domain": ";vt.edu;umich.edu;princeton.edu;;sony.com;umich.edu;vt.edu", "position": ";PhD student;PhD student;PhD student;;scientist;Professor;Assistant Professor", "bibtex": "@misc{\nchen2023turning,\ntitle={Turning a Curse Into a Blessing: Enabling Data-Free Backdoor Unlearning via Stabilized Model Inversion},\nauthor={Si Chen and Yi Zeng and Won Park and Tianhao Wang and Xun Chen and Lingjuan Lyu and Zhuoqing Mao and Ruoxi Jia},\nyear={2023},\nurl={https://openreview.net/forum?id=P880C39xAvM}\n}", "github": "", "project": "", "reviewers": "jUM2;g6UR;kg8m;sqg3", "site": "https://openreview.net/forum?id=P880C39xAvM", "pdf_size": 7272268, "recommendation": "3;5;5;5", "confidence": "4;3;3;4", "correctness": "2;3;3;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "100;70;46;111", "wc_strength_and_weaknesses": "61;270;322;308", "wc_clarity_quality_novelty_and_reproducibility": "616;164;16;37", "wc_summary_review": "23;56;47;57", "wc_review": "800;560;431;513", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 81.75, 25.518375732009275 ], "wc_strength_and_weaknesses_avg": [ 240.25, 105.22446246001925 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 208.25, 242.1284524792574 ], "wc_summary_review_avg": [ 45.75, 13.699908758820257 ], "wc_review_avg": [ 576.0, 137.31897174097978 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:BaFT_PO_lboJ:scholar.google.com/&scioq=Turning+a+Curse+Into+a+Blessing:+Enabling+Data-Free+Backdoor+Unlearning+via+Stabilized+Model+Inversion&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;1;0", "aff_unique_norm": "Virginia Tech;University of Michigan;Princeton 
University;Sony Corporation", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.vt.edu;https://www.umich.edu;https://www.princeton.edu;https://www.sony.com", "aff_unique_abbr": "VT;UM;Princeton;Sony", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "United States;Japan" }, { "id": "P8DHF1Y_dph", "title": "Learning to Generate All Feasible Actions", "track": "main", "status": "Reject", "tldr": "We propose to train a generative neural network to generate all feasible actions within an interactive environment.", "abstract": "Several machine learning (ML) applications are characterized by searching for an optimal solution to a complex task. The search space for this optimal solution is often very large, so large in fact that this optimal solution is often not computable. Part of the problem is that many candidate solutions found via ML are actually infeasible and have to be discarded. Restricting the search space to only the feasible solution candidates simplifies finding an optimal solution for the tasks. Further, the set of feasible solutions could be re-used in multiple problems characterized by different tasks. In particular, we observe that complex tasks can be decomposed into subtasks and corresponding skills. We propose to learn a reusable and transferable skill by training an actor to generate all feasible actions. The trained actor can then propose feasible actions, among which an optimal one can be chosen according to a specific task. The actor is trained by interpreting the feasibility of each action as a target distribution. The training procedure minimizes a divergence of the actor's output distribution to this target. We derive the general optimization target for arbitrary f-divergences using a combination of kernel density estimates, resampling, and importance sampling. We further utilize an auxiliary critic to reduce the interactions with the environment. 
A preliminary comparison to related strategies shows that our approach learns to visit all the modes in the feasible action space, demonstrating the framework's potential for generating multimodal action distributions.", "keywords": "Generative neural networks;feasible actions;f-divergence", "primary_area": "", "supplementary_material": "", "author": "Mirco Theile;Daniele Bernardini;Raphael Trumpp;Cristina Piazza;Marco Caccamo;Alberto Sangiovanni-Vincentelli", "authorids": "~Mirco_Theile1;~Daniele_Bernardini1;~Raphael_Trumpp1;~Cristina_Piazza1;~Marco_Caccamo2;~Alberto_Sangiovanni-Vincentelli1", "gender": ";M;;F;;M", "homepage": "https://theilem.gitlab.io/;https://rtsl.cps.mw.tum.de/view_member?id=20;;https://www.in.tum.de/nhcr/home/;https://rtsl.cps.mw.tum.de/personal_page/mcaccamo/;https://www2.eecs.berkeley.edu/Faculty/Homepages/sangiovanni-vicentelli.html", "dblp": "233/1962;36/6589-2;;136/9528;86/450;s/ALSangiovanniV", "google_scholar": "88rL5TUAAAAJ;https://scholar.google.com/citations?hl=en;;https://scholar.google.it/citations?user=H6Q0F6sAAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": "0000-0003-1574-8858;0000-0002-9416-9557;;0000-0002-0358-8677;;0000-0003-1298-8389", "linkedin": "mirco-theile;dbernardini/;;;;alberto-sangiovanni-vincentelli-5684a/?msgControlName=reply_to_sender&msgConversationId=2-NzRkNDBlYTEtMWFlOC01OGE1LTg4MTYtOWU5YjkwMjVhNWQwXzAwMA%3D%3D&msgOverlay=true", "or_profile": "~Mirco_Theile1;~Daniele_Bernardini1;~Raphael_Trumpp1;~Cristina_Piazza1;~Marco_Caccamo2;~Alberto_Sangiovanni-Vincentelli1", "aff": "Technische Universit\u00e4t M\u00fcnchen;Technische Universit\u00e4t M\u00fcnchen;;Technische Universit\u00e4t M\u00fcnchen;Technische Universit\u00e4t M\u00fcnchen;University of California, Berkeley", "aff_domain": "tum.de;tum.de;;tum.de;tum.de;berkeley.edu", "position": "PhD student;Researcher;;Assistant Professor;Full Professor;Full Professor", "bibtex": "@misc{\ntheile2023learning,\ntitle={Learning to Generate All Feasible Actions},\nauthor={Mirco Theile and Daniele Bernardini and Raphael Trumpp and Cristina Piazza and Marco Caccamo and Alberto Sangiovanni-Vincentelli},\nyear={2023},\nurl={https://openreview.net/forum?id=P8DHF1Y_dph}\n}", "github": "", "project": "", "reviewers": "fUjg;bb3F;JUVt;RMrn", "site": "https://openreview.net/forum?id=P8DHF1Y_dph", "pdf_size": 1877991, "recommendation": "3;5;6;8", "confidence": "4;2;3;3", "correctness": "4;3;3;3", "technical_novelty": "4;3;3;4", "empirical_novelty": "3;2;3;4", "wc_summary_paper": "87;38;83;52", "wc_strength_and_weaknesses": "393;90;105;72", "wc_clarity_quality_novelty_and_reproducibility": "37;84;12;146", "wc_summary_review": "34;27;23;28", "wc_review": "551;239;223;298", "wc_reply_reviewers": "106;39;0;0", "wc_reply_authors": "512;240;91;202", "reply_reviewers": "1;1;0;0", "reply_authors": "1;2;1;1", "recommendation_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.5, 0.5 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 65.0, 20.65187642806338 ], "wc_strength_and_weaknesses_avg": [ 165.0, 132.15332004910056 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 69.75, 51.050832510351874 ], "wc_summary_review_avg": [ 28.0, 3.9370039370059056 ], "wc_review_avg": [ 327.75, 131.88512994268913 ], "wc_reply_reviewers_avg": [ 36.25, 43.30343519860751 ], "wc_reply_authors_avg": [ 261.25, 154.77624979304804 ], "reply_reviewers_avg": [ 0.5, 0.5 ], 
"reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.39223227027636803, "corr_recommendation_correctness": -0.8006407690254357, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6469774514494332997&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Technische Universit\u00e4t M\u00fcnchen;University of California, Berkeley", "aff_unique_dep": ";", "aff_unique_url": "https://www.tum.de;https://www.berkeley.edu", "aff_unique_abbr": "TUM;UC Berkeley", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "Germany;United States" }, { "title": "MLPInit: Embarrassingly Simple GNN Training Acceleration with MLP Initialization", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12082", "id": "P8YIphWNEGO", "poster": "/media/PosterPDFs/ICLR%202023/12082.png?t=1682583013.5500295", "openreview": "https://openreview.net/forum?id=P8YIphWNEGO", "slides": "https://iclr.cc/virtual/2023/poster/12082", "video": "https://iclr.cc/virtual/2023/poster/12082", "author_site": "Xiaotian Han, Tong Zhao, Yozen Liu, Xia Hu, Neil Shah", "tldr": "we propose an embarrassingly simple, yet hugely effective initialization for GNN training acceleration by initializing GNN with full trained MLP.", "abstract": "Training graph neural networks (GNNs) on large graphs is complex and extremely time consuming. This is attributed to overheads caused by sparse matrix multiplication, which are sidestepped when training multi-layer perceptrons (MLPs) with only node features. MLPs, by ignoring graph context, are simple and faster for graph data, however they usually sacrifice prediction accuracy, limiting their applications for graph data. We observe that for most message passing-based GNNs, we can trivially derive an analog MLP (we call this a PeerMLP) with an equivalent weight space, by setting the trainable parameters with the same shapes, making us curious about how do GNNs using weights from a fully trained PeerMLP perform? Surprisingly, we find that GNNs initialized with such weights significantly outperform their PeerMLPs, motivating us to use PeerMLP training as a precursor, initialization step to GNN training. To this end, we propose an embarrassingly simple, yet hugely effective initialization method for GNN training acceleration, called \\mlpinit. Our extensive experiments on multiple large-scale graph datasets with diverse GNN architectures validate that MLPInit can accelerate the training of GNNs (up to 33\u00d7 speedup on OGB-Products) and often improve prediction performance (e.g., up to $7.97\\%$ improvement for GraphSAGE across $7$ datasets for node classification, and up to $17.81\\%$ improvement across $4$ datasets for link prediction on metric Hits@10). 
The code is available at https://github.com/snap-research/MLPInit-for-GNNs.", "keywords": "Graph Neural Network;Large-scale Graph;Acceleration", "primary_area": "", "supplementary_material": "/attachment/5bb1f8c9b9c24346fa5ac68fe79410102a20b33b.zip", "author": "Xiaotian Han;Tong Zhao;Yozen Liu;Xia Hu;Neil Shah", "authorids": "~Xiaotian_Han1;~Tong_Zhao3;~Yozen_Liu1;~Xia_Hu4;~Neil_Shah2", "gender": "M;M;;M;M", "homepage": "https://ahxt.github.io/;https://tzhao.io/;https://www.linkedin.com/in/yozen-liu-531a67130/;http://nshah.net;https://cs.rice.edu/~xh37/index.html", "dblp": ";94/6503-3;242/8056.html;71/7771;256/9406.html", "google_scholar": "Uromx98AAAAJ;05cRc-MAAAAJ;i3U2JjEAAAAJ;Qut69OgAAAAJ;https://scholar.google.com.tw/citations?user=pcCS60IAAAAJ", "orcid": ";0000-0001-7660-1732;;0000-0003-3261-8430;", "linkedin": ";;;;", "or_profile": "~Xiaotian_Han1;~Tong_Zhao3;~Yozen_Liu1;~Neil_Shah2;~Xia_Hu2", "aff": "Texas A&M University;Snap Inc.;Snap Inc.;Snap Inc.;Rice University", "aff_domain": "tamu.edu;snap.com;snapchat.com;snap.com;rice.edu", "position": "PhD student;Researcher;Researcher;Research Scientist;Associate Professor", "bibtex": "@inproceedings{\nhan2023mlpinit,\ntitle={{MLPI}nit: Embarrassingly Simple {GNN} Training Acceleration with {MLP} Initialization},\nauthor={Xiaotian Han and Tong Zhao and Yozen Liu and Xia Hu and Neil Shah},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=P8YIphWNEGO}\n}", "github": "", "project": "", "reviewers": "uoW4;gkVw;hojB;pot2", "pdf_size": 2699740, "recommendation": "5;5;8;8", "confidence": "4;5;4;4", "correctness": "3;3;3;4", "technical_novelty": "2;2;3;3", "empirical_novelty": "3;2;4;4", "wc_summary_paper": "24;74;96;58", "wc_strength_and_weaknesses": "104;702;296;704", "wc_clarity_quality_novelty_and_reproducibility": "5;80;8;35", "wc_summary_review": "49;102;67;124", "wc_review": "182;958;467;921", "wc_reply_reviewers": "494;0;34;284", "wc_reply_authors": "3982;2549;938;2417", "reply_reviewers": "4;0;1;3", "reply_authors": "8;5;4;7", "recommendation_avg": [ 6.5, 1.5 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 3.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 63.0, 26.248809496813376 ], "wc_strength_and_weaknesses_avg": [ 451.5, 260.5009596911305 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 32.0, 30.074906483645133 ], "wc_summary_review_avg": [ 85.5, 29.278831943914703 ], "wc_review_avg": [ 632.0, 323.85258992325504 ], "wc_reply_reviewers_avg": [ 203.0, 200.6315030098713 ], "wc_reply_authors_avg": [ 2471.5, 1077.2893065467604 ], "reply_reviewers_avg": [ 2.0, 1.5811388300841898 ], "reply_authors_avg": [ 6.0, 1.5811388300841898 ], "replies_avg": [ 38, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.5773502691896258, "corr_recommendation_correctness": 0.5773502691896258, "gs_citation": 55, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7042710863448834080&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=P8YIphWNEGO", "email": "tamu.edu;snap.com;snapchat.com;snap.com;rice.edu", "author_num": 5, "aff_unique_index": "0;1;1;1;2", "aff_unique_norm": "Texas A&M University;Snap Inc.;Rice University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tamu.edu;https://www.snapinc.com;https://www.rice.edu", "aff_unique_abbr": "TAMU;Snap;Rice",
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "P9yXPbfqbvC", "title": "Noise Transforms Feed-Forward Networks into Sparse Coding Networks", "track": "main", "status": "Reject", "tldr": "We find that noise alone induces networks to become Top-K, sparse coding networks. This resolves a difference between biological and artificial neural networks with regards to how sparse they are and how this sparsity is implemented.. ", "abstract": "A hallmark of biological neural networks, which distinguishes them from their artificial counterparts, is the high degree of sparsity in their activations. Here, we show that by simply injecting symmetric, random, noise during training in reconstruction or classification tasks, artificial neural networks with ReLU activation functions eliminate this difference; the neurons converge to a sparse coding solution where only a small fraction are active for any input. The resulting network learns receptive fields like those of primary visual cortex and remains sparse even when noise is removed in later stages of learning.", "keywords": "Sparse Coding;Sparsity;Top-K Activation;Noise;Biologically Inspired", "primary_area": "", "supplementary_material": "", "author": "Trenton Bricken;Bruno Olshausen;Gabriel Kreiman", "authorids": "~Trenton_Bricken1;~Bruno_Olshausen1;~Gabriel_Kreiman1", "gender": ";M;M", "homepage": "https://trentbrick.github.io/;http://redwood.berkeley.edu/bruno/;http://klab.tch.harvard.edu", "dblp": ";30/3869;12/1367", "google_scholar": "CP6aLusAAAAJ;4aqK_74AAAAJ;WxZ_6nsAAAAJ", "orcid": ";;0000-0003-3505-8475", "linkedin": ";;kreiman/", "or_profile": "~Trenton_Bricken1;~Bruno_Olshausen1;~Gabriel_Kreiman1", "aff": "Harvard University;UC Berkeley;Harvard Medical School", "aff_domain": "harvard.edu;;harvard.edu", "position": "PhD student;Full Professor;Full Professor", "bibtex": "@misc{\nbricken2023noise,\ntitle={Noise Transforms Feed-Forward Networks into Sparse Coding Networks},\nauthor={Trenton Bricken and Bruno Olshausen and Gabriel Kreiman},\nyear={2023},\nurl={https://openreview.net/forum?id=P9yXPbfqbvC}\n}", "github": "", "project": "", "reviewers": "AnUn;wvBd;39gB;6F1u", "site": "https://openreview.net/forum?id=P9yXPbfqbvC", "pdf_size": 25961958, "recommendation": "3;3;3;3", "confidence": "4;4;2;4", "correctness": "3;4;3;3", "technical_novelty": "1;2;2;1", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "163;129;34;121", "wc_strength_and_weaknesses": "298;151;106;107", "wc_clarity_quality_novelty_and_reproducibility": "141;74;69;49", "wc_summary_review": "133;43;30;33", "wc_review": "735;397;239;310", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "887;304;314;226", "reply_reviewers": "0;0;0;0", "reply_authors": "2;1;1;1", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 1.5, 0.5 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 111.75, 47.57822506147114 ], "wc_strength_and_weaknesses_avg": [ 165.5, 78.62728534039567 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 83.25, 34.629286738250904 ], "wc_summary_review_avg": [ 59.75, 42.56392251661024 ], "wc_review_avg": [ 420.25, 190.14123040519118 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 432.75, 264.4649078800437 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 10, 
0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:5HGWPo35whwJ:scholar.google.com/&scioq=Noise+Transforms+Feed-Forward+Networks+into+Sparse+Coding+Networks&hl=en&as_sdt=0,28", "gs_version_total": 0, "aff_unique_index": "0;1;0", "aff_unique_norm": "Harvard University;University of California, Berkeley", "aff_unique_dep": ";", "aff_unique_url": "https://www.harvard.edu;https://www.berkeley.edu", "aff_unique_abbr": "Harvard;UC Berkeley", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Berkeley;Boston", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "PAKkOriJBd", "title": "Coordination Scheme Probing for Generalizable Multi-Agent Reinforcement Learning", "track": "main", "status": "Reject", "tldr": "A few-shot MARL approach for improving coordination ability with diverse teammates", "abstract": "Coordinating with previously unknown teammates without joint learning is a crucial need for real-world multi-agent applications, such as human-AI interaction. An active research topic on this problem is ad hoc teamwork, which improves agents' coordination ability in zero-shot settings. However, previous works can only solve the problem of a single agent's coordination with different teams, which is not in line with arbitrary group-to-group coordination in complex multi-agent scenarios. Moreover, they commonly suffer from limited adaptation ability within an episode in a zero-shot setting. To address these problems, we introduce the Coordination Scheme Probing (CSP) approach that applies a disentangled scheme probing module to represent and classify the newly arrived teammates beforehand with limited pre-collected episodic data and makes multi-agent control accordingly. To achieve generalization, CSP learns a meta-policy with multiple sub-policies that follow distinguished coordination schemes in an end-to-end fashion and automatically reuses it to coordinate with unseen teammates. 
Empirically, we show that the proposed method achieves remarkable performance compared to existing ad hoc teamwork and policy generalization methods in various multi-agent cooperative scenarios.", "keywords": "reinforcement learning;multi-agent reinforcement learning;agent modeling", "primary_area": "", "supplementary_material": "/attachment/aca6f5bfb6d1ae18430fb698c51207fbd73bc0bd.zip", "author": "Hao Ding;Chengxing Jia;Cong Guan;Feng Chen;Lei Yuan;Zongzhang Zhang;Yang Yu", "authorids": "~Hao_Ding5;~Chengxing_Jia1;~Cong_Guan1;~Feng_Chen12;~Lei_Yuan2;~Zongzhang_Zhang1;~Yang_Yu5", "gender": "Non-Binary;M;M;M;M;M;M", "homepage": ";http://www.lamda.nju.edu.cn/jiacx/;http://www.lamda.nju.edu.cn/guanc/;;http://www.lamda.nju.edu.cn/yuanl/;http://www.lamda.nju.edu.cn/zhangzz;http://www.lamda.nju.edu.cn/yuy", "dblp": ";;191/7206;21/3047-42;23/6750-1;90/8724;46/2181-1", "google_scholar": ";;;QgorT8QAAAAJ;https://scholar.google.com/citations?hl=zh-CN;sG7WEAgAAAAJ;PG2lDSwAAAAJ", "orcid": "0009-0001-4907-0304;;;;;;", "linkedin": "https://www.linkedin.cn/injobs/in/%E8%B1%AA-%E4%B8%81-83b107234;;;;;;", "or_profile": "~Hao_Ding5;~Chengxing_Jia1;~Cong_Guan1;~Feng_Chen12;~Lei_Yuan2;~Zongzhang_Zhang1;~Yang_Yu2", "aff": "Nanjing University;Nanjing University;Nanjing University;Nanjing University;Nanjing University;Nanjing University;Nanjing University", "aff_domain": "nju.edu.cn;nju.edu.cn;nju.edu.cn;lamda.nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn", "position": "MS student;PhD student;PhD student;MS student;PhD student;Associate Professor;Professor", "bibtex": "@misc{\nding2023coordination,\ntitle={Coordination Scheme Probing for Generalizable Multi-Agent Reinforcement Learning},\nauthor={Hao Ding and Chengxing Jia and Cong Guan and Feng Chen and Lei Yuan and Zongzhang Zhang and Yang Yu},\nyear={2023},\nurl={https://openreview.net/forum?id=PAKkOriJBd}\n}", "github": "", "project": "", "reviewers": "yqCj;td12;27TM;d7jH", "site": "https://openreview.net/forum?id=PAKkOriJBd", "pdf_size": 10467395, "recommendation": "3;5;6;8", "confidence": "4;3;3;4", "correctness": "2;3;3;4", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "118;81;151;108", "wc_strength_and_weaknesses": "376;822;312;183", "wc_clarity_quality_novelty_and_reproducibility": "94;60;83;64", "wc_summary_review": "40;60;28;19", "wc_review": "628;1023;574;374", "wc_reply_reviewers": "0;334;0;67", "wc_reply_authors": "2288;2149;1180;507", "reply_reviewers": "0;1;0;1", "reply_authors": "5;5;3;1", "recommendation_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 114.5, 25.04495957273639 ], "wc_strength_and_weaknesses_avg": [ 423.25, 240.4842770328239 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 75.25, 13.88119231190174 ], "wc_summary_review_avg": [ 36.75, 15.35211711784404 ], "wc_review_avg": [ 649.75, 235.35332481186663 ], "wc_reply_reviewers_avg": [ 100.25, 137.6996278135856 ], "wc_reply_authors_avg": [ 1531.0, 729.1690476151604 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 3.5, 1.6583123951777 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.9805806756909202, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12947911628625399726&as_sdt=5,44&sciodt=0,44&hl=en", 
"gs_version_total": 0, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Nanjing University", "aff_unique_dep": "", "aff_unique_url": "https://www.nju.edu.cn", "aff_unique_abbr": "Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "PArJcOptzg", "title": "DeepGuiser: Learning to Disguise Neural Architectures for Impeding Adversarial Transfer Attacks", "track": "main", "status": "Reject", "tldr": "DeepGuiser is an automatic, hardware-agnostic, and retrain-free neural architecture disguising method to disguise the neural architectures, to resist possible adversarial attacks rendered by the model extraction attacks. ", "abstract": "Security is becoming increasingly critical in deep learning applications. Recent researches demonstrate that NN models are vulnerable to adversarial attacks, which can mislead them with only small input perturbations. Moreover, adversaries who know the architecture of victim models can conduct more effective attacks. Unfortunately, the architectural knowledge can usually be stolen by the adversaries by exploiting the system-level hints through many side channels, which is referred to as the neural architecture extraction attack. Conventional countermeasures for neural architecture extraction can introduce large overhead, and different hardware platforms have diverse types of side-channel leakages such that many expert efforts are needed in developing hardware-specific countermeasures. In this paper, we propose DeepGuiser, an automatic, hardware-agnostic, and retrain-free neural architecture disguising method, to disguise the neural architectures to reduce the harm of neural architecture extraction attacks. In a nutshell, given a trained model, DeepGuiser outputs a deploy model that is functionally equivalent with the trained model but with a different (i.e., disguising) architecture. DeepGuiser can minimize the harm of the follow-up adversarial transfer attacks to the deploy model, even if the disguising architecture is completely stolen by the architecture extraction attack. 
Experiments demonstrate that DeepGuiser can effectively disguise diverse architectures and impede the adversarial transferability by 13.87% \u223c 32.59%, while only introducing 10% \u223c 40% extra inference latency.", "keywords": "Neural architecture extraction attack;neural architecture disguising;adversarial robustness;transferability predictor;policy learning", "primary_area": "", "supplementary_material": "/attachment/2b5d7a0f194809b2765f5a7bd38593882c16f30b.zip", "author": "Yi Cai;Chenyu Wang;Xuefei Ning;Zixuan Zhou;Dimin Niu;Huazhong Yang;Yu Wang", "authorids": "~Yi_Cai4;~Chenyu_Wang3;~Xuefei_Ning1;~Zixuan_Zhou2;dimin.niu@alibaba-inc.com;~Huazhong_Yang1;~Yu_Wang3", "gender": "M;M;Not Specified;M;;;M", "homepage": "https://nicsefc.ee.tsinghua.edu.cn/people/cai-y/;;https://nics-effalg.com/ningxuefei/;;;;https://nicsefc.ee.tsinghua.edu.cn", "dblp": ";;202/9525;;;;w/YuWang2.html", "google_scholar": ";QI96hfoAAAAJ;oVslpJsAAAAJ;iIrXDM8AAAAJ;;;https://scholar.google.com.hk/citations?user=j8JGVvoAAAAJ", "orcid": ";;;;;;0000-0001-6108-5157", "linkedin": ";;;;;;", "or_profile": "~Yi_Cai4;~Chenyu_Wang3;~Xuefei_Ning1;~Zixuan_Zhou2;dimin.niu@alibaba-inc.com;~Huazhong_Yang1;~Yu_Wang3", "aff": ";Dept of EE, Tsinghua University;Huawei Technologies Ltd.;Tsinghua University;;;Tsinghua University", "aff_domain": ";mails.tsinghua.edu.cn;huawei.com;tsinghua.edu.cn;;;tsinghua.edu.cn", "position": ";Undergrad student;Postdoc;MS student;;;Full Professor", "bibtex": "@misc{\ncai2023deepguiser,\ntitle={DeepGuiser: Learning to Disguise Neural Architectures for Impeding Adversarial Transfer Attacks},\nauthor={Yi Cai and Chenyu Wang and Xuefei Ning and Zixuan Zhou and Dimin Niu and Huazhong Yang and Yu Wang},\nyear={2023},\nurl={https://openreview.net/forum?id=PArJcOptzg}\n}", "github": "", "project": "", "reviewers": "Tj1c;moEm;PPSy;DYHG", "site": "https://openreview.net/forum?id=PArJcOptzg", "pdf_size": 1674652, "recommendation": "3;3;5;6", "confidence": "5;3;4;4", "correctness": "2;2;3;3", "technical_novelty": "3;3;4;3", "empirical_novelty": "3;2;4;3", "wc_summary_paper": "112;72;64;66", "wc_strength_and_weaknesses": "269;371;245;52", "wc_clarity_quality_novelty_and_reproducibility": "21;102;4;20", "wc_summary_review": "18;99;21;168", "wc_review": "420;644;334;306", "wc_reply_reviewers": "55;57;140;0", "wc_reply_authors": "312;490;431;615", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 78.5, 19.56399754651385 ], "wc_strength_and_weaknesses_avg": [ 234.25, 115.3676189404982 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 36.75, 38.27123593509883 ], "wc_summary_review_avg": [ 76.5, 62.010079825783166 ], "wc_review_avg": [ 426.0, 132.68760303811354 ], "wc_reply_reviewers_avg": [ 63.0, 49.994999749974994 ], "wc_reply_authors_avg": [ 462.0, 109.14898075566258 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.9622504486493761, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11876877158948221325&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Tsinghua University;Huawei", 
"aff_unique_dep": "Dept of EE;Huawei Technologies", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.huawei.com", "aff_unique_abbr": "THU;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "FIT: A Metric for Model Sensitivity", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11056", "id": "PDG4-Y3aboN", "poster": "", "openreview": "https://openreview.net/forum?id=PDG4-Y3aboN", "slides": "https://iclr.cc/virtual/2023/poster/11056", "video": "https://iclr.cc/virtual/2023/poster/11056", "author_site": "Ben Zandonati, Adrian Pol, Maurizio Pierini, Olya Sirkin, Tal Kopetz", "tldr": "We propose the Fisher Information Trace (FIT) metric, to quantify the effects of mixed-precision quantization. FIT facilitates zero-shot performance prediction of quantized models, and is fast to compute.", "abstract": "Model compression is vital to the deployment of deep learning on edge devices. Low precision representations, achieved via quantization of weights and activations, can reduce inference time and memory requirements. However, quantifying and predicting the response of a model to the changes associated with this procedure remains challenging. This response is non-linear and heterogeneous throughout the network. Understanding which groups of parameters and activations are more sensitive to quantization than others is a critical stage in maximizing efficiency. For this purpose, we propose FIT. Motivated by an information geometric perspective, FIT combines the Fisher information with a model of quantization. We find that FIT can estimate the final performance of a network without retraining. FIT effectively fuses contributions from both parameter and activation quantization into a single metric. Additionally, FIT is fast to compute when compared to existing methods, demonstrating favourable convergence properties. 
These properties are validated experimentally across hundreds of quantization configurations, with a focus on layer-wise mixed-precision quantization.", "keywords": "Fisher Information;Quantization", "primary_area": "", "supplementary_material": "/attachment/9d042263205057132550e8f24b170be922154335.zip", "author": "Ben Zandonati;Adrian Alan Pol;Maurizio Pierini;Olya Sirkin;Tal Kopetz", "authorids": "~Ben_Zandonati1;~Adrian_Alan_Pol1;~Maurizio_Pierini1;~Olya_Sirkin1;~Tal_Kopetz1", "gender": ";M;M;F;", "homepage": ";https://adrianalan.me/;;;", "dblp": "331/3195;;218/6403;;", "google_scholar": ";dKzjiuEAAAAJ;;;", "orcid": ";0000-0002-9034-0230;0000-0003-1939-4268;;", "linkedin": "ben-zandonati-0925071a2/;adrianalanpol/;xmpierinix;olya-sirkin-72342013;talkopetz", "or_profile": "~Ben_Zandonati1;~Adrian_Alan_Pol1;~Maurizio_Pierini1;~Olya_Sirkin1;~Tal_Kopetz1", "aff": "University of Cambridge;Princeton University;CERN;Ceva;", "aff_domain": "cam.ac.uk;princeton.edu;cern.ch;ceva.co.il;", "position": "Undergrad student;Postdoc;Researcher;Researcher;", "bibtex": "@inproceedings{\nzandonati2023fit,\ntitle={{FIT}: A Metric for Model Sensitivity},\nauthor={Ben Zandonati and Adrian Alan Pol and Maurizio Pierini and Olya Sirkin and Tal Kopetz},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=PDG4-Y3aboN}\n}", "github": "", "project": "", "reviewers": "QyLp;orGP;cnF4;uWSp;qJTT", "pdf_size": 5160710, "recommendation": "3;5;8;8;8", "confidence": "5;4;2;4;4", "correctness": "2;3;3;4;3", "technical_novelty": "1;2;3;4;3", "empirical_novelty": "2;2;4;3;3", "wc_summary_paper": "49;39;113;40;58", "wc_strength_and_weaknesses": "480;132;291;161;104", "wc_clarity_quality_novelty_and_reproducibility": "102;6;95;55;29", "wc_summary_review": "272;58;76;13;122", "wc_review": "903;235;575;269;313", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "1452;1048;444;910;944", "reply_reviewers": "0;0;0;0;0", "reply_authors": "2;2;1;1;1", "recommendation_avg": [ 6.4, 2.0591260281974 ], "confidence_avg": [ 3.8, 0.9797958971132712 ], "correctness_avg": [ 3.0, 0.6324555320336759 ], "technical_novelty_avg": [ 2.6, 1.019803902718557 ], "empirical_novelty_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_paper_avg": [ 59.8, 27.476535443901945 ], "wc_strength_and_weaknesses_avg": [ 233.6, 138.85042311782848 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 57.4, 37.032958293930555 ], "wc_summary_review_avg": [ 108.2, 89.04021563316208 ], "wc_review_avg": [ 459.0, 252.263354453238 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 959.6, 322.1239512982541 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.654267558217469, "corr_recommendation_correctness": 0.767868896042439, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1042285129473929313&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=PDG4-Y3aboN", "email": "cam.ac.uk;princeton.edu;cern.ch;ceva.co.il;", "author_num": 5, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "University of Cambridge;Princeton University;European Organization for Nuclear Research;Ceva", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.cam.ac.uk;https://www.princeton.edu;https://home.cern;", "aff_unique_abbr": "Cambridge;Princeton;CERN;", "aff_campus_unique_index": "0", "aff_campus_unique": 
"Cambridge;", "aff_country_unique_index": "0;1;2", "aff_country_unique": "United Kingdom;United States;Switzerland;" }, { "title": "FreeMatch: Self-adaptive Thresholding for Semi-supervised Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11372", "id": "PDrUPTXJI_A", "poster": "/media/PosterPDFs/ICLR%202023/11372.png?t=1680840614.9186316", "openreview": "https://openreview.net/forum?id=PDrUPTXJI_A", "slides": "https://iclr.cc/virtual/2023/poster/11372", "video": "https://iclr.cc/virtual/2023/poster/11372", "author_site": "Yidong Wang, Hao Chen, Qiang Heng, Wenxin Hou, Yue Fan, Zhen Wu, Jindong Wang, Marios Savvides, Takahiro Shinozaki, Bhiksha Raj, Bernt Schiele, Xing Xie", "tldr": "We propose FreeMatch to define and adjust the confidence threshold in a self-adaptive manner for semi-supervised learning.", "abstract": "Semi-supervised Learning (SSL) has witnessed great success owing to the impressive performances brought by various methods based on pseudo labeling and consistency regularization. However, we argue that existing methods might fail to utilize the unlabeled data more effectively since they either use a pre-defined / fixed threshold or an ad-hoc threshold adjusting scheme, resulting in inferior performance and slow convergence. We first analyze a motivating example to obtain intuitions on the relationship between the desirable threshold and model's learning status. Based on the analysis, we hence propose FreeMatch to adjust the confidence threshold in a self-adaptive manner according to the model's learning status. We further introduce a self-adaptive class fairness regularization penalty to encourage the model for diverse predictions during the early training stage. Extensive experiments indicate the superiority of FreeMatch especially when the labeled data are extremely rare. FreeMatch achieves 5.78%, 13.59%, and 1.28% error rate reduction over the latest state-of-the-art method FlexMatch on CIFAR-10 with 1 label per class, STL-10 with 4 labels per class, and ImageNet with 100 labels per class, respectively. Moreover, FreeMatch can also boost the performance of imbalanced SSL. 
The code can be found at https://github.com/microsoft/Semi-supervised-learning.", "keywords": "Semi-Supervised Learning;Semi-Supervised Classification", "primary_area": "", "supplementary_material": "/attachment/cd9d8b3b6a8b92cf50500ede72dbce76c2a6b00c.zip", "author": "Yidong Wang;Hao Chen;Qiang Heng;Wenxin Hou;Yue Fan;Zhen Wu;Jindong Wang;Marios Savvides;Takahiro Shinozaki;Bhiksha Raj;Bernt Schiele;Xing Xie", "authorids": "~Yidong_Wang1;~Hao_Chen15;~Qiang_Heng1;~Wenxin_Hou1;~Yue_Fan1;~Zhen_Wu2;~Jindong_Wang1;~Marios_Savvides1;~Takahiro_Shinozaki1;~Bhiksha_Raj1;~Bernt_Schiele1;~Xing_Xie3", "gender": "M;M;M;M;M;M;;M;M;M;M;M", "homepage": "https://qianlanwyd.github.io/;https://hhhhhhao.github.io/;;https://houwx.net/;https://yue-fan.github.io/;https://wuzhen247.github.io/;;http://www.ts.ip.titech.ac.jp;https://www.cs.cmu.edu/directory/bhikshar/;http://www.mpi-inf.mpg.de/~schiele;http://research.microsoft.com/en-us/people/xingx/;https://jd92.wang/", "dblp": "59/6759.html;;;270/4628;;16/4485-2;13/3793;06/6505;60/3996;s/BerntSchiele;08/6809-1;19/2969-1", "google_scholar": ";tktqkhwAAAAJ;5dxuxNQAAAAJ;https://scholar.google.co.jp/citations?user=EbqaLAEAAAAJ;TNMSbOkAAAAJ;IoGlgtoAAAAJ;;dN0vEX8AAAAJ;;https://scholar.google.de/citations?user=z76PBfYAAAAJ;5EQfAFIAAAAJ;hBZ_tKsAAAAJ", "orcid": ";;;;0000-0001-6670-2011;0000-0002-7678-103X;;;;0000-0001-9683-5237;0000-0002-8608-8482;0000-0002-4833-0880", "linkedin": ";haochen97/;;;;;;;;;xingx/;jindong-wang/", "or_profile": "~Yidong_Wang1;~Hao_Chen15;~Qiang_Heng1;~Wenxin_Hou1;~Yue_Fan1;~Zhen_Wu2;~Marios_Savvides1;~Takahiro_Shinozaki1;~Bhiksha_Raj1;~Bernt_Schiele1;~Xing_Xie3;~Jindong_Wang4", "aff": "Peking University;Carnegie Mellon University;North Carolina State University;Microsoft;Google;Nanjing University;Carnegie Mellon University;Tokyo Institute of Technology;Mohamed bin Zayed University of Artificial Intelligence;Amazon;Microsoft Research Asia;Microsoft Research", "aff_domain": "pku.edu.cn;andrew.cmu.edu;ncsu.edu;microsoft.com;google.com;nju.edu.cn;cmu.edu;titech.ac.jp;mbzuai.ac.ae;amazon.com;microsoft.com;microsoft.com", "position": "PhD student;PhD student;PhD student;Applied Scientist;Intern;Researcher;Full Professor;Associate Professor;Full Professor;Principal Researcher;Senior Principal Researcher;Researcher", "bibtex": "@inproceedings{\nwang2023freematch,\ntitle={FreeMatch: Self-adaptive Thresholding for Semi-supervised Learning},\nauthor={Yidong Wang and Hao Chen and Qiang Heng and Wenxin Hou and Yue Fan and Zhen Wu and Jindong Wang and Marios Savvides and Takahiro Shinozaki and Bhiksha Raj and Bernt Schiele and Xing Xie},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=PDrUPTXJI_A}\n}", "github": "", "project": "", "reviewers": "Ebth;axb6;zMTV", "pdf_size": 2365840, "recommendation": "5;8;8", "confidence": "5;3;4", "correctness": "2;4;3", "technical_novelty": "2;3;3", "empirical_novelty": "2;3;3", "wc_summary_paper": "58;44;194", "wc_strength_and_weaknesses": "239;126;217", "wc_clarity_quality_novelty_and_reproducibility": "7;23;93", "wc_summary_review": "25;30;58", "wc_review": "329;223;562", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1284;293;391", "reply_reviewers": "0;0;0", "reply_authors": "3;2;2", "recommendation_avg": [ 7.0, 1.4142135623730951 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [
2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 98.66666666666667, 67.65270791985262 ], "wc_strength_and_weaknesses_avg": [ 194.0, 48.91489207456832 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 41.0, 37.345236197762446 ], "wc_summary_review_avg": [ 37.666666666666664, 14.522013940527977 ], "wc_review_avg": [ 371.3333333333333, 141.59645318847348 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 656.0, 445.86171249241244 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 12, 0 ], "corr_recommendation_confidence": -0.8660254037844387, "corr_recommendation_correctness": 0.8660254037844387, "gs_citation": 413, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16326867895345146465&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=PDrUPTXJI_A", "email": "pku.edu.cn;andrew.cmu.edu;ncsu.edu;microsoft.com;google.com;nju.edu.cn;cmu.edu;titech.ac.jp;mbzuai.ac.ae;amazon.com;microsoft.com;microsoft.com", "author_num": 12, "aff_unique_index": "0;1;2;3;4;5;1;6;7;8;3;3", "aff_unique_norm": "Peking University;Carnegie Mellon University;North Carolina State University;Microsoft;Google;Nanjing University;Tokyo Institute of Technology;Mohamed bin Zayed University of Artificial Intelligence;Amazon", "aff_unique_dep": ";;;Microsoft Corporation;Google;;;;Amazon.com, Inc.", "aff_unique_url": "http://www.pku.edu.cn;https://www.cmu.edu;https://www.ncsu.edu;https://www.microsoft.com;https://www.google.com;https://www.nju.edu.cn;https://www.titech.ac.jp;https://mbzuai.ac.ae;https://www.amazon.com", "aff_unique_abbr": "Peking U;CMU;NCSU;Microsoft;Google;Nanjing U;Titech;MBZUAI;Amazon", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Mountain View;Asia", "aff_country_unique_index": "0;1;1;1;1;0;1;2;3;1;0;1", "aff_country_unique": "China;United States;Japan;United Arab Emirates" }, { "title": "The Symmetric Generalized Eigenvalue Problem as a Nash Equilibrium", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11619", "id": "PEgBEB74JjB", "poster": "/media/PosterPDFs/ICLR%202023/11619.png?t=1682415412.2016675", "openreview": "https://openreview.net/forum?id=PEgBEB74JjB", "slides": "https://iclr.cc/virtual/2023/poster/11619", "video": "https://iclr.cc/virtual/2023/poster/11619", "author_site": "Ian Gemp, Charlie Chen, Brian McWilliams", "tldr": "We formulate the solution to the generalized eigenvalue problem as the Nash of a game, design an unbiased streaming-style algorithm to solve it, and analyze neural representations 1000x larger than before.", "abstract": "The symmetric generalized eigenvalue problem (SGEP) is a fundamental concept in numerical linear algebra. It captures the solution of many classical machine learning problems such as canonical correlation analysis, independent components analysis, partial least squares, linear discriminant analysis, principal components and others. Despite this, most general solvers are prohibitively expensive when dealing with *streaming data sets* (i.e., minibatches) and research has instead concentrated on finding efficient solutions to specific problem instances. In this work, we develop a game-theoretic formulation of the top-$k$ SGEP whose Nash equilibrium is the set of generalized eigenvectors. We also present a parallelizable algorithm with guaranteed asymptotic convergence to the Nash. 
Current state-of-the-art methods require $\\mathcal{O}(d^2k)$ runtime complexity per iteration, which is prohibitively expensive when the number of dimensions ($d$) is large. We show how to modify this parallel approach to achieve $\\mathcal{O}(dk)$ runtime complexity. Empirically, we demonstrate that the resulting algorithm is able to solve a variety of SGEP problem instances, including a large-scale analysis of neural network activations.", "keywords": "generalized eigenvalue problem;nash;riemannian optimization;canonical correlation analysis;independent component analysis;distributed computing", "primary_area": "", "supplementary_material": "/attachment/5175e65352b48b17631ec3ecacf44f115e404b19.zip", "author": "Ian Gemp;Charlie Chen;Brian McWilliams", "authorids": "~Ian_Gemp1;~Charlie_Chen2;~Brian_McWilliams2", "gender": "M;M;M", "homepage": "https://imgemp.github.io/;;https://sites.google.com/view/mcbrian/", "dblp": "66/10996;;", "google_scholar": "5vo3MeEAAAAJ;PpYgkgsAAAAJ;https://scholar.google.ch/citations?user=IS4VSXAAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Ian_Gemp1;~Charlie_Chen2;~Brian_McWilliams2", "aff": "Google DeepMind;Google DeepMind;Twitter Cortex", "aff_domain": "google.com;deepmind.com;twitter.com", "position": "Research Scientist;Researcher;Research Scientist", "bibtex": "@inproceedings{\ngemp2023the,\ntitle={The Symmetric Generalized Eigenvalue Problem as a Nash Equilibrium},\nauthor={Ian Gemp and Charlie Chen and Brian McWilliams},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=PEgBEB74JjB}\n}", "github": "", "project": "", "reviewers": "dLpL;4TCc;U98f;aVkz", "pdf_size": 1203973, "recommendation": "6;8;8;8", "confidence": "4;3;3;4", "correctness": "4;4;4;4", "technical_novelty": "3;4;3;4", "empirical_novelty": "3;4;3;4", "wc_summary_paper": "42;421;193;65", "wc_strength_and_weaknesses": "225;304;430;86", "wc_clarity_quality_novelty_and_reproducibility": "2;13;128;19", "wc_summary_review": "27;28;28;71", "wc_review": "296;766;779;241", "wc_reply_reviewers": "0;0;21;0", "wc_reply_authors": "783;337;841;191", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;2;1", "recommendation_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 4.0, 0.0 ], "technical_novelty_avg": [ 3.5, 0.5 ], "empirical_novelty_avg": [ 3.5, 0.5 ], "wc_summary_paper_avg": [ 180.25, 150.43167053516356 ], "wc_strength_and_weaknesses_avg": [ 261.25, 124.83063526234255 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 40.5, 50.884673527497455 ], "wc_summary_review_avg": [ 38.5, 18.76832437912346 ], "wc_review_avg": [ 520.5, 252.79092151420312 ], "wc_reply_reviewers_avg": [ 5.25, 9.093266739736606 ], "wc_reply_authors_avg": [ 538.0, 279.5728885281976 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:M7qa4X1aiLcJ:scholar.google.com/&scioq=The+Symmetric+Generalized+Eigenvalue+Problem+as+a+Nash+Equilibrium&hl=en&as_sdt=0,5", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=PEgBEB74JjB", "email": "google.com;deepmind.com;twitter.com", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Google;Twitter", "aff_unique_dep": "Google DeepMind;Cortex",
"aff_unique_url": "https://deepmind.com;https://twitter.com", "aff_unique_abbr": "DeepMind;Twitter", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Compositional Law Parsing with Latent Random Functions", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11514", "id": "PEuxUXIMLlA", "poster": "/media/PosterPDFs/ICLR%202023/11514.png?t=1680890470.9360402", "openreview": "https://openreview.net/forum?id=PEuxUXIMLlA", "slides": "https://iclr.cc/virtual/2023/poster/11514", "video": "https://iclr.cc/virtual/2023/poster/11514", "author_site": "Fan Shi, Bin Li, Xiangyang Xue", "tldr": "", "abstract": "Human cognition has compositionality. We understand a scene by decomposing the scene into different concepts (e.g., shape and position of an object) and learning the respective laws of these concepts, which may be either natural (e.g., laws of motion) or man-made (e.g., laws of a game). The automatic parsing of these laws indicates the model's ability to understand the scene, which makes law parsing play a central role in many visual tasks. This paper proposes a deep latent variable model for Compositional LAw Parsing (CLAP), which achieves the human-like compositionality ability through an encoding-decoding architecture to represent concepts in the scene as latent variables. CLAP employs concept-specific latent random functions instantiated with Neural Processes to capture the law of concepts. Our experimental results demonstrate that CLAP outperforms the baseline methods in multiple visual tasks such as intuitive physics, abstract visual reasoning, and scene representation. The law manipulation experiments illustrate CLAP's interpretability by modifying specific latent random functions on samples. 
For example, CLAP learns the laws of position-changing and appearance constancy from the moving balls in a scene, making it possible to exchange laws between samples or compose existing laws into novel laws.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/bf08a79c3650dc0dc52e7e9f04bf6ee671437cfa.zip", "author": "Fan Shi;Bin Li;Xiangyang Xue", "authorids": "~Fan_Shi1;~Bin_Li4;~Xiangyang_Xue2", "gender": ";M;M", "homepage": ";https://aimpressionist.github.io/publications;http://homepage.fudan.edu.cn//xyxue", "dblp": ";89/6764-15;84/3791", "google_scholar": "MRVWY-gAAAAJ;8t97oL8AAAAJ;", "orcid": ";0000-0002-9633-0033;0000-0002-4897-9209", "linkedin": ";;", "or_profile": "~Fan_Shi1;~Bin_Li4;~Xiangyang_Xue2", "aff": "Fudan University;Fudan University;Fudan University", "aff_domain": "fudan.edu.cn;fudan.edu.cn;fudan.edu.cn", "position": "PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nshi2023compositional,\ntitle={Compositional Law Parsing with Latent Random Functions},\nauthor={Fan Shi and Bin Li and Xiangyang Xue},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=PEuxUXIMLlA}\n}", "github": "", "project": "", "reviewers": "rEh1;Fbxt;HFjB;i5ZN;4jVq", "pdf_size": 9594052, "recommendation": "6;6;6;6;8", "confidence": "3;4;4;2;2", "correctness": "4;4;3;3;4", "technical_novelty": "3;3;3;4;4", "empirical_novelty": "3;3;3;4;3", "wc_summary_paper": "31;102;79;88;127", "wc_strength_and_weaknesses": "75;226;183;165;48", "wc_clarity_quality_novelty_and_reproducibility": "88;56;74;134;27", "wc_summary_review": "256;19;41;69;47", "wc_review": "450;403;377;456;249", "wc_reply_reviewers": "0;12;0;48;0", "wc_reply_authors": "1415;668;532;486;123", "reply_reviewers": "0;1;0;1;0", "reply_authors": "2;1;1;1;1", "recommendation_avg": [ 6.4, 0.7999999999999999 ], "confidence_avg": [ 3.0, 0.8944271909999159 ], "correctness_avg": [ 3.6, 0.4898979485566356 ], "technical_novelty_avg": [ 3.4, 0.4898979485566356 ], "empirical_novelty_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_paper_avg": [ 85.4, 31.664491153340833 ], "wc_strength_and_weaknesses_avg": [ 139.4, 67.16725392629952 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 75.8, 35.53252031590216 ], "wc_summary_review_avg": [ 86.4, 86.28232727505674 ], "wc_review_avg": [ 387.0, 75.0066663703967 ], "wc_reply_reviewers_avg": [ 12.0, 18.5903200617956 ], "wc_reply_authors_avg": [ 644.8, 425.19708371530487 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.5590169943749475, "corr_recommendation_correctness": 0.408248290463863, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13035865741328429733&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=PEuxUXIMLlA", "email": "fudan.edu.cn;fudan.edu.cn;fudan.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Fudan University", "aff_unique_dep": "", "aff_unique_url": "https://www.fudan.edu.cn", "aff_unique_abbr": "Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "PExjmUwEAAH", "title": "Interactive Sequential Generative Models", "track": "main", "status": "Reject", "tldr": "We propose a novel framework for multiagent trajectories that augments 
sequential generative models with latent social structures.", "abstract": "Understanding spatiotemporal relationships among several agents is of considerable relevance for many domains. Team sports represent a particularly interesting real-world proving ground since modeling interacting athletes requires capturing highly dynamic and complex agent-agent dependencies in addition to temporal components. However, existing generative methods in this field either entangle all latent factors into a single variable and are thus constrained in practical applicability, or they focus on uncovering interaction structures, which restricts their generative ability. To address this gap, we propose a framework for multiagent trajectories that augments sequential generative models with latent social structures. First, we derive a novel objective via approximate inference using a disentangled latent space that accurately describes the data generating process in such systems. Based on the proposed training criterion, we then present a model architecture that unifies insights from neural interaction inference and graph-structured variational recurrent neural networks for generating collective movements while allocating latent information. We validate our model on data from professional soccer and basketball. Our framework not only improves upon existing state-of-the-art approaches in forecasting trajectories, but also infers semantically meaningful representations that can be used in downstream tasks.", "keywords": "Generative Models and Autoencoders;Graph Neural Networks;Recurrent Networks;Sequential Models;Multi-Agent", "primary_area": "", "supplementary_material": "", "author": "Dennis Fassmeyer;Pascal Fassmeyer;Ulf Brefeld", "authorids": "~Dennis_Fassmeyer1;~Pascal_Fassmeyer1;~Ulf_Brefeld1", "gender": "M;M;M", "homepage": "https://ml3.leuphana.de/dennis.html;;http://ml3.leuphana.de/ulf.html", "dblp": "346/0882;;99/122", "google_scholar": ";https://scholar.google.de/citations?user=aCymQ-4AAAAJ;https://scholar.google.de/citations?user=oWmjswoAAAAJ", "orcid": ";;", "linkedin": ";;ulfbrefeld/", "or_profile": "~Dennis_Fassmeyer1;~Pascal_Fassmeyer1;~Ulf_Brefeld1", "aff": "Leuphana Universit\u00e4t L\u00fcneburg;Leuphana Universit\u00e4t L\u00fcneburg;Inst. 
of Information Systems / Machine Learning", "aff_domain": "leuphana.de;leuphana.de;leuphana.de", "position": "PhD student;Researcher;Full Professor", "bibtex": "@misc{\nfassmeyer2023interactive,\ntitle={Interactive Sequential Generative Models},\nauthor={Dennis Fassmeyer and Pascal Fassmeyer and Ulf Brefeld},\nyear={2023},\nurl={https://openreview.net/forum?id=PExjmUwEAAH}\n}", "github": "", "project": "", "reviewers": "Gms6;qCyi;yCKo;JaLg", "site": "https://openreview.net/forum?id=PExjmUwEAAH", "pdf_size": 3095319, "recommendation": "3;5;5;6", "confidence": "3;4;3;4", "correctness": "1;3;3;3", "technical_novelty": "1;3;3;3", "empirical_novelty": "3;3;4;3", "wc_summary_paper": "77;155;98;54", "wc_strength_and_weaknesses": "567;323;114;216", "wc_clarity_quality_novelty_and_reproducibility": "149;267;34;27", "wc_summary_review": "55;134;60;33", "wc_review": "848;879;306;330", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "2382;2368;941;705", "reply_reviewers": "0;0;0;0", "reply_authors": "4;5;2;1", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.5, 0.8660254037844386 ], "technical_novelty_avg": [ 2.5, 0.8660254037844386 ], "empirical_novelty_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 96.0, 37.44996662214801 ], "wc_strength_and_weaknesses_avg": [ 305.0, 168.35230916147245 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 119.25, 98.09784656148166 ], "wc_summary_review_avg": [ 70.5, 38.04273912325452 ], "wc_review_avg": [ 590.75, 273.1019727134903 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1599.0, 780.4886290010893 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 3.0, 1.5811388300841898 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.6882472016116854, "corr_recommendation_correctness": 0.9271726499455306, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:9OVw_3ddcCUJ:scholar.google.com/&scioq=Interactive+Sequential+Generative+Models&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;1", "aff_unique_norm": "Leuphana University L\u00fcneburg;Institute of Information Systems / Machine Learning", "aff_unique_dep": ";Department of Information Systems / Machine Learning", "aff_unique_url": "https://www.leuphana.de;", "aff_unique_abbr": "Leuphana;", "aff_campus_unique_index": "0;0", "aff_campus_unique": "L\u00fcneburg;", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany;" }, { "id": "PFUIHZGE4DS", "title": "MESSAGENET: MESSAGE CLASSIFICATION USING NATURAL LANGUAGE PROCESSING AND META-DATA", "track": "main", "status": "Reject", "tldr": "We propose a deep neural network based on blocks for message classification using meta-data inputs", "abstract": "In this paper we propose a new Deep Learning (DL) approach for message classification. Our method\nis based on the state-of-the-art Natural Language Processing (NLP) building blocks, combined\nwith a novel technique for infusing the meta-data input that is typically available in messages\nsuch as the sender information, timestamps, attached image, audio, affiliations, and more. As we\ndemonstrate throughout the paper, going beyond the mere text by leveraging all available channels\nin the message, could yield an improved representation and higher classification accuracy. 
To achieve message representation, each type of input is processed in a dedicated block in the neural network architecture that is suitable for the data type. Such an implementation enables training all blocks together simultaneously and forming cross-channel features in the network. We show in the Experiments Section that in some cases, a message\u2019s meta-data holds additional information that cannot be extracted from the text alone, and when using this information we achieve better performance. Furthermore, we demonstrate that our multi-modality block approach outperforms other approaches for injecting the meta-data into the text classifier.", "keywords": "Message classification;Meta-data injection;Deep learning;Natural language processing", "primary_area": "", "supplementary_material": "", "author": "Adar Kahana;Oren Elisha", "authorids": "~Adar_Kahana1;~Oren_Elisha1", "gender": "M;M", "homepage": ";", "dblp": ";", "google_scholar": "IXKTDFAAAAAJ;", "orcid": ";", "linkedin": "adar-kahana-400105141/;", "or_profile": "~Adar_Kahana1;~Oren_Elisha1", "aff": "Brown University;", "aff_domain": "brown.edu;", "position": "Postdoc;", "bibtex": "@misc{\nkahana2023messagenet,\ntitle={{MESSAGENET}: {MESSAGE} {CLASSIFICATION} {USING} {NATURAL} {LANGUAGE} {PROCESSING} {AND} {META}-{DATA}},\nauthor={Adar Kahana and Oren Elisha},\nyear={2023},\nurl={https://openreview.net/forum?id=PFUIHZGE4DS}\n}", "github": "", "project": "", "reviewers": "rydk;uH2i;7oAd;FKbD", "site": "https://openreview.net/forum?id=PFUIHZGE4DS", "pdf_size": 477988, "recommendation": "1;1;3;3", "confidence": "5;5;4;4", "correctness": "2;2;3;2", "technical_novelty": "1;1;1;1", "empirical_novelty": "1;1;1;1", "wc_summary_paper": "58;46;49;41", "wc_strength_and_weaknesses": "166;48;73;105", "wc_clarity_quality_novelty_and_reproducibility": "35;8;23;74", "wc_summary_review": "45;36;13;13", "wc_review": "304;138;158;233", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 2.0, 1.0 ], "confidence_avg": [ 4.5, 0.5 ], "correctness_avg": [ 2.25, 0.4330127018922193 ], "technical_novelty_avg": [ 1.0, 0.0 ], "empirical_novelty_avg": [ 1.0, 0.0 ], "wc_summary_paper_avg": [ 48.5, 6.18465843842649 ], "wc_strength_and_weaknesses_avg": [ 98.0, 44.15314258351267 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 35.0, 24.464259645450134 ], "wc_summary_review_avg": [ 26.75, 14.113380176272443 ], "wc_review_avg": [ 208.25, 65.65201824772792 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:RZ8DN6-mwwgJ:scholar.google.com/&scioq=MESSAGENET:+MESSAGE+CLASSIFICATION+USING+NATURAL+LANGUAGE+PROCESSING+AND+META-DATA&hl=en&as_sdt=0,33", "gs_version_total": 3, "aff_unique_index": "0", "aff_unique_norm": "Brown University", "aff_unique_dep": "", "aff_unique_url": "https://www.brown.edu", "aff_unique_abbr": "Brown", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Bridging the Gap between ANNs and SNNs by Calibrating Offset Spikes", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12118", "id": "PFbzoWZyZRX", "poster":
"/media/PosterPDFs/ICLR%202023/12118.png?t=1680790179.793159", "openreview": "https://openreview.net/forum?id=PFbzoWZyZRX", "slides": "https://iclr.cc/virtual/2023/poster/12118", "video": "https://iclr.cc/virtual/2023/poster/12118", "author_site": "Zecheng Hao, Jianhao Ding, Tong Bu, Tiejun Huang, Zhaofei Yu", "tldr": "A calibration method based on shifting initial membrane potential is proposed for ANN-SNN conversion to reach the same level of performance as BPTT.", "abstract": "Spiking Neural Networks (SNNs) have attracted great attention due to their distinctive characteristics of low power consumption and temporal information processing. ANN-SNN conversion, as the most commonly used training method for applying SNNs, can ensure that converted SNNs achieve comparable performance to ANNs on large-scale datasets. However, the performance degrades severely under low quantities of time-steps, which hampers the practical applications of SNNs to neuromorphic chips. \nIn this paper, instead of evaluating different conversion errors and then eliminating these errors, we define an offset spike to measure the degree of deviation between actual and desired SNN firing rates. We perform a detailed analysis of offset spike and note that the firing of one additional (or one less) spike is the main cause of conversion errors. Based on this, we propose an optimization strategy based on shifting the initial membrane potential and we theoretically prove the corresponding optimal shifting distance for calibrating the spike. In addition, we also note that our method has a unique iterative property that enables further reduction of conversion errors. The experimental results show that our proposed method achieves state-of-the-art performance on CIFAR-10, CIFAR-100, and ImageNet datasets. For example, we reach a top-1 accuracy of 67.12% on ImageNet when using 6 time-steps. To the best of our knowledge, this is the first time an ANN-SNN conversion has been shown to simultaneously achieve high accuracy and ultralow latency on complex datasets. 
Code is available at https://github.com/hzc1208/ANN2SNN_COS.", "keywords": "Spiking Neural Networks;Spike Calibration;Ultra-low-latency Conversion", "primary_area": "", "supplementary_material": "/attachment/4f2e613e05aae1c3c0ac27226b995e556453b450.zip", "author": "Zecheng Hao;Jianhao Ding;Tong Bu;Tiejun Huang;Zhaofei Yu", "authorids": "~Zecheng_Hao1;~Jianhao_Ding1;~Tong_Bu1;~Tiejun_Huang1;~Zhaofei_Yu1", "gender": ";M;;M;M", "homepage": "https://hzc1208.github.io/;https://dingjianhao.github.io/;;https://idm.pku.edu.cn/~tjhuang/;https://yuzhaofei.github.io", "dblp": "339/6969;128/2534;;h/TiejunHuang;166/0573", "google_scholar": "txTkX7YAAAAJ;4rDfCSsAAAAJ;;https://scholar.google.com.tw/citations?user=knvEK4AAAAAJ;qaUgD50AAAAJ", "orcid": "0000-0001-9074-2857;;;0000-0002-4234-6099;", "linkedin": ";;;;", "or_profile": "~Zecheng_Hao1;~Jianhao_Ding1;~Tong_Bu1;~Tiejun_Huang1;~Zhaofei_Yu1", "aff": "Peking University;Institute of Automation, Chinese Academy of Sciences;;Peking University;Peking University", "aff_domain": "pku.edu.cn;ia.ac.cn;;pku.edu.cn;pku.edu.cn", "position": "Undergrad student;Intern;;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nhao2023bridging,\ntitle={Bridging the Gap between {ANN}s and {SNN}s by Calibrating Offset Spikes},\nauthor={Zecheng Hao and Jianhao Ding and Tong Bu and Tiejun Huang and Zhaofei Yu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=PFbzoWZyZRX}\n}", "github": "", "project": "", "reviewers": "EoJ9;oG9M;rBPQ;NhRe", "pdf_size": 686327, "recommendation": "6;6;8;8", "confidence": "4;4;5;5", "correctness": "3;3;4;3", "technical_novelty": "3;3;3;4", "empirical_novelty": "3;3;4;4", "wc_summary_paper": "28;50;94;93", "wc_strength_and_weaknesses": "191;66;276;181", "wc_clarity_quality_novelty_and_reproducibility": "8;79;35;24", "wc_summary_review": "8;51;33;23", "wc_review": "235;246;438;321", "wc_reply_reviewers": "24;0;28;11", "wc_reply_authors": "549;459;1269;1157", "reply_reviewers": "1;0;1;1", "reply_authors": "1;2;2;2", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 4.5, 0.5 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.5, 0.5 ], "wc_summary_paper_avg": [ 66.25, 28.340562803162538 ], "wc_strength_and_weaknesses_avg": [ 178.5, 74.70776398741967 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 36.5, 26.348624252510795 ], "wc_summary_review_avg": [ 28.75, 15.626499928006911 ], "wc_review_avg": [ 310.0, 80.97221745759468 ], "wc_reply_reviewers_avg": [ 15.75, 11.053845484717073 ], "wc_reply_authors_avg": [ 858.5, 358.1211387226395 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 1.0, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 52, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=493917709077715392&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=PFbzoWZyZRX", "email": "pku.edu.cn;ia.ac.cn;;pku.edu.cn;pku.edu.cn", "author_num": 5, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Peking University;Chinese Academy of Sciences", "aff_unique_dep": ";Institute of Automation", "aff_unique_url": "http://www.pku.edu.cn;http://www.ia.cas.cn", "aff_unique_abbr": "Peking U;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "",
"aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "PHcLZ8Yh6h4", "title": "Progressive Purification for Instance-Dependent Partial Label Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Partial-label learning (PLL) aims to train multi-class classifiers from instances with partial labels (PLs)---a PL for an instance is a set of candidate labels where a fixed but unknown candidate is the true label. In the last few years, the instance-independent generation process of PLs has been extensively studied, on the basis of which many practical and theoretical advances have been made in PLL, while relatively less attention has been paid to the practical setting of instance-dependent PLs, namely, the PL depends not only on the true label but the instance itself. In this paper, we propose a theoretically grounded and practically effective approach called progressive purification (POP) for instance-dependent PLL: in each epoch, POP updates the learning model while purifies each PL by progressively moving out false candidate labels for the next epoch of the model training. Theoretically, we prove that POP enlarges the region where the model is reliable by a promising rate, and eventually approximates the Bayes optimal classifier with mild assumptions; technically, POP is flexible with arbitrary losses and compatible with deep networks, so the previous advanced PLL losses can be embedded in it and the performance is often significantly improved.", "keywords": "Partial label learning", "primary_area": "", "supplementary_material": "", "author": "Ning Xu;Biao Liu;Jiaqi Lv;Congyu Qiao;Xin Geng", "authorids": "~Ning_Xu5;~Biao_Liu1;~Jiaqi_Lv1;~Congyu_Qiao3;~Xin_Geng1", "gender": "M;M;F;M;M", "homepage": "http://palm.seu.edu.cn/xuning/;http://palm.seu.edu.cn/homepage/liubiao/demo/demo/index.html;;http://palm.seu.edu.cn/homepage/qiaocongyu/demo/index.html;http://palm.seu.edu.cn/xgeng/index.htm", "dblp": "04/5856-9;;191/9417;277/9262;", "google_scholar": ";;PK8L9mYAAAAJ;;ZOCxkIcAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Ning_Xu5;~Biao_Liu1;~Jiaqi_Lv1;~Congyu_Qiao3;~Xin_Geng1", "aff": "Southeast University;Southeast University;RIKEN;Southeast University;Southeast University, China", "aff_domain": "seu.edu.cn;seu.edu.cn;riken.jp;seu.edu.cn;seu.edu.cn", "position": "Assistant Professor;MS student;Postdoc;PhD student;Professor", "bibtex": "@misc{\nxu2023progressive,\ntitle={Progressive Purification for Instance-Dependent Partial Label Learning},\nauthor={Ning Xu and Biao Liu and Jiaqi Lv and Congyu Qiao and Xin Geng},\nyear={2023},\nurl={https://openreview.net/forum?id=PHcLZ8Yh6h4}\n}", "github": "", "project": "", "reviewers": "K87B;T9T6;T1k7;s5av", "site": "https://openreview.net/forum?id=PHcLZ8Yh6h4", "pdf_size": 438574, "recommendation": "3;3;5;5", "confidence": "4;4;3;3", "correctness": "2;4;3;4", "technical_novelty": "2;3;3;4", "empirical_novelty": "2;3;2;4", "wc_summary_paper": "13;121;37;101", "wc_strength_and_weaknesses": "457;160;171;231", "wc_clarity_quality_novelty_and_reproducibility": "8;19;52;52", "wc_summary_review": "16;14;46;16", "wc_review": "494;314;306;400", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 
68.0, 44.395945760846224 ], "wc_strength_and_weaknesses_avg": [ 254.75, 119.85486014342514 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 32.75, 19.638928178492836 ], "wc_summary_review_avg": [ 23.0, 13.30413469565007 ], "wc_review_avg": [ 378.5, 76.18890995413965 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": 0.30151134457776363, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11739973167667395961&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Southeast University;RIKEN", "aff_unique_dep": ";", "aff_unique_url": "https://www.seu.edu.cn/;https://www.riken.jp", "aff_unique_abbr": "SEU;RIKEN", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "China;Japan" }, { "id": "PHpK5B2iGpq", "title": "Self-supervised debiasing using low rank regularization", "track": "main", "status": "Reject", "tldr": "", "abstract": "Spurious correlations can cause strong biases in deep neural networks, impairing generalization ability. While most existing debiasing methods require full supervision on either spurious attributes or target labels, training a debiased model from a limited amount of both annotations is still an open issue. To overcome such limitations, we first examine an interesting phenomenon through the spectral analysis of latent representations: spuriously correlated, easy-to-learn attributes make neural networks inductively biased towards encoding lower effective rank representations. We also show that a rank regularization can amplify this bias in a way that encourages highly correlated features. Motivated by these observations, we propose a self-supervised debiasing framework that is potentially compatible with unlabeled samples. We first pretrain a biased encoder in a self-supervised manner with the rank regularization, serving as a semantic bottleneck to enforce the encoder to learn the spuriously correlated attributes. This biased encoder is then used to discover and upweight bias-conflicting samples in a downstream task, serving as a boosting signal to effectively debias the main model.
Remarkably, the proposed debiasing framework significantly improves the generalization performance of self-supervised learning baselines and, in some cases, even outperforms state-of-the-art supervised debiasing approaches.", "keywords": "Debiasing;spurious correlation;self-supervised learning", "primary_area": "", "supplementary_material": "/attachment/76e553b0e328c30eb849216e098ac051f88ef5d1.zip", "author": "Geon Yeong Park;Chanyong Jung;Jong Chul Ye;Sang Wan Lee", "authorids": "~Geon_Yeong_Park1;~Chanyong_Jung1;~Jong_Chul_Ye1;~Sang_Wan_Lee1", "gender": "M;M;M;M", "homepage": "https://geonyeong-park.github.io/;https://sites.google.com/view/jcy132;https://bispl.weebly.com/;https://aibrain.kaist.ac.kr/sang-wan-lee", "dblp": "289/5924;221/2728;15/5613;77/6650", "google_scholar": "HGF4a14AAAAJ;https://scholar.google.com/citations?hl=en;HNMjoNEAAAAJ;0rMoHW4AAAAJ", "orcid": ";;;", "linkedin": ";chanyongjung/;;", "or_profile": "~Geon_Yeong_Park1;~Chanyong_Jung1;~Jong_Chul_Ye1;~Sang_Wan_Lee1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "position": "PhD student;PhD student;Full Professor;Associate Professor", "bibtex": "@misc{\npark2023selfsupervised,\ntitle={Self-supervised debiasing using low rank regularization},\nauthor={Geon Yeong Park and Chanyong Jung and Jong Chul Ye and Sang Wan Lee},\nyear={2023},\nurl={https://openreview.net/forum?id=PHpK5B2iGpq}\n}", "github": "", "project": "", "reviewers": "3q4H;cTbQ;datJ;rRxe", "site": "https://openreview.net/forum?id=PHpK5B2iGpq", "pdf_size": 2606731, "recommendation": "3;6;6;8", "confidence": "4;2;3;4", "correctness": "2;3;3;4", "technical_novelty": "2;3;3;4", "empirical_novelty": "3;3;2;3", "wc_summary_paper": "178;83;93;71", "wc_strength_and_weaknesses": "560;60;293;580", "wc_clarity_quality_novelty_and_reproducibility": "93;16;37;43", "wc_summary_review": "72;46;25;46", "wc_review": "903;205;448;740", "wc_reply_reviewers": "0;0;74;0", "wc_reply_authors": "2386;153;1708;567", "reply_reviewers": "0;0;1;0", "reply_authors": "4;1;4;1", "recommendation_avg": [ 5.75, 1.7853571071357126 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 106.25, 42.150771048700875 ], "wc_strength_and_weaknesses_avg": [ 373.25, 213.41669920603684 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 47.25, 28.252212302756046 ], "wc_summary_review_avg": [ 47.25, 16.663958113245485 ], "wc_review_avg": [ 574.0, 268.25081546940356 ], "wc_reply_reviewers_avg": [ 18.5, 32.04293994002423 ], "wc_reply_authors_avg": [ 1203.5, 889.0316361075122 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 1.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.1266600992762247, "corr_recommendation_correctness": 0.9901475429766743, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=587174693536049422&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", 
"aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "PHtzmXK8am", "title": "TAN without a burn: Scaling laws of DP-SGD", "track": "main", "status": "Reject", "tldr": "Computationally friendly hyper-parameter search with DP-SGD for new state-of-the-art performance on ImageNet.", "abstract": "Differentially Private methods for training Deep Neural Networks (DNNs) have progressed recently, in particular with the use of massive batches and aggregated data augmentations for a large number of steps. These techniques require much more compute than their non-private counterparts, shifting the traditional privacy-accuracy trade-off to a privacy-accuracy-compute trade-off and making hyper-parameter search virtually impossible for realistic scenarios. In this work, we decouple privacy analysis and experimental behavior of noisy training to explore the trade-off with minimal computational requirements. We first use the tools of R\u00e9nyi Differential Privacy (RDP) to show that the privacy budget, when not overcharged, only depends on the total amount of noise (TAN) injected throughout training. We then derive scaling laws for training models with DP-SGD to optimize hyper-parameters with more than a $100\\times$ reduction in computational budget. We apply the proposed method on CIFAR-10 and ImageNet and, in particular, strongly improve the state-of-the-art on ImageNet with a $+9$ points gain in accuracy for a privacy budget $\\varepsilon=8$.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/904605c39a8d49278f53189dc48130f5776e772e.zip", "author": "Tom Sander;Pierre Stock;Alexandre Sablayrolles", "authorids": "~Tom_Sander1;~Pierre_Stock1;~Alexandre_Sablayrolles1", "gender": "M;M;", "homepage": ";https://research.fb.com/people/stock-pierre/;", "dblp": ";210/2208;186/7749", "google_scholar": ";https://scholar.google.fr/citations?user=3e2-59cAAAAJ;Wy8wM-cAAAAJ", "orcid": ";;", "linkedin": "tomsdr;;", "or_profile": "~Tom_Sander1;~Pierre_Stock1;~Alexandre_Sablayrolles1", "aff": "\u00c9cole Polytechnique;Meta Facebook;Meta Facebook", "aff_domain": "polytechnique.fr;fb.com;fb.com", "position": "PhD student;Research Scientist;Researcher", "bibtex": "@misc{\nsander2023tan,\ntitle={{TAN} without a burn: Scaling laws of {DP}-{SGD}},\nauthor={Tom Sander and Pierre Stock and Alexandre Sablayrolles},\nyear={2023},\nurl={https://openreview.net/forum?id=PHtzmXK8am}\n}", "github": "", "project": "", "reviewers": "MS1Z;AFbM;FyxV;6jSv", "site": "https://openreview.net/forum?id=PHtzmXK8am", "pdf_size": 1390439, "recommendation": "6;6;6;6", "confidence": "4;4;3;4", "correctness": "4;3;3;3", "technical_novelty": "4;2;2;3", "empirical_novelty": "4;3;3;3", "wc_summary_paper": "121;53;56;205", "wc_strength_and_weaknesses": "178;109;386;328", "wc_clarity_quality_novelty_and_reproducibility": "51;14;48;141", "wc_summary_review": "42;94;76;95", "wc_review": "392;270;566;769", "wc_reply_reviewers": "10;0;6;299", "wc_reply_authors": "338;118;822;1420", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;2", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 108.75, 61.856184007744936 ], "wc_strength_and_weaknesses_avg": [ 250.25, 111.40550928926271 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 63.5, 47.04519104010526 ], 
"wc_summary_review_avg": [ 76.75, 21.44032415799724 ], "wc_review_avg": [ 499.25, 187.9353279721511 ], "wc_reply_reviewers_avg": [ 78.75, 127.2111925107221 ], "wc_reply_authors_avg": [ 674.5, 500.11273729030336 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 49, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=422740438535856767&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 8, "aff_unique_index": "0;1;1", "aff_unique_norm": "Ecole Polytechnique;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://www.polytechnique.edu;https://meta.com", "aff_unique_abbr": "X;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "France;United States" }, { "title": "Restricted Strong Convexity of Deep Learning Models with Smooth Activations", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11431", "id": "PINRbk7h01", "poster": "/media/PosterPDFs/ICLR%202023/11431.png?t=1680991884.2098136", "openreview": "https://openreview.net/forum?id=PINRbk7h01", "slides": "https://iclr.cc/virtual/2023/poster/11431", "video": "https://iclr.cc/virtual/2023/poster/11431", "author_site": "Arindam Banerjee, Pedro Cisneros-Velarde, Libin Zhu, Misha Belkin", "tldr": "", "abstract": "We consider the problem of optimization of deep learning models with smooth activation functions. While there exist influential results on the problem from the ``near initialization'' perspective, we shed considerable new light on the problem. In particular, we make two key technical contributions for such models with $L$ layers, $m$ width, and $\\sigma_0^2$ initialization variance. First, for suitable $\\sigma_0^2$, we establish a $O(\\frac{\\text{poly}(L)}{\\sqrt{m}})$ upper bound on the spectral norm of the Hessian of such models, considerably sharpening prior results. Second, we introduce a new analysis of optimization based on Restricted Strong Convexity (RSC) which holds as long as the squared norm of the average gradient of predictors is $\\Omega(\\frac{\\text{poly}(L)}{\\sqrt{m}})$ for the square loss. We also present results for more general losses. The RSC based analysis does not need the ``near initialization\" perspective and guarantees geometric convergence for gradient descent (GD). To the best of our knowledge, ours is the first result on establishing geometric convergence of GD based on RSC for deep learning models, thus becoming an alternative sufficient condition for convergence that does not depend on the widely-used Neural Tangent Kernel (NTK). 
We share preliminary experimental results supporting our theoretical advances.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Arindam Banerjee;Pedro Cisneros-Velarde;Libin Zhu;Misha Belkin", "authorids": "~Arindam_Banerjee4;~Pedro_Cisneros-Velarde1;~Libin_Zhu1;~Misha_Belkin1", "gender": ";;M;", "homepage": "https://arindam.cs.illinois.edu/;https://sites.google.com/view/pcisnerosv;;http://misha.belkin-wang.org/", "dblp": "82/4807.html;188/3800;260/0355;", "google_scholar": "RY7cuPAAAAAJ;VvVRo5oAAAAJ;hyTGiUcAAAAJ;Iwd9DdkAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Arindam_Banerjee4;~Pedro_Cisneros-Velarde1;~Libin_Zhu1;~Misha_Belkin1", "aff": "University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;University of California, San Diego;University of California, San Diego", "aff_domain": "illinois.edu;illinois.edu;ucsd.edu;ucsd.edu", "position": "Professor;Postdoc;PhD student;Professor", "bibtex": "@inproceedings{\nbanerjee2023restricted,\ntitle={Restricted Strong Convexity of Deep Learning Models with Smooth Activations},\nauthor={Arindam Banerjee and Pedro Cisneros-Velarde and Libin Zhu and Misha Belkin},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=PINRbk7h01}\n}", "github": "", "project": "", "reviewers": "uzLy;oVua;Qo6j;GhrZ", "pdf_size": 681683, "recommendation": "6;6;6;8", "confidence": "1;3;2;3", "correctness": "3;3;4;3", "technical_novelty": "3;3;2;3", "empirical_novelty": "3;0;2;0", "wc_summary_paper": "15;89;126;104", "wc_strength_and_weaknesses": "49;336;331;462", "wc_clarity_quality_novelty_and_reproducibility": "8;45;88;57", "wc_summary_review": "29;79;35;116", "wc_review": "101;549;580;739", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "73;1118;778;1085", "reply_reviewers": "0;0;0;0", "reply_authors": "1;2;2;3", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 2.25, 0.82915619758885 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.25, 1.299038105676658 ], "wc_summary_paper_avg": [ 83.5, 41.68033109273485 ], "wc_strength_and_weaknesses_avg": [ 294.5, 151.14645215816347 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 49.5, 28.64000698323937 ], "wc_summary_review_avg": [ 64.75, 35.329697139941636 ], "wc_review_avg": [ 492.25, 237.1090202839192 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 763.5, 420.1288492831693 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.5222329678670935, "corr_recommendation_correctness": -0.3333333333333333, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14930863408625102513&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=PINRbk7h01", "email": "illinois.edu;illinois.edu;ucsd.edu;ucsd.edu", "author_num": 4, "aff_unique_index": "0;0;1;1", "aff_unique_norm": "University of Illinois Urbana-Champaign;University of California, San Diego", "aff_unique_dep": ";", "aff_unique_url": "https://illinois.edu;https://www.ucsd.edu", "aff_unique_abbr": "UIUC;UCSD", "aff_campus_unique_index": "0;0;1;1", "aff_campus_unique": "Urbana-Champaign;San Diego", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "PJVZCd4Dn2w", "title": 
"Convexifying Transformers: Improving optimization and understanding of transformer networks", "track": "main", "status": "Reject", "tldr": "We first propose a convex alternative to the self-attention mechanism and then develop a convex analytic framework to train attention/transformer networks. ", "abstract": "Understanding the fundamental mechanism behind the success of transformer networks is still an open problem in the deep learning literature. Although their remarkable performance has been mostly attributed to the self-attention mechanism, the literature still lacks a solid analysis of these networks and interpretation of the functions learned by them. To this end, we study the training problem of attention/transformer networks and introduce a novel convex analytic approach to improve the understanding and optimization of these networks. Particularly, we first introduce a convex alternative to the self-attention mechanism and reformulate the regularized training problem of attention/transformer networks. Then, we cast the reformulation as a convex optimization problem that is interpretable and easier to optimize. Moreover, as a byproduct of our convex analysis, we reveal an implicit regularization mechanism, which promotes sparsity across tokens. Therefore, we not only improve the optimization of attention/transformer networks but also provide a solid theoretical understanding of the functions learned by them. We also demonstrate the effectiveness of our theory through several numerical experiments.", "keywords": "Convex optimization;transformers;attention;self-attention;group sparsity", "primary_area": "", "supplementary_material": "/attachment/93a7304500996703ec998c599fd98f877381a45c.zip", "author": "Tolga Ergen;Behnam Neyshabur;Harsh Mehta", "authorids": "~Tolga_Ergen1;~Behnam_Neyshabur1;~Harsh_Mehta1", "gender": "M;M;M", "homepage": "https://tolgaergen.github.io/;https://www.neyshabur.net;", "dblp": "202/7477.html;131/9898;122/1475", "google_scholar": "https://scholar.google.com.tr/citations?user=T1pWaCsAAAAJ;e1ucbCYAAAAJ;murJPNoAAAAJ", "orcid": "0000-0003-4806-0224;;", "linkedin": ";;", "or_profile": "~Tolga_Ergen1;~Behnam_Neyshabur1;~Harsh_Mehta1", "aff": "Stanford University;Google;Google Research", "aff_domain": "stanford.edu;google.com;google.com", "position": "PhD student;Research Scientist;Software Engineer", "bibtex": "@misc{\nergen2023convexifying,\ntitle={Convexifying Transformers: Improving optimization and understanding of transformer networks},\nauthor={Tolga Ergen and Behnam Neyshabur and Harsh Mehta},\nyear={2023},\nurl={https://openreview.net/forum?id=PJVZCd4Dn2w}\n}", "github": "", "project": "", "reviewers": "5uWH;Dm3W;qfuJ;MHEr", "site": "https://openreview.net/forum?id=PJVZCd4Dn2w", "pdf_size": 674576, "recommendation": "3;5;5;5", "confidence": "4;4;5;4", "correctness": "1;3;4;3", "technical_novelty": "2;3;2;3", "empirical_novelty": "2;3;2;3", "wc_summary_paper": "39;110;69;36", "wc_strength_and_weaknesses": "290;258;161;135", "wc_clarity_quality_novelty_and_reproducibility": "5;46;36;5", "wc_summary_review": "42;76;64;26", "wc_review": "376;490;330;202", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 2.75, 1.0897247358851685 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 63.5, 29.78674201721296 ], 
"wc_strength_and_weaknesses_avg": [ 211.0, 64.6645188646757 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 23.0, 18.34393632784414 ], "wc_summary_review_avg": [ 52.0, 19.339079605813716 ], "wc_review_avg": [ 349.5, 103.17339773410586 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 0.9271726499455306, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9360709362735309429&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;1", "aff_unique_norm": "Stanford University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.stanford.edu;https://www.google.com", "aff_unique_abbr": "Stanford;Google", "aff_campus_unique_index": "0;1;1", "aff_campus_unique": "Stanford;Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Graph Contrastive Learning for Skeleton-based Action Recognition", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12137", "id": "PLUXnnxUdr4", "poster": "/media/PosterPDFs/ICLR%202023/12137.png?t=1681034378.4387894", "openreview": "https://openreview.net/forum?id=PLUXnnxUdr4", "slides": "https://iclr.cc/virtual/2023/poster/12137", "video": "https://iclr.cc/virtual/2023/poster/12137", "author_site": "Xiaohu Huang, Hao Zhou, Jian Wang, Haocheng Feng, Junyu Han, Errui Ding, Jingdong Wang, Xinggang Wang, Wenyu Liu, Bin Feng", "tldr": "For GCN-based methods in skeleton-based action recognition, this work extends the graph learning from using intra-sequence local context to exploring cross-sequence global context.", "abstract": "In the field of skeleton-based action recognition, current top-performing graph convolutional networks (GCNs) exploit intra-sequence context to construct adaptive graphs for feature aggregation. However, we argue that such context is still $\\textit{local}$ since the rich cross-sequence relations have not been explicitly investigated. In this paper, we propose a graph contrastive learning framework for skeleton-based action recognition ($\\textit{SkeletonGCL}$) to explore the $\\textit{global}$ context across all sequences. In specific, SkeletonGCL associates graph learning across sequences by enforcing graphs to be class-discriminative, i.e., intra-class compact and inter-class dispersed, which improves the GCN capacity to distinguish various action patterns. Besides, two memory banks are designed to enrich cross-sequence context from two complementary levels, i.e., instance and semantic levels, enabling graph contrastive learning in multiple context scales. Consequently, SkeletonGCL establishes a new training paradigm, and it can be seamlessly incorporated into current GCNs. Without loss of generality, we combine SkeletonGCL with three GCNs (2S-ACGN, CTR-GCN, and InfoGCN), and achieve consistent improvements on NTU60, NTU120, and NW-UCLA benchmarks. 
", "keywords": "Skeleton-based Action Recognition", "primary_area": "", "supplementary_material": "/attachment/46e275d0b3c4f3fb7035ae347a5fe75790574e32.zip", "author": "Xiaohu Huang;Hao Zhou;Jian Wang;Haocheng Feng;Junyu Han;Errui Ding;Jingdong Wang;Xinggang Wang;Wenyu Liu;Bin Feng", "authorids": "~Xiaohu_Huang1;~Hao_Zhou13;~Jian_Wang11;~Haocheng_Feng1;~Junyu_Han1;~Errui_Ding2;~Jingdong_Wang1;~Xinggang_Wang1;~Wenyu_Liu3;~Bin_Feng2", "gender": "M;M;M;;;M;M;M;M;M", "homepage": "https://scholar.google.com/citations?user=sBjFwuQAAAAJ&hl=en;;;;;;https://jingdongwang2017.github.io/;https://xwcv.github.io/index.htm;http://eic.hust.edu.cn/professor/liuwenyu/;http://eic.hust.edu.cn/aprofessor/fengbin/", "dblp": "24/2054;63/778;39/449-66;;;180/5531;49/3441;95/3056;42/4110-1.html;04/4053-1", "google_scholar": "sBjFwuQAAAAJ;xZ-0R3cAAAAJ;https://scholar.google.com.hk/citations?user=hDPRTekAAAAJ;;;1wzEtxcAAAAJ;z5SPCmgAAAAJ;qNCTLV0AAAAJ;D7jDk7gAAAAJ;", "orcid": ";0000-0001-9764-1012;;;;;0000-0002-4888-4445;0000-0001-6732-7823;0000-0002-4582-7488;0000-0003-2166-751X", "linkedin": ";;;;;;;;;", "or_profile": "~Xiaohu_Huang1;~Hao_Zhou13;~Jian_Wang11;~Haocheng_Feng1;~Junyu_Han1;~Errui_Ding2;~Jingdong_Wang1;~Xinggang_Wang1;~Wenyu_Liu3;~Bin_Feng2", "aff": "Huazhong University of Science and Technology;Baidu;Baidu;;;Baidu;Baidu;Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology", "aff_domain": "hust.edu.cn;baidu.com;baidu.com;;;baidu.com;baidu.com;hust.edu.cn;hust.edu.cn;hust.edu.cn", "position": "MS student;Researcher;Engineer;;;Director;Chief Scientist for Computer Vision;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nhuang2023graph,\ntitle={Graph Contrastive Learning for Skeleton-based Action Recognition},\nauthor={Xiaohu Huang and Hao Zhou and Jian Wang and Haocheng Feng and Junyu Han and Errui Ding and Jingdong Wang and Xinggang Wang and Wenyu Liu and Bin Feng},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=PLUXnnxUdr4}\n}", "github": "", "project": "", "reviewers": "YfcM;Cq4q;vLtp;YiMJ", "pdf_size": 885376, "recommendation": "5;6;8;8", "confidence": "5;4;3;4", "correctness": "2;4;4;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "68;109;66;117", "wc_strength_and_weaknesses": "404;299;80;342", "wc_clarity_quality_novelty_and_reproducibility": "54;190;46;108", "wc_summary_review": "82;80;22;76", "wc_review": "608;678;214;643", "wc_reply_reviewers": "78;75;0;77", "wc_reply_authors": "1379;1874;295;631", "reply_reviewers": "1;1;0;1", "reply_authors": "4;4;1;2", "recommendation_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 90.0, 23.18404623873926 ], "wc_strength_and_weaknesses_avg": [ 281.25, 122.03969641063517 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 99.5, 57.43474558139872 ], "wc_summary_review_avg": [ 65.0, 24.919871588754223 ], "wc_review_avg": [ 535.75, 187.40380865926926 ], "wc_reply_reviewers_avg": [ 57.5, 33.21520736048475 ], "wc_reply_authors_avg": [ 1044.75, 619.009844428988 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 10, 0 ], 
"corr_recommendation_confidence": -0.816496580927726, "corr_recommendation_correctness": 0.5222329678670935, "gs_citation": 42, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2379071823579644488&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=PLUXnnxUdr4", "email": "hust.edu.cn;baidu.com;baidu.com;;;baidu.com;baidu.com;hust.edu.cn;hust.edu.cn;hust.edu.cn", "author_num": 10, "aff_unique_index": "0;1;1;1;1;0;0;0", "aff_unique_norm": "Huazhong University of Science and Technology;Baidu", "aff_unique_dep": ";Baidu, Inc.", "aff_unique_url": "http://www.hust.edu.cn;https://www.baidu.com", "aff_unique_abbr": "HUST;Baidu", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "PNyvODFNTkZ", "title": "Lightweight CNNs Under A Unifying Tensor View", "track": "main", "status": "Withdraw", "tldr": "A unifying\u00a0tensor view is introduced, which provides an easy-to-understand graphical illustration\u00a0of various lightweight CNN components. A novel shift layer pruning scheme is proposed in response to the framework.", "abstract": "Despite the decomposition of convolutional kernels for lightweight CNNs being well studied, previous works that relied on tensor network diagrams or higher dimensional abstraction lacked geometry intuition. Our work captures the CNN kernel as a 3D tensor and explores its various decompositions, allowing for a straightforward graphical and analytical perspective between different tensor approximation schemes and efficient CNN components, including pointwise and depthwise convolutions. Extensive experiments are conducted, showing that a pointwise-depthwise-pointwise (PDP) configuration via a canonical polyadic decomposition (CPD) initialization can be a viable starting point for lightweight CNNs. The compression ratio of VGG-16 can reach over $50\\%$ while its performance outperforms its randomly initialized counterpart by $>10\\%$ in terms of accuracy. FPGA experiments for the PDP model further demonstrate its hardware efficacy, namely, $2.4\\times$ faster and $1.4\\times$ more energy efficient than the standard conv2d. Furthermore, our framework offers a unique slice-wise illustration and is the first to ever draw a connection to the shift layer. Such insight inspires a first-of-its-kind pruning method for shift layers, achieving nearly $50\\%$ compression with $<1\\%$ drop in accuracy for ShiftResNet-20.", "keywords": "compression;tensor decomposition;CNNs;FPGA", "primary_area": "", "supplementary_material": "/attachment/c8466b350bdeaab47b002f33a5441f55ca1a6acb.zip", "author": "Jason Chun Lok Li;Rui Lin;Jiajun Zhou;Edmund Y. 
Lam;Ngai Wong", "authorids": "~Jason_Chun_Lok_Li1;~Rui_Lin3;~Jiajun_Zhou3;~Edmund_Y._Lam1;~Ngai_Wong1", "gender": "M;F;M;M;M", "homepage": ";https://rlin27.github.io/;https://www.eee.hku.hk/~elam/;https://www.eee.hku.hk/~nwong/;", "dblp": ";https://dblp.org/rec/journals/corr/abs-2203-13556;87/5852;88/3656;", "google_scholar": "Tcpdsh0AAAAJ;gx0RITkAAAAJ;;PM_uMYIAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";;0000-0001-6268-950X;0000-0002-3026-0108;0000-0001-6328-4256", "linkedin": "jason-chun-lok-li-0590b3166;;;;", "or_profile": "~Jason_Chun_Lok_Li1;~Rui_Lin3;~Edmund_Y._Lam1;~Ngai_Wong1;~Jiajun_ZHOU2", "aff": "University of Hong Kong;Huawei Technologies Ltd.;The University of Hong Kong;The University of Hong Kong;, University of Hong Kong", "aff_domain": "eee.hku.hk;huawei.com;hku.hk;hku.hk;eee.hku.hk", "position": "PhD student;Researcher;Full Professor;Associate Professor;PhD student", "bibtex": "@misc{\nli2023lightweight,\ntitle={Lightweight {CNN}s Under A Unifying Tensor View},\nauthor={Jason Chun Lok Li and Rui Lin and Jiajun Zhou and Edmund Y. Lam and Ngai Wong},\nyear={2023},\nurl={https://openreview.net/forum?id=PNyvODFNTkZ}\n}", "github": "", "project": "", "reviewers": "5NtV;x6PZ;WC6z", "site": "https://openreview.net/forum?id=PNyvODFNTkZ", "pdf_size": 1541138, "recommendation": "1;3;3", "confidence": "4;5;4", "correctness": "1;4;2", "technical_novelty": "1;1;2", "empirical_novelty": "1;2;2", "wc_summary_paper": "68;46;89", "wc_strength_and_weaknesses": "62;248;126", "wc_clarity_quality_novelty_and_reproducibility": "33;9;227", "wc_summary_review": "46;36;41", "wc_review": "209;339;483", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 2.3333333333333335, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 2.3333333333333335, 1.247219128924647 ], "technical_novelty_avg": [ 1.3333333333333333, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "wc_summary_paper_avg": [ 67.66666666666667, 17.55625877635159 ], "wc_strength_and_weaknesses_avg": [ 145.33333333333334, 77.15496671562296 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 89.66666666666667, 97.60236791298775 ], "wc_summary_review_avg": [ 41.0, 4.08248290463863 ], "wc_review_avg": [ 343.6666666666667, 111.90869294007305 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.4999999999999999, "corr_recommendation_correctness": 0.7559289460184544, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:TTctY5yzqsoJ:scholar.google.com/&scioq=Lightweight+CNNs+Under+A+Unifying+Tensor+View&hl=en&as_sdt=0,47", "gs_version_total": 0, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "University of Hong Kong;Huawei", "aff_unique_dep": ";Huawei Technologies", "aff_unique_url": "https://www.hku.hk;https://www.huawei.com", "aff_unique_abbr": "HKU;Huawei", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Switch-NeRF: Learning Scene Decomposition with Mixture of Experts for Large-scale Neural Radiance Fields", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11981", "id": "PQ2zoIZqvm", "poster": 
"/media/PosterPDFs/ICLR%202023/11981.png?t=1682005625.9828765", "openreview": "https://openreview.net/forum?id=PQ2zoIZqvm", "slides": "https://iclr.cc/virtual/2023/poster/11981", "video": "https://iclr.cc/virtual/2023/poster/11981", "author_site": "Zhenxing Mi, Dan Xu", "tldr": " We propose an applicable end-to-end sparse NeRF network with learning-based decomposition for large-scale scenes.", "abstract": "The Neural Radiance Fields (NeRF) have been recently applied to reconstruct building-scale and even city-scale scenes. To model a large-scale scene efficiently, a dominant strategy is to employ a divide-and-conquer paradigm via performing scene decomposition, which decomposes a complex scene into parts that are further processed by different sub-networks. Existing large-scale NeRFs mainly use heuristic hand-crafted scene decomposition, with regular 3D-distance-based or physical-street-block-based schemes. Although achieving promising results, the hand-crafted schemes limit the capabilities of NeRF in large-scale scene modeling in several aspects. Manually designing a universal scene decomposition rule for different complex scenes is challenging, leading to adaptation issues for different scenarios. The decomposition procedure is not learnable, hindering the network from jointly optimizing the scene decomposition and the radiance fields in an end-to-end manner. The different sub-networks are typically optimized independently, and thus hand-crafted rules are required to composite them to achieve a better consistency. To tackle these issues, we propose Switch-NeRF, a novel end-to-end large-scale NeRF with learning-based scene decomposition. We design a gating network to dispatch 3D points to different NeRF sub-networks. The gating network can be optimized together with the NeRF sub-networks for different scene partitions, by a design with the Sparsely Gated Mixture of Experts (MoE). The outputs from different sub-networks can also be fused in a learnable way in the unified framework to effectively guarantee the consistency of the whole scene. Furthermore, the proposed MoE-based Switch-NeRF model is carefully implemented and optimized to achieve both high-fidelity scene reconstruction and efficient computation. Our method establishes clear state-of-the-art performances on several large-scale datasets. To the best of our knowledge, we are the first to propose an applicable end-to-end sparse NeRF network with learning-based decomposition for large-scale scenes. 
Codes are released at https://github.com/MiZhenxing/Switch-NeRF.", "keywords": "Neural Radiance Fields;Mixture of Experts;Large-scale scene;Novel view synthesis;Sparse network", "primary_area": "", "supplementary_material": "", "author": "Zhenxing MI;Dan Xu", "authorids": "~Zhenxing_MI1;~Dan_Xu4", "gender": "M;M", "homepage": "https://mizhenxing.github.io/;https://www.danxurgb.net", "dblp": "229/1252;16/3823-2.html", "google_scholar": "ennCRJAAAAAJ;OuSPv-AAAAAJ", "orcid": ";0000-0003-0136-9603", "linkedin": ";", "or_profile": "~Zhenxing_MI1;~Dan_Xu4", "aff": "Hong Kong University of Science and Technology;VGG, University of Oxford", "aff_domain": "hkust.edu;ox.ac.uk", "position": "PhD student;Postdoc", "bibtex": "@inproceedings{\nmi2023switchnerf,\ntitle={Switch-Ne{RF}: Learning Scene Decomposition with Mixture of Experts for Large-scale Neural Radiance Fields},\nauthor={Zhenxing MI and Dan Xu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=PQ2zoIZqvm}\n}", "github": "", "project": "", "reviewers": "7KWF;QZyS;YY1K;qZQg", "pdf_size": 10190670, "recommendation": "6;6;8;8", "confidence": "4;4;3;4", "correctness": "3;3;4;4", "technical_novelty": "3;3;3;4", "empirical_novelty": "3;3;3;4", "wc_summary_paper": "134;81;59;64", "wc_strength_and_weaknesses": "401;419;302;44", "wc_clarity_quality_novelty_and_reproducibility": "57;20;65;431", "wc_summary_review": "61;15;60;42", "wc_review": "653;535;486;581", "wc_reply_reviewers": "0;0;0;83", "wc_reply_authors": "989;1796;233;957", "reply_reviewers": "0;0;0;1", "reply_authors": "2;3;1;2", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 84.5, 29.71952220342716 ], "wc_strength_and_weaknesses_avg": [ 291.5, 149.67715256511264 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 143.25, 166.9975673475515 ], "wc_summary_review_avg": [ 44.5, 18.634645153584223 ], "wc_review_avg": [ 563.75, 61.5116858816274 ], "wc_reply_reviewers_avg": [ 20.75, 35.94005425705421 ], "wc_reply_authors_avg": [ 993.75, 553.1091099412483 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 1.0, "gs_citation": 97, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9533631493913430631&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "pdf": "https://openreview.net/pdf?id=PQ2zoIZqvm", "email": "hkust.edu;ox.ac.uk", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Hong Kong University of Science and Technology;University of Oxford", "aff_unique_dep": ";VGG", "aff_unique_url": "https://www.ust.hk;https://www.ox.ac.uk", "aff_unique_abbr": "HKUST;Oxford", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Hong Kong SAR;Oxford", "aff_country_unique_index": "0;1", "aff_country_unique": "China;United Kingdom" }, { "title": "Universal Vision-Language Dense Retrieval: Learning A Unified Representation Space for Multi-Modal Retrieval", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11952", "id": "PQOlkgsBsik", "poster": "/media/PosterPDFs/ICLR%202023/11952.png?t=1680866238.3790498", "openreview": 
"https://openreview.net/forum?id=PQOlkgsBsik", "slides": "https://iclr.cc/virtual/2023/poster/11952", "video": "https://iclr.cc/virtual/2023/poster/11952", "author_site": "Zhenghao Liu, Chenyan Xiong, Yuanhuiyi Lv, Zhiyuan Liu, Ge Yu", "tldr": "This paper presents Vision-Language Universal Search (VL-UnivSearch), which builds a unified model for multi-modal retrieval, leans universal representations for images and texts, and achieves the state-of-the-art. ", "abstract": "This paper presents Universal Vision-Language Dense Retrieval (UniVL-DR), which builds a unified model for multi-modal retrieval. UniVL-DR encodes queries and multi-modality resources in an embedding space for searching candidates from different modalities. To learn a unified embedding space for multi-modal retrieval, UniVL-DR proposes two techniques: 1) Universal embedding optimization strategy, which contrastively optimizes the embedding space using the modality-balanced hard negatives; 2) Image verbalization method, which bridges the modality gap between images and texts in the raw data space. UniVL-DR achieves the state-of-the-art on the multi-modal open-domain question answering benchmark, WebQA, and outperforms all retrieval models on the two subtasks, text-text retrieval and text-image retrieval. It demonstrates that universal multi-modal search is feasible to replace the divide-and-conquer pipeline with a united model and also benefits single/cross modality tasks. All source codes of this work are available at https://github.com/OpenMatch/UniVL-DR.", "keywords": "Multi-Modal Retrieval;Dense Retrieval;Universal Embedding Space;Modality-Balanced Hard Negative Training;Image Verbalization", "primary_area": "", "supplementary_material": "/attachment/34111b3de399ff3c6f152b4321321d759889f60f.zip", "author": "Zhenghao Liu;Chenyan Xiong;Yuanhuiyi Lv;Zhiyuan Liu;Ge Yu", "authorids": "~Zhenghao_Liu2;~Chenyan_Xiong1;20195257@stu.neu.edu.cn;~Zhiyuan_Liu1;yuge@mail.neu.edu.cn", "gender": "M;M;;M;", "homepage": "https://edwardzh.github.io;https://www.cs.cmu.edu/~cx/;;http://nlp.csai.tsinghua.edu.cn/~lzy;", "dblp": "243/2880.html;18/10886;;53/3245-1;", "google_scholar": "4vrZRk0AAAAJ;E9BaEBYAAAAJ;;dT0v5u0AAAAJ;", "orcid": ";;;0000-0002-7709-2543;", "linkedin": ";;;;", "or_profile": "~Zhenghao_Liu2;~Chenyan_Xiong1;20195257@stu.neu.edu.cn;~Zhiyuan_Liu1;yuge@mail.neu.edu.cn", "aff": "Northeastern University;Microsoft Research;;Tsinghua University;", "aff_domain": "neu.edu.cn;research.microsoft.com;;tsinghua.edu.cn;", "position": "Associate Professor;Principal Researcher;;Associate Professor;", "bibtex": "@inproceedings{\nliu2023universal,\ntitle={Universal Vision-Language Dense Retrieval: Learning A Unified Representation Space for Multi-Modal Retrieval},\nauthor={Zhenghao Liu and Chenyan Xiong and Yuanhuiyi Lv and Zhiyuan Liu and Ge Yu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=PQOlkgsBsik}\n}", "github": "", "project": "", "reviewers": "qnyp;vCSr;FZm3", "pdf_size": 3619847, "recommendation": "5;6;6", "confidence": "3;4;4", "correctness": "3;2;3", "technical_novelty": "2;2;2", "empirical_novelty": "1;3;2", "wc_summary_paper": "83;37;51", "wc_strength_and_weaknesses": "306;190;169", "wc_clarity_quality_novelty_and_reproducibility": "55;34;16", "wc_summary_review": "26;51;55", "wc_review": "470;312;291", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "745;307;741", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": 
[ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 57.0, 19.252705437591537 ], "wc_strength_and_weaknesses_avg": [ 221.66666666666666, 60.24579284527307 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 35.0, 15.937377450509228 ], "wc_summary_review_avg": [ 44.0, 12.832251036613439 ], "wc_review_avg": [ 357.6666666666667, 79.89298397787435 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 597.6666666666666, 205.53885818069105 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.9999999999999997, "corr_recommendation_correctness": -0.4999999999999999, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10950655842888474133&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=PQOlkgsBsik", "email": "neu.edu.cn;research.microsoft.com;;tsinghua.edu.cn;", "author_num": 5, "aff_unique_index": "0;1;2", "aff_unique_norm": "Northeastern University;Microsoft;Tsinghua University", "aff_unique_dep": ";Microsoft Research;", "aff_unique_url": "https://www.northeastern.edu;https://www.microsoft.com/en-us/research;https://www.tsinghua.edu.cn", "aff_unique_abbr": "NEU;MSR;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;China" }, { "id": "PQXP4WZNcM", "title": "Bringing Saccades and Fixations into Self-supervised Video Representation Learning", "track": "main", "status": "Reject", "tldr": "In this paper, we propose a self-supervised video representation learning method by taking inspiration from cognitive science and neuroscience on human visual perception. ", "abstract": "In this paper, we propose a self-supervised video representation learning (video SSL) method by taking inspiration from cognitive science and neuroscience on human visual perception. Different from previous methods that mainly start from the inherent properties of videos, we argue that humans learn to perceive the world through the self-awareness of the semantic change or consistency in the input stimuli in the absence of labels, accompanied by representation reorganization during the post-learning rest periods. To this end, we first exploit the presence of saccades as an indicator of semantic change in a contrastive learning framework to mimic the self-awareness in human representation learning, where the saccades are generated without eye-tracking data. Second, we model the semantic consistency by minimizing the prediction error between the predicted and the true state of another time point during a fixation. Third, we later incorporate prototypical contrastive learning to reorganize the learned representations such that perceptually similar representations would be associated more closely. Compared to previous counterparts, our method can capture finer-grained semantics from video instances, and the associations among similar ones are further strengthened.
Experiments show that the proposed bio-inspired video SSL method significantly improves the Top-1 video retrieval accuracy on UCF101 and achieves superior performance on downstream tasks such as action recognition under comparable settings.", "keywords": "Self-supervised learning;video self-supervised learning;bio-inspired", "primary_area": "", "supplementary_material": "/attachment/3c16fcda47960072018b6419aa67d36e1bfa1aa4.zip", "author": "Qiuxia LAI;Ailing Zeng;Ye Wang;Lihong Cao;Qiang Xu", "authorids": "~Qiuxia_LAI1;~Ailing_Zeng1;~Ye_Wang13;~Lihong_Cao1;~Qiang_Xu1", "gender": "F;F;;M;M", "homepage": "https://ashleylqx.github.io/;https://ailingzeng.site/;;https://nimi.cuc.edu.cn/;https://github.com/cure-lab", "dblp": "210/4586.html;226/4720;;;43/1230-1", "google_scholar": "LwIItp4AAAAJ;Tn7fzS8AAAAJ;0W0KsU8AAAAJ;;https://scholar.google.com.tw/citations?user=eSiKPqUAAAAJ", "orcid": "0000-0001-6872-5540;;;;", "linkedin": "%E7%A7%8B%E9%9C%9E-%E8%B5%96-11813b169/;%E7%88%B1%E7%8E%B2-%E6%9B%BE-65504112a/;;;", "or_profile": "~Qiuxia_LAI1;~Ailing_Zeng1;~Ye_Wang13;~Lihong_Cao1;~Qiang_Xu1", "aff": "Communication University of China;International Digital Economy Academy;Communication University of China;Communication University of China;The Chinese University of Hong Kong", "aff_domain": "cuc.edu.cn;idea.edu.cn;cuc.edu.cn;cuc.edu.cn;cuhk.edu.hk", "position": "Assistant Professor;Researcher;Lecturer;Full Professor;Full Professor", "bibtex": "@misc{\nlai2023bringing,\ntitle={Bringing Saccades and Fixations into Self-supervised Video Representation Learning},\nauthor={Qiuxia LAI and Ailing Zeng and Ye Wang and Lihong Cao and Qiang Xu},\nyear={2023},\nurl={https://openreview.net/forum?id=PQXP4WZNcM}\n}", "github": "", "project": "", "reviewers": "ucm9;MEj8;zkjU;aMU3", "site": "https://openreview.net/forum?id=PQXP4WZNcM", "pdf_size": 1473002, "recommendation": "5;5;6;8", "confidence": "4;2;2;4", "correctness": "2;3;3;4", "technical_novelty": "3;2;3;3", "empirical_novelty": "3;2;3;4", "wc_summary_paper": "103;58;82;58", "wc_strength_and_weaknesses": "148;292;183;214", "wc_clarity_quality_novelty_and_reproducibility": "22;56;29;42", "wc_summary_review": "36;46;171;54", "wc_review": "309;452;465;368", "wc_reply_reviewers": "0;70;0;0", "wc_reply_authors": "398;1446;1461;724", "reply_reviewers": "0;1;0;0", "reply_authors": "1;3;3;2", "recommendation_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.0, 1.0 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 75.25, 18.779976038323372 ], "wc_strength_and_weaknesses_avg": [ 209.25, 53.176004927034526 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 37.25, 12.987975207860538 ], "wc_summary_review_avg": [ 76.75, 54.78765828176999 ], "wc_review_avg": [ 398.5, 63.688696014284986 ], "wc_reply_reviewers_avg": [ 17.5, 30.31088913245535 ], "wc_reply_authors_avg": [ 1007.25, 460.92481762213674 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.40824829046386296, "corr_recommendation_correctness": 0.8660254037844386, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:bnmQYQk7FQcJ:scholar.google.com/&scioq=Bringing+Saccades+and+Fixations+into+Self-supervised+Video+Representation+Learning&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": 
"0;1;0;0;2", "aff_unique_norm": "Communication University of China;International Digital Economy Academy;Chinese University of Hong Kong", "aff_unique_dep": ";;", "aff_unique_url": "http://www.cuc.edu.cn/;;https://www.cuhk.edu.hk", "aff_unique_abbr": "CUC;;CUHK", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China;" }, { "id": "PQfP-d9BWkF", "title": "APLA: Class-imbalanced Semi-supervised Learning with Adapative Pseudo-labeling and Loss Adjustment", "track": "main", "status": "Withdraw", "tldr": "We use Class-Aware Pseudo-label Thresholding and Class-Aware Loss Adjustment to improve the performance of existing SSL algorithm in Class-imbalanced setting.", "abstract": "Semi-supervised learning (SSL) can substantially improve the performance of deep neural networks by utilizing unlabeled data when labeled data is scarce. Existing SSL algorithms implicitly assume that the class distribution of labeled datasets and unlabeled datasets are balanced, which means the different classes have the same numbers of training samples. However, they can hardly perform well on minority classes(the classes with few training examples) when the class distribution of training data is imbalanced, since the pseudo-labels learned from unlabeled data tend to be biased toward majority classes(the classes with a large number of training examples). To alleviate this issue, we propose a method called Adaptive Pseudo-labeling and Loss Adjustment (APLA) for class-imbalanced semi-supervised learning (CISSL), which includes Class-Aware Pseudo-label Thresholding (CAPT) that can utilize the imbalanced unlabeled data by dynamically adjusting the threshold for selecting pseudo-labels, and Class-Aware Loss Adjustment (CALA) that can mitigate the bias in both supervised loss and unsupervised loss. 
According to the experiments, APLA can deliver much higher accuracy than benchmark methods under various CISSL scenarios.", "keywords": "semi-supervised learning;class-imbalanced learning;class-imbalanced semi-supervised learning", "primary_area": "", "supplementary_material": "", "author": "Qian Gui;Baoning Niu", "authorids": "~Qian_Gui1;niubaoning@tyut.edu.cn", "gender": ";", "homepage": ";", "dblp": "337/0894;", "google_scholar": ";", "orcid": "0000-0002-8824-1153;", "linkedin": ";", "or_profile": "~Qian_Gui1;niubaoning@tyut.edu.cn", "aff": "Taiyuan University of Technology;", "aff_domain": "tyut.edu.cn;", "position": "MS student;", "bibtex": "@misc{\ngui2023apla,\ntitle={{APLA}: Class-imbalanced Semi-supervised Learning with Adapative Pseudo-labeling and Loss Adjustment},\nauthor={Qian Gui and Baoning Niu},\nyear={2023},\nurl={https://openreview.net/forum?id=PQfP-d9BWkF}\n}", "github": "", "project": "", "reviewers": "Mc9x;NFEc;Aw99", "site": "https://openreview.net/forum?id=PQfP-d9BWkF", "pdf_size": 924831, "recommendation": "1;3;5", "confidence": "5;3;4", "correctness": "3;2;4", "technical_novelty": "1;1;1", "empirical_novelty": "1;1;1", "wc_summary_paper": "91;43;55", "wc_strength_and_weaknesses": "431;215;208", "wc_clarity_quality_novelty_and_reproducibility": "50;18;24", "wc_summary_review": "62;18;27", "wc_review": "634;294;314", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.0, 1.632993161855452 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 1.0, 0.0 ], "empirical_novelty_avg": [ 1.0, 0.0 ], "wc_summary_paper_avg": [ 63.0, 20.396078054371138 ], "wc_strength_and_weaknesses_avg": [ 284.6666666666667, 103.51274747048737 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 30.666666666666668, 13.888444437333106 ], "wc_summary_review_avg": [ 35.666666666666664, 18.979521127315678 ], "wc_review_avg": [ 414.0, 155.7776192739723 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.5, "corr_recommendation_correctness": 0.5, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:WG-vvJmXPDoJ:scholar.google.com/&scioq=APLA:+Class-imbalanced+Semi-supervised+Learning+with+Adapative+Pseudo-labeling+and+Loss+Adjustment&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Taiyuan University of Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.tyut.edu.cn/", "aff_unique_abbr": "TYUT", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "id": "PQk-8VyP-dv", "title": "Gradient Preconditioning for Non-Lipschitz smooth Nonconvex Optimization", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "First-order optimization methods often perform poorly on non-Lipschitz smooth and ill-conditioned problems. Recent work introduced the dual preconditioned gradient descent algorithm, which applies a nonlinear preconditioning to the gradient map to improve performance on convex functions satisfying relative smoothness -- a generalized version of Lipschitz gradient smoothness. In this paper, we significantly extend this prior work by providing a convergence analysis of this algorithm for non-Lipschitz smooth nonconvex problems. 
To this end, we exploit recent connections with generalized versions of convexity and smoothness, referred to as anisotropic convexity/smoothness, which guarantee convergence to a first-order stationary point. Further, we show that some recently proposed preconditioners based on power functions or relativistic dynamics are well-suited for a broad class of objectives. Our experiments demonstrate improved performance using these preconditioners on a variety of non-Lipschitz smooth, nonconvex optimization objectives, including large-scale deep learning tasks.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/b8b2d5744be9e1bb6d9db7960805b1574f433b9d.zip", "author": "Salma Tarmoun;Stewart Slocum;Benjamin David Haeffele;Rene Vidal", "authorids": "~Salma_Tarmoun1;~Stewart_Slocum1;~Benjamin_David_Haeffele1;~Rene_Vidal1", "gender": "F;M;;", "homepage": ";https://www.stewyslocum.com/;;http://www.vision.jhu.edu", "dblp": ";;;v/ReneVidal", "google_scholar": ";Z9voXDgAAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": ";;;", "linkedin": "salma-tarmoun-94aa5158/;stewart-s-757426124/;;rene-vidal-74844928/", "or_profile": "~Salma_Tarmoun1;~Stewart_Slocum1;~Benjamin_David_Haeffele1;~Rene_Vidal1", "aff": "University of Pennsylvania;Massachusetts Institute of Technology;;Amazon", "aff_domain": "upenn.edu;mit.edu;;amazon.com", "position": "PhD student;PhD student;;Principal Researcher", "bibtex": "@misc{\ntarmoun2023gradient,\ntitle={Gradient Preconditioning for Non-Lipschitz smooth Nonconvex Optimization},\nauthor={Salma Tarmoun and Stewart Slocum and Benjamin David Haeffele and Rene Vidal},\nyear={2023},\nurl={https://openreview.net/forum?id=PQk-8VyP-dv}\n}", "github": "", "project": "", "reviewers": "Lq6p;GzaW;ujQg", "site": "https://openreview.net/forum?id=PQk-8VyP-dv", "pdf_size": 641085, "recommendation": "3;5;5", "confidence": "4;3;4", "correctness": "3;3;3", "technical_novelty": "1;2;2", "empirical_novelty": "0;3;2", "wc_summary_paper": "111;50;45", "wc_strength_and_weaknesses": "127;123;173", "wc_clarity_quality_novelty_and_reproducibility": "119;102;8", "wc_summary_review": "121;52;66", "wc_review": "478;327;292", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.6666666666666667, 1.247219128924647 ], "wc_summary_paper_avg": [ 68.66666666666667, 30.00370347510824 ], "wc_strength_and_weaknesses_avg": [ 141.0, 22.686266036231405 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 76.33333333333333, 48.814842915745295 ], "wc_summary_review_avg": [ 79.66666666666667, 29.780679792256066 ], "wc_review_avg": [ 365.6666666666667, 80.70660168508866 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.49999999999999983, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Z7WnVU51E4QJ:scholar.google.com/&scioq=Gradient+Preconditioning+for+Non-Lipschitz+smooth+Nonconvex+Optimization&hl=en&as_sdt=0,48", "gs_version_total": 0, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Pennsylvania;Massachusetts 
Institute of Technology;Amazon", "aff_unique_dep": ";;Amazon.com, Inc.", "aff_unique_url": "https://www.upenn.edu;https://web.mit.edu;https://www.amazon.com", "aff_unique_abbr": "UPenn;MIT;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "PRpO-cOCQoX", "title": "Rethinking Missing Modality Learning: From a Decoding View", "track": "main", "status": "Reject", "tldr": "", "abstract": "The conventional pipeline of multimodal learning consists of three stages: encoding, fusion, and decoding. Most existing methods under the missing modality condition focus on the first stage and aim to learn a modality-invariant representation or reconstruct missing features. However, these methods rely on strong assumptions (i.e., all the pre-defined modalities are available for each input sample during training and the number of modalities is fixed). To solve this problem, we propose a simple yet effective method called Interaction Augmented Prototype Decomposition (IPD) for a more general setting, where the number of modalities is arbitrary and various incomplete modality conditions can occur in both training and inference phases, including unseen testing conditions. Different from the previous methods, we improve the decoding stage. Concretely, IPD jointly learns the common and modality-specific task prototypes. Considering that the number of missing modality conditions scales exponentially with the number of modalities $\\mathcal{O}(2^n)$ and different conditions may have implicit interaction, low-rank partial prototype decomposition, supported by theoretical analysis, is employed for the modality-specific components to reduce the complexity. The decomposition can also promote generalization to unseen conditions via the modality factors of existing conditions. To simulate the low-rank setup, we further constrain the explicit interaction of specific modality conditions by employing disentangled contrastive constraints. Extensive results on the newly-created benchmarks of multiple tasks illustrate the effectiveness of our proposed model. 
", "keywords": "multimodal;decoding;tensor decomposition", "primary_area": "", "supplementary_material": "", "author": "Tao Jin;Zhou Zhao", "authorids": "~Tao_Jin2;~Zhou_Zhao2", "gender": "M;M", "homepage": "https://hugddygff.github.io/;https://dblp.uni-trier.de/pid/75/7785.html?", "dblp": "88/4850-4.html;75/7785", "google_scholar": ";https://scholar.google.com.hk/citations?user=IIoFY90AAAAJ", "orcid": "0000-0003-3564-1628;0000-0001-6121-0384", "linkedin": ";", "or_profile": "~Tao_Jin2;~Zhou_Zhao2", "aff": "Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn", "position": "PhD student;Associate Professor", "bibtex": "@misc{\njin2023rethinking,\ntitle={Rethinking Missing Modality Learning: From a Decoding View},\nauthor={Tao Jin and Zhou Zhao},\nyear={2023},\nurl={https://openreview.net/forum?id=PRpO-cOCQoX}\n}", "github": "", "project": "", "reviewers": "YKCH;QUyJ;pCS1;SiTt", "site": "https://openreview.net/forum?id=PRpO-cOCQoX", "pdf_size": 1238572, "recommendation": "3;5;5;6", "confidence": "4;3;4;3", "correctness": "3;3;3;3", "technical_novelty": "2;3;2;3", "empirical_novelty": "2;3;2;2", "wc_summary_paper": "61;14;80;93", "wc_strength_and_weaknesses": "140;17;164;682", "wc_clarity_quality_novelty_and_reproducibility": "237;26;11;66", "wc_summary_review": "58;28;9;170", "wc_review": "496;85;264;1011", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "905;227;220;775", "reply_reviewers": "0;0;0;0", "reply_authors": "3;2;2;2", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 62.0, 29.958304357890484 ], "wc_strength_and_weaknesses_avg": [ 250.75, 255.15032333900734 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 85.0, 90.03055037041592 ], "wc_summary_review_avg": [ 66.25, 62.39541249162473 ], "wc_review_avg": [ 464.0, 347.8052616048239 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 531.75, 311.66759135335195 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.6882472016116854, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:zQU_VRR3rckJ:scholar.google.com/&scioq=Rethinking+Missing+Modality+Learning:+From+a+Decoding+View&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Zhejiang University", "aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", "aff_unique_abbr": "ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "PTUcygUoxuc", "title": "Recursion of Thought: Divide and Conquer Reasoning with Language Models", "track": "main", "status": "Reject", "tldr": "We unleash the reasoning capability of language models, which has been constrained by the maximum size of a single context, by letting them recursively create and utilize multiple contexts.", "abstract": "With the recent advances in language models, attempts are being made to apply them to solving multi-step reasoning problems. A major breakthrough in this line of research is to let language models generate intermediate steps, often called Chain of Thought (CoT), before producing a final answer. 
However, language models have an upper bound on the context size, i.e., the number of input tokens, such as 2048 for the recent GPT-3 and PaLM. Although several thousand tokens are enough to handle various tasks, solving more complex reasoning tasks can require orders of magnitude more tokens. Therefore, the context limit imposes a fundamental constraint on the model's reasoning capability. Inspired by humans' incredible reasoning ability based on abstraction and recursion, we propose Recursion of Thought (RoT) as a model-agnostic framework with the novel paradigm of teaching a language model to divide and conquer complex problems by recursively creating multiple contexts. Since RoT casts the context-related operations as tokens, a language model can trigger the recursion operations by simply producing the corresponding tokens. On multiple arithmetic and algorithmic reasoning tasks, we demonstrate that RoT dramatically improves the ability of the recent large-scale language model GPT-3 to solve extremely complex problems. Moreover, RoT can make tiny, randomly initialized Transformers or LSTMs solve problems that even humans find daunting.", "keywords": "reasoning;language models;chain of thought", "primary_area": "", "supplementary_material": "/attachment/02fc7ac5a56e18bb2706265bc3af9928a1bed219.zip", "author": "Soochan Lee;Gunhee Kim", "authorids": "~Soochan_Lee1;~Gunhee_Kim1", "gender": "M;M", "homepage": "https://soochanlee.com;http://vision.snu.ac.kr/gunhee/", "dblp": "230/1398;45/115", "google_scholar": "8O3MKJkAAAAJ;https://scholar.google.co.kr/citations?user=CiSdOV0AAAAJ", "orcid": "0000-0002-1425-9262;0000-0002-9543-7453", "linkedin": ";", "or_profile": "~Soochan_Lee1;~Gunhee_Kim1", "aff": "Seoul National University;Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr", "position": "PhD student;Full Professor", "bibtex": "@misc{\nlee2023recursion,\ntitle={Recursion of Thought: Divide and Conquer Reasoning with Language Models},\nauthor={Soochan Lee and Gunhee Kim},\nyear={2023},\nurl={https://openreview.net/forum?id=PTUcygUoxuc}\n}", "github": "", "project": "", "reviewers": "U3UZ;k2RA;kKKS", "site": "https://openreview.net/forum?id=PTUcygUoxuc", "pdf_size": 736326, "recommendation": "1;3;8", "confidence": "5;4;3", "correctness": "3;2;4", "technical_novelty": "2;2;4", "empirical_novelty": "1;2;3", "wc_summary_paper": "26;56;160", "wc_strength_and_weaknesses": "119;140;191", "wc_clarity_quality_novelty_and_reproducibility": "54;124;88", "wc_summary_review": "52;53;167", "wc_review": "251;373;606", "wc_reply_reviewers": "0;0;43", "wc_reply_authors": "665;894;389", "reply_reviewers": "0;0;1", "reply_authors": "1;2;1", "recommendation_avg": [ 4.0, 2.943920288775949 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 2.6666666666666665, 0.9428090415820634 ], "empirical_novelty_avg": [ 2.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 80.66666666666667, 57.418541333691934 ], "wc_strength_and_weaknesses_avg": [ 150.0, 30.23243291566195 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 88.66666666666667, 28.58126814696802 ], "wc_summary_review_avg": [ 90.66666666666667, 53.97736150976218 ], "wc_review_avg": [ 410.0, 147.27072576268057 ], "wc_reply_reviewers_avg": [ 14.333333333333334, 20.27039439401436 ], "wc_reply_authors_avg": [ 649.3333333333334, 206.46280267614526 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": 
[ 9, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.9707253433941511, "corr_recommendation_correctness": 0.6933752452815365, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=216546777970263160&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Seoul National University", "aff_unique_dep": "", "aff_unique_url": "https://www.snu.ac.kr", "aff_unique_abbr": "SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "id": "PTZhYSD8aUv", "title": "Model-based Value Exploration in Actor-critic Deep Reinforcement Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Off-policy method has demonstrated great potential on model-free deep reinforcement learning due to the sample-efficient advantage. However, it suffers extra instability due to some mismatched distributions from observations. Model-free on-policy counterparts usually have poor sample efficiency. Model-based algorithms, in contrast, are highly dependent on the goodness of expert demonstrations or learned dynamics.\nIn this work, we propose a method which involves training the dynamics to accelerate and gradually stabilize learning without adding sample-complexity. The dynamics model prediction can provide effective target value exploration, which is essentially different from the methods on-policy exploration, by adding valid diversity of transitions.\nDespite the existence of model bias, the model-based prediction can avoid the overestimation and distribution mismatch errors in off-policy learning, as the learned dynamics model is asymptotically accurate.\nBesides, to generalize the solution to large-scale reinforcement learning problems, we use global gaussian and deterministic function approximation to model the transition probability and reward function, respectively. To minimize the negative impact of potential model bias brought by the estimated dynamics, we adopt one-step global prediction for the model-based part of target value. By analyses and proofs, we show how the model-based prediction provides value exploration and asymptotical performance to the overall network. 
It can also be concluded that the convergence of the proposed algorithm depends only on the accuracy of the learnt dynamics model.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/b282940331bb85a3cff2f7e877bf90458f9e77a2.zip", "author": "Huihui Zhang", "authorids": "~Huihui_Zhang1", "gender": "M", "homepage": "", "dblp": "https://dblp.uni-trier.de/pid/32/7555", "google_scholar": "", "orcid": "", "linkedin": "https://www.linkedin.com/feed/", "or_profile": "~Huihui_Zhang1", "aff": "", "aff_domain": "", "position": "", "bibtex": "@misc{\nzhang2023modelbased,\ntitle={Model-based Value Exploration in Actor-critic Deep Reinforcement Learning},\nauthor={Huihui Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=PTZhYSD8aUv}\n}", "github": "", "project": "", "reviewers": "YZy8;Zou8;3k6t;WVWG", "site": "https://openreview.net/forum?id=PTZhYSD8aUv", "pdf_size": 2515530, "recommendation": "3;3;3;3", "confidence": "4;4;2;4", "correctness": "2;3;2;3", "technical_novelty": "3;3;3;2", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "30;90;52;73", "wc_strength_and_weaknesses": "295;273;406;313", "wc_clarity_quality_novelty_and_reproducibility": "11;348;19;22", "wc_summary_review": "36;178;18;19", "wc_review": "372;889;495;427", "wc_reply_reviewers": "26;23;0;18", "wc_reply_authors": "0;0;182;221", "reply_reviewers": "1;1;0;1", "reply_authors": "0;0;1;1", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 61.25, 22.509720122649238 ], "wc_strength_and_weaknesses_avg": [ 321.75, 50.66248612138966 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 100.0, 143.23931024687323 ], "wc_summary_review_avg": [ 62.75, 66.92299679482383 ], "wc_review_avg": [ 545.75, 202.90807647799534 ], "wc_reply_reviewers_avg": [ 16.75, 10.084022015049353 ], "wc_reply_authors_avg": [ 100.75, 101.6891710065531 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 0.5, 0.5 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 1, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:l3yKkIL1-owJ:scholar.google.com/&scioq=Model-based+Value+Exploration+in+Actor-critic+Deep+Reinforcement+Learning&hl=en&as_sdt=0,5", "gs_version_total": 0 }, { "title": "Training-Free Structured Diffusion Guidance for Compositional Text-to-Image Synthesis", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10778", "id": "PUIqjT4rzq7", "poster": "/media/PosterPDFs/ICLR%202023/10778.png?t=1681114882.327747", "openreview": "https://openreview.net/forum?id=PUIqjT4rzq7", "slides": "https://iclr.cc/virtual/2023/poster/10778", "video": "https://iclr.cc/virtual/2023/poster/10778", "author_site": "Weixi Feng, Xuehai He, Tsu-Jui Fu, Varun Jampani, Arjun Akula, Pradyumna Narayana, S Basu, Xin Wang, William Wang", "tldr": "We propose a training-free approach to incorporate language structure for compositional text-to-image synthesis", "abstract": "Large-scale diffusion models have achieved state-of-the-art results on text-to-image synthesis (T2I) tasks. Despite their ability to generate high-quality yet creative images, we observe that attribute-binding and compositional capabilities are still considered major challenges, especially when involving multiple objects. 
Attribute-binding requires the model to associate objects with the correct attribute descriptions, and compositional skills require the model to combine and generate multiple concepts into a single image. In this work, we improve these two aspects of T2I models to achieve more accurate image compositions. To do this, we incorporate linguistic structures with the diffusion guidance process based on the controllable properties of manipulating cross-attention layers in diffusion-based T2I models. We observe that keys and values in cross-attention layers have strong semantic meanings associated with object layouts and content. Therefore, by manipulating the cross-attention representations based on linguistic insights, we can better preserve the compositional semantics in the generated image. Built upon Stable Diffusion, a SOTA T2I model, our structured cross-attention design is efficient in that it requires no additional training samples. We achieve better compositional skills in qualitative and quantitative results, leading to a significant 5-8\\% advantage in head-to-head user comparison studies. Lastly, we conduct an in-depth analysis to reveal potential causes of incorrect image compositions and justify the properties of cross-attention layers in the generation process. ", "keywords": "Text-to-Image Synthesis;Diffusion Models;Compositional Generation", "primary_area": "", "supplementary_material": "/attachment/d70341618fd534208851e4800e2eac0d16d951d9.zip", "author": "Weixi Feng;Xuehai He;Tsu-Jui Fu;Varun Jampani;Arjun Reddy Akula;Pradyumna Narayana;Sugato Basu;Xin Eric Wang;William Yang Wang", "authorids": "~Weixi_Feng2;~Xuehai_He1;~Tsu-Jui_Fu2;~Varun_Jampani2;~Arjun_Reddy_Akula1;~Pradyumna_Narayana2;~Sugato_Basu1;~Xin_Eric_Wang2;~William_Yang_Wang2", "gender": "M;M;M;M;M;M;M;M;M", "homepage": "https://weixi-feng.github.io/;;https://tsujuifu.github.io;https://research.google/people/ArjunReddyAkula/;;https://eric-xw.github.io;http://sugatobasu.com/;https://varunjampani.github.io/;https://www.cs.ucsb.edu/~william/", "dblp": "322/1026;251/0763;218/5366.html;152/3930;;10/5630-61;76/5024;124/2785;08/9282", "google_scholar": "https://scholar.google.com/citations?hl=en;kDzxOzUAAAAJ;https://scholar.google.com.tw/citations?user=7QRDcC0AAAAJ;CNKX9bgAAAAJ;BV2dbjEAAAAJ;YjqluE0AAAAJ;https://scholar.google.com/citations?hl=en;1Cv6Sf4AAAAJ;gf8Ms_8AAAAJ", "orcid": "0000-0002-7201-5688;;;;;0000-0003-2605-5504;;;", "linkedin": "weixifeng/;;tsujuifu1996;arjun-akula-1b769939;;;;;", "or_profile": "~Weixi_Feng2;~Xuehai_He1;~Tsu-Jui_Fu2;~Arjun_Reddy_Akula1;~Pradyumna_Narayana2;~Xin_Eric_Wang2;~S_Basu1;~Varun_Jampani1;~William_Wang1", "aff": "University of California, Santa Barbara;University of California Santa Cruz;UC Santa Barbara;Google Research;Google;University of California, Santa Cruz;Google;Google Research;UC Santa Barbara", "aff_domain": "ucsb.edu;ucsc.edu;ucsb.edu;google.com;google.com;ucsc.edu;google.com;google.com;ucsb.edu", "position": "PhD student;PhD student;PhD student;Research Scientist;Software Engineer;Assistant Professor;Researcher;Researcher;Full Professor", "bibtex": "@inproceedings{\nfeng2023trainingfree,\ntitle={Training-Free Structured Diffusion Guidance for Compositional Text-to-Image Synthesis},\nauthor={Weixi Feng and Xuehai He and Tsu-Jui Fu and Varun Jampani and Arjun Reddy Akula and Pradyumna Narayana and Sugato Basu and Xin Eric Wang and William Yang Wang},\nbooktitle={The Eleventh International Conference on Learning Representations 
},\nyear={2023},\nurl={https://openreview.net/forum?id=PUIqjT4rzq7}\n}", "github": "", "project": "", "reviewers": "mgR1;QzN2;ZtBV;5ZP1", "pdf_size": 8416265, "recommendation": "6;6;6;6", "confidence": "4;4;4;5", "correctness": "3;3;3;4", "technical_novelty": "2;3;3;2", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "245;67;79;61", "wc_strength_and_weaknesses": "271;141;325;98", "wc_clarity_quality_novelty_and_reproducibility": "31;10;42;139", "wc_summary_review": "68;23;74;30", "wc_review": "615;241;520;328", "wc_reply_reviewers": "28;0;0;325", "wc_reply_authors": "644;600;540;1004", "reply_reviewers": "1;0;0;2", "reply_authors": "2;2;2;3", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 113.0, 76.48529270389177 ], "wc_strength_and_weaknesses_avg": [ 208.75, 92.52668533996017 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 55.5, 49.56056900399752 ], "wc_summary_review_avg": [ 48.75, 22.487496525847426 ], "wc_review_avg": [ 426.0, 148.64891523317618 ], "wc_reply_reviewers_avg": [ 88.25, 137.16481874008363 ], "wc_reply_authors_avg": [ 697.0, 181.04971692880383 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 9, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 326, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11009706863402152282&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=PUIqjT4rzq7", "email": "ucsb.edu;ucsc.edu;ucsb.edu;google.com;google.com;ucsc.edu;google.com;google.com;ucsb.edu", "author_num": 9, "aff_unique_index": "0;1;0;2;2;1;2;2;0", "aff_unique_norm": "University of California, Santa Barbara;University of California, Santa Cruz;Google", "aff_unique_dep": ";;Google Research", "aff_unique_url": "https://www.ucsb.edu;https://www.ucsc.edu;https://research.google", "aff_unique_abbr": "UCSB;UCSC;Google Research", "aff_campus_unique_index": "0;1;0;2;2;1;2;2;0", "aff_campus_unique": "Santa Barbara;Santa Cruz;Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "PUwbwZJz9dO", "title": "Measuring and Narrowing the Compositionality Gap in Language Models", "track": "main", "status": "Reject", "tldr": "Language models can solve complex problems when you let them 'talk things through'; our new 'self-ask' prompt improves their ability to do that, and lets us easily plug in a search engine for even better performance.", "abstract": "We investigate the ability of language models to perform compositional reasoning tasks where the overall solution depends on correctly composing the answers to sub-problems. We measure how often models can correctly answer all sub-problems but not generate the overall solution, a ratio we call the compositionality gap. We evaluate this ratio by asking multi-hop questions with answers that require composing multiple facts unlikely to have been observed together during pretraining. In the GPT-3 family of models, we show that as model size increases, the single-hop question answering performance improves faster than the multi-hop performance does; therefore, the compositionality gap does not decrease. 
This surprising result suggests that while more powerful models memorize and recall more factual knowledge, they show no corresponding improvement in their ability to perform this kind of compositional reasoning.\nWe then demonstrate how elicitive prompting (such as chain of thought) narrows the compositionality gap by reasoning explicitly instead of implicitly. We present a new method, self-ask, that further improves on chain of thought. In our method, the model explicitly asks itself (and then answers) follow-up questions before answering the initial question. We finally show that self-ask's structured prompting lets us easily plug in a search engine to answer the follow-up questions, which additionally improves accuracy.", "keywords": "language modeling;prompting;question answering;retrieval", "primary_area": "", "supplementary_material": "", "author": "Ofir Press;Muru Zhang;Sewon Min;Ludwig Schmidt;Noah A. Smith;Mike Lewis", "authorids": "~Ofir_Press1;~Muru_Zhang1;~Sewon_Min1;~Ludwig_Schmidt1;~Noah_A._Smith2;~Mike_Lewis1", "gender": "M;M;F;M;M;M", "homepage": "https://ofir.io/about;https://nanami18.github.io/;https://www.sewonmin.com;http://people.csail.mit.edu/ludwigs/;;https://homes.cs.washington.edu/~nasmith/", "dblp": "185/0577;325/4648.html;203/9401;141/2720;19/6214;90/5204.html", "google_scholar": "LeHa8psAAAAJ;OJIXk7wAAAAJ;https://scholar.google.ca/citations?user=jU4IZs4AAAAJ;SWMKy70AAAAJ;SnQnQicAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;;;0000-0002-2310-6380", "linkedin": ";muruzhang/;;ludwig-schmidt-87ba3612/;;", "or_profile": "~Ofir_Press1;~Muru_Zhang1;~Sewon_Min1;~Ludwig_Schmidt1;~Mike_Lewis1;~Noah_Smith1", "aff": "University of Washington;University of Washington;Meta Facebook;Allen Institute for Artificial Intelligence;Facebook AI Research;Allen Institute for Artificial Intelligence", "aff_domain": "washington.edu;cs.washington.edu;fb.com;allenai.org;fb.com;allenai.org", "position": "PhD student;MS student;PhD student;Researcher;Research Scientist;Senior Director of NLP Research", "bibtex": "@misc{\npress2023measuring,\ntitle={Measuring and Narrowing the Compositionality Gap in Language Models},\nauthor={Ofir Press and Muru Zhang and Sewon Min and Ludwig Schmidt and Noah A. 
Smith and Mike Lewis},\nyear={2023},\nurl={https://openreview.net/forum?id=PUwbwZJz9dO}\n}", "github": "", "project": "", "reviewers": "b2sd;p9GT;SiwZ", "site": "https://openreview.net/forum?id=PUwbwZJz9dO", "pdf_size": 885646, "recommendation": "5;6;6", "confidence": "4;5;4", "correctness": "3;4;4", "technical_novelty": "2;3;3", "empirical_novelty": "2;3;3", "wc_summary_paper": "177;128;113", "wc_strength_and_weaknesses": "205;192;485", "wc_clarity_quality_novelty_and_reproducibility": "49;71;74", "wc_summary_review": "139;64;95", "wc_review": "570;455;767", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "3175;1304;2359", "reply_reviewers": "0;0;0", "reply_authors": "6;2;4", "recommendation_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 139.33333333333334, 27.329267990359508 ], "wc_strength_and_weaknesses_avg": [ 294.0, 135.16163163659525 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 64.66666666666667, 11.14550233153366 ], "wc_summary_review_avg": [ 99.33333333333333, 30.771559740918054 ], "wc_review_avg": [ 597.3333333333334, 128.83150063379514 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 2279.3333333333335, 765.9070149538752 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 4.0, 1.632993161855452 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.4999999999999999, "corr_recommendation_correctness": 0.9999999999999997, "gs_citation": 453, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16461880739261590204&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0;1;2;1;2", "aff_unique_norm": "University of Washington;Meta;Allen Institute for Artificial Intelligence", "aff_unique_dep": ";Meta Platforms, Inc.;", "aff_unique_url": "https://www.washington.edu;https://meta.com;https://allenai.org", "aff_unique_abbr": "UW;Meta;AI2", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "PWKs1IpMpv", "title": "Epistemological Bias As a Means for the Automated Detection of Injustices in News Media", "track": "main", "status": "Reject", "tldr": "We leverage the combined use of a fine-tuned epistemological detection model, two stereotype detection models, and a lexicon-based approach to show that epistemological biases can assist with the automatic detection of injustices in text.", "abstract": "Injustice occurs when someone experiences unfair treatment or their rights are violated. In the context of news media, injustices represent a form of bias through which discriminatory narratives can arise and spread. The automated identification of injustice in text has received little attention, due in part to the fact that underlying stereotypes are rarely explicitly stated and that instances often occur unconsciously due to the pervasive nature of prejudice in society. 
Here, we leverage the combined use of a fine-tuned BERT-based bias detection model, two stereotype detection models, and a lexicon-based approach to show that epistemological biases (i.e., words which, through their use, presuppose, entail, assert, hedge, or boost text to erode or assert a person's capacity as a knower) can assist with the automatic detection of injustice in text.", "keywords": "testimonial injustice;character injustice;framing bias;epistemological bias;news media", "primary_area": "", "supplementary_material": "/attachment/c0c2a441539cfeb3aceb32ae506bd9fa63919d0e.zip", "author": "Kenya S. Andrews;Lamogha Chiazor", "authorids": "~Kenya_S._Andrews1;lamogha.chiazor@ibm.com", "gender": ";", "homepage": ";", "dblp": ";", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": ";", "aff": ";", "aff_domain": ";", "position": ";", "bibtex": "@misc{\nandrews2023epistemological,\ntitle={Epistemological Bias As a Means for the Automated Detection of Injustices in News Media},\nauthor={Kenya S. Andrews and Lamogha Chiazor},\nyear={2023},\nurl={https://openreview.net/forum?id=PWKs1IpMpv}\n}", "github": "", "project": "", "reviewers": "P7BJ;VEMe;gnJX;zugw", "site": "https://openreview.net/forum?id=PWKs1IpMpv", "pdf_size": 1423716, "recommendation": "3;3;5;8", "confidence": "3;4;3;3", "correctness": "3;2;4;3", "technical_novelty": "2;1;2;4", "empirical_novelty": "2;1;2;3", "wc_summary_paper": "130;79;70;84", "wc_strength_and_weaknesses": "136;619;131;118", "wc_clarity_quality_novelty_and_reproducibility": "107;34;40;13", "wc_summary_review": "41;51;5;30", "wc_review": "414;783;246;245", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "579;893;605;356", "reply_reviewers": "0;0;0;0", "reply_authors": "1;2;1;1", "recommendation_avg": [ 4.75, 2.0463381929681126 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.25, 1.0897247358851685 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 90.75, 23.209642392764263 ], "wc_strength_and_weaknesses_avg": [ 251.0, 212.56646019539394 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 48.5, 35.23137805990563 ], "wc_summary_review_avg": [ 31.75, 17.137313091613866 ], "wc_review_avg": [ 422.0, 219.48234553148004 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 608.25, 190.77391724237356 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.49374193110101877, "corr_recommendation_correctness": 0.34554737023254406, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:3dqosroQV3wJ:scholar.google.com/&scioq=Epistemological+Bias+As+a+Means+for+the+Automated+Detection+of+Injustices+in+News+Media&hl=en&as_sdt=0,5", "gs_version_total": 0 }, { "id": "PWWW73yQVp", "title": "VARIATIONAL ADAPTIVE GRAPH TRANSFORMER FOR MULTIVARIATE TIME SERIES MODELING", "track": "main", "status": "Reject", "tldr": "", "abstract": "Multivariate time series (MTS) are widely collected by large-scale complex systems, such as internet services, IT infrastructures, and wearable devices. The modeling of MTS has long been an important but challenging task. To capture complex long-range dynamics, Transformers have been utilized in MTS modeling and achieved attractive performance. 
However, Transformers in general do not capture well the diverse relationships between different channels within MTS and have difficulty in modeling MTS with complex distributions due to the lack of stochasticity. In this paper, we first incorporate relational modeling into the Transformer to develop an adaptive Graph Transformer (G-Trans) module for MTS. Then, we further consider stochasticity by introducing a powerful embedding-guided probabilistic generative module for G-Trans to construct the Variational adaptive Graph Transformer (VG-Trans), which is a well-defined variational generative dynamic model. VG-Trans is utilized to learn expressive representations of MTS, being a plug-and-play framework that can be applied to forecasting and anomaly detection tasks of MTS. For efficient inference, we develop an autoencoding variational inference scheme with a combined prediction and reconstruction loss. Extensive experiments on diverse datasets show the efficiency of VG-Trans on MTS modeling, and VG-Trans outperforms state-of-the-art methods on a variety of MTS modeling tasks.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/5dd3db68c178a7f3aed18fde354ebc156a573eb8.zip", "author": "Long Tian;Wenchao Chen;Bo Chen;Muyao Wang;Liang Dai;BaoLin Sun;Mingyuan Zhou", "authorids": "~Long_Tian1;~Wenchao_Chen1;~Bo_Chen1;~Muyao_Wang1;dailiang@iie.ac.cn;xuanfeng.sbl@antgroup.com;~Mingyuan_Zhou1", "gender": "M;M;M;;;;M", "homepage": "https://faculty.xidian.edu.cn/TL1/zh_CN/index.htm;https://web.xidian.edu.cn/chenwenchao/;http://web.xidian.edu.cn/bchen/en/index.html;;;;http://mingyuanzhou.github.io", "dblp": ";;89/5615-1;;;;", "google_scholar": ";;;;;;LXwCIisAAAAJ", "orcid": ";;0000-0001-5151-9388;;;;", "linkedin": ";;;;;;", "or_profile": "~Long_Tian1;~Wenchao_Chen1;~Bo_Chen1;~Muyao_Wang1;dailiang@iie.ac.cn;xuanfeng.sbl@antgroup.com;~Mingyuan_Zhou1", "aff": "Xi'an University of Software Engineering Institute;Xidian University;Xidian University;;;;Google", "aff_domain": "xidian.edu.cn;xidian.edu;xidian.edu.cn;;;;google.com", "position": "Assistant Professor;Associate Professor;Full Professor;;;;Researcher", "bibtex": "@misc{\ntian2023variational,\ntitle={{VARIATIONAL} {ADAPTIVE} {GRAPH} {TRANSFORMER} {FOR} {MULTIVARIATE} {TIME} {SERIES} {MODELING}},\nauthor={Long Tian and Wenchao Chen and Bo Chen and Muyao Wang and Liang Dai and BaoLin Sun and Mingyuan Zhou},\nyear={2023},\nurl={https://openreview.net/forum?id=PWWW73yQVp}\n}", "github": "", "project": "", "reviewers": "RixC;Z2ri;XSSh", "site": "https://openreview.net/forum?id=PWWW73yQVp", "pdf_size": 1863062, "recommendation": "3;5;6", "confidence": "3;4;2", "correctness": "2;3;3", "technical_novelty": "2;2;3", "empirical_novelty": "2;2;3", "wc_summary_paper": "48;84;55", "wc_strength_and_weaknesses": "240;182;86", "wc_clarity_quality_novelty_and_reproducibility": "45;17;32", "wc_summary_review": "37;20;12", "wc_review": "370;303;185", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 62.333333333333336, 15.584892970081281 ], "wc_strength_and_weaknesses_avg": [ 169.33333333333334, 63.50503042191925 ], 
"wc_clarity_quality_novelty_and_reproducibility_avg": [ 31.333333333333332, 11.440668201153676 ], "wc_summary_review_avg": [ 23.0, 10.424330514074594 ], "wc_review_avg": [ 286.0, 76.47657593450865 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.3273268353539886, "corr_recommendation_correctness": 0.9449111825230683, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:eCUdlm0oekcJ:scholar.google.com/&scioq=VARIATIONAL+ADAPTIVE+GRAPH+TRANSFORMER+FOR+MULTIVARIATE+TIME+SERIES+MODELING&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Xi'an University of Software Engineering;Xidian University;Google", "aff_unique_dep": "Software Engineering Institute;;Google", "aff_unique_url": "http://www.xauat.edu.cn;http://www.xidian.edu.cn/;https://www.google.com", "aff_unique_abbr": ";Xidian;Google", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Xi'an;;Mountain View", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "China;United States" }, { "id": "PXRN-uxHoIE", "title": "Learning Invariant Features for Online Continual Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "It has been shown recently that learning only discriminative features that are sufficient to separate the classes in a task using a traditional learning method has a major shortcoming for continual learning (CL). This is because many features that are not learned may be necessary for distinguishing classes of some future tasks. When such a future task arrives, these features have to be learned by updating the network, which causes catastrophic forgetting (CF). A recent work on online CL showed that if the learning method can learn as many features as possible from each class, called holistic representations, CF can be significantly reduced to achieve a large performance gain. This paper argues that learning only holistic representations is still insufficient. The learned representations should also be invariant and those features that are present in the data but are irrelevant to the class (e.g., the background information) should be ignored for better generalization across tasks. This new condition further boosts the performance significantly. 
This paper proposes several strategies and a loss to learn holistic and invariant representations and evaluates their effectiveness in online CL.", "keywords": "continual learning;online continual learning", "primary_area": "", "supplementary_material": "/attachment/0bb7ee8a7451ed92447d137bd4d49615bf582365.zip", "author": "Yiduo Guo;Bing Liu;Dongyan Zhao", "authorids": "~Yiduo_Guo2;~Bing_Liu1;~Dongyan_Zhao2", "gender": "M;M;M", "homepage": "https://www.cs.uic.edu/~liub/;https://www.wict.pku.edu.cn/zhaodongyan/en/;https://github.com/gydpku", "dblp": "l/BingLiu1.html;63/1870;196/5954.html", "google_scholar": "Kt1bjZoAAAAJ;lhR8-68AAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";;", "linkedin": ";;", "or_profile": "~Bing_Liu1;~Dongyan_Zhao2;~Yiduo_GUO1", "aff": "University of Illinois at Chicago;Peking University;Peking University", "aff_domain": "uic.edu;pku.edu.cn;pku.edu.cn", "position": "Full Professor;Full Professor;PhD student", "bibtex": "@misc{\nguo2023learning,\ntitle={Learning Invariant Features for Online Continual Learning},\nauthor={Yiduo Guo and Bing Liu and Dongyan Zhao},\nyear={2023},\nurl={https://openreview.net/forum?id=PXRN-uxHoIE}\n}", "github": "", "project": "", "reviewers": "K6JM;SrHu;jHRu;YuYN", "site": "https://openreview.net/forum?id=PXRN-uxHoIE", "pdf_size": 2573143, "recommendation": "5;6;8;8", "confidence": "4;4;5;3", "correctness": "3;3;3;3", "technical_novelty": "2;2;4;3", "empirical_novelty": "3;3;0;3", "wc_summary_paper": "77;263;113;28", "wc_strength_and_weaknesses": "127;90;36;264", "wc_clarity_quality_novelty_and_reproducibility": "32;160;191;89", "wc_summary_review": "27;35;28;59", "wc_review": "263;548;368;440", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;211;129", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;1;1", "recommendation_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 2.25, 1.299038105676658 ], "wc_summary_paper_avg": [ 120.25, 87.76495599041795 ], "wc_strength_and_weaknesses_avg": [ 129.25, 84.25964336501787 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 118.0, 61.90718859712497 ], "wc_summary_review_avg": [ 37.25, 12.93010054098575 ], "wc_review_avg": [ 404.75, 103.93116712516992 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 85.0, 89.80812880803163 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0.5, 0.5 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:zsPjFu0Fi-EJ:scholar.google.com/&scioq=Learning+Invariant+Features+for+Online+Continual+Learning&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;1", "aff_unique_norm": "University of Illinois at Chicago;Peking University", "aff_unique_dep": ";", "aff_unique_url": "https://www.uic.edu;http://www.pku.edu.cn", "aff_unique_abbr": "UIC;Peking U", "aff_campus_unique_index": "0", "aff_campus_unique": "Chicago;", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United States;China" }, { "title": "Offline Congestion Games: How Feedback Type Affects Data Coverage Requirement", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11450", "id": "PXVGer7hmJ", "poster": "", "openreview": "https://openreview.net/forum?id=PXVGer7hmJ", "slides": 
"https://iclr.cc/virtual/2023/poster/11450", "video": "https://iclr.cc/virtual/2023/poster/11450", "author_site": "Haozhe Jiang, Qiwen Cui, Zhihan Xiong, Maryam Fazel, Simon Du", "tldr": "", "abstract": "This paper investigates when one can efficiently recover an approximate Nash Equilibrium (NE) in offline congestion games. The existing dataset coverage assumption in offline general-sum games inevitably incurs a dependency on the number of actions, which can be exponentially large in congestion games. We consider three different types of feedback with decreasing revealed information. Starting from the facility-level (a.k.a., semi-bandit) feedback, we propose a novel one-unit deviation coverage condition and show a pessimism-type algorithm that can recover an approximate NE. For the agent-level (a.k.a., bandit) feedback setting, interestingly, we show the one-unit deviation coverage condition is not sufficient. On the other hand, we convert the game to multi-agent linear bandits and show that with a generalized data coverage assumption in offline linear bandits, we can efficiently recover the approximate NE. Lastly, we consider a novel type of feedback, the game-level feedback where only the total reward from all agents is revealed. Again, we show the coverage assumption for the agent-level feedback setting is insufficient in the game-level feedback setting, and with a stronger version of the data coverage assumption for linear bandits, we can recover an approximate NE. Together, our results constitute the first study of offline congestion games and imply formal separations between different types of feedback.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Haozhe Jiang;Qiwen Cui;Zhihan Xiong;Maryam Fazel;Simon Shaolei Du", "authorids": "~Haozhe_Jiang1;~Qiwen_Cui1;~Zhihan_Xiong1;~Maryam_Fazel1;~Simon_Shaolei_Du1", "gender": "M;M;M;F;M", "homepage": "https://astro-eric.github.io;;https://homes.cs.washington.edu/~zhihanx/;;http://simonshaoleidu.com", "dblp": "303/4241;276/6268;255/6096;10/2309;176/5602", "google_scholar": "HQCpSJMAAAAJ;AnSVkUYAAAAJ;OsSiEMEAAAAJ;vlN_kRoAAAAJ;OttawxUAAAAJ", "orcid": ";;;;", "linkedin": "haozhe-jiang-261b83212/;;zhihan-xiong/;;", "or_profile": "~Haozhe_Jiang1;~Qiwen_Cui1;~Zhihan_Xiong1;~Maryam_Fazel1;~Simon_Shaolei_Du1", "aff": "Tsinghua University;Department of Computer Science, University of Washington;University of Washington;University of Washington, Seattle;Meta Facebook", "aff_domain": "mails.tsinghua.edu.cn;cs.washington.edu;washington.edu;uw.edu;fb.com", "position": "Undergrad student;PhD student;PhD student;Full Professor;Visiting Professor", "bibtex": "@inproceedings{\njiang2023offline,\ntitle={Offline Congestion Games: How Feedback Type Affects Data Coverage Requirement},\nauthor={Haozhe Jiang and Qiwen Cui and Zhihan Xiong and Maryam Fazel and Simon Shaolei Du},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=PXVGer7hmJ}\n}", "github": "", "project": "", "reviewers": "3Ebi;oumy;g7gc;Kv1e", "pdf_size": 659494, "recommendation": "6;8;8;8", "confidence": "4;3;4;2", "correctness": "3;4;4;3", "technical_novelty": "3;3;3;3", "empirical_novelty": "0;0;3;4", "wc_summary_paper": "96;53;82;134", "wc_strength_and_weaknesses": "237;83;160;91", "wc_clarity_quality_novelty_and_reproducibility": "67;22;27;20", "wc_summary_review": "45;52;70;19", "wc_review": "445;210;339;264", "wc_reply_reviewers": "176;0;34;0", "wc_reply_authors": "850;50;199;206", 
"reply_reviewers": "2;0;1;0", "reply_authors": "3;1;2;1", "recommendation_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 1.75, 1.7853571071357126 ], "wc_summary_paper_avg": [ 91.25, 29.14939965076468 ], "wc_strength_and_weaknesses_avg": [ 142.75, 62.10625974891742 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 34.0, 19.222382786741086 ], "wc_summary_review_avg": [ 46.5, 18.309833423600555 ], "wc_review_avg": [ 314.5, 88.17737805128932 ], "wc_reply_reviewers_avg": [ 52.5, 72.64124172947486 ], "wc_reply_authors_avg": [ 326.25, 308.7396759407511 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.5222329678670935, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8916432226141575030&as_sdt=5,24&sciodt=0,24&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=PXVGer7hmJ", "email": "mails.tsinghua.edu.cn;cs.washington.edu;washington.edu;uw.edu;fb.com", "author_num": 5, "aff_unique_index": "0;1;1;1;2", "aff_unique_norm": "Tsinghua University;University of Washington;Meta", "aff_unique_dep": ";Department of Computer Science;Meta Platforms, Inc.", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.washington.edu;https://meta.com", "aff_unique_abbr": "THU;UW;Meta", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Seattle", "aff_country_unique_index": "0;1;1;1;1", "aff_country_unique": "China;United States" }, { "id": "PXibCVxXdT", "title": "Wasserstein Fair Autoencoders", "track": "main", "status": "Reject", "tldr": "We present a framework based on Wasserstein autoencoders that can reinforce some theoretical weak links in the variational approaches on fair or disentangled represenation.", "abstract": "Autoencoders, or nonlinear factor models parameterized by neural networks, have become an indispensable tool for generative modeling and representation learning in high dimensions. Imposing structural constraints such as conditional independence on the latent variables (representation, or factors) in order to capture invariance or fairness with autoencoders has been attempted through adding ad hoc penalties to the loss function mostly in the variational autoencoder (VAE) context, often based on heuristic arguments. In this paper, we demonstrate that Wasserstein autoencoders (WAEs) are highly flexible in embracing structural constraints. Well-known extensions of VAEs for this purpose are gracefully handled within the framework of the seminal result by Tolstikhin et al. (2018). In particular, given a conditional independence structure of the generative model (decoder), corresponding encoder structure and penalties are induced from the functional constraints that define the WAE. This property of WAEs opens up a principled way of penalizing autoencoders to impose structural constraints. 
Utilizing this generative model structure, we present results on fair representation and conditional generation tasks, and compare them with other preceding methods.", "keywords": "conditional generation;fair representation;disentanglement;wasserstein autoencoder", "primary_area": "", "supplementary_material": "/attachment/d45c9f1f99c009c3a973e0f99625d00730460813.zip", "author": "Sungdong Lee;Hyunjong Lee;Joong-Ho Won", "authorids": "~Sungdong_Lee1;~Hyunjong_Lee1;~Joong-Ho_Won1", "gender": "M;M;", "homepage": "https://github.com/sdlee087;https://leehyunjong.github.io/;", "dblp": "296/9435;;", "google_scholar": ";https://scholar.google.com/citations?hl=ko;", "orcid": "0000-0003-0655-5050;;", "linkedin": ";;", "or_profile": "~Sungdong_Lee1;~Hyunjong_Lee1;~Joong-Ho_Won1", "aff": "Seoul National University;Seoul National University;", "aff_domain": "snu.ac.kr;snu.ac.kr;", "position": "PhD student;PhD student;", "bibtex": "@misc{\nlee2023wasserstein,\ntitle={Wasserstein Fair Autoencoders},\nauthor={Sungdong Lee and Hyunjong Lee and Joong-Ho Won},\nyear={2023},\nurl={https://openreview.net/forum?id=PXibCVxXdT}\n}", "github": "", "project": "", "reviewers": "4HcR;Zcji;ernN", "site": "https://openreview.net/forum?id=PXibCVxXdT", "pdf_size": 6727688, "recommendation": "1;3;5", "confidence": "4;4;2", "correctness": "1;2;3", "technical_novelty": "1;2;2", "empirical_novelty": "1;2;2", "wc_summary_paper": "68;247;53", "wc_strength_and_weaknesses": "297;218;263", "wc_clarity_quality_novelty_and_reproducibility": "124;73;31", "wc_summary_review": "42;25;21", "wc_review": "531;563;368", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.0, 1.632993161855452 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "correctness_avg": [ 2.0, 0.816496580927726 ], "technical_novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "wc_summary_paper_avg": [ 122.66666666666667, 88.12995454945435 ], "wc_strength_and_weaknesses_avg": [ 259.3333333333333, 32.355662392986005 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 76.0, 38.02630668366309 ], "wc_summary_review_avg": [ 29.333333333333332, 9.104333522498443 ], "wc_review_avg": [ 487.3333333333333, 85.3867020612044 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.8660254037844387, "corr_recommendation_correctness": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:WnAWbAywDsAJ:scholar.google.com/&scioq=Wasserstein+Fair+Autoencoders&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Seoul National University", "aff_unique_dep": "", "aff_unique_url": "https://www.snu.ac.kr", "aff_unique_abbr": "SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "id": "PY1wvNgwhPC", "title": "HSVC: Transformer-based Hierarchical Distillation for Software Vulnerability Classification", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Software vulnerabilities have diverse characteristics, attacks, and impacts on software systems, stakeholders, and organizations. 
Such diverse characteristics of vulnerabilities (i.e., CWE-IDs) often lead to more difficulty in handling the label distributions for a Deep Learning model (e.g., addressing a highly imbalanced multi-class classification problem). However, existing vulnerability detection approaches often treat vulnerabilities equally---which does not reflect reality. In this paper, we present a new approach to solving the highly imbalanced software vulnerability classification (SVC) problem by leveraging the hierarchical structure of CWE-IDs and knowledge distillation. Specifically, we split a complex label distribution into sub-distributions based on CWE abstract types (i.e., categorizations that group similar CWE-IDs), so similar CWE-IDs can be grouped and each group will have a more balanced label distribution. We learn TextCNN teachers on each of the simplified distributions respectively; however, they only perform well within their own groups. Thus, we build a transformer student model to generalize the performance of the TextCNN teachers through our hierarchical knowledge distillation framework. We compare our approach with source code transformer models as well as long-tailed learning approaches proposed in the vision domain. Through an extensive evaluation using 8,636 real-world vulnerabilities, our approach outperforms all of the baselines by 1.97%-13.89%. Our framework can be applied to any transformer-based SVC such as CodeBERT, GraphCodeBERT, and CodeGPT, with slight modifications. Training code and pre-trained models are available at https://github.com/HSVC-TEAM/HSVC.\n", "keywords": "Transformers-based models;Knowledge distillation;Long-tailed learning;Software vulnerability classification", "primary_area": "", "supplementary_material": "", "author": "Michael Fu;Van Nguyen;Chakkrit Tantithamthavorn;Trung Le;Dinh Phung", "authorids": "~Michael_Fu3;~Van_Nguyen2;~Chakkrit_Tantithamthavorn1;~Trung_Le2;~Dinh_Phung2", "gender": "M;M;M;M;M", "homepage": "https://michaelfu1998-create.github.io/;;http://chakkrit.com;;https://research.monash.edu/en/persons/dinh-phung", "dblp": ";;;;71/5859", "google_scholar": "1ndiadMAAAAJ;KPpmKZ0AAAAJ;idShgcoAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.au/citations?user=OtA9SwIAAAAJ", "orcid": "0000-0001-7211-3491;0000-0002-5838-3409;;;0000-0002-9977-8247", "linkedin": ";;;;https://linkedin.com/in/dinh-phung-6b537a6", "or_profile": "~Michael_Fu3;~Van_Nguyen2;~Chakkrit_Tantithamthavorn1;~Trung_Le2;~Dinh_Phung1", "aff": "Monash University;Monash University;Monash University;Monash University;Monash University", "aff_domain": "monash.edu;monash.edu;monash.edu;monash.edu;monash.edu", "position": "PhD student;Postdoc;Lecturer;Assistant Professor;Full Professor", "bibtex": "@misc{\nfu2023hsvc,\ntitle={{HSVC}: Transformer-based Hierarchical Distillation for Software Vulnerability Classification},\nauthor={Michael Fu and Van Nguyen and Chakkrit Tantithamthavorn and Trung Le and Dinh Phung},\nyear={2023},\nurl={https://openreview.net/forum?id=PY1wvNgwhPC}\n}", "github": "", "project": "", "reviewers": "33wi;bacf;6DkF;obFC", "site": "https://openreview.net/forum?id=PY1wvNgwhPC", "pdf_size": 470090, "recommendation": "3;3;5;6", "confidence": "4;4;4;4", "correctness": "3;4;3;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;3;2;3", "wc_summary_paper": "82;161;65;167", "wc_strength_and_weaknesses": "166;53;217;230", "wc_clarity_quality_novelty_and_reproducibility": "34;501;33;55", "wc_summary_review": "76;45;16;48", "wc_review": "358;760;331;500", 
"wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 118.75, 45.6966902521397 ], "wc_strength_and_weaknesses_avg": [ 166.5, 69.75851202541521 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 155.75, 199.52365148021926 ], "wc_summary_review_avg": [ 46.25, 21.241174637952582 ], "wc_review_avg": [ 487.25, 170.05495435299733 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": -0.5555555555555555, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:pJI3WO-u3psJ:scholar.google.com/&scioq=HSVC:+Transformer-based+Hierarchical+Distillation+for+Software+Vulnerability+Classification&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Monash University", "aff_unique_dep": "", "aff_unique_url": "https://www.monash.edu", "aff_unique_abbr": "Monash", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Australia" }, { "id": "PYSktOGKBkY", "title": "Provable Sharpness-Aware Minimization with Adaptive Learning Rate", "track": "main", "status": "Reject", "tldr": "We present the first convergence guarantee of the adaptive SAM method with a linear speedup property under the non-convex setting.", "abstract": "Sharpness aware minimization (SAM) optimizer has been extensively explored as it can converge fast and train deep neural networks efficiently via introducing extra perturbation steps to flatten the landscape of deep learning models. A combination of SAM with adaptive learning rate (AdaSAM) has also been explored to train large-scale deep neural networks without theoretical guarantee due to the dual difficulties in analyzing the perturbation step and the coupled adaptive learning rate. In this paper, we try to analyze the convergence rate of AdaSAM in the stochastic non-convex setting. We theoretically show that AdaSAM admit a $\\mathcal{O}(1/\\sqrt{bT})$ convergence rate and show linear speedup property with respect to mini-batch size b. To best of our knowledge, we are the first to provide the non-trivial convergence rate of SAM with an adaptive learning rate. To decouple the two stochastic gradient steps with the adaptive learning rate, we first introduce the delayed second-order momentum during the convergence to decompose them to make them independent while taking an expectation. Then we bound them by showing the adaptive learning rate has a limited range, which makes our analysis feasible. 
Finally, we conduct experiments on several NLP tasks, which show that AdaSAM achieves superior performance compared with the SGD, AMSGrad, and SAM optimizers.", "keywords": "Adaptive learning rate;Sharpness aware minimization;mini-batch linear speedup", "primary_area": "", "supplementary_material": "/attachment/4cd7e507f3cdfaa85e1ef02ecf5b9fa79b61809c.zip", "author": "Hao Sun;Li Shen;Qihuang Zhong;Liang Ding;Shixiang Chen;Jingwei Sun;Guangzhong Sun;Dacheng Tao", "authorids": "~Hao_Sun5;~Li_Shen1;~Qihuang_Zhong1;~Liang_Ding3;~Shixiang_Chen1;~Jingwei_Sun3;~Guangzhong_Sun1;~Dacheng_Tao1", "gender": "M;M;M;M;M;;M;", "homepage": "http://home.ustc.edu.cn/~ustcsh;https://sites.google.com/site/mathshenli/home;https://www.qihuangzhong.top/;http://liamding.cc/;;https://faculty.ustc.edu.cn/sunjingwei;;", "dblp": ";91/3680-8;272/6439.html;88/3340-6.html;192/1537;https://dblp.uni-trier.de/pid/66/7761-1;44/1372;", "google_scholar": ";yVhgENIAAAAJ;https://scholar.google.com/citations?hl=zh-CN;lFCLvOAAAAAJ;https://scholar.google.com.hk/citations?user=WChDBRkAAAAJ;;;", "orcid": ";;;;0000-0002-3261-0714;;0000-0002-0794-7681;", "linkedin": ";;;;;;;", "or_profile": "~Hao_Sun5;~Li_Shen1;~Qihuang_Zhong1;~Liang_Ding3;~Shixiang_Chen1;~Jingwei_Sun3;~Guangzhong_Sun1;~Dacheng_Tao1", "aff": "University of Science and Technology of China;JD Explore Academy;Wuhan University;JD Explore Academy, JD.com Inc.;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;", "aff_domain": "ustc.edu.cn;jd.com;whu.edu.cn;jd.com;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;", "position": "PhD student;Researcher;PhD student;Research Scientist;Assistant Professor;Researcher;Full Professor;", "bibtex": "@misc{\nsun2023provable,\ntitle={Provable Sharpness-Aware Minimization with Adaptive Learning Rate },\nauthor={Hao Sun and Li Shen and Qihuang Zhong and Liang Ding and Shixiang Chen and Jingwei Sun and Guangzhong Sun and Dacheng Tao},\nyear={2023},\nurl={https://openreview.net/forum?id=PYSktOGKBkY}\n}", "github": "", "project": "", "reviewers": "w96Y;rzri;x2Pp;5Rfi", "site": "https://openreview.net/forum?id=PYSktOGKBkY", "pdf_size": 581181, "recommendation": "3;3;5;5", "confidence": "4;5;3;3", "correctness": "4;3;3;2", "technical_novelty": "1;1;2;4", "empirical_novelty": "1;1;2;3", "wc_summary_paper": "99;48;48;48", "wc_strength_and_weaknesses": "364;286;191;85", "wc_clarity_quality_novelty_and_reproducibility": "114;69;103;425", "wc_summary_review": "43;32;83;62", "wc_review": "620;435;425;620", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.0, 1.224744871391589 ], "empirical_novelty_avg": [ 1.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 60.75, 22.083647796503186 ], "wc_strength_and_weaknesses_avg": [ 231.5, 104.43778052026958 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 177.75, 143.71042933621763 ], "wc_summary_review_avg": [ 55.0, 19.403607911932255 ], "wc_review_avg": [ 525.0, 95.06576670915771 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": -0.9045340337332909, "corr_recommendation_correctness": -0.7071067811865475, "gs_citation": 0,
"gs_cited_by_link": "https://scholar.google.com/scholar?q=related:k-JqJaPFhXcJ:scholar.google.com/&scioq=Provable+Sharpness-Aware+Minimization+with+Adaptive+Learning+Rate&hl=en&as_sdt=0,14", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;0;0;0", "aff_unique_norm": "University of Science and Technology of China;JD;Wuhan University;JD.com Inc.", "aff_unique_dep": ";JD Explore Academy;;JD Explore Academy", "aff_unique_url": "http://www.ustc.edu.cn;;http://www.whu.edu.cn/;https://www.jd.com", "aff_unique_abbr": "USTC;;WHU;JD.com", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China;" }, { "title": "Scaling up and Stabilizing Differentiable Planning with Implicit Differentiation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10976", "id": "PYbe4MoHf32", "poster": "", "openreview": "https://openreview.net/forum?id=PYbe4MoHf32", "slides": "https://iclr.cc/virtual/2023/poster/10976", "video": "https://iclr.cc/virtual/2023/poster/10976", "author_site": "Linfeng Zhao, Huazhe Xu, Lawson Wong", "tldr": "", "abstract": "Differentiable planning promises end-to-end differentiability and adaptivity. However, an issue prevents it from scaling up to larger-scale problems: they need to differentiate through forward iteration layers to compute gradients, which couples forward computation and backpropagation and needs to balance forward planner performance and computational cost of the backward pass. To alleviate this issue, we propose to differentiate through the Bellman fixed-point equation to decouple forward and backward passes for Value Iteration Network and its variants, which enables constant backward cost (in planning horizon) and flexible forward budget and helps scale up to large tasks. We study the convergence stability, scalability, and efficiency of the proposed implicit version of VIN and its variants and demonstrate their superiorities on a range of planning tasks: 2D navigation, visual navigation, and 2-DOF manipulation in configuration space and workspace.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/f32e750cce3e87ba08105885ae2c2bd0f307ad13.zip", "author": "Linfeng Zhao;Huazhe Xu;Lawson L.S. Wong", "authorids": "~Linfeng_Zhao1;~Huazhe_Xu1;~Lawson_L.S._Wong2", "gender": ";M;M", "homepage": "http://lfzhao.com;http://hxu.rocks;https://www.ccs.neu.edu/home/lsw/", "dblp": "221/4652;164/9006;35/2573", "google_scholar": ";t9HPFawAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;", "linkedin": ";;", "or_profile": "~Linfeng_Zhao1;~Huazhe_Xu1;~Lawson_L._S._Wong1", "aff": "Boston Dynamics AI Institute;Tsinghua University;Northeastern University", "aff_domain": "theaiinstitute.com;tsinghua.edu.cn;northeastern.edu", "position": "Research Intern;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhao2023scaling,\ntitle={Scaling up and Stabilizing Differentiable Planning with Implicit Differentiation},\nauthor={Linfeng Zhao and Huazhe Xu and Lawson L.S. 
Wong},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=PYbe4MoHf32}\n}", "github": "", "project": "", "reviewers": "3dQr;D5nz;Wf8g", "pdf_size": 2498955, "recommendation": "6;6;8", "confidence": "2;5;2", "correctness": "3;3;4", "technical_novelty": "2;3;4", "empirical_novelty": "2;2;4", "wc_summary_paper": "61;38;78", "wc_strength_and_weaknesses": "173;856;83", "wc_clarity_quality_novelty_and_reproducibility": "36;37;7", "wc_summary_review": "41;156;29", "wc_review": "311;1087;197", "wc_reply_reviewers": "102;520;0", "wc_reply_authors": "1139;2023;245", "reply_reviewers": "1;2;0", "reply_authors": "3;4;1", "recommendation_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.0, 1.4142135623730951 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.816496580927726 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.9428090415820634 ], "wc_summary_paper_avg": [ 59.0, 16.391054470858997 ], "wc_strength_and_weaknesses_avg": [ 370.6666666666667, 345.14377036565827 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 26.666666666666668, 13.912424503139471 ], "wc_summary_review_avg": [ 75.33333333333333, 57.249939349798055 ], "wc_review_avg": [ 531.6666666666666, 395.4283191117646 ], "wc_reply_reviewers_avg": [ 207.33333333333334, 224.97604810784242 ], "wc_reply_authors_avg": [ 1135.6666666666667, 725.8692872840276 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 2.6666666666666665, 1.247219128924647 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.49999999999999994, "corr_recommendation_correctness": 0.9999999999999998, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18050424615758247556&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "pdf": "https://openreview.net/pdf?id=PYbe4MoHf32", "email": "theaiinstitute.com;tsinghua.edu.cn;northeastern.edu", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Boston Dynamics AI Institute;Tsinghua University;Northeastern University", "aff_unique_dep": "AI Institute;;", "aff_unique_url": "https://www.bostondynamics.com/;https://www.tsinghua.edu.cn;https://www.northeastern.edu", "aff_unique_abbr": "BD AI;THU;NEU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;China" }, { "id": "PZZUcxazxSw", "title": "Policy Contrastive Imitation Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Adversarial imitation learning (AIL) is a popular method that has recently achieved much success. However, the performance of AIL is still unsatisfactory on more challenging tasks. We find that one of the major reasons is the low quality of the AIL discriminator's representation. Since the AIL discriminator is trained via binary classification that does not necessarily discriminate the policy from the expert in a meaningful way, the resulting reward might not be meaningful either. We propose a new method called Policy Contrastive Imitation Learning (PCIL) to resolve this issue. PCIL learns a contrastive representation space by anchoring on different policies and uses a smooth cosine-similarity-based reward to encourage imitation learning.
Our proposed representation learning objective can be viewed as a stronger version of the AIL objective and provides a more meaningful comparison between the agent and the policy. From a theoretical perspective, we show the validity of our method using the apprenticeship learning framework. Furthermore, our empirical evaluation on the DeepMind Control suite demonstrates that PCIL can achieve state-of-the-art performance. Finally, qualitative results suggest that PCIL builds a smoother and more meaningful representation space for imitation learning.", "keywords": "adversarial imitation learning;contrastive learning", "primary_area": "", "supplementary_material": "", "author": "Jialei Huang;Zhao-Heng Yin;Yingdong Hu;Yang Gao", "authorids": "~Jialei_Huang1;~Zhao-Heng_Yin1;~Yingdong_Hu1;~Yang_Gao1", "gender": "M;M;M;M", "homepage": "https://github.com/JialeiHuang/JialeiHuang.github.io;;http://yang-gao.weebly.com;https://zhaohengyin.github.io", "dblp": ";219/8916;89/4402-29;264/9661", "google_scholar": ";HhotyAoAAAAJ;https://scholar.google.com/citations?hl=en;_egJxfMAAAAJ", "orcid": ";;;", "linkedin": ";;yang-gao-45245348/;", "or_profile": "~Jialei_Huang1;~Yingdong_Hu1;~Yang_Gao1;~Zhao_Heng_Yin1", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;Hong Kong University of Science and Technology", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;ust.hk", "position": "PhD student;PhD student;Assistant Professor;MPhil", "bibtex": "@misc{\nhuang2023policy,\ntitle={Policy Contrastive Imitation Learning},\nauthor={Jialei Huang and Zhao-Heng Yin and Yingdong Hu and Yang Gao},\nyear={2023},\nurl={https://openreview.net/forum?id=PZZUcxazxSw}\n}", "github": "", "project": "", "reviewers": "igbW;dovc;1ZsP", "site": "https://openreview.net/forum?id=PZZUcxazxSw", "pdf_size": 1153372, "recommendation": "5;5;8", "confidence": "3;3;4", "correctness": "2;2;4", "technical_novelty": "2;3;4", "empirical_novelty": "2;4;3", "wc_summary_paper": "56;77;90", "wc_strength_and_weaknesses": "369;1240;124", "wc_clarity_quality_novelty_and_reproducibility": "2;118;91", "wc_summary_review": "26;56;40", "wc_review": "453;1491;345", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.9428090415820634 ], "technical_novelty_avg": [ 3.0, 0.816496580927726 ], "empirical_novelty_avg": [ 3.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 74.33333333333333, 14.007934259633796 ], "wc_strength_and_weaknesses_avg": [ 577.6666666666666, 478.9017528563545 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 70.33333333333333, 49.56028876249837 ], "wc_summary_review_avg": [ 40.666666666666664, 12.256517540566822 ], "wc_review_avg": [ 763.0, 516.6584945590268 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 1.0, "corr_recommendation_correctness": 1.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12203722466879478016&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Tsinghua University;Hong Kong University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.ust.hk",
"aff_unique_abbr": "THU;HKUST", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "P_48ZG7ySK", "title": "Learning with Non-Uniform Label Noise: A Cluster-Dependent Semi-Supervised Approach", "track": "main", "status": "Withdraw", "tldr": "For the robust learning with non-uniform label noise, we propose a cluster-dependent sample selection algorithm followed by a semi-supervised training mechanism.", "abstract": "Learning with noisy labels is a challenging task in machine learning. Most existing methods explicitly or implicitly assume uniform label noise across all samples. In reality, label noise can be highly non-uniform in the feature space, with higher error rate for more difficult samples. Some recent works consider instance-dependent label noise but they require additional information such as some cleanly labeled data and confidence scores, which are usually unavailable or costly to obtain. In this paper, we consider learning with non-uniform label noise that requires no such additional information. we propose a cluster-dependent sample selection algorithm followed by a semi-supervised training mechanism based on the cluster-dependent label noise. The proposed self-adaptive multi-scale sample selection method increases the consistency of sample space by forcing the selection of clean samples from the entire feature space. Despite its simplicity, the proposed method can distinguish clean data from the corrupt ones more precisely and achieve state-of-the-art performance on image classification benchmarks, especially when the number of training samples is small and the noise rate is large.", "keywords": "Non-uniform label noise;Cluster-dependent sample selection mechanism;Semi-supervised training.", "primary_area": "", "supplementary_material": "", "author": "Mengtian Zhang;Bo Jiang;Yuye Ling;Xinbing Wang;Chenghu Zhou", "authorids": "~Mengtian_Zhang1;~Bo_Jiang2;yuye.ling@sjtu.edu.cn;~Xinbing_Wang1;~Chenghu_Zhou3", "gender": "M;M;;M;M", "homepage": "https://github.com/MattZ-99;https://jhc.sjtu.edu.cn/~bjiang/;;http://www.cs.sjtu.edu.cn/~wang-xb/;http://www.igsnrr.cas.cn/gkjj/ysfc/ysfc_zhouchenghu/", "dblp": ";34/2005-3.html;;96/1149.html;85/1324.html", "google_scholar": ";WxAIZtMAAAAJ;;https://scholar.google.com.tw/citations?user=CT5yZbwAAAAJ;", "orcid": ";;;0000-0002-0357-8356;", "linkedin": ";;;;", "or_profile": "~Mengtian_Zhang1;~Bo_Jiang2;yuye.ling@sjtu.edu.cn;~Xinbing_Wang1;~Chenghu_Zhou3", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;;Shanghai Jiaotong University;IGSNRR, Chinese Academy of Sciences, Beijing, China", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;;cs.sjtu.edu.cn;lreis.ac.cn", "position": "MS student;Associate Professor;;Full Professor;Full Professor", "bibtex": "@misc{\nzhang2023learning,\ntitle={Learning with Non-Uniform Label Noise: A Cluster-Dependent Semi-Supervised Approach},\nauthor={Mengtian Zhang and Bo Jiang and Yuye Ling and Xinbing Wang and Chenghu Zhou},\nyear={2023},\nurl={https://openreview.net/forum?id=P_48ZG7ySK}\n}", "github": "", "project": "", "reviewers": "wj2B;APd8;AJzT;FCAb", "site": "https://openreview.net/forum?id=P_48ZG7ySK", "pdf_size": 1216506, "recommendation": "3;5;5;6", "confidence": "3;3;2;3", "correctness": "3;3;2;3", "technical_novelty": "3;4;2;3", "empirical_novelty": "3;3;2;3", "wc_summary_paper": "47;22;91;100", "wc_strength_and_weaknesses": "205;174;304;109", "wc_clarity_quality_novelty_and_reproducibility": 
"13;53;56;82", "wc_summary_review": "6;28;28;18", "wc_review": "271;277;479;309", "wc_reply_reviewers": "0;0;0;47", "wc_reply_authors": "570;349;1020;344", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;2;1", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 65.0, 31.913946794465897 ], "wc_strength_and_weaknesses_avg": [ 198.0, 70.32424901838625 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 51.0, 24.66779276708802 ], "wc_summary_review_avg": [ 20.0, 9.055385138137417 ], "wc_review_avg": [ 334.0, 84.95292814258964 ], "wc_reply_reviewers_avg": [ 11.75, 20.351596988934308 ], "wc_reply_authors_avg": [ 570.75, 274.9612472695016 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.13245323570650439, "corr_recommendation_correctness": -0.13245323570650439, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Ryw5CTJXApoJ:scholar.google.com/&scioq=Learning+with+Non-Uniform+Label+Noise:+A+Cluster-Dependent+Semi-Supervised+Approach&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Shanghai Jiao Tong University;Chinese Academy of Sciences", "aff_unique_dep": ";IGSNRR", "aff_unique_url": "https://www.sjtu.edu.cn;http://www.cas.cn", "aff_unique_abbr": "SJTU;CAS", "aff_campus_unique_index": "1", "aff_campus_unique": ";Beijing", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "P_O91UpSX0M", "title": "On the Dynamics under the Averaged Sample Margin Loss and Beyond", "track": "main", "status": "Withdraw", "tldr": "We investigate the dynamics of the averaged sample margin loss and provide some insights for improvements.", "abstract": "Recent works have studied implicit biases in deep learning, especially the behavior of last-layer features and classifier weights. However, they usually need to simplify the dynamics under gradient descent due to the intractability of loss functions and neural architectures. In this paper, we introduce a concise loss function as a surrogate, namely the Averaged Sample Margin (ASM) loss, which offers more mathematical opportunities to analyze the closed-form dynamics while requiring few simplifications or assumptions, and allows for more practical considerations. Based on the layer-peeled model that views last-layer features as free optimization variables, we build a complete analysis for the unconstrained, regularized, and spherical constrained cases. We show that these dynamics mainly \\textit{converge exponentially fast} to a solution depending on the initialization of features and classifier weights, which can help explain why the training of deep neural networks usually takes only a few hundred epochs. Our theoretical results can also aid in providing insights for improvements in practical training with the ASM loss or other losses, such as explicit feature regularization and rescaled learning rate for spherical cases. 
Finally, we empirically demonstrate these theoretical results and insights with extensive experiments.", "keywords": "Implicit bias;neural collapse;gradient descent", "primary_area": "", "supplementary_material": "/attachment/c2c52c0f5b4dd802733eb3d51d06ef4b13f89ca1.zip", "author": "Xiong Zhou;Xianming Liu;Hanzhang Wang;Deming Zhai;Junjun Jiang;Xiangyang Ji", "authorids": "~Xiong_Zhou3;~Xianming_Liu5;~Hanzhang_Wang2;~Deming_Zhai2;~Junjun_Jiang2;~Xiangyang_Ji1", "gender": "M;M;M;F;M;", "homepage": "https://hitcszx.github.io/;http://homepage.hit.edu.cn/xmliu;https://wd1511.github.io;;http://homepage.hit.edu.cn/jiangjunjun;", "dblp": ";89/58201.html;;69/8937;https://dblp.uni-trier.de/pers/hd/j/Jiang:Junjun;", "google_scholar": "BMGootgAAAAJ;;;;WNH2_rgAAAAJ;", "orcid": "0000-0002-0856-6696;0000-0002-8857-1785;;;0000-0002-5694-505X;", "linkedin": ";;;;;", "or_profile": "~Xiong_Zhou3;~Xianming_Liu5;~Hanzhang_Wang2;~Deming_Zhai2;~Junjun_Jiang2;~Xiangyang_Ji1", "aff": "Harbin Institute of Technology;Harbin Institute of Technology;Harbin Institute of Technology;Harbin Institute of Technology;Harbin Institute of Technology;", "aff_domain": "hit.edu.cn;hit.edu.cn;hit.edu.cn;hit.edu.cn;hit.edu.cn;", "position": "PhD student;Full Professor;PhD student;Associate Professor;Full Professor;", "bibtex": "@misc{\nzhou2023on,\ntitle={On the Dynamics under the Averaged Sample Margin Loss and Beyond},\nauthor={Xiong Zhou and Xianming Liu and Hanzhang Wang and Deming Zhai and Junjun Jiang and Xiangyang Ji},\nyear={2023},\nurl={https://openreview.net/forum?id=P_O91UpSX0M}\n}", "github": "", "project": "", "reviewers": "MXeN;YmzU;X1fQ", "site": "https://openreview.net/forum?id=P_O91UpSX0M", "pdf_size": 1753252, "recommendation": "1;6;8", "confidence": "4;4;3", "correctness": "4;3;4", "technical_novelty": "1;3;2", "empirical_novelty": "0;3;2", "wc_summary_paper": "114;90;149", "wc_strength_and_weaknesses": "295;299;221", "wc_clarity_quality_novelty_and_reproducibility": "48;37;100", "wc_summary_review": "36;34;90", "wc_review": "493;460;560", "wc_reply_reviewers": "470;0;176", "wc_reply_authors": "2202;1058;721", "reply_reviewers": "1;0;1", "reply_authors": "6;3;2", "recommendation_avg": [ 5.0, 2.943920288775949 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.0, 0.816496580927726 ], "empirical_novelty_avg": [ 1.6666666666666667, 1.247219128924647 ], "wc_summary_paper_avg": [ 117.66666666666667, 24.225789747475496 ], "wc_strength_and_weaknesses_avg": [ 271.6666666666667, 35.86394041683404 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 61.666666666666664, 27.475241379993168 ], "wc_summary_review_avg": [ 53.333333333333336, 25.94010194445829 ], "wc_review_avg": [ 504.3333333333333, 41.60395280365664 ], "wc_reply_reviewers_avg": [ 215.33333333333334, 193.88198013797523 ], "wc_reply_authors_avg": [ 1327.0, 633.8301560092157 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 3.6666666666666665, 1.699673171197595 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.720576692122892, "corr_recommendation_correctness": -0.24019223070763068, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:63XJooh4kxgJ:scholar.google.com/&scioq=On+the+Dynamics+under+the+Averaged+Sample+Margin+Loss+and+Beyond&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Harbin 
Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.hit.edu.cn/", "aff_unique_abbr": "HIT", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Harbin", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Towards Understanding Why Mask Reconstruction Pretraining Helps in Downstream Tasks", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12049", "id": "PaEUQiY40Dk", "poster": "/media/PosterPDFs/ICLR%202023/12049.png?t=1681056209.0698743", "openreview": "https://openreview.net/forum?id=PaEUQiY40Dk", "slides": "https://iclr.cc/virtual/2023/poster/12049", "video": "https://iclr.cc/virtual/2023/poster/12049", "author_site": "Jiachun Pan, Pan Zhou, shuicheng YAN", "tldr": "", "abstract": "For unsupervised pretraining, mask-reconstruction pretraining (MRP) approaches, e.g. MAE and data2vec, randomly mask input patches and then reconstruct the pixels or semantic features of these masked patches via an auto-encoder. Then for a downstream task, supervised fine-tuning the pretrained encoder remarkably surpasses the conventional \"supervised learning\" (SL) trained from scratch. However, it is still unclear 1) how MRP performs semantic (feature) learning in the pretraining phase and 2) why it helps in downstream tasks. To solve these problems, we first theoretically show that on an auto-encoder of a two/one-layered convolution encoder/decoder, MRP can capture all discriminative semantics of each potential semantic class in the pretraining dataset. Then considering the fact that the pretraining dataset is of huge size and high diversity and thus covers most semantics in downstream dataset, in fine-tuning phase, the pretrained encoder can capture as much semantics as it can in downstream datasets, and would not lost these semantics with theoretical guarantees. In contrast, SL only randomly captures some semantics due to lottery ticket hypothesis. So MRP provably achieves better performance than SL on the classification tasks. Experimental results testify to our data assumptions and also our theoretical implications. 
", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Jiachun Pan;Pan Zhou;Shuicheng YAN", "authorids": "~Jiachun_Pan1;~Pan_Zhou3;~Shuicheng_YAN3", "gender": "F;;M", "homepage": ";;https://yanshuicheng.ai/", "dblp": "228/9156;;y/ShuichengYan", "google_scholar": ";;https://scholar.google.com.hk/citations?user=DNuiPHwAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Jiachun_Pan1;~Pan_Zhou3;~Shuicheng_YAN3", "aff": "National University of Singapore;;sea Group", "aff_domain": "u.nus.edu;;sea.com", "position": "PhD student;;Researcher", "bibtex": "@inproceedings{\npan2023towards,\ntitle={Towards Understanding Why Mask Reconstruction Pretraining Helps in Downstream Tasks},\nauthor={Jiachun Pan and Pan Zhou and Shuicheng YAN},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=PaEUQiY40Dk}\n}", "github": "", "project": "", "reviewers": "TnqJ;J85J;1jPb;GkZ3", "pdf_size": 6128687, "recommendation": "6;6;6;8", "confidence": "4;3;2;4", "correctness": "3;4;3;4", "technical_novelty": "3;3;3;4", "empirical_novelty": "4;2;3;2", "wc_summary_paper": "69;38;50;76", "wc_strength_and_weaknesses": "255;117;269;132", "wc_clarity_quality_novelty_and_reproducibility": "7;86;15;35", "wc_summary_review": "20;38;16;60", "wc_review": "351;279;350;303", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1125;615;733;459", "reply_reviewers": "0;0;0;0", "reply_authors": "2;1;1;1", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 58.25, 15.07274029498286 ], "wc_strength_and_weaknesses_avg": [ 193.25, 69.13166785200542 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 35.75, 30.75203245315665 ], "wc_summary_review_avg": [ 33.5, 17.399712641305314 ], "wc_review_avg": [ 320.75, 30.93844695520446 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 733.0, 246.30468935852602 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.5222329678670935, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15417509637182016908&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=PaEUQiY40Dk", "email": "u.nus.edu;;sea.com", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "National University of Singapore;Sea Group", "aff_unique_dep": ";", "aff_unique_url": "https://www.nus.edu.sg;", "aff_unique_abbr": "NUS;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0", "aff_country_unique": "Singapore;" }, { "id": "Pb-SC2gFOO", "title": "Backdoor Attacks in the Supply Chain of Masked Image Modeling", "track": "main", "status": "Withdraw", "tldr": "In this paper, we perform the first security risk quantification of MIM through the lens of backdoor attacks.", "abstract": "Masked image modeling (MIM) revolutionizes self-supervised learning (SSL) for image pre-training. In contrast to previous dominating self-supervised methods, i.e., contrastive learning, MIM attains state-of-the-art performance by masking and reconstructing random patches of the input image. 
However, the associated security and privacy risks of this novel generative method are unexplored. In this paper, we perform the first security risk quantification of MIM through the lens of backdoor attacks. Different from previous work, we are the first to systematically perform threat modeling on SSL in every phase of the model supply chain, i.e., the pre-training, release, and downstream phases. Our evaluation shows that models built with MIM are vulnerable to existing backdoor attacks in the release and downstream phases and are compromised by our proposed method in the pre-training phase. For instance, on the CIFAR10 dataset, the attack success rate can reach 99.62%, 96.48%, and 98.89% in the downstream phase, release phase, and pre-training phase, respectively. We also take the first step to investigate the success factors of backdoor attacks in the pre-training phase and find that the trigger number and trigger pattern play key roles in the success of backdoor attacks, while the trigger location has only a tiny effect. In the end, our empirical study of defense mechanisms across three detection levels over the model supply chain phases indicates that different defenses are suitable for backdoor attacks in different phases. However, backdoor attacks in the release phase cannot be detected by any of the three detection-level methods, calling for more effective defenses in future research.", "keywords": "backdoor attack;masked image modeling", "primary_area": "", "supplementary_material": "", "author": "Xinyue Shen;Xinlei He;Zheng Li;Yun Shen;Michael Backes;Yang Zhang", "authorids": "~Xinyue_Shen2;~Xinlei_He1;~Zheng_Li17;~Yun_Shen3;~Michael_Backes1;~Yang_Zhang15", "gender": ";M;M;M;;M", "homepage": ";https://xinleihe.github.io/;https://zhenglisec.github.io/;https://uk.linkedin.com/in/yun-shen-24336257;;https://yangzhangalmo.github.io/", "dblp": ";227/7262;10/1143-23;;;06/6785-16", "google_scholar": ";6hZNEtoAAAAJ;xEAaaGsAAAAJ;Gx_JJ6cAAAAJ;;Xeb2888AAAAJ", "orcid": ";;0000-0002-4466-7523;;;0000-0003-3612-7348", "linkedin": ";;;;;", "or_profile": "~Xinyue_Shen2;~Xinlei_He1;~Zheng_Li17;~Yun_Shen3;~Michael_Backes1;~Yang_Zhang15", "aff": ";CISPA Helmholtz Center for Information Security;CISPA Helmholtz Center for Information Security;NetApp;;CISPA Helmholtz Center for Information Security", "aff_domain": ";cispa.de;cispa.de;netapp.com;;cispa.de", "position": ";PhD student;PhD student;Technical Director;;Assistant Professor", "bibtex": "@misc{\nshen2023backdoor,\ntitle={Backdoor Attacks in the Supply Chain of Masked Image Modeling},\nauthor={Xinyue Shen and Xinlei He and Zheng Li and Yun Shen and Michael Backes and Yang Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=Pb-SC2gFOO}\n}", "github": "", "project": "", "reviewers": "ygbg;dVjs;SAmu;a653", "site": "https://openreview.net/forum?id=Pb-SC2gFOO", "pdf_size": 1065668, "recommendation": "3;3;5;5", "confidence": "4;3;4;4", "correctness": "3;1;3;3", "technical_novelty": "2;1;2;2", "empirical_novelty": "2;0;2;3", "wc_summary_paper": "84;27;61;76", "wc_strength_and_weaknesses": "389;273;466;339", "wc_clarity_quality_novelty_and_reproducibility": "13;3;78;9", "wc_summary_review": "35;3;48;46", "wc_review": "521;306;653;470", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.5, 0.8660254037844386 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.75, 1.0897247358851685
], "wc_summary_paper_avg": [ 62.0, 21.828879952943073 ], "wc_strength_and_weaknesses_avg": [ 366.75, 70.54209736037056 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 25.75, 30.375771595138122 ], "wc_summary_review_avg": [ 33.0, 18.01388353465182 ], "wc_review_avg": [ 487.5, 124.25880250509418 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16420481422559174135&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "CISPA Helmholtz Center for Information Security;NetApp", "aff_unique_dep": ";", "aff_unique_url": "https://www.cispa.de/;https://www.netapp.com", "aff_unique_abbr": "CISPA;NetApp", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Germany;United States" }, { "id": "PbXfwJEyKXT", "title": "Do We Really Need Graph Models for Skeleton-Based Action Recognition? A Topology-Agnostic Approach with Fully-Connected Networks", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Graph Convolutional Networks (GCNs) have been dominating skeleton-based action recognition in recent years. While GCN-based approaches keep establishing new state-of-the-art results, the proposed architectures are getting increasingly sophisticated with a variety of add-ons. Many recent works attempt to relax the topology restriction imposed by the GCN framework, such as local/sparse connections and permutation invariance. However, the room for further innovation is extremely limited under such a framework. In this work, we present Topology-Agnostic Network (ToANet), a simple architecture based merely on Fully-Connected (FC) layers, as opposed to GCNs for skeleton-based action recognition. It is constructed by chaining FC layers applied across joints (aggregate joint information) and within each joint (transform joint features) in an alternate manner. Moreover, it contains a novel design of parallel paths for multi-relational modeling. ToANet proves to be a powerful architecture for learning the joint co-occurrence of human skeleton data. ToANet achieves better or comparable results to state-of-the-art GCNs on NTU RGB+D, NTU RGB+D 120 and Northwestern-UCLA datasets. These results challenge the convention of choosing GCNs as the de-facto option for skeleton-based action recognition. 
We hope that our work stimulates further research on non-GCN based methods, eliminating the restriction of topology.", "keywords": "skeleton-based;action recognition;topology-agnostic;fully-connected", "primary_area": "", "supplementary_material": "/attachment/c5f54b7660f6612a97b70bf62427d71ef612c43f.zip", "author": "Yuxuan Zhou;Chao Li;Biao Wang;Margret Keuper", "authorids": "~Yuxuan_Zhou2;~Chao_Li17;~Biao_Wang1;~Margret_Keuper1", "gender": "M;M;M;F", "homepage": ";;;https://www.vc.informatik.uni-siegen.de/en/keuper-margret", "dblp": "172/9870-4.html;66/190-64.html;https://dblp.org/pers/w/Wang:Biao.html;95/7589", "google_scholar": "ooVdh_kAAAAJ;o6zc8HMAAAAJ;Ltq_kHEAAAAJ;https://scholar.google.de/citations?user=KMqMQAcAAAAJ", "orcid": ";;;0000-0002-8437-7993", "linkedin": ";;;", "or_profile": "~Yuxuan_Zhou2;~Chao_Li17;~Biao_Wang1;~Margret_Keuper1", "aff": "Universit\u00e4t Mannheim;Alibaba Group;Alibaba Group;Universit\u00e4t Siegen", "aff_domain": "uni-mannheim.de;alibaba-inc.com;alibaba-inc.com;uni-siegen.de", "position": "PhD student;Researcher;Engineer;Full Professor", "bibtex": "@misc{\nzhou2023do,\ntitle={Do We Really Need Graph Models for Skeleton-Based Action Recognition? A Topology-Agnostic Approach with Fully-Connected Networks},\nauthor={Yuxuan Zhou and Chao Li and Biao Wang and Margret Keuper},\nyear={2023},\nurl={https://openreview.net/forum?id=PbXfwJEyKXT}\n}", "github": "", "project": "", "reviewers": "193b;BgLU;Ppr5;DMNf", "site": "https://openreview.net/forum?id=PbXfwJEyKXT", "pdf_size": 1345014, "recommendation": "5;5;5;5", "confidence": "5;5;4;5", "correctness": "3;3;3;3", "technical_novelty": "3;2;2;2", "empirical_novelty": "2;2;3;0", "wc_summary_paper": "52;27;40;32", "wc_strength_and_weaknesses": "96;97;184;112", "wc_clarity_quality_novelty_and_reproducibility": "15;45;38;7", "wc_summary_review": "198;28;43;20", "wc_review": "361;197;305;171", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "410;275;429;211", "reply_reviewers": "0;0;0;0", "reply_authors": "2;1;2;2", "recommendation_avg": [ 5.0, 0.0 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.75, 1.0897247358851685 ], "wc_summary_paper_avg": [ 37.75, 9.443913383762052 ], "wc_strength_and_weaknesses_avg": [ 122.25, 36.21032311371993 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 26.25, 15.706288549495072 ], "wc_summary_review_avg": [ 72.25, 73.06974408057003 ], "wc_review_avg": [ 258.5, 77.63214540382096 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 331.25, 91.35199778877308 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:riEb4Xhea0AJ:scholar.google.com/&scioq=Do+We+Really+Need+Graph+Models+for+Skeleton-Based+Action+Recognition%3F+A+Topology-Agnostic+Approach+with+Fully-Connected+Networks&hl=en&as_sdt=0,23", "gs_version_total": 0, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "University of Mannheim;Alibaba Group;University of Siegen", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uni-mannheim.de;https://www.alibaba.com;https://www.uni-siegen.de", "aff_unique_abbr": "UM;Alibaba;Uni Siegen", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0", 
"aff_country_unique": "Germany;China" }, { "title": "Learning Controllable Adaptive Simulation for Multi-resolution Physics", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11428", "id": "PbfgkZ2HdbE", "poster": "/media/PosterPDFs/ICLR%202023/11428.png?t=1681493684.3931057", "openreview": "https://openreview.net/forum?id=PbfgkZ2HdbE", "slides": "https://iclr.cc/virtual/2023/poster/11428", "video": "https://iclr.cc/virtual/2023/poster/11428", "author_site": "Tailin Wu, Takashi Maruyama, Qingqing Zhao, Gordon Wetzstein, Jure Leskovec", "tldr": "We introduce a method jointly learns the surrogate model and dynamically selects appropriate spatial resolutions that devote more compute to the highly dynamic regions", "abstract": "Simulating the time evolution of physical systems is pivotal in many scientific and engineering problems. An open challenge in simulating such systems is their multi-resolution dynamics: a small fraction of the system is extremely dynamic, and requires very fine-grained resolution, while a majority of the system is changing slowly and can be modeled by coarser spatial scales. Typical learning-based surrogate models use a uniform spatial scale, which needs to resolve to the finest required scale and can waste a huge compute to achieve required accuracy. In this work, we introduce Learning controllable Adaptive simulation for Multi-resolution Physics (LAMP) as the first full deep learning-based surrogate model that jointly learns the evolution model and optimizes appropriate spatial resolutions that devote more compute to the highly dynamic regions. LAMP consists of a Graph Neural Network (GNN) for learning the forward evolution, and a GNN-based actor-critic for learning the policy of spatial refinement and coarsening. We introduce learning techniques that optimizes LAMP with weighted sum of error and computational cost as objective, allowing LAMP to adapt to varying relative importance of error vs. computation tradeoff at inference time. We evaluate our method in a 1D benchmark of nonlinear PDEs and a challenging 2D mesh-based simulation. We demonstrate that our LAMP outperforms state-of-the-art deep learning surrogate models, and can adaptively trade-off computation to improve long-term prediction error: it achieves an average of 33.7% error reduction for 1D nonlinear PDEs, and outperforms MeshGraphNets + classical Adaptive Mesh Refinement (AMR) in 2D mesh-based simulations. Project website with data and code can be found at: http://snap.stanford.edu/lamp.", "keywords": "adaptive;multi-scale;error vs. 
computation;controllable", "primary_area": "", "supplementary_material": "/attachment/5f94078815937f73bf15a1acee2f32ebf16e1902.zip", "author": "Tailin Wu;Takashi Maruyama;Qingqing Zhao;Gordon Wetzstein;Jure Leskovec", "authorids": "~Tailin_Wu1;~Takashi_Maruyama2;~Qingqing_Zhao1;~Gordon_Wetzstein3;~Jure_Leskovec1", "gender": "M;M;F;M;", "homepage": "http://tailin.org;https://sites.google.com/view/tmaruyama/home;https://qingqing-zhao.github.io/;http://web.stanford.edu/~gordonwz/;http://cs.stanford.edu/~jure/", "dblp": "200/8994;15/1541;;13/4660;l/JureLeskovec", "google_scholar": "https://scholar.google.com/citations?hl=en;;https://scholar.google.com/citations?hl=en;VOf45S0AAAAJ;Q_kKkIUAAAAJ", "orcid": "0009-0003-1586-0820;;;0000-0002-9243-6885;0000-0002-5411-923X", "linkedin": ";;;gordon-wetzstein-2406723/;leskovec/", "or_profile": "~Tailin_Wu1;~Takashi_Maruyama2;~Qingqing_Zhao1;~Gordon_Wetzstein3;~Jure_Leskovec1", "aff": "Stanford University;NEC;Stanford University;Stanford University;Kumo.AI", "aff_domain": "stanford.edu;nec.com;stanford.edu;stanford.edu;kumo.ai", "position": "Postdoc;Researcher;PhD student;Associate Professor;Chief Scientist", "bibtex": "@inproceedings{\nwu2023learning,\ntitle={Learning Controllable Adaptive Simulation for Multi-resolution Physics},\nauthor={Tailin Wu and Takashi Maruyama and Qingqing Zhao and Gordon Wetzstein and Jure Leskovec},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=PbfgkZ2HdbE}\n}", "github": "", "project": "", "reviewers": "WPia;tR71;mWzR;vMEa", "pdf_size": 7614934, "recommendation": "5;6;8;8", "confidence": "3;4;3;4", "correctness": "3;3;3;4", "technical_novelty": "3;2;3;4", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "81;64;192;35", "wc_strength_and_weaknesses": "246;63;557;526", "wc_clarity_quality_novelty_and_reproducibility": "207;490;35;85", "wc_summary_review": "53;47;48;254", "wc_review": "587;664;832;900", "wc_reply_reviewers": "284;0;0;44", "wc_reply_authors": "1848;2208;1433;2882", "reply_reviewers": "1;0;0;1", "reply_authors": "4;4;2;6", "recommendation_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 93.0, 59.476886266851594 ], "wc_strength_and_weaknesses_avg": [ 348.0, 204.324496818174 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 204.25, 176.4417396763022 ], "wc_summary_review_avg": [ 100.5, 88.65241113472324 ], "wc_review_avg": [ 745.75, 125.61523593895765 ], "wc_reply_reviewers_avg": [ 82.0, 118.0 ], "wc_reply_authors_avg": [ 2092.75, 531.829566214591 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 4.0, 1.4142135623730951 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.19245008972987526, "corr_recommendation_correctness": 0.5555555555555555, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10471917644951801634&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=PbfgkZ2HdbE", "email": "stanford.edu;nec.com;stanford.edu;stanford.edu;kumo.ai", "author_num": 5, "aff_unique_index": "0;1;0;0;2", "aff_unique_norm": "Stanford University;NEC Corporation;Kumo.AI", "aff_unique_dep": ";;", "aff_unique_url": "https://www.stanford.edu;https://www.nec.com;https://www.kumo.ai", "aff_unique_abbr": "Stanford;NEC;Kumo.AI", 
"aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "United States;Japan" }, { "title": "Pessimism in the Face of Confounders: Provably Efficient Offline Reinforcement Learning in Partially Observable Markov Decision Processes", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11945", "id": "PbkBDQ5_UbV", "poster": "", "openreview": "https://openreview.net/forum?id=PbkBDQ5_UbV", "slides": "https://iclr.cc/virtual/2023/poster/11945", "video": "https://iclr.cc/virtual/2023/poster/11945", "author_site": "Miao Lu, Yifei Min, Zhaoran Wang, Zhuoran Yang", "tldr": "", "abstract": "We study offline reinforcement learning (RL) in partially observable Markov decision processes. In particular, we aim to learn an optimal policy from a dataset collected by a behavior policy which possibly depends on the latent state. Such a dataset is confounded in the sense that the latent state simultaneously affects the action and the observation, which is prohibitive for existing offline RL algorithms. To this end, we propose the \\underline{P}roxy variable \\underline{P}essimistic \\underline{P}olicy \\underline{O}ptimization (\\texttt{P3O}) algorithm, which addresses the confounding bias and the distributional shift between the optimal and behavior policies in the context of general function approximation. At the core of \\texttt{P3O} is a coupled sequence of pessimistic confidence regions constructed via proximal causal inference, which is formulated as minimax estimation. Under a partial coverage assumption on the confounded dataset, we prove that \\texttt{P3O} achieves a $n^{-1/2}$-suboptimality, where $n$ is the number of trajectories in the dataset. To our best knowledge, \\texttt{P3O} is the first provably efficient offline RL algorithm for POMDPs with a confounded dataset.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/dabd496e757cf0fa88413358a5bf2f84b6d9fcb2.zip", "author": "Miao Lu;Yifei Min;Zhaoran Wang;Zhuoran Yang", "authorids": "~Miao_Lu3;~Yifei_Min1;~Zhaoran_Wang1;~Zhuoran_Yang1", "gender": ";;Not Specified;M", "homepage": "https://miaolu3.github.io;;https://zhaoranwang.github.io/;https://zhuoranyang.github.io/", "dblp": "09/1168;;117/2756;", "google_scholar": "3jS17zQAAAAJ;;https://scholar.google.com.tw/citations?user=HSx0BgQAAAAJ;", "orcid": ";;;", "linkedin": "miao-lu-5bb9a31aa/;;;", "or_profile": "~Miao_Lu3;~Yifei_Min1;~Zhaoran_Wang1;~Zhuoran_Yang1", "aff": "University of Science and Technology of China;;;Yale University", "aff_domain": "ustc.edu.cn;;;yale.edu", "position": "Undergrad student;;;Assistant Professor", "bibtex": "@inproceedings{\nlu2023pessimism,\ntitle={Pessimism in the Face of Confounders: Provably Efficient Offline Reinforcement Learning in Partially Observable Markov Decision Processes},\nauthor={Miao Lu and Yifei Min and Zhaoran Wang and Zhuoran Yang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=PbkBDQ5_UbV}\n}", "github": "", "project": "", "reviewers": "bBxr;gsEb;MuiH;ttMi", "pdf_size": 632515, "recommendation": "6;6;6;6", "confidence": "3;4;4;2", "correctness": "4;3;3;4", "technical_novelty": "2;3;3;3", "empirical_novelty": "0;0;0;0", "wc_summary_paper": "73;82;151;103", "wc_strength_and_weaknesses": "138;116;211;95", "wc_clarity_quality_novelty_and_reproducibility": "11;49;12;4", "wc_summary_review": "34;30;17;26", "wc_review": 
"256;277;391;228", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "847;1327;1687;733", "reply_reviewers": "0;0;0;0", "reply_authors": "3;4;4;2", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 0, 0 ], "wc_summary_paper_avg": [ 102.25, 30.177599308096063 ], "wc_strength_and_weaknesses_avg": [ 140.0, 43.72070447739835 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 19.0, 17.592612085759182 ], "wc_summary_review_avg": [ 26.75, 6.299801584177076 ], "wc_review_avg": [ 288.0, 61.95562928419015 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1148.5, 382.55293751322836 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 3.25, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 34, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11251430093999098707&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "pdf": "https://openreview.net/pdf?id=PbkBDQ5_UbV", "email": "ustc.edu.cn;;;yale.edu", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "University of Science and Technology of China;Yale University", "aff_unique_dep": ";", "aff_unique_url": "http://www.ustc.edu.cn;https://www.yale.edu", "aff_unique_abbr": "USTC;Yale", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "China;United States" }, { "id": "PcR6Lir5mxu", "title": "Planning With Uncertainty: Deep Exploration in Model-Based Reinforcement Learning", "track": "main", "status": "Reject", "tldr": "Demonstrating deep exploration with MuZero by planning optimistically with epistemic uncertainty", "abstract": "Deep model-based reinforcement learning has shown super-human performance in many challenging domains. Low sample efficiency and limited exploration remain however as leading obstacles in the field. In this paper, we demonstrate deep exploration in model-based RL by incorporating epistemic uncertainty into planning trees, circumventing the standard approach of propagating uncertainty through value learning. We evaluate this approach with the state of the art model-based RL algorithm MuZero, and extend its training process to stabilize learning from explicitly-exploratory decisions. Our results demonstrate that planning with uncertainty is able to achieve effective deep exploration with standard uncertainty estimation mechanisms, and with it significant gains in sample efficiency.", "keywords": "Reinforcement learning;exploration;uncertainty;planning", "primary_area": "", "supplementary_material": "/attachment/e31a3a8d7a46bb3b7759455864f5e349e29a0ce9.zip", "author": "Yaniv Oren;Matthijs T. J. 
Spaan;Wendelin Boehmer", "authorids": "~Yaniv_Oren1;~Matthijs_T._J._Spaan1;~Wendelin_Boehmer1", "gender": "M;;M", "homepage": ";;https://reinforceAI.net", "dblp": "331/8510;;08/9988", "google_scholar": ";;https://scholar.google.de/citations?user=wI5MV8IAAAAJ", "orcid": "0000-0003-0155-5000;;0000-0002-4398-6792", "linkedin": "yaniv-oren-9a2a83135;;wendelin-boehmer", "or_profile": "~Yaniv_Oren1;~Matthijs_T._J._Spaan1;~Wendelin_Boehmer1", "aff": "Delft University of Technology;;Delft University of Technology", "aff_domain": "tudelft.nl;;tudelft.nl", "position": "PhD student;;Assistant Professor", "bibtex": "@misc{\noren2023planning,\ntitle={Planning With Uncertainty: Deep Exploration in Model-Based Reinforcement Learning},\nauthor={Yaniv Oren and Matthijs T. J. Spaan and Wendelin Boehmer},\nyear={2023},\nurl={https://openreview.net/forum?id=PcR6Lir5mxu}\n}", "github": "", "project": "", "reviewers": "6utc;3Uzt;YegC;u2v3", "site": "https://openreview.net/forum?id=PcR6Lir5mxu", "pdf_size": 564396, "recommendation": "3;3;3;5", "confidence": "3;4;4;3", "correctness": "2;4;2;3", "technical_novelty": "3;3;3;2", "empirical_novelty": "2;3;2;2", "wc_summary_paper": "87;36;235;42", "wc_strength_and_weaknesses": "312;87;271;102", "wc_clarity_quality_novelty_and_reproducibility": "190;41;45;32", "wc_summary_review": "56;123;71;16", "wc_review": "645;287;622;192", "wc_reply_reviewers": "24;45;23;0", "wc_reply_authors": "360;70;83;146", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.75, 0.82915619758885 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 100.0, 80.39589541761445 ], "wc_strength_and_weaknesses_avg": [ 193.0, 99.7020561473032 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 77.0, 65.41024384605213 ], "wc_summary_review_avg": [ 66.5, 38.31775045589185 ], "wc_review_avg": [ 436.5, 200.00812483496765 ], "wc_reply_reviewers_avg": [ 23.0, 15.921683328090658 ], "wc_reply_authors_avg": [ 164.75, 116.3343779800279 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.17407765595569782, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11574570215564085428&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Delft University of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.tudelft.nl", "aff_unique_abbr": "TU Delft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Netherlands" }, { "id": "Pe7R48fCkM_", "title": "Unravel Structured Heterogeneity of Tasks in Meta-Reinforcement Learning via Exploratory Clustering", "track": "main", "status": "Reject", "tldr": "We propose a method to automatically discover and utilize the cluster structures of tasks for meta-reinforcement learning.", "abstract": "Meta-reinforcement learning (meta-RL) is developed to quickly solve new tasks by leveraging knowledge from prior tasks. The assumption that tasks are drawn IID is typically made in previous studies, which ignore possible structured heterogeneity of tasks. The non-transferable knowledge caused by structured heterogeneity hinders fast adaptation in new tasks. 
In this paper, we formulate the structured heterogeneity of tasks via clustering such that transferable knowledge can be inferred within different clusters and non-transferable knowledge is thereby excluded across clusters. To facilitate this, we develop a dedicated exploratory policy to discover task clusters by reducing uncertainty in posterior inference. Within the identified clusters, the exploitation policy is able to solve related tasks by utilizing knowledge shared within the clusters. Experiments on various MuJoCo tasks show that the proposed method can unravel cluster structures effectively in both rewards and state dynamics, demonstrating strong advantages over a set of state-of-the-art baselines.", "keywords": "Meta Reinforcement Learning;Variational Inference", "primary_area": "", "supplementary_material": "/attachment/b7418b93f3b6414597a364528b5ae5b1464edd97.zip", "author": "Zhendong Chu;Hongning Wang", "authorids": "~Zhendong_Chu1;~Hongning_Wang1", "gender": "M;M", "homepage": "https://zdchu.github.io/;http://www.cs.virginia.edu/~hw5x/", "dblp": "236/6321;05/6545", "google_scholar": ";qkdvKNoAAAAJ", "orcid": ";0000-0002-6524-9195", "linkedin": ";", "or_profile": "~Zhendong_Chu1;~Hongning_Wang1", "aff": "University of Virginia;University of Virginia", "aff_domain": "virginia.edu;virginia.edu", "position": "PhD student;Associate Professor", "bibtex": "@misc{\nchu2023unravel,\ntitle={Unravel Structured Heterogeneity of Tasks in Meta-Reinforcement Learning via Exploratory Clustering},\nauthor={Zhendong Chu and Hongning Wang},\nyear={2023},\nurl={https://openreview.net/forum?id=Pe7R48fCkM_}\n}", "github": "", "project": "", "reviewers": "XnEo;cxKJ;XdEV;nQz2", "site": "https://openreview.net/forum?id=Pe7R48fCkM_", "pdf_size": 6722868, "recommendation": "5;5;5;6", "confidence": "3;4;4;4", "correctness": "3;2;2;4", "technical_novelty": "2;3;3;3", "empirical_novelty": "3;0;3;3", "wc_summary_paper": "144;60;49;141", "wc_strength_and_weaknesses": "220;517;618;169", "wc_clarity_quality_novelty_and_reproducibility": "143;32;90;302", "wc_summary_review": "22;66;43;90", "wc_review": "529;675;800;702", "wc_reply_reviewers": "28;0;534;72", "wc_reply_authors": "342;303;1696;576", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;4;1", "recommendation_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.82915619758885 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 1.299038105676658 ], "wc_summary_paper_avg": [ 98.5, 44.18427322023075 ], "wc_strength_and_weaknesses_avg": [ 381.0, 190.74197230814198 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 141.75, 100.50466407087782 ], "wc_summary_review_avg": [ 55.25, 25.390697115282205 ], "wc_review_avg": [ 676.5, 97.03221114660842 ], "wc_reply_reviewers_avg": [ 158.5, 218.3088408654125 ], "wc_reply_authors_avg": [ 729.25, 567.834207757863 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 0.8703882797784891, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:UiC5RpGwt9MJ:scholar.google.com/&scioq=Unravel+Structured+Heterogeneity+of+Tasks+in+Meta-Reinforcement+Learning+via+Exploratory+Clustering&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "University of Virginia",
"aff_unique_dep": "", "aff_unique_url": "https://www.virginia.edu", "aff_unique_abbr": "UVA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Preference Transformer: Modeling Human Preferences using Transformers for RL", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11308", "id": "Peot1SFDX0", "poster": "", "openreview": "https://openreview.net/forum?id=Peot1SFDX0", "slides": "https://iclr.cc/virtual/2023/poster/11308", "video": "https://iclr.cc/virtual/2023/poster/11308", "author_site": "Changyeon Kim, Jongjin Park, Jinwoo Shin, Honglak Lee, Pieter Abbeel, Kimin Lee", "tldr": "We introduce a transformer-based architecture for preference-based RL considering non-Markovian rewards.", "abstract": "Preference-based reinforcement learning (RL) provides a framework to train agents using human preferences between two behaviors. However, preference-based RL has been challenging to scale since it requires a large amount of human feedback to learn a reward function aligned with human intent. In this paper, we present Preference Transformer, a neural architecture that models human preferences using transformers. Unlike prior approaches assuming human judgment is based on the Markovian rewards which contribute to the decision equally, we introduce a new preference model based on the weighted sum of non-Markovian rewards. We then design the proposed preference model using a transformer architecture that stacks causal and bidirectional self-attention layers. We demonstrate that Preference Transformer can solve a variety of control tasks using real human preferences, while prior approaches fail to work. We also show that Preference Transformer can induce a well-specified reward and attend to critical events in the trajectory by automatically capturing the temporal dependencies in human decision-making. 
Code is available on the project website: https://sites.google.com/view/preference-transformer.", "keywords": "preference-based reinforcement learning;human-in-the-loop reinforcement learning;deep reinforcement learning", "primary_area": "", "supplementary_material": "/attachment/e57a7408350798f6d4a24a6a6a52e377bdceda87.zip", "author": "Changyeon Kim;Jongjin Park;Jinwoo Shin;Honglak Lee;Pieter Abbeel;Kimin Lee", "authorids": "~Changyeon_Kim1;~Jongjin_Park1;~Jinwoo_Shin1;~Honglak_Lee2;~Pieter_Abbeel2;~Kimin_Lee1", "gender": "M;M;M;M;M;M", "homepage": "https://changyeon.page;;https://sites.google.com/site/mijirim/;https://people.eecs.berkeley.edu/~pabbeel/;https://sites.google.com/view/kiminlee;http://web.eecs.umich.edu/~honglak", "dblp": "271/7767;30/1783;31/7062;;183/6849;58/2562", "google_scholar": "vEPeAaYAAAAJ;F9DGEgEAAAAJ;https://scholar.google.com.tw/citations?user=m3eDp7kAAAAJ;https://scholar.google.com.tw/citations?user=vtwH6GkAAAAJ;92M8xv4AAAAJ;fmSHtE8AAAAJ", "orcid": ";;;;;", "linkedin": "changyeon-kim-29972b194/;;;;;", "or_profile": "~Changyeon_Kim1;~Jongjin_Park1;~Jinwoo_Shin1;~Pieter_Abbeel2;~Kimin_Lee1;~Honglak_Lee1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Covariant;Google;University of Michigan", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;covariant.ai;google.com;umich.edu", "position": "MS student;PhD student;Full Professor;Founder;Researcher;Associate Professor", "bibtex": "@inproceedings{\nkim2023preference,\ntitle={Preference Transformer: Modeling Human Preferences using Transformers for {RL}},\nauthor={Changyeon Kim and Jongjin Park and Jinwoo Shin and Honglak Lee and Pieter Abbeel and Kimin Lee},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=Peot1SFDX0}\n}", "github": "", "project": "", "reviewers": "v17x;EobQ;gNsz;WbtG", "pdf_size": 16906413, "recommendation": "5;6;6;8", "confidence": "3;3;3;2", "correctness": "3;3;3;3", "technical_novelty": "3;3;2;3", "empirical_novelty": "3;3;2;3", "wc_summary_paper": "99;50;65;48", "wc_strength_and_weaknesses": "279;52;335;131", "wc_clarity_quality_novelty_and_reproducibility": "278;63;2;29", "wc_summary_review": "71;28;117;51", "wc_review": "727;193;519;259", "wc_reply_reviewers": "115;0;0;0", "wc_reply_authors": "764;260;696;181", "reply_reviewers": "2;0;0;0", "reply_authors": "3;1;1;1", "recommendation_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 65.5, 20.426698215815495 ], "wc_strength_and_weaknesses_avg": [ 199.25, 113.05833671162866 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 93.0, 108.97476772170702 ], "wc_summary_review_avg": [ 66.75, 32.7595405950694 ], "wc_review_avg": [ 424.5, 212.9665466687198 ], "wc_reply_reviewers_avg": [ 28.75, 49.79646071760522 ], "wc_reply_authors_avg": [ 475.25, 257.4018016642463 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.9271726499455306, "corr_recommendation_correctness": 0.0, "gs_citation": 91, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17617074391552973494&as_sdt=2005&sciodt=0,5&hl=en", 
"gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=Peot1SFDX0", "email": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;covariant.ai;google.com;umich.edu", "author_num": 6, "aff_unique_index": "0;0;0;1;2;3", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Covariant;Google;University of Michigan", "aff_unique_dep": ";;Google;", "aff_unique_url": "https://www.kaist.ac.kr;;https://www.google.com;https://www.umich.edu", "aff_unique_abbr": "KAIST;;Google;UM", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;2;2", "aff_country_unique": "South Korea;;United States" }, { "id": "PfHk0P9lgMy", "title": "Expected Gradients of Maxout Networks and Consequences to Parameter Initialization", "track": "main", "status": "Reject", "tldr": "We bound the gradients of a maxout network, formulate a parameter initialization strategy, and obtain results on expressivity and NTK.", "abstract": "We study the gradients of a maxout network with respect to inputs and parameters and obtain bounds for the moments depending on the architecture and the parameter distribution. We observe that the distribution of the input-output Jacobian depends on the input, which complicates a stable parameter initialization. Based on the moments of the gradients, we formulate parameter initialization strategies that avoid vanishing and exploding gradients in wide networks. Experiments with deep fully-connected and convolutional networks show that this strategy improves SGD and Adam training of deep maxout networks. In addition, we obtain refined bounds on the expected number of linear regions, results on the expected curve length distortion, and results on the NTK. ", "keywords": "maxout unit;input-output Jacobian;parameter initialization;expressivity;linear regions;curve distortion;NTK", "primary_area": "", "supplementary_material": "", "author": "Hanna Tseran;Guido Montufar", "authorids": "~Hanna_Tseran1;~Guido_Montufar1", "gender": "F;M", "homepage": "https://hanna-tseran.github.io/;http://www.math.ucla.edu/~montufar/", "dblp": "296/3738;", "google_scholar": "Ru1jQaAAAAAJ;https://scholar.google.de/citations?user=pDIuuVwAAAAJ", "orcid": ";0000-0002-0131-2669", "linkedin": "hanna-tseran/;", "or_profile": "~Hanna_Tseran1;~Guido_Montufar1", "aff": "Max-Planck Institute for Mathematics in the Sciences;UCLA ", "aff_domain": "mpg.de;math.ucla.edu", "position": "PhD student;Associate Professor", "bibtex": "@misc{\ntseran2023expected,\ntitle={Expected Gradients of Maxout Networks and Consequences to Parameter Initialization},\nauthor={Hanna Tseran and Guido Montufar},\nyear={2023},\nurl={https://openreview.net/forum?id=PfHk0P9lgMy}\n}", "github": "", "project": "", "reviewers": "pCnC;RjF3;Fz1h;uU8d;3PQc", "site": "https://openreview.net/forum?id=PfHk0P9lgMy", "pdf_size": 2656247, "recommendation": "5;6;6;6;8", "confidence": "4;2;2;2;2", "correctness": "3;3;2;4;4", "technical_novelty": "3;3;3;3;3", "empirical_novelty": "1;3;3;2;3", "wc_summary_paper": "84;65;36;75;70", "wc_strength_and_weaknesses": "198;178;116;268;72", "wc_clarity_quality_novelty_and_reproducibility": "83;311;19;10;128", "wc_summary_review": "113;60;51;26;28", "wc_review": "478;614;222;379;298", "wc_reply_reviewers": "0;46;11;29;0", "wc_reply_authors": "1695;1598;704;1466;183", "reply_reviewers": "0;1;1;1;0", "reply_authors": "4;3;1;3;1", "recommendation_avg": [ 6.2, 0.9797958971132712 ], "confidence_avg": [ 2.4, 0.8000000000000002 ], "correctness_avg": [ 3.2, 0.7483314773547882 ], 
"technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.4, 0.8 ], "wc_summary_paper_avg": [ 66.0, 16.260381299342274 ], "wc_strength_and_weaknesses_avg": [ 166.4, 67.6863354008769 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 110.2, 109.30215002459926 ], "wc_summary_review_avg": [ 55.6, 31.537913691301775 ], "wc_review_avg": [ 398.2, 137.39927219603456 ], "wc_reply_reviewers_avg": [ 17.2, 17.88183435780569 ], "wc_reply_authors_avg": [ 1129.2, 588.120191797561 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 2.4, 1.2000000000000002 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.6123724356957945, "corr_recommendation_correctness": 0.4909902530309828, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4490346738888998506&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff_unique_index": "0;1", "aff_unique_norm": "Max-Planck Institute for Mathematics in the Sciences;University of California, Los Angeles", "aff_unique_dep": "Mathematics;", "aff_unique_url": "https://www.mis.mpg.de;https://www.ucla.edu", "aff_unique_abbr": "MPI MIS;UCLA", "aff_campus_unique_index": "1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;1", "aff_country_unique": "Germany;United States" }, { "id": "PfJrZvtoWUd", "title": "Dimensionality-Varying Diffusion Process", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Diffusion models, which learn to reverse a signal destruction process to generate new data, typically require the signal at each step to have the same dimension. We argue that, considering the spatial redundancy in image signals, there is no need to maintain a high dimensionality in the evolution process, especially in the early generation phase. To this end, we make a theoretical generalization of the forward diffusion process via signal decomposition. Concretely, we manage to decompose an image into multiple orthogonal components and control the attenuation of each component when perturbing the image. That way, along with the noise strength increasing, we are able to diminish those inconsequential components and thus use a lower-dimensional signal to represent the source, barely losing information. Such a reformulation allows to vary dimensions in both training and inference of diffusion models. Extensive experiments on a range of datasets suggest that our approach substantially reduces the computational cost and achieves on-par or even better synthesis performance compared to baseline method.We also show that our strategy facilitates high-resolution image synthesis and improves FID of diffusion model trained on FFHQ at $1024\\times1024$ resolution from 52.40 to 15.07. 
Code and models will be made publicly available.", "keywords": "Diffusion models;generative models", "primary_area": "", "supplementary_material": "", "author": "Han Zhang;Ruili Feng;Zhantao Yang;Lianghua Huang;Yu Liu;Yifei Zhang;Yujun Shen;Deli Zhao;Fan Cheng", "authorids": "~Han_Zhang16;~Ruili_Feng1;~Zhantao_Yang1;~Lianghua_Huang2;~Yu_Liu23;~Yifei_Zhang4;~Yujun_Shen1;~Deli_Zhao1;~Fan_Cheng1", "gender": "M;;M;M;M;;;M;M", "homepage": "https://github.com/bibona;https://github.com/RuiLiFeng;;;https://github.com/liuyuyuil;;;https://zhaodeli.github.io;http://www.cs.sjtu.edu.cn/~chengfan", "dblp": "26/4189-10;20/9594;285/8489.html;166/6155;97/2274-63;;;77/1992;", "google_scholar": ";;Fz3X5FwAAAAJ;JYVCn3AAAAAJ;8zksQb4AAAAJ;https://scholar.google.com/citations?hl=zh-CN;;https://scholar.google.com/citations?hl=en;https://scholar.google.com.hk/citations?user=sFfkf94AAAAJ", "orcid": ";;0000-0003-2765-295X;0000-0002-9686-9354;;0009-0005-4831-883X;;0000-0002-8838-578X;0000-0002-4307-6334", "linkedin": ";;;;;zhang-yf-2bb8a61a1;;;", "or_profile": "~Han_Zhang16;~Ruili_Feng1;~Zhantao_Yang1;~Lianghua_Huang2;~Yu_Liu23;~Yifei_Zhang4;~Yujun_Shen1;~Deli_Zhao1;~Fan_Cheng1", "aff": "Shanghai Jiaotong University;University of Science and Technology of China;Shanghai Jiaotong University;Alibaba Group;Alibaba Group;Shanghai Jiaotong University;;Alibaba Group;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;mail.ustc.edu.cn;sjtu.edu.cn;aliabba-inc.com;alibaba-inc.com;sjtu.edu.cn;;alibaba-inc.com;sjtu.edu.cn", "position": "PhD student;PhD student;PhD student;Researcher;Researcher;PhD student;;Director;Associate Professor", "bibtex": "@misc{\nzhang2023dimensionalityvarying,\ntitle={Dimensionality-Varying Diffusion Process},\nauthor={Han Zhang and Ruili Feng and Zhantao Yang and Lianghua Huang and Yu Liu and Yifei Zhang and Yujun Shen and Deli Zhao and Fan Cheng},\nyear={2023},\nurl={https://openreview.net/forum?id=PfJrZvtoWUd}\n}", "github": "", "project": "", "reviewers": "R7FL;a7VJ;xtTc;9mk9", "site": "https://openreview.net/forum?id=PfJrZvtoWUd", "pdf_size": 1825829, "recommendation": "3;3;5;5", "confidence": "4;4;4;4", "correctness": "3;3;2;4", "technical_novelty": "3;1;3;3", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "166;74;75;64", "wc_strength_and_weaknesses": "914;599;328;205", "wc_clarity_quality_novelty_and_reproducibility": "180;56;77;13", "wc_summary_review": "133;41;35;82", "wc_review": "1393;770;515;364", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.5, 0.8660254037844386 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 94.75, 41.360458169609295 ], "wc_strength_and_weaknesses_avg": [ 511.5, 272.6155718223007 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 81.5, 61.36978083715144 ], "wc_summary_review_avg": [ 72.75, 39.20698279643564 ], "wc_review_avg": [ 760.5, 392.94687936157476 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 9, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5342332659778584526&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;0;2;2;0;2;0", 
"aff_unique_norm": "Shanghai Jiao Tong University;University of Science and Technology of China;Alibaba Group", "aff_unique_dep": ";;", "aff_unique_url": "https://www.sjtu.edu.cn;http://www.ustc.edu.cn;https://www.alibaba.com", "aff_unique_abbr": "SJTU;USTC;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "PfPrnKDtvIG", "title": "Multi-Agent Reinforcement Learning with Shared Resources for Inventory Management", "track": "main", "status": "Reject", "tldr": "We propose a scalable and effective method to control a large number of agents for inventory management.", "abstract": "In this paper, we consider the inventory management (IM) problem where we need to make replenishment decisions for a large number of stock keeping units (SKUs) to balance their supply and demand. In our setting, the constraint on the shared resources (such as the inventory capacity) couples the otherwise independent control for each SKU. We formulate the problem with this structure as Shared-Resource Stochastic Game (SRSG) and propose an efficient algorithm called Context-aware Decentralized PPO (CD-PPO). Through extensive experiments, we demonstrate that CD-PPO can accelerate the learning procedure compared with standard MARL algorithms.", "keywords": "Multi-Agent Reinforcement Learning;Inventory Mangement", "primary_area": "", "supplementary_material": "", "author": "Yuandong Ding;Mingxiao Feng;Guozi Liu;Wei Jiang;Chuheng Zhang;Li Zhao;Lei Song;Houqiang Li;Yan Jin;Jiang Bian", "authorids": "~Yuandong_Ding1;~Mingxiao_Feng1;~Guozi_Liu1;weij4@illinois.edu;~Chuheng_Zhang1;~Li_Zhao1;~Lei_Song3;~Houqiang_Li1;~Yan_Jin3;~Jiang_Bian1", "gender": "M;;;;M;F;M;M;F;M", "homepage": "https://yuandong1998.github.io/;https://fmxfranky.github.io/;;;;https://www.microsoft.com/en-us/research/people/lizo/;;https://staff.ustc.edu.cn/~lihq/;http://faculty.hust.edu.cn/jinyan1/zh_CN/index.htm;https://sites.google.com/view/jiangbian", "dblp": ";;;;241/9716;97/4708-7;76/893-1.html;59/7017.html;76/289-5.html;09/851-2.html", "google_scholar": ";;;;q7M83KQAAAAJ;b-LJkLQAAAAJ;pXDSOocAAAAJ;7sFMIKoAAAAJ;joz7edsAAAAJ;pZBEnY8AAAAJ", "orcid": ";;;;;;;0000-0003-2188-3028;;0000-0002-9472-600X", "linkedin": ";;guozi-liu-939a30221/;;;;;;;jbian/", "or_profile": "~Yuandong_Ding1;~Mingxiao_Feng1;~Guozi_Liu1;weij4@illinois.edu;~Chuheng_Zhang1;~Li_Zhao1;~Lei_Song3;~Houqiang_Li1;~Yan_Jin3;~Jiang_Bian1", "aff": "Huazhong University of Science and Technology;University of Science and Technology of China;Carnegie Mellon University;;Microsoft;Microsoft;Microsoft;University of Science and Technology of China;Huazhong University of Science and Technology;Microsoft", "aff_domain": "hust.edu.cn;ustc.edu.cn;andrew.cmu.edu;;microsoft.com;microsoft.com;microsoft.com;ustc.edu.cn;hust.edu.cn;microsoft.com", "position": "MS student;PhD student;MS student;;Researcher;Researcher;Principal Researcher;Professor;Associate Professor;Partner Research Manager", "bibtex": "@misc{\nding2023multiagent,\ntitle={Multi-Agent Reinforcement Learning with Shared Resources for Inventory Management},\nauthor={Yuandong Ding and Mingxiao Feng and Guozi Liu and Wei Jiang and Chuheng Zhang and Li Zhao and Lei Song and Houqiang Li and Yan Jin and Jiang Bian},\nyear={2023},\nurl={https://openreview.net/forum?id=PfPrnKDtvIG}\n}", "github": "", "project": "", "reviewers": "626R;Yg5s;cRM3;cFPA", "site": "https://openreview.net/forum?id=PfPrnKDtvIG", "pdf_size": 2984186, "recommendation": "3;5;5;6", 
"confidence": "3;3;4;4", "correctness": "2;2;2;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;2;1;3", "wc_summary_paper": "106;214;61;126", "wc_strength_and_weaknesses": "136;729;189;49", "wc_clarity_quality_novelty_and_reproducibility": "380;114;90;32", "wc_summary_review": "48;44;19;40", "wc_review": "670;1101;359;247", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 126.75, 55.60294506588657 ], "wc_strength_and_weaknesses_avg": [ 275.75, 266.41450317128005 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 154.0, 133.843191832831 ], "wc_summary_review_avg": [ 37.75, 11.188722000300123 ], "wc_review_avg": [ 594.25, 331.0810890099282 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 10, 0 ], "corr_recommendation_confidence": 0.6882472016116854, "corr_recommendation_correctness": 0.6622661785325219, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4861683893384653399&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;2;3;3;3;1;0;3", "aff_unique_norm": "Huazhong University of Science and Technology;University of Science and Technology of China;Carnegie Mellon University;Microsoft", "aff_unique_dep": ";;;Microsoft Corporation", "aff_unique_url": "http://www.hust.edu.cn;http://www.ustc.edu.cn;https://www.cmu.edu;https://www.microsoft.com", "aff_unique_abbr": "HUST;USTC;CMU;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1;1;1;0;0;1", "aff_country_unique": "China;United States" }, { "title": "LexMAE: Lexicon-Bottlenecked Pretraining for Large-Scale Retrieval", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11193", "id": "PfpEtB3-csK", "poster": "", "openreview": "https://openreview.net/forum?id=PfpEtB3-csK", "slides": "https://iclr.cc/virtual/2023/poster/11193", "video": "https://iclr.cc/virtual/2023/poster/11193", "author_site": "Tao Shen, Xiubo Geng, Chongyang Tao, Can Xu, Xiaolong Huang, Binxing Jiao, Linjun Yang, Daxin Jiang", "tldr": "A new pre-training framework, dubbed lexicon-bottlenecked masked autoencoder, is proposed to learn importance-aware lexicon representations in line with the lexicon-weighting paradigm for large-scale retrieval. ", "abstract": "In large-scale retrieval, the lexicon-weighting paradigm, learning weighted sparse representations in vocabulary space, has shown promising results with high quality and low latency. Despite it deeply exploiting the lexicon-representing capability of pre-trained language models, a crucial gap remains between language modeling and lexicon-weighting retrieval -- the former preferring certain or low-entropy words whereas the latter favoring pivot or high-entropy words -- becoming the main barrier to lexicon-weighting performance for large-scale retrieval. To bridge this gap, we propose a brand-new pre-training framework, lexicon-bottlenecked masked autoencoder (LexMAE), to learn importance-aware lexicon representations. 
Essentially, we present a lexicon-bottlenecked module between a normal language modeling encoder and a weakened decoder, where a continuous bag-of-words bottleneck is constructed to learn a lexicon-importance distribution in an unsupervised fashion. The pre-trained LexMAE is readily transferred to the lexicon-weighting retrieval via fine-tuning. On the ad-hoc retrieval benchmark, MS-Marco, it achieves 42.6% MRR@10 with 45.8 QPS for the passage dataset and 44.4% MRR@100 with 134.8 QPS for the document dataset, by a CPU machine. And LexMAE shows state-of-the-art zero-shot transfer capability on BEIR benchmark with 12 datasets. ", "keywords": "Self-Supervised Learning;Lexicon Representation;Large-Scale Retrieval", "primary_area": "", "supplementary_material": "", "author": "Tao Shen;Xiubo Geng;Chongyang Tao;Can Xu;Xiaolong Huang;Binxing Jiao;Linjun Yang;Daxin Jiang", "authorids": "~Tao_Shen1;~Xiubo_Geng2;~Chongyang_Tao1;~Can_Xu2;~Xiaolong_Huang1;~Binxing_Jiao1;~Linjun_Yang1;~Daxin_Jiang2", "gender": "M;F;M;M;M;M;;M", "homepage": ";https://xiubo0211.github.io/;;;;;;https://www.microsoft.com/en-us/research/people/djiang/", "dblp": "95/4097-1;19/189;;;;78/2418;;77/5094", "google_scholar": "https://scholar.google.com.au/citations?user=SegyX9AAAAAJ;XxeX3FgAAAAJ;x_cOKuwAAAAJ;5aiE_NcAAAAJ;;;;N-wAHCoAAAAJ", "orcid": ";;;0000-0002-1949-5715;;;;", "linkedin": ";;;;xiaolong-huang-446182b4/;binxing-jiao-1711722b/;;", "or_profile": "~Tao_Shen1;~Xiubo_Geng2;~Chongyang_Tao1;~Can_Xu2;~Xiaolong_Huang1;~Binxing_Jiao1;~Linjun_Yang1;~Daxin_Jiang2", "aff": "University of Technology Sydney;Microsoft;Microsoft;Microsoft;Microsoft;Microsoft;;Microsoft", "aff_domain": "uts.edu.au;microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com;;microsoft.com", "position": "Postdoc;Researcher;Researcher;Researcher;Researcher;Researcher;;Researcher/Scientist", "bibtex": "@inproceedings{\nshen2023lexmae,\ntitle={Lex{MAE}: Lexicon-Bottlenecked Pretraining for Large-Scale Retrieval},\nauthor={Tao Shen and Xiubo Geng and Chongyang Tao and Can Xu and Xiaolong Huang and Binxing Jiao and Linjun Yang and Daxin Jiang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=PfpEtB3-csK}\n}", "github": "", "project": "", "reviewers": "v7Be;KtuL;Bg1k;12su", "pdf_size": 729646, "recommendation": "6;6;8;8", "confidence": "4;3;4;4", "correctness": "3;3;4;4", "technical_novelty": "3;3;4;3", "empirical_novelty": "3;3;4;3", "wc_summary_paper": "72;105;74;175", "wc_strength_and_weaknesses": "162;244;65;225", "wc_clarity_quality_novelty_and_reproducibility": "52;124;26;39", "wc_summary_review": "35;62;145;34", "wc_review": "321;535;310;473", "wc_reply_reviewers": "19;0;0;0", "wc_reply_authors": "695;926;1350;745", "reply_reviewers": "1;0;0;0", "reply_authors": "2;2;3;2", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 106.5, 41.65633205168213 ], "wc_strength_and_weaknesses_avg": [ 174.0, 69.86773218017026 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 60.25, 37.9366247839736 ], "wc_summary_review_avg": [ 69.0, 45.29348739057305 ], "wc_review_avg": [ 409.75, 96.84362395119257 ], "wc_reply_reviewers_avg": [ 4.75, 8.227241335952167 ], "wc_reply_authors_avg": [ 929.0, 257.8090378555414 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], 
"reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 1.0, "gs_citation": 43, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3404456607009172872&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=PfpEtB3-csK", "email": "uts.edu.au;microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com;;microsoft.com", "author_num": 8, "aff_unique_index": "0;1;1;1;1;1;1", "aff_unique_norm": "University of Technology Sydney;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.uts.edu.au;https://www.microsoft.com", "aff_unique_abbr": "UTS;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1;1;1", "aff_country_unique": "Australia;United States" }, { "title": "DySR: Adaptive Super-Resolution via Algorithm and System Co-design", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10992", "id": "Pgtn4l6eKjv", "poster": "/media/PosterPDFs/ICLR%202023/10992.png?t=1681359500.3580596", "openreview": "https://openreview.net/forum?id=Pgtn4l6eKjv", "slides": "https://iclr.cc/virtual/2023/poster/10992", "video": "https://iclr.cc/virtual/2023/poster/10992", "author_site": "Syed Zawad, Cheng Li, Zhewei Yao, Elton Zheng, Yuxiong He, Feng Yan", "tldr": "We present DySR, an algorithm and system co-design approach to maintain super-resolution streaming task QoS on mobile devices via fast model adaptation.", "abstract": "Super resolution (SR) is a promising approach for improving the quality of low resolution steaming services on mobile devices.\nOn mobile devices, the available computing and memory resources change dynamically depending on other running applications.\nDue to the high computation and memory demands of SR models, it is essential to adapt the model according to available resources to harvest the best possible model performance while maintaining quality of service (QoS), such as meeting a minimum framerate and avoiding interruptions. Nevertheless, there is no SR model or machine learning system that supports adaptive SR, and enabling adaptive SR model on mobile devices is challenging because adapting model can cause significant framerate drop or even service interruption. To address this challenge, we take an algorithm and system co-design approach and propose DySR that maintains QoS while maximizing the model performance. During the training stage, DySR employs an adaption-aware one-shot Neural Architecture Search to produce sub-graphs that share kernel operation weights for low model adaption overhead while striking a balance between performance and framerate. During the inference stage, an incremental model adaption method is developed for further reducing the model adaption overhead. 
We evaluate on a diverse set of hardware and datasets to show that DySR can generate models close to the Pareto frontier while maintaining a steady framerate throughput with a memory footprint of around 40\\% less compared to baseline methods.", "keywords": "super-resolution;quality of service;inference;deep learning;systems", "primary_area": "", "supplementary_material": "", "author": "Syed Zawad;Cheng Li;Zhewei Yao;Elton Zheng;Yuxiong He;Feng Yan", "authorids": "~Syed_Zawad1;~Cheng_Li10;~Zhewei_Yao1;elton.zheng@microsoft.com;~Yuxiong_He1;~Feng_Yan2", "gender": ";F;M;;;", "homepage": ";https://chengli.netlify.app/;;;;http://www.cs.uh.edu/~fyan/", "dblp": ";;195/2887;;https://dblp.org/pers/hd/h/He:Yuxiong;62/3960-1.html", "google_scholar": ";da9Vl6QAAAAJ;gpSeMjYAAAAJ;;SB3_eb0AAAAJ;iLE0_VAAAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Syed_Zawad1;~Cheng_Li10;~Zhewei_Yao1;elton.zheng@microsoft.com;~Yuxiong_He1;~Feng_Yan2", "aff": ";Microsoft;Microsoft;;Microsoft;University of Houston", "aff_domain": ";microsoft.com;microsoft.com;;microsoft.com;uh.edu", "position": ";Researcher;Researcher;;Researcher;Associate Professor", "bibtex": "@inproceedings{\nzawad2023dysr,\ntitle={Dy{SR}: Adaptive Super-Resolution via Algorithm and System Co-design},\nauthor={Syed Zawad and Cheng Li and Zhewei Yao and Elton Zheng and Yuxiong He and Feng Yan},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=Pgtn4l6eKjv}\n}", "github": "", "project": "", "reviewers": "bgMe;bx3i;9DkJ;6CHN", "pdf_size": 1756751, "recommendation": "5;6;6;8", "confidence": "4;4;4;4", "correctness": "3;2;3;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "30;51;87;101", "wc_strength_and_weaknesses": "120;59;275;73", "wc_clarity_quality_novelty_and_reproducibility": "5;420;66;121", "wc_summary_review": "6;50;35;73", "wc_review": "161;580;463;368", "wc_reply_reviewers": "0;23;0;0", "wc_reply_authors": "194;760;195;19", "reply_reviewers": "0;1;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 67.25, 28.199069133572475 ], "wc_strength_and_weaknesses_avg": [ 131.75, 85.73615048507834 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 153.0, 159.51959127329783 ], "wc_summary_review_avg": [ 41.0, 24.320773014030618 ], "wc_review_avg": [ 393.0, 153.55617864482042 ], "wc_reply_reviewers_avg": [ 5.75, 9.959292143521045 ], "wc_reply_authors_avg": [ 292.0, 279.53801172649133 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.13245323570650439, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5265186923557832699&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "pdf": "https://openreview.net/pdf?id=Pgtn4l6eKjv", "email": ";microsoft.com;microsoft.com;;microsoft.com;uh.edu", "author_num": 6, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Microsoft;University of Houston", "aff_unique_dep": "Microsoft Corporation;", "aff_unique_url": "https://www.microsoft.com;https://www.uh.edu", "aff_unique_abbr": "Microsoft;UH", "aff_campus_unique_index": "", "aff_campus_unique": "", 
"aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Unbiased Supervised Contrastive Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11181", "id": "Ph5cJSfD2XN", "poster": "/media/PosterPDFs/ICLR%202023/11181.png?t=1682853524.0850468", "openreview": "https://openreview.net/forum?id=Ph5cJSfD2XN", "slides": "https://iclr.cc/virtual/2023/poster/11181", "video": "https://iclr.cc/virtual/2023/poster/11181", "author_site": "Carlo Alberto Barbano, Benoit Dufumier, Enzo Tartaglione, Marco Grangetto, Pietro Gori", "tldr": "We introduce FairKL, a debiasing regularization technique along with a metric learning theoretical framework and a novel formulation of the supervised contrastive loss, \u03f5-SupInfoNCE", "abstract": "Many datasets are biased, namely they contain easy-to-learn features that are highly correlated with the target class only in the dataset but not in the true underlying distribution of the data. For this reason, learning unbiased models from biased data has become a very relevant research topic in the last years. In this work, we tackle the problem of learning representations that are robust to biases. We first present a margin-based theoretical framework that allows us to clarify why recent contrastive losses (InfoNCE, SupCon, etc.) can fail when dealing with biased data. Based on that, we derive a novel formulation of the supervised contrastive loss ($\\epsilon$-SupInfoNCE), providing more accurate control of the minimal distance between positive and negative samples. \nFurthermore, thanks to our theoretical framework, we also propose FairKL, a new debiasing regularization loss, that works well even with extremely biased data. We validate the proposed losses on standard vision datasets including CIFAR10, CIFAR100, and ImageNet, and we assess the debiasing capability of FairKL with $\\epsilon$-SupInfoNCE, reaching state-of-the-art performance on a number of biased datasets, including real instances of biases \"in the wild\".", "keywords": "contrastive learning;debiasing;supervised learning;representation learning;deep learning;neural networks", "primary_area": "", "supplementary_material": "/attachment/96aacd506ad934b784291183b489a71607ef4408.zip", "author": "Carlo Alberto Barbano;Benoit Dufumier;Enzo Tartaglione;Marco Grangetto;Pietro Gori", "authorids": "~Carlo_Alberto_Barbano1;~Benoit_Dufumier1;~Enzo_Tartaglione1;~Marco_Grangetto1;~Pietro_Gori1", "gender": "M;;M;M;", "homepage": ";http://benoitdufumier.ml;https://perso.telecom-paristech.fr/etartaglione/index.html;https://www.di.unito.it/~mgrange/;https://perso.telecom-paristech.fr/pgori/index.html", "dblp": "262/6495;294/4585;170/0115;77/2058;134/9724", "google_scholar": "sq0-Os4AAAAJ;;https://scholar.google.it/citations?user=uKuvN64AAAAJ;Pt1gmQYAAAAJ;https://scholar.google.fr/citations?user=id9wCjsAAAAJ", "orcid": "0000-0001-9512-0440;0000-0002-8253-2363;0000-0003-4274-8298;0000-0002-2709-7864;", "linkedin": ";;enzo-tartaglione-490950a2;marco-grangetto-542aa31/;", "or_profile": "~Carlo_Alberto_Barbano1;~Benoit_Dufumier1;~Enzo_Tartaglione1;~Marco_Grangetto1;~Pietro_Gori1", "aff": "T\u00e9l\u00e9com Paris;EPFL - EPF Lausanne;T\u00e9l\u00e9com Paris;University of Turin;Telecom Paris", "aff_domain": "telecom-paris.fr;epfl.ch;telecom-paristech.fr;unito.it;telecom-paris.fr", "position": "PhD student;Postdoc;Associate Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nbarbano2023unbiased,\ntitle={Unbiased Supervised Contrastive 
Learning},\nauthor={Carlo Alberto Barbano and Benoit Dufumier and Enzo Tartaglione and Marco Grangetto and Pietro Gori},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=Ph5cJSfD2XN}\n}", "github": "", "project": "", "reviewers": "BLxm;DYGt;pTc7", "pdf_size": 2710886, "recommendation": "6;6;8", "confidence": "4;3;3", "correctness": "3;3;4", "technical_novelty": "3;2;3", "empirical_novelty": "2;3;3", "wc_summary_paper": "80;132;118", "wc_strength_and_weaknesses": "426;263;187", "wc_clarity_quality_novelty_and_reproducibility": "75;107;67", "wc_summary_review": "61;48;35", "wc_review": "642;550;407", "wc_reply_reviewers": "154;0;0", "wc_reply_authors": "1260;1220;437", "reply_reviewers": "3;0;0", "reply_authors": "3;2;1", "recommendation_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 110.0, 21.96967607104544 ], "wc_strength_and_weaknesses_avg": [ 292.0, 99.70289196741821 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 83.0, 17.281975195754296 ], "wc_summary_review_avg": [ 48.0, 10.614455552060438 ], "wc_review_avg": [ 533.0, 96.68850328072446 ], "wc_reply_reviewers_avg": [ 51.333333333333336, 72.59629620181887 ], "wc_reply_authors_avg": [ 972.3333333333334, 378.88989898855255 ], "reply_reviewers_avg": [ 1.0, 1.4142135623730951 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.49999999999999983, "corr_recommendation_correctness": 0.9999999999999998, "gs_citation": 41, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8315874745482602698&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 18, "pdf": "https://openreview.net/pdf?id=Ph5cJSfD2XN", "email": "telecom-paris.fr;epfl.ch;telecom-paristech.fr;unito.it;telecom-paris.fr", "author_num": 5, "aff_unique_index": "0;1;0;2;3", "aff_unique_norm": "T\u00e9l\u00e9com Paris;EPFL;University of Turin;Telecom Paris", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.telecom-paris.fr;https://www.epfl.ch;https://www.unito.it;https://www.telecom-paris.fr", "aff_unique_abbr": "T\u00e9l\u00e9com Paris;EPFL;UNITO;Telecom Paris", "aff_campus_unique_index": "1", "aff_campus_unique": ";Lausanne", "aff_country_unique_index": "0;1;0;2;0", "aff_country_unique": "France;Switzerland;Italy" }, { "title": "Choreographer: Learning and Adapting Skills in Imagination", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11101", "id": "PhkWyijGi5b", "poster": "/media/PosterPDFs/ICLR%202023/11101.png?t=1681856249.9119952", "openreview": "https://openreview.net/forum?id=PhkWyijGi5b", "slides": "https://iclr.cc/virtual/2023/poster/11101", "video": "https://iclr.cc/virtual/2023/poster/11101", "author_site": "Pietro Mazzaglia, Tim Verbelen, Bart Dhoedt, Alexandre Lacoste, Sai Rajeswar", "tldr": "Choreographer: a model-based agent that discovers and learns unsupervised skills in latent imagination, and it's able to efficiently coordinate and adapt the skills to solve downstream tasks.", "abstract": "Unsupervised skill learning aims to learn a rich repertoire of behaviors without external supervision, providing artificial agents with the ability to control and 
influence the environment. However, without appropriate knowledge and exploration, skills may provide control only over a restricted area of the environment, limiting their applicability. Furthermore, it is unclear how to leverage the learned skill behaviors for adapting to downstream tasks in a data-efficient manner. We present Choreographer, a model-based agent that exploits its world model to learn and adapt skills in imagination. Our method decouples the exploration and skill learning processes, and is able to discover skills in the latent state space of the model. During adaptation, the agent uses a meta-controller to evaluate and adapt the learned skills efficiently by deploying them in parallel in imagination. Choreographer is able to learn skills both from offline data and by collecting data simultaneously with an exploration policy. The skills can be used to effectively adapt to downstream tasks, as we show in the URL benchmark, where we outperform previous approaches from both pixel and state inputs. The skills also explore the environment thoroughly, finding sparse rewards more frequently, as shown in goal-reaching tasks from the DMC Suite and Meta-World. \nProject website: https://skillchoreographer.github.io/", "keywords": "unsupervised reinforcement learning;skill learning;world models", "primary_area": "", "supplementary_material": "", "author": "Pietro Mazzaglia;Tim Verbelen;Bart Dhoedt;Alexandre Lacoste;Sai Rajeswar", "authorids": "~Pietro_Mazzaglia1;~Tim_Verbelen1;~Bart_Dhoedt1;~Alexandre_Lacoste1;~Sai_Rajeswar2", "gender": ";M;M;M;M", "homepage": "https://mazpie.github.io/;https://scholar.google.be/citations?user=G86XVPEAAAAJ;;;https://sairajeswar.com/", "dblp": "266/6084;71/8853;39/211;59/6239.html;159/2116", "google_scholar": "c-PYVTgAAAAJ;https://scholar.google.be/citations?user=G86XVPEAAAAJ;https://scholar.google.com/citations?hl=en;;https://scholar.google.ca/citations?user=h-sqIigAAAAJ", "orcid": "0000-0003-3319-5986;;0000-0002-7271-7479;;", "linkedin": "pietromazzaglia/;;;;sairajeswar/", "or_profile": "~Pietro_Mazzaglia1;~Tim_Verbelen1;~Bart_Dhoedt1;~Alexandre_Lacoste1;~sai_rajeswar_mudumba1", "aff": "Ghent University;Ghent University;Ghent University;ServiceNow;ServiceNow", "aff_domain": "ugent.be;ugent.be;ugent.be;servicenow.com;servicenow.com", "position": "PhD student;Postdoc;Full Professor;Research Scientist;Research Scientist", "bibtex": "@inproceedings{\nmazzaglia2023choreographer,\ntitle={Choreographer: Learning and Adapting Skills in Imagination},\nauthor={Pietro Mazzaglia and Tim Verbelen and Bart Dhoedt and Alexandre Lacoste and Sai Rajeswar},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=PhkWyijGi5b}\n}", "github": "", "project": "", "reviewers": "2p4W;g6c7;LVu4;eUdn", "pdf_size": 12463387, "recommendation": "6;6;8;8", "confidence": "4;4;4;3", "correctness": "4;4;3;4", "technical_novelty": "2;2;3;3", "empirical_novelty": "0;3;3;3", "wc_summary_paper": "94;93;129;107", "wc_strength_and_weaknesses": "352;131;182;133", "wc_clarity_quality_novelty_and_reproducibility": "29;36;165;16", "wc_summary_review": "68;69;29;38", "wc_review": "543;329;505;294", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "838;322;1041;371", "reply_reviewers": "0;0;0;0", "reply_authors": "2;1;2;1", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ],
"empirical_novelty_avg": [ 2.25, 1.299038105676658 ], "wc_summary_paper_avg": [ 105.75, 14.515078366994786 ], "wc_strength_and_weaknesses_avg": [ 199.5, 90.38390343418456 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 61.5, 60.18513105410671 ], "wc_summary_review_avg": [ 51.0, 17.790446874657196 ], "wc_review_avg": [ 417.75, 107.80856876890631 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 643.0, 305.55441413928224 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": -0.5773502691896257, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=622681183421571708&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=PhkWyijGi5b", "email": "ugent.be;ugent.be;ugent.be;servicenow.com;servicenow.com", "author_num": 5, "aff_unique_index": "0;0;0;1;1", "aff_unique_norm": "Ghent University;ServiceNow", "aff_unique_dep": ";", "aff_unique_url": "https://www.ugent.be/en;https://www.servicenow.com", "aff_unique_abbr": "UGent;ServiceNow", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;1", "aff_country_unique": "Belgium;United States" }, { "id": "Pi5LI8sJYYz", "title": "Lossless Filter Pruning via Adaptive Clustering for Convolutional Neural Networks", "track": "main", "status": "Reject", "tldr": "We propose a clustering-based filter pruning method which uses equivalence to remove redundancy. Our solution can omit fine-tuning and achieve the best trade-off between performance and complexity compared with other algorithms.. ", "abstract": "The filter pruning method introduces structural sparsity by removing selected filters and is thus particularly effective for reducing complexity. However, previous works face two common limitations. 1) The pruned filters are prevented from contributing to the final outputs, resulting in performance degradation, especially when it comes to a large pruning rate. 2) To recover accuracy, the time-consuming fine-tuning step is required. The cost in time and the need for training data make it difficult to deploy in real-world scenarios. To address the aforementioned limitations, we propose a novel filter pruning method called Cluster Pruning (CP). Our CP reconstructs the redundant filters from the perspective of similarity and removes them equivalently using the proposed channel addition operation in a lossless manner. Pruning in such a way allows CP to preserve as many learned features as possible while getting rid of the need for fine-tuning. Specifically, each filter is first distinguished by clustering and then reconstructed as the centroid to which it belongs. Filters are then updated to eliminate the effect caused by mistakenly selected. After convergence, CP can equivalently remove identical filters through the proposed channel addition operation. The strategies for adjusting the pruning rate and the adaptive coefficient for clustering make our CP even smoother and more efficient. 
Extensive experiments on CIFAR-10 and ImageNet datasets show that our method achieves the best trade-off between performance and complexity compared with other state-of-the-art algorithms.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Tao Niu;Yinglei Teng;Panpan Zou;Yiding Liu", "authorids": "~Tao_Niu1;~Yinglei_Teng1;~Panpan_Zou1;~Yiding_Liu2", "gender": "M;F;F;", "homepage": ";https://teacher.bupt.edu.cn/tengyinglei/zh_CN/index.htm;;", "dblp": ";11/8605.html;;", "google_scholar": ";https://scholar.google.com.hk/citations?user=KLHNecUAAAAJ;;", "orcid": " 0000-0001-6149-2908;0000-0002-7170-4764; 0000-0002-7041-6242;0000-0002-8739-8482", "linkedin": ";;;", "or_profile": "~Tao_Niu1;~Yinglei_Teng1;~Panpan_Zou1;~Yiding_Liu2", "aff": "Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications", "aff_domain": "bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn", "position": "PhD student;Full Professor;MS student;MS student", "bibtex": "@misc{\nniu2023lossless,\ntitle={Lossless Filter Pruning via Adaptive Clustering for Convolutional Neural Networks},\nauthor={Tao Niu and Yinglei Teng and Panpan Zou and Yiding Liu},\nyear={2023},\nurl={https://openreview.net/forum?id=Pi5LI8sJYYz}\n}", "github": "", "project": "", "reviewers": "hd69;ddXD;8kEQ;uzoq", "site": "https://openreview.net/forum?id=Pi5LI8sJYYz", "pdf_size": 1745437, "recommendation": "5;5;5;5", "confidence": "5;4;3;4", "correctness": "3;2;3;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "56;81;64;79", "wc_strength_and_weaknesses": "261;490;100;374", "wc_clarity_quality_novelty_and_reproducibility": "30;44;22;38", "wc_summary_review": "50;43;59;36", "wc_review": "397;658;245;527", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.0, 0.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 70.0, 10.41633332799983 ], "wc_strength_and_weaknesses_avg": [ 306.25, 143.99717879180827 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 33.5, 8.2915619758885 ], "wc_summary_review_avg": [ 47.0, 8.514693182963201 ], "wc_review_avg": [ 456.75, 153.17045243779884 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10537932248021890600&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Beijing University of Posts and Telecommunications", "aff_unique_dep": "", "aff_unique_url": "http://www.bupt.edu.cn/", "aff_unique_abbr": "BUPT", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Beijing", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Planckian Jitter: countering the color-crippling effects of color jitter on self-supervised training", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11852", "id": "Pia70sP2Oi1", "poster": 
"/media/PosterPDFs/ICLR%202023/11852.png?t=1680529042.0384202", "openreview": "https://openreview.net/forum?id=Pia70sP2Oi1", "slides": "https://iclr.cc/virtual/2023/poster/11852", "video": "https://iclr.cc/virtual/2023/poster/11852", "author_site": "Simone Zini, Alex Gomez-Villa, Marco Buzzelli, Bart\u0142omiej Twardowski, Andrew Bagdanov, Joost van de Weijer", "tldr": "", "abstract": "Several recent works on self-supervised learning are trained by mapping different augmentations of the same image to the same feature representation. The data augmentations used are of crucial importance to the quality of learned feature representations. In this paper, we analyze how the color jitter traditionally used in data augmentation negatively impacts the quality of the color features in learned feature representations. To address this problem, we propose a more realistic, physics-based color data augmentation - which we call Planckian Jitter - that creates realistic variations in chromaticity and produces a model robust to illumination changes that can be commonly observed in real life, while maintaining the ability to discriminate image content based on color information.\nExperiments confirm that such a representation is complementary to the representations learned with the currently-used color jitter augmentation and that a simple concatenation leads to significant performance gains on a wide range of downstream datasets. \nIn addition, we present a color sensitivity analysis that documents the impact of different training methods on model neurons and shows that the performance of the learned features is robust with respect to illuminant variations.\nOfficial code available at: https://github.com/TheZino/PlanckianJitter", "keywords": "Contrastive Learning;Self-Supervised Learning;Color Features;Illuminant Invariance", "primary_area": "", "supplementary_material": "/attachment/d3265be54a33dc989b8234333f00519bd697a617.zip", "author": "Simone Zini;Alex Gomez-Villa;Marco Buzzelli;Bart\u0142omiej Twardowski;Andrew D. 
Bagdanov;Joost van de weijer", "authorids": "~Simone_Zini1;~Alex_Gomez-Villa1;~Marco_Buzzelli1;~Bart\u0142omiej_Twardowski1;~Andrew_D._Bagdanov2;~Joost_van_de_weijer3", "gender": "M;F;M;M;M;M", "homepage": "http://www.ivl.disco.unimib.it/people/simone-zini/;https://sites.google.com/view/alex-gomez-villa/home?authuser=1;http://www.ivl.disco.unimib.it/people/marco-buzzelli/;;http://www.micc.unifi.it/bagdanov;http://lamp.cvc.uab.es/", "dblp": "238/0054;309/8894;167/1136;156/6628;64/3935;67/3379", "google_scholar": "https://scholar.google.it/citations?user=eo6mHZAAAAAJ;https://scholar.google.es/citations?user=A2dhwNgAAAAJ;https://scholar.google.it/citations?user=kSFvKBoAAAAJ;https://scholar.google.pl/citations?user=8yywECgAAAAJ;_Fk4YUcAAAAJ;https://scholar.google.es/citations?user=Gsw2iUEAAAAJ", "orcid": "0000-0002-8505-1581;0000-0003-0469-3425;0000-0003-1138-3345;0000-0003-2117-8679;;0000-0002-9656-9706", "linkedin": ";;marcobuzzelli/;bartlomiejtwardowski/;;", "or_profile": "~Simone_Zini1;~Alex_Gomez-Villa1;~Marco_Buzzelli1;~Bart\u0142omiej_Twardowski1;~Andrew_D._Bagdanov2;~Joost_van_de_Weijer1", "aff": "University of Milan - Bicocca;Computer Vision Center, Universitat Aut\u00f3noma de Barcelona;University of Milano - Bicocca;Computer Vision Center, Universitat Aut\u00f2noma de Barcelona;Universit\u00e0 degli Studi di Firenze;Computer Vision Center, Universitat Aut\u00f3noma de Barcelona", "aff_domain": "unimib.it;cvc.uab.es;unimib.it;cvc.uab.es;unifi.it;cvc.uab.es", "position": "Postdoc;PhD student;Postdoc;Postdoc;Associate Professor;Researcher", "bibtex": "@inproceedings{\nzini2023planckian,\ntitle={Planckian Jitter: countering the color-crippling effects of color jitter on self-supervised training},\nauthor={Simone Zini and Alex Gomez-Villa and Marco Buzzelli and Bart{\\l}omiej Twardowski and Andrew D. 
Bagdanov and Joost van de weijer},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=Pia70sP2Oi1}\n}", "github": "", "project": "", "reviewers": "9p9z;w1qF;kWdv;HQBn", "pdf_size": 3160992, "recommendation": "3;6;8;8", "confidence": "5;4;3;4", "correctness": "2;3;4;4", "technical_novelty": "1;3;3;3", "empirical_novelty": "1;3;3;3", "wc_summary_paper": "32;47;95;118", "wc_strength_and_weaknesses": "171;228;227;300", "wc_clarity_quality_novelty_and_reproducibility": "12;23;48;32", "wc_summary_review": "9;26;43;36", "wc_review": "224;324;413;486", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "833;1158;620;464", "reply_reviewers": "0;0;0;0", "reply_authors": "4;4;2;2", "recommendation_avg": [ 6.25, 2.0463381929681126 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.5, 0.8660254037844386 ], "empirical_novelty_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 73.0, 34.878360053190576 ], "wc_strength_and_weaknesses_avg": [ 231.5, 45.78482281280556 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 28.75, 13.179055353097201 ], "wc_summary_review_avg": [ 28.5, 12.776932339180638 ], "wc_review_avg": [ 361.75, 98.06216140795593 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 768.75, 260.1166805493258 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 3.0, 1.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.8638684255813602, "corr_recommendation_correctness": 0.9945577827230725, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=906421005678918903&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=Pia70sP2Oi1", "email": "unimib.it;cvc.uab.es;unimib.it;cvc.uab.es;unifi.it;cvc.uab.es", "author_num": 6, "aff_unique_index": "0;1;2;3;4;1", "aff_unique_norm": "University of Milan;Universitat Aut\u00f3noma de Barcelona;University of Milano - Bicocca;Universitat Aut\u00f2noma de Barcelona;University of Florence", "aff_unique_dep": ";Computer Vision Center;;Computer Vision Center;", "aff_unique_url": "https://www.unimib.it;https://www.uab.cat;https://www.unimib.it;https://www.uab.cat;https://www.unifi.it", "aff_unique_abbr": "UNIMIB;UAB;UNIMIB;UAB;UNIFI", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Bicocca;;Milano", "aff_country_unique_index": "0;1;0;1;0;1", "aff_country_unique": "Italy;Spain" }, { "id": "PjT1TJ62vJW", "title": "Exploring semantic information in disease: Simple Data Augmentation Techniques for Chinese Disease Normalization", "track": "main", "status": "Reject", "tldr": "A novel data augmentation method in NLP to address the problem of Chinese Disease Normalization.", "abstract": "Disease is a core concept in the medical field, and the task of normalizing disease names is the basis of all disease-related tasks. However, due to the multi-axis and multi-grain nature of disease names, incorrect information is often injected and harms the performance when using general text data augmentation techniques. To address the above problem, we propose a set of data augmentation techniques that work together as an augmented training task for disease normalization, which is called Disease Data Augmentation (DDA). Our data augmentation methods are based on both the clinical disease corpus and standard disease corpus derived from ICD-10 coding. 
Extensive experiments are conducted to show the effectiveness of our proposed methods. The results demonstrate that our method can achieve up to a 3\\% performance gain compared to non-augmented counterparts, and that it works even better on smaller datasets.", "keywords": "Data Augmentation;Medicine;Disease;Disease Normalization;Deep Learning;Natural Language Processing;Representation Learning", "primary_area": "", "supplementary_material": "", "author": "Wenqian Cui;Xiangling Fu;Shaohui Liu;Xien Liu;Ji Wu", "authorids": "~Wenqian_Cui1;fuxiangling@bupt.edu.cn;shaohuiliu@mail.tsinghua.edu.cn;~Xien_Liu4;wuji_ee@mail.tsinghua.edu.cn", "gender": "M;;;M;", "homepage": "https://github.com/dreamtheater123;;;;", "dblp": "274/8687;;;;", "google_scholar": "eFBIgV0AAAAJ;;;WOJZUW8AAAAJ;", "orcid": "0009-0005-1615-7576;;;;", "linkedin": "https://www.linkedin.cn/incareer/in/%E6%96%87%E8%B0%A6-%E5%B4%94-8184b6211;;;;", "or_profile": "~Wenqian_Cui1;fuxiangling@bupt.edu.cn;shaohuiliu@mail.tsinghua.edu.cn;~Xien_Liu4;wuji_ee@mail.tsinghua.edu.cn", "aff": "Queen Mary, University of London;;; Tsinghua University;", "aff_domain": "qmul.ac.uk;;;mail.tsinghua.edu.cn;", "position": "MS student;;;Postdoc;", "bibtex": "@misc{\ncui2023exploring,\ntitle={Exploring semantic information in disease: Simple Data Augmentation Techniques for Chinese Disease Normalization},\nauthor={Wenqian Cui and Xiangling Fu and Shaohui Liu and Xien Liu and Ji Wu},\nyear={2023},\nurl={https://openreview.net/forum?id=PjT1TJ62vJW}\n}", "github": "", "project": "", "reviewers": "6zpd;Ujvh;ethR;iwv2", "site": "https://openreview.net/forum?id=PjT1TJ62vJW", "pdf_size": 534991, "recommendation": "1;3;3;3", "confidence": "5;3;3;5", "correctness": "1;2;2;3", "technical_novelty": "2;3;2;2", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "51;45;53;34", "wc_strength_and_weaknesses": "114;181;54;345", "wc_clarity_quality_novelty_and_reproducibility": "314;107;77;48", "wc_summary_review": "13;34;82;83", "wc_review": "492;367;266;510", "wc_reply_reviewers": "31;0;66;0", "wc_reply_authors": "245;282;112;407", "reply_reviewers": "1;0;1;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 2.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 1.0 ], "correctness_avg": [ 2.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 45.75, 7.39509972887452 ], "wc_strength_and_weaknesses_avg": [ 173.5, 108.73017060595463 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 136.5, 104.58130808132016 ], "wc_summary_review_avg": [ 53.0, 30.422031490352513 ], "wc_review_avg": [ 408.75, 99.1246059260767 ], "wc_reply_reviewers_avg": [ 24.25, 27.224758952100935 ], "wc_reply_authors_avg": [ 261.5, 105.13443774520317 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:E47vKXLXppAJ:scholar.google.com/&scioq=Exploring+semantic+information+in+disease:+Simple+Data+Augmentation+Techniques+for+Chinese+Disease+Normalization&hl=en&as_sdt=0,33", "gs_version_total": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Queen Mary, University of London;Tsinghua University", "aff_unique_dep": ";", "aff_unique_url": "https://www.qmul.ac.uk;https://www.tsinghua.edu.cn", "aff_unique_abbr": "QMUL;THU", "aff_campus_unique_index": "0",
"aff_campus_unique": "London;", "aff_country_unique_index": "0;1", "aff_country_unique": "United Kingdom;China" }, { "id": "Pk_di2bPAop", "title": "On the Adversarial Robustness against Natural Weather Perturbations", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Several algorithms are proposed to improve the robustness of deep neural networks against adversarial perturbations beyond $\\ell_p$ cases, i.e. weather perturbations. However, evaluations of existing robust training algorithms are over-optimistic. This is in part due to the lack of a standardized evaluation protocol across various robust training algorithms, leading to ad-hoc methods that test robustness on either random perturbations or the adversarial samples from generative models that are used for robust training, which is either uninformative of the worst case, or is heavily biased.\nIn this paper, we identify such evaluation bias in these existing works and propose the first standardized and fair evaluation that compares various robust training algorithms by using physics simulators for common adverse weather effects i.e. rain and snow. Additionally, our framework identified the lack of diversity in existing robust training algorithms. As a step to address this, we propose a light-weight generative adversarial network (GAN) with improved diverse weather effects controlled by latent codes that can be used in robust training.\nThe proposed robust training algorithm is evaluated on two streetview classification datasets (BIC\\_GSV, Places365), where it outperforms other robust training approaches based on generative models for worst-case adversarial rain and snow attacks.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Yihan Wang;Yunhao Ba;Howard Chenyang Zhang;Huan Zhang;Achuta Kadambi;Stefano Soatto;Alex Wong;Cho-Jui Hsieh", "authorids": "~Yihan_Wang2;~Yunhao_Ba1;~Howard_Chenyang_Zhang1;~Huan_Zhang1;~Achuta_Kadambi2;~Stefano_Soatto1;~Alex_Wong2;~Cho-Jui_Hsieh1", "gender": "F;M;M;M;M;M;M;", "homepage": "https://yihanwang617.github.io;https://yhba-ucla.github.io/;https://howardzhang-cv.github.io/personal_website/;http://huan-zhang.com;http://visual.ee.ucla.edu;https://vision.cs.yale.edu/members/alex-wong/;http://web.cs.ucla.edu/~chohsieh/index.html;https://www.cs.ucla.edu/~soatto", "dblp": ";238/0340;63/5405;23/1797-1.html;;39/6537-1;14/2770;08/1262", "google_scholar": ";https://scholar.google.com/citations?hl=en;tBlMihEAAAAJ;LTa3GzEAAAAJ;;K9_XuM8AAAAJ;Wy89g4IAAAAJ;lH1PdF8AAAAJ", "orcid": ";0000-0001-8664-7195;;;;0000-0002-3157-6016;;0000-0003-2902-6362", "linkedin": ";;howard-zhang-8366b1147/;;achuta-kadambi/;;;stefano-soatto-5765aa6/", "or_profile": "~Yihan_Wang2;~Yunhao_Ba1;~Howard_Chenyang_Zhang1;~Huan_Zhang1;~Achuta_Kadambi2;~Alex_Wong2;~Cho-Jui_Hsieh1;~Stefano_Soatto2", "aff": "University of California, Los Angeles;University of California, Los Angeles;University of California, Los Angeles;Carnegie Mellon University;University of California, Los Angeles;Yale University;Amazon;UCLA Computer Science Department, University of California, Los Angeles", "aff_domain": "ucla.edu;ucla.edu;ucla.edu;cmu.edu;ucla.edu;yale.edu;amazon.com;cs.ucla.edu", "position": "PhD student;PhD student;PhD student;Postdoc;Assistant Professor;Assistant Professor;visiting scholar;Professor", "bibtex": "@misc{\nwang2023on,\ntitle={On the Adversarial Robustness against Natural Weather Perturbations},\nauthor={Yihan Wang and Yunhao Ba and Howard Chenyang Zhang and Huan Zhang and Achuta Kadambi and 
Stefano Soatto and Alex Wong and Cho-Jui Hsieh},\nyear={2023},\nurl={https://openreview.net/forum?id=Pk_di2bPAop}\n}", "github": "", "project": "", "reviewers": "NKF6;Uy2R;1qHh;Dvuo", "site": "https://openreview.net/forum?id=Pk_di2bPAop", "pdf_size": 3121155, "recommendation": "3;5;5;5", "confidence": "5;4;3;4", "correctness": "2;4;3;3", "technical_novelty": "1;2;2;2", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "89;42;17;229", "wc_strength_and_weaknesses": "233;74;274;281", "wc_clarity_quality_novelty_and_reproducibility": "51;3;5;263", "wc_summary_review": "45;24;5;32", "wc_review": "418;143;301;805", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 94.25, 81.97979934105719 ], "wc_strength_and_weaknesses_avg": [ 215.5, 83.72723571216238 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 80.5, 107.10158728982498 ], "wc_summary_review_avg": [ 26.5, 14.5 ], "wc_review_avg": [ 416.75, 244.47737625391844 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": -0.816496580927726, "corr_recommendation_correctness": 0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:NCyZgJk5IcgJ:scholar.google.com/&scioq=On+the+Adversarial+Robustness+against+Natural+Weather+Perturbations&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0;1;0;2;3;0", "aff_unique_norm": "University of California, Los Angeles;Carnegie Mellon University;Yale University;Amazon", "aff_unique_dep": ";;;Amazon.com, Inc.", "aff_unique_url": "https://www.ucla.edu;https://www.cmu.edu;https://www.yale.edu;https://www.amazon.com", "aff_unique_abbr": "UCLA;CMU;Yale;Amazon", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "Pkb5FA5AjqP", "title": "Automatically Auditing Large Language Models via Discrete Optimization", "track": "main", "status": "Reject", "tldr": "We cast auditing as a discrete optimization problem, and demonstrate how this reduction uncovers large language model failure modes. ", "abstract": "Auditing large language models for unexpected behaviors is critical to preempt catastrophic deployments, yet remains challenging. In this work, we cast auditing as a discrete optimization problem, where we automatically search for input-output pairs that match a desired target behavior. For example, we might aim to find non-toxic input that starts with ``Barack Obama'' and maps to a toxic output. Our optimization problem is difficult to solve as the set of feasible points is sparse, the space is discrete, and the language models we audit are non-linear and high-dimensional. To combat these challenges, we introduce a discrete optimization algorithm, ARCA, that is tailored to autoregressive language models. We demonstrate how our approach can: uncover derogatory completions about celebrities (e.g. 
``Barack Obama is a legalized unborn'' $\\rightarrow$ ``child murderer''), produce French inputs that complete to English outputs, and find inputs that generate a specific name. Our work offers a promising new tool to uncover models' failure modes before deployment. $\\textbf{Trigger Warning: This paper contains model behavior that can be offensive in nature.}$", "keywords": "large language models;safety;auditing;robustness", "primary_area": "", "supplementary_material": "", "author": "Erik Jones;Anca Dragan;Aditi Raghunathan;Jacob Steinhardt", "authorids": "~Erik_Jones3;~Anca_Dragan1;~Aditi_Raghunathan1;~Jacob_Steinhardt1", "gender": "M;F;F;", "homepage": "http://people.eecs.berkeley.edu/~erjones/;http://www.ancadragan.com/;https://www.cs.cmu.edu/~aditirag/;", "dblp": "264/5304;;166/1409;35/10625", "google_scholar": "_-CU2CsAAAAJ;;Ch9iRwQAAAAJ;", "orcid": ";;;", "linkedin": "erik-jones-879239133/;;;", "or_profile": "~Erik_Jones3;~Anca_Dragan1;~Aditi_Raghunathan1;~Jacob_Steinhardt1", "aff": "University of California, Berkeley;University of California, Berkeley;Carnegie Mellon University;University of California, Berkeley", "aff_domain": "berkeley.edu;berkeley.edu;cmu.edu;berkeley.edu", "position": "PhD student;Associate Professor;Assistant Professor;Assistant Professor", "bibtex": "@misc{\njones2023automatically,\ntitle={Automatically Auditing Large Language Models via Discrete Optimization},\nauthor={Erik Jones and Anca Dragan and Aditi Raghunathan and Jacob Steinhardt},\nyear={2023},\nurl={https://openreview.net/forum?id=Pkb5FA5AjqP}\n}", "github": "", "project": "", "reviewers": "ouPy;os8d;froq;hyXR", "site": "https://openreview.net/forum?id=Pkb5FA5AjqP", "pdf_size": 471102, "recommendation": "5;6;6;8", "confidence": "3;4;4;4", "correctness": "3;4;3;4", "technical_novelty": "3;3;3;3", "empirical_novelty": "3;2;3;3", "wc_summary_paper": "37;88;55;37", "wc_strength_and_weaknesses": "130;252;72;19", "wc_clarity_quality_novelty_and_reproducibility": "113;56;45;25", "wc_summary_review": "57;61;256;14", "wc_review": "337;457;428;95", "wc_reply_reviewers": "0;15;0;0", "wc_reply_authors": "840;615;426;37", "reply_reviewers": "0;1;0;0", "reply_authors": "2;2;2;1", "recommendation_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 54.25, 20.825165065372232 ], "wc_strength_and_weaknesses_avg": [ 118.25, 86.62671354726555 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 59.75, 32.69078616368839 ], "wc_summary_review_avg": [ 97.0, 93.6295893401226 ], "wc_review_avg": [ 329.25, 142.3066670960992 ], "wc_reply_reviewers_avg": [ 3.75, 6.49519052838329 ], "wc_reply_authors_avg": [ 479.5, 294.52886106458226 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.6622661785325219, "corr_recommendation_correctness": 0.6882472016116854, "gs_citation": 177, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4229261656919840555&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of California, Berkeley;Carnegie Mellon University", "aff_unique_dep": ";", "aff_unique_url": "https://www.berkeley.edu;https://www.cmu.edu", "aff_unique_abbr": "UC Berkeley;CMU", "aff_campus_unique_index": "0;0;0",
"aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Contextual Convolutional Networks", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12124", "id": "PldynS56bN", "poster": "", "openreview": "https://openreview.net/forum?id=PldynS56bN", "slides": "https://iclr.cc/virtual/2023/poster/12124", "video": "https://iclr.cc/virtual/2023/poster/12124", "author_site": "Shuxian Liang, Xu Shen, Tongliang Liu, Xian-Sheng Hua", "tldr": "In this paper, we propose to augment potential category memberships as contextual priors in the convolution for contextualized representation learning.", "abstract": "This paper presents a new Convolutional Neural Network, named Contextual Convolutional Network, that capably serves as a general-purpose backbone for visual recognition. Most existing convolutional backbones follow the representation-to-classification paradigm, where representations of the input are firstly generated by category-agnostic convolutional operations, and then fed into classifiers for specific perceptual tasks (e.g., classification and segmentation). In this paper, we deviate from this classic paradigm and propose to augment potential category memberships as contextual priors in the convolution for contextualized representation learning. Specifically, top-k likely classes from the preceding stage are encoded as a contextual prior vector. Based on this vector and the preceding features, offsets for spatial sampling locations and kernel weights are generated to modulate the convolution operations. The new convolutions can readily replace their plain counterparts in existing CNNs and can be easily trained end-to-end by standard back-propagation without additional supervision. The qualities of Contextual Convolutional Networks make it compatible with a broad range of vision tasks and boost the state-of-the-art architecture ConvNeXt-Tiny by 1.8% on top-1 accuracy of ImageNet classification. The superiority of the proposed model reveals the potential of contextualized representation learning for vision tasks. 
Code is available at: \\url{https://github.com/liang4sx/contextual_cnn}.\n", "keywords": "Convolutional Neural Networks", "primary_area": "", "supplementary_material": "/attachment/1df2ca6e9b0410dbacf29cc7d6637c23d65e2c83.zip", "author": "Shuxian Liang;Xu Shen;Tongliang Liu;Xian-Sheng Hua", "authorids": "~Shuxian_Liang1;~Xu_Shen1;~Tongliang_Liu1;~Xian-Sheng_Hua1", "gender": "M;M;M;M", "homepage": ";;https://tongliang-liu.github.io/;", "dblp": "269/4581;09/10130-1.html;150/6667;56/5807-1", "google_scholar": ";38jwGs8AAAAJ;https://scholar.google.com.au/citations?user=EiLdZ_YAAAAJ;https://scholar.google.co.uk/citations?user=6G-l4o0AAAAJ", "orcid": ";;;", "linkedin": ";;;xshua", "or_profile": "~Shuxian_Liang1;~Xu_Shen1;~Tongliang_Liu1;~Xian-Sheng_Hua1", "aff": "Alibaba Group;Alibaba Group;University of Sydney;Terminus Group", "aff_domain": "alibaba-inc.com;alibaba-inc.com;sydney.edu.au;tslsmart.com", "position": "Intern;Researcher;Lecturer;Principal Researcher", "bibtex": "@inproceedings{\nliang2023contextual,\ntitle={Contextual Convolutional Networks},\nauthor={Shuxian Liang and Xu Shen and Tongliang Liu and Xian-Sheng Hua},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=PldynS56bN}\n}", "github": "", "project": "", "reviewers": "CrqV;y6tk;2om9;oBzi", "pdf_size": 2503999, "recommendation": "6;6;8;8", "confidence": "4;3;3;4", "correctness": "3;4;4;1", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;3;3;4", "wc_summary_paper": "50;85;178;84", "wc_strength_and_weaknesses": "191;117;123;122", "wc_clarity_quality_novelty_and_reproducibility": "16;38;134;104", "wc_summary_review": "9;33;119;40", "wc_review": "266;273;554;350", "wc_reply_reviewers": "80;0;0;0", "wc_reply_authors": "739;477;502;524", "reply_reviewers": "1;0;0;0", "reply_authors": "2;1;1;1", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 1.224744871391589 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 99.25, 47.59923843928598 ], "wc_strength_and_weaknesses_avg": [ 138.25, 30.53993287484437 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 73.0, 47.843494855622744 ], "wc_summary_review_avg": [ 50.25, 41.324175732856425 ], "wc_review_avg": [ 360.75, 116.33867585631187 ], "wc_reply_reviewers_avg": [ 20.0, 34.64101615137755 ], "wc_reply_authors_avg": [ 560.5, 104.38989414689527 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": -0.40824829046386296, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17840581016494077570&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "pdf": "https://openreview.net/pdf?id=PldynS56bN", "email": "alibaba-inc.com;alibaba-inc.com;sydney.edu.au;tslsmart.com", "author_num": 4, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Alibaba Group;University of Sydney;Terminus Group", "aff_unique_dep": ";;", "aff_unique_url": "https://www.alibaba.com;https://www.sydney.edu.au;", "aff_unique_abbr": "Alibaba;USYD;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "China;Australia;" }, { "id": "Plr5l7r0jY6", "title": "Language model with Plug-in Knowldge Memory", "track": "main", "status": "Withdraw", 
"tldr": "we propose a pre-training framework to decouple the knowledge storage from PLM", "abstract": "Large-scale pre-trained language models(PLM) have made impressive results in a wide range of NLP tasks and it has been revealed that one of the key factors to their success is the parameters of these models implicitly learn various types of knowledge in the pre-training corpus. However, encoding knowledge implicitly in the model parameters has two fundamental drawbacks. First, the knowledge is neither editable nor scalable once the model is trained, which is especially problematic in that knowledge is consistently evolving. Second, it lacks interpretability and prevents us from understanding what kind of knowledge PLM needs to solve certain task. In this paper, we introduce PlugLM, a pre-training model with differentiable plug-in memory(DPM). The key intuition behind is to decouple the knowledge storage from model parameters with an editable and scalable key-value memory and leverage knowledge in an explainable manner by knowledge retrieval in the DPM. We conduct extensive experiments under various settings to justify this design choice. In domain adaptation setting, PlugLM could be easily adapted to different domains with plugable in-domain memory---obtaining 3.95 F1 improvements across four domains, without any in-domain training. PlugLM could also keep absorbing new knowledge after pre-training is done by knowledge updating operation in the DPM without re-training. Finally, we show that by incorporating training samples into DPM with knowledge prompting, PlugLM could further be improved by the instruction of in-task knowledge.", "keywords": "pre-training;language model;memory", "primary_area": "", "supplementary_material": "", "author": "Xin Cheng;Yankai Lin;Dongyan Zhao;Rui Yan", "authorids": "~Xin_Cheng2;~Yankai_Lin1;~Dongyan_Zhao2;~Rui_Yan2", "gender": ";M;M;M", "homepage": ";https://linyankai.github.io/;https://www.wict.pku.edu.cn/zhaodongyan/en/;https://gsai.ruc.edu.cn/english/ruiyan", "dblp": ";161/0001.html;63/1870;19/2405-1", "google_scholar": ";https://scholar.google.com.hk/citations?user=j8K1FqEAAAAJ;lhR8-68AAAAJ;eLw6g-UAAAAJ", "orcid": ";0000-0002-9182-8158;;0000-0002-3356-6823", "linkedin": ";;;", "or_profile": "~Xin_Cheng2;~Yankai_Lin1;~Dongyan_Zhao2;~Rui_Yan2", "aff": ";Renmin University of China;Peking University;Renmin University of China", "aff_domain": ";ruc.edu.cn;pku.edu.cn;ruc.edu.cn", "position": ";Assistant Professor;Full Professor;Associate Professor", "bibtex": "@misc{\ncheng2023language,\ntitle={Language model with Plug-in Knowldge Memory},\nauthor={Xin Cheng and Yankai Lin and Dongyan Zhao and Rui Yan},\nyear={2023},\nurl={https://openreview.net/forum?id=Plr5l7r0jY6}\n}", "github": "", "project": "", "reviewers": "Ry8F;NzhP;QFpb", "site": "https://openreview.net/forum?id=Plr5l7r0jY6", "pdf_size": 1141566, "recommendation": "5;6;6", "confidence": "4;4;3", "correctness": "2;3;3", "technical_novelty": "2;3;4", "empirical_novelty": "2;3;3", "wc_summary_paper": "94;38;311", "wc_strength_and_weaknesses": "418;247;212", "wc_clarity_quality_novelty_and_reproducibility": "144;372;146", "wc_summary_review": "28;39;69", "wc_review": "684;696;738", "wc_reply_reviewers": "0;69;77", "wc_reply_authors": "922;558;972", "reply_reviewers": "0;1;1", "reply_authors": "3;2;4", "recommendation_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], 
"technical_novelty_avg": [ 3.0, 0.816496580927726 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 147.66666666666666, 117.73510757440006 ], "wc_strength_and_weaknesses_avg": [ 292.3333333333333, 90.00123455943381 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 220.66666666666666, 107.01194118207349 ], "wc_summary_review_avg": [ 45.333333333333336, 17.326921891156037 ], "wc_review_avg": [ 706.0, 23.15167380558045 ], "wc_reply_reviewers_avg": [ 48.666666666666664, 34.56716489515576 ], "wc_reply_authors_avg": [ 817.3333333333334, 184.50895792767233 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 3.0, 0.816496580927726 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.4999999999999999, "corr_recommendation_correctness": 0.9999999999999997, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:QoFOSLylGXAJ:scholar.google.com/&scioq=Language+model+with+Plug-in+Knowldge+Memory&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;0", "aff_unique_norm": "Renmin University of China;Peking University", "aff_unique_dep": ";", "aff_unique_url": "http://www.ruc.edu.cn;http://www.pku.edu.cn", "aff_unique_abbr": "RUC;Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Real-Time Image Demoir$\\acute{e}$ing on Mobile Devices", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12202", "id": "PmP_sf3JkrH", "poster": "", "openreview": "https://openreview.net/forum?id=PmP_sf3JkrH", "slides": "https://iclr.cc/virtual/2023/poster/12202", "video": "https://iclr.cc/virtual/2023/poster/12202", "author_site": "Yuxin Zhang, Mingbao Lin, Xunchao Li, Han Liu, Guozhi Wang, Fei Chao, Ren Shuai, Yafei Wen, Xiaoxin Chen, Rongrong Ji", "tldr": "This paper presents a dynamic demoireing acceleration method towards a real-time image demoireing on mobile devices.", "abstract": "Moir$\\acute{e}$ patterns appear frequently when taking photos of digital screens, drastically degrading the image quality. Despite the advance of CNNs in image demoir$\\acute{e}$ing, existing networks are with heavy design, causing massive computation burden for mobile devices. In this paper, we launch the first study on accelerating demoir$\\acute{e}$ing networks and propose a dynamic demoir$\\acute{e}$ing acceleration method (DDA) towards a real-time deployment on mobile devices. Our stimulus stems from a simple-yet-universal fact that moir${\\'e}$ patterns often unbalancedly distribute across an image. Consequently, excessive computation is wasted upon non-moir$\\acute{e}$ areas. Therefore, we reallocate computation costs in proportion to the complexity of image patches. In order to achieve this aim, we measure the complexity of an image patch by a novel moir$\\acute{e}$ prior that considers both colorfulness and frequency information of moir$\\acute{e}$ patterns. Then, we restore higher-complex image patches using larger networks and the lower-complex ones are assigned with smaller networks to relieve the computation burden. At last, we train all networks in a parameter-shared supernet paradigm to avoid additional parameter burden. Extensive experiments on several benchmarks demonstrate the efficacy of our DDA. 
In addition, the acceleration evaluated on the VIVO X80 Pro smartphone equipped with the chip of Snapdragon 8 Gen 1 also shows that our method can drastically reduce the inference time, leading to a real-time image demoir$\\acute{e}$ing on mobile devices. Source codes and models are released at https://github.com/zyxxmu/DDA.\n", "keywords": "Image Demoireing;Network Acceleration", "primary_area": "", "supplementary_material": "/attachment/4b3b0e9fc3865df993db29501eabe91dd625b326.zip", "author": "Yuxin Zhang;Mingbao Lin;Xunchao Li;Han Liu;Guozhi Wang;Fei Chao;Ren Shuai;Yafei Wen;Xiaoxin Chen;Rongrong Ji", "authorids": "~Yuxin_Zhang3;~Mingbao_Lin1;~Xunchao_Li1;~Han_Liu14;~Guozhi_Wang1;~Fei_Chao1;~Ren_Shuai1;~Yafei_Wen1;~Xiaoxin_Chen1;~Rongrong_Ji5", "gender": ";M;M;F;M;M;M;M;M;M", "homepage": ";http://lmb.bjbxit.cn/;;https://github.com/liuhan26;;https://cogsci.xmu.edu.cn/info/1034/1249.htm;https://github.com/rkshuai;;;http://mac.xmu.edu.cn/rrji-en.html", "dblp": "03/7346-2;211/5903;;;;118/5221-1.html;;145/8210.html;17/2084-1;86/5681", "google_scholar": "6IeJLJoAAAAJ;Dp3L1bsAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;;srS6rNMAAAAJ;;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=zh-CN;", "orcid": "0000-0002-4409-7030;0000-0003-1764-1894;;;;;;;;", "linkedin": ";mingbao-lin-890444105/;;;https://www.linkedin.cn/incareer/in/ACoAAD8TNyYB8UfHgUZ8evBg7RI0yUJrCwU5kf4;;;;xiaoxinchen/;", "or_profile": "~Yuxin_Zhang3;~Mingbao_Lin1;~Xunchao_Li1;~Han_Liu14;~Guozhi_Wang1;~Fei_Chao1;~Ren_Shuai1;~Yafei_Wen1;~Xiaoxin_Chen1;~Rongrong_Ji5", "aff": "Xiamen University;Xiamen University;Xiamen University;;;Xiamen University;vivo;vivo;vivo AI Lab;Xiamen University", "aff_domain": "xmu.edu.cn;xmu.edu.cn;xmu.edu.cn;;;xmu.edu.cn;vivo.com;vivo.com;vivo.com;xmu.edu.cn", "position": "PhD student;PhD student;MS student;;;Associate Professor;Researcher;Researcher;Researcher;Full Professor", "bibtex": "@inproceedings{\nzhang2023realtime,\ntitle={Real-Time Image Demoir\\${\\textbackslash}acute\\{e\\}\\$ing on Mobile Devices},\nauthor={Yuxin Zhang and Mingbao Lin and Xunchao Li and Han Liu and Guozhi Wang and Fei Chao and Ren Shuai and Yafei Wen and Xiaoxin Chen and Rongrong Ji},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=PmP_sf3JkrH}\n}", "github": "", "project": "", "reviewers": "LYcM;9cpB;Wo5f;Y5JK", "pdf_size": 6370893, "recommendation": "5;6;8;8", "confidence": "4;4;5;4", "correctness": "3;3;3;4", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "162;121;109;43", "wc_strength_and_weaknesses": "528;341;415;60", "wc_clarity_quality_novelty_and_reproducibility": "523;111;104;13", "wc_summary_review": "115;74;79;59", "wc_review": "1328;647;707;175", "wc_reply_reviewers": "87;453;18;0", "wc_reply_authors": "1507;877;598;110", "reply_reviewers": "1;1;1;0", "reply_authors": "4;3;1;1", "recommendation_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 108.75, 42.745613810074126 ], "wc_strength_and_weaknesses_avg": [ 336.0, 172.7035031491834 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 187.75, 197.37955187911436 ], "wc_summary_review_avg": [ 81.75, 20.559365262575593 ], "wc_review_avg": [ 714.25, 409.894727338618 ], "wc_reply_reviewers_avg": [ 139.5, 
183.88923296376 ], "wc_reply_authors_avg": [ 773.0, 504.917319964368 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 1.299038105676658 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 10, 0 ], "corr_recommendation_confidence": 0.5555555555555555, "corr_recommendation_correctness": 0.5555555555555555, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "pdf": "https://openreview.net/pdf?id=PmP_sf3JkrH", "email": "xmu.edu.cn;xmu.edu.cn;xmu.edu.cn;;;xmu.edu.cn;vivo.com;vivo.com;vivo.com;xmu.edu.cn", "author_num": 10, "aff_unique_index": "0;0;0;0;1;1;1;0", "aff_unique_norm": "Xiamen University;vivo", "aff_unique_dep": ";", "aff_unique_url": "https://www.xmu.edu.cn;https://www.vivo.com.cn", "aff_unique_abbr": "XMU;vivo", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "PoU_NgCStE5", "title": "Limits of Algorithmic Stability for Distributional Generalization", "track": "main", "status": "Reject", "tldr": "In this paper we empirically show that the more stable a learning algorithm is the more robust the resulting model is to covariate, label, and subpopulation shifts. ", "abstract": "As machine learning models become widely considered in safety-critical settings, it is important to understand when models may fail after deployment. One cause of model failure is distribution shift, where the training and test data distributions differ. In this paper we investigate the benefits of training models using methods which are algorithmically stable towards improving model robustness, motivated by recent theoretical developments which show a connection between the two. We use techniques from differentially private stochastic gradient descent (DP-SGD) to control the level of algorithmic stability during training. We compare the performance of algorithmically stable training procedures to stochastic gradient descent (SGD) across a variety of possible distribution shifts - specifically covariate, label, and subpopulation shifts. We find that models trained with algorithmically stable procedures have consistently lower generalization gaps across various types of shifts and shift severities, as well as higher absolute test performance in label shift. Finally, we demonstrate that there is a tradeoff between distributional robustness, stability, and performance.", "keywords": "Distribution Shift;Robustness;Evaluation", "primary_area": "", "supplementary_material": "/attachment/e81292f7438f78713fdc73eb1ba1e0949eb43546.zip", "author": "Neha Hulkund;Vinith Menon Suriyakumar;Taylor W.
Killian;Marzyeh Ghassemi", "authorids": "~Neha_Hulkund1;~Vinith_Menon_Suriyakumar1;~Taylor_W._Killian1;~Marzyeh_Ghassemi2", "gender": "F;M;M;F", "homepage": "https://hulkund.github.io;;https://twkillian.github.io;https://www.healthyml.org/", "dblp": "297/5263;;192/1575;145/6563", "google_scholar": ";https://scholar.google.com/citations?hl=en;xA3RcaUAAAAJ;", "orcid": ";;;", "linkedin": ";vsuriyakumar;taylor-w-killian-49680b21/;", "or_profile": "~Neha_Hulkund1;~Vinith_Menon_Suriyakumar1;~Taylor_W._Killian1;~Marzyeh_Ghassemi2", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Vector Institute;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;vectorinstitute.ai;mit.edu", "position": "MS student;PhD student;Graduate Researcher;Assistant Professor", "bibtex": "@misc{\nhulkund2023limits,\ntitle={Limits of Algorithmic Stability for Distributional Generalization},\nauthor={Neha Hulkund and Vinith Menon Suriyakumar and Taylor W. Killian and Marzyeh Ghassemi},\nyear={2023},\nurl={https://openreview.net/forum?id=PoU_NgCStE5}\n}", "github": "", "project": "", "reviewers": "iJTD;6xKr;JhgW;weZZ", "site": "https://openreview.net/forum?id=PoU_NgCStE5", "pdf_size": 507226, "recommendation": "3;3;6;8", "confidence": "2;3;2;5", "correctness": "2;2;2;3", "technical_novelty": "2;2;3;2", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "58;34;56;108", "wc_strength_and_weaknesses": "301;305;58;148", "wc_clarity_quality_novelty_and_reproducibility": "3;227;30;17", "wc_summary_review": "71;30;215;23", "wc_review": "433;596;359;296", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.0, 2.1213203435596424 ], "confidence_avg": [ 3.0, 1.224744871391589 ], "correctness_avg": [ 2.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 64.0, 27.09243436828813 ], "wc_strength_and_weaknesses_avg": [ 203.0, 104.94998808956578 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 69.25, 91.57612953166344 ], "wc_summary_review_avg": [ 84.75, 77.40276157864136 ], "wc_review_avg": [ 421.0, 112.06917506611708 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.6735753140545634, "corr_recommendation_correctness": 0.8164965809277261, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:lHl5ZNrtZSAJ:scholar.google.com/&scioq=Limits+of+Algorithmic+Stability+for+Distributional+Generalization&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Massachusetts Institute of Technology;Vector Institute", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://vectorinstitute.ai/", "aff_unique_abbr": "MIT;Vector Institute", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;Canada" }, { "id": "PocqkbIelt", "title": "CounterNet: End-to-End Training of Prediction Aware Counterfactual Explanations", "track": "main", "status": "Reject", "tldr": "", "abstract": "Counterfactual (or CF) explanations are a type of local explanations for Machine Learning (ML) model predictions, which offer a contrastive case as an explanation by finding the smallest changes (in feature 
space) to the input data point, which will lead to a different prediction by the ML model. Existing CF explanation techniques suffer from two major limitations: (i) all of them are post-hoc methods designed for use with proprietary ML models --- as a result, their procedure for generating CF explanations is uninformed by the training of the ML model, which leads to misalignment between model predictions and explanations; and (ii) most of them rely on solving separate time-intensive optimization problems to find CF explanations for each input data point (which negatively impacts their runtime). This work makes a novel departure from the prevalent post-hoc paradigm (of generating CF explanations) by presenting CounterNet, an end-to-end learning framework which integrates predictive model training and the generation of counterfactual (CF) explanations into a single pipeline. We adopt a block-wise coordinate descent procedure which helps in effectively training CounterNet's network. Our extensive experiments on multiple real-world datasets show that CounterNet generates high-quality predictions, and consistently achieves 100% CF validity and very low proximity scores (thereby achieving a well-balanced cost-invalidity trade-off) for any new input instance, and runs 3X faster than existing state-of-the-art baselines. \n", "keywords": "Counterfactual Explanation;Algorithmic Recourse;Explainable AI;Interpretability", "primary_area": "", "supplementary_material": "/attachment/e53cf3ef844e9c5ab9f37cd6fa921e2e3cc0eb4b.zip", "author": "Hangzhi Guo;Thanh Hong Nguyen;Amulya Yadav", "authorids": "~Hangzhi_Guo1;~Thanh_Hong_Nguyen1;~Amulya_Yadav1", "gender": ";F;Not Specified", "homepage": ";https://ix.cs.uoregon.edu/~thanhhng/;http://amulyayadav.com", "dblp": ";117/4935;121/3511", "google_scholar": ";6fpZnQIAAAAJ;jI3_2koAAAAJ", "orcid": ";;", "linkedin": ";;amulyayadav/", "or_profile": "~Hangzhi_Guo1;~Thanh_Hong_Nguyen1;~Amulya_Yadav1", "aff": ";University of Oregon;Pennsylvania State University", "aff_domain": ";uoregon.edu;psu.edu", "position": ";Assistant Professor;Assistant Professor", "bibtex": "@misc{\nguo2023counternet,\ntitle={CounterNet: End-to-End Training of Prediction Aware Counterfactual Explanations},\nauthor={Hangzhi Guo and Thanh Hong Nguyen and Amulya Yadav},\nyear={2023},\nurl={https://openreview.net/forum?id=PocqkbIelt}\n}", "github": "", "project": "", "reviewers": "Mece;9tKf;Ycp6;MUfa", "site": "https://openreview.net/forum?id=PocqkbIelt", "pdf_size": 1154882, "recommendation": "3;3;3;10", "confidence": "5;4;4;4", "correctness": "1;2;3;3", "technical_novelty": "1;2;2;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "61;47;108;84", "wc_strength_and_weaknesses": "88;243;450;395", "wc_clarity_quality_novelty_and_reproducibility": "519;17;91;46", "wc_summary_review": "64;22;53;652", "wc_review": "732;329;702;1177", "wc_reply_reviewers": "114;0;0;0", "wc_reply_authors": "679;521;1560;425", "reply_reviewers": "1;0;0;0", "reply_authors": "2;2;3;2", "recommendation_avg": [ 4.75, 3.031088913245535 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 2.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 75.0, 23.18404623873926 ], "wc_strength_and_weaknesses_avg": [ 294.0, 141.04431927589286 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 168.25, 204.21480724962134 ], "wc_summary_review_avg": [ 197.75, 262.71312776486826 ], "wc_review_avg": [ 735.0, 
300.5403467090567 ], "wc_reply_reviewers_avg": [ 28.5, 49.363448015713004 ], "wc_reply_authors_avg": [ 796.25, 450.18072759726175 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": 0.5222329678670935, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9809936915339699464&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff_unique_index": "0;1", "aff_unique_norm": "University of Oregon;Pennsylvania State University", "aff_unique_dep": ";", "aff_unique_url": "https://www.uoregon.edu;https://www.psu.edu", "aff_unique_abbr": "UO;PSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Continual Transformers: Redundancy-Free Attention for Online Inference", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12002", "id": "PolHquob8M7", "poster": "", "openreview": "https://openreview.net/forum?id=PolHquob8M7", "slides": "https://iclr.cc/virtual/2023/poster/12002", "video": "https://iclr.cc/virtual/2023/poster/12002", "author_site": "Lukas Hedegaard, Arian Bakhtiarnia, Alexandros Iosifidis", "tldr": "A Transformer Decoder acceleration for online stream processing validated with experiments in Online Action Detection and Audio Classification.", "abstract": "Transformers in their common form are inherently limited to operate on whole token sequences rather than on one token at a time. Consequently, their use during online inference on time-series data entails considerable redundancy due to the overlap in successive token sequences. In this work, we propose novel formulations of the Scaled Dot-Product Attention, which enable Transformers to perform efficient online token-by-token inference on a continual input stream. Importantly, our modifications are purely to the order of computations, while the outputs and learned weights are identical to those of the original Transformer Encoder.
We validate our Continual Transformer Encoder with experiments on the THUMOS14, TVSeries and GTZAN datasets with remarkable results: Our Continual one- and two-block architectures reduce the floating point operations per prediction by up to 63x and 2.6x, respectively, while retaining predictive performance.", "keywords": "Transformer;Continual Inference Networks;Online inference;Stream processing;Acceleration;Online Action Detection;Audio classification", "primary_area": "", "supplementary_material": "", "author": "Lukas Hedegaard;Arian Bakhtiarnia;Alexandros Iosifidis", "authorids": "~Lukas_Hedegaard1;~Arian_Bakhtiarnia1;~Alexandros_Iosifidis3", "gender": ";M;M", "homepage": ";https://www.tuni.fi/en/people/alexandros-iosifidis;", "dblp": ";01/9539;", "google_scholar": "65eZvp4AAAAJ;KjsL0KEAAAAJ;15ovcOoAAAAJ", "orcid": ";0000-0003-4807-1345;0000-0002-2841-864X", "linkedin": ";;lukashedegaard/", "or_profile": "~Arian_Bakhtiarnia1;~Alexandros_Iosifidis2;~Lukas_Hedegaard_Jensen1", "aff": "Aarhus University;Aarhus University;Aarhus University", "aff_domain": "au.dk;au.dk;eng.au.dk", "position": "PhD student;Full Professor;PhD student", "bibtex": "@inproceedings{\nhedegaard2023continual,\ntitle={Continual Transformers: Redundancy-Free Attention for Online Inference},\nauthor={Lukas Hedegaard and Arian Bakhtiarnia and Alexandros Iosifidis},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=PolHquob8M7}\n}", "github": "", "project": "", "reviewers": "RfrZ;DN9Y;bhNe", "pdf_size": 1515338, "recommendation": "6;8;8", "confidence": "3;4;2", "correctness": "4;4;4", "technical_novelty": "3;3;4", "empirical_novelty": "3;3;4", "wc_summary_paper": "45;48;90", "wc_strength_and_weaknesses": "131;270;44", "wc_clarity_quality_novelty_and_reproducibility": "35;44;28", "wc_summary_review": "16;53;27", "wc_review": "227;415;189", "wc_reply_reviewers": "19;24;0", "wc_reply_authors": "407;1052;0", "reply_reviewers": "1;1;0", "reply_authors": "1;2;0", "recommendation_avg": [ 7.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "correctness_avg": [ 4.0, 0.0 ], "technical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 61.0, 20.54263858417414 ], "wc_strength_and_weaknesses_avg": [ 148.33333333333334, 93.07464149213195 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 35.666666666666664, 6.548960901462833 ], "wc_summary_review_avg": [ 32.0, 15.513435037626794 ], "wc_review_avg": [ 277.0, 98.8062076322468 ], "wc_reply_reviewers_avg": [ 14.333333333333334, 10.338708279513881 ], "wc_reply_authors_avg": [ 486.3333333333333, 433.12533469603875 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.816496580927726 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16912376703515732575&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=PolHquob8M7", "email": "au.dk;au.dk;eng.au.dk", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Aarhus University", "aff_unique_dep": "", "aff_unique_url": "https://au.dk", "aff_unique_abbr": "AU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", 
"aff_country_unique": "Denmark" }, { "id": "Pqi9ZxxdjM", "title": "Leveraging the Third Dimension in Contrastive Learning", "track": "main", "status": "Reject", "tldr": "Depth signal improves contrastive learning", "abstract": "Self-Supervised Learning (SSL) methods operate on unlabeled data to learn robust representations useful for downstream tasks. Most SSL methods rely on augmentations obtained by transforming the 2D image pixel map. These augmentations ignore the fact that biological vision takes place in an immersive three-dimensional, temporally contiguous environment, and that low-level biological vision relies heavily on depth cues. Using a signal provided by a pretrained state-of-the-art RGB-to-depth model (the Depth Prediction Transformer, Ranftl et al., 2021), we explore two distinct approaches to incorporating depth signals into the SSL framework. First, we evaluate contrastive learning using an RGB+depth input representation. Second, we use the depth signal to generate novel views from slightly different camera positions, thereby producing a 3D augmentation for contrastive learning. We evaluate these two approaches on three different SSL methods---BYOL, SimSiam, and SwAV---using ImageNette (10 class subset of ImageNet) and ImageNet-100. We find that both approaches to incorporating depth signals improve the robustness and generalization of the baseline SSL methods, though the first approach (with depth-channel concatenation) is superior.", "keywords": "contrastive learning;depth;self-supervised learning", "primary_area": "", "supplementary_material": "/attachment/e7e2918ffc2f2d19b3cd89dd2648e9917f1c5a1f.zip", "author": "Sumukh K Aithal;Anirudh Goyal;Alex Lamb;Yoshua Bengio;Michael Curtis Mozer", "authorids": "~Sumukh_K_Aithal1;~Anirudh_Goyal1;~Alex_Lamb1;~Yoshua_Bengio1;~Michael_Curtis_Mozer1", "gender": "M;M;M;M;M", "homepage": "https://anirudh9119.github.io/;http://yoshuabengio.org;https://www.cs.colorado.edu/~mozer;;https://sumukhaithal6.github.io/", "dblp": "172/1039;56/953;m/MichaelCMozer;;299/5911", "google_scholar": "krrh6OUAAAAJ;kukA0LcAAAAJ;lmjR_qMAAAAJ;https://scholar.google.ca/citations?user=BFzFy1YAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;;", "linkedin": ";yoshuabengio/?originalSubdomain=ca;;;sumukh-aithal-9801b4189", "or_profile": "~Anirudh_Goyal1;~Yoshua_Bengio1;~Michael_Curtis_Mozer1;~Alex_Matthew_Lamb1;~Sumukh_Aithal_K1", "aff": "Google DeepMind;University of Montreal;Google DeepMind;Microsoft Research NYC;Fujitsu Research and Development Center Co. 
Ltm.", "aff_domain": "google.com;umontreal.ca;google.com;microsoft.com;fujitsu.com", "position": "Researcher;Full Professor;Research Scientist;Researcher;Researcher", "bibtex": "@misc{\naithal2023leveraging,\ntitle={Leveraging the Third Dimension in Contrastive Learning},\nauthor={Sumukh K Aithal and Anirudh Goyal and Alex Lamb and Yoshua Bengio and Michael Curtis Mozer},\nyear={2023},\nurl={https://openreview.net/forum?id=Pqi9ZxxdjM}\n}", "github": "", "project": "", "reviewers": "TCcc;9hga;N68U;nFbm", "site": "https://openreview.net/forum?id=Pqi9ZxxdjM", "pdf_size": 6073119, "recommendation": "3;5;5;6", "confidence": "5;4;4;2", "correctness": "2;4;3;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "71;73;90;70", "wc_strength_and_weaknesses": "361;181;252;205", "wc_clarity_quality_novelty_and_reproducibility": "23;19;39;20", "wc_summary_review": "27;32;82;28", "wc_review": "482;305;463;323", "wc_reply_reviewers": "0;66;0;0", "wc_reply_authors": "589;246;338;252", "reply_reviewers": "0;1;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 76.0, 8.154753215150045 ], "wc_strength_and_weaknesses_avg": [ 249.75, 69.12081813751918 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 25.25, 8.073877630977572 ], "wc_summary_review_avg": [ 42.25, 23.025800746119558 ], "wc_review_avg": [ 393.25, 79.78839201287366 ], "wc_reply_reviewers_avg": [ 16.5, 28.578838324886476 ], "wc_reply_authors_avg": [ 356.25, 139.21992493892532 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.894736842105263, "corr_recommendation_correctness": 0.6488856845230502, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:wiwB8frc7Q8J:scholar.google.com/&scioq=Leveraging+the+Third+Dimension+in+Contrastive+Learning&hl=en&as_sdt=0,23", "gs_version_total": 5, "aff_unique_index": "0;1;0;2;3", "aff_unique_norm": "Google;University of Montreal;Microsoft;Fujitsu Research and Development Center", "aff_unique_dep": "Google DeepMind;;Microsoft Research;Research and Development", "aff_unique_url": "https://deepmind.com;https://wwwumontreal.ca;https://www.microsoft.com/en-us/research/group/microsoft-research-new-york-city;https://www.fujitsu.com/global/", "aff_unique_abbr": "DeepMind;UM;MSR NYC;Fujitsu R&D", "aff_campus_unique_index": "1", "aff_campus_unique": ";New York City", "aff_country_unique_index": "0;1;0;2;3", "aff_country_unique": "United Kingdom;Canada;United States;Japan" }, { "title": "Flow Matching for Generative Modeling", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11309", "id": "PqvMRDCJT9t", "poster": "", "openreview": "https://openreview.net/forum?id=PqvMRDCJT9t", "slides": "https://iclr.cc/virtual/2023/poster/11309", "video": "https://iclr.cc/virtual/2023/poster/11309", "author_site": "Yaron Lipman, Ricky T. Q. Chen, Heli Ben-Hamu, Maximilian Nickel, Matthew Le", "tldr": "We introduce a new simulation-free approach for training Continuous Normalizing Flows, generalizing the probability paths induced by simple diffusion processes. 
We obtain state-of-the-art on ImageNet in both NLL and FID among competing methods.", "abstract": "We introduce a new paradigm for generative modeling built on Continuous Normalizing Flows (CNFs), allowing us to train CNFs at unprecedented scale. Specifically, we present the notion of Flow Matching (FM), a simulation-free approach for training CNFs based on regressing vector fields of fixed conditional probability paths. Flow Matching is compatible with a general family of Gaussian probability paths for transforming between noise and data samples---which subsumes existing diffusion paths as specific instances. Interestingly, we find that employing FM with diffusion paths results in a more robust and stable alternative for training diffusion models. Furthermore, Flow Matching opens the door to training CNFs with other, non-diffusion probability paths. An instance of particular interest is using Optimal Transport (OT) displacement interpolation to define the conditional probability paths. These paths are more efficient than diffusion paths, provide faster training and sampling, and result in better generalization. Training CNFs using Flow Matching on ImageNet leads to consistently better performance than alternative diffusion-based methods in terms of both likelihood and sample quality, and allows fast and reliable sample generation using off-the-shelf numerical ODE solvers.", "keywords": "continuous normalizing flows;generative models", "primary_area": "", "supplementary_material": "", "author": "Yaron Lipman;Ricky T. Q. Chen;Heli Ben-Hamu;Maximilian Nickel;Matthew Le", "authorids": "~Yaron_Lipman1;~Ricky_T._Q._Chen1;~Heli_Ben-Hamu1;~Maximilian_Nickel1;~Matthew_Le2", "gender": ";;;M;", "homepage": ";;;https://mnick.github.io/;", "dblp": ";;;83/10622;", "google_scholar": ";;;KDqGTIUAAAAJ;", "orcid": ";;;0000-0001-5006-0827;", "linkedin": ";;;;", "or_profile": "~Yaron_Lipman1;~Ricky_T._Q._Chen1;~Heli_Ben-Hamu1;~Maximilian_Nickel1;~Matthew_Le2", "aff": ";;;Meta Facebook;", "aff_domain": ";;;fb.com;", "position": ";;;Research Scientist;", "bibtex": "@inproceedings{\nlipman2023flow,\ntitle={Flow Matching for Generative Modeling},\nauthor={Yaron Lipman and Ricky T. Q. 
Chen and Heli Ben-Hamu and Maximilian Nickel and Matthew Le},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=PqvMRDCJT9t}\n}", "github": "", "project": "", "reviewers": "AKwV;LJky;tWC7;SQzg", "pdf_size": 22250908, "recommendation": "5;8;8;10", "confidence": "4;4;4;4", "correctness": "2;4;3;4", "technical_novelty": "2;3;4;4", "empirical_novelty": "3;3;3;4", "wc_summary_paper": "91;99;80;103", "wc_strength_and_weaknesses": "481;147;550;156", "wc_clarity_quality_novelty_and_reproducibility": "18;180;49;55", "wc_summary_review": "38;32;24;77", "wc_review": "628;458;703;391", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "750;395;435;21", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 7.75, 1.7853571071357126 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 3.25, 0.82915619758885 ], "empirical_novelty_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 93.25, 8.78564169540279 ], "wc_strength_and_weaknesses_avg": [ 333.5, 183.65524767890517 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 75.5, 61.945540598173814 ], "wc_summary_review_avg": [ 42.75, 20.38841582860228 ], "wc_review_avg": [ 545.0, 125.63638008156714 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 400.25, 258.54919744605667 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.8866206949335731, "gs_citation": 1222, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16255473144394033072&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=PqvMRDCJT9t", "email": ";;;fb.com;", "author_num": 5, "aff_unique_index": "0", "aff_unique_norm": "Meta", "aff_unique_dep": "Meta Platforms, Inc.", "aff_unique_url": "https://meta.com", "aff_unique_abbr": "Meta", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "PrRWSVT2htx", "title": "CEPD: Co-Exploring Pruning and Decomposition for Compact DNN Models", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Pruning and decomposition are two important techniques to compress deep neural network (DNN) models. To date, these two popular yet distinct approaches are typically used in a separate way; while their efficient integration for better compression performance is little explored. In this paper, we perform systematic co-exploration on pruning and decomposition toward compact DNN models. We first investigate and analyze several important design factors for joint pruning and decomposition, including operational sequence, decomposition format, and optimization procedure. Based on the observations from our analysis, we then propose CEPD, a unified DNN compression framework that can simultaneously capture the benefits of pruning and decomposition in an efficient way. Empirical experiments demonstrate the promising performance of our proposed solution. Notably, on CIFAR-10 dataset, CEPD brings 0.72% and 0.45% accuracy increase over the baseline ResNet-56 and MobileNetV2 models, respectively, and meanwhile the computational costs are reduced by 43.0% and 44.2%, respectively. 
On the ImageNet dataset, our approach can enable 0.10% and 1.39% accuracy increase over the baseline ResNet-18 and ResNet-50 models with 59.4% and 54.6% fewer parameters, respectively. ", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Yang Sui;Wanzhao Yang;Miao Yin;Yu Gong;Bo Yuan", "authorids": "~Yang_Sui1;~Wanzhao_Yang1;~Miao_Yin1;~Yu_Gong4;~Bo_Yuan3", "gender": "M;;;M;", "homepage": "https://eclipsess.github.io/yangsui.github.io/;;https://noodle-lab.github.io/;;", "dblp": "77/10522;;199/1982;;41/1662-1", "google_scholar": "Q2W1p6sAAAAJ;;ILDdu98AAAAJ;FR4HP5wAAAAJ;oUy9elEAAAAJ", "orcid": "0000-0003-3020-0612;;;;", "linkedin": "yang-sui-308055117/;;miao-yin-55ab64170/;;", "or_profile": "~Yang_Sui1;~Wanzhao_Yang1;~Miao_Yin1;~Yu_Gong4;~Bo_Yuan3", "aff": "Rutgers University;;Rutgers University;Rutgers University;Rutgers University", "aff_domain": "rutgers.edu;;rutgers.edu;rutgers.edu;rutgers.edu", "position": "PhD student;;PhD student;PhD student;Assistant Professor", "bibtex": "@misc{\nsui2023cepd,\ntitle={{CEPD}: Co-Exploring Pruning and Decomposition for Compact {DNN} Models},\nauthor={Yang Sui and Wanzhao Yang and Miao Yin and Yu Gong and Bo Yuan},\nyear={2023},\nurl={https://openreview.net/forum?id=PrRWSVT2htx}\n}", "github": "", "project": "", "reviewers": "eaMR;QXz2;ko8d;HPUX;pshK", "site": "https://openreview.net/forum?id=PrRWSVT2htx", "pdf_size": 914240, "recommendation": "5;5;5;5;5", "confidence": "3;4;3;4;4", "correctness": "2;3;3;3;3", "technical_novelty": "2;2;3;2;2", "empirical_novelty": "2;2;2;2;3", "wc_summary_paper": "47;74;130;113;63", "wc_strength_and_weaknesses": "80;523;187;70;311", "wc_clarity_quality_novelty_and_reproducibility": "11;67;74;53;29", "wc_summary_review": "21;7;52;7;26", "wc_review": "159;671;443;243;429", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;0;0", "reply_authors": "0;0;0;0;0", "recommendation_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "correctness_avg": [ 2.8, 0.39999999999999997 ], "technical_novelty_avg": [ 2.2, 0.39999999999999997 ], "empirical_novelty_avg": [ 2.2, 0.39999999999999997 ], "wc_summary_paper_avg": [ 85.4, 31.167932238119356 ], "wc_strength_and_weaknesses_avg": [ 234.2, 168.68360916224194 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 46.8, 23.6 ], "wc_summary_review_avg": [ 22.6, 16.523922052587878 ], "wc_review_avg": [ 389.0, 177.90784131116874 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Sx3AsOVCeVQJ:scholar.google.com/&scioq=CEPD:+Co-Exploring+Pruning+and+Decomposition+for+Compact+DNN+Models&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Rutgers University", "aff_unique_dep": "", "aff_unique_url": "https://www.rutgers.edu", "aff_unique_abbr": "Rutgers", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Spatio-temporal point processes with deep non-stationary kernels", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11011", "id": "PsIk0kO3hKd", "poster": "", "openreview": "https://openreview.net/forum?id=PsIk0kO3hKd", "slides": 
"https://iclr.cc/virtual/2023/poster/11011", "video": "https://iclr.cc/virtual/2023/poster/11011", "author_site": "Zheng Dong, Xiuyuan Cheng, Yao Xie", "tldr": "Deep non-stationary kernel for spatio-temporal point process data modeling with low-rank structure, and a barrier method for constraint MLE optimization.", "abstract": "Point process data are becoming ubiquitous in modern applications, such as social networks, health care, and finance. Despite the powerful expressiveness of the popular recurrent neural network (RNN) models for point process data, they may not successfully capture sophisticated non-stationary dependencies in the data due to their recurrent structures. Another popular type of deep model for point process data is based on representing the influence kernel (rather than the intensity function) by neural networks. We take the latter approach and develop a new deep non-stationary influence kernel that can model non-stationary spatio-temporal point processes. The main idea is to approximate the influence kernel with a novel and general low-rank decomposition, enabling efficient representation through deep neural networks and computational efficiency and better performance. We also take a new approach to maintain the non-negativity constraint of the conditional intensity by introducing a log-barrier penalty. We demonstrate our proposed method's good performance and computational efficiency compared with the state-of-the-art on simulated and real data. ", "keywords": "point process;neural network;non-stationary kernel;low-rank model", "primary_area": "", "supplementary_material": "/attachment/5653c8f37429f832e8348a7da593f4e059274f57.zip", "author": "Zheng Dong;Xiuyuan Cheng;Yao Xie", "authorids": "~Zheng_Dong3;~Xiuyuan_Cheng1;~Yao_Xie2", "gender": "M;;F", "homepage": "https://sites.google.com/view/zheng-dong/home;;http://www2.isye.gatech.edu/~yxie77", "dblp": ";79/9747;13/4242-2", "google_scholar": "iqZN-q4AAAAJ;I2gwdssAAAAJ;qvYp8ZQAAAAJ", "orcid": "0000-0002-1505-8569;;", "linkedin": "zheng-dong-23a264222/;;yaoxie/", "or_profile": "~Zheng_Dong3;~Xiuyuan_Cheng1;~Yao_Xie2", "aff": "Georgia Institute of Technology;Duke University;Georgia Institute of Technology", "aff_domain": "gatech.edu;duke.edu;gatech.edu", "position": "PhD student;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\ndong2023spatiotemporal,\ntitle={Spatio-temporal point processes with deep non-stationary kernels},\nauthor={Zheng Dong and Xiuyuan Cheng and Yao Xie},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=PsIk0kO3hKd}\n}", "github": "", "project": "", "reviewers": "ZPhD;D4xW;GLBW;ggHf", "pdf_size": 3452795, "recommendation": "6;6;8;8", "confidence": "3;3;4;3", "correctness": "3;3;3;3", "technical_novelty": "3;3;2;3", "empirical_novelty": "3;3;2;1", "wc_summary_paper": "27;67;201;69", "wc_strength_and_weaknesses": "156;167;289;185", "wc_clarity_quality_novelty_and_reproducibility": "7;6;105;62", "wc_summary_review": "21;9;81;81", "wc_review": "211;249;676;397", "wc_reply_reviewers": "0;0;0;9", "wc_reply_authors": "307;500;634;481", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;1", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 91.0, 65.68104749469211 ], "wc_strength_and_weaknesses_avg": [ 
199.25, 52.84115346962063 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 45.0, 41.394444071638404 ], "wc_summary_review_avg": [ 48.0, 33.27160951922825 ], "wc_review_avg": [ 383.25, 182.74623799137424 ], "wc_reply_reviewers_avg": [ 2.25, 3.897114317029974 ], "wc_reply_authors_avg": [ 480.5, 116.23790259635624 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.0, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11528518800711239393&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=PsIk0kO3hKd", "email": "gatech.edu;duke.edu;gatech.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Georgia Institute of Technology;Duke University", "aff_unique_dep": ";", "aff_unique_url": "https://www.gatech.edu;https://www.duke.edu", "aff_unique_abbr": "Georgia Tech;Duke", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "Pt1KTsjSfRG", "title": "Image Segmentation using Transfer Learning with DeepLabv3 to Facilitate Photogrammetric Limb Scanning", "track": "main", "status": "Reject", "tldr": "", "abstract": "In this paper, we explore the use of deep learning (DL) in conjunction with photogrammetry for scanning amputated limbs. Combining these two technologies can expand the scope of prosthetic telemedicine by facilitating low-cost limb scanning using cell phones. Previous research identified image segmentation as one of the main limitations of using photogrammetry for limb scanning. Based on those limitations, this work sought to answer two main research questions: (1) Can a neural network be trained to identify and segment an amputated limb automatically? (2) Will segmenting 2D limb images using neural networks impact the accuracy of 3D models generated via photogrammetry? To answer the first question, transfer learning was applied to a neural network with the DeepLabv3 architecture. After training, the model was able to successfully identify and segment limb images with an IoU of 79.9%. To answer the second question, the fine-tuned DL model was applied to a dataset of 22 scans comprising 6312 limb images, then 3D models were rendered utilizing Agisoft Metashape. The Mean Absolute Error (MAE) of models rendered from images segmented with DL was 0.57 mm \u00b1 0.63 mm when compared to models rendered from ground truth images. These results are important because segmentation with DL makes photogrammetry for limb scanning feasible on a large clinical scale. 
Future work should focus on generalizing the segmentation model for different types of amputations and imaging conditions.", "keywords": "3D Scanning;Deep Learning;Image Segmentation;Photogrammetry;Telemedicine", "primary_area": "", "supplementary_material": "", "author": "Isaac A Cabrera;Yixuan Zhou;Eric K Ngo;Ramesh Rao;Albert Lin", "authorids": "~Isaac_A_Cabrera1;yiz044@ucsd.edu;e7ngo@ucsd.edu;~Ramesh_Rao1;~Albert_Lin1", "gender": "M;;;;", "homepage": "https://isaacacabrera.myportfolio.com/home;;;;", "dblp": ";;;;", "google_scholar": ";;;;", "orcid": "0000-0001-9846-4556;;;;", "linkedin": "isaac-a-cabrera-phd-866b80246/;;;;", "or_profile": "~Isaac_A_Cabrera1;yiz044@ucsd.edu;e7ngo@ucsd.edu;~Ramesh_Rao1;~Albert_Lin1", "aff": ";;;University of California-San Diego;University of California-San Diego", "aff_domain": ";;;;", "position": ";;;;", "bibtex": "@misc{\ncabrera2023image,\ntitle={Image Segmentation using Transfer Learning with DeepLabv3 to Facilitate Photogrammetric Limb Scanning},\nauthor={Isaac A Cabrera and Yixuan Zhou and Eric K Ngo and Ramesh Rao and Albert Lin},\nyear={2023},\nurl={https://openreview.net/forum?id=Pt1KTsjSfRG}\n}", "github": "", "project": "", "reviewers": "FWEb;xaFj;bNhp", "site": "https://openreview.net/forum?id=Pt1KTsjSfRG", "pdf_size": 22702506, "recommendation": "3;3;3", "confidence": "5;3;5", "correctness": "3;3;3", "technical_novelty": "1;2;1", "empirical_novelty": "2;2;2", "wc_summary_paper": "55;64;81", "wc_strength_and_weaknesses": "353;43;184", "wc_clarity_quality_novelty_and_reproducibility": "93;17;44", "wc_summary_review": "17;18;84", "wc_review": "518;142;393", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 4.333333333333333, 0.9428090415820634 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 1.3333333333333333, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 66.66666666666667, 10.780641085864152 ], "wc_strength_and_weaknesses_avg": [ 193.33333333333334, 126.72893206455352 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 51.333333333333336, 31.457201966410736 ], "wc_summary_review_avg": [ 39.666666666666664, 31.351058816073323 ], "wc_review_avg": [ 351.0, 156.3479026615537 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10268531364096593516&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0", "aff_unique_norm": "University of California, San Diego", "aff_unique_dep": "", "aff_unique_url": "https://ucsd.edu", "aff_unique_abbr": "UCSD", "aff_campus_unique_index": "0;0", "aff_campus_unique": "San Diego", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "PuEOL1hhyrF", "title": "Active Sampling for Node Attribute Completion on Graphs", "track": "main", "status": "Reject", "tldr": "", "abstract": "Node attribute is one kind of crucial information on graphs, but real-world graphs usually face attribute-missing problem where attributes of partial nodes are missing and attributes of the other nodes are available. It is meaningful to restore the missing attributes so as to benefit downstream graph learning tasks. 
Popular GNNs are not designed for this node attribute completion problem and are not capable of solving it. The recently proposed Structure-attribute Transformer (SAT) framework decouples the input of graph structures and node attributes by a distribution matching technique, and can handle this problem properly. However, SAT treats all nodes with observed attributes equally and neglects the different contributions of different nodes in learning. In this paper, we propose a novel active sampling algorithm (ATS) to more efficiently utilize the nodes with observed attributes and better restore the missing node attributes. Specifically, ATS contains two metrics that measure the representativeness and uncertainty of each node's information by considering the graph structures, representation similarity and learning bias. Then, these two metrics are linearly combined by a Beta distribution controlled weighting scheme to finally determine which nodes are selected into the train set in the next optimization step. This ATS algorithm can be combined with the SAT framework, and is learned in an iterative manner. Through extensive experiments on 4 public benchmark datasets and two downstream tasks, we show the superiority of ATS in node attribute completion.", "keywords": "Graph Neural Network;Node Attribute Completion;Active Sampling", "primary_area": "", "supplementary_material": "", "author": "Benyuan Liu;Xu Chen;Yanfeng Wang;Ya Zhang;Zhi Cao;Ivor Tsang", "authorids": "~Benyuan_Liu2;~Xu_Chen2;~Yanfeng_Wang1;~Ya_Zhang1;caozhi@abp2003.cn;~Ivor_Tsang1", "gender": ";M;M;F;;", "homepage": "https://www.sjtu.edu.cn/;https://xuchensjtu.github.io/xuchen.github.io/;https://cmic.sjtu.edu.cn/wangyanfeng/;https://annzhanglion.github.io/;;", "dblp": ";xxxxxx;55/5407-1.html;85/3714-2;;", "google_scholar": ";6Qa2JCwAAAAJ;https://scholar.google.com/citations?hl=zh-CN;pbjw9sMAAAAJ;;", "orcid": ";0000-0001-5299-7074;0000-0002-3196-2347;0000-0002-5390-9053;;", "linkedin": ";;;;;", "or_profile": "~Benyuan_Liu2;~Xu_Chen2;~Yanfeng_Wang1;~Ya_Zhang1;caozhi@abp2003.cn;~Ivor_Tsang1", "aff": "Shanghai Jiaotong University;Alibaba Group;Shanghai Jiaotong University;Shanghai Jiaotong University;;", "aff_domain": "sjtu.edu.cn;alibaba-inc.com;sjtu.edu.cn;sjtu.edu.cn;;", "position": "MS student;Researcher;Full Professor;Professor;;", "bibtex": "@misc{\nliu2023active,\ntitle={Active Sampling for Node Attribute Completion on Graphs},\nauthor={Benyuan Liu and Xu Chen and Yanfeng Wang and Ya Zhang and Zhi Cao and Ivor Tsang},\nyear={2023},\nurl={https://openreview.net/forum?id=PuEOL1hhyrF}\n}", "github": "", "project": "", "reviewers": "P86Z;TF65;RsqE;Yxf6", "site": "https://openreview.net/forum?id=PuEOL1hhyrF", "pdf_size": 821298, "recommendation": "1;3;3;5", "confidence": "4;4;3;4", "correctness": "3;3;2;4", "technical_novelty": "1;2;1;2", "empirical_novelty": "1;2;3;0", "wc_summary_paper": "80;60;60;106", "wc_strength_and_weaknesses": "277;106;252;216", "wc_clarity_quality_novelty_and_reproducibility": "14;29;57;45", "wc_summary_review": "14;258;47;45", "wc_review": "385;453;416;412", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "508;123;466;358", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 3.0, 1.4142135623730951 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 1.5, 0.5 ], "empirical_novelty_avg": [ 1.5, 1.118033988749895 ], "wc_summary_paper_avg": [ 76.5, 18.887826767524103 ], 
"wc_strength_and_weaknesses_avg": [ 212.75, 65.33519342590179 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 36.25, 16.23845743905498 ], "wc_summary_review_avg": [ 91.0, 97.30107913070646 ], "wc_review_avg": [ 416.5, 24.212600025606502 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 363.75, 149.37934094110872 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:CPM9t9D1EBQJ:scholar.google.com/&scioq=Active+Sampling+for+Node+Attribute+Completion+on+Graphs&hl=en&as_sdt=0,5", "gs_version_total": 3, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Shanghai Jiao Tong University;Alibaba Group", "aff_unique_dep": ";", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.alibaba.com", "aff_unique_abbr": "SJTU;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Imitating Human Behaviour with Diffusion Models", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11165", "id": "Pv1GPQzRrC8", "poster": "/media/PosterPDFs/ICLR%202023/11165.png?t=1682524223.3894656", "openreview": "https://openreview.net/forum?id=Pv1GPQzRrC8", "slides": "https://iclr.cc/virtual/2023/poster/11165", "video": "https://iclr.cc/virtual/2023/poster/11165", "author_site": "Tim Pearce, Tabish Rashid, Anssi Kanervisto, David Bignell, Mingfei Sun, Raluca Georgescu, Sergio Valcarcel Macua, Shan Zheng Tan, Ida Momennejad, Katja Hofmann, Sam Devlin", "tldr": "", "abstract": "Diffusion models have emerged as powerful generative models in the text-to-image domain. This paper studies their application as observation-to-action models for imitating human behaviour in sequential environments. Human behaviour is stochastic and multimodal, with structured correlations between action dimensions. Meanwhile, standard modelling choices in behaviour cloning are limited in their expressiveness and may introduce bias into the cloned policy. We begin by pointing out the limitations of these choices. We then propose that diffusion models are an excellent fit for imitating human behaviour, since they learn an expressive distribution over the joint action space. We introduce several innovations to make diffusion models suitable for sequential environments; designing suitable architectures, investigating the role of guidance, and developing reliable sampling strategies. 
Experimentally, diffusion models closely match human demonstrations in a simulated robotic control task and a modern 3D gaming environment.", "keywords": "imitation learning;behavioral cloning;diffusion models;generative models", "primary_area": "", "supplementary_material": "", "author": "Tim Pearce;Tabish Rashid;Anssi Kanervisto;Dave Bignell;Mingfei Sun;Raluca Georgescu;Sergio Valcarcel Macua;Shan Zheng Tan;Ida Momennejad;Katja Hofmann;Sam Devlin", "authorids": "~Tim_Pearce1;~Tabish_Rashid1;~Anssi_Kanervisto1;david.bignell@microsoft.com;~Mingfei_Sun1;~Raluca_Georgescu1;~Sergio_Valcarcel_Macua1;~Shan_Zheng_Tan1;~Ida_Momennejad1;~Katja_Hofmann1;~Sam_Devlin2", "gender": ";M;M;;M;F;M;M;F;F;M", "homepage": ";;;;https://research.manchester.ac.uk/en/persons/mingfei-sun;http://aka.ms/raluca;;;https://www.momen-nejad.org;https://www.microsoft.com/en-us/research/people/kahofman/;", "dblp": "142/9777;196/5069;186/7786;;195/7934.html;;33/9875;;;97/3500;64/7502", "google_scholar": "https://scholar.google.co.uk/citations?user=09k1kdQAAAAJ;d4BeWwcAAAAJ;https://scholar.google.fi/citations?user=iPimqbwAAAAJ;;2Uzgp5kAAAAJ;;NBqFpgoAAAAJ;;https://scholar.google.de/citations?user=OFdUAJwAAAAJ;https://scholar.google.co.uk/citations?hl=en;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0002-7479-4574;;;;0000-0003-4551-5625;0000-0002-7566-3429;0000-0003-0830-3973;;0000-0002-7769-3090", "linkedin": "tim-pearce-3b165b69/;;;;;;sergio-valcarcel-macua-97b89272/;https://uk.linkedin.com/in/shan-zheng-tan;ida-momennejad-8661a710/;;https://www.linkedin.com/pub/sam-devlin/83/810/b23", "or_profile": "~Tim_Pearce1;~Tabish_Rashid1;~Anssi_Kanervisto1;david.bignell@microsoft.com;~Mingfei_Sun1;~Raluca_Georgescu1;~Sergio_Valcarcel_Macua1;~Shan_Zheng_Tan1;~Ida_Momennejad1;~Katja_Hofmann1;~Sam_Devlin2", "aff": "Microsoft Research;Microsoft;Microsoft;;University of Manchester ;Microsoft;Microsoft Research;Microsoft Research;Microsoft Research;Microsoft;Microsoft Research", "aff_domain": "research.microsoft.com;microsoft.com;microsoft.com;;manchester.ac.uk;microsoft.com;microsoft.com;research.microsoft.com;research.microsoft.com;microsoft.com;microsoft.com", "position": "Researcher;Researcher;Researcher;;Assistant Professor;Researcher;Senior Researcher;Program Manager;Principal Researcher;Senior Principal Research Manager;Principal Researcher", "bibtex": "@inproceedings{\npearce2023imitating,\ntitle={Imitating Human Behaviour with Diffusion Models},\nauthor={Tim Pearce and Tabish Rashid and Anssi Kanervisto and Dave Bignell and Mingfei Sun and Raluca Georgescu and Sergio Valcarcel Macua and Shan Zheng Tan and Ida Momennejad and Katja Hofmann and Sam Devlin},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=Pv1GPQzRrC8}\n}", "github": "", "project": "", "reviewers": "Ygtk;zdNu;hanC;PK6N", "pdf_size": 6777615, "recommendation": "6;6;8;8", "confidence": "3;4;3;3", "correctness": "3;3;3;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "3;2;3;3", "wc_summary_paper": "128;51;100;57", "wc_strength_and_weaknesses": "185;100;251;60", "wc_clarity_quality_novelty_and_reproducibility": "62;253;77;22", "wc_summary_review": "49;218;23;24", "wc_review": "424;622;451;163", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "199;520;404;168", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.0 ], 
"technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 84.0, 31.662280397975127 ], "wc_strength_and_weaknesses_avg": [ 149.0, 74.19905659777623 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 103.5, 88.62420662550385 ], "wc_summary_review_avg": [ 78.5, 81.21114455541185 ], "wc_review_avg": [ 415.0, 164.1112427592942 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 322.75, 145.57708439174073 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 11, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.0, "gs_citation": 214, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15829155732460241941&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "pdf": "https://openreview.net/pdf?id=Pv1GPQzRrC8", "email": "research.microsoft.com;microsoft.com;microsoft.com;;manchester.ac.uk;microsoft.com;microsoft.com;research.microsoft.com;research.microsoft.com;microsoft.com;microsoft.com", "author_num": 11, "aff_unique_index": "0;0;0;1;0;0;0;0;0;0", "aff_unique_norm": "Microsoft;University of Manchester", "aff_unique_dep": "Microsoft Research;", "aff_unique_url": "https://www.microsoft.com/en-us/research;https://www.manchester.ac.uk", "aff_unique_abbr": "MSR;UoM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0;0;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Min-Max Multi-objective Bilevel Optimization with Applications in Robust Machine Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10859", "id": "PvDY71zKsvP", "poster": "", "openreview": "https://openreview.net/forum?id=PvDY71zKsvP", "slides": "https://iclr.cc/virtual/2023/poster/10859", "video": "https://iclr.cc/virtual/2023/poster/10859", "author_site": "Alex Gu, Songtao Lu, Parikshit Ram, Tsui-Wei Weng", "tldr": "We study a generic min-max bilevel multi-objective optimization framework with novel theoretical analysis and applications in representation learning and hyperparameter optimization", "abstract": "We consider a generic min-max multi-objective bilevel optimization problem with applications in robust machine learning such as representation learning and hyperparameter optimization. We design MORBiT, a novel single-loop gradient descent-ascent bilevel optimization algorithm, to solve the generic problem and present a novel analysis showing that MORBiT converges to the first-order stationary point at a rate of $\\widetilde{\\mathcal{O}}(n^{1/2} K^{-2/5})$ for a class of weakly convex problems with $n$ objectives upon $K$ iterations of the algorithm. Our analysis utilizes novel results to handle the non-smooth min-max multi-objective setup and to obtain a sublinear dependence in the number of objectives $n$. 
Experimental results on robust representation learning and robust hyperparameter optimization showcase (i) the advantages of considering the min-max multi-objective setup, and (ii) convergence properties of the proposed MORBiT.", "keywords": "robust optimization;bilevel optimization;multi-objective optimization", "primary_area": "", "supplementary_material": "", "author": "Alex Gu;Songtao Lu;Parikshit Ram;Tsui-Wei Weng", "authorids": "~Alex_Gu1;~Songtao_Lu1;~Parikshit_Ram1;~Tsui-Wei_Weng1", "gender": "M;M;M;F", "homepage": "https://minimario.github.io/;https://songtaogithub.github.io/;https://rithram.github.io/;https://lilywenglab.github.io", "dblp": "285/4734;05/2887;99/8314;177/9197", "google_scholar": "jRQtBp0AAAAJ;LRsjX7kAAAAJ;JaXmmnkAAAAJ;v8GM4xoAAAAJ", "orcid": ";;0000-0002-9456-029X;", "linkedin": "alex-gu-8b7664175/;;parikshit-ram-4861325/;", "or_profile": "~Alex_Gu1;~Songtao_Lu1;~Parikshit_Ram1;~Tsui-Wei_Weng1", "aff": "Massachusetts Institute of Technology;IBM Thomas J. Watson Research Center;International Business Machines;University of California, San Diego", "aff_domain": "mit.edu;ibm.com;ibm.com;ucsd.edu", "position": "PhD student;Researcher;Principal Researcher;Assistant Professor", "bibtex": "@inproceedings{\ngu2023minmax,\ntitle={Min-Max Multi-objective Bilevel Optimization with Applications in Robust Machine Learning},\nauthor={Alex Gu and Songtao Lu and Parikshit Ram and Tsui-Wei Weng},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=PvDY71zKsvP}\n}", "github": "", "project": "", "reviewers": "XgUo;fwCP;UZe1", "pdf_size": 883040, "recommendation": "5;6;6", "confidence": "4;5;5", "correctness": "3;3;3", "technical_novelty": "3;3;3", "empirical_novelty": "3;2;3", "wc_summary_paper": "41;87;72", "wc_strength_and_weaknesses": "491;75;229", "wc_clarity_quality_novelty_and_reproducibility": "46;45;12", "wc_summary_review": "77;11;59", "wc_review": "655;218;372", "wc_reply_reviewers": "115;17;23", "wc_reply_authors": "3693;1126;902", "reply_reviewers": "1;1;1", "reply_authors": "8;3;4", "recommendation_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 66.66666666666667, 19.154343864744856 ], "wc_strength_and_weaknesses_avg": [ 265.0, 171.7284678399789 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 34.333333333333336, 15.797327481430381 ], "wc_summary_review_avg": [ 49.0, 27.85677655436824 ], "wc_review_avg": [ 415.0, 180.9769782780856 ], "wc_reply_reviewers_avg": [ 51.666666666666664, 44.850368510811094 ], "wc_reply_authors_avg": [ 1907.0, 1266.1992997418165 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 5.0, 2.160246899469287 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.9999999999999998, "corr_recommendation_correctness": 0.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1465986133940382669&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=PvDY71zKsvP", "email": "mit.edu;ibm.com;ibm.com;ucsd.edu", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Massachusetts Institute of Technology;IBM;International Business Machines Corporation;University of California, San Diego", "aff_unique_dep": ";Research;;", 
"aff_unique_url": "https://web.mit.edu;https://www.ibm.com/research;https://www.ibm.com;https://www.ucsd.edu", "aff_unique_abbr": "MIT;IBM;IBM;UCSD", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Yorktown Heights;San Diego", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Metadata Archaeology: Unearthing Data Subsets by Leveraging Training Dynamics", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11086", "id": "PvLnIaJbt9", "poster": "/media/PosterPDFs/ICLR%202023/11086.png?t=1682349179.4397762", "openreview": "https://openreview.net/forum?id=PvLnIaJbt9", "slides": "https://iclr.cc/virtual/2023/poster/11086", "video": "https://iclr.cc/virtual/2023/poster/11086", "author_site": "Muhammad Shoaib Ahmed Siddiqui, Nitarshan Rajkumar, Tegan Maharaj, David Krueger, Sara Hooker", "tldr": "Our work provides a unified and efficient framework for Metadata Archaeology -- uncovering and inferring metadata of examples in a dataset", "abstract": "Modern machine learning research relies on relatively few carefully curated datasets. Even in these datasets, and typically in `untidy' or raw data, practitioners are faced with significant issues of data quality and diversity which can be prohibitively labor intensive to address. Existing methods for dealing with these challenges tend to make strong assumptions about the particular issues at play, and often require a priori knowledge or metadata such as domain labels. Our work is orthogonal to these methods: we instead focus on providing a unified and efficient framework for Metadata Archaeology -- uncovering and inferring metadata of examples in a dataset. We curate different subsets of data that might exist in a dataset (e.g. mislabeled, atypical, or out-of-distribution examples) using simple transformations, and leverage differences in learning dynamics between these probe suites to infer metadata of interest. 
Our method is on par with far more sophisticated mitigation methods across different tasks: identifying and correcting mislabeled examples, classifying minority-group samples, prioritizing points relevant for training and enabling scalable human auditing of relevant examples.", "keywords": "Metadata archaeology;Learning curves;Loss trajectory;Data auditing", "primary_area": "", "supplementary_material": "", "author": "Shoaib Ahmed Siddiqui;Nitarshan Rajkumar;Tegan Maharaj;David Krueger;Sara Hooker", "authorids": "~Shoaib_Ahmed_Siddiqui1;~Nitarshan_Rajkumar1;~Tegan_Maharaj1;~David_Krueger1;~Sara_Hooker1", "gender": "M;M;F;M;", "homepage": ";https://www.nitarshan.com;http://teganmaharaj.com;https://mila.umontreal.ca/en/person/david-scott-krueger/;https://www.sarahooker.me/", "dblp": "208/3111;267/2949;;142/2741.html;210/2611", "google_scholar": "https://scholar.google.de/citations?user=9SOO4z0AAAAJ;Kle-Ny8AAAAJ;https://scholar.google.ca/citations?user=XpscC-EAAAAJ;https://scholar.google.ca/citations?user=5Uz70IoAAAAJ;2xy6h3sAAAAJ", "orcid": "0000-0003-4600-7331;0000-0002-8991-0881;;;", "linkedin": "shoaibahmedsiddiqui/;https://linkedin.com/in/nitarshan;;;", "or_profile": "~Shoaib_Ahmed_Siddiqui1;~Nitarshan_Rajkumar1;~Tegan_Maharaj1;~David_Krueger1;~Sara_Hooker1", "aff": "University of Cambridge;University of Cambridge;Ecole Polytechnique de Montreal;University of Cambridge;Cohere For AI", "aff_domain": "cam.ac.uk;cam.ac.uk;polymtl.ca;cam.ac.uk;cohere.com", "position": "PhD student;PhD student;PhD student;Assistant Professor;Principal Researcher", "bibtex": "@inproceedings{\nsiddiqui2023metadata,\ntitle={Metadata Archaeology: Unearthing Data Subsets by Leveraging Training Dynamics},\nauthor={Shoaib Ahmed Siddiqui and Nitarshan Rajkumar and Tegan Maharaj and David Krueger and Sara Hooker},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=PvLnIaJbt9}\n}", "github": "", "project": "", "reviewers": "KmwB;3RHf;Kiey;o59Z", "pdf_size": 18042150, "recommendation": "5;8;8;8", "confidence": "5;3;4;4", "correctness": "3;3;3;3", "technical_novelty": "3;3;3;3", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "134;43;119;24", "wc_strength_and_weaknesses": "43;97;64;229", "wc_clarity_quality_novelty_and_reproducibility": "4;181;379;29", "wc_summary_review": "4;60;31;25", "wc_review": "185;381;593;307", "wc_reply_reviewers": "0;0;17;0", "wc_reply_authors": "273;554;892;629", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;3;1", "recommendation_avg": [ 7.25, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 80.0, 47.281074437876306 ], "wc_strength_and_weaknesses_avg": [ 108.25, 72.32349203405488 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 148.25, 149.45463358491097 ], "wc_summary_review_avg": [ 30.0, 20.0124960961895 ], "wc_review_avg": [ 366.5, 148.31975593291676 ], "wc_reply_reviewers_avg": [ 4.25, 7.361215932167728 ], "wc_reply_authors_avg": [ 587.0, 220.49603171032354 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.816496580927726, "corr_recommendation_correctness": 0.0, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5930664427346777533&as_sdt=5,44&sciodt=0,44&hl=en", 
"gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=PvLnIaJbt9", "email": "cam.ac.uk;cam.ac.uk;polymtl.ca;cam.ac.uk;cohere.com", "author_num": 5, "aff_unique_index": "0;0;1;0;2", "aff_unique_norm": "University of Cambridge;Ecole Polytechnique de Montreal;Cohere", "aff_unique_dep": ";;Cohere AI", "aff_unique_url": "https://www.cam.ac.uk;https://www.polymtl.ca;https://cohere.ai", "aff_unique_abbr": "Cambridge;Polytechnique Montreal;Cohere", "aff_campus_unique_index": "0;0;1;0", "aff_campus_unique": "Cambridge;Montreal;", "aff_country_unique_index": "0;0;1;0;2", "aff_country_unique": "United Kingdom;Canada;United States" }, { "id": "PvOo1sHKzf", "title": "Counterfactual Memorization in Neural Language Models", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Modern neural language models widely used in tasks across NLP risk memorizing sensitive information from their training data. As models continue to scale up in parameters, training data, and compute, understanding memorization in language models is both important from a learning-theoretical point of view, and is practically crucial in real world applications. An open question in previous studies of memorization in language models is how to filter out \"common\" memorization. In fact, most memorization criteria strongly correlate with the number of occurrences in the training set, capturing \"common\" memorization such as familiar phrases, public knowledge or templated texts. In this paper, we provide a principled perspective inspired by a taxonomy of human memory in Psychology. From this perspective, we formulate a notion of counterfactual memorization, which characterizes how a model's predictions change if a particular document is omitted during training. We identify and study counterfactually-memorized training examples in standard text datasets. 
We further estimate the influence of each training example on the validation set and on generated texts, and show that this can provide direct evidence of the source of memorization at test time.", "keywords": "memorization;influence;language models", "primary_area": "", "supplementary_material": "", "author": "Chiyuan Zhang;Daphne Ippolito;Katherine Lee;Matthew Jagielski;Florian Tramer;Nicholas Carlini", "authorids": "~Chiyuan_Zhang1;~Daphne_Ippolito1;~Katherine_Lee1;~Matthew_Jagielski1;~Florian_Tramer1;~Nicholas_Carlini1", "gender": "M;F;F;M;M;", "homepage": "http://pluskid.org;http://www.daphnei.com;https://katelee168.github.io/;https://jagielski.github.io/;http://floriantramer.com;http://nicholas.carlini.com", "dblp": "21/8315;192/2031.html;115/5082.html;218/5156;158/7224;145/1806", "google_scholar": "l_G2vr0AAAAJ;;bjdB4K8AAAAJ;_8rw_GMAAAAJ;https://scholar.google.ch/citations?user=ijH0-a8AAAAJ;", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Chiyuan_Zhang1;~Daphne_Ippolito1;~Katherine_Lee1;~Matthew_Jagielski1;~Florian_Tramer1;~Nicholas_Carlini1", "aff": "Google;Carnegie Mellon University;Cornell University;Google;ETHZ - ETH Zurich;Google", "aff_domain": "google.com;cmu.edu;cornell.edu;google.com;ethz.ch;google.com", "position": "Research Scientist;Assistant Professor;PhD student;Researcher;Assistant Professor;Researcher", "bibtex": "@misc{\nzhang2023counterfactual,\ntitle={Counterfactual Memorization in Neural Language Models},\nauthor={Chiyuan Zhang and Daphne Ippolito and Katherine Lee and Matthew Jagielski and Florian Tramer and Nicholas Carlini},\nyear={2023},\nurl={https://openreview.net/forum?id=PvOo1sHKzf}\n}", "github": "", "project": "", "reviewers": "vz28;oD8E;B2wU;pWr8", "site": "https://openreview.net/forum?id=PvOo1sHKzf", "pdf_size": 4482670, "recommendation": "3;3;3;6", "confidence": "3;4;4;4", "correctness": "3;2;4;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "97;134;79;16", "wc_strength_and_weaknesses": "164;318;347;177", "wc_clarity_quality_novelty_and_reproducibility": "34;3;18;1", "wc_summary_review": "38;64;38;17", "wc_review": "333;519;482;211", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "218;195;265;281", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 3.75, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 81.5, 42.699531613356136 ], "wc_strength_and_weaknesses_avg": [ 251.5, 81.77560761009362 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 14.0, 13.285330255586423 ], "wc_summary_review_avg": [ 39.25, 16.663958113245485 ], "wc_review_avg": [ 386.25, 122.81973579193208 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 239.75, 34.694199803425356 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 0.0, "gs_citation": 172, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11355254777898331315&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff_unique_index": "0;1;2;0;3;0", "aff_unique_norm": "Google;Carnegie Mellon University;Cornell University;ETH Zurich", "aff_unique_dep": "Google;;;", "aff_unique_url": "https://www.google.com;https://www.cmu.edu;https://www.cornell.edu;https://www.ethz.ch", 
"aff_unique_abbr": "Google;CMU;Cornell;ETHZ", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;0;1;0", "aff_country_unique": "United States;Switzerland" }, { "id": "PxFpWq6FNiW", "title": "Prompt-Matched Semantic Segmentation", "track": "main", "status": "Withdraw", "tldr": "We proposed a generic and effective prompt tuning method for semantic segmentation.", "abstract": "The objective of this work is to explore how to effectively and efficiently adapt pre-trained visual foundation models to downstream tasks, e.g., image semantic segmentation. Conventional methods usually fine-tuned the entire networks for each specific dataset, which will be burdensome to store massive parameters of these networks. Several recent works attempted to insert some extra trainable parameters into the frozen networks to learn visual prompts for parameter-efficient tuning. However, these works showed poor generality as they were designed specifically for Transformers. Moreover, using limited information in these schemes, they exhibited a poor capacity to learn effective prompts. To alleviate these issues, we propose a novel Inter-Stage Prompt-Matched Framework for generic and effective visual prompt tuning. Specifically, to ensure generality, we divide the pre-trained backbone with frozen parameters into multiple stages and perform prompt learning between different stages, which makes the proposed scheme applicable to various architectures of CNN and Transformer. For effective tuning, a lightweight Semantic-aware Prompt Matcher (SPM) is designed to progressively learn reasonable prompts with a recurrent mechanism, guided by the rich information of interim semantic maps. Working as a deep matched filter of representation learning, the proposed SPM can well transform the output of the previous stage into a desirable input for the next stage, thus achieving the better matching/stimulating for the pre-trained knowledge. Finally, we apply the proposed method to handle various semantic segmentation tasks. 
Extensive experiments on five benchmarks show that the proposed scheme can achieve a promising trade-off between parameter efficiency and performance effectiveness.", "keywords": "foundation model;prompt tuning;semantic segmentation;model generality", "primary_area": "", "supplementary_material": "", "author": "Lingbo Liu;Bruce Yu;Jianlong Chang;Qi Tian;Chang Wen Chen", "authorids": "~Lingbo_Liu1;~Bruce_Yu1;~Jianlong_Chang2;~Qi_Tian3;~Chang_Wen_Chen1", "gender": "M;M;M;M;M", "homepage": "http://lingboliu.com/;https://bruceyo.github.io/;https://jianlongchange.github.io/;https://www.qitian1987.com/index.html;https://chenlab.comp.polyu.edu.hk/", "dblp": "20/5299;205/7544;92/2332;78/1467-1.html;29/4638", "google_scholar": "sh2DmQgAAAAJ;o2VAejIAAAAJ;RDwnNsQAAAAJ;https://scholar.google.com/citations?hl=en;w2HXPUUAAAAJ", "orcid": "0000-0001-8179-6685;0000-0001-9905-8154;;0000-0002-7252-5047;0000-0002-6720-234X", "linkedin": ";bruceyo/;;;chang-wen-chen-7b72095/", "or_profile": "~Lingbo_Liu1;~Bruce_Yu1;~Jianlong_Chang2;~Qi_Tian3;~Chang_Wen_Chen1", "aff": "Hong Kong Polytechnic University;The Hong Kong Polytechnic University, Hong Kong Polytechnic University;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Hong Kong Polytechnic University", "aff_domain": "polyu.edu.hk;comp.polyu.edu.hk;huawei.com;huawei.com;polyu.edu.hk", "position": "Researcher;Postdoc;Principal Researcher;Principal Researcher;Full Professor", "bibtex": "@misc{\nliu2023promptmatched,\ntitle={Prompt-Matched Semantic Segmentation},\nauthor={Lingbo Liu and Bruce Yu and Jianlong Chang and Qi Tian and Chang Wen Chen},\nyear={2023},\nurl={https://openreview.net/forum?id=PxFpWq6FNiW}\n}", "github": "", "project": "", "reviewers": "YMRs;eAGd;FPnC;Kaza", "site": "https://openreview.net/forum?id=PxFpWq6FNiW", "pdf_size": 723886, "recommendation": "3;3;5;6", "confidence": "5;3;5;4", "correctness": "3;3;1;3", "technical_novelty": "2;2;2;2", "empirical_novelty": "2;0;2;0", "wc_summary_paper": "52;47;117;74", "wc_strength_and_weaknesses": "228;157;111;184", "wc_clarity_quality_novelty_and_reproducibility": "18;87;37;96", "wc_summary_review": "18;28;64;54", "wc_review": "316;319;329;408", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "correctness_avg": [ 2.5, 0.8660254037844386 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 1.0, 1.0 ], "wc_summary_paper_avg": [ 72.5, 27.62697956708261 ], "wc_strength_and_weaknesses_avg": [ 170.0, 42.45585943070756 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 59.5, 32.85194058195041 ], "wc_summary_review_avg": [ 41.0, 18.681541692269406 ], "wc_review_avg": [ 343.0, 37.83516882478523 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.17407765595569782, "corr_recommendation_correctness": -0.3333333333333333, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17164796408694985140&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;1;1;0", "aff_unique_norm": "Hong Kong Polytechnic University;Huawei", "aff_unique_dep": ";Huawei Technologies", "aff_unique_url": "https://www.polyu.edu.hk;https://www.huawei.com", "aff_unique_abbr": "PolyU;Huawei", "aff_campus_unique_index": "0;0;0", 
"aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "PxohstFQm9q", "title": "Simplicity bias in $1$-hidden layer neural networks", "track": "main", "status": "Reject", "tldr": "Gradient Descent on 1-hidden-layer neural network learns a function of essentially a lower dimensional projection of the input.", "abstract": "Recent works \\citep{shah2020pitfalls,chen2021intriguing} have demonstrated that neural networks exhibit extreme \\emph{simplicity bias} (SB). That is, they learn \\emph{only the simplest} features to solve a task at hand, even in the presence of other, more robust but more complex features. Due to lack of a general and rigorous definition of \\emph{features}, these works showcase SB on \\emph{semi-synthetic} datasets such as Color-MNIST, MNIST-CIFAR where defining features is relatively easier. \n\nIn this work, we rigorously define as well as thoroughly establish SB for \\emph{one hidden layer} neural networks. More concretely, (i) we define SB as the network essentially being a function of a low dimensional projection of the inputs (ii) theoretically, we show that when the data is linearly separable, the network primarily depends on only the linearly separable ($1$-dimensional) subspace even in the presence of an arbitrarily large number of other, more complex features which could have led to a significantly more robust classifier, (iii) empirically, we show that models trained on \\emph{real} datasets such as Imagenette and Waterbirds-Landbirds indeed depend on a low dimensional projection of the inputs, thereby demonstrating SB on these datasets, iv) finally, we present a natural ensemble approach that encourages diversity in models by training successive models on features not used by earlier models, and demonstrate that it yields models that are significantly more robust to Gaussian noise.", "keywords": "Simplicity Bias;Neural Network;Gradient Descent", "primary_area": "", "supplementary_material": "/attachment/98c14eef8aca6c9a547902e620d1e5add111d428.zip", "author": "Depen Morwani;Praneeth Netrapalli;jatin batra;Karthikeyan Shanmugam;Prateek Jain", "authorids": "~Depen_Morwani1;~Praneeth_Netrapalli1;~jatin_batra1;~Karthikeyan_Shanmugam1;~Prateek_Jain1", "gender": "M;M;;M;M", "homepage": ";http://praneethnetrapalli.org/;;https://sites.google.com/corp/view/karthikeyan-shanmugam/;http://prateekjain.org", "dblp": "277/5200;http://dblp.uni-trier.de/pers/hd/n/Netrapalli:Praneeth;157/6041;;https://dblp.uni-trier.de/pers/j/Jain_0002:Prateek.html", "google_scholar": "vOngxFUAAAAJ;https://scholar.google.co.in/citations?user=mim8FQkAAAAJ;;https://scholar.google.ca/citations?user=m4DyPcUAAAAJ;qYhRbJoAAAAJ", "orcid": ";;;0009-0008-2879-5868;", "linkedin": "depen-morwani-070298122/;;;;", "or_profile": "~Depen_Morwani1;~Praneeth_Netrapalli1;~jatin_batra1;~Karthikeyan_Shanmugam1;~Prateek_Jain1", "aff": "Harvard University, Harvard University;Google;Tata institute of fundamental research, Mumbai;Google Research;Google", "aff_domain": "g.harvard.edu;google.com;tifr.res.in;google.com;google.com", "position": "PhD student;Research Scientist;Assistant Professor;Researcher;Researcher", "bibtex": "@misc{\nmorwani2023simplicity,\ntitle={Simplicity bias in \\$1\\$-hidden layer neural networks},\nauthor={Depen Morwani and Praneeth Netrapalli and jatin batra and Karthikeyan Shanmugam and Prateek Jain},\nyear={2023},\nurl={https://openreview.net/forum?id=PxohstFQm9q}\n}", "github": "", "project": "", "reviewers": 
"iV59;Hiqj;EewH;3SUe", "site": "https://openreview.net/forum?id=PxohstFQm9q", "pdf_size": 1774253, "recommendation": "5;5;6;8", "confidence": "3;4;4;4", "correctness": "3;4;3;3", "technical_novelty": "3;2;2;4", "empirical_novelty": "2;2;3;4", "wc_summary_paper": "60;80;55;254", "wc_strength_and_weaknesses": "150;155;296;91", "wc_clarity_quality_novelty_and_reproducibility": "137;22;18;41", "wc_summary_review": "68;50;70;21", "wc_review": "415;307;439;407", "wc_reply_reviewers": "211;0;27;12", "wc_reply_authors": "1070;289;405;328", "reply_reviewers": "2;0;1;1", "reply_authors": "5;2;2;3", "recommendation_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 112.25, 82.37224957472024 ], "wc_strength_and_weaknesses_avg": [ 173.0, 75.34255105848221 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 54.5, 48.41745553000488 ], "wc_summary_review_avg": [ 52.25, 19.651653874419832 ], "wc_review_avg": [ 392.0, 50.46781152378217 ], "wc_reply_reviewers_avg": [ 62.5, 86.2684762818957 ], "wc_reply_authors_avg": [ 523.0, 318.5569022953356 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.0, 1.224744871391589 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.4714045207910316, "corr_recommendation_correctness": -0.4714045207910316, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9122465623094215232&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff_unique_index": "0;1;2;1;1", "aff_unique_norm": "Harvard University;Google;Tata Institute of Fundamental Research", "aff_unique_dep": ";Google;", "aff_unique_url": "https://www.harvard.edu;https://www.google.com;https://www.tifr.res.in", "aff_unique_abbr": "Harvard;Google;TIFR", "aff_campus_unique_index": "1;2;1;1", "aff_campus_unique": ";Mountain View;Mumbai", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "United States;India" }, { "id": "PxwqKdOshWI", "title": "Concentric Ring Loss for Face Forgery Detection", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Due to growing societal concerns about indistinguishable deepfake images, face forgery detection has received an increasing amount of interest in computer vision. Since the differences between actual and fake images are frequently small, improving the discriminative ability of learnt features is one of the primary problems in deepfake detection. In this paper, we propose a novel Concentric Ring Loss (CRL) to encourage the model to learn intra-class compressed and inter-class separated features. Specifically, we independently add margin penalties in angular and Euclidean space to force a more significant margin between real and fake images, and hence encourage better discriminating performance. Compared to softmax loss, CRL explicitly encourages intra-class compactness and inter-class separability. Moreover, a frequency-aware feature learning module is proposed to exploit high-frequency features and further improve the generalization ability of the model. Extensive experiments demonstrate the superiority of our methods over different datasets. 
We show that CRL consistently outperforms the state-of-the-art by a large margin.", "keywords": "face forgery detection;metric learning", "primary_area": "", "supplementary_material": "/attachment/fedea6c3a87e0db4b504db9fc908bf2cf93af847.zip", "author": "Yu Yin;Yue Bai;Yun Fu", "authorids": "~Yu_Yin2;~Yue_Bai1;~Yun_Fu1", "gender": "F;M;M", "homepage": "https://yin-yu.github.io/;https://yueb17.github.io/;http://www1.ece.neu.edu/~yunfu/", "dblp": ";119/0848;00/5815-1", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=h-JEcQ8AAAAJ", "orcid": "0000-0002-9588-5854;;0000-0002-5098-2853", "linkedin": ";;furaymond/", "or_profile": "~Yu_Yin2;~Yue_Bai1;~Yun_Fu1", "aff": "Northeastern University;Northeastern University;Northeastern University", "aff_domain": "neu.edu;neu.edu;northeastern.edu", "position": "PhD student;PhD student;Full Professor", "bibtex": "@misc{\nyin2023concentric,\ntitle={Concentric Ring Loss for Face Forgery Detection},\nauthor={Yu Yin and Yue Bai and Yun Fu},\nyear={2023},\nurl={https://openreview.net/forum?id=PxwqKdOshWI}\n}", "github": "", "project": "", "reviewers": "VSNB;Gvj7;AkCP", "site": "https://openreview.net/forum?id=PxwqKdOshWI", "pdf_size": 831719, "recommendation": "3;5;6", "confidence": "4;5;4", "correctness": "3;2;4", "technical_novelty": "1;2;3", "empirical_novelty": "2;2;3", "wc_summary_paper": "20;69;79", "wc_strength_and_weaknesses": "148;500;142", "wc_clarity_quality_novelty_and_reproducibility": "70;39;38", "wc_summary_review": "18;67;29", "wc_review": "256;675;288", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 2.0, 0.816496580927726 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 56.0, 25.78113005022601 ], "wc_strength_and_weaknesses_avg": [ 263.3333333333333, 167.36653057154393 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 49.0, 14.854853303438128 ], "wc_summary_review_avg": [ 38.0, 20.992061991778385 ], "wc_review_avg": [ 406.3333333333333, 190.42467160416885 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.18898223650461357, "corr_recommendation_correctness": 0.3273268353539886, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13157097544535372280&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Northeastern University", "aff_unique_dep": "", "aff_unique_url": "https://www.northeastern.edu", "aff_unique_abbr": "NEU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Learning Proximal Operators to Discover Multiple Optima", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11909", "id": "PzBGIu-llo7", "poster": "/media/PosterPDFs/ICLR%202023/11909.png?t=1682277536.3191252", "openreview": "https://openreview.net/forum?id=PzBGIu-llo7", "slides": "https://iclr.cc/virtual/2023/poster/11909", "video": "https://iclr.cc/virtual/2023/poster/11909", 
"author_site": "Lingxiao Li, Noam Aigerman, Vladimir Kim, Jiajin Li, Kristjan Greenewald, Mikhail Yurochkin, Justin Solomon", "tldr": "", "abstract": "Finding multiple solutions of non-convex optimization problems is a ubiquitous yet challenging task. Most past algorithms either apply single-solution optimization methods from multiple random initial guesses or search in the vicinity of found solutions using ad hoc heuristics. We present an end-to-end method to learn the proximal operator of a family of training problems so that multiple local minima can be quickly obtained from initial guesses by iterating the learned operator, emulating the proximal-point algorithm that has fast convergence. The learned proximal operator can be further generalized to recover multiple optima for unseen problems at test time, enabling applications such as object detection. The key ingredient in our formulation is a proximal regularization term, which elevates the convexity of our training loss: by applying recent theoretical results, we show that for weakly-convex objectives with Lipschitz gradients, training of the proximal operator converges globally with a practical degree of over-parameterization. We further present an exhaustive benchmark for multi-solution optimization to demonstrate the effectiveness of our method.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Lingxiao Li;Noam Aigerman;Vladimir Kim;Jiajin Li;Kristjan Greenewald;Mikhail Yurochkin;Justin Solomon", "authorids": "~Lingxiao_Li1;~Noam_Aigerman1;~Vladimir_Kim1;~Jiajin_Li2;~Kristjan_Greenewald1;~Mikhail_Yurochkin1;~Justin_Solomon1", "gender": "M;;M;F;;M;M", "homepage": "http://people.csail.mit.edu/lingxiao/;;http://vova.kim;https://gerrili1996.github.io/;https://researcher.watson.ibm.com/researcher/view.php?person=ibm-Kristjan.H.Greenewald;https://moonfolk.github.io/;http://people.csail.mit.edu/jsolomon/", "dblp": ";;27/8698;;146/0563;191/6719;80/5094", "google_scholar": ";;5S1kGcAAAAAJ;;L3zNUG4AAAAJ;QjBF9sUAAAAJ;pImSVwoAAAAJ", "orcid": ";;;;;;0000-0002-7701-7586", "linkedin": ";;;;;mikhail-yurochkin-a45659114/;justin-solomon-8a587914/", "or_profile": "~Lingxiao_Li1;~Noam_Aigerman1;~Vladimir_Kim1;~Jiajin_Li2;~Kristjan_Greenewald1;~Mikhail_Yurochkin1;~Justin_Solomon1", "aff": "Massachusetts Institute of Technology;;Adobe Systems;Stanford University;MIT-IBM Watson AI Lab, IBM Research;IBM Research;Massachusetts Institute of Technology", "aff_domain": "mit.edu;;adobe.com;stanford.edu;ibm.com;ibm.com;mit.edu", "position": "PhD student;;Research Scientist;Postdoc;Research Scientist;Researcher;Associate Professor", "bibtex": "@inproceedings{\nli2023learning,\ntitle={Learning Proximal Operators to Discover Multiple Optima},\nauthor={Lingxiao Li and Noam Aigerman and Vladimir Kim and Jiajin Li and Kristjan Greenewald and Mikhail Yurochkin and Justin Solomon},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=PzBGIu-llo7}\n}", "github": "", "project": "", "reviewers": "HUdf;T3qd;4Grh", "pdf_size": 15862877, "recommendation": "5;8;8", "confidence": "2;3;3", "correctness": "3;4;4", "technical_novelty": "2;3;3", "empirical_novelty": "2;3;3", "wc_summary_paper": "30;22;266", "wc_strength_and_weaknesses": "314;44;721", "wc_clarity_quality_novelty_and_reproducibility": "4;129;34", "wc_summary_review": "25;35;91", "wc_review": "373;230;1112", "wc_reply_reviewers": "0;0;328", "wc_reply_authors": "1140;625;1392", "reply_reviewers": "0;0;2", 
"reply_authors": "4;2;4", "recommendation_avg": [ 7.0, 1.4142135623730951 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 106.0, 113.18421562508911 ], "wc_strength_and_weaknesses_avg": [ 359.6666666666667, 278.2640632365036 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 55.666666666666664, 53.28122454381927 ], "wc_summary_review_avg": [ 50.333333333333336, 29.044027881055953 ], "wc_review_avg": [ 571.6666666666666, 386.5077259541162 ], "wc_reply_reviewers_avg": [ 109.33333333333333, 154.6206828194584 ], "wc_reply_authors_avg": [ 1052.3333333333333, 319.2035226761899 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 3.3333333333333335, 0.9428090415820634 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 1.0, "corr_recommendation_correctness": 1.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2741005343281217553&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "pdf": "https://openreview.net/pdf?id=PzBGIu-llo7", "email": "mit.edu;;adobe.com;stanford.edu;ibm.com;ibm.com;mit.edu", "author_num": 7, "aff_unique_index": "0;1;2;3;3;0", "aff_unique_norm": "Massachusetts Institute of Technology;Adobe;Stanford University;IBM", "aff_unique_dep": ";Adobe Systems Incorporated;;AI Lab", "aff_unique_url": "https://web.mit.edu;https://www.adobe.com;https://www.stanford.edu;https://www.ibmwatsonai.org/", "aff_unique_abbr": "MIT;Adobe;Stanford;MIT-IBM AI Lab", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Neural Groundplans: Persistent Neural Scene Representations from a Single Image", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12181", "id": "Pza24zf9FpS", "poster": "", "openreview": "https://openreview.net/forum?id=Pza24zf9FpS", "slides": "https://iclr.cc/virtual/2023/poster/12181", "video": "https://iclr.cc/virtual/2023/poster/12181", "author_site": "Prafull Sharma, Ayush Tewari, Yilun Du, Sergey Zakharov, Rares Ambrus, Adrien Gaidon, William Freeman, Fredo Durand, Joshua B Tenenbaum, Vincent Sitzmann", "tldr": "We train a self-supervised model that learns to map a single image to a 3D representation of the scene, with separate components for the immovable and movable 3D regions.", "abstract": "We present a method to map 2D image observations of a scene to a persistent 3D scene representation, enabling novel view synthesis and disentangled representation of the movable and immovable components of the scene. Motivated by the bird\u2019s-eye-view (BEV) representation commonly used in vision and robotics, we propose conditional neural groundplans, ground-aligned 2D feature grids, as persistent and memory-efficient scene representations. Our method is trained self-supervised from unlabeled multi-view observations using differentiable rendering, and learns to complete geometry and appearance of occluded regions. In addition, we show that we can leverage multi-view videos at training time to learn to separately reconstruct static and movable components of the scene from a single image at test time. 
The ability to separately reconstruct movable objects enables a variety of downstream tasks using simple heuristics, such as extraction of object-centric 3D representations, novel view synthesis, instance-level segmentation, 3D bounding box prediction, and scene editing. This highlights the value of neural groundplans as a backbone for efficient 3D scene understanding models.", "keywords": "Neural scene representations;3D;nerf;scene understanding;neural rendering;object-centric representations", "primary_area": "", "supplementary_material": "/attachment/f67c6b5c6a68c1d794b5510929e3af944704caca.zip", "author": "Prafull Sharma;Ayush Tewari;Yilun Du;Sergey Zakharov;Rares Andrei Ambrus;Adrien Gaidon;William T. Freeman;Fredo Durand;Joshua B. Tenenbaum;Vincent Sitzmann", "authorids": "~Prafull_Sharma1;~Ayush_Tewari2;~Yilun_Du1;~Sergey_Zakharov1;~Rares_Andrei_Ambrus1;~Adrien_Gaidon1;~William_T._Freeman1;~Fredo_Durand1;~Joshua_B._Tenenbaum1;~Vincent_Sitzmann1", "gender": "M;;;M;M;;M;M;;M", "homepage": "https://prafullsharma.net;https://ayushtewari.com;https://yilundu.github.io;https://zakharos.github.io/;http://www.csc.kth.se/~raambrus/;https://adriengaidon.com/;https://billf.mit.edu/;http://people.csail.mit.edu/fredo/;;https://vsitzmann.github.io", "dblp": "224/2474;198/1021;204/4379;195/5832;25/76;06/7548.html;86/6650;87/2617;t/JoshuaBTenenbaum;192/1958", "google_scholar": "IUM1vGoAAAAJ;pDnzpeoAAAAJ;;https://scholar.google.de/citations?user=3DK3I-8AAAAJ;2xjjS3oAAAAJ;https://scholar.google.fr/citations?user=2StUgf4AAAAJ;https://scholar.google.com.tw/citations?user=0zZnyMEAAAAJ;https://scholar.google.com.tw/citations?user=NJ9c4ygAAAAJ;;X44QVV4AAAAJ", "orcid": ";;;;0000-0002-3111-3812;;;0000-0001-9919-069X;;0000-0002-0107-5704", "linkedin": ";;;;rare%C8%99-ambru%C8%99-b04812125/;adrien-gaidon-63ab2358/;;;;vincentsitzmann/", "or_profile": "~Prafull_Sharma1;~Ayush_Tewari2;~Yilun_Du1;~Sergey_Zakharov1;~Rares_Andrei_Ambrus1;~Adrien_Gaidon1;~William_T._Freeman1;~Fredo_Durand1;~Joshua_B._Tenenbaum1;~Vincent_Sitzmann1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Toyota Research Institute;Toyota Research Institute;Toyota Research Institute (TRI);Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;mit.edu;tri.global;tri.global;tri.global;mit.edu;mit.edu;mit.edu;mit.edu", "position": "PhD student;Postdoc;PhD student;Researcher;Researcher;Head of ML;Professor;Full Professor;Professor;Assistant Professor", "bibtex": "@inproceedings{\nsharma2023neural,\ntitle={Neural Groundplans: Persistent Neural Scene Representations from a Single Image},\nauthor={Prafull Sharma and Ayush Tewari and Yilun Du and Sergey Zakharov and Rares Andrei Ambrus and Adrien Gaidon and William T. Freeman and Fredo Durand and Joshua B. 
Tenenbaum and Vincent Sitzmann},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=Pza24zf9FpS}\n}", "github": "", "project": "", "reviewers": "5tSi;eeHh;U5HL;KVRy", "pdf_size": 32149994, "recommendation": "6;6;6;6", "confidence": "3;4;4;3", "correctness": "4;3;4;3", "technical_novelty": "4;3;3;3", "empirical_novelty": "4;3;3;0", "wc_summary_paper": "132;79;58;94", "wc_strength_and_weaknesses": "596;385;352;332", "wc_clarity_quality_novelty_and_reproducibility": "110;62;113;17", "wc_summary_review": "66;63;70;28", "wc_review": "904;589;593;471", "wc_reply_reviewers": "0;32;60;0", "wc_reply_authors": "723;482;1083;787", "reply_reviewers": "0;1;1;0", "reply_authors": "3;2;3;2", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 1.5 ], "wc_summary_paper_avg": [ 90.75, 27.03123193641015 ], "wc_strength_and_weaknesses_avg": [ 416.25, 105.49022466560587 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 75.5, 39.37321424522006 ], "wc_summary_review_avg": [ 56.75, 16.78354849249705 ], "wc_review_avg": [ 639.25, 160.5184958190177 ], "wc_reply_reviewers_avg": [ 23.0, 25.03996805109783 ], "wc_reply_authors_avg": [ 768.75, 214.1289039340556 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 10, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4089945169608691083&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=Pza24zf9FpS", "email": "mit.edu;mit.edu;mit.edu;tri.global;tri.global;tri.global;mit.edu;mit.edu;mit.edu;mit.edu", "author_num": 10, "aff_unique_index": "0;0;0;1;1;1;0;0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology;Toyota Research Institute", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.tri.global", "aff_unique_abbr": "MIT;TRI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "PzbYN5d76a", "title": "Inducing Meaningful Units from Character Sequences with Dynamic Capacity Slot Attention", "track": "main", "status": "Reject", "tldr": "We propose an unsupervised method to learn the abstract meaning-bearing units in a sequence of characters with Dynamic Capacity Slot Attention. ", "abstract": "Characters do not convey meaning, but sequences of characters do. We propose an unsupervised distributional method to learn the abstract meaning-bearing units in a sequence of characters. Rather than segmenting the sequence, our Dynamic Capacity Slot Attention model discovers continuous representations of the \\textit{objects} in the sequence, extending an architecture for object discovery in images. We train our model on different languages and evaluate the quality of the obtained representations with forward and reverse probing classifiers. 
These experiments show that our model succeeds in discovering units which are similar to those proposed previously in form, content and level of abstraction, and which show promise for capturing meaningful information at a higher level of abstraction.", "keywords": "Unsupervised representation learning;Morphology induction;Deep learning", "primary_area": "", "supplementary_material": "", "author": "Melika Behjati;James Henderson", "authorids": "~Melika_Behjati1;~James_Henderson1", "gender": "F;M", "homepage": "https://www.idiap.ch/~mbehjati/;http://idiap.ch/~jhenderson/", "dblp": "243/6574;h/JamesHenderson.html", "google_scholar": "02sCdLEAAAAJ;CSib0ooAAAAJ", "orcid": ";0000-0003-3714-4799", "linkedin": ";james-henderson-3b68346b/", "or_profile": "~Melika_Behjati1;~James_Henderson1", "aff": "Idiap Research Institute;Idiap Research Institute", "aff_domain": "idiap.ch;idiap.ch", "position": "PhD student;Senior Researcher", "bibtex": "@misc{\nbehjati2023inducing,\ntitle={Inducing Meaningful Units from Character Sequences with Dynamic Capacity Slot Attention},\nauthor={Melika Behjati and James Henderson},\nyear={2023},\nurl={https://openreview.net/forum?id=PzbYN5d76a}\n}", "github": "", "project": "", "reviewers": "aQFF;nzdu;MN1D;RoTY", "site": "https://openreview.net/forum?id=PzbYN5d76a", "pdf_size": 1783954, "recommendation": "3;5;5;6", "confidence": "4;3;3;4", "correctness": "2;2;3;3", "technical_novelty": "3;2;2;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "97;115;129;134", "wc_strength_and_weaknesses": "1174;182;214;238", "wc_clarity_quality_novelty_and_reproducibility": "93;26;20;28", "wc_summary_review": "65;61;35;39", "wc_review": "1429;384;398;439", "wc_reply_reviewers": "0;0;364;0", "wc_reply_authors": "1153;243;261;260", "reply_reviewers": "0;0;1;0", "reply_authors": "2;1;1;1", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 118.75, 14.359230480774379 ], "wc_strength_and_weaknesses_avg": [ 452.0, 417.3200210869351 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 41.75, 29.73529048117741 ], "wc_summary_review_avg": [ 50.0, 13.152946437965905 ], "wc_review_avg": [ 662.5, 443.000282166953 ], "wc_reply_reviewers_avg": [ 91.0, 157.61662348876783 ], "wc_reply_authors_avg": [ 479.25, 389.0555069652658 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.2294157338705618, "corr_recommendation_correctness": 0.6882472016116854, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8821239013706930821&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0", "aff_unique_norm": "Idiap Research Institute", "aff_unique_dep": "", "aff_unique_url": "https://www.idiap.ch", "aff_unique_abbr": "Idiap", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Switzerland" }, { "title": "Sign and Basis Invariant Networks for Spectral Graph Representation Learning", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11795", "id": "Q-UHqMorzil", "poster": "", "openreview": "https://openreview.net/forum?id=Q-UHqMorzil", "slides": "https://iclr.cc/virtual/2023/poster/11795", "video": 
"https://iclr.cc/virtual/2023/poster/11795", "author_site": "Derek Lim, Joshua Robinson, Lingxiao Zhao, Tess Smidt, Suvrit Sra, Haggai Maron, Stefanie Jegelka", "tldr": "We develop neural networks invariant to the symmetries of eigenvectors, which are theoretically expressive and empirically improve performance in geometric learning tasks.", "abstract": "We introduce SignNet and BasisNet---new neural architectures that are invariant to two key symmetries displayed by eigenvectors: (i) sign flips, since if v is an eigenvector then so is -v; and (ii) more general basis symmetries, which occur in higher dimensional eigenspaces with infinitely many choices of basis eigenvectors. We prove that under certain conditions our networks are universal, i.e., they can approximate any continuous function of eigenvectors with the desired invariances. When used with Laplacian eigenvectors, our networks are provably more expressive than existing spectral methods on graphs; for instance, they subsume all spectral graph convolutions, certain spectral graph invariants, and previously proposed graph positional encodings as special cases. Experiments show that our networks significantly outperform existing baselines on molecular graph regression, learning expressive graph representations, and learning neural fields on triangle meshes. Our code is available at https://github.com/cptq/SignNet-BasisNet.", "keywords": "Invariance;Equivariance;Eigenvectors;Spectral;Neural Networks", "primary_area": "", "supplementary_material": "/attachment/3bb05ffaf6de6b62042e3e858646679f5cb7e2af.zip", "author": "Derek Lim;Joshua David Robinson;Lingxiao Zhao;Tess Smidt;Suvrit Sra;Haggai Maron;Stefanie Jegelka", "authorids": "~Derek_Lim1;~Joshua_David_Robinson1;~Lingxiao_Zhao1;~Tess_Smidt1;~Suvrit_Sra1;~Haggai_Maron1;~Stefanie_Jegelka3", "gender": "M;M;M;F;;M;F", "homepage": "https://cptq.github.io/;https://joshrobinson.mit.edu/;http://lingxiaozhao.com/;https://blondegeek.github.io/;https://optml.mit.edu;https://haggaim.github.io/;http://people.csail.mit.edu/stefje/", "dblp": "267/5433;15/4759;;215/4978.html;90/930;181/6629;38/7003", "google_scholar": "y9YTBIsAAAAJ;E02doCkAAAAJ;QKslW6EAAAAJ;;eyCw9goAAAAJ;https://scholar.google.co.il/citations?user=4v8uJrIAAAAJ;gTWUZlsAAAAJ", "orcid": ";;;0000-0001-5581-5344;;;", "linkedin": ";;;;;;", "or_profile": "~Derek_Lim1;~Joshua_David_Robinson1;~Lingxiao_Zhao1;~Tess_Smidt1;~Suvrit_Sra1;~Haggai_Maron1;~Stefanie_Jegelka3", "aff": "NVIDIA;Massachusetts Institute of Technology;Carnegie Mellon University;Massachusetts Institute of Technology;Massachusetts Institute of Technology;NVIDIA;Massachusetts Institute of Technology", "aff_domain": "nvidia.com;mit.edu;andrew.cmu.edu;mit.edu;mit.edu;nvidia.com;mit.edu", "position": "Intern;PhD student;PhD student;Assistant Professor;Associate Professor;Research Scientist;Associate Professor", "bibtex": "@inproceedings{\nlim2023sign,\ntitle={Sign and Basis Invariant Networks for Spectral Graph Representation Learning},\nauthor={Derek Lim and Joshua David Robinson and Lingxiao Zhao and Tess Smidt and Suvrit Sra and Haggai Maron and Stefanie Jegelka},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=Q-UHqMorzil}\n}", "github": "", "project": "", "reviewers": "cG9j;a8ck;aMn3;BB19", "pdf_size": 15230402, "recommendation": "8;8;8;8", "confidence": "4;3;4;2", "correctness": "4;3;4;3", "technical_novelty": "4;3;4;3", "empirical_novelty": "4;3;4;3", "wc_summary_paper": "74;116;47;73", 
"wc_strength_and_weaknesses": "119;88;157;14", "wc_clarity_quality_novelty_and_reproducibility": "25;22;10;8", "wc_summary_review": "28;137;16;19", "wc_review": "246;363;230;114", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "311;586;262;47", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 8.0, 0.0 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.5, 0.5 ], "empirical_novelty_avg": [ 3.5, 0.5 ], "wc_summary_paper_avg": [ 77.5, 24.72347063015223 ], "wc_strength_and_weaknesses_avg": [ 94.5, 52.50952294584288 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 16.25, 7.361215932167728 ], "wc_summary_review_avg": [ 50.0, 50.42320894191484 ], "wc_review_avg": [ 238.25, 88.21670760122484 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 301.5, 191.9381410767542 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 193, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18399745378595628454&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=Q-UHqMorzil", "email": "nvidia.com;mit.edu;andrew.cmu.edu;mit.edu;mit.edu;nvidia.com;mit.edu", "author_num": 7, "aff_unique_index": "0;1;2;1;1;0;1", "aff_unique_norm": "NVIDIA;Massachusetts Institute of Technology;Carnegie Mellon University", "aff_unique_dep": "NVIDIA Corporation;;", "aff_unique_url": "https://www.nvidia.com;https://web.mit.edu;https://www.cmu.edu", "aff_unique_abbr": "NVIDIA;MIT;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Re-weighting Based Group Fairness Regularization via Classwise Robust Optimization", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11647", "id": "Q-WfHzmiG9m", "poster": "/media/PosterPDFs/ICLR%202023/11647.png?t=1682950431.6709046", "openreview": "https://openreview.net/forum?id=Q-WfHzmiG9m", "slides": "https://iclr.cc/virtual/2023/poster/11647", "video": "https://iclr.cc/virtual/2023/poster/11647", "author_site": "Sangwon Jung, Taeeon Park, Sanghyuk Chun, Taesup Moon", "tldr": "", "abstract": "Many existing group fairness-aware training methods aim to achieve the group fairness by either re-weighting underrepresented groups based on certain rules or using weakly approximated surrogates for the fairness metrics in the objective as regularization terms. Although each of the learning schemes has its own strength in terms of applicability or performance, respectively, it is difficult for any method in the either category to be considered as a gold standard since their successful performances are typically limited to specific cases. To that end, we propose a principled method, dubbed as FairDRO, which unifies the two learning schemes by incorporating a well-justified group fairness metric into the training objective using a classwise distributionally robust optimization (DRO) framework. We then develop an iterative optimization algorithm that minimizes the resulting objective by automatically producing the correct re-weights for each group. 
Our experiments show that FairDRO is scalable and easily adaptable to diverse applications, and consistently achieves the state-of-the-art performance on several benchmark datasets in terms of the accuracy-fairness trade-off, compared to recent strong baselines. ", "keywords": "Group Fairness;DRO", "primary_area": "", "supplementary_material": "/attachment/a2b2791cac2303a09e36f7b9f9cdd771acd89e64.zip", "author": "Sangwon Jung;Taeeon Park;Sanghyuk Chun;Taesup Moon", "authorids": "~Sangwon_Jung1;~Taeeon_Park1;~Sanghyuk_Chun1;~Taesup_Moon1", "gender": "M;M;M;", "homepage": "https://successful-humor-4db.notion.site/Sangwon-Jung-70109a49767a470092a6ee0d02c78313;https://sites.google.com/view/taeeon;https://sanghyukchun.github.io/home/;https://mindlab-snu.github.io/people/pi/", "dblp": "236/3698;241/9857;213/1095.html;05/4084", "google_scholar": "WdC_a5IAAAAJ;HcXNOecAAAAJ;https://scholar.google.co.kr/citations?user=4_uj0xcAAAAJ;lQlioBoAAAAJ", "orcid": ";;0000-0002-4533-2610;0000-0002-9257-6503", "linkedin": ";;https://kr.linkedin.com/in/sanghyukchun/en;", "or_profile": "~Sangwon_Jung1;~Taeeon_Park1;~Sanghyuk_Chun1;~Taesup_Moon1", "aff": "Seoul National University;Seoul National University;NAVER AI Lab;Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr;navercorp.com;snu.ac.kr", "position": "PhD student;PhD student;Lead research scientist;Associate Professor", "bibtex": "@inproceedings{\njung2023reweighting,\ntitle={Re-weighting Based Group Fairness Regularization via Classwise Robust Optimization},\nauthor={Sangwon Jung and Taeeon Park and Sanghyuk Chun and Taesup Moon},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=Q-WfHzmiG9m}\n}", "github": "", "project": "", "reviewers": "cPS3;U6MV;QUdb;uGcU", "pdf_size": 600611, "recommendation": "5;6;6;6", "confidence": "3;3;3;4", "correctness": "3;4;2;4", "technical_novelty": "3;3;3;2", "empirical_novelty": "1;3;2;3", "wc_summary_paper": "63;91;111;147", "wc_strength_and_weaknesses": "685;429;47;191", "wc_clarity_quality_novelty_and_reproducibility": "29;83;437;87", "wc_summary_review": "128;27;66;156", "wc_review": "905;630;661;581", "wc_reply_reviewers": "273;0;0;0", "wc_reply_authors": "1340;728;880;748", "reply_reviewers": "1;0;0;0", "reply_authors": "3;1;2;1", "recommendation_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 103.0, 30.59411708155671 ], "wc_strength_and_weaknesses_avg": [ 338.0, 242.3736784388932 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 159.0, 162.1295778073822 ], "wc_summary_review_avg": [ 94.25, 50.67728781219453 ], "wc_review_avg": [ 694.25, 124.97474744923471 ], "wc_reply_reviewers_avg": [ 68.25, 118.21246761657588 ], "wc_reply_authors_avg": [ 924.0, 247.1760506197961 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 0.17407765595569782, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8883047709059010858&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=Q-WfHzmiG9m", "email": "snu.ac.kr;snu.ac.kr;navercorp.com;snu.ac.kr", "author_num": 4, 
"aff_unique_index": "0;0;1;0", "aff_unique_norm": "Seoul National University;NAVER Corporation", "aff_unique_dep": ";NAVER AI Lab", "aff_unique_url": "https://www.snu.ac.kr;https://www.naver.com", "aff_unique_abbr": "SNU;NAVER", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Order Matters: Agent-by-agent Policy Optimization", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11076", "id": "Q-neeWNVv1", "poster": "/media/PosterPDFs/ICLR%202023/11076.png?t=1681031883.7738068", "openreview": "https://openreview.net/forum?id=Q-neeWNVv1", "slides": "https://iclr.cc/virtual/2023/poster/11076", "video": "https://iclr.cc/virtual/2023/poster/11076", "author_site": "Xihuai Wang, Zheng Tian, Ziyu Wan, Ying Wen, Jun Wang, Weinan Zhang", "tldr": "", "abstract": "While multi-agent trust region algorithms have achieved great success empirically in solving coordination tasks, most of them, however, suffer from a non-stationarity problem since agents update their policies simultaneously. In contrast, a sequential scheme that updates policies agent-by-agent provides another perspective and shows strong performance. However, sample inefficiency and lack of monotonic improvement guarantees for each agent are still the two significant challenges for the sequential scheme. In this paper, we propose the \\textbf{A}gent-by-\\textbf{a}gent \\textbf{P}olicy \\textbf{O}ptimization (A2PO) algorithm to improve the sample efficiency and retain the guarantees of monotonic improvement for each agent during training. We justify the tightness of the monotonic improvement bound compared with other trust region algorithms. From the perspective of sequentially updating agents, we further consider the effect of agent updating order and extend the theory of non-stationarity into the sequential update scheme. To evaluate A2PO, we conduct a comprehensive empirical study on four benchmarks: StarCraftII, Multi-agent MuJoCo, Multi-agent Particle Environment, and Google Research Football full game scenarios. 
A2PO consistently outperforms strong baselines.", "keywords": "Multi-agent Reinforcement Learning", "primary_area": "", "supplementary_material": "", "author": "Xihuai Wang;Zheng Tian;Ziyu Wan;Ying Wen;Jun Wang;Weinan Zhang", "authorids": "~Xihuai_Wang1;~Zheng_Tian1;~Ziyu_Wan2;~Ying_Wen1;~Jun_Wang2;~Weinan_Zhang1", "gender": "M;M;M;M;M;M", "homepage": "https://xihuai18.github.io/;;https://github.com/ziyuwan;https://yingwen.io;http://www0.cs.ucl.ac.uk/staff/jun.wang/;http://wnzhang.net", "dblp": "79/6482;17/2752-2.html;;41/4203-1;w/JunWang12;28/10261-1", "google_scholar": "hy6v3qUAAAAJ;;VEtZ7gYAAAAJ;_A1CxG8AAAAJ;https://scholar.google.co.uk/citations?user=wIE1tY4AAAAJ;Qzss0GEAAAAJ", "orcid": ";;;0000-0003-1247-2382;;0000-0002-0127-2425", "linkedin": ";;;wenying45;;", "or_profile": "~Xihuai_Wang1;~Zheng_Tian1;~Ziyu_Wan2;~Ying_Wen1;~Jun_Wang2;~Weinan_Zhang1", "aff": "Shanghai Jiaotong University;ShanghaiTech University;Shanghai Jiaotong University;Shanghai Jiaotong University;University College London;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;shanghaitech.edu.cn;sjtu.edu.cn;sjtu.edu.cn;ucl.ac.uk;sjtu.edu.cn", "position": "PhD student;Assistant Professor;PhD student;Assistant Professor;Professor;Associate Professor", "bibtex": "@inproceedings{\nwang2023order,\ntitle={Order Matters: Agent-by-agent Policy Optimization},\nauthor={Xihuai Wang and Zheng Tian and Ziyu Wan and Ying Wen and Jun Wang and Weinan Zhang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=Q-neeWNVv1}\n}", "github": "", "project": "", "reviewers": "GdLP;7L45;GN3i;N8aU;ZzqH", "pdf_size": 4725234, "recommendation": "5;6;6;8;8", "confidence": "5;4;4;3;3", "correctness": "1;3;3;3;4", "technical_novelty": "2;2;3;3;3", "empirical_novelty": "2;3;3;3;4", "wc_summary_paper": "23;64;96;48;105", "wc_strength_and_weaknesses": "154;157;267;257;388", "wc_clarity_quality_novelty_and_reproducibility": "9;12;24;18;81", "wc_summary_review": "20;44;13;183;59", "wc_review": "206;277;400;506;633", "wc_reply_reviewers": "0;0;19;25;12", "wc_reply_authors": "1906;874;672;1186;442", "reply_reviewers": "0;0;1;1;1", "reply_authors": "4;2;1;2;1", "recommendation_avg": [ 6.6, 1.2 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "correctness_avg": [ 2.8, 0.9797958971132712 ], "technical_novelty_avg": [ 2.6, 0.4898979485566356 ], "empirical_novelty_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_paper_avg": [ 67.2, 30.30115509349437 ], "wc_strength_and_weaknesses_avg": [ 244.6, 86.14081494854805 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 28.8, 26.603759132874437 ], "wc_summary_review_avg": [ 63.8, 61.84302709279358 ], "wc_review_avg": [ 404.4, 153.74862601012083 ], "wc_reply_reviewers_avg": [ 11.2, 10.02796090937734 ], "wc_reply_authors_avg": [ 1016.0, 507.7353641415969 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 2.0, 1.0954451150103321 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.9799578870122229, "corr_recommendation_correctness": 0.7824758900557374, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11880653825360741708&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=Q-neeWNVv1", "email": "sjtu.edu.cn;shanghaitech.edu.cn;sjtu.edu.cn;sjtu.edu.cn;ucl.ac.uk;sjtu.edu.cn", "author_num": 6, "aff_unique_index": "0;1;0;0;2;0", "aff_unique_norm": "Shanghai Jiao Tong University;ShanghaiTech 
University;University College London", "aff_unique_dep": ";;", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.shanghaitech.edu.cn;https://www.ucl.ac.uk", "aff_unique_abbr": "SJTU;ShanghaiTech;UCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;0", "aff_country_unique": "China;United Kingdom" }, { "id": "Q0XkE_srKnG", "title": "Learning from Labeled Images and Unlabeled Videos for Video Segmentation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Performance on video object segmentation still lags behind that of image segmentation due to a paucity of labeled videos. Annotations are time-consuming and laborious to collect, and may not be feasibly obtained in certain situations. However, there is a growing amount of freely available unlabeled video data, which has spurred interest in unsupervised video representation learning. In this work, we focus on the setting in which there is little or no access to labeled videos for video object segmentation. To this end, we leverage large-scale image segmentation datasets and adversarial learning to train 2D/3D networks for video object segmentation. We first motivate the treatment of images and videos as two separate domains by analyzing the performance gap of an image segmentation network trained on images and applied to videos. Through studies using several image and video segmentation datasets, we show how an adversarial loss placed at various locations within the network can make feature representations invariant to these domains and improve the performance when the network has access to only labeled images and unlabeled videos. To prevent the loss of discriminative semantic class information, we apply our adversarial loss within clusters of features and show this boosts our method's performance within Transformer-based models.", "keywords": "Video;Segmentation;Representation", "primary_area": "", "supplementary_material": "", "author": "Cristina Mata;Michael S Ryoo", "authorids": "~Cristina_Mata1;~Michael_S_Ryoo1", "gender": ";M", "homepage": "https://www.cs.stonybrook.edu/;http://michaelryoo.com/", "dblp": "205/5898;r/MichaelSRyoo", "google_scholar": ";vcw0TJIAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Cristina_Mata1;~Michael_S_Ryoo1", "aff": "State University of New York, Stony Brook;Google DeepMind", "aff_domain": "stonybrook.edu;google.com", "position": "PhD student;Research Scientist", "bibtex": "@misc{\nmata2023learning,\ntitle={Learning from Labeled Images and Unlabeled Videos for Video Segmentation},\nauthor={Cristina Mata and Michael S Ryoo},\nyear={2023},\nurl={https://openreview.net/forum?id=Q0XkE_srKnG}\n}", "github": "", "project": "", "reviewers": "AueS;SNu6;KyqK;79eQ", "site": "https://openreview.net/forum?id=Q0XkE_srKnG", "pdf_size": 2574624, "recommendation": "3;3;5;6", "confidence": "5;3;4;4", "correctness": "2;4;3;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "45;87;49;138", "wc_strength_and_weaknesses": "161;163;287;203", "wc_clarity_quality_novelty_and_reproducibility": "136;16;7;33", "wc_summary_review": "25;67;13;40", "wc_review": "367;333;356;414", "wc_reply_reviewers": "0;0;0;53", "wc_reply_authors": "343;314;387;343", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.25,
0.4330127018922193 ], "wc_summary_paper_avg": [ 79.75, 37.41239767777521 ], "wc_strength_and_weaknesses_avg": [ 203.5, 51.03675146401855 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 48.0, 51.657526073167695 ], "wc_summary_review_avg": [ 36.25, 20.16649448962313 ], "wc_review_avg": [ 367.5, 29.516944286290883 ], "wc_reply_reviewers_avg": [ 13.25, 22.949673200287624 ], "wc_reply_authors_avg": [ 346.75, 26.080404521402652 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2913173224011849943&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "State University of New York;Google", "aff_unique_dep": ";Google DeepMind", "aff_unique_url": "https://www.stonybrook.edu;https://deepmind.com", "aff_unique_abbr": "SUNY Stony Brook;DeepMind", "aff_campus_unique_index": "0", "aff_campus_unique": "Stony Brook;", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;United Kingdom" }, { "title": "Synthetic Data Generation of Many-to-Many Datasets via Random Graph Generation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10982", "id": "Q120_4COf-K", "poster": "", "openreview": "https://openreview.net/forum?id=Q120_4COf-K", "slides": "https://iclr.cc/virtual/2023/poster/10982", "video": "https://iclr.cc/virtual/2023/poster/10982", "author_site": "Kai Xu, Georgi Ganev, Emile Joubert, Rees Davison, Olivier Van Acker, Luke Robinson", "tldr": "We synthesise datasets with many-to-many relationships by first generating the relationships via random graph generation and then generating the data attributes.", "abstract": "Synthetic data generation (SDG) has become a popular approach to release private datasets.\nIn SDG, a generative model is fitted on the private real data, and samples drawn from the model are released as the protected synthetic data.\nWhile real-world datasets usually consist of multiple tables with potential \\emph{many-to-many} relationships (i.e.~\\emph{many-to-many datasets}), recent research in SDG mostly focuses on modeling tables \\emph{independently} or only considers generating datasets with special cases of many-to-many relationships such as \\emph{one-to-many}.\nIn this paper, we first study challenges of building faithful generative models for many-to-many datasets, identifying limitations of existing methods.\nWe then present a novel factorization for many-to-many generative models, which leads to a scalable generation framework by combining recent results from random graph theory and representation learning.\nFinally, we extend the framework to establish the notion of $(\\epsilon,\\delta)$-differential privacy.\nThrough a real-world dataset, we demonstrate that our method can generate synthetic datasets while preserving information within and across tables better than its closest competitor.", "keywords": "synthetic data generation;random graph generation;differential privacy", "primary_area": "", "supplementary_material": "", "author": "Kai Xu;Georgi Ganev;Emile Joubert;Rees Davison;Olivier Van Acker;Luke Robinson", "authorids": "~Kai_Xu4;~Georgi_Ganev1;~Emile_Joubert1;~Rees_Davison1;~Olivier_Van_Acker1;~Luke_Robinson1", "gender": "M;;;;M;M", "homepage": 
"https://xuk.ai;https://ganevgv.github.io/;http://hazy.com;;https://3vilm33pl3.com;https://posturban.vc/", "dblp": ";284/8917;;;;", "google_scholar": "https://scholar.google.ca/citations?user=kf3C60wAAAAJ;TVWEFoUAAAAJ;;;;", "orcid": ";;;;;", "linkedin": ";;;reesdavison;;drlukerobinson/", "or_profile": "~Kai_Xu4;~Georgi_Ganev1;~Emile_Joubert1;~Rees_Davison1;~Olivier_Van_Acker1;~Luke_Robinson1", "aff": "Amazon;Hazy;;;;Post Urban Ventures", "aff_domain": "amazon.com;hazy.com;;;;posturban.vc", "position": "Research scientist;Researcher;;;;Researcher", "bibtex": "@inproceedings{\nxu2023synthetic,\ntitle={Synthetic Data Generation of Many-to-Many Datasets via Random Graph Generation},\nauthor={Kai Xu and Georgi Ganev and Emile Joubert and Rees Davison and Olivier Van Acker and Luke Robinson},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=Q120_4COf-K}\n}", "github": "", "project": "", "reviewers": "62xs;Ysb4;zdm5", "pdf_size": 600767, "recommendation": "6;6;8", "confidence": "3;2;2", "correctness": "3;3;3", "technical_novelty": "3;2;3", "empirical_novelty": "3;2;3", "wc_summary_paper": "98;48;47", "wc_strength_and_weaknesses": "530;298;278", "wc_clarity_quality_novelty_and_reproducibility": "53;50;59", "wc_summary_review": "80;100;27", "wc_review": "761;496;411", "wc_reply_reviewers": "136;269;217", "wc_reply_authors": "1884;1533;1002", "reply_reviewers": "1;2;1", "reply_authors": "4;4;2", "recommendation_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 2.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 64.33333333333333, 23.809428571238094 ], "wc_strength_and_weaknesses_avg": [ 368.6666666666667, 114.37171367470582 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 54.0, 3.7416573867739413 ], "wc_summary_review_avg": [ 69.0, 30.80043289739069 ], "wc_review_avg": [ 556.0, 149.05256343540916 ], "wc_reply_reviewers_avg": [ 207.33333333333334, 54.725578013291816 ], "wc_reply_authors_avg": [ 1473.0, 362.5658560868632 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 3.3333333333333335, 0.9428090415820634 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.49999999999999983, "corr_recommendation_correctness": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7664138233851883389&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "pdf": "https://openreview.net/pdf?id=Q120_4COf-K", "email": "amazon.com;hazy.com;;;;posturban.vc", "author_num": 6, "aff_unique_index": "0;1;2", "aff_unique_norm": "Amazon;Hazy;Post Urban Ventures", "aff_unique_dep": "Amazon.com, Inc.;;", "aff_unique_url": "https://www.amazon.com;;", "aff_unique_abbr": "Amazon;;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0", "aff_country_unique": "United States;" }, { "id": "Q2WE65ToiLT", "title": "A Fairness Analysis on Differentially Private Aggregation of Teacher Ensembles", "track": "main", "status": "Withdraw", "tldr": "This paper analyzes the causes of the disparate impacts arising in a popular teacher ensemble model used for differentially private learning tasks", "abstract": "Private Aggregation of Teacher Ensembles (PATE) is an important\nprivate machine learning framework. 
It combines multiple\nlearning models used as teachers for a student model that\nlearns to predict an output chosen by noisy voting among the\nteachers. The resulting model satisfies differential privacy and has\nbeen shown effective in learning high-quality private models in\nsemi-supervised settings or when one wishes to protect the data\nlabels.\n\nThis paper asks whether this privacy-preserving framework introduces\nor exacerbates unfairness and shows that PATE can introduce\naccuracy disparity among individuals and groups of individuals. \nThe paper analyzes\nwhich algorithmic and data properties are responsible for the\ndisproportionate impacts, why these aspects are affecting different\ngroups disproportionately, and proposes guidelines to mitigate these\neffects.", "keywords": "Differential Privacy;Fairness;Semisupervised learning", "primary_area": "", "supplementary_material": "/attachment/3f32eccc362ae3cdece1eb7405ed66a1472bfdb6.zip", "author": "Cuong Tran;Ferdinando Fioretto", "authorids": "~Cuong_Tran1;~Ferdinando_Fioretto1", "gender": "M;M", "homepage": ";http://nandofioretto.com", "dblp": "275/3885;119/6404", "google_scholar": "RiYBF7sAAAAJ;ASf9Q04AAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Cuong_Tran1;~Ferdinando_Fioretto1", "aff": "Syracuse University;Syracuse University", "aff_domain": "syr.edu;syr.edu", "position": "PhD student;Assistant Professor", "bibtex": "@misc{\ntran2023a,\ntitle={A Fairness Analysis on Differentially Private Aggregation of Teacher Ensembles},\nauthor={Cuong Tran and Ferdinando Fioretto},\nyear={2023},\nurl={https://openreview.net/forum?id=Q2WE65ToiLT}\n}", "github": "", "project": "", "reviewers": "XB1k;h822;Unx5;eG8a", "site": "https://openreview.net/forum?id=Q2WE65ToiLT", "pdf_size": 3435888, "recommendation": "3;3;3;5", "confidence": "4;5;4;2", "correctness": "2;3;2;4", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "76;55;56;47", "wc_strength_and_weaknesses": "502;548;402;175", "wc_clarity_quality_novelty_and_reproducibility": "9;84;61;39", "wc_summary_review": "151;783;87;85", "wc_review": "738;1470;606;346", "wc_reply_reviewers": "0;362;0;0", "wc_reply_authors": "78;757;992;191", "reply_reviewers": "0;1;0;0", "reply_authors": "1;3;3;2", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "correctness_avg": [ 2.75, 0.82915619758885 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 58.5, 10.688779163215974 ], "wc_strength_and_weaknesses_avg": [ 406.75, 143.83562667155866 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 48.25, 27.6891224129621 ], "wc_summary_review_avg": [ 276.5, 293.630294758562 ], "wc_review_avg": [ 790.0, 417.1618390984487 ], "wc_reply_reviewers_avg": [ 90.5, 156.7505980849834 ], "wc_reply_authors_avg": [ 504.5, 381.31253585477623 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.9271726499455306, "corr_recommendation_correctness": 0.8703882797784891, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:m0BPPKHPwpsJ:scholar.google.com/&scioq=A+Fairness+Analysis+on+Differentially+Private+Aggregation+of+Teacher+Ensembles&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Syracuse University", "aff_unique_dep": "", 
"aff_unique_url": "https://www.syracuse.edu", "aff_unique_abbr": "Syracuse", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Dilated convolution with learnable spacings", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12199", "id": "Q3-1vRh3HOA", "poster": "/media/PosterPDFs/ICLR%202023/12199.png?t=1680729766.3678048", "openreview": "https://openreview.net/forum?id=Q3-1vRh3HOA", "slides": "https://iclr.cc/virtual/2023/poster/12199", "video": "https://iclr.cc/virtual/2023/poster/12199", "author_site": "Ismail Khalfaoui Hassani, Thomas Pellegrini, Timoth\u00e9e Masquelier", "tldr": "Dilated convolution with learnable spacings: a new method that improves the accuracy of state-of-the-art CNNs", "abstract": "Recent works indicate that convolutional neural networks (CNN) need large receptive fields (RF) to compete with visual transformers and their attention mechanism. In CNNs, RFs can simply be enlarged by increasing the convolution kernel sizes. Yet the number of trainable parameters, which scales quadratically with the kernel's size in the 2D case, rapidly becomes prohibitive, and the training is notoriously difficult. This paper presents a new method to increase the RF size without increasing the number of parameters. The dilated convolution (DC) has already been proposed for the same purpose. DC can be seen as a convolution with a kernel that contains only a few non-zero elements placed on a regular grid. Here we present a new version of the DC in which the spacings between the non-zero elements, or equivalently their positions, are no longer fixed but learnable via backpropagation thanks to an interpolation technique. We call this method \u201cDilated Convolution with Learnable Spacings\u201d (DCLS) and generalize it to the n-dimensional convolution case. However, our main focus here will be on the 2D case. We first tried our approach on ResNet50: we drop-in replaced the standard convolutions with DCLS ones, which increased the accuracy of ImageNet1k classification at iso-parameters, but at the expense of the throughput. Next, we used the recent ConvNeXt state-of-the-art convolutional architecture and drop-in replaced the depthwise convolutions with DCLS ones. This not only increased the accuracy of ImageNet1k classification but also of typical downstream and robustness tasks, again at iso-parameters but this time with negligible cost on throughput, as ConvNeXt uses separable convolutions. Conversely, classic DC led to poor performance with both ResNet50 and ConvNeXt. 
The code of the method is based on PyTorch and available at: https://github.com/K-H-Ismail/Dilated-Convolution-with-Learnable-Spacings-PyTorch.", "keywords": "deep learning;convolution;dilated convolution;receptive field", "primary_area": "", "supplementary_material": "", "author": "Ismail Khalfaoui Hassani;Thomas Pellegrini;Timoth\u00e9e Masquelier", "authorids": "~Ismail_Khalfaoui_Hassani1;thomas.pellegrini@irit.fr;~Timoth\u00e9e_Masquelier1", "gender": "M;;", "homepage": "https://ismakh.com;;", "dblp": "308/6076;;07/7226", "google_scholar": "sFQlNhIAAAAJ;;fkzUZ-oAAAAJ", "orcid": "0009-0007-8858-4037;;0000-0001-8629-9506", "linkedin": "ismail-khalfaoui-hassani/;;", "or_profile": "~Ismail_Khalfaoui_Hassani1;thomas.pellegrini@irit.fr;~Timoth\u00e9e_Masquelier1", "aff": "University of Toulouse;;CNRS", "aff_domain": "univ-tlse3.fr;;cnrs.fr", "position": "PhD student;;Principal Researcher", "bibtex": "@inproceedings{\nhassani2023dilated,\ntitle={Dilated convolution with learnable spacings},\nauthor={Ismail Khalfaoui Hassani and Thomas Pellegrini and Timoth{\\'e}e Masquelier},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=Q3-1vRh3HOA}\n}", "github": "", "project": "", "reviewers": "2NcJ;2fMY;8tHP;N17u;zKnb", "pdf_size": 1428471, "recommendation": "5;6;6;8;8", "confidence": "3;3;5;4;5", "correctness": "4;3;3;3;3", "technical_novelty": "2;3;2;3;2", "empirical_novelty": "3;3;2;3;2", "wc_summary_paper": "124;87;83;104;32", "wc_strength_and_weaknesses": "878;600;689;627;390", "wc_clarity_quality_novelty_and_reproducibility": "42;73;96;46;18", "wc_summary_review": "54;132;250;124;57", "wc_review": "1098;892;1118;901;497", "wc_reply_reviewers": "458;0;222;27;15", "wc_reply_authors": "1687;711;1489;1271;283", "reply_reviewers": "1;0;1;1;1", "reply_authors": "3;1;3;2;1", "recommendation_avg": [ 6.6, 1.2 ], "confidence_avg": [ 4.0, 0.8944271909999159 ], "correctness_avg": [ 3.2, 0.39999999999999997 ], "technical_novelty_avg": [ 2.4, 0.4898979485566356 ], "empirical_novelty_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_paper_avg": [ 86.0, 30.639843341636066 ], "wc_strength_and_weaknesses_avg": [ 636.8, 157.01133717028208 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 55.0, 26.922109872742144 ], "wc_summary_review_avg": [ 123.4, 71.17190456914862 ], "wc_review_avg": [ 901.2, 223.2464109453946 ], "wc_reply_reviewers_avg": [ 144.4, 176.49090628131523 ], "wc_reply_authors_avg": [ 1088.2, 518.3135730424199 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 2.0, 0.8944271909999159 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.5590169943749475, "corr_recommendation_correctness": -0.6666666666666666, "gs_citation": 42, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11258009110645291420&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 20, "pdf": "https://openreview.net/pdf?id=Q3-1vRh3HOA", "email": "univ-tlse3.fr;;cnrs.fr", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "University of Toulouse;Centre National de la Recherche Scientifique", "aff_unique_dep": ";", "aff_unique_url": "https://www.univ-toulouse.fr;https://www.cnrs.fr", "aff_unique_abbr": "UT;CNRS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "France" }, { "id": "Q31C6XQOEvl", "title": "NAPG: Non-Autoregressive Program Generation for Hybrid Tabular-Textual Question Answering", 
"track": "main", "status": "Withdraw", "tldr": "We present a non-autoregressive program generation model for the numerical reasoning of hybrid question answering to address the exposure bias issue of autoregressive generation and to boost the decoding speed.", "abstract": "Hybrid tabular-textual question answering (QA) requires reasoning from heterogeneous information, and the types of reasoning are mainly divided into numerical reasoning and span extraction. The current numerical reasoning method uses LSTM to autoregressively decode program sequences, and each decoding step produces either an operator or an operand. However, the step-by-step decoding suffers from exposure bias, and the accuracy of program generation drops sharply with progressive decoding. In this paper, we propose a non-autoregressive program generation framework, which facilitates program generation in parallel. Our framework, which independently generates complete program tuples containing both operators and operands, can significantly boost the speed of program generation while addressing the error accumulation issue. Our experiments on the MultiHiertt dataset shows that our model can bring about large improvements (+7.97 EM and +6.38 F1 points) over the strong baseline, establishing the new state-of-the-art performance, while being much faster (~21x) in program generation. The performance drop of our method is also significantly smaller than the baseline with increasing numbers of numerical reasoning steps.", "keywords": "Tabular-Textual Question Answering;Non-Autoregressive Program Generation;Natural Language Processing", "primary_area": "", "supplementary_material": "", "author": "Tengxun Zhang;Hongfei Xu;Josef van Genabith;Deyi Xiong;Hongying ZAN", "authorids": "~Tengxun_Zhang1;~Hongfei_Xu2;~Josef_van_Genabith1;~Deyi_Xiong2;~Hongying_ZAN1", "gender": "M;M;M;M;F", "homepage": "https://github.com/ztx313;https://www.dfki.de/en/web/about-us/employee/person/hoxu01/;;https://dyxiong.github.io;http://www5.zzu.edu.cn/nlp", "dblp": "332/3558;https://dblp.uni-trier.de/pid/26/7840;82/3447;55/6548;06/441", "google_scholar": "paUyCh8AAAAJ;Zk-f3UwAAAAJ;rl8S6a8AAAAJ;QPLO3myO5PkC;", "orcid": ";0000-0001-8397-1459;;0000-0002-2353-5038;", "linkedin": ";;;;", "or_profile": "~Tengxun_Zhang1;~Hongfei_Xu2;~Josef_van_Genabith1;~Deyi_Xiong2;~Hongying_ZAN1", "aff": "Zhengzhou University;Zhengzhou University;Universit\u00e4t des Saarlandes;Tianjin University;Zhengzhou University", "aff_domain": "zzu.edu;zzu.edu.cn;uni-saarland.de;tju.edu.cn;zzu.edu.cn", "position": "MS student;Lecturer;Full Professor;Full Professor;Full Professor", "bibtex": "@misc{\nzhang2023napg,\ntitle={{NAPG}: Non-Autoregressive Program Generation for Hybrid Tabular-Textual Question Answering},\nauthor={Tengxun Zhang and Hongfei Xu and Josef van Genabith and Deyi Xiong and Hongying ZAN},\nyear={2023},\nurl={https://openreview.net/forum?id=Q31C6XQOEvl}\n}", "github": "", "project": "", "reviewers": "pvFV;p1nY;nGvQ", "site": "https://openreview.net/forum?id=Q31C6XQOEvl", "pdf_size": 485247, "recommendation": "3;3;5", "confidence": "3;4;4", "correctness": "3;3;4", "technical_novelty": "2;2;2", "empirical_novelty": "3;3;2", "wc_summary_paper": "82;81;185", "wc_strength_and_weaknesses": "135;152;72", "wc_clarity_quality_novelty_and_reproducibility": "75;31;136", "wc_summary_review": "30;51;139", "wc_review": "322;315;532", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 
3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 116.0, 48.792075859371536 ], "wc_strength_and_weaknesses_avg": [ 119.66666666666667, 34.41253001774532 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 80.66666666666667, 43.05293898859352 ], "wc_summary_review_avg": [ 73.33333333333333, 47.21816976075582 ], "wc_review_avg": [ 389.6666666666667, 100.68542871515996 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.5, "corr_recommendation_correctness": 1.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15807892152268046352&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Zhengzhou University;Universit\u00e4t des Saarlandes;Tianjin University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.zzu.edu.cn;https://www.uni-saarland.de;http://www.tju.edu.cn", "aff_unique_abbr": "ZZU;UDS;TJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "China;Germany" }, { "id": "Q4B6g_ubd39", "title": "The Effects of Nonlinearity on Approximation Capacity of Recurrent Neural Networks", "track": "main", "status": "Withdraw", "tldr": "The nonlinear recurrent activations do not make the approximation capacity of RNN worse, however also not much better.", "abstract": "We study the effects of nonlinear recurrent activations on the approximation properties of recurrent neural networks (RNNs). Previous works indicate that in the linear setting, RNNs show good approximation performance when the target sequential relationship is smooth and has fast decaying memory. Otherwise, RNNs may suffer from the so-called \u201ccurse of memory\u201d, meaning that an exponentially large number of neurons is required for accurate approximation. A natural question is whether the recurrent nonlinearity has a substantial effect on RNNs\u2019 approximation capacity and approximation speed. In this paper, we present some negative results in this direction. We discover that, while the addition of nonlinearity does not shrink the hypothesis space, in the sense that nonlinear RNNs can still approximate linear functionals with the same approximation rates established for linear RNNs, it does not essentially alleviate the limitations of RNNs either. In particular, we prove that nonlinear RNNs fail to be universal approximators of arbitrary nonlinear functionals, and any linear functional that can be efficiently approximated must also possess an exponentially decaying memory. 
", "keywords": "Recurrent Neural Network;Approximation Theory;Functional Analysis;Dynamical System", "primary_area": "", "supplementary_material": "/attachment/84ea4cc8ac3bb84d06101bc7dabab396c979144c.zip", "author": "Shida Wang;Zhong Li;Qianxiao Li", "authorids": "~Shida_Wang1;~Zhong_Li2;~Qianxiao_Li1", "gender": "M;M;M", "homepage": "https://radarfudan.github.io;https://www.microsoft.com/en-us/research/people/lzhong/;https://blog.nus.edu.sg/qianxiaoli/", "dblp": "245/6187;;172/0930.html", "google_scholar": "vA2YMfgAAAAJ;https://scholar.google.com/citations?view_op=list_works;https://scholar.google.com.sg/citations?user=zLgReYoAAAAJ", "orcid": ";;0000-0002-3903-3737", "linkedin": ";;", "or_profile": "~Shida_Wang1;~Zhong_Li2;~Qianxiao_Li1", "aff": "National University of Singapore;Microsoft Research Asia;National University of Singapore", "aff_domain": "nus.edu.sg;microsoft.com;nus.edu.sg", "position": "PhD student;Researcher;Assistant Professor", "bibtex": "@misc{\nwang2023the,\ntitle={The Effects of Nonlinearity on Approximation Capacity of Recurrent Neural Networks},\nauthor={Shida Wang and Zhong Li and Qianxiao Li},\nyear={2023},\nurl={https://openreview.net/forum?id=Q4B6g_ubd39}\n}", "github": "", "project": "", "reviewers": "GNHx;nbcx;8vKV;v9LS", "site": "https://openreview.net/forum?id=Q4B6g_ubd39", "pdf_size": 639791, "recommendation": "1;5;6;8", "confidence": "2;2;2;3", "correctness": "1;4;3;4", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "36;47;37;211", "wc_strength_and_weaknesses": "161;211;48;266", "wc_clarity_quality_novelty_and_reproducibility": "14;55;25;64", "wc_summary_review": "27;83;53;103", "wc_review": "238;396;163;644", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "572;627;214;428", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 5.0, 2.5495097567963922 ], "confidence_avg": [ 2.25, 0.4330127018922193 ], "correctness_avg": [ 3.0, 1.224744871391589 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 82.75, 74.16999056222132 ], "wc_strength_and_weaknesses_avg": [ 171.5, 80.39434059683555 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 39.5, 20.62159062730128 ], "wc_summary_review_avg": [ 66.5, 28.926631328241456 ], "wc_review_avg": [ 360.25, 184.14990496875095 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 460.25, 159.66586203694263 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.6793662204867574, "corr_recommendation_correctness": 0.8807048459279792, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10578548332337007593&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;0", "aff_unique_norm": "National University of Singapore;Microsoft", "aff_unique_dep": ";Research", "aff_unique_url": "https://www.nus.edu.sg;https://www.microsoft.com/en-us/research/group/asia", "aff_unique_abbr": "NUS;MSR Asia", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Singapore;China" }, { "id": "Q5uQecAw0vO", "title": "Reinforcement Learning for Bandits with Continuous Actions and Large Context Spaces", "track": "main", "status": "Reject", "tldr": "We propose a reinforcement learning approach for the challenging contextual bandits scenario with continuous actions that can generalise to large 
'context' spaces, unlike the current literature. ", "abstract": "We consider the challenging scenario of contextual bandits with continuous actions and large input ``context'' spaces, e.g. images. We posit that by modifying reinforcement learning (RL) algorithms for continuous control, we can outperform hand-crafted contextual bandit algorithms for continuous actions on standard benchmark datasets, i.e. vector contexts. We demonstrate that parametric policy networks outperform recently published tree-based policies in both average regret and costs on held-out samples. Furthermore, in contrast to previous work, we successfully demonstrate that RL algorithms can generalise contextual bandit problems with continuous actions to large context spaces. We obtain state-of-the-art performance using RL and significantly outperform previous methods on image contexts. Lastly, we introduce a new contextual bandits domain with multi-dimensional continuous action space and image context. ", "keywords": "Contextual bandits;Continuous actions;Image context;reinforcement learning", "primary_area": "", "supplementary_material": "", "author": "Paul Duckworth;Bruno Lacerda;Katherine Vallis;Nick Hawes", "authorids": "~Paul_Duckworth1;~Bruno_Lacerda1;katherine.vallis@oncology.ox.ac.uk;~Nick_Hawes1", "gender": "M;M;;M", "homepage": "http://www.robots.ox.ac.uk/~scpd/;https://bfalacerda.github.io/;;https://www.robots.ox.ac.uk/~nickh/", "dblp": "179/2160;87/10333;;35/1190", "google_scholar": "I64MZDoAAAAJ;https://scholar.google.co.uk/citations?user=k9XjG_MAAAAJ;;bRsi4zoAAAAJ", "orcid": "0000-0001-9052-6919;0000-0003-0862-331X;;0000-0002-7556-6098", "linkedin": ";;;", "or_profile": "~Paul_Duckworth1;~Bruno_Lacerda1;katherine.vallis@oncology.ox.ac.uk;~Nick_Hawes1", "aff": "InstaDeep;University of Oxford;;University of Oxford", "aff_domain": "instadeep.com;ox.ac.uk;;ox.ac.uk", "position": "Principal Researcher;Senior Researcher;;Associate Professor", "bibtex": "@misc{\nduckworth2023reinforcement,\ntitle={Reinforcement Learning for Bandits with Continuous Actions and Large Context Spaces},\nauthor={Paul Duckworth and Bruno Lacerda and Katherine Vallis and Nick Hawes},\nyear={2023},\nurl={https://openreview.net/forum?id=Q5uQecAw0vO}\n}", "github": "", "project": "", "reviewers": "rEXW;ejwg;QaQR;hxTb", "site": "https://openreview.net/forum?id=Q5uQecAw0vO", "pdf_size": 838298, "recommendation": "3;3;3;6", "confidence": "4;3;3;4", "correctness": "3;2;3;1", "technical_novelty": "1;1;1;2", "empirical_novelty": "2;2;1;3", "wc_summary_paper": "36;73;36;147", "wc_strength_and_weaknesses": "68;160;151;191", "wc_clarity_quality_novelty_and_reproducibility": "191;31;20;93", "wc_summary_review": "72;78;19;110", "wc_review": "367;342;226;541", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "595;383;357;378", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 3.75, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.25, 0.82915619758885 ], "technical_novelty_avg": [ 1.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 73.0, 45.3155602414888 ], "wc_strength_and_weaknesses_avg": [ 142.5, 45.5 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 83.75, 67.88731472079301 ], "wc_summary_review_avg": [ 69.75, 32.66783586342995 ], "wc_review_avg": [ 369.0, 112.65655773189593 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 428.25, 96.76614852312765 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 
1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": -0.8703882797784892, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3246164304828324866&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "InstaDeep;University of Oxford", "aff_unique_dep": ";", "aff_unique_url": "https://www.instadeep.com;https://www.ox.ac.uk", "aff_unique_abbr": "InstaDeep;Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "Q9yT-pxvWn8", "title": "CADet: Fully Self-Supervised Anomaly Detection With Contrastive Learning", "track": "main", "status": "Reject", "tldr": "We leverage self-supervised contrastive learning to simultaneously perform adversarial and unseen label detection using a statistic inspired by MMD, and without seeing out-of-distribution examples.", "abstract": "Handling out-of-distribution (OOD) samples has become a major stake in the real-world deployment of machine learning systems. This work explores the application of self-supervised contrastive learning to the simultaneous detection of two types of OOD samples: unseen classes and adversarial perturbations. Since in practice the distribution of such samples is not known in advance, we do not assume access to OOD examples. We show that similarity functions trained with contrastive learning can be leveraged with the maximum mean discrepancy (MMD) two-sample test to verify whether two independent sets of samples are drawn from the same distribution. Inspired by this approach, we introduce CADet (Contrastive Anomaly Detection), a method based on image augmentations to perform anomaly detection on single samples. CADet compares favorably to adversarial detection methods to detect adversarially perturbed samples on ImageNet. Simultaneously, it achieves comparable performance to unseen label detection methods on two challenging benchmarks: ImageNet-O and iNaturalist. 
Additionally, CADet is fully self-supervised and requires neither labels for in-distribution samples nor access to out-of-distribution examples.", "keywords": "Contrastive learning;OOD detection;adversarial detection;MMD;ImageNet-O;Anomaly detection", "primary_area": "", "supplementary_material": "", "author": "Charles Guille-Escuret;Pau Rodriguez;David Vazquez;Ioannis Mitliagkas;Joao Monteiro", "authorids": "~Charles_Guille-Escuret1;~Pau_Rodriguez2;~David_Vazquez1;~Ioannis_Mitliagkas1;~Joao_Monteiro1", "gender": "M;M;M;M;", "homepage": ";http://www.david-vazquez.com;http://mitliagkas.github.io/;;https://prlz77.github.io", "dblp": "243/7039;94/8653;83/8757;215/5354-2;190/7735", "google_scholar": "VNgVRmgAAAAJ;1jHvtfsAAAAJ;K757SxgAAAAJ;https://scholar.google.ca/citations?hl=en;https://scholar.google.es/citations?user=IwBx73wAAAAJ", "orcid": ";0000-0002-2845-8158;;;0000-0002-1689-8084", "linkedin": ";https://www.linkedin.com/company/david-vazquez/;;joao-monteiro-47180256/;", "or_profile": "~Charles_Guille-Escuret1;~David_Vazquez1;~Ioannis_Mitliagkas1;~Joao_Monteiro1;~Pau_Rodriguez_Lopez1", "aff": "Apple;ServiceNow research;Mila - Quebec AI Institute;ServiceNow Research;Apple", "aff_domain": "apple.com;servicenow.com;mila.quebec;servicenow.com;apple.com", "position": "Intern;Researcher;Principal Researcher;Researcher;Researcher", "bibtex": "@misc{\nguille-escuret2023cadet,\ntitle={{CAD}et: Fully Self-Supervised Anomaly Detection With Contrastive Learning},\nauthor={Charles Guille-Escuret and Pau Rodriguez and David Vazquez and Ioannis Mitliagkas and Joao Monteiro},\nyear={2023},\nurl={https://openreview.net/forum?id=Q9yT-pxvWn8}\n}", "github": "", "project": "", "reviewers": "qf3u;TDRY;FoD6;txxW", "site": "https://openreview.net/forum?id=Q9yT-pxvWn8", "pdf_size": 331724, "recommendation": "5;6;6;6", "confidence": "4;4;4;2", "correctness": "4;2;3;3", "technical_novelty": "2;3;4;3", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "129;298;73;229", "wc_strength_and_weaknesses": "154;415;381;316", "wc_clarity_quality_novelty_and_reproducibility": "170;49;58;193", "wc_summary_review": "41;116;33;32", "wc_review": "494;878;545;770", "wc_reply_reviewers": "0;379;0;0", "wc_reply_authors": "1015;1192;872;1331", "reply_reviewers": "0;3;0;0", "reply_authors": "2;3;2;2", "recommendation_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 182.25, 87.11307307172673 ], "wc_strength_and_weaknesses_avg": [ 316.5, 100.3356865726248 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 117.5, 64.59295627233669 ], "wc_summary_review_avg": [ 55.5, 35.103418636936205 ], "wc_review_avg": [ 671.75, 157.99742877654685 ], "wc_reply_reviewers_avg": [ 94.75, 164.11181401715112 ], "wc_reply_authors_avg": [ 1102.5, 173.9317394842011 ], "reply_reviewers_avg": [ 0.75, 1.299038105676658 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": -0.816496580927726, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "aff_unique_index": "0;1;2;1;0", "aff_unique_norm": "Apple;ServiceNow;Quebec AI Institute", "aff_unique_dep": "Apple Inc.;research;AI Institute", "aff_unique_url": "https://www.apple.com;https://www.servicenow.com;https://mila.quebec", "aff_unique_abbr": 
"Apple;ServiceNow;Mila", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "United States;Canada" }, { "title": "MaskViT: Masked Visual Pre-Training for Video Prediction", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11473", "id": "QAV2CcLEDh", "poster": "", "openreview": "https://openreview.net/forum?id=QAV2CcLEDh", "slides": "https://iclr.cc/virtual/2023/poster/11473", "video": "https://iclr.cc/virtual/2023/poster/11473", "author_site": "Agrim Gupta, Stephen Tian, Yunzhi Zhang, Jiajun Wu, Roberto Mart\u00edn-Mart\u00edn, Li Fei-Fei", "tldr": "We propose to learn a Transformer based video prediction model via masked visual modeling. ", "abstract": "The ability to predict future visual observations conditioned on past observations and motor commands can enable embodied agents to plan solutions to a variety of tasks in complex environments. This work shows that we can create good video prediction models by pre-training transformers via masked visual modeling. Our approach, named MaskViT, is based on two simple design decisions. First, for memory and training efficiency, we use two types of window attention: spatial and spatiotemporal. Second, during training, we mask a variable percentage of tokens instead of a fixed mask ratio. For inference, MaskViT generates all tokens via iterative refinement where we incrementally decrease the masking ratio following a mask scheduling function. On several datasets we demonstrate that MaskViT outperforms prior works in video prediction, is parameter efficient, and can generate high resolution videos ($256 \\times $256). Further, we demonstrate the benefits of inference speedup (up to $512 \\times$) due to iterative decoding by using MaskViT for planning on a real robot. Our work suggests that we can endow embodied agents with powerful predictive models by leveraging the general framework of masked visual modeling with minimal domain knowledge. 
", "keywords": "Video Prediction;Masked Visual Modeling;Visual MPC;Transformers", "primary_area": "", "supplementary_material": "", "author": "Agrim Gupta;Stephen Tian;Yunzhi Zhang;Jiajun Wu;Roberto Mart\u00edn-Mart\u00edn;Li Fei-Fei", "authorids": "~Agrim_Gupta1;~Stephen_Tian1;~Yunzhi_Zhang1;~Jiajun_Wu1;~Roberto_Mart\u00edn-Mart\u00edn1;~Li_Fei-Fei1", "gender": ";M;F;M;M;F", "homepage": ";http://s-tian.github.io;https://cs.stanford.edu/~yzzhang/;https://jiajunwu.com;https://robertomartinmartin.com/;https://profiles.stanford.edu/fei-fei-li", "dblp": "200/8282;237/9780;58/10932;117/4768;153/7670;79/2528", "google_scholar": "AxzVaI8AAAAJ;l19pn2sAAAAJ;https://scholar.google.com/citations?hl=en;2efgcS0AAAAJ;XOJE8OEAAAAJ;rDfyQnIAAAAJ", "orcid": ";;;0000-0002-4176-343X;0000-0002-9586-2759;", "linkedin": ";;;jiajunwu/;;fei-fei-li-4541247/", "or_profile": "~Agrim_Gupta1;~Stephen_Tian1;~Yunzhi_Zhang1;~Jiajun_Wu1;~Roberto_Mart\u00edn-Mart\u00edn1;~Li_Fei-Fei1", "aff": "Stanford University;Stanford University;Stanford University;Stanford University;University of Texas at Austin;Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu;stanford.edu;utexas.edu;stanford.edu", "position": "PhD student;PhD student;PhD student;Assistant Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\ngupta2023maskvit,\ntitle={MaskViT: Masked Visual Pre-Training for Video Prediction},\nauthor={Agrim Gupta and Stephen Tian and Yunzhi Zhang and Jiajun Wu and Roberto Mart{\\'\\i}n-Mart{\\'\\i}n and Li Fei-Fei},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=QAV2CcLEDh}\n}", "github": "", "project": "", "reviewers": "V2jW;f78f;dKZs;G4sC", "pdf_size": 6120163, "recommendation": "5;8;8;8", "confidence": "5;5;4;4", "correctness": "3;3;4;4", "technical_novelty": "2;3;3;4", "empirical_novelty": "2;3;3;4", "wc_summary_paper": "97;149;42;141", "wc_strength_and_weaknesses": "220;344;540;169", "wc_clarity_quality_novelty_and_reproducibility": "66;210;29;121", "wc_summary_review": "55;67;168;56", "wc_review": "438;770;779;487", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 7.25, 1.299038105676658 ], "confidence_avg": [ 4.5, 0.5 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 107.25, 42.558048592481306 ], "wc_strength_and_weaknesses_avg": [ 318.25, 142.9726809568877 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 106.5, 68.13405903070799 ], "wc_summary_review_avg": [ 86.5, 47.28900506460249 ], "wc_review_avg": [ 618.5, 156.9912417939294 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 135, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7649822845640359332&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=QAV2CcLEDh", "email": "stanford.edu;stanford.edu;stanford.edu;stanford.edu;utexas.edu;stanford.edu", "author_num": 6, "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "Stanford University;University of Texas at Austin", "aff_unique_dep": ";", "aff_unique_url": 
"https://www.stanford.edu;https://www.utexas.edu", "aff_unique_abbr": "Stanford;UT Austin", "aff_campus_unique_index": "0;0;0;0;1;0", "aff_campus_unique": "Stanford;Austin", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Does Deep Learning Learn to Abstract? A Systematic Probing Framework", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11602", "id": "QB1dMPEXau5", "poster": "/media/PosterPDFs/ICLR%202023/11602.png?t=1681197837.304783", "openreview": "https://openreview.net/forum?id=QB1dMPEXau5", "slides": "https://iclr.cc/virtual/2023/poster/11602", "video": "https://iclr.cc/virtual/2023/poster/11602", "author_site": "Shengnan An, Zeqi Lin, Bei Chen, Qiang Fu, Nanning Zheng, Jian-Guang Lou", "tldr": "We design a systematic probing framework along with a set of controlled probing tasks, providing strong evidence that PLMs have the abstraction capability. We conduct an in-depth analysis and provide insightful conclusions.", "abstract": "Abstraction is a desirable capability for deep learning models, which means to induce abstract concepts from concrete instances and flexibly apply them beyond the learning context. At the same time, there is a lack of clear understanding about both the presence and further characteristics of this capability in deep learning models. In this paper, we introduce a systematic probing framework to explore the abstraction capability of deep learning models from a transferability perspective. A set of controlled experiments are conducted based on this framework, providing strong evidence that two probed pre-trained language models (PLMs), T5 and GPT2, have the abstraction capability. We also conduct in-depth analysis, thus shedding further light: (1) the whole training phase exhibits a \"memorize-then-abstract\" two-stage process; (2) the learned abstract concepts are gathered in a few middle-layer attention heads, rather than being evenly distributed throughout the model; (3) the probed abstraction capabilities exhibit robustness against concept mutations, and are more robust to low-level/source-side mutations than high-level/target-side ones; (4) generic pre-training is critical to the emergence of abstraction capability, and PLMs exhibit better abstraction with larger model sizes and data scales.", "keywords": "Abstraction Capability;Probing Tasks;Deep Learning;Pre-Trained Language Model", "primary_area": "", "supplementary_material": "/attachment/4e46f11c5db707314f4a113b60c6f205caa258a6.zip", "author": "Shengnan An;Zeqi Lin;Bei Chen;Qiang Fu;Nanning Zheng;Jian-Guang Lou", "authorids": "~Shengnan_An1;~Zeqi_Lin1;~Bei_Chen3;~Qiang_Fu7;~Nanning_Zheng1;~Jian-Guang_Lou1", "gender": "M;M;F;M;M;M", "homepage": "https://shengnanan.github.io/;https://www.microsoft.com/en-us/research/people/zelin/;http://ml.cs.tsinghua.edu.cn/~beichen/;;;https://www.microsoft.com/en-us/research/people/jlou/", "dblp": "267/9518;https://dblp.uni-trier.de/pid/155/4370.html;;;07/256-1;37/1917", "google_scholar": "oPiRHWMAAAAJ;;Po65v_MAAAAJ;bwTLZSIAAAAJ;https://scholar.google.com/citations?hl=zh-CN;alDxINIAAAAJ", "orcid": ";;;0000-0002-5821-7267;;", "linkedin": ";;;qiang-fu-08301285/;;", "or_profile": "~Shengnan_An1;~Zeqi_Lin1;~Bei_Chen3;~Qiang_Fu7;~Nanning_Zheng1;~Jian-Guang_Lou1", "aff": "Microsoft;Microsoft Research;Microsoft;Microsoft;Xi'an Jiaotong University;Microsoft Research Asia", "aff_domain": "microsoft.com;microsoft.com;microsoft.com;microsoft.com;xjtu.edu.cn;microsoft.com", "position": 
"Intern;Researcher;Researcher;Researcher;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\nan2023does,\ntitle={Does Deep Learning Learn to Abstract? A Systematic Probing Framework},\nauthor={Shengnan An and Zeqi Lin and Bei Chen and Qiang Fu and Nanning Zheng and Jian-Guang Lou},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=QB1dMPEXau5}\n}", "github": "", "project": "", "reviewers": "tkML;dxgs;VyFa;1xjY", "pdf_size": 1282440, "recommendation": "6;8;8;10", "confidence": "3;3;3;3", "correctness": "3;4;3;3", "technical_novelty": "1;4;3;3", "empirical_novelty": "2;4;3;4", "wc_summary_paper": "35;51;83;38", "wc_strength_and_weaknesses": "183;368;53;380", "wc_clarity_quality_novelty_and_reproducibility": "29;16;18;99", "wc_summary_review": "49;77;20;93", "wc_review": "296;512;174;610", "wc_reply_reviewers": "114;30;0;23", "wc_reply_authors": "542;340;114;395", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "recommendation_avg": [ 8.0, 1.4142135623730951 ], "confidence_avg": [ 3.0, 0.0 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 1.0897247358851685 ], "empirical_novelty_avg": [ 3.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 51.75, 19.01808349965895 ], "wc_strength_and_weaknesses_avg": [ 246.0, 136.0679977070288 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 40.5, 34.13575837739657 ], "wc_summary_review_avg": [ 59.75, 27.83320858255476 ], "wc_review_avg": [ 398.0, 172.13366899011942 ], "wc_reply_reviewers_avg": [ 41.75, 43.16465568031326 ], "wc_reply_authors_avg": [ 347.75, 153.83818609175032 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15570401822590110889&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=QB1dMPEXau5", "email": "microsoft.com;microsoft.com;microsoft.com;microsoft.com;xjtu.edu.cn;microsoft.com", "author_num": 6, "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "Microsoft;Xi'an Jiao Tong University", "aff_unique_dep": "Microsoft Corporation;", "aff_unique_url": "https://www.microsoft.com;https://www.xjtu.edu.cn", "aff_unique_abbr": "Microsoft;XJTU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;0;0;0;1;1", "aff_country_unique": "United States;China" }, { "title": "Loss Landscapes are All You Need: Neural Network Generalization Can Be Explained Without the Implicit Bias of Gradient Descent", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11560", "id": "QC10RmRbZy9", "poster": "/media/PosterPDFs/ICLR%202023/11560.png?t=1684966700.1259859", "openreview": "https://openreview.net/forum?id=QC10RmRbZy9", "slides": "https://iclr.cc/virtual/2023/poster/11560", "video": "https://iclr.cc/virtual/2023/poster/11560", "author_site": "Ping-yeh Chiang, Renkun Ni, David Y. Miller, Arpit Bansal, Jonas Geiping, Micah Goldblum, Tom Goldstein", "tldr": "We empirically showed that a random optimizer performs just as well as SGD", "abstract": "It is commonly believed that the implicit regularization of optimizers is needed for neural networks to generalize in the overparameterized regime. 
In this paper, we observe experimentally that this implicit regularization behavior is {\\em generic}, i.e. it does not depend strongly on the choice of optimizer. We demonstrate this by training neural networks using several gradient-free optimizers, which do not benefit from properties that are often attributed to gradient-based optimizers. This includes a guess-and-check optimizer that generates uniformly random parameter vectors until finding one that happens to achieve perfect train accuracy, and a zeroth-order Pattern Search optimizer that uses no gradient computations. In the low sample and few-shot regimes, where zeroth order optimizers are most computationally tractable, we find that these non-gradient optimizers achieve test accuracy comparable to SGD. The code to reproduce results can be found at https://github.com/Ping-C/optimizer .", "keywords": "generalization;regularization", "primary_area": "", "supplementary_material": "", "author": "Ping-yeh Chiang;Renkun Ni;David Yu Miller;Arpit Bansal;Jonas Geiping;Micah Goldblum;Tom Goldstein", "authorids": "~Ping-yeh_Chiang1;~Renkun_Ni1;~David_Yu_Miller1;~Arpit_Bansal1;~Jonas_Geiping1;~Micah_Goldblum1;~Tom_Goldstein1", "gender": ";M;;;M;;M", "homepage": ";https://www.cs.umd.edu/~rn9zm/;;;https://jonasgeiping.github.io/;;https://www.cs.umd.edu/~tomg/", "dblp": "236/4288;183/7067;;;190/7229;241/7231;25/8184", "google_scholar": "WUoMq1IAAAAJ;;;;https://scholar.google.de/citations?user=206vNCEAAAAJ;pGDKzuUAAAAJ;KmSuVtgAAAAJ", "orcid": ";;;;;;", "linkedin": ";;;;;;", "or_profile": "~Ping-yeh_Chiang1;~Renkun_Ni1;~David_Yu_Miller1;~Arpit_Bansal1;~Jonas_Geiping1;~Micah_Goldblum1;~Tom_Goldstein1", "aff": "University of Maryland, College Park;Department of Computer Science, University of Maryland, College Park;;;University of Maryland, College Park;New York University;University of Maryland, College Park", "aff_domain": "umd.edu;cs.umd.edu;;;umd.edu;nyu.edu;umd.edu", "position": "PhD student;PhD student;;;Postdoc;Postdoc;Full Professor", "bibtex": "@inproceedings{\nchiang2023loss,\ntitle={Loss Landscapes are All You Need: Neural Network Generalization Can Be Explained Without the Implicit Bias of Gradient Descent},\nauthor={Ping-yeh Chiang and Renkun Ni and David Yu Miller and Arpit Bansal and Jonas Geiping and Micah Goldblum and Tom Goldstein},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=QC10RmRbZy9}\n}", "github": "", "project": "", "reviewers": "Qs6Q;rv2f;qbak", "pdf_size": 2801729, "recommendation": "5;8;8", "confidence": "4;4;4", "correctness": "2;3;4", "technical_novelty": "2;4;2", "empirical_novelty": "2;4;3", "wc_summary_paper": "96;162;102", "wc_strength_and_weaknesses": "269;174;571", "wc_clarity_quality_novelty_and_reproducibility": "15;735;42", "wc_summary_review": "21;79;87", "wc_review": "401;1150;802", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 7.0, 1.4142135623730951 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 2.6666666666666665, 0.9428090415820634 ], "empirical_novelty_avg": [ 3.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 120.0, 29.79932885150268 ], "wc_strength_and_weaknesses_avg": [ 338.0, 169.2591701110066 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 264.0, 333.22965054148466 ], "wc_summary_review_avg": [ 62.333333333333336, 29.4089933334837 ], "wc_review_avg": 
[ 784.3333333333334, 306.03304106292546 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.8660254037844387, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5116086891646315741&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "pdf": "https://openreview.net/pdf?id=QC10RmRbZy9", "email": "umd.edu;cs.umd.edu;;;umd.edu;nyu.edu;umd.edu", "author_num": 7, "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "University of Maryland;University of Maryland, College Park;New York University", "aff_unique_dep": ";Department of Computer Science;", "aff_unique_url": "https://www.umd.edu;https://www.umd.edu;https://www.nyu.edu", "aff_unique_abbr": "UMD;UMD;NYU", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "College Park;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "QCcrFi7q3u", "title": "Forget to Learn (F2L): Rethinking Replay Loss in Unsupervised Continuous Domain Adaptation", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Although continuous unsupervised domain adaptation (CUDA) has shown success in dealing with non-stationary data, catastrophic forgetting is still a challenge hindering its full potential. The current state-of-the-art (SOTA) focuses on training a single model to simultaneously perform adaptation (e.g., domain alignment) and knowledge retention (i.e., minimizing replay loss). However, the two conflicting objectives result in a hyper-parameter, which is difficult to tune yet significantly affects model performance. Therefore, we propose to use two separate models so that one model is dedicated to the retention of historical knowledge (i.e., high stability) while the other to the adaptation to future domains (i.e., high plasticity). This allows the algorithm to forget in order to achieve better overall performance, an approach dubbed Forget to Learn (F2L). Specifically, F2L decomposes the training process into a specialist model and a generalist model, and uses knowledge distillation to transfer knowledge between the two models. 
We demonstrate the superiority of F2L compared to current CUDA trends (i.e., multi-task learning and single-task constrained learning) on different continuous unsupervised domain adaptation datasets.", "keywords": "Domain Adaptation;Lifelong Learning;Replay Loss;Knowledge Distillation;Stability Plasticity Dilemma", "primary_area": "", "supplementary_material": "", "author": "Mohamed Abubakr Hassan;Chi-Guhn Lee", "authorids": "~Mohamed_Abubakr_Hassan1;~Chi-Guhn_Lee1", "gender": "M;M", "homepage": "https://www.linkedin.com/in/mohamedabubakr1/;http://cglee.mie.utoronto.ca", "dblp": ";62/4690", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.ca/citations?user=ZpALG2AAAAAJ", "orcid": ";0000-0002-0916-0241", "linkedin": ";", "or_profile": "~Mohamed_Abubakr_Hassan1;~Chi-Guhn_Lee1", "aff": "University of Toronto;University of Toronto", "aff_domain": "utoronto.ca;mie.utoronto.ca", "position": "PhD student;Full Professor", "bibtex": "@misc{\nhassan2023forget,\ntitle={Forget to Learn (F2L): Rethinking Replay Loss in Unsupervised Continuous Domain Adaptation},\nauthor={Mohamed Abubakr Hassan and Chi-Guhn Lee},\nyear={2023},\nurl={https://openreview.net/forum?id=QCcrFi7q3u}\n}", "github": "", "project": "", "reviewers": "WdtT;RG4k;q51h", "site": "https://openreview.net/forum?id=QCcrFi7q3u", "pdf_size": 5536831, "recommendation": "1;3;3", "confidence": "5;5;4", "correctness": "2;2;2", "technical_novelty": "1;1;2", "empirical_novelty": "1;1;2", "wc_summary_paper": "169;89;104", "wc_strength_and_weaknesses": "385;90;773", "wc_clarity_quality_novelty_and_reproducibility": "70;492;49", "wc_summary_review": "32;116;138", "wc_review": "656;787;1064", "wc_reply_reviewers": "0;0;166", "wc_reply_authors": "1127;1030;1493", "reply_reviewers": "0;0;4", "reply_authors": "5;8;11", "recommendation_avg": [ 2.3333333333333335, 0.9428090415820634 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "correctness_avg": [ 2.0, 0.0 ], "technical_novelty_avg": [ 1.3333333333333333, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.3333333333333333, 0.4714045207910317 ], "wc_summary_paper_avg": [ 120.66666666666667, 34.721111093332766 ], "wc_strength_and_weaknesses_avg": [ 416.0, 279.69388028104345 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 203.66666666666666, 204.0626265526237 ], "wc_summary_review_avg": [ 95.33333333333333, 45.67518168789504 ], "wc_review_avg": [ 835.6666666666666, 170.08298628088062 ], "wc_reply_reviewers_avg": [ 55.333333333333336, 78.25315045131126 ], "wc_reply_authors_avg": [ 1216.6666666666667, 199.36956192513998 ], "reply_reviewers_avg": [ 1.3333333333333333, 1.8856180831641267 ], "reply_authors_avg": [ 8.0, 2.449489742783178 ], "replies_avg": [ 33, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.4999999999999999, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:XSTvA-1_QsEJ:scholar.google.com/&scioq=Forget+to+Learn+(F2L):+Rethinking+Replay+Loss+in+Unsupervised+Continuous+Domain+Adaptation&hl=en&as_sdt=0,14", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "University of Toronto", "aff_unique_dep": "", "aff_unique_url": "https://www.utoronto.ca", "aff_unique_abbr": "U of T", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { "title": "Iterative Patch Selection for High-Resolution Image Recognition", "status": "Poster", "track": "main", "site": 
"https://iclr.cc/virtual/2023/poster/11820", "id": "QCrw0u9LQ7", "poster": "/media/PosterPDFs/ICLR%202023/11820.png?t=1682607316.4458807", "openreview": "https://openreview.net/forum?id=QCrw0u9LQ7", "slides": "https://iclr.cc/virtual/2023/poster/11820", "video": "https://iclr.cc/virtual/2023/poster/11820", "author_site": "Benjamin Bergner, Christoph Lippert, Aravindh Mahendran", "tldr": "We propose a simple, memory-efficient method that selects the most salient patches from a high-resolution image and then aggregates them into a global representation for image recognition.", "abstract": "High-resolution images are prevalent in various applications, such as autonomous driving and computer-aided diagnosis. However, training neural networks on such images is computationally challenging and easily leads to out-of-memory errors even on modern GPUs. We propose a simple method, Iterative Patch Selection (IPS), which decouples the memory usage from the input size and thus enables the processing of arbitrarily large images under tight hardware constraints. IPS achieves this by selecting only the most salient patches, which are then aggregated into a global representation for image recognition. For both patch selection and aggregation, a cross-attention based transformer is introduced, which exhibits a close connection to Multiple Instance Learning. Our method demonstrates strong performance and has wide applicability across different domains, training regimes and image sizes while using minimal accelerator memory. For example, we are able to finetune our model on whole-slide images consisting of up to 250k patches (>16 gigapixels) with only 5 GB of GPU VRAM at a batch size of 16.", "keywords": "high-resolution images;memory-efficient deep learning;multiple instance learning;transformer;image recognition;computer vision", "primary_area": "", "supplementary_material": "/attachment/73446273a8f34d23fbbe29c37f9e40e03a61d3e9.zip", "author": "Benjamin Bergner;Christoph Lippert;Aravindh Mahendran", "authorids": "~Benjamin_Bergner1;~Christoph_Lippert1;~Aravindh_Mahendran2", "gender": "M;M;M", "homepage": ";http://hpi.de/lippert;https://aravindhm.github.io/", "dblp": "187/6248;45/7970.html;131/5343", "google_scholar": "Nf63mO4AAAAJ;RVl8TE0AAAAJ;lAjGbLMAAAAJ", "orcid": ";0000-0001-6363-2556;0000-0002-2650-9871", "linkedin": "bbergner/;christoph-lippert-307b8135/;", "or_profile": "~Benjamin_Bergner1;~Christoph_Lippert1;~Aravindh_Mahendran2", "aff": "Qualcomm Inc, QualComm;Hasso Plattner Institute;Google", "aff_domain": "qti.qualcomm.com;hpi.de;google.com", "position": "Intern;Full Professor;Researcher", "bibtex": "@inproceedings{\nbergner2023iterative,\ntitle={Iterative Patch Selection for High-Resolution Image Recognition},\nauthor={Benjamin Bergner and Christoph Lippert and Aravindh Mahendran},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=QCrw0u9LQ7}\n}", "github": "", "project": "", "reviewers": "Ffm2;AtFj;jHHu;jpUm", "pdf_size": 5980075, "recommendation": "6;6;8;8", "confidence": "4;4;3;4", "correctness": "3;3;3;4", "technical_novelty": "3;2;2;3", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "84;90;85;79", "wc_strength_and_weaknesses": "121;296;201;125", "wc_clarity_quality_novelty_and_reproducibility": "28;45;23;44", "wc_summary_review": "52;136;49;76", "wc_review": "285;567;358;324", "wc_reply_reviewers": "20;0;16;0", "wc_reply_authors": "886;933;335;183", "reply_reviewers": "1;0;1;0", "reply_authors": 
"2;2;1;1", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 84.5, 3.905124837953327 ], "wc_strength_and_weaknesses_avg": [ 185.75, 71.18769205417465 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 35.0, 9.669539802906858 ], "wc_summary_review_avg": [ 78.25, 34.945493271665235 ], "wc_review_avg": [ 383.5, 109.04700821205505 ], "wc_reply_reviewers_avg": [ 9.0, 9.1104335791443 ], "wc_reply_authors_avg": [ 584.25, 330.0783051035012 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10581887663084449070&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=QCrw0u9LQ7", "email": "qti.qualcomm.com;hpi.de;google.com", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Qualcomm Incorporated;Hasso Plattner Institute;Google", "aff_unique_dep": ";;Google", "aff_unique_url": "https://www.qualcomm.com;https://www.hpi.de;https://www.google.com", "aff_unique_abbr": "Qualcomm;HPI;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;Germany" }, { "id": "QCtizuT48D", "title": "FedCL: Critical Learning Periods-aware Adaptive Client Selection in Federated Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Federated learning (FL) is a distributed optimization paradigm that learns from data samples distributed across a number of clients. Adaptive client selection that is cognizant of the training progress of clients has become a major trend to improve FL efficiency but not yet well-understood. Most existing FL methods such as FedAvg and its state-of-the-art variants implicitly assume that all learning phases during the FL training process are equally important. Unfortunately, this assumption has been revealed to be invalid due to recent findings on critical learning (CL) periods, in which small gradient errors may lead to an irrecoverable deficiency on final test accuracy. In this paper, we develop FedCL, a CL periods-aware FL framework to reveal that adaptively augmenting exiting FL methods with CL periods, the resultant performance is significantly improved when the client selection is guided by the discovered CL periods. Experiments based on various machine learning models and datasets validate that the proposed FedCL framework consistently achieves an improved model accuracy while maintains comparable or even better communication efficiency as compared to state-of-the-art methods, demonstrating a promising and easily adopted method for tackling the heterogeneity of FL training. 
\n", "keywords": "Critical Learning Periods;Federated Learning;Client Selection", "primary_area": "", "supplementary_material": "/attachment/2a783f01597bf3ff55cffaff0ba4708bfa068ad1.zip", "author": "Gang Yan;Hao Wang;Xu Yuan;Jian Li", "authorids": "~Gang_Yan1;~Hao_Wang29;~Xu_Yuan1;~Jian_Li14", "gender": "M;M;M;M", "homepage": "https://www.gyan23.com/;https://www.haow.us;https://yuanxuyx.github.io/;https://sites.google.com/stonybrook.edu/jianli", "dblp": "203/8629;w/HaoWang-22;24/6114-1;33/5448-8", "google_scholar": "wyHzGcgAAAAJ;r-Ik__gAAAAJ;R3XkwA8AAAAJ;h039Yq4AAAAJ", "orcid": "0000-0002-7734-1589;0000-0002-1444-2657;;", "linkedin": "gang-yan-4b7622212/;haowanguoft/;;", "or_profile": "~Gang_Yan1;~Hao_Wang29;~Xu_Yuan1;~Jian_Li14", "aff": "State University of New York at Binghamton;Louisiana State University;University of Louisiana at Lafeyette;State University of New York, Binghamton", "aff_domain": "binghamton.edu;lsu.edu;louisiana.edu;binghamton.edu", "position": "PhD student;Assistant Professor;Assistant Professor;Assistant Professor", "bibtex": "@misc{\nyan2023fedcl,\ntitle={Fed{CL}: Critical Learning Periods-aware Adaptive Client Selection in Federated Learning},\nauthor={Gang Yan and Hao Wang and Xu Yuan and Jian Li},\nyear={2023},\nurl={https://openreview.net/forum?id=QCtizuT48D}\n}", "github": "", "project": "", "reviewers": "dEoL;8m8J;29oH;phLC", "site": "https://openreview.net/forum?id=QCtizuT48D", "pdf_size": 2150425, "recommendation": "5;5;5;6", "confidence": "4;4;4;4", "correctness": "4;3;3;4", "technical_novelty": "2;2;2;2", "empirical_novelty": "2;4;3;2", "wc_summary_paper": "119;100;16;74", "wc_strength_and_weaknesses": "206;441;147;298", "wc_clarity_quality_novelty_and_reproducibility": "27;148;57;21", "wc_summary_review": "182;51;65;50", "wc_review": "534;740;285;443", "wc_reply_reviewers": "0;183;0;62", "wc_reply_authors": "933;2264;229;1861", "reply_reviewers": "0;2;0;1", "reply_authors": "3;5;2;5", "recommendation_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 77.25, 38.80318930191177 ], "wc_strength_and_weaknesses_avg": [ 273.0, 110.92114316035514 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 63.25, 50.795546064591136 ], "wc_summary_review_avg": [ 87.0, 55.16792546398677 ], "wc_review_avg": [ 500.5, 164.49088120622372 ], "wc_reply_reviewers_avg": [ 61.25, 74.71069200589699 ], "wc_reply_authors_avg": [ 1321.75, 794.332227408658 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 3.75, 1.299038105676658 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:V-OWwm2cCrQJ:scholar.google.com/&scioq=FedCL:+Critical+Learning+Periods-aware+Adaptive+Client+Selection+in+Federated+Learning&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "State University of New York at Binghamton;Louisiana State University;University of Louisiana at Lafayette", "aff_unique_dep": ";;", "aff_unique_url": "https://www.binghamton.edu;https://www.lsu.edu;https://www.louisiana.edu", "aff_unique_abbr": "SUNY Binghamton;LSU;UL Lafayette", "aff_campus_unique_index": "0;2;0", "aff_campus_unique": "Binghamton;;Lafayette", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": 
"United States" }, { "id": "QDE5hzxVpS", "title": "Unveiling The Mask of Position-Information Pattern Through the Mist of Image Features", "track": "main", "status": "Reject", "tldr": "We demonstrate a more accurate paradigm in quantifying the strength of positional information in CNN models", "abstract": "Recent studies have shown that paddings in convolutional neural networks encode absolute position information which can negatively affect the model performance for certain tasks. However, existing metrics for quantifying the strength of positional information remain unreliable and frequently lead to erroneous results. To address this issue, we propose novel metrics for measuring and visualizing the encoded positional information. We formally define the encoded information as Position-information Pattern from Padding (PPP) and conduct a series of experiments to study its properties as well as its formation. The proposed metrics measure the presence of positional information more reliably than the existing metrics based on PosENet and tests in F-Conv. We also demonstrate that for any extant (and proposed) padding schemes, PPP is primarily a learning artifact and is less dependent on the characteristics of the underlying padding schemes.", "keywords": "positional information;position encoding;padding;CNN", "primary_area": "", "supplementary_material": "", "author": "Chieh Hubert Lin;Hung-Yu Tseng;Hsin-Ying Lee;Maneesh Kumar Singh;Ming-Hsuan Yang", "authorids": "~Chieh_Hubert_Lin1;~Hung-Yu_Tseng2;~Hsin-Ying_Lee2;~Maneesh_Kumar_Singh1;~Ming-Hsuan_Yang1", "gender": ";;M;M;M", "homepage": ";https://hytseng0509.github.io/;http://hsinyinglee.com/;https://arxiv.org/search/?query=Singh%2C+Maneesh&searchtype=author&abstracts=show&order=-announced_date_first&size=50;https://faculty.ucmerced.edu/mhyang/", "dblp": ";144/5474;149/7976-1.html;263/9205-1;79/3711.html", "google_scholar": ";hzOgd9MAAAAJ;;hdQhiFgAAAAJ;p9-ohHsAAAAJ", "orcid": ";;;0000-0002-7414-1813;0000-0003-4848-2304", "linkedin": ";;;maneesh-singh-3523ab9/;minghsuanyang/", "or_profile": "~Chieh_Hubert_Lin1;~Hung-Yu_Tseng2;~Hsin-Ying_Lee2;~Maneesh_Kumar_Singh1;~Ming-Hsuan_Yang1", "aff": ";Meta;Snap Inc.;Comcast ;University of California at Merced", "aff_domain": ";meta.com;snap.com;comcast.com;umcerced.edu", "position": ";Research Scientist;Researcher;Sr. 
Director;Professor", "bibtex": "@misc{\nlin2023unveiling,\ntitle={Unveiling The Mask of Position-Information Pattern Through the Mist of Image Features},\nauthor={Chieh Hubert Lin and Hung-Yu Tseng and Hsin-Ying Lee and Maneesh Kumar Singh and Ming-Hsuan Yang},\nyear={2023},\nurl={https://openreview.net/forum?id=QDE5hzxVpS}\n}", "github": "", "project": "", "reviewers": "rkz4;D3Ci;Mohz;5amb", "site": "https://openreview.net/forum?id=QDE5hzxVpS", "pdf_size": 2987611, "recommendation": "3;5;5;8", "confidence": "2;4;3;3", "correctness": "2;4;2;3", "technical_novelty": "2;4;2;3", "empirical_novelty": "2;4;2;3", "wc_summary_paper": "71;69;28;65", "wc_strength_and_weaknesses": "136;422;42;86", "wc_clarity_quality_novelty_and_reproducibility": "36;198;68;26", "wc_summary_review": "19;21;63;41", "wc_review": "262;710;201;218", "wc_reply_reviewers": "0;0;137;0", "wc_reply_authors": "745;623;740;298", "reply_reviewers": "0;0;1;0", "reply_authors": "3;3;2;2", "recommendation_avg": [ 5.25, 1.7853571071357126 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 2.75, 0.82915619758885 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 58.25, 17.597940220378067 ], "wc_strength_and_weaknesses_avg": [ 171.5, 148.40064016034432 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 82.0, 68.74590896918885 ], "wc_summary_review_avg": [ 36.0, 17.804493814764857 ], "wc_review_avg": [ 347.75, 210.32638327133378 ], "wc_reply_reviewers_avg": [ 34.25, 59.322740159234044 ], "wc_reply_authors_avg": [ 601.5, 181.89901044260796 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.39605901719066966, "corr_recommendation_correctness": 0.37998029782867415, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6182959298504810232&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Meta;Snap Inc.;Comcast Corporation;University of California, Merced", "aff_unique_dep": "Meta Platforms, Inc.;;;", "aff_unique_url": "https://meta.com;https://www.snapinc.com;https://www.comcast.com;https://www.ucmerced.edu", "aff_unique_abbr": "Meta;Snap;Comcast;UC Merced", "aff_campus_unique_index": "1", "aff_campus_unique": ";Merced", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "QEfpL9Iy2KD", "title": "Distribution Aware Metrics for Conditional Natural Language Generation", "track": "main", "status": "Reject", "tldr": "his work introduces alternative methods for the evaluation of conditional natural language generation based on language distributional divergences.", "abstract": "Traditional automated metrics for evaluating conditional natural language generation use pairwise comparisons between a single generated text and the best-matching gold-standard ground truth text. When multiple ground truths are available, scores are aggregated using an average or max operation across references. While this approach works well when diversity in the ground truth data (i.e. dispersion of the distribution of conditional texts) can be ascribed to noise, such as in automated speech recognition, it does not allow for robust evaluation in the case where diversity in the ground truths represents signal for the model. 
In this work we argue that existing metrics are not appropriate for domains such as visual description or summarization where ground truths are semantically diverse, and where the diversity in those captions captures useful additional information about the context. We propose a novel paradigm for multi-candidate evaluation of conditional language generation models, and a new family of metrics that compare the {\\em distributions} of reference and model-generated caption sets using small sample sets of each. We demonstrate the utility of our approach with a case study in visual description, where we show that existing models optimize for single-description quality over diversity, and gain some insights into how sampling methods and temperature impact description quality and diversity.", "keywords": "Natural Language Generation;Video Description;Image Description;Metrics", "primary_area": "", "supplementary_material": "/attachment/a63ccd83a7c8384430934ccb7c1e55ec49c958d3.zip", "author": "David Chan;Yiming Ni;Sudheendra Vijayanarasimhan;David A Ross;Austin Myers;John Canny", "authorids": "~David_Chan3;~Yiming_Ni1;~Sudheendra_Vijayanarasimhan1;~David_A_Ross1;~Austin_Myers1;~John_Canny1", "gender": "M;;M;M;M;M", "homepage": "https://people.eecs.berkeley.edu/~davidchan/;;https://research.google.com/pubs/105363.html;;http://www.cs.berkeley.edu/~jfc/;http://www.cs.toronto.edu/~dross/", "dblp": "80/9659;;;135/8626;;68/2171", "google_scholar": "qa4M89wAAAAJ;;y5fsjDAAAAAJ;Tw8DY-cAAAAJ;https://scholar.google.com.tw/citations?user=LAv0HTEAAAAJ;RqOzJR0AAAAJ", "orcid": ";;;;;", "linkedin": ";yiming-ni-a99760194/;;;;", "or_profile": "~David_Chan3;~Yiming_Ni1;~Sudheendra_Vijayanarasimhan1;~Austin_Myers1;~John_Canny1;~David_Alexander_Ross1", "aff": "University of California, Berkeley;University of California, Berkeley;Research, Google;Google;University of California, Berkeley;Research, Google", "aff_domain": "berkeley.edu;berkeley.edu;research.google.com;google.com;berkeley.edu;research.google.com", "position": "PhD student;Undergrad student;Researcher;Researcher;Full Professor;Software Engineer", "bibtex": "@misc{\nchan2023distribution,\ntitle={Distribution Aware Metrics for Conditional Natural Language Generation},\nauthor={David Chan and Yiming Ni and Sudheendra Vijayanarasimhan and David A Ross and Austin Myers and John Canny},\nyear={2023},\nurl={https://openreview.net/forum?id=QEfpL9Iy2KD}\n}", "github": "", "project": "", "reviewers": "URmg;ZEw9;niHx", "site": "https://openreview.net/forum?id=QEfpL9Iy2KD", "pdf_size": 18083989, "recommendation": "5;6;6", "confidence": "4;3;2", "correctness": "3;3;3", "technical_novelty": "3;2;3", "empirical_novelty": "0;3;3", "wc_summary_paper": "106;72;178", "wc_strength_and_weaknesses": "195;273;454", "wc_clarity_quality_novelty_and_reproducibility": "25;50;15", "wc_summary_review": "70;96;15", "wc_review": "396;491;662", "wc_reply_reviewers": "69;25;0", "wc_reply_authors": "711;507;730", "reply_reviewers": "1;1;0", "reply_authors": "2;2;1", "recommendation_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 1.4142135623730951 ], "wc_summary_paper_avg": [ 118.66666666666667, 44.19150245113747 ], "wc_strength_and_weaknesses_avg": [ 307.3333333333333, 108.4875824947517 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 30.0, 14.719601443879744 ], "wc_summary_review_avg": [ 
60.333333333333336, 33.76717669901086 ], "wc_review_avg": [ 516.3333333333334, 110.06159891422419 ], "wc_reply_reviewers_avg": [ 31.333333333333332, 28.522895287041877 ], "wc_reply_authors_avg": [ 649.3333333333334, 100.94332843509548 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.8660254037844385, "corr_recommendation_correctness": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17595911135915558890&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;1;1;0;1", "aff_unique_norm": "University of California, Berkeley;Google", "aff_unique_dep": ";Google Research", "aff_unique_url": "https://www.berkeley.edu;https://research.google", "aff_unique_abbr": "UC Berkeley;Google", "aff_campus_unique_index": "0;0;1;1;0;1", "aff_campus_unique": "Berkeley;Mountain View", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "QEmn_Hvh7j8", "title": "Private GANs, Revisited", "track": "main", "status": "Reject", "tldr": "", "abstract": "We show that with improved training, the standard approach for differentially private GANs -- updating the discriminator with noisy gradients -- achieves or competes with state-of-the-art results for private image synthesis. Existing instantiations of this approach neglect to consider how adding noise only to discriminator updates disrupts the careful balance between generator and discriminator necessary for successful GAN training. We show that a simple fix restores parity: taking more discriminator steps between generator steps. Finally, with the goal of restoring parity between generator and discriminator, we experiment with further modifications to improve discriminator training and see further improvements. 
For MNIST at $\\epsilon=10$, our private GANs improve the record FID from 48.4 to 13.0, as well as downstream classifier accuracy from 83.2\\% to 95.0\\%.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Alex Bie;Gautam Kamath;Guojun Zhang", "authorids": "~Alex_Bie1;~Gautam_Kamath1;~Guojun_Zhang1", "gender": ";M;M", "homepage": ";http://www.gautamkamath.com/;https://gordon-guojun-zhang.github.io/", "dblp": ";73/11140;56/4451", "google_scholar": ";MK6zHkYAAAAJ;https://scholar.google.ca/citations?user=p8Y0xJEAAAAJ", "orcid": ";;", "linkedin": ";;guojun-zhang-bbb009a4/", "or_profile": "~Alex_Bie1;~Gautam_Kamath1;~Guojun_Zhang1", "aff": ";University of Waterloo;Huawei Technologies Ltd.", "aff_domain": ";uwaterloo.ca;huawei.com", "position": ";Assistant Professor;Researcher", "bibtex": "@misc{\nbie2023private,\ntitle={Private {GAN}s, Revisited},\nauthor={Alex Bie and Gautam Kamath and Guojun Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=QEmn_Hvh7j8}\n}", "github": "", "project": "", "reviewers": "dASH;hSom;wVny", "site": "https://openreview.net/forum?id=QEmn_Hvh7j8", "pdf_size": 3233598, "recommendation": "3;5;5", "confidence": "2;3;3", "correctness": "3;4;3", "technical_novelty": "2;2;2", "empirical_novelty": "2;2;4", "wc_summary_paper": "28;82;61", "wc_strength_and_weaknesses": "138;103;223", "wc_clarity_quality_novelty_and_reproducibility": "8;36;43", "wc_summary_review": "25;65;50", "wc_review": "199;286;377", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1759;1224;1657", "reply_reviewers": "0;0;0", "reply_authors": "3;3;3", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.9428090415820634 ], "wc_summary_paper_avg": [ 57.0, 22.22611077089287 ], "wc_strength_and_weaknesses_avg": [ 154.66666666666666, 50.38738819276992 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 29.0, 15.121728296285006 ], "wc_summary_review_avg": [ 46.666666666666664, 16.49915822768611 ], "wc_review_avg": [ 287.3333333333333, 72.67431152446892 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1546.6666666666667, 231.92862886864333 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 3.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.9999999999999998, "corr_recommendation_correctness": 0.49999999999999983, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8898405665582241426&as_sdt=805&sciodt=0,3&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1", "aff_unique_norm": "University of Waterloo;Huawei", "aff_unique_dep": ";Huawei Technologies", "aff_unique_url": "https://uwaterloo.ca;https://www.huawei.com", "aff_unique_abbr": "UW;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Canada;China" }, { "id": "QFm186CbBp", "title": "Attention Based Models for Cell Type Classification on Single-Cell RNA-Seq Data", "track": "main", "status": "Withdraw", "tldr": "We propose two novel models through representation and attention learning for the cell type classification task on single-cell RNA-seq data.", "abstract": "Cell type classification serves as one of the most fundamental analyses in bioinformatics. 
It helps in discovering new cell types, recognizing tumor cells in the cancer microenvironment and facilitating downstream tasks such as trajectory inference. Single-cell RNA-sequencing (scRNA-seq) technology can profile the whole transcriptome of different cells, thus providing invaluable data for cell type classification. Existing cell type classification methods can be mainly categorized into statistical models and neural network models. The statistical models either make hypotheses on the gene expression distribution which may not be consistent with the real data, or heavily rely on prior knowledge such as marker genes for specific cell types. By contrast, the neural networks are more robust and flexible, while it is hard to interpret the biological meanings hidden behind a mass of model parameters. Recently, the attention mechanism has been widely applied in diverse fields due to the good interpretability of the attention weights. In this paper, we examine the effectiveness and interpretability of the attention mechanism by proposing two novel models for the cell type classification task. The first model classifies cells by a capsule attention network (CAN) that performs attention on the capsule features extracted for cells. To align the features with genes, the second model first factorizes the scRNA-seq matrix to obtain the representation vectors for all genes and cells, and then performs the attention operation on the cell and gene vectors. We name it Cell-Gene Representation Attention network (CGRAN). Experiments show that our attention-based models achieve higher accuracy in cell type classification compared to existing methods on diverse datasets. Moreover, the key genes picked by their high attention scores in different cell types perfectly match the acknowledged marker genes.", "keywords": "Single-cell RNA-seq data cell type classification;attention mechanism;learning representations", "primary_area": "", "supplementary_material": "", "author": "Tianxu Wang;Yue Fan;Xiuli Ma", "authorids": "~Tianxu_Wang2;~Yue_Fan2;~Xiuli_Ma1", "gender": ";F;M", "homepage": "https://yuefan1014.github.io/;http://sai.pku.edu.cn/info/1362/2239.htm;", "dblp": ";;", "google_scholar": "https://scholar.google.com/citations?hl=en;;", "orcid": ";;0000-0002-6642-7838", "linkedin": ";;", "or_profile": "~Yue_Fan2;~Xiuli_Ma1;~tianxu_Wang1", "aff": "Peking University;Peking University;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;pku.edu.cn", "position": "MS student;Assistant Professor;MS student", "bibtex": "@misc{\nwang2023attention,\ntitle={Attention Based Models for Cell Type Classification on Single-Cell {RNA}-Seq Data},\nauthor={Tianxu Wang and Yue Fan and Xiuli Ma},\nyear={2023},\nurl={https://openreview.net/forum?id=QFm186CbBp}\n}", "github": "", "project": "", "reviewers": "tdoq;w2MV;UV3t;arZA", "site": "https://openreview.net/forum?id=QFm186CbBp", "pdf_size": 1457127, "recommendation": "3;3;5;5", "confidence": "3;4;2;3", "correctness": "3;3;3;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;1;2;3", "wc_summary_paper": "146;208;151;133", "wc_strength_and_weaknesses": "152;86;160;259", "wc_clarity_quality_novelty_and_reproducibility": "202;179;64;40", "wc_summary_review": "36;124;68;162", "wc_review": "536;597;443;594", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "431;228;285;391", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 3.0, 0.0 ], 
"technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 159.5, 28.76195403653931 ], "wc_strength_and_weaknesses_avg": [ 164.25, 61.78339178128699 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 121.25, 70.24021284136317 ], "wc_summary_review_avg": [ 97.5, 48.77243073704652 ], "wc_review_avg": [ 542.5, 62.37988457828373 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 333.75, 81.07828007549247 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.7071067811865475, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:FTMjnsw5b40J:scholar.google.com/&scioq=Attention+Based+Models+for+Cell+Type+Classification+on+Single-Cell+RNA-Seq+Data&hl=en&as_sdt=0,33", "gs_version_total": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "QHWXmoYNw-Z", "title": "Boosting Out-of-Distribution Detection with Multiple Pre-trained Models", "track": "main", "status": "Reject", "tldr": "", "abstract": "Out-of-Distribution (OOD) detection, i.e., identifying whether an input is sampled from a novel distribution other than the training distribution, is a critical task for safely deploying machine learning systems in the open world. Recently, post hoc detection utilizing pre-trained models has shown promising performance and can be scaled to large-scale problems. This advance raises a natural question: Can we leverage the diversity of multiple pre-trained models to improve the performance of post hoc detection methods? In this work, we propose a detection enhancement method by ensembling multiple detection decisions derived from a zoo of pre-trained models. Our approach uses the p-value instead of the commonly used hard threshold and leverages a fundamental framework of multiple hypothesis testing to control the true positive rate for In-Distribution (ID) data. We focus on the usage of model zoos and provide systematic empirical comparisons with current state-of-the-art methods on various OOD detection benchmarks. The proposed ensemble scheme shows consistent improvement compared to single-model detectors and significantly outperforms the current competitive methods. 
Our method substantially improves the relative performance by $65.40\\%$ and $26.96\\%$ on the CIFAR10 and ImageNet benchmarks.", "keywords": "Out-of-Distribution Detection;Model Zoo;Ensemble", "primary_area": "", "supplementary_material": "/attachment/9ed6c711dbbe6a369a9a6d28d0f2eb1b4ada8da0.zip", "author": "Feng Xue;Zi He;Chuanlong Xie;Falong Tan;Zhenguo Li", "authorids": "xuefengme@163.com;hezi0107@hnu.edu.cn;~Chuanlong_Xie1;falongtan@hnu.edu.cn;~Zhenguo_Li1", "gender": ";;M;;M", "homepage": ";;;;http://www.ee.columbia.edu/~zgli/", "dblp": ";;;;23/6479", "google_scholar": ";;_fgE3u8AAAAJ;;XboZC1AAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "xuefengme@163.com;hezi0107@hnu.edu.cn;~Chuanlong_Xie1;falongtan@hnu.edu.cn;~Zhenguo_Li1", "aff": ";;Beijing Normal University;;Huawei Noah's Ark Lab", "aff_domain": ";;bnu.edu.cn;;huawei.com", "position": ";;Associate Professor;;Principal Researcher", "bibtex": "@misc{\nxue2023boosting,\ntitle={Boosting Out-of-Distribution Detection with Multiple Pre-trained Models },\nauthor={Feng Xue and Zi He and Chuanlong Xie and Falong Tan and Zhenguo Li},\nyear={2023},\nurl={https://openreview.net/forum?id=QHWXmoYNw-Z}\n}", "github": "", "project": "", "reviewers": "FQTf;Ty7t;Wjqr", "site": "https://openreview.net/forum?id=QHWXmoYNw-Z", "pdf_size": 527925, "recommendation": "5;5;6", "confidence": "4;5;4", "correctness": "3;4;3", "technical_novelty": "3;2;3", "empirical_novelty": "2;2;3", "wc_summary_paper": "28;63;67", "wc_strength_and_weaknesses": "142;200;241", "wc_clarity_quality_novelty_and_reproducibility": "44;18;79", "wc_summary_review": "209;27;80", "wc_review": "423;308;467", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1166;1476;1458", "reply_reviewers": "0;0;0", "reply_authors": "3;3;3", "recommendation_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 52.666666666666664, 17.518244457961217 ], "wc_strength_and_weaknesses_avg": [ 194.33333333333334, 40.61472092179824 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 47.0, 24.99333244420733 ], "wc_summary_review_avg": [ 105.33333333333333, 76.43006970790721 ], "wc_review_avg": [ 399.3333333333333, 67.03398806244154 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1366.6666666666667, 142.0829178879557 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 3.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.4999999999999999, "corr_recommendation_correctness": -0.4999999999999999, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13653269541851923233&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Beijing Normal University;Huawei", "aff_unique_dep": ";Noah's Ark Lab", "aff_unique_url": "https://www.bnu.edu.cn;https://www.huawei.com", "aff_unique_abbr": "BNU;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "QHevLM-OnA", "title": "Generalized Belief Transport", "track": "main", "status": "Reject", "tldr": "", "abstract": "Human learners have the ability to adopt appropriate learning approaches depending on constraints such as the prior on the hypothesis and
the urgency of the decision. However, existing learning models are typically considered individually rather than in relation to one another. To build agents that have the ability to move between different modes of learning over time, it is important to understand how learning models are related as points in a broader space of possibilities. We introduce a mathematical framework, Generalized Belief Transport (GBT), that unifies and generalizes prior models, including Bayesian inference, cooperative communication and classification, as parameterizations of three learning constraints within Unbalanced Optimal Transport (UOT). We visualize the space of learning models encoded by GBT as a cube which includes classic learning models as special points. We derive critical properties of this parameterized space, including proving continuity and differentiability, which is the basis for model interpolation, and study the limiting behavior of the parameters, which allows attaching learning models at the boundaries. Moreover, we investigate the long-run behavior of GBT, explore convergence properties of models in GBT mathematically and computationally, and formulate conjectures about general behavior. We conclude with open questions and implications for more unified models of learning.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Junqi Wang;PEI WANG;Patrick Shafto", "authorids": "~Junqi_Wang1;~PEI_WANG1;~Patrick_Shafto1", "gender": "M;F;", "homepage": ";;http://www.shaftolab.com", "dblp": "213/0790;83/4555;03/5979", "google_scholar": "https://scholar.google.com/citations?pli=1;SO-bdTIAAAAJ;HUi6F7wAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Junqi_Wang1;~PEI_WANG1;~Patrick_Shafto1", "aff": "Beijing Institute for General Artificial Intelligence;Rutgers University;Rutgers University", "aff_domain": "bigai.ai;rutgers.edu;rutgers.edu", "position": "Researcher;Postdoc;Professor", "bibtex": "@misc{\nwang2023generalized,\ntitle={Generalized Belief Transport},\nauthor={Junqi Wang and PEI WANG and Patrick Shafto},\nyear={2023},\nurl={https://openreview.net/forum?id=QHevLM-OnA}\n}", "github": "", "project": "", "reviewers": "wkW3;KwJp;TGqt;pJWT", "site": "https://openreview.net/forum?id=QHevLM-OnA", "pdf_size": 976884, "recommendation": "1;5;6;6", "confidence": "4;3;4;2", "correctness": "1;3;3;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "1;0;2;2", "wc_summary_paper": "61;89;115;84", "wc_strength_and_weaknesses": "200;40;163;124", "wc_clarity_quality_novelty_and_reproducibility": "49;247;15;62", "wc_summary_review": "22;50;31;92", "wc_review": "332;426;324;362", "wc_reply_reviewers": "0;0;0;60", "wc_reply_authors": "656;328;775;561", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.5, 2.0615528128088303 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 2.5, 0.8660254037844386 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 87.25, 19.188212527486765 ], "wc_strength_and_weaknesses_avg": [ 131.75, 59.39854796205039 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 93.25, 90.41121335321189 ], "wc_summary_review_avg": [ 48.75, 26.93858756505248 ], "wc_review_avg": [ 361.0, 40.11234224026316 ], "wc_reply_reviewers_avg": [ 15.0, 25.98076211353316 ], "wc_reply_authors_avg": [ 580.0, 164.06248809523765 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ],
"authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.5118906968889914, "corr_recommendation_correctness": 0.9801960588196067, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:hMSp-7ukZCcJ:scholar.google.com/&scioq=Generalized+Belief+Transport&hl=en&as_sdt=0,10", "gs_version_total": 7, "aff_unique_index": "0;1;1", "aff_unique_norm": "Beijing Institute for General Artificial Intelligence;Rutgers University", "aff_unique_dep": ";", "aff_unique_url": "http://www.bigaiai.org/;https://www.rutgers.edu", "aff_unique_abbr": "BIGAI;Rutgers", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "China;United States" }, { "id": "QHiuyzE69Bx", "title": "Grounding High Dimensional Representation Similarity by Comparing Decodability and Network Performance", "track": "main", "status": "Reject", "tldr": "We evaluate representation similarity measures for sensitivity to decoding and network function using ablation on convolutional neural networks.", "abstract": "To understand and interpret neural networks, representation similarity metrics have been used to compare learned representations between and across networks. Recent experiments have compared these similarity metrics to find the best performing and the most robust metrics, noting that classic baselines perform surprisingly well. These experiments are mostly constrained to studying relatively low-dimensional representations because of the computational cost of prominent representation similarity metrics. We extend previous work to test representation similarity metrics on larger convolutional networks processing larger images. In order to make this work possible, we employ reformulated representation similarity metrics for use on very high-dimensional representations. Using these reformulated similarity metrics, we test how well each metric captures changes to representations induced by ablations in two popular convolutional networks. In order to ground the effects of changes to representations in function, we use linear decoding probes and network performance measures. These measures of function allow us to test how well similarity metrics capture changes in decodable information versus changes in network performance. Linear decoding methods index available information in the representation, while network performance measures index the information used by the network. We show that all the tested representation similarity metrics significantly predict changes in network function and decodability. Within these metrics, on average, Procrustes and CKA outperform regularized CCA-based methods. All metrics predict decodability changes significantly better than they do network function. Procrustes and CKA do not outperform regularized CCA-based metrics for all network and functionality measure combinations. We add to the growing literature on representational similarity metrics to facilitate the improvement of current metrics for network interpretability.", "keywords": "ablation;representation;semantic decoding;linear decoding;representation similarity;neural network interpretability;activation space", "primary_area": "", "supplementary_material": "/attachment/d9a6ee1b4eada3ab7e6dc68cfe1c8e734af2af29.zip", "author": "Lucas Hayne;Heejung Jung;Abhijit Suresh;R. 
McKell Carter", "authorids": "~Lucas_Hayne1;~Heejung_Jung3;~Abhijit_Suresh1;~R._McKell_Carter1", "gender": "M;;M;", "homepage": ";;http://abhijit-suresh.github.io/;https://www.colorado.edu/psych-neuro/r-mckell-carter", "dblp": "275/8716;;185/9757;", "google_scholar": "lWgnAsQAAAAJ;rzekplUAAAAJ;18FkVHkAAAAJ;k08ko14AAAAJ", "orcid": ";0000-0001-5839-1655;;", "linkedin": ";;abhijit-suresh/;", "or_profile": "~Lucas_Hayne1;~Heejung_Jung3;~Abhijit_Suresh1;~R._McKell_Carter1", "aff": "University of Colorado at Boulder;Dartmouth College;;University of Colorado, Boulder", "aff_domain": "colorado.edu;dartmouth.edu;;colorado.edu", "position": "PhD student;PhD student;;Assistant Professor", "bibtex": "@misc{\nhayne2023grounding,\ntitle={Grounding High Dimensional Representation Similarity by Comparing Decodability and Network Performance},\nauthor={Lucas Hayne and Heejung Jung and Abhijit Suresh and R. McKell Carter},\nyear={2023},\nurl={https://openreview.net/forum?id=QHiuyzE69Bx}\n}", "github": "", "project": "", "reviewers": "u5hR;ZbX5;TGPW", "site": "https://openreview.net/forum?id=QHiuyzE69Bx", "pdf_size": 3184053, "recommendation": "3;5;5", "confidence": "2;2;2", "correctness": "2;3;4", "technical_novelty": "2;2;2", "empirical_novelty": "2;2;2", "wc_summary_paper": "57;59;85", "wc_strength_and_weaknesses": "285;86;123", "wc_clarity_quality_novelty_and_reproducibility": "108;33;37", "wc_summary_review": "34;56;26", "wc_review": "484;234;271", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1208;995;537", "reply_reviewers": "0;0;0", "reply_authors": "2;2;1", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 2.0, 0.0 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 67.0, 12.754084313139327 ], "wc_strength_and_weaknesses_avg": [ 164.66666666666666, 86.41887653876451 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 59.333333333333336, 34.451253807211266 ], "wc_summary_review_avg": [ 38.666666666666664, 12.684198393626966 ], "wc_review_avg": [ 329.6666666666667, 110.17057481721496 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 913.3333333333334, 279.9551551389774 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.8660254037844385, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:fzXF6CDb_XIJ:scholar.google.com/&scioq=Grounding+High+Dimensional+Representation+Similarity+by+Comparing+Decodability+and+Network+Performance&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Colorado;Dartmouth College", "aff_unique_dep": ";", "aff_unique_url": "https://www.colorado.edu;https://www.dartmouth.edu", "aff_unique_abbr": "CU;Dartmouth", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Boulder;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Heterogeneous Neuronal and Synaptic Dynamics for Spike-Efficient Unsupervised Learning: Theory and Design Principles", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11083", "id": "QIRtAqoXwj", "poster": "/media/PosterPDFs/ICLR%202023/11083.png?t=1682237347.07389", "openreview": "https://openreview.net/forum?id=QIRtAqoXwj", "slides": 
"https://iclr.cc/virtual/2023/poster/11083", "video": "https://iclr.cc/virtual/2023/poster/11083", "author_site": "Biswadeep Chakraborty, Saibal Mukhopadhyay", "tldr": "We prove that heterogeneity in neuronal dynamics improves the memory capacity while heterogeneity in the STDP synaptic dynamics improves the spike efficiency", "abstract": "This paper shows that the heterogeneity in neuronal and synaptic dynamics reduces the spiking activity of a Recurrent Spiking Neural Network (RSNN) while improving prediction performance, enabling spike-efficient (unsupervised) learning.\nWe analytically show that the diversity in neurons' integration/relaxation dynamics improves an RSNN's ability to learn more distinct input patterns (higher memory capacity), leading to improved classification and prediction performance. We further prove that heterogeneous Spike-Timing-Dependent-Plasticity (STDP) dynamics of synapses reduce spiking activity but preserve memory capacity. The analytical results motivate Heterogeneous RSNN design using Bayesian optimization to determine heterogeneity in neurons and synapses to improve $\\mathcal{E}$, defined as the ratio of spiking activity and memory capacity. The empirical results on time series classification and prediction tasks show that optimized HRSNN increases performance and reduces spiking activity compared to a homogeneous RSNN.", "keywords": "theory;spiking neural network;LIF;STDP;heterogeneity;memory capacity;spike efficiency;bayesian optimization", "primary_area": "", "supplementary_material": "/attachment/31b6b0a4e88e224c33282602b09655d1c43b63eb.zip", "author": "Biswadeep Chakraborty;Saibal Mukhopadhyay", "authorids": "~Biswadeep_Chakraborty1;~Saibal_Mukhopadhyay2", "gender": "M;M", "homepage": ";https://greenlab.ece.gatech.edu", "dblp": "238/0554;66/1210", "google_scholar": "8soIjY8AAAAJ;5KRtMEkAAAAJ", "orcid": ";0000-0002-8894-3390", "linkedin": ";", "or_profile": "~Biswadeep_Chakraborty1;~Saibal_Mukhopadhyay2", "aff": "Georgia Institute of Technology;Georgia Institute of Technology", "aff_domain": "gatech.edu;gatech.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nchakraborty2023heterogeneous,\ntitle={Heterogeneous Neuronal and Synaptic Dynamics for Spike-Efficient Unsupervised Learning: Theory and Design Principles},\nauthor={Biswadeep Chakraborty and Saibal Mukhopadhyay},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=QIRtAqoXwj}\n}", "github": "", "project": "", "reviewers": "LwLn;rFWy;Rdb2;MSRY", "pdf_size": 3854039, "recommendation": "3;6;6;8", "confidence": "3;4;4;3", "correctness": "2;3;3;4", "technical_novelty": "2;2;3;4", "empirical_novelty": "2;2;0;4", "wc_summary_paper": "94;41;56;39", "wc_strength_and_weaknesses": "282;288;216;43", "wc_clarity_quality_novelty_and_reproducibility": "146;11;10;30", "wc_summary_review": "95;23;9;16", "wc_review": "617;363;291;128", "wc_reply_reviewers": "686;302;24;0", "wc_reply_authors": "5741;3383;638;1001", "reply_reviewers": "2;4;1;0", "reply_authors": "13;10;2;2", "recommendation_avg": [ 5.75, 1.7853571071357126 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 2.0, 1.4142135623730951 ], "wc_summary_paper_avg": [ 57.5, 22.073740054644116 ], "wc_strength_and_weaknesses_avg": [ 207.25, 98.94790295908247 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 49.25, 56.42417478350924 ], 
"wc_summary_review_avg": [ 35.75, 34.56425176392511 ], "wc_review_avg": [ 349.75, 176.22623953316372 ], "wc_reply_reviewers_avg": [ 253.0, 276.739950133695 ], "wc_reply_authors_avg": [ 2690.75, 2052.575988240143 ], "reply_reviewers_avg": [ 1.75, 1.479019945774904 ], "reply_authors_avg": [ 6.75, 4.866980583482946 ], "replies_avg": [ 45, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.14002800840280097, "corr_recommendation_correctness": 0.9901475429766743, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=103995168374811480&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=QIRtAqoXwj", "email": "gatech.edu;gatech.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Georgia Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.gatech.edu", "aff_unique_abbr": "Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "QIpfInYnAu2", "title": "Neural Unbalanced Optimal Transport via Cycle-Consistent Semi-Couplings", "track": "main", "status": "Reject", "tldr": "", "abstract": "Comparing unpaired samples of a distribution or population taken at different points in time is a fundamental task in many application domains where measuring populations is destructive and cannot be done repeatedly on the same sample, such as in single-cell biology. Optimal transport (OT) can solve this challenge by learning an optimal coupling of samples across distributions from unpaired data. However, the usual formulation of OT assumes conservation of mass, which is violated in unbalanced scenarios in which the population size changes (e.g., cell proliferation or death) between measurements. In this work, we introduce NubOT, a neural unbalanced OT formulation that relies on the formalism of semi-couplings to account for creation and destruction of mass. To estimate such semi-couplings and generalize out-of-sample, we derive an efficient parameterization based on neural optimal transport maps and propose a novel algorithmic scheme through a cycle-consistent training procedure. 
We apply our method to the challenging task of forecasting heterogeneous responses of multiple cancer cell lines to various drugs, where we observe that by accurately modeling cell proliferation and death, our method yields notable improvements over previous neural optimal transport methods.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Frederike L\u00fcbeck;Charlotte Bunne;Gabriele Gut;Jacobo Sarabia del Castillo;Lucas Pelkmans;David Alvarez-Melis", "authorids": "~Frederike_L\u00fcbeck1;~Charlotte_Bunne1;~Gabriele_Gut2;jacobo.sdc@gmail.com;~Lucas_Pelkmans1;~David_Alvarez-Melis1", "gender": "F;F;M;;M;M", "homepage": ";https://aimm.epfl.ch;;;https://pelkmanslab.org;https://dmelis.github.io/", "dblp": ";217/2348;;;;168/8255", "google_scholar": ";https://scholar.google.com/citations?hl=en;oXQhxCQAAAAJ;;;XsxZrYYAAAAJ", "orcid": ";0000-0003-1431-103X;;;;0000-0002-9591-8986", "linkedin": "frederike-l%C3%BCbeck-b3a6b4186/;bunnech/;;;;", "or_profile": "~Frederike_L\u00fcbeck1;~Charlotte_Bunne1;~Gabriele_Gut2;jacobo.sdc@gmail.com;~Lucas_Pelkmans1;~David_Alvarez-Melis1", "aff": ";ETHZ - ETH Zurich;University of Zurich;;University of Zurich;Microsoft", "aff_domain": ";ethz.ch;uzh.ch;;uzh.ch;microsoft.com", "position": ";PhD student;Postdoc;;Full Professor;Senior Researcher", "bibtex": "@misc{\nl{\\\"u}beck2023neural,\ntitle={Neural Unbalanced Optimal Transport via Cycle-Consistent Semi-Couplings},\nauthor={Frederike L{\\\"u}beck and Charlotte Bunne and Gabriele Gut and Jacobo Sarabia del Castillo and Lucas Pelkmans and David Alvarez-Melis},\nyear={2023},\nurl={https://openreview.net/forum?id=QIpfInYnAu2}\n}", "github": "", "project": "", "reviewers": "irGq;CvpT;rxRW;zLtd", "site": "https://openreview.net/forum?id=QIpfInYnAu2", "pdf_size": 25033623, "recommendation": "3;5;6;6", "confidence": "4;5;4;2", "correctness": "2;3;3;4", "technical_novelty": "2;2;3;3", "empirical_novelty": "3;3;2;3", "wc_summary_paper": "142;101;234;147", "wc_strength_and_weaknesses": "84;668;174;94", "wc_clarity_quality_novelty_and_reproducibility": "632;58;14;80", "wc_summary_review": "127;35;96;18", "wc_review": "985;862;518;339", "wc_reply_reviewers": "647;516;37;15", "wc_reply_authors": "2255;1991;618;333", "reply_reviewers": "2;2;1;1", "reply_authors": "6;4;1;1", "recommendation_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 156.0, 48.44068537913146 ], "wc_strength_and_weaknesses_avg": [ 255.0, 240.9834019180574 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 196.0, 252.8438253151538 ], "wc_summary_review_avg": [ 69.0, 44.30011286667337 ], "wc_review_avg": [ 676.0, 259.13799412668146 ], "wc_reply_reviewers_avg": [ 303.75, 281.69254072481226 ], "wc_reply_authors_avg": [ 1299.25, 835.122259013613 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 3.0, 2.1213203435596424 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.37463432463267754, "corr_recommendation_correctness": 0.8660254037844386, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8193322560543697822&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "ETH Zurich;University of Zurich;Microsoft", "aff_unique_dep": ";;Microsoft Corporation", "aff_unique_url": 
"https://www.ethz.ch;https://www.unizh.ch;https://www.microsoft.com", "aff_unique_abbr": "ETHZ;UZH;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "Switzerland;United States" }, { "id": "QK1R-vPGsop", "title": "Robust Generative Flows on Reliable Image Reconstruction without Training Data", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "A key application of computational imaging is to determine the hidden information from a set of observed but sparse measurements. To fully characterize the uncertainty naturally induced by the sparse measurements, a robust inverse solver that is able to estimate the complete posterior of the unrecoverable targets is therefore important, with a potential to probabilistically interpret the observational data for decision making. In this work, we propose a deep variational framework that leverages a deep generative model to learn an approximate posterior distribution for quantifying image reconstruction uncertainty without training data. This is achieved by parameterizing the target posterior using a flow-based model and minimizing their KL divergence. To perform accurate uncertainty estimation, we propose a robust flow-based model where the stability is enhanced by adding bi-directional regularization and the expressivity is improved by using gradient boosting. We also found that the statistics of latent distribution are conservatively propagated to the posterior distribution through an invertible transformation and therefore introduce a space-filling design to achieve significant variance reduction on both latent prior space and target posterior space. We demonstrate our method on several benchmark tasks and two real-world applications (fastMRI and black hole image reconstruction) and show that it achieves a reliable and high-quality image reconstruction with robust uncertainty estimation. 
", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Sirui Bi;Victor Fung;Jiaxin Zhang", "authorids": "siruijhu@gmail.com;~Victor_Fung1;~Jiaxin_Zhang2", "gender": ";;M", "homepage": ";;https://jxzhangjhu.github.io/", "dblp": ";;32/7698-5.html", "google_scholar": ";2QsddMIAAAAJ;LiDm8jEAAAAJ", "orcid": ";;", "linkedin": ";;jiaxin-zhang-1425289b/", "or_profile": "siruijhu@gmail.com;~Victor_Fung1;~Jiaxin_Zhang2", "aff": ";Georgia Institute of Technology;Intuit AI Research", "aff_domain": ";gatech.edu;intuit.com", "position": ";Assistant Professor;Researcher", "bibtex": "@misc{\nbi2023robust,\ntitle={Robust Generative Flows on Reliable Image Reconstruction without Training Data},\nauthor={Sirui Bi and Victor Fung and Jiaxin Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=QK1R-vPGsop}\n}", "github": "", "project": "", "reviewers": "n5TH;r6Zq;uRXE;9Ttm", "site": "https://openreview.net/forum?id=QK1R-vPGsop", "pdf_size": 10674305, "recommendation": "3;3;5;6", "confidence": "3;5;4;4", "correctness": "3;2;3;4", "technical_novelty": "2;2;2;2", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "45;56;128;148", "wc_strength_and_weaknesses": "317;1315;385;78", "wc_clarity_quality_novelty_and_reproducibility": "218;21;255;140", "wc_summary_review": "43;202;80;42", "wc_review": "623;1594;848;408", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 94.25, 44.488060195967186 ], "wc_strength_and_weaknesses_avg": [ 523.75, 470.84146748135936 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 158.5, 89.5837596889079 ], "wc_summary_review_avg": [ 91.75, 65.46898120484234 ], "wc_review_avg": [ 868.25, 446.96217681141655 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:hCcE8zwrbqIJ:scholar.google.com/&scioq=Robust+Generative+Flows+on+Reliable+Image+Reconstruction+without+Training+Data&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Georgia Institute of Technology;Intuit", "aff_unique_dep": ";Intuit AI Research", "aff_unique_url": "https://www.gatech.edu;https://intuit.com/", "aff_unique_abbr": "Georgia Tech;Intuit", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "QL85H5Mkip", "title": "Uncovering Directions of Instability via Quadratic Approximation of Deep Neural Loss in Reinforcement Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Learning in MDPs with highly complex state representations is currently possible due to multiple advancements in reinforcement learning algorithm design. However, this incline in complexity, and furthermore the increase in the dimensions of the observation came at the cost of non-robustness that can be taken advantage of (i.e. moving along worst-case directions in the observation space). 
To solve this policy instability problem we propose a novel method to ascertain the presence of these non-robust directions via quadratic approximation of the deep neural policy loss. Our method provides a theoretical basis for the fundamental cut-off between stable observations and non-robust observations. Furthermore, our technique is computationally efficient, and does not depend on the methods used to produce the worst-case directions. We conduct extensive experiments in the Arcade Learning Environment with several different non-robust alteration techniques. Most significantly, we demonstrate the effectiveness of our approach even in the setting where alterations are explicitly optimized to circumvent our proposed method.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/ae5eb5ae4e02a88ee924332602207883fbd4b632.zip", "author": "Ezgi Korkmaz;Jonah Brown-Cohen", "authorids": "~Ezgi_Korkmaz2;~Jonah_Brown-Cohen1", "gender": ";M", "homepage": "https://ezgikorkmaz.github.io/;https://jonahbc.github.io/", "dblp": "300/7830.html;157/1513", "google_scholar": ";fRc3A80AAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Ezgi_Korkmaz2;~Jonah_Brown-Cohen1", "aff": "University College London, University of London;Chalmers University", "aff_domain": "ucl.ac.uk;chalmers.se", "position": "PhD student;Assistant Professor", "bibtex": "@misc{\nkorkmaz2023uncovering,\ntitle={Uncovering Directions of Instability via Quadratic Approximation of Deep Neural Loss in Reinforcement Learning},\nauthor={Ezgi Korkmaz and Jonah Brown-Cohen},\nyear={2023},\nurl={https://openreview.net/forum?id=QL85H5Mkip}\n}", "github": "", "project": "", "reviewers": "8ZPv;9bNZ;Soiv;FZra", "site": "https://openreview.net/forum?id=QL85H5Mkip", "pdf_size": 1681178, "recommendation": "5;5;5;8", "confidence": "3;3;4;2", "correctness": "3;2;3;4", "technical_novelty": "3;2;2;4", "empirical_novelty": "2;2;2;4", "wc_summary_paper": "72;54;79;94", "wc_strength_and_weaknesses": "110;143;181;71", "wc_clarity_quality_novelty_and_reproducibility": "140;104;67;83", "wc_summary_review": "101;138;39;58", "wc_review": "423;439;366;306", "wc_reply_reviewers": "0;0;154;0", "wc_reply_authors": "386;310;914;53", "reply_reviewers": "0;0;1;0", "reply_authors": "1;2;3;1", "recommendation_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 74.75, 14.376630342329875 ], "wc_strength_and_weaknesses_avg": [ 126.25, 40.604033050917494 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 98.5, 27.31757675929547 ], "wc_summary_review_avg": [ 84.0, 38.4252521136817 ], "wc_review_avg": [ 383.5, 52.32829062753722 ], "wc_reply_reviewers_avg": [ 38.5, 66.68395609140178 ], "wc_reply_authors_avg": [ 415.75, 313.0130788002316 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.816496580927726, "corr_recommendation_correctness": 0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:hBvEY14RwTsJ:scholar.google.com/&scioq=Uncovering+Directions+of+Instability+via+Quadratic+Approximation+of+Deep+Neural+Loss+in+Reinforcement+Learning&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "University College 
London;Chalmers University of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucl.ac.uk;https://www.chalmers.se", "aff_unique_abbr": "UCL;Chalmers", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United Kingdom;Sweden" }, { "id": "QLVvgqcyuj", "title": "Noise$^+$2Noise: Co-taught De-noising Autoencoders for Time-Series Data", "track": "main", "status": "Reject", "tldr": "We combine Co-teaching and De-noising Autoencoders to recover clean signals from only noisy data in a time series setting.", "abstract": "We consider the task of learning to recover clean signals given only access to noisy data. Recent work in computer vision has addressed this problem in the context of images using denoising autoencoders (DAEs). However, to date DAEs for learning from noisy data have not been explored in the context of time-series data. DAEs for denoising images often rely on assumptions unlikely to hold in the context of time series, \\textit{e.g.}, multiple noisy samples of the same example. Here, we adapt DAEs to cleaning time-series data with noisy samples only. To recover the clean target signal when only given access to noisy target data, we leverage a noise-free auxiliary time-series signal that is related to the target signal. In addition to leveraging the relationship between the target signal and auxiliary signal, we iteratively filter and learn from clean samples using an approach based on co-teaching. Applied to the task of recovering carbohydrate values for blood glucose management, our approach reduces noise (MSE) in patient-reported carbohydrates from 72$g^2$ (95\\% CI: 54,93) to 18$g^2$ (13,25), outperforming the best baseline (MSE = 33$g^2$ (27,43)). We demonstrate strong time-series denoising performance, extending the applicability of DAEs to a previously under-explored setting.", "keywords": "De-noising;Co-teaching;Noise recovery;Time-series;self-supervised;RNN", "primary_area": "", "supplementary_material": "/attachment/3de5de97e5608d0df86ccefd2c24015e45b0501c.zip", "author": "Harry Rubin-Falcone;Joyce Lee;Jenna Wiens", "authorids": "~Harry_Rubin-Falcone1;joyclee@med.umich.edu;~Jenna_Wiens1", "gender": "M;;F", "homepage": ";;http://www-personal.umich.edu/~wiensj/", "dblp": ";;63/10451", "google_scholar": "6iljBF4AAAAJ;;fvEfKxkAAAAJ", "orcid": ";;0000-0002-1057-7722", "linkedin": ";;", "or_profile": "~Harry_Rubin-Falcone1;joyclee@med.umich.edu;~Jenna_Wiens1", "aff": "University of Michigan;;University of Michigan Ann Arbor", "aff_domain": "umich.edu;;umich.edu", "position": "Graduate Student;;Associate Professor", "bibtex": "@misc{\nrubin-falcone2023noisenoise,\ntitle={Noise\\${\\textasciicircum}+\\$2Noise: Co-taught De-noising Autoencoders for Time-Series Data},\nauthor={Harry Rubin-Falcone and Joyce Lee and Jenna Wiens},\nyear={2023},\nurl={https://openreview.net/forum?id=QLVvgqcyuj}\n}", "github": "", "project": "", "reviewers": "yPFM;SnFL;Azxw;reMC", "site": "https://openreview.net/forum?id=QLVvgqcyuj", "pdf_size": 1974321, "recommendation": "3;5;6;6", "confidence": "3;3;3;4", "correctness": "2;4;3;2", "technical_novelty": "3;2;2;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "92;107;97;103", "wc_strength_and_weaknesses": "100;172;105;239", "wc_clarity_quality_novelty_and_reproducibility": "32;134;16;98", "wc_summary_review": "26;40;24;24", "wc_review": "250;453;242;464", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "623;766;550;519", "reply_reviewers": "0;0;0;0", "reply_authors": 
"1;1;1;1", "recommendation_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.82915619758885 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 99.75, 5.717298313014636 ], "wc_strength_and_weaknesses_avg": [ 154.0, 56.71419575379695 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 70.0, 48.062459362791664 ], "wc_summary_review_avg": [ 28.5, 6.689544080129826 ], "wc_review_avg": [ 352.25, 106.35876785672161 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 614.5, 95.26935498889452 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.4714045207910316, "corr_recommendation_correctness": 0.24618298195866545, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:M4H5aLwv1coJ:scholar.google.com/&scioq=Noise%24%5E%2B%242Noise:+Co-taught+De-noising+Autoencoders+for+Time-Series+Data&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "University of Michigan", "aff_unique_dep": "", "aff_unique_url": "https://www.umich.edu", "aff_unique_abbr": "UM", "aff_campus_unique_index": "1", "aff_campus_unique": ";Ann Arbor", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "QN_VgTeOYGl", "title": "Data Leakage in Tabular Federated Learning", "track": "main", "status": "Reject", "tldr": "We introduce a novel data leakage atack on FL for tabular data.", "abstract": "While federated learning (FL) promises to preserve privacy in distributed training of deep learning models, recent work in the image and NLP domains showed that training updates leak private data of participating clients. At the same time, most high-stakes applications of FL (e.g., legal and financial) use tabular data. Compared to the NLP and image domains, reconstruction of tabular data poses several unique challenges: (i) categorical features introduce a significantly more difficult mixed discrete-continuous optimization problem, (ii) the mix of categorical and continuous features causes high variance in the final reconstructions, and (iii) structured data makes it difficult for the adversary to judge reconstruction quality. In this work, we tackle these challenges and propose the first comprehensive reconstruction attack on tabular data, called TabLeak. TabLeak is based on three key ingredients: (i) a softmax structural prior, implicitly converting the mixed discrete-continuous optimization problem into an easier fully continuous one, (ii) a way to reduce the variance of our reconstructions through a pooled ensembling scheme exploiting the structure of tabular data, and (iii) an entropy measure which can successfully assess reconstruction quality. Our experimental evaluation demonstrates the effectiveness of TabLeak, reaching a state-of-the-art on four popular tabular datasets. For instance, on the Adult dataset, we improve attack accuracy by 10% compared to the baseline on the practically relevant batch size of 32 and further obtain non-trivial reconstructions for batch sizes as large as 128. 
Our findings are important as they show that performing FL on tabular data, which often poses high privacy risks, is highly vulnerable.", "keywords": "federated learning;tabular data;data leakage attacks;gradient inversion", "primary_area": "", "supplementary_material": "", "author": "Mark Vero;Mislav Balunovic;Dimitar Iliev Dimitrov;Martin Vechev", "authorids": "~Mark_Vero1;~Mislav_Balunovic1;~Dimitar_Iliev_Dimitrov2;~Martin_Vechev1", "gender": "M;M;M;M", "homepage": "https://www.sri.inf.ethz.ch/people/markvero;https://www.sri.inf.ethz.ch/people/mislav;https://www.sri.inf.ethz.ch/people/dimitadi;https://www.sri.inf.ethz.ch/people/martin", "dblp": "319/4985;231/7686;271/0915;93/2189.html", "google_scholar": "vguDYtQAAAAJ;fxkgmGwAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.ch/citations?user=aZ1Rh50AAAAJ", "orcid": ";;0000-0001-9813-0900;", "linkedin": "https://linkedin.com/in/mark-vero-9a32bb17a;;;", "or_profile": "~Mark_Vero1;~Mislav_Balunovic1;~Dimitar_Iliev_Dimitrov2;~Martin_Vechev1", "aff": "ETHZ - ETH Zurich;Swiss Federal Institute of Technology;Swiss Federal Institute of Technology;Swiss Federal Institute of Technology", "aff_domain": "ethz.ch;ethz.ch;ethz.ch;ethz.ch", "position": "MS student;PhD student;PhD student;Full Professor", "bibtex": "@misc{\nvero2023data,\ntitle={Data Leakage in Tabular Federated Learning},\nauthor={Mark Vero and Mislav Balunovic and Dimitar Iliev Dimitrov and Martin Vechev},\nyear={2023},\nurl={https://openreview.net/forum?id=QN_VgTeOYGl}\n}", "github": "", "project": "", "reviewers": "x5xg;oUHx;Gtmh", "site": "https://openreview.net/forum?id=QN_VgTeOYGl", "pdf_size": 2156190, "recommendation": "3;3;6", "confidence": "5;3;4", "correctness": "4;3;3", "technical_novelty": "1;2;3", "empirical_novelty": "1;2;4", "wc_summary_paper": "51;65;205", "wc_strength_and_weaknesses": "49;108;154", "wc_clarity_quality_novelty_and_reproducibility": "27;29;936", "wc_summary_review": "91;24;58", "wc_review": "218;226;1353", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1335;337;2554", "reply_reviewers": "0;0;0", "reply_authors": "4;2;5", "recommendation_avg": [ 4.0, 1.4142135623730951 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.0, 0.816496580927726 ], "empirical_novelty_avg": [ 2.3333333333333335, 1.247219128924647 ], "wc_summary_paper_avg": [ 107.0, 69.53176732017292 ], "wc_strength_and_weaknesses_avg": [ 103.66666666666667, 42.975445185464785 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 330.6666666666667, 428.03608362951 ], "wc_summary_review_avg": [ 57.666666666666664, 27.353650985238193 ], "wc_review_avg": [ 599.0, 533.1685161997722 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1408.6666666666667, 906.5841874249126 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 3.6666666666666665, 1.247219128924647 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": -0.5, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=914860901648171209&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "ETH Zurich;Swiss Federal Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.ethz.ch;https://www.ethz.ch", "aff_unique_abbr": "ETHZ;ETH Zurich", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", 
"aff_country_unique": "Switzerland" }, { "id": "QP02DQ-FG-8", "title": "Incomplete to complete multiphysics forecasting - a hybrid approach for learning unknown phenomena", "track": "main", "status": "Reject", "tldr": "This paper proposes a hybrid framework that combines neural network models with an incomplete PDE solver to account for the effects of unknown physics present in the system to predict a long-term temporal evolution of a complete, multiphysics system", "abstract": "Modeling complex dynamical systems where only partial knowledge of their physical mechanisms is available is a crucial problem across all scientific and engineering disciplines. Purely data-driven approaches, which only make use of an artificial neural network and data, often fail to accurately simulate the evolution of the system dynamics over a sufficiently long time and in a physically consistent manner. Therefore, we propose a hybrid approach that uses a neural network model in combination with an incomplete PDE solver that provides known but incomplete physical information. In this study, we demonstrate that the results obtained from the incomplete PDEs can be efficiently corrected at every time step by the proposed hybrid neural network \u2013 PDE solver model, so that the effect of the unknown physics present in the system is correctly accounted for. For validation purposes, the obtained simulations of the hybrid model are successfully compared against results coming from the complete set of PDEs describing the full physics of the considered system. We demonstrate the validity of the proposed approach on a reactive flow, an archetypal multi-physics system that combines fluid mechanics and chemistry, the latter being the physics considered unknown. Experiments are made on planar and Bunsen-type flames at various operating conditions. The hybrid neural network - PDE approach correctly models the flame evolution of the cases under study for significantly long time windows, yields improved generalization, and allows for larger simulation time steps. 
", "keywords": "neural physics simulations;multi-physics systems;reactive flows;differentiable PDE solvers", "primary_area": "", "supplementary_material": "", "author": "Nilam Nandkishor Tathawadekar;Nguyen Anh Khoa Doan;Camilo Fernando Silva;Nils Thuerey", "authorids": "~Nilam_Nandkishor_Tathawadekar2;~Nguyen_Anh_Khoa_Doan1;~Camilo_Fernando_Silva1;~Nils_Thuerey1", "gender": "F;M;M;M", "homepage": ";https://www.tudelft.nl/staff/n.a.k.doan/;https://www.epc.ed.tum.de/tfd/mitarbeiterinnen/dr-camilo-fernando-silva-garzon/;https://ge.in.tum.de", "dblp": ";;;42/478", "google_scholar": ";;1GHcqyYAAAAJ;https://scholar.google.com.tw/citations?user=GEehwv8AAAAJ", "orcid": "0000-0002-6703-0998;;;", "linkedin": ";;;", "or_profile": "~Nilam_Nandkishor_Tathawadekar2;~Nguyen_Anh_Khoa_Doan1;~Camilo_Fernando_Silva1;~Nils_Thuerey1", "aff": "Technische Universit\u00e4t M\u00fcnchen;Delft University of Technology;Technische Universit\u00e4t M\u00fcnchen;Technical University Munich", "aff_domain": "tum.de;tudelft.nl;tum.de;tum.de", "position": "PhD student;Assistant Professor;Lecturer;Associate Professor", "bibtex": "@misc{\ntathawadekar2023incomplete,\ntitle={Incomplete to complete multiphysics forecasting - a hybrid approach for learning unknown phenomena},\nauthor={Nilam Nandkishor Tathawadekar and Nguyen Anh Khoa Doan and Camilo Fernando Silva and Nils Thuerey},\nyear={2023},\nurl={https://openreview.net/forum?id=QP02DQ-FG-8}\n}", "github": "", "project": "", "reviewers": "NRLv;c79Z;95Sg;hgQY", "site": "https://openreview.net/forum?id=QP02DQ-FG-8", "pdf_size": 2072512, "recommendation": "3;3;6;8", "confidence": "3;3;5;3", "correctness": "2;2;3;4", "technical_novelty": "2;1;3;3", "empirical_novelty": "2;3;3;0", "wc_summary_paper": "51;125;127;658", "wc_strength_and_weaknesses": "318;369;99;193", "wc_clarity_quality_novelty_and_reproducibility": "34;187;71;119", "wc_summary_review": "64;27;328;171", "wc_review": "467;708;625;1141", "wc_reply_reviewers": "0;0;0;47", "wc_reply_authors": "517;975;409;542", "reply_reviewers": "0;0;0;1", "reply_authors": "1;2;1;1", "recommendation_avg": [ 5.0, 2.1213203435596424 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "correctness_avg": [ 2.75, 0.82915619758885 ], "technical_novelty_avg": [ 2.25, 0.82915619758885 ], "empirical_novelty_avg": [ 2.0, 1.224744871391589 ], "wc_summary_paper_avg": [ 240.25, 243.12483933156645 ], "wc_strength_and_weaknesses_avg": [ 244.75, 105.74113438014555 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 102.75, 57.220516425492 ], "wc_summary_review_avg": [ 147.5, 116.85995892520243 ], "wc_review_avg": [ 735.25, 249.74424417791894 ], "wc_reply_reviewers_avg": [ 11.75, 20.351596988934308 ], "wc_reply_authors_avg": [ 610.75, 216.15778380618173 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.2721655269759087, "corr_recommendation_correctness": 0.994936676326182, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12923327947545303981&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Technische Universit\u00e4t M\u00fcnchen;Delft University of Technology;Technical University of Munich", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tum.de;https://www.tudelft.nl;https://www.tum.de", "aff_unique_abbr": "TUM;TU Delft;TUM", "aff_campus_unique_index": "", "aff_campus_unique": "", 
"aff_country_unique_index": "0;1;0;0", "aff_country_unique": "Germany;Netherlands" }, { "id": "QP4nkeQ1BpT", "title": "Hidden Markov Mixture of Gaussian Process Functional Regression: Utilizing Multi-Scale Structure for Time-Series Forecasting", "track": "main", "status": "Reject", "tldr": "", "abstract": "The mixture of Gaussian process functional regressions (GPFRs) assumes that there are a batch of time-series or sample curves which are generated by independent random processes with different temporal structures. However, in the real situations, these structures are actually transferred in a random manner from a long time scale. Therefore, the assumption of independent curves is not true in practice. In order to get rid of this limitation, we propose the hidden Markov based GPFR mixture model (HM-GPFR) by describing these curves with both fine and coarse level temporal structures. Specifically, the temporal structure is described by the Gaussian process model at the fine level and hidden Markov process at the coarse level. The whole model can be regarded as a random process with state switching dynamics. To further enhance the robustness of the model, we also give a priori to the model parameters and develop Bayesian hidden Markov based GPFR mixture model (BHM-GPFR). Experimental results demonstrate that the proposed methods have both high prediction accuracy and good interpretability.\n", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Tao Li;Jinwen Ma", "authorids": "~Tao_Li9;~Jinwen_Ma1", "gender": "M;M", "homepage": "https://github.com/ltbyron;https://www.math.pku.edu.cn/teachers/jwma/homepage/", "dblp": ";m/JinwenMa", "google_scholar": ";", "orcid": ";0000-0002-7388-4295", "linkedin": ";", "or_profile": "~Tao_Li9;~Jinwen_Ma1", "aff": "Peking University;Peking University", "aff_domain": "edu.cn;pku.edu.cn", "position": "PhD student;Full Professor", "bibtex": "@misc{\nli2023hidden,\ntitle={Hidden Markov Mixture of Gaussian Process Functional Regression: Utilizing Multi-Scale Structure for Time-Series Forecasting},\nauthor={Tao Li and Jinwen Ma},\nyear={2023},\nurl={https://openreview.net/forum?id=QP4nkeQ1BpT}\n}", "github": "", "project": "", "reviewers": "CSZt;tgKE;hJkz", "site": "https://openreview.net/forum?id=QP4nkeQ1BpT", "pdf_size": 2997440, "recommendation": "3;5;5", "confidence": "4;3;4", "correctness": "3;3;3", "technical_novelty": "2;2;2", "empirical_novelty": "2;3;2", "wc_summary_paper": "68;40;76", "wc_strength_and_weaknesses": "31;143;294", "wc_clarity_quality_novelty_and_reproducibility": "31;33;29", "wc_summary_review": "95;40;29", "wc_review": "225;256;428", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 61.333333333333336, 15.4344492037203 ], "wc_strength_and_weaknesses_avg": [ 156.0, 107.76208362251849 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 31.0, 1.632993161855452 ], "wc_summary_review_avg": [ 54.666666666666664, 28.871362204709975 ], "wc_review_avg": [ 303.0, 89.28979038314888 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 2, 0 ], 
"corr_recommendation_confidence": -0.49999999999999983, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16497433817661357423&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Contrastive Audio-Visual Masked Autoencoder", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10966", "id": "QPtMRyk5rb", "poster": "/media/PosterPDFs/ICLR%202023/10966.png?t=1680929792.682869", "openreview": "https://openreview.net/forum?id=QPtMRyk5rb", "slides": "https://iclr.cc/virtual/2023/poster/10966", "video": "https://iclr.cc/virtual/2023/poster/10966", "author_site": "Yuan Gong, Andrew Rouditchenko, Alexander Liu, David Harwath, Leonid Karlinsky, Hilde Kuehne, James R Glass", "tldr": "We propose the Contrastive Audio-Visual Masked Auto-Encoder that combines contrastive learning and masked data modeling, two major self-supervised learning frameworks, to learn a joint and coordinated audio-visual representation.", "abstract": "In this paper, we first extend the recent Masked Auto-Encoder (MAE) model from a single modality to audio-visual multi-modalities. Subsequently, we propose the Contrastive Audio-Visual Masked Auto-Encoder (CAV-MAE) by combining contrastive learning and masked data modeling, two major self-supervised learning frameworks, to learn a joint and coordinated audio-visual representation.\nOur experiments show that the contrastive audio-visual correspondence learning objective not only enables the model to perform audio-visual retrieval tasks, but also helps the model learn a better joint representation. As a result, our fully self-supervised pretrained CAV-MAE achieves a new SOTA accuracy of 65.9% on VGGSound, and is comparable with the previous best supervised pretrained model on AudioSet in the audio-visual event classification task. Code and pretrained models are at https://github.com/yuangongnd/cav-mae.", "keywords": "multi-modal learning;audio-visual learning;self-supervised learning;masked autoencoder;contrastive learning", "primary_area": "", "supplementary_material": "", "author": "Yuan Gong;Andrew Rouditchenko;Alexander H. Liu;David Harwath;Leonid Karlinsky;Hilde Kuehne;James R. 
Glass", "authorids": "~Yuan_Gong3;~Andrew_Rouditchenko1;~Alexander_H._Liu1;~David_Harwath1;~Leonid_Karlinsky3;~Hilde_Kuehne5;~James_R._Glass1", "gender": "M;;M;M;M;F;", "homepage": ";;https://alexander-h-liu.github.io/;https://www.cs.utexas.edu/~harwath/index.html;;https://hildekuehne.github.io;", "dblp": ";218/5458;227/2380;;05/4463;45/4963;", "google_scholar": "MuhvvOkAAAAJ;;LIiCDa0AAAAJ;C0kDOzcAAAAJ;https://scholar.google.co.il/citations?user=WbO7tjYAAAAJ;pxhCcH0AAAAJ;", "orcid": ";;;;;0000-0003-1079-4441;", "linkedin": ";;;;;hilde-kuehne-8b9aa661;", "or_profile": "~Yuan_Gong3;~Andrew_Rouditchenko1;~Alexander_H._Liu1;~David_Harwath1;~Leonid_Karlinsky3;~Hilde_Kuehne5;~James_R._Glass1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Meta Facebook;University of Texas, Austin;International Business Machines;Goethe University Frankfurt;", "aff_domain": "mit.edu;mit.edu;meta.com;utexas.edu;ibm.com;uni-frankfurt.de;", "position": "Postdoc;PhD student;Intern;Assistant Professor;Principal Researcher;Assistant Professor;", "bibtex": "@inproceedings{\ngong2023contrastive,\ntitle={Contrastive Audio-Visual Masked Autoencoder},\nauthor={Yuan Gong and Andrew Rouditchenko and Alexander H. Liu and David Harwath and Leonid Karlinsky and Hilde Kuehne and James R. Glass},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=QPtMRyk5rb}\n}", "github": "", "project": "", "reviewers": "oCcH;qVEJ;TNd7;M9QZ;Ee8Z", "pdf_size": 21745566, "recommendation": "6;6;6;8;8", "confidence": "5;3;5;4;3", "correctness": "2;3;3;4;3", "technical_novelty": "2;2;2;2;4", "empirical_novelty": "3;2;3;3;4", "wc_summary_paper": "32;46;54;72;94", "wc_strength_and_weaknesses": "256;228;81;186;105", "wc_clarity_quality_novelty_and_reproducibility": "19;44;35;35;32", "wc_summary_review": "21;54;302;67;58", "wc_review": "328;372;472;360;289", "wc_reply_reviewers": "194;85;0;27;14", "wc_reply_authors": "2325;1155;2039;1289;508", "reply_reviewers": "1;1;0;1;1", "reply_authors": "4;3;4;3;2", "recommendation_avg": [ 6.8, 0.9797958971132712 ], "confidence_avg": [ 4.0, 0.8944271909999159 ], "correctness_avg": [ 3.0, 0.6324555320336759 ], "technical_novelty_avg": [ 2.4, 0.8 ], "empirical_novelty_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_paper_avg": [ 59.6, 21.518364250100422 ], "wc_strength_and_weaknesses_avg": [ 171.2, 68.05115722748586 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 33.0, 8.07465169527454 ], "wc_summary_review_avg": [ 100.4, 101.99333311545416 ], "wc_review_avg": [ 364.2, 61.09795413923448 ], "wc_reply_reviewers_avg": [ 64.0, 71.14211129844264 ], "wc_reply_authors_avg": [ 1463.2, 649.9207336283403 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 3.2, 0.7483314773547882 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.45643546458763845, "corr_recommendation_correctness": 0.6454972243679028, "gs_citation": 156, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14514639288484949727&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=QPtMRyk5rb", "email": "mit.edu;mit.edu;meta.com;utexas.edu;ibm.com;uni-frankfurt.de;", "author_num": 7, "aff_unique_index": "0;0;1;2;3;4", "aff_unique_norm": "Massachusetts Institute of Technology;Meta;University of Texas at Austin;International Business Machines Corporation;Goethe University Frankfurt", "aff_unique_dep": ";Meta Platforms, Inc.;;;", 
"aff_unique_url": "https://web.mit.edu;https://meta.com;https://www.utexas.edu;https://www.ibm.com;https://www.uni-frankfurt.de", "aff_unique_abbr": "MIT;Meta;UT Austin;IBM;GU Frankfurt", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Austin;Frankfurt", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "United States;Germany" }, { "title": "Achieving Near-Optimal Individual Regret & Low Communications in Multi-Agent Bandits", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11135", "id": "QTXKTXJKIh", "poster": "/media/PosterPDFs/ICLR%202023/11135.png?t=1681130015.0475347", "openreview": "https://openreview.net/forum?id=QTXKTXJKIh", "slides": "https://iclr.cc/virtual/2023/poster/11135", "video": "https://iclr.cc/virtual/2023/poster/11135", "author_site": "Xuchuang Wang, Lin Yang, Yu-Zhen Janice Chen, Xutong Liu, Mohammad Hajiesmaili, Don Towsley, John C.S. Lui", "tldr": "A near-optimal algorithm for both individual and group regrets and only requiring O(\\log (\\log T)) communication times", "abstract": "Cooperative multi-agent multi-armed bandits (CM2AB) study how distributed agents cooperatively play the same multi-armed bandit game. Most existing CM2AB works focused on maximizing the group performance of all agents---the accumulation of all agents' individual performance (i.e., individual reward). However, in many applications, the performance of the system is more sensitive to the ``bad'' agent---the agent with the worst individual performance. For example, in a drone swarm, a ``bad'' agent may crash into other drones and severely degrade the system performance. In that case, the key of the learning algorithm design is to coordinate computational and communicational resources among agents so to optimize the individual learning performance of the ``bad'' agent. In CM2AB, maximizing the group performance is equivalent to minimizing the group regret of all agents, and minimizing the individual performance can be measured by minimizing the maximum (worst) individual regret among agents. Minimizing the maximum individual regret was largely ignored in prior literature, and currently, there is little work on how to minimize this objective with a low communication overhead. In this paper, we propose a near-optimal algorithm on both individual and group regrets, in addition, we also propose a novel communication module in the algorithm, which only needs \\(O(\\log (\\log T))\\) communication times where \\(T\\) is the number of decision rounds. We also conduct simulations to illustrate the advantage of our algorithm by comparing it to other known baselines.", "keywords": "Multi-agent multi-armed bandits;individual regret;communication", "primary_area": "", "supplementary_material": "", "author": "Xuchuang Wang;Lin Yang;Yu-Zhen Janice Chen;Xutong Liu;Mohammad Hajiesmaili;Don Towsley;John C.S. 
Lui", "authorids": "~Xuchuang_Wang1;linyang@nju.edu.cn;~Yu-Zhen_Janice_Chen1;~Xutong_Liu1;~Mohammad_Hajiesmaili1;~Don_Towsley1;~John_C.S._Lui2", "gender": "M;;F;M;M;M;M", "homepage": "https://xuchuangw.com;;;https://xutongliu.me/;https://groups.cs.umass.edu/hajiesmaili/;;http://www.cse.cuhk.edu.hk/~cslui/Index.html", "dblp": "319/5123;;227/7171;70/3372-2;49/7911;t/DonaldFTowsley;l/JohnCSLui", "google_scholar": "QJ66dEcAAAAJ;;g2HMYpEAAAAJ;KNfY6BIAAAAJ;XCGuYKIAAAAJ;https://scholar.google.com.tw/citations?user=yYtaDFUAAAAJ;https://scholar.google.com.tw/citations?user=7LVjQ7MAAAAJ", "orcid": ";;;0000-0002-8628-5873;;;0000-0001-7466-0384", "linkedin": ";;;;;;", "or_profile": "~Xuchuang_Wang1;linyang@nju.edu.cn;~Yu-Zhen_Janice_Chen1;~Xutong_Liu1;~Mohammad_Hajiesmaili1;~Don_Towsley1;~John_C.S._Lui2", "aff": "The Chinese University of Hong Kong;;Department of Computer Science, University of Massachusetts at Amherst;The Chinese University of Hong Kong;College of Information and Computer Science, University of Massachusetts, Amherst;University of Massachusetts, Amherst;The Chinese University of Hong Kong", "aff_domain": "cuhk.edu.hk;;cs.umass.edu;cuhk.edu.hk;cics.umass.edu;umass.edu;cse.cuhk.edu.hk", "position": "PhD student;;PhD student;Postdoc;Assistant Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nwang2023achieving,\ntitle={Achieving Near-Optimal Individual Regret \\& Low Communications in Multi-Agent Bandits},\nauthor={Xuchuang Wang and Lin Yang and Yu-Zhen Janice Chen and Xutong Liu and Mohammad Hajiesmaili and Don Towsley and John C.S. Lui},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=QTXKTXJKIh}\n}", "github": "", "project": "", "reviewers": "VQR1;coEu;3zzv", "pdf_size": 1897940, "recommendation": "6;6;8", "confidence": "4;3;4", "correctness": "3;4;4", "technical_novelty": "3;3;2", "empirical_novelty": "3;0;3", "wc_summary_paper": "147;54;68", "wc_strength_and_weaknesses": "249;92;114", "wc_clarity_quality_novelty_and_reproducibility": "43;136;35", "wc_summary_review": "32;74;51", "wc_review": "471;356;268", "wc_reply_reviewers": "0;38;18", "wc_reply_authors": "576;475;676", "reply_reviewers": "0;1;1", "reply_authors": "4;4;1", "recommendation_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 1.4142135623730951 ], "wc_summary_paper_avg": [ 89.66666666666667, 40.94169295745136 ], "wc_strength_and_weaknesses_avg": [ 151.66666666666666, 69.4086129781856 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 71.33333333333333, 45.84272631023983 ], "wc_summary_review_avg": [ 52.333333333333336, 17.172329163188344 ], "wc_review_avg": [ 365.0, 83.11838946145834 ], "wc_reply_reviewers_avg": [ 18.666666666666668, 15.520595635763755 ], "wc_reply_authors_avg": [ 575.6666666666666, 82.05824489687527 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 3.0, 1.4142135623730951 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.49999999999999983, "corr_recommendation_correctness": 0.49999999999999983, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15303877145389350941&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "pdf": 
"https://openreview.net/pdf?id=QTXKTXJKIh", "email": "cuhk.edu.hk;;cs.umass.edu;cuhk.edu.hk;cics.umass.edu;umass.edu;cse.cuhk.edu.hk", "author_num": 7, "aff_unique_index": "0;1;0;1;1;0", "aff_unique_norm": "Chinese University of Hong Kong;University of Massachusetts Amherst", "aff_unique_dep": ";Department of Computer Science", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.umass.edu", "aff_unique_abbr": "CUHK;UMass Amherst", "aff_campus_unique_index": "0;1;0;1;1;0", "aff_campus_unique": "Hong Kong SAR;Amherst", "aff_country_unique_index": "0;1;0;1;1;0", "aff_country_unique": "China;United States" }, { "id": "QTbAoQ5yMCg", "title": "Decentralized Robust V-learning for Solving Markov Games with Model Uncertainty", "track": "main", "status": "Withdraw", "tldr": "Robust reinforcement learning algorithm for Markov games", "abstract": "Markov game is a popular reinforcement learning framework for modeling competitive players in a dynamic environment. However, most of the existing works on Markov game focus on computing a certain equilibrium following uncertain interactions among the players, but ignores the uncertainty of the environment model, which is ubiquitous in practical scenarios. In this work, we develop a tractable solution to Markov games with model uncertainty. Specifically, we propose a new and tractable notion of robust correlated equilibrium for Markov games with environment model uncertainty. In particular, we prove that robust correlated equilibrium has a simple modification structure, and its characterization of equilibrium critically depends on the environment model uncertainty. Moreover, we propose the first fully-decentralized sample-based algorithm for computing such robust correlated equilibrium. Our analysis proves that the algorithm achieves the polynomial sample complexity $\\widetilde{\\mathcal{O}}( SA^2 H^5 p_{\\min}^{-2}\\epsilon^{-2})$ for computing an approximate robust correlated equilibrium with $\\epsilon$ accuracy. 
", "keywords": "Machine Learning;Reinforcement Learning;Markov Games", "primary_area": "", "supplementary_material": "", "author": "Shaocong Ma;Ziyi Chen;Shaofeng Zou;Yi Zhou", "authorids": "~Shaocong_Ma1;~Ziyi_Chen2;~Shaofeng_Zou1;~Yi_Zhou2", "gender": "M;M;;M", "homepage": "https://mshaocong.github.io/;;;https://sites.google.com/site/yizhouhomepage/home", "dblp": "270/3742;37/1439-2;;", "google_scholar": ";zjSBVOIAAAAJ;;4fK8bYIAAAAJ", "orcid": ";;;", "linkedin": ";ziyi-chen-84616184/;;", "or_profile": "~Shaocong_Ma1;~Ziyi_Chen2;~Shaofeng_Zou1;~Yi_Zhou2", "aff": "University of Utah;University of Utah;;University of Utah", "aff_domain": "utah.edu;utah.edu;;utah.edu", "position": "PhD student;PhD student;;Assistant Professor", "bibtex": "@misc{\nma2023decentralized,\ntitle={Decentralized Robust V-learning for Solving Markov Games with Model Uncertainty},\nauthor={Shaocong Ma and Ziyi Chen and Shaofeng Zou and Yi Zhou},\nyear={2023},\nurl={https://openreview.net/forum?id=QTbAoQ5yMCg}\n}", "github": "", "project": "", "reviewers": "54HS;mQd8;oAoN;gJQT", "site": "https://openreview.net/forum?id=QTbAoQ5yMCg", "pdf_size": 975707, "recommendation": "3;5;5;6", "confidence": "3;4;3;3", "correctness": "3;3;4;4", "technical_novelty": "3;2;2;3", "empirical_novelty": "2;1;2;0", "wc_summary_paper": "241;48;35;66", "wc_strength_and_weaknesses": "216;243;167;139", "wc_clarity_quality_novelty_and_reproducibility": "224;74;56;52", "wc_summary_review": "142;24;29;44", "wc_review": "823;389;287;301", "wc_reply_reviewers": "0;27;65;0", "wc_reply_authors": "438;817;515;183", "reply_reviewers": "0;1;1;0", "reply_authors": "1;2;1;1", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 1.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 97.5, 83.5778080593168 ], "wc_strength_and_weaknesses_avg": [ 191.25, 40.64710936831794 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 101.5, 71.20919884396959 ], "wc_summary_review_avg": [ 59.75, 48.05400607649689 ], "wc_review_avg": [ 450.0, 218.87210877587853 ], "wc_reply_reviewers_avg": [ 23.0, 26.636441203734407 ], "wc_reply_authors_avg": [ 488.25, 226.1054787040774 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.13245323570650439, "corr_recommendation_correctness": 0.6882472016116854, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4814595335255486804&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Utah", "aff_unique_dep": "", "aff_unique_url": "https://www.utah.edu", "aff_unique_abbr": "Utah", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "DENSE RGB SLAM WITH NEURAL IMPLICIT MAPS", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11858", "id": "QUK1ExlbbA", "poster": "/media/PosterPDFs/ICLR%202023/11858.png?t=1680767779.288062", "openreview": "https://openreview.net/forum?id=QUK1ExlbbA", "slides": "https://iclr.cc/virtual/2023/poster/11858", "video": "https://iclr.cc/virtual/2023/poster/11858", "author_site": "Heng Li, Xiaodong Gu, Weihao Yuan, Luwei Yang, Zilong Dong, Ping Tan", "tldr": "", "abstract": "There is an emerging trend of using neural implicit 
functions for map representation in Simultaneous Localization and Mapping (SLAM). Some pioneering works have achieved encouraging results on RGB-D SLAM. In this paper, we present a dense RGB SLAM method with neural implicit map representation. To reach this challenging goal without depth input, we introduce a hierarchical feature volume to facilitate the implicit map decoder. This design effectively fuses shape cues across different scales to facilitate map reconstruction. Our method simultaneously solves for the camera motion and the neural implicit map by matching the rendered and input video frames. To facilitate optimization, we further propose a photometric warping loss in the spirit of multi-view stereo to better constrain the camera pose and scene geometry. We evaluate our method on commonly used benchmarks and compare it with modern RGB and RGB-D SLAM systems. Our method achieves more favorable results than previous methods and even surpasses some recent RGB-D SLAM methods. The code is at poptree.github.io/DIM-SLAM/.", "keywords": "dense RGB SLAM;implicit function;RGB VO", "primary_area": "", "supplementary_material": "/attachment/965a3641565450364ebf01af9850bbd9b72d2a8f.zip", "author": "Heng Li;Xiaodong Gu;Weihao Yuan;luwei yang;Zilong Dong;Ping Tan", "authorids": "~Heng_Li6;~Xiaodong_Gu3;~Weihao_Yuan1;luweiy@sfu.ca;~Zilong_Dong2;~Ping_Tan2", "gender": "M;M;M;;;M", "homepage": "http://hengli.me;;https://www.weihao-yuan.com;;;http://www.cs.sfu.ca/~pingtan/", "dblp": "02/3672-9;71/4467-4;217/2047-1;;;", "google_scholar": "tjbbehcAAAAJ;aJPO514AAAAJ;m3tqxRQAAAAJ;;;XhyKVFMAAAAJ", "orcid": "0000-0001-5143-5061;0000-0003-2623-7973;;;;0000-0002-4506-6973", "linkedin": ";;;;;", "or_profile": "~Heng_Li6;~Xiaodong_Gu3;~Weihao_Yuan1;luweiy@sfu.ca;~Zilong_Dong2;~Ping_Tan2", "aff": "Simon Fraser University;Alibaba Group;Alibaba Group;;;Hong Kong University of Science and Technology", "aff_domain": "sfu.ca;alibaba-inc.com;alibaba-inc.com;;;ust.hk", "position": "PhD student;Researcher;Researcher;;;Full Professor", "bibtex": "@inproceedings{\nli2023dense,\ntitle={{DENSE} {RGB} {SLAM} {WITH} {NEURAL} {IMPLICIT} {MAPS}},\nauthor={Heng Li and Xiaodong Gu and Weihao Yuan and luwei yang and Zilong Dong and Ping Tan},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=QUK1ExlbbA}\n}", "github": "", "project": "", "reviewers": "K6me;W3wZ;bVvS;pLyh", "pdf_size": 48831996, "recommendation": "6;6;6;8", "confidence": "3;3;4;4", "correctness": "3;3;3;3", "technical_novelty": "3;3;3;3", "empirical_novelty": "2;3;2;3", "wc_summary_paper": "80;77;76;96", "wc_strength_and_weaknesses": "241;413;206;453", "wc_clarity_quality_novelty_and_reproducibility": "9;36;10;497", "wc_summary_review": "20;130;58;80", "wc_review": "350;656;350;1126", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "274;580;606;867", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;2", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 82.25, 8.073877630977572 ], "wc_strength_and_weaknesses_avg": [ 328.25, 106.42221337671943 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 138.0, 207.5511985029236 ], "wc_summary_review_avg": [ 72.0, 39.774363602702685 ], "wc_review_avg": [ 620.5, 317.4629899689096 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 581.75, 210.15990935475776 ],
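The photometric warping loss mentioned in the DIM-SLAM abstract can be sketched directly: back-project one frame's pixels through its depth map, reproject them into the other frame, sample, and penalize the photometric difference. A generic multi-view-stereo-style version, assuming pinhole intrinsics K and a relative pose T_ij; the paper's exact formulation may differ:

```python
import torch
import torch.nn.functional as F

def photometric_warp_loss(img_i, img_j, depth_i, K, T_ij):
    """Warp frame j into frame i via depth and pose; L1 photometric error.

    img_*: (1, 3, H, W), depth_i: (1, 1, H, W), K: (3, 3) intrinsics,
    T_ij: (4, 4) pose taking frame-i camera coordinates into frame j.
    """
    _, _, H, W = img_i.shape
    ys, xs = torch.meshgrid(torch.arange(H), torch.arange(W), indexing="ij")
    pix = torch.stack([xs, ys, torch.ones_like(xs)], 0).float().reshape(3, -1)
    cam = torch.linalg.inv(K) @ pix * depth_i.reshape(1, -1)   # back-project
    cam_h = torch.cat([cam, torch.ones(1, cam.shape[1])], 0)
    proj = K @ (T_ij @ cam_h)[:3]                              # into frame j
    uv = proj[:2] / proj[2].clamp(min=1e-6)
    # Normalize pixel coordinates to [-1, 1] for grid_sample.
    grid = torch.stack([uv[0] / (W - 1) * 2 - 1, uv[1] / (H - 1) * 2 - 1], -1)
    warped = F.grid_sample(img_j, grid.reshape(1, H, W, 2), align_corners=True)
    return (warped - img_i).abs().mean()

H = W = 32
loss = photometric_warp_loss(torch.rand(1, 3, H, W), torch.rand(1, 3, H, W),
                             torch.rand(1, 1, H, W) + 1.0,
                             torch.tensor([[20.0, 0.0, 16.0],
                                           [0.0, 20.0, 16.0],
                                           [0.0, 0.0, 1.0]]),
                             torch.eye(4))
print(float(loss))
```

Minimizing this loss over pose and map parameters couples camera motion with scene geometry, which is why it helps constrain both.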
"reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.0, "gs_citation": 51, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8618441434614419075&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=QUK1ExlbbA", "email": "sfu.ca;alibaba-inc.com;alibaba-inc.com;;;ust.hk", "author_num": 6, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Simon Fraser University;Alibaba Group;Hong Kong University of Science and Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.sfu.ca;https://www.alibaba.com;https://www.ust.hk", "aff_unique_abbr": "SFU;Alibaba;HKUST", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "Canada;China" }, { "title": "CO3: Cooperative Unsupervised 3D Representation Learning for Autonomous Driving", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11868", "id": "QUaDoIdgo0", "poster": "", "openreview": "https://openreview.net/forum?id=QUaDoIdgo0", "slides": "https://iclr.cc/virtual/2023/poster/11868", "video": "https://iclr.cc/virtual/2023/poster/11868", "author_site": "Runjian Chen, Yao Mu, Runsen Xu, Wenqi Shao, Chenhan Jiang, Hang Xu, Yu Qiao, Zhenguo Li, Ping Luo", "tldr": "We propose CO3, namely {Co}operative {Co}ntrastive Learning and {Co}ntextual Shape Prediction, to learn 3D representation for outdoor-scene point clouds in an unsupervised manner.", "abstract": "Unsupervised contrastive learning for indoor-scene point clouds has achieved great successes. However, unsupervised representation learning on outdoor-scene point clouds remains challenging because previous methods need to reconstruct the whole scene and capture partial views for the contrastive objective. This is infeasible in outdoor scenes with moving objects, obstacles, and sensors. In this paper, we propose CO3, namely {Co}operative {Co}ntrastive Learning and {Co}ntextual Shape Prediction, to learn 3D representation for outdoor-scene point clouds in an unsupervised manner. CO3 has several merits compared to existing methods. (1) It utilizes LiDAR point clouds from vehicle-side and infrastructure-side to build views that differ enough but meanwhile maintain common semantic information for contrastive learning, which are more appropriate than views built by previous methods. (2) Alongside the contrastive objective, we propose contextual shape prediction to bring more task-relevant information for unsupervised 3D point cloud representation learning and we also provide a theoretical analysis for this pre-training goal. (3) As compared to previous methods, representation learned by CO3 is able to be transferred to different outdoor scene dataset collected by different type of LiDAR sensors. (4) CO3 improves current state-of-the-art methods on Once, KITTI and NuScenes datasets by up to 2.58 mAP in 3D object detection task and 3.54 mIoU in LiDAR semantic segmentation task. 
Code and models will be released.", "keywords": "Cooperative Contrastive Learning;Contextual Shape Prediction;Unsupervised Representation Learning;Autonomous Driving", "primary_area": "", "supplementary_material": "", "author": "Runjian Chen;Yao Mu;Runsen Xu;Wenqi Shao;Chenhan Jiang;Hang Xu;Yu Qiao;Zhenguo Li;Ping Luo", "authorids": "~Runjian_Chen1;~Yao_Mu1;~Runsen_Xu1;~Wenqi_Shao2;~Chenhan_Jiang1;~Hang_Xu1;~Yu_Qiao1;~Zhenguo_Li1;~Ping_Luo2", "gender": "M;M;M;M;F;M;;M;", "homepage": "https://runjian-chen.github.io;https://yaomarkmu.github.io/;;https://wqshao126.github.io/;https://jiangchenhan.github.io/;;;http://www.ee.columbia.edu/~zgli/;", "dblp": "257/4647;260/0674;289/6916;227/3122;202/0129;;;23/6479;", "google_scholar": "_USUMdAAAAAJ;;MOobrCcAAAAJ;Bs9mrwwAAAAJ;wvmhx4cAAAAJ;https://scholar.google.com.hk/citations?user=J_8TX6sAAAAJ;;XboZC1AAAAAJ;", "orcid": "0000-0003-0519-496X;;;;0000-0001-8771-3641;0000-0003-3645-8972;;;", "linkedin": ";;runsen-xu-4262a3272/;;;;;;", "or_profile": "~Runjian_Chen1;~Yao_Mu1;~Runsen_Xu1;~Wenqi_Shao2;~Chenhan_Jiang1;~Hang_Xu1;~Yu_Qiao1;~Zhenguo_Li1;~Ping_Luo2", "aff": "University of Hong Kong;The University of Hong Kong;The Chinese University of Hong Kong;Shanghai AI Laboratory;Hong Kong University of Science and Technology;Huawei Noah's Ark Lab;;Huawei Noah's Ark Lab;", "aff_domain": "hku.hk;hku.hk;ie.cuhk.edu;pjlab.org.cn;ust.hk;huawei.com;;huawei.com;", "position": "PhD student;PhD student;PhD student;Researcher;PhD student;Researcher;;Principal Researcher;", "bibtex": "@inproceedings{\nchen2023co,\ntitle={{CO}3: Cooperative Unsupervised 3D Representation Learning for Autonomous Driving},\nauthor={Runjian Chen and Yao Mu and Runsen Xu and Wenqi Shao and Chenhan Jiang and Hang Xu and Yu Qiao and Zhenguo Li and Ping Luo},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=QUaDoIdgo0}\n}", "github": "", "project": "", "reviewers": "1HxB;XpxU;BVa4;bVKL", "pdf_size": 18069207, "recommendation": "3;5;6;8", "confidence": "4;4;5;5", "correctness": "2;4;4;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "0;2;0;2", "wc_summary_paper": "71;90;72;138", "wc_strength_and_weaknesses": "381;82;122;512", "wc_clarity_quality_novelty_and_reproducibility": "85;25;37;93", "wc_summary_review": "36;100;25;119", "wc_review": "573;297;256;862", "wc_reply_reviewers": "0;0;0;44", "wc_reply_authors": "1252;783;914;1777", "reply_reviewers": "0;0;0;1", "reply_authors": "3;2;2;4", "recommendation_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 4.5, 0.5 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 1.0, 1.0 ], "wc_summary_paper_avg": [ 92.75, 27.19719654670312 ], "wc_strength_and_weaknesses_avg": [ 274.25, 178.92788351735456 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 60.0, 29.444863728670914 ], "wc_summary_review_avg": [ 70.0, 40.25543441574069 ], "wc_review_avg": [ 497.0, 243.4553347125505 ], "wc_reply_reviewers_avg": [ 11.0, 19.05255888325765 ], "wc_reply_authors_avg": [ 1181.5, 384.0406879485558 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 9, 0 ], "corr_recommendation_confidence": 0.8320502943378437, "corr_recommendation_correctness": 0.4181210050035454, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7904065435250997642&as_sdt=2005&sciodt=0,5&hl=en",
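CO3's two contrastive views are the vehicle-side and infrastructure-side LiDAR sweeps of the same scene. A minimal sketch of forming positive pairs once the clouds are registered into a common frame; the brute-force nearest-neighbor matching and the 0.5 m radius are illustrative assumptions, not the paper's pipeline:

```python
import numpy as np

def build_cooperative_pairs(pc_vehicle, pc_infra, radius=0.5):
    """Pair each vehicle point with its nearest infrastructure point.

    pc_*: (N, 3) points already in a shared world frame (calibration assumed).
    Pairs within `radius` meters would serve as positives for a downstream
    contrastive objective on the corresponding point/voxel features.
    """
    d2 = ((pc_vehicle[:, None, :] - pc_infra[None, :, :]) ** 2).sum(-1)
    nn = d2.argmin(1)
    keep = d2[np.arange(len(pc_vehicle)), nn] < radius ** 2
    return np.nonzero(keep)[0], nn[keep]   # indices into the two clouds

rng = np.random.default_rng(0)
scene = rng.uniform(-10, 10, size=(200, 3))
veh = scene + rng.normal(0, 0.05, scene.shape)     # two noisy views of one scene
infra = scene + rng.normal(0, 0.05, scene.shape)
i, j = build_cooperative_pairs(veh, infra)
print(f"{len(i)} positive pairs out of {len(veh)} vehicle points")
```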
"gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=QUaDoIdgo0", "email": "hku.hk;hku.hk;ie.cuhk.edu;pjlab.org.cn;ust.hk;huawei.com;;huawei.com;", "author_num": 9, "aff_unique_index": "0;0;1;2;3;4;4", "aff_unique_norm": "University of Hong Kong;Chinese University of Hong Kong;Shanghai AI Laboratory;Hong Kong University of Science and Technology;Huawei", "aff_unique_dep": ";;;;Noah's Ark Lab", "aff_unique_url": "https://www.hku.hk;https://www.cuhk.edu.hk;https://www.shanghai-ai-lab.com;https://www.ust.hk;https://www.huawei.com", "aff_unique_abbr": "HKU;CUHK;SAIL;HKUST;Huawei", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "QVSoh6VM4nG", "title": "Vector Quantization and Shifting: Exploiting Latent Properties to Optimize Neural Codecs", "track": "main", "status": "Reject", "tldr": "We improve the performance of any neural codecs by uniform vector quantization and gradient of the entropy.", "abstract": "End-to-end image/video codecs are getting competitive compared to traditional compression techniques that have been developed through decades of manual engineering efforts. These trainable codecs have many advantages over traditional techniques such as easy adaptation on perceptual distortion metrics and high performance on specific domains thanks to their learning ability. However, state of the art neural codecs do not take advantage of vector quantization technique and existence of gradient of entropy in decoding device. In this research, we propose some theoretical insights about these two properties (quantization and entropy gradient), and show that this can improve the performances of many off-the-shelf codecs. First, we prove that non-uniform quantization map on neural codec's latent is not necessary. Thus, we improve the performance by using a predefined optimal uniform vector quantization map. Secondly, we theoretically show that gradient of entropy (available at decoder side) is correlated with the gradient of the reconstruction error (which is not available at decoder side). Thus, we use the former as a proxy in order to improve the compression performance. 
According to our results, we save between 2-4\\% of rate for the same quality with this proposal, for various pre-trained methods.", "keywords": "Neural Image Compression;Uniform Vector Quantization;Gradient of Entropy", "primary_area": "", "supplementary_material": "", "author": "Muhammet Balcilar;Bharath Bhushan Damodaran;Karam Naser;Franck Galpin;Pierre Hellier", "authorids": "~Muhammet_Balcilar1;~Bharath_Bhushan_Damodaran1;karam.naser@interdigital.com;franck.galpin@interdigital.com;~Pierre_Hellier1", "gender": "M;M;;;M", "homepage": "https://balcilar.weebly.com/;;;;", "dblp": "130/0818;189/3814;;;", "google_scholar": "https://scholar.google.fr/citations?hl=fr;DarhRtEAAAAJ;;;https://scholar.google.fr/citations?user=U2BX6Q8AAAAJ", "orcid": "0000-0003-1428-4297;;;;0000-0003-3603-2381", "linkedin": ";bbdamodaran/;;;", "or_profile": "~Muhammet_Balcilar1;~Bharath_Bhushan_Damodaran1;karam.naser@interdigital.com;franck.galpin@interdigital.com;~Pierre_Hellier1", "aff": "Interdigital Inc;Interdigital R&D;;;Interdigital", "aff_domain": "interdigital.com;interdigital.com;;;interdigital.com", "position": "Researcher;Researcher;;;principal scientist", "bibtex": "@misc{\nbalcilar2023vector,\ntitle={Vector Quantization and Shifting: Exploiting Latent Properties to Optimize Neural Codecs},\nauthor={Muhammet Balcilar and Bharath Bhushan Damodaran and Karam Naser and Franck Galpin and Pierre Hellier},\nyear={2023},\nurl={https://openreview.net/forum?id=QVSoh6VM4nG}\n}", "github": "", "project": "", "reviewers": "tojV;k9t4;7dNo;36qZ", "site": "https://openreview.net/forum?id=QVSoh6VM4nG", "pdf_size": 2679409, "recommendation": "3;5;6;6", "confidence": "5;4;3;4", "correctness": "3;3;3;4", "technical_novelty": "3;2;4;4", "empirical_novelty": "3;2;4;4", "wc_summary_paper": "91;82;116;38", "wc_strength_and_weaknesses": "274;211;407;135", "wc_clarity_quality_novelty_and_reproducibility": "53;10;46;2", "wc_summary_review": "89;44;75;107", "wc_review": "507;347;644;282", "wc_reply_reviewers": "167;70;133;15", "wc_reply_authors": "3918;3137;1698;818", "reply_reviewers": "2;1;1;1", "reply_authors": "7;7;3;3", "recommendation_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.25, 0.82915619758885 ], "empirical_novelty_avg": [ 3.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 81.75, 28.16358464400439 ], "wc_strength_and_weaknesses_avg": [ 256.75, 99.7355879312896 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 27.75, 22.072324299900995 ], "wc_summary_review_avg": [ 78.75, 23.047505287991584 ], "wc_review_avg": [ 445.0, 141.08330872218727 ], "wc_reply_reviewers_avg": [ 96.25, 58.409652455737145 ], "wc_reply_authors_avg": [ 2392.75, 1208.5953365374203 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 5.0, 2.0 ], "replies_avg": [ 32, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.8660254037844386, "corr_recommendation_correctness": 0.4714045207910316, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:5fJNBVcq9AYJ:scholar.google.com/&scioq=Vector+Quantization+and+Shifting:+Exploiting+Latent+Properties+to+Optimize+Neural+Codecs&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;1", "aff_unique_norm": "Interdigital Inc.;InterDigital", "aff_unique_dep": ";R&D", "aff_unique_url": "https://www.interdigital.com;https://www.interdigital.com", "aff_unique_abbr": "Interdigital;Interdigital", 
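The second idea in this abstract, using the decoder-side gradient of the entropy model as a proxy for the unavailable gradient of the reconstruction error, can be sketched as a one-step latent shift. The factorized Gaussian prior and the step size below are toy stand-ins, not the authors' procedure:

```python
import torch

def entropy_gradient_shift(z_hat, log_likelihood_fn, delta=0.01):
    """Nudge dequantized latents along the gradient of the rate term.

    The decoder cannot evaluate reconstruction error, but it can evaluate the
    entropy model, so it shifts z_hat using the gradient of the negative
    log-likelihood as a proxy signal.
    """
    z = z_hat.clone().requires_grad_(True)
    nll = -log_likelihood_fn(z).sum()      # rate of the latent under the prior
    nll.backward()
    return (z - delta * z.grad).detach()

prior = torch.distributions.Normal(0.0, 1.0)   # stand-in for a learned prior
z_hat = torch.randn(4, 8).round()              # uniformly dequantized latents
z_shifted = entropy_gradient_shift(z_hat, prior.log_prob)
print(float((z_shifted - z_hat).abs().mean()))
```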
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Ensuring DNN Solution Feasibility for Optimization Problems with Linear Constraints", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11719", "id": "QVcDQJdFTG", "poster": "/media/PosterPDFs/ICLR%202023/11719.png?t=1680794806.838607", "openreview": "https://openreview.net/forum?id=QVcDQJdFTG", "slides": "https://iclr.cc/virtual/2023/poster/11719", "video": "https://iclr.cc/virtual/2023/poster/11719", "author_site": "Tianyu Zhao, Xiang Pan, Minghua Chen, Steven Low", "tldr": "This paper proposes a preventive learning framework to ensure DNN solution feasibility for optimization problems with linear constraints without post-processing.", "abstract": "We propose preventive learning as the first framework to guarantee Deep Neural Network (DNN) solution feasibility for optimization problems with linear constraints without post-processing, upon satisfying a mild condition on constraint calibration. Without loss of generality, we focus on problems with only inequality constraints. We systematically calibrate the inequality constraints used in training, thereby anticipating DNN prediction errors and ensuring the obtained solutions remain feasible. We characterize the calibration rate and a critical DNN size, based on which we can directly construct a DNN with provable solution feasibility guarantee. We further propose an Adversarial-Sample Aware training algorithm to improve its optimality performance. We apply the framework to develop DeepOPF+ for solving essential DC optimal power flow problems in grid operation. Simulation results over IEEE test cases show that it outperforms existing strong DNN baselines in ensuring 100\\% feasibility and attaining consistent optimality loss (<0.19%) and speedup (up to x228) in both light-load and heavy-load regimes, as compared to a state-of-the-art solver. 
We also apply our framework to a non-convex problem and show its performance advantage over existing schemes.", "keywords": "Deep learning;Deep neural network;Constrained optimization;Solution feasibility guarantee;Optimal power flow", "primary_area": "", "supplementary_material": "/attachment/8fc74f0bfa54e58735080e79ae43d252e6186422.zip", "author": "Tianyu Zhao;Xiang Pan;Minghua Chen;Steven Low", "authorids": "~Tianyu_Zhao2;~Xiang_Pan4;~Minghua_Chen1;~Steven_Low1", "gender": "M;M;M;M", "homepage": ";;https://www.mhchen.com;http://www.eas.caltech.edu/people/3109/profile/", "dblp": "139/3492;;12/4395-1.html;", "google_scholar": "https://scholar.google.com.hk/citations?hl=zh-CN;https://scholar.google.com.hk/citations?user=d9unFWQAAAAJ;https://scholar.google.com.hk/citations?user=WzEQ9QwAAAAJ;https://scholar.google.com.tw/citations?user=4BrebIYAAAAJ", "orcid": "0000-0002-9541-0197;;0000-0003-4763-0037;", "linkedin": ";;;", "or_profile": "~Tianyu_Zhao2;~Xiang_Pan4;~Minghua_Chen1;~Steven_Low1", "aff": "Lenovo Machine Intelligence Center;Tencent;City University of Hong Kong;California Institute of Technology", "aff_domain": "lenovo.com;tencent.com;cityu.edu.hk;", "position": "Researcher;Researcher;Full Professor;Full Professor", "bibtex": "@inproceedings{\nzhao2023ensuring,\ntitle={Ensuring {DNN} Solution Feasibility for Optimization Problems with Linear Constraints},\nauthor={Tianyu Zhao and Xiang Pan and Minghua Chen and Steven Low},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=QVcDQJdFTG}\n}", "github": "", "project": "", "reviewers": "Z4Mb;f2t2;crFR;ek1R", "pdf_size": 582262, "recommendation": "6;6;8;8", "confidence": "2;2;3;4", "correctness": "3;3;2;4", "technical_novelty": "2;3;2;4", "empirical_novelty": "3;2;3;3", "wc_summary_paper": "83;20;88;157", "wc_strength_and_weaknesses": "60;166;577;175", "wc_clarity_quality_novelty_and_reproducibility": "30;50;104;28", "wc_summary_review": "38;40;27;44", "wc_review": "211;276;796;404", "wc_reply_reviewers": "20;26;0;60", "wc_reply_authors": "938;1423;2803;422", "reply_reviewers": "1;1;0;1", "reply_authors": "6;5;7;3", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 87.0, 48.49226742481733 ], "wc_strength_and_weaknesses_avg": [ 244.5, 197.22385758320416 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 53.0, 30.675723300355934 ], "wc_summary_review_avg": [ 37.25, 6.299801584177076 ], "wc_review_avg": [ 421.75, 226.95635593655447 ], "wc_reply_reviewers_avg": [ 26.5, 21.6043977004683 ], "wc_reply_authors_avg": [ 1396.5, 885.8353402297743 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 5.25, 1.479019945774904 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.9045340337332909, "corr_recommendation_correctness": 0.0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1510842326615521169&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=QVcDQJdFTG", "email": "lenovo.com;tencent.com;cityu.edu.hk;", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Lenovo;Tencent;City University of Hong Kong;California Institute of Technology", "aff_unique_dep": "Machine Intelligence Center;Tencent 
Holdings Limited;;", "aff_unique_url": "https://www.lenovo.com;https://www.tencent.com;https://www.cityu.edu.hk;https://www.caltech.edu", "aff_unique_abbr": "LMIC;Tencent;CityU;Caltech", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Hong Kong SAR;Pasadena", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "China;United States" }, { "title": "Fake It Until You Make It : Towards Accurate Near-Distribution Novelty Detection", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10991", "id": "QWQM0ZwZdRS", "poster": "/media/PosterPDFs/ICLR%202023/10991.png?t=1682363978.9760277", "openreview": "https://openreview.net/forum?id=QWQM0ZwZdRS", "slides": "https://iclr.cc/virtual/2023/poster/10991", "video": "https://iclr.cc/virtual/2023/poster/10991", "author_site": "Hossein Mirzaei, Mohammadreza Salehi, Sajjad Shahabi, Efstratios Gavves, Cees G Snoek, Mohammad Sabokrou, Mohammad Hossein Rohban", "tldr": "", "abstract": "We aim for image-based novelty detection. Despite considerable progress, existing models either fail or face dramatic drop under the so-called ``near-distribution\" setup, where the differences between normal and anomalous samples are subtle. We first demonstrate existing methods could experience up to 20\\% decrease in their AUCs in the near-distribution setting. Next, we propose to exploit a score-based generative model to produce synthetic near-distribution anomalous data. Our model is then fine-tuned to distinguish such data from the normal samples. We make quantitative as well as qualitative evaluation of this strategy, and compare the results with a variety of GAN-based models. Effectiveness of our method for both near-distribution and standard novelty detection is assessed through extensive experiments on datasets in diverse applications such as medical images, object classification, and quality control. This reveals that our method significantly improves upon existing models, and consistently decreases the gap between the near-distribution and standard novelty detection AUCs by a considerable amount.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/d8147f8b9d26dad6dd805270cfcde7d861b2301f.zip", "author": "Hossein Mirzaei;Mohammadreza Salehi;Sajjad Shahabi;Efstratios Gavves;Cees G. M. 
Snoek;Mohammad Sabokrou;Mohammad Hossein Rohban", "authorids": "~Hossein_Mirzaei1;~Mohammadreza_Salehi2;~Sajjad_Shahabi1;~Efstratios_Gavves1;~Cees_G._M._Snoek1;~Mohammad_Sabokrou1;~Mohammad_Hossein_Rohban1", "gender": "M;M;;M;M;M;M", "homepage": ";;https://github.com/sajjad2014;https://www.egavves.com;https://sabokrou.github.io/;http://sharif.edu/~rohban/;http://www.ceessnoek.info", "dblp": ";260/6762;;03/8693;163/2030;43/8108;s/CeesSnoek", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;kpT3gcsAAAAJ;;https://scholar.google.nl/citations?user=QqfCvsgAAAAJ;jqHXvT0AAAAJ;pRyJ6FkAAAAJ;https://scholar.google.nl/citations?user=0uKdbscAAAAJ", "orcid": ";;;;;;0000-0001-9092-1556", "linkedin": "hossein-mirzaei-6bb2301aa;seyed-mohammadreza-salehi-dehnavi-39717b108?lipi=urn%3Ali%3Apage%3Ad_flagship3_profile_view_base_contact_details%3BoKJGi6AERu2kmjEE%2FVHd%2Fg%3D%3D;;;;;cgmsnoek/", "or_profile": "~Hossein_Mirzaei1;~Mohammadreza_Salehi2;~Sajjad_Shahabi1;~Efstratios_Gavves1;~Mohammad_Sabokrou1;~Mohammad_Hossein_Rohban1;~Cees_Snoek1", "aff": "Sharif University of Technology, Sharif University of Technology;University of Amsterdam;University of Southern California;University of Amsterdam;Institute for Research in Fundamental Sciences (IPM);Sharif University of Technology;University of Amsterdam", "aff_domain": "ce.sharif.edu;uva.nl;usc.edu;uva.nl;ipm.ir;sharif.edu;uva.nl", "position": "MS student;PhD student;PhD student;Associate Professor;Assistant Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nmirzaei2023fake,\ntitle={Fake It Until You Make It : Towards Accurate Near-Distribution Novelty Detection},\nauthor={Hossein Mirzaei and Mohammadreza Salehi and Sajjad Shahabi and Efstratios Gavves and Cees G. M. Snoek and Mohammad Sabokrou and Mohammad Hossein Rohban},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=QWQM0ZwZdRS}\n}", "github": "", "project": "", "reviewers": "mA9p;wBQG;xzvz;ahZi", "pdf_size": 7187062, "recommendation": "6;6;6;6", "confidence": "5;4;4;3", "correctness": "3;4;3;3", "technical_novelty": "2;3;2;3", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "89;27;43;226", "wc_strength_and_weaknesses": "80;243;371;416", "wc_clarity_quality_novelty_and_reproducibility": "42;7;31;121", "wc_summary_review": "27;20;32;121", "wc_review": "238;297;477;884", "wc_reply_reviewers": "0;290;0;0", "wc_reply_authors": "696;2165;625;754", "reply_reviewers": "0;2;0;0", "reply_authors": "2;5;2;2", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 96.25, 78.29232082394799 ], "wc_strength_and_weaknesses_avg": [ 277.5, 130.5 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 50.25, 42.763155870445296 ], "wc_summary_review_avg": [ 50.0, 41.212862069989754 ], "wc_review_avg": [ 474.0, 252.55395463148068 ], "wc_reply_reviewers_avg": [ 72.5, 125.5736835487436 ], "wc_reply_authors_avg": [ 1060.0, 639.6057379354879 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 34, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6168185637331736868&as_sdt=2005&sciodt=0,5&hl=en", 
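The recipe in the abstract above, synthesize near-distribution anomalies and fine-tune a model to separate them from normal samples, can be sketched with a linear head on fixed features. Here a crude mean-shift perturbation stands in for the score-based generative model, and all sizes are illustrative:

```python
import torch
import torch.nn.functional as F

def finetune_on_fake_anomalies(feats_normal, make_fake, steps=200, lr=0.1):
    """Fit a linear head to score normal features vs. synthetic anomalies."""
    fake = make_fake(feats_normal)
    x = torch.cat([feats_normal, fake])
    y = torch.cat([torch.zeros(len(feats_normal)), torch.ones(len(fake))])
    w = torch.zeros(x.size(1), requires_grad=True)
    b = torch.zeros(1, requires_grad=True)
    opt = torch.optim.SGD([w, b], lr=lr)
    for _ in range(steps):
        opt.zero_grad()
        F.binary_cross_entropy_with_logits(x @ w + b, y).backward()
        opt.step()
    return lambda z: torch.sigmoid(z @ w + b)   # anomaly score in [0, 1]

normals = torch.randn(256, 16)
score = finetune_on_fake_anomalies(normals, lambda f: f + 0.3)  # generator stand-in
print(score(torch.randn(3, 16)).detach())
```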
"gs_version_total": 2, "pdf": "https://openreview.net/pdf?id=QWQM0ZwZdRS", "email": "ce.sharif.edu;uva.nl;usc.edu;uva.nl;ipm.ir;sharif.edu;uva.nl", "author_num": 7, "aff_unique_index": "0;1;2;1;3;0;1", "aff_unique_norm": "Sharif University of Technology;University of Amsterdam;University of Southern California;Institute for Research in Fundamental Sciences", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.sharif.edu;https://www.uva.nl;https://www.usc.edu;http://ipm.ir", "aff_unique_abbr": "SUT;UvA;USC;IPM", "aff_campus_unique_index": "1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;1;2;1;0;0;1", "aff_country_unique": "Iran;Netherlands;United States" }, { "id": "QYiN3R9nVUG", "title": "SEQuence-rPPG: A Fast BVP Signal Extraction Method From Frame Sequences", "track": "main", "status": "Reject", "tldr": "A new rPPG method is proposed, which is very simple, fast and accurate.", "abstract": "Non-contact heart rate estimation has essential implications for the development of affective computing and telemedicine. However, existing deep learning-based methods often endeavor to achieve real-time measurements, so a simple, fast, pre-processing-free approach is needed. Our work consists of two main parts. Firstly, we proposed SEQ-rPPG, which first transforms the RGB frame sequence into the original BVP signal sequence by learning-based linear mapping and then outputs the final BVP signal using 1DCNN-based spectral transform, and time-domain filtering. Secondly, to address the shortcomings of the existing dataset in training the model, a new large-scale dataset was collected for training and testing. Our approach achieved competitive results on the collected large dataset(the best) and public dataset UBFC-rPPG(0.81 MAE with 30s time window, test only). It requires no complex pre-processing, has the fastest speed, can run in real-time on mobile ARM CPUs, and can achieve real-time beat-to-beat performance on desktop CPUs. Benefiting from the high-quality training set, other deep learning-based models reduced errors by at least 53$\\%$. 
We compared the methods with and without the spectral transformation, and the results show that the processing in the time domain is effective.", "keywords": "rPPG;Remote vital sensing;Signal processing", "primary_area": "", "supplementary_material": "/attachment/267953ba6c46fd65ada2d083c5c51b5ad51baa79.zip", "author": "Kegang Wang;Yantao Wei;Mingwen Tong;Jie Gao;ZhongJin Zhao;YuJian Ma;Yi Tian", "authorids": "~Kegang_Wang1;~Yantao_Wei2;~Mingwen_Tong1;~Jie_Gao9;~ZhongJin_Zhao1;~YuJian_Ma1;~Yi_Tian2", "gender": "M;M;M;M;F;M;F", "homepage": "https://github.com/1625298892;http://faculty.ccnu.edu.cn/2012980071;http://it.ccnu.edu.cn/info/1010/2201.htm;https://github.com;https://github.com/MeowsQAQ;https://meowsqaq.github.io/blog/;https://github.com/MeowsQAQ", "dblp": ";;;;;;", "google_scholar": ";zkzaeIoAAAAJ;;;;;", "orcid": "0000-0003-3469-0786;;;;;;", "linkedin": ";;;;;;", "or_profile": "~Kegang_Wang1;~Yantao_Wei2;~Mingwen_Tong1;~Jie_Gao9;~ZhongJin_Zhao1;~YuJian_Ma1;~Yi_Tian2", "aff": "Central China Normal University;Central China Normal University;Central China Normal University;Central China Normal University;Central China Normal University;Central China Normal University;Central China Normal University", "aff_domain": "ccnu.edu.cn;ccnu.edu.cn;ccnu.edu.cn;ccnu.edu.cn;ccnu.edu.cn;ccnu.edu.cn;ccnu.edu.cn", "position": "MS student;Associate Professor;Full Professor;MS student;MS student;MS student;MS student", "bibtex": "@misc{\nwang2023sequencerppg,\ntitle={{SEQ}uence-r{PPG}: A Fast {BVP} Signal Extraction Method From Frame Sequences},\nauthor={Kegang Wang and Yantao Wei and Mingwen Tong and Jie Gao and ZhongJin Zhao and YuJian Ma and Yi Tian},\nyear={2023},\nurl={https://openreview.net/forum?id=QYiN3R9nVUG}\n}", "github": "", "project": "", "reviewers": "V5oM;j11H;DUL3", "site": "https://openreview.net/forum?id=QYiN3R9nVUG", "pdf_size": 661041, "recommendation": "3;3;6", "confidence": "3;5;4", "correctness": "3;3;3", "technical_novelty": "2;1;3", "empirical_novelty": "2;1;2", "wc_summary_paper": "53;31;113", "wc_strength_and_weaknesses": "275;189;671", "wc_clarity_quality_novelty_and_reproducibility": "129;28;24", "wc_summary_review": "20;5;78", "wc_review": "477;253;886", "wc_reply_reviewers": "64;64;189", "wc_reply_authors": "216;298;592", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "recommendation_avg": [ 4.0, 1.4142135623730951 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.0, 0.816496580927726 ], "empirical_novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "wc_summary_paper_avg": [ 65.66666666666667, 34.65384378231207 ], "wc_strength_and_weaknesses_avg": [ 378.3333333333333, 209.90368161505782 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 60.333333333333336, 48.58211833815217 ], "wc_summary_review_avg": [ 34.333333333333336, 31.47838764754143 ], "wc_review_avg": [ 538.6666666666666, 262.0742049284685 ], "wc_reply_reviewers_avg": [ 105.66666666666667, 58.92556509887896 ], "wc_reply_authors_avg": [ 368.6666666666667, 161.4297232716316 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:gcDOmwm1qksJ:scholar.google.com/&scioq=SEQuence-rPPG:+A+Fast+BVP+Signal+Extraction+Method+From+Frame+Sequences&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": 
"0;0;0;0;0;0;0", "aff_unique_norm": "Central China Normal University", "aff_unique_dep": "", "aff_unique_url": "http://www.ccnu.edu.cn", "aff_unique_abbr": "CCNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Meta Temporal Point Processes", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11395", "id": "QZfdDpTX1uM", "poster": "/media/PosterPDFs/ICLR%202023/11395.png?t=1682361273.0520558", "openreview": "https://openreview.net/forum?id=QZfdDpTX1uM", "slides": "https://iclr.cc/virtual/2023/poster/11395", "video": "https://iclr.cc/virtual/2023/poster/11395", "author_site": "Wonho Bae, Mohamed Ahmed, Frederick Tung, Gabriel Oliveira", "tldr": "We present a novel approach to train temporal point processes in a meta learning framework.", "abstract": "A temporal point process (TPP) is a stochastic process where its realization is a sequence of discrete events in time. Recent work in TPPs model the process using a neural network in a supervised learning framework, where a training set is a collection of all the sequences. In this work, we propose to train TPPs in a meta learning framework, where each sequence is treated as a different task, via a novel framing of TPPs as neural processes (NPs). We introduce context sets to model TPPs as an instantiation of NPs. Motivated by attentive NP, we also introduce local history matching to help learn more informative features. We demonstrate the potential of the proposed method on popular public benchmark datasets and tasks, and compare with state-of-the-art TPP methods.", "keywords": "Temporal Point Process;Asynchronous Time Series;Meta-learning", "primary_area": "", "supplementary_material": "/attachment/0fb1e27ea14e7542f44fa8d731110ef23a83bb97.zip", "author": "Wonho Bae;Mohamed Osama Ahmed;Frederick Tung;Gabriel L. Oliveira", "authorids": "~Wonho_Bae1;~Mohamed_Osama_Ahmed1;~Frederick_Tung1;~Gabriel_L._Oliveira1", "gender": "M;M;M;M", "homepage": "https://won-bae.github.io/;;https://sites.google.com/view/gabriel-leivas-oliveira/home;", "dblp": "259/5393;10/7697;117/2073;https://dblp.org/pers/hd/a/Ahmed:Mohamed_Osama", "google_scholar": "https://scholar.google.ca/citations?user=EEwA__kAAAAJ;https://scholar.google.ca/citations?user=T4EeZ9gAAAAJ;5anRZEcAAAAJ;https://scholar.google.ca/citations?user=jyVyVj4AAAAJ", "orcid": ";;0000-0003-0099-9873;0000-0001-6758-1178", "linkedin": "wonho-bae/;;;mohamed-osama-ahmed-91439a154/", "or_profile": "~Wonho_Bae1;~Frederick_Tung1;~Gabriel_L._Oliveira1;~Mohamed_Osama_Ahmed2", "aff": "University of British Columbia;Borealis AI;Borealis AI;", "aff_domain": "cs.ubc.ca;borealisai.com;borealisai.com;", "position": "PhD student;Researcher;Senior Machine Learning Researcher;", "bibtex": "@inproceedings{\nbae2023meta,\ntitle={Meta Temporal Point Processes},\nauthor={Wonho Bae and Mohamed Osama Ahmed and Frederick Tung and Gabriel L. 
Oliveira},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=QZfdDpTX1uM}\n}", "github": "", "project": "", "reviewers": "dU5L;AxMo;YAR9;Jm5S", "pdf_size": 437255, "recommendation": "3;6;6;8", "confidence": "4;4;3;4", "correctness": "3;4;4;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "53;49;113;89", "wc_strength_and_weaknesses": "267;52;209;367", "wc_clarity_quality_novelty_and_reproducibility": "79;246;36;45", "wc_summary_review": "4;57;39;22", "wc_review": "403;404;397;523", "wc_reply_reviewers": "0;16;0;58", "wc_reply_authors": "1780;1620;1687;1337", "reply_reviewers": "0;2;0;2", "reply_authors": "7;4;5;5", "recommendation_avg": [ 5.75, 1.7853571071357126 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 76.0, 26.43860813280457 ], "wc_strength_and_weaknesses_avg": [ 223.75, 114.13451493741934 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 101.5, 84.9543995329259 ], "wc_summary_review_avg": [ 30.5, 19.67866865415443 ], "wc_review_avg": [ 431.75, 52.75118482081706 ], "wc_reply_reviewers_avg": [ 18.5, 23.722352328552915 ], "wc_reply_authors_avg": [ 1606.0, 165.37381896781605 ], "reply_reviewers_avg": [ 1.0, 1.0 ], "reply_authors_avg": [ 5.25, 1.0897247358851685 ], "replies_avg": [ 36, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.08084520834544431, "corr_recommendation_correctness": 0.14002800840280097, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5054099127431259627&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=QZfdDpTX1uM", "email": "cs.ubc.ca;borealisai.com;borealisai.com;", "author_num": 4, "aff_unique_index": "0;1;1", "aff_unique_norm": "University of British Columbia;Borealis AI", "aff_unique_dep": ";", "aff_unique_url": "https://www.ubc.ca;https://www.borealisai.com", "aff_unique_abbr": "UBC;Borealis AI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "title": "De Novo Molecular Generation via Connection-aware Motif Mining", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12174", "id": "Q_Jexl8-qDi", "poster": "/media/PosterPDFs/ICLR%202023/12174.png?t=1682165054.875407", "openreview": "https://openreview.net/forum?id=Q_Jexl8-qDi", "slides": "https://iclr.cc/virtual/2023/poster/12174", "video": "https://iclr.cc/virtual/2023/poster/12174", "author_site": "Zijie Geng, Shufang Xie, Yingce Xia, Lijun Wu, Tao Qin, Jie Wang, Yongdong Zhang, Feng Wu, Tie-Yan Liu", "tldr": "We propose a fragment-based model for molecular generation. It first mines connection-aware motifs from the molecule library and then leverage a connection-aware generator to generate novel drug candidates.", "abstract": "De novo molecular generation is an essential task for science discovery. Recently, fragment-based deep generative models have attracted much research attention due to their flexibility in generating novel molecules based on existing molecule fragments. However, the motif vocabulary, i.e., the collection of frequent fragments, is usually built upon heuristic rules, which brings difficulties to capturing common substructures from large amounts of molecules. 
In this work, we propose MiCaM to generate molecules based on mined connection-aware motifs. Specifically, it leverages a data-driven algorithm to automatically discover motifs from a molecule library by iteratively merging subgraphs based on their frequency. The obtained motif vocabulary consists of not only molecular motifs (i.e., the frequent fragments), but also their connection information, indicating how the motifs are connected with each other. Based on the mined connection-aware motifs, MiCaM builds a connection-aware generator, which simultaneously picks up motifs and determines how they are connected. We test our method on distribution-learning benchmarks (i.e., generating novel molecules to resemble the distribution of a given training set) and goal-directed benchmarks (i.e., generating molecules with target properties), and achieve significant improvements over previous fragment-based baselines. Furthermore, we demonstrate that our method can effectively mine domain-specific motifs for different tasks.", "keywords": "Molecular generation;Graph generation;Motifs mining", "primary_area": "", "supplementary_material": "/attachment/5a0fbd177e480d9f22e3e173b44c69f2bb2af511.zip", "author": "Zijie Geng;Shufang Xie;Yingce Xia;Lijun Wu;Tao Qin;Jie Wang;Yongdong Zhang;Feng Wu;Tie-Yan Liu", "authorids": "~Zijie_Geng1;~Shufang_Xie1;~Yingce_Xia1;~Lijun_Wu1;~Tao_Qin1;~Jie_Wang1;~Yongdong_Zhang2;~Feng_Wu1;~Tie-Yan_Liu1", "gender": "M;M;M;M;M;M;M;M;M", "homepage": "https://miralab.ai/people/zijie-geng/;;https://www.microsoft.com/en-us/research/people/yinxia/;https://apeterswu.github.io/;https://www.microsoft.com/en-us/research/people/taoqin/;http://staff.ustc.edu.cn/~jwangx;https://imcc.ustc.edu.cn/_upload/tpl/0d/13/3347/template3347/zhangyongdong.html;;http://member.acm.org/~tieyanliu", "dblp": "320/7568;https://dblp.uni-trier.de/pid/163/2704-3;http://dblp.uni-trier.de/pers/hd/x/Xia:Yingce;68/1284-3;14/6841;29/5259-5;z/YongdongZhang;25/3972-1;l/TieYanLiu", "google_scholar": "https://scholar.google.com.hk/citations?user=Ga66HL4AAAAJ;;GS5wRxYAAAAJ;https://scholar.google.com/citations?hl=en;Bl4SRU0AAAAJ;OugG4dUAAAAJ;https://scholar.google.com.hk/citations?user=hxGs4ukAAAAJ;5bInRDEAAAAJ;Nh832fgAAAAJ", "orcid": ";;;0000-0002-3530-590X;;;0000-0003-0066-3448;;0000-0002-0476-8020", "linkedin": ";;;lijun-wu-59340478/;;;;;", "or_profile": "~Zijie_Geng1;~Shufang_Xie1;~Yingce_Xia1;~Lijun_Wu1;~Tao_Qin1;~Jie_Wang1;~Yongdong_Zhang2;~Feng_Wu1;~Tie-Yan_Liu1", "aff": "University of Science and Technology of China;Renmin University of China;Microsoft;Microsoft Research;;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;Microsoft", "aff_domain": "mail.ustc.edu.cn;ruc.edu.cn;microsoft.com;microsoft.com;;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;microsoft.com", "position": "MS student;PhD student;Researcher;Researcher;;Full Professor;Full Professor;Full Professor;Distinguished Scientist", "bibtex": "@inproceedings{\ngeng2023de,\ntitle={De Novo Molecular Generation via Connection-aware Motif Mining},\nauthor={Zijie Geng and Shufang Xie and Yingce Xia and Lijun Wu and Tao Qin and Jie Wang and Yongdong Zhang and Feng Wu and Tie-Yan Liu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=Q_Jexl8-qDi}\n}", "github": "", "project": "", "reviewers": "zHsw;5Fqs;3hrY;Gyc8", "pdf_size": 3299948, "recommendation": "6;6;6;8", "confidence": "4;5;4;3", 
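MiCaM's vocabulary construction, iteratively merging the most frequent fragment pairs, is analogous to byte-pair encoding. The sketch below runs the merge-by-frequency loop on 1-D fragment sequences as a stand-in for molecular graphs; the actual method merges subgraphs and additionally records how motifs connect:

```python
from collections import Counter

def mine_motifs(fragment_seqs, n_merges=3):
    """BPE-style mining: repeatedly merge the most frequent adjacent pair."""
    vocab = []
    for _ in range(n_merges):
        pairs = Counter(p for seq in fragment_seqs for p in zip(seq, seq[1:]))
        if not pairs:
            break
        (a, b), _ = pairs.most_common(1)[0]
        motif = a + b
        vocab.append(motif)
        merged = []
        for seq in fragment_seqs:
            out, i = [], 0
            while i < len(seq):
                if i + 1 < len(seq) and (seq[i], seq[i + 1]) == (a, b):
                    out.append(motif)
                    i += 2
                else:
                    out.append(seq[i])
                    i += 1
            merged.append(out)
        fragment_seqs = merged
    return vocab, fragment_seqs

vocab, seqs = mine_motifs([["C", "C", "O"], ["C", "C", "N"], ["C", "C", "O"]])
print(vocab, seqs)   # frequent fragment pairs become single motif tokens
```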
"correctness": "3;2;3;4", "technical_novelty": "3;2;3;4", "empirical_novelty": "3;3;2;4", "wc_summary_paper": "293;164;68;107", "wc_strength_and_weaknesses": "192;615;226;197", "wc_clarity_quality_novelty_and_reproducibility": "258;53;25;23", "wc_summary_review": "62;59;29;26", "wc_review": "805;891;348;353", "wc_reply_reviewers": "53;539;88;23", "wc_reply_authors": "1013;3181;2252;504", "reply_reviewers": "1;2;1;1", "reply_authors": "3;7;5;3", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 158.0, 85.091127622097 ], "wc_strength_and_weaknesses_avg": [ 307.5, 178.0091289793869 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 89.75, 97.86055129621946 ], "wc_summary_review_avg": [ 44.0, 16.56804152578089 ], "wc_review_avg": [ 599.25, 250.60763655563252 ], "wc_reply_reviewers_avg": [ 175.75, 210.98030121317012 ], "wc_reply_authors_avg": [ 1737.5, 1048.1918955992744 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 4.5, 1.6583123951777 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 9, 0 ], "corr_recommendation_confidence": -0.816496580927726, "corr_recommendation_correctness": 0.816496580927726, "gs_citation": 43, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=585561149174989554&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=Q_Jexl8-qDi", "email": "mail.ustc.edu.cn;ruc.edu.cn;microsoft.com;microsoft.com;;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;microsoft.com", "author_num": 9, "aff_unique_index": "0;1;2;2;0;0;0;2", "aff_unique_norm": "University of Science and Technology of China;Renmin University of China;Microsoft", "aff_unique_dep": ";;Microsoft Corporation", "aff_unique_url": "http://www.ustc.edu.cn;http://www.ruc.edu.cn;https://www.microsoft.com", "aff_unique_abbr": "USTC;RUC;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1;0;0;0;1", "aff_country_unique": "China;United States" }, { "title": "Direct Embedding of Temporal Network Edges via Time-Decayed Line Graphs", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11392", "id": "Qamz7Q_Ta1k", "poster": "/media/PosterPDFs/ICLR%202023/11392.png?t=1681511663.966272", "openreview": "https://openreview.net/forum?id=Qamz7Q_Ta1k", "slides": "https://iclr.cc/virtual/2023/poster/11392", "video": "https://iclr.cc/virtual/2023/poster/11392", "author_site": "Sudhanshu Chanpuriya, Ryan Rossi, Sungchul Kim, Tong Yu, Jane Hoffswell, Nedim Lipka, Shunan Guo, Cameron Musco", "tldr": "We propose a line graph-based method for temporal networks which directly embeds temporal edges.", "abstract": "Temporal networks model a variety of important phenomena involving timed interactions between entities. Existing methods for machine learning on temporal networks generally exhibit at least one of two limitations. First, many methods assume time to be discretized, so if the time data is continuous, the user must determine the discretization and discard precise time information. Second, edge representations can only be calculated indirectly from the nodes, which may be suboptimal for tasks like edge classification. 
We present a simple method that avoids both shortcomings: construct the line graph of the network, which includes a node for each interaction, and weigh the edges of this graph based on the difference in time between interactions. From this derived graph, edge representations for the original network can be computed with efficient classical methods. The simplicity of this approach facilitates explicit theoretical analysis: we can constructively show the effectiveness of our method's representations for a natural synthetic model of temporal networks. Empirical results on real-world networks demonstrate our method's efficacy and efficiency on both link classification and prediction.", "keywords": "temporal;networks;graphs;embedding", "primary_area": "", "supplementary_material": "/attachment/5df0f9f502417475dd75dec2b0ea2b6d64cc74be.zip", "author": "Sudhanshu Chanpuriya;Ryan A. Rossi;Sungchul Kim;Tong Yu;Jane Hoffswell;Nedim Lipka;Shunan Guo;Cameron N Musco", "authorids": "~Sudhanshu_Chanpuriya1;~Ryan_A._Rossi2;~Sungchul_Kim1;~Tong_Yu3;jhoffs@adobe.com;~Nedim_Lipka1;sguo@adobe.com;~Cameron_N_Musco1", "gender": ";;M;;;;;M", "homepage": ";;https://sites.google.com/site/subright;https://www.linkedin.com/in/tong-yu-42790744;;;;https://people.cs.umass.edu/~cmusco/", "dblp": ";;61/1573;32/1593-1;;;;149/2327", "google_scholar": ";;v8ISLgIAAAAJ;https://scholar.google.com/citations?hl=en;;;;EeYGZCwAAAAJ", "orcid": ";;0000-0003-3580-5290;0000-0002-5991-2050;;;;", "linkedin": ";;;tong-yu-42790744;;;;", "or_profile": "~Sudhanshu_Chanpuriya1;~Ryan_A._Rossi2;~Sungchul_Kim1;~Tong_Yu3;jhoffs@adobe.com;~Nedim_Lipka1;sguo@adobe.com;~Cameron_N_Musco1", "aff": ";;Adobe Systems;Adobe Research;;;;University of Massachusetts, Amherst", "aff_domain": ";;adobe.com;adobe.com;;;;umass.edu", "position": ";;Researcher;Senior Research Scientist;;;;Assistant Professor", "bibtex": "@inproceedings{\nchanpuriya2023direct,\ntitle={Direct Embedding of Temporal Network Edges via Time-Decayed Line Graphs},\nauthor={Sudhanshu Chanpuriya and Ryan A. 
Rossi and Sungchul Kim and Tong Yu and Jane Hoffswell and Nedim Lipka and Shunan Guo and Cameron N Musco},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=Qamz7Q_Ta1k}\n}", "github": "", "project": "", "reviewers": "Vkhz;wNXK;C63A;Pn6V;qEEe;GdB7;HmSu;CTmU", "pdf_size": 420516, "recommendation": "3;5;5;6;8;8;8;8", "confidence": "4;4;2;4;5;4;4;4", "correctness": "3;3;3;3;3;4;4;3", "technical_novelty": "2;3;3;2;4;3;3;3", "empirical_novelty": "0;2;3;3;3;3;3;3", "wc_summary_paper": "123;80;81;71;89;158;185;118", "wc_strength_and_weaknesses": "732;221;87;169;607;423;294;308", "wc_clarity_quality_novelty_and_reproducibility": "4;28;133;59;121;35;46;167", "wc_summary_review": "107;34;26;54;64;21;62;122", "wc_review": "966;363;327;353;881;637;587;715", "wc_reply_reviewers": "36;134;0;0;63;0;0;172", "wc_reply_authors": "785;661;365;666;914;645;298;1234", "reply_reviewers": "1;1;0;0;1;0;0;1", "reply_authors": "2;2;1;1;2;1;1;3", "recommendation_avg": [ 6.375, 1.79843682124227 ], "confidence_avg": [ 3.875, 0.7806247497997998 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.875, 0.5994789404140899 ], "empirical_novelty_avg": [ 2.5, 1.0 ], "wc_summary_paper_avg": [ 113.125, 38.38436367845636 ], "wc_strength_and_weaknesses_avg": [ 355.125, 206.32585726224428 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 74.125, 54.64073000061401 ], "wc_summary_review_avg": [ 61.25, 34.368408458932166 ], "wc_review_avg": [ 603.625, 228.9328162911556 ], "wc_reply_reviewers_avg": [ 50.625, 63.54316938113805 ], "wc_reply_authors_avg": [ 696.0, 277.8443809041313 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.625, 0.6959705453537527 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": 0.3895386619216038, "corr_recommendation_correctness": 0.5216720300383332, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2406462401185898802&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=Qamz7Q_Ta1k", "email": ";;adobe.com;adobe.com;;;;umass.edu", "author_num": 8, "aff_unique_index": "0;0;1", "aff_unique_norm": "Adobe;University of Massachusetts Amherst", "aff_unique_dep": "Adobe Systems Incorporated;", "aff_unique_url": "https://www.adobe.com;https://www.umass.edu", "aff_unique_abbr": "Adobe;UMass Amherst", "aff_campus_unique_index": "1", "aff_campus_unique": ";Amherst", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "QcA9iGaLpH4", "title": "What do large networks memorize?", "track": "main", "status": "Reject", "tldr": "Increasing model size may increase memorisation of certain training samples, while distillation inhibits memorisation", "abstract": "The success of modern neural models has prompted renewed study of the connection between memorisation and generalisation: such models typically generalise well, despite being able to perfectly fit (\"memorise\") completely random labels.\nTo more carefully study this issue, Feldman (2019); Feldman & Zhang (2020) provided a simple metric to quantify the degree of memorisation of a specific training example, and empirically quantified the corresponding memorisation profile of a ResNet model on image classification benchmarks.\nWhile an exciting first glimpse into how real-world models memorise, these studies leave open several questions about memorisation of practical networks.\nIn particular, how 
is memorisation affected by increasing model size, and by distilling a large model into a smaller one?\nWe present a systematic empirical analysis of these questions.\nOn standard image classification benchmarks, we find that training examples exhibit a diverse set of memorisation trajectories across model sizes, with some samples having increased memorisation under larger models.\nFurther, we find that distillation tends to inhibit memorisation of the student model, while also improving generalisation.\nFinally, we show that computationally tractable measures of memorisation do not capture the properties we identify for memorisation in the sense of Feldman (2019), despite highly correlating to the latter. ", "keywords": "memorization;overparameterization;example difficulty", "primary_area": "", "supplementary_material": "", "author": "Michal Lukasik;Aditya Krishna Menon;Ankit Singh Rawat;Vaishnavh Nagarajan;Sanjiv Kumar", "authorids": "~Michal_Lukasik1;~Aditya_Krishna_Menon1;~Ankit_Singh_Rawat1;~Vaishnavh_Nagarajan3;~Sanjiv_Kumar1", "gender": ";M;;M;M", "homepage": "https://mlukasik.github.io/;https://ankitsrawat.github.io/home/;http://www.sanjivk.com/;https://akmenon.github.io/;https://vaishnavh.github.io/", "dblp": "72/11338;https://dblp.org/pers/hd/r/Rawat:Ankit_Singh;;89/3514;161/0079", "google_scholar": "https://scholar.google.co.uk/citations?user=cLZLZCQAAAAJ;http://scholar.google.com/citations?user=U0_ab4cAAAAJ;https://scholar.google.com/citations?hl=en;;https://scholar.google.nl/citations?user=LrsjJfwAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Michal_Lukasik1;~Ankit_Singh_Rawat1;~Sanjiv_Kumar1;~Aditya_Menon1;~Vaishnavh_Nagarajan1", "aff": "Google Research;Google;Google;Google;Google", "aff_domain": "google.com;google.com;google.com;google.com;google.com", "position": "Research Scientist;Research Scientist;Research Scientist;Research Scientist;Researcher", "bibtex": "@misc{\nlukasik2023what,\ntitle={What do large networks memorize?},\nauthor={Michal Lukasik and Aditya Krishna Menon and Ankit Singh Rawat and Vaishnavh Nagarajan and Sanjiv Kumar},\nyear={2023},\nurl={https://openreview.net/forum?id=QcA9iGaLpH4}\n}", "github": "", "project": "", "reviewers": "wKAi;E5os;bHhw", "site": "https://openreview.net/forum?id=QcA9iGaLpH4", "pdf_size": 2655787, "recommendation": "5;6;6", "confidence": "4;5;3", "correctness": "4;4;3", "technical_novelty": "1;3;3", "empirical_novelty": "2;4;3", "wc_summary_paper": "168;90;95", "wc_strength_and_weaknesses": "314;241;386", "wc_clarity_quality_novelty_and_reproducibility": "252;77;18", "wc_summary_review": "100;779;25", "wc_review": "834;1187;524", "wc_reply_reviewers": "0;108;0", "wc_reply_authors": "987;1914;936", "reply_reviewers": "0;3;0", "reply_authors": "2;5;2", "recommendation_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.9428090415820634 ], "empirical_novelty_avg": [ 3.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 117.66666666666667, 35.64952859280034 ], "wc_strength_and_weaknesses_avg": [ 313.6666666666667, 59.19647136630884 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 115.66666666666667, 99.3657665172244 ], "wc_summary_review_avg": [ 301.3333333333333, 339.1463138856476 ], "wc_review_avg": [ 848.3333333333334, 270.85830654093337 ], "wc_reply_reviewers_avg": [ 36.0, 50.91168824543142 ], "wc_reply_authors_avg": [ 1279.0, 449.4952725001677 ], 
"reply_reviewers_avg": [ 1.0, 1.4142135623730951 ], "reply_authors_avg": [ 3.0, 1.4142135623730951 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": -0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:OhNJp1OvWAoJ:scholar.google.com/&scioq=What+do+large+networks+memorize%3F&hl=en&as_sdt=0,31", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google Research", "aff_unique_url": "https://research.google", "aff_unique_abbr": "Google Research", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "QcTbkoBycwk", "title": "Efficient Shapley Values Estimation by Amortization for Text Classification", "track": "main", "status": "Withdraw", "tldr": "We recognize the stability issue in model interpretation for text classifier and propose an amortized approach to generate stable interpretation efficiently.", "abstract": "Despite the popularity of Shapley Values in explaining neural text classification models, computing them is prohibitive for large pretrained models due to a large number of model evaluations as it needs to perform multiple model evaluations over various perturbed text inputs. In practice, Shapley Values are often estimated stochastically with a smaller number of model evaluations. However, we find that the estimated Shapley Values are quite sensitive to random seeds\u2014the top-ranked features often have little overlap under two different seeds, especially on examples with the longer input text. As a result, a much larger number of model evaluations is needed to reduce the sensitivity to an acceptable level. To mitigate the trade-off between stability and efficiency, we develop an amortized model that directly predicts Shapley Values of each input feature without additional model evaluation. It is trained on a set of examples with Shapley Values estimated from a large number of model evaluations to ensure stability. 
Experimental results on two text classification datasets demonstrate that the proposed amortized model can estimate black-box explanation scores in milliseconds per sample at inference time and is up to 60 times more efficient than traditional methods.", "keywords": "text classification;model interpretation;amortization", "primary_area": "", "supplementary_material": "", "author": "Chenghao Yang;Fan Yin;He He;Kai-Wei Chang;Xiaofei Ma;Bing Xiang", "authorids": "~Chenghao_Yang1;~Fan_Yin1;~He_He2;~Kai-Wei_Chang1;~Xiaofei_Ma1;~Bing_Xiang2", "gender": "M;M;M;M;;F", "homepage": "https://yangalan123.github.io/;;http://kwchang.net;https://www.amazon.science/author/xiaofei-ma;;http://hhexiy.github.io", "dblp": "229/4179;;18/2428;;;08/8618-1", "google_scholar": "B28fiOAAAAAJ;klShdV0AAAAJ;fqDBtzYAAAAJ;Pc2SfvMAAAAJ;A6yjdJAAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0001-5365-0072;;;", "linkedin": "chenghao-yang-857b51178/;fan-y-60b666180/;kai-wei-chang-41239040;xiaofei-ma-b3627928;;", "or_profile": "~Chenghao_Yang1;~Fan_Yin1;~Kai-Wei_Chang1;~Xiaofei_Ma1;~Bing_Xiang2;~He_He1", "aff": "Google;University of California, Los Angeles;Amazon;Amazon Web Services;Goldman Sachs;New York University", "aff_domain": "google.com;cs.ucla.edu;amazon.com;amazon.com;gs.com;nyu.edu", "position": "Student Researcher;PhD student;Researcher;Applied Science Manager;Managing Director;Assistant Professor", "bibtex": "@misc{\nyang2023efficient,\ntitle={Efficient Shapley Values Estimation by Amortization for Text Classification},\nauthor={Chenghao Yang and Fan Yin and He He and Kai-Wei Chang and Xiaofei Ma and Bing Xiang},\nyear={2023},\nurl={https://openreview.net/forum?id=QcTbkoBycwk}\n}", "github": "", "project": "", "reviewers": "NHYE;7fhm;4Km7;QxAx", "site": "https://openreview.net/forum?id=QcTbkoBycwk", "pdf_size": 526395, "recommendation": "3;3;5;8", "confidence": "4;4;4;4", "correctness": "4;4;3;3", "technical_novelty": "2;1;2;3", "empirical_novelty": "2;0;2;3", "wc_summary_paper": "74;38;47;54", "wc_strength_and_weaknesses": "189;85;270;103", "wc_clarity_quality_novelty_and_reproducibility": "7;41;28;102", "wc_summary_review": "49;31;17;70", "wc_review": "319;195;362;329", "wc_reply_reviewers": "0;111;0;49", "wc_reply_authors": "437;695;723;8", "reply_reviewers": "0;1;0;1", "reply_authors": "1;2;1;1", "recommendation_avg": [ 4.75, 2.0463381929681126 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 1.75, 1.0897247358851685 ], "wc_summary_paper_avg": [ 53.25, 13.254716141811564 ], "wc_strength_and_weaknesses_avg": [ 161.75, 73.82877149187843 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 44.5, 35.344730866141845 ], "wc_summary_review_avg": [ 41.75, 19.866743568083823 ], "wc_review_avg": [ 301.25, 63.3733974156349 ], "wc_reply_reviewers_avg": [ 40.0, 45.612498287201944 ], "wc_reply_authors_avg": [ 465.75, 286.8339022849287 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": -0.8551861104941366, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16766326899368840604&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff_unique_index": "0;1;2;2;3;4", "aff_unique_norm": "Google;University of California, Los Angeles;Amazon;Goldman Sachs;New York University", "aff_unique_dep": "Google;;Amazon.com, 
Inc.;;", "aff_unique_url": "https://www.google.com;https://www.ucla.edu;https://www.amazon.com;https://www.goldmansachs.com;https://www.nyu.edu", "aff_unique_abbr": "Google;UCLA;Amazon;GS;NYU", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Mountain View;Los Angeles;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Learning to Segment from Noisy Annotations: A Spatial Correction Approach", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11471", "id": "Qc_OopMEBnC", "poster": "/media/PosterPDFs/ICLR%202023/11471.png?t=1682962588.1702688", "openreview": "https://openreview.net/forum?id=Qc_OopMEBnC", "slides": "https://iclr.cc/virtual/2023/poster/11471", "video": "https://iclr.cc/virtual/2023/poster/11471", "author_site": "Michael Yao, Yikai Zhang, Songzhu Zheng, Mayank Goswami, Prateek Prasanna, Chao Chen", "tldr": "", "abstract": "Noisy labels can significantly affect the performance of deep neural networks (DNNs). In medical image segmentation tasks, annotations are error-prone due to the high demand in annotation time and in the annotators' expertise. Existing methods mostly tackle label noise in classification tasks. Their independent-noise assumptions do not fit label noise in segmentation task. In this paper, we propose a novel noise model for segmentation problems that encodes spatial correlation and bias, which are prominent in segmentation annotations. Further, to mitigate such label noise, we propose a label correction method to recover true label progressively. We provide theoretical guarantees of the correctness of the proposed method. Experiments show that our approach outperforms current state-of-the-art methods on both synthetic and real-world noisy annotations.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Jiachen Yao;Yikai Zhang;Songzhu Zheng;Mayank Goswami;Prateek Prasanna;Chao Chen", "authorids": "~Jiachen_Yao1;~Yikai_Zhang1;~Songzhu_Zheng1;~Mayank_Goswami1;~Prateek_Prasanna3;~Chao_Chen1", "gender": "M;;M;;M;M", "homepage": ";;;;https://you.stonybrook.edu/imaginelab/;https://chaochen.github.io/", "dblp": ";;226/4925;;133/6611;66/3019-12", "google_scholar": ";;vq0hpV4AAAAJ;;uyA1Q18AAAAJ;J-iIIFAAAAAJ", "orcid": ";;;;;0000-0003-1703-6483", "linkedin": "https://www.linkedin.com/feed/;;;;;", "or_profile": "~Jiachen_Yao1;~Yikai_Zhang1;~Songzhu_Zheng1;~Mayank_Goswami1;~Prateek_Prasanna3;~Chao_Chen1", "aff": ", State University of New York at Stony Brook;;Morgan Stanley;;State University of New York, Stony Brook;State University of New York, Stony Brook", "aff_domain": "cs.stonybrook.edu;;morganstanley.com;;stonybrook.edu;stonybrook.edu", "position": "PhD student;;Researcher;;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nyao2023learning,\ntitle={Learning to Segment from Noisy Annotations: A Spatial Correction Approach},\nauthor={Jiachen Yao and Yikai Zhang and Songzhu Zheng and Mayank Goswami and Prateek Prasanna and Chao Chen},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=Qc_OopMEBnC}\n}", "github": "", "project": "", "reviewers": "VNP8;VpVv;LKDX", "pdf_size": 7738484, "recommendation": "6;6;6", "confidence": "5;4;3", "correctness": "2;4;2", "technical_novelty": "2;4;3", "empirical_novelty": "2;4;3", "wc_summary_paper": "26;59;126", "wc_strength_and_weaknesses": "31;223;364", "wc_clarity_quality_novelty_and_reproducibility": "8;69;29", 
"wc_summary_review": "110;37;43", "wc_review": "175;388;562", "wc_reply_reviewers": "11;0;78", "wc_reply_authors": "787;245;1105", "reply_reviewers": "1;0;1", "reply_authors": "3;2;6", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "correctness_avg": [ 2.6666666666666665, 0.9428090415820634 ], "technical_novelty_avg": [ 3.0, 0.816496580927726 ], "empirical_novelty_avg": [ 3.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 70.33333333333333, 41.60395280365664 ], "wc_strength_and_weaknesses_avg": [ 206.0, 136.47710430691296 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 35.333333333333336, 25.30261295246446 ], "wc_summary_review_avg": [ 63.333333333333336, 33.08910528994232 ], "wc_review_avg": [ 375.0, 158.25928092848142 ], "wc_reply_reviewers_avg": [ 29.666666666666668, 34.4705993887867 ], "wc_reply_authors_avg": [ 712.3333333333334, 355.0411556738489 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 3.6666666666666665, 1.699673171197595 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1453268456578337099&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=Qc_OopMEBnC", "email": "cs.stonybrook.edu;;morganstanley.com;;stonybrook.edu;stonybrook.edu", "author_num": 6, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "State University of New York at Stony Brook;Morgan Stanley;State University of New York", "aff_unique_dep": ";;", "aff_unique_url": "https://www.stonybrook.edu;https://www.morganstanley.com;https://www.stonybrook.edu", "aff_unique_abbr": "SUNY Stony Brook;Morgan Stanley;SUNY Stony Brook", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stony Brook;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "QcffIcjq8bl", "title": "Dynamic Pretraining of Vision-Language Models", "track": "main", "status": "Withdraw", "tldr": "We propose a dynamic pretraining resampling of tasks that learns faster and better models", "abstract": " Vision-Language pretraining aims to learn universal cross-modal representations and to create models with broad capabilities. In this paper, we propose a novel dynamic pretraining resampling for a variety of pretraining tasks. Unlike recent large-scale vision-language approaches, we show that a set of diverse self- and weakly-supervised pretraining tasks dynamically sampled according to task difficulty provides strong performance. Further, the approach is sample-efficient, using much less data and compute to address a range of downstream tasks. We show that a single 330M pretrained model using only smaller and publicly accessible datasets, achieves competitive or SOTA performance on three diverse groups of tasks: visual question answering, text-based image localization by referring expressions, and video question answering. 
The code will be released.", "keywords": "pretraining;vision language;sampling;curriculum learning", "primary_area": "", "supplementary_material": "", "author": "AJ Piergiovanni;Weicheng Kuo;Wei Li;Anelia Angelova", "authorids": "~AJ_Piergiovanni1;~Weicheng_Kuo1;~Wei_Li50;~Anelia_Angelova1", "gender": ";M;F;", "homepage": "http://homes.sice.indiana.edu/ajpiergi/;https://weichengkuo.github.io/;;https://research.google/people/aneliaangelova/", "dblp": "175/9876;163/2203;;46/3065", "google_scholar": "https://scholar.google.com/citations?hl=en;;bEL0CR4AAAAJ;nkmDOPgAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~AJ_Piergiovanni1;~Weicheng_Kuo1;~Wei_Li50;~Anelia_Angelova1", "aff": "Google;Google Deepmind;;California Institute of Technology", "aff_domain": "google.com;google.com;;caltech.edu", "position": "Research Scientist;Research Scientist;;PhD student", "bibtex": "@misc{\npiergiovanni2023dynamic,\ntitle={Dynamic Pretraining of Vision-Language Models},\nauthor={AJ Piergiovanni and Weicheng Kuo and Wei Li and Anelia Angelova},\nyear={2023},\nurl={https://openreview.net/forum?id=QcffIcjq8bl}\n}", "github": "", "project": "", "reviewers": "b4o9;7bWb;Z2FK;1KUu", "site": "https://openreview.net/forum?id=QcffIcjq8bl", "pdf_size": 416801, "recommendation": "3;5;5;6", "confidence": "5;4;4;5", "correctness": "3;3;3;3", "technical_novelty": "2;3;2;3", "empirical_novelty": "2;3;3;0", "wc_summary_paper": "116;97;110;60", "wc_strength_and_weaknesses": "517;288;538;190", "wc_clarity_quality_novelty_and_reproducibility": "246;25;162;22", "wc_summary_review": "45;35;36;54", "wc_review": "924;445;846;326", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 4.5, 0.5 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.0, 1.224744871391589 ], "wc_summary_paper_avg": [ 95.75, 21.75287337341897 ], "wc_strength_and_weaknesses_avg": [ 383.25, 148.53850510894472 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 113.75, 95.01677483476273 ], "wc_summary_review_avg": [ 42.5, 7.697402159170326 ], "wc_review_avg": [ 635.25, 254.76594650777014 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.2294157338705618, "corr_recommendation_correctness": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15165407639943036936&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Google;DeepMind;California Institute of Technology", "aff_unique_dep": "Google;DeepMind;", "aff_unique_url": "https://www.google.com;https://deepmind.com;https://www.caltech.edu", "aff_unique_abbr": "Google;DeepMind;Caltech", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Mountain View;;Pasadena", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Provably Efficient Lifelong Reinforcement Learning with Linear Representation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11382", "id": "Qd0p0bl-A9t", "poster": "/media/PosterPDFs/ICLR%202023/11382.png?t=1680995685.6322505", "openreview": "https://openreview.net/forum?id=Qd0p0bl-A9t", "slides": "https://iclr.cc/virtual/2023/poster/11382", "video": 
"https://iclr.cc/virtual/2023/poster/11382", "author_site": "Sanae Amani, Lin Yang, Ching-An Cheng", "tldr": "We study lifelong RL, where the agent needs to solve a streaming sequence of tasks. We propose an algorithm with provable sublinear regret using sublinear number of planning calls for any sequence of tasks.", "abstract": "We theoretically study lifelong reinforcement learning (RL) with linear representation in a regret minimization setting. The goal of the agent is to learn a multi-task policy based on a linear representation while solving a sequence of tasks that may be adaptively chosen based on the agent's past behaviors. We frame the problem as a linearly parameterized contextual Markov decision process (MDP), where each task is specified by a context and the transition dynamics is context-independent, and we introduce a new completeness-style assumption on the representation which is sufficient to ensure the optimal multi-task policy is realizable under the linear representation. Under this assumption, we propose an algorithm, called UCB Lifelong Value Distillation (UCBlvd), that provably achieves sublinear regret for any sequence of tasks while using only sublinear planning calls. Specifically, for $K$ task episodes of horizon $H$, our algorithm has a regret bound $\\tilde{\\mathcal{O}}(\\sqrt{(d^3+d^\\prime d)H^4K})$ using $\\mathcal{O}(dH\\log(K))$ number of planning calls, where $d$ and $d^\\prime$ are the feature dimensions of the dynamics and rewards, respectively. This theoretical guarantee implies that our algorithm can enable a lifelong learning agent to learn to internalize experiences into a multi-task policy and rapidly solve new tasks.", "keywords": "Lifelong RL;Contextual MDP;Regret;Planning calls;Computation sharing;Streaming sequence of adversarial tasks", "primary_area": "", "supplementary_material": "/attachment/451e2a48aeb35fddb56c1d4f6d10130c29231ca0.zip", "author": "Sanae Amani;Lin Yang;Ching-An Cheng", "authorids": "~Sanae_Amani1;~Lin_Yang12;~Ching-An_Cheng1", "gender": "F;M;M", "homepage": ";http://www.chinganc.com;http://www.drlinyang.net", "dblp": ";123/6369;166/6264", "google_scholar": "qlwIFJsAAAAJ;bMZFLZ_V4goC;umivlPQAAAAJ", "orcid": ";;", "linkedin": "sanaeamani/;;", "or_profile": "~Sanae_Amani1;~Ching-An_Cheng1;~lin_Yang1", "aff": "University of California, Los Angeles;Microsoft Research;University of California, Los Angeles", "aff_domain": "ucla.edu;microsoft.com;ucla.edu", "position": "PhD student;Principal Researcher;Assistant Professor", "bibtex": "@inproceedings{\namani2023provably,\ntitle={Provably Efficient Lifelong Reinforcement Learning with Linear Representation},\nauthor={Sanae Amani and Lin Yang and Ching-An Cheng},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=Qd0p0bl-A9t}\n}", "github": "", "project": "", "reviewers": "vHiq;yDMc;HSfE;hkRh", "pdf_size": 722879, "recommendation": "6;6;6;6", "confidence": "3;3;4;3", "correctness": "4;4;4;4", "technical_novelty": "3;2;3;2", "empirical_novelty": "3;0;3;1", "wc_summary_paper": "137;143;41;83", "wc_strength_and_weaknesses": "149;146;174;237", "wc_clarity_quality_novelty_and_reproducibility": "80;110;8;57", "wc_summary_review": "12;69;29;26", "wc_review": "378;468;252;403", "wc_reply_reviewers": "0;0;18;0", "wc_reply_authors": "888;528;678;794", "reply_reviewers": "0;0;1;0", "reply_authors": "6;3;5;3", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 
4.0, 0.0 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 1.75, 1.299038105676658 ], "wc_summary_paper_avg": [ 101.0, 41.78516483155236 ], "wc_strength_and_weaknesses_avg": [ 176.5, 36.58209944768069 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 63.75, 37.27180569814132 ], "wc_summary_review_avg": [ 34.0, 21.20141504711419 ], "wc_review_avg": [ 375.25, 78.3752990424917 ], "wc_reply_reviewers_avg": [ 4.5, 7.794228634059948 ], "wc_reply_authors_avg": [ 722.0, 134.45445325462447 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 4.25, 1.299038105676658 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7031530379089169603&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=Qd0p0bl-A9t", "email": "ucla.edu;microsoft.com;ucla.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of California, Los Angeles;Microsoft", "aff_unique_dep": ";Microsoft Research", "aff_unique_url": "https://www.ucla.edu;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "UCLA;MSR", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "QfLU7FtXDUn", "title": "SpQAT: A Sparse Quantization-Aware Training Method", "track": "main", "status": "Withdraw", "tldr": "We develop an efficient sparse QAT method, dubbed SpQAT, based on the partly scratch-off lottery ticket phenomenon we observed.", "abstract": "Quantization-aware training (QAT) has been demonstrated to not only reduce computational cost and storage footprint, but well retain the performance of full-precision neural networks. However, the tedious retraining requirement greatly weakens the practical value of QAT methods. In this paper, we attempt to reduce the training costs of QAT methods, which to our best knowledge are barely investigated in the literature. Our motive stands upon a straightforward-yet-valuable observation: A large portion of quantized weights, referred to as the partly scratch-off lottery ticket, reach the optimal quantization level after a few training epochs. This naturally inspires us to reduce computation by freezing these weights in the remaining training period. Accordingly, we develop an efficient sparse QAT method, dubbed SpQAT. It freezes a weight once the distance between the full-precision one and its quantization level is smaller than a controllable threshold. Along these lines, we show that the proposed SpQAT accurately identifies the partly scratch-off lottery ticket and results in a sparse weight gradient where many weights are pulled out of the training and their related computations are avoided. Extensive experiments demonstrate the efficacy of our SpQAT with 20%-60% weight gradient sparsity. 
With the elimination of related gradient calculation in the backward propagation, the performance of our SpQAT is still on par with or even better than the compared baseline.", "keywords": "efficient training;quantization-aware training;network quantization", "primary_area": "", "supplementary_material": "/attachment/74cfc3c74e8ebb7952466d8e2c4554101171729f.zip", "author": "Yunshan Zhong;Mingbao Lin;Gongrui Nan;Fei Chao;Rongrong Ji", "authorids": "~Yunshan_Zhong1;~Mingbao_Lin1;~Gongrui_Nan1;~Fei_Chao1;~Rongrong_Ji5", "gender": "M;M;M;M;M", "homepage": ";http://lmb.bjbxit.cn/;;https://cogsci.xmu.edu.cn/info/1034/1249.htm;http://mac.xmu.edu.cn/rrji-en.html", "dblp": "239/4066;211/5903;;118/5221-1.html;86/5681", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;Dp3L1bsAAAAJ;xoy1rN4AAAAJ;srS6rNMAAAAJ;", "orcid": ";0000-0003-1764-1894;;;", "linkedin": ";mingbao-lin-890444105/;;;", "or_profile": "~Yunshan_Zhong1;~Mingbao_Lin1;~Gongrui_Nan1;~Fei_Chao1;~Rongrong_Ji5", "aff": "Xiamen University;Xiamen University;Xiamen University;Xiamen University;Xiamen University", "aff_domain": "xmu.edu.cn;xmu.edu.cn;xmu.edu.cn;xmu.edu.cn;xmu.edu.cn", "position": "PhD student;PhD student;MS student;Associate Professor;Full Professor", "bibtex": "@misc{\nzhong2023spqat,\ntitle={Sp{QAT}: A Sparse Quantization-Aware Training Method},\nauthor={Yunshan Zhong and Mingbao Lin and Gongrui Nan and Fei Chao and Rongrong Ji},\nyear={2023},\nurl={https://openreview.net/forum?id=QfLU7FtXDUn}\n}", "github": "", "project": "", "reviewers": "rrxG;2SwB;iTwj;eoge", "site": "https://openreview.net/forum?id=QfLU7FtXDUn", "pdf_size": 996383, "recommendation": "3;3;5;5", "confidence": "4;4;4;4", "correctness": "3;2;4;3", "technical_novelty": "2;1;2;2", "empirical_novelty": "2;1;2;2", "wc_summary_paper": "103;52;74;46", "wc_strength_and_weaknesses": "200;243;351;186", "wc_clarity_quality_novelty_and_reproducibility": "17;42;61;25", "wc_summary_review": "48;32;56;17", "wc_review": "368;369;542;274", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 68.75, 22.353690970396812 ], "wc_strength_and_weaknesses_avg": [ 245.0, 64.703168392282 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 36.25, 16.90229274388537 ], "wc_summary_review_avg": [ 38.25, 15.006248698458919 ], "wc_review_avg": [ 388.25, 96.78939766317383 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.7071067811865475, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:YBN2R52wNs8J:scholar.google.com/&scioq=SpQAT:+A+Sparse+Quantization-Aware+Training+Method&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Xiamen University", "aff_unique_dep": "", "aff_unique_url": "https://www.xmu.edu.cn", "aff_unique_abbr": "XMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "Qi4oCA89CmO", "title": "Why Did This Model Forecast This Future? 
Information-Theoretic Temporal Saliency for Counterfactual Explanations of Probabilistic Forecasts", "track": "main", "status": "Reject", "tldr": "We propose an information-theoretic saliency-based framework for counterfactual reasoning in probabilistic forecasting. For common distributions, we obtain a closed-form expression for the saliency of observed timesteps towards a model's forecasts. ", "abstract": "Probabilistic forecasting of multivariate time series is significant to several research domains where multiple futures exist for a single observed sequence. Identifying the observations on which a well-performing model bases its forecasts can enable domain experts to form data-driven hypotheses about the causal relationships between features. Consequently, we begin by revisiting the question: what constitutes a causal explanation? One hurdle in the landscape of explainable artificial intelligence is that what constitutes an explanation is not well-grounded. We build upon Miller's framework of explanations derived from research in multiple social science disciplines, and establish a conceptual link between counterfactual reasoning and saliency-based explanation techniques. However, the complication is a lack of a consistent and principled notion of saliency. Also, commonly derived saliency maps may be inconsistent with the data generation process and the underlying model. We therefore leverage a unifying definition of information-theoretic saliency grounded in preattentive human visual cognition and extend it to forecasting settings. In contrast to existing methods that require either explicit training of the saliency mechanism or access to the internal parameters of the underlying model, we obtain a closed-form solution for the resulting saliency map for commonly used density functions in probabilistic forecasting. To empirically evaluate our explainability framework in a principled manner, we construct a synthetic dataset of conversation dynamics and demonstrate that our method recovers the true salient timesteps for a forecast given a well-performing underlying model.", "keywords": "probabilistic forecasting;saliency;explainability", "primary_area": "", "supplementary_material": "/attachment/3624285712462a5559ee933f7edc26d7a4ddccbf.zip", "author": "Chirag Raman;Hayley Hung;Marco Loog", "authorids": "~Chirag_Raman2;~Hayley_Hung2;~Marco_Loog1", "gender": "M;F;", "homepage": "http://chiragraman.com;http://homepage.tudelft.nl/3e2t5/;", "dblp": "195/8280;13/4646.html;", "google_scholar": "TeoDF6MAAAAJ;ka-LsrYAAAAJ;", "orcid": ";0000-0001-9574-5395;", "linkedin": ";hayley-hung-2b89591/;", "or_profile": "~Chirag_Raman2;~Hayley_Hung2;~Marco_Loog1", "aff": "Delft University of Technology;Delft University of Technology;", "aff_domain": "tudelft.nl;tudelft.nl;", "position": "Assistant Professor;Associate Professor;", "bibtex": "@misc{\nraman2023why,\ntitle={Why Did This Model Forecast This Future? 
Information-Theoretic Temporal Saliency for Counterfactual Explanations of Probabilistic Forecasts},\nauthor={Chirag Raman and Hayley Hung and Marco Loog},\nyear={2023},\nurl={https://openreview.net/forum?id=Qi4oCA89CmO}\n}", "github": "", "project": "", "reviewers": "bLo2;YgxY;7H98;PbXQ", "site": "https://openreview.net/forum?id=Qi4oCA89CmO", "pdf_size": 8882567, "recommendation": "3;3;3;5", "confidence": "3;5;4;3", "correctness": "3;2;3;2", "technical_novelty": "3;2;2;3", "empirical_novelty": "1;2;1;2", "wc_summary_paper": "60;43;60;79", "wc_strength_and_weaknesses": "59;900;97;226", "wc_clarity_quality_novelty_and_reproducibility": "62;41;20;41", "wc_summary_review": "218;47;65;42", "wc_review": "399;1031;242;388", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "724;1653;430;594", "reply_reviewers": "0;0;0;0", "reply_authors": "2;3;2;2", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 1.5, 0.5 ], "wc_summary_paper_avg": [ 60.5, 12.737739202856996 ], "wc_strength_and_weaknesses_avg": [ 320.5, 340.2517450359366 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 41.0, 14.849242404917497 ], "wc_summary_review_avg": [ 93.0, 72.67392930067838 ], "wc_review_avg": [ 515.0, 304.2901575798994 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 850.25, 475.0317752529824 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.5222329678670935, "corr_recommendation_correctness": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:utOf_w--IU4J:scholar.google.com/&scioq=Why+Did+This+Model+Forecast+This+Future%3F+Information-Theoretic+Temporal+Saliency+for+Counterfactual+Explanations+of+Probabilistic+Forecasts&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Delft University of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.tudelft.nl", "aff_unique_abbr": "TU Delft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Netherlands" }, { "id": "QiORiW-NNqr", "title": "Low-complexity Deep Video Compression with A Distributed Coding Architecture", "track": "main", "status": "Withdraw", "tldr": "We design the first end-to-end distributed deep video compression framework based on the distributed coding paradigm, which outperforms traditional distributed video codecs and achieves competitive performance with H.264. ", "abstract": "Prevalent video compression methods follow a $predictive\\;coding$ architecture that relies on a heavy encoder to exploit the statistical redundancy, which makes it challenging to deploy them on resource-constrained devices. Meanwhile, as early as the 1970s, distributed source coding theory, namely, Slepian-Wolf and Wyner-Ziv theorems, has indicated that efficient compression of correlated sources can be achieved by exploiting the source statistics at the decoder only, with the help of effective side information (SI). This has inspired a $distributed\\;coding$ architecture that is promising to reduce the encoder complexity. While there have been some attempts to develop practical distributed video coding systems, traditional methods suffer from a substantial performance gap to the predictive coding architecture. 
Inspired by the recent successes of deep learning in enhancing image and video compression, we propose the first end-to-end distributed deep video compression (Distributed DVC) framework with neural network-based modules that can be optimized to improve the rate-distortion performance. A key ingredient is an effective SI generation module at the decoder, which helps to effectively exploit the inter-frame correlation without computation-intensive encoder-side motion estimation and compensation. Experiments show that Distributed DVC significantly outperforms conventional distributed video coding methods and H.264. Meanwhile, it enjoys $6\\sim7$ times encoding speedup against DVC with only 1.61\\% increase in the bitrate for 1080P test videos on the UVG dataset.", "keywords": "Deep video compression;distributed coding;low encoder complexity", "primary_area": "", "supplementary_material": "", "author": "Xinjie Zhang;Jiawei Shao;Jun Zhang", "authorids": "~Xinjie_Zhang2;~Jiawei_Shao1;~Jun_Zhang25", "gender": "M;;", "homepage": "https://xinjie-q.github.io/;https://shaojiawei07.github.io/;https://eejzhang.people.ust.hk/", "dblp": ";251/9479;z/JunZhang4", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;p26zthIAAAAJ;1Is687QAAAAJ", "orcid": "0000-0002-3194-7518;0000-0001-8836-1430;0000-0002-5222-1898", "linkedin": ";;", "or_profile": "~Xinjie_Zhang2;~Jiawei_Shao1;~Jun_Zhang25", "aff": "SenseTime;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology", "aff_domain": "sensetime.com;ust.hk;ust.hk", "position": "Intern;PhD student;Associate Professor", "bibtex": "@misc{\nzhang2023lowcomplexity,\ntitle={Low-complexity Deep Video Compression with A Distributed Coding Architecture},\nauthor={Xinjie Zhang and Jiawei Shao and Jun Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=QiORiW-NNqr}\n}", "github": "", "project": "", "reviewers": "zQMg;a1Kx;6dhN", "site": "https://openreview.net/forum?id=QiORiW-NNqr", "pdf_size": 51663508, "recommendation": "3;5;6", "confidence": "4;5;3", "correctness": "2;4;3", "technical_novelty": "2;3;2", "empirical_novelty": "3;3;2", "wc_summary_paper": "86;87;67", "wc_strength_and_weaknesses": "402;301;26", "wc_clarity_quality_novelty_and_reproducibility": "61;35;13", "wc_summary_review": "77;43;191", "wc_review": "626;466;297", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 80.0, 9.201449161228174 ], "wc_strength_and_weaknesses_avg": [ 243.0, 158.88570315376606 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 36.333333333333336, 19.61858529274955 ], "wc_summary_review_avg": [ 103.66666666666667, 63.294725074228914 ], "wc_review_avg": [ 463.0, 134.33043834763092 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.3273268353539886, "corr_recommendation_correctness": 0.6546536707079772, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11067492964177695980&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 5, 
"aff_unique_index": "0;1;1", "aff_unique_norm": "SenseTime;Hong Kong University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.sensetime.com;https://www.ust.hk", "aff_unique_abbr": "SenseTime;HKUST", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "QjHSvrwkT8S", "title": "The Use of Open-Source Boards for Data Collection and Machine Learning in Remote Deployments", "track": "main", "status": "Reject", "tldr": "This paper describes how open source hardware are used for data collection and machine learning tasks in off-grid setups. ", "abstract": "Machine learning is being adopted in many walks of life to solve various problems. This is being driven by development of robust machine learning algorithms, availability of large datasets and low cost computation resources. Some machine learning applications require deployment of devices off-the-grid for data collection and real time monitoring. Such applications require development of systems that can operate autonomously during their deployment. Advancement in technology has seen development of low-cost and low-power open-source microcontrollers and single board computers. These boards can be interfaced with a wide array of sensors and can perform computation processes. The boards are finding wide applications in data collection and machine learning initiatives. This paper will describe how the boards are leveraged for off-grid deployments.", "keywords": "Open-source hardware;single board computer;microcontroller;on-board processing;edge computing;field programmable gate array", "primary_area": "", "supplementary_material": "", "author": "Gabriel Kiarie;Jason Kabi;Lorna Mugambi;Ciira wa Maina", "authorids": "~Gabriel_Kiarie1;~Jason_Kabi1;~Lorna_Mugambi1;~Ciira_wa_Maina1", "gender": "M;;F;M", "homepage": "https://kiariegabriel.github.io/;https://kabi23.github.io;;http://ciirawamaina.com/", "dblp": ";;;", "google_scholar": ";;;tSE5kyUAAAAJ", "orcid": "0000-0003-1413-7270;;;", "linkedin": "gabriel-kiarie-156351131/;;lorna-mugambi/;", "or_profile": "~Gabriel_Kiarie1;~Jason_Kabi1;~Lorna_Mugambi1;~Ciira_wa_Maina1", "aff": "Dedan Kimathi University of Technology;Dedan Kimathi University of Technology;Dedan Kimathi University of Technology;Dedan Kimathi University of Technology", "aff_domain": "dkut.ac.ke;dkut.ac.ke;dkut.ac.ke;dkut.ac.ke", "position": "Researcher;Researcher;MS student;Lecturer", "bibtex": "@misc{\nkiarie2023the,\ntitle={The Use of Open-Source Boards for Data Collection and Machine Learning in Remote Deployments},\nauthor={Gabriel Kiarie and Jason Kabi and Lorna Mugambi and Ciira wa Maina},\nyear={2023},\nurl={https://openreview.net/forum?id=QjHSvrwkT8S}\n}", "github": "", "project": "", "reviewers": "eTp3;3dHd;sMf8;HTpc", "site": "https://openreview.net/forum?id=QjHSvrwkT8S", "pdf_size": 2313558, "recommendation": "1;1;1;1", "confidence": "5;4;5;4", "correctness": "4;4;3;1", "technical_novelty": "1;1;1;1", "empirical_novelty": "0;0;1;1", "wc_summary_paper": "67;48;20;50", "wc_strength_and_weaknesses": "42;55;39;103", "wc_clarity_quality_novelty_and_reproducibility": "25;12;16;61", "wc_summary_review": "32;373;19;11", "wc_review": "166;488;94;225", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 1.0, 0.0 ], "confidence_avg": [ 4.5, 0.5 ], "correctness_avg": [ 3.0, 1.224744871391589 ], 
"technical_novelty_avg": [ 1.0, 0.0 ], "empirical_novelty_avg": [ 0.5, 0.5 ], "wc_summary_paper_avg": [ 46.25, 16.857861667483217 ], "wc_strength_and_weaknesses_avg": [ 59.75, 25.68438241422207 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 28.5, 19.345542122153102 ], "wc_summary_review_avg": [ 108.75, 152.74877249915954 ], "wc_review_avg": [ 243.25, 148.72688896094076 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:31SvOMJsia8J:scholar.google.com/&scioq=The+Use+of+Open-Source+Boards+for+Data+Collection+and+Machine+Learning+in+Remote+Deployments&hl=en&as_sdt=0,5", "gs_version_total": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Dedan Kimathi University of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.dkuet.ac.ke", "aff_unique_abbr": "DKUT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Kenya" }, { "title": "Revocable Deep Reinforcement Learning with Affinity Regularization for Outlier-Robust Graph Matching", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12157", "id": "QjQibO3scV_", "poster": "/media/PosterPDFs/ICLR%202023/12157.png?t=1682345361.0127704", "openreview": "https://openreview.net/forum?id=QjQibO3scV_", "slides": "https://iclr.cc/virtual/2023/poster/12157", "video": "https://iclr.cc/virtual/2023/poster/12157", "author_site": "Chang Liu, Zetian Jiang, Runzhong Wang, Lingxiao Huang, Pinyan Lu, Junchi Yan", "tldr": "", "abstract": "Graph matching (GM) has been a building block in various areas including computer vision and pattern recognition. Despite recent impressive progress, existing deep GM methods often have obvious difficulty in handling outliers, which are ubiquitous in practice. We propose a deep reinforcement learning based approach RGM, whose sequential node matching scheme naturally fits the strategy for selective inlier matching against outliers. A revocable action framework is devised to improve the agent's flexibility against the complex constrained GM. Moreover, we propose a quadratic approximation technique to regularize the affinity score, in the presence of outliers. As such, the agent can finish inlier matching timely when the affinity score stops growing, for which otherwise an additional parameter i.e. the number of inliers is needed to avoid matching outliers. In this paper, we focus on learning the back-end solver under the most general form of GM: the Lawler's QAP, whose input is the affinity matrix. Especially, our approach can also boost existing GM methods that use such input. 
Experiments on multiple real-world datasets demonstrate its performance regarding both accuracy and robustness.", "keywords": "Graph Matching;Reinforcement Learning;Quadratic Assignment;Affinity Regularization;Combinatorial Optimization.", "primary_area": "", "supplementary_material": "", "author": "Chang Liu;Zetian Jiang;Runzhong Wang;Lingxiao Huang;Pinyan Lu;Junchi Yan", "authorids": "~Chang_Liu7;~Zetian_Jiang1;~Runzhong_Wang1;~Lingxiao_Huang2;~Pinyan_Lu2;~Junchi_Yan2", "gender": "M;M;M;M;;M", "homepage": "https://only-changer.github.io/;http://thinklab.sjtu.edu.cn/member.html;http://runzhong.wang;https://sites.google.com/site/lingxiaohuang1990;http://pinyanlu.com;http://thinklab.sjtu.edu.cn/", "dblp": "52/5716;;239/4351;119/4814.html;;60/7949.html", "google_scholar": "BTu8eaQAAAAJ;;uoM0g3cAAAAJ;;;ga230VoAAAAJ", "orcid": ";;0000-0002-9566-738X;;;0000-0001-9639-7679", "linkedin": ";;;;;", "or_profile": "~Chang_Liu7;~Zetian_Jiang1;~Runzhong_Wang1;~Lingxiao_Huang2;~Pinyan_Lu2;~Junchi_Yan1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Nanjing University;Institute for Theoretical Computer Science;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;nju.edu.cn;shufe.edu.cn;sjtu.edu.cn", "position": "PhD student;PhD student;PhD student;Associate Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nliu2023revocable,\ntitle={Revocable Deep Reinforcement Learning with Affinity Regularization for Outlier-Robust Graph Matching},\nauthor={Chang Liu and Zetian Jiang and Runzhong Wang and Lingxiao Huang and Pinyan Lu and Junchi Yan},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=QjQibO3scV_}\n}", "github": "", "project": "", "reviewers": "QGQe;bTMW;6KL8", "pdf_size": 3222972, "recommendation": "5;6;8", "confidence": "5;4;4", "correctness": "4;3;3", "technical_novelty": "2;3;3", "empirical_novelty": "3;3;3", "wc_summary_paper": "134;60;193", "wc_strength_and_weaknesses": "201;207;281", "wc_clarity_quality_novelty_and_reproducibility": "109;9;88", "wc_summary_review": "157;34;81", "wc_review": "601;310;643", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1360;678;382", "reply_reviewers": "0;0;0", "reply_authors": "2;1;1", "recommendation_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 129.0, 54.41200847852123 ], "wc_strength_and_weaknesses_avg": [ 229.66666666666666, 36.38070306571267 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 68.66666666666667, 43.05293898859351 ], "wc_summary_review_avg": [ 90.66666666666667, 50.6776303927307 ], "wc_review_avg": [ 518.0, 148.07430567117308 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 806.6666666666666, 409.50159408182475 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.7559289460184544, "corr_recommendation_correctness": -0.7559289460184545, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6595718348044113973&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=QjQibO3scV_", 
"email": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;nju.edu.cn;shufe.edu.cn;sjtu.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;1;2;0", "aff_unique_norm": "Shanghai Jiao Tong University;Nanjing University;Institute for Theoretical Computer Science", "aff_unique_dep": ";;Theoretical Computer Science", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.nju.edu.cn;https://www.tu-dresden.de/ics", "aff_unique_abbr": "SJTU;Nanjing U;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;0", "aff_country_unique": "China;Germany" }, { "id": "QlK8nHnY8zc", "title": "NGswin: N-Gram Swin Transformer for Efficient Single Image Super-Resolution", "track": "main", "status": "Withdraw", "tldr": "In our efficient NGswin, N-Gram context is proposed for deep learning in single image super-resolution for the first in history.", "abstract": "In single image super-resolution (SISR), many deep learning-based methods suffer from intensive computational operations. In addition, while Swin Transformer-based methods such as SwinIR established state-of-the-art results, they still hold the problem of ignoring the broad regions when computing window self-attention (WSA) to reconstruct high-frequency information. In this paper, we propose the efficient NGswin network, which is the first attempt in history to introduce N-Gram to deep learning in images. For text analysis, N-Gram is a sequence of consecutive characters or words, but in an image, we define N-Gram as neighboring local windows (in WSA of Swin Transformer) which interact with each other by sliding-WSA. We propose N-Gram interaction, SCDP bottleneck, and a pooling-cascading mechanism, which enable the network to consider broad regions beneficial to recovering the degraded neighbor pixels. Moreover, we employ a hierarchical encoder with patch-merging, uni-Gram embedding, and a compact decoder to NGswin to enhance the network efficiency. 
Experimental results show that the proposed model achieves competitive performance in terms of PSNR and SSIM scores with fewer operations (Mult-Adds) compared to other methods.", "keywords": "N-Gram;Single Image Super-Resolution;Swin Transformer;Efficiency;Deep Learning", "primary_area": "", "supplementary_material": "", "author": "Haram Choi;Jeongmin Lee;Jihoon Yang", "authorids": "~Haram_Choi1;~Jeongmin_Lee1;~Jihoon_Yang1", "gender": "M;M;M", "homepage": ";;http://mllab.sogang.ac.kr", "dblp": "334/0086;;38/305", "google_scholar": "V7sp-zoAAAAJ;;", "orcid": "0009-0001-5469-2319;;", "linkedin": "haram-choi-9a0124244;jeongmin-lee-9a7399186/;", "or_profile": "~Haram_Choi1;~Jeongmin_Lee1;~Jihoon_Yang1", "aff": "Sogang University;LG Innotek;Sogang University", "aff_domain": "sogang.ac.kr;lginnotek.com;sogang.ac.kr", "position": "MS student;Researcher;Full Professor", "bibtex": "@misc{\nchoi2023ngswin,\ntitle={{NG}swin: N-Gram Swin Transformer for Efficient Single Image Super-Resolution},\nauthor={Haram Choi and Jeongmin Lee and Jihoon Yang},\nyear={2023},\nurl={https://openreview.net/forum?id=QlK8nHnY8zc}\n}", "github": "", "project": "", "reviewers": "", "site": "https://openreview.net/forum?id=QlK8nHnY8zc", "pdf_size": 5714536, "recommendation": "", "confidence": "", "correctness": "", "technical_novelty": "", "empirical_novelty": "", "wc_summary_paper": "", "wc_strength_and_weaknesses": "", "wc_clarity_quality_novelty_and_reproducibility": "", "wc_summary_review": "", "wc_review": "", "wc_reply_reviewers": "", "wc_reply_authors": "", "reply_reviewers": "", "reply_authors": "", "recommendation_avg": [ 0, 0 ], "confidence_avg": [ 0, 0 ], "correctness_avg": [ 0, 0 ], "technical_novelty_avg": [ 0, 0 ], "empirical_novelty_avg": [ 0, 0 ], "wc_summary_paper_avg": [ 0, 0 ], "wc_strength_and_weaknesses_avg": [ 0, 0 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 0, 0 ], "wc_summary_review_avg": [ 0, 0 ], "wc_review_avg": [ 0, 0 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 1, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0, "corr_recommendation_correctness": 0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:YECMjyENp-MJ:scholar.google.com/&scioq=NGswin:+N-Gram+Swin+Transformer+for+Efficient+Single+Image+Super-Resolution&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;0", "aff_unique_norm": "Sogang University;LG", "aff_unique_dep": ";LG Innotek", "aff_unique_url": "https://www.sogang.ac.kr;https://www.lginnotek.com", "aff_unique_abbr": "Sogang;LG Innotek", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "id": "QmH1_mn6SI", "title": "FP_AINet: Fusion Prototype with Adaptive Induction Network for Few-Shot Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "The conventional prototypical network treats all samples equally and does not consider the effects of noisy samples, which leads to a biased class representation. In this paper, we propose a novel Fusion Prototype with Adaptive Induction Network (FP_AINet) for few-shot learning that can learn representative prototypes from a few support samples.
Specifically, to address the problem of noisy samples in the support set, an adaptive induction network is developed, which can learn different class representations for diverse queries and assign adaptive scores to support samples according to their relative significance. Moreover, the proposed model can generate a more accurate prototype than comparison methods by considering the query-related samples. With an increasing number of samples, the prototypical network is more expressive since the Adaptive Induction Network ignores the relative local features. As a result, a Gaussian-based fusion algorithm is designed to learn more representative prototypes. Extensive experiments are conducted on three datasets: miniImageNet, tieredImageNet, and CIFAR_FS. The experimental results, compared with state-of-the-art few-shot learning methods, demonstrate the superiority of FP_AINet.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Mengping Dong;xue Liu;Chaojun Cen;YANG MI;Zhenbo Li", "authorids": "~Mengping_Dong1;~xue_Liu3;~Chaojun_Cen1;~YANG_MI2;~Zhenbo_Li1", "gender": "F;F;M;M;M", "homepage": "https://github.com/MengpingDong1994;http://faculty.cau.edu.cn/xxdqxy/lx/list.htm;https://github.com/cenchaojun;;http://faculty.cau.edu.cn/xxdqxy/lzb_en/list.htm", "dblp": ";;;;", "google_scholar": ";;;9krBE6oAAAAJ;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Mengping_Dong1;~xue_Liu3;~Chaojun_Cen1;~YANG_MI2;~Zhenbo_Li1", "aff": "China Agricultural University;China Agricultural University;China Agricultural University;;China Agricultural University", "aff_domain": "cau.edu.cn;cau.edu.cn;cau.edu.cn;;cau.edu.cn", "position": "PhD student;Associate Professor;PhD student;;Full Professor", "bibtex": "@misc{\ndong2023fpainet,\ntitle={{FP}\\_{AIN}et: Fusion Prototype with Adaptive Induction Network for Few-Shot Learning},\nauthor={Mengping Dong and xue Liu and Chaojun Cen and YANG MI and Zhenbo Li},\nyear={2023},\nurl={https://openreview.net/forum?id=QmH1_mn6SI}\n}", "github": "", "project": "", "reviewers": "x78M;WNef;8vWo;kAKG", "site": "https://openreview.net/forum?id=QmH1_mn6SI", "pdf_size": 779969, "recommendation": "3;5;5;6", "confidence": "4;5;4;2", "correctness": "3;2;2;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "1;2;2;2", "wc_summary_paper": "40;75;32;68", "wc_strength_and_weaknesses": "363;251;64;85", "wc_clarity_quality_novelty_and_reproducibility": "42;14;23;25", "wc_summary_review": "17;32;15;44", "wc_review": "462;372;134;222", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1281;675;521;421", "reply_reviewers": "0;0;0;0", "reply_authors": "2;1;1;1", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 53.75, 18.14352501582865 ], "wc_strength_and_weaknesses_avg": [ 190.75, 123.03327801859137 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 26.0, 10.124228365658293 ], "wc_summary_review_avg": [ 27.0, 11.811011811017716 ], "wc_review_avg": [ 297.5, 127.51764583774279 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 724.5, 333.7914768234803 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.4736842105263159, "corr_recommendation_correctness": -0.2294157338705618, "gs_citation": 0,
"gs_cited_by_link": "https://scholar.google.com/scholar?q=related:cO6MQ-kJZ9YJ:scholar.google.com/&scioq=FP_AINet:+Fusion+Prototype+with+Adaptive+Induction+Network+for+Few-Shot+Learning&hl=en&as_sdt=0,10", "gs_version_total": 2, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "China Agricultural University", "aff_unique_dep": "", "aff_unique_url": "http://www.cau.edu.cn/", "aff_unique_abbr": "CAU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "Qnxcl6zWobO", "title": "Quality Matters: Embracing Quality Clues for Robust 3D Multi-Object Tracking", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "3D Multi-Object Tracking (MOT) has achieved tremendous achievement thanks to the rapid development of 3D object detection and 2D MOT. Recent advanced works generally employ a series of object attributes, e.g., position, size, velocity, and appearance, to provide the clues for the association in 3D MOT. However, these cues may not be reliable due to some visual noise, such as occlusion and blur, leading to tracking performance bottleneck. To reveal the dilemma, we conduct extensive empirical analysis to expose the key bottleneck of each clue and how they correlate with each other. The analysis results motivate us to efficiently absorb the merits among all cues, and adaptively produce an optimal tacking manner. Specifically, we present \\textit{Location and Velocity Quality Learning}, which efficiently guides the network to estimate the quality of predicted object attributes. Based on these quality estimations, we propose a quality-aware object association (QOA) strategy to leverage the quality score as an important reference factor for achieving robust association. Despite its simplicity, extensive experiments indicate that the proposed strategy significantly boosts tracking performance by 2.2% AMOTA and our method outperforms all existing state-of-the-art works on nuScenes by a large margin. 
Moreover, QTrack achieves 48.0% and 51.1% AMOTA tracking performance on the nuScenes validation and test sets, which significantly reduces the performance gap between pure camera and LiDAR based trackers.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Jinrong Yang;En Yu;Zeming Li;Xiaoping Li;Wenbing Tao", "authorids": "~Jinrong_Yang1;~En_Yu1;~Zeming_Li2;~Xiaoping_Li2;~Wenbing_Tao1", "gender": "M;M;;M;M", "homepage": "https://yancie-yjr.github.io/;https://www.zhihu.com/people/yu-en-47-48;;http://mse.hust.edu.cn/info/1143/1374.htm;http://faculty.hust.edu.cn/taowenbing/zh_CN/index.htm", "dblp": "286/5463;213/4929;;;73/188.html", "google_scholar": "8Of_NYQAAAAJ;https://scholar.google.com.hk/citations?user=rWCQMNgAAAAJ;;;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Jinrong_Yang1;~En_Yu1;~Zeming_Li2;~Xiaoping_Li2;~Wenbing_Tao1", "aff": "Huazhong University of Science and Technology;Megvii Technology Inc.;;;Huazhong University of Science and Technology", "aff_domain": "hust.edu.cn;megvii.com;;;hust.edu.cn", "position": "PhD student;Principal Researcher;;;Full Professor", "bibtex": "@misc{\nyang2023quality,\ntitle={Quality Matters: Embracing Quality Clues for Robust 3D Multi-Object Tracking},\nauthor={Jinrong Yang and En Yu and Zeming Li and Xiaoping Li and Wenbing Tao},\nyear={2023},\nurl={https://openreview.net/forum?id=Qnxcl6zWobO}\n}", "github": "", "project": "", "reviewers": "39j3;MWh3;czkm", "site": "https://openreview.net/forum?id=Qnxcl6zWobO", "pdf_size": 1856114, "recommendation": "3;3;5", "confidence": "5;4;5", "correctness": "3;3;4", "technical_novelty": "1;2;2", "empirical_novelty": "2;3;3", "wc_summary_paper": "73;117;34", "wc_strength_and_weaknesses": "135;246;157", "wc_clarity_quality_novelty_and_reproducibility": "7;106;2", "wc_summary_review": "10;38;51", "wc_review": "225;507;244", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 74.66666666666667, 33.9050963065371 ], "wc_strength_and_weaknesses_avg": [ 179.33333333333334, 47.98842453018112 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 38.333333333333336, 47.891080125171065 ], "wc_summary_review_avg": [ 33.0, 17.107503227141788 ], "wc_review_avg": [ 325.3333333333333, 128.69170740788064 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.4999999999999999, "corr_recommendation_correctness": 1.0, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15331529363239518455&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Huazhong University of Science and Technology;Megvii Technology", "aff_unique_dep": ";", "aff_unique_url": "http://www.hust.edu.cn;https://www.megvii.com", "aff_unique_abbr": "HUST;Megvii", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "QrdSiDAv5ek", "title": "FACS: FAST ADAPTIVE CHANNEL SQUEEZING", "track": "main", 
"status": "Reject", "tldr": "Computationally Efficient Channel Squeezing in CNNs with high representation power", "abstract": "Channel squeezing is one of the central operations performed in CNN bottlenecks to reduce the number of channels in a feature map. This operation is carried out by using a 1 \u00d7 1 pointwise convolution which constitutes a significant amount of computations and parameters in a given network. ResNet-50 for instance, consists of 16 such layers which form 33% of total layers and 25% (1.05B/4.12B) of total FLOPs or computations. In the light of their predominance, we propose a novel \u201cFast Adaptive Channel Squeezing\u201d module which carries out the squeezing operation in a computationally efficient manner. The key benefit of FACS is that it neither alters the number of parameters nor affects the accuracy of a given network. When plugged into diverse CNNs architectures, namely ResNet, VGG, and MobileNet-v2, FACS achieves state-of-the-art performance on ImageNet and CIFAR datasets at dramatically reduced FLOPs. FACS also cuts the training time significantly, and lowers the latency which is particularly advantageous for fast inference on edge devices. The source-code will be made publicly available.", "keywords": "Fast Channel squeezing;Edge Devices;CNN", "primary_area": "", "supplementary_material": "/attachment/c762d290f14b3204fe8d434123ebbdc928cee78b.zip", "author": "Ashish Kumar;Laxmidhar Behera", "authorids": "~Ashish_Kumar2;~Laxmidhar_Behera1", "gender": "M;M", "homepage": "https://ashishkumar822.github.io;https://home.iitk.ac.in/~lbehera/", "dblp": "34/5378-6;14/1412", "google_scholar": "n-oRDEYAAAAJ;https://scholar.google.co.in/citations?user=QWTcyP8AAAAJ", "orcid": ";", "linkedin": "ashishkumar822/;laxmidhar-behera-a74a5b174/?originalSubdomain=in", "or_profile": "~Ashish_Kumar2;~Laxmidhar_Behera1", "aff": "Indian Institute of Technology, Kanpur;Indian Institute of Technology, Kanpur", "aff_domain": "iitk.ac.in;iitmandi.ac.in", "position": "PhD student;Director", "bibtex": "@misc{\nkumar2023facs,\ntitle={{FACS}: {FAST} {ADAPTIVE} {CHANNEL} {SQUEEZING}},\nauthor={Ashish Kumar and Laxmidhar Behera},\nyear={2023},\nurl={https://openreview.net/forum?id=QrdSiDAv5ek}\n}", "github": "", "project": "", "reviewers": "ZY5e;Ywmp;9T37;LDvY", "site": "https://openreview.net/forum?id=QrdSiDAv5ek", "pdf_size": 2762513, "recommendation": "5;5;5;5", "confidence": "5;4;4;4", "correctness": "2;3;3;3", "technical_novelty": "1;2;2;3", "empirical_novelty": "1;2;2;2", "wc_summary_paper": "54;42;62;52", "wc_strength_and_weaknesses": "160;146;103;238", "wc_clarity_quality_novelty_and_reproducibility": "15;23;20;187", "wc_summary_review": "28;31;41;57", "wc_review": "257;242;226;534", "wc_reply_reviewers": "52;0;0;535", "wc_reply_authors": "1874;1636;1121;1609", "reply_reviewers": "1;0;0;1", "reply_authors": "4;3;3;4", "recommendation_avg": [ 5.0, 0.0 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 52.5, 7.123903424387503 ], "wc_strength_and_weaknesses_avg": [ 161.75, 48.776915646645804 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 61.25, 72.65801745162058 ], "wc_summary_review_avg": [ 39.25, 11.321991874224253 ], "wc_review_avg": [ 314.75, 127.05781164493587 ], "wc_reply_reviewers_avg": [ 146.75, 225.1592491993167 ], "wc_reply_authors_avg": [ 1560.0, 273.6302249386935 ], 
"reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 3.5, 0.5 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:4IAjEDwVIrAJ:scholar.google.com/&scioq=FACS:+FAST+ADAPTIVE+CHANNEL+SQUEEZING&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Indian Institute of Technology Kanpur", "aff_unique_dep": "", "aff_unique_url": "https://www.iitk.ac.in", "aff_unique_abbr": "IIT Kanpur", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Kanpur", "aff_country_unique_index": "0;0", "aff_country_unique": "India" }, { "title": "Task Ambiguity in Humans and Language Models", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11306", "id": "QrnDe_9ZFd8", "poster": "", "openreview": "https://openreview.net/forum?id=QrnDe_9ZFd8", "slides": "https://iclr.cc/virtual/2023/poster/11306", "video": "https://iclr.cc/virtual/2023/poster/11306", "author_site": "Alex Tamkin, Kunal Handa, Avash Shrestha, Noah Goodman", "tldr": "We motivate the direction of studying task ambiguity in humans and language models, evaluating them on a new benchmark of ambiguously-specified tasks and develop methods for improving performance", "abstract": "Language models have recently achieved strong performance across a wide range of NLP benchmarks. However, real world tasks are often poorly specified, and agents must deduce the intended behavior from a combination of context, instructions, and examples. We investigate how both humans and models behave in the face of such task ambiguity by proposing AmbiBench, a new benchmark of six ambiguously-specified classification tasks. We evaluate humans and models on AmbiBench by seeing how well they identify the intended task using 1) instructions with varying degrees of ambiguity, and 2) different numbers of labeled examples. We find that the combination of model scaling (to 175B parameters) and reinforcement learning from human feedback (RLHF) enables models to approach or exceed the accuracy of human participants across tasks, but that either one of these alone is not sufficient. 
In addition, we show how to dramatically improve the accuracy of language models trained without RLHF by finetuning on a small number of ambiguous in-context examples, providing a promising direction for teaching models to generalize well in the face of ambiguity.", "keywords": "task ambiguity;safety;language models;few-shot learning;in-context learning", "primary_area": "", "supplementary_material": "", "author": "Alex Tamkin;Kunal Handa;Avash Shrestha;Noah Goodman", "authorids": "~Alex_Tamkin1;~Kunal_Handa1;avash@stanford.edu;~Noah_Goodman1", "gender": ";;;", "homepage": ";https://kunhanda.github.io/;;https://cocolab.stanford.edu/", "dblp": ";336/6747.html;;96/1216", "google_scholar": ";scdcthMAAAAJ;;OUpIbcQAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Alex_Tamkin1;~Kunal_Handa1;avash@stanford.edu;~Noah_Goodman1", "aff": ";Brown University;;Stanford University", "aff_domain": ";brown.edu;;stanford.edu", "position": ";Undergrad student;;Full Professor", "bibtex": "@inproceedings{\ntamkin2023task,\ntitle={Task Ambiguity in Humans and Language Models},\nauthor={Alex Tamkin and Kunal Handa and Avash Shrestha and Noah Goodman},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=QrnDe_9ZFd8}\n}", "github": "", "project": "", "reviewers": "zof8;3BBz;6dKC;o1Uh", "pdf_size": 4124919, "recommendation": "3;6;6;8", "confidence": "5;4;4;4", "correctness": "2;4;4;3", "technical_novelty": "1;3;3;2", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "68;268;266;148", "wc_strength_and_weaknesses": "201;173;202;181", "wc_clarity_quality_novelty_and_reproducibility": "42;123;105;596", "wc_summary_review": "32;139;49;42", "wc_review": "343;703;622;967", "wc_reply_reviewers": "208;0;0;67", "wc_reply_authors": "816;506;243;986", "reply_reviewers": "1;0;0;1", "reply_authors": "2;1;1;2", "recommendation_avg": [ 5.75, 1.7853571071357126 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.25, 0.82915619758885 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 187.5, 84.38453649810491 ], "wc_strength_and_weaknesses_avg": [ 189.25, 12.577261228105266 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 216.5, 221.1588795413831 ], "wc_summary_review_avg": [ 65.5, 42.86315434029558 ], "wc_review_avg": [ 658.75, 222.49985955051747 ], "wc_reply_reviewers_avg": [ 68.75, 84.92165507101237 ], "wc_reply_authors_avg": [ 637.75, 285.5856920435616 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.8892972917998875, "corr_recommendation_correctness": 0.5488604301969737, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12063067349839300457&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=QrnDe_9ZFd8", "email": ";brown.edu;;stanford.edu", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Brown University;Stanford University", "aff_unique_dep": ";", "aff_unique_url": "https://www.brown.edu;https://www.stanford.edu", "aff_unique_abbr": "Brown;Stanford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Effective passive membership inference attacks in federated learning against overparameterized models", "status": 
"Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11045", "id": "QsCSLPP55Ku", "poster": "/media/PosterPDFs/ICLR%202023/11045.png?t=1681016492.4642446", "openreview": "https://openreview.net/forum?id=QsCSLPP55Ku", "slides": "https://iclr.cc/virtual/2023/poster/11045", "video": "https://iclr.cc/virtual/2023/poster/11045", "author_site": "Jiacheng Li, Ninghui Li, Bruno Ribeiro", "tldr": "The observation that gradients of large overparameterized neural networks that generalize well behave like high-dimensional independent isotropic random vectors, leads to a new class of passive membership inference attacks in federated learning.", "abstract": "This work considers the challenge of performing membership inference attacks in a federated learning setting ---for image classification--- where an adversary can only observe the communication between the central node and a single client (a passive white-box attack). Passive attacks are one of the hardest-to-detect attacks, since they can be performed without modifying how the behavior of the central server or its clients, and assumes *no access to private data instances*. The key insight of our method is empirically observing that, near parameters that generalize well in test, the gradient of large overparameterized neural network models statistically behave like high-dimensional independent isotropic random vectors. Using this insight, we devise two attacks that are often little impacted by existing and proposed defenses. Finally, we validated the hypothesis that our attack depends on the overparametrization by showing that increasing the level of overparametrization (without changing the neural network architecture) positively correlates with our attack effectiveness.", "keywords": "membership inference attack;federated learning;overparameterization;neural networks;image classification", "primary_area": "", "supplementary_material": "", "author": "Jiacheng Li;Ninghui Li;Bruno Ribeiro", "authorids": "~Jiacheng_Li6;ninghui@purdue.edu;~Bruno_Ribeiro1", "gender": "M;;M", "homepage": ";;https://www.cs.purdue.edu/homes/ribeirob/", "dblp": ";;15/606", "google_scholar": ";;KIEleCsAAAAJ", "orcid": ";;0000-0002-3527-6192", "linkedin": "jiachenglialex/;;", "or_profile": "~Jiacheng_Li6;ninghui@purdue.edu;~Bruno_Ribeiro1", "aff": "Purdue University;;Purdue University", "aff_domain": "purdue.edu;;purdue.edu", "position": "PhD student;;Assistant Professor", "bibtex": "@inproceedings{\nli2023effective,\ntitle={Effective passive membership inference attacks in federated learning against overparameterized models},\nauthor={Jiacheng Li and Ninghui Li and Bruno Ribeiro},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=QsCSLPP55Ku}\n}", "github": "", "project": "", "reviewers": "nM4b;o3B4;61U3", "pdf_size": 597196, "recommendation": "6;6;8", "confidence": "4;4;4", "correctness": "3;2;4", "technical_novelty": "3;3;3", "empirical_novelty": "3;3;4", "wc_summary_paper": "64;62;72", "wc_strength_and_weaknesses": "381;216;363", "wc_clarity_quality_novelty_and_reproducibility": "1;38;114", "wc_summary_review": "4;14;13", "wc_review": "450;330;562", "wc_reply_reviewers": "0;0;36", "wc_reply_authors": "567;809;1352", "reply_reviewers": "0;0;1", "reply_authors": "1;2;3", "recommendation_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 3.0, 0.0 ], 
"empirical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 66.0, 4.320493798938574 ], "wc_strength_and_weaknesses_avg": [ 320.0, 73.90534486760751 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 51.0, 47.038990918882035 ], "wc_summary_review_avg": [ 10.333333333333334, 4.496912521077347 ], "wc_review_avg": [ 447.3333333333333, 94.73237156443525 ], "wc_reply_reviewers_avg": [ 12.0, 16.97056274847714 ], "wc_reply_authors_avg": [ 909.3333333333334, 328.23399512475174 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.8660254037844385, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11991509680265510137&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "pdf": "https://openreview.net/pdf?id=QsCSLPP55Ku", "email": "purdue.edu;;purdue.edu", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Purdue University", "aff_unique_dep": "", "aff_unique_url": "https://www.purdue.edu", "aff_unique_abbr": "Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "QsVditUhXR", "title": "Soft Diffusion: Score Matching For General Corruptions", "track": "main", "status": "Reject", "tldr": "We define a broader family of corruption processes that generalizes previously known diffusion models and we show how to learn to reverse them.", "abstract": "We define a broader family of corruption processes that generalizes previously known diffusion models. To reverse these general diffusions, we propose a new objective called Soft Score Matching that provably learns the score function for any linear corruption process and yields state of the art results for CelebA. Soft Score Matching incorporates the degradation process in the network. \nOur new loss trains the model to predict a clean image, that after corruption, matches the diffused observation.\nWe show that our objective learns the gradient of the likelihood under suitable regularity conditions for a family of corruption processes. \nWe further develop a principled way to select the corruption levels for general diffusion processes and a novel sampling method that we call Momentum Sampler. \nWe show experimentally that our framework works for general linear corruption processes, such as Gaussian blur and masking.\nWe achieve state-of-the-art FID score $1.85$ on CelebA-64, outperforming all previous linear diffusion models. 
\nWe also show significant computational benefits compared to vanilla denoising diffusion.", "keywords": "diffusion;score-based models;generative models", "primary_area": "", "supplementary_material": "", "author": "Giannis Daras;Mauricio Delbracio;Hossein Talebi;Alex Dimakis;Peyman Milanfar", "authorids": "~Giannis_Daras1;~Mauricio_Delbracio1;~Hossein_Talebi1;~Alex_Dimakis1;~Peyman_Milanfar1", "gender": "M;M;M;M;M", "homepage": "https://giannisdaras.github.io/;;;https://people.eecs.berkeley.edu/~alexdimakis/;http://www.milanfar.org", "dblp": "254/2703;90/10811;145/0046;19/5000.html;48/6882", "google_scholar": "LaScvbQAAAAJ;lDDm920AAAAJ;kqXlLzYAAAAJ;JSFmVQEAAAAJ;iGzDl8IAAAAJ", "orcid": ";;;;", "linkedin": ";;;alex-dimakis-b1b20320/;", "or_profile": "~Giannis_Daras1;~Mauricio_Delbracio1;~Hossein_Talebi1;~Alex_Dimakis1;~Peyman_Milanfar1", "aff": "University of Texas, Austin;Google;Google Research;University of Texas at Austin;Google", "aff_domain": "utexas.edu;google.com;google.com;utexas.edu;google.com", "position": "PhD student;Research Scientist;Google Research;Full Professor;Distinguished Scientist", "bibtex": "@misc{\ndaras2023soft,\ntitle={Soft Diffusion: Score Matching For General Corruptions},\nauthor={Giannis Daras and Mauricio Delbracio and Hossein Talebi and Alex Dimakis and Peyman Milanfar},\nyear={2023},\nurl={https://openreview.net/forum?id=QsVditUhXR}\n}", "github": "", "project": "", "reviewers": "x7zn;pNiX;dDmh", "site": "https://openreview.net/forum?id=QsVditUhXR", "pdf_size": 11591766, "recommendation": "3;3;5", "confidence": "4;4;4", "correctness": "2;2;3", "technical_novelty": "3;2;3", "empirical_novelty": "3;2;3", "wc_summary_paper": "57;30;74", "wc_strength_and_weaknesses": "148;399;570", "wc_clarity_quality_novelty_and_reproducibility": "1;9;152", "wc_summary_review": "14;54;107", "wc_review": "220;492;903", "wc_reply_reviewers": "203;304;335", "wc_reply_authors": "1366;2664;3580", "reply_reviewers": "2;1;2", "reply_authors": "2;5;6", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 53.666666666666664, 18.116904322268255 ], "wc_strength_and_weaknesses_avg": [ 372.3333333333333, 173.3096137616786 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 54.0, 69.37338586710806 ], "wc_summary_review_avg": [ 58.333333333333336, 38.09053542402481 ], "wc_review_avg": [ 538.3333333333334, 280.7517685706638 ], "wc_reply_reviewers_avg": [ 280.6666666666667, 56.357982772826624 ], "wc_reply_authors_avg": [ 2536.6666666666665, 908.3352293558194 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.4714045207910317 ], "reply_authors_avg": [ 4.333333333333333, 1.699673171197595 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 1.0, "gs_citation": 108, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10304551622106675105&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;1;0;1", "aff_unique_norm": "University of Texas at Austin;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.utexas.edu;https://www.google.com", "aff_unique_abbr": "UT Austin;Google", "aff_campus_unique_index": "0;1;1;0;1", "aff_campus_unique": "Austin;Mountain View", "aff_country_unique_index": "0;0;0;0;0", 
"aff_country_unique": "United States" }, { "id": "Qsbh0IgVG_8", "title": "Domain Specific Denoising Diffusion Probabilistic Models for Brain Dynamics", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "The distribution differences in brain dynamics according to different human subjects, which is a kind of human-subject noise, as referred to as human artifacts, have severely limited the generalized ability of brain dynamics recognition. Previous human artifact removal methods normally utilize traditional spectrum filtering or blind Source Separation techniques, based on a simple assumption of prior distributions, which limit the capacity of learning domain variance of each subject. We propose a new approach to model the removal of the human artifacts as a generative denoising process, which can generate and learn subject-specific domain variance and the invariant brain signals, simultaneously. We propose Domain Specific Denoising Diffusion Probabilistic Model (DS-DDPM) to decompose the denoising process into the subject domain variance and invariant content at each step. Subtle constraints and probabilistic design are proposed to formulate domain variance and invariant content into orthogonal spaces and further supervise the domain variance with the subject classifier. This method is the first work to explicitly separate human subject-specific variance through generative denoising processes, which outperforms previous methods in two aspects, 1) DS-DDPM could learn more accurate subject-specific domain variance by domain generative learning rather than previous filtering methods 2) DS-DDPM is the first work could explicitly generate subject noise distribution. Comprehensive experimental results suggest that DS-DDPM could help alleviate domain distribution bias for cross-domain brain dynamics signal recognition.", "keywords": "Denoising Diffusion Probalistic Models;EEG Signal;Domain Variance Generation;Subject Difference;Deep Learning", "primary_area": "", "supplementary_material": "/attachment/7f407cbcdd53330c7330cdf1e69ad28edd011fc3.zip", "author": "Yiqun Duan;Jinzhao Zhou;Zhen Wang;Yu-Cheng Chang;Yu-Kai Wang;Chin-teng Lin", "authorids": "~Yiqun_Duan1;~Jinzhao_Zhou1;~Zhen_Wang9;~Yu-Cheng_Chang3;yukai.wang@uts.edu.au;~Chin-teng_Lin1", "gender": "M;;;M;;M", "homepage": "https://github.com/DuanYiqun;;;;;http://www.uts.edu.au/staff/chin-teng.lin", "dblp": "248/5526;;;;;", "google_scholar": "https://scholar.google.com.au/citations?user=GoQKrD0AAAAJ;;;;;nubkF1cAAAAJ", "orcid": ";;;0000-0001-9244-0318;;0000-0001-8371-8197", "linkedin": ";;;;;", "or_profile": "~Yiqun_Duan1;~Jinzhao_Zhou1;~Zhen_Wang9;~Yu-Cheng_Chang3;yukai.wang@uts.edu.au;~Chin-teng_Lin1", "aff": "University of Technology Sydney;;;University of Technology Sydney;;University of Technology Sydney", "aff_domain": "uts.edu.au;;;uts.edu.au;;uts.edu.au", "position": "PhD student;;;Researcher;;Full Professor", "bibtex": "@misc{\nduan2023domain,\ntitle={Domain Specific Denoising Diffusion Probabilistic Models for Brain Dynamics},\nauthor={Yiqun Duan and Jinzhao Zhou and Zhen Wang and Yu-Cheng Chang and Yu-Kai Wang and Chin-teng Lin},\nyear={2023},\nurl={https://openreview.net/forum?id=Qsbh0IgVG_8}\n}", "github": "", "project": "", "reviewers": "sVP3;YG5f;yi6k;Ddve", "site": "https://openreview.net/forum?id=Qsbh0IgVG_8", "pdf_size": 10082401, "recommendation": "1;3;5;5", "confidence": "4;5;5;3", "correctness": "2;2;3;3", "technical_novelty": "2;3;3;2", "empirical_novelty": "2;2;3;2", "wc_summary_paper": "93;109;66;65", 
"wc_strength_and_weaknesses": "305;130;195;47", "wc_clarity_quality_novelty_and_reproducibility": "19;121;19;84", "wc_summary_review": "39;404;59;53", "wc_review": "456;764;339;249", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.5, 1.6583123951777 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 83.25, 18.632968094214082 ], "wc_strength_and_weaknesses_avg": [ 169.25, 94.3090001007327 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 60.75, 43.751428548105714 ], "wc_summary_review_avg": [ 138.75, 153.31401599332006 ], "wc_review_avg": [ 452.0, 194.51092514303664 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.0909090909090909, "corr_recommendation_correctness": 0.9045340337332909, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11031092705996354214&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Technology Sydney", "aff_unique_dep": "", "aff_unique_url": "https://www.uts.edu.au", "aff_unique_abbr": "UTS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Australia" }, { "id": "QsgeAdRwILD", "title": "Accuracy Boosters: Epoch-Driven Mixed-Mantissa Block Floating-Point for DNN Training", "track": "main", "status": "Reject", "tldr": "We propose an epoch-driven mixed-mantissa Hybrid Block Floating-Point training method converting 99.7% of arithmetic operations in training to 4-bit mantissas and using 6-bit mantissas in the last epoch, while preserving/outperforming FP32 accuracy.", "abstract": "The unprecedented growth in DNN model complexity, size and the amount of training data have led to a commensurate increase in demand for computing and a search for minimal encoding. Recent research advocates Hybrid Block Floating-Point (HBFP) as a technique that minimizes silicon provisioning in accelerators by converting the majority of arithmetic operations in training to 8-bit fixed-point. In this paper, we perform a full-scale exploration of the HBFP design space including minimal mantissa encoding, varying block sizes, and mixed mantissa bit-width across layers and epochs. We propose Accuracy Boosters, an epoch-driven mixed-mantissa HBFP that uses 6-bit mantissa only in the last epoch and converts 99.7% of all arithmetic operations in training to 4-bit mantissas. 
Accuracy Boosters enable reducing silicon provisioning for an HBFP training accelerator by 16.98\u00d7 as compared to FP32, while preserving or outperforming FP32 accuracy.", "keywords": "DNN training;low-precision training;mixed-precision training;efficient training;number formats;numerical encodings;block floating-point;DNN accelerators", "primary_area": "", "supplementary_material": "/attachment/3458c0d87b599ef7b83591eab10e848ba2fdd028.zip", "author": "Simla Burcu Harma;Canberk S\u00f6nmez;Babak Falsafi;Martin Jaggi;Yunho Oh", "authorids": "~Simla_Burcu_Harma1;~Canberk_S\u00f6nmez1;~Babak_Falsafi1;~Martin_Jaggi1;~Yunho_Oh1", "gender": "F;M;M;M;M", "homepage": "https://sites.google.com/view/simla-burcu-harma/home;https://people.epfl.ch/canberk.sonmez?lang=en;https://parsa.epfl.ch/~falsafi;https://mlo.epfl.ch;https://yunho-oh.github.io/", "dblp": "334/1675;;f/BabakFalsafi;17/4402;128/6741", "google_scholar": "T5IPhSQAAAAJ;;LaNEuBUAAAAJ;https://scholar.google.ch/citations?user=r1TJBr8AAAAJ;https://scholar.google.co.kr/citations?user=Bh0k83sAAAAJ", "orcid": ";;0000-0001-5916-8068;0000-0003-1579-5558;0000-0001-6442-3705", "linkedin": "simla-burcu-harma-2b15a8b6/;;falsafi/?originalSubdomain=ch;;yunho-oh-comarch/", "or_profile": "~Simla_Burcu_Harma1;~Canberk_S\u00f6nmez1;~Babak_Falsafi1;~Martin_Jaggi1;~Yunho_Oh1", "aff": "EPFL - EPF Lausanne;;EPFL;EPFL;Korea University", "aff_domain": "epfl.ch;;epfl.ch;epfl.ch;korea.ac.kr", "position": "PhD student;;Full Professor;Associate Professor;Assistant Professor", "bibtex": "@misc{\nharma2023accuracy,\ntitle={Accuracy Boosters: Epoch-Driven Mixed-Mantissa Block Floating-Point for {DNN} Training},\nauthor={Simla Burcu Harma and Canberk S{\\\"o}nmez and Babak Falsafi and Martin Jaggi and Yunho Oh},\nyear={2023},\nurl={https://openreview.net/forum?id=QsgeAdRwILD}\n}", "github": "", "project": "", "reviewers": "VDoK;gLsp;L7nb;gycw;FLAx", "site": "https://openreview.net/forum?id=QsgeAdRwILD", "pdf_size": 282220, "recommendation": "3;3;3;6;8", "confidence": "5;4;4;4;2", "correctness": "3;3;2;3;4", "technical_novelty": "1;2;1;3;3", "empirical_novelty": "2;3;2;2;3", "wc_summary_paper": "131;217;71;69;45", "wc_strength_and_weaknesses": "150;608;70;136;54", "wc_clarity_quality_novelty_and_reproducibility": "134;11;39;66;46", "wc_summary_review": "62;93;40;28;101", "wc_review": "477;929;220;299;246", "wc_reply_reviewers": "248;0;0;0;0", "wc_reply_authors": "1013;1075;395;500;284", "reply_reviewers": "2;0;0;0;0", "reply_authors": "4;2;1;1;1", "recommendation_avg": [ 4.6, 2.0591260281974 ], "confidence_avg": [ 3.8, 0.9797958971132712 ], "correctness_avg": [ 3.0, 0.6324555320336759 ], "technical_novelty_avg": [ 2.0, 0.8944271909999159 ], "empirical_novelty_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_paper_avg": [ 106.6, 62.063193601360865 ], "wc_strength_and_weaknesses_avg": [ 203.6, 205.52917067900606 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 59.2, 41.344407118738566 ], "wc_summary_review_avg": [ 64.8, 28.57551399362748 ], "wc_review_avg": [ 434.2, 263.1496912405561 ], "wc_reply_reviewers_avg": [ 49.6, 99.20000000000002 ], "wc_reply_authors_avg": [ 653.4, 326.74675208791285 ], "reply_reviewers_avg": [ 0.4, 0.8000000000000002 ], "reply_authors_avg": [ 1.8, 1.1661903789690604 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.8327041650040515, "corr_recommendation_correctness": 0.767868896042439, "gs_citation": 6, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=3253460158169964748&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "EPFL;Korea University", "aff_unique_dep": ";", "aff_unique_url": "https://www.epfl.ch;https://www.korea.ac.kr", "aff_unique_abbr": "EPFL;KU", "aff_campus_unique_index": "0", "aff_campus_unique": "Lausanne;", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "Switzerland;South Korea" }, { "title": "Neuromechanical Autoencoders: Learning to Couple Elastic and Neural Network Nonlinearity", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11126", "id": "QubsmJT_A0", "poster": "", "openreview": "https://openreview.net/forum?id=QubsmJT_A0", "slides": "https://iclr.cc/virtual/2023/poster/11126", "video": "https://iclr.cc/virtual/2023/poster/11126", "author_site": "Deniz Oktay, Mehran Mirramezani, Eder Medina, Ryan P Adams", "tldr": "We introduce Neuromechanical Autoencoders, a framework for co-design of neural network and mechanical metamaterials for performing morphological computation.", "abstract": "Intelligent biological systems are characterized by their embodiment in a complex environment and the intimate interplay between their nervous systems and the nonlinear mechanical properties of their bodies. This coordination, in which the dynamics of the motor system co-evolved to reduce the computational burden on the brain, is referred to as \"mechanical intelligence\" or \"morphological computation\". In this work, we seek to develop machine learning analogs of this process, in which we jointly learn the morphology of complex nonlinear elastic solids along with a deep neural network to control it. By using a specialized differentiable simulator of elastic mechanics coupled to conventional deep learning architectures---which we refer to as neuromechanical autoencoders---we are able to learn to perform morphological computation via gradient descent. Key to our approach is the use of mechanical metamaterials---cellular solids, in particular---as the morphological substrate. Just as deep neural networks provide flexible and massively-parametric function approximators for perceptual and control tasks, cellular solid metamaterials are promising as a rich and learnable space for approximating a variety of actuation tasks. In this work we take advantage of these complementary computational concepts to co-design materials and neural network controls to achieve nonintuitive mechanical behavior. We demonstrate in simulation how it is possible to achieve translation, rotation, and shape matching, as well as a \"digital MNIST\" task. 
We additionally manufacture and evaluate one of the designs to verify its real-world behavior.\n", "keywords": "morphological computation;mechanical metamaterials;computational mechanics;mechanical co-design;automatic differentiation;differentiable simulation", "primary_area": "", "supplementary_material": "/attachment/91d23574b61b3876ac2bed58ed46c39c43073be9.zip", "author": "Deniz Oktay;Mehran Mirramezani;Eder Medina;Ryan P Adams", "authorids": "~Deniz_Oktay2;~Mehran_Mirramezani1;~Eder_Medina1;~Ryan_P_Adams1", "gender": "M;M;;M", "homepage": "https://www.cs.princeton.edu/~doktay/;;https://medinaeder.github.io;http://www.cs.princeton.edu/~rpa/", "dblp": ";;;32/909", "google_scholar": "NQ1BBEwAAAAJ;IZm1HGsAAAAJ;;grQ_GBgAAAAJ", "orcid": ";;;", "linkedin": "deniz-oktay-65b20a53;;;", "or_profile": "~Deniz_Oktay2;~Mehran_Mirramezani1;~Eder_Medina1;~Ryan_P_Adams1", "aff": "Princeton University;Princeton University;Department of Computer Science, Princeton University;Princeton University", "aff_domain": "princeton.edu;princeton.edu;cs.princeton.edu;princeton.edu", "position": "PhD student;Postdoc;Postdoc;Professor", "bibtex": "@inproceedings{\noktay2023neuromechanical,\ntitle={Neuromechanical Autoencoders: Learning to Couple Elastic and Neural Network Nonlinearity},\nauthor={Deniz Oktay and Mehran Mirramezani and Eder Medina and Ryan P Adams},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=QubsmJT_A0}\n}", "github": "", "project": "", "reviewers": "twBy;fAWH;S3up;Fw7y", "pdf_size": 9704388, "recommendation": "6;8;8;8", "confidence": "5;3;3;2", "correctness": "3;4;4;4", "technical_novelty": "2;4;4;4", "empirical_novelty": "3;4;4;4", "wc_summary_paper": "146;174;73;101", "wc_strength_and_weaknesses": "151;76;179;98", "wc_clarity_quality_novelty_and_reproducibility": "27;115;67;193", "wc_summary_review": "45;10;91;96", "wc_review": "369;375;410;488", "wc_reply_reviewers": "125;0;54;0", "wc_reply_authors": "755;232;118;254", "reply_reviewers": "1;0;1;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 1.0897247358851685 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 3.5, 0.8660254037844386 ], "empirical_novelty_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 123.5, 39.092838218783754 ], "wc_strength_and_weaknesses_avg": [ 126.0, 40.98170323449234 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 100.5, 61.82839153657485 ], "wc_summary_review_avg": [ 60.5, 35.28809997718778 ], "wc_review_avg": [ 410.5, 47.40516849458506 ], "wc_reply_reviewers_avg": [ 44.75, 51.30972130113357 ], "wc_reply_authors_avg": [ 339.75, 245.23904154926066 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.9271726499455306, "corr_recommendation_correctness": 1.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9276156223305376766&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 7, "pdf": "https://openreview.net/pdf?id=QubsmJT_A0", "email": "princeton.edu;princeton.edu;cs.princeton.edu;princeton.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Princeton University", "aff_unique_dep": "", "aff_unique_url": "https://www.princeton.edu", "aff_unique_abbr": "Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", 
"aff_country_unique": "United States" }, { "id": "QugfmhDu5Y4", "title": "Self-Paced Learning Enhanced Physics-informed Neural Networks for Solving Partial Differential Equations", "track": "main", "status": "Reject", "tldr": "", "abstract": "There is a hit discussion on solving partial differential equation by neural network. The famous PINN (physics-informed neural networks) has drawn worldwide attention since it was put forward. Despite its success in solving nonlinear partial differential equation, the difficulty in converging and the inefficiency in training process are definitely huge concerns. Normally, data for PINN is randomly chosen for a given distribution. Additionally, it's fitted to a model in a meaningless way. Curriculum Learning is a learning strategy that trains a model from easy samples to hard ones, which represents the meaningful human learning order. Self-paced Learning (SPL) is one of the significant branches of Automatic Curriculum Learning, which takes example-wise the training loss as Difficulty Measurer. SPL is an efficient strategy in enhancing the convergence rate of numerous models. In this paper, we propose a novel SPL-PINN learning framework, with SPL to accelerate the convergence progress of PINN. We demonstrate the effectiveness of SPL-PINN in a typical parabolic equation and Burgers equation. ", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Xiaoting Han", "authorids": "~Xiaoting_Han1", "gender": "F", "homepage": "https://github.com/Hantinging", "dblp": "", "google_scholar": "", "orcid": "", "linkedin": "", "or_profile": "~Xiaoting_Han1", "aff": "Peking University", "aff_domain": "pku.edu.cn", "position": "MS student", "bibtex": "@misc{\nhan2023selfpaced,\ntitle={Self-Paced Learning Enhanced Physics-informed Neural Networks for Solving Partial Differential Equations},\nauthor={Xiaoting Han},\nyear={2023},\nurl={https://openreview.net/forum?id=QugfmhDu5Y4}\n}", "github": "", "project": "", "reviewers": "WN4N;VxbH;kVD8;mRmU", "site": "https://openreview.net/forum?id=QugfmhDu5Y4", "pdf_size": 451468, "recommendation": "1;1;3;3", "confidence": "5;4;4;4", "correctness": "2;1;3;3", "technical_novelty": "1;2;2;2", "empirical_novelty": "0;1;2;2", "wc_summary_paper": "29;26;70;38", "wc_strength_and_weaknesses": "38;50;67;62", "wc_clarity_quality_novelty_and_reproducibility": "10;62;24;18", "wc_summary_review": "26;6;7;10", "wc_review": "103;144;168;128", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 2.0, 1.0 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 2.25, 0.82915619758885 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 40.75, 17.455300054711177 ], "wc_strength_and_weaknesses_avg": [ 54.25, 11.233320969330485 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 28.5, 19.96872554771586 ], "wc_summary_review_avg": [ 12.25, 8.073877630977572 ], "wc_review_avg": [ 135.75, 23.668280461410795 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 1, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.9045340337332909, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:1qxXFydQRy0J:scholar.google.com/&scioq=Self-Paced+Learning+Enhanced+Physics-informed+Neural+Networks+for+Solving+Partial+Differential+Equations&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "id": "QutyHwpIKVw", "title": "MultiQuan RDP: Rate-Distortion-Perception Coding via Offset Quantizers", "track": "main", "status": "Withdraw", "tldr": "We propose the MultiQuan quantizers interpolating between single quantizer and dithered quantization for rate-distortion-perception coding.", "abstract": "The rate-distortion-perception (RDP) framework has attracted significant recent attention due to its application in neural compression. It is important to understand the underlying mechanism connecting procedures with common randomness and those without. Different from previous efforts, we study this problem from a quantizer design perspective. By analyzing an idealized setting, we provide an interpretation on the advantage of dithered quantization in the RDP setting, which further allows us to make a conceptual connection between randomized (dithered) quantizers and quantizers without common randomness. This new understanding leads to a new procedure for RDP coding based on multiple quantizers with offsets. Though the procedure can be viewed as intermediates between the two extremes, its explicit structure can be advantageous in some cases. Experimental results are given on both simple data sources and images to illustrate its behavior. ", "keywords": "information theory;quantization;rate-distortion-perception;compression", "primary_area": "", "supplementary_material": "", "author": "Ruida Zhou;Chao Tian", "authorids": "~Ruida_Zhou1;~Chao_Tian2", "gender": "M;", "homepage": "https://sites.google.com/view/ruida-zhou;", "dblp": "215/2026;", "google_scholar": "kXbo1twAAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Ruida_Zhou1;~Chao_Tian2", "aff": "Texas A&M University;", "aff_domain": "tamu.edu;", "position": "PhD student;", "bibtex": "@misc{\nzhou2023multiquan,\ntitle={MultiQuan {RDP}: Rate-Distortion-Perception Coding via Offset Quantizers},\nauthor={Ruida Zhou and Chao Tian},\nyear={2023},\nurl={https://openreview.net/forum?id=QutyHwpIKVw}\n}", "github": "", "project": "", "reviewers": "SjAK;9kVW;yp4L;GVk8", "site": "https://openreview.net/forum?id=QutyHwpIKVw", "pdf_size": 1085187, "recommendation": "3;3;5;6", "confidence": "3;4;3;3", "correctness": "4;4;3;4", "technical_novelty": "2;2;4;4", "empirical_novelty": "2;2;2;4", "wc_summary_paper": "124;154;113;52", "wc_strength_and_weaknesses": "494;429;195;379", "wc_clarity_quality_novelty_and_reproducibility": "79;38;109;37", "wc_summary_review": "43;32;43;34", "wc_review": "740;653;460;502", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 1.0 ], "empirical_novelty_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 110.75, 37.09026152509578 ], "wc_strength_and_weaknesses_avg": [ 374.25, 111.23258290626897 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 65.75, 30.177599308096063 ], "wc_summary_review_avg": [ 
38.0, 5.049752469181039 ], "wc_review_avg": [ 588.75, 113.0340103685612 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.5555555555555555, "corr_recommendation_correctness": -0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:QB8zNPi613kJ:scholar.google.com/&scioq=MultiQuan+RDP:+Rate-Distortion-Perception+Coding+via+Offset+Quantizers&hl=en&as_sdt=0,48", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Texas A&M University", "aff_unique_dep": "", "aff_unique_url": "https://www.tamu.edu", "aff_unique_abbr": "TAMU", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "QvIyd7l718", "title": "Beyond Link Prediction: On Pre-Training Knowledge Graph Embeddings", "track": "main", "status": "Reject", "tldr": "", "abstract": "Knowledge graph embeddings (KGE) models provide low-dimensional representations of the entities and relations in a knowledge graph (KG). Most prior work focused on training and evaluating KGE models for the task of link prediction; the question of whether or not KGE models provide useful representations more generally remains largely open. In this work, we explore the suitability of KGE models (i) for more general graph-structure prediction tasks and (ii) for downstream tasks such as entity classification. For (i), we found that commonly trained KGE models often perform poorly at structural tasks other than link prediction. Based on this observation, we propose a more general multi-task training approach, which includes additional self-supervised tasks such as neighborhood prediction or domain prediction. In our experiments, these multi-task KGE models showed significantly better overall performance for structural prediction tasks. For (ii), we investigate whether KGE models provide useful features for a variety of downstream tasks. Here we view KGE models as a form of self-supervised pre-training and study the impact of both model training and model selection on downstream task performance. We found that multi-task pre-training can (but does not always) significantly improve performance and that KGE models can (but do not always) compete with or even outperform task-specific GNNs trained in a supervised fashion. 
Our work suggests that more research is needed on how to pre-train KGE models and on their suitability for downstream applications.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Daniel Ruffinelli;Rainer Gemulla", "authorids": "~Daniel_Ruffinelli1;~Rainer_Gemulla1", "gender": "M;M", "homepage": "https://www.uni-mannheim.de/dws/people/researchers/postdoctoral-research-fellows/daniel-ruffinelli/;https://dws.informatik.uni-mannheim.de/en/people/professors/prof-dr-rainer-gemulla/", "dblp": "194/1649;32/5357", "google_scholar": "4p48uSoAAAAJ;https://scholar.google.de/citations?user=OnKo6KkAAAAJ", "orcid": ";0000-0003-2762-0050", "linkedin": "daniel-ruffinelli;", "or_profile": "~Daniel_Ruffinelli1;~Rainer_Gemulla1", "aff": "University of Mannheim;Universit\u00e4t Mannheim, Germany", "aff_domain": "uni-mannheim.de;uni-mannheim.de", "position": "PhD student;Full Professor", "bibtex": "@misc{\nruffinelli2023beyond,\ntitle={Beyond Link Prediction: On Pre-Training Knowledge Graph Embeddings},\nauthor={Daniel Ruffinelli and Rainer Gemulla},\nyear={2023},\nurl={https://openreview.net/forum?id=QvIyd7l718}\n}", "github": "", "project": "", "reviewers": "x7Bp;RUJy;1Rac", "site": "https://openreview.net/forum?id=QvIyd7l718", "pdf_size": 241602, "recommendation": "5;5;6", "confidence": "4;4;3", "correctness": "3;3;3", "technical_novelty": "2;2;3", "empirical_novelty": "2;2;3", "wc_summary_paper": "119;77;77", "wc_strength_and_weaknesses": "88;362;260", "wc_clarity_quality_novelty_and_reproducibility": "39;98;63", "wc_summary_review": "324;61;52", "wc_review": "570;598;452", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "676;681;440", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 91.0, 19.79898987322333 ], "wc_strength_and_weaknesses_avg": [ 236.66666666666666, 113.07028296103663 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 66.66666666666667, 24.225789747475496 ], "wc_summary_review_avg": [ 145.66666666666666, 126.15422659938466 ], "wc_review_avg": [ 540.0, 63.26663154196426 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 599.0, 112.44850673382314 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.9999999999999997, "corr_recommendation_correctness": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14836808735194523329&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1", "aff_unique_norm": "University of Mannheim;Universit\u00e4t Mannheim", "aff_unique_dep": ";", "aff_unique_url": "https://www.uni-mannheim.de;https://www.uni-mannheim.de", "aff_unique_abbr": "UM;UM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "id": "QwFw-CcUb10", "title": "Self-Architectural Knowledge Distillation for Spiking Neural Networks", "track": "main", "status": "Withdraw", "tldr": "We propose a Self-Architectural Knowledge Distillation framework (SAKD), which matches the knowledge (i.e., the features and logits) of ANNs to that of SNNs with the same architecture. 
", "abstract": "Brain-inspired spiking neural networks (SNNs) have drawn wide attention recently since they are biologically plausible and neuromorphic hardware-friendly. To obtain low-latency (i.e., a small number of timesteps) SNNs, the surrogate gradients (SG) method has been widely applied. However, SNNs trained by the SG method still have a huge performance gap from artificial neural networks (ANNs). In this paper, we find that the knowledge distillation paradigm can effectively alleviate the performance gap by transferring the knowledge from ANNs (teacher) to SNNs (student), but it remains a problem to find the architecture of teacher-student pairs. We introduce neural architecture search (NAS) and find that the performance is insensitive to the architectures of SNNs. Hence, we choose the same architecture for ANN-teacher and SNN-student since it is easy to implement and the student can initiate the weight from the teacher. We thus propose a Self-Architectural Knowledge Distillation framework (SAKD), which matches the knowledge (i.e., the features and logits) of ANNs to that of SNNs with the same architecture. Although adopting a teacher model in training, SNNs trained via our SAKD still keep ultra-low latency (T=4) compared with other methods and achieve state-of-the-art performance on a variety of datasets (e.g., CIFAR-10, CIFAR-100, ImageNet, and DVS-CIFAR10), and we demonstrate that this simple training strategy can provide a new training paradigm of SNNs.\n", "keywords": "Spiking Neural Networks;Knowledge Distillation;Neural Architecture Search", "primary_area": "", "supplementary_material": "/attachment/edabd45b9fb2a5d806f0c6674bedc4663f7811ec.zip", "author": "Haonan Qiu;Munan Ning;Li Yuan;Wei Fang;Yanqi Chen;Changlin Li;Tao Sun;Zhengyu Ma;Yonghong Tian", "authorids": "~Haonan_Qiu4;~Munan_Ning1;~Li_Yuan2;~Wei_Fang2;~Yanqi_Chen1;~Changlin_Li2;~Tao_Sun11;~Zhengyu_Ma1;~Yonghong_Tian1", "gender": "M;M;;;M;M;;;M", "homepage": "https://github.com/Maybe2022;;;https://fangwei123456.github.io/;;;https://github.com/guidingstar123;;http://www.pkuml.org", "dblp": ";214/9635;;;284/9379;;;;86/5857", "google_scholar": ";zdBKgeUAAAAJ;;https://scholar.google.com.hk/citations?user=e2lED2gAAAAJ;QzFrppAAAAAJ;https://scholar.google.com/citations?hl=en;;;https://scholar.google.com/citations?hl=en", "orcid": ";;;;0000-0002-7658-9259;;;;0000-0002-2978-5935", "linkedin": ";;;;;;;;", "or_profile": "~Haonan_Qiu4;~Munan_Ning1;~Li_Yuan2;~Wei_Fang2;~Yanqi_Chen1;~Changlin_Li2;~Tao_Sun11;~Zhengyu_Ma1;~Yonghong_Tian1", "aff": "Peking University;Peking University;;School of Computer Science, Peking University;Peking University;University of Technology Sydney;Peking University;;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;;pku.edu.cn;pku.edu.cn;uts.edu.au;pku.edu.cn;;pku.edu.cn", "position": "MS student;PhD student;;PhD student;PhD student;PhD student;MS student;;Full Professor", "bibtex": "@misc{\nqiu2023selfarchitectural,\ntitle={Self-Architectural Knowledge Distillation for Spiking Neural Networks},\nauthor={Haonan Qiu and Munan Ning and Li Yuan and Wei Fang and Yanqi Chen and Changlin Li and Tao Sun and Zhengyu Ma and Yonghong Tian},\nyear={2023},\nurl={https://openreview.net/forum?id=QwFw-CcUb10}\n}", "github": "", "project": "", "reviewers": "dL3g;eEn5;7tY2;Tf9h", "site": "https://openreview.net/forum?id=QwFw-CcUb10", "pdf_size": 1938622, "recommendation": "5;5;5;5", "confidence": "5;5;4;4", "correctness": "3;3;2;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "3;2;3;2", 
"wc_summary_paper": "122;39;63;66", "wc_strength_and_weaknesses": "158;173;138;82", "wc_clarity_quality_novelty_and_reproducibility": "86;20;14;21", "wc_summary_review": "81;17;29;30", "wc_review": "447;249;244;199", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.0, 0.0 ], "confidence_avg": [ 4.5, 0.5 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 72.5, 30.434355587066403 ], "wc_strength_and_weaknesses_avg": [ 137.75, 34.499094191007394 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 35.25, 29.422567868899545 ], "wc_summary_review_avg": [ 39.25, 24.641174890820444 ], "wc_review_avg": [ 284.75, 95.67751825794814 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 9, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5119173444927856937&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0;0;0;1;0;0", "aff_unique_norm": "Peking University;University of Technology Sydney", "aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;https://www.uts.edu.au", "aff_unique_abbr": "Peking U;UTS", "aff_campus_unique_index": "1", "aff_campus_unique": ";Beijing", "aff_country_unique_index": "0;0;0;0;1;0;0", "aff_country_unique": "China;Australia" }, { "title": "Coverage-centric Coreset Selection for High Pruning Rates", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11710", "id": "QwKvL6wC8Yi", "poster": "", "openreview": "https://openreview.net/forum?id=QwKvL6wC8Yi", "slides": "https://iclr.cc/virtual/2023/poster/11710", "video": "https://iclr.cc/virtual/2023/poster/11710", "author_site": "Haizhong Zheng, Rui Liu, Fan Lai, Atul Prakash", "tldr": "We study the importance of data coverage in coreset selection and propose a coverage-centric method for coreset selection, which we show achieves significantly better accuracy than SOTA methods with high pruning rates.", "abstract": "One-shot coreset selection aims to select a representative subset of the training data, given a pruning rate, that can later be used to train future models while retaining high accuracy. State-of-the-art coreset selection methods pick the highest importance examples based on an importance metric and are found to perform well at low pruning rates. However, at high pruning rates, they suffer from a catastrophic accuracy drop, performing worse than even random sampling. This paper explores the reasons behind this accuracy drop both theoretically and empirically. We first propose a novel metric to measure the coverage of a dataset on a specific distribution by extending the classical geometric set cover problem to a distribution cover problem. This metric helps explain why coresets selected by SOTA methods at high pruning rates perform poorly compared to random sampling because of worse data coverage. We then propose a novel one-shot coreset selection method, Coverage-centric Coreset Selection (CCS), that jointly considers overall data coverage upon a distribution as well as the importance of each example. 
We evaluate CCS on five datasets and show that, at high pruning rates (e.g., 90%), it achieves significantly better accuracy than previous SOTA methods (e.g., at least 19.56% higher on CIFAR10) as well as random selection (e.g., 7.04% higher on CIFAR10) and comparable accuracy at low pruning rates. We make our code publicly available at https://github.com/haizhongzheng/Coverage-centric-coreset-selection.", "keywords": "Coreset;Data coverage;Data pruning", "primary_area": "", "supplementary_material": "/attachment/1d9da35c3db97aaaf9628b4aca14f9d7e0d33c41.zip", "author": "Haizhong Zheng;Rui Liu;Fan Lai;Atul Prakash", "authorids": "~Haizhong_Zheng1;~Rui_Liu6;~Fan_Lai1;~Atul_Prakash1", "gender": "M;;;", "homepage": "http://zhenghaizhong.com/;;https://fanlai.me/;https://www.eecs.umich.edu/~aprakash", "dblp": "158/4817;42/469-13;179/2330;p/AtulPrakash", "google_scholar": "Zx6pKsQAAAAJ;hMR1iP4AAAAJ;PlWcEMsAAAAJ;kIkHa2IAAAAJ", "orcid": "0000-0003-3723-8701;;;0000-0002-4907-3687", "linkedin": "haizhong-zheng-1093a0a7/;;;atul-prakash-8729a44/", "or_profile": "~Haizhong_Zheng1;~Rui_Liu6;~Fan_Lai1;~Atul_Prakash1", "aff": "University of Michigan;Meta;University of Michigan;University of Michigan", "aff_domain": "umich.edu;meta.com;umich.edu;umich.edu", "position": "PhD student;Researcher;PhD student;Professor", "bibtex": "@inproceedings{\nzheng2023coveragecentric,\ntitle={Coverage-centric Coreset Selection for High Pruning Rates},\nauthor={Haizhong Zheng and Rui Liu and Fan Lai and Atul Prakash},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=QwKvL6wC8Yi}\n}", "github": "", "project": "", "reviewers": "Vae7;f2Gh;6QHN;17rD", "pdf_size": 500858, "recommendation": "5;5;6;6", "confidence": "4;4;3;4", "correctness": "3;3;3;4", "technical_novelty": "3;2;2;3", "empirical_novelty": "3;0;3;4", "wc_summary_paper": "63;375;346;173", "wc_strength_and_weaknesses": "269;132;360;369", "wc_clarity_quality_novelty_and_reproducibility": "11;40;289;78", "wc_summary_review": "17;31;36;174", "wc_review": "360;578;1031;794", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1026;945;1492;1572", "reply_reviewers": "0;0;0;0", "reply_authors": "2;2;3;2", "recommendation_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 1.5 ], "wc_summary_paper_avg": [ 239.25, 127.74657529656128 ], "wc_strength_and_weaknesses_avg": [ 282.5, 95.29034578591894 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 104.5, 109.13867325563382 ], "wc_summary_review_avg": [ 64.5, 63.602279833351886 ], "wc_review_avg": [ 690.75, 249.26830424263733 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1258.75, 276.1986377591316 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 68, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13599577988155534566&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "pdf": "https://openreview.net/pdf?id=QwKvL6wC8Yi", "email": "umich.edu;meta.com;umich.edu;umich.edu", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Michigan;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": 
"https://www.umich.edu;https://meta.com", "aff_unique_abbr": "UM;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "QwqxO8URJzn", "title": "$\\sigma$Reparam: Stable Transformer Training with Spectral Reparametrization", "track": "main", "status": "Reject", "tldr": "We introduce a weight reparameterization method which stabilizes transformer training across a variety of domains and setups, enabling simpler training recipes and robustness to hyperparameters without performance tradeoffs.", "abstract": "Training stability is of great importance to Transformers. In this work, we investigate the training dynamics of Transformers by examining the evolution of the attention layers. In particular, we track the \"attention entropy\" for each attention head during the course of training, which is a proxy of the attention's sharpness. We observe a common, non monotonic evolution of attention entropy across different settings: the attention entropy first quickly decreases in the initial phase of training, followed by quickly increasing, and finally entering a long stable phase. While the exact shape can be affected by hyperparameters such as warmup, initialization, learning rate etc., we found that there is a close correlation between the minima of attention entropy and the model's training stability. To this end, we propose a simple and efficient solution dubbed $\\sigma$Reparam, where we reparametrize all linear layers with Spectral Normalization and an additional learned scalar. We provide a lower bound on the attention entropy as a function of the spectral norms of the query and key projections, which suggests that small attention entropy can be obtained with large spectral norms. $\\sigma$Reparam decouples the growth rate of a weight matrix's spectral norm from its dimensionality, which we verify empirically. We conduct experiments with $\\sigma$Reparam on image classification, image self supervised learning, automatic speech recognition and language modeling tasks. We show that $\\sigma$Reparam provides great stability and robustness with respect to the choice of hyperparameters.", "keywords": "Transformers;self-attention;optimization;stability;spectral normalization;self-supervised learning;vision;speech;language;contrastive learning", "primary_area": "", "supplementary_material": "/attachment/0a7a92aadd6617152cf8a8a3dcec5b77708556d9.zip", "author": "Shuangfei Zhai;Tatiana Likhomanenko;Etai Littwin;Jason Ramapuram;Dan Busbridge;Yizhe Zhang;Jiatao Gu;Joshua M. 
Susskind", "authorids": "~Shuangfei_Zhai3;~Tatiana_Likhomanenko1;~Etai_Littwin1;~Jason_Ramapuram1;~Dan_Busbridge1;~Yizhe_Zhang2;~Jiatao_Gu1;~Joshua_M._Susskind1", "gender": "M;F;M;M;M;M;M;M", "homepage": "http://cs.binghamton.edu/~szhai2;https://github.com/tlikhomanenko/tlikhomanenko;;http://jramapuram.github.io;https://github.com/dbusbridge;https://dreasysnail.github.io;http://jiataogu.me;http://www.apple.com", "dblp": ";202/2094;;200/8958;220/3480;132/4966-2.html;164/5848.html;132/7797", "google_scholar": "G6vdBYsAAAAJ;https://scholar.google.ru/citations?user=x7Z3ysQAAAAJ;NOVS7vwAAAAJ;U-MT4IsAAAAJ;https://scholar.google.co.uk/citations?user=CvA9jjMAAAAJ;WDVMfggAAAAJ;https://scholar.google.com.sg/citations?user=cB1mFBsAAAAJ;Sv2TGqsAAAAJ", "orcid": ";0000-0003-0351-9839;;;0000-0002-2178-6917;;;", "linkedin": ";;;jramapuram/;danbusbridge/;;jiatao-gu-204b2672/;joshua-susskind-8ab2ab5/", "or_profile": "~Shuangfei_Zhai3;~Tatiana_Likhomanenko1;~Etai_Littwin1;~Jason_Ramapuram1;~Dan_Busbridge1;~Yizhe_Zhang2;~Jiatao_Gu1;~Joshua_M._Susskind1", "aff": "Apple;Apple;Apple;Apple;Apple;Apple;Apple;Apple", "aff_domain": "apple.com;apple.com;apple.com;apple.com;apple.com;apple.com;apple.com;apple.com", "position": "Research Scientist;Research Scientist;Researcher;Researcher;Researcher;Researcher;Researcher;Researcher", "bibtex": "@misc{\nzhai2023sigmareparam,\ntitle={\\${\\textbackslash}sigma\\$Reparam: Stable Transformer Training with Spectral Reparametrization},\nauthor={Shuangfei Zhai and Tatiana Likhomanenko and Etai Littwin and Jason Ramapuram and Dan Busbridge and Yizhe Zhang and Jiatao Gu and Joshua M. Susskind},\nyear={2023},\nurl={https://openreview.net/forum?id=QwqxO8URJzn}\n}", "github": "", "project": "", "reviewers": "qd4k;fFS8;3X2m;janJ", "site": "https://openreview.net/forum?id=QwqxO8URJzn", "pdf_size": 919991, "recommendation": "3;3;3;6", "confidence": "4;4;4;2", "correctness": "3;2;2;4", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "109;59;103;93", "wc_strength_and_weaknesses": "287;61;165;353", "wc_clarity_quality_novelty_and_reproducibility": "6;243;79;112", "wc_summary_review": "28;63;4;160", "wc_review": "430;426;351;718", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1373;1091;910;808", "reply_reviewers": "0;0;0;0", "reply_authors": "2;2;2;1", "recommendation_avg": [ 3.75, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "correctness_avg": [ 2.75, 0.82915619758885 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 91.0, 19.339079605813716 ], "wc_strength_and_weaknesses_avg": [ 216.5, 112.2886904367488 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 110.0, 85.83414239100895 ], "wc_summary_review_avg": [ 63.75, 59.39854796205039 ], "wc_review_avg": [ 481.25, 140.26292275580172 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1045.5, 214.53030089010736 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": 0.8703882797784892, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:RZB1ZSGm5MkJ:scholar.google.com/&scioq=%24%5Csigma%24Reparam:+Stable+Transformer+Training+with+Spectral+Reparametrization&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0;0;0;0", "aff_unique_norm": "Apple", "aff_unique_dep": 
"Apple Inc.", "aff_unique_url": "https://www.apple.com", "aff_unique_abbr": "Apple", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "Qx0vjIvlkev", "title": "A Deep Dive into the Stability-Plasticity Dilemma in Class-Incremental Learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "A fundamental objective in class-incremental learning is to strike a balance between stability and plasticity, where models should be both stable enough to retain knowledge learnt from previously seen classes, and plastic enough to learn concepts from new classes. While previous works demonstrate strong performance on class-incremental benchmarks, it is not clear whether their success comes from the models being stable, plastic, or a mixture of both. In this paper we aim to shed light on how effectively recent class-incremental learning algorithms address the stability-plasticity trade-off. We establish analytical tools that help measure the stability and plasticity feature representations, and employ such tools to investigate models trained with various class-incremental algorithms on large-scale class-incremental benchmarks. Surprisingly, we find that the majority of class-incremental algorithms heavily favor stability over plasticity, to the extent that the feature extractor of a model trained on the initial set of classes is no less effective than that of the final incremental model. Our observations not only inspire two simple algorithms that highlight the importance of analyzing feature representations, but also suggest that class-incremental research, in general, should strive for better feature representation learning.", "keywords": "continual learning;class-incremental learning;analysis", "primary_area": "", "supplementary_material": "/attachment/532e4795133c97b6b93693c01d891c9c292f6ae2.zip", "author": "Dongwan Kim;Bohyung Han", "authorids": "~Dongwan_Kim1;~Bohyung_Han1", "gender": "M;Not Specified", "homepage": "https://numpee.com;http://cvlab.snu.ac.kr/~bhhan", "dblp": "79/8174;73/4880.html", "google_scholar": "IfTIHT0AAAAJ;9aaeCToAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Dongwan_Kim1;~Bohyung_Han1", "aff": "NEC Labs;Seoul National University", "aff_domain": "nec-labs.com;snu.ac.kr", "position": "Intern;Full Professor", "bibtex": "@misc{\nkim2023a,\ntitle={A Deep Dive into the Stability-Plasticity Dilemma in Class-Incremental Learning},\nauthor={Dongwan Kim and Bohyung Han},\nyear={2023},\nurl={https://openreview.net/forum?id=Qx0vjIvlkev}\n}", "github": "", "project": "", "reviewers": "LWRg;rsKJ;fUvi", "site": "https://openreview.net/forum?id=Qx0vjIvlkev", "pdf_size": 865089, "recommendation": "3;3;5", "confidence": "5;4;4", "correctness": "2;2;3", "technical_novelty": "2;2;2", "empirical_novelty": "2;2;2", "wc_summary_paper": "137;35;58", "wc_strength_and_weaknesses": "336;565;110", "wc_clarity_quality_novelty_and_reproducibility": "79;22;79", "wc_summary_review": "44;33;81", "wc_review": "596;655;328", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 76.66666666666667, 43.683202975768864 ], 
"wc_strength_and_weaknesses_avg": [ 337.0, 185.754318029667 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 60.0, 26.870057685088806 ], "wc_summary_review_avg": [ 52.666666666666664, 20.531818125912658 ], "wc_review_avg": [ 526.3333333333334, 142.29624809608376 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.4999999999999999, "corr_recommendation_correctness": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:SYVwFrOf7ioJ:scholar.google.com/&scioq=A+Deep+Dive+into+the+Stability-Plasticity+Dilemma+in+Class-Incremental+Learning&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "NEC Laboratories;Seoul National University", "aff_unique_dep": ";", "aff_unique_url": "https://www.nec-labs.com;https://www.snu.ac.kr", "aff_unique_abbr": "NEC Labs;SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;South Korea" }, { "id": "Qx8lUU8CzQ", "title": "VectorMapNet: End-to-end Vectorized HD Map Learning", "track": "main", "status": "Reject", "tldr": "We proposed an end-to-end method that directly generate vectorized map from sensor data.", "abstract": "Autonomous driving systems require a good understanding of surrounding environments, including moving obstacles and static High-Definition (HD) semantic map elements. Existing methods approach the semantic map problem by offline manual annotation, which suffers from serious scalability issues. Recent learning-based methods produce dense rasterized segmentation predictions to construct maps. However, these predictions do not include instance information of individual map elements and require heuristic post-processing to obtain vectorized maps. To tackle these challenges, we introduce an end-to-end vectorized HD map learning pipeline, termed VectorMapNet. VectorMapNet takes onboard sensor observations and predicts a sparse set of polylines in the bird's-eye view. This pipeline can explicitly model the spatial relation between map elements and generate vectorized maps that are friendly to downstream autonomous driving tasks. Extensive experiments show that VectorMapNet achieve strong map learning performance on both nuScenes and Argoverse2 dataset, surpassing previous state-of-the-art methods by 14.2 mAP and 14.6mAP. Qualitatively, we also show that VectorMapNet is capable of generating comprehensive maps and capturing more fine-grained details of road geometry. 
\nTo the best of our knowledge, VectorMapNet is the first work designed towards end-to-end vectorized map learning from onboard sensors.", "keywords": "Autonomous Driving;Map Learning;Transformer", "primary_area": "", "supplementary_material": "/attachment/c82ec73943bb2de3c9f7dc6e45fb08d151cc9e20.zip", "author": "Yicheng Liu;Tianyuan Yuan;Yue Wang;Yilun Wang;Hang Zhao", "authorids": "~Yicheng_Liu2;~Tianyuan_Yuan1;~Yue_Wang2;~Yilun_Wang1;~Hang_Zhao1", "gender": "M;M;M;;M", "homepage": "https://mrmoore98.github.io/liuyicheng/;;https://yuewang.xyz;;http://www.mit.edu/~hangzhao/", "dblp": ";344/9098;33/4822-41;;", "google_scholar": "vRmsgQUAAAAJ;https://scholar.google.com/citations?hl=en;v-AEFIEAAAAJ;https://scholar.google.com.hk/citations?hl=en;DmahiOYAAAAJ", "orcid": "0000-0003-3211-3088;;;;", "linkedin": ";;;yilunw/;", "or_profile": "~Yicheng_Liu2;~Tianyuan_Yuan1;~Yue_Wang2;~Yilun_Wang1;~Hang_Zhao1", "aff": "Tsinghua University;Tsinghua University;NVIDIA;Li Auto;Tsinghua University", "aff_domain": "mail.tsinghua.edu.cn;mails.tsinghua.edu.cn;nvidia.com;lixiang.com;tsinghua.edu.cn", "position": "PhD student;PhD student;Researcher;Researcher;Assistant Professor", "bibtex": "@misc{\nliu2023vectormapnet,\ntitle={VectorMapNet: End-to-end Vectorized {HD} Map Learning},\nauthor={Yicheng Liu and Tianyuan Yuan and Yue Wang and Yilun Wang and Hang Zhao},\nyear={2023},\nurl={https://openreview.net/forum?id=Qx8lUU8CzQ}\n}", "github": "", "project": "", "reviewers": "oajd;sk1h;TD4k;5FhD", "site": "https://openreview.net/forum?id=Qx8lUU8CzQ", "pdf_size": 11894680, "recommendation": "3;5;6;8", "confidence": "5;3;3;3", "correctness": "2;4;3;4", "technical_novelty": "3;3;3;4", "empirical_novelty": "3;3;3;4", "wc_summary_paper": "74;60;115;67", "wc_strength_and_weaknesses": "157;156;299;118", "wc_clarity_quality_novelty_and_reproducibility": "180;35;25;21", "wc_summary_review": "64;9;46;23", "wc_review": "475;260;485;229", "wc_reply_reviewers": "0;184;0;0", "wc_reply_authors": "1675;705;721;458", "reply_reviewers": "0;1;0;0", "reply_authors": "3;1;1;1", "recommendation_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 79.0, 21.365860619221497 ], "wc_strength_and_weaknesses_avg": [ 182.5, 69.07423542826949 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 65.25, 66.44687727801812 ], "wc_summary_review_avg": [ 35.5, 21.10094784600919 ], "wc_review_avg": [ 362.25, 118.31182316235348 ], "wc_reply_reviewers_avg": [ 46.0, 79.67433714816836 ], "wc_reply_authors_avg": [ 889.75, 465.1974715107553 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.8006407690254357, "corr_recommendation_correctness": 0.7526178090063818, "gs_citation": 241, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3228071181816153371&as_sdt=805&sciodt=0,3&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Tsinghua University;NVIDIA;Li Auto", "aff_unique_dep": ";NVIDIA Corporation;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.nvidia.com;https://www.liauto.com", "aff_unique_abbr": "THU;NVIDIA;Li Auto", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": 
"China;United States" }, { "id": "Qyz2cMy-ty6", "title": "A New Paradigm for Federated Structure Non-IID Subgraph Learning", "track": "main", "status": "Reject", "tldr": "The first attempt to investigate the structure non-iid problem in federated subgraph learning.", "abstract": "Federated graph learning (FGL), a distributed training framework for graph neural networks (GNNs) has attracted much attention for breaking the centralized machine learning assumptions. Despite its effectiveness, the differences in data collection perspectives and quality lead to the challenges of heterogeneity, especially the domain-specific graph is partitioned into subgraphs in different institutions. However, existing FGL methods implement graph data augmentation or personalization with community split which follows the cluster homogeneity assumptions. Hence we investigate the above issues and suggest that subgraph heterogeneity is essentially the structure variations. From the observations on FGL, we first define the structure non-independent identical distribution (Non-IID) problem, which presents covariant shift challenges among client-wise subgraphs. Meanwhile, we propose a new paradigm for general federated data settings called Adaptive Federated Graph Learning (AdaFGL). The motivation behind it is to implement adaptive propagation mechanisms based on federated global knowledge and non-params label propagation. We conduct extensive experiments with community split and structure Non-IID settings, our approach achieves state-of-the-art performance on five benchmark datasets.", "keywords": "graph neural network;federated learning;structure non-iid subgraphs", "primary_area": "", "supplementary_material": "/attachment/9d1856847b853f885968b8d8a90daeadc474f664.zip", "author": "Xunkai Li;Wentao Zhang;Rong-Hua Li;Yulin Zhao;Yinlin Zhu;Guoren Wang", "authorids": "~Xunkai_Li1;~Wentao_Zhang1;~Rong-Hua_Li2;~Yulin_Zhao2;~Yinlin_Zhu1;~Guoren_Wang2", "gender": "M;;M;;M;M", "homepage": "https://xkli-allen.github.io/;;https://ronghuali.github.io/;;https://github.com/zylpopo;https://guorenwang.github.io/", "dblp": "275/2483;;37/548.html;;;", "google_scholar": "VfEdG18AAAAJ;;fOKGw-EAAAAJ;;;https://scholar.google.com/citations?hl=zh-CN", "orcid": "0000-0002-1230-7603;;0000-0002-3105-5325;;;", "linkedin": ";;;;;", "or_profile": "~Xunkai_Li1;~Wentao_Zhang1;~Rong-Hua_Li2;~Yulin_Zhao2;~Yinlin_Zhu1;~Guoren_Wang2", "aff": "Beijing Institute of Technology;;Beijing Institute of Technology;;Shandong University;Beijing Institute of Technology", "aff_domain": "bit.edu.cn;;bit.edu.cn;;sdu.edu.cn;bit.edu.cn", "position": "PhD student;;Full Professor;;Undergrad student;Full Professor", "bibtex": "@misc{\nli2023a,\ntitle={A New Paradigm for Federated Structure Non-{IID} Subgraph Learning},\nauthor={Xunkai Li and Wentao Zhang and Rong-Hua Li and Yulin Zhao and Yinlin Zhu and Guoren Wang},\nyear={2023},\nurl={https://openreview.net/forum?id=Qyz2cMy-ty6}\n}", "github": "", "project": "", "reviewers": "fd7h;45Ek;3ztS", "site": "https://openreview.net/forum?id=Qyz2cMy-ty6", "pdf_size": 725482, "recommendation": "3;5;6", "confidence": "4;3;3", "correctness": "2;3;3", "technical_novelty": "3;2;2", "empirical_novelty": "3;2;3", "wc_summary_paper": "86;65;77", "wc_strength_and_weaknesses": "266;153;100", "wc_clarity_quality_novelty_and_reproducibility": "288;198;15", "wc_summary_review": "35;68;53", "wc_review": "675;484;245", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", 
"recommendation_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 76.0, 8.602325267042627 ], "wc_strength_and_weaknesses_avg": [ 173.0, 69.2290882986817 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 167.0, 113.58697108383514 ], "wc_summary_review_avg": [ 52.0, 13.490737563232042 ], "wc_review_avg": [ 468.0, 175.91096232658916 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.9449111825230683, "corr_recommendation_correctness": 0.9449111825230683, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14265081350149890117&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Beijing Institute of Technology;Shandong University", "aff_unique_dep": ";", "aff_unique_url": "http://www.bit.edu.cn/;http://www.sdu.edu.cn", "aff_unique_abbr": "BIT;SDU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "MaskFusion: Feature Augmentation for Click-Through Rate Prediction via Input-adaptive Mask Fusion", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11694", "id": "QzbKH8nNq_V", "poster": "", "openreview": "https://openreview.net/forum?id=QzbKH8nNq_V", "slides": "https://iclr.cc/virtual/2023/poster/11694", "video": "https://iclr.cc/virtual/2023/poster/11694", "author_site": "Chao Liao, Jianchao Tan, Jiyuan Jia, Yi Guo, Chengru Song", "tldr": "Feature Augmentation via Adaptive Mask Fusion", "abstract": "Click-through rate (CTR) prediction plays important role in the advertisement, recommendation, and retrieval applications. Given the feature set, how to fully utilize the information from the feature set is an active topic in deep CTR model designs. There are several existing deep CTR works focusing on feature interactions, feature attentions, and so on. They attempt to capture high-order feature interactions to enhance the generalization ability of deep CTR models. However, these works either suffer from poor high-order feature interaction modeling using DNN or ignore the balance between generalization and memorization during the recommendation. To mitigate these problems, we propose an adaptive feature fusion framework called MaskFusion, to additionally capture the explicit interactions between the input feature and the existing deep part structure of deep CTR models dynamically, besides the common feature interactions proposed in existing works. MaskFusion is an instance-aware feature augmentation method, which makes deep CTR models more personalized by assigning each feature with an instance-adaptive mask and fusing each feature with each hidden state vector in the deep part structure. MaskFusion can also be integrated into any existing deep CTR models flexibly. 
MaskFusion achieves state-of-the-art (SOTA) performance on all seven benchmark deep CTR models with three public datasets.", "keywords": "Input-adaptive;Mask Fusion;Feature Augmentation;Click-Through rate prediction", "primary_area": "", "supplementary_material": "", "author": "Chao Liao;Jianchao Tan;Jiyuan Jia;Yi Guo;Chengru Song", "authorids": "~Chao_Liao2;~Jianchao_Tan1;~Jiyuan_Jia1;~Yi_Guo5;~Chengru_Song1", "gender": "M;M;M;M;M", "homepage": "https://github.com/TiaoziLiao;https://jianchaotan.github.io/;https://github.com/jiajiyuan;https://scholar.google.com/citations?hl=zh-TW&user=QJsDc4cAAAAJ&scilu=&scisig=AMD79ooAAAAAYPLWWnKq-cGkSB4FgGPptJ1acLO3qlTm&gmla=AJsN-F4AELcMHiSujlmlMcuAxp2jJcY1gzBiaBH_8Z1aeJHxzUmtdU_EkCgDU9bAP14zmfOqUCfD8bTmJ0tQ6msxfOTw29VqVqk7BQQ4PgfZSImX8K15aJh8R1-mcf78PztZmF-iBajj&sciund=11717972921654323345;", "dblp": ";165/9938;;;144/1365", "google_scholar": "xWoj6YsAAAAJ;1Gywy80AAAAJ;;;", "orcid": ";;;;", "linkedin": ";jianchao-tan-b58a96a7/;;;", "or_profile": "~Chao_Liao2;~Jianchao_Tan1;~Jiyuan_Jia1;~Yi_Guo5;~Chengru_Song1", "aff": "Kuaishou- \u5feb\u624b\u79d1\u6280;Kuaishou;;bytedance technology;Kuaishou- \u5feb\u624b\u79d1\u6280", "aff_domain": "kuaishou.com;kuaishou.com;;bytedance.com;kuaishou.com", "position": "Algorithm Engineer;Researcher;;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nliao2023maskfusion,\ntitle={MaskFusion: Feature Augmentation for Click-Through Rate Prediction via Input-adaptive Mask Fusion},\nauthor={Chao Liao and Jianchao Tan and Jiyuan Jia and Yi Guo and Chengru Song},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=QzbKH8nNq_V}\n}", "github": "", "project": "", "reviewers": "73YJ;bHvx;Ug5p;aUmA", "pdf_size": 2692205, "recommendation": "3;5;5;8", "confidence": "3;3;3;3", "correctness": "3;4;3;4", "technical_novelty": "2;3;2;4", "empirical_novelty": "2;3;2;3", "wc_summary_paper": "71;51;107;94", "wc_strength_and_weaknesses": "113;62;85;158", "wc_clarity_quality_novelty_and_reproducibility": "41;52;7;11", "wc_summary_review": "10;74;33;77", "wc_review": "235;239;232;340", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.25, 1.7853571071357126 ], "confidence_avg": [ 3.0, 0.0 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 80.75, 21.47527648250425 ], "wc_strength_and_weaknesses_avg": [ 104.5, 35.78058132562969 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 27.75, 19.201236939322424 ], "wc_summary_review_avg": [ 48.5, 28.217902119044926 ], "wc_review_avg": [ 261.5, 45.38997686714546 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.7001400420140049, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5903852710870730228&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "pdf": "https://openreview.net/pdf?id=QzbKH8nNq_V", "email": "kuaishou.com;kuaishou.com;;bytedance.com;kuaishou.com", "author_num": 5, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Kuaishou Technology;ByteDance", "aff_unique_dep": ";", "aff_unique_url": "https://www.kuaishou.com;https://www.bytedance.com", 
"aff_unique_abbr": "Kuaishou;ByteDance", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Causal Confusion and Reward Misidentification in Preference-Based Reward Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10822", "id": "R0Xxvr_X3ZA", "poster": "/media/PosterPDFs/ICLR%202023/10822.png?t=1682060578.044036", "openreview": "https://openreview.net/forum?id=R0Xxvr_X3ZA", "slides": "https://iclr.cc/virtual/2023/poster/10822", "video": "https://iclr.cc/virtual/2023/poster/10822", "author_site": "Jeremy Tien, Zhiyang He, Zackory Erickson, Anca Dragan, Daniel Brown", "tldr": "We identify and analyze important factors that influence causal confusion when learning rewards from human preference labels.", "abstract": "Learning policies via preference-based reward learning is an increasingly popular method for customizing agent behavior, but has been shown anecdotally to be prone to spurious correlations and reward hacking behaviors. While much prior work focuses on causal confusion in reinforcement learning and behavioral cloning, we focus on a systematic study of causal confusion and reward misidentification when learning from preferences. In particular, we perform a series of sensitivity and ablation analyses on several benchmark domains where rewards learned from preferences achieve minimal test error but fail to generalize to out-of-distribution states---resulting in poor policy performance when optimized. We find that the presence of non-causal distractor features, noise in the stated preferences, and partial state observability can all exacerbate reward misidentification. We also identify a set of methods with which to interpret misidentified learned rewards. In general, we observe that optimizing misidentified rewards drives the policy off the reward's training distribution, resulting in high predicted (learned) rewards but low true rewards. These findings illuminate the susceptibility of preference learning to reward misidentification and causal confusion---failure to consider even one of many factors can result in unexpected, undesirable behavior. ", "keywords": "reward learning;robustness;preference-based learning", "primary_area": "", "supplementary_material": "", "author": "Jeremy Tien;Jerry Zhi-Yang He;Zackory Erickson;Anca Dragan;Daniel S. Brown", "authorids": "~Jeremy_Tien1;~Jerry_Zhi-Yang_He1;~Zackory_Erickson1;~Anca_Dragan1;~Daniel_S._Brown1", "gender": "M;M;M;F;M", "homepage": ";https://herobotics.me;https://zackory.com;http://www.ancadragan.com/;https://www.cs.utah.edu/~dsbrown/", "dblp": ";;;;141/7769", "google_scholar": ";;wElkTtIAAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": ";;;;", "linkedin": "jeremy-tien/;;;;", "or_profile": "~Jeremy_Tien1;~Jerry_Zhi-Yang_He1;~Zackory_Erickson1;~Anca_Dragan1;~Daniel_S._Brown1", "aff": "University of California, Berkeley;;Carnegie Mellon University;University of California, Berkeley;University of Utah", "aff_domain": "berkeley.edu;;cmu.edu;berkeley.edu;utah.edu", "position": "Undergrad student;;Assistant Professor;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\ntien2023causal,\ntitle={Causal Confusion and Reward Misidentification in Preference-Based Reward Learning},\nauthor={Jeremy Tien and Jerry Zhi-Yang He and Zackory Erickson and Anca Dragan and Daniel S. 
Brown},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=R0Xxvr_X3ZA}\n}", "github": "", "project": "", "reviewers": "Ek1w;DzdV;hap7;DXr6;VP1L", "pdf_size": 7659788, "recommendation": "5;5;6;6;8", "confidence": "3;4;4;3;3", "correctness": "3;2;2;3;4", "technical_novelty": "2;2;3;2;2", "empirical_novelty": "2;1;3;3;3", "wc_summary_paper": "61;50;149;83;102", "wc_strength_and_weaknesses": "235;140;132;428;156", "wc_clarity_quality_novelty_and_reproducibility": "140;167;896;159;167", "wc_summary_review": "55;51;65;134;94", "wc_review": "491;408;1242;804;519", "wc_reply_reviewers": "0;39;365;143;13", "wc_reply_authors": "666;787;1553;591;586", "reply_reviewers": "0;1;1;1;1", "reply_authors": "2;2;5;2;1", "recommendation_avg": [ 6.0, 1.0954451150103321 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "correctness_avg": [ 2.8, 0.7483314773547882 ], "technical_novelty_avg": [ 2.2, 0.39999999999999997 ], "empirical_novelty_avg": [ 2.4, 0.8 ], "wc_summary_paper_avg": [ 89.0, 34.95711658589707 ], "wc_strength_and_weaknesses_avg": [ 218.2, 111.09707466895787 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 305.8, 295.26489801532455 ], "wc_summary_review_avg": [ 79.8, 30.992902413294566 ], "wc_review_avg": [ 692.8, 305.2955289551421 ], "wc_reply_reviewers_avg": [ 112.0, 136.12053482116502 ], "wc_reply_authors_avg": [ 836.6, 365.48958945502125 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 2.4, 1.3564659966250536 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.372677996249965, "corr_recommendation_correctness": 0.7319250547113999, "gs_citation": 59, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4597063200068097161&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=R0Xxvr_X3ZA", "email": "berkeley.edu;;cmu.edu;berkeley.edu;utah.edu", "author_num": 5, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "University of California, Berkeley;Carnegie Mellon University;University of Utah", "aff_unique_dep": ";;", "aff_unique_url": "https://www.berkeley.edu;https://www.cmu.edu;https://www.utah.edu", "aff_unique_abbr": "UC Berkeley;CMU;Utah", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Federated Nearest Neighbor Machine Translation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11368", "id": "R1U5G2spbLd", "poster": "/media/PosterPDFs/ICLR%202023/11368.png?t=1680880821.450402", "openreview": "https://openreview.net/forum?id=R1U5G2spbLd", "slides": "https://iclr.cc/virtual/2023/poster/11368", "video": "https://iclr.cc/virtual/2023/poster/11368", "author_site": "Yichao Du, Zhirui Zhang, Bingzhe Wu, lemao liu, Tong Xu, Enhong Chen", "tldr": "We propose a novel federated nearest neighbor machine translation framework to build low-overhead privacy-preserving MT systems in FL settings.", "abstract": "To protect user privacy and meet legal regulations, federated learning (FL) is attracting significant attention. Training neural machine translation (NMT) models with a traditional FL algorithm (e.g., FedAvg) typically relies on multi-round model-based interactions. However, this is impractical and inefficient for machine translation tasks due to the vast communication overheads and heavy synchronization. 
In this paper, we propose a novel federated nearest neighbor (FedNN) machine translation framework that, instead of multi-round model-based interactions, leverages one-round memorization-based interaction to share knowledge across different clients to build low-overhead privacy-preserving systems. The whole approach equips the public NMT model trained on large-scale accessible data with a $k$-nearest-neighbor ($k$NN) classifier and integrates the external datastore constructed from private text data in all clients to form the final FL model. A two-phase datastore encryption strategy is introduced to achieve privacy preservation during this process. Extensive experiments show that FedNN significantly reduces computational and communication costs compared with FedAvg, while maintaining promising performance in different FL settings.", "keywords": "Machine Translation;Federated Learning;Memorization Augmentation", "primary_area": "", "supplementary_material": "", "author": "Yichao Du;Zhirui Zhang;Bingzhe Wu;Lemao Liu;Tong Xu;Enhong Chen", "authorids": "~Yichao_Du1;~Zhirui_Zhang1;~Bingzhe_Wu1;redmondliu@tencent.com;~Tong_Xu2;~Enhong_Chen1", "gender": ";M;M;;M;M", "homepage": ";;;;http://staff.ustc.edu.cn/~tongxu/;http://staff.ustc.edu.cn/~cheneh", "dblp": ";202/1838;207/4843;;70/6770-1.html;07/258", "google_scholar": ";C8Ylo7sAAAAJ;_3hgtf8AAAAJ;;;Q9h02J0AAAAJ", "orcid": ";;;;0000-0003-4246-5386;0000-0002-4835-4102", "linkedin": ";;;;;", "or_profile": "~Yichao_Du1;~Zhirui_Zhang1;~Bingzhe_Wu1;redmondliu@tencent.com;~Tong_Xu2;~Enhong_Chen1", "aff": ";Tencent AI Lab;Tencent AI Lab;;University of Science and Technology of China;University of Science and Technology of China", "aff_domain": ";tencent.com;tencent.com;;ustc.edu.cn;ustc.edu.cn", "position": ";Senior Researcher;Researcher;;Full Professor;Full Professor", "bibtex": "@inproceedings{\ndu2023federated,\ntitle={Federated Nearest Neighbor Machine Translation},\nauthor={Yichao Du and Zhirui Zhang and Bingzhe Wu and Lemao Liu and Tong Xu and Enhong Chen},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=R1U5G2spbLd}\n}", "github": "", "project": "", "reviewers": "61jW;acCF;Ljh8;1QjM", "pdf_size": 1304765, "recommendation": "6;6;6;6", "confidence": "4;4;3;4", "correctness": "4;4;4;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "4;3;3;3", "wc_summary_paper": "121;144;34;121", "wc_strength_and_weaknesses": "129;181;64;531", "wc_clarity_quality_novelty_and_reproducibility": "16;126;7;127", "wc_summary_review": "37;31;39;20", "wc_review": "303;482;144;799", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "86;610;161;2401", "reply_reviewers": "0;0;0;0", "reply_authors": "2;2;2;5", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 105.0, 42.053537306628556 ], "wc_strength_and_weaknesses_avg": [ 226.25, 180.76417648416955 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 69.0, 57.589061461357396 ], "wc_summary_review_avg": [ 31.75, 7.39509972887452 ], "wc_review_avg": [ 432.0, 243.297143427538 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 814.5, 937.626924741392 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], 
"corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3035132506736176402&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=R1U5G2spbLd", "email": ";tencent.com;tencent.com;;ustc.edu.cn;ustc.edu.cn", "author_num": 6, "aff_unique_index": "0;0;1;1", "aff_unique_norm": "Tencent;University of Science and Technology of China", "aff_unique_dep": "Tencent AI Lab;", "aff_unique_url": "https://ai.tencent.com;http://www.ustc.edu.cn", "aff_unique_abbr": "Tencent AI Lab;USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "R2M14I9LEwW", "title": "A second order regression model shows edge of stability behavior", "track": "main", "status": "Reject", "tldr": "Recently observed non-linear learning effects like progressive sharpening and edge of stability occur generically in a simple, second order regression model.", "abstract": "Recent studies of learning algorithms have shown that there is a regime with an initial increase in the largest eigenvalue of the loss Hessian (progressive sharpening), followed by a stabilization of the eigenvalue near the maximum value which allows convergence (edge of stability). We consider a class of predictive models that are quadratic in the parameters, which we call second-order regression models. This is in contrast with the neural tangent kernel regime, where the predictive function is linear in the parameters. For quadratic objectives in two dimensions, we prove that this second order regression model exhibits both progressive sharpening and edge of stability behavior. We then show that in higher dimensions, the model shows this behavior generically without the structure of a neural network, due to a non-linearity induced in the learning dynamics. 
Finally, we show that edge of stability behavior in neural networks is correlated with the behavior in quadratic regression models.", "keywords": "deep learning theory;non-linear dynamics;optimization", "primary_area": "", "supplementary_material": "", "author": "Atish Agarwala;Jeffrey Pennington;Fabian Pedregosa", "authorids": "~Atish_Agarwala1;~Jeffrey_Pennington1;~Fabian_Pedregosa1", "gender": ";M;M", "homepage": ";;http://fa.bianp.net", "dblp": "265/6410.html;https://dblp.org/pers/p/Pennington:Jeffrey.html;11/9764", "google_scholar": "https://scholar.google.com/citations?hl=en;cn_FoswAAAAJ;https://scholar.google.fr/citations?hl=en", "orcid": ";;0000-0003-4025-3953", "linkedin": ";jpennin;http://www.linkedin.com/in/fabianpedregosa", "or_profile": "~Atish_Agarwala1;~Jeffrey_Pennington1;~Fabian_Pedregosa1", "aff": "Google;Google;Google AI", "aff_domain": "google.com;google.com;google.com", "position": "Researcher;Research Scientist;Research Scientist", "bibtex": "@misc{\nagarwala2023a,\ntitle={A second order regression model shows edge of stability behavior},\nauthor={Atish Agarwala and Jeffrey Pennington and Fabian Pedregosa},\nyear={2023},\nurl={https://openreview.net/forum?id=R2M14I9LEwW}\n}", "github": "", "project": "", "reviewers": "saA5;n7HJ;7fHN;GxcF;T8Nt", "site": "https://openreview.net/forum?id=R2M14I9LEwW", "pdf_size": 10917487, "recommendation": "5;6;6;6;8", "confidence": "4;4;3;1;3", "correctness": "3;3;3;4;4", "technical_novelty": "3;2;4;2;3", "empirical_novelty": "2;1;4;2;0", "wc_summary_paper": "51;39;116;50;97", "wc_strength_and_weaknesses": "411;54;50;60;66", "wc_clarity_quality_novelty_and_reproducibility": "43;516;4;10;66", "wc_summary_review": "35;38;10;10;7", "wc_review": "540;647;180;130;236", "wc_reply_reviewers": "0;612;0;0;0", "wc_reply_authors": "987;2296;76;8;63", "reply_reviewers": "0;6;0;0;0", "reply_authors": "3;9;1;1;1", "recommendation_avg": [ 6.2, 0.9797958971132712 ], "confidence_avg": [ 3.0, 1.0954451150103321 ], "correctness_avg": [ 3.4, 0.4898979485566356 ], "technical_novelty_avg": [ 2.8, 0.7483314773547882 ], "empirical_novelty_avg": [ 1.8, 1.32664991614216 ], "wc_summary_paper_avg": [ 70.6, 30.216551755618973 ], "wc_strength_and_weaknesses_avg": [ 128.2, 141.5039222071247 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 127.8, 195.40358236224841 ], "wc_summary_review_avg": [ 20.0, 13.549907748763458 ], "wc_review_avg": [ 346.6, 207.1459388933319 ], "wc_reply_reviewers_avg": [ 122.4, 244.80000000000004 ], "wc_reply_authors_avg": [ 686.0, 883.4720142709672 ], "reply_reviewers_avg": [ 1.2, 2.4000000000000004 ], "reply_authors_avg": [ 3.0, 3.0983866769659336 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.1863389981249825, "corr_recommendation_correctness": 0.6666666666666667, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11567682578623747362&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "R370fuGO7JJ", "title": "Multi-scale Attention for Diabetic Retinopathy Detection in Retinal Fundus Images", "track": "main", "status": "Reject", "tldr": "This paper proposes a novel deep learning-based approach for grading of Diabetic Retinopathy in fundus
photograph", "abstract": "The diagnosis and/or grading of diabetic retinopathy (DR) in the retina fundus has traditionally been done by physicians using manual procedures. However, there has been a significant demand for automated eye diagnostic and grading systems due to the constant rise in the number of persons with diabetes over the past few decades. An excellent diagnostic and predictive value for treatment planning exists with automatic DR grading based on retinal fundus pictures. With the majority of the current automated DR grading systems, it is exceedingly challenging to capture significant features because of the minor changes between severity levels. This paper presents a deep learning-based method for automatically assessing diabetic retinopathy in retina fundus pictures. This paper presents a deep learning-based method for automatically assessing diabetic retinopathy in retina fundus pictures. In order to increase the discriminative ability of the retrieved features, we implement a multi-scale attention mechanism within a deep convolutional neural network architecture in this research. Additionally, we provide a brand-new loss function termed modified grading loss that enhances the training convergence of the suggested strategy by taking into account the distance between various grades of distinct DR categories. The suggested technique is trained, validated, and tested using a dataset about diabetic retinopathy that is openly available. The experimental findings are presented to illustrate how well the suggested strategy competes.", "keywords": "diabetes;deep learning;diabetic retinopathy;microvascular complication;hyperglycemia;attention;CNN", "primary_area": "", "supplementary_material": "", "author": "Temitope Ibrahim Amosa;Patrick Sebastian;Lila Iznita Izhar;Fatimat Adeola Adekola;Mardiyyah Adeola Salahudeen", "authorids": "~Temitope_Ibrahim_Amosa1;patrick_sebastian@utp.edu.my;lila.izhar@utp.edu.my;deeteemarh@gmail.com;temitopemard@gmail.com", "gender": "M;;;;", "homepage": ";;;;", "dblp": ";;;;", "google_scholar": "Lloa6s4AAAAJ;;;;", "orcid": ";;;;", "linkedin": "temitope-ibrahim-amosa-771583167/;;;;", "or_profile": "~Temitope_Ibrahim_Amosa1;patrick_sebastian@utp.edu.my;lila.izhar@utp.edu.my;deeteemarh@gmail.com;temitopemard@gmail.com", "aff": "Universiti Teknologi Petronas;;;;", "aff_domain": "utp.edu.my;;;;", "position": "MS student;;;;", "bibtex": "@misc{\namosa2023multiscale,\ntitle={Multi-scale Attention for Diabetic Retinopathy Detection in Retinal Fundus Images},\nauthor={Temitope Ibrahim Amosa and Patrick Sebastian and Lila Iznita Izhar and Fatimat Adeola Adekola and Mardiyyah Adeola Salahudeen},\nyear={2023},\nurl={https://openreview.net/forum?id=R370fuGO7JJ}\n}", "github": "", "project": "", "reviewers": "zeSz;mS2T;5o7H", "site": "https://openreview.net/forum?id=R370fuGO7JJ", "pdf_size": 3950639, "recommendation": "1;3;3", "confidence": "5;5;5", "correctness": "1;3;2", "technical_novelty": "1;2;1", "empirical_novelty": "0;2;1", "wc_summary_paper": "63;36;93", "wc_strength_and_weaknesses": "249;97;289", "wc_clarity_quality_novelty_and_reproducibility": "23;55;21", "wc_summary_review": "28;61;72", "wc_review": "363;249;475", "wc_reply_reviewers": "12;0;47", "wc_reply_authors": "0;109;0", "reply_reviewers": "1;0;1", "reply_authors": "0;1;0", "recommendation_avg": [ 2.3333333333333335, 0.9428090415820634 ], "confidence_avg": [ 5.0, 0.0 ], "correctness_avg": [ 2.0, 0.816496580927726 ], "technical_novelty_avg": [ 1.3333333333333333, 0.4714045207910317 ], 
"empirical_novelty_avg": [ 1.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 64.0, 23.280893453645632 ], "wc_strength_and_weaknesses_avg": [ 211.66666666666666, 82.70966623611106 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 33.0, 15.57776192739723 ], "wc_summary_review_avg": [ 53.666666666666664, 18.696404883173543 ], "wc_review_avg": [ 362.3333333333333, 92.26531790921705 ], "wc_reply_reviewers_avg": [ 19.666666666666668, 19.938795238317574 ], "wc_reply_authors_avg": [ 36.333333333333336, 51.383092766222454 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 0.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.8660254037844387, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:BJCX1sMcObgJ:scholar.google.com/&scioq=Multi-scale+Attention+for+Diabetic+Retinopathy+Detection+in+Retinal+Fundus+Images&hl=en&as_sdt=0,14", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Universiti Teknologi Petronas", "aff_unique_dep": "", "aff_unique_url": "https://www.utp.edu.my", "aff_unique_abbr": "UTP", "aff_country_unique_index": "0", "aff_country_unique": "Malaysia" }, { "id": "R3xfdth4Gyl", "title": "Learning Task Agnostic Temporal Consistency Correction", "track": "main", "status": "Withdraw", "tldr": "This work provides a general framework for task agnostic video temporal consistency correction capable of producing visually pleasing and temporally consistent videos without the requiring their unprocessed counterparts.", "abstract": "In many video restoration/translation tasks, image processing operations are naively extended to the video domain by processing each frame independently. This disregard for the temporal connection in video processing often leads to severe temporal inconsistencies. State-of-the-art techniques that address these inconsistencies rely on the availability of unprocessed videos to siphon consistent video dynamics to restore the temporal consistency of frame-wise processed videos. We propose a novel general framework for this task that learns to infer consistent motion dynamics from inconsistent videos to mitigate the temporal flicker while preserving the perceptual quality for both the temporally neighboring and relatively distant frames. The proposed framework produces state-of-the-art results on two benchmark datasets, DAVIS and videvo.net, processed by numerous image processing applications in a frame-wise processing manner. 
The code and the trained models will be released upon acceptance.", "keywords": "Video Processing;Temporal Consistency Correction;Video Restoration", "primary_area": "", "supplementary_material": "/attachment/70236f2f715659a60eee3bd536fc7df98acefbda.zip", "author": "Muhammad Kashif Ali;Dongjin Kim;Tae Hyun Kim", "authorids": "~Muhammad_Kashif_Ali1;~Dongjin_Kim3;~Tae_Hyun_Kim2", "gender": ";;M", "homepage": ";https://sites.google.com/view/lliger9/;https://sites.google.com/view/lliger9/", "dblp": ";16/9611-4;43/11343-6", "google_scholar": ";https://scholar.google.co.kr/citations?user=6I9aJxYAAAAJ;https://scholar.google.co.kr/citations?user=8soccsoAAAAJ", "orcid": ";;0000-0002-7995-3984", "linkedin": ";;", "or_profile": "~Muhammad_Kashif_Ali1;~Dongjin_Kim3;~Tae_Hyun_Kim2", "aff": ";Hanyang University;Hanyang University", "aff_domain": ";hanyang.ac.kr;hanyang.ac.kr", "position": ";MS student;Associate Professor", "bibtex": "@misc{\nali2023learning,\ntitle={Learning Task Agnostic Temporal Consistency Correction},\nauthor={Muhammad Kashif Ali and Dongjin Kim and Tae Hyun Kim},\nyear={2023},\nurl={https://openreview.net/forum?id=R3xfdth4Gyl}\n}", "github": "", "project": "", "reviewers": "pSau;oWNN;PzJU;hDJ7", "site": "https://openreview.net/forum?id=R3xfdth4Gyl", "pdf_size": 39881203, "recommendation": "3;3;5;5", "confidence": "4;4;4;3", "correctness": "3;2;3;3", "technical_novelty": "3;2;2;2", "empirical_novelty": "3;2;0;2", "wc_summary_paper": "113;66;218;88", "wc_strength_and_weaknesses": "496;202;180;186", "wc_clarity_quality_novelty_and_reproducibility": "757;62;313;82", "wc_summary_review": "142;97;59;39", "wc_review": "1508;427;770;395", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.75, 1.0897247358851685 ], "wc_summary_paper_avg": [ 121.25, 58.28110757355251 ], "wc_strength_and_weaknesses_avg": [ 266.0, 133.0338302838793 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 303.5, 279.79322722324787 ], "wc_summary_review_avg": [ 84.25, 39.31523241696531 ], "wc_review_avg": [ 775.0, 448.00055803536674 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2009764738951600385&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Hanyang University", "aff_unique_dep": "", "aff_unique_url": "https://www.hanyang.ac.kr", "aff_unique_abbr": "HYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "id": "R498E9vaqZ", "title": "Adaptive Update Direction Rectification for Unsupervised Continual Learning", "track": "main", "status": "Reject", "tldr": "We propose an Actor-Critic framework with adaptive update direction rectification for unsupervised continual learning.", "abstract": "Recent works on continual learning have shown that unsupervised continual learning (UCL) methods rival or even beat supervised continual learning methods. 
However, most UCL methods typically adopt fixed learning strategies with pre-defined objectives and ignore the influence of the constant shift of data distributions on the training of newer tasks. This non-adaptive paradigm tends to achieve sub-optimal performance, since the optimal update direction (to ensure the trade-off between old and new tasks) keeps changing during training over sequential tasks. In this work, we thus propose a novel UCL framework termed AUDR to adaptively rectify the update direction by a policy network (i.e., the Actor) at each training step based on the reward predicted by a value network (i.e., the Critic). Concretely, different from existing Actor-Critic based reinforcement learning works, there are three vital designs that make our AUDR applicable to the UCL setting: (1) A reward function to measure the score/value of the currently selected action, which provides the ground-truth reward to guide the Critic's predictions; (2) An action space for the Actor to select actions (i.e., update directions) according to the reward predicted by the Critic; (3) A multinomial sampling strategy with a lower-bound on the sampling probability of each action, which is designed to increase the variance of the Actor's selected actions for more diversified exploration. Extensive experiments show that our AUDR achieves state-of-the-art results under both the in-dataset and cross-dataset UCL settings. Importantly, our AUDR also shows superior performance when combined with other UCL methods, which suggests that our AUDR is highly extensible and versatile.", "keywords": "Continual learning;unsupervised learning;representation learning", "primary_area": "", "supplementary_material": "/attachment/28437bb4007563c3a4cec544860a332ca1d35ee2.zip", "author": "Yizhao Gao;Nanyi Fei;Zhiwu Lu", "authorids": "~Yizhao_Gao1;~Nanyi_Fei1;~Zhiwu_Lu1", "gender": "M;M;M", "homepage": ";;https://gsai.ruc.edu.cn/luzhiwu", "dblp": "132/7629;232/2227;53/5234", "google_scholar": "https://scholar.google.com/citations?hl=en;Oz6VqeQAAAAJ;OUXS8doAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Yizhao_Gao1;~Nanyi_Fei1;~Zhiwu_Lu1", "aff": "Renmin University of China;Renmin University of China;Renmin University of China", "aff_domain": "ruc.edu.cn;ruc.edu.cn;ruc.edu.cn", "position": "PhD student;PhD student;Full Professor", "bibtex": "@misc{\ngao2023adaptive,\ntitle={Adaptive Update Direction Rectification for Unsupervised Continual Learning},\nauthor={Yizhao Gao and Nanyi Fei and Zhiwu Lu},\nyear={2023},\nurl={https://openreview.net/forum?id=R498E9vaqZ}\n}", "github": "", "project": "", "reviewers": "GyrZ;6y1n;kqaK;1R3g", "site": "https://openreview.net/forum?id=R498E9vaqZ", "pdf_size": 602278, "recommendation": "6;6;6;6", "confidence": "3;2;3;5", "correctness": "3;3;4;3", "technical_novelty": "2;3;3;2", "empirical_novelty": "3;3;3;2", "wc_summary_paper": "102;116;103;92", "wc_strength_and_weaknesses": "353;106;242;501", "wc_clarity_quality_novelty_and_reproducibility": "75;190;97;10", "wc_summary_review": "88;34;197;50", "wc_review": "618;446;639;653", "wc_reply_reviewers": "50;0;0;114", "wc_reply_authors": "588;597;303;1375", "reply_reviewers": "1;0;0;1", "reply_authors": "1;1;1;3", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.25, 1.0897247358851685 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 103.25, 8.525696452489967 ], "wc_strength_and_weaknesses_avg": [ 300.5,
145.0939350903407 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 93.0, 64.49418578445658 ], "wc_summary_review_avg": [ 92.25, 63.5781998801476 ], "wc_review_avg": [ 589.0, 83.49550886125553 ], "wc_reply_reviewers_avg": [ 41.0, 46.82947789587238 ], "wc_reply_authors_avg": [ 715.75, 398.5582611112207 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:WpQz5u6HvcwJ:scholar.google.com/&scioq=Adaptive+Update+Direction+Rectification+for+Unsupervised+Continual+Learning&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Renmin University of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ruc.edu.cn", "aff_unique_abbr": "RUC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "R4ETr5gcg5v", "title": "Chopping Formers is what you need in Vision", "track": "main", "status": "Reject", "tldr": "In this work, we unify prior methods and present a new efficient factorization for a general fully-connected and dynamic layer.", "abstract": "This work presents a new dynamic and fully-connected layer (DFC) that generalizes existing layers and is free from hard inductive biases. Then, it describes how to factorize the DFC weights efficiently.\nUsing the Einstein convention as framework, we define the DFC as a fully connected layer with the weight tensor created as a function of the input. DFC is the non-linear extension of the most general case of linear layer for neural network, and therefore all major neural network layers, from convolution to self-attention, are particular cases of DFCs. A stack of DFCs interleaved by non-linearities defines a new super-class of neural networks: \\emph{Formers}.\nDFC has four major characteristics: it is Dynamic and Spatially Adaptive, it has a Global Receptive Field, and it mixes all the available channels' information. \nIn their complete form, DFCs are powerful layers free from hard inductive biases, but their use is limited in practice by their prohibitive computational cost. To overcome this limitation and deploy DFC in real computer-vision applications, we propose to use the CP decomposition, showing that it is possible to factorize the DFC layer into smaller, manageable blocks without losing any representational power. Finally, we propose ChoP'D Former, an architecture making use of a new decomposition of the DFC layer into five sequential operations, each incorporating one characteristic of the original DFC tensor. Chop'D Former leverages dynamic gating and integral image, achieves global spatial reasoning with constant time complexity, and has a receptive field that can adapt depending on the task. Extensive experiments demonstrate that our ChoP'D Former is competitive with state-of-the-art results on three well-known computer vision benchmarks, namely Large-Scale Classification, Object Detection, and Instance Segmentation, suppressing the need for expensive architecture search and hyperparameter optimization. 
", "keywords": "Transformers;Tensor Decomposition;Deep learning Architectures", "primary_area": "", "supplementary_material": "/attachment/f36ffc7d3ae057650dad09d05b97b76912ebd4b9.zip", "author": "Francesca Babiloni;Thomas Tanay;Matteo Maggioni;Jiankang Deng;Ales Leonardis;Stefanos Zafeiriou", "authorids": "~Francesca_Babiloni2;~Thomas_Tanay1;~Matteo_Maggioni1;~Jiankang_Deng1;~Ales_Leonardis1;~Stefanos_Zafeiriou1", "gender": ";;;M;;M", "homepage": ";;;https://jiankangdeng.github.io/;;http://www.imperial.ac.uk/people/s.zafeiriou/", "dblp": "169/4678.html;;;156/7808;;25/1885.html", "google_scholar": ";;;Z_UoQFsAAAAJ;;QKOH5iYAAAAJ", "orcid": ";;;0000-0002-3709-6216;;", "linkedin": ";;;jiankang-deng-b45b21b4/?originalSubdomain=uk;;", "or_profile": "~Francesca_Babiloni2;~Thomas_Tanay1;~Matteo_Maggioni1;~Jiankang_Deng1;~Ales_Leonardis1;~Stefanos_Zafeiriou1", "aff": "Huawei Technologies Ltd.;;;;;Imperial College London", "aff_domain": "huawei.com;;;;;ic.ac.uk", "position": "Researcher;;;;;Full Professor", "bibtex": "@misc{\nbabiloni2023chopping,\ntitle={Chopping Formers is what you need in Vision},\nauthor={Francesca Babiloni and Thomas Tanay and Matteo Maggioni and Jiankang Deng and Ales Leonardis and Stefanos Zafeiriou},\nyear={2023},\nurl={https://openreview.net/forum?id=R4ETr5gcg5v}\n}", "github": "", "project": "", "reviewers": "cu3r;a5bE;8HGv;T6EK;P57x", "site": "https://openreview.net/forum?id=R4ETr5gcg5v", "pdf_size": 1038615, "recommendation": "3;3;5;6;6", "confidence": "5;4;4;4;4", "correctness": "2;2;3;3;4", "technical_novelty": "3;1;3;3;3", "empirical_novelty": "2;2;2;3;3", "wc_summary_paper": "57;24;69;138;77", "wc_strength_and_weaknesses": "470;438;209;180;228", "wc_clarity_quality_novelty_and_reproducibility": "48;8;27;16;68", "wc_summary_review": "80;26;42;12;44", "wc_review": "655;496;347;346;417", "wc_reply_reviewers": "502;150;0;41;47", "wc_reply_authors": "1730;1447;530;649;672", "reply_reviewers": "1;1;0;1;1", "reply_authors": "3;2;1;1;1", "recommendation_avg": [ 4.6, 1.3564659966250536 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "correctness_avg": [ 2.8, 0.7483314773547882 ], "technical_novelty_avg": [ 2.6, 0.8000000000000002 ], "empirical_novelty_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_paper_avg": [ 73.0, 37.1860188780676 ], "wc_strength_and_weaknesses_avg": [ 305.0, 123.03170323132164 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 33.4, 21.905250512148907 ], "wc_summary_review_avg": [ 40.8, 22.789471253190587 ], "wc_review_avg": [ 452.2, 115.43898821455427 ], "wc_reply_reviewers_avg": [ 148.0, 183.77921536452376 ], "wc_reply_authors_avg": [ 1005.6, 486.6697442825062 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.5897678246195884, "corr_recommendation_correctness": 0.9063269671749656, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ua0xHOIz7KIJ:scholar.google.com/&scioq=Chopping+Formers+is+what+you+need+in+Vision&hl=en&as_sdt=0,10", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Huawei;Imperial College London", "aff_unique_dep": "Huawei Technologies;", "aff_unique_url": "https://www.huawei.com;https://www.imperial.ac.uk", "aff_unique_abbr": "Huawei;ICL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "China;United Kingdom" }, { "id": "R4oodnmxb9m", "title": "Offline Communication Learning with Multi-source 
Datasets", "track": "main", "status": "Reject", "tldr": "", "abstract": "Scalability and partial observability are two major challenges faced by multi-agent reinforcement learning. Recently researchers propose offline MARL algorithms to improve scalability by reducing online exploration cost, while the problem of partial observability is often ignored in the offline MARL setting. Communication is a promising approach to alleviate the miscoordination caused by partially observability, thus in this paper we focus on offline communication learning where agents learn from an fixed dataset. We find out that learning communications in an end-to-end manner from a given offline dateset without communication information is intractable, since the correct communication protocol space is too sparse compared with the exponentially growing joint state-action space when the number of agents increases. Besides, unlike offline policy learning which can be guided by reward signals, offline communication learning is struggling since communication messages implicitly impact the reward. Moreover, in real-world applications, offline MARL datasets are often collected from multi-source, leaving offline MARL communication learning more challenging. Therefore, we present a new benchmark which contains a diverse set of challenging offline MARL communication tasks with single/multi-source datasets, and propose a novel Multi-Head structure for Communication Imitation learning (MHCI) algorithm that automatically adapts to the distribution of the dataset. Empirical result shows the effectiveness of our method on various tasks of the new offline communication learning benchmark.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/ff7fede3ff2a6244499410bcd4921039a3a8fd9d.zip", "author": "Yihuan Mao;Rui Hu;Lulu Zheng;Jianhao Wang;Chongjie Zhang", "authorids": "~Yihuan_Mao2;~Rui_Hu6;~Lulu_Zheng1;~Jianhao_Wang1;~Chongjie_Zhang1", "gender": ";M;F;M;", "homepage": "http://group.iiis.tsinghua.edu.cn/~milab/person-maoyihuan.html;https://github.com/streek666;;http://group.iiis.tsinghua.edu.cn/~milab/;", "dblp": "232/2328;;;https://dblp.uni-trier.de/pid/239/5945;29/6693", "google_scholar": ";;;;LjxqXycAAAAJ", "orcid": ";;;;", "linkedin": ";;\u7490\u7490-\u90d1-2b44b61bb/;;", "or_profile": "~Yihuan_Mao2;~Rui_Hu6;~Lulu_Zheng1;~Jianhao_Wang1;~Chongjie_Zhang1", "aff": "Tsinghua University;Tsinghua University;;Tsinghua University;Tsinghua University", "aff_domain": "mails.tsinghua.edu.cn;mails.tsinghua.edu.cn;;tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;Undergrad student;;PhD student;Assistant Professor", "bibtex": "@misc{\nmao2023offline,\ntitle={Offline Communication Learning with Multi-source Datasets},\nauthor={Yihuan Mao and Rui Hu and Lulu Zheng and Jianhao Wang and Chongjie Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=R4oodnmxb9m}\n}", "github": "", "project": "", "reviewers": "gcz9;311E;UfVH;zXRw", "site": "https://openreview.net/forum?id=R4oodnmxb9m", "pdf_size": 2920558, "recommendation": "3;3;5;5", "confidence": "4;4;5;4", "correctness": "3;3;3;3", "technical_novelty": "3;3;2;3", "empirical_novelty": "3;2;2;3", "wc_summary_paper": "79;102;57;46", "wc_strength_and_weaknesses": "179;287;44;187", "wc_clarity_quality_novelty_and_reproducibility": "59;109;4;34", "wc_summary_review": "20;61;4;52", "wc_review": "337;559;109;319", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "221;420;49;281", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": 
[ 4.0, 1.0 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 71.0, 21.482551058940835 ], "wc_strength_and_weaknesses_avg": [ 174.25, 86.40420996687604 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 51.5, 38.48701079585163 ], "wc_summary_review_avg": [ 34.25, 23.177305710543667 ], "wc_review_avg": [ 331.0, 159.25451327984396 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 242.75, 133.12846239628848 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:EFK2sUla5moJ:scholar.google.com/&scioq=Offline+Communication+Learning+with+Multi-source+Datasets&hl=en&as_sdt=0,10", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "R6zTkW_w_PV", "title": "Correspondences between word learning in children and captioning models", "track": "main", "status": "Withdraw", "tldr": "We show that image captioning systems' performance correlates with the age at which children acquire words from a variety of word categories.", "abstract": "For human children as well as machine learning systems, a key challenge in learning a word is linking the word to the visual phenomena it describes. By organizing model output into word categories used to analyze child language learning data, we show a correspondence between word learning in children and the performance of image captioning models. Although captioning models are trained only on standard machine learning data, we find that their performance in producing words from a variety of word categories correlates with the age at which children acquire words from each of those categories. To explain why this correspondence exists, we show that the performance of captioning models is correlated with human judgments of the concreteness of words, suggesting that these models are capturing the complex real-world association between words and visual phenomena.", "keywords": "cognitive science;child development;language;image captioning;computer vision", "primary_area": "", "supplementary_material": "", "author": "Sunayana Rane;Mira L Nencheva;Zeyu Wang;Casey Lew-Williams;Olga Russakovsky;Thomas L. 
Griffiths", "authorids": "~Sunayana_Rane1;nencheva@princeton.edu;~Zeyu_Wang1;~Casey_Lew-Williams1;~Olga_Russakovsky1;~Thomas_L._Griffiths1", "gender": ";;;M;F;", "homepage": ";;;http://babylab.princeton.edu/;http://cs.princeton.edu/~olgarus;http://cocosci.princeton.edu/tom/", "dblp": ";;132/7882-4.html;;52/6883;34/4472", "google_scholar": ";;https://scholar.google.com/citations?hl=en;;TB5OwW8AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0002-7057-1613;;0000-0001-5272-3241;", "linkedin": ";;;;;", "or_profile": "~Sunayana_Rane1;nencheva@princeton.edu;~Zeyu_Wang1;~Casey_Lew-Williams1;~Olga_Russakovsky1;~Thomas_L._Griffiths1", "aff": ";;Stanford University;Princeton University;Princeton University;Princeton University", "aff_domain": ";;stanford.edu;princeton.edu;princeton.edu;princeton.edu", "position": ";;Postdoc;Full Professor;Assistant Professor;Professor", "bibtex": "@misc{\nrane2023correspondences,\ntitle={Correspondences between word learning in children and captioning models },\nauthor={Sunayana Rane and Mira L Nencheva and Zeyu Wang and Casey Lew-Williams and Olga Russakovsky and Thomas L. Griffiths},\nyear={2023},\nurl={https://openreview.net/forum?id=R6zTkW_w_PV}\n}", "github": "", "project": "", "reviewers": "PbHw;f4ug;sQPi;JFTn", "site": "https://openreview.net/forum?id=R6zTkW_w_PV", "pdf_size": 2657864, "recommendation": "1;3;3;3", "confidence": "3;4;3;3", "correctness": "2;3;3;3", "technical_novelty": "1;1;2;2", "empirical_novelty": "1;2;3;2", "wc_summary_paper": "79;127;73;170", "wc_strength_and_weaknesses": "77;543;25;339", "wc_clarity_quality_novelty_and_reproducibility": "11;258;8;39", "wc_summary_review": "39;213;38;33", "wc_review": "206;1141;144;581", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 2.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 1.5, 0.5 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 112.25, 39.36607041603213 ], "wc_strength_and_weaknesses_avg": [ 246.0, 208.72230355187247 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 79.0, 104.05046852369287 ], "wc_summary_review_avg": [ 80.75, 76.38839898832806 ], "wc_review_avg": [ 518.0, 396.6478286843381 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:R6LSWuGYnkAJ:scholar.google.com/&scioq=Correspondences+between+word+learning+in+children+and+captioning+models&hl=en&as_sdt=0,14", "gs_version_total": 0, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Stanford University;Princeton University", "aff_unique_dep": ";", "aff_unique_url": "https://www.stanford.edu;https://www.princeton.edu", "aff_unique_abbr": "Stanford;Princeton", "aff_campus_unique_index": "0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "R8GW1hR1kE", "title": "Guide Detectors in Pixel Space with Global Positioning and Abductive Matching", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "End-to-End object Detector ensembles prior knowledge in a concise framework. 
DETR (DEtection TRansformer) contains two steps: learning object queries in the representation space and matching the queries with boxes in the pixel space. The ambiguity of object queries in DETR leads to an uncertain assignment in the Hungarian Matching. The formulation loss in the pixel space will in turn affect the learned representations. Therefore, we propose the Abductive DETR, which learns object queries in the representation space with global positioning in the pixel space and matches object queries in the pixel space with the abductive awareness from the representation space. Experimentally, Abductive DETR can be transferred to other DETR-variant methods and achieves a satisfactory improvement. It takes only 2 epochs to achieve 98.7% accuracy in predicting the number of objects. Compared with other state-of-the-art methods on the MS COCO dataset, Abductive DETR also achieves outstanding performance and converges much faster. Our code will be made publicly available soon.", "keywords": "Object Detection;Abductive DETR", "primary_area": "", "supplementary_material": "", "author": "Yiyuan Zhang;Wencheng Han;Jianbing Shen", "authorids": "~Yiyuan_Zhang1;~Wencheng_Han1;~Jianbing_Shen1", "gender": "M;M;M", "homepage": "https://invictus717.github.io/;https://wencheng256.github.io/;https://scholar.google.com/citations?user=_Q3NTToAAAAJ&hl=en", "dblp": "163/6458;280/3348.html;38/5435", "google_scholar": "KuYlJCIAAAAJ;hGZueIUAAAAJ;_Q3NTToAAAAJ", "orcid": "0000-0001-6643-9698;0009-0005-2358-6969;0000-0003-2656-3082", "linkedin": ";;", "or_profile": "~Yiyuan_Zhang1;~Wencheng_Han1;~Jianbing_Shen1", "aff": "Beijing Institute of Technology;University of Macau;University of Macau", "aff_domain": "bit.edu.cn;um.edu.mo;um.edu.mo", "position": "Undergrad student;PhD student;Full Professor", "bibtex": "@misc{\nzhang2023guide,\ntitle={Guide Detectors in Pixel Space with Global Positioning and Abductive Matching},\nauthor={Yiyuan Zhang and Wencheng Han and Jianbing Shen},\nyear={2023},\nurl={https://openreview.net/forum?id=R8GW1hR1kE}\n}", "github": "", "project": "", "reviewers": "Vt1d;M7ML;eKqp;XThV", "site": "https://openreview.net/forum?id=R8GW1hR1kE", "pdf_size": 701786, "recommendation": "1;5;6;8", "confidence": "4;4;2;4", "correctness": "2;3;2;1", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "67;93;89;52", "wc_strength_and_weaknesses": "312;212;322;149", "wc_clarity_quality_novelty_and_reproducibility": "107;24;120;27", "wc_summary_review": "30;21;70;28", "wc_review": "516;350;601;256", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.0, 2.5495097567963922 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "correctness_avg": [ 2.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 75.25, 16.67895380412093 ], "wc_strength_and_weaknesses_avg": [ 248.75, 71.87967376108492 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 69.5, 44.252118593350986 ], "wc_summary_review_avg": [ 37.25, 19.201236939322424 ], "wc_review_avg": [ 430.75, 135.37978985062725 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.22645540682891915, "corr_recommendation_correctness": -0.4160251471689218,
"gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Pfsd_5dLDAsJ:scholar.google.com/&scioq=Guide+Detectors+in+Pixel+Space+with+Global+Positioning+and+Abductive+Matching&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;1", "aff_unique_norm": "Beijing Institute of Technology;University of Macau", "aff_unique_dep": ";", "aff_unique_url": "http://www.bit.edu.cn/;https://www.um.edu.mo", "aff_unique_abbr": "BIT;UM", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Macau SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Efficient Discrete Multi Marginal Optimal Transport Regularization", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11461", "id": "R98ZfMt-jE", "poster": "", "openreview": "https://openreview.net/forum?id=R98ZfMt-jE", "slides": "https://iclr.cc/virtual/2023/poster/11461", "video": "https://iclr.cc/virtual/2023/poster/11461", "author_site": "Ronak Mehta, Jeffery Kline, Vishnu Lokhande, Glenn Fung, Vikas Singh", "tldr": "Using a fast algorithm for computing generalized earth mover's distances, we solve practical discrete multi-marginal optimal transport problems in neural network learning applications.", "abstract": "Optimal transport has emerged as a powerful tool for a variety of problems in machine learning, and it is frequently used to enforce distributional constraints. In this context, existing methods often use either a Wasserstein metric, or else they apply concurrent barycenter approaches when more than two distributions are considered. In this paper, we leverage multi-marginal optimal transport (MMOT), where we take advantage of a procedure that computes a generalized earth mover's distance as a sub-routine. We show that not only is our algorithm computationally more efficient compared to other barycentric-based distance methods, but it has the additional advantage that gradients used for backpropagation can be efficiently computed during the forward pass computation itself, which leads to substantially faster model training. We provide technical details about this new regularization term and its properties, and we present experimental demonstrations of faster runtimes when compared to standard Wasserstein-style methods. 
Finally, on a range of experiments designed to assess effectiveness at enforcing fairness, we demonstrate our method compares well with alternatives.", "keywords": "optimal transport;multi-marginal;earth mover's distance;fairness", "primary_area": "", "supplementary_material": "/attachment/b9427c84e18c7e95294487c9726e240d99ca5c59.zip", "author": "Ronak Mehta;Jeffery Kline;Vishnu Suresh Lokhande;Glenn Fung;Vikas Singh", "authorids": "~Ronak_Mehta1;~Jeffery_Kline1;~Vishnu_Suresh_Lokhande1;~Glenn_Fung2;~Vikas_Singh1", "gender": ";;;M;M", "homepage": ";;;https://www.ai-ml-amfam.com/;http://vsingh-www.cs.wisc.edu/", "dblp": ";;;https://dblp.uni-trier.de/pers/f/Fung:Glenn.html;", "google_scholar": ";;;AWAcQaAAAAAJ;d32BmwcAAAAJ", "orcid": ";;;;", "linkedin": ";jeff-kline;;glenn-fung/;", "or_profile": "~Ronak_Mehta1;~Jeffery_Kline1;~Vishnu_Suresh_Lokhande1;~Glenn_Fung2;~Vikas_Singh1", "aff": ";Affirm;;;University of Wisconsin, Madison", "aff_domain": ";affirm.com;;;wisc.edu", "position": ";Manager;;;Professor", "bibtex": "@inproceedings{\nmehta2023efficient,\ntitle={Efficient Discrete Multi Marginal Optimal Transport Regularization},\nauthor={Ronak Mehta and Jeffery Kline and Vishnu Suresh Lokhande and Glenn Fung and Vikas Singh},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=R98ZfMt-jE}\n}", "github": "", "project": "", "reviewers": "Gjfe;7jEQ;RRc2", "pdf_size": 2316741, "recommendation": "5;6;8", "confidence": "4;3;4", "correctness": "4;4;3", "technical_novelty": "2;2;3", "empirical_novelty": "2;2;3", "wc_summary_paper": "66;71;77", "wc_strength_and_weaknesses": "234;55;112", "wc_clarity_quality_novelty_and_reproducibility": "18;60;10", "wc_summary_review": "52;32;48", "wc_review": "370;218;247", "wc_reply_reviewers": "91;0;0", "wc_reply_authors": "1188;340;141", "reply_reviewers": "1;0;0", "reply_authors": "4;2;2", "recommendation_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 71.33333333333333, 4.496912521077347 ], "wc_strength_and_weaknesses_avg": [ 133.66666666666666, 74.66517855659953 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 29.333333333333332, 21.9291789378647 ], "wc_summary_review_avg": [ 44.0, 8.640987597877148 ], "wc_review_avg": [ 278.3333333333333, 65.89048152469032 ], "wc_reply_reviewers_avg": [ 30.333333333333332, 42.897811391983886 ], "wc_reply_authors_avg": [ 556.3333333333334, 453.9840917428226 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.9428090415820634 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.18898223650461363, "corr_recommendation_correctness": -0.944911182523068, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3364499613516287829&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=R98ZfMt-jE", "email": ";affirm.com;;;wisc.edu", "author_num": 5, "aff_unique_index": "0;1", "aff_unique_norm": "Affirm;University of Wisconsin", "aff_unique_dep": ";", "aff_unique_url": "https://www.affirm.com;https://www.wisc.edu", "aff_unique_abbr": "Affirm;UW", "aff_campus_unique_index": "1", "aff_campus_unique": 
";Madison", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "RBNk9cpT1AW", "title": "MS3: A Multimodal Supervised Pretrained Model for Semantic Segmentation", "track": "main", "status": "Withdraw", "tldr": "This paper proposes a multi-dataset pretraining model with multimodal supervision for semantic segmentation and outperforms ImageNet pretraining under both standard fine-tuning and some rapid deployment scenarios.", "abstract": "Due to the limited labeled data, current segmentation models are usually transferred from ImageNet pretrained models. This pipeline introduces task gaps, where the pretraining is based on global image-level recognition while the downstream is focused on local pixel level prediction. In this paper, we aim at mitigating this task gap and building a segmentation-oriented pretrained model, in this way different downstream segmentation tasks can be better and easily adapted. Towards this goal, we combine off-the-shelf annotations from diverse segmentation datasets and make use of both visual and language supervision for jointly training. The highlight is that the two kinds of supervision are complementary and can be boosted to better model the class relation from diverse datasets. The proposed learning framework, termed as MS3 (short for Multimodal Supervision for Semantic Segmentation), not only adjusts and improves the quality of language embeddings to fit the segmentation scene, but also generates momentum-updated visual embeddings for each category to facilitate better visual representation modeling. Besides, considering that the original one-by-one pixel-embedding pairing may cause similar classes from other datasets to be incorrectly pulled away, we further extend the original loss with multi-label mapping via cross-modal information exchange to better model the class relations. 
Experiments conducted on several benchmarks demonstrate that MS3 consistently outperforms the ImageNet pretrained models by a considerable margin under standard fine-tuning, as well as in some rapid deployment scenarios, e.g., frozen-backbone fine-tuning or zero-shot prediction.", "keywords": "multi-dataset;multi-modal;semantic segmentation", "primary_area": "", "supplementary_material": "", "author": "Bowen Shi;XIAOPENG ZHANG;Wenrui Dai;Junni Zou;Hongkai Xiong;Qi Tian", "authorids": "~Bowen_Shi2;~XIAOPENG_ZHANG7;~Wenrui_Dai1;~Junni_Zou1;~Hongkai_Xiong1;~Qi_Tian3", "gender": "M;M;;F;M;M", "homepage": ";https://sites.google.com/site/zxphistory/;;http://www.cs.sjtu.edu.cn/~zou-jn;http://min.sjtu.edu.cn;https://www.qitian1987.com/index.html", "dblp": ";;16/5135.html;91/4613;21/3569;78/1467-1.html", "google_scholar": "lJHbpY0AAAAJ;Ud6aBAcAAAAJ;Xg8MhyAAAAAJ;https://scholar.google.com/citations?hl=zh-CN;bB16iN4AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;;0000-0003-4552-0029;0000-0002-7252-5047", "linkedin": ";;;;;", "or_profile": "~Bowen_Shi2;~XIAOPENG_ZHANG7;~Wenrui_Dai1;~Junni_Zou1;~Hongkai_Xiong1;~Qi_Tian3", "aff": "Shanghai Jiaotong University;Huawei Technologies Ltd.;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Huawei Technologies Ltd.", "aff_domain": "sjtu.edu.cn;huawei.com;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;huawei.com", "position": "PhD student;Principal Researcher;Associate Professor;Full Professor;Full Professor;Principal Researcher", "bibtex": "@misc{\nshi2023ms,\ntitle={{MS}3: A Multimodal Supervised Pretrained Model for Semantic Segmentation},\nauthor={Bowen Shi and XIAOPENG ZHANG and Wenrui Dai and Junni Zou and Hongkai Xiong and Qi Tian},\nyear={2023},\nurl={https://openreview.net/forum?id=RBNk9cpT1AW}\n}", "github": "", "project": "", "reviewers": "sH18;XJTo;UcpV", "site": "https://openreview.net/forum?id=RBNk9cpT1AW", "pdf_size": 3799088, "recommendation": "3;5;5", "confidence": "4;5;5", "correctness": "2;4;3", "technical_novelty": "2;3;2", "empirical_novelty": "2;0;2", "wc_summary_paper": "54;43;44", "wc_strength_and_weaknesses": "304;263;184", "wc_clarity_quality_novelty_and_reproducibility": "26;7;1", "wc_summary_review": "16;30;19", "wc_review": "400;343;248", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.3333333333333333, 0.9428090415820634 ], "wc_summary_paper_avg": [ 47.0, 4.96655480858378 ], "wc_strength_and_weaknesses_avg": [ 250.33333333333334, 49.80182950677838 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 11.333333333333334, 10.656244908763853 ], "wc_summary_review_avg": [ 21.666666666666668, 6.018490028422596 ], "wc_review_avg": [ 330.3333333333333, 62.69680126520721 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.9999999999999997, "corr_recommendation_correctness": 0.8660254037844385, "gs_citation": 0, "gs_cited_by_link":
"https://scholar.google.com/scholar?q=related:tSkRB3TOrdEJ:scholar.google.com/&scioq=MS3:+A+Multimodal+Supervised+Pretrained+Model+for+Semantic+Segmentation&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;0;0;0;1", "aff_unique_norm": "Shanghai Jiao Tong University;Huawei", "aff_unique_dep": ";Huawei Technologies", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.huawei.com", "aff_unique_abbr": "SJTU;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "$\\mathrm{SE}(3)$-Equivariant Attention Networks for Shape Reconstruction in Function Space", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10857", "id": "RDy3IbvjMqT", "poster": "", "openreview": "https://openreview.net/forum?id=RDy3IbvjMqT", "slides": "https://iclr.cc/virtual/2023/poster/10857", "video": "https://iclr.cc/virtual/2023/poster/10857", "author_site": "Evangelos Chatzipantazis, Stefanos Pertigkiozoglou, Edgar Dobriban, Kostas Daniilidis", "tldr": "", "abstract": "We propose a method for 3D shape reconstruction from unoriented point clouds. Our method consists of a novel SE(3)-equivariant coordinate-based network (TF-ONet), that parametrizes the occupancy field of the shape and respects the inherent symmetries of the problem. In contrast to previous shape reconstruction methods that align the input to a regular grid, we operate directly on the irregular point cloud. Our architecture leverages equivariant attention layers that operate on local tokens. This mechanism enables local shape modelling, a crucial property for scalability to large scenes. Given an unoriented, sparse, noisy point cloud as input, we produce equivariant features for each point. These serve as keys and values for the subsequent equivariant cross-attention blocks that parametrize the occupancy field. By querying an arbitrary point in space, we predict its occupancy score. We show that our method outperforms previous SO(3)-equivariant methods, as well as non-equivariant methods trained on SO(3)-augmented datasets. More importantly, local modelling together with SE(3)-equivariance create an ideal setting for SE(3) scene reconstruction. We show that by training only on single, aligned objects and without any pre-segmentation, we can reconstruct novel scenes containing arbitrarily many objects in random poses without any performance loss. 
\n", "keywords": "shape reconstruction;equivariance;neural fields;attention;3D vision;point clouds", "primary_area": "", "supplementary_material": "/attachment/15904cabcf3e48a1dbf995219bfaad779c8e4c83.zip", "author": "Evangelos Chatzipantazis;Stefanos Pertigkiozoglou;Edgar Dobriban;Kostas Daniilidis", "authorids": "~Evangelos_Chatzipantazis1;~Stefanos_Pertigkiozoglou1;~Edgar_Dobriban2;~Kostas_Daniilidis1", "gender": ";;;M", "homepage": "https://www.grasp.upenn.edu/people/evangelos-chatzipantazis/;https://www.grasp.upenn.edu/people/stefanos-pertigkiozoglou/;https://statistics.wharton.upenn.edu/profile/dobriban/;http://www.cis.upenn.edu/~kostas", "dblp": "306/8423;232/1802;99/11269;d/KostasDaniilidis", "google_scholar": "qQsYhTgAAAAJ;https://scholar.google.gr/citations?user=8Ti0EGEAAAAJ;aGvH4yMAAAAJ;dGs2BcIAAAAJ", "orcid": ";;;0000-0003-0498-0758", "linkedin": ";;edgar-dobriban/;", "or_profile": "~Evangelos_Chatzipantazis1;~Stefanos_Pertigkiozoglou1;~Edgar_Dobriban2;~Kostas_Daniilidis1", "aff": "School of Engineering and Applied Science, University of Pennsylvania;School of Engineering and Applied Science, University of Pennsylvania;The Wharton School, University of Pennsylvania;University of Pennsylvania", "aff_domain": "seas.upenn.edu;seas.upenn.edu;wharton.upenn.edu;upenn.edu", "position": "PhD student;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nchatzipantazis2023mathrmseequivariant,\ntitle={\\${\\textbackslash}mathrm\\{{SE}\\}(3)\\$-Equivariant Attention Networks for Shape Reconstruction in Function Space},\nauthor={Evangelos Chatzipantazis and Stefanos Pertigkiozoglou and Edgar Dobriban and Kostas Daniilidis},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=RDy3IbvjMqT}\n}", "github": "", "project": "", "reviewers": "YvKA;dRvs;i4in;Ut5F", "pdf_size": 10143752, "recommendation": "6;6;6;8", "confidence": "3;4;3;3", "correctness": "4;3;4;4", "technical_novelty": "3;3;3;3", "empirical_novelty": "3;2;3;4", "wc_summary_paper": "65;90;52;215", "wc_strength_and_weaknesses": "272;179;56;116", "wc_clarity_quality_novelty_and_reproducibility": "32;48;52;114", "wc_summary_review": "83;157;25;71", "wc_review": "452;474;185;516", "wc_reply_reviewers": "210;141;0;0", "wc_reply_authors": "1215;1077;395;256", "reply_reviewers": "1;1;0;0", "reply_authors": "3;3;1;1", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 105.5, 64.6780488264759 ], "wc_strength_and_weaknesses_avg": [ 155.75, 79.97616832532051 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 61.5, 31.22098653149833 ], "wc_summary_review_avg": [ 84.0, 47.38143096192854 ], "wc_review_avg": [ 406.75, 130.07569911401592 ], "wc_reply_reviewers_avg": [ 87.75, 91.07791993672231 ], "wc_reply_authors_avg": [ 735.75, 416.053707470562 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 1.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11062561852440343472&as_sdt=5,30&sciodt=0,30&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=RDy3IbvjMqT", "email": 
"seas.upenn.edu;seas.upenn.edu;wharton.upenn.edu;upenn.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Pennsylvania", "aff_unique_dep": "School of Engineering and Applied Science", "aff_unique_url": "https://www.upenn.edu", "aff_unique_abbr": "UPenn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "RHWAEeEYmwW", "title": "Conservative Exploration in Linear MDPs under Episode-wise Constraints", "track": "main", "status": "Withdraw", "tldr": "We studied conservative exploration with offline dataset during online learning for Linear MDPs and prove that the regret of our algorithm matches the constraint-free counterpart.", "abstract": "This paper investigates conservative exploration in reinforcement learning where the performance of the learning agent is guaranteed to above certain threshold throughout the learning process. It focuses on the episodic linear Markov Decision Process (MDP) setting where the transition kernels and the reward functions are assumed to be linear. With the knowledge of an existing safe baseline policy, two algorithms based on Least-Squares Value Iteration (LSVI) (Bradtke and Barto, 1996; Osband et al., 2016), coined StepMix-LSVI and EpsMix-LSVI, are proposed to balance the exploitation and exploration while ensuring that the conservative constraint is never violated in each episode with high probability. Theoretical analysis shows that both algorithms achieve the same regret order as LSVI-UCB, their constraint-free counterpart from Jin et al. (2020), indicating that obeying the stringent episode-wise conservative constraint does not compromise the learning performance of these algorithms. We further extend the analysis to the setting where the baseline policy is not given a priori but must be learned from an offline dataset, and prove that similar safety guarantee and regret can be achieved if the offline dataset is sufficiently large. 
Experiment results corroborate the theoretical analysis and demonstrate the effectiveness of the proposed conservative exploration strategies.", "keywords": "Conservative Exploration;Sample Complexity;Linear MDP;Offline and Online RL", "primary_area": "", "supplementary_material": "/attachment/6513d84e1ce1e66db5bad25b3e6d8ada5fd8094c.zip", "author": "Ruiquan Huang;Donghao Li;Cong Shen;Ashley Prater-Bennette;Jing Yang", "authorids": "~Ruiquan_Huang1;~Donghao_Li3;~Cong_Shen1;~Ashley_Prater-Bennette1;~Jing_Yang3", "gender": "M;;M;F;", "homepage": ";;https://cshen317.github.io/;;http://www.ee.psu.edu/yang", "dblp": "304/8880;;79/6027-1.html;158/9018;", "google_scholar": "0eo3JGgAAAAJ;4vygbUIAAAAJ;70LBhKcAAAAJ;f1WPBE8AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0002-3148-4453;;", "linkedin": "ruiquan-huang-369543185/;;cong-shen-3372404/;;", "or_profile": "~Ruiquan_Huang1;~Donghao_Li3;~Cong_Shen1;~Ashley_Prater-Bennette1;~Jing_Yang3", "aff": "Pennsylvania State University;Pennsylvania State University;University of Virginia;Air Force Research Laboratory;Pennsylvania State University", "aff_domain": "psu.edu;psu.edu;virginia.edu;us.af.mil;psu.edu", "position": "PhD student;PhD student;Assistant Professor;Principal Researcher;Associate Professor", "bibtex": "@misc{\nhuang2023conservative,\ntitle={Conservative Exploration in Linear {MDP}s under Episode-wise Constraints},\nauthor={Ruiquan Huang and Donghao Li and Cong Shen and Ashley Prater-Bennette and Jing Yang},\nyear={2023},\nurl={https://openreview.net/forum?id=RHWAEeEYmwW}\n}", "github": "", "project": "", "reviewers": "4C37;94rF;iCy5;ejMP", "site": "https://openreview.net/forum?id=RHWAEeEYmwW", "pdf_size": 2302867, "recommendation": "5;5;6;6", "confidence": "3;4;3;2", "correctness": "3;3;4;3", "technical_novelty": "3;3;2;4", "empirical_novelty": "3;2;3;3", "wc_summary_paper": "78;144;57;96", "wc_strength_and_weaknesses": "201;717;73;343", "wc_clarity_quality_novelty_and_reproducibility": "18;2;114;256", "wc_summary_review": "44;41;78;85", "wc_review": "341;904;322;780", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 93.75, 32.12767498590584 ], "wc_strength_and_weaknesses_avg": [ 333.5, 241.13222513799354 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 97.5, 101.03835905239158 ], "wc_summary_review_avg": [ 62.0, 19.685019685029527 ], "wc_review_avg": [ 586.75, 259.07467552811875 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.7071067811865475, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:FtjONbf2mloJ:scholar.google.com/&scioq=Conservative+Exploration+in+Linear+MDPs+under+Episode-wise+Constraints&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Pennsylvania State University;University of Virginia;Air Force Research Laboratory", "aff_unique_dep": ";;", "aff_unique_url": "https://www.psu.edu;https://www.virginia.edu;https://www.afrl.af.mil/", "aff_unique_abbr": 
"PSU;UVA;AFRL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "RHsOd1Aineq", "title": "Learning to Boost Resilience of Complex Networks via Neural Edge Rewiring", "track": "main", "status": "Reject", "tldr": "We develop an inductive network resilience optimization method with the proposed topology-inspired FireGNN for learning inductive neural edge rewiring to boost resilience of complex networks without rich features.", "abstract": "The resilience of complex networks, a critical structural characteristic in network science, measures the network's ability to withstand noise corruption and structural changes. Improving resilience typically resorts to minimal modifications of the network structure via degree-preserving edge rewiring-based methods. Despite their effectiveness, existing methods are learning-free, sharing the limitation of transduction: a learned edge rewiring strategy from one graph cannot be generalized to another. Such a limitation cannot be trivially addressed by existing graph neural networks (GNNs)-based approaches since there is no rich initial node features for GNNs to learn meaningful representations. However, neural edge rewiring relies on GNNs for obtaining meaningful representations from pure graph topologies to select edges. We found existing GNNs degenerate remarkably with only pure topologies on the resilience task, leading to the undesired infinite action backtracking. In this work, inspired by persistent homology, we specifically design a variant of GNN called FireGNN for learning inductive edge rewiring strategies. Based on meaningful representations from FireGNN, we develop the first end-to-end inductive method, ResiNet, to discover $\\textbf{resi}$lient $\\textbf{net}$work topologies while balancing network utility. ResiNet reformulates network resilience optimization as a Markov decision process equipped with edge rewiring action space and learns to select correct edges successively. 
Extensive experiments demonstrate that ResiNet achieves a near-optimal resilience gain on various graphs while balancing the utility and outperforms existing approaches by a large margin.", "keywords": "complex networks;network resilience;network robustness;graph neural networks", "primary_area": "", "supplementary_material": "", "author": "Shanchao Yang;MA KAILI;Tianshu Yu;Baoxiang Wang;Hongyuan Zha", "authorids": "~Shanchao_Yang1;~MA_KAILI1;~Tianshu_Yu2;~Baoxiang_Wang1;~Hongyuan_Zha1", "gender": "M;F;M;;", "homepage": "https://yangysc.github.io/;;https://mypage.cuhk.edu.cn/academics/yutianshu/;;", "dblp": ";200/0854-1.html;152/6675;;z/HongyuanZha", "google_scholar": "gakMZhcAAAAJ;;MTHO7DsAAAAJ;;n1DQMIsAAAAJ", "orcid": ";;0000-0002-6537-1924;;", "linkedin": ";;;;", "or_profile": "~Shanchao_Yang1;~MA_KAILI1;~Tianshu_Yu2;~Baoxiang_Wang1;~Hongyuan_Zha1", "aff": "The Chinese University of Hong Kong, Shenzhen;Department of Computer Science and Engineering, The Chinese University of Hong Kong;Chinese University of Hong Kong (Shenzhen);;The Chinese University of Hong Kong, Shenzhen", "aff_domain": "cuhk.edu.cn;cse.cuhk.edu.hk;cuhk.edu.cn;;cuhk.edu.cn", "position": "PhD student;PhD student;Assistant Professor;;Full Professor", "bibtex": "@misc{\nyang2023learning,\ntitle={Learning to Boost Resilience of Complex Networks via Neural Edge Rewiring},\nauthor={Shanchao Yang and MA KAILI and Tianshu Yu and Baoxiang Wang and Hongyuan Zha},\nyear={2023},\nurl={https://openreview.net/forum?id=RHsOd1Aineq}\n}", "github": "", "project": "", "reviewers": "aMne;CVfp;6mo7;sxfB", "site": "https://openreview.net/forum?id=RHsOd1Aineq", "pdf_size": 929156, "recommendation": "3;3;5;8", "confidence": "3;5;4;2", "correctness": "3;3;3;4", "technical_novelty": "3;3;3;3", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "96;109;91;96", "wc_strength_and_weaknesses": "278;249;439;36", "wc_clarity_quality_novelty_and_reproducibility": "119;80;24;32", "wc_summary_review": "40;68;38;72", "wc_review": "533;506;592;236", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.75, 2.0463381929681126 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 98.0, 6.670832032063167 ], "wc_strength_and_weaknesses_avg": [ 250.5, 143.44075432038136 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 63.75, 38.42118556213486 ], "wc_summary_review_avg": [ 54.5, 15.580436450882884 ], "wc_review_avg": [ 466.75, 136.8052904678763 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.7102658741205989, "corr_recommendation_correctness": 0.9169493006161777, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8354487890012943630&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Chinese University of Hong Kong", "aff_unique_dep": "", "aff_unique_url": "https://www.cuhk.edu.cn", "aff_unique_abbr": "CUHK", "aff_campus_unique_index": "0;1;0;0", "aff_campus_unique": "Shenzhen;Hong Kong SAR", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "RIJM-pJF_3K", "title": "Causally Constrained Data Synthesis For Private Data Release", 
"track": "main", "status": "Reject", "tldr": "", "abstract": "Data privacy is critical in many decision-making contexts, such as healthcare and finance. A common mechanism is to create differentially private synthetic data using generative models. Such data generation reflects certain statistical properties of the original data, but often has an unacceptable privacy vs. utility trade-off. Since natural data inherently exhibits causal structure, we propose incorporating \\emph{causal information} into the training process to favorably navigate the aforementioned trade-off. Under certain assumptions for linear gaussian models and a broader class of models, we theoretically prove that causally informed generative models provide better differential privacy guarantees than their non-causal counterparts. We evaluate our proposal using variational autoencoders, and demonstrate that the trade-off is mitigated through better utility for comparable privacy.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Varun Chandrasekaran;Darren Edge;Somesh Jha;Lukas Wutschitz;Amit Sharma;Cheng Zhang;Shruti Tople", "authorids": "~Varun_Chandrasekaran1;darren.edge@microsoft.com;~Somesh_Jha1;~Lukas_Wutschitz1;~Amit_Sharma3;~Cheng_Zhang1;~Shruti_Tople2", "gender": "M;;M;M;M;F;", "homepage": "http://pages.cs.wisc.edu/~chandrasekaran/;;;;http://amitsharma.in/;http://cheng-zhang.org;", "dblp": ";;j/SomeshJha;263/8844;72/2540-7;82/6384-5;", "google_scholar": "Sl7nSOsAAAAJ;;BaI7l8QAAAAJ;;https://scholar.google.co.in/citations?user=CXgQufgAAAAJ;r40iAwIAAAAJ;", "orcid": ";;;0000-0003-4321-6509;0000-0002-2086-3191;;", "linkedin": ";;;;;;", "or_profile": "~Varun_Chandrasekaran1;darren.edge@microsoft.com;~Somesh_Jha1;~Lukas_Wutschitz1;~Amit_Sharma3;~Cheng_Zhang1;~Shruti_Tople2", "aff": "Microsoft;;Department of Computer Science, University of Wisconsin, Madison;Microsoft;Microsoft Research;Microsoft;", "aff_domain": "microsoft.com;;cs.wisc.edu;microsoft.com;microsoft.com;microsoft.com;", "position": "Postdoc;;Full Professor;Researcher;Principal Researcher;Principal Researcher;", "bibtex": "@misc{\nchandrasekaran2023causally,\ntitle={Causally Constrained Data Synthesis For Private Data Release},\nauthor={Varun Chandrasekaran and Darren Edge and Somesh Jha and Lukas Wutschitz and Amit Sharma and Cheng Zhang and Shruti Tople},\nyear={2023},\nurl={https://openreview.net/forum?id=RIJM-pJF_3K}\n}", "github": "", "project": "", "reviewers": "2NXW;3noA;PkKv", "site": "https://openreview.net/forum?id=RIJM-pJF_3K", "pdf_size": 1765503, "recommendation": "3;3;5", "confidence": "3;2;4", "correctness": "2;3;3", "technical_novelty": "3;2;3", "empirical_novelty": "2;2;2", "wc_summary_paper": "61;23;92", "wc_strength_and_weaknesses": "85;236;254", "wc_clarity_quality_novelty_and_reproducibility": "270;163;90", "wc_summary_review": "37;91;38", "wc_review": "453;513;474", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "406;410;372", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 58.666666666666664, 28.21740991342441 ], "wc_strength_and_weaknesses_avg": [ 191.66666666666666, 75.78185065626789 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 174.33333333333334, 73.92037758441323 ], 
"wc_summary_review_avg": [ 55.333333333333336, 25.223445883190152 ], "wc_review_avg": [ 480.0, 24.859605789312106 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 396.0, 17.048949136725895 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.8660254037844387, "corr_recommendation_correctness": 0.5, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4518232432533071503&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "Microsoft;University of Wisconsin-Madison", "aff_unique_dep": "Microsoft Corporation;Department of Computer Science", "aff_unique_url": "https://www.microsoft.com;https://www.wisc.edu", "aff_unique_abbr": "Microsoft;UW-Madison", "aff_campus_unique_index": "1", "aff_campus_unique": ";Madison", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "RIcaT3C0wP", "title": "A Simple Unsupervised Data Depth-based Method to Detect Adversarial Images", "track": "main", "status": "Withdraw", "tldr": "We crafted a simple detection method for adversarial samples based on data depths which is especially designed for vision transformers architectures", "abstract": "Deep neural networks suffer from critical vulnerabilities regarding robustness, which limits their exploitation in many real-world applications. In particular, a serious concern is their inability to defend against adversarial attacks. Although the research community has developed a large amount of effective attacks, the detection problem has received little attention. Existing detection methods either rely on additional training or on specific heuristics at the risk of overfitting. Moreover, they have mainly focused on ResNet architectures while transformers, which are state-of-the-art for vision tasks, have not been properly investigated. In this paper, we overcome these limitations by introducing APPROVED, a simple unsupervised detection method for transformer architectures. It leverages the information available in the logit layer and computes a similarity score with respect to the training distribution. This is accomplished using a data depth that is: (i) computationally efficient; and (ii) non-differentiable, making it harder for gradient-based adversaries to craft malicious samples. 
Our extensive experiments show that APPROVED consistently outperforms previous detectors on CIFAR10, CIFAR100 and Tiny ImageNet.", "keywords": "Adversarial attacks;Detection;Vision transformers;Safety AI", "primary_area": "", "supplementary_material": "/attachment/cb246ae9e49a0b3a15f4ddc9461c698a22535ac5.zip", "author": "Marine Picot;Guillaume Staerman;Federica Granese;Nathan Noiry;Francisco Messina;Pablo Piantanida;Pierre Colombo", "authorids": "~Marine_Picot2;~Guillaume_Staerman1;~Federica_Granese1;~Nathan_Noiry1;~Francisco_Messina1;~Pablo_Piantanida2;~Pierre_Colombo2", "gender": "Not Specified;F;M;M;M;M;F", "homepage": "https://guillaumestaermanml.github.io/;https://fgranese.github.io/;https://noiry.perso.math.cnrs.fr/;;https://www.pablo-piantanida.org;https://pierrecolombo.github.io/;", "dblp": ";251/6090;;;44/1416;;", "google_scholar": "Zb2ax0wAAAAJ;https://scholar.google.ca/citations?hl=it;;pJ4zRlgAAAAJ;https://scholar.google.fr/citations?user=QyBEFv0AAAAJ;yPoMt8gAAAAJ;", "orcid": ";0000-0002-0084-521X;;;;;", "linkedin": ";federica-granese-201b311a0/;;;pablo-piantanida-60a51bb5/?locale=en_US;;marine-p-170b75100/", "or_profile": "~Guillaume_Staerman1;~Federica_Granese1;~Nathan_Noiry1;~Francisco_Messina1;~Pablo_Piantanida2;~Pierre_Colombo2;~Marine_PICOT1", "aff": "INRIA;\u00c9cole Polytechnique;;;Mila - Quebec AI Institute ;CentraleSupelec;", "aff_domain": "inria.fr;polytechnique.edu;;;mila.quebec;centralesupelec.fr;", "position": "Postdoc;PhD student;;;Full Professor;Assistant Professor;", "bibtex": "@misc{\npicot2023a,\ntitle={A Simple Unsupervised Data Depth-based Method to Detect Adversarial Images},\nauthor={Marine Picot and Guillaume Staerman and Federica Granese and Nathan Noiry and Francisco Messina and Pablo Piantanida and Pierre Colombo},\nyear={2023},\nurl={https://openreview.net/forum?id=RIcaT3C0wP}\n}", "github": "", "project": "", "reviewers": "m8Sa;4Dqi;5MMQ;VRvw", "site": "https://openreview.net/forum?id=RIcaT3C0wP", "pdf_size": 745267, "recommendation": "1;3;3;8", "confidence": "5;4;4;4", "correctness": "1;2;2;4", "technical_novelty": "1;2;2;3", "empirical_novelty": "1;2;2;4", "wc_summary_paper": "47;57;65;104", "wc_strength_and_weaknesses": "282;352;895;183", "wc_clarity_quality_novelty_and_reproducibility": "35;59;10;12", "wc_summary_review": "48;77;68;79", "wc_review": "412;545;1038;378", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.75, 2.5860201081971503 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 2.25, 1.0897247358851685 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.25, 1.0897247358851685 ], "wc_summary_paper_avg": [ 68.25, 21.602951187279945 ], "wc_strength_and_weaknesses_avg": [ 428.0, 276.2272615076217 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 29.0, 19.912307751739878 ], "wc_summary_review_avg": [ 68.0, 12.267844146385297 ], "wc_review_avg": [ 593.25, 264.251182589596 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.6139601294045424, "corr_recommendation_correctness": 0.9980305249223754, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2117248454146797991&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "INRIA;Ecole 
Polytechnique;Quebec AI Institute;CentraleSup\u00e9lec", "aff_unique_dep": ";;AI Institute;", "aff_unique_url": "https://www.inria.fr;https://www.polytechnique.edu;https://mila.quebec;https://www.centralesupelec.fr", "aff_unique_abbr": "INRIA;X;Mila;CS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "France;Canada" }, { "id": "RKMbC8Tslx", "title": "A GENERAL SCENARIO-AGNOSTIC REINFORCEMENT LEARNING FOR TRAFFIC SIGNAL CONTROL", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Reinforcement learning has been recently adopted to revolutionize and optimize traditional traffic signal control systems. Existing methods are based on either a single scenario or multiple independent scenarios, where each scenario has a separate simulation environment with predefined road network topology and traffic signal settings. These models implement training and testing in the same scenario, thus being strictly tied to the specific setting and heavily sacrificing model generalization. While a few recent models can be trained on multiple scenarios, they require a huge amount of manual labor to label the intersection structure, hindering the model\u2019s generalization. In this work, we aim at a general framework that eliminates heavy labeling and models a variety of scenarios simultaneously. To this end, we propose a GEneral Scenario-Agnostic (GESA) reinforcement learning framework for traffic signal control with: (1) A general plug-in module to map all different intersections into a unified structure, freeing us from the heavy manual labor to specify the structure of intersections; (2) A unified state and action space to keep the model input and output consistently structured; (3) A large-scale co-training with multiple scenarios, leading to a generic traffic signal control algorithm. In experiments, we demonstrate that our algorithm is the first that can be co-trained with seven different scenarios without manual annotation, achieving 17.20% higher rewards than benchmarks. When dealing with a new scenario, our model can still achieve 10.36% higher rewards.
The code and scenarios will be released upon acceptance.", "keywords": "reinforcement learning;model generalizability;traffic signal control;smart mobility", "primary_area": "", "supplementary_material": "/attachment/da2463e84b9e7553f6b8b9b103f35d3f5ba34d5a.zip", "author": "Haoyuan Jiang;Ziyue Li;LEI BAI;zhishuai Li;Rui Zhao", "authorids": "~Haoyuan_Jiang1;~Ziyue_Li2;~LEI_BAI1;~zhishuai_Li1;~Rui_Zhao6", "gender": "M;;M;M;M", "homepage": ";https://bonaldli.github.io/;http://leibai.site/;;http://zhaorui.xyz/", "dblp": "198/7688;189/5871-2;119/1223-1;234/3635.html;26/2578-1", "google_scholar": ";q5_My2AAAAAJ;https://scholar.google.com.au/citations?user=sakOO04AAAAJ;KN7DA0sAAAAJ;1c9oQNMAAAAJ", "orcid": ";0000-0003-4983-9352;0000-0003-3378-7201;;", "linkedin": ";;lei-bai-641370153/;;", "or_profile": "~Haoyuan_Jiang1;~Ziyue_Li2;~LEI_BAI1;~zhishuai_Li1;~Rui_Zhao6", "aff": "Sensetime;EWI gGmbH;Shanghai AI Laboratory;SenseTime Research;SenseTime Research", "aff_domain": "sensetime.com;ewi.uni-koeln.de;pjlab.org.cn;sentime.com;sensetime.com", "position": "Researcher;Principal Researcher;Researcher;Researcher;Researcher", "bibtex": "@misc{\njiang2023a,\ntitle={A {GENERAL} {SCENARIO}-{AGNOSTIC} {REINFORCEMENT} {LEARNING} {FOR} {TRAFFIC} {SIGNAL} {CONTROL}},\nauthor={Haoyuan Jiang and Ziyue Li and LEI BAI and zhishuai Li and Rui Zhao},\nyear={2023},\nurl={https://openreview.net/forum?id=RKMbC8Tslx}\n}", "github": "", "project": "", "reviewers": "rkV5;Su28;ZsuU;bTdE", "site": "https://openreview.net/forum?id=RKMbC8Tslx", "pdf_size": 7224996, "recommendation": "6;6;6;6", "confidence": "3;4;2;3", "correctness": "3;3;3;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "61;76;45;100", "wc_strength_and_weaknesses": "92;65;68;478", "wc_clarity_quality_novelty_and_reproducibility": "18;42;14;69", "wc_summary_review": "46;36;24;77", "wc_review": "217;219;151;724", "wc_reply_reviewers": "0;0;35;48", "wc_reply_authors": "1296;1064;873;2460", "reply_reviewers": "0;0;1;1", "reply_authors": "3;3;3;6", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 70.5, 20.254629100529094 ], "wc_strength_and_weaknesses_avg": [ 175.75, 174.8175834977706 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 35.75, 21.98152633462927 ], "wc_summary_review_avg": [ 45.75, 19.651653874419832 ], "wc_review_avg": [ 327.75, 230.40548496075348 ], "wc_reply_reviewers_avg": [ 20.75, 21.25294097295713 ], "wc_reply_authors_avg": [ 1423.25, 617.0248678132835 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 3.75, 1.299038105676658 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6658474912523525684&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "SenseTime;EWI gGmbH;Shanghai AI Laboratory", "aff_unique_dep": ";;", "aff_unique_url": "https://www.sensetime.com;;https://www.shanghai-ai-lab.com", "aff_unique_abbr": "SenseTime;;SAIL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "China;Germany" }, { "id": "RKiWwhocuiU", "title": "Domain Generalization with Small Data", "track": "main", "status": "Reject", "tldr": " A 
novel domain generalization method in the context of insufficient data is proposed in this work", "abstract": "In this work, we propose to tackle the problem of domain generalization in the context of insufficient samples. Instead of extracting latent feature embeddings based on deterministic models, we propose to learn a domain-invariant representation based on the probabilistic framework by mapping each data point into probabilistic embeddings. Specifically, we first extend empirical maximum mean discrepancy (MMD) to a novel probabilistic MMD that can measure the discrepancy between mixture distributions (i.e., source domains) consisting of a series of latent distributions rather than latent points. Moreover, instead of imposing the contrastive semantic alignment (CSA) loss based on pairs of latent points, a novel probabilistic CSA loss encourages positive probabilistic embedding pairs to be closer while pulling other negative ones apart. Benefiting from the learned representation captured by probabilistic models, our proposed method can marry the measurement of the distribution over distributions (i.e., the global perspective alignment) with the distribution-based contrastive semantic alignment (i.e., the local perspective alignment). Extensive experimental results on three challenging medical datasets show the effectiveness of our proposed method in the context of insufficient data compared with state-of-the-art baseline methods.", "keywords": "domain generalization;small data;healthcare;medical image", "primary_area": "", "supplementary_material": "", "author": "Kecheng Chen;Elena Gal;Hong Yan;Haoliang Li", "authorids": "~Kecheng_Chen1;~Elena_Gal1;~Hong_Yan2;~Haoliang_Li2", "gender": "M;;;", "homepage": "https://tonyckc.github.io;;;", "dblp": "244/8268;;;", "google_scholar": "xE3hzToAAAAJ;;;", "orcid": "0000-0001-6657-3221;;;", "linkedin": ";;;", "or_profile": "~Kecheng_Chen1;~Elena_Gal1;~Hong_Yan2;~Haoliang_Li2", "aff": "City University of Hong Kong;;;", "aff_domain": "cityu.edu.hk;;;", "position": "PhD student;;;", "bibtex": "@misc{\nchen2023domain,\ntitle={Domain Generalization with Small Data},\nauthor={Kecheng Chen and Elena Gal and Hong Yan and Haoliang Li},\nyear={2023},\nurl={https://openreview.net/forum?id=RKiWwhocuiU}\n}", "github": "", "project": "", "reviewers": "BRdo;6Mag;RmYa;s3JK", "site": "https://openreview.net/forum?id=RKiWwhocuiU", "pdf_size": 1343971, "recommendation": "5;5;6;8", "confidence": "4;3;2;5", "correctness": "3;2;3;4", "technical_novelty": "3;3;3;4", "empirical_novelty": "3;2;3;4", "wc_summary_paper": "114;82;33;43", "wc_strength_and_weaknesses": "478;232;75;108", "wc_clarity_quality_novelty_and_reproducibility": "185;60;29;28", "wc_summary_review": "82;116;34;30", "wc_review": "859;490;171;209", "wc_reply_reviewers": "22;0;0;0", "wc_reply_authors": "3324;2200;641;184", "reply_reviewers": "1;0;0;0", "reply_authors": "7;4;1;1", "recommendation_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 68.0, 32.25678223257862 ], "wc_strength_and_weaknesses_avg": [ 223.25, 158.29936039036923 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 75.5, 64.5155020130821 ], "wc_summary_review_avg": [ 65.5, 35.61951712193752 ], "wc_review_avg": [ 432.25, 275.4735695125759 ], "wc_reply_reviewers_avg": [ 5.5, 9.526279441628825 ], "wc_reply_authors_avg": [
1587.25, 1250.6261181904047 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.25, 2.48746859276655 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.5477225575051661, "corr_recommendation_correctness": 0.8660254037844386, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13058502528219645048&as_sdt=800005&sciodt=0,15&hl=en", "gs_version_total": 10, "aff_unique_index": "0", "aff_unique_norm": "City University of Hong Kong", "aff_unique_dep": "", "aff_unique_url": "https://www.cityu.edu.hk", "aff_unique_abbr": "CityU", "aff_campus_unique_index": "0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "id": "RMnJxnLwGak", "title": "VQ-TR: Vector Quantized Attention for Time Series Forecasting", "track": "main", "status": "Reject", "tldr": "A linear transformer using a vector quantized cross attention block for time series forecasting.", "abstract": "Modern time series datasets can easily contain hundreds or thousands of time points; however, Transformer-based models scale poorly with the sequence length, constraining their context size in the seq-to-seq setting. In this work, we introduce VQ-TR, which maps large sequences to a discrete set of latent representations as part of the attention module. This allows us to attend over larger context windows with linear complexity with respect to the sequence length. We compare this method with other competitive deep learning and classical univariate probabilistic models and highlight its performance using both probabilistic and point forecasting metrics on a variety of open datasets from different domains.", "keywords": "deep learning;time series forecasting;latent variable models;transformer", "primary_area": "", "supplementary_material": "", "author": "Kashif Rasul;Umang Gupta;Hena Ghonia;Yuriy Nevmyvaka", "authorids": "~Kashif_Rasul1;~Umang_Gupta1;~Hena_Ghonia1;~Yuriy_Nevmyvaka1", "gender": ";M;F;", "homepage": ";https://umgupta.github.io;https://hstellar.github.io/;", "dblp": "80/5769;g/UmangGupta;;92/1859", "google_scholar": "cfIrwmAAAAAJ;qxFVK6UAAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": ";;;", "linkedin": ";;hena-ghonia-0876aa129/;", "or_profile": "~Kashif_Rasul1;~Umang_Gupta1;~Hena_Ghonia1;~Yuriy_Nevmyvaka1", "aff": "Zalando SE;University of Southern California;Universit\u00e9 de Montr\u00e9al;Morgan Stanley", "aff_domain": "zalando.de;usc.edu;umontreal.ca;morganstanley.com", "position": "Researcher;PhD student;MS student;Principal Researcher", "bibtex": "@misc{\nrasul2023vqtr,\ntitle={{VQ}-{TR}: Vector Quantized Attention for Time Series Forecasting},\nauthor={Kashif Rasul and Umang Gupta and Hena Ghonia and Yuriy Nevmyvaka},\nyear={2023},\nurl={https://openreview.net/forum?id=RMnJxnLwGak}\n}", "github": "", "project": "", "reviewers": "ZX4K;PERd;TMte;miFG", "site": "https://openreview.net/forum?id=RMnJxnLwGak", "pdf_size": 510594, "recommendation": "1;3;5;6", "confidence": "4;3;3;3", "correctness": "1;3;3;3", "technical_novelty": "3;3;3;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "182;95;41;68", "wc_strength_and_weaknesses": "1082;292;157;150", "wc_clarity_quality_novelty_and_reproducibility": "75;132;32;69", "wc_summary_review": "51;100;25;32", "wc_review": "1390;619;255;319", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;164;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;1;0;0", "recommendation_avg": [ 3.75,
1.920286436967152 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 2.5, 0.8660254037844386 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 96.5, 52.92683629313205 ], "wc_strength_and_weaknesses_avg": [ 420.25, 386.23074385657077 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 77.0, 35.770099245039845 ], "wc_summary_review_avg": [ 52.0, 29.300170647967224 ], "wc_review_avg": [ 645.75, 451.1304550792376 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 41.0, 71.01408311032397 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0.25, 0.4330127018922193 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.8268106308031117, "corr_recommendation_correctness": 0.8268106308031117, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11585685520974964496&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Zalando SE;University of Southern California;Universit\u00e9 de Montr\u00e9al;Morgan Stanley", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.zalando.de;https://www.usc.edu;https://www.umontreal.ca;https://www.morganstanley.com", "aff_unique_abbr": "Zalando;USC;UdeM;Morgan Stanley", "aff_campus_unique_index": "1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;1;2;1", "aff_country_unique": "Germany;United States;Canada" }, { "id": "RN4iVt9ndGa", "title": "Active Learning based Structural Inference", "track": "main", "status": "Reject", "tldr": "", "abstract": "In this paper, we propose an active-learning based framework, Active Learning based Structural Inference (ALaSI), to infer the existence of directed connections from observed agents' states over a time period in a dynamical system. With the help of deep active learning, ALaSI is competent in learning the representation of connections with a relatively small pool of prior knowledge. Moreover, based on information theory, we propose inter- and out-of-scope message learning pipelines, which are remarkably beneficial to structural inference for large dynamical systems.
We evaluate ALaSI on various large datasets including simulated systems and real-world networks, to demonstrate that ALaSI is able to precisely infer the existence of connections in these systems under either supervised learning or unsupervised learning, with better performance than baseline methods.", "keywords": "Structural Inference;Active Learning;Mutual Information;Deep Learning", "primary_area": "", "supplementary_material": "/attachment/20747e282f75dddbc1d09921e10c17eb5b7c9ca3.zip", "author": "Aoran Wang;Jun Pang", "authorids": "~Aoran_Wang1;~Jun_Pang1", "gender": ";M", "homepage": ";https://satoss.uni.lu/members/jun/", "dblp": ";p/JunPang", "google_scholar": ";0JjOM_gAAAAJ", "orcid": ";0000-0002-4521-4112", "linkedin": ";", "or_profile": "~Aoran_Wang1;~Jun_Pang1", "aff": ";University of Luxembourg", "aff_domain": ";uni.lu", "position": ";Principal Researcher", "bibtex": "@misc{\nwang2023active,\ntitle={Active Learning based Structural Inference},\nauthor={Aoran Wang and Jun Pang},\nyear={2023},\nurl={https://openreview.net/forum?id=RN4iVt9ndGa}\n}", "github": "", "project": "", "reviewers": "Sh3s;o2om;5BCf", "site": "https://openreview.net/forum?id=RN4iVt9ndGa", "pdf_size": 714665, "recommendation": "3;6;6", "confidence": "3;3;3", "correctness": "3;3;4", "technical_novelty": "2;2;3", "empirical_novelty": "2;3;3", "wc_summary_paper": "100;58;71", "wc_strength_and_weaknesses": "276;91;44", "wc_clarity_quality_novelty_and_reproducibility": "135;40;45", "wc_summary_review": "14;87;58", "wc_review": "525;276;218", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "2376;826;432", "reply_reviewers": "0;0;0", "reply_authors": "5;2;2", "recommendation_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 3.0, 0.0 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 76.33333333333333, 17.556258776351587 ], "wc_strength_and_weaknesses_avg": [ 137.0, 100.14323075808302 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 73.33333333333333, 43.65266951236265 ], "wc_summary_review_avg": [ 53.0, 30.011109054259666 ], "wc_review_avg": [ 339.6666666666667, 133.17240288020972 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1211.3333333333333, 839.1048140065036 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 3.0, 1.4142135623730951 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6886385906052571916&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0", "aff_unique_norm": "University of Luxembourg", "aff_unique_dep": "", "aff_unique_url": "https://wwwen.uniluxembourg.lu", "aff_unique_abbr": "Uni Lu", "aff_country_unique_index": "0", "aff_country_unique": "Luxembourg" }, { "id": "ROHDcdx8g6n", "title": "Efficient Controllable Generation with Guarantee", "track": "main", "status": "Withdraw", "tldr": "We propose a general method with theoretical guarantee and integrate it with NVAE for controllable generation.", "abstract": "Generative models have achieved great success in image synthesis, and controllability of the generative process is a key requirement for their successful adoption in real-world applications. 
Most existing methods for controllable generation lack theoretical guarantees and are time-consuming, which weakens their reliability and applicability. In this paper, we propose an identifiability theorem to provide a guarantee of controllability. This theorem ensures that semantic attributes can be disentangled and hence independently controlled by orthogonalization in latent space in a supervised manner. Based on the theoretical analysis, we propose a general method for controllable generation, which can be integrated with most latent-variable generative models. We further propose to plug it into a pre-trained NVAE. Such a scheme significantly reduces the time cost and offers better consistency in image editing due to the merits of NVAE. Experiments show that our method is comparable with state-of-the-art methods in attribute-conditional generation and image editing, and has advantages in efficiency and consistency.", "keywords": "controllable generation;variational autoencoder;identifiability", "primary_area": "", "supplementary_material": "", "author": "Xiaojiang Yang;Jiacheng Sun;Xing Zhang;Zhenguo Li;Junchi Yan", "authorids": "~Xiaojiang_Yang1;~Jiacheng_Sun1;~Xing_Zhang6;~Zhenguo_Li1;~Junchi_Yan2", "gender": "M;M;;M;M", "homepage": "https://thinklab.sjtu.edu.cn/;;;http://www.ee.columbia.edu/~zgli/;http://thinklab.sjtu.edu.cn/", "dblp": ";165/5350;;23/6479;60/7949.html", "google_scholar": ";;5HlbQhkAAAAJ;XboZC1AAAAAJ;ga230VoAAAAJ", "orcid": ";;;;0000-0001-9639-7679", "linkedin": ";https://www.linkedin.cn/incareer/in/jiacheng-sun-ab622b131;;;", "or_profile": "~Xiaojiang_Yang1;~Jiacheng_Sun1;~Xing_Zhang6;~Zhenguo_Li1;~Junchi_Yan1", "aff": "Shanghai Jiaotong University;Huawei Noah's Ark Lab;Huawei Technologies Ltd.;Huawei Noah's Ark Lab;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;huawei.com;huawei.com;huawei.com;sjtu.edu.cn", "position": "PhD student;Senior Researcher;AI Researcher;Principal Researcher;Associate Professor", "bibtex": "@misc{\nyang2023efficient,\ntitle={Efficient Controllable Generation with Guarantee},\nauthor={Xiaojiang Yang and Jiacheng Sun and Xing Zhang and Zhenguo Li and Junchi Yan},\nyear={2023},\nurl={https://openreview.net/forum?id=ROHDcdx8g6n}\n}", "github": "", "project": "", "reviewers": "Ym9t;NofJ;tC75", "site": "https://openreview.net/forum?id=ROHDcdx8g6n", "pdf_size": 689339, "recommendation": "1;5;5", "confidence": "4;4;4", "correctness": "2;3;3", "technical_novelty": "2;2;2", "empirical_novelty": "2;2;2", "wc_summary_paper": "63;93;84", "wc_strength_and_weaknesses": "403;90;470", "wc_clarity_quality_novelty_and_reproducibility": "68;22;48", "wc_summary_review": "72;83;49", "wc_review": "606;288;651", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "236;117;173", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 3.6666666666666665, 1.8856180831641267 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 80.0, 12.569805089976535 ], "wc_strength_and_weaknesses_avg": [ 321.0, 165.61602176923182 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 46.0, 18.83259585576738 ], "wc_summary_review_avg": [ 68.0, 14.165686240583852 ], "wc_review_avg": [ 515.0, 161.56113394006616 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 175.33333333333334, 48.60955553066587 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [
8, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.9999999999999997, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:5cjZujJWogoJ:scholar.google.com/&scioq=Efficient+Controllable+Generation+with+Guarantee&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;1;1;0", "aff_unique_norm": "Shanghai Jiao Tong University;Huawei", "aff_unique_dep": ";Noah's Ark Lab", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.huawei.com", "aff_unique_abbr": "SJTU;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "RPVgoRFYWHB", "title": "TOWARD RELIABLE NEURAL SPECIFICATIONS", "track": "main", "status": "Reject", "tldr": "We propose a new family of specifications based on neural activation patterns and evaluate its effectiveness through both statistical analysis and formal verification.", "abstract": "Having reliable specifications is an unavoidable challenge in achieving verifiable correctness, robustness, and interpretability of AI systems. Existing specifications for neural networks are in the flavor of \u201cdata as specification\u201d, that is, the local neighborhood centering around a reference input is considered to be correct (or robust). However, our empirical study shows that such specifications fail to certify any test data points, making it impractical for real-world applications. We propose a new family of specifications called \u201cneural representation as specification\u201d, which uses the intrinsic information of neural networks \u2014 neural activation patterns (NAP) rather than input data to specify the correctness and/or robustness of neural network predictions. We present a simple statistical approach to extracting dominant neural activation patterns. We analyze NAPs from a statistical point of view and find that a single NAP can cover a large number of training and testing data points whereas ad hoc data-as-specification can only cover a single training data point and often zero testing data points. To show the effectiveness of discovered NAPs, we formally verify several important properties, such as a particular type of misclassification never happens for a given NAP, and there is no ambiguity among different NAPs. We show that by using NAP, we can verify the prediction of the entire input space, while still recalling 84% of the data. 
Thus, we argue that using NAPs is a more reliable and extensible specification for neural network verification.", "keywords": "formal verification;specification;neural network verfication;trustworthy AI;interpretability", "primary_area": "", "supplementary_material": "", "author": "Chuqin Geng;Nham Le;Xiaojie Xu;Zhaoyue Wang;Arie Gurfinkel;Xujie Si", "authorids": "~Chuqin_Geng1;~Nham_Le1;~Xiaojie_Xu2;~Zhaoyue_Wang1;~Arie_Gurfinkel1;~Xujie_Si1", "gender": "M;;F;F;M;M", "homepage": "https://allengeng123.github.io/;;;;https://ece.uwaterloo.ca/~agurfink;https://xujie.si", "dblp": "330/9828;;58/4931;;44/3532;142/8449", "google_scholar": ";ybPmA0QAAAAJ;;;https://scholar.google.com.tw/citations?user=WHTB3_MAAAAJ;Ru-jrx4AAAAJ", "orcid": "0000-0002-3563-1596;0000-0003-2800-9392;;;0000-0002-5964-6792;", "linkedin": ";nhamle/;julia-xu-a4293018a/;rebecca-wang-3840-/;arie-gurfinkel-b7049b8/;", "or_profile": "~Chuqin_Geng1;~Nham_Le1;~Xiaojie_Xu2;~Zhaoyue_Wang1;~Arie_Gurfinkel1;~Xujie_Si1", "aff": "McGill University, McGill University;University of Waterloo;McGill University, McGill University;McGill University;University of Waterloo;University of Toronto", "aff_domain": "mail.mcgill.ca;uwaterloo.ca;mail.mcgill.ca;mcgill.ca;uwaterloo.ca;toronto.edu", "position": "PhD student;PhD student;MS student;MS student;Full Professor;Assistant Professor", "bibtex": "@misc{\ngeng2023toward,\ntitle={{TOWARD} {RELIABLE} {NEURAL} {SPECIFICATIONS}},\nauthor={Chuqin Geng and Nham Le and Xiaojie Xu and Zhaoyue Wang and Arie Gurfinkel and Xujie Si},\nyear={2023},\nurl={https://openreview.net/forum?id=RPVgoRFYWHB}\n}", "github": "", "project": "", "reviewers": "rBXu;F47b;epj3;uPcc", "site": "https://openreview.net/forum?id=RPVgoRFYWHB", "pdf_size": 651503, "recommendation": "3;3;5;8", "confidence": "3;4;3;5", "correctness": "3;2;3;4", "technical_novelty": "3;2;2;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "43;158;41;66", "wc_strength_and_weaknesses": "208;283;103;177", "wc_clarity_quality_novelty_and_reproducibility": "38;67;76;44", "wc_summary_review": "30;32;26;22", "wc_review": "319;540;246;309", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "375;836;466;312", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.75, 2.0463381929681126 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 77.0, 47.78598120788146 ], "wc_strength_and_weaknesses_avg": [ 192.75, 64.57698893568823 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 56.25, 15.722197683530124 ], "wc_summary_review_avg": [ 27.5, 3.840572873934304 ], "wc_review_avg": [ 353.5, 111.25308984473196 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 497.25, 203.09526705465098 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.6998739952495694, "corr_recommendation_correctness": 0.8638684255813602, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1371877400093015514&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff_unique_index": "0;1;0;0;1;2", "aff_unique_norm": "McGill University;University of Waterloo;University of Toronto", "aff_unique_dep": ";;", "aff_unique_url": "https://www.mcgill.ca;https://uwaterloo.ca;https://www.utoronto.ca", "aff_unique_abbr": "McGill;UW;U of T", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "Canada" }, { "id": "RPyemmvfqNF", "title": "Motif-induced Graph Normalization", "track": "main", "status": "Reject", "tldr": "", "abstract": "Graph Neural Networks (GNNs) have emerged as a powerful category of learning architecture for handling graph-structured data in the non-Euclidean domain. Despite their success, existing GNNs typically suffer from the insufficient expressive power bottlenecked by Weisfeiler-Lehman (WL) test, and meanwhile are prone to the over-smoothing situation with increasing layer numbers. In this paper, we strive to strengthen the discriminative capabilities of GNNs by devising a dedicated plug-and-play normalization scheme, termed as Motif-induced Normalization (MotifNorm), that explicitly considers the intra-connection information within each node-induced subgraph. To this end, we embed the motif-induced structural weights at the beginning and the end of the standard BatchNorm, as well as incorporate the graph instance-specific statistics for improved distinguishable capabilities. In the meantime, we provide the theoretical analysis to support that, with the proposed elaborated MotifNorm, an arbitrary GNNs is capable of more expressive abilities than the 1-WL test in distinguishing k-regular graphs. Furthermore, the proposed MotifNorm scheme is also exemplified to be able to alleviate the over-smoothing phenomenon. Experimental results on ten popular benchmarks across all the tasks of the graph-, node-, as well as link-level property predictions, demonstrate the effectiveness of the proposed method. Our code is made available in the supplementary material.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/854e4a987134da029085e4848d447ce7fda46b94.zip", "author": "Kaixuan Chen;Shunyu Liu;Tongtian Zhu;Tongya Zheng;Haofei Zhang;Jie Song;Mingli Song", "authorids": "~Kaixuan_Chen2;~Shunyu_Liu1;~Tongtian_Zhu1;~Tongya_Zheng1;~Haofei_Zhang1;~Jie_Song3;~Mingli_Song1", "gender": ";M;M;;M;M;M", "homepage": "https://liushunyu.github.io/;https://raiden-zhu.github.io;https://doujiang-zheng.github.io/;;https://person.zju.edu.cn/en/NB20021;https://person.zju.edu.cn/msong;https://chenchkx.github.io/", "dblp": "235/0752-1;323/5165;245/8743;270/0826;09/4756-11.html;71/5333;220/5629", "google_scholar": "4U-X6d4AAAAJ;QvBDUsIAAAAJ;Ko2OtGgAAAAJ;7-zeA3UAAAAJ;4OjO-WYAAAAJ;7oLbhAwAAAAJ;", "orcid": "0000-0003-0584-9129;;0000-0003-1190-9773;;0000-0003-3671-6521;0000-0003-2621-6048;0000-0002-2492-5230", "linkedin": ";;;;;;", "or_profile": "~Shunyu_Liu1;~Tongtian_Zhu1;~Tongya_Zheng1;~Haofei_Zhang1;~Jie_Song3;~Mingli_Song1;~Kai-Xuan_Chen1", "aff": "Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University;", "aff_domain": "zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;", "position": "PhD student;PhD student;PhD student;PhD student;Assistant Professor;Full Professor;", "bibtex": "@misc{\nchen2023motifinduced,\ntitle={Motif-induced Graph Normalization},\nauthor={Kaixuan Chen and Shunyu Liu and Tongtian Zhu and Tongya Zheng and Haofei Zhang and Jie Song and Mingli Song},\nyear={2023},\nurl={https://openreview.net/forum?id=RPyemmvfqNF}\n}", "github": "", "project": "", "reviewers": "iJ89;N94g;WzdD;BFQU", "site": "https://openreview.net/forum?id=RPyemmvfqNF", "pdf_size": 1865838, "recommendation": "3;3;5;6", "confidence": "4;3;4;3", "correctness": "2;3;3;3", 
"technical_novelty": "2;2;2;2", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "60;77;94;30", "wc_strength_and_weaknesses": "633;423;294;175", "wc_clarity_quality_novelty_and_reproducibility": "28;31;102;15", "wc_summary_review": "24;44;13;33", "wc_review": "745;575;503;253", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1767;1804;1169;468", "reply_reviewers": "0;0;0;0", "reply_authors": "4;4;3;2", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 65.25, 23.636571240347024 ], "wc_strength_and_weaknesses_avg": [ 381.25, 169.7592044632632 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 44.0, 34.022051672408 ], "wc_summary_review_avg": [ 28.5, 11.412712210513327 ], "wc_review_avg": [ 519.0, 176.93501631955164 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1302.0, 543.4781504347714 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 3.25, 0.82915619758885 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.19245008972987526, "corr_recommendation_correctness": 0.5555555555555555, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:QAu3nm97puAJ:scholar.google.com/&scioq=Motif-induced+Graph+Normalization&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Zhejiang University", "aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", "aff_unique_abbr": "ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Solving Constrained Variational Inequalities via a First-order Interior Point-based Method", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11284", "id": "RQY2AXFMRiu", "poster": "/media/PosterPDFs/ICLR%202023/11284.png?t=1681544342.9892156", "openreview": "https://openreview.net/forum?id=RQY2AXFMRiu", "slides": "https://iclr.cc/virtual/2023/poster/11284", "video": "https://iclr.cc/virtual/2023/poster/11284", "author_site": "Tong Yang, Michael Jordan, Tatjana Chavdarova", "tldr": "We derive a first-order method for solving constrained variational inequality problem when given general constraints, by combining interior-point methods and ADMM.", "abstract": "We develop an interior-point approach to solve constrained variational inequality (cVI) problems. Inspired by the efficacy of the alternating direction method of multipliers (ADMM) method in the single-objective context, we generalize ADMM to derive a first-order method for cVIs, that we refer to as ADMM-based interior-point method for constrained VIs (ACVI). We provide convergence guarantees for ACVI in two general classes of problems: (i) when the operator is $\\xi$-monotone, and (ii) when it is monotone, some constraints are active and the game is not purely rotational. When the operator is in addition L-Lipschitz for the latter case, we match known lower bounds on rates for the gap function of $\\mathcal{O}(1/\\sqrt{K})$ and $\\mathcal{O}(1/K)$ for the last and average iterate, respectively. To the best of our knowledge, this is the first presentation of a first-order interior-point method for the general cVI problem that has a global convergence guarantee. 
Moreover, unlike previous work in this setting, ACVI provides a means to solve cVIs when the constraints are nontrivial. Empirical analyses demonstrate clear advantages of ACVI over common first-order methods. In particular, (i) cyclical behavior is notably reduced as our methods approach the solution from the analytic center, and (ii) unlike projection-based methods that zigzag when near a constraint, ACVI efficiently handles the constraints.", "keywords": "constrained variational inequality;interior point;admm", "primary_area": "", "supplementary_material": "", "author": "Tong Yang;Michael Jordan;Tatjana Chavdarova", "authorids": "~Tong_Yang4;~Michael_Jordan1;~Tatjana_Chavdarova2", "gender": "F;M;F", "homepage": "https://pptmiao.github.io;http://www.cs.berkeley.edu/~jordan/;https://chavdarova.github.io", "dblp": ";j/MichaelIJordan;160/6038", "google_scholar": ";https://scholar.google.com.tw/citations?user=yxUduqMAAAAJ;", "orcid": ";0000-0001-8935-817X;", "linkedin": ";;", "or_profile": "~Tong_Yang4;~Michael_Jordan1;~Tatjana_Chavdarova2", "aff": "Peking University;University of California, Berkeley;University of California, Berkeley", "aff_domain": "pku.edu.cn;berkeley.edu;berkeley.edu", "position": "MS student;Full Professor;Postdoc", "bibtex": "@inproceedings{\nyang2023solving,\ntitle={Solving Constrained Variational Inequalities via a First-order Interior Point-based Method},\nauthor={Tong Yang and Michael Jordan and Tatjana Chavdarova},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=RQY2AXFMRiu}\n}", "github": "", "project": "", "reviewers": "WuKs;2xGb;P1Mi;9sD3", "pdf_size": 2438884, "recommendation": "6;6;6;8", "confidence": "3;4;3;3", "correctness": "3;3;4;4", "technical_novelty": "2;3;3;4", "empirical_novelty": "3;2;3;0", "wc_summary_paper": "63;93;43;204", "wc_strength_and_weaknesses": "199;290;297;91", "wc_clarity_quality_novelty_and_reproducibility": "26;2;26;119", "wc_summary_review": "14;26;6;45", "wc_review": "302;411;372;459", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "604;892;676;139", "reply_reviewers": "0;0;0;0", "reply_authors": "2;2;2;1", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.0, 1.224744871391589 ], "wc_summary_paper_avg": [ 100.75, 62.21083105054939 ], "wc_strength_and_weaknesses_avg": [ 219.25, 83.5295606357414 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 43.25, 44.81838350498599 ], "wc_summary_review_avg": [ 22.75, 14.686303142724515 ], "wc_review_avg": [ 386.0, 57.458680806297664 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 577.75, 274.58912487569495 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3637057979806414711&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "pdf": "https://openreview.net/pdf?id=RQY2AXFMRiu", "email": "pku.edu.cn;berkeley.edu;berkeley.edu", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Peking University;University of California, Berkeley", "aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;https://www.berkeley.edu", 
"aff_unique_abbr": "Peking U;UC Berkeley", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;1;1", "aff_country_unique": "China;United States" }, { "title": "Quantifying and Mitigating the Impact of Label Errors on Model Disparity Metrics", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10679", "id": "RUzSobdYy0V", "poster": "", "openreview": "https://openreview.net/forum?id=RUzSobdYy0V", "slides": "https://iclr.cc/virtual/2023/poster/10679", "video": "https://iclr.cc/virtual/2023/poster/10679", "author_site": "Julius Adebayo, Melissa Hall, Bowen Yu, Bobbie Chern", "tldr": "", "abstract": "Errors in labels obtained via human annotation adversely affect a trained model's performance. Existing approaches propose ways to mitigate the effect of label error on a model's downstream accuracy, yet little is known about its impact on a model's group-based disparity metrics\\footnote{Group-based disparity metrics like subgroup calibration, false positive rate, false negative rate, equalized odds, and equal opportunity are more often known, colloquially, as \\textit{fairness metrics} in the literature. We use the term group-based disparity metrics in this work.}. Here we study the effect of label error on a model's group-based disparity metrics like group calibration. We empirically characterize how varying levels of label error, in both training and test data, affect these disparity metrics. We find that group calibration and other metrics are sensitive to train-time and test-time label error---particularly for minority groups. For the same level of label error, the percentage change in group calibration error for the minority group is on average 1.5 times larger than the change for the majority group. Towards mitigating the impact of training-time label error, we present an approach to estimate how changing a single training input's label affects a model's group disparity metric on a test set. We empirically assess the proposed approach on a variety of datasets and find a 10-40\\% improvement, compared to alternative approaches, in identifying training inputs that improve a model's disparity metric. 
The proposed approach can help surface training inputs that may need to be corrected for improving a model's group-based disparity metrics.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/151652f4d981a49f9dfa81be992839a243893cd1.zip", "author": "Julius Adebayo;Melissa Hall;Bowen Yu;Bobbie Chern", "authorids": "~Julius_Adebayo1;~Melissa_Hall1;~Bowen_Yu2;~Bobbie_Chern1", "gender": "M;F;M;M", "homepage": "https://juliusadebayo.com/;;;", "dblp": "146/1271;287/5067;;", "google_scholar": "y1bnRg4AAAAJ;DcfHZoUAAAAJ;RxPsnDkAAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Julius_Adebayo1;~Melissa_Hall1;~Bowen_Yu2;~Bobbie_Chern1", "aff": "Prescient Design / Genentech;Research, Facebook;Meta Facebook;", "aff_domain": "gene.com;research.facebook.com;fb.com;", "position": "Postdoc;Researcher;Researcher;", "bibtex": "@inproceedings{\nadebayo2023quantifying,\ntitle={Quantifying and Mitigating the Impact of Label Errors on Model Disparity Metrics},\nauthor={Julius Adebayo and Melissa Hall and Bowen Yu and Bobbie Chern},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=RUzSobdYy0V}\n}", "github": "", "project": "", "reviewers": "Xn1n;oCK7;DK6U", "pdf_size": 1099144, "recommendation": "5;6;8", "confidence": "4;3;3", "correctness": "3;3;4", "technical_novelty": "2;3;3", "empirical_novelty": "1;3;3", "wc_summary_paper": "73;89;82", "wc_strength_and_weaknesses": "294;124;174", "wc_clarity_quality_novelty_and_reproducibility": "33;21;101", "wc_summary_review": "43;40;44", "wc_review": "443;274;401", "wc_reply_reviewers": "110;0;0", "wc_reply_authors": "1804;541;631", "reply_reviewers": "1;0;0", "reply_authors": "5;3;3", "recommendation_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.9428090415820634 ], "wc_summary_paper_avg": [ 81.33333333333333, 6.548960901462833 ], "wc_strength_and_weaknesses_avg": [ 197.33333333333334, 71.336448530109 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 51.666666666666664, 35.22625283632775 ], "wc_summary_review_avg": [ 42.333333333333336, 1.699673171197595 ], "wc_review_avg": [ 372.6666666666667, 71.84396673037726 ], "wc_reply_reviewers_avg": [ 36.666666666666664, 51.85449728701349 ], "wc_reply_authors_avg": [ 992.0, 575.3451138229993 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 3.6666666666666665, 0.9428090415820634 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.7559289460184545, "corr_recommendation_correctness": 0.944911182523068, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5966222525384649204&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=RUzSobdYy0V", "email": "gene.com;research.facebook.com;fb.com;", "author_num": 4, "aff_unique_index": "0;1;1", "aff_unique_norm": "Genentech;Meta", "aff_unique_dep": ";Research", "aff_unique_url": "https://www.gene.com;https://www.facebook.com", "aff_unique_abbr": "Genentech;FB", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Diversify and Disambiguate: Out-of-Distribution Robustness via 
Disagreement", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12231", "id": "RVTOp3MwT3n", "poster": "", "openreview": "https://openreview.net/forum?id=RVTOp3MwT3n", "slides": "https://iclr.cc/virtual/2023/poster/12231", "video": "https://iclr.cc/virtual/2023/poster/12231", "author_site": "Yoonho Lee, Huaxiu Yao, Chelsea Finn", "tldr": "Given underspecified data, (1) find a diverse set of solutions and (2) choose the best one.", "abstract": "Real-world machine learning problems often exhibit shifts between the source and target distributions, in which source data does not fully convey the desired behavior on target inputs. Different functions that achieve near-perfect source accuracy can make differing predictions on test inputs, and such ambiguity makes robustness to distribution shifts challenging. We propose DivDis, a simple two-stage framework for identifying and resolving ambiguity in data. DivDis first learns a diverse set of hypotheses that achieve low source loss but make differing predictions on target inputs. We then disambiguate by selecting one of the discovered functions using additional information, for example, a small number of target labels. Our experimental evaluation shows improved performance in subpopulation shift and domain generalization settings, demonstrating that DivDis can scalably adapt to distribution shifts in image and text classification benchmarks.", "keywords": "Out-of-distribution robustness;spurious correlations;underspecification;ambiguity;ensembles", "primary_area": "", "supplementary_material": "", "author": "Yoonho Lee;Huaxiu Yao;Chelsea Finn", "authorids": "~Yoonho_Lee1;~Huaxiu_Yao1;~Chelsea_Finn1", "gender": "M;M;F", "homepage": "https://yoonholee.com/;http://huaxiuyao.mystrikingly.com;https://ai.stanford.edu/~cbfinn/", "dblp": "213/8086;197/1635;131/1783", "google_scholar": "https://scholar.google.co.kr/citations?user=BAAZ_ysAAAAJ;A20BZnQAAAAJ;vfPE6hgAAAAJ", "orcid": ";;", "linkedin": ";huaxiuyao/;", "or_profile": "~Yoonho_Lee1;~Huaxiu_Yao1;~Chelsea_Finn1", "aff": "Stanford University;Computer Science Department, Stanford University;Google", "aff_domain": "stanford.edu;cs.stanford.edu;google.com", "position": "PhD student;Postdoc;Research Scientist", "bibtex": "@inproceedings{\nlee2023diversify,\ntitle={Diversify and Disambiguate: Out-of-Distribution Robustness via Disagreement},\nauthor={Yoonho Lee and Huaxiu Yao and Chelsea Finn},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=RVTOp3MwT3n}\n}", "github": "", "project": "", "reviewers": "jJSw;X9wJ;kpaT;5h8Y", "pdf_size": 5743274, "recommendation": "6;8;8;8", "confidence": "4;4;5;4", "correctness": "3;4;4;4", "technical_novelty": "3;3;3;3", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "112;61;40;123", "wc_strength_and_weaknesses": "447;345;251;63", "wc_clarity_quality_novelty_and_reproducibility": "65;35;19;66", "wc_summary_review": "67;60;25;93", "wc_review": "691;501;335;345", "wc_reply_reviewers": "40;24;99;0", "wc_reply_authors": "951;830;840;266", "reply_reviewers": "1;1;1;0", "reply_authors": "2;1;2;1", "recommendation_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 84.0, 34.532593299664015 ], "wc_strength_and_weaknesses_avg": [ 276.5, 141.41693675087154 ], 
"wc_clarity_quality_novelty_and_reproducibility_avg": [ 46.25, 20.06707502353046 ], "wc_summary_review_avg": [ 61.25, 24.27318479310039 ], "wc_review_avg": [ 468.0, 144.59944674859582 ], "wc_reply_reviewers_avg": [ 40.75, 36.519686471819554 ], "wc_reply_authors_avg": [ 721.75, 267.3783601939394 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 1.0, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16302317351575210961&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "pdf": "https://openreview.net/pdf?id=RVTOp3MwT3n", "email": "stanford.edu;cs.stanford.edu;google.com", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Stanford University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.stanford.edu;https://www.google.com", "aff_unique_abbr": "Stanford;Google", "aff_campus_unique_index": "0;0;1", "aff_campus_unique": "Stanford;Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "RVgssxlEVfl", "title": "Non-Parametric State-Space Models: Identifiability, Estimation and Forecasting", "track": "main", "status": "Reject", "tldr": "Flexible state space model for time series forecasting, inspired by the general structural causal model.", "abstract": "State-space models (SSMs) provide a standard methodology for time series analysis and prediction. While recent works utilize nonlinear functions to parameterize the transition and emission processes to enhance their expressivity, the form of additive noise still limits their applicability in real-world scenarios. In this work, we propose a general formulation of SSMs with a completely non-parametric transition model and a flexible emission model which can account for sensor distortion. Besides, to deal with more general scenarios (e.g., non-stationary time series), we add a higher level model to capture time-varying characteristics of the process.\nInterestingly, we find that even though the proposed model is remarkably flexible, the latent processes are generally identifiable. Given this, we further propose the corresponding estimation procedure and make use of it for the forecasting task. Our model can recover the latent processes and their relations from observed sequential data. Accordingly, the proposed procedure can also be viewed as a method for causal representation learning. We argue that forecasting can benefit from causal representation learning, since the estimated latent variables are generally identifiable. 
Empirical comparisons on various datasets validate that our model could not only reliably identify the latent processes from the observed data, but also consistently outperform baselines in the forecasting task.", "keywords": "state-space model;time series forecasting;causal representation learning", "primary_area": "", "supplementary_material": "/attachment/99bcb3051378f2f4f4978ef5220719ab45d5d55f.zip", "author": "Chenghao Liu;Weiran Yao;Steven Hoi;Kun Zhang", "authorids": "~Chenghao_Liu1;~Weiran_Yao1;~Steven_Hoi2;~Kun_Zhang1", "gender": "M;M;M;M", "homepage": ";;http://stevenhoi.com;http://www.andrew.cmu.edu/user/kunz1/", "dblp": ";192/3295;;96/3115-1", "google_scholar": "https://scholar.google.com/citations?hl=en;rr_leUAAAAAJ;JoLjflYAAAAJ;RGoypN4AAAAJ", "orcid": ";;;", "linkedin": "chenghao-liu-40a62a56/;;;", "or_profile": "~Chenghao_Liu1;~Weiran_Yao1;~Steven_Hoi2;~Kun_Zhang1", "aff": "Salesforce AI Research;SalesForce.com;Singapore Management University;Carnegie Mellon University", "aff_domain": "salesforce.com;salesforce.com;smu.edu.sg;cmu.edu", "position": "Researcher;Researcher;Associate Professor;Associate Professor", "bibtex": "@misc{\nliu2023nonparametric,\ntitle={Non-Parametric State-Space Models: Identifiability, Estimation and Forecasting},\nauthor={Chenghao Liu and Weiran Yao and Steven Hoi and Kun Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=RVgssxlEVfl}\n}", "github": "", "project": "", "reviewers": "qCMD;Uowm;AS1z", "site": "https://openreview.net/forum?id=RVgssxlEVfl", "pdf_size": 2603461, "recommendation": "6;6;6", "confidence": "3;4;2", "correctness": "3;3;3", "technical_novelty": "2;3;3", "empirical_novelty": "2;3;3", "wc_summary_paper": "135;60;68", "wc_strength_and_weaknesses": "207;463;207", "wc_clarity_quality_novelty_and_reproducibility": "171;57;29", "wc_summary_review": "88;46;12", "wc_review": "601;626;316", "wc_reply_reviewers": "0;449;25", "wc_reply_authors": "1125;3296;1801", "reply_reviewers": "0;3;1", "reply_authors": "2;9;7", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 87.66666666666667, 33.62869145371091 ], "wc_strength_and_weaknesses_avg": [ 292.3333333333333, 120.67955732250411 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 85.66666666666667, 61.412991748941494 ], "wc_summary_review_avg": [ 48.666666666666664, 31.084115271666047 ], "wc_review_avg": [ 514.3333333333334, 140.61373411662967 ], "wc_reply_reviewers_avg": [ 158.0, 206.02103452479474 ], "wc_reply_authors_avg": [ 2074.0, 907.0858099797762 ], "reply_reviewers_avg": [ 1.3333333333333333, 1.247219128924647 ], "reply_authors_avg": [ 6.0, 2.943920288775949 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:6tEnykbik1sJ:scholar.google.com/&scioq=Non-Parametric+State-Space+Models:+Identifiability,+Estimation+and+Forecasting&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Salesforce;Singapore Management University;Carnegie Mellon University", "aff_unique_dep": "Salesforce AI Research;;", "aff_unique_url": "https://www.salesforce.com;https://www.smu.edu.sg;https://www.cmu.edu", "aff_unique_abbr": "Salesforce AI;SMU;CMU", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;Singapore" }, { "title": "Simplicial Embeddings in Self-Supervised Learning and Downstream Classification", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12047", "id": "RWtGreRpovS", "poster": "", "openreview": "https://openreview.net/forum?id=RWtGreRpovS", "slides": "https://iclr.cc/virtual/2023/poster/12047", "video": "https://iclr.cc/virtual/2023/poster/12047", "author_site": "Samuel Lavoie, Christos Tsirigotis, Max Schwarzer, Ankit Vani, Mikhail Noukhovitch, Kenji Kawaguchi, Aaron Courville", "tldr": "We use softmax to embed representations in a collection of simplices in SSL models, which offers improved generalization properties for downstream classification.", "abstract": "Simplicial Embeddings (SEM) are representations learned through self-supervised learning (SSL), wherein a representation is projected into $L$ simplices of $V$ dimensions each using a \\texttt{softmax} operation. This procedure conditions the representation onto a constrained space during pretraining and imparts an inductive bias for group sparsity. For downstream classification, we formally prove that the SEM representation leads to better generalization than an unnormalized representation.\nFurthermore, we empirically demonstrate that SSL methods trained with SEMs have improved generalization on natural image datasets such as CIFAR-100 and ImageNet. Finally, when used in a downstream classification task, we show that SEM features exhibit emergent semantic coherence where small groups of learned features are distinctly predictive of semantically-relevant classes.", "keywords": "Self-Supervised learning;Representation learning;Pre-training", "primary_area": "", "supplementary_material": "/attachment/4fd5f9371de7715e85d215337cea506dcb997227.zip", "author": "Samuel Lavoie;Christos Tsirigotis;Max Schwarzer;Ankit Vani;Michael Noukhovitch;Kenji Kawaguchi;Aaron Courville", "authorids": "~Samuel_Lavoie1;~Christos_Tsirigotis1;~Max_Schwarzer1;~Ankit_Vani1;~Michael_Noukhovitch1;~Kenji_Kawaguchi1;~Aaron_Courville3", "gender": ";;M;M;;;M", "homepage": ";;https://ankitvani.com/;http://mnoukhov.github.io;https://ml.comp.nus.edu.sg/#members;;http://example.com", "dblp": "215/5173;;178/2855;218/6652;;56/1688;225/6508", "google_scholar": "https://scholar.google.com/citations?hl=en;YmWRSvgAAAAJ;KtnTuq8AAAAJ;https://scholar.google.ca/citations?user=EwmQKdMAAAAJ;aLl3rYoAAAAJ;https://scholar.google.ca/citations?user=km6CP8cAAAAJ;", "orcid": ";;;;;;", "linkedin": "tsirif/;maxaschwarzer/;ankitvani/;;;;", "or_profile": "~Christos_Tsirigotis1;~Max_Schwarzer1;~Ankit_Vani1;~Michael_Noukhovitch1;~Kenji_Kawaguchi1;~Aaron_Courville3;~Samuel_Lavoie-Marchildon1", "aff": "Mila, Quebec Artificial Intelligence Institute;University of Montreal;Mila;University of Montreal;National University of Singapore;Universit\u00e9 de Montr\u00e9al;University of Montreal", "aff_domain": "mila.quebec;umontreal.ca;mila.quebec;umontreal.ca;nus.edu; ;umontreal.ca", "position": "Researcher;PhD student;PhD student;PhD student;Presidential Young Professor;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nlavoie2023simplicial,\ntitle={Simplicial Embeddings in Self-Supervised Learning and Downstream Classification},\nauthor={Samuel Lavoie and Christos Tsirigotis and Max Schwarzer and Ankit Vani and Michael Noukhovitch and Kenji Kawaguchi and Aaron Courville},\nbooktitle={The Eleventh 
International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=RWtGreRpovS}\n}", "github": "", "project": "", "reviewers": "Jv3d;4CQ8;E9n7;u5uQ", "pdf_size": 1213642, "recommendation": "8;8;8;8", "confidence": "3;4;3;4", "correctness": "4;3;3;4", "technical_novelty": "4;3;3;3", "empirical_novelty": "4;3;3;3", "wc_summary_paper": "85;68;107;91", "wc_strength_and_weaknesses": "184;903;579;283", "wc_clarity_quality_novelty_and_reproducibility": "68;102;20;57", "wc_summary_review": "55;54;100;38", "wc_review": "392;1127;806;469", "wc_reply_reviewers": "0;192;68;11", "wc_reply_authors": "719;1020;910;839", "reply_reviewers": "0;2;1;1", "reply_authors": "2;3;2;2", "recommendation_avg": [ 8.0, 0.0 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 87.75, 13.953046262375826 ], "wc_strength_and_weaknesses_avg": [ 487.25, 280.59969262278247 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 61.75, 29.260681810238122 ], "wc_summary_review_avg": [ 61.75, 23.09085316743407 ], "wc_review_avg": [ 698.5, 292.31019482734433 ], "wc_reply_reviewers_avg": [ 67.75, 76.23770392660052 ], "wc_reply_authors_avg": [ 872.0, 109.36864267238576 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14612645559403291763&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=RWtGreRpovS", "email": "mila.quebec;umontreal.ca;mila.quebec;umontreal.ca;nus.edu; ;umontreal.ca", "author_num": 7, "aff_unique_index": "0;1;2;1;3;4;1", "aff_unique_norm": "Quebec Artificial Intelligence Institute;University of Montreal;Mila;National University of Singapore;Universit\u00e9 de Montr\u00e9al", "aff_unique_dep": "Artificial Intelligence;;Quebec Artificial Intelligence Institute;;", "aff_unique_url": "https://mila.quebec;https://www.umontreal.ca;https://mila.quebec;https://www.nus.edu.sg;https://www.umontreal.ca", "aff_unique_abbr": "Mila;UM;Mila;NUS;UdeM", "aff_campus_unique_index": "0", "aff_campus_unique": "Quebec;", "aff_country_unique_index": "0;0;0;0;1;0;0", "aff_country_unique": "Canada;Singapore" }, { "id": "RYRUJEjcCY", "title": "HEAV: Hierarchical Ensembling of Augmented Views for Image Captioning", "track": "main", "status": "Withdraw", "tldr": "We tackle the problem of how to efficiently and effectively leverage and ensemble heterogeneous views for image captioning", "abstract": "A great deal of progress has been made in image captioning, driven by research into how to encode the image using pre-trained models. This includes visual encodings (e.g. image grid features or detected objects) and more recently textual encodings (e.g. image tags or text descriptions of image regions). As more advanced encodings are available and incorporated, it is natural to ask: how to efficiently and effectively leverage and ensemble the heterogeneous set of encodings? In this paper, we propose to regard the encodings as augmented views of the input image.
The model encodes each view independently with a shared encoder efficiently, and a contrastive loss is incorporated across the encoded views to improve the representation quality, as well as to enable semi-supervised training of image captioning. Our proposed hierarchical decoder then adaptively ensembles the encoded views according to their usefulness by first ensembling within each view at the token level, and then across views at the view level. We demonstrate significant performance improvements of +5.6% CIDEr on MS-COCO compared to state of the art under the same trained-from-scratch setting and +16.8% CIDEr on Flickr30K with semi-supervised training, and conduct rigorous analyses to demonstrate the importance of each part of our design. ", "keywords": "image captioning;vision and language", "primary_area": "", "supplementary_material": "", "author": "Chia-Wen Kuo;Zsolt Kira", "authorids": "~Chia-Wen_Kuo1;~Zsolt_Kira1", "gender": "M;M", "homepage": "https://sites.google.com/view/chiawen-kuo/home;https://faculty.cc.gatech.edu/~zk15", "dblp": ";36/4127", "google_scholar": "iip65VkAAAAJ;2a5XgNAAAAAJ", "orcid": ";0000-0002-2626-2004", "linkedin": ";", "or_profile": "~Chia-Wen_Kuo1;~Zsolt_Kira1", "aff": "Georgia Institute of Technology;Georgia Tech Research Institute", "aff_domain": "gatech.edu;gtri.gatech.edu", "position": "PhD student;Senior Research Scientist", "bibtex": "@misc{\nkuo2023heav,\ntitle={{HEAV}: Hierarchical Ensembling of Augmented Views for Image Captioning},\nauthor={Chia-Wen Kuo and Zsolt Kira},\nyear={2023},\nurl={https://openreview.net/forum?id=RYRUJEjcCY}\n}", "github": "", "project": "", "reviewers": "bKbQ;SBRe;PL8L;sRNF", "site": "https://openreview.net/forum?id=RYRUJEjcCY", "pdf_size": 4365897, "recommendation": "3;5;5;6", "confidence": "5;4;3;4", "correctness": "3;3;3;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "38;93;137;38", "wc_strength_and_weaknesses": "177;234;144;105", "wc_clarity_quality_novelty_and_reproducibility": "11;43;65;25", "wc_summary_review": "19;18;61;54", "wc_review": "245;388;407;222", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 76.5, 41.52408939398912 ], "wc_strength_and_weaknesses_avg": [ 165.0, 47.29164831130334 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 36.0, 20.223748416156685 ], "wc_summary_review_avg": [ 38.0, 19.6596032513375 ], "wc_review_avg": [ 315.5, 82.67557075702592 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.6488856845230502, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:h4V9rGm73vQJ:scholar.google.com/&scioq=HEAV:+Hierarchical+Ensembling+of+Augmented+Views+for+Image+Captioning&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Georgia Institute of Technology;Georgia Tech Research Institute", "aff_unique_dep": ";", "aff_unique_url": "https://www.gatech.edu;https://www.gtri.gatech.edu", "aff_unique_abbr": "Georgia Tech;GTRI", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "RZHdb7FnqlY", "title": "Towards the Detection of Diffusion Model Deepfakes", "track": "main", "status": "Reject", "tldr": "We take a first look at the detection of images generated by diffusion models by evaluating state-of-the-art detectors and analyzing DM-generated images in the frequency domain.", "abstract": "Diffusion models (DMs) have recently emerged as a promising method in image synthesis. They have surpassed generative adversarial networks (GANs) in both diversity and quality, and have achieved impressive results in text-to-image modeling. However, to date, only little attention has been paid to the detection of DM-generated images, which is critical to prevent adverse impacts on our society. While prior works have shown that GAN-generated images can be reliably detected using automated methods, it is unclear whether the same methods are effective against DMs. In this work, we address this challenge and take a first look at detecting DM-generated images. We approach the problem from two different angles: First, we evaluate the performance of state-of-the-art detectors on a variety of DMs. Second, we analyze DM-generated images in the frequency domain and study different factors that influence the spectral properties of these images. Most importantly, we demonstrate that GANs and DMs produce images with different characteristics, which requires adaptation of existing classifiers to ensure reliable detection. We believe this work provides the foundation and starting point for further research to detect DM deepfakes effectively.", "keywords": "Diffusion Model;Generative Adversarial Network;GAN;Deepfakes;Detection;Frequency Artifact;Frequency Analysis;Spectrum Discrepancies;Synthetic Images;Disinformation;Social Media", "primary_area": "", "supplementary_material": "/attachment/4f4ac4f0887ad918504da69e5b2fdeb1d095524c.zip", "author": "Jonas Ricker;Simon Damm;Thorsten Holz;Asja Fischer", "authorids": "~Jonas_Ricker1;~Simon_Damm1;~Thorsten_Holz1;~Asja_Fischer1", "gender": ";M;M;F", "homepage": ";https://ellis.eu/projects/generative-modeling-and-it-s-theoretical-foundations;https://cispa.de;", "dblp": ";331/9351;h/ThorstenHolz;76/8485", "google_scholar": ";rswJ0gUAAAAJ;tv2HR38AAAAJ;FyZbyIUAAAAJ", "orcid": ";0000-0002-4584-1765;0000-0002-2783-1264;0000-0002-1916-7033", "linkedin": ";simon-damm/;;", "or_profile": "~Jonas_Ricker1;~Simon_Damm1;~Thorsten_Holz1;~Asja_Fischer1", "aff": ";Ruhr-Universt\u00e4t Bochum;CISPA Helmholtz Center for Information Security;Ruhr-Universit\u00e4t Bochum", "aff_domain": ";rub.de;cispa.saarland;ruhr-uni-bochum.de", "position": ";PhD student;Principal Researcher;Full Professor", "bibtex": "@misc{\nricker2023towards,\ntitle={Towards the Detection of Diffusion Model Deepfakes},\nauthor={Jonas Ricker and Simon Damm and Thorsten Holz and Asja Fischer},\nyear={2023},\nurl={https://openreview.net/forum?id=RZHdb7FnqlY}\n}", "github": "", "project": "", "reviewers": "iMjn;Gcjc;rYBt;gmjk;3UTf", "site": "https://openreview.net/forum?id=RZHdb7FnqlY", "pdf_size": 37561531, "recommendation": "5;5;6;6;8", "confidence": "2;5;4;5;4", "correctness": "3;3;3;3;4", "technical_novelty": "2;2;2;2;3", "empirical_novelty": "2;2;4;4;3", "wc_summary_paper": "35;28;39;70;42", "wc_strength_and_weaknesses": "98;76;47;463;83", "wc_clarity_quality_novelty_and_reproducibility": "20;28;29;53;42", "wc_summary_review": "17;28;32;49;38", "wc_review": 
"170;160;147;635;205", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "466;160;139;1020;171", "reply_reviewers": "0;0;0;0;0", "reply_authors": "1;1;1;2;1", "recommendation_avg": [ 6.0, 1.0954451150103321 ], "confidence_avg": [ 4.0, 1.0954451150103321 ], "correctness_avg": [ 3.2, 0.39999999999999997 ], "technical_novelty_avg": [ 2.2, 0.39999999999999997 ], "empirical_novelty_avg": [ 3.0, 0.8944271909999159 ], "wc_summary_paper_avg": [ 42.8, 14.386104406683556 ], "wc_strength_and_weaknesses_avg": [ 153.4, 155.6850667212498 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 34.4, 11.672189169131897 ], "wc_summary_review_avg": [ 32.8, 10.609429767899876 ], "wc_review_avg": [ 263.4, 186.7946466042322 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 391.2, 336.6098037787967 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.1666666666666667, "corr_recommendation_correctness": 0.912870929175277, "gs_citation": 115, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12504086545099518207&as_sdt=1005&sciodt=0,4&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Ruhr-Universit\u00e4t Bochum;CISPA Helmholtz Center for Information Security", "aff_unique_dep": ";", "aff_unique_url": "https://www.ruhr-uni-bochum.de;https://www.cispa.de/", "aff_unique_abbr": "RUB;CISPA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "id": "RZT4uwbZ5qr", "title": "Memory Efficient Dynamic Sparse Training", "track": "main", "status": "Reject", "tldr": "", "abstract": "The excessive memory and energy consumption of modern Artificial Neural Networks (ANNs) is posing limitations on the machines that can run these models. Sparsification of ANNs is often motivated by time, memory and energy savings only during model inference, yielding no benefits during training. A growing body of work is now focusing on providing the benefits of model sparsification also during training. While these methods improve the energy efficiency during training, the algorithms yielding the most accurate models still have a peak memory usage on the same order as the dense model. We propose a Dynamic Sparse Training (DST) algorithm that reduces the peak memory usage during training while preserving the energy advantages of sparsely trained models. We evaluate our algorithm on CIFAR-10/100 using ResNet-56 and VGG-16 and compare it against a range of sparsification methods. 
The benefits of our method are twofold: first, it allows for a given model to be trained to an accuracy on par with the dense model while requiring significantly less memory and energy; second, the savings in memory and energy can be allocated towards training an even larger sparse model on the same machine, generally improving the accuracy of the model.", "keywords": "Dynamic Sparse Training;Sparse Neural Networks", "primary_area": "", "supplementary_material": "", "author": "Mike Heddes;Narayan Srinivasa", "authorids": "~Mike_Heddes1;narayan.srinivasa@intel.com", "gender": "M;", "homepage": "https://www.mikeheddes.nl;", "dblp": "320/5938.html;", "google_scholar": "SZpFJqIAAAAJ;", "orcid": "0000-0002-9276-458X;", "linkedin": "mikeheddes/;", "or_profile": "~Mike_Heddes1;narayan.srinivasa@intel.com", "aff": "University of California, Irvine;", "aff_domain": "uci.edu;", "position": "PhD student;", "bibtex": "@misc{\nheddes2023memory,\ntitle={Memory Efficient Dynamic Sparse Training},\nauthor={Mike Heddes and Narayan Srinivasa},\nyear={2023},\nurl={https://openreview.net/forum?id=RZT4uwbZ5qr}\n}", "github": "", "project": "", "reviewers": "U17z;WdYk;qDT4;ghDx", "site": "https://openreview.net/forum?id=RZT4uwbZ5qr", "pdf_size": 423097, "recommendation": "3;3;3;3", "confidence": "4;4;4;5", "correctness": "3;2;2;2", "technical_novelty": "2;2;3;1", "empirical_novelty": "1;2;1;1", "wc_summary_paper": "43;60;89;97", "wc_strength_and_weaknesses": "142;147;181;122", "wc_clarity_quality_novelty_and_reproducibility": "22;150;91;5", "wc_summary_review": "28;22;47;17", "wc_review": "235;379;408;241", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 2.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 1.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 72.25, 21.787324296480282 ], "wc_strength_and_weaknesses_avg": [ 148.0, 21.224985276791124 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 67.0, 57.736470276593806 ], "wc_summary_review_avg": [ 28.5, 11.368817000902073 ], "wc_review_avg": [ 315.75, 78.45181642256603 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "aff_unique_index": "0", "aff_unique_norm": "University of California, Irvine", "aff_unique_dep": "", "aff_unique_url": "https://www.uci.edu", "aff_unique_abbr": "UCI", "aff_campus_unique_index": "0", "aff_campus_unique": "Irvine", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Interpretability with full complexity by constraining feature information", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12143", "id": "R_OL5mLhsv", "poster": "", "openreview": "https://openreview.net/forum?id=R_OL5mLhsv", "slides": "https://iclr.cc/virtual/2023/poster/12143", "video": "https://iclr.cc/virtual/2023/poster/12143", "author_site": "Kieran Murphy, Danielle Bassett", "tldr": "", "abstract": "Interpretability is a pressing issue for machine learning. 
Common approaches to interpretable machine learning constrain interactions between features of the input, sacrificing model complexity in order to render more comprehensible the effects of those features on the model's output. We approach interpretability from a new angle: constrain the information about the features without restricting the complexity of the model. We use the Distributed Information Bottleneck to optimally compress each feature so as to maximally preserve information about the output. The learned information allocation, by feature and by feature value, provides rich opportunities for interpretation, particularly in problems with many features and complex feature interactions. The central object of analysis is not a single trained model, but rather a spectrum of models serving as approximations that leverage variable amounts of information about the inputs. Information is allocated to features by their relevance to the output, thereby solving the problem of feature selection by constructing a learned continuum of feature inclusion-to-exclusion. The optimal compression of each feature---at every stage of approximation---allows fine-grained inspection of the distinctions among feature values that are most impactful for prediction. We develop a framework for extracting insight from the spectrum of approximate models and demonstrate its utility on a range of tabular datasets. ", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Kieran A Murphy;Danielle Bassett", "authorids": "~Kieran_A_Murphy1;~Danielle_Bassett1", "gender": "M;Non-Binary", "homepage": "https://kieranamurphy.com;http://danisbassett.com/", "dblp": "287/4780;", "google_scholar": "VC653zEAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0003-0960-6685;0000-0002-6183-4493", "linkedin": ";dani-s-bassett-4aa7591a/", "or_profile": "~Kieran_A_Murphy1;~Danielle_Bassett1", "aff": "University of Pennsylvania;University of Pennsylvania", "aff_domain": "penn.edu;upenn.edu", "position": "Postdoc;Full Professor", "bibtex": "@inproceedings{\nmurphy2023interpretability,\ntitle={Interpretability with full complexity by constraining feature information},\nauthor={Kieran A Murphy and Danielle Bassett},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=R_OL5mLhsv}\n}", "github": "", "project": "", "reviewers": "qhwf;rdFY;sKPb;zAhj", "pdf_size": 3409306, "recommendation": "6;6;6;8", "confidence": "4;3;3;4", "correctness": "3;2;3;4", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;3;0;3", "wc_summary_paper": "75;60;98;51", "wc_strength_and_weaknesses": "568;630;233;285", "wc_clarity_quality_novelty_and_reproducibility": "224;22;54;15", "wc_summary_review": "63;29;78;40", "wc_review": "930;741;463;391", "wc_reply_reviewers": "35;385;13;0", "wc_reply_authors": "1287;1729;771;652", "reply_reviewers": "1;2;1;0", "reply_authors": "2;4;1;1", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.0, 1.224744871391589 ], "wc_summary_paper_avg": [ 71.0, 17.790446874657196 ], "wc_strength_and_weaknesses_avg": [ 429.0, 172.39054498434652 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 78.75, 85.1392242153991 ], "wc_summary_review_avg": [ 52.5, 19.1637678967368 ], "wc_review_avg": [ 631.25, 216.40514665783715 ], "wc_reply_reviewers_avg": [ 108.25, 
160.27066949382848 ], "wc_reply_authors_avg": [ 1109.75, 429.8763630394209 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.816496580927726, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18431166211848912880&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=R_OL5mLhsv", "email": "penn.edu;upenn.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Pennsylvania", "aff_unique_dep": "", "aff_unique_url": "https://www.upenn.edu", "aff_unique_abbr": "UPenn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "Rb3RN0maB7F", "title": "QuAFL: Federated Averaging Made Asynchronous and Communication-Efficient", "track": "main", "status": "Reject", "tldr": "", "abstract": "Federated Learning (FL) is an emerging paradigm to enable the large-scale distributed training of machine learning models, while still providing privacy guarantees. \n In this work, we address two of the main practical challenges when scaling federated optimization to large node counts: the need for tight synchronization between the central authority and individual computing nodes, and the large communication cost of transmissions between the central server and clients. \n Specifically, we present a new variant of the classic federated averaging (FedAvg) algorithm, which supports both asynchronous communication and communication compression. We provide a new analysis technique showing that, in spite of these system relaxations, our algorithm can provide similar convergence to FedAvg in some parameter regimes.\n On the experimental side, we show that our algorithm ensures fast convergence for standard federated tasks. 
", "keywords": "Federated Learning;Distributed optimization;Load Balancing", "primary_area": "", "supplementary_material": "", "author": "Hossein Zakerinia;Shayan Talaei;Giorgi Nadiradze;Dan Alistarh", "authorids": "~Hossein_Zakerinia1;~Shayan_Talaei1;~Giorgi_Nadiradze1;~Dan_Alistarh7", "gender": ";M;M;", "homepage": ";;;", "dblp": ";322/8865;;", "google_scholar": ";DXuAYKwAAAAJ;iq-7vhMAAAAJ;", "orcid": ";0009-0005-6697-8487;;", "linkedin": ";shayan-talaei-6b65a0229/;;", "or_profile": "~Hossein_Zakerinia1;~Shayan_Talaei1;~Giorgi_Nadiradze1;~Dan_Alistarh7", "aff": ";Stanford University;Institute of Science and Technology Austria;", "aff_domain": ";stanford.edu;ist.ac.at;", "position": ";PhD student;Postdoc;", "bibtex": "@misc{\nzakerinia2023quafl,\ntitle={Qu{AFL}: Federated Averaging Made Asynchronous and Communication-Efficient},\nauthor={Hossein Zakerinia and Shayan Talaei and Giorgi Nadiradze and Dan Alistarh},\nyear={2023},\nurl={https://openreview.net/forum?id=Rb3RN0maB7F}\n}", "github": "", "project": "", "reviewers": "ea7y;xMTC;ZjQd;oDyr", "site": "https://openreview.net/forum?id=Rb3RN0maB7F", "pdf_size": 689277, "recommendation": "3;3;5;5", "confidence": "4;3;3;4", "correctness": "2;3;4;3", "technical_novelty": "3;2;2;2", "empirical_novelty": "2;1;2;2", "wc_summary_paper": "63;74;37;76", "wc_strength_and_weaknesses": "644;219;235;390", "wc_clarity_quality_novelty_and_reproducibility": "2;57;12;15", "wc_summary_review": "64;94;15;24", "wc_review": "773;444;299;505", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1238;760;1166;1475", "reply_reviewers": "0;0;0;0", "reply_authors": "2;1;2;2", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 62.5, 15.532224567009067 ], "wc_strength_and_weaknesses_avg": [ 372.0, 170.65022707280528 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 21.5, 21.0535032714273 ], "wc_summary_review_avg": [ 49.25, 31.744093938873103 ], "wc_review_avg": [ 505.25, 171.7416300726181 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1159.75, 257.55812450784777 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.7071067811865475, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:q_Mu6gVbdi8J:scholar.google.com/&scioq=QuAFL:+Federated+Averaging+Made+Asynchronous+and+Communication-Efficient&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Stanford University;Institute of Science and Technology Austria", "aff_unique_dep": ";", "aff_unique_url": "https://www.stanford.edu;https://www.ist.ac.at", "aff_unique_abbr": "Stanford;IST Austria", "aff_campus_unique_index": "0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;Austria" }, { "id": "Rc0Xpxxfx5", "title": "DIFFUSED INSTANCE CONDITIONED GAN", "track": "main", "status": "Withdraw", "tldr": "Improving image quality and mode coverage of GAN using diffusion based Gaussian mixture in feature space as partition guidance. ", "abstract": "Recently, numerous data partitioning methods for generative adversarial networks has been developed for better distribution coverage on complex distribution. 
Most of these approaches aims to build fine-grained overlapping clusters in data manifold and condition both generator and discriminator with compressed representation about cluster. Although giving larger size of condition can be more informative, existing algorithms only utilize low dimension vector as condition due to dependency on clustering algorithm and unsupervised / self-supervised learning methods. In this work, we take a step towards using richer representation for cluster by utilizing diffusion based Gaussian mixture. Our analysis shows that we can derive continuous representation of cluster with Gaussian mixture when\nnoise scale is given. Moreover, unlike other counterparts, we do not need excessive computation for acquiring clustered representation. Experiments on multiple datasets show that our model produces better results compared to recent GAN models.", "keywords": "generative adversarial networks;GAN;conditional GAN;image generation", "primary_area": "", "supplementary_material": "", "author": "Junho Park;Jae-Pil Heo", "authorids": "~Junho_Park1;~Jae-Pil_Heo3", "gender": "M;M", "homepage": ";", "dblp": ";17/7557", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;VXyJ_ssAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Junho_Park1;~Jae-pil_Heo1", "aff": "Sungkyunkwan University;Sungkyunkwan University", "aff_domain": "skku.edu;skku.edu", "position": "MS student;Associate Professor", "bibtex": "@misc{\npark2023diffused,\ntitle={{DIFFUSED} {INSTANCE} {CONDITIONED} {GAN}},\nauthor={Junho Park and Jae-Pil Heo},\nyear={2023},\nurl={https://openreview.net/forum?id=Rc0Xpxxfx5}\n}", "github": "", "project": "", "reviewers": "", "site": "https://openreview.net/forum?id=Rc0Xpxxfx5", "pdf_size": 15833906, "recommendation": "", "confidence": "", "correctness": "", "technical_novelty": "", "empirical_novelty": "", "wc_summary_paper": "", "wc_strength_and_weaknesses": "", "wc_clarity_quality_novelty_and_reproducibility": "", "wc_summary_review": "", "wc_review": "", "wc_reply_reviewers": "", "wc_reply_authors": "", "reply_reviewers": "", "reply_authors": "", "recommendation_avg": [ 0, 0 ], "confidence_avg": [ 0, 0 ], "correctness_avg": [ 0, 0 ], "technical_novelty_avg": [ 0, 0 ], "empirical_novelty_avg": [ 0, 0 ], "wc_summary_paper_avg": [ 0, 0 ], "wc_strength_and_weaknesses_avg": [ 0, 0 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 0, 0 ], "wc_summary_review_avg": [ 0, 0 ], "wc_review_avg": [ 0, 0 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 1, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0, "corr_recommendation_correctness": 0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:JzSzmVhiUhkJ:scholar.google.com/&scioq=DIFFUSED+INSTANCE+CONDITIONED+GAN&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Sungkyunkwan University", "aff_unique_dep": "", "aff_unique_url": "https://www.skku.edu", "aff_unique_abbr": "SKKU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "id": "RdudTla7eIM", "title": "MoCa: Cognitive Scaffolding for Language Models in Causal and Moral Judgment Tasks", "track": "main", "status": "Reject", "tldr": "We summarized the main findings of 24 cognitive science papers around human intuitions on causal and moral judgments, and collect a dataset to 
evaluate large language models.", "abstract": "Human commonsense understanding of the physical and social world is organized around intuitive theories. These theories support making causal and moral judgments. When something bad happened, we naturally ask: who did what, and why? A rich literature in cognitive science has studied people's causal and moral intuitions. These works have revealed a number of factors that systematically influence people's judgments, such as the presence of norms, and whether or not the protagonist in a scenario was aware of their action's potential consequences. Here, we investigate whether large language models (LLMs) make causal and moral judgments about text-based scenarios that align with those of human participants. We find that without any annotations, LLMs and human participants are not well aligned (17\\%-39\\% agreement). However, LLMs can accurately annotate what relevant factors are present in a scenario with simple expert-written instructions. We demonstrate how these annotations can be used to bring LLMs in closer alignment with people (36.3\\%-47.2\\% agreement). These results show how insights from cognitive science can help scaffold language models to more closely match human intuitions in challenging commonsense evaluation tasks.", "keywords": "cognitive science;causal reasoning;moral reasoning;dataset;chain-of-thought;step-by-step;language models", "primary_area": "", "supplementary_material": "/attachment/a61b62436b87c136d14644b75f9e56d649bec47b.zip", "author": "Allen Nie;Yuhui Zhang;Atharva Amdekar;Christopher J Piech;Tatsunori Hashimoto;Tobias Gerstenberg", "authorids": "~Allen_Nie1;~Yuhui_Zhang3;~Atharva_Amdekar1;~Christopher_J_Piech1;~Tatsunori_Hashimoto1;~Tobias_Gerstenberg1", "gender": "M;M;M;M;M;M", "homepage": "https://anie.me;https://cs.stanford.edu/~yuhuiz/;http://www.amdekar.me;;https://thashim.github.io;http://cicl.stanford.edu/member/tobias_gerstenberg", "dblp": "207/7996;;;35/10987.html;;", "google_scholar": "r90OelAAAAAJ;X-Agfu8AAAAJ;EGrcQYgAAAAJ;;5ygiTwsAAAAJ;d0TfP8EAAAAJ", "orcid": ";;;;;0000-0002-9162-0779", "linkedin": ";;atharva-amdekar/;;;", "or_profile": "~Allen_Nie1;~Yuhui_Zhang3;~Atharva_Amdekar1;~Christopher_J_Piech1;~Tatsunori_Hashimoto1;~Tobias_Gerstenberg1", "aff": "Microsoft Research;Stanford University;Stanford University;;Stanford University;Stanford University", "aff_domain": "microsoft.com;stanford.edu;stanford.edu;;stanford.edu;stanford.edu", "position": "Intern;PhD student;MS student;;Assistant Professor;Assistant Professor", "bibtex": "@misc{\nnie2023moca,\ntitle={MoCa: Cognitive Scaffolding for Language Models in Causal and Moral Judgment Tasks},\nauthor={Allen Nie and Yuhui Zhang and Atharva Amdekar and Christopher J Piech and Tatsunori Hashimoto and Tobias Gerstenberg},\nyear={2023},\nurl={https://openreview.net/forum?id=RdudTla7eIM}\n}", "github": "", "project": "", "reviewers": "XfnA;ikDj;FUkn", "site": "https://openreview.net/forum?id=RdudTla7eIM", "pdf_size": 631375, "recommendation": "3;3;8", "confidence": "4;4;4", "correctness": "2;2;4", "technical_novelty": "2;3;4", "empirical_novelty": "2;3;4", "wc_summary_paper": "140;86;104", "wc_strength_and_weaknesses": "766;583;101", "wc_clarity_quality_novelty_and_reproducibility": "52;123;46", "wc_summary_review": "80;155;141", "wc_review": "1038;947;392", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1668;2161;370", "reply_reviewers": "0;0;0", "reply_authors": "3;3;1", "recommendation_avg": [ 4.666666666666667, 2.357022603955158 ], "confidence_avg": 
[ 4.0, 0.0 ], "correctness_avg": [ 2.6666666666666665, 0.9428090415820634 ], "technical_novelty_avg": [ 3.0, 0.816496580927726 ], "empirical_novelty_avg": [ 3.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 110.0, 22.44994432064365 ], "wc_strength_and_weaknesses_avg": [ 483.3333333333333, 280.4833130310766 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 73.66666666666667, 34.96982826507572 ], "wc_summary_review_avg": [ 125.33333333333333, 32.561053764001905 ], "wc_review_avg": [ 792.3333333333334, 285.50578900532923 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1399.6666666666667, 755.3905538785144 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.3333333333333335, 0.9428090415820634 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 1.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11706295428039283211&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;1;1;1", "aff_unique_norm": "Microsoft;Stanford University", "aff_unique_dep": "Microsoft Research;", "aff_unique_url": "https://www.microsoft.com/en-us/research;https://www.stanford.edu", "aff_unique_abbr": "MSR;Stanford", "aff_campus_unique_index": "1;1;1;1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Human alignment of neural network representations", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11553", "id": "ReDQ1OUQR0X", "poster": "", "openreview": "https://openreview.net/forum?id=ReDQ1OUQR0X", "slides": "https://iclr.cc/virtual/2023/poster/11553", "video": "https://iclr.cc/virtual/2023/poster/11553", "author_site": "Lukas Muttenthaler, Jonas Dippel, Lorenz Johannes Linhardt, Robert A Vandermeulen, Simon Kornblith", "tldr": "We evaluate the alignment of neural network representations with human judgments about object similarities in an odd-one-out triplet task, finding that dataset and objective function, but not model size or architecture, have a significant impact.", "abstract": "Today\u2019s computer vision models achieve human or near-human level performance across a wide variety of vision tasks. However, their architectures, data, and learning algorithms differ in numerous ways from those that give rise to human vision. In this paper, we investigate the factors that affect the alignment between the representations learned by neural networks and human mental representations inferred from behavioral responses. We find that model scale and architecture have essentially no effect on the alignment with human behavioral responses, whereas the training dataset and objective function both have a much larger impact. These findings are consistent across three datasets of human similarity judgments collected using two different tasks. Linear transformations of neural network representations learned from behavioral responses from one dataset substantially improve alignment with human similarity judgments on the other two datasets. In addition, we find that some human concepts such as food and animals are well-represented by neural networks whereas others such as royal or sports-related objects are not. 
Overall, although models trained on larger, more diverse datasets achieve better alignment with humans than models trained on ImageNet alone, our results indicate that scaling alone is unlikely to be sufficient to train neural networks with conceptual representations that match those used by humans.", "keywords": "Human Alignment;Robustness;Neural Network Representations;Human Concepts;Object Similarity;Computer Vision", "primary_area": "", "supplementary_material": "", "author": "Lukas Muttenthaler;Jonas Dippel;Lorenz Linhardt;Robert A. Vandermeulen;Simon Kornblith", "authorids": "~Lukas_Muttenthaler1;~Jonas_Dippel1;~Lorenz_Linhardt2;~Robert_A._Vandermeulen2;~Simon_Kornblith1", "gender": "M;M;;M;M", "homepage": "https://lukasmut.github.io/;;;;https://www.user.tu-berlin.de/rvdm/", "dblp": "245/4369;249/3158;210/5418;220/4059;137/3375", "google_scholar": "https://scholar.google.com/citations?hl=en;ZLQCgRoAAAAJ;579iMjgAAAAJ;1O3RPmsAAAAJ;eSjfzOUAAAAJ", "orcid": "0000-0002-0804-4687;0000-0002-0552-8977;0000-0002-5533-5524;;0000-0001-6863-7006", "linkedin": "lukas-muttenthaler/;jdippel/;;;", "or_profile": "~Lukas_Muttenthaler1;~Jonas_Dippel1;~Lorenz_Linhardt2;~Simon_Kornblith1;~Robert_Vandermeulen1", "aff": "TU Berlin;Technische Universit\u00e4t Berlin;TU Berlin;Google;Berlin Institute for the Foundations of Learning and Data", "aff_domain": "tu-berlin.de;tu-berlin.de;tu-berlin.de;google.com;tu-berlin.de", "position": "PhD student;PhD student;PhD student;Research Scientist;Researcher", "bibtex": "@inproceedings{\nmuttenthaler2023human,\ntitle={Human alignment of neural network representations},\nauthor={Lukas Muttenthaler and Jonas Dippel and Lorenz Linhardt and Robert A. Vandermeulen and Simon Kornblith},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=ReDQ1OUQR0X}\n}", "github": "", "project": "", "reviewers": "hcki;fmAV;u84n", "pdf_size": 21026373, "recommendation": "1;6;8", "confidence": "3;3;4", "correctness": "1;4;4", "technical_novelty": "3;3;2", "empirical_novelty": "0;3;3", "wc_summary_paper": "73;65;99", "wc_strength_and_weaknesses": "61;166;514", "wc_clarity_quality_novelty_and_reproducibility": "24;94;41", "wc_summary_review": "173;81;63", "wc_review": "331;406;717", "wc_reply_reviewers": "0;0;647", "wc_reply_authors": "1206;1094;2961", "reply_reviewers": "0;0;4", "reply_authors": "2;2;7", "recommendation_avg": [ 5.0, 2.943920288775949 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.0, 1.4142135623730951 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 1.4142135623730951 ], "wc_summary_paper_avg": [ 79.0, 14.514360704718161 ], "wc_strength_and_weaknesses_avg": [ 247.0, 193.60268593178142 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 53.0, 29.81051268708183 ], "wc_summary_review_avg": [ 105.66666666666667, 48.175604707039106 ], "wc_review_avg": [ 484.6666666666667, 167.11340128454356 ], "wc_reply_reviewers_avg": [ 215.66666666666666, 304.9987249517975 ], "wc_reply_authors_avg": [ 1753.6666666666667, 854.9371646826189 ], "reply_reviewers_avg": [ 1.3333333333333333, 1.8856180831641267 ], "reply_authors_avg": [ 3.6666666666666665, 2.3570226039551585 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.720576692122892, "corr_recommendation_correctness": 0.960768922830523, "gs_citation": 88, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=629582568958980548&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=ReDQ1OUQR0X", "email": "tu-berlin.de;tu-berlin.de;tu-berlin.de;google.com;tu-berlin.de", "author_num": 5, "aff_unique_index": "0;0;0;1;2", "aff_unique_norm": "Technische Universit\u00e4t Berlin;Google;Berlin Institute for the Foundations of Learning and Data", "aff_unique_dep": ";Google;", "aff_unique_url": "https://www.tu-berlin.de;https://www.google.com;https://www.bifold.berlin", "aff_unique_abbr": "TU Berlin;Google;BIFOLD", "aff_campus_unique_index": "0;0;2", "aff_campus_unique": "Berlin;;Mountain View", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "Germany;United States" }, { "id": "RePt5K6wPux", "title": "Code Means More Than Plain Language: Bringing Syntax Structure Awareness To Algorithmic Problem Solution Generation", "track": "main", "status": "Withdraw", "tldr": "The first work to introduce syntax tree structure in programming synthesis", "abstract": "Program Synthesis (PS) is the task of building computer programs that satisfy problem specifications. Large-scale pre-trained language models treat the PS as a sequence prediction task, which has gained vivid popularity recently. However, these methods heavily rely on the conventional Natural Language Processing (NLP) tokenizers, which overlooks the rich structural/syntax information in the code. In this work, we posit that the syntax structures help generate syntax error-free and algorithmically correct programs. If the program syntax structures can be integrated into the tokenizer, the program representation space could be significantly simplified. To this end, we propose a new end-to-end framework named ASTer, coupled with our novel syntax-aware tokenization design toolkit. More specifically, our tokenizer encodes and decodes the program by its syntax roles and contents, not by what is superficially shown on the strings. The ASTer encompasses a novel sample-wise and token-wise attention mechanism, and avails the benefits of training with the syntactically aligned samples from our tokenization toolkit. Extensive evaluations show superior performance against state-of-the-arts, which confirms that bringing syntax knowledge into the language model can help better capture the data structure and simplify the search space. All of our codes will be publicly available upon acceptance. 
", "keywords": "program synthesis;transformer;syntax structure", "primary_area": "", "supplementary_material": "", "author": "Wenqing Zheng;S P Sharan;AJAY KUMAR JAISWAL;Kevin Wang;Yihan Xi;Zhangyang Wang", "authorids": "~Wenqing_Zheng1;~S_P_Sharan1;~AJAY_KUMAR_JAISWAL1;~Kevin_Wang4;~Yihan_Xi1;~Zhangyang_Wang1", "gender": "M;M;M;M;F;M", "homepage": "https://wenqing-zheng.github.io;https://spsharan.com/;https://ajay1994.github.io/;;;https://vita-group.github.io", "dblp": ";324/6204;30/9707;;;119/4026", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;1NtGcNIAAAAJ;I783HxYAAAAJ;;;pxFyKAIAAAAJ", "orcid": "0000-0002-8283-7511;0000-0002-6298-6464;;;;", "linkedin": ";;;kevin-wang-01/;yihan-xi-4b4822225/;", "or_profile": "~Wenqing_Zheng1;~S_P_Sharan1;~AJAY_KUMAR_JAISWAL1;~Kevin_Wang4;~Yihan_Xi1;~Zhangyang_Wang1", "aff": "University of Texas, Austin;NEC-Labs;Amazon;University of Texas at Austin;University of Texas at Austin;University of Texas, Austin", "aff_domain": "utexas.edu;nec-labs.com;amazon.com;utexas.edu;utexas.edu;utexas.edu", "position": "PhD student;Intern;Researcher;Undergrad student;Undergrad student;Assistant Professor", "bibtex": "@misc{\nzheng2023code,\ntitle={Code Means More Than Plain Language: Bringing Syntax Structure Awareness To Algorithmic Problem Solution Generation},\nauthor={Wenqing Zheng and S P Sharan and AJAY KUMAR JAISWAL and Kevin Wang and Yihan Xi and Zhangyang Wang},\nyear={2023},\nurl={https://openreview.net/forum?id=RePt5K6wPux}\n}", "github": "", "project": "", "reviewers": "2qfr;pDnd;3UsJ", "site": "https://openreview.net/forum?id=RePt5K6wPux", "pdf_size": 5854851, "recommendation": "3;3;3", "confidence": "4;4;5", "correctness": "2;3;2", "technical_novelty": "2;3;2", "empirical_novelty": "2;2;2", "wc_summary_paper": "118;142;113", "wc_strength_and_weaknesses": "658;502;532", "wc_clarity_quality_novelty_and_reproducibility": "63;71;69", "wc_summary_review": "36;10;52", "wc_review": "875;725;766", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 124.33333333333333, 12.657891697365017 ], "wc_strength_and_weaknesses_avg": [ 564.0, 67.58698099486321 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 67.66666666666667, 3.39934634239519 ], "wc_summary_review_avg": [ 32.666666666666664, 17.30767331432956 ], "wc_review_avg": [ 788.6666666666666, 63.29999122345033 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6713774331171548135&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;2;0;0;0", "aff_unique_norm": "University of Texas at Austin;NEC Laboratories;Amazon", "aff_unique_dep": ";;Amazon.com, Inc.", "aff_unique_url": "https://www.utexas.edu;https://www.nec-labs.com;https://www.amazon.com", "aff_unique_abbr": "UT Austin;NEC-Labs;Amazon", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Austin;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": 
"United States" }, { "title": "Sparse Mixture-of-Experts are Domain Generalizable Learners", "status": "Top-5%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12164", "id": "RecZ9nB9Q4", "poster": "", "openreview": "https://openreview.net/forum?id=RecZ9nB9Q4", "slides": "https://iclr.cc/virtual/2023/poster/12164", "video": "https://iclr.cc/virtual/2023/poster/12164", "author_site": "Bo Li, Yifei Shen, Jingkang Yang, Yezhen Wang, Jiawei Ren, Tong Che, Jun Zhang, Ziwei Liu", "tldr": "We theoretically investigate the impact of backbone architecture on DG. We propose a novel SOTA model Generalizable Mixture-of-Experts (GMoE) for DG.", "abstract": "Human visual perception can easily generalize to out-of-distributed visual data, which is far beyond the capability of modern machine learning models. Domain generalization (DG) aims to close this gap, with existing DG methods mainly focusing on the loss function design. In this paper, we propose to explore an orthogonal direction, i.e., the design of the backbone architecture. It is motivated by an empirical finding that transformer-based models trained with empirical risk minimization (ERM) outperform CNN-based models employing state-of-the-art (SOTA) DG algorithms on multiple DG datasets. We develop a formal framework to characterize a network's robustness to distribution shifts by studying its architecture's alignment with the correlations in the dataset. This analysis guides us to propose a novel DG model built upon vision transformers, namely \\emph{Generalizable Mixture-of-Experts (GMoE)}. Extensive experiments on DomainBed demonstrate that GMoE trained with ERM outperforms SOTA DG baselines by a large margin. Moreover, GMoE is complementary to existing DG methods and its performance is substantially improved when trained with DG algorithms.", "keywords": "domain generalization;mixture-of-experts;algorithmic alignment;visual attributes", "primary_area": "", "supplementary_material": "/attachment/78b2842e3cfed8bd0a85fcfe29c68bb81c9e412f.zip", "author": "Bo Li;Yifei Shen;Jingkang Yang;Yezhen Wang;Jiawei Ren;Tong Che;Jun Zhang;Ziwei Liu", "authorids": "~Bo_Li23;~Yifei_Shen1;~Jingkang_Yang1;~Yezhen_Wang1;~Jiawei_Ren1;~Tong_Che1;~Jun_Zhang25;~Ziwei_Liu1", "gender": "M;M;M;M;Unspecified;M;;M", "homepage": "https://www.brianboli.com/;https://openreview.net/profile?id=~Yifei_Shen1;https://jingkang50.github.io/;;https://jiawei-ren.github.io/;;https://eejzhang.people.ust.hk/;https://liuziwei7.github.io/", "dblp": "50/3402-80;51/609.html;175/5365.html;;122/3626-1;125/0738;z/JunZhang4;05/6300-2", "google_scholar": "1_zc1-IAAAAJ;;S-YjbUYAAAAJ;g-VEnLEAAAAJ;https://scholar.google.com.sg/citations?user=YUKPVCoAAAAJ;7b5tlJkAAAAJ;1Is687QAAAAJ;https://scholar.google.com.hk/citations?user=lc45xlcAAAAJ", "orcid": ";;;;0000-0003-1950-5976;;0000-0002-5222-1898;", "linkedin": "brianbo1121/;;;;;;;", "or_profile": "~Bo_Li23;~Yifei_Shen1;~Jingkang_Yang1;~Yezhen_Wang1;~Jiawei_Ren1;~Tong_Che1;~Jun_Zhang25;~Ziwei_Liu1", "aff": "Nanyang Technological University;Microsoft Research Asia;Nanyang Technological University;National University of Singapore;Nanyang Technological University;NVIDIA;Hong Kong University of Science and Technology;Nanyang Technological University", "aff_domain": "ntu.edu.sg;microsoft.com;ntu.edu.sg;nus.edu;ntu.edu.sg;nvidia.com;ust.hk;ntu.edu.sg", "position": "PhD student;Research Cheerleader;PhD student;PhD student;PhD student;Researcher;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nli2023sparse,\ntitle={Sparse 
Mixture-of-Experts are Domain Generalizable Learners},\nauthor={Bo Li and Yifei Shen and Jingkang Yang and Yezhen Wang and Jiawei Ren and Tong Che and Jun Zhang and Ziwei Liu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=RecZ9nB9Q4}\n}", "github": "", "project": "", "reviewers": "FUc5;nMjY;aHin;jTSg", "pdf_size": 28781991, "recommendation": "5;6;8;8", "confidence": "4;5;3;3", "correctness": "2;3;4;4", "technical_novelty": "3;3;3;3", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "45;28;91;174", "wc_strength_and_weaknesses": "199;196;88;244", "wc_clarity_quality_novelty_and_reproducibility": "2;37;79;44", "wc_summary_review": "2;22;18;50", "wc_review": "248;283;276;512", "wc_reply_reviewers": "0;23;14;0", "wc_reply_authors": "1399;810;172;494", "reply_reviewers": "0;1;1;0", "reply_authors": "3;1;1;1", "recommendation_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 84.5, 56.5795899596312 ], "wc_strength_and_weaknesses_avg": [ 181.75, 57.3688722217894 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 40.5, 27.33587386567329 ], "wc_summary_review_avg": [ 23.0, 17.291616465790582 ], "wc_review_avg": [ 329.75, 106.03389788176231 ], "wc_reply_reviewers_avg": [ 9.25, 9.781998773256925 ], "wc_reply_authors_avg": [ 718.75, 452.91134618156786 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": -0.7543365091413573, "corr_recommendation_correctness": 0.986440050415621, "gs_citation": 84, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14321047647801621605&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=RecZ9nB9Q4", "email": "ntu.edu.sg;microsoft.com;ntu.edu.sg;nus.edu;ntu.edu.sg;nvidia.com;ust.hk;ntu.edu.sg", "author_num": 8, "aff_unique_index": "0;1;0;2;0;3;4;0", "aff_unique_norm": "Nanyang Technological University;Microsoft;National University of Singapore;NVIDIA;Hong Kong University of Science and Technology", "aff_unique_dep": ";Research;;NVIDIA Corporation;", "aff_unique_url": "https://www.ntu.edu.sg;https://www.microsoft.com/en-us/research/group/asia;https://www.nus.edu.sg;https://www.nvidia.com;https://www.ust.hk", "aff_unique_abbr": "NTU;MSR Asia;NUS;NVIDIA;HKUST", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Asia;Hong Kong SAR", "aff_country_unique_index": "0;1;0;0;0;2;1;0", "aff_country_unique": "Singapore;China;United States" }, { "id": "Refb0S-paCx", "title": "An Investigation of Domain Generalization with Rademacher Complexity", "track": "main", "status": "Reject", "tldr": "", "abstract": "The domain generalization (DG) setting challenges a model trained on multiple known data distributions to generalise well on unseen data distributions. Due to its practical importance, many methods have been proposed to address this challenge. \nHowever, much work in general-purpose DG is heuristically motivated, \nas the DG problem is hard to model formally; and recent evaluations have cast doubt on existing methods\u2019 practical efficacy -- in particular compared to a well-tuned empirical risk minimisation baseline. 
\nWe present a novel learning-theoretic generalisation bound for DG that bounds unseen domain generalisation performance in terms of the model\u2019s empirical risk and Rademacher complexity -- providing a sufficient condition for DG. Based on this insight, we empirically analyze the performance of several methods and show that their performance is indeed influenced by model complexity in practice. \nAlgorithmically, our analysis suggests that tuning for domain generalisation should be achieved by simply performing regularised ERM with a leave-one-domain-out cross-validation objective. Empirical results on the DomainBed benchmark corroborate this.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Da Li;Henry Gouk;Timothy Hospedales", "authorids": "~Da_Li3;~Henry_Gouk1;~Timothy_Hospedales1", "gender": "M;M;M", "homepage": "https://dali-dl.github.io/;https://www.henrygouk.com;http://homepages.inf.ed.ac.uk/thospeda/", "dblp": "43/4804-1;172/0943;32/3545", "google_scholar": "RPvaE3oAAAAJ;https://scholar.google.co.nz/citations?user=i1bzlyAAAAAJ;https://scholar.google.fr/citations?user=nHhtvqkAAAAJ", "orcid": "0000-0002-2101-2989;;0000-0003-4867-7486", "linkedin": ";;timothyhospedales/", "or_profile": "~Da_Li3;~Henry_Gouk1;~Timothy_Hospedales1", "aff": "University of Edinburgh;University of Edinburgh;Samsung AI Research Centre", "aff_domain": "ed.ac.uk;ed.ac.uk;samsung.com", "position": "Visiting Scholar;RAEng Research Fellow;Principal Researcher", "bibtex": "@misc{\nli2023an,\ntitle={An Investigation of Domain Generalization with Rademacher Complexity},\nauthor={Da Li and Henry Gouk and Timothy Hospedales},\nyear={2023},\nurl={https://openreview.net/forum?id=Refb0S-paCx}\n}", "github": "", "project": "", "reviewers": "EU2D;ss4Y;ECRP;rAHY", "site": "https://openreview.net/forum?id=Refb0S-paCx", "pdf_size": 664427, "recommendation": "3;3;3;3", "confidence": "5;3;3;3", "correctness": "4;4;3;4", "technical_novelty": "1;2;1;2", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "18;129;34;94", "wc_strength_and_weaknesses": "427;283;238;234", "wc_clarity_quality_novelty_and_reproducibility": "8;70;228;18", "wc_summary_review": "2;98;37;38", "wc_review": "455;580;537;384", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 1.5, 0.5 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 68.75, 44.862985856940014 ], "wc_strength_and_weaknesses_avg": [ 295.5, 78.32145299980077 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 81.0, 88.07383266328314 ], "wc_summary_review_avg": [ 43.75, 34.513584282134474 ], "wc_review_avg": [ 489.0, 75.44203072558426 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:VM-VchfdG6oJ:scholar.google.com/&scioq=An+Investigation+of+Domain+Generalization+with+Rademacher+Complexity&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Edinburgh;Samsung", "aff_unique_dep": ";AI Research", "aff_unique_url": 
"https://www.ed.ac.uk;https://www.samsung.com/global/researchers/samsung-ai-research-centre/", "aff_unique_abbr": "Edinburgh;SARC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United Kingdom;South Korea" }, { "id": "Rg1LG7wtd2D", "title": "Offline Reinforcement Learning from Heteroskedastic Data Via Support Constraints", "track": "main", "status": "Reject", "tldr": "We show that conventional distributional constraint RL algorithms are need with heteroskedatic datasets. We propose an offline RL method to handle such settings.", "abstract": "Offline reinforcement learning (RL) learns policies entirely from static datasets, thereby avoiding the challenges associated with online data collection. Practical applications of offline RL will inevitably require learning from datasets where the variability of demonstrated behaviors changes non-uniformly across the state space. For example, at a red light, nearly all human drivers behave similarly by stopping, but when merging onto a highway, some drivers merge quickly, efficiently, and safely, while many hesitate or merge dangerously. \nWe show that existing popular offline RL methods based on distribution constraints fail to learn from data with such non-uniform change in the variability of demonstrated behaviors, often due to the requirement to stay close to the behavior policy to the same extent across the state space. We demonstrate this failure mode both theoretically and experimentally. Ideally, the learned policy should be free to choose per-state how closely to follow the behavior policy to maximize long-term return, as long as the learned policy stays within the support of the behavior policy. To instantiate this principle, we reweight the data distribution in conservative Q-learning and show that support constraints emerge when doing so. The reweighted distribution is a mixture of the current policy and an additional policy trained to mine poor actions that are likely under the behavior policy. Our method CQL (ReDS) is simple, theoretically motivated, and improves performance across a wide range of offline RL problems in Atari games, navigation, and pixel-based manipulation. 
", "keywords": "offline RL;support constraints;heteroskedastic data", "primary_area": "", "supplementary_material": "/attachment/dbba387dd52471f7713d26086d3f80019058df88.zip", "author": "Anikait Singh;Aviral Kumar;Quan Vuong;Yevgen Chebotar;Sergey Levine", "authorids": "~Anikait_Singh1;~Aviral_Kumar2;~Quan_Vuong2;~Yevgen_Chebotar1;~Sergey_Levine1", "gender": "M;M;M;M;M", "homepage": "https://asap7772.github.io/;https://aviralkumar2907.github.io/;https://quanvuong.github.io;;https://people.eecs.berkeley.edu/~svlevine/", "dblp": "302/3876;202/7961;;01/11424;80/7594", "google_scholar": "lPaISmIAAAAJ;;NSWI3OwAAAAJ;ADkiClQAAAAJ;8R35rCwAAAAJ", "orcid": ";;;;", "linkedin": "asap7772/;;;;", "or_profile": "~Anikait_Singh1;~Aviral_Kumar2;~Quan_Vuong2;~Yevgen_Chebotar1;~Sergey_Levine1", "aff": "University of California, Berkeley;University of California, Berkeley;;Google;Google", "aff_domain": "berkeley.edu;berkeley.edu;;google.com;google.com", "position": "Undergrad student;PhD student;;Research Scientist;Research Scientist", "bibtex": "@misc{\nsingh2023offline,\ntitle={Offline Reinforcement Learning from Heteroskedastic Data Via Support Constraints},\nauthor={Anikait Singh and Aviral Kumar and Quan Vuong and Yevgen Chebotar and Sergey Levine},\nyear={2023},\nurl={https://openreview.net/forum?id=Rg1LG7wtd2D}\n}", "github": "", "project": "", "reviewers": "N7S6;HyhP;d9Ww", "site": "https://openreview.net/forum?id=Rg1LG7wtd2D", "pdf_size": 3114162, "recommendation": "5;6;6", "confidence": "4;3;4", "correctness": "3;4;2", "technical_novelty": "3;3;2", "empirical_novelty": "2;3;3", "wc_summary_paper": "266;59;182", "wc_strength_and_weaknesses": "641;57;271", "wc_clarity_quality_novelty_and_reproducibility": "2;7;68", "wc_summary_review": "2;3;62", "wc_review": "911;126;583", "wc_reply_reviewers": "148;40;0", "wc_reply_authors": "2258;365;1226", "reply_reviewers": "1;1;0", "reply_authors": "7;1;3", "recommendation_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 169.0, 85.00588214941364 ], "wc_strength_and_weaknesses_avg": [ 323.0, 241.23570769408633 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 25.666666666666668, 30.003703475108235 ], "wc_summary_review_avg": [ 22.333333333333332, 28.051539866625664 ], "wc_review_avg": [ 540.0, 321.9140672084193 ], "wc_reply_reviewers_avg": [ 62.666666666666664, 62.5104435718989 ], "wc_reply_authors_avg": [ 1283.0, 773.864329194724 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 3.6666666666666665, 2.494438257849294 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.4999999999999999, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8468122162965415176&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0;1;1", "aff_unique_norm": "University of California, Berkeley;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.berkeley.edu;https://www.google.com", "aff_unique_abbr": "UC Berkeley;Google", "aff_campus_unique_index": "0;0;1;1", "aff_campus_unique": "Berkeley;Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "CUDA: Curriculum of Data 
Augmentation for Long-tailed Recognition", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11772", "id": "RgUPdudkWlN", "poster": "", "openreview": "https://openreview.net/forum?id=RgUPdudkWlN", "slides": "https://iclr.cc/virtual/2023/poster/11772", "video": "https://iclr.cc/virtual/2023/poster/11772", "author_site": "Sumyeong Ahn, Jongwoo Ko, Se-Young Yun", "tldr": "We propose a class-wise data augmentation method by designing the curriculum of data augmentation, which is based on our findings that stronger augmentation on major classes improves the performance on long-tailed recognition.", "abstract": "Class imbalance problems frequently occur in real-world tasks, and conventional deep learning algorithms are well known for performance degradation on imbalanced training datasets. To mitigate this problem, many approaches have aimed to balance among given classes by re-weighting or re-sampling training samples. These re-balancing methods increase the impact of minority classes and reduce the influence of majority classes on the output of models. However, the extracted representations may be of poor quality owing to the limited number of minority samples. To handle this restriction, several methods have been developed that increase the representations of minority samples by leveraging the features of the majority samples. Despite extensive recent studies, no deep analysis has been conducted on the determination of which classes to augment or the strength of augmentation. In this study, we first investigate the correlation between the degree of augmentation and class-wise performance, and find that the proper degree of augmentation must be allocated for each class to mitigate class imbalance problems. Motivated by this finding, we propose a simple and efficient novel curriculum, which is designed to find the appropriate per-class strength of data augmentation, called CUDA: CUrriculum of Data Augmentation for long-tailed recognition. CUDA can simply be integrated into existing long-tailed recognition methods. We present the results of experiments showing that CUDA effectively achieves better generalization performance compared to the state-of-the-art method on various imbalanced datasets such as CIFAR-100-LT, ImageNet-LT, and iNaturalist 2018. 
\n", "keywords": "Long-tailed recognition;class imbalance", "primary_area": "", "supplementary_material": "", "author": "Sumyeong Ahn;Jongwoo Ko;Se-Young Yun", "authorids": "~Sumyeong_Ahn1;~Jongwoo_Ko1;~Se-Young_Yun1", "gender": "M;M;M", "homepage": "https://sumyeongahn.github.io;https://sites.google.com/view/jongwooko;https://fbsqkd.github.io", "dblp": "217/5462;286/1503;23/8862", "google_scholar": "krxhvIYAAAAJ;l2jkwHwAAAAJ;X_IAjb8AAAAJ", "orcid": ";;", "linkedin": ";jongwoo-ko-8b93051b4/;seyoung-yun-395130ab/", "or_profile": "~Sumyeong_Ahn1;~Jongwoo_Ko1;~Se-Young_Yun1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;KAIST", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nahn2023cuda,\ntitle={{CUDA}: Curriculum of Data Augmentation for Long-tailed Recognition},\nauthor={Sumyeong Ahn and Jongwoo Ko and Se-Young Yun},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=RgUPdudkWlN}\n}", "github": "", "project": "", "reviewers": "QwML;nPVy;N4yQ;Z6g5;URnx", "pdf_size": 5735100, "recommendation": "6;6;6;6;8", "confidence": "5;5;4;4;3", "correctness": "3;3;3;3;4", "technical_novelty": "3;3;3;3;3", "empirical_novelty": "3;3;3;3;3", "wc_summary_paper": "45;31;80;64;223", "wc_strength_and_weaknesses": "601;266;125;221;261", "wc_clarity_quality_novelty_and_reproducibility": "96;58;77;8;22", "wc_summary_review": "39;28;20;56;29", "wc_review": "781;383;302;349;535", "wc_reply_reviewers": "39;78;60;29;0", "wc_reply_authors": "1544;1922;1994;1451;1129", "reply_reviewers": "1;1;1;1;0", "reply_authors": "4;6;5;4;2", "recommendation_avg": [ 6.4, 0.7999999999999999 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "correctness_avg": [ 3.2, 0.39999999999999997 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 88.6, 69.22600667379277 ], "wc_strength_and_weaknesses_avg": [ 294.8, 161.25061240193787 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 52.2, 32.96300957133617 ], "wc_summary_review_avg": [ 34.4, 12.370933675353692 ], "wc_review_avg": [ 470.0, 174.0 ], "wc_reply_reviewers_avg": [ 41.2, 26.678830559078108 ], "wc_reply_authors_avg": [ 1608.0, 318.0496816536687 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 4.2, 1.32664991614216 ], "replies_avg": [ 35, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.8017837257372732, "corr_recommendation_correctness": 1.0, "gs_citation": 54, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10648622730036866230&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=RgUPdudkWlN", "email": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "Equivariant Hypergraph Diffusion Neural Operators", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11538", "id": "RiTjKoscnNd", "poster": "/media/PosterPDFs/ICLR%202023/11538.png?t=1681877335.71331", "openreview": "https://openreview.net/forum?id=RiTjKoscnNd", "slides": 
"https://iclr.cc/virtual/2023/poster/11538", "video": "https://iclr.cc/virtual/2023/poster/11538", "author_site": "Peihao Wang, Shenghao Yang, Yunyu Liu, Zhangyang Wang, Pan Li", "tldr": "In this work, we are inspired by hypergraph diffusion algorithms and design a novel HNN architecture that holds provable expressiveness while keeping efficiency.", "abstract": "Hypergraph neural networks (HNNs) using neural networks to encode hypergraphs provide a promising way to model higher-order relations in data and further solve relevant prediction tasks built upon such higher-order relations. However, higher-order relations in practice contain complex patterns and are often highly irregular. So, it is often challenging to design an HNN that suffices to express those relations while keeping computational efficiency. Inspired by hypergraph diffusion algorithms, this work proposes a new HNN architecture named ED-HNN, which provably approximates any continuous equivariant hypergraph diffusion operators that can model a wide range of higher-order relations. ED-HNN can be implemented efficiently by combining star expansions of hypergraphs with standard message passing neural networks. ED-HNN further shows great superiority in processing heterophilic hypergraphs and constructing deep models. We evaluate ED-HNN for node classification on nine real-world hypergraph datasets. ED-HNN uniformly outperforms the best baselines over these nine datasets and achieves more than 2%$\\uparrow$ in prediction accuracy over four datasets therein. Our code is available at: https://github.com/Graph-COM/ED-HNN.", "keywords": "Hypergraph Neural Network;Hypergraph Diffusion;Equivariant Network", "primary_area": "", "supplementary_material": "/attachment/9677a32fa4dd4c58bd736a962ee5b37e63009262.zip", "author": "Peihao Wang;Shenghao Yang;Yunyu Liu;Zhangyang Wang;Pan Li", "authorids": "~Peihao_Wang1;~Shenghao_Yang1;~Yunyu_Liu1;~Zhangyang_Wang1;~Pan_Li2", "gender": "M;M;M;M;", "homepage": "https://peihaowang.github.io/;https://cs.uwaterloo.ca/~s286yang/;https://wenwen0319.github.io/;https://vita-group.github.io;", "dblp": "239/4075;41/4482-2;;119/4026;https://dblp.org/pers/hd/l/Li_0005:Pan", "google_scholar": "fqf2tBsAAAAJ;ocLDM-AAAAAJ;;pxFyKAIAAAAJ;IroP0EwAAAAJ", "orcid": ";;;;", "linkedin": "peihao-wang-25a411162/;;;;pan-li-b951105a/", "or_profile": "~Peihao_Wang1;~Shenghao_Yang1;~Yunyu_Liu1;~Zhangyang_Wang1;~Pan_Li2", "aff": "University of Texas, Austin;University of Waterloo;Purdue University;University of Texas, Austin;Purdue University", "aff_domain": "utexas.edu;uwaterloo.ca;purdue.edu;utexas.edu;purdue.edu", "position": "PhD student;PhD student;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nwang2023equivariant,\ntitle={Equivariant Hypergraph Diffusion Neural Operators},\nauthor={Peihao Wang and Shenghao Yang and Yunyu Liu and Zhangyang Wang and Pan Li},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=RiTjKoscnNd}\n}", "github": "", "project": "", "reviewers": "GNhA;n5MN;z25x;TN8f", "pdf_size": 1079032, "recommendation": "6;6;6;6", "confidence": "5;2;2;3", "correctness": "3;2;3;2", "technical_novelty": "3;2;3;3", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "47;77;29;77", "wc_strength_and_weaknesses": "102;59;282;171", "wc_clarity_quality_novelty_and_reproducibility": "263;279;6;75", "wc_summary_review": "24;239;23;36", "wc_review": "436;654;340;359", "wc_reply_reviewers": "1489;612;22;17", 
"wc_reply_authors": "3784;2074;659;419", "reply_reviewers": "8;5;1;1", "reply_authors": "9;7;2;2", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.0, 1.224744871391589 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 57.5, 20.512191496766015 ], "wc_strength_and_weaknesses_avg": [ 153.5, 84.26298119577778 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 155.75, 117.9393382209685 ], "wc_summary_review_avg": [ 80.5, 91.65287775078315 ], "wc_review_avg": [ 447.25, 124.66229381813893 ], "wc_reply_reviewers_avg": [ 535.0, 601.5683668545081 ], "wc_reply_authors_avg": [ 1734.0, 1341.9156083748337 ], "reply_reviewers_avg": [ 3.75, 2.947456530637899 ], "reply_authors_avg": [ 5.0, 3.082207001484488 ], "replies_avg": [ 40, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 66, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4806532106233986883&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=RiTjKoscnNd", "email": "utexas.edu;uwaterloo.ca;purdue.edu;utexas.edu;purdue.edu", "author_num": 5, "aff_unique_index": "0;1;2;0;2", "aff_unique_norm": "University of Texas at Austin;University of Waterloo;Purdue University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.utexas.edu;https://uwaterloo.ca;https://www.purdue.edu", "aff_unique_abbr": "UT Austin;UW;Purdue", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Austin;", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "United States;Canada" }, { "id": "RjsiAoZqN6", "title": "Bridging attack and prompting: An Enhanced Visual Prompting at the pixel level", "track": "main", "status": "Withdraw", "tldr": "We design a novel and concise visual prompting method incorporating a simple and effective training strategy inspired from adversarial attack, and ourperform traditional linear probe in many scenarios.", "abstract": "In this paper, we study the problem of the visual prompt at the pixel level. Recent works demonstrate flexibility and generalization of visual-only prompt. However, it still cannot achieve superior results compared with linear probe in terms of accuracy and parameter efficiency. We believe that the full power of visual prompt remains to be harnessed through a novel perspective, which bridges adversarial attack and visual prompt considering the high similarity in both formats and objective functions. Bringing in the \u201cold ideas\u201d in adversarial attacks to enhance visual prompt is promising since there are extensive theoretical and empirical solutions to improve the performance of adversarial attack. Therefore, we propose a novel and concise visual prompting method incorporating simple and effective training strategies inspired by ideas from adversarial attack. Specifically, we introduce the input diversity and gradient normalization into visual prompt learning to obtain better generalization ability. Moreover, to avoid disruptions to the original image caused by perturbation without changing the spatial size of inputs, we separate the prompt and image by shrinking and then padding the image with learnable visual prompts, which can significantly improve the performance further without increasing FLOPs. Extensive experiments are conducted on various large-scale pre-trained models across several downstream datasets under different scenarios. 
We show that with a CLIP-based model, our enhanced visual prompt can successfully outperform the linear probe by 1.9% across 12 datasets on average with a comparable number of parameters, and can even match the full fine-tuning paradigm in some settings while training only 0.04% of the parameters.", "keywords": "prompting;adversarial machine learning;CLIP", "primary_area": "", "supplementary_material": "", "author": "Junyang Wu;Xianhang Li;Chen Wei;Huiyu Wang;Alan Yuille;Yuyin Zhou;Cihang Xie", "authorids": "~Junyang_Wu1;~Xianhang_Li1;~Chen_Wei2;~Huiyu_Wang1;~Alan_Yuille1;~Yuyin_Zhou1;~Cihang_Xie3", "gender": "M;M;;;M;;M", "homepage": "https://github.com/jywu511;https://xhl-video.github.io/xianhangli/;https://weichen582.github.io/;http://csrhddlam.github.io/;;https://yuyinzhou.github.io/;https://cihangxie.github.io/", "dblp": "320/7787;268/5945;181/2831-5;;y/AlanLYuille;192/1413;175/3366", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;YKpFz4YAAAAJ;https://scholar.google.com/citations?hl=en;SnmuYloAAAAJ;;eiqVLC0AAAAJ;X3vVZPcAAAAJ", "orcid": ";;;;;;", "linkedin": ";;;;;;", "or_profile": "~Junyang_Wu1;~Xianhang_Li1;~Chen_Wei2;~Huiyu_Wang1;~Alan_Yuille1;~Yuyin_Zhou1;~cihang_xie1", "aff": "University of California, Santa Cruz;University of California, Santa Cruz;Johns Hopkins University;Meta Platforms;Johns Hopkins University;University of California, Santa Cruz;University of California, Santa Cruz", "aff_domain": "ucsc.edu;ucsc.edu;jhu.edu;meta.com;johnshopkins.edu;ucsc.edu;ucsc.edu", "position": "Intern;PhD student;PhD student;Researcher;Full Professor;Assistant Professor;Assistant Professor", "bibtex": "@misc{\nwu2023bridging,\ntitle={Bridging attack and prompting: An Enhanced Visual Prompting at the pixel level},\nauthor={Junyang Wu and Xianhang Li and Chen Wei and Huiyu Wang and Alan Yuille and Yuyin Zhou and Cihang Xie},\nyear={2023},\nurl={https://openreview.net/forum?id=RjsiAoZqN6}\n}", "github": "", "project": "", "reviewers": "1dJX;5Puy;CZSf;7tPu", "site": "https://openreview.net/forum?id=RjsiAoZqN6", "pdf_size": 646314, "recommendation": "3;5;5;5", "confidence": "4;4;4;3", "correctness": "2;4;4;2", "technical_novelty": "2;2;2;2", "empirical_novelty": "3;3;3;2", "wc_summary_paper": "98;88;49;33", "wc_strength_and_weaknesses": "752;261;276;208", "wc_clarity_quality_novelty_and_reproducibility": "14;17;32;10", "wc_summary_review": "25;85;42;2", "wc_review": "889;451;399;253", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.0, 1.0 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 67.0, 26.842131062939096 ], "wc_strength_and_weaknesses_avg": [ 374.25, 219.55224321331815 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 18.25, 8.317902379807062 ], "wc_summary_review_avg": [ 38.5, 30.36856927812043 ], "wc_review_avg": [ 498.0, 237.1265484925718 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:POO8f6sxFq8J:scholar.google.com/&scioq=Bridging+attack+and+prompting:+An+Enhanced+Visual+Prompting+at+the+pixel+level&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;0;1;2;1;0;0", "aff_unique_norm": "University of California, Santa Cruz;Johns Hopkins University;Meta", "aff_unique_dep": ";;Meta Platforms, Inc.", "aff_unique_url": "https://www.ucsc.edu;https://www.jhu.edu;https://www.meta.com", "aff_unique_abbr": "UCSC;JHU;Meta", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Santa Cruz;", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "Rkk51I-BpMH", "title": "Caption supervision enables robust learners: a controlled study of distributionally robust model training", "track": "main", "status": "Reject", "tldr": "We introduce CaptionNet, a fully captioned, fully supervised dataset with ImageNet-compliant labels, and through experiment, show how the choice of loss function, data filtration and supervision strategy enable robust computer vision.", "abstract": "Vision language models like CLIP are robust to natural distribution shifts, in part because CLIP learns on unstructured data using a technique called caption supervision; the model inteprets image-linked texts as ground-truth labels. In a carefully controlled comparison study, we show that CNNs trained on a standard cross-entropy loss can also benefit from caption supervision, in some cases even more than VL models, on the same data. To facilitate future experiments with high-accuracy caption-supervised models, we introduce CaptionNet, one piece of which is a class-balanced, fully supervised dataset with over 50,000 new human-labeled ImageNet-compliant samples which includes web-scraped captions. 
In a series of experiments on CaptionNet, we show how the choice of loss function, data filtration and supervision strategy enable robust computer vision.", "keywords": "self-supervised learning;computer vision;effective robustness;vision language;CLIP;ImageNet;LAION;CC12M;YFCC", "primary_area": "", "supplementary_material": "/attachment/55e0dc37085841dbe449ec56031d91ccc96d751a.zip", "author": "Benjamin Feuer;Ameya Joshi;Chinmay Hegde", "authorids": "~Benjamin_Feuer1;~Ameya_Joshi2;~Chinmay_Hegde1", "gender": "M;M;M", "homepage": "https://penfever.github.io/;https://chinmayhegde.github.io/;https://ameya005.github.io", "dblp": "322/5063.html;39/2056;148/8731", "google_scholar": "VPXu100AAAAJ;eJAV17IAAAAJ;jZgsp_sAAAAJ", "orcid": "0000-0002-7938-542X;;", "linkedin": "benjaminfeuer/;;", "or_profile": "~Benjamin_Feuer1;~Chinmay_Hegde1;~Ameya_A_Joshi1", "aff": "New York University;New York University;New York University", "aff_domain": "nyu.edu;nyu.edu;nyu.edu", "position": "PhD student;Associate Professor;PhD Student", "bibtex": "@misc{\nfeuer2023caption,\ntitle={Caption supervision enables robust learners: a controlled study of distributionally robust model training},\nauthor={Benjamin Feuer and Ameya Joshi and Chinmay Hegde},\nyear={2023},\nurl={https://openreview.net/forum?id=Rkk51I-BpMH}\n}", "github": "", "project": "", "reviewers": "hwfD;y4rQ;kSsF;zEXa;ybgD", "site": "https://openreview.net/forum?id=Rkk51I-BpMH", "pdf_size": 3894999, "recommendation": "1;3;5;5;6", "confidence": "3;3;4;3;4", "correctness": "2;2;3;3;3", "technical_novelty": "2;2;3;2;3", "empirical_novelty": "0;2;3;2;3", "wc_summary_paper": "58;47;69;39;140", "wc_strength_and_weaknesses": "547;485;282;64;252", "wc_clarity_quality_novelty_and_reproducibility": "30;50;23;65;16", "wc_summary_review": "43;30;73;26;14", "wc_review": "678;612;447;194;422", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "512;635;244;216;68", "reply_reviewers": "0;0;0;0;0", "reply_authors": "1;1;1;1;1", "recommendation_avg": [ 4.0, 1.7888543819998317 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "correctness_avg": [ 2.6, 0.4898979485566356 ], "technical_novelty_avg": [ 2.4, 0.4898979485566356 ], "empirical_novelty_avg": [ 2.0, 1.0954451150103321 ], "wc_summary_paper_avg": [ 70.6, 36.14747570716384 ], "wc_strength_and_weaknesses_avg": [ 326.0, 173.30781863493638 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 36.8, 18.10414317221337 ], "wc_summary_review_avg": [ 37.2, 20.153411621856982 ], "wc_review_avg": [ 470.6, 168.78104158939178 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 335.0, 207.39334608419819 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.6846531968814576, "corr_recommendation_correctness": 0.9128709291752769, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:O3e64Gamkh8J:scholar.google.com/&scioq=Caption+supervision+enables+robust+learners:+a+controlled+study+of+distributionally+robust+model+training&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "New York University", "aff_unique_dep": "", "aff_unique_url": "https://www.nyu.edu", "aff_unique_abbr": "NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "Rkxj1GXn9_", "title": "On the Expressive Power of Geometric Graph Neural Networks", "track": "main", "status": 
"Reject", "tldr": "We propose a geometric version of the Weisfeler-Leman graph isomorphism test to characterise the expressive power of GNNs for geometric graphs.", "abstract": "The expressive power of Graph Neural Networks (GNNs) has been studied extensively through the lens of the Weisfeiler-Leman (WL) graph isomorphism test. Yet, many graphs arising in real-world applications come embedded in Euclidean space with an additional notion of geometric isomorphism, which is not covered by the WL framework. In this work, we propose a geometric version of the WL test (GWL) for discriminating geometric graphs while respecting the underlying physical symmetries: permutation, rotation, reflection, and translation. We use GWL to characterise the expressive power of GNNs that are invariant or equivariant to physical symmetries by studying the classes of geometric graphs that can or cannot be distinguished by these architectures. This allows us to formalise the advantages equivariant GNN layers have over their invariant counterparts in the Geometric Deep Learning blueprint. Finally, we connect our discrimination-based perspective with the universal approximation properties of geometric GNNs and prove they are two sides of the same coin.", "keywords": "Graph Neural Networks;Geometric Deep Learning;Equivariance;Expressive Power;Graph Isomorphism", "primary_area": "", "supplementary_material": "/attachment/01f702eaf8a4a954a9942d3c2da4a58bd4026c88.zip", "author": "Chaitanya K. Joshi;Cristian Bodnar;Simon V Mathis;Taco Cohen;Pietro Lio", "authorids": "~Chaitanya_K._Joshi1;~Cristian_Bodnar1;~Simon_V_Mathis1;~Taco_Cohen1;~Pietro_Lio1", "gender": "M;M;M;M;M", "homepage": "https://crisbodnar.github.io/;https://simonmathis.org;http://www.ta.co.nl;https://www.cst.cam.ac.uk/people/pl219;http://www.chaitjo.com/", "dblp": "220/3234;338/5638;142/2903;l/PietroLio.html;202/2132", "google_scholar": "pSmh9tkAAAAJ;https://scholar.google.ch/citations?user=N6I6fT0AAAAJ;a3q4YxEAAAAJ;https://scholar.google.co.uk/citations?user=3YrWf7EAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0002-5246-6481;;0000-0002-0540-5053;0000-0003-4722-1815", "linkedin": ";simonmathis/;;;chaitjo", "or_profile": "~Cristian_Bodnar1;~Simon_V_Mathis1;~Taco_Cohen1;~Pietro_Lio1;~Chaitanya_Krishna_Joshi1", "aff": "University of Cambridge;University of Cambridge;Qualcomm Inc, QualComm;University of Cambridge;Genentech, Roche", "aff_domain": "cam.ac.uk;cam.ac.uk;qti.qualcomm.com;cam.ac.uk;roche.com", "position": "PhD student;PhD student;Principal Researcher;Full Professor;Intern", "bibtex": "@misc{\njoshi2023on,\ntitle={On the Expressive Power of Geometric Graph Neural Networks},\nauthor={Chaitanya K. 
Joshi and Cristian Bodnar and Simon V Mathis and Taco Cohen and Pietro Lio},\nyear={2023},\nurl={https://openreview.net/forum?id=Rkxj1GXn9_}\n}", "github": "", "project": "", "reviewers": "J5ce;Kinq;Vdrv;XpRF", "site": "https://openreview.net/forum?id=Rkxj1GXn9_", "pdf_size": 910469, "recommendation": "3;5;5;8", "confidence": "3;4;4;5", "correctness": "2;4;4;3", "technical_novelty": "3;3;2;3", "empirical_novelty": "3;0;2;3", "wc_summary_paper": "91;73;57;178", "wc_strength_and_weaknesses": "81;393;256;108", "wc_clarity_quality_novelty_and_reproducibility": "156;54;13;61", "wc_summary_review": "12;212;28;2", "wc_review": "340;732;354;349", "wc_reply_reviewers": "105;306;304;318", "wc_reply_authors": "2970;4464;2737;1507", "reply_reviewers": "1;3;2;2", "reply_authors": "8;12;6;4", "recommendation_avg": [ 5.25, 1.7853571071357126 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 1.224744871391589 ], "wc_summary_paper_avg": [ 99.75, 46.75133687928079 ], "wc_strength_and_weaknesses_avg": [ 209.5, 125.14891130169691 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 71.0, 52.387975719624826 ], "wc_summary_review_avg": [ 63.5, 86.2365931609082 ], "wc_review_avg": [ 443.75, 166.49680927873663 ], "wc_reply_reviewers_avg": [ 258.25, 88.6407778621104 ], "wc_reply_authors_avg": [ 2919.5, 1050.7726918796473 ], "reply_reviewers_avg": [ 2.0, 0.7071067811865476 ], "reply_authors_avg": [ 7.5, 2.958039891549808 ], "replies_avg": [ 50, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.9901475429766743, "corr_recommendation_correctness": 0.2955402316445243, "gs_citation": 120, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15818086437267897909&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff_unique_index": "0;0;1;0;2", "aff_unique_norm": "University of Cambridge;Qualcomm Incorporated;Genentech", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cam.ac.uk;https://www.qualcomm.com;https://www.gene.com", "aff_unique_abbr": "Cambridge;Qualcomm;Genentech", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;0;1;0;1", "aff_country_unique": "United Kingdom;United States" }, { "id": "Rl4ihTreFnV", "title": "Robust Multi-Agent Reinforcement Learning with State Uncertainties", "track": "main", "status": "Reject", "tldr": "Fundamental research on robust multi-agent reinforcement learning with state uncertainty.", "abstract": "In real-world multi-agent reinforcement learning (MARL) applications, agents may not have perfect state information (e.g., due to inaccurate measurement or malicious attacks), which challenges the robustness of agents' policies. Though robustness is increasingly important in MARL deployment, little prior work has studied state uncertainties in MARL, either in problem formulation or in algorithm design. Motivated by this robustness issue, we study the problem of MARL with state uncertainty in this work. We provide a first attempt at the theoretical and empirical analysis of this challenging problem. We first model the problem as a Markov Game with state perturbation adversaries (MG-SPA), and introduce Robust Equilibrium as the solution concept. We conduct fundamental analysis regarding MG-SPA and give conditions under which such an equilibrium exists. 
Then we propose a robust multi-agent Q-learning (RMAQ) algorithm to find such an equilibrium, with convergence guarantees. To handle high-dimensional state-action space, we design a robust multi-agent actor-critic (RMAAC) algorithm based on an analytical expression of the policy gradient derived in the paper. Our experiments show that the proposed RMAQ algorithm converges to the optimal value function; our RMAAC algorithm outperforms several MARL methods that do not consider the state uncertainty in several multi-agent environments.", "keywords": "multi-agent reinforcement learning;robust reinforcement learning", "primary_area": "", "supplementary_material": "/attachment/5cdab303e12bb681f6a2fb71e32bba4f36022d76.zip", "author": "Sihong He;Songyang Han;Sanbao Su;Shuo Han;Shaofeng Zou;Fei Miao", "authorids": "~Sihong_He1;~Songyang_Han1;~Sanbao_Su1;~Shuo_Han3;~Shaofeng_Zou1;~Fei_Miao1", "gender": "F;M;M;;;F", "homepage": "https://sihonghe.com/;https://songyanghan.com;https://sanbaos.com/;https://hanshuo.people.uic.edu;;http://www.feimiao.org", "dblp": "237/6086;;221/2885;20/7794-2.html;;143/6002", "google_scholar": "-zSd9V0AAAAJ;https://scholar.google.com/citations?hl=en;EWyaceAAAAAJ;yFqY8x4AAAAJ;;fH2YF6YAAAAJ", "orcid": ";;;;;0000-0003-0066-4379", "linkedin": ";;sanbao-su/;;;fei-miao-76964727/", "or_profile": "~Sihong_He1;~Songyang_Han1;~Sanbao_Su1;~Shuo_Han3;~Shaofeng_Zou1;~Fei_Miao1", "aff": "University of Connecticut;University of Connecticut;University of Connecticut;University of Illinois Chicago;;University of Connecticut", "aff_domain": "uconn.edu;uconn.edu;uconn.edu;uic.edu;;uconn.edu", "position": "PhD student;PhD student;PhD student;Assistant Professor;;Assistant Professor", "bibtex": "@misc{\nhe2023robust,\ntitle={Robust Multi-Agent Reinforcement Learning with State Uncertainties},\nauthor={Sihong He and Songyang Han and Sanbao Su and Shuo Han and Shaofeng Zou and Fei Miao},\nyear={2023},\nurl={https://openreview.net/forum?id=Rl4ihTreFnV}\n}", "github": "", "project": "", "reviewers": "dBqZ;QZNZ;wL7B;qQaj", "site": "https://openreview.net/forum?id=Rl4ihTreFnV", "pdf_size": 7201930, "recommendation": "5;6;6;8", "confidence": "4;3;4;4", "correctness": "2;3;4;3", "technical_novelty": "3;3;3;3", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "90;49;71;72", "wc_strength_and_weaknesses": "539;147;187;123", "wc_clarity_quality_novelty_and_reproducibility": "291;29;84;45", "wc_summary_review": "72;38;57;50", "wc_review": "992;263;399;290", "wc_reply_reviewers": "840;19;73;45", "wc_reply_authors": "5829;680;430;693", "reply_reviewers": "2;1;1;2", "reply_authors": "12;2;1;2", "recommendation_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 70.5, 14.534441853748634 ], "wc_strength_and_weaknesses_avg": [ 249.0, 168.98520645310938 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 112.25, 105.12225026130291 ], "wc_summary_review_avg": [ 54.25, 12.295832627357937 ], "wc_review_avg": [ 486.0, 296.5425770441742 ], "wc_reply_reviewers_avg": [ 244.25, 344.4861209105528 ], "wc_reply_authors_avg": [ 1908.0, 2266.2156781736376 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 4.25, 4.493050188902857 ], "replies_avg": [ 33, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.13245323570650439, "corr_recommendation_correctness": 0.3244428422615251, "gs_citation": 3, 
"gs_cited_by_link": "https://scholar.google.com/scholar?cites=11711371647957377851&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "University of Connecticut;University of Illinois at Chicago", "aff_unique_dep": ";", "aff_unique_url": "https://www.uconn.edu;https://www.uic.edu", "aff_unique_abbr": "UConn;UIC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Chicago", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "RlCa0pFZsR9", "title": "Benchmarking Encoder-Decoder Architectures for Biplanar X-ray to 3D Bone Shape Reconstruction", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Various deep learning pipelines have been proposed for 3D Bone Shape Reconstruction from Biplanar X-rays. Although these methods individually report excellent results, we do not know how these architecture pipelines compare against each other since they are reported on different anatomy, datasets, and cohort distribution. We benchmark these disparate architectures on equal footing on three different anatomies using public datasets. We describe various benchmarking tasks to simulate real-world clinical scenarios including reconstruction of fractured bones, bones with implants, robustness to population shift, and estimate clinical parameters. \n\nWe provide an open-source implementation of SOTA architectures, dataset pipelines, and extraction of clinical parameters. \nComparing the encoder-decoder architectures with baseline retrieval models, we find that the encoder-decoder methods are able to learn from data and are much better than retrieval baselines. However, the best methods have limited difference on performance, but the domain shift plays an important role in deteriorating the performance of these methods. 
", "keywords": "2D-3D Reconstruction;Object Reconstruction;Medical Applications;Encoder-Decoder Architectures", "primary_area": "", "supplementary_material": "", "author": "Mahesh Shakya;Bishesh Khanal", "authorids": "~Mahesh_Shakya1;~Bishesh_Khanal4", "gender": ";M", "homepage": "https://www.naamii.org.np/teams/mahesh-shakya/;https://bishesh.github.io/", "dblp": "358/2809;18/10556", "google_scholar": "dzcUdHQAAAAJ;https://scholar.google.co.uk/citations?user=ZfaUCG5h3xsC", "orcid": ";0000-0002-2775-4748", "linkedin": ";", "or_profile": "~Mahesh_Shakya1;~Bishesh_Khanal4", "aff": "NAAMII,Nepal;NepAl Applied Mathematics and Informatics Institute for Research (NAAMII)", "aff_domain": "naamii.org.np;naamii.org.np", "position": "Researcher;Research scientist", "bibtex": "@misc{\nshakya2023benchmarking,\ntitle={Benchmarking Encoder-Decoder Architectures for Biplanar X-ray to 3D Bone Shape Reconstruction},\nauthor={Mahesh Shakya and Bishesh Khanal},\nyear={2023},\nurl={https://openreview.net/forum?id=RlCa0pFZsR9}\n}", "github": "", "project": "", "reviewers": "yGTn;c6kn;ExSP", "site": "https://openreview.net/forum?id=RlCa0pFZsR9", "pdf_size": 1851198, "recommendation": "3;5;5", "confidence": "2;5;3", "correctness": "3;3;3", "technical_novelty": "2;1;2", "empirical_novelty": "2;2;2", "wc_summary_paper": "63;83;48", "wc_strength_and_weaknesses": "73;269;54", "wc_clarity_quality_novelty_and_reproducibility": "16;53;18", "wc_summary_review": "33;57;23", "wc_review": "185;462;143", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 1.247219128924647 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 64.66666666666667, 14.337208778404378 ], "wc_strength_and_weaknesses_avg": [ 132.0, 97.1836748979306 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 29.0, 16.990193249832878 ], "wc_summary_review_avg": [ 37.666666666666664, 14.2672897060218 ], "wc_review_avg": [ 263.3333333333333, 141.52110168530425 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.7559289460184544, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4131734010284899180&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1", "aff_unique_norm": "NAAMII;Nepal Applied Mathematics and Informatics Institute for Research", "aff_unique_dep": ";Applied Mathematics and Informatics", "aff_unique_url": ";", "aff_unique_abbr": ";NAAMII", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Nepal" }, { "title": "GAIN: On the Generalization of Instructional Action Understanding", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11798", "id": "RlPmWBiyp6w", "poster": "/media/PosterPDFs/ICLR%202023/11798.png?t=1681328553.261075", "openreview": "https://openreview.net/forum?id=RlPmWBiyp6w", "slides": "https://iclr.cc/virtual/2023/poster/11798", "video": "https://iclr.cc/virtual/2023/poster/11798", "author_site": "Junlong Li, Guangyi Chen, Yansong Tang, Jinan Bao, Kun Zhang, Jie Zhou, Jiwen Lu", "tldr": "", 
"abstract": "Despite the great success achieved in instructional action understanding by deep learning and mountainous data, deploying trained models to the unseen environment still remains a great challenge, since it requires strong generalizability of models from in-distribution training data to out-of-distribution (OOD) data. In this paper, we introduce a benchmark, named GAIN, to analyze the GeneralizAbility of INstructional action understanding models. In GAIN, we reassemble steps of existing instructional video training datasets to construct the OOD tasks and then collect the corresponding videos. We evaluate the generalizability of models trained on in-distribution datasets with the performance on OOD videos and observe a significant performance drop. We further propose a simple yet effective approach, which cuts off the excessive contextual dependency of action steps by performing causal inference, to provide a potential direction for enhancing the OOD generalizability. In the experiments, we show that this simple approach can improve several baselines on both instructional action segmentation and detection tasks. We expect the introduction of the GAIN dataset will promote future in-depth research on the generalization of instructional video understanding.", "keywords": "Action Analysis;Instructional Video;OOD Generalization", "primary_area": "", "supplementary_material": "/attachment/4d19c5edffd4737b7ebb6f29aa93111c4b3809d6.zip", "author": "Junlong Li;Guangyi Chen;Yansong Tang;Jinan Bao;Kun Zhang;Jie Zhou;Jiwen Lu", "authorids": "~Junlong_Li2;~Guangyi_Chen1;~Yansong_Tang1;jbao1@ualberta.ca;~Kun_Zhang1;~Jie_Zhou3;~Jiwen_Lu1", "gender": "M;M;M;;M;M;M", "homepage": "http://ivg.au.tsinghua.edu.cn/people.php;https://chengy12.github.io/;https://andytang15.github.io/;;http://www.andrew.cmu.edu/user/kunz1/;https://www.tsinghua.edu.cn/publish/auen/1713/2011/20110506105532098625469/20110506105532098625469_.html;http://ivg.au.tsinghua.edu.cn/Jiwen_Lu/", "dblp": ";c/GuangyiChen-2;214/9568;;96/3115-1;00/5012-1;http://dblp.uni-trier.de/pers/hd/l/Lu:Jiwen", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;TIbistUAAAAJ;;RGoypN4AAAAJ;;TN8uDQoAAAAJ", "orcid": ";;;;;;0000-0002-6121-5529", "linkedin": ";;;;;;", "or_profile": "~Junlong_Li2;~Guangyi_Chen1;~Yansong_Tang1;jbao1@ualberta.ca;~Kun_Zhang1;~Jie_Zhou3;~Jiwen_Lu1", "aff": "Tsinghua University;Carnegie Mellon University;Tsinghua University;;Carnegie Mellon University;Tsinghua University;Tsinghua University", "aff_domain": "mails.tsinghua.edu.cn;cmu.edu;tsinghua.edu.cn;;cmu.edu;tsinghua.edu.cn;tsinghua.edu.cn", "position": "MS student;Postdoc;Assistant Professor;;Associate Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nli2023gain,\ntitle={{GAIN}: On the Generalization of Instructional Action Understanding},\nauthor={Junlong Li and Guangyi Chen and Yansong Tang and Jinan Bao and Kun Zhang and Jie Zhou and Jiwen Lu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=RlPmWBiyp6w}\n}", "github": "", "project": "", "reviewers": "ESEp;Ct13;2UkL", "pdf_size": 7663336, "recommendation": "6;6;8", "confidence": "3;3;4", "correctness": "3;4;4", "technical_novelty": "4;3;3", "empirical_novelty": "4;3;4", "wc_summary_paper": "101;61;47", "wc_strength_and_weaknesses": "524;163;215", "wc_clarity_quality_novelty_and_reproducibility": "69;24;19", "wc_summary_review": "80;33;41", "wc_review": "774;281;322", "wc_reply_reviewers": "37;0;0", 
"wc_reply_authors": "2327;653;528", "reply_reviewers": "1;0;0", "reply_authors": "6;3;1", "recommendation_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 69.66666666666667, 22.88133640230735 ], "wc_strength_and_weaknesses_avg": [ 300.6666666666667, 159.34100399945882 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 37.333333333333336, 22.484562605386735 ], "wc_summary_review_avg": [ 51.333333333333336, 20.531818125912658 ], "wc_review_avg": [ 459.0, 223.3666641794757 ], "wc_reply_reviewers_avg": [ 12.333333333333334, 17.441967269268172 ], "wc_reply_authors_avg": [ 1169.3333333333333, 820.1830419011491 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 3.3333333333333335, 2.0548046676563256 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.9999999999999998, "corr_recommendation_correctness": 0.49999999999999983, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5096103166693621320&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "pdf": "https://openreview.net/pdf?id=RlPmWBiyp6w", "email": "mails.tsinghua.edu.cn;cmu.edu;tsinghua.edu.cn;;cmu.edu;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 7, "aff_unique_index": "0;1;0;1;0;0", "aff_unique_norm": "Tsinghua University;Carnegie Mellon University", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.cmu.edu", "aff_unique_abbr": "THU;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1;0;0", "aff_country_unique": "China;United States" }, { "id": "RlxNpChToM_", "title": "A Picture of the Space of Typical Learning Tasks", "track": "main", "status": "Reject", "tldr": "We develop a technique to analyze the learned representation on a task, and its relationship to other tasks. We identify several surprising phenomena, e.g., the manifold of probabilistic models learned on different tasks is low-dimensional.", "abstract": "We develop a technique to analyze representations learned by deep networks when they are trained on different tasks using supervised, multi-task, meta- and contrastive learning. We develop a technique to visualize such representations using an isometric embedding of the space of probabilistic models into a lower-dimensional space, i.e., one that preserves pairwise distances. 
We discover the following surprising phenomena that shed light upon the structure in the space of learning tasks: (1) the manifold of probabilistic models trained on different tasks using different representation learning methods is effectively low-dimensional; (2) supervised learning on one task results in a surprising amount of progress on seemingly dissimilar tasks; progress on other tasks is larger if the training task has diverse classes; (3) the structure of the space of tasks indicated by our visualization technique is consistent with parts of the Wordnet phylogenetic tree; (4) fine-tuning a model upon a sub-task does not change the representation much if the model was trained for a large number of epochs; (5) episodic meta-learning algorithms fit similar models eventually as that of supervised learning, even if the two traverse different trajectories during training; (6) contrastive learning methods trained on different datasets learn similar representations. We use classification tasks constructed from the CIFAR-10 and Imagenet datasets to study these phenomena.", "keywords": "Information Geometry;Space of learning tasks", "primary_area": "", "supplementary_material": "", "author": "Rahul Ramesh;Jialin Mao;Itay Griniasty;Rubing Yang;Han Kheng Teoh;Mark Transtrum;James Sethna;Pratik Chaudhari", "authorids": "~Rahul_Ramesh2;~Jialin_Mao1;ig324@cornell.edu;~Rubing_Yang1;ht452@cornell.edu;~Mark_Transtrum1;~James_Sethna1;~Pratik_Chaudhari1", "gender": "M;;;F;;M;M;M", "homepage": "https://cis.upenn.edu/~rahulram;;;https://www.linkedin.com/in/rubing-yang-b4383a149/;;https://mktranstrum.github.io/Publications.html;https://sethna.lassp.cornell.edu/;https://pratikac.github.io/", "dblp": "168/7029;209/4867;;304/8991;;;;", "google_scholar": "wCa6nygAAAAJ;;;;;YtfPZDAAAAAJ;O8zsFcAAAAAJ;c_z5hWEAAAAJ", "orcid": ";;;;;0000-0001-9529-9399;;", "linkedin": ";;;;;;;pratik-chaudhari-59508765", "or_profile": "~Rahul_Ramesh2;~Jialin_Mao1;ig324@cornell.edu;~Rubing_Yang1;ht452@cornell.edu;~Mark_Transtrum1;~James_Sethna1;~Pratik_Chaudhari1", "aff": "University of Pennsylvania;University of Pennsylvania;;University of Pennsylvania;;Brigham Young University;Cornell University;School of Engineering and Applied Science, University of Pennsylvania", "aff_domain": "upenn.edu;upenn.edu;;upenn.edu;;byu.edu;cornell.edu;seas.upenn.edu", "position": "PhD student;PhD student;;PhD student;;Associate Professor;Full Professor;Assistant Professor", "bibtex": "@misc{\nramesh2023a,\ntitle={A Picture of the Space of Typical Learning Tasks},\nauthor={Rahul Ramesh and Jialin Mao and Itay Griniasty and Rubing Yang and Han Kheng Teoh and Mark Transtrum and James Sethna and Pratik Chaudhari},\nyear={2023},\nurl={https://openreview.net/forum?id=RlxNpChToM_}\n}", "github": "", "project": "", "reviewers": "opzC;hj5S;wHo9", "site": "https://openreview.net/forum?id=RlxNpChToM_", "pdf_size": 2576667, "recommendation": "3;6;6", "confidence": "3;3;3", "correctness": "3;4;3", "technical_novelty": "2;3;3", "empirical_novelty": "2;3;3", "wc_summary_paper": "40;139;62", "wc_strength_and_weaknesses": "294;153;439", "wc_clarity_quality_novelty_and_reproducibility": "281;42;29", "wc_summary_review": "42;65;44", "wc_review": "657;399;574", "wc_reply_reviewers": "535;0;0", "wc_reply_authors": "4151;1032;1163", "reply_reviewers": "4;0;0", "reply_authors": "9;2;2", "recommendation_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 3.0, 0.0 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 
2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 80.33333333333333, 42.444735310230826 ], "wc_strength_and_weaknesses_avg": [ 295.3333333333333, 116.76281752148479 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 117.33333333333333, 115.85143743413612 ], "wc_summary_review_avg": [ 50.333333333333336, 10.402991022884823 ], "wc_review_avg": [ 543.3333333333334, 107.5370737105219 ], "wc_reply_reviewers_avg": [ 178.33333333333334, 252.20141862320196 ], "wc_reply_authors_avg": [ 2115.3333333333335, 1440.4268657434695 ], "reply_reviewers_avg": [ 1.3333333333333333, 1.8856180831641267 ], "reply_authors_avg": [ 4.333333333333333, 3.299831645537222 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:-VmZa7pXlHgJ:scholar.google.com/&scioq=A+Picture+of+the+Space+of+Typical+Learning+Tasks&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;1;2;0", "aff_unique_norm": "University of Pennsylvania;Brigham Young University;Cornell University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.upenn.edu;https://www.byu.edu;https://www.cornell.edu", "aff_unique_abbr": "UPenn;BYU;Cornell", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "Rn50hCOX9XX", "title": "Gene finding revisited: improved robustness through structured decoding from learning embeddings", "track": "main", "status": "Reject", "tldr": "Improving the robustness of predicting the exact coding sequences of genomes by combining deep learning with a graphical model encoding gene structure.", "abstract": "Gene finding is the task of identifying the locations of coding sequences within the vast amount of genetic code contained in the genome. With an ever-increasing quantity of raw genome sequences, gene finding is an important avenue towards understanding the genetic information of (novel) organisms, as well as learning shared patterns across evolutionarily diverse species. The current state of the art is graphical models, usually trained per organism and requiring manually curated data sets. However, these models lack the flexibility to incorporate the deep-learning-based representation learning techniques that have in recent years been transformative in the analysis of protein sequences, and which could potentially help gene finders exploit the growing number of sequenced genomes to expand performance across multiple organisms. Here, we propose a novel approach, combining learned embeddings of raw genetic sequences with exact decoding using a latent conditional random field. We show that the model achieves performance matching the current state of the art, while increasing training robustness, and removing the need for manually fitted length distributions. As language models for DNA improve, this paves the way for more performant cross-organism gene-finders.
", "keywords": "gene finding;graphical model;gene prediction;gene splicing;conditional random fields;structured decoding;DNA;learned embeddings", "primary_area": "", "supplementary_material": "", "author": "Frederikke Isa Marin;Dennis Pultz;Wouter Boomsma", "authorids": "~Frederikke_Isa_Marin1;~Dennis_Pultz1;~Wouter_Boomsma1", "gender": "F;M;M", "homepage": ";;", "dblp": ";;06/5945", "google_scholar": ";;EwqU_jsAAAAJ", "orcid": "0000-0003-4403-6745;;0000-0002-8257-3827", "linkedin": ";https://dk.linkedin.com/in/dennispultz;", "or_profile": "~Frederikke_Isa_Marin1;~Dennis_Pultz1;~Wouter_Boomsma1", "aff": "Copenhagen University;Novozymes A/S;University of Copenhagen", "aff_domain": "ku.dk;novozymes.com;ku.dk", "position": "PhD student;Researcher;Full Professor", "bibtex": "@misc{\nmarin2023gene,\ntitle={Gene finding revisited: improved robustness through structured decoding from learning embeddings},\nauthor={Frederikke Isa Marin and Dennis Pultz and Wouter Boomsma},\nyear={2023},\nurl={https://openreview.net/forum?id=Rn50hCOX9XX}\n}", "github": "", "project": "", "reviewers": "rvN5;DUdq;ggXe;6LHj", "site": "https://openreview.net/forum?id=Rn50hCOX9XX", "pdf_size": 380928, "recommendation": "1;3;6;8", "confidence": "5;3;3;4", "correctness": "2;2;3;3", "technical_novelty": "1;2;3;2", "empirical_novelty": "3;2;3;3", "wc_summary_paper": "55;20;84;82", "wc_strength_and_weaknesses": "314;139;145;128", "wc_clarity_quality_novelty_and_reproducibility": "2;9;75;518", "wc_summary_review": "2;68;115;47", "wc_review": "373;236;419;775", "wc_reply_reviewers": "22;0;0;0", "wc_reply_authors": "473;254;484;273", "reply_reviewers": "1;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.5, 2.692582403567252 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 60.25, 25.907286619790966 ], "wc_strength_and_weaknesses_avg": [ 181.5, 76.74144903505537 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 151.0, 213.79312430478208 ], "wc_summary_review_avg": [ 58.0, 40.6386515524322 ], "wc_review_avg": [ 450.75, 198.94016060112145 ], "wc_reply_reviewers_avg": [ 5.5, 9.526279441628825 ], "wc_reply_authors_avg": [ 371.0, 107.77986825005864 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.391924757669098, "corr_recommendation_correctness": 0.9284766908852594, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:65j8TkPx_EAJ:scholar.google.com/&scioq=Gene+finding+revisited:+improved+robustness+through+structured+decoding+from+learning+embeddings&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Copenhagen;Novozymes", "aff_unique_dep": ";", "aff_unique_url": "https://www.ku.dk;https://www.novozymes.com", "aff_unique_abbr": "UCPH;Novozymes", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Denmark" }, { "id": "Rn8u4MYgeNJ", "title": "Mitigating the Limitations of Multimodal VAEs with Coordination-Based Approach", "track": "main", "status": "Reject", "tldr": "", "abstract": "One of the key challenges in multimodal variational autoencoders (VAEs) is inferring a joint representation from arbitrary subsets of modalities. 
The state-of-the-art approach to achieving this is to sub-sample the modality subsets and learn to generate all modalities from them. However, this sub-sampling in the mixture-based approach has been shown to degrade other important features of multimodal VAEs, such as the quality of generation, and furthermore, this degradation is theoretically unavoidable. In this study, we focus on another approach to learning the joint representation by bringing unimodal inferences closer to joint inference from all modalities, which does not have the above limitation. Although there have been models that can be categorized under this approach, they were derived from different backgrounds; therefore, the relationships among them and their relative merits were unclear. To take a unified view, we first categorize them as coordination-based multimodal VAEs and show that these can be derived from the same multimodal evidence lower bound (ELBO) and that the difference in their performance is related to whether they are more tightly lower bounded. Next, we point out that these existing coordination-based models perform poorly on cross-modal generation (or cross-coherence) because they do not learn to reconstruct modalities from unimodal inferences. Therefore, we propose a novel coordination-based model that incorporates these unimodal reconstructions, which avoids the limitations of both mixture and coordination-based models. Experiments with diverse and challenging datasets show that the proposed model mitigates the limitations in multimodal VAEs and performs well in both cross-coherence and generation quality.", "keywords": "multimodal learning;deep generative models", "primary_area": "", "supplementary_material": "/attachment/9fb5dc1994fa4ed00353d9551f51e853cdb96fc3.zip", "author": "Masahiro Suzuki;Yutaka Matsuo", "authorids": "~Masahiro_Suzuki1;~Yutaka_Matsuo1", "gender": "M;M", "homepage": ";http://ymatsuo.com", "dblp": ";m/YMatsuo.html", "google_scholar": "r2nt5kUAAAAJ;Dy8iau4AAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Masahiro_Suzuki1;~Yutaka_Matsuo1", "aff": "The University of Tokyo, Tokyo Institute of Technology;The University of Tokyo", "aff_domain": "u-tokyo.ac.jp;u-tokyo.ac.jp", "position": "Assistant Professor;Associate Professor", "bibtex": "@misc{\nsuzuki2023mitigating,\ntitle={Mitigating the Limitations of Multimodal {VAE}s with Coordination-Based Approach},\nauthor={Masahiro Suzuki and Yutaka Matsuo},\nyear={2023},\nurl={https://openreview.net/forum?id=Rn8u4MYgeNJ}\n}", "github": "", "project": "", "reviewers": "Jrda;P3oY;venE;DC9h", "site": "https://openreview.net/forum?id=Rn8u4MYgeNJ", "pdf_size": 1343204, "recommendation": "5;5;5;8", "confidence": "3;4;3;1", "correctness": "4;3;4;3", "technical_novelty": "3;2;2;3", "empirical_novelty": "3;3;2;3", "wc_summary_paper": "115;127;70;92", "wc_strength_and_weaknesses": "166;483;190;45", "wc_clarity_quality_novelty_and_reproducibility": "44;184;19;11", "wc_summary_review": "45;77;39;26", "wc_review": "370;871;318;174", "wc_reply_reviewers": "0;182;0;0", "wc_reply_authors": "361;624;419;33", "reply_reviewers": "0;1;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 2.75, 1.0897247358851685 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 101.0, 21.874642854227357 ], "wc_strength_and_weaknesses_avg": [ 221.0, 160.93942960008278 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 64.5,
70.05890378816957 ], "wc_summary_review_avg": [ 46.75, 18.766659265836314 ], "wc_review_avg": [ 433.25, 262.73501384474815 ], "wc_reply_reviewers_avg": [ 45.5, 78.80831174438391 ], "wc_reply_authors_avg": [ 359.25, 212.19374990795558 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.9271726499455307, "corr_recommendation_correctness": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:rqI1xyhOz7QJ:scholar.google.com/&scioq=Mitigating+the+Limitations+of+Multimodal+VAEs+with+Coordination-Based+Approach&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "University of Tokyo", "aff_unique_dep": "", "aff_unique_url": "https://www.u-tokyo.ac.jp", "aff_unique_abbr": "UTokyo", "aff_campus_unique_index": "0", "aff_campus_unique": "Tokyo;", "aff_country_unique_index": "0;0", "aff_country_unique": "Japan" }, { "id": "RnH_0iL4xao", "title": "Towards Conditionally Dependent Masked Language Models", "track": "main", "status": "Reject", "tldr": "We study the limitations of MRFs defined from MLMs' unary conditionals, and propose alternatives that are either better (from a probabilistic modeling standpoint) or faster to run", "abstract": "Masked language modeling has proven to be an effective paradigm for learning representations of language. However, when multiple tokens are masked out, the masked language model's (MLM) distribution over the masked positions assumes that the masked tokens are conditionally independent given the unmasked tokens---an assumption that does not hold in practice. Existing work addresses this limitation by interpreting the sum of unary scores (i.e., the logits or the log probabilities of single tokens when conditioned on all others) as the log potential of a Markov random field (MRF). While this new model no longer makes any independence assumptions, it remains unclear whether this approach (i) results in a good probabilistic model of language and further (ii) derives a model that is faithful (i.e., has matching unary distributions) to the original model. This paper studies MRFs derived this way in a controlled setting where only two tokens are masked out at a time, which makes it possible to compute exact distributional properties. We find that such pairwise MRFs are often worse probabilistic models of language from a perplexity standpoint, and moreover have unary distributions that do not match the unary distributions of the original MLM. We then study a statistically-motivated iterative optimization algorithm for deriving joint pairwise distributions that are more compatible with the original unary distributions. While this iterative approach outperforms the MRF approach, the algorithm itself is too expensive to be practical.
We thus amortize this optimization process through a parameterized feed-forward layer that learns to modify the original MLM's pairwise distributions to be both non-independent and faithful, and find that this approach outperforms the MLM for scoring pairwise tokens.", "keywords": "Markov random fields;masked language models;compatibility", "primary_area": "", "supplementary_material": "", "author": "Lucas Torroba Hennigen;Yoon Kim", "authorids": "~Lucas_Torroba_Hennigen1;~Yoon_Kim1", "gender": "M;", "homepage": "https://ltorroba.github.io/;https://people.csail.mit.edu/yoonkim/", "dblp": "267/9755;", "google_scholar": "Zhy1N1sAAAAJ;n_ts4eYAAAAJ", "orcid": "0000-0002-8197-9008;", "linkedin": "lucas-torroba-hennigen/;", "or_profile": "~Lucas_Torroba_Hennigen1;~Yoon_Kim1", "aff": "International Business Machines;Massachusetts Institute of Technology", "aff_domain": "ibm.com;mit.edu", "position": "Intern;Assistant Professor", "bibtex": "@misc{\nhennigen2023towards,\ntitle={Towards Conditionally Dependent Masked Language Models},\nauthor={Lucas Torroba Hennigen and Yoon Kim},\nyear={2023},\nurl={https://openreview.net/forum?id=RnH_0iL4xao}\n}", "github": "", "project": "", "reviewers": "fxig;cQUX;zRNa", "site": "https://openreview.net/forum?id=RnH_0iL4xao", "pdf_size": 337959, "recommendation": "5;5;6", "confidence": "3;4;2", "correctness": "4;2;3", "technical_novelty": "2;2;3", "empirical_novelty": "2;2;2", "wc_summary_paper": "106;6;69", "wc_strength_and_weaknesses": "164;6;53", "wc_clarity_quality_novelty_and_reproducibility": "17;6;217", "wc_summary_review": "37;431;35", "wc_review": "324;449;374", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "222;489;195", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 60.333333333333336, 41.282226468811274 ], "wc_strength_and_weaknesses_avg": [ 74.33333333333333, 66.24365797736581 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 80.0, 96.9776606578374 ], "wc_summary_review_avg": [ 167.66666666666666, 186.20657584760238 ], "wc_review_avg": [ 382.3333333333333, 51.37011669140814 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 302.0, 132.68760303811354 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.8660254037844385, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:DYNmhUBlqswJ:scholar.google.com/&scioq=Towards+Conditionally+Dependent+Masked+Language+Models&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "International Business Machines Corporation;Massachusetts Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.ibm.com;https://web.mit.edu", "aff_unique_abbr": "IBM;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "Rpo9dvNlEYW", "title": "Federated Representation Learning via Maximal Coding Rate Reduction", "track": "main", "status": "Reject", "tldr": "We propose a federated way of learning low dimensional representations. 
", "abstract": "We propose a federated methodology to learn low-dimensional representations from a dataset that is distributed among several clients. In particular, we move away from the commonly-used cross-entropy loss in federated learning, and seek to learn shared low-dimensional representations of the data in a decentralized manner via the principle of maximal coding rate reduction (MCR2). Our proposed method, which we refer to as FLOW, utilizes MCR2 as the objective of choice, hence resulting in representations that are both between-class discriminative and within-class compressible. We theoretically show that our distributed algorithm achieves a first-order stationary point. Moreover, we demonstrate, via numerical experiments, the utility of the learned low-dimensional representations.", "keywords": "Federated Learning;Representation Learning;Information Theory", "primary_area": "", "supplementary_material": "/attachment/6abeacebc5a6879f56e059b71e1d6d60c07a421e.zip", "author": "Juan Cervino;Navid Naderializadeh;Alejandro Ribeiro", "authorids": "~Juan_Cervino1;~Navid_Naderializadeh1;~Alejandro_Ribeiro1", "gender": "M;M;M", "homepage": "https://juancervino.github.io/;https://www.seas.upenn.edu/~nnaderi/;https://alelab.seas.upenn.edu", "dblp": ";126/5064.html;32/15", "google_scholar": "lbyYN_sAAAAJ;roVp_WwAAAAJ;7mrPM4kAAAAJ", "orcid": ";0000-0002-4891-6726;0000-0003-4230-9906", "linkedin": ";navid-naderi-alizadeh/;", "or_profile": "~Juan_Cervino1;~Navid_Naderializadeh1;~Alejandro_Ribeiro1", "aff": "University of Pennsylvania;University of Pennsylvania;University of Pennsylvania", "aff_domain": "upenn.edu;upenn.edu;upenn.edu", "position": "PhD student;Postdoc;Full Professor", "bibtex": "@misc{\ncervino2023federated,\ntitle={Federated Representation Learning via Maximal Coding Rate Reduction},\nauthor={Juan Cervino and Navid Naderializadeh and Alejandro Ribeiro},\nyear={2023},\nurl={https://openreview.net/forum?id=Rpo9dvNlEYW}\n}", "github": "", "project": "", "reviewers": "fuw1;LtxX;L6k4;bnnY", "site": "https://openreview.net/forum?id=Rpo9dvNlEYW", "pdf_size": 602452, "recommendation": "1;3;3;5", "confidence": "5;2;5;3", "correctness": "1;4;4;3", "technical_novelty": "1;1;1;2", "empirical_novelty": "1;1;1;2", "wc_summary_paper": "23;48;65;73", "wc_strength_and_weaknesses": "300;47;243;102", "wc_clarity_quality_novelty_and_reproducibility": "26;12;17;25", "wc_summary_review": "29;31;57;32", "wc_review": "378;138;382;232", "wc_reply_reviewers": "271;0;0;0", "wc_reply_authors": "905;320;372;219", "reply_reviewers": "1;0;0;0", "reply_authors": "2;1;1;1", "recommendation_avg": [ 3.0, 1.4142135623730951 ], "confidence_avg": [ 3.75, 1.299038105676658 ], "correctness_avg": [ 3.0, 1.224744871391589 ], "technical_novelty_avg": [ 1.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 52.25, 19.149086140074676 ], "wc_strength_and_weaknesses_avg": [ 173.0, 102.40361321750322 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 20.0, 5.787918451395113 ], "wc_summary_review_avg": [ 37.25, 11.453711188955307 ], "wc_review_avg": [ 282.5, 103.01820227513194 ], "wc_reply_reviewers_avg": [ 67.75, 117.34644221279143 ], "wc_reply_authors_avg": [ 454.0, 266.1324858035937 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.5443310539518174, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 
1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=990898581229335708&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Pennsylvania", "aff_unique_dep": "", "aff_unique_url": "https://www.upenn.edu", "aff_unique_abbr": "UPenn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Molecule Generation For Target Protein Binding with Structural Motifs", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11890", "id": "Rq13idF0F73", "poster": "", "openreview": "https://openreview.net/forum?id=Rq13idF0F73", "slides": "https://iclr.cc/virtual/2023/poster/11890", "video": "https://iclr.cc/virtual/2023/poster/11890", "author_site": "ZAIXI ZHANG, Yaosen Min, Shuxin Zheng, Qi Liu", "tldr": "", "abstract": "Designing ligand molecules that bind to specific protein binding sites is a fundamental problem in structure-based drug design. Although deep generative models and geometric deep learning have made great progress in drug design, existing works either sample in the 2D graph space or fail to generate valid molecules with realistic substructures. To tackle these problems, we propose a Fragment-based LigAnd Generation framework (FLAG), to generate 3D molecules with valid and realistic substructures fragment-by-fragment. In FLAG, a motif vocabulary is constructed by extracting common molecular fragments (i.e., motif) in the dataset. At each generation step, a 3D graph neural network is first employed to encode the intermediate context information. Then, our model selects the focal motif, predicts the next motif type, and attaches the new motif. The bond lengths/angles can be quickly and accurately determined by cheminformatics tools. Finally, the molecular geometry is further adjusted according to the predicted rotation angle and the structure refinement. 
Our model not only achieves competitive performances on conventional metrics such as binding affinity, QED, and SA, but also outperforms baselines by a large margin in generating molecules with realistic substructures.", "keywords": "Structure-based Drug Design", "primary_area": "", "supplementary_material": "", "author": "ZAIXI ZHANG;Yaosen Min;Shuxin Zheng;Qi Liu", "authorids": "~ZAIXI_ZHANG2;minys18@mails.tsinghua.edu.cn;~Shuxin_Zheng1;~Qi_Liu3", "gender": "M;;M;M", "homepage": "http://home.ustc.edu.cn/~zaixi/;;https://www.microsoft.com/en-us/research/people/shuz/;http://staff.ustc.edu.cn/~qiliuql/", "dblp": "267/9295.html;;186/8255;95/2446-3", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;https://scholar.google.co.jp/citations?user=rPhGUw0AAAAJ;5EoHAFwAAAAJ", "orcid": ";;;0000-0001-6956-5550", "linkedin": ";;;", "or_profile": "~ZAIXI_ZHANG2;minys18@mails.tsinghua.edu.cn;~Shuxin_Zheng1;~Qi_Liu3", "aff": "University of Science and Technology of China;;Microsoft;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;;microsoft.com;ustc.edu.cn", "position": "PhD student;;Senior Researcher;Full Professor", "bibtex": "@inproceedings{\nzhang2023molecule,\ntitle={Molecule Generation For Target Protein Binding with Structural Motifs},\nauthor={ZAIXI ZHANG and Yaosen Min and Shuxin Zheng and Qi Liu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=Rq13idF0F73}\n}", "github": "", "project": "", "reviewers": "xsvH;WWrH;nStG;fSis", "pdf_size": 2365429, "recommendation": "5;6;8;8", "confidence": "4;4;4;2", "correctness": "3;3;3;4", "technical_novelty": "3;3;3;3", "empirical_novelty": "2;3;0;0", "wc_summary_paper": "58;160;127;87", "wc_strength_and_weaknesses": "174;331;22;101", "wc_clarity_quality_novelty_and_reproducibility": "51;71;157;18", "wc_summary_review": "52;108;443;16", "wc_review": "335;670;749;222", "wc_reply_reviewers": "103;0;0;18", "wc_reply_authors": "1029;915;0;284", "reply_reviewers": "1;0;0;1", "reply_authors": "2;2;0;1", "recommendation_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 1.25, 1.299038105676658 ], "wc_summary_paper_avg": [ 108.0, 38.749193539995126 ], "wc_strength_and_weaknesses_avg": [ 157.0, 113.93638575977386 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 74.25, 51.38762010445706 ], "wc_summary_review_avg": [ 154.75, 169.61924271732852 ], "wc_review_avg": [ 494.0, 220.94456318271332 ], "wc_reply_reviewers_avg": [ 30.25, 42.6402098962939 ], "wc_reply_authors_avg": [ 557.0, 428.8723586336615 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.82915619758885 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.5555555555555555, "corr_recommendation_correctness": 0.5555555555555555, "gs_citation": 56, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2991405955987800076&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "pdf": "https://openreview.net/pdf?id=Rq13idF0F73", "email": "ustc.edu.cn;;microsoft.com;ustc.edu.cn", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Science and Technology of China;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "http://www.ustc.edu.cn;https://www.microsoft.com", "aff_unique_abbr": "USTC;Microsoft", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "China;United States" }, { "id": "RqJZTlQMph", "title": "Weakly Supervised Neuro-Symbolic Image Manipulation via Multi-Hop Complex Instructions", "track": "main", "status": "Reject", "tldr": "We propose a weakly supervised neuro-symbolic approach for the problem of image manipulation using text instructions.", "abstract": "We are interested in image manipulation via natural language text \u2013 a task that is extremely useful for multiple AI applications but requires complex reasoning over multi-modal spaces. Recent work on neuro-symbolic approaches (Mao et al., 2019) (NSCL) has been quite effective for solving VQA as they offer better modularity, interpretability, and generalizability. We extend NSCL for the image manipulation task and propose a solution referred to as NeuroSIM. Previous work either requires supervised training data in the form of manipulated images or can only deal with very simple reasoning instructions over single object scenes. In contrast, NeuroSIM can perform complex multi-hop reasoning over multi-object scenes and only requires weak supervision in the form of annotated data for VQA. NeuroSIM parses an instruction into a symbolic program, based on a Domain Specific Language (DSL) comprising of object attributes and manipulation operations, that guides the manipulation. We design neural modules for manipulation, as well as novel loss functions that are capable of testing the correctness of manipulated object and scene graph representations via query networks trained merely on VQA data. An image decoder is trained to render the final image from the manipulated scene graph. Extensive experiments demonstrate that NeuroSIM, without using target images as supervision, is highly competitive with SOTA baselines that make use of supervised data for manipulation.", "keywords": "Neuro-Symbolic Reasoning;Natural Language Guided Image Manipulation;Visual Question Answering;Weakly Supervised Learning", "primary_area": "", "supplementary_material": "", "author": "Harman Singh;Poorva Garg;Mohit Gupta;Kevin Shah;Arnab Kumar Mondal;Dinesh Khandelwal;Parag Singla;Dinesh Garg", "authorids": "~Harman_Singh1;~Poorva_Garg1;~Mohit_Gupta6;~Kevin_Shah1;~Arnab_Kumar_Mondal2;~Dinesh_Khandelwal2;~Parag_Singla1;~Dinesh_Garg1", "gender": "M;;M;M;M;M;M;M", "homepage": ";;https://www.linkedin.com/in/mohit-gupta-295b42133/;;;https://research.ibm.com/people/dinesh-khandelwal;http://www.cse.iitd.ac.in/~parags;https://researcher.watson.ibm.com/researcher/view.php?person=in-garg.dinesh", "dblp": "162/5054.html;;;;;177/0164;14/167;https://dblp.uni-trier.de/pers/g/Garg:Dinesh.html", "google_scholar": "BanlVLYAAAAJ;;;;MZ8N49AAAAAJ;Pi-SqXwAAAAJ;https://scholar.google.co.in/citations?user=V49BsgMAAAAJ;https://scholar.google.com.tw/citations?user=YrU_ZDkAAAAJ", "orcid": "0000-0002-3970-6276;;;;0000-0001-7297-374X;;;", "linkedin": "harman-singh-4243ab180/;;mohit-gupta-295b42133/;kevin-shah-5527a178/;arnab-mondal-a4448a18/;dinesh-khandelwal-68689420/;;dingarg/", "or_profile": "~Harman_Singh1;~Poorva_Garg1;~Mohit_Gupta6;~Kevin_Shah1;~Arnab_Kumar_Mondal2;~Dinesh_Khandelwal2;~Parag_Singla1;~Dinesh_Garg1", "aff": "Meta;;;Indian Institute of Technology Delhi;Fujitsu Research and Development Center Co. 
Ltd.;International Business Machines;Indian Institute of Technology, Delhi;", "aff_domain": "fb.com;;;iitd.ac.in;fujitsu.com;ibm.com;iitd.ac.in;", "position": "AI Resident;;;Undergrad student;Researcher;Researcher;Associate Professor;", "bibtex": "@misc{\nsingh2023weakly,\ntitle={Weakly Supervised Neuro-Symbolic Image Manipulation via Multi-Hop Complex Instructions},\nauthor={Harman Singh and Poorva Garg and Mohit Gupta and Kevin Shah and Arnab Kumar Mondal and Dinesh Khandelwal and Parag Singla and Dinesh Garg},\nyear={2023},\nurl={https://openreview.net/forum?id=RqJZTlQMph}\n}", "github": "", "project": "", "reviewers": "zm9X;oHcq;nYyA", "site": "https://openreview.net/forum?id=RqJZTlQMph", "pdf_size": 23649777, "recommendation": "6;6;8", "confidence": "5;5;3", "correctness": "3;3;3", "technical_novelty": "3;3;4", "empirical_novelty": "3;3;4", "wc_summary_paper": "100;112;25", "wc_strength_and_weaknesses": "83;198;26", "wc_clarity_quality_novelty_and_reproducibility": "23;100;5", "wc_summary_review": "36;327;8", "wc_review": "242;737;64", "wc_reply_reviewers": "115;0;0", "wc_reply_authors": "2004;2351;105", "reply_reviewers": "1;0;0", "reply_authors": "7;6;3", "recommendation_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.9428090415820634 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 79.0, 38.49675310984031 ], "wc_strength_and_weaknesses_avg": [ 102.33333333333333, 71.53709216592156 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 42.666666666666664, 41.20140234938073 ], "wc_summary_review_avg": [ 123.66666666666667, 144.23206609103568 ], "wc_review_avg": [ 347.6666666666667, 284.72950126196775 ], "wc_reply_reviewers_avg": [ 38.333333333333336, 54.21151989096864 ], "wc_reply_authors_avg": [ 1486.6666666666667, 987.2028948273783 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 5.333333333333333, 1.699673171197595 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": -0.9999999999999998, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15659513963482557273&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;1", "aff_unique_norm": "Meta;Indian Institute of Technology Delhi;Fujitsu Research and Development Center;International Business Machines Corporation", "aff_unique_dep": "Meta Platforms, Inc.;;Research and Development;", "aff_unique_url": "https://meta.com;https://www.iitd.ac.in;https://www.fujitsu.com/global/;https://www.ibm.com", "aff_unique_abbr": "Meta;IIT Delhi;Fujitsu R&D;IBM", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Delhi", "aff_country_unique_index": "0;1;2;0;1", "aff_country_unique": "United States;India;Japan" }, { "id": "RrO3xNCqz7J", "title": "Discovering Distinctive ``Semantics'' in Super-Resolution Networks", "track": "main", "status": "Reject", "tldr": "", "abstract": "Image super-resolution (SR) is a representative low-level vision problem. Although deep SR networks have achieved extraordinary success, we are still unaware of their working mechanisms. Specifically, can SR networks learn semantic information, or do they just perform a complex mapping function? What hinders SR networks from generalizing to real-world data?
These questions not only raise our curiosity, but also influence SR network development. In this paper, we make the primary attempt to answer the above fundamental questions. After comprehensively analyzing the feature representations (via dimensionality reduction and visualization), we successfully discover the distinctive ``semantics'' in SR networks, i.e., deep degradation representations (DDR), which relate to image degradation instead of image content. We show that a well-trained deep SR network is naturally a good descriptor of degradation information. Our experiments also reveal two key factors (adversarial learning and global residual) that influence the extraction of such semantics. We further apply DDR in several interesting applications (such as distortion identification, blind SR and generalization evaluation) and achieve promising results, demonstrating the correctness and effectiveness of our findings.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Yihao Liu;Anran Liu;Jinjin Gu;Zhipeng Zhang;Wenhao Wu;Yu Qiao;Chao Dong", "authorids": "~Yihao_Liu1;~Anran_Liu1;~Jinjin_Gu1;~Zhipeng_Zhang2;~Wenhao_Wu2;~Yu_Qiao1;~Chao_Dong4", "gender": "M;;M;;M;;M", "homepage": ";;http://www.jasongt.com;;https://whwu95.github.io/;;http://xpixel.group/2010/01/20/chaodong.html", "dblp": "200/6534-1;;209/5709;;;;16/1278-5", "google_scholar": "WRIYcNwAAAAJ;;uMQ-G-QAAAAJ;;Kn5d1ckAAAAJ;;https://scholar.google.com/citations?hl=zh-CN", "orcid": "0000-0001-9874-0602;;0000-0002-4389-6236;;0000-0002-8511-743X;;", "linkedin": ";;jinjingu;;wenhao-w-usyd/;;", "or_profile": "~Yihao_Liu1;~Anran_Liu1;~Jinjin_Gu1;~Zhipeng_Zhang2;~Wenhao_Wu2;~Yu_Qiao1;~Chao_Dong4", "aff": "Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences;;University of Sydney;;Baidu;;Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Chinese Academy of Sciences", "aff_domain": "siat.ac.cn;;sydney.edu.au;;baidu.com;;siat.ac.cn", "position": "PhD student;;PhD student;;Senior R&D Engineer;;Full Professor", "bibtex": "@misc{\nliu2023discovering,\ntitle={Discovering Distinctive ``Semantics'' in Super-Resolution Networks},\nauthor={Yihao Liu and Anran Liu and Jinjin Gu and Zhipeng Zhang and Wenhao Wu and Yu Qiao and Chao Dong},\nyear={2023},\nurl={https://openreview.net/forum?id=RrO3xNCqz7J}\n}", "github": "", "project": "", "reviewers": "CXK4;xxDc;DDMx;EvNB", "site": "https://openreview.net/forum?id=RrO3xNCqz7J", "pdf_size": 3971947, "recommendation": "3;5;5;8", "confidence": "5;3;3;5", "correctness": "2;1;3;4", "technical_novelty": "2;2;3;4", "empirical_novelty": "2;2;3;4", "wc_summary_paper": "36;58;62;95", "wc_strength_and_weaknesses": "390;304;169;205", "wc_clarity_quality_novelty_and_reproducibility": "20;91;7;18", "wc_summary_review": "31;41;40;17", "wc_review": "477;494;278;335", "wc_reply_reviewers": "0;0;119;0", "wc_reply_authors": "1555;852;761;963", "reply_reviewers": "0;0;1;0", "reply_authors": "3;2;2;3", "recommendation_avg": [ 5.25, 1.7853571071357126 ], "confidence_avg": [ 4.0, 1.0 ], "correctness_avg": [ 2.5, 1.118033988749895 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 62.75, 21.087614848531352 ], "wc_strength_and_weaknesses_avg": [ 267.0, 86.52456298647223 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 34.0, 33.279122584587476 ], "wc_summary_review_avg": [ 32.25, 9.627434756984853 ], "wc_review_avg": [ 396.0, 91.93747875594588 ], "wc_reply_reviewers_avg": [ 
29.75, 51.528511525174096 ], "wc_reply_authors_avg": [ 1032.75, 309.8906056982044 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.14002800840280097, "corr_recommendation_correctness": 0.6888467201936643, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6328842853777243518&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Chinese Academy of Sciences;University of Sydney;Baidu", "aff_unique_dep": "Shenzhen Institutes of Advanced Technology;;Baidu, Inc.", "aff_unique_url": "http://www.siat.cas.cn;https://www.sydney.edu.au;https://www.baidu.com", "aff_unique_abbr": "SIAT;USYD;Baidu", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Shenzhen;", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "China;Australia" }, { "id": "RsSJ2_M2Nk4", "title": "SpaceEvo: Searching Hardware-Friendly Search Space for Efficient Int8 Inference", "track": "main", "status": "Withdraw", "tldr": "We introduce techniques to search a quantization-friendly search space for a given device", "abstract": "INT8 quantization is an essential compression tool to deploy a deep neural network (DNN) on resource-limited edge devices. While it greatly reduces model size and memory cost, current edge-regime DNN models cannot effectively utilize INT8 quantization to reduce inference latency. In this work, we find that the poor INT8 latency performance is due to the quantization-unfriendly issue: the operator and configuration (e.g., channel width) choices in a normal model design space lead to diverse quantization efficiency and can increase INT8 inference latency. To alleviate this issue, we propose SpaceEvo to efficiently search a novel hardware-aware, quantization-friendly search space, whose top-tier sub-networks achieve both superior quantization efficiency and accuracy. The key idea is to automatically evolve hardware-preferred operators and configurations guided by a search space quality metric, called Q-T score. However, naively training a candidate space from scratch for Q-T score evaluation brings prohibitive training cost, making it difficult to evolve the search space on large-scale tasks (e.g., ImageNet). We further propose to conduct block-wise training and build an INT8 accuracy lookup table to greatly reduce the cost. On diverse devices, SpaceEvo consistently outperforms existing manually-designed search spaces by producing both tiny and large quantized models with superior ImageNet accuracy and hardware efficiency. The discovered models, named SeqNet, achieve up to 10.1% accuracy improvement under the same latency.
Our study addresses the hardware-friendly search space design challenge in NAS and paves the way for searching the search space itself for efficient deployment.", "keywords": "Neural Architecture Search;Search Space Design;INT8 Quantization;Edge Hardware", "primary_area": "", "supplementary_material": "/attachment/c86f28ae3c021fa981c360b13665d7b17bc0e92c.zip", "author": "Li Lyna Zhang;Xudong Wang;Jiahang Xu;Quanlu Zhang;Yuqing Yang;Ningxin Zheng;Ting Cao;Mao Yang", "authorids": "~Li_Lyna_Zhang1;~Xudong_Wang7;jiahangxu@microsoft.com;quanlu.zhang@microsoft.com;~Yuqing_Yang1;~Ningxin_Zheng1;~Ting_Cao1;~Mao_Yang1", "gender": "F;M;;;;M;;", "homepage": "https://www.microsoft.com/en-us/research/people/lzhani/;;;;;https://dblp.org/pid/234/5381.html;https://www.microsoft.com/en-us/research/people/ticao/;", "dblp": "195/5224;;;;91/9064-1.html;;;", "google_scholar": "-_ItfAoAAAAJ;https://scholar.google.com/citations?view_op=list_works;;;4BtNQAEAAAAJ;;;LgJqohwAAAAJ", "orcid": ";;;;0000-0003-3518-5212;;;", "linkedin": ";;;;;;;", "or_profile": "~Li_Lyna_Zhang1;~Xudong_Wang7;jiahangxu@microsoft.com;quanlu.zhang@microsoft.com;~Yuqing_Yang1;~Ningxin_Zheng1;~Ting_Cao1;~Mao_Yang1", "aff": "Microsoft Research Asia;;;;Microsoft Research;ByteDance Inc.;Microsoft Research;", "aff_domain": "microsoft.com;;;;research.microsoft.com;bytedance.com;microsoft.com;", "position": "Researcher;;;;Researcher;Researcher;Principal Researcher;", "bibtex": "@misc{\nzhang2023spaceevo,\ntitle={SpaceEvo: Searching Hardware-Friendly Search Space for Efficient Int8 Inference},\nauthor={Li Lyna Zhang and Xudong Wang and Jiahang Xu and Quanlu Zhang and Yuqing Yang and Ningxin Zheng and Ting Cao and Mao Yang},\nyear={2023},\nurl={https://openreview.net/forum?id=RsSJ2_M2Nk4}\n}", "github": "", "project": "", "reviewers": "uDfp;JQ7p;VDU9;48FZ", "site": "https://openreview.net/forum?id=RsSJ2_M2Nk4", "pdf_size": 4714928, "recommendation": "3;5;6;6", "confidence": "4;4;4;3", "correctness": "2;3;3;4", "technical_novelty": "2;2;2;2", "empirical_novelty": "3;2;2;3", "wc_summary_paper": "94;268;97;97", "wc_strength_and_weaknesses": "275;1462;180;124", "wc_clarity_quality_novelty_and_reproducibility": "32;54;171;8", "wc_summary_review": "23;147;70;58", "wc_review": "424;1931;518;287", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 139.0, 74.48825410761081 ], "wc_strength_and_weaknesses_avg": [ 510.25, 552.1378337879048 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 66.25, 62.627370214627405 ], "wc_summary_review_avg": [ 74.5, 45.27968639467372 ], "wc_review_avg": [ 790.0, 663.8580420541729 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": -0.4714045207910316, "corr_recommendation_correctness": 0.8660254037844386, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:VWNzWAkRzi8J:scholar.google.com/&scioq=SpaceEvo:+Searching+Hardware-Friendly+Search+Space+for+Efficient+Int8+Inference&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Microsoft;ByteDance", "aff_unique_dep":
"Research;", "aff_unique_url": "https://www.microsoft.com/en-us/research/group/asia;https://www.bytedance.com", "aff_unique_abbr": "MSR Asia;ByteDance", "aff_campus_unique_index": "0", "aff_campus_unique": "Asia;", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "China;United States" }, { "id": "Rsrd5wK4kEh", "title": "How Powerful is Implicit Denoising in Graph Neural Networks", "track": "main", "status": "Reject", "tldr": "We theoretically analyze the denoising effect in graph neural networks.", "abstract": "Graph Neural Networks (GNNs), which aggregate features from neighbors, are widely used for processing graph-structured data due to their powerful representation learning capabilities. It is generally believed that GNNs can implicitly remove feature noises. However, existing works have not rigorously analyzed the implicit denoising effect in graph neural networks. In this work, we conduct a comprehensive theoretical study and analyze when and why implicit denoising happens in GNNs. Our theoretical analysis suggests that the implicit denoising largely depends on the connectivity and size of the graph, as well as the GNN architectures. Motivated by adversarial machine learning in improving the robustness of neural networks, we propose the adversarial graph signal denoising (AGSD) problem. By solving such a problem, we derive a robust graph convolution, where the smoothness of the node representations and the implicit denoising effect can be enhanced. Extensive empirical evaluations verify our theoretical analyses and the effectiveness of our proposed model.", "keywords": "GNN denoising;GNN theory", "primary_area": "", "supplementary_material": "", "author": "Songtao Liu;Zhitao Ying;Hanze Dong;Lu Lin;Jinghui Chen;Dinghao Wu", "authorids": "~Songtao_Liu2;~Zhitao_Ying1;~Hanze_Dong1;~Lu_Lin2;~Jinghui_Chen1;~Dinghao_Wu1", "gender": "M;M;M;F;M;", "homepage": "https://songtaoliu0823.github.io/;https://www.cs.yale.edu/homes/ying-rex;https://hendrydong.github.io/;https://louise-lulin.github.io;https://jinghuichen.github.io/;", "dblp": ";209/4936;228/7798;86/2209-1;67/5633;", "google_scholar": "https://scholar.google.com.tw/citations?hl=zh-CN;6fqNXooAAAAJ;g9WLzWoAAAAJ;8N04pBgAAAAJ;mKia7Y4AAAAJ;", "orcid": ";;;0000-0002-2539-3352;;", "linkedin": ";rex-ying-92770148/;hanze-dong/;lulin92/;;", "or_profile": "~Songtao_Liu2;~Zhitao_Ying1;~Hanze_Dong1;~Lu_Lin2;~Jinghui_Chen1;~Dinghao_Wu1", "aff": "Shanghai Jiaotong University;Yale University;Hong Kong University of Science and Technology;Pennsylvania State University;Pennsylvania State University;", "aff_domain": "sjtu.edu.cn;yale.edu;ust.hk;psu.edu;psu.edu;", "position": "Intern;Assistant Professor;PhD student;Assistant Professor;Assistant Professor;", "bibtex": "@misc{\nliu2023how,\ntitle={How Powerful is Implicit Denoising in Graph Neural Networks},\nauthor={Songtao Liu and Zhitao Ying and Hanze Dong and Lu Lin and Jinghui Chen and Dinghao Wu},\nyear={2023},\nurl={https://openreview.net/forum?id=Rsrd5wK4kEh}\n}", "github": "", "project": "", "reviewers": "Jway;ZXuu;2Src;J8PX", "site": "https://openreview.net/forum?id=Rsrd5wK4kEh", "pdf_size": 556873, "recommendation": "3;5;6;6", "confidence": "3;4;5;2", "correctness": "2;3;4;2", "technical_novelty": "2;2;4;2", "empirical_novelty": "1;3;4;2", "wc_summary_paper": "154;41;95;116", "wc_strength_and_weaknesses": "432;543;268;19", "wc_clarity_quality_novelty_and_reproducibility": "187;17;17;22", "wc_summary_review": "41;42;46;31", "wc_review": "814;643;426;188", "wc_reply_reviewers": 
"353;169;0;0", "wc_reply_authors": "1813;1319;555;11", "reply_reviewers": "1;1;0;0", "reply_authors": "4;4;2;1", "recommendation_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "correctness_avg": [ 2.75, 0.82915619758885 ], "technical_novelty_avg": [ 2.5, 0.8660254037844386 ], "empirical_novelty_avg": [ 2.5, 1.118033988749895 ], "wc_summary_paper_avg": [ 101.5, 40.8319727664486 ], "wc_strength_and_weaknesses_avg": [ 315.5, 197.1655395853951 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 60.75, 72.9190475801762 ], "wc_summary_review_avg": [ 40.0, 5.522680508593631 ], "wc_review_avg": [ 517.75, 234.84289961589215 ], "wc_reply_reviewers_avg": [ 130.5, 145.8158084708239 ], "wc_reply_authors_avg": [ 924.5, 692.1118045518368 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.18257418583505536, "corr_recommendation_correctness": 0.4923659639173309, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1694957940898771372&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;2;3;3", "aff_unique_norm": "Shanghai Jiao Tong University;Yale University;Hong Kong University of Science and Technology;Pennsylvania State University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.yale.edu;https://www.ust.hk;https://www.psu.edu", "aff_unique_abbr": "SJTU;Yale;HKUST;PSU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;0;1;1", "aff_country_unique": "China;United States" }, { "id": "RtB4CXS1Jxv", "title": "Data-Free Continual Graph Learning", "track": "main", "status": "Withdraw", "tldr": "consider and study an important yet ignored case in existing continual graph learning works ", "abstract": "Graph Neural Networks (GNNs), which effectively learn from static graph-structured data become ineffective when directly applied to streaming data in a continual learning (CL) scenario. A few recent works study this so-called \u201ccatastrophic forgetting\u201d problem in GNNs, where historical data are not available during the training stage. However, they make a strong assumption that full access of historical data is provided during the inference stage. This assumption could make the graph learning system impractical to deploy due to a number of reasons, such as limited storage, GDPR1 data retention policy, to name a few. In this work, we study continual graph learning without this strong assumption. Moreover, in practical continual learning, models are sometimes trained with accumulated batch data but required to do on-the-fly inference with a stream of test samples. In this case, without being re-inserted into previous training graphs for inference, streaming test nodes are often very sparsely connected. It makes the inference more difficult as the model is trained on a much more dense graph while required to infer on a sparse graph with insufficient neighborhood information. We propose a simple Replay GNN (ReGNN) to jointly solve the above two challenges without memory buffers (i.e., data-free): catastrophic forgetting and poor neighbour information during inference. 
Extensive experiments demonstrate the effectiveness of our model over baseline models, including competitive baselines with memory buffers.", "keywords": "continual learning;graph representation learning;graph neural networks;lifelong learning", "primary_area": "", "supplementary_material": "", "author": "Daiqing Qi;Handong Zhao;Yun Fu;Sheng Li", "authorids": "~Daiqing_Qi2;~Handong_Zhao3;~Yun_Fu1;~Sheng_Li3", "gender": "M;M;M;", "homepage": "https://daiqing-qi.github.io/research.html;http://www1.ece.neu.edu/~yunfu/;http://sheng-li.org;https://hdzhao.github.io/", "dblp": "229/9064;00/5815-1;23/3439-1;79/8522", "google_scholar": "FIa-pykAAAAJ;https://scholar.google.com.tw/citations?user=h-JEcQ8AAAAJ;DEncVcYAAAAJ;0f-YOFgAAAAJ", "orcid": "0000-0001-9543-5792;0000-0002-5098-2853;0000-0003-1205-8632;", "linkedin": ";furaymond/;sheng-li-15a70022/;", "or_profile": "~Daiqing_Qi2;~Yun_Fu1;~Sheng_Li3;~Handong_Zhao1", "aff": "University of Virginia, Charlottesville;Northeastern University;University of Virginia, Charlottesville;Adobe Systems", "aff_domain": "virginia.edu;northeastern.edu;virginia.edu;adobe.com", "position": "PhD student;Full Professor;Assistant Professor;Research Scientist", "bibtex": "@misc{\nqi2023datafree,\ntitle={Data-Free Continual Graph Learning },\nauthor={Daiqing Qi and Handong Zhao and Yun Fu and Sheng Li},\nyear={2023},\nurl={https://openreview.net/forum?id=RtB4CXS1Jxv}\n}", "github": "", "project": "", "reviewers": "2BDZ;gLth;4Az4;VcNx", "site": "https://openreview.net/forum?id=RtB4CXS1Jxv", "pdf_size": 353213, "recommendation": "3;3;6;6", "confidence": "5;4;3;3", "correctness": "3;2;4;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "43;73;156;15", "wc_strength_and_weaknesses": "161;267;224;60", "wc_clarity_quality_novelty_and_reproducibility": "14;47;54;10", "wc_summary_review": "12;35;49;15", "wc_review": "230;422;483;100", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.5, 1.5 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 71.75, 52.789085046058524 ], "wc_strength_and_weaknesses_avg": [ 178.0, 77.86205751198719 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 31.25, 19.45989465541887 ], "wc_summary_review_avg": [ 27.75, 15.122417134836613 ], "wc_review_avg": [ 308.75, 152.45224662168806 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.9045340337332909, "corr_recommendation_correctness": 0.7071067811865476, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:5so-HJ-5UgIJ:scholar.google.com/&scioq=Data-Free+Continual+Graph+Learning&hl=en&as_sdt=0,10", "gs_version_total": 0, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "University of Virginia;Northeastern University;Adobe", "aff_unique_dep": ";;Adobe Systems Incorporated", "aff_unique_url": "https://www.virginia.edu;https://www.northeastern.edu;https://www.adobe.com", "aff_unique_abbr": "UVA;NEU;Adobe", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Charlottesville;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "RuLGBgoonoM", "title": "ViTKD: Practical Guidelines 
for ViT Feature Knowledge Distillation", "track": "main", "status": "Withdraw", "tldr": "A feature-based knowledge distillation method for ViT", "abstract": "Knowledge Distillation (KD) for Convolutional Neural Network (CNN) is extensively studied as a way to boost the performance of a small model. Recently, Vision Transformer (ViT) has achieved great success on many computer vision tasks and KD for ViT is also desired. However, besides the output logit-based KD, other feature-based KD methods for CNNs cannot be directly applied to ViT due to the huge structure gap. In this paper, we explore the way of feature-based distillation for ViT. Based on the nature of feature maps in ViT, we design a series of controlled experiments and derive three practical guidelines for ViT's feature distillation. Some of our findings are even opposite to the practices in the CNN era. Based on the three guidelines, we propose our feature-based method ViTKD which brings consistent and considerable improvement to the student. On ImageNet-1k, we boost DeiT-Tiny from 74.42% to 76.06%, DeiT-Small from 80.55% to 81.95%, and DeiT-Base from 81.76% to 83.46%. Moreover, ViTKD and the logit-based KD method are complementary and can be applied together directly. This combination can further improve the performance of the student. Specifically, the student DeiT-Tiny, Small, and Base achieve 77.78%, 83.59%, and 85.41%, respectively.", "keywords": "Knowledge Distillation;Vision Transformer;Image Classification", "primary_area": "", "supplementary_material": "/attachment/d8f6ce2775e67b05abbf0a445c2c7b5e42d19722.zip", "author": "Zhendong Yang;Zhe Li;Ailing Zeng;Zexian Li;Chun Yuan;Yu Li", "authorids": "~Zhendong_Yang2;~Zhe_Li11;~Ailing_Zeng1;~Zexian_Li1;~Chun_Yuan1;~Yu_Li4", "gender": "M;M;F;M;M;M", "homepage": ";;https://ailingzeng.site/;;https://www.sigs.tsinghua.edu.cn/fg3/105064.jhtml;https://yu-li.github.io/", "dblp": "14/1820;;226/4720;;;34/2997-3", "google_scholar": "M9qKrogAAAAJ;mmTJPJ4AAAAJ;Tn7fzS8AAAAJ;8SabwVEAAAAJ;https://scholar.google.com.hk/citations?user=fYdxi2sAAAAJ;j9lwU7kAAAAJ", "orcid": ";;;;;", "linkedin": ";;%E7%88%B1%E7%8E%B2-%E6%9B%BE-65504112a/;;;", "or_profile": "~Zhendong_Yang2;~Zhe_Li11;~Ailing_Zeng1;~Zexian_Li1;~Chun_Yuan1;~Yu_Li4", "aff": " Tsinghua University;AMD;International Digital Economy Academy;ByteDance Inc.;Tsinghua University;International Digital Economy Academy", "aff_domain": "mails.tsinghua.edu.cn;amd.com;idea.edu.cn;bytedance.com;tsinghua.edu.cn;idea.edu.cn", "position": "MS student;Researcher;Researcher;Researcher;Full Professor;Principal Researcher", "bibtex": "@misc{\nyang2023vitkd,\ntitle={Vi{TKD}: Practical Guidelines for ViT Feature Knowledge Distillation},\nauthor={Zhendong Yang and Zhe Li and Ailing Zeng and Zexian Li and Chun Yuan and Yu Li},\nyear={2023},\nurl={https://openreview.net/forum?id=RuLGBgoonoM}\n}", "github": "", "project": "", "reviewers": "2TNp;sEEd;3DaB;QYXf", "site": "https://openreview.net/forum?id=RuLGBgoonoM", "pdf_size": 5386252, "recommendation": "3;3;3;5", "confidence": "4;5;4;4", "correctness": "2;2;2;2", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "82;57;19;42", "wc_strength_and_weaknesses": "384;178;102;421", "wc_clarity_quality_novelty_and_reproducibility": "31;26;18;73", "wc_summary_review": "33;22;12;36", "wc_review": "530;283;151;572", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.5, 0.8660254037844386 ], 
"confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 2.0, 0.0 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 50.0, 22.901964981197573 ], "wc_strength_and_weaknesses_avg": [ 271.25, 134.6093886027271 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 37.0, 21.295539439046856 ], "wc_summary_review_avg": [ 25.75, 9.496709956611289 ], "wc_review_avg": [ 384.0, 174.03304283957112 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.3333333333333333, "corr_recommendation_correctness": 0.0, "gs_citation": 59, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4504943264737055020&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2;3;0;2", "aff_unique_norm": "Tsinghua University;Advanced Micro Devices, Inc.;International Digital Economy Academy;ByteDance", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.amd.com;;https://www.bytedance.com", "aff_unique_abbr": "THU;AMD;;ByteDance", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "China;United States;" }, { "id": "Rumwc_raZvE", "title": "On Intriguing Layer-Wise Properties of Robust Overfitting in Adversarial Training", "track": "main", "status": "Reject", "tldr": "", "abstract": "Adversarial training has proven to be one of the most effective methods to defend against adversarial attacks. Nevertheless, robust overfitting is a common obstacle in adversarial training of deep networks. There is a common belief that the features learned by different network layers have different properties, however, existing works generally investigate robust overfitting by considering a DNN as a single unit and hence the impact of different network layers on robust overfitting remains unclear. In this work, we divide a DNN into a series of layers and investigate the effect of different network layers on robust overfitting. We find that different layers exhibit distinct properties towards robust overfitting, and in particular, robust overfitting is mostly related to the optimization of latter parts of the network. Based upon the observed effect, we propose a robust adversarial training (RAT) prototype: in a mini-batch, we optimize the front parts of the network as usual, and adopt additional measures to regularize the optimization of the latter parts. 
Based on the prototype, we designed two realizations of RAT, and extensive experiments demonstrate that RAT can eliminate robust overfitting and boost adversarial robustness over the standard adversarial training.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Duke Nguyen;Chaojian Yu;Vinoth Nandakumar;Young Lee;Tongliang Liu", "authorids": "~Duke_Nguyen1;~Chaojian_Yu1;~Vinoth_Nandakumar1;young.lee@mq.edu.au;~Tongliang_Liu1", "gender": "M;M;M;;M", "homepage": ";;https://sites.google.com/view/vinothmn/;;https://tongliang-liu.github.io/", "dblp": ";223/9872;231/5054;;150/6667", "google_scholar": ";b3ltuG8AAAAJ;SKq_-mgAAAAJ;;https://scholar.google.com.au/citations?user=EiLdZ_YAAAAJ", "orcid": ";;;;", "linkedin": "dukeng/;;;;", "or_profile": "~Duke_Nguyen1;~Chaojian_Yu1;~Vinoth_Nandakumar1;young.lee@mq.edu.au;~Tongliang_Liu1", "aff": "University Of Sydney;The University of Sydney;University of Sydney;;University of Sydney", "aff_domain": "uni.sydney.edu.au;uni.sydney.edu.au;sydney.edu.au;;sydney.edu.au", "position": "MS student;PhD student;Researcher;;Lecturer", "bibtex": "@misc{\nnguyen2023on,\ntitle={On Intriguing Layer-Wise Properties of Robust Overfitting in Adversarial Training},\nauthor={Duke Nguyen and Chaojian Yu and Vinoth Nandakumar and Young Lee and Tongliang Liu},\nyear={2023},\nurl={https://openreview.net/forum?id=Rumwc_raZvE}\n}", "github": "", "project": "", "reviewers": "VqAU;2pex;8P5a;jxMW", "site": "https://openreview.net/forum?id=Rumwc_raZvE", "pdf_size": 845708, "recommendation": "3;3;5;6", "confidence": "4;5;4;4", "correctness": "2;3;3;4", "technical_novelty": "3;2;3;4", "empirical_novelty": "2;2;3;4", "wc_summary_paper": "95;114;58;53", "wc_strength_and_weaknesses": "268;479;285;68", "wc_clarity_quality_novelty_and_reproducibility": "38;42;21;14", "wc_summary_review": "21;56;33;43", "wc_review": "422;691;397;178", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 80.0, 25.465663156493687 ], "wc_strength_and_weaknesses_avg": [ 275.0, 145.44242847257468 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 28.75, 11.60549438843516 ], "wc_summary_review_avg": [ 38.25, 12.871965661856 ], "wc_review_avg": [ 422.0, 182.01785626690585 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.5555555555555555, "corr_recommendation_correctness": 0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:iOqj-rrfsIgJ:scholar.google.com/&scioq=On+Intriguing+Layer-Wise+Properties+of+Robust+Overfitting+in+Adversarial+Training&hl=en&as_sdt=0,5", "gs_version_total": 3, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Sydney", "aff_unique_dep": "", "aff_unique_url": "https://www.sydney.edu.au", "aff_unique_abbr": "USYD", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Australia" }, { "id": "RusKt9aoTON", "title": "FedGSNR: Accelerating Federated Learning on Non-IID Data via Maximum Gradient Signal to Noise Ratio", 
"track": "main", "status": "Reject", "tldr": "This paper interprets federated learning algorithms with Gradient Signal to Noise Ratio and proposes the corresponding method to accelerate model convergence with optimal local updates in non-iid scenarios.", "abstract": "Federated learning (FL) allows participants jointly training a model without direct data sharing. In such a process, participants rather than the central server perform local updates of stochastic gradient descent (SGD) and the central server aggregates the gradients from the participants to update the global model. However, the non-iid training data in participants significantly impact global model convergence.Most of existing studies addressed this issue by utilizing variance reduction or regularization. However, these studies focusing on specific datasets lack theoretical guarantee for efficient model training. In this paper, we provide a novel perspective on the non-iid issue by optimizing Gradient Signal to Noise Ratio (GSNR) during model training. In each participant, we decompose local gradients calculated on the non-iid training data into the signal and noise components and then speed up the model convergence by maximizing GSNR. We prove that GSNR can be maximized by using the optimal number of local updates. Subsequently, we develop FedGSNR to compute the optimal number of local updates for each participant, which can be applied to existing gradient calculation algorithms to accelerate the global model convergence. Moreover, according to the positive correlation between GSNR and the quality of shared information, FedGSNR allows the server to accurately evaluate contributions of different participants (i.e., the quality of local datasets) by utilizing GSNR. Extensive experimental evaluations demonstrate that FedGSNR achieves on average a 1.69\u00d7 speedup with comparable accuracy.", "keywords": "Federated learning;Gradient Signal to Noise Ratio;Optimal Local Updates;Non-IID Data", "primary_area": "", "supplementary_material": "", "author": "Qi Tan;Yi Zhao;Ke Xu;Qi Li", "authorids": "~Qi_Tan4;~Yi_Zhao10;~Ke_Xu9;~Qi_Li12", "gender": "M;M;;", "homepage": ";;;https://sites.google.com/site/qili2012/", "dblp": ";51/4138-11;;181/2688-2", "google_scholar": ";;;", "orcid": "0000-0003-4991-5693;0000-0003-3632-3381;;0000-0001-8776-8730", "linkedin": ";;;", "or_profile": "~Qi_Tan4;~Yi_Zhao10;~Ke_Xu9;~Qi_Li12", "aff": "Tsinghua University;Tsinghua University;;Tsinghua University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;;tsinghua.edu.cn", "position": "PhD student;Postdoc;;Associate Professor", "bibtex": "@misc{\ntan2023fedgsnr,\ntitle={Fed{GSNR}: Accelerating Federated Learning on Non-{IID} Data via Maximum Gradient Signal to Noise Ratio},\nauthor={Qi Tan and Yi Zhao and Ke Xu and Qi Li},\nyear={2023},\nurl={https://openreview.net/forum?id=RusKt9aoTON}\n}", "github": "", "project": "", "reviewers": "8mk1;kU4r;Dx1S;zXke", "site": "https://openreview.net/forum?id=RusKt9aoTON", "pdf_size": 1226413, "recommendation": "3;3;6;6", "confidence": "2;3;2;1", "correctness": "3;2;2;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;3;2;3", "wc_summary_paper": "135;30;4;67", "wc_strength_and_weaknesses": "110;318;234;61", "wc_clarity_quality_novelty_and_reproducibility": "334;1;4;68", "wc_summary_review": "8;18;4;78", "wc_review": "587;367;246;274", "wc_reply_reviewers": "199;327;0;0", "wc_reply_authors": "1547;1951;314;285", "reply_reviewers": "2;1;0;0", "reply_authors": "3;4;1;1", "recommendation_avg": [ 4.5, 1.5 ], 
"confidence_avg": [ 2.0, 0.7071067811865476 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 59.0, 49.25951684700125 ], "wc_strength_and_weaknesses_avg": [ 180.75, 101.26543092289688 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 101.75, 136.7340027206108 ], "wc_summary_review_avg": [ 27.0, 29.88310559496787 ], "wc_review_avg": [ 368.5, 133.86653801454642 ], "wc_reply_reviewers_avg": [ 131.5, 139.0692273653665 ], "wc_reply_authors_avg": [ 1024.25, 738.7622672416344 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.25, 1.299038105676658 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.7071067811865476, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:JezgprrPhsMJ:scholar.google.com/&scioq=FedGSNR:+Accelerating+Federated+Learning+on+Non-IID+Data+via+Maximum+Gradient+Signal+to+Noise+Ratio&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "RvV2xvoML7G", "title": "Treatment Effect Estimation with Collider Bias and Confounding Bias", "track": "main", "status": "Reject", "tldr": "", "abstract": "To answer causal questions from observational data, it is important to consider the mechanisms that determine which data values are observed and which are missing. Prior work has considered the treatment assignment mechanism and proposed methods to remove the confounding bias from the common causes of treatment and outcome. However, there are other issues in sample selection, commonly overlooked in prior work, that can bias the treatment effect estimation, such as the issue of censored outcome as a form of collider bias. In this paper, we propose the novel Selection Controlled CounterFactual Regression (SC-CFR) to simultaneously address confounding and collider bias. Specifically, we first calculate the magnitude of the collider bias of different instances by estimating the selection model and then add a control term to remove the collider bias while learning a balanced representation to remove the confounding bias when estimating the outcome model. Our method is shown to provide unbiased treatment effect estimates from observational data with confounding and collider bias. 
Extensive empirical results on both synthetic and real-world datasets show that our method consistently outperforms benchmarks when both types of biases exist.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Baohong Li;Kun Kuang;Ruoxuan Xiong;Fei Wu", "authorids": "~Baohong_Li1;~Kun_Kuang1;~Ruoxuan_Xiong1;~Fei_Wu1", "gender": "M;M;;M", "homepage": ";http://kunkuang.github.io;http://www.ruoxuanxiong.com/;https://person.zju.edu.cn/wufei", "dblp": "83/3116;194/4245;222/2927;84/3254-1", "google_scholar": "M08DvYsAAAAJ;https://scholar.google.com.hk/citations?user=FOsNiMQAAAAJ;lg_0u-0AAAAJ;XJLn4MYAAAAJ", "orcid": "0000-0002-3222-002X;0009-0000-7528-8131;;", "linkedin": ";;;", "or_profile": "~Baohong_Li1;~Kun_Kuang1;~Ruoxuan_Xiong1;~Fei_Wu1", "aff": "Zhejiang University;Zhejiang University;Emory University;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn;emory.edu;zju.edu.cn", "position": "PhD student;Associate Professor;Assistant Professor;Full Professor", "bibtex": "@misc{\nli2023treatment,\ntitle={Treatment Effect Estimation with Collider Bias and Confounding Bias},\nauthor={Baohong Li and Kun Kuang and Ruoxuan Xiong and Fei Wu},\nyear={2023},\nurl={https://openreview.net/forum?id=RvV2xvoML7G}\n}", "github": "", "project": "", "reviewers": "CGRS;e76D;xiZL", "site": "https://openreview.net/forum?id=RvV2xvoML7G", "pdf_size": 429046, "recommendation": "3;5;5", "confidence": "4;4;4", "correctness": "1;3;4", "technical_novelty": "2;2;2", "empirical_novelty": "2;2;2", "wc_summary_paper": "64;94;29", "wc_strength_and_weaknesses": "390;351;2", "wc_clarity_quality_novelty_and_reproducibility": "27;45;2", "wc_summary_review": "27;31;463", "wc_review": "508;521;496", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "238;516;366", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.6666666666666665, 1.247219128924647 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 62.333333333333336, 26.562295750848712 ], "wc_strength_and_weaknesses_avg": [ 247.66666666666666, 174.44069351947545 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 24.666666666666668, 17.632041540584257 ], "wc_summary_review_avg": [ 173.66666666666666, 204.5960790327018 ], "wc_review_avg": [ 508.3333333333333, 10.208928554075703 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 373.3333333333333, 113.61142352578617 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.9449111825230679, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:XNf0ndJn6P0J:scholar.google.com/&scioq=Treatment+Effect+Estimation+with+Collider+Bias+and+Confounding+Bias&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Zhejiang University;Emory University", "aff_unique_dep": ";", "aff_unique_url": "https://www.zju.edu.cn;https://www.emory.edu", "aff_unique_abbr": "ZJU;Emory", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "China;United States" }, { "title": "Verifying the Union of Manifolds Hypothesis for Image Data", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11032", "id": "Rvee9CAX4fi", "poster": 
"/media/PosterPDFs/ICLR%202023/11032.png?t=1682900087.523577", "openreview": "https://openreview.net/forum?id=Rvee9CAX4fi", "slides": "https://iclr.cc/virtual/2023/poster/11032", "video": "https://iclr.cc/virtual/2023/poster/11032", "author_site": "Bradley Brown, Anthony Caterini, Brendan Ross, Jesse Cresswell, Gabriel Loaiza-Ganem", "tldr": "We show data of interest has varying intrinsic dimension, thus conforming to a union of manifolds hypothesis rather than the manifold hypothesis; and we study some implications in deep learning.", "abstract": "Deep learning has had tremendous success at learning low-dimensional representations of high-dimensional data. This success would be impossible if there was no hidden low-dimensional structure in data of interest; this existence is posited by the manifold hypothesis, which states that the data lies on an unknown manifold of low intrinsic dimension. In this paper, we argue that this hypothesis does not properly capture the low-dimensional structure typically present in image data. Assuming that data lies on a single manifold implies intrinsic dimension is identical across the entire data space, and does not allow for subregions of this space to have a different number of factors of variation. To address this deficiency, we consider the union of manifolds hypothesis, which states that data lies on a disjoint union of manifolds of varying intrinsic dimensions. We empirically verify this hypothesis on commonly-used image datasets, finding that indeed, observed data lies on a disconnected set and that intrinsic dimension is not constant. We also provide insights into the implications of the union of manifolds hypothesis in deep learning, both supervised and unsupervised, showing that designing models with an inductive bias for this structure improves performance across classification and generative modelling tasks. Our code is available at https://github.com/layer6ai-labs/UoMH.", "keywords": "manifold hypothesis;geometry;generative models", "primary_area": "", "supplementary_material": "/attachment/13cf696bee49c34808a86c21d68dd66a5a5d2fd5.zip", "author": "Bradley CA Brown;Anthony L. Caterini;Brendan Leigh Ross;Jesse C Cresswell;Gabriel Loaiza-Ganem", "authorids": "~Bradley_CA_Brown1;~Anthony_L._Caterini1;~Brendan_Leigh_Ross1;~Jesse_C_Cresswell1;~Gabriel_Loaiza-Ganem1", "gender": ";M;M;;M", "homepage": ";;;https://jescresswell.github.io/;https://sites.google.com/view/gabriel-loaiza-ganem/about-me", "dblp": ";167/4383;295/0098;279/6764;238/1617", "google_scholar": ";34sCXQEAAAAJ;https://scholar.google.ca/citations?user=TyY1aSYAAAAJ;https://scholar.google.ca/citations?hl=en;https://scholar.google.com/citations?hl=en", "orcid": ";;;0000-0002-9284-8804;", "linkedin": ";;brendan-ross;;", "or_profile": "~Bradley_CA_Brown1;~Anthony_L._Caterini1;~Brendan_Leigh_Ross1;~Jesse_C_Cresswell1;~Gabriel_Loaiza-Ganem1", "aff": ";Layer6;Layer 6 AI;Layer 6 AI;Layer 6 AI", "aff_domain": ";layer6.ai;layer6.ai;layer6.ai;layer6.ai", "position": ";Researcher;Senior Machine Learning Scientist;Staff Machine Learning Scientist;Machine Learning Research Scientist", "bibtex": "@inproceedings{\nbrown2023verifying,\ntitle={Verifying the Union of Manifolds Hypothesis for Image Data},\nauthor={Bradley CA Brown and Anthony L. 
Caterini and Brendan Leigh Ross and Jesse C Cresswell and Gabriel Loaiza-Ganem},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=Rvee9CAX4fi}\n}", "github": "", "project": "", "reviewers": "GFby;FLmA;6dov", "pdf_size": 1407475, "recommendation": "3;5;8", "confidence": "4;5;4", "correctness": "2;2;3", "technical_novelty": "2;2;4", "empirical_novelty": "2;0;4", "wc_summary_paper": "37;37;62", "wc_strength_and_weaknesses": "518;199;188", "wc_clarity_quality_novelty_and_reproducibility": "17;10;34", "wc_summary_review": "47;54;67", "wc_review": "619;300;351", "wc_reply_reviewers": "0;20;181", "wc_reply_authors": "1522;869;811", "reply_reviewers": "0;1;2", "reply_authors": "3;2;3", "recommendation_avg": [ 5.333333333333333, 2.0548046676563256 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.9428090415820634 ], "empirical_novelty_avg": [ 2.0, 1.632993161855452 ], "wc_summary_paper_avg": [ 45.333333333333336, 11.785113019775793 ], "wc_strength_and_weaknesses_avg": [ 301.6666666666667, 153.03666953453418 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 20.333333333333332, 10.077477638553981 ], "wc_summary_review_avg": [ 56.0, 8.286535263104035 ], "wc_review_avg": [ 423.3333333333333, 139.91505359403692 ], "wc_reply_reviewers_avg": [ 67.0, 81.02263058347752 ], "wc_reply_authors_avg": [ 1067.3333333333333, 322.36866404096344 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.11470786693528084, "corr_recommendation_correctness": 0.9176629354822472, "gs_citation": 46, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16700134185547187186&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=Rvee9CAX4fi", "email": ";layer6.ai;layer6.ai;layer6.ai;layer6.ai", "author_num": 5, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Layer6 AI;Layer 6 AI", "aff_unique_dep": ";", "aff_unique_url": "https://layer6.ai;https://layer6.ai", "aff_unique_abbr": "Layer6;Layer 6 AI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Canada" }, { "id": "Ry-cTiH_cus", "title": "Bandwith Enables Generalization in Quantum Kernel Models", "track": "main", "status": "Reject", "tldr": "", "abstract": "Quantum computers are known to provide speedups over classical state-of-the-art machine learning methods in some specialized settings. For example, quantum kernel methods have been shown to provide an exponential speedup on a learning version of the discrete logarithm problem. Understanding the generalization of quantum models is essential to realizing similar speedups on practically interesting problems. Recent results demonstrate that generalization is hindered by the exponential size of the quantum feature space. Although these results suggest that quantum models cannot generalize when the number of qubits is large, in this paper we show that these results rely on overly restrictive assumptions. We consider a wider class of models by varying a hyperparameter that we call quantum kernel bandwidth. We analyze the large-qubit limit and provide explicit formulas for the generalization of a quantum model that can be solved in closed form. 
Specifically, we show that changing the value of bandwidth can take a model from provably not being able to generalize on any target function to good generalization for well-aligned targets. Our analysis shows how the bandwidth controls the spectrum of the kernel integral operator, and thereby the inductive bias of the model. We demonstrate empirically that our theory correctly predicts how varying the bandwidth affects generalization of quantum models on challenging datasets, including those far outside our theoretical assumptions. We discuss the implications of our results for quantum advantage in machine learning.", "keywords": "kernel methods;generalization error;quantum machine learning;spectral bias", "primary_area": "", "supplementary_material": "/attachment/26f7fab5752fbc0b1330ed67bb4605e6780a02a9.zip", "author": "Abdulkadir Canatar;Evan Peters;Cengiz Pehlevan;Stefan M. Wild;Ruslan Shaydulin", "authorids": "~Abdulkadir_Canatar1;~Evan_Peters1;~Cengiz_Pehlevan2;~Stefan_M._Wild1;~Ruslan_Shaydulin1", "gender": "M;;;M;", "homepage": ";https://peterse.github.io/;https://pehlevan.seas.harvard.edu/;https://wildsm.github.io/;https://shaydul.in", "dblp": "258/0594;260/1050;145/3480;05/1044;", "google_scholar": "_F4TER8AAAAJ;JLvD4RkAAAAJ;veDLTPEAAAAJ;https://scholar.google.com/citations?hl=en;PxOuGGcAAAAJ", "orcid": "0000-0002-0140-5718;0000-0001-7083-6733;0000-0001-9767-6063;0000-0002-6099-2772;0000-0002-8657-2848", "linkedin": ";;;;", "or_profile": "~Abdulkadir_Canatar1;~Evan_Peters1;~Cengiz_Pehlevan2;~Stefan_M._Wild1;~Ruslan_Shaydulin1", "aff": "Flatiron Institute;University of Waterloo;School of Engineering and Applied Sciences, Harvard University;Northwestern University;J.P. Morgan Chase", "aff_domain": "flatironinstitute.org;uwaterloo.ca;seas.harvard.edu;northwestern.edu;jpmorgan.com", "position": "Postdoc;PhD student;Assistant Professor;Researcher;Researcher", "bibtex": "@misc{\ncanatar2023bandwith,\ntitle={Bandwith Enables Generalization in Quantum Kernel Models},\nauthor={Abdulkadir Canatar and Evan Peters and Cengiz Pehlevan and Stefan M. 
Wild and Ruslan Shaydulin},\nyear={2023},\nurl={https://openreview.net/forum?id=Ry-cTiH_cus}\n}", "github": "", "project": "", "reviewers": "P99u;YWYZ;jwdc;em9T", "site": "https://openreview.net/forum?id=Ry-cTiH_cus", "pdf_size": 2970345, "recommendation": "3;3;6;8", "confidence": "4;3;3;2", "correctness": "3;3;3;4", "technical_novelty": "2;2;3;2", "empirical_novelty": "2;0;3;2", "wc_summary_paper": "42;72;226;49", "wc_strength_and_weaknesses": "48;156;46;90", "wc_clarity_quality_novelty_and_reproducibility": "16;29;10;154", "wc_summary_review": "29;69;43;31", "wc_review": "135;326;325;324", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "179;1050;463;272", "reply_reviewers": "0;0;0;0", "reply_authors": "1;2;1;1", "recommendation_avg": [ 5.0, 2.1213203435596424 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.75, 1.0897247358851685 ], "wc_summary_paper_avg": [ 97.25, 75.1577507646417 ], "wc_strength_and_weaknesses_avg": [ 85.0, 44.598206241955516 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 52.25, 59.14547742642712 ], "wc_summary_review_avg": [ 43.0, 15.937377450509228 ], "wc_review_avg": [ 277.5, 82.27545198903498 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 491.0, 338.58898387277753 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.8333333333333334, "corr_recommendation_correctness": 0.8164965809277261, "gs_citation": 55, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5100230008753180414&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Flatiron Institute;University of Waterloo;Harvard University;Northwestern University;JPMorgan Chase & Co.", "aff_unique_dep": ";;School of Engineering and Applied Sciences;;", "aff_unique_url": "https://flatironinstitute.org;https://uwaterloo.ca;https://www.harvard.edu;https://www.northwestern.edu;https://www.jpmorganchase.com", "aff_unique_abbr": "Flatiron;UW;Harvard;NU;JPM", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "United States;Canada" }, { "id": "Rywi6F_HVCO", "title": "Augmentative Topology Agents For Open-Ended Learning", "track": "main", "status": "Reject", "tldr": "This work brings generalization capabilities and ability to solve complex environments to Open Ended Learning framework by adding agents that augment their topologies over time.", "abstract": "In this work, we tackle the problem of Open-Ended Learning by a method that simultaneously evolves agents and increasingly challenging environments. Unlike previous open-ended approaches that optimize agents using a fixed neural network topology, we hypothesize that generalization can be improved by allowing agents' controllers to become more complex as they encounter more difficult environments. Our method, Augmentative Topology EPOET (ATEP), extends the Enhanced Paired Open-Ended Trailblazer (EPOET) algorithm by allowing agents to evolve their own neural network structures over time, adding complexity and capacity as necessary. Empirical results demonstrate that ATEP results in general agents capable of solving more environments than a fixed-topology baseline. 
We also investigate mechanisms for transferring agents between environments and find that a species-based approach further improves the performance and generalization of agents.", "keywords": "Open-Ended Learning;NeuroEvolution", "primary_area": "", "supplementary_material": "/attachment/92d0ac5cfd2bdf54196bcd65eda561e25d1c64ad.zip", "author": "Muhammad Umair Nasir;Michael Beukman;Steven James;Christopher Wesley Cleghorn", "authorids": "~Muhammad_Umair_Nasir1;~Michael_Beukman1;~Steven_James1;~Christopher_Wesley_Cleghorn1", "gender": "M;;M;", "homepage": "https://umair-nasir14.github.io/;;;", "dblp": "319/7239;;195/8202;", "google_scholar": "O-4Fbb0AAAAJ;;;", "orcid": "0000-0002-2458-9599;;;", "linkedin": "umair-nasir/;;;", "or_profile": "~Muhammad_Umair_Nasir1;~Michael_Beukman1;~Steven_James1;~Christopher_Wesley_Cleghorn1", "aff": "University of Witwatersrand;;University of the Witwatersrand;", "aff_domain": "wits.ac.za;;wits.ac.za;", "position": "MS student;;Lecturer;", "bibtex": "@misc{\nnasir2023augmentative,\ntitle={Augmentative Topology Agents For Open-Ended Learning},\nauthor={Muhammad Umair Nasir and Michael Beukman and Steven James and Christopher Wesley Cleghorn},\nyear={2023},\nurl={https://openreview.net/forum?id=Rywi6F_HVCO}\n}", "github": "", "project": "", "reviewers": "TYB9;JKxU;TUdn;SyVu", "site": "https://openreview.net/forum?id=Rywi6F_HVCO", "pdf_size": 2916034, "recommendation": "3;3;3;3", "confidence": "4;4;3;5", "correctness": "3;2;2;2", "technical_novelty": "2;3;2;2", "empirical_novelty": "2;3;2;1", "wc_summary_paper": "59;161;22;29", "wc_strength_and_weaknesses": "678;454;130;170", "wc_clarity_quality_novelty_and_reproducibility": "59;432;30;77", "wc_summary_review": "47;151;25;5", "wc_review": "843;1198;207;281", "wc_reply_reviewers": "111;255;7;0", "wc_reply_authors": "2163;2109;480;883", "reply_reviewers": "1;2;1;0", "reply_authors": "4;5;2;2", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 2.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 67.75, 55.60294506588657 ], "wc_strength_and_weaknesses_avg": [ 358.0, 223.0156945149825 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 149.5, 163.96112344089374 ], "wc_summary_review_avg": [ 57.0, 56.2672195865408 ], "wc_review_avg": [ 632.25, 408.87123584326645 ], "wc_reply_reviewers_avg": [ 93.25, 103.21427953534337 ], "wc_reply_authors_avg": [ 1408.75, 741.3219189394038 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.25, 1.299038105676658 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3659106976278590256&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 16, "aff_unique_index": "0;0", "aff_unique_norm": "University of the Witwatersrand", "aff_unique_dep": "", "aff_unique_url": "https://www.wits.ac.za", "aff_unique_abbr": "Wits", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Africa" }, { "title": "Long-Tailed Learning Requires Feature Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11815", "id": "S-h1oFv-mq", "poster": "/media/PosterPDFs/ICLR%202023/11815.png?t=1682481310.2564442", "openreview": 
"https://openreview.net/forum?id=S-h1oFv-mq", "slides": "https://iclr.cc/virtual/2023/poster/11815", "video": "https://iclr.cc/virtual/2023/poster/11815", "author_site": "Thomas Laurent, James von Brecht, Xavier Bresson", "tldr": "We study the importance of learning features in order to achieve good generalization when the data distribution has a long tail. ", "abstract": "We propose a simple data model inspired from natural data such as text or images, and use it to study the importance of learning features in order to achieve good generalization. Our data model follows a long-tailed distribution in the sense that some rare and uncommon subcategories have few representatives in the training set. In this context we provide evidence that a learner succeeds if and only if it identifies the correct features, and moreover derive non-asymptotic generalization error bounds that precisely quantify the penalty that one must pay for not learning features.", "keywords": "deep learning theory;generalization;long-tailed data distribution", "primary_area": "", "supplementary_material": "", "author": "Thomas Laurent;James von Brecht;Xavier Bresson", "authorids": "~Thomas_Laurent1;~James_von_Brecht1;~Xavier_Bresson6", "gender": "M;;M", "homepage": "http://thomaslaurent.lmu.build/homepage.html;;https://www.comp.nus.edu.sg/cs/people/xaviercs/", "dblp": "47/8889-1;24/9745;95/378", "google_scholar": "_Ag_9uAAAAAJ;;https://scholar.google.com.sg/citations?hl=en", "orcid": ";;", "linkedin": ";;", "or_profile": "~Thomas_Laurent1;~James_von_Brecht1;~Xavier_Bresson6", "aff": "Loyola Marymount University;None;National University of Singapore", "aff_domain": "lmu.edu;gmail.com;nus.edu.sg", "position": "Full Professor;Independent Scholar;Associate Professor", "bibtex": "@inproceedings{\nlaurent2023longtailed,\ntitle={Long-Tailed Learning Requires Feature Learning},\nauthor={Thomas Laurent and James von Brecht and Xavier Bresson},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=S-h1oFv-mq}\n}", "github": "", "project": "", "reviewers": "LVLa;VYVD;gVke;3imt", "pdf_size": 4997917, "recommendation": "5;5;6;8", "confidence": "3;4;3;4", "correctness": "3;4;3;4", "technical_novelty": "3;3;3;3", "empirical_novelty": "3;3;1;3", "wc_summary_paper": "29;73;277;227", "wc_strength_and_weaknesses": "143;114;364;354", "wc_clarity_quality_novelty_and_reproducibility": "49;38;87;66", "wc_summary_review": "83;49;64;111", "wc_review": "304;274;792;758", "wc_reply_reviewers": "0;0;0;136", "wc_reply_authors": "514;292;509;1275", "reply_reviewers": "0;0;0;2", "reply_authors": "2;2;3;4", "recommendation_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 151.5, 103.22184846242583 ], "wc_strength_and_weaknesses_avg": [ 243.75, 115.7591789017182 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 60.0, 18.506755523321747 ], "wc_summary_review_avg": [ 76.75, 23.155722834755128 ], "wc_review_avg": [ 532.0, 243.52823244954578 ], "wc_reply_reviewers_avg": [ 34.0, 58.88972745734183 ], "wc_reply_authors_avg": [ 647.5, 373.2093916288817 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.40824829046386296, "corr_recommendation_correctness": 
0.40824829046386296, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5952217462569194288&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=S-h1oFv-mq", "email": "lmu.edu;gmail.com;nus.edu.sg", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Loyola Marymount University;Google;National University of Singapore", "aff_unique_dep": ";Google AI;", "aff_unique_url": "https://www.lmu.edu;https://ai.google;https://www.nus.edu.sg", "aff_unique_abbr": "LMU;Google AI;NUS", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;Singapore" }, { "title": "A Model or 603 Exemplars: Towards Memory-Efficient Class-Incremental Learning", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11837", "id": "S07feAlQHgM", "poster": "/media/PosterPDFs/ICLR%202023/11837.png?t=1681111539.9356303", "openreview": "https://openreview.net/forum?id=S07feAlQHgM", "slides": "https://iclr.cc/virtual/2023/poster/11837", "video": "https://iclr.cc/virtual/2023/poster/11837", "author_site": "Da-Wei Zhou, Qi-Wei Wang, Han-Jia Ye, De-Chuan Zhan", "tldr": "", "abstract": "Real-world applications require the classification model to adapt to new classes without forgetting old ones. Correspondingly, Class-Incremental Learning (CIL) aims to train a model with limited memory size to meet this requirement. Typical CIL methods tend to save representative exemplars from former classes to resist forgetting, while recent works find that storing models from history can substantially boost the performance. However, the stored models are not counted into the memory budget, which implicitly results in unfair comparisons. We find that when counting the model size into the total budget and comparing methods with aligned memory size, saving models does not consistently work, especially in the case of limited memory budgets. As a result, we need to holistically evaluate different CIL methods at different memory scales and simultaneously consider accuracy and memory size for measurement. On the other hand, we dive deeply into the construction of the memory buffer for memory efficiency. By analyzing the effect of different layers in the network, we find that shallow and deep layers have different characteristics in CIL. Motivated by this, we propose a simple yet effective baseline, denoted as MEMO for Memory-efficient Expandable MOdel. MEMO extends specialized layers based on the shared generalized representations, efficiently extracting diverse representations with modest cost and maintaining representative exemplars. Extensive experiments on benchmark datasets validate MEMO's competitive performance. 
Code is available at: https://github.com/wangkiw/ICLR23-MEMO", "keywords": "class-incremental learning", "primary_area": "", "supplementary_material": "", "author": "Da-Wei Zhou;Qi-Wei Wang;Han-Jia Ye;De-Chuan Zhan", "authorids": "~Da-Wei_Zhou1;~Qi-Wei_Wang1;~Han-Jia_Ye1;~De-Chuan_Zhan1", "gender": ";;M;M", "homepage": "http://www.lamda.nju.edu.cn/zhoudw/;http://www.lamda.nju.edu.cn/wangqiwei/;http://www.lamda.nju.edu.cn/yehj;http://www.lamda.nju.edu.cn/zhandc/", "dblp": "120/6109;195/9944;165/3014;74/498", "google_scholar": "kMNaR-YAAAAJ;PQkB2EsAAAAJ;mgOYhtoAAAAJ;mYJf4TcAAAAJ", "orcid": ";;;0000-0002-3533-2078", "linkedin": ";;;", "or_profile": "~Da-Wei_Zhou1;~Qi-Wei_Wang1;~Han-Jia_Ye1;~De-Chuan_Zhan1", "aff": "Nanjing University;Nanjing University;Nanjing University;Nanjing University", "aff_domain": "nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn", "position": "PhD student;MS student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nzhou2023a,\ntitle={A Model or 603 Exemplars: Towards Memory-Efficient Class-Incremental Learning},\nauthor={Da-Wei Zhou and Qi-Wei Wang and Han-Jia Ye and De-Chuan Zhan},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=S07feAlQHgM}\n}", "github": "", "project": "", "reviewers": "FkFJ;dAkF;GaiS;aBa9", "pdf_size": 2780544, "recommendation": "6;8;8;8", "confidence": "4;4;5;3", "correctness": "3;3;4;3", "technical_novelty": "2;4;4;3", "empirical_novelty": "3;4;4;3", "wc_summary_paper": "163;70;216;64", "wc_strength_and_weaknesses": "504;157;215;210", "wc_clarity_quality_novelty_and_reproducibility": "103;38;31;111", "wc_summary_review": "51;27;52;54", "wc_review": "821;292;514;439", "wc_reply_reviewers": "436;11;0;25", "wc_reply_authors": "2301;455;49;858", "reply_reviewers": "1;1;0;1", "reply_authors": "5;1;1;3", "recommendation_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.25, 0.82915619758885 ], "empirical_novelty_avg": [ 3.5, 0.5 ], "wc_summary_paper_avg": [ 128.25, 64.08734274410197 ], "wc_strength_and_weaknesses_avg": [ 271.5, 136.144224996876 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 70.75, 36.44430682562093 ], "wc_summary_review_avg": [ 46.0, 11.022703842524301 ], "wc_review_avg": [ 516.5, 193.08871018265154 ], "wc_reply_reviewers_avg": [ 118.0, 183.81104428189292 ], "wc_reply_authors_avg": [ 915.75, 849.381944416056 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 1.6583123951777 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 221, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10265381621780934793&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=S07feAlQHgM", "email": "nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Nanjing University", "aff_unique_dep": "", "aff_unique_url": "https://www.nju.edu.cn", "aff_unique_abbr": "Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "S0v71vsLBYhM", "title": "Inducing Gaussian Process Networks", "track": "main", "status": "Reject", "tldr": "We introduce a new method to efficiently learn the kernel and inducing points for 
Gaussian processes.", "abstract": "Gaussian processes (GPs) are powerful but computationally expensive machine learning models, requiring an estimate of the kernel covariance matrix for every prediction. In large and complex domains, such as graphs, sets, or images, the choice of suitable kernel can also be non-trivial to determine, providing an additional obstacle to the learning task. Over the last decade, these challenges have resulted in significant advances being made in terms of scalability and expressivity, exemplified by, e.g., the use of inducing points and neural network kernel approximations.\n\nIn this paper, we propose inducing Gaussian process networks (IGN), a simple framework for simultaneously learning the feature space as well as the inducing points. The inducing points, in particular, are learned directly in the feature space, enabling a seamless representation of complex structured domains while also facilitating scalable gradient-based learning methods. \n\nWe consider both regression and (binary) classification tasks and report on experimental results for real-world data sets showing that IGNs provide significant advances over state-of-the-art methods. We also demonstrate how IGNs can be used to effectively model complex domains using neural network architectures.", "keywords": "Gaussian processes;Kernel Methods;Classification;Regression", "primary_area": "", "supplementary_material": "/attachment/e064befef5031eba1c9534810f248a863e90e357.zip", "author": "Alessandro Tibo;Thomas Dyhre Nielsen", "authorids": "~Alessandro_Tibo1;~Thomas_Dyhre_Nielsen1", "gender": ";M", "homepage": ";http://people.cs.aau.dk/~tdn/", "dblp": ";23/1643", "google_scholar": ";https://scholar.google.dk/citations?user=6fWF0CgAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Alessandro_Tibo1;~Thomas_Dyhre_Nielsen1", "aff": ";Aalborg University", "aff_domain": ";aau.dk", "position": ";Associate Professor", "bibtex": "@misc{\ntibo2023inducing,\ntitle={Inducing Gaussian Process Networks},\nauthor={Alessandro Tibo and Thomas Dyhre Nielsen},\nyear={2023},\nurl={https://openreview.net/forum?id=S0v71vsLBYhM}\n}", "github": "", "project": "", "reviewers": "MpyF;r62e;dTCT", "site": "https://openreview.net/forum?id=S0v71vsLBYhM", "pdf_size": 2703094, "recommendation": "5;5;5", "confidence": "4;3;3", "correctness": "3;4;3", "technical_novelty": "2;3;2", "empirical_novelty": "1;2;3", "wc_summary_paper": "71;169;42", "wc_strength_and_weaknesses": "391;307;128", "wc_clarity_quality_novelty_and_reproducibility": "207;160;25", "wc_summary_review": "161;73;16", "wc_review": "830;709;211", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 94.0, 54.33844556726542 ], "wc_strength_and_weaknesses_avg": [ 275.3333333333333, 109.67933057579972 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 130.66666666666666, 77.14200469840597 ], "wc_summary_review_avg": [ 83.33333333333333, 59.64524755213576 ], "wc_review_avg": [ 583.3333333333334, 267.8735190761159 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 2, 0 ], 
"corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11628678703726161886&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 4, "aff_unique_index": "0", "aff_unique_norm": "Aalborg University", "aff_unique_dep": "", "aff_unique_url": "https://www.aau.dk", "aff_unique_abbr": "AAU", "aff_country_unique_index": "0", "aff_country_unique": "Denmark" }, { "id": "S1Jgnb7mLfI", "title": "Neural Attention Memory", "track": "main", "status": "Reject", "tldr": "Neural attention memory is a differentiable NN memory architecture based on attention which is efficient and powerful.", "abstract": "Scaled dot-product attention has become the essence of state-of-the-art deep neural networks for various machine learning tasks. Though its ubiquitous accomplishments, it is inefficient for long sequence tasks and problematic for tasks requiring memory states such as compositional generalization. We propose a novel perspective of the attention mechanism by reinventing it as a memory architecture for neural networks, namely Neural Attention Memory (NAM). NAM follows the same query-key-value structure by constructing a memory matrix while reducing its computational complexity from quadratic to linear to the sequence length. NAM writes a memory matrix via the sum of outer products of value and unit key vectors, and reads it by multiplying the matrix with a unit query vector. Indeed, we show that our normalized outer-product attention mechanism is mathematically equivalent to the conventional attention mechanism. Then, we evaluate a NAM-based Transformer on long-range arena tasks and demonstrate its efficiency and efficacy. Finally, we propose two NAM-based memory-augmented neural networks, namely Long Short-Term Attention Memory (LSAM) and NAM Turing Machine (NAM-TM), and test their compositional generalization capability using four different tasks. LSAM replaces LSTM's long-term cell state with NAM memory matrix and NAM-TM implements a Turing tape data structure using NAM read/write primitives. The experimental results show that the proposed models outperform traditional Transformer and LSTM, as well as DNC. 
NAM opens up possibilities in diverse machine learning research problems, including hierarchical data modeling, efficient edge inference, and few-shot learning.", "keywords": "Neuro-symbolic AI;Transformer;Memory-augmented neural network;compositional generalization", "primary_area": "", "supplementary_material": "/attachment/1c9668f7fb6ff117f8fd8509ec011423d507cdf2.zip", "author": "Hyoungwook Nam;Seung Byum Seo", "authorids": "~Hyoungwook_Nam2;~Seung_Byum_Seo1", "gender": "M;", "homepage": ";", "dblp": "202/9965;", "google_scholar": "s2EWQzQAAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Hyoungwook_Nam2;~Seung_Byum_Seo1", "aff": "University of Illinois, Urbana Champaign;", "aff_domain": "illinois.edu;", "position": "PhD student;", "bibtex": "@misc{\nnam2023neural,\ntitle={Neural Attention Memory},\nauthor={Hyoungwook Nam and Seung Byum Seo},\nyear={2023},\nurl={https://openreview.net/forum?id=S1Jgnb7mLfI}\n}", "github": "", "project": "", "reviewers": "ek6u;83W7;g5E5;Ln3M", "site": "https://openreview.net/forum?id=S1Jgnb7mLfI", "pdf_size": 297814, "recommendation": "3;3;6;6", "confidence": "3;5;3;4", "correctness": "3;3;3;4", "technical_novelty": "2;1;3;3", "empirical_novelty": "2;1;2;3", "wc_summary_paper": "89;55;153;107", "wc_strength_and_weaknesses": "92;193;360;124", "wc_clarity_quality_novelty_and_reproducibility": "50;53;136;42", "wc_summary_review": "47;30;44;48", "wc_review": "278;331;693;321", "wc_reply_reviewers": "49;0;0;0", "wc_reply_authors": "587;363;293;157", "reply_reviewers": "1;0;0;0", "reply_authors": "2;1;1;1", "recommendation_avg": [ 4.5, 1.5 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.82915619758885 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 101.0, 35.35533905932738 ], "wc_strength_and_weaknesses_avg": [ 192.25, 103.49969806719244 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 70.25, 38.17312536327095 ], "wc_summary_review_avg": [ 42.25, 7.224091638399945 ], "wc_review_avg": [ 405.75, 167.0349888496419 ], "wc_reply_reviewers_avg": [ 12.25, 21.21762239271875 ], "wc_reply_authors_avg": [ 350.0, 155.59241626763176 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.3015113445777637, "corr_recommendation_correctness": 0.5773502691896258, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "aff_unique_index": "0", "aff_unique_norm": "University of Illinois Urbana-Champaign", "aff_unique_dep": "", "aff_unique_url": "https://illinois.edu", "aff_unique_abbr": "UIUC", "aff_campus_unique_index": "0", "aff_campus_unique": "Urbana-Champaign", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "S2N25rUM55l", "title": "IEDR: A Context-aware Intrinsic and Extrinsic Disentangled Recommender System", "track": "main", "status": "Reject", "tldr": "We propose a recommender system that capture intrinsic and extrinsic factors from various contexts to enhance the recommendation quality.", "abstract": "Intrinsic and extrinsic factors jointly affect users' decisions in item selection (e.g., click, purchase). Intrinsic factors reveal users' real interests and are invariant in different contexts (e.g., time, weather), whereas extrinsic factors can change w.r.t. different contexts. 
Analyzing these two factors is an essential yet challenging task in recommender systems. However, in existing studies, factor analysis is either largely neglected, or designed for a specific context (e.g., the time context in sequential recommendation), which limits the applicability of such models. In this paper, we propose a generic model, IEDR, to learn intrinsic and extrinsic factors from various contexts for recommendation. IEDR contains two key components: a contrastive learning component, and a disentangling component. The two components collaboratively enable our model to learn context-invariant intrinsic factors and context-based extrinsic factors from all available contexts. Experimental results on real-world datasets demonstrate the effectiveness of our model in factor learning and impart a significant improvement in recommendation accuracy over the state-of-the-art methods.", "keywords": "Recommender Systems;Intrinsic and Extrinsic Factors;Contrastive Learning;Disentangled Representation;Mutual Information", "primary_area": "", "supplementary_material": "", "author": "Yixin Su;Wei Jiang;Yunxiang Zhao;Fangquan Lin;Cheng Yang;Sarah Monazam Erfani;Junhao Gan", "authorids": "~Yixin_Su2;~Wei_Jiang11;~Yunxiang_Zhao2;~Fangquan_Lin1;~Cheng_Yang3;~Sarah_Monazam_Erfani1;~Junhao_Gan1", "gender": ";;M;M;M;;M", "homepage": ";;https://yunxiangz.github.io/;;;https://people.eng.unimelb.edu.au/smonazam/;https://sites.google.com/site/junhogan/", "dblp": ";;171/1065;279/6367.html;;136/0170;06/11411", "google_scholar": ";ub7MOPoAAAAJ;https://scholar.google.com.au/citations?user=1YhptHQAAAAJ;;5QdPzoAAAAAJ;https://scholar.google.com.au/citations?user=Jq9ocx4AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0001-6065-811X;;;;0000-0001-9101-1503", "linkedin": ";;;;;;", "or_profile": "~Yixin_Su2;~Wei_Jiang11;~Yunxiang_Zhao2;~Fangquan_Lin1;~Cheng_Yang3;~Sarah_Monazam_Erfani1;~Junhao_Gan1", "aff": ";Alibaba Group;;Alibaba Group;Alibaba Group;The University of Melbourne;The University of Melbourne", "aff_domain": ";alibaba-inc.com;;alibaba-inc.com;alibaba-inc.com;unimelb.edu.au;unimelb.edu.au", "position": ";Researcher;;Researcher;Researcher;Associate Professor;Lecturer", "bibtex": "@misc{\nsu2023iedr,\ntitle={{IEDR}: A Context-aware Intrinsic and Extrinsic Disentangled Recommender System},\nauthor={Yixin Su and Wei Jiang and Yunxiang Zhao and Fangquan Lin and Cheng Yang and Sarah Monazam Erfani and Junhao Gan},\nyear={2023},\nurl={https://openreview.net/forum?id=S2N25rUM55l}\n}", "github": "", "project": "", "reviewers": "ywxS;N1Ar;TrQ3;vJvU", "site": "https://openreview.net/forum?id=S2N25rUM55l", "pdf_size": 2072139, "recommendation": "6;6;6;6", "confidence": "4;4;3;3", "correctness": "3;3;3;3", "technical_novelty": "3;2;3;3", "empirical_novelty": "3;3;3;3", "wc_summary_paper": "101;44;131;58", "wc_strength_and_weaknesses": "129;124;268;105", "wc_clarity_quality_novelty_and_reproducibility": "33;36;13;38", "wc_summary_review": "47;21;127;10", "wc_review": "310;225;539;211", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "749;385;684;775", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.0, 0.0 ], "wc_summary_paper_avg": [ 83.5, 34.54345089883175 ], "wc_strength_and_weaknesses_avg": [ 156.5, 64.99423051317709 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 30.0, 9.974968671630002 
], "wc_summary_review_avg": [ 51.25, 45.75136609982264 ], "wc_review_avg": [ 321.25, 131.301894502707 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 648.25, 155.55927326906615 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:8OVWuCTEwygJ:scholar.google.com/&scioq=IEDR:+A+Context-aware+Intrinsic+and+Extrinsic+Disentangled+Recommender+System&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;0;0;1;1", "aff_unique_norm": "Alibaba Group;University of Melbourne", "aff_unique_dep": ";", "aff_unique_url": "https://www.alibaba.com;https://www.unimelb.edu.au", "aff_unique_abbr": "Alibaba;UniMelb", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;1", "aff_country_unique": "China;Australia" }, { "title": "Provable Sim-to-real Transfer in Continuous Domain with Partial Observations", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11887", "id": "S31oTB72m0G", "poster": "", "openreview": "https://openreview.net/forum?id=S31oTB72m0G", "slides": "https://iclr.cc/virtual/2023/poster/11887", "video": "https://iclr.cc/virtual/2023/poster/11887", "author_site": "Jiachen Hu, Han Zhong, Chi Jin, Liwei Wang", "tldr": "", "abstract": " Sim-to-real transfer, which trains RL agents in the simulated environments and then deploys them in the real world, has been widely used to overcome the limitations of gathering samples in the real world. Despite the empirical success of the sim-to-real transfer, its theoretical foundation is much less understood. In this paper, we study the sim-to-real transfer in continuous domain with partial observations, where the simulated environments and real-world environments are modeled by linear quadratic Gaussian (LQG) systems. We show that a popular robust adversarial training algorithm is capable of learning a policy from the simulated environment that is competitive to the optimal policy in the real-world environment. To achieve our results, we design a new algorithm for infinite-horizon average-cost LQGs and establish a regret bound that depends on the intrinsic complexity of the model class. 
Our algorithm crucially relies on a novel history clipping scheme, which might be of independent interest.", "keywords": "sim-to-real;RL theory;partial observations", "primary_area": "", "supplementary_material": "", "author": "Jiachen Hu;Han Zhong;Chi Jin;Liwei Wang", "authorids": "~Jiachen_Hu1;~Han_Zhong1;~Chi_Jin1;~Liwei_Wang1", "gender": "M;;M;M", "homepage": "https://nickhclos.github.io/;https://hanzhong-ml.github.io/;https://sites.google.com/view/cjin/home;http://www.liweiwang-pku.com/", "dblp": "239/5040;137/8096.html;126/1802-1;", "google_scholar": "5GavKiQAAAAJ;Bk5q_pAAAAAJ;GINhGvwAAAAJ;VZHxoh8AAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Jiachen_Hu1;~Han_Zhong1;~Chi_Jin1;~Liwei_Wang1", "aff": "Peking University;Peking University;Princeton University;Peking University", "aff_domain": "pku.edu.cn;stu.pku.edu.cn;princeton.edu;pku.edu.cn", "position": "PhD student;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nhu2023provable,\ntitle={Provable Sim-to-real Transfer in Continuous Domain with Partial Observations},\nauthor={Jiachen Hu and Han Zhong and Chi Jin and Liwei Wang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=S31oTB72m0G}\n}", "github": "", "project": "", "reviewers": "rBWA;86xP;YiUA", "pdf_size": 473384, "recommendation": "6;8;8", "confidence": "2;3;2", "correctness": "3;4;3", "technical_novelty": "2;4;3", "empirical_novelty": "0;0;3", "wc_summary_paper": "41;54;99", "wc_strength_and_weaknesses": "301;209;201", "wc_clarity_quality_novelty_and_reproducibility": "14;96;56", "wc_summary_review": "31;35;18", "wc_review": "387;394;374", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1060;183;158", "reply_reviewers": "0;0;0", "reply_authors": "2;1;1", "recommendation_avg": [ 7.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 2.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.816496580927726 ], "empirical_novelty_avg": [ 1.0, 1.4142135623730951 ], "wc_summary_paper_avg": [ 64.66666666666667, 24.850665092821068 ], "wc_strength_and_weaknesses_avg": [ 237.0, 45.37253207246281 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 55.333333333333336, 33.47967874530592 ], "wc_summary_review_avg": [ 28.0, 7.2571803523590805 ], "wc_review_avg": [ 385.0, 8.286535263104035 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 467.0, 419.43851357101994 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.49999999999999983, "corr_recommendation_correctness": 0.49999999999999983, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10177240292352149423&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=S31oTB72m0G", "email": "pku.edu.cn;stu.pku.edu.cn;princeton.edu;pku.edu.cn", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Peking University;Princeton University", "aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;https://www.princeton.edu", "aff_unique_abbr": "Peking U;Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "China;United States" }, { "title": "Distilling Cognitive Backdoor Patterns within an Image", "status": "Poster", 
"track": "main", "site": "https://iclr.cc/virtual/2023/poster/12155", "id": "S3D9NLzjnQ5", "poster": "/media/PosterPDFs/ICLR%202023/12155.png?t=1681781368.0955417", "openreview": "https://openreview.net/forum?id=S3D9NLzjnQ5", "slides": "https://iclr.cc/virtual/2023/poster/12155", "video": "https://iclr.cc/virtual/2023/poster/12155", "author_site": "Hanxun Huang, Xingjun Ma, Sarah Erfani, James Bailey", "tldr": "A novel method effectively and robustly detect backdoor samples in the dataset. ", "abstract": "This paper proposes a simple method to distill and detect backdoor patterns within an image: \\emph{Cognitive Distillation} (CD). The idea is to extract the ``minimal essence\" from an input image responsible for the model's prediction. CD optimizes an input mask to extract a small pattern from the input image that can lead to the same model output (i.e., logits or deep features). The extracted pattern can help understand the cognitive mechanism of a model on clean vs. backdoor images and is thus called a \\emph{Cognitive Pattern} (CP). Using CD and the distilled CPs, we uncover an interesting phenomenon of backdoor attacks: despite the various forms and sizes of trigger patterns used by different attacks, the CPs of backdoor samples are all surprisingly and suspiciously small. \nOne thus can leverage the learned mask to detect and remove backdoor examples from poisoned training datasets. \nWe conduct extensive experiments to show that CD can robustly detect a wide range of advanced backdoor attacks.\nWe also show that CD can potentially be applied to help detect potential biases from face datasets.\nCode is available at https://github.com/HanxunH/CognitiveDistillation.", "keywords": "Backdoor sample detection;Backdoor defence", "primary_area": "", "supplementary_material": "/attachment/e1d22853a0f02fdc458fddbc6807ca77c67f1e2b.zip", "author": "Hanxun Huang;Xingjun Ma;Sarah Monazam Erfani;James Bailey", "authorids": "~Hanxun_Huang1;~Xingjun_Ma1;~Sarah_Monazam_Erfani1;~James_Bailey1", "gender": "M;M;;", "homepage": ";http://xingjunma.com/;https://people.eng.unimelb.edu.au/smonazam/;", "dblp": "268/5655.html;195/8270;136/0170;", "google_scholar": "https://scholar.google.com.au/citations?user=8CxZe3IAAAAJ;https://scholar.google.com.au/citations?user=XQViiyYAAAAJ;https://scholar.google.com.au/citations?user=Jq9ocx4AAAAJ;", "orcid": "0000-0002-2793-6680;;;", "linkedin": ";xingjun-ma-173532129/;;", "or_profile": "~Hanxun_Huang1;~Xingjun_Ma1;~Sarah_Monazam_Erfani1;~James_Bailey1", "aff": "The University of Melbourne;Fudan University;The University of Melbourne;", "aff_domain": "unimelb.edu.au;fudan.edu.cn;unimelb.edu.au;", "position": "PhD student;Associate Professor;Associate Professor;", "bibtex": "@inproceedings{\nhuang2023distilling,\ntitle={Distilling Cognitive Backdoor Patterns within an Image},\nauthor={Hanxun Huang and Xingjun Ma and Sarah Monazam Erfani and James Bailey},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=S3D9NLzjnQ5}\n}", "github": "", "project": "", "reviewers": "czZL;o4Nd;QeF1;Xw5r", "pdf_size": 18562512, "recommendation": "3;6;6;8", "confidence": "4;3;4;3", "correctness": "1;3;3;4", "technical_novelty": "2;3;2;3", "empirical_novelty": "0;3;3;3", "wc_summary_paper": "121;121;51;79", "wc_strength_and_weaknesses": "762;372;136;95", "wc_clarity_quality_novelty_and_reproducibility": "8;48;10;42", "wc_summary_review": "99;30;3;55", "wc_review": "990;571;200;271", "wc_reply_reviewers": 
"0;35;0;0", "wc_reply_authors": "2037;1357;931;269", "reply_reviewers": "0;1;0;0", "reply_authors": "7;3;4;2", "recommendation_avg": [ 5.75, 1.7853571071357126 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.75, 1.0897247358851685 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.25, 1.299038105676658 ], "wc_summary_paper_avg": [ 93.0, 29.698484809834994 ], "wc_strength_and_weaknesses_avg": [ 341.25, 264.9258150879223 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 27.0, 18.138357147217054 ], "wc_summary_review_avg": [ 46.75, 35.329697139941636 ], "wc_review_avg": [ 508.0, 311.17760202173935 ], "wc_reply_reviewers_avg": [ 8.75, 15.155444566227676 ], "wc_reply_authors_avg": [ 1148.5, 642.9873637949661 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 4.0, 1.8708286933869707 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.7001400420140049, "corr_recommendation_correctness": 0.9958634776150149, "gs_citation": 37, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2602340458341975703&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=S3D9NLzjnQ5", "email": "unimelb.edu.au;fudan.edu.cn;unimelb.edu.au;", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Melbourne;Fudan University", "aff_unique_dep": ";", "aff_unique_url": "https://www.unimelb.edu.au;https://www.fudan.edu.cn", "aff_unique_abbr": "UniMelb;Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Australia;China" }, { "id": "S4PGxCIbznF", "title": "Client-agnostic Learning and Zero-shot Adaptation for Federated Domain Generalization", "track": "main", "status": "Reject", "tldr": "Propose client-agnostic learning and zero-shot adaptation for federated domain generalization", "abstract": "Federated domain generalization (federated DG) aims to learn a client-agnostic global model from various distributed source domains and generalize the model to new clients in completely unseen domains. The main challenges of federated DG are the difficulty of building the global model with local client models from different domains while keeping data private and low generalizability to test clients, where data distribution deviates from those of training clients. To solve these challenges, we present two strategies: (1) client-agnostic learning with mixed instance-global statistics and (2) zero-shot adaptation with estimated statistics. In client-agnostic learning, we first augment local features by using data distribution of other clients via global statistics in the global model's batch normalization layers. This approach allows the generation of diverse domains by mixing local and global feature statistics while keeping data private. Local models then learn client-invariant representations by applying our client-agnostic objectives with the augmented data. Next, we propose a zero-shot adapter to help the learned global model to directly bridge a large domain gap between seen and unseen clients. At inference time, the adapter mixes instance statistics of a test input with global statistics that are vulnerable to distribution shift. With the aid of the adapter, the global model improves generalizability further by reflecting test distribution. 
We comprehensively evaluate our methods on several benchmarks in federated DG.", "keywords": "Federated learning;Domain generalization;Zero-shot adaptation", "primary_area": "", "supplementary_material": "", "author": "Seunghan Yang;Seokeon Choi;Hyunsin Park;Sungha Choi;Simyung Chang;Sungrack Yun", "authorids": "~Seunghan_Yang1;~Seokeon_Choi1;~Hyunsin_Park2;~Sungha_Choi1;~Simyung_Chang1;~Sungrack_Yun1", "gender": "M;M;M;M;M;M", "homepage": ";https://sites.google.com/site/seokeonchoi/;;https://www.sunghachoi.com/;;", "dblp": "250/9141;214/2200;50/9205;16/1923;206/6540;67/8053", "google_scholar": "g1-oNmAAAAAJ;https://scholar.google.co.kr/citations?user=wydV__gAAAAJ;mwtBKioAAAAJ;JMTnthsAAAAJ;https://scholar.google.co.kr/citations?user=0-tF1dwAAAAJ;", "orcid": ";0000-0002-1695-5894;0000-0003-3556-5792;0000-0003-2313-9243;;", "linkedin": ";seokeon/;hyunsin-park-598aa0221/;sungha-choi-1130185a/;;", "or_profile": "~Seunghan_Yang1;~Seokeon_Choi1;~Hyunsin_Park2;~Sungha_Choi1;~Simyung_Chang1;~Sungrack_Yun1", "aff": "Qualcomm AI Research;Qualcomm Inc, QualComm;Qualcomm Inc, QualComm;Qualcomm AI Research;QualComm AI Research;Qualcomm", "aff_domain": "qti.qualcomm.com;qti.qualcomm.com;qti.qualcomm.com;qti.qualcomm.com;qualcomm.com;qualcomm.com", "position": "Researcher;Researcher;Staff Engineer;Researcher;Researcher;Researcher", "bibtex": "@misc{\nyang2023clientagnostic,\ntitle={Client-agnostic Learning and Zero-shot Adaptation for Federated Domain Generalization},\nauthor={Seunghan Yang and Seokeon Choi and Hyunsin Park and Sungha Choi and Simyung Chang and Sungrack Yun},\nyear={2023},\nurl={https://openreview.net/forum?id=S4PGxCIbznF}\n}", "github": "", "project": "", "reviewers": "Rg7n;RmwN;LXCP;p4rT", "site": "https://openreview.net/forum?id=S4PGxCIbznF", "pdf_size": 3448335, "recommendation": "3;5;6;6", "confidence": "5;3;4;4", "correctness": "2;3;3;4", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "59;84;102;55", "wc_strength_and_weaknesses": "372;609;167;168", "wc_clarity_quality_novelty_and_reproducibility": "36;31;57;20", "wc_summary_review": "30;26;43;63", "wc_review": "497;750;369;306", "wc_reply_reviewers": "239;177;147;0", "wc_reply_authors": "1986;3217;965;1213", "reply_reviewers": "1;1;2;0", "reply_authors": "4;6;4;2", "recommendation_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 75.0, 19.144189719076646 ], "wc_strength_and_weaknesses_avg": [ 329.0, 181.94367260226446 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 36.0, 13.435028842544403 ], "wc_summary_review_avg": [ 40.5, 14.430869689661812 ], "wc_review_avg": [ 480.5, 170.13597503173747 ], "wc_reply_reviewers_avg": [ 140.75, 87.77350112647895 ], "wc_reply_authors_avg": [ 1845.25, 876.9391013633729 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 4.0, 1.4142135623730951 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.8660254037844386, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12801323736907713166&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;1;0;2;1", "aff_unique_norm": "Qualcomm;Qualcomm Incorporated;Qualcomm AI Research", "aff_unique_dep": "Qualcomm AI Research;;Qualcomm AI Research", 
"aff_unique_url": "https://www.qualcomm.com/research;https://www.qualcomm.com;https://www.qualcomm.com/research", "aff_unique_abbr": "QAI;Qualcomm;QAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "S5RYm-9Q4o", "title": "Fine-Grained Source Code Vulnerability Detection via Graph Neural Networks", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "The number of exploitable vulnerabilities in software continues to increase, the speed of bug fixes and software updates have not increased accordingly. It is therefore crucial to analyze the source code and identify vulnerabilities in the early phase of software development. In this paper, a fine-grained source code vulnerability detection model based on Graph Neural Networks (GNNs) is proposed with the aim of locating vulnerabilities at the function level and line level. First of all, detailed information about the source code is extracted through multi-dimensional program feature encoding to facilitate learning about patterns of vulnerability. Second, extensive experiments are conducted on both a public hybrid dataset and our proposed dataset, which is collected entirely from real software projects. It is demonstrated that our proposed model outperforms the state-of-the-art methods and achieves significant improvements even when faced with more complex real-project source code. Finally, a novel location module is designed to identify potential key vulnerable lines of code. And the effectiveness of the model and its contributions to reducing human workload in practical production are evaluated.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Jingjing Wang;Minhuan Huang;Yuanping Nie;Xiaohui Kuang;Xiang Li;Wenjing Zhong", "authorids": "~Jingjing_Wang5;~Minhuan_Huang1;~Yuanping_Nie1;~Xiaohui_Kuang1;~Xiang_Li41;~Wenjing_Zhong1", "gender": "F;M;M;M;F;F", "homepage": "https://ieeexplore.ieee.org/author/37088904264;https://ieeexplore.ieee.org/author/37086385592;https://ieeexplore.ieee.org/author/37085683034;https://ieeexplore.ieee.org/author/37543548800;https://ieeexplore.ieee.org/author/38241044300;https://ieeexplore.ieee.org/author/37088458653", "dblp": ";;150/3642;;;", "google_scholar": ";;;;;", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Jingjing_Wang5;~Minhuan_Huang1;~Yuanping_Nie1;~Xiaohui_Kuang1;~Xiang_Li41;~Wenjing_Zhong1", "aff": "Academy of Military Sciences;;;;;Xi'an University of Electronic Science and Technology", "aff_domain": "nudt.edu.cn;;;;;xidian.edu.cn", "position": "PhD student;;;;;PhD student", "bibtex": "@misc{\nwang2023finegrained,\ntitle={Fine-Grained Source Code Vulnerability Detection via Graph Neural Networks},\nauthor={Jingjing Wang and Minhuan Huang and Yuanping Nie and Xiaohui Kuang and Xiang Li and Wenjing Zhong},\nyear={2023},\nurl={https://openreview.net/forum?id=S5RYm-9Q4o}\n}", "github": "", "project": "", "reviewers": "28eG;98pL;95AA;QDVs", "site": "https://openreview.net/forum?id=S5RYm-9Q4o", "pdf_size": 632591, "recommendation": "1;3;3;5", "confidence": "5;4;5;5", "correctness": "3;3;2;3", "technical_novelty": "1;2;3;3", "empirical_novelty": "1;2;2;3", "wc_summary_paper": "77;89;27;73", "wc_strength_and_weaknesses": "154;213;205;42", "wc_clarity_quality_novelty_and_reproducibility": "37;263;33;28", "wc_summary_review": "36;28;28;625", "wc_review": "304;593;293;768", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", 
"reply_authors": "0;0;0;0", "recommendation_avg": [ 3.0, 1.4142135623730951 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.82915619758885 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 66.5, 23.553131426627754 ], "wc_strength_and_weaknesses_avg": [ 153.5, 68.2367203197809 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 90.25, 99.78821323182413 ], "wc_summary_review_avg": [ 179.25, 257.374605390664 ], "wc_review_avg": [ 489.5, 200.80898884263124 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:DWYWPFfB7b0J:scholar.google.com/&scioq=Fine-Grained+Source+Code+Vulnerability+Detection+via+Graph+Neural+Networks&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Academy of Military Sciences;Xi'an University of Electronic Science and Technology", "aff_unique_dep": ";", "aff_unique_url": ";http://www.xidian.edu.cn/", "aff_unique_abbr": ";Xidian University", "aff_campus_unique_index": "1", "aff_campus_unique": ";Xi'an", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Learning to Compose Soft Prompts for Compositional Zero-Shot Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12162", "id": "S8-A2FXnIh", "poster": "", "openreview": "https://openreview.net/forum?id=S8-A2FXnIh", "slides": "https://iclr.cc/virtual/2023/poster/12162", "video": "https://iclr.cc/virtual/2023/poster/12162", "author_site": "Nihal Nayak, Peilin Yu, Stephen Bach", "tldr": "We introduce compositional soft prompting (CSP), a parameter-efficient learning technique to improve the zero-shot compositionality of large-scale pretrained vision-language models (VLMs).", "abstract": "We introduce compositional soft prompting (CSP), a parameter-efficient learning technique to improve the zero-shot compositionality of large-scale pretrained vision-language models (VLMs) like CLIP. We develop CSP for compositional zero-shot learning, the task of predicting unseen attribute-object compositions (e.g., old cat and young tiger). VLMs have a flexible text encoder that can represent arbitrary classes as natural language prompts but they often underperform task-specific architectures on the compositional zero-shot benchmark datasets. CSP treats the attributes and objects that define classes as learnable tokens of vocabulary. During training, the vocabulary is tuned to recognize classes that compose tokens in multiple ways (e.g., old cat and white cat). At test time, we recompose the learned attribute-object vocabulary in new combinations to recognize novel classes. We show that CSP outperforms the CLIP on benchmark datasets by an average of 10.9 percentage points on AUC. CSP also outperforms CoOp, a soft prompting method that fine-tunes the prefix context tokens, by an average of 5.8 percentage points on AUC. We perform additional experiments to show that CSP improves generalization to higher-order attribute-attribute-object compositions (e.g., old white cat) and combinations of pretrained attributes and fine-tuned objects. 
The code is available at https://github.com/BatsResearch/csp.", "keywords": "compositional zero-shot learning;prompts;foundation models", "primary_area": "", "supplementary_material": "/attachment/cd3c8d56a68e5457210258f0b01b08216f5c21b6.zip", "author": "Nihal V. Nayak;Peilin Yu;Stephen Bach", "authorids": "~Nihal_V._Nayak1;~Peilin_Yu1;~Stephen_Bach1", "gender": ";M;M", "homepage": "https://nihalnayak.github.io/;https://yupeilin.com;http://stephenbach.net", "dblp": "203/9278;230/3699;90/1077", "google_scholar": "Bx497RMAAAAJ;P6waTgMAAAAJ;hs6pGXoAAAAJ", "orcid": "0000-0002-3150-1997;;0000-0003-3857-3560", "linkedin": ";;", "or_profile": "~Nihal_V._Nayak1;~Peilin_Yu1;~Stephen_Bach1", "aff": "Brown University;Brown University;Snorkel AI", "aff_domain": "brown.edu;brown.edu;snorkel.ai", "position": "PhD student;PhD student;Researcher", "bibtex": "@inproceedings{\nnayak2023learning,\ntitle={Learning to Compose Soft Prompts for Compositional Zero-Shot Learning},\nauthor={Nihal V. Nayak and Peilin Yu and Stephen Bach},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=S8-A2FXnIh}\n}", "github": "", "project": "", "reviewers": "1rcT;43mh;m1LH;wi3v", "pdf_size": 1594979, "recommendation": "5;6;8;8", "confidence": "4;4;4;4", "correctness": "3;3;4;4", "technical_novelty": "2;3;3;2", "empirical_novelty": "3;3;3;2", "wc_summary_paper": "71;46;80;54", "wc_strength_and_weaknesses": "217;332;266;305", "wc_clarity_quality_novelty_and_reproducibility": "71;8;134;59", "wc_summary_review": "58;33;270;222", "wc_review": "417;419;750;640", "wc_reply_reviewers": "0;64;14;0", "wc_reply_authors": "630;530;304;909", "reply_reviewers": "0;1;1;0", "reply_authors": "1;1;1;2", "recommendation_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 62.75, 13.442005058770064 ], "wc_strength_and_weaknesses_avg": [ 280.0, 43.2839462156583 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 68.0, 44.84974916317816 ], "wc_summary_review_avg": [ 145.75, 102.05972516129955 ], "wc_review_avg": [ 556.5, 143.8584373611781 ], "wc_reply_reviewers_avg": [ 19.5, 26.320144376503713 ], "wc_reply_authors_avg": [ 593.25, 217.20655491950512 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.9622504486493761, "gs_citation": 93, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6377859616967750029&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=S8-A2FXnIh", "email": "brown.edu;brown.edu;snorkel.ai", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Brown University;Snorkel AI", "aff_unique_dep": ";", "aff_unique_url": "https://www.brown.edu;https://www.snorkelai.com", "aff_unique_abbr": "Brown;Snorkel AI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "S80I3NwbbpS", "title": "CAB: Comprehensive Attention Benchmarking on Long Sequence Modeling", "track": "main", "status": "Reject", "tldr": "We propose Comprehensive Attention Benchmark (CAB) with seven real-world tasks from different research areas to evaluate efficient attentions under four fine-grained 
attention patterns.", "abstract": "Transformer has achieved remarkable success in language, image, and speech processing. Recently, various efficient attention architectures have been proposed to improve transformer's efficiency while largely preserving its efficacy, especially in modeling long sequences. A widely-used benchmark to test these efficient methods' capability on long-range modeling is Long Range Arena (LRA). However, LRA only focuses on the standard bidirectional (or noncausal) self attention, and completely ignores cross attentions and unidirectional (or causal) attentions, which are equally important to downstream applications. Although designing cross and causal variants of an attention method is straightforward for vanilla attention, it is often challenging for efficient attentions with subquadratic time and memory complexity. In this paper, we propose Comprehensive Attention Benchmark (CAB) under a fine-grained attention taxonomy with four distinguishable attention patterns, namely, noncausal self, causal self, noncausal cross, and causal cross attentions. CAB collects seven real-world tasks from different research areas to evaluate efficient attentions under the four attention patterns. Among these tasks, CAB validates efficient attentions in eight backbone networks to show their generalization across neural architectures. We conduct exhaustive experiments to benchmark the performances of nine widely-used efficient attention architectures designed with different philosophies on CAB. Extensive experimental results also shed light on the fundamental problems of efficient attentions, such as efficiency length against vanilla attention, performance consistency across attention patterns, the benefit of attention mechanisms, and interpolation/extrapolation on long-context language modeling.", "keywords": "Long Sequence Modeling;Benchmark;Efficient Attention", "primary_area": "", "supplementary_material": "/attachment/5e1bf5477e23a9de2fa88e85fb3daa4f87c0d387.zip", "author": "Jun Zhang;Shuyang Jiang;Jiangtao Feng;Lin Zheng;Lingpeng Kong", "authorids": "~Jun_Zhang27;~Shuyang_Jiang2;~Jiangtao_Feng1;~Lin_Zheng1;~Lingpeng_Kong1", "gender": "M;M;M;M;M", "homepage": ";;https://jiangtaofeng.github.io/;https://lzhengisme.github.io/;https://ikekonglp.github.io/", "dblp": ";153/1949;183/0908;;144/7656", "google_scholar": ";slwTiOUAAAAJ;7ufSFeIAAAAJ;3NXH0t8AAAAJ;f1hBi5wAAAAJ", "orcid": "0000-0002-3152-5091;;;;", "linkedin": ";%E4%B9%A6%E6%B4%8B-%E6%B1%9F-b8288223a/;;;", "or_profile": "~Jun_Zhang27;~Shuyang_Jiang2;~Jiangtao_Feng1;~Lin_Zheng1;~Lingpeng_Kong1", "aff": "Shanghai AI Lab;Shanghai Jiaotong University;Shanghai AI Lab;The University of Hong Kong;Department of Computer Science, The University of Hong Kong", "aff_domain": "pjlab.org.cn;cs.sjtu.edu.cn;pjlab.org.cn;hku.hk;cs.hku.hk", "position": "Researcher;Undergrad student;Researcher;PhD student;Assistant Professor", "bibtex": "@misc{\nzhang2023cab,\ntitle={{CAB}: Comprehensive Attention Benchmarking on Long Sequence Modeling},\nauthor={Jun Zhang and Shuyang Jiang and Jiangtao Feng and Lin Zheng and Lingpeng Kong},\nyear={2023},\nurl={https://openreview.net/forum?id=S80I3NwbbpS}\n}", "github": "", "project": "", "reviewers": "MeQL;VyuY;kTjS;auuz", "site": "https://openreview.net/forum?id=S80I3NwbbpS", "pdf_size": 29193277, "recommendation": "5;6;6;8", "confidence": "4;4;3;4", "correctness": "3;3;3;3", "technical_novelty": "2;2;3;2", "empirical_novelty": "2;2;3;4", "wc_summary_paper": "31;28;68;125", "wc_strength_and_weaknesses": 
"68;51;96;312", "wc_clarity_quality_novelty_and_reproducibility": "28;200;12;65", "wc_summary_review": "25;59;44;73", "wc_review": "152;338;220;575", "wc_reply_reviewers": "0;57;0;0", "wc_reply_authors": "674;515;407;582", "reply_reviewers": "0;1;0;0", "reply_authors": "2;2;1;1", "recommendation_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 63.0, 39.10882253405234 ], "wc_strength_and_weaknesses_avg": [ 131.75, 105.30046296194523 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 76.25, 73.98775236483401 ], "wc_summary_review_avg": [ 50.25, 17.82379028153103 ], "wc_review_avg": [ 321.25, 160.9089416409169 ], "wc_reply_reviewers_avg": [ 14.25, 24.681724007856502 ], "wc_reply_authors_avg": [ 544.5, 97.40764857032532 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.13245323570650439, "corr_recommendation_correctness": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3389278703858060842&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff_unique_index": "0;1;0;2;2", "aff_unique_norm": "Shanghai AI Lab;Shanghai Jiao Tong University;University of Hong Kong", "aff_unique_dep": ";;", "aff_unique_url": "https://www.shanghaiailab.com;https://www.sjtu.edu.cn;https://www.hku.hk", "aff_unique_abbr": "SAIL;SJTU;HKU", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "S80ioOGLpD9", "title": "Joint-Predictive Representations for Multi-Agent Reinforcement Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "The recent advances in reinforcement learning have demonstrated the effectiveness of vision-based self-supervised learning (SSL). However, the main efforts on this direction have been paid on single-agent setting, making multi-agent reinforcement learning~(MARL) lags thus far. There are two significant obstacles that prevent applying off-the-shelf SSL approaches with MARL on a partially observable multi-agent system : (a) each agent only gets a partial observation, and (b) previous SSL approaches only take consistent temporal representations into account, while ignoring the characterization that captures the interaction and fusion among agents. In this paper, we propose \\textbf{M}ulti-\\textbf{A}gent \\textbf{Jo}int-Predictive \\textbf{R}epresentations~(MAJOR), a novel framework to explore self-supervised learning on cooperative MARL. Specifically, we treat the latent representations of local observations of all agents as the sequence of masked contexts of the global state, and we then learn effective representations by predicting the future latent representations for each agent with the help of the agent-level information interactions in a joint transition model. 
We have conducted extensive experiments on a wide range of MARL environments, including both vision-based and state-based scenarios, and show that our proposed MAJOR achieves superior asymptotic performance and sample efficiency against other state-of-the-art methods.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Mingxiao Feng;Wengang Zhou;Yaodong Yang;Houqiang Li", "authorids": "~Mingxiao_Feng1;~Wengang_Zhou1;~Yaodong_Yang1;~Houqiang_Li1", "gender": ";M;M;M", "homepage": "https://fmxfranky.github.io/;http://staff.ustc.edu.cn/~zhwg/index.html;https://www.yangyaodong.com;https://staff.ustc.edu.cn/~lihq/", "dblp": ";22/4544-1;170/1496-1;59/7017.html", "google_scholar": ";8s1JF8YAAAAJ;https://scholar.google.co.uk/citations?user=6yL0xw8AAAAJ;7sFMIKoAAAAJ", "orcid": ";0000-0003-1690-9836;0000-0001-8132-5613;0000-0003-2188-3028", "linkedin": ";;yaodong-yang;", "or_profile": "~Mingxiao_Feng1;~Wengang_Zhou1;~Yaodong_Yang1;~Houqiang_Li1", "aff": "University of Science and Technology of China;University of Science and Technology of China;Peking University;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;ustc.edu.cn;pku.edu.cn;ustc.edu.cn", "position": "PhD student;Full Professor;Assistant Professor;Professor", "bibtex": "@misc{\nfeng2023jointpredictive,\ntitle={Joint-Predictive Representations for Multi-Agent Reinforcement Learning},\nauthor={Mingxiao Feng and Wengang Zhou and Yaodong Yang and Houqiang Li},\nyear={2023},\nurl={https://openreview.net/forum?id=S80ioOGLpD9}\n}", "github": "", "project": "", "reviewers": "patt;hLyZ;wdZd;yv1U", "site": "https://openreview.net/forum?id=S80ioOGLpD9", "pdf_size": 5982827, "recommendation": "5;6;6;6", "confidence": "4;3;3;3", "correctness": "3;3;3;4", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;3;3;2", "wc_summary_paper": "71;37;167;80", "wc_strength_and_weaknesses": "180;158;484;35", "wc_clarity_quality_novelty_and_reproducibility": "47;75;52;32", "wc_summary_review": "56;57;76;27", "wc_review": "354;327;779;174", "wc_reply_reviewers": "289;137;23;16", "wc_reply_authors": "2176;553;845;442", "reply_reviewers": "1;1;1;1", "reply_authors": "4;1;2;1", "recommendation_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 88.75, 47.93941489004637 ], "wc_strength_and_weaknesses_avg": [ 214.25, 165.25189106330978 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 51.5, 15.435349040433131 ], "wc_summary_review_avg": [ 54.0, 17.507141400011598 ], "wc_review_avg": [ 408.5, 224.65139661261847 ], "wc_reply_reviewers_avg": [ 116.25, 110.700892046993 ], "wc_reply_authors_avg": [ 1004.0, 692.479241566128 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7230774656310257566&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Science and Technology of China;Peking University", "aff_unique_dep": ";", "aff_unique_url": "http://www.ustc.edu.cn;http://www.pku.edu.cn", "aff_unique_abbr": "USTC;Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0",
"aff_country_unique": "China" }, { "id": "S9GpoS2TmN", "title": "Abstract-to-Executable Trajectory Translation for One-Shot Task Generalization", "track": "main", "status": "Reject", "tldr": "We tackle the problem of one-shot generalization for long-horizon tasks by learning a model to translate an abstract trajectory to an executable trajectory.", "abstract": "Training long-horizon robotic policies in complex physical environments is essential for many applications, such as robotic manipulation. However, learning a policy that can generalize to unseen tasks is challenging. In this work, we propose to achieve one-shot task generalization by decoupling plan generation and plan execution. Specifically, our method solves complex long-horizon tasks in three steps: build a paired abstract environment by simplifying geometry and physics, generate abstract trajectories, and solve the original task by an abstract-to-executable trajectory translator. In the abstract environment, complex dynamics such as physical manipulation are removed, making abstract trajectories easier to generate. However, this introduces a large domain gap between abstract trajectories and the actual executed trajectories as abstract trajectories lack low-level details and aren\u2019t aligned frame-to-frame with the executed trajectory. In a manner reminiscent of language translation, our approach leverages a seq-to-seq model to overcome the large domain gap between the abstract and executable trajectories, enabling the low-level policy to follow the abstract trajectory. Experimental results on various unseen long-horizon tasks with different robot embodiments demonstrate the practicability of our methods to achieve one-shot task generalization. Videos and more details can be found in the supplementary materials and project page: https://sites.google.com/view/abstract-to-executable-iclr23/", "keywords": "Trajectory Translation;One-Shot Generalization;Long-Horizon Task;Reinforcement Learning", "primary_area": "", "supplementary_material": "/attachment/8cfebac0fb380cd140b04cb5b71a9de442583170.zip", "author": "Stone Tao;Xiaochen Li;Tongzhou Mu;Zhiao Huang;Yuzhe Qin;Hao Su", "authorids": "~Stone_Tao1;~Xiaochen_Li1;~Tongzhou_Mu1;~Zhiao_Huang1;~Yuzhe_Qin1;~Hao_Su1", "gender": "M;M;M;M;M;M", "homepage": "https://www.stoneztao.com;https://sites.google.com/view/xiaochen-li/about;http://cseweb.ucsd.edu/~t3mu/;;https://yzqin.github.io/;http://ai.ucsd.edu/~haosu", "dblp": ";;183/0943;172/1410;241/9337;09/4945-1", "google_scholar": "GAMO0EwAAAAJ;YG7bk6IAAAAJ;uVsZydYAAAAJ;;3KF3AIMAAAAJ;1P8Zu04AAAAJ", "orcid": ";0000-0003-2653-5786;;;0000-0002-9321-9305;", "linkedin": ";;;;;", "or_profile": "~Stone_Tao1;~Xiaochen_Li1;~Tongzhou_Mu1;~Zhiao_Huang1;~Yuzhe_Qin1;~Hao_Su1", "aff": "University of California, San Diego;Brown University;University of California, San Diego;University of California, San Diego, University of California, San Diego;University of California, San Diego;University of California, San Diego", "aff_domain": "ucsd.edu;brown.edu;ucsd.edu;eng.ucsd.edu;ucsd.edu;ucsd.edu", "position": "Undergrad student;MS student;PhD student;PhD student;PhD student;Assistant Professor", "bibtex": "@misc{\ntao2023abstracttoexecutable,\ntitle={Abstract-to-Executable Trajectory Translation for One-Shot Task Generalization},\nauthor={Stone Tao and Xiaochen Li and Tongzhou Mu and Zhiao Huang and Yuzhe Qin and Hao Su},\nyear={2023},\nurl={https://openreview.net/forum?id=S9GpoS2TmN}\n}", "github": "", "project": "", "reviewers": "Nwsy;KkrM;iXws;QFp8", 
"site": "https://openreview.net/forum?id=S9GpoS2TmN", "pdf_size": 9125305, "recommendation": "5;5;6;6", "confidence": "5;4;4;3", "correctness": "2;2;3;3", "technical_novelty": "3;2;3;2", "empirical_novelty": "0;3;3;3", "wc_summary_paper": "58;98;95;90", "wc_strength_and_weaknesses": "299;411;129;241", "wc_clarity_quality_novelty_and_reproducibility": "35;422;144;89", "wc_summary_review": "54;442;77;52", "wc_review": "446;1373;445;472", "wc_reply_reviewers": "264;125;355;133", "wc_reply_authors": "2100;1354;1569;811", "reply_reviewers": "1;1;2;1", "reply_authors": "4;4;2;3", "recommendation_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.25, 1.299038105676658 ], "wc_summary_paper_avg": [ 85.25, 15.990231392947383 ], "wc_strength_and_weaknesses_avg": [ 270.0, 101.78899744078434 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 172.5, 149.1148885926553 ], "wc_summary_review_avg": [ 156.25, 165.27004417013993 ], "wc_review_avg": [ 684.0, 397.941578626813 ], "wc_reply_reviewers_avg": [ 219.25, 95.85503377496667 ], "wc_reply_authors_avg": [ 1458.5, 462.035983447177 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.25, 0.82915619758885 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.7071067811865475, "corr_recommendation_correctness": 1.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9891829457176109323&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 8, "aff_unique_index": "0;1;0;0;0;0", "aff_unique_norm": "University of California, San Diego;Brown University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucsd.edu;https://www.brown.edu", "aff_unique_abbr": "UCSD;Brown", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "San Diego;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "SCk8vEhwKo", "title": "ATTRIBUTES RECONSTRUCTION IN HETEROGENEOUS NETWORKS VIA GRAPH AUGMENTATION", "track": "main", "status": "Reject", "tldr": "", "abstract": "Heterogeneous Graph Neural Networks(HGNNs), as an effective tool for mining heterogeneous graphs, have achieved remarkable performance on node classification tasks. Yet, HGNNs are limited in their mining power as they require all nodes to have complete and reliable attributes. It is usually unrealistic since the attributes of many nodes in reality are inevitably missing or defective. Existing methods usually take imputation schemes to complete missing attributes, in which topology information is ignored, leading to suboptimal performance. And some graph augmentation techniques have improved the quality of attributes, while few of them are designed for heterogeneous graphs. In this work, we study the data augmentation on heterogeneous graphs, tackling the missing and defective attributes simultaneously, and propose a novel generic architecture\u2014Attributes Reconstruction in Heterogeneous networks via Graph Augmentation(ARHGA), including random sampling, attribute augmentation and consistency training. In graph augmentation, to ensure attributes plausible and accurate, the attention mechanism is adopted to reconstruct attributes under the guidance of the topological relationship between nodes. Our proposed architecture can be easily combined with any GNN-based heterogeneous model, and improves the performance. 
Extensive experiments on three benchmark datasets demonstrate the superior performance of ARHGA over state-of-the-art baselines on semi-supervised node classification.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/80c1c4a3d57c63da57e11f158243ca700e6882fe.zip", "author": "yixuan Liang;yuan wan", "authorids": "~yixuan_Liang1;wanyuan@whut.edu.cn", "gender": "F;", "homepage": ";", "dblp": ";", "google_scholar": ";", "orcid": "0000-0003-1575-8554;", "linkedin": ";", "or_profile": "~yixuan_Liang1;wanyuan@whut.edu.cn", "aff": "Wuhan University of Technology;", "aff_domain": "ssci.whut.edu.cn;", "position": "MS student;", "bibtex": "@misc{\nliang2023attributes,\ntitle={{ATTRIBUTES} {RECONSTRUCTION} {IN} {HETEROGENEOUS} {NETWORKS} {VIA} {GRAPH} {AUGMENTATION}},\nauthor={yixuan Liang and yuan wan},\nyear={2023},\nurl={https://openreview.net/forum?id=SCk8vEhwKo}\n}", "github": "", "project": "", "reviewers": "Qaru;FtXi;Aba8", "site": "https://openreview.net/forum?id=SCk8vEhwKo", "pdf_size": 1062357, "recommendation": "1;3;5", "confidence": "3;4;3", "correctness": "3;3;3", "technical_novelty": "1;2;2", "empirical_novelty": "0;2;3", "wc_summary_paper": "54;42;35", "wc_strength_and_weaknesses": "228;98;228", "wc_clarity_quality_novelty_and_reproducibility": "15;42;28", "wc_summary_review": "12;117;30", "wc_review": "309;299;321", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.0, 1.632993161855452 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.6666666666666667, 1.247219128924647 ], "wc_summary_paper_avg": [ 43.666666666666664, 7.845734863959881 ], "wc_strength_and_weaknesses_avg": [ 184.66666666666666, 61.28258770283412 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 28.333333333333332, 11.025223605694151 ], "wc_summary_review_avg": [ 53.0, 45.8475735453906 ], "wc_review_avg": [ 309.6666666666667, 8.993825042154693 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "aff_unique_index": "0", "aff_unique_norm": "Wuhan University of Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.whut.edu.cn", "aff_unique_abbr": "WUT", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "id": "SDHSQuBpf2", "title": "Laziness, Barren Plateau, and Noises in Machine Learning", "track": "main", "status": "Desk Reject", "tldr": "Variational quantum algorithms are lazy and noise-resilient in the overparametrization regime.", "abstract": "We define \\emph{laziness} to describe a large suppression of variational parameter updates for neural networks, classical or quantum. In the quantum case, the suppression is exponential in the number of qubits for randomized variational quantum circuits. We discuss the difference between laziness and the \\emph{barren plateau}, a term created by quantum physicists in \\cite{mcclean2018barren} for the flatness of the loss function landscape during gradient descent in quantum machine learning. We provide a novel theoretical understanding of those two phenomena in light of the theory of neural tangent kernels.
For noiseless quantum circuits without measurement noise, the loss function landscape is complicated in the overparametrized regime with a large number of trainable variational angles. Instead, around a random starting point in optimization, there are large numbers of local minima that are good enough and could minimize the mean square loss function, where we still have quantum laziness, but we do not have barren plateaus. However, the complicated landscape is not visible within a limited number of iterations or at the low precision of quantum control and quantum sensing. Moreover, we look at the effect of noise during optimization by assuming intuitive noise models, and show that variational quantum algorithms are noise-resilient in the overparametrization regime. Our work reformulates the quantum barren plateau statement into a precise statement and justifies it in certain noise models, injects new hope into near-term variational quantum algorithms, and provides theoretical connections to classical machine learning. Our paper provides conceptual perspectives on quantum barren plateaus, together with discussions of gradient descent dynamics.", "keywords": "theoretical issues in deep learning;learning representations of outputs or states", "primary_area": "", "supplementary_material": "", "author": "Zexi Lin;Liang Jiang", "authorids": "zexil@uchicago.edu;liang.jiang@uchicago.edu", "gender": ";", "homepage": ";", "dblp": ";", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": ";", "aff": ";", "aff_domain": ";", "position": ";", "bibtex": "@misc{\nlin2023laziness,\ntitle={Laziness, Barren Plateau, and Noises in Machine Learning},\nauthor={Zexi Lin and Liang Jiang},\nyear={2023},\nurl={https://openreview.net/forum?id=SDHSQuBpf2}\n}", "github": "", "project": "", "reviewers": "c2ao;gg18;DqJm;NXo6", "site": "https://openreview.net/forum?id=SDHSQuBpf2", "pdf_size": 2464049, "recommendation": "3;5;6;6", "confidence": "2;3;1;3", "correctness": "2;4;3;4", "technical_novelty": "3;2;3;3", "empirical_novelty": "0;2;3;2", "wc_summary_paper": "94;65;66;41", "wc_strength_and_weaknesses": "296;252;73;89", "wc_clarity_quality_novelty_and_reproducibility": "57;137;39;38", "wc_summary_review": "78;20;74;25", "wc_review": "525;474;252;193", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "631;733;290;115", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 2.25, 0.82915619758885 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.75, 1.0897247358851685 ], "wc_summary_paper_avg": [ 66.5, 18.76832437912346 ], "wc_strength_and_weaknesses_avg": [ 177.5, 97.90939689325025 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 67.75, 40.69014008331748 ], "wc_summary_review_avg": [ 49.25, 26.845623479442605 ], "wc_review_avg": [ 361.0, 141.21791671030982 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 442.25, 250.21728057830057 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.7385489458759963, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6131853032616936571&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8 }, { "title": "Iterative Circuit Repair Against Formal Specifications", "status":
"Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10712", "id": "SEcSahl0Ql", "poster": "/media/PosterPDFs/ICLR%202023/10712.png?t=1682108813.7213588", "openreview": "https://openreview.net/forum?id=SEcSahl0Ql", "slides": "https://iclr.cc/virtual/2023/poster/10712", "video": "https://iclr.cc/virtual/2023/poster/10712", "author_site": "Matthias Cosler, Frederik Schmitt, Christopher Hahn, Bernd Finkbeiner", "tldr": "We present a deep learning approach for repairing sequential circuits against formal specifications given in linear-time temporal logic (LTL).", "abstract": "We present a deep learning approach for repairing sequential circuits against formal specifications given in linear-time temporal logic (LTL). Given a defective circuit and its formal specification, we train Transformer models to output circuits that satisfy the corresponding specification. We propose a separated hierarchical Transformer for multimodal representation learning of the formal specification and the circuit. We introduce a data generation algorithm that enables generalization to more complex specifications and out-of-distribution datasets. In addition, our proposed repair mechanism significantly improves the automated synthesis of circuits from LTL specifications with Transformers. It improves the state-of-the-art by $6.8$ percentage points on held-out instances and $11.8$ percentage points on an out-of-distribution dataset from the annual reactive synthesis competition.", "keywords": "sequential circuits;repair;synthesis;transformer", "primary_area": "", "supplementary_material": "/attachment/c64eb81ab36cf9f7bd4a52dc80501f02fc6d7b80.zip", "author": "Matthias Cosler;Frederik Schmitt;Christopher Hahn;Bernd Finkbeiner", "authorids": "~Matthias_Cosler1;~Frederik_Schmitt1;~Christopher_Hahn1;~Bernd_Finkbeiner1", "gender": "M;M;M;M", "homepage": "https://cispa.de/de/people/c01maco;https://www.react.uni-saarland.de/people/schmitt.html;https://www.christopherhahn.io;https://www.react.uni-saarland.de/people/finkbeiner.html", "dblp": "341/6077;245/0350;91/9661;https://dblp.uni-trier.de/pid/73/4443.html", "google_scholar": "https://scholar.google.de/citations?user=_4VZ1e4AAAAJ;;bADdSwYAAAAJ;https://scholar.google.de/citations?hl=de", "orcid": ";;;0000-0002-4280-8441", "linkedin": ";frederik-schmitt-282814172/;;", "or_profile": "~Matthias_Cosler1;~Frederik_Schmitt1;~Christopher_Hahn1;~Bernd_Finkbeiner1", "aff": "CISPA Helmholtz Center for Information Security;CISPA Helmholtz Center for Information Security;Stanford University;Saarland University", "aff_domain": "cispa.de;cispa.saarland;stanford.edu;uni-saarland.de", "position": "PhD student;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\ncosler2023iterative,\ntitle={Iterative Circuit Repair Against Formal Specifications},\nauthor={Matthias Cosler and Frederik Schmitt and Christopher Hahn and Bernd Finkbeiner},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=SEcSahl0Ql}\n}", "github": "", "project": "", "reviewers": "ocom;Y3T4;NdwS;LxSj", "pdf_size": 571143, "recommendation": "5;5;6;6", "confidence": "2;3;3;3", "correctness": "4;3;4;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "58;75;115;52", "wc_strength_and_weaknesses": "80;226;244;143", "wc_clarity_quality_novelty_and_reproducibility": "81;180;41;26", "wc_summary_review": "21;27;58;15", "wc_review": "240;508;458;236", "wc_reply_reviewers": 
"0;0;0;0", "wc_reply_authors": "327;826;651;293", "reply_reviewers": "0;0;0;0", "reply_authors": "1;2;2;1", "recommendation_avg": [ 5.5, 0.5 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 75.0, 24.586581706288495 ], "wc_strength_and_weaknesses_avg": [ 173.25, 65.95216069242917 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 82.0, 60.045815840906016 ], "wc_summary_review_avg": [ 30.25, 16.57369904396722 ], "wc_review_avg": [ 360.5, 123.7770172528002 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 524.25, 223.32865355793464 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.0, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14875449172022356103&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "pdf": "https://openreview.net/pdf?id=SEcSahl0Ql", "email": "cispa.de;cispa.saarland;stanford.edu;uni-saarland.de", "author_num": 4, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "CISPA Helmholtz Center for Information Security;Stanford University;Saarland University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cispa.de/;https://www.stanford.edu;https://www.uni-saarland.de", "aff_unique_abbr": "CISPA;Stanford;UdS", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Germany;United States" }, { "id": "SEfxlDwL7fR", "title": "Temporally-Weighted Spike Encoding for Event-based Object Detection and Classification", "track": "main", "status": "Reject", "tldr": "Performing spiking neural network-based classification and object detection using a new spike encoding method for event-based vision sensors.", "abstract": "Event-based cameras exhibit high dynamic range and temporal precision that could make them ideal for detecting objects with high speeds and low relative luminance. These properties have made event-based cameras especially interesting for use in space domain awareness tasks, such as detecting dim, artificial satellites with high brightness backgrounds using ground-based optical sensors; however, the asynchronous nature of event-based data presents new challenges to performing objection detection. While spiking neural networks (SNNs) have been shown to naturally complement the asynchronous and binary properties of event-based data, they also present a number of challenges in their training, such as the spike vanishing problem and the large number of timesteps required for maximizing classification and detection accuracy. Furthermore, the extremely high sampling rate of event-based sensors and the density of noisy space-based data collections can results in excessively large event streams within a short window of recording. We present a temporally-weighted spike encoding that greatly reduces the number of spikes derived from an event-based data stream, enabling the training of larger SNNs with fewer timesteps for maximal accuracy. We propose using this spike encoding with a variant of convolutional SNN trained utilizing surrogate spiking neuron gradients with backpropagation-through-time (BPTT) for both classification and object detection tasks with an emphasis on space-domain awareness. 
To demonstrate the efficacy of our encoding and SNN approach, we present competitive classification accuracies on benchmark datasets N-MNIST (99.7%), DVS-CIFAR10 (74.0%), and N-Caltech101 (72.8%), as well as state-of-the-art object detection performance on event-based, satellite collections. ", "keywords": "Event-based vision;spiking neural networks;object detection;classification", "primary_area": "", "supplementary_material": "", "author": "Nikolaus Salvatore;Justin Fletcher", "authorids": "~Nikolaus_Salvatore1;~Justin_Fletcher1", "gender": "M;M", "homepage": "https://dl.acm.org/profile/99659543055;", "dblp": ";", "google_scholar": ";YSEl3usAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Nikolaus_Salvatore1;~Justin_Fletcher1", "aff": "KBR;", "aff_domain": "us.kbr.com;", "position": "Researcher;", "bibtex": "@misc{\nsalvatore2023temporallyweighted,\ntitle={Temporally-Weighted Spike Encoding for Event-based Object Detection and Classification},\nauthor={Nikolaus Salvatore and Justin Fletcher},\nyear={2023},\nurl={https://openreview.net/forum?id=SEfxlDwL7fR}\n}", "github": "", "project": "", "reviewers": "Uhms;AfjB;gpjB;WXPN", "site": "https://openreview.net/forum?id=SEfxlDwL7fR", "pdf_size": 633776, "recommendation": "3;3;6;6", "confidence": "3;5;5;3", "correctness": "2;2;3;3", "technical_novelty": "2;1;3;2", "empirical_novelty": "2;1;3;2", "wc_summary_paper": "56;50;38;41", "wc_strength_and_weaknesses": "276;403;127;93", "wc_clarity_quality_novelty_and_reproducibility": "45;145;79;95", "wc_summary_review": "19;30;77;29", "wc_review": "396;628;321;258", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.5, 1.5 ], "confidence_avg": [ 4.0, 1.0 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 46.25, 7.1545440106270926 ], "wc_strength_and_weaknesses_avg": [ 224.75, 123.80705755327521 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 91.0, 36.02776706930364 ], "wc_summary_review_avg": [ 38.75, 22.498611068241523 ], "wc_review_avg": [ 400.75, 140.00245533561187 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:RdzZKWR4JqsJ:scholar.google.com/&scioq=Temporally-Weighted+Spike+Encoding+for+Event-based+Object+Detection+and+Classification&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "KBR Inc.", "aff_unique_dep": "", "aff_unique_url": "https://www.kbr.com", "aff_unique_abbr": "KBR", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Meta-prediction Model for Distillation-Aware NAS on Unseen Datasets", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12067", "id": "SEh5SfEQtqB", "poster": "/media/PosterPDFs/ICLR%202023/12067.png?t=1680955560.0636811", "openreview": "https://openreview.net/forum?id=SEh5SfEQtqB", "slides": "https://iclr.cc/virtual/2023/poster/12067", "video": "https://iclr.cc/virtual/2023/poster/12067", "author_site": "Hayeon Lee, Sohyun An, Minseon Kim, Sung Ju Hwang", "tldr": "We propose a one-shot meta accuracy prediction model which can predict a 
given architecture's final performance on a dataset when performing KD with a given teacher, without having to actually train it on the target task.", "abstract": "Distillation-aware Neural Architecture Search (DaNAS) aims to search for an optimal student architecture that obtains the best performance and/or efficiency when distilling the knowledge from a given teacher model. Previous DaNAS methods have mostly tackled the search for the neural architecture for fixed datasets and teachers, and do not generalize well to a new task consisting of an unseen dataset and an unseen teacher, thus needing to perform a costly search for any new combination of datasets and teachers. For standard NAS tasks without KD, meta-learning-based computationally efficient NAS methods have been proposed, which learn the generalized search process over multiple tasks (datasets) and transfer the knowledge obtained over those tasks to a new task. However, since they assume learning from scratch without KD from a teacher, they might not be ideal for DaNAS scenarios. To eliminate the excessive computational cost of DaNAS methods and the sub-optimality of rapid NAS methods, we propose a distillation-aware meta-accuracy prediction model, DaSS (Distillation-aware Student Search), which can predict a given architecture's final performance on a dataset when performing KD with a given teacher, without actually having to train it on the target task. The experimental results demonstrate that our proposed meta-prediction model successfully generalizes to multiple unseen datasets for DaNAS tasks, largely outperforming existing meta-NAS methods and rapid NAS baselines. Code is available at https://github.com/CownowAn/DaSS.", "keywords": "Neural Architecture Search;Meta Learning", "primary_area": "", "supplementary_material": "/attachment/de76a4c3147835acec72f50759fee58ce808d98c.zip", "author": "Hayeon Lee;Sohyun An;Minseon Kim;Sung Ju Hwang", "authorids": "~Hayeon_Lee1;~Sohyun_An1;~Minseon_Kim1;~Sung_Ju_Hwang1", "gender": "F;F;;", "homepage": "https://hayeonlee.github.io/;https://cownowan.github.io/;https://kim-minseon.github.io/;", "dblp": "246/4987;348/6996;247/5952;", "google_scholar": "5DaLgBUAAAAJ;tW1jSXMAAAAJ;ZwObZNwAAAAJ;", "orcid": ";;;", "linkedin": ";sohyunan0423;minseon-kim-707a84174;", "or_profile": "~Hayeon_Lee1;~Sohyun_An1;~Minseon_Kim1;~Sung_Ju_Hwang1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;", "position": "PhD student;MS student;PhD student;", "bibtex": "@inproceedings{\nlee2023metaprediction,\ntitle={Meta-prediction Model for Distillation-Aware {NAS} on Unseen Datasets},\nauthor={Hayeon Lee and Sohyun An and Minseon Kim and Sung Ju Hwang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=SEh5SfEQtqB}\n}", "github": "", "project": "", "reviewers": "FiPx;L5tN;T2PL", "pdf_size": 1055952, "recommendation": "3;8;8", "confidence": "4;4;4", "correctness": "3;3;3", "technical_novelty": "2;3;3", "empirical_novelty": "2;3;4", "wc_summary_paper": "143;43;35", "wc_strength_and_weaknesses": "209;170;537", "wc_clarity_quality_novelty_and_reproducibility": "36;43;76", "wc_summary_review": "45;20;138", "wc_review": "433;276;786", "wc_reply_reviewers": "144;74;29", "wc_reply_authors": "4003;1299;3110", "reply_reviewers": "1;1;1", "reply_authors": "10;4;8",
"recommendation_avg": [ 6.333333333333333, 2.357022603955158 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 73.66666666666667, 49.13473539383541 ], "wc_strength_and_weaknesses_avg": [ 305.3333333333333, 164.58499999156126 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 51.666666666666664, 17.441967269268172 ], "wc_summary_review_avg": [ 67.66666666666667, 50.769632218045025 ], "wc_review_avg": [ 498.3333333333333, 213.27030318875205 ], "wc_reply_reviewers_avg": [ 82.33333333333333, 47.3168985552613 ], "wc_reply_authors_avg": [ 2804.0, 1124.909181519409 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 7.333333333333333, 2.494438257849294 ], "replies_avg": [ 32, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16936295843830929447&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=SEh5SfEQtqB", "email": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "id": "SFyOjfEOJO", "title": "Hybrid Neuro-Symbolic Reasoning based on Multimodal Fusion", "track": "main", "status": "Reject", "tldr": "A hybrid neural/symbolic modeling to enhance complex image classifications using commonsense knowledge.", "abstract": "Deep neural models and symbolic Artificial Intelligence (AI) systems have contrasting advantages and disadvantages. Neural models can be trained from raw, incomplete and noisy data to obtain abstraction of features at various levels, but their uninterpretability is well-known. On the other hand, the traditional rule-based symbolic reasoning encodes domain knowledge, but its failure is often attributed to the acquisition bottleneck. We propose to build a hybrid learning and reasoning system which is based on multimodal fusion approach that brings together\nadvantageous features from both the paradigms. Specifically, we enhance convolutional neural networks (CNNs) with the structured information of \u2018if-then\u2019 symbolic logic rules obtained via word embeddings corresponding to propositional symbols and terms. With many dozens of intuitive rules relating the type of a scene with its typical constituent objects, we are able to achieve significant improvement over the base CNN-based classification. 
Our approach is extendible to handle first-order logical syntax for rules and other deep learning models.", "keywords": "Neural Networks;Deep Learning;Symbolic Reasoning;Multimodal Fusion;Word Embedding;Rule-based Reasoning", "primary_area": "", "supplementary_material": "", "author": "Subrata Das;Bodong Zhou", "authorids": "~Subrata_Das2;zhou.bod@northeastern.edu", "gender": "M;", "homepage": ";", "dblp": ";", "google_scholar": ";", "orcid": ";", "linkedin": "subrata-das-1293354/;", "or_profile": "~Subrata_Das2;zhou.bod@northeastern.edu", "aff": "Northeastern University;", "aff_domain": "neu.edu;", "position": "Full Professor;", "bibtex": "@misc{\ndas2023hybrid,\ntitle={Hybrid Neuro-Symbolic Reasoning based on Multimodal Fusion},\nauthor={Subrata Das and Bodong Zhou},\nyear={2023},\nurl={https://openreview.net/forum?id=SFyOjfEOJO}\n}", "github": "", "project": "", "reviewers": "XLX8;vv79;fQgQ;jnEW", "site": "https://openreview.net/forum?id=SFyOjfEOJO", "pdf_size": 757352, "recommendation": "1;3;3;5", "confidence": "4;4;4;3", "correctness": "1;2;4;3", "technical_novelty": "2;2;1;2", "empirical_novelty": "1;2;1;2", "wc_summary_paper": "99;95;25;31", "wc_strength_and_weaknesses": "74;188;326;37", "wc_clarity_quality_novelty_and_reproducibility": "475;32;234;42", "wc_summary_review": "21;11;77;237", "wc_review": "669;326;662;347", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.0, 1.4142135623730951 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.5, 1.118033988749895 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.5, 0.5 ], "wc_summary_paper_avg": [ 62.5, 34.59407463713981 ], "wc_strength_and_weaknesses_avg": [ 156.25, 112.70398173977705 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 195.75, 180.2059585585338 ], "wc_summary_review_avg": [ 86.5, 90.45855404548539 ], "wc_review_avg": [ 501.0, 164.68606498426027 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.816496580927726, "corr_recommendation_correctness": 0.6324555320336759, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15185580141047417534&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Northeastern University", "aff_unique_dep": "", "aff_unique_url": "https://www.northeastern.edu", "aff_unique_abbr": "NEU", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "SGQi3LgFnqj", "title": "Grammar-Induced Geometry for Data-Efficient Molecular Property Prediction", "track": "main", "status": "Reject", "tldr": "We propose a data-efficient molecular property predictor based on an explicit geometry of the space of molecular graphs induced by a learnable hierarchical molecular grammar.", "abstract": "The prediction of molecular properties is a crucial task in the field of material and drug discovery. The potential benefits of using deep learning techniques are reflected in the wealth of recent literature. Still, these techniques are faced with a common challenge in practice: Labeled data are limited by the cost of manual extraction from literature and laborious experimentation. 
In this work, we propose a data-efficient property predictor by utilizing a learnable hierarchical molecular grammar that can generate molecules from grammar production rules. Such a grammar induces an explicit geometry of the space of molecular graphs, which provides an informative prior on molecular structural similarity. The property prediction is performed using graph neural diffusion over the grammar-induced geometry. On both small and large datasets, our evaluation shows that this approach outperforms a wide spectrum of baselines, including supervised and pre-trained graph neural networks. We include a detailed ablation study and further analysis of our solution, showing its effectiveness in cases with extremely limited data (only ${\\sim}100$ samples), and its extension to application in molecular generation.", "keywords": "Molecular property prediction;Graph grammar;Data-efficient model", "primary_area": "", "supplementary_material": "", "author": "Minghao Guo;Veronika Thost;Samuel Song;Adithya Balachandran;Payel Das;Jie Chen;Wojciech Matusik", "authorids": "~Minghao_Guo1;~Veronika_Thost1;ssong64@mit.edu;adithyab@mit.edu;~Payel_Das1;~Jie_Chen1;~Wojciech_Matusik2", "gender": "M;F;;;F;;M", "homepage": "https://www.minghaoguo.com/;https://mitibmwatsonailab.mit.edu/people/veronika-thost/;;;;https://jiechenjiechen.github.io;https://cdfg.mit.edu/wojciech", "dblp": "145/0008/;132/3874;;;56/7926;92/6289-7;", "google_scholar": "Hq2unJcAAAAJ;TyScgJ0AAAAJ;;;;Z-lkme8AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0003-4984-1532;;;;;0000-0003-0212-5643", "linkedin": ";;;;;;wojciech-matusik-67238126/", "or_profile": "~Minghao_Guo1;~Veronika_Thost1;ssong64@mit.edu;adithyab@mit.edu;~Payel_Das1;~Jie_Chen1;~Wojciech_Matusik2", "aff": "Massachusetts Institute of Technology;IBM Research;;;IBM, International Business Machines;International Business Machines;Massachusetts Institute of Technology", "aff_domain": "mit.edu;ibm.com;;;us.ibm.com;ibm.com;mit.edu", "position": "PhD student;Research Scientist;;;Principal Researcher;Research Staff Member;Full Professor", "bibtex": "@misc{\nguo2023grammarinduced,\ntitle={Grammar-Induced Geometry for Data-Efficient Molecular Property Prediction},\nauthor={Minghao Guo and Veronika Thost and Samuel Song and Adithya Balachandran and Payel Das and Jie Chen and Wojciech Matusik},\nyear={2023},\nurl={https://openreview.net/forum?id=SGQi3LgFnqj}\n}", "github": "", "project": "", "reviewers": "Ahkk;CjiQ;aHZ7;EFAZ", "site": "https://openreview.net/forum?id=SGQi3LgFnqj", "pdf_size": 2388302, "recommendation": "5;5;6;6", "confidence": "3;2;4;2", "correctness": "3;2;3;3", "technical_novelty": "2;3;2;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "82;61;134;56", "wc_strength_and_weaknesses": "270;150;783;95", "wc_clarity_quality_novelty_and_reproducibility": "38;38;178;96", "wc_summary_review": "72;35;346;362", "wc_review": "462;284;1441;609", "wc_reply_reviewers": "0;45;263;0", "wc_reply_authors": "727;876;600;703", "reply_reviewers": "0;1;1;0", "reply_authors": "1;2;1;1", "recommendation_avg": [ 5.5, 0.5 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 83.25, 30.8818312280862 ], "wc_strength_and_weaknesses_avg": [ 324.5, 272.17319853358083 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 87.5, 57.365059051656175 ], "wc_summary_review_avg": [ 203.75, 150.92444301702756 ], 
"wc_review_avg": [ 699.0, 443.5814468617911 ], "wc_reply_reviewers_avg": [ 77.0, 108.94723493508222 ], "wc_reply_authors_avg": [ 726.5, 98.62175216451998 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.30151134457776363, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:R7AF7b2zbbQJ:scholar.google.com/&scioq=Grammar-Induced+Geometry+for+Data-Efficient+Molecular+Property+Prediction&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "Massachusetts Institute of Technology;IBM;International Business Machines;International Business Machines Corporation", "aff_unique_dep": ";IBM Research;;", "aff_unique_url": "https://web.mit.edu;https://www.ibm.com/research;https://www.ibm.com;https://www.ibm.com", "aff_unique_abbr": "MIT;IBM;IBM;IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "SI0ON7mZYY", "title": "Categorial Grammar Induction as a Compositionality Measure for Emergent Languages in Signaling Games", "track": "main", "status": "Withdraw", "tldr": "This paper proposes a method for investigating the non-trivially compositional structure of emergent languages using Categorial Grammar Induction.", "abstract": "This paper proposes a method to analyze the compositional structure of emergent languages using Categorial Grammar Induction (CGI). Emergent languages are communication protocols arising among agents in environments such as signaling games. Previous work has studied how similar or dissimilar emergent languages are to natural languages in compositionality. However, most of them focused on trivial compositionality, assuming flat structures in languages. We further focus on non-trivial compositionality, i.e., the relationship between hierarchical syntax and semantics. To this end, we apply CGI to emergent languages, inspired by previous NLP work. Given sentence-meaning pairs of a language, CGI induces 1) a categorial grammar that describes the syntax of the language and 2) a semantic parser that compositionally maps sentences to meanings. We also propose compositionality measures based on the grammar size and semantic parser performance. 
CGI and the proposed measures enable deeper insights into the non-trivial compositionality of emergent languages, while correlating well with existing measures like TopSim.", "keywords": "Emergent Communication;Emergent Language;Categorial Grammar Induction;Syntax;Compositionality", "primary_area": "", "supplementary_material": "", "author": "Ryo Ueda;Taiga Ishii;Koki Washio;Yusuke Miyao", "authorids": "~Ryo_Ueda1;~Taiga_Ishii1;~Koki_Washio2;~Yusuke_Miyao2", "gender": "M;M;M;M", "homepage": "https://sites.google.com/view/ryo-ueda/;;https://sites.google.com/view/kokiwashio/;https://mynlp.is.s.u-tokyo.ac.jp/en/", "dblp": "191/3366;;219/5416;34/467.html", "google_scholar": "https://scholar.google.co.jp/citations?user=4HULQlwAAAAJ;;QPtNSa4AAAAJ;", "orcid": ";;;", "linkedin": ";taiga-ishii-89a196220/;;", "or_profile": "~Ryo_Ueda1;~Taiga_Ishii1;~Koki_Washio2;~Yusuke_Miyao2", "aff": "The University of Tokyo;The University of Tokyo;Megagon Labs, Tokyo;The University of Tokyo", "aff_domain": "u-tokyo.ac.jp;u-tokyo.ac.jp;megagon.ai;u-tokyo.ac.jp", "position": "MS student;MS student;Researcher;Full Professor", "bibtex": "@misc{\nueda2023categorial,\ntitle={Categorial Grammar Induction as a Compositionality Measure for Emergent Languages in Signaling Games},\nauthor={Ryo Ueda and Taiga Ishii and Koki Washio and Yusuke Miyao},\nyear={2023},\nurl={https://openreview.net/forum?id=SI0ON7mZYY}\n}", "github": "", "project": "", "reviewers": "PdUJ;cXuX;KzQh", "site": "https://openreview.net/forum?id=SI0ON7mZYY", "pdf_size": 867773, "recommendation": "3;5;6", "confidence": "4;4;4", "correctness": "3;2;3", "technical_novelty": "3;3;3", "empirical_novelty": "0;3;3", "wc_summary_paper": "53;77;120", "wc_strength_and_weaknesses": "351;6;124", "wc_clarity_quality_novelty_and_reproducibility": "250;7;625", "wc_summary_review": "132;652;172", "wc_review": "786;742;1041", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 1.4142135623730951 ], "wc_summary_paper_avg": [ 83.33333333333333, 27.716822007983207 ], "wc_strength_and_weaknesses_avg": [ 160.33333333333334, 143.16967400799498 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 294.0, 254.20857577981118 ], "wc_summary_review_avg": [ 318.6666666666667, 236.267268622258 ], "wc_review_avg": [ 856.3333333333334, 131.80878406068223 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": -0.18898223650461363, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=636393063183946476&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Tokyo;Megagon Labs", "aff_unique_dep": ";", "aff_unique_url": "https://www.u-tokyo.ac.jp;", "aff_unique_abbr": "UTokyo;", "aff_campus_unique_index": "1", "aff_campus_unique": ";Tokyo", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Japan" }, { "title": "Extreme Q-Learning: MaxEnt RL without Entropy", "status": "Top-5%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11367", "id": "SJ0Lde3tRL", "poster": "", "openreview": 
"https://openreview.net/forum?id=SJ0Lde3tRL", "slides": "https://iclr.cc/virtual/2023/poster/11367", "video": "https://iclr.cc/virtual/2023/poster/11367", "author_site": "Divyansh Garg, Joey Hejna, Matthieu Geist, Stefano Ermon", "tldr": "Introduce a novel framework for Q-learning that models the maximal soft-values without needing to sample from a policy and reaches SOTA performance on online and offline RL settings.", "abstract": "Modern Deep Reinforcement Learning (RL) algorithms require estimates of the maximal Q-value, which are difficult to compute in continuous domains with an infinite number of possible actions. In this work, we introduce a new update rule for online and offline RL which directly models the maximal value using Extreme Value Theory (EVT), drawing inspiration from economics. By doing so, we avoid computing Q-values using out-of-distribution actions which is often a substantial source of error. Our key insight is to introduce an objective that directly estimates the optimal soft-value functions (LogSumExp) in the maximum entropy RL setting without needing to sample from a policy. Using EVT, we derive our \\emph{Extreme Q-Learning} framework and consequently online and, for the first time, offline MaxEnt Q-learning algorithms, that do not explicitly require access to a policy or its entropy. Our method obtains consistently strong performance in the D4RL benchmark, outperforming prior works by \\emph{10+ points} on the challenging Franka Kitchen tasks while offering moderate improvements over SAC and TD3 on online DM Control tasks. Visualizations and code can be found on our website.", "keywords": "reinforcement learning;offline reinforcement learning;statistical learning;extreme value analysis;maximum entropy rl;gumbel", "primary_area": "", "supplementary_material": "/attachment/bc9aeca039fdf42a9f7c0788b657e41130fab7f9.zip", "author": "Divyansh Garg;Joey Hejna;Matthieu Geist;Stefano Ermon", "authorids": "~Divyansh_Garg1;~Joey_Hejna1;~Matthieu_Geist1;~Stefano_Ermon1", "gender": "M;M;M;M", "homepage": "http://divyanshgarg.com;;http://cs.stanford.edu/~ermon/;https://joeyhejna.com", "dblp": ";38/6508;47/8135;336/3297", "google_scholar": "https://scholar.google.com/citations?hl=en;ectPLEUAAAAJ;;y_sLoXoAAAAJ", "orcid": ";;;", "linkedin": "https://linkedin.com/in/div99/;;;", "or_profile": "~Divyansh_Garg1;~Matthieu_Geist1;~Stefano_Ermon1;~Donald_Joseph_Hejna_III1", "aff": ";Google;Stanford University;Stanford University", "aff_domain": ";google.com;stanford.edu;stanford.edu", "position": ";Researcher;Associate Professor;PhD student", "bibtex": "@inproceedings{\ngarg2023extreme,\ntitle={Extreme Q-Learning: MaxEnt {RL} without Entropy},\nauthor={Divyansh Garg and Joey Hejna and Matthieu Geist and Stefano Ermon},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=SJ0Lde3tRL}\n}", "github": "", "project": "", "reviewers": "pDUA;XF2J;HuwQ;PnpD", "pdf_size": 1086935, "recommendation": "6;6;8;10", "confidence": "4;3;3;3", "correctness": "2;3;4;4", "technical_novelty": "3;3;3;3", "empirical_novelty": "2;0;3;3", "wc_summary_paper": "48;86;91;43", "wc_strength_and_weaknesses": "316;335;296;73", "wc_clarity_quality_novelty_and_reproducibility": "2;14;44;19", "wc_summary_review": "39;63;53;25", "wc_review": "405;498;484;160", "wc_reply_reviewers": "79;0;0;0", "wc_reply_authors": "757;736;375;186", "reply_reviewers": "1;0;0;0", "reply_authors": "2;2;1;1", "recommendation_avg": [ 7.5, 1.6583123951777 ], 
"confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 1.224744871391589 ], "wc_summary_paper_avg": [ 67.0, 21.644860821913362 ], "wc_strength_and_weaknesses_avg": [ 255.0, 105.97877145919365 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 19.75, 15.303185942802891 ], "wc_summary_review_avg": [ 45.0, 14.352700094407323 ], "wc_review_avg": [ 386.75, 135.63070264508696 ], "wc_reply_reviewers_avg": [ 19.75, 34.208003449485325 ], "wc_reply_authors_avg": [ 513.5, 242.50618548812318 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.5222329678670935, "corr_recommendation_correctness": 0.8181818181818182, "gs_citation": 80, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1881587105629522568&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=SJ0Lde3tRL", "email": ";google.com;stanford.edu;stanford.edu", "author_num": 4, "aff_unique_index": "0;1;1", "aff_unique_norm": "Google;Stanford University", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://www.stanford.edu", "aff_unique_abbr": "Google;Stanford", "aff_campus_unique_index": "0;1;1", "aff_campus_unique": "Mountain View;Stanford", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Human Motion Diffusion Model", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12114", "id": "SJ1kSyO2jwu", "poster": "/media/PosterPDFs/ICLR%202023/12114.png?t=1681304199.0559504", "openreview": "https://openreview.net/forum?id=SJ1kSyO2jwu", "slides": "https://iclr.cc/virtual/2023/poster/12114", "video": "https://iclr.cc/virtual/2023/poster/12114", "author_site": "Guy Tevet, Sigal Raab, Brian Gordon, Yonatan Shafir, Daniel Cohen-Or, Amit Bermano", "tldr": "", "abstract": "Natural and expressive human motion generation is the holy grail of computer animation.\nIt is a challenging task, due to the diversity of possible motion, human perceptual sensitivity to it, and the difficulty of accurately describing it. Therefore, current generative solutions are either low-quality or limited in expressiveness. \nDiffusion models are promising candidates for the human motion domain since they\nhave already shown remarkable generative capabilities in other domains, and their many-to-many nature. \nIn this paper, we introduce Motion Diffusion Model (MDM), a carefully adapted classifier-free diffusion-based generative model for human motion data. MDM is transformer-based, combining insights from motion generation literature. \nA notable design-choice is that it predicts the sample itself rather than the noise in each step to facilitate the use of established geometric losses on the locations and velocities of the motion, such as the foot contact loss. As we demonstrate, MDM is a generic approach, enabling different modes of conditioning, and different generation tasks. We show that our model is trained with lightweight resources and yet achieves state-of-the-art results on leading benchmarks for text-to-motion, action-to-motion, and unconditioned motion generation. 
", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/74f8649e23bb80a0a4b2c910705fd1764e157648.zip", "author": "Guy Tevet;Sigal Raab;Brian Gordon;Yoni Shafir;Daniel Cohen-or;Amit Haim Bermano", "authorids": "~Guy_Tevet1;~Sigal_Raab1;~Brian_Gordon1;~Yoni_Shafir1;~Daniel_Cohen-or2;~Amit_Haim_Bermano2", "gender": "M;F;M;M;M;M", "homepage": "https://guytevet.github.io/;https://sigal-raab.github.io;;;https://www.cs.tau.ac.il/~amberman/;http://www.cs.tau.ac.il/~dcor/", "dblp": "229/4227.html;74/1089;08/4166;330/2698;97/10458;c/DCohenOr", "google_scholar": "P9ROgN8AAAAJ;STnhSioAAAAJ;VNDhTycAAAAJ;;https://scholar.google.co.il/citations?user=EPO5_f4AAAAJ;https://scholar.google.com.tw/citations?user=fAxws1sAAAAJ", "orcid": "0000-0003-4376-2403;0000-0001-6616-257X;0000-0002-3016-3690;;;0000-0001-6777-7445", "linkedin": ";sigal-raab-65496328;brian-gordon-38b29bb1/;yonatan-shafir-2811a3148/;;", "or_profile": "~Guy_Tevet1;~Sigal_Raab1;~Brian_Gordon1;~Yoni_Shafir1;~Amit_Haim_Bermano2;~Daniel_Cohen-Or1", "aff": "Tel Aviv University;Tel Aviv University;Amazon;Tel Aviv University;Tel Aviv University;Tel Aviv University", "aff_domain": "tau.ac.il;tau.ac.il;amazon.com;tau.ac.il;tau.ac.il;tau.ac.il", "position": "PhD student;PhD student;Researcher;MS student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\ntevet2023human,\ntitle={Human Motion Diffusion Model},\nauthor={Guy Tevet and Sigal Raab and Brian Gordon and Yoni Shafir and Daniel Cohen-or and Amit Haim Bermano},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=SJ1kSyO2jwu}\n}", "github": "", "project": "", "reviewers": "Lxvz;Eywo;W5bV;Cmzb", "pdf_size": 12026724, "recommendation": "6;8;8;8", "confidence": "4;4;5;4", "correctness": "3;4;3;4", "technical_novelty": "2;3;3;3", "empirical_novelty": "3;3;4;4", "wc_summary_paper": "67;69;121;62", "wc_strength_and_weaknesses": "232;163;636;143", "wc_clarity_quality_novelty_and_reproducibility": "52;59;76;45", "wc_summary_review": "55;53;40;60", "wc_review": "406;344;873;310", "wc_reply_reviewers": "113;16;42;25", "wc_reply_authors": "324;228;625;70", "reply_reviewers": "2;1;1;1", "reply_authors": "2;2;1;1", "recommendation_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.5, 0.5 ], "wc_summary_paper_avg": [ 79.75, 23.951774464536026 ], "wc_strength_and_weaknesses_avg": [ 293.5, 200.48004888267562 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 58.0, 11.510864433221338 ], "wc_summary_review_avg": [ 52.0, 7.3824115301167 ], "wc_review_avg": [ 483.25, 227.63938038046052 ], "wc_reply_reviewers_avg": [ 49.0, 38.11167800031901 ], "wc_reply_authors_avg": [ 311.75, 202.3195183367141 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 921, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15192131734292891832&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=SJ1kSyO2jwu", "email": "tau.ac.il;tau.ac.il;amazon.com;tau.ac.il;tau.ac.il;tau.ac.il", "author_num": 6, "aff_unique_index": "0;0;1;0;0;0", "aff_unique_norm": "Tel Aviv University;Amazon", "aff_unique_dep": ";Amazon.com, 
Inc.", "aff_unique_url": "https://www.tau.ac.il;https://www.amazon.com", "aff_unique_abbr": "TAU;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "Israel;United States" }, { "id": "SJO188Y53lk", "title": "Do We Really Achieve Fairness with Explicit Sensitive Attributes?", "track": "main", "status": "Withdraw", "tldr": "We found that different sample leak different amount of sensitive information and has different-level violation of demographic parity, thus we propose a new metric and method to address this problem.", "abstract": "Recently the wide usage of machine learning models for high-stake decision-making raises the concerns about the fairness and discrimination issue. Existing works found that sensitive information of a sample could be leaked completely by sensitive attributes or partially by non-sensitive attributes, thus removing the sensitive attributes directly from the original features can not achieve fairness. The current fairness practice is to leverage the explicit sensitive attributes (i.e., as regularization) to debias the prediction, based on a strong assumption that non-sensitive attributes of all samples leak the sensitive information totally. However, we investigate the distribution of leaked sensitive information from non-sensitive attributes and make interesting findings that 1) the sensitive information distinctly varies across different samples. 2) the violation of demographic parity for samples prone to leak sensitive information (high-sensitive) are worse than that for low-sensitive samples, indicating the failure of current demographic parity measurements. To this end, we propose a new group fairness ($\\alpha$-Demographic Parity) to measure the demographic parity for samples with different levels of sensitive information leakage. Furthermore, we move one step forward and propose to achieve $\\alpha$-demographic parity by encouraging the independence of the distribution of the sensitive information in non-sensitive attributes and that of downstream task prediction, which is formulated as a cross-task knowledge distillation framework. Specifically, the sensitive teacher models the distribution of the sensitive information and the fair student models the distribution of the downstream task prediction. Then we encourage the independence between them by minimizing the Hilbert-Schmidt Independence Criterion. Our model can naturally tackle the limited sensitive attribution scenario since the teacher models can be trained with partial samples with sensitive attributes. 
Extensive experiments show the superior performance of our proposed method on $\\alpha$-demographic parity, and our method also performs well in limited sensitive attribute scenarios.", "keywords": "fairness;debias;demographic parity", "primary_area": "", "supplementary_material": "", "author": "Xiaotian Han;Zhimeng Jiang;Ninghao Liu;Na Zou;Qifan Wang;Xia Hu", "authorids": "~Xiaotian_Han1;~Zhimeng_Jiang1;~Ninghao_Liu2;~Na_Zou2;~Qifan_Wang2;~Xia_Hu4", "gender": "M;M;F;M;M;M", "homepage": "https://ahxt.github.io/;http://www.zhimengjiang.com/;https://nzou1.github.io/;https://wqfcr.github.io/;https://cobweb.cs.uga.edu/~ninghaoliu/;https://cs.rice.edu/~xh37/index.html", "dblp": ";217/3235;152/0090-1.html;33/8610;145/4489;256/9406.html", "google_scholar": "Uromx98AAAAJ;5Es3Yk4AAAAJ;https://scholar.google.com/citations?hl=en;LrSyLosAAAAJ;Nir-EDYAAAAJ;https://scholar.google.com.tw/citations?user=pcCS60IAAAAJ", "orcid": ";0000-0001-6933-3952;0000-0003-1984-795X;0000-0002-7570-5756;0000-0002-9170-2424;", "linkedin": ";;na-zou-a1721535/;;;", "or_profile": "~Xiaotian_Han1;~Zhimeng_Jiang1;~Na_Zou2;~Qifan_Wang2;~Ninghao_Liu1;~Xia_Hu2", "aff": "Texas A&M University;Texas A&M University;Texas A&M University - College Station;Meta AI;University of Georgia;Rice University", "aff_domain": "tamu.edu;tamu.edu;tamu.edu;fb.com;uga.edu;rice.edu", "position": "PhD student;PhD student;Assistant Professor;Principal Researcher;Assistant Professor;Associate Professor", "bibtex": "@misc{\nhan2023do,\ntitle={Do We Really Achieve Fairness with Explicit Sensitive Attributes? },\nauthor={Xiaotian Han and Zhimeng Jiang and Ninghao Liu and Na Zou and Qifan Wang and Xia Hu},\nyear={2023},\nurl={https://openreview.net/forum?id=SJO188Y53lk}\n}", "github": "", "project": "", "reviewers": "b7SZ;uBvQ;cqmQ", "site": "https://openreview.net/forum?id=SJO188Y53lk", "pdf_size": 1820568, "recommendation": "1;3;3", "confidence": "4;4;4", "correctness": "1;2;2", "technical_novelty": "1;2;3", "empirical_novelty": "1;2;3", "wc_summary_paper": "45;92;45", "wc_strength_and_weaknesses": "93;628;302", "wc_clarity_quality_novelty_and_reproducibility": "437;45;47", "wc_summary_review": "12;46;26", "wc_review": "587;811;420", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 2.3333333333333335, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 1.6666666666666667, 0.4714045207910317 ], "technical_novelty_avg": [ 2.0, 0.816496580927726 ], "empirical_novelty_avg": [ 2.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 60.666666666666664, 22.15601247717849 ], "wc_strength_and_weaknesses_avg": [ 341.0, 220.14692063861958 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 176.33333333333334, 184.32097607766244 ], "wc_summary_review_avg": [ 28.0, 13.9522996909709 ], "wc_review_avg": [ 606.0, 160.18947114797112 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 1.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13320059210948161520&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0;1;2;3", "aff_unique_norm": "Texas A&M University;Meta;University of Georgia;Rice University", "aff_unique_dep": ";Meta AI;;", "aff_unique_url": 
"https://www.tamu.edu;https://meta.com;https://www.uga.edu;https://www.rice.edu", "aff_unique_abbr": "TAMU;Meta;UGA;Rice", "aff_campus_unique_index": "1", "aff_campus_unique": ";College Station", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "SJjvXfape5U", "title": "Sufficient Subgraph Embedding Memory for Continual Graph Representation Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Memory replay, which constructs a buffer to store representative samples and retrain the model over the buffer to maintain its performance over existing tasks, has shown great success for continual learning with Euclidean data. Directly applying it to graph data, however, can lead to the memory explosion problem due to the necessity to consider explicit topological connections of representative nodes. To this end, we present Parameter Decoupled Graph Neural Networks (PDGNNs) with Sufficient Subgraph Embedding Memory (SSEM) to fully utilize the explicit topological information for memory replay and reduce the memory space complexity from $\\mathcal{O}(nd^L)$ to $\\mathcal{O}(n)$, where $n$ is the memory buffer size, $d$ is the average node degree, and $L$ is the range of neighborhood aggregation. Specifically, PDGNNs decouple trainable parameters from the computation subgraphs via $\\textit{Sufficient Subgraph Embeddings}$ (SSEs), which compress subgraphs into vectors ($\\textit{i.e.}$, SSEs) to reduce the memory consumption. Besides, we discover a $\\textit{pseudo-training effect}$ in memory based continual graph learning, which does not exist in continual learning on Euclidean data without topological connection ($\\textit{e.g.}$, individual images). Based on the discovery, we develop a novel $\\textit{coverage maximization sampling}$ strategy to enhance the performance when the memory budget is tight. Thorough empirical studies demonstrate that PDGNNs with SSEM outperform state-of-the-art techniques for both class-incremental and task-incremental settings. 
", "keywords": "Graph;Class-incremental learning;continual learning;network", "primary_area": "", "supplementary_material": "", "author": "Xikun ZHANG;Dongjin Song;Dacheng Tao", "authorids": "~Xikun_ZHANG2;~Dongjin_Song2;~Dacheng_Tao1", "gender": ";M;", "homepage": ";https://songdj.github.io/;", "dblp": "38/326-2;41/3281;", "google_scholar": "oBlKsZ4AAAAJ;BJdHw6AAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Xikun_ZHANG2;~Dongjin_Song2;~Dacheng_Tao1", "aff": "University of Sydney;University of Connecticut;", "aff_domain": "sydney.edu.au;uconn.edu;", "position": "PhD student;Assistant Professor;", "bibtex": "@misc{\nzhang2023sufficient,\ntitle={Sufficient Subgraph Embedding Memory for Continual Graph Representation Learning},\nauthor={Xikun ZHANG and Dongjin Song and Dacheng Tao},\nyear={2023},\nurl={https://openreview.net/forum?id=SJjvXfape5U}\n}", "github": "", "project": "", "reviewers": "zw6P;g88c;AL85;U4Fi", "site": "https://openreview.net/forum?id=SJjvXfape5U", "pdf_size": 8143938, "recommendation": "3;3;5;8", "confidence": "3;4;4;3", "correctness": "3;4;3;4", "technical_novelty": "1;2;2;4", "empirical_novelty": "2;2;2;4", "wc_summary_paper": "51;64;105;65", "wc_strength_and_weaknesses": "331;293;267;422", "wc_clarity_quality_novelty_and_reproducibility": "8;29;60;69", "wc_summary_review": "25;55;25;15", "wc_review": "415;441;457;571", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1391;2891;1708;280", "reply_reviewers": "0;0;0;0", "reply_authors": "4;7;5;1", "recommendation_avg": [ 4.75, 2.0463381929681126 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.25, 1.0897247358851685 ], "empirical_novelty_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 71.25, 20.253086184579377 ], "wc_strength_and_weaknesses_avg": [ 328.25, 58.717012015258405 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 41.5, 24.37724348649781 ], "wc_summary_review_avg": [ 30.0, 15.0 ], "wc_review_avg": [ 471.0, 59.648973168026956 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1567.5, 930.0807760619505 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 4.25, 2.165063509461097 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.3665083330689157, "corr_recommendation_correctness": 0.3665083330689157, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:qDQjMAXRBe8J:scholar.google.com/&scioq=Sufficient+Subgraph+Embedding+Memory+for+Continual+Graph+Representation+Learning&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "University of Sydney;University of Connecticut", "aff_unique_dep": ";", "aff_unique_url": "https://www.sydney.edu.au;https://www.uconn.edu", "aff_unique_abbr": "USYD;UConn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Australia;United States" }, { "id": "SKat5ZX5RET", "title": "Self-Programming Artificial Intelligence Using Code-Generating Language Models", "track": "main", "status": "Withdraw", "tldr": "We develop and experimentally validate the first practical implementation of a self-reprogramming AI system. ", "abstract": "Recent progress in large-scale language models has enabled breakthroughs in previously intractable computer programming tasks. 
Prior work in meta-learning and neural architecture search has led to substantial successes across various task domains, spawning myriad approaches for algorithmically optimizing the design and learning dynamics of deep learning models. At the intersection of these research areas, we implement a code-generating language model with the ability to modify its own source code. Self-programming AI algorithms have been of interest since the dawn of AI itself. Although various theoretical formulations of generalized self-programming AI have been posed, no such system has been successfully implemented to date under real-world computational constraints. Applying AI-based code generation to AI itself, we develop and experimentally validate the first practical implementation of a self-programming AI system. We empirically show that a self-programming AI implemented using a code generation model can successfully modify its own source code to improve performance and program sub-models to perform auxiliary tasks. Our model can self-modify various properties including model architecture, computational capacity, and learning dynamics.", "keywords": "Self-programming AI;NLP;code generation;AutoML", "primary_area": "", "supplementary_material": "", "author": "Alex Sheng;Shankar Padmanabhan", "authorids": "~Alex_Sheng1;~Shankar_Padmanabhan1", "gender": "M;M", "homepage": "http://alexsheng.com/;", "dblp": "282/8198.html;", "google_scholar": ";", "orcid": ";", "linkedin": "alex-sheng-bab99b18a/;shankar-padmanabhan-1691a5222/", "or_profile": "~Alex_Sheng1;~Shankar_Padmanabhan1", "aff": "New York University;University of Texas at Austin", "aff_domain": "nyu.edu;utexas.edu", "position": "Undergrad student;Undergrad student", "bibtex": "@misc{\nsheng2023selfprogramming,\ntitle={Self-Programming Artificial Intelligence Using Code-Generating Language Models},\nauthor={Alex Sheng and Shankar Padmanabhan},\nyear={2023},\nurl={https://openreview.net/forum?id=SKat5ZX5RET}\n}", "github": "", "project": "", "reviewers": "1HJg;EsdQ;twx1;EMH8;jmai", "site": "https://openreview.net/forum?id=SKat5ZX5RET", "pdf_size": 684202, "recommendation": "1;3;3;3;3", "confidence": "4;4;3;3;4", "correctness": "1;1;2;2;2", "technical_novelty": "2;3;2;2;2", "empirical_novelty": "0;2;1;2;3", "wc_summary_paper": "49;140;159;67;39", "wc_strength_and_weaknesses": "130;221;374;258;170", "wc_clarity_quality_novelty_and_reproducibility": "9;72;130;134;12", "wc_summary_review": "12;33;101;75;28", "wc_review": "200;466;764;534;249", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;0;0", "reply_authors": "0;0;0;0;0", "recommendation_avg": [ 2.6, 0.8000000000000002 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "correctness_avg": [ 1.6, 0.4898979485566356 ], "technical_novelty_avg": [ 2.2, 0.39999999999999997 ], "empirical_novelty_avg": [ 1.6, 1.019803902718557 ], "wc_summary_paper_avg": [ 90.8, 49.13003154894163 ], "wc_strength_and_weaknesses_avg": [ 230.6, 83.90375438560542 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 71.4, 54.360279616646565 ], "wc_summary_review_avg": [ 49.8, 32.993332659796586 ], "wc_review_avg": [ 442.6, 204.2230153533142 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.40824829046386296, "corr_recommendation_correctness": 0.6123724356957944, "gs_citation": 1, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=12977507252896878206&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1", "aff_unique_norm": "New York University;University of Texas at Austin", "aff_unique_dep": ";", "aff_unique_url": "https://www.nyu.edu;https://www.utexas.edu", "aff_unique_abbr": "NYU;UT Austin", "aff_campus_unique_index": "1", "aff_campus_unique": ";Austin", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "SKxzoEbLZpy", "title": "PREF: Phasorial Embedding Fields for Compact Neural Representations", "track": "main", "status": "Withdraw", "tldr": "An efficient frequency-based neural representation is proposed. ", "abstract": "We present an efficient frequency-based neural representation termed PREF: a shallow MLP augmented with a phasor volume that covers significant border spectra than previous Fourier feature mapping or Positional Encoding. At the core is our compact 3D phasor volume where frequencies distribute uniformly along a 2D plane and dilate along a 1D axis. To this end, we develop a tailored and efficient Fourier transform that combines both Fast Fourier transform and local interpolation to accelerate na\\\"ive Fourier mapping. We also introduce a Parsvel regularizer that stables frequency-based learning. In these ways, Our PREF reduces the costly MLP in the frequency-based representation, thereby significantly closing the efficiency gap between it and other hybrid representations, and improving its interpretability. Comprehensive experiments demonstrate that our PREF is able to capture high-frequency details while remaining compact and robust, including 2D image generalization, 3D signed distance function regression and 5D neural radiance field reconstruction.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/41da913c7ed158baa03cb8213ca25940f644595b.zip", "author": "Binbin Huang;Xinhao Yan;Anpei Chen;Shenghua Gao;Jingyi Yu", "authorids": "~Binbin_Huang1;~Xinhao_Yan1;~Anpei_Chen2;~Shenghua_Gao1;~Jingyi_Yu5", "gender": "M;M;M;M;M", "homepage": ";https://svip-lab.github.io/index.html;https://apchenstu.github.io/;;", "dblp": ";;210/2592;63/7642;", "google_scholar": "5da8iKwAAAAJ;;fuR1FBwAAAAJ;fe-1v0MAAAAJ;R9L_AfQAAAAJ", "orcid": "0009-0008-7495-2406;;;;", "linkedin": ";;;;", "or_profile": "~Binbin_Huang1;~Xinhao_Yan1;~Anpei_Chen2;~Shenghua_Gao1;~Jingyi_Yu5", "aff": "Microsoft Research Asia;ShanghaiTech University;Department of Computer Science, ETHZ - ETH Zurich;ShanghaiTech University;ShanghaiTech University", "aff_domain": "microsoft.com;shanghaitech.edu.cn;inf.ethz.ch;shanghaitech.edu.cn;shanghaitech.edu.cn", "position": "Intern;MS student;Postdoc;Associate Professor;Full Professor", "bibtex": "@misc{\nhuang2023pref,\ntitle={{PREF}: Phasorial Embedding Fields for Compact Neural Representations},\nauthor={Binbin Huang and Xinhao Yan and Anpei Chen and Shenghua Gao and Jingyi Yu},\nyear={2023},\nurl={https://openreview.net/forum?id=SKxzoEbLZpy}\n}", "github": "", "project": "", "reviewers": "MZbT;qHxa;4meL", "site": "https://openreview.net/forum?id=SKxzoEbLZpy", "pdf_size": 1950213, "recommendation": "3;5;6", "confidence": "5;3;3", "correctness": "3;2;4", "technical_novelty": "2;3;3", "empirical_novelty": "1;2;3", "wc_summary_paper": "109;94;141", "wc_strength_and_weaknesses": "660;305;457", "wc_clarity_quality_novelty_and_reproducibility": "50;36;40", "wc_summary_review": "84;60;122", "wc_review": "903;495;760", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", 
"reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 114.66666666666667, 19.601587237318874 ], "wc_strength_and_weaknesses_avg": [ 474.0, 145.42581155581243 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 42.0, 5.887840577551898 ], "wc_summary_review_avg": [ 88.66666666666667, 25.525586292102197 ], "wc_review_avg": [ 719.3333333333334, 169.02925453568352 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.944911182523068, "corr_recommendation_correctness": 0.3273268353539886, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1305636680117952307&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2;1;1", "aff_unique_norm": "Microsoft;ShanghaiTech University;ETH Zurich", "aff_unique_dep": "Research;;Department of Computer Science", "aff_unique_url": "https://www.microsoft.com/en-us/research/group/asia;https://www.shanghaitech.edu.cn;https://www.ethz.ch", "aff_unique_abbr": "MSR Asia;ShanghaiTech;ETHZ", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Asia;;Zurich", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "China;Switzerland" }, { "title": "ContraNorm: A Contrastive Learning Perspective on Oversmoothing and Beyond", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11036", "id": "SM7XkJouWHm", "poster": "/media/PosterPDFs/ICLR%202023/11036.png?t=1682431434.8068764", "openreview": "https://openreview.net/forum?id=SM7XkJouWHm", "slides": "https://iclr.cc/virtual/2023/poster/11036", "video": "https://iclr.cc/virtual/2023/poster/11036", "author_site": "Xiaojun Guo, Yifei Wang, Tianqi Du, Yisen Wang", "tldr": "", "abstract": "Oversmoothing is a common phenomenon in a wide range of Graph Neural Networks (GNNs) and Transformers, where performance degenerates as the layer goes deeper. Instead of characterizing oversmoothing from the view of complete collapse in which representations converge to a single point, we dive into a more general perspective dimensional collapse in which representations lie in a narrow cone. Accordingly, inspired by the power of contrastive learning in preventing dimensional collapse, we propose a novel normalization layer ContraNorm. Intuitively, ContraNorm implicitly shatters representations in the embedding space, leading to a more uniform distribution and slighter dimensional collapse. On the theoretical analysis, we prove that ContraNorm can alleviate both complete collapse and dimensional collapse under some conditions. Our proposed normalization layer can be easily inserted into GNNs and Transformers with negligible parameter overhead. 
Experiments on various real-world datasets verify the effectiveness of our method.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Xiaojun Guo;Yifei Wang;Tianqi Du;Yisen Wang", "authorids": "~Xiaojun_Guo1;~Yifei_Wang1;~Tianqi_Du1;~Yisen_Wang1", "gender": "F;M;M;M", "homepage": "https://zero-lab-pku.github.io/personwise/guoxiaojun/;https://yifeiwang77.com;https://yisenwang.github.io/;https://github.com/rexdu2003/rexdu.github.io", "dblp": ";00/555-1;172/1346-1;341/5548", "google_scholar": ";-CLy6YsAAAAJ;uMWPDboAAAAJ;nQjREpoAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Xiaojun_Guo1;~Yifei_Wang1;~Yisen_Wang1;~Du_Tianqi1", "aff": "Peking University;Peking University;Peking University;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn", "position": "PhD student;PhD student;Assistant Professor;Undergrad student", "bibtex": "@inproceedings{\nguo2023contranorm,\ntitle={ContraNorm: A Contrastive Learning Perspective on Oversmoothing and Beyond},\nauthor={Xiaojun Guo and Yifei Wang and Tianqi Du and Yisen Wang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=SM7XkJouWHm}\n}", "github": "", "project": "", "reviewers": "uxeV;Q9GP;Zh7A;HpT5", "pdf_size": 643729, "recommendation": "6;6;6;8", "confidence": "4;5;3;4", "correctness": "3;4;3;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;2;3;2", "wc_summary_paper": "99;27;83;74", "wc_strength_and_weaknesses": "569;231;277;221", "wc_clarity_quality_novelty_and_reproducibility": "38;8;6;150", "wc_summary_review": "173;22;35;48", "wc_review": "879;288;401;493", "wc_reply_reviewers": "141;0;0;0", "wc_reply_authors": "1250;836;833;2013", "reply_reviewers": "1;0;0;0", "reply_authors": "3;3;2;4", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 70.75, 26.799020504488592 ], "wc_strength_and_weaknesses_avg": [ 324.5, 142.73314261235896 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 50.5, 58.82813952523061 ], "wc_summary_review_avg": [ 69.5, 60.45866356445534 ], "wc_review_avg": [ 515.25, 222.2075325005883 ], "wc_reply_reviewers_avg": [ 35.25, 61.054790966802926 ], "wc_reply_authors_avg": [ 1233.0, 481.221882295475 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": -0.3333333333333333, "gs_citation": 50, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10107504895816145562&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=SM7XkJouWHm", "email": "pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Aligning Model and Macaque Inferior Temporal Cortex Representations Improves Model-to-Human Behavioral Alignment and Adversarial Robustness", "status": "Top-5%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11087", "id": 
"SMYdcXjJh1q", "poster": "", "openreview": "https://openreview.net/forum?id=SMYdcXjJh1q", "slides": "https://iclr.cc/virtual/2023/poster/11087", "video": "https://iclr.cc/virtual/2023/poster/11087", "author_site": "Joel Dapello, Kohitij Kar, Martin Schrimpf, Robert Geary, Michael Ferguson, David Cox, James DiCarlo", "tldr": "Aligning late stage model representations with neural recordings from macaque IT broadly improves adversarial robustness and alignment on human behavior.", "abstract": "While some state-of-the-art artificial neural network systems in computer vision are strikingly accurate models of the corresponding primate visual processing, there are still many discrepancies between these models and the behavior of primates on object recognition tasks. Many current models suffer from extreme sensitivity to adversarial attacks and often do not align well with the image-by-image behavioral error patterns observed in humans. Previous research has provided strong evidence that primate object recognition behavior can be very accurately predicted by neural population activity in the inferior temporal (IT) cortex, a brain area in the late stages of the visual processing hierarchy. Therefore, here we directly test whether making the late stage representations of models more similar to that of macaque IT produces new models that exhibit more robust, primate-like behavior. We conducted chronic, large-scale multi-electrode recordings across the IT cortex in six non-human primates (rhesus macaques). We then use these data to fine-tune (end-to-end) the model \"IT\" representations such that they are more aligned with the biological IT representations, while preserving accuracy on object recognition tasks. We generate a cohort of models with a range of IT similarity scores validated on held-out animals across two image sets with distinct statistics. Across a battery of optimization conditions, we observed a strong correlation between the models' IT-likeness and alignment with human behavior, as well as an increase in its adversarial robustness. We further assessed the limitations of this approach and find that the improvements in behavioral alignment and adversarial robustness generalize across different image statistics, but not to object categories outside of those covered in our IT training set. Taken together, our results demonstrate that building models that are more aligned with the primate brain leads to more robust and human-like behavior, and call for larger neural data-sets to further augment these gains.", "keywords": "Computer Vision;Primate Vision;Adversarial Robustness;Behavioral Alignment;Inferior Temporal Cortex", "primary_area": "", "supplementary_material": "/attachment/a2ee5ffe47c5d6cdb6863f3fa7c50a2d74d4add3.zip", "author": "Joel Dapello;Kohitij Kar;Martin Schrimpf;Robert Baldwin Geary;Michael Ferguson;David Daniel Cox;James J. 
DiCarlo", "authorids": "~Joel_Dapello1;~Kohitij_Kar2;~Martin_Schrimpf1;~Robert_Baldwin_Geary1;~Michael_Ferguson3;~David_Daniel_Cox1;~James_J._DiCarlo1", "gender": "M;;;M;M;;M", "homepage": ";;http://mschrimpf.com/;;;;http://dicarlolab.mit.edu", "dblp": "238/2587;;190/7063;;;48/7659;80/7658", "google_scholar": "4ZnsOa8AAAAJ;;RiZ-RdwAAAAJ;;IXt8KfIAAAAJ;;", "orcid": "0000-0002-6574-097X;;0000-0001-7766-7223;;;;0000-0002-1592-5896", "linkedin": ";;mschrimpf/;rob-geary-122235241;;;james-j-dicarlo/", "or_profile": "~Joel_Dapello1;~Kohitij_Kar2;~Martin_Schrimpf1;~Robert_Baldwin_Geary1;~Michael_Ferguson3;~David_Daniel_Cox1;~James_J._DiCarlo1", "aff": "Altos Labs;;Massachusetts Institute of Technology;Harvard University;Massachusetts Institute of Technology;International Business Machines;Massachusetts Institute of Technology", "aff_domain": "altoslabs.com;;mit.edu;harvard.edu;mit.edu;ibm.com;mit.edu", "position": "Researcher;;Researcher;PhD student;Researcher;IBM Director, MIT-IBM Watson AI Lab;Full Professor", "bibtex": "@inproceedings{\ndapello2023aligning,\ntitle={Aligning Model and Macaque Inferior Temporal Cortex Representations Improves Model-to-Human Behavioral Alignment and Adversarial Robustness},\nauthor={Joel Dapello and Kohitij Kar and Martin Schrimpf and Robert Baldwin Geary and Michael Ferguson and David Daniel Cox and James J. DiCarlo},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=SMYdcXjJh1q}\n}", "github": "", "project": "", "reviewers": "Bkvv;LQZr;KMTQ", "pdf_size": 16500692, "recommendation": "8;8;8", "confidence": "4;4;4", "correctness": "4;4;4", "technical_novelty": "3;4;3", "empirical_novelty": "3;4;4", "wc_summary_paper": "65;33;125", "wc_strength_and_weaknesses": "333;47;582", "wc_clarity_quality_novelty_and_reproducibility": "47;92;84", "wc_summary_review": "48;15;88", "wc_review": "493;187;879", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "302;143;421", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 8.0, 0.0 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 4.0, 0.0 ], "technical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 74.33333333333333, 38.134265722866914 ], "wc_strength_and_weaknesses_avg": [ 320.6666666666667, 218.58687568612672 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 74.33333333333333, 19.601587237318874 ], "wc_summary_review_avg": [ 50.333333333333336, 29.847761874031505 ], "wc_review_avg": [ 519.6666666666666, 283.1364021496039 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 288.6666666666667, 113.88395653275994 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 34, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15450149728660711715&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=SMYdcXjJh1q", "email": "altoslabs.com;;mit.edu;harvard.edu;mit.edu;ibm.com;mit.edu", "author_num": 7, "aff_unique_index": "0;1;2;1;3;1", "aff_unique_norm": "Altos Labs;Massachusetts Institute of Technology;Harvard University;International Business Machines Corporation", "aff_unique_dep": ";;;", "aff_unique_url": 
"https://altoslabs.com;https://web.mit.edu;https://www.harvard.edu;https://www.ibm.com", "aff_unique_abbr": ";MIT;Harvard;IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Draft, Sketch, and Prove: Guiding Formal Theorem Provers with Informal Proofs", "status": "Top-5%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11536", "id": "SMa9EAovKMC", "poster": "/media/PosterPDFs/ICLR%202023/11536.png?t=1682418006.7213085", "openreview": "https://openreview.net/forum?id=SMa9EAovKMC", "slides": "https://iclr.cc/virtual/2023/poster/11536", "video": "https://iclr.cc/virtual/2023/poster/11536", "author_site": "Qiaochu Jiang, Sean Welleck, Jin Zhou, Timoth\u00e9e Lacroix, Jiacheng Liu, Wenda Li, Mateja Jamnik, Guillaume Lample, Yuhuai Wu", "tldr": "", "abstract": "The formalization of existing mathematical proofs is a notoriously difficult process. Despite decades of research on automation and proof assistants, writing formal proofs remains arduous and only accessible to a few experts. While previous studies to automate formalization focused on powerful search algorithms, no attempts were made to take advantage of available informal proofs. In this work, we introduce Draft, Sketch, and Prove (DSP), a method that maps informal proofs to formal proof sketches, and uses the sketches to guide an automated prover by directing its search to easier sub-problems. We investigate two relevant setups where informal proofs are either written by humans or generated by a language model. Our experiments and ablation studies show that large language models are able to produce well-structured formal sketches that follow the same reasoning steps as the informal proofs. 
Guiding an automated prover with these sketches enhances its performance from $20.9\\%$ to $39.3\\%$ on a collection of mathematical competition problems.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/8c607c5975302ad8ab559737ea1172dc947f1f66.zip", "author": "Albert Qiaochu Jiang;Sean Welleck;Jin Peng Zhou;Timothee Lacroix;Jiacheng Liu;Wenda Li;Mateja Jamnik;Guillaume Lample;Yuhuai Wu", "authorids": "~Albert_Qiaochu_Jiang1;~Sean_Welleck1;~Jin_Peng_Zhou1;~Timothee_Lacroix1;~Jiacheng_Liu2;~Wenda_Li1;~Mateja_Jamnik1;~Guillaume_Lample1;~Yuhuai_Wu1", "gender": ";;M;M;M;M;F;M;M", "homepage": ";;;;https://github.com/liujch1998;https://wenda302.github.io;http://www.cl.cam.ac.uk/~mj201;;http://www.cs.toronto.edu/~ywu/", "dblp": ";;255/1107;https://dblp.org/pers/l/Lacroix:Timoth=eacute=e.html;289/6273;132/9868.html;41/1392;;", "google_scholar": ";;Nf48jqcAAAAJ;tZGS6dIAAAAJ;GJfoBZAAAAAJ;ufYxQkEAAAAJ;d5QiyJkAAAAJ;H7sVDmIAAAAJ;https://scholar.google.ca/citations?user=bOQGfFIAAAAJ", "orcid": ";;;;0000-0003-3308-2869;;0000-0003-2772-2532;;", "linkedin": ";;https://ca.linkedin.com/in/jinpeng-zhou;;liujch1998/;;;;", "or_profile": "~Albert_Qiaochu_Jiang1;~Sean_Welleck1;~Jin_Peng_Zhou1;~Timothee_Lacroix1;~Jiacheng_Liu2;~Wenda_Li1;~Mateja_Jamnik1;~Guillaume_Lample1;~Yuhuai_Wu1", "aff": ";;Department of Computer Science, Cornell University;Meta Facebook;Meta Facebook;University of Cambridge;University of Cambridge;Meta Facebook;Stanford University", "aff_domain": ";;cs.cornell.edu;fb.com;meta.com;cam.ac.uk;cam.ac.uk;fb.com;stanford.edu", "position": ";;PhD student;Research Engineer;Intern;Postdoc;Professor in Artificial Intelligence;Researcher;Postdoc", "bibtex": "@inproceedings{\njiang2023draft,\ntitle={Draft, Sketch, and Prove: Guiding Formal Theorem Provers with Informal Proofs},\nauthor={Albert Qiaochu Jiang and Sean Welleck and Jin Peng Zhou and Timothee Lacroix and Jiacheng Liu and Wenda Li and Mateja Jamnik and Guillaume Lample and Yuhuai Wu},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=SMa9EAovKMC}\n}", "github": "", "project": "", "reviewers": "9yAP;AVNg;AYPt;7zK6", "pdf_size": 2833306, "recommendation": "6;8;8;8", "confidence": "2;2;5;5", "correctness": "3;4;3;4", "technical_novelty": "3;3;3;4", "empirical_novelty": "3;3;3;4", "wc_summary_paper": "190;161;108;63", "wc_strength_and_weaknesses": "360;190;283;69", "wc_clarity_quality_novelty_and_reproducibility": "175;46;34;25", "wc_summary_review": "62;27;36;21", "wc_review": "787;424;461;178", "wc_reply_reviewers": "0;162;0;0", "wc_reply_authors": "1491;844;523;158", "reply_reviewers": "0;2;0;0", "reply_authors": "4;3;1;1", "recommendation_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 1.5 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 130.5, 48.81854155953453 ], "wc_strength_and_weaknesses_avg": [ 225.5, 108.56910241869001 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 70.0, 61.077819214506995 ], "wc_summary_review_avg": [ 36.5, 15.660459763365825 ], "wc_review_avg": [ 462.5, 216.63621580889932 ], "wc_reply_reviewers_avg": [ 40.5, 70.14805770653953 ], "wc_reply_authors_avg": [ 754.0, 489.85865308270303 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.25, 1.299038105676658 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 9, 0 ], 
"corr_recommendation_confidence": 0.5773502691896258, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 172, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12947145545570780140&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=SMa9EAovKMC", "email": ";;cs.cornell.edu;fb.com;meta.com;cam.ac.uk;cam.ac.uk;fb.com;stanford.edu", "author_num": 9, "aff_unique_index": "0;1;1;2;2;1;3", "aff_unique_norm": "Cornell University;Meta;University of Cambridge;Stanford University", "aff_unique_dep": "Department of Computer Science;Meta Platforms, Inc.;;", "aff_unique_url": "https://www.cornell.edu;https://meta.com;https://www.cam.ac.uk;https://www.stanford.edu", "aff_unique_abbr": "Cornell;Meta;Cambridge;Stanford", "aff_campus_unique_index": "1;1;2", "aff_campus_unique": ";Cambridge;Stanford", "aff_country_unique_index": "0;0;0;1;1;0;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "SNONkz5zEUF", "title": "Convergence Analysis of Split Learning on Non-IID Data", "track": "main", "status": "Reject", "tldr": "Convergence Analysis of Split Learning on Non-IID Data", "abstract": "Split Learning (SL) is one promising variant of Federated Learning (FL), where the AI model is split and trained at the clients and the server collaboratively. By offloading the computation-intensive portions to the server, SL enables efficient model training on resource-constrained clients. Despite its booming applications, SL still lacks rigorous convergence analysis on non-IID data, which is critical for hyperparameter selection. In this paper, we first prove that SL exhibits an $\\mathcal{O}(1/\\sqrt{T})$ convergence rate for non-convex objectives on non-IID data, where $T$ is the number of total steps. By comparing the convergence analysis and experimental results, SL can outperform FL in terms of convergence rate (w.r.t. per-client training/communication rounds, and hence, the computation efficiency) and exhibit comparable accuracy to FL on mildly non-IID data. 
In contrast, FL prevails on highly non-IID data.", "keywords": "Federated Learning;Split Learning;Convergence analysis", "primary_area": "", "supplementary_material": "/attachment/d9fac26fb41fe71a85ed93dfa4a3f2cbd42758ff.zip", "author": "Yipeng Li;Xinchen Lyu", "authorids": "~Yipeng_Li1;~Xinchen_Lyu1", "gender": "M;M", "homepage": "https://liyipeng00.github.io/;", "dblp": ";179/9891", "google_scholar": ";https://scholar.google.com.hk/citations?user=eZOOkb4AAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Yipeng_Li1;~Xinchen_Lyu1", "aff": "Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications", "aff_domain": "bupt.edu.cn;bupt.edu.cn", "position": "MS student;Associate Professor", "bibtex": "@misc{\nli2023convergence,\ntitle={Convergence Analysis of Split Learning on Non-{IID} Data},\nauthor={Yipeng Li and Xinchen Lyu},\nyear={2023},\nurl={https://openreview.net/forum?id=SNONkz5zEUF}\n}", "github": "", "project": "", "reviewers": "4bP6;434u;SLb6", "site": "https://openreview.net/forum?id=SNONkz5zEUF", "pdf_size": 962604, "recommendation": "5;6;6", "confidence": "4;4;3", "correctness": "3;4;3", "technical_novelty": "2;4;2", "empirical_novelty": "2;4;2", "wc_summary_paper": "64;46;23", "wc_strength_and_weaknesses": "246;363;2", "wc_clarity_quality_novelty_and_reproducibility": "65;71;2", "wc_summary_review": "125;28;367", "wc_review": "500;508;394", "wc_reply_reviewers": "0;235;204", "wc_reply_authors": "1558;1653;2124", "reply_reviewers": "0;3;4", "reply_authors": "3;4;5", "recommendation_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.9428090415820634 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.9428090415820634 ], "wc_summary_paper_avg": [ 44.333333333333336, 16.779617264870957 ], "wc_strength_and_weaknesses_avg": [ 203.66666666666666, 150.38690841367216 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 46.0, 31.20897306865447 ], "wc_summary_review_avg": [ 173.33333333333334, 142.55369358790938 ], "wc_review_avg": [ 467.3333333333333, 51.95724738239657 ], "wc_reply_reviewers_avg": [ 146.33333333333334, 104.24437101776235 ], "wc_reply_authors_avg": [ 1778.3333333333333, 247.48108794186453 ], "reply_reviewers_avg": [ 2.3333333333333335, 1.699673171197595 ], "reply_authors_avg": [ 4.0, 0.816496580927726 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.4999999999999999, "corr_recommendation_correctness": 0.4999999999999999, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "aff_unique_index": "0;0", "aff_unique_norm": "Beijing University of Posts and Telecommunications", "aff_unique_dep": "", "aff_unique_url": "http://www.bupt.edu.cn/", "aff_unique_abbr": "BUPT", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Beijing", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "SNZxVIFZBIq", "title": "Radial Spike and Slab Bayesian Neural Networks for Sparse Data in Ransomware Attacks", "track": "main", "status": "Reject", "tldr": "", "abstract": "Ransomware attacks are increasing at an alarming rate, leading to large financial losses, unrecoverable encrypted data, data leakage, and privacy concerns. The prompt detection of ransomware attacks is required to minimize further damage, particularly during the encryption stage. 
However, the frequency and structure of the observed ransomware attack data make this task difficult to accomplish in practice. The data corresponding to ransomware attacks represents temporal, high-dimensional sparse signals, with limited records and very imbalanced classes. While traditional deep learning models have been able to achieve state-of-the-art results in a wide variety of domains, Bayesian Neural Networks, which are a class of probabilistic models, are better suited to the issues of the ransomware data. These models combine ideas from Bayesian statistics with the rich expressive power of neural networks. In this paper, we propose the Radial Spike and Slab Bayesian Neural Network, which is a new type of Bayesian Neural Network that includes a new form of the approximate posterior distribution. The model scales well to large architectures and recovers the sparse structure of target functions. We provide a theoretical justification for using this type of distribution, as well as a computationally efficient method to perform variational inference. We demonstrate the performance of our model on a real dataset of ransomware attacks and show improvement over a large number of baselines, including state-of-the-art models such as Neural ODEs (ordinary differential equations). In addition, we propose to represent low-level events as MITRE ATT&CK tactics, techniques, and procedures (TTPs), which allows the model to better generalize to unseen ransomware attacks.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Jurijs Nazarovs;Jack W Stokes;Melissa Turcotte;Justin Carroll;Itai Grady", "authorids": "~Jurijs_Nazarovs1;~Jack_W_Stokes1;~Melissa_Turcotte1;justin.carroll@microsoft.com;igrady@microsoft.com", "gender": "M;;;;", "homepage": "https://stat.wisc.edu/staff/nazarovs-jurijs/;;;;", "dblp": ";;;;", "google_scholar": ";;;;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Jurijs_Nazarovs1;~Jack_W_Stokes1;~Melissa_Turcotte1;justin.carroll@microsoft.com;igrady@microsoft.com", "aff": "University of Wisconsin, Madison;;;;", "aff_domain": "wisc.edu;;;;", "position": "PhD student;;;;", "bibtex": "@misc{\nnazarovs2023radial,\ntitle={Radial Spike and Slab Bayesian Neural Networks for Sparse Data in Ransomware Attacks},\nauthor={Jurijs Nazarovs and Jack W Stokes and Melissa Turcotte and Justin Carroll and Itai Grady},\nyear={2023},\nurl={https://openreview.net/forum?id=SNZxVIFZBIq}\n}", "github": "", "project": "", "reviewers": "F9pu;qQmb;zqwu", "site": "https://openreview.net/forum?id=SNZxVIFZBIq", "pdf_size": 1082627, "recommendation": "3;5;6", "confidence": "4;4;3", "correctness": "3;4;3", "technical_novelty": "2;2;3", "empirical_novelty": "2;2;2", "wc_summary_paper": "105;220;76", "wc_strength_and_weaknesses": "431;75;100", "wc_clarity_quality_novelty_and_reproducibility": "27;170;40", "wc_summary_review": "48;59;20", "wc_review": "611;524;236", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "736;344;316", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 133.66666666666666, 62.184313849144374 ], "wc_strength_and_weaknesses_avg": [ 202.0, 162.24878017004215 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 79.0, 
64.56521251158914 ], "wc_summary_review_avg": [ 42.333333333333336, 16.418147141366337 ], "wc_review_avg": [ 457.0, 160.25604512778918 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 465.3333333333333, 191.73129345229194 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.7559289460184545, "corr_recommendation_correctness": 0.18898223650461363, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3775587112430585665&as_sdt=5,39&sciodt=0,39&hl=en", "gs_version_total": 5, "aff_unique_index": "0", "aff_unique_norm": "University of Wisconsin", "aff_unique_dep": "", "aff_unique_url": "https://www.wisc.edu", "aff_unique_abbr": "UW", "aff_campus_unique_index": "0", "aff_campus_unique": "Madison", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Revisiting the Entropy Semiring for Neural Speech Recognition", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12175", "id": "SNgLnzFQeiD", "poster": "", "openreview": "https://openreview.net/forum?id=SNgLnzFQeiD", "slides": "https://iclr.cc/virtual/2023/poster/12175", "video": "https://iclr.cc/virtual/2023/poster/12175", "author_site": "Oscar Chang, Dongseong Hwang, Olivier Siohan", "tldr": "A numerically stable open-source implementation of the entropy semiring for CTC and RNN-T; obtained SOTA on Librispeech streaming.", "abstract": "In streaming settings, speech recognition models have to map sub-sequences of speech to text before the full audio stream becomes available. However, since alignment information between speech and text is rarely available during training, models need to learn it in a completely self-supervised way. In practice, the exponential number of possible alignments makes this extremely challenging, with models often learning peaky or sub-optimal alignments. Prima facie, the exponential nature of the alignment space makes it difficult to even quantify the uncertainty of a model's alignment distribution. Fortunately, it has been known for decades that the entropy of a probabilistic finite state transducer can be computed in time linear in the size of the transducer via a dynamic programming reduction based on semirings. In this work, we revisit the entropy semiring for neural speech recognition models, and show how alignment entropy can be used to supervise models through regularization or distillation. We also contribute an open-source implementation of CTC and RNN-T in the semiring framework that includes numerically stable and highly parallel variants of the entropy semiring. 
Empirically, we observe that the addition of alignment distillation improves the accuracy and latency of an already well-optimized teacher-student distillation model, achieving state-of-the-art performance on the Librispeech dataset in the streaming scenario.", "keywords": "semiring;asr;ctc;rnn-t;entropy;regularization;distillation;streaming;speech recognition", "primary_area": "", "supplementary_material": "/attachment/ff55320be5ba1def2b9255f1fa67b64ad2352e3d.zip", "author": "Oscar Chang;Dongseong Hwang;Olivier Siohan", "authorids": "~Oscar_Chang1;~Dongseong_Hwang1;~Olivier_Siohan1", "gender": "Unspecified;M;", "homepage": ";https://github.com/ds-hwang;", "dblp": "45/984;303/4326.html;66/58", "google_scholar": "eEVU18YAAAAJ;YePvlA8AAAAJ;yMA0gioAAAAJ", "orcid": ";0000-0001-7369-5390;0000-0001-9749-4091", "linkedin": ";dongseong-hwang-5ba79547/;", "or_profile": "~Oscar_Chang1;~Dongseong_Hwang1;~Olivier_Siohan1", "aff": "Google;Google;Google", "aff_domain": "google.com;google.com;google.com", "position": "Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\nchang2023revisiting,\ntitle={Revisiting the Entropy Semiring for Neural Speech Recognition},\nauthor={Oscar Chang and Dongseong Hwang and Olivier Siohan},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=SNgLnzFQeiD}\n}", "github": "", "project": "", "reviewers": "3ceX;NK1H;dC8C;jsaa", "pdf_size": 892168, "recommendation": "6;8;10;10", "confidence": "3;3;4;5", "correctness": "3;4;4;3", "technical_novelty": "3;3;4;3", "empirical_novelty": "3;3;4;4", "wc_summary_paper": "38;194;109;95", "wc_strength_and_weaknesses": "75;108;37;100", "wc_clarity_quality_novelty_and_reproducibility": "31;180;16;106", "wc_summary_review": "104;71;19;118", "wc_review": "248;553;181;419", "wc_reply_reviewers": "0;0;0;19", "wc_reply_authors": "392;836;12;492", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;2", "recommendation_avg": [ 8.5, 1.6583123951777 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.5, 0.5 ], "wc_summary_paper_avg": [ 109.0, 55.81666417836164 ], "wc_strength_and_weaknesses_avg": [ 80.0, 27.649593125396983 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 83.25, 65.44224552993273 ], "wc_summary_review_avg": [ 78.0, 38.09855640309748 ], "wc_review_avg": [ 350.25, 145.7178352158719 ], "wc_reply_reviewers_avg": [ 4.75, 8.227241335952167 ], "wc_reply_authors_avg": [ 433.0, 293.60347409388737 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.8181818181818182, "corr_recommendation_correctness": 0.30151134457776363, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11367801936358018524&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=SNgLnzFQeiD", "email": "google.com;google.com;google.com", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Near-Optimal Deployment Efficiency in Reward-Free Reinforcement Learning with Linear 
Function Approximation", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11300", "id": "SNwH0dDGl7_", "poster": "/media/PosterPDFs/ICLR%202023/11300.png?t=1680764332.0446444", "openreview": "https://openreview.net/forum?id=SNwH0dDGl7_", "slides": "https://iclr.cc/virtual/2023/poster/11300", "video": "https://iclr.cc/virtual/2023/poster/11300", "author_site": "Dan Qiao, Yu-Xiang Wang", "tldr": "We design algorithms for reward free RL under linear MDP with near-optimal deployment complexity and sample complexity.", "abstract": "We study the problem of deployment efficient reinforcement learning (RL) with linear function approximation under the \\emph{reward-free} exploration setting. This is a well-motivated problem because deploying new policies is costly in real-life RL applications. Under the linear MDP setting with feature dimension $d$ and planning horizon $H$, we propose a new algorithm that collects at most $\\widetilde{O}(\\frac{d^2H^5}{\\epsilon^2})$ trajectories within $H$ deployments to identify $\\epsilon$-optimal policy for any (possibly data-dependent) choice of reward functions. To the best of our knowledge, our approach is the first to achieve optimal deployment complexity and optimal $d$ dependence in sample complexity at the same time, even if the reward is known ahead of time. Our novel techniques include an exploration-preserving policy discretization and a generalized G-optimal experiment design, which could be of independent interest. Lastly, we analyze the related problem of regret minimization in low-adaptive RL and provide information-theoretic lower bounds for switching cost and batch complexity.", "keywords": "Reinforcement Learning;Deployment efficiency;Reward free RL;Low adaptive RL", "primary_area": "", "supplementary_material": "/attachment/7a1abab21e5eef9eaadb5eefa3223910c8005728.zip", "author": "Dan Qiao;Yu-Xiang Wang", "authorids": "~Dan_Qiao1;~Yu-Xiang_Wang1", "gender": "M;", "homepage": ";http://www.cs.ucsb.edu/~yuxiangw/publications.html", "dblp": ";62/1637-3.html", "google_scholar": "EyfAUuUAAAAJ;HGNZ1fkAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Dan_Qiao1;~Yu-Xiang_Wang1", "aff": ", University of California, Santa Barbara;UC Santa Barbara", "aff_domain": "cs.ucsb.edu;ucsb.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nqiao2023nearoptimal,\ntitle={Near-Optimal Deployment Efficiency in Reward-Free Reinforcement Learning with Linear Function Approximation},\nauthor={Dan Qiao and Yu-Xiang Wang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=SNwH0dDGl7_}\n}", "github": "", "project": "", "reviewers": "y46E;i7oS;idiY;W2e3", "pdf_size": 549528, "recommendation": "5;6;6;6", "confidence": "4;3;3;2", "correctness": "3;3;3;4", "technical_novelty": "3;3;3;3", "empirical_novelty": "0;0;0;0", "wc_summary_paper": "60;31;56;23", "wc_strength_and_weaknesses": "431;150;130;94", "wc_clarity_quality_novelty_and_reproducibility": "29;21;10;20", "wc_summary_review": "50;18;16;25", "wc_review": "570;220;212;162", "wc_reply_reviewers": "360;0;0;28", "wc_reply_authors": "1655;513;365;448", "reply_reviewers": "1;0;0;1", "reply_authors": "3;1;1;1", "recommendation_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 0, 0 ], "wc_summary_paper_avg": [ 42.5, 15.819292019556375 
], "wc_strength_and_weaknesses_avg": [ 201.25, 134.15546019450719 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 20.0, 6.745368781616021 ], "wc_summary_review_avg": [ 27.25, 13.5531361684298 ], "wc_review_avg": [ 291.0, 162.60688792299052 ], "wc_reply_reviewers_avg": [ 97.0, 152.27278154680172 ], "wc_reply_authors_avg": [ 745.25, 527.8571658128741 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.816496580927726, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12455642668856852397&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=SNwH0dDGl7_", "email": "cs.ucsb.edu;ucsb.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of California, Santa Barbara", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsb.edu", "aff_unique_abbr": "UCSB", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Santa Barbara", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "SNzzt94tGzP", "title": "Learn to Know Unknowns: A Bionic Memory Network for Unsupervised Anomaly Detection", "track": "main", "status": "Reject", "tldr": "We proposed a biomimetic neural network for unsupervised anomaly detection inspired by the hippocampus-cortex cascade, enabling the model to know the unknowns.", "abstract": "Is generalization always beneficial? Over-strong generalization induces the model insensitive to anomalies. Unsupervised anomaly detection requires only unlabeled non-anomalous data to learn and generalize normal patterns, which results in a modest reconstruction error when reconstructing normal instances and a significant reconstruction error when reconstructing anomalies. However, over-strong generalization leads to the indistinguishable reconstruction error of normal instances and anomalies, which means that the model well reconstructs the unknown anomalies, resulting in unnoticeable reconstruction error. Inspired by the cascade structure of the hippocampus and cortex in human brain memory, we proposed a re-representation memory network called Random Forgetting Twin Memory (RFTM) to decompose the latent space and introduce a configurable reintegration mechanism to suppress overgeneralization. RFTM shows striking brain-like memory characteristics, which enables the model to know what it does not know. RFTM has the convenience of a single line of code boosting at the model level without adding any additional extra loss terms at the loss function level. 
RFTM-based models have achieved state-of-the-art experimental results on different public benchmarks.", "keywords": "Unsupervised learning;Anomaly detection;Memory bank", "primary_area": "", "supplementary_material": "", "author": "Jiahao Li;Yiqiang Chen;Yunbing Xing", "authorids": "~Jiahao_Li6;~Yiqiang_Chen1;xingyunbing@ict.ac.cn", "gender": "M;M;", "homepage": "https://lijiahao-alex.github.io/homepage/;http://www.ict.cas.cn/sourcedb_2018_ict_cas/cn/jssrck/200909/t20090917_2496596.html;", "dblp": ";;", "google_scholar": "aNFn6jkAAAAJ;LC3SwhEAAAAJ;", "orcid": "0000-0001-6011-688X;;", "linkedin": ";;", "or_profile": "~Jiahao_Li6;~Yiqiang_Chen1;xingyunbing@ict.ac.cn", "aff": "Chinese Academy of Sciences;Chinese Academy of Sciences;", "aff_domain": "ict.ac.cn;ict.ac.cn;", "position": "PhD student;Full Professor;", "bibtex": "@misc{\nli2023learn,\ntitle={Learn to Know Unknowns: A Bionic Memory Network for Unsupervised Anomaly Detection},\nauthor={Jiahao Li and Yiqiang Chen and Yunbing Xing},\nyear={2023},\nurl={https://openreview.net/forum?id=SNzzt94tGzP}\n}", "github": "", "project": "", "reviewers": "P1F7;c8kV;8c7P;ZPFH", "site": "https://openreview.net/forum?id=SNzzt94tGzP", "pdf_size": 6682996, "recommendation": "3;3;5;5", "confidence": "4;5;3;3", "correctness": "1;2;4;3", "technical_novelty": "1;1;2;2", "empirical_novelty": "1;0;3;2", "wc_summary_paper": "89;32;52;123", "wc_strength_and_weaknesses": "187;369;134;209", "wc_clarity_quality_novelty_and_reproducibility": "100;22;61;82", "wc_summary_review": "41;18;54;26", "wc_review": "417;441;301;440", "wc_reply_reviewers": "92;0;0;0", "wc_reply_authors": "1346;579;361;840", "reply_reviewers": "1;0;0;0", "reply_authors": "2;1;1;2", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 2.5, 1.118033988749895 ], "technical_novelty_avg": [ 1.5, 0.5 ], "empirical_novelty_avg": [ 1.5, 1.118033988749895 ], "wc_summary_paper_avg": [ 74.0, 34.90701935141412 ], "wc_strength_and_weaknesses_avg": [ 224.75, 87.63097340552596 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 66.25, 29.03769102390891 ], "wc_summary_review_avg": [ 34.75, 13.845125496000389 ], "wc_review_avg": [ 399.75, 57.8159796250137 ], "wc_reply_reviewers_avg": [ 23.0, 39.83716857408418 ], "wc_reply_authors_avg": [ 781.5, 367.3925012843893 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.9045340337332909, "corr_recommendation_correctness": 0.8944271909999159, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:lDzqO2uSZ_4J:scholar.google.com/&scioq=Learn+to+Know+Unknowns:+A+Bionic+Memory+Network+for+Unsupervised+Anomaly+Detection&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Chinese Academy of Sciences", "aff_unique_dep": "", "aff_unique_url": "https://www.cas.cn", "aff_unique_abbr": "CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Analogy-Forming Transformers for Few-Shot 3D Parsing", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10990", "id": "SRIQZTh0IK", "poster": "/media/PosterPDFs/ICLR%202023/10990.png?t=1681628231.142513", "openreview": "https://openreview.net/forum?id=SRIQZTh0IK", "slides": "https://iclr.cc/virtual/2023/poster/10990", "video": "https://iclr.cc/virtual/2023/poster/10990", 
"author_site": "Nikolaos Gkanatsios, Mayank Singh, Zhaoyuan Fang, Shubham Tulsiani, Katerina Fragkiadaki", "tldr": "", "abstract": "We present Analogical Networks, a model that segments 3D object scenes with analogical reasoning: instead of mapping a scene to part segments directly, our model first retrieves related scenes from memory and their corresponding part structures, and then predicts analogous part structures in the input object 3D point cloud, via an end-to-end learnable modulation mechanism. By conditioning on more than one retrieved memories, compositions of structures are predicted, that mix and match parts across the retrieved memories. One-shot, few-shot or many-shot learning are treated uniformly in Analogical Networks, by conditioning on the appropriate set of memories, whether taken from a single, few or many memory exemplars, and inferring analogous parses. We show Analogical Networks are competitive with state-of-the-art 3D segmentation transformer in many-shot settings and outperform them and existing paradigms of meta-learning and few-shot learning in few-shot scenarios. Our model successfully parses instances of novel object categories simply by expanding its memory, without any weight updates.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Nikolaos Gkanatsios;Mayank Singh;Zhaoyuan Fang;Shubham Tulsiani;Katerina Fragkiadaki", "authorids": "~Nikolaos_Gkanatsios1;~Mayank_Singh2;~Zhaoyuan_Fang1;~Shubham_Tulsiani1;~Katerina_Fragkiadaki1", "gender": "M;M;;M;F", "homepage": "https://nickgkan.github.io/;https://msingh27.github.io/;;https://shubhtuls.github.io/;https://www.cs.cmu.edu/~katef/", "dblp": "225/5677;96/4770-7;;135/6623;21/8780", "google_scholar": "https://scholar.google.gr/citations?user=jk7GqOEAAAAJ;RgeKqSAAAAAJ;;06rffEkAAAAJ;FWp7728AAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Nikolaos_Gkanatsios1;~Mayank_Singh2;~Zhaoyuan_Fang1;~Shubham_Tulsiani1;~Katerina_Fragkiadaki1", "aff": "Carnegie Mellon University;;;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cmu.edu;;;cmu.edu;cmu.edu", "position": "Graduate student;;;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\ngkanatsios2023analogyforming,\ntitle={Analogy-Forming Transformers for Few-Shot 3D Parsing},\nauthor={Nikolaos Gkanatsios and Mayank Singh and Zhaoyuan Fang and Shubham Tulsiani and Katerina Fragkiadaki},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=SRIQZTh0IK}\n}", "github": "", "project": "", "reviewers": "mbsn;edrP;feEi;MKmk", "pdf_size": 5184366, "recommendation": "5;6;8;8", "confidence": "4;4;3;4", "correctness": "3;3;4;3", "technical_novelty": "3;3;3;3", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "38;73;96;79", "wc_strength_and_weaknesses": "162;13;71;324", "wc_clarity_quality_novelty_and_reproducibility": "20;84;108;192", "wc_summary_review": "48;23;52;106", "wc_review": "268;193;327;701", "wc_reply_reviewers": "217;0;0;24", "wc_reply_authors": "1943;765;371;1827", "reply_reviewers": "2;0;0;1", "reply_authors": "5;1;1;4", "recommendation_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 71.5, 21.10094784600919 ], "wc_strength_and_weaknesses_avg": [ 142.5, 117.47872147755099 ], 
"wc_clarity_quality_novelty_and_reproducibility_avg": [ 101.0, 61.603571325045756 ], "wc_summary_review_avg": [ 57.25, 30.26032881513352 ], "wc_review_avg": [ 372.25, 195.6545105536798 ], "wc_reply_reviewers_avg": [ 60.25, 91.0284982848778 ], "wc_reply_authors_avg": [ 1226.5, 674.3209547389137 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.75, 1.7853571071357126 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.5555555555555555, "corr_recommendation_correctness": 0.5555555555555555, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3088051357495606259&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=SRIQZTh0IK", "email": "cmu.edu;;;cmu.edu;cmu.edu", "author_num": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "SUcUqu_X30", "title": "Attention De-sparsification Matters: Inducing Diversity in Digital Pathology Representation Learning", "track": "main", "status": "Reject", "tldr": "We introduce Di-SSL, a diversity-inducing self-supervised learning method to enhance the representation learning in Digital Pathology.", "abstract": "In this work, we develop Di-SSL, a Diversity-inducing Self-Supervised Learning technique for histopathology image analysis. SSL techniques, such as contrastive and non-contrastive approaches, have been shown to learn rich and effective rep- resentations without any human supervision. Lately, computational pathology has also benefited from the resounding success of SSL. In this work, we develop a novel domain-aware pretext task to enhance representation learning in digital pathology. Our analysis of vanilla SSL-pretrained models\u2019 attention distribution reveals an insightful observation: sparsity in attention, i.e, models tends to localize most of their attention to some prominent patterns in the image. Although atten- tion sparsity can be beneficial in natural images due to these prominent patterns being the object of interest itself, this can be sub-optimal in digital pathology; this is because, unlike natural images, digital pathology scans are not object-centric, but rather a complex phenotype of various spatially intermixed biological com- ponents. Inadequate diversification of attention in these complex images could result in crucial information loss. To address this, we first leverage cell segmenta- tion to densely extract multiple histopathology-specific representations. We then propose a dense pretext task for SSL, designed to match the multiple correspond- ing representations between the views. Through this, the model learns to attend to various components more closely and evenly, thus inducing adequate diversi- fication in attention for capturing context rich representations. Through quantita- tive and qualitative analysis on multiple slide-level tasks across cancer types, and patch-level classification tasks, we demonstrate the efficacy of our method and observe that the attention is more globally distributed. 
Specifically, we obtain a relative improvement in accuracy of up to 6.9% in slide-level and 2% in patch-level classification tasks (corresponding AUC improvement up to 7.9% and 0.7%, respectively) over a baseline SSL model.", "keywords": "Computational pathology;Cell segmentation;Self supervised learning;Vision Transformer;Sparse attention", "primary_area": "", "supplementary_material": "", "author": "Saarthak Kapse;Srijan Das;Jingwei Zhang;Rajarsi R. Gupta;Joel Saltz;Dimitris Samaras;Prateek Prasanna", "authorids": "~Saarthak_Kapse1;~Srijan_Das1;~Jingwei_Zhang7;~Rajarsi_R._Gupta1;~Joel_Saltz1;~Dimitris_Samaras3;~Prateek_Prasanna3", "gender": "M;M;M;M;M;M;M", "homepage": "https://saarthak-kapse.github.io/;https://srijandas07.github.io/;;;https://bmi.stonybrookmedicine.edu/people/joel_saltz;https://www.cs.stonybrook.edu/~samaras/;https://you.stonybrook.edu/imaginelab/", "dblp": "270/3809;173/0062;;211/7730;s/JoelHSaltz.html;s/DimitrisSamaras;133/6611", "google_scholar": "WgDf0fcAAAAJ;ZDTF5AEAAAAJ;8yA5YncAAAAJ;LcEPA3cAAAAJ;_0dkufgAAAAJ;https://scholar.google.com/citations?hl=en;uyA1Q18AAAAJ", "orcid": "0000-0002-5426-4111;;0000-0001-6878-5586;0000-0002-1577-8718;0000-0002-3451-2165;0000-0002-1373-0294;", "linkedin": "saarthak-kapse-77b49116b/;;;;joel-saltz-5a68ba16/;;", "or_profile": "~Saarthak_Kapse1;~Srijan_Das1;~Jingwei_Zhang7;~Rajarsi_R._Gupta1;~Joel_Saltz1;~Dimitris_Samaras3;~Prateek_Prasanna3", "aff": "State University of New York at Stony Brook;University of North Carolina at Charlotte;, State University of New York at Stony Brook;Academic medical center at State University of New York at Stony Brook;Academic medical center at State University of New York at Stony Brook;Stony Brook University;State University of New York, Stony Brook", "aff_domain": "stonybrook.edu;uncc.edu;cs.stonybrook.edu;stonybrookmedicine.edu;stonybrookmedicine.edu;cs.stonybrook.edu;stonybrook.edu", "position": "PhD student;Assistant Professor;PhD student;Assistant Professor;Full Professor;Full Professor;Assistant Professor", "bibtex": "@misc{\nkapse2023attention,\ntitle={Attention De-sparsification Matters: Inducing Diversity in Digital Pathology Representation Learning},\nauthor={Saarthak Kapse and Srijan Das and Jingwei Zhang and Rajarsi R. 
Gupta and Joel Saltz and Dimitris Samaras and Prateek Prasanna},\nyear={2023},\nurl={https://openreview.net/forum?id=SUcUqu_X30}\n}", "github": "", "project": "", "reviewers": "6NiW;nwTd;vgEj", "site": "https://openreview.net/forum?id=SUcUqu_X30", "pdf_size": 6271357, "recommendation": "6;6;6", "confidence": "4;4;4", "correctness": "3;3;3", "technical_novelty": "3;2;3", "empirical_novelty": "2;3;3", "wc_summary_paper": "75;60;253", "wc_strength_and_weaknesses": "178;313;284", "wc_clarity_quality_novelty_and_reproducibility": "41;37;127", "wc_summary_review": "47;81;40", "wc_review": "341;491;704", "wc_reply_reviewers": "0;50;394", "wc_reply_authors": "631;1635;1674", "reply_reviewers": "0;2;2", "reply_authors": "1;5;6", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 129.33333333333334, 87.65969554032355 ], "wc_strength_and_weaknesses_avg": [ 258.3333333333333, 58.02489887013065 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 68.33333333333333, 41.51572660517404 ], "wc_summary_review_avg": [ 56.0, 17.90716802475106 ], "wc_review_avg": [ 512.0, 148.93622796351463 ], "wc_reply_reviewers_avg": [ 148.0, 175.14184727433553 ], "wc_reply_authors_avg": [ 1313.3333333333333, 482.7451593634978 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.9428090415820634 ], "reply_authors_avg": [ 4.0, 2.160246899469287 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5714268715148740378&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;0;0;0;2;3", "aff_unique_norm": "State University of New York at Stony Brook;University of North Carolina at Charlotte;Stony Brook University;State University of New York", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.stonybrook.edu;https://www.uncc.edu;https://www.stonybrook.edu;https://www.stonybrook.edu", "aff_unique_abbr": "SUNY Stony Brook;UNCC;SBU;SUNY Stony Brook", "aff_campus_unique_index": "0;1;0;0;0;0", "aff_campus_unique": "Stony Brook;Charlotte;", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "CASR: Generating Complex Sequences with Autoregressive Self-Boost Refinement", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10749", "id": "SVl1w1u3InX", "poster": "/media/PosterPDFs/ICLR%202023/10749.png?t=1680795348.581872", "openreview": "https://openreview.net/forum?id=SVl1w1u3InX", "slides": "https://iclr.cc/virtual/2023/poster/10749", "video": "https://iclr.cc/virtual/2023/poster/10749", "author_site": "Hongwei Han, Mengyu Zhou, Shi Han, Xiu Li, Dongmei Zhang", "tldr": "CASR improves left-to-right autoregressive generation without heuristic intermediate sequences for complex answers via self-boost refinement", "abstract": "There are sequence generation tasks where the best order to generate the target sequence is not left-to-right. Examples include an answer to the Sudoku game, structured code such as an s-expression, and even a logical natural language answer where the analysis may be generated after the decision. We define the target sequences of those tasks as complex sequences. 
By nature, a complex sequence is constructed through multiple logical steps and has dependencies among its parts (e.g., decisions depend on analyses). Generating complex sequences is a great challenge for classic left-to-right autoregressive generation systems. Current approaches improve one-pass left-to-right generation on NLG tasks by generating different heuristic intermediate sequences in multiple stages. However, for complex sequences, the heuristic rules used to break them down may hurt performance and introduce additional exposure bias. To tackle these challenges, we propose a PLM-friendly autoregressive self-boost refinement framework, CASR. During training, CASR inputs the predictions generated by the model itself at the previous refinement step (instead of those produced by heuristic rules). To find an optimal design, we also discuss model architecture, parameter efficiency and initialization strategy. By evaluating CASR on Sudoku, WebQSP, MTOP and KVRET through controlled experiments and empirical studies, we find that CASR produces high-quality outputs. CASR also improves accuracy on Sudoku (70.93% --> 97.28%) and achieves state-of-the-art performance on KVRET with Micro F1 score (67.88% --> 70.00%).", "keywords": "self-boost refinement;complex answers;autoregressive generation", "primary_area": "", "supplementary_material": "/attachment/986cb4e712538f8ebae6f77762665143f4020e21.zip", "author": "Hongwei Han;Mengyu Zhou;Shi Han;Xiu Li;Dongmei Zhang", "authorids": "~Hongwei_Han1;~Mengyu_Zhou1;~Shi_Han1;~Xiu_Li1;~Dongmei_Zhang2", "gender": "M;M;M;F;", "homepage": ";http://zmy.io;https://www.microsoft.com/en-us/research/people/shihan/;https://thusigsiclab.github.io/thu.github.io/introduction.html;https://www.microsoft.com/en-us/research/people/dongmeiz/", "dblp": ";181/1084.html;23/3395;13/1206-1;87/461-1", "google_scholar": ";Pvnsg6kAAAAJ;wLabxmYAAAAJ;https://scholar.google.com/citations?hl=zh-CN;jLlBBl4AAAAJ", "orcid": "0000-0002-9693-9922;0000-0002-0322-7513;0000-0002-0360-6089;0000-0003-0403-1923;0000-0002-9230-2799", "linkedin": "%E5%AE%8F%E7%82%9C-%E9%9F%A9-175a971aa/;zmoony/;shi-han-86888526/;;dongmei-zhang-38a86317/", "or_profile": "~Hongwei_Han1;~Mengyu_Zhou1;~Shi_Han1;~Xiu_Li1;~Dongmei_Zhang2", "aff": "Tsinghua University;Microsoft Research;Microsoft;Tsinghua University;Microsoft", "aff_domain": "tsinghua.edu.cn;microsoft.com;microsoft.com;tsinghua.edu.cn;microsoft.com", "position": "MS student;Principal Researcher;Researcher;Professor;Assistant Managing Director, Microsoft Research Asia", "bibtex": "@inproceedings{\nhan2023casr,\ntitle={{CASR}: Generating Complex Sequences with Autoregressive Self-Boost Refinement},\nauthor={Hongwei Han and Mengyu Zhou and Shi Han and Xiu Li and Dongmei Zhang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=SVl1w1u3InX}\n}", "github": "", "project": "", "reviewers": "B3Ea;rnaL;Nx7n", "pdf_size": 1184592, "recommendation": "6;6;6", "confidence": "3;4;3", "correctness": "4;3;3", "technical_novelty": "2;3;2", "empirical_novelty": "2;3;2", "wc_summary_paper": "76;146;73", "wc_strength_and_weaknesses": "199;321;220", "wc_clarity_quality_novelty_and_reproducibility": "75;94;9", "wc_summary_review": "30;50;64", "wc_review": "380;611;366", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "176;568;269", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 
0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 98.33333333333333, 33.7276675083759 ], "wc_strength_and_weaknesses_avg": [ 246.66666666666666, 53.25619421459088 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 59.333333333333336, 36.42648609032841 ], "wc_summary_review_avg": [ 48.0, 13.9522996909709 ], "wc_review_avg": [ 452.3333333333333, 112.33976242730009 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 337.6666666666667, 167.2370240772725 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:t9Znzvfty8QJ:scholar.google.com/&scioq=CASR:+Generating+Complex+Sequences+with+Autoregressive+Self-Boost+Refinement&hl=en&as_sdt=0,5", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=SVl1w1u3InX", "email": "tsinghua.edu.cn;microsoft.com;microsoft.com;tsinghua.edu.cn;microsoft.com", "author_num": 5, "aff_unique_index": "0;1;1;0;1", "aff_unique_norm": "Tsinghua University;Microsoft", "aff_unique_dep": ";Microsoft Research", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "THU;MSR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;1", "aff_country_unique": "China;United States" }, { "id": "SWPFPk9Tm81", "title": "CLEEGN: A Convolutional Neural Network for Plug-and-Play Automatic EEG Reconstruction", "track": "main", "status": "Reject", "tldr": "A novel CNN model for training-free online EEG reconstruction with the SOTA performance.", "abstract": "Human electroencephalography (EEG) is a brain monitoring modality that senses cortical neuroelectrophysiological activity in high-temporal resolution. One of the greatest challenges posed in applications of EEG is the unstable signal quality susceptible to inevitable artifacts during recordings. To date, most existing techniques for EEG artifact removal and reconstruction are applicable to offline analysis solely, or require individualized training data to facilitate online reconstruction. We have proposed CLEEGN, a novel convolutional neural network for plug-and-play automatic EEG reconstruction. CLEEGN is based on a subject-independent pre-trained model using existing data and can operate on a new user without any further calibration. The performance of CLEEGN was validated using multiple evaluations including waveform observation, reconstruction error assessment, and decoding accuracy on well-studied labeled datasets. The results of simulated online validation suggest that, even without any calibration, CLEEGN can largely preserve inherent brain activity and outperforms leading online/offline artifact removal methods in the decoding accuracy of reconstructed EEG data. In addition, visualization of model parameters and latent features exhibit the model behavior and reveal explainable insights related to existing knowledge of neuroscience. 
We foresee pervasive applications of CLEEGN in prospective works of online plug-and-play EEG decoding and analysis.", "keywords": "EEG;Brain-computer interface;EEG artifact removal;convolutional neural network", "primary_area": "", "supplementary_material": "/attachment/f2d327efe9c885bd73ff06b574d21de643db2b8a.zip", "author": "Pin-Hua Lai;Wei-Chun Yang;Hsiang-Chieh Tsou;Chun-Shu Wei", "authorids": "~Pin-Hua_Lai1;e86ric1224@gmail.com;hsjts0u.cs08@nctu.edu.tw;~Chun-Shu_Wei1", "gender": "M;;;", "homepage": "https://github.com/cemeteryparty;;;", "dblp": ";;;", "google_scholar": ";;;tcob660AAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Pin-Hua_Lai1;e86ric1224@gmail.com;hsjts0u.cs08@nctu.edu.tw;~Chun-Shu_Wei1", "aff": "National Yang Ming Chiao Tung University;;;National Chiao Tung University, National Chiao Tung University", "aff_domain": "nycu.edu.tw;;;cs.nctu.edu.tw", "position": "MS student;;;Assistant Professor", "bibtex": "@misc{\nlai2023cleegn,\ntitle={{CLEEGN}: A Convolutional Neural Network for Plug-and-Play Automatic {EEG} Reconstruction},\nauthor={Pin-Hua Lai and Wei-Chun Yang and Hsiang-Chieh Tsou and Chun-Shu Wei},\nyear={2023},\nurl={https://openreview.net/forum?id=SWPFPk9Tm81}\n}", "github": "", "project": "", "reviewers": "fmQB;8xpw;KYwV;sQXi", "site": "https://openreview.net/forum?id=SWPFPk9Tm81", "pdf_size": 3787857, "recommendation": "3;5;5;8", "confidence": "5;4;3;3", "correctness": "2;3;3;4", "technical_novelty": "1;2;2;3", "empirical_novelty": "1;3;2;3", "wc_summary_paper": "49;55;11;70", "wc_strength_and_weaknesses": "76;191;102;132", "wc_clarity_quality_novelty_and_reproducibility": "18;38;14;19", "wc_summary_review": "38;57;30;24", "wc_review": "181;341;157;245", "wc_reply_reviewers": "0;0;93;21", "wc_reply_authors": "228;692;529;422", "reply_reviewers": "0;0;1;1", "reply_authors": "2;2;2;2", "recommendation_avg": [ 5.25, 1.7853571071357126 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 46.25, 21.741377601246892 ], "wc_strength_and_weaknesses_avg": [ 125.25, 42.82157750480475 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 22.25, 9.283722313813572 ], "wc_summary_review_avg": [ 37.25, 12.43734296383275 ], "wc_review_avg": [ 231.0, 71.18988692223074 ], "wc_reply_reviewers_avg": [ 28.5, 38.21321760857099 ], "wc_reply_authors_avg": [ 467.75, 168.53245236452236 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.8021806287494232, "corr_recommendation_correctness": 0.9901475429766743, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2899112628416576107&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1", "aff_unique_norm": "National Yang Ming Chiao Tung University;National Chiao Tung University", "aff_unique_dep": ";", "aff_unique_url": "https://www.nycu.edu.tw;https://www.nctu.edu.tw", "aff_unique_abbr": "NYCU;NCTU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Taiwan", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "SWUGykek_T", "title": "Robustness Exploration of Semantic Information in Adversarial Training", "track": "main", "status": "Reject", "tldr": "", "abstract": "In this paper, we look into the problem of adversarial robustness from the semantic information 
perspective. We demonstrate a novel insight that adversarial attacks destroy the correlation between visual representations and semantic word vectors, and adversarial training restores it. We further find that the correlation between robust features of different categories is consistent with the correlation between corresponding semantic word vectors. Based on this, we introduce semantic information to assist model training and propose Semantic Constraint Adversarial Robust Learning (SCARL). First, we follow an information-theoretic lens to formulate the mutual information between the visual representation and the corresponding semantic word vector in the embedding space to bridge the information gap. We further provide a differentiable lower bound to optimize such mutual information efficiently. Second, we propose a novel semantic structural constraint, encouraging the trained model to keep the structure of visual representations consistent with that of semantic word vectors. Finally, we combine these two techniques with adversarial training to learn robust visual representations. We conduct extensive experiments on several benchmarks, demonstrating that semantic information is indeed beneficial to model robustness.", "keywords": "Adversarial training;Semantic information;Adversarial robustness", "primary_area": "", "supplementary_material": "/attachment/c5ec7f8a7e8a899de66533556a81eaabeba96acd.zip", "author": "Huafeng Kuang;Hong Liu;Mingliang Xu;YONGJIAN WU;Rongrong Ji", "authorids": "~Huafeng_Kuang1;~Hong_Liu9;~Mingliang_Xu1;~YONGJIAN_WU2;~Rongrong_Ji5", "gender": ";Non-Binary;M;;M", "homepage": ";https://lynnhongliu.github.io/hliu/;;https://open.youtu.qq.com/;http://mac.xmu.edu.cn/rrji-en.html", "dblp": "251/3442;29/5010-9;13/8698-1;;86/5681", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;BC7N2dYAAAAJ;u-8x34cAAAAJ;;", "orcid": ";0000-0001-5318-6388;0000-0002-6885-3451;;", "linkedin": ";;;;", "or_profile": "~Huafeng_Kuang1;~Hong_Liu9;~Mingliang_Xu1;~YONGJIAN_WU2;~Rongrong_Ji5", "aff": "Xiamen University;National Institute of Informatics;Zhengzhou University;;Xiamen University", "aff_domain": "xmu.edu.cn;nii.ac.jp;zzu.edu.cn;;xmu.edu.cn", "position": "Researcher;Postdoc;Full Professor;;Full Professor", "bibtex": "@misc{\nkuang2023robustness,\ntitle={Robustness Exploration of Semantic Information in Adversarial Training},\nauthor={Huafeng Kuang and Hong Liu and Mingliang Xu and YONGJIAN WU and Rongrong Ji},\nyear={2023},\nurl={https://openreview.net/forum?id=SWUGykek_T}\n}", "github": "", "project": "", "reviewers": "ARcW;Xby6;FuyA", "site": "https://openreview.net/forum?id=SWUGykek_T", "pdf_size": 4630164, "recommendation": "5;5;6", "confidence": "3;2;4", "correctness": "3;3;3", "technical_novelty": "3;3;3", "empirical_novelty": "2;2;3", "wc_summary_paper": "43;16;87", "wc_strength_and_weaknesses": "123;66;264", "wc_clarity_quality_novelty_and_reproducibility": "45;10;13", "wc_summary_review": "68;23;2", "wc_review": "279;115;366", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "392;616;759", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 48.666666666666664, 29.261275129806325 ], "wc_strength_and_weaknesses_avg": [ 151.0, 83.22259308625273 ], 
"wc_clarity_quality_novelty_and_reproducibility_avg": [ 22.666666666666668, 15.839472494022296 ], "wc_summary_review_avg": [ 31.0, 27.53179979587241 ], "wc_review_avg": [ 253.33333333333334, 104.06515053347857 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 589.0, 151.03862640618348 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.8660254037844385, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:-tfslkwT1dgJ:scholar.google.com/&scioq=Robustness+Exploration+of+Semantic+Information+in+Adversarial+Training&hl=en&as_sdt=0,34", "gs_version_total": 0, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Xiamen University;National Institute of Informatics;Zhengzhou University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.xmu.edu.cn;https://www.nii.ac.jp/;http://www.zzu.edu.cn", "aff_unique_abbr": "XMU;NII;ZZU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "China;Japan" }, { "title": "Personalized Federated Learning with Feature Alignment and Classifier Collaboration", "status": "Top-5%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11721", "id": "SXZr8aDKia", "poster": "", "openreview": "https://openreview.net/forum?id=SXZr8aDKia", "slides": "https://iclr.cc/virtual/2023/poster/11721", "video": "https://iclr.cc/virtual/2023/poster/11721", "author_site": "Jian Xu, Xinyi Tong, Shao-Lun Huang", "tldr": "", "abstract": "Data heterogeneity is one of the most challenging issues in federated learning, which motivates a variety of approaches to learn personalized models for participating clients. One such approach in deep neural networks based tasks is employing a shared feature representation and learning a customized classifier head for each client. However, previous works do not utilize the global knowledge during local representation learning and also neglect the fine-grained collaboration between local classifier heads, which limits the model generalization ability. In this work, we conduct explicit local-global feature alignment by leveraging global semantic knowledge for learning a better representation. Moreover, we quantify the benefit of classifier combination for each client as a function of the combining weights and derive an optimization problem for estimating optimal weights. 
Finally, extensive evaluation results on benchmark datasets with various heterogeneous data scenarios demonstrate the effectiveness of our proposed method.", "keywords": "Federated Learning;Personalization;Collaboration", "primary_area": "", "supplementary_material": "", "author": "Jian Xu;Xinyi Tong;Shao-Lun Huang", "authorids": "~Jian_Xu7;~Xinyi_Tong1;~Shao-Lun_Huang3", "gender": "M;M;M", "homepage": ";https://github.com/txyyaohui;https://sites.google.com/view/slhuang/home", "dblp": "73/1149-16;171/0531;64/2243", "google_scholar": "5kjbGosAAAAJ;;", "orcid": "0000-0001-6201-9215;;", "linkedin": ";;", "or_profile": "~Jian_Xu7;~Xinyi_Tong1;~Shao-Lun_Huang3", "aff": "Huawei Noah's Ark Lab;Tsinghua University;Tsinghua University", "aff_domain": "huawei.com;tsinghua.edu.cn;tsinghua.edu.cn", "position": "Intern;PhD student;Associate Professor", "bibtex": "@inproceedings{\nxu2023personalized,\ntitle={Personalized Federated Learning with Feature Alignment and Classifier Collaboration},\nauthor={Jian Xu and Xinyi Tong and Shao-Lun Huang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=SXZr8aDKia}\n}", "github": "", "project": "", "reviewers": "Ddph;ivCQ;8wN3;zWd3", "pdf_size": 751411, "recommendation": "5;8;8;8", "confidence": "5;3;4;3", "correctness": "3;4;3;4", "technical_novelty": "2;4;3;3", "empirical_novelty": "2;4;3;3", "wc_summary_paper": "59;32;77;97", "wc_strength_and_weaknesses": "37;62;198;105", "wc_clarity_quality_novelty_and_reproducibility": "13;56;18;16", "wc_summary_review": "160;28;47;50", "wc_review": "269;178;340;268", "wc_reply_reviewers": "83;0;170;0", "wc_reply_authors": "1143;32;1669;31", "reply_reviewers": "1;0;2;0", "reply_authors": "2;1;4;1", "recommendation_avg": [ 7.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 66.25, 23.909987452945266 ], "wc_strength_and_weaknesses_avg": [ 100.5, 61.32087735836792 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 25.75, 17.55526986406076 ], "wc_summary_review_avg": [ 71.25, 51.929639898616664 ], "wc_review_avg": [ 263.75, 57.47336339557656 ], "wc_reply_reviewers_avg": [ 63.25, 70.33269154525512 ], "wc_reply_authors_avg": [ 718.75, 711.9671252944197 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.8703882797784892, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 163, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13340196162554909206&as_sdt=8005&sciodt=0,7&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=SXZr8aDKia", "email": "huawei.com;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Huawei;Tsinghua University", "aff_unique_dep": "Noah's Ark Lab;", "aff_unique_url": "https://www.huawei.com;https://www.tsinghua.edu.cn", "aff_unique_abbr": "Huawei;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "SYsmAZ8PHez", "title": "InteriorSim: A Photorealistic Simulator for Embodied AI", "track": "main", "status": "Withdraw", "tldr": "InteriorSim is a photorealistic simulator for embodied AI in the home.", "abstract": 
"Interactive simulators are becoming powerful tools for training embodied agents, but existing simulators suffer from limited content diversity, physical interactivity, and visual fidelity. We address these limitations by introducing InteriorSim, a photorealistic simulator for embodied AI in the home. To create our simulator, we worked closely with a team of professional artists for over a year to construct 300 unique virtual indoor environments with 2,566 unique rooms and 17,234 unique objects that can be manipulated individually. Each of our environments features detailed geometry, photorealistic materials, and a unique floor plan and object layout designed by a professional artist, i.e., we do not rely on remixing existing layouts to create additional content. Our environments are implemented as Unreal Engine assets, and we provide an OpenAI Gym interface for interacting with the environments via Python. We demonstrate the utility of our simulator by using it in a zero-shot sim-to-real transfer scenario, i.e., we train a point-goal navigation policy entirely in simulation that can successfully navigate through cluttered real-world environments when deployed on a real robot. We also demonstrate that our simulator is quantitatively more photorealistic than existing simulators measured by human comparisons and standard metrics for evaluating generative models. Finally, we demonstrate that our simulator achieves better sim-to-real performance than existing simulators on a real-world semantic segmentation task. All of our assets and code will be made available online.", "keywords": "simulation environment;embodied AI", "primary_area": "", "supplementary_material": "/attachment/f356593521e2851e9cce5d1028e6fcdfa093443c.zip", "author": "Mike Roberts;Quentin Leboutet;Rachith Prakash;Renhan Wang;Hailin Zhang;Rui Tang;Marti Ferragut;Stefan Leutenegger;Stephan R. 
Richter;Vladlen Koltun;Matthias M\u00fcller;German Ros", "authorids": "~Mike_Roberts1;~Quentin_Leboutet1;~Rachith_Prakash1;~Renhan_Wang1;~Hailin_Zhang1;~Rui_Tang1;mferragut@cvc.uab.cat;~Stefan_Leutenegger1;~Stephan_R._Richter1;~Vladlen_Koltun1;~Matthias_M\u00fcller1;~German_Ros2", "gender": "M;M;M;M;M;M;;M;M;M;;M", "homepage": "https://mikeroberts3000.github.io;https://github.com/quentin-leboutet;https://rachithp.github.io;https://github.com/rwang15;https://blog.csdn.net/sh15285118586;;;https://www.srl.in.tum.de;http://www.stephanrichter.org/;http://vladlen.info/;https://matthias.pw;http://germanros.net/", "dblp": ";192/7108;;;;78/437;;;169/4708;66/5458.html;169/4686-1;78/9464", "google_scholar": "1hqOg28AAAAJ;SfiqI4AAAAAJ;qVAZ2PsAAAAJ;;;https://scholar.google.com/citations?view_op=list_works;;https://scholar.google.com.tw/citations?user=SmGQ48gAAAAJ;https://scholar.google.de/citations?user=6hB2vJUAAAAJ;kg4bCpgAAAAJ;AeMLOMEAAAAJ;uDFb6OcAAAAJ", "orcid": ";0000-0002-8155-0965;;;;;;;;0000-0003-0858-0970;;0000-0002-3182-6345", "linkedin": ";quentinleboutet/;;;;;;;;vladlenkoltun/;;", "or_profile": "~Mike_Roberts1;~Quentin_Leboutet1;~Rachith_Prakash1;~Renhan_Wang1;~Hailin_Zhang1;~Rui_Tang1;mferragut@cvc.uab.cat;~Stefan_Leutenegger1;~Stephan_R._Richter1;~Vladlen_Koltun1;~Matthias_M\u00fcller1;~German_Ros2", "aff": "Intel Labs;Intel;Intel;;;;;Imperial College London;Apple;Apple;Intel;Intel", "aff_domain": "intel.com;intel.com;intel.com;;;;;imperial.ac.uk;apple.com;apple.com;intel.com;intel.com", "position": "Researcher;Researcher;Researcher;;;;;Reader;Researcher;Distinguished Scientist;Researcher;Research Scientist", "bibtex": "@misc{\nroberts2023interiorsim,\ntitle={InteriorSim: A Photorealistic Simulator for Embodied {AI}},\nauthor={Mike Roberts and Quentin Leboutet and Rachith Prakash and Renhan Wang and Hailin Zhang and Rui Tang and Marti Ferragut and Stefan Leutenegger and Stephan R. 
Richter and Vladlen Koltun and Matthias M{\\\"u}ller and German Ros},\nyear={2023},\nurl={https://openreview.net/forum?id=SYsmAZ8PHez}\n}", "github": "", "project": "", "reviewers": "NefA;LdDs;FgFt;4Aa8", "site": "https://openreview.net/forum?id=SYsmAZ8PHez", "pdf_size": 24611864, "recommendation": "3;5;5;6", "confidence": "5;5;4;4", "correctness": "3;3;3;4", "technical_novelty": "1;2;2;2", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "60;42;29;122", "wc_strength_and_weaknesses": "368;599;152;425", "wc_clarity_quality_novelty_and_reproducibility": "63;429;66;55", "wc_summary_review": "76;111;64;127", "wc_review": "567;1181;311;729", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 4.5, 0.5 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 63.25, 35.66072769868837 ], "wc_strength_and_weaknesses_avg": [ 386.0, 159.66370908882206 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 153.25, 159.25510195908953 ], "wc_summary_review_avg": [ 94.5, 25.5 ], "wc_review_avg": [ 697.0, 316.6922796659243 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 12, 0 ], "corr_recommendation_confidence": -0.6882472016116854, "corr_recommendation_correctness": 0.6622661785325219, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ud-U95bAGuMJ:scholar.google.com/&scioq=InteriorSim:+A+Photorealistic+Simulator+for+Embodied+AI&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;1;2;2;0;0", "aff_unique_norm": "Intel;Imperial College London;Apple", "aff_unique_dep": "Intel Labs;;Apple Inc.", "aff_unique_url": "https://www.intel.com;https://www.imperial.ac.uk;https://www.apple.com", "aff_unique_abbr": "Intel;ICL;Apple", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "SZBy3XeXQvd", "title": "LSAP: Rethinking Inversion Fidelity, Perception and Editability in GAN Latent Space", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "As the methods evolve, inversion is mainly divided into two steps. The first step is Image Embedding, in which an encoder or optimization process embeds images to get the corresponding latent codes. Afterward, the second step aims to refine the inversion and editing results, which we name Result Refinement. Although the second step significantly improves fidelity, perception and editability are almost unchanged, as they depend heavily on the inverse latent codes attained in the first step. Therefore, a crucial problem is obtaining latent codes with better perception and editability while retaining the reconstruction fidelity. In this work, we first point out that these two characteristics are related to the degree of alignment (or disalignment) of the inverse codes with the synthetic distribution. Then, we propose Latent Space Alignment Inversion Paradigm (LSAP), which consists of an evaluation metric and a solution for this problem. Specifically, we introduce Normalized Style Space ($\\mathcal{S^N}$ space) and $\\mathcal{S^N}$ Cosine Distance (SNCD) to measure disalignment of inversion methods.
Since our proposed SNCD is differentiable, it can be optimized in both encoder-based and optimization-based embedding methods, providing a uniform solution. Extensive experiments in various domains demonstrate that SNCD effectively reflects perception and editability, and our alignment paradigm achieves the state-of-the-art in both steps.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/2a6f1178d9d88b443ab7cabadcb02a10a347cef9.zip", "author": "Cao Pu;Lu Yang;Dongxv Liu;Zhiwei Liu;Wenguan Wang;Shan Li;Qing Song", "authorids": "~Cao_Pu1;~Lu_Yang4;~Dongxv_Liu1;~Zhiwei_Liu2;~Wenguan_Wang4;~Shan_Li1;~Qing_Song1", "gender": "M;M;M;M;M;F;F", "homepage": ";;;;https://sites.google.com/view/wenguanwang/;;https://github.com/BUPT-PRIV/Pet-dev", "dblp": "169/2437;58/2893-6;;90/9499;145/1078;15/1152-1;150/5904-6.html", "google_scholar": "https://scholar.google.com.gt/citations?hl=zh-CN;https://scholar.google.com.hk/citations?user=V-6H56AAAAAJ;https://scholar.google.com/;https://scholar.google.com/citations?hl=zh-CN;CqAQQkgAAAAJ;99KpPZ4AAAAJ;", "orcid": ";0000-0003-3857-3982;;;0000-0002-0802-9567;;", "linkedin": ";;;;wenguanwang;;", "or_profile": "~Cao_Pu1;~Lu_Yang4;~Dongxv_Liu1;~Zhiwei_Liu2;~Wenguan_Wang4;~Shan_Li1;~Qing_Song1", "aff": "Beijing University of Posts and Telecommunications;Beijing University of Post and Telecommunication;;, Institute of automation, Chinese academy of science;University of Technology Sydney;Beijing University of Posts and Telecommunications;Beijing University of Post and Telecommunication", "aff_domain": "bupt.edu.cn;bupt.edu.cn;;nlpr.ia.ac.cn;uts.edu.au;bupt.edu.cn;bupt.edu.cn", "position": "PhD student;Postdoc;;Assistant Professor;Lecturer;Postdoc;Associate Professor", "bibtex": "@misc{\npu2023lsap,\ntitle={{LSAP}: Rethinking Inversion Fidelity, Perception and Editability in {GAN} Latent Space},\nauthor={Cao Pu and Lu Yang and Dongxv Liu and Zhiwei Liu and Wenguan Wang and Shan Li and Qing Song},\nyear={2023},\nurl={https://openreview.net/forum?id=SZBy3XeXQvd}\n}", "github": "", "project": "", "reviewers": "imHW;fyYU;3c58;6spN", "site": "https://openreview.net/forum?id=SZBy3XeXQvd", "pdf_size": 19800911, "recommendation": "3;3;5;5", "confidence": "4;4;4;4", "correctness": "4;2;3;3", "technical_novelty": "2;3;2;3", "empirical_novelty": "2;2;2;0", "wc_summary_paper": "104;134;101;64", "wc_strength_and_weaknesses": "190;427;592;120", "wc_clarity_quality_novelty_and_reproducibility": "260;141;31;24", "wc_summary_review": "48;131;13;45", "wc_review": "602;833;737;253", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 1.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 100.75, 24.833193511910626 ], "wc_strength_and_weaknesses_avg": [ 332.25, 188.23705134749642 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 114.0, 96.2210995572177 ], "wc_summary_review_avg": [ 59.25, 43.63699691775317 ], "wc_review_avg": [ 606.25, 219.8378663924848 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 6, "gs_cited_by_link":
"https://scholar.google.com/scholar?cites=10351434833229297514&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;1;2;0;0", "aff_unique_norm": "Beijing University of Posts and Telecommunications;Chinese Academy of Sciences;University of Technology Sydney", "aff_unique_dep": ";Institute of Automation;", "aff_unique_url": "http://www.bupt.edu.cn/;http://www.ia.cas.cn;https://www.uts.edu.au", "aff_unique_abbr": "BUPT;CAS;UTS", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Beijing;", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "China;Australia" }, { "id": "SZYXyhE2c6f", "title": "A Probabilistic Framework For Modular Continual Learning", "track": "main", "status": "Reject", "tldr": "We introduce a scalable modular continual learning algorithm that is capable of forward knowledge transfer across similar and dissimilar input domains.", "abstract": "Continual learning (CL) algorithms seek to accumulate and transfer knowledge across a sequence of tasks and achieve better performance on each successive task. Modular approaches, which use a different composition of modules for each task and avoid forgetting by design, have been shown to be a promising direction to CL. However, searching through the large space of possible module compositions remains a challenge. In this work, we develop a scalable probabilistic search framework as a solution to this challenge. Our framework has two distinct components. The first is designed to transfer knowledge across similar input domains. To this end, it models each module\u2019s training input distribution and uses a Bayesian model to find the most promising module compositions for a new task. The second component targets transfer across tasks with disparate input distributions or different input spaces and uses Bayesian optimisation to explore the space of module compositions. We show that these two methods can be easily combined and evaluate the resulting approach on two benchmark suites designed to capture different desiderata of CL techniques. 
The experiments show that our framework offers superior performance compared to state-of-the-art CL baselines.", "keywords": "continual learning;modular;Bayesian networks;Bayesian optimisation", "primary_area": "", "supplementary_material": "", "author": "Lazar Valkov;Akash Srivastava;Dipak Chaudhari;Swarat Chaudhuri;Charles Sutton", "authorids": "~Lazar_Valkov1;~Akash_Srivastava1;~Dipak_Chaudhari1;~Swarat_Chaudhuri1;~Charles_Sutton1", "gender": "M;M;M;M;M", "homepage": ";http://akashgit.github.io;https://dipakc.bitbucket.io/;http://www.cs.utexas.edu/~swarat;http://homepages.inf.ed.ac.uk/csutton/", "dblp": "209/4938;24/9528;07/8396;37/6100;59/5879", "google_scholar": ";https://scholar.google.co.uk/citations?user=2h6SZeEAAAAJ;https://scholar.google.co.in/citations?user=S1a250gAAAAJ;9j6RBYQAAAAJ;https://scholar.google.co.uk/citations?user=hYtGXD0AAAAJ", "orcid": ";;;0000-0002-6859-1391;0000-0002-0041-3820", "linkedin": ";https://uk.linkedin.com/in/akash-srivastava-aa97361b;;swarat-chaudhuri-609b3092/;charles-sutton-772aa126", "or_profile": "~Lazar_Valkov1;~Akash_Srivastava1;~Dipak_Chaudhari1;~Swarat_Chaudhuri1;~Charles_Sutton1", "aff": "International Business Machines;MIT-IBM Watson AI Research Lab;Meta Platforms, Inc.;University of Texas, Austin;University of Edinburgh", "aff_domain": "ibm.com;ibm.com;meta.com;utexas.edu;ed.ac.uk", "position": "Postdoc;Research Scientist;Researcher;Associate Professor;Professor", "bibtex": "@misc{\nvalkov2023a,\ntitle={A Probabilistic Framework For Modular Continual Learning},\nauthor={Lazar Valkov and Akash Srivastava and Dipak Chaudhari and Swarat Chaudhuri and Charles Sutton},\nyear={2023},\nurl={https://openreview.net/forum?id=SZYXyhE2c6f}\n}", "github": "", "project": "", "reviewers": "ifu1;LPZE;ARaq;E5fq", "site": "https://openreview.net/forum?id=SZYXyhE2c6f", "pdf_size": 1316703, "recommendation": "3;3;5;6", "confidence": "5;4;4;4", "correctness": "3;2;3;4", "technical_novelty": "3;2;3;4", "empirical_novelty": "3;2;3;2", "wc_summary_paper": "137;113;123;136", "wc_strength_and_weaknesses": "115;277;95;182", "wc_clarity_quality_novelty_and_reproducibility": "444;19;34;4", "wc_summary_review": "46;17;14;16", "wc_review": "742;426;266;338", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "209;341;32;31", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 127.25, 9.908960591303208 ], "wc_strength_and_weaknesses_avg": [ 167.25, 71.0857756516731 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 125.25, 184.33580091778157 ], "wc_summary_review_avg": [ 23.25, 13.179055353097201 ], "wc_review_avg": [ 443.0, 181.68929522676893 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 153.25, 130.388601879152 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.5555555555555555, "corr_recommendation_correctness": 0.816496580927726, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11463154643239765344&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "International Business Machines Corporation;Massachusetts Institute of Technology;Meta;University of Texas at Austin;University of 
Edinburgh", "aff_unique_dep": ";MIT-IBM Watson AI Research Lab;Meta Platforms, Inc.;;", "aff_unique_url": "https://www.ibm.com;https://www.mitibmwatsonailab.org;https://www.meta.com;https://www.utexas.edu;https://www.ed.ac.uk", "aff_unique_abbr": "IBM;MIT-IBM AI Lab;Meta;UT Austin;Edinburgh", "aff_campus_unique_index": "1", "aff_campus_unique": ";Austin", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "United States;United Kingdom" }, { "title": "Tier Balancing: Towards Dynamic Fairness over Underlying Causal Factors", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11458", "id": "SZdfz5k7cd1", "poster": "", "openreview": "https://openreview.net/forum?id=SZdfz5k7cd1", "slides": "https://iclr.cc/virtual/2023/poster/11458", "video": "https://iclr.cc/virtual/2023/poster/11458", "author_site": "Zeyu Tang, Yatong Chen, Yang Liu, Kun Zhang", "tldr": "We formulate and investigate a long-term fairness notion that captures decision-distribution interplay via a detailed modeling over both observed and latent causal factors.", "abstract": "The pursuit of long-term fairness involves the interplay between decision-making and the underlying data generating process. In this paper, through causal modeling with a directed acyclic graph (DAG) on the decision-distribution interplay, we investigate the possibility of achieving long-term fairness from a dynamic perspective. We propose Tier Balancing, a technically more challenging but more natural notion to achieve in the context of long-term, dynamic fairness analysis. Different from previous fairness notions that are defined purely on observed variables, our notion goes one step further, capturing behind-the-scenes situation changes on the unobserved latent causal factors that directly carry out the influence from the current decision to the future data distribution. Under the specified dynamics, we prove that in general one cannot achieve the long-term fairness goal only through one-step interventions. 
Furthermore, in the effort of approaching long-term fairness, we consider the mission of \"getting closer to\" the long-term fairness goal and present possibility and impossibility results accordingly.", "keywords": "Algorithmic Fairness;Causality;Dynamic Modeling;Long-term Fairness", "primary_area": "", "supplementary_material": "/attachment/b27b3c9315bd426b7d1d48d7428886300889985b.zip", "author": "Zeyu Tang;Yatong Chen;Yang Liu;Kun Zhang", "authorids": "~Zeyu_Tang1;~Yatong_Chen1;~Yang_Liu3;~Kun_Zhang1", "gender": ";F;M;M", "homepage": "https://zeyu.one;https://yatongchen.github.io/;http://www.yliuu.com;http://www.andrew.cmu.edu/user/kunz1/", "dblp": "296/1601-2;202/8466;51/3710-18;96/3115-1", "google_scholar": "https://scholar.google.com/citations?hl=en;yoExm_UAAAAJ;jKrIVCIAAAAJ;RGoypN4AAAAJ", "orcid": "0000-0002-4423-4728;;0000-0001-8420-6011;", "linkedin": ";;;", "or_profile": "~Zeyu_Tang1;~Yatong_Chen1;~Yang_Liu3;~Kun_Zhang1", "aff": "Carnegie Mellon University;University of California, Santa Cruz;University of California, Santa Cruz;Carnegie Mellon University", "aff_domain": "cmu.edu;ucsc.edu;ucsc.edu;cmu.edu", "position": "PhD student;PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\ntang2023tier,\ntitle={Tier Balancing: Towards Dynamic Fairness over Underlying Causal Factors},\nauthor={Zeyu Tang and Yatong Chen and Yang Liu and Kun Zhang},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=SZdfz5k7cd1}\n}", "github": "", "project": "", "reviewers": "pPYU;qVYP;WEUm;N3M4", "pdf_size": 1361878, "recommendation": "5;6;6;6", "confidence": "5;2;3;3", "correctness": "3;3;3;3", "technical_novelty": "2;4;3;3", "empirical_novelty": "1;3;3;2", "wc_summary_paper": "131;78;87;119", "wc_strength_and_weaknesses": "986;214;157;347", "wc_clarity_quality_novelty_and_reproducibility": "79;33;14;148", "wc_summary_review": "124;48;24;88", "wc_review": "1320;373;282;702", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "2008;1070;636;935", "reply_reviewers": "0;0;0;0", "reply_authors": "3;2;1;2", "recommendation_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 1.0897247358851685 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 103.75, 21.901769334919038 ], "wc_strength_and_weaknesses_avg": [ 426.0, 330.58508738296104 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 68.5, 51.62605931116571 ], "wc_summary_review_avg": [ 71.0, 38.19685850956856 ], "wc_review_avg": [ 669.25, 406.90132403323537 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1162.25, 512.9290277416555 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.9271726499455306, "corr_recommendation_correctness": 0.0, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12586455584706242600&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=SZdfz5k7cd1", "email": "cmu.edu;ucsc.edu;ucsc.edu;cmu.edu", "author_num": 4, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Carnegie Mellon University;University of California, Santa Cruz", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://www.ucsc.edu", "aff_unique_abbr": "CMU;UCSC", "aff_campus_unique_index": "1;1", 
"aff_campus_unique": ";Santa Cruz", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "SZojABvWnkx", "title": "Prompt Tuning for Graph Neural Networks", "track": "main", "status": "Withdraw", "tldr": "We explore the prompt tuning method for pre-trained GNN models.", "abstract": "In recent years, prompt tuning has set off a research boom in the adaptation of pre-trained models. In this paper, we propose Graph Prompt as an efficient and effective alternative to full fine-tuning for adapting the pre-trianed GNN models to downstream tasks. To the best of our knowledge, we are the first to explore the effectiveness of prompt tuning on existing pre-trained GNN models. Specifically, without tuning the parameters of the pre-trained GNN model, we train a task-specific graph prompt that provides graph-level transformations on the downstream graphs during the adaptation stage. Then, we introduce a concrete implementation of the graph prompt, called GP-Feature (GPF), which adds learnable perturbations to the feature space of the downstream graph. GPF has a strong expressive ability that it can modify both the node features and the graph structure implicitly. Accordingly, we demonstrate that GPF can achieve the approximately equivalent effect of any graph-level transformations under most existing pre-trained GNN models. We validate the effectiveness of GPF on numerous pre-trained GNN models, and the experimental results show that with a small amount (about 0.1% of that for fine-tuning ) of tunable parameters, GPF can achieve comparable performances as fine-tuning, and even obtain significant performance gains in some cases. ", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/8f153e28bc6fcb675bd9de97a49e057b60e4ddf0.zip", "author": "Taoran Fang;Yunchao Mercer Zhang;Yang Yang;Chunping Wang", "authorids": "~Taoran_Fang2;~Yunchao_Mercer_Zhang1;~Yang_Yang35;~Chunping_Wang1", "gender": "M;M;M;F", "homepage": "https://www.baidu.com;https://yunchaozhang.netlify.app/;http://yangy.org;", "dblp": ";;;54/2715-1", "google_scholar": ";;;Rmy5RogAAAAJ", "orcid": ";;0000-0002-5058-4417;0000-0003-1854-8667", "linkedin": ";;;https://linkedin.com/in/chunping-wang-7b94a15/", "or_profile": "~Taoran_Fang2;~Yunchao_Mercer_Zhang1;~Yang_Yang35;~Chunping_Wang1", "aff": "Zhejiang University;Zhejiang University;Zhejiang University;Finvolution Group", "aff_domain": "zju.edu.cn;zju.edu.cn;zju.edu.cn;xinye.com", "position": "PhD student;Undergrad student;Associate Professor;Principal Scientist", "bibtex": "@misc{\nfang2023prompt,\ntitle={Prompt Tuning for Graph Neural Networks},\nauthor={Taoran Fang and Yunchao Mercer Zhang and Yang Yang and Chunping Wang},\nyear={2023},\nurl={https://openreview.net/forum?id=SZojABvWnkx}\n}", "github": "", "project": "", "reviewers": "mcky;Vc2p;q8DN;wghj", "site": "https://openreview.net/forum?id=SZojABvWnkx", "pdf_size": 541656, "recommendation": "3;3;5;8", "confidence": "5;4;4;4", "correctness": "2;2;2;3", "technical_novelty": "2;2;2;4", "empirical_novelty": "2;2;2;4", "wc_summary_paper": "23;70;22;98", "wc_strength_and_weaknesses": "165;194;242;335", "wc_clarity_quality_novelty_and_reproducibility": "18;39;33;236", "wc_summary_review": "11;20;38;34", "wc_review": "217;323;335;703", "wc_reply_reviewers": "0;0;0;60", "wc_reply_authors": "752;897;597;369", "reply_reviewers": "0;0;0;1", "reply_authors": "1;2;1;1", "recommendation_avg": [ 4.75, 2.0463381929681126 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 2.25, 
0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.8660254037844386 ], "empirical_novelty_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 53.25, 32.306152664778885 ], "wc_strength_and_weaknesses_avg": [ 234.0, 64.4709236788182 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 81.5, 89.5279286033135 ], "wc_summary_review_avg": [ 25.75, 10.825317547305483 ], "wc_review_avg": [ 394.5, 183.93680980162725 ], "wc_reply_reviewers_avg": [ 15.0, 25.98076211353316 ], "wc_reply_authors_avg": [ 653.75, 195.657066062026 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.49374193110101877, "corr_recommendation_correctness": 0.9169493006161777, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14315280589933228120&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Zhejiang University;FinVolution Group", "aff_unique_dep": ";", "aff_unique_url": "https://www.zju.edu.cn;https://www.finvolutiongroup.com", "aff_unique_abbr": "ZJU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Boosting Adversarial Transferability using Dynamic Cues", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11498", "id": "SZynfVLGd5", "poster": "/media/PosterPDFs/ICLR%202023/11498.png?t=1680959013.3953297", "openreview": "https://openreview.net/forum?id=SZynfVLGd5", "slides": "https://iclr.cc/virtual/2023/poster/11498", "video": "https://iclr.cc/virtual/2023/poster/11498", "author_site": "Muzammal Naseer, Ahmad Mahmood, Salman Khan, Fahad Khan", "tldr": "A new approach for optimizing temporal prompts through frozen image models to capture motion dynamics for better transferability", "abstract": "The transferability of adversarial perturbations between image models has been extensively studied. In this case, an attack is generated from a known surrogate \\eg, the ImageNet trained model, and transferred to change the decision of an unknown (black-box) model trained on an image dataset. However, attacks generated from image models do not capture the dynamic nature of a moving object or a changing scene due to a lack of temporal cues within image models. This leads to reduced transferability of adversarial attacks from representation-enriched \\emph{image} models such as Supervised Vision Transformers (ViTs), Self-supervised ViTs (\\eg, DINO), and Vision-language models (\\eg, CLIP) to black-box \\emph{video} models. In this work, we induce dynamic cues within the image models without sacrificing their original performance on images. To this end, we optimize \\emph{temporal prompts} through frozen image models to capture motion dynamics. Our temporal prompts are the result of a learnable transformation that allows optimizing for temporal gradients during an adversarial attack to fool the motion dynamics. Specifically, we introduce spatial (image) and temporal (video) cues within the same source model through task-specific prompts. Attacking such prompts maximizes the adversarial transferability from image-to-video and image-to-image models using the attacks designed for image models. As an example, an iterative attack launched from image model Deit-B with temporal prompts reduces generalization (top1 \\% accuracy) of a video model by 35\\% on Kinetics-400. 
Our approach also improves adversarial transferability to image models by 9\\% on ImageNet w.r.t the current state-of-the-art approach. Our attack results indicate that the attacker does not need specialized architectures, \\eg, divided space-time attention, 3D convolutions, or multi-view convolution networks for different data modalities. Image models are effective surrogates to optimize an adversarial attack to fool black-box models in a changing environment over time. Code is available at \\url{https://bit.ly/3Xd9gRQ}", "keywords": "Adversarial attacks;Transferability;Prompt learning;Dynamic video modeling", "primary_area": "", "supplementary_material": "", "author": "Muzammal Naseer;Ahmad Mahmood;Salman Khan;Fahad Khan", "authorids": "~Muzammal_Naseer1;ah9nov@gmail.com;~Salman_Khan4;~Fahad_Khan1", "gender": "M;;M;M", "homepage": "https://muzammal-naseer.com/;;https://salman-h-khan.github.io/;https://sites.google.com/view/fahadkhans/home", "dblp": ";;32/11535-1;05/8618", "google_scholar": "https://scholar.google.ch/citations?user=tM9xKA8AAAAJ;;https://scholar.google.es/citations?user=M59O9lkAAAAJ;zvaeYnUAAAAJ", "orcid": "0000-0001-7663-7161;;0000-0002-9502-1749;", "linkedin": "muzammalnaseer/;;;", "or_profile": "~Muzammal_Naseer1;ah9nov@gmail.com;~Salman_Khan4;~Fahad_Khan1", "aff": "Mohamed bin Zayed University of Artificial Intelligence;;Australian National University;Link\u00f6ping University", "aff_domain": "mbzuai.ac.ae;;anu.edu.au;liu.se", "position": "Researcher;;Lecturer;Associate Professor", "bibtex": "@inproceedings{\nnaseer2023boosting,\ntitle={Boosting Adversarial Transferability using Dynamic Cues},\nauthor={Muzammal Naseer and Ahmad Mahmood and Salman Khan and Fahad Khan},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=SZynfVLGd5}\n}", "github": "", "project": "", "reviewers": "iNv7;QXGV;TDqC;yaNd", "pdf_size": 34143220, "recommendation": "5;6;6;6", "confidence": "3;3;3;3", "correctness": "2;3;2;3", "technical_novelty": "3;2;3;3", "empirical_novelty": "3;2;3;3", "wc_summary_paper": "70;88;106;203", "wc_strength_and_weaknesses": "542;274;415;352", "wc_clarity_quality_novelty_and_reproducibility": "36;104;45;38", "wc_summary_review": "44;34;93;45", "wc_review": "692;500;659;638", "wc_reply_reviewers": "0;0;683;0", "wc_reply_authors": "2560;499;3323;1690", "reply_reviewers": "0;0;2;0", "reply_authors": "5;1;7;3", "recommendation_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.0 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 116.75, 51.39734915343397 ], "wc_strength_and_weaknesses_avg": [ 395.75, 98.10294338092002 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 55.75, 28.05686190577984 ], "wc_summary_review_avg": [ 54.0, 22.9237867726953 ], "wc_review_avg": [ 622.25, 73.15864610556977 ], "wc_reply_reviewers_avg": [ 170.75, 295.7476753923858 ], "wc_reply_authors_avg": [ 2018.0, 1050.206408283629 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 4.0, 2.23606797749979 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14599328654750065701&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=SZynfVLGd5", 
"email": "mbzuai.ac.ae;;anu.edu.au;liu.se", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Mohamed bin Zayed University of Artificial Intelligence;Australian National University;Link\u00f6ping University", "aff_unique_dep": ";;", "aff_unique_url": "https://mbzuai.ac.ae;https://www.anu.edu.au;https://www.liu.se", "aff_unique_abbr": "MBZUAI;ANU;LiU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2", "aff_country_unique": "United Arab Emirates;Australia;Sweden" }, { "title": "Language Models Can Teach Themselves to Program Better", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11352", "id": "SaRj2ka1XZ3", "poster": "/media/PosterPDFs/ICLR%202023/11352.png?t=1680835639.9056969", "openreview": "https://openreview.net/forum?id=SaRj2ka1XZ3", "slides": "https://iclr.cc/virtual/2023/poster/11352", "video": "https://iclr.cc/virtual/2023/poster/11352", "author_site": "Patrick Haluptzok, Matthew Bowers, Adam Tauman Kalai", "tldr": "Language Models can be used to generate Programming Puzzles and Solutions, which can be filtered for correctness and used to finetune the LLM to improve its performance.", "abstract": "Recent Language Models (LMs) achieve breakthrough performance in code generation when trained on human-authored problems, even solving some competitive-programming problems. Self-play has proven useful in games such as Go, and thus it is natural to ask whether LMs can generate their own instructive programming problems to improve their performance. We show that it is possible for an LM to synthesize programming problems and solutions, which are filtered for correctness by a Python interpreter. The LM\u2019s performance is then seen to improve when it is fine-tuned on its own synthetic problems and verified solutions; thus the model \u201cimproves itself\u201d using the Python interpreter. Problems are specified formally as programming puzzles [Schuster et al. , 2021], a code-based problem format where solutions can easily be verified for correctness by execution. In experiments on publicly-available LMs, test accuracy more than doubles. 
This work demonstrates the potential for code LMs, with an interpreter, to generate instructive problems and improve their own performance.", "keywords": "deep learning;natural language processing;program synthesis;large language models", "primary_area": "", "supplementary_material": "/attachment/8e4e6188cb3909bf3259606f5391c184bd0f7f33.zip", "author": "Patrick Haluptzok;Matthew Bowers;Adam Tauman Kalai", "authorids": "~Patrick_Haluptzok1;~Matthew_Bowers1;~Adam_Tauman_Kalai1", "gender": "M;F;", "homepage": "https://github.com/haluptzok;https://mlb2251.github.io/;", "dblp": ";282/1446;", "google_scholar": ";ghdbIsoAAAAJ;", "orcid": ";0000-0001-8450-7033;", "linkedin": "patrick-haluptzok/;;", "or_profile": "~Patrick_Haluptzok1;~Matthew_Bowers1;~Adam_Tauman_Kalai1", "aff": "Microsoft Research;Massachusetts Institute of Technology;", "aff_domain": "research.microsoft.com;mit.edu;", "position": "Researcher;PhD student;", "bibtex": "@inproceedings{\nhaluptzok2023language,\ntitle={Language Models Can Teach Themselves to Program Better},\nauthor={Patrick Haluptzok and Matthew Bowers and Adam Tauman Kalai},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=SaRj2ka1XZ3}\n}", "github": "", "project": "", "reviewers": "t8tu;bB2P;qoTN;cLPb", "pdf_size": 4623945, "recommendation": "5;6;6;8", "confidence": "3;5;4;4", "correctness": "3;3;3;3", "technical_novelty": "2;3;2;3", "empirical_novelty": "3;3;2;3", "wc_summary_paper": "112;27;60;37", "wc_strength_and_weaknesses": "177;280;242;104", "wc_clarity_quality_novelty_and_reproducibility": "75;67;87;163", "wc_summary_review": "70;43;40;63", "wc_review": "434;417;429;367", "wc_reply_reviewers": "0;0;0;8", "wc_reply_authors": "806;1085;751;278", "reply_reviewers": "0;0;0;1", "reply_authors": "1;2;1;1", "recommendation_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 59.0, 32.85574531189332 ], "wc_strength_and_weaknesses_avg": [ 200.75, 66.90805257964097 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 98.0, 38.19685850956856 ], "wc_summary_review_avg": [ 54.0, 12.786711852544421 ], "wc_review_avg": [ 411.75, 26.564779313971346 ], "wc_reply_reviewers_avg": [ 2.0, 3.4641016151377544 ], "wc_reply_authors_avg": [ 730.0, 290.0629242078346 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.3244428422615251, "corr_recommendation_correctness": 0.0, "gs_citation": 79, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16547265852973293006&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=SaRj2ka1XZ3", "email": "research.microsoft.com;mit.edu;", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Microsoft;Massachusetts Institute of Technology", "aff_unique_dep": "Microsoft Research;", "aff_unique_url": "https://www.microsoft.com/en-us/research;https://web.mit.edu", "aff_unique_abbr": "MSR;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "SbEvg8qlasl", "title": "TEXTCRAFT: ZERO-SHOT GENERATION OF HIGH FIDELITY AND DIVERSE SHAPES FROM TEXT", "track": "main", "status": "Withdraw", "tldr": "", 
"abstract": "Language is one of the primary means by which we describe the 3D world around us. While rapid progress has been made in text-to-2D-image synthesis, similar progress in text-to-3D-shape synthesis has been hindered by the lack of paired (text, shape) data. Moreover, extant methods for text-to-shape generation have limited shape diversity and fidelity. We introduce TextCraft, a method to address these limitations by producing high-fidelity and diverse 3D shapes without the need for (text, shape) pairs for training. TextCraft achieves this by using CLIP and using a multi-resolution approach by first generating in a low-dimensional latent space and then upscaling to a higher resolution, improving the fidelity of the generated shape. To improve shape diversity, we use a discrete latent space which is modelled using a bidirectional transformer conditioned on the interchangeable image-text embedding space induced by CLIP. Moreover, we present a novel variant of classifier-free guidance, which further improves the accuracy diversity trade-off. Finally, we perform extensive experiments that demonstrate that TextCraft outperforms state-of-the-art baselines.", "keywords": "Text to shape generation;3D shape generation;Zero-Shot Method;CLIP;Vision-Text models", "primary_area": "", "supplementary_material": "/attachment/94a2b96fa1f6d3f102a8a6eeb7bad69cd8adbe92.zip", "author": "Aditya Sanghi;Rao Fu;Vivian Liu;Karl Willis;Hooman Shayani;Amir Hosein Khasahmadi;Srinath Sridhar;Daniel Ritchie", "authorids": "~Aditya_Sanghi1;~Rao_Fu1;~Vivian_Liu1;~Karl_Willis1;~Hooman_Shayani1;~Amir_Hosein_Khasahmadi1;~Srinath_Sridhar2;~Daniel_Ritchie1", "gender": "M;F;;;M;M;;M", "homepage": "https://github.com/sanghiad;https://freddierao.github.io/;https://www.vivian-liu.com;;;http://dritchie.github.io;;https://srinathsridhar.com", "dblp": ";0000-0002-0115-0831;;82/121;62/5219;17/7188.html;259/1508;78/1463-2", "google_scholar": "q0-11e25FxIC;;;yMoEQSMAAAAJ;https://scholar.google.co.uk/citations?hl=en;0RiypNsAAAAJ;cFpYRhkAAAAJ;qIvZT74AAAAJ", "orcid": ";;;;;;;", "linkedin": ";;;;;;amir-khas/;srinathsridhar", "or_profile": "~Aditya_Sanghi1;~Rao_Fu1;~Vivian_Liu1;~Karl_Willis1;~Hooman_Shayani1;~Daniel_Ritchie1;~Amir_Hosein_Khasahmadi2;~Srinath_Sridhar1", "aff": "Autodesk;Brown University;Columbia University;Autodesk;Autodesk AI Lab;Brown University;Toronto University;Amazon", "aff_domain": "autodesk.com;brown.edu;columbia.edu;autodesk.com;autodesk.com;brown.edu;utoronto.ca;amazon.com", "position": "Researcher;PhD student;PhD student;Senior Research Manager;Principal Researcher;Assistant Professor;MS student;Visiting Academic", "bibtex": "@misc{\nsanghi2023textcraft,\ntitle={{TEXTCRAFT}: {ZERO}-{SHOT} {GENERATION} {OF} {HIGH} {FIDELITY} {AND} {DIVERSE} {SHAPES} {FROM} {TEXT}},\nauthor={Aditya Sanghi and Rao Fu and Vivian Liu and Karl Willis and Hooman Shayani and Amir Hosein Khasahmadi and Srinath Sridhar and Daniel Ritchie},\nyear={2023},\nurl={https://openreview.net/forum?id=SbEvg8qlasl}\n}", "github": "", "project": "", "reviewers": "26ne;Hf7V;WCYg;9zR5", "site": "https://openreview.net/forum?id=SbEvg8qlasl", "pdf_size": 22967734, "recommendation": "3;5;5;6", "confidence": "4;4;3;4", "correctness": "4;3;3;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "84;73;61;45", "wc_strength_and_weaknesses": "326;537;254;88", "wc_clarity_quality_novelty_and_reproducibility": "29;105;27;20", "wc_summary_review": "82;62;58;14", "wc_review": "521;777;400;167", "wc_reply_reviewers": 
"0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 65.75, 14.48059045757458 ], "wc_strength_and_weaknesses_avg": [ 301.25, 161.16664512237017 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 45.25, 34.65815199920504 ], "wc_summary_review_avg": [ 54.0, 24.819347291981714 ], "wc_review_avg": [ 466.25, 219.94473737736942 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": -0.13245323570650439, "corr_recommendation_correctness": -0.9271726499455306, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=395980715571089109&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;2;0;0;1;3;4", "aff_unique_norm": "Autodesk;Brown University;Columbia University;University of Toronto;Amazon", "aff_unique_dep": ";;;;Amazon.com, Inc.", "aff_unique_url": "https://www.autodesk.com;https://www.brown.edu;https://www.columbia.edu;https://www.utoronto.ca;https://www.amazon.com", "aff_unique_abbr": "Autodesk;Brown;Columbia;U of T;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;1;0", "aff_country_unique": "United States;Canada" }, { "title": "Bag of Tricks for Unsupervised Text-to-Speech", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11869", "id": "SbR9mpTuBn", "poster": "", "openreview": "https://openreview.net/forum?id=SbR9mpTuBn", "slides": "https://iclr.cc/virtual/2023/poster/11869", "video": "https://iclr.cc/virtual/2023/poster/11869", "author_site": "Yi Ren, Chen Zhang, shuicheng YAN", "tldr": "We introduce a bag of tricks to enable effective unsupervised TTS using low-quality and multi-speaker unpaired data.", "abstract": "Unsupervised text-to-speech (TTS) aims to train TTS models for a specific language without any paired speech-text training data in that language. Existing methods either use speech and corresponding pseudo text generated by an unsupervised automatic speech recognition (ASR) model as training data, or employ the back-translation technique. Though effective, they suffer from low robustness to low-quality data and heavy dependence on the lexicon of a language that is sometimes unavailable, leading to difficulty in convergence, especially in low-resource language scenarios. In this work, we introduce a bag of tricks to enable effective unsupervised TTS. Specifically, 1) we carefully design a voice conversion model to normalize the variable and noisy information in the low-quality speech data while preserving the pronunciation information; 2) we employ the non-autoregressive TTS model to overcome the robustness issue; and 3) we explore several tricks applied in back-translation, including curriculum learning, length augmentation and auxiliary supervised loss to stabilize the back-translation and improve its effectiveness. 
Experiments demonstrate that our method achieves better intelligibility and audio quality than all previous methods, and that these tricks are essential to the performance gain.", "keywords": "speech synthesis;unsupervised learning", "primary_area": "", "supplementary_material": "/attachment/0c95d94739afc2bad89ccac8257a6e4e41f6945c.zip", "author": "Yi Ren;Chen Zhang;Shuicheng YAN", "authorids": "~Yi_Ren2;~Chen_Zhang3;~Shuicheng_YAN3", "gender": "M;F;M", "homepage": "https://rayeren.github.io/;https://actuy.github.io/;https://yanshuicheng.ai/", "dblp": "75/6568-6;94/4084-20;y/ShuichengYan", "google_scholar": "4FA6C0AAAAAJ;eBBFeVcAAAAJ;https://scholar.google.com.hk/citations?user=DNuiPHwAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Yi_Ren2;~Chen_Zhang3;~Shuicheng_YAN3", "aff": "ByteDance;Zhejiang University;sea Group", "aff_domain": "bytedance.com;zju.edu.cn;sea.com", "position": "Researcher;MS student;Researcher", "bibtex": "@inproceedings{\nren2023bag,\ntitle={Bag of Tricks for Unsupervised Text-to-Speech},\nauthor={Yi Ren and Chen Zhang and Shuicheng YAN},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=SbR9mpTuBn}\n}", "github": "", "project": "", "reviewers": "oNy6;sG8y;G8SV", "pdf_size": 810234, "recommendation": "6;8;8", "confidence": "4;3;4", "correctness": "3;3;4", "technical_novelty": "2;3;3", "empirical_novelty": "3;3;4", "wc_summary_paper": "76;54;90", "wc_strength_and_weaknesses": "239;115;58", "wc_clarity_quality_novelty_and_reproducibility": "84;13;206", "wc_summary_review": "83;25;38", "wc_review": "482;207;392", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "449;290;385", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 7.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 73.33333333333333, 14.817407180595247 ], "wc_strength_and_weaknesses_avg": [ 137.33333333333334, 75.56160106532653 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 101.0, 79.7036176510619 ], "wc_summary_review_avg": [ 48.666666666666664, 24.850665092821068 ], "wc_review_avg": [ 360.3333333333333, 114.47949840716265 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 374.6666666666667, 65.32142748661337 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.49999999999999983, "corr_recommendation_correctness": 0.49999999999999983, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16760848509932487165&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "pdf": "https://openreview.net/pdf?id=SbR9mpTuBn", "email": "bytedance.com;zju.edu.cn;sea.com", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "ByteDance;Zhejiang University;Sea Group", "aff_unique_dep": ";;", "aff_unique_url": "https://www.bytedance.com;https://www.zju.edu.cn;", "aff_unique_abbr": "ByteDance;ZJU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China;" }, { "id": "Sc3Ylriwp4", "title": "Learning Dynamical Characteristics with Neural Operators for Data Assimilation", "track":
"main", "status": "Reject", "tldr": "A new deep learning framework is proposed for data assimilation issues.", "abstract": "Data assimilation refers to a group of algorithms that combines numerical models with observations to obtain an optimal estimation of the system's states. In areas like earth science, numerical models are usually formulated by differential equations, also known as the prior dynamics. It is a great challenge for neural networks to properly exploit the dynamical characteristics for data assimilation, because first, it is difficult to represent complicated dynamical characteristics in neural networks, and second, the dynamics are likely to be biased. The state-of-the-art neural networks borrow from the traditional method to introduce dynamical characteristics by optimizing the 4D-Var objective function in which the dynamics are inherently quantified, but the iterative optimization process leads to high computational cost. In this paper, we develop a novel deep learning framework with neural operators for data assimilation. The key novelty of our proposed approach is that we design a so-called flow operator through self-supervised learning to explicitly learn dynamical characteristics for reconstructed states. Numerical experiments on the Lorenz-63 and Lorenz-96 systems, which are the standard benchmarks for data assimilation performance evaluation, show that the proposed method is at least three times faster than state-of-the-art neural networks, and reduces the dynamic loss by two orders of magnitude. It is also demonstrated that our method is well-adapted to biases in the prior dynamics. ", "keywords": "AI for science;data assimilation;generative models", "primary_area": "", "supplementary_material": "", "author": "Yi Xiao;Wei Xue", "authorids": "~Yi_Xiao4;~Wei_Xue1", "gender": "Non-Binary;M", "homepage": "https://github.com/xiaoyi-jason;http://www.cs.tsinghua.edu.cn/publish/csen/4623/2010/20101224235122610366982/20101224235122610366982_.html", "dblp": ";", "google_scholar": ";https://scholar.google.com.tw/citations?user=iaziYXMAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Yi_Xiao4;~Wei_Xue1", "aff": "Tsinghua University;", "aff_domain": "tsinghua.edu.cn;", "position": "PhD student;", "bibtex": "@misc{\nxiao2023learning,\ntitle={Learning Dynamical Characteristics with Neural Operators for Data Assimilation},\nauthor={Yi Xiao and Wei Xue},\nyear={2023},\nurl={https://openreview.net/forum?id=Sc3Ylriwp4}\n}", "github": "", "project": "", "reviewers": "6BrX;wimX;oVQ6;BfUY;1MTc", "site": "https://openreview.net/forum?id=Sc3Ylriwp4", "pdf_size": 930621, "recommendation": "3;6;6;8;8", "confidence": "4;3;3;4;4", "correctness": "2;3;3;4;4", "technical_novelty": "3;3;3;3;3", "empirical_novelty": "2;2;2;3;3", "wc_summary_paper": "1047;15;103;98;61", "wc_strength_and_weaknesses": "2;327;155;450;209", "wc_clarity_quality_novelty_and_reproducibility": "2;114;434;51;343", "wc_summary_review": "2;30;37;67;90", "wc_review": "1053;486;729;666;703", "wc_reply_reviewers": "0;28;25;6;30", "wc_reply_authors": "2548;1646;2465;1770;2231", "reply_reviewers": "0;1;1;1;1", "reply_authors": "4;3;5;3;4", "recommendation_avg": [ 6.2, 1.8330302779823362 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "correctness_avg": [ 3.2, 0.7483314773547882 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_paper_avg": [ 264.8, 392.3704372146301 ], "wc_strength_and_weaknesses_avg": [ 228.6, 152.21642486932873 ], 
"wc_clarity_quality_novelty_and_reproducibility_avg": [ 188.8, 169.33918625055455 ], "wc_summary_review_avg": [ 45.2, 30.48540634467581 ], "wc_review_avg": [ 727.4, 183.6612098402926 ], "wc_reply_reviewers_avg": [ 17.8, 12.33531515608742 ], "wc_reply_authors_avg": [ 2132.0, 363.5893287762995 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 3.8, 0.7483314773547882 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.08908708063747477, "corr_recommendation_correctness": 0.9914601339836673, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:RwBdOcKC3EsJ:scholar.google.com/&scioq=Learning+Dynamical+Characteristics+with+Neural+Operators+for+Data+Assimilation&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "id": "ScEfNWshH3B", "title": "Adaptive Weight Decay: On The Fly Weight Decay Tuning for Improving Robustness", "track": "main", "status": "Reject", "tldr": "We tune the hyper-parameter for weight decay during each iteration to stabilize training networks with smaller weight-norms which results in more robustness to adversarial examples and label noise, and less sensitivity to choices of learning rate.", "abstract": "We introduce adaptive weight decay, which automatically tunes the hyper-parameter for weight decay during each training iteration. For classification problems, we propose changing the value of the weight-decay hyper-parameter on the fly based on the strength of updates from the classification loss (i.e., gradient of cross-entropy), and the regularization loss (i.e., $\\ell_2$-norm of the weights). We show that this simple modification can result in large improvements in adversarial robustness \u2014 an area which suffers from robust overfitting \u2014 without requiring extra data. Specifically, our reformulation results in 20% relative robustness improvement for CIFAR-100, and 10% relative robustness improvement on CIFAR-10 comparing to traditional weight-decay. 
In addition, this method has other desirable properties, such as lower sensitivity to the learning rate and smaller weight norms; the latter contributes to robustness against overfitting to label noise, and to pruning.", "keywords": "weight decay;regularization;robust overfitting;adversarial robustness;noisy label;adversarial;pruning", "primary_area": "", "supplementary_material": "", "author": "Amin Ghiasi;Ali Shafahi;Reza Ardekani", "authorids": "~Amin_Ghiasi1;~Ali_Shafahi1;~Reza_Ardekani1", "gender": "M;M;M", "homepage": "http://cs.umd.edu/~amin;;", "dblp": "239/8313;136/0235;", "google_scholar": "tNQWOxUAAAAJ;5Jnk00MAAAAJ;5OZTNTQAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Amin_Ghiasi1;~Ali_Shafahi1;~Reza_Ardekani1", "aff": "Apple;Apple;Apple", "aff_domain": "apple.com;apple.com;apple.com", "position": "Researcher;Researcher;Researcher", "bibtex": "@misc{\nghiasi2023adaptive,\ntitle={Adaptive Weight Decay: On The Fly Weight Decay Tuning for Improving Robustness},\nauthor={Amin Ghiasi and Ali Shafahi and Reza Ardekani},\nyear={2023},\nurl={https://openreview.net/forum?id=ScEfNWshH3B}\n}", "github": "", "project": "", "reviewers": "d8cT;N7iq;ksvx;4bPN", "site": "https://openreview.net/forum?id=ScEfNWshH3B", "pdf_size": 1750392, "recommendation": "3;5;5;6", "confidence": "3;4;3;5", "correctness": "3;2;3;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "61;66;154;75", "wc_strength_and_weaknesses": "187;262;438;401", "wc_clarity_quality_novelty_and_reproducibility": "67;48;7;21", "wc_summary_review": "19;33;30;25", "wc_review": "334;409;629;522", "wc_reply_reviewers": "0;0;153;20", "wc_reply_authors": "366;492;434;240", "reply_reviewers": "0;0;2;1", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 89.0, 37.8615900352851 ], "wc_strength_and_weaknesses_avg": [ 322.0, 101.8847387983107 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 35.75, 23.29565410114084 ], "wc_summary_review_avg": [ 26.75, 5.3091901453988255 ], "wc_review_avg": [ 473.5, 111.97432741481416 ], "wc_reply_reviewers_avg": [ 43.25, 63.888085743744114 ], "wc_reply_authors_avg": [ 383.0, 93.83496150156401 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.7608859102526822, "corr_recommendation_correctness": -0.13245323570650439, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17261730083595363334&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Apple", "aff_unique_dep": "Apple Inc.", "aff_unique_url": "https://www.apple.com", "aff_unique_abbr": "Apple", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "SdBfRJE9SX-", "title": "What Does Vision Supervision Bring to Language Models? A Case Study of CLIP", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Vision-language~(V+L) pre-training has shown promising performance in cross-modal tasks such as image-text retrieval and image captioning. 
On the other hand, these models surprisingly perform worse than text-only models (e.g., BERT) on widely-used text-only understanding tasks. The conflicting results naturally raise a question: What does vision supervision bring to language models? \nIn this paper, we investigate this under-explored problem with one representative cross-modal model, CLIP.\nWe compare the text encoder of CLIP and widely-used text-only models on a wide range of tasks. We design a suite of evaluation tasks across three perception aspects, including the linguistic world featuring syntactic knowledge~(e.g., dependency labeling), the visual world examining visual-related commonsense knowledge (e.g., color), and the embodied world featuring physical-related commonsense knowledge (e.g., mass). Experiments demonstrate that text-only models are not always better than CLIP on these perception tasks. \nAlthough the text encoder of CLIP falls far behind text-only models in linguistics-related tasks, CLIP achieves better zero-shot results in visual and embodied worlds with only $0.3\\%$ of the parameters of OPT-175B (one of the largest text-only models). This proves that CLIP can empower text encoders to learn rich visual and embodied knowledge through vision-text pre-training. Furthermore, qualitative studies show that CLIP pre-training nevertheless restricts the text encoder from learning fine-grained semantics, such as understanding ambiguous texts. These results shed light on future directions to improve V+L pre-training. ", "keywords": "Contrastive Language-Image Pre-training;Vision-and-Language;Knowledge Probing", "primary_area": "", "supplementary_material": "/attachment/8cabeac0000a94efaa2938c0a8ed58358424254f.zip", "author": "Lei Li;Jingjing Xu;Qingxiu Dong;Ce Zheng;Qi Liu;Lingpeng Kong;Xu Sun", "authorids": "~Lei_Li14;~Jingjing_Xu1;~Qingxiu_Dong1;~Ce_Zheng2;~Qi_Liu5;~Lingpeng_Kong1;~Xu_Sun1", "gender": "F;F;M;M;M;M;M", "homepage": ";https://dqxiu.github.io/;;http://leuchine.github.io/;https://ikekonglp.github.io/;https://xusun.org/;https://lilei-nlp.github.io", "dblp": "25/624;284/0673;99/6967;;144/7656;37/1971-1;13/7007-39", "google_scholar": ";ibcR7VkAAAAJ;r7qFs7UAAAAJ;Y-OeKMwAAAAJ;f1hBi5wAAAAJ;https://scholar.google.com/citations?hl=en;MeV4GGsAAAAJ", "orcid": ";;;0000-0003-4608-5778;;;0009-0008-6984-5104", "linkedin": ";qingxiu-dong-a3758a199/;;;;;", "or_profile": "~Jingjing_Xu1;~Qingxiu_Dong1;~Ce_Zheng2;~Qi_Liu5;~Lingpeng_Kong1;~Xu_Sun1;~Tobias_Lee1", "aff": ";Peking University;Peking University;University of Hong Kong;Department of Computer Science, The University of Hong Kong;Peking University;Peking University", "aff_domain": ";pku.edu.cn;pku.edu.cn;hku.hk;cs.hku.hk;pku.edu.cn;pku.edu.cn", "position": ";PhD student;MS student;Assistant Professor;Assistant Professor;Associate Professor;MS student", "bibtex": "@misc{\nli2023what,\ntitle={What Does Vision Supervision Bring to Language Models? 
A Case Study of {CLIP}},\nauthor={Lei Li and Jingjing Xu and Qingxiu Dong and Ce Zheng and Qi Liu and Lingpeng Kong and Xu Sun},\nyear={2023},\nurl={https://openreview.net/forum?id=SdBfRJE9SX-}\n}", "github": "", "project": "", "reviewers": "NLEY;umLU;awxy;PpQ8", "site": "https://openreview.net/forum?id=SdBfRJE9SX-", "pdf_size": 2935771, "recommendation": "3;3;5;5", "confidence": "5;5;4;4", "correctness": "1;3;3;3", "technical_novelty": "2;2;2;2", "empirical_novelty": "1;2;2;2", "wc_summary_paper": "16;99;50;64", "wc_strength_and_weaknesses": "176;777;194;266", "wc_clarity_quality_novelty_and_reproducibility": "159;134;37;2", "wc_summary_review": "19;91;62;47", "wc_review": "370;1101;343;379", "wc_reply_reviewers": "0;297;0;0", "wc_reply_authors": "137;508;391;307", "reply_reviewers": "0;1;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.5, 0.5 ], "correctness_avg": [ 2.5, 0.8660254037844386 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 57.25, 29.76050234791073 ], "wc_strength_and_weaknesses_avg": [ 353.25, 246.95887815585817 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 83.0, 65.29548223269356 ], "wc_summary_review_avg": [ 54.75, 26.00360551923521 ], "wc_review_avg": [ 548.25, 319.4052089431229 ], "wc_reply_reviewers_avg": [ 74.25, 128.60477246198914 ], "wc_reply_authors_avg": [ 335.75, 135.13951124671127 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=649697585852852244&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;1;1;0;0", "aff_unique_norm": "Peking University;University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;https://www.hku.hk", "aff_unique_abbr": "Peking U;HKU", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "SdXv2C2-tnj", "title": "Density Sketches for Sampling and Estimation", "track": "main", "status": "Reject", "tldr": "online summary of data for density estimation and sampling new data.", "abstract": "There has been an exponential increase in the data generated worldwide. Insights into this data, led by machine learning (ML), have given rise to exciting applications such as recommendation engines, conversational agents, and so on. Often, data for these applications is generated at a rate faster than ML pipelines can consume it. In this paper, we propose Density Sketches (DS), a cheap and practical approach to reducing data redundancy in a streaming fashion. DS creates a succinct online summary of the data distribution. While DS does not store the samples from the stream, we can sample unseen data on the fly from DS to use for downstream learning tasks. In this sense, DS can replace actual data in many machine learning pipelines, analogous to generative models. 
Importantly, unlike generative models, which do not have statistical guarantees, DS has a sampling distribution that asymptotically converges to the underlying unknown density distribution.", "keywords": "density estimation;sampling;machine learning", "primary_area": "", "supplementary_material": "/attachment/1a57aa3781ecc43b8ef4779a1aa0809c7eed84ac.zip", "author": "Aditya Desai;Benjamin Coleman;Anshumali Shrivastava", "authorids": "~Aditya_Desai1;~Benjamin_Coleman1;~Anshumali_Shrivastava1", "gender": "M;M;M", "homepage": "https://randorithms.com/research;https://www.cs.rice.edu/~as143/;https://apd10.github.io/", "dblp": "217/2220;63/9828;18/8339", "google_scholar": "fInuVkEAAAAJ;https://scholar.google.com.tw/citations?user=SGT23RAAAAAJ;ymdbDZwAAAAJ", "orcid": ";;0009-0002-9111-9391", "linkedin": ";;aditya-desai-ai/", "or_profile": "~Benjamin_Coleman1;~Anshumali_Shrivastava1;~Adity_Desai1", "aff": "Google DeepMind;ThirdAI Corp.;Rice University", "aff_domain": "google.com;thirdai.com;rice.edu", "position": "Researcher;CEO;PhD student", "bibtex": "@misc{\ndesai2023density,\ntitle={Density Sketches for Sampling and Estimation},\nauthor={Aditya Desai and Benjamin Coleman and Anshumali Shrivastava},\nyear={2023},\nurl={https://openreview.net/forum?id=SdXv2C2-tnj}\n}", "github": "", "project": "", "reviewers": "VE18;Kdqf;jjJ4", "site": "https://openreview.net/forum?id=SdXv2C2-tnj", "pdf_size": 1035814, "recommendation": "5;5;6", "confidence": "2;2;4", "correctness": "3;3;3", "technical_novelty": "2;2;3", "empirical_novelty": "0;2;3", "wc_summary_paper": "66;63;76", "wc_strength_and_weaknesses": "51;19;614", "wc_clarity_quality_novelty_and_reproducibility": "10;25;141", "wc_summary_review": "18;292;92", "wc_review": "145;399;923", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;296;587", "reply_reviewers": "0;0;0", "reply_authors": "0;1;2", "recommendation_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 2.6666666666666665, 0.9428090415820634 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 1.6666666666666667, 1.247219128924647 ], "wc_summary_paper_avg": [ 68.33333333333333, 5.557777333511022 ], "wc_strength_and_weaknesses_avg": [ 228.0, 273.25568002635674 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 58.666666666666664, 58.53963519606942 ], "wc_summary_review_avg": [ 134.0, 115.73533024390895 ], "wc_review_avg": [ 489.0, 323.93003359779203 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 294.3333333333333, 239.6446443289638 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.816496580927726 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 1.0, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7754369595733192360&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Google;ThirdAI Corp.;Rice University", "aff_unique_dep": "Google DeepMind;;", "aff_unique_url": "https://deepmind.com;;https://www.rice.edu", "aff_unique_abbr": "DeepMind;;Rice", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United Kingdom;United States" }, { "id": "SeZ5ONageGl", "title": "Deep Duplex Learning for Weak Supervision", "track": "main", "status": "Withdraw", "tldr": "We propose a deep duplex learning method for general weakly-supervised learning.", 
"abstract": "Weak supervision widely exists in practice and shows various forms such as noisy labels, partial labels, or pseudo labels. As a weak supervisor might provide false training signals, most existing works focus on correcting the supervisor or ignoring certain constraints. While they tackle each type separately, we propose a deep duplex learning (DDL) method to deal with all kinds of weak supervision from a unified perspective of supervision utilization. We exploit both the supervision and counter-supervision signals for training and allow the network to implicitly and adaptively balance the two signals. We describe each image using a duplex representation composed of a superficial representation (SR) and a hypocritical representation (HR). We then impose the supervision signal and counter-supervision signal on SR and HR, respectively. The SR and HR collaborate to interact with the weak supervisor to adaptively confine the effect of false supervisions on the network. Our DDL sets new state-of-the-arts for noisy label learning, partial label learning, and semi-supervised learning on standard benchmarks.", "keywords": "Weakly supervised learning;learning with noisy labels;partial label learning;semi-supervised learning.", "primary_area": "", "supplementary_material": "/attachment/e68e63845ea674a22e7dc1a75b6a07d17709c687.zip", "author": "Wenzhao Zheng;Chengkun Wang;Jie Zhou;Jiwen Lu", "authorids": "~Wenzhao_Zheng1;~Chengkun_Wang1;~Jie_Zhou3;~Jiwen_Lu1", "gender": ";M;M;M", "homepage": "https://wzzheng.net;https://www.tsinghua.edu.cn/publish/auen/1713/2011/20110506105532098625469/20110506105532098625469_.html;http://ivg.au.tsinghua.edu.cn/Jiwen_Lu/;http://ivg.au.tsinghua.edu.cn/people.php", "dblp": "230/1277;00/5012-1;http://dblp.uni-trier.de/pers/hd/l/Lu:Jiwen;", "google_scholar": "LdK9scgAAAAJ;;TN8uDQoAAAAJ;", "orcid": ";;0000-0002-6121-5529;", "linkedin": ";;;", "or_profile": "~Wenzhao_Zheng1;~Jie_Zhou3;~Jiwen_Lu1;~Wang_Chengkun1", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua university", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;mails.tsinghua.edu.cn", "position": "PhD student;Full Professor;Associate Professor;PhD student", "bibtex": "@misc{\nzheng2023deep,\ntitle={Deep Duplex Learning for Weak Supervision},\nauthor={Wenzhao Zheng and Chengkun Wang and Jie Zhou and Jiwen Lu},\nyear={2023},\nurl={https://openreview.net/forum?id=SeZ5ONageGl}\n}", "github": "", "project": "", "reviewers": "8GrF;Tmou;Ls4j;dvKo", "site": "https://openreview.net/forum?id=SeZ5ONageGl", "pdf_size": 6566036, "recommendation": "1;3;3;5", "confidence": "5;2;4;4", "correctness": "2;2;2;3", "technical_novelty": "1;2;2;2", "empirical_novelty": "1;2;1;3", "wc_summary_paper": "62;24;31;93", "wc_strength_and_weaknesses": "217;156;72;93", "wc_clarity_quality_novelty_and_reproducibility": "47;87;79;52", "wc_summary_review": "57;69;27;13", "wc_review": "383;336;209;251", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.0, 1.4142135623730951 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "correctness_avg": [ 2.25, 0.4330127018922193 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 52.5, 27.408940147331492 ], "wc_strength_and_weaknesses_avg": [ 134.5, 56.7824796922431 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 66.25, 17.07886120325357 ], "wc_summary_review_avg": 
[ 41.5, 22.46664193866097 ], "wc_review_avg": [ 294.75, 68.47764233675105 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.3244428422615251, "corr_recommendation_correctness": 0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:IF55yqO8ykcJ:scholar.google.com/&scioq=Deep+Duplex+Learning+for+Weak+Supervision&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "Sh97TNO5YY_", "title": "Biases in Evaluation of Molecular Optimization Methods and Bias Reduction Strategies", "track": "main", "status": "Reject", "tldr": "This paper analyzes biases in the evaluation of molecular optimization methods, and methods to alleviate them.", "abstract": "We are interested in in silico evaluation methodology for molecular optimization methods. Given a sample of molecules and their properties of our interest, we wish not only to train a generator of molecules that can find those optimized with respect to a target property but also to evaluate its performance accurately. A common practice is to train a predictor of the target property on the sample and use it for both training and evaluating the generator. We theoretically investigate this evaluation methodology and show that it potentially suffers from two biases; one is due to misspecification of the predictor and the other to reusing the same sample for training and evaluation. 
We discuss bias reduction methods for each of the biases, and empirically investigate their effectiveness.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/8eeed343cb6979ddcd24da17048ecf5fefbf0301.zip", "author": "Hiroshi Kajino;Kohei Miyaguchi;Takayuki Osogami", "authorids": "~Hiroshi_Kajino1;~Kohei_Miyaguchi1;~Takayuki_Osogami1", "gender": "M;M;M", "homepage": "https://sites.google.com/site/hiroshikajino1989/;https://koheimiya.github.io/about/;https://sites.google.com/site/takayukiosogami/", "dblp": "117/4868;172/7749;95/5631", "google_scholar": "https://scholar.google.co.jp/citations?user=51_14FYAAAAJ;p78Mw3QAAAAJ;wtOZ8wwAAAAJ", "orcid": ";;", "linkedin": ";;takayuki-osogami-1151853/?ppe=1", "or_profile": "~Hiroshi_Kajino1;~Kohei_Miyaguchi1;~Takayuki_Osogami1", "aff": "International Business Machines;International Business Machines;International Business Machines", "aff_domain": "ibm.com;ibm.com;ibm.com", "position": "Research Staff Member;Researcher;Principal Researcher", "bibtex": "@misc{\nkajino2023biases,\ntitle={Biases in Evaluation of Molecular Optimization Methods and Bias Reduction Strategies},\nauthor={Hiroshi Kajino and Kohei Miyaguchi and Takayuki Osogami},\nyear={2023},\nurl={https://openreview.net/forum?id=Sh97TNO5YY_}\n}", "github": "", "project": "", "reviewers": "rwC3;zS58;JgZe;EYRk", "site": "https://openreview.net/forum?id=Sh97TNO5YY_", "pdf_size": 342013, "recommendation": "3;5;6;8", "confidence": "4;3;4;2", "correctness": "3;3;3;3", "technical_novelty": "2;3;4;3", "empirical_novelty": "0;2;3;3", "wc_summary_paper": "89;125;123;117", "wc_strength_and_weaknesses": "622;631;206;138", "wc_clarity_quality_novelty_and_reproducibility": "45;205;24;502", "wc_summary_review": "141;72;78;88", "wc_review": "897;1033;431;845", "wc_reply_reviewers": "2163;1447;90;0", "wc_reply_authors": "2661;2583;251;423", "reply_reviewers": "5;5;1;0", "reply_authors": "7;7;2;1", "recommendation_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.0, 1.224744871391589 ], "wc_summary_paper_avg": [ 113.5, 14.44818327679989 ], "wc_strength_and_weaknesses_avg": [ 399.25, 228.54034107789374 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 194.0, 191.10599153349432 ], "wc_summary_review_avg": [ 94.75, 27.307279249313726 ], "wc_review_avg": [ 801.5, 224.65250944514287 ], "wc_reply_reviewers_avg": [ 925.0, 916.2393246308521 ], "wc_reply_authors_avg": [ 1479.5, 1144.4495401720428 ], "reply_reviewers_avg": [ 2.75, 2.277608394786075 ], "reply_authors_avg": [ 4.25, 2.7726341266023544 ], "replies_avg": [ 40, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.7526178090063818, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11080721429638266156&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;0", "aff_unique_norm": "International Business Machines Corporation", "aff_unique_dep": "", "aff_unique_url": "https://www.ibm.com", "aff_unique_abbr": "IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "Si_XWk8umO", "title": "Towards Large Scale Transfer Learning for Differentially Private Image Classification", "track": "main", "status": "Withdraw", "tldr": "We perform a comprehensive exploration of Differentially Private 
training on ImageNet. Combined with large scale transfer learning and a few insights, we obtain state-of-the-art private results with minimal computational overhead.", "abstract": "Differentially Private Stochastic Gradient Descent (DP-SGD) has emerged as a popular private training algorithm. Unfortunately, the computational cost of training large-scale models with DP-SGD is substantially higher than non-private training. This is further exacerbated by the fact that increasing the number of parameters leads to larger degradation in utility with DP. In this work, we zoom in on the ImageNet dataset and demonstrate that, similar to the non-private case, pre-training over-parameterized models on a large public dataset can lead to substantial gains when the models are finetuned privately. Moreover, by systematically comparing private and non-private models across a range of large batch sizes, we find that similar to the non-private setting, the choice of optimizer can further improve performance substantially with DP. By using the LAMB optimizer with DP-SGD, we saw improvements of up to 20 percentage points (absolute). We also show that finetuning just the last layer for a \emph{single step} in the full batch setting, combined with extremely small-scale (near-zero) initialization, leads to SOTA results of 81.7$\%$ under a wide privacy budget range of $\epsilon \in [4, 10]$ and $\delta = 10^{-6}$, while substantially minimizing the computational overhead. Finally, we present additional results on CIFAR-10 and CIFAR-100, surpassing the previous state of the art by leveraging transfer learning with our recommendations.", "keywords": "Differential Privacy;Understanding Differential Privacy;Image Classification;Deep Learning", "primary_area": "", "supplementary_material": "", "author": "Harsh Mehta;Abhradeep Guha Thakurta;Alexey Kurakin;Ashok Cutkosky", "authorids": "~Harsh_Mehta1;~Abhradeep_Guha_Thakurta1;~Alexey_Kurakin1;~Ashok_Cutkosky1", "gender": "M;M;M;", "homepage": ";https://athakurta.squarespace.com/;http://kurakin.me;http://www.cs.stanford.edu/~ashokc", "dblp": "122/1475;31/8315;56/9834;191/6725", "google_scholar": "murJPNoAAAAJ;1rV69hMAAAAJ;nCh4qyMAAAAJ;h4AbGp0AAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Harsh_Mehta1;~Abhradeep_Guha_Thakurta1;~Alexey_Kurakin1;~Ashok_Cutkosky1", "aff": "Google Research;Google;Research, Google;Boston University", "aff_domain": "google.com;google.com;research.google.com;bu.edu", "position": "Software Engineer;Senior Research Scientist;Research Software Engineer;Assistant Professor", "bibtex": "@misc{\nmehta2023towards,\ntitle={Towards Large Scale Transfer Learning for Differentially Private Image Classification},\nauthor={Harsh Mehta and Abhradeep Guha Thakurta and Alexey Kurakin and Ashok Cutkosky},\nyear={2023},\nurl={https://openreview.net/forum?id=Si_XWk8umO}\n}", "github": "", "project": "", "reviewers": "niny;AD52;VC7w;rLq2", "site": "https://openreview.net/forum?id=Si_XWk8umO", "pdf_size": 2312080, "recommendation": "3;3;5;6", "confidence": "3;4;3;4", "correctness": "3;3;4;4", "technical_novelty": "1;3;2;3", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "52;43;40;97", "wc_strength_and_weaknesses": "199;295;44;149", "wc_clarity_quality_novelty_and_reproducibility": "26;1;7;79", "wc_summary_review": "31;27;38;82", "wc_review": "308;366;129;407", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 
3.5, 0.5 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.25, 0.82915619758885 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 58.0, 22.94558781116753 ], "wc_strength_and_weaknesses_avg": [ 171.75, 90.51346584901056 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 28.25, 30.719497066195597 ], "wc_summary_review_avg": [ 44.5, 22.005681084665387 ], "wc_review_avg": [ 302.5, 106.16614337913947 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.19245008972987526, "corr_recommendation_correctness": 0.9622504486493761, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14936110649348045547&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Google;Boston University", "aff_unique_dep": "Google Research;", "aff_unique_url": "https://research.google;https://www.bu.edu", "aff_unique_abbr": "Google Research;BU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "Siln8xpTMrZ", "title": "DADAO: Decoupled Accelerated Decentralized Asynchronous Optimization", "track": "main", "status": "Reject", "tldr": "We introduce a novel decentralized asynchronous accelerated stochastic first order algorithm to minimize a sum of smooth and strongly convex functions over a time-varying connectivity network.", "abstract": "DADAO is a novel decentralized asynchronous stochastic first order algorithm to minimize a sum of $L$-smooth and $\\mu$-strongly convex functions distributed over a time-varying connectivity network of size $n$. We model the local gradient updates and gossip communication procedures with separate independent Poisson Point Processes, decoupling the computation and communication steps in addition to making the whole approach completely asynchronous. Our method employs primal gradients and does not use a multi-consensus inner loop nor other ad-hoc mechanisms such as Error Feedback, Gradient Tracking, or a Proximal operator. By relating the inverse of the smallest positive eigenvalue $\\chi^*_1$ and the effective resistance $\\chi_2^*$ of our graph to a necessary minimal communication rate between nodes of the network, we show that our algorithm requires $\\mathcal{O}(n\\sqrt{\\frac{L}{\\mu}}\\log \\epsilon)$ local gradients and only $\\mathcal{O}(n\\sqrt{\\chi_1^*\\chi_2^*}\\sqrt{\\frac{L}{\\mu}}\\log \\epsilon)$ communications to reach a precision $\\epsilon$. If SGD with uniform noise $\\sigma^2$ is used, we reach a precision $\\epsilon$ with same speed, up to a bias term in $\\mathcal{O}(\\frac{\\sigma^2}{\\sqrt{\\mu L}})$. 
This improves upon the bounds obtained with current state-of-the-art approaches, our simulations validating the strength of our relatively unconstrained method.", "keywords": "Decentralized Asynchronous Optimization;Convex Optimization;Time-Varying Networks", "primary_area": "", "supplementary_material": "/attachment/352fd82ea3edfbbdb98c33b44723997b12810692.zip", "author": "Adel Nabli;Edouard Oyallon", "authorids": "~Adel_Nabli1;~Edouard_Oyallon1", "gender": "M;", "homepage": ";", "dblp": "269/9664.html;", "google_scholar": "bvNfLmMAAAAJ;", "orcid": "0000-0003-3180-5445;", "linkedin": ";", "or_profile": "~Adel_Nabli1;~Edouard_Oyallon1", "aff": "Montreal Institute for Learning Algorithms, University of Montreal, Universit\u00e9 de Montr\u00e9al;", "aff_domain": "mila.umontreal.ca;", "position": "PhD student;", "bibtex": "@misc{\nnabli2023dadao,\ntitle={{DADAO}: Decoupled Accelerated Decentralized Asynchronous Optimization},\nauthor={Adel Nabli and Edouard Oyallon},\nyear={2023},\nurl={https://openreview.net/forum?id=Siln8xpTMrZ}\n}", "github": "", "project": "", "reviewers": "c2yc;R4MY;gkNG;L3mL", "site": "https://openreview.net/forum?id=Siln8xpTMrZ", "pdf_size": 596633, "recommendation": "3;5;5;5", "confidence": "5;3;3;3", "correctness": "2;3;4;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;0;2;0", "wc_summary_paper": "39;86;42;91", "wc_strength_and_weaknesses": "169;74;489;141", "wc_clarity_quality_novelty_and_reproducibility": "8;7;24;58", "wc_summary_review": "39;39;53;47", "wc_review": "255;206;608;337", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "522;293;1116;644", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;2;1", "recommendation_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.0, 1.0 ], "wc_summary_paper_avg": [ 64.5, 24.088378940891808 ], "wc_strength_and_weaknesses_avg": [ 218.25, 160.0833767135114 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 24.25, 20.620075169601105 ], "wc_summary_review_avg": [ 44.5, 5.894913061275798 ], "wc_review_avg": [ 351.5, 155.31017352382295 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 643.75, 300.36176104823994 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -1.0, "corr_recommendation_correctness": 0.816496580927726, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1093577798307718670&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 33, "aff_unique_index": "0", "aff_unique_norm": "University of Montreal", "aff_unique_dep": "Montreal Institute for Learning Algorithms", "aff_unique_url": "https://www.mila.quebec", "aff_unique_abbr": "MILA", "aff_campus_unique_index": "0", "aff_campus_unique": "Montreal", "aff_country_unique_index": "0", "aff_country_unique": "Canada" }, { "title": "Replay Memory as An Empirical MDP: Combining Conservative Estimation with Experience Replay", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10881", "id": "SjzFVSJUt8S", "poster": "/media/PosterPDFs/ICLR%202023/10881.png?t=1681612586.2265246", "openreview": "https://openreview.net/forum?id=SjzFVSJUt8S", "slides": "https://iclr.cc/virtual/2023/poster/10881", "video": "https://iclr.cc/virtual/2023/poster/10881", "author_site": "Hongming Zhang, Chenjun Xiao, Han 
Wang, Jun Jin, bo xu, Martin Mueller", "tldr": "", "abstract": "Experience replay, which stores transitions in a replay memory for repeated use, plays an important role in improving sample efficiency in reinforcement learning. Existing techniques such as reweighted sampling, episodic learning and reverse sweep update further process the information in the replay memory to make experience replay more efficient. In this work, we further exploit the information in the replay memory by treating it as an empirical \emph{Replay Memory MDP (RM-MDP)}. By solving it with dynamic programming, we learn a conservative value estimate that \emph{only} considers transitions observed in the replay memory. Both value and policy regularizers based on this conservative estimate are developed and integrated with model-free learning algorithms. We design the metric \textit{memory density} to measure the quality of RM-MDP. Our empirical studies quantitatively find a strong correlation between performance improvement and memory density. Our method combines \emph{Conservative Estimation with Experience Replay (CEER)}, improving sample efficiency by a large margin, especially when the memory density is high. Even when the memory density is low, such a conservative estimate can still help to avoid suicidal actions and thereby improve performance.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Hongming Zhang;Chenjun Xiao;Han Wang;Jun Jin;bo xu;Martin M\u00fcller", "authorids": "~Hongming_Zhang3;~Chenjun_Xiao1;~Han_Wang8;~Jun_Jin1;~bo_xu1;~Martin_M\u00fcller2", "gender": "M;;;;M;M", "homepage": "https://github.com/initial-h;https://chenjun-x.github.io/;;;;https://webdocs.cs.ualberta.ca/~mmueller/", "dblp": ";178/8641;;78/8436.html;;https://dblp.org/pers/hd/m/M=uuml=ller_0003:Martin", "google_scholar": "https://scholar.google.ca/citations?user=mwbsY3AAAAAJ;;;a6grwUcAAAAJ;;J60BcHkAAAAJ", "orcid": ";0000-0002-5493-1500;;0000-0003-4413-8565;;0000-0002-5639-5318", "linkedin": ";;han-wang-b68a7a17b/;;;", "or_profile": "~Hongming_Zhang3;~Chenjun_Xiao1;~Han_Wang8;~Jun_Jin1;~bo_xu1;~Martin_Mueller1", "aff": "University of Alberta;Huawei Technologies Ltd.;University of Alberta;Huawei Technologies Ltd. 
Canada;;University of Alberta", "aff_domain": "ualberta.ca;huawei.com;ualberta.ca;huawei.com;;ualberta.ca", "position": "PhD student;Researcher;PhD student;Researcher;;Full Professor", "bibtex": "@inproceedings{\nzhang2023replay,\ntitle={Replay Memory as An Empirical {MDP}: Combining Conservative Estimation with Experience Replay},\nauthor={Hongming Zhang and Chenjun Xiao and Han Wang and Jun Jin and bo xu and Martin M{\\\"u}ller},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=SjzFVSJUt8S}\n}", "github": "", "project": "", "reviewers": "MxYs;PqTv;oYzW;nSvQ", "pdf_size": 1420959, "recommendation": "6;6;8;8", "confidence": "4;4;4;5", "correctness": "3;4;3;3", "technical_novelty": "3;3;3;2", "empirical_novelty": "3;3;3;2", "wc_summary_paper": "139;115;4;135", "wc_strength_and_weaknesses": "832;125;460;340", "wc_clarity_quality_novelty_and_reproducibility": "136;82;4;28", "wc_summary_review": "49;55;4;44", "wc_review": "1156;377;472;547", "wc_reply_reviewers": "266;0;142;131", "wc_reply_authors": "2306;290;1267;1445", "reply_reviewers": "2;0;1;2", "reply_authors": "5;1;3;4", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 98.25, 55.16962479480896 ], "wc_strength_and_weaknesses_avg": [ 439.25, 256.5573766236317 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 62.5, 50.977936403899285 ], "wc_summary_review_avg": [ 38.0, 20.0124960961895 ], "wc_review_avg": [ 638.0, 305.07458104535687 ], "wc_reply_reviewers_avg": [ 134.75, 94.14184776176852 ], "wc_reply_authors_avg": [ 1327.0, 716.1239417866155 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 3.25, 1.479019945774904 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": -0.5773502691896257, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5819841335685125012&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 2, "pdf": "https://openreview.net/pdf?id=SjzFVSJUt8S", "email": "ualberta.ca;huawei.com;ualberta.ca;huawei.com;;ualberta.ca", "author_num": 6, "aff_unique_index": "0;1;0;1;0", "aff_unique_norm": "University of Alberta;Huawei", "aff_unique_dep": ";Huawei Technologies", "aff_unique_url": "https://www.ualberta.ca;https://www.huawei.com", "aff_unique_abbr": "UAlberta;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "Canada;China" }, { "id": "SlzEll3EsKv", "title": "Analyzing the Latent Space of GAN through Local Dimension Estimation", "track": "main", "status": "Reject", "tldr": "We analyze the latent space of GAN through local dimension estimation and propose a global disentanglement metric called Distortion.", "abstract": "The impressive success of style-based GANs (StyleGANs) in high-fidelity image synthesis has motivated research to understand the semantic properties of their latent spaces. Recently, a close relationship was observed between the semantically disentangled local perturbations and the local PCA components in the learned latent space $\\mathcal{W}$. However, understanding the number of disentangled perturbations remains challenging. 
Building upon this observation, we propose a local dimension estimation algorithm for an arbitrary intermediate layer in a pre-trained GAN model. The estimated intrinsic dimension corresponds to the number of disentangled local perturbations. In this perspective, we analyze the intermediate layers of the mapping network in StyleGANs. Our analysis clarifies the success of $\\mathcal{W}$-space in StyleGAN and suggests a method for finding an alternative. Moreover, the intrinsic dimension estimation opens the possibility of unsupervised evaluation of global-basis-compatibility and disentanglement for a latent space. Our proposed metric, called Distortion, measures an inconsistency of intrinsic tangent space on the learned latent space. The metric is purely geometric and does not require any additional attribute information. Nevertheless, the metric shows a high correlation with the global-basis-compatibility and supervised disentanglement score. Our findings pave the way towards an unsupervised selection of globally disentangled latent space among the intermediate latent spaces in a GAN.", "keywords": "generative adversarial network;disentanglement;semantic factorization;dimension estimation;grassmannian", "primary_area": "", "supplementary_material": "/attachment/33a5581dd90c5bc4df8a3cbdbd6871a84ffdbb6c.zip", "author": "Jaewoong Choi;Geonho Hwang;Hyunsoo Cho;Myungjoo Kang", "authorids": "~Jaewoong_Choi1;~Geonho_Hwang1;~Hyunsoo_Cho2;~Myungjoo_Kang1", "gender": "M;;M;", "homepage": ";;;http://ncia.snu.ac.kr/", "dblp": "63/11483;;;64/5657.html", "google_scholar": "e4ZLjREAAAAJ;UJ_Mw6YAAAAJ;;", "orcid": ";;0000-0003-1172-2458;", "linkedin": ";;hyunsoo-cho-82569478;", "or_profile": "~Jaewoong_Choi1;~Geonho_Hwang1;~Hyunsoo_Cho2;~Myungjoo_Kang1", "aff": "Korea Institute for Advanced Study;Seoul National University;Seoul National University;Seoul National University", "aff_domain": "kias.re.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr", "position": "Postdoc;PhD student;PhD student;Full Professor", "bibtex": "@misc{\nchoi2023analyzing,\ntitle={Analyzing the Latent Space of {GAN} through Local Dimension Estimation},\nauthor={Jaewoong Choi and Geonho Hwang and Hyunsoo Cho and Myungjoo Kang},\nyear={2023},\nurl={https://openreview.net/forum?id=SlzEll3EsKv}\n}", "github": "", "project": "", "reviewers": "1W36;STT2;dnDf;RU6v", "site": "https://openreview.net/forum?id=SlzEll3EsKv", "pdf_size": 49183624, "recommendation": "5;5;6;6", "confidence": "2;3;4;4", "correctness": "2;4;3;4", "technical_novelty": "3;4;3;2", "empirical_novelty": "0;3;3;2", "wc_summary_paper": "55;44;227;58", "wc_strength_and_weaknesses": "275;224;206;40", "wc_clarity_quality_novelty_and_reproducibility": "48;73;84;25", "wc_summary_review": "36;160;50;46", "wc_review": "414;501;567;169", "wc_reply_reviewers": "108;0;0;0", "wc_reply_authors": "1083;934;828;281", "reply_reviewers": "1;0;0;0", "reply_authors": "2;2;2;2", "recommendation_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.0, 1.224744871391589 ], "wc_summary_paper_avg": [ 96.0, 75.81226813649621 ], "wc_strength_and_weaknesses_avg": [ 186.25, 88.14866703473173 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 57.5, 22.85278976405288 ], "wc_summary_review_avg": [ 73.0, 50.48762224545735 ], "wc_review_avg": [ 412.75, 150.82833785466178 ], "wc_reply_reviewers_avg": [ 27.0, 46.76537180435969 ], "wc_reply_authors_avg": [ 781.5, 
302.82874698416595 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.9045340337332909, "corr_recommendation_correctness": 0.30151134457776363, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4763769668252366347&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Korea Institute for Advanced Study;Seoul National University", "aff_unique_dep": ";", "aff_unique_url": "http://www.kaist.edu;https://www.snu.ac.kr", "aff_unique_abbr": "KIAS;SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "Sme6eesZqW", "title": "PADDLES: Phase-Amplitude Spectrum Disentangled Early Stopping for Learning with Noisy Labels", "track": "main", "status": "Withdraw", "tldr": "We propose a new early stopping training method for learning with noisy labels by choosing different stopping points for the Phase and Amplitude spectrum in the frequency domain. ", "abstract": "Deep Neural Networks (DNNs) have demonstrated superiority in learning various patterns. However, DNNs are sensitive to label noise and easily overfit noisy labels during training. The early stopping strategy halts updates to DNNs beyond the early training phase and is widely employed as an effective method when learning with noisy labels. Motivated by biological findings that the amplitude spectrum (AS) and phase spectrum (PS) in the frequency domain play different roles in the animal's vision system, we observe that PS, which captures more semantic information, is more resistant to label noise than AS. Performing the early stopping on AS and PS at the same time is therefore undesirable. In contrast, we propose early stops at different times for AS and PS. In order to achieve this, we disentangle the features of some layer(s) into AS and PS using the Discrete Fourier Transform (DFT) during training. The AS and PS will be detached at different training stages from the gradient computational graph. The features are then restored via inverse DFT (iDFT) for the next layer. We term the proposed method Phase-AmplituDe DisentangLed Early Stopping (PADDLES). 
Simple yet effective, PADDLES outperforms other early stopping methods and obtains state-of-the-art performance on both synthetic and real-world label-noise datasets.", "keywords": "Learning with noisy labels;Frequency domain decomposition;Early Stopping Training", "primary_area": "", "supplementary_material": "", "author": "Huaxi Huang;Hui Kang;Sheng Liu;Olivier Salvado;Thierry Rakotoarivelo;Dadong Wang;Tongliang Liu", "authorids": "~Huaxi_Huang1;~Hui_Kang1;~Sheng_Liu2;~Olivier_Salvado1;~Thierry_Rakotoarivelo1;~Dadong_Wang1;~Tongliang_Liu1", "gender": "M;M;;M;M;;M", "homepage": ";https://randydl.github.io;https://shengliu66.github.io/;https://www.qut.edu.au/about/our-people/academic-profiles/olivier.salvado;https://people.csiro.au/R/T/Thierry-Rakotoarivelo;;https://tongliang-liu.github.io/", "dblp": "184/0802.html;;;;;;150/6667", "google_scholar": "F4icUy8AAAAJ;;rzhzR-cAAAAJ;https://scholar.google.com.au/citations?user=DaLwuqYAAAAJ;;;https://scholar.google.com.au/citations?user=EiLdZ_YAAAAJ", "orcid": ";;;;;;", "linkedin": ";;;oliviersalvado/?originalSubdomain=au;;;", "or_profile": "~Huaxi_Huang1;~Hui_Kang1;~Sheng_Liu2;~Olivier_Salvado1;~Thierry_Rakotoarivelo1;~Dadong_Wang1;~Tongliang_Liu1", "aff": "CSIRO;University of Sydney;New York University;CSIRO;CSIRO;;University of Sydney", "aff_domain": "csiro.au;usyd.edu.au;nyu.edu;csiro.au;csiro.au;;sydney.edu.au", "position": "Postdoc;MS student;PhD student;Full Professor;Researcher;;Lecturer", "bibtex": "@misc{\nhuang2023paddles,\ntitle={{PADDLES}: Phase-Amplitude Spectrum Disentangled Early Stopping for Learning with Noisy Labels},\nauthor={Huaxi Huang and Hui Kang and Sheng Liu and Olivier Salvado and Thierry Rakotoarivelo and Dadong Wang and Tongliang Liu},\nyear={2023},\nurl={https://openreview.net/forum?id=Sme6eesZqW}\n}", "github": "", "project": "", "reviewers": "XzdG;6ms8;AZGT", "site": "https://openreview.net/forum?id=Sme6eesZqW", "pdf_size": 471648, "recommendation": "3;5;5", "confidence": "4;3;4", "correctness": "2;3;3", "technical_novelty": "2;2;3", "empirical_novelty": "2;2;2", "wc_summary_paper": "60;102;65", "wc_strength_and_weaknesses": "446;221;72", "wc_clarity_quality_novelty_and_reproducibility": "116;254;45", "wc_summary_review": "65;24;79", "wc_review": "687;601;261", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 75.66666666666667, 18.732028424302822 ], "wc_strength_and_weaknesses_avg": [ 246.33333333333334, 153.7320901944534 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 138.33333333333334, 86.77301167733867 ], "wc_summary_review_avg": [ 56.0, 23.338094752285727 ], "wc_review_avg": [ 516.3333333333334, 183.92993835214054 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.49999999999999983, "corr_recommendation_correctness": 0.9999999999999998, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5796768913654447271&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 9, "aff_unique_index": "0;1;2;0;0;1", "aff_unique_norm": 
"Commonwealth Scientific and Industrial Research Organisation;University of Sydney;New York University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.csiro.au;https://www.sydney.edu.au;https://www.nyu.edu", "aff_unique_abbr": "CSIRO;USYD;NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "Australia;United States" }, { "id": "SmjW4kKLjuU", "title": "ON COMPLEX-DOMAIN CNN REPRESENTATIONS FOR CLASSIFYING REAL/COMPLEX-VALUED DATA", "track": "main", "status": "Withdraw", "tldr": "We address the contradictory answers present in the literature for the following question: CV-CNN performs better or worse than RV-CNN for classification task?", "abstract": "This paper is about complex-valued CNNs (CV-CNNs) for computer vision that use representations that are complex-valued instead of real-valued. We divide input data into three categories: inherently real-valued, inherently complex-valued, and complex-valued obtained by transforming real-valued. We study the question whether complex-valued representation of CV-CNNs offers any advantages over the commonly used real-valued CNNs (RV-CNNs). For concreteness, we focus on the classification task. The existing literature offers contradictory answers to our question. We find that this is mainly because (a) they seldom employ a common performance measure (e.g., CV-CNN compared against RV-CNN with similar network structure vs similar number of parameters) (b) diversity of evaluation datasets used are limited (e.g., datasets in which magnitude information is more, less or as important as phase information) (c) less effort has been devoted to reduce the randomness in training between CV-CNN and RV-CNN. Towards this, we propose performance measures based on similar network structure, number of parameters and number of MAC operations. Also, we consider diverse datasets with varying magnitude/phase information, and deal with the randomness in training. As a result, we expect that any observed performance differences will be independent of the above disparities, and arise from the use of real vs complex representations. Theoretically, we show that, unlike RV-CNNs, CV-CNNs can preserve magnitude and phase through intermediate stages of processing. Our main experimental findings are the following. (1) As network depth decreases --- the performance of CV-CNNs improves with respect to similar network structure; the performances of CV-CNN and RV-CNN having a similar number of parameters become more comparable; and the performance of RV-CNNs improves with respect to similar number of MAC operations; (2) The above performance differences diminish as the network depth increases. (3) With respect to data diversity, performance depends on whether the dataset has dominant magnitude or phase, i.e., whether reconstruction error is lower using only magnitude or only phase. If a complex-valued data has dominant magnitude, instead of providing real and imaginary parts as input, providing the magnitude part produces significant performance gain, whereas if the data has dominant phase, providing both real and imaginary parts is important. 
This holds true for different network depths.", "keywords": "Complex-valued neural networks;Complex-valued representations;Complex-value CNN;Classification;Complex numbers", "primary_area": "", "supplementary_material": "", "author": "Mahesh Mohan M R;Kartik Srivastava;Narendra Ahuja", "authorids": "~Mahesh_Mohan_M_R1;~Kartik_Srivastava1;~Narendra_Ahuja1", "gender": "M;M;", "homepage": "https://maheshmohanmr.github.io/;;http://vision.ai.illinois.edu/ahuja.html", "dblp": "211/7261.html;;", "google_scholar": "gNshB_kAAAAJ;Anjh17EAAAAJ;dY7OSl0AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Mahesh_Mohan_M_R1;~Kartik_Srivastava1;~Narendra_Ahuja1", "aff": "Department of Computer Science;;University of Illinois, Urbana Champaign", "aff_domain": "cs.illinois.edu;;illinois.edu", "position": "Postdoc;;Research Professor", "bibtex": "@misc{\nr2023on,\ntitle={{ON} {COMPLEX}-{DOMAIN} {CNN} {REPRESENTATIONS} {FOR} {CLASSIFYING} {REAL}/{COMPLEX}-{VALUED} {DATA}},\nauthor={Mahesh Mohan M R and Kartik Srivastava and Narendra Ahuja},\nyear={2023},\nurl={https://openreview.net/forum?id=SmjW4kKLjuU}\n}", "github": "", "project": "", "reviewers": "W3j3;ag6a;46si", "site": "https://openreview.net/forum?id=SmjW4kKLjuU", "pdf_size": 1869594, "recommendation": "1;3;6", "confidence": "4;4;3", "correctness": "1;2;3", "technical_novelty": "1;3;2", "empirical_novelty": "2;2;0", "wc_summary_paper": "109;132;72", "wc_strength_and_weaknesses": "502;611;102", "wc_clarity_quality_novelty_and_reproducibility": "119;373;81", "wc_summary_review": "135;72;26", "wc_review": "865;1188;281", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.3333333333333335, 2.0548046676563256 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 2.0, 0.816496580927726 ], "technical_novelty_avg": [ 2.0, 0.816496580927726 ], "empirical_novelty_avg": [ 1.3333333333333333, 0.9428090415820634 ], "wc_summary_paper_avg": [ 104.33333333333333, 24.716166549222166 ], "wc_strength_and_weaknesses_avg": [ 405.0, 218.82565358446132 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 191.0, 129.62510044997717 ], "wc_summary_review_avg": [ 77.66666666666667, 44.67910274638717 ], "wc_review_avg": [ 778.0, 375.35671922408244 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.9176629354822468, "corr_recommendation_correctness": 0.9933992677987827, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:sRJalEEWQ-0J:scholar.google.com/&scioq=ON+COMPLEX-DOMAIN+CNN+REPRESENTATIONS+FOR+CLASSIFYING+REAL/COMPLEX-VALUED+DATA&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Unknown Institution;University of Illinois Urbana-Champaign", "aff_unique_dep": "Department of Computer Science;", "aff_unique_url": ";https://illinois.edu", "aff_unique_abbr": ";UIUC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "1", "aff_country_unique": ";United States" }, { "title": "Policy-Based Self-Competition for Planning Problems", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11255", "id": "SmufNDN90G", "poster": "/media/PosterPDFs/ICLR%202023/11255.png?t=1681729143.1533422", "openreview": 
"https://openreview.net/forum?id=SmufNDN90G", "slides": "https://iclr.cc/virtual/2023/poster/11255", "video": "https://iclr.cc/virtual/2023/poster/11255", "author_site": "Jonathan Pirnay, Quirin G\u00f6ttl, Jakob Burger, Dominik Grimm", "tldr": "Solving deterministic single-agent problems through self-competition by including a historical policy in the planning process of Gumbel AlphaZero.", "abstract": "AlphaZero-type algorithms may stop improving on single-player tasks in case the value network guiding the tree search is unable to approximate the outcome of an episode sufficiently well. One technique to address this problem is transforming the single-player task through self-competition. The main idea is to compute a scalar baseline from the agent\u2019s historical performances and to reshape an episode\u2019s reward into a binary output, indicating whether the baseline has been exceeded or not. However, this baseline only carries limited information for the agent about strategies how to improve. We leverage the idea of self-competition and directly incorporate a historical policy into the planning process instead of its scalar performance. Based on the recently introduced Gumbel AlphaZero (GAZ), we propose our algorithm GAZ \u2018Play-to-Plan\u2019 (GAZ PTP), in which the agent learns to find strong trajectories by planning against possible strategies of its past self. We show the effectiveness of our approach in two well-known combinatorial optimization problems, the Traveling Salesman Problem and the Job-Shop Scheduling Problem. With only half of the simulation budget for search, GAZ PTP consistently outperforms all selected single-player variants of GAZ.", "keywords": "reinforcement learning;alphazero;self-competition;self-critical;gumbel;mcts", "primary_area": "", "supplementary_material": "", "author": "Jonathan Pirnay;Quirin G\u00f6ttl;Jakob Burger;Dominik Gerhard Grimm", "authorids": "~Jonathan_Pirnay1;~Quirin_G\u00f6ttl1;~Jakob_Burger1;~Dominik_Gerhard_Grimm1", "gender": "M;M;;M", "homepage": "https://bit.cs.tum.de/team/jonathan-pirnay/;;https://ctv.cs.tum.de/;http://bit.cs.tum.de", "dblp": "291/4353;283/5443;270/0442;283/5927", "google_scholar": "uB6DNQQAAAAJ;https://scholar.google.de/citations?hl=de;https://scholar.google.de/citations?user=Om-cEEQAAAAJ;https://scholar.google.de/citations?user=Bb936LAAAAAJ", "orcid": ";;;0000-0003-2085-4591", "linkedin": ";;;", "or_profile": "~Jonathan_Pirnay1;~Quirin_G\u00f6ttl1;~Jakob_Burger1;~Dominik_Gerhard_Grimm1", "aff": "Technische Universit\u00e4t M\u00fcnchen;Technische Universit\u00e4t M\u00fcnchen;Technische Universit\u00e4t M\u00fcnchen;Technical University of Munich, Campus Straubing", "aff_domain": "tum.de;tum.de;tum.de;cs.tum.de", "position": "PhD student;PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\npirnay2023policybased,\ntitle={Policy-Based Self-Competition for Planning Problems},\nauthor={Jonathan Pirnay and Quirin G{\\\"o}ttl and Jakob Burger and Dominik Gerhard Grimm},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=SmufNDN90G}\n}", "github": "", "project": "", "reviewers": "8mG6;T4pL;vDBJ", "pdf_size": 2822121, "recommendation": "6;8;8", "confidence": "4;4;5", "correctness": "3;4;4", "technical_novelty": "3;3;3", "empirical_novelty": "3;3;2", "wc_summary_paper": "133;57;64", "wc_strength_and_weaknesses": "169;69;182", "wc_clarity_quality_novelty_and_reproducibility": "64;880;28", "wc_summary_review": 
"54;114;51", "wc_review": "420;1120;325", "wc_reply_reviewers": "24;220;0", "wc_reply_authors": "1523;2477;667", "reply_reviewers": "1;1;0", "reply_authors": "3;5;1", "recommendation_avg": [ 7.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 84.66666666666667, 34.296096311711956 ], "wc_strength_and_weaknesses_avg": [ 140.0, 50.48432099837203 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 324.0, 393.42597778997765 ], "wc_summary_review_avg": [ 73.0, 29.017236257093817 ], "wc_review_avg": [ 621.6666666666666, 354.50278168474534 ], "wc_reply_reviewers_avg": [ 81.33333333333333, 98.54045982347668 ], "wc_reply_authors_avg": [ 1555.6666666666667, 739.2903504187121 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 3.0, 1.632993161855452 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.4999999999999999, "corr_recommendation_correctness": 0.9999999999999998, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12339002582253519708&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "pdf": "https://openreview.net/pdf?id=SmufNDN90G", "email": "tum.de;tum.de;tum.de;cs.tum.de", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Technische Universit\u00e4t M\u00fcnchen;Technical University of Munich", "aff_unique_dep": ";", "aff_unique_url": "https://www.tum.de;https://www.tum.de", "aff_unique_abbr": "TUM;TUM", "aff_campus_unique_index": "1", "aff_campus_unique": ";Straubing", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "id": "SnBDX5k-KuJ", "title": "Solving Continual Learning via Problem Decomposition", "track": "main", "status": "Reject", "tldr": "", "abstract": "This paper is concerned with class incremental learning (CIL) in continual learning (CL). CIL is the popular continual learning paradigm in which a system receives a sequence of tasks with different classes in each task and is expected to learn to predict the class of each test instance without given any task related information for the instance. Although many techniques have been proposed to solve CIL, it remains to be highly challenging due to the difficulty of dealing with catastrophic forgetting (CF). This paper starts from the first principle and proposes a novel method to solve the problem. The definition of CIL reveals that the problem can be decomposed into two probabilities: within-task prediction probability and task-id prediction probability. This paper proposes an effective technique to estimate these two probabilities based on the estimation of feature distributions in the latent space using incremental PCA and Mahalanobis distance. 
The proposed method does not require a memory buffer to save replay data and it outperforms strong baselines including replay-based methods.", "keywords": "Continual learning;lifelong learning", "primary_area": "", "supplementary_material": "/attachment/b979b7f42e5536cbc2d9cba23f14a162d63a625c.zip", "author": "Gyuhak Kim;Changnan Xiao;Tatsuya Konishi;Zixuan Ke;Bing Liu", "authorids": "~Gyuhak_Kim1;~Changnan_Xiao1;~Tatsuya_Konishi2;~Zixuan_Ke1;~Bing_Liu1", "gender": ";M;M;M;M", "homepage": "https://k-gyuhak.github.io/;https://github.com/ChangnXX;https://vincent950129.github.io/;https://www.cs.uic.edu/~liub/;", "dblp": "317/0166;;196/3817;l/BingLiu1.html;185/3974.html", "google_scholar": "https://scholar.google.com/citations?hl=en;;SZ4sFNEAAAAJ;Kt1bjZoAAAAJ;tx15SxoAAAAJ", "orcid": ";;;;0000-0002-2255-0156", "linkedin": ";;;;ukaznil/", "or_profile": "~Gyuhak_Kim1;~Changnan_Xiao1;~Zixuan_Ke1;~Bing_Liu1;~Tatsuya_KONISHI1", "aff": "University of Illinois, Chicago;Bytedance;University of Illinois, Chicago;University of Illinois at Chicago;KDDI Research, Inc.", "aff_domain": "uic.edu;bytedance.com;uic.edu;uic.edu;kddi-research.jp", "position": "PhD student;Researcher;PhD student;Full Professor;Researcher", "bibtex": "@misc{\nkim2023solving,\ntitle={Solving Continual Learning via Problem Decomposition},\nauthor={Gyuhak Kim and Changnan Xiao and Tatsuya Konishi and Zixuan Ke and Bing Liu},\nyear={2023},\nurl={https://openreview.net/forum?id=SnBDX5k-KuJ}\n}", "github": "", "project": "", "reviewers": "vcxy;D1ii;JguF;cHhV", "site": "https://openreview.net/forum?id=SnBDX5k-KuJ", "pdf_size": 567718, "recommendation": "3;5;6;8", "confidence": "2;4;4;3", "correctness": "1;3;3;3", "technical_novelty": "3;2;3;2", "empirical_novelty": "3;2;3;2", "wc_summary_paper": "107;48;70;102", "wc_strength_and_weaknesses": "140;110;267;65", "wc_clarity_quality_novelty_and_reproducibility": "83;42;53;42", "wc_summary_review": "14;28;41;18", "wc_review": "344;228;431;227", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1179;1263;429;614", "reply_reviewers": "0;0;0;0", "reply_authors": "4;4;1;1", "recommendation_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 2.5, 0.8660254037844386 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 81.75, 24.107830678018296 ], "wc_strength_and_weaknesses_avg": [ 145.5, 75.05497984810869 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 55.0, 16.777961735562517 ], "wc_summary_review_avg": [ 25.25, 10.425329730996522 ], "wc_review_avg": [ 307.5, 85.71026776296992 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 871.25, 357.05067917593993 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.5, 1.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.4181210050035454, "corr_recommendation_correctness": 0.8006407690254357, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:RbXxL_b7MLYJ:scholar.google.com/&scioq=Solving+Continual+Learning+via+Problem+Decomposition&hl=en&as_sdt=0,31", "gs_version_total": 0, "aff_unique_index": "0;1;0;0;2", "aff_unique_norm": "University of Illinois at Chicago;ByteDance;KDDI Research", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uic.edu;https://www.bytedance.com;https://www.kddi-research.com", "aff_unique_abbr": "UIC;Bytedance;KDDI", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Chicago;", "aff_country_unique_index": 
"0;1;0;0;2", "aff_country_unique": "United States;China;Japan" }, { "id": "SoAnNZ7Z3xw", "title": "Locally Invariant Explanations: Towards Stable and Unidirectional Explanations through Local Invariant Learning", "track": "main", "status": "Reject", "tldr": "A local explanation method that is stable and unidirectional", "abstract": "Locally interpretable model agnostic explanations (LIME) method is one of the most popular methods used to explain black-box models at a per example level. Although many variants have been proposed, few provide a simple way to produce high fidelity explanations that are also stable and intuitive. In this work, we provide a novel perspective by proposing a model agnostic local explanation method inspired by the invariant risk minimization (IRM) principle -- originally proposed for (global) out-of-distribution generalization -- to provide such high fidelity explanations that are also stable and unidirectional across nearby examples. Our method is based on a game theoretic formulation where we theoretically show that our approach has a strong tendency to eliminate features where the gradient of the black-box function abruptly changes sign in the locality of the example we want to explain, while in other cases it is more careful and will choose a more conservative (feature) attribution, a behavior which can be highly desirable for recourse. Empirically, we show on tabular, image and text data that the quality of our explanations with neighborhoods formed using random perturbations are much better than LIME and in some cases even comparable to other methods that use realistic neighbors sampled from the data manifold. This is desirable given that learning a manifold to either create realistic neighbors or to project explanations is typically expensive or may even be impossible. 
Moreover, our algorithm is simple and efficient to train, and can ascertain stable input features for local decisions of a black-box without access to side information such as a (partial) causal graph as has been seen in some recent works.", "keywords": "explainable AI", "primary_area": "", "supplementary_material": "/attachment/02fa7f9e2450030bb884c7937cafb055c08adfbc.zip", "author": "Amit Dhurandhar;Karthikeyan Natesan Ramamurthy;Kartik Ahuja;Vijay Arya", "authorids": "~Amit_Dhurandhar1;~Karthikeyan_Natesan_Ramamurthy1;~Kartik_Ahuja1;~Vijay_Arya1", "gender": "M;;;M", "homepage": "https://researcher.watson.ibm.com/researcher/view.php?person=us-adhuran;https://nrkarthikeyan.github.io/;;", "dblp": "66/3289;58/7800;;77/1485", "google_scholar": "km9vIPEAAAAJ;mG8HuhEAAAAJ;;", "orcid": ";0000-0002-6021-5930;;", "linkedin": ";;;", "or_profile": "~Amit_Dhurandhar1;~Karthikeyan_Natesan_Ramamurthy1;~Kartik_Ahuja1;~Vijay_Arya1", "aff": "International Business Machines;International Business Machines;;IBM Research", "aff_domain": "ibm.com;ibm.com;;ibm.com", "position": "Principal Researcher;Research Staff Member;;Researcher", "bibtex": "@misc{\ndhurandhar2023locally,\ntitle={Locally Invariant Explanations: Towards Stable and Unidirectional Explanations through Local Invariant Learning},\nauthor={Amit Dhurandhar and Karthikeyan Natesan Ramamurthy and Kartik Ahuja and Vijay Arya},\nyear={2023},\nurl={https://openreview.net/forum?id=SoAnNZ7Z3xw}\n}", "github": "", "project": "", "reviewers": "Tzhp;8tif;uJGF;xs2D", "site": "https://openreview.net/forum?id=SoAnNZ7Z3xw", "pdf_size": 6748102, "recommendation": "5;5;6;6", "confidence": "4;2;4;3", "correctness": "3;3;3;2", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "158;78;109;100", "wc_strength_and_weaknesses": "379;596;59;264", "wc_clarity_quality_novelty_and_reproducibility": "32;172;262;43", "wc_summary_review": "48;36;42;82", "wc_review": "617;882;472;489", "wc_reply_reviewers": "0;176;0;187", "wc_reply_authors": "784;1052;357;234", "reply_reviewers": "0;1;0;1", "reply_authors": "3;4;3;3", "recommendation_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 111.25, 29.252136674096132 ], "wc_strength_and_weaknesses_avg": [ 324.5, 194.18612205819446 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 127.25, 95.30313478579811 ], "wc_summary_review_avg": [ 52.0, 17.832554500127006 ], "wc_review_avg": [ 615.0, 164.0259125870056 ], "wc_reply_reviewers_avg": [ 90.75, 90.83329510702559 ], "wc_reply_authors_avg": [ 606.75, 328.2463823106052 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 3.25, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.30151134457776363, "corr_recommendation_correctness": -0.5773502691896257, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5800432145814744598&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0;1", "aff_unique_norm": "International Business Machines Corporation;IBM", "aff_unique_dep": ";IBM Research", "aff_unique_url": "https://www.ibm.com;https://www.ibm.com/research", "aff_unique_abbr": "IBM;IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Logical Message 
Passing Networks with One-hop Inference on Atomic Formulas", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11066", "id": "SoyOsp7i_l", "poster": "/media/PosterPDFs/ICLR%202023/11066.png?t=1682574942.5056424", "openreview": "https://openreview.net/forum?id=SoyOsp7i_l", "slides": "https://iclr.cc/virtual/2023/poster/11066", "video": "https://iclr.cc/virtual/2023/poster/11066", "author_site": "Zihao Wang, Yangqiu Song, Ginny Wong, Simon See", "tldr": "", "abstract": "Complex Query Answering (CQA) over Knowledge Graphs (KGs) has attracted a lot of attention for its potential to support many applications. Given that KGs are usually incomplete, neural models have been proposed to answer logical queries by parameterizing set operators with complex neural networks. However, such methods usually train neural set operators together with a large number of entity and relation embeddings from scratch, and it remains unclear whether and how the embeddings or the neural set operators contribute to the performance. In this paper, we propose a simple framework for complex query answering that decomposes the KG embeddings from neural set operators. We propose to represent each complex query as a query graph. On top of the query graph, we propose the Logical Message Passing Neural Network (LMPNN) that connects the local one-hop inferences on atomic formulas to the global logical reasoning for complex query answering. We leverage existing effective KG embeddings to conduct one-hop inferences on atomic formulas, the results of which are regarded as the messages passed in LMPNN. The reasoning process over the overall logical formulas is turned into the forward pass of LMPNN, which incrementally aggregates local information to finally predict the answers' embeddings. The complex logical inference across different types of queries is then learned from training examples based on the LMPNN architecture. Theoretically, our query-graph representation is more general than the prevailing operator-tree formulation, so our approach applies to a broader range of complex KG queries. Empirically, our approach yields a new state-of-the-art neural CQA model. Our research bridges the gap between complex KG query answering tasks and the long-standing achievements of knowledge graph representation learning.
Our implementation can be found at https://github.com/HKUST-KnowComp/LMPNN.", "keywords": "knowledge graph;complex query answering;graph neural network;representation learning", "primary_area": "", "supplementary_material": "", "author": "Zihao Wang;Yangqiu Song;Ginny Wong;Simon See", "authorids": "~Zihao_Wang11;~Yangqiu_Song1;~Ginny_Wong1;~Simon_See1", "gender": ";M;F;M", "homepage": "https://zihao-wang.github.io;https://www.cse.ust.hk/~yqsong/;;", "dblp": "148/9655-1;86/2159;78/8568;62/6547", "google_scholar": "T28rR00AAAAJ;MdQZ-q8AAAAJ;;ebIHTEoAAAAJ", "orcid": "0000-0002-3919-0396;0000-0002-7818-6090;0000-0001-7432-8496;0000-0002-4958-9237", "linkedin": "zihao-wang-6a0a3286/;yqsong/;ginny-wong-phd-3985bab5/;simonsee/", "or_profile": "~Zihao_Wang11;~Yangqiu_Song1;~Ginny_Wong1;~Simon_See1", "aff": "University of Illinois Urbana-Champaign;Hong Kong University of Science and Technology;NVIDIA;NVIDIA", "aff_domain": "illinois.edu;ust.hk;nvidia.com;nvidia.com", "position": "Intern;Associate Professor;Researcher;Associate Professor", "bibtex": "@inproceedings{\nwang2023logical,\ntitle={Logical Message Passing Networks with One-hop Inference on Atomic Formulas},\nauthor={Zihao Wang and Yangqiu Song and Ginny Wong and Simon See},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=SoyOsp7i_l}\n}", "github": "", "project": "", "reviewers": "5THt;zhhh;NhRN", "pdf_size": 470352, "recommendation": "6;6;6", "confidence": "4;5;3", "correctness": "4;3;3", "technical_novelty": "3;4;3", "empirical_novelty": "2;3;3", "wc_summary_paper": "60;62;97", "wc_strength_and_weaknesses": "240;171;274", "wc_clarity_quality_novelty_and_reproducibility": "93;96;17", "wc_summary_review": "5;40;49", "wc_review": "398;369;437", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "748;386;360", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "correctness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "technical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 73.0, 16.990193249832878 ], "wc_strength_and_weaknesses_avg": [ 228.33333333333334, 42.851163604063565 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 68.66666666666667, 36.55437350334734 ], "wc_summary_review_avg": [ 31.333333333333332, 18.979521127315678 ], "wc_review_avg": [ 401.3333333333333, 27.860764925289153 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 498.0, 177.09507804190005 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 37, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=398899629150369908&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=SoyOsp7i_l", "email": "illinois.edu;ust.hk;nvidia.com;nvidia.com", "author_num": 4, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "University of Illinois Urbana-Champaign;Hong Kong University of Science and Technology;NVIDIA", "aff_unique_dep": ";;NVIDIA Corporation", "aff_unique_url": "https://illinois.edu;https://www.ust.hk;https://www.nvidia.com", "aff_unique_abbr": "UIUC;HKUST;NVIDIA", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Urbana-Champaign;Hong Kong SAR;", 
"aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;China" }, { "title": "Relative representations enable zero-shot latent space communication", "status": "Top-5%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11612", "id": "SrC-nwieGJ", "poster": "/media/PosterPDFs/ICLR%202023/11612.png?t=1683029886.3689482", "openreview": "https://openreview.net/forum?id=SrC-nwieGJ", "slides": "https://iclr.cc/virtual/2023/poster/11612", "video": "https://iclr.cc/virtual/2023/poster/11612", "author_site": "Luca Moschella, Valentino Maiorca, Marco Fumero, Antonio Norelli, Francesco Locatello, Emanuele Rodol\u00e0", "tldr": "Relative representations can be leveraged to enable solving tasks regarding \"latent communication\": from zero-shot model stitching to latent space comparison between diverse settings.", "abstract": "Neural networks embed the geometric structure of a data manifold lying in a high-dimensional space into latent representations. Ideally, the distribution of the data points in the latent space should depend only on the task, the data, the loss, and other architecture-specific constraints. However, factors such as the random weights initialization, training hyperparameters, or other sources of randomness in the training phase may induce incoherent latent spaces that hinder any form of reuse. Nevertheless, we empirically observe that, under the same data and modeling choices, the angles between the encodings within distinct latent spaces do not change. In this work, we propose the latent similarity between each sample and a fixed set of anchors as an alternative data representation, demonstrating that it can enforce the desired invariances without any additional training. We show how neural architectures can leverage these relative representations to guarantee, in practice, invariance to latent isometries and rescalings, effectively enabling latent space communication: from zero-shot model stitching to latent space comparison between diverse settings. 
We extensively validate the generalization capability of our approach on different datasets, spanning various modalities (images, text, graphs), tasks (e.g., classification, reconstruction) and architectures (e.g., CNNs, GCNs, transformers).", "keywords": "relative representation;zero-shot;stitching;invariance;latent communication;isometry;representation learning", "primary_area": "", "supplementary_material": "/attachment/d5b9db5fe044181bfa21d0f1d2406f69deb91e7f.zip", "author": "Luca Moschella;Valentino Maiorca;Marco Fumero;Antonio Norelli;Francesco Locatello;Emanuele Rodol\u00e0", "authorids": "~Luca_Moschella1;~Valentino_Maiorca1;~Marco_Fumero1;~Antonio_Norelli2;~Francesco_Locatello1;~Emanuele_Rodol\u00e01", "gender": "M;M;;M;M;M", "homepage": "https://luca.moschella.dev;https://gladia.di.uniroma1.it/authors/maiorca/;;https://phd.uniroma1.it/web/ANTONIO-NORELLI_nP1612487_EN.aspx;https://twitter.com/FrancescoLocat8;", "dblp": "205/3639;305/9789;273/9625;261/9526;195/6074;54/8401", "google_scholar": "4byA-nefJJMC;https://scholar.google.it/citations?user=2VUUfFEAAAAJ;VYEljYEAAAAJ;;;-EH4wBYAAAAJ", "orcid": "0000-0002-0550-7498;0000-0001-5795-3695;0000-0001-5614-5004;;;0000-0003-0091-7241", "linkedin": "lucamoschella/;valentino-maiorca;;;;", "or_profile": "~Luca_Moschella1;~Valentino_Maiorca1;~Marco_Fumero1;~Antonio_Norelli2;~Francesco_Locatello1;~Emanuele_Rodol\u00e01", "aff": "NVIDIA;University of Roma \"La Sapienza\";Sapienza University of Rome;Sapienza University of Rome;Amazon;Sapienza University of Rome", "aff_domain": "nvidia.com;uniroma1.it;uniroma1.it;uniroma1.it;amazon.com;uniroma1.it", "position": "Intern;PhD student;PhD student;PhD student;Senior Applied Scientist;Full Professor", "bibtex": "@inproceedings{\nmoschella2023relative,\ntitle={Relative representations enable zero-shot latent space communication},\nauthor={Luca Moschella and Valentino Maiorca and Marco Fumero and Antonio Norelli and Francesco Locatello and Emanuele Rodol{\\`a}},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=SrC-nwieGJ}\n}", "github": "", "project": "", "reviewers": "hU9r;LNFQ;LWWZ", "pdf_size": 11317474, "recommendation": "8;8;10", "confidence": "4;4;4", "correctness": "3;4;4", "technical_novelty": "3;3;4", "empirical_novelty": "3;3;4", "wc_summary_paper": "137;137;167", "wc_strength_and_weaknesses": "1017;164;135", "wc_clarity_quality_novelty_and_reproducibility": "223;4;59", "wc_summary_review": "91;31;58", "wc_review": "1468;336;419", "wc_reply_reviewers": "268;0;14", "wc_reply_authors": "1782;383;117", "reply_reviewers": "1;0;1", "reply_authors": "3;2;1", "recommendation_avg": [ 8.666666666666666, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 147.0, 14.142135623730951 ], "wc_strength_and_weaknesses_avg": [ 438.6666666666667, 409.114762492004 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 95.33333333333333, 93.0244890099137 ], "wc_summary_review_avg": [ 60.0, 24.535688292770594 ], "wc_review_avg": [ 741.0, 515.1821684284761 ], "wc_reply_reviewers_avg": [ 94.0, 123.16926023430791 ], "wc_reply_authors_avg": [ 760.6666666666666, 730.3105884180754 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], 
"replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.4999999999999999, "gs_citation": 101, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12616326664049243989&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=SrC-nwieGJ", "email": "nvidia.com;uniroma1.it;uniroma1.it;uniroma1.it;amazon.com;uniroma1.it", "author_num": 6, "aff_unique_index": "0;1;2;2;3;2", "aff_unique_norm": "NVIDIA;University of Rome La Sapienza;Sapienza University of Rome;Amazon", "aff_unique_dep": "NVIDIA Corporation;;;Amazon.com, Inc.", "aff_unique_url": "https://www.nvidia.com;https://www.uniroma1.it;https://www.uniroma1.it;https://www.amazon.com", "aff_unique_abbr": "NVIDIA;La Sapienza;Sapienza;Amazon", "aff_campus_unique_index": "1;1;1;1", "aff_campus_unique": ";Rome", "aff_country_unique_index": "0;1;1;1;0;1", "aff_country_unique": "United States;Italy" }, { "id": "Su04-8n0ia4", "title": "HAS IT REALLY IMPROVED? KNOWLEDGE GRAPH BASED SEPARATION AND FUSION FOR RECOMMENDATION", "track": "main", "status": "Reject", "tldr": "", "abstract": "In this paper we study the knowledge graph (KG) based recommendation systems. We first design the metric to study the relationship between different SOTA models and find that the current recommendation systems based on knowledge graph have poor ability to retain collaborative filtering signals, and higher-order connectivity would introduce noises. In addition, we explore the collaborative filtering recommendation method using GNN and design the experiment to show that the information learned between GNN models stacked with different layers is different, which provides the explanation for the unstable performance of GNN stacking different layers from a new perspective. According to the above findings, we first design the model-agnostic Cross-Layer Fusion Mechanism without any parameters to improve the performance of GNN. Experimental results on three datasets for collaborative filtering show that Cross-Layer Fusion Mechanism is effective for improving GNN performance. Then we design three independent signal extractors to mine the data at three different perspectives and train them separately. Finally, we use the signal fusion mechanism to fuse different signals. Experimental results on three datasets that introduce KG show that our KGSF achieves significant improvements over current SOTA KG based recommendation methods and the results are interpretable.", "keywords": "recommendation;knowledge-graph;graph neural network", "primary_area": "", "supplementary_material": "", "author": "Ying Tang;Jintian Zhang", "authorids": "~Ying_Tang2;1401057351@qq.com", "gender": "F;", "homepage": "http://www.homepage.zjut.edu.cn/ty/;", "dblp": ";", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Ying_Tang2;1401057351@qq.com", "aff": "Zhejiang University of Technology;", "aff_domain": "zjut.edu.cn;", "position": "Full Professor;", "bibtex": "@misc{\ntang2023has,\ntitle={{HAS} {IT} {REALLY} {IMPROVED}? 
{KNOWLEDGE} {GRAPH} {BASED} {SEPARATION} {AND} {FUSION} {FOR} {RECOMMENDATION}},\nauthor={Ying Tang and Jintian Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=Su04-8n0ia4}\n}", "github": "", "project": "", "reviewers": "8G5y;wbd8;3bRV", "site": "https://openreview.net/forum?id=Su04-8n0ia4", "pdf_size": 1386491, "recommendation": "3;3;3", "confidence": "5;4;4", "correctness": "3;2;3", "technical_novelty": "2;2;1", "empirical_novelty": "2;2;3", "wc_summary_paper": "50;87;66", "wc_strength_and_weaknesses": "182;231;57", "wc_clarity_quality_novelty_and_reproducibility": "33;471;209", "wc_summary_review": "258;26;38", "wc_review": "523;815;370", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 67.66666666666667, 15.15109090315135 ], "wc_strength_and_weaknesses_avg": [ 156.66666666666666, 73.25905328960299 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 237.66666666666666, 179.95801979597599 ], "wc_summary_review_avg": [ 107.33333333333333, 106.64999869771319 ], "wc_review_avg": [ 569.3333333333334, 184.60107138246937 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:JAohB4sLGSEJ:scholar.google.com/&scioq=HAS+IT+REALLY+IMPROVED%3F+KNOWLEDGE+GRAPH+BASED+SEPARATION+AND+FUSION+FOR+RECOMMENDATION&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Zhejiang University of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.zjut.edu.cn", "aff_unique_abbr": "ZJUT", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "id": "Su84ELBdm5U", "title": "How does overparametrization affect performance on minority groups?", "track": "main", "status": "Reject", "tldr": "", "abstract": "The benefits of overparameterization for the overall performance of modern machine learning (ML) models are well known. However, the effect of overparameterization at a more granular level of data subgroups is less understood. Recent empirical studies demonstrate encouraging results: (i) when groups are not known, overparameterized models trained with empirical risk minimization (ERM) perform better on minority groups; (ii) when groups are known, ERM on data subsampled to equalize group sizes yields state-of-the-art worst-group-accuracy in the overparameterized regime. In this paper, we complement these empirical studies with a theoretical investigation of the risk of overparameterized random feature models on minority groups. 
In a setting in which the regression functions for the majority and minority groups are different, we show that overparameterization always improves minority group performance.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/c08803e2070f8b70c2b85bb9c183ba1c694610f1.zip", "author": "Subha Maity;Saptarshi Roy;Songkai Xue;Mikhail Yurochkin;Yuekai Sun", "authorids": "~Subha_Maity1;~Saptarshi_Roy1;~Songkai_Xue1;~Mikhail_Yurochkin1;~Yuekai_Sun1", "gender": "M;M;;M;", "homepage": "https://lsa.umich.edu/stats/people/phd-students/smaity.html;https://sites.google.com/umich.edu/saptarshi-roys-home-page/home?authuser=1;http://www-personal.umich.edu/~sxue/;https://moonfolk.github.io/;https://yuekai.github.io/", "dblp": "278/2922;;260/6635;191/6719;", "google_scholar": "eD9vCGMAAAAJ;Ywix3OUAAAAJ;YZjCcnoAAAAJ;QjBF9sUAAAAJ;6T1XtW8AAAAJ", "orcid": ";0000-0003-4183-205X;;;", "linkedin": ";;;mikhail-yurochkin-a45659114/;", "or_profile": "~Subha_Maity1;~Saptarshi_Roy1;~Songkai_Xue1;~Mikhail_Yurochkin1;~Yuekai_Sun1", "aff": ";University of Michigan - Ann Arbor;University of Michigan;IBM Research;University of Michigan - Ann Arbor", "aff_domain": ";umich.edu;umich.edu;ibm.com;umich.edu", "position": ";PhD student;PhD student;Researcher;Assistant \u2192 Associate Professor of Statistics", "bibtex": "@misc{\nmaity2023how,\ntitle={How does overparametrization affect performance on minority groups?},\nauthor={Subha Maity and Saptarshi Roy and Songkai Xue and Mikhail Yurochkin and Yuekai Sun},\nyear={2023},\nurl={https://openreview.net/forum?id=Su84ELBdm5U}\n}", "github": "", "project": "", "reviewers": "BTHL;1vd9;Fo6C;2EGb;y1cp", "site": "https://openreview.net/forum?id=Su84ELBdm5U", "pdf_size": 584861, "recommendation": "3;3;3;5;5", "confidence": "4;2;4;1;3", "correctness": "4;2;3;3;3", "technical_novelty": "1;2;2;2;2", "empirical_novelty": "0;1;0;2;1", "wc_summary_paper": "83;45;73;55;44", "wc_strength_and_weaknesses": "219;92;172;71;111", "wc_clarity_quality_novelty_and_reproducibility": "95;8;30;15;23", "wc_summary_review": "31;2;31;2;20", "wc_review": "428;147;306;143;198", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;0;0", "reply_authors": "0;0;0;0;0", "recommendation_avg": [ 3.8, 0.9797958971132712 ], "confidence_avg": [ 2.8, 1.16619037896906 ], "correctness_avg": [ 3.0, 0.6324555320336759 ], "technical_novelty_avg": [ 1.8, 0.4000000000000001 ], "empirical_novelty_avg": [ 0.8, 0.7483314773547883 ], "wc_summary_paper_avg": [ 60.0, 15.517731793016658 ], "wc_strength_and_weaknesses_avg": [ 133.0, 54.63698381133424 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 34.2, 31.28833648502266 ], "wc_summary_review_avg": [ 17.2, 13.044539087296263 ], "wc_review_avg": [ 244.4, 109.0185305349508 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.560112033611204, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11215136022529055853&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Michigan;IBM", "aff_unique_dep": ";IBM Research", "aff_unique_url": "https://www.umich.edu;https://www.ibm.com/research", "aff_unique_abbr": "UM;IBM", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Ann Arbor;", "aff_country_unique_index": 
"0;0;0;0", "aff_country_unique": "United States" }, { "id": "Su_HbZ0Sdz", "title": "Feasible Adversarial Robust Reinforcement Learning for Underspecified Environments", "track": "main", "status": "Reject", "tldr": "We propose a method to train a robust RL agent to feasible parameters even when the adversary has access to infeasible parameters.", "abstract": "Robust reinforcement learning (RL) considers the problem of learning policies that perform well in the worst case among a set of possible environment parameter values. In real-world environments, choosing the set of possible values for robust RL can be a difficult task. When that set is specified too narrowly, the agent will be left vulnerable to reasonable parameter values unaccounted for. When specified too broadly, the agent will be too cautious. In this paper, we propose Feasible Adversarial Robust RL (FARR), a novel problem formulation and objective for automatically determining the set of environment parameter values over which to be robust. FARR implicitly defines the set of feasible parameter values as those on which an agent could achieve a benchmark reward given enough training resources. By formulating this problem as a two-player zero-sum game, optimizing the FARR objective jointly produces an adversarial distribution over parameter values with feasible support and a policy robust over this feasible parameter set. We demonstrate that approximate Nash equilibria for this objective can be found using a variation of the PSRO algorithm. Furthermore, we show that an optimal agent trained with FARR is more robust to feasible adversarial parameter selection than with existing minimax, domain-randomization, and regret objectives in a parameterized gridworld and three MuJoCo control environments.", "keywords": "reinforcement learning;robust rl;sim-to-real;game-theory;psro", "primary_area": "", "supplementary_material": "", "author": "John Banister Lanier;Stephen Marcus McAleer;Pierre Baldi;Roy Fox", "authorids": "~John_Banister_Lanier1;~Stephen_Marcus_McAleer1;~Pierre_Baldi1;~Roy_Fox1", "gender": "M;M;;M", "homepage": "https://jblanier.net;https://www.andrew.cmu.edu/user/smcaleer/;;https://royf.org", "dblp": "242/9187;;;32/7007", "google_scholar": "6IkG2m0AAAAJ;iEFL4-YAAAAJ;;FH9nKOAAAAAJ", "orcid": ";;;0000-0002-5562-3315", "linkedin": ";stephen-mcaleer/;;", "or_profile": "~John_Banister_Lanier1;~Stephen_Marcus_McAleer1;~Pierre_Baldi1;~Roy_Fox1", "aff": "University of California, Irvine;Carnegie Mellon University;;University of California, Irvine", "aff_domain": "uci.edu;cmu.edu;;uci.edu", "position": "PhD student;Postdoc;;Assistant Professor", "bibtex": "@misc{\nlanier2023feasible,\ntitle={Feasible Adversarial Robust Reinforcement Learning for Underspecified Environments},\nauthor={John Banister Lanier and Stephen Marcus McAleer and Pierre Baldi and Roy Fox},\nyear={2023},\nurl={https://openreview.net/forum?id=Su_HbZ0Sdz}\n}", "github": "", "project": "", "reviewers": "yKnP;Rk3h;uRb1;9k7R", "site": "https://openreview.net/forum?id=Su_HbZ0Sdz", "pdf_size": 1453190, "recommendation": "3;3;5;8", "confidence": "5;5;4;4", "correctness": "3;2;3;4", "technical_novelty": "1;1;2;4", "empirical_novelty": "1;1;3;2", "wc_summary_paper": "75;65;133;143", "wc_strength_and_weaknesses": "258;434;485;223", "wc_clarity_quality_novelty_and_reproducibility": "8;26;70;38", "wc_summary_review": "12;178;24;25", "wc_review": "353;703;712;429", "wc_reply_reviewers": "80;90;152;29", "wc_reply_authors": "519;1030;987;373", "reply_reviewers": "1;1;1;1", 
"reply_authors": "2;2;2;2", "recommendation_avg": [ 4.75, 2.0463381929681126 ], "confidence_avg": [ 4.5, 0.5 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.0, 1.224744871391589 ], "empirical_novelty_avg": [ 1.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 104.0, 34.36568055487916 ], "wc_strength_and_weaknesses_avg": [ 350.0, 111.66243773086812 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 35.5, 22.599778759979046 ], "wc_summary_review_avg": [ 59.75, 68.46303747278527 ], "wc_review_avg": [ 549.25, 160.54652752395486 ], "wc_reply_reviewers_avg": [ 87.75, 43.71713050967549 ], "wc_reply_authors_avg": [ 727.25, 286.3515103854003 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.8551861104941366, "corr_recommendation_correctness": 0.8638684255813602, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14012953356446073164&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of California, Irvine;Carnegie Mellon University", "aff_unique_dep": ";", "aff_unique_url": "https://www.uci.edu;https://www.cmu.edu", "aff_unique_abbr": "UCI;CMU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Irvine;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "SvcawuEiUVM", "title": "Compositional Image Generation and Manipulation with Latent Diffusion Models", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "We propose a principled method for compositional image generation and manipulation using diffusion probabilistic models. In particular, for any pre-trained generative model with a semantic latent space, we train a latent diffusion model and auxiliary latent classifiers to help navigate latent representations in a non-linear fashion. We show that such conditional generation achieved by latent classifier guidance provably maximizes a lower bound of the conditional log-likelihood during training, and can reduce to a simple latent arithmetic method with additional assumption, which is surprisingly under-studied in the context of compositionality. We then derive a new guidance term which is shown to be crucial for maintaining the original semantics when doing manipulation. 
Unlike previous methods, our method is agnostic to pre-trained generative models and latent spaces, while still achieving competitive performance on compositional image generation as well as sequential manipulation of real and synthetic images.", "keywords": "Compositionality;Diffusion Models", "primary_area": "", "supplementary_material": "", "author": "Changhao Shi;Haomiao Ni;Kai Li;Shaobo Han;Mingfu Liang;Gal Mishne;Martin Renqiang Min", "authorids": "~Changhao_Shi1;~Haomiao_Ni1;~Kai_Li11;~Shaobo_Han1;~Mingfu_Liang1;~Gal_Mishne1;~Martin_Renqiang_Min1", "gender": ";M;;M;M;F;M", "homepage": ";https://sites.google.com/view/haomiaoni/home;;https://shaobohan.net/;https://mingfuliang.com/;http://mishne.ucsd.edu/;http://www.cs.toronto.edu/~cuty", "dblp": ";196/1975;https://dblp.uni-trier.de/pers/hd/l/Li_0012:Kai;;241/9790;125/3214;29/7048", "google_scholar": ";3PzBXAsAAAAJ;YsROc4UAAAAJ;3L333oYAAAAJ;_uUUvt4AAAAJ;KrwpdXYAAAAJ;T2M4JjEAAAAJ", "orcid": ";;;;0000-0001-6779-2418;0000-0002-5287-3626;0000-0002-8563-6133", "linkedin": "changhao-s-68235a177/;;;;;;martin-renqiang-min-955a8766", "or_profile": "~Changhao_Shi1;~Haomiao_Ni1;~Kai_Li11;~Shaobo_Han1;~Mingfu_Liang1;~Gal_Mishne1;~Martin_Renqiang_Min1", "aff": "University of California, San Diego;Pennsylvania State University;NEC-Labs;NEC Labs America;Northwestern University;University of California, San Diego;NEC Laboratories America", "aff_domain": "ucsd.edu;psu.edu;nec-labs.com;nec-labs.com;northwestern.edu;ucsd.edu;nec-labs.com", "position": "PhD student;PhD student;NEC Labs, America;Researcher;PhD student;Assistant Professor;Researcher", "bibtex": "@misc{\nshi2023compositional,\ntitle={Compositional Image Generation and Manipulation with Latent Diffusion Models},\nauthor={Changhao Shi and Haomiao Ni and Kai Li and Shaobo Han and Mingfu Liang and Gal Mishne and Martin Renqiang Min},\nyear={2023},\nurl={https://openreview.net/forum?id=SvcawuEiUVM}\n}", "github": "", "project": "", "reviewers": "Pegm;uW6K;yrKx;SSFi", "site": "https://openreview.net/forum?id=SvcawuEiUVM", "pdf_size": 27423590, "recommendation": "3;3;3;5", "confidence": "4;4;4;4", "correctness": "2;3;2;2", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;2;1;3", "wc_summary_paper": "73;41;45;68", "wc_strength_and_weaknesses": "429;171;342;289", "wc_clarity_quality_novelty_and_reproducibility": "38;169;18;57", "wc_summary_review": "18;27;41;34", "wc_review": "558;408;446;448", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 56.75, 13.935117509371782 ], "wc_strength_and_weaknesses_avg": [ 307.75, 93.44349897130351 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 70.5, 58.5170915203413 ], "wc_summary_review_avg": [ 30.0, 8.514693182963201 ], "wc_review_avg": [ 465.0, 56.00892785976178 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": -0.3333333333333333, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10311604859278566986&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": 
"0;1;2;3;4;0;5", "aff_unique_norm": "University of California, San Diego;Pennsylvania State University;NEC Laboratories;NEC Labs America;Northwestern University;NEC Laboratories America", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://www.ucsd.edu;https://www.psu.edu;https://www.nec-labs.com;https://www.nec-labs.com;https://www.northwestern.edu;https://www.nec-labs.com", "aff_unique_abbr": "UCSD;PSU;NEC-Labs;NEC LA;NU;NEC Labs America", "aff_campus_unique_index": "0;0", "aff_campus_unique": "San Diego;", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "SxO-qoAwVM", "title": "Understanding Hindsight Goal Relabeling Requires Rethinking Divergence Minimization", "track": "main", "status": "Reject", "tldr": "", "abstract": "Hindsight goal relabeling has become a foundational technique for multi-goal reinforcement learning (RL). The idea is quite simple: any arbitrary trajectory can be seen as an expert demonstration for reaching the trajectory's end state. Intuitively, this procedure trains a goal-conditioned policy to imitate a sub-optimal expert. However, this connection between imitation and hindsight relabeling is not well understood. Modern imitation learning algorithms are described in the language of divergence minimization, and yet it remains an open problem how to recast hindsight goal relabeling into that framework. In this work, we develop a unified objective for goal-reaching that explains such a connection, from which we can derive goal-conditioned supervised learning (GCSL) and the reward function in hindsight experience replay (HER) from first principles. Experimentally, we find that despite recent advances in goal-conditioned behaviour cloning (BC), multi-goal Q-learning can still outperform BC-like methods; moreover, a vanilla combination of both actually hurts model performance. Under our framework, we study when BC is expected to help, and empirically validate our findings. Our work further bridges goal-reaching and generative modeling, illustrating the nuances and new pathways of extending the success of generative models to RL.", "keywords": "reinforcement learning;multi-goal reinforcement learning;imitation learning", "primary_area": "", "supplementary_material": "/attachment/a007315e46e041aa46cec5c00a739fd00eca281a.zip", "author": "Lunjun Zhang;Bradly C. Stadie", "authorids": "~Lunjun_Zhang1;~Bradly_C._Stadie1", "gender": ";", "homepage": "https://lunjunzhang.github.io/;", "dblp": "274/6535;166/1368", "google_scholar": "OqD5GcgAAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Lunjun_Zhang1;~Bradly_C._Stadie1", "aff": "Waabi Innovation;Northwestern University", "aff_domain": "waabi.ai;northwestern.edu", "position": "Researcher;Assistant Professor", "bibtex": "@misc{\nzhang2023understanding,\ntitle={Understanding Hindsight Goal Relabeling Requires Rethinking Divergence Minimization},\nauthor={Lunjun Zhang and Bradly C. 
Stadie},\nyear={2023},\nurl={https://openreview.net/forum?id=SxO-qoAwVM}\n}", "github": "", "project": "", "reviewers": "CKyn;aKS5;4ceU;jwR4", "site": "https://openreview.net/forum?id=SxO-qoAwVM", "pdf_size": 2919686, "recommendation": "3;5;6;6", "confidence": "4;4;2;2", "correctness": "2;4;4;3", "technical_novelty": "2;3;3;3", "empirical_novelty": "3;2;3;3", "wc_summary_paper": "65;107;120;118", "wc_strength_and_weaknesses": "789;274;351;380", "wc_clarity_quality_novelty_and_reproducibility": "83;65;78;221", "wc_summary_review": "45;50;92;78", "wc_review": "982;496;641;797", "wc_reply_reviewers": "326;204;0;0", "wc_reply_authors": "887;997;565;663", "reply_reviewers": "1;2;0;0", "reply_authors": "2;3;1;1", "recommendation_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.0, 1.0 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 102.5, 22.20923231451281 ], "wc_strength_and_weaknesses_avg": [ 448.5, 200.3677868321153 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 111.75, 63.41677617160936 ], "wc_summary_review_avg": [ 66.25, 19.472737352514155 ], "wc_review_avg": [ 729.0, 180.73876175297872 ], "wc_reply_reviewers_avg": [ 132.5, 139.34399879435065 ], "wc_reply_authors_avg": [ 778.0, 172.07265907168403 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": -0.8164965809277259, "corr_recommendation_correctness": 0.7385489458759963, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:x1AaBpdEjNYJ:scholar.google.com/&scioq=Understanding+Hindsight+Goal+Relabeling+Requires+Rethinking+Divergence+Minimization&hl=en&as_sdt=0,33", "gs_version_total": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Waabi Innovation;Northwestern University", "aff_unique_dep": ";", "aff_unique_url": "https://waabi.ai;https://www.northwestern.edu", "aff_unique_abbr": ";NU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Canada;United States" }, { "title": "Become a Proficient Player with Limited Data through Watching Pure Videos", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11936", "id": "Sy-o2N0hF4f", "poster": "/media/PosterPDFs/ICLR%202023/11936.png?t=1680960328.6203065", "openreview": "https://openreview.net/forum?id=Sy-o2N0hF4f", "slides": "https://iclr.cc/virtual/2023/poster/11936", "video": "https://iclr.cc/virtual/2023/poster/11936", "author_site": "Weirui Ye, Yunsheng Zhang, Pieter Abbeel, Yang Gao", "tldr": "", "abstract": "Recently, RL has shown strong performance on visually complex tasks. However, it suffers from low sample efficiency and poor generalization, which prevent RL from being useful in real-world scenarios. Inspired by the huge success of unsupervised pre-training methods in language and vision domains, we propose to improve the sample efficiency via a novel pre-training method for model-based RL. \nInstead of using pre-recorded agent trajectories that come with their own actions, we consider the setting where the pre-training data are action-free videos, which are more common and more readily available in the real world. 
We introduce a two-phase training pipeline as follows: for the pre-training phase, we implicitly extract the hidden action embedding from videos and pre-train the visual representation and the environment dynamics network through a novel forward-inverse cycle consistency (FICC) objective based on vector quantization; for downstream tasks, we fine-tune with a small amount of task data based on the learned models. Our framework can significantly improve the sample efficiency on Atari Games with only one hour of game-playing data. We achieve 118.4\\% mean human performance and 36.0\\% median performance with only 50k environment steps, which is 85.6\\% and 65.1\\% better than the EfficientZero model trained from scratch. We believe such a pre-training approach can provide an option for solving real-world RL problems. The code is available at \\url{https://github.com/YeWR/FICC.git}.", "keywords": "Pre-training;Fine-tune;MCTS;Reinforcement learning;Vector Quantization", "primary_area": "", "supplementary_material": "", "author": "Weirui Ye;Yunsheng Zhang;Pieter Abbeel;Yang Gao", "authorids": "~Weirui_Ye1;~Yunsheng_Zhang1;~Pieter_Abbeel2;~Yang_Gao1", "gender": "M;M;M;M", "homepage": "https://yewr.github.io/;https://isa233.github.io/;https://people.eecs.berkeley.edu/~pabbeel/;http://yang-gao.weebly.com", "dblp": "245/3595;;;89/4402-29", "google_scholar": "_GgST9AAAAAJ;;https://scholar.google.com.tw/citations?user=vtwH6GkAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;", "linkedin": ";;;yang-gao-45245348/", "or_profile": "~Weirui_Ye1;~Yunsheng_Zhang1;~Pieter_Abbeel2;~Yang_Gao1", "aff": "Tsinghua University;;Covariant;Tsinghua University", "aff_domain": "tsinghua.edu.cn;;covariant.ai;tsinghua.edu.cn", "position": "PhD student;;Founder;Assistant Professor", "bibtex": "@inproceedings{\nye2023become,\ntitle={Become a Proficient Player with Limited Data through Watching Pure Videos},\nauthor={Weirui Ye and Yunsheng Zhang and Pieter Abbeel and Yang Gao},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=Sy-o2N0hF4f}\n}", "github": "", "project": "", "reviewers": "wEqK;1ysS;HyxY;Krh3", "pdf_size": 3190227, "recommendation": "5;6;6;8", "confidence": "3;2;4;3", "correctness": "3;3;3;3", "technical_novelty": "2;3;3;4", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "43;181;84;48", "wc_strength_and_weaknesses": "122;113;367;273", "wc_clarity_quality_novelty_and_reproducibility": "58;131;173;31", "wc_summary_review": "32;37;77;41", "wc_review": "255;462;701;393", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 89.0, 55.421115109676386 ], "wc_strength_and_weaknesses_avg": [ 218.75, 106.61232339650046 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 98.25, 56.574618867474484 ], "wc_summary_review_avg": [ 46.75, 17.75352077758099 ], "wc_review_avg": [ 452.75, 161.54623951055004 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 30, 
"gs_cited_by_link": "https://scholar.google.com/scholar?cites=1741556525857971590&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "pdf": "https://openreview.net/pdf?id=Sy-o2N0hF4f", "email": "tsinghua.edu.cn;;covariant.ai;tsinghua.edu.cn", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Tsinghua University;Covariant", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;", "aff_unique_abbr": "THU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China;" }, { "id": "T-DKAYt6BMk", "title": "Clustering Embedding Tables, Without First Learning Them", "track": "main", "status": "Reject", "tldr": "We train recommendation systems using less memory than previous work. This is achieved using clustering of a \"pseudo embedding table\" trained via hashing.", "abstract": "Machine learning systems use embedding tables to work with categorical features. These tables may get extremely large in modern recommendation systems, and various methods have been suggested to fit them in memory.\n\nProduct- and Residual Vector Quantization are some of the most successful methods for table compression. They function by substituting table rows with references to ``codewords'' picked by k-means clustering. Unfortunately, this means that they must first know the table before compressing it, thus they can only save memory at inference time, not training time. Recent work has employed hashing-based approaches to minimize memory usage during training, however the compression obtained is poorer than that achieved by ``post-training'' quantization.\n\nWe demonstrate that combining hashing and clustering based algorithms provides the best of both worlds. By first training a hashing-based ``sketch'', then clustering it, and then training the clustered quantization, our method may achieve compression ratios close to those of post-training quantization with the training time memory reductions of hashing-based methods. 
We prove that this technique works rigorously in the least-square setting.", "keywords": "Clustering;Sketching;Recommendation Systems;Embeddings;Sparse Matrices", "primary_area": "", "supplementary_material": "/attachment/3f0ae296ecbfcfddcaaab43eb8054aefa5eecd46.zip", "author": "Henry Ling-Hei Tsang;Thomas Dybdahl Ahle", "authorids": "tsang.79@osu.edu;~Thomas_Dybdahl_Ahle1", "gender": ";M", "homepage": ";https://thomasahle.com", "dblp": ";169/9933.html", "google_scholar": ";https://scholar.google.dk/citations?user=aRiVoYgAAAAJ", "orcid": ";0000-0001-9747-0479", "linkedin": ";thomasahle/", "or_profile": "tsang.79@osu.edu;~Thomas_Dybdahl_Ahle1", "aff": ";Meta Facebook", "aff_domain": ";facebook.com", "position": ";Researcher", "bibtex": "@misc{\ntsang2023clustering,\ntitle={Clustering Embedding Tables, Without First Learning Them},\nauthor={Henry Ling-Hei Tsang and Thomas Dybdahl Ahle},\nyear={2023},\nurl={https://openreview.net/forum?id=T-DKAYt6BMk}\n}", "github": "", "project": "", "reviewers": "ZrK7;ZdXj;7zzb", "site": "https://openreview.net/forum?id=T-DKAYt6BMk", "pdf_size": 969064, "recommendation": "5;5;5", "confidence": "3;3;4", "correctness": "3;3;3", "technical_novelty": "2;2;2", "empirical_novelty": "3;2;2", "wc_summary_paper": "17;40;60", "wc_strength_and_weaknesses": "430;103;157", "wc_clarity_quality_novelty_and_reproducibility": "22;22;15", "wc_summary_review": "48;453;19", "wc_review": "517;618;251", "wc_reply_reviewers": "77;41;0", "wc_reply_authors": "785;776;422", "reply_reviewers": "1;1;0", "reply_authors": "1;1;1", "recommendation_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 39.0, 17.568911937472585 ], "wc_strength_and_weaknesses_avg": [ 230.0, 143.12931216211445 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 19.666666666666668, 3.299831645537222 ], "wc_summary_review_avg": [ 173.33333333333334, 198.1082756698692 ], "wc_review_avg": [ 462.0, 154.79233400484233 ], "wc_reply_reviewers_avg": [ 39.333333333333336, 31.45720196641074 ], "wc_reply_authors_avg": [ 661.0, 169.03845716285983 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17251933366467606734&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Meta", "aff_unique_dep": "Meta Platforms, Inc.", "aff_unique_url": "https://meta.com", "aff_unique_abbr": "Meta", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "T-camDtiuUg", "title": "Trimsformer: Trimming Transformer via Searching for Low-Rank Structure", "track": "main", "status": "Withdraw", "tldr": "Constructing the efficient low-rank vision transformer structure based on neural architecture search.", "abstract": "Vision Transformers (ViT) have recently been used successfully in various computer vision tasks, but the high computational cost hinders their practical deployment. One of the most well-known methods to alleviate computational burden is low-rank approximation. However, how to automatically search for a low-rank configuration efficiently remains a challenge. 
In this paper, we propose Trimsformer, an end-to-end automatic low-rank approximation framework based on a neural architecture search scheme, which tackles the inefficiency of searching for a target low-rank configuration out of numerous ones. We propose weight inheritance, which encodes enormous rank choices into a single search space. In addition, we share the gradient information among building blocks to boost the convergence of the supernet training. Furthermore, to mitigate the initial performance gap between subnetworks caused by using pre-trained weights, we adopt non-uniform sampling to promote the overall subnetwork performance. Extensive results show the efficacy of our Trimsformer framework. For instance, with our method, Trim-DeiT-B/Trim-Swin-B can save up to 57%/46% FLOPs with 1.1%/0.2% higher accuracy over DeiT-B/Swin-B. Last but not least, Trimsformer exhibits remarkable generality and orthogonality. We can yield an extra 21%$\\sim$26% FLOPs reduction on top of the popular compression method as well as the compact hybrid structure. Our code will be released.", "keywords": "Vision Transformer;Model Compression;Low-Rank Approximation;Neural Architecture Search", "primary_area": "", "supplementary_material": "", "author": "Yuan-Yao Sung;Chi-Chih Chang;Shixing Yu;Yu-Shin Han;Diana Marculescu;Kai-Chiang Wu", "authorids": "~Yuan-Yao_Sung1;~Chi-Chih_Chang1;~Shixing_Yu1;~Yu-Shin_Han1;~Diana_Marculescu4;kcw@cs.nctu.edu.tw", "gender": "M;;M;F;;", "homepage": ";https://ccchang.info;https://billysx.github.io/;;;", "dblp": ";53/1921;;280/7618;;", "google_scholar": ";lbXB3gMAAAAJ;;;;", "orcid": ";;;0000-0002-9004-5864;;", "linkedin": "yysung/;\u6a5f\u667a-\u5f35-871a37233;%E4%B8%96%E5%85%B4-%E4%BA%8E-029401182/;;;", "or_profile": "~Yuan-Yao_Sung1;~Chi-Chih_Chang1;~Shixing_Yu1;~Yu-Shin_Han1;~Diana_Marculescu4;kcw@cs.nctu.edu.tw", "aff": "National Chiao Tung University, National Chiao Tung University;National Yang Ming Chiao Tung University;Cornell University;National Chiao Tung University, National Chiao Tung University;;", "aff_domain": "cs.nctu.edu.tw;cs.nycu.edu.tw;cornell.edu;cs.nctu.edu.tw;;", "position": "MS student;Undergrad student;PhD student;MS student;;", "bibtex": "@misc{\nsung2023trimsformer,\ntitle={Trimsformer: Trimming Transformer via Searching for Low-Rank Structure},\nauthor={Yuan-Yao Sung and Chi-Chih Chang and Shixing Yu and Yu-Shin Han and Diana Marculescu and Kai-Chiang Wu},\nyear={2023},\nurl={https://openreview.net/forum?id=T-camDtiuUg}\n}", "github": "", "project": "", "reviewers": "5hQF;Z7Uh;j7TD", "site": "https://openreview.net/forum?id=T-camDtiuUg", "pdf_size": 848624, "recommendation": "5;5;6", "confidence": "4;5;4", "correctness": "2;3;3", "technical_novelty": "3;3;2", "empirical_novelty": "2;2;2", "wc_summary_paper": "74;39;61", "wc_strength_and_weaknesses": "198;243;68", "wc_clarity_quality_novelty_and_reproducibility": "31;25;60", "wc_summary_review": "38;40;79", "wc_review": "341;347;268", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 58.0, 14.445299120013633 ], "wc_strength_and_weaknesses_avg": [ 169.66666666666666, 74.19943096517355 ], 
"wc_clarity_quality_novelty_and_reproducibility_avg": [ 38.666666666666664, 15.2825245151302 ], "wc_summary_review_avg": [ 52.333333333333336, 18.87385022252275 ], "wc_review_avg": [ 318.6666666666667, 35.91038228083288 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.4999999999999999, "corr_recommendation_correctness": 0.4999999999999999, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16399322188187920482&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "National Chiao Tung University;National Yang Ming Chiao Tung University;Cornell University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.nctu.edu.tw;https://www.nycu.edu.tw;https://www.cornell.edu", "aff_unique_abbr": "NCTU;NYCU;Cornell", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Taiwan;", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "China;United States" }, { "title": "Serving Graph Compression for Graph Neural Networks", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10950", "id": "T-qVtA3pAxG", "poster": "", "openreview": "https://openreview.net/forum?id=T-qVtA3pAxG", "slides": "https://iclr.cc/virtual/2023/poster/10950", "video": "https://iclr.cc/virtual/2023/poster/10950", "author_site": "Si Si, Felix Yu, Ankit Singh Rawat, Cho-Jui Hsieh, Sanjiv Kumar", "tldr": "Compressing the graph for graph neural networks inference", "abstract": "Serving a GNN model online is challenging --- in many applications when testing nodes are connected to training nodes, one has to propagate information from training nodes to testing nodes to achieve the best performance, and storing the whole training set (including training graph and node features) during inference stage is prohibitive for large-scale problems. In this paper, we study graph compression to reduce the storage requirement for GNN in serving. Given a GNN model to be served, we propose to construct a compressed graph with a smaller number of nodes. In serving time, one just needs to replace the original training set graph by this compressed graph, without the need of changing the actual GNN model and the forward pass. We carefully analyze the error in the forward pass and derive simple ways to construct the compressed graph to minimize the approximation error. 
Experimental results on semi-supervised node classification demonstrate that the proposed method can significantly reduce the serving space requirement for GNN inference.", "keywords": "Model compression;Graph Neural Networks", "primary_area": "", "supplementary_material": "/attachment/4c30433bc328a7ef55391e8799ca724098e60305.zip", "author": "Si Si;Felix Yu;Ankit Singh Rawat;Cho-Jui Hsieh;Sanjiv Kumar", "authorids": "~Si_Si1;~Felix_Yu1;~Ankit_Singh_Rawat1;~Cho-Jui_Hsieh1;~Sanjiv_Kumar1", "gender": "F;M;M;M;", "homepage": ";http://felixyu.org;https://ankitsrawat.github.io/home/;http://web.cs.ucla.edu/~chohsieh/index.html;http://www.sanjivk.com/", "dblp": "03/7627;23/10574;https://dblp.org/pers/hd/r/Rawat:Ankit_Singh;14/2770;", "google_scholar": ";lYvF6cUAAAAJ;http://scholar.google.com/citations?user=U0_ab4cAAAAJ;Wy89g4IAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Si_Si1;~Felix_Yu1;~Ankit_Singh_Rawat1;~Cho-Jui_Hsieh1;~Sanjiv_Kumar1", "aff": "Google;Google;Google;Amazon;Google", "aff_domain": "google.com;google.com;google.com;amazon.com;google.com", "position": "research scientist;Research Scientist;Research Scientist;visiting scholar;Research Scientist", "bibtex": "@inproceedings{\nsi2023serving,\ntitle={Serving Graph Compression for Graph Neural Networks},\nauthor={Si Si and Felix Yu and Ankit Singh Rawat and Cho-Jui Hsieh and Sanjiv Kumar},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=T-qVtA3pAxG}\n}", "github": "", "project": "", "reviewers": "afao;LGG4;f4pk;t66n", "pdf_size": 378101, "recommendation": "3;6;8;8", "confidence": "3;3;4;4", "correctness": "3;3;4;4", "technical_novelty": "2;3;4;4", "empirical_novelty": "2;2;4;3", "wc_summary_paper": "75;96;159;61", "wc_strength_and_weaknesses": "300;331;76;355", "wc_clarity_quality_novelty_and_reproducibility": "17;98;217;207", "wc_summary_review": "29;46;59;57", "wc_review": "421;571;511;680", "wc_reply_reviewers": "0;0;51;0", "wc_reply_authors": "1402;866;913;764", "reply_reviewers": "0;0;1;0", "reply_authors": "3;2;2;1", "recommendation_avg": [ 6.25, 2.0463381929681126 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 3.25, 0.82915619758885 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 97.75, 37.49249924984996 ], "wc_strength_and_weaknesses_avg": [ 265.5, 111.13167865194875 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 134.75, 82.4632493902587 ], "wc_summary_review_avg": [ 47.75, 11.903255857117413 ], "wc_review_avg": [ 545.75, 94.11528834360547 ], "wc_reply_reviewers_avg": [ 12.75, 22.083647796503186 ], "wc_reply_authors_avg": [ 986.25, 246.00241360604574 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.8551861104941366, "corr_recommendation_correctness": 0.8551861104941366, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3777236279266281776&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=T-qVtA3pAxG", "email": "google.com;google.com;google.com;amazon.com;google.com", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Google;Amazon", "aff_unique_dep": "Google;Amazon.com, Inc.", "aff_unique_url": "https://www.google.com;https://www.amazon.com", "aff_unique_abbr": 
"Google;Amazon", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "T1Qx6EC08o", "title": "On the Importance of Pretrained Knowledge Distillation for 3D Object Detection", "track": "main", "status": "Withdraw", "tldr": "We propose PreDistill, a pretrained distillation paradigm for knowledge transfer and demonstrate that PreDistill serves as a plug-and-play module to various state-of-the-art detectors.", "abstract": "Multi-camera 3D object detection for autonomous driving is quite challenging and has drawn great attention from both academia and industry. The core issue of the vision-only methods is that it is difficult to mine accurate geometry-aware features from images. To improve the performance of vision-only approaches, one promising ingredient in the recipe lies in how to use visual features to simulate the geometry information of LiDAR, since point cloud data inherently carries 3D spatial information. In this paper, we resort to knowledge distillation to leverage useful representations from the LiADR-based expert to enhance feature learning in the camera-based pipeline. It is observed that the joint optimization of expert-apprentice distillation as well as the target task might be difficult to learn in the conventional distillation paradigm. Inspired by the great blossom and impressive results of foundation models in general vision, we propose a pretrained distillation paradigm, termed as PreDistill, to decouple the training procedure into two stages. The apprentice network first emphasizes the knowledge transfer from the expert; then it performs finetuning on the downstream target task. Such a strategy would facilitate the optimal representation learning with targeted goals and ease the joint feature learning as resided in conventional single-stage counterpart. PreDistill serves as a convenient plug-and-play that is flexible to extend to multiple state-of-the-art detectors. Without bells and whistles, building on top of the most recent approaches, e.g., BEVFusion-C, BEVFormer, and BEVDepth, we could guarantee a unanimous gain of 7.6%, 1.0%, and 0.6% in terms of NDS metric on nuScenes benchmark. 
Code and model checkpoints will be made available.", "keywords": "knowledge distillation;object detection", "primary_area": "", "supplementary_material": "", "author": "Linyan Huang;Huijie Wang;Peng Gao;Li Zhiqi;Wenhai Wang;Jifeng Dai;Hongyang Li", "authorids": "~Linyan_Huang2;~Huijie_Wang1;~Peng_Gao3;~Li_Zhiqi1;~Wenhai_Wang2;~Jifeng_Dai1;~Hongyang_Li1", "gender": ";M;M;M;Not Specified;M;M", "homepage": ";https://zhiqi-li.github.io/;https://jifengdai.org/;https://datascience.hku.hk/people/hongyang-li/;;;http://whai362.github.io/", "dblp": ";;14/9399;95/8433-1;237/8239;;122/3593.html", "google_scholar": "Xg4cp-EAAAAJ;https://scholar.google.com.hk/citations?user=H2fJLqEAAAAJ;SH_-B_AAAAAJ;https://scholar.google.com.hk/citations?user=Hfrih1EAAAAJ;https://scholar.google.com/citations?hl=zh-CN;miFIAFMAAAAJ;WM0OglcAAAAJ", "orcid": "0000-0003-3960-084X;;;0000-0001-9110-5534;;;", "linkedin": ";;;hongyangli2020/;;;", "or_profile": "~Huijie_Wang1;~Li_Zhiqi1;~Jifeng_Dai1;~Hongyang_Li1;~Linyan_Huang3;~Gao_Peng1;~Wenhai_Wang1", "aff": "OpenDriveLab;Nanjing University;Tsinghua University;Shanghai AI Lab;Shanghai AI Laboratory;shanghai ai lab ;Shanghai AI Laboratory", "aff_domain": "opendrivelab.com;nju.edu.cn;tsinghua.edu.cn;pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;pjlab.org.cn", "position": "Researcher;PhD student;Associate Professor;Researcher;Intern;Researcher;Researcher", "bibtex": "@misc{\nhuang2023on,\ntitle={On the Importance of Pretrained Knowledge Distillation for 3D Object Detection},\nauthor={Linyan Huang and Huijie Wang and Peng Gao and Li Zhiqi and Wenhai Wang and Jifeng Dai and Hongyang Li},\nyear={2023},\nurl={https://openreview.net/forum?id=T1Qx6EC08o}\n}", "github": "", "project": "", "reviewers": "QEzY;n5F6;DyNp", "site": "https://openreview.net/forum?id=T1Qx6EC08o", "pdf_size": 1672443, "recommendation": "3;3;5", "confidence": "5;4;2", "correctness": "4;2;3", "technical_novelty": "1;2;3", "empirical_novelty": "2;2;2", "wc_summary_paper": "58;134;74", "wc_strength_and_weaknesses": "86;226;88", "wc_clarity_quality_novelty_and_reproducibility": "11;9;64", "wc_summary_review": "14;16;46", "wc_review": "169;385;272", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 1.247219128924647 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 2.0, 0.816496580927726 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 88.66666666666667, 32.71425105702746 ], "wc_strength_and_weaknesses_avg": [ 133.33333333333334, 65.53031529164362 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 28.0, 25.468935326524086 ], "wc_summary_review_avg": [ 25.333333333333332, 14.636332266733431 ], "wc_review_avg": [ 275.3333333333333, 88.21312575549942 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.9449111825230683, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Suw8fucLEW4J:scholar.google.com/&scioq=On+the+Importance+of+Pretrained+Knowledge+Distillation+for+3D+Object+Detection&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;4;3;4", "aff_unique_norm": "OpenDriveLab;Nanjing University;Tsinghua University;Shanghai AI Lab;Shanghai AI 
Laboratory", "aff_unique_dep": ";;;;", "aff_unique_url": ";https://www.nju.edu.cn;https://www.tsinghua.edu.cn;https://www.shanghaiailab.com;https://www.shanghai-ai-lab.com", "aff_unique_abbr": ";Nanjing U;THU;SAIL;SAIL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1;1;1;1;1;1", "aff_country_unique": ";China" }, { "title": "In-Situ Text-Only Adaptation of Speech Models with Low-Overhead Speech Imputations", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11088", "id": "T2Ncx_PN2K", "poster": "/media/PosterPDFs/ICLR%202023/11088.png?t=1682397748.560081", "openreview": "https://openreview.net/forum?id=T2Ncx_PN2K", "slides": "https://iclr.cc/virtual/2023/poster/11088", "video": "https://iclr.cc/virtual/2023/poster/11088", "author_site": "Ashish Mittal, Sunita Sarawagi, Preethi Jyothi", "tldr": "A lightweight text-only adaptation technique for end-to-end speech recognition that is both fast and accurate.", "abstract": "Fast and accurate adaptation of automatic speech recognition (ASR) systems using only text data in the target domain is a problem of long-standing practical relevance. Text-only adaptation was easy in traditional cascaded ASR systems with completely decoupled acoustic and language models. Recently, the RNNTransducer (RNN-T) has emerged as a default ASR model because of its high accuracy, low latency, and capability of supporting streaming input. However text-only adaptation of the RNN-T model is significantly more challenging due to its tight integration of acoustic and language models and end-to-end training. Existing recent approaches for text-only adaptation of RNN-Ts, either entail significant modification to the network or introduce high latency during decoding. We propose a new approach (TOLSTOI) that imputes speech representations internal to a baseline RNN-T, starting from text-only inputs, and performs in-situ adaptation that results in higher adaptation accuracy without any runtime overheads during decoding. Our imputation model is a function of the labeled data and trained parameters of the ASR model, and that we show, is more effective in controlling catastrophic forgetting compared to existing methods. We establish the effectiveness of TOLSTOI using three target domains and two ASR models of varying complexity. We yield up to 35% relative reduction in word error rate with text-only adaptation while forgetting the least compared to existing adaptation approaches. 
Our method is easy to implement and can be applied to existing RNN-T models without requiring ASR model training from scratch.", "keywords": "Text-Only Adaptation;End-to-end Speech Recognition", "primary_area": "", "supplementary_material": "", "author": "Ashish Mittal;Sunita Sarawagi;Preethi Jyothi", "authorids": "~Ashish_Mittal1;~Sunita_Sarawagi1;~Preethi_Jyothi2", "gender": "M;F;F", "homepage": "https://researcher.watson.ibm.com/researcher/view.php?person=in-arakeshk;https://www.cse.iitb.ac.in/~sunita/;http://www.cse.iitb.ac.in/~pjyothi", "dblp": "184/1441;s/SunitaSarawagi;01/9014", "google_scholar": "https://scholar.google.co.in/citations?user=4LMwouUAAAAJ;https://scholar.google.com.tw/citations?user=Hg4HmTAAAAAJ;https://scholar.google.co.in/citations?user=QN_uhu8AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Ashish_Mittal1;~Sunita_Sarawagi1;~Preethi_Jyothi2", "aff": "IBM Research;IIT Bombay;Indian Institute of Technology Bombay", "aff_domain": "ibm.com;iitb.ac.in;iitb.ac.in", "position": "Researcher;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nmittal2023insitu,\ntitle={In-Situ Text-Only Adaptation of Speech Models with Low-Overhead Speech Imputations},\nauthor={Ashish Mittal and Sunita Sarawagi and Preethi Jyothi},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=T2Ncx_PN2K}\n}", "github": "", "project": "", "reviewers": "USzV;cvPh;BcMJ;hxLw", "pdf_size": 2370274, "recommendation": "6;6;8;8", "confidence": "4;4;4;4", "correctness": "3;4;3;3", "technical_novelty": "3;4;4;3", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "97;210;87;70", "wc_strength_and_weaknesses": "178;120;192;170", "wc_clarity_quality_novelty_and_reproducibility": "26;96;52;15", "wc_summary_review": "31;224;34;18", "wc_review": "332;650;365;273", "wc_reply_reviewers": "13;0;22;0", "wc_reply_authors": "276;374;312;335", "reply_reviewers": "1;0;1;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.5, 0.5 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 116.0, 55.122590650295095 ], "wc_strength_and_weaknesses_avg": [ 165.0, 27.147743920996454 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 47.25, 31.1879383736726 ], "wc_summary_review_avg": [ 76.75, 85.22726969696964 ], "wc_review_avg": [ 405.0, 145.239457448725 ], "wc_reply_reviewers_avg": [ 8.75, 9.310612224768036 ], "wc_reply_authors_avg": [ 324.25, 35.59757716474535 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": -0.5773502691896257, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4860263175168042400&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=T2Ncx_PN2K", "email": "ibm.com;iitb.ac.in;iitb.ac.in", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "IBM;Indian Institute of Technology Bombay", "aff_unique_dep": "IBM Research;", "aff_unique_url": "https://www.ibm.com/research;https://www.iitb.ac.in", "aff_unique_abbr": "IBM;IITB", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Mumbai;Bombay", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United States;India" }, { "id": "T5ADm9PHGeJ", "title": "Tiered Pruning for 
Efficient Differentialble Inference-Aware Neural Architecture Search", "track": "main", "status": "Reject", "tldr": "", "abstract": "We propose three novel pruning techniques to improve the cost and results of inference-aware Differentiable Neural Architecture Search (DNAS). First, we introduce $\\textbf{Prunode}$, a stochastic bi-path building block for DNAS, which can search over inner hidden dimensions with $\\mathcal{O}(1)$ memory and compute complexity. Second, we present an algorithm for pruning blocks within a stochastic layer of the SuperNet during the search. Third, we describe a novel technique for pruning unnecessary stochastic layers during the search. The optimized models resulting from the search are called PruNet and establish a new state-of-the-art Pareto frontier for NVIDIA V100 in terms of inference latency for ImageNet Top-1 image classification accuracy. PruNet as a backbone also outperforms GPUNet and EfficientNet on the COCO object detection task in terms of inference latency relative to mean Average Precision (mAP).", "keywords": "nas;dnas;neural architecture search;differentiable neural architecture search;state-of-the-art;imagenet;classification;gpunet;efficientnet;pruning;inference-aware;computer vision;object detection", "primary_area": "", "supplementary_material": "", "author": "Slawomir Kierat;Mateusz Sieniawski;Denys Fridman;Chenhan D. Yu;Szymon Migacz;Pawel Morkisz;Alex Fit-Florea", "authorids": "~Slawomir_Kierat1;~Mateusz_Sieniawski1;dfridman@nvidia.com;~Chenhan_D._Yu1;~Szymon_Migacz1;~Pawel_Morkisz1;~Alex_Fit-Florea1", "gender": "M;M;;M;M;M;M", "homepage": ";;;;;http://home.agh.edu.pl/~morkiszp/;", "dblp": ";;;16/10641;;173/3108.html;20/1030.html", "google_scholar": ";;;tmhdpd8AAAAJ;;E8gToekAAAAJ;", "orcid": ";;;;;0000-0002-4734-966X;", "linkedin": "slawomir-kierat-66149315/;mateusz-sieniawski;;;szmigacz/;pawel-morkisz/;alex-fit-florea-7aa0007", "or_profile": "~Slawomir_Kierat1;~Mateusz_Sieniawski1;dfridman@nvidia.com;~Chenhan_D._Yu1;~Szymon_Migacz1;~Pawel_Morkisz1;~Alex_Fit-Florea1", "aff": "NVIDIA;University of Warsaw;;NVIDIA;NVIDIA;AGH University of Science and Technology, Krakow, Poland;NVIDIA", "aff_domain": "nvidia.com;mimuw.edu.pl;;nvidia.com;nvidia.com;agh.edu.pl;nvidia.com", "position": "Employee;MS student;;Researcher;Researcher;Assistant Professor;Management", "bibtex": "@misc{\nkierat2023tiered,\ntitle={Tiered Pruning for Efficient Differentialble Inference-Aware Neural Architecture Search},\nauthor={Slawomir Kierat and Mateusz Sieniawski and Denys Fridman and Chenhan D. 
Yu and Szymon Migacz and Pawel Morkisz and Alex Fit-Florea},\nyear={2023},\nurl={https://openreview.net/forum?id=T5ADm9PHGeJ}\n}", "github": "", "project": "", "reviewers": "Xpgv;qypu;Jf9e;GWQr", "site": "https://openreview.net/forum?id=T5ADm9PHGeJ", "pdf_size": 575608, "recommendation": "3;3;3;5", "confidence": "5;5;4;3", "correctness": "2;3;3;3", "technical_novelty": "2;2;2;2", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "87;69;305;99", "wc_strength_and_weaknesses": "275;21;182;326", "wc_clarity_quality_novelty_and_reproducibility": "73;51;25;55", "wc_summary_review": "30;99;21;61", "wc_review": "465;240;533;541", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 140.0, 95.85927185202274 ], "wc_strength_and_weaknesses_avg": [ 201.0, 116.04094105099286 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 51.0, 17.146428199482248 ], "wc_summary_review_avg": [ 52.75, 30.548117781624452 ], "wc_review_avg": [ 444.75, 121.84493218841726 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": -0.8703882797784891, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:p2RUV8ifglUJ:scholar.google.com/&scioq=Tiered+Pruning+for+Efficient+Differentialble+Inference-Aware+Neural+Architecture+Search&hl=en&as_sdt=0,44", "gs_version_total": 5, "aff_unique_index": "0;1;0;0;2;0", "aff_unique_norm": "NVIDIA;University of Warsaw;AGH University of Science and Technology", "aff_unique_dep": "NVIDIA Corporation;;", "aff_unique_url": "https://www.nvidia.com;https://www.uw.edu.pl;https://www.agh.edu.pl", "aff_unique_abbr": "NVIDIA;UW;AGH", "aff_campus_unique_index": "1", "aff_campus_unique": ";Krakow", "aff_country_unique_index": "0;1;0;0;1;0", "aff_country_unique": "United States;Poland" }, { "title": "Sparse Upcycling: Training Mixture-of-Experts from Dense Checkpoints", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11064", "id": "T5nUQDrM4u", "poster": "/media/PosterPDFs/ICLR%202023/11064.png?t=1681632881.9769258", "openreview": "https://openreview.net/forum?id=T5nUQDrM4u", "slides": "https://iclr.cc/virtual/2023/poster/11064", "video": "https://iclr.cc/virtual/2023/poster/11064", "author_site": "Aran Komatsuzaki, Joan Puigcerver, James Lee-Thorp, Carlos Riquelme, Basil Mustafa, Joshua Ainslie, Yi Tay, Mostafa Dehghani, Neil Houlsby", "tldr": "We create sparsely activated Mixture-of-Experts models from pre-existing dense models, showing significant performance improvements and computational savings in doing so.", "abstract": "Training large, deep neural networks to convergence can be prohibitively expensive. As a result, often only a small selection of popular, dense models are reused across different contexts and tasks. Increasingly, sparsely activated models, which seek to decouple model size from computation costs, are becoming an attractive alternative to dense models. 
Although more efficient in terms of quality and computation cost, sparse models remain data-hungry and costly to train from scratch in the large-scale regime. In this work, we propose sparse upcycling -- a simple way to reuse sunk training costs by initializing a sparsely activated Mixture-of-Experts model from a dense checkpoint. We show that sparsely upcycled T5 Base, Large, and XL language models and Vision Transformer Base and Large models, respectively, significantly outperform their dense counterparts on SuperGLUE and ImageNet, using only ~50% of the initial dense pretraining sunk cost. The upcycled models also outperform sparse models trained from scratch on 100% of the initial dense pretraining computation budget.", "keywords": "mixture of experts;sparse;vision;language;deep learning;superglue;imagenet", "primary_area": "", "supplementary_material": "", "author": "Aran Komatsuzaki;Joan Puigcerver;James Lee-Thorp;Carlos Riquelme Ruiz;Basil Mustafa;Joshua Ainslie;Yi Tay;Mostafa Dehghani;Neil Houlsby", "authorids": "~Aran_Komatsuzaki1;~Joan_Puigcerver1;~James_Lee-Thorp1;~Carlos_Riquelme_Ruiz1;~Basil_Mustafa1;~Joshua_Ainslie1;~Yi_Tay1;~Mostafa_Dehghani1;~Neil_Houlsby1", "gender": ";M;M;M;M;;M;M;M", "homepage": "https://arankomatsuzaki.wordpress.com/;http://www.jpuigcerver.net;;https://rikel.github.io/;https://www.basilmustafa.com/;;http://yitay.net;http://mostafadehghani.com/;https://neilhoulsby.github.io/", "dblp": ";155/3271;;https://dblp.uni-trier.de/pers/hd/r/Riquelme:Carlos;;263/3363;;125/4062;91/10669", "google_scholar": "zzksRXYAAAAJ;https://scholar.google.com/citations?hl=en;qsPv098AAAAJ;Es2BBeYAAAAJ;https://scholar.google.co.uk/citations?user=LuxZAJwAAAAJ;;VBclY_cAAAAJ;https://scholar.google.nl/citations?user=MiHOX3QAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0001-6445-7155;;;;;;", "linkedin": ";;;;basil-mustafa/;;;;", "or_profile": "~Aran_Komatsuzaki1;~Joan_Puigcerver1;~James_Lee-Thorp1;~Carlos_Riquelme_Ruiz1;~Basil_Mustafa1;~Joshua_Ainslie1;~Yi_Tay1;~Mostafa_Dehghani1;~Neil_Houlsby1", "aff": "Georgia Institute of Technology;Google;Google;Google;Google;Google;Google;Google DeepMind;Google", "aff_domain": "gatech.edu;google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com", "position": "PhD student;Software Engineer in Research;Researcher;Researcher;Research Software Engineer;Software Engineer;Research Scientist;Research Scientist;Researcher", "bibtex": "@inproceedings{\nkomatsuzaki2023sparse,\ntitle={Sparse Upcycling: Training Mixture-of-Experts from Dense Checkpoints},\nauthor={Aran Komatsuzaki and Joan Puigcerver and James Lee-Thorp and Carlos Riquelme Ruiz and Basil Mustafa and Joshua Ainslie and Yi Tay and Mostafa Dehghani and Neil Houlsby},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=T5nUQDrM4u}\n}", "github": "", "project": "", "reviewers": "vrXq;zFGk;puDi;36d4", "pdf_size": 734825, "recommendation": "5;6;8;8", "confidence": "5;4;4;4", "correctness": "3;3;3;4", "technical_novelty": "3;3;3;3", "empirical_novelty": "3;3;4;3", "wc_summary_paper": "34;57;67;83", "wc_strength_and_weaknesses": "343;471;159;191", "wc_clarity_quality_novelty_and_reproducibility": "2;64;124;9", "wc_summary_review": "2;80;70;24", "wc_review": "381;672;420;307", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "485;805;262;581", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 6.75, 1.299038105676658 ], 
"confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 60.25, 17.76759691123141 ], "wc_strength_and_weaknesses_avg": [ 291.0, 125.02799686470227 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 49.75, 49.13438205574585 ], "wc_summary_review_avg": [ 44.0, 32.155870381627054 ], "wc_review_avg": [ 445.0, 137.1987609273495 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 533.25, 194.95688625950098 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 9, 0 ], "corr_recommendation_confidence": -0.7777777777777777, "corr_recommendation_correctness": 0.5555555555555555, "gs_citation": 119, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4163698350044298449&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=T5nUQDrM4u", "email": "gatech.edu;google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com", "author_num": 9, "aff_unique_index": "0;1;1;1;1;1;1;1;1", "aff_unique_norm": "Georgia Institute of Technology;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.gatech.edu;https://www.google.com", "aff_unique_abbr": "Georgia Tech;Google", "aff_campus_unique_index": "1;1;1;1;1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;0;1;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "T6HPzkhaKeS", "title": "Action Matching: A Variational Method for Learning Stochastic Dynamics from Samples", "track": "main", "status": "Reject", "tldr": "We propose Action Matching for modeling stochastic dynamics by learning an underlying mechanism to move samples.", "abstract": "Stochastic dynamics are ubiquitous in many fields of science, from the evolution of quantum systems in physics to diffusion-based models in machine learning. Existing methods such as score matching can be used to simulate these physical processes by assuming that the dynamics is a diffusion, which is not always the case. In this work, we propose a method called \"Action Matching\" that enables us to learn a much broader family of stochastic dynamics. Our method requires access only to samples from different time-steps, makes no explicit assumptions about the underlying dynamics, and can be applied even when samples are uncorrelated (i.e., are not part of a trajectory). Action Matching directly learns an underlying mechanism to move samples in time without modeling the distributions at each time-step. 
In this work, we showcase how Action Matching can be used for several computer vision tasks such as generative modeling, super-resolution, colorization, and inpainting; and further discuss potential applications in other areas of science.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Kirill Neklyudov;Daniel Severo;Alireza Makhzani", "authorids": "~Kirill_Neklyudov1;~Daniel_Severo1;~Alireza_Makhzani1", "gender": "M;M;", "homepage": "https://necludov.github.io/;http://dsevero.com;http://www.alireza.ai/", "dblp": "195/1093;249/9390;122/5126.html", "google_scholar": "https://scholar.google.ru/citations?user=eOttYWgAAAAJ;5bQjLz4AAAAJ;B0KVWJEAAAAJ", "orcid": ";0000-0003-0472-5300;", "linkedin": ";danielsevero/;", "or_profile": "~Kirill_Neklyudov1;~Daniel_Severo1;~Alireza_Makhzani1", "aff": "Vector Institute;Google;Vector Institute", "aff_domain": "vectorinstitute.ai;google.com;vectorinstitute.ai", "position": "Postdoc;Intern;Researcher", "bibtex": "@misc{\nneklyudov2023action,\ntitle={Action Matching: A Variational Method for Learning Stochastic Dynamics from Samples},\nauthor={Kirill Neklyudov and Daniel Severo and Alireza Makhzani},\nyear={2023},\nurl={https://openreview.net/forum?id=T6HPzkhaKeS}\n}", "github": "", "project": "", "reviewers": "iYDX;Sbd8;Ln22;dWGD", "site": "https://openreview.net/forum?id=T6HPzkhaKeS", "pdf_size": 29921617, "recommendation": "3;5;5;6", "confidence": "3;3;4;3", "correctness": "3;3;3;4", "technical_novelty": "2;2;2;3", "empirical_novelty": "1;3;2;3", "wc_summary_paper": "101;115;51;76", "wc_strength_and_weaknesses": "85;181;86;81", "wc_clarity_quality_novelty_and_reproducibility": "116;40;19;14", "wc_summary_review": "67;26;8;19", "wc_review": "369;362;164;190", "wc_reply_reviewers": "0;35;0;0", "wc_reply_authors": "959;599;552;315", "reply_reviewers": "0;1;0;0", "reply_authors": "2;1;1;1", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 85.75, 24.44764814864612 ], "wc_strength_and_weaknesses_avg": [ 108.25, 42.043875891739575 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 47.25, 40.874044331335746 ], "wc_summary_review_avg": [ 30.0, 22.304708023195463 ], "wc_review_avg": [ 271.25, 94.72954924415085 ], "wc_reply_reviewers_avg": [ 8.75, 15.155444566227676 ], "wc_reply_authors_avg": [ 606.25, 230.35556754721603 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.13245323570650439, "corr_recommendation_correctness": 0.6622661785325219, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "aff_unique_index": "0;1;0", "aff_unique_norm": "Vector Institute;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://vectorinstitute.ai/;https://www.google.com", "aff_unique_abbr": "Vector Institute;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Canada;United States" }, { "id": "T6NIgvKRb7b", "title": "Denoising Differential Privacy in Split Learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Differential Privacy (DP) is applied in split learning to address privacy concerns about data leakage. 
Previous work combines split neural network (SplitNN) training with DP by adding noise to the intermediate results during the forward pass. Unfortunately, DP noise injection significantly degrades the training accuracy of SplitNN. This paper focuses on improving the training accuracy of DP-protected SplitNNs without sacrificing the privacy guarantee. We propose two denoising techniques, namely scaling and random masking. Our theoretical investigation shows that both of our techniques achieve accurate estimation of the intermediate variables during the forward pass of SplitNN training. Our experiments with real networks demonstrate that our denoising approach allows SplitNN training that can tolerate high levels of DP noise while achieving almost the same accuracy as the non-private (i.e., non-DP protected) baseline. Interestingly, we show that after applying our techniques, the resultant network is more resilient against a state-of-the-art attack, compared to the plain DP-protected baseline.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/2d9163cd61a7eb77d164971b78959fc9ac54a77d.zip", "author": "Hang Xu;Aritra Dutta;Weiran Liu;Xin Li;Panos Kalnis", "authorids": "~Hang_Xu3;~Aritra_Dutta1;~Weiran_Liu1;~Xin_Li39;~Panos_Kalnis1", "gender": "M;M;M;M;M", "homepage": ";https://sciences.ucf.edu/math/person/aritra-dutta/;https://www.zhihu.com/people/liu-wei-ran-8-34;https://sciences.ucf.edu/math/xli/;https://www.kaust.edu.sa/en/study/faculty/panagiotis-kalnis", "dblp": ";189/9262;71/10355;;97/3036", "google_scholar": "UhUecFUAAAAJ;vquoiHsAAAAJ;;;-NdSrrYAAAAJ", "orcid": ";0000-0001-6994-1659;0000-0002-1466-7418;;0000-0002-5060-1360", "linkedin": ";aritra-dutta-7b28052b/;;;", "or_profile": "~Hang_Xu3;~Aritra_Dutta1;~Weiran_Liu1;~Xin_Li39;~Panos_Kalnis1", "aff": "King Abdullah University of Science and Technology;University of Southern Denmark (SDU);Alibaba Group;University of Central Florida;KAUST", "aff_domain": "kaust.edu.sa;sdu.dk;alibaba-inc.com;ucf.edu;kaust.edu.sa", "position": "PhD student;Assistant Professor;Researcher;Full Professor;Full Professor", "bibtex": "@misc{\nxu2023denoising,\ntitle={Denoising Differential Privacy in Split Learning},\nauthor={Hang Xu and Aritra Dutta and Weiran Liu and Xin Li and Panos Kalnis},\nyear={2023},\nurl={https://openreview.net/forum?id=T6NIgvKRb7b}\n}", "github": "", "project": "", "reviewers": "UriU;pjXR;Fu65;P2Hx", "site": "https://openreview.net/forum?id=T6NIgvKRb7b", "pdf_size": 2804650, "recommendation": "3;3;5;6", "confidence": "4;3;4;4", "correctness": "2;4;3;2", "technical_novelty": "2;2;3;4", "empirical_novelty": "2;2;3;3", "wc_summary_paper": "65;87;124;27", "wc_strength_and_weaknesses": "194;147;300;97", "wc_clarity_quality_novelty_and_reproducibility": "15;52;9;39", "wc_summary_review": "21;10;111;18", "wc_review": "295;296;544;181", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.82915619758885 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 75.75, 35.16656793035112 ], "wc_strength_and_weaknesses_avg": [ 184.5, 74.98833242578475 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 28.75, 17.498214194597114 ], "wc_summary_review_avg": [ 40.0, 41.188590653237945 ], "wc_review_avg": [ 329.0, 132.64049155518083 ], "wc_reply_reviewers_avg": [ 
0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.5555555555555555, "corr_recommendation_correctness": -0.4061811972299616, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11389848658396282403&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "King Abdullah University of Science and Technology;University of Southern Denmark;Alibaba Group;University of Central Florida", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.kast.kau.edu.sa;https://www.sdu.dk;https://www.alibaba.com;https://www.ucf.edu", "aff_unique_abbr": "KAUST;SDU;Alibaba;UCF", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;3;0", "aff_country_unique": "Saudi Arabia;Denmark;China;United States" }, { "id": "T7mOB22uL_", "title": "Controllable Adaptive Learning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "As deep learning enabled unprecedented applications in versatile vision cognition tasks, researchers surged for the solutions of higher performance and more generalized algorithms, coming with expensive training and deployment to be applied in complex scenarios across domains. However, we argue that generalization and high performance are not always the ultimate goal in real-life with various applications and regulatory requirements. In this work, for the first time to our knowledge, we propose a Controllable Adaptive Learning (CAL) paradigm that allows the model to perform well on some data domains while performing poorly on others by control. We define the problem as a Controlled Multi-target Unsupervised Domain Adaptation (CMUDA) Task. Without the need to access labels in the target domain, we make the model perform poorly on certain target domains through a novel distribution different loss function design. We then introduced two easy-to-use control methods, namely implicit representation controller and explicit text-prompt controller, to regain access to the high-performance result with little effort, without the need to retrain the entire network. Extensive experiments demonstrated the effectiveness of our approach. We believe that our CAL paradigm will lead to an emerging trend for future research. 
Our code is at *URL*.", "keywords": "Controllable Adaptive Learning", "primary_area": "", "supplementary_material": "", "author": "Lanyun Zhu;Tianrun Chen;Yan WANG", "authorids": "~Lanyun_Zhu1;~Tianrun_Chen1;~Yan_WANG21", "gender": "M;M;F", "homepage": "https://lanyunzhu.site;http://tianrun-chen.github.io;", "dblp": "245/2640;317/5235;", "google_scholar": "urOSnlQAAAAJ;;https://scholar.google.com.hk/citations?hl=zh-CN", "orcid": ";;", "linkedin": ";https://www.linkedin.cn/incareer/in/tianrun-chen-3441731a2;", "or_profile": "~Lanyun_Zhu1;~Tianrun_Chen1;~Yan_WANG21", "aff": "Singapore University of Technology and Design;Zhejiang University;Beihang University", "aff_domain": "sutd.edu.sg;zju.edu.cn;buaa.edu.cn", "position": "PhD student;PhD student;PhD student", "bibtex": "@misc{\nzhu2023controllable,\ntitle={Controllable Adaptive Learning},\nauthor={Lanyun Zhu and Tianrun Chen and Yan WANG},\nyear={2023},\nurl={https://openreview.net/forum?id=T7mOB22uL_}\n}", "github": "", "project": "", "reviewers": "s3f2;CA3z;mrkY;5NuU", "site": "https://openreview.net/forum?id=T7mOB22uL_", "pdf_size": 691305, "recommendation": "3;3;3;5", "confidence": "4;3;4;5", "correctness": "3;3;2;3", "technical_novelty": "2;1;2;2", "empirical_novelty": "3;1;3;2", "wc_summary_paper": "114;104;76;49", "wc_strength_and_weaknesses": "398;110;248;180", "wc_clarity_quality_novelty_and_reproducibility": "225;122;31;14", "wc_summary_review": "62;21;48;11", "wc_review": "799;357;403;254", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 1.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 85.75, 25.380849079571785 ], "wc_strength_and_weaknesses_avg": [ 234.0, 106.51760417883985 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 98.0, 84.03868156985806 ], "wc_summary_review_avg": [ 35.5, 20.426698215815495 ], "wc_review_avg": [ 453.25, 206.7805297894364 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.816496580927726, "corr_recommendation_correctness": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:HuCT9AUK0J8J:scholar.google.com/&scioq=Controllable+Adaptive+Learning&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;2", "aff_unique_norm": "Singapore University of Technology and Design;Zhejiang University;Beihang University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.sutd.edu.sg;https://www.zju.edu.cn;http://www.buaa.edu.cn/", "aff_unique_abbr": "SUTD;ZJU;BUAA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Singapore;China" }, { "id": "T9iojz-kOfU", "title": "Panoptically guided Image Inpainting with Image-level and Object-level Semantic Discriminators", "track": "main", "status": "Withdraw", "tldr": "Guided Image Inpainting and image inpainting with a novel discriminator design.", "abstract": "Recent image inpainting methods have made great progress. However, the existing approaches often struggle to hallucinate realistic object instances in natural scenes. 
Such a limitation is partially due to the lack of semantic-level constraints inside the hole as well as the lack of a mechanism to enforce the realism of local objects. To tackle the challenging object inpainting task, we propose a new panoptically guided image inpainting task that leverages a panoptic segmentation map to guide the completion of object instances. To enforce the realism of the generated objects, we propose a semantic discriminator that leverages pretrained visual features to improve the generated semantics. Furthermore, we propose object-level discriminators that take aligned instances as input to enforce the realism of individual objects. Experiments on the large-scale Places2 dataset demonstrate the significant improvement our method brings to object completion, verified by both quantitative and qualitative evaluation. Furthermore, our framework is flexible and can be generalized to other inpainting tasks, including segmentation-guided inpainting, edge-guided inpainting, as well as standard image inpainting without guidance. Consequently, our approach achieves new state-of-the-art performance on various inpainting tasks and impressive results on object completion.", "keywords": "Generative model;image inpainting;image manipulation", "primary_area": "", "supplementary_material": "", "author": "Haitian Zheng;Zhe Lin;Jingwan Lu;Scott Cohen;Eli Shechtman;Jianming Zhang;Sohrab Amirghodsi;Qing Liu;Jiebo Luo", "authorids": "~Haitian_Zheng2;~Zhe_Lin1;~Jingwan_Lu1;~Scott_Cohen1;~Eli_Shechtman3;~Jianming_Zhang1;tamirgho@adobe.com;qingl@adobe.com;~Jiebo_Luo1", "gender": "M;M;F;M;;M;;;", "homepage": "https://www.cs.rochester.edu/u/hzheng15/haitian_homepage/index.html;https://sites.google.com/site/zhelin625/;https://research.adobe.com/person/jingwan-lu/;;;https://jimmie33.github.io/;;;", "dblp": "171/0919;42/1680-1;08/7867;54/4155;;;;;", "google_scholar": "hLG8AmwAAAAJ;R0bnqaAAAAAJ;jN2Y51YAAAAJ;;;TkVHKDgAAAAJ;;;", "orcid": ";0000-0003-1154-9907;;;;0000-0002-9954-6294;;;", "linkedin": ";;jingwanlu/;;;;;;", "or_profile": "~Haitian_Zheng2;~Zhe_Lin1;~Jingwan_Lu1;~Scott_Cohen1;~Eli_Shechtman3;~Jianming_Zhang1;tamirgho@adobe.com;qingl@adobe.com;~Jiebo_Luo1", "aff": "University of Rochester;Adobe Research;Adobe Research & Firefly;Adobe Systems;;Adobe Systems;;;", "aff_domain": "rochester.edu;adobe.com;research.adobe.com;adobe.com;;adobe.com;;;", "position": "PhD student;Principal Researcher;Principal Researcher;Research Scientist;;Research Scientist;;;", "bibtex": "@misc{\nzheng2023panoptically,\ntitle={Panoptically guided Image Inpainting with Image-level and Object-level Semantic Discriminators},\nauthor={Haitian Zheng and Zhe Lin and Jingwan Lu and Scott Cohen and Eli Shechtman and Jianming Zhang and Sohrab Amirghodsi and Qing Liu and Jiebo Luo},\nyear={2023},\nurl={https://openreview.net/forum?id=T9iojz-kOfU}\n}", "github": "", "project": "", "reviewers": "yQ2b;imot;aB3Y;7j6F", "site": "https://openreview.net/forum?id=T9iojz-kOfU", "pdf_size": 51182241, "recommendation": "3;5;6;6", "confidence": "4;5;2;3", "correctness": "4;3;3;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "1;1;3;3", "wc_summary_paper": "95;46;36;70", "wc_strength_and_weaknesses": "228;187;36;179", "wc_clarity_quality_novelty_and_reproducibility": "14;17;40;17", "wc_summary_review": "48;20;66;44", "wc_review": "385;270;178;310", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.0, 1.224744871391589 ], 
"confidence_avg": [ 3.5, 1.118033988749895 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.0, 1.0 ], "wc_summary_paper_avg": [ 61.75, 22.829531313629722 ], "wc_strength_and_weaknesses_avg": [ 157.5, 72.56893274673398 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 22.0, 10.464224768228174 ], "wc_summary_review_avg": [ 44.5, 16.393596310755 ], "wc_review_avg": [ 285.75, 74.66048151465405 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 9, 0 ], "corr_recommendation_confidence": -0.5477225575051661, "corr_recommendation_correctness": -0.9428090415820632, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ZDCWaDdMt5AJ:scholar.google.com/&scioq=Panoptically+guided+Image+Inpainting+with+Image-level+and+Object-level+Semantic+Discriminators&hl=en&as_sdt=0,28", "gs_version_total": 0, "aff_unique_index": "0;1;1;1;1", "aff_unique_norm": "University of Rochester;Adobe", "aff_unique_dep": ";Adobe Research", "aff_unique_url": "https://www.rochester.edu;https://research.adobe.com", "aff_unique_abbr": "U of R;Adobe", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "SemPPL: Predicting Pseudo-Labels for Better Contrastive Representations", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11049", "id": "TAVBJ4aHsWt", "poster": "", "openreview": "https://openreview.net/forum?id=TAVBJ4aHsWt", "slides": "https://iclr.cc/virtual/2023/poster/11049", "video": "https://iclr.cc/virtual/2023/poster/11049", "author_site": "Matko Bo\u0161njak, Pierre Richemond, Nenad Tomasev, Florian Strub, Jacob C Walker, Felix Hill, Lars Buesing, Razvan Pascanu, Charles Blundell, Jovana Mitrovic", "tldr": "", "abstract": "Learning from large amounts of unsupervised data and a small amount of supervision is an important open problem in computer vision. We propose a new semi-supervised learning method, Semantic Positives via Pseudo-Labels (SEMPPL), that combines labelled and unlabelled data to learn informative representations. Our method extends self-supervised contrastive learning\u2014where representations are shaped by distinguishing whether two samples represent the same underlying datum (positives) or not (negatives)\u2014with a novel approach to selecting positives. To enrich the set of positives, we leverage the few existing ground-truth labels to predict the missing ones through a k-nearest neighbors classifier by using the learned embeddings of the labelled data. We thus extend the set of positives with datapoints having the same pseudo-label and call these semantic positives. We jointly learn the representation and predict bootstrapped pseudo-labels. This creates a reinforcing cycle. Strong initial representations enable better pseudo-label predictions which then improve the selection of semantic positives and lead to even better representations. SEMPPL outperforms competing semi-supervised methods setting new state-of-the-art performance of 76% and 68.5% top-1accuracy when using a ResNet-50 and training on 10% and 1% of labels on ImageNet, respectively. 
Furthermore, when using selective kernels, SEMPPL significantly outperforms the previous state-of-the-art, achieving 72.3% and 78.3% top-1 accuracy on ImageNet with 1% and 10% labels, respectively, an absolute improvement of +7.8% and +6.2% over previous work. SEMPPL also exhibits state-of-the-art performance with larger ResNet models as well as strong robustness, out-of-distribution, and transfer performance. We release the checkpoints and the evaluation code at https://github.com/deepmind/semppl.", "keywords": "contrastive learning;representation learning;semi-supervised learning", "primary_area": "", "supplementary_material": "", "author": "Matko Bo\u0161njak;Pierre Harvey Richemond;Nenad Tomasev;Florian Strub;Jacob C Walker;Felix Hill;Lars Holger Buesing;Razvan Pascanu;Charles Blundell;Jovana Mitrovic", "authorids": "~Matko_Bo\u0161njak2;~Pierre_Harvey_Richemond1;~Nenad_Tomasev1;~Florian_Strub1;~Jacob_C_Walker1;~Felix_Hill1;~Lars_Holger_Buesing1;~Razvan_Pascanu1;~Charles_Blundell1;~Jovana_Mitrovic1", "gender": "M;M;M;;;M;M;;;", "homepage": ";;http://www.florian-strub.com;;https://fh295.github.io/;;https://razp.info;http://www.gatsby.ucl.ac.uk/~ucgtcbl/;http://jovana-mitrovic.github.io;http://matko.info/", "dblp": "200/8842;;;135/1696;116/0509;https://dblp.uni-trier.de/pers/hd/b/Buesing:Lars;65/8368.html;35/8396;176/5114;39/10827", "google_scholar": ";https://scholar.google.co.uk/citations?user=-PIq1igAAAAJ;zxO5kccAAAAJ;0dR_wD0AAAAJ;https://scholar.google.co.uk/citations?user=4HLUnhIAAAAJ;1h_mxPMAAAAJ;https://scholar.google.ca/citations?user=eSPY8LwAAAAJ;https://scholar.google.co.uk/citations?user=f31mvPsAAAAJ;;https://scholar.google.co.uk/citations?user=JDaHecMAAAAJ", "orcid": ";;;;;;;;;", "linkedin": ";https://uk.linkedin.com/in/nenadtomasev;florian-strub-64443527/;;;;;;;", "or_profile": "~Pierre_Harvey_Richemond1;~Nenad_Tomasev1;~Florian_Strub1;~Jacob_C_Walker1;~Felix_Hill1;~Lars_Holger_Buesing1;~Razvan_Pascanu1;~Charles_Blundell1;~Jovana_Mitrovic1;~Matko_Bosnjak1", "aff": "Imperial College London;Google DeepMind;Google DeepMind;Google;Google;Deepmind;Google DeepMind;Google DeepMind;Google DeepMind;Google DeepMind", "aff_domain": "imperial.ac.uk;deepmind.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com;deepmind.com", "position": "Visiting Researcher;Researcher;Research Scientist;Research Scientist;Researcher;Postdoc;Research Scientist;Research Scientist;Research Scientist;Researcher", "bibtex": "@inproceedings{\nbo{\\v{s}}njak2023semppl,\ntitle={Sem{PPL}: Predicting Pseudo-Labels for Better Contrastive Representations},\nauthor={Matko Bo{\\v{s}}njak and Pierre Harvey Richemond and Nenad Tomasev and Florian Strub and Jacob C Walker and Felix Hill and Lars Holger Buesing and Razvan Pascanu and Charles Blundell and Jovana Mitrovic},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=TAVBJ4aHsWt}\n}", "github": "", "project": "", "reviewers": "JoKU;iXGB;15Rd;U91Z;vzyU", "pdf_size": 1263536, "recommendation": "6;6;6;6;8", "confidence": "4;3;4;4;5", "correctness": "3;3;3;4;4", "technical_novelty": "3;2;2;2;3", "empirical_novelty": "4;2;0;3;4", "wc_summary_paper": "83;71;63;44;188", "wc_strength_and_weaknesses": "130;288;25;256;505", "wc_clarity_quality_novelty_and_reproducibility": "50;64;87;45;675", "wc_summary_review": "6;51;20;58;162", "wc_review": "269;474;195;403;1530", "wc_reply_reviewers": "28;33;65;0;96", "wc_reply_authors": "542;1063;436;1037;1670", "reply_reviewers": 
"1;1;1;0;2", "reply_authors": "3;4;3;3;4", "recommendation_avg": [ 6.4, 0.7999999999999999 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "correctness_avg": [ 3.4, 0.4898979485566356 ], "technical_novelty_avg": [ 2.4, 0.4898979485566356 ], "empirical_novelty_avg": [ 2.6, 1.4966629547095764 ], "wc_summary_paper_avg": [ 89.8, 50.71252310820277 ], "wc_strength_and_weaknesses_avg": [ 240.8, 161.93010838012802 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 184.2, 245.831975137491 ], "wc_summary_review_avg": [ 59.4, 54.77809781290328 ], "wc_review_avg": [ 574.2, 487.8202127833573 ], "wc_reply_reviewers_avg": [ 44.4, 33.03694901167479 ], "wc_reply_authors_avg": [ 949.6, 440.31743095180775 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 3.4, 0.4898979485566356 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 10, 0 ], "corr_recommendation_confidence": 0.7905694150420948, "corr_recommendation_correctness": 0.6123724356957946, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17439466265850904499&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=TAVBJ4aHsWt", "email": "imperial.ac.uk;deepmind.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com;deepmind.com", "author_num": 10, "aff_unique_index": "0;1;1;1;1;2;1;1;1;1", "aff_unique_norm": "Imperial College London;Google;DeepMind", "aff_unique_dep": ";Google DeepMind;", "aff_unique_url": "https://www.imperial.ac.uk;https://deepmind.com;https://deepmind.com", "aff_unique_abbr": "ICL;DeepMind;DeepMind", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;1;1;0;0;0;0;0", "aff_country_unique": "United Kingdom;United States" }, { "id": "TAtAJFo35lc", "title": "Learning Object-Centric Dynamic Modes from Video and Emerging Properties", "track": "main", "status": "Withdraw", "tldr": "We propose a model for dynamics interpretability and manipulation by means of object-centric dynamic mode decomposition, directly from pixels.", "abstract": "One of the long-term objectives of Artificial Intelligence is to endow machines with the capacity of structuring and interpreting the world as we do. Towards this goal, recent methods have successfully decomposed and disentangled video sequences into their composing objects, attributes and dynamics, in a self-supervised fashion. However, there have been scarce efforts to propose useful decompositions of the dynamics in a scene. We propose a method to decompose a video into moving objects, their attributes and the dynamic modes of their trajectories. We model the objects' dynamics with linear system identification tools, by means of a Koopman mapping and the Koopman operator $\\mathcal{K}$. This allows user access and interpretation of the dynamics in the scene. We test our framework in a variety of datasets, while illustrating the novel features that emerge from our dynamic modes decomposition: temporal super-resolution, backwards forecasting, model reduction and video dynamics interpretation and manipulation at test-time. 
We successfully forecast challenging object trajectories from pixels, achieving competitive performance while drawing useful insights.", "keywords": "Koopman theory;dynamics;video representation learning;dynamic mode decomposition;video manipulation;object-centric decomposition", "primary_area": "", "supplementary_material": "/attachment/a3085f208a938b4494a5ef266ab6f754b1e02fb6.zip", "author": "Armand Comas;Christian Fernandez Lopez;Sandesh Ghimire;Haolin Li;Mario Sznaier;Octavia Camps", "authorids": "~Armand_Comas1;~Christian_Fernandez_Lopez1;~Sandesh_Ghimire2;~Haolin_Li1;~Mario_Sznaier1;~Octavia_Camps1", "gender": "M;M;M;M;F;M", "homepage": ";;;http://robustsystems.coe.neu.edu;http://robustsystems.coe.neu.edu;http://www.sandeshgh.com/", "dblp": "268/5392;;;14/1686;69/6960;205/3415", "google_scholar": "-I-Q7XsAAAAJ;;tpKciu4AAAAJ;https://scholar.google.com/citations?view_op=search_authors;htt9T1AAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;0000-0003-4439-3988;0000-0003-1945-9172;", "linkedin": "armandcomas/;christian-fernandez-lopez-868b09161/;%E6%98%8A%E9%9C%96-%E6%9D%8E-b04a6b228/;;;sandesh-ghimire/", "or_profile": "~Armand_Comas1;~Christian_Fernandez_Lopez1;~Haolin_Li1;~Mario_Sznaier1;~Octavia_I._Camps1;~Sandesh_Ghimire1", "aff": "Northeastern University;;Northeastern University;Northeastern University;Northeastern University;QualComm", "aff_domain": "neu.edu;;neu.edu;northeastern.edu;neu.edu;qualcomm.com", "position": "PhD student;;PhD student;Full Professor;Full Professor;Researcher", "bibtex": "@misc{\ncomas2023learning,\ntitle={Learning Object-Centric Dynamic Modes from Video and Emerging Properties},\nauthor={Armand Comas and Christian Fernandez Lopez and Sandesh Ghimire and Haolin Li and Mario Sznaier and Octavia Camps},\nyear={2023},\nurl={https://openreview.net/forum?id=TAtAJFo35lc}\n}", "github": "", "project": "", "reviewers": "ns6R;kjLR;8w9G;dikn", "site": "https://openreview.net/forum?id=TAtAJFo35lc", "pdf_size": 4051884, "recommendation": "3;3;5;5", "confidence": "3;5;3;4", "correctness": "4;2;3;3", "technical_novelty": "2;2;3;3", "empirical_novelty": "1;2;2;3", "wc_summary_paper": "51;105;87;60", "wc_strength_and_weaknesses": "78;384;146;388", "wc_clarity_quality_novelty_and_reproducibility": "42;27;134;115", "wc_summary_review": "95;33;48;19", "wc_review": "266;549;415;582", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 75.75, 21.46363203188128 ], "wc_strength_and_weaknesses_avg": [ 249.0, 139.1006829602213 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 79.5, 45.80665890457413 ], "wc_summary_review_avg": [ 48.75, 28.603976996215053 ], "wc_review_avg": [ 453.0, 124.7697880097582 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.30151134457776363, "corr_recommendation_correctness": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17535684359265528691&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Northeastern University;Qualcomm Incorporated", 
"aff_unique_dep": ";", "aff_unique_url": "https://www.northeastern.edu;https://www.qualcomm.com", "aff_unique_abbr": "NEU;Qualcomm", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "TBOFHtBariC", "title": "On discrete symmetries of robotics systems: A group-theoretic and data-driven analysis", "track": "main", "status": "Reject", "tldr": "We present a group-theoretic analysis of bilateral/radial symmetries of dynamical systems. Characterizing the symmetries of the system's dynamics, control, and proprioceptive/exteroceptive data. And elucidating how to exploit these symmetries in DL", "abstract": "In this work, we study the Morphological Symmetries of dynamical systems with one or more planes of symmetry, a predominant feature in animal biology and robotic systems, characterized by the duplication and balanced distribution of body parts. These morphological symmetries imply that the system's dynamics are symmetric (or approximately symmetric), which in turn imprints symmetries in optimal control policies and in all proprioceptive and exteroceptive measurements related to the evolution of the system's dynamics. For data-driven methods, symmetry represents an inductive bias that justifies data augmentation and the construction of symmetric function approximators. To this end, we use Group Theory to present a theoretical and practical framework allowing for (1) the identification of the system's morphological symmetry Group $\\G$, (2) the characterization of how the group acts upon the system state variables and any relevant measurement living in the Euclidean space, and (3) the exploitation of data symmetries through the use of $\\G$-equivariant/$\\G$-invariant Neural Networks, for which we present experimental results on synthetic and real-world applications, demonstrating how symmetry constraints lead to better sample efficiency and generalization while reducing the number of trainable parameters.", "keywords": "Morphological Symmetries;Discrete Symmetries of Dynamical Systems;Equivariant Dynamics;Equivariant Function Approximators;Geometric Deep Learning", "primary_area": "", "supplementary_material": "", "author": "Daniel Ordonez-Apraez;Mario Martin;Antonio Agudo;Francesc Moreno-Noguer", "authorids": "~Daniel_Ordonez-Apraez1;~Mario_Martin2;~Antonio_Agudo3;~Francesc_Moreno-Noguer1", "gender": "M;;M;M", "homepage": "https://daniel-ordonez-apraez.netlify.app/;https://www.cs.upc.edu/~mmartin/;http://www.iri.upc.edu/people/fmoreno/;http://www.iri.upc.edu/people/aagudo/", "dblp": ";169/5613.html;44/991;12/10782", "google_scholar": "https://scholar.google.com.co/citations?user=zNBUoy4AAAAJ;SGZH24YAAAAJ;https://scholar.google.com/citations?hl=en;JV-mGb8AAAAJ", "orcid": "0000-0002-9793-2482;0000-0002-4125-6630;;0000-0001-6845-4998", "linkedin": "danfoa/;;francesc-moreno-noguer-923a8598/?ppe=1;antonio-agudo-computer-vision/?originalSubdomain=es", "or_profile": "~Daniel_Ordonez-Apraez1;~Mario_Martin2;~Francesc_Moreno-Noguer1;~Antonio_Agudo2", "aff": "Universit\u00e0 degli Studi di Genova, Istituto Italiano di Tecnologia;Universidad Polit\u00e9cnica de Catalunya;Universidad Polit\u00e9cnica de Cataluna;Spanish National Research Council, Institut de Rob\u00f2tica i Inform\u00e0tica Industrial,", "aff_domain": "iit.it;upc.edu;upc.edu;csic.es", "position": "PhD student;Associate Professor;Researcher;Researcher", "bibtex": "@misc{\nordonez-apraez2023on,\ntitle={On discrete symmetries of robotics systems: A 
group-theoretic and data-driven analysis},\nauthor={Daniel Ordonez-Apraez and Mario Martin and Antonio Agudo and Francesc Moreno-Noguer},\nyear={2023},\nurl={https://openreview.net/forum?id=TBOFHtBariC}\n}", "github": "", "project": "", "reviewers": "tbqU;Yn9H;vevv", "site": "https://openreview.net/forum?id=TBOFHtBariC", "pdf_size": 3970936, "recommendation": "5;5;6", "confidence": "3;5;3", "correctness": "3;4;4", "technical_novelty": "2;3;4", "empirical_novelty": "3;3;0", "wc_summary_paper": "51;22;72", "wc_strength_and_weaknesses": "245;162;102", "wc_clarity_quality_novelty_and_reproducibility": "29;41;2", "wc_summary_review": "23;20;20", "wc_review": "348;245;196", "wc_reply_reviewers": "307;0;0", "wc_reply_authors": "2917;2069;648", "reply_reviewers": "2;0;0", "reply_authors": "7;4;1", "recommendation_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.816496580927726 ], "empirical_novelty_avg": [ 2.0, 1.4142135623730951 ], "wc_summary_paper_avg": [ 48.333333333333336, 20.49932248202906 ], "wc_strength_and_weaknesses_avg": [ 169.66666666666666, 58.6306707752483 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 24.0, 16.30950643030009 ], "wc_summary_review_avg": [ 21.0, 1.4142135623730951 ], "wc_review_avg": [ 263.0, 63.34561284466878 ], "wc_reply_reviewers_avg": [ 102.33333333333333, 144.72118788284672 ], "wc_reply_authors_avg": [ 1878.0, 936.1093240998439 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 4.0, 2.449489742783178 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": -0.4999999999999999, "corr_recommendation_correctness": 0.4999999999999999, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13142199566877590517&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Universit\u00e0 degli Studi di Genova;Universitat Polit\u00e8cnica de Catalunya;Spanish National Research Council", "aff_unique_dep": ";;Institut de Rob\u00f2tica i Inform\u00e0tica Industrial", "aff_unique_url": "https://www.unige.it;https://www.upc.edu;https://www.csic.es", "aff_unique_abbr": "UniGe;UPC;CSIC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "Italy;Spain" }, { "id": "TBaS6AqX_F_", "title": "MyoDex: Generalizable Representations for Dexterous Physiological Manipulation", "track": "main", "status": "Reject", "tldr": "", "abstract": "The complexity of human dexterity has attracted attention from multiple fields. Still, much is to be understood about how hand manipulation behaviors emerge. In this work we aim at learning dexterous manipulation behaviors with a physiologically realistic hand model: MyoHand. In contrast to prior works demonstrating isolated postural and force control, here we demonstrate musculoskeletal agents (MyoDex) exhibiting contact-rich dynamic dexterous manipulation behaviors in simulation. Furthermore, to demonstrate generalization, we show that a single MyoDex agent can be trained to solve up to 14 different contact-rich tasks. Aligned with human development, simultaneous learning of multiple tasks imparts physiologically coordinated muscle contractions, i.e., muscle synergies, that are not only shared amongst those in-domain tasks but are also effective in out-of-domain tasks.
By leveraging these pre-trained manipulation synergies, we show generalization to 14 additional previously unsolved tasks. While physiological behaviors with large muscle groups (such as legged-locomotion, arm-reaching, etc.) have been demonstrated before, to the best of our knowledge, nimble behaviors of this complexity with smaller muscle groups are being demonstrated for the first time.", "keywords": "Musculoskeletal;Machine Learning;human dexterity;muscle synergies", "primary_area": "", "supplementary_material": "", "author": "Vittorio Caggiano;Sudeep Dasari;Vikash Kumar", "authorids": "~Vittorio_Caggiano1;~Sudeep_Dasari2;~Vikash_Kumar2", "gender": ";M;M", "homepage": ";http://vikashplus.github.io/;https://sudeepdasari.github.io/", "dblp": ";82/7475;215/3640", "google_scholar": "lCt9zVkAAAAJ;nu3W--sAAAAJ;NpOg5soAAAAJ", "orcid": "0000-0002-2186-1550;;", "linkedin": "vittorio-caggiano-26b6a7b/;;", "or_profile": "~Vittorio_Caggiano1;~Vikash_Kumar2;~KSudeep_Dasari1", "aff": ";Meta Facebook;Carnegie Mellon University", "aff_domain": ";facebook.com;cmu.edu", "position": ";Researcher;PhD student", "bibtex": "@misc{\ncaggiano2023myodex,\ntitle={MyoDex: Generalizable Representations for Dexterous Physiological Manipulation},\nauthor={Vittorio Caggiano and Sudeep Dasari and Vikash Kumar},\nyear={2023},\nurl={https://openreview.net/forum?id=TBaS6AqX_F_}\n}", "github": "", "project": "", "reviewers": "RfAu;Sq1z;sdCp;G4dn", "site": "https://openreview.net/forum?id=TBaS6AqX_F_", "pdf_size": 16026212, "recommendation": "3;5;5;6", "confidence": "3;4;4;3", "correctness": "4;3;3;3", "technical_novelty": "2;2;2;2", "empirical_novelty": "2;3;2;2", "wc_summary_paper": "101;42;39;38", "wc_strength_and_weaknesses": "110;826;196;80", "wc_clarity_quality_novelty_and_reproducibility": "12;112;12;40", "wc_summary_review": "84;136;46;249", "wc_review": "307;1116;293;407", "wc_reply_reviewers": "0;156;0;0", "wc_reply_authors": "468;1866;723;968", "reply_reviewers": "0;1;0;0", "reply_authors": "1;3;1;2", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 55.0, 26.59887215654077 ], "wc_strength_and_weaknesses_avg": [ 303.0, 304.9409778957233 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 44.0, 40.890096600521744 ], "wc_summary_review_avg": [ 128.75, 76.42439074012955 ], "wc_review_avg": [ 530.75, 340.7421128947815 ], "wc_reply_reviewers_avg": [ 39.0, 67.54998149518622 ], "wc_reply_authors_avg": [ 1006.25, 526.9195265882638 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.2294157338705618, "corr_recommendation_correctness": -0.9271726499455306, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:acbcfVGjkv4J:scholar.google.com/&scioq=MyoDex:+Generalizable+Representations+for+Dexterous+Physiological+Manipulation&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Meta;Carnegie Mellon University", "aff_unique_dep": "Meta Platforms, Inc.;", "aff_unique_url": "https://meta.com;https://www.cmu.edu", "aff_unique_abbr": "Meta;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "TC39w69m8bB", "title": "ELRT:
Towards Efficient Low-Rank Training for Compact Neural Networks", "track": "main", "status": "Reject", "tldr": "", "abstract": "Low-rank compression, a popular model compression technique that produces compact convolutional neural networks (CNNs) with low rankness, has been well studied in the literature. On the other hand, low-rank training, as an alternative way to train low-rank CNNs from scratch, has been little explored so far. Unlike low-rank compression, low-rank training does not need pre-trained full-rank models and the entire training phase is always performed on the low-rank structure, bringing attractive benefits for practical applications. However, the existing low-rank training solutions are still very limited and do not demonstrate their effectiveness for training modern low-rank CNN models on large-scale datasets from scratch. In this paper, we perform a systematic investigation of low-rank CNN training. By identifying the proper low-rank format and performance-improving strategy, we propose ELRT, an efficient low-rank training solution for high-accuracy high-compactness low-rank CNN models. Our extensive evaluation results for training various CNNs on different datasets demonstrate the effectiveness of ELRT.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Yang Sui;Miao Yin;Wanzhao Yang;Yu Gong;Jinqi Xiao;Huy Phan;Ding Ding;Xiaozhong Xu;Shan Liu;Zhenzhong Chen;Bo Yuan", "authorids": "~Yang_Sui1;~Miao_Yin1;~Wanzhao_Yang1;~Yu_Gong4;~Jinqi_Xiao1;~Huy_Phan3;~Ding_Ding1;~Xiaozhong_Xu1;~Shan_Liu2;~Zhenzhong_Chen2;~Bo_Yuan3", "gender": "M;;;M;M;;M;M;F;;", "homepage": "https://eclipsess.github.io/yangsui.github.io/;https://noodle-lab.github.io/;;;https://github.com/jinqixiao;;;;https://www.linkedin.com/in/shanliu/;;", "dblp": "77/10522;199/1982;;;338/6610;;;;49/4215-1;;41/1662-1", "google_scholar": "Q2W1p6sAAAAJ;ILDdu98AAAAJ;;FR4HP5wAAAAJ;ITSm2LYAAAAJ;;6glC_iEAAAAJ;Xs4QqgcAAAAJ;bdBZ43wAAAAJ;;oUy9elEAAAAJ", "orcid": "0000-0003-3020-0612;;;;0009-0004-7311-9413;;;;0000-0002-1442-1207;;", "linkedin": "yang-sui-308055117/;miao-yin-55ab64170/;;;;;;;shanliu/;;", "or_profile": "~Yang_Sui1;~Miao_Yin1;~Wanzhao_Yang1;~Yu_Gong4;~Jinqi_Xiao1;~Huy_Phan3;~Ding_Ding1;~Xiaozhong_Xu1;~Shan_Liu2;~Zhenzhong_Chen2;~Bo_Yuan3", "aff": "Rutgers University;Rutgers University;;Rutgers University;Rutgers University;;;Tencent Media Lab;Tencent Media Lab;;Rutgers University", "aff_domain": "rutgers.edu;rutgers.edu;;rutgers.edu;rutgers.edu;;;tencent.com;tencent.com;;rutgers.edu", "position": "PhD student;PhD student;;PhD student;PhD student;;;Principal Researcher;Distinguished Scientist;;Assistant Professor", "bibtex": "@misc{\nsui2023elrt,\ntitle={{ELRT}: Towards Efficient Low-Rank Training for Compact Neural Networks},\nauthor={Yang Sui and Miao Yin and Wanzhao Yang and Yu Gong and Jinqi Xiao and Huy Phan and Ding Ding and Xiaozhong Xu and Shan Liu and Zhenzhong Chen and Bo Yuan},\nyear={2023},\nurl={https://openreview.net/forum?id=TC39w69m8bB}\n}", "github": "", "project": "", "reviewers": "Afr4;DfQU;yVpF;rfLo", "site": "https://openreview.net/forum?id=TC39w69m8bB", "pdf_size": 1190650, "recommendation": "5;5;6;6", "confidence": "4;4;4;4", "correctness": "3;2;3;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;4;2;3", "wc_summary_paper": "64;70;27;56", "wc_strength_and_weaknesses": "302;248;178;331", "wc_clarity_quality_novelty_and_reproducibility": "101;66;32;24", "wc_summary_review": "40;63;30;36", "wc_review": "507;447;267;447", "wc_reply_reviewers": "0;132;49;75",
"wc_reply_authors": "1170;2132;1210;1538", "reply_reviewers": "0;1;1;1", "reply_authors": "2;5;3;3", "recommendation_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 54.25, 16.498105951896417 ], "wc_strength_and_weaknesses_avg": [ 264.75, 58.27252783259021 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 55.75, 30.515364982251153 ], "wc_summary_review_avg": [ 42.25, 12.497499749949988 ], "wc_review_avg": [ 417.0, 90.0 ], "wc_reply_reviewers_avg": [ 64.0, 47.60777247467056 ], "wc_reply_authors_avg": [ 1512.5, 385.11134753470975 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.25, 1.0897247358851685 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 11, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:DFC82wd9wYMJ:scholar.google.com/&scioq=ELRT:+Towards+Efficient+Low-Rank+Training+for+Compact+Neural+Networks&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;1;1;0", "aff_unique_norm": "Rutgers University;Tencent", "aff_unique_dep": ";Media Lab", "aff_unique_url": "https://www.rutgers.edu;https://www.tencent.com", "aff_unique_abbr": "Rutgers;Tencent", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;1;0", "aff_country_unique": "United States;China" }, { "title": "Statistical Efficiency of Score Matching: The View from Isoperimetry", "status": "Top-5%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11885", "id": "TD7AnQjNzR6", "poster": "", "openreview": "https://openreview.net/forum?id=TD7AnQjNzR6", "slides": "https://iclr.cc/virtual/2023/poster/11885", "video": "https://iclr.cc/virtual/2023/poster/11885", "author_site": "Frederic Koehler, Alexander Heckett, Andrej Risteski", "tldr": "We show a tight connection between the statistical efficiency of score matching and the isoperimetric properties (e.g. log-Sobolev constant) of the distribution being estimated", "abstract": " Deep generative models parametrized up to a normalizing constant (e.g. energy-based models) are difficult to train by maximizing the likelihood of the data because the likelihood and/or gradients thereof cannot be explicitly or efficiently written down. Score matching is a training method, whereby instead of fitting the likelihood $\\log p(x)$ for the training data, we instead fit the score function $\\nabla_x \\log p(x)$ --- obviating the need to evaluate the partition function. Though this estimator is known to be consistent, its unclear whether (and when) its statistical efficiency is comparable to that of maximum likelihood --- which is known to be (asymptotically) optimal. We initiate this line of inquiry in this paper, and show a tight connection between statistical efficiency of score matching and the isoperimetric properties of the distribution being estimated --- i.e. the Poincar\\'e, log-Sobolev and isoperimetric constant --- quantities which govern the mixing time of Markov processes like Langevin dynamics. Roughly, we show that the score matching estimator is statistically comparable to the maximum likelihood when the distribution has a small isoperimetric constant. 
Conversely, if the distribution has a large isoperimetric constant --- even for simple families of distributions like exponential families with rich enough sufficient statistics --- score matching will be substantially less efficient than maximum likelihood. We suitably formalize these results both in the finite sample regime, and in the asymptotic regime. Finally, we identify a direct parallel in the discrete setting, where we connect the statistical properties of pseudolikelihood estimation with approximate tensorization of entropy and the Glauber dynamics.\n", "keywords": "score matching;log-Sobolev inequality;isoperimetry;relative efficiency;sample complexity", "primary_area": "", "supplementary_material": "", "author": "Frederic Koehler;Alexander Heckett;Andrej Risteski", "authorids": "~Frederic_Koehler1;aheckett@andrew.cmu.edu;~Andrej_Risteski2", "gender": ";;M", "homepage": "https://frkoehle.github.io/;;", "dblp": "132/1904;;63/11143", "google_scholar": ";;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Frederic_Koehler1;aheckett@andrew.cmu.edu;~Andrej_Risteski2", "aff": "Stanford University;;Carnegie Mellon University", "aff_domain": "stanford.edu;;cmu.edu", "position": "Postdoc;;Assistant Professor", "bibtex": "@inproceedings{\nkoehler2023statistical,\ntitle={Statistical Efficiency of Score Matching: The View from Isoperimetry},\nauthor={Frederic Koehler and Alexander Heckett and Andrej Risteski},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=TD7AnQjNzR6}\n}", "github": "", "project": "", "reviewers": "R2sB;tSNd;gNn2", "pdf_size": 711039, "recommendation": "8;8;8", "confidence": "3;2;4", "correctness": "4;3;4", "technical_novelty": "3;3;3", "empirical_novelty": "3;0;3", "wc_summary_paper": "96;68;25", "wc_strength_and_weaknesses": "58;278;112", "wc_clarity_quality_novelty_and_reproducibility": "589;28;47", "wc_summary_review": "45;24;51", "wc_review": "788;398;235", "wc_reply_reviewers": "10;18;0", "wc_reply_authors": "376;485;43", "reply_reviewers": "1;1;0", "reply_authors": "1;1;1", "recommendation_avg": [ 8.0, 0.0 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 1.4142135623730951 ], "wc_summary_paper_avg": [ 63.0, 29.20045661743437 ], "wc_strength_and_weaknesses_avg": [ 149.33333333333334, 93.61386412041519 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 221.33333333333334, 260.0952816864534 ], "wc_summary_review_avg": [ 40.0, 11.575836902790225 ], "wc_review_avg": [ 473.6666666666667, 232.01484626826985 ], "wc_reply_reviewers_avg": [ 9.333333333333334, 7.363574011458175 ], "wc_reply_authors_avg": [ 301.3333333333333, 188.01122897907513 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 65, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12439618741909605002&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 6, "pdf": "https://openreview.net/pdf?id=TD7AnQjNzR6", "email": "stanford.edu;;cmu.edu", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Stanford University;Carnegie Mellon University", "aff_unique_dep": ";", "aff_unique_url": "https://www.stanford.edu;https://www.cmu.edu", "aff_unique_abbr": "Stanford;CMU", 
"aff_campus_unique_index": "0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "TDUMUFa5zz", "title": "Divide-and-Cluster: Spatial Decomposition Based Hierarchical Clustering", "track": "main", "status": "Reject", "tldr": "This paper clusters n points located in a D-dimensional space by detecting their mutual clustering affinity within local neighborhoods, using more efficient local computations, and then hierarchically growing the local clusters outward.", "abstract": "This paper is about increasing the computational efficiency of clustering algorithms. Many clustering algorithms are based on properties of relative locations of points, globally or locally, e.g., interpoint distances and nearest neighbor distances. This amounts to using a lower dimensional space than the full dimensionality $D$ of the space in which the points are embedded. We present a clustering algorithm, Divide-and-Cluster (DAC), which detects local clusters in small neighborhoods obtained by recursive tessellation of space, and then merges them hierarchically, following the Divide-and-Conquer paradigm. This significantly reduces computation time which may otherwise grow nonlinearly number $n$ of points. We define locality as hypercubical neighborhoods in a recursive hypercubical decomposition of space, represented by a tree. Clusters are detected within each hypercube, and merged with those from neighboring hypercubes while traversing up the tree. We expect DAC to perform better than many other algorithms because (a) as clusters merge into larger clusters (components), their number steadily decreases vs the number of points, and (b) we cluster only neighboring components. The ordering of component appearances also simultaneously yields a cluster hierarchy (tree). Further, our use of small neighborhoods allows piecewise uniform approximation of large, nonuniform, arbitrary shaped clusters, thus avoiding the need for global cluster models. 
We experimentally verify the correctness of detected clusters on a variety of datasets, posing a variety of challenges, as well as show that DAC\u2019s runtime is significantly better than representative algorithms of other types, particularly for increasing values of $n$.\n", "keywords": "Unsupervised Learning;High-dimensional features;World Centered Clustering;Points Centered Clustering;Hierarchical Clustering;Complexity;Minimal Spanning Tree", "primary_area": "", "supplementary_material": "", "author": "Narendra Ahuja;Akshat Sharma;Divyam Goel", "authorids": "~Narendra_Ahuja1;~Akshat_Sharma1;~Divyam_Goel2", "gender": ";M;M", "homepage": "http://vision.ai.illinois.edu/ahuja.html;;", "dblp": ";351/4945;", "google_scholar": "dY7OSl0AAAAJ;xRRx-NQAAAAJ;", "orcid": ";;", "linkedin": ";akshat-sharma-0b19881b2/;divyam-goel/", "or_profile": "~Narendra_Ahuja1;~Akshat_Sharma1;~Divyam_Goel2", "aff": "University of Illinois, Urbana Champaign;IIT Kanpur, Indian Institute of Technology, Kanpur;", "aff_domain": "illinois.edu;cse.iitk.ac.in;", "position": "Research Professor;Undergrad student;", "bibtex": "@misc{\nahuja2023divideandcluster,\ntitle={Divide-and-Cluster: Spatial Decomposition Based Hierarchical Clustering},\nauthor={Narendra Ahuja and Akshat Sharma and Divyam Goel},\nyear={2023},\nurl={https://openreview.net/forum?id=TDUMUFa5zz}\n}", "github": "", "project": "", "reviewers": "1fFd;Sm2r;snwo;ZhK4", "site": "https://openreview.net/forum?id=TDUMUFa5zz", "pdf_size": 1936575, "recommendation": "3;3;3;5", "confidence": "3;2;3;4", "correctness": "3;2;2;4", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;1;2;2", "wc_summary_paper": "48;74;69;106", "wc_strength_and_weaknesses": "257;99;277;351", "wc_clarity_quality_novelty_and_reproducibility": "102;72;189;125", "wc_summary_review": "63;123;60;103", "wc_review": "470;368;595;685", "wc_reply_reviewers": "45;0;130;0", "wc_reply_authors": "341;257;624;816", "reply_reviewers": "1;0;1;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 2.75, 0.82915619758885 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 74.25, 20.765054779605084 ], "wc_strength_and_weaknesses_avg": [ 246.0, 91.80958555619343 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 122.0, 43.00581356049435 ], "wc_summary_review_avg": [ 87.25, 26.7242867070386 ], "wc_review_avg": [ 529.5, 120.5124474898755 ], "wc_reply_reviewers_avg": [ 43.75, 53.07718436390536 ], "wc_reply_authors_avg": [ 509.5, 223.15969618190468 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.816496580927726, "corr_recommendation_correctness": 0.8703882797784891, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:14cDRpeGgEsJ:scholar.google.com/&scioq=Divide-and-Cluster:+Spatial+Decomposition+Based+Hierarchical+Clustering&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "University of Illinois Urbana-Champaign;Indian Institute of Technology Kanpur", "aff_unique_dep": ";", "aff_unique_url": "https://illinois.edu;https://www.iitk.ac.in", "aff_unique_abbr": "UIUC;IITK", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Urbana-Champaign;Kanpur", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;India" }, { 
"title": "Meta Knowledge Condensation for Federated Learning", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12070", "id": "TDf-XFAwc79", "poster": "/media/PosterPDFs/ICLR%202023/12070.png?t=1682523615.735847", "openreview": "https://openreview.net/forum?id=TDf-XFAwc79", "slides": "https://iclr.cc/virtual/2023/poster/12070", "video": "https://iclr.cc/virtual/2023/poster/12070", "author_site": "Ping Liu, Xin Yu, Joey T Zhou", "tldr": "", "abstract": "Existing federated learning paradigms usually extensively exchange distributed models, rather than original data, at a central solver to achieve a more powerful model. However, this would incur severe communication burden between a server and multiple clients especially when data distributions are heterogeneous. As a result, current federated learning methods often require plenty of communication rounds in training. Unlike existing paradigms, we introduce an alternative perspective to significantly decrease the federate learning communication cost without leaking original data. In this work, we first present a meta knowledge representation method that extracts meta knowledge from distributed clients. The extracted meta knowledge encodes essential information that can be used to improve the current model. As the training progresses, the contributions of the same training samples to a federated model should also vary. Thus, we introduce a dynamic weight assignment mechanism that enables informative samples to contribute adaptively to the current model update. Then, informative meta knowledge from all active clients is sent to the server for model update. Training model on the combined meta knowledge that is regarded as a condense form of original data can significantly mitigate the heterogeneity issues. Moreover, to further ameliorate data heterogeneity, we also exchange meta knowledge among clients as conditional initialisation for meta knowledge extraction. Extensive experiments demonstrate the effectiveness and efficiency of our proposed method. 
Remarkably, our method outperforms the state-of-the-art by a large margin (from $74.07\\%$ to $92.95\\%$) on MNIST with a restricted communication budget (\\textit{i.e.}, 10 rounds).", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/c36627d63dc750984dced618d37101de28d8ad46.zip", "author": "Ping Liu;Xin Yu;Joey Tianyi Zhou", "authorids": "~Ping_Liu1;~Xin_Yu1;~Joey_Tianyi_Zhou1", "gender": "M;M;M", "homepage": "https://sites.google.com/site/pingliu264/;https://sites.google.com/view/xinyus-homepage/Home;https://joeyzhouty.github.io/", "dblp": "34/188-4;54/1184-2;123/5110", "google_scholar": ";oxdtuSEAAAAJ;https://scholar.google.com.sg/citations?user=cYNqDokAAAAJ", "orcid": ";0000-0002-0269-5649;0000-0002-4675-7055", "linkedin": ";;", "or_profile": "~Ping_Liu1;~Xin_Yu1;~Joey_Tianyi_Zhou1", "aff": "Institute of High Performance Computing, Singapore, A*STAR;University of Queensland;A*STAR Centre for Frontier AI Research", "aff_domain": "ihpc.a-star.edu.sg;uq.edu.au;cfar.a-star.edu.sg", "position": "Research Scientist;Senior Lecturer;Principal Researcher", "bibtex": "@inproceedings{\nliu2023meta,\ntitle={Meta Knowledge Condensation for Federated Learning},\nauthor={Ping Liu and Xin Yu and Joey Tianyi Zhou},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=TDf-XFAwc79}\n}", "github": "", "project": "", "reviewers": "FoXU;StBv;QKpf;ouFd", "pdf_size": 1014620, "recommendation": "6;6;8;8", "confidence": "3;4;4;4", "correctness": "2;3;4;4", "technical_novelty": "3;3;4;3", "empirical_novelty": "3;3;4;4", "wc_summary_paper": "72;35;119;119", "wc_strength_and_weaknesses": "574;184;215;143", "wc_clarity_quality_novelty_and_reproducibility": "82;84;46;15", "wc_summary_review": "121;45;46;272", "wc_review": "849;348;426;549", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 3.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 3.5, 0.5 ], "wc_summary_paper_avg": [ 86.25, 35.26595383652624 ], "wc_strength_and_weaknesses_avg": [ 279.0, 172.22224014336825 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 56.75, 28.455008346510812 ], "wc_summary_review_avg": [ 121.0, 92.46891369536034 ], "wc_review_avg": [ 543.0, 190.64758062981025 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.9045340337332909, "gs_citation": 41, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4565373816579670427&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=TDf-XFAwc79", "email": "ihpc.a-star.edu.sg;uq.edu.au;cfar.a-star.edu.sg", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Institute of High Performance Computing;University of Queensland;A*STAR", "aff_unique_dep": ";;Centre for Frontier AI Research", "aff_unique_url": "https://www.ihpc.a-star.edu.sg;https://www.uq.edu.au;https://www.a-star.edu.sg", "aff_unique_abbr": "IHPC;UQ;A*STAR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Singapore;Australia" }, { "id": 
"TFMEqzfFrP_", "title": "Input Perturbation Reduces Exposure Bias in Diffusion Models", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Denoising Diffusion Probabilistic Models (DDPMs) are fast becoming one of the dominant generative methods thanks to their high generation quality and diversity. However, one of the main problems of DDPMs is their large computational cost, which is due to the chain of sampling steps. In this paper, we argue that one of the reasons why DDPMs need a long sampling chain is due to an exposure bias problem, similar to the analogous problem in autoregressive text generation. Specifically, we note that there is a discrepancy between training and testing, since the former is conditioned on the ground truth samples, while the latter is conditioned on the previously generated results. In order to alleviate this problem, we propose a very simple but effective training protocol modification, consisting in perturbing the ground truth samples to simulate the inference time prediction errors. We empirically show that the proposed input perturbation leads to a significant improvement of the sample quality and to smoother sampling chains, with a drastic acceleration of the inference time. For instance, in all the tested benchmarks, we observed an acceleration over a state-of-the-art DDPM of 12.5 times.", "keywords": "Generative Models;Diffusion Model", "primary_area": "", "supplementary_material": "", "author": "Mang Ning;Enver Sangineto;Angelo Porrello;Simone Calderara;Rita Cucchiara", "authorids": "~Mang_Ning1;~Enver_Sangineto1;~Angelo_Porrello1;~Simone_Calderara1;~Rita_Cucchiara1", "gender": "M;;M;M;F", "homepage": ";;;;https://aimagelab.ing.unimore.it/imagelab/", "dblp": "302/2427;http://dblp.uni-trier.de/pers/hd/s/Sangineto:Enver;223/4466;13/422;c/RitaCucchiara", "google_scholar": "dLE4q0cAAAAJ;https://scholar.google.it/citations?user=eJZlvlAAAAAJ;b3-5Ys4AAAAJ;https://scholar.google.it/citations?user=CZd-WXkAAAAJ;OM3sZEoAAAAJ", "orcid": "0000-0001-6037-1661;;0000-0002-9022-8484;0000-0001-9056-1538;0000-0002-2239-283X", "linkedin": "mang-ning-851785110/;;;;rita-cucchiara-a4653a13/?originalSubdomain=it", "or_profile": "~Mang_Ning1;~Enver_Sangineto1;~Angelo_Porrello1;~Simone_Calderara1;~Rita_Cucchiara1", "aff": "Utrecht University;University of Trento;University of Modena and Reggio Emilia, AimageLab;University of Modena and Reggio Emilia;Universit\u00e0 di modena e reggio emilia", "aff_domain": "uu.nl;unitn.it;unimore.it;unimore.it;unimore.it", "position": "PhD student;Postdoc;Postdoc;Associate Professor;Full Professor", "bibtex": "@misc{\nning2023input,\ntitle={Input Perturbation Reduces Exposure Bias in Diffusion Models},\nauthor={Mang Ning and Enver Sangineto and Angelo Porrello and Simone Calderara and Rita Cucchiara},\nyear={2023},\nurl={https://openreview.net/forum?id=TFMEqzfFrP_}\n}", "github": "", "project": "", "reviewers": "Cr6u;e8hz;g1C9;hNo6", "site": "https://openreview.net/forum?id=TFMEqzfFrP_", "pdf_size": 1896672, "recommendation": "3;3;5;6", "confidence": "4;4;4;3", "correctness": "2;3;3;4", "technical_novelty": "2;3;2;2", "empirical_novelty": "2;3;2;3", "wc_summary_paper": "87;74;126;66", "wc_strength_and_weaknesses": "363;289;463;101", "wc_clarity_quality_novelty_and_reproducibility": "77;330;28;61", "wc_summary_review": "31;62;40;50", "wc_review": "558;755;657;278", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "583;362;804;92", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;2;1", "recommendation_avg": [ 4.25, 
1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.0, 0.7071067811865476 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 88.25, 23.047505287991584 ], "wc_strength_and_weaknesses_avg": [ 304.0, 132.47263868437136 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 124.0, 120.23934464225925 ], "wc_summary_review_avg": [ 45.75, 11.54068888758379 ], "wc_review_avg": [ 562.0, 178.14741087088524 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 460.25, 263.86206150183847 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.7777777777777777, "corr_recommendation_correctness": 0.816496580927726, "gs_citation": 61, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11073701578868408249&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "aff_unique_index": "0;1;2;2;3", "aff_unique_norm": "Utrecht University;University of Trento;University of Modena and Reggio Emilia;Universit\u00e0 di Modena e Reggio Emilia", "aff_unique_dep": ";;AimageLab;", "aff_unique_url": "https://www.uu.nl;https://www.unitn.it;https://www.unimore.it;https://www.unimore.it", "aff_unique_abbr": "UU;UniTN;;Unimore", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1", "aff_country_unique": "Netherlands;Italy" }, { "title": "SlotFormer: Unsupervised Visual Dynamics Simulation with Object-Centric Models", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11779", "id": "TFbwV6I0VLg", "poster": "/media/PosterPDFs/ICLR%202023/11779.png?t=1681478127.5975552", "openreview": "https://openreview.net/forum?id=TFbwV6I0VLg", "slides": "https://iclr.cc/virtual/2023/poster/11779", "video": "https://iclr.cc/virtual/2023/poster/11779", "author_site": "Ziyi Wu, Nikita Dvornik, Klaus Greff, Thomas Kipf, Animesh Garg", "tldr": "We propose a general Transformer-based dynamic model to enable consistent future prediction in object-centric models", "abstract": "Understanding dynamics from visual observations is a challenging problem that requires disentangling individual objects from the scene and learning their interactions. While recent object-centric models can successfully decompose a scene into objects, modeling their dynamics effectively still remains a challenge. We address this problem by introducing SlotFormer -- a Transformer-based autoregressive model operating on learned object-centric representations. Given a video clip, our approach reasons over object features to model spatio-temporal relationships and predicts accurate future object states. In this paper, we successfully apply SlotFormer to perform video prediction on datasets with complex object interactions. Moreover, the unsupervised SlotFormer's dynamics model can be used to improve the performance on supervised downstream tasks, such as Visual Question Answering (VQA), and goal-conditioned planning. Compared to past works on dynamics modeling, our method achieves significantly better long-term synthesis of object dynamics, while retaining high quality visual generation. Besides, SlotFormer enables VQA models to reason about the future without object-level labels, even outperforming counterparts that use ground-truth annotations. 
Finally, we show its ability to serve as a world model for model-based planning, which is competitive with methods designed specifically for such tasks.", "keywords": "Object-centric learning;dynamics modeling;Transformer", "primary_area": "", "supplementary_material": "", "author": "Ziyi Wu;Nikita Dvornik;Klaus Greff;Thomas Kipf;Animesh Garg", "authorids": "~Ziyi_Wu1;~Nikita_Dvornik1;~Klaus_Greff1;~Thomas_Kipf2;~Animesh_Garg1", "gender": "M;M;M;M;M", "homepage": "https://wuziyi616.github.io/;https://dvornikita.github.io/;http://qwlouse.github.io/;http://animesh.garg.tech;http://tkipf.github.io/", "dblp": "217/8678;205/2510;76/11430;123/5728;186/8206", "google_scholar": "iopH6wIAAAAJ;UOLJQTIAAAAJ;https://scholar.google.ch/citations?user=OcownLgAAAAJ;zp8V7ZMAAAAJ;83HL5FwAAAAJ", "orcid": "0000-0002-8247-5872;;0000-0001-6982-0937;0000-0003-0482-4296;", "linkedin": ";;;animeshgarg/;thomas-kipf-6b260410a", "or_profile": "~Ziyi_Wu1;~Nikita_Dvornik1;~Klaus_Greff1;~Animesh_Garg1;~Thomas_N._Kipf1", "aff": "Google;Waabi;Google;University of Toronto;Google", "aff_domain": "google.com;waabi.ai;google.com;toronto.edu;google.com", "position": "Intern;Researcher;Researcher;Assistant Professor;Research Scientist", "bibtex": "@inproceedings{\nwu2023slotformer,\ntitle={SlotFormer: Unsupervised Visual Dynamics Simulation with Object-Centric Models},\nauthor={Ziyi Wu and Nikita Dvornik and Klaus Greff and Thomas Kipf and Animesh Garg},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=TFbwV6I0VLg}\n}", "github": "", "project": "", "reviewers": "HGtg;yrhY;eCuX", "pdf_size": 6732254, "recommendation": "6;8;8", "confidence": "4;4;5", "correctness": "3;4;4", "technical_novelty": "2;4;3", "empirical_novelty": "3;4;3", "wc_summary_paper": "97;47;101", "wc_strength_and_weaknesses": "201;424;305", "wc_clarity_quality_novelty_and_reproducibility": "41;102;192", "wc_summary_review": "58;30;44", "wc_review": "397;603;642", "wc_reply_reviewers": "34;23;20", "wc_reply_authors": "875;747;642", "reply_reviewers": "1;1;1", "reply_authors": "3;2;2", "recommendation_avg": [ 7.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 3.0, 0.816496580927726 ], "empirical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 81.66666666666667, 24.567367696917707 ], "wc_strength_and_weaknesses_avg": [ 310.0, 91.1079945266422 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 111.66666666666667, 62.02329311548113 ], "wc_summary_review_avg": [ 44.0, 11.430952132988164 ], "wc_review_avg": [ 547.3333333333334, 107.4874669696684 ], "wc_reply_reviewers_avg": [ 25.666666666666668, 6.018490028422596 ], "wc_reply_authors_avg": [ 754.6666666666666, 95.27620666019169 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.4999999999999999, "corr_recommendation_correctness": 0.9999999999999998, "gs_citation": 100, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10338526484857000717&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=TFbwV6I0VLg", "email": "google.com;waabi.ai;google.com;toronto.edu;google.com", "author_num": 5, "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "Google;Waabi;University of 
Toronto", "aff_unique_dep": "Google;;", "aff_unique_url": "https://www.google.com;;https://www.utoronto.ca", "aff_unique_abbr": "Google;;U of T", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;2;0", "aff_country_unique": "United States;;Canada" }, { "title": "What Makes Convolutional Models Great on Long Sequence Modeling?", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10776", "id": "TGJSPbRpJX-", "poster": "", "openreview": "https://openreview.net/forum?id=TGJSPbRpJX-", "slides": "https://iclr.cc/virtual/2023/poster/10776", "video": "https://iclr.cc/virtual/2023/poster/10776", "author_site": "Yuhong Li, Tianle Cai, Yi Zhang, Deming Chen, Debadeepta Dey", "tldr": "We proposed a simple Strucured Global Convolution Kernel for long-range dependencies.", "abstract": "Convolutional models have been widely used in multiple domains. However, most existing models only use local convolution, making the model unable to handle long-range dependencies efficiently. Attention overcomes this problem by aggregating global information based on the pair-wise attention score but also makes the computational complexity quadratic to the sequence length. Recently, Gu et al. proposed a model called S4 inspired by the state space model. S4 can be efficiently implemented as a global convolutional model whose kernel size equals the input sequence length. With Fast Fourier Transform, S4 can model much longer sequences than Transformers and achieve significant gains over SoTA on several long-range tasks. Despite its empirical success, S4 is involved. It requires sophisticated parameterization and initialization schemes that combine the wisdom from several prior works. As a result, S4 is less intuitive and hard to use for researchers with limited prior knowledge. Here we aim to demystify S4 and extract basic principles that contribute to the success of S4 as a global convolutional model. We focus on the structure of the convolution kernel and identify two critical but intuitive principles enjoyed by S4 that are sufficient to make up an effective global convolutional model: 1) The parameterization of the convolutional kernel needs to be efficient in the sense that the number of parameters should scale sub-linearly with sequence length. 2) The kernel needs to satisfy a decaying structure that the weights for convolving with closer neighbors are larger than the more distant ones. Based on the two principles, we propose a simple yet effective convolutional model called Structured Global Convolution (SGConv). SGConv exhibits strong empirical performance over several tasks: 1) With faster speed, SGConv surpasses the previous SoTA on Long Range Arena and Speech Command datasets. 
2) When plugging SGConv into standard language and vision models, it shows the potential to improve both efficiency and performance.", "keywords": "Convolutional Neural Network;Deep Learning Architectures;Long-range dependence;Reparameterization", "primary_area": "", "supplementary_material": "", "author": "Yuhong Li;Tianle Cai;Yi Zhang;Deming Chen;Debadeepta Dey", "authorids": "~Yuhong_Li2;~Tianle_Cai1;~Yi_Zhang1;~Deming_Chen1;~Debadeepta_Dey1", "gender": "M;M;M;;M", "homepage": "https://leeyeehoo.github.io/;https://tianle.website;https://yi-zhang.me;;http://www.debadeepta.com", "dblp": ";241/9458;64/6544-74;;76/10090", "google_scholar": "Qh-6mV8AAAAJ;CvwLRSMAAAAJ;lc6CVqEAAAAJ;;uIBzJWIAAAAJ", "orcid": "0000-0002-3769-6772;;;;", "linkedin": ";;;;", "or_profile": "~Yuhong_Li2;~Tianle_Cai1;~Yi_Zhang1;~Deming_Chen1;~Debadeepta_Dey1", "aff": "University of Illinois, Urbana Champaign;Princeton University;Microsoft;;Microsoft Research", "aff_domain": "illinois.edu;princeton.edu;microsoft.com;;microsoft.com", "position": "PhD student;PhD student;Postdoc;;Principal Researcher", "bibtex": "@inproceedings{\nli2023what,\ntitle={What Makes Convolutional Models Great on Long Sequence Modeling?},\nauthor={Yuhong Li and Tianle Cai and Yi Zhang and Deming Chen and Debadeepta Dey},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=TGJSPbRpJX-}\n}", "github": "", "project": "", "reviewers": "gGtF;oChP;sRmA;WDs3", "pdf_size": 1176648, "recommendation": "6;6;8;8", "confidence": "4;4;4;3", "correctness": "4;4;4;3", "technical_novelty": "3;3;3;3", "empirical_novelty": "3;3;4;2", "wc_summary_paper": "70;194;98;60", "wc_strength_and_weaknesses": "35;270;43;165", "wc_clarity_quality_novelty_and_reproducibility": "18;68;10;13", "wc_summary_review": "43;52;2;28", "wc_review": "166;584;153;266", "wc_reply_reviewers": "0;114;0;0", "wc_reply_authors": "66;535;24;272", "reply_reviewers": "0;1;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_paper_avg": [ 105.5, 52.95989048327045 ], "wc_strength_and_weaknesses_avg": [ 128.25, 96.7041234901594 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 27.25, 23.699947257325277 ], "wc_summary_review_avg": [ 31.25, 18.93904696651867 ], "wc_review_avg": [ 292.25, 174.02352570845127 ], "wc_reply_reviewers_avg": [ 28.5, 49.363448015713004 ], "wc_reply_authors_avg": [ 224.25, 202.47762222033327 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": -0.5773502691896257, "gs_citation": 116, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2112933941156408638&as_sdt=5,24&sciodt=0,24&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=TGJSPbRpJX-", "email": "illinois.edu;princeton.edu;microsoft.com;;microsoft.com", "author_num": 5, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "University of Illinois Urbana-Champaign;Princeton University;Microsoft", "aff_unique_dep": ";;Microsoft Corporation", "aff_unique_url": "https://illinois.edu;https://www.princeton.edu;https://www.microsoft.com", "aff_unique_abbr": "UIUC;Princeton;Microsoft", "aff_campus_unique_index": "0", 
"aff_campus_unique": "Urbana-Champaign;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "THp4UABcMv", "title": "Learning Asymmetric Visual Semantic Embedding for Image-Text Retrieval", "track": "main", "status": "Withdraw", "tldr": "In this paper, we propose a novel method to calculate visual semantic similarity for image-text matching and achieve outperform recent state-of-the-art methods on two widely used datasets.", "abstract": "Learning visual semantic similarity is the key challenge to bridge the correspondences between images and texts. However, there are many inherent variations between vision and language data, such as information density, i.e., images can contain textual information from multiple different views, which makes it difficult to accurately compute the similarity between these two modality data. In the mainstream methods, global-level methods cannot effectively handle the above problem, while local-level methods need complicated mechanism, which significantly affects the retrieval efficiency. In this paper, we propose Asymmetric Visual Semantic Embedding (AVSE), which aims to design a novel model to learn visual semantic similarity by explicitly considering the difference in information density between the two modalities and eschew the prohibitive computations. Specifically, to keep the information density of images, AVSE exploits the large spatial redundancy of image regions to capture and concatenate multi-view features as image embedding. It also has a novel module to efficiently calculate the visual semantic similarity of asymmetric image embedding and text embedding via dividing embeddings into many semantic blocks with the same dimension and compute the similarity by finding the optimal match between these semantic blocks. Extensive experiments on large-scale MS-COCO and Flickr30K datasets verify the superiority of our proposed AVSE compared with recent state-of-the-art methods. 
Compared to the recent NAAF method, our AVSE inference is 1000 times faster on the 1K test set and more accurately on the widely used benchmarks.", "keywords": "Cross-modal retrieval;image-text matching", "primary_area": "", "supplementary_material": "", "author": "Yang Liu;Chen Chen;Can Wang;Mengyuan Liu", "authorids": "~Yang_Liu76;~Chen_Chen18;wangcan@linxrobot.com;~Mengyuan_Liu2", "gender": "M;M;;", "homepage": ";https://www.crcv.ucf.edu/chenchen/;;https://www.semanticscholar.org/author/Mengyuan-Liu/47842072", "dblp": "51/3710-264;65/4423-1;;", "google_scholar": "0prb9aQAAAAJ;TuEwcZ0AAAAJ;;woX_4AcAAAAJ", "orcid": "0000-0001-5159-5954;0000-0003-3957-7061;;0000-0002-6332-8316", "linkedin": ";dennychen/;;", "or_profile": "~Yang_Liu76;~Chen_Chen18;wangcan@linxrobot.com;~Mengyuan_Liu2", "aff": "SUN YAT-SEN UNIVERSITY;University of Central Florida;;SUN YAT-SEN UNIVERSITY", "aff_domain": "sysu.edu.cn;ucf.edu;;sysu.edu.cn", "position": "Research Assistant;Assistant Professor;;Associate Professor", "bibtex": "@misc{\nliu2023learning,\ntitle={Learning Asymmetric Visual Semantic Embedding for Image-Text Retrieval},\nauthor={Yang Liu and Chen Chen and Can Wang and Mengyuan Liu},\nyear={2023},\nurl={https://openreview.net/forum?id=THp4UABcMv}\n}", "github": "", "project": "", "reviewers": "yppV;q2AE;xDLx;pQQE", "site": "https://openreview.net/forum?id=THp4UABcMv", "pdf_size": 1372800, "recommendation": "3;5;5;6", "confidence": "4;5;4;5", "correctness": "2;3;3;3", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "67;74;87;244", "wc_strength_and_weaknesses": "101;365;196;709", "wc_clarity_quality_novelty_and_reproducibility": "33;13;109;87", "wc_summary_review": "19;41;67;105", "wc_review": "220;493;459;1145", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 4.5, 0.5 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 118.0, 73.09924760214705 ], "wc_strength_and_weaknesses_avg": [ 342.75, 231.63157707877394 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 60.5, 38.94547470502831 ], "wc_summary_review_avg": [ 58.0, 32.01562118716424 ], "wc_review_avg": [ 579.25, 343.1591285395159 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.6882472016116854, "corr_recommendation_correctness": 0.9271726499455306, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:R46WaIoHt30J:scholar.google.com/&scioq=Learning+Asymmetric+Visual+Semantic+Embedding+for+Image-Text+Retrieval&hl=en&as_sdt=0,47", "gs_version_total": 0, "aff_unique_index": "0;1;0", "aff_unique_norm": "Sun Yat-sen University;University of Central Florida", "aff_unique_dep": ";", "aff_unique_url": "http://www.sysu.edu.cn;https://www.ucf.edu", "aff_unique_abbr": "SYSU;UCF", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "China;United States" }, { "title": "The Lazy Neuron Phenomenon: On Emergence of Activation Sparsity in Transformers", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11756", "id": "TJ2nxciYCk-", "poster": 
"/media/PosterPDFs/ICLR%202023/11756.png?t=1682797377.175357", "openreview": "https://openreview.net/forum?id=TJ2nxciYCk-", "slides": "https://iclr.cc/virtual/2023/poster/11756", "video": "https://iclr.cc/virtual/2023/poster/11756", "author_site": "Zonglin Li, Chong You, Srinadh Bhojanapalli, Daliang Li, Ankit Singh Rawat, Sashank Reddi, Ke Ye, Felix Chern, Felix Yu, Ruiqi Guo, Sanjiv Kumar", "tldr": "Learned Transformers for NLP (e.g., T5) and Vision (e.g., ViT) tasks produce sparse representations in their MLP layers. The sparsity may be leveraged to improve robustness, calibration, and computational efficiency of Transformer models.", "abstract": "This paper studies a curious phenomenon that machine learning model with Transformer architectures have sparse activation maps. By activation map we refer to the intermediate output of the multi-layer perceptrons (MLPs) after a ReLU activation function, and by \"sparse\" we mean that on average very few entries (e.g., 3.0% for T5-Base and 6.3% for ViT-B16) are nonzero for each input to MLP. Moreover, larger Transformers with more layers and wider MLP hidden dimensions are sparser as measured by the percentage of nonzero entries. Through extensive experiments we demonstrate that the emergence of sparsity is a prevalent phenomenon that occurs for both natural language processing and vision tasks, on both training and evaluation data, for Transformers of various configurations, at layers of all depth levels. We discuss how sparsity immediately implies a way to significantly reduce the FLOP count and improve efficiency for Transformers. Moreover, we demonstrate perhaps surprisingly that enforcing an even sparser activation via Top-k thresholding with a small k brings a collection of desired properties, namely less sensitivity to noisy training data, more robustness to input corruptions, and better calibration for their prediction confidence.", "keywords": "Transformers;Sparse;Calibration;Robustness;Label Noise;Efficiency", "primary_area": "", "supplementary_material": "", "author": "Zonglin Li;Chong You;Srinadh Bhojanapalli;Daliang Li;Ankit Singh Rawat;Sashank J. 
Reddi;Ke Ye;Felix Chern;Felix Yu;Ruiqi Guo;Sanjiv Kumar", "authorids": "~Zonglin_Li2;~Chong_You2;~Srinadh_Bhojanapalli1;~Daliang_Li1;~Ankit_Singh_Rawat1;~Sashank_J._Reddi1;~Ke_Ye1;~Felix_Chern1;~Felix_Yu1;~Ruiqi_Guo3;~Sanjiv_Kumar1", "gender": "M;M;M;M;M;M;M;M;M;M;", "homepage": ";https://sites.google.com/view/cyou;https://bsrinadh.github.io/;;https://ankitsrawat.github.io/home/;;;https://research.google/people/FelixChern/;http://felixyu.org;http://aqua.cs.uiuc.edu/site/;http://www.sanjivk.com/", "dblp": "142/9188;164/7311;131/6700;;https://dblp.org/pers/hd/r/Rawat:Ankit_Singh;50/10452;;247/9515.html;23/10574;78/7198;", "google_scholar": ";Mfrpm_IAAAAJ;bpSF_9EAAAAJ;Am6f2DsAAAAJ;http://scholar.google.com/citations?user=U0_ab4cAAAAJ;70lgwYwAAAAJ;J3cxj1wAAAAJ;;lYvF6cUAAAAJ;Cgb68qkAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;;;;0000-0002-2978-2013;;;;", "linkedin": "lizonglin;;;daliangli/;;;ke-ye-27b53977/;;;;", "or_profile": "~Zonglin_Li2;~Chong_You2;~Srinadh_Bhojanapalli1;~Daliang_Li1;~Ankit_Singh_Rawat1;~Sashank_J._Reddi1;~Ke_Ye1;~Felix_Chern1;~Felix_Yu1;~Ruiqi_Guo3;~Sanjiv_Kumar1", "aff": "Google;Google;Google;Google;Google;Google;Google;Google;Google;Google;Google", "aff_domain": "google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com", "position": "Researcher;Research Scientist;Research Scientist;Researcher;Research Scientist;Research Scientist;Researcher;Researcher;Research Scientist;Researcher;Research Scientist", "bibtex": "@inproceedings{\nli2023the,\ntitle={The Lazy Neuron Phenomenon: On Emergence of Activation Sparsity in Transformers},\nauthor={Zonglin Li and Chong You and Srinadh Bhojanapalli and Daliang Li and Ankit Singh Rawat and Sashank J. Reddi and Ke Ye and Felix Chern and Felix Yu and Ruiqi Guo and Sanjiv Kumar},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=TJ2nxciYCk-}\n}", "github": "", "project": "", "reviewers": "x5F1;cu8i;urTS;HpEq;KPJJ", "pdf_size": 2370545, "recommendation": "5;5;6;8;8", "confidence": "3;2;4;3;3", "correctness": "3;4;4;3;3", "technical_novelty": "2;2;2;3;3", "empirical_novelty": "2;2;3;3;4", "wc_summary_paper": "137;29;57;41;62", "wc_strength_and_weaknesses": "179;135;304;122;154", "wc_clarity_quality_novelty_and_reproducibility": "71;36;13;30;35", "wc_summary_review": "28;39;90;46;64", "wc_review": "415;239;464;239;315", "wc_reply_reviewers": "0;0;17;18;22", "wc_reply_authors": "331;293;646;251;341", "reply_reviewers": "0;0;1;1;1", "reply_authors": "1;1;1;1;1", "recommendation_avg": [ 6.4, 1.3564659966250536 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "correctness_avg": [ 3.4, 0.4898979485566356 ], "technical_novelty_avg": [ 2.4, 0.4898979485566356 ], "empirical_novelty_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_paper_avg": [ 65.2, 37.75923728043245 ], "wc_strength_and_weaknesses_avg": [ 178.8, 65.47488067954 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 37.0, 18.89973544788392 ], "wc_summary_review_avg": [ 53.4, 21.721878371816743 ], "wc_review_avg": [ 334.4, 91.51087367083761 ], "wc_reply_reviewers_avg": [ 11.4, 9.457272334029511 ], "wc_reply_authors_avg": [ 372.4, 140.4273477638882 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 11, 0 ], "corr_recommendation_confidence": 0.23312620206007845, "corr_recommendation_correctness": -0.5417363388859615, "gs_citation": 92, 
"gs_cited_by_link": "https://scholar.google.com/scholar?cites=15594712823591842593&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=TJ2nxciYCk-", "email": "google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com", "author_num": 11, "aff_unique_index": "0;0;0;0;0;0;0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0;0;0;0;0;0;0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "TJPmwnQIMmw", "title": "Adversarial Causal Augmentation for Graph Covariate Shift", "track": "main", "status": "Reject", "tldr": "We propose a novel graph data augmentation method, Adversarial Causal Augmentation (AdvCA), to address the covariate shift issues.", "abstract": "Out-of-distribution (OOD) generalization on graphs is drawing widespread attention. However, existing efforts mainly focus on the OOD issue of correlation shift. While another type, covariate shift, remains largely unexplored but is the focus of this work. From a data generation view, causal features are stable substructures in data, which play key roles in OOD generalization. While their complementary parts, environments, are unstable features that often lead to various distribution shifts. Correlation shift establishes spurious statistical correlations between environments and labels. In contrast, covariate shift means that there exist unseen environmental features in test data. Existing strategies of graph invariant learning and data augmentation suffer from limited environments or unstable causal features, which greatly limits their generalization ability on covariate shift. In view of that, we propose a novel graph augmentation strategy: Adversarial Causal Augmentation (AdvCA), to alleviate the covariate shift. Specifically, it adversarially augments the data to explore diverse distributions of the environments. Meanwhile, it keeps the causal features invariant across diverse environments. It maintains the environmental diversity while ensuring the invariance of the causal features, thereby effectively alleviating the covariate shift. 
Extensive experimental results with in-depth analyses demonstrate that AdvCA can outperform 14 baselines on synthetic and real-world datasets with various covariate shifts.", "keywords": "Graph Data Augmentation;Graph Neural Networks;Covariate Shift;OOD Generalization", "primary_area": "", "supplementary_material": "/attachment/87a5b1d10e2f7ffc7ef606d017e94292c0f4f301.zip", "author": "Yongduo Sui;Xiang Wang;Jiancan Wu;An Zhang;Xiangnan He;Tat-Seng Chua", "authorids": "~Yongduo_Sui1;~Xiang_Wang6;~Jiancan_Wu1;~An_Zhang2;~Xiangnan_He1;~Tat-Seng_Chua2", "gender": "M;M;M;M;F;M", "homepage": "https://yongduosui.github.io/;https://github.com/xiangwang1223;https://wujcan.github.io/;http://staff.ustc.edu.cn/~hexn;https://github.com/anzhang314;http://www.comp.nus.edu.sg/~chuats/", "dblp": "277/5175;31/2864-10;257/4945;59/1007;78/5581-3;", "google_scholar": "VD9g6ogAAAAJ;https://scholar.google.com.sg/citations?user=HdhaQB0AAAAJ;z9zW1UgAAAAJ;https://scholar.google.com.sg/citations?user=X45Go24AAAAJ;https://scholar.google.com.sg/citations?user=BcX7GJcAAAAJ;https://scholar.google.com.tw/citations?user=Z9DWCBEAAAAJ", "orcid": "0000-0003-4492-147X;0000-0002-6148-6329;0000-0002-6941-5218;0000-0001-8472-7992;;0000-0001-6097-7807", "linkedin": "yongduosui/;;;;;", "or_profile": "~Yongduo_Sui1;~Xiang_Wang6;~Jiancan_Wu1;~Xiangnan_He1;~AN_ZHANG1;~Tat-seng_Chua1", "aff": "University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;National University of Singapore;National University of Singapore", "aff_domain": "ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;nus.edu.sg;nus.edu.sg", "position": "PhD student;Full Professor;Postdoc;Professor;Postdoc;Full Professor", "bibtex": "@misc{\nsui2023adversarial,\ntitle={Adversarial Causal Augmentation for Graph Covariate Shift},\nauthor={Yongduo Sui and Xiang Wang and Jiancan Wu and An Zhang and Xiangnan He and Tat-Seng Chua},\nyear={2023},\nurl={https://openreview.net/forum?id=TJPmwnQIMmw}\n}", "github": "", "project": "", "reviewers": "5fRb;gnLo;rgVp;Jykb", "site": "https://openreview.net/forum?id=TJPmwnQIMmw", "pdf_size": 3279735, "recommendation": "3;3;6;6", "confidence": "4;5;3;3", "correctness": "2;2;3;4", "technical_novelty": "2;2;3;3", "empirical_novelty": "3;2;3;3", "wc_summary_paper": "70;23;59;95", "wc_strength_and_weaknesses": "306;153;118;124", "wc_clarity_quality_novelty_and_reproducibility": "24;37;8;9", "wc_summary_review": "29;41;28;29", "wc_review": "429;254;213;257", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "2036;1651;750;986", "reply_reviewers": "0;0;0;0", "reply_authors": "5;5;2;3", "recommendation_avg": [ 4.5, 1.5 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 2.75, 0.82915619758885 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 61.75, 25.897635027160298 ], "wc_strength_and_weaknesses_avg": [ 175.25, 76.63998629958124 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 19.5, 11.926860441876563 ], "wc_summary_review_avg": [ 31.75, 5.356071321407137 ], "wc_review_avg": [ 288.25, 83.10046630434753 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1355.75, 513.2155370796952 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 3.75, 1.299038105676658 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.9045340337332909, "corr_recommendation_correctness": 
0.9045340337332909, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13225572133086738273&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;1;1", "aff_unique_norm": "University of Science and Technology of China;National University of Singapore", "aff_unique_dep": ";", "aff_unique_url": "http://www.ustc.edu.cn;https://www.nus.edu.sg", "aff_unique_abbr": "USTC;NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;1", "aff_country_unique": "China;Singapore" }, { "id": "TJY-0L3WyAM", "title": "Causal Information Bottleneck Boosts Adversarial Robustness of Deep Neural Network", "track": "main", "status": "Withdraw", "tldr": "The information bottleneck based on the causal method could boost the adversarial robustness", "abstract": "The information bottleneck (IB) method is a feasible defense solution against adversarial attacks in deep learning. However, this method suffers from the spurious correlation, which leads to the limitation of its further improvement of adversarial robustness. In this paper, we incorporate the causal inference into the IB framework to alleviate such a problem. Specifically, we divide the features obtained by the IB method into robust features (content information) and non-robust features (style information) via the instrumental variables to estimate the causal effects. With the utilization of such a framework, the influence of non-robust features could be mitigated to strengthen the adversarial robustness. We make an analysis of the effectiveness of our proposed method. The extensive experiments in MNIST, FashionMNIST, and CIFAR-10 show that our method exhibits the considerable robustness against multiple adversarial attacks. 
Our code would be released.", "keywords": "deep learning;adversarial examples;information bottleneck", "primary_area": "", "supplementary_material": "", "author": "Huan Hua;Jun Yan;Xi Fang;weiquan Huang;Huilin Yin;Wancheng Ge", "authorids": "~Huan_Hua1;~Jun_Yan7;~Xi_Fang1;~weiquan_Huang1;yinhuilin@tongji.edu.cn;gwc828@tongji.edu.cn", "gender": "M;M;M;F;;", "homepage": ";https://github.com/momo1986;;;;", "dblp": ";89/5901-9.html;;;;", "google_scholar": ";kvIH_ZgAAAAJ;f1IADCIAAAAJ;;;", "orcid": ";0000-0003-3048-9604;;;;", "linkedin": "https://www.linkedin.cn/incareer/in/%E6%A1%93-%E5%8D%8E-5800b319b;;https://www.linkedin.cn/incareer/in/%E6%9B%A6-%E6%96%B9-503886103;;;", "or_profile": "~Huan_Hua1;~Jun_Yan7;~Xi_Fang1;~weiquan_Huang1;yinhuilin@tongji.edu.cn;gwc828@tongji.edu.cn", "aff": "Tongji University;Tongji University;SmartMore;Tongji University;;", "aff_domain": "tongji.edu.cn;tongji.edu.cn;smartmore.com;tongji.edu.cn;;", "position": "Undergrad student;PhD student;Researcher;PhD student;;", "bibtex": "@misc{\nhua2023causal,\ntitle={Causal Information Bottleneck Boosts Adversarial Robustness of Deep Neural Network},\nauthor={Huan Hua and Jun Yan and Xi Fang and weiquan Huang and Huilin Yin and Wancheng Ge},\nyear={2023},\nurl={https://openreview.net/forum?id=TJY-0L3WyAM}\n}", "github": "", "project": "", "reviewers": "c4e4;z5Rm;2u4y;T2VZ", "site": "https://openreview.net/forum?id=TJY-0L3WyAM", "pdf_size": 735239, "recommendation": "1;1;3;5", "confidence": "4;5;3;4", "correctness": "2;2;3;3", "technical_novelty": "1;1;2;3", "empirical_novelty": "1;0;2;3", "wc_summary_paper": "48;25;51;68", "wc_strength_and_weaknesses": "29;242;159;160", "wc_clarity_quality_novelty_and_reproducibility": "61;13;39;156", "wc_summary_review": "94;26;27;49", "wc_review": "232;306;276;433", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "80;283;222;80", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 2.5, 1.6583123951777 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 1.75, 0.82915619758885 ], "empirical_novelty_avg": [ 1.5, 1.118033988749895 ], "wc_summary_paper_avg": [ 48.0, 15.313392831113555 ], "wc_strength_and_weaknesses_avg": [ 147.5, 76.25778648767613 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 67.25, 53.98321498391884 ], "wc_summary_review_avg": [ 49.0, 27.55902755904134 ], "wc_review_avg": [ 311.75, 74.78761595344513 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 166.25, 88.9054975802959 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.42640143271122083, "corr_recommendation_correctness": 0.9045340337332909, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7352757307336947501&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Tongji University;SmartMore", "aff_unique_dep": ";", "aff_unique_url": "https://www.tongji.edu.cn;", "aff_unique_abbr": "Tongji;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China;" }, { "id": "TJjaQEOK8a", "title": "In the ZONE: Measuring difficulty and progression in curriculum generation", "track": "main", "status": "Reject", "tldr": "This work proposes a Bayesian computational framework to operationalize ``the zone of proximal development'' and to improve existing curriculum 
generation algorithms.", "abstract": "A common strategy in curriculum generation for reinforcement learning is to train a teacher network to generate tasks that fall within a student network's ``zone of proximal development'' (ZPD). These are tasks that are not too easy and not too hard for the student. Albeit intuitive, ZPD is not well understood computationally. We propose ZONE, a novel computational framework that operationalizes ZPD. It formalizes ZPD through the language of Bayesian probability theory, revealing that tasks should be selected by difficulty (the student's success probability on the task) and learning progression (the degree of change in the student's model parameters). ZONE operationalizes ZPD with two techniques that we apply on top of existing algorithms. One is REJECT, which rejects tasks outside a difficulty scope and the other is GRAD, which prioritizes tasks that maximize the student's gradient norm. Compared to the original algorithms, the ZONE techniques improve the student\u2019s generalization performance on discrete Minigrid environments and continuous control Mujoco domains with up to $9 \\times$ higher success. ZONE also accelerates the student's learning by training on up to $10\\times$ less data.", "keywords": "curriculum learning;multiagent;Bayesian", "primary_area": "", "supplementary_material": "/attachment/90dc063dce547299448dbfc8f2eba154836b20f3.zip", "author": "Rose E Wang;Jesse Mu;Dilip Arumugam;Natasha Jaques;Noah Goodman", "authorids": "~Rose_E_Wang1;~Jesse_Mu1;~Dilip_Arumugam1;~Natasha_Jaques1;~Noah_Goodman1", "gender": "F;;M;F;", "homepage": "https://cs.stanford.edu/~rewang;https://www.jesse.mu/;http://dilipa.github.io/;https://natashajaques.ai/;https://cocolab.stanford.edu/", "dblp": "259/1500;205/9022;165/1303;145/7732;96/1216", "google_scholar": "V-dlwF4AAAAJ;djLcGEQAAAAJ;gzHbYVQAAAAJ;8iCb2TwAAAAJ;OUpIbcQAAAAJ", "orcid": ";0000-0002-0812-2710;;;", "linkedin": ";jayelm;;natashajaques;", "or_profile": "~Rose_E_Wang1;~Jesse_Mu1;~Dilip_Arumugam1;~Natasha_Jaques1;~Noah_Goodman1", "aff": "Stanford University;Stanford University;Stanford University;Google;Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu;google.com;stanford.edu", "position": "PhD student;PhD student;PhD student;Senior Research Scientist;Full Professor", "bibtex": "@misc{\nwang2023in,\ntitle={In the {ZONE}: Measuring difficulty and progression in curriculum generation},\nauthor={Rose E Wang and Jesse Mu and Dilip Arumugam and Natasha Jaques and Noah Goodman},\nyear={2023},\nurl={https://openreview.net/forum?id=TJjaQEOK8a}\n}", "github": "", "project": "", "reviewers": "BAdD;9WXc;3apF;Go3r", "site": "https://openreview.net/forum?id=TJjaQEOK8a", "pdf_size": 3630665, "recommendation": "5;5;5;5", "confidence": "4;2;4;5", "correctness": "3;3;3;3", "technical_novelty": "3;2;2;3", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "125;76;75;99", "wc_strength_and_weaknesses": "188;107;515;164", "wc_clarity_quality_novelty_and_reproducibility": "1;16;23;30", "wc_summary_review": "52;18;44;43", "wc_review": "366;217;657;336", "wc_reply_reviewers": "290;0;0;0", "wc_reply_authors": "1240;427;2111;522", "reply_reviewers": "2;0;0;0", "reply_authors": "3;1;3;1", "recommendation_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 93.75, 20.437404434027332 ], "wc_strength_and_weaknesses_avg": [ 243.5, 159.48746032212063 
], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 17.5, 10.735455276791944 ], "wc_summary_review_avg": [ 39.25, 12.754901018824098 ], "wc_review_avg": [ 394.0, 161.7451699433402 ], "wc_reply_reviewers_avg": [ 72.5, 125.5736835487436 ], "wc_reply_authors_avg": [ 1075.0, 675.6911276611526 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.0, 1.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5352535747857840121&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Stanford University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.stanford.edu;https://www.google.com", "aff_unique_abbr": "Stanford;Google", "aff_campus_unique_index": "0;0;0;1;0", "aff_campus_unique": "Stanford;Mountain View", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Can CNNs Be More Robust Than Transformers?", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11898", "id": "TKIFuQHHECj", "poster": "/media/PosterPDFs/ICLR%202023/11898.png?t=1682710777.101982", "openreview": "https://openreview.net/forum?id=TKIFuQHHECj", "slides": "https://iclr.cc/virtual/2023/poster/11898", "video": "https://iclr.cc/virtual/2023/poster/11898", "author_site": "Zeyu Wang, Yutong Bai, Yuyin Zhou, Cihang Xie", "tldr": "we show CNNs can be as robust as, or even more robust than, Transformers", "abstract": "The recent success of Vision Transformers is shaking the long dominance of Convolutional Neural Networks (CNNs) in image recognition for a decade. Specifically, in terms of robustness on out-of-distribution samples, recent research finds that Transformers are inherently more robust than CNNs, regardless of different training setups. Moreover, it is believed that such superiority of Transformers should largely be credited to their \\emph{self-attention-like architectures per se}. In this paper, we question that belief by closely examining the design of Transformers. Our findings lead to three highly effective architecture designs for boosting robustness, yet simple enough to be implemented in several lines of code, namely a) patchifying input images, b) enlarging kernel size, and c) reducing activation layers and normalization layers. Bringing these components together, we are able to build pure CNN architectures without any attention-like operations that are as robust as, or even more robust than, Transformers. We hope this work can help the community better understand the design of robust neural architectures. 
The code is publicly available at https://github.com/UCSC-VLAA/RobustCNN.", "keywords": "CNNs;Transformers;Out-of-Distribution Robustness", "primary_area": "", "supplementary_material": "", "author": "Zeyu Wang;Yutong Bai;Yuyin Zhou;Cihang Xie", "authorids": "~Zeyu_Wang2;~Yutong_Bai1;~Yuyin_Zhou1;~Cihang_Xie3", "gender": ";F;;", "homepage": ";https://yutongbai.com/;https://yuyinzhou.github.io/;", "dblp": ";216/8431;192/1413;", "google_scholar": ";N1-l4GsAAAAJ;eiqVLC0AAAAJ;", "orcid": ";;;", "linkedin": ";%E9%9B%A8%E6%A1%90-%E7%99%BD-59a44a136/;;", "or_profile": "~Zeyu_Wang2;~Yutong_Bai1;~Yuyin_Zhou1;~Cihang_Xie3", "aff": ";Johns Hopkins University;University of California, Santa Cruz;", "aff_domain": ";jhu.edu;ucsc.edu;", "position": ";PhD student;Assistant Professor;", "bibtex": "@inproceedings{\nwang2023can,\ntitle={Can {CNN}s Be More Robust Than Transformers?},\nauthor={Zeyu Wang and Yutong Bai and Yuyin Zhou and Cihang Xie},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=TKIFuQHHECj}\n}", "github": "", "project": "", "reviewers": "rxfk;DpNa;ntqh", "pdf_size": 525131, "recommendation": "6;8;8", "confidence": "4;4;4", "correctness": "4;4;3", "technical_novelty": "3;3;2", "empirical_novelty": "3;3;4", "wc_summary_paper": "54;262;67", "wc_strength_and_weaknesses": "227;343;298", "wc_clarity_quality_novelty_and_reproducibility": "35;33;22", "wc_summary_review": "19;63;114", "wc_review": "335;701;501", "wc_reply_reviewers": "11;0;0", "wc_reply_authors": "527;296;853", "reply_reviewers": "1;0;0", "reply_authors": "2;1;2", "recommendation_avg": [ 7.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_paper_avg": [ 127.66666666666667, 95.1361597337673 ], "wc_strength_and_weaknesses_avg": [ 289.3333333333333, 47.75167245471327 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 30.0, 5.715476066494082 ], "wc_summary_review_avg": [ 65.33333333333333, 38.818666758260626 ], "wc_review_avg": [ 512.3333333333334, 149.6336266426174 ], "wc_reply_reviewers_avg": [ 3.6666666666666665, 5.185449728701348 ], "wc_reply_authors_avg": [ 558.6666666666666, 228.4941039842288 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": -0.49999999999999983, "gs_citation": 73, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6467862774147464277&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=TKIFuQHHECj", "email": ";jhu.edu;ucsc.edu;", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Johns Hopkins University;University of California, Santa Cruz", "aff_unique_dep": ";", "aff_unique_url": "https://www.jhu.edu;https://www.ucsc.edu", "aff_unique_abbr": "JHU;UCSC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Santa Cruz", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "TKcVjKZ0BxE", "title": "A NEW PARADIGM FOR CROSS-MODALITY PERSON RE-IDENTIFICATION", "track": "main", "status": "Reject", "tldr": "", "abstract": "Visible and infrared person re-identification (ReID) is still very
challenging on account of the scarcity of cross-modality datasets and the large inter-modality variation. Most existing cross-modality ReID methods have trouble eliminating the cross-modality discrepancy resulting from heterogeneous images. In this paper, we present an effective framework and build a large benchmark, named NPU-ReID. To this end, we propose a dual-path fusion network that takes the transformer as the smallest feature extraction unit. To expand cross-modality sample diversity, we propose a modality augmentation strategy that generates semi-modality pedestrian images by exchanging certain patches; the main innovation is that the cross-modality gap can be indirectly minimized by reducing the variance between the semi-modality images and the infrared or visible modality. Moreover, to make the traditional triplet loss more suitable for cross-modal matching tasks, we design a multi-masking triplet loss that optimizes the relative distance between anchor and positive/negative sample pairs across modalities, especially constraining the distance between simple and hard positive samples. Experimental results demonstrate that our proposed method outperforms other methods on SYSU-MM01, RegDB and our proposed NPU-ReID dataset, especially on the RegDB dataset, with a significant improvement of 6.81$\\%$ in rank-1 and 9.65$\\%$ in mAP.", "keywords": "Person Re-identification;Cross-modality", "primary_area": "", "supplementary_material": "", "author": "Yumeng Wang;Feng Yang;Tongkai Xu;Yanze Zhu", "authorids": "~Yumeng_Wang2;yangfeng@nwpu.edu.cn;220221897@seu.edu.cn;yanzezhu07@gmail.com", "gender": "F;;;", "homepage": ";;;", "dblp": ";;;", "google_scholar": "KajLg4MAAAAJ;;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Yumeng_Wang2;yangfeng@nwpu.edu.cn;220221897@seu.edu.cn;yanzezhu07@gmail.com", "aff": "Northwest Polytechnical University Xi'an;;;", "aff_domain": "nwpu.edu.cn;;;", "position": "MS student;;;", "bibtex": "@misc{\nwang2023a,\ntitle={A {NEW} {PARADIGM} {FOR} {CROSS}-{MODALITY} {PERSON} {RE}-{IDENTIFICATION}},\nauthor={Yumeng Wang and Feng Yang and Tongkai Xu and Yanze Zhu},\nyear={2023},\nurl={https://openreview.net/forum?id=TKcVjKZ0BxE}\n}", "github": "", "project": "", "reviewers": "hWin;2Ybv;heJg;Wz3t", "site": "https://openreview.net/forum?id=TKcVjKZ0BxE", "pdf_size": 1180056, "recommendation": "3;3;3;3", "confidence": "4;5;4;5", "correctness": "3;2;3;2", "technical_novelty": "2;2;2;2", "empirical_novelty": "0;0;3;2", "wc_summary_paper": "76;20;73;48", "wc_strength_and_weaknesses": "577;263;240;301", "wc_clarity_quality_novelty_and_reproducibility": "68;30;144;2", "wc_summary_review": "49;67;40;28", "wc_review": "770;380;497;379", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 3.0, 0.0 ], "confidence_avg": [ 4.5, 0.5 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.0, 0.0 ], "empirical_novelty_avg": [ 1.25, 1.299038105676658 ], "wc_summary_paper_avg": [ 54.25, 22.56518335843961 ], "wc_strength_and_weaknesses_avg": [ 345.25, 135.56248559243815 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 61.0, 53.33854141237835 ], "wc_summary_review_avg": [ 46.0, 14.230249470757707 ], "wc_review_avg": [ 506.5, 159.51567321112995 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0,
"corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:18eSgTtgIdcJ:scholar.google.com/&scioq=A+NEW+PARADIGM+FOR+CROSS-MODALITY+PERSON+RE-IDENTIFICATION&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Northwest Polytechnical University", "aff_unique_dep": "", "aff_unique_url": "http://www.nwpu.edu.cn", "aff_unique_abbr": "NWPU", "aff_campus_unique_index": "0", "aff_campus_unique": "Xi'an", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "id": "TLx9diIRJVj", "title": "SynBench: Task-Agnostic Benchmarking of Pretrained Representations using Synthetic Data", "track": "main", "status": "Reject", "tldr": "", "abstract": "Recent success in fine-tuning large models, that are pretrained on broad data at scale, on downstream tasks has led to a significant paradigm shift in deep learning, from task-centric model design to task-agnostic representation learning and task-specific fine-tuning. As the representations of pretrained models are used as a foundation for different downstream tasks, this paper proposes a new task-agnostic framework, \\textit{SynBench}, to measure the quality of pretrained representations using synthetic data. We set up a reference by a theoretically-derived robustness-accuracy tradeoff of the class conditional Gaussian mixture. Given a pretrained model, the representations of data synthesized from the Gaussian mixture are used to compare with our reference to infer the quality. By comparing the ratio of area-under-curve between the raw data and their representations, SynBench offers a quantifiable score for robustness-accuracy performance benchmarking. Our framework applies to a wide range of pretrained models taking continuous data inputs and is independent of the downstream tasks and datasets. Evaluated with several pretrained vision transformer models, the experimental results show that our SynBench score well matches the actual linear probing performance of the pre-trained model when fine-tuned on downstream tasks. 
Moreover, our framework can be used to inform the design of robust linear probing on pretrained representations to mitigate the robustness-accuracy tradeoff in downstream tasks.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Ching-Yun Ko;Pin-Yu Chen;Jeet Mohapatra;Payel Das;Luca Daniel", "authorids": "~Ching-Yun_Ko1;~Pin-Yu_Chen1;~Jeet_Mohapatra1;~Payel_Das1;~Luca_Daniel1", "gender": "F;M;M;F;", "homepage": ";http://www.pinyuchen.com;;;https://www.mit.edu/~dluca/", "dblp": "206/6472;39/8969;210/2304;56/7926;35/5202", "google_scholar": ";jxwlCUUAAAAJ;;;", "orcid": ";0000-0003-1039-8369;;;0000-0002-5880-3151", "linkedin": ";pin-yu-chen-940062a2;;;", "or_profile": "~Ching-Yun_Ko1;~Pin-Yu_Chen1;~Jeet_Mohapatra1;~Payel_Das1;~Luca_Daniel1", "aff": "Massachusetts Institute of Technology;International Business Machines;;IBM, International Business Machines;", "aff_domain": "mit.edu;ibm.com;;us.ibm.com;", "position": "PhD student;Principal Researcher;;Principal Researcher;", "bibtex": "@misc{\nko2023synbench,\ntitle={SynBench: Task-Agnostic Benchmarking of Pretrained Representations using Synthetic Data},\nauthor={Ching-Yun Ko and Pin-Yu Chen and Jeet Mohapatra and Payel Das and Luca Daniel},\nyear={2023},\nurl={https://openreview.net/forum?id=TLx9diIRJVj}\n}", "github": "", "project": "", "reviewers": "Lj97;REYd;xsCU", "site": "https://openreview.net/forum?id=TLx9diIRJVj", "pdf_size": 1338444, "recommendation": "3;3;5", "confidence": "4;3;3", "correctness": "3;2;3", "technical_novelty": "2;2;3", "empirical_novelty": "2;2;2", "wc_summary_paper": "45;68;93", "wc_strength_and_weaknesses": "351;522;120", "wc_clarity_quality_novelty_and_reproducibility": "38;464;56", "wc_summary_review": "145;45;85", "wc_review": "579;1099;354", "wc_reply_reviewers": "0;466;0", "wc_reply_authors": "835;2573;1112", "reply_reviewers": "0;2;0", "reply_authors": "2;5;2", "recommendation_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "correctness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 68.66666666666667, 19.601587237318874 ], "wc_strength_and_weaknesses_avg": [ 331.0, 164.72401160729422 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 186.0, 196.71298889498883 ], "wc_summary_review_avg": [ 91.66666666666667, 41.09609335312651 ], "wc_review_avg": [ 677.3333333333334, 311.99180900928934 ], "wc_reply_reviewers_avg": [ 155.33333333333334, 219.67450668862074 ], "wc_reply_authors_avg": [ 1506.6666666666667, 762.4444606366784 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 3.0, 1.4142135623730951 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.5, "corr_recommendation_correctness": 0.5, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3748029612495230674&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;2", "aff_unique_norm": "Massachusetts Institute of Technology;International Business Machines Corporation;International Business Machines", "aff_unique_dep": ";;", "aff_unique_url": "https://web.mit.edu;https://www.ibm.com;https://www.ibm.com", "aff_unique_abbr": "MIT;IBM;IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "TM9jOSaIzN", "title": 
"Neural Decoding of Visual Imagery via Hierarchical Variational Autoencoders", "track": "main", "status": "Reject", "tldr": "We propose a novel architecture for decoding visual imagery from fMRI recordings using Hierarchical VAEs.", "abstract": "Reconstructing natural images from fMRI recordings is a challenging task of great importance in neuroscience. The current architectures are bottlenecked because they fail to effectively capture the hierarchical processing of visual stimuli that takes place in the human brain. Motivated by that fact, we introduce a novel neural network architecture for the problem of neural decoding. Our architecture uses Hierarchical Variational Autoencoders (HVAEs) to learn meaningful representations of natural images and leverages their latent space hierarchy to learn voxel-to-image mappings. By mapping the early stages of the visual pathway to the first set of latent variables and the higher visual cortex areas to the deeper layers in the latent hierarchy, we are able to construct a latent variable neural decoding model that replicates the hierarchical visual information processing. Our model achieves better reconstructions compared to the state of the art and our ablation study indicates that the hierarchical structure of the latent space is responsible for that performance. ", "keywords": "neural decoding;hierarchical variational autoencoders;neuroscience", "primary_area": "", "supplementary_material": "/attachment/289636f7cc8ad8fe0d93fa414a63b7363ad754a4.zip", "author": "Eleni Miliotou;Panagiotis Kyriakis;Jason D Hinman;Andrei Irimia;Paul Bogdan", "authorids": "~Eleni_Miliotou1;~Panagiotis_Kyriakis1;~Jason_D_Hinman1;~Andrei_Irimia2;~Paul_Bogdan1", "gender": "F;M;M;M;M", "homepage": ";;https://hinmanlabucla.org;https://gero.usc.edu/labs/irimialab/;https://cps.usc.edu/", "dblp": ";225/3791;;;05/5539", "google_scholar": ";https://scholar.google.com/citations?hl=en;;IaPx6wIAAAAJ;Xw_v8-gAAAAJ", "orcid": ";;;;0000-0003-2118-0816", "linkedin": "eleni-miliotou/;;;irimia;paul-bogdan-4b098a6/", "or_profile": "~Eleni_Miliotou1;~Panagiotis_Kyriakis1;~Jason_D_Hinman1;~Andrei_Irimia2;~Paul_Bogdan1", "aff": ";;University of California, Los Angeles;University of Southern California;University of Southern California", "aff_domain": ";;ucla.edu;usc.edu;usc.edu", "position": ";;Associate Professor;Associate Professor;Jack Munushian Early Career Chair associate professor", "bibtex": "@misc{\nmiliotou2023neural,\ntitle={Neural Decoding of Visual Imagery via Hierarchical Variational Autoencoders},\nauthor={Eleni Miliotou and Panagiotis Kyriakis and Jason D Hinman and Andrei Irimia and Paul Bogdan},\nyear={2023},\nurl={https://openreview.net/forum?id=TM9jOSaIzN}\n}", "github": "", "project": "", "reviewers": "J68Z;sMBW;eDwx;H6z7", "site": "https://openreview.net/forum?id=TM9jOSaIzN", "pdf_size": 12107991, "recommendation": "1;3;6;8", "confidence": "5;4;4;4", "correctness": "2;2;3;3", "technical_novelty": "2;2;3;4", "empirical_novelty": "2;2;4;4", "wc_summary_paper": "81;111;55;235", "wc_strength_and_weaknesses": "387;299;132;206", "wc_clarity_quality_novelty_and_reproducibility": "24;69;5;426", "wc_summary_review": "42;80;50;83", "wc_review": "534;559;242;950", "wc_reply_reviewers": "0;70;0;0", "wc_reply_authors": "1057;651;258;1920", "reply_reviewers": "0;1;0;0", "reply_authors": "3;2;1;3", "recommendation_avg": [ 4.5, 2.692582403567252 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 2.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], 
"empirical_novelty_avg": [ 3.0, 1.0 ], "wc_summary_paper_avg": [ 120.5, 69.01267999433148 ], "wc_strength_and_weaknesses_avg": [ 256.0, 96.02864156073437 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 131.0, 171.89677134838803 ], "wc_summary_review_avg": [ 63.75, 18.005207580030838 ], "wc_review_avg": [ 571.25, 251.69165162952862 ], "wc_reply_reviewers_avg": [ 17.5, 30.31088913245535 ], "wc_reply_authors_avg": [ 971.5, 616.1909200888958 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.7504787743864564, "corr_recommendation_correctness": 0.9284766908852594, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:A0AFg8bLv0AJ:scholar.google.com/&scioq=Neural+Decoding+of+Visual+Imagery+via+Hierarchical+Variational+Autoencoders&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;1", "aff_unique_norm": "University of California, Los Angeles;University of Southern California", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucla.edu;https://www.usc.edu", "aff_unique_abbr": "UCLA;USC", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "TMYzh1hsHd", "title": "MA2QL: A Minimalist Approach to Fully Decentralized Multi-Agent Reinforcement Learning", "track": "main", "status": "Reject", "tldr": "A new algorithm for multi-agent reinforcement learning", "abstract": "Decentralized learning has shown great promise for cooperative multi-agent reinforcement learning (MARL). However, non-stationarity remains a significant challenge in fully decentralized learning. In the paper, we tackle the non-stationarity problem in the simplest and fundamental way and propose multi-agent alternate Q-learning (MA2QL), where agents take turns to update their Q-functions by Q-learning. MA2QL is a minimalist approach to fully decentralized cooperative MARL but is theoretically grounded. We prove that when each agent guarantees $\\varepsilon$-convergence at each turn, their joint policy converges to a Nash equilibrium. In practice, MA2QL only requires minimal changes to independent Q-learning (IQL). We empirically evaluate MA2QL on a variety of cooperative multi-agent tasks. 
Results show MA2QL consistently outperforms IQL, which verifies the effectiveness of MA2QL, despite such minimal changes.", "keywords": "multi-agent reinforcement learning", "primary_area": "", "supplementary_material": "", "author": "Kefan Su;Siyuan Zhou;Chuang Gan;Xiangjun Wang;Zongqing Lu", "authorids": "~Kefan_Su1;~Siyuan_Zhou2;~Chuang_Gan1;~Xiangjun_Wang1;~Zongqing_Lu2", "gender": ";;M;;", "homepage": ";https://scholar.google.com/citations?user=WjUmtm0AAAAJ&hl=zh-CN;http://people.csail.mit.edu/ganchuang/;;", "dblp": ";;139/6993;;", "google_scholar": ";WjUmtm0AAAAJ;PTeSCbIAAAAJ;;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Kefan_Su1;~Siyuan_Zhou2;~Chuang_Gan1;~Xiangjun_Wang1;~Zongqing_Lu2", "aff": ";Hong Kong University of Science and Technology;MIT-IBM Watson AI Lab;;", "aff_domain": ";hkust.edu;ibm.com;;", "position": ";PhD student;PhD student;;", "bibtex": "@misc{\nsu2023maql,\ntitle={{MA}2{QL}: A Minimalist Approach to Fully Decentralized Multi-Agent Reinforcement Learning},\nauthor={Kefan Su and Siyuan Zhou and Chuang Gan and Xiangjun Wang and Zongqing Lu},\nyear={2023},\nurl={https://openreview.net/forum?id=TMYzh1hsHd}\n}", "github": "", "project": "", "reviewers": "xmYF;p9BU;J8oo;SmxM", "site": "https://openreview.net/forum?id=TMYzh1hsHd", "pdf_size": 2230811, "recommendation": "3;3;3;6", "confidence": "4;4;4;4", "correctness": "3;3;3;4", "technical_novelty": "2;2;2;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "46;83;78;65", "wc_strength_and_weaknesses": "293;113;694;191", "wc_clarity_quality_novelty_and_reproducibility": "82;63;47;616", "wc_summary_review": "68;348;54;20", "wc_review": "489;607;873;892", "wc_reply_reviewers": "0;0;24;95", "wc_reply_authors": "312;415;706;650", "reply_reviewers": "0;0;1;1", "reply_authors": "2;2;2;2", "recommendation_avg": [ 3.75, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 68.0, 14.300349646075091 ], "wc_strength_and_weaknesses_avg": [ 322.75, 223.64299117119677 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 202.0, 239.34389484588905 ], "wc_summary_review_avg": [ 122.5, 131.35733706192434 ], "wc_review_avg": [ 715.25, 172.50561585061513 ], "wc_reply_reviewers_avg": [ 29.75, 38.925409439079765 ], "wc_reply_authors_avg": [ 520.75, 162.62130088029673 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 1.0, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1081376719699700253&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Hong Kong University of Science and Technology;Massachusetts Institute of Technology", "aff_unique_dep": ";IBM Watson AI Lab", "aff_unique_url": "https://www.ust.hk;https://www.mitibmwatsonailab.org", "aff_unique_abbr": "HKUST;MIT-IBM AI Lab", "aff_campus_unique_index": "0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;1", "aff_country_unique": "China;United States" }, { "id": "TMnxVoWdX_M", "title": "A Closer Look at Dual Batch Normalization and Two-domain Hypothesis In Adversarial Training With Hybrid Samples", "track": "main", "status": "Reject", "tldr": "", "abstract": "There is a growing concern about applying batch normalization (BN) in 
adversarial training (AT), especially when the model is trained on both \\textit{adversarial} samples and \\textit{clean} samples (termed Hybrid-AT). With the assumption that \\textit{adversarial} and \\textit{clean} samples are from two different domains, a common practice in prior works is to adopt dual BN, where BN$_{adv}$ and BN$_{clean}$ are used for adversarial and clean branches, respectively. A popular belief for motivating dual BN is that estimating normalization statistics of this mixture distribution is challenging and thus disentangling it for normalization achieves stronger robustness. In contrast to this belief, we reveal that what makes dual BN effective mainly lies in its two sets of affine parameters. Moreover, we demonstrate that the domain gap between adversarial and clean samples is actually not very large, which is counter-intuitive considering the significant influence of adversarial perturbation on the model. Overall, our work sheds new light on understanding the mechanism of dual BN in Hybrid-AT as well as its underlying two-domain hypothesis. ", "keywords": "Adversarial training;batch normalization", "primary_area": "", "supplementary_material": "", "author": "Chaoning Zhang;Kang Zhang;Chenshuang Zhang;Axi Niu;Chang D. Yoo;In So Kweon", "authorids": "~Chaoning_Zhang1;~Kang_Zhang6;~Chenshuang_Zhang2;~Axi_Niu1;~Chang_D._Yoo1;~In_So_Kweon2", "gender": "M;M;F;F;M;M", "homepage": ";;https://chenshuang-zhang.github.io/;;https://sanctusfactory.com/family.php;https://ee.kaist.ac.kr/en/professor-s2/2/", "dblp": ";29/177-8;165/5102.html;283/5444;31/7819;74/4917.html", "google_scholar": "https://scholar.google.co.kr/citations?user=lvhxhyQAAAAJ;nj19btQAAAAJ;HbqjLHYAAAAJ;5apnc_UAAAAJ;gFWgUQEAAAAJ;XA8EOlEAAAAJ", "orcid": ";0000-0003-2761-9383;;0000-0001-5238-9917;0000-0002-0756-7179;", "linkedin": ";;;;;", "or_profile": "~Chaoning_Zhang1;~Kang_Zhang6;~Chenshuang_Zhang2;~Axi_Niu1;~Chang_D._Yoo1;~In-So_Kweon1", "aff": "Kyung Hee University;Korea Advanced Institute of Science & Technology;Kyung Hee University;KAIST;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "khu.ac.kr;kaist.ac.kr;khu.ac.kr;ee.kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "position": "Assistant Professor;PhD student;Researcher;Intern;Full Professor;Emeritus", "bibtex": "@misc{\nzhang2023a,\ntitle={A Closer Look at Dual Batch Normalization and Two-domain Hypothesis In Adversarial Training With Hybrid Samples},\nauthor={Chaoning Zhang and Kang Zhang and Chenshuang Zhang and Axi Niu and Chang D.
Yoo and In So Kweon},\nyear={2023},\nurl={https://openreview.net/forum?id=TMnxVoWdX_M}\n}", "github": "", "project": "", "reviewers": "8GPb;uM2g;UFTM;dNrE", "site": "https://openreview.net/forum?id=TMnxVoWdX_M", "pdf_size": 658752, "recommendation": "5;5;5;6", "confidence": "4;4;3;3", "correctness": "1;2;3;4", "technical_novelty": "2;2;2;3", "empirical_novelty": "3;2;3;0", "wc_summary_paper": "74;110;84;31", "wc_strength_and_weaknesses": "129;644;194;141", "wc_clarity_quality_novelty_and_reproducibility": "23;5;5;3", "wc_summary_review": "39;30;59;3", "wc_review": "265;789;342;178", "wc_reply_reviewers": "77;0;0;0", "wc_reply_authors": "1235;1291;827;193", "reply_reviewers": "1;0;0;0", "reply_authors": "3;2;1;1", "recommendation_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 2.5, 1.118033988749895 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.0, 1.224744871391589 ], "wc_summary_paper_avg": [ 74.75, 28.472574523565655 ], "wc_strength_and_weaknesses_avg": [ 277.0, 213.29439748854165 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 9.0, 8.12403840463596 ], "wc_summary_review_avg": [ 32.75, 20.129269733400662 ], "wc_review_avg": [ 393.5, 235.5976443006169 ], "wc_reply_reviewers_avg": [ 19.25, 33.34197804570089 ], "wc_reply_authors_avg": [ 886.5, 438.6214199055947 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.7745966692414834, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1032484133433067670&as_sdt=5,39&sciodt=0,39&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;0;1;1;1", "aff_unique_norm": "Kyung Hee University;Korea Advanced Institute of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "http://www.khu.ac.kr;https://www.kaist.ac.kr", "aff_unique_abbr": "KHU;KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Don\u2019t fear the unlabelled: safe semi-supervised learning via debiasing", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/10704", "id": "TN9gQ4x0Ep3", "poster": "/media/PosterPDFs/ICLR%202023/10704.png?t=1682501377.4504871", "openreview": "https://openreview.net/forum?id=TN9gQ4x0Ep3", "slides": "https://iclr.cc/virtual/2023/poster/10704", "video": "https://iclr.cc/virtual/2023/poster/10704", "author_site": "Hugo Schmutz, Olivier HUMBERT, Pierre-Alexandre Mattei", "tldr": "We propose a slight modification of most common semi-supervised learning methods to make them safe by debiasing their risk estimate. In particular, we apply it successfully to Fixmatch.", "abstract": "Semi-supervised learning (SSL) provides an effective means of leveraging unlabelled data to improve a model\u2019s performance. Even though the domain has received a considerable amount of attention in the past years, most methods present the common drawback of lacking theoretical guarantees. Our starting point is to notice that the estimate of the risk that most discriminative SSL methods minimise is biased, even asymptotically. This bias impedes the use of standard statistical learning theory and can hurt empirical performance. We propose a simple way of removing the bias. 
Our debiasing approach is straightforward to implement and applicable to most deep SSL methods. We provide simple theoretical guarantees on the trustworthiness of these modified methods, without having to rely on the strong assumptions on the data distribution that SSL theory usually requires. In particular, we provide generalisation error bounds for the proposed methods. We evaluate debiased versions of different existing SSL methods, such as the Pseudo-label method and Fixmatch, and show that debiasing can compete with classic deep SSL techniques in various settings by providing better calibrated models. Additionally, we provide a theoretical explanation of the intuition of the popular SSL methods. An implementation of a debiased version of Fixmatch is available at\nhttps://github.com/HugoSchmutz/DeFixmatch", "keywords": "Semi-supervised learning;deep learning;empirical risk minimisation;control variate;variance reduction;asymptotic statistics", "primary_area": "", "supplementary_material": "/attachment/326d1079ecf48e2fcbb2b70087abab31fbd6ae1d.zip", "author": "Hugo Schmutz;Olivier HUMBERT;Pierre-Alexandre Mattei", "authorids": "~Hugo_Schmutz1;~Olivier_HUMBERT1;~Pierre-Alexandre_Mattei3", "gender": "M;M;M", "homepage": "https://team.inria.fr/maasai/hugo-schmutz/;;http://pamattei.github.io", "dblp": ";;177/7275", "google_scholar": ";WlF6vtsAAAAJ;https://scholar.google.fr/citations?user=Tqa_-D0AAAAJ", "orcid": ";;", "linkedin": "hugo-schmutz-11bb02145/;;", "or_profile": "~Hugo_Schmutz1;~Olivier_HUMBERT1;~Pierre-Alexandre_Mattei3", "aff": "INRIA;Universit\u00e9 C\u00f4te d'Azur;INRIA", "aff_domain": "inria.fr;unice.fr;inria.fr", "position": "PhD student;Assistant Professor;Research scientist", "bibtex": "@inproceedings{\nschmutz2023dont,\ntitle={Don{\\textquoteright}t fear the unlabelled: safe semi-supervised learning via debiasing},\nauthor={Hugo Schmutz and Olivier HUMBERT and Pierre-Alexandre Mattei},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=TN9gQ4x0Ep3}\n}", "github": "", "project": "", "reviewers": "DVba;6iPj;1cgk;NKdq", "pdf_size": 1209116, "recommendation": "6;6;8;8", "confidence": "3;5;4;3", "correctness": "4;2;3;4", "technical_novelty": "3;2;3;4", "empirical_novelty": "3;0;3;2", "wc_summary_paper": "91;40;262;79", "wc_strength_and_weaknesses": "155;155;219;152", "wc_clarity_quality_novelty_and_reproducibility": "8;17;119;53", "wc_summary_review": "62;43;59;27", "wc_review": "316;255;659;311", "wc_reply_reviewers": "96;0;0;0", "wc_reply_authors": "773;535;116;387", "reply_reviewers": "1;0;0;0", "reply_authors": "2;1;1;1", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 3.0, 0.7071067811865476 ], "empirical_novelty_avg": [ 2.0, 1.224744871391589 ], "wc_summary_paper_avg": [ 118.0, 85.24963343029692 ], "wc_strength_and_weaknesses_avg": [ 170.25, 28.172459956489423 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 49.25, 43.64845358085439 ], "wc_summary_review_avg": [ 47.75, 13.988834833537782 ], "wc_review_avg": [ 385.25, 159.85364399975373 ], "wc_reply_reviewers_avg": [ 24.0, 41.569219381653056 ], "wc_reply_authors_avg": [ 452.75, 238.24816368652247 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.30151134457776363, 
"corr_recommendation_correctness": 0.30151134457776363, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=280975411140933342&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "pdf": "https://openreview.net/pdf?id=TN9gQ4x0Ep3", "email": "inria.fr;unice.fr;inria.fr", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "INRIA;Universit\u00e9 C\u00f4te d'Azur", "aff_unique_dep": ";", "aff_unique_url": "https://www.inria.fr;https://www.univ-cotedazur.fr", "aff_unique_abbr": "INRIA;UCA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "France" }, { "title": "Behind the Scenes of Gradient Descent: A Trajectory Analysis via Basis Function Decomposition", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11926", "id": "TPiwkItUSu", "poster": "/media/PosterPDFs/ICLR%202023/11926.png?t=1681332120.7663379", "openreview": "https://openreview.net/forum?id=TPiwkItUSu", "slides": "https://iclr.cc/virtual/2023/poster/11926", "video": "https://iclr.cc/virtual/2023/poster/11926", "author_site": "Jianhao Ma, Lingjun Guo, Salar Fattahi", "tldr": "", "abstract": "This work analyzes the solution trajectory of gradient-based algorithms via a novel basis function decomposition. We show that, although solution trajectories of gradient-based algorithms may vary depending on the learning task, they behave almost monotonically when projected onto an appropriate orthonormal function basis. Such projection gives rise to a basis function decomposition of the solution trajectory. Theoretically, we use our proposed basis function decomposition to establish the convergence of gradient descent (GD) on several representative learning tasks. In particular, we improve the convergence of GD on symmetric matrix factorization and provide a completely new convergence result for the orthogonal symmetric tensor decomposition. Empirically, we illustrate the promise of our proposed framework on realistic deep neural networks (DNNs) across different architectures, gradient-based solvers, and datasets. 
Our key finding is that gradient-based algorithms monotonically learn the coefficients of a particular orthonormal function basis of DNNs defined as the eigenvectors of the conjugate kernel after training.", "keywords": "nonconvex optimization;trajectory analysis;neural network optimization", "primary_area": "", "supplementary_material": "/attachment/5a76b938d7ab9697c89b5579532fee57f762c8da.zip", "author": "Jianhao Ma;Lingjun Guo;Salar Fattahi", "authorids": "~Jianhao_Ma1;~Lingjun_Guo1;~Salar_Fattahi2", "gender": "M;M;M", "homepage": "https://jianhaoma.github.io/;https://github.com/LingjunGuo2333;http://fattahi.engin.umich.edu/", "dblp": ";;175/9308", "google_scholar": "https://scholar.google.com/citations?hl=en;;nca_I7gAAAAJ", "orcid": ";;", "linkedin": "jianhao-ma/;;", "or_profile": "~Jianhao_Ma1;~Lingjun_Guo1;~Salar_Fattahi2", "aff": "University of Michigan;;University of Michigan", "aff_domain": "umich.edu;;umich.edu", "position": "PhD student;;Assistant Professor", "bibtex": "@inproceedings{\nma2023behind,\ntitle={Behind the Scenes of Gradient Descent: A Trajectory Analysis via Basis Function Decomposition},\nauthor={Jianhao Ma and Lingjun Guo and Salar Fattahi},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=TPiwkItUSu}\n}", "github": "", "project": "", "reviewers": "CW4U;iyzY;5bKV;yaF6", "pdf_size": 1274262, "recommendation": "6;6;6;6", "confidence": "3;2;3;2", "correctness": "4;3;3;4", "technical_novelty": "2;2;3;3", "empirical_novelty": "2;2;4;3", "wc_summary_paper": "231;111;232;53", "wc_strength_and_weaknesses": "160;214;45;152", "wc_clarity_quality_novelty_and_reproducibility": "13;33;27;33", "wc_summary_review": "30;31;32;6", "wc_review": "434;389;336;244", "wc_reply_reviewers": "91;0;0;105", "wc_reply_authors": "972;1123;654;1063", "reply_reviewers": "1;0;0;1", "reply_authors": "3;2;1;2", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 2.5, 0.5 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.75, 0.82915619758885 ], "wc_summary_paper_avg": [ 156.75, 77.51249899209805 ], "wc_strength_and_weaknesses_avg": [ 142.75, 61.267344483011506 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 26.5, 8.170067319184096 ], "wc_summary_review_avg": [ 24.75, 10.848386976873567 ], "wc_review_avg": [ 350.75, 70.72260953895861 ], "wc_reply_reviewers_avg": [ 49.0, 49.24936547814601 ], "wc_reply_authors_avg": [ 953.0, 180.80514373214055 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4507846866426936911&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "pdf": "https://openreview.net/pdf?id=TPiwkItUSu", "email": "umich.edu;;umich.edu", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "University of Michigan", "aff_unique_dep": "", "aff_unique_url": "https://www.umich.edu", "aff_unique_abbr": "UM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "On amortizing convex conjugates for optimal transport", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/12232", "id": "TQ5WUwS_4ai", "poster": "", "openreview": "https://openreview.net/forum?id=TQ5WUwS_4ai", 
"slides": "https://iclr.cc/virtual/2023/poster/12232", "video": "https://iclr.cc/virtual/2023/poster/12232", "tldr": "State-of-the art continuous Wasserstein-2 potential learning, and along the way I improved Jax's L-BFGS implementation to run in 3% of the time for solving batches of optimization problems", "abstract": "This paper focuses on computing the convex conjugate operation that arises when solving Euclidean Wasserstein-2 optimal transport problems. This conjugation, which is also referred to as the Legendre-Fenchel conjugate or c-transform,is considered difficult to compute and in practice,Wasserstein-2 methods are limited by not being able to exactly conjugate the dual potentials in continuous space. To overcome this, the computation of the conjugate can be approximated with amortized optimization, which learns a model to predict the conjugate. I show that combining amortized approximations to the conjugate with a solver for fine-tuning significantly improves the quality of transport maps learned for the Wasserstein-2 benchmark by Korotin et al. (2021a) and is able to model many 2-dimensional couplings and flows considered in the literature. All of the baselines, methods, and solvers in this paper are available at http://github.com/facebookresearch/w2ot.", "keywords": "optimal transport;wasserstein-2;convex conjugate;c-transform;amortized optimization", "primary_area": "", "supplementary_material": "/attachment/90d9bf3d241e0ce5bb7924d13ca1dba0b57ffacd.zip", "author": "Brandon Amos", "authorids": "~Brandon_Amos1", "gender": "", "homepage": "http://bamos.github.io", "dblp": "133/4801.html", "google_scholar": "d8gdZR4AAAAJ", "orcid": "", "linkedin": "bdamos", "or_profile": "~Brandon_Amos1", "aff": "Meta", "aff_domain": "meta.com", "position": "Research Scientist", "bibtex": "@inproceedings{\namos2023on,\ntitle={On amortizing convex conjugates for optimal transport},\nauthor={Brandon Amos},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=TQ5WUwS_4ai}\n}", "github": "", "project": "", "reviewers": "G2nL;dkEA;eQGj;7Bdg", "pdf_size": 3324783, "recommendation": "6;6;6;8", "confidence": "5;2;2;3", "correctness": "4;3;4;4", "technical_novelty": "2;3;2;3", "empirical_novelty": "4;3;3;3", "wc_summary_paper": "73;69;92;122", "wc_strength_and_weaknesses": "309;223;97;831", "wc_clarity_quality_novelty_and_reproducibility": "34;93;128;118", "wc_summary_review": "34;71;37;121", "wc_review": "450;456;354;1192", "wc_reply_reviewers": "275;0;50;164", "wc_reply_authors": "1408;679;705;1291", "reply_reviewers": "1;0;1;1", "reply_authors": "3;1;1;3", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.0, 1.224744871391589 ], "correctness_avg": [ 3.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 89.0, 20.940391591371924 ], "wc_strength_and_weaknesses_avg": [ 365.0, 279.4100928742553 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 93.25, 36.50599265874029 ], "wc_summary_review_avg": [ 65.75, 35.0526389876711 ], "wc_review_avg": [ 613.0, 336.7268923029463 ], "wc_reply_reviewers_avg": [ 122.25, 106.34936530135006 ], "wc_reply_authors_avg": [ 1020.75, 331.4697384377645 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 1.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 1, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 
0.3333333333333333, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10982459574561476749&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "pdf": "https://openreview.net/pdf?id=TQ5WUwS_4ai", "email": "meta.com", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "Meta", "aff_unique_dep": "Meta Platforms, Inc.", "aff_unique_url": "https://meta.com", "aff_unique_abbr": "Meta", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "TQZkycVeMIy", "title": "Test-time Adaptation for Segmentation via Image Synthesis", "track": "main", "status": "Reject", "tldr": "We propose a test-time adaptation framework that optimizes image synthesis loss to improve image segmentation.", "abstract": "We consider the problem of segmenting scenes into constituent objects and their parts. Current supervised visual detectors, though impressive within their training distribution, often fail to segment out-of-distribution scenes into their constituent entities. Recent test-time adaptation methods use auxiliary self-supervised losses to adapt the network parameters to each test example independently and have shown promising results towards generalization outside the training distribution for the task of image classification. In our work, we find evidence that these losses can be insufficient for instance segmentation tasks, without also considering architectural inductive biases. For image segmentation, recent slot-centric generative models break such dependence on supervision by attempting to segment scenes into entities in a self-supervised manner by reconstructing pixels. Drawing upon these two lines of work, we propose Generating Fast and Slow Networks (GFS-Nets), a semi-supervised instance segmentation model equipped with a slot-centric image or point-cloud rendering component that is adapted per scene at test time through gradient descent on reconstruction or novel view synthesis objectives. We show that test-time adaptation greatly improves segmentation in out-of-distribution scenes. We evaluate GFS-Nets in several 3D and 2D scene segmentation benchmarks and show substantial out-of-distribution performance improvements against state-of-the-art supervised feed forward detectors and self-supervised domain adaptation models.", "keywords": "object-centric learning;test-time adaptation;unsupervised domain adaptation;test-time training;entity-centric models", "primary_area": "", "supplementary_material": "/attachment/253ea602474a81ac28a45adf8f86415f37a3a904.zip", "author": "Mihir Prabhudesai;Anirudh Goyal;Sujoy Paul;Mehdi S. M. 
Sajjadi;Sjoerd van Steenkiste;Gaurav Aggarwal;Thomas Kipf;Deepak Pathak;Katerina Fragkiadaki", "authorids": "~Mihir_Prabhudesai1;~Anirudh_Goyal1;~Sujoy_Paul1;~Mehdi_S._M._Sajjadi1;~Sjoerd_van_Steenkiste1;~Gaurav_Aggarwal4;~Thomas_Kipf2;~Deepak_Pathak1;~Katerina_Fragkiadaki1", "gender": "M;M;M;Unspecified;M;;M;F;M", "homepage": "https://mihirp1998.github.io/;https://anirudh9119.github.io/;https://intra.ece.ucr.edu/~supaul/;http://msajjadi.com;http://www.sjoerdvansteenkiste.com/;;https://www.cs.cmu.edu/~dpathak/;https://www.cs.cmu.edu/~katef/;http://tkipf.github.io/", "dblp": "249/9214;172/1039;138/6200;164/6190;183/9326;14/5218;155/9860;21/8780;186/8206", "google_scholar": ";krrh6OUAAAAJ;Iq8BQUYAAAAJ;https://scholar.google.de/citations?user=rHF25YEAAAAJ;i-AStBYAAAAJ;https://scholar.google.co.in/citations?user=9XiIwDQAAAAJ;https://scholar.google.cl/citations?user=AEsPCAUAAAAJ;FWp7728AAAAJ;83HL5FwAAAAJ", "orcid": ";;;0000-0002-6002-2370;;;;;", "linkedin": ";;;;;;pathak22/;;thomas-kipf-6b260410a", "or_profile": "~Mihir_Prabhudesai1;~Anirudh_Goyal1;~Sujoy_Paul1;~Mehdi_S._M._Sajjadi1;~Sjoerd_van_Steenkiste1;~Gaurav_Aggarwal4;~Deepak_Pathak1;~Katerina_Fragkiadaki1;~Thomas_N._Kipf1", "aff": "School of Computer Science, Carnegie Mellon University;Google DeepMind;Google;Google DeepMind;Google;Google;Carnegie Mellon University;Carnegie Mellon University;Google", "aff_domain": "cs.cmu.edu;google.com;google.com;google.com;google.com;google.com;cmu.edu;cmu.edu;google.com", "position": "PhD student;Researcher;Researcher;Researcher;Researcher;Researcher;Assistant Professor;Assistant Professor;Research Scientist", "bibtex": "@misc{\nprabhudesai2023testtime,\ntitle={Test-time Adaptation for Segmentation via Image Synthesis},\nauthor={Mihir Prabhudesai and Anirudh Goyal and Sujoy Paul and Mehdi S. M. 
Sajjadi and Sjoerd van Steenkiste and Gaurav Aggarwal and Thomas Kipf and Deepak Pathak and Katerina Fragkiadaki},\nyear={2023},\nurl={https://openreview.net/forum?id=TQZkycVeMIy}\n}", "github": "", "project": "", "reviewers": "f456;Ad2e;VgmG;MJUr;Hiry", "site": "https://openreview.net/forum?id=TQZkycVeMIy", "pdf_size": 18097583, "recommendation": "3;3;5;6;6", "confidence": "4;4;3;2;1", "correctness": "3;3;4;3;3", "technical_novelty": "1;2;2;3;3", "empirical_novelty": "1;2;3;3;3", "wc_summary_paper": "66;84;68;43;1", "wc_strength_and_weaknesses": "273;222;249;112;1", "wc_clarity_quality_novelty_and_reproducibility": "32;30;34;30;1", "wc_summary_review": "25;30;85;37;1", "wc_review": "396;366;436;222;4", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "1328;719;1008;279;0", "reply_reviewers": "0;0;0;0;0", "reply_authors": "4;2;4;2;0", "recommendation_avg": [ 4.6, 1.3564659966250536 ], "confidence_avg": [ 2.8, 1.16619037896906 ], "correctness_avg": [ 3.2, 0.39999999999999997 ], "technical_novelty_avg": [ 2.2, 0.7483314773547882 ], "empirical_novelty_avg": [ 2.4, 0.8 ], "wc_summary_paper_avg": [ 52.4, 28.83470131629596 ], "wc_strength_and_weaknesses_avg": [ 171.4, 101.45856297030822 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 25.4, 12.289833196589774 ], "wc_summary_review_avg": [ 35.6, 27.507089995126712 ], "wc_review_avg": [ 284.8, 157.86880629180675 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 666.8, 479.86223022863555 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.4, 1.4966629547095767 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 9, 0 ], "corr_recommendation_confidence": -0.9355852142347214, "corr_recommendation_correctness": 0.14744195615489716, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:8bRjvU9rlRMJ:scholar.google.com/&scioq=Test-time+Adaptation+for+Segmentation+via+Image+Synthesis&hl=en&as_sdt=0,14", "gs_version_total": 0, "aff_unique_index": "0;1;1;1;1;1;0;0;1", "aff_unique_norm": "Carnegie Mellon University;Google", "aff_unique_dep": "School of Computer Science;Google DeepMind", "aff_unique_url": "https://www.cmu.edu;https://deepmind.com", "aff_unique_abbr": "CMU;DeepMind", "aff_campus_unique_index": "0;2;2;2;2", "aff_campus_unique": "Pittsburgh;;Mountain View", "aff_country_unique_index": "0;1;0;1;0;0;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "TS_VsCpuWr", "title": "Spectral Subgraph Localization", "track": "main", "status": "Reject", "tldr": "We localize a subgraph Q in a graph G by manipulating their Laplacian spectra.", "abstract": "Several graph mining problems are based on some variant of the subgraph isomorphism problem: Given two graphs, G and Q, does G contain a subgraph isomorphic to Q? As this problem is NP-hard, many methods avoid addressing it explicitly. In this paper, we propose a method that solves the problem by localizing, i.e., finding the position of, Q in G, by means of an alignment among graph spectra. Finding a node correspondence from Q to G thereafter is relegated to a separate task, as an instance of the graph alignment problem. We demonstrate that our spectral approach outperforms a baseline based on the state-of-the-art method for graph alignment in terms of accuracy on real graphs and scales to hundreds of nodes as no other method does.", "keywords": "subgraph isomorphism;subgraph localization", "primary_area": "", "supplementary_material": "", "author": "Judith Hermanns;Amit Boyarski;Petros Petsinis;Alex M. 
Bronstein;Davide Mottin;Panagiotis Karras", "authorids": "~Judith_Hermanns1;~Amit_Boyarski1;petsinis@cs.au.dk;~Alex_M._Bronstein1;~Davide_Mottin1;~Panagiotis_Karras1", "gender": ";M;;M;M;M", "homepage": ";https://vista.cs.technion.ac.il;;https://bron.cs.technion.ac.il;https://mott.in;http://cs.au.dk/~karras/", "dblp": "169/1811.html;;;;135/7623;08/5342", "google_scholar": ";;;https://scholar.google.co.il/citations?user=lafKN0sAAAAJ;https://scholar.google.it/citations?user=evZ9Q9EAAAAJ;https://scholar.google.com.tw/citations?user=B6C4aBoAAAAJ", "orcid": ";;;;0000-0001-8256-2258;", "linkedin": ";;;;davide-mottin-67ab7323/;", "or_profile": "~Judith_Hermanns1;~Amit_Boyarski1;petsinis@cs.au.dk;~Alex_M._Bronstein1;~Davide_Mottin1;~Panagiotis_Karras1", "aff": ";Technion, Technion;;Computer Science Department, Technion - Israel Institute of Technology;Aarhus University;Nagoya University", "aff_domain": ";technion.ac.il;;cs.technion.ac.il;au.dk;nagoya-u.ac.jp", "position": ";PhD student;;Full Professor;Assistant Professor;Associate Professor", "bibtex": "@misc{\nhermanns2023spectral,\ntitle={Spectral Subgraph Localization},\nauthor={Judith Hermanns and Amit Boyarski and Petros Petsinis and Alex M. Bronstein and Davide Mottin and Panagiotis Karras},\nyear={2023},\nurl={https://openreview.net/forum?id=TS_VsCpuWr}\n}", "github": "", "project": "", "reviewers": "aKY2;oSHA;rJzq", "site": "https://openreview.net/forum?id=TS_VsCpuWr", "pdf_size": 648541, "recommendation": "3;3;8", "confidence": "3;4;4", "correctness": "4;3;4", "technical_novelty": "2;2;3", "empirical_novelty": "4;2;3", "wc_summary_paper": "218;114;89", "wc_strength_and_weaknesses": "145;608;45", "wc_clarity_quality_novelty_and_reproducibility": "51;119;18", "wc_summary_review": "206;229;74", "wc_review": "620;1070;226", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "84;611;42", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "recommendation_avg": [ 4.666666666666667, 2.357022603955158 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "correctness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 3.0, 0.816496580927726 ], "wc_summary_paper_avg": [ 140.33333333333334, 55.8589493476401 ], "wc_strength_and_weaknesses_avg": [ 266.0, 245.25225109398417 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 62.666666666666664, 42.05023450852827 ], "wc_summary_review_avg": [ 169.66666666666666, 68.29511126151141 ], "wc_review_avg": [ 638.6666666666666, 344.8142817356742 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 245.66666666666666, 258.89809234952315 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": 0.5000000000000001, "corr_recommendation_correctness": 0.5000000000000001, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:T7tjKCrBnV8J:scholar.google.com/&scioq=Spectral+Subgraph+Localization&hl=en&as_sdt=0,5", "gs_version_total": 8, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Technion - Israel Institute of Technology;Aarhus University;Nagoya University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.technion.ac.il/en/;https://au.dk;https://www.nagoya-u.ac.jp", "aff_unique_abbr": "Technion;AU;Nagoya U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;2", "aff_country_unique": "Israel;Denmark;Japan" }, { 
"id": "TSqKS0lQQA6", "title": "Prompt Tuning with Prompt-aligned Gradient for Vision-Language Models", "track": "main", "status": "Reject", "tldr": "We present Prompt-aligned Gradient to prevent prompt tuning from forgetting the general knowledge learned from CLIP.", "abstract": "Thanks to the large pre-trained vision-language models (VLMs) like CLIP, we can craft a zero-shot classifier by ``prompt'', e.g., using the model provided similarity measure between an image and the prompt sentence ``$\\texttt{a photo of a [CLASS]}$'', as the confidence score of predicting the image is ``$\\texttt{[CLASS]}$''. Therefore, prompt shows a great potential for fast adapting the VLMs to downstream tasks if we fine-tune the prompt-based similarity measure. However, we find a common failure that improper fine-tuning may not only undermine the prompt's inherent prediction for the task-related classes, but also for other classes in the VLM vocabulary. Existing methods still address this problem by using traditional anti-overfitting techniques such as early stopping and data augmentation, which lack a principled solution specific to prompt. We present Prompt-aligned Gradient, dubbed $\\texttt{ProGrad}$, to prevent prompt tuning from forgetting the the general knowledge learned from VLMs. In particular, $\\texttt{ProGrad}$ only updates the prompt whose gradient is aligned (or non-conflicting) to the ``general direction'', which is represented as the gradient of the KL loss of the pre-defined prompt prediction. Extensive experiments demonstrate the stronger few-shot generalization ability of $\\texttt{ProGrad}$ over state-of-the-art prompt tuning methods. Codes are in Appendix.", "keywords": "prompt tuning;vision-language models;CLIP", "primary_area": "", "supplementary_material": "/attachment/0a1444c4e505e18a6e0187486d7817a462273b8d.zip", "author": "Beier Zhu;Yulei Niu;Yucheng Han;Yue Wu;Hanwang Zhang", "authorids": "~Beier_Zhu1;~Yulei_Niu1;~Yucheng_Han1;~Yue_Wu18;~Hanwang_Zhang3", "gender": "M;M;M;M;M", "homepage": "https://beierzhu.github.io;https://yuleiniu.github.io;https://tingxueronghua.github.io/;https://mreallab.github.io/index.html;", "dblp": "243/7531;165/2982;226/9017;79/8116.html;", "google_scholar": "jHczmjwAAAAJ;WXd3dDwAAAAJ;LbwqJBQAAAAJ;YG0DFyYAAAAJ;srajsjoAAAAJ", "orcid": "0000-0002-7900-6979;;;;", "linkedin": ";;;;", "or_profile": "~Beier_Zhu1;~Yulei_Niu1;~Yucheng_Han1;~Hanwang_Zhang3;~Yue_Wu3", "aff": "Nanyang Technological University;Columbia University;Nanyang Technological University;Nanyang Technological University;Alibaba Group", "aff_domain": "ntu.edu.sg;columbia.edu;ntu.edu.sg;ntu.edu.sg;alibaba-inc.com", "position": "PhD student;Postdoc;PhD student;Associate Professor;Researcher", "bibtex": "@misc{\nzhu2023prompt,\ntitle={Prompt Tuning with Prompt-aligned Gradient for Vision-Language Models },\nauthor={Beier Zhu and Yulei Niu and Yucheng Han and Yue Wu and Hanwang Zhang},\nyear={2023},\nurl={https://openreview.net/forum?id=TSqKS0lQQA6}\n}", "github": "", "project": "", "reviewers": "7YcA;6JR9;Wdn9;VFpS;edmg", "site": "https://openreview.net/forum?id=TSqKS0lQQA6", "pdf_size": 4439301, "recommendation": "3;6;6;6;6", "confidence": "4;4;4;4;3", "correctness": "3;3;4;3;4", "technical_novelty": "2;3;2;2;3", "empirical_novelty": "2;3;3;2;2", "wc_summary_paper": "72;115;70;141;80", "wc_strength_and_weaknesses": "201;194;165;423;136", "wc_clarity_quality_novelty_and_reproducibility": "22;24;82;47;34", "wc_summary_review": "298;31;32;48;24", "wc_review": "593;364;349;659;274", 
"wc_reply_reviewers": "0;0;0;15;0", "wc_reply_authors": "996;383;339;1239;385", "reply_reviewers": "0;0;0;1;0", "reply_authors": "3;1;1;3;1", "recommendation_avg": [ 5.4, 1.2 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "correctness_avg": [ 3.4, 0.4898979485566356 ], "technical_novelty_avg": [ 2.4, 0.4898979485566356 ], "empirical_novelty_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_paper_avg": [ 95.6, 27.90412155936825 ], "wc_strength_and_weaknesses_avg": [ 223.8, 102.22993690695499 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 41.8, 21.96724834839357 ], "wc_summary_review_avg": [ 86.6, 105.9916977880815 ], "wc_review_avg": [ 447.8, 150.11915267546644 ], "wc_reply_reviewers_avg": [ 3.0, 6.0 ], "wc_reply_authors_avg": [ 668.4, 375.0144530548123 ], "reply_reviewers_avg": [ 0.2, 0.4 ], "reply_authors_avg": [ 1.8, 0.9797958971132713 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.25, "corr_recommendation_correctness": 0.4082482904638631, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:-2HmIkJ7M1AJ:scholar.google.com/&scioq=Prompt+Tuning+with+Prompt-aligned+Gradient+for+Vision-Language+Models&hl=en&as_sdt=0,31", "gs_version_total": 0, "aff_unique_index": "0;1;0;0;2", "aff_unique_norm": "Nanyang Technological University;Columbia University;Alibaba Group", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ntu.edu.sg;https://www.columbia.edu;https://www.alibaba.com", "aff_unique_abbr": "NTU;Columbia;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;2", "aff_country_unique": "Singapore;United States;China" }, { "id": "TSqRwmrRiOn", "title": "Bias Amplification Improves Worst-Group Accuracy without Group Information", "track": "main", "status": "Reject", "tldr": "We propose a novel two-stage training algorithm that achieves the state-of-the-art worst-group accuracy on test data without group information.", "abstract": "Neural networks produced by standard training are known to suffer from poor accuracy on rare subgroups despite achieving high accuracy on average, due to the correlations between certain spurious features and labels. Previous approaches based on worst-group loss minimization (\\textit{e.g.} Group-DRO) are effective in improving worse-group accuracy but require expensive group annotations for all the training samples. In this paper, we focus on the more challenging and realistic setting where group annotations are only available on a small validation set or are not available at all. We propose \\bam, a novel two-stage training algorithm: in the first stage, the model is trained using a \\emph{bias amplification} scheme via introducing a learnable \\emph{auxiliary variable} for each training sample together with the adoption of squared loss; in the second stage, we upweight the samples that the bias-amplified model misclassifies, and then continue training the same model on the reweighted dataset. Empirically, \\bam leads to consistent improvement over its counterparts in worst-group accuracy, resulting in state-of-the-art performance in spurious correlation benchmarks in computer vision and natural language processing. Moreover, we find a simple stopping criterion that completely removes the need for group annotations, with little or no loss in worst-group accuracy. 
", "keywords": "spurious correlation;worst-group accuracy;group robustness", "primary_area": "", "supplementary_material": "", "author": "Gaotang Li;Jiarui Liu;Wei Hu", "authorids": "~Gaotang_Li1;~Jiarui_Liu1;~Wei_Hu1", "gender": "M;M;M", "homepage": "https://gaotangli.github.io/;https://jiarui-liu.github.io/;https://weihu.me", "dblp": "348/5271;134/1248-4;", "google_scholar": "0aVJRykAAAAJ;mSIhZTAAAAAJ;ZybgAqkAAAAJ", "orcid": "0009-0004-3294-1347;;", "linkedin": ";https://linkedin.com/in/jia-rui-liu;", "or_profile": "~Gaotang_Li1;~Jiarui_Liu1;~Wei_Hu1", "aff": "University of Michigan - Ann Arbor;Electrical Engineering and Computer Science, University of Michigan - Ann Arbor;Google", "aff_domain": "umich.edu;eecs.umich.edu;google.com", "position": "Undergrad student;Undergrad student;Visiting researcher", "bibtex": "@misc{\nli2023bias,\ntitle={Bias Amplification Improves Worst-Group Accuracy without Group Information},\nauthor={Gaotang Li and Jiarui Liu and Wei Hu},\nyear={2023},\nurl={https://openreview.net/forum?id=TSqRwmrRiOn}\n}", "github": "", "project": "", "reviewers": "bEzt;3fdp;CiNy;UsCc", "site": "https://openreview.net/forum?id=TSqRwmrRiOn", "pdf_size": 1164706, "recommendation": "5;5;5;6", "confidence": "4;4;4;4", "correctness": "3;3;3;3", "technical_novelty": "3;2;2;3", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "113;70;156;72", "wc_strength_and_weaknesses": "767;229;388;428", "wc_clarity_quality_novelty_and_reproducibility": "141;47;65;53", "wc_summary_review": "98;46;99;45", "wc_review": "1119;392;708;598", "wc_reply_reviewers": "200;0;0;0", "wc_reply_authors": "1292;824;446;871", "reply_reviewers": "1;0;0;0", "reply_authors": "4;2;1;3", "recommendation_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 102.75, 35.20919624189112 ], "wc_strength_and_weaknesses_avg": [ 453.0, 195.97321245517205 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 76.5, 37.79880950506246 ], "wc_summary_review_avg": [ 72.0, 26.504716561397142 ], "wc_review_avg": [ 704.25, 264.96261528751563 ], "wc_reply_reviewers_avg": [ 50.0, 86.60254037844386 ], "wc_reply_authors_avg": [ 858.25, 299.76021667326035 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16191032158658193568&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Michigan;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.umich.edu;https://www.google.com", "aff_unique_abbr": "UM;Google", "aff_campus_unique_index": "0;0;1", "aff_campus_unique": "Ann Arbor;Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "TT66Tpbus3b", "title": "DeepPipe: Deep, Modular and Extendable Representations of Machine Learning Pipelines", "track": "main", "status": "Reject", "tldr": "How to learn Machine Learning pipelines representations to improve their optimization", "abstract": "Finding accurate Machine Learning pipelines is essential in achieving state-of-the-art AI predictive performance. 
Unfortunately, most existing Pipeline Optimization techniques rely on flavors of Bayesian Optimization that do not explore the deep interaction between pipeline stages/components (e.g. between hyperparameters of the deployed preprocessing algorithm and the hyperparameters of a classifier). In this paper, we are the first to capture the deep interaction between components of a Machine Learning pipeline. We propose embedding pipelines in a deep latent representation through a novel per-component encoder mechanism. Such pipeline embeddings are used with deep kernel Gaussian Process surrogates inside a Bayesian Optimization setup. Through extensive experiments on large-scale meta-datasets, we demonstrate that learning pipeline embeddings with Deep Neural Networks significantly advances the state-of-the-art in Pipeline Optimization.", "keywords": "Pipeline optimization;meta-learning;bayesian optimization;representation learning", "primary_area": "", "supplementary_material": "/attachment/d8be489d510fa5af6b1e220bdc6168fed0e8a5e1.zip", "author": "Sebastian Pineda Arango;Josif Grabocka", "authorids": "~Sebastian_Pineda_Arango1;~Josif_Grabocka1", "gender": "M;M", "homepage": ";https://www.utn.de/departments/department-engineering/machine-learning-lab/", "dblp": "271/4257;117/4936", "google_scholar": "8UI_0B0AAAAJ;KRy27XcAAAAJ", "orcid": ";", "linkedin": "sebaspine/;", "or_profile": "~Sebastian_Pineda_Arango1;~Josif_Grabocka1", "aff": "Universit\u00e4t Freiburg;Universit\u00e4t Freiburg", "aff_domain": "uni-freiburg.de;uni-freiburg.de", "position": "PhD student;Assistant Professor", "bibtex": "@misc{\narango2023deeppipe,\ntitle={DeepPipe: Deep, Modular and Extendable Representations of Machine Learning Pipelines},\nauthor={Sebastian Pineda Arango and Josif Grabocka},\nyear={2023},\nurl={https://openreview.net/forum?id=TT66Tpbus3b}\n}", "github": "", "project": "", "reviewers": "5R6t;KTJe;2otF", "site": "https://openreview.net/forum?id=TT66Tpbus3b", "pdf_size": 2861347, "recommendation": "6;6;6", "confidence": "4;5;4", "correctness": "3;2;4", "technical_novelty": "2;3;2", "empirical_novelty": "3;2;3", "wc_summary_paper": "105;112;81", "wc_strength_and_weaknesses": "79;131;422", "wc_clarity_quality_novelty_and_reproducibility": "249;66;70", "wc_summary_review": "66;57;45", "wc_review": "499;366;618", "wc_reply_reviewers": "59;284;158", "wc_reply_authors": "764;1108;925", "reply_reviewers": "1;2;1", "reply_authors": "1;3;2", "recommendation_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "correctness_avg": [ 3.0, 0.816496580927726 ], "technical_novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "empirical_novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_paper_avg": [ 99.33333333333333, 13.27487183449325 ], "wc_strength_and_weaknesses_avg": [ 210.66666666666666, 150.9356006896834 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 128.33333333333334, 85.33984350166625 ], "wc_summary_review_avg": [ 56.0, 8.602325267042627 ], "wc_review_avg": [ 494.3333333333333, 102.93147666719295 ], "wc_reply_reviewers_avg": [ 167.0, 92.07605551933683 ], "wc_reply_authors_avg": [ 932.3333333333334, 140.53311195428483 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:_6mJmXlupz4J:scholar.google.com/&scioq=DeepPipe:+Deep,+Modular+and+Extendable+Representations+of+Machine+Learning+Pipelines&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "University of Freiburg", "aff_unique_dep": "", "aff_unique_url": "https://www.uni-freiburg.de", "aff_unique_abbr": "Uni Freiburg", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "title": "Sequential Attention for Feature Selection", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11379", "id": "TTLLGx3eet", "poster": "", "openreview": "https://openreview.net/forum?id=TTLLGx3eet", "slides": "https://iclr.cc/virtual/2023/poster/11379", "video": "https://iclr.cc/virtual/2023/poster/11379", "author_site": "Taisuke Yasuda, MohammadHossein Bateni, Lin Chen, Matthew Fahrbach, Gang Fu, Vahab Mirrokni", "tldr": "Sequential feature selection using the attention mechanism, with provable guarantees.", "abstract": "Feature selection is the problem of selecting a subset of features for a machine learning model that maximizes model quality subject to a budget constraint. For neural networks, prior methods, including those based on $\\ell_1$ regularization, attention, and other techniques, typically select the entire feature subset in one evaluation round, ignoring the residual value of features during selection, i.e., the marginal contribution of a feature given that other features have already been selected. We propose a feature selection algorithm called Sequential Attention that achieves state-of-the-art empirical results for neural networks. This algorithm is based on an efficient one-pass implementation of greedy forward selection and uses attention weights at each step as a proxy for feature importance. We give theoretical insights into our algorithm for linear regression by showing that an adaptation to this setting is equivalent to the classical Orthogonal Matching Pursuit (OMP) algorithm, and thus inherits all of its provable guarantees. 
Our theoretical and empirical analyses offer new explanations towards the effectiveness of attention and its connections to overparameterization, which may be of independent interest.", "keywords": "feature selection;attention", "primary_area": "", "supplementary_material": "", "author": "Taisuke Yasuda;Mohammadhossein Bateni;Lin Chen;Matthew Fahrbach;Gang Fu;Vahab Mirrokni", "authorids": "~Taisuke_Yasuda1;~Mohammadhossein_Bateni1;~Lin_Chen14;~Matthew_Fahrbach1;~Gang_Fu3;~Vahab_Mirrokni2", "gender": "M;;;;;M", "homepage": "https://taisukeyasuda.github.io/;http://mhbateni.com/academic;;;;https://people.csail.mit.edu/mirrokni/Welcome.html", "dblp": "177/9741-2;22/4739;;;;m/VahabSMirrokni", "google_scholar": "c62WqiEAAAAJ;n4eReqMAAAAJ;;;;opbZfw0AAAAJ", "orcid": ";;;;;", "linkedin": "taisukeyasuda/;;;;;", "or_profile": "~Taisuke_Yasuda1;~Mohammadhossein_Bateni1;~Lin_Chen14;~Matthew_Fahrbach1;~Gang_Fu3;~Vahab_Mirrokni2", "aff": "School of Computer Science, Carnegie Mellon University;Google;;;;Google Research", "aff_domain": "cs.cmu.edu;google.com;;;;google.com", "position": "PhD student;Research scientist;;;;VP, Google Fellow", "bibtex": "@inproceedings{\nyasuda2023sequential,\ntitle={Sequential Attention for Feature Selection},\nauthor={Taisuke Yasuda and Mohammadhossein Bateni and Lin Chen and Matthew Fahrbach and Gang Fu and Vahab Mirrokni},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=TTLLGx3eet}\n}", "github": "", "project": "", "reviewers": "vzVg;FwQ2;tvep;ABqs", "pdf_size": 782313, "recommendation": "5;6;6;8", "confidence": "4;3;3;3", "correctness": "3;3;3;4", "technical_novelty": "2;2;3;3", "empirical_novelty": "3;2;3;2", "wc_summary_paper": "64;95;93;74", "wc_strength_and_weaknesses": "49;201;138;123", "wc_clarity_quality_novelty_and_reproducibility": "337;37;183;63", "wc_summary_review": "53;28;40;16", "wc_review": "503;361;454;276", "wc_reply_reviewers": "0;0;243;0", "wc_reply_authors": "1354;769;2237;362", "reply_reviewers": "0;0;2;0", "reply_authors": "2;2;5;1", "recommendation_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 81.5, 13.009611831257688 ], "wc_strength_and_weaknesses_avg": [ 127.75, 54.07113370366854 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 155.0, 118.63389060466659 ], "wc_summary_review_avg": [ 34.25, 13.754544703478919 ], "wc_review_avg": [ 398.5, 87.19661690685024 ], "wc_reply_reviewers_avg": [ 60.75, 105.2220865598093 ], "wc_reply_authors_avg": [ 1180.5, 704.5510982178653 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.5, 1.5 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_recommendation_confidence": -0.6622661785325219, "corr_recommendation_correctness": 0.9271726499455306, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=403744429575841055&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "pdf": "https://openreview.net/pdf?id=TTLLGx3eet", "email": "cs.cmu.edu;google.com;;;;google.com", "author_num": 6, "aff_unique_index": "0;1;1", "aff_unique_norm": "Carnegie Mellon University;Google", "aff_unique_dep": "School of Computer Science;Google", "aff_unique_url": "https://www.cmu.edu;https://www.google.com", "aff_unique_abbr": "CMU;Google", "aff_campus_unique_index": "0;1;1", "aff_campus_unique": 
"Pittsburgh;Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "TTMyoOdB9hZ", "title": "Important Channel Tuning", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Large vision transformers (ViT) have tremendously succeeded in various computer vision tasks. These ViT models pre-trained on large datasets such as ImageNet21K and JFT-300M enjoy robustness in both low-level and high-level visual representations, and they repeatedly yield performance improvements on multiple downstream tasks. One straightforward way to inherit these robust representations is full fine-tuning. However, full fine-tuning is prone to overfitting the small downstream data by adjusting the massive weights of pre-trained large models. In addition, updating the whole parameters of pre-trained large models requires high GPU memory and computations, which limits the application of these large models. To address the above two drawbacks of full fine-tuning, in this paper, we propose a parameter-efficient tuning (PET) method dubbed Important Channel Tuning (ICT). Different from previous PET methods that adopt a trainable module to tune all the channels of a feature map, we hypothesize and corroborate experimentally that not all channels are equal for adaptation. Specifically, we design a tiny external module that determines the most informative channels in the feature map for effective adaptation. In particular, with only a simple linear layer applied to the important channels, our ICT surpasses full fine-tuning on 18 out of 19 datasets in VTAB-1K benchmark by adding only 0.11M parameters of the ViT-B, which is 0.13% of its full fine-tuning counterpart. Moreover, compared with the previous PET methods, ICT achieves the state-of-the-art average performance in the VTAB-1K benchmark with ViT and Swin Transformer backbones.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Hengyuan Zhao;Pichao WANG;Yuyang Zhao;Fan Wang;Mike Zheng Shou", "authorids": "~Hengyuan_Zhao2;~Pichao_WANG3;~Yuyang_Zhao1;~Fan_Wang6;~Mike_Zheng_Shou1", "gender": "M;M;M;F;", "homepage": "https://zhaohengyuan1.github.io;https://wangpichao.github.io/;http://yuyangzhao.com/;;http://www.columbia.edu/~zs2262/", "dblp": "260/3042;;;;284/0807", "google_scholar": "QLSk-6IAAAAJ;;u5M6XPAAAAAJ;WCRGTHsAAAAJ;h1-3lSoAAAAJ", "orcid": "0000-0001-8047-4465;;0000-0002-4754-0325;0000-0001-7320-1119;", "linkedin": ";;;;", "or_profile": "~Hengyuan_Zhao2;~Pichao_WANG3;~Yuyang_Zhao1;~Fan_Wang6;~Zheng_Shou1", "aff": "National University of Singapore;Amazon;National University of Singapore;Alibaba Group;National University of Singapore", "aff_domain": "u.nus.edu;amazon.com;nus.edu.sg;alibaba-inc.com;nus.edu.sg", "position": "PhD student;Researcher;PhD student;Senior Staff Algorithm Engineer;Assistant Professor", "bibtex": "@misc{\nzhao2023important,\ntitle={Important Channel Tuning},\nauthor={Hengyuan Zhao and Pichao WANG and Yuyang Zhao and Fan Wang and Mike Zheng Shou},\nyear={2023},\nurl={https://openreview.net/forum?id=TTMyoOdB9hZ}\n}", "github": "", "project": "", "reviewers": "b18c;GVsj;BdjV;6u8y", "site": "https://openreview.net/forum?id=TTMyoOdB9hZ", "pdf_size": 3001246, "recommendation": "3;5;6;6", "confidence": "4;4;4;4", "correctness": "2;3;4;4", "technical_novelty": "2;3;3;3", "empirical_novelty": "2;3;3;3", "wc_summary_paper": "34;77;78;70", "wc_strength_and_weaknesses": "383;228;201;182", "wc_clarity_quality_novelty_and_reproducibility": "32;15;56;18", "wc_summary_review": 
"61;8;31;11", "wc_review": "510;328;366;281", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 4.0, 0.0 ], "correctness_avg": [ 3.25, 0.82915619758885 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_paper_avg": [ 64.75, 18.019087102292392 ], "wc_strength_and_weaknesses_avg": [ 248.5, 79.35521406939812 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 30.25, 16.192204914711276 ], "wc_summary_review_avg": [ 27.75, 21.134982848348848 ], "wc_review_avg": [ 371.25, 85.57854579273943 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 0.9847319278346618, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:PSwtZhqKni8J:scholar.google.com/&scioq=Important+Channel+Tuning&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "National University of Singapore;Amazon;Alibaba Group", "aff_unique_dep": ";Amazon.com, Inc.;", "aff_unique_url": "https://www.nus.edu.sg;https://www.amazon.com;https://www.alibaba.com", "aff_unique_abbr": "NUS;Amazon;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;2;0", "aff_country_unique": "Singapore;United States;China" }, { "id": "TTSyyMBNUjd", "title": "Are Neurons Actually Collapsed? On the Fine-Grained Structure in Neural Representations", "track": "main", "status": "Reject", "tldr": "We provide compelling empirical evidence proving that there exists fine-grained structures in the last-layer representations of a well trained neural network, as a complement to existing Neural Collapse hypothesis.", "abstract": "Recent work has observed an intriguing \"Neural Collapse\" phenomenon in well-trained neural networks, where the last-layer representations of training samples with the same label collapse into each other. This suggests that the last-layer representations are completely determined by the labels, and do not depend on the intrinsic structure of input distribution. We provide evidence that this is not a complete description, and that the apparent collapse hides important fine-grained structure in the representations. Specifically, even when representations apparently collapse, the small amount of remaining variation can still faithfully and accurately captures the intrinsic structure of input distribution. As an example, if we train on CIFAR-10 using only 5 coarse-grained labels (by combining two classes into one super-class) until convergence, we can reconstruct the original 10-class labels from the learned representations via unsupervised clustering. The reconstructed labels achieve $93\\%$ accuracy on the CIFAR-10 test set, nearly matching the normal CIFAR-10 accuracy for the same architecture. 
Our findings show concretely how the structure of input data can play a significant role in determining the fine-grained structure of neural representations, going beyond what Neural Collapse predicts.\n", "keywords": "Neural Collapse;Representation Learning;Neural Networks", "primary_area": "", "supplementary_material": "", "author": "Yongyi Yang;Jacob Steinhardt;Wei Hu", "authorids": "~Yongyi_Yang1;~Jacob_Steinhardt1;~Wei_Hu1", "gender": ";M;M", "homepage": ";https://weihu.me;https://fftyyy.github.io", "dblp": "35/10625;;05/3653", "google_scholar": ";ZybgAqkAAAAJ;EmL0jD0AAAAJ", "orcid": ";;", "linkedin": ";;yongyi-yang-528922218/?originalSubdomain=cn", "or_profile": "~Jacob_Steinhardt1;~Wei_Hu1;~Yang_Yongyi1", "aff": "University of California, Berkeley;Google;University of Michigan - Ann Arbor", "aff_domain": "berkeley.edu;google.com;umich.edu", "position": "Assistant Professor;Visiting researcher;PhD student", "bibtex": "@misc{\nyang2023are,\ntitle={Are Neurons Actually Collapsed? On the Fine-Grained Structure in Neural Representations},\nauthor={Yongyi Yang and Jacob Steinhardt and Wei Hu},\nyear={2023},\nurl={https://openreview.net/forum?id=TTSyyMBNUjd}\n}", "github": "", "project": "", "reviewers": "VqKm;fK87;MGwH;jgRV", "site": "https://openreview.net/forum?id=TTSyyMBNUjd", "pdf_size": 33506280, "recommendation": "3;3;5;5", "confidence": "5;4;4;4", "correctness": "3;3;3;4", "technical_novelty": "1;1;1;2", "empirical_novelty": "1;2;3;3", "wc_summary_paper": "70;102;91;126", "wc_strength_and_weaknesses": "472;367;187;340", "wc_clarity_quality_novelty_and_reproducibility": "6;9;24;108", "wc_summary_review": "14;85;50;100", "wc_review": "562;563;352;674", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "806;747;321;679", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "recommendation_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "correctness_avg": [ 3.25, 0.4330127018922193 ], "technical_novelty_avg": [ 1.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 97.25, 20.191272867256288 ], "wc_strength_and_weaknesses_avg": [ 341.5, 101.92276487615513 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 36.75, 41.69757187175291 ], "wc_summary_review_avg": [ 62.25, 33.24436042398771 ], "wc_review_avg": [ 537.75, 116.50402353567021 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 638.25, 188.59662642794012 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 3, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2299317134128156496&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of California, Berkeley;Google;University of Michigan", "aff_unique_dep": ";Google;", "aff_unique_url": "https://www.berkeley.edu;https://www.google.com;https://www.umich.edu", "aff_unique_abbr": "UC Berkeley;Google;UM", "aff_campus_unique_index": "0;1;2", "aff_campus_unique": "Berkeley;Mountain View;Ann Arbor", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "TTcpISh-_oI", "title": "ResFed: Communication Efficient Federated Learning by Transmitting Deep Compressed Residuals", "track": "main", "status": "Withdraw", "tldr": "We introduce ResFed federated learning framework to achieve more efficient 
communication by leveraging deep compressed residuals rather than weights or gradients.", "abstract": "Federated learning enables cooperative training among massively distributed clients by sharing their learned local model parameters. However, with increasing model size, deploying federated learning requires a large communication bandwidth, which limits its deployment in wireless networks. To address this bottleneck, we introduce a residual-based federated learning framework (ResFed), where residuals rather than model parameters are transmitted in communication networks for training. In particular, we integrate two pairs of shared predictors for the model prediction in both server-to-client and client-to-server communication. By employing a common prediction rule, both locally and globally updated models are always fully recoverable in clients and the server. We highlight that the residuals only indicate the quasi-update of a model in a single inter-round, and hence contain denser information and have lower entropy compared to model weights and gradients. Based on this property, we further conduct lossy compression of the residuals by sparsification and quantization and encode them for efficient communication. The experimental evaluation shows that our ResFed incurs remarkably lower communication costs and achieves better accuracy by leveraging less sensitive residuals, compared to standard federated learning. For instance, to train a 4.08 MB CNN model on CIFAR-10 with 10 clients under a non-independent and identically distributed (Non-IID) setting, our approach achieves a compression ratio over 700X in each communication round with minimum impact on the accuracy. To reach an accuracy of 70%, it saves around 99% of the total communication volume from 587.61 Mb to 6.79 Mb in up-streaming and to 4.61 Mb in down-streaming on average for all clients.", "keywords": "Federated Learning;Communication Efficiency;Deep Compression", "primary_area": "", "supplementary_material": "/attachment/ec20e01f3a2e8fb687e4e63f7cae9b7aa4a7e1c6.zip", "author": "Rui Song;LIGUO ZHOU;Lingjuan Lyu;Andreas Festag;Alois Knoll", "authorids": "~Rui_Song6;~LIGUO_ZHOU1;~Lingjuan_Lyu1;andreas.festag@ivi.fraunhofer.de;~Alois_Knoll1", "gender": ";;F;;M", "homepage": ";;https://sites.google.com/view/lingjuan-lyu;;https://www.in.tum.de/i06/people/prof-dr-ing-habil-alois-knoll/", "dblp": ";;178/9876;;k/AloisKnoll", "google_scholar": ";;;;https://scholar.google.de/citations?user=-CA8QgwAAAAJ", "orcid": ";;;;0000-0003-4840-076X", "linkedin": ";;;;alois-knoll-505480166", "or_profile": "~Rui_Song6;~LIGUO_ZHOU1;~Lingjuan_Lyu1;andreas.festag@ivi.fraunhofer.de;~Alois_Knoll1", "aff": ";;Sony;;Technical University Munich", "aff_domain": ";;sony.com;;tum.de", "position": ";;scientist;;Full Professor", "bibtex": "@misc{\nsong2023resfed,\ntitle={ResFed: Communication Efficient Federated Learning by Transmitting Deep Compressed Residuals},\nauthor={Rui Song and LIGUO ZHOU and Lingjuan Lyu and Andreas Festag and Alois Knoll},\nyear={2023},\nurl={https://openreview.net/forum?id=TTcpISh-_oI}\n}", "github": "", "project": "", "reviewers": "n2oV;pmNE;wdQh;LF7k", "site": "https://openreview.net/forum?id=TTcpISh-_oI", "pdf_size": 4392470, "recommendation": "3;3;3;6", "confidence": "5;4;3;3", "correctness": "2;2;2;4", "technical_novelty": "2;3;2;3", "empirical_novelty": "2;2;2;0", "wc_summary_paper": "111;63;40;106", "wc_strength_and_weaknesses": "863;448;106;338", "wc_clarity_quality_novelty_and_reproducibility": "45;36;43;197", 
"wc_summary_review": "29;30;32;47", "wc_review": "1048;577;221;688", "wc_reply_reviewers": "0;0;28;0", "wc_reply_authors": "1439;1195;457;1003", "reply_reviewers": "0;0;1;0", "reply_authors": "2;2;1;2", "recommendation_avg": [ 3.75, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "correctness_avg": [ 2.5, 0.8660254037844386 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 1.5, 0.8660254037844386 ], "wc_summary_paper_avg": [ 80.0, 29.690065678607045 ], "wc_strength_and_weaknesses_avg": [ 438.75, 274.2930686328038 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 80.25, 67.48842493346544 ], "wc_summary_review_avg": [ 34.5, 7.297259759663212 ], "wc_review_avg": [ 633.5, 295.0122878796746 ], "wc_reply_reviewers_avg": [ 7.0, 12.12435565298214 ], "wc_reply_authors_avg": [ 1023.5, 361.7302171508485 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_recommendation_confidence": -0.5222329678670935, "corr_recommendation_correctness": 1.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=460853860158347422&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1", "aff_unique_norm": "Sony Corporation;Technical University of Munich", "aff_unique_dep": ";", "aff_unique_url": "https://www.sony.com;https://www.tum.de", "aff_unique_abbr": "Sony;TUM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Japan;Germany" }, { "id": "TTduM2sE0Ja", "title": "Exp-$\\alpha$: Beyond Proportional Aggregation in Federated Learning", "track": "main", "status": "Reject", "tldr": "We theoretically study properties of proportional aggregation and propose a novel aggregation strategy for faster convergence under Non-IID setting.", "abstract": "Federated Learning (FL) is a distributed learning paradigm, which computes gradients of a model locally on different clients and aggregates the updates to construct a new model collectively. Typically, the updates from local clients are aggregated with weights proportional to the size of clients' local datasets. In practice, clients have different local datasets suffering from data heterogeneity, such as imbalance. Although proportional aggregation still theoretically converges to the global optimum, it is provably slower when non-IID data is present (under convexity assumptions), the effect of which is exacerbated in practice. We posit that this analysis ignores convergence rate, which is especially important under such settings in the more realistic non-convex real world. To account for this, we analyze a generic and time-varying aggregation strategy to reveal a surprising trade-off between convergence rate and convergence error under convexity assumptions. Inspired by the theory, we propose a new aggregation strategy, Exp-$\\alpha$, which weights clients differently based on their severity of data heterogeneity. It achieves stronger convergence rates at the theoretical cost of a non-vanishing convergence error. Through a series of controlled experiments, we empirically demonstrate the superior convergence behavior (both in terms of rate and, in practice, even error) of the proposed aggregation on three types of data heterogeneity: imbalance, label-flipping, and domain shift when combined with existing FL algorithms. 
For example, on our imbalance benchmark, Exp-$\\alpha$, combined with FedAvg, achieves a relative $12\\%$ increase in convergence rate and a relative $3\\%$ reduction in error across four FL communication settings. ", "keywords": "Federated Learning", "primary_area": "", "supplementary_material": "/attachment/7c88dd7e5c8eb6c24662148c6ca0315b2165af85.zip", "author": "Junjiao Tian;Xiaoliang Dai;Chih-Yao Ma;Zecheng He;Yen-Cheng Liu;Sayan Ghosh;Peter Vajda;Anqi Wu;Zsolt Kira", "authorids": "~Junjiao_Tian1;~Xiaoliang_Dai1;~Chih-Yao_Ma1;~Zecheng_He1;~Yen-Cheng_Liu1;~Sayan_Ghosh1;~Peter_Vajda1;~Anqi_Wu3;~Zsolt_Kira1", "gender": "M;M;M;M;;;;F;M", "homepage": ";;https://chihyaoma.github.io/;http://www.princeton.edu/~zechengh/;https://ycliu93.github.io/;;https://sites.google.com/site/vajdap;https://sites.google.com/view/brainml/home;https://faculty.cc.gatech.edu/~zk15", "dblp": "246/3115.htm;192/3904;198/0963;203/5675;29/7584;67/6126-4;44/5953;15/9453;36/4127", "google_scholar": "iHZD850AAAAJ;u4olrOcAAAAJ;HrrtgKkAAAAJ;tcwZh8oAAAAJ;yeAeAhsAAAAJ;WC_NlykAAAAJ;k8QB5VUAAAAJ;ptGYJiEAAAAJ;2a5XgNAAAAAJ", "orcid": ";;;;;;;0000-0002-7866-9455;0000-0002-2626-2004", "linkedin": ";;kevin-chih-yao-ma-9b5b3063/;;;;p%C3%A9ter-vajda-9a03aaa/;;", "or_profile": "~Junjiao_Tian1;~Xiaoliang_Dai1;~Chih-Yao_Ma1;~Zecheng_He1;~Yen-Cheng_Liu1;~Sayan_Ghosh1;~Peter_Vajda1;~Anqi_Wu3;~Zsolt_Kira1", "aff": "Georgia Institute of Technology;Meta Facebook;Meta;Meta;Georgia Institute of Technology;Meta Facebook;Meta;Georgia Institute of Technology;Georgia Tech Research Institute", "aff_domain": "gatech.edu;fb.com;meta.com;meta.com;gatech.edu;fb.com;meta.com;gatech.edu;gtri.gatech.edu", "position": "PhD student;Research Scientist;Research Scientist;Research Scientist;PhD student;Research Scientist;Researcher;Assistant Professor;Senior Research Scientist", "bibtex": "@misc{\ntian2023expalpha,\ntitle={Exp-\\${\\textbackslash}alpha\\$: Beyond Proportional Aggregation in Federated Learning},\nauthor={Junjiao Tian and Xiaoliang Dai and Chih-Yao Ma and Zecheng He and Yen-Cheng Liu and Sayan Ghosh and Peter Vajda and Anqi Wu and Zsolt Kira},\nyear={2023},\nurl={https://openreview.net/forum?id=TTduM2sE0Ja}\n}", "github": "", "project": "", "reviewers": "gbV4;Cggb;CKau;cWRm", "site": "https://openreview.net/forum?id=TTduM2sE0Ja", "pdf_size": 4702346, "recommendation": "5;5;6;6", "confidence": "3;3;3;1", "correctness": "3;2;3;3", "technical_novelty": "3;3;2;3", "empirical_novelty": "3;2;2;3", "wc_summary_paper": "62;80;78;76", "wc_strength_and_weaknesses": "227;291;619;75", "wc_clarity_quality_novelty_and_reproducibility": "16;39;5;140", "wc_summary_review": "42;66;5;32", "wc_review": "347;476;707;323", "wc_reply_reviewers": "83;0;0;0", "wc_reply_authors": "520;1256;1815;152", "reply_reviewers": "2;0;0;0", "reply_authors": "2;3;4;1", "recommendation_avg": [ 5.5, 0.5 ], "confidence_avg": [ 2.5, 0.8660254037844386 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.75, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.5, 0.5 ], "wc_summary_paper_avg": [ 74.0, 7.0710678118654755 ], "wc_strength_and_weaknesses_avg": [ 303.0, 198.59506539690256 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 50.0, 53.39007398384086 ], "wc_summary_review_avg": [ 36.25, 21.867498713844707 ], "wc_review_avg": [ 463.25, 152.2832476013038 ], "wc_reply_reviewers_avg": [ 20.75, 35.94005425705421 ], "wc_reply_authors_avg": [ 935.75, 644.7388521719472 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.5, 
1.118033988749895 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 9, 0 ], "corr_recommendation_confidence": -0.5773502691896257, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:pi2Mx_1krpoJ:scholar.google.com/&scioq=Exp-%24%5Calpha%24:+Beyond+Proportional+Aggregation+in+Federated+Learning&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;1;1;1;0;1;1;0;2", "aff_unique_norm": "Georgia Institute of Technology;Meta;Georgia Tech Research Institute", "aff_unique_dep": ";Meta Platforms, Inc.;", "aff_unique_url": "https://www.gatech.edu;https://meta.com;https://www.gtri.gatech.edu", "aff_unique_abbr": "Georgia Tech;Meta;GTRI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Neural Design for Genetic Perturbation Experiments", "status": "Top-25%", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11460", "id": "TUBpc5rqGA", "poster": "/media/PosterPDFs/ICLR%202023/11460.png?t=1681119172.1709704", "openreview": "https://openreview.net/forum?id=TUBpc5rqGA", "slides": "https://iclr.cc/virtual/2023/poster/11460", "video": "https://iclr.cc/virtual/2023/poster/11460", "author_site": "Aldo Pacchiano, Drausin Wulsin, Robert Barton, Luis Voloch", "tldr": "We introduce and analyze many tractable methods for noiseless optimistic arm elimination with applications in genetic perturbation experiments.", "abstract": "The problem of how to genetically modify cells in order to maximize a certain cellular phenotype has taken center stage in drug development over the last few years (with, for example, genetically edited CAR-T, CAR-NK, and CAR-NKT cells entering cancer clinical trials). Exhausting the search space for all possible genetic edits (perturbations) or combinations thereof is infeasible due to cost and experimental limitations. This work provides a theoretically sound framework for iteratively exploring the space of perturbations in pooled batches in order to maximize a target phenotype under an experimental budget. Inspired by this application domain, we study the problem of batch query bandit optimization and introduce the Optimistic Arm Elimination ($\\mathrm{OAE}$) principle designed to find an almost optimal arm under different functional relationships between the queries (arms) and the outputs (rewards). We analyze the convergence properties of $\\mathrm{OAE}$ by relating it to the Eluder dimension of the algorithm's function class and validate that $\\mathrm{OAE}$ outperforms other strategies in finding optimal actions in experiments on simulated problems, on public datasets well-studied in bandit contexts, and on genetic perturbation datasets when the regression model is a deep neural network. OAE also outperforms the benchmark algorithms on 3 of 4 datasets in the GeneDisco experimental planning challenge.
", "keywords": "genetiic perturbation experiments;gene disco;optimism;neural optimism", "primary_area": "", "supplementary_material": "/attachment/ffb6f99135593550230f2491700fb42284a2917c.zip", "author": "Aldo Pacchiano;Drausin Wulsin;Robert A Barton;Luis Voloch", "authorids": "~Aldo_Pacchiano1;~Drausin_Wulsin1;~Robert_A_Barton1;luis@immunai.com", "gender": "M;;M;", "homepage": "https://www.aldopacchiano.ai;;;", "dblp": "129/6338;23/9077;;", "google_scholar": "no_BfYgAAAAJ;_PXVUn4AAAAJ;uIGfO6oAAAAJ;", "orcid": ";;;", "linkedin": ";drausin-wulsin-3a3a8239/;;", "or_profile": "~Aldo_Pacchiano1;~Drausin_Wulsin1;~Robert_A_Barton1;luis@immunai.com", "aff": "Microsoft;;;", "aff_domain": "microsoft.com;;;", "position": "Postdoc;;;", "bibtex": "@inproceedings{\npacchiano2023neural,\ntitle={Neural Design for Genetic Perturbation Experiments},\nauthor={Aldo Pacchiano and Drausin Wulsin and Robert A Barton and Luis Voloch},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=TUBpc5rqGA}\n}", "github": "", "project": "", "reviewers": "bZbX;UfWC;s1ZP;sSRW", "pdf_size": 6953558, "recommendation": "6;6;8;8", "confidence": "3;3;4;2", "correctness": "3;3;4;4", "technical_novelty": "2;3;4;2", "empirical_novelty": "2;2;4;4", "wc_summary_paper": "26;66;121;124", "wc_strength_and_weaknesses": "356;350;293;112", "wc_clarity_quality_novelty_and_reproducibility": "268;61;23;15", "wc_summary_review": "66;78;44;71", "wc_review": "716;555;481;322", "wc_reply_reviewers": "47;318;55;108", "wc_reply_authors": "1683;1062;629;1166", "reply_reviewers": "1;2;1;1", "reply_authors": "4;4;2;5", "recommendation_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 3.0, 1.0 ], "wc_summary_paper_avg": [ 84.25, 40.794454279963105 ], "wc_strength_and_weaknesses_avg": [ 277.75, 98.8037828223191 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 91.75, 103.231233161287 ], "wc_summary_review_avg": [ 64.75, 12.71563997602952 ], "wc_review_avg": [ 518.5, 141.73655139024655 ], "wc_reply_reviewers_avg": [ 132.0, 109.91587692412776 ], "wc_reply_authors_avg": [ 1135.0, 375.0433308299189 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.75, 1.0897247358851685 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.0, "corr_recommendation_correctness": 1.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6425279916988948543&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "pdf": "https://openreview.net/pdf?id=TUBpc5rqGA", "email": "microsoft.com;;;", "author_num": 4, "aff_unique_index": "0", "aff_unique_norm": "Microsoft", "aff_unique_dep": "Microsoft Corporation", "aff_unique_url": "https://www.microsoft.com", "aff_unique_abbr": "Microsoft", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "TUhgwGQBtE", "title": "Do Not Train It: A Linear Neural Architecture Search of Graph Neural Networks", "track": "main", "status": "Reject", "tldr": "", "abstract": "Neural architecture search (NAS) for Graph neural networks (GNNs), called NAS-GNNs, has achieved significant performance over manually designed GNN architectures. However, these methods inherit issues from the conventional NAS methods, such as high computational cost and optimization difficulty. 
More importantly, previous NAS methods have ignored the uniqueness of GNNs, where the non-linearity has limited effect. Based on this, we are the first to theoretically prove that a GNN with fixed random weights can obtain optimal outputs under mild conditions. With these randomly initialized weights, we can then seek the optimal architecture parameters via the sparse coding objective and derive a novel NAS-GNN method, namely neural architecture coding (NAC). Consequently, our NAC requires no updates of the GNN weights and can be computed efficiently in linear time. Empirical evaluations on multiple GNN benchmark datasets demonstrate that our approach leads to state-of-the-art performance, which is up to $200\\times$ faster and $18.8\\%$ more accurate than strong baselines.", "keywords": "Neural Architecture Search;Graph neural network;Automated Machine Learning", "primary_area": "", "supplementary_material": "/attachment/623b99a5ad67c72fb4b4e22813c5d8aa408f8058.zip", "author": "Peng XU;Lin Zhang;Xuanzhou Liu;Jiaqi Sun;Yue Zhao;Haiqin Yang;Bei Yu", "authorids": "~Peng_XU10;~Lin_Zhang9;~Xuanzhou_Liu1;~Jiaqi_Sun1;~Yue_Zhao13;~Haiqin_Yang2;~Bei_Yu2", "gender": ";;M;;M;;M", "homepage": ";;https://github.com/XuanzhouLiu;;https://viterbi-web.usc.edu/~yzhao010/;;http://www.cse.cuhk.edu.hk/~byu/index.html", "dblp": ";;;;48/76-16;;28/4556-1.html", "google_scholar": ";;;;https://scholar.google.ca/citations?user=zoGDYsoAAAAJ;;tGneTm4AAAAJ", "orcid": ";;;;0000-0003-3401-4921;;0000-0001-6406-4810", "linkedin": ";;;;yzhao062/;;yubei/", "or_profile": "~Peng_XU10;~Lin_Zhang9;~Xuanzhou_Liu1;~Jiaqi_Sun1;~Yue_Zhao13;~Haiqin_Yang2;~Bei_Yu2", "aff": ";;Electronic Engineering, Tsinghua University, Tsinghua University;;Carnegie Mellon University;;Department of Computer Science and Engineering, The Chinese University of Hong Kong", "aff_domain": ";;mails.tsinghua.edu.cn;;cmu.edu;;cse.cuhk.edu.hk", "position": ";;MS student;;PhD student;;Associate Professor", "bibtex": "@misc{\nxu2023do,\ntitle={Do Not Train It: A Linear Neural Architecture Search of Graph Neural Networks},\nauthor={Peng XU and Lin Zhang and Xuanzhou Liu and Jiaqi Sun and Yue Zhao and Haiqin Yang and Bei Yu},\nyear={2023},\nurl={https://openreview.net/forum?id=TUhgwGQBtE}\n}", "github": "", "project": "", "reviewers": "WVaT;GxHc;oy8p;P1Y6;9TQX", "site": "https://openreview.net/forum?id=TUhgwGQBtE", "pdf_size": 565494, "recommendation": "5;5;5;5;6", "confidence": "3;4;3;4;4", "correctness": "3;3;3;3;3", "technical_novelty": "2;3;2;3;3", "empirical_novelty": "2;2;3;3;3", "wc_summary_paper": "64;37;38;110;78", "wc_strength_and_weaknesses": "68;171;310;396;82", "wc_clarity_quality_novelty_and_reproducibility": "19;17;17;78;26", "wc_summary_review": "269;27;25;83;185", "wc_review": "420;252;390;667;371", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "730;504;1022;784;596", "reply_reviewers": "0;0;0;0;0", "reply_authors": "2;2;2;2;2", "recommendation_avg": [ 5.2, 0.39999999999999997 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.6, 0.4898979485566356 ], "empirical_novelty_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_paper_avg": [ 65.4, 27.22939588018801 ], "wc_strength_and_weaknesses_avg": [ 205.4, 128.49840465935753 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 31.4, 23.53380547212881 ], "wc_summary_review_avg": [ 117.8, 95.33603725769181 ], "wc_review_avg": [ 420.0, 136.03970008787874 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 727.2,
177.29794133040573 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_recommendation_confidence": 0.408248290463863, "corr_recommendation_correctness": 0.0, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8739392537587140285&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 10, "aff_unique_index": "0;1;2", "aff_unique_norm": "Tsinghua University;Carnegie Mellon University;Chinese University of Hong Kong", "aff_unique_dep": "Electronic Engineering;;Department of Computer Science and Engineering", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.cmu.edu;https://www.cuhk.edu.hk", "aff_unique_abbr": "THU;CMU;CUHK", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;0", "aff_country_unique": "China;United States" }, { "id": "TVAFpPEWSn7", "title": "On the Activation Function Dependence of the Spectral Bias of Neural Networks", "track": "main", "status": "Withdraw", "tldr": "", "abstract": "Neural networks are universal function approximators which are known to generalize well despite being dramatically overparameterized. We study this phenomenon from the point of view of the spectral bias of neural networks. Our contributions are two-fold. First, we provide a theoretical explanation for the spectral bias of ReLU neural networks by leveraging connections with the theory of finite element methods, which is widely used to numerically solve PDEs. Second, based upon this theory we predict that switching the activation function to a piecewise linear B-spline, namely the Hat function, will remove this spectral bias, which we verify empirically in a variety of settings. This is of particular significance for solving PDEs using neural networks since for such problems it is important to capture all frequencies in the solutions. Our empirical studies also show that neural networks with the Hat activation function are trained significantly faster using stochastic gradient descent and ADAM. Combined with previous work showing that the Hat activation function also improves generalization accuracy on image classification tasks, this indicates that using the Hat activation provides significant advantages over the ReLU on a variety of problems.\n", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/f2ff0d2b7ed763a7a9a24a532898ccf624a7d87d.zip", "author": "Qingguo Hong;Jonathan W. Siegel;Qinyang Tan;Jinchao Xu", "authorids": "~Qingguo_Hong1;~Jonathan_W._Siegel1;~Qinyang_Tan1;~Jinchao_Xu1", "gender": "M;M;M;M", "homepage": ";https://jwsiegel2510.github.io;;https://www.personal.psu.edu/jxx1/", "dblp": ";239/6028;;", "google_scholar": "Nc-m7uUAAAAJ;oI42qIIAAAAJ;;pBHiYxcAAAAJ", "orcid": ";;;", "linkedin": ";;qinyang-tan-195403230/;", "or_profile": "~Qingguo_Hong1;~Jonathan_W._Siegel1;~Qinyang_Tan1;~Jinchao_Xu1", "aff": ";Texas A&M University - College Station;University of Southern California;Pennsylvania State University", "aff_domain": ";tamu.edu;usc.edu;psu.edu", "position": ";Assistant Professor;PhD student;Full Professor", "bibtex": "@misc{\nhong2023on,\ntitle={On the Activation Function Dependence of the Spectral Bias of Neural Networks},\nauthor={Qingguo Hong and Jonathan W. 
Siegel and Qinyang Tan and Jinchao Xu},\nyear={2023},\nurl={https://openreview.net/forum?id=TVAFpPEWSn7}\n}", "github": "", "project": "", "reviewers": "KCyB;hgY9;wJVv;TZkP", "site": "https://openreview.net/forum?id=TVAFpPEWSn7", "pdf_size": 2421294, "recommendation": "3;3;5;6", "confidence": "4;3;4;4", "correctness": "3;2;3;3", "technical_novelty": "2;3;2;3", "empirical_novelty": "2;2;2;2", "wc_summary_paper": "57;53;98;108", "wc_strength_and_weaknesses": "353;565;91;187", "wc_clarity_quality_novelty_and_reproducibility": "9;211;219;373", "wc_summary_review": "34;114;75;56", "wc_review": "453;943;483;724", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "recommendation_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "correctness_avg": [ 2.75, 0.4330127018922193 ], "technical_novelty_avg": [ 2.5, 0.5 ], "empirical_novelty_avg": [ 2.0, 0.0 ], "wc_summary_paper_avg": [ 79.0, 24.300205760445735 ], "wc_strength_and_weaknesses_avg": [ 299.0, 179.91664736760742 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 203.0, 129.28263611173776 ], "wc_summary_review_avg": [ 69.75, 29.38005275693017 ], "wc_review_avg": [ 650.75, 198.75911928764427 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.5555555555555555, "corr_recommendation_correctness": 0.5555555555555555, "gs_citation": 34, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13506702075126795868&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Texas A&M University;University of Southern California;Pennsylvania State University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tamu.edu;https://www.usc.edu;https://www.psu.edu", "aff_unique_abbr": "TAMU;USC;PSU", "aff_campus_unique_index": "0;1", "aff_campus_unique": "College Station;Los Angeles;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "TVMjn0RpLHf", "title": "Topology Matters in Fair Graph Learning: a Theoretical Pilot Study", "track": "main", "status": "Reject", "tldr": "A theoretical pilot study to show why GNNs amplify prediction bias", "abstract": "Recent advances in fair graph learning observe that graph neural networks (GNNs) further amplify prediction bias compared with a multilayer perceptron (MLP), while the reason behind this is unknown. In this paper, we conduct a theoretical analysis of the bias amplification mechanism in GNNs. This is a challenging task since GNNs are difficult to interpret, and real-world networks are complex. To bridge the gap, we theoretically and experimentally demonstrate that the aggregation operation in representative GNNs accumulates bias in node representations due to the topology bias induced by the graph topology. We provide a sufficient condition, in terms of the statistical information of the graph data, under which graph aggregation enhances prediction bias in GNNs. \n Motivated by this data-centric finding, we propose a fair graph refinement algorithm, named \textit{FairGR}, to rewire the graph topology to reduce the sensitive homophily coefficient while preserving useful graph topology. Experiments on node classification tasks demonstrate that \textit{FairGR} can mitigate the prediction bias with comparable performance on three real-world datasets.
Additionally, \\textit{FairGR} is compatible with many state-of-the-art methods, such as adding regularization, adversarial debiasing, and Fair mixup, via refining the graph topology. Therefore, \\textit{FairGR} is a plug-in fairness method and can be adapted to improve existing fair graph learning strategies. ", "keywords": "Graph Neural Networks;Fairness;Topology", "primary_area": "", "supplementary_material": "", "author": "Zhimeng Jiang;Xiaotian Han;Chao Fan;Zirui Liu;Xiao Huang;Na Zou;Ali Mostafavi;Xia Hu", "authorids": "~Zhimeng_Jiang1;~Xiaotian_Han1;~Chao_Fan2;~Zirui_Liu1;~Xiao_Huang1;~Na_Zou2;~Ali_Mostafavi2;~Xia_Hu4", "gender": "M;M;;M;M;F;M;M", "homepage": "http://www.zhimengjiang.com/;https://ahxt.github.io/;https://fanchaolab.com;https://zirui-ray-liu.github.io/;https://www4.comp.polyu.edu.hk/~xiaohuang/;https://nzou1.github.io/;;https://cs.rice.edu/~xh37/index.html", "dblp": "217/3235;;;196/8629-1.html;25/692-1.html;152/0090-1.html;;256/9406.html", "google_scholar": "5Es3Yk4AAAAJ;Uromx98AAAAJ;3k_B_zUAAAAJ;https://scholar.google.com/citations?hl=zh-CN;Be21PkYAAAAJ;https://scholar.google.com/citations?hl=en;DFNvQPYAAAAJ;https://scholar.google.com.tw/citations?user=pcCS60IAAAAJ", "orcid": "0000-0001-6933-3952;;;;0000-0002-3867-900X;0000-0003-1984-795X;;", "linkedin": ";;;;;na-zou-a1721535/;;", "or_profile": "~Zhimeng_Jiang1;~Xiaotian_Han1;~Chao_Fan2;~Zirui_Liu1;~Xiao_Huang1;~Na_Zou2;~Ali_Mostafavi2;~Xia_Hu2", "aff": "Texas A&M University;Texas A&M University;Clemson University;Rice University;The Hong Kong Polytechnic University;Texas A&M University - College Station;Texas A&M;Rice University", "aff_domain": "tamu.edu;tamu.edu;clemson.edu;rice.edu;polyu.edu.hk;tamu.edu;tamu.edu;rice.edu", "position": "PhD student;PhD student;Assistant Professor;PhD student;Assistant Professor;Assistant Professor;Associate Professor;Associate Professor", "bibtex": "@misc{\njiang2023topology,\ntitle={Topology Matters in Fair Graph Learning: a Theoretical Pilot Study},\nauthor={Zhimeng Jiang and Xiaotian Han and Chao Fan and Zirui Liu and Xiao Huang and Na Zou and Ali Mostafavi and Xia Hu},\nyear={2023},\nurl={https://openreview.net/forum?id=TVMjn0RpLHf}\n}", "github": "", "project": "", "reviewers": "K1rB;bTV8;qbDf;SmbS", "site": "https://openreview.net/forum?id=TVMjn0RpLHf", "pdf_size": 1884902, "recommendation": "3;6;6;6", "confidence": "2;3;2;3", "correctness": "3;3;4;4", "technical_novelty": "2;3;2;4", "empirical_novelty": "1;3;2;3", "wc_summary_paper": "70;61;61;84", "wc_strength_and_weaknesses": "204;267;145;217", "wc_clarity_quality_novelty_and_reproducibility": "19;45;22;55", "wc_summary_review": "73;152;17;47", "wc_review": "366;525;245;403", "wc_reply_reviewers": "122;52;42;82", "wc_reply_authors": "2250;467;914;916", "reply_reviewers": "1;1;1;1", "reply_authors": "6;2;4;2", "recommendation_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 2.5, 0.5 ], "correctness_avg": [ 3.5, 0.5 ], "technical_novelty_avg": [ 2.75, 0.82915619758885 ], "empirical_novelty_avg": [ 2.25, 0.82915619758885 ], "wc_summary_paper_avg": [ 69.0, 9.40744386111339 ], "wc_strength_and_weaknesses_avg": [ 208.25, 43.43601616170618 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 35.25, 15.20485119953497 ], "wc_summary_review_avg": [ 72.25, 50.12671443452084 ], "wc_review_avg": [ 384.75, 99.85583358021704 ], "wc_reply_reviewers_avg": [ 74.5, 31.12474899497183 ], "wc_reply_authors_avg": [ 1136.75, 668.2512158612209 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.5, 1.6583123951777 ],
"replies_avg": [ 24, 0 ], "authors#_avg": [ 8, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16508704730984092978&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0;1;2;3;0;0;2", "aff_unique_norm": "Texas A&M University;Clemson University;Rice University;Hong Kong Polytechnic University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.tamu.edu;https://www.clemson.edu;https://www.rice.edu;https://www.polyu.edu.hk", "aff_unique_abbr": "TAMU;Clemson;Rice;PolyU", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Hong Kong SAR;College Station", "aff_country_unique_index": "0;0;0;0;1;0;0;0", "aff_country_unique": "United States;China" }, { "title": "Private Federated Learning Without a Trusted Server: Optimal Algorithms for Convex Losses", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11920", "id": "TVY6GoURrw", "poster": "", "openreview": "https://openreview.net/forum?id=TVY6GoURrw", "slides": "https://iclr.cc/virtual/2023/poster/11920", "video": "https://iclr.cc/virtual/2023/poster/11920", "author_site": "Andrew Lowy, Meisam Razaviyayn", "tldr": "Optimal algorithms for differentially private convex/strongly convex federated learning with data from people who do not trust the server or other silos/clients. ", "abstract": "This paper studies federated learning (FL)\u2014especially cross-silo FL\u2014with data from people who do not trust the server or other silos. In this setting, each silo (e.g. hospital) has data from different people (e.g. patients) and must maintain the privacy of each person\u2019s data (e.g. medical record), even if the server or other silos act as adversarial eavesdroppers. This requirement motivates the study of Inter-Silo Record-Level Differential Privacy (ISRL-DP), which requires silo $i$\u2019s communications to satisfy record/item-level differential privacy (DP). ISRL-DP ensures that the data of each person (e.g. patient) in silo $i$ (e.g. hospital $i$) cannot be leaked. ISRL-DP is different from well-studied privacy notions. Central and user-level DP assume that people trust the server/other silos. On the other end of the spectrum, local DP assumes that people do not trust anyone at all (even their own silo). Sitting between central and local DP, ISRL-DP makes the realistic assumption (in cross-silo FL) that people trust their own silo, but not the server or other silos. In this work, we provide tight (up to logarithms) upper and lower bounds for ISRL-DP FL with convex/strongly convex loss functions and homogeneous (i.i.d.) silo data. Remarkably, we show that similar bounds are attainable for smooth losses with arbitrary heterogeneous silo data distributions, via an accelerated ISRL-DP algorithm. We also provide tight upper and lower bounds for ISRL-DP federated empirical risk minimization, and use acceleration to attain the optimal bounds in fewer rounds of communication than the state-of-the-art. Finally, with a secure \u201cshuffler\u201d to anonymize silo messages (but without a trusted server), our algorithm attains the optimal central DP rates under more practical trust assumptions. 
Numerical experiments show favorable privacy-accuracy tradeoffs for our algorithm in classification and regression tasks.", "keywords": "differential privacy;federated learning;distributed optimization;private optimization;stochastic convex optimization;cross-silo federated learning", "primary_area": "", "supplementary_material": "/attachment/aca5111c370c007fdad6310397d0dadf16b95fab.zip", "author": "Andrew Lowy;Meisam Razaviyayn", "authorids": "~Andrew_Lowy1;~Meisam_Razaviyayn1", "gender": ";M", "homepage": "https://sites.google.com/view/andrewlowy;https://sites.usc.edu/razaviyayn/", "dblp": "285/5314;43/8577", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en", "orcid": ";", "linkedin": ";", "or_profile": "~Andrew_Lowy1;~Meisam_Razaviyayn1", "aff": "University of Southern California;Google", "aff_domain": "usc.edu;google.com", "position": "PhD student;Researcher", "bibtex": "@inproceedings{\nlowy2023private,\ntitle={Private Federated Learning Without a Trusted Server: Optimal Algorithms for Convex Losses},\nauthor={Andrew Lowy and Meisam Razaviyayn},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=TVY6GoURrw}\n}", "github": "", "project": "", "reviewers": "beiC;SUpG;Gj6i;aKXR", "pdf_size": 3492611, "recommendation": "6;6;6;8", "confidence": "3;3;4;4", "correctness": "4;4;4;4", "technical_novelty": "3;3;3;3", "empirical_novelty": "2;3;0;3", "wc_summary_paper": "288;61;99;38", "wc_strength_and_weaknesses": "333;142;335;164", "wc_clarity_quality_novelty_and_reproducibility": "165;47;10;25", "wc_summary_review": "140;41;46;16", "wc_review": "926;291;490;243", "wc_reply_reviewers": "43;0;0;38", "wc_reply_authors": "458;441;765;628", "reply_reviewers": "1;0;0;1", "reply_authors": "2;2;2;2", "recommendation_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "correctness_avg": [ 4.0, 0.0 ], "technical_novelty_avg": [ 3.0, 0.0 ], "empirical_novelty_avg": [ 2.0, 1.224744871391589 ], "wc_summary_paper_avg": [ 121.5, 98.56596775763936 ], "wc_strength_and_weaknesses_avg": [ 243.5, 90.8363913858317 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 61.75, 61.04660105198323 ], "wc_summary_review_avg": [ 60.75, 47.14538683688999 ], "wc_review_avg": [ 487.5, 269.5742012878829 ], "wc_reply_reviewers_avg": [ 20.25, 20.327014045353536 ], "wc_reply_authors_avg": [ 573.0, 132.7949547234382 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_recommendation_confidence": 0.5773502691896257, "corr_recommendation_correctness": 0.0, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10344781971428283735&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "pdf": "https://openreview.net/pdf?id=TVY6GoURrw", "email": "usc.edu;google.com", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "University of Southern California;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.usc.edu;https://www.google.com", "aff_unique_abbr": "USC;Google", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Los Angeles;Mountain View", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "TXH64IwWgS", "title": "Inapplicable Actions Learning for Knowledge Transfer in Reinforcement Learning", "track": "main", "status": "Withdraw", "tldr": "This paper presents a framework to use, learn and reuse knowledge 
about state-dependent inapplicable actions in order to improve the sample efficiency of RL algorithms.", "abstract": "Reinforcement Learning (RL) algorithms are known to scale poorly to environments with many available actions, requiring numerous samples to learn an optimal policy. The traditional approach of considering the same fixed action space in every possible state implies that the agent must learn, while also learning to maximize its reward, to ignore irrelevant actions such as $\\textit{inapplicable actions}$ (i.e. actions that have no effect on the environment when performed in a given state). Knowing this information can help reduce the sample complexity of RL algorithms by masking the inapplicable actions from the policy distribution to only explore actions relevant to finding an optimal policy. This is typically done in an ad-hoc manner with hand-crafted domain logic added to the RL algorithm. In this paper, we propose a more systematic approach to introduce this knowledge into the algorithm. We (i) standardize the way knowledge can be manually specified to the agent; and (ii) present a new framework to autonomously learn these state-dependent action constraints jointly with the policy. We show experimentally that learning inapplicable actions greatly improves the sample efficiency of the algorithm by providing a reliable signal to mask out irrelevant actions. Moreover, we demonstrate that, thanks to the transferability of the knowledge acquired, it can be reused in other tasks to make the learning process more efficient.", "keywords": "reinforcement learning;transfer learning", "primary_area": "", "supplementary_material": "/attachment/2b0c547d0d8eedf81bb79cf5501a1843bd949dcb.zip", "author": "Leo Ardon;Alberto Pozanco;Daniel Borrajo;Sumitra Ganesh", "authorids": "~Leo_Ardon1;~Alberto_Pozanco1;daniel.borrajo@jpmchase.com;~Sumitra_Ganesh1", "gender": ";;;F", "homepage": ";;;", "dblp": ";;;98/463.html", "google_scholar": "yz1AsB0AAAAJ;;;https://scholar.google.com/citations?hl=en", "orcid": "0000-0003-4400-7127;;;", "linkedin": ";;;sumitra-ganesh-0379853", "or_profile": "~Leo_Ardon1;~Alberto_Pozanco1;daniel.borrajo@jpmchase.com;~Sumitra_Ganesh1", "aff": "J.P. Morgan AI Research;;;J.P.
Morgan Chase", "aff_domain": "jpmorgan.com;;;jpmorgan.com", "position": "Researcher;;;Researcher", "bibtex": "@misc{\nardon2023inapplicable,\ntitle={Inapplicable Actions Learning for Knowledge Transfer in Reinforcement Learning},\nauthor={Leo Ardon and Alberto Pozanco and Daniel Borrajo and Sumitra Ganesh},\nyear={2023},\nurl={https://openreview.net/forum?id=TXH64IwWgS}\n}", "github": "", "project": "", "reviewers": "q3BH;c5jh;9CRv;HWfy", "site": "https://openreview.net/forum?id=TXH64IwWgS", "pdf_size": 3014195, "recommendation": "3;3;3;6", "confidence": "4;2;4;4", "correctness": "3;3;3;3", "technical_novelty": "2;3;2;2", "empirical_novelty": "2;2;2;3", "wc_summary_paper": "91;122;63;86", "wc_strength_and_weaknesses": "192;350;12;757", "wc_clarity_quality_novelty_and_reproducibility": "54;23;226;195", "wc_summary_review": "32;26;9;84", "wc_review": "369;521;310;1122", "wc_reply_reviewers": "0;0;0;256", "wc_reply_authors": "279;602;695;695", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;1", "recommendation_avg": [ 3.75, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "correctness_avg": [ 3.0, 0.0 ], "technical_novelty_avg": [ 2.25, 0.4330127018922193 ], "empirical_novelty_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_paper_avg": [ 90.5, 21.02974084481309 ], "wc_strength_and_weaknesses_avg": [ 327.75, 275.1711967121559 ], "wc_clarity_quality_novelty_and_reproducibility_avg": [ 124.5, 87.38563955250314 ], "wc_summary_review_avg": [ 37.75, 28.003348014121453 ], "wc_review_avg": [ 580.5, 321.972436708486 ], "wc_reply_reviewers_avg": [ 64.0, 110.85125168440814 ], "wc_reply_authors_avg": [ 567.75, 170.97861708412546 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_recommendation_confidence": 0.3333333333333333, "corr_recommendation_correctness": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:X-s2lFM1X-wJ:scholar.google.com/&scioq=Inapplicable+Actions+Learning+for+Knowledge+Transfer+in+Reinforcement+Learning&hl=en&as_sdt=0,5", "gs_version_total": 4, "aff_unique_index": "0;1", "aff_unique_norm": "J.P. Morgan;JPMorgan Chase & Co.", "aff_unique_dep": "AI Research;", "aff_unique_url": "https://www.jpmorgan.com;https://www.jpmorganchase.com", "aff_unique_abbr": "JPM;JPM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Learning Sparse and Low-Rank Priors for Image Recovery via Iterative Reweighted Least Squares Minimization", "status": "Poster", "track": "main", "site": "https://iclr.cc/virtual/2023/poster/11996", "id": "TXPN6MtdSE4", "poster": "/media/PosterPDFs/ICLR%202023/11996.png?t=1681986593.8120308", "openreview": "https://openreview.net/forum?id=TXPN6MtdSE4", "slides": "https://iclr.cc/virtual/2023/poster/11996", "video": "https://iclr.cc/virtual/2023/poster/11996", "author_site": "Stamatios Lefkimmiatis, Iaroslav Koshelev", "tldr": "", "abstract": "In this work we introduce a novel optimization algorithm for image recovery under learned sparse and low-rank constraints, which are parameterized with weighted extensions of the $\\ell_p^p$-vector and $\\mathcal{S}_p^p$ Schatten-matrix quasi-norms for $0\\!